The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 46dad3c1e57897ab9228332f03e1c14798d2d3b9
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024042347-create-item-ebf8@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
46dad3c1e578 ("init/main.c: Fix potential static_command_line memory overflow")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 46dad3c1e57897ab9228332f03e1c14798d2d3b9 Mon Sep 17 00:00:00 2001
From: Yuntao Wang <ytcoode(a)gmail.com>
Date: Fri, 12 Apr 2024 16:17:32 +0800
Subject: [PATCH] init/main.c: Fix potential static_command_line memory
overflow
We allocate memory of size 'xlen + strlen(boot_command_line) + 1' for
static_command_line, but the strings copied into static_command_line are
extra_command_line and command_line, rather than extra_command_line and
boot_command_line.
When strlen(command_line) > strlen(boot_command_line), static_command_line
will overflow.
This patch just recovers strlen(command_line) which was miss-consolidated
with strlen(boot_command_line) in the commit f5c7310ac73e ("init/main: add
checks for the return value of memblock_alloc*()")
Link: https://lore.kernel.org/all/20240412081733.35925-2-ytcoode@gmail.com/
Fixes: f5c7310ac73e ("init/main: add checks for the return value of memblock_alloc*()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Yuntao Wang <ytcoode(a)gmail.com>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat(a)kernel.org>
diff --git a/init/main.c b/init/main.c
index 881f6230ee59..5dcf5274c09c 100644
--- a/init/main.c
+++ b/init/main.c
@@ -636,6 +636,8 @@ static void __init setup_command_line(char *command_line)
if (!saved_command_line)
panic("%s: Failed to allocate %zu bytes\n", __func__, len + ilen);
+ len = xlen + strlen(command_line) + 1;
+
static_command_line = memblock_alloc(len, SMP_CACHE_BYTES);
if (!static_command_line)
panic("%s: Failed to allocate %zu bytes\n", __func__, len);
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 49ff3b4aec51e3abfc9369997cc603319b02af9a
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024042325-rockstar-civic-9c1a@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
49ff3b4aec51 ("KVM: x86/pmu: Do not mask LVTPC when handling a PMI on AMD platforms")
a16eb25b09c0 ("KVM: x86: Mask LVTPC when handling a PMI")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 49ff3b4aec51e3abfc9369997cc603319b02af9a Mon Sep 17 00:00:00 2001
From: Sandipan Das <sandipan.das(a)amd.com>
Date: Fri, 5 Apr 2024 16:55:55 -0700
Subject: [PATCH] KVM: x86/pmu: Do not mask LVTPC when handling a PMI on AMD
platforms
On AMD and Hygon platforms, the local APIC does not automatically set
the mask bit of the LVTPC register when handling a PMI and there is
no need to clear it in the kernel's PMI handler.
For guests, the mask bit is currently set by kvm_apic_local_deliver()
and unless it is cleared by the guest kernel's PMI handler, PMIs stop
arriving and break use-cases like sampling with perf record.
This does not affect non-PerfMonV2 guests because PMIs are handled in
the guest kernel by x86_pmu_handle_irq() which always clears the LVTPC
mask bit irrespective of the vendor.
Before:
$ perf record -e cycles:u true
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.001 MB perf.data (1 samples) ]
After:
$ perf record -e cycles:u true
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.002 MB perf.data (19 samples) ]
Fixes: a16eb25b09c0 ("KVM: x86: Mask LVTPC when handling a PMI")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sandipan Das <sandipan.das(a)amd.com>
Reviewed-by: Jim Mattson <jmattson(a)google.com>
[sean: use is_intel_compatible instead of !is_amd_or_hygon()]
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-ID: <20240405235603.1173076-3-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index cf37586f0466..ebf41023be38 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2776,7 +2776,8 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
r = __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL);
- if (r && lvt_type == APIC_LVTPC)
+ if (r && lvt_type == APIC_LVTPC &&
+ guest_cpuid_is_intel_compatible(apic->vcpu))
kvm_lapic_set_reg(apic, APIC_LVTPC, reg | APIC_LVT_MASKED);
return r;
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 49ff3b4aec51e3abfc9369997cc603319b02af9a
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024042320-deflected-pester-4998@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
49ff3b4aec51 ("KVM: x86/pmu: Do not mask LVTPC when handling a PMI on AMD platforms")
a16eb25b09c0 ("KVM: x86: Mask LVTPC when handling a PMI")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 49ff3b4aec51e3abfc9369997cc603319b02af9a Mon Sep 17 00:00:00 2001
From: Sandipan Das <sandipan.das(a)amd.com>
Date: Fri, 5 Apr 2024 16:55:55 -0700
Subject: [PATCH] KVM: x86/pmu: Do not mask LVTPC when handling a PMI on AMD
platforms
On AMD and Hygon platforms, the local APIC does not automatically set
the mask bit of the LVTPC register when handling a PMI and there is
no need to clear it in the kernel's PMI handler.
For guests, the mask bit is currently set by kvm_apic_local_deliver()
and unless it is cleared by the guest kernel's PMI handler, PMIs stop
arriving and break use-cases like sampling with perf record.
This does not affect non-PerfMonV2 guests because PMIs are handled in
the guest kernel by x86_pmu_handle_irq() which always clears the LVTPC
mask bit irrespective of the vendor.
Before:
$ perf record -e cycles:u true
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.001 MB perf.data (1 samples) ]
After:
$ perf record -e cycles:u true
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.002 MB perf.data (19 samples) ]
Fixes: a16eb25b09c0 ("KVM: x86: Mask LVTPC when handling a PMI")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sandipan Das <sandipan.das(a)amd.com>
Reviewed-by: Jim Mattson <jmattson(a)google.com>
[sean: use is_intel_compatible instead of !is_amd_or_hygon()]
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-ID: <20240405235603.1173076-3-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index cf37586f0466..ebf41023be38 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2776,7 +2776,8 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
r = __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL);
- if (r && lvt_type == APIC_LVTPC)
+ if (r && lvt_type == APIC_LVTPC &&
+ guest_cpuid_is_intel_compatible(apic->vcpu))
kvm_lapic_set_reg(apic, APIC_LVTPC, reg | APIC_LVT_MASKED);
return r;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 49ff3b4aec51e3abfc9369997cc603319b02af9a
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024042306-overfeed-simmering-3d3c@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
49ff3b4aec51 ("KVM: x86/pmu: Do not mask LVTPC when handling a PMI on AMD platforms")
a16eb25b09c0 ("KVM: x86: Mask LVTPC when handling a PMI")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 49ff3b4aec51e3abfc9369997cc603319b02af9a Mon Sep 17 00:00:00 2001
From: Sandipan Das <sandipan.das(a)amd.com>
Date: Fri, 5 Apr 2024 16:55:55 -0700
Subject: [PATCH] KVM: x86/pmu: Do not mask LVTPC when handling a PMI on AMD
platforms
On AMD and Hygon platforms, the local APIC does not automatically set
the mask bit of the LVTPC register when handling a PMI and there is
no need to clear it in the kernel's PMI handler.
For guests, the mask bit is currently set by kvm_apic_local_deliver()
and unless it is cleared by the guest kernel's PMI handler, PMIs stop
arriving and break use-cases like sampling with perf record.
This does not affect non-PerfMonV2 guests because PMIs are handled in
the guest kernel by x86_pmu_handle_irq() which always clears the LVTPC
mask bit irrespective of the vendor.
Before:
$ perf record -e cycles:u true
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.001 MB perf.data (1 samples) ]
After:
$ perf record -e cycles:u true
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.002 MB perf.data (19 samples) ]
Fixes: a16eb25b09c0 ("KVM: x86: Mask LVTPC when handling a PMI")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sandipan Das <sandipan.das(a)amd.com>
Reviewed-by: Jim Mattson <jmattson(a)google.com>
[sean: use is_intel_compatible instead of !is_amd_or_hygon()]
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-ID: <20240405235603.1173076-3-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index cf37586f0466..ebf41023be38 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2776,7 +2776,8 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
r = __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL);
- if (r && lvt_type == APIC_LVTPC)
+ if (r && lvt_type == APIC_LVTPC &&
+ guest_cpuid_is_intel_compatible(apic->vcpu))
kvm_lapic_set_reg(apic, APIC_LVTPC, reg | APIC_LVT_MASKED);
return r;
}
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 54c4ec5f8c471b7c1137a1f769648549c423c026
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024042357-dropbox-unhitched-ffb8@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
54c4ec5f8c47 ("serial: mxs-auart: add spinlock around changing cts state")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 54c4ec5f8c471b7c1137a1f769648549c423c026 Mon Sep 17 00:00:00 2001
From: Emil Kronborg <emil.kronborg(a)protonmail.com>
Date: Wed, 20 Mar 2024 12:15:36 +0000
Subject: [PATCH] serial: mxs-auart: add spinlock around changing cts state
The uart_handle_cts_change() function in serial_core expects the caller
to hold uport->lock. For example, I have seen the below kernel splat,
when the Bluetooth driver is loaded on an i.MX28 board.
[ 85.119255] ------------[ cut here ]------------
[ 85.124413] WARNING: CPU: 0 PID: 27 at /drivers/tty/serial/serial_core.c:3453 uart_handle_cts_change+0xb4/0xec
[ 85.134694] Modules linked in: hci_uart bluetooth ecdh_generic ecc wlcore_sdio configfs
[ 85.143314] CPU: 0 PID: 27 Comm: kworker/u3:0 Not tainted 6.6.3-00021-gd62a2f068f92 #1
[ 85.151396] Hardware name: Freescale MXS (Device Tree)
[ 85.156679] Workqueue: hci0 hci_power_on [bluetooth]
(...)
[ 85.191765] uart_handle_cts_change from mxs_auart_irq_handle+0x380/0x3f4
[ 85.198787] mxs_auart_irq_handle from __handle_irq_event_percpu+0x88/0x210
(...)
Cc: stable(a)vger.kernel.org
Fixes: 4d90bb147ef6 ("serial: core: Document and assert lock requirements for irq helpers")
Reviewed-by: Frank Li <Frank.Li(a)nxp.com>
Signed-off-by: Emil Kronborg <emil.kronborg(a)protonmail.com>
Link: https://lore.kernel.org/r/20240320121530.11348-1-emil.kronborg@protonmail.c…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c
index 4749331fe618..1e8853eae504 100644
--- a/drivers/tty/serial/mxs-auart.c
+++ b/drivers/tty/serial/mxs-auart.c
@@ -1086,11 +1086,13 @@ static void mxs_auart_set_ldisc(struct uart_port *port,
static irqreturn_t mxs_auart_irq_handle(int irq, void *context)
{
- u32 istat;
+ u32 istat, stat;
struct mxs_auart_port *s = context;
u32 mctrl_temp = s->mctrl_prev;
- u32 stat = mxs_read(s, REG_STAT);
+ uart_port_lock(&s->port);
+
+ stat = mxs_read(s, REG_STAT);
istat = mxs_read(s, REG_INTR);
/* ack irq */
@@ -1126,6 +1128,8 @@ static irqreturn_t mxs_auart_irq_handle(int irq, void *context)
istat &= ~AUART_INTR_TXIS;
}
+ uart_port_unlock(&s->port);
+
return IRQ_HANDLED;
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 54c4ec5f8c471b7c1137a1f769648549c423c026
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024042355-imagines-such-7bf6@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
54c4ec5f8c47 ("serial: mxs-auart: add spinlock around changing cts state")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 54c4ec5f8c471b7c1137a1f769648549c423c026 Mon Sep 17 00:00:00 2001
From: Emil Kronborg <emil.kronborg(a)protonmail.com>
Date: Wed, 20 Mar 2024 12:15:36 +0000
Subject: [PATCH] serial: mxs-auart: add spinlock around changing cts state
The uart_handle_cts_change() function in serial_core expects the caller
to hold uport->lock. For example, I have seen the below kernel splat,
when the Bluetooth driver is loaded on an i.MX28 board.
[ 85.119255] ------------[ cut here ]------------
[ 85.124413] WARNING: CPU: 0 PID: 27 at /drivers/tty/serial/serial_core.c:3453 uart_handle_cts_change+0xb4/0xec
[ 85.134694] Modules linked in: hci_uart bluetooth ecdh_generic ecc wlcore_sdio configfs
[ 85.143314] CPU: 0 PID: 27 Comm: kworker/u3:0 Not tainted 6.6.3-00021-gd62a2f068f92 #1
[ 85.151396] Hardware name: Freescale MXS (Device Tree)
[ 85.156679] Workqueue: hci0 hci_power_on [bluetooth]
(...)
[ 85.191765] uart_handle_cts_change from mxs_auart_irq_handle+0x380/0x3f4
[ 85.198787] mxs_auart_irq_handle from __handle_irq_event_percpu+0x88/0x210
(...)
Cc: stable(a)vger.kernel.org
Fixes: 4d90bb147ef6 ("serial: core: Document and assert lock requirements for irq helpers")
Reviewed-by: Frank Li <Frank.Li(a)nxp.com>
Signed-off-by: Emil Kronborg <emil.kronborg(a)protonmail.com>
Link: https://lore.kernel.org/r/20240320121530.11348-1-emil.kronborg@protonmail.c…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c
index 4749331fe618..1e8853eae504 100644
--- a/drivers/tty/serial/mxs-auart.c
+++ b/drivers/tty/serial/mxs-auart.c
@@ -1086,11 +1086,13 @@ static void mxs_auart_set_ldisc(struct uart_port *port,
static irqreturn_t mxs_auart_irq_handle(int irq, void *context)
{
- u32 istat;
+ u32 istat, stat;
struct mxs_auart_port *s = context;
u32 mctrl_temp = s->mctrl_prev;
- u32 stat = mxs_read(s, REG_STAT);
+ uart_port_lock(&s->port);
+
+ stat = mxs_read(s, REG_STAT);
istat = mxs_read(s, REG_INTR);
/* ack irq */
@@ -1126,6 +1128,8 @@ static irqreturn_t mxs_auart_irq_handle(int irq, void *context)
istat &= ~AUART_INTR_TXIS;
}
+ uart_port_unlock(&s->port);
+
return IRQ_HANDLED;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 54c4ec5f8c471b7c1137a1f769648549c423c026
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024042354-childless-album-3181@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
54c4ec5f8c47 ("serial: mxs-auart: add spinlock around changing cts state")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 54c4ec5f8c471b7c1137a1f769648549c423c026 Mon Sep 17 00:00:00 2001
From: Emil Kronborg <emil.kronborg(a)protonmail.com>
Date: Wed, 20 Mar 2024 12:15:36 +0000
Subject: [PATCH] serial: mxs-auart: add spinlock around changing cts state
The uart_handle_cts_change() function in serial_core expects the caller
to hold uport->lock. For example, I have seen the below kernel splat,
when the Bluetooth driver is loaded on an i.MX28 board.
[ 85.119255] ------------[ cut here ]------------
[ 85.124413] WARNING: CPU: 0 PID: 27 at /drivers/tty/serial/serial_core.c:3453 uart_handle_cts_change+0xb4/0xec
[ 85.134694] Modules linked in: hci_uart bluetooth ecdh_generic ecc wlcore_sdio configfs
[ 85.143314] CPU: 0 PID: 27 Comm: kworker/u3:0 Not tainted 6.6.3-00021-gd62a2f068f92 #1
[ 85.151396] Hardware name: Freescale MXS (Device Tree)
[ 85.156679] Workqueue: hci0 hci_power_on [bluetooth]
(...)
[ 85.191765] uart_handle_cts_change from mxs_auart_irq_handle+0x380/0x3f4
[ 85.198787] mxs_auart_irq_handle from __handle_irq_event_percpu+0x88/0x210
(...)
Cc: stable(a)vger.kernel.org
Fixes: 4d90bb147ef6 ("serial: core: Document and assert lock requirements for irq helpers")
Reviewed-by: Frank Li <Frank.Li(a)nxp.com>
Signed-off-by: Emil Kronborg <emil.kronborg(a)protonmail.com>
Link: https://lore.kernel.org/r/20240320121530.11348-1-emil.kronborg@protonmail.c…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c
index 4749331fe618..1e8853eae504 100644
--- a/drivers/tty/serial/mxs-auart.c
+++ b/drivers/tty/serial/mxs-auart.c
@@ -1086,11 +1086,13 @@ static void mxs_auart_set_ldisc(struct uart_port *port,
static irqreturn_t mxs_auart_irq_handle(int irq, void *context)
{
- u32 istat;
+ u32 istat, stat;
struct mxs_auart_port *s = context;
u32 mctrl_temp = s->mctrl_prev;
- u32 stat = mxs_read(s, REG_STAT);
+ uart_port_lock(&s->port);
+
+ stat = mxs_read(s, REG_STAT);
istat = mxs_read(s, REG_INTR);
/* ack irq */
@@ -1126,6 +1128,8 @@ static irqreturn_t mxs_auart_irq_handle(int irq, void *context)
istat &= ~AUART_INTR_TXIS;
}
+ uart_port_unlock(&s->port);
+
return IRQ_HANDLED;
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 54c4ec5f8c471b7c1137a1f769648549c423c026
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024042353-mayday-porcupine-efb6@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
54c4ec5f8c47 ("serial: mxs-auart: add spinlock around changing cts state")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 54c4ec5f8c471b7c1137a1f769648549c423c026 Mon Sep 17 00:00:00 2001
From: Emil Kronborg <emil.kronborg(a)protonmail.com>
Date: Wed, 20 Mar 2024 12:15:36 +0000
Subject: [PATCH] serial: mxs-auart: add spinlock around changing cts state
The uart_handle_cts_change() function in serial_core expects the caller
to hold uport->lock. For example, I have seen the below kernel splat,
when the Bluetooth driver is loaded on an i.MX28 board.
[ 85.119255] ------------[ cut here ]------------
[ 85.124413] WARNING: CPU: 0 PID: 27 at /drivers/tty/serial/serial_core.c:3453 uart_handle_cts_change+0xb4/0xec
[ 85.134694] Modules linked in: hci_uart bluetooth ecdh_generic ecc wlcore_sdio configfs
[ 85.143314] CPU: 0 PID: 27 Comm: kworker/u3:0 Not tainted 6.6.3-00021-gd62a2f068f92 #1
[ 85.151396] Hardware name: Freescale MXS (Device Tree)
[ 85.156679] Workqueue: hci0 hci_power_on [bluetooth]
(...)
[ 85.191765] uart_handle_cts_change from mxs_auart_irq_handle+0x380/0x3f4
[ 85.198787] mxs_auart_irq_handle from __handle_irq_event_percpu+0x88/0x210
(...)
Cc: stable(a)vger.kernel.org
Fixes: 4d90bb147ef6 ("serial: core: Document and assert lock requirements for irq helpers")
Reviewed-by: Frank Li <Frank.Li(a)nxp.com>
Signed-off-by: Emil Kronborg <emil.kronborg(a)protonmail.com>
Link: https://lore.kernel.org/r/20240320121530.11348-1-emil.kronborg@protonmail.c…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c
index 4749331fe618..1e8853eae504 100644
--- a/drivers/tty/serial/mxs-auart.c
+++ b/drivers/tty/serial/mxs-auart.c
@@ -1086,11 +1086,13 @@ static void mxs_auart_set_ldisc(struct uart_port *port,
static irqreturn_t mxs_auart_irq_handle(int irq, void *context)
{
- u32 istat;
+ u32 istat, stat;
struct mxs_auart_port *s = context;
u32 mctrl_temp = s->mctrl_prev;
- u32 stat = mxs_read(s, REG_STAT);
+ uart_port_lock(&s->port);
+
+ stat = mxs_read(s, REG_STAT);
istat = mxs_read(s, REG_INTR);
/* ack irq */
@@ -1126,6 +1128,8 @@ static irqreturn_t mxs_auart_irq_handle(int irq, void *context)
istat &= ~AUART_INTR_TXIS;
}
+ uart_port_unlock(&s->port);
+
return IRQ_HANDLED;
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 631426ba1d45a8672b177ee85ad4cabe760dd131
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024042338-ideally-setting-9717@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
631426ba1d45 ("mm/madvise: make MADV_POPULATE_(READ|WRITE) handle VM_FAULT_RETRY properly")
0f20bba1688b ("mm/gup: explicitly define and check internal GUP flags, disallow FOLL_TOUCH")
6cd06ab12d1a ("gup: make the stack expansion warning a bit more targeted")
9471f1f2f502 ("Merge branch 'expand-stack'")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 631426ba1d45a8672b177ee85ad4cabe760dd131 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david(a)redhat.com>
Date: Thu, 14 Mar 2024 17:12:59 +0100
Subject: [PATCH] mm/madvise: make MADV_POPULATE_(READ|WRITE) handle
VM_FAULT_RETRY properly
Darrick reports that in some cases where pread() would fail with -EIO and
mmap()+access would generate a SIGBUS signal, MADV_POPULATE_READ /
MADV_POPULATE_WRITE will keep retrying forever and not fail with -EFAULT.
While the madvise() call can be interrupted by a signal, this is not the
desired behavior. MADV_POPULATE_READ / MADV_POPULATE_WRITE should behave
like page faults in that case: fail and not retry forever.
A reproducer can be found at [1].
The reason is that __get_user_pages(), as called by
faultin_vma_page_range(), will not handle VM_FAULT_RETRY in a proper way:
it will simply return 0 when VM_FAULT_RETRY happened, making
madvise_populate()->faultin_vma_page_range() retry again and again, never
setting FOLL_TRIED->FAULT_FLAG_TRIED for __get_user_pages().
__get_user_pages_locked() does what we want, but duplicating that logic in
faultin_vma_page_range() feels wrong.
So let's use __get_user_pages_locked() instead, that will detect
VM_FAULT_RETRY and set FOLL_TRIED when retrying, making the fault handler
return VM_FAULT_SIGBUS (VM_FAULT_ERROR) at some point, propagating -EFAULT
from faultin_page() to __get_user_pages(), all the way to
madvise_populate().
But, there is an issue: __get_user_pages_locked() will end up re-taking
the MM lock and then __get_user_pages() will do another VMA lookup. In
the meantime, the VMA layout could have changed and we'd fail with
different error codes than we'd want to.
As __get_user_pages() will currently do a new VMA lookup either way, let
it do the VMA handling in a different way, controlled by a new
FOLL_MADV_POPULATE flag, effectively moving these checks from
madvise_populate() + faultin_page_range() in there.
With this change, Darricks reproducer properly fails with -EFAULT, as
documented for MADV_POPULATE_READ / MADV_POPULATE_WRITE.
[1] https://lore.kernel.org/all/20240313171936.GN1927156@frogsfrogsfrogs/
Link: https://lkml.kernel.org/r/20240314161300.382526-1-david@redhat.com
Link: https://lkml.kernel.org/r/20240314161300.382526-2-david@redhat.com
Fixes: 4ca9b3859dac ("mm/madvise: introduce MADV_POPULATE_(READ|WRITE) to prefault page tables")
Signed-off-by: David Hildenbrand <david(a)redhat.com>
Reported-by: Darrick J. Wong <djwong(a)kernel.org>
Closes: https://lore.kernel.org/all/20240311223815.GW1927156@frogsfrogsfrogs/
Cc: Darrick J. Wong <djwong(a)kernel.org>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Jason Gunthorpe <jgg(a)nvidia.com>
Cc: John Hubbard <jhubbard(a)nvidia.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/gup.c b/mm/gup.c
index af8edadc05d1..1611e73b1121 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1206,6 +1206,22 @@ static long __get_user_pages(struct mm_struct *mm,
/* first iteration or cross vma bound */
if (!vma || start >= vma->vm_end) {
+ /*
+ * MADV_POPULATE_(READ|WRITE) wants to handle VMA
+ * lookups+error reporting differently.
+ */
+ if (gup_flags & FOLL_MADV_POPULATE) {
+ vma = vma_lookup(mm, start);
+ if (!vma) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ if (check_vma_flags(vma, gup_flags)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ goto retry;
+ }
vma = gup_vma_lookup(mm, start);
if (!vma && in_gate_area(mm, start)) {
ret = get_gate_page(mm, start & PAGE_MASK,
@@ -1685,35 +1701,35 @@ long populate_vma_page_range(struct vm_area_struct *vma,
}
/*
- * faultin_vma_page_range() - populate (prefault) page tables inside the
- * given VMA range readable/writable
+ * faultin_page_range() - populate (prefault) page tables inside the
+ * given range readable/writable
*
* This takes care of mlocking the pages, too, if VM_LOCKED is set.
*
- * @vma: target vma
+ * @mm: the mm to populate page tables in
* @start: start address
* @end: end address
* @write: whether to prefault readable or writable
* @locked: whether the mmap_lock is still held
*
- * Returns either number of processed pages in the vma, or a negative error
- * code on error (see __get_user_pages()).
+ * Returns either number of processed pages in the MM, or a negative error
+ * code on error (see __get_user_pages()). Note that this function reports
+ * errors related to VMAs, such as incompatible mappings, as expected by
+ * MADV_POPULATE_(READ|WRITE).
*
- * vma->vm_mm->mmap_lock must be held. The range must be page-aligned and
- * covered by the VMA. If it's released, *@locked will be set to 0.
+ * The range must be page-aligned.
+ *
+ * mm->mmap_lock must be held. If it's released, *@locked will be set to 0.
*/
-long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start,
- unsigned long end, bool write, int *locked)
+long faultin_page_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end, bool write, int *locked)
{
- struct mm_struct *mm = vma->vm_mm;
unsigned long nr_pages = (end - start) / PAGE_SIZE;
int gup_flags;
long ret;
VM_BUG_ON(!PAGE_ALIGNED(start));
VM_BUG_ON(!PAGE_ALIGNED(end));
- VM_BUG_ON_VMA(start < vma->vm_start, vma);
- VM_BUG_ON_VMA(end > vma->vm_end, vma);
mmap_assert_locked(mm);
/*
@@ -1725,19 +1741,13 @@ long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start,
* a poisoned page.
* !FOLL_FORCE: Require proper access permissions.
*/
- gup_flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_UNLOCKABLE;
+ gup_flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_UNLOCKABLE |
+ FOLL_MADV_POPULATE;
if (write)
gup_flags |= FOLL_WRITE;
- /*
- * We want to report -EINVAL instead of -EFAULT for any permission
- * problems or incompatible mappings.
- */
- if (check_vma_flags(vma, gup_flags))
- return -EINVAL;
-
- ret = __get_user_pages(mm, start, nr_pages, gup_flags,
- NULL, locked);
+ ret = __get_user_pages_locked(mm, start, nr_pages, NULL, locked,
+ gup_flags);
lru_add_drain();
return ret;
}
diff --git a/mm/internal.h b/mm/internal.h
index 7e486f2c502c..07ad2675a88b 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -686,9 +686,8 @@ struct anon_vma *folio_anon_vma(struct folio *folio);
void unmap_mapping_folio(struct folio *folio);
extern long populate_vma_page_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end, int *locked);
-extern long faultin_vma_page_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end,
- bool write, int *locked);
+extern long faultin_page_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end, bool write, int *locked);
extern bool mlock_future_ok(struct mm_struct *mm, unsigned long flags,
unsigned long bytes);
@@ -1127,10 +1126,13 @@ enum {
FOLL_FAST_ONLY = 1 << 20,
/* allow unlocking the mmap lock */
FOLL_UNLOCKABLE = 1 << 21,
+ /* VMA lookup+checks compatible with MADV_POPULATE_(READ|WRITE) */
+ FOLL_MADV_POPULATE = 1 << 22,
};
#define INTERNAL_GUP_FLAGS (FOLL_TOUCH | FOLL_TRIED | FOLL_REMOTE | FOLL_PIN | \
- FOLL_FAST_ONLY | FOLL_UNLOCKABLE)
+ FOLL_FAST_ONLY | FOLL_UNLOCKABLE | \
+ FOLL_MADV_POPULATE)
/*
* Indicates for which pages that are write-protected in the page table,
diff --git a/mm/madvise.c b/mm/madvise.c
index 44a498c94158..1a073fcc4c0c 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -908,27 +908,14 @@ static long madvise_populate(struct vm_area_struct *vma,
{
const bool write = behavior == MADV_POPULATE_WRITE;
struct mm_struct *mm = vma->vm_mm;
- unsigned long tmp_end;
int locked = 1;
long pages;
*prev = vma;
while (start < end) {
- /*
- * We might have temporarily dropped the lock. For example,
- * our VMA might have been split.
- */
- if (!vma || start >= vma->vm_end) {
- vma = vma_lookup(mm, start);
- if (!vma)
- return -ENOMEM;
- }
-
- tmp_end = min_t(unsigned long, end, vma->vm_end);
/* Populate (prefault) page tables readable/writable. */
- pages = faultin_vma_page_range(vma, start, tmp_end, write,
- &locked);
+ pages = faultin_page_range(mm, start, end, write, &locked);
if (!locked) {
mmap_read_lock(mm);
locked = 1;
@@ -949,7 +936,7 @@ static long madvise_populate(struct vm_area_struct *vma,
pr_warn_once("%s: unhandled return value: %ld\n",
__func__, pages);
fallthrough;
- case -ENOMEM:
+ case -ENOMEM: /* No VMA or out of memory. */
return -ENOMEM;
}
}
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 631426ba1d45a8672b177ee85ad4cabe760dd131
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024042336-tinfoil-trusting-765e@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
631426ba1d45 ("mm/madvise: make MADV_POPULATE_(READ|WRITE) handle VM_FAULT_RETRY properly")
0f20bba1688b ("mm/gup: explicitly define and check internal GUP flags, disallow FOLL_TOUCH")
6cd06ab12d1a ("gup: make the stack expansion warning a bit more targeted")
9471f1f2f502 ("Merge branch 'expand-stack'")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 631426ba1d45a8672b177ee85ad4cabe760dd131 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david(a)redhat.com>
Date: Thu, 14 Mar 2024 17:12:59 +0100
Subject: [PATCH] mm/madvise: make MADV_POPULATE_(READ|WRITE) handle
VM_FAULT_RETRY properly
Darrick reports that in some cases where pread() would fail with -EIO and
mmap()+access would generate a SIGBUS signal, MADV_POPULATE_READ /
MADV_POPULATE_WRITE will keep retrying forever and not fail with -EFAULT.
While the madvise() call can be interrupted by a signal, this is not the
desired behavior. MADV_POPULATE_READ / MADV_POPULATE_WRITE should behave
like page faults in that case: fail and not retry forever.
A reproducer can be found at [1].
The reason is that __get_user_pages(), as called by
faultin_vma_page_range(), will not handle VM_FAULT_RETRY in a proper way:
it will simply return 0 when VM_FAULT_RETRY happened, making
madvise_populate()->faultin_vma_page_range() retry again and again, never
setting FOLL_TRIED->FAULT_FLAG_TRIED for __get_user_pages().
__get_user_pages_locked() does what we want, but duplicating that logic in
faultin_vma_page_range() feels wrong.
So let's use __get_user_pages_locked() instead, that will detect
VM_FAULT_RETRY and set FOLL_TRIED when retrying, making the fault handler
return VM_FAULT_SIGBUS (VM_FAULT_ERROR) at some point, propagating -EFAULT
from faultin_page() to __get_user_pages(), all the way to
madvise_populate().
But, there is an issue: __get_user_pages_locked() will end up re-taking
the MM lock and then __get_user_pages() will do another VMA lookup. In
the meantime, the VMA layout could have changed and we'd fail with
different error codes than we'd want to.
As __get_user_pages() will currently do a new VMA lookup either way, let
it do the VMA handling in a different way, controlled by a new
FOLL_MADV_POPULATE flag, effectively moving these checks from
madvise_populate() + faultin_page_range() in there.
With this change, Darricks reproducer properly fails with -EFAULT, as
documented for MADV_POPULATE_READ / MADV_POPULATE_WRITE.
[1] https://lore.kernel.org/all/20240313171936.GN1927156@frogsfrogsfrogs/
Link: https://lkml.kernel.org/r/20240314161300.382526-1-david@redhat.com
Link: https://lkml.kernel.org/r/20240314161300.382526-2-david@redhat.com
Fixes: 4ca9b3859dac ("mm/madvise: introduce MADV_POPULATE_(READ|WRITE) to prefault page tables")
Signed-off-by: David Hildenbrand <david(a)redhat.com>
Reported-by: Darrick J. Wong <djwong(a)kernel.org>
Closes: https://lore.kernel.org/all/20240311223815.GW1927156@frogsfrogsfrogs/
Cc: Darrick J. Wong <djwong(a)kernel.org>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Jason Gunthorpe <jgg(a)nvidia.com>
Cc: John Hubbard <jhubbard(a)nvidia.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/gup.c b/mm/gup.c
index af8edadc05d1..1611e73b1121 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1206,6 +1206,22 @@ static long __get_user_pages(struct mm_struct *mm,
/* first iteration or cross vma bound */
if (!vma || start >= vma->vm_end) {
+ /*
+ * MADV_POPULATE_(READ|WRITE) wants to handle VMA
+ * lookups+error reporting differently.
+ */
+ if (gup_flags & FOLL_MADV_POPULATE) {
+ vma = vma_lookup(mm, start);
+ if (!vma) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ if (check_vma_flags(vma, gup_flags)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ goto retry;
+ }
vma = gup_vma_lookup(mm, start);
if (!vma && in_gate_area(mm, start)) {
ret = get_gate_page(mm, start & PAGE_MASK,
@@ -1685,35 +1701,35 @@ long populate_vma_page_range(struct vm_area_struct *vma,
}
/*
- * faultin_vma_page_range() - populate (prefault) page tables inside the
- * given VMA range readable/writable
+ * faultin_page_range() - populate (prefault) page tables inside the
+ * given range readable/writable
*
* This takes care of mlocking the pages, too, if VM_LOCKED is set.
*
- * @vma: target vma
+ * @mm: the mm to populate page tables in
* @start: start address
* @end: end address
* @write: whether to prefault readable or writable
* @locked: whether the mmap_lock is still held
*
- * Returns either number of processed pages in the vma, or a negative error
- * code on error (see __get_user_pages()).
+ * Returns either number of processed pages in the MM, or a negative error
+ * code on error (see __get_user_pages()). Note that this function reports
+ * errors related to VMAs, such as incompatible mappings, as expected by
+ * MADV_POPULATE_(READ|WRITE).
*
- * vma->vm_mm->mmap_lock must be held. The range must be page-aligned and
- * covered by the VMA. If it's released, *@locked will be set to 0.
+ * The range must be page-aligned.
+ *
+ * mm->mmap_lock must be held. If it's released, *@locked will be set to 0.
*/
-long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start,
- unsigned long end, bool write, int *locked)
+long faultin_page_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end, bool write, int *locked)
{
- struct mm_struct *mm = vma->vm_mm;
unsigned long nr_pages = (end - start) / PAGE_SIZE;
int gup_flags;
long ret;
VM_BUG_ON(!PAGE_ALIGNED(start));
VM_BUG_ON(!PAGE_ALIGNED(end));
- VM_BUG_ON_VMA(start < vma->vm_start, vma);
- VM_BUG_ON_VMA(end > vma->vm_end, vma);
mmap_assert_locked(mm);
/*
@@ -1725,19 +1741,13 @@ long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start,
* a poisoned page.
* !FOLL_FORCE: Require proper access permissions.
*/
- gup_flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_UNLOCKABLE;
+ gup_flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_UNLOCKABLE |
+ FOLL_MADV_POPULATE;
if (write)
gup_flags |= FOLL_WRITE;
- /*
- * We want to report -EINVAL instead of -EFAULT for any permission
- * problems or incompatible mappings.
- */
- if (check_vma_flags(vma, gup_flags))
- return -EINVAL;
-
- ret = __get_user_pages(mm, start, nr_pages, gup_flags,
- NULL, locked);
+ ret = __get_user_pages_locked(mm, start, nr_pages, NULL, locked,
+ gup_flags);
lru_add_drain();
return ret;
}
diff --git a/mm/internal.h b/mm/internal.h
index 7e486f2c502c..07ad2675a88b 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -686,9 +686,8 @@ struct anon_vma *folio_anon_vma(struct folio *folio);
void unmap_mapping_folio(struct folio *folio);
extern long populate_vma_page_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end, int *locked);
-extern long faultin_vma_page_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end,
- bool write, int *locked);
+extern long faultin_page_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end, bool write, int *locked);
extern bool mlock_future_ok(struct mm_struct *mm, unsigned long flags,
unsigned long bytes);
@@ -1127,10 +1126,13 @@ enum {
FOLL_FAST_ONLY = 1 << 20,
/* allow unlocking the mmap lock */
FOLL_UNLOCKABLE = 1 << 21,
+ /* VMA lookup+checks compatible with MADV_POPULATE_(READ|WRITE) */
+ FOLL_MADV_POPULATE = 1 << 22,
};
#define INTERNAL_GUP_FLAGS (FOLL_TOUCH | FOLL_TRIED | FOLL_REMOTE | FOLL_PIN | \
- FOLL_FAST_ONLY | FOLL_UNLOCKABLE)
+ FOLL_FAST_ONLY | FOLL_UNLOCKABLE | \
+ FOLL_MADV_POPULATE)
/*
* Indicates for which pages that are write-protected in the page table,
diff --git a/mm/madvise.c b/mm/madvise.c
index 44a498c94158..1a073fcc4c0c 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -908,27 +908,14 @@ static long madvise_populate(struct vm_area_struct *vma,
{
const bool write = behavior == MADV_POPULATE_WRITE;
struct mm_struct *mm = vma->vm_mm;
- unsigned long tmp_end;
int locked = 1;
long pages;
*prev = vma;
while (start < end) {
- /*
- * We might have temporarily dropped the lock. For example,
- * our VMA might have been split.
- */
- if (!vma || start >= vma->vm_end) {
- vma = vma_lookup(mm, start);
- if (!vma)
- return -ENOMEM;
- }
-
- tmp_end = min_t(unsigned long, end, vma->vm_end);
/* Populate (prefault) page tables readable/writable. */
- pages = faultin_vma_page_range(vma, start, tmp_end, write,
- &locked);
+ pages = faultin_page_range(mm, start, end, write, &locked);
if (!locked) {
mmap_read_lock(mm);
locked = 1;
@@ -949,7 +936,7 @@ static long madvise_populate(struct vm_area_struct *vma,
pr_warn_once("%s: unhandled return value: %ld\n",
__func__, pages);
fallthrough;
- case -ENOMEM:
+ case -ENOMEM: /* No VMA or out of memory. */
return -ENOMEM;
}
}