The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x c7f49dadfcdf27e1f747442e874e9baa52ab7674
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025082104-whooping-armband-6944@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c7f49dadfcdf27e1f747442e874e9baa52ab7674 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers(a)kernel.org>
Date: Tue, 8 Jul 2025 12:38:28 -0700
Subject: [PATCH] crypto: x86/aegis - Fix sleeping when disallowed on
PREEMPT_RT
skcipher_walk_done() can call kfree(), which takes a spinlock, which
makes it incorrect to call while preemption is disabled on PREEMPT_RT.
Therefore, end the kernel-mode FPU section before calling
skcipher_walk_done(), and restart it afterwards.
Moreover, pass atomic=false to skcipher_walk_aead_encrypt() instead of
atomic=true. The point of atomic=true was to make skcipher_walk_done()
safe to call while in a kernel-mode FPU section, but that does not
actually work. So just use the usual atomic=false.
Fixes: 1d373d4e8e15 ("crypto: x86 - Add optimized AEGIS implementations")
Cc: stable(a)vger.kernel.org
Signed-off-by: Eric Biggers <ebiggers(a)kernel.org>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c
index f1b6d40154e3..3cb5c193038b 100644
--- a/arch/x86/crypto/aegis128-aesni-glue.c
+++ b/arch/x86/crypto/aegis128-aesni-glue.c
@@ -119,7 +119,9 @@ crypto_aegis128_aesni_process_crypt(struct aegis_state *state,
walk->dst.virt.addr,
round_down(walk->nbytes,
AEGIS128_BLOCK_SIZE));
+ kernel_fpu_end();
skcipher_walk_done(walk, walk->nbytes % AEGIS128_BLOCK_SIZE);
+ kernel_fpu_begin();
}
if (walk->nbytes) {
@@ -131,7 +133,9 @@ crypto_aegis128_aesni_process_crypt(struct aegis_state *state,
aegis128_aesni_dec_tail(state, walk->src.virt.addr,
walk->dst.virt.addr,
walk->nbytes);
+ kernel_fpu_end();
skcipher_walk_done(walk, 0);
+ kernel_fpu_begin();
}
}
@@ -176,9 +180,9 @@ crypto_aegis128_aesni_crypt(struct aead_request *req,
struct aegis_state state;
if (enc)
- skcipher_walk_aead_encrypt(&walk, req, true);
+ skcipher_walk_aead_encrypt(&walk, req, false);
else
- skcipher_walk_aead_decrypt(&walk, req, true);
+ skcipher_walk_aead_decrypt(&walk, req, false);
kernel_fpu_begin();
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x c7f49dadfcdf27e1f747442e874e9baa52ab7674
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025082104-syrup-acquire-04a4@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c7f49dadfcdf27e1f747442e874e9baa52ab7674 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers(a)kernel.org>
Date: Tue, 8 Jul 2025 12:38:28 -0700
Subject: [PATCH] crypto: x86/aegis - Fix sleeping when disallowed on
PREEMPT_RT
skcipher_walk_done() can call kfree(), which takes a spinlock, which
makes it incorrect to call while preemption is disabled on PREEMPT_RT.
Therefore, end the kernel-mode FPU section before calling
skcipher_walk_done(), and restart it afterwards.
Moreover, pass atomic=false to skcipher_walk_aead_encrypt() instead of
atomic=true. The point of atomic=true was to make skcipher_walk_done()
safe to call while in a kernel-mode FPU section, but that does not
actually work. So just use the usual atomic=false.
Fixes: 1d373d4e8e15 ("crypto: x86 - Add optimized AEGIS implementations")
Cc: stable(a)vger.kernel.org
Signed-off-by: Eric Biggers <ebiggers(a)kernel.org>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c
index f1b6d40154e3..3cb5c193038b 100644
--- a/arch/x86/crypto/aegis128-aesni-glue.c
+++ b/arch/x86/crypto/aegis128-aesni-glue.c
@@ -119,7 +119,9 @@ crypto_aegis128_aesni_process_crypt(struct aegis_state *state,
walk->dst.virt.addr,
round_down(walk->nbytes,
AEGIS128_BLOCK_SIZE));
+ kernel_fpu_end();
skcipher_walk_done(walk, walk->nbytes % AEGIS128_BLOCK_SIZE);
+ kernel_fpu_begin();
}
if (walk->nbytes) {
@@ -131,7 +133,9 @@ crypto_aegis128_aesni_process_crypt(struct aegis_state *state,
aegis128_aesni_dec_tail(state, walk->src.virt.addr,
walk->dst.virt.addr,
walk->nbytes);
+ kernel_fpu_end();
skcipher_walk_done(walk, 0);
+ kernel_fpu_begin();
}
}
@@ -176,9 +180,9 @@ crypto_aegis128_aesni_crypt(struct aead_request *req,
struct aegis_state state;
if (enc)
- skcipher_walk_aead_encrypt(&walk, req, true);
+ skcipher_walk_aead_encrypt(&walk, req, false);
else
- skcipher_walk_aead_decrypt(&walk, req, true);
+ skcipher_walk_aead_decrypt(&walk, req, false);
kernel_fpu_begin();
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x c7f49dadfcdf27e1f747442e874e9baa52ab7674
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025082103-chamomile-hesitant-52e3@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c7f49dadfcdf27e1f747442e874e9baa52ab7674 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers(a)kernel.org>
Date: Tue, 8 Jul 2025 12:38:28 -0700
Subject: [PATCH] crypto: x86/aegis - Fix sleeping when disallowed on
PREEMPT_RT
skcipher_walk_done() can call kfree(), which takes a spinlock, which
makes it incorrect to call while preemption is disabled on PREEMPT_RT.
Therefore, end the kernel-mode FPU section before calling
skcipher_walk_done(), and restart it afterwards.
Moreover, pass atomic=false to skcipher_walk_aead_encrypt() instead of
atomic=true. The point of atomic=true was to make skcipher_walk_done()
safe to call while in a kernel-mode FPU section, but that does not
actually work. So just use the usual atomic=false.
Fixes: 1d373d4e8e15 ("crypto: x86 - Add optimized AEGIS implementations")
Cc: stable(a)vger.kernel.org
Signed-off-by: Eric Biggers <ebiggers(a)kernel.org>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c
index f1b6d40154e3..3cb5c193038b 100644
--- a/arch/x86/crypto/aegis128-aesni-glue.c
+++ b/arch/x86/crypto/aegis128-aesni-glue.c
@@ -119,7 +119,9 @@ crypto_aegis128_aesni_process_crypt(struct aegis_state *state,
walk->dst.virt.addr,
round_down(walk->nbytes,
AEGIS128_BLOCK_SIZE));
+ kernel_fpu_end();
skcipher_walk_done(walk, walk->nbytes % AEGIS128_BLOCK_SIZE);
+ kernel_fpu_begin();
}
if (walk->nbytes) {
@@ -131,7 +133,9 @@ crypto_aegis128_aesni_process_crypt(struct aegis_state *state,
aegis128_aesni_dec_tail(state, walk->src.virt.addr,
walk->dst.virt.addr,
walk->nbytes);
+ kernel_fpu_end();
skcipher_walk_done(walk, 0);
+ kernel_fpu_begin();
}
}
@@ -176,9 +180,9 @@ crypto_aegis128_aesni_crypt(struct aead_request *req,
struct aegis_state state;
if (enc)
- skcipher_walk_aead_encrypt(&walk, req, true);
+ skcipher_walk_aead_encrypt(&walk, req, false);
else
- skcipher_walk_aead_decrypt(&walk, req, true);
+ skcipher_walk_aead_decrypt(&walk, req, false);
kernel_fpu_begin();
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 5bd398e20f0833ae8a1267d4f343591a2dd20185
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025082100-snowiness-profanity-df3a@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5bd398e20f0833ae8a1267d4f343591a2dd20185 Mon Sep 17 00:00:00 2001
From: Youssef Samir <quic_yabdulra(a)quicinc.com>
Date: Mon, 14 Jul 2025 18:30:39 +0200
Subject: [PATCH] bus: mhi: host: Detect events pointing to unexpected TREs
When a remote device sends a completion event to the host, it contains a
pointer to the consumed TRE. The host uses this pointer to process all of
the TREs between it and the host's local copy of the ring's read pointer.
This works when processing completion for chained transactions, but can
lead to nasty results if the device sends an event for a single-element
transaction with a read pointer that is multiple elements ahead of the
host's read pointer.
For instance, if the host accesses an event ring while the device is
updating it, the pointer inside of the event might still point to an old
TRE. If the host uses the channel's xfer_cb() to directly free the buffer
pointed to by the TRE, the buffer will be double-freed.
This behavior was observed on an ep that used upstream EP stack without
'commit 6f18d174b73d ("bus: mhi: ep: Update read pointer only after buffer
is written")'. Where the device updated the events ring pointer before
updating the event contents, so it left a window where the host was able to
access the stale data the event pointed to, before the device had the
chance to update them. The usual pattern was that the host received an
event pointing to a TRE that is not immediately after the last processed
one, so it got treated as if it was a chained transaction, processing all
of the TREs in between the two read pointers.
This commit aims to harden the host by ensuring transactions where the
event points to a TRE that isn't local_rp + 1 are chained.
Fixes: 1d3173a3bae7 ("bus: mhi: core: Add support for processing events from client device")
Signed-off-by: Youssef Samir <quic_yabdulra(a)quicinc.com>
[mani: added stable tag and reworded commit message]
Signed-off-by: Manivannan Sadhasivam <mani(a)kernel.org>
Reviewed-by: Jeff Hugo <jeff.hugo(a)oss.qualcomm.com>
Cc: stable(a)vger.kernel.org
Link: https://patch.msgid.link/20250714163039.3438985-1-quic_yabdulra@quicinc.com
diff --git a/drivers/bus/mhi/host/main.c b/drivers/bus/mhi/host/main.c
index 3041ee6747e3..52bef663e182 100644
--- a/drivers/bus/mhi/host/main.c
+++ b/drivers/bus/mhi/host/main.c
@@ -602,7 +602,7 @@ static int parse_xfer_event(struct mhi_controller *mhi_cntrl,
{
dma_addr_t ptr = MHI_TRE_GET_EV_PTR(event);
struct mhi_ring_element *local_rp, *ev_tre;
- void *dev_rp;
+ void *dev_rp, *next_rp;
struct mhi_buf_info *buf_info;
u16 xfer_len;
@@ -621,6 +621,16 @@ static int parse_xfer_event(struct mhi_controller *mhi_cntrl,
result.dir = mhi_chan->dir;
local_rp = tre_ring->rp;
+
+ next_rp = local_rp + 1;
+ if (next_rp >= tre_ring->base + tre_ring->len)
+ next_rp = tre_ring->base;
+ if (dev_rp != next_rp && !MHI_TRE_DATA_GET_CHAIN(local_rp)) {
+ dev_err(&mhi_cntrl->mhi_dev->dev,
+ "Event element points to an unexpected TRE\n");
+ break;
+ }
+
while (local_rp != dev_rp) {
buf_info = buf_ring->rp;
/* If it's the last TRE, get length from the event */
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 51888393cc64dd0462d0b96c13ab94873abbc030
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025082130-duchess-reflux-c692@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 51888393cc64dd0462d0b96c13ab94873abbc030 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki(a)intel.com>
Date: Wed, 9 Jul 2025 12:41:45 +0200
Subject: [PATCH] PM: runtime: Take active children into account in
pm_runtime_get_if_in_use()
For all practical purposes, there is no difference between the situation
in which a given device is not ignoring children and its active child
count is nonzero and the situation in which its runtime PM usage counter
is nonzero. However, pm_runtime_get_if_in_use() will only increment the
device's usage counter and return 1 in the latter case.
For consistency, make it do so in the former case either by adjusting
pm_runtime_get_conditional() and update the related kerneldoc comments
accordingly.
Fixes: c111566bea7c ("PM: runtime: Add pm_runtime_get_if_active()")
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Reviewed-by: Ulf Hansson <ulf.hansson(a)linaro.org>
Reviewed-by: Sakari Ailus <sakari.ailus(a)linux.intel.com>
Cc: 5.10+ <stable(a)vger.kernel.org> # 5.10+: c0ef3df8dbae: PM: runtime: Simplify pm_runtime_get_if_active() usage
Cc: 5.10+ <stable(a)vger.kernel.org> # 5.10+
Link: https://patch.msgid.link/12700973.O9o76ZdvQC@rjwysocki.net
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index c55a7c70bc1a..2ba0dfd1de5a 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -1191,10 +1191,12 @@ EXPORT_SYMBOL_GPL(__pm_runtime_resume);
*
* Return -EINVAL if runtime PM is disabled for @dev.
*
- * Otherwise, if the runtime PM status of @dev is %RPM_ACTIVE and either
- * @ign_usage_count is %true or the runtime PM usage counter of @dev is not
- * zero, increment the usage counter of @dev and return 1. Otherwise, return 0
- * without changing the usage counter.
+ * Otherwise, if its runtime PM status is %RPM_ACTIVE and (1) @ign_usage_count
+ * is set, or (2) @dev is not ignoring children and its active child count is
+ * nonero, or (3) the runtime PM usage counter of @dev is not zero, increment
+ * the usage counter of @dev and return 1.
+ *
+ * Otherwise, return 0 without changing the usage counter.
*
* If @ign_usage_count is %true, this function can be used to prevent suspending
* the device when its runtime PM status is %RPM_ACTIVE.
@@ -1216,7 +1218,8 @@ static int pm_runtime_get_conditional(struct device *dev, bool ign_usage_count)
retval = -EINVAL;
} else if (dev->power.runtime_status != RPM_ACTIVE) {
retval = 0;
- } else if (ign_usage_count) {
+ } else if (ign_usage_count || (!dev->power.ignore_children &&
+ atomic_read(&dev->power.child_count) > 0)) {
retval = 1;
atomic_inc(&dev->power.usage_count);
} else {
@@ -1249,10 +1252,16 @@ EXPORT_SYMBOL_GPL(pm_runtime_get_if_active);
* @dev: Target device.
*
* Increment the runtime PM usage counter of @dev if its runtime PM status is
- * %RPM_ACTIVE and its runtime PM usage counter is greater than 0, in which case
- * it returns 1. If the device is in a different state or its usage_count is 0,
- * 0 is returned. -EINVAL is returned if runtime PM is disabled for the device,
- * in which case also the usage_count will remain unmodified.
+ * %RPM_ACTIVE and its runtime PM usage counter is greater than 0 or it is not
+ * ignoring children and its active child count is nonzero. 1 is returned in
+ * this case.
+ *
+ * If @dev is in a different state or it is not in use (that is, its usage
+ * counter is 0, or it is ignoring children, or its active child count is 0),
+ * 0 is returned.
+ *
+ * -EINVAL is returned if runtime PM is disabled for the device, in which case
+ * also the usage counter of @dev is not updated.
*/
int pm_runtime_get_if_in_use(struct device *dev)
{
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 51888393cc64dd0462d0b96c13ab94873abbc030
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025082130-droop-update-8564@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 51888393cc64dd0462d0b96c13ab94873abbc030 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki(a)intel.com>
Date: Wed, 9 Jul 2025 12:41:45 +0200
Subject: [PATCH] PM: runtime: Take active children into account in
pm_runtime_get_if_in_use()
For all practical purposes, there is no difference between the situation
in which a given device is not ignoring children and its active child
count is nonzero and the situation in which its runtime PM usage counter
is nonzero. However, pm_runtime_get_if_in_use() will only increment the
device's usage counter and return 1 in the latter case.
For consistency, make it do so in the former case either by adjusting
pm_runtime_get_conditional() and update the related kerneldoc comments
accordingly.
Fixes: c111566bea7c ("PM: runtime: Add pm_runtime_get_if_active()")
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Reviewed-by: Ulf Hansson <ulf.hansson(a)linaro.org>
Reviewed-by: Sakari Ailus <sakari.ailus(a)linux.intel.com>
Cc: 5.10+ <stable(a)vger.kernel.org> # 5.10+: c0ef3df8dbae: PM: runtime: Simplify pm_runtime_get_if_active() usage
Cc: 5.10+ <stable(a)vger.kernel.org> # 5.10+
Link: https://patch.msgid.link/12700973.O9o76ZdvQC@rjwysocki.net
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index c55a7c70bc1a..2ba0dfd1de5a 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -1191,10 +1191,12 @@ EXPORT_SYMBOL_GPL(__pm_runtime_resume);
*
* Return -EINVAL if runtime PM is disabled for @dev.
*
- * Otherwise, if the runtime PM status of @dev is %RPM_ACTIVE and either
- * @ign_usage_count is %true or the runtime PM usage counter of @dev is not
- * zero, increment the usage counter of @dev and return 1. Otherwise, return 0
- * without changing the usage counter.
+ * Otherwise, if its runtime PM status is %RPM_ACTIVE and (1) @ign_usage_count
+ * is set, or (2) @dev is not ignoring children and its active child count is
+ * nonero, or (3) the runtime PM usage counter of @dev is not zero, increment
+ * the usage counter of @dev and return 1.
+ *
+ * Otherwise, return 0 without changing the usage counter.
*
* If @ign_usage_count is %true, this function can be used to prevent suspending
* the device when its runtime PM status is %RPM_ACTIVE.
@@ -1216,7 +1218,8 @@ static int pm_runtime_get_conditional(struct device *dev, bool ign_usage_count)
retval = -EINVAL;
} else if (dev->power.runtime_status != RPM_ACTIVE) {
retval = 0;
- } else if (ign_usage_count) {
+ } else if (ign_usage_count || (!dev->power.ignore_children &&
+ atomic_read(&dev->power.child_count) > 0)) {
retval = 1;
atomic_inc(&dev->power.usage_count);
} else {
@@ -1249,10 +1252,16 @@ EXPORT_SYMBOL_GPL(pm_runtime_get_if_active);
* @dev: Target device.
*
* Increment the runtime PM usage counter of @dev if its runtime PM status is
- * %RPM_ACTIVE and its runtime PM usage counter is greater than 0, in which case
- * it returns 1. If the device is in a different state or its usage_count is 0,
- * 0 is returned. -EINVAL is returned if runtime PM is disabled for the device,
- * in which case also the usage_count will remain unmodified.
+ * %RPM_ACTIVE and its runtime PM usage counter is greater than 0 or it is not
+ * ignoring children and its active child count is nonzero. 1 is returned in
+ * this case.
+ *
+ * If @dev is in a different state or it is not in use (that is, its usage
+ * counter is 0, or it is ignoring children, or its active child count is 0),
+ * 0 is returned.
+ *
+ * -EINVAL is returned if runtime PM is disabled for the device, in which case
+ * also the usage counter of @dev is not updated.
*/
int pm_runtime_get_if_in_use(struct device *dev)
{
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 51888393cc64dd0462d0b96c13ab94873abbc030
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025082129-outdoors-semantic-147a@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 51888393cc64dd0462d0b96c13ab94873abbc030 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki(a)intel.com>
Date: Wed, 9 Jul 2025 12:41:45 +0200
Subject: [PATCH] PM: runtime: Take active children into account in
pm_runtime_get_if_in_use()
For all practical purposes, there is no difference between the situation
in which a given device is not ignoring children and its active child
count is nonzero and the situation in which its runtime PM usage counter
is nonzero. However, pm_runtime_get_if_in_use() will only increment the
device's usage counter and return 1 in the latter case.
For consistency, make it do so in the former case either by adjusting
pm_runtime_get_conditional() and update the related kerneldoc comments
accordingly.
Fixes: c111566bea7c ("PM: runtime: Add pm_runtime_get_if_active()")
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Reviewed-by: Ulf Hansson <ulf.hansson(a)linaro.org>
Reviewed-by: Sakari Ailus <sakari.ailus(a)linux.intel.com>
Cc: 5.10+ <stable(a)vger.kernel.org> # 5.10+: c0ef3df8dbae: PM: runtime: Simplify pm_runtime_get_if_active() usage
Cc: 5.10+ <stable(a)vger.kernel.org> # 5.10+
Link: https://patch.msgid.link/12700973.O9o76ZdvQC@rjwysocki.net
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index c55a7c70bc1a..2ba0dfd1de5a 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -1191,10 +1191,12 @@ EXPORT_SYMBOL_GPL(__pm_runtime_resume);
*
* Return -EINVAL if runtime PM is disabled for @dev.
*
- * Otherwise, if the runtime PM status of @dev is %RPM_ACTIVE and either
- * @ign_usage_count is %true or the runtime PM usage counter of @dev is not
- * zero, increment the usage counter of @dev and return 1. Otherwise, return 0
- * without changing the usage counter.
+ * Otherwise, if its runtime PM status is %RPM_ACTIVE and (1) @ign_usage_count
+ * is set, or (2) @dev is not ignoring children and its active child count is
+ * nonero, or (3) the runtime PM usage counter of @dev is not zero, increment
+ * the usage counter of @dev and return 1.
+ *
+ * Otherwise, return 0 without changing the usage counter.
*
* If @ign_usage_count is %true, this function can be used to prevent suspending
* the device when its runtime PM status is %RPM_ACTIVE.
@@ -1216,7 +1218,8 @@ static int pm_runtime_get_conditional(struct device *dev, bool ign_usage_count)
retval = -EINVAL;
} else if (dev->power.runtime_status != RPM_ACTIVE) {
retval = 0;
- } else if (ign_usage_count) {
+ } else if (ign_usage_count || (!dev->power.ignore_children &&
+ atomic_read(&dev->power.child_count) > 0)) {
retval = 1;
atomic_inc(&dev->power.usage_count);
} else {
@@ -1249,10 +1252,16 @@ EXPORT_SYMBOL_GPL(pm_runtime_get_if_active);
* @dev: Target device.
*
* Increment the runtime PM usage counter of @dev if its runtime PM status is
- * %RPM_ACTIVE and its runtime PM usage counter is greater than 0, in which case
- * it returns 1. If the device is in a different state or its usage_count is 0,
- * 0 is returned. -EINVAL is returned if runtime PM is disabled for the device,
- * in which case also the usage_count will remain unmodified.
+ * %RPM_ACTIVE and its runtime PM usage counter is greater than 0 or it is not
+ * ignoring children and its active child count is nonzero. 1 is returned in
+ * this case.
+ *
+ * If @dev is in a different state or it is not in use (that is, its usage
+ * counter is 0, or it is ignoring children, or its active child count is 0),
+ * 0 is returned.
+ *
+ * -EINVAL is returned if runtime PM is disabled for the device, in which case
+ * also the usage counter of @dev is not updated.
*/
int pm_runtime_get_if_in_use(struct device *dev)
{
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 51888393cc64dd0462d0b96c13ab94873abbc030
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025082129-botany-headlamp-b026@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 51888393cc64dd0462d0b96c13ab94873abbc030 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki(a)intel.com>
Date: Wed, 9 Jul 2025 12:41:45 +0200
Subject: [PATCH] PM: runtime: Take active children into account in
pm_runtime_get_if_in_use()
For all practical purposes, there is no difference between the situation
in which a given device is not ignoring children and its active child
count is nonzero and the situation in which its runtime PM usage counter
is nonzero. However, pm_runtime_get_if_in_use() will only increment the
device's usage counter and return 1 in the latter case.
For consistency, make it do so in the former case either by adjusting
pm_runtime_get_conditional() and update the related kerneldoc comments
accordingly.
Fixes: c111566bea7c ("PM: runtime: Add pm_runtime_get_if_active()")
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Reviewed-by: Ulf Hansson <ulf.hansson(a)linaro.org>
Reviewed-by: Sakari Ailus <sakari.ailus(a)linux.intel.com>
Cc: 5.10+ <stable(a)vger.kernel.org> # 5.10+: c0ef3df8dbae: PM: runtime: Simplify pm_runtime_get_if_active() usage
Cc: 5.10+ <stable(a)vger.kernel.org> # 5.10+
Link: https://patch.msgid.link/12700973.O9o76ZdvQC@rjwysocki.net
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index c55a7c70bc1a..2ba0dfd1de5a 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -1191,10 +1191,12 @@ EXPORT_SYMBOL_GPL(__pm_runtime_resume);
*
* Return -EINVAL if runtime PM is disabled for @dev.
*
- * Otherwise, if the runtime PM status of @dev is %RPM_ACTIVE and either
- * @ign_usage_count is %true or the runtime PM usage counter of @dev is not
- * zero, increment the usage counter of @dev and return 1. Otherwise, return 0
- * without changing the usage counter.
+ * Otherwise, if its runtime PM status is %RPM_ACTIVE and (1) @ign_usage_count
+ * is set, or (2) @dev is not ignoring children and its active child count is
+ * nonero, or (3) the runtime PM usage counter of @dev is not zero, increment
+ * the usage counter of @dev and return 1.
+ *
+ * Otherwise, return 0 without changing the usage counter.
*
* If @ign_usage_count is %true, this function can be used to prevent suspending
* the device when its runtime PM status is %RPM_ACTIVE.
@@ -1216,7 +1218,8 @@ static int pm_runtime_get_conditional(struct device *dev, bool ign_usage_count)
retval = -EINVAL;
} else if (dev->power.runtime_status != RPM_ACTIVE) {
retval = 0;
- } else if (ign_usage_count) {
+ } else if (ign_usage_count || (!dev->power.ignore_children &&
+ atomic_read(&dev->power.child_count) > 0)) {
retval = 1;
atomic_inc(&dev->power.usage_count);
} else {
@@ -1249,10 +1252,16 @@ EXPORT_SYMBOL_GPL(pm_runtime_get_if_active);
* @dev: Target device.
*
* Increment the runtime PM usage counter of @dev if its runtime PM status is
- * %RPM_ACTIVE and its runtime PM usage counter is greater than 0, in which case
- * it returns 1. If the device is in a different state or its usage_count is 0,
- * 0 is returned. -EINVAL is returned if runtime PM is disabled for the device,
- * in which case also the usage_count will remain unmodified.
+ * %RPM_ACTIVE and its runtime PM usage counter is greater than 0 or it is not
+ * ignoring children and its active child count is nonzero. 1 is returned in
+ * this case.
+ *
+ * If @dev is in a different state or it is not in use (that is, its usage
+ * counter is 0, or it is ignoring children, or its active child count is 0),
+ * 0 is returned.
+ *
+ * -EINVAL is returned if runtime PM is disabled for the device, in which case
+ * also the usage counter of @dev is not updated.
*/
int pm_runtime_get_if_in_use(struct device *dev)
{
Our syztester report the lockdep WARNING [1], which was identified in
stable kernel version 5.10. However, this deadlock path no longer exists
due to the refactoring of console_lock in v6.2-rc1 [2]. Coincidentally,
there are two types of deadlocks that we have found here. One is the ABBA
deadlock, as mentioned above [1], and the other is the AA deadlock was
reported by Breno [3]. The latter's deadlock issue persists.
To solve this problem, switch to printk_safe mode before printing warning
message, this will redirect all printk()-s to a special per-CPU buffer,
which will be flushed later from a safe context (irq work), and this
deadlock problem can be avoided. The proper API to use should be
printk_deferred_enter()/printk_deferred_exit() [4].
[1]
https://lore.kernel.org/all/20250730094914.566582-1-gubowen5@huawei.com/
[2]
https://lore.kernel.org/all/20221116162152.193147-1-john.ogness@linutronix.…
[3]
https://lore.kernel.org/all/20250731-kmemleak_lock-v1-1-728fd470198f@debian…
[4]
https://lore.kernel.org/all/5ca375cd-4a20-4807-b897-68b289626550@redhat.com/
====================
Signed-off-by: Gu Bowen <gubowen5(a)huawei.com>
---
mm/kmemleak.c | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 84265983f239..26113b89d09b 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -437,9 +437,15 @@ static struct kmemleak_object *__lookup_object(unsigned long ptr, int alias,
else if (untagged_objp == untagged_ptr || alias)
return object;
else {
+ /*
+ * Printk deferring due to the kmemleak_lock held.
+ * This is done to avoid deadlock.
+ */
+ printk_deferred_enter();
kmemleak_warn("Found object by alias at 0x%08lx\n",
ptr);
dump_object_info(object);
+ printk_deferred_exit();
break;
}
}
@@ -736,6 +742,11 @@ static int __link_object(struct kmemleak_object *object, unsigned long ptr,
else if (untagged_objp + parent->size <= untagged_ptr)
link = &parent->rb_node.rb_right;
else {
+ /*
+ * Printk deferring due to the kmemleak_lock held.
+ * This is done to avoid deadlock.
+ */
+ printk_deferred_enter();
kmemleak_stop("Cannot insert 0x%lx into the object search tree (overlaps existing)\n",
ptr);
/*
@@ -743,6 +754,7 @@ static int __link_object(struct kmemleak_object *object, unsigned long ptr,
* be freed while the kmemleak_lock is held.
*/
dump_object_info(parent);
+ printk_deferred_exit();
return -EEXIST;
}
}
@@ -858,8 +870,14 @@ static void delete_object_part(unsigned long ptr, size_t size,
object = __find_and_remove_object(ptr, 1, objflags);
if (!object) {
#ifdef DEBUG
+ /*
+ * Printk deferring due to the kmemleak_lock held.
+ * This is done to avoid deadlock.
+ */
+ printk_deferred_enter();
kmemleak_warn("Partially freeing unknown object at 0x%08lx (size %zu)\n",
ptr, size);
+ printk_deferred_exit();
#endif
goto unlock;
}
--
2.43.0
Add fixes to the CC contaminant/connection detection logic to improve
reliability and stability of the maxim tcpc driver. This patchset has
been tested on a PD Tester.
---
Changes in v2:
- Fix improperly formatted patch for stable inclusion. Tagged every
patch in patchset for stable.
- Link to v1: https://lore.kernel.org/r/20250814-fix-upstream-contaminant-v1-0-801ce80890…
---
Amit Sunil Dhamne (2):
usb: typec: maxim_contaminant: disable low power mode when reading comparator values
usb: typec: maxim_contaminant: re-enable cc toggle if cc is open and port is clean
drivers/usb/typec/tcpm/maxim_contaminant.c | 58 ++++++++++++++++++++++++++++++
drivers/usb/typec/tcpm/tcpci_maxim.h | 1 +
2 files changed, 59 insertions(+)
---
base-commit: 89be9a83ccf1f88522317ce02f854f30d6115c41
change-id: 20250802-fix-upstream-contaminant-16910e2762ca
Best regards,
--
Amit Sunil Dhamne <amitsd(a)google.com>
The quilt patch titled
Subject: mm/damon/core: set quota->charged_from to jiffies at first charge window
has been removed from the -mm tree. Its filename was
mm-damon-core-set-quota-charged_from-to-jiffies-at-first-charge-window.patch
This patch was dropped because an updated version will be issued
------------------------------------------------------
From: Sang-Heon Jeon <ekffu200098(a)gmail.com>
Subject: mm/damon/core: set quota->charged_from to jiffies at first charge window
Date: Wed, 20 Aug 2025 00:01:23 +0900
Kernel initializes "jiffies" timer as 5 minutes below zero, as shown in
include/linux/jiffies.h
/*
* Have the 32 bit jiffies value wrap 5 minutes after boot
* so jiffies wrap bugs show up earlier.
*/
#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ))
And they cast unsigned value to signed to cover wraparound
#define time_after_eq(a,b) \
(typecheck(unsigned long, a) && \
typecheck(unsigned long, b) && \
((long)((a) - (b)) >= 0))
In 64bit systems, these might not be a problem because wrapround occurs
300 million years after the boot, assuming HZ value is 1000.
With same assuming, In 32bit system, wraparound occurs 5 minutues after
the initial boot and every 49 days after the first wraparound. And about
25 days after first wraparound, it continues quota charging window up to
next 25 days.
Example 1: initial boot
jiffies=0xFFFB6C20, charged_from+interval=0x000003E8
time_after_eq(jiffies, charged_from+interval)=(long)0xFFFB6838; In
signed values, it is considered negative so it is false.
Example 2: after about 25 days first wraparound
jiffies=0x800004E8, charged_from+interval=0x000003E8
time_after_eq(jiffies, charged_from+interval)=(long)0x80000100; In
signed values, it is considered negative so it is false
So, change quota->charged_from to jiffies at damos_adjust_quota() when
it is consider first charge window.
In theory; but almost impossible; quota->total_charged_sz and
qutoa->charged_from should be both zero even if it is not in first
charge window. But It will only delay one reset_interval, So it is not
big problem.
Link: https://lkml.kernel.org/r/20250819150123.1532458-1-ekffu200098@gmail.com
Fixes: 2b8a248d5873 ("mm/damon/schemes: implement size quota for schemes application speed control") [5.16]
Signed-off-by: Sang-Heon Jeon <ekffu200098(a)gmail.com>
Reviewed-by: SeongJae Park <sj(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/damon/core.c | 4 ++++
1 file changed, 4 insertions(+)
--- a/mm/damon/core.c~mm-damon-core-set-quota-charged_from-to-jiffies-at-first-charge-window
+++ a/mm/damon/core.c
@@ -2111,6 +2111,10 @@ static void damos_adjust_quota(struct da
if (!quota->ms && !quota->sz && list_empty("a->goals))
return;
+ /* First charge window */
+ if (!quota->total_charged_sz && !quota->charged_from)
+ quota->charged_from = jiffies;
+
/* New charge window starts */
if (time_after_eq(jiffies, quota->charged_from +
msecs_to_jiffies(quota->reset_interval))) {
_
Patches currently in -mm which might be from ekffu200098(a)gmail.com are
mm-damon-update-expired-description-of-damos_action.patch
docs-mm-damon-design-fix-typo-s-sz_trtied-sz_tried.patch
selftests-damon-test-no-op-commit-broke-damon-status.patch
selftests-damon-test-no-op-commit-broke-damon-status-fix.patch
mm-damon-tests-core-kunit-add-damos_commit_filter-test.patch
From: Steven Rostedt <rostedt(a)goodmis.org>
Currently the reader of set_ftrace_filter and set_ftrace_notrace just adds
the pointer to the global tracer hash to its iterator. Unlike the writer
that allocates a copy of the hash, the reader keeps the pointer to the
filter hashes. This is problematic because this pointer is static across
function calls that release the locks that can update the global tracer
hashes. This can cause UAF and similar bugs.
Allocate and copy the hash for reading the filter files like it is done
for the writers. This not only fixes UAF bugs, but also makes the code a
bit simpler as it doesn't have to differentiate when to free the
iterator's hash between writers and readers.
Cc: stable(a)vger.kernel.org
Cc: Masami Hiramatsu <mhiramat(a)kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Link: https://lore.kernel.org/20250820091913.146b77ea@gandalf.local.home
Fixes: c20489dad156 ("ftrace: Assign iter->hash to filter or notrace hashes on seq read")
Closes: https://lore.kernel.org/all/20250813023044.2121943-1-wutengda@huaweicloud.c…
Reported-by: Tengda Wu <wutengda(a)huaweicloud.com>
Tested-by: Tengda Wu <wutengda(a)huaweicloud.com>
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/ftrace.c | 16 +++++++---------
1 file changed, 7 insertions(+), 9 deletions(-)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 00b76d450a89..f992a5eb878e 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -4661,13 +4661,14 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,
} else {
iter->hash = alloc_and_copy_ftrace_hash(size_bits, hash);
}
+ } else {
+ iter->hash = alloc_and_copy_ftrace_hash(hash->size_bits, hash);
+ }
- if (!iter->hash) {
- trace_parser_put(&iter->parser);
- goto out_unlock;
- }
- } else
- iter->hash = hash;
+ if (!iter->hash) {
+ trace_parser_put(&iter->parser);
+ goto out_unlock;
+ }
ret = 0;
@@ -6543,9 +6544,6 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
ftrace_hash_move_and_update_ops(iter->ops, orig_hash,
iter->hash, filter_hash);
mutex_unlock(&ftrace_lock);
- } else {
- /* For read only, the hash is the ops hash */
- iter->hash = NULL;
}
mutex_unlock(&iter->ops->func_hash->regex_lock);
--
2.50.1
The patch titled
Subject: ocfs2: prevent release journal inode after journal shutdown
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
ocfs2-prevent-release-journal-inode-after-journal-shutdown.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Edward Adam Davis <eadavis(a)qq.com>
Subject: ocfs2: prevent release journal inode after journal shutdown
Date: Tue, 19 Aug 2025 21:41:02 +0800
Before calling ocfs2_delete_osb(), ocfs2_journal_shutdown() has already
been executed in ocfs2_dismount_volume(), so osb->journal must be NULL.
Therefore, the following calltrace will inevitably fail when it reaches
jbd2_journal_release_jbd_inode().
ocfs2_dismount_volume()->
ocfs2_delete_osb()->
ocfs2_free_slot_info()->
__ocfs2_free_slot_info()->
evict()->
ocfs2_evict_inode()->
ocfs2_clear_inode()->
jbd2_journal_release_jbd_inode(osb->journal->j_journal,
Adding osb->journal checks will prevent null-ptr-deref during the above
execution path.
Link: https://lkml.kernel.org/r/tencent_357489BEAEE4AED74CBD67D246DBD2C4C606@qq.c…
Fixes: da5e7c87827e ("ocfs2: cleanup journal init and shutdown")
Signed-off-by: Edward Adam Davis <eadavis(a)qq.com>
Reported-by: syzbot+47d8cb2f2cc1517e515a(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=47d8cb2f2cc1517e515a
Tested-by: syzbot+47d8cb2f2cc1517e515a(a)syzkaller.appspotmail.com
Reviewed-by: Mark Tinguely <mark.tinguely(a)oracle.com>
Reviewed-by: Joseph Qi <joseph.qi(a)linux.alibaba.com>
Cc: Mark Fasheh <mark(a)fasheh.com>
Cc: Joel Becker <jlbec(a)evilplan.org>
Cc: Junxiao Bi <junxiao.bi(a)oracle.com>
Cc: Changwei Ge <gechangwei(a)live.cn>
Cc: Jun Piao <piaojun(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/ocfs2/inode.c | 3 +++
1 file changed, 3 insertions(+)
--- a/fs/ocfs2/inode.c~ocfs2-prevent-release-journal-inode-after-journal-shutdown
+++ a/fs/ocfs2/inode.c
@@ -1281,6 +1281,9 @@ static void ocfs2_clear_inode(struct ino
* the journal is flushed before journal shutdown. Thus it is safe to
* have inodes get cleaned up after journal shutdown.
*/
+ if (!osb->journal)
+ return;
+
jbd2_journal_release_jbd_inode(osb->journal->j_journal,
&oi->ip_jinode);
}
_
Patches currently in -mm which might be from eadavis(a)qq.com are
ocfs2-prevent-release-journal-inode-after-journal-shutdown.patch
On 8/17/2025 8:48 AM, Sasha Levin wrote:
> This is a note to let you know that I've just added the patch titled
>
> rtc: ds1307: remove clear of oscillator stop flag (OSF) in probe
>
> to the 5.4-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
Hi Sasha,
FYI, patch 2/2 of the series wasn't applied to 5.4, but was applied to all the other trees.
"rtc: ds1307: handle oscillator stop flag (OSF) for ds1341"
[PATCH 2/2] rtc: ds1307: handle oscillator stop flag (OSF) for ds1341 - Meagan Lloyd <https://lore.kernel.org/all/1749665656-30108-3-git-send-email-meaganlloyd@l…>
(upstream commit 523923cfd5d622b8f4ba893fdaf29fa6adeb8c3e)
Thank you,
Meagan
This is a note to let you know that I've just added the patch titled
iio: pressure: bmp280: Use IS_ERR() in bmp280_common_probe()
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 43c0f6456f801181a80b73d95def0e0fd134e1cc Mon Sep 17 00:00:00 2001
From: Salah Triki <salah.triki(a)gmail.com>
Date: Mon, 18 Aug 2025 10:27:30 +0100
Subject: iio: pressure: bmp280: Use IS_ERR() in bmp280_common_probe()
`devm_gpiod_get_optional()` may return non-NULL error pointer on failure.
Check its return value using `IS_ERR()` and propagate the error if
necessary.
Fixes: df6e71256c84 ("iio: pressure: bmp280: Explicitly mark GPIO optional")
Signed-off-by: Salah Triki <salah.triki(a)gmail.com>
Reviewed-by: David Lechner <dlechner(a)baylibre.com>
Link: https://patch.msgid.link/20250818092740.545379-2-salah.triki@gmail.com
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/pressure/bmp280-core.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/drivers/iio/pressure/bmp280-core.c b/drivers/iio/pressure/bmp280-core.c
index 74505c9ec1a0..6cdc8ed53520 100644
--- a/drivers/iio/pressure/bmp280-core.c
+++ b/drivers/iio/pressure/bmp280-core.c
@@ -3213,11 +3213,12 @@ int bmp280_common_probe(struct device *dev,
/* Bring chip out of reset if there is an assigned GPIO line */
gpiod = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH);
+ if (IS_ERR(gpiod))
+ return dev_err_probe(dev, PTR_ERR(gpiod), "failed to get reset GPIO\n");
+
/* Deassert the signal */
- if (gpiod) {
- dev_info(dev, "release reset\n");
- gpiod_set_value(gpiod, 0);
- }
+ dev_info(dev, "release reset\n");
+ gpiod_set_value(gpiod, 0);
data->regmap = regmap;
--
2.50.1
This is a note to let you know that I've just added the patch titled
iio: light: as73211: Ensure buffer holes are zeroed
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 433b99e922943efdfd62b9a8e3ad1604838181f2 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
Date: Sat, 2 Aug 2025 17:44:21 +0100
Subject: iio: light: as73211: Ensure buffer holes are zeroed
Given that the buffer is copied to a kfifo that ultimately user space
can read, ensure we zero it.
Fixes: 403e5586b52e ("iio: light: as73211: New driver")
Reviewed-by: Matti Vaittinen <mazziesaccount(a)gmail.com>
Reviewed-by: Andy Shevchenko <andy(a)kernel.org>
Link: https://patch.msgid.link/20250802164436.515988-2-jic23@kernel.org
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/light/as73211.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iio/light/as73211.c b/drivers/iio/light/as73211.c
index 68f60dc3c79d..32719f584c47 100644
--- a/drivers/iio/light/as73211.c
+++ b/drivers/iio/light/as73211.c
@@ -639,7 +639,7 @@ static irqreturn_t as73211_trigger_handler(int irq __always_unused, void *p)
struct {
__le16 chan[4];
aligned_s64 ts;
- } scan;
+ } scan = { };
int data_result, ret;
mutex_lock(&data->mutex);
--
2.50.1
This is a note to let you know that I've just added the patch titled
iio: adc: rzg2l: Cleanup suspend/resume path
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From a3c6eabe3bbd6b0e7124d68b2d3bc32fed17362e Mon Sep 17 00:00:00 2001
From: Claudiu Beznea <claudiu.beznea.uj(a)bp.renesas.com>
Date: Sun, 10 Aug 2025 15:33:27 +0300
Subject: iio: adc: rzg2l: Cleanup suspend/resume path
There is no need to manually track the runtime PM status in the driver.
The pm_runtime_force_suspend() and pm_runtime_force_resume() functions
already call pm_runtime_status_suspended() to check the runtime PM state.
Additionally, avoid calling pm_runtime_put_autosuspend() during the
suspend/resume path, as this would decrease the usage counter of a
potential user that had the ADC open before the suspend/resume cycle.
Fixes: 563cf94f9329 ("iio: adc: rzg2l_adc: Add suspend/resume support")
Reviewed-by: Ulf Hansson <ulf.hansson(a)linaro.org>
Reviewed-by: Lad Prabhakar <prabhakar.mahadev-lad.rj(a)bp.renesas.com>
Signed-off-by: Claudiu Beznea <claudiu.beznea.uj(a)bp.renesas.com>
Link: https://patch.msgid.link/20250810123328.800104-2-claudiu.beznea.uj@bp.renes…
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/adc/rzg2l_adc.c | 29 ++++++++---------------------
1 file changed, 8 insertions(+), 21 deletions(-)
diff --git a/drivers/iio/adc/rzg2l_adc.c b/drivers/iio/adc/rzg2l_adc.c
index 9674d48074c9..0cb5a67fd497 100644
--- a/drivers/iio/adc/rzg2l_adc.c
+++ b/drivers/iio/adc/rzg2l_adc.c
@@ -89,7 +89,6 @@ struct rzg2l_adc {
struct completion completion;
struct mutex lock;
u16 last_val[RZG2L_ADC_MAX_CHANNELS];
- bool was_rpm_active;
};
/**
@@ -541,14 +540,9 @@ static int rzg2l_adc_suspend(struct device *dev)
};
int ret;
- if (pm_runtime_suspended(dev)) {
- adc->was_rpm_active = false;
- } else {
- ret = pm_runtime_force_suspend(dev);
- if (ret)
- return ret;
- adc->was_rpm_active = true;
- }
+ ret = pm_runtime_force_suspend(dev);
+ if (ret)
+ return ret;
ret = reset_control_bulk_assert(ARRAY_SIZE(resets), resets);
if (ret)
@@ -557,9 +551,7 @@ static int rzg2l_adc_suspend(struct device *dev)
return 0;
rpm_restore:
- if (adc->was_rpm_active)
- pm_runtime_force_resume(dev);
-
+ pm_runtime_force_resume(dev);
return ret;
}
@@ -577,11 +569,9 @@ static int rzg2l_adc_resume(struct device *dev)
if (ret)
return ret;
- if (adc->was_rpm_active) {
- ret = pm_runtime_force_resume(dev);
- if (ret)
- goto resets_restore;
- }
+ ret = pm_runtime_force_resume(dev);
+ if (ret)
+ goto resets_restore;
ret = rzg2l_adc_hw_init(dev, adc);
if (ret)
@@ -590,10 +580,7 @@ static int rzg2l_adc_resume(struct device *dev)
return 0;
rpm_restore:
- if (adc->was_rpm_active) {
- pm_runtime_mark_last_busy(dev);
- pm_runtime_put_autosuspend(dev);
- }
+ pm_runtime_force_suspend(dev);
resets_restore:
reset_control_bulk_assert(ARRAY_SIZE(resets), resets);
return ret;
--
2.50.1
This is a note to let you know that I've just added the patch titled
iio: adc: rzg2l_adc: Set driver data before enabling runtime PM
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From c69e13965f26b8058f538ea8bdbd2d7718cf1fbe Mon Sep 17 00:00:00 2001
From: Claudiu Beznea <claudiu.beznea.uj(a)bp.renesas.com>
Date: Sun, 10 Aug 2025 15:33:28 +0300
Subject: iio: adc: rzg2l_adc: Set driver data before enabling runtime PM
When stress-testing the system by repeatedly unbinding and binding the ADC
device in a loop, and the ADC is a supplier for another device (e.g., a
thermal hardware block that reads temperature through the ADC), it may
happen that the ADC device is runtime-resumed immediately after runtime PM
is enabled, triggered by its consumer. At this point, since drvdata is not
yet set and the driver's runtime PM callbacks rely on it, a crash can
occur. To avoid this, set drvdata just after it was allocated.
Fixes: 89ee8174e8c8 ("iio: adc: rzg2l_adc: Simplify the runtime PM code")
Signed-off-by: Claudiu Beznea <claudiu.beznea.uj(a)bp.renesas.com>
Link: https://patch.msgid.link/20250810123328.800104-3-claudiu.beznea.uj@bp.renes…
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/adc/rzg2l_adc.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/iio/adc/rzg2l_adc.c b/drivers/iio/adc/rzg2l_adc.c
index 0cb5a67fd497..cadb0446bc29 100644
--- a/drivers/iio/adc/rzg2l_adc.c
+++ b/drivers/iio/adc/rzg2l_adc.c
@@ -427,6 +427,8 @@ static int rzg2l_adc_probe(struct platform_device *pdev)
if (!indio_dev)
return -ENOMEM;
+ platform_set_drvdata(pdev, indio_dev);
+
adc = iio_priv(indio_dev);
adc->hw_params = device_get_match_data(dev);
@@ -459,8 +461,6 @@ static int rzg2l_adc_probe(struct platform_device *pdev)
if (ret)
return ret;
- platform_set_drvdata(pdev, indio_dev);
-
ret = rzg2l_adc_hw_init(dev, adc);
if (ret)
return dev_err_probe(&pdev->dev, ret,
--
2.50.1
This is a note to let you know that I've just added the patch titled
iio: adc: bd79124: Add GPIOLIB dependency
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 8a6ededaad2d2dcaac8e545bffee1073dca9db95 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <mazziesaccount(a)gmail.com>
Date: Wed, 13 Aug 2025 12:16:06 +0300
Subject: iio: adc: bd79124: Add GPIOLIB dependency
The bd79124 has ADC inputs which can be muxed to be GPIOs. The driver
supports this by registering a GPIO-chip for channels which aren't used
as ADC.
The Kconfig entry does not handle the dependency to GPIOLIB, which
causes errors:
ERROR: modpost: "devm_gpiochip_add_data_with_key" [drivers/iio/adc/rohm-bd79124.ko] undefined!
ERROR: modpost: "gpiochip_get_data" [drivers/iio/adc/rohm-bd79124.ko] undefined!
at linking phase if GPIOLIB is not configured to be used.
Fix this by adding dependency to the GPIOLIB.
Reported-by: kernel test robot <lkp(a)intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202508131533.5sSkq80B-lkp@intel.com/
Fixes: 3f57a3b9ab74 ("iio: adc: Support ROHM BD79124 ADC")
Signed-off-by: Matti Vaittinen <mazziesaccount(a)gmail.com>
Reviewed-by: Bartosz Golaszewski <bartosz.golaszewski(a)linaro.org>
Link: https://patch.msgid.link/6837249bddf358924e67566293944506206d2d62.175507636…
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/adc/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig
index 6de2abad0197..24f2572c487e 100644
--- a/drivers/iio/adc/Kconfig
+++ b/drivers/iio/adc/Kconfig
@@ -1300,7 +1300,7 @@ config RN5T618_ADC
config ROHM_BD79124
tristate "Rohm BD79124 ADC driver"
- depends on I2C
+ depends on I2C && GPIOLIB
select REGMAP_I2C
select IIO_ADC_HELPER
help
--
2.50.1
This is a note to let you know that I've just added the patch titled
iio: adc: ad7124: fix channel lookup in syscalib functions
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 197e299aae42ffa19028eaea92b2f30dd9fb8445 Mon Sep 17 00:00:00 2001
From: David Lechner <dlechner(a)baylibre.com>
Date: Sat, 26 Jul 2025 11:28:48 -0500
Subject: iio: adc: ad7124: fix channel lookup in syscalib functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Fix possible incorrect channel lookup in the syscalib functions by using
the correct channel address instead of the channel number.
In the ad7124 driver, the channel field of struct iio_chan_spec is the
input pin number of the positive input of the channel. This can be, but
is not always the same as the index in the channels array. The correct
index in the channels array is stored in the address field (and also
scan_index). We use the address field to perform the correct lookup.
Fixes: 47036a03a303 ("iio: adc: ad7124: Implement internal calibration at probe time")
Signed-off-by: David Lechner <dlechner(a)baylibre.com>
Reviewed-by: Nuno Sá <nuno.sa(a)analog.com>
Link: https://patch.msgid.link/20250726-iio-adc-ad7124-fix-channel-lookup-in-sysc…
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/adc/ad7124.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c
index 9808df2e9242..4d8c6bafd1c3 100644
--- a/drivers/iio/adc/ad7124.c
+++ b/drivers/iio/adc/ad7124.c
@@ -849,7 +849,7 @@ enum {
static int ad7124_syscalib_locked(struct ad7124_state *st, const struct iio_chan_spec *chan)
{
struct device *dev = &st->sd.spi->dev;
- struct ad7124_channel *ch = &st->channels[chan->channel];
+ struct ad7124_channel *ch = &st->channels[chan->address];
int ret;
if (ch->syscalib_mode == AD7124_SYSCALIB_ZERO_SCALE) {
@@ -865,8 +865,8 @@ static int ad7124_syscalib_locked(struct ad7124_state *st, const struct iio_chan
if (ret < 0)
return ret;
- dev_dbg(dev, "offset for channel %d after zero-scale calibration: 0x%x\n",
- chan->channel, ch->cfg.calibration_offset);
+ dev_dbg(dev, "offset for channel %lu after zero-scale calibration: 0x%x\n",
+ chan->address, ch->cfg.calibration_offset);
} else {
ch->cfg.calibration_gain = st->gain_default;
@@ -880,8 +880,8 @@ static int ad7124_syscalib_locked(struct ad7124_state *st, const struct iio_chan
if (ret < 0)
return ret;
- dev_dbg(dev, "gain for channel %d after full-scale calibration: 0x%x\n",
- chan->channel, ch->cfg.calibration_gain);
+ dev_dbg(dev, "gain for channel %lu after full-scale calibration: 0x%x\n",
+ chan->address, ch->cfg.calibration_gain);
}
return 0;
@@ -924,7 +924,7 @@ static int ad7124_set_syscalib_mode(struct iio_dev *indio_dev,
{
struct ad7124_state *st = iio_priv(indio_dev);
- st->channels[chan->channel].syscalib_mode = mode;
+ st->channels[chan->address].syscalib_mode = mode;
return 0;
}
@@ -934,7 +934,7 @@ static int ad7124_get_syscalib_mode(struct iio_dev *indio_dev,
{
struct ad7124_state *st = iio_priv(indio_dev);
- return st->channels[chan->channel].syscalib_mode;
+ return st->channels[chan->address].syscalib_mode;
}
static const struct iio_enum ad7124_syscalib_mode_enum = {
--
2.50.1
This is a note to let you know that I've just added the patch titled
iio: temperature: maxim_thermocouple: use DMA-safe buffer for
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From ae5bc07ec9f73a41734270ef3f800c5c8a7e0ad3 Mon Sep 17 00:00:00 2001
From: David Lechner <dlechner(a)baylibre.com>
Date: Mon, 21 Jul 2025 18:04:04 -0500
Subject: iio: temperature: maxim_thermocouple: use DMA-safe buffer for
spi_read()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Replace using stack-allocated buffers with a DMA-safe buffer for use
with spi_read(). This allows the driver to be safely used with
DMA-enabled SPI controllers.
The buffer array is also converted to a struct with a union to make the
usage of the memory in the buffer more clear and ensure proper alignment.
Fixes: 1f25ca11d84a ("iio: temperature: add support for Maxim thermocouple chips")
Signed-off-by: David Lechner <dlechner(a)baylibre.com>
Reviewed-by: Nuno Sá <nuno.sa(a)analog.com>
Link: https://patch.msgid.link/20250721-iio-use-more-iio_declare_buffer_with_ts-3…
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/temperature/maxim_thermocouple.c | 26 ++++++++++++--------
1 file changed, 16 insertions(+), 10 deletions(-)
diff --git a/drivers/iio/temperature/maxim_thermocouple.c b/drivers/iio/temperature/maxim_thermocouple.c
index cae8e84821d7..205939680fd4 100644
--- a/drivers/iio/temperature/maxim_thermocouple.c
+++ b/drivers/iio/temperature/maxim_thermocouple.c
@@ -11,6 +11,7 @@
#include <linux/module.h>
#include <linux/err.h>
#include <linux/spi/spi.h>
+#include <linux/types.h>
#include <linux/iio/iio.h>
#include <linux/iio/sysfs.h>
#include <linux/iio/trigger.h>
@@ -121,8 +122,15 @@ struct maxim_thermocouple_data {
struct spi_device *spi;
const struct maxim_thermocouple_chip *chip;
char tc_type;
-
- u8 buffer[16] __aligned(IIO_DMA_MINALIGN);
+ /* Buffer for reading up to 2 hardware channels. */
+ struct {
+ union {
+ __be16 raw16;
+ __be32 raw32;
+ __be16 raw[2];
+ };
+ aligned_s64 timestamp;
+ } buffer __aligned(IIO_DMA_MINALIGN);
};
static int maxim_thermocouple_read(struct maxim_thermocouple_data *data,
@@ -130,18 +138,16 @@ static int maxim_thermocouple_read(struct maxim_thermocouple_data *data,
{
unsigned int storage_bytes = data->chip->read_size;
unsigned int shift = chan->scan_type.shift + (chan->address * 8);
- __be16 buf16;
- __be32 buf32;
int ret;
switch (storage_bytes) {
case 2:
- ret = spi_read(data->spi, (void *)&buf16, storage_bytes);
- *val = be16_to_cpu(buf16);
+ ret = spi_read(data->spi, &data->buffer.raw16, storage_bytes);
+ *val = be16_to_cpu(data->buffer.raw16);
break;
case 4:
- ret = spi_read(data->spi, (void *)&buf32, storage_bytes);
- *val = be32_to_cpu(buf32);
+ ret = spi_read(data->spi, &data->buffer.raw32, storage_bytes);
+ *val = be32_to_cpu(data->buffer.raw32);
break;
default:
ret = -EINVAL;
@@ -166,9 +172,9 @@ static irqreturn_t maxim_thermocouple_trigger_handler(int irq, void *private)
struct maxim_thermocouple_data *data = iio_priv(indio_dev);
int ret;
- ret = spi_read(data->spi, data->buffer, data->chip->read_size);
+ ret = spi_read(data->spi, data->buffer.raw, data->chip->read_size);
if (!ret) {
- iio_push_to_buffers_with_ts(indio_dev, data->buffer,
+ iio_push_to_buffers_with_ts(indio_dev, &data->buffer,
sizeof(data->buffer),
iio_get_time_ns(indio_dev));
}
--
2.50.1
This is a note to let you know that I've just added the patch titled
iio: proximity: isl29501: fix buffered read on big-endian systems
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From de18e978d0cda23e4c102e18092b63a5b0b3a800 Mon Sep 17 00:00:00 2001
From: David Lechner <dlechner(a)baylibre.com>
Date: Tue, 22 Jul 2025 15:54:21 -0500
Subject: iio: proximity: isl29501: fix buffered read on big-endian systems
Fix passing a u32 value as a u16 buffer scan item. This works on little-
endian systems, but not on big-endian systems.
A new local variable is introduced for getting the register value and
the array is changed to a struct to make the data layout more explicit
rather than just changing the type and having to recalculate the proper
length needed for the timestamp.
Fixes: 1c28799257bc ("iio: light: isl29501: Add support for the ISL29501 ToF sensor.")
Signed-off-by: David Lechner <dlechner(a)baylibre.com>
Link: https://patch.msgid.link/20250722-iio-use-more-iio_declare_buffer_with_ts-7…
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/proximity/isl29501.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/drivers/iio/proximity/isl29501.c b/drivers/iio/proximity/isl29501.c
index d1510fe24050..f69db6f2f380 100644
--- a/drivers/iio/proximity/isl29501.c
+++ b/drivers/iio/proximity/isl29501.c
@@ -938,12 +938,18 @@ static irqreturn_t isl29501_trigger_handler(int irq, void *p)
struct iio_dev *indio_dev = pf->indio_dev;
struct isl29501_private *isl29501 = iio_priv(indio_dev);
const unsigned long *active_mask = indio_dev->active_scan_mask;
- u32 buffer[4] __aligned(8) = {}; /* 1x16-bit + naturally aligned ts */
+ u32 value;
+ struct {
+ u16 data;
+ aligned_s64 ts;
+ } scan = { };
- if (test_bit(ISL29501_DISTANCE_SCAN_INDEX, active_mask))
- isl29501_register_read(isl29501, REG_DISTANCE, buffer);
+ if (test_bit(ISL29501_DISTANCE_SCAN_INDEX, active_mask)) {
+ isl29501_register_read(isl29501, REG_DISTANCE, &value);
+ scan.data = value;
+ }
- iio_push_to_buffers_with_timestamp(indio_dev, buffer, pf->timestamp);
+ iio_push_to_buffers_with_timestamp(indio_dev, &scan, pf->timestamp);
iio_trigger_notify_done(indio_dev->trig);
return IRQ_HANDLED;
--
2.50.1
This is a note to let you know that I've just added the patch titled
iio: adc: ad7173: prevent scan if too many setups requested
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 1cfb22c277c7274f54babaa5b416dfbc00181e16 Mon Sep 17 00:00:00 2001
From: David Lechner <dlechner(a)baylibre.com>
Date: Tue, 22 Jul 2025 14:20:07 -0500
Subject: iio: adc: ad7173: prevent scan if too many setups requested
Add a check to ad7173_update_scan_mode() to ensure that we didn't exceed
the maximum number of unique channel configurations.
In the AD7173 family of chips, there are some chips that have 16
CHANNELx registers but only 8 setups (combination of CONFIGx, FILTERx,
GAINx and OFFSETx registers). Since commit 92c247216918 ("iio: adc:
ad7173: fix num_slots"), it is possible to have more than 8 channels
enabled in a scan at the same time, so it is possible to get a bad
configuration when more than 8 channels are using unique configurations.
This happens because the algorithm to allocate the setup slots only
takes into account which slot has been least recently used and doesn't
know about the maximum number of slots available.
Since the algorithm to allocate the setup slots is quite complex, it is
simpler to check after the fact if the current state is valid or not.
So this patch adds a check in ad7173_update_scan_mode() after setting up
all of the configurations to make sure that the actual setup still
matches the requested setup for each enabled channel. If not, we prevent
the scan from being enabled and return an error.
The setup comparison in ad7173_setup_equal() is refactored to a separate
function since we need to call it in two places now.
Fixes: 92c247216918 ("iio: adc: ad7173: fix num_slots")
Signed-off-by: David Lechner <dlechner(a)baylibre.com>
Link: https://patch.msgid.link/20250722-iio-adc-ad7173-fix-setup-use-limits-v2-1-…
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/adc/ad7173.c | 87 ++++++++++++++++++++++++++++++++++------
1 file changed, 75 insertions(+), 12 deletions(-)
diff --git a/drivers/iio/adc/ad7173.c b/drivers/iio/adc/ad7173.c
index 4413207be28f..683146e83ab2 100644
--- a/drivers/iio/adc/ad7173.c
+++ b/drivers/iio/adc/ad7173.c
@@ -200,7 +200,7 @@ struct ad7173_channel_config {
/*
* Following fields are used to compare equality. If you
* make adaptations in it, you most likely also have to adapt
- * ad7173_find_live_config(), too.
+ * ad7173_is_setup_equal(), too.
*/
struct_group(config_props,
bool bipolar;
@@ -561,12 +561,19 @@ static void ad7173_reset_usage_cnts(struct ad7173_state *st)
st->config_usage_counter = 0;
}
-static struct ad7173_channel_config *
-ad7173_find_live_config(struct ad7173_state *st, struct ad7173_channel_config *cfg)
+/**
+ * ad7173_is_setup_equal - Compare two channel setups
+ * @cfg1: First channel configuration
+ * @cfg2: Second channel configuration
+ *
+ * Compares all configuration options that affect the registers connected to
+ * SETUP_SEL, namely CONFIGx, FILTERx, GAINx and OFFSETx.
+ *
+ * Returns: true if the setups are identical, false otherwise
+ */
+static bool ad7173_is_setup_equal(const struct ad7173_channel_config *cfg1,
+ const struct ad7173_channel_config *cfg2)
{
- struct ad7173_channel_config *cfg_aux;
- int i;
-
/*
* This is just to make sure that the comparison is adapted after
* struct ad7173_channel_config was changed.
@@ -579,14 +586,22 @@ ad7173_find_live_config(struct ad7173_state *st, struct ad7173_channel_config *c
u8 ref_sel;
}));
+ return cfg1->bipolar == cfg2->bipolar &&
+ cfg1->input_buf == cfg2->input_buf &&
+ cfg1->odr == cfg2->odr &&
+ cfg1->ref_sel == cfg2->ref_sel;
+}
+
+static struct ad7173_channel_config *
+ad7173_find_live_config(struct ad7173_state *st, struct ad7173_channel_config *cfg)
+{
+ struct ad7173_channel_config *cfg_aux;
+ int i;
+
for (i = 0; i < st->num_channels; i++) {
cfg_aux = &st->channels[i].cfg;
- if (cfg_aux->live &&
- cfg->bipolar == cfg_aux->bipolar &&
- cfg->input_buf == cfg_aux->input_buf &&
- cfg->odr == cfg_aux->odr &&
- cfg->ref_sel == cfg_aux->ref_sel)
+ if (cfg_aux->live && ad7173_is_setup_equal(cfg, cfg_aux))
return cfg_aux;
}
return NULL;
@@ -1228,7 +1243,7 @@ static int ad7173_update_scan_mode(struct iio_dev *indio_dev,
const unsigned long *scan_mask)
{
struct ad7173_state *st = iio_priv(indio_dev);
- int i, ret;
+ int i, j, k, ret;
for (i = 0; i < indio_dev->num_channels; i++) {
if (test_bit(i, scan_mask))
@@ -1239,6 +1254,54 @@ static int ad7173_update_scan_mode(struct iio_dev *indio_dev,
return ret;
}
+ /*
+ * On some chips, there are more channels that setups, so if there were
+ * more unique setups requested than the number of available slots,
+ * ad7173_set_channel() will have written over some of the slots. We
+ * can detect this by making sure each assigned cfg_slot matches the
+ * requested configuration. If it doesn't, we know that the slot was
+ * overwritten by a different channel.
+ */
+ for_each_set_bit(i, scan_mask, indio_dev->num_channels) {
+ const struct ad7173_channel_config *cfg1, *cfg2;
+
+ cfg1 = &st->channels[i].cfg;
+
+ for_each_set_bit(j, scan_mask, indio_dev->num_channels) {
+ cfg2 = &st->channels[j].cfg;
+
+ /*
+ * Only compare configs that are assigned to the same
+ * SETUP_SEL slot and don't compare channel to itself.
+ */
+ if (i == j || cfg1->cfg_slot != cfg2->cfg_slot)
+ continue;
+
+ /*
+ * If we find two different configs trying to use the
+ * same SETUP_SEL slot, then we know that the that we
+ * have too many unique configurations requested for
+ * the available slots and at least one was overwritten.
+ */
+ if (!ad7173_is_setup_equal(cfg1, cfg2)) {
+ /*
+ * At this point, there isn't a way to tell
+ * which setups are actually programmed in the
+ * ADC anymore, so we could read them back to
+ * see, but it is simpler to just turn off all
+ * of the live flags so that everything gets
+ * reprogramed on the next attempt read a sample.
+ */
+ for (k = 0; k < st->num_channels; k++)
+ st->channels[k].cfg.live = false;
+
+ dev_err(&st->sd.spi->dev,
+ "Too many unique channel configurations requested for scan\n");
+ return -EINVAL;
+ }
+ }
+ }
+
return 0;
}
--
2.50.1
The Cadence PCIe Controller integrated in the TI K3 SoCs supports both
Root-Complex and Endpoint modes of operation. The Glue Layer allows
"strapping" the mode of operation of the Controller, the Link Speed
and the Link Width. This is enabled by programming the "PCIEn_CTRL"
register (n corresponds to the PCIe instance) within the CTRL_MMR
memory-mapped register space.
In the PCIe Controller's register space, the same set of registers
that correspond to the Root-Port configuration space when the
Controller is configured for Root-Complex mode of operation, also
correspond to the Physical Function configuration space when the
Controller is configured for Endpoint mode of operation. As a result,
the "reset-value" of these set of registers _should_ vary depending
on the selected mode of operation. This is the expected behavior
according to the description of the registers and their reset values
in the Technical Reference Manual for the SoCs.
However, it is observed that the "reset-value" seen in practice
do not match the description. To be precise, when the Controller
is configured for Root-Complex mode of operation, the "reset-value"
of the Root-Port configuration space reflect the "reset-value"
corresponding to the Physical Function configuration space.
This can be attributed to the fact that the "strap" settings play
a role in "switching" the "reset-value" of the registers to match
the expected values as determined by the selected mode of operation.
Since the "strap" settings are sampled the moment the PCIe Controller
is powered ON, the "reset-value" of the registers are setup at that
point in time. As a result, if the "strap" settings are programmed
at a later point in time, it _will not_ update the "reset-value" of
the registers. This will cause the Physical Function configuration
space to be seen when the Root-Port configuration space is accessed
after programming the PCIe Controller for Root-Complex mode of
operation.
Fix this by powering off the PCIe Controller before programming the
"strap" settings and powering it on after that. This will ensure
that the "strap" settings that have been sampled convey the intended
mode of operation, thereby resulting in the "reset-value" of the
registers being accurate.
Fixes: f3e25911a430 ("PCI: j721e: Add TI J721E PCIe driver")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Siddharth Vadapalli <s-vadapalli(a)ti.com>
---
Hello,
This patch is based on commit
be48bcf004f9 Merge tag 'for-6.17-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
of Mainline Linux.
v1 of this patch is at:
https://lore.kernel.org/r/20250716102851.121742-1-s-vadapalli@ti.com/
Changes since v1:
- Rebased patch on latest Mainline Linux.
Regards,
Siddharth.
drivers/pci/controller/cadence/pci-j721e.c | 82 ++++++++++++++--------
1 file changed, 53 insertions(+), 29 deletions(-)
diff --git a/drivers/pci/controller/cadence/pci-j721e.c b/drivers/pci/controller/cadence/pci-j721e.c
index 6c93f39d0288..d5e7cb7277dc 100644
--- a/drivers/pci/controller/cadence/pci-j721e.c
+++ b/drivers/pci/controller/cadence/pci-j721e.c
@@ -19,6 +19,7 @@
#include <linux/of.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
+#include <linux/pm_domain.h>
#include <linux/pm_runtime.h>
#include <linux/regmap.h>
@@ -173,10 +174,9 @@ static const struct cdns_pcie_ops j721e_pcie_ops = {
.link_up = j721e_pcie_link_up,
};
-static int j721e_pcie_set_mode(struct j721e_pcie *pcie, struct regmap *syscon,
- unsigned int offset)
+static int j721e_pcie_set_mode(struct j721e_pcie *pcie, struct device *dev,
+ struct regmap *syscon, unsigned int offset)
{
- struct device *dev = pcie->cdns_pcie->dev;
u32 mask = J721E_MODE_RC;
u32 mode = pcie->mode;
u32 val = 0;
@@ -193,9 +193,9 @@ static int j721e_pcie_set_mode(struct j721e_pcie *pcie, struct regmap *syscon,
}
static int j721e_pcie_set_link_speed(struct j721e_pcie *pcie,
+ struct device *dev,
struct regmap *syscon, unsigned int offset)
{
- struct device *dev = pcie->cdns_pcie->dev;
struct device_node *np = dev->of_node;
int link_speed;
u32 val = 0;
@@ -214,9 +214,9 @@ static int j721e_pcie_set_link_speed(struct j721e_pcie *pcie,
}
static int j721e_pcie_set_lane_count(struct j721e_pcie *pcie,
+ struct device *dev,
struct regmap *syscon, unsigned int offset)
{
- struct device *dev = pcie->cdns_pcie->dev;
u32 lanes = pcie->num_lanes;
u32 mask = BIT(8);
u32 val = 0;
@@ -234,9 +234,9 @@ static int j721e_pcie_set_lane_count(struct j721e_pcie *pcie,
}
static int j721e_enable_acspcie_refclk(struct j721e_pcie *pcie,
+ struct device *dev,
struct regmap *syscon)
{
- struct device *dev = pcie->cdns_pcie->dev;
struct device_node *node = dev->of_node;
u32 mask = ACSPCIE_PAD_DISABLE_MASK;
struct of_phandle_args args;
@@ -263,9 +263,8 @@ static int j721e_enable_acspcie_refclk(struct j721e_pcie *pcie,
return 0;
}
-static int j721e_pcie_ctrl_init(struct j721e_pcie *pcie)
+static int j721e_pcie_ctrl_init(struct j721e_pcie *pcie, struct device *dev)
{
- struct device *dev = pcie->cdns_pcie->dev;
struct device_node *node = dev->of_node;
struct of_phandle_args args;
unsigned int offset = 0;
@@ -284,19 +283,19 @@ static int j721e_pcie_ctrl_init(struct j721e_pcie *pcie)
if (!ret)
offset = args.args[0];
- ret = j721e_pcie_set_mode(pcie, syscon, offset);
+ ret = j721e_pcie_set_mode(pcie, dev, syscon, offset);
if (ret < 0) {
dev_err(dev, "Failed to set pci mode\n");
return ret;
}
- ret = j721e_pcie_set_link_speed(pcie, syscon, offset);
+ ret = j721e_pcie_set_link_speed(pcie, dev, syscon, offset);
if (ret < 0) {
dev_err(dev, "Failed to set link speed\n");
return ret;
}
- ret = j721e_pcie_set_lane_count(pcie, syscon, offset);
+ ret = j721e_pcie_set_lane_count(pcie, dev, syscon, offset);
if (ret < 0) {
dev_err(dev, "Failed to set num-lanes\n");
return ret;
@@ -308,7 +307,7 @@ static int j721e_pcie_ctrl_init(struct j721e_pcie *pcie)
if (!syscon)
return 0;
- return j721e_enable_acspcie_refclk(pcie, syscon);
+ return j721e_enable_acspcie_refclk(pcie, dev, syscon);
}
static int cdns_ti_pcie_config_read(struct pci_bus *bus, unsigned int devfn,
@@ -469,6 +468,47 @@ static int j721e_pcie_probe(struct platform_device *pdev)
if (!pcie)
return -ENOMEM;
+ pcie->mode = mode;
+
+ ret = of_property_read_u32(node, "num-lanes", &num_lanes);
+ if (ret || num_lanes > data->max_lanes) {
+ dev_warn(dev, "num-lanes property not provided or invalid, setting num-lanes to 1\n");
+ num_lanes = 1;
+ }
+
+ pcie->num_lanes = num_lanes;
+ pcie->max_lanes = data->max_lanes;
+
+ /*
+ * The PCIe Controller's registers have different "reset-values" depending
+ * on the "strap" settings programmed into the Controller's Glue Layer.
+ * This is because the same set of registers are used for representing the
+ * Physical Function configuration space in Endpoint mode and for
+ * representing the Root-Port configuration space in Root-Complex mode.
+ *
+ * The registers latch onto a "reset-value" based on the "strap" settings
+ * sampled after the Controller is powered on. Therefore, for the
+ * "reset-value" to be accurate, it is necessary to program the "strap"
+ * settings when the Controller is powered off, and power on the Controller
+ * after the "strap" settings have been programmed.
+ *
+ * The "strap" settings are programmed by "j721e_pcie_ctrl_init()".
+ * Therefore, power off the Controller before invoking "j721e_pcie_ctrl_init()",
+ * program the "strap" settings, and then power on the Controller. This ensures
+ * that the reset values are accurate and reflect the "strap" settings.
+ */
+ dev_pm_domain_detach(dev, true);
+
+ ret = j721e_pcie_ctrl_init(pcie, dev);
+ if (ret < 0)
+ return ret;
+
+ ret = dev_pm_domain_attach(dev, true);
+ if (ret < 0) {
+ dev_err(dev, "failed to power on device\n");
+ return ret;
+ }
+
switch (mode) {
case PCI_MODE_RC:
if (!IS_ENABLED(CONFIG_PCI_J721E_HOST))
@@ -510,7 +550,6 @@ static int j721e_pcie_probe(struct platform_device *pdev)
return 0;
}
- pcie->mode = mode;
pcie->linkdown_irq_regfield = data->linkdown_irq_regfield;
base = devm_platform_ioremap_resource_byname(pdev, "intd_cfg");
@@ -523,15 +562,6 @@ static int j721e_pcie_probe(struct platform_device *pdev)
return PTR_ERR(base);
pcie->user_cfg_base = base;
- ret = of_property_read_u32(node, "num-lanes", &num_lanes);
- if (ret || num_lanes > data->max_lanes) {
- dev_warn(dev, "num-lanes property not provided or invalid, setting num-lanes to 1\n");
- num_lanes = 1;
- }
-
- pcie->num_lanes = num_lanes;
- pcie->max_lanes = data->max_lanes;
-
if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)))
return -EINVAL;
@@ -547,12 +577,6 @@ static int j721e_pcie_probe(struct platform_device *pdev)
goto err_get_sync;
}
- ret = j721e_pcie_ctrl_init(pcie);
- if (ret < 0) {
- dev_err_probe(dev, ret, "pm_runtime_get_sync failed\n");
- goto err_get_sync;
- }
-
ret = devm_request_irq(dev, irq, j721e_pcie_link_irq_handler, 0,
"j721e-pcie-link-down-irq", pcie);
if (ret < 0) {
@@ -680,7 +704,7 @@ static int j721e_pcie_resume_noirq(struct device *dev)
struct cdns_pcie *cdns_pcie = pcie->cdns_pcie;
int ret;
- ret = j721e_pcie_ctrl_init(pcie);
+ ret = j721e_pcie_ctrl_init(pcie, dev);
if (ret < 0)
return ret;
--
2.43.0
From: Chen Junlin <chen.junlin(a)zte.com.cn>
Although the upstream commit 2b0f2fc9ed62 ("Bluetooth: hci_conn:
Use disable_delayed_work_sync") has fixed the issue CVE-2024-56591, that
patch depends on the implementaion of disable/enable_work() of workqueue
[1], which are merged into 6.9/6.10 and so on. But for branch linux-6.6,
there's no these feature of workqueue.
To solve CVE-2024-56591 without backport too many feature patches about
workqueue, we can set a new flag HCI_CONN_DELETE when hci_conn_dell() is
called, and the subsequent queuing of work will be ignored.
[1] https://lore.kernel.org/all/20240216180559.208276-1-tj@kernel.org/
Signed-off-by: Chen Junlin <chen.junlin(a)zte.com.cn>
Signed-off-by: xu xin <xu.xin16(a)zte.com.cn>
---
include/net/bluetooth/hci_core.h | 8 +++++++-
net/bluetooth/hci_conn.c | 1 +
2 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 4f067599e6e9..9a3ec55079a1 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -954,6 +954,7 @@ enum {
HCI_CONN_BIG_SYNC_FAILED,
HCI_CONN_PA_SYNC,
HCI_CONN_PA_SYNC_FAILED,
+ HCI_CONN_DELETE,
};
static inline bool hci_conn_ssp_enabled(struct hci_conn *conn)
@@ -1575,7 +1576,12 @@ static inline void hci_conn_drop(struct hci_conn *conn)
}
cancel_delayed_work(&conn->disc_work);
- queue_delayed_work(conn->hdev->workqueue,
+ /*
+ * When HCI_CONN_DELETE is set, the conn is goint to be freed.
+ * Don't queue the work to avoid noisy WARNing about refcnt < 0.
+ */
+ if (!test_bit(HCI_CONN_DELETE, &conn->flags))
+ queue_delayed_work(conn->hdev->workqueue,
&conn->disc_work, timeo);
}
}
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 549ee9e87d63..67a6513bb01c 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -1112,6 +1112,7 @@ void hci_conn_del(struct hci_conn *conn)
hci_conn_unlink(conn);
+ set_bit(HCI_CONN_DELETE, &conn->flags);
cancel_delayed_work_sync(&conn->disc_work);
cancel_delayed_work_sync(&conn->auto_accept_work);
cancel_delayed_work_sync(&conn->idle_work);
--
2.15.2
The quilt patch titled
Subject: mm/mremap: fix WARN with uffd that has remap events disabled
has been removed from the -mm tree. Its filename was
mm-mremap-fix-warn-with-uffd-that-has-remap-events-disabled.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: David Hildenbrand <david(a)redhat.com>
Subject: mm/mremap: fix WARN with uffd that has remap events disabled
Date: Mon, 18 Aug 2025 19:53:58 +0200
Registering userfaultd on a VMA that spans at least one PMD and then
mremap()'ing that VMA can trigger a WARN when recovering from a failed
page table move due to a page table allocation error.
The code ends up doing the right thing (recurse, avoiding moving actual
page tables), but triggering that WARN is unpleasant:
WARNING: CPU: 2 PID: 6133 at mm/mremap.c:357 move_normal_pmd mm/mremap.c:357 [inline]
WARNING: CPU: 2 PID: 6133 at mm/mremap.c:357 move_pgt_entry mm/mremap.c:595 [inline]
WARNING: CPU: 2 PID: 6133 at mm/mremap.c:357 move_page_tables+0x3832/0x44a0 mm/mremap.c:852
Modules linked in:
CPU: 2 UID: 0 PID: 6133 Comm: syz.0.19 Not tainted 6.17.0-rc1-syzkaller-00004-g53e760d89498 #0 PREEMPT(full)
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014
RIP: 0010:move_normal_pmd mm/mremap.c:357 [inline]
RIP: 0010:move_pgt_entry mm/mremap.c:595 [inline]
RIP: 0010:move_page_tables+0x3832/0x44a0 mm/mremap.c:852
Code: ...
RSP: 0018:ffffc900037a76d8 EFLAGS: 00010293
RAX: 0000000000000000 RBX: 0000000032930007 RCX: ffffffff820c6645
RDX: ffff88802e56a440 RSI: ffffffff820c7201 RDI: 0000000000000007
RBP: ffff888037728fc0 R08: 0000000000000007 R09: 0000000000000000
R10: 0000000032930007 R11: 0000000000000000 R12: 0000000000000000
R13: ffffc900037a79a8 R14: 0000000000000001 R15: dffffc0000000000
FS: 000055556316a500(0000) GS:ffff8880d68bc000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000001b30863fff CR3: 0000000050171000 CR4: 0000000000352ef0
Call Trace:
<TASK>
copy_vma_and_data+0x468/0x790 mm/mremap.c:1215
move_vma+0x548/0x1780 mm/mremap.c:1282
mremap_to+0x1b7/0x450 mm/mremap.c:1406
do_mremap+0xfad/0x1f80 mm/mremap.c:1921
__do_sys_mremap+0x119/0x170 mm/mremap.c:1977
do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
do_syscall_64+0xcd/0x4c0 arch/x86/entry/syscall_64.c:94
entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7f00d0b8ebe9
Code: ...
RSP: 002b:00007ffe5ea5ee98 EFLAGS: 00000246 ORIG_RAX: 0000000000000019
RAX: ffffffffffffffda RBX: 00007f00d0db5fa0 RCX: 00007f00d0b8ebe9
RDX: 0000000000400000 RSI: 0000000000c00000 RDI: 0000200000000000
RBP: 00007ffe5ea5eef0 R08: 0000200000c00000 R09: 0000000000000000
R10: 0000000000000003 R11: 0000000000000246 R12: 0000000000000002
R13: 00007f00d0db5fa0 R14: 00007f00d0db5fa0 R15: 0000000000000005
</TASK>
The underlying issue is that we recurse during the original page table
move, but not during the recovery move.
Fix it by checking for both VMAs and performing the check before the
pmd_none() sanity check.
Add a new helper where we perform+document that check for the PMD and PUD
level.
Thanks to Harry for bisecting.
Link: https://lkml.kernel.org/r/20250818175358.1184757-1-david@redhat.com
Fixes: 0cef0bb836e3 ("mm: clear uffd-wp PTE/PMD state on mremap()")
Signed-off-by: David Hildenbrand <david(a)redhat.com>
Reported-by: syzbot+4d9a13f0797c46a29e42(a)syzkaller.appspotmail.com
Closes: https://lkml.kernel.org/r/689bb893.050a0220.7f033.013a.GAE@google.com
Tested-by: Harry Yoo <harry.yoo(a)oracle.com>
Cc: "Liam R. Howlett" <Liam.Howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Jann Horn <jannh(a)google.com>
Cc: Pedro Falcato <pfalcato(a)suse.de>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/mremap.c | 41 +++++++++++++++++++++++------------------
1 file changed, 23 insertions(+), 18 deletions(-)
--- a/mm/mremap.c~mm-mremap-fix-warn-with-uffd-that-has-remap-events-disabled
+++ a/mm/mremap.c
@@ -323,6 +323,25 @@ static inline bool arch_supports_page_ta
}
#endif
+static inline bool uffd_supports_page_table_move(struct pagetable_move_control *pmc)
+{
+ /*
+ * If we are moving a VMA that has uffd-wp registered but with
+ * remap events disabled (new VMA will not be registered with uffd), we
+ * need to ensure that the uffd-wp state is cleared from all pgtables.
+ * This means recursing into lower page tables in move_page_tables().
+ *
+ * We might get called with VMAs reversed when recovering from a
+ * failed page table move. In that case, the
+ * "old"-but-actually-"originally new" VMA during recovery will not have
+ * a uffd context. Recursing into lower page tables during the original
+ * move but not during the recovery move will cause trouble, because we
+ * run into already-existing page tables. So check both VMAs.
+ */
+ return !vma_has_uffd_without_event_remap(pmc->old) &&
+ !vma_has_uffd_without_event_remap(pmc->new);
+}
+
#ifdef CONFIG_HAVE_MOVE_PMD
static bool move_normal_pmd(struct pagetable_move_control *pmc,
pmd_t *old_pmd, pmd_t *new_pmd)
@@ -335,6 +354,8 @@ static bool move_normal_pmd(struct paget
if (!arch_supports_page_table_move())
return false;
+ if (!uffd_supports_page_table_move(pmc))
+ return false;
/*
* The destination pmd shouldn't be established, free_pgtables()
* should have released it.
@@ -361,15 +382,6 @@ static bool move_normal_pmd(struct paget
if (WARN_ON_ONCE(!pmd_none(*new_pmd)))
return false;
- /* If this pmd belongs to a uffd vma with remap events disabled, we need
- * to ensure that the uffd-wp state is cleared from all pgtables. This
- * means recursing into lower page tables in move_page_tables(), and we
- * can reuse the existing code if we simply treat the entry as "not
- * moved".
- */
- if (vma_has_uffd_without_event_remap(vma))
- return false;
-
/*
* We don't have to worry about the ordering of src and dst
* ptlocks because exclusive mmap_lock prevents deadlock.
@@ -418,6 +430,8 @@ static bool move_normal_pud(struct paget
if (!arch_supports_page_table_move())
return false;
+ if (!uffd_supports_page_table_move(pmc))
+ return false;
/*
* The destination pud shouldn't be established, free_pgtables()
* should have released it.
@@ -425,15 +439,6 @@ static bool move_normal_pud(struct paget
if (WARN_ON_ONCE(!pud_none(*new_pud)))
return false;
- /* If this pud belongs to a uffd vma with remap events disabled, we need
- * to ensure that the uffd-wp state is cleared from all pgtables. This
- * means recursing into lower page tables in move_page_tables(), and we
- * can reuse the existing code if we simply treat the entry as "not
- * moved".
- */
- if (vma_has_uffd_without_event_remap(vma))
- return false;
-
/*
* We don't have to worry about the ordering of src and dst
* ptlocks because exclusive mmap_lock prevents deadlock.
_
Patches currently in -mm which might be from david(a)redhat.com are
mm-migrate-remove-migratepage_unmap.patch
mm-migrate-remove-migratepage_unmap-fix.patch
treewide-remove-migratepage_success.patch
mm-huge_memory-move-more-common-code-into-insert_pmd.patch
mm-huge_memory-move-more-common-code-into-insert_pud.patch
mm-huge_memory-support-huge-zero-folio-in-vmf_insert_folio_pmd.patch
fs-dax-use-vmf_insert_folio_pmd-to-insert-the-huge-zero-folio.patch
mm-huge_memory-mark-pmd-mappings-of-the-huge-zero-folio-special.patch
powerpc-ptdump-rename-struct-pgtable_level-to-struct-ptdump_pglevel.patch
mm-rmap-convert-enum-rmap_level-to-enum-pgtable_level.patch
mm-memory-convert-print_bad_pte-to-print_bad_page_map.patch
mm-memory-factor-out-common-code-from-vm_normal_page_.patch
mm-introduce-and-use-vm_normal_page_pud.patch
mm-rename-vm_ops-find_special_page-to-vm_ops-find_normal_page.patch
prctl-extend-pr_set_thp_disable-to-optionally-exclude-vm_hugepage.patch
mm-huge_memory-convert-tva_flags-to-enum-tva_type.patch
mm-huge_memory-respect-madv_collapse-with-pr_thp_disable_except_advised.patch
The quilt patch titled
Subject: mm/damon/core: fix damos_commit_filter not changing allow
has been removed from the -mm tree. Its filename was
mm-damon-core-fix-damos_commit_filter-not-changing-allow.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Sang-Heon Jeon <ekffu200098(a)gmail.com>
Subject: mm/damon/core: fix damos_commit_filter not changing allow
Date: Sat, 16 Aug 2025 10:51:16 +0900
Current damos_commit_filter() does not persist the `allow' value of the
filter. As a result, changing the `allow' value of a filter and
committing doesn't change the `allow' value.
Add the missing `allow' value update, so committing the filter
persistently changes the `allow' value well.
Link: https://lkml.kernel.org/r/20250816015116.194589-1-ekffu200098@gmail.com
Fixes: fe6d7fdd6249 ("mm/damon/core: add damos_filter->allow field")
Signed-off-by: Sang-Heon Jeon <ekffu200098(a)gmail.com>
Reviewed-by: SeongJae Park <sj(a)kernel.org>
Cc: <stable(a)vger.kernel.org> [6.14.x]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/damon/core.c | 1 +
1 file changed, 1 insertion(+)
--- a/mm/damon/core.c~mm-damon-core-fix-damos_commit_filter-not-changing-allow
+++ a/mm/damon/core.c
@@ -883,6 +883,7 @@ static void damos_commit_filter(
{
dst->type = src->type;
dst->matching = src->matching;
+ dst->allow = src->allow;
damos_commit_filter_arg(dst, src);
}
_
Patches currently in -mm which might be from ekffu200098(a)gmail.com are
mm-damon-core-set-quota-charged_from-to-jiffies-at-first-charge-window.patch
mm-damon-update-expired-description-of-damos_action.patch
docs-mm-damon-design-fix-typo-s-sz_trtied-sz_tried.patch
selftests-damon-test-no-op-commit-broke-damon-status.patch
selftests-damon-test-no-op-commit-broke-damon-status-fix.patch
mm-damon-tests-core-kunit-add-damos_commit_filter-test.patch
The quilt patch titled
Subject: mm/memory-failure: fix infinite UCE for VM_PFNMAP pfn
has been removed from the -mm tree. Its filename was
mm-memory-failure-fix-infinite-uce-for-vm_pfnmap-pfn.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Jinjiang Tu <tujinjiang(a)huawei.com>
Subject: mm/memory-failure: fix infinite UCE for VM_PFNMAP pfn
Date: Fri, 15 Aug 2025 15:32:09 +0800
When memory_failure() is called for a already hwpoisoned pfn,
kill_accessing_process() will be called to kill current task. However, if
the vma of the accessing vaddr is VM_PFNMAP, walk_page_range() will skip
the vma in walk_page_test() and return 0.
Before commit aaf99ac2ceb7 ("mm/hwpoison: do not send SIGBUS to processes
with recovered clean pages"), kill_accessing_process() will return EFAULT.
For x86, the current task will be killed in kill_me_maybe().
However, after this commit, kill_accessing_process() simplies return 0,
that means UCE is handled properly, but it doesn't actually. In such
case, the user task will trigger UCE infinitely.
To fix it, add .test_walk callback for hwpoison_walk_ops to scan all vmas.
Link: https://lkml.kernel.org/r/20250815073209.1984582-1-tujinjiang@huawei.com
Fixes: aaf99ac2ceb7 ("mm/hwpoison: do not send SIGBUS to processes with recovered clean pages")
Signed-off-by: Jinjiang Tu <tujinjiang(a)huawei.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Acked-by: Miaohe Lin <linmiaohe(a)huawei.com>
Reviewed-by: Jane Chu <jane.chu(a)oracle.com>
Cc: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Cc: Naoya Horiguchi <nao.horiguchi(a)gmail.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Shuai Xue <xueshuai(a)linux.alibaba.com>
Cc: Zi Yan <ziy(a)nvidia.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memory-failure.c | 8 ++++++++
1 file changed, 8 insertions(+)
--- a/mm/memory-failure.c~mm-memory-failure-fix-infinite-uce-for-vm_pfnmap-pfn
+++ a/mm/memory-failure.c
@@ -853,9 +853,17 @@ static int hwpoison_hugetlb_range(pte_t
#define hwpoison_hugetlb_range NULL
#endif
+static int hwpoison_test_walk(unsigned long start, unsigned long end,
+ struct mm_walk *walk)
+{
+ /* We also want to consider pages mapped into VM_PFNMAP. */
+ return 0;
+}
+
static const struct mm_walk_ops hwpoison_walk_ops = {
.pmd_entry = hwpoison_pte_range,
.hugetlb_entry = hwpoison_hugetlb_range,
+ .test_walk = hwpoison_test_walk,
.walk_lock = PGWALK_RDLOCK,
};
_
Patches currently in -mm which might be from tujinjiang(a)huawei.com are
mm-memory_hotplug-fix-hwpoisoned-large-folio-handling-in-do_migrate_range.patch
The quilt patch titled
Subject: iov_iter: iterate_folioq: fix handling of offset >= folio size
has been removed from the -mm tree. Its filename was
iov_iter-iterate_folioq-fix-handling-of-offset-=-folio-size.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Dominique Martinet <asmadeus(a)codewreck.org>
Subject: iov_iter: iterate_folioq: fix handling of offset >= folio size
Date: Wed, 13 Aug 2025 15:04:55 +0900
It's apparently possible to get an iov advanced all the way up to the end
of the current page we're looking at, e.g.
(gdb) p *iter
$24 = {iter_type = 4 '\004', nofault = false, data_source = false, iov_offset = 4096, {__ubuf_iovec = {
iov_base = 0xffff88800f5bc000, iov_len = 655}, {{__iov = 0xffff88800f5bc000, kvec = 0xffff88800f5bc000,
bvec = 0xffff88800f5bc000, folioq = 0xffff88800f5bc000, xarray = 0xffff88800f5bc000,
ubuf = 0xffff88800f5bc000}, count = 655}}, {nr_segs = 2, folioq_slot = 2 '\002', xarray_start = 2}}
Where iov_offset is 4k with 4k-sized folios
This should have been fine because we're only in the 2nd slot and there's
another one after this, but iterate_folioq should not try to map a folio
that skips the whole size, and more importantly part here does not end up
zero (because 'PAGE_SIZE - skip % PAGE_SIZE' ends up PAGE_SIZE and not
zero..), so skip forward to the "advance to next folio" code
Link: https://lkml.kernel.org/r/20250813-iot_iter_folio-v3-0-a0ffad2b665a@codewre…
Link: https://lkml.kernel.org/r/20250813-iot_iter_folio-v3-1-a0ffad2b665a@codewre…
Signed-off-by: Dominique Martinet <asmadeus(a)codewreck.org>
Fixes: db0aa2e9566f ("mm: Define struct folio_queue and ITER_FOLIOQ to handle a sequence of folios")
Reported-by: Maximilian Bosch <maximilian(a)mbosch.me>
Reported-by: Ryan Lahfa <ryan(a)lahfa.xyz>
Reported-by: Christian Theune <ct(a)flyingcircus.io>
Reported-by: Arnout Engelen <arnout(a)bzzt.net>
Link: https://lkml.kernel.org/r/D4LHHUNLG79Y.12PI0X6BEHRHW@mbosch.me/
Acked-by: David Howells <dhowells(a)redhat.com>
Cc: Al Viro <viro(a)zeniv.linux.org.uk>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: <stable(a)vger.kernel.org> [6.12+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/iov_iter.h | 20 +++++++++++---------
1 file changed, 11 insertions(+), 9 deletions(-)
--- a/include/linux/iov_iter.h~iov_iter-iterate_folioq-fix-handling-of-offset-=-folio-size
+++ a/include/linux/iov_iter.h
@@ -160,7 +160,7 @@ size_t iterate_folioq(struct iov_iter *i
do {
struct folio *folio = folioq_folio(folioq, slot);
- size_t part, remain, consumed;
+ size_t part, remain = 0, consumed;
size_t fsize;
void *base;
@@ -168,14 +168,16 @@ size_t iterate_folioq(struct iov_iter *i
break;
fsize = folioq_folio_size(folioq, slot);
- base = kmap_local_folio(folio, skip);
- part = umin(len, PAGE_SIZE - skip % PAGE_SIZE);
- remain = step(base, progress, part, priv, priv2);
- kunmap_local(base);
- consumed = part - remain;
- len -= consumed;
- progress += consumed;
- skip += consumed;
+ if (skip < fsize) {
+ base = kmap_local_folio(folio, skip);
+ part = umin(len, PAGE_SIZE - skip % PAGE_SIZE);
+ remain = step(base, progress, part, priv, priv2);
+ kunmap_local(base);
+ consumed = part - remain;
+ len -= consumed;
+ progress += consumed;
+ skip += consumed;
+ }
if (skip >= fsize) {
skip = 0;
slot++;
_
Patches currently in -mm which might be from asmadeus(a)codewreck.org are
The quilt patch titled
Subject: mm/damon/core: fix commit_ops_filters by using correct nth function
has been removed from the -mm tree. Its filename was
mm-damon-core-fix-commit_ops_filters-by-using-correct-nth-function.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Sang-Heon Jeon <ekffu200098(a)gmail.com>
Subject: mm/damon/core: fix commit_ops_filters by using correct nth function
Date: Sun, 10 Aug 2025 21:42:01 +0900
damos_commit_ops_filters() incorrectly uses damos_nth_filter() which
iterates core_filters. As a result, performing a commit unintentionally
corrupts ops_filters.
Add damos_nth_ops_filter() which iterates ops_filters. Use this function
to fix issues caused by wrong iteration.
Link: https://lkml.kernel.org/r/20250810124201.15743-1-ekffu200098@gmail.com
Fixes: 3607cc590f18 ("mm/damon/core: support committing ops_filters") # 6.15.x
Signed-off-by: Sang-Heon Jeon <ekffu200098(a)gmail.com>
Reviewed-by: SeongJae Park <sj(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/damon/core.c | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)
--- a/mm/damon/core.c~mm-damon-core-fix-commit_ops_filters-by-using-correct-nth-function
+++ a/mm/damon/core.c
@@ -845,6 +845,18 @@ static struct damos_filter *damos_nth_fi
return NULL;
}
+static struct damos_filter *damos_nth_ops_filter(int n, struct damos *s)
+{
+ struct damos_filter *filter;
+ int i = 0;
+
+ damos_for_each_ops_filter(filter, s) {
+ if (i++ == n)
+ return filter;
+ }
+ return NULL;
+}
+
static void damos_commit_filter_arg(
struct damos_filter *dst, struct damos_filter *src)
{
@@ -908,7 +920,7 @@ static int damos_commit_ops_filters(stru
int i = 0, j = 0;
damos_for_each_ops_filter_safe(dst_filter, next, dst) {
- src_filter = damos_nth_filter(i++, src);
+ src_filter = damos_nth_ops_filter(i++, src);
if (src_filter)
damos_commit_filter(dst_filter, src_filter);
else
_
Patches currently in -mm which might be from ekffu200098(a)gmail.com are
mm-damon-core-set-quota-charged_from-to-jiffies-at-first-charge-window.patch
mm-damon-update-expired-description-of-damos_action.patch
docs-mm-damon-design-fix-typo-s-sz_trtied-sz_tried.patch
selftests-damon-test-no-op-commit-broke-damon-status.patch
selftests-damon-test-no-op-commit-broke-damon-status-fix.patch
mm-damon-tests-core-kunit-add-damos_commit_filter-test.patch
The quilt patch titled
Subject: mm/debug_vm_pgtable: clear page table entries at destroy_args()
has been removed from the -mm tree. Its filename was
mm-debug_vm_pgtable-clear-page-table-entries-at-destroy_args.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: "Herton R. Krzesinski" <herton(a)redhat.com>
Subject: mm/debug_vm_pgtable: clear page table entries at destroy_args()
Date: Thu, 31 Jul 2025 18:40:51 -0300
The mm/debug_vm_pagetable test allocates manually page table entries for
the tests it runs, using also its manually allocated mm_struct. That in
itself is ok, but when it exits, at destroy_args() it fails to clear those
entries with the *_clear functions.
The problem is that leaves stale entries. If another process allocates an
mm_struct with a pgd at the same address, it may end up running into the
stale entry. This is happening in practice on a debug kernel with
CONFIG_DEBUG_VM_PGTABLE=y, for example this is the output with some extra
debugging I added (it prints a warning trace if pgtables_bytes goes
negative, in addition to the warning at check_mm() function):
[ 2.539353] debug_vm_pgtable: [get_random_vaddr ]: random_vaddr is 0x7ea247140000
[ 2.539366] kmem_cache info
[ 2.539374] kmem_cachep 0x000000002ce82385 - freelist 0x0000000000000000 - offset 0x508
[ 2.539447] debug_vm_pgtable: [init_args ]: args->mm is 0x000000002267cc9e
(...)
[ 2.552800] WARNING: CPU: 5 PID: 116 at include/linux/mm.h:2841 free_pud_range+0x8bc/0x8d0
[ 2.552816] Modules linked in:
[ 2.552843] CPU: 5 UID: 0 PID: 116 Comm: modprobe Not tainted 6.12.0-105.debug_vm2.el10.ppc64le+debug #1 VOLUNTARY
[ 2.552859] Hardware name: IBM,9009-41A POWER9 (architected) 0x4e0202 0xf000005 of:IBM,FW910.00 (VL910_062) hv:phyp pSeries
[ 2.552872] NIP: c0000000007eef3c LR: c0000000007eef30 CTR: c0000000003d8c90
[ 2.552885] REGS: c0000000622e73b0 TRAP: 0700 Not tainted (6.12.0-105.debug_vm2.el10.ppc64le+debug)
[ 2.552899] MSR: 800000000282b033 <SF,VEC,VSX,EE,FP,ME,IR,DR,RI,LE> CR: 24002822 XER: 0000000a
[ 2.552954] CFAR: c0000000008f03f0 IRQMASK: 0
[ 2.552954] GPR00: c0000000007eef30 c0000000622e7650 c000000002b1ac00 0000000000000001
[ 2.552954] GPR04: 0000000000000008 0000000000000000 c0000000007eef30 ffffffffffffffff
[ 2.552954] GPR08: 00000000ffff00f5 0000000000000001 0000000000000048 0000000000004000
[ 2.552954] GPR12: 00000003fa440000 c000000017ffa300 c0000000051d9f80 ffffffffffffffdb
[ 2.552954] GPR16: 0000000000000000 0000000000000008 000000000000000a 60000000000000e0
[ 2.552954] GPR20: 4080000000000000 c0000000113af038 00007fffcf130000 0000700000000000
[ 2.552954] GPR24: c000000062a6a000 0000000000000001 8000000062a68000 0000000000000001
[ 2.552954] GPR28: 000000000000000a c000000062ebc600 0000000000002000 c000000062ebc760
[ 2.553170] NIP [c0000000007eef3c] free_pud_range+0x8bc/0x8d0
[ 2.553185] LR [c0000000007eef30] free_pud_range+0x8b0/0x8d0
[ 2.553199] Call Trace:
[ 2.553207] [c0000000622e7650] [c0000000007eef30] free_pud_range+0x8b0/0x8d0 (unreliable)
[ 2.553229] [c0000000622e7750] [c0000000007f40b4] free_pgd_range+0x284/0x3b0
[ 2.553248] [c0000000622e7800] [c0000000007f4630] free_pgtables+0x450/0x570
[ 2.553274] [c0000000622e78e0] [c0000000008161c0] exit_mmap+0x250/0x650
[ 2.553292] [c0000000622e7a30] [c0000000001b95b8] __mmput+0x98/0x290
[ 2.558344] [c0000000622e7a80] [c0000000001d1018] exit_mm+0x118/0x1b0
[ 2.558361] [c0000000622e7ac0] [c0000000001d141c] do_exit+0x2ec/0x870
[ 2.558376] [c0000000622e7b60] [c0000000001d1ca8] do_group_exit+0x88/0x150
[ 2.558391] [c0000000622e7bb0] [c0000000001d1db8] sys_exit_group+0x48/0x50
[ 2.558407] [c0000000622e7be0] [c00000000003d810] system_call_exception+0x1e0/0x4c0
[ 2.558423] [c0000000622e7e50] [c00000000000d05c] system_call_vectored_common+0x15c/0x2ec
(...)
[ 2.558892] ---[ end trace 0000000000000000 ]---
[ 2.559022] BUG: Bad rss-counter state mm:000000002267cc9e type:MM_ANONPAGES val:1
[ 2.559037] BUG: non-zero pgtables_bytes on freeing mm: -6144
Here the modprobe process ended up with an allocated mm_struct from the
mm_struct slab that was used before by the debug_vm_pgtable test. That is
not a problem, since the mm_struct is initialized again etc., however, if
it ends up using the same pgd table, it bumps into the old stale entry
when clearing/freeing the page table entries, so it tries to free an entry
already gone (that one which was allocated by the debug_vm_pgtable test),
which also explains the negative pgtables_bytes since it's accounting for
not allocated entries in the current process.
As far as I looked pgd_{alloc,free} etc. does not clear entries, and
clearing of the entries is explicitly done in the free_pgtables->
free_pgd_range->free_p4d_range->free_pud_range->free_pmd_range->
free_pte_range path. However, the debug_vm_pgtable test does not call
free_pgtables, since it allocates mm_struct and entries manually for its
test and eg. not goes through page faults. So it also should clear
manually the entries before exit at destroy_args().
This problem was noticed on a reboot X number of times test being done on
a powerpc host, with a debug kernel with CONFIG_DEBUG_VM_PGTABLE enabled.
Depends on the system, but on a 100 times reboot loop the problem could
manifest once or twice, if a process ends up getting the right mm->pgd
entry with the stale entries used by mm/debug_vm_pagetable. After using
this patch, I couldn't reproduce/experience the problems anymore. I was
able to reproduce the problem as well on latest upstream kernel (6.16).
I also modified destroy_args() to use mmput() instead of mmdrop(), there
is no reason to hold mm_users reference and not release the mm_struct
entirely, and in the output above with my debugging prints I already had
patched it to use mmput, it did not fix the problem, but helped in the
debugging as well.
Link: https://lkml.kernel.org/r/20250731214051.4115182-1-herton@redhat.com
Fixes: 3c9b84f044a9 ("mm/debug_vm_pgtable: introduce struct pgtable_debug_args")
Signed-off-by: Herton R. Krzesinski <herton(a)redhat.com>
Cc: Anshuman Khandual <anshuman.khandual(a)arm.com>
Cc: Christophe Leroy <christophe.leroy(a)csgroup.eu>
Cc: Gavin Shan <gshan(a)redhat.com>
Cc: Gerald Schaefer <gerald.schaefer(a)linux.ibm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/debug_vm_pgtable.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
--- a/mm/debug_vm_pgtable.c~mm-debug_vm_pgtable-clear-page-table-entries-at-destroy_args
+++ a/mm/debug_vm_pgtable.c
@@ -990,29 +990,34 @@ static void __init destroy_args(struct p
/* Free page table entries */
if (args->start_ptep) {
+ pmd_clear(args->pmdp);
pte_free(args->mm, args->start_ptep);
mm_dec_nr_ptes(args->mm);
}
if (args->start_pmdp) {
+ pud_clear(args->pudp);
pmd_free(args->mm, args->start_pmdp);
mm_dec_nr_pmds(args->mm);
}
if (args->start_pudp) {
+ p4d_clear(args->p4dp);
pud_free(args->mm, args->start_pudp);
mm_dec_nr_puds(args->mm);
}
- if (args->start_p4dp)
+ if (args->start_p4dp) {
+ pgd_clear(args->pgdp);
p4d_free(args->mm, args->start_p4dp);
+ }
/* Free vma and mm struct */
if (args->vma)
vm_area_free(args->vma);
if (args->mm)
- mmdrop(args->mm);
+ mmput(args->mm);
}
static struct page * __init
_
Patches currently in -mm which might be from herton(a)redhat.com are
The quilt patch titled
Subject: squashfs: fix memory leak in squashfs_fill_super
has been removed from the -mm tree. Its filename was
squashfs-fix-memory-leak-in-squashfs_fill_super.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Phillip Lougher <phillip(a)squashfs.org.uk>
Subject: squashfs: fix memory leak in squashfs_fill_super
Date: Mon, 11 Aug 2025 23:37:40 +0100
If sb_min_blocksize returns 0, squashfs_fill_super exits without freeing
allocated memory (sb->s_fs_info).
Fix this by moving the call to sb_min_blocksize to before memory is
allocated.
Link: https://lkml.kernel.org/r/20250811223740.110392-1-phillip@squashfs.org.uk
Fixes: 734aa85390ea ("Squashfs: check return result of sb_min_blocksize")
Signed-off-by: Phillip Lougher <phillip(a)squashfs.org.uk>
Reported-by: Scott GUO <scottzhguo(a)tencent.com>
Closes: https://lore.kernel.org/all/20250811061921.3807353-1-scott_gzh@163.com
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/squashfs/super.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
--- a/fs/squashfs/super.c~squashfs-fix-memory-leak-in-squashfs_fill_super
+++ a/fs/squashfs/super.c
@@ -187,10 +187,15 @@ static int squashfs_fill_super(struct su
unsigned short flags;
unsigned int fragments;
u64 lookup_table_start, xattr_id_table_start, next_table;
- int err;
+ int err, devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE);
TRACE("Entered squashfs_fill_superblock\n");
+ if (!devblksize) {
+ errorf(fc, "squashfs: unable to set blocksize\n");
+ return -EINVAL;
+ }
+
sb->s_fs_info = kzalloc(sizeof(*msblk), GFP_KERNEL);
if (sb->s_fs_info == NULL) {
ERROR("Failed to allocate squashfs_sb_info\n");
@@ -201,12 +206,7 @@ static int squashfs_fill_super(struct su
msblk->panic_on_errors = (opts->errors == Opt_errors_panic);
- msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE);
- if (!msblk->devblksize) {
- errorf(fc, "squashfs: unable to set blocksize\n");
- return -EINVAL;
- }
-
+ msblk->devblksize = devblksize;
msblk->devblksize_log2 = ffz(~msblk->devblksize);
mutex_init(&msblk->meta_index_mutex);
_
Patches currently in -mm which might be from phillip(a)squashfs.org.uk are
The quilt patch titled
Subject: kho: warn if KHO is disabled due to an error
has been removed from the -mm tree. Its filename was
kho-warn-if-kho-is-disabled-due-to-an-error.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Pasha Tatashin <pasha.tatashin(a)soleen.com>
Subject: kho: warn if KHO is disabled due to an error
Date: Fri, 8 Aug 2025 20:18:04 +0000
During boot scratch area is allocated based on command line parameters or
auto calculated. However, scratch area may fail to allocate, and in that
case KHO is disabled. Currently, no warning is printed that KHO is
disabled, which makes it confusing for the end user to figure out why KHO
is not available. Add the missing warning message.
Link: https://lkml.kernel.org/r/20250808201804.772010-4-pasha.tatashin@soleen.com
Signed-off-by: Pasha Tatashin <pasha.tatashin(a)soleen.com>
Acked-by: Mike Rapoport (Microsoft) <rppt(a)kernel.org>
Acked-by: Pratyush Yadav <pratyush(a)kernel.org>
Cc: Alexander Graf <graf(a)amazon.com>
Cc: Arnd Bergmann <arnd(a)arndb.de>
Cc: Baoquan He <bhe(a)redhat.com>
Cc: Changyuan Lyu <changyuanl(a)google.com>
Cc: Coiby Xu <coxu(a)redhat.com>
Cc: Dave Vasilevsky <dave(a)vasilevsky.ca>
Cc: Eric Biggers <ebiggers(a)google.com>
Cc: Kees Cook <kees(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/kexec_handover.c | 1 +
1 file changed, 1 insertion(+)
--- a/kernel/kexec_handover.c~kho-warn-if-kho-is-disabled-due-to-an-error
+++ a/kernel/kexec_handover.c
@@ -564,6 +564,7 @@ err_free_scratch_areas:
err_free_scratch_desc:
memblock_free(kho_scratch, kho_scratch_cnt * sizeof(*kho_scratch));
err_disable_kho:
+ pr_warn("Failed to reserve scratch area, disabling kexec handover\n");
kho_enable = false;
}
_
Patches currently in -mm which might be from pasha.tatashin(a)soleen.com are
The quilt patch titled
Subject: kho: mm: don't allow deferred struct page with KHO
has been removed from the -mm tree. Its filename was
kho-mm-dont-allow-deferred-struct-page-with-kho.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Pasha Tatashin <pasha.tatashin(a)soleen.com>
Subject: kho: mm: don't allow deferred struct page with KHO
Date: Fri, 8 Aug 2025 20:18:03 +0000
KHO uses struct pages for the preserved memory early in boot, however,
with deferred struct page initialization, only a small portion of memory
has properly initialized struct pages.
This problem was detected where vmemmap is poisoned, and illegal flag
combinations are detected.
Don't allow them to be enabled together, and later we will have to teach
KHO to work properly with deferred struct page init kernel feature.
Link: https://lkml.kernel.org/r/20250808201804.772010-3-pasha.tatashin@soleen.com
Fixes: 4e1d010e3bda ("kexec: add config option for KHO")
Signed-off-by: Pasha Tatashin <pasha.tatashin(a)soleen.com>
Acked-by: Mike Rapoport (Microsoft) <rppt(a)kernel.org>
Acked-by: Pratyush Yadav <pratyush(a)kernel.org>
Cc: Alexander Graf <graf(a)amazon.com>
Cc: Arnd Bergmann <arnd(a)arndb.de>
Cc: Baoquan He <bhe(a)redhat.com>
Cc: Changyuan Lyu <changyuanl(a)google.com>
Cc: Coiby Xu <coxu(a)redhat.com>
Cc: Dave Vasilevsky <dave(a)vasilevsky.ca>
Cc: Eric Biggers <ebiggers(a)google.com>
Cc: Kees Cook <kees(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/Kconfig.kexec | 1 +
1 file changed, 1 insertion(+)
--- a/kernel/Kconfig.kexec~kho-mm-dont-allow-deferred-struct-page-with-kho
+++ a/kernel/Kconfig.kexec
@@ -97,6 +97,7 @@ config KEXEC_JUMP
config KEXEC_HANDOVER
bool "kexec handover"
depends on ARCH_SUPPORTS_KEXEC_HANDOVER && ARCH_SUPPORTS_KEXEC_FILE
+ depends on !DEFERRED_STRUCT_PAGE_INIT
select MEMBLOCK_KHO_SCRATCH
select KEXEC_FILE
select DEBUG_FS
_
Patches currently in -mm which might be from pasha.tatashin(a)soleen.com are
The patch titled
Subject: x86/mm/64: define ARCH_PAGE_TABLE_SYNC_MASK and arch_sync_kernel_mappings()
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
x86-mm-64-define-arch_page_table_sync_mask-and-arch_sync_kernel_mappings.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Harry Yoo <harry.yoo(a)oracle.com>
Subject: x86/mm/64: define ARCH_PAGE_TABLE_SYNC_MASK and arch_sync_kernel_mappings()
Date: Mon, 18 Aug 2025 11:02:06 +0900
Define ARCH_PAGE_TABLE_SYNC_MASK and arch_sync_kernel_mappings() to ensure
page tables are properly synchronized when calling p*d_populate_kernel().
For 5-level paging, synchronization is performed via
pgd_populate_kernel(). In 4-level paging, pgd_populate() is a no-op, so
synchronization is instead performed at the P4D level via
p4d_populate_kernel().
This fixes intermittent boot failures on systems using 4-level paging and
a large amount of persistent memory:
BUG: unable to handle page fault for address: ffffe70000000034
#PF: supervisor write access in kernel mode
#PF: error_code(0x0002) - not-present page
PGD 0 P4D 0
Oops: 0002 [#1] SMP NOPTI
RIP: 0010:__init_single_page+0x9/0x6d
Call Trace:
<TASK>
__init_zone_device_page+0x17/0x5d
memmap_init_zone_device+0x154/0x1bb
pagemap_range+0x2e0/0x40f
memremap_pages+0x10b/0x2f0
devm_memremap_pages+0x1e/0x60
dev_dax_probe+0xce/0x2ec [device_dax]
dax_bus_probe+0x6d/0xc9
[... snip ...]
</TASK>
It also fixes a crash in vmemmap_set_pmd() caused by accessing vmemmap
before sync_global_pgds() [1]:
BUG: unable to handle page fault for address: ffffeb3ff1200000
#PF: supervisor write access in kernel mode
#PF: error_code(0x0002) - not-present page
PGD 0 P4D 0
Oops: Oops: 0002 [#1] PREEMPT SMP NOPTI
Tainted: [W]=WARN
RIP: 0010:vmemmap_set_pmd+0xff/0x230
<TASK>
vmemmap_populate_hugepages+0x176/0x180
vmemmap_populate+0x34/0x80
__populate_section_memmap+0x41/0x90
sparse_add_section+0x121/0x3e0
__add_pages+0xba/0x150
add_pages+0x1d/0x70
memremap_pages+0x3dc/0x810
devm_memremap_pages+0x1c/0x60
xe_devm_add+0x8b/0x100 [xe]
xe_tile_init_noalloc+0x6a/0x70 [xe]
xe_device_probe+0x48c/0x740 [xe]
[... snip ...]
Link: https://lkml.kernel.org/r/20250818020206.4517-4-harry.yoo@oracle.com
Fixes: 8d400913c231 ("x86/vmemmap: handle unpopulated sub-pmd ranges")
Signed-off-by: Harry Yoo <harry.yoo(a)oracle.com>
Closes: https://lore.kernel.org/linux-mm/20250311114420.240341-1-gwan-gyeong.mun@in… [1]
Suggested-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Acked-by: Kiryl Shutsemau <kas(a)kernel.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt(a)kernel.org>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Alexander Potapenko <glider(a)google.com>
Cc: Alistair Popple <apopple(a)nvidia.com>
Cc: Andrey Konovalov <andreyknvl(a)gmail.com>
Cc: Andrey Ryabinin <ryabinin.a.a(a)gmail.com>
Cc: Andy Lutomirski <luto(a)kernel.org>
Cc: "Aneesh Kumar K.V" <aneesh.kumar(a)linux.ibm.com>
Cc: Anshuman Khandual <anshuman.khandual(a)arm.com>
Cc: Ard Biesheuvel <ardb(a)kernel.org>
Cc: Arnd Bergmann <arnd(a)arndb.de>
Cc: bibo mao <maobibo(a)loongson.cn>
Cc: Borislav Betkov <bp(a)alien8.de>
Cc: Christoph Lameter (Ampere) <cl(a)gentwo.org>
Cc: Dennis Zhou <dennis(a)kernel.org>
Cc: Dev Jain <dev.jain(a)arm.com>
Cc: Dmitriy Vyukov <dvyukov(a)google.com>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Jane Chu <jane.chu(a)oracle.com>
Cc: Joao Martins <joao.m.martins(a)oracle.com>
Cc: Joerg Roedel <joro(a)8bytes.org>
Cc: John Hubbard <jhubbard(a)nvidia.com>
Cc: Kevin Brodsky <kevin.brodsky(a)arm.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Qi Zheng <zhengqi.arch(a)bytedance.com>
Cc: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Tejun Heo <tj(a)kernel.org>
Cc: Thomas Gleinxer <tglx(a)linutronix.de>
Cc: Thomas Huth <thuth(a)redhat.com>
Cc: "Uladzislau Rezki (Sony)" <urezki(a)gmail.com>
Cc: Vincenzo Frascino <vincenzo.frascino(a)arm.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/x86/include/asm/pgtable_64_types.h | 3 +++
arch/x86/mm/init_64.c | 18 ++++++++++++++++++
2 files changed, 21 insertions(+)
--- a/arch/x86/include/asm/pgtable_64_types.h~x86-mm-64-define-arch_page_table_sync_mask-and-arch_sync_kernel_mappings
+++ a/arch/x86/include/asm/pgtable_64_types.h
@@ -36,6 +36,9 @@ static inline bool pgtable_l5_enabled(vo
#define pgtable_l5_enabled() cpu_feature_enabled(X86_FEATURE_LA57)
#endif /* USE_EARLY_PGTABLE_L5 */
+#define ARCH_PAGE_TABLE_SYNC_MASK \
+ (pgtable_l5_enabled() ? PGTBL_PGD_MODIFIED : PGTBL_P4D_MODIFIED)
+
extern unsigned int pgdir_shift;
extern unsigned int ptrs_per_p4d;
--- a/arch/x86/mm/init_64.c~x86-mm-64-define-arch_page_table_sync_mask-and-arch_sync_kernel_mappings
+++ a/arch/x86/mm/init_64.c
@@ -224,6 +224,24 @@ static void sync_global_pgds(unsigned lo
}
/*
+ * Make kernel mappings visible in all page tables in the system.
+ * This is necessary except when the init task populates kernel mappings
+ * during the boot process. In that case, all processes originating from
+ * the init task copies the kernel mappings, so there is no issue.
+ * Otherwise, missing synchronization could lead to kernel crashes due
+ * to missing page table entries for certain kernel mappings.
+ *
+ * Synchronization is performed at the top level, which is the PGD in
+ * 5-level paging systems. But in 4-level paging systems, however,
+ * pgd_populate() is a no-op, so synchronization is done at the P4D level.
+ * sync_global_pgds() handles this difference between paging levels.
+ */
+void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
+{
+ sync_global_pgds(start, end);
+}
+
+/*
* NOTE: This function is marked __ref because it calls __init function
* (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
*/
_
Patches currently in -mm which might be from harry.yoo(a)oracle.com are
mm-move-page-table-sync-declarations-to-linux-pgtableh.patch
mm-introduce-and-use-pgdp4d_populate_kernel.patch
x86-mm-64-define-arch_page_table_sync_mask-and-arch_sync_kernel_mappings.patch
The patch titled
Subject: mm: introduce and use {pgd,p4d}_populate_kernel()
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-introduce-and-use-pgdp4d_populate_kernel.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Harry Yoo <harry.yoo(a)oracle.com>
Subject: mm: introduce and use {pgd,p4d}_populate_kernel()
Date: Mon, 18 Aug 2025 11:02:05 +0900
Introduce and use {pgd,p4d}_populate_kernel() in core MM code when
populating PGD and P4D entries for the kernel address space. These
helpers ensure proper synchronization of page tables when updating the
kernel portion of top-level page tables.
Until now, the kernel has relied on each architecture to handle
synchronization of top-level page tables in an ad-hoc manner. For
example, see commit 9b861528a801 ("x86-64, mem: Update all PGDs for direct
mapping and vmemmap mapping changes").
However, this approach has proven fragile for following reasons:
1) It is easy to forget to perform the necessary page table
synchronization when introducing new changes.
For instance, commit 4917f55b4ef9 ("mm/sparse-vmemmap: improve memory
savings for compound devmaps") overlooked the need to synchronize
page tables for the vmemmap area.
2) It is also easy to overlook that the vmemmap and direct mapping areas
must not be accessed before explicit page table synchronization.
For example, commit 8d400913c231 ("x86/vmemmap: handle unpopulated
sub-pmd ranges")) caused crashes by accessing the vmemmap area
before calling sync_global_pgds().
To address this, as suggested by Dave Hansen, introduce _kernel() variants
of the page table population helpers, which invoke architecture-specific
hooks to properly synchronize page tables. These are introduced in a new
header file, include/linux/pgalloc.h, so they can be called from common
code.
They reuse existing infrastructure for vmalloc and ioremap.
Synchronization requirements are determined by ARCH_PAGE_TABLE_SYNC_MASK,
and the actual synchronization is performed by
arch_sync_kernel_mappings().
This change currently targets only x86_64, so only PGD and P4D level
helpers are introduced. Currently, these helpers are no-ops since no
architecture sets PGTBL_{PGD,P4D}_MODIFIED in ARCH_PAGE_TABLE_SYNC_MASK.
In theory, PUD and PMD level helpers can be added later if needed by other
architectures. For now, 32-bit architectures (x86-32 and arm) only handle
PGTBL_PMD_MODIFIED, so p*d_populate_kernel() will never affect them unless
we introduce a PMD level helper.
Link: https://lkml.kernel.org/r/20250818020206.4517-3-harry.yoo@oracle.com
Fixes: 8d400913c231 ("x86/vmemmap: handle unpopulated sub-pmd ranges")
Signed-off-by: Harry Yoo <harry.yoo(a)oracle.com>
Suggested-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Acked-by: Kiryl Shutsemau <kas(a)kernel.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt(a)kernel.org>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Alexander Potapenko <glider(a)google.com>
Cc: Alistair Popple <apopple(a)nvidia.com>
Cc: Andrey Konovalov <andreyknvl(a)gmail.com>
Cc: Andrey Ryabinin <ryabinin.a.a(a)gmail.com>
Cc: Andy Lutomirski <luto(a)kernel.org>
Cc: "Aneesh Kumar K.V" <aneesh.kumar(a)linux.ibm.com>
Cc: Anshuman Khandual <anshuman.khandual(a)arm.com>
Cc: Ard Biesheuvel <ardb(a)kernel.org>
Cc: Arnd Bergmann <arnd(a)arndb.de>
Cc: bibo mao <maobibo(a)loongson.cn>
Cc: Borislav Betkov <bp(a)alien8.de>
Cc: Christoph Lameter (Ampere) <cl(a)gentwo.org>
Cc: Dennis Zhou <dennis(a)kernel.org>
Cc: Dev Jain <dev.jain(a)arm.com>
Cc: Dmitriy Vyukov <dvyukov(a)google.com>
Cc: Gwan-gyeong Mun <gwan-gyeong.mun(a)intel.com>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Jane Chu <jane.chu(a)oracle.com>
Cc: Joao Martins <joao.m.martins(a)oracle.com>
Cc: Joerg Roedel <joro(a)8bytes.org>
Cc: John Hubbard <jhubbard(a)nvidia.com>
Cc: Kevin Brodsky <kevin.brodsky(a)arm.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Qi Zheng <zhengqi.arch(a)bytedance.com>
Cc: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Tejun Heo <tj(a)kernel.org>
Cc: Thomas Gleinxer <tglx(a)linutronix.de>
Cc: Thomas Huth <thuth(a)redhat.com>
Cc: "Uladzislau Rezki (Sony)" <urezki(a)gmail.com>
Cc: Vincenzo Frascino <vincenzo.frascino(a)arm.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/pgalloc.h | 24 ++++++++++++++++++++++++
include/linux/pgtable.h | 13 +++++++------
mm/kasan/init.c | 12 ++++++------
mm/percpu.c | 6 +++---
mm/sparse-vmemmap.c | 6 +++---
5 files changed, 43 insertions(+), 18 deletions(-)
diff --git a/include/linux/pgalloc.h a/include/linux/pgalloc.h
new file mode 100644
--- /dev/null
+++ a/include/linux/pgalloc.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_PGALLOC_H
+#define _LINUX_PGALLOC_H
+
+#include <linux/pgtable.h>
+#include <asm/pgalloc.h>
+
+static inline void pgd_populate_kernel(unsigned long addr, pgd_t *pgd,
+ p4d_t *p4d)
+{
+ pgd_populate(&init_mm, pgd, p4d);
+ if (ARCH_PAGE_TABLE_SYNC_MASK & PGTBL_PGD_MODIFIED)
+ arch_sync_kernel_mappings(addr, addr);
+}
+
+static inline void p4d_populate_kernel(unsigned long addr, p4d_t *p4d,
+ pud_t *pud)
+{
+ p4d_populate(&init_mm, p4d, pud);
+ if (ARCH_PAGE_TABLE_SYNC_MASK & PGTBL_P4D_MODIFIED)
+ arch_sync_kernel_mappings(addr, addr);
+}
+
+#endif /* _LINUX_PGALLOC_H */
--- a/include/linux/pgtable.h~mm-introduce-and-use-pgdp4d_populate_kernel
+++ a/include/linux/pgtable.h
@@ -1469,8 +1469,8 @@ static inline void modify_prot_commit_pt
/*
* Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values
- * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings()
- * needs to be called.
+ * and let generic vmalloc, ioremap and page table update code know when
+ * arch_sync_kernel_mappings() needs to be called.
*/
#ifndef ARCH_PAGE_TABLE_SYNC_MASK
#define ARCH_PAGE_TABLE_SYNC_MASK 0
@@ -1954,10 +1954,11 @@ static inline bool arch_has_pfn_modify_c
/*
* Page Table Modification bits for pgtbl_mod_mask.
*
- * These are used by the p?d_alloc_track*() set of functions an in the generic
- * vmalloc/ioremap code to track at which page-table levels entries have been
- * modified. Based on that the code can better decide when vmalloc and ioremap
- * mapping changes need to be synchronized to other page-tables in the system.
+ * These are used by the p?d_alloc_track*() and p*d_populate_kernel()
+ * functions in the generic vmalloc, ioremap and page table update code
+ * to track at which page-table levels entries have been modified.
+ * Based on that the code can better decide when page table changes need
+ * to be synchronized to other page-tables in the system.
*/
#define __PGTBL_PGD_MODIFIED 0
#define __PGTBL_P4D_MODIFIED 1
--- a/mm/kasan/init.c~mm-introduce-and-use-pgdp4d_populate_kernel
+++ a/mm/kasan/init.c
@@ -13,9 +13,9 @@
#include <linux/mm.h>
#include <linux/pfn.h>
#include <linux/slab.h>
+#include <linux/pgalloc.h>
#include <asm/page.h>
-#include <asm/pgalloc.h>
#include "kasan.h"
@@ -191,7 +191,7 @@ static int __ref zero_p4d_populate(pgd_t
pud_t *pud;
pmd_t *pmd;
- p4d_populate(&init_mm, p4d,
+ p4d_populate_kernel(addr, p4d,
lm_alias(kasan_early_shadow_pud));
pud = pud_offset(p4d, addr);
pud_populate(&init_mm, pud,
@@ -212,7 +212,7 @@ static int __ref zero_p4d_populate(pgd_t
} else {
p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
pud_init(p);
- p4d_populate(&init_mm, p4d, p);
+ p4d_populate_kernel(addr, p4d, p);
}
}
zero_pud_populate(p4d, addr, next);
@@ -251,10 +251,10 @@ int __ref kasan_populate_early_shadow(co
* puds,pmds, so pgd_populate(), pud_populate()
* is noops.
*/
- pgd_populate(&init_mm, pgd,
+ pgd_populate_kernel(addr, pgd,
lm_alias(kasan_early_shadow_p4d));
p4d = p4d_offset(pgd, addr);
- p4d_populate(&init_mm, p4d,
+ p4d_populate_kernel(addr, p4d,
lm_alias(kasan_early_shadow_pud));
pud = pud_offset(p4d, addr);
pud_populate(&init_mm, pud,
@@ -273,7 +273,7 @@ int __ref kasan_populate_early_shadow(co
if (!p)
return -ENOMEM;
} else {
- pgd_populate(&init_mm, pgd,
+ pgd_populate_kernel(addr, pgd,
early_alloc(PAGE_SIZE, NUMA_NO_NODE));
}
}
--- a/mm/percpu.c~mm-introduce-and-use-pgdp4d_populate_kernel
+++ a/mm/percpu.c
@@ -3108,7 +3108,7 @@ out_free:
#endif /* BUILD_EMBED_FIRST_CHUNK */
#ifdef BUILD_PAGE_FIRST_CHUNK
-#include <asm/pgalloc.h>
+#include <linux/pgalloc.h>
#ifndef P4D_TABLE_SIZE
#define P4D_TABLE_SIZE PAGE_SIZE
@@ -3134,13 +3134,13 @@ void __init __weak pcpu_populate_pte(uns
if (pgd_none(*pgd)) {
p4d = memblock_alloc_or_panic(P4D_TABLE_SIZE, P4D_TABLE_SIZE);
- pgd_populate(&init_mm, pgd, p4d);
+ pgd_populate_kernel(addr, pgd, p4d);
}
p4d = p4d_offset(pgd, addr);
if (p4d_none(*p4d)) {
pud = memblock_alloc_or_panic(PUD_TABLE_SIZE, PUD_TABLE_SIZE);
- p4d_populate(&init_mm, p4d, pud);
+ p4d_populate_kernel(addr, p4d, pud);
}
pud = pud_offset(p4d, addr);
--- a/mm/sparse-vmemmap.c~mm-introduce-and-use-pgdp4d_populate_kernel
+++ a/mm/sparse-vmemmap.c
@@ -27,9 +27,9 @@
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
+#include <linux/pgalloc.h>
#include <asm/dma.h>
-#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include "hugetlb_vmemmap.h"
@@ -229,7 +229,7 @@ p4d_t * __meminit vmemmap_p4d_populate(p
if (!p)
return NULL;
pud_init(p);
- p4d_populate(&init_mm, p4d, p);
+ p4d_populate_kernel(addr, p4d, p);
}
return p4d;
}
@@ -241,7 +241,7 @@ pgd_t * __meminit vmemmap_pgd_populate(u
void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
if (!p)
return NULL;
- pgd_populate(&init_mm, pgd, p);
+ pgd_populate_kernel(addr, pgd, p);
}
return pgd;
}
_
Patches currently in -mm which might be from harry.yoo(a)oracle.com are
mm-move-page-table-sync-declarations-to-linux-pgtableh.patch
mm-introduce-and-use-pgdp4d_populate_kernel.patch
x86-mm-64-define-arch_page_table_sync_mask-and-arch_sync_kernel_mappings.patch
The patch titled
Subject: mm: move page table sync declarations to linux/pgtable.h
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-move-page-table-sync-declarations-to-linux-pgtableh.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Harry Yoo <harry.yoo(a)oracle.com>
Subject: mm: move page table sync declarations to linux/pgtable.h
Date: Mon, 18 Aug 2025 11:02:04 +0900
During our internal testing, we started observing intermittent boot
failures when the machine uses 4-level paging and has a large amount of
persistent memory:
BUG: unable to handle page fault for address: ffffe70000000034
#PF: supervisor write access in kernel mode
#PF: error_code(0x0002) - not-present page
PGD 0 P4D 0
Oops: 0002 [#1] SMP NOPTI
RIP: 0010:__init_single_page+0x9/0x6d
Call Trace:
<TASK>
__init_zone_device_page+0x17/0x5d
memmap_init_zone_device+0x154/0x1bb
pagemap_range+0x2e0/0x40f
memremap_pages+0x10b/0x2f0
devm_memremap_pages+0x1e/0x60
dev_dax_probe+0xce/0x2ec [device_dax]
dax_bus_probe+0x6d/0xc9
[... snip ...]
</TASK>
It turns out that the kernel panics while initializing vmemmap (struct
page array) when the vmemmap region spans two PGD entries, because the new
PGD entry is only installed in init_mm.pgd, but not in the page tables of
other tasks.
And looking at __populate_section_memmap():
if (vmemmap_can_optimize(altmap, pgmap))
// does not sync top level page tables
r = vmemmap_populate_compound_pages(pfn, start, end, nid, pgmap);
else
// sync top level page tables in x86
r = vmemmap_populate(start, end, nid, altmap);
In the normal path, vmemmap_populate() in arch/x86/mm/init_64.c
synchronizes the top level page table (See commit 9b861528a801 ("x86-64,
mem: Update all PGDs for direct mapping and vmemmap mapping changes")) so
that all tasks in the system can see the new vmemmap area.
However, when vmemmap_can_optimize() returns true, the optimized path
skips synchronization of top-level page tables. This is because
vmemmap_populate_compound_pages() is implemented in core MM code, which
does not handle synchronization of the top-level page tables. Instead,
the core MM has historically relied on each architecture to perform this
synchronization manually.
We're not the first party to encounter a crash caused by not-sync'd top
level page tables: earlier this year, Gwan-gyeong Mun attempted to address
the issue [1] [2] after hitting a kernel panic when x86 code accessed the
vmemmap area before the corresponding top-level entries were synced. At
that time, the issue was believed to be triggered only when struct page
was enlarged for debugging purposes, and the patch did not get further
updates.
It turns out that current approach of relying on each arch to handle the
page table sync manually is fragile because 1) it's easy to forget to sync
the top level page table, and 2) it's also easy to overlook that the
kernel should not access the vmemmap and direct mapping areas before the
sync.
# The solution: Make page table sync more code robust and harder to miss
To address this, Dave Hansen suggested [3] [4] introducing
{pgd,p4d}_populate_kernel() for updating kernel portion of the page tables
and allow each architecture to explicitly perform synchronization when
installing top-level entries. With this approach, we no longer need to
worry about missing the sync step, reducing the risk of future
regressions.
The new interface reuses existing ARCH_PAGE_TABLE_SYNC_MASK,
PGTBL_P*D_MODIFIED and arch_sync_kernel_mappings() facility used by
vmalloc and ioremap to synchronize page tables.
pgd_populate_kernel() looks like this:
static inline void pgd_populate_kernel(unsigned long addr, pgd_t *pgd,
p4d_t *p4d)
{
pgd_populate(&init_mm, pgd, p4d);
if (ARCH_PAGE_TABLE_SYNC_MASK & PGTBL_PGD_MODIFIED)
arch_sync_kernel_mappings(addr, addr);
}
It is worth noting that vmalloc() and apply_to_range() carefully
synchronizes page tables by calling p*d_alloc_track() and
arch_sync_kernel_mappings(), and thus they are not affected by this patch
series.
This series was hugely inspired by Dave Hansen's suggestion and hence
added Suggested-by: Dave Hansen.
Cc stable because lack of this series opens the door to intermittent
boot failures.
This patch (of 3):
Move ARCH_PAGE_TABLE_SYNC_MASK and arch_sync_kernel_mappings() to
linux/pgtable.h so that they can be used outside of vmalloc and ioremap.
Link: https://lkml.kernel.org/r/20250818020206.4517-1-harry.yoo@oracle.com
Link: https://lkml.kernel.org/r/20250818020206.4517-2-harry.yoo@oracle.com
Link: https://lore.kernel.org/linux-mm/20250220064105.808339-1-gwan-gyeong.mun@in… [1]
Link: https://lore.kernel.org/linux-mm/20250311114420.240341-1-gwan-gyeong.mun@in… [2]
Link: https://lore.kernel.org/linux-mm/d1da214c-53d3-45ac-a8b6-51821c5416e4@intel… [3]
Link: https://lore.kernel.org/linux-mm/4d800744-7b88-41aa-9979-b245e8bf794b@intel… [4]
Fixes: 8d400913c231 ("x86/vmemmap: handle unpopulated sub-pmd ranges")
Signed-off-by: Harry Yoo <harry.yoo(a)oracle.com>
Acked-by: Kiryl Shutsemau <kas(a)kernel.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt(a)kernel.org>
Reviewed-by: "Uladzislau Rezki (Sony)" <urezki(a)gmail.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Alexander Potapenko <glider(a)google.com>
Cc: Alistair Popple <apopple(a)nvidia.com>
Cc: Andrey Konovalov <andreyknvl(a)gmail.com>
Cc: Andrey Ryabinin <ryabinin.a.a(a)gmail.com>
Cc: Andy Lutomirski <luto(a)kernel.org>
Cc: "Aneesh Kumar K.V" <aneesh.kumar(a)linux.ibm.com>
Cc: Anshuman Khandual <anshuman.khandual(a)arm.com>
Cc: Ard Biesheuvel <ardb(a)kernel.org>
Cc: Arnd Bergmann <arnd(a)arndb.de>
Cc: bibo mao <maobibo(a)loongson.cn>
Cc: Borislav Betkov <bp(a)alien8.de>
Cc: Christoph Lameter (Ampere) <cl(a)gentwo.org>
Cc: Dennis Zhou <dennis(a)kernel.org>
Cc: Dev Jain <dev.jain(a)arm.com>
Cc: Dmitriy Vyukov <dvyukov(a)google.com>
Cc: Gwan-gyeong Mun <gwan-gyeong.mun(a)intel.com>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Jane Chu <jane.chu(a)oracle.com>
Cc: Joao Martins <joao.m.martins(a)oracle.com>
Cc: Joerg Roedel <joro(a)8bytes.org>
Cc: John Hubbard <jhubbard(a)nvidia.com>
Cc: Kevin Brodsky <kevin.brodsky(a)arm.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Qi Zheng <zhengqi.arch(a)bytedance.com>
Cc: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Tejun Heo <tj(a)kernel.org>
Cc: Thomas Gleinxer <tglx(a)linutronix.de>
Cc: Thomas Huth <thuth(a)redhat.com>
Cc: Vincenzo Frascino <vincenzo.frascino(a)arm.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/pgtable.h | 16 ++++++++++++++++
include/linux/vmalloc.h | 16 ----------------
2 files changed, 16 insertions(+), 16 deletions(-)
--- a/include/linux/pgtable.h~mm-move-page-table-sync-declarations-to-linux-pgtableh
+++ a/include/linux/pgtable.h
@@ -1467,6 +1467,22 @@ static inline void modify_prot_commit_pt
}
#endif
+/*
+ * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values
+ * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings()
+ * needs to be called.
+ */
+#ifndef ARCH_PAGE_TABLE_SYNC_MASK
+#define ARCH_PAGE_TABLE_SYNC_MASK 0
+#endif
+
+/*
+ * There is no default implementation for arch_sync_kernel_mappings(). It is
+ * relied upon the compiler to optimize calls out if ARCH_PAGE_TABLE_SYNC_MASK
+ * is 0.
+ */
+void arch_sync_kernel_mappings(unsigned long start, unsigned long end);
+
#endif /* CONFIG_MMU */
/*
--- a/include/linux/vmalloc.h~mm-move-page-table-sync-declarations-to-linux-pgtableh
+++ a/include/linux/vmalloc.h
@@ -220,22 +220,6 @@ int vmap_pages_range(unsigned long addr,
struct page **pages, unsigned int page_shift);
/*
- * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values
- * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings()
- * needs to be called.
- */
-#ifndef ARCH_PAGE_TABLE_SYNC_MASK
-#define ARCH_PAGE_TABLE_SYNC_MASK 0
-#endif
-
-/*
- * There is no default implementation for arch_sync_kernel_mappings(). It is
- * relied upon the compiler to optimize calls out if ARCH_PAGE_TABLE_SYNC_MASK
- * is 0.
- */
-void arch_sync_kernel_mappings(unsigned long start, unsigned long end);
-
-/*
* Lowlevel-APIs (not for driver use!)
*/
_
Patches currently in -mm which might be from harry.yoo(a)oracle.com are
mm-move-page-table-sync-declarations-to-linux-pgtableh.patch
mm-introduce-and-use-pgdp4d_populate_kernel.patch
x86-mm-64-define-arch_page_table_sync_mask-and-arch_sync_kernel_mappings.patch
During the integration of the RTL8239 POE chip + its frontend MCU, it was
noticed that multi-byte operations were basically broken in the current
driver.
Tests using SMBus Block Writes showed that the data (after the Wr maker +
Ack) was mixed up on the wire. At first glance, it looked like an
endianness problem. But for transfers where the number of count + data
bytes was not divisible by 4, the last bytes were not looking like an
endianness problem because they were in the wrong order but not for example
0 - which would be the case for an endianness problem with 32 bit
registers. At the end, it turned out to be the way how i2c_write tried to
add the bytes to the send registers.
Each 32 bit register was used similar to a shift register - shifting the
various bytes up the register while the next one is added to the least
significant byte. But the I2C controller expects the first byte of the
transmission in the least significant byte of the first register. And the
last byte (assuming it is a 16 byte transfer) is expected in the most
significant byte of the fourth register.
While doing these tests, it was also observed that the count byte was
missing from the SMBus Block Writes. The driver just removed them from the
data->block (from the I2C subsystem). But the I2C controller DOES NOT
automatically add this byte - for example by using the configured
transmission length.
The RTL8239 MCU is not actually an SMBus compliant device. Instead, it
expects I2C Block Reads + I2C Block Writes. But according to the already
identified bugs in the driver, it was clear that the I2C controller can
simply be modified to not send the count byte for I2C_SMBUS_I2C_BLOCK_DATA.
The receive part just needs to write the content of the receive buffer to
the correct position in data->block.
While the on-wire format was now correct, reads were still not possible
against the MCU (for the RTL8239 POE chip). It was always timing out
because the 2ms were not enough for sending the read request and then
receiving the 12 byte answer.
These changes were originally submitted to OpenWrt. But there are plans to
migrate OpenWrt to the upstream Linux driver. As a result, the pull request
was stopped and the changes were redone against this driver.
For reasons of transparency: The work on I2C_SMBUS_I2C_BLOCK_DATA support
for the RTL8239-MCU was done on RTL931xx. All problems were therefore
detected with the patches from Jonas Jelonek [1] and not the vanilla Linux
driver. But looking through the code, it seems like these are NOT
regressions introduced by the RTL931x patchset.
I've picked up Alex Guo's patch [2] to reduce conflicts between pending
fixes.
[1] https://patchwork.ozlabs.org/project/linux-i2c/cover/20250727114800.3046-1-…
[2] https://lore.kernel.org/r/20250615235248.529019-1-alexguo1023@gmail.com
Signed-off-by: Sven Eckelmann <sven(a)narfation.org>
---
Changes in v5:
- Simplify function/capability registration by using
I2C_FUNC_SMBUS_I2C_BLOCK, thanks Jonas Jelonek
- Link to v4: https://lore.kernel.org/r/20250809-i2c-rtl9300-multi-byte-v4-0-d71dd5eb6121…
Changes in v4:
- Provide only "write" examples for "i2c: rtl9300: Fix multi-byte I2C write"
- drop the second initialization of vals in rtl9300_i2c_write() directly in
the "Fix multi-byte I2C write" fix
- indicate in target branch for each patch in PATCH prefix
- minor commit message cleanups
- Link to v3: https://lore.kernel.org/r/20250804-i2c-rtl9300-multi-byte-v3-0-e20607e1b28c…
Changes in v3:
- integrated patch
https://lore.kernel.org/r/20250615235248.529019-1-alexguo1023@gmail.com
to avoid conflicts in the I2C_SMBUS_BLOCK_DATA code
- added Fixes and stable(a)vger.kernel.org to Alex Guo's patch
- added Chris Packham's Reviewed-by/Acked-by
- Link to v2: https://lore.kernel.org/r/20250803-i2c-rtl9300-multi-byte-v2-0-9b7b759fe2b6…
Changes in v2:
- add the missing transfer width and read length increase for the SMBus
Write/Read
- Link to v1: https://lore.kernel.org/r/20250802-i2c-rtl9300-multi-byte-v1-0-5f687e0098e2…
---
Alex Guo (1):
i2c: rtl9300: Fix out-of-bounds bug in rtl9300_i2c_smbus_xfer
Harshal Gohel (2):
[i2c-host-fixes] i2c: rtl9300: Fix multi-byte I2C write
[i2c-host] i2c: rtl9300: Implement I2C block read and write
Sven Eckelmann (2):
[i2c-host-fixes] i2c: rtl9300: Increase timeout for transfer polling
[i2c-host-fixes] i2c: rtl9300: Add missing count byte for SMBus Block Ops
drivers/i2c/busses/i2c-rtl9300.c | 51 +++++++++++++++++++++++++++++++++-------
1 file changed, 42 insertions(+), 9 deletions(-)
---
base-commit: 7e161a991ea71e6ec526abc8f40c6852ebe3d946
change-id: 20250802-i2c-rtl9300-multi-byte-edaa1fb0872c
Best regards,
--
Sven Eckelmann <sven(a)narfation.org>
The patch titled
Subject: mm/damon/core: set quota->charged_from to jiffies at first charge window
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-damon-core-set-quota-charged_from-to-jiffies-at-first-charge-window.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Sang-Heon Jeon <ekffu200098(a)gmail.com>
Subject: mm/damon/core: set quota->charged_from to jiffies at first charge window
Date: Wed, 20 Aug 2025 00:01:23 +0900
Kernel initializes "jiffies" timer as 5 minutes below zero, as shown in
include/linux/jiffies.h
/*
* Have the 32 bit jiffies value wrap 5 minutes after boot
* so jiffies wrap bugs show up earlier.
*/
#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ))
And they cast unsigned value to signed to cover wraparound
#define time_after_eq(a,b) \
(typecheck(unsigned long, a) && \
typecheck(unsigned long, b) && \
((long)((a) - (b)) >= 0))
In 64bit systems, these might not be a problem because wrapround occurs
300 million years after the boot, assuming HZ value is 1000.
With same assuming, In 32bit system, wraparound occurs 5 minutues after
the initial boot and every 49 days after the first wraparound. And about
25 days after first wraparound, it continues quota charging window up to
next 25 days.
Example 1: initial boot
jiffies=0xFFFB6C20, charged_from+interval=0x000003E8
time_after_eq(jiffies, charged_from+interval)=(long)0xFFFB6838; In
signed values, it is considered negative so it is false.
Example 2: after about 25 days first wraparound
jiffies=0x800004E8, charged_from+interval=0x000003E8
time_after_eq(jiffies, charged_from+interval)=(long)0x80000100; In
signed values, it is considered negative so it is false
So, change quota->charged_from to jiffies at damos_adjust_quota() when
it is consider first charge window.
In theory; but almost impossible; quota->total_charged_sz and
qutoa->charged_from should be both zero even if it is not in first
charge window. But It will only delay one reset_interval, So it is not
big problem.
Link: https://lkml.kernel.org/r/20250819150123.1532458-1-ekffu200098@gmail.com
Fixes: 2b8a248d5873 ("mm/damon/schemes: implement size quota for schemes application speed control") [5.16]
Signed-off-by: Sang-Heon Jeon <ekffu200098(a)gmail.com>
Reviewed-by: SeongJae Park <sj(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/damon/core.c | 4 ++++
1 file changed, 4 insertions(+)
--- a/mm/damon/core.c~mm-damon-core-set-quota-charged_from-to-jiffies-at-first-charge-window
+++ a/mm/damon/core.c
@@ -2111,6 +2111,10 @@ static void damos_adjust_quota(struct da
if (!quota->ms && !quota->sz && list_empty("a->goals))
return;
+ /* First charge window */
+ if (!quota->total_charged_sz && !quota->charged_from)
+ quota->charged_from = jiffies;
+
/* New charge window starts */
if (time_after_eq(jiffies, quota->charged_from +
msecs_to_jiffies(quota->reset_interval))) {
_
Patches currently in -mm which might be from ekffu200098(a)gmail.com are
mm-damon-core-fix-commit_ops_filters-by-using-correct-nth-function.patch
selftests-damon-fix-selftests-by-installing-drgn-related-script.patch
mm-damon-core-fix-damos_commit_filter-not-changing-allow.patch
mm-damon-core-set-quota-charged_from-to-jiffies-at-first-charge-window.patch
mm-damon-update-expired-description-of-damos_action.patch
docs-mm-damon-design-fix-typo-s-sz_trtied-sz_tried.patch
selftests-damon-test-no-op-commit-broke-damon-status.patch
selftests-damon-test-no-op-commit-broke-damon-status-fix.patch
mm-damon-tests-core-kunit-add-damos_commit_filter-test.patch
From: Fengnan Chang <changfengnan(a)bytedance.com>
[ Upstream commit 9d83e1f05c98bab5de350bef89177e2be8b34db0 ]
After commit 0b2b066f8a85 ("io_uring/io-wq: only create a new worker
if it can make progress"), in our produce environment, we still
observe that part of io_worker threads keeps creating and destroying.
After analysis, it was confirmed that this was due to a more complex
scenario involving a large number of fsync operations, which can be
abstracted as frequent write + fsync operations on multiple files in
a single uring instance. Since write is a hash operation while fsync
is not, and fsync is likely to be suspended during execution, the
action of checking the hash value in
io_wqe_dec_running cannot handle such scenarios.
Similarly, if hash-based work and non-hash-based work are sent at the
same time, similar issues are likely to occur.
Returning to the starting point of the issue, when a new work
arrives, io_wq_enqueue may wake up free worker A, while
io_wq_dec_running may create worker B. Ultimately, only one of A and
B can obtain and process the task, leaving the other in an idle
state. In the end, the issue is caused by inconsistent logic in the
checks performed by io_wq_enqueue and io_wq_dec_running.
Therefore, the problem can be resolved by checking for available
workers in io_wq_dec_running.
Signed-off-by: Fengnan Chang <changfengnan(a)bytedance.com>
Reviewed-by: Diangang Li <lidiangang(a)bytedance.com>
Link: https://lore.kernel.org/r/20250813120214.18729-1-changfengnan@bytedance.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
LLM Generated explanations, may be completely bogus:
Now let me analyze the nature of the fix to determine if it's
appropriate for stable:
**Backport Status: YES**
This commit should be backported to stable kernel trees for the
following reasons:
## Bug Fix Nature
The commit fixes a **real performance bug** that occurs in production
environments. The issue causes unnecessary creation and destruction of
io_worker threads, leading to:
1. CPU overhead from constant thread creation/destruction
2. Resource wastage from redundant workers
3. Performance degradation in production workloads with mixed hash/non-
hash operations
## Root Cause Analysis
The patch addresses a **race condition** between `io_wq_enqueue()` and
`io_wq_dec_running()`:
- When new work arrives, `io_wq_enqueue()` may wake up a free worker A
- Simultaneously, `io_wq_dec_running()` may create a new worker B
- Only one worker can actually process the task, leaving the other idle
- This creates a worker "churn" pattern that wastes resources
## Small and Contained Fix
The fix is minimal and surgical:
1. Adds a check in `create_worker_cb()` to verify if a free worker is
available before creating a new one
2. Uses the existing `io_acct_activate_free_worker()` function (lines
361-365)
3. Only adds 8 lines of code with proper RCU locking
4. Introduces a single goto label for clean error handling
## Production Impact
The commit message explicitly states this was observed in **production
environments** with real workloads involving:
- Frequent write + fsync operations
- Mixed hash and non-hash operations
- Multiple files in a single io_uring instance
## Regression Potential
The fix has **low regression risk** because:
1. It reuses existing, well-tested infrastructure
(`io_acct_activate_free_worker()`)
2. The logic is consistent with what `io_wq_enqueue()` already does
3. It adds a defensive check rather than changing core logic
4. The RCU locking is properly scoped and follows existing patterns
## Stable Kernel Rules Compliance
The patch follows stable kernel criteria:
- ✓ Fixes a real bug affecting users (worker thread churn in production)
- ✓ Small change (8 lines added)
- ✓ Obviously correct (aligns logic between enqueue and dec_running
paths)
- ✓ Already tested in production (reported by ByteDance engineers)
- ✓ Not a new feature or optimization
## Affected Versions
This should be backported to kernels containing commit 0b2b066f8a85
("io_uring/io-wq: only create a new worker if it can make progress")
which introduced incomplete handling of the worker creation logic. The
issue affects mixed workload scenarios that are common in production
environments.
The fact that this was discovered and reported by engineers from a major
cloud provider (ByteDance) running production workloads further
validates its importance for stable backporting.
io_uring/io-wq.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
index be91edf34f01..17dfaa0395c4 100644
--- a/io_uring/io-wq.c
+++ b/io_uring/io-wq.c
@@ -357,6 +357,13 @@ static void create_worker_cb(struct callback_head *cb)
worker = container_of(cb, struct io_worker, create_work);
wq = worker->wq;
acct = worker->acct;
+
+ rcu_read_lock();
+ do_create = !io_acct_activate_free_worker(acct);
+ rcu_read_unlock();
+ if (!do_create)
+ goto no_need_create;
+
raw_spin_lock(&acct->workers_lock);
if (acct->nr_workers < acct->max_workers) {
@@ -367,6 +374,7 @@ static void create_worker_cb(struct callback_head *cb)
if (do_create) {
create_io_worker(wq, acct);
} else {
+no_need_create:
atomic_dec(&acct->nr_running);
io_worker_ref_put(wq);
}
--
2.50.1
amdgpu_dm_connector_ddc_get_modes() reinitializes a connector's probed
modes list without cleaning it up. First time it is called during the
driver's initialization phase, then via drm_mode_getconnector() ioctl.
The leaks observed with Kmemleak are as following:
unreferenced object 0xffff88812f91b200 (size 128):
comm "(udev-worker)", pid 388, jiffies 4294695475
hex dump (first 32 bytes):
ac dd 07 00 80 02 70 0b 90 0b e0 0b 00 00 e0 01 ......p.........
0b 07 10 07 5c 07 00 00 0a 00 00 00 00 00 00 00 ....\...........
backtrace (crc 89db554f):
__kmalloc_cache_noprof+0x3a3/0x490
drm_mode_duplicate+0x8e/0x2b0
amdgpu_dm_create_common_mode+0x40/0x150 [amdgpu]
amdgpu_dm_connector_add_common_modes+0x336/0x488 [amdgpu]
amdgpu_dm_connector_get_modes+0x428/0x8a0 [amdgpu]
amdgpu_dm_initialize_drm_device+0x1389/0x17b4 [amdgpu]
amdgpu_dm_init.cold+0x157b/0x1a1e [amdgpu]
dm_hw_init+0x3f/0x110 [amdgpu]
amdgpu_device_ip_init+0xcf4/0x1180 [amdgpu]
amdgpu_device_init.cold+0xb84/0x1863 [amdgpu]
amdgpu_driver_load_kms+0x15/0x90 [amdgpu]
amdgpu_pci_probe+0x391/0xce0 [amdgpu]
local_pci_probe+0xd9/0x190
pci_call_probe+0x183/0x540
pci_device_probe+0x171/0x2c0
really_probe+0x1e1/0x890
Found by Linux Verification Center (linuxtesting.org).
Fixes: acc96ae0d127 ("drm/amd/display: set panel orientation before drm_dev_register")
Cc: stable(a)vger.kernel.org
Signed-off-by: Fedor Pchelkin <pchelkin(a)ispras.ru>
---
I can't reproduce the issue before the commit in Fixes which placed that
extra amdgpu_dm_connector_get_modes() call. Though the reinitializing part
/* empty probed_modes */
INIT_LIST_HEAD(&connector->probed_modes);
was added years before and it looks OK since drm_connector_list_update()
should shake the list in between drm_mode_getconnector() ioctl calls.
For what the patch does there exists a drm_mode_remove() helper but it's
static at drivers/gpu/drm/drm_connector.c and requires to be exported
first. This probably looks like a subject for an independent for-next
patch, if needed.
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index cd0e2976e268..4b84f944f066 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -8227,9 +8227,14 @@ static void amdgpu_dm_connector_ddc_get_modes(struct drm_connector *connector,
{
struct amdgpu_dm_connector *amdgpu_dm_connector =
to_amdgpu_dm_connector(connector);
+ struct drm_display_mode *mode, *t;
if (drm_edid) {
/* empty probed_modes */
+ list_for_each_entry_safe(mode, t, &connector->probed_modes, head) {
+ list_del(&mode->head);
+ drm_mode_destroy(connector->dev, mode);
+ }
INIT_LIST_HEAD(&connector->probed_modes);
amdgpu_dm_connector->num_modes =
drm_edid_connector_add_modes(connector);
--
2.50.1
Fix smb3_init_transform_rq() to initialise buffer to NULL before calling
netfs_alloc_folioq_buffer() as netfs assumes it can append to the buffer it
is given. Setting it to NULL means it should start a fresh buffer, but the
value is currently undefined.
Fixes: a2906d3316fc ("cifs: Switch crypto buffer to use a folio_queue rather than an xarray")
Signed-off-by: David Howells <dhowells(a)redhat.com>
cc: Steve French <sfrench(a)samba.org>
cc: Paulo Alcantara <pc(a)manguebit.org>
cc: linux-cifs(a)vger.kernel.org
cc: linux-fsdevel(a)vger.kernel.org
---
fs/smb/client/smb2ops.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index ad8947434b71..cd0c9b5a35c3 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -4487,7 +4487,7 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, int num_rqst,
for (int i = 1; i < num_rqst; i++) {
struct smb_rqst *old = &old_rq[i - 1];
struct smb_rqst *new = &new_rq[i];
- struct folio_queue *buffer;
+ struct folio_queue *buffer = NULL;
size_t size = iov_iter_count(&old->rq_iter);
orig_len += smb_rqst_len(server, old);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 47b0f6d8f0d2be4d311a49e13d2fd5f152f492b2
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081848-proximity-feline-dfea@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 47b0f6d8f0d2be4d311a49e13d2fd5f152f492b2 Mon Sep 17 00:00:00 2001
From: Breno Leitao <leitao(a)debian.org>
Date: Thu, 31 Jul 2025 02:57:18 -0700
Subject: [PATCH] mm/kmemleak: avoid deadlock by moving pr_warn() outside
kmemleak_lock
When netpoll is enabled, calling pr_warn_once() while holding
kmemleak_lock in mem_pool_alloc() can cause a deadlock due to lock
inversion with the netconsole subsystem. This occurs because
pr_warn_once() may trigger netpoll, which eventually leads to
__alloc_skb() and back into kmemleak code, attempting to reacquire
kmemleak_lock.
This is the path for the deadlock.
mem_pool_alloc()
-> raw_spin_lock_irqsave(&kmemleak_lock, flags);
-> pr_warn_once()
-> netconsole subsystem
-> netpoll
-> __alloc_skb
-> __create_object
-> raw_spin_lock_irqsave(&kmemleak_lock, flags);
Fix this by setting a flag and issuing the pr_warn_once() after
kmemleak_lock is released.
Link: https://lkml.kernel.org/r/20250731-kmemleak_lock-v1-1-728fd470198f@debian.o…
Fixes: c5665868183f ("mm: kmemleak: use the memory pool for early allocations")
Signed-off-by: Breno Leitao <leitao(a)debian.org>
Reported-by: Jakub Kicinski <kuba(a)kernel.org>
Acked-by: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 8d588e685311..e0333455c738 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -470,6 +470,7 @@ static struct kmemleak_object *mem_pool_alloc(gfp_t gfp)
{
unsigned long flags;
struct kmemleak_object *object;
+ bool warn = false;
/* try the slab allocator first */
if (object_cache) {
@@ -488,8 +489,10 @@ static struct kmemleak_object *mem_pool_alloc(gfp_t gfp)
else if (mem_pool_free_count)
object = &mem_pool[--mem_pool_free_count];
else
- pr_warn_once("Memory pool empty, consider increasing CONFIG_DEBUG_KMEMLEAK_MEM_POOL_SIZE\n");
+ warn = true;
raw_spin_unlock_irqrestore(&kmemleak_lock, flags);
+ if (warn)
+ pr_warn_once("Memory pool empty, consider increasing CONFIG_DEBUG_KMEMLEAK_MEM_POOL_SIZE\n");
return object;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 59305202c67fea50378dcad0cc199dbc13a0e99a
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081801-undertake-nuzzle-c4b1@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 59305202c67fea50378dcad0cc199dbc13a0e99a Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual(a)arm.com>
Date: Fri, 20 Jun 2025 10:54:27 +0530
Subject: [PATCH] mm/ptdump: take the memory hotplug lock inside
ptdump_walk_pgd()
Memory hot remove unmaps and tears down various kernel page table regions
as required. The ptdump code can race with concurrent modifications of
the kernel page tables. When leaf entries are modified concurrently, the
dump code may log stale or inconsistent information for a VA range, but
this is otherwise not harmful.
But when intermediate levels of kernel page table are freed, the dump code
will continue to use memory that has been freed and potentially
reallocated for another purpose. In such cases, the ptdump code may
dereference bogus addresses, leading to a number of potential problems.
To avoid the above mentioned race condition, platforms such as arm64,
riscv and s390 take memory hotplug lock, while dumping kernel page table
via the sysfs interface /sys/kernel/debug/kernel_page_tables.
Similar race condition exists while checking for pages that might have
been marked W+X via /sys/kernel/debug/kernel_page_tables/check_wx_pages
which in turn calls ptdump_check_wx(). Instead of solving this race
condition again, let's just move the memory hotplug lock inside generic
ptdump_check_wx() which will benefit both the scenarios.
Drop get_online_mems() and put_online_mems() combination from all existing
platform ptdump code paths.
Link: https://lkml.kernel.org/r/20250620052427.2092093-1-anshuman.khandual@arm.com
Fixes: bbd6ec605c0f ("arm64/mm: Enable memory hot remove")
Signed-off-by: Anshuman Khandual <anshuman.khandual(a)arm.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Reviewed-by: Dev Jain <dev.jain(a)arm.com>
Acked-by: Alexander Gordeev <agordeev(a)linux.ibm.com> [s390]
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Will Deacon <will(a)kernel.org>
Cc: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: Paul Walmsley <paul.walmsley(a)sifive.com>
Cc: Palmer Dabbelt <palmer(a)dabbelt.com>
Cc: Alexander Gordeev <agordeev(a)linux.ibm.com>
Cc: Gerald Schaefer <gerald.schaefer(a)linux.ibm.com>
Cc: Heiko Carstens <hca(a)linux.ibm.com>
Cc: Vasily Gorbik <gor(a)linux.ibm.com>
Cc: Christian Borntraeger <borntraeger(a)linux.ibm.com>
Cc: Sven Schnelle <svens(a)linux.ibm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/arch/arm64/mm/ptdump_debugfs.c b/arch/arm64/mm/ptdump_debugfs.c
index 68bf1a125502..1e308328c079 100644
--- a/arch/arm64/mm/ptdump_debugfs.c
+++ b/arch/arm64/mm/ptdump_debugfs.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/debugfs.h>
-#include <linux/memory_hotplug.h>
#include <linux/seq_file.h>
#include <asm/ptdump.h>
@@ -9,9 +8,7 @@ static int ptdump_show(struct seq_file *m, void *v)
{
struct ptdump_info *info = m->private;
- get_online_mems();
ptdump_walk(m, info);
- put_online_mems();
return 0;
}
DEFINE_SHOW_ATTRIBUTE(ptdump);
diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c
index 32922550a50a..3b51690cc876 100644
--- a/arch/riscv/mm/ptdump.c
+++ b/arch/riscv/mm/ptdump.c
@@ -6,7 +6,6 @@
#include <linux/efi.h>
#include <linux/init.h>
#include <linux/debugfs.h>
-#include <linux/memory_hotplug.h>
#include <linux/seq_file.h>
#include <linux/ptdump.h>
@@ -413,9 +412,7 @@ bool ptdump_check_wx(void)
static int ptdump_show(struct seq_file *m, void *v)
{
- get_online_mems();
ptdump_walk(m, m->private);
- put_online_mems();
return 0;
}
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index ac604b176660..9af2aae0a515 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -247,11 +247,9 @@ static int ptdump_show(struct seq_file *m, void *v)
.marker = markers,
};
- get_online_mems();
mutex_lock(&cpa_mutex);
ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
mutex_unlock(&cpa_mutex);
- put_online_mems();
return 0;
}
DEFINE_SHOW_ATTRIBUTE(ptdump);
diff --git a/mm/ptdump.c b/mm/ptdump.c
index 61a352aa12ed..b600c7f864b8 100644
--- a/mm/ptdump.c
+++ b/mm/ptdump.c
@@ -176,6 +176,7 @@ void ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm, pgd_t *pgd)
{
const struct ptdump_range *range = st->range;
+ get_online_mems();
mmap_write_lock(mm);
while (range->start != range->end) {
walk_page_range_debug(mm, range->start, range->end,
@@ -183,6 +184,7 @@ void ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm, pgd_t *pgd)
range++;
}
mmap_write_unlock(mm);
+ put_online_mems();
/* Flush out the last page */
st->note_page_flush(st);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 59305202c67fea50378dcad0cc199dbc13a0e99a
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081800-autopilot-booted-fb7f@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 59305202c67fea50378dcad0cc199dbc13a0e99a Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual(a)arm.com>
Date: Fri, 20 Jun 2025 10:54:27 +0530
Subject: [PATCH] mm/ptdump: take the memory hotplug lock inside
ptdump_walk_pgd()
Memory hot remove unmaps and tears down various kernel page table regions
as required. The ptdump code can race with concurrent modifications of
the kernel page tables. When leaf entries are modified concurrently, the
dump code may log stale or inconsistent information for a VA range, but
this is otherwise not harmful.
But when intermediate levels of kernel page table are freed, the dump code
will continue to use memory that has been freed and potentially
reallocated for another purpose. In such cases, the ptdump code may
dereference bogus addresses, leading to a number of potential problems.
To avoid the above mentioned race condition, platforms such as arm64,
riscv and s390 take memory hotplug lock, while dumping kernel page table
via the sysfs interface /sys/kernel/debug/kernel_page_tables.
Similar race condition exists while checking for pages that might have
been marked W+X via /sys/kernel/debug/kernel_page_tables/check_wx_pages
which in turn calls ptdump_check_wx(). Instead of solving this race
condition again, let's just move the memory hotplug lock inside generic
ptdump_check_wx() which will benefit both the scenarios.
Drop get_online_mems() and put_online_mems() combination from all existing
platform ptdump code paths.
Link: https://lkml.kernel.org/r/20250620052427.2092093-1-anshuman.khandual@arm.com
Fixes: bbd6ec605c0f ("arm64/mm: Enable memory hot remove")
Signed-off-by: Anshuman Khandual <anshuman.khandual(a)arm.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Reviewed-by: Dev Jain <dev.jain(a)arm.com>
Acked-by: Alexander Gordeev <agordeev(a)linux.ibm.com> [s390]
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Will Deacon <will(a)kernel.org>
Cc: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: Paul Walmsley <paul.walmsley(a)sifive.com>
Cc: Palmer Dabbelt <palmer(a)dabbelt.com>
Cc: Alexander Gordeev <agordeev(a)linux.ibm.com>
Cc: Gerald Schaefer <gerald.schaefer(a)linux.ibm.com>
Cc: Heiko Carstens <hca(a)linux.ibm.com>
Cc: Vasily Gorbik <gor(a)linux.ibm.com>
Cc: Christian Borntraeger <borntraeger(a)linux.ibm.com>
Cc: Sven Schnelle <svens(a)linux.ibm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/arch/arm64/mm/ptdump_debugfs.c b/arch/arm64/mm/ptdump_debugfs.c
index 68bf1a125502..1e308328c079 100644
--- a/arch/arm64/mm/ptdump_debugfs.c
+++ b/arch/arm64/mm/ptdump_debugfs.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/debugfs.h>
-#include <linux/memory_hotplug.h>
#include <linux/seq_file.h>
#include <asm/ptdump.h>
@@ -9,9 +8,7 @@ static int ptdump_show(struct seq_file *m, void *v)
{
struct ptdump_info *info = m->private;
- get_online_mems();
ptdump_walk(m, info);
- put_online_mems();
return 0;
}
DEFINE_SHOW_ATTRIBUTE(ptdump);
diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c
index 32922550a50a..3b51690cc876 100644
--- a/arch/riscv/mm/ptdump.c
+++ b/arch/riscv/mm/ptdump.c
@@ -6,7 +6,6 @@
#include <linux/efi.h>
#include <linux/init.h>
#include <linux/debugfs.h>
-#include <linux/memory_hotplug.h>
#include <linux/seq_file.h>
#include <linux/ptdump.h>
@@ -413,9 +412,7 @@ bool ptdump_check_wx(void)
static int ptdump_show(struct seq_file *m, void *v)
{
- get_online_mems();
ptdump_walk(m, m->private);
- put_online_mems();
return 0;
}
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index ac604b176660..9af2aae0a515 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -247,11 +247,9 @@ static int ptdump_show(struct seq_file *m, void *v)
.marker = markers,
};
- get_online_mems();
mutex_lock(&cpa_mutex);
ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
mutex_unlock(&cpa_mutex);
- put_online_mems();
return 0;
}
DEFINE_SHOW_ATTRIBUTE(ptdump);
diff --git a/mm/ptdump.c b/mm/ptdump.c
index 61a352aa12ed..b600c7f864b8 100644
--- a/mm/ptdump.c
+++ b/mm/ptdump.c
@@ -176,6 +176,7 @@ void ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm, pgd_t *pgd)
{
const struct ptdump_range *range = st->range;
+ get_online_mems();
mmap_write_lock(mm);
while (range->start != range->end) {
walk_page_range_debug(mm, range->start, range->end,
@@ -183,6 +184,7 @@ void ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm, pgd_t *pgd)
range++;
}
mmap_write_unlock(mm);
+ put_online_mems();
/* Flush out the last page */
st->note_page_flush(st);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 1ef94169db0958d6de39f9ea6e063ce887342e2d
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081801-shortlist-acutely-2100@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1ef94169db0958d6de39f9ea6e063ce887342e2d Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu(a)suse.com>
Date: Wed, 2 Jul 2025 15:08:13 +0930
Subject: [PATCH] btrfs: populate otime when logging an inode item
[TEST FAILURE WITH EXPERIMENTAL FEATURES]
When running test case generic/508, the test case will fail with the new
btrfs shutdown support:
generic/508 - output mismatch (see /home/adam/xfstests/results//generic/508.out.bad)
--- tests/generic/508.out 2022-05-11 11:25:30.806666664 +0930
+++ /home/adam/xfstests/results//generic/508.out.bad 2025-07-02 14:53:22.401824212 +0930
@@ -1,2 +1,6 @@
QA output created by 508
Silence is golden
+Before:
+After : stat.btime = Thu Jan 1 09:30:00 1970
+Before:
+After : stat.btime = Wed Jul 2 14:53:22 2025
...
(Run 'diff -u /home/adam/xfstests/tests/generic/508.out /home/adam/xfstests/results//generic/508.out.bad' to see the entire diff)
Ran: generic/508
Failures: generic/508
Failed 1 of 1 tests
Please note that the test case requires shutdown support, thus the test
case will be skipped using the current upstream kernel, as it doesn't
have shutdown ioctl support.
[CAUSE]
The direct cause the 0 time stamp in the log tree:
leaf 30507008 items 2 free space 16057 generation 9 owner TREE_LOG
leaf 30507008 flags 0x1(WRITTEN) backref revision 1
checksum stored e522548d
checksum calced e522548d
fs uuid 57d45451-481e-43e4-aa93-289ad707a3a0
chunk uuid d52bd3fd-5163-4337-98a7-7986993ad398
item 0 key (257 INODE_ITEM 0) itemoff 16123 itemsize 160
generation 9 transid 9 size 0 nbytes 0
block group 0 mode 100644 links 1 uid 0 gid 0 rdev 0
sequence 1 flags 0x0(none)
atime 1751432947.492000000 (2025-07-02 14:39:07)
ctime 1751432947.492000000 (2025-07-02 14:39:07)
mtime 1751432947.492000000 (2025-07-02 14:39:07)
otime 0.0 (1970-01-01 09:30:00) <<<
But the old fs tree has all the correct time stamp:
btrfs-progs v6.12
fs tree key (FS_TREE ROOT_ITEM 0)
leaf 30425088 items 2 free space 16061 generation 5 owner FS_TREE
leaf 30425088 flags 0x1(WRITTEN) backref revision 1
checksum stored 48f6c57e
checksum calced 48f6c57e
fs uuid 57d45451-481e-43e4-aa93-289ad707a3a0
chunk uuid d52bd3fd-5163-4337-98a7-7986993ad398
item 0 key (256 INODE_ITEM 0) itemoff 16123 itemsize 160
generation 3 transid 0 size 0 nbytes 16384
block group 0 mode 40755 links 1 uid 0 gid 0 rdev 0
sequence 0 flags 0x0(none)
atime 1751432947.0 (2025-07-02 14:39:07)
ctime 1751432947.0 (2025-07-02 14:39:07)
mtime 1751432947.0 (2025-07-02 14:39:07)
otime 1751432947.0 (2025-07-02 14:39:07) <<<
The root cause is that fill_inode_item() in tree-log.c is only
populating a/c/m time, not the otime (or btime in statx output).
Part of the reason is that, the vfs inode only has a/c/m time, no native
btime support yet.
[FIX]
Thankfully btrfs has its otime stored in btrfs_inode::i_otime_sec and
btrfs_inode::i_otime_nsec.
So what we really need is just fill the otime time stamp in
fill_inode_item() of tree-log.c
There is another fill_inode_item() in inode.c, which is doing the proper
otime population.
Fixes: 94edf4ae43a5 ("Btrfs: don't bother committing delayed inode updates when fsyncing")
CC: stable(a)vger.kernel.org
Reviewed-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 1e805dabfc4b..ab0815d9e7e5 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4233,6 +4233,9 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
btrfs_set_timespec_sec(leaf, &item->ctime, inode_get_ctime_sec(inode));
btrfs_set_timespec_nsec(leaf, &item->ctime, inode_get_ctime_nsec(inode));
+ btrfs_set_timespec_sec(leaf, &item->otime, BTRFS_I(inode)->i_otime_sec);
+ btrfs_set_timespec_nsec(leaf, &item->otime, BTRFS_I(inode)->i_otime_nsec);
+
/*
* We do not need to set the nbytes field, in fact during a fast fsync
* its value may not even be correct, since a fast fsync does not wait
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 59305202c67fea50378dcad0cc199dbc13a0e99a
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081859-excess-willfully-0f01@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 59305202c67fea50378dcad0cc199dbc13a0e99a Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual(a)arm.com>
Date: Fri, 20 Jun 2025 10:54:27 +0530
Subject: [PATCH] mm/ptdump: take the memory hotplug lock inside
ptdump_walk_pgd()
Memory hot remove unmaps and tears down various kernel page table regions
as required. The ptdump code can race with concurrent modifications of
the kernel page tables. When leaf entries are modified concurrently, the
dump code may log stale or inconsistent information for a VA range, but
this is otherwise not harmful.
But when intermediate levels of kernel page table are freed, the dump code
will continue to use memory that has been freed and potentially
reallocated for another purpose. In such cases, the ptdump code may
dereference bogus addresses, leading to a number of potential problems.
To avoid the above mentioned race condition, platforms such as arm64,
riscv and s390 take memory hotplug lock, while dumping kernel page table
via the sysfs interface /sys/kernel/debug/kernel_page_tables.
Similar race condition exists while checking for pages that might have
been marked W+X via /sys/kernel/debug/kernel_page_tables/check_wx_pages
which in turn calls ptdump_check_wx(). Instead of solving this race
condition again, let's just move the memory hotplug lock inside generic
ptdump_check_wx() which will benefit both the scenarios.
Drop get_online_mems() and put_online_mems() combination from all existing
platform ptdump code paths.
Link: https://lkml.kernel.org/r/20250620052427.2092093-1-anshuman.khandual@arm.com
Fixes: bbd6ec605c0f ("arm64/mm: Enable memory hot remove")
Signed-off-by: Anshuman Khandual <anshuman.khandual(a)arm.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Reviewed-by: Dev Jain <dev.jain(a)arm.com>
Acked-by: Alexander Gordeev <agordeev(a)linux.ibm.com> [s390]
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Will Deacon <will(a)kernel.org>
Cc: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: Paul Walmsley <paul.walmsley(a)sifive.com>
Cc: Palmer Dabbelt <palmer(a)dabbelt.com>
Cc: Alexander Gordeev <agordeev(a)linux.ibm.com>
Cc: Gerald Schaefer <gerald.schaefer(a)linux.ibm.com>
Cc: Heiko Carstens <hca(a)linux.ibm.com>
Cc: Vasily Gorbik <gor(a)linux.ibm.com>
Cc: Christian Borntraeger <borntraeger(a)linux.ibm.com>
Cc: Sven Schnelle <svens(a)linux.ibm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/arch/arm64/mm/ptdump_debugfs.c b/arch/arm64/mm/ptdump_debugfs.c
index 68bf1a125502..1e308328c079 100644
--- a/arch/arm64/mm/ptdump_debugfs.c
+++ b/arch/arm64/mm/ptdump_debugfs.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/debugfs.h>
-#include <linux/memory_hotplug.h>
#include <linux/seq_file.h>
#include <asm/ptdump.h>
@@ -9,9 +8,7 @@ static int ptdump_show(struct seq_file *m, void *v)
{
struct ptdump_info *info = m->private;
- get_online_mems();
ptdump_walk(m, info);
- put_online_mems();
return 0;
}
DEFINE_SHOW_ATTRIBUTE(ptdump);
diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c
index 32922550a50a..3b51690cc876 100644
--- a/arch/riscv/mm/ptdump.c
+++ b/arch/riscv/mm/ptdump.c
@@ -6,7 +6,6 @@
#include <linux/efi.h>
#include <linux/init.h>
#include <linux/debugfs.h>
-#include <linux/memory_hotplug.h>
#include <linux/seq_file.h>
#include <linux/ptdump.h>
@@ -413,9 +412,7 @@ bool ptdump_check_wx(void)
static int ptdump_show(struct seq_file *m, void *v)
{
- get_online_mems();
ptdump_walk(m, m->private);
- put_online_mems();
return 0;
}
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index ac604b176660..9af2aae0a515 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -247,11 +247,9 @@ static int ptdump_show(struct seq_file *m, void *v)
.marker = markers,
};
- get_online_mems();
mutex_lock(&cpa_mutex);
ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
mutex_unlock(&cpa_mutex);
- put_online_mems();
return 0;
}
DEFINE_SHOW_ATTRIBUTE(ptdump);
diff --git a/mm/ptdump.c b/mm/ptdump.c
index 61a352aa12ed..b600c7f864b8 100644
--- a/mm/ptdump.c
+++ b/mm/ptdump.c
@@ -176,6 +176,7 @@ void ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm, pgd_t *pgd)
{
const struct ptdump_range *range = st->range;
+ get_online_mems();
mmap_write_lock(mm);
while (range->start != range->end) {
walk_page_range_debug(mm, range->start, range->end,
@@ -183,6 +184,7 @@ void ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm, pgd_t *pgd)
range++;
}
mmap_write_unlock(mm);
+ put_online_mems();
/* Flush out the last page */
st->note_page_flush(st);
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 59305202c67fea50378dcad0cc199dbc13a0e99a
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081858-sabbath-blunt-7735@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 59305202c67fea50378dcad0cc199dbc13a0e99a Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual(a)arm.com>
Date: Fri, 20 Jun 2025 10:54:27 +0530
Subject: [PATCH] mm/ptdump: take the memory hotplug lock inside
ptdump_walk_pgd()
Memory hot remove unmaps and tears down various kernel page table regions
as required. The ptdump code can race with concurrent modifications of
the kernel page tables. When leaf entries are modified concurrently, the
dump code may log stale or inconsistent information for a VA range, but
this is otherwise not harmful.
But when intermediate levels of kernel page table are freed, the dump code
will continue to use memory that has been freed and potentially
reallocated for another purpose. In such cases, the ptdump code may
dereference bogus addresses, leading to a number of potential problems.
To avoid the above mentioned race condition, platforms such as arm64,
riscv and s390 take memory hotplug lock, while dumping kernel page table
via the sysfs interface /sys/kernel/debug/kernel_page_tables.
Similar race condition exists while checking for pages that might have
been marked W+X via /sys/kernel/debug/kernel_page_tables/check_wx_pages
which in turn calls ptdump_check_wx(). Instead of solving this race
condition again, let's just move the memory hotplug lock inside generic
ptdump_check_wx() which will benefit both the scenarios.
Drop get_online_mems() and put_online_mems() combination from all existing
platform ptdump code paths.
Link: https://lkml.kernel.org/r/20250620052427.2092093-1-anshuman.khandual@arm.com
Fixes: bbd6ec605c0f ("arm64/mm: Enable memory hot remove")
Signed-off-by: Anshuman Khandual <anshuman.khandual(a)arm.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Reviewed-by: Dev Jain <dev.jain(a)arm.com>
Acked-by: Alexander Gordeev <agordeev(a)linux.ibm.com> [s390]
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Will Deacon <will(a)kernel.org>
Cc: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: Paul Walmsley <paul.walmsley(a)sifive.com>
Cc: Palmer Dabbelt <palmer(a)dabbelt.com>
Cc: Alexander Gordeev <agordeev(a)linux.ibm.com>
Cc: Gerald Schaefer <gerald.schaefer(a)linux.ibm.com>
Cc: Heiko Carstens <hca(a)linux.ibm.com>
Cc: Vasily Gorbik <gor(a)linux.ibm.com>
Cc: Christian Borntraeger <borntraeger(a)linux.ibm.com>
Cc: Sven Schnelle <svens(a)linux.ibm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/arch/arm64/mm/ptdump_debugfs.c b/arch/arm64/mm/ptdump_debugfs.c
index 68bf1a125502..1e308328c079 100644
--- a/arch/arm64/mm/ptdump_debugfs.c
+++ b/arch/arm64/mm/ptdump_debugfs.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/debugfs.h>
-#include <linux/memory_hotplug.h>
#include <linux/seq_file.h>
#include <asm/ptdump.h>
@@ -9,9 +8,7 @@ static int ptdump_show(struct seq_file *m, void *v)
{
struct ptdump_info *info = m->private;
- get_online_mems();
ptdump_walk(m, info);
- put_online_mems();
return 0;
}
DEFINE_SHOW_ATTRIBUTE(ptdump);
diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c
index 32922550a50a..3b51690cc876 100644
--- a/arch/riscv/mm/ptdump.c
+++ b/arch/riscv/mm/ptdump.c
@@ -6,7 +6,6 @@
#include <linux/efi.h>
#include <linux/init.h>
#include <linux/debugfs.h>
-#include <linux/memory_hotplug.h>
#include <linux/seq_file.h>
#include <linux/ptdump.h>
@@ -413,9 +412,7 @@ bool ptdump_check_wx(void)
static int ptdump_show(struct seq_file *m, void *v)
{
- get_online_mems();
ptdump_walk(m, m->private);
- put_online_mems();
return 0;
}
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index ac604b176660..9af2aae0a515 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -247,11 +247,9 @@ static int ptdump_show(struct seq_file *m, void *v)
.marker = markers,
};
- get_online_mems();
mutex_lock(&cpa_mutex);
ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
mutex_unlock(&cpa_mutex);
- put_online_mems();
return 0;
}
DEFINE_SHOW_ATTRIBUTE(ptdump);
diff --git a/mm/ptdump.c b/mm/ptdump.c
index 61a352aa12ed..b600c7f864b8 100644
--- a/mm/ptdump.c
+++ b/mm/ptdump.c
@@ -176,6 +176,7 @@ void ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm, pgd_t *pgd)
{
const struct ptdump_range *range = st->range;
+ get_online_mems();
mmap_write_lock(mm);
while (range->start != range->end) {
walk_page_range_debug(mm, range->start, range->end,
@@ -183,6 +184,7 @@ void ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm, pgd_t *pgd)
range++;
}
mmap_write_unlock(mm);
+ put_online_mems();
/* Flush out the last page */
st->note_page_flush(st);
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 005b0a0c24e1628313e951516b675109a92cacfe
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081827-washed-yelp-3c3e@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 005b0a0c24e1628313e951516b675109a92cacfe Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Fri, 18 Jul 2025 13:07:29 +0100
Subject: [PATCH] btrfs: send: use fallocate for hole punching with send stream
v2
Currently holes are sent as writes full of zeroes, which results in
unnecessarily using disk space at the receiving end and increasing the
stream size.
In some cases we avoid sending writes of zeroes, like during a full
send operation where we just skip writes for holes.
But for some cases we fill previous holes with writes of zeroes too, like
in this scenario:
1) We have a file with a hole in the range [2M, 3M), we snapshot the
subvolume and do a full send. The range [2M, 3M) stays as a hole at
the receiver since we skip sending write commands full of zeroes;
2) We punch a hole for the range [3M, 4M) in our file, so that now it
has a 2M hole in the range [2M, 4M), and snapshot the subvolume.
Now if we do an incremental send, we will send write commands full
of zeroes for the range [2M, 4M), removing the hole for [2M, 3M) at
the receiver.
We could improve cases such as this last one by doing additional
comparisons of file extent items (or their absence) between the parent
and send snapshots, but that's a lot of code to add plus additional CPU
and IO costs.
Since the send stream v2 already has a fallocate command and btrfs-progs
implements a callback to execute fallocate since the send stream v2
support was added to it, update the kernel to use fallocate for punching
holes for V2+ streams.
Test coverage is provided by btrfs/284 which is a version of btrfs/007
that exercises send stream v2 instead of v1, using fsstress with random
operations and fssum to verify file contents.
Link: https://github.com/kdave/btrfs-progs/issues/1001
CC: stable(a)vger.kernel.org # 6.1+
Reviewed-by: Boris Burkov <boris(a)bur.io>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 09822e766e41..7664025a5af4 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -4,6 +4,7 @@
*/
#include <linux/bsearch.h>
+#include <linux/falloc.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/sort.h>
@@ -5405,6 +5406,30 @@ static int send_update_extent(struct send_ctx *sctx,
return ret;
}
+static int send_fallocate(struct send_ctx *sctx, u32 mode, u64 offset, u64 len)
+{
+ struct fs_path *path;
+ int ret;
+
+ path = get_cur_inode_path(sctx);
+ if (IS_ERR(path))
+ return PTR_ERR(path);
+
+ ret = begin_cmd(sctx, BTRFS_SEND_C_FALLOCATE);
+ if (ret < 0)
+ return ret;
+
+ TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
+ TLV_PUT_U32(sctx, BTRFS_SEND_A_FALLOCATE_MODE, mode);
+ TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
+ TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len);
+
+ ret = send_cmd(sctx);
+
+tlv_put_failure:
+ return ret;
+}
+
static int send_hole(struct send_ctx *sctx, u64 end)
{
struct fs_path *p = NULL;
@@ -5412,6 +5437,14 @@ static int send_hole(struct send_ctx *sctx, u64 end)
u64 offset = sctx->cur_inode_last_extent;
int ret = 0;
+ /*
+ * Starting with send stream v2 we have fallocate and can use it to
+ * punch holes instead of sending writes full of zeroes.
+ */
+ if (proto_cmd_ok(sctx, BTRFS_SEND_C_FALLOCATE))
+ return send_fallocate(sctx, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ offset, end - offset);
+
/*
* A hole that starts at EOF or beyond it. Since we do not yet support
* fallocate (for extent preallocation and hole punching), sending a
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 1ef94169db0958d6de39f9ea6e063ce887342e2d
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081801-ended-viewless-5ac7@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1ef94169db0958d6de39f9ea6e063ce887342e2d Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu(a)suse.com>
Date: Wed, 2 Jul 2025 15:08:13 +0930
Subject: [PATCH] btrfs: populate otime when logging an inode item
[TEST FAILURE WITH EXPERIMENTAL FEATURES]
When running test case generic/508, the test case will fail with the new
btrfs shutdown support:
generic/508 - output mismatch (see /home/adam/xfstests/results//generic/508.out.bad)
--- tests/generic/508.out 2022-05-11 11:25:30.806666664 +0930
+++ /home/adam/xfstests/results//generic/508.out.bad 2025-07-02 14:53:22.401824212 +0930
@@ -1,2 +1,6 @@
QA output created by 508
Silence is golden
+Before:
+After : stat.btime = Thu Jan 1 09:30:00 1970
+Before:
+After : stat.btime = Wed Jul 2 14:53:22 2025
...
(Run 'diff -u /home/adam/xfstests/tests/generic/508.out /home/adam/xfstests/results//generic/508.out.bad' to see the entire diff)
Ran: generic/508
Failures: generic/508
Failed 1 of 1 tests
Please note that the test case requires shutdown support, thus the test
case will be skipped using the current upstream kernel, as it doesn't
have shutdown ioctl support.
[CAUSE]
The direct cause the 0 time stamp in the log tree:
leaf 30507008 items 2 free space 16057 generation 9 owner TREE_LOG
leaf 30507008 flags 0x1(WRITTEN) backref revision 1
checksum stored e522548d
checksum calced e522548d
fs uuid 57d45451-481e-43e4-aa93-289ad707a3a0
chunk uuid d52bd3fd-5163-4337-98a7-7986993ad398
item 0 key (257 INODE_ITEM 0) itemoff 16123 itemsize 160
generation 9 transid 9 size 0 nbytes 0
block group 0 mode 100644 links 1 uid 0 gid 0 rdev 0
sequence 1 flags 0x0(none)
atime 1751432947.492000000 (2025-07-02 14:39:07)
ctime 1751432947.492000000 (2025-07-02 14:39:07)
mtime 1751432947.492000000 (2025-07-02 14:39:07)
otime 0.0 (1970-01-01 09:30:00) <<<
But the old fs tree has all the correct time stamp:
btrfs-progs v6.12
fs tree key (FS_TREE ROOT_ITEM 0)
leaf 30425088 items 2 free space 16061 generation 5 owner FS_TREE
leaf 30425088 flags 0x1(WRITTEN) backref revision 1
checksum stored 48f6c57e
checksum calced 48f6c57e
fs uuid 57d45451-481e-43e4-aa93-289ad707a3a0
chunk uuid d52bd3fd-5163-4337-98a7-7986993ad398
item 0 key (256 INODE_ITEM 0) itemoff 16123 itemsize 160
generation 3 transid 0 size 0 nbytes 16384
block group 0 mode 40755 links 1 uid 0 gid 0 rdev 0
sequence 0 flags 0x0(none)
atime 1751432947.0 (2025-07-02 14:39:07)
ctime 1751432947.0 (2025-07-02 14:39:07)
mtime 1751432947.0 (2025-07-02 14:39:07)
otime 1751432947.0 (2025-07-02 14:39:07) <<<
The root cause is that fill_inode_item() in tree-log.c is only
populating a/c/m time, not the otime (or btime in statx output).
Part of the reason is that, the vfs inode only has a/c/m time, no native
btime support yet.
[FIX]
Thankfully btrfs has its otime stored in btrfs_inode::i_otime_sec and
btrfs_inode::i_otime_nsec.
So what we really need is just fill the otime time stamp in
fill_inode_item() of tree-log.c
There is another fill_inode_item() in inode.c, which is doing the proper
otime population.
Fixes: 94edf4ae43a5 ("Btrfs: don't bother committing delayed inode updates when fsyncing")
CC: stable(a)vger.kernel.org
Reviewed-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 1e805dabfc4b..ab0815d9e7e5 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4233,6 +4233,9 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
btrfs_set_timespec_sec(leaf, &item->ctime, inode_get_ctime_sec(inode));
btrfs_set_timespec_nsec(leaf, &item->ctime, inode_get_ctime_nsec(inode));
+ btrfs_set_timespec_sec(leaf, &item->otime, BTRFS_I(inode)->i_otime_sec);
+ btrfs_set_timespec_nsec(leaf, &item->otime, BTRFS_I(inode)->i_otime_nsec);
+
/*
* We do not need to set the nbytes field, in fact during a fast fsync
* its value may not even be correct, since a fast fsync does not wait
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 807d9023e75fc20bfd6dd2ac0408ce4af53f1648
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081832-unearned-monopoly-13b1@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 807d9023e75fc20bfd6dd2ac0408ce4af53f1648 Mon Sep 17 00:00:00 2001
From: Boris Burkov <boris(a)bur.io>
Date: Mon, 14 Jul 2025 16:44:28 -0700
Subject: [PATCH] btrfs: fix ssd_spread overallocation
If the ssd_spread mount option is enabled, then we run the so called
clustered allocator for data block groups. In practice, this results in
creating a btrfs_free_cluster which caches a block_group and borrows its
free extents for allocation.
Since the introduction of allocation size classes in 6.1, there has been
a bug in the interaction between that feature and ssd_spread.
find_free_extent() has a number of nested loops. The loop going over the
allocation stages, stored in ffe_ctl->loop and managed by
find_free_extent_update_loop(), the loop over the raid levels, and the
loop over all the block_groups in a space_info. The size class feature
relies on the block_group loop to ensure it gets a chance to see a
block_group of a given size class. However, the clustered allocator
uses the cached cluster block_group and breaks that loop. Each call to
do_allocation() will really just go back to the same cached block_group.
Normally, this is OK, as the allocation either succeeds and we don't
want to loop any more or it fails, and we clear the cluster and return
its space to the block_group.
But with size classes, the allocation can succeed, then later fail,
outside of do_allocation() due to size class mismatch. That latter
failure is not properly handled due to the highly complex multi loop
logic. The result is a painful loop where we continue to allocate the
same num_bytes from the cluster in a tight loop until it fails and
releases the cluster and lets us try a new block_group. But by then, we
have skipped great swaths of the available block_groups and are likely
to fail to allocate, looping the outer loop. In pathological cases like
the reproducer below, the cached block_group is often the very last one,
in which case we don't perform this tight bg loop but instead rip
through the ffe stages to LOOP_CHUNK_ALLOC and allocate a chunk, which
is now the last one, and we enter the tight inner loop until an
allocation failure. Then allocation succeeds on the final block_group
and if the next allocation is a size mismatch, the exact same thing
happens again.
Triggering this is as easy as mounting with -o ssd_spread and then
running:
mount -o ssd_spread $dev $mnt
dd if=/dev/zero of=$mnt/big bs=16M count=1 &>/dev/null
dd if=/dev/zero of=$mnt/med bs=4M count=1 &>/dev/null
sync
if you do the two writes + sync in a loop, you can force btrfs to spin
an excessive amount on semi-successful clustered allocations, before
ultimately failing and advancing to the stage where we force a chunk
allocation. This results in 2G of data allocated per iteration, despite
only using ~20M of data. By using a small size classed extent, the inner
loop takes longer and we can spin for longer.
The simplest, shortest term fix to unbreak this is to make the clustered
allocator size_class aware in the dumbest way, where it fails on size
class mismatch. This may hinder the operation of the clustered
allocator, but better hindered than completely broken and terribly
overallocating.
Further re-design improvements are also in the works.
Fixes: 52bb7a2166af ("btrfs: introduce size class to block group allocator")
CC: stable(a)vger.kernel.org # 6.1+
Reported-by: David Sterba <dsterba(a)suse.com>
Reviewed-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: Boris Burkov <boris(a)bur.io>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 85833bf216de..97d517cdf2df 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3651,6 +3651,21 @@ btrfs_release_block_group(struct btrfs_block_group *cache,
btrfs_put_block_group(cache);
}
+static bool find_free_extent_check_size_class(const struct find_free_extent_ctl *ffe_ctl,
+ const struct btrfs_block_group *bg)
+{
+ if (ffe_ctl->policy == BTRFS_EXTENT_ALLOC_ZONED)
+ return true;
+ if (!btrfs_block_group_should_use_size_class(bg))
+ return true;
+ if (ffe_ctl->loop >= LOOP_WRONG_SIZE_CLASS)
+ return true;
+ if (ffe_ctl->loop >= LOOP_UNSET_SIZE_CLASS &&
+ bg->size_class == BTRFS_BG_SZ_NONE)
+ return true;
+ return ffe_ctl->size_class == bg->size_class;
+}
+
/*
* Helper function for find_free_extent().
*
@@ -3672,7 +3687,8 @@ static int find_free_extent_clustered(struct btrfs_block_group *bg,
if (!cluster_bg)
goto refill_cluster;
if (cluster_bg != bg && (cluster_bg->ro ||
- !block_group_bits(cluster_bg, ffe_ctl->flags)))
+ !block_group_bits(cluster_bg, ffe_ctl->flags) ||
+ !find_free_extent_check_size_class(ffe_ctl, cluster_bg)))
goto release_cluster;
offset = btrfs_alloc_from_cluster(cluster_bg, last_ptr,
@@ -4229,21 +4245,6 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
return -ENOSPC;
}
-static bool find_free_extent_check_size_class(struct find_free_extent_ctl *ffe_ctl,
- struct btrfs_block_group *bg)
-{
- if (ffe_ctl->policy == BTRFS_EXTENT_ALLOC_ZONED)
- return true;
- if (!btrfs_block_group_should_use_size_class(bg))
- return true;
- if (ffe_ctl->loop >= LOOP_WRONG_SIZE_CLASS)
- return true;
- if (ffe_ctl->loop >= LOOP_UNSET_SIZE_CLASS &&
- bg->size_class == BTRFS_BG_SZ_NONE)
- return true;
- return ffe_ctl->size_class == bg->size_class;
-}
-
static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info,
struct find_free_extent_ctl *ffe_ctl,
struct btrfs_space_info *space_info,
From: Niklas Neronin <niklas.neronin(a)linux.intel.com>
Partially revert commit e1db856bd288 ("usb: xhci: remove '0' write to
write-1-to-clear register") because the patch cleared the Interrupt Pending
bit during interrupt enabling and disabling. The Interrupt Pending bit
should only be cleared when the driver has handled the interrupt.
Ideally, all interrupts should be handled before disabling the interrupt;
consequently, no interrupt should be pending when enabling the interrupt.
For this reason, keep the debug message informing if an interrupt is still
pending when an interrupt is disabled.
Because the Interrupt Pending bit is write-1-to-clear, writing '0' to it
ensures that the state does not change.
Link: https://lore.kernel.org/linux-usb/20250818231103.672ec7ed@foxbook
Fixes: e1db856bd288 ("usb: xhci: remove '0' write to write-1-to-clear register")
Closes: https://bbs.archlinux.org/viewtopic.php?id=307641
cc: stable(a)vger.kernel.org # 6.16+
Signed-off-by: Niklas Neronin <niklas.neronin(a)linux.intel.com>
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
---
drivers/usb/host/xhci.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 0e03691f03bf..742c23826e17 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -309,6 +309,7 @@ int xhci_enable_interrupter(struct xhci_interrupter *ir)
return -EINVAL;
iman = readl(&ir->ir_set->iman);
+ iman &= ~IMAN_IP;
iman |= IMAN_IE;
writel(iman, &ir->ir_set->iman);
@@ -325,6 +326,7 @@ int xhci_disable_interrupter(struct xhci_hcd *xhci, struct xhci_interrupter *ir)
return -EINVAL;
iman = readl(&ir->ir_set->iman);
+ iman &= ~IMAN_IP;
iman &= ~IMAN_IE;
writel(iman, &ir->ir_set->iman);
--
2.43.0
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 1ef94169db0958d6de39f9ea6e063ce887342e2d
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081800-anew-bullion-cdbe@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1ef94169db0958d6de39f9ea6e063ce887342e2d Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu(a)suse.com>
Date: Wed, 2 Jul 2025 15:08:13 +0930
Subject: [PATCH] btrfs: populate otime when logging an inode item
[TEST FAILURE WITH EXPERIMENTAL FEATURES]
When running test case generic/508, the test case will fail with the new
btrfs shutdown support:
generic/508 - output mismatch (see /home/adam/xfstests/results//generic/508.out.bad)
--- tests/generic/508.out 2022-05-11 11:25:30.806666664 +0930
+++ /home/adam/xfstests/results//generic/508.out.bad 2025-07-02 14:53:22.401824212 +0930
@@ -1,2 +1,6 @@
QA output created by 508
Silence is golden
+Before:
+After : stat.btime = Thu Jan 1 09:30:00 1970
+Before:
+After : stat.btime = Wed Jul 2 14:53:22 2025
...
(Run 'diff -u /home/adam/xfstests/tests/generic/508.out /home/adam/xfstests/results//generic/508.out.bad' to see the entire diff)
Ran: generic/508
Failures: generic/508
Failed 1 of 1 tests
Please note that the test case requires shutdown support, thus the test
case will be skipped using the current upstream kernel, as it doesn't
have shutdown ioctl support.
[CAUSE]
The direct cause the 0 time stamp in the log tree:
leaf 30507008 items 2 free space 16057 generation 9 owner TREE_LOG
leaf 30507008 flags 0x1(WRITTEN) backref revision 1
checksum stored e522548d
checksum calced e522548d
fs uuid 57d45451-481e-43e4-aa93-289ad707a3a0
chunk uuid d52bd3fd-5163-4337-98a7-7986993ad398
item 0 key (257 INODE_ITEM 0) itemoff 16123 itemsize 160
generation 9 transid 9 size 0 nbytes 0
block group 0 mode 100644 links 1 uid 0 gid 0 rdev 0
sequence 1 flags 0x0(none)
atime 1751432947.492000000 (2025-07-02 14:39:07)
ctime 1751432947.492000000 (2025-07-02 14:39:07)
mtime 1751432947.492000000 (2025-07-02 14:39:07)
otime 0.0 (1970-01-01 09:30:00) <<<
But the old fs tree has all the correct time stamp:
btrfs-progs v6.12
fs tree key (FS_TREE ROOT_ITEM 0)
leaf 30425088 items 2 free space 16061 generation 5 owner FS_TREE
leaf 30425088 flags 0x1(WRITTEN) backref revision 1
checksum stored 48f6c57e
checksum calced 48f6c57e
fs uuid 57d45451-481e-43e4-aa93-289ad707a3a0
chunk uuid d52bd3fd-5163-4337-98a7-7986993ad398
item 0 key (256 INODE_ITEM 0) itemoff 16123 itemsize 160
generation 3 transid 0 size 0 nbytes 16384
block group 0 mode 40755 links 1 uid 0 gid 0 rdev 0
sequence 0 flags 0x0(none)
atime 1751432947.0 (2025-07-02 14:39:07)
ctime 1751432947.0 (2025-07-02 14:39:07)
mtime 1751432947.0 (2025-07-02 14:39:07)
otime 1751432947.0 (2025-07-02 14:39:07) <<<
The root cause is that fill_inode_item() in tree-log.c is only
populating a/c/m time, not the otime (or btime in statx output).
Part of the reason is that, the vfs inode only has a/c/m time, no native
btime support yet.
[FIX]
Thankfully btrfs has its otime stored in btrfs_inode::i_otime_sec and
btrfs_inode::i_otime_nsec.
So what we really need is just fill the otime time stamp in
fill_inode_item() of tree-log.c
There is another fill_inode_item() in inode.c, which is doing the proper
otime population.
Fixes: 94edf4ae43a5 ("Btrfs: don't bother committing delayed inode updates when fsyncing")
CC: stable(a)vger.kernel.org
Reviewed-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 1e805dabfc4b..ab0815d9e7e5 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4233,6 +4233,9 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
btrfs_set_timespec_sec(leaf, &item->ctime, inode_get_ctime_sec(inode));
btrfs_set_timespec_nsec(leaf, &item->ctime, inode_get_ctime_nsec(inode));
+ btrfs_set_timespec_sec(leaf, &item->otime, BTRFS_I(inode)->i_otime_sec);
+ btrfs_set_timespec_nsec(leaf, &item->otime, BTRFS_I(inode)->i_otime_nsec);
+
/*
* We do not need to set the nbytes field, in fact during a fast fsync
* its value may not even be correct, since a fast fsync does not wait
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 1ef94169db0958d6de39f9ea6e063ce887342e2d
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081800-uncouple-vagrancy-7f2b@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1ef94169db0958d6de39f9ea6e063ce887342e2d Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu(a)suse.com>
Date: Wed, 2 Jul 2025 15:08:13 +0930
Subject: [PATCH] btrfs: populate otime when logging an inode item
[TEST FAILURE WITH EXPERIMENTAL FEATURES]
When running test case generic/508, the test case will fail with the new
btrfs shutdown support:
generic/508 - output mismatch (see /home/adam/xfstests/results//generic/508.out.bad)
--- tests/generic/508.out 2022-05-11 11:25:30.806666664 +0930
+++ /home/adam/xfstests/results//generic/508.out.bad 2025-07-02 14:53:22.401824212 +0930
@@ -1,2 +1,6 @@
QA output created by 508
Silence is golden
+Before:
+After : stat.btime = Thu Jan 1 09:30:00 1970
+Before:
+After : stat.btime = Wed Jul 2 14:53:22 2025
...
(Run 'diff -u /home/adam/xfstests/tests/generic/508.out /home/adam/xfstests/results//generic/508.out.bad' to see the entire diff)
Ran: generic/508
Failures: generic/508
Failed 1 of 1 tests
Please note that the test case requires shutdown support, thus the test
case will be skipped using the current upstream kernel, as it doesn't
have shutdown ioctl support.
[CAUSE]
The direct cause the 0 time stamp in the log tree:
leaf 30507008 items 2 free space 16057 generation 9 owner TREE_LOG
leaf 30507008 flags 0x1(WRITTEN) backref revision 1
checksum stored e522548d
checksum calced e522548d
fs uuid 57d45451-481e-43e4-aa93-289ad707a3a0
chunk uuid d52bd3fd-5163-4337-98a7-7986993ad398
item 0 key (257 INODE_ITEM 0) itemoff 16123 itemsize 160
generation 9 transid 9 size 0 nbytes 0
block group 0 mode 100644 links 1 uid 0 gid 0 rdev 0
sequence 1 flags 0x0(none)
atime 1751432947.492000000 (2025-07-02 14:39:07)
ctime 1751432947.492000000 (2025-07-02 14:39:07)
mtime 1751432947.492000000 (2025-07-02 14:39:07)
otime 0.0 (1970-01-01 09:30:00) <<<
But the old fs tree has all the correct time stamp:
btrfs-progs v6.12
fs tree key (FS_TREE ROOT_ITEM 0)
leaf 30425088 items 2 free space 16061 generation 5 owner FS_TREE
leaf 30425088 flags 0x1(WRITTEN) backref revision 1
checksum stored 48f6c57e
checksum calced 48f6c57e
fs uuid 57d45451-481e-43e4-aa93-289ad707a3a0
chunk uuid d52bd3fd-5163-4337-98a7-7986993ad398
item 0 key (256 INODE_ITEM 0) itemoff 16123 itemsize 160
generation 3 transid 0 size 0 nbytes 16384
block group 0 mode 40755 links 1 uid 0 gid 0 rdev 0
sequence 0 flags 0x0(none)
atime 1751432947.0 (2025-07-02 14:39:07)
ctime 1751432947.0 (2025-07-02 14:39:07)
mtime 1751432947.0 (2025-07-02 14:39:07)
otime 1751432947.0 (2025-07-02 14:39:07) <<<
The root cause is that fill_inode_item() in tree-log.c is only
populating a/c/m time, not the otime (or btime in statx output).
Part of the reason is that, the vfs inode only has a/c/m time, no native
btime support yet.
[FIX]
Thankfully btrfs has its otime stored in btrfs_inode::i_otime_sec and
btrfs_inode::i_otime_nsec.
So what we really need is just fill the otime time stamp in
fill_inode_item() of tree-log.c
There is another fill_inode_item() in inode.c, which is doing the proper
otime population.
Fixes: 94edf4ae43a5 ("Btrfs: don't bother committing delayed inode updates when fsyncing")
CC: stable(a)vger.kernel.org
Reviewed-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 1e805dabfc4b..ab0815d9e7e5 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4233,6 +4233,9 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
btrfs_set_timespec_sec(leaf, &item->ctime, inode_get_ctime_sec(inode));
btrfs_set_timespec_nsec(leaf, &item->ctime, inode_get_ctime_nsec(inode));
+ btrfs_set_timespec_sec(leaf, &item->otime, BTRFS_I(inode)->i_otime_sec);
+ btrfs_set_timespec_nsec(leaf, &item->otime, BTRFS_I(inode)->i_otime_nsec);
+
/*
* We do not need to set the nbytes field, in fact during a fast fsync
* its value may not even be correct, since a fast fsync does not wait
Mount options (uid, gid, mode) are silently ignored when debugfs is
mounted. This is a regression introduced during the conversion to the
new mount API.
When the mount API conversion was done, the parsed options were never
applied to the superblock when it was reused. As a result, the mount
options were ignored when debugfs was mounted.
Fix this by following the same pattern as the tracefs fix in commit
e4d32142d1de ("tracing: Fix tracefs mount options"). Call
debugfs_reconfigure() in debugfs_get_tree() to apply the mount options
to the superblock after it has been created or reused.
As an example, with the bug the "mode" mount option is ignored:
$ mount -o mode=0666 -t debugfs debugfs /tmp/debugfs_test
$ mount | grep debugfs_test
debugfs on /tmp/debugfs_test type debugfs (rw,relatime)
$ ls -ld /tmp/debugfs_test
drwx------ 25 root root 0 Aug 4 14:16 /tmp/debugfs_test
With the fix applied, it works as expected:
$ mount -o mode=0666 -t debugfs debugfs /tmp/debugfs_test
$ mount | grep debugfs_test
debugfs on /tmp/debugfs_test type debugfs (rw,relatime,mode=666)
$ ls -ld /tmp/debugfs_test
drw-rw-rw- 37 root root 0 Aug 2 17:28 /tmp/debugfs_test
Fixes: a20971c18752 ("vfs: Convert debugfs to use the new mount API")
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220406
Cc: stable(a)vger.kernel.org
Reviewed-by: Eric Sandeen <sandeen(a)redhat.com>
Signed-off-by: Charalampos Mitrodimas <charmitro(a)posteo.net>
---
Changes in v3:
- Reworded patch description to avoid confusion about OOPs
- Link to v2: https://lore.kernel.org/r/20250813-debugfs-mount-opts-v2-1-0ca79720edc6@pos…
Changes in v2:
- Follow the same pattern as e4d32142d1de ("tracing: Fix tracefs mount options")
- Add Cc: stable tag
- Link to v1: https://lore.kernel.org/r/20250804-debugfs-mount-opts-v1-1-bc05947a80b5@pos…
---
fs/debugfs/inode.c | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index a0357b0cf362d8ac47ff810e162402d6a8ae2cb9..c12d649df6a5435050f606c2828a9a7cc61922e4 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -183,6 +183,9 @@ static int debugfs_reconfigure(struct fs_context *fc)
struct debugfs_fs_info *sb_opts = sb->s_fs_info;
struct debugfs_fs_info *new_opts = fc->s_fs_info;
+ if (!new_opts)
+ return 0;
+
sync_filesystem(sb);
/* structure copy of new mount options to sb */
@@ -282,10 +285,16 @@ static int debugfs_fill_super(struct super_block *sb, struct fs_context *fc)
static int debugfs_get_tree(struct fs_context *fc)
{
+ int err;
+
if (!(debugfs_allow & DEBUGFS_ALLOW_API))
return -EPERM;
- return get_tree_single(fc, debugfs_fill_super);
+ err = get_tree_single(fc, debugfs_fill_super);
+ if (err)
+ return err;
+
+ return debugfs_reconfigure(fc);
}
static void debugfs_free_fc(struct fs_context *fc)
---
base-commit: 3c4a063b1f8ab71352df1421d9668521acb63cd9
change-id: 20250804-debugfs-mount-opts-2a68d7741f05
Best regards,
--
Charalampos Mitrodimas <charmitro(a)posteo.net>
From: Nianyao Tang <tangnianyao(a)huawei.com>
[ upstream commit e8cde32f111f7f5681a7bad3ec747e9e697569a9 ]
Enable ECBHB bits in ID_AA64MMFR1 register as per ARM DDI 0487K.a
specification.
When guest OS read ID_AA64MMFR1_EL1, kvm emulate this reg using
ftr_id_aa64mmfr1 and always return ID_AA64MMFR1_EL1.ECBHB=0 to guest.
It results in guest syscall jump to tramp ventry, which is not needed
in implementation with ID_AA64MMFR1_EL1.ECBHB=1.
Let's make the guest syscall process the same as the host.
This fixes performance regressions introduced by commit a53b3599d9bf
("arm64: errata: Add newer ARM cores to the spectre_bhb_loop_affected()
lists") for guests running on neoverse v2 hardware, which supports
ECBHB.
Signed-off-by: Nianyao Tang <tangnianyao(a)huawei.com>
Link: https://lore.kernel.org/r/20240611122049.2758600-1-tangnianyao@huawei.com
Signed-off-by: Catalin Marinas <catalin.marinas(a)arm.com>
Signed-off-by: Patrick Roy <roypat(a)amazon.co.uk>
---
arch/arm64/kernel/cpufeature.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 840cc48b5147..5d2322eeee47 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -343,6 +343,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_ECBHB_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_TIDCP1_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_AFP_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_ETS_SHIFT, 4, 0),
--
2.50.1
There is a long standing bug which causes I2C communication not to
work on the Armada 3700 based boards. This small series restores
that functionality.
Signed-off-by: Imre Kaloz <kaloz(a)openwrt.org>
Signed-off-by: Gabor Juhos <j4g8y7(a)gmail.com>
---
Changes in v2:
- collect offered tags
- rebase and retest on tip of i2c/for-current
- Link to v1: https://lore.kernel.org/r/20250511-i2c-pxa-fix-i2c-communication-v1-0-e9097…
---
Gabor Juhos (3):
i2c: add init_recovery() callback
i2c: pxa: prevent calling of the generic recovery init code
i2c: pxa: handle 'Early Bus Busy' condition on Armada 3700
drivers/i2c/busses/i2c-pxa.c | 25 +++++++++++++++++++------
drivers/i2c/i2c-core-base.c | 8 +++++++-
include/linux/i2c.h | 4 ++++
3 files changed, 30 insertions(+), 7 deletions(-)
---
base-commit: 8f5ae30d69d7543eee0d70083daf4de8fe15d585
change-id: 20250510-i2c-pxa-fix-i2c-communication-3e6de1e3d0c6
Best regards,
--
Gabor Juhos <j4g8y7(a)gmail.com>
Since commits
7b9eb53e8591 ("media: cx18: Access v4l2_fh from file")
9ba9d11544f9 ("media: ivtv: Access v4l2_fh from file")
All the ioctl handlers access their private data structures
from file *
The ivtv and cx18 drivers call the ioctl handlers from their
DVB layer without a valid file *, causing invalid memory access.
The issue has been reported by smatch in
"[bug report] media: cx18: Access v4l2_fh from file"
Fix this by providing wrappers for the ioctl handlers to be
used by the DVB layer that do not require a valid file *.
Signed-off-by: Jacopo Mondi <jacopo.mondi(a)ideasonboard.com>
---
Changes in v4:
- Slightly adjust commit messages
- Link to v3: https://lore.kernel.org/r/20250818-cx18-v4l2-fh-v3-0-5e2f08f3cadc@ideasonbo…
Changes in v3:
- Change helpers to accept the type they're going to operate on instead
of using the open_id wrapper type as suggested by Laurent
- Link to v2: https://lore.kernel.org/r/20250818-cx18-v4l2-fh-v2-0-3f53ce423663@ideasonbo…
Changes in v2:
- Add Cc: stable(a)vger.kernel.org per-patch
---
Jacopo Mondi (2):
media: cx18: Fix invalid access to file *
media: ivtv: Fix invalid access to file *
drivers/media/pci/cx18/cx18-driver.c | 9 +++------
drivers/media/pci/cx18/cx18-ioctl.c | 30 +++++++++++++++++++-----------
drivers/media/pci/cx18/cx18-ioctl.h | 8 +++++---
drivers/media/pci/ivtv/ivtv-driver.c | 11 ++++-------
drivers/media/pci/ivtv/ivtv-ioctl.c | 22 +++++++++++++++++-----
drivers/media/pci/ivtv/ivtv-ioctl.h | 6 ++++--
6 files changed, 52 insertions(+), 34 deletions(-)
---
base-commit: a75b8d198c55e9eb5feb6f6e155496305caba2dc
change-id: 20250818-cx18-v4l2-fh-7eaa6199fdde
Best regards,
--
Jacopo Mondi <jacopo.mondi(a)ideasonboard.com>
Since commits
7b9eb53e8591 ("media: cx18: Access v4l2_fh from file")
9ba9d11544f9 ("media: ivtv: Access v4l2_fh from file")
All the ioctl handlers access their private data structures
from file *
The ivtv and cx18 drivers call the ioctl handlers from their
DVB layer without a valid file *, causing invalid memory access.
The issue has been reported by smatch in
"[bug report] media: cx18: Access v4l2_fh from file"
Fix this by providing wrappers for the ioctl handlers to be
used by the DVB layer that do not require a valid file *.
Signed-off-by: Jacopo Mondi <jacopo.mondi(a)ideasonboard.com>
---
Changes in v3:
- Change helpers to accept the type they're going to operate on instead
of using the open_id wrapper type as suggested by Laurent
- Link to v2: https://lore.kernel.org/r/20250818-cx18-v4l2-fh-v2-0-3f53ce423663@ideasonbo…
Changes in v2:
- Add Cc: stable(a)vger.kernel.org per-patch
---
Jacopo Mondi (2):
media: cx18: Fix invalid access to file *
media: ivtv: Fix invalid access to file *
drivers/media/pci/cx18/cx18-driver.c | 9 +++------
drivers/media/pci/cx18/cx18-ioctl.c | 30 +++++++++++++++++++-----------
drivers/media/pci/cx18/cx18-ioctl.h | 8 +++++---
drivers/media/pci/ivtv/ivtv-driver.c | 11 ++++-------
drivers/media/pci/ivtv/ivtv-ioctl.c | 22 +++++++++++++++++-----
drivers/media/pci/ivtv/ivtv-ioctl.h | 6 ++++--
6 files changed, 52 insertions(+), 34 deletions(-)
---
base-commit: a75b8d198c55e9eb5feb6f6e155496305caba2dc
change-id: 20250818-cx18-v4l2-fh-7eaa6199fdde
Best regards,
--
Jacopo Mondi <jacopo.mondi(a)ideasonboard.com>
Hi,
在 2025/08/17 22:18, Sasha Levin 写道:
> This is a note to let you know that I've just added the patch titled
>
> md: call del_gendisk in control path
>
> to the 6.6-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> md-call-del_gendisk-in-control-path.patch
> and it can be found in the queue-6.6 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
>
>
This patch should be be backported to any stable kernel, this change
will break user tools mdadm:
https://lore.kernel.org/all/f654db67-a5a5-114b-09b8-00db303daab7@redhat.com/
Thanks,
Kuai
>
> commit fa738623105e2dd4865274dc8525856feaec3ae9
> Author: Xiao Ni <xni(a)redhat.com>
> Date: Wed Jun 11 15:31:06 2025 +0800
>
> md: call del_gendisk in control path
>
> [ Upstream commit 9e59d609763f70a992a8f3808dabcce60f14eb5c ]
>
> Now del_gendisk and put_disk are called asynchronously in workqueue work.
> The asynchronous way has a problem that the device node can still exist
> after mdadm --stop command returns in a short window. So udev rule can
> open this device node and create the struct mddev in kernel again. So put
> del_gendisk in control path and still leave put_disk in md_kobj_release
> to avoid uaf of gendisk.
>
> Function del_gendisk can't be called with reconfig_mutex. If it's called
> with reconfig mutex, a deadlock can happen. del_gendisk waits all sysfs
> files access to finish and sysfs file access waits reconfig mutex. So
> put del_gendisk after releasing reconfig mutex.
>
> But there is still a window that sysfs can be accessed between mddev_unlock
> and del_gendisk. So some actions (add disk, change level, .e.g) can happen
> which lead unexpected results. MD_DELETED is used to resolve this problem.
> MD_DELETED is set before releasing reconfig mutex and it should be checked
> for these sysfs access which need reconfig mutex. For sysfs access which
> don't need reconfig mutex, del_gendisk will wait them to finish.
>
> But it doesn't need to do this in function mddev_lock_nointr. There are
> ten places that call it.
> * Five of them are in dm raid which we don't need to care. MD_DELETED is
> only used for md raid.
> * stop_sync_thread, md_do_sync and md_start_sync are related sync request,
> and it needs to wait sync thread to finish before stopping an array.
> * md_ioctl: md_open is called before md_ioctl, so ->openers is added. It
> will fail to stop the array. So it doesn't need to check MD_DELETED here
> * md_set_readonly:
> It needs to call mddev_set_closing_and_sync_blockdev when setting readonly
> or read_auto. So it will fail to stop the array too because MD_CLOSING is
> already set.
>
> Reviewed-by: Yu Kuai <yukuai3(a)huawei.com>
> Signed-off-by: Xiao Ni <xni(a)redhat.com>
> Link: https://lore.kernel.org/linux-raid/20250611073108.25463-2-xni@redhat.com
> Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index b086cbf24086..8e3939c0d2ed 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -639,9 +639,6 @@ static void __mddev_put(struct mddev *mddev)
> mddev->ctime || mddev->hold_active)
> return;
>
> - /* Array is not configured at all, and not held active, so destroy it */
> - set_bit(MD_DELETED, &mddev->flags);
> -
> /*
> * Call queue_work inside the spinlock so that flush_workqueue() after
> * mddev_find will succeed in waiting for the work to be done.
> @@ -837,6 +834,16 @@ void mddev_unlock(struct mddev *mddev)
> kobject_del(&rdev->kobj);
> export_rdev(rdev, mddev);
> }
> +
> + /* Call del_gendisk after release reconfig_mutex to avoid
> + * deadlock (e.g. call del_gendisk under the lock and an
> + * access to sysfs files waits the lock)
> + * And MD_DELETED is only used for md raid which is set in
> + * do_md_stop. dm raid only uses md_stop to stop. So dm raid
> + * doesn't need to check MD_DELETED when getting reconfig lock
> + */
> + if (test_bit(MD_DELETED, &mddev->flags))
> + del_gendisk(mddev->gendisk);
> }
> EXPORT_SYMBOL_GPL(mddev_unlock);
>
> @@ -5616,19 +5623,30 @@ md_attr_store(struct kobject *kobj, struct attribute *attr,
> struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
> struct mddev *mddev = container_of(kobj, struct mddev, kobj);
> ssize_t rv;
> + struct kernfs_node *kn = NULL;
>
> if (!entry->store)
> return -EIO;
> if (!capable(CAP_SYS_ADMIN))
> return -EACCES;
> +
> + if (entry->store == array_state_store && cmd_match(page, "clear"))
> + kn = sysfs_break_active_protection(kobj, attr);
> +
> spin_lock(&all_mddevs_lock);
> if (!mddev_get(mddev)) {
> spin_unlock(&all_mddevs_lock);
> + if (kn)
> + sysfs_unbreak_active_protection(kn);
> return -EBUSY;
> }
> spin_unlock(&all_mddevs_lock);
> rv = entry->store(mddev, page, length);
> mddev_put(mddev);
> +
> + if (kn)
> + sysfs_unbreak_active_protection(kn);
> +
> return rv;
> }
>
> @@ -5636,12 +5654,6 @@ static void md_kobj_release(struct kobject *ko)
> {
> struct mddev *mddev = container_of(ko, struct mddev, kobj);
>
> - if (mddev->sysfs_state)
> - sysfs_put(mddev->sysfs_state);
> - if (mddev->sysfs_level)
> - sysfs_put(mddev->sysfs_level);
> -
> - del_gendisk(mddev->gendisk);
> put_disk(mddev->gendisk);
> }
>
> @@ -6531,8 +6543,9 @@ static int do_md_stop(struct mddev *mddev, int mode,
> mddev->bitmap_info.offset = 0;
>
> export_array(mddev);
> -
> md_clean(mddev);
> + set_bit(MD_DELETED, &mddev->flags);
> +
> if (mddev->hold_active == UNTIL_STOP)
> mddev->hold_active = 0;
> }
> diff --git a/drivers/md/md.h b/drivers/md/md.h
> index 46995558d3bd..0a7c9122db50 100644
> --- a/drivers/md/md.h
> +++ b/drivers/md/md.h
> @@ -589,11 +589,26 @@ static inline bool is_md_suspended(struct mddev *mddev)
>
> static inline int __must_check mddev_lock(struct mddev *mddev)
> {
> - return mutex_lock_interruptible(&mddev->reconfig_mutex);
> + int ret;
> +
> + ret = mutex_lock_interruptible(&mddev->reconfig_mutex);
> +
> + /* MD_DELETED is set in do_md_stop with reconfig_mutex.
> + * So check it here.
> + */
> + if (!ret && test_bit(MD_DELETED, &mddev->flags)) {
> + ret = -ENODEV;
> + mutex_unlock(&mddev->reconfig_mutex);
> + }
> +
> + return ret;
> }
>
> /* Sometimes we need to take the lock in a situation where
> * failure due to interrupts is not acceptable.
> + * It doesn't need to check MD_DELETED here, the owner which
> + * holds the lock here can't be stopped. And all paths can't
> + * call this function after do_md_stop.
> */
> static inline void mddev_lock_nointr(struct mddev *mddev)
> {
> @@ -602,7 +617,14 @@ static inline void mddev_lock_nointr(struct mddev *mddev)
>
> static inline int mddev_trylock(struct mddev *mddev)
> {
> - return mutex_trylock(&mddev->reconfig_mutex);
> + int ret;
> +
> + ret = mutex_trylock(&mddev->reconfig_mutex);
> + if (!ret && test_bit(MD_DELETED, &mddev->flags)) {
> + ret = -ENODEV;
> + mutex_unlock(&mddev->reconfig_mutex);
> + }
> + return ret;
> }
> extern void mddev_unlock(struct mddev *mddev);
>
> .
>
Hi,
The first four patches in this series are miscellaneous fixes and
improvements in the Cadence and TI CSI-RX drivers around probing, fwnode
and link creation.
The last two patches add support for transmitting multiple pixels per
clock on the internal bus between Cadence CSI-RX bridge and TI CSI-RX
wrapper. As this internal bus is 32-bit wide, the maximum number of
pixels that can be transmitted per cycle depend upon the format's bit
width. Secondly, the downstream element must support unpacking of
multiple pixels.
Thus we export a module function that can be used by the downstream
driver to negotiate the pixels per cycle on the output pixel stream of
the Cadence bridge.
Signed-off-by: Jai Luthra <jai.luthra(a)ideasonboard.com>
---
Changes in v4:
- Rebase on top of v6.17-rc1
- Add missing include for linux/export.h in cdns-csi2rx.c
- Link to v3: https://lore.kernel.org/r/20250626-probe_fixes-v3-0-83e735ae466e@ideasonboa…
Changes in v3:
- Move cdns-csi2rx header to include/media
- Export symbol from cdns-csi2rx.c to be used only through
the j721e-csi2rx.c module namespace
- Other minor fixes suggested by Sakari
- Add Abhilash's T-by tags
- Link to v2: https://lore.kernel.org/r/20250410-probe_fixes-v2-0-801bc6eebdea@ideasonboa…
Changes in v2:
- Rebase on v6.15-rc1
- Fix lkp warnings in PATCH 5/6 missing header for FIELD_PREP
- Add R-By tags from Devarsh and Changhuang
- Link to v1: https://lore.kernel.org/r/20250324-probe_fixes-v1-0-5cd5b9e1cfac@ideasonboa…
---
Jai Luthra (6):
media: ti: j721e-csi2rx: Use devm_of_platform_populate
media: ti: j721e-csi2rx: Use fwnode_get_named_child_node
media: ti: j721e-csi2rx: Fix source subdev link creation
media: cadence: csi2rx: Implement get_fwnode_pad op
media: cadence: cdns-csi2rx: Support multiple pixels per clock cycle
media: ti: j721e-csi2rx: Support multiple pixels per clock
MAINTAINERS | 1 +
drivers/media/platform/cadence/cdns-csi2rx.c | 75 ++++++++++++++++------
drivers/media/platform/ti/Kconfig | 3 +-
.../media/platform/ti/j721e-csi2rx/j721e-csi2rx.c | 65 ++++++++++++++-----
include/media/cadence/cdns-csi2rx.h | 19 ++++++
5 files changed, 128 insertions(+), 35 deletions(-)
---
base-commit: 8f5ae30d69d7543eee0d70083daf4de8fe15d585
change-id: 20250314-probe_fixes-7e0ec33c7fee
Best regards,
--
Jai Luthra <jai.luthra(a)ideasonboard.com>
The patch titled
Subject: proc: fix missing pde_set_flags() for net proc files
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
proc-fix-missing-pde_set_flags-for-net-proc-files.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: wangzijie <wangzijie1(a)honor.com>
Subject: proc: fix missing pde_set_flags() for net proc files
Date: Mon, 18 Aug 2025 20:31:02 +0800
To avoid potential UAF issues during module removal races, we use
pde_set_flags() to save proc_ops flags in PDE itself before
proc_register(), and then use pde_has_proc_*() helpers instead of directly
dereferencing pde->proc_ops->*.
However, the pde_set_flags() call was missing when creating net related
proc files. This omission caused incorrect behavior which FMODE_LSEEK was
being cleared inappropriately in proc_reg_open() for net proc files. Lars
reported it in this link[1].
Fix this by ensuring pde_set_flags() is called when register proc entry,
and add NULL check for proc_ops in pde_set_flags().
[1]: https://lore.kernel.org/all/20250815195616.64497967@chagall.paradoxon.rec/
Link: https://lkml.kernel.org/r/20250818123102.959595-1-wangzijie1@honor.com
Fixes: ff7ec8dc1b64 ("proc: use the same treatment to check proc_lseek as ones for proc_read_iter et.al)
Signed-off-by: wangzijie <wangzijie1(a)honor.com>
Reported-by: Lars Wendler <polynomial-c(a)gmx.de>
Cc: Alexei Starovoitov <ast(a)kernel.org>
Cc: Alexey Dobriyan <adobriyan(a)gmail.com>
Cc: Al Viro <viro(a)zeniv.linux.org.uk>
Cc: "Edgecombe, Rick P" <rick.p.edgecombe(a)intel.com>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Jiri Slaby <jirislaby(a)kernel.org>
Cc: Kirill A. Shutemov <k.shutemov(a)gmail.com>
Cc: wangzijie <wangzijie1(a)honor.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/proc/generic.c | 36 +++++++++++++++++++-----------------
1 file changed, 19 insertions(+), 17 deletions(-)
--- a/fs/proc/generic.c~proc-fix-missing-pde_set_flags-for-net-proc-files
+++ a/fs/proc/generic.c
@@ -367,6 +367,23 @@ static const struct inode_operations pro
.setattr = proc_notify_change,
};
+static void pde_set_flags(struct proc_dir_entry *pde)
+{
+ if (!pde->proc_ops)
+ return;
+
+ if (pde->proc_ops->proc_flags & PROC_ENTRY_PERMANENT)
+ pde->flags |= PROC_ENTRY_PERMANENT;
+ if (pde->proc_ops->proc_read_iter)
+ pde->flags |= PROC_ENTRY_proc_read_iter;
+#ifdef CONFIG_COMPAT
+ if (pde->proc_ops->proc_compat_ioctl)
+ pde->flags |= PROC_ENTRY_proc_compat_ioctl;
+#endif
+ if (pde->proc_ops->proc_lseek)
+ pde->flags |= PROC_ENTRY_proc_lseek;
+}
+
/* returns the registered entry, or frees dp and returns NULL on failure */
struct proc_dir_entry *proc_register(struct proc_dir_entry *dir,
struct proc_dir_entry *dp)
@@ -374,6 +391,8 @@ struct proc_dir_entry *proc_register(str
if (proc_alloc_inum(&dp->low_ino))
goto out_free_entry;
+ pde_set_flags(dp);
+
write_lock(&proc_subdir_lock);
dp->parent = dir;
if (pde_subdir_insert(dir, dp) == false) {
@@ -561,20 +580,6 @@ struct proc_dir_entry *proc_create_reg(c
return p;
}
-static void pde_set_flags(struct proc_dir_entry *pde)
-{
- if (pde->proc_ops->proc_flags & PROC_ENTRY_PERMANENT)
- pde->flags |= PROC_ENTRY_PERMANENT;
- if (pde->proc_ops->proc_read_iter)
- pde->flags |= PROC_ENTRY_proc_read_iter;
-#ifdef CONFIG_COMPAT
- if (pde->proc_ops->proc_compat_ioctl)
- pde->flags |= PROC_ENTRY_proc_compat_ioctl;
-#endif
- if (pde->proc_ops->proc_lseek)
- pde->flags |= PROC_ENTRY_proc_lseek;
-}
-
struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
struct proc_dir_entry *parent,
const struct proc_ops *proc_ops, void *data)
@@ -585,7 +590,6 @@ struct proc_dir_entry *proc_create_data(
if (!p)
return NULL;
p->proc_ops = proc_ops;
- pde_set_flags(p);
return proc_register(parent, p);
}
EXPORT_SYMBOL(proc_create_data);
@@ -636,7 +640,6 @@ struct proc_dir_entry *proc_create_seq_p
p->proc_ops = &proc_seq_ops;
p->seq_ops = ops;
p->state_size = state_size;
- pde_set_flags(p);
return proc_register(parent, p);
}
EXPORT_SYMBOL(proc_create_seq_private);
@@ -667,7 +670,6 @@ struct proc_dir_entry *proc_create_singl
return NULL;
p->proc_ops = &proc_single_ops;
p->single_show = show;
- pde_set_flags(p);
return proc_register(parent, p);
}
EXPORT_SYMBOL(proc_create_single_data);
_
Patches currently in -mm which might be from wangzijie1(a)honor.com are
proc-fix-missing-pde_set_flags-for-net-proc-files.patch
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 005b0a0c24e1628313e951516b675109a92cacfe
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081848-kilobyte-skirmish-7ccd@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 005b0a0c24e1628313e951516b675109a92cacfe Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Fri, 18 Jul 2025 13:07:29 +0100
Subject: [PATCH] btrfs: send: use fallocate for hole punching with send stream
v2
Currently holes are sent as writes full of zeroes, which results in
unnecessarily using disk space at the receiving end and increasing the
stream size.
In some cases we avoid sending writes of zeroes, like during a full
send operation where we just skip writes for holes.
But for some cases we fill previous holes with writes of zeroes too, like
in this scenario:
1) We have a file with a hole in the range [2M, 3M), we snapshot the
subvolume and do a full send. The range [2M, 3M) stays as a hole at
the receiver since we skip sending write commands full of zeroes;
2) We punch a hole for the range [3M, 4M) in our file, so that now it
has a 2M hole in the range [2M, 4M), and snapshot the subvolume.
Now if we do an incremental send, we will send write commands full
of zeroes for the range [2M, 4M), removing the hole for [2M, 3M) at
the receiver.
We could improve cases such as this last one by doing additional
comparisons of file extent items (or their absence) between the parent
and send snapshots, but that's a lot of code to add plus additional CPU
and IO costs.
Since the send stream v2 already has a fallocate command and btrfs-progs
implements a callback to execute fallocate since the send stream v2
support was added to it, update the kernel to use fallocate for punching
holes for V2+ streams.
Test coverage is provided by btrfs/284 which is a version of btrfs/007
that exercises send stream v2 instead of v1, using fsstress with random
operations and fssum to verify file contents.
Link: https://github.com/kdave/btrfs-progs/issues/1001
CC: stable(a)vger.kernel.org # 6.1+
Reviewed-by: Boris Burkov <boris(a)bur.io>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 09822e766e41..7664025a5af4 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -4,6 +4,7 @@
*/
#include <linux/bsearch.h>
+#include <linux/falloc.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/sort.h>
@@ -5405,6 +5406,30 @@ static int send_update_extent(struct send_ctx *sctx,
return ret;
}
+static int send_fallocate(struct send_ctx *sctx, u32 mode, u64 offset, u64 len)
+{
+ struct fs_path *path;
+ int ret;
+
+ path = get_cur_inode_path(sctx);
+ if (IS_ERR(path))
+ return PTR_ERR(path);
+
+ ret = begin_cmd(sctx, BTRFS_SEND_C_FALLOCATE);
+ if (ret < 0)
+ return ret;
+
+ TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
+ TLV_PUT_U32(sctx, BTRFS_SEND_A_FALLOCATE_MODE, mode);
+ TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
+ TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len);
+
+ ret = send_cmd(sctx);
+
+tlv_put_failure:
+ return ret;
+}
+
static int send_hole(struct send_ctx *sctx, u64 end)
{
struct fs_path *p = NULL;
@@ -5412,6 +5437,14 @@ static int send_hole(struct send_ctx *sctx, u64 end)
u64 offset = sctx->cur_inode_last_extent;
int ret = 0;
+ /*
+ * Starting with send stream v2 we have fallocate and can use it to
+ * punch holes instead of sending writes full of zeroes.
+ */
+ if (proto_cmd_ok(sctx, BTRFS_SEND_C_FALLOCATE))
+ return send_fallocate(sctx, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ offset, end - offset);
+
/*
* A hole that starts at EOF or beyond it. Since we do not yet support
* fallocate (for extent preallocation and hole punching), sending a
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x d2845519b0723c5d5a0266cbf410495f9b8fd65c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081859-matcher-handprint-c398@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d2845519b0723c5d5a0266cbf410495f9b8fd65c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch(a)lst.de>
Date: Wed, 23 Jul 2025 14:19:44 +0200
Subject: [PATCH] xfs: fully decouple XFS_IBULK* flags from XFS_IWALK* flags
Fix up xfs_inumbers to now pass in the XFS_IBULK* flags into the flags
argument to xfs_inobt_walk, which expects the XFS_IWALK* flags.
Currently passing the wrong flags works for non-debug builds because
the only XFS_IWALK* flag has the same encoding as the corresponding
XFS_IBULK* flag, but in debug builds it can trigger an assert that no
incorrect flag is passed. Instead just extra the relevant flag.
Fixes: 5b35d922c52798 ("xfs: Decouple XFS_IBULK flags from XFS_IWALK flags")
Cc: <stable(a)vger.kernel.org> # v5.19
Reported-by: cen zhang <zzzccc427(a)gmail.com>
Signed-off-by: Christoph Hellwig <hch(a)lst.de>
Reviewed-by: Darrick J. Wong <djwong(a)kernel.org>
Signed-off-by: Carlos Maiolino <cem(a)kernel.org>
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index c8c9b8d8309f..5116842420b2 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -447,17 +447,21 @@ xfs_inumbers(
.breq = breq,
};
struct xfs_trans *tp;
+ unsigned int iwalk_flags = 0;
int error = 0;
if (xfs_bulkstat_already_done(breq->mp, breq->startino))
return 0;
+ if (breq->flags & XFS_IBULK_SAME_AG)
+ iwalk_flags |= XFS_IWALK_SAME_AG;
+
/*
* Grab an empty transaction so that we can use its recursive buffer
* locking abilities to detect cycles in the inobt without deadlocking.
*/
tp = xfs_trans_alloc_empty(breq->mp);
- error = xfs_inobt_walk(breq->mp, tp, breq->startino, breq->flags,
+ error = xfs_inobt_walk(breq->mp, tp, breq->startino, iwalk_flags,
xfs_inumbers_walk, breq->icount, &ic);
xfs_trans_cancel(tp);
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 1ef94169db0958d6de39f9ea6e063ce887342e2d
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081854-eject-aloft-03ff@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1ef94169db0958d6de39f9ea6e063ce887342e2d Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu(a)suse.com>
Date: Wed, 2 Jul 2025 15:08:13 +0930
Subject: [PATCH] btrfs: populate otime when logging an inode item
[TEST FAILURE WITH EXPERIMENTAL FEATURES]
When running test case generic/508, the test case will fail with the new
btrfs shutdown support:
generic/508 - output mismatch (see /home/adam/xfstests/results//generic/508.out.bad)
--- tests/generic/508.out 2022-05-11 11:25:30.806666664 +0930
+++ /home/adam/xfstests/results//generic/508.out.bad 2025-07-02 14:53:22.401824212 +0930
@@ -1,2 +1,6 @@
QA output created by 508
Silence is golden
+Before:
+After : stat.btime = Thu Jan 1 09:30:00 1970
+Before:
+After : stat.btime = Wed Jul 2 14:53:22 2025
...
(Run 'diff -u /home/adam/xfstests/tests/generic/508.out /home/adam/xfstests/results//generic/508.out.bad' to see the entire diff)
Ran: generic/508
Failures: generic/508
Failed 1 of 1 tests
Please note that the test case requires shutdown support, thus the test
case will be skipped using the current upstream kernel, as it doesn't
have shutdown ioctl support.
[CAUSE]
The direct cause the 0 time stamp in the log tree:
leaf 30507008 items 2 free space 16057 generation 9 owner TREE_LOG
leaf 30507008 flags 0x1(WRITTEN) backref revision 1
checksum stored e522548d
checksum calced e522548d
fs uuid 57d45451-481e-43e4-aa93-289ad707a3a0
chunk uuid d52bd3fd-5163-4337-98a7-7986993ad398
item 0 key (257 INODE_ITEM 0) itemoff 16123 itemsize 160
generation 9 transid 9 size 0 nbytes 0
block group 0 mode 100644 links 1 uid 0 gid 0 rdev 0
sequence 1 flags 0x0(none)
atime 1751432947.492000000 (2025-07-02 14:39:07)
ctime 1751432947.492000000 (2025-07-02 14:39:07)
mtime 1751432947.492000000 (2025-07-02 14:39:07)
otime 0.0 (1970-01-01 09:30:00) <<<
But the old fs tree has all the correct time stamp:
btrfs-progs v6.12
fs tree key (FS_TREE ROOT_ITEM 0)
leaf 30425088 items 2 free space 16061 generation 5 owner FS_TREE
leaf 30425088 flags 0x1(WRITTEN) backref revision 1
checksum stored 48f6c57e
checksum calced 48f6c57e
fs uuid 57d45451-481e-43e4-aa93-289ad707a3a0
chunk uuid d52bd3fd-5163-4337-98a7-7986993ad398
item 0 key (256 INODE_ITEM 0) itemoff 16123 itemsize 160
generation 3 transid 0 size 0 nbytes 16384
block group 0 mode 40755 links 1 uid 0 gid 0 rdev 0
sequence 0 flags 0x0(none)
atime 1751432947.0 (2025-07-02 14:39:07)
ctime 1751432947.0 (2025-07-02 14:39:07)
mtime 1751432947.0 (2025-07-02 14:39:07)
otime 1751432947.0 (2025-07-02 14:39:07) <<<
The root cause is that fill_inode_item() in tree-log.c is only
populating a/c/m time, not the otime (or btime in statx output).
Part of the reason is that, the vfs inode only has a/c/m time, no native
btime support yet.
[FIX]
Thankfully btrfs has its otime stored in btrfs_inode::i_otime_sec and
btrfs_inode::i_otime_nsec.
So what we really need is just fill the otime time stamp in
fill_inode_item() of tree-log.c
There is another fill_inode_item() in inode.c, which is doing the proper
otime population.
Fixes: 94edf4ae43a5 ("Btrfs: don't bother committing delayed inode updates when fsyncing")
CC: stable(a)vger.kernel.org
Reviewed-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 1e805dabfc4b..ab0815d9e7e5 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4233,6 +4233,9 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
btrfs_set_timespec_sec(leaf, &item->ctime, inode_get_ctime_sec(inode));
btrfs_set_timespec_nsec(leaf, &item->ctime, inode_get_ctime_nsec(inode));
+ btrfs_set_timespec_sec(leaf, &item->otime, BTRFS_I(inode)->i_otime_sec);
+ btrfs_set_timespec_nsec(leaf, &item->otime, BTRFS_I(inode)->i_otime_nsec);
+
/*
* We do not need to set the nbytes field, in fact during a fast fsync
* its value may not even be correct, since a fast fsync does not wait
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x d2845519b0723c5d5a0266cbf410495f9b8fd65c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081859-buckskin-outwit-e0f0@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d2845519b0723c5d5a0266cbf410495f9b8fd65c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch(a)lst.de>
Date: Wed, 23 Jul 2025 14:19:44 +0200
Subject: [PATCH] xfs: fully decouple XFS_IBULK* flags from XFS_IWALK* flags
Fix up xfs_inumbers to now pass in the XFS_IBULK* flags into the flags
argument to xfs_inobt_walk, which expects the XFS_IWALK* flags.
Currently passing the wrong flags works for non-debug builds because
the only XFS_IWALK* flag has the same encoding as the corresponding
XFS_IBULK* flag, but in debug builds it can trigger an assert that no
incorrect flag is passed. Instead just extra the relevant flag.
Fixes: 5b35d922c52798 ("xfs: Decouple XFS_IBULK flags from XFS_IWALK flags")
Cc: <stable(a)vger.kernel.org> # v5.19
Reported-by: cen zhang <zzzccc427(a)gmail.com>
Signed-off-by: Christoph Hellwig <hch(a)lst.de>
Reviewed-by: Darrick J. Wong <djwong(a)kernel.org>
Signed-off-by: Carlos Maiolino <cem(a)kernel.org>
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index c8c9b8d8309f..5116842420b2 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -447,17 +447,21 @@ xfs_inumbers(
.breq = breq,
};
struct xfs_trans *tp;
+ unsigned int iwalk_flags = 0;
int error = 0;
if (xfs_bulkstat_already_done(breq->mp, breq->startino))
return 0;
+ if (breq->flags & XFS_IBULK_SAME_AG)
+ iwalk_flags |= XFS_IWALK_SAME_AG;
+
/*
* Grab an empty transaction so that we can use its recursive buffer
* locking abilities to detect cycles in the inobt without deadlocking.
*/
tp = xfs_trans_alloc_empty(breq->mp);
- error = xfs_inobt_walk(breq->mp, tp, breq->startino, breq->flags,
+ error = xfs_inobt_walk(breq->mp, tp, breq->startino, iwalk_flags,
xfs_inumbers_walk, breq->icount, &ic);
xfs_trans_cancel(tp);
The patch titled
Subject: mm/mremap: fix WARN with uffd that has remap events disabled
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-mremap-fix-warn-with-uffd-that-has-remap-events-disabled.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: David Hildenbrand <david(a)redhat.com>
Subject: mm/mremap: fix WARN with uffd that has remap events disabled
Date: Mon, 18 Aug 2025 19:53:58 +0200
Registering userfaultd on a VMA that spans at least one PMD and then
mremap()'ing that VMA can trigger a WARN when recovering from a failed
page table move due to a page table allocation error.
The code ends up doing the right thing (recurse, avoiding moving actual
page tables), but triggering that WARN is unpleasant:
WARNING: CPU: 2 PID: 6133 at mm/mremap.c:357 move_normal_pmd mm/mremap.c:357 [inline]
WARNING: CPU: 2 PID: 6133 at mm/mremap.c:357 move_pgt_entry mm/mremap.c:595 [inline]
WARNING: CPU: 2 PID: 6133 at mm/mremap.c:357 move_page_tables+0x3832/0x44a0 mm/mremap.c:852
Modules linked in:
CPU: 2 UID: 0 PID: 6133 Comm: syz.0.19 Not tainted 6.17.0-rc1-syzkaller-00004-g53e760d89498 #0 PREEMPT(full)
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014
RIP: 0010:move_normal_pmd mm/mremap.c:357 [inline]
RIP: 0010:move_pgt_entry mm/mremap.c:595 [inline]
RIP: 0010:move_page_tables+0x3832/0x44a0 mm/mremap.c:852
Code: ...
RSP: 0018:ffffc900037a76d8 EFLAGS: 00010293
RAX: 0000000000000000 RBX: 0000000032930007 RCX: ffffffff820c6645
RDX: ffff88802e56a440 RSI: ffffffff820c7201 RDI: 0000000000000007
RBP: ffff888037728fc0 R08: 0000000000000007 R09: 0000000000000000
R10: 0000000032930007 R11: 0000000000000000 R12: 0000000000000000
R13: ffffc900037a79a8 R14: 0000000000000001 R15: dffffc0000000000
FS: 000055556316a500(0000) GS:ffff8880d68bc000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000001b30863fff CR3: 0000000050171000 CR4: 0000000000352ef0
Call Trace:
<TASK>
copy_vma_and_data+0x468/0x790 mm/mremap.c:1215
move_vma+0x548/0x1780 mm/mremap.c:1282
mremap_to+0x1b7/0x450 mm/mremap.c:1406
do_mremap+0xfad/0x1f80 mm/mremap.c:1921
__do_sys_mremap+0x119/0x170 mm/mremap.c:1977
do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
do_syscall_64+0xcd/0x4c0 arch/x86/entry/syscall_64.c:94
entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7f00d0b8ebe9
Code: ...
RSP: 002b:00007ffe5ea5ee98 EFLAGS: 00000246 ORIG_RAX: 0000000000000019
RAX: ffffffffffffffda RBX: 00007f00d0db5fa0 RCX: 00007f00d0b8ebe9
RDX: 0000000000400000 RSI: 0000000000c00000 RDI: 0000200000000000
RBP: 00007ffe5ea5eef0 R08: 0000200000c00000 R09: 0000000000000000
R10: 0000000000000003 R11: 0000000000000246 R12: 0000000000000002
R13: 00007f00d0db5fa0 R14: 00007f00d0db5fa0 R15: 0000000000000005
</TASK>
The underlying issue is that we recurse during the original page table
move, but not during the recovery move.
Fix it by checking for both VMAs and performing the check before the
pmd_none() sanity check.
Add a new helper where we perform+document that check for the PMD and PUD
level.
Thanks to Harry for bisecting.
Link: https://lkml.kernel.org/r/20250818175358.1184757-1-david@redhat.com
Fixes: 0cef0bb836e ("mm: clear uffd-wp PTE/PMD state on mremap()")
Signed-off-by: David Hildenbrand <david(a)redhat.com>
Reported-by: syzbot+4d9a13f0797c46a29e42(a)syzkaller.appspotmail.com
Closes: https://lkml.kernel.org/r/689bb893.050a0220.7f033.013a.GAE@google.com
Cc: "Liam R. Howlett" <Liam.Howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Jann Horn <jannh(a)google.com>
Cc: Pedro Falcato <pfalcato(a)suse.de>
Cc: Harry Yoo <harry.yoo(a)oracle.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/mremap.c | 41 +++++++++++++++++++++++------------------
1 file changed, 23 insertions(+), 18 deletions(-)
--- a/mm/mremap.c~mm-mremap-fix-warn-with-uffd-that-has-remap-events-disabled
+++ a/mm/mremap.c
@@ -323,6 +323,25 @@ static inline bool arch_supports_page_ta
}
#endif
+static inline bool uffd_supports_page_table_move(struct pagetable_move_control *pmc)
+{
+ /*
+ * If we are moving a VMA that has uffd-wp registered but with
+ * remap events disabled (new VMA will not be registered with uffd), we
+ * need to ensure that the uffd-wp state is cleared from all pgtables.
+ * This means recursing into lower page tables in move_page_tables().
+ *
+ * We might get called with VMAs reversed when recovering from a
+ * failed page table move. In that case, the
+ * "old"-but-actually-"originally new" VMA during recovery will not have
+ * a uffd context. Recursing into lower page tables during the original
+ * move but not during the recovery move will cause trouble, because we
+ * run into already-existing page tables. So check both VMAs.
+ */
+ return !vma_has_uffd_without_event_remap(pmc->old) &&
+ !vma_has_uffd_without_event_remap(pmc->new);
+}
+
#ifdef CONFIG_HAVE_MOVE_PMD
static bool move_normal_pmd(struct pagetable_move_control *pmc,
pmd_t *old_pmd, pmd_t *new_pmd)
@@ -335,6 +354,8 @@ static bool move_normal_pmd(struct paget
if (!arch_supports_page_table_move())
return false;
+ if (!uffd_supports_page_table_move(pmc))
+ return false;
/*
* The destination pmd shouldn't be established, free_pgtables()
* should have released it.
@@ -361,15 +382,6 @@ static bool move_normal_pmd(struct paget
if (WARN_ON_ONCE(!pmd_none(*new_pmd)))
return false;
- /* If this pmd belongs to a uffd vma with remap events disabled, we need
- * to ensure that the uffd-wp state is cleared from all pgtables. This
- * means recursing into lower page tables in move_page_tables(), and we
- * can reuse the existing code if we simply treat the entry as "not
- * moved".
- */
- if (vma_has_uffd_without_event_remap(vma))
- return false;
-
/*
* We don't have to worry about the ordering of src and dst
* ptlocks because exclusive mmap_lock prevents deadlock.
@@ -418,6 +430,8 @@ static bool move_normal_pud(struct paget
if (!arch_supports_page_table_move())
return false;
+ if (!uffd_supports_page_table_move(pmc))
+ return false;
/*
* The destination pud shouldn't be established, free_pgtables()
* should have released it.
@@ -425,15 +439,6 @@ static bool move_normal_pud(struct paget
if (WARN_ON_ONCE(!pud_none(*new_pud)))
return false;
- /* If this pud belongs to a uffd vma with remap events disabled, we need
- * to ensure that the uffd-wp state is cleared from all pgtables. This
- * means recursing into lower page tables in move_page_tables(), and we
- * can reuse the existing code if we simply treat the entry as "not
- * moved".
- */
- if (vma_has_uffd_without_event_remap(vma))
- return false;
-
/*
* We don't have to worry about the ordering of src and dst
* ptlocks because exclusive mmap_lock prevents deadlock.
_
Patches currently in -mm which might be from david(a)redhat.com are
mm-mremap-fix-warn-with-uffd-that-has-remap-events-disabled.patch
mm-migrate-remove-migratepage_unmap.patch
treewide-remove-migratepage_success.patch
mm-huge_memory-move-more-common-code-into-insert_pmd.patch
mm-huge_memory-move-more-common-code-into-insert_pud.patch
mm-huge_memory-support-huge-zero-folio-in-vmf_insert_folio_pmd.patch
fs-dax-use-vmf_insert_folio_pmd-to-insert-the-huge-zero-folio.patch
mm-huge_memory-mark-pmd-mappings-of-the-huge-zero-folio-special.patch
powerpc-ptdump-rename-struct-pgtable_level-to-struct-ptdump_pglevel.patch
mm-rmap-convert-enum-rmap_level-to-enum-pgtable_level.patch
mm-memory-convert-print_bad_pte-to-print_bad_page_map.patch
mm-memory-factor-out-common-code-from-vm_normal_page_.patch
mm-introduce-and-use-vm_normal_page_pud.patch
mm-rename-vm_ops-find_special_page-to-vm_ops-find_normal_page.patch
prctl-extend-pr_set_thp_disable-to-optionally-exclude-vm_hugepage.patch
mm-huge_memory-convert-tva_flags-to-enum-tva_type.patch
mm-huge_memory-respect-madv_collapse-with-pr_thp_disable_except_advised.patch
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x d2845519b0723c5d5a0266cbf410495f9b8fd65c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081858-issuing-conclude-4ff3@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d2845519b0723c5d5a0266cbf410495f9b8fd65c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch(a)lst.de>
Date: Wed, 23 Jul 2025 14:19:44 +0200
Subject: [PATCH] xfs: fully decouple XFS_IBULK* flags from XFS_IWALK* flags
Fix up xfs_inumbers to now pass in the XFS_IBULK* flags into the flags
argument to xfs_inobt_walk, which expects the XFS_IWALK* flags.
Currently passing the wrong flags works for non-debug builds because
the only XFS_IWALK* flag has the same encoding as the corresponding
XFS_IBULK* flag, but in debug builds it can trigger an assert that no
incorrect flag is passed. Instead just extra the relevant flag.
Fixes: 5b35d922c52798 ("xfs: Decouple XFS_IBULK flags from XFS_IWALK flags")
Cc: <stable(a)vger.kernel.org> # v5.19
Reported-by: cen zhang <zzzccc427(a)gmail.com>
Signed-off-by: Christoph Hellwig <hch(a)lst.de>
Reviewed-by: Darrick J. Wong <djwong(a)kernel.org>
Signed-off-by: Carlos Maiolino <cem(a)kernel.org>
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index c8c9b8d8309f..5116842420b2 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -447,17 +447,21 @@ xfs_inumbers(
.breq = breq,
};
struct xfs_trans *tp;
+ unsigned int iwalk_flags = 0;
int error = 0;
if (xfs_bulkstat_already_done(breq->mp, breq->startino))
return 0;
+ if (breq->flags & XFS_IBULK_SAME_AG)
+ iwalk_flags |= XFS_IWALK_SAME_AG;
+
/*
* Grab an empty transaction so that we can use its recursive buffer
* locking abilities to detect cycles in the inobt without deadlocking.
*/
tp = xfs_trans_alloc_empty(breq->mp);
- error = xfs_inobt_walk(breq->mp, tp, breq->startino, breq->flags,
+ error = xfs_inobt_walk(breq->mp, tp, breq->startino, iwalk_flags,
xfs_inumbers_walk, breq->icount, &ic);
xfs_trans_cancel(tp);
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 005b0a0c24e1628313e951516b675109a92cacfe
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081840-stomp-enhance-b456@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 005b0a0c24e1628313e951516b675109a92cacfe Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Fri, 18 Jul 2025 13:07:29 +0100
Subject: [PATCH] btrfs: send: use fallocate for hole punching with send stream
v2
Currently holes are sent as writes full of zeroes, which results in
unnecessarily using disk space at the receiving end and increasing the
stream size.
In some cases we avoid sending writes of zeroes, like during a full
send operation where we just skip writes for holes.
But for some cases we fill previous holes with writes of zeroes too, like
in this scenario:
1) We have a file with a hole in the range [2M, 3M), we snapshot the
subvolume and do a full send. The range [2M, 3M) stays as a hole at
the receiver since we skip sending write commands full of zeroes;
2) We punch a hole for the range [3M, 4M) in our file, so that now it
has a 2M hole in the range [2M, 4M), and snapshot the subvolume.
Now if we do an incremental send, we will send write commands full
of zeroes for the range [2M, 4M), removing the hole for [2M, 3M) at
the receiver.
We could improve cases such as this last one by doing additional
comparisons of file extent items (or their absence) between the parent
and send snapshots, but that's a lot of code to add plus additional CPU
and IO costs.
Since the send stream v2 already has a fallocate command and btrfs-progs
implements a callback to execute fallocate since the send stream v2
support was added to it, update the kernel to use fallocate for punching
holes for V2+ streams.
Test coverage is provided by btrfs/284 which is a version of btrfs/007
that exercises send stream v2 instead of v1, using fsstress with random
operations and fssum to verify file contents.
Link: https://github.com/kdave/btrfs-progs/issues/1001
CC: stable(a)vger.kernel.org # 6.1+
Reviewed-by: Boris Burkov <boris(a)bur.io>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 09822e766e41..7664025a5af4 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -4,6 +4,7 @@
*/
#include <linux/bsearch.h>
+#include <linux/falloc.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/sort.h>
@@ -5405,6 +5406,30 @@ static int send_update_extent(struct send_ctx *sctx,
return ret;
}
+static int send_fallocate(struct send_ctx *sctx, u32 mode, u64 offset, u64 len)
+{
+ struct fs_path *path;
+ int ret;
+
+ path = get_cur_inode_path(sctx);
+ if (IS_ERR(path))
+ return PTR_ERR(path);
+
+ ret = begin_cmd(sctx, BTRFS_SEND_C_FALLOCATE);
+ if (ret < 0)
+ return ret;
+
+ TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
+ TLV_PUT_U32(sctx, BTRFS_SEND_A_FALLOCATE_MODE, mode);
+ TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
+ TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len);
+
+ ret = send_cmd(sctx);
+
+tlv_put_failure:
+ return ret;
+}
+
static int send_hole(struct send_ctx *sctx, u64 end)
{
struct fs_path *p = NULL;
@@ -5412,6 +5437,14 @@ static int send_hole(struct send_ctx *sctx, u64 end)
u64 offset = sctx->cur_inode_last_extent;
int ret = 0;
+ /*
+ * Starting with send stream v2 we have fallocate and can use it to
+ * punch holes instead of sending writes full of zeroes.
+ */
+ if (proto_cmd_ok(sctx, BTRFS_SEND_C_FALLOCATE))
+ return send_fallocate(sctx, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ offset, end - offset);
+
/*
* A hole that starts at EOF or beyond it. Since we do not yet support
* fallocate (for extent preallocation and hole punching), sending a
From: Su Hui <suhui(a)nfschina.com>
[ Upstream commit 7919407eca2ef562fa6c98c41cfdf6f6cdd69d92 ]
When encounters some errors like these:
xhci_hcd 0000:4a:00.2: xHCI dying or halted, can't queue_command
xhci_hcd 0000:4a:00.2: FIXME: allocate a command ring segment
usb usb5-port6: couldn't allocate usb_device
It's hard to know whether xhc_state is dying or halted. So it's better
to print xhc_state's value which can help locate the resaon of the bug.
Signed-off-by: Su Hui <suhui(a)nfschina.com>
Link: https://lore.kernel.org/r/20250725060117.1773770-1-suhui@nfschina.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
LLM Generated explanations, may be completely bogus:
**Backport Status: YES**
This commit is suitable for backporting to stable kernel trees for the
following reasons:
1. **Enhanced Debugging for Real-World Issues**: The commit improves
debugging of USB xHCI host controller failures by printing the actual
`xhc_state` value when `queue_command` fails. The commit message
shows real error messages users encounter ("xHCI dying or halted,
can't queue_command"), demonstrating this is a real-world debugging
problem.
2. **Minimal and Safe Change**: The change is extremely small and safe -
it only modifies a debug print statement from:
```c
xhci_dbg(xhci, "xHCI dying or halted, can't queue_command\n");
```
to:
```c
xhci_dbg(xhci, "xHCI dying or halted, can't queue_command. state:
0x%x\n", xhci->xhc_state);
```
3. **No Functional Changes**: This is a pure diagnostic improvement. It
doesn't change any logic, control flow, or data structures. It only
adds the state value (0x%x format) to an existing debug message.
4. **Important for Troubleshooting**: The xHCI driver is critical for
USB functionality, and when it fails with "dying or halted" states,
knowing the exact state helps diagnose whether:
- `XHCI_STATE_DYING` (0x1) - controller is dying
- `XHCI_STATE_HALTED` (0x2) - controller is halted
- Both states (0x3) - controller has both flags set
This distinction is valuable for debugging hardware issues, driver
bugs, or system problems.
5. **Zero Risk of Regression**: Adding a parameter to a debug print
statement has no risk of introducing regressions. The worst case is
the debug message prints the state value.
6. **Follows Stable Rules**: This meets stable kernel criteria as it:
- Fixes a real debugging limitation
- Is obviously correct
- Has been tested (signed-off and accepted by Greg KH)
- Is small (single line change)
- Doesn't add new features, just improves existing diagnostics
The commit helps system administrators and developers diagnose USB
issues more effectively by providing the actual state value rather than
just saying "dying or halted", making it a valuable debugging
enhancement for stable kernels.
drivers/usb/host/xhci-ring.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 2ff8787f753c..19978f02bb9e 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -4378,7 +4378,8 @@ static int queue_command(struct xhci_hcd *xhci, struct xhci_command *cmd,
if ((xhci->xhc_state & XHCI_STATE_DYING) ||
(xhci->xhc_state & XHCI_STATE_HALTED)) {
- xhci_dbg(xhci, "xHCI dying or halted, can't queue_command\n");
+ xhci_dbg(xhci, "xHCI dying or halted, can't queue_command. state: 0x%x\n",
+ xhci->xhc_state);
return -ESHUTDOWN;
}
--
2.39.5
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 62be7afcc13b2727bdc6a4c91aefed6b452e6ecc
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081853-parrot-skeleton-78e1@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 62be7afcc13b2727bdc6a4c91aefed6b452e6ecc Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota(a)wdc.com>
Date: Sun, 29 Jun 2025 23:18:29 +0900
Subject: [PATCH] btrfs: zoned: requeue to unused block group list if zone
finish failed
btrfs_zone_finish() can fail for several reason. If it is -EAGAIN, we need
to try it again later. So, put the block group to the retry list properly.
Failing to do so will keep the removable block group intact until remount
and can causes unnecessary ENOSPC.
Fixes: 74e91b12b115 ("btrfs: zoned: zone finish unused block group")
CC: stable(a)vger.kernel.org # 6.1+
Reviewed-by: Johannes Thumshirn <johannes.thumshirn(a)wdc.com>
Signed-off-by: Naohiro Aota <naohiro.aota(a)wdc.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 3ddf9fe52b9d..47c6d040176c 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1639,8 +1639,10 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
ret = btrfs_zone_finish(block_group);
if (ret < 0) {
btrfs_dec_block_group_ro(block_group);
- if (ret == -EAGAIN)
+ if (ret == -EAGAIN) {
+ btrfs_link_bg_list(block_group, &retry_list);
ret = 0;
+ }
goto next;
}
The patch below does not apply to the 6.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.15.y
git checkout FETCH_HEAD
git cherry-pick -x d2845519b0723c5d5a0266cbf410495f9b8fd65c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081857-glitter-hummus-4836@gregkh' --subject-prefix 'PATCH 6.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d2845519b0723c5d5a0266cbf410495f9b8fd65c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch(a)lst.de>
Date: Wed, 23 Jul 2025 14:19:44 +0200
Subject: [PATCH] xfs: fully decouple XFS_IBULK* flags from XFS_IWALK* flags
Fix up xfs_inumbers to now pass in the XFS_IBULK* flags into the flags
argument to xfs_inobt_walk, which expects the XFS_IWALK* flags.
Currently passing the wrong flags works for non-debug builds because
the only XFS_IWALK* flag has the same encoding as the corresponding
XFS_IBULK* flag, but in debug builds it can trigger an assert that no
incorrect flag is passed. Instead just extra the relevant flag.
Fixes: 5b35d922c52798 ("xfs: Decouple XFS_IBULK flags from XFS_IWALK flags")
Cc: <stable(a)vger.kernel.org> # v5.19
Reported-by: cen zhang <zzzccc427(a)gmail.com>
Signed-off-by: Christoph Hellwig <hch(a)lst.de>
Reviewed-by: Darrick J. Wong <djwong(a)kernel.org>
Signed-off-by: Carlos Maiolino <cem(a)kernel.org>
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index c8c9b8d8309f..5116842420b2 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -447,17 +447,21 @@ xfs_inumbers(
.breq = breq,
};
struct xfs_trans *tp;
+ unsigned int iwalk_flags = 0;
int error = 0;
if (xfs_bulkstat_already_done(breq->mp, breq->startino))
return 0;
+ if (breq->flags & XFS_IBULK_SAME_AG)
+ iwalk_flags |= XFS_IWALK_SAME_AG;
+
/*
* Grab an empty transaction so that we can use its recursive buffer
* locking abilities to detect cycles in the inobt without deadlocking.
*/
tp = xfs_trans_alloc_empty(breq->mp);
- error = xfs_inobt_walk(breq->mp, tp, breq->startino, breq->flags,
+ error = xfs_inobt_walk(breq->mp, tp, breq->startino, iwalk_flags,
xfs_inumbers_walk, breq->icount, &ic);
xfs_trans_cancel(tp);
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 33e8f24b52d2796b8cfb28c19a1a7dd6476323a8
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081821-debating-askew-bf1e@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 33e8f24b52d2796b8cfb28c19a1a7dd6476323a8 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 19 May 2025 11:07:29 +0100
Subject: [PATCH] btrfs: abort transaction on unexpected eb generation at
btrfs_copy_root()
If we find an unexpected generation for the extent buffer we are cloning
at btrfs_copy_root(), we just WARN_ON() and don't error out and abort the
transaction, meaning we allow to persist metadata with an unexpected
generation. Instead of warning only, abort the transaction and return
-EUCLEAN.
CC: stable(a)vger.kernel.org # 6.1+
Reviewed-by: Daniel Vacek <neelx(a)suse.com>
Reviewed-by: Qu Wenruo <wqu(a)suse.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index ae6cd77282f5..a5ee6ce312cf 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -283,7 +283,14 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);
- WARN_ON(btrfs_header_generation(buf) > trans->transid);
+ if (unlikely(btrfs_header_generation(buf) > trans->transid)) {
+ btrfs_tree_unlock(cow);
+ free_extent_buffer(cow);
+ ret = -EUCLEAN;
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
+
if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
ret = btrfs_inc_ref(trans, root, cow, 1);
if (ret)
The patch below does not apply to the 6.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.16.y
git checkout FETCH_HEAD
git cherry-pick -x d2845519b0723c5d5a0266cbf410495f9b8fd65c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081857-swerve-preschool-2c2c@gregkh' --subject-prefix 'PATCH 6.16.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d2845519b0723c5d5a0266cbf410495f9b8fd65c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch(a)lst.de>
Date: Wed, 23 Jul 2025 14:19:44 +0200
Subject: [PATCH] xfs: fully decouple XFS_IBULK* flags from XFS_IWALK* flags
Fix up xfs_inumbers to now pass in the XFS_IBULK* flags into the flags
argument to xfs_inobt_walk, which expects the XFS_IWALK* flags.
Currently passing the wrong flags works for non-debug builds because
the only XFS_IWALK* flag has the same encoding as the corresponding
XFS_IBULK* flag, but in debug builds it can trigger an assert that no
incorrect flag is passed. Instead just extra the relevant flag.
Fixes: 5b35d922c52798 ("xfs: Decouple XFS_IBULK flags from XFS_IWALK flags")
Cc: <stable(a)vger.kernel.org> # v5.19
Reported-by: cen zhang <zzzccc427(a)gmail.com>
Signed-off-by: Christoph Hellwig <hch(a)lst.de>
Reviewed-by: Darrick J. Wong <djwong(a)kernel.org>
Signed-off-by: Carlos Maiolino <cem(a)kernel.org>
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index c8c9b8d8309f..5116842420b2 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -447,17 +447,21 @@ xfs_inumbers(
.breq = breq,
};
struct xfs_trans *tp;
+ unsigned int iwalk_flags = 0;
int error = 0;
if (xfs_bulkstat_already_done(breq->mp, breq->startino))
return 0;
+ if (breq->flags & XFS_IBULK_SAME_AG)
+ iwalk_flags |= XFS_IWALK_SAME_AG;
+
/*
* Grab an empty transaction so that we can use its recursive buffer
* locking abilities to detect cycles in the inobt without deadlocking.
*/
tp = xfs_trans_alloc_empty(breq->mp);
- error = xfs_inobt_walk(breq->mp, tp, breq->startino, breq->flags,
+ error = xfs_inobt_walk(breq->mp, tp, breq->startino, iwalk_flags,
xfs_inumbers_walk, breq->icount, &ic);
xfs_trans_cancel(tp);
The patch below does not apply to the 6.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.16.y
git checkout FETCH_HEAD
git cherry-pick -x ad580dfa388fabb52af033e3f8cc5d04be985e54
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081814-monsoon-supermom-44bb@gregkh' --subject-prefix 'PATCH 6.16.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ad580dfa388fabb52af033e3f8cc5d04be985e54 Mon Sep 17 00:00:00 2001
From: Leo Martins <loemra.dev(a)gmail.com>
Date: Mon, 21 Jul 2025 10:49:16 -0700
Subject: [PATCH] btrfs: fix subpage deadlock in
try_release_subpage_extent_buffer()
There is a potential deadlock that can happen in
try_release_subpage_extent_buffer() because the irq-safe xarray spin
lock fs_info->buffer_tree is being acquired before the irq-unsafe
eb->refs_lock.
This leads to the potential race:
// T1 (random eb->refs user) // T2 (release folio)
spin_lock(&eb->refs_lock);
// interrupt
end_bbio_meta_write()
btrfs_meta_folio_clear_writeback()
btree_release_folio()
folio_test_writeback() //false
try_release_extent_buffer()
try_release_subpage_extent_buffer()
xa_lock_irq(&fs_info->buffer_tree)
spin_lock(&eb->refs_lock); // blocked; held by T1
buffer_tree_clear_mark()
xas_lock_irqsave() // blocked; held by T2
I believe that the spin lock can safely be replaced by an rcu_read_lock.
The xa_for_each loop does not need the spin lock as it's already
internally protected by the rcu_read_lock. The extent buffer is also
protected by the rcu_read_lock so it won't be freed before we take the
eb->refs_lock and check the ref count.
The rcu_read_lock is taken and released every iteration, just like the
spin lock, which means we're not protected against concurrent
insertions into the xarray. This is fine because we rely on
folio->private to detect if there are any ebs remaining in the folio.
There is already some precedent for this with find_extent_buffer_nolock,
which loads an extent buffer from the xarray with only rcu_read_lock.
lockdep warning:
=====================================================
WARNING: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected
6.16.0-0_fbk701_debug_rc0_123_g4c06e63b9203 #1 Tainted: G E N
-----------------------------------------------------
kswapd0/66 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire:
ffff000011ffd600 (&eb->refs_lock){+.+.}-{3:3}, at: try_release_extent_buffer+0x18c/0x560
and this task is already holding:
ffff0000c1d91b88 (&buffer_xa_class){-.-.}-{3:3}, at: try_release_extent_buffer+0x13c/0x560
which would create a new lock dependency:
(&buffer_xa_class){-.-.}-{3:3} -> (&eb->refs_lock){+.+.}-{3:3}
but this new dependency connects a HARDIRQ-irq-safe lock:
(&buffer_xa_class){-.-.}-{3:3}
... which became HARDIRQ-irq-safe at:
lock_acquire+0x178/0x358
_raw_spin_lock_irqsave+0x60/0x88
buffer_tree_clear_mark+0xc4/0x160
end_bbio_meta_write+0x238/0x398
btrfs_bio_end_io+0x1f8/0x330
btrfs_orig_write_end_io+0x1c4/0x2c0
bio_endio+0x63c/0x678
blk_update_request+0x1c4/0xa00
blk_mq_end_request+0x54/0x88
virtblk_request_done+0x124/0x1d0
blk_mq_complete_request+0x84/0xa0
virtblk_done+0x130/0x238
vring_interrupt+0x130/0x288
__handle_irq_event_percpu+0x1e8/0x708
handle_irq_event+0x98/0x1b0
handle_fasteoi_irq+0x264/0x7c0
generic_handle_domain_irq+0xa4/0x108
gic_handle_irq+0x7c/0x1a0
do_interrupt_handler+0xe4/0x148
el1_interrupt+0x30/0x50
el1h_64_irq_handler+0x14/0x20
el1h_64_irq+0x6c/0x70
_raw_spin_unlock_irq+0x38/0x70
__run_timer_base+0xdc/0x5e0
run_timer_softirq+0xa0/0x138
handle_softirqs.llvm.13542289750107964195+0x32c/0xbd0
____do_softirq.llvm.17674514681856217165+0x18/0x28
call_on_irq_stack+0x24/0x30
__irq_exit_rcu+0x164/0x430
irq_exit_rcu+0x18/0x88
el1_interrupt+0x34/0x50
el1h_64_irq_handler+0x14/0x20
el1h_64_irq+0x6c/0x70
arch_local_irq_enable+0x4/0x8
do_idle+0x1a0/0x3b8
cpu_startup_entry+0x60/0x80
rest_init+0x204/0x228
start_kernel+0x394/0x3f0
__primary_switched+0x8c/0x8958
to a HARDIRQ-irq-unsafe lock:
(&eb->refs_lock){+.+.}-{3:3}
... which became HARDIRQ-irq-unsafe at:
...
lock_acquire+0x178/0x358
_raw_spin_lock+0x4c/0x68
free_extent_buffer_stale+0x2c/0x170
btrfs_read_sys_array+0x1b0/0x338
open_ctree+0xeb0/0x1df8
btrfs_get_tree+0xb60/0x1110
vfs_get_tree+0x8c/0x250
fc_mount+0x20/0x98
btrfs_get_tree+0x4a4/0x1110
vfs_get_tree+0x8c/0x250
do_new_mount+0x1e0/0x6c0
path_mount+0x4ec/0xa58
__arm64_sys_mount+0x370/0x490
invoke_syscall+0x6c/0x208
el0_svc_common+0x14c/0x1b8
do_el0_svc+0x4c/0x60
el0_svc+0x4c/0x160
el0t_64_sync_handler+0x70/0x100
el0t_64_sync+0x168/0x170
other info that might help us debug this:
Possible interrupt unsafe locking scenario:
CPU0 CPU1
---- ----
lock(&eb->refs_lock);
local_irq_disable();
lock(&buffer_xa_class);
lock(&eb->refs_lock);
<Interrupt>
lock(&buffer_xa_class);
*** DEADLOCK ***
2 locks held by kswapd0/66:
#0: ffff800085506e40 (fs_reclaim){+.+.}-{0:0}, at: balance_pgdat+0xe8/0xe50
#1: ffff0000c1d91b88 (&buffer_xa_class){-.-.}-{3:3}, at: try_release_extent_buffer+0x13c/0x560
Link: https://www.kernel.org/doc/Documentation/locking/lockdep-design.rst#:~:text…
Fixes: 19d7f65f032f ("btrfs: convert the buffer_radix to an xarray")
CC: stable(a)vger.kernel.org # 6.16+
Reviewed-by: Boris Burkov <boris(a)bur.io>
Reviewed-by: Qu Wenruo <wqu(a)suse.com>
Signed-off-by: Leo Martins <loemra.dev(a)gmail.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 835b0deef9bb..f23d75986947 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4331,15 +4331,18 @@ static int try_release_subpage_extent_buffer(struct folio *folio)
unsigned long end = index + (PAGE_SIZE >> fs_info->nodesize_bits) - 1;
int ret;
- xa_lock_irq(&fs_info->buffer_tree);
+ rcu_read_lock();
xa_for_each_range(&fs_info->buffer_tree, index, eb, start, end) {
/*
* The same as try_release_extent_buffer(), to ensure the eb
* won't disappear out from under us.
*/
spin_lock(&eb->refs_lock);
+ rcu_read_unlock();
+
if (refcount_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
spin_unlock(&eb->refs_lock);
+ rcu_read_lock();
continue;
}
@@ -4358,11 +4361,10 @@ static int try_release_subpage_extent_buffer(struct folio *folio)
* check the folio private at the end. And
* release_extent_buffer() will release the refs_lock.
*/
- xa_unlock_irq(&fs_info->buffer_tree);
release_extent_buffer(eb);
- xa_lock_irq(&fs_info->buffer_tree);
+ rcu_read_lock();
}
- xa_unlock_irq(&fs_info->buffer_tree);
+ rcu_read_unlock();
/*
* Finally to check if we have cleared folio private, as if we have
@@ -4375,7 +4377,6 @@ static int try_release_subpage_extent_buffer(struct folio *folio)
ret = 0;
spin_unlock(&folio->mapping->i_private_lock);
return ret;
-
}
int try_release_extent_buffer(struct folio *folio)
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 33e8f24b52d2796b8cfb28c19a1a7dd6476323a8
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081821-startling-skiing-ca6d@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 33e8f24b52d2796b8cfb28c19a1a7dd6476323a8 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 19 May 2025 11:07:29 +0100
Subject: [PATCH] btrfs: abort transaction on unexpected eb generation at
btrfs_copy_root()
If we find an unexpected generation for the extent buffer we are cloning
at btrfs_copy_root(), we just WARN_ON() and don't error out and abort the
transaction, meaning we allow to persist metadata with an unexpected
generation. Instead of warning only, abort the transaction and return
-EUCLEAN.
CC: stable(a)vger.kernel.org # 6.1+
Reviewed-by: Daniel Vacek <neelx(a)suse.com>
Reviewed-by: Qu Wenruo <wqu(a)suse.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index ae6cd77282f5..a5ee6ce312cf 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -283,7 +283,14 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);
- WARN_ON(btrfs_header_generation(buf) > trans->transid);
+ if (unlikely(btrfs_header_generation(buf) > trans->transid)) {
+ btrfs_tree_unlock(cow);
+ free_extent_buffer(cow);
+ ret = -EUCLEAN;
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
+
if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
ret = btrfs_inc_ref(trans, root, cow, 1);
if (ret)
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 33e8f24b52d2796b8cfb28c19a1a7dd6476323a8
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081815-strut-suspend-7a53@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 33e8f24b52d2796b8cfb28c19a1a7dd6476323a8 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 19 May 2025 11:07:29 +0100
Subject: [PATCH] btrfs: abort transaction on unexpected eb generation at
btrfs_copy_root()
If we find an unexpected generation for the extent buffer we are cloning
at btrfs_copy_root(), we just WARN_ON() and don't error out and abort the
transaction, meaning we allow to persist metadata with an unexpected
generation. Instead of warning only, abort the transaction and return
-EUCLEAN.
CC: stable(a)vger.kernel.org # 6.1+
Reviewed-by: Daniel Vacek <neelx(a)suse.com>
Reviewed-by: Qu Wenruo <wqu(a)suse.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index ae6cd77282f5..a5ee6ce312cf 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -283,7 +283,14 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);
- WARN_ON(btrfs_header_generation(buf) > trans->transid);
+ if (unlikely(btrfs_header_generation(buf) > trans->transid)) {
+ btrfs_tree_unlock(cow);
+ free_extent_buffer(cow);
+ ret = -EUCLEAN;
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
+
if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
ret = btrfs_inc_ref(trans, root, cow, 1);
if (ret)
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 1f06c942aa709d397cf6bed577a0d10a61509667
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081810-washer-purchase-bdf5@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1f06c942aa709d397cf6bed577a0d10a61509667 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Sat, 7 Jun 2025 19:44:03 +0100
Subject: [PATCH] btrfs: always abort transaction on failure to add block group
to free space tree
Only one of the callers of __add_block_group_free_space() aborts the
transaction if the call fails, while the others don't do it and it's
either never done up the call chain or much higher in the call chain.
So make sure we abort the transaction at __add_block_group_free_space()
if it fails, which brings a couple benefits:
1) If some call chain never aborts the transaction, we avoid having some
metadata inconsistency because BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE is
cleared when we enter __add_block_group_free_space() and therefore
__add_block_group_free_space() is never called again to add the block
group items to the free space tree, since the function is only called
when that flag is set in a block group;
2) If the call chain already aborts the transaction, then we get a better
trace that points to the exact step from __add_block_group_free_space()
which failed, which is better for analysis.
So abort the transaction at __add_block_group_free_space() if any of its
steps fails.
CC: stable(a)vger.kernel.org # 6.6+
Reviewed-by: Boris Burkov <boris(a)bur.io>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 9eb9858e8e99..af005fb4b676 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -1431,12 +1431,17 @@ static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
set_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &block_group->runtime_flags);
ret = add_new_free_space_info(trans, block_group, path);
- if (ret)
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
return ret;
+ }
- return __add_to_free_space_tree(trans, block_group, path,
- block_group->start,
- block_group->length);
+ ret = __add_to_free_space_tree(trans, block_group, path,
+ block_group->start, block_group->length);
+ if (ret)
+ btrfs_abort_transaction(trans, ret);
+
+ return 0;
}
int add_block_group_free_space(struct btrfs_trans_handle *trans,
@@ -1461,9 +1466,6 @@ int add_block_group_free_space(struct btrfs_trans_handle *trans,
}
ret = __add_block_group_free_space(trans, block_group, path);
- if (ret)
- btrfs_abort_transaction(trans, ret);
-
out:
btrfs_free_path(path);
mutex_unlock(&block_group->free_space_lock);
The patch below does not apply to the 6.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.15.y
git checkout FETCH_HEAD
git cherry-pick -x 33e8f24b52d2796b8cfb28c19a1a7dd6476323a8
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081815-mothproof-embody-49e3@gregkh' --subject-prefix 'PATCH 6.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 33e8f24b52d2796b8cfb28c19a1a7dd6476323a8 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 19 May 2025 11:07:29 +0100
Subject: [PATCH] btrfs: abort transaction on unexpected eb generation at
btrfs_copy_root()
If we find an unexpected generation for the extent buffer we are cloning
at btrfs_copy_root(), we just WARN_ON() and don't error out and abort the
transaction, meaning we allow to persist metadata with an unexpected
generation. Instead of warning only, abort the transaction and return
-EUCLEAN.
CC: stable(a)vger.kernel.org # 6.1+
Reviewed-by: Daniel Vacek <neelx(a)suse.com>
Reviewed-by: Qu Wenruo <wqu(a)suse.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index ae6cd77282f5..a5ee6ce312cf 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -283,7 +283,14 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);
- WARN_ON(btrfs_header_generation(buf) > trans->transid);
+ if (unlikely(btrfs_header_generation(buf) > trans->transid)) {
+ btrfs_tree_unlock(cow);
+ free_extent_buffer(cow);
+ ret = -EUCLEAN;
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
+
if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
ret = btrfs_inc_ref(trans, root, cow, 1);
if (ret)
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 1f06c942aa709d397cf6bed577a0d10a61509667
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081809-unwatched-rejoicing-21e4@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1f06c942aa709d397cf6bed577a0d10a61509667 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Sat, 7 Jun 2025 19:44:03 +0100
Subject: [PATCH] btrfs: always abort transaction on failure to add block group
to free space tree
Only one of the callers of __add_block_group_free_space() aborts the
transaction if the call fails, while the others don't do it and it's
either never done up the call chain or much higher in the call chain.
So make sure we abort the transaction at __add_block_group_free_space()
if it fails, which brings a couple benefits:
1) If some call chain never aborts the transaction, we avoid having some
metadata inconsistency because BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE is
cleared when we enter __add_block_group_free_space() and therefore
__add_block_group_free_space() is never called again to add the block
group items to the free space tree, since the function is only called
when that flag is set in a block group;
2) If the call chain already aborts the transaction, then we get a better
trace that points to the exact step from __add_block_group_free_space()
which failed, which is better for analysis.
So abort the transaction at __add_block_group_free_space() if any of its
steps fails.
CC: stable(a)vger.kernel.org # 6.6+
Reviewed-by: Boris Burkov <boris(a)bur.io>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 9eb9858e8e99..af005fb4b676 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -1431,12 +1431,17 @@ static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
set_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &block_group->runtime_flags);
ret = add_new_free_space_info(trans, block_group, path);
- if (ret)
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
return ret;
+ }
- return __add_to_free_space_tree(trans, block_group, path,
- block_group->start,
- block_group->length);
+ ret = __add_to_free_space_tree(trans, block_group, path,
+ block_group->start, block_group->length);
+ if (ret)
+ btrfs_abort_transaction(trans, ret);
+
+ return 0;
}
int add_block_group_free_space(struct btrfs_trans_handle *trans,
@@ -1461,9 +1466,6 @@ int add_block_group_free_space(struct btrfs_trans_handle *trans,
}
ret = __add_block_group_free_space(trans, block_group, path);
- if (ret)
- btrfs_abort_transaction(trans, ret);
-
out:
btrfs_free_path(path);
mutex_unlock(&block_group->free_space_lock);
The patch below does not apply to the 6.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.16.y
git checkout FETCH_HEAD
git cherry-pick -x 33e8f24b52d2796b8cfb28c19a1a7dd6476323a8
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081814-cotton-endpoint-c7b3@gregkh' --subject-prefix 'PATCH 6.16.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 33e8f24b52d2796b8cfb28c19a1a7dd6476323a8 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 19 May 2025 11:07:29 +0100
Subject: [PATCH] btrfs: abort transaction on unexpected eb generation at
btrfs_copy_root()
If we find an unexpected generation for the extent buffer we are cloning
at btrfs_copy_root(), we just WARN_ON() and don't error out and abort the
transaction, meaning we allow to persist metadata with an unexpected
generation. Instead of warning only, abort the transaction and return
-EUCLEAN.
CC: stable(a)vger.kernel.org # 6.1+
Reviewed-by: Daniel Vacek <neelx(a)suse.com>
Reviewed-by: Qu Wenruo <wqu(a)suse.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index ae6cd77282f5..a5ee6ce312cf 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -283,7 +283,14 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);
- WARN_ON(btrfs_header_generation(buf) > trans->transid);
+ if (unlikely(btrfs_header_generation(buf) > trans->transid)) {
+ btrfs_tree_unlock(cow);
+ free_extent_buffer(cow);
+ ret = -EUCLEAN;
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
+
if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
ret = btrfs_inc_ref(trans, root, cow, 1);
if (ret)
The patch below does not apply to the 6.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.15.y
git checkout FETCH_HEAD
git cherry-pick -x 1f06c942aa709d397cf6bed577a0d10a61509667
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081808-companion-arson-989c@gregkh' --subject-prefix 'PATCH 6.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1f06c942aa709d397cf6bed577a0d10a61509667 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Sat, 7 Jun 2025 19:44:03 +0100
Subject: [PATCH] btrfs: always abort transaction on failure to add block group
to free space tree
Only one of the callers of __add_block_group_free_space() aborts the
transaction if the call fails, while the others don't do it and it's
either never done up the call chain or much higher in the call chain.
So make sure we abort the transaction at __add_block_group_free_space()
if it fails, which brings a couple benefits:
1) If some call chain never aborts the transaction, we avoid having some
metadata inconsistency because BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE is
cleared when we enter __add_block_group_free_space() and therefore
__add_block_group_free_space() is never called again to add the block
group items to the free space tree, since the function is only called
when that flag is set in a block group;
2) If the call chain already aborts the transaction, then we get a better
trace that points to the exact step from __add_block_group_free_space()
which failed, which is better for analysis.
So abort the transaction at __add_block_group_free_space() if any of its
steps fails.
CC: stable(a)vger.kernel.org # 6.6+
Reviewed-by: Boris Burkov <boris(a)bur.io>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 9eb9858e8e99..af005fb4b676 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -1431,12 +1431,17 @@ static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
set_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &block_group->runtime_flags);
ret = add_new_free_space_info(trans, block_group, path);
- if (ret)
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
return ret;
+ }
- return __add_to_free_space_tree(trans, block_group, path,
- block_group->start,
- block_group->length);
+ ret = __add_to_free_space_tree(trans, block_group, path,
+ block_group->start, block_group->length);
+ if (ret)
+ btrfs_abort_transaction(trans, ret);
+
+ return 0;
}
int add_block_group_free_space(struct btrfs_trans_handle *trans,
@@ -1461,9 +1466,6 @@ int add_block_group_free_space(struct btrfs_trans_handle *trans,
}
ret = __add_block_group_free_space(trans, block_group, path);
- if (ret)
- btrfs_abort_transaction(trans, ret);
-
out:
btrfs_free_path(path);
mutex_unlock(&block_group->free_space_lock);
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x e1249667750399a48cafcf5945761d39fa584edf
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081832-drainable-observant-d242@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e1249667750399a48cafcf5945761d39fa584edf Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 30 Jun 2025 13:19:20 +0100
Subject: [PATCH] btrfs: qgroup: fix race between quota disable and quota
rescan ioctl
There's a race between a task disabling quotas and another running the
rescan ioctl that can result in a use-after-free of qgroup records from
the fs_info->qgroup_tree rbtree.
This happens as follows:
1) Task A enters btrfs_ioctl_quota_rescan() -> btrfs_qgroup_rescan();
2) Task B enters btrfs_quota_disable() and calls
btrfs_qgroup_wait_for_completion(), which does nothing because at that
point fs_info->qgroup_rescan_running is false (it wasn't set yet by
task A);
3) Task B calls btrfs_free_qgroup_config() which starts freeing qgroups
from fs_info->qgroup_tree without taking the lock fs_info->qgroup_lock;
4) Task A enters qgroup_rescan_zero_tracking() which starts iterating
the fs_info->qgroup_tree tree while holding fs_info->qgroup_lock,
but task B is freeing qgroup records from that tree without holding
the lock, resulting in a use-after-free.
Fix this by taking fs_info->qgroup_lock at btrfs_free_qgroup_config().
Also at btrfs_qgroup_rescan() don't start the rescan worker if quotas
were already disabled.
Reported-by: cen zhang <zzzccc427(a)gmail.com>
Link: https://lore.kernel.org/linux-btrfs/CAFRLqsV+cMDETFuzqdKSHk_FDm6tneea45krsH…
CC: stable(a)vger.kernel.org # 6.1+
Reviewed-by: Boris Burkov <boris(a)bur.io>
Reviewed-by: Qu Wenruo <wqu(a)suse.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index b83d9534adae..310ca2dd9f24 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -636,22 +636,30 @@ bool btrfs_check_quota_leak(const struct btrfs_fs_info *fs_info)
/*
* This is called from close_ctree() or open_ctree() or btrfs_quota_disable(),
- * first two are in single-threaded paths.And for the third one, we have set
- * quota_root to be null with qgroup_lock held before, so it is safe to clean
- * up the in-memory structures without qgroup_lock held.
+ * first two are in single-threaded paths.
*/
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
{
struct rb_node *n;
struct btrfs_qgroup *qgroup;
+ /*
+ * btrfs_quota_disable() can be called concurrently with
+ * btrfs_qgroup_rescan() -> qgroup_rescan_zero_tracking(), so take the
+ * lock.
+ */
+ spin_lock(&fs_info->qgroup_lock);
while ((n = rb_first(&fs_info->qgroup_tree))) {
qgroup = rb_entry(n, struct btrfs_qgroup, node);
rb_erase(n, &fs_info->qgroup_tree);
__del_qgroup_rb(qgroup);
+ spin_unlock(&fs_info->qgroup_lock);
btrfs_sysfs_del_one_qgroup(fs_info, qgroup);
kfree(qgroup);
+ spin_lock(&fs_info->qgroup_lock);
}
+ spin_unlock(&fs_info->qgroup_lock);
+
/*
* We call btrfs_free_qgroup_config() when unmounting
* filesystem and disabling quota, so we set qgroup_ulist
@@ -4036,12 +4044,21 @@ btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
qgroup_rescan_zero_tracking(fs_info);
mutex_lock(&fs_info->qgroup_rescan_lock);
- fs_info->qgroup_rescan_running = true;
- btrfs_queue_work(fs_info->qgroup_rescan_workers,
- &fs_info->qgroup_rescan_work);
+ /*
+ * The rescan worker is only for full accounting qgroups, check if it's
+ * enabled as it is pointless to queue it otherwise. A concurrent quota
+ * disable may also have just cleared BTRFS_FS_QUOTA_ENABLED.
+ */
+ if (btrfs_qgroup_full_accounting(fs_info)) {
+ fs_info->qgroup_rescan_running = true;
+ btrfs_queue_work(fs_info->qgroup_rescan_workers,
+ &fs_info->qgroup_rescan_work);
+ } else {
+ ret = -ENOTCONN;
+ }
mutex_unlock(&fs_info->qgroup_rescan_lock);
- return 0;
+ return ret;
}
int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
The patch below does not apply to the 6.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.16.y
git checkout FETCH_HEAD
git cherry-pick -x 1f06c942aa709d397cf6bed577a0d10a61509667
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081808-unneeded-unstuffed-e294@gregkh' --subject-prefix 'PATCH 6.16.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1f06c942aa709d397cf6bed577a0d10a61509667 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Sat, 7 Jun 2025 19:44:03 +0100
Subject: [PATCH] btrfs: always abort transaction on failure to add block group
to free space tree
Only one of the callers of __add_block_group_free_space() aborts the
transaction if the call fails, while the others don't do it and it's
either never done up the call chain or much higher in the call chain.
So make sure we abort the transaction at __add_block_group_free_space()
if it fails, which brings a couple benefits:
1) If some call chain never aborts the transaction, we avoid having some
metadata inconsistency because BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE is
cleared when we enter __add_block_group_free_space() and therefore
__add_block_group_free_space() is never called again to add the block
group items to the free space tree, since the function is only called
when that flag is set in a block group;
2) If the call chain already aborts the transaction, then we get a better
trace that points to the exact step from __add_block_group_free_space()
which failed, which is better for analysis.
So abort the transaction at __add_block_group_free_space() if any of its
steps fails.
CC: stable(a)vger.kernel.org # 6.6+
Reviewed-by: Boris Burkov <boris(a)bur.io>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 9eb9858e8e99..af005fb4b676 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -1431,12 +1431,17 @@ static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
set_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &block_group->runtime_flags);
ret = add_new_free_space_info(trans, block_group, path);
- if (ret)
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
return ret;
+ }
- return __add_to_free_space_tree(trans, block_group, path,
- block_group->start,
- block_group->length);
+ ret = __add_to_free_space_tree(trans, block_group, path,
+ block_group->start, block_group->length);
+ if (ret)
+ btrfs_abort_transaction(trans, ret);
+
+ return 0;
}
int add_block_group_free_space(struct btrfs_trans_handle *trans,
@@ -1461,9 +1466,6 @@ int add_block_group_free_space(struct btrfs_trans_handle *trans,
}
ret = __add_block_group_free_space(trans, block_group, path);
- if (ret)
- btrfs_abort_transaction(trans, ret);
-
out:
btrfs_free_path(path);
mutex_unlock(&block_group->free_space_lock);
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x e1249667750399a48cafcf5945761d39fa584edf
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081831-cancel-lunchtime-0e51@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e1249667750399a48cafcf5945761d39fa584edf Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 30 Jun 2025 13:19:20 +0100
Subject: [PATCH] btrfs: qgroup: fix race between quota disable and quota
rescan ioctl
There's a race between a task disabling quotas and another running the
rescan ioctl that can result in a use-after-free of qgroup records from
the fs_info->qgroup_tree rbtree.
This happens as follows:
1) Task A enters btrfs_ioctl_quota_rescan() -> btrfs_qgroup_rescan();
2) Task B enters btrfs_quota_disable() and calls
btrfs_qgroup_wait_for_completion(), which does nothing because at that
point fs_info->qgroup_rescan_running is false (it wasn't set yet by
task A);
3) Task B calls btrfs_free_qgroup_config() which starts freeing qgroups
from fs_info->qgroup_tree without taking the lock fs_info->qgroup_lock;
4) Task A enters qgroup_rescan_zero_tracking() which starts iterating
the fs_info->qgroup_tree tree while holding fs_info->qgroup_lock,
but task B is freeing qgroup records from that tree without holding
the lock, resulting in a use-after-free.
Fix this by taking fs_info->qgroup_lock at btrfs_free_qgroup_config().
Also at btrfs_qgroup_rescan() don't start the rescan worker if quotas
were already disabled.
Reported-by: cen zhang <zzzccc427(a)gmail.com>
Link: https://lore.kernel.org/linux-btrfs/CAFRLqsV+cMDETFuzqdKSHk_FDm6tneea45krsH…
CC: stable(a)vger.kernel.org # 6.1+
Reviewed-by: Boris Burkov <boris(a)bur.io>
Reviewed-by: Qu Wenruo <wqu(a)suse.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index b83d9534adae..310ca2dd9f24 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -636,22 +636,30 @@ bool btrfs_check_quota_leak(const struct btrfs_fs_info *fs_info)
/*
* This is called from close_ctree() or open_ctree() or btrfs_quota_disable(),
- * first two are in single-threaded paths.And for the third one, we have set
- * quota_root to be null with qgroup_lock held before, so it is safe to clean
- * up the in-memory structures without qgroup_lock held.
+ * first two are in single-threaded paths.
*/
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
{
struct rb_node *n;
struct btrfs_qgroup *qgroup;
+ /*
+ * btrfs_quota_disable() can be called concurrently with
+ * btrfs_qgroup_rescan() -> qgroup_rescan_zero_tracking(), so take the
+ * lock.
+ */
+ spin_lock(&fs_info->qgroup_lock);
while ((n = rb_first(&fs_info->qgroup_tree))) {
qgroup = rb_entry(n, struct btrfs_qgroup, node);
rb_erase(n, &fs_info->qgroup_tree);
__del_qgroup_rb(qgroup);
+ spin_unlock(&fs_info->qgroup_lock);
btrfs_sysfs_del_one_qgroup(fs_info, qgroup);
kfree(qgroup);
+ spin_lock(&fs_info->qgroup_lock);
}
+ spin_unlock(&fs_info->qgroup_lock);
+
/*
* We call btrfs_free_qgroup_config() when unmounting
* filesystem and disabling quota, so we set qgroup_ulist
@@ -4036,12 +4044,21 @@ btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
qgroup_rescan_zero_tracking(fs_info);
mutex_lock(&fs_info->qgroup_rescan_lock);
- fs_info->qgroup_rescan_running = true;
- btrfs_queue_work(fs_info->qgroup_rescan_workers,
- &fs_info->qgroup_rescan_work);
+ /*
+ * The rescan worker is only for full accounting qgroups, check if it's
+ * enabled as it is pointless to queue it otherwise. A concurrent quota
+ * disable may also have just cleared BTRFS_FS_QUOTA_ENABLED.
+ */
+ if (btrfs_qgroup_full_accounting(fs_info)) {
+ fs_info->qgroup_rescan_running = true;
+ btrfs_queue_work(fs_info->qgroup_rescan_workers,
+ &fs_info->qgroup_rescan_work);
+ } else {
+ ret = -ENOTCONN;
+ }
mutex_unlock(&fs_info->qgroup_rescan_lock);
- return 0;
+ return ret;
}
int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 7ebf381a69421a88265d3c49cd0f007ba7336c9d
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081805-strongly-container-0be3@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7ebf381a69421a88265d3c49cd0f007ba7336c9d Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Fri, 11 Jul 2025 20:21:28 +0100
Subject: [PATCH] btrfs: don't ignore inode missing when replaying log tree
During log replay, at add_inode_ref(), we return -ENOENT if our current
inode isn't found on the subvolume tree or if a parent directory isn't
found. The error comes from btrfs_iget_logging() <- btrfs_iget() <-
btrfs_read_locked_inode().
The single caller of add_inode_ref(), replay_one_buffer(), ignores an
-ENOENT error because it expects that error to mean only that a parent
directory wasn't found and that is ok.
Before commit 5f61b961599a ("btrfs: fix inode lookup error handling during
log replay") we were converting any error when getting a parent directory
to -ENOENT and any error when getting the current inode to -EIO, so our
caller would fail log replay in case we can't find the current inode.
After that commit however in case the current inode is not found we return
-ENOENT to the caller and therefore it ignores the critical fact that the
current inode was not found in the subvolume tree.
Fix this by converting -ENOENT to 0 when we don't find a parent directory,
returning -ENOENT when we don't find the current inode and making the
caller, replay_one_buffer(), not ignore -ENOENT anymore.
Fixes: 5f61b961599a ("btrfs: fix inode lookup error handling during log replay")
CC: stable(a)vger.kernel.org # 6.16
Reviewed-by: Boris Burkov <boris(a)bur.io>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index ab0815d9e7e5..e3c77f3d092c 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1416,6 +1416,8 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
dir = btrfs_iget_logging(parent_objectid, root);
if (IS_ERR(dir)) {
ret = PTR_ERR(dir);
+ if (ret == -ENOENT)
+ ret = 0;
dir = NULL;
goto out;
}
@@ -1440,6 +1442,15 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
if (IS_ERR(dir)) {
ret = PTR_ERR(dir);
dir = NULL;
+ /*
+ * A new parent dir may have not been
+ * logged and not exist in the subvolume
+ * tree, see the comment above before
+ * the loop when getting the first
+ * parent dir.
+ */
+ if (ret == -ENOENT)
+ ret = 0;
goto out;
}
}
@@ -2551,9 +2562,8 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
key.type == BTRFS_INODE_EXTREF_KEY) {
ret = add_inode_ref(wc->trans, root, log, path,
eb, i, &key);
- if (ret && ret != -ENOENT)
+ if (ret)
break;
- ret = 0;
} else if (key.type == BTRFS_EXTENT_DATA_KEY) {
ret = replay_one_extent(wc->trans, root, path,
eb, i, &key);
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x e1249667750399a48cafcf5945761d39fa584edf
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081830-legroom-preshow-e033@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e1249667750399a48cafcf5945761d39fa584edf Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 30 Jun 2025 13:19:20 +0100
Subject: [PATCH] btrfs: qgroup: fix race between quota disable and quota
rescan ioctl
There's a race between a task disabling quotas and another running the
rescan ioctl that can result in a use-after-free of qgroup records from
the fs_info->qgroup_tree rbtree.
This happens as follows:
1) Task A enters btrfs_ioctl_quota_rescan() -> btrfs_qgroup_rescan();
2) Task B enters btrfs_quota_disable() and calls
btrfs_qgroup_wait_for_completion(), which does nothing because at that
point fs_info->qgroup_rescan_running is false (it wasn't set yet by
task A);
3) Task B calls btrfs_free_qgroup_config() which starts freeing qgroups
from fs_info->qgroup_tree without taking the lock fs_info->qgroup_lock;
4) Task A enters qgroup_rescan_zero_tracking() which starts iterating
the fs_info->qgroup_tree tree while holding fs_info->qgroup_lock,
but task B is freeing qgroup records from that tree without holding
the lock, resulting in a use-after-free.
Fix this by taking fs_info->qgroup_lock at btrfs_free_qgroup_config().
Also at btrfs_qgroup_rescan() don't start the rescan worker if quotas
were already disabled.
Reported-by: cen zhang <zzzccc427(a)gmail.com>
Link: https://lore.kernel.org/linux-btrfs/CAFRLqsV+cMDETFuzqdKSHk_FDm6tneea45krsH…
CC: stable(a)vger.kernel.org # 6.1+
Reviewed-by: Boris Burkov <boris(a)bur.io>
Reviewed-by: Qu Wenruo <wqu(a)suse.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index b83d9534adae..310ca2dd9f24 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -636,22 +636,30 @@ bool btrfs_check_quota_leak(const struct btrfs_fs_info *fs_info)
/*
* This is called from close_ctree() or open_ctree() or btrfs_quota_disable(),
- * first two are in single-threaded paths.And for the third one, we have set
- * quota_root to be null with qgroup_lock held before, so it is safe to clean
- * up the in-memory structures without qgroup_lock held.
+ * first two are in single-threaded paths.
*/
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
{
struct rb_node *n;
struct btrfs_qgroup *qgroup;
+ /*
+ * btrfs_quota_disable() can be called concurrently with
+ * btrfs_qgroup_rescan() -> qgroup_rescan_zero_tracking(), so take the
+ * lock.
+ */
+ spin_lock(&fs_info->qgroup_lock);
while ((n = rb_first(&fs_info->qgroup_tree))) {
qgroup = rb_entry(n, struct btrfs_qgroup, node);
rb_erase(n, &fs_info->qgroup_tree);
__del_qgroup_rb(qgroup);
+ spin_unlock(&fs_info->qgroup_lock);
btrfs_sysfs_del_one_qgroup(fs_info, qgroup);
kfree(qgroup);
+ spin_lock(&fs_info->qgroup_lock);
}
+ spin_unlock(&fs_info->qgroup_lock);
+
/*
* We call btrfs_free_qgroup_config() when unmounting
* filesystem and disabling quota, so we set qgroup_ulist
@@ -4036,12 +4044,21 @@ btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
qgroup_rescan_zero_tracking(fs_info);
mutex_lock(&fs_info->qgroup_rescan_lock);
- fs_info->qgroup_rescan_running = true;
- btrfs_queue_work(fs_info->qgroup_rescan_workers,
- &fs_info->qgroup_rescan_work);
+ /*
+ * The rescan worker is only for full accounting qgroups, check if it's
+ * enabled as it is pointless to queue it otherwise. A concurrent quota
+ * disable may also have just cleared BTRFS_FS_QUOTA_ENABLED.
+ */
+ if (btrfs_qgroup_full_accounting(fs_info)) {
+ fs_info->qgroup_rescan_running = true;
+ btrfs_queue_work(fs_info->qgroup_rescan_workers,
+ &fs_info->qgroup_rescan_work);
+ } else {
+ ret = -ENOTCONN;
+ }
mutex_unlock(&fs_info->qgroup_rescan_lock);
- return 0;
+ return ret;
}
int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 64690a90cd7c6db16d3af8616be1f4bf8d492850
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081847-user-synthesis-c726@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 64690a90cd7c6db16d3af8616be1f4bf8d492850 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum(a)suse.com>
Date: Thu, 17 Jul 2025 16:12:50 +0200
Subject: [PATCH] cdc-acm: fix race between initial clearing halt and open
On the devices that need their endpoints to get an
initial clear_halt, this needs to be done before
the devices can be opened. That means it needs to be
before the devices are registered.
Fixes: 15bf722e6f6c0 ("cdc-acm: Add support of ATOL FPrint fiscal printers")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Oliver Neukum <oneukum(a)suse.com>
Link: https://lore.kernel.org/r/20250717141259.2345605-1-oneukum@suse.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index c2ecfa3c8349..5a334e370f4d 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -1520,6 +1520,12 @@ static int acm_probe(struct usb_interface *intf,
goto err_remove_files;
}
+ if (quirks & CLEAR_HALT_CONDITIONS) {
+ /* errors intentionally ignored */
+ usb_clear_halt(usb_dev, acm->in);
+ usb_clear_halt(usb_dev, acm->out);
+ }
+
tty_dev = tty_port_register_device(&acm->port, acm_tty_driver, minor,
&control_interface->dev);
if (IS_ERR(tty_dev)) {
@@ -1527,11 +1533,6 @@ static int acm_probe(struct usb_interface *intf,
goto err_release_data_interface;
}
- if (quirks & CLEAR_HALT_CONDITIONS) {
- usb_clear_halt(usb_dev, acm->in);
- usb_clear_halt(usb_dev, acm->out);
- }
-
dev_info(&intf->dev, "ttyACM%d: USB ACM device\n", minor);
return 0;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 1e61f6ab08786d66a11cfc51e13d6f08a6b06c56
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081835-swaddling-wound-cef8@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1e61f6ab08786d66a11cfc51e13d6f08a6b06c56 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel(a)collabora.com>
Date: Fri, 4 Jul 2025 19:55:06 +0200
Subject: [PATCH] usb: typec: fusb302: cache PD RX state
This patch fixes a race condition communication error, which ends up in
PD hard resets when losing the race. Some systems, like the Radxa ROCK
5B are powered through USB-C without any backup power source and use a
FUSB302 chip to do the PD negotiation. This means it is quite important
to avoid hard resets, since that effectively kills the system's
power-supply.
I've found the following race condition while debugging unplanned power
loss during booting the board every now and then:
1. lots of TCPM/FUSB302/PD initialization stuff
2. TCPM ends up in SNK_WAIT_CAPABILITIES (tcpm_set_pd_rx is enabled here)
3. the remote PD source does not send anything, so TCPM does a SOFT RESET
4. TCPM ends up in SNK_WAIT_CAPABILITIES for the second time
(tcpm_set_pd_rx is enabled again, even though it is still on)
At this point I've seen broken CRC good messages being send by the
FUSB302 with a logic analyzer sniffing the CC lines. Also it looks like
messages are being lost and things generally going haywire with one of
the two sides doing a hard reset once a broken CRC good message was send
to the bus.
I think the system is running into a race condition, that the FIFOs are
being cleared and/or the automatic good CRC message generation flag is
being updated while a message is already arriving.
Let's avoid this by caching the PD RX enabled state, as we have already
processed anything in the FIFOs and are in a good state. As a side
effect that this also optimizes I2C bus usage :)
As far as I can tell the problem theoretically also exists when TCPM
enters SNK_WAIT_CAPABILITIES the first time, but I believe this is less
critical for the following reason:
On devices like the ROCK 5B, which are powered through a TCPM backed
USB-C port, the bootloader must have done some prior PD communication
(initial communication must happen within 5 seconds after plugging the
USB-C plug). This means the first time the kernel TCPM state machine
reaches SNK_WAIT_CAPABILITIES, the remote side is not sending messages
actively. On other devices a hard reset simply adds some extra delay and
things should be good afterwards.
Fixes: c034a43e72dda ("staging: typec: Fairchild FUSB302 Type-c chip driver")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel(a)collabora.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
Link: https://lore.kernel.org/r/20250704-fusb302-race-condition-fix-v1-1-239012c0…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/typec/tcpm/fusb302.c b/drivers/usb/typec/tcpm/fusb302.c
index f2801279c4b5..a4ff2403ddd6 100644
--- a/drivers/usb/typec/tcpm/fusb302.c
+++ b/drivers/usb/typec/tcpm/fusb302.c
@@ -104,6 +104,7 @@ struct fusb302_chip {
bool vconn_on;
bool vbus_on;
bool charge_on;
+ bool pd_rx_on;
bool vbus_present;
enum typec_cc_polarity cc_polarity;
enum typec_cc_status cc1;
@@ -841,6 +842,11 @@ static int tcpm_set_pd_rx(struct tcpc_dev *dev, bool on)
int ret = 0;
mutex_lock(&chip->lock);
+ if (chip->pd_rx_on == on) {
+ fusb302_log(chip, "pd is already %s", str_on_off(on));
+ goto done;
+ }
+
ret = fusb302_pd_rx_flush(chip);
if (ret < 0) {
fusb302_log(chip, "cannot flush pd rx buffer, ret=%d", ret);
@@ -863,6 +869,8 @@ static int tcpm_set_pd_rx(struct tcpc_dev *dev, bool on)
str_on_off(on), ret);
goto done;
}
+
+ chip->pd_rx_on = on;
fusb302_log(chip, "pd := %s", str_on_off(on));
done:
mutex_unlock(&chip->lock);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 64690a90cd7c6db16d3af8616be1f4bf8d492850
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081847-resident-transform-fcca@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 64690a90cd7c6db16d3af8616be1f4bf8d492850 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum(a)suse.com>
Date: Thu, 17 Jul 2025 16:12:50 +0200
Subject: [PATCH] cdc-acm: fix race between initial clearing halt and open
On the devices that need their endpoints to get an
initial clear_halt, this needs to be done before
the devices can be opened. That means it needs to be
before the devices are registered.
Fixes: 15bf722e6f6c0 ("cdc-acm: Add support of ATOL FPrint fiscal printers")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Oliver Neukum <oneukum(a)suse.com>
Link: https://lore.kernel.org/r/20250717141259.2345605-1-oneukum@suse.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index c2ecfa3c8349..5a334e370f4d 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -1520,6 +1520,12 @@ static int acm_probe(struct usb_interface *intf,
goto err_remove_files;
}
+ if (quirks & CLEAR_HALT_CONDITIONS) {
+ /* errors intentionally ignored */
+ usb_clear_halt(usb_dev, acm->in);
+ usb_clear_halt(usb_dev, acm->out);
+ }
+
tty_dev = tty_port_register_device(&acm->port, acm_tty_driver, minor,
&control_interface->dev);
if (IS_ERR(tty_dev)) {
@@ -1527,11 +1533,6 @@ static int acm_probe(struct usb_interface *intf,
goto err_release_data_interface;
}
- if (quirks & CLEAR_HALT_CONDITIONS) {
- usb_clear_halt(usb_dev, acm->in);
- usb_clear_halt(usb_dev, acm->out);
- }
-
dev_info(&intf->dev, "ttyACM%d: USB ACM device\n", minor);
return 0;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 1e61f6ab08786d66a11cfc51e13d6f08a6b06c56
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081834-staff-ranged-09c8@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1e61f6ab08786d66a11cfc51e13d6f08a6b06c56 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel(a)collabora.com>
Date: Fri, 4 Jul 2025 19:55:06 +0200
Subject: [PATCH] usb: typec: fusb302: cache PD RX state
This patch fixes a race condition communication error, which ends up in
PD hard resets when losing the race. Some systems, like the Radxa ROCK
5B are powered through USB-C without any backup power source and use a
FUSB302 chip to do the PD negotiation. This means it is quite important
to avoid hard resets, since that effectively kills the system's
power-supply.
I've found the following race condition while debugging unplanned power
loss during booting the board every now and then:
1. lots of TCPM/FUSB302/PD initialization stuff
2. TCPM ends up in SNK_WAIT_CAPABILITIES (tcpm_set_pd_rx is enabled here)
3. the remote PD source does not send anything, so TCPM does a SOFT RESET
4. TCPM ends up in SNK_WAIT_CAPABILITIES for the second time
(tcpm_set_pd_rx is enabled again, even though it is still on)
At this point I've seen broken CRC good messages being send by the
FUSB302 with a logic analyzer sniffing the CC lines. Also it looks like
messages are being lost and things generally going haywire with one of
the two sides doing a hard reset once a broken CRC good message was send
to the bus.
I think the system is running into a race condition, that the FIFOs are
being cleared and/or the automatic good CRC message generation flag is
being updated while a message is already arriving.
Let's avoid this by caching the PD RX enabled state, as we have already
processed anything in the FIFOs and are in a good state. As a side
effect that this also optimizes I2C bus usage :)
As far as I can tell the problem theoretically also exists when TCPM
enters SNK_WAIT_CAPABILITIES the first time, but I believe this is less
critical for the following reason:
On devices like the ROCK 5B, which are powered through a TCPM backed
USB-C port, the bootloader must have done some prior PD communication
(initial communication must happen within 5 seconds after plugging the
USB-C plug). This means the first time the kernel TCPM state machine
reaches SNK_WAIT_CAPABILITIES, the remote side is not sending messages
actively. On other devices a hard reset simply adds some extra delay and
things should be good afterwards.
Fixes: c034a43e72dda ("staging: typec: Fairchild FUSB302 Type-c chip driver")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel(a)collabora.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
Link: https://lore.kernel.org/r/20250704-fusb302-race-condition-fix-v1-1-239012c0…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/typec/tcpm/fusb302.c b/drivers/usb/typec/tcpm/fusb302.c
index f2801279c4b5..a4ff2403ddd6 100644
--- a/drivers/usb/typec/tcpm/fusb302.c
+++ b/drivers/usb/typec/tcpm/fusb302.c
@@ -104,6 +104,7 @@ struct fusb302_chip {
bool vconn_on;
bool vbus_on;
bool charge_on;
+ bool pd_rx_on;
bool vbus_present;
enum typec_cc_polarity cc_polarity;
enum typec_cc_status cc1;
@@ -841,6 +842,11 @@ static int tcpm_set_pd_rx(struct tcpc_dev *dev, bool on)
int ret = 0;
mutex_lock(&chip->lock);
+ if (chip->pd_rx_on == on) {
+ fusb302_log(chip, "pd is already %s", str_on_off(on));
+ goto done;
+ }
+
ret = fusb302_pd_rx_flush(chip);
if (ret < 0) {
fusb302_log(chip, "cannot flush pd rx buffer, ret=%d", ret);
@@ -863,6 +869,8 @@ static int tcpm_set_pd_rx(struct tcpc_dev *dev, bool on)
str_on_off(on), ret);
goto done;
}
+
+ chip->pd_rx_on = on;
fusb302_log(chip, "pd := %s", str_on_off(on));
done:
mutex_unlock(&chip->lock);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 1e61f6ab08786d66a11cfc51e13d6f08a6b06c56
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081833-chatting-dragging-a84b@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1e61f6ab08786d66a11cfc51e13d6f08a6b06c56 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel(a)collabora.com>
Date: Fri, 4 Jul 2025 19:55:06 +0200
Subject: [PATCH] usb: typec: fusb302: cache PD RX state
This patch fixes a race condition communication error, which ends up in
PD hard resets when losing the race. Some systems, like the Radxa ROCK
5B are powered through USB-C without any backup power source and use a
FUSB302 chip to do the PD negotiation. This means it is quite important
to avoid hard resets, since that effectively kills the system's
power-supply.
I've found the following race condition while debugging unplanned power
loss during booting the board every now and then:
1. lots of TCPM/FUSB302/PD initialization stuff
2. TCPM ends up in SNK_WAIT_CAPABILITIES (tcpm_set_pd_rx is enabled here)
3. the remote PD source does not send anything, so TCPM does a SOFT RESET
4. TCPM ends up in SNK_WAIT_CAPABILITIES for the second time
(tcpm_set_pd_rx is enabled again, even though it is still on)
At this point I've seen broken CRC good messages being send by the
FUSB302 with a logic analyzer sniffing the CC lines. Also it looks like
messages are being lost and things generally going haywire with one of
the two sides doing a hard reset once a broken CRC good message was send
to the bus.
I think the system is running into a race condition, that the FIFOs are
being cleared and/or the automatic good CRC message generation flag is
being updated while a message is already arriving.
Let's avoid this by caching the PD RX enabled state, as we have already
processed anything in the FIFOs and are in a good state. As a side
effect that this also optimizes I2C bus usage :)
As far as I can tell the problem theoretically also exists when TCPM
enters SNK_WAIT_CAPABILITIES the first time, but I believe this is less
critical for the following reason:
On devices like the ROCK 5B, which are powered through a TCPM backed
USB-C port, the bootloader must have done some prior PD communication
(initial communication must happen within 5 seconds after plugging the
USB-C plug). This means the first time the kernel TCPM state machine
reaches SNK_WAIT_CAPABILITIES, the remote side is not sending messages
actively. On other devices a hard reset simply adds some extra delay and
things should be good afterwards.
Fixes: c034a43e72dda ("staging: typec: Fairchild FUSB302 Type-c chip driver")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel(a)collabora.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
Link: https://lore.kernel.org/r/20250704-fusb302-race-condition-fix-v1-1-239012c0…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/typec/tcpm/fusb302.c b/drivers/usb/typec/tcpm/fusb302.c
index f2801279c4b5..a4ff2403ddd6 100644
--- a/drivers/usb/typec/tcpm/fusb302.c
+++ b/drivers/usb/typec/tcpm/fusb302.c
@@ -104,6 +104,7 @@ struct fusb302_chip {
bool vconn_on;
bool vbus_on;
bool charge_on;
+ bool pd_rx_on;
bool vbus_present;
enum typec_cc_polarity cc_polarity;
enum typec_cc_status cc1;
@@ -841,6 +842,11 @@ static int tcpm_set_pd_rx(struct tcpc_dev *dev, bool on)
int ret = 0;
mutex_lock(&chip->lock);
+ if (chip->pd_rx_on == on) {
+ fusb302_log(chip, "pd is already %s", str_on_off(on));
+ goto done;
+ }
+
ret = fusb302_pd_rx_flush(chip);
if (ret < 0) {
fusb302_log(chip, "cannot flush pd rx buffer, ret=%d", ret);
@@ -863,6 +869,8 @@ static int tcpm_set_pd_rx(struct tcpc_dev *dev, bool on)
str_on_off(on), ret);
goto done;
}
+
+ chip->pd_rx_on = on;
fusb302_log(chip, "pd := %s", str_on_off(on));
done:
mutex_unlock(&chip->lock);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 7f8fdd4dbffc05982b96caf586f77a014b2a9353
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081849-trio-prepay-4247@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7f8fdd4dbffc05982b96caf586f77a014b2a9353 Mon Sep 17 00:00:00 2001
From: Yunhui Cui <cuiyunhui(a)bytedance.com>
Date: Wed, 23 Jul 2025 10:33:22 +0800
Subject: [PATCH] serial: 8250: fix panic due to PSLVERR
When the PSLVERR_RESP_EN parameter is set to 1, the device generates
an error response if an attempt is made to read an empty RBR (Receive
Buffer Register) while the FIFO is enabled.
In serial8250_do_startup(), calling serial_port_out(port, UART_LCR,
UART_LCR_WLEN8) triggers dw8250_check_lcr(), which invokes
dw8250_force_idle() and serial8250_clear_and_reinit_fifos(). The latter
function enables the FIFO via serial_out(p, UART_FCR, p->fcr).
Execution proceeds to the serial_port_in(port, UART_RX).
This satisfies the PSLVERR trigger condition.
When another CPU (e.g., using printk()) is accessing the UART (UART
is busy), the current CPU fails the check (value & ~UART_LCR_SPAR) ==
(lcr & ~UART_LCR_SPAR) in dw8250_check_lcr(), causing it to enter
dw8250_force_idle().
Put serial_port_out(port, UART_LCR, UART_LCR_WLEN8) under the port->lock
to fix this issue.
Panic backtrace:
[ 0.442336] Oops - unknown exception [#1]
[ 0.442343] epc : dw8250_serial_in32+0x1e/0x4a
[ 0.442351] ra : serial8250_do_startup+0x2c8/0x88e
...
[ 0.442416] console_on_rootfs+0x26/0x70
Fixes: c49436b657d0 ("serial: 8250_dw: Improve unwritable LCR workaround")
Link: https://lore.kernel.org/all/84cydt5peu.fsf@jogness.linutronix.de/T/
Signed-off-by: Yunhui Cui <cuiyunhui(a)bytedance.com>
Reviewed-by: John Ogness <john.ogness(a)linutronix.de>
Cc: stable <stable(a)kernel.org>
Link: https://lore.kernel.org/r/20250723023322.464-2-cuiyunhui@bytedance.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 7eddcab318b4..2da9db960d09 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -2269,9 +2269,9 @@ static void serial8250_initialize(struct uart_port *port)
{
unsigned long flags;
+ uart_port_lock_irqsave(port, &flags);
serial_port_out(port, UART_LCR, UART_LCR_WLEN8);
- uart_port_lock_irqsave(port, &flags);
serial8250_init_mctrl(port);
serial8250_iir_txen_test(port);
uart_port_unlock_irqrestore(port, flags);
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 1e61f6ab08786d66a11cfc51e13d6f08a6b06c56
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081832-rental-utter-f3e1@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1e61f6ab08786d66a11cfc51e13d6f08a6b06c56 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel(a)collabora.com>
Date: Fri, 4 Jul 2025 19:55:06 +0200
Subject: [PATCH] usb: typec: fusb302: cache PD RX state
This patch fixes a race condition communication error, which ends up in
PD hard resets when losing the race. Some systems, like the Radxa ROCK
5B are powered through USB-C without any backup power source and use a
FUSB302 chip to do the PD negotiation. This means it is quite important
to avoid hard resets, since that effectively kills the system's
power-supply.
I've found the following race condition while debugging unplanned power
loss during booting the board every now and then:
1. lots of TCPM/FUSB302/PD initialization stuff
2. TCPM ends up in SNK_WAIT_CAPABILITIES (tcpm_set_pd_rx is enabled here)
3. the remote PD source does not send anything, so TCPM does a SOFT RESET
4. TCPM ends up in SNK_WAIT_CAPABILITIES for the second time
(tcpm_set_pd_rx is enabled again, even though it is still on)
At this point I've seen broken CRC good messages being send by the
FUSB302 with a logic analyzer sniffing the CC lines. Also it looks like
messages are being lost and things generally going haywire with one of
the two sides doing a hard reset once a broken CRC good message was send
to the bus.
I think the system is running into a race condition, that the FIFOs are
being cleared and/or the automatic good CRC message generation flag is
being updated while a message is already arriving.
Let's avoid this by caching the PD RX enabled state, as we have already
processed anything in the FIFOs and are in a good state. As a side
effect that this also optimizes I2C bus usage :)
As far as I can tell the problem theoretically also exists when TCPM
enters SNK_WAIT_CAPABILITIES the first time, but I believe this is less
critical for the following reason:
On devices like the ROCK 5B, which are powered through a TCPM backed
USB-C port, the bootloader must have done some prior PD communication
(initial communication must happen within 5 seconds after plugging the
USB-C plug). This means the first time the kernel TCPM state machine
reaches SNK_WAIT_CAPABILITIES, the remote side is not sending messages
actively. On other devices a hard reset simply adds some extra delay and
things should be good afterwards.
Fixes: c034a43e72dda ("staging: typec: Fairchild FUSB302 Type-c chip driver")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel(a)collabora.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
Link: https://lore.kernel.org/r/20250704-fusb302-race-condition-fix-v1-1-239012c0…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/typec/tcpm/fusb302.c b/drivers/usb/typec/tcpm/fusb302.c
index f2801279c4b5..a4ff2403ddd6 100644
--- a/drivers/usb/typec/tcpm/fusb302.c
+++ b/drivers/usb/typec/tcpm/fusb302.c
@@ -104,6 +104,7 @@ struct fusb302_chip {
bool vconn_on;
bool vbus_on;
bool charge_on;
+ bool pd_rx_on;
bool vbus_present;
enum typec_cc_polarity cc_polarity;
enum typec_cc_status cc1;
@@ -841,6 +842,11 @@ static int tcpm_set_pd_rx(struct tcpc_dev *dev, bool on)
int ret = 0;
mutex_lock(&chip->lock);
+ if (chip->pd_rx_on == on) {
+ fusb302_log(chip, "pd is already %s", str_on_off(on));
+ goto done;
+ }
+
ret = fusb302_pd_rx_flush(chip);
if (ret < 0) {
fusb302_log(chip, "cannot flush pd rx buffer, ret=%d", ret);
@@ -863,6 +869,8 @@ static int tcpm_set_pd_rx(struct tcpc_dev *dev, bool on)
str_on_off(on), ret);
goto done;
}
+
+ chip->pd_rx_on = on;
fusb302_log(chip, "pd := %s", str_on_off(on));
done:
mutex_unlock(&chip->lock);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 7f8fdd4dbffc05982b96caf586f77a014b2a9353
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081848-lemon-attic-be0e@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7f8fdd4dbffc05982b96caf586f77a014b2a9353 Mon Sep 17 00:00:00 2001
From: Yunhui Cui <cuiyunhui(a)bytedance.com>
Date: Wed, 23 Jul 2025 10:33:22 +0800
Subject: [PATCH] serial: 8250: fix panic due to PSLVERR
When the PSLVERR_RESP_EN parameter is set to 1, the device generates
an error response if an attempt is made to read an empty RBR (Receive
Buffer Register) while the FIFO is enabled.
In serial8250_do_startup(), calling serial_port_out(port, UART_LCR,
UART_LCR_WLEN8) triggers dw8250_check_lcr(), which invokes
dw8250_force_idle() and serial8250_clear_and_reinit_fifos(). The latter
function enables the FIFO via serial_out(p, UART_FCR, p->fcr).
Execution proceeds to the serial_port_in(port, UART_RX).
This satisfies the PSLVERR trigger condition.
When another CPU (e.g., using printk()) is accessing the UART (UART
is busy), the current CPU fails the check (value & ~UART_LCR_SPAR) ==
(lcr & ~UART_LCR_SPAR) in dw8250_check_lcr(), causing it to enter
dw8250_force_idle().
Put serial_port_out(port, UART_LCR, UART_LCR_WLEN8) under the port->lock
to fix this issue.
Panic backtrace:
[ 0.442336] Oops - unknown exception [#1]
[ 0.442343] epc : dw8250_serial_in32+0x1e/0x4a
[ 0.442351] ra : serial8250_do_startup+0x2c8/0x88e
...
[ 0.442416] console_on_rootfs+0x26/0x70
Fixes: c49436b657d0 ("serial: 8250_dw: Improve unwritable LCR workaround")
Link: https://lore.kernel.org/all/84cydt5peu.fsf@jogness.linutronix.de/T/
Signed-off-by: Yunhui Cui <cuiyunhui(a)bytedance.com>
Reviewed-by: John Ogness <john.ogness(a)linutronix.de>
Cc: stable <stable(a)kernel.org>
Link: https://lore.kernel.org/r/20250723023322.464-2-cuiyunhui@bytedance.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 7eddcab318b4..2da9db960d09 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -2269,9 +2269,9 @@ static void serial8250_initialize(struct uart_port *port)
{
unsigned long flags;
+ uart_port_lock_irqsave(port, &flags);
serial_port_out(port, UART_LCR, UART_LCR_WLEN8);
- uart_port_lock_irqsave(port, &flags);
serial8250_init_mctrl(port);
serial8250_iir_txen_test(port);
uart_port_unlock_irqrestore(port, flags);
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 1e61f6ab08786d66a11cfc51e13d6f08a6b06c56
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081831-untrimmed-dab-6b43@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1e61f6ab08786d66a11cfc51e13d6f08a6b06c56 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel(a)collabora.com>
Date: Fri, 4 Jul 2025 19:55:06 +0200
Subject: [PATCH] usb: typec: fusb302: cache PD RX state
This patch fixes a race condition communication error, which ends up in
PD hard resets when losing the race. Some systems, like the Radxa ROCK
5B are powered through USB-C without any backup power source and use a
FUSB302 chip to do the PD negotiation. This means it is quite important
to avoid hard resets, since that effectively kills the system's
power-supply.
I've found the following race condition while debugging unplanned power
loss during booting the board every now and then:
1. lots of TCPM/FUSB302/PD initialization stuff
2. TCPM ends up in SNK_WAIT_CAPABILITIES (tcpm_set_pd_rx is enabled here)
3. the remote PD source does not send anything, so TCPM does a SOFT RESET
4. TCPM ends up in SNK_WAIT_CAPABILITIES for the second time
(tcpm_set_pd_rx is enabled again, even though it is still on)
At this point I've seen broken CRC good messages being send by the
FUSB302 with a logic analyzer sniffing the CC lines. Also it looks like
messages are being lost and things generally going haywire with one of
the two sides doing a hard reset once a broken CRC good message was send
to the bus.
I think the system is running into a race condition, that the FIFOs are
being cleared and/or the automatic good CRC message generation flag is
being updated while a message is already arriving.
Let's avoid this by caching the PD RX enabled state, as we have already
processed anything in the FIFOs and are in a good state. As a side
effect that this also optimizes I2C bus usage :)
As far as I can tell the problem theoretically also exists when TCPM
enters SNK_WAIT_CAPABILITIES the first time, but I believe this is less
critical for the following reason:
On devices like the ROCK 5B, which are powered through a TCPM backed
USB-C port, the bootloader must have done some prior PD communication
(initial communication must happen within 5 seconds after plugging the
USB-C plug). This means the first time the kernel TCPM state machine
reaches SNK_WAIT_CAPABILITIES, the remote side is not sending messages
actively. On other devices a hard reset simply adds some extra delay and
things should be good afterwards.
Fixes: c034a43e72dda ("staging: typec: Fairchild FUSB302 Type-c chip driver")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel(a)collabora.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
Link: https://lore.kernel.org/r/20250704-fusb302-race-condition-fix-v1-1-239012c0…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/typec/tcpm/fusb302.c b/drivers/usb/typec/tcpm/fusb302.c
index f2801279c4b5..a4ff2403ddd6 100644
--- a/drivers/usb/typec/tcpm/fusb302.c
+++ b/drivers/usb/typec/tcpm/fusb302.c
@@ -104,6 +104,7 @@ struct fusb302_chip {
bool vconn_on;
bool vbus_on;
bool charge_on;
+ bool pd_rx_on;
bool vbus_present;
enum typec_cc_polarity cc_polarity;
enum typec_cc_status cc1;
@@ -841,6 +842,11 @@ static int tcpm_set_pd_rx(struct tcpc_dev *dev, bool on)
int ret = 0;
mutex_lock(&chip->lock);
+ if (chip->pd_rx_on == on) {
+ fusb302_log(chip, "pd is already %s", str_on_off(on));
+ goto done;
+ }
+
ret = fusb302_pd_rx_flush(chip);
if (ret < 0) {
fusb302_log(chip, "cannot flush pd rx buffer, ret=%d", ret);
@@ -863,6 +869,8 @@ static int tcpm_set_pd_rx(struct tcpc_dev *dev, bool on)
str_on_off(on), ret);
goto done;
}
+
+ chip->pd_rx_on = on;
fusb302_log(chip, "pd := %s", str_on_off(on));
done:
mutex_unlock(&chip->lock);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 7f8fdd4dbffc05982b96caf586f77a014b2a9353
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081848-charter-handcart-4eda@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7f8fdd4dbffc05982b96caf586f77a014b2a9353 Mon Sep 17 00:00:00 2001
From: Yunhui Cui <cuiyunhui(a)bytedance.com>
Date: Wed, 23 Jul 2025 10:33:22 +0800
Subject: [PATCH] serial: 8250: fix panic due to PSLVERR
When the PSLVERR_RESP_EN parameter is set to 1, the device generates
an error response if an attempt is made to read an empty RBR (Receive
Buffer Register) while the FIFO is enabled.
In serial8250_do_startup(), calling serial_port_out(port, UART_LCR,
UART_LCR_WLEN8) triggers dw8250_check_lcr(), which invokes
dw8250_force_idle() and serial8250_clear_and_reinit_fifos(). The latter
function enables the FIFO via serial_out(p, UART_FCR, p->fcr).
Execution proceeds to the serial_port_in(port, UART_RX).
This satisfies the PSLVERR trigger condition.
When another CPU (e.g., using printk()) is accessing the UART (UART
is busy), the current CPU fails the check (value & ~UART_LCR_SPAR) ==
(lcr & ~UART_LCR_SPAR) in dw8250_check_lcr(), causing it to enter
dw8250_force_idle().
Put serial_port_out(port, UART_LCR, UART_LCR_WLEN8) under the port->lock
to fix this issue.
Panic backtrace:
[ 0.442336] Oops - unknown exception [#1]
[ 0.442343] epc : dw8250_serial_in32+0x1e/0x4a
[ 0.442351] ra : serial8250_do_startup+0x2c8/0x88e
...
[ 0.442416] console_on_rootfs+0x26/0x70
Fixes: c49436b657d0 ("serial: 8250_dw: Improve unwritable LCR workaround")
Link: https://lore.kernel.org/all/84cydt5peu.fsf@jogness.linutronix.de/T/
Signed-off-by: Yunhui Cui <cuiyunhui(a)bytedance.com>
Reviewed-by: John Ogness <john.ogness(a)linutronix.de>
Cc: stable <stable(a)kernel.org>
Link: https://lore.kernel.org/r/20250723023322.464-2-cuiyunhui@bytedance.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 7eddcab318b4..2da9db960d09 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -2269,9 +2269,9 @@ static void serial8250_initialize(struct uart_port *port)
{
unsigned long flags;
+ uart_port_lock_irqsave(port, &flags);
serial_port_out(port, UART_LCR, UART_LCR_WLEN8);
- uart_port_lock_irqsave(port, &flags);
serial8250_init_mctrl(port);
serial8250_iir_txen_test(port);
uart_port_unlock_irqrestore(port, flags);
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 7f8fdd4dbffc05982b96caf586f77a014b2a9353
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081847-cheddar-glacier-ea0e@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7f8fdd4dbffc05982b96caf586f77a014b2a9353 Mon Sep 17 00:00:00 2001
From: Yunhui Cui <cuiyunhui(a)bytedance.com>
Date: Wed, 23 Jul 2025 10:33:22 +0800
Subject: [PATCH] serial: 8250: fix panic due to PSLVERR
When the PSLVERR_RESP_EN parameter is set to 1, the device generates
an error response if an attempt is made to read an empty RBR (Receive
Buffer Register) while the FIFO is enabled.
In serial8250_do_startup(), calling serial_port_out(port, UART_LCR,
UART_LCR_WLEN8) triggers dw8250_check_lcr(), which invokes
dw8250_force_idle() and serial8250_clear_and_reinit_fifos(). The latter
function enables the FIFO via serial_out(p, UART_FCR, p->fcr).
Execution proceeds to the serial_port_in(port, UART_RX).
This satisfies the PSLVERR trigger condition.
When another CPU (e.g., using printk()) is accessing the UART (UART
is busy), the current CPU fails the check (value & ~UART_LCR_SPAR) ==
(lcr & ~UART_LCR_SPAR) in dw8250_check_lcr(), causing it to enter
dw8250_force_idle().
Put serial_port_out(port, UART_LCR, UART_LCR_WLEN8) under the port->lock
to fix this issue.
Panic backtrace:
[ 0.442336] Oops - unknown exception [#1]
[ 0.442343] epc : dw8250_serial_in32+0x1e/0x4a
[ 0.442351] ra : serial8250_do_startup+0x2c8/0x88e
...
[ 0.442416] console_on_rootfs+0x26/0x70
Fixes: c49436b657d0 ("serial: 8250_dw: Improve unwritable LCR workaround")
Link: https://lore.kernel.org/all/84cydt5peu.fsf@jogness.linutronix.de/T/
Signed-off-by: Yunhui Cui <cuiyunhui(a)bytedance.com>
Reviewed-by: John Ogness <john.ogness(a)linutronix.de>
Cc: stable <stable(a)kernel.org>
Link: https://lore.kernel.org/r/20250723023322.464-2-cuiyunhui@bytedance.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 7eddcab318b4..2da9db960d09 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -2269,9 +2269,9 @@ static void serial8250_initialize(struct uart_port *port)
{
unsigned long flags;
+ uart_port_lock_irqsave(port, &flags);
serial_port_out(port, UART_LCR, UART_LCR_WLEN8);
- uart_port_lock_irqsave(port, &flags);
serial8250_init_mctrl(port);
serial8250_iir_txen_test(port);
uart_port_unlock_irqrestore(port, flags);
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 7f8fdd4dbffc05982b96caf586f77a014b2a9353
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081846-exquisite-previous-f76e@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7f8fdd4dbffc05982b96caf586f77a014b2a9353 Mon Sep 17 00:00:00 2001
From: Yunhui Cui <cuiyunhui(a)bytedance.com>
Date: Wed, 23 Jul 2025 10:33:22 +0800
Subject: [PATCH] serial: 8250: fix panic due to PSLVERR
When the PSLVERR_RESP_EN parameter is set to 1, the device generates
an error response if an attempt is made to read an empty RBR (Receive
Buffer Register) while the FIFO is enabled.
In serial8250_do_startup(), calling serial_port_out(port, UART_LCR,
UART_LCR_WLEN8) triggers dw8250_check_lcr(), which invokes
dw8250_force_idle() and serial8250_clear_and_reinit_fifos(). The latter
function enables the FIFO via serial_out(p, UART_FCR, p->fcr).
Execution proceeds to the serial_port_in(port, UART_RX).
This satisfies the PSLVERR trigger condition.
When another CPU (e.g., using printk()) is accessing the UART (UART
is busy), the current CPU fails the check (value & ~UART_LCR_SPAR) ==
(lcr & ~UART_LCR_SPAR) in dw8250_check_lcr(), causing it to enter
dw8250_force_idle().
Put serial_port_out(port, UART_LCR, UART_LCR_WLEN8) under the port->lock
to fix this issue.
Panic backtrace:
[ 0.442336] Oops - unknown exception [#1]
[ 0.442343] epc : dw8250_serial_in32+0x1e/0x4a
[ 0.442351] ra : serial8250_do_startup+0x2c8/0x88e
...
[ 0.442416] console_on_rootfs+0x26/0x70
Fixes: c49436b657d0 ("serial: 8250_dw: Improve unwritable LCR workaround")
Link: https://lore.kernel.org/all/84cydt5peu.fsf@jogness.linutronix.de/T/
Signed-off-by: Yunhui Cui <cuiyunhui(a)bytedance.com>
Reviewed-by: John Ogness <john.ogness(a)linutronix.de>
Cc: stable <stable(a)kernel.org>
Link: https://lore.kernel.org/r/20250723023322.464-2-cuiyunhui@bytedance.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 7eddcab318b4..2da9db960d09 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -2269,9 +2269,9 @@ static void serial8250_initialize(struct uart_port *port)
{
unsigned long flags;
+ uart_port_lock_irqsave(port, &flags);
serial_port_out(port, UART_LCR, UART_LCR_WLEN8);
- uart_port_lock_irqsave(port, &flags);
serial8250_init_mctrl(port);
serial8250_iir_txen_test(port);
uart_port_unlock_irqrestore(port, flags);
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 1e61f6ab08786d66a11cfc51e13d6f08a6b06c56
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081830-selected-dandelion-46ec@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1e61f6ab08786d66a11cfc51e13d6f08a6b06c56 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel(a)collabora.com>
Date: Fri, 4 Jul 2025 19:55:06 +0200
Subject: [PATCH] usb: typec: fusb302: cache PD RX state
This patch fixes a race condition communication error, which ends up in
PD hard resets when losing the race. Some systems, like the Radxa ROCK
5B are powered through USB-C without any backup power source and use a
FUSB302 chip to do the PD negotiation. This means it is quite important
to avoid hard resets, since that effectively kills the system's
power-supply.
I've found the following race condition while debugging unplanned power
loss during booting the board every now and then:
1. lots of TCPM/FUSB302/PD initialization stuff
2. TCPM ends up in SNK_WAIT_CAPABILITIES (tcpm_set_pd_rx is enabled here)
3. the remote PD source does not send anything, so TCPM does a SOFT RESET
4. TCPM ends up in SNK_WAIT_CAPABILITIES for the second time
(tcpm_set_pd_rx is enabled again, even though it is still on)
At this point I've seen broken CRC good messages being send by the
FUSB302 with a logic analyzer sniffing the CC lines. Also it looks like
messages are being lost and things generally going haywire with one of
the two sides doing a hard reset once a broken CRC good message was send
to the bus.
I think the system is running into a race condition, that the FIFOs are
being cleared and/or the automatic good CRC message generation flag is
being updated while a message is already arriving.
Let's avoid this by caching the PD RX enabled state, as we have already
processed anything in the FIFOs and are in a good state. As a side
effect that this also optimizes I2C bus usage :)
As far as I can tell the problem theoretically also exists when TCPM
enters SNK_WAIT_CAPABILITIES the first time, but I believe this is less
critical for the following reason:
On devices like the ROCK 5B, which are powered through a TCPM backed
USB-C port, the bootloader must have done some prior PD communication
(initial communication must happen within 5 seconds after plugging the
USB-C plug). This means the first time the kernel TCPM state machine
reaches SNK_WAIT_CAPABILITIES, the remote side is not sending messages
actively. On other devices a hard reset simply adds some extra delay and
things should be good afterwards.
Fixes: c034a43e72dda ("staging: typec: Fairchild FUSB302 Type-c chip driver")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel(a)collabora.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
Link: https://lore.kernel.org/r/20250704-fusb302-race-condition-fix-v1-1-239012c0…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/typec/tcpm/fusb302.c b/drivers/usb/typec/tcpm/fusb302.c
index f2801279c4b5..a4ff2403ddd6 100644
--- a/drivers/usb/typec/tcpm/fusb302.c
+++ b/drivers/usb/typec/tcpm/fusb302.c
@@ -104,6 +104,7 @@ struct fusb302_chip {
bool vconn_on;
bool vbus_on;
bool charge_on;
+ bool pd_rx_on;
bool vbus_present;
enum typec_cc_polarity cc_polarity;
enum typec_cc_status cc1;
@@ -841,6 +842,11 @@ static int tcpm_set_pd_rx(struct tcpc_dev *dev, bool on)
int ret = 0;
mutex_lock(&chip->lock);
+ if (chip->pd_rx_on == on) {
+ fusb302_log(chip, "pd is already %s", str_on_off(on));
+ goto done;
+ }
+
ret = fusb302_pd_rx_flush(chip);
if (ret < 0) {
fusb302_log(chip, "cannot flush pd rx buffer, ret=%d", ret);
@@ -863,6 +869,8 @@ static int tcpm_set_pd_rx(struct tcpc_dev *dev, bool on)
str_on_off(on), ret);
goto done;
}
+
+ chip->pd_rx_on = on;
fusb302_log(chip, "pd := %s", str_on_off(on));
done:
mutex_unlock(&chip->lock);
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 7f8fdd4dbffc05982b96caf586f77a014b2a9353
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081846-vanquish-fastball-b7d1@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7f8fdd4dbffc05982b96caf586f77a014b2a9353 Mon Sep 17 00:00:00 2001
From: Yunhui Cui <cuiyunhui(a)bytedance.com>
Date: Wed, 23 Jul 2025 10:33:22 +0800
Subject: [PATCH] serial: 8250: fix panic due to PSLVERR
When the PSLVERR_RESP_EN parameter is set to 1, the device generates
an error response if an attempt is made to read an empty RBR (Receive
Buffer Register) while the FIFO is enabled.
In serial8250_do_startup(), calling serial_port_out(port, UART_LCR,
UART_LCR_WLEN8) triggers dw8250_check_lcr(), which invokes
dw8250_force_idle() and serial8250_clear_and_reinit_fifos(). The latter
function enables the FIFO via serial_out(p, UART_FCR, p->fcr).
Execution proceeds to the serial_port_in(port, UART_RX).
This satisfies the PSLVERR trigger condition.
When another CPU (e.g., using printk()) is accessing the UART (UART
is busy), the current CPU fails the check (value & ~UART_LCR_SPAR) ==
(lcr & ~UART_LCR_SPAR) in dw8250_check_lcr(), causing it to enter
dw8250_force_idle().
Put serial_port_out(port, UART_LCR, UART_LCR_WLEN8) under the port->lock
to fix this issue.
Panic backtrace:
[ 0.442336] Oops - unknown exception [#1]
[ 0.442343] epc : dw8250_serial_in32+0x1e/0x4a
[ 0.442351] ra : serial8250_do_startup+0x2c8/0x88e
...
[ 0.442416] console_on_rootfs+0x26/0x70
Fixes: c49436b657d0 ("serial: 8250_dw: Improve unwritable LCR workaround")
Link: https://lore.kernel.org/all/84cydt5peu.fsf@jogness.linutronix.de/T/
Signed-off-by: Yunhui Cui <cuiyunhui(a)bytedance.com>
Reviewed-by: John Ogness <john.ogness(a)linutronix.de>
Cc: stable <stable(a)kernel.org>
Link: https://lore.kernel.org/r/20250723023322.464-2-cuiyunhui@bytedance.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 7eddcab318b4..2da9db960d09 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -2269,9 +2269,9 @@ static void serial8250_initialize(struct uart_port *port)
{
unsigned long flags;
+ uart_port_lock_irqsave(port, &flags);
serial_port_out(port, UART_LCR, UART_LCR_WLEN8);
- uart_port_lock_irqsave(port, &flags);
serial8250_init_mctrl(port);
serial8250_iir_txen_test(port);
uart_port_unlock_irqrestore(port, flags);
The patch below does not apply to the 6.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.15.y
git checkout FETCH_HEAD
git cherry-pick -x 7f8fdd4dbffc05982b96caf586f77a014b2a9353
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081845-enlarging-goldsmith-455a@gregkh' --subject-prefix 'PATCH 6.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7f8fdd4dbffc05982b96caf586f77a014b2a9353 Mon Sep 17 00:00:00 2001
From: Yunhui Cui <cuiyunhui(a)bytedance.com>
Date: Wed, 23 Jul 2025 10:33:22 +0800
Subject: [PATCH] serial: 8250: fix panic due to PSLVERR
When the PSLVERR_RESP_EN parameter is set to 1, the device generates
an error response if an attempt is made to read an empty RBR (Receive
Buffer Register) while the FIFO is enabled.
In serial8250_do_startup(), calling serial_port_out(port, UART_LCR,
UART_LCR_WLEN8) triggers dw8250_check_lcr(), which invokes
dw8250_force_idle() and serial8250_clear_and_reinit_fifos(). The latter
function enables the FIFO via serial_out(p, UART_FCR, p->fcr).
Execution proceeds to the serial_port_in(port, UART_RX).
This satisfies the PSLVERR trigger condition.
When another CPU (e.g., using printk()) is accessing the UART (UART
is busy), the current CPU fails the check (value & ~UART_LCR_SPAR) ==
(lcr & ~UART_LCR_SPAR) in dw8250_check_lcr(), causing it to enter
dw8250_force_idle().
Put serial_port_out(port, UART_LCR, UART_LCR_WLEN8) under the port->lock
to fix this issue.
Panic backtrace:
[ 0.442336] Oops - unknown exception [#1]
[ 0.442343] epc : dw8250_serial_in32+0x1e/0x4a
[ 0.442351] ra : serial8250_do_startup+0x2c8/0x88e
...
[ 0.442416] console_on_rootfs+0x26/0x70
Fixes: c49436b657d0 ("serial: 8250_dw: Improve unwritable LCR workaround")
Link: https://lore.kernel.org/all/84cydt5peu.fsf@jogness.linutronix.de/T/
Signed-off-by: Yunhui Cui <cuiyunhui(a)bytedance.com>
Reviewed-by: John Ogness <john.ogness(a)linutronix.de>
Cc: stable <stable(a)kernel.org>
Link: https://lore.kernel.org/r/20250723023322.464-2-cuiyunhui@bytedance.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 7eddcab318b4..2da9db960d09 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -2269,9 +2269,9 @@ static void serial8250_initialize(struct uart_port *port)
{
unsigned long flags;
+ uart_port_lock_irqsave(port, &flags);
serial_port_out(port, UART_LCR, UART_LCR_WLEN8);
- uart_port_lock_irqsave(port, &flags);
serial8250_init_mctrl(port);
serial8250_iir_txen_test(port);
uart_port_unlock_irqrestore(port, flags);
The patch below does not apply to the 6.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.16.y
git checkout FETCH_HEAD
git cherry-pick -x 7f8fdd4dbffc05982b96caf586f77a014b2a9353
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081844-wimp-jubilance-539e@gregkh' --subject-prefix 'PATCH 6.16.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7f8fdd4dbffc05982b96caf586f77a014b2a9353 Mon Sep 17 00:00:00 2001
From: Yunhui Cui <cuiyunhui(a)bytedance.com>
Date: Wed, 23 Jul 2025 10:33:22 +0800
Subject: [PATCH] serial: 8250: fix panic due to PSLVERR
When the PSLVERR_RESP_EN parameter is set to 1, the device generates
an error response if an attempt is made to read an empty RBR (Receive
Buffer Register) while the FIFO is enabled.
In serial8250_do_startup(), calling serial_port_out(port, UART_LCR,
UART_LCR_WLEN8) triggers dw8250_check_lcr(), which invokes
dw8250_force_idle() and serial8250_clear_and_reinit_fifos(). The latter
function enables the FIFO via serial_out(p, UART_FCR, p->fcr).
Execution proceeds to the serial_port_in(port, UART_RX).
This satisfies the PSLVERR trigger condition.
When another CPU (e.g., using printk()) is accessing the UART (UART
is busy), the current CPU fails the check (value & ~UART_LCR_SPAR) ==
(lcr & ~UART_LCR_SPAR) in dw8250_check_lcr(), causing it to enter
dw8250_force_idle().
Put serial_port_out(port, UART_LCR, UART_LCR_WLEN8) under the port->lock
to fix this issue.
Panic backtrace:
[ 0.442336] Oops - unknown exception [#1]
[ 0.442343] epc : dw8250_serial_in32+0x1e/0x4a
[ 0.442351] ra : serial8250_do_startup+0x2c8/0x88e
...
[ 0.442416] console_on_rootfs+0x26/0x70
Fixes: c49436b657d0 ("serial: 8250_dw: Improve unwritable LCR workaround")
Link: https://lore.kernel.org/all/84cydt5peu.fsf@jogness.linutronix.de/T/
Signed-off-by: Yunhui Cui <cuiyunhui(a)bytedance.com>
Reviewed-by: John Ogness <john.ogness(a)linutronix.de>
Cc: stable <stable(a)kernel.org>
Link: https://lore.kernel.org/r/20250723023322.464-2-cuiyunhui@bytedance.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 7eddcab318b4..2da9db960d09 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -2269,9 +2269,9 @@ static void serial8250_initialize(struct uart_port *port)
{
unsigned long flags;
+ uart_port_lock_irqsave(port, &flags);
serial_port_out(port, UART_LCR, UART_LCR_WLEN8);
- uart_port_lock_irqsave(port, &flags);
serial8250_init_mctrl(port);
serial8250_iir_txen_test(port);
uart_port_unlock_irqrestore(port, flags);
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 9d5eff7821f6d70f7d1b4d8a60680fba4de868a7
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081845-cork-enable-a1e8@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9d5eff7821f6d70f7d1b4d8a60680fba4de868a7 Mon Sep 17 00:00:00 2001
From: Shyam Prasad N <sprasad(a)microsoft.com>
Date: Thu, 17 Jul 2025 17:36:13 +0530
Subject: [PATCH] cifs: reset iface weights when we cannot find a candidate
We now do a weighted selection of server interfaces when allocating
new channels. The weights are decided based on the speed advertised.
The fulfilled weight for an interface is a counter that is used to
track the interface selection. It should be reset back to zero once
all interfaces fulfilling their weight.
In cifs_chan_update_iface, this reset logic was missing. As a result
when the server interface list changes, the client may not be able
to find a new candidate for other channels after all interfaces have
been fulfilled.
Fixes: a6d8fb54a515 ("cifs: distribute channels across interfaces based on speed")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Shyam Prasad N <sprasad(a)microsoft.com>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c
index 330bc3d25bad..0a8c2fcc9ded 100644
--- a/fs/smb/client/sess.c
+++ b/fs/smb/client/sess.c
@@ -332,6 +332,7 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
struct cifs_server_iface *old_iface = NULL;
struct cifs_server_iface *last_iface = NULL;
struct sockaddr_storage ss;
+ int retry = 0;
spin_lock(&ses->chan_lock);
chan_index = cifs_ses_get_chan_index(ses, server);
@@ -360,6 +361,7 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
return;
}
+try_again:
last_iface = list_last_entry(&ses->iface_list, struct cifs_server_iface,
iface_head);
iface_min_speed = last_iface->speed;
@@ -397,6 +399,13 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
}
if (list_entry_is_head(iface, &ses->iface_list, iface_head)) {
+ list_for_each_entry(iface, &ses->iface_list, iface_head)
+ iface->weight_fulfilled = 0;
+
+ /* see if it can be satisfied in second attempt */
+ if (!retry++)
+ goto try_again;
+
iface = NULL;
cifs_dbg(FYI, "unable to find a suitable iface\n");
}
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 7e6c3130690a01076efdf45aa02ba5d5c16849a0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081833-booting-yelling-d0fa@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7e6c3130690a01076efdf45aa02ba5d5c16849a0 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj(a)kernel.org>
Date: Sun, 20 Jul 2025 11:58:22 -0700
Subject: [PATCH] mm/damon/ops-common: ignore migration request to invalid
nodes
damon_migrate_pages() tries migration even if the target node is invalid.
If users mistakenly make such invalid requests via
DAMOS_MIGRATE_{HOT,COLD} action, the below kernel BUG can happen.
[ 7831.883495] BUG: unable to handle page fault for address: 0000000000001f48
[ 7831.884160] #PF: supervisor read access in kernel mode
[ 7831.884681] #PF: error_code(0x0000) - not-present page
[ 7831.885203] PGD 0 P4D 0
[ 7831.885468] Oops: Oops: 0000 [#1] SMP PTI
[ 7831.885852] CPU: 31 UID: 0 PID: 94202 Comm: kdamond.0 Not tainted 6.16.0-rc5-mm-new-damon+ #93 PREEMPT(voluntary)
[ 7831.886913] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-4.el9 04/01/2014
[ 7831.887777] RIP: 0010:__alloc_frozen_pages_noprof (include/linux/mmzone.h:1724 include/linux/mmzone.h:1750 mm/page_alloc.c:4936 mm/page_alloc.c:5137)
[...]
[ 7831.895953] Call Trace:
[ 7831.896195] <TASK>
[ 7831.896397] __folio_alloc_noprof (mm/page_alloc.c:5183 mm/page_alloc.c:5192)
[ 7831.896787] migrate_pages_batch (mm/migrate.c:1189 mm/migrate.c:1851)
[ 7831.897228] ? __pfx_alloc_migration_target (mm/migrate.c:2137)
[ 7831.897735] migrate_pages (mm/migrate.c:2078)
[ 7831.898141] ? __pfx_alloc_migration_target (mm/migrate.c:2137)
[ 7831.898664] damon_migrate_folio_list (mm/damon/ops-common.c:321 mm/damon/ops-common.c:354)
[ 7831.899140] damon_migrate_pages (mm/damon/ops-common.c:405)
[...]
Add a target node validity check in damon_migrate_pages(). The validity
check is stolen from that of do_pages_move(), which is being used for the
move_pages() system call.
Link: https://lkml.kernel.org/r/20250720185822.1451-1-sj@kernel.org
Fixes: b51820ebea65 ("mm/damon/paddr: introduce DAMOS_MIGRATE_COLD action for demotion") [6.11.x]
Signed-off-by: SeongJae Park <sj(a)kernel.org>
Reviewed-by: Joshua Hahn <joshua.hahnjy(a)gmail.com>
Cc: Honggyu Kim <honggyu.kim(a)sk.com>
Cc: Hyeongtak Ji <hyeongtak.ji(a)sk.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/damon/ops-common.c b/mm/damon/ops-common.c
index 6a9797d1d7ff..99321ff5cb92 100644
--- a/mm/damon/ops-common.c
+++ b/mm/damon/ops-common.c
@@ -383,6 +383,10 @@ unsigned long damon_migrate_pages(struct list_head *folio_list, int target_nid)
if (list_empty(folio_list))
return nr_migrated;
+ if (target_nid < 0 || target_nid >= MAX_NUMNODES ||
+ !node_state(target_nid, N_MEMORY))
+ return nr_migrated;
+
noreclaim_flag = memalloc_noreclaim_save();
nid = folio_nid(lru_to_folio(folio_list));
The patch below does not apply to the 6.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.15.y
git checkout FETCH_HEAD
git cherry-pick -x 7e6c3130690a01076efdf45aa02ba5d5c16849a0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081832-splendor-carve-f25b@gregkh' --subject-prefix 'PATCH 6.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7e6c3130690a01076efdf45aa02ba5d5c16849a0 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj(a)kernel.org>
Date: Sun, 20 Jul 2025 11:58:22 -0700
Subject: [PATCH] mm/damon/ops-common: ignore migration request to invalid
nodes
damon_migrate_pages() tries migration even if the target node is invalid.
If users mistakenly make such invalid requests via
DAMOS_MIGRATE_{HOT,COLD} action, the below kernel BUG can happen.
[ 7831.883495] BUG: unable to handle page fault for address: 0000000000001f48
[ 7831.884160] #PF: supervisor read access in kernel mode
[ 7831.884681] #PF: error_code(0x0000) - not-present page
[ 7831.885203] PGD 0 P4D 0
[ 7831.885468] Oops: Oops: 0000 [#1] SMP PTI
[ 7831.885852] CPU: 31 UID: 0 PID: 94202 Comm: kdamond.0 Not tainted 6.16.0-rc5-mm-new-damon+ #93 PREEMPT(voluntary)
[ 7831.886913] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-4.el9 04/01/2014
[ 7831.887777] RIP: 0010:__alloc_frozen_pages_noprof (include/linux/mmzone.h:1724 include/linux/mmzone.h:1750 mm/page_alloc.c:4936 mm/page_alloc.c:5137)
[...]
[ 7831.895953] Call Trace:
[ 7831.896195] <TASK>
[ 7831.896397] __folio_alloc_noprof (mm/page_alloc.c:5183 mm/page_alloc.c:5192)
[ 7831.896787] migrate_pages_batch (mm/migrate.c:1189 mm/migrate.c:1851)
[ 7831.897228] ? __pfx_alloc_migration_target (mm/migrate.c:2137)
[ 7831.897735] migrate_pages (mm/migrate.c:2078)
[ 7831.898141] ? __pfx_alloc_migration_target (mm/migrate.c:2137)
[ 7831.898664] damon_migrate_folio_list (mm/damon/ops-common.c:321 mm/damon/ops-common.c:354)
[ 7831.899140] damon_migrate_pages (mm/damon/ops-common.c:405)
[...]
Add a target node validity check in damon_migrate_pages(). The validity
check is stolen from that of do_pages_move(), which is being used for the
move_pages() system call.
Link: https://lkml.kernel.org/r/20250720185822.1451-1-sj@kernel.org
Fixes: b51820ebea65 ("mm/damon/paddr: introduce DAMOS_MIGRATE_COLD action for demotion") [6.11.x]
Signed-off-by: SeongJae Park <sj(a)kernel.org>
Reviewed-by: Joshua Hahn <joshua.hahnjy(a)gmail.com>
Cc: Honggyu Kim <honggyu.kim(a)sk.com>
Cc: Hyeongtak Ji <hyeongtak.ji(a)sk.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/damon/ops-common.c b/mm/damon/ops-common.c
index 6a9797d1d7ff..99321ff5cb92 100644
--- a/mm/damon/ops-common.c
+++ b/mm/damon/ops-common.c
@@ -383,6 +383,10 @@ unsigned long damon_migrate_pages(struct list_head *folio_list, int target_nid)
if (list_empty(folio_list))
return nr_migrated;
+ if (target_nid < 0 || target_nid >= MAX_NUMNODES ||
+ !node_state(target_nid, N_MEMORY))
+ return nr_migrated;
+
noreclaim_flag = memalloc_noreclaim_save();
nid = folio_nid(lru_to_folio(folio_list));
The patch below does not apply to the 6.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.16.y
git checkout FETCH_HEAD
git cherry-pick -x 7e6c3130690a01076efdf45aa02ba5d5c16849a0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081831-singular-geologist-93a6@gregkh' --subject-prefix 'PATCH 6.16.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7e6c3130690a01076efdf45aa02ba5d5c16849a0 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj(a)kernel.org>
Date: Sun, 20 Jul 2025 11:58:22 -0700
Subject: [PATCH] mm/damon/ops-common: ignore migration request to invalid
nodes
damon_migrate_pages() tries migration even if the target node is invalid.
If users mistakenly make such invalid requests via
DAMOS_MIGRATE_{HOT,COLD} action, the below kernel BUG can happen.
[ 7831.883495] BUG: unable to handle page fault for address: 0000000000001f48
[ 7831.884160] #PF: supervisor read access in kernel mode
[ 7831.884681] #PF: error_code(0x0000) - not-present page
[ 7831.885203] PGD 0 P4D 0
[ 7831.885468] Oops: Oops: 0000 [#1] SMP PTI
[ 7831.885852] CPU: 31 UID: 0 PID: 94202 Comm: kdamond.0 Not tainted 6.16.0-rc5-mm-new-damon+ #93 PREEMPT(voluntary)
[ 7831.886913] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-4.el9 04/01/2014
[ 7831.887777] RIP: 0010:__alloc_frozen_pages_noprof (include/linux/mmzone.h:1724 include/linux/mmzone.h:1750 mm/page_alloc.c:4936 mm/page_alloc.c:5137)
[...]
[ 7831.895953] Call Trace:
[ 7831.896195] <TASK>
[ 7831.896397] __folio_alloc_noprof (mm/page_alloc.c:5183 mm/page_alloc.c:5192)
[ 7831.896787] migrate_pages_batch (mm/migrate.c:1189 mm/migrate.c:1851)
[ 7831.897228] ? __pfx_alloc_migration_target (mm/migrate.c:2137)
[ 7831.897735] migrate_pages (mm/migrate.c:2078)
[ 7831.898141] ? __pfx_alloc_migration_target (mm/migrate.c:2137)
[ 7831.898664] damon_migrate_folio_list (mm/damon/ops-common.c:321 mm/damon/ops-common.c:354)
[ 7831.899140] damon_migrate_pages (mm/damon/ops-common.c:405)
[...]
Add a target node validity check in damon_migrate_pages(). The validity
check is stolen from that of do_pages_move(), which is being used for the
move_pages() system call.
Link: https://lkml.kernel.org/r/20250720185822.1451-1-sj@kernel.org
Fixes: b51820ebea65 ("mm/damon/paddr: introduce DAMOS_MIGRATE_COLD action for demotion") [6.11.x]
Signed-off-by: SeongJae Park <sj(a)kernel.org>
Reviewed-by: Joshua Hahn <joshua.hahnjy(a)gmail.com>
Cc: Honggyu Kim <honggyu.kim(a)sk.com>
Cc: Hyeongtak Ji <hyeongtak.ji(a)sk.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/damon/ops-common.c b/mm/damon/ops-common.c
index 6a9797d1d7ff..99321ff5cb92 100644
--- a/mm/damon/ops-common.c
+++ b/mm/damon/ops-common.c
@@ -383,6 +383,10 @@ unsigned long damon_migrate_pages(struct list_head *folio_list, int target_nid)
if (list_empty(folio_list))
return nr_migrated;
+ if (target_nid < 0 || target_nid >= MAX_NUMNODES ||
+ !node_state(target_nid, N_MEMORY))
+ return nr_migrated;
+
noreclaim_flag = memalloc_noreclaim_save();
nid = folio_nid(lru_to_folio(folio_list));
Hello,
New build issue found on stable-rc/linux-6.1.y:
---
call to undeclared function 'BIT_U32'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] in drivers/net/can/ti_hecc.o (drivers/net/can/ti_hecc.c) [logspec:kbuild,kbuild.compiler.error]
---
- dashboard: https://d.kernelci.org/i/maestro:8fd2dd605b4f4e3fdfb2bb48ed2965f305259b0d
- giturl: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
- commit HEAD: de81846a25749c1fa21ba68cf4938d4309cfbcef
Log excerpt:
=====================================================
drivers/net/can/ti_hecc.c:387:14: error: call to undeclared function 'BIT_U32'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
387 | mbx_mask = ~BIT_U32(HECC_RX_LAST_MBOX);
| ^
1 error generated.
=====================================================
# Builds where the incident occurred:
## defconfig+allmodconfig+CONFIG_FRAME_WARN=2048 on (arm):
- compiler: clang-17
- dashboard: https://d.kernelci.org/build/maestro:68a32682233e484a3f9ea0aa
#kernelci issue maestro:8fd2dd605b4f4e3fdfb2bb48ed2965f305259b0d
Reported-by: kernelci.org bot <bot(a)kernelci.org>
--
This is an experimental report format. Please send feedback in!
Talk to us at kernelci(a)lists.linux.dev
Made with love by the KernelCI team - https://kernelci.org
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x e2d18cbf178775ad377ad88ee55e6e183c38d262
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081818-skilled-timid-4660@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e2d18cbf178775ad377ad88ee55e6e183c38d262 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka(a)suse.cz>
Date: Mon, 2 Jun 2025 13:02:12 +0200
Subject: [PATCH] mm, slab: restore NUMA policy support for large kmalloc
The slab allocator observes the task's NUMA policy in various places
such as allocating slab pages. Large kmalloc() allocations used to do
that too, until an unintended change by c4cab557521a ("mm/slab_common:
cleanup kmalloc_large()") resulted in ignoring mempolicy and just
preferring the local node. Restore the NUMA policy support.
Fixes: c4cab557521a ("mm/slab_common: cleanup kmalloc_large()")
Cc: <stable(a)vger.kernel.org>
Acked-by: Christoph Lameter (Ampere) <cl(a)gentwo.org>
Acked-by: Roman Gushchin <roman.gushchin(a)linux.dev>
Reviewed-by: Harry Yoo <harry.yoo(a)oracle.com>
Signed-off-by: Vlastimil Babka <vbabka(a)suse.cz>
diff --git a/mm/slub.c b/mm/slub.c
index 31e11ef256f9..06d64a5fb1bf 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4269,7 +4269,12 @@ static void *___kmalloc_large_node(size_t size, gfp_t flags, int node)
flags = kmalloc_fix_flags(flags);
flags |= __GFP_COMP;
- folio = (struct folio *)alloc_pages_node_noprof(node, flags, order);
+
+ if (node == NUMA_NO_NODE)
+ folio = (struct folio *)alloc_pages_noprof(flags, order);
+ else
+ folio = (struct folio *)__alloc_pages_noprof(flags, order, node, NULL);
+
if (folio) {
ptr = folio_address(folio);
lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B,
From: Mikhail Lobanov <m.lobanov(a)rosa.ru>
[ Upstream commit 16ee3ea8faef8ff042acc15867a6c458c573de61 ]
When userspace sets supported rates for a new station via
NL80211_CMD_NEW_STATION, it might send a list that's empty
or contains only invalid values. Currently, we process these
values in sta_link_apply_parameters() without checking the result of
ieee80211_parse_bitrates(), which can lead to an empty rates bitmap.
A similar issue was addressed for NL80211_CMD_SET_BSS in commit
ce04abc3fcc6 ("wifi: mac80211: check basic rates validity").
This patch applies the same approach in sta_link_apply_parameters()
for NL80211_CMD_NEW_STATION, ensuring there is at least one valid
rate by inspecting the result of ieee80211_parse_bitrates().
Found by Linux Verification Center (linuxtesting.org) with Syzkaller.
[ Summary of conflict resolutions:
- The function ieee80211_parse_bitrates() takes channel width as
its first parameter, and the chandef struct has been refactored
in kernel version 6.9, in commit
6092077ad09ce880c61735c314060f0bd79ae4aa so that the width is
contained in chanreq.oper.width. In kernel version 6.6 the
width parameter is defined directly in the chandef struct. ]
Fixes: b95eb7f0eee4 ("wifi: cfg80211/mac80211: separate link params from station params")
Signed-off-by: Mikhail Lobanov <m.lobanov(a)rosa.ru>
Link: https://patch.msgid.link/20250317103139.17625-1-m.lobanov@rosa.ru
Signed-off-by: Johannes Berg <johannes.berg(a)intel.com>
Signed-off-by: Hanne-Lotta Mäenpää <hannelotta(a)gmail.com>
---
net/mac80211/cfg.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 3ff7f38394a6..1addfba4b285 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1847,12 +1847,12 @@ static int sta_link_apply_parameters(struct ieee80211_local *local,
}
if (params->supported_rates &&
- params->supported_rates_len) {
- ieee80211_parse_bitrates(link->conf->chandef.width,
- sband, params->supported_rates,
- params->supported_rates_len,
- &link_sta->pub->supp_rates[sband->band]);
- }
+ params->supported_rates_len &&
+ !ieee80211_parse_bitrates(link->conf->chandef.width,
+ sband, params->supported_rates,
+ params->supported_rates_len,
+ &link_sta->pub->supp_rates[sband->band]))
+ return -EINVAL;
if (params->ht_capa)
ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband,
--
2.50.0
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 41b70df5b38bc80967d2e0ed55cc3c3896bba781
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081549-shorter-borrower-941d@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 41b70df5b38bc80967d2e0ed55cc3c3896bba781 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe(a)kernel.dk>
Date: Tue, 12 Aug 2025 08:30:11 -0600
Subject: [PATCH] io_uring/net: commit partial buffers on retry
Ring provided buffers are potentially only valid within the single
execution context in which they were acquired. io_uring deals with this
and invalidates them on retry. But on the networking side, if
MSG_WAITALL is set, or if the socket is of the streaming type and too
little was processed, then it will hang on to the buffer rather than
recycle or commit it. This is problematic for two reasons:
1) If someone unregisters the provided buffer ring before a later retry,
then the req->buf_list will no longer be valid.
2) If multiple sockers are using the same buffer group, then multiple
receives can consume the same memory. This can cause data corruption
in the application, as either receive could land in the same
userspace buffer.
Fix this by disallowing partial retries from pinning a provided buffer
across multiple executions, if ring provided buffers are used.
Cc: stable(a)vger.kernel.org
Reported-by: pt x <superman.xpt(a)gmail.com>
Fixes: c56e022c0a27 ("io_uring: add support for user mapped provided buffer ring")
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/net.c b/io_uring/net.c
index dd96e355982f..d69f2afa4f7a 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -494,6 +494,15 @@ static int io_bundle_nbufs(struct io_async_msghdr *kmsg, int ret)
return nbufs;
}
+static int io_net_kbuf_recyle(struct io_kiocb *req,
+ struct io_async_msghdr *kmsg, int len)
+{
+ req->flags |= REQ_F_BL_NO_RECYCLE;
+ if (req->flags & REQ_F_BUFFERS_COMMIT)
+ io_kbuf_commit(req, req->buf_list, len, io_bundle_nbufs(kmsg, len));
+ return IOU_RETRY;
+}
+
static inline bool io_send_finish(struct io_kiocb *req, int *ret,
struct io_async_msghdr *kmsg,
unsigned issue_flags)
@@ -562,8 +571,7 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
kmsg->msg.msg_controllen = 0;
kmsg->msg.msg_control = NULL;
sr->done_io += ret;
- req->flags |= REQ_F_BL_NO_RECYCLE;
- return -EAGAIN;
+ return io_net_kbuf_recyle(req, kmsg, ret);
}
if (ret == -ERESTARTSYS)
ret = -EINTR;
@@ -674,8 +682,7 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags)
sr->len -= ret;
sr->buf += ret;
sr->done_io += ret;
- req->flags |= REQ_F_BL_NO_RECYCLE;
- return -EAGAIN;
+ return io_net_kbuf_recyle(req, kmsg, ret);
}
if (ret == -ERESTARTSYS)
ret = -EINTR;
@@ -1071,8 +1078,7 @@ int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
}
if (ret > 0 && io_net_retry(sock, flags)) {
sr->done_io += ret;
- req->flags |= REQ_F_BL_NO_RECYCLE;
- return IOU_RETRY;
+ return io_net_kbuf_recyle(req, kmsg, ret);
}
if (ret == -ERESTARTSYS)
ret = -EINTR;
@@ -1218,8 +1224,7 @@ int io_recv(struct io_kiocb *req, unsigned int issue_flags)
sr->len -= ret;
sr->buf += ret;
sr->done_io += ret;
- req->flags |= REQ_F_BL_NO_RECYCLE;
- return -EAGAIN;
+ return io_net_kbuf_recyle(req, kmsg, ret);
}
if (ret == -ERESTARTSYS)
ret = -EINTR;
@@ -1500,8 +1505,7 @@ int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
zc->len -= ret;
zc->buf += ret;
zc->done_io += ret;
- req->flags |= REQ_F_BL_NO_RECYCLE;
- return -EAGAIN;
+ return io_net_kbuf_recyle(req, kmsg, ret);
}
if (ret == -ERESTARTSYS)
ret = -EINTR;
@@ -1571,8 +1575,7 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
if (ret > 0 && io_net_retry(sock, flags)) {
sr->done_io += ret;
- req->flags |= REQ_F_BL_NO_RECYCLE;
- return -EAGAIN;
+ return io_net_kbuf_recyle(req, kmsg, ret);
}
if (ret == -ERESTARTSYS)
ret = -EINTR;
Hello,
New build issue found on stable-rc/linux-6.6.y:
---
call to undeclared function 'BIT_U32'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] in drivers/net/can/ti_hecc.o (drivers/net/can/ti_hecc.c) [logspec:kbuild,kbuild.compiler.error]
---
- dashboard: https://d.kernelci.org/i/maestro:8ace23e7f8ec64c36de1851f3a96cbd024dbc75a
- giturl: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
- commit HEAD: 243d4807be1704fef446934001535d37afafd8d7
Log excerpt:
=====================================================
drivers/net/can/ti_hecc.c:386:14: error: call to undeclared function 'BIT_U32'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
386 | mbx_mask = ~BIT_U32(HECC_RX_LAST_MBOX);
| ^
1 error generated.
=====================================================
# Builds where the incident occurred:
## defconfig+allmodconfig+CONFIG_FRAME_WARN=2048 on (arm):
- compiler: clang-17
- dashboard: https://d.kernelci.org/build/maestro:68a3271d233e484a3f9ea12f
#kernelci issue maestro:8ace23e7f8ec64c36de1851f3a96cbd024dbc75a
Reported-by: kernelci.org bot <bot(a)kernelci.org>
--
This is an experimental report format. Please send feedback in!
Talk to us at kernelci(a)lists.linux.dev
Made with love by the KernelCI team - https://kernelci.org
Hello,
New build issue found on stable-rc/linux-5.4.y:
---
implicit declaration of function 'BIT_U32' [-Werror,-Wimplicit-function-declaration] in drivers/net/can/ti_hecc.o (drivers/net/can/ti_hecc.c) [logspec:kbuild,kbuild.compiler.error]
---
- dashboard: https://d.kernelci.org/i/maestro:d43992843dddea5eb6d0e618759db726469a7b6a
- giturl: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
- commit HEAD: a6319f2fe27b8fefe40757d3797cfca30d43ce3c
Log excerpt:
=====================================================
drivers/net/can/ti_hecc.c:396:14: error: implicit declaration of function 'BIT_U32' [-Werror,-Wimplicit-function-declaration]
396 | mbx_mask = ~BIT_U32(HECC_RX_LAST_MBOX);
| ^
1 error generated.
=====================================================
# Builds where the incident occurred:
## defconfig+allmodconfig+CONFIG_FRAME_WARN=2048 on (arm):
- compiler: clang-17
- dashboard: https://d.kernelci.org/build/maestro:68a325ed233e484a3f9ea033
#kernelci issue maestro:d43992843dddea5eb6d0e618759db726469a7b6a
Reported-by: kernelci.org bot <bot(a)kernelci.org>
--
This is an experimental report format. Please send feedback in!
Talk to us at kernelci(a)lists.linux.dev
Made with love by the KernelCI team - https://kernelci.org
The patch below does not apply to the 6.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.15.y
git checkout FETCH_HEAD
git cherry-pick -x 3ee9cebd0a5e7ea47eb35cec95eaa1a866af982d
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081850-culture-uncheck-1048@gregkh' --subject-prefix 'PATCH 6.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3ee9cebd0a5e7ea47eb35cec95eaa1a866af982d Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky(a)amd.com>
Date: Wed, 13 Aug 2025 10:26:59 -0500
Subject: [PATCH] x86/sev: Ensure SVSM reserved fields in a page validation
entry are initialized to zero
In order to support future versions of the SVSM_CORE_PVALIDATE call, all
reserved fields within a PVALIDATE entry must be set to zero as an SVSM should
be ensuring all reserved fields are zero in order to support future usage of
reserved areas based on the protocol version.
Fixes: fcd042e86422 ("x86/sev: Perform PVALIDATE using the SVSM when not at VMPL0")
Signed-off-by: Tom Lendacky <thomas.lendacky(a)amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Reviewed-by: Joerg Roedel <joerg.roedel(a)amd.com>
Cc: <stable(a)kernel.org>
Link: https://lore.kernel.org/7cde412f8b057ea13a646fb166b1ca023f6a5031.1755098819…
diff --git a/arch/x86/boot/startup/sev-shared.c b/arch/x86/boot/startup/sev-shared.c
index 7a706db87b93..4ab0dbd043c6 100644
--- a/arch/x86/boot/startup/sev-shared.c
+++ b/arch/x86/boot/startup/sev-shared.c
@@ -785,6 +785,7 @@ static void __head svsm_pval_4k_page(unsigned long paddr, bool validate)
pc->entry[0].page_size = RMP_PG_SIZE_4K;
pc->entry[0].action = validate;
pc->entry[0].ignore_cf = 0;
+ pc->entry[0].rsvd = 0;
pc->entry[0].pfn = paddr >> PAGE_SHIFT;
/* Protocol 0, Call ID 1 */
diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c
index fc59ce78c477..43ecc6b9fb9c 100644
--- a/arch/x86/coco/sev/core.c
+++ b/arch/x86/coco/sev/core.c
@@ -227,6 +227,7 @@ static u64 svsm_build_ca_from_pfn_range(u64 pfn, u64 pfn_end, bool action,
pe->page_size = RMP_PG_SIZE_4K;
pe->action = action;
pe->ignore_cf = 0;
+ pe->rsvd = 0;
pe->pfn = pfn;
pe++;
@@ -257,6 +258,7 @@ static int svsm_build_ca_from_psc_desc(struct snp_psc_desc *desc, unsigned int d
pe->page_size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K;
pe->action = e->operation == SNP_PAGE_STATE_PRIVATE;
pe->ignore_cf = 0;
+ pe->rsvd = 0;
pe->pfn = e->gfn;
pe++;
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 3ee9cebd0a5e7ea47eb35cec95eaa1a866af982d
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081852-debtless-penniless-395d@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3ee9cebd0a5e7ea47eb35cec95eaa1a866af982d Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky(a)amd.com>
Date: Wed, 13 Aug 2025 10:26:59 -0500
Subject: [PATCH] x86/sev: Ensure SVSM reserved fields in a page validation
entry are initialized to zero
In order to support future versions of the SVSM_CORE_PVALIDATE call, all
reserved fields within a PVALIDATE entry must be set to zero as an SVSM should
be ensuring all reserved fields are zero in order to support future usage of
reserved areas based on the protocol version.
Fixes: fcd042e86422 ("x86/sev: Perform PVALIDATE using the SVSM when not at VMPL0")
Signed-off-by: Tom Lendacky <thomas.lendacky(a)amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Reviewed-by: Joerg Roedel <joerg.roedel(a)amd.com>
Cc: <stable(a)kernel.org>
Link: https://lore.kernel.org/7cde412f8b057ea13a646fb166b1ca023f6a5031.1755098819…
diff --git a/arch/x86/boot/startup/sev-shared.c b/arch/x86/boot/startup/sev-shared.c
index 7a706db87b93..4ab0dbd043c6 100644
--- a/arch/x86/boot/startup/sev-shared.c
+++ b/arch/x86/boot/startup/sev-shared.c
@@ -785,6 +785,7 @@ static void __head svsm_pval_4k_page(unsigned long paddr, bool validate)
pc->entry[0].page_size = RMP_PG_SIZE_4K;
pc->entry[0].action = validate;
pc->entry[0].ignore_cf = 0;
+ pc->entry[0].rsvd = 0;
pc->entry[0].pfn = paddr >> PAGE_SHIFT;
/* Protocol 0, Call ID 1 */
diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c
index fc59ce78c477..43ecc6b9fb9c 100644
--- a/arch/x86/coco/sev/core.c
+++ b/arch/x86/coco/sev/core.c
@@ -227,6 +227,7 @@ static u64 svsm_build_ca_from_pfn_range(u64 pfn, u64 pfn_end, bool action,
pe->page_size = RMP_PG_SIZE_4K;
pe->action = action;
pe->ignore_cf = 0;
+ pe->rsvd = 0;
pe->pfn = pfn;
pe++;
@@ -257,6 +258,7 @@ static int svsm_build_ca_from_psc_desc(struct snp_psc_desc *desc, unsigned int d
pe->page_size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K;
pe->action = e->operation == SNP_PAGE_STATE_PRIVATE;
pe->ignore_cf = 0;
+ pe->rsvd = 0;
pe->pfn = e->gfn;
pe++;
The following commit has been merged into the x86/urgent branch of tip:
Commit-ID: e9576e078220c50ace9e9087355423de23e25fa5
Gitweb: https://git.kernel.org/tip/e9576e078220c50ace9e9087355423de23e25fa5
Author: Yazen Ghannam <yazen.ghannam(a)amd.com>
AuthorDate: Mon, 21 Jul 2025 18:11:54
Committer: Borislav Petkov (AMD) <bp(a)alien8.de>
CommitterDate: Mon, 18 Aug 2025 16:36:59 +02:00
x86/CPU/AMD: Ignore invalid reset reason value
The reset reason value may be "all bits set", e.g. 0xFFFFFFFF. This is a
commonly used error response from hardware. This may occur due to a real
hardware issue or when running in a VM.
The user will see all reset reasons reported in this case.
Check for an error response value and return early to avoid decoding
invalid data.
Also, adjust the data variable type to match the hardware register size.
Fixes: ab8131028710 ("x86/CPU/AMD: Print the reason for the last reset")
Reported-by: Libing He <libhe(a)redhat.com>
Signed-off-by: Yazen Ghannam <yazen.ghannam(a)amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Reviewed-by: Mario Limonciello <mario.limonciello(a)amd.com>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/20250721181155.3536023-1-yazen.ghannam@amd.com
---
arch/x86/kernel/cpu/amd.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index a5ece6e..a6f88ca 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -1326,8 +1326,8 @@ static const char * const s5_reset_reason_txt[] = {
static __init int print_s5_reset_status_mmio(void)
{
- unsigned long value;
void __iomem *addr;
+ u32 value;
int i;
if (!cpu_feature_enabled(X86_FEATURE_ZEN))
@@ -1340,12 +1340,16 @@ static __init int print_s5_reset_status_mmio(void)
value = ioread32(addr);
iounmap(addr);
+ /* Value with "all bits set" is an error response and should be ignored. */
+ if (value == U32_MAX)
+ return 0;
+
for (i = 0; i < ARRAY_SIZE(s5_reset_reason_txt); i++) {
if (!(value & BIT(i)))
continue;
if (s5_reset_reason_txt[i]) {
- pr_info("x86/amd: Previous system reset reason [0x%08lx]: %s\n",
+ pr_info("x86/amd: Previous system reset reason [0x%08x]: %s\n",
value, s5_reset_reason_txt[i]);
}
}
Hello,
New build issue found on stable-rc/linux-5.4.y:
---
‘CPUFREQ_NEED_UPDATE_LIMITS’ undeclared here (not in a function) in drivers/cpufreq/cppc_cpufreq.o (drivers/cpufreq/cppc_cpufreq.c) [logspec:kbuild,kbuild.compiler.error]
---
- dashboard: https://d.kernelci.org/i/maestro:dc41002c36bbf28f576cb4cf62779067892fb9db
- giturl: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
- commit HEAD: a6319f2fe27b8fefe40757d3797cfca30d43ce3c
Log excerpt:
=====================================================
drivers/cpufreq/cppc_cpufreq.c:410:40: error: ‘CPUFREQ_NEED_UPDATE_LIMITS’ undeclared here (not in a function)
410 | .flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
| ^~~~~~~~~~~~~~~~~~~~~~~~~~
=====================================================
# Builds where the incident occurred:
## defconfig+arm64-chromebook+kselftest on (arm64):
- compiler: gcc-12
- dashboard: https://d.kernelci.org/build/maestro:68a3251d233e484a3f9e9e33
## defconfig+lab-setup+arm64-chromebook+CONFIG_MODULE_COMPRESS=n+CONFIG_MODULE_COMPRESS_NONE=y on (arm64):
- compiler: gcc-12
- dashboard: https://d.kernelci.org/build/maestro:68a324d8233e484a3f9e9df2
## defconfig+lab-setup+kselftest on (arm64):
- compiler: gcc-12
- dashboard: https://d.kernelci.org/build/maestro:68a32513233e484a3f9e9e2a
#kernelci issue maestro:dc41002c36bbf28f576cb4cf62779067892fb9db
Reported-by: kernelci.org bot <bot(a)kernelci.org>
--
This is an experimental report format. Please send feedback in!
Talk to us at kernelci(a)lists.linux.dev
Made with love by the KernelCI team - https://kernelci.org
Hello,
New build issue found on stable-rc/linux-5.4.y:
---
use of undeclared identifier 'CPUFREQ_NEED_UPDATE_LIMITS' in drivers/cpufreq/cppc_cpufreq.o (drivers/cpufreq/cppc_cpufreq.c) [logspec:kbuild,kbuild.compiler.error]
---
- dashboard: https://d.kernelci.org/i/maestro:b9c80192e71f04dd69fa1038881cb02b2c46b045
- giturl: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
- commit HEAD: a6319f2fe27b8fefe40757d3797cfca30d43ce3c
Log excerpt:
=====================================================
drivers/cpufreq/cppc_cpufreq.c:410:33: error: use of undeclared identifier 'CPUFREQ_NEED_UPDATE_LIMITS'
410 | .flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
| ^
1 error generated.
=====================================================
# Builds where the incident occurred:
## defconfig+arm64-chromebook+kselftest on (arm64):
- compiler: clang-17
- dashboard: https://d.kernelci.org/build/maestro:68a324ee233e484a3f9e9e07
#kernelci issue maestro:b9c80192e71f04dd69fa1038881cb02b2c46b045
Reported-by: kernelci.org bot <bot(a)kernelci.org>
--
This is an experimental report format. Please send feedback in!
Talk to us at kernelci(a)lists.linux.dev
Made with love by the KernelCI team - https://kernelci.org
Hi,
While testing Linux kernel 6.12.42 on OpenWrt, we observed a
regression in IPv6 Router Advertisement (RA) handling for the default
router.
Affected commits
The following commits appear related and may have introduced the issue:
ipv6: fix possible infinite loop in fib6_info_uses_dev():
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=…
ipv6: prevent infinite loop in rt6_nlmsg_size():
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=…
ipv6: annotate data-races around rt->fib6_nsiblings:
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=…
Problem description:
In Linux kernel 6.12.42, IPv6 FIB multipath and concurrent access
handling was made stricter (READ_ONCE / WRITE_ONCE + RCU retry).
The RA “Automatic” mode relies on checking whether a local default route exists.
With the stricter FIB handling, this check can fail in multipath scenarios.
As a result, RA does not advertise a default route, and IPv6 clients
on LAN fail to receive the default gateway.
Steps to reproduce
Run OpenWrt with kernel 6.12.42 on a router with br-lan bridge.
Configure IPv6 RA in Automatic default router mode.
Observe that no default route is advertised to clients (though
prefixes may still be delivered).
Expected behavior
Router Advertisement should continue to advertise the default route as
in kernel 6.12.41 and earlier.
Client IPv6 connectivity should not break.
Actual behavior
RA fails to advertise a default route in Automatic mode.
Clients do not install a default IPv6 route → connectivity fails.
Temporary workaround
Change RA default router mode from Automatic → Always / Use available
prefixes in OpenWrt.
This bypasses the dependency on local default route check and restores
correct RA behavior.
Additional notes
This appears to be an unintended side effect of the stricter FIB
handling changes introduced in 6.12.42. Please advise if this has
already been reported or if I should prepare a minimal reproducer
outside OpenWrt.
Thanks,
[GitHub: mgz0227]
Since commits
7b9eb53e8591 ("media: cx18: Access v4l2_fh from file")
9ba9d11544f9 ("media: ivtv: Access v4l2_fh from file")
All the ioctl handlers access their private data structures
from file *
The ivtv and cx18 drivers call the ioctl handlers from their
DVB layer without a valid file *, causing invalid memory access.
The issue has been reported by smatch in
"[bug report] media: cx18: Access v4l2_fh from file"
Fix this by providing wrappers for the ioctl handlers to be
used by the DVB layer that do not require a valid file *.
Signed-off-by: Jacopo Mondi <jacopo.mondi(a)ideasonboard.com>
---
Changes in v2:
- Add Cc: stable(a)vger.kernel.org per-patch
---
Jacopo Mondi (2):
media: cx18: Fix invalid access to file *
media: ivtv: Fix invalid access to file *
drivers/media/pci/cx18/cx18-driver.c | 6 +++---
drivers/media/pci/cx18/cx18-ioctl.c | 26 ++++++++++++++++++++------
drivers/media/pci/cx18/cx18-ioctl.h | 8 +++++---
drivers/media/pci/ivtv/ivtv-driver.c | 4 ++--
drivers/media/pci/ivtv/ivtv-ioctl.c | 22 +++++++++++++++++-----
drivers/media/pci/ivtv/ivtv-ioctl.h | 6 ++++--
6 files changed, 51 insertions(+), 21 deletions(-)
---
base-commit: a75b8d198c55e9eb5feb6f6e155496305caba2dc
change-id: 20250818-cx18-v4l2-fh-7eaa6199fdde
Best regards,
--
Jacopo Mondi <jacopo.mondi(a)ideasonboard.com>
Since commits
7b9eb53e8591 ("media: cx18: Access v4l2_fh from file")
9ba9d11544f9 ("media: ivtv: Access v4l2_fh from file")
All the ioctl handlers access their private data structures
from file *
The ivtv and cx18 drivers call the ioctl handlers from their
DVB layer without a valid file *, causing invalid memory access.
The issue has been reported by smatch in
"[bug report] media: cx18: Access v4l2_fh from file"
Fix this by providing wrappers for the ioctl handlers to be
used by the DVB layer that do not require a valid file *.
Signed-off-by: Jacopo Mondi <jacopo.mondi(a)ideasonboard.com>
---
Jacopo Mondi (2):
media: cx18: Fix invalid access to file *
media: ivtv: Fix invalid access to file *
drivers/media/pci/cx18/cx18-driver.c | 6 +++---
drivers/media/pci/cx18/cx18-ioctl.c | 26 ++++++++++++++++++++------
drivers/media/pci/cx18/cx18-ioctl.h | 8 +++++---
drivers/media/pci/ivtv/ivtv-driver.c | 4 ++--
drivers/media/pci/ivtv/ivtv-ioctl.c | 22 +++++++++++++++++-----
drivers/media/pci/ivtv/ivtv-ioctl.h | 6 ++++--
6 files changed, 51 insertions(+), 21 deletions(-)
---
base-commit: a75b8d198c55e9eb5feb6f6e155496305caba2dc
change-id: 20250818-cx18-v4l2-fh-7eaa6199fdde
Best regards,
--
Jacopo Mondi <jacopo.mondi(a)ideasonboard.com>
[ Upstream commit a238487f7965d102794ed9f8aff0b667cd2ae886 ]
The 4xxx drivers hardcode the ring to service mapping. However, when
additional configurations where added to the driver, the mappings were
not updated. This implies that an incorrect mapping might be reported
through pfvf for certain configurations.
This is a backport of the upstream commit with modifications, as the
original patch does not apply cleanly to kernel v6.1.x. The logic has
been simplified to reflect the limited configurations of the QAT driver
in this version: crypto-only and compression.
Instead of dynamically computing the ring to service mappings, these are
now hardcoded to simplify the backport.
Fixes: 0cec19c761e5 ("crypto: qat - add support for compression for 4xxx")
Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu(a)intel.com>
Reviewed-by: Damian Muszynski <damian.muszynski(a)intel.com>
Reviewed-by: Tero Kristo <tero.kristo(a)linux.intel.com>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
(cherry-picked from commit a238487f7965d102794ed9f8aff0b667cd2ae886)
[Giovanni: backport to 6.1.y, conflict resolved simplifying the logic
in the function get_ring_to_svc_map() as the QAT driver in v6.1 supports
only limited configurations (crypto only and compression). Differs from
upstream as the ring to service mapping is hardcoded rather than being
dynamically computed.]
Reviewed-by: Ahsan Atta <ahsan.atta(a)intel.com>
Tested-by: Ahsan Atta <ahsan.atta(a)intel.com>
Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu(a)intel.com>
---
V1 -> V2: changed signed-off-by area:
* added (cherry-picked from ...) after last tag from upstream commit
* added a note explaining how this backport differs from the original patch
* added a new Signed-off-by tag for the backport author.
drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c | 13 +++++++++++++
drivers/crypto/qat/qat_common/adf_accel_devices.h | 1 +
drivers/crypto/qat/qat_common/adf_gen4_hw_data.h | 6 ++++++
drivers/crypto/qat/qat_common/adf_init.c | 3 +++
4 files changed, 23 insertions(+)
diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c
index fda5f699ff57..65b52c692add 100644
--- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c
+++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c
@@ -297,6 +297,18 @@ static char *uof_get_name(struct adf_accel_dev *accel_dev, u32 obj_num)
return NULL;
}
+static u16 get_ring_to_svc_map(struct adf_accel_dev *accel_dev)
+{
+ switch (get_service_enabled(accel_dev)) {
+ case SVC_CY:
+ return ADF_GEN4_DEFAULT_RING_TO_SRV_MAP;
+ case SVC_DC:
+ return ADF_GEN4_DEFAULT_RING_TO_SRV_MAP_DC;
+ }
+
+ return 0;
+}
+
static u32 uof_get_ae_mask(struct adf_accel_dev *accel_dev, u32 obj_num)
{
switch (get_service_enabled(accel_dev)) {
@@ -353,6 +365,7 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data)
hw_data->uof_get_ae_mask = uof_get_ae_mask;
hw_data->set_msix_rttable = set_msix_default_rttable;
hw_data->set_ssm_wdtimer = adf_gen4_set_ssm_wdtimer;
+ hw_data->get_ring_to_svc_map = get_ring_to_svc_map;
hw_data->disable_iov = adf_disable_sriov;
hw_data->ring_pair_reset = adf_gen4_ring_pair_reset;
hw_data->enable_pm = adf_gen4_enable_pm;
diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h
index ad01d99e6e2b..7993d0f82dea 100644
--- a/drivers/crypto/qat/qat_common/adf_accel_devices.h
+++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h
@@ -176,6 +176,7 @@ struct adf_hw_device_data {
void (*get_arb_info)(struct arb_info *arb_csrs_info);
void (*get_admin_info)(struct admin_info *admin_csrs_info);
enum dev_sku_info (*get_sku)(struct adf_hw_device_data *self);
+ u16 (*get_ring_to_svc_map)(struct adf_accel_dev *accel_dev);
int (*alloc_irq)(struct adf_accel_dev *accel_dev);
void (*free_irq)(struct adf_accel_dev *accel_dev);
void (*enable_error_correction)(struct adf_accel_dev *accel_dev);
diff --git a/drivers/crypto/qat/qat_common/adf_gen4_hw_data.h b/drivers/crypto/qat/qat_common/adf_gen4_hw_data.h
index 4fb4b3df5a18..5e653ec755e6 100644
--- a/drivers/crypto/qat/qat_common/adf_gen4_hw_data.h
+++ b/drivers/crypto/qat/qat_common/adf_gen4_hw_data.h
@@ -95,6 +95,12 @@ do { \
ADF_RING_BUNDLE_SIZE * (bank) + \
ADF_RING_CSR_RING_SRV_ARB_EN, (value))
+#define ADF_GEN4_DEFAULT_RING_TO_SRV_MAP_DC \
+ (COMP << ADF_CFG_SERV_RING_PAIR_0_SHIFT | \
+ COMP << ADF_CFG_SERV_RING_PAIR_1_SHIFT | \
+ COMP << ADF_CFG_SERV_RING_PAIR_2_SHIFT | \
+ COMP << ADF_CFG_SERV_RING_PAIR_3_SHIFT)
+
/* Default ring mapping */
#define ADF_GEN4_DEFAULT_RING_TO_SRV_MAP \
(ASYM << ADF_CFG_SERV_RING_PAIR_0_SHIFT | \
diff --git a/drivers/crypto/qat/qat_common/adf_init.c b/drivers/crypto/qat/qat_common/adf_init.c
index 2e3481270c4b..49f07584f8c9 100644
--- a/drivers/crypto/qat/qat_common/adf_init.c
+++ b/drivers/crypto/qat/qat_common/adf_init.c
@@ -95,6 +95,9 @@ int adf_dev_init(struct adf_accel_dev *accel_dev)
return -EFAULT;
}
+ if (hw_data->get_ring_to_svc_map)
+ hw_data->ring_to_svc_map = hw_data->get_ring_to_svc_map(accel_dev);
+
if (adf_ae_init(accel_dev)) {
dev_err(&GET_DEV(accel_dev),
"Failed to initialise Acceleration Engine\n");
--
2.50.0
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x abbf9a44944171ca99c150adad9361a2f517d3b6
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081813-overhand-resolute-c0f3@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From abbf9a44944171ca99c150adad9361a2f517d3b6 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda(a)kernel.org>
Date: Sun, 27 Jul 2025 11:23:17 +0200
Subject: [PATCH] rust: workaround `rustdoc` target modifiers bug
Starting with Rust 1.88.0 (released 2025-06-26), `rustdoc` complains
about a target modifier mismatch in configurations where `-Zfixed-x18`
is passed:
error: mixing `-Zfixed-x18` will cause an ABI mismatch in crate `rust_out`
|
= help: the `-Zfixed-x18` flag modifies the ABI so Rust crates compiled with different values of this flag cannot be used together safely
= note: unset `-Zfixed-x18` in this crate is incompatible with `-Zfixed-x18=` in dependency `core`
= help: set `-Zfixed-x18=` in this crate or unset `-Zfixed-x18` in `core`
= help: if you are sure this will not cause problems, you may use `-Cunsafe-allow-abi-mismatch=fixed-x18` to silence this error
The reason is that `rustdoc` was not passing the target modifiers when
configuring the session options, and thus it would report a mismatch
that did not exist as soon as a target modifier is used in a dependency.
We did not notice it in the kernel until now because `-Zfixed-x18` has
been a target modifier only since 1.88.0 (and it is the only one we use
so far).
The issue has been reported upstream [1] and a fix has been submitted
[2], including a test similar to the kernel case.
[ This is now fixed upstream (thanks Guillaume for the quick review),
so it will be fixed in Rust 1.90.0 (expected 2025-09-18).
- Miguel ]
Meanwhile, conditionally pass `-Cunsafe-allow-abi-mismatch=fixed-x18`
to workaround the issue on our side.
Cc: stable(a)vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs).
Reported-by: Konrad Dybcio <konrad.dybcio(a)oss.qualcomm.com>
Closes: https://lore.kernel.org/rust-for-linux/36cdc798-524f-4910-8b77-d7b9fac08d77…
Link: https://github.com/rust-lang/rust/issues/144521 [1]
Link: https://github.com/rust-lang/rust/pull/144523 [2]
Reviewed-by: Alice Ryhl <aliceryhl(a)google.com>
Link: https://lore.kernel.org/r/20250727092317.2930617-1-ojeda@kernel.org
Signed-off-by: Miguel Ojeda <ojeda(a)kernel.org>
diff --git a/rust/Makefile b/rust/Makefile
index 4263462b8470..d47f82588d78 100644
--- a/rust/Makefile
+++ b/rust/Makefile
@@ -65,6 +65,10 @@ core-cfgs = \
core-edition := $(if $(call rustc-min-version,108700),2024,2021)
+# `rustdoc` did not save the target modifiers, thus workaround for
+# the time being (https://github.com/rust-lang/rust/issues/144521).
+rustdoc_modifiers_workaround := $(if $(call rustc-min-version,108800),-Cunsafe-allow-abi-mismatch=fixed-x18)
+
# `rustc` recognizes `--remap-path-prefix` since 1.26.0, but `rustdoc` only
# since Rust 1.81.0. Moreover, `rustdoc` ICEs on out-of-tree builds since Rust
# 1.82.0 (https://github.com/rust-lang/rust/issues/138520). Thus workaround both
@@ -77,6 +81,7 @@ quiet_cmd_rustdoc = RUSTDOC $(if $(rustdoc_host),H, ) $<
-Zunstable-options --generate-link-to-definition \
--output $(rustdoc_output) \
--crate-name $(subst rustdoc-,,$@) \
+ $(rustdoc_modifiers_workaround) \
$(if $(rustdoc_host),,--sysroot=/dev/null) \
@$(objtree)/include/generated/rustc_cfg $<
@@ -215,6 +220,7 @@ quiet_cmd_rustdoc_test_kernel = RUSTDOC TK $<
--extern bindings --extern uapi \
--no-run --crate-name kernel -Zunstable-options \
--sysroot=/dev/null \
+ $(rustdoc_modifiers_workaround) \
--test-builder $(objtree)/scripts/rustdoc_test_builder \
$< $(rustdoc_test_kernel_quiet); \
$(objtree)/scripts/rustdoc_test_gen
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 252fea131e15aba2cd487119d1a8f546471199e2
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081858-zips-enchanted-3d3e@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 252fea131e15aba2cd487119d1a8f546471199e2 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda(a)kernel.org>
Date: Sat, 26 Jul 2025 15:34:35 +0200
Subject: [PATCH] rust: kbuild: clean output before running `rustdoc`
`rustdoc` can get confused when generating documentation into a folder
that contains generated files from other `rustdoc` versions.
For instance, running something like:
rustup default 1.78.0
make LLVM=1 rustdoc
rustup default 1.88.0
make LLVM=1 rustdoc
may generate errors like:
error: couldn't generate documentation: invalid template: last line expected to start with a comment
|
= note: failed to create or modify "./Documentation/output/rust/rustdoc/src-files.js"
Thus just always clean the output folder before generating the
documentation -- we are anyway regenerating it every time the `rustdoc`
target gets called, at least for the time being.
Cc: stable(a)vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs).
Reported-by: Daniel Almeida <daniel.almeida(a)collabora.com>
Closes: https://rust-for-linux.zulipchat.com/#narrow/channel/288089/topic/x/near/52…
Reviewed-by: Tamir Duberstein <tamird(a)kernel.org>
Link: https://lore.kernel.org/r/20250726133435.2460085-1-ojeda@kernel.org
Signed-off-by: Miguel Ojeda <ojeda(a)kernel.org>
diff --git a/rust/Makefile b/rust/Makefile
index d47f82588d78..bfa915b0e588 100644
--- a/rust/Makefile
+++ b/rust/Makefile
@@ -111,14 +111,14 @@ rustdoc: rustdoc-core rustdoc-macros rustdoc-compiler_builtins \
rustdoc-macros: private rustdoc_host = yes
rustdoc-macros: private rustc_target_flags = --crate-type proc-macro \
--extern proc_macro
-rustdoc-macros: $(src)/macros/lib.rs FORCE
+rustdoc-macros: $(src)/macros/lib.rs rustdoc-clean FORCE
+$(call if_changed,rustdoc)
# Starting with Rust 1.82.0, skipping `-Wrustdoc::unescaped_backticks` should
# not be needed -- see https://github.com/rust-lang/rust/pull/128307.
rustdoc-core: private skip_flags = --edition=2021 -Wrustdoc::unescaped_backticks
rustdoc-core: private rustc_target_flags = --edition=$(core-edition) $(core-cfgs)
-rustdoc-core: $(RUST_LIB_SRC)/core/src/lib.rs FORCE
+rustdoc-core: $(RUST_LIB_SRC)/core/src/lib.rs rustdoc-clean FORCE
+$(call if_changed,rustdoc)
rustdoc-compiler_builtins: $(src)/compiler_builtins.rs rustdoc-core FORCE
@@ -130,7 +130,8 @@ rustdoc-ffi: $(src)/ffi.rs rustdoc-core FORCE
rustdoc-pin_init_internal: private rustdoc_host = yes
rustdoc-pin_init_internal: private rustc_target_flags = --cfg kernel \
--extern proc_macro --crate-type proc-macro
-rustdoc-pin_init_internal: $(src)/pin-init/internal/src/lib.rs FORCE
+rustdoc-pin_init_internal: $(src)/pin-init/internal/src/lib.rs \
+ rustdoc-clean FORCE
+$(call if_changed,rustdoc)
rustdoc-pin_init: private rustdoc_host = yes
@@ -148,6 +149,9 @@ rustdoc-kernel: $(src)/kernel/lib.rs rustdoc-core rustdoc-ffi rustdoc-macros \
$(obj)/bindings.o FORCE
+$(call if_changed,rustdoc)
+rustdoc-clean: FORCE
+ $(Q)rm -rf $(rustdoc_output)
+
quiet_cmd_rustc_test_library = $(RUSTC_OR_CLIPPY_QUIET) TL $<
cmd_rustc_test_library = \
OBJTREE=$(abspath $(objtree)) \
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 2ae826799932ff89409f56636ad3c25578fe7cf5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081825-obedience-result-270a@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2ae826799932ff89409f56636ad3c25578fe7cf5 Mon Sep 17 00:00:00 2001
From: Lizhi Xu <lizhi.xu(a)windriver.com>
Date: Mon, 16 Jun 2025 09:31:40 +0800
Subject: [PATCH] ocfs2: reset folio to NULL when get folio fails
The reproducer uses FAULT_INJECTION to make memory allocation fail, which
causes __filemap_get_folio() to fail, when initializing w_folios[i] in
ocfs2_grab_folios_for_write(), it only returns an error code and the value
of w_folios[i] is the error code, which causes
ocfs2_unlock_and_free_folios() to recycle the invalid w_folios[i] when
releasing folios.
Link: https://lkml.kernel.org/r/20250616013140.3602219-1-lizhi.xu@windriver.com
Reported-by: syzbot+c2ea94ae47cd7e3881ec(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=c2ea94ae47cd7e3881ec
Signed-off-by: Lizhi Xu <lizhi.xu(a)windriver.com>
Reviewed-by: Joseph Qi <joseph.qi(a)linux.alibaba.com>
Cc: Mark Fasheh <mark(a)fasheh.com>
Cc: Joel Becker <jlbec(a)evilplan.org>
Cc: Junxiao Bi <junxiao.bi(a)oracle.com>
Cc: Changwei Ge <gechangwei(a)live.cn>
Cc: Jun Piao <piaojun(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 40b6bce12951..89aadc6cdd87 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1071,6 +1071,7 @@ static int ocfs2_grab_folios_for_write(struct address_space *mapping,
if (IS_ERR(wc->w_folios[i])) {
ret = PTR_ERR(wc->w_folios[i]);
mlog_errno(ret);
+ wc->w_folios[i] = NULL;
goto out;
}
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 2ae826799932ff89409f56636ad3c25578fe7cf5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081824-approach-prodigal-6b35@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2ae826799932ff89409f56636ad3c25578fe7cf5 Mon Sep 17 00:00:00 2001
From: Lizhi Xu <lizhi.xu(a)windriver.com>
Date: Mon, 16 Jun 2025 09:31:40 +0800
Subject: [PATCH] ocfs2: reset folio to NULL when get folio fails
The reproducer uses FAULT_INJECTION to make memory allocation fail, which
causes __filemap_get_folio() to fail, when initializing w_folios[i] in
ocfs2_grab_folios_for_write(), it only returns an error code and the value
of w_folios[i] is the error code, which causes
ocfs2_unlock_and_free_folios() to recycle the invalid w_folios[i] when
releasing folios.
Link: https://lkml.kernel.org/r/20250616013140.3602219-1-lizhi.xu@windriver.com
Reported-by: syzbot+c2ea94ae47cd7e3881ec(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=c2ea94ae47cd7e3881ec
Signed-off-by: Lizhi Xu <lizhi.xu(a)windriver.com>
Reviewed-by: Joseph Qi <joseph.qi(a)linux.alibaba.com>
Cc: Mark Fasheh <mark(a)fasheh.com>
Cc: Joel Becker <jlbec(a)evilplan.org>
Cc: Junxiao Bi <junxiao.bi(a)oracle.com>
Cc: Changwei Ge <gechangwei(a)live.cn>
Cc: Jun Piao <piaojun(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 40b6bce12951..89aadc6cdd87 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1071,6 +1071,7 @@ static int ocfs2_grab_folios_for_write(struct address_space *mapping,
if (IS_ERR(wc->w_folios[i])) {
ret = PTR_ERR(wc->w_folios[i]);
mlog_errno(ret);
+ wc->w_folios[i] = NULL;
goto out;
}
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 2ae826799932ff89409f56636ad3c25578fe7cf5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081824-glimpse-unmanned-9833@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2ae826799932ff89409f56636ad3c25578fe7cf5 Mon Sep 17 00:00:00 2001
From: Lizhi Xu <lizhi.xu(a)windriver.com>
Date: Mon, 16 Jun 2025 09:31:40 +0800
Subject: [PATCH] ocfs2: reset folio to NULL when get folio fails
The reproducer uses FAULT_INJECTION to make memory allocation fail, which
causes __filemap_get_folio() to fail, when initializing w_folios[i] in
ocfs2_grab_folios_for_write(), it only returns an error code and the value
of w_folios[i] is the error code, which causes
ocfs2_unlock_and_free_folios() to recycle the invalid w_folios[i] when
releasing folios.
Link: https://lkml.kernel.org/r/20250616013140.3602219-1-lizhi.xu@windriver.com
Reported-by: syzbot+c2ea94ae47cd7e3881ec(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=c2ea94ae47cd7e3881ec
Signed-off-by: Lizhi Xu <lizhi.xu(a)windriver.com>
Reviewed-by: Joseph Qi <joseph.qi(a)linux.alibaba.com>
Cc: Mark Fasheh <mark(a)fasheh.com>
Cc: Joel Becker <jlbec(a)evilplan.org>
Cc: Junxiao Bi <junxiao.bi(a)oracle.com>
Cc: Changwei Ge <gechangwei(a)live.cn>
Cc: Jun Piao <piaojun(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 40b6bce12951..89aadc6cdd87 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1071,6 +1071,7 @@ static int ocfs2_grab_folios_for_write(struct address_space *mapping,
if (IS_ERR(wc->w_folios[i])) {
ret = PTR_ERR(wc->w_folios[i]);
mlog_errno(ret);
+ wc->w_folios[i] = NULL;
goto out;
}
}
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 2ae826799932ff89409f56636ad3c25578fe7cf5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081822-pebble-cinch-682b@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2ae826799932ff89409f56636ad3c25578fe7cf5 Mon Sep 17 00:00:00 2001
From: Lizhi Xu <lizhi.xu(a)windriver.com>
Date: Mon, 16 Jun 2025 09:31:40 +0800
Subject: [PATCH] ocfs2: reset folio to NULL when get folio fails
The reproducer uses FAULT_INJECTION to make memory allocation fail, which
causes __filemap_get_folio() to fail, when initializing w_folios[i] in
ocfs2_grab_folios_for_write(), it only returns an error code and the value
of w_folios[i] is the error code, which causes
ocfs2_unlock_and_free_folios() to recycle the invalid w_folios[i] when
releasing folios.
Link: https://lkml.kernel.org/r/20250616013140.3602219-1-lizhi.xu@windriver.com
Reported-by: syzbot+c2ea94ae47cd7e3881ec(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=c2ea94ae47cd7e3881ec
Signed-off-by: Lizhi Xu <lizhi.xu(a)windriver.com>
Reviewed-by: Joseph Qi <joseph.qi(a)linux.alibaba.com>
Cc: Mark Fasheh <mark(a)fasheh.com>
Cc: Joel Becker <jlbec(a)evilplan.org>
Cc: Junxiao Bi <junxiao.bi(a)oracle.com>
Cc: Changwei Ge <gechangwei(a)live.cn>
Cc: Jun Piao <piaojun(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 40b6bce12951..89aadc6cdd87 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1071,6 +1071,7 @@ static int ocfs2_grab_folios_for_write(struct address_space *mapping,
if (IS_ERR(wc->w_folios[i])) {
ret = PTR_ERR(wc->w_folios[i]);
mlog_errno(ret);
+ wc->w_folios[i] = NULL;
goto out;
}
}
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 2ae826799932ff89409f56636ad3c25578fe7cf5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081823-exuberant-trace-6385@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2ae826799932ff89409f56636ad3c25578fe7cf5 Mon Sep 17 00:00:00 2001
From: Lizhi Xu <lizhi.xu(a)windriver.com>
Date: Mon, 16 Jun 2025 09:31:40 +0800
Subject: [PATCH] ocfs2: reset folio to NULL when get folio fails
The reproducer uses FAULT_INJECTION to make memory allocation fail, which
causes __filemap_get_folio() to fail, when initializing w_folios[i] in
ocfs2_grab_folios_for_write(), it only returns an error code and the value
of w_folios[i] is the error code, which causes
ocfs2_unlock_and_free_folios() to recycle the invalid w_folios[i] when
releasing folios.
Link: https://lkml.kernel.org/r/20250616013140.3602219-1-lizhi.xu@windriver.com
Reported-by: syzbot+c2ea94ae47cd7e3881ec(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=c2ea94ae47cd7e3881ec
Signed-off-by: Lizhi Xu <lizhi.xu(a)windriver.com>
Reviewed-by: Joseph Qi <joseph.qi(a)linux.alibaba.com>
Cc: Mark Fasheh <mark(a)fasheh.com>
Cc: Joel Becker <jlbec(a)evilplan.org>
Cc: Junxiao Bi <junxiao.bi(a)oracle.com>
Cc: Changwei Ge <gechangwei(a)live.cn>
Cc: Jun Piao <piaojun(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 40b6bce12951..89aadc6cdd87 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1071,6 +1071,7 @@ static int ocfs2_grab_folios_for_write(struct address_space *mapping,
if (IS_ERR(wc->w_folios[i])) {
ret = PTR_ERR(wc->w_folios[i]);
mlog_errno(ret);
+ wc->w_folios[i] = NULL;
goto out;
}
}
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 2ae826799932ff89409f56636ad3c25578fe7cf5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081822-hasty-vineyard-a5ea@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2ae826799932ff89409f56636ad3c25578fe7cf5 Mon Sep 17 00:00:00 2001
From: Lizhi Xu <lizhi.xu(a)windriver.com>
Date: Mon, 16 Jun 2025 09:31:40 +0800
Subject: [PATCH] ocfs2: reset folio to NULL when get folio fails
The reproducer uses FAULT_INJECTION to make memory allocation fail, which
causes __filemap_get_folio() to fail, when initializing w_folios[i] in
ocfs2_grab_folios_for_write(), it only returns an error code and the value
of w_folios[i] is the error code, which causes
ocfs2_unlock_and_free_folios() to recycle the invalid w_folios[i] when
releasing folios.
Link: https://lkml.kernel.org/r/20250616013140.3602219-1-lizhi.xu@windriver.com
Reported-by: syzbot+c2ea94ae47cd7e3881ec(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=c2ea94ae47cd7e3881ec
Signed-off-by: Lizhi Xu <lizhi.xu(a)windriver.com>
Reviewed-by: Joseph Qi <joseph.qi(a)linux.alibaba.com>
Cc: Mark Fasheh <mark(a)fasheh.com>
Cc: Joel Becker <jlbec(a)evilplan.org>
Cc: Junxiao Bi <junxiao.bi(a)oracle.com>
Cc: Changwei Ge <gechangwei(a)live.cn>
Cc: Jun Piao <piaojun(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 40b6bce12951..89aadc6cdd87 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1071,6 +1071,7 @@ static int ocfs2_grab_folios_for_write(struct address_space *mapping,
if (IS_ERR(wc->w_folios[i])) {
ret = PTR_ERR(wc->w_folios[i]);
mlog_errno(ret);
+ wc->w_folios[i] = NULL;
goto out;
}
}
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 5c241ed8d031693dadf33dd98ed2e7cc363e9b66
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081819-dawdler-letdown-9d50@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5c241ed8d031693dadf33dd98ed2e7cc363e9b66 Mon Sep 17 00:00:00 2001
From: Kairui Song <kasong(a)tencent.com>
Date: Mon, 28 Jul 2025 15:52:59 +0800
Subject: [PATCH] mm/shmem, swap: improve cached mTHP handling and fix
potential hang
The current swap-in code assumes that, when a swap entry in shmem mapping
is order 0, its cached folios (if present) must be order 0 too, which
turns out not always correct.
The problem is shmem_split_large_entry is called before verifying the
folio will eventually be swapped in, one possible race is:
CPU1 CPU2
shmem_swapin_folio
/* swap in of order > 0 swap entry S1 */
folio = swap_cache_get_folio
/* folio = NULL */
order = xa_get_order
/* order > 0 */
folio = shmem_swap_alloc_folio
/* mTHP alloc failure, folio = NULL */
<... Interrupted ...>
shmem_swapin_folio
/* S1 is swapped in */
shmem_writeout
/* S1 is swapped out, folio cached */
shmem_split_large_entry(..., S1)
/* S1 is split, but the folio covering it has order > 0 now */
Now any following swapin of S1 will hang: `xa_get_order` returns 0, and
folio lookup will return a folio with order > 0. The
`xa_get_order(&mapping->i_pages, index) != folio_order(folio)` will always
return false causing swap-in to return -EEXIST.
And this looks fragile. So fix this up by allowing seeing a larger folio
in swap cache, and check the whole shmem mapping range covered by the
swapin have the right swap value upon inserting the folio. And drop the
redundant tree walks before the insertion.
This will actually improve performance, as it avoids two redundant Xarray
tree walks in the hot path, and the only side effect is that in the
failure path, shmem may redundantly reallocate a few folios causing
temporary slight memory pressure.
And worth noting, it may seems the order and value check before inserting
might help reducing the lock contention, which is not true. The swap
cache layer ensures raced swapin will either see a swap cache folio or
failed to do a swapin (we have SWAP_HAS_CACHE bit even if swap cache is
bypassed), so holding the folio lock and checking the folio flag is
already good enough for avoiding the lock contention. The chance that a
folio passes the swap entry value check but the shmem mapping slot has
changed should be very low.
Link: https://lkml.kernel.org/r/20250728075306.12704-1-ryncsn@gmail.com
Link: https://lkml.kernel.org/r/20250728075306.12704-2-ryncsn@gmail.com
Fixes: 809bc86517cc ("mm: shmem: support large folio swap out")
Signed-off-by: Kairui Song <kasong(a)tencent.com>
Reviewed-by: Kemeng Shi <shikemeng(a)huaweicloud.com>
Reviewed-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Tested-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Baoquan He <bhe(a)redhat.com>
Cc: Barry Song <baohua(a)kernel.org>
Cc: Chris Li <chrisl(a)kernel.org>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Nhat Pham <nphamcs(a)gmail.com>
Cc: Dev Jain <dev.jain(a)arm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/shmem.c b/mm/shmem.c
index 7570a24e0ae4..1d0fd266c29b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -891,7 +891,9 @@ static int shmem_add_to_page_cache(struct folio *folio,
pgoff_t index, void *expected, gfp_t gfp)
{
XA_STATE_ORDER(xas, &mapping->i_pages, index, folio_order(folio));
- long nr = folio_nr_pages(folio);
+ unsigned long nr = folio_nr_pages(folio);
+ swp_entry_t iter, swap;
+ void *entry;
VM_BUG_ON_FOLIO(index != round_down(index, nr), folio);
VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
@@ -903,14 +905,25 @@ static int shmem_add_to_page_cache(struct folio *folio,
gfp &= GFP_RECLAIM_MASK;
folio_throttle_swaprate(folio, gfp);
+ swap = radix_to_swp_entry(expected);
do {
+ iter = swap;
xas_lock_irq(&xas);
- if (expected != xas_find_conflict(&xas)) {
- xas_set_err(&xas, -EEXIST);
- goto unlock;
+ xas_for_each_conflict(&xas, entry) {
+ /*
+ * The range must either be empty, or filled with
+ * expected swap entries. Shmem swap entries are never
+ * partially freed without split of both entry and
+ * folio, so there shouldn't be any holes.
+ */
+ if (!expected || entry != swp_to_radix_entry(iter)) {
+ xas_set_err(&xas, -EEXIST);
+ goto unlock;
+ }
+ iter.val += 1 << xas_get_order(&xas);
}
- if (expected && xas_find_conflict(&xas)) {
+ if (expected && iter.val - nr != swap.val) {
xas_set_err(&xas, -EEXIST);
goto unlock;
}
@@ -2359,7 +2372,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
error = -ENOMEM;
goto failed;
}
- } else if (order != folio_order(folio)) {
+ } else if (order > folio_order(folio)) {
/*
* Swap readahead may swap in order 0 folios into swapcache
* asynchronously, while the shmem mapping can still stores
@@ -2384,15 +2397,23 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
swap = swp_entry(swp_type(swap), swp_offset(swap) + offset);
}
+ } else if (order < folio_order(folio)) {
+ swap.val = round_down(swap.val, 1 << folio_order(folio));
+ index = round_down(index, 1 << folio_order(folio));
}
alloced:
- /* We have to do this with folio locked to prevent races */
+ /*
+ * We have to do this with the folio locked to prevent races.
+ * The shmem_confirm_swap below only checks if the first swap
+ * entry matches the folio, that's enough to ensure the folio
+ * is not used outside of shmem, as shmem swap entries
+ * and swap cache folios are never partially freed.
+ */
folio_lock(folio);
if ((!skip_swapcache && !folio_test_swapcache(folio)) ||
- folio->swap.val != swap.val ||
!shmem_confirm_swap(mapping, index, swap) ||
- xa_get_order(&mapping->i_pages, index) != folio_order(folio)) {
+ folio->swap.val != swap.val) {
error = -EEXIST;
goto unlock;
}
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 09fefdca80aebd1023e827cb0ee174983d829d18
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081839-covenant-lagoon-0736@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 09fefdca80aebd1023e827cb0ee174983d829d18 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david(a)redhat.com>
Date: Fri, 13 Jun 2025 11:27:00 +0200
Subject: [PATCH] mm/huge_memory: don't ignore queried cachemode in
vmf_insert_pfn_pud()
Patch series "mm/huge_memory: vmf_insert_folio_*() and
vmf_insert_pfn_pud() fixes", v3.
While working on improving vm_normal_page() and friends, I stumbled over
this issues: refcounted "normal" folios must not be marked using
pmd_special() / pud_special(). Otherwise, we're effectively telling the
system that these folios are no "normal", violating the rules we
documented for vm_normal_page().
Fortunately, there are not many pmd_special()/pud_special() users yet. So
far there doesn't seem to be serious damage.
Tested using the ndctl tests ("ndctl:dax" suite).
This patch (of 3):
We set up the cache mode but ... don't forward the updated pgprot to
insert_pfn_pud().
Only a problem on x86-64 PAT when mapping PFNs using PUDs that require a
special cachemode.
Fix it by using the proper pgprot where the cachemode was setup.
It is unclear in which configurations we would get the cachemode wrong:
through vfio seems possible. Getting cachemodes wrong is usually ...
bad. As the fix is easy, let's backport it to stable.
Identified by code inspection.
Link: https://lkml.kernel.org/r/20250613092702.1943533-1-david@redhat.com
Link: https://lkml.kernel.org/r/20250613092702.1943533-2-david@redhat.com
Fixes: 7b806d229ef1 ("mm: remove vmf_insert_pfn_xxx_prot() for huge page-table entries")
Signed-off-by: David Hildenbrand <david(a)redhat.com>
Reviewed-by: Dan Williams <dan.j.williams(a)intel.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Reviewed-by: Jason Gunthorpe <jgg(a)nvidia.com>
Reviewed-by: Oscar Salvador <osalvador(a)suse.de>
Tested-by: Dan Williams <dan.j.williams(a)intel.com>
Cc: Alistair Popple <apopple(a)nvidia.com>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Dev Jain <dev.jain(a)arm.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Mariano Pache <npache(a)redhat.com>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Zi Yan <ziy(a)nvidia.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d3e66136e41a..49b98082c540 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1516,10 +1516,9 @@ static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma)
}
static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
- pud_t *pud, pfn_t pfn, bool write)
+ pud_t *pud, pfn_t pfn, pgprot_t prot, bool write)
{
struct mm_struct *mm = vma->vm_mm;
- pgprot_t prot = vma->vm_page_prot;
pud_t entry;
if (!pud_none(*pud)) {
@@ -1581,7 +1580,7 @@ vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write)
pfnmap_setup_cachemode_pfn(pfn_t_to_pfn(pfn), &pgprot);
ptl = pud_lock(vma->vm_mm, vmf->pud);
- insert_pfn_pud(vma, addr, vmf->pud, pfn, write);
+ insert_pfn_pud(vma, addr, vmf->pud, pfn, pgprot, write);
spin_unlock(ptl);
return VM_FAULT_NOPAGE;
@@ -1625,7 +1624,7 @@ vm_fault_t vmf_insert_folio_pud(struct vm_fault *vmf, struct folio *folio,
add_mm_counter(mm, mm_counter_file(folio), HPAGE_PUD_NR);
}
insert_pfn_pud(vma, addr, vmf->pud, pfn_to_pfn_t(folio_pfn(folio)),
- write);
+ vma->vm_page_prot, write);
spin_unlock(ptl);
return VM_FAULT_NOPAGE;
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 09fefdca80aebd1023e827cb0ee174983d829d18
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081838-flap-rewire-69a5@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 09fefdca80aebd1023e827cb0ee174983d829d18 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david(a)redhat.com>
Date: Fri, 13 Jun 2025 11:27:00 +0200
Subject: [PATCH] mm/huge_memory: don't ignore queried cachemode in
vmf_insert_pfn_pud()
Patch series "mm/huge_memory: vmf_insert_folio_*() and
vmf_insert_pfn_pud() fixes", v3.
While working on improving vm_normal_page() and friends, I stumbled over
this issues: refcounted "normal" folios must not be marked using
pmd_special() / pud_special(). Otherwise, we're effectively telling the
system that these folios are no "normal", violating the rules we
documented for vm_normal_page().
Fortunately, there are not many pmd_special()/pud_special() users yet. So
far there doesn't seem to be serious damage.
Tested using the ndctl tests ("ndctl:dax" suite).
This patch (of 3):
We set up the cache mode but ... don't forward the updated pgprot to
insert_pfn_pud().
Only a problem on x86-64 PAT when mapping PFNs using PUDs that require a
special cachemode.
Fix it by using the proper pgprot where the cachemode was setup.
It is unclear in which configurations we would get the cachemode wrong:
through vfio seems possible. Getting cachemodes wrong is usually ...
bad. As the fix is easy, let's backport it to stable.
Identified by code inspection.
Link: https://lkml.kernel.org/r/20250613092702.1943533-1-david@redhat.com
Link: https://lkml.kernel.org/r/20250613092702.1943533-2-david@redhat.com
Fixes: 7b806d229ef1 ("mm: remove vmf_insert_pfn_xxx_prot() for huge page-table entries")
Signed-off-by: David Hildenbrand <david(a)redhat.com>
Reviewed-by: Dan Williams <dan.j.williams(a)intel.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Reviewed-by: Jason Gunthorpe <jgg(a)nvidia.com>
Reviewed-by: Oscar Salvador <osalvador(a)suse.de>
Tested-by: Dan Williams <dan.j.williams(a)intel.com>
Cc: Alistair Popple <apopple(a)nvidia.com>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Dev Jain <dev.jain(a)arm.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Mariano Pache <npache(a)redhat.com>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Zi Yan <ziy(a)nvidia.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d3e66136e41a..49b98082c540 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1516,10 +1516,9 @@ static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma)
}
static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
- pud_t *pud, pfn_t pfn, bool write)
+ pud_t *pud, pfn_t pfn, pgprot_t prot, bool write)
{
struct mm_struct *mm = vma->vm_mm;
- pgprot_t prot = vma->vm_page_prot;
pud_t entry;
if (!pud_none(*pud)) {
@@ -1581,7 +1580,7 @@ vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write)
pfnmap_setup_cachemode_pfn(pfn_t_to_pfn(pfn), &pgprot);
ptl = pud_lock(vma->vm_mm, vmf->pud);
- insert_pfn_pud(vma, addr, vmf->pud, pfn, write);
+ insert_pfn_pud(vma, addr, vmf->pud, pfn, pgprot, write);
spin_unlock(ptl);
return VM_FAULT_NOPAGE;
@@ -1625,7 +1624,7 @@ vm_fault_t vmf_insert_folio_pud(struct vm_fault *vmf, struct folio *folio,
add_mm_counter(mm, mm_counter_file(folio), HPAGE_PUD_NR);
}
insert_pfn_pud(vma, addr, vmf->pud, pfn_to_pfn_t(folio_pfn(folio)),
- write);
+ vma->vm_page_prot, write);
spin_unlock(ptl);
return VM_FAULT_NOPAGE;
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x e2d18cbf178775ad377ad88ee55e6e183c38d262
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081818-fragrant-plausibly-d214@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e2d18cbf178775ad377ad88ee55e6e183c38d262 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka(a)suse.cz>
Date: Mon, 2 Jun 2025 13:02:12 +0200
Subject: [PATCH] mm, slab: restore NUMA policy support for large kmalloc
The slab allocator observes the task's NUMA policy in various places
such as allocating slab pages. Large kmalloc() allocations used to do
that too, until an unintended change by c4cab557521a ("mm/slab_common:
cleanup kmalloc_large()") resulted in ignoring mempolicy and just
preferring the local node. Restore the NUMA policy support.
Fixes: c4cab557521a ("mm/slab_common: cleanup kmalloc_large()")
Cc: <stable(a)vger.kernel.org>
Acked-by: Christoph Lameter (Ampere) <cl(a)gentwo.org>
Acked-by: Roman Gushchin <roman.gushchin(a)linux.dev>
Reviewed-by: Harry Yoo <harry.yoo(a)oracle.com>
Signed-off-by: Vlastimil Babka <vbabka(a)suse.cz>
diff --git a/mm/slub.c b/mm/slub.c
index 31e11ef256f9..06d64a5fb1bf 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4269,7 +4269,12 @@ static void *___kmalloc_large_node(size_t size, gfp_t flags, int node)
flags = kmalloc_fix_flags(flags);
flags |= __GFP_COMP;
- folio = (struct folio *)alloc_pages_node_noprof(node, flags, order);
+
+ if (node == NUMA_NO_NODE)
+ folio = (struct folio *)alloc_pages_noprof(flags, order);
+ else
+ folio = (struct folio *)__alloc_pages_noprof(flags, order, node, NULL);
+
if (folio) {
ptr = folio_address(folio);
lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B,
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 62be7afcc13b2727bdc6a4c91aefed6b452e6ecc
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081819-dean-outage-295c@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 62be7afcc13b2727bdc6a4c91aefed6b452e6ecc Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota(a)wdc.com>
Date: Sun, 29 Jun 2025 23:18:29 +0900
Subject: [PATCH] btrfs: zoned: requeue to unused block group list if zone
finish failed
btrfs_zone_finish() can fail for several reason. If it is -EAGAIN, we need
to try it again later. So, put the block group to the retry list properly.
Failing to do so will keep the removable block group intact until remount
and can causes unnecessary ENOSPC.
Fixes: 74e91b12b115 ("btrfs: zoned: zone finish unused block group")
CC: stable(a)vger.kernel.org # 6.1+
Reviewed-by: Johannes Thumshirn <johannes.thumshirn(a)wdc.com>
Signed-off-by: Naohiro Aota <naohiro.aota(a)wdc.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 3ddf9fe52b9d..47c6d040176c 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1639,8 +1639,10 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
ret = btrfs_zone_finish(block_group);
if (ret < 0) {
btrfs_dec_block_group_ro(block_group);
- if (ret == -EAGAIN)
+ if (ret == -EAGAIN) {
+ btrfs_link_bg_list(block_group, &retry_list);
ret = 0;
+ }
goto next;
}
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 62be7afcc13b2727bdc6a4c91aefed6b452e6ecc
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081802-litigator-enrich-4269@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 62be7afcc13b2727bdc6a4c91aefed6b452e6ecc Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota(a)wdc.com>
Date: Sun, 29 Jun 2025 23:18:29 +0900
Subject: [PATCH] btrfs: zoned: requeue to unused block group list if zone
finish failed
btrfs_zone_finish() can fail for several reason. If it is -EAGAIN, we need
to try it again later. So, put the block group to the retry list properly.
Failing to do so will keep the removable block group intact until remount
and can causes unnecessary ENOSPC.
Fixes: 74e91b12b115 ("btrfs: zoned: zone finish unused block group")
CC: stable(a)vger.kernel.org # 6.1+
Reviewed-by: Johannes Thumshirn <johannes.thumshirn(a)wdc.com>
Signed-off-by: Naohiro Aota <naohiro.aota(a)wdc.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 3ddf9fe52b9d..47c6d040176c 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1639,8 +1639,10 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
ret = btrfs_zone_finish(block_group);
if (ret < 0) {
btrfs_dec_block_group_ro(block_group);
- if (ret == -EAGAIN)
+ if (ret == -EAGAIN) {
+ btrfs_link_bg_list(block_group, &retry_list);
ret = 0;
+ }
goto next;
}
The existing memstick core patch: commit 62c59a8786e6 ("memstick: Skip
allocating card when removing host") sets host->removing in
memstick_remove_host(),but still exists a critical time window where
memstick_check can run after host->eject is set but before removing is set.
In the rtsx_usb_ms driver, the problematic sequence is:
rtsx_usb_ms_drv_remove: memstick_check:
host->eject = true
cancel_work_sync(handle_req) if(!host->removing)
... memstick_alloc_card()
memstick_set_rw_addr()
memstick_new_req()
rtsx_usb_ms_request()
if(!host->eject)
skip schedule_work
wait_for_completion()
memstick_remove_host: [blocks indefinitely]
host->removing = true
flush_workqueue()
[block]
1. rtsx_usb_ms_drv_remove sets host->eject = true
2. cancel_work_sync(&host->handle_req) runs
3. memstick_check work may be executed here <-- danger window
4. memstick_remove_host sets removing = 1
During this window (step 3), memstick_check calls memstick_alloc_card,
which may indefinitely waiting for mrq_complete completion that will
never occur because rtsx_usb_ms_request sees eject=true and skips
scheduling work, memstick_set_rw_addr waits forever for completion.
This causes a deadlock when memstick_remove_host tries to flush_workqueue,
waiting for memstick_check to complete, while memstick_check is blocked
waiting for mrq_complete completion.
Fix this by setting removing=true at the start of rtsx_usb_ms_drv_remove,
before any work cancellation. This ensures memstick_check will see the
removing flag immediately and exit early, avoiding the deadlock.
Fixes: 62c59a8786e6 ("memstick: Skip allocating card when removing host")
Signed-off-by: Jiayi Li <lijiayi(a)kylinos.cn>
Cc: stable(a)vger.kernel.org
---
v1 -> v2:
Added Cc: stable(a)vger.kernel.org
---
drivers/memstick/core/memstick.c | 1 -
drivers/memstick/host/rtsx_usb_ms.c | 1 +
2 files changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c
index 043b9ec756ff..95e65f4958f2 100644
--- a/drivers/memstick/core/memstick.c
+++ b/drivers/memstick/core/memstick.c
@@ -555,7 +555,6 @@ EXPORT_SYMBOL(memstick_add_host);
*/
void memstick_remove_host(struct memstick_host *host)
{
- host->removing = 1;
flush_workqueue(workqueue);
mutex_lock(&host->lock);
if (host->card)
diff --git a/drivers/memstick/host/rtsx_usb_ms.c b/drivers/memstick/host/rtsx_usb_ms.c
index 3878136227e4..5b5e9354fb2e 100644
--- a/drivers/memstick/host/rtsx_usb_ms.c
+++ b/drivers/memstick/host/rtsx_usb_ms.c
@@ -812,6 +812,7 @@ static void rtsx_usb_ms_drv_remove(struct platform_device *pdev)
int err;
host->eject = true;
+ msh->removing = true;
cancel_work_sync(&host->handle_req);
cancel_delayed_work_sync(&host->poll_card);
--
2.47.1
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 24e066ded45b8147b79c7455ac43a5bff7b5f378
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081819-amnesty-yen-4c26@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 24e066ded45b8147b79c7455ac43a5bff7b5f378 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Fri, 11 Jul 2025 20:48:23 +0100
Subject: [PATCH] btrfs: don't skip remaining extrefs if dir not found during
log replay
During log replay, at add_inode_ref(), if we have an extref item that
contains multiple extrefs and one of them points to a directory that does
not exist in the subvolume tree, we are supposed to ignore it and process
the remaining extrefs encoded in the extref item, since each extref can
point to a different parent inode. However when that happens we just
return from the function and ignore the remaining extrefs.
The problem has been around since extrefs were introduced, in commit
f186373fef00 ("btrfs: extended inode refs"), but it's hard to hit in
practice because getting extref items encoding multiple extref requires
getting a hash collision when computing the offset of the extref's
key. The offset if computed like this:
key.offset = btrfs_extref_hash(dir_ino, name->name, name->len);
and btrfs_extref_hash() is just a wrapper around crc32c().
Fix this by moving to next iteration of the loop when we don't find
the parent directory that an extref points to.
Fixes: f186373fef00 ("btrfs: extended inode refs")
CC: stable(a)vger.kernel.org # 6.1+
Reviewed-by: Boris Burkov <boris(a)bur.io>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index e3c77f3d092c..467b69a4ef3b 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1433,6 +1433,8 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
if (log_ref_ver) {
ret = extref_get_fields(eb, ref_ptr, &name,
&ref_index, &parent_objectid);
+ if (ret)
+ goto out;
/*
* parent object can change from one array
* item to another.
@@ -1449,16 +1451,23 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
* the loop when getting the first
* parent dir.
*/
- if (ret == -ENOENT)
+ if (ret == -ENOENT) {
+ /*
+ * The next extref may refer to
+ * another parent dir that
+ * exists, so continue.
+ */
ret = 0;
+ goto next;
+ }
goto out;
}
}
} else {
ret = ref_get_fields(eb, ref_ptr, &name, &ref_index);
+ if (ret)
+ goto out;
}
- if (ret)
- goto out;
ret = inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode),
ref_index, &name);
@@ -1492,10 +1501,11 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
}
/* Else, ret == 1, we already have a perfect match, we're done. */
+next:
ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + name.len;
kfree(name.name);
name.name = NULL;
- if (log_ref_ver) {
+ if (log_ref_ver && dir) {
iput(&dir->vfs_inode);
dir = NULL;
}
Dear 5.15.y maintainers,
A f2fs patch should be backported from upstream mainline to the stable
5.15.y branch. The patch's information is shown as below:
[Subject]
f2fs: fix to avoid UAF in f2fs_sync_inode_meta()
[Upstream commit ID]
7c30d79930132466f5be7d0b57add14d1a016bda
[Kernel version]
5.15.y
[Why]
This patch fixes the issue where the f2fs_inode_info.gdirty_list is not
deleted when evicting the inode. This would cause the gdirty_list to
remain incorrectly linked when the f2fs_inode_info is reallocated, which
in turn would be detected by __list_del_entry_valid during list_del_init.
On the Android 5.15 U arm platform, the issue that could be reproduced
within 24 hours has not recurred for a week after applying this patch.
Thanks,
Jiucheng
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 1c320d8e92925bb7615f83a7b6e3f402a5c2ca63
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081855-sensation-survivor-0dde@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1c320d8e92925bb7615f83a7b6e3f402a5c2ca63 Mon Sep 17 00:00:00 2001
From: Baokun Li <libaokun1(a)huawei.com>
Date: Mon, 14 Jul 2025 21:03:20 +0800
Subject: [PATCH] ext4: fix zombie groups in average fragment size lists
Groups with no free blocks shouldn't be in any average fragment size list.
However, when all blocks in a group are allocated(i.e., bb_fragments or
bb_free is 0), we currently skip updating the average fragment size, which
means the group isn't removed from its previous s_mb_avg_fragment_size[old]
list.
This created "zombie" groups that were always skipped during traversal as
they couldn't satisfy any block allocation requests, negatively impacting
traversal efficiency.
Therefore, when a group becomes completely full, bb_avg_fragment_size_order
is now set to -1. If the old order was not -1, a removal operation is
performed; if the new order is not -1, an insertion is performed.
Fixes: 196e402adf2e ("ext4: improve cr 0 / cr 1 group scanning")
CC: stable(a)vger.kernel.org
Signed-off-by: Baokun Li <libaokun1(a)huawei.com>
Reviewed-by: Jan Kara <jack(a)suse.cz>
Reviewed-by: Zhang Yi <yi.zhang(a)huawei.com>
Link: https://patch.msgid.link/20250714130327.1830534-11-libaokun1@huawei.com
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 6d98f2a5afc4..72b20fc52bbf 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -841,30 +841,30 @@ static void
mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- int new_order;
+ int new, old;
- if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_fragments == 0)
+ if (!test_opt2(sb, MB_OPTIMIZE_SCAN))
return;
- new_order = mb_avg_fragment_size_order(sb,
- grp->bb_free / grp->bb_fragments);
- if (new_order == grp->bb_avg_fragment_size_order)
+ old = grp->bb_avg_fragment_size_order;
+ new = grp->bb_fragments == 0 ? -1 :
+ mb_avg_fragment_size_order(sb, grp->bb_free / grp->bb_fragments);
+ if (new == old)
return;
- if (grp->bb_avg_fragment_size_order != -1) {
- write_lock(&sbi->s_mb_avg_fragment_size_locks[
- grp->bb_avg_fragment_size_order]);
+ if (old >= 0) {
+ write_lock(&sbi->s_mb_avg_fragment_size_locks[old]);
list_del(&grp->bb_avg_fragment_size_node);
- write_unlock(&sbi->s_mb_avg_fragment_size_locks[
- grp->bb_avg_fragment_size_order]);
+ write_unlock(&sbi->s_mb_avg_fragment_size_locks[old]);
+ }
+
+ grp->bb_avg_fragment_size_order = new;
+ if (new >= 0) {
+ write_lock(&sbi->s_mb_avg_fragment_size_locks[new]);
+ list_add_tail(&grp->bb_avg_fragment_size_node,
+ &sbi->s_mb_avg_fragment_size[new]);
+ write_unlock(&sbi->s_mb_avg_fragment_size_locks[new]);
}
- grp->bb_avg_fragment_size_order = new_order;
- write_lock(&sbi->s_mb_avg_fragment_size_locks[
- grp->bb_avg_fragment_size_order]);
- list_add_tail(&grp->bb_avg_fragment_size_node,
- &sbi->s_mb_avg_fragment_size[grp->bb_avg_fragment_size_order]);
- write_unlock(&sbi->s_mb_avg_fragment_size_locks[
- grp->bb_avg_fragment_size_order]);
}
/*
The patch below does not apply to the 6.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.16.y
git checkout FETCH_HEAD
git cherry-pick -x c08ba63078dd6046c279df37795cb77e784e1ec9
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081839-unmoral-mulch-990f@gregkh' --subject-prefix 'PATCH 6.16.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c08ba63078dd6046c279df37795cb77e784e1ec9 Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky(a)amd.com>
Date: Wed, 16 Jul 2025 15:41:35 -0500
Subject: [PATCH] virt: sev-guest: Satisfy linear mapping requirement in
get_derived_key()
Commit
7ffeb2fc2670 ("x86/sev: Document requirement for linear mapping of guest request buffers")
added a check that requires the guest request buffers to be in the linear
mapping. The get_derived_key() function was passing a buffer that was
allocated on the stack, resulting in the call to snp_send_guest_request()
returning an error.
Update the get_derived_key() function to use an allocated buffer instead
of a stack buffer.
Fixes: 7ffeb2fc2670 ("x86/sev: Document requirement for linear mapping of guest request buffers")
Signed-off-by: Tom Lendacky <thomas.lendacky(a)amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Cc: <stable(a)kernel.org>
Link: https://lore.kernel.org/9b764ca9fc79199a091aac684c4926e2080ca7a8.1752698495…
diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c
index d2b3ae7113ab..b01ec99106cd 100644
--- a/drivers/virt/coco/sev-guest/sev-guest.c
+++ b/drivers/virt/coco/sev-guest/sev-guest.c
@@ -116,13 +116,11 @@ static int get_report(struct snp_guest_dev *snp_dev, struct snp_guest_request_io
static int get_derived_key(struct snp_guest_dev *snp_dev, struct snp_guest_request_ioctl *arg)
{
+ struct snp_derived_key_resp *derived_key_resp __free(kfree) = NULL;
struct snp_derived_key_req *derived_key_req __free(kfree) = NULL;
- struct snp_derived_key_resp derived_key_resp = {0};
struct snp_msg_desc *mdesc = snp_dev->msg_desc;
struct snp_guest_req req = {};
int rc, resp_len;
- /* Response data is 64 bytes and max authsize for GCM is 16 bytes. */
- u8 buf[64 + 16];
if (!arg->req_data || !arg->resp_data)
return -EINVAL;
@@ -132,8 +130,9 @@ static int get_derived_key(struct snp_guest_dev *snp_dev, struct snp_guest_reque
* response payload. Make sure that it has enough space to cover the
* authtag.
*/
- resp_len = sizeof(derived_key_resp.data) + mdesc->ctx->authsize;
- if (sizeof(buf) < resp_len)
+ resp_len = sizeof(derived_key_resp->data) + mdesc->ctx->authsize;
+ derived_key_resp = kzalloc(resp_len, GFP_KERNEL_ACCOUNT);
+ if (!derived_key_resp)
return -ENOMEM;
derived_key_req = kzalloc(sizeof(*derived_key_req), GFP_KERNEL_ACCOUNT);
@@ -149,23 +148,21 @@ static int get_derived_key(struct snp_guest_dev *snp_dev, struct snp_guest_reque
req.vmpck_id = mdesc->vmpck_id;
req.req_buf = derived_key_req;
req.req_sz = sizeof(*derived_key_req);
- req.resp_buf = buf;
+ req.resp_buf = derived_key_resp;
req.resp_sz = resp_len;
req.exit_code = SVM_VMGEXIT_GUEST_REQUEST;
rc = snp_send_guest_request(mdesc, &req);
arg->exitinfo2 = req.exitinfo2;
- if (rc)
- return rc;
-
- memcpy(derived_key_resp.data, buf, sizeof(derived_key_resp.data));
- if (copy_to_user((void __user *)arg->resp_data, &derived_key_resp,
- sizeof(derived_key_resp)))
- rc = -EFAULT;
+ if (!rc) {
+ if (copy_to_user((void __user *)arg->resp_data, derived_key_resp,
+ sizeof(derived_key_resp->data)))
+ rc = -EFAULT;
+ }
/* The response buffer contains the sensitive data, explicitly clear it. */
- memzero_explicit(buf, sizeof(buf));
- memzero_explicit(&derived_key_resp, sizeof(derived_key_resp));
+ memzero_explicit(derived_key_resp, sizeof(*derived_key_resp));
+
return rc;
}
Hi,
Since kernel v6.14.3, when using wireless to connect to my home router
on my laptop, my wireless connection slows down to unusable speeds.
More specifically, since kernel 6.14.3, when connecting to the
wireless networks of my OpenWRT Router on my Lenovo IdeaPad Slim 15
16AKP10 laptop,
either a 2.4ghz or a 5ghz network, the connection speed drops down to
0.1-0.2 Mbps download and 0 Mbps upload when measured using
speedtest-cli.
My laptop uses an mt7925 chip according to the loaded driver and firmware.
Detailed Description:
As mentioned above, my wireless connection becomes unusable when using
linux 6.14.3 and above, dropping speeds to almost 0 Mbps,
even when standing next to my router. Further, pinging archlinux.org
results in "Temporary failure in name resolution".
Any other wireless device in my house can successfully connect to my
router and properly use the internet with good speeds, eg. iphones,
ipads, raspberry pi and a windows laptop.
When using my Lenovo laptop on a kernel 6.14.3 or higher to connect to
other access points, such as my iPhone's hotspot and some TPLink and
Zyxel routers - the connection speed is good, and there are no issues,
which makes me believe there's something going on with my OpenWRT
configuration in conjunction with a commit introduced on kernel 6.14.3
for the mt7925e module as detailed below.
I have followed a related issue previously reported on the mailing
list regarding a problem with the same wifi chip on kernel 6.14.3, but
the merged fix doesn't seem to fix my problem:
https://lore.kernel.org/linux-mediatek/EmWnO5b-acRH1TXbGnkx41eJw654vmCR-8_x…
I've tested stable builds of 6.15 as well up to 6.15.9 in the last
month, which also do not fix the problem.
I've also built and bisected v6.14 on june using guides on the Arch
Linux wiki, for the following bad commit, same as the previously
mentioned reported issue:
[80007d3f92fd018d0a052a706400e976b36e3c87] wifi: mt76: mt7925:
integrate *mlo_sta_cmd and *sta_cmd
Testing further this week, I cloned mainline after 6.16 was released,
built and tested it, and the issue still persists.
I reverted the following commits on mainline and retested, to
successfully see good wireless speeds:
[0aa8496adda570c2005410a30df963a16643a3dc] wifi: mt76: mt7925: fix
missing hdr_trans_tlv command for broadcast wtbl
[cb1353ef34735ec1e5d9efa1fe966f05ff1dc1e1] wifi: mt76: mt7925:
integrate *mlo_sta_cmd and *sta_cmd
Then, reverting *only* 0aa8496adda570c2005410a30df963a16643a3dc causes
the issue to reproduce, which confirms the issue is caused by commit
cb1353ef34735ec1e5d9efa1fe966f05ff1dc1e1 on mainline.
I've attached the following files to a bugzilla ticket:
- lspci -nnk output:
https://bugzilla.kernel.org/attachment.cgi?id=308466
- dmesg output:
https://bugzilla.kernel.org/attachment.cgi?id=308465
- .config for the built mainline kernel:
https://bugzilla.kernel.org/attachment.cgi?id=308467
More information:
OS Distribution: Arch Linux
Linux build information from /proc/version:
Linux version 6.16.0linux-mainline-11853-g21be711c0235
(tal@arch-debug) (gcc (GCC) 15.1.1 20250729, GNU ld (GNU Binutils)
2.45.0) #3 SMP PREEMPT_DYNAMIC
OpenWRT Version on my Router: 24.10.2
Laptop Hardware:
- Lenovo IdeaPad Slim 15 16AKP10 laptop (x86_64 Ryzen AI 350 CPU)
- Network device as reported by lscpi: 14c3:7925
- Network modules and driver in use: mt7925e
- mediatek chip firmware as of dmesg:
HW/SW Version: 0x8a108a10, Build Time: 20250526152947a
WM Firmware Version: ____000000, Build Time: 20250526153043
Referencing regzbot:
#regzbot introduced: 80007d3f92fd018d0a052a706400e976b36e3c87
Please let me know if any other information is needed, or if there is
anything else that I can test on my end.
Thanks,
Tal Inbar
From: Josef Bacik <josef(a)toxicpanda.com>
[ Upstream commit 17f46b803d4f23c66cacce81db35fef3adb8f2af ]
In production we have been hitting the following warning consistently
------------[ cut here ]------------
refcount_t: underflow; use-after-free.
WARNING: CPU: 17 PID: 1800359 at lib/refcount.c:28 refcount_warn_saturate+0x9c/0xe0
Workqueue: nfsiod nfs_direct_write_schedule_work [nfs]
RIP: 0010:refcount_warn_saturate+0x9c/0xe0
PKRU: 55555554
Call Trace:
<TASK>
? __warn+0x9f/0x130
? refcount_warn_saturate+0x9c/0xe0
? report_bug+0xcc/0x150
? handle_bug+0x3d/0x70
? exc_invalid_op+0x16/0x40
? asm_exc_invalid_op+0x16/0x20
? refcount_warn_saturate+0x9c/0xe0
nfs_direct_write_schedule_work+0x237/0x250 [nfs]
process_one_work+0x12f/0x4a0
worker_thread+0x14e/0x3b0
? ZSTD_getCParams_internal+0x220/0x220
kthread+0xdc/0x120
? __btf_name_valid+0xa0/0xa0
ret_from_fork+0x1f/0x30
This is because we're completing the nfs_direct_request twice in a row.
The source of this is when we have our commit requests to submit, we
process them and send them off, and then in the completion path for the
commit requests we have
if (nfs_commit_end(cinfo.mds))
nfs_direct_write_complete(dreq);
However since we're submitting asynchronous requests we sometimes have
one that completes before we submit the next one, so we end up calling
complete on the nfs_direct_request twice.
The only other place we use nfs_generic_commit_list() is in
__nfs_commit_inode, which wraps this call in a
nfs_commit_begin();
nfs_commit_end();
Which is a common pattern for this style of completion handling, one
that is also repeated in the direct code with get_dreq()/put_dreq()
calls around where we process events as well as in the completion paths.
Fix this by using the same pattern for the commit requests.
Before with my 200 node rocksdb stress running this warning would pop
every 10ish minutes. With my patch the stress test has been running for
several hours without popping.
Signed-off-by: Josef Bacik <josef(a)toxicpanda.com>
Cc: stable(a)vger.kernel.org # 5.4
Signed-off-by: Trond Myklebust <trond.myklebust(a)hammerspace.com>
[ chanho : Backports v5.4.y, commit 133a48abf6ec (NFS: Fix up commit deadlocks)
is needed to use nfs_commit_end ]
Signed-off-by: Chanho Min <chanho.min(a)lge.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
fs/nfs/direct.c | 11 +++++++++--
fs/nfs/write.c | 2 +-
include/linux/nfs_fs.h | 1 +
3 files changed, 11 insertions(+), 3 deletions(-)
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 32dc176ea1aba..982f0eeac3dfa 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -727,10 +727,17 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
LIST_HEAD(mds_list);
nfs_init_cinfo_from_dreq(&cinfo, dreq);
+ nfs_commit_begin(cinfo.mds);
nfs_scan_commit(dreq->inode, &mds_list, &cinfo);
res = nfs_generic_commit_list(dreq->inode, &mds_list, 0, &cinfo);
- if (res < 0) /* res == -ENOMEM */
- nfs_direct_write_reschedule(dreq);
+ if (res < 0) { /* res == -ENOMEM */
+ spin_lock(&dreq->lock);
+ if (dreq->flags == 0)
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ spin_unlock(&dreq->lock);
+ }
+ if (nfs_commit_end(cinfo.mds))
+ nfs_direct_write_complete(dreq);
}
static void nfs_direct_write_schedule_work(struct work_struct *work)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c9895316fc070..f3c672b11c4fc 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1653,7 +1653,7 @@ static int wait_on_commit(struct nfs_mds_commit_info *cinfo)
!atomic_read(&cinfo->rpcs_out));
}
-static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo)
+void nfs_commit_begin(struct nfs_mds_commit_info *cinfo)
{
atomic_inc(&cinfo->rpcs_out);
}
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index a96b116cc9224..b2b441f3572be 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -549,6 +549,7 @@ extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
extern int nfs_commit_inode(struct inode *, int);
extern struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail);
extern void nfs_commit_free(struct nfs_commit_data *data);
+void nfs_commit_begin(struct nfs_mds_commit_info *cinfo);
bool nfs_commit_end(struct nfs_mds_commit_info *cinfo);
static inline int
Define ARCH_PAGE_TABLE_SYNC_MASK and arch_sync_kernel_mappings() to ensure
page tables are properly synchronized when calling
p*d_populate_kernel().
For 5-level paging, synchronization is performed via
pgd_populate_kernel(). In 4-level paging, pgd_populate() is a no-op,
so synchronization is instead performed at the P4D level via
p4d_populate_kernel().
This fixes intermittent boot failures on systems using 4-level paging
and a large amount of persistent memory:
BUG: unable to handle page fault for address: ffffe70000000034
#PF: supervisor write access in kernel mode
#PF: error_code(0x0002) - not-present page
PGD 0 P4D 0
Oops: 0002 [#1] SMP NOPTI
RIP: 0010:__init_single_page+0x9/0x6d
Call Trace:
<TASK>
__init_zone_device_page+0x17/0x5d
memmap_init_zone_device+0x154/0x1bb
pagemap_range+0x2e0/0x40f
memremap_pages+0x10b/0x2f0
devm_memremap_pages+0x1e/0x60
dev_dax_probe+0xce/0x2ec [device_dax]
dax_bus_probe+0x6d/0xc9
[... snip ...]
</TASK>
It also fixes a crash in vmemmap_set_pmd() caused by accessing vmemmap
before sync_global_pgds() [1]:
BUG: unable to handle page fault for address: ffffeb3ff1200000
#PF: supervisor write access in kernel mode
#PF: error_code(0x0002) - not-present page
PGD 0 P4D 0
Oops: Oops: 0002 [#1] PREEMPT SMP NOPTI
Tainted: [W]=WARN
RIP: 0010:vmemmap_set_pmd+0xff/0x230
<TASK>
vmemmap_populate_hugepages+0x176/0x180
vmemmap_populate+0x34/0x80
__populate_section_memmap+0x41/0x90
sparse_add_section+0x121/0x3e0
__add_pages+0xba/0x150
add_pages+0x1d/0x70
memremap_pages+0x3dc/0x810
devm_memremap_pages+0x1c/0x60
xe_devm_add+0x8b/0x100 [xe]
xe_tile_init_noalloc+0x6a/0x70 [xe]
xe_device_probe+0x48c/0x740 [xe]
[... snip ...]
Cc: <stable(a)vger.kernel.org>
Fixes: 8d400913c231 ("x86/vmemmap: handle unpopulated sub-pmd ranges")
Closes: https://lore.kernel.org/linux-mm/20250311114420.240341-1-gwan-gyeong.mun@in… [1]
Suggested-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Acked-by: Kiryl Shutsemau <kas(a)kernel.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt(a)kernel.org>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Signed-off-by: Harry Yoo <harry.yoo(a)oracle.com>
---
arch/x86/include/asm/pgtable_64_types.h | 3 +++
arch/x86/mm/init_64.c | 18 ++++++++++++++++++
2 files changed, 21 insertions(+)
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 4604f924d8b8..7eb61ef6a185 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -36,6 +36,9 @@ static inline bool pgtable_l5_enabled(void)
#define pgtable_l5_enabled() cpu_feature_enabled(X86_FEATURE_LA57)
#endif /* USE_EARLY_PGTABLE_L5 */
+#define ARCH_PAGE_TABLE_SYNC_MASK \
+ (pgtable_l5_enabled() ? PGTBL_PGD_MODIFIED : PGTBL_P4D_MODIFIED)
+
extern unsigned int pgdir_shift;
extern unsigned int ptrs_per_p4d;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 76e33bd7c556..b9426fce5f3e 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -223,6 +223,24 @@ static void sync_global_pgds(unsigned long start, unsigned long end)
sync_global_pgds_l4(start, end);
}
+/*
+ * Make kernel mappings visible in all page tables in the system.
+ * This is necessary except when the init task populates kernel mappings
+ * during the boot process. In that case, all processes originating from
+ * the init task copies the kernel mappings, so there is no issue.
+ * Otherwise, missing synchronization could lead to kernel crashes due
+ * to missing page table entries for certain kernel mappings.
+ *
+ * Synchronization is performed at the top level, which is the PGD in
+ * 5-level paging systems. But in 4-level paging systems, however,
+ * pgd_populate() is a no-op, so synchronization is done at the P4D level.
+ * sync_global_pgds() handles this difference between paging levels.
+ */
+void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
+{
+ sync_global_pgds(start, end);
+}
+
/*
* NOTE: This function is marked __ref because it calls __init function
* (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
--
2.43.0
Move ARCH_PAGE_TABLE_SYNC_MASK and arch_sync_kernel_mappings() to
linux/pgtable.h so that they can be used outside of vmalloc and ioremap.
Cc: <stable(a)vger.kernel.org>
Fixes: 8d400913c231 ("x86/vmemmap: handle unpopulated sub-pmd ranges")
Acked-by: Kiryl Shutsemau <kas(a)kernel.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt(a)kernel.org>
Reviewed-by: "Uladzislau Rezki (Sony)" <urezki(a)gmail.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Signed-off-by: Harry Yoo <harry.yoo(a)oracle.com>
---
include/linux/pgtable.h | 16 ++++++++++++++++
include/linux/vmalloc.h | 16 ----------------
2 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 4c035637eeb7..ba699df6ef69 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1467,6 +1467,22 @@ static inline void modify_prot_commit_ptes(struct vm_area_struct *vma, unsigned
}
#endif
+/*
+ * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values
+ * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings()
+ * needs to be called.
+ */
+#ifndef ARCH_PAGE_TABLE_SYNC_MASK
+#define ARCH_PAGE_TABLE_SYNC_MASK 0
+#endif
+
+/*
+ * There is no default implementation for arch_sync_kernel_mappings(). It is
+ * relied upon the compiler to optimize calls out if ARCH_PAGE_TABLE_SYNC_MASK
+ * is 0.
+ */
+void arch_sync_kernel_mappings(unsigned long start, unsigned long end);
+
#endif /* CONFIG_MMU */
/*
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index fdc9aeb74a44..2759dac6be44 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -219,22 +219,6 @@ extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
int vmap_pages_range(unsigned long addr, unsigned long end, pgprot_t prot,
struct page **pages, unsigned int page_shift);
-/*
- * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values
- * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings()
- * needs to be called.
- */
-#ifndef ARCH_PAGE_TABLE_SYNC_MASK
-#define ARCH_PAGE_TABLE_SYNC_MASK 0
-#endif
-
-/*
- * There is no default implementation for arch_sync_kernel_mappings(). It is
- * relied upon the compiler to optimize calls out if ARCH_PAGE_TABLE_SYNC_MASK
- * is 0.
- */
-void arch_sync_kernel_mappings(unsigned long start, unsigned long end);
-
/*
* Lowlevel-APIs (not for driver use!)
*/
--
2.43.0