function dualshock4_get_calibration_data allocates memory to pointer
buf. however the function may exit prematurely due to transfer_failure
in this case it does not handle freeing memory.
this patch handles memory deallocation at exit.
Reported-by: syzbot+4f5f81e1456a1f645bf8(a)syzkaller.appspotmail.com
Tested-by: syzbot+4f5f81e1456a1f645bf8(a)syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/691560c4.a70a0220.3124cb.0019.GAE@google.com/T/
Fixes: 947992c7fa9e0 ("HID: playstation: DS4: Fix calibration workaround for clone devices")
Cc: stable(a)vger.kernel.org
Signed-off-by: Eslam Khafagy <eslam.medhat1993(a)gmail.com>
---
v2:
* Adding tag "Cc: stable(a)vger.kernel.org"
v1: https://lore.kernel.org/all/20251115022323.1395726-1-eslam.medhat1993@gmail…
---
drivers/hid/hid-playstation.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/hid/hid-playstation.c b/drivers/hid/hid-playstation.c
index 63f6eb9030d1..fef81b7e27c1 100644
--- a/drivers/hid/hid-playstation.c
+++ b/drivers/hid/hid-playstation.c
@@ -1992,9 +1992,6 @@ static int dualshock4_get_calibration_data(struct dualshock4 *ds4)
acc_z_plus = get_unaligned_le16(&buf[31]);
acc_z_minus = get_unaligned_le16(&buf[33]);
- /* Done parsing the buffer, so let's free it. */
- kfree(buf);
-
/*
* Set gyroscope calibration and normalization parameters.
* Data values will be normalized to 1/DS4_GYRO_RES_PER_DEG_S degree/s.
@@ -2041,6 +2038,10 @@ static int dualshock4_get_calibration_data(struct dualshock4 *ds4)
ds4->accel_calib_data[2].sens_denom = range_2g;
transfer_failed:
+ /* First free buf if still allocated */
+ if(buf)
+ kfree(buf);
+
/*
* Sanity check gyro calibration data. This is needed to prevent crashes
* during report handling of virtual, clone or broken devices not implementing
--
2.43.0
The original implementation of memcpy_sglist() was broken because it
didn't handle scatterlists that describe exactly the same memory, which
is a case that many callers rely on. The current implementation is
broken too because it calls the skcipher_walk functions which can fail.
It ignores any errors from those functions.
Fix it by replacing it with a new implementation written from scratch.
It always succeeds. It's also a bit faster, since it avoids the
overhead of skcipher_walk. skcipher_walk includes a lot of
functionality (such as alignmask handling) that's irrelevant here.
Reported-by: Colin Ian King <coking(a)nvidia.com>
Closes: https://lore.kernel.org/r/20251114122620.111623-1-coking@nvidia.com
Fixes: 131bdceca1f0 ("crypto: scatterwalk - Add memcpy_sglist")
Fixes: 0f8d42bf128d ("crypto: scatterwalk - Move skcipher walk and use it for memcpy_sglist")
Cc: stable(a)vger.kernel.org
Signed-off-by: Eric Biggers <ebiggers(a)kernel.org>
---
crypto/scatterwalk.c | 97 +++++++++++++++++++++++++++++++-----
include/crypto/scatterwalk.h | 52 +++++++++++--------
2 files changed, 115 insertions(+), 34 deletions(-)
diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c
index 1d010e2a1b1a..b95e5974e327 100644
--- a/crypto/scatterwalk.c
+++ b/crypto/scatterwalk.c
@@ -99,30 +99,101 @@ void memcpy_to_sglist(struct scatterlist *sg, unsigned int start,
scatterwalk_start_at_pos(&walk, sg, start);
memcpy_to_scatterwalk(&walk, buf, nbytes);
}
EXPORT_SYMBOL_GPL(memcpy_to_sglist);
+/**
+ * memcpy_sglist() - Copy data from one scatterlist to another
+ * @dst: The destination scatterlist. Can be NULL if @nbytes == 0.
+ * @src: The source scatterlist. Can be NULL if @nbytes == 0.
+ * @nbytes: Number of bytes to copy
+ *
+ * The scatterlists can describe exactly the same memory, in which case this
+ * function is a no-op. No other overlaps are supported.
+ *
+ * Context: Any context
+ */
void memcpy_sglist(struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
- struct skcipher_walk walk = {};
+ unsigned int src_offset, dst_offset;
- if (unlikely(nbytes == 0)) /* in case sg == NULL */
+ if (unlikely(nbytes == 0)) /* in case src and/or dst is NULL */
return;
- walk.total = nbytes;
-
- scatterwalk_start(&walk.in, src);
- scatterwalk_start(&walk.out, dst);
+ src_offset = src->offset;
+ dst_offset = dst->offset;
+ for (;;) {
+ /* Compute the length to copy this step. */
+ unsigned int len = min3(src->offset + src->length - src_offset,
+ dst->offset + dst->length - dst_offset,
+ nbytes);
+ struct page *src_page = sg_page(src);
+ struct page *dst_page = sg_page(dst);
+ const void *src_virt;
+ void *dst_virt;
+
+ if (IS_ENABLED(CONFIG_HIGHMEM)) {
+ /* HIGHMEM: we may have to actually map the pages. */
+ const unsigned int src_oip = offset_in_page(src_offset);
+ const unsigned int dst_oip = offset_in_page(dst_offset);
+ const unsigned int limit = PAGE_SIZE;
+
+ /* Further limit len to not cross a page boundary. */
+ len = min3(len, limit - src_oip, limit - dst_oip);
+
+ /* Compute the source and destination pages. */
+ src_page += src_offset / PAGE_SIZE;
+ dst_page += dst_offset / PAGE_SIZE;
+
+ if (src_page != dst_page) {
+ /* Copy between different pages. */
+ memcpy_page(dst_page, dst_oip,
+ src_page, src_oip, len);
+ flush_dcache_page(dst_page);
+ } else if (src_oip != dst_oip) {
+ /* Copy between different parts of same page. */
+ dst_virt = kmap_local_page(dst_page);
+ memcpy(dst_virt + dst_oip, dst_virt + src_oip,
+ len);
+ kunmap_local(dst_virt);
+ flush_dcache_page(dst_page);
+ } /* Else, it's the same memory. No action needed. */
+ } else {
+ /*
+ * !HIGHMEM: no mapping needed. Just work in the linear
+ * buffer of each sg entry. Note that we can cross page
+ * boundaries, as they are not significant in this case.
+ */
+ src_virt = page_address(src_page) + src_offset;
+ dst_virt = page_address(dst_page) + dst_offset;
+ if (src_virt != dst_virt) {
+ memcpy(dst_virt, src_virt, len);
+ if (ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE)
+ __scatterwalk_flush_dcache_pages(
+ dst_page, dst_offset, len);
+ } /* Else, it's the same memory. No action needed. */
+ }
+ nbytes -= len;
+ if (nbytes == 0) /* No more to copy? */
+ break;
- skcipher_walk_first(&walk, true);
- do {
- if (walk.src.virt.addr != walk.dst.virt.addr)
- memcpy(walk.dst.virt.addr, walk.src.virt.addr,
- walk.nbytes);
- skcipher_walk_done(&walk, 0);
- } while (walk.nbytes);
+ /*
+ * There's more to copy. Advance the offsets by the length
+ * copied this step, and advance the sg entries as needed.
+ */
+ src_offset += len;
+ if (src_offset >= src->offset + src->length) {
+ src = sg_next(src);
+ src_offset = src->offset;
+ }
+ dst_offset += len;
+ if (dst_offset >= dst->offset + dst->length) {
+ dst = sg_next(dst);
+ dst_offset = dst->offset;
+ }
+ }
}
EXPORT_SYMBOL_GPL(memcpy_sglist);
struct scatterlist *scatterwalk_ffwd(struct scatterlist dst[2],
struct scatterlist *src,
diff --git a/include/crypto/scatterwalk.h b/include/crypto/scatterwalk.h
index 83d14376ff2b..f485454e3955 100644
--- a/include/crypto/scatterwalk.h
+++ b/include/crypto/scatterwalk.h
@@ -225,10 +225,38 @@ static inline void scatterwalk_done_src(struct scatter_walk *walk,
{
scatterwalk_unmap(walk);
scatterwalk_advance(walk, nbytes);
}
+/*
+ * Flush the dcache of any pages that overlap the region
+ * [offset, offset + nbytes) relative to base_page.
+ *
+ * This should be called only when ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE, to ensure
+ * that all relevant code (including the call to sg_page() in the caller, if
+ * applicable) gets fully optimized out when !ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE.
+ */
+static inline void __scatterwalk_flush_dcache_pages(struct page *base_page,
+ unsigned int offset,
+ unsigned int nbytes)
+{
+ unsigned int num_pages;
+
+ base_page += offset / PAGE_SIZE;
+ offset %= PAGE_SIZE;
+
+ /*
+ * This is an overflow-safe version of
+ * num_pages = DIV_ROUND_UP(offset + nbytes, PAGE_SIZE).
+ */
+ num_pages = nbytes / PAGE_SIZE;
+ num_pages += DIV_ROUND_UP(offset + (nbytes % PAGE_SIZE), PAGE_SIZE);
+
+ for (unsigned int i = 0; i < num_pages; i++)
+ flush_dcache_page(base_page + i);
+}
+
/**
* scatterwalk_done_dst() - Finish one step of a walk of destination scatterlist
* @walk: the scatter_walk
* @nbytes: the number of bytes processed this step, less than or equal to the
* number of bytes that scatterwalk_next() returned.
@@ -238,31 +266,13 @@ static inline void scatterwalk_done_src(struct scatter_walk *walk,
*/
static inline void scatterwalk_done_dst(struct scatter_walk *walk,
unsigned int nbytes)
{
scatterwalk_unmap(walk);
- /*
- * Explicitly check ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE instead of just
- * relying on flush_dcache_page() being a no-op when not implemented,
- * since otherwise the BUG_ON in sg_page() does not get optimized out.
- * This also avoids having to consider whether the loop would get
- * reliably optimized out or not.
- */
- if (ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE) {
- struct page *base_page;
- unsigned int offset;
- int start, end, i;
-
- base_page = sg_page(walk->sg);
- offset = walk->offset;
- start = offset >> PAGE_SHIFT;
- end = start + (nbytes >> PAGE_SHIFT);
- end += (offset_in_page(offset) + offset_in_page(nbytes) +
- PAGE_SIZE - 1) >> PAGE_SHIFT;
- for (i = start; i < end; i++)
- flush_dcache_page(base_page + i);
- }
+ if (ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE)
+ __scatterwalk_flush_dcache_pages(sg_page(walk->sg),
+ walk->offset, nbytes);
scatterwalk_advance(walk, nbytes);
}
void scatterwalk_skip(struct scatter_walk *walk, unsigned int nbytes);
--
2.51.2
The original implementation of memcpy_sglist() was broken because it
didn't handle overlapping scatterlists. The current implementation is
broken too because it calls the skcipher_walk functions which can fail.
It ignores any errors from those functions.
Fix it by replacing it with a new implementation written from scratch.
It always succeeds. It's also a bit faster, since it avoids the
overhead of skcipher_walk. skcipher_walk includes a lot of
functionality (such as alignmask handling) that's irrelevant here.
Reported-by: Colin Ian King <coking(a)nvidia.com>
Closes: https://lore.kernel.org/r/20251114122620.111623-1-coking@nvidia.com
Fixes: 131bdceca1f0 ("crypto: scatterwalk - Add memcpy_sglist")
Fixes: 0f8d42bf128d ("crypto: scatterwalk - Move skcipher walk and use it for memcpy_sglist")
Cc: stable(a)vger.kernel.org
Signed-off-by: Eric Biggers <ebiggers(a)kernel.org>
---
crypto/scatterwalk.c | 103 ++++++++++++++++++++++++++++++-----
include/crypto/scatterwalk.h | 52 +++++++++++-------
2 files changed, 121 insertions(+), 34 deletions(-)
diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c
index 1d010e2a1b1a..af6c17bfcadb 100644
--- a/crypto/scatterwalk.c
+++ b/crypto/scatterwalk.c
@@ -99,30 +99,107 @@ void memcpy_to_sglist(struct scatterlist *sg, unsigned int start,
scatterwalk_start_at_pos(&walk, sg, start);
memcpy_to_scatterwalk(&walk, buf, nbytes);
}
EXPORT_SYMBOL_GPL(memcpy_to_sglist);
+/**
+ * memcpy_sglist() - Copy data from one scatterlist to another
+ * @dst: The destination scatterlist. Can be NULL if @nbytes == 0.
+ * @src: The source scatterlist. Can be NULL if @nbytes == 0.
+ * @nbytes: Number of bytes to copy
+ *
+ * The scatterlists can overlap. Hence this really acts like memmove(), not
+ * memcpy().
+ *
+ * Context: Any context
+ */
void memcpy_sglist(struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
- struct skcipher_walk walk = {};
+ unsigned int src_offset, dst_offset;
- if (unlikely(nbytes == 0)) /* in case sg == NULL */
+ if (unlikely(nbytes == 0)) /* in case src and/or dst is NULL */
return;
- walk.total = nbytes;
-
- scatterwalk_start(&walk.in, src);
- scatterwalk_start(&walk.out, dst);
+ src_offset = src->offset;
+ dst_offset = dst->offset;
+ for (;;) {
+ /* Compute the length to copy this step. */
+ unsigned int len = min3(src->offset + src->length - src_offset,
+ dst->offset + dst->length - dst_offset,
+ nbytes);
+ struct page *src_page = sg_page(src);
+ struct page *dst_page = sg_page(dst);
+ const void *src_virt;
+ void *dst_virt;
+
+ if (IS_ENABLED(CONFIG_HIGHMEM)) {
+ /* HIGHMEM: we may have to actually map the pages. */
+ const unsigned int src_oip = offset_in_page(src_offset);
+ const unsigned int dst_oip = offset_in_page(dst_offset);
+ const unsigned int limit = PAGE_SIZE;
+
+ /* Further limit len to not cross a page boundary. */
+ len = min3(len, limit - src_oip, limit - dst_oip);
+
+ /* Compute the source and destination pages. */
+ src_page += src_offset / PAGE_SIZE;
+ dst_page += dst_offset / PAGE_SIZE;
+
+ if (src_page != dst_page) {
+ /*
+ * Copy between different pages.
+ * No need for memmove(), as the pages differ.
+ */
+ src_virt = kmap_local_page(src_page);
+ dst_virt = kmap_local_page(dst_page);
+ memcpy(dst_virt + dst_oip, src_virt + src_oip,
+ len);
+ flush_dcache_page(dst_page);
+ kunmap_local(dst_virt);
+ kunmap_local(src_virt);
+ } else if (src_oip != dst_oip) {
+ /* Copy between different parts of same page */
+ dst_virt = kmap_local_page(dst_page);
+ memmove(dst_virt + dst_oip, dst_virt + src_oip,
+ len);
+ flush_dcache_page(dst_page);
+ kunmap_local(dst_virt);
+ } /* Exact overlap. No action needed. */
+ } else {
+ /*
+ * !HIGHMEM: no mapping needed. Just work in the linear
+ * buffer of each sg entry.
+ */
+ src_virt = page_address(src_page) + src_offset;
+ dst_virt = page_address(dst_page) + dst_offset;
+ if (src_virt != dst_virt) {
+ memmove(dst_virt, src_virt, len);
+ if (ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE)
+ __scatterwalk_flush_dcache_pages(
+ dst_page, dst_offset, len);
+ }
+ }
+ nbytes -= len;
+ if (nbytes == 0) /* No more to copy? */
+ break;
- skcipher_walk_first(&walk, true);
- do {
- if (walk.src.virt.addr != walk.dst.virt.addr)
- memcpy(walk.dst.virt.addr, walk.src.virt.addr,
- walk.nbytes);
- skcipher_walk_done(&walk, 0);
- } while (walk.nbytes);
+ /*
+ * There's more to copy. Advance the offsets by the length
+ * copied this step, and advance the sg entries as needed.
+ */
+ src_offset += len;
+ if (src_offset >= src->offset + src->length) {
+ src = sg_next(src);
+ src_offset = src->offset;
+ }
+ dst_offset += len;
+ if (dst_offset >= dst->offset + dst->length) {
+ dst = sg_next(dst);
+ dst_offset = dst->offset;
+ }
+ }
}
EXPORT_SYMBOL_GPL(memcpy_sglist);
struct scatterlist *scatterwalk_ffwd(struct scatterlist dst[2],
struct scatterlist *src,
diff --git a/include/crypto/scatterwalk.h b/include/crypto/scatterwalk.h
index 83d14376ff2b..f485454e3955 100644
--- a/include/crypto/scatterwalk.h
+++ b/include/crypto/scatterwalk.h
@@ -225,10 +225,38 @@ static inline void scatterwalk_done_src(struct scatter_walk *walk,
{
scatterwalk_unmap(walk);
scatterwalk_advance(walk, nbytes);
}
+/*
+ * Flush the dcache of any pages that overlap the region
+ * [offset, offset + nbytes) relative to base_page.
+ *
+ * This should be called only when ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE, to ensure
+ * that all relevant code (including the call to sg_page() in the caller, if
+ * applicable) gets fully optimized out when !ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE.
+ */
+static inline void __scatterwalk_flush_dcache_pages(struct page *base_page,
+ unsigned int offset,
+ unsigned int nbytes)
+{
+ unsigned int num_pages;
+
+ base_page += offset / PAGE_SIZE;
+ offset %= PAGE_SIZE;
+
+ /*
+ * This is an overflow-safe version of
+ * num_pages = DIV_ROUND_UP(offset + nbytes, PAGE_SIZE).
+ */
+ num_pages = nbytes / PAGE_SIZE;
+ num_pages += DIV_ROUND_UP(offset + (nbytes % PAGE_SIZE), PAGE_SIZE);
+
+ for (unsigned int i = 0; i < num_pages; i++)
+ flush_dcache_page(base_page + i);
+}
+
/**
* scatterwalk_done_dst() - Finish one step of a walk of destination scatterlist
* @walk: the scatter_walk
* @nbytes: the number of bytes processed this step, less than or equal to the
* number of bytes that scatterwalk_next() returned.
@@ -238,31 +266,13 @@ static inline void scatterwalk_done_src(struct scatter_walk *walk,
*/
static inline void scatterwalk_done_dst(struct scatter_walk *walk,
unsigned int nbytes)
{
scatterwalk_unmap(walk);
- /*
- * Explicitly check ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE instead of just
- * relying on flush_dcache_page() being a no-op when not implemented,
- * since otherwise the BUG_ON in sg_page() does not get optimized out.
- * This also avoids having to consider whether the loop would get
- * reliably optimized out or not.
- */
- if (ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE) {
- struct page *base_page;
- unsigned int offset;
- int start, end, i;
-
- base_page = sg_page(walk->sg);
- offset = walk->offset;
- start = offset >> PAGE_SHIFT;
- end = start + (nbytes >> PAGE_SHIFT);
- end += (offset_in_page(offset) + offset_in_page(nbytes) +
- PAGE_SIZE - 1) >> PAGE_SHIFT;
- for (i = start; i < end; i++)
- flush_dcache_page(base_page + i);
- }
+ if (ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE)
+ __scatterwalk_flush_dcache_pages(sg_page(walk->sg),
+ walk->offset, nbytes);
scatterwalk_advance(walk, nbytes);
}
void scatterwalk_skip(struct scatter_walk *walk, unsigned int nbytes);
--
2.51.2
The quilt patch titled
Subject: mm, swap: fix potential UAF issue for VMA readahead
has been removed from the -mm tree. Its filename was
mm-swap-fix-potential-uaf-issue-for-vma-readahead.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Kairui Song <kasong(a)tencent.com>
Subject: mm, swap: fix potential UAF issue for VMA readahead
Date: Tue, 11 Nov 2025 21:36:08 +0800
Since commit 78524b05f1a3 ("mm, swap: avoid redundant swap device
pinning"), the common helper for allocating and preparing a folio in the
swap cache layer no longer tries to get a swap device reference
internally, because all callers of __read_swap_cache_async are already
holding a swap entry reference. The repeated swap device pinning isn't
needed on the same swap device.
Caller of VMA readahead is also holding a reference to the target entry's
swap device, but VMA readahead walks the page table, so it might encounter
swap entries from other devices, and call __read_swap_cache_async on
another device without holding a reference to it.
So it is possible to cause a UAF when swapoff of device A raced with
swapin on device B, and VMA readahead tries to read swap entries from
device A. It's not easy to trigger, but in theory, it could cause real
issues.
Make VMA readahead try to get the device reference first if the swap
device is a different one from the target entry.
Link: https://lkml.kernel.org/r/20251111-swap-fix-vma-uaf-v1-1-41c660e58562@tence…
Fixes: 78524b05f1a3 ("mm, swap: avoid redundant swap device pinning")
Suggested-by: Huang Ying <ying.huang(a)linux.alibaba.com>
Signed-off-by: Kairui Song <kasong(a)tencent.com>
Acked-by: Chris Li <chrisl(a)kernel.org>
Cc: Baoquan He <bhe(a)redhat.com>
Cc: Barry Song <baohua(a)kernel.org>
Cc: Kemeng Shi <shikemeng(a)huaweicloud.com>
Cc: Nhat Pham <nphamcs(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/swap_state.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
--- a/mm/swap_state.c~mm-swap-fix-potential-uaf-issue-for-vma-readahead
+++ a/mm/swap_state.c
@@ -748,6 +748,8 @@ static struct folio *swap_vma_readahead(
blk_start_plug(&plug);
for (addr = start; addr < end; ilx++, addr += PAGE_SIZE) {
+ struct swap_info_struct *si = NULL;
+
if (!pte++) {
pte = pte_offset_map(vmf->pmd, addr);
if (!pte)
@@ -761,8 +763,19 @@ static struct folio *swap_vma_readahead(
continue;
pte_unmap(pte);
pte = NULL;
+ /*
+ * Readahead entry may come from a device that we are not
+ * holding a reference to, try to grab a reference, or skip.
+ */
+ if (swp_type(entry) != swp_type(targ_entry)) {
+ si = get_swap_device(entry);
+ if (!si)
+ continue;
+ }
folio = __read_swap_cache_async(entry, gfp_mask, mpol, ilx,
&page_allocated, false);
+ if (si)
+ put_swap_device(si);
if (!folio)
continue;
if (page_allocated) {
_
Patches currently in -mm which might be from kasong(a)tencent.com are
mm-swap-do-not-perform-synchronous-discard-during-allocation.patch
mm-swap-rename-helper-for-setup-bad-slots.patch
mm-swap-cleanup-swap-entry-allocation-parameter.patch
mm-migrate-swap-drop-usage-of-folio_index.patch
mm-swap-remove-redundant-argument-for-isolating-a-cluster.patch
The quilt patch titled
Subject: selftests/user_events: fix type cast for write_index packed member in perf_test
has been removed from the -mm tree. Its filename was
selftests-user_events-fix-type-cast-for-write_index-packed-member-in-perf_test.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Ankit Khushwaha <ankitkhushwaha.linux(a)gmail.com>
Subject: selftests/user_events: fix type cast for write_index packed member in perf_test
Date: Thu, 6 Nov 2025 15:25:32 +0530
Accessing 'reg.write_index' directly triggers a -Waddress-of-packed-member
warning due to potential unaligned pointer access:
perf_test.c:239:38: warning: taking address of packed member 'write_index'
of class or structure 'user_reg' may result in an unaligned pointer value
[-Waddress-of-packed-member]
239 | ASSERT_NE(-1, write(self->data_fd, ®.write_index,
| ^~~~~~~~~~~~~~~
Since write(2) works with any alignment. Casting '®.write_index'
explicitly to 'void *' to suppress this warning.
Link: https://lkml.kernel.org/r/20251106095532.15185-1-ankitkhushwaha.linux@gmail…
Fixes: 42187bdc3ca4 ("selftests/user_events: Add perf self-test for empty arguments events")
Signed-off-by: Ankit Khushwaha <ankitkhushwaha.linux(a)gmail.com>
Cc: Beau Belgrave <beaub(a)linux.microsoft.com>
Cc: "Masami Hiramatsu (Google)" <mhiramat(a)kernel.org>
Cc: Steven Rostedt <rostedt(a)goodmis.org>
Cc: sunliming <sunliming(a)kylinos.cn>
Cc: Wei Yang <richard.weiyang(a)gmail.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
tools/testing/selftests/user_events/perf_test.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/tools/testing/selftests/user_events/perf_test.c~selftests-user_events-fix-type-cast-for-write_index-packed-member-in-perf_test
+++ a/tools/testing/selftests/user_events/perf_test.c
@@ -236,7 +236,7 @@ TEST_F(user, perf_empty_events) {
ASSERT_EQ(1 << reg.enable_bit, self->check);
/* Ensure write shows up at correct offset */
- ASSERT_NE(-1, write(self->data_fd, ®.write_index,
+ ASSERT_NE(-1, write(self->data_fd, (void *)®.write_index,
sizeof(reg.write_index)));
val = (void *)(((char *)perf_page) + perf_page->data_offset);
ASSERT_EQ(PERF_RECORD_SAMPLE, *val);
_
Patches currently in -mm which might be from ankitkhushwaha.linux(a)gmail.com are
selftest-mm-fix-pointer-comparison-in-mremap_test.patch
The quilt patch titled
Subject: lib/test_kho: check if KHO is enabled
has been removed from the -mm tree. Its filename was
lib-test_kho-check-if-kho-is-enabled.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Pasha Tatashin <pasha.tatashin(a)soleen.com>
Subject: lib/test_kho: check if KHO is enabled
Date: Thu, 6 Nov 2025 17:06:35 -0500
We must check whether KHO is enabled prior to issuing KHO commands,
otherwise KHO internal data structures are not initialized.
Link: https://lkml.kernel.org/r/20251106220635.2608494-1-pasha.tatashin@soleen.com
Fixes: b753522bed0b ("kho: add test for kexec handover")
Signed-off-by: Pasha Tatashin <pasha.tatashin(a)soleen.com>
Reported-by: kernel test robot <oliver.sang(a)intel.com>
Closes: https://lore.kernel.org/oe-lkp/202511061629.e242724-lkp@intel.com
Reviewed-by: Pratyush Yadav <pratyush(a)kernel.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt(a)kernel.org>
Cc: Alexander Graf <graf(a)amazon.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
lib/test_kho.c | 3 +++
1 file changed, 3 insertions(+)
--- a/lib/test_kho.c~lib-test_kho-check-if-kho-is-enabled
+++ a/lib/test_kho.c
@@ -301,6 +301,9 @@ static int __init kho_test_init(void)
phys_addr_t fdt_phys;
int err;
+ if (!kho_is_enabled())
+ return 0;
+
err = kho_retrieve_subtree(KHO_TEST_FDT, &fdt_phys);
if (!err)
return kho_test_restore(fdt_phys);
_
Patches currently in -mm which might be from pasha.tatashin(a)soleen.com are
kho-make-debugfs-interface-optional.patch
kho-add-interfaces-to-unpreserve-folios-page-ranges-and-vmalloc.patch
memblock-unpreserve-memory-in-case-of-error.patch
test_kho-unpreserve-memory-in-case-of-error.patch
kho-dont-unpreserve-memory-during-abort.patch
liveupdate-kho-move-to-kernel-liveupdate.patch
liveupdate-kho-move-to-kernel-liveupdate-fix.patch
maintainers-update-kho-maintainers.patch
kho-fix-misleading-log-message-in-kho_populate.patch
kho-convert-__kho_abort-to-return-void.patch
kho-introduce-high-level-memory-allocation-api.patch
kho-preserve-fdt-folio-only-once-during-initialization.patch
kho-verify-deserialization-status-and-fix-fdt-alignment-access.patch
kho-always-expose-output-fdt-in-debugfs.patch
kho-simplify-serialization-and-remove-__kho_abort.patch
kho-remove-global-preserved_mem_map-and-store-state-in-fdt.patch
kho-remove-abort-functionality-and-support-state-refresh.patch
kho-update-fdt-dynamically-for-subtree-addition-removal.patch
kho-allow-kexec-load-before-kho-finalization.patch
kho-allow-memory-preservation-state-updates-after-finalization.patch
kho-add-kconfig-option-to-enable-kho-by-default.patch
liveupdate-luo_core-luo_ioctl-live-update-orchestrator.patch
liveupdate-luo_core-integrate-with-kho.patch
liveupdate-luo_core-integrate-with-kho-fix.patch
reboot-call-liveupdate_reboot-before-kexec.patch
liveupdate-luo_session-add-sessions-support.patch
liveupdate-luo_session-add-sessions-support-fix.patch
liveupdate-luo_ioctl-add-user-interface.patch
liveupdate-luo_file-implement-file-systems-callbacks.patch
liveupdate-luo_session-add-ioctls-for-file-preservation-and-state-management.patch
liveupdate-luo_flb-introduce-file-lifecycle-bound-global-state.patch
docs-add-luo-documentation.patch
maintainers-add-liveupdate-entry.patch
mm-memfd_luo-allow-preserving-memfd-fix.patch
selftests-liveupdate-add-userspace-api-selftests.patch
selftests-liveupdate-add-kexec-based-selftest-for-session-lifecycle.patch
selftests-liveupdate-add-kexec-test-for-multiple-and-empty-sessions.patch
tests-liveupdate-add-in-kernel-liveupdate-test.patch
The quilt patch titled
Subject: mm/huge_memory: fix folio split check for anon folios in swapcache
has been removed from the -mm tree. Its filename was
mm-huge_memory-fix-folio-split-check-for-anon-folios-in-swapcache.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Zi Yan <ziy(a)nvidia.com>
Subject: mm/huge_memory: fix folio split check for anon folios in swapcache
Date: Wed, 5 Nov 2025 11:29:10 -0500
Both uniform and non uniform split check missed the check to prevent
splitting anon folios in swapcache to non-zero order.
Splitting anon folios in swapcache to non-zero order can cause data
corruption since swapcache only support PMD order and order-0 entries.
This can happen when one use split_huge_pages under debugfs to split
anon folios in swapcache.
In-tree callers do not perform such an illegal operation. Only debugfs
interface could trigger it. I will put adding a test case on my TODO
list.
Fix the check.
Link: https://lkml.kernel.org/r/20251105162910.752266-1-ziy@nvidia.com
Fixes: 58729c04cf10 ("mm/huge_memory: add buddy allocator like (non-uniform) folio_split()")
Signed-off-by: Zi Yan <ziy(a)nvidia.com>
Reported-by: "David Hildenbrand (Red Hat)" <david(a)kernel.org>
Closes: https://lore.kernel.org/all/dc0ecc2c-4089-484f-917f-920fdca4c898@kernel.org/
Acked-by: David Hildenbrand (Red Hat) <david(a)kernel.org>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Barry Song <baohua(a)kernel.org>
Cc: Dev Jain <dev.jain(a)arm.com>
Cc: Lance Yang <lance.yang(a)linux.dev>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Nico Pache <npache(a)redhat.com>
Cc: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: Wei Yang <richard.weiyang(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/huge_memory.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
--- a/mm/huge_memory.c~mm-huge_memory-fix-folio-split-check-for-anon-folios-in-swapcache
+++ a/mm/huge_memory.c
@@ -3522,7 +3522,8 @@ bool non_uniform_split_supported(struct
/* order-1 is not supported for anonymous THP. */
VM_WARN_ONCE(warns && new_order == 1,
"Cannot split to order-1 folio");
- return new_order != 1;
+ if (new_order == 1)
+ return false;
} else if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
!mapping_large_folio_support(folio->mapping)) {
/*
@@ -3553,7 +3554,8 @@ bool uniform_split_supported(struct foli
if (folio_test_anon(folio)) {
VM_WARN_ONCE(warns && new_order == 1,
"Cannot split to order-1 folio");
- return new_order != 1;
+ if (new_order == 1)
+ return false;
} else if (new_order) {
if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
!mapping_large_folio_support(folio->mapping)) {
_
Patches currently in -mm which might be from ziy(a)nvidia.com are
mm-huge_memory-add-split_huge_page_to_order.patch
mm-memory-failure-improve-large-block-size-folio-handling.patch
mm-huge_memory-fix-kernel-doc-comments-for-folio_split-and-related.patch
mm-huge_memory-fix-kernel-doc-comments-for-folio_split-and-related-fix.patch
mm-huge_memory-fix-kernel-doc-comments-for-folio_split-and-related-fix-2.patch
migrate-optimise-alloc_migration_target-fix.patch
The quilt patch titled
Subject: crash: fix crashkernel resource shrink
has been removed from the -mm tree. Its filename was
crash-fix-crashkernel-resource-shrink.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Sourabh Jain <sourabhjain(a)linux.ibm.com>
Subject: crash: fix crashkernel resource shrink
Date: Sun, 2 Nov 2025 01:07:41 +0530
When crashkernel is configured with a high reservation, shrinking its
value below the low crashkernel reservation causes two issues:
1. Invalid crashkernel resource objects
2. Kernel crash if crashkernel shrinking is done twice
For example, with crashkernel=200M,high, the kernel reserves 200MB of high
memory and some default low memory (say 256MB). The reservation appears
as:
cat /proc/iomem | grep -i crash
af000000-beffffff : Crash kernel
433000000-43f7fffff : Crash kernel
If crashkernel is then shrunk to 50MB (echo 52428800 >
/sys/kernel/kexec_crash_size), /proc/iomem still shows 256MB reserved:
af000000-beffffff : Crash kernel
Instead, it should show 50MB:
af000000-b21fffff : Crash kernel
Further shrinking crashkernel to 40MB causes a kernel crash with the
following trace (x86):
BUG: kernel NULL pointer dereference, address: 0000000000000038
PGD 0 P4D 0
Oops: 0000 [#1] PREEMPT SMP NOPTI
<snip...>
Call Trace: <TASK>
? __die_body.cold+0x19/0x27
? page_fault_oops+0x15a/0x2f0
? search_module_extables+0x19/0x60
? search_bpf_extables+0x5f/0x80
? exc_page_fault+0x7e/0x180
? asm_exc_page_fault+0x26/0x30
? __release_resource+0xd/0xb0
release_resource+0x26/0x40
__crash_shrink_memory+0xe5/0x110
crash_shrink_memory+0x12a/0x190
kexec_crash_size_store+0x41/0x80
kernfs_fop_write_iter+0x141/0x1f0
vfs_write+0x294/0x460
ksys_write+0x6d/0xf0
<snip...>
This happens because __crash_shrink_memory()/kernel/crash_core.c
incorrectly updates the crashk_res resource object even when
crashk_low_res should be updated.
Fix this by ensuring the correct crashkernel resource object is updated
when shrinking crashkernel memory.
Link: https://lkml.kernel.org/r/20251101193741.289252-1-sourabhjain@linux.ibm.com
Fixes: 16c6006af4d4 ("kexec: enable kexec_crash_size to support two crash kernel regions")
Signed-off-by: Sourabh Jain <sourabhjain(a)linux.ibm.com>
Acked-by: Baoquan He <bhe(a)redhat.com>
Cc: Zhen Lei <thunder.leizhen(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/crash_core.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/kernel/crash_core.c~crash-fix-crashkernel-resource-shrink
+++ a/kernel/crash_core.c
@@ -373,7 +373,7 @@ static int __crash_shrink_memory(struct
old_res->start = 0;
old_res->end = 0;
} else {
- crashk_res.end = ram_res->start - 1;
+ old_res->end = ram_res->start - 1;
}
crash_free_reserved_phys_range(ram_res->start, ram_res->end);
_
Patches currently in -mm which might be from sourabhjain(a)linux.ibm.com are
inode_hash() currently mixes a hash value with the super_block pointer
using an unbounded multiplication:
tmp = (hashval * (unsigned long)sb) ^
(GOLDEN_RATIO_PRIME + hashval) / L1_CACHE_BYTES;
On 64-bit kernels this multiplication can overflow and wrap in unsigned
long arithmetic. While this is not a memory-safety issue, it is an
unbounded integer operation and weakens the mixing properties of the
hash.
Replace the pointer*hash multiply with hash_long() over a mixed value
(hashval ^ (unsigned long)sb) and keep the existing shift/mask. This
removes the overflow source and reuses the standard hash helper already
used in other kernel code.
This is an integer wraparound / robustness issue (CWE-190/CWE-407),
not a memory-safety bug.
Reported-by: Qianchang Zhao <pioooooooooip(a)gmail.com>
Reported-by: Zhitong Liu <liuzhitong1993(a)gmail.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Qianchang Zhao <pioooooooooip(a)gmail.com>
---
fs/smb/server/vfs_cache.c | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c
index dfed6fce8..a62ea5aae 100644
--- a/fs/smb/server/vfs_cache.c
+++ b/fs/smb/server/vfs_cache.c
@@ -10,6 +10,7 @@
#include <linux/vmalloc.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
+#include <linux/hash.h>
#include "glob.h"
#include "vfs_cache.h"
@@ -65,12 +66,8 @@ static void fd_limit_close(void)
static unsigned long inode_hash(struct super_block *sb, unsigned long hashval)
{
- unsigned long tmp;
-
- tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
- L1_CACHE_BYTES;
- tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> inode_hash_shift);
- return tmp & inode_hash_mask;
+ unsigned long mixed = hashval ^ (unsigned long)sb;
+ return hash_long(mixed, inode_hash_shift) & inode_hash_mask;
}
static struct ksmbd_inode *__ksmbd_inode_lookup(struct dentry *de)
--
2.34.1
Sasha,
Also wanted to make sure stable gets this patch from David Howells
that fixes a regression that was mentioned in some of the same email
threads.
It fixes an important stable regression in cifs_readv when cache=none.
Bharath has also reviewed and tested it with 6.6 stable. See
attached.
On Fri, Nov 14, 2025 at 9:00 AM Sasha Levin <sashal(a)kernel.org> wrote:
>
> On Fri, Nov 14, 2025 at 04:42:57PM +0530, Shyam Prasad N wrote:
> >Hi Greg/Sasha,
> >
> >Over the last few months, a few users have reported a data corruption
> >with Linux SMB kernel client filesystem. This is one such report:
> >https://lore.kernel.org/linux-cifs/36fb31bf2c854cdc930a3415f5551dcd@izw-ber…
> >
> >The issue is now well understood. Attached is a fix for this issue.
> >I've made sure that the fix is stopping the data corruption and also
> >not regressing other write patterns.
> >
> >The regression seems to have been introduced during a refactoring of
> >this code path during the v6.3 and continued to exist till v6.9,
> >before the code was refactored again with netfs helper library
> >integration in v6.10.
> >
> >I request you to include this change in all stable trees for
> >v6.3..v6.9. I've done my testing on stable-6.6. Please let me know if
> >you want this tested on any other kernels.
>
> I'll queue it up for 6.6, thanks!
>
> --
> Thanks,
> Sasha
--
Thanks,
Steve
Hi,
We’re offering exclusive access to the Fruit Attraction 2025 Visitor Contact List — your direct connection to verified buyers, importers, exporters, and industry leaders from the global fresh produce, logistics, and agri-innovation market.
We have 97,137 verified visitor contacts in our database.
You’ll receive full contact data: Individual Email Address, Phone Number, Contact Name, Job Title, Company Name, Website, Physical Address, LinkedIn Profile, and more.
All data is fully 100% GDPR-compliant and ethically sourced
20% Off—Respond Before Nov 20.
Let me know if you are Interested to see the pricing? Reply “Send me Pricing.”
Best regards,
Karlee Howell
Sr. Marketing Manager
Prefer not to receive these emails? Just reply “NOT INTERESTED”.
From: NeilBrown <neil(a)brown.name>
A recent change to clamp_t() in 6.1.y caused fs/nfsd/nfs4state.c to fail
to compile with gcc-9. The code in nfsd4_get_drc_mem() was written with
the assumption that when "max < min",
clamp(val, min, max)
would return max. This assumption is not documented as an API promise
and the change caused a compile failure if it could be statically
determined that "max < min".
The relevant code was no longer present upstream when commit 1519fbc8832b
("minmax.h: use BUILD_BUG_ON_MSG() for the lo < hi test in clamp()")
landed there, so there is no upstream change to nfsd4_get_drc_mem() to
backport.
There is no clear case that the existing code in nfsd4_get_drc_mem()
is functioning incorrectly. The goal of this patch is to permit the clean
application of commit 1519fbc8832b ("minmax.h: use BUILD_BUG_ON_MSG() for
the lo < hi test in clamp()"), and any commits that depend on it, to LTS
kernels without affecting the ability to compile those kernels. This is
done by open-coding the __clamp() macro sans the built-in type checking.
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220745#c0
Signed-off-by: NeilBrown <neil(a)brown.name>
Stable-dep-of: 1519fbc8832b ("minmax.h: use BUILD_BUG_ON_MSG() for the lo < hi test in clamp()")
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
---
fs/nfsd/nfs4state.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
Changes since Neil's post:
* Editorial changes to the commit message
* Attempt to address David's review comments
* Applied to linux-6.12.y, passed NFSD upstream CI suite
This patch is intended to be applied to linux-6.12.y, and should
apply cleanly to other LTS kernels since nfsd4_get_drc_mem hasn't
changed since v5.4.
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 7b0fabf8c657..41545933dd18 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1983,8 +1983,10 @@ static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn
*/
scale_factor = max_t(unsigned int, 8, nn->nfsd_serv->sv_nrthreads);
- avail = clamp_t(unsigned long, avail, slotsize,
- total_avail/scale_factor);
+ if (avail > total_avail / scale_factor)
+ avail = total_avail / scale_factor;
+ else if (avail < slotsize)
+ avail = slotsize;
num = min_t(int, num, avail / slotsize);
num = max_t(int, num, 1);
nfsd_drc_mem_used += num * slotsize;
--
2.51.0
Hi linux-stable-mirror(a)lists.linaro.org We make sublimated sports bags, backpacks, and sportswear. To get delivery prices, please: 1. Visit hillapex.com and pick an item. 2. Let us know your order quantity. 3. Provide your country name. Best regards, Saad Afzal Founder | Hill Apex 📧 Email: saad(a)hillapex.com 🌐 Website: hillapex.com 📞 Phone/WhatsApp: +92 300 7101027 📍 Address: Chenab Ranger’s Road, 51310 Sialkot, Pakistan
The following commit has been merged into the x86/boot branch of tip:
Commit-ID: 1c7ac68c05bc0327d725dd10aa05f5120f868250
Gitweb: https://git.kernel.org/tip/1c7ac68c05bc0327d725dd10aa05f5120f868250
Author: Yazen Ghannam <yazen.ghannam(a)amd.com>
AuthorDate: Tue, 11 Nov 2025 14:53:57
Committer: Borislav Petkov (AMD) <bp(a)alien8.de>
CommitterDate: Fri, 14 Nov 2025 21:00:26 +01:00
x86/acpi/boot: Correct acpi_is_processor_usable() check again
ACPI v6.3 defined a new "Online Capable" MADT LAPIC flag. This bit is
used in conjunction with the "Enabled" MADT LAPIC flag to determine if
a CPU can be enabled/hotplugged by the OS after boot.
Before the new bit was defined, the "Enabled" bit was explicitly
described like this (ACPI v6.0 wording provided):
"If zero, this processor is unusable, and the operating system
support will not attempt to use it"
This means that CPU hotplug (based on MADT) is not possible. Many BIOS
implementations follow this guidance. They may include LAPIC entries in
MADT for unavailable CPUs, but since these entries are marked with
"Enabled=0" it is expected that the OS will completely ignore these
entries.
However, QEMU will do the same (include entries with "Enabled=0") for
the purpose of allowing CPU hotplug within the guest.
Comment from QEMU function pc_madt_cpu_entry():
/* ACPI spec says that LAPIC entry for non present
* CPU may be omitted from MADT or it must be marked
* as disabled. However omitting non present CPU from
* MADT breaks hotplug on linux. So possible CPUs
* should be put in MADT but kept disabled.
*/
Recent Linux topology changes broke the QEMU use case. A following fix
for the QEMU use case broke bare metal topology enumeration.
Rework the Linux MADT LAPIC flags check to allow the QEMU use case only
for guests and to maintain the ACPI spec behavior for bare metal.
Remove an unnecessary check added to fix a bare metal case introduced by
the QEMU "fix".
[ bp: Change logic as Michal suggested. ]
Fixes: fed8d8773b8e ("x86/acpi/boot: Correct acpi_is_processor_usable() check")
Fixes: f0551af02130 ("x86/topology: Ignore non-present APIC IDs in a present package")
Closes: https://lore.kernel.org/r/20251024204658.3da9bf3f.michal.pecio@gmail.com
Reported-by: Michal Pecio <michal.pecio(a)gmail.com>
Signed-off-by: Yazen Ghannam <yazen.ghannam(a)amd.com>
Tested-by: Michal Pecio <michal.pecio(a)gmail.com>
Tested-by: Ricardo Neri <ricardo.neri-calderon(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/20251111145357.4031846-1-yazen.ghannam@amd.com
---
arch/x86/kernel/acpi/boot.c | 12 ++++++++----
arch/x86/kernel/cpu/topology.c | 15 ---------------
2 files changed, 8 insertions(+), 19 deletions(-)
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 9fa321a..d6138b2 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -35,6 +35,7 @@
#include <asm/smp.h>
#include <asm/i8259.h>
#include <asm/setup.h>
+#include <asm/hypervisor.h>
#include "sleep.h" /* To include x86_acpi_suspend_lowlevel */
static int __initdata acpi_force = 0;
@@ -164,11 +165,14 @@ static bool __init acpi_is_processor_usable(u32 lapic_flags)
if (lapic_flags & ACPI_MADT_ENABLED)
return true;
- if (!acpi_support_online_capable ||
- (lapic_flags & ACPI_MADT_ONLINE_CAPABLE))
- return true;
+ if (acpi_support_online_capable)
+ return lapic_flags & ACPI_MADT_ONLINE_CAPABLE;
- return false;
+ /*
+ * QEMU expects legacy "Enabled=0" LAPIC entries to be counted as usable
+ * in order to support CPU hotplug in guests.
+ */
+ return !hypervisor_is_type(X86_HYPER_NATIVE);
}
static int __init
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 6073a16..425404e 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -27,7 +27,6 @@
#include <xen/xen.h>
#include <asm/apic.h>
-#include <asm/hypervisor.h>
#include <asm/io_apic.h>
#include <asm/mpspec.h>
#include <asm/msr.h>
@@ -240,20 +239,6 @@ static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present)
cpuid_to_apicid[cpu] = apic_id;
topo_set_cpuids(cpu, apic_id, acpi_id);
} else {
- u32 pkgid = topo_apicid(apic_id, TOPO_PKG_DOMAIN);
-
- /*
- * Check for present APICs in the same package when running
- * on bare metal. Allow the bogosity in a guest.
- */
- if (hypervisor_is_type(X86_HYPER_NATIVE) &&
- topo_unit_count(pkgid, TOPO_PKG_DOMAIN, phys_cpu_present_map)) {
- pr_info_once("Ignoring hot-pluggable APIC ID %x in present package.\n",
- apic_id);
- topo_info.nr_rejected_cpus++;
- return;
- }
-
topo_info.nr_disabled_cpus++;
}
On a Xen dom0 boot, this feature does not behave, and we end up
calculating:
num_roots = 1
num_nodes = 2
roots_per_node = 0
This causes a divide-by-zero in the modulus inside the loop.
This change adds a couple of guards for invalid states where we might
get a divide-by-zero.
Signed-off-by: Steven Noonan <steven(a)uplinklabs.net>
Signed-off-by: Ariadne Conill <ariadne(a)ariadne.space>
CC: Yazen Ghannam <yazen.ghannam(a)amd.com>
CC: x86(a)vger.kernel.org
CC: stable(a)vger.kernel.org
---
arch/x86/kernel/amd_node.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/arch/x86/kernel/amd_node.c b/arch/x86/kernel/amd_node.c
index 3d0a4768d603c..cdc6ba224d4ad 100644
--- a/arch/x86/kernel/amd_node.c
+++ b/arch/x86/kernel/amd_node.c
@@ -282,6 +282,17 @@ static int __init amd_smn_init(void)
return -ENODEV;
num_nodes = amd_num_nodes();
+
+ if (!num_nodes)
+ return -ENODEV;
+
+ /* Possibly a virtualized environment (e.g. Xen) where we wi
ll get
+ * roots_per_node=0 if the number of roots is fewer than number of
+ * nodes
+ */
+ if (num_roots < num_nodes)
+ return -ENODEV;
+
amd_roots = kcalloc(num_nodes, sizeof(*amd_roots), GFP_KERNEL);
if (!amd_roots)
return -ENOMEM;
--
2.51.2
Hi.
I've check c-repro [1] on 6.1.y branch and found that repro still produce
the crash on 6.1.y. I notice that syzbot bisection result [2]
is incorrect: indeed, the hung was fixed by upstream commit b0ad381fa769
("btrfs: fix deadlock with fiemap and extent locking"). Also,
I saw CVE-2024-35784 [3][4] vulnerability, that have direct relation with that syzbot
report. Therefore, syzbot reproducer provided additional way to check for CVE-2024-35784.
I attempted to fix CVE-2024-35784 in stable 6.1.y (over v6.1.157), and
found that the initial fix commit b0ad381fa769 ("btrfs: fix deadlock with
fiemap and extent locking") introduced regressions [5][6].
IMHO here is the minimum patch series to eliminate CVE-2024-35784 from 6.1.y:
b0ad381fa769 ("btrfs: fix deadlock with fiemap and extent locking") (Initial fix of the CVE-2024-35784)
a1a4a9ca77f1 ("btrfs: fix race between ordered extent completion and fiemap") (Fixes: b0ad381fa769)
978b63f7464a ("btrfs: fix race when detecting delalloc ranges during fiemap") (Fixes: b0ad381fa769)
1cab1375ba6d ("btrfs: reuse cloned extent buffer during fiemap to avoid re-allocations") (Optimization: 978b63f7464a)
53e24158684b ("btrfs: set start on clone before calling copy_extent_buffer_full") (Fixes: 1cab1375ba6d)
Required patches attached.
Only two patches in the series have minor backport modifications due to v6.1.157 btrfs code differences.
The remaining patches are identical to the upstream.
Regards,
AK
Reported-by: syzbot+f8217aae382555004877(a)syzkaller.appspotmail.com
----
[1] https://syzkaller.appspot.com/text?tag=ReproC&x=12b4c88b280000
[2] https://syzkaller.appspot.com/bug?extid=f8217aae382555004877
[3] https://lore.kernel.org/all/2024051704-CVE-2024-35784-6dec@gregkh/
[4] https://cve.org/CVERecord/?id=CVE-2024-35784
[5] https://lore.kernel.org/linux-btrfs/cover.1709202499.git.fdmanana@suse.com/
[6] https://lore.kernel.org/all/20240304211551.880347593@linuxfoundation.org/
Here is a series adding support for 6 Winbond SPI NOR chips. Describing
these chips is needed otherwise the block protection feature is not
available. Everything else looks fine otherwise.
In practice I am only adding 6 very similar IDs but I split the commits
because the amount of meta data to show proof that all the chips have
been tested and work is pretty big.
As the commits simply add an ID, I am Cc'ing stable with the hope to
get these backported to LTS kernels as allowed by the stable rules (see
link below, but I hope I am doing this right).
Link: https://elixir.bootlin.com/linux/v6.17.7/source/Documentation/process/stabl…
Thanks,
Miquèl
---
Miquel Raynal (6):
mtd: spi-nor: winbond: Add support for W25Q01NWxxIQ chips
mtd: spi-nor: winbond: Add support for W25Q01NWxxIM chips
mtd: spi-nor: winbond: Add support for W25Q02NWxxIM chips
mtd: spi-nor: winbond: Add support for W25H512NWxxAM chips
mtd: spi-nor: winbond: Add support for W25H01NWxxAM chips
mtd: spi-nor: winbond: Add support for W25H02NWxxAM chips
drivers/mtd/spi-nor/winbond.c | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
---
base-commit: 479ba7fc704936b74a91ee352fe113d6391d562f
change-id: 20251105-winbond-v6-18-rc1-spi-nor-7f78cb2785d6
Best regards,
--
Miquel Raynal <miquel.raynal(a)bootlin.com>
I marked these as fixes, but the issue is not likely to trigger in
normal conditions.
Not tested on hardware, please kindly provide tested-by, the best with
some probe bind/unbind cycle.
Best regards,
Krzysztof
---
Krzysztof Kozlowski (2):
ASoC: codecs: pm4125: Fix potential conflict when probing two devices
ASoC: codecs: pm4125: Remove irq_chip on component unbind
sound/soc/codecs/pm4125.c | 19 ++++++++++++++-----
1 file changed, 14 insertions(+), 5 deletions(-)
---
base-commit: d22122bd89bc5ce7b3e057d99679ca50a72a8245
change-id: 20251023-asoc-regmap-irq-chip-bb2053c32168
Best regards,
--
Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
Make sure to drop the reference taken to the ahb platform device when
looking up its driver data while enabling the smmu.
Note that holding a reference to a device does not prevent its driver
data from going away.
Fixes: 89c788bab1f0 ("ARM: tegra: Add SMMU enabler in AHB")
Cc: stable(a)vger.kernel.org # 3.5
Signed-off-by: Johan Hovold <johan(a)kernel.org>
---
drivers/amba/tegra-ahb.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/amba/tegra-ahb.c b/drivers/amba/tegra-ahb.c
index c0e8b765522d..f23c3ed01810 100644
--- a/drivers/amba/tegra-ahb.c
+++ b/drivers/amba/tegra-ahb.c
@@ -144,6 +144,7 @@ int tegra_ahb_enable_smmu(struct device_node *dn)
if (!dev)
return -EPROBE_DEFER;
ahb = dev_get_drvdata(dev);
+ put_device(dev);
val = gizmo_readl(ahb, AHB_ARBITRATION_XBAR_CTRL);
val |= AHB_ARBITRATION_XBAR_CTRL_SMMU_INIT_DONE;
gizmo_writel(ahb, val, AHB_ARBITRATION_XBAR_CTRL);
--
2.49.1
On Tegra platforms using ACPI, the SMCCC driver already registers the
SoC device. This makes the registration performed by the Tegra fuse
driver redundant.
When booted via ACPI, skip registering the SoC device and suppress
printing SKU information from the Tegra fuse driver, as this information
is already provided by the SMCCC driver.
Fixes: 972167c69080 ("soc/tegra: fuse: Add ACPI support for Tegra194 and Tegra234")
Cc: stable(a)vger.kernel.org
Signed-off-by: Kartik Rajput <kkartik(a)nvidia.com>
---
drivers/soc/tegra/fuse/fuse-tegra.c | 2 --
1 file changed, 2 deletions(-)
diff --git a/drivers/soc/tegra/fuse/fuse-tegra.c b/drivers/soc/tegra/fuse/fuse-tegra.c
index d27667283846..74d2fedea71c 100644
--- a/drivers/soc/tegra/fuse/fuse-tegra.c
+++ b/drivers/soc/tegra/fuse/fuse-tegra.c
@@ -182,8 +182,6 @@ static int tegra_fuse_probe(struct platform_device *pdev)
}
fuse->soc->init(fuse);
- tegra_fuse_print_sku_info(&tegra_sku_info);
- tegra_soc_device_register();
err = tegra_fuse_add_lookups(fuse);
if (err)
--
2.43.0
Hello Greg, hello Sasha,
Could you please queue up
90918e3b6404 ("netfilter: nft_ct: add seqadj extension for natted connections")
for 6.17?
As-is some more esoteric configurations may not work and provide warning
splat:
Missing nfct_seqadj_ext_add() setup call
WARNING: .. at net/netfilter/nf_conntrack_seqadj.c:41 ... [nf_conntrack]
etc.
I don't think this fix has risks and I'm not aware of any dependencies.
Thanks for maintaining the stable trees!
put_device() is called on error path of rpmsg_eptdev_add() to cleanup
resource attached to eptdev->dev, unfortunately it's bogus cause
dev->release() is not set yet.
When a struct device instance is destroyed, driver core framework checks
the possible release() callback from candidates below:
- struct device::release()
- dev->type->release()
- dev->class->dev_release()
Rpmsg eptdev owns none of them so WARN() will complaint the absence of
release():
[ 159.112182] ------------[ cut here ]------------
[ 159.112188] Device '(null)' does not have a release() function, it is broken and must be fixed. See Documentation/core-api/kobject.rst.
[ 159.112205] WARNING: CPU: 2 PID: 1975 at drivers/base/core.c:2567 device_release+0x7a/0x90
Fixes: c0cdc19f84a4 ("rpmsg: Driver for user space endpoint interface")
Cc: stable(a)vger.kernel.org
Signed-off-by: Dawei Li <dawei.li(a)linux.dev>
---
drivers/rpmsg/rpmsg_char.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/drivers/rpmsg/rpmsg_char.c b/drivers/rpmsg/rpmsg_char.c
index 34b35ea74aab..1b8297b373f0 100644
--- a/drivers/rpmsg/rpmsg_char.c
+++ b/drivers/rpmsg/rpmsg_char.c
@@ -494,7 +494,6 @@ static int rpmsg_eptdev_add(struct rpmsg_eptdev *eptdev,
if (cdev)
ida_free(&rpmsg_minor_ida, MINOR(dev->devt));
free_eptdev:
- put_device(dev);
kfree(eptdev);
return ret;
--
2.25.1
The function load_timings_from_dt() directly assigns the result of
krealloc() to tegra->timings, which causes a memory leak when
krealloc() fails. When krealloc() returns NULL, the original pointer
is lost, making it impossible to free the previously allocated memory.
This fix uses a temporary variable to store the krealloc() result and
only updates tegra->timings after successful allocation, preserving
the original pointer in case of failure.
Fixes: 888ca40e2843 ("clk: tegra: emc: Support multiple RAM codes")
Cc: stable(a)vger.kernel.org
Signed-off-by: Wentao Liang <vulab(a)iscas.ac.cn>
---
drivers/clk/tegra/clk-tegra124-emc.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/drivers/clk/tegra/clk-tegra124-emc.c b/drivers/clk/tegra/clk-tegra124-emc.c
index 2a6db0434281..ed4972fa6dab 100644
--- a/drivers/clk/tegra/clk-tegra124-emc.c
+++ b/drivers/clk/tegra/clk-tegra124-emc.c
@@ -444,6 +444,7 @@ static int load_timings_from_dt(struct tegra_clk_emc *tegra,
u32 ram_code)
{
struct emc_timing *timings_ptr;
+ struct emc_timing *new_timings;
struct device_node *child;
int child_count = of_get_child_count(node);
int i = 0, err;
@@ -451,10 +452,15 @@ static int load_timings_from_dt(struct tegra_clk_emc *tegra,
size = (tegra->num_timings + child_count) * sizeof(struct emc_timing);
- tegra->timings = krealloc(tegra->timings, size, GFP_KERNEL);
- if (!tegra->timings)
+ new_timings = krealloc(tegra->timings, size, GFP_KERNEL);
+ if (!new_timings) {
+ kfree(tegra->timings);
+ tegra->timings = NULL;
+ tegra->num_timings = 0;
return -ENOMEM;
+ }
+ tegra->timings = new_timings;
timings_ptr = tegra->timings + tegra->num_timings;
tegra->num_timings += child_count;
--
2.34.1
Protect access to fore200e->available_cell_rate with rate_mtx lock to
prevent potential data race.
In this case, since the update depends on a prior read, a data race
could lead to a wrong fore200e.available_cell_rate value.
The field fore200e.available_cell_rate is generally protected by the lock
fore200e.rate_mtx when accessed. In all other read and write cases, this
field is consistently protected by the lock, except for this case and
during initialization.
This potential bug was detected by our experimental static analysis tool,
which analyzes locking APIs and paired functions to identify data races
and atomicity violations.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable(a)vger.kernel.org
Signed-off-by: Gui-Dong Han <2045gemini(a)gmail.com>
---
v2:
* Added a description of the data race hazard in fore200e_open(), as
suggested by Jakub Kicinski and Simon Horman.
---
drivers/atm/fore200e.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c
index 4fea1149e003..f62e38571440 100644
--- a/drivers/atm/fore200e.c
+++ b/drivers/atm/fore200e.c
@@ -1374,7 +1374,9 @@ fore200e_open(struct atm_vcc *vcc)
vcc->dev_data = NULL;
+ mutex_lock(&fore200e->rate_mtx);
fore200e->available_cell_rate += vcc->qos.txtp.max_pcr;
+ mutex_unlock(&fore200e->rate_mtx);
kfree(fore200e_vcc);
return -EINVAL;
--
2.25.1
Fixed WCN6855 firmware to use the correct FW file and added a fallback mechanism.
Changes v2:
- Add Fixes tag.
- Add comments in the commit and code to explain the reason for the changes.
- Link to v1
https://lore.kernel.org/all/20251112074638.1592864-1-quic_shuaz@quicinc.com/
Shuai Zhang (1):
Bluetooth: btqca: Add WCN6855 firmware priority selection feature
drivers/bluetooth/btqca.c | 22 ++++++++++++++++++++--
1 file changed, 20 insertions(+), 2 deletions(-)
--
2.34.1
In order to set the AMCR register, which configures the
memory-region split between ospi1 and ospi2, we need to
identify the ospi instance.
By using memory-region-names, it allows to identify the
ospi instance this memory-region belongs to.
Fixes: cad2492de91c ("arm64: dts: st: Add SPI NOR flash support on stm32mp257f-ev1 board")
Cc: stable(a)vger.kernel.org
Signed-off-by: Patrice Chotard <patrice.chotard(a)foss.st.com>
---
Changes in v4:
- Rebase on v6.18-rc1
- Link to v3: https://lore.kernel.org/r/20250811-upstream_fix_dts_omm-v3-1-c4186b7667cb@f…
Changes in v3:
- Set again "Cc: <stable(a)vger.kernel.org>"
- Link to v2: https://lore.kernel.org/r/20250811-upstream_fix_dts_omm-v2-1-00ff55076bd5@f…
Changes in v2:
- Update commit message.
- Use correct memory-region-names value.
- Remove "Cc: <stable(a)vger.kernel.org>" tag as the fixed patch is not part of a LTS.
- Link to v1: https://lore.kernel.org/r/20250806-upstream_fix_dts_omm-v1-1-e68c15ed422d@f…
---
arch/arm64/boot/dts/st/stm32mp257f-ev1.dts | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/arm64/boot/dts/st/stm32mp257f-ev1.dts b/arch/arm64/boot/dts/st/stm32mp257f-ev1.dts
index 6e165073f732..bb6d6393d2e4 100644
--- a/arch/arm64/boot/dts/st/stm32mp257f-ev1.dts
+++ b/arch/arm64/boot/dts/st/stm32mp257f-ev1.dts
@@ -266,6 +266,7 @@ &i2c8 {
&ommanager {
memory-region = <&mm_ospi1>;
+ memory-region-names = "ospi1";
pinctrl-0 = <&ospi_port1_clk_pins_a
&ospi_port1_io03_pins_a
&ospi_port1_cs0_pins_a>;
---
base-commit: 3a8660878839faadb4f1a6dd72c3179c1df56787
change-id: 20250806-upstream_fix_dts_omm-c006b69042f1
Best regards,
--
Patrice Chotard <patrice.chotard(a)foss.st.com>
From: Michal Pecio <michal.pecio(a)gmail.com>
Return buffers if streaming fails to start due to uvc_pm_get() error.
This bug may be responsible for a warning I got running
while :; do yavta -c3 /dev/video0; done
on an xHCI controller which failed under this workload.
I had no luck reproducing this warning again to confirm.
xhci_hcd 0000:09:00.0: HC died; cleaning up
usb 13-2: USB disconnect, device number 2
WARNING: CPU: 2 PID: 29386 at drivers/media/common/videobuf2/videobuf2-core.c:1803 vb2_start_streaming+0xac/0x120
Fixes: 7dd56c47784a ("media: uvcvideo: Remove stream->is_streaming field")
Cc: stable(a)vger.kernel.org
Signed-off-by: Michal Pecio <michal.pecio(a)gmail.com>
Reviewed-by: Ricardo Ribalda <ribalda(a)chromium.org>
Reviewed-by: Laurent Pinchart <laurent.pinchart(a)ideasonboard.com>
Link: https://patch.msgid.link/20251015133642.3dede646.michal.pecio@gmail.com
Signed-off-by: Laurent Pinchart <laurent.pinchart(a)ideasonboard.com>
---
Changes since v1:
- Reorganize error path
---
drivers/media/usb/uvc/uvc_queue.c | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/drivers/media/usb/uvc/uvc_queue.c b/drivers/media/usb/uvc/uvc_queue.c
index 790184c9843d..e838c6c1893a 100644
--- a/drivers/media/usb/uvc/uvc_queue.c
+++ b/drivers/media/usb/uvc/uvc_queue.c
@@ -177,18 +177,20 @@ static int uvc_start_streaming_video(struct vb2_queue *vq, unsigned int count)
ret = uvc_pm_get(stream->dev);
if (ret)
- return ret;
+ goto err_buffers;
queue->buf_used = 0;
ret = uvc_video_start_streaming(stream);
- if (ret == 0)
- return 0;
+ if (ret)
+ goto err_pm;
+ return 0;
+
+err_pm:
uvc_pm_put(stream->dev);
-
+err_buffers:
uvc_queue_return_buffers(queue, UVC_BUF_STATE_QUEUED);
-
return ret;
}
base-commit: d363bdfa0ec6b19a4f40b572cec70430d5b13ad6
--
Regards,
Laurent Pinchart
From: Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
[ Upstream commit 68410c5bd381a81bcc92b808e7dc4e6b9ed25d11 ]
If a shared IRQ is used by the driver due to platform limitation, then the
IRQ affinity hint is set right after the allocation of IRQ vectors in
ath11k_pci_alloc_msi(). This does no harm unless one of the functions
requesting the IRQ fails and attempt to free the IRQ. This results in the
below warning:
WARNING: CPU: 7 PID: 349 at kernel/irq/manage.c:1929 free_irq+0x278/0x29c
Call trace:
free_irq+0x278/0x29c
ath11k_pcic_free_irq+0x70/0x10c [ath11k]
ath11k_pci_probe+0x800/0x820 [ath11k_pci]
local_pci_probe+0x40/0xbc
The warning is due to not clearing the affinity hint before freeing the
IRQs.
So to fix this issue, clear the IRQ affinity hint before calling
ath11k_pcic_free_irq() in the error path. The affinity will be cleared once
again further down the error path due to code organization, but that does
no harm.
Tested-on: QCA6390 hw2.0 PCI WLAN.HST.1.0.1-05266-QCAHSTSWPLZ_V2_TO_X86-1
Cc: Baochen Qiang <quic_bqiang(a)quicinc.com>
Fixes: 39564b475ac5 ("wifi: ath11k: fix boot failure with one MSI vector")
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
Reviewed-by: Baochen Qiang <quic_bqiang(a)quicinc.com>
Link: https://patch.msgid.link/20250225053447.16824-2-manivannan.sadhasivam@linar…
Signed-off-by: Jeff Johnson <jeff.johnson(a)oss.qualcomm.com>
Signed-off-by: Wenshan Lan <jetlan9(a)163.com>
---
drivers/net/wireless/ath/ath11k/pci.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c
index 6ebfa5d02e2e..c1d576ff77fa 100644
--- a/drivers/net/wireless/ath/ath11k/pci.c
+++ b/drivers/net/wireless/ath/ath11k/pci.c
@@ -936,6 +936,8 @@ static int ath11k_pci_probe(struct pci_dev *pdev,
return 0;
err_free_irq:
+ /* __free_irq() expects the caller to have cleared the affinity hint */
+ ath11k_pci_set_irq_affinity_hint(ab_pci, NULL);
ath11k_pcic_free_irq(ab);
err_ce_free:
--
2.43.0
The current LoongArch BPF trampoline implementation is incompatible
with tracing functions in kernel modules. This causes several severe
and user-visible problems:
* Kernel lockups when a BPF program is attached to a module function [0].
* The `bpf_selftests/module_attach` test fails consistently.
* Critical kernel modules like WireGuard experience traffic disruption
when their functions are traced with fentry [1].
Given the severity and the potential for other unknown side-effects,
it is safest to disable the feature entirely for now. This patch
prevents the BPF subsystem from allowing trampoline attachments to
module functions on LoongArch.
This is a temporary mitigation until the core issues in the trampoline
code for module handling can be identified and fixed.
[root@fedora bpf]# ./test_progs -a module_attach -v
bpf_testmod.ko is already unloaded.
Loading bpf_testmod.ko...
Successfully loaded bpf_testmod.ko.
test_module_attach:PASS:skel_open 0 nsec
test_module_attach:PASS:set_attach_target 0 nsec
test_module_attach:PASS:set_attach_target_explicit 0 nsec
test_module_attach:PASS:skel_load 0 nsec
libbpf: prog 'handle_fentry': failed to attach: -ENOTSUPP
libbpf: prog 'handle_fentry': failed to auto-attach: -ENOTSUPP
test_module_attach:FAIL:skel_attach skeleton attach failed: -524
Summary: 0/0 PASSED, 0 SKIPPED, 1 FAILED
Successfully unloaded bpf_testmod.ko.
[0]: https://lore.kernel.org/loongarch/CAK3+h2wDmpC-hP4u4pJY8T-yfKyk4yRzpu2LMO+C…
[1]: https://lore.kernel.org/loongarch/CAK3+h2wYcpc+OwdLDUBvg2rF9rvvyc5amfHT-KcF…
Cc: stable(a)vger.kernel.org
Fixes: f9b6b41f0cf3 (“LoongArch: BPF: Add basic bpf trampoline support”)
Acked-by: Hengqi Chen <hengqi.chen(a)gmail.com>
Signed-off-by: Vincent Li <vincent.mc.li(a)gmail.com>
---
arch/loongarch/net/bpf_jit.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index cbe53d0b7fb0..49c1d4b95404 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -1624,6 +1624,9 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i
/* Direct jump skips 5 NOP instructions */
else if (is_bpf_text_address((unsigned long)orig_call))
orig_call += LOONGARCH_BPF_FENTRY_NBYTES;
+ /* Module tracing not supported - causes kernel lockups */
+ else if (is_module_text_address((unsigned long)orig_call))
+ return -ENOTSUPP;
if (flags & BPF_TRAMP_F_CALL_ORIG) {
move_addr(ctx, LOONGARCH_GPR_A0, (const u64)im);
--
2.38.1
Propose backporting commit:1cf11d80db5df ("ALSA: hda: Fix missing pointer check in hda_component_manager_init
function") to the 6.12.y stable branch.
The current 6.12 branch does not include commit:6014e9021b28 ("ALSA: hda: Move codec drivers into sound/hda/codecs directory"), so the patch has been updated to reflect the original source code path.
Denis Arefev (1):
ALSA: hda: Fix missing pointer check in hda_component_manager_init
function
sound/pci/hda/hda_component.c | 4 ++++
1 file changed, 4 insertions(+)
--
2.17.1
The patch titled
Subject: selftests/mm: fix division-by-zero in uffd-unit-tests
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
selftests-mm-fix-division-by-zero-in-uffd-unit-tests.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Carlos Llamas <cmllamas(a)google.com>
Subject: selftests/mm: fix division-by-zero in uffd-unit-tests
Date: Thu, 13 Nov 2025 03:46:22 +0000
Commit 4dfd4bba8578 ("selftests/mm/uffd: refactor non-composite global
vars into struct") moved some of the operations previously implemented in
uffd_setup_environment() earlier in the main test loop.
The calculation of nr_pages, which involves a division by page_size, now
occurs before checking that default_huge_page_size() returns a non-zero
This leads to a division-by-zero error on systems with !CONFIG_HUGETLB.
Fix this by relocating the non-zero page_size check before the nr_pages
calculation, as it was originally implemented.
Link: https://lkml.kernel.org/r/20251113034623.3127012-1-cmllamas@google.com
Fixes: 4dfd4bba8578 ("selftests/mm/uffd: refactor non-composite global vars into struct")
Signed-off-by: Carlos Llamas <cmllamas(a)google.com>
Acked-by: David Hildenbrand (Red Hat) <david(a)kernel.org>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Reviewed-by: Mike Rapoport (Microsoft) <rppt(a)kernel.org>
Cc: Brendan Jackman <jackmanb(a)google.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
tools/testing/selftests/mm/uffd-unit-tests.c | 15 +++++++--------
1 file changed, 7 insertions(+), 8 deletions(-)
--- a/tools/testing/selftests/mm/uffd-unit-tests.c~selftests-mm-fix-division-by-zero-in-uffd-unit-tests
+++ a/tools/testing/selftests/mm/uffd-unit-tests.c
@@ -1758,10 +1758,15 @@ int main(int argc, char *argv[])
uffd_test_ops = mem_type->mem_ops;
uffd_test_case_ops = test->test_case_ops;
- if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB))
+ if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB)) {
gopts.page_size = default_huge_page_size();
- else
+ if (gopts.page_size == 0) {
+ uffd_test_skip("huge page size is 0, feature missing?");
+ continue;
+ }
+ } else {
gopts.page_size = psize();
+ }
/* Ensure we have at least 2 pages */
gopts.nr_pages = MAX(UFFD_TEST_MEM_SIZE, gopts.page_size * 2)
@@ -1776,12 +1781,6 @@ int main(int argc, char *argv[])
continue;
uffd_test_start("%s on %s", test->name, mem_type->name);
- if ((mem_type->mem_flag == MEM_HUGETLB ||
- mem_type->mem_flag == MEM_HUGETLB_PRIVATE) &&
- (default_huge_page_size() == 0)) {
- uffd_test_skip("huge page size is 0, feature missing?");
- continue;
- }
if (!uffd_feature_supported(test)) {
uffd_test_skip("feature missing");
continue;
_
Patches currently in -mm which might be from cmllamas(a)google.com are
selftests-mm-fix-division-by-zero-in-uffd-unit-tests.patch
This is a note to let you know that I've just added the patch titled
iio: humditiy: hdc3020: fix units for thresholds and hysteresis
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From cb372b4f46d4285e5d2c07ba734374151b8e34e7 Mon Sep 17 00:00:00 2001
From: Dimitri Fedrau <dimitri.fedrau(a)liebherr.com>
Date: Thu, 16 Oct 2025 07:20:39 +0200
Subject: iio: humditiy: hdc3020: fix units for thresholds and hysteresis
According to the ABI the units after application of scale and offset are
milli degree celsius for temperature thresholds and milli percent for
relative humidity thresholds. Currently the resulting units are degree
celsius for temperature thresholds and hysteresis and percent for relative
humidity thresholds and hysteresis. Change scale factor to fix this issue.
Fixes: 3ad0e7e5f0cb ("iio: humidity: hdc3020: add threshold events support")
Reported-by: Chris Lesiak <chris.lesiak(a)licorbio.com>
Reviewed-by: Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
Signed-off-by: Dimitri Fedrau <dimitri.fedrau(a)liebherr.com>
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/humidity/hdc3020.c | 69 ++++++++++++++++++++--------------
1 file changed, 41 insertions(+), 28 deletions(-)
diff --git a/drivers/iio/humidity/hdc3020.c b/drivers/iio/humidity/hdc3020.c
index 8aa567d9aded..78b2c171c8da 100644
--- a/drivers/iio/humidity/hdc3020.c
+++ b/drivers/iio/humidity/hdc3020.c
@@ -72,6 +72,9 @@
#define HDC3020_MAX_TEMP_HYST_MICRO 164748607
#define HDC3020_MAX_HUM_MICRO 99220264
+/* Divide 65535 from the datasheet by 5 to avoid overflows */
+#define HDC3020_THRESH_FRACTION (65535 / 5)
+
struct hdc3020_data {
struct i2c_client *client;
struct gpio_desc *reset_gpio;
@@ -376,15 +379,18 @@ static int hdc3020_thresh_get_temp(u16 thresh)
int temp;
/*
- * Get the temperature threshold from 9 LSBs, shift them to get
- * the truncated temperature threshold representation and
- * calculate the threshold according to the formula in the
- * datasheet. Result is degree celsius scaled by 65535.
+ * Get the temperature threshold from 9 LSBs, shift them to get the
+ * truncated temperature threshold representation and calculate the
+ * threshold according to the explicit formula in the datasheet:
+ * T(C) = -45 + (175 * temp) / 65535.
+ * Additionally scale by HDC3020_THRESH_FRACTION to avoid precision loss
+ * when calculating threshold and hysteresis values. Result is degree
+ * celsius scaled by HDC3020_THRESH_FRACTION.
*/
temp = FIELD_GET(HDC3020_THRESH_TEMP_MASK, thresh) <<
HDC3020_THRESH_TEMP_TRUNC_SHIFT;
- return -2949075 + (175 * temp);
+ return -2949075 / 5 + (175 / 5 * temp);
}
static int hdc3020_thresh_get_hum(u16 thresh)
@@ -394,13 +400,16 @@ static int hdc3020_thresh_get_hum(u16 thresh)
/*
* Get the humidity threshold from 7 MSBs, shift them to get the
* truncated humidity threshold representation and calculate the
- * threshold according to the formula in the datasheet. Result is
- * percent scaled by 65535.
+ * threshold according to the explicit formula in the datasheet:
+ * RH(%) = 100 * hum / 65535.
+ * Additionally scale by HDC3020_THRESH_FRACTION to avoid precision loss
+ * when calculating threshold and hysteresis values. Result is percent
+ * scaled by HDC3020_THRESH_FRACTION.
*/
hum = FIELD_GET(HDC3020_THRESH_HUM_MASK, thresh) <<
HDC3020_THRESH_HUM_TRUNC_SHIFT;
- return hum * 100;
+ return hum * 100 / 5;
}
static u16 hdc3020_thresh_set_temp(int s_temp, u16 curr_thresh)
@@ -455,8 +464,8 @@ int hdc3020_thresh_clr(s64 s_thresh, s64 s_hyst, enum iio_event_direction dir)
else
s_clr = s_thresh + s_hyst;
- /* Divide by 65535 to get units of micro */
- return div_s64(s_clr, 65535);
+ /* Divide by HDC3020_THRESH_FRACTION to get units of micro */
+ return div_s64(s_clr, HDC3020_THRESH_FRACTION);
}
static int _hdc3020_write_thresh(struct hdc3020_data *data, u16 reg, u16 val)
@@ -507,7 +516,7 @@ static int hdc3020_write_thresh(struct iio_dev *indio_dev,
clr = ret;
/* Scale value to include decimal part into calculations */
- s_val = (val < 0) ? (val * 1000000 - val2) : (val * 1000000 + val2);
+ s_val = (val < 0) ? (val * 1000 - val2) : (val * 1000 + val2);
switch (chan->type) {
case IIO_TEMP:
switch (info) {
@@ -523,7 +532,8 @@ static int hdc3020_write_thresh(struct iio_dev *indio_dev,
/* Calculate old hysteresis */
s_thresh = (s64)hdc3020_thresh_get_temp(thresh) * 1000000;
s_clr = (s64)hdc3020_thresh_get_temp(clr) * 1000000;
- s_hyst = div_s64(abs(s_thresh - s_clr), 65535);
+ s_hyst = div_s64(abs(s_thresh - s_clr),
+ HDC3020_THRESH_FRACTION);
/* Set new threshold */
thresh = reg_val;
/* Set old hysteresis */
@@ -532,16 +542,17 @@ static int hdc3020_write_thresh(struct iio_dev *indio_dev,
case IIO_EV_INFO_HYSTERESIS:
/*
* Function hdc3020_thresh_get_temp returns temperature
- * in degree celsius scaled by 65535. Scale by 1000000
- * to be able to subtract scaled hysteresis value.
+ * in degree celsius scaled by HDC3020_THRESH_FRACTION.
+ * Scale by 1000000 to be able to subtract scaled
+ * hysteresis value.
*/
s_thresh = (s64)hdc3020_thresh_get_temp(thresh) * 1000000;
/*
* Units of s_val are in micro degree celsius, scale by
- * 65535 to get same units as s_thresh.
+ * HDC3020_THRESH_FRACTION to get same units as s_thresh.
*/
s_val = min(abs(s_val), HDC3020_MAX_TEMP_HYST_MICRO);
- s_hyst = (s64)s_val * 65535;
+ s_hyst = (s64)s_val * HDC3020_THRESH_FRACTION;
s_clr = hdc3020_thresh_clr(s_thresh, s_hyst, dir);
s_clr = max(s_clr, HDC3020_MIN_TEMP_MICRO);
s_clr = min(s_clr, HDC3020_MAX_TEMP_MICRO);
@@ -565,7 +576,8 @@ static int hdc3020_write_thresh(struct iio_dev *indio_dev,
/* Calculate old hysteresis */
s_thresh = (s64)hdc3020_thresh_get_hum(thresh) * 1000000;
s_clr = (s64)hdc3020_thresh_get_hum(clr) * 1000000;
- s_hyst = div_s64(abs(s_thresh - s_clr), 65535);
+ s_hyst = div_s64(abs(s_thresh - s_clr),
+ HDC3020_THRESH_FRACTION);
/* Set new threshold */
thresh = reg_val;
/* Try to set old hysteresis */
@@ -574,15 +586,16 @@ static int hdc3020_write_thresh(struct iio_dev *indio_dev,
case IIO_EV_INFO_HYSTERESIS:
/*
* Function hdc3020_thresh_get_hum returns relative
- * humidity in percent scaled by 65535. Scale by 1000000
- * to be able to subtract scaled hysteresis value.
+ * humidity in percent scaled by HDC3020_THRESH_FRACTION.
+ * Scale by 1000000 to be able to subtract scaled
+ * hysteresis value.
*/
s_thresh = (s64)hdc3020_thresh_get_hum(thresh) * 1000000;
/*
- * Units of s_val are in micro percent, scale by 65535
- * to get same units as s_thresh.
+ * Units of s_val are in micro percent, scale by
+ * HDC3020_THRESH_FRACTION to get same units as s_thresh.
*/
- s_hyst = (s64)s_val * 65535;
+ s_hyst = (s64)s_val * HDC3020_THRESH_FRACTION;
s_clr = hdc3020_thresh_clr(s_thresh, s_hyst, dir);
s_clr = max(s_clr, 0);
s_clr = min(s_clr, HDC3020_MAX_HUM_MICRO);
@@ -630,7 +643,7 @@ static int hdc3020_read_thresh(struct iio_dev *indio_dev,
thresh = hdc3020_thresh_get_temp(ret);
switch (info) {
case IIO_EV_INFO_VALUE:
- *val = thresh;
+ *val = thresh * MILLI;
break;
case IIO_EV_INFO_HYSTERESIS:
ret = hdc3020_read_be16(data, reg_clr);
@@ -638,18 +651,18 @@ static int hdc3020_read_thresh(struct iio_dev *indio_dev,
return ret;
clr = hdc3020_thresh_get_temp(ret);
- *val = abs(thresh - clr);
+ *val = abs(thresh - clr) * MILLI;
break;
default:
return -EOPNOTSUPP;
}
- *val2 = 65535;
+ *val2 = HDC3020_THRESH_FRACTION;
return IIO_VAL_FRACTIONAL;
case IIO_HUMIDITYRELATIVE:
thresh = hdc3020_thresh_get_hum(ret);
switch (info) {
case IIO_EV_INFO_VALUE:
- *val = thresh;
+ *val = thresh * MILLI;
break;
case IIO_EV_INFO_HYSTERESIS:
ret = hdc3020_read_be16(data, reg_clr);
@@ -657,12 +670,12 @@ static int hdc3020_read_thresh(struct iio_dev *indio_dev,
return ret;
clr = hdc3020_thresh_get_hum(ret);
- *val = abs(thresh - clr);
+ *val = abs(thresh - clr) * MILLI;
break;
default:
return -EOPNOTSUPP;
}
- *val2 = 65535;
+ *val2 = HDC3020_THRESH_FRACTION;
return IIO_VAL_FRACTIONAL;
default:
return -EOPNOTSUPP;
--
2.51.2
This is a note to let you know that I've just added the patch titled
iio: humditiy: hdc3020: fix units for temperature and humidity
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 7b8dc11c0a830caa0d890c603d597161c6c26095 Mon Sep 17 00:00:00 2001
From: Dimitri Fedrau <dimitri.fedrau(a)liebherr.com>
Date: Thu, 16 Oct 2025 07:20:38 +0200
Subject: iio: humditiy: hdc3020: fix units for temperature and humidity
measurement
According to the ABI the units after application of scale and offset are
milli degrees for temperature measurements and milli percent for relative
humidity measurements. Currently the resulting units are degree celsius for
temperature measurements and percent for relative humidity measurements.
Change scale factor to fix this issue.
Fixes: c9180b8e39be ("iio: humidity: Add driver for ti HDC302x humidity sensors")
Reported-by: Chris Lesiak <chris.lesiak(a)licorbio.com>
Suggested-by: Chris Lesiak <chris.lesiak(a)licorbio.com>
Reviewed-by: Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
Signed-off-by: Dimitri Fedrau <dimitri.fedrau(a)liebherr.com>
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/humidity/hdc3020.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/iio/humidity/hdc3020.c b/drivers/iio/humidity/hdc3020.c
index ffb25596d3a8..8aa567d9aded 100644
--- a/drivers/iio/humidity/hdc3020.c
+++ b/drivers/iio/humidity/hdc3020.c
@@ -301,9 +301,9 @@ static int hdc3020_read_raw(struct iio_dev *indio_dev,
case IIO_CHAN_INFO_SCALE:
*val2 = 65536;
if (chan->type == IIO_TEMP)
- *val = 175;
+ *val = 175 * MILLI;
else
- *val = 100;
+ *val = 100 * MILLI;
return IIO_VAL_FRACTIONAL;
case IIO_CHAN_INFO_OFFSET:
--
2.51.2
This is a note to let you know that I've just added the patch titled
iio: imu: st_lsm6dsx: fix array size for st_lsm6dsx_settings fields
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 3af0c1fb1cdc351b64ff1a4bc06d491490c1f10a Mon Sep 17 00:00:00 2001
From: Francesco Lavra <flavra(a)baylibre.com>
Date: Fri, 17 Oct 2025 19:32:08 +0200
Subject: iio: imu: st_lsm6dsx: fix array size for st_lsm6dsx_settings fields
The `decimator` and `batch` fields of struct st_lsm6dsx_settings
are arrays indexed by sensor type, not by sensor hardware
identifier; moreover, the `batch` field is only used for the
accelerometer and gyroscope.
Change the array size for `decimator` from ST_LSM6DSX_MAX_ID to
ST_LSM6DSX_ID_MAX, and change the array size for `batch` from
ST_LSM6DSX_MAX_ID to 2; move the enum st_lsm6dsx_sensor_id
definition so that the ST_LSM6DSX_ID_MAX value is usable within
the struct st_lsm6dsx_settings definition.
Fixes: 801a6e0af0c6c ("iio: imu: st_lsm6dsx: add support to LSM6DSO")
Signed-off-by: Francesco Lavra <flavra(a)baylibre.com>
Acked-by: Lorenzo Bianconi <lorenzo(a)kernel.org>
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h
index c225b246c8a5..bca136392e99 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h
@@ -252,6 +252,15 @@ struct st_lsm6dsx_event_settings {
u8 wakeup_src_x_mask;
};
+enum st_lsm6dsx_sensor_id {
+ ST_LSM6DSX_ID_GYRO,
+ ST_LSM6DSX_ID_ACC,
+ ST_LSM6DSX_ID_EXT0,
+ ST_LSM6DSX_ID_EXT1,
+ ST_LSM6DSX_ID_EXT2,
+ ST_LSM6DSX_ID_MAX
+};
+
enum st_lsm6dsx_ext_sensor_id {
ST_LSM6DSX_ID_MAGN,
};
@@ -337,23 +346,14 @@ struct st_lsm6dsx_settings {
struct st_lsm6dsx_odr_table_entry odr_table[2];
struct st_lsm6dsx_samples_to_discard samples_to_discard[2];
struct st_lsm6dsx_fs_table_entry fs_table[2];
- struct st_lsm6dsx_reg decimator[ST_LSM6DSX_MAX_ID];
- struct st_lsm6dsx_reg batch[ST_LSM6DSX_MAX_ID];
+ struct st_lsm6dsx_reg decimator[ST_LSM6DSX_ID_MAX];
+ struct st_lsm6dsx_reg batch[2];
struct st_lsm6dsx_fifo_ops fifo_ops;
struct st_lsm6dsx_hw_ts_settings ts_settings;
struct st_lsm6dsx_shub_settings shub_settings;
struct st_lsm6dsx_event_settings event_settings;
};
-enum st_lsm6dsx_sensor_id {
- ST_LSM6DSX_ID_GYRO,
- ST_LSM6DSX_ID_ACC,
- ST_LSM6DSX_ID_EXT0,
- ST_LSM6DSX_ID_EXT1,
- ST_LSM6DSX_ID_EXT2,
- ST_LSM6DSX_ID_MAX,
-};
-
enum st_lsm6dsx_fifo_mode {
ST_LSM6DSX_FIFO_BYPASS = 0x0,
ST_LSM6DSX_FIFO_CONT = 0x6,
--
2.51.2
This is a note to let you know that I've just added the patch titled
iio: adc: ad7124: fix temperature channel
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From e2cc390a6629c76924a2740c54b144b9b28fca59 Mon Sep 17 00:00:00 2001
From: David Lechner <dlechner(a)baylibre.com>
Date: Fri, 10 Oct 2025 15:24:31 -0500
Subject: iio: adc: ad7124: fix temperature channel
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Fix temperature channel not working due to gain and offset not being
initialized. For channels other than the voltage ones calibration is
skipped (which is OK). However that results in the calibration register
values tracked in st->channels[i].cfg all being zero. These zeros are
later written to hardware before a measurement is made which caused the
raw temperature readings to be always 8388608 (0x800000).
To fix it, we just make sure the gain and offset values are set to the
default values and still return early without doing an internal
calibration.
While here, add a comment explaining why we don't bother calibrating
the temperature channel.
Fixes: 47036a03a303 ("iio: adc: ad7124: Implement internal calibration at probe time")
Reviewed-by: Marcelo Schmitt <marcelo.schmitt(a)analog.com>
Signed-off-by: David Lechner <dlechner(a)baylibre.com>
Reviewed-by: Uwe Kleine-König <u.kleine-koenig(a)baylibre.com>
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/adc/ad7124.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c
index 910b40393f77..61623cc6cb25 100644
--- a/drivers/iio/adc/ad7124.c
+++ b/drivers/iio/adc/ad7124.c
@@ -1525,10 +1525,6 @@ static int __ad7124_calibrate_all(struct ad7124_state *st, struct iio_dev *indio
int ret, i;
for (i = 0; i < st->num_channels; i++) {
-
- if (indio_dev->channels[i].type != IIO_VOLTAGE)
- continue;
-
/*
* For calibration the OFFSET register should hold its reset default
* value. For the GAIN register there is no such requirement but
@@ -1538,6 +1534,14 @@ static int __ad7124_calibrate_all(struct ad7124_state *st, struct iio_dev *indio
st->channels[i].cfg.calibration_offset = 0x800000;
st->channels[i].cfg.calibration_gain = st->gain_default;
+ /*
+ * Only the main voltage input channels are important enough
+ * to be automatically calibrated here. For everything else,
+ * just use the default values set above.
+ */
+ if (indio_dev->channels[i].type != IIO_VOLTAGE)
+ continue;
+
/*
* Full-scale calibration isn't supported at gain 1, so skip in
* that case. Note that untypically full-scale calibration has
--
2.51.2
This is a note to let you know that I've just added the patch titled
iio: accel: fix ADXL355 startup race condition
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From c92c1bc408e9e11ae3c7011b062fdd74c09283a3 Mon Sep 17 00:00:00 2001
From: Valek Andrej <andrej.v(a)skyrain.eu>
Date: Tue, 14 Oct 2025 09:13:44 +0200
Subject: iio: accel: fix ADXL355 startup race condition
There is an race-condition where device is not full working after SW reset.
Therefore it's necessary to wait some time after reset and verify shadow
registers values by reading and comparing the values before/after reset.
This mechanism is described in datasheet at least from revision D.
Fixes: 12ed27863ea3 ("iio: accel: Add driver support for ADXL355")
Signed-off-by: Valek Andrej <andrej.v(a)skyrain.eu>
Signed-off-by: Kessler Markus <markus.kessler(a)hilti.com>
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/accel/adxl355_core.c | 44 ++++++++++++++++++++++++++++----
1 file changed, 39 insertions(+), 5 deletions(-)
diff --git a/drivers/iio/accel/adxl355_core.c b/drivers/iio/accel/adxl355_core.c
index 2e00fd51b4d5..5fc7f814b907 100644
--- a/drivers/iio/accel/adxl355_core.c
+++ b/drivers/iio/accel/adxl355_core.c
@@ -56,6 +56,8 @@
#define ADXL355_POWER_CTL_DRDY_MSK BIT(2)
#define ADXL355_SELF_TEST_REG 0x2E
#define ADXL355_RESET_REG 0x2F
+#define ADXL355_BASE_ADDR_SHADOW_REG 0x50
+#define ADXL355_SHADOW_REG_COUNT 5
#define ADXL355_DEVID_AD_VAL 0xAD
#define ADXL355_DEVID_MST_VAL 0x1D
@@ -294,7 +296,12 @@ static void adxl355_fill_3db_frequency_table(struct adxl355_data *data)
static int adxl355_setup(struct adxl355_data *data)
{
unsigned int regval;
+ int retries = 5; /* the number is chosen based on empirical reasons */
int ret;
+ u8 *shadow_regs __free(kfree) = kzalloc(ADXL355_SHADOW_REG_COUNT, GFP_KERNEL);
+
+ if (!shadow_regs)
+ return -ENOMEM;
ret = regmap_read(data->regmap, ADXL355_DEVID_AD_REG, ®val);
if (ret)
@@ -321,14 +328,41 @@ static int adxl355_setup(struct adxl355_data *data)
if (regval != ADXL355_PARTID_VAL)
dev_warn(data->dev, "Invalid DEV ID 0x%02x\n", regval);
- /*
- * Perform a software reset to make sure the device is in a consistent
- * state after start-up.
- */
- ret = regmap_write(data->regmap, ADXL355_RESET_REG, ADXL355_RESET_CODE);
+ /* Read shadow registers to be compared after reset */
+ ret = regmap_bulk_read(data->regmap,
+ ADXL355_BASE_ADDR_SHADOW_REG,
+ shadow_regs, ADXL355_SHADOW_REG_COUNT);
if (ret)
return ret;
+ do {
+ if (--retries == 0) {
+ dev_err(data->dev, "Shadow registers mismatch\n");
+ return -EIO;
+ }
+
+ /*
+ * Perform a software reset to make sure the device is in a consistent
+ * state after start-up.
+ */
+ ret = regmap_write(data->regmap, ADXL355_RESET_REG,
+ ADXL355_RESET_CODE);
+ if (ret)
+ return ret;
+
+ /* Wait at least 5ms after software reset */
+ usleep_range(5000, 10000);
+
+ /* Read shadow registers for comparison */
+ ret = regmap_bulk_read(data->regmap,
+ ADXL355_BASE_ADDR_SHADOW_REG,
+ data->buffer.buf,
+ ADXL355_SHADOW_REG_COUNT);
+ if (ret)
+ return ret;
+ } while (memcmp(shadow_regs, data->buffer.buf,
+ ADXL355_SHADOW_REG_COUNT));
+
ret = regmap_update_bits(data->regmap, ADXL355_POWER_CTL_REG,
ADXL355_POWER_CTL_DRDY_MSK,
FIELD_PREP(ADXL355_POWER_CTL_DRDY_MSK, 1));
--
2.51.2
This is a note to let you know that I've just added the patch titled
iio: adc: ad7280a: fix ad7280_store_balance_timer()
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From bd886cdcbf9e746f61c74035a3acd42e9108e115 Mon Sep 17 00:00:00 2001
From: David Lechner <dlechner(a)baylibre.com>
Date: Fri, 10 Oct 2025 10:44:45 -0500
Subject: iio: adc: ad7280a: fix ad7280_store_balance_timer()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Use correct argument to iio_str_to_fixpoint() to parse 3 decimal places.
iio_str_to_fixpoint() has a bit of an unintuitive API where the
fract_mult parameter is the multiplier of the first decimal place as if
it was already an integer. So to get 3 decimal places, fract_mult must
be 100 rather than 1000.
Fixes: 96ccdbc07a74 ("staging:iio:adc:ad7280a: Standardize extended ABI naming")
Signed-off-by: David Lechner <dlechner(a)baylibre.com>
Reviewed-by: Nuno Sá <nuno.sa(a)analog.com>
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/adc/ad7280a.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iio/adc/ad7280a.c b/drivers/iio/adc/ad7280a.c
index dda2986ccda0..50a6ff7c8b1c 100644
--- a/drivers/iio/adc/ad7280a.c
+++ b/drivers/iio/adc/ad7280a.c
@@ -541,7 +541,7 @@ static ssize_t ad7280_store_balance_timer(struct iio_dev *indio_dev,
int val, val2;
int ret;
- ret = iio_str_to_fixpoint(buf, 1000, &val, &val2);
+ ret = iio_str_to_fixpoint(buf, 100, &val, &val2);
if (ret)
return ret;
--
2.51.2
This is a note to let you know that I've just added the patch titled
iio:common:ssp_sensors: Fix an error handling path ssp_probe()
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 21553258b94861a73d7f2cf15469d69240e1170d Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet(a)wanadoo.fr>
Date: Fri, 10 Oct 2025 20:58:48 +0200
Subject: iio:common:ssp_sensors: Fix an error handling path ssp_probe()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
If an error occurs after a successful mfd_add_devices() call, it should be
undone by a corresponding mfd_remove_devices() call, as already done in the
remove function.
Fixes: 50dd64d57eee ("iio: common: ssp_sensors: Add sensorhub driver")
Signed-off-by: Christophe JAILLET <christophe.jaillet(a)wanadoo.fr>
Reviewed-by: Nuno Sá <nuno.sa(a)analog.com>
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/common/ssp_sensors/ssp_dev.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/iio/common/ssp_sensors/ssp_dev.c b/drivers/iio/common/ssp_sensors/ssp_dev.c
index 1e167dc673ca..da09c9f3ceb6 100644
--- a/drivers/iio/common/ssp_sensors/ssp_dev.c
+++ b/drivers/iio/common/ssp_sensors/ssp_dev.c
@@ -503,7 +503,7 @@ static int ssp_probe(struct spi_device *spi)
ret = spi_setup(spi);
if (ret < 0) {
dev_err(&spi->dev, "Failed to setup spi\n");
- return ret;
+ goto err_setup_spi;
}
data->fw_dl_state = SSP_FW_DL_STATE_NONE;
@@ -568,6 +568,8 @@ static int ssp_probe(struct spi_device *spi)
err_setup_irq:
mutex_destroy(&data->pending_lock);
mutex_destroy(&data->comm_lock);
+err_setup_spi:
+ mfd_remove_devices(&spi->dev);
dev_err(&spi->dev, "Probe failed!\n");
--
2.51.2
This is a note to let you know that I've just added the patch titled
iio: buffer-dmaengine: enable .get_dma_dev()
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 3db847df994d475db7812dde90376f2848bcd30a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nuno=20S=C3=A1?= <nuno.sa(a)analog.com>
Date: Tue, 7 Oct 2025 10:15:23 +0100
Subject: iio: buffer-dmaengine: enable .get_dma_dev()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Wire up the .get_dma_dev() callback to use the DMA buffer infrastructure's
implementation. This ensures that DMABUF operations use the correct DMA
device for mapping, which is essential for proper operation on systems
where memory is mapped above the 32-bit range.
Without this callback, the core would fall back to using the IIO device's
parent, which may not have the appropriate DMA mask configuration for
high memory access.
Fixes: 7a86d469983a ("iio: buffer-dmaengine: Support new DMABUF based userspace API")
Reviewed-by: David Lechner <dlechner(a)baylibre.com>
Signed-off-by: Nuno Sá <nuno.sa(a)analog.com>
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/buffer/industrialio-buffer-dmaengine.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/iio/buffer/industrialio-buffer-dmaengine.c b/drivers/iio/buffer/industrialio-buffer-dmaengine.c
index e9d9a7d39fe1..27dd56334345 100644
--- a/drivers/iio/buffer/industrialio-buffer-dmaengine.c
+++ b/drivers/iio/buffer/industrialio-buffer-dmaengine.c
@@ -177,6 +177,8 @@ static const struct iio_buffer_access_funcs iio_dmaengine_buffer_ops = {
.lock_queue = iio_dma_buffer_lock_queue,
.unlock_queue = iio_dma_buffer_unlock_queue,
+ .get_dma_dev = iio_dma_buffer_get_dma_dev,
+
.modes = INDIO_BUFFER_HARDWARE,
.flags = INDIO_BUFFER_FLAG_FIXED_WATERMARK,
};
--
2.51.2
This is a note to let you know that I've just added the patch titled
iio: adc: stm32-dfsdm: fix st,adc-alt-channel property handling
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 8a6b7989ff0cd0a95c93be1927f2af7ad10f28de Mon Sep 17 00:00:00 2001
From: Olivier Moysan <olivier.moysan(a)foss.st.com>
Date: Thu, 2 Oct 2025 13:22:49 +0200
Subject: iio: adc: stm32-dfsdm: fix st,adc-alt-channel property handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Initially st,adc-alt-channel property was defined as an enum in the DFSDM
binding. The DFSDM binding has been changed to use the new IIO backend
framework, along with the adoption of IIO generic channels.
In this new binding st,adc-alt-channel is defined as a boolean property,
but it is still handled has an enum in DFSDM driver.
Fix st,adc-alt-channel property handling in DFSDM driver.
Fixes: 3208fa0cd919 ("iio: adc: stm32-dfsdm: adopt generic channels bindings")
Signed-off-by: Olivier Moysan <olivier.moysan(a)foss.st.com>
Reviewed-by: Nuno Sá <nuno.sa(a)analog.com>
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/adc/stm32-dfsdm-adc.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/iio/adc/stm32-dfsdm-adc.c b/drivers/iio/adc/stm32-dfsdm-adc.c
index 74b1b4dc6e81..9664b9bd75d4 100644
--- a/drivers/iio/adc/stm32-dfsdm-adc.c
+++ b/drivers/iio/adc/stm32-dfsdm-adc.c
@@ -725,9 +725,8 @@ static int stm32_dfsdm_generic_channel_parse_of(struct stm32_dfsdm *dfsdm,
}
df_ch->src = val;
- ret = fwnode_property_read_u32(node, "st,adc-alt-channel", &df_ch->alt_si);
- if (ret != -EINVAL)
- df_ch->alt_si = 0;
+ if (fwnode_property_present(node, "st,adc-alt-channel"))
+ df_ch->alt_si = 1;
if (adc->dev_data->type == DFSDM_IIO) {
backend = devm_iio_backend_fwnode_get(&indio_dev->dev, NULL, node);
--
2.51.2
This is a note to let you know that I've just added the patch titled
iio: pressure: bmp280: correct meas_time_us calculation
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 0bf1bfde53b30da7fd7f4a6c3db5b8e77888958d Mon Sep 17 00:00:00 2001
From: Achim Gratz <Achim.Gratz(a)Stromeko.DE>
Date: Sun, 28 Sep 2025 19:26:28 +0200
Subject: iio: pressure: bmp280: correct meas_time_us calculation
Correction of meas_time_us initialization based on an observation and
partial patch by David Lechner.
The constant part of the measurement time (as described in the
datasheet and implemented in the BM(P/E)2 Sensor API) was apparently
forgotten (it was already correctly applied for the BMP380) and is now
used.
There was also another thinko in bmp280_wait_conv:
data->oversampling_humid can actually have a value of 0 (for an
oversampling_ratio of 1), so it can not be used to detect the presence
of the humidity measurement capability. Use
data->chip_info->oversampling_humid_avail instead, which is NULL for
chips that cannot measure humidity and therefore must skip that part
of the calculation.
Closes: https://lore.kernel.org/linux-iio/875xgfg0wz.fsf@Gerda.invalid/
Fixes: 26ccfaa9ddaa ("iio: pressure: bmp280: Use sleep and forced mode for oneshot captures")
Suggested-by: David Lechner <dlechner(a)baylibre.com>
Tested-by: Achim Gratz <Achim.Gratz(a)Stromeko.DE>
Signed-off-by: Achim Gratz <Achim.Gratz(a)Stromeko.DE>
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/pressure/bmp280-core.c | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/drivers/iio/pressure/bmp280-core.c b/drivers/iio/pressure/bmp280-core.c
index c04e8bb4c993..d983ce9c0b99 100644
--- a/drivers/iio/pressure/bmp280-core.c
+++ b/drivers/iio/pressure/bmp280-core.c
@@ -1040,13 +1040,16 @@ static int bmp280_wait_conv(struct bmp280_data *data)
unsigned int reg, meas_time_us;
int ret;
- /* Check if we are using a BME280 device */
- if (data->oversampling_humid)
- meas_time_us = BMP280_PRESS_HUMID_MEAS_OFFSET +
- BIT(data->oversampling_humid) * BMP280_MEAS_DUR;
+ /* Constant part of the measurement time */
+ meas_time_us = BMP280_MEAS_OFFSET;
- else
- meas_time_us = 0;
+ /*
+ * Check if we are using a BME280 device,
+ * Humidity measurement time
+ */
+ if (data->chip_info->oversampling_humid_avail)
+ meas_time_us += BMP280_PRESS_HUMID_MEAS_OFFSET +
+ BIT(data->oversampling_humid) * BMP280_MEAS_DUR;
/* Pressure measurement time */
meas_time_us += BMP280_PRESS_HUMID_MEAS_OFFSET +
--
2.51.2
This is a note to let you know that I've just added the patch titled
iio: adc: rtq6056: Correct the sign bit index
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 9b45744bf09fc2a3287e05287141d6e123c125a7 Mon Sep 17 00:00:00 2001
From: ChiYuan Huang <cy_huang(a)richtek.com>
Date: Thu, 18 Sep 2025 11:10:59 +0800
Subject: iio: adc: rtq6056: Correct the sign bit index
The vshunt/current reported register is a signed 16bit integer. The
sign bit index should be '15', not '16'.
Fixes: 4396f45d211b ("iio: adc: Add rtq6056 support")
Reported-by: Andy Hsu <andy_ya_hsu(a)wiwynn.com>
Signed-off-by: ChiYuan Huang <cy_huang(a)richtek.com>
Reviewed-by: David Lechner <dlechner(a)baylibre.com>
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/adc/rtq6056.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iio/adc/rtq6056.c b/drivers/iio/adc/rtq6056.c
index ad9738228b7f..2bf3a09ac6b0 100644
--- a/drivers/iio/adc/rtq6056.c
+++ b/drivers/iio/adc/rtq6056.c
@@ -300,7 +300,7 @@ static int rtq6056_adc_read_channel(struct rtq6056_priv *priv,
return IIO_VAL_INT;
case RTQ6056_REG_SHUNTVOLT:
case RTQ6056_REG_CURRENT:
- *val = sign_extend32(regval, 16);
+ *val = sign_extend32(regval, 15);
return IIO_VAL_INT;
default:
return -EINVAL;
--
2.51.2
This is a note to let you know that I've just added the patch titled
iio: adc: ad7380: fix SPI offload trigger rate
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 632757312d7eb320b66ca60e0cfe098ec53cee08 Mon Sep 17 00:00:00 2001
From: David Lechner <dlechner(a)baylibre.com>
Date: Fri, 19 Sep 2025 15:50:34 -0500
Subject: iio: adc: ad7380: fix SPI offload trigger rate
Add a special case to double the SPI offload trigger rate when all
channels of a single-ended chip are enabled in a buffered read.
The single-ended chips in the AD738x family can only do simultaneous
sampling of half their channels and have a multiplexer to allow reading
the other half. To comply with the IIO definition of sampling_frequency,
we need to trigger twice as often when the sequencer is enabled to so
that both banks can be read in a single sample period.
Fixes: bbeaec81a03e ("iio: ad7380: add support for SPI offload")
Signed-off-by: David Lechner <dlechner(a)baylibre.com>
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/adc/ad7380.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/drivers/iio/adc/ad7380.c b/drivers/iio/adc/ad7380.c
index fa251dc1aae6..bfd908deefc0 100644
--- a/drivers/iio/adc/ad7380.c
+++ b/drivers/iio/adc/ad7380.c
@@ -1227,6 +1227,14 @@ static int ad7380_offload_buffer_postenable(struct iio_dev *indio_dev)
if (ret)
return ret;
+ /*
+ * When the sequencer is required to read all channels, we need to
+ * trigger twice per sample period in order to read one complete set
+ * of samples.
+ */
+ if (st->seq)
+ config.periodic.frequency_hz *= 2;
+
ret = spi_offload_trigger_enable(st->offload, st->offload_trigger, &config);
if (ret)
spi_unoptimize_message(&st->offload_msg);
--
2.51.2
This is a note to let you know that I've just added the patch titled
iio: adc: ad4030: Fix _scale value for common-mode channels
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From ffc74ad539136ae9e16f7b5f2e4582e88018cd49 Mon Sep 17 00:00:00 2001
From: Marcelo Schmitt <marcelo.schmitt(a)analog.com>
Date: Thu, 18 Sep 2025 14:37:27 -0300
Subject: iio: adc: ad4030: Fix _scale value for common-mode channels
Previously, the driver always used the amount of precision bits of
differential input channels to provide the scale to mV. Though,
differential and common-mode voltage channels have different amount of
precision bits and the correct number of precision bits must be considered
to get to a proper mV scale factor for each one. Use channel specific
number of precision bits to provide the correct scale value for each
channel.
Fixes: de67f28abe58 ("iio: adc: ad4030: check scan_type for error")
Fixes: 949abd1ca5a4 ("iio: adc: ad4030: add averaging support")
Signed-off-by: Marcelo Schmitt <marcelo.schmitt(a)analog.com>
Reviewed-by: David Lechner <dlechner(a)baylibre.com>
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/adc/ad4030.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iio/adc/ad4030.c b/drivers/iio/adc/ad4030.c
index 1bc2f9a22470..d8bee6a4215a 100644
--- a/drivers/iio/adc/ad4030.c
+++ b/drivers/iio/adc/ad4030.c
@@ -385,7 +385,7 @@ static int ad4030_get_chan_scale(struct iio_dev *indio_dev,
struct ad4030_state *st = iio_priv(indio_dev);
const struct iio_scan_type *scan_type;
- scan_type = iio_get_current_scan_type(indio_dev, st->chip->channels);
+ scan_type = iio_get_current_scan_type(indio_dev, chan);
if (IS_ERR(scan_type))
return PTR_ERR(scan_type);
--
2.51.2
Mejora tus contrataciones con PsicoSmart
body {
margin: 0;
padding: 0;
font-family: Arial, Helvetica, sans-serif;
font-size: 14px;
color: #333;
background-color: #ffffff;
}
table {
border-spacing: 0;
width: 100%;
max-width: 600px;
margin: auto;
}
td {
padding: 12px 20px;
}
a {
color: #1a73e8;
text-decoration: none;
}
.footer {
font-size: 12px;
color: #888888;
text-align: center;
}
Evalúa talento de forma objetiva y mejora tus contrataciones con PsicoSmart.
Hola, ,
¿Te ha pasado que un candidato luce perfecto en entrevista, pero en el trabajo no encaja como esperabas?
En selección, confiar solo en la percepción puede llevar a decisiones costosas. Por eso quiero presentarte PsicoSmart, una herramienta creada para que los equipos de Recursos Humanos tomen decisiones más objetivas y acertadas.
Con PsicoSmart puedes:
Aplicar 31 pruebas psicométricas que evalúan liderazgo, honestidad, comunicación e inteligencia.
Validar conocimientos técnicos con más de 2,500 exámenes especializados.
Supervisar la identidad de quien responde mediante captura fotográfica automática durante la evaluación.
Gestionar todo desde una sola plataforma, accesible desde cualquier dispositivo.
Aprovecha el Buen Fin del 1 al 22 de noviembre y obtén hasta 15% de descuento.
Si estás buscando mejorar tus procesos de contratación, esta puede ser una excelente oportunidad.
Para más información, puedes responder este correo o contactarme directamente; mis datos están al final del mensaje.
Saludos,
--------------
Atte.: Valeria Pérez
Ciudad de México: (55) 5018 0565
WhatsApp: +52 33 1607 2089
Si no deseas recibir más correos, haz clic aquí para darte de baja.
Para remover su dirección de esta lista haga <a href="https://s1.arrobamail.com/unsuscribe.php?id=yiwtsrewiswqwiseup">click aquí</a>
This is the start of the stable review cycle for the 6.12.57 release.
There are 40 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Sun, 02 Nov 2025 14:00:34 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v6.x/stable-review/patch-6.12.57-rc…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-6.12.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 6.12.57-rc1
Edward Cree <ecree.xilinx(a)gmail.com>
sfc: fix NULL dereferences in ef100_process_design_param()
Xiaogang Chen <xiaogang.chen(a)amd.com>
udmabuf: fix a buf size overflow issue during udmabuf creation
Aditya Kumar Singh <quic_adisi(a)quicinc.com>
wifi: ath12k: fix read pointer after free in ath12k_mac_assign_vif_to_vdev()
Kees Bakker <kees(a)ijzerbout.nl>
iommu/vt-d: Avoid use of NULL after WARN_ON_ONCE
William Breathitt Gray <wbg(a)kernel.org>
gpio: idio-16: Define fixed direction of the GPIO lines
Ioana Ciornei <ioana.ciornei(a)nxp.com>
gpio: regmap: add the .fixed_direction_output configuration parameter
Mathieu Dubois-Briand <mathieu.dubois-briand(a)bootlin.com>
gpio: regmap: Allow to allocate regmap-irq device
Vincent Mailhol <mailhol.vincent(a)wanadoo.fr>
bits: introduce fixed-type GENMASK_U*()
Vincent Mailhol <mailhol.vincent(a)wanadoo.fr>
bits: add comments and newlines to #if, #else and #endif directives
Wang Liang <wangliang74(a)huawei.com>
bonding: check xdp prog when set bond mode
Hangbin Liu <liuhangbin(a)gmail.com>
bonding: return detailed error when loading native XDP fails
Alexander Wetzel <Alexander(a)wetzel-home.de>
wifi: cfg80211: Add missing lock in cfg80211_check_and_end_cac()
Chao Yu <chao(a)kernel.org>
f2fs: fix to avoid panic once fallocation fails for pinfile
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
mptcp: pm: in-kernel: C-flag: handle late ADD_ADDR
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
selftests: mptcp: join: mark 'delete re-add signal' as skipped if not supported
Geliang Tang <geliang(a)kernel.org>
selftests: mptcp: disable add_addr retrans in endpoint_tests
Jonathan Corbet <corbet(a)lwn.net>
docs: kdoc: handle the obsolescensce of docutils.ErrorString()
Menglong Dong <menglong8.dong(a)gmail.com>
arch: Add the macro COMPILE_OFFSETS to all the asm-offsets.c
Tejun Heo <tj(a)kernel.org>
sched_ext: Make qmap dump operation non-destructive
Filipe Manana <fdmanana(a)suse.com>
btrfs: use smp_mb__after_atomic() when forcing COW in create_pending_snapshot()
Qu Wenruo <wqu(a)suse.com>
btrfs: tree-checker: add inode extref checks
Filipe Manana <fdmanana(a)suse.com>
btrfs: abort transaction if we fail to update inode in log replay dir fixup
Filipe Manana <fdmanana(a)suse.com>
btrfs: use level argument in log tree walk callback replay_one_buffer()
Filipe Manana <fdmanana(a)suse.com>
btrfs: always drop log root tree reference in btrfs_replay_log()
Thorsten Blum <thorsten.blum(a)linux.dev>
btrfs: scrub: replace max_t()/min_t() with clamp() in scrub_throttle_dev_io()
Naohiro Aota <naohiro.aota(a)wdc.com>
btrfs: zoned: refine extent allocator hint selection
Johannes Thumshirn <johannes.thumshirn(a)wdc.com>
btrfs: zoned: return error from btrfs_zone_finish_endio()
Filipe Manana <fdmanana(a)suse.com>
btrfs: abort transaction in the process_one_buffer() log tree walk callback
Filipe Manana <fdmanana(a)suse.com>
btrfs: abort transaction on specific error places when walking log tree
Chen Ridong <chenridong(a)huawei.com>
cpuset: Use new excpus for nocpu error check when enabling root partition
Avadhut Naik <avadhut.naik(a)amd.com>
EDAC/mc_sysfs: Increase legacy channel support to 16
David Kaplan <david.kaplan(a)amd.com>
x86/bugs: Fix reporting of LFENCE retpoline
David Kaplan <david.kaplan(a)amd.com>
x86/bugs: Report correct retbleed mitigation status
Jiri Olsa <jolsa(a)kernel.org>
seccomp: passthrough uprobe systemcall without filtering
Josh Poimboeuf <jpoimboe(a)kernel.org>
perf: Skip user unwind if the task is a kernel thread
Josh Poimboeuf <jpoimboe(a)kernel.org>
perf: Have get_perf_callchain() return NULL if crosstask and user are set
Steven Rostedt <rostedt(a)goodmis.org>
perf: Use current->flags & PF_KTHREAD|PF_USER_WORKER instead of current->mm == NULL
Dapeng Mi <dapeng1.mi(a)linux.intel.com>
perf/x86/intel: Add ICL_FIXED_0_ADAPTIVE bit into INTEL_FIXED_BITS_MASK
Richard Guy Briggs <rgb(a)redhat.com>
audit: record fanotify event regardless of presence of rules
Xiang Mei <xmei5(a)asu.edu>
net/sched: sch_qfq: Fix null-deref in agg_dequeue
-------------
Diffstat:
Documentation/sphinx/kernel_abi.py | 4 +-
Documentation/sphinx/kernel_feat.py | 4 +-
Documentation/sphinx/kernel_include.py | 6 ++-
Documentation/sphinx/maintainers_include.py | 4 +-
Makefile | 4 +-
arch/alpha/kernel/asm-offsets.c | 1 +
arch/arc/kernel/asm-offsets.c | 1 +
arch/arm/kernel/asm-offsets.c | 2 +
arch/arm64/kernel/asm-offsets.c | 1 +
arch/csky/kernel/asm-offsets.c | 1 +
arch/hexagon/kernel/asm-offsets.c | 1 +
arch/loongarch/kernel/asm-offsets.c | 2 +
arch/m68k/kernel/asm-offsets.c | 1 +
arch/microblaze/kernel/asm-offsets.c | 1 +
arch/mips/kernel/asm-offsets.c | 2 +
arch/nios2/kernel/asm-offsets.c | 1 +
arch/openrisc/kernel/asm-offsets.c | 1 +
arch/parisc/kernel/asm-offsets.c | 1 +
arch/powerpc/kernel/asm-offsets.c | 1 +
arch/riscv/kernel/asm-offsets.c | 1 +
arch/s390/kernel/asm-offsets.c | 1 +
arch/sh/kernel/asm-offsets.c | 1 +
arch/sparc/kernel/asm-offsets.c | 1 +
arch/um/kernel/asm-offsets.c | 2 +
arch/x86/events/intel/core.c | 10 ++--
arch/x86/include/asm/perf_event.h | 6 ++-
arch/x86/kernel/cpu/bugs.c | 9 ++--
arch/x86/kvm/pmu.h | 2 +-
arch/xtensa/kernel/asm-offsets.c | 1 +
drivers/dma-buf/udmabuf.c | 2 +-
drivers/edac/edac_mc_sysfs.c | 24 ++++++++++
drivers/gpio/gpio-idio-16.c | 5 ++
drivers/gpio/gpio-regmap.c | 53 ++++++++++++++++++--
drivers/iommu/intel/iommu.c | 7 +--
drivers/net/bonding/bond_main.c | 11 +++--
drivers/net/bonding/bond_options.c | 3 ++
drivers/net/ethernet/sfc/ef100_netdev.c | 6 +--
drivers/net/ethernet/sfc/ef100_nic.c | 47 ++++++++----------
drivers/net/wireless/ath/ath12k/mac.c | 6 +--
fs/btrfs/disk-io.c | 2 +-
fs/btrfs/extent-tree.c | 6 ++-
fs/btrfs/inode.c | 7 +--
fs/btrfs/scrub.c | 3 +-
fs/btrfs/transaction.c | 2 +-
fs/btrfs/tree-checker.c | 37 ++++++++++++++
fs/btrfs/tree-log.c | 64 +++++++++++++++++++------
fs/btrfs/zoned.c | 8 ++--
fs/btrfs/zoned.h | 9 ++--
fs/f2fs/file.c | 8 ++--
fs/f2fs/segment.c | 20 ++++----
include/linux/audit.h | 2 +-
include/linux/bitops.h | 1 -
include/linux/bits.h | 38 ++++++++++++++-
include/linux/gpio/regmap.h | 16 +++++++
include/net/bonding.h | 1 +
include/net/pkt_sched.h | 25 +++++++++-
kernel/cgroup/cpuset.c | 6 +--
kernel/events/callchain.c | 16 +++----
kernel/events/core.c | 7 +--
kernel/seccomp.c | 32 ++++++++++---
net/mptcp/pm_netlink.c | 6 +++
net/sched/sch_api.c | 10 ----
net/sched/sch_hfsc.c | 16 -------
net/sched/sch_qfq.c | 2 +-
net/wireless/reg.c | 4 ++
tools/sched_ext/scx_qmap.bpf.c | 18 ++++++-
tools/testing/selftests/net/mptcp/mptcp_join.sh | 3 +-
67 files changed, 442 insertions(+), 164 deletions(-)
Hi,
This Linux kernel patch series introduces support for error recovery for
passthrough PCI devices on System Z (s390x).
Background
----------
For PCI devices on s390x an operating system receives platform specific
error events from firmware rather than through AER.Today for
passthrough/userspace devices, we don't attempt any error recovery and
ignore any error events for the devices. The passthrough/userspace devices
are managed by the vfio-pci driver. The driver does register error handling
callbacks (error_detected), and on an error trigger an eventfd to
userspace. But we need a mechanism to notify userspace
(QEMU/guest/userspace drivers) about the error event.
Proposal
--------
We can expose this error information (currently only the PCI Error Code)
via a device feature. Userspace can then obtain the error information
via VFIO_DEVICE_FEATURE ioctl and take appropriate actions such as driving
a device reset.
This is how a typical flow for passthrough devices to a VM would work:
For passthrough devices to a VM, the driver bound to the device on the host
is vfio-pci. vfio-pci driver does support the error_detected() callback
(vfio_pci_core_aer_err_detected()), and on an PCI error s390x recovery
code on the host will call the vfio-pci error_detected() callback. The
vfio-pci error_detected() callback will notify userspace/QEMU via an
eventfd, and return PCI_ERS_RESULT_CAN_RECOVER. At this point the s390x
error recovery on the host will skip any further action(see patch 6) and
let userspace drive the error recovery.
Once userspace/QEMU is notified, it then injects this error into the VM
so device drivers in the VM can take recovery actions. For example for a
passthrough NVMe device, the VM's OS NVMe driver will access the device.
At this point the VM's NVMe driver's error_detected() will drive the
recovery by returning PCI_ERS_RESULT_NEED_RESET, and the s390x error
recovery in the VM's OS will try to do a reset. Resets are privileged
operations and so the VM will need intervention from QEMU to perform the
reset. QEMU will invoke the VFIO_DEVICE_RESET ioctl to now notify the
host that the VM is requesting a reset of the device. The vfio-pci driver
on the host will then perform the reset on the device to recover it.
Thanks
Farhan
ChangeLog
---------
v4 series https://lore.kernel.org/all/20250924171628.826-1-alifm@linux.ibm.com/
v4 -> v5
- Rebase on 6.18-rc5
- Move bug fixes to the beginning of the series (patch 1 and 2). These patches
were posted as a separate fixes series
https://lore.kernel.org/all/a14936ac-47d6-461b-816f-0fd66f869b0f@linux.ibm.…
- Add matching pci_put_dev() for pci_get_slot() (patch 6).
v3 series https://lore.kernel.org/all/20250911183307.1910-1-alifm@linux.ibm.com/
v3 -> v4
- Remove warn messages for each PCI capability not restored (patch 1)
- Check PCI_COMMAND and PCI_STATUS register for error value instead of device id
(patch 1)
- Fix kernel crash in patch 3
- Added reviewed by tags
- Address comments from Niklas's (patches 4, 5, 7)
- Fix compilation error non s390x system (patch 8)
- Explicitly align struct vfio_device_feature_zpci_err (patch 8)
v2 series https://lore.kernel.org/all/20250825171226.1602-1-alifm@linux.ibm.com/
v2 -> v3
- Patch 1 avoids saving any config space state if the device is in error
(suggested by Alex)
- Patch 2 adds additional check only for FLR reset to try other function
reset method (suggested by Alex).
- Patch 3 fixes a bug in s390 for resetting PCI devices with multiple
functions. Creates a new flag pci_slot to allow per function slot.
- Patch 4 fixes a bug in s390 for resource to bus address translation.
- Rebase on 6.17-rc5
v1 series https://lore.kernel.org/all/20250813170821.1115-1-alifm@linux.ibm.com/
v1 - > v2
- Patches 1 and 2 adds some additional checks for FLR/PM reset to
try other function reset method (suggested by Alex).
- Patch 3 fixes a bug in s390 for resetting PCI devices with multiple
functions.
- Patch 7 adds a new device feature for zPCI devices for the VFIO_DEVICE_FEATURE
ioctl. The ioctl is used by userspace to retriece any PCI error
information for the device (suggested by Alex).
- Patch 8 adds a reset_done() callback for the vfio-pci driver, to
restore the state of the device after a reset.
- Patch 9 removes the pcie check for triggering VFIO_PCI_ERR_IRQ_INDEX.
Farhan Ali (9):
PCI: Allow per function PCI slots
s390/pci: Add architecture specific resource/bus address translation
PCI: Avoid saving error values for config space
PCI: Add additional checks for flr reset
s390/pci: Update the logic for detecting passthrough device
s390/pci: Store PCI error information for passthrough devices
vfio-pci/zdev: Add a device feature for error information
vfio: Add a reset_done callback for vfio-pci driver
vfio: Remove the pcie check for VFIO_PCI_ERR_IRQ_INDEX
arch/s390/include/asm/pci.h | 29 ++++++++
arch/s390/pci/pci.c | 75 +++++++++++++++++++++
arch/s390/pci/pci_event.c | 107 +++++++++++++++++-------------
drivers/pci/host-bridge.c | 4 +-
drivers/pci/pci.c | 37 +++++++++--
drivers/pci/pcie/aer.c | 3 +
drivers/pci/pcie/dpc.c | 3 +
drivers/pci/pcie/ptm.c | 3 +
drivers/pci/slot.c | 25 ++++++-
drivers/pci/tph.c | 3 +
drivers/pci/vc.c | 3 +
drivers/vfio/pci/vfio_pci_core.c | 20 ++++--
drivers/vfio/pci/vfio_pci_intrs.c | 3 +-
drivers/vfio/pci/vfio_pci_priv.h | 9 +++
drivers/vfio/pci/vfio_pci_zdev.c | 45 ++++++++++++-
include/linux/pci.h | 1 +
include/uapi/linux/vfio.h | 15 +++++
17 files changed, 321 insertions(+), 64 deletions(-)
--
2.43.0
The function samsung_dsim_parse_dt() calls of_graph_get_endpoint_by_regs()
to get the endpoint device node, but fails to call of_node_put() to release
the reference when the function returns. This results in a device node
reference leak.
Fix this by adding the missing of_node_put() call before returning from
the function.
Found via static analysis and code review.
Fixes: 77169a11d4e9 ("drm/bridge: samsung-dsim: add driver support for exynos7870 DSIM bridge")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miaoqian Lin <linmq006(a)gmail.com>
---
drivers/gpu/drm/bridge/samsung-dsim.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/bridge/samsung-dsim.c b/drivers/gpu/drm/bridge/samsung-dsim.c
index eabc4c32f6ab..1a5acd5077ad 100644
--- a/drivers/gpu/drm/bridge/samsung-dsim.c
+++ b/drivers/gpu/drm/bridge/samsung-dsim.c
@@ -2086,6 +2086,7 @@ static int samsung_dsim_parse_dt(struct samsung_dsim *dsi)
if (lane_polarities[1])
dsi->swap_dn_dp_data = true;
}
+ of_node_put(endpoint);
return 0;
}
--
2.39.5 (Apple Git-154)
The current LoongArch BPF trampoline implementation is incompatible
with tracing functions in kernel modules. This causes several severe
and user-visible problems:
* Kernel lockups when a BPF program is attached to a module function [0].
* The `bpf_selftests/module_attach` test fails consistently.
* Critical kernel modules like WireGuard experience traffic disruption
when their functions are traced with fentry [1].
Given the severity and the potential for other unknown side-effects,
it is safest to disable the feature entirely for now. This patch
prevents the BPF subsystem from allowing trampoline attachments to
module functions on LoongArch.
This is a temporary mitigation until the core issues in the trampoline
code for module handling can be identified and fixed.
[root@fedora bpf]# ./test_progs -a module_attach -v
bpf_testmod.ko is already unloaded.
Loading bpf_testmod.ko...
Successfully loaded bpf_testmod.ko.
test_module_attach:PASS:skel_open 0 nsec
test_module_attach:PASS:set_attach_target 0 nsec
test_module_attach:PASS:set_attach_target_explicit 0 nsec
test_module_attach:PASS:skel_load 0 nsec
libbpf: prog 'handle_fentry': failed to attach: -ENOTSUPP
libbpf: prog 'handle_fentry': failed to auto-attach: -ENOTSUPP
test_module_attach:FAIL:skel_attach skeleton attach failed: -524
Summary: 0/0 PASSED, 0 SKIPPED, 1 FAILED
Successfully unloaded bpf_testmod.ko.
[0]: https://lore.kernel.org/loongarch/CAK3+h2wDmpC-hP4u4pJY8T-yfKyk4yRzpu2LMO+C…
[1]: https://lore.kernel.org/loongarch/CAK3+h2wYcpc+OwdLDUBvg2rF9rvvyc5amfHT-KcF…
Cc: stable(a)vger.kernel.org
Fixes: f9b6b41f0cf3 (“LoongArch: BPF: Add basic bpf trampoline support”)
Closes: https://lore.kernel.org/loongarch/CAK3+h2wDmpC-hP4u4pJY8T-yfKyk4yRzpu2LMO+C…
Acked-by: Hengqi Chen <hengqi.chen(a)gmail.com>
Signed-off-by: Vincent Li <vincent.mc.li(a)gmail.com>
---
arch/loongarch/net/bpf_jit.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index cbe53d0b7fb0..49c1d4b95404 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -1624,6 +1624,9 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i
/* Direct jump skips 5 NOP instructions */
else if (is_bpf_text_address((unsigned long)orig_call))
orig_call += LOONGARCH_BPF_FENTRY_NBYTES;
+ /* Module tracing not supported - causes kernel lockups */
+ else if (is_module_text_address((unsigned long)orig_call))
+ return -ENOTSUPP;
if (flags & BPF_TRAMP_F_CALL_ORIG) {
move_addr(ctx, LOONGARCH_GPR_A0, (const u64)im);
--
2.38.1
The current LoongArch BPF trampoline implementation is incompatible
with tracing functions in kernel modules. This causes several severe
and user-visible problems:
* Kernel lockups when a BPF program is attached to a module function [0].
* The `bpf_selftests/module_attach` test fails consistently.
* Critical kernel modules like WireGuard experience traffic disruption
when their functions are traced with fentry [1].
Given the severity and the potential for other unknown side-effects,
it is safest to disable the feature entirely for now. This patch
prevents the BPF subsystem from allowing trampoline attachments to
module functions on LoongArch.
This is a temporary mitigation until the core issues in the trampoline
code for module handling can be identified and fixed.
[root@fedora bpf]# ./test_progs -a module_attach -v
bpf_testmod.ko is already unloaded.
Loading bpf_testmod.ko...
Successfully loaded bpf_testmod.ko.
test_module_attach:PASS:skel_open 0 nsec
test_module_attach:PASS:set_attach_target 0 nsec
test_module_attach:PASS:set_attach_target_explicit 0 nsec
test_module_attach:PASS:skel_load 0 nsec
libbpf: prog 'handle_fentry': failed to attach: -ENOTSUPP
libbpf: prog 'handle_fentry': failed to auto-attach: -ENOTSUPP
test_module_attach:FAIL:skel_attach skeleton attach failed: -524
Summary: 0/0 PASSED, 0 SKIPPED, 1 FAILED
Successfully unloaded bpf_testmod.ko.
[0]: https://lore.kernel.org/loongarch/CAK3+h2wDmpC-hP4u4pJY8T-yfKyk4yRzpu2LMO+C…
[1]: https://lore.kernel.org/loongarch/CAK3+h2wYcpc+OwdLDUBvg2rF9rvvyc5amfHT-KcF…
Cc: stable(a)vger.kernel.org
Fixes: f9b6b41f0cf3 (“LoongArch: BPF: Add basic bpf trampoline support”)
Closes: https://lore.kernel.org/loongarch/CAK3+h2wDmpC-hP4u4pJY8T-yfKyk4yRzpu2LMO+C…
Acked-by: Hengqi Chen <hengqi.chen(a)gmail.com>
Signed-off-by: Vincent Li <vincent.mc.li(a)gmail.com>
---
arch/loongarch/net/bpf_jit.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index cbe53d0b7fb0..49c1d4b95404 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -1624,6 +1624,9 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i
/* Direct jump skips 5 NOP instructions */
else if (is_bpf_text_address((unsigned long)orig_call))
orig_call += LOONGARCH_BPF_FENTRY_NBYTES;
+ /* Module tracing not supported - causes kernel lockups */
+ else if (is_module_text_address((unsigned long)orig_call))
+ return -ENOTSUPP;
if (flags & BPF_TRAMP_F_CALL_ORIG) {
move_addr(ctx, LOONGARCH_GPR_A0, (const u64)im);
--
2.38.1
Hi Greg, Sasha,
On 03/11/2025 02:38, gregkh(a)linuxfoundation.org wrote:
>
> This is a note to let you know that I've just added the patch titled
>
> mptcp: drop bogus optimization in __mptcp_check_push()
>
> to the 5.15-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> mptcp-drop-bogus-optimization-in-__mptcp_check_push.patch
> and it can be found in the queue-5.15 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
Can you please drop this patch from v5.15? It looks like it is causing
some issues with MP_PRIO tests. I think that's because back then, the
path is selected differently, with the use of 'msk->last_snd' which will
bypass some decisions to where to send the next data.
I will try to check if another version of this patch is needed for v5.15.
Cheers,
Matt
(I kept the patch below just in case some people from the MPTCP ML want
to react.)
> From stable+bounces-192087-greg=kroah.com(a)vger.kernel.org Mon Nov 3 05:15:58 2025
> From: Sasha Levin <sashal(a)kernel.org>
> Date: Sun, 2 Nov 2025 15:15:50 -0500
> Subject: mptcp: drop bogus optimization in __mptcp_check_push()
> To: stable(a)vger.kernel.org
> Cc: Paolo Abeni <pabeni(a)redhat.com>, Geliang Tang <geliang(a)kernel.org>, Mat Martineau <martineau(a)kernel.org>, "Matthieu Baerts (NGI0)" <matttbe(a)kernel.org>, Jakub Kicinski <kuba(a)kernel.org>, Sasha Levin <sashal(a)kernel.org>
> Message-ID: <20251102201550.3588174-1-sashal(a)kernel.org>
>
> From: Paolo Abeni <pabeni(a)redhat.com>
>
> [ Upstream commit 27b0e701d3872ba59c5b579a9e8a02ea49ad3d3b ]
>
> Accessing the transmit queue without owning the msk socket lock is
> inherently racy, hence __mptcp_check_push() could actually quit early
> even when there is pending data.
>
> That in turn could cause unexpected tx lock and timeout.
>
> Dropping the early check avoids the race, implicitly relaying on later
> tests under the relevant lock. With such change, all the other
> mptcp_send_head() call sites are now under the msk socket lock and we
> can additionally drop the now unneeded annotation on the transmit head
> pointer accesses.
>
> Fixes: 6e628cd3a8f7 ("mptcp: use mptcp release_cb for delayed tasks")
> Cc: stable(a)vger.kernel.org
> Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
> Reviewed-by: Geliang Tang <geliang(a)kernel.org>
> Tested-by: Geliang Tang <geliang(a)kernel.org>
> Reviewed-by: Mat Martineau <martineau(a)kernel.org>
> Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
> Link: https://patch.msgid.link/20251028-net-mptcp-send-timeout-v1-1-38ffff5a9ec8@…
> Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
> [ split upstream __subflow_push_pending modification across __mptcp_push_pending and __mptcp_subflow_push_pending ]
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
> ---
> net/mptcp/protocol.c | 13 +++++--------
> net/mptcp/protocol.h | 2 +-
> 2 files changed, 6 insertions(+), 9 deletions(-)
>
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -1137,7 +1137,7 @@ static void __mptcp_clean_una(struct soc
> if (WARN_ON_ONCE(!msk->recovery))
> break;
>
> - WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
> + msk->first_pending = mptcp_send_next(sk);
> }
>
> dfrag_clear(sk, dfrag);
> @@ -1674,7 +1674,7 @@ void __mptcp_push_pending(struct sock *s
>
> mptcp_update_post_push(msk, dfrag, ret);
> }
> - WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
> + msk->first_pending = mptcp_send_next(sk);
> }
>
> /* at this point we held the socket lock for the last subflow we used */
> @@ -1732,7 +1732,7 @@ static void __mptcp_subflow_push_pending
>
> mptcp_update_post_push(msk, dfrag, ret);
> }
> - WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
> + msk->first_pending = mptcp_send_next(sk);
> }
>
> out:
> @@ -1850,7 +1850,7 @@ static int mptcp_sendmsg(struct sock *sk
> get_page(dfrag->page);
> list_add_tail(&dfrag->list, &msk->rtx_queue);
> if (!msk->first_pending)
> - WRITE_ONCE(msk->first_pending, dfrag);
> + msk->first_pending = dfrag;
> }
> pr_debug("msk=%p dfrag at seq=%llu len=%u sent=%u new=%d\n", msk,
> dfrag->data_seq, dfrag->data_len, dfrag->already_sent,
> @@ -2645,7 +2645,7 @@ static void __mptcp_clear_xmit(struct so
> struct mptcp_sock *msk = mptcp_sk(sk);
> struct mptcp_data_frag *dtmp, *dfrag;
>
> - WRITE_ONCE(msk->first_pending, NULL);
> + msk->first_pending = NULL;
> list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list)
> dfrag_clear(sk, dfrag);
> }
> @@ -3114,9 +3114,6 @@ void __mptcp_data_acked(struct sock *sk)
>
> void __mptcp_check_push(struct sock *sk, struct sock *ssk)
> {
> - if (!mptcp_send_head(sk))
> - return;
> -
> if (!sock_owned_by_user(sk)) {
> struct sock *xmit_ssk = mptcp_subflow_get_send(mptcp_sk(sk));
>
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -325,7 +325,7 @@ static inline struct mptcp_data_frag *mp
> {
> const struct mptcp_sock *msk = mptcp_sk(sk);
>
> - return READ_ONCE(msk->first_pending);
> + return msk->first_pending;
> }
>
> static inline struct mptcp_data_frag *mptcp_send_next(struct sock *sk)
Cheers,
Matt
--
Sponsored by the NGI0 Core fund.
The receive error handling code is shared between RSCI and all other
SCIF port types, but the RSCI overrun_reg is specified as a memory
offset, while for other SCIF types it is an enum value used to index
into the sci_port_params->regs array, as mentioned above the
sci_serial_in() function.
For RSCI, the overrun_reg is CSR (0x48), causing the sci_getreg() call
inside the sci_handle_fifo_overrun() function to index outside the
bounds of the regs array, which currently has a size of 20, as specified
by SCI_NR_REGS.
Because of this, we end up accessing memory outside of RSCI's
rsci_port_params structure, which, when interpreted as a plat_sci_reg,
happens to have a non-zero size, causing the following WARN when
sci_serial_in() is called, as the accidental size does not match the
supported register sizes.
The existence of the overrun_reg needs to be checked because
SCIx_SH3_SCIF_REGTYPE has overrun_reg set to SCLSR, but SCLSR is not
present in the regs array.
Avoid calling sci_getreg() for port types which don't use standard
register handling.
Use the ops->read_reg() and ops->write_reg() functions to properly read
and write registers for RSCI, and change the type of the status variable
to accommodate the 32-bit CSR register.
sci_getreg() and sci_serial_in() are also called with overrun_reg in the
sci_mpxed_interrupt() interrupt handler, but that code path is not used
for RSCI, as it does not have a muxed interrupt.
------------[ cut here ]------------
Invalid register access
WARNING: CPU: 0 PID: 0 at drivers/tty/serial/sh-sci.c:522 sci_serial_in+0x38/0xac
Modules linked in: renesas_usbhs at24 rzt2h_adc industrialio_adc sha256 cfg80211 bluetooth ecdh_generic ecc rfkill fuse drm backlight ipv6
CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 6.17.0-rc1+ #30 PREEMPT
Hardware name: Renesas RZ/T2H EVK Board based on r9a09g077m44 (DT)
pstate: 604000c5 (nZCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : sci_serial_in+0x38/0xac
lr : sci_serial_in+0x38/0xac
sp : ffff800080003e80
x29: ffff800080003e80 x28: ffff800082195b80 x27: 000000000000000d
x26: ffff8000821956d0 x25: 0000000000000000 x24: ffff800082195b80
x23: ffff000180e0d800 x22: 0000000000000010 x21: 0000000000000000
x20: 0000000000000010 x19: ffff000180e72000 x18: 000000000000000a
x17: ffff8002bcee7000 x16: ffff800080000000 x15: 0720072007200720
x14: 0720072007200720 x13: 0720072007200720 x12: 0720072007200720
x11: 0000000000000058 x10: 0000000000000018 x9 : ffff8000821a6a48
x8 : 0000000000057fa8 x7 : 0000000000000406 x6 : ffff8000821fea48
x5 : ffff00033ef88408 x4 : ffff8002bcee7000 x3 : ffff800082195b80
x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff800082195b80
Call trace:
sci_serial_in+0x38/0xac (P)
sci_handle_fifo_overrun.isra.0+0x70/0x134
sci_er_interrupt+0x50/0x39c
__handle_irq_event_percpu+0x48/0x140
handle_irq_event+0x44/0xb0
handle_fasteoi_irq+0xf4/0x1a0
handle_irq_desc+0x34/0x58
generic_handle_domain_irq+0x1c/0x28
gic_handle_irq+0x4c/0x140
call_on_irq_stack+0x30/0x48
do_interrupt_handler+0x80/0x84
el1_interrupt+0x34/0x68
el1h_64_irq_handler+0x18/0x24
el1h_64_irq+0x6c/0x70
default_idle_call+0x28/0x58 (P)
do_idle+0x1f8/0x250
cpu_startup_entry+0x34/0x3c
rest_init+0xd8/0xe0
console_on_rootfs+0x0/0x6c
__primary_switched+0x88/0x90
---[ end trace 0000000000000000 ]---
Cc: stable(a)vger.kernel.org
Fixes: 0666e3fe95ab ("serial: sh-sci: Add support for RZ/T2H SCI")
Signed-off-by: Cosmin Tanislav <cosmin-gabriel.tanislav.xa(a)renesas.com>
---
drivers/tty/serial/sh-sci.c | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index 538b2f991609..62bb62b82cbe 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -1014,16 +1014,18 @@ static int sci_handle_fifo_overrun(struct uart_port *port)
struct sci_port *s = to_sci_port(port);
const struct plat_sci_reg *reg;
int copied = 0;
- u16 status;
+ u32 status;
- reg = sci_getreg(port, s->params->overrun_reg);
- if (!reg->size)
- return 0;
+ if (s->type != SCI_PORT_RSCI) {
+ reg = sci_getreg(port, s->params->overrun_reg);
+ if (!reg->size)
+ return 0;
+ }
- status = sci_serial_in(port, s->params->overrun_reg);
+ status = s->ops->read_reg(port, s->params->overrun_reg);
if (status & s->params->overrun_mask) {
status &= ~s->params->overrun_mask;
- sci_serial_out(port, s->params->overrun_reg, status);
+ s->ops->write_reg(port, s->params->overrun_reg, status);
port->icount.overrun++;
--
2.51.0
From: HariKrishna Sagala <hariconscious(a)gmail.com>
snprintf() returns the would-be-filled size when the string overflows
the given buffer size, hence using this value may result in a buffer
overflow (although it's unrealistic).
This patch replaces it with a safer version, scnprintf() for papering
over such a potential issue.
Link: https://github.com/KSPP/linux/issues/105
'Fixes: 5a565ba23abe ("ASoC: Intel: avs: Probing and firmware tracing
over debugfs")'
Signed-off-by: HariKrishna Sagala <hariconscious(a)gmail.com>
---
Thank you for the feedback and the suggestions.
Corrected the indentation & commit message.
V1:
https://lore.kernel.org/all/20251112120235.54328-2-hariconscious@gmail.com/
sound/soc/intel/avs/debugfs.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/sound/soc/intel/avs/debugfs.c b/sound/soc/intel/avs/debugfs.c
index 3534de46f9e4..cdb82392b9ee 100644
--- a/sound/soc/intel/avs/debugfs.c
+++ b/sound/soc/intel/avs/debugfs.c
@@ -119,9 +119,9 @@ static ssize_t probe_points_read(struct file *file, char __user *to, size_t coun
}
for (i = 0; i < num_desc; i++) {
- ret = snprintf(buf + len, PAGE_SIZE - len,
- "Id: %#010x Purpose: %d Node id: %#x\n",
- desc[i].id.value, desc[i].purpose, desc[i].node_id.val);
+ ret = scnprintf(buf + len, PAGE_SIZE - len,
+ "Id: %#010x Purpose: %d Node id: %#x\n",
+ desc[i].id.value, desc[i].purpose, desc[i].node_id.val);
if (ret < 0)
goto free_desc;
len += ret;
base-commit: 24172e0d79900908cf5ebf366600616d29c9b417
--
2.43.0
Re:Good day,
Hope you are well, my first email returned undelivered, please
can I provide you with more information through this email?.
Best regards,
Harry Schofield
From: Christian Hitz <christian.hitz(a)bbv.ch>
If a GPIO is used to control the chip's enable pin, it needs to be pulled
high before any i2c communication is attempted.
Currently, the enable GPIO handling is not correct.
Assume the enable GPIO is low when the probe function is entered. In this
case the device is in SHUTDOWN mode and does not react to i2c commands.
During probe the following sequence happens:
1. The call to lp50xx_reset() on line 548 has no effect as i2c is not
possible yet.
2. Then - on line 552 - lp50xx_enable_disable() is called. As
"priv->enable_gpio“ has not yet been initialized, setting the GPIO has
no effect. Also the i2c enable command is not executed as the device
is still in SHUTDOWN.
3. On line 556 the call to lp50xx_probe_dt() finally parses the rest of
the DT and the configured priv->enable_gpio is set up.
As a result the device is still in SHUTDOWN mode and not ready for
operation.
Split lp50xx_enable_disable() into distinct enable and disable functions
to enforce correct ordering between enable_gpio manipulations and i2c
commands.
Read enable_gpio configuration from DT before attempting to manipulate
enable_gpio.
Add delays to observe correct wait timing after manipulating enable_gpio
and before any i2c communication.
Fixes: 242b81170fb8 ("leds: lp50xx: Add the LP50XX family of the RGB LED driver")
Cc: stable(a)vger.kernel.org
Signed-off-by: Christian Hitz <christian.hitz(a)bbv.ch>
---
Changes in v2:
- Unconditionally reset in lp50xx_enable
- Define magic numbers
- Improve log message
---
drivers/leds/leds-lp50xx.c | 55 +++++++++++++++++++++++++++-----------
1 file changed, 40 insertions(+), 15 deletions(-)
diff --git a/drivers/leds/leds-lp50xx.c b/drivers/leds/leds-lp50xx.c
index 94f8ef6b482c..d3485d814cf4 100644
--- a/drivers/leds/leds-lp50xx.c
+++ b/drivers/leds/leds-lp50xx.c
@@ -50,6 +50,12 @@
#define LP50XX_SW_RESET 0xff
#define LP50XX_CHIP_EN BIT(6)
+#define LP50XX_CHIP_DISABLE 0x00
+#define LP50XX_START_TIME_US 500
+#define LP50XX_RESET_TIME_US 3
+
+#define LP50XX_EN_GPIO_LOW 0
+#define LP50XX_EN_GPIO_HIGH 1
/* There are 3 LED outputs per bank */
#define LP50XX_LEDS_PER_MODULE 3
@@ -371,19 +377,42 @@ static int lp50xx_reset(struct lp50xx *priv)
return regmap_write(priv->regmap, priv->chip_info->reset_reg, LP50XX_SW_RESET);
}
-static int lp50xx_enable_disable(struct lp50xx *priv, int enable_disable)
+static int lp50xx_enable(struct lp50xx *priv)
{
int ret;
- ret = gpiod_direction_output(priv->enable_gpio, enable_disable);
+ if (priv->enable_gpio) {
+ ret = gpiod_direction_output(priv->enable_gpio, LP50XX_EN_GPIO_HIGH);
+ if (ret)
+ return ret;
+
+ udelay(LP50XX_START_TIME_US);
+ }
+
+ ret = lp50xx_reset(priv);
if (ret)
return ret;
- if (enable_disable)
- return regmap_write(priv->regmap, LP50XX_DEV_CFG0, LP50XX_CHIP_EN);
- else
- return regmap_write(priv->regmap, LP50XX_DEV_CFG0, 0);
+ return regmap_write(priv->regmap, LP50XX_DEV_CFG0, LP50XX_CHIP_EN);
+}
+static int lp50xx_disable(struct lp50xx *priv)
+{
+ int ret;
+
+ ret = regmap_write(priv->regmap, LP50XX_DEV_CFG0, LP50XX_CHIP_DISABLE);
+ if (ret)
+ return ret;
+
+ if (priv->enable_gpio) {
+ ret = gpiod_direction_output(priv->enable_gpio, LP50XX_EN_GPIO_LOW);
+ if (ret)
+ return ret;
+
+ udelay(LP50XX_RESET_TIME_US);
+ }
+
+ return 0;
}
static int lp50xx_probe_leds(struct fwnode_handle *child, struct lp50xx *priv,
@@ -447,6 +476,10 @@ static int lp50xx_probe_dt(struct lp50xx *priv)
return dev_err_probe(priv->dev, PTR_ERR(priv->enable_gpio),
"Failed to get enable GPIO\n");
+ ret = lp50xx_enable(priv);
+ if (ret)
+ return ret;
+
priv->regulator = devm_regulator_get(priv->dev, "vled");
if (IS_ERR(priv->regulator))
priv->regulator = NULL;
@@ -547,14 +580,6 @@ static int lp50xx_probe(struct i2c_client *client)
return ret;
}
- ret = lp50xx_reset(led);
- if (ret)
- return ret;
-
- ret = lp50xx_enable_disable(led, 1);
- if (ret)
- return ret;
-
return lp50xx_probe_dt(led);
}
@@ -563,7 +588,7 @@ static void lp50xx_remove(struct i2c_client *client)
struct lp50xx *led = i2c_get_clientdata(client);
int ret;
- ret = lp50xx_enable_disable(led, 0);
+ ret = lp50xx_disable(led);
if (ret)
dev_err(led->dev, "Failed to disable chip\n");
--
2.51.1
Hi,
There have been multiple reports [0][1] (+ 2 offlist) of suspend
failing when NBCON console drivers are in use. With the help of
NXP and NVIDIA we were able to isolate the problem and verify
the fix.
The first NBCON drivers appeared in 6.13, so currently there is
no LTS kernel that requires this series. But it should go into
6.17.x and 6.18.
John Ogness
[0] https://lore.kernel.org/lkml/80b020fc-c18a-4da4-b222-16da1cab2f4c@nvidia.com
[1] https://lore.kernel.org/lkml/DB9PR04MB8429E7DDF2D93C2695DE401D92C4A@DB9PR04…
John Ogness (1):
printk: Avoid scheduling irq_work on suspend
kernel/printk/internal.h | 8 ++++---
kernel/printk/printk.c | 51 ++++++++++++++++++++++++++++------------
2 files changed, 41 insertions(+), 18 deletions(-)
base-commit: e9a6fb0bcdd7609be6969112f3fbfcce3b1d4a7c
--
2.47.3
This changes ResourceSize to use the resource_size_t typedef (currently
ResourceSize is defined as phys_addr_t), and moves ResourceSize to
kernel::io and defines PhysAddr next to it. Any usage of ResourceSize or
bindings::phys_addr_t that references a physical address is updated to
use the new PhysAddr typedef.
I included some cc stable annotations because I think it is useful to
backport this to v6.18. This is to make backporting drivers to the 6.18
LTS easier as we will not have to worry about changing imports when
backporting.
Signed-off-by: Alice Ryhl <aliceryhl(a)google.com>
---
Changes in v2:
- Fix build error in last patch.
- Add cc stable.
- Link to v1: https://lore.kernel.org/r/20251106-resource-phys-typedefs-v1-0-0c0edc7301ce…
---
Alice Ryhl (4):
rust: io: define ResourceSize as resource_size_t
rust: io: move ResourceSize to top-level io module
rust: scatterlist: import ResourceSize from kernel::io
rust: io: add typedef for phys_addr_t
rust/kernel/devres.rs | 18 +++++++++++++++---
rust/kernel/io.rs | 26 +++++++++++++++++++++++---
rust/kernel/io/resource.rs | 13 ++++++-------
rust/kernel/scatterlist.rs | 2 +-
4 files changed, 45 insertions(+), 14 deletions(-)
---
base-commit: 211ddde0823f1442e4ad052a2f30f050145ccada
change-id: 20251106-resource-phys-typedefs-6db37927d159
Best regards,
--
Alice Ryhl <aliceryhl(a)google.com>
inode_hash() currently mixes a name-derived hash with the super_block
pointer using an unbounded multiplication:
tmp = (hashval * (unsigned long)sb) ^
(GOLDEN_RATIO_PRIME + hashval) / L1_CACHE_BYTES;
On 64-bit kernels this multiplication can overflow for many inputs.
With attacker-chosen filenames (authenticated client), overflowed
products collapse into a small set of buckets, saturating a few chains
and degrading lookups from O(1) to O(n). This produces second-scale
latency spikes and high CPU usage in ksmbd workers (algorithmic DoS).
Replace the pointer*hash multiply with hash_long() over a mixed value
(hashval ^ (unsigned long)sb) and keep the existing shift/mask. This
removes the overflow source and improves bucket distribution under
adversarial inputs without changing external behavior.
This is an algorithmic-complexity issue (CWE-190/CWE-407), not a
memory-safety bug.
Reported-by: Qianchang Zhao <pioooooooooip(a)gmail.com>
Reported-by: Zhitong Liu <liuzhitong1993(a)gmail.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Qianchang Zhao <pioooooooooip(a)gmail.com>
---
fs/smb/server/vfs_cache.c | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c
index dfed6fce8..ac18edf56 100644
--- a/fs/smb/server/vfs_cache.c
+++ b/fs/smb/server/vfs_cache.c
@@ -10,6 +10,7 @@
#include <linux/vmalloc.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
+#include <linux/hash.h>
#include "glob.h"
#include "vfs_cache.h"
@@ -65,12 +66,8 @@ static void fd_limit_close(void)
static unsigned long inode_hash(struct super_block *sb, unsigned long hashval)
{
- unsigned long tmp;
-
- tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
- L1_CACHE_BYTES;
- tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> inode_hash_shift);
- return tmp & inode_hash_mask;
+ return hash_long(hashval ^ (unsigned long)sb, inode_hash_shift) &
+ inode_hash_mask;
}
static struct ksmbd_inode *__ksmbd_inode_lookup(struct dentry *de)
--
2.34.1
Mejora el clima organizacional
body {
margin: 0;
padding: 0;
font-family: Arial, Helvetica, sans-serif;
font-size: 14px;
color: #333;
background-color: #ffffff;
}
table {
border-spacing: 0;
width: 100%;
max-width: 600px;
margin: auto;
}
td {
padding: 12px 20px;
}
a {
color: #1a73e8;
text-decoration: none;
}
.footer {
font-size: 12px;
color: #888888;
text-align: center;
}
Invierte en capacitación que realmente desarrolla a tu equipo con Vorecol Learning.
Hola, ,
Invertir en capacitación no siempre significa que tu equipo esté creciendo. Sin un buen seguimiento, los cursos pueden no tener el efecto esperado.
Vorecol Learning es una plataforma que te ayuda a gestionar el aprendizaje de tu equipo de forma sencilla y efectiva.
Con Vorecol Learning puedes:
Capacitar a tu personal, desde uno hasta cinco mil colaboradores, con cursos hechos a la medida de las necesidades de tu empresa.
Ver cómo avanza cada persona y entregar certificados al terminar los cursos, asegurando que aprendieron de verdad.
Permitir que tus colaboradores estudien cuando quieran y desde cualquier dispositivo, con evaluaciones que confirman lo aprendido.
Esta plataforma está diseñada para que la capacitación sea una herramienta real para el desarrollo de tu equipo, no solo una lista de tareas por cumplir.
Aprovecha el Buen Fin del 1 al 22 de noviembre con hasta 15% de descuento y potencia el aprendizaje en tu empresa con Vorecol Learning.
Responde a este correo o contáctame; mis datos están abajo.
Saludos,
--------------
Atte.: Mariann Rivas
Ciudad de México: (55) 5018 0565
WhatsApp: +52 33 1607 2089
Si no deseas recibir más correos, haz clic aquí para darte de baja.
Para remover su dirección de esta lista haga <a href="https://s1.arrobamail.com/unsuscribe.php?id=yiwtsrewiswqwtseup">click aquí</a>
When the second-stage kernel is booted via kexec with a limiting command
line such as "mem=<size>", the physical range that contains the carried
over IMA measurement list may fall outside the truncated RAM leading to
a kernel panic.
BUG: unable to handle page fault for address: ffff97793ff47000
RIP: ima_restore_measurement_list+0xdc/0x45a
#PF: error_code(0x0000) – not-present page
Other architectures already validate the range with page_is_ram(), as
done in commit: cbf9c4b9617b ("of: check previous kernel's
ima-kexec-buffer against memory bounds") do a similar check on x86.
Cc: stable(a)vger.kernel.org
Fixes: b69a2afd5afc ("x86/kexec: Carry forward IMA measurement log on kexec")
Reported-by: Paul Webb <paul.x.webb(a)oracle.com>
Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli(a)oracle.com>
---
Have tested the kexec for x86 kernel with IMA_KEXEC enabled and the
above patch works good. Paul initially reported this on 6.12 kernel but
I was able to reproduce this on 6.18, so I tried replicating how this
was fixed in drivers/of/kexec.c
---
arch/x86/kernel/setup.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 1b2edd07a3e1..fcef197d180e 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -439,9 +439,23 @@ int __init ima_free_kexec_buffer(void)
int __init ima_get_kexec_buffer(void **addr, size_t *size)
{
+ unsigned long start_pfn, end_pfn;
+
if (!ima_kexec_buffer_size)
return -ENOENT;
+ /*
+ * Calculate the PFNs for the buffer and ensure
+ * they are with in addressable memory.
+ */
+ start_pfn = PFN_DOWN(ima_kexec_buffer_phys);
+ end_pfn = PFN_DOWN(ima_kexec_buffer_phys + ima_kexec_buffer_size - 1);
+ if (!pfn_range_is_mapped(start_pfn, end_pfn)) {
+ pr_warn("IMA buffer at 0x%llx, size = 0x%zx beyond memory\n",
+ ima_kexec_buffer_phys, ima_kexec_buffer_size);
+ return -EINVAL;
+ }
+
*addr = __va(ima_kexec_buffer_phys);
*size = ima_kexec_buffer_size;
--
2.50.1
The patch titled
Subject: mm/memfd: fix information leak in hugetlb folios
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-memfd-fix-information-leak-in-hugetlb-folios.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Deepanshu Kartikey <kartikey406(a)gmail.com>
Subject: mm/memfd: fix information leak in hugetlb folios
Date: Wed, 12 Nov 2025 20:20:34 +0530
When allocating hugetlb folios for memfd, three initialization steps are
missing:
1. Folios are not zeroed, leading to kernel memory disclosure to userspace
2. Folios are not marked uptodate before adding to page cache
3. hugetlb_fault_mutex is not taken before hugetlb_add_to_page_cache()
The memfd allocation path bypasses the normal page fault handler
(hugetlb_no_page) which would handle all of these initialization steps.
This is problematic especially for udmabuf use cases where folios are
pinned and directly accessed by userspace via DMA.
Fix by matching the initialization pattern used in hugetlb_no_page():
- Zero the folio using folio_zero_user() which is optimized for huge pages
- Mark it uptodate with folio_mark_uptodate()
- Take hugetlb_fault_mutex before adding to page cache to prevent races
The folio_zero_user() change also fixes a potential security issue where
uninitialized kernel memory could be disclosed to userspace through read()
or mmap() operations on the memfd.
Link: https://lkml.kernel.org/r/20251112145034.2320452-1-kartikey406@gmail.com
Fixes: 89c1905d9c14 ("mm/gup: introduce memfd_pin_folios() for pinning memfd folios")
Signed-off-by: Deepanshu Kartikey <kartikey406(a)gmail.com>
Reported-by: syzbot+f64019ba229e3a5c411b(a)syzkaller.appspotmail.com
Link: https://lore.kernel.org/all/20251112031631.2315651-1-kartikey406@gmail.com/ [v1]
Closes: https://syzkaller.appspot.com/bug?extid=f64019ba229e3a5c411b
Suggested-by: Oscar Salvador <osalvador(a)suse.de>
Suggested-by: David Hildenbrand <david(a)redhat.com>
Tested-by: syzbot+f64019ba229e3a5c411b(a)syzkaller.appspotmail.com
Cc: Vivek Kasireddy <vivek.kasireddy(a)intel.com>
Cc: Jason Gunthorpe <jgg(a)nvidia.com>
Cc: Jason Gunthorpe <jgg(a)nvidia.com> (v2)
Cc: Christoph Hellwig <hch(a)lst.de> (v6)
Cc: Dave Airlie <airlied(a)redhat.com>
Cc: Gerd Hoffmann <kraxel(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memfd.c | 27 +++++++++++++++++++++++++++
1 file changed, 27 insertions(+)
--- a/mm/memfd.c~mm-memfd-fix-information-leak-in-hugetlb-folios
+++ a/mm/memfd.c
@@ -96,9 +96,36 @@ struct folio *memfd_alloc_folio(struct f
NULL,
gfp_mask);
if (folio) {
+ u32 hash;
+
+ /*
+ * Zero the folio to prevent information leaks to userspace.
+ * Use folio_zero_user() which is optimized for huge/gigantic
+ * pages. Pass 0 as addr_hint since this is not a faulting path
+ * and we don't have a user virtual address yet.
+ */
+ folio_zero_user(folio, 0);
+
+ /*
+ * Mark the folio uptodate before adding to page cache,
+ * as required by filemap.c and other hugetlb paths.
+ */
+ __folio_mark_uptodate(folio);
+
+ /*
+ * Serialize hugepage allocation and instantiation to prevent
+ * races with concurrent allocations, as required by all other
+ * callers of hugetlb_add_to_page_cache().
+ */
+ hash = hugetlb_fault_mutex_hash(memfd->f_mapping, idx);
+ mutex_lock(&hugetlb_fault_mutex_table[hash]);
+
err = hugetlb_add_to_page_cache(folio,
memfd->f_mapping,
idx);
+
+ mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+
if (err) {
folio_put(folio);
goto err_unresv;
_
Patches currently in -mm which might be from kartikey406(a)gmail.com are
mm-memfd-fix-information-leak-in-hugetlb-folios.patch
ocfs2-validate-cl_bpc-in-allocator-inodes-to-prevent-divide-by-zero.patch
In etm_setup_aux(), when a user sink is obtained via
coresight_get_sink_by_id(), it increments the reference count of the
sink device. However, if the sink is used in path building, the path
holds a reference, but the initial reference from
coresight_get_sink_by_id() is not released, causing a reference count
leak. We should release the initial reference after the path is built.
Found by code review.
Cc: stable(a)vger.kernel.org
Fixes: 0e6c20517596 ("coresight: etm-perf: Allow an event to use different sinks")
Signed-off-by: Ma Ke <make24(a)iscas.ac.cn>
---
Changes in v2:
- modified the patch as suggestions.
---
drivers/hwtracing/coresight/coresight-etm-perf.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
index f677c08233ba..8a5a59434cc9 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -454,6 +454,11 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,
goto err;
out:
+ if (user_sink) {
+ put_device(&user_sink->dev);
+ user_sink = NULL;
+ }
+
return event_data;
err:
--
2.17.1
###**🌟 فرصتك للنشر في مجلات علمية محكمة دوليًا – دعوة مفتوحة للباحثين والأكاديميين**
**انضم إلى النخبة العلمية… وانشر أبحاثك معنا.**
**انطلاقًا من إيماننا بأن البحث العلمي هو الركيزة الأساسية لنهضة المجتمعات وتقدمها،**
يسر*فكر للدراسات والتطوير* أن تتشرف بدعوتكم للنشر في مجلاتها العلمية المحكمة، التي تمثل منبرًا أكاديميًا رصينًا لاحتضان الأفكار الأصيلة، والمشاريع البحثية الجادة، والرؤى التي تسهم في إنتاج معرفة تطبيقية تخدم قضايا الإنسان والمجتمع.
إن دعوتنا هذه تأتي ضمن رؤيتنا في تمكين الباحثين والكتّاب في العالم العربي والإسلامي من إيصال أبحاثهم إلى أوسع نطاق، والمساهمة الفاعلة في الحراك العلمي محليًا ودوليًا، عبر منصات نشر معتمدة وموثوقة.
➡️ **قدّم مخطوطتك الآن:** [https://7m8ue.r.ag.d.sendibm3.com/mk/cl/f/sh/WCPzyXJTZ7nvI8YYc4qB1knr1PmTmi…
####عامل تأثير عربي=2.7
مجلة ريحان للنشر العلمي
-----------------------
(Rihan Journal for Scientific Publishing)
###$50
*مجلة علمية دولية، محكّمة، شهرية، مفتوحة الوصول، تصدر عن مركز فكر للدراسات والتطوير.*
رقم التسلسل المعياري الدولي: **ISSN-E: 2709-2097**
تستقبل المجلة الأبحاث والمقالات العلمية بثلاث لغات: **العربية، الإنجليزية، والتركية**، في مختلف التخصصات، وتخضع جميع المواد المقدمة لعملية تحكيم علمي صارمة، تضمن جودة المحتوى وموثوقيته الأكاديمية.
####عامل تأثير عربي=1.7
مجلة ايبرس للنشر الطبي
----------------------
###$50
*مجلة علمية دولية، محكّمة، فصلية، مفتوحة الوصول، تصدر عن مركز فكر للدراسات والتطوير.*
رقم التسلسل المعياري الدولي: **ISSN-E: 2959-5371**
تأسست مجلة إيبرس للنشر الطبي عام 2022، لتكون منصة علمية رصينة في مجال العلوم الطبية والصحية، ووجهة موثوقة للباحثين والطلبة وأعضاء الهيئات التدريسية والأكاديميين لنشر أبحاثهم الأصيلة، ومراجعاتهم العلمية، ومشاركتهم في تطوير المعرفة الطبية الحديثة.
تستقبل المجلة الدراسات والأبحاث باللغة **العربية، الإنجليزية، والتركية**، وتخضع جميع المواد لعملية تحكيم علمي دقيقة، وفق أعلى المعايير الأكاديمية الدولية.
مجلة طُوى للعلوم الاجتماعية
---------------------------
###$50
*مجلة علمية دولية، محكّمة، فصلية، مفتوحة الوصول، تصدر عن مركز فكر للدراسات والتطوير.*
رقم التسلسل المعياري الدولي:
**ISSN: 3104-7211**
**مجلة طُوى** هي منصة أكاديمية تُعنى بنشر البحوث والدراسات الأصيلة في **مجالات العلوم الاجتماعية المتداخلة**، بما يسهم في إنتاج معرفة تحليلية معمقة حول قضايا الإنسان والمجتمع. تلتزم المجلة بمعايير **التحكيم العلمي الرصين**، وتُرحّب بالأعمال المقدّمة باللغات: **العربية، الإنجليزية، والتركية**.
جاء اختيار اسم "طُوى" استلهامًا من **الوادي المقدّس طُوى**، تعبيرًا عن قدسية المعرفة، وإيمانًا بأن الفكر العلمي رسالة إنسانية لا تقل أثرًا وعمقًا عن أي رسالة تغيير أو بناء مجتمعي.
مجلة زنوبيا لدراسات المرأة والطفل والاسرة
-----------------------------------------
###$50
*مجلة علمية دولية، محكّمة، فصلية، مفتوحة الوصول، تصدر عن مركز فكر للدراسات والتطوير*
بالتعاون مع **المنتدى الثقافي النسائي السوري**
رقم التسلسل المعياري الدولي:
**ISSN: 3104-7874**
تُعنى **مجلة زنوبيا** بنشر البحوث والدراسات الأكاديمية التي تتناول قضايا **المرأة، والطفل، والأسرة** من زوايا علمية، اجتماعية، وثقافية متعددة، مع التركيز على التحديات والتحولات المعاصرة التي تمس هذه الفئات داخل السياقات العربية والعالمية. وتوفّر المجلة منبرًا أكاديميًا موثوقًا يعزز التفكير النقدي والتحليل العلمي البنّاء، ويسعى لإبراز التجارب والرؤى التي تدعم التمكين المجتمعي.
مجلة زكا للعلوم المالية والاقتصادية والإدارية
---------------------------------------------
###$50
*مجلة علمية دولية، محكّمة، فصلية، مفتوحة الوصول، تصدر عن مركز فكر للدراسات والتطوير.*
رقم التسلسل المعياري الدولي
**ISSN: 3104-7289**
تُعنى **مجلة زكا** بنشر أبحاث علمية أصيلة وعالية الجودة في مجالات: **إدارة الأعمال، الاقتصاد، المحاسبة، العلوم المالية، التسويق، الاقتصاد الإسلامي، الإدارة العامة، والتمويل**.
وتمثل المجلة منصة أكاديمية رصينة تستهدف الباحثين، الأكاديميين، وطلبة الدراسات العليا في التخصصات المالية والإدارية، وتسعى إلى الإسهام الفعّال في تطوير المعرفة الاقتصادية والإدارية، وفقًا لأحدث المعايير العلمية والمنهجيات البحثية الحديثة.
مجلة روح للعلوم الإنسانية
-------------------------
###$50
**(RUH Journal of Humanities)**
*مجلة علمية دولية، محكّمة، فصلية، مفتوحة الوصول، تصدر عن مركز فكر للدراسات والتطوير.*
رقم التسلسل المعياري الدولي:
**ISSN: 3105-2436**
تهدف **مجلة روح للعلوم الإنسانية** إلى نشر الأبحاث العلمية المتميزة في مختلف مجالات العلوم الإنسانية، وتعزيز الفكر الأكاديمي والنقاش العلمي المتخصص في القضايا الإنسانية المعاصرة على المستويين العربي والعالمي.
تُعد المجلة منصة أكاديمية رصينة تجمع الباحثين من تخصصات متعددة لتبادل المعرفة، وتحفيز الدراسات الرصينة ذات الأثر المجتمعي والثقافي، وتوسيع دائرة الحوار العلمي حول الإنسان والمجتمع.
جاء اختيار اسم **"روح"** ليعكس جوهر العلوم الإنسانية، التي تنطلق من فهم الإنسان: مشاعره، ثقافته، سلوكه، وتاريخه.
تشير الكلمة إلى **العنصر الحيوي** الذي يمنح الإنسان والمجتمع معناهما، مما يرسّخ رؤية المجلة في أن **العلوم الإنسانية هي الروح النابضة لفهم المجتمعات وتطورها**.
دعوة للتبرع
------------
###$10
###🎓 ساهم في نهضة العلم في سوريا – كن جزءًا من التغيير الحقيقي!
إذا كنت تؤمن بقوة المعرفة وأهمية دعم البحث العلمي في سوريا، فقد حان الوقت لتشارك في صناعة الأمل.
**تبرعك اليوم لا يُقدَّر بثمن…**
إنه استثمار في العقول، وفي جيل جديد من الباحثين والمفكرين الذين يسعون لإعادة بناء المجتمع على أسس علمية وإنسانية.
📢 **ساهم في صناعة الأمل… تبرعك اليوم يصنع فرقاً حقيقياً غداً!**
المؤتمرات العلمية
-----------------
###$100
###🏛️ **مركز فكر للمؤتمرات العلمية**
**مركز فكر للمؤتمرات العلمية** هو أحد برامج مركز فكر للدراسات والتطوير، يُعنى بتنظيم المؤتمرات والملتقيات العلمية المتخصصة، بهدف دعم البحث الأكاديمي وتوسيع دائرة الحوار المعرفي حول القضايا المعاصرة التي تهم المجتمعات العربية والعالمية.
ينطلق المركز من رؤية تؤمن بأن **المؤتمر العلمي ليس مجرد حدث أكاديمي، بل هو منصة استراتيجية لإنتاج المعرفة، وتبادل الخبرات، وصياغة حلول مستندة إلى البحث العلمي**.
###📚 **مجالات المؤتمرات:**
العلوم الإنسانية والاجتماعية
العلوم الطبية والصحية
الدراسات الاقتصادية والمالية
دراسات المرأة والطفل والأسرة
البيئة والتنمية المستدامة
التكنولوجيا والتحول الرقمي
التعليم والتربية
**مع خالص الشكر والتقدير،**
============================
**نثمّن وقتكم واهتمامكم، ونتطلع إلى تعاون مثمر يجمعنا في خدمة البحث العلمي والمجتمع.**
======================================================================================
**نؤمن أن العمل المشترك هو مفتاح التغيير الحقيقي، ونرحّب بكم دائمًا ضمن شبكتنا العلمية والمجتمعية.**
====================================================================================================
➡️ **قدّم مخطوطتك الآن:** [https://forms.gle/g7McfPrkaYYDFC2N6](https://forms.gle/g7McfPrkaYYDFC2N6)
Maria Abdel Rahim
[pr@rjsp.org](mailto:ahmet@rjsp.org)
rihanjournal(a)gmail.com
[00905306359001](https://)
gazimuhtarpas
29600,gazantap
[Unsubscribe](https://7m8ue.r.ag.d.sendibm3.com/mk/un/v2/sh/7nVTPdbLJ2bPbEmD…
The sit driver's packet transmission path calls: sit_tunnel_xmit() ->
update_or_create_fnhe(), which lead to fnhe_remove_oldest() being called
to delete entries exceeding FNHE_RECLAIM_DEPTH+random.
The race window is between fnhe_remove_oldest() selecting fnheX for
deletion and the subsequent kfree_rcu(). During this time, the
concurrent path's __mkroute_output() -> find_exception() can fetch the
soon-to-be-deleted fnheX, and rt_bind_exception() then binds it with a
new dst using a dst_hold(). When the original fnheX is freed via RCU,
the dst reference remains permanently leaked.
CPU 0 CPU 1
__mkroute_output()
find_exception() [fnheX]
update_or_create_fnhe()
fnhe_remove_oldest() [fnheX]
rt_bind_exception() [bind dst]
RCU callback [fnheX freed, dst leak]
This issue manifests as a device reference count leak and a warning in
dmesg when unregistering the net device:
unregister_netdevice: waiting for sitX to become free. Usage count = N
Ido Schimmel provided the simple test validation method [1].
The fix clears 'oldest->fnhe_daddr' before calling fnhe_flush_routes().
Since rt_bind_exception() checks this field, setting it to zero prevents
the stale fnhe from being reused and bound to a new dst just before it
is freed.
[1]
ip netns add ns1
ip -n ns1 link set dev lo up
ip -n ns1 address add 192.0.2.1/32 dev lo
ip -n ns1 link add name dummy1 up type dummy
ip -n ns1 route add 192.0.2.2/32 dev dummy1
ip -n ns1 link add name gretap1 up arp off type gretap \
local 192.0.2.1 remote 192.0.2.2
ip -n ns1 route add 198.51.0.0/16 dev gretap1
taskset -c 0 ip netns exec ns1 mausezahn gretap1 \
-A 198.51.100.1 -B 198.51.0.0/16 -t udp -p 1000 -c 0 -q &
taskset -c 2 ip netns exec ns1 mausezahn gretap1 \
-A 198.51.100.1 -B 198.51.0.0/16 -t udp -p 1000 -c 0 -q &
sleep 10
ip netns pids ns1 | xargs kill
ip netns del ns1
Cc: stable(a)vger.kernel.org
Fixes: 67d6d681e15b ("ipv4: make exception cache less predictible")
Signed-off-by: Chuang Wang <nashuiliang(a)gmail.com>
---
v0 -> v1:
- Expanded commit description to fully document the race condition,
including the sit driver's call chain and stack trace.
- Added Ido Schimmel's validation method.
---
net/ipv4/route.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6d27d3610c1c..b549d6a57307 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -607,6 +607,11 @@ static void fnhe_remove_oldest(struct fnhe_hash_bucket *hash)
oldest_p = fnhe_p;
}
}
+
+ /* Clear oldest->fnhe_daddr to prevent this fnhe from being
+ * rebound with new dsts in rt_bind_exception().
+ */
+ oldest->fnhe_daddr = 0;
fnhe_flush_routes(oldest);
*oldest_p = oldest->fnhe_next;
kfree_rcu(oldest, rcu);
--
2.47.3
The vdd_mpu regulator maximum voltage was previously limited to 1.2985V,
which prevented the CPU from reaching the 1GHz operating point. This
limitation was put in place because voltage changes were not working
correctly, causing the board to stall when attempting higher frequencies.
Increase the maximum voltage to 1.3515V to allow the full 1GHz OPP to be
used.
Add a TPS65219 PMIC driver fixes that properly implement the LOCK register
handling, to make voltage transitions work reliably.
Changes in v3:
- Remove an unused variable
- Link to v2: https://lore.kernel.org/r/20251106-fix_tps65219-v2-0-a7d608c4272f@bootlin.c…
Changes in v2:
- Setup a custom regmap_bus only for the TPS65214 instead of checking
the chip_id every time reg_write is called.
- Add the am335x-bonegreen-eco devicetree change in the same patch
series.
Signed-off-by: Kory Maincent (TI.com) <kory.maincent(a)bootlin.com>
---
Kory Maincent (TI.com) (2):
mfd: tps65219: Implement LOCK register handling for TPS65214
ARM: dts: am335x-bonegreen-eco: Enable 1GHz OPP by increasing vdd_mpu voltage
arch/arm/boot/dts/ti/omap/am335x-bonegreen-eco.dts | 2 +-
drivers/mfd/tps65219.c | 49 +++++++++++++++++++++-
include/linux/mfd/tps65219.h | 2 +
3 files changed, 51 insertions(+), 2 deletions(-)
---
base-commit: 1c353dc8d962de652bc7ad2ba2e63f553331391c
change-id: 20251106-fix_tps65219-dd62141d22cf
Best regards,
--
Köry Maincent, Bootlin
Embedded Linux and kernel engineering
https://bootlin.com
In systemd we're trying to switch the internal credentials setup logic
to new mount API [1], and I noticed fsconfig(FSCONFIG_CMD_RECONFIGURE)
consistently fails on tmpfs with noswap option. This can be trivially
reproduced with the following:
```
int fs_fd = fsopen("tmpfs", 0);
fsconfig(fs_fd, FSCONFIG_SET_FLAG, "noswap", NULL, 0);
fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
fsmount(fs_fd, 0, 0);
fsconfig(fs_fd, FSCONFIG_CMD_RECONFIGURE, NULL, NULL, 0); <------ EINVAL
```
After some digging the culprit is shmem_reconfigure() rejecting
!(ctx->seen & SHMEM_SEEN_NOSWAP) && sbinfo->noswap, which is bogus
as ctx->seen serves as a mask for whether certain options are touched
at all. On top of that, noswap option doesn't use fsparam_flag_no,
hence it's not really possible to "reenable" swap to begin with.
Drop the check and redundant SHMEM_SEEN_NOSWAP flag.
[1] https://github.com/systemd/systemd/pull/39637
Fixes: 2c6efe9cf2d7 ("shmem: add support to ignore swap")
Signed-off-by: Mike Yuan <me(a)yhndnzj.com>
Cc: Luis Chamberlain <mcgrof(a)kernel.org>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: <stable(a)vger.kernel.org>
---
mm/shmem.c | 15 +++++++--------
1 file changed, 7 insertions(+), 8 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index b9081b817d28..1b976414d6fa 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -131,8 +131,7 @@ struct shmem_options {
#define SHMEM_SEEN_INODES 2
#define SHMEM_SEEN_HUGE 4
#define SHMEM_SEEN_INUMS 8
-#define SHMEM_SEEN_NOSWAP 16
-#define SHMEM_SEEN_QUOTA 32
+#define SHMEM_SEEN_QUOTA 16
};
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -4677,7 +4676,6 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
"Turning off swap in unprivileged tmpfs mounts unsupported");
}
ctx->noswap = true;
- ctx->seen |= SHMEM_SEEN_NOSWAP;
break;
case Opt_quota:
if (fc->user_ns != &init_user_ns)
@@ -4827,14 +4825,15 @@ static int shmem_reconfigure(struct fs_context *fc)
err = "Current inum too high to switch to 32-bit inums";
goto out;
}
- if ((ctx->seen & SHMEM_SEEN_NOSWAP) && ctx->noswap && !sbinfo->noswap) {
+
+ /*
+ * "noswap" doesn't use fsparam_flag_no, i.e. there's no "swap"
+ * counterpart for (re-)enabling swap.
+ */
+ if (ctx->noswap && !sbinfo->noswap) {
err = "Cannot disable swap on remount";
goto out;
}
- if (!(ctx->seen & SHMEM_SEEN_NOSWAP) && !ctx->noswap && sbinfo->noswap) {
- err = "Cannot enable swap on remount if it was disabled on first mount";
- goto out;
- }
if (ctx->seen & SHMEM_SEEN_QUOTA &&
!sb_any_quota_loaded(fc->root->d_sb)) {
base-commit: 0d7bee10beeb59b1133bf5a4749b17a4ef3bbb01
--
2.51.1
Since Linux v6.7, booting using BootX on an Old World PowerMac produces
an early crash. Stan Johnson writes, "the symptoms are that the screen
goes blank and the backlight stays on, and the system freezes (Linux
doesn't boot)."
Further testing revealed that the failure can be avoided by disabling
CONFIG_BOOTX_TEXT. Bisection revealed that the regression was caused by
a change to the font bitmap pointer that's used when btext_init() begins
painting characters on the display, early in the boot process.
Christophe Leroy explains, "before kernel text is relocated to its final
location ... data is addressed with an offset which is added to the
Global Offset Table (GOT) entries at the start of bootx_init()
by function reloc_got2(). But the pointers that are located inside a
structure are not referenced in the GOT and are therefore not updated by
reloc_got2(). It is therefore needed to apply the offset manually by using
PTRRELOC() macro."
Cc: Cedar Maxwell <cedarmaxwell(a)mac.com>
Cc: Stan Johnson <userm57(a)yahoo.com>
Cc: "Dr. David Alan Gilbert" <linux(a)treblig.org>
Cc: Benjamin Herrenschmidt <benh(a)kernel.crashing.org>
Cc: stable(a)vger.kernel.org
Link: https://lists.debian.org/debian-powerpc/2025/10/msg00111.html
Link: https://lore.kernel.org/linuxppc-dev/d81ddca8-c5ee-d583-d579-02b19ed95301@y…
Reported-by: Cedar Maxwell <cedarmaxwell(a)mac.com>
Closes: https://lists.debian.org/debian-powerpc/2025/09/msg00031.html
Bisected-by: Stan Johnson <userm57(a)yahoo.com>
Tested-by: Stan Johnson <userm57(a)yahoo.com>
Fixes: 0ebc7feae79a ("powerpc: Use shared font data")
Suggested-by: Christophe Leroy <christophe.leroy(a)csgroup.eu>
Signed-off-by: Finn Thain <fthain(a)linux-m68k.org>
---
Changed since v1:
- Improved commit log entry to better explain the need for PTRRELOC().
---
arch/powerpc/kernel/btext.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
index 7f63f1cdc6c3..ca00c4824e31 100644
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -20,6 +20,7 @@
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/udbg.h>
+#include <asm/setup.h>
#define NO_SCROLL
@@ -463,7 +464,7 @@ static noinline void draw_byte(unsigned char c, long locX, long locY)
{
unsigned char *base = calc_base(locX << 3, locY << 4);
unsigned int font_index = c * 16;
- const unsigned char *font = font_sun_8x16.data + font_index;
+ const unsigned char *font = PTRRELOC(font_sun_8x16.data) + font_index;
int rb = dispDeviceRowBytes;
rmci_maybe_on();
--
2.49.1
The function qcom_slim_ngd_notify_slaves() calls of_slim_get_device() which
internally uses device_find_child() to obtain a device reference.
According to the device_find_child() documentation,
the caller must drop the reference with put_device() after use.
Found via static analysis and this is similar to commit 4e65bda8273c
("ASoC: wcd934x: fix error handling in wcd934x_codec_parse_data()")
Fixes: 917809e2280b ("slimbus: ngd: Add qcom SLIMBus NGD driver")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miaoqian Lin <linmq006(a)gmail.com>
---
drivers/slimbus/qcom-ngd-ctrl.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/slimbus/qcom-ngd-ctrl.c b/drivers/slimbus/qcom-ngd-ctrl.c
index 4fb66986cc22..cd40ab839c54 100644
--- a/drivers/slimbus/qcom-ngd-ctrl.c
+++ b/drivers/slimbus/qcom-ngd-ctrl.c
@@ -1241,6 +1241,7 @@ static void qcom_slim_ngd_notify_slaves(struct qcom_slim_ngd_ctrl *ctrl)
if (slim_get_logical_addr(sbdev))
dev_err(ctrl->dev, "Failed to get logical address\n");
+ put_device(&sbdev->dev);
}
}
--
2.39.5 (Apple Git-154)
On 32-bit book3s with hash-MMUs, tlb_flush() was a no-op. This was
unnoticed because all uses until recently were for unmaps, and thus
handled by __tlb_remove_tlb_entry().
After commit 4a18419f71cd ("mm/mprotect: use mmu_gather") in kernel 5.19,
tlb_gather_mmu() started being used for mprotect as well. This caused
mprotect to simply not work on these machines:
int *ptr = mmap(NULL, 4096, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
*ptr = 1; // force HPTE to be created
mprotect(ptr, 4096, PROT_READ);
*ptr = 2; // should segfault, but succeeds
Fixed by making tlb_flush() actually flush TLB pages. This finally
agrees with the behaviour of boot3s64's tlb_flush().
Fixes: 4a18419f71cd ("mm/mprotect: use mmu_gather")
Signed-off-by: Dave Vasilevsky <dave(a)vasilevsky.ca>
Cc: stable(a)vger.kernel.org
---
Changes in v2:
- Flush entire TLB if full mm is requested.
- Link to v1: https://lore.kernel.org/r/20251027-vasi-mprotect-g3-v1-1-3c5187085f9a@vasil…
---
arch/powerpc/include/asm/book3s/32/tlbflush.h | 8 ++++++--
arch/powerpc/mm/book3s32/tlb.c | 9 +++++++++
2 files changed, 15 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/include/asm/book3s/32/tlbflush.h b/arch/powerpc/include/asm/book3s/32/tlbflush.h
index e43534da5207aa3b0cb3c07b78e29b833c141f3f..b8c587ad2ea954f179246a57d6e86e45e91dcfdc 100644
--- a/arch/powerpc/include/asm/book3s/32/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/32/tlbflush.h
@@ -11,6 +11,7 @@
void hash__flush_tlb_mm(struct mm_struct *mm);
void hash__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
void hash__flush_range(struct mm_struct *mm, unsigned long start, unsigned long end);
+void hash__flush_gather(struct mmu_gather *tlb);
#ifdef CONFIG_SMP
void _tlbie(unsigned long address);
@@ -28,9 +29,12 @@ void _tlbia(void);
*/
static inline void tlb_flush(struct mmu_gather *tlb)
{
- /* 603 needs to flush the whole TLB here since it doesn't use a hash table. */
- if (!mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) {
+ hash__flush_gather(tlb);
+ } else {
+ /* 603 needs to flush the whole TLB here since it doesn't use a hash table. */
_tlbia();
+ }
}
static inline void flush_range(struct mm_struct *mm, unsigned long start, unsigned long end)
diff --git a/arch/powerpc/mm/book3s32/tlb.c b/arch/powerpc/mm/book3s32/tlb.c
index 9ad6b56bfec96e989b96f027d075ad5812500854..e54a7b0112322e5818d80facd2e3c7722e6dd520 100644
--- a/arch/powerpc/mm/book3s32/tlb.c
+++ b/arch/powerpc/mm/book3s32/tlb.c
@@ -105,3 +105,12 @@ void hash__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1);
}
EXPORT_SYMBOL(hash__flush_tlb_page);
+
+void hash__flush_gather(struct mmu_gather *tlb)
+{
+ if (tlb->fullmm || tlb->need_flush_all)
+ hash__flush_tlb_mm(tlb->mm);
+ else
+ hash__flush_range(tlb->mm, tlb->start, tlb->end);
+}
+EXPORT_SYMBOL(hash__flush_gather);
---
base-commit: 24172e0d79900908cf5ebf366600616d29c9b417
change-id: 20251027-vasi-mprotect-g3-f8f5278d4140
Best regards,
--
Dave Vasilevsky <dave(a)vasilevsky.ca>
See the first patch for details on the bug.
Signed-off-by: Alice Ryhl <aliceryhl(a)google.com>
---
Alice Ryhl (3):
rust_binder: fix race condition on death_list
rust_binder: avoid mem::take on delivered_deaths
rust: list: add warning to List::remove docs about mem::take
drivers/android/binder/node.rs | 6 +++---
drivers/android/binder/process.rs | 8 ++++++--
rust/kernel/list.rs | 3 +++
3 files changed, 12 insertions(+), 5 deletions(-)
---
base-commit: 211ddde0823f1442e4ad052a2f30f050145ccada
change-id: 20251111-binder-fix-list-remove-41c29c75ffc9
Best regards,
--
Alice Ryhl <aliceryhl(a)google.com>
From: Kairui Song <kasong(a)tencent.com>
Since commit 78524b05f1a3 ("mm, swap: avoid redundant swap device
pinning"), the common helper for allocating and preparing a folio in the
swap cache layer no longer tries to get a swap device reference
internally, because all callers of __read_swap_cache_async are already
holding a swap entry reference. The repeated swap device pinning isn't
needed on the same swap device.
Caller of VMA readahead is also holding a reference to the target
entry's swap device, but VMA readahead walks the page table, so it might
encounter swap entries from other devices, and call
__read_swap_cache_async on another device without holding a reference to
it.
So it is possible to cause a UAF when swapoff of device A raced with
swapin on device B, and VMA readahead tries to read swap entries from
device A. It's not easy to trigger, but in theory, it could cause real
issues.
Make VMA readahead try to get the device reference first if the swap
device is a different one from the target entry.
Cc: stable(a)vger.kernel.org
Fixes: 78524b05f1a3 ("mm, swap: avoid redundant swap device pinning")
Suggested-by: Huang Ying <ying.huang(a)linux.alibaba.com>
Signed-off-by: Kairui Song <kasong(a)tencent.com>
---
Sending as a new patch instead of V2 because the approach is very
different.
Previous patch:
https://lore.kernel.org/linux-mm/20251110-revert-78524b05f1a3-v1-1-88313f2b…
---
mm/swap_state.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 0cf9853a9232..da0481e163a4 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -745,6 +745,7 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
blk_start_plug(&plug);
for (addr = start; addr < end; ilx++, addr += PAGE_SIZE) {
+ struct swap_info_struct *si = NULL;
softleaf_t entry;
if (!pte++) {
@@ -759,8 +760,19 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
continue;
pte_unmap(pte);
pte = NULL;
+ /*
+ * Readahead entry may come from a device that we are not
+ * holding a reference to, try to grab a reference, or skip.
+ */
+ if (swp_type(entry) != swp_type(targ_entry)) {
+ si = get_swap_device(entry);
+ if (!si)
+ continue;
+ }
folio = __read_swap_cache_async(entry, gfp_mask, mpol, ilx,
&page_allocated, false);
+ if (si)
+ put_swap_device(si);
if (!folio)
continue;
if (page_allocated) {
---
base-commit: 565d240810a6c9689817a9f3d08f80adf488ca59
change-id: 20251111-swap-fix-vma-uaf-bec70969250f
Best regards,
--
Kairui Song <kasong(a)tencent.com>
Correct RGMII delay application logic in lan937x_set_tune_adj().
The function was missing `data16 &= ~PORT_TUNE_ADJ` before setting the
new delay value. This caused the new value to be bitwise-OR'd with the
existing PORT_TUNE_ADJ field instead of replacing it.
For example, when setting the RGMII 2 TX delay on port 4, the
intended TUNE_ADJUST value of 0 (RGMII_2_TX_DELAY_2NS) was
incorrectly OR'd with the default 0x1B (from register value 0xDA3),
leaving the delay at the wrong setting.
This patch adds the missing mask to clear the field, ensuring the
correct delay value is written. Physical measurements on the RGMII TX
lines confirm the fix, showing the delay changing from ~1ns (before
change) to ~2ns.
While testing on i.MX 8MP showed this was within the platform's timing
tolerance, it did not match the intended hardware-characterized value.
Fixes: b19ac41faa3f ("net: dsa: microchip: apply rgmii tx and rx delay in phylink mac config")
Signed-off-by: Oleksij Rempel <o.rempel(a)pengutronix.de>
---
drivers/net/dsa/microchip/lan937x_main.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/net/dsa/microchip/lan937x_main.c b/drivers/net/dsa/microchip/lan937x_main.c
index b1ae3b9de3d1..5a1496fff445 100644
--- a/drivers/net/dsa/microchip/lan937x_main.c
+++ b/drivers/net/dsa/microchip/lan937x_main.c
@@ -540,6 +540,7 @@ static void lan937x_set_tune_adj(struct ksz_device *dev, int port,
ksz_pread16(dev, port, reg, &data16);
/* Update tune Adjust */
+ data16 &= ~PORT_TUNE_ADJ;
data16 |= FIELD_PREP(PORT_TUNE_ADJ, val);
ksz_pwrite16(dev, port, reg, data16);
--
2.47.3
intel_th_output_open() calls bus_find_device_by_devt() which
internally increments the device reference count via get_device(), but
this reference is not properly released in several error paths. When
device driver is unavailable, file operations cannot be obtained, or
the driver's open method fails, the function returns without calling
put_device(), leading to a permanent device reference count leak. This
prevents the device from being properly released and could cause
resource exhaustion over time.
Found by code review.
Cc: stable(a)vger.kernel.org
Fixes: 39f4034693b7 ("intel_th: Add driver infrastructure for Intel(R) Trace Hub devices")
Signed-off-by: Ma Ke <make24(a)iscas.ac.cn>
---
Changes in v2:
- modified the patch to fix uninitialized variable 'err' warnings.
---
drivers/hwtracing/intel_th/core.c | 20 +++++++++++++++-----
1 file changed, 15 insertions(+), 5 deletions(-)
diff --git a/drivers/hwtracing/intel_th/core.c b/drivers/hwtracing/intel_th/core.c
index 47d9e6c3bac0..fdb9d022d875 100644
--- a/drivers/hwtracing/intel_th/core.c
+++ b/drivers/hwtracing/intel_th/core.c
@@ -810,13 +810,17 @@ static int intel_th_output_open(struct inode *inode, struct file *file)
int err;
dev = bus_find_device_by_devt(&intel_th_bus, inode->i_rdev);
- if (!dev || !dev->driver)
- return -ENODEV;
+ if (!dev || !dev->driver) {
+ err = -ENODEV;
+ goto out_no_device;
+ }
thdrv = to_intel_th_driver(dev->driver);
fops = fops_get(thdrv->fops);
- if (!fops)
- return -ENODEV;
+ if (!fops) {
+ err = -ENODEV;
+ goto out_put_device;
+ }
replace_fops(file, fops);
@@ -824,10 +828,16 @@ static int intel_th_output_open(struct inode *inode, struct file *file)
if (file->f_op->open) {
err = file->f_op->open(inode, file);
- return err;
+ if (err)
+ goto out_put_device;
}
return 0;
+
+out_put_device:
+ put_device(dev);
+out_no_device:
+ return err;
}
static const struct file_operations intel_th_output_fops = {
--
2.17.1
The vdd_mpu regulator maximum voltage was previously limited to 1.2985V,
which prevented the CPU from reaching the 1GHz operating point. This
limitation was put in place because voltage changes were not working
correctly, causing the board to stall when attempting higher frequencies.
Increase the maximum voltage to 1.3515V to allow the full 1GHz OPP to be
used.
Add a TPS65219 PMIC driver fixes that properly implement the LOCK register
handling, to make voltage transitions work reliably.
Changes in v2:
- Setup a custom regmap_bus only for the TPS65214 instead of checking
the chip_id every time reg_write is called.
- Add the am335x-bonegreen-eco devicetree change in the same patch
series.
Signed-off-by: Kory Maincent (TI.com) <kory.maincent(a)bootlin.com>
---
Kory Maincent (TI.com) (2):
mfd: tps65219: Implement LOCK register handling for TPS65214
ARM: dts: am335x-bonegreen-eco: Enable 1GHz OPP by increasing vdd_mpu voltage
arch/arm/boot/dts/ti/omap/am335x-bonegreen-eco.dts | 2 +-
drivers/mfd/tps65219.c | 51 +++++++++++++++++++++-
include/linux/mfd/tps65219.h | 2 +
3 files changed, 53 insertions(+), 2 deletions(-)
---
base-commit: 1c353dc8d962de652bc7ad2ba2e63f553331391c
change-id: 20251106-fix_tps65219-dd62141d22cf
Best regards,
--
Köry Maincent, Bootlin
Embedded Linux and kernel engineering
https://bootlin.com
The L1 substates support requires additional steps to work, namely:
-Proper handling of the CLKREQ# sideband signal. (It is mostly handled by
hardware, but software still needs to set the clkreq fields in the
PCIE_CLIENT_POWER_CON register to match the hardware implementation.)
-Program the frequency of the aux clock into the
DSP_PCIE_PL_AUX_CLK_FREQ_OFF register. (During L1 substates the core_clk
is turned off and the aux_clk is used instead.)
These steps are currently missing from the driver.
For more details, see section '18.6.6.4 L1 Substate' in the RK3658 TRM 1.1
Part 2, or section '11.6.6.4 L1 Substate' in the RK3588 TRM 1.0 Part2.
While this has always been a problem when using e.g.
CONFIG_PCIEASPM_POWER_SUPERSAVE=y, or when modifying
/sys/bus/pci/devices/.../link/l1_2_aspm, the lacking driver support for L1
substates became more apparent after commit f3ac2ff14834 ("PCI/ASPM:
Enable all ClockPM and ASPM states for devicetree platforms"), which
enabled ASPM also for CONFIG_PCIEASPM_DEFAULT=y.
When using e.g. an NVMe drive connected to the PCIe controller, the
problem will be seen as:
nvme nvme0: controller is down; will reset: CSTS=0xffffffff, PCI_STATUS=0x10
nvme nvme0: Does your device have a faulty power saving mode enabled?
nvme nvme0: Try "nvme_core.default_ps_max_latency_us=0 pcie_aspm=off pcie_port_pm=off" and report a bug
Thus, prevent advertising L1 Substates support until proper driver support
is added.
Cc: stable(a)vger.kernel.org
Fixes: 0e898eb8df4e ("PCI: rockchip-dwc: Add Rockchip RK356X host controller driver")
Fixes: f3ac2ff14834 ("PCI/ASPM: Enable all ClockPM and ASPM states for devicetree platforms")
Acked-by: Shawn Lin <shawn.lin(a)rock-chips.com>
Signed-off-by: Niklas Cassel <cassel(a)kernel.org>
---
Changes since v2:
-Improve commit message (Bjorn)
drivers/pci/controller/dwc/pcie-dw-rockchip.c | 21 +++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/drivers/pci/controller/dwc/pcie-dw-rockchip.c b/drivers/pci/controller/dwc/pcie-dw-rockchip.c
index 3e2752c7dd09..84f882abbca5 100644
--- a/drivers/pci/controller/dwc/pcie-dw-rockchip.c
+++ b/drivers/pci/controller/dwc/pcie-dw-rockchip.c
@@ -200,6 +200,25 @@ static bool rockchip_pcie_link_up(struct dw_pcie *pci)
return FIELD_GET(PCIE_LINKUP_MASK, val) == PCIE_LINKUP;
}
+/*
+ * See e.g. section '11.6.6.4 L1 Substate' in the RK3588 TRM V1.0 for the steps
+ * needed to support L1 substates. Currently, not a single rockchip platform
+ * performs these steps, so disable L1 substates until there is proper support.
+ */
+static void rockchip_pcie_disable_l1sub(struct dw_pcie *pci)
+{
+ u32 cap, l1subcap;
+
+ cap = dw_pcie_find_ext_capability(pci, PCI_EXT_CAP_ID_L1SS);
+ if (cap) {
+ l1subcap = dw_pcie_readl_dbi(pci, cap + PCI_L1SS_CAP);
+ l1subcap &= ~(PCI_L1SS_CAP_L1_PM_SS | PCI_L1SS_CAP_ASPM_L1_1 |
+ PCI_L1SS_CAP_ASPM_L1_2 | PCI_L1SS_CAP_PCIPM_L1_1 |
+ PCI_L1SS_CAP_PCIPM_L1_2);
+ dw_pcie_writel_dbi(pci, cap + PCI_L1SS_CAP, l1subcap);
+ }
+}
+
static void rockchip_pcie_enable_l0s(struct dw_pcie *pci)
{
u32 cap, lnkcap;
@@ -264,6 +283,7 @@ static int rockchip_pcie_host_init(struct dw_pcie_rp *pp)
irq_set_chained_handler_and_data(irq, rockchip_pcie_intx_handler,
rockchip);
+ rockchip_pcie_disable_l1sub(pci);
rockchip_pcie_enable_l0s(pci);
return 0;
@@ -301,6 +321,7 @@ static void rockchip_pcie_ep_init(struct dw_pcie_ep *ep)
struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
enum pci_barno bar;
+ rockchip_pcie_disable_l1sub(pci);
rockchip_pcie_enable_l0s(pci);
rockchip_pcie_ep_hide_broken_ats_cap_rk3588(ep);
--
2.51.0
After calling device_initialize(), the reference count of the device
is set to 1. If device_add() fails or register_queue_kobjects() fails,
the function returns without calling put_device() to release the
initial reference, causing a memory leak of the device structure.
Similarly, in netdev_unregister_kobject(), after calling device_del(),
there is no call to put_device() to release the initial reference,
leading to a memory leak. Add put_device() in the error paths of
netdev_register_kobject() and after device_del() in
netdev_unregister_kobject() to properly release the device references.
Found by code review.
Cc: stable(a)vger.kernel.org
Fixes: a1b3f594dc5f ("net: Expose all network devices in a namespaces in sysfs")
Signed-off-by: Ma Ke <make24(a)iscas.ac.cn>
---
net/core/net-sysfs.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index ca878525ad7c..d3895f26a0c8 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -2327,6 +2327,7 @@ void netdev_unregister_kobject(struct net_device *ndev)
pm_runtime_set_memalloc_noio(dev, false);
device_del(dev);
+ put_device(dev);
}
/* Create sysfs entries for network device. */
@@ -2357,7 +2358,7 @@ int netdev_register_kobject(struct net_device *ndev)
error = device_add(dev);
if (error)
- return error;
+ goto out_put_device;
error = register_queue_kobjects(ndev);
if (error) {
@@ -2367,6 +2368,10 @@ int netdev_register_kobject(struct net_device *ndev)
pm_runtime_set_memalloc_noio(dev, true);
+ return 0;
+
+out_put_device:
+ put_device(dev);
return error;
}
--
2.17.1
Post a166563e7ec3 ("arm64: mm: support large block mapping when rodata=full"),
__change_memory_common has a real chance of failing due to split failure.
Before that commit, this line was introduced in c55191e96caa, still having
a chance of failing if it needs to allocate pagetable memory in
apply_to_page_range, although that has never been observed to be true.
In general, we should always propagate the return value to the caller.
Cc: stable(a)vger.kernel.org
Fixes: c55191e96caa ("arm64: mm: apply r/o permissions of VM areas to its linear alias as well")
Signed-off-by: Dev Jain <dev.jain(a)arm.com>
---
Based on Linux 6.18-rc4.
arch/arm64/mm/pageattr.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 5135f2d66958..b4ea86cd3a71 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -148,6 +148,7 @@ static int change_memory_common(unsigned long addr, int numpages,
unsigned long size = PAGE_SIZE * numpages;
unsigned long end = start + size;
struct vm_struct *area;
+ int ret;
int i;
if (!PAGE_ALIGNED(addr)) {
@@ -185,8 +186,10 @@ static int change_memory_common(unsigned long addr, int numpages,
if (rodata_full && (pgprot_val(set_mask) == PTE_RDONLY ||
pgprot_val(clear_mask) == PTE_RDONLY)) {
for (i = 0; i < area->nr_pages; i++) {
- __change_memory_common((u64)page_address(area->pages[i]),
+ ret = __change_memory_common((u64)page_address(area->pages[i]),
PAGE_SIZE, set_mask, clear_mask);
+ if (ret)
+ return ret;
}
}
--
2.30.2
In hrtimer_interrupt(), interrupts are disabled when acquiring a spinlock,
which subsequently triggers an oops. During the oops call chain,
blocking_notifier_call_chain() invokes _cond_resched, ultimately leading
to a hard lockup.
Call Stack:
hrtimer_interrupt//raw_spin_lock_irqsave
__hrtimer_run_queues
page_fault
do_page_fault
bad_area_nosemaphore
no_context
oops_end
bust_spinlocks
unblank_screen
do_unblank_screen
fbcon_blank
fb_notifier_call_chain
blocking_notifier_call_chain
down_read
_cond_resched
If the system is in an oops state, use down_read_trylock instead of a
blocking lock acquisition. If the trylock fails, skip executing the
notifier callbacks to avoid potential deadlocks or unsafe operations
during the oops handling process.
Cc: stable(a)vger.kernel.org # 6.6
Fixes: fe9d4f576324 ("Add kernel/notifier.c")
Signed-off-by: Yi Yang <yiyang13(a)huawei.com>
---
kernel/notifier.c | 15 ++++++++++++---
1 file changed, 12 insertions(+), 3 deletions(-)
diff --git a/kernel/notifier.c b/kernel/notifier.c
index b3ce28f39eb6..ebff2315fac2 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -384,9 +384,18 @@ int blocking_notifier_call_chain(struct blocking_notifier_head *nh,
* is, we re-check the list after having taken the lock anyway:
*/
if (rcu_access_pointer(nh->head)) {
- down_read(&nh->rwsem);
- ret = notifier_call_chain(&nh->head, val, v, -1, NULL);
- up_read(&nh->rwsem);
+ if (!oops_in_progress) {
+ down_read(&nh->rwsem);
+ ret = notifier_call_chain(&nh->head, val, v, -1, NULL);
+ up_read(&nh->rwsem);
+ } else {
+ if (down_read_trylock(&nh->rwsem)) {
+ ret = notifier_call_chain(&nh->head, val, v, -1, NULL);
+ up_read(&nh->rwsem);
+ } else {
+ ret = NOTIFY_BAD;
+ }
+ }
}
return ret;
}
--
2.25.1
When looking at the recent CI results on NIPA and MPTCP CIs, a few MPTCP
Join tests are marked as unstable. Here are some fixes for that.
- Patch 1: a small fix for mptcp_connect.sh, printing a note as
initially intended. For >=v5.13.
- Patch 2: avoid unexpected reset when closing subflows. For >= 5.13.
- Patches 3-4: longer transfer when not waiting for the end. For >=5.18.
- Patch 5: read all received data when expecting a reset. For >= v6.1.
- Patch 6: a fix to properly kill background tasks. For >= v6.5.
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
---
Matthieu Baerts (NGI0) (6):
selftests: mptcp: connect: fix fallback note due to OoO
selftests: mptcp: join: rm: set backup flag
selftests: mptcp: join: endpoints: longer transfer
selftests: mptcp: join: userspace: longer transfer
selftests: mptcp: connect: trunc: read all recv data
selftests: mptcp: join: properly kill background tasks
tools/testing/selftests/net/mptcp/mptcp_connect.c | 18 +++--
tools/testing/selftests/net/mptcp/mptcp_connect.sh | 2 +-
tools/testing/selftests/net/mptcp/mptcp_join.sh | 90 +++++++++++-----------
tools/testing/selftests/net/mptcp/mptcp_lib.sh | 21 +++++
4 files changed, 80 insertions(+), 51 deletions(-)
---
base-commit: 96a9178a29a6b84bb632ebeb4e84cf61191c73d5
change-id: 20251108-net-mptcp-sft-join-unstable-5a28cdb6ea54
Best regards,
--
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 16c43a56b79e2c3220b043236369a129d508c65a
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025110816-catalog-residency-716f@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 16c43a56b79e2c3220b043236369a129d508c65a Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda(a)kernel.org>
Date: Sun, 2 Nov 2025 22:28:52 +0100
Subject: [PATCH] rust: kbuild: treat `build_error` and `rustdoc` as kernel
objects
Even if normally `build_error` isn't a kernel object, it should still
be treated as such so that we pass the same flags. Similarly, `rustdoc`
targets are never kernel objects, but we need to treat them as such.
Otherwise, starting with Rust 1.91.0 (released 2025-10-30), `rustc`
will complain about missing sanitizer flags since `-Zsanitizer` is a
target modifier too [1]:
error: mixing `-Zsanitizer` will cause an ABI mismatch in crate `build_error`
--> rust/build_error.rs:3:1
|
3 | //! Build-time error.
| ^
|
= help: the `-Zsanitizer` flag modifies the ABI so Rust crates compiled with different values of this flag cannot be used together safely
= note: unset `-Zsanitizer` in this crate is incompatible with `-Zsanitizer=kernel-address` in dependency `core`
= help: set `-Zsanitizer=kernel-address` in this crate or unset `-Zsanitizer` in `core`
= help: if you are sure this will not cause problems, you may use `-Cunsafe-allow-abi-mismatch=sanitizer` to silence this error
Thus explicitly mark them as kernel objects.
Cc: stable(a)vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs).
Link: https://github.com/rust-lang/rust/pull/138736 [1]
Reviewed-by: Alice Ryhl <aliceryhl(a)google.com>
Tested-by: Justin M. Forbes <jforbes(a)fedoraproject.org>
Link: https://patch.msgid.link/20251102212853.1505384-1-ojeda@kernel.org
Signed-off-by: Miguel Ojeda <ojeda(a)kernel.org>
diff --git a/rust/Makefile b/rust/Makefile
index 23c7ae905bd2..a9fb9354b659 100644
--- a/rust/Makefile
+++ b/rust/Makefile
@@ -127,9 +127,14 @@ rustdoc-core: private rustc_target_flags = --edition=$(core-edition) $(core-cfgs
rustdoc-core: $(RUST_LIB_SRC)/core/src/lib.rs rustdoc-clean FORCE
+$(call if_changed,rustdoc)
+# Even if `rustdoc` targets are not kernel objects, they should still be
+# treated as such so that we pass the same flags. Otherwise, for instance,
+# `rustdoc` will complain about missing sanitizer flags causing an ABI mismatch.
+rustdoc-compiler_builtins: private is-kernel-object := y
rustdoc-compiler_builtins: $(src)/compiler_builtins.rs rustdoc-core FORCE
+$(call if_changed,rustdoc)
+rustdoc-ffi: private is-kernel-object := y
rustdoc-ffi: $(src)/ffi.rs rustdoc-core FORCE
+$(call if_changed,rustdoc)
@@ -147,6 +152,7 @@ rustdoc-pin_init: $(src)/pin-init/src/lib.rs rustdoc-pin_init_internal \
rustdoc-macros FORCE
+$(call if_changed,rustdoc)
+rustdoc-kernel: private is-kernel-object := y
rustdoc-kernel: private rustc_target_flags = --extern ffi --extern pin_init \
--extern build_error --extern macros \
--extern bindings --extern uapi
@@ -522,6 +528,10 @@ $(obj)/pin_init.o: $(src)/pin-init/src/lib.rs $(obj)/compiler_builtins.o \
$(obj)/$(libpin_init_internal_name) $(obj)/$(libmacros_name) FORCE
+$(call if_changed_rule,rustc_library)
+# Even if normally `build_error` is not a kernel object, it should still be
+# treated as such so that we pass the same flags. Otherwise, for instance,
+# `rustc` will complain about missing sanitizer flags causing an ABI mismatch.
+$(obj)/build_error.o: private is-kernel-object := y
$(obj)/build_error.o: private skip_gendwarfksyms = 1
$(obj)/build_error.o: $(src)/build_error.rs $(obj)/compiler_builtins.o FORCE
+$(call if_changed_rule,rustc_library)
The quilt patch titled
Subject: crash: let architecture decide crash memory export to iomem_resource
has been removed from the -mm tree. Its filename was
crash-let-architecture-decide-crash-memory-export-to-iomem_resource.patch
This patch was dropped because it was merged into the mm-nonmm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Sourabh Jain <sourabhjain(a)linux.ibm.com>
Subject: crash: let architecture decide crash memory export to iomem_resource
Date: Thu, 16 Oct 2025 19:58:31 +0530
With the generic crashkernel reservation, the kernel emits the following
warning on powerpc:
WARNING: CPU: 0 PID: 1 at arch/powerpc/mm/mem.c:341 add_system_ram_resources+0xfc/0x180
Modules linked in:
CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.17.0-auto-12607-g5472d60c129f #1 VOLUNTARY
Hardware name: IBM,9080-HEX Power11 (architected) 0x820200 0xf000007 of:IBM,FW1110.01 (NH1110_069) hv:phyp pSeries
NIP: c00000000201de3c LR: c00000000201de34 CTR: 0000000000000000
REGS: c000000127cef8a0 TRAP: 0700 Not tainted (6.17.0-auto-12607-g5472d60c129f)
MSR: 8000000002029033 <SF,VEC,EE,ME,IR,DR,RI,LE> CR: 84000840 XER: 20040010
CFAR: c00000000017eed0 IRQMASK: 0
GPR00: c00000000201de34 c000000127cefb40 c0000000016a8100 0000000000000001
GPR04: c00000012005aa00 0000000020000000 c000000002b705c8 0000000000000000
GPR08: 000000007fffffff fffffffffffffff0 c000000002db8100 000000011fffffff
GPR12: c00000000201dd40 c000000002ff0000 c0000000000112bc 0000000000000000
GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
GPR20: 0000000000000000 0000000000000000 0000000000000000 c0000000015a3808
GPR24: c00000000200468c c000000001699888 0000000000000106 c0000000020d1950
GPR28: c0000000014683f8 0000000081000200 c0000000015c1868 c000000002b9f710
NIP [c00000000201de3c] add_system_ram_resources+0xfc/0x180
LR [c00000000201de34] add_system_ram_resources+0xf4/0x180
Call Trace:
add_system_ram_resources+0xf4/0x180 (unreliable)
do_one_initcall+0x60/0x36c
do_initcalls+0x120/0x220
kernel_init_freeable+0x23c/0x390
kernel_init+0x34/0x26c
ret_from_kernel_user_thread+0x14/0x1c
This warning occurs due to a conflict between crashkernel and System RAM
iomem resources.
The generic crashkernel reservation adds the crashkernel memory range to
/proc/iomem during early initialization. Later, all memblock ranges are
added to /proc/iomem as System RAM. If the crashkernel region overlaps
with any memblock range, it causes a conflict while adding those memblock
regions as iomem resources, triggering the above warning. The conflicting
memblock regions are then omitted from /proc/iomem.
For example, if the following crashkernel region is added to /proc/iomem:
20000000-11fffffff : Crash kernel
then the following memblock regions System RAM regions fail to be inserted:
00000000-7fffffff : System RAM
80000000-257fffffff : System RAM
Fix this by not adding the crashkernel memory to /proc/iomem on powerpc.
Introduce an architecture hook to let each architecture decide whether to
export the crashkernel region to /proc/iomem.
For more info checkout commit c40dd2f766440 ("powerpc: Add System RAM
to /proc/iomem") and commit bce074bdbc36 ("powerpc: insert System RAM
resource to prevent crashkernel conflict")
Note: Before switching to the generic crashkernel reservation, powerpc
never exported the crashkernel region to /proc/iomem.
Link: https://lkml.kernel.org/r/20251016142831.144515-1-sourabhjain@linux.ibm.com
Fixes: e3185ee438c2 ("powerpc/crash: use generic crashkernel reservation").
Signed-off-by: Sourabh Jain <sourabhjain(a)linux.ibm.com>
Reported-by: Venkat Rao Bagalkote <venkat88(a)linux.ibm.com>
Closes: https://lore.kernel.org/all/90937fe0-2e76-4c82-b27e-7b8a7fe3ac69@linux.ibm.…
Tested-by: Venkat Rao Bagalkote <venkat88(a)linux.ibm.com>
Cc: Baoquan he <bhe(a)redhat.com>
Cc: Hari Bathini <hbathini(a)linux.ibm.com>
Cc: Madhavan Srinivasan <maddy(a)linux.ibm.com>
Cc: Mahesh Salgaonkar <mahesh(a)linux.ibm.com>
Cc: Michael Ellerman <mpe(a)ellerman.id.au>
Cc: Ritesh Harjani (IBM) <ritesh.list(a)gmail.com>
Cc: Vivek Goyal <vgoyal(a)redhat.com>
Cc: Dave Young <dyoung(a)redhat.com>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/powerpc/include/asm/crash_reserve.h | 8 ++++++++
include/linux/crash_reserve.h | 6 ++++++
kernel/crash_reserve.c | 3 +++
3 files changed, 17 insertions(+)
--- a/arch/powerpc/include/asm/crash_reserve.h~crash-let-architecture-decide-crash-memory-export-to-iomem_resource
+++ a/arch/powerpc/include/asm/crash_reserve.h
@@ -5,4 +5,12 @@
/* crash kernel regions are Page size agliged */
#define CRASH_ALIGN PAGE_SIZE
+#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
+static inline bool arch_add_crash_res_to_iomem(void)
+{
+ return false;
+}
+#define arch_add_crash_res_to_iomem arch_add_crash_res_to_iomem
+#endif
+
#endif /* _ASM_POWERPC_CRASH_RESERVE_H */
--- a/include/linux/crash_reserve.h~crash-let-architecture-decide-crash-memory-export-to-iomem_resource
+++ a/include/linux/crash_reserve.h
@@ -32,6 +32,12 @@ int __init parse_crashkernel(char *cmdli
void __init reserve_crashkernel_cma(unsigned long long cma_size);
#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
+#ifndef arch_add_crash_res_to_iomem
+static inline bool arch_add_crash_res_to_iomem(void)
+{
+ return true;
+}
+#endif
#ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE
#define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20)
#endif
--- a/kernel/crash_reserve.c~crash-let-architecture-decide-crash-memory-export-to-iomem_resource
+++ a/kernel/crash_reserve.c
@@ -524,6 +524,9 @@ void __init reserve_crashkernel_cma(unsi
#ifndef HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY
static __init int insert_crashkernel_resources(void)
{
+ if (!arch_add_crash_res_to_iomem())
+ return 0;
+
if (crashk_res.start < crashk_res.end)
insert_resource(&iomem_resource, &crashk_res);
_
Patches currently in -mm which might be from sourabhjain(a)linux.ibm.com are
crash-fix-crashkernel-resource-shrink.patch
The quilt patch titled
Subject: scs: fix a wrong parameter in __scs_magic
has been removed from the -mm tree. Its filename was
scs-fix-a-wrong-parameter-in-__scs_magic.patch
This patch was dropped because it was merged into the mm-nonmm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Zhichi Lin <zhichi.lin(a)vivo.com>
Subject: scs: fix a wrong parameter in __scs_magic
Date: Sat, 11 Oct 2025 16:22:22 +0800
__scs_magic() needs a 'void *' variable, but a 'struct task_struct *' is
given. 'task_scs(tsk)' is the starting address of the task's shadow call
stack, and '__scs_magic(task_scs(tsk))' is the end address of the task's
shadow call stack. Here should be '__scs_magic(task_scs(tsk))'.
The user-visible effect of this bug is that when CONFIG_DEBUG_STACK_USAGE
is enabled, the shadow call stack usage checking function
(scs_check_usage) would scan an incorrect memory range. This could lead
to:
1. **Inaccurate stack usage reporting**: The function would calculate
wrong usage statistics for the shadow call stack, potentially showing
incorrect value in kmsg.
2. **Potential kernel crash**: If the value of __scs_magic(tsk)is
greater than that of __scs_magic(task_scs(tsk)), the for loop may
access unmapped memory, potentially causing a kernel panic. However,
this scenario is unlikely because task_struct is allocated via the slab
allocator (which typically returns lower addresses), while the shadow
call stack returned by task_scs(tsk) is allocated via vmalloc(which
typically returns higher addresses).
However, since this is purely a debugging feature
(CONFIG_DEBUG_STACK_USAGE), normal production systems should be not
unaffected. The bug only impacts developers and testers who are actively
debugging stack usage with this configuration enabled.
Link: https://lkml.kernel.org/r/20251011082222.12965-1-zhichi.lin@vivo.com
Fixes: 5bbaf9d1fcb9 ("scs: Add support for stack usage debugging")
Signed-off-by: Jiyuan Xie <xiejiyuan(a)vivo.com>
Signed-off-by: Zhichi Lin <zhichi.lin(a)vivo.com>
Reviewed-by: Sami Tolvanen <samitolvanen(a)google.com>
Acked-by: Will Deacon <will(a)kernel.org>
Cc: Andrey Konovalov <andreyknvl(a)gmail.com>
Cc: Kees Cook <keescook(a)chromium.org>
Cc: Marco Elver <elver(a)google.com>
Cc: Will Deacon <will(a)kernel.org>
Cc: Yee Lee <yee.lee(a)mediatek.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/scs.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/kernel/scs.c~scs-fix-a-wrong-parameter-in-__scs_magic
+++ a/kernel/scs.c
@@ -135,7 +135,7 @@ static void scs_check_usage(struct task_
if (!IS_ENABLED(CONFIG_DEBUG_STACK_USAGE))
return;
- for (p = task_scs(tsk); p < __scs_magic(tsk); ++p) {
+ for (p = task_scs(tsk); p < __scs_magic(task_scs(tsk)); ++p) {
if (!READ_ONCE_NOCHECK(*p))
break;
used += sizeof(*p);
_
Patches currently in -mm which might be from zhichi.lin(a)vivo.com are
F2FS can mount filesystems with corrupted directory depth values that
get runtime-clamped to MAX_DIR_HASH_DEPTH. When RENAME_WHITEOUT
operations are performed on such directories, f2fs_rename performs
directory modifications (updating target entry and deleting source
entry) before attempting to add the whiteout entry via f2fs_add_link.
If f2fs_add_link fails due to the corrupted directory structure, the
function returns an error to VFS, but the partial directory
modifications have already been committed to disk. VFS assumes the
entire rename operation failed and does not update the dentry cache,
leaving stale mappings.
In the error path, VFS does not call d_move() to update the dentry
cache. This results in new_dentry still pointing to the old inode
(new_inode) which has already had its i_nlink decremented to zero.
The stale cache causes subsequent operations to incorrectly reference
the freed inode.
This causes subsequent operations to use cached dentry information that
no longer matches the on-disk state. When a second rename targets the
same entry, VFS attempts to decrement i_nlink on the stale inode, which
may already have i_nlink=0, triggering a WARNING in drop_nlink().
Example sequence:
1. First rename (RENAME_WHITEOUT): file2 → file1
- f2fs updates file1 entry on disk (points to inode 8)
- f2fs deletes file2 entry on disk
- f2fs_add_link(whiteout) fails (corrupted directory)
- Returns error to VFS
- VFS does not call d_move() due to error
- VFS cache still has: file1 → inode 7 (stale!)
- inode 7 has i_nlink=0 (already decremented)
2. Second rename: file3 → file1
- VFS uses stale cache: file1 → inode 7
- Tries to drop_nlink on inode 7 (i_nlink already 0)
- WARNING in drop_nlink()
Fix this by explicitly invalidating old_dentry and new_dentry when
f2fs_add_link fails during whiteout creation. This forces VFS to
refresh from disk on subsequent operations, ensuring cache consistency
even when the rename partially succeeds.
Reproducer:
1. Mount F2FS image with corrupted i_current_depth
2. renameat2(file2, file1, RENAME_WHITEOUT)
3. renameat2(file3, file1, 0)
4. System triggers WARNING in drop_nlink()
Fixes: 7e01e7ad746b ("f2fs: support RENAME_WHITEOUT")
Reported-by: syzbot+632cf32276a9a564188d(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=632cf32276a9a564188d
Suggested-by: Chao Yu <chao(a)kernel.org>
Link: https://lore.kernel.org/all/20251022233349.102728-1-kartikey406@gmail.com/ [v1]
Cc: stable(a)vger.kernel.org
Signed-off-by: Deepanshu Kartikey <kartikey406(a)gmail.com>
---
Changes in v2:
- Added detailed explanation about VFS not calling d_move() in error path,
resulting in new_dentry still pointing to inode with zeroed i_nlink
(suggested by Chao Yu)
- Added Fixes tag pointing to commit 7e01e7ad746b
- Added Cc: stable(a)vger.kernel.org for backporting to stable kernels
---
fs/f2fs/namei.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index b882771e4699..712479b7b93d 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -1053,9 +1053,11 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
if (whiteout) {
set_inode_flag(whiteout, FI_INC_LINK);
err = f2fs_add_link(old_dentry, whiteout);
- if (err)
+ if (err) {
+ d_invalidate(old_dentry);
+ d_invalidate(new_dentry);
goto put_out_dir;
-
+ }
spin_lock(&whiteout->i_lock);
whiteout->i_state &= ~I_LINKABLE;
spin_unlock(&whiteout->i_lock);
--
2.43.0
svm_update_lbrv() is called when MSR_IA32_DEBUGCTLMSR is updated, and on
nested transitions where LBRV is used. It checks whether LBRV enablement
needs to be changed in the current VMCB, and if it does, it also
recalculate intercepts to LBR MSRs.
However, there are cases where intercepts need to be updated even when
LBRV enablement doesn't. Example scenario:
- L1 has MSR_IA32_DEBUGCTLMSR cleared.
- L1 runs L2 without LBR_CTL_ENABLE (no LBRV).
- L2 sets DEBUGCTLMSR_LBR in MSR_IA32_DEBUGCTLMSR, svm_update_lbrv()
sets LBR_CTL_ENABLE in VMCB02 and disables intercepts to LBR MSRs.
- L2 exits to L1, svm_update_lbrv() is not called on this transition.
- L1 clears MSR_IA32_DEBUGCTLMSR, svm_update_lbrv() finds that
LBR_CTL_ENABLE is already cleared in VMCB01 and does nothing.
- Intercepts remain disabled, L1 reads to LBR MSRs read the host MSRs.
Fix it by always recalculating intercepts in svm_update_lbrv().
Fixes: 1d5a1b5860ed ("KVM: x86: nSVM: correctly virtualize LBR msrs when L2 is running")
Cc: stable(a)vger.kernel.org
Signed-off-by: Yosry Ahmed <yosry.ahmed(a)linux.dev>
---
arch/x86/kvm/svm/svm.c | 29 +++++++++++++++++++----------
1 file changed, 19 insertions(+), 10 deletions(-)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index d25c56b30b4e2..26ab75ecf1c67 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -806,25 +806,29 @@ void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
vmcb_mark_dirty(to_vmcb, VMCB_LBR);
}
-void svm_enable_lbrv(struct kvm_vcpu *vcpu)
+static void __svm_enable_lbrv(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
- svm_recalc_lbr_msr_intercepts(vcpu);
/* Move the LBR msrs to the vmcb02 so that the guest can see them. */
if (is_guest_mode(vcpu))
svm_copy_lbrs(svm->vmcb, svm->vmcb01.ptr);
}
-static void svm_disable_lbrv(struct kvm_vcpu *vcpu)
+void svm_enable_lbrv(struct kvm_vcpu *vcpu)
+{
+ __svm_enable_lbrv(vcpu);
+ svm_recalc_lbr_msr_intercepts(vcpu);
+}
+
+static void __svm_disable_lbrv(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
KVM_BUG_ON(sev_es_guest(vcpu->kvm), vcpu->kvm);
svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
- svm_recalc_lbr_msr_intercepts(vcpu);
/*
* Move the LBR msrs back to the vmcb01 to avoid copying them
@@ -853,13 +857,18 @@ void svm_update_lbrv(struct kvm_vcpu *vcpu)
(is_guest_mode(vcpu) && guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK));
- if (enable_lbrv == current_enable_lbrv)
- return;
+ if (enable_lbrv && !current_enable_lbrv)
+ __svm_enable_lbrv(vcpu);
+ else if (!enable_lbrv && current_enable_lbrv)
+ __svm_disable_lbrv(vcpu);
- if (enable_lbrv)
- svm_enable_lbrv(vcpu);
- else
- svm_disable_lbrv(vcpu);
+ /*
+ * During nested transitions, it is possible that the current VMCB has
+ * LBR_CTL set, but the previous LBR_CTL had it cleared (or vice versa).
+ * In this case, even though LBR_CTL does not need an update, intercepts
+ * do, so always recalculate the intercepts here.
+ */
+ svm_recalc_lbr_msr_intercepts(vcpu);
}
void disable_nmi_singlestep(struct vcpu_svm *svm)
--
2.51.2.1041.gc1ab5b90ca-goog
On Fri, 11 Oct 2024 22:38:53 +0800 Zheng Yejian <zhengyejian(a)huaweicloud.com> wrote:
>
> Currently when the length of a symbol is longer than 0x7f characters,
> its type shown in /proc/kallsyms can be incorrect.
>
> I found this issue when reading the code, but it can be reproduced by
> following steps:
>
> 1. Define a function which symbol length is 130 characters:
>
> #define X13(x) x##x##x##x##x##x##x##x##x##x##x##x##x
> static noinline void X13(x123456789)(void)
> {
> printk("hello world\n");
> }
>
> 2. The type in vmlinux is 't':
>
> $ nm vmlinux | grep x123456
> ffffffff816290f0 t x123456789x123456789x123456789x12[...]
>
> 3. Then boot the kernel, the type shown in /proc/kallsyms becomes 'g'
> instead of the expected 't':
>
> # cat /proc/kallsyms | grep x123456
> ffffffff816290f0 g x123456789x123456789x123456789x12[...]
>
> The root cause is that, after commit 73bbb94466fd ("kallsyms: support
> "big" kernel symbols"), ULEB128 was used to encode symbol name length.
> That is, for "big" kernel symbols of which name length is longer than
> 0x7f characters, the length info is encoded into 2 bytes.
>
> kallsyms_get_symbol_type() expects to read the first char of the
> symbol name which indicates the symbol type. However, due to the
> "big" symbol case not being handled, the symbol type read from
> /proc/kallsyms may be wrong, so handle it properly.
>
> Cc: stable(a)vger.kernel.org
> Fixes: 73bbb94466fd ("kallsyms: support "big" kernel symbols")
> Signed-off-by: Zheng Yejian <zhengyejian(a)huaweicloud.com>
Gary made me aware of this thread (thanks!) -- we are coming from:
https://lore.kernel.org/all/aQjua6zkEHYNVN3X@x1/
For which I sent this patch without knowing about this one:
https://lore.kernel.org/rust-for-linux/20251107050414.511648-1-ojeda@kernel…
This has been seen now by Arnaldo (Cc'ing) in a real system, so I think
we should take this one since it was first, with:
Cc: stable(a)vger.kernel.org
Thanks!
Cheers,
Miguel
css_task_iter_next() pins and returns a task, but the task can do whatever
between that and cgroup_procs_show() being called, including dying and
losing its PID. When that happens, task_pid_vnr() returns 0.
d245698d727a ("cgroup: Defer task cgroup unlink until after the task is
done switching out") makes this more likely as tasks now stay iterable with
css_task_iter_next() until the last schedule is complete, which can be
after the task has lost its PID.
Showing "0" in cgroup.procs or cgroup.threads is confusing and can lead to
surprising outcomes. For example, if a user tries to kill PID 0, it kills
all processes in the current process group.
Skip entries with PID 0 by returning SEQ_SKIP.
Cc: stable(a)vger.kernel.org
Signed-off-by: Tejun Heo <tj(a)kernel.org>
---
kernel/cgroup/cgroup.c | 11 +++++++++++
1 file changed, 11 insertions(+)
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -5287,6 +5287,17 @@ static void *cgroup_procs_start(struct s
static int cgroup_procs_show(struct seq_file *s, void *v)
{
+ pid_t pid = task_pid_vnr(v);
+
+ /*
+ * css_task_iter_next() could have visited a task which has already lost
+ * its PID but is not dead yet or the task could have been unhashed
+ * since css_task_iter_next(). In such cases, $pid would be 0 here.
+ * Don't confuse userspace with it.
+ */
+ if (unlikely(!pid))
+ return SEQ_SKIP;
+
seq_printf(s, "%d\n", task_pid_vnr(v));
return 0;
}
The patch titled
Subject: mm, swap: fix potential UAF issue for VMA readahead
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-swap-fix-potential-uaf-issue-for-vma-readahead.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Kairui Song <kasong(a)tencent.com>
Subject: mm, swap: fix potential UAF issue for VMA readahead
Date: Tue, 11 Nov 2025 21:36:08 +0800
Since commit 78524b05f1a3 ("mm, swap: avoid redundant swap device
pinning"), the common helper for allocating and preparing a folio in the
swap cache layer no longer tries to get a swap device reference
internally, because all callers of __read_swap_cache_async are already
holding a swap entry reference. The repeated swap device pinning isn't
needed on the same swap device.
Caller of VMA readahead is also holding a reference to the target entry's
swap device, but VMA readahead walks the page table, so it might encounter
swap entries from other devices, and call __read_swap_cache_async on
another device without holding a reference to it.
So it is possible to cause a UAF when swapoff of device A raced with
swapin on device B, and VMA readahead tries to read swap entries from
device A. It's not easy to trigger, but in theory, it could cause real
issues.
Make VMA readahead try to get the device reference first if the swap
device is a different one from the target entry.
Link: https://lkml.kernel.org/r/20251111-swap-fix-vma-uaf-v1-1-41c660e58562@tence…
Fixes: 78524b05f1a3 ("mm, swap: avoid redundant swap device pinning")
Suggested-by: Huang Ying <ying.huang(a)linux.alibaba.com>
Signed-off-by: Kairui Song <kasong(a)tencent.com>
Cc: Baoquan He <bhe(a)redhat.com>
Cc: Barry Song <baohua(a)kernel.org>
Cc: Chris Li <chrisl(a)kernel.org>
Cc: Kemeng Shi <shikemeng(a)huaweicloud.com>
Cc: Nhat Pham <nphamcs(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/swap_state.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
--- a/mm/swap_state.c~mm-swap-fix-potential-uaf-issue-for-vma-readahead
+++ a/mm/swap_state.c
@@ -748,6 +748,8 @@ static struct folio *swap_vma_readahead(
blk_start_plug(&plug);
for (addr = start; addr < end; ilx++, addr += PAGE_SIZE) {
+ struct swap_info_struct *si = NULL;
+
if (!pte++) {
pte = pte_offset_map(vmf->pmd, addr);
if (!pte)
@@ -761,8 +763,19 @@ static struct folio *swap_vma_readahead(
continue;
pte_unmap(pte);
pte = NULL;
+ /*
+ * Readahead entry may come from a device that we are not
+ * holding a reference to, try to grab a reference, or skip.
+ */
+ if (swp_type(entry) != swp_type(targ_entry)) {
+ si = get_swap_device(entry);
+ if (!si)
+ continue;
+ }
folio = __read_swap_cache_async(entry, gfp_mask, mpol, ilx,
&page_allocated, false);
+ if (si)
+ put_swap_device(si);
if (!folio)
continue;
if (page_allocated) {
_
Patches currently in -mm which might be from kasong(a)tencent.com are
mm-swap-fix-potential-uaf-issue-for-vma-readahead.patch
mm-swap-do-not-perform-synchronous-discard-during-allocation.patch
mm-swap-rename-helper-for-setup-bad-slots.patch
mm-swap-cleanup-swap-entry-allocation-parameter.patch
mm-migrate-swap-drop-usage-of-folio_index.patch
mm-swap-remove-redundant-argument-for-isolating-a-cluster.patch
revert-mm-swap-avoid-redundant-swap-device-pinning.patch
commit 44e8241c51f762aafa50ed116da68fd6ecdcc954 upstream.
On big endian arm kernels, the arm optimized Curve25519 code produces
incorrect outputs and fails the Curve25519 test. This has been true
ever since this code was added.
It seems that hardly anyone (or even no one?) actually uses big endian
arm kernels. But as long as they're ostensibly supported, we should
disable this code on them so that it's not accidentally used.
Note: for future-proofing, use !CPU_BIG_ENDIAN instead of
CPU_LITTLE_ENDIAN. Both of these are arch-specific options that could
get removed in the future if big endian support gets dropped.
Fixes: d8f1308a025f ("crypto: arm/curve25519 - wire up NEON implementation")
Cc: stable(a)vger.kernel.org
Acked-by: Ard Biesheuvel <ardb(a)kernel.org>
Link: https://lore.kernel.org/r/20251104054906.716914-1-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers(a)kernel.org>
---
arch/arm/crypto/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index 3858c4d4cb98..f6323b84631f 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -2,11 +2,11 @@
menu "Accelerated Cryptographic Algorithms for CPU (arm)"
config CRYPTO_CURVE25519_NEON
tristate "Public key crypto: Curve25519 (NEON)"
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && !CPU_BIG_ENDIAN
select CRYPTO_LIB_CURVE25519_GENERIC
select CRYPTO_ARCH_HAVE_LIB_CURVE25519
help
Curve25519 algorithm
base-commit: f6e38ae624cf7eb96fb444a8ca2d07caa8d9c8fe
--
2.51.2
commit 44e8241c51f762aafa50ed116da68fd6ecdcc954 upstream.
On big endian arm kernels, the arm optimized Curve25519 code produces
incorrect outputs and fails the Curve25519 test. This has been true
ever since this code was added.
It seems that hardly anyone (or even no one?) actually uses big endian
arm kernels. But as long as they're ostensibly supported, we should
disable this code on them so that it's not accidentally used.
Note: for future-proofing, use !CPU_BIG_ENDIAN instead of
CPU_LITTLE_ENDIAN. Both of these are arch-specific options that could
get removed in the future if big endian support gets dropped.
Fixes: d8f1308a025f ("crypto: arm/curve25519 - wire up NEON implementation")
Cc: stable(a)vger.kernel.org
Acked-by: Ard Biesheuvel <ardb(a)kernel.org>
Link: https://lore.kernel.org/r/20251104054906.716914-1-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers(a)kernel.org>
---
arch/arm/crypto/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index f87e63b2212e..df2ae5c6af95 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -2,11 +2,11 @@
menu "Accelerated Cryptographic Algorithms for CPU (arm)"
config CRYPTO_CURVE25519_NEON
tristate
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && !CPU_BIG_ENDIAN
select CRYPTO_KPP
select CRYPTO_LIB_CURVE25519_GENERIC
select CRYPTO_ARCH_HAVE_LIB_CURVE25519
default CRYPTO_LIB_CURVE25519_INTERNAL
help
base-commit: 8a243ecde1f6447b8e237f2c1c67c0bb67d16d67
--
2.51.2
commit 44e8241c51f762aafa50ed116da68fd6ecdcc954 upstream.
On big endian arm kernels, the arm optimized Curve25519 code produces
incorrect outputs and fails the Curve25519 test. This has been true
ever since this code was added.
It seems that hardly anyone (or even no one?) actually uses big endian
arm kernels. But as long as they're ostensibly supported, we should
disable this code on them so that it's not accidentally used.
Note: for future-proofing, use !CPU_BIG_ENDIAN instead of
CPU_LITTLE_ENDIAN. Both of these are arch-specific options that could
get removed in the future if big endian support gets dropped.
Fixes: d8f1308a025f ("crypto: arm/curve25519 - wire up NEON implementation")
Cc: stable(a)vger.kernel.org
Acked-by: Ard Biesheuvel <ardb(a)kernel.org>
Link: https://lore.kernel.org/r/20251104054906.716914-1-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers(a)kernel.org>
---
arch/arm/crypto/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index 1e5f3cdf691c..a00ab9265280 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -2,11 +2,11 @@
menu "Accelerated Cryptographic Algorithms for CPU (arm)"
config CRYPTO_CURVE25519_NEON
tristate
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && !CPU_BIG_ENDIAN
select CRYPTO_KPP
select CRYPTO_LIB_CURVE25519_GENERIC
select CRYPTO_ARCH_HAVE_LIB_CURVE25519
default CRYPTO_LIB_CURVE25519_INTERNAL
help
base-commit: 7660ce69123ea73b22930fcf20d995ad310049ef
--
2.51.2
Commit e26ee4efbc79 ("fuse: allocate ff->release_args only if release is
needed") skips allocating ff->release_args if the server does not
implement open. However in doing so, fuse_prepare_release() now skips
grabbing the reference on the inode, which makes it possible for an
inode to be evicted from the dcache while there are inflight readahead
requests. This causes a deadlock if the server triggers reclaim while
servicing the readahead request and reclaim attempts to evict the inode
of the file being read ahead. Since the folio is locked during
readahead, when reclaim evicts the fuse inode and fuse_evict_inode()
attempts to remove all folios associated with the inode from the page
cache (truncate_inode_pages_range()), reclaim will block forever waiting
for the lock since readahead cannot relinquish the lock because it is
itself blocked in reclaim:
>>> stack_trace(1504735)
folio_wait_bit_common (mm/filemap.c:1308:4)
folio_lock (./include/linux/pagemap.h:1052:3)
truncate_inode_pages_range (mm/truncate.c:336:10)
fuse_evict_inode (fs/fuse/inode.c:161:2)
evict (fs/inode.c:704:3)
dentry_unlink_inode (fs/dcache.c:412:3)
__dentry_kill (fs/dcache.c:615:3)
shrink_kill (fs/dcache.c:1060:12)
shrink_dentry_list (fs/dcache.c:1087:3)
prune_dcache_sb (fs/dcache.c:1168:2)
super_cache_scan (fs/super.c:221:10)
do_shrink_slab (mm/shrinker.c:435:9)
shrink_slab (mm/shrinker.c:626:10)
shrink_node (mm/vmscan.c:5951:2)
shrink_zones (mm/vmscan.c:6195:3)
do_try_to_free_pages (mm/vmscan.c:6257:3)
do_swap_page (mm/memory.c:4136:11)
handle_pte_fault (mm/memory.c:5562:10)
handle_mm_fault (mm/memory.c:5870:9)
do_user_addr_fault (arch/x86/mm/fault.c:1338:10)
handle_page_fault (arch/x86/mm/fault.c:1481:3)
exc_page_fault (arch/x86/mm/fault.c:1539:2)
asm_exc_page_fault+0x22/0x27
Fix this deadlock by allocating ff->release_args and grabbing the
reference on the inode when preparing the file for release even if the
server does not implement open. The inode reference will be dropped when
the last reference on the fuse file is dropped (see fuse_file_put() ->
fuse_release_end()).
Fixes: e26ee4efbc79 ("fuse: allocate ff->release_args only if release is needed")
Cc: stable(a)vger.kernel.org
Signed-off-by: Joanne Koong <joannelkoong(a)gmail.com>
Reported-by: Omar Sandoval <osandov(a)fb.com>
---
fs/fuse/file.c | 40 ++++++++++++++++++++++++++--------------
1 file changed, 26 insertions(+), 14 deletions(-)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index f1ef77a0be05..654e21ee93fb 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -100,7 +100,7 @@ static void fuse_release_end(struct fuse_mount *fm, struct fuse_args *args,
kfree(ra);
}
-static void fuse_file_put(struct fuse_file *ff, bool sync)
+static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir)
{
if (refcount_dec_and_test(&ff->count)) {
struct fuse_release_args *ra = &ff->args->release_args;
@@ -110,7 +110,9 @@ static void fuse_file_put(struct fuse_file *ff, bool sync)
fuse_file_io_release(ff, ra->inode);
if (!args) {
- /* Do nothing when server does not implement 'open' */
+ /* Do nothing when server does not implement 'opendir' */
+ } else if (!isdir && ff->fm->fc->no_open) {
+ fuse_release_end(ff->fm, args, 0);
} else if (sync) {
fuse_simple_request(ff->fm, args);
fuse_release_end(ff->fm, args, 0);
@@ -131,8 +133,17 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
struct fuse_file *ff;
int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
bool open = isdir ? !fc->no_opendir : !fc->no_open;
+ bool release = !isdir || open;
- ff = fuse_file_alloc(fm, open);
+ /*
+ * ff->args->release_args still needs to be allocated (so we can hold an
+ * inode reference while there are pending inflight file operations when
+ * ->release() is called, see fuse_prepare_release()) even if
+ * fc->no_open is set else it becomes possible for reclaim to deadlock
+ * if while servicing the readahead request the server triggers reclaim
+ * and reclaim evicts the inode of the file being read ahead.
+ */
+ ff = fuse_file_alloc(fm, release);
if (!ff)
return ERR_PTR(-ENOMEM);
@@ -152,13 +163,14 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
fuse_file_free(ff);
return ERR_PTR(err);
} else {
- /* No release needed */
- kfree(ff->args);
- ff->args = NULL;
- if (isdir)
+ if (isdir) {
+ /* No release needed */
+ kfree(ff->args);
+ ff->args = NULL;
fc->no_opendir = 1;
- else
+ } else {
fc->no_open = 1;
+ }
}
}
@@ -363,7 +375,7 @@ void fuse_file_release(struct inode *inode, struct fuse_file *ff,
* own ref to the file, the IO completion has to drop the ref, which is
* how the fuse server can end up closing its clients' files.
*/
- fuse_file_put(ff, false);
+ fuse_file_put(ff, false, isdir);
}
void fuse_release_common(struct file *file, bool isdir)
@@ -394,7 +406,7 @@ void fuse_sync_release(struct fuse_inode *fi, struct fuse_file *ff,
{
WARN_ON(refcount_read(&ff->count) > 1);
fuse_prepare_release(fi, ff, flags, FUSE_RELEASE, true);
- fuse_file_put(ff, true);
+ fuse_file_put(ff, true, false);
}
EXPORT_SYMBOL_GPL(fuse_sync_release);
@@ -891,7 +903,7 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
folio_put(ap->folios[i]);
}
if (ia->ff)
- fuse_file_put(ia->ff, false);
+ fuse_file_put(ia->ff, false, false);
fuse_io_free(ia);
}
@@ -1815,7 +1827,7 @@ static void fuse_writepage_free(struct fuse_writepage_args *wpa)
if (wpa->bucket)
fuse_sync_bucket_dec(wpa->bucket);
- fuse_file_put(wpa->ia.ff, false);
+ fuse_file_put(wpa->ia.ff, false, false);
kfree(ap->folios);
kfree(wpa);
@@ -1968,7 +1980,7 @@ int fuse_write_inode(struct inode *inode, struct writeback_control *wbc)
ff = __fuse_write_file_get(fi);
err = fuse_flush_times(inode, ff);
if (ff)
- fuse_file_put(ff, false);
+ fuse_file_put(ff, false, false);
return err;
}
@@ -2186,7 +2198,7 @@ static int fuse_iomap_writeback_submit(struct iomap_writepage_ctx *wpc,
}
if (data->ff)
- fuse_file_put(data->ff, false);
+ fuse_file_put(data->ff, false, false);
return error;
}
--
2.47.3
Optimiza la gestión de compensaciones
Garantiza remuneraciones justas y competitivas con Vorecol Compensation Management.
Hola ,
Garantizar remuneraciones justas y competitivas es clave para atraer y retener talento, pero muchas veces determinar el salario adecuado puede ser complejo.
Con Vorecol Compensation Management podrás establecer con precisión el salario para cada puesto, respaldado por criterios objetivos basados en los factores más relevantes de tu organización.
Con Vorecol Compensation Management puedes:
• Determinar salarios adecuados y equitativos para todos los roles.
• Respaldar las decisiones de compensación con justificaciones objetivas.
• Facilitar la transparencia y confianza en la gestión de remuneraciones.
Esto permite crear un entorno laboral más justo, motivador y competitivo, beneficiando tanto a tu equipo como a la empresa.
Aprovecha el Buen Fin del 1 al 22 de noviembre con hasta 15% de descuento y descubre cómo optimizar la gestión de compensaciones.
Puedes responder a este correo o contactarme directamente; mis datos están al final del mensaje.
Saludos,
--------------
Atte.: Daniel Rodríguez
Ciudad de México: (55) 5018 0565
WhatsApp: +52 33 1607 2089
Si no deseas recibir más correos, haz clic aquí para darte de baja.
Para remover su dirección de esta lista haga <a href="https://s1.arrobamail.com/unsuscribe.php?id=yiwtsrewiswqwwseup">click aquí</a>
In etm_setup_aux(), when a user sink is obtained via
coresight_get_sink_by_id(), it increments the reference count of the
sink device. However, if the sink is used in path building, the path
holds a reference, but the initial reference from
coresight_get_sink_by_id() is not released, causing a reference count
leak. We should release the initial reference after the path is built.
Found by code review.
Cc: stable(a)vger.kernel.org
Fixes: 0e6c20517596 ("coresight: etm-perf: Allow an event to use different sinks")
Signed-off-by: Ma Ke <make24(a)iscas.ac.cn>
---
drivers/hwtracing/coresight/coresight-etm-perf.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
index f677c08233ba..6584f6aa87bf 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -453,6 +453,11 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,
if (!event_data->snk_config)
goto err;
+ if (user_sink) {
+ put_device(&user_sink->dev);
+ user_sink = NULL;
+ }
+
out:
return event_data;
--
2.17.1
fixed possible out of band access to an array
If the fealnx_init_one() function is called more than MAX_UNITS times
or card_idx is less than zero
Added a check: 0 <= card_idx < MAX_UNITS
Found by Linux Verification Center (linuxtesting.org) with SVACE.
Signed-off-by: Ilya Krutskih <devsec(a)tpz.ru>
---
drivers/net/ethernet/fealnx.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c
index 6ac8547ef9b8..c7f2141a01fe 100644
--- a/drivers/net/ethernet/fealnx.c
+++ b/drivers/net/ethernet/fealnx.c
@@ -491,8 +491,8 @@ static int fealnx_init_one(struct pci_dev *pdev,
card_idx++;
sprintf(boardname, "fealnx%d", card_idx);
-
- option = card_idx < MAX_UNITS ? options[card_idx] : 0;
+ if (card_idx >= 0)
+ option = card_idx < MAX_UNITS ? options[card_idx] : 0;
i = pci_enable_device(pdev);
if (i) return i;
@@ -623,7 +623,7 @@ static int fealnx_init_one(struct pci_dev *pdev,
np->default_port = option & 15;
}
- if (card_idx < MAX_UNITS && full_duplex[card_idx] > 0)
+ if ((0 <= card_idx && MAX_UNITS > card_idx) && full_duplex[card_idx] > 0)
np->mii.full_duplex = full_duplex[card_idx];
if (np->mii.full_duplex) {
--
2.43.0
intel_th_output_open() calls bus_find_device_by_devt() which
internally increments the device reference count via get_device(), but
this reference is not properly released in several error paths. When
device driver is unavailable, file operations cannot be obtained, or
the driver's open method fails, the function returns without calling
put_device(), leading to a permanent device reference count leak. This
prevents the device from being properly released and could cause
resource exhaustion over time.
Found by code review.
Cc: stable(a)vger.kernel.org
Fixes: 39f4034693b7 ("intel_th: Add driver infrastructure for Intel(R) Trace Hub devices")
Signed-off-by: Ma Ke <make24(a)iscas.ac.cn>
---
drivers/hwtracing/intel_th/core.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/drivers/hwtracing/intel_th/core.c b/drivers/hwtracing/intel_th/core.c
index 47d9e6c3bac0..ecc4b4ff5cf6 100644
--- a/drivers/hwtracing/intel_th/core.c
+++ b/drivers/hwtracing/intel_th/core.c
@@ -811,12 +811,12 @@ static int intel_th_output_open(struct inode *inode, struct file *file)
dev = bus_find_device_by_devt(&intel_th_bus, inode->i_rdev);
if (!dev || !dev->driver)
- return -ENODEV;
+ goto out_no_device;
thdrv = to_intel_th_driver(dev->driver);
fops = fops_get(thdrv->fops);
if (!fops)
- return -ENODEV;
+ goto out_put_device;
replace_fops(file, fops);
@@ -824,10 +824,16 @@ static int intel_th_output_open(struct inode *inode, struct file *file)
if (file->f_op->open) {
err = file->f_op->open(inode, file);
- return err;
+ if (err)
+ goto out_put_device;
}
return 0;
+
+out_put_device:
+ put_device(dev);
+out_no_device:
+ return err;
}
static const struct file_operations intel_th_output_fops = {
--
2.17.1
From: Chuck Lever <chuck.lever(a)oracle.com>
Mike noted that when NFSD responds to an NFS_FILE_SYNC WRITE, it
does not also persist file time stamps. To wit, Section 18.32.3
of RFC 8881 mandates:
> The client specifies with the stable parameter the method of how
> the data is to be processed by the server. If stable is
> FILE_SYNC4, the server MUST commit the data written plus all file
> system metadata to stable storage before returning results. This
> corresponds to the NFSv2 protocol semantics. Any other behavior
> constitutes a protocol violation. If stable is DATA_SYNC4, then
> the server MUST commit all of the data to stable storage and
> enough of the metadata to retrieve the data before returning.
Commit 3f3503adb332 ("NFSD: Use vfs_iocb_iter_write()") replaced:
- flags |= RWF_SYNC;
with:
+ kiocb.ki_flags |= IOCB_DSYNC;
which appears to be correct given:
if (flags & RWF_SYNC)
kiocb_flags |= IOCB_DSYNC;
in kiocb_set_rw_flags(). However the author of that commit did not
appreciate that the previous line in kiocb_set_rw_flags() results
in IOCB_SYNC also being set:
kiocb_flags |= (__force int) (flags & RWF_SUPPORTED);
RWF_SUPPORTED contains RWF_SYNC, and RWF_SYNC is the same bit as
IOCB_SYNC. Reviewers at the time did not catch the omission.
Reported-by: Mike Snitzer <snitzer(a)kernel.org>
Closes: https://lore.kernel.org/linux-nfs/20251018005431.3403-1-cel@kernel.org/T/#t
Fixes: 3f3503adb332 ("NFSD: Use vfs_iocb_iter_write()")
Cc: stable(a)vger.kernel.org
Reviewed-by: Jeff Layton <jlayton(a)kernel.org>
Reviewed-by: NeilBrown <neil(a)brown.name>
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
---
fs/nfsd/vfs.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index f537a7b4ee01..5333d49910d9 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1314,8 +1314,18 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
stable = NFS_UNSTABLE;
init_sync_kiocb(&kiocb, file);
kiocb.ki_pos = offset;
- if (stable && !fhp->fh_use_wgather)
- kiocb.ki_flags |= IOCB_DSYNC;
+ if (likely(!fhp->fh_use_wgather)) {
+ switch (stable) {
+ case NFS_FILE_SYNC:
+ /* persist data and timestamps */
+ kiocb.ki_flags |= IOCB_DSYNC | IOCB_SYNC;
+ break;
+ case NFS_DATA_SYNC:
+ /* persist data only */
+ kiocb.ki_flags |= IOCB_DSYNC;
+ break;
+ }
+ }
nvecs = xdr_buf_to_bvec(rqstp->rq_bvec, rqstp->rq_maxpages, payload);
iov_iter_bvec(&iter, ITER_SOURCE, rqstp->rq_bvec, nvecs, *cnt);
--
2.51.0
The function 'ep0_rx_state()' accessed 'mreq->request' before verifying
that mreq was valid. If 'next_ep0_request()' returned NULL, this could
lead to a NULL pointer dereference. The return value of
'next_ep0_request()' is checked in every other code path except
here. It appears that the intended 'if (mreq)' check was mistakenly
written as 'if (req)', since the req pointer cannot be NULL when mreq
is not NULL.
Initialize 'mreq' and 'req' to NULL by default, and switch 'req'
NULL-checking to 'mreq' non-NULL check to prevent invalid memory access.
Found by Linux Verification Center (linuxtesting.org) with SVACE.
Fixes: df2069acb005 ("usb: Add MediaTek USB3 DRD driver")
Signed-off-by: Pavel Zhigulin <Pavel.Zhigulin(a)kaspersky.com>
---
v2: Add <stable(a)vger.kernel.org> to CC list
v1: https://lore.kernel.org/all/20251027193152.3906497-1-Pavel.Zhigulin@kaspers…
drivers/usb/mtu3/mtu3_gadget_ep0.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/drivers/usb/mtu3/mtu3_gadget_ep0.c b/drivers/usb/mtu3/mtu3_gadget_ep0.c
index e4fd1bb14a55..ee7466ca4d99 100644
--- a/drivers/usb/mtu3/mtu3_gadget_ep0.c
+++ b/drivers/usb/mtu3/mtu3_gadget_ep0.c
@@ -508,8 +508,8 @@ static int handle_standard_request(struct mtu3 *mtu,
/* receive an data packet (OUT) */
static void ep0_rx_state(struct mtu3 *mtu)
{
- struct mtu3_request *mreq;
- struct usb_request *req;
+ struct mtu3_request *mreq = NULL;
+ struct usb_request *req = NULL;
void __iomem *mbase = mtu->mac_base;
u32 maxp;
u32 csr;
@@ -519,10 +519,11 @@ static void ep0_rx_state(struct mtu3 *mtu)
csr = mtu3_readl(mbase, U3D_EP0CSR) & EP0_W1C_BITS;
mreq = next_ep0_request(mtu);
- req = &mreq->request;
/* read packet and ack; or stall because of gadget driver bug */
- if (req) {
+ if (mreq) {
+ req = &mreq->request;
+
void *buf = req->buf + req->actual;
unsigned int len = req->length - req->actual;
--
2.43.0
From: Niravkumar L Rabara <niravkumarlaxmidas.rabara(a)altera.com>
The OCRAM ECC is always enabled either by the BootROM or by the Secure
Device Manager (SDM) during a power-on reset on SoCFPGA.
However, during a warm reset, the OCRAM content is retained to preserve
data, while the control and status registers are reset to their default
values. As a result, ECC must be explicitly re-enabled after a warm reset.
Fixes: 17e47dc6db4f ("EDAC/altera: Add Stratix10 OCRAM ECC support")
Cc: stable(a)vger.kernel.org
Signed-off-by: Niravkumar L Rabara <niravkumarlaxmidas.rabara(a)altera.com>
Acked-by: Dinh Nguyen <dinguyen(a)kernel.org>
---
v2 changes:
- Add Fixes and Cc tags
- Retains Acked-by from v1 patch
v1 link:
https://lore.kernel.org/all/20251103140920.1060643-1-niravkumarlaxmidas.rab…
drivers/edac/altera_edac.c | 18 +++++++++++++++---
1 file changed, 15 insertions(+), 3 deletions(-)
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
index 103b2c2eba2a..a776d61027f2 100644
--- a/drivers/edac/altera_edac.c
+++ b/drivers/edac/altera_edac.c
@@ -1184,10 +1184,22 @@ altr_check_ocram_deps_init(struct altr_edac_device_dev *device)
if (ret)
return ret;
- /* Verify OCRAM has been initialized */
+ /*
+ * Verify that OCRAM has been initialized.
+ * During a warm reset, OCRAM contents are retained, but the control
+ * and status registers are reset to their default values. Therefore,
+ * ECC must be explicitly re-enabled in the control register.
+ * Error condition: if INITCOMPLETEA is clear and ECC_EN is already set.
+ */
if (!ecc_test_bits(ALTR_A10_ECC_INITCOMPLETEA,
- (base + ALTR_A10_ECC_INITSTAT_OFST)))
- return -ENODEV;
+ (base + ALTR_A10_ECC_INITSTAT_OFST))) {
+ if (!ecc_test_bits(ALTR_A10_ECC_EN,
+ (base + ALTR_A10_ECC_CTRL_OFST)))
+ ecc_set_bits(ALTR_A10_ECC_EN,
+ (base + ALTR_A10_ECC_CTRL_OFST));
+ else
+ return -ENODEV;
+ }
/* Enable IRQ on Single Bit Error */
writel(ALTR_A10_ECC_SERRINTEN, (base + ALTR_A10_ECC_ERRINTENS_OFST));
--
2.25.1
This patch series fixes delayed hw_error handling during SSR.
Patch 1 adds a wakeup to ensure hw_error is processed promptly after coredump collection.
Patch 2 corrects the timeout unit from jiffies to ms.
Changes v2:
- Split timeout conversion into a separate patch.
- Clarified commit messages and added test case description.
- Link to v1
https://lore.kernel.org/all/20251104112601.2670019-1-quic_shuaz@quicinc.com/
Shuai Zhang (2):
Bluetooth: qca: Fix delayed hw_error handling due to missing wakeup
during SSR
Bluetooth: hci_qca: Convert timeout from jiffies to ms
drivers/bluetooth/hci_qca.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
--
2.34.1
The following commit has been merged into the sched/core branch of tip:
Commit-ID: 79f3f9bedd149ea438aaeb0fb6a083637affe205
Gitweb: https://git.kernel.org/tip/79f3f9bedd149ea438aaeb0fb6a083637affe205
Author: Peter Zijlstra <peterz(a)infradead.org>
AuthorDate: Wed, 02 Apr 2025 20:07:34 +02:00
Committer: Peter Zijlstra <peterz(a)infradead.org>
CommitterDate: Tue, 11 Nov 2025 12:33:38 +01:00
sched/eevdf: Fix min_vruntime vs avg_vruntime
Basically, from the constraint that the sum of lag is zero, you can
infer that the 0-lag point is the weighted average of the individual
vruntime, which is what we're trying to compute:
\Sum w_i * v_i
avg = --------------
\Sum w_i
Now, since vruntime takes the whole u64 (worse, it wraps), this
multiplication term in the numerator is not something we can compute;
instead we do the min_vruntime (v0 henceforth) thing like:
v_i = (v_i - v0) + v0
This does two things:
- it keeps the key: (v_i - v0) 'small';
- it creates a relative 0-point in the modular space.
If you do that subtitution and work it all out, you end up with:
\Sum w_i * (v_i - v0)
avg = --------------------- + v0
\Sum w_i
Since you cannot very well track a ratio like that (and not suffer
terrible numerical problems) we simpy track the numerator and
denominator individually and only perform the division when strictly
needed.
Notably, the numerator lives in cfs_rq->avg_vruntime and the denominator
lives in cfs_rq->avg_load.
The one extra 'funny' is that these numbers track the entities in the
tree, and current is typically outside of the tree, so avg_vruntime()
adds current when needed before doing the division.
(vruntime_eligible() elides the division by cross-wise multiplication)
Anyway, as mentioned above, we currently use the CFS era min_vruntime
for this purpose. However, this thing can only move forward, while the
above avg can in fact move backward (when a non-eligible task leaves,
the average becomes smaller), this can cause trouble when through
happenstance (or construction) these values drift far enough apart to
wreck the game.
Replace cfs_rq::min_vruntime with cfs_rq::zero_vruntime which is kept
near/at avg_vruntime, following its motion.
The down-side is that this requires computing the avg more often.
Fixes: 147f3efaa241 ("sched/fair: Implement an EEVDF-like scheduling policy")
Reported-by: Zicheng Qu <quzicheng(a)huawei.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Link: https://patch.msgid.link/20251106111741.GC4068168@noisy.programming.kicks-a…
Cc: stable(a)vger.kernel.org
---
kernel/sched/debug.c | 8 +--
kernel/sched/fair.c | 114 +++++++++---------------------------------
kernel/sched/sched.h | 4 +-
3 files changed, 31 insertions(+), 95 deletions(-)
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 02e16b7..41caa22 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -796,7 +796,7 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
{
- s64 left_vruntime = -1, min_vruntime, right_vruntime = -1, left_deadline = -1, spread;
+ s64 left_vruntime = -1, zero_vruntime, right_vruntime = -1, left_deadline = -1, spread;
struct sched_entity *last, *first, *root;
struct rq *rq = cpu_rq(cpu);
unsigned long flags;
@@ -819,15 +819,15 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
last = __pick_last_entity(cfs_rq);
if (last)
right_vruntime = last->vruntime;
- min_vruntime = cfs_rq->min_vruntime;
+ zero_vruntime = cfs_rq->zero_vruntime;
raw_spin_rq_unlock_irqrestore(rq, flags);
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "left_deadline",
SPLIT_NS(left_deadline));
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "left_vruntime",
SPLIT_NS(left_vruntime));
- SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "min_vruntime",
- SPLIT_NS(min_vruntime));
+ SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "zero_vruntime",
+ SPLIT_NS(zero_vruntime));
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "avg_vruntime",
SPLIT_NS(avg_vruntime(cfs_rq)));
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "right_vruntime",
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4a11a83..8d971d4 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -554,7 +554,7 @@ static inline bool entity_before(const struct sched_entity *a,
static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- return (s64)(se->vruntime - cfs_rq->min_vruntime);
+ return (s64)(se->vruntime - cfs_rq->zero_vruntime);
}
#define __node_2_se(node) \
@@ -606,13 +606,13 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
*
* Which we track using:
*
- * v0 := cfs_rq->min_vruntime
+ * v0 := cfs_rq->zero_vruntime
* \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime
* \Sum w_i := cfs_rq->avg_load
*
- * Since min_vruntime is a monotonic increasing variable that closely tracks
- * the per-task service, these deltas: (v_i - v), will be in the order of the
- * maximal (virtual) lag induced in the system due to quantisation.
+ * Since zero_vruntime closely tracks the per-task service, these
+ * deltas: (v_i - v), will be in the order of the maximal (virtual) lag
+ * induced in the system due to quantisation.
*
* Also, we use scale_load_down() to reduce the size.
*
@@ -671,7 +671,7 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq)
avg = div_s64(avg, load);
}
- return cfs_rq->min_vruntime + avg;
+ return cfs_rq->zero_vruntime + avg;
}
/*
@@ -732,7 +732,7 @@ static int vruntime_eligible(struct cfs_rq *cfs_rq, u64 vruntime)
load += weight;
}
- return avg >= (s64)(vruntime - cfs_rq->min_vruntime) * load;
+ return avg >= (s64)(vruntime - cfs_rq->zero_vruntime) * load;
}
int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -740,42 +740,14 @@ int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se)
return vruntime_eligible(cfs_rq, se->vruntime);
}
-static u64 __update_min_vruntime(struct cfs_rq *cfs_rq, u64 vruntime)
+static void update_zero_vruntime(struct cfs_rq *cfs_rq)
{
- u64 min_vruntime = cfs_rq->min_vruntime;
- /*
- * open coded max_vruntime() to allow updating avg_vruntime
- */
- s64 delta = (s64)(vruntime - min_vruntime);
- if (delta > 0) {
- avg_vruntime_update(cfs_rq, delta);
- min_vruntime = vruntime;
- }
- return min_vruntime;
-}
+ u64 vruntime = avg_vruntime(cfs_rq);
+ s64 delta = (s64)(vruntime - cfs_rq->zero_vruntime);
-static void update_min_vruntime(struct cfs_rq *cfs_rq)
-{
- struct sched_entity *se = __pick_root_entity(cfs_rq);
- struct sched_entity *curr = cfs_rq->curr;
- u64 vruntime = cfs_rq->min_vruntime;
-
- if (curr) {
- if (curr->on_rq)
- vruntime = curr->vruntime;
- else
- curr = NULL;
- }
+ avg_vruntime_update(cfs_rq, delta);
- if (se) {
- if (!curr)
- vruntime = se->min_vruntime;
- else
- vruntime = min_vruntime(vruntime, se->min_vruntime);
- }
-
- /* ensure we never gain time by being placed backwards. */
- cfs_rq->min_vruntime = __update_min_vruntime(cfs_rq, vruntime);
+ cfs_rq->zero_vruntime = vruntime;
}
static inline u64 cfs_rq_min_slice(struct cfs_rq *cfs_rq)
@@ -848,6 +820,7 @@ RB_DECLARE_CALLBACKS(static, min_vruntime_cb, struct sched_entity,
static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
avg_vruntime_add(cfs_rq, se);
+ update_zero_vruntime(cfs_rq);
se->min_vruntime = se->vruntime;
se->min_slice = se->slice;
rb_add_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
@@ -859,6 +832,7 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
rb_erase_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
&min_vruntime_cb);
avg_vruntime_sub(cfs_rq, se);
+ update_zero_vruntime(cfs_rq);
}
struct sched_entity *__pick_root_entity(struct cfs_rq *cfs_rq)
@@ -1226,7 +1200,6 @@ static void update_curr(struct cfs_rq *cfs_rq)
curr->vruntime += calc_delta_fair(delta_exec, curr);
resched = update_deadline(cfs_rq, curr);
- update_min_vruntime(cfs_rq);
if (entity_is_task(curr)) {
/*
@@ -3808,15 +3781,6 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
if (!curr)
__enqueue_entity(cfs_rq, se);
cfs_rq->nr_queued++;
-
- /*
- * The entity's vruntime has been adjusted, so let's check
- * whether the rq-wide min_vruntime needs updated too. Since
- * the calculations above require stable min_vruntime rather
- * than up-to-date one, we do the update at the end of the
- * reweight process.
- */
- update_min_vruntime(cfs_rq);
}
}
@@ -5429,15 +5393,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
update_cfs_group(se);
- /*
- * Now advance min_vruntime if @se was the entity holding it back,
- * except when: DEQUEUE_SAVE && !DEQUEUE_MOVE, in this case we'll be
- * put back on, and if we advance min_vruntime, we'll be placed back
- * further than we started -- i.e. we'll be penalized.
- */
- if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) != DEQUEUE_SAVE)
- update_min_vruntime(cfs_rq);
-
if (flags & DEQUEUE_DELAYED)
finish_delayed_dequeue_entity(se);
@@ -9015,7 +8970,6 @@ static void yield_task_fair(struct rq *rq)
if (entity_eligible(cfs_rq, se)) {
se->vruntime = se->deadline;
se->deadline += calc_delta_fair(se->slice, se);
- update_min_vruntime(cfs_rq);
}
}
@@ -13078,23 +13032,6 @@ static inline void task_tick_core(struct rq *rq, struct task_struct *curr)
* Which shows that S and s_i transform alike (which makes perfect sense
* given that S is basically the (weighted) average of s_i).
*
- * Then:
- *
- * x -> s_min := min{s_i} (8)
- *
- * to obtain:
- *
- * \Sum_i w_i (s_i - s_min)
- * S = s_min + ------------------------ (9)
- * \Sum_i w_i
- *
- * Which already looks familiar, and is the basis for our current
- * approximation:
- *
- * S ~= s_min (10)
- *
- * Now, obviously, (10) is absolute crap :-), but it sorta works.
- *
* So the thing to remember is that the above is strictly UP. It is
* possible to generalize to multiple runqueues -- however it gets really
* yuck when you have to add affinity support, as illustrated by our very
@@ -13116,23 +13053,23 @@ static inline void task_tick_core(struct rq *rq, struct task_struct *curr)
* Let, for our runqueue 'k':
*
* T_k = \Sum_i w_i s_i
- * W_k = \Sum_i w_i ; for all i of k (11)
+ * W_k = \Sum_i w_i ; for all i of k (8)
*
* Then we can write (6) like:
*
* T_k
- * S_k = --- (12)
+ * S_k = --- (9)
* W_k
*
* From which immediately follows that:
*
* T_k + T_l
- * S_k+l = --------- (13)
+ * S_k+l = --------- (10)
* W_k + W_l
*
* On which we can define a combined lag:
*
- * lag_k+l(i) := S_k+l - s_i (14)
+ * lag_k+l(i) := S_k+l - s_i (11)
*
* And that gives us the tools to compare tasks across a combined runqueue.
*
@@ -13143,7 +13080,7 @@ static inline void task_tick_core(struct rq *rq, struct task_struct *curr)
* using (7); this only requires storing single 'time'-stamps.
*
* b) when comparing tasks between 2 runqueues of which one is forced-idle,
- * compare the combined lag, per (14).
+ * compare the combined lag, per (11).
*
* Now, of course cgroups (I so hate them) make this more interesting in
* that a) seems to suggest we need to iterate all cgroup on a CPU at such
@@ -13191,12 +13128,11 @@ static inline void task_tick_core(struct rq *rq, struct task_struct *curr)
* every tick. This limits the observed divergence due to the work
* conservancy.
*
- * On top of that, we can improve upon things by moving away from our
- * horrible (10) hack and moving to (9) and employing (13) here.
+ * On top of that, we can improve upon things by employing (10) here.
*/
/*
- * se_fi_update - Update the cfs_rq->min_vruntime_fi in a CFS hierarchy if needed.
+ * se_fi_update - Update the cfs_rq->zero_vruntime_fi in a CFS hierarchy if needed.
*/
static void se_fi_update(const struct sched_entity *se, unsigned int fi_seq,
bool forceidle)
@@ -13210,7 +13146,7 @@ static void se_fi_update(const struct sched_entity *se, unsigned int fi_seq,
cfs_rq->forceidle_seq = fi_seq;
}
- cfs_rq->min_vruntime_fi = cfs_rq->min_vruntime;
+ cfs_rq->zero_vruntime_fi = cfs_rq->zero_vruntime;
}
}
@@ -13263,11 +13199,11 @@ bool cfs_prio_less(const struct task_struct *a, const struct task_struct *b,
/*
* Find delta after normalizing se's vruntime with its cfs_rq's
- * min_vruntime_fi, which would have been updated in prior calls
+ * zero_vruntime_fi, which would have been updated in prior calls
* to se_fi_update().
*/
delta = (s64)(sea->vruntime - seb->vruntime) +
- (s64)(cfs_rqb->min_vruntime_fi - cfs_rqa->min_vruntime_fi);
+ (s64)(cfs_rqb->zero_vruntime_fi - cfs_rqa->zero_vruntime_fi);
return delta > 0;
}
@@ -13513,7 +13449,7 @@ static void set_next_task_fair(struct rq *rq, struct task_struct *p, bool first)
void init_cfs_rq(struct cfs_rq *cfs_rq)
{
cfs_rq->tasks_timeline = RB_ROOT_CACHED;
- cfs_rq->min_vruntime = (u64)(-(1LL << 20));
+ cfs_rq->zero_vruntime = (u64)(-(1LL << 20));
raw_spin_lock_init(&cfs_rq->removed.lock);
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 82e74e8..5a3cf81 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -681,10 +681,10 @@ struct cfs_rq {
s64 avg_vruntime;
u64 avg_load;
- u64 min_vruntime;
+ u64 zero_vruntime;
#ifdef CONFIG_SCHED_CORE
unsigned int forceidle_seq;
- u64 min_vruntime_fi;
+ u64 zero_vruntime_fi;
#endif
struct rb_root_cached tasks_timeline;
This reverts commit 8d3bf19f1b585a3cc0027f508b64c33484db8d0d.
While this fake hotplugging was a nice idea, it has shown that this feature
does not handle PCIe switches correctly:
pci_bus 0004:43: busn_res: can not insert [bus 43-41] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci_bus 0004:43: busn_res: [bus 43-41] end is updated to 43
pci_bus 0004:43: busn_res: can not insert [bus 43] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci 0004:42:00.0: devices behind bridge are unusable because [bus 43] cannot be assigned for them
pci_bus 0004:44: busn_res: can not insert [bus 44-41] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci_bus 0004:44: busn_res: [bus 44-41] end is updated to 44
pci_bus 0004:44: busn_res: can not insert [bus 44] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci 0004:42:02.0: devices behind bridge are unusable because [bus 44] cannot be assigned for them
pci_bus 0004:45: busn_res: can not insert [bus 45-41] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci_bus 0004:45: busn_res: [bus 45-41] end is updated to 45
pci_bus 0004:45: busn_res: can not insert [bus 45] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci 0004:42:06.0: devices behind bridge are unusable because [bus 45] cannot be assigned for them
pci_bus 0004:46: busn_res: can not insert [bus 46-41] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci_bus 0004:46: busn_res: [bus 46-41] end is updated to 46
pci_bus 0004:46: busn_res: can not insert [bus 46] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci 0004:42:0e.0: devices behind bridge are unusable because [bus 46] cannot be assigned for them
pci_bus 0004:42: busn_res: [bus 42-41] end is updated to 46
pci_bus 0004:42: busn_res: can not insert [bus 42-46] under [bus 41] (conflicts with (null) [bus 41])
pci 0004:41:00.0: devices behind bridge are unusable because [bus 42-46] cannot be assigned for them
pcieport 0004:40:00.0: bridge has subordinate 41 but max busn 46
During the initial scan, PCI core doesn't see the switch and since the Root
Port is not hot plug capable, the secondary bus number gets assigned as the
subordinate bus number. This means, the PCI core assumes that only one bus
will appear behind the Root Port since the Root Port is not hot plug
capable.
This works perfectly fine for PCIe endpoints connected to the Root Port,
since they don't extend the bus. However, if a PCIe switch is connected,
then there is a problem when the downstream busses starts showing up and
the PCI core doesn't extend the subordinate bus number after initial scan
during boot.
The long term plan is to migrate this driver to the pwrctrl framework,
once it adds proper support for powering up and enumerating PCIe switches.
Cc: stable(a)vger.kernel.org
Suggested-by: Manivannan Sadhasivam <mani(a)kernel.org>
Signed-off-by: Niklas Cassel <cassel(a)kernel.org>
---
drivers/pci/controller/dwc/pcie-designware-host.c | 10 ++--------
drivers/pci/controller/dwc/pcie-designware.h | 1 -
2 files changed, 2 insertions(+), 9 deletions(-)
diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c
index e92513c5bda5..f7e13dc16653 100644
--- a/drivers/pci/controller/dwc/pcie-designware-host.c
+++ b/drivers/pci/controller/dwc/pcie-designware-host.c
@@ -664,14 +664,8 @@ int dw_pcie_host_init(struct dw_pcie_rp *pp)
goto err_remove_edma;
}
- /*
- * Note: Skip the link up delay only when a Link Up IRQ is present.
- * If there is no Link Up IRQ, we should not bypass the delay
- * because that would require users to manually rescan for devices.
- */
- if (!pp->use_linkup_irq)
- /* Ignore errors, the link may come up later */
- dw_pcie_wait_for_link(pci);
+ /* Ignore errors, the link may come up later */
+ dw_pcie_wait_for_link(pci);
ret = pci_host_probe(bridge);
if (ret)
diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h
index e995f692a1ec..640827e9d093 100644
--- a/drivers/pci/controller/dwc/pcie-designware.h
+++ b/drivers/pci/controller/dwc/pcie-designware.h
@@ -426,7 +426,6 @@ struct dw_pcie_rp {
bool use_atu_msg;
int msg_atu_index;
struct resource *msg_res;
- bool use_linkup_irq;
struct pci_eq_presets presets;
struct pci_config_window *cfg;
bool ecam_enabled;
--
2.51.1
This reverts commit 4581403f67929d02c197cb187c4e1e811c9e762a.
While this fake hotplugging was a nice idea, it has shown that this feature
does not handle PCIe switches correctly:
pci_bus 0004:43: busn_res: can not insert [bus 43-41] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci_bus 0004:43: busn_res: [bus 43-41] end is updated to 43
pci_bus 0004:43: busn_res: can not insert [bus 43] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci 0004:42:00.0: devices behind bridge are unusable because [bus 43] cannot be assigned for them
pci_bus 0004:44: busn_res: can not insert [bus 44-41] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci_bus 0004:44: busn_res: [bus 44-41] end is updated to 44
pci_bus 0004:44: busn_res: can not insert [bus 44] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci 0004:42:02.0: devices behind bridge are unusable because [bus 44] cannot be assigned for them
pci_bus 0004:45: busn_res: can not insert [bus 45-41] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci_bus 0004:45: busn_res: [bus 45-41] end is updated to 45
pci_bus 0004:45: busn_res: can not insert [bus 45] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci 0004:42:06.0: devices behind bridge are unusable because [bus 45] cannot be assigned for them
pci_bus 0004:46: busn_res: can not insert [bus 46-41] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci_bus 0004:46: busn_res: [bus 46-41] end is updated to 46
pci_bus 0004:46: busn_res: can not insert [bus 46] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci 0004:42:0e.0: devices behind bridge are unusable because [bus 46] cannot be assigned for them
pci_bus 0004:42: busn_res: [bus 42-41] end is updated to 46
pci_bus 0004:42: busn_res: can not insert [bus 42-46] under [bus 41] (conflicts with (null) [bus 41])
pci 0004:41:00.0: devices behind bridge are unusable because [bus 42-46] cannot be assigned for them
pcieport 0004:40:00.0: bridge has subordinate 41 but max busn 46
During the initial scan, PCI core doesn't see the switch and since the Root
Port is not hot plug capable, the secondary bus number gets assigned as the
subordinate bus number. This means, the PCI core assumes that only one bus
will appear behind the Root Port since the Root Port is not hot plug
capable.
This works perfectly fine for PCIe endpoints connected to the Root Port,
since they don't extend the bus. However, if a PCIe switch is connected,
then there is a problem when the downstream busses starts showing up and
the PCI core doesn't extend the subordinate bus number after initial scan
during boot.
The long term plan is to migrate this driver to the pwrctrl framework,
once it adds proper support for powering up and enumerating PCIe switches.
Cc: stable(a)vger.kernel.org
Suggested-by: Manivannan Sadhasivam <mani(a)kernel.org>
Signed-off-by: Niklas Cassel <cassel(a)kernel.org>
---
drivers/pci/controller/dwc/pcie-qcom.c | 58 +-------------------------
1 file changed, 1 insertion(+), 57 deletions(-)
diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
index 28f5f7acb92a..b10e8adc79bb 100644
--- a/drivers/pci/controller/dwc/pcie-qcom.c
+++ b/drivers/pci/controller/dwc/pcie-qcom.c
@@ -55,9 +55,6 @@
#define PARF_AXI_MSTR_WR_ADDR_HALT_V2 0x1a8
#define PARF_Q2A_FLUSH 0x1ac
#define PARF_LTSSM 0x1b0
-#define PARF_INT_ALL_STATUS 0x224
-#define PARF_INT_ALL_CLEAR 0x228
-#define PARF_INT_ALL_MASK 0x22c
#define PARF_SID_OFFSET 0x234
#define PARF_BDF_TRANSLATE_CFG 0x24c
#define PARF_DBI_BASE_ADDR_V2 0x350
@@ -134,9 +131,6 @@
/* PARF_LTSSM register fields */
#define LTSSM_EN BIT(8)
-/* PARF_INT_ALL_{STATUS/CLEAR/MASK} register fields */
-#define PARF_INT_ALL_LINK_UP BIT(13)
-
/* PARF_NO_SNOOP_OVERRIDE register fields */
#define WR_NO_SNOOP_OVERRIDE_EN BIT(1)
#define RD_NO_SNOOP_OVERRIDE_EN BIT(3)
@@ -1604,32 +1598,6 @@ static void qcom_pcie_init_debugfs(struct qcom_pcie *pcie)
qcom_pcie_link_transition_count);
}
-static irqreturn_t qcom_pcie_global_irq_thread(int irq, void *data)
-{
- struct qcom_pcie *pcie = data;
- struct dw_pcie_rp *pp = &pcie->pci->pp;
- struct device *dev = pcie->pci->dev;
- u32 status = readl_relaxed(pcie->parf + PARF_INT_ALL_STATUS);
-
- writel_relaxed(status, pcie->parf + PARF_INT_ALL_CLEAR);
-
- if (FIELD_GET(PARF_INT_ALL_LINK_UP, status)) {
- msleep(PCIE_RESET_CONFIG_WAIT_MS);
- dev_dbg(dev, "Received Link up event. Starting enumeration!\n");
- /* Rescan the bus to enumerate endpoint devices */
- pci_lock_rescan_remove();
- pci_rescan_bus(pp->bridge->bus);
- pci_unlock_rescan_remove();
-
- qcom_pcie_icc_opp_update(pcie);
- } else {
- dev_WARN_ONCE(dev, 1, "Received unknown event. INT_STATUS: 0x%08x\n",
- status);
- }
-
- return IRQ_HANDLED;
-}
-
static void qcom_pci_free_msi(void *ptr)
{
struct dw_pcie_rp *pp = (struct dw_pcie_rp *)ptr;
@@ -1774,8 +1742,7 @@ static int qcom_pcie_probe(struct platform_device *pdev)
struct dw_pcie_rp *pp;
struct resource *res;
struct dw_pcie *pci;
- int ret, irq;
- char *name;
+ int ret;
pcie_cfg = of_device_get_match_data(dev);
if (!pcie_cfg) {
@@ -1932,27 +1899,6 @@ static int qcom_pcie_probe(struct platform_device *pdev)
goto err_phy_exit;
}
- name = devm_kasprintf(dev, GFP_KERNEL, "qcom_pcie_global_irq%d",
- pci_domain_nr(pp->bridge->bus));
- if (!name) {
- ret = -ENOMEM;
- goto err_host_deinit;
- }
-
- irq = platform_get_irq_byname_optional(pdev, "global");
- if (irq > 0) {
- ret = devm_request_threaded_irq(&pdev->dev, irq, NULL,
- qcom_pcie_global_irq_thread,
- IRQF_ONESHOT, name, pcie);
- if (ret) {
- dev_err_probe(&pdev->dev, ret,
- "Failed to request Global IRQ\n");
- goto err_host_deinit;
- }
-
- writel_relaxed(PARF_INT_ALL_LINK_UP, pcie->parf + PARF_INT_ALL_MASK);
- }
-
qcom_pcie_icc_opp_update(pcie);
if (pcie->mhi)
@@ -1960,8 +1906,6 @@ static int qcom_pcie_probe(struct platform_device *pdev)
return 0;
-err_host_deinit:
- dw_pcie_host_deinit(pp);
err_phy_exit:
list_for_each_entry_safe(port, tmp, &pcie->ports, list) {
phy_exit(port->phy);
--
2.51.1
This reverts commit ba4a2e2317b9faeca9193ed6d3193ddc3cf2aba3.
While this fake hotplugging was a nice idea, it has shown that this feature
does not handle PCIe switches correctly:
pci_bus 0004:43: busn_res: can not insert [bus 43-41] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci_bus 0004:43: busn_res: [bus 43-41] end is updated to 43
pci_bus 0004:43: busn_res: can not insert [bus 43] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci 0004:42:00.0: devices behind bridge are unusable because [bus 43] cannot be assigned for them
pci_bus 0004:44: busn_res: can not insert [bus 44-41] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci_bus 0004:44: busn_res: [bus 44-41] end is updated to 44
pci_bus 0004:44: busn_res: can not insert [bus 44] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci 0004:42:02.0: devices behind bridge are unusable because [bus 44] cannot be assigned for them
pci_bus 0004:45: busn_res: can not insert [bus 45-41] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci_bus 0004:45: busn_res: [bus 45-41] end is updated to 45
pci_bus 0004:45: busn_res: can not insert [bus 45] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci 0004:42:06.0: devices behind bridge are unusable because [bus 45] cannot be assigned for them
pci_bus 0004:46: busn_res: can not insert [bus 46-41] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci_bus 0004:46: busn_res: [bus 46-41] end is updated to 46
pci_bus 0004:46: busn_res: can not insert [bus 46] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci 0004:42:0e.0: devices behind bridge are unusable because [bus 46] cannot be assigned for them
pci_bus 0004:42: busn_res: [bus 42-41] end is updated to 46
pci_bus 0004:42: busn_res: can not insert [bus 42-46] under [bus 41] (conflicts with (null) [bus 41])
pci 0004:41:00.0: devices behind bridge are unusable because [bus 42-46] cannot be assigned for them
pcieport 0004:40:00.0: bridge has subordinate 41 but max busn 46
During the initial scan, PCI core doesn't see the switch and since the Root
Port is not hot plug capable, the secondary bus number gets assigned as the
subordinate bus number. This means, the PCI core assumes that only one bus
will appear behind the Root Port since the Root Port is not hot plug
capable.
This works perfectly fine for PCIe endpoints connected to the Root Port,
since they don't extend the bus. However, if a PCIe switch is connected,
then there is a problem when the downstream busses starts showing up and
the PCI core doesn't extend the subordinate bus number after initial scan
during boot.
The long term plan is to migrate this driver to the pwrctrl framework,
once it adds proper support for powering up and enumerating PCIe switches.
Cc: stable(a)vger.kernel.org
Suggested-by: Manivannan Sadhasivam <mani(a)kernel.org>
Signed-off-by: Niklas Cassel <cassel(a)kernel.org>
---
drivers/pci/controller/dwc/pcie-qcom.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
index 70c0ae8b7523..28f5f7acb92a 100644
--- a/drivers/pci/controller/dwc/pcie-qcom.c
+++ b/drivers/pci/controller/dwc/pcie-qcom.c
@@ -136,7 +136,6 @@
/* PARF_INT_ALL_{STATUS/CLEAR/MASK} register fields */
#define PARF_INT_ALL_LINK_UP BIT(13)
-#define PARF_INT_MSI_DEV_0_7 GENMASK(30, 23)
/* PARF_NO_SNOOP_OVERRIDE register fields */
#define WR_NO_SNOOP_OVERRIDE_EN BIT(1)
@@ -1951,8 +1950,7 @@ static int qcom_pcie_probe(struct platform_device *pdev)
goto err_host_deinit;
}
- writel_relaxed(PARF_INT_ALL_LINK_UP | PARF_INT_MSI_DEV_0_7,
- pcie->parf + PARF_INT_ALL_MASK);
+ writel_relaxed(PARF_INT_ALL_LINK_UP, pcie->parf + PARF_INT_ALL_MASK);
}
qcom_pcie_icc_opp_update(pcie);
--
2.51.1
This reverts commit 36971d6c5a9a134c15760ae9fd13c6d5f9a36abb.
While this fake hotplugging was a nice idea, it has shown that this feature
does not handle PCIe switches correctly:
pci_bus 0004:43: busn_res: can not insert [bus 43-41] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci_bus 0004:43: busn_res: [bus 43-41] end is updated to 43
pci_bus 0004:43: busn_res: can not insert [bus 43] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci 0004:42:00.0: devices behind bridge are unusable because [bus 43] cannot be assigned for them
pci_bus 0004:44: busn_res: can not insert [bus 44-41] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci_bus 0004:44: busn_res: [bus 44-41] end is updated to 44
pci_bus 0004:44: busn_res: can not insert [bus 44] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci 0004:42:02.0: devices behind bridge are unusable because [bus 44] cannot be assigned for them
pci_bus 0004:45: busn_res: can not insert [bus 45-41] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci_bus 0004:45: busn_res: [bus 45-41] end is updated to 45
pci_bus 0004:45: busn_res: can not insert [bus 45] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci 0004:42:06.0: devices behind bridge are unusable because [bus 45] cannot be assigned for them
pci_bus 0004:46: busn_res: can not insert [bus 46-41] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci_bus 0004:46: busn_res: [bus 46-41] end is updated to 46
pci_bus 0004:46: busn_res: can not insert [bus 46] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci 0004:42:0e.0: devices behind bridge are unusable because [bus 46] cannot be assigned for them
pci_bus 0004:42: busn_res: [bus 42-41] end is updated to 46
pci_bus 0004:42: busn_res: can not insert [bus 42-46] under [bus 41] (conflicts with (null) [bus 41])
pci 0004:41:00.0: devices behind bridge are unusable because [bus 42-46] cannot be assigned for them
pcieport 0004:40:00.0: bridge has subordinate 41 but max busn 46
During the initial scan, PCI core doesn't see the switch and since the Root
Port is not hot plug capable, the secondary bus number gets assigned as the
subordinate bus number. This means, the PCI core assumes that only one bus
will appear behind the Root Port since the Root Port is not hot plug
capable.
This works perfectly fine for PCIe endpoints connected to the Root Port,
since they don't extend the bus. However, if a PCIe switch is connected,
then there is a problem when the downstream busses starts showing up and
the PCI core doesn't extend the subordinate bus number after initial scan
during boot.
The long term plan is to migrate this driver to the pwrctrl framework,
once it adds proper support for powering up and enumerating PCIe switches.
Cc: stable(a)vger.kernel.org
Suggested-by: Manivannan Sadhasivam <mani(a)kernel.org>
Signed-off-by: Niklas Cassel <cassel(a)kernel.org>
---
drivers/pci/controller/dwc/pcie-qcom.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
index c48a20602d7f..70c0ae8b7523 100644
--- a/drivers/pci/controller/dwc/pcie-qcom.c
+++ b/drivers/pci/controller/dwc/pcie-qcom.c
@@ -1927,10 +1927,6 @@ static int qcom_pcie_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, pcie);
- irq = platform_get_irq_byname_optional(pdev, "global");
- if (irq > 0)
- pp->use_linkup_irq = true;
-
ret = dw_pcie_host_init(pp);
if (ret) {
dev_err(dev, "cannot initialize host\n");
@@ -1944,6 +1940,7 @@ static int qcom_pcie_probe(struct platform_device *pdev)
goto err_host_deinit;
}
+ irq = platform_get_irq_byname_optional(pdev, "global");
if (irq > 0) {
ret = devm_request_threaded_irq(&pdev->dev, irq, NULL,
qcom_pcie_global_irq_thread,
--
2.51.1
This reverts commit 0e0b45ab5d770a748487ba0ae8f77d1fb0f0de3e.
While this fake hotplugging was a nice idea, it has shown that this feature
does not handle PCIe switches correctly:
pci_bus 0004:43: busn_res: can not insert [bus 43-41] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci_bus 0004:43: busn_res: [bus 43-41] end is updated to 43
pci_bus 0004:43: busn_res: can not insert [bus 43] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci 0004:42:00.0: devices behind bridge are unusable because [bus 43] cannot be assigned for them
pci_bus 0004:44: busn_res: can not insert [bus 44-41] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci_bus 0004:44: busn_res: [bus 44-41] end is updated to 44
pci_bus 0004:44: busn_res: can not insert [bus 44] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci 0004:42:02.0: devices behind bridge are unusable because [bus 44] cannot be assigned for them
pci_bus 0004:45: busn_res: can not insert [bus 45-41] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci_bus 0004:45: busn_res: [bus 45-41] end is updated to 45
pci_bus 0004:45: busn_res: can not insert [bus 45] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci 0004:42:06.0: devices behind bridge are unusable because [bus 45] cannot be assigned for them
pci_bus 0004:46: busn_res: can not insert [bus 46-41] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci_bus 0004:46: busn_res: [bus 46-41] end is updated to 46
pci_bus 0004:46: busn_res: can not insert [bus 46] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci 0004:42:0e.0: devices behind bridge are unusable because [bus 46] cannot be assigned for them
pci_bus 0004:42: busn_res: [bus 42-41] end is updated to 46
pci_bus 0004:42: busn_res: can not insert [bus 42-46] under [bus 41] (conflicts with (null) [bus 41])
pci 0004:41:00.0: devices behind bridge are unusable because [bus 42-46] cannot be assigned for them
pcieport 0004:40:00.0: bridge has subordinate 41 but max busn 46
During the initial scan, PCI core doesn't see the switch and since the Root
Port is not hot plug capable, the secondary bus number gets assigned as the
subordinate bus number. This means, the PCI core assumes that only one bus
will appear behind the Root Port since the Root Port is not hot plug
capable.
This works perfectly fine for PCIe endpoints connected to the Root Port,
since they don't extend the bus. However, if a PCIe switch is connected,
then there is a problem when the downstream busses starts showing up and
the PCI core doesn't extend the subordinate bus number after initial scan
during boot.
The long term plan is to migrate this driver to the pwrctrl framework,
once it adds proper support for powering up and enumerating PCIe switches.
Cc: stable(a)vger.kernel.org
Suggested-by: Manivannan Sadhasivam <mani(a)kernel.org>
Signed-off-by: Niklas Cassel <cassel(a)kernel.org>
---
drivers/pci/controller/dwc/pcie-dw-rockchip.c | 59 +------------------
1 file changed, 3 insertions(+), 56 deletions(-)
diff --git a/drivers/pci/controller/dwc/pcie-dw-rockchip.c b/drivers/pci/controller/dwc/pcie-dw-rockchip.c
index 07378ececd88..7eceec8c9c83 100644
--- a/drivers/pci/controller/dwc/pcie-dw-rockchip.c
+++ b/drivers/pci/controller/dwc/pcie-dw-rockchip.c
@@ -448,34 +448,6 @@ static const struct dw_pcie_ops dw_pcie_ops = {
.stop_link = rockchip_pcie_stop_link,
};
-static irqreturn_t rockchip_pcie_rc_sys_irq_thread(int irq, void *arg)
-{
- struct rockchip_pcie *rockchip = arg;
- struct dw_pcie *pci = &rockchip->pci;
- struct dw_pcie_rp *pp = &pci->pp;
- struct device *dev = pci->dev;
- u32 reg;
-
- reg = rockchip_pcie_readl_apb(rockchip, PCIE_CLIENT_INTR_STATUS_MISC);
- rockchip_pcie_writel_apb(rockchip, reg, PCIE_CLIENT_INTR_STATUS_MISC);
-
- dev_dbg(dev, "PCIE_CLIENT_INTR_STATUS_MISC: %#x\n", reg);
- dev_dbg(dev, "LTSSM_STATUS: %#x\n", rockchip_pcie_get_ltssm(rockchip));
-
- if (reg & PCIE_RDLH_LINK_UP_CHGED) {
- if (rockchip_pcie_link_up(pci)) {
- msleep(PCIE_RESET_CONFIG_WAIT_MS);
- dev_dbg(dev, "Received Link up event. Starting enumeration!\n");
- /* Rescan the bus to enumerate endpoint devices */
- pci_lock_rescan_remove();
- pci_rescan_bus(pp->bridge->bus);
- pci_unlock_rescan_remove();
- }
- }
-
- return IRQ_HANDLED;
-}
-
static irqreturn_t rockchip_pcie_ep_sys_irq_thread(int irq, void *arg)
{
struct rockchip_pcie *rockchip = arg;
@@ -508,29 +480,14 @@ static irqreturn_t rockchip_pcie_ep_sys_irq_thread(int irq, void *arg)
return IRQ_HANDLED;
}
-static int rockchip_pcie_configure_rc(struct platform_device *pdev,
- struct rockchip_pcie *rockchip)
+static int rockchip_pcie_configure_rc(struct rockchip_pcie *rockchip)
{
- struct device *dev = &pdev->dev;
struct dw_pcie_rp *pp;
- int irq, ret;
u32 val;
if (!IS_ENABLED(CONFIG_PCIE_ROCKCHIP_DW_HOST))
return -ENODEV;
- irq = platform_get_irq_byname(pdev, "sys");
- if (irq < 0)
- return irq;
-
- ret = devm_request_threaded_irq(dev, irq, NULL,
- rockchip_pcie_rc_sys_irq_thread,
- IRQF_ONESHOT, "pcie-sys-rc", rockchip);
- if (ret) {
- dev_err(dev, "failed to request PCIe sys IRQ\n");
- return ret;
- }
-
/* LTSSM enable control mode */
val = FIELD_PREP_WM16(PCIE_LTSSM_ENABLE_ENHANCE, 1);
rockchip_pcie_writel_apb(rockchip, val, PCIE_CLIENT_HOT_RESET_CTRL);
@@ -542,17 +499,7 @@ static int rockchip_pcie_configure_rc(struct platform_device *pdev,
pp = &rockchip->pci.pp;
pp->ops = &rockchip_pcie_host_ops;
- ret = dw_pcie_host_init(pp);
- if (ret) {
- dev_err(dev, "failed to initialize host\n");
- return ret;
- }
-
- /* unmask DLL up/down indicator */
- val = FIELD_PREP_WM16(PCIE_RDLH_LINK_UP_CHGED, 0);
- rockchip_pcie_writel_apb(rockchip, val, PCIE_CLIENT_INTR_MASK_MISC);
-
- return ret;
+ return dw_pcie_host_init(pp);
}
static int rockchip_pcie_configure_ep(struct platform_device *pdev,
@@ -678,7 +625,7 @@ static int rockchip_pcie_probe(struct platform_device *pdev)
switch (data->mode) {
case DW_PCIE_RC_TYPE:
- ret = rockchip_pcie_configure_rc(pdev, rockchip);
+ ret = rockchip_pcie_configure_rc(rockchip);
if (ret)
goto deinit_clk;
break;
--
2.51.1
This reverts commit ec9fd499b9c60a187ac8d6414c3c343c77d32e42.
While this fake hotplugging was a nice idea, it has shown that this feature
does not handle PCIe switches correctly:
pci_bus 0004:43: busn_res: can not insert [bus 43-41] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci_bus 0004:43: busn_res: [bus 43-41] end is updated to 43
pci_bus 0004:43: busn_res: can not insert [bus 43] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci 0004:42:00.0: devices behind bridge are unusable because [bus 43] cannot be assigned for them
pci_bus 0004:44: busn_res: can not insert [bus 44-41] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci_bus 0004:44: busn_res: [bus 44-41] end is updated to 44
pci_bus 0004:44: busn_res: can not insert [bus 44] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci 0004:42:02.0: devices behind bridge are unusable because [bus 44] cannot be assigned for them
pci_bus 0004:45: busn_res: can not insert [bus 45-41] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci_bus 0004:45: busn_res: [bus 45-41] end is updated to 45
pci_bus 0004:45: busn_res: can not insert [bus 45] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci 0004:42:06.0: devices behind bridge are unusable because [bus 45] cannot be assigned for them
pci_bus 0004:46: busn_res: can not insert [bus 46-41] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci_bus 0004:46: busn_res: [bus 46-41] end is updated to 46
pci_bus 0004:46: busn_res: can not insert [bus 46] under [bus 42-41] (conflicts with (null) [bus 42-41])
pci 0004:42:0e.0: devices behind bridge are unusable because [bus 46] cannot be assigned for them
pci_bus 0004:42: busn_res: [bus 42-41] end is updated to 46
pci_bus 0004:42: busn_res: can not insert [bus 42-46] under [bus 41] (conflicts with (null) [bus 41])
pci 0004:41:00.0: devices behind bridge are unusable because [bus 42-46] cannot be assigned for them
pcieport 0004:40:00.0: bridge has subordinate 41 but max busn 46
During the initial scan, PCI core doesn't see the switch and since the Root
Port is not hot plug capable, the secondary bus number gets assigned as the
subordinate bus number. This means, the PCI core assumes that only one bus
will appear behind the Root Port since the Root Port is not hot plug
capable.
This works perfectly fine for PCIe endpoints connected to the Root Port,
since they don't extend the bus. However, if a PCIe switch is connected,
then there is a problem when the downstream busses starts showing up and
the PCI core doesn't extend the subordinate bus number after initial scan
during boot.
The long term plan is to migrate this driver to the pwrctrl framework,
once it adds proper support for powering up and enumerating PCIe switches.
Cc: stable(a)vger.kernel.org
Suggested-by: Manivannan Sadhasivam <mani(a)kernel.org>
Signed-off-by: Niklas Cassel <cassel(a)kernel.org>
---
drivers/pci/controller/dwc/pcie-dw-rockchip.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/drivers/pci/controller/dwc/pcie-dw-rockchip.c b/drivers/pci/controller/dwc/pcie-dw-rockchip.c
index 3e2752c7dd09..07378ececd88 100644
--- a/drivers/pci/controller/dwc/pcie-dw-rockchip.c
+++ b/drivers/pci/controller/dwc/pcie-dw-rockchip.c
@@ -541,7 +541,6 @@ static int rockchip_pcie_configure_rc(struct platform_device *pdev,
pp = &rockchip->pci.pp;
pp->ops = &rockchip_pcie_host_ops;
- pp->use_linkup_irq = true;
ret = dw_pcie_host_init(pp);
if (ret) {
--
2.51.1
The sockmap feature allows bpf syscall from userspace, or based
on bpf sockops, replacing the sk_prot of sockets during protocol stack
processing with sockmap's custom read/write interfaces.
'''
tcp_rcv_state_process()
syn_recv_sock()/subflow_syn_recv_sock()
tcp_init_transfer(BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB)
bpf_skops_established <== sockops
bpf_sock_map_update(sk) <== call bpf helper
tcp_bpf_update_proto() <== update sk_prot
'''
When the server has MPTCP enabled but the client sends a TCP SYN
without MPTCP, subflow_syn_recv_sock() performs a fallback on the
subflow, replacing the subflow sk's sk_prot with the native sk_prot.
'''
subflow_syn_recv_sock()
subflow_ulp_fallback()
subflow_drop_ctx()
mptcp_subflow_ops_undo_override()
'''
Then, this subflow can be normally used by sockmap, which replaces the
native sk_prot with sockmap's custom sk_prot. The issue occurs when the
user executes accept::mptcp_stream_accept::mptcp_fallback_tcp_ops().
Here, it uses sk->sk_prot to compare with the native sk_prot, but this
is incorrect when sockmap is used, as we may incorrectly set
sk->sk_socket->ops.
This fix uses the more generic sk_family for the comparison instead.
Additionally, this also prevents a WARNING from occurring:
result from ./scripts/decode_stacktrace.sh:
------------[ cut here ]------------
WARNING: CPU: 0 PID: 337 at net/mptcp/protocol.c:68 mptcp_stream_accept \
(net/mptcp/protocol.c:4005)
Modules linked in:
...
PKRU: 55555554
Call Trace:
<TASK>
do_accept (net/socket.c:1989)
__sys_accept4 (net/socket.c:2028 net/socket.c:2057)
__x64_sys_accept (net/socket.c:2067)
x64_sys_call (arch/x86/entry/syscall_64.c:41)
do_syscall_64 (arch/x86/entry/syscall_64.c:63 arch/x86/entry/syscall_64.c:94)
entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
RIP: 0033:0x7f87ac92b83d
---[ end trace 0000000000000000 ]---
Fixes: cec37a6e41aa ("mptcp: Handle MP_CAPABLE options for outgoing connections")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Jiayuan Chen <jiayuan.chen(a)linux.dev>
Reviewed-by: Jakub Sitnicki <jakub(a)cloudflare.com>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
---
net/mptcp/protocol.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 2d6b8de35c44..90b4aeca2596 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -61,11 +61,13 @@ static u64 mptcp_wnd_end(const struct mptcp_sock *msk)
static const struct proto_ops *mptcp_fallback_tcp_ops(const struct sock *sk)
{
+ unsigned short family = READ_ONCE(sk->sk_family);
+
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- if (sk->sk_prot == &tcpv6_prot)
+ if (family == AF_INET6)
return &inet6_stream_ops;
#endif
- WARN_ON_ONCE(sk->sk_prot != &tcp_prot);
+ WARN_ON_ONCE(family != AF_INET);
return &inet_stream_ops;
}
--
2.43.0
The spsc_queue is an unlocked, highly asynchronous piece of
infrastructure. Its inline function spsc_queue_peek() obtains the head
entry of the queue.
This access is performed without READ_ONCE() and is, therefore,
undefined behavior. In order to prevent the compiler from ever
reordering that access, or even optimizing it away, a READ_ONCE() is
strictly necessary. This is easily proven by the fact that
spsc_queue_pop() uses this very pattern to access the head.
Add READ_ONCE() to spsc_queue_peek().
Cc: stable(a)vger.kernel.org # v4.16+
Fixes: 27105db6c63a ("drm/amdgpu: Add SPSC queue to scheduler.")
Signed-off-by: Philipp Stanner <phasta(a)kernel.org>
---
I think this makes it less broken, but I'm not even sure if it's enough
or more memory barriers or an rcu_dereference() would be correct. The
spsc_queue is, of course, not documented and the existing barrier
comments are either false or not telling.
If someone has an idea, shoot us the info. Otherwise I think this is the
right thing to do for now.
P.
---
include/drm/spsc_queue.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/drm/spsc_queue.h b/include/drm/spsc_queue.h
index ee9df8cc67b7..39bada748ffc 100644
--- a/include/drm/spsc_queue.h
+++ b/include/drm/spsc_queue.h
@@ -54,7 +54,7 @@ static inline void spsc_queue_init(struct spsc_queue *queue)
static inline struct spsc_node *spsc_queue_peek(struct spsc_queue *queue)
{
- return queue->head;
+ return READ_ONCE(queue->head);
}
static inline int spsc_queue_count(struct spsc_queue *queue)
--
2.49.0
From: Kairui Song <kasong(a)tencent.com>
This reverts commit 78524b05f1a3e16a5d00cc9c6259c41a9d6003ce.
While reviewing recent leaf entry changes, I noticed that commit
78524b05f1a3 ("mm, swap: avoid redundant swap device pinning") isn't
correct. It's true that most all callers of __read_swap_cache_async are
already holding a swap entry reference, so the repeated swap device
pinning isn't needed on the same swap device, but it is possible that
VMA readahead (swap_vma_readahead()) may encounter swap entries from a
different swap device when there are multiple swap devices, and call
__read_swap_cache_async without holding a reference to that swap device.
So it is possible to cause a UAF if swapoff of device A raced with
swapin on device B, and VMA readahead tries to read swap entries from
device A. It's not easy to trigger but in theory possible to cause real
issues. And besides, that commit made swap more vulnerable to issues
like corrupted page tables.
Just revert it. __read_swap_cache_async isn't that sensitive to
performance after all, as it's mostly used for SSD/HDD swap devices with
readahead. SYNCHRONOUS_IO devices may fallback onto it for swap count >
1 entries, but very soon we will have a new helper and routine for
such devices, so they will never touch this helper or have redundant
swap device reference overhead.
Fixes: 78524b05f1a3 ("mm, swap: avoid redundant swap device pinning")
Signed-off-by: Kairui Song <kasong(a)tencent.com>
---
mm/swap_state.c | 14 ++++++--------
mm/zswap.c | 8 +-------
2 files changed, 7 insertions(+), 15 deletions(-)
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 3f85a1c4cfd9..0c25675de977 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -406,13 +406,17 @@ struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
struct mempolicy *mpol, pgoff_t ilx, bool *new_page_allocated,
bool skip_if_exists)
{
- struct swap_info_struct *si = __swap_entry_to_info(entry);
+ struct swap_info_struct *si;
struct folio *folio;
struct folio *new_folio = NULL;
struct folio *result = NULL;
void *shadow = NULL;
*new_page_allocated = false;
+ si = get_swap_device(entry);
+ if (!si)
+ return NULL;
+
for (;;) {
int err;
@@ -499,6 +503,7 @@ struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
put_swap_folio(new_folio, entry);
folio_unlock(new_folio);
put_and_return:
+ put_swap_device(si);
if (!(*new_page_allocated) && new_folio)
folio_put(new_folio);
return result;
@@ -518,16 +523,11 @@ struct folio *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
struct vm_area_struct *vma, unsigned long addr,
struct swap_iocb **plug)
{
- struct swap_info_struct *si;
bool page_allocated;
struct mempolicy *mpol;
pgoff_t ilx;
struct folio *folio;
- si = get_swap_device(entry);
- if (!si)
- return NULL;
-
mpol = get_vma_policy(vma, addr, 0, &ilx);
folio = __read_swap_cache_async(entry, gfp_mask, mpol, ilx,
&page_allocated, false);
@@ -535,8 +535,6 @@ struct folio *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
if (page_allocated)
swap_read_folio(folio, plug);
-
- put_swap_device(si);
return folio;
}
diff --git a/mm/zswap.c b/mm/zswap.c
index 5d0f8b13a958..aefe71fd160c 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1005,18 +1005,12 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
struct folio *folio;
struct mempolicy *mpol;
bool folio_was_allocated;
- struct swap_info_struct *si;
int ret = 0;
/* try to allocate swap cache folio */
- si = get_swap_device(swpentry);
- if (!si)
- return -EEXIST;
-
mpol = get_task_policy(current);
folio = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol,
- NO_INTERLEAVE_INDEX, &folio_was_allocated, true);
- put_swap_device(si);
+ NO_INTERLEAVE_INDEX, &folio_was_allocated, true);
if (!folio)
return -ENOMEM;
---
base-commit: 02dafa01ec9a00c3758c1c6478d82fe601f5f1ba
change-id: 20251109-revert-78524b05f1a3-04a1295bef8a
Best regards,
--
Kairui Song <kasong(a)tencent.com>
The sockmap feature allows bpf syscall from userspace, or based on bpf
sockops, replacing the sk_prot of sockets during protocol stack processing
with sockmap's custom read/write interfaces.
'''
tcp_rcv_state_process()
subflow_syn_recv_sock()
tcp_init_transfer(BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB)
bpf_skops_established <== sockops
bpf_sock_map_update(sk) <== call bpf helper
tcp_bpf_update_proto() <== update sk_prot
'''
Consider two scenarios:
1. When the server has MPTCP enabled and the client also requests MPTCP,
the sk passed to the BPF program is a subflow sk. Since subflows only
handle partial data, replacing their sk_prot is meaningless and will
cause traffic disruption.
2. When the server has MPTCP enabled but the client sends a TCP SYN
without MPTCP, subflow_syn_recv_sock() performs a fallback on the
subflow, replacing the subflow sk's sk_prot with the native sk_prot.
'''
subflow_ulp_fallback()
subflow_drop_ctx()
mptcp_subflow_ops_undo_override()
'''
Subsequently, accept::mptcp_stream_accept::mptcp_fallback_tcp_ops()
converts the subflow to plain TCP.
For the first case, we should prevent it from being combined with sockmap
by setting sk_prot->psock_update_sk_prot to NULL, which will be blocked by
sockmap's own flow.
For the second case, since subflow_syn_recv_sock() has already restored
sk_prot to native tcp_prot/tcpv6_prot, no further action is needed.
Fixes: cec37a6e41aa ("mptcp: Handle MP_CAPABLE options for outgoing connections")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Jiayuan Chen <jiayuan.chen(a)linux.dev>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
---
net/mptcp/subflow.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index e8325890a322..af707ce0f624 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -2144,6 +2144,10 @@ void __init mptcp_subflow_init(void)
tcp_prot_override = tcp_prot;
tcp_prot_override.release_cb = tcp_release_cb_override;
tcp_prot_override.diag_destroy = tcp_abort_override;
+#ifdef CONFIG_BPF_SYSCALL
+ /* Disable sockmap processing for subflows */
+ tcp_prot_override.psock_update_sk_prot = NULL;
+#endif
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
/* In struct mptcp_subflow_request_sock, we assume the TCP request sock
@@ -2180,6 +2184,10 @@ void __init mptcp_subflow_init(void)
tcpv6_prot_override = tcpv6_prot;
tcpv6_prot_override.release_cb = tcp_release_cb_override;
tcpv6_prot_override.diag_destroy = tcp_abort_override;
+#ifdef CONFIG_BPF_SYSCALL
+ /* Disable sockmap processing for subflows */
+ tcpv6_prot_override.psock_update_sk_prot = NULL;
+#endif
#endif
mptcp_diag_subflow_init(&subflow_ulp_ops);
--
2.43.0
Hello,
New build issue found on stable-rc/linux-5.4.y:
---
clang: error: linker command failed with exit code 1 (use -v to see invocation) in samples/seccomp/bpf-fancy (scripts/Makefile.host:116) [logspec:kbuild,kbuild.other]
---
- dashboard: https://d.kernelci.org/i/maestro:9b282409ffe9399386349927812ed439dcc91837
- giturl: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
- commit HEAD: 350bc296cce9fcac34ec525a838f99ac76e33550
Log excerpt:
=====================================================
.o
/usr/bin/ld: cannot find crtbeginS.o: No such file or directory
/usr/bin/ld: cannot find -lgcc: No such file or directory
/usr/bin/ld: cannot find -lgcc_s: No such file or directory
clang: error: linker command failed with exit code 1 (use -v to see invocation)
=====================================================
# Builds where the incident occurred:
## i386_defconfig+allmodconfig+CONFIG_FRAME_WARN=2048 on (i386):
- compiler: clang-17
- config: https://files.kernelci.org/kbuild-clang-17-i386-allmodconfig-69128f652fd237…
- dashboard: https://d.kernelci.org/build/maestro:69128f652fd2377ea99535c5
#kernelci issue maestro:9b282409ffe9399386349927812ed439dcc91837
Reported-by: kernelci.org bot <bot(a)kernelci.org>
--
This is an experimental report format. Please send feedback in!
Talk to us at kernelci(a)lists.linux.dev
Made with love by the KernelCI team - https://kernelci.org
Fix a memory leak in netpoll and introduce netconsole selftests that
expose the issue when running with kmemleak detection enabled.
This patchset includes a selftest for netpoll with multiple concurrent
users (netconsole + bonding), which simulates the scenario from test[1]
that originally demonstrated the issue allegedly fixed by commit
efa95b01da18 ("netpoll: fix use after free") - a commit that is now
being reverted.
Sending this to "net" branch because this is a fix, and the selftest
might help with the backports validation.
Link: https://lore.kernel.org/lkml/96b940137a50e5c387687bb4f57de8b0435a653f.14048… [1]
Signed-off-by: Breno Leitao <leitao(a)debian.org>
---
Changes in v10:
- Get rid of the create_and_enable_dynamic_target() (Simon)
- Link to v9: https://lore.kernel.org/r/20251106-netconsole_torture-v9-0-f73cd147c13c@deb…
Changes in v9:
- Reordered the config entries in tools/testing/selftests/drivers/net/bonding/config (NIPA)
- Link to v8: https://lore.kernel.org/r/20251104-netconsole_torture-v8-0-5288440e2fa0@deb…
Changes in v8:
- Sending it again, now that commit 1a8fed52f7be1 ("netdevsim: set the
carrier when the device goes up") has landed in net
- Created one namespace for TX and one for RX (Paolo)
- Used additional helpers to create and delete netdevsim (Paolo)
- Link to v7: https://lore.kernel.org/r/20251003-netconsole_torture-v7-0-aa92fcce62a9@deb…
Changes in v7:
- Rebased on top of `net`
- Link to v6: https://lore.kernel.org/r/20251002-netconsole_torture-v6-0-543bf52f6b46@deb…
Changes in v6:
- Expand the tests even more and some small fixups
- Moved the test to bonding selftests
- Link to v5: https://lore.kernel.org/r/20250918-netconsole_torture-v5-0-77e25e0a4eb6@deb…
Changes in v5:
- Set CONFIG_BONDING=m in selftests/drivers/net/config.
- Link to v4: https://lore.kernel.org/r/20250917-netconsole_torture-v4-0-0a5b3b8f81ce@deb…
Changes in v4:
- Added an additional selftest to test multiple netpoll users in
parallel
- Link to v3: https://lore.kernel.org/r/20250905-netconsole_torture-v3-0-875c7febd316@deb…
Changes in v3:
- This patchset is a merge of the fix and the selftest together as
recommended by Jakub.
Changes in v2:
- Reuse the netconsole creation from lib_netcons.sh. Thus, refactoring
the create_dynamic_target() (Jakub)
- Move the "wait" to after all the messages has been sent.
- Link to v1: https://lore.kernel.org/r/20250902-netconsole_torture-v1-1-03c6066598e9@deb…
---
Breno Leitao (4):
net: netpoll: fix incorrect refcount handling causing incorrect cleanup
selftest: netcons: refactor target creation
selftest: netcons: create a torture test
selftest: netcons: add test for netconsole over bonded interfaces
net/core/netpoll.c | 7 +-
tools/testing/selftests/drivers/net/Makefile | 1 +
.../testing/selftests/drivers/net/bonding/Makefile | 2 +
tools/testing/selftests/drivers/net/bonding/config | 4 +
.../drivers/net/bonding/netcons_over_bonding.sh | 361 +++++++++++++++++++++
.../selftests/drivers/net/lib/sh/lib_netcons.sh | 78 ++++-
.../selftests/drivers/net/netcons_torture.sh | 130 ++++++++
7 files changed, 566 insertions(+), 17 deletions(-)
---
base-commit: 7d1988a943850c584e8e2e4bcc7a3b5275024072
change-id: 20250902-netconsole_torture-8fc23f0aca99
Best regards,
--
Breno Leitao <leitao(a)debian.org>
When turbo mode is unavailable on a Skylake-X system, executing the
command:
"echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo"
results in an unchecked MSR access error: WRMSR to 0x199
(attempted to write 0x0000000100001300).
This issue was reproduced on an OEM (Original Equipment Manufacturer)
system and is not a common problem across all Skylake-X systems.
This error occurs because the MSR 0x199 Turbo Engage Bit (bit 32) is set
when turbo mode is disabled. The issue arises when intel_pstate fails to
detect that turbo mode is disabled. Here intel_pstate relies on
MSR_IA32_MISC_ENABLE bit 38 to determine the status of turbo mode.
However, on this system, bit 38 is not set even when turbo mode is
disabled.
According to the Intel Software Developer's Manual (SDM), the BIOS sets
this bit during platform initialization to enable or disable
opportunistic processor performance operations. Logically, this bit
should be set in such cases. However, the SDM also specifies that "OS and
applications must use CPUID leaf 06H to detect processors with
opportunistic processor performance operations enabled."
Therefore, in addition to checking MSR_IA32_MISC_ENABLE bit 38, verify
that CPUID.06H:EAX[1] is 0 to accurately determine if turbo mode is
disabled.
Fixes: 4521e1a0ce17 ("cpufreq: intel_pstate: Reflect current no_turbo state correctly")
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
---
drivers/cpufreq/intel_pstate.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index f41ed0b9e610..ba9bf06f1c77 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -598,6 +598,9 @@ static bool turbo_is_disabled(void)
{
u64 misc_en;
+ if (!cpu_feature_enabled(X86_FEATURE_IDA))
+ return true;
+
rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
return !!(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
--
2.48.1
The patch below does not apply to the 6.17-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.17.y
git checkout FETCH_HEAD
git cherry-pick -x a26a6c93edfeee82cb73f55e87d995eea59ddfe8
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025111057-slick-manatee-7f63@gregkh' --subject-prefix 'PATCH 6.17.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a26a6c93edfeee82cb73f55e87d995eea59ddfe8 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan(a)kernel.org>
Date: Wed, 5 Nov 2025 15:30:27 -0700
Subject: [PATCH] kbuild: Strip trailing padding bytes from
modules.builtin.modinfo
After commit d50f21091358 ("kbuild: align modinfo section for Secureboot
Authenticode EDK2 compat"), running modules_install with certain
versions of kmod (such as 29.1 in Ubuntu Jammy) in certain
configurations may fail with:
depmod: ERROR: kmod_builtin_iter_next: unexpected string without modname prefix
The additional padding bytes to ensure .modinfo is aligned within
vmlinux.unstripped are unexpected by kmod, as this section has always
just been null-terminated strings.
Strip the trailing padding bytes from modules.builtin.modinfo after it
has been extracted from vmlinux.unstripped to restore the format that
kmod expects while keeping .modinfo aligned within vmlinux.unstripped to
avoid regressing the Authenticode calculation fix for EDK2.
Cc: stable(a)vger.kernel.org
Fixes: d50f21091358 ("kbuild: align modinfo section for Secureboot Authenticode EDK2 compat")
Reported-by: Omar Sandoval <osandov(a)fb.com>
Reported-by: Samir M <samir(a)linux.ibm.com>
Reported-by: Venkat Rao Bagalkote <venkat88(a)linux.ibm.com>
Closes: https://lore.kernel.org/7fef7507-ad64-4e51-9bb8-c9fb6532e51e@linux.ibm.com/
Tested-by: Omar Sandoval <osandov(a)fb.com>
Tested-by: Samir M <samir(a)linux.ibm.com>
Tested-by: Venkat Rao Bagalkote <venkat88(a)linux.ibm.com>
Reviewed-by: Nicolas Schier <nsc(a)kernel.org>
Link: https://patch.msgid.link/20251105-kbuild-fix-builtin-modinfo-for-kmod-v1-1-…
Signed-off-by: Nathan Chancellor <nathan(a)kernel.org>
diff --git a/scripts/Makefile.vmlinux b/scripts/Makefile.vmlinux
index ced4379550d7..cd788cac9d91 100644
--- a/scripts/Makefile.vmlinux
+++ b/scripts/Makefile.vmlinux
@@ -102,11 +102,24 @@ vmlinux: vmlinux.unstripped FORCE
# modules.builtin.modinfo
# ---------------------------------------------------------------------------
+# .modinfo in vmlinux.unstripped is aligned to 8 bytes for compatibility with
+# tools that expect vmlinux to have sufficiently aligned sections but the
+# additional bytes used for padding .modinfo to satisfy this requirement break
+# certain versions of kmod with
+#
+# depmod: ERROR: kmod_builtin_iter_next: unexpected string without modname prefix
+#
+# Strip the trailing padding bytes after extracting .modinfo to comply with
+# what kmod expects to parse.
+quiet_cmd_modules_builtin_modinfo = GEN $@
+ cmd_modules_builtin_modinfo = $(cmd_objcopy); \
+ sed -i 's/\x00\+$$/\x00/g' $@
+
OBJCOPYFLAGS_modules.builtin.modinfo := -j .modinfo -O binary
targets += modules.builtin.modinfo
modules.builtin.modinfo: vmlinux.unstripped FORCE
- $(call if_changed,objcopy)
+ $(call if_changed,modules_builtin_modinfo)
# modules.builtin
# ---------------------------------------------------------------------------
The patch below does not apply to the 6.17-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.17.y
git checkout FETCH_HEAD
git cherry-pick -x a26a6c93edfeee82cb73f55e87d995eea59ddfe8
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025111122-suitor-absently-2164@gregkh' --subject-prefix 'PATCH 6.17.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a26a6c93edfeee82cb73f55e87d995eea59ddfe8 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan(a)kernel.org>
Date: Wed, 5 Nov 2025 15:30:27 -0700
Subject: [PATCH] kbuild: Strip trailing padding bytes from
modules.builtin.modinfo
After commit d50f21091358 ("kbuild: align modinfo section for Secureboot
Authenticode EDK2 compat"), running modules_install with certain
versions of kmod (such as 29.1 in Ubuntu Jammy) in certain
configurations may fail with:
depmod: ERROR: kmod_builtin_iter_next: unexpected string without modname prefix
The additional padding bytes to ensure .modinfo is aligned within
vmlinux.unstripped are unexpected by kmod, as this section has always
just been null-terminated strings.
Strip the trailing padding bytes from modules.builtin.modinfo after it
has been extracted from vmlinux.unstripped to restore the format that
kmod expects while keeping .modinfo aligned within vmlinux.unstripped to
avoid regressing the Authenticode calculation fix for EDK2.
Cc: stable(a)vger.kernel.org
Fixes: d50f21091358 ("kbuild: align modinfo section for Secureboot Authenticode EDK2 compat")
Reported-by: Omar Sandoval <osandov(a)fb.com>
Reported-by: Samir M <samir(a)linux.ibm.com>
Reported-by: Venkat Rao Bagalkote <venkat88(a)linux.ibm.com>
Closes: https://lore.kernel.org/7fef7507-ad64-4e51-9bb8-c9fb6532e51e@linux.ibm.com/
Tested-by: Omar Sandoval <osandov(a)fb.com>
Tested-by: Samir M <samir(a)linux.ibm.com>
Tested-by: Venkat Rao Bagalkote <venkat88(a)linux.ibm.com>
Reviewed-by: Nicolas Schier <nsc(a)kernel.org>
Link: https://patch.msgid.link/20251105-kbuild-fix-builtin-modinfo-for-kmod-v1-1-…
Signed-off-by: Nathan Chancellor <nathan(a)kernel.org>
diff --git a/scripts/Makefile.vmlinux b/scripts/Makefile.vmlinux
index ced4379550d7..cd788cac9d91 100644
--- a/scripts/Makefile.vmlinux
+++ b/scripts/Makefile.vmlinux
@@ -102,11 +102,24 @@ vmlinux: vmlinux.unstripped FORCE
# modules.builtin.modinfo
# ---------------------------------------------------------------------------
+# .modinfo in vmlinux.unstripped is aligned to 8 bytes for compatibility with
+# tools that expect vmlinux to have sufficiently aligned sections but the
+# additional bytes used for padding .modinfo to satisfy this requirement break
+# certain versions of kmod with
+#
+# depmod: ERROR: kmod_builtin_iter_next: unexpected string without modname prefix
+#
+# Strip the trailing padding bytes after extracting .modinfo to comply with
+# what kmod expects to parse.
+quiet_cmd_modules_builtin_modinfo = GEN $@
+ cmd_modules_builtin_modinfo = $(cmd_objcopy); \
+ sed -i 's/\x00\+$$/\x00/g' $@
+
OBJCOPYFLAGS_modules.builtin.modinfo := -j .modinfo -O binary
targets += modules.builtin.modinfo
modules.builtin.modinfo: vmlinux.unstripped FORCE
- $(call if_changed,objcopy)
+ $(call if_changed,modules_builtin_modinfo)
# modules.builtin
# ---------------------------------------------------------------------------
Hi Sasha,
Same here, can you backport the previous related commit (2f9c63883730
"drm/amd/display: update color on atomic commit time" [1]) too?
Otherwise, the commit below alone will cause regressions.
Thanks,
Melissa
[1]
https://github.com/torvalds/linux/commit/2f9c63883730a0bfecb086e6e59246933f…
On 04/11/2025 20:53, Sasha Levin wrote:
> This is a note to let you know that I've just added the patch titled
>
> drm/amd/display: change dc stream color settings only in atomic commit
>
> to the 6.17-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> drm-amd-display-change-dc-stream-color-settings-only.patch
> and it can be found in the queue-6.17 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
>
>
>
> commit 87fe0b67d6d8340123e563e7156fbdf070a2954d
> Author: Melissa Wen <mwen(a)igalia.com>
> Date: Thu Sep 11 14:21:20 2025 -0300
>
> drm/amd/display: change dc stream color settings only in atomic commit
>
> [ Upstream commit 51cb93aa0c4a9bb126b76f6e9fd640d88de25cee ]
>
> Don't update DC stream color components during atomic check. The driver
> will continue validating the new CRTC color state but will not change DC
> stream color components. The DC stream color state will only be
> programmed at commit time in the `atomic_setup_commit` stage.
>
> It fixes gamma LUT loss reported by KDE users when changing brightness
> quickly or changing Display settings (such as overscan) with nightlight
> on and HDR. As KWin can do a test commit with color settings different
> from those that should be applied in a non-test-only commit, if the
> driver changes DC stream color state in atomic check, this state can be
> eventually HW programmed in commit tail, instead of the respective state
> set by the non-blocking commit.
>
> Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4444
> Reported-by: Xaver Hugl <xaver.hugl(a)gmail.com>
> Signed-off-by: Melissa Wen <mwen(a)igalia.com>
> Reviewed-by: Harry Wentland <harry.wentland(a)amd.com>
> Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index d66c9609efd8d..60eb2c2c79b77 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -11105,7 +11105,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
> if (dm_new_crtc_state->base.color_mgmt_changed ||
> dm_old_crtc_state->regamma_tf != dm_new_crtc_state->regamma_tf ||
> drm_atomic_crtc_needs_modeset(new_crtc_state)) {
> - ret = amdgpu_dm_update_crtc_color_mgmt(dm_new_crtc_state);
> + ret = amdgpu_dm_check_crtc_color_mgmt(dm_new_crtc_state, true);
> if (ret)
> goto fail;
> }
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> index c18a6b43c76f6..42801caf57b69 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> @@ -1037,6 +1037,8 @@ void amdgpu_dm_init_color_mod(void);
> int amdgpu_dm_create_color_properties(struct amdgpu_device *adev);
> int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state);
> int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc);
> +int amdgpu_dm_check_crtc_color_mgmt(struct dm_crtc_state *crtc,
> + bool check_only);
> int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
> struct drm_plane_state *plane_state,
> struct dc_plane_state *dc_plane_state);
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
> index c0dfe2d8b3bec..d4739b6334c24 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
> @@ -566,12 +566,11 @@ static int __set_output_tf(struct dc_transfer_func *func,
> return res ? 0 : -ENOMEM;
> }
>
> -static int amdgpu_dm_set_atomic_regamma(struct dc_stream_state *stream,
> +static int amdgpu_dm_set_atomic_regamma(struct dc_transfer_func *out_tf,
> const struct drm_color_lut *regamma_lut,
> uint32_t regamma_size, bool has_rom,
> enum dc_transfer_func_predefined tf)
> {
> - struct dc_transfer_func *out_tf = &stream->out_transfer_func;
> int ret = 0;
>
> if (regamma_size || tf != TRANSFER_FUNCTION_LINEAR) {
> @@ -885,33 +884,33 @@ int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state)
> }
>
> /**
> - * amdgpu_dm_update_crtc_color_mgmt: Maps DRM color management to DC stream.
> + * amdgpu_dm_check_crtc_color_mgmt: Check if DRM color props are programmable by DC.
> * @crtc: amdgpu_dm crtc state
> + * @check_only: only check color state without update dc stream
> *
> - * With no plane level color management properties we're free to use any
> - * of the HW blocks as long as the CRTC CTM always comes before the
> - * CRTC RGM and after the CRTC DGM.
> - *
> - * - The CRTC RGM block will be placed in the RGM LUT block if it is non-linear.
> - * - The CRTC DGM block will be placed in the DGM LUT block if it is non-linear.
> - * - The CRTC CTM will be placed in the gamut remap block if it is non-linear.
> + * This function just verifies CRTC LUT sizes, if there is enough space for
> + * output transfer function and if its parameters can be calculated by AMD
> + * color module. It also adjusts some settings for programming CRTC degamma at
> + * plane stage, using plane DGM block.
> *
> * The RGM block is typically more fully featured and accurate across
> * all ASICs - DCE can't support a custom non-linear CRTC DGM.
> *
> * For supporting both plane level color management and CRTC level color
> - * management at once we have to either restrict the usage of CRTC properties
> - * or blend adjustments together.
> + * management at once we have to either restrict the usage of some CRTC
> + * properties or blend adjustments together.
> *
> * Returns:
> - * 0 on success. Error code if setup fails.
> + * 0 on success. Error code if validation fails.
> */
> -int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
> +
> +int amdgpu_dm_check_crtc_color_mgmt(struct dm_crtc_state *crtc,
> + bool check_only)
> {
> struct dc_stream_state *stream = crtc->stream;
> struct amdgpu_device *adev = drm_to_adev(crtc->base.state->dev);
> bool has_rom = adev->asic_type <= CHIP_RAVEN;
> - struct drm_color_ctm *ctm = NULL;
> + struct dc_transfer_func *out_tf;
> const struct drm_color_lut *degamma_lut, *regamma_lut;
> uint32_t degamma_size, regamma_size;
> bool has_regamma, has_degamma;
> @@ -940,6 +939,14 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
> crtc->cm_has_degamma = false;
> crtc->cm_is_degamma_srgb = false;
>
> + if (check_only) {
> + out_tf = kvzalloc(sizeof(*out_tf), GFP_KERNEL);
> + if (!out_tf)
> + return -ENOMEM;
> + } else {
> + out_tf = &stream->out_transfer_func;
> + }
> +
> /* Setup regamma and degamma. */
> if (is_legacy) {
> /*
> @@ -954,8 +961,8 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
> * inverse color ramp in legacy userspace.
> */
> crtc->cm_is_degamma_srgb = true;
> - stream->out_transfer_func.type = TF_TYPE_DISTRIBUTED_POINTS;
> - stream->out_transfer_func.tf = TRANSFER_FUNCTION_SRGB;
> + out_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
> + out_tf->tf = TRANSFER_FUNCTION_SRGB;
> /*
> * Note: although we pass has_rom as parameter here, we never
> * actually use ROM because the color module only takes the ROM
> @@ -963,16 +970,12 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
> *
> * See more in mod_color_calculate_regamma_params()
> */
> - r = __set_legacy_tf(&stream->out_transfer_func, regamma_lut,
> + r = __set_legacy_tf(out_tf, regamma_lut,
> regamma_size, has_rom);
> - if (r)
> - return r;
> } else {
> regamma_size = has_regamma ? regamma_size : 0;
> - r = amdgpu_dm_set_atomic_regamma(stream, regamma_lut,
> + r = amdgpu_dm_set_atomic_regamma(out_tf, regamma_lut,
> regamma_size, has_rom, tf);
> - if (r)
> - return r;
> }
>
> /*
> @@ -981,6 +984,43 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
> * have to place the CTM in the OCSC in that case.
> */
> crtc->cm_has_degamma = has_degamma;
> + if (check_only)
> + kvfree(out_tf);
> +
> + return r;
> +}
> +
> +/**
> + * amdgpu_dm_update_crtc_color_mgmt: Maps DRM color management to DC stream.
> + * @crtc: amdgpu_dm crtc state
> + *
> + * With no plane level color management properties we're free to use any
> + * of the HW blocks as long as the CRTC CTM always comes before the
> + * CRTC RGM and after the CRTC DGM.
> + *
> + * - The CRTC RGM block will be placed in the RGM LUT block if it is non-linear.
> + * - The CRTC DGM block will be placed in the DGM LUT block if it is non-linear.
> + * - The CRTC CTM will be placed in the gamut remap block if it is non-linear.
> + *
> + * The RGM block is typically more fully featured and accurate across
> + * all ASICs - DCE can't support a custom non-linear CRTC DGM.
> + *
> + * For supporting both plane level color management and CRTC level color
> + * management at once we have to either restrict the usage of CRTC properties
> + * or blend adjustments together.
> + *
> + * Returns:
> + * 0 on success. Error code if setup fails.
> + */
> +int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc)
> +{
> + struct dc_stream_state *stream = crtc->stream;
> + struct drm_color_ctm *ctm = NULL;
> + int ret;
> +
> + ret = amdgpu_dm_check_crtc_color_mgmt(crtc, false);
> + if (ret)
> + return ret;
>
> /* Setup CRTC CTM. */
> if (crtc->base.ctm) {
Set the DMA mask before calling nvkm_device_ctor(), so that when the
flush page is created in nvkm_fb_ctor(), the allocation will not fail
if the page is outside of DMA address space, which can easily happen if
IOMMU is disable. In such situations, you will get an error like this:
nouveau 0000:65:00.0: DMA addr 0x0000000107c56000+4096 overflow (mask ffffffff, bus limit 0).
Commit 38f5359354d4 ("rm/nouveau/pci: set streaming DMA mask early")
set the mask after calling nvkm_device_ctor(), but back then there was
no flush page being created, which might explain why the mask wasn't
set earlier.
Flush page allocation was added in commit 5728d064190e ("drm/nouveau/fb:
handle sysmem flush page from common code"). nvkm_fb_ctor() calls
alloc_page(), which can allocate a page anywhere in system memory, but
then calls dma_map_page() on that page. But since the DMA mask is still
set to 32, the map can fail if the page is allocated above 4GB. This is
easy to reproduce on systems with a lot of memory and IOMMU disabled.
An alternative approach would be to force the allocation of the flush
page to low memory, by specifying __GFP_DMA32. However, this would
always allocate the page in low memory, even though the hardware can
access high memory.
Fixes: 5728d064190e ("drm/nouveau/fb: handle sysmem flush page from common code")
Signed-off-by: Timur Tabi <ttabi(a)nvidia.com>
---
.../gpu/drm/nouveau/nvkm/engine/device/pci.c | 24 +++++++++----------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
index 8f0261a0d618..7cc5a7499583 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
@@ -1695,6 +1695,18 @@ nvkm_device_pci_new(struct pci_dev *pci_dev, const char *cfg, const char *dbg,
*pdevice = &pdev->device;
pdev->pdev = pci_dev;
+ /* Set DMA mask based on capabilities reported by the MMU subdev. */
+ if (pdev->device.mmu && !pdev->device.pci->agp.bridge)
+ bits = pdev->device.mmu->dma_bits;
+ else
+ bits = 32;
+
+ ret = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(bits));
+ if (ret && bits != 32) {
+ dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32));
+ pdev->device.mmu->dma_bits = 32;
+ }
+
ret = nvkm_device_ctor(&nvkm_device_pci_func, quirk, &pci_dev->dev,
pci_is_pcie(pci_dev) ? NVKM_DEVICE_PCIE :
pci_find_capability(pci_dev, PCI_CAP_ID_AGP) ?
@@ -1708,17 +1720,5 @@ nvkm_device_pci_new(struct pci_dev *pci_dev, const char *cfg, const char *dbg,
if (ret)
return ret;
- /* Set DMA mask based on capabilities reported by the MMU subdev. */
- if (pdev->device.mmu && !pdev->device.pci->agp.bridge)
- bits = pdev->device.mmu->dma_bits;
- else
- bits = 32;
-
- ret = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(bits));
- if (ret && bits != 32) {
- dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32));
- pdev->device.mmu->dma_bits = 32;
- }
-
return 0;
}
base-commit: 18a7e218cfcdca6666e1f7356533e4c988780b57
--
2.51.0
KVM currenty fails a nested VMRUN and injects VMEXIT_INVALID (aka
SVM_EXIT_ERR) if L1 sets NP_ENABLE and the host does not support NPTs.
On first glance, it seems like the check should actually be for
guest_cpu_cap_has(X86_FEATURE_NPT) instead, as it is possible for the
host to support NPTs but the guest CPUID to not advertise it.
However, the consistency check is not architectural to begin with. The
APM does not mention VMEXIT_INVALID if NP_ENABLE is set on a processor
that does not have X86_FEATURE_NPT. Hence, NP_ENABLE should be ignored
if X86_FEATURE_NPT is not available for L1. Apart from the consistency
check, this is currently the case because NP_ENABLE is actually copied
from VMCB01 to VMCB02, not from VMCB12.
On the other hand, the APM does mention two other consistency checks for
NP_ENABLE, both of which are missing (paraphrased):
In Volume #2, 15.25.3 (24593—Rev. 3.42—March 2024):
If VMRUN is executed with hCR0.PG cleared to zero and NP_ENABLE set to
1, VMRUN terminates with #VMEXIT(VMEXIT_INVALID)
In Volume #2, 15.25.4 (24593—Rev. 3.42—March 2024):
When VMRUN is executed with nested paging enabled (NP_ENABLE = 1), the
following conditions are considered illegal state combinations, in
addition to those mentioned in “Canonicalization and Consistency
Checks”:
• Any MBZ bit of nCR3 is set.
• Any G_PAT.PA field has an unsupported type encoding or any
reserved field in G_PAT has a nonzero value.
Replace the existing consistency check with consistency checks on
hCR0.PG and nCR3. Only perform the consistency checks if L1 has
X86_FEATURE_NPT and NP_ENABLE is set in VMCB12. The G_PAT consistency
check will be addressed separately.
As it is now possible for an L1 to run L2 with NP_ENABLE set but
ignored, also check that L1 has X86_FEATURE_NPT in nested_npt_enabled().
Pass L1's CR0 to __nested_vmcb_check_controls(). In
nested_vmcb_check_controls(), L1's CR0 is available through
kvm_read_cr0(), as vcpu->arch.cr0 is not updated to L2's CR0 until later
through nested_vmcb02_prepare_save() -> svm_set_cr0().
In svm_set_nested_state(), L1's CR0 is available in the captured save
area, as svm_get_nested_state() captures L1's save area when running L2,
and L1's CR0 is stashed in VMCB01 on nested VMRUN (in
nested_svm_vmrun()).
Fixes: 4b16184c1cca ("KVM: SVM: Initialize Nested Nested MMU context on VMRUN")
Cc: stable(a)vger.kernel.org
Signed-off-by: Yosry Ahmed <yosry.ahmed(a)linux.dev>
---
arch/x86/kvm/svm/nested.c | 21 ++++++++++++++++-----
arch/x86/kvm/svm/svm.h | 3 ++-
2 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 74211c5c68026..87bcc5eff96e8 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -325,7 +325,8 @@ static bool nested_svm_check_bitmap_pa(struct kvm_vcpu *vcpu, u64 pa, u32 size)
}
static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
- struct vmcb_ctrl_area_cached *control)
+ struct vmcb_ctrl_area_cached *control,
+ unsigned long l1_cr0)
{
if (CC(!vmcb12_is_intercept(control, INTERCEPT_VMRUN)))
return false;
@@ -333,8 +334,12 @@ static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
if (CC(control->asid == 0))
return false;
- if (CC((control->nested_ctl & SVM_NESTED_CTL_NP_ENABLE) && !npt_enabled))
- return false;
+ if (nested_npt_enabled(to_svm(vcpu))) {
+ if (CC(!kvm_vcpu_is_legal_gpa(vcpu, control->nested_cr3)))
+ return false;
+ if (CC(!(l1_cr0 & X86_CR0_PG)))
+ return false;
+ }
if (CC(!nested_svm_check_bitmap_pa(vcpu, control->msrpm_base_pa,
MSRPM_SIZE)))
@@ -400,7 +405,12 @@ static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb_ctrl_area_cached *ctl = &svm->nested.ctl;
- return __nested_vmcb_check_controls(vcpu, ctl);
+ /*
+ * Make sure we did not enter guest mode yet, in which case
+ * kvm_read_cr0() could return L2's CR0.
+ */
+ WARN_ON_ONCE(is_guest_mode(vcpu));
+ return __nested_vmcb_check_controls(vcpu, ctl, kvm_read_cr0(vcpu));
}
static
@@ -1831,7 +1841,8 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
ret = -EINVAL;
__nested_copy_vmcb_control_to_cache(vcpu, &ctl_cached, ctl);
- if (!__nested_vmcb_check_controls(vcpu, &ctl_cached))
+ /* 'save' contains L1 state saved from before VMRUN */
+ if (!__nested_vmcb_check_controls(vcpu, &ctl_cached, save->cr0))
goto out_free;
/*
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index f6fb70ddf7272..3e805a43ffcdb 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -552,7 +552,8 @@ static inline bool gif_set(struct vcpu_svm *svm)
static inline bool nested_npt_enabled(struct vcpu_svm *svm)
{
- return svm->nested.ctl.nested_ctl & SVM_NESTED_CTL_NP_ENABLE;
+ return guest_cpu_cap_has(&svm->vcpu, X86_FEATURE_NPT) &&
+ svm->nested.ctl.nested_ctl & SVM_NESTED_CTL_NP_ENABLE;
}
static inline bool nested_vnmi_enabled(struct vcpu_svm *svm)
--
2.51.2.1041.gc1ab5b90ca-goog
In preparation for using svm_copy_lbrs() with 'struct vmcb_save_area'
without a containing 'struct vmcb', and later even 'struct
vmcb_save_area_cached', make it a macro. Pull the call to
vmcb_mark_dirty() out to the callers.
Macros are generally not preferred compared to functions, mainly due to
type-safety. However, in this case it seems like having a simple macro
copying a few fields is better than copy-pasting the same 5 lines of
code in different places.
On the bright side, pulling vmcb_mark_dirty() calls to the callers makes
it clear that in one case, vmcb_mark_dirty() was being called on VMCB12.
It is not architecturally defined for the CPU to clear arbitrary clean
bits, and it is not needed, so drop that one call.
Technically fixes the non-architectural behavior of setting the dirty
bit on VMCB12.
Fixes: d20c796ca370 ("KVM: x86: nSVM: implement nested LBR virtualization")
Cc: stable(a)vger.kernel.org
Signed-off-by: Yosry Ahmed <yosry.ahmed(a)linux.dev>
---
arch/x86/kvm/svm/nested.c | 16 ++++++++++------
arch/x86/kvm/svm/svm.c | 11 -----------
arch/x86/kvm/svm/svm.h | 10 +++++++++-
3 files changed, 19 insertions(+), 18 deletions(-)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index da6e80b3ac353..a37bd5c1f36fa 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -675,10 +675,12 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
* Reserved bits of DEBUGCTL are ignored. Be consistent with
* svm_set_msr's definition of reserved bits.
*/
- svm_copy_lbrs(vmcb02, vmcb12);
+ svm_copy_lbrs(&vmcb02->save, &vmcb12->save);
+ vmcb_mark_dirty(vmcb02, VMCB_LBR);
vmcb02->save.dbgctl &= ~DEBUGCTL_RESERVED_BITS;
} else {
- svm_copy_lbrs(vmcb02, vmcb01);
+ svm_copy_lbrs(&vmcb02->save, &vmcb01->save);
+ vmcb_mark_dirty(vmcb02, VMCB_LBR);
}
svm_update_lbrv(&svm->vcpu);
}
@@ -1184,10 +1186,12 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
- (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK)))
- svm_copy_lbrs(vmcb12, vmcb02);
- else
- svm_copy_lbrs(vmcb01, vmcb02);
+ (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
+ svm_copy_lbrs(&vmcb12->save, &vmcb02->save);
+ } else {
+ svm_copy_lbrs(&vmcb01->save, &vmcb02->save);
+ vmcb_mark_dirty(vmcb01, VMCB_LBR);
+ }
svm_update_lbrv(vcpu);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 10c21e4c5406f..711276e8ee84f 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -795,17 +795,6 @@ static void svm_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
*/
}
-void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
-{
- to_vmcb->save.dbgctl = from_vmcb->save.dbgctl;
- to_vmcb->save.br_from = from_vmcb->save.br_from;
- to_vmcb->save.br_to = from_vmcb->save.br_to;
- to_vmcb->save.last_excp_from = from_vmcb->save.last_excp_from;
- to_vmcb->save.last_excp_to = from_vmcb->save.last_excp_to;
-
- vmcb_mark_dirty(to_vmcb, VMCB_LBR);
-}
-
static void __svm_enable_lbrv(struct kvm_vcpu *vcpu)
{
to_svm(vcpu)->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index c856d8e0f95e7..f6fb70ddf7272 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -687,8 +687,16 @@ static inline void *svm_vcpu_alloc_msrpm(void)
return svm_alloc_permissions_map(MSRPM_SIZE, GFP_KERNEL_ACCOUNT);
}
+#define svm_copy_lbrs(to, from) \
+({ \
+ (to)->dbgctl = (from)->dbgctl; \
+ (to)->br_from = (from)->br_from; \
+ (to)->br_to = (from)->br_to; \
+ (to)->last_excp_from = (from)->last_excp_from; \
+ (to)->last_excp_to = (from)->last_excp_to; \
+})
+
void svm_vcpu_free_msrpm(void *msrpm);
-void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb);
void svm_enable_lbrv(struct kvm_vcpu *vcpu);
void svm_update_lbrv(struct kvm_vcpu *vcpu);
--
2.51.2.1041.gc1ab5b90ca-goog
From: Samuel Zhang <guoqing.zhang(a)amd.com>
[ Upstream commit eb6e7f520d6efa4d4ebf1671455abe4a681f7a05 ]
On PF passthrough environment, after hibernate and then resume, coralgemm
will cause gpu page fault.
Mode1 reset happens during hibernate, but partition mode is not restored
on resume, register mmCP_HYP_XCP_CTL and mmCP_PSP_XCP_CTL is not right
after resume. When CP access the MQD BO, wrong stride size is used,
this will cause out of bound access on the MQD BO, resulting page fault.
The fix is to ensure gfx_v9_4_3_switch_compute_partition() is called
when resume from a hibernation.
KFD resume is called separately during a reset recovery or resume from
suspend sequence. Hence it's not required to be called as part of
partition switch.
Signed-off-by: Samuel Zhang <guoqing.zhang(a)amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
(cherry picked from commit 5d1b32cfe4a676fe552416cb5ae847b215463a1a)
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
LLM Generated explanations, may be completely bogus:
Based on my comprehensive analysis using semantic code analysis tools
and repository examination, here's my assessment:
## **BACKPORT DECISION: YES**
### Analysis Process and Findings:
#### 1. **Semantic Analysis Tools Used:**
- **mcp__semcode__find_function**: Located
`aqua_vanjaram_switch_partition_mode`, `gfx_v9_4_3_cp_resume`, and
`amdgpu_xcp_restore_partition_mode`
- **mcp__semcode__find_callers**: Traced call graph showing
`gfx_v9_4_3_cp_resume` is called during resume via
`gfx_v9_4_3_hw_init`
- **mcp__semcode__find_callchain**: Confirmed the resume path and
analyzed impact scope
- **mcp__semcode__find_type**: Examined `struct amdgpu_device` to verify
`in_suspend` flag management
- **WebSearch**: Found mailing list discussions showing multiple patch
iterations (v2, v3, v4)
- **Repository analysis**: Traced historical context and related commits
#### 2. **Impact Analysis:**
**Severity: HIGH** - This fixes GPU page faults that crash user
workloads
- **Hardware affected**: Aqua Vanjaram/MI300 series datacenter GPUs
(gfx_v9_4_3, IP versions 9.4.4 and 9.5.0)
- **Configuration**: PF passthrough environments (SR-IOV virtualization)
- **Trigger**: User-space reachable via hibernation cycle + workload
execution
- **Root cause**: Out-of-bounds memory access on MQD (Memory Queue
Descriptor) buffer object due to wrong CP register values
(CP_HYP_XCP_CTL)
#### 3. **Code Changes Analysis:**
**Two minimal, targeted changes:**
**Change 1** (drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c:410-411):
```c
-if (adev->kfd.init_complete && !amdgpu_in_reset(adev))
+if (adev->kfd.init_complete && !amdgpu_in_reset(adev) &&
!adev->in_suspend)
flags |= AMDGPU_XCP_OPS_KFD;
```
- Prevents KFD operations during suspend/hibernation
- KFD resume is handled separately in the resume sequence
**Change 2** (drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c:2295-2298):
```c
+if (adev->in_suspend)
+ amdgpu_xcp_restore_partition_mode(adev->xcp_mgr);
+else if (amdgpu_xcp_query_partition_mode(...) ==
AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE)
```
- Adds hibernation resume handling to restore partition mode
- Uses existing `amdgpu_xcp_restore_partition_mode()` function (added in
c45e38f21754b, Sept 2023)
- Ensures CP registers get correct values on resume
#### 4. **Scope and Dependencies:**
- **Contained fix**: Only 3 lines changed across 2 files
- **Existing infrastructure**: Depends on
`amdgpu_xcp_restore_partition_mode()` which has been in the kernel
since Sept 2023
- **Low coupling**: Changes are isolated to resume path, don't affect
normal operation
- **Call graph impact**: Minimal - only affects hibernation resume
codepath
#### 5. **Stable Tree Compliance:**
✅ **Bug fix**: Yes - fixes GPU page faults
✅ **Small and contained**: 3-line change
✅ **No new features**: Uses existing restore function
✅ **No architectural changes**: Follows pattern from c45e38f21754b
❌ **Stable tags**: No "Fixes:" or "Cc: stable@" tags present
However, the absence of stable tags appears to be an oversight given the
severity.
#### 6. **Risk Assessment:**
**Low regression risk:**
- Only affects specific hardware (Aqua Vanjaram GPUs)
- Only impacts PF passthrough configuration
- Only touches hibernation resume path
- Multiple patch iterations suggest thorough testing
- Builds on proven pattern from 2023 reset handling
#### 7. **Historical Context:**
- Part of ongoing partition mode fixes (multiple related commits in
2024-2025)
- Web search revealed extensive mailing list discussion
- Multiple patch versions (v2, v3, v4) indicate careful upstream review
- Targeted for drm-next-6.19
### Conclusion:
This commit **SHOULD be backported** because it:
1. Fixes a serious user-visible bug (GPU crashes from page faults)
2. Has minimal code changes with low regression risk
3. Affects critical datacenter hardware (MI300 series)
4. Is well-tested with multiple upstream review cycles
5. Follows established architectural patterns
6. Is confined to a specific use case, limiting blast radius
The fix is essential for users running AMD MI300 GPUs in virtualized
environments with hibernation support.
drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c | 3 ++-
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 4 +++-
2 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
index 811124ff88a88..f9e2edf5260bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
+++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
@@ -407,7 +407,8 @@ static int aqua_vanjaram_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr,
return -EINVAL;
}
- if (adev->kfd.init_complete && !amdgpu_in_reset(adev))
+ if (adev->kfd.init_complete && !amdgpu_in_reset(adev) &&
+ !adev->in_suspend)
flags |= AMDGPU_XCP_OPS_KFD;
if (flags & AMDGPU_XCP_OPS_KFD) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 51babf5c78c86..02c69ffd05837 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -2292,7 +2292,9 @@ static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev)
r = amdgpu_xcp_init(adev->xcp_mgr, num_xcp, mode);
} else {
- if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
+ if (adev->in_suspend)
+ amdgpu_xcp_restore_partition_mode(adev->xcp_mgr);
+ else if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
AMDGPU_XCP_FL_NONE) ==
AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE)
r = amdgpu_xcp_switch_partition_mode(
--
2.51.0
In preparation for using svm_copy_lbrs() with 'struct vmcb_save_area'
without a containing 'struct vmcb', and later even 'struct
vmcb_save_area_cached', make it a macro. Pull the call to
vmcb_mark_dirty() out to the callers.
Macros are generally not preferred compared to functions, mainly due to
type-safety. However, in this case it seems like having a simple macro
copying a few fields is better than copy-pasting the same 5 lines of
code in different places.
On the bright side, pulling vmcb_mark_dirty() calls to the callers makes
it clear that in one case, vmcb_mark_dirty() was being called on VMCB12.
It is not architecturally defined for the CPU to clear arbitrary clean
bits, and it is not needed, so drop that one call.
Technically fixes the non-architectural behavior of setting the dirty
bit on VMCB12.
Fixes: d20c796ca370 ("KVM: x86: nSVM: implement nested LBR virtualization")
Cc: stable(a)vger.kernel.org
Signed-off-by: Yosry Ahmed <yosry.ahmed(a)linux.dev>
---
arch/x86/kvm/svm/nested.c | 16 ++++++++++------
arch/x86/kvm/svm/svm.c | 11 -----------
arch/x86/kvm/svm/svm.h | 10 +++++++++-
3 files changed, 19 insertions(+), 18 deletions(-)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index c81005b245222..e7861392f2fcd 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -676,10 +676,12 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
* Reserved bits of DEBUGCTL are ignored. Be consistent with
* svm_set_msr's definition of reserved bits.
*/
- svm_copy_lbrs(vmcb02, vmcb12);
+ svm_copy_lbrs(&vmcb02->save, &vmcb12->save);
+ vmcb_mark_dirty(vmcb02, VMCB_LBR);
vmcb02->save.dbgctl &= ~DEBUGCTL_RESERVED_BITS;
} else {
- svm_copy_lbrs(vmcb02, vmcb01);
+ svm_copy_lbrs(&vmcb02->save, &vmcb01->save);
+ vmcb_mark_dirty(vmcb02, VMCB_LBR);
}
svm_update_lbrv(&svm->vcpu);
}
@@ -1186,10 +1188,12 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
- (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK)))
- svm_copy_lbrs(vmcb12, vmcb02);
- else
- svm_copy_lbrs(vmcb01, vmcb02);
+ (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
+ svm_copy_lbrs(&vmcb12->save, &vmcb02->save);
+ } else {
+ svm_copy_lbrs(&vmcb01->save, &vmcb02->save);
+ vmcb_mark_dirty(vmcb01, VMCB_LBR);
+ }
svm_update_lbrv(vcpu);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index fc42bcdbb5200..9eb112f0e61f0 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -795,17 +795,6 @@ static void svm_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
*/
}
-void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
-{
- to_vmcb->save.dbgctl = from_vmcb->save.dbgctl;
- to_vmcb->save.br_from = from_vmcb->save.br_from;
- to_vmcb->save.br_to = from_vmcb->save.br_to;
- to_vmcb->save.last_excp_from = from_vmcb->save.last_excp_from;
- to_vmcb->save.last_excp_to = from_vmcb->save.last_excp_to;
-
- vmcb_mark_dirty(to_vmcb, VMCB_LBR);
-}
-
static void __svm_enable_lbrv(struct kvm_vcpu *vcpu)
{
to_svm(vcpu)->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index c2acaa49ee1c5..e510c8183bd87 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -687,8 +687,16 @@ static inline void *svm_vcpu_alloc_msrpm(void)
return svm_alloc_permissions_map(MSRPM_SIZE, GFP_KERNEL_ACCOUNT);
}
+#define svm_copy_lbrs(to, from) \
+({ \
+ (to)->dbgctl = (from)->dbgctl; \
+ (to)->br_from = (from)->br_from; \
+ (to)->br_to = (from)->br_to; \
+ (to)->last_excp_from = (from)->last_excp_from; \
+ (to)->last_excp_to = (from)->last_excp_to; \
+})
+
void svm_vcpu_free_msrpm(void *msrpm);
-void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb);
void svm_enable_lbrv(struct kvm_vcpu *vcpu);
void svm_update_lbrv(struct kvm_vcpu *vcpu);
--
2.51.2.1041.gc1ab5b90ca-goog
Gestiona el desempeño con Vorecol
body {
margin: 0;
padding: 0;
font-family: Arial, Helvetica, sans-serif;
font-size: 14px;
color: #333;
background-color: #ffffff;
}
table {
border-spacing: 0;
width: 100%;
max-width: 600px;
margin: auto;
}
td {
padding: 12px 20px;
}
a {
color: #1a73e8;
text-decoration: none;
}
.footer {
font-size: 12px;
color: #888888;
text-align: center;
}
Mejora la gestión del desempeño y talento con Vorecol Performance Management.
Hola ,
Gestionar el desempeño de tu equipo puede ser más sencillo y efectivo con las herramientas adecuadas. Sin un buen sistema, es difícil identificar, desarrollar y retener a los mejores colaboradores.
El módulo de Performance Management de Vorecol te ofrece una solución completa para medir y potenciar el talento en tu organización.
Con este módulo puedes:
Evaluar el desempeño y potencial de tus colaboradores con la matriz Nine Box para tomar mejores decisiones.
Establecer y seguir objetivos claros usando la metodología SMART, alineados con las prioridades de tu empresa.
Ajustar el sistema según lo que necesites, desde manejar los periodos hasta recibir notificaciones, todo fácil de usar.
Además, contarás con soporte técnico y capacitación especializada para resolver cualquier duda y aprovechar al máximo la herramienta.
Aprovecha el Buen Fin del 1 al 22 de noviembre con hasta 15% de descuento y descubre cómo mejorar la gestión del desempeño en tu equipo.
Si quieres conocer más, responde este correo o contáctame directamente.
Saludos,
--------------
Atte.: Luis Ramírez
Ciudad de México: (55) 5018 0565
WhatsApp: +52 33 1607 2089
Si no deseas recibir más correos, haz clic aquí para darte de baja.
Para remover su dirección de esta lista haga <a href="https://s1.arrobamail.com/unsuscribe.php?id=yiwtsrewiswqwqseup">click aquí</a>
On 11/8/2025 9:26 AM, Sasha Levin wrote:
> Caution: This message originated from an External Source. Use proper caution when opening attachments, clicking links, or responding.
>
>
> This is a note to let you know that I've just added the patch titled
>
> net: ionic: add dma_wmb() before ringing TX doorbell
>
> to the 6.12-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> net-ionic-add-dma_wmb-before-ringing-tx-doorbell.patch
> and it can be found in the queue-6.12 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
>
>
>
> commit 05587f91cc2e8b071605aeef6442d2acf6e627c9
> Author: Mohammad Heib <mheib(a)redhat.com>
> Date: Fri Oct 31 17:52:02 2025 +0200
>
> net: ionic: add dma_wmb() before ringing TX doorbell
>
> [ Upstream commit d261f5b09c28850dc63ca1d3018596f829f402d5 ]
>
> The TX path currently writes descriptors and then immediately writes to
> the MMIO doorbell register to notify the NIC. On weakly ordered
> architectures, descriptor writes may still be pending in CPU or DMA
> write buffers when the doorbell is issued, leading to the device
> fetching stale or incomplete descriptors.
>
> Add a dma_wmb() in ionic_txq_post() to ensure all descriptor writes are
> visible to the device before the doorbell MMIO write.
>
> Fixes: 0f3154e6bcb3 ("ionic: Add Tx and Rx handling")
> Signed-off-by: Mohammad Heib <mheib(a)redhat.com>
> Link: https://patch.msgid.link/20251031155203.203031-1-mheib@redhat.com
> Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
> index 0f5758c273c22..3a094d3ea6f4f 100644
> --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
> +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
> @@ -29,6 +29,10 @@ static void ionic_tx_clean(struct ionic_queue *q,
>
> static inline void ionic_txq_post(struct ionic_queue *q, bool ring_dbell)
> {
> + /* Ensure TX descriptor writes reach memory before NIC reads them.
> + * Prevents device from fetching stale descriptors.
> + */
> + dma_wmb();
> ionic_q_post(q, ring_dbell);
> }
>
I posted on the original patch, but I will post here as well.
Apologies for the late and duplicate response, but it's not clear to me
why this is necessary.
In other vendors the "doorbell record" (dbr) is writing another location
in system memory, not an mmio write. These cases do use a dma_wmb().
Why isn't the writeq() sufficient in our case? According to
Documentation/memory-barriers.txt it seems like writeq() should be
sufficient.
Thanks,
Brett
Hi,
After a recent 6.1.y stable kernel update, my Indy (mips64 R4400SC) now
just stops booting early, just before when I would normally see the
kernel messages about mounting the root filesystem.
There are no further messages of any kind, and the boot process does not
appear to ever complete. However, the kernel is not fully crashed, as
it does respond to sysrq commands from the keyboard (and I do get output
on the console from these).
I bisected to the following:
794b679a28bb59a4533ae39a7cf945b9d5bbe336 is the first bad commit
commit 794b679a28bb59a4533ae39a7cf945b9d5bbe336
Author: Jiaxun Yang <jiaxun.yang(a)flygoat.com>
Date: Sat Jun 7 13:43:56 2025 +0100
MIPS: mm: tlb-r4k: Uniquify TLB entries on init
commit 35ad7e181541aa5757f9f316768d3e64403ec843 upstream.
This reverts cleanly on top of 6.1.158 and the resulting kernel boots
normally. I then reproduced this failure on 6.18-rc4. Reverting
35ad7e181541 on top of 6.18-rc4 also results in a normal boot.
Let me know if you need any more info!
Thanks,
Nick
From: Maciej Wieczor-Retman <maciej.wieczor-retman(a)intel.com>
The problem presented here is related to NUMA systems and tag-based
KASAN modes - software and hardware ones. It can be explained in the
following points:
1. There can be more than one virtual memory chunk.
2. Chunk's base address has a tag.
3. The base address points at the first chunk and thus inherits
the tag of the first chunk.
4. The subsequent chunks will be accessed with the tag from the
first chunk.
5. Thus, the subsequent chunks need to have their tag set to
match that of the first chunk.
Unpoison all vms[]->addr memory and pointers with the same tag to
resolve the mismatch.
Fixes: 1d96320f8d53 ("kasan, vmalloc: add vmalloc tagging for SW_TAGS")
Cc: <stable(a)vger.kernel.org> # 6.1+
Signed-off-by: Maciej Wieczor-Retman <maciej.wieczor-retman(a)intel.com>
Tested-by: Baoquan He <bhe(a)redhat.com>
---
Changelog v6:
- Add Baoquan's tested-by tag.
- Move patch to the beginning of the series as it is a fix.
- Add fixes tag.
mm/kasan/tags.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/mm/kasan/tags.c b/mm/kasan/tags.c
index ecc17c7c675a..c6b40cbffae3 100644
--- a/mm/kasan/tags.c
+++ b/mm/kasan/tags.c
@@ -148,12 +148,20 @@ void __kasan_save_free_info(struct kmem_cache *cache, void *object)
save_stack_info(cache, object, 0, true);
}
+/*
+ * A tag mismatch happens when calculating per-cpu chunk addresses, because
+ * they all inherit the tag from vms[0]->addr, even when nr_vms is bigger
+ * than 1. This is a problem because all the vms[]->addr come from separate
+ * allocations and have different tags so while the calculated address is
+ * correct the tag isn't.
+ */
void __kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms)
{
int area;
for (area = 0 ; area < nr_vms ; area++) {
kasan_poison(vms[area]->addr, vms[area]->size,
- arch_kasan_get_tag(vms[area]->addr), false);
+ arch_kasan_get_tag(vms[0]->addr), false);
+ arch_kasan_set_tag(vms[area]->addr, arch_kasan_get_tag(vms[0]->addr));
}
}
--
2.51.0
Improve the condition used to determine when input internal buffers need
to be reconfigured during streamon on the capture port. Previously, the
check relied on the INPUT_PAUSE sub-state, which was also being set
during seek operations. This led to input buffers being queued multiple
times to the firmware, causing session errors due to duplicate buffer
submissions.
This change introduces a more accurate check using the FIRST_IPSC and
DRC sub-states to ensure that input buffer reconfiguration is triggered
only during resolution change scenarios, such as streamoff/on on the
capture port. This avoids duplicate buffer queuing during seek
operations.
Fixes: c1f8b2cc72ec ("media: iris: handle streamoff/on from client in dynamic resolution change")
Cc: stable(a)vger.kernel.org
Reported-by: Val Packett <val(a)packett.cool>
Closes: https://gitlab.freedesktop.org/gstreamer/gstreamer/-/issues/4700
Signed-off-by: Dikshita Agarwal <dikshita.agarwal(a)oss.qualcomm.com>
---
Changes in v3:
- Fixed the compilation issue
- Added stable(a)vger.kernel.org in Cc
- Link to v2: https://lore.kernel.org/r/20251104-iris-seek-fix-v2-1-c9dace39b43d@oss.qual…
Changes in v2:
- Removed spurious space and addressed other comments (Nicolas)
- Remove the unnecessary initializations (Self)
- Link to v1: https://lore.kernel.org/r/20251103-iris-seek-fix-v1-1-6db5f5e17722@oss.qual…
---
drivers/media/platform/qcom/iris/iris_common.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/media/platform/qcom/iris/iris_common.c b/drivers/media/platform/qcom/iris/iris_common.c
index 9fc663bdaf3fc989fe1273b4d4280a87f68de85d..7f1c7fe144f707accc2e3da65ce37cd6d9dfeaff 100644
--- a/drivers/media/platform/qcom/iris/iris_common.c
+++ b/drivers/media/platform/qcom/iris/iris_common.c
@@ -91,12 +91,14 @@ int iris_process_streamon_input(struct iris_inst *inst)
int iris_process_streamon_output(struct iris_inst *inst)
{
const struct iris_hfi_command_ops *hfi_ops = inst->core->hfi_ops;
- bool drain_active = false, drc_active = false;
enum iris_inst_sub_state clear_sub_state = 0;
+ bool drain_active, drc_active, first_ipsc;
int ret = 0;
iris_scale_power(inst);
+ first_ipsc = inst->sub_state & IRIS_INST_SUB_FIRST_IPSC;
+
drain_active = inst->sub_state & IRIS_INST_SUB_DRAIN &&
inst->sub_state & IRIS_INST_SUB_DRAIN_LAST;
@@ -108,7 +110,8 @@ int iris_process_streamon_output(struct iris_inst *inst)
else if (drain_active)
clear_sub_state = IRIS_INST_SUB_DRAIN | IRIS_INST_SUB_DRAIN_LAST;
- if (inst->domain == DECODER && inst->sub_state & IRIS_INST_SUB_INPUT_PAUSE) {
+ /* Input internal buffer reconfiguration required in case of resolution change */
+ if (first_ipsc || drc_active) {
ret = iris_alloc_and_queue_input_int_bufs(inst);
if (ret)
return ret;
---
base-commit: 163917839c0eea3bdfe3620f27f617a55fd76302
change-id: 20251103-iris-seek-fix-7a25af22fa52
Best regards,
--
Dikshita Agarwal <dikshita.agarwal(a)oss.qualcomm.com>
Hi Bjorn et al.
this series addresses a few issues that have come up with the helper
function that enables Atomic Op Requests to be initiated by PCI
enpoints:
A. Most in-tree users of this helper use it incorrectly [0].
B. On s390, Atomic Op Requests are enabled, although the helper
cannot know whether the root port is really supporting them.
C. Loop control in the helper function does not guarantee that a root
port's capabilities are ever checked against those requested by the
caller.
Address these issue with the following patches:
Patch 1: Make it harder to mis-use the enablement function,
Patch 2: Addresses issues B. and C.
I did test that issue B is fixed with these patches. Also, I verified
that Atomic Ops enablement on a Mellanox/Nvidia ConnectX-6 adapter
plugged straight into the root port of a x86 system still gets AtomicOp
Requests enabled. However, I did not test this with any PCIe switches
between root port and endpoint.
Ideally, both patches would be incorporated immediately, so we could
start correcting the mis-uses in the device drivers. I don't know of any
complaints when using Atomic Ops on devices where the driver is
mis-using the helper. Patch 2 however, is fixing an obseved issue.
[0]: https://lore.kernel.org/all/fbe34de16f5c0bf25a16f9819a57fdd81e5bb08c.camel@…
[1]: https://lore.kernel.org/all/20251105-mlxatomics-v1-0-10c71649e08d@linux.ibm…
Signed-off-by: Gerd Bayer <gbayer(a)linux.ibm.com>
---
Gerd Bayer (2):
PCI: AtomicOps: Define valid root port capabilities
PCI: AtomicOps: Fix logic in enable function
drivers/pci/pci.c | 43 +++++++++++++++++++++----------------------
include/uapi/linux/pci_regs.h | 8 ++++++++
2 files changed, 29 insertions(+), 22 deletions(-)
---
base-commit: e9a6fb0bcdd7609be6969112f3fbfcce3b1d4a7c
change-id: 20251106-fix_pciatops-7e8608eccb03
Best regards,
--
Gerd Bayer <gbayer(a)linux.ibm.com>
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x fad472efab0a805dd939f017c5b8669a786a4bcf
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025110805-fame-viability-c333@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From fad472efab0a805dd939f017c5b8669a786a4bcf Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda(a)kernel.org>
Date: Sun, 2 Nov 2025 22:28:53 +0100
Subject: [PATCH] rust: kbuild: workaround `rustdoc` doctests modifier bug
The `rustdoc` modifiers bug [1] was fixed in Rust 1.90.0 [2], for which
we added a workaround in commit abbf9a449441 ("rust: workaround `rustdoc`
target modifiers bug").
However, `rustdoc`'s doctest generation still has a similar issue [3],
being fixed at [4], which does not affect us because we apply the
workaround to both, and now, starting with Rust 1.91.0 (released
2025-10-30), `-Zsanitizer` is a target modifier too [5], which means we
fail with:
RUSTDOC TK rust/kernel/lib.rs
error: mixing `-Zsanitizer` will cause an ABI mismatch in crate `kernel`
--> rust/kernel/lib.rs:3:1
|
3 | //! The `kernel` crate.
| ^
|
= help: the `-Zsanitizer` flag modifies the ABI so Rust crates compiled with different values of this flag cannot be used together safely
= note: unset `-Zsanitizer` in this crate is incompatible with `-Zsanitizer=kernel-address` in dependency `core`
= help: set `-Zsanitizer=kernel-address` in this crate or unset `-Zsanitizer` in `core`
= help: if you are sure this will not cause problems, you may use `-Cunsafe-allow-abi-mismatch=sanitizer` to silence this error
A simple way around is to add the sanitizer to the list in the existing
workaround (especially if we had not started to pass the sanitizer
flags in the previous commit, since in that case that would not be
necessary). However, that still applies the workaround in more cases
than necessary.
Instead, only modify the doctests flags to ignore the check for
sanitizers, so that it is more local (and thus the compiler keeps checking
it for us in the normal `rustdoc` calls). Since the previous commit
already treated the `rustdoc` calls as kernel objects, this should allow
us in the future to easily remove this workaround when the time comes.
By the way, the `-Cunsafe-allow-abi-mismatch` flag overwrites previous
ones rather than appending, so it needs to be all done in the same flag.
Moreover, unknown modifiers are rejected, and thus we have to gate based
on the version too.
Finally, `-Zsanitizer-cfi-normalize-integers` is not affected (in Rust
1.91.0), so it is not needed in the workaround for the moment.
Cc: stable(a)vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs).
Link: https://github.com/rust-lang/rust/issues/144521 [1]
Link: https://github.com/rust-lang/rust/pull/144523 [2]
Link: https://github.com/rust-lang/rust/issues/146465 [3]
Link: https://github.com/rust-lang/rust/pull/148068 [4]
Link: https://github.com/rust-lang/rust/pull/138736 [5]
Reviewed-by: Alice Ryhl <aliceryhl(a)google.com>
Tested-by: Justin M. Forbes <jforbes(a)fedoraproject.org>
Link: https://patch.msgid.link/20251102212853.1505384-2-ojeda@kernel.org
Signed-off-by: Miguel Ojeda <ojeda(a)kernel.org>
diff --git a/rust/Makefile b/rust/Makefile
index a9fb9354b659..3e545c1a0ff4 100644
--- a/rust/Makefile
+++ b/rust/Makefile
@@ -69,6 +69,9 @@ core-edition := $(if $(call rustc-min-version,108700),2024,2021)
# the time being (https://github.com/rust-lang/rust/issues/144521).
rustdoc_modifiers_workaround := $(if $(call rustc-min-version,108800),-Cunsafe-allow-abi-mismatch=fixed-x18)
+# Similarly, for doctests (https://github.com/rust-lang/rust/issues/146465).
+doctests_modifiers_workaround := $(rustdoc_modifiers_workaround)$(if $(call rustc-min-version,109100),$(comma)sanitizer)
+
# `rustc` recognizes `--remap-path-prefix` since 1.26.0, but `rustdoc` only
# since Rust 1.81.0. Moreover, `rustdoc` ICEs on out-of-tree builds since Rust
# 1.82.0 (https://github.com/rust-lang/rust/issues/138520). Thus workaround both
@@ -236,7 +239,7 @@ quiet_cmd_rustdoc_test_kernel = RUSTDOC TK $<
--extern bindings --extern uapi \
--no-run --crate-name kernel -Zunstable-options \
--sysroot=/dev/null \
- $(rustdoc_modifiers_workaround) \
+ $(doctests_modifiers_workaround) \
--test-builder $(objtree)/scripts/rustdoc_test_builder \
$< $(rustdoc_test_kernel_quiet); \
$(objtree)/scripts/rustdoc_test_gen
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 0c716703965ffc5ef4311b65cb5d84a703784717
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025110954-lunacy-murkiness-7783@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0c716703965ffc5ef4311b65cb5d84a703784717 Mon Sep 17 00:00:00 2001
From: Bui Quang Minh <minhquangbui99(a)gmail.com>
Date: Thu, 30 Oct 2025 21:44:38 +0700
Subject: [PATCH] virtio-net: fix received length check in big packets
Since commit 4959aebba8c0 ("virtio-net: use mtu size as buffer length
for big packets"), when guest gso is off, the allocated size for big
packets is not MAX_SKB_FRAGS * PAGE_SIZE anymore but depends on
negotiated MTU. The number of allocated frags for big packets is stored
in vi->big_packets_num_skbfrags.
Because the host announced buffer length can be malicious (e.g. the host
vhost_net driver's get_rx_bufs is modified to announce incorrect
length), we need a check in virtio_net receive path. Currently, the
check is not adapted to the new change which can lead to NULL page
pointer dereference in the below while loop when receiving length that
is larger than the allocated one.
This commit fixes the received length check corresponding to the new
change.
Fixes: 4959aebba8c0 ("virtio-net: use mtu size as buffer length for big packets")
Cc: stable(a)vger.kernel.org
Signed-off-by: Bui Quang Minh <minhquangbui99(a)gmail.com>
Reviewed-by: Xuan Zhuo <xuanzhuo(a)linux.alibaba.com>
Tested-by: Lei Yang <leiyang(a)redhat.com>
Link: https://patch.msgid.link/20251030144438.7582-1-minhquangbui99@gmail.com
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index e6e650bc3bc3..8855a994e12b 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -910,17 +910,6 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
goto ok;
}
- /*
- * Verify that we can indeed put this data into a skb.
- * This is here to handle cases when the device erroneously
- * tries to receive more than is possible. This is usually
- * the case of a broken device.
- */
- if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) {
- net_dbg_ratelimited("%s: too much data\n", skb->dev->name);
- dev_kfree_skb(skb);
- return NULL;
- }
BUG_ON(offset >= PAGE_SIZE);
while (len) {
unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
@@ -2112,9 +2101,19 @@ static struct sk_buff *receive_big(struct net_device *dev,
struct virtnet_rq_stats *stats)
{
struct page *page = buf;
- struct sk_buff *skb =
- page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0);
+ struct sk_buff *skb;
+ /* Make sure that len does not exceed the size allocated in
+ * add_recvbuf_big.
+ */
+ if (unlikely(len > (vi->big_packets_num_skbfrags + 1) * PAGE_SIZE)) {
+ pr_debug("%s: rx error: len %u exceeds allocated size %lu\n",
+ dev->name, len,
+ (vi->big_packets_num_skbfrags + 1) * PAGE_SIZE);
+ goto err;
+ }
+
+ skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0);
u64_stats_add(&stats->bytes, len - vi->hdr_len);
if (unlikely(!skb))
goto err;
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x d968e99488c4b08259a324a89e4ed17bf36561a4
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025110916-yummy-cane-0741@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d968e99488c4b08259a324a89e4ed17bf36561a4 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter(a)intel.com>
Date: Fri, 24 Oct 2025 11:59:17 +0300
Subject: [PATCH] scsi: ufs: ufs-pci: Set
UFSHCD_QUIRK_PERFORM_LINK_STARTUP_ONCE for Intel ADL
Link startup becomes unreliable for Intel Alder Lake based host
controllers when a 2nd DME_LINKSTARTUP is issued unnecessarily. Employ
UFSHCD_QUIRK_PERFORM_LINK_STARTUP_ONCE to suppress that from happening.
Fixes: 7dc9fb47bc9a ("scsi: ufs: ufs-pci: Add support for Intel ADL")
Cc: stable(a)vger.kernel.org
Signed-off-by: Adrian Hunter <adrian.hunter(a)intel.com>
Reviewed-by: Bart Van Assche <bvanassche(a)acm.org>
Link: https://patch.msgid.link/20251024085918.31825-4-adrian.hunter@intel.com
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
diff --git a/drivers/ufs/host/ufshcd-pci.c b/drivers/ufs/host/ufshcd-pci.c
index 89f88b693850..5f65dfad1a71 100644
--- a/drivers/ufs/host/ufshcd-pci.c
+++ b/drivers/ufs/host/ufshcd-pci.c
@@ -428,7 +428,8 @@ static int ufs_intel_lkf_init(struct ufs_hba *hba)
static int ufs_intel_adl_init(struct ufs_hba *hba)
{
hba->nop_out_timeout = 200;
- hba->quirks |= UFSHCD_QUIRK_BROKEN_AUTO_HIBERN8;
+ hba->quirks |= UFSHCD_QUIRK_BROKEN_AUTO_HIBERN8 |
+ UFSHCD_QUIRK_PERFORM_LINK_STARTUP_ONCE;
hba->caps |= UFSHCD_CAP_WB_EN;
return ufs_intel_common_init(hba);
}
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x d34caa89a132cd69efc48361d4772251546fdb88
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025110906-retrieval-daunting-5fa7@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d34caa89a132cd69efc48361d4772251546fdb88 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter(a)intel.com>
Date: Fri, 24 Oct 2025 11:59:16 +0300
Subject: [PATCH] scsi: ufs: core: Add a quirk to suppress link_startup_again
ufshcd_link_startup() has a facility (link_startup_again) to issue
DME_LINKSTARTUP a 2nd time even though the 1st time was successful.
Some older hardware benefits from that, however the behaviour is
non-standard, and has been found to cause link startup to be unreliable
for some Intel Alder Lake based host controllers.
Add UFSHCD_QUIRK_PERFORM_LINK_STARTUP_ONCE to suppress
link_startup_again, in preparation for setting the quirk for affected
controllers.
Fixes: 7dc9fb47bc9a ("scsi: ufs: ufs-pci: Add support for Intel ADL")
Cc: stable(a)vger.kernel.org
Signed-off-by: Adrian Hunter <adrian.hunter(a)intel.com>
Reviewed-by: Bart Van Assche <bvanassche(a)acm.org>
Link: https://patch.msgid.link/20251024085918.31825-3-adrian.hunter@intel.com
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 2b76f543d072..453a99ec6282 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -5066,7 +5066,8 @@ static int ufshcd_link_startup(struct ufs_hba *hba)
* If UFS device isn't active then we will have to issue link startup
* 2 times to make sure the device state move to active.
*/
- if (!ufshcd_is_ufs_dev_active(hba))
+ if (!(hba->quirks & UFSHCD_QUIRK_PERFORM_LINK_STARTUP_ONCE) &&
+ !ufshcd_is_ufs_dev_active(hba))
link_startup_again = true;
link_startup:
diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index 9425cfd9d00e..0f95576bf1f6 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -688,6 +688,13 @@ enum ufshcd_quirks {
* single doorbell mode.
*/
UFSHCD_QUIRK_BROKEN_LSDBS_CAP = 1 << 25,
+
+ /*
+ * This quirk indicates that DME_LINKSTARTUP should not be issued a 2nd
+ * time (refer link_startup_again) after the 1st time was successful,
+ * because it causes link startup to become unreliable.
+ */
+ UFSHCD_QUIRK_PERFORM_LINK_STARTUP_ONCE = 1 << 26,
};
enum ufshcd_caps {
mei_register() fails to release the device reference in error paths
after device_initialize(). During normal device registration, the
reference is properly handled through mei_deregister() which calls
device_destroy(). However, in error handling paths (such as cdev_alloc
failure, cdev_add failure, etc.), missing put_device() calls cause
reference count leaks, preventing the device's release function
(mei_device_release) from being called and resulting in memory leaks
of mei_device.
Found by code review.
Cc: stable(a)vger.kernel.org
Fixes: 7704e6be4ed2 ("mei: hook mei_device on class device")
Signed-off-by: Ma Ke <make24(a)iscas.ac.cn>
---
drivers/misc/mei/main.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c
index 86a73684a373..6f26d5160788 100644
--- a/drivers/misc/mei/main.c
+++ b/drivers/misc/mei/main.c
@@ -1307,6 +1307,7 @@ int mei_register(struct mei_device *dev, struct device *parent)
err_del_cdev:
cdev_del(dev->cdev);
err:
+ put_device(&dev->dev);
mei_minor_free(minor);
return ret;
}
--
2.17.1
First, we can't assume pipe == crtc index. If a pipe is fused off in
between, it no longer holds. intel_crtc_for_pipe() is the only proper
way to get from a pipe to the corresponding crtc.
Second, drivers aren't supposed to access or index drm->vblank[]
directly. There's drm_crtc_vblank_crtc() for this.
Use both functions to fix the pipe to vblank conversion.
Fixes: f02658c46cf7 ("drm/i915/psr: Add mechanism to notify PSR of pipe enable/disable")
Cc: Jouni Högander <jouni.hogander(a)intel.com>
Cc: <stable(a)vger.kernel.org> # v6.16+
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
---
drivers/gpu/drm/i915/display/intel_psr.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index 05014ffe3ce1..c77a92ea7919 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -932,7 +932,8 @@ static bool is_dc5_dc6_blocked(struct intel_dp *intel_dp)
{
struct intel_display *display = to_intel_display(intel_dp);
u32 current_dc_state = intel_display_power_get_current_dc_state(display);
- struct drm_vblank_crtc *vblank = &display->drm->vblank[intel_dp->psr.pipe];
+ struct intel_crtc *crtc = intel_crtc_for_pipe(display, intel_dp->psr.pipe);
+ struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(&crtc->base);
return (current_dc_state != DC_STATE_EN_UPTO_DC5 &&
current_dc_state != DC_STATE_EN_UPTO_DC6) ||
--
2.47.3
From: Bjorn Helgaas <bhelgaas(a)google.com>
Previously meson_pcie_link_up() only returned true if the link was in the
L0 state. This was incorrect because hardware autonomously manages
transitions between L0, L0s, and L1 while both components on the link stay
in D0. Those states should all be treated as "link is active".
Returning false when the device was in L0s or L1 broke config accesses
because dw_pcie_other_conf_map_bus() fails if the link is down, which
caused errors like this:
meson-pcie fc000000.pcie: error: wait linkup timeout
pci 0000:01:00.0: BAR 0: error updating (0xfc700004 != 0xffffffff)
Remove the LTSSM state check, timeout, speed check, and error message from
meson_pcie_link_up(), the dw_pcie_ops.link_up() method, so it is a simple
boolean check of whether the link is active. Timeouts and and error
messages are handled at a higher level, e.g., dw_pcie_wait_for_link().
Fixes: 9c0ef6d34fdb ("PCI: amlogic: Add the Amlogic Meson PCIe controller driver")
Reported-by: Linnaea Lavia <linnaea-von-lavia(a)live.com>
Closes: https://lore.kernel.org/r/DM4PR05MB102707B8CDF84D776C39F22F2C7F0A@DM4PR05MB…
Signed-off-by: Bjorn Helgaas <bhelgaas(a)google.com>
Tested-by: Linnaea Lavia <linnaea-von-lavia(a)live.com>
Cc: stable(a)vger.kernel.org
---
drivers/pci/controller/dwc/pci-meson.c | 36 +++-----------------------
1 file changed, 3 insertions(+), 33 deletions(-)
diff --git a/drivers/pci/controller/dwc/pci-meson.c b/drivers/pci/controller/dwc/pci-meson.c
index 787469d1b396..13685d89227a 100644
--- a/drivers/pci/controller/dwc/pci-meson.c
+++ b/drivers/pci/controller/dwc/pci-meson.c
@@ -338,40 +338,10 @@ static struct pci_ops meson_pci_ops = {
static bool meson_pcie_link_up(struct dw_pcie *pci)
{
struct meson_pcie *mp = to_meson_pcie(pci);
- struct device *dev = pci->dev;
- u32 speed_okay = 0;
- u32 cnt = 0;
- u32 state12, state17, smlh_up, ltssm_up, rdlh_up;
+ u32 state12;
- do {
- state12 = meson_cfg_readl(mp, PCIE_CFG_STATUS12);
- state17 = meson_cfg_readl(mp, PCIE_CFG_STATUS17);
- smlh_up = IS_SMLH_LINK_UP(state12);
- rdlh_up = IS_RDLH_LINK_UP(state12);
- ltssm_up = IS_LTSSM_UP(state12);
-
- if (PM_CURRENT_STATE(state17) < PCIE_GEN3)
- speed_okay = 1;
-
- if (smlh_up)
- dev_dbg(dev, "smlh_link_up is on\n");
- if (rdlh_up)
- dev_dbg(dev, "rdlh_link_up is on\n");
- if (ltssm_up)
- dev_dbg(dev, "ltssm_up is on\n");
- if (speed_okay)
- dev_dbg(dev, "speed_okay\n");
-
- if (smlh_up && rdlh_up && ltssm_up && speed_okay)
- return true;
-
- cnt++;
-
- udelay(10);
- } while (cnt < WAIT_LINKUP_TIMEOUT);
-
- dev_err(dev, "error: wait linkup timeout\n");
- return false;
+ state12 = meson_cfg_readl(mp, PCIE_CFG_STATUS12);
+ return IS_SMLH_LINK_UP(state12) && IS_RDLH_LINK_UP(state12);
}
static int meson_pcie_host_init(struct dw_pcie_rp *pp)
--
2.43.0
syzkaller discovered the following crash: (kernel BUG)
[ 44.607039] ------------[ cut here ]------------
[ 44.607422] kernel BUG at mm/userfaultfd.c:2067!
[ 44.608148] Oops: invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN NOPTI
[ 44.608814] CPU: 1 UID: 0 PID: 2475 Comm: reproducer Not tainted 6.16.0-rc6 #1 PREEMPT(none)
[ 44.609635] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
[ 44.610695] RIP: 0010:userfaultfd_release_all+0x3a8/0x460
<snip other registers, drop unreliable trace>
[ 44.617726] Call Trace:
[ 44.617926] <TASK>
[ 44.619284] userfaultfd_release+0xef/0x1b0
[ 44.620976] __fput+0x3f9/0xb60
[ 44.621240] fput_close_sync+0x110/0x210
[ 44.622222] __x64_sys_close+0x8f/0x120
[ 44.622530] do_syscall_64+0x5b/0x2f0
[ 44.622840] entry_SYSCALL_64_after_hwframe+0x76/0x7e
[ 44.623244] RIP: 0033:0x7f365bb3f227
Kernel panics because it detects UFFD inconsistency during
userfaultfd_release_all(). Specifically, a VMA which has a valid pointer
to vma->vm_userfaultfd_ctx, but no UFFD flags in vma->vm_flags.
The inconsistency is caused in ksm_madvise(): when user calls madvise()
with MADV_UNMEARGEABLE on a VMA that is registered for UFFD in MINOR
mode, it accidentally clears all flags stored in the upper 32 bits of
vma->vm_flags.
Assuming x86_64 kernel build, unsigned long is 64-bit and unsigned int
and int are 32-bit wide. This setup causes the following mishap during
the &= ~VM_MERGEABLE assignment.
VM_MERGEABLE is a 32-bit constant of type unsigned int, 0x8000'0000.
After ~ is applied, it becomes 0x7fff'ffff unsigned int, which is then
promoted to unsigned long before the & operation. This promotion fills
upper 32 bits with leading 0s, as we're doing unsigned conversion (and
even for a signed conversion, this wouldn't help as the leading bit is
0). & operation thus ends up AND-ing vm_flags with 0x0000'0000'7fff'ffff
instead of intended 0xffff'ffff'7fff'ffff and hence accidentally clears
the upper 32-bits of its value.
Fix it by changing `VM_MERGEABLE` constant to unsigned long, using the
BIT() macro.
Note: other VM_* flags are not affected:
This only happens to the VM_MERGEABLE flag, as the other VM_* flags are
all constants of type int and after ~ operation, they end up with
leading 1 and are thus converted to unsigned long with leading 1s.
Note 2:
After commit 31defc3b01d9 ("userfaultfd: remove (VM_)BUG_ON()s"), this is
no longer a kernel BUG, but a WARNING at the same place:
[ 45.595973] WARNING: CPU: 1 PID: 2474 at mm/userfaultfd.c:2067
but the root-cause (flag-drop) remains the same.
Fixes: 7677f7fd8be76 ("userfaultfd: add minor fault registration mode")
Signed-off-by: Jakub Acs <acsjakub(a)amazon.de>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Xu Xin <xu.xin16(a)zte.com.cn>
Cc: Chengming Zhou <chengming.zhou(a)linux.dev>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: Axel Rasmussen <axelrasmussen(a)google.com>
Cc: linux-mm(a)kvack.org
Cc: linux-kernel(a)vger.kernel.org
Cc: stable(a)vger.kernel.org
---
include/linux/mm.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1ae97a0b8ec7..c6794d0e24eb 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -296,7 +296,7 @@ extern unsigned int kobjsize(const void *objp);
#define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */
#define VM_HUGEPAGE 0x20000000 /* MADV_HUGEPAGE marked this vma */
#define VM_NOHUGEPAGE 0x40000000 /* MADV_NOHUGEPAGE marked this vma */
-#define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */
+#define VM_MERGEABLE BIT(31) /* KSM may merge identical pages */
#ifdef CONFIG_ARCH_USES_HIGH_VMA_FLAGS
#define VM_HIGH_ARCH_BIT_0 32 /* bit only usable on 64-bit architectures */
--
2.47.3
Amazon Web Services Development Center Germany GmbH
Tamara-Danz-Str. 13
10243 Berlin
Geschaeftsfuehrung: Christian Schlaeger
Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B
Sitz: Berlin
Ust-ID: DE 365 538 597
Hi,
This compile tested only series aims to fix the DBI parsing issue repored in
[1]. The issue stems from the fact that the DT and binding described 'dbi'
region as 'elbi' from the start.
Now, both binding and DTs are fixed and the driver is reworked to work with both
old and new DTs.
Note: The driver patch is OK to be backported till 6.2 where the common resource
parsing code was introduced. But the DTS patch should not be backported. And I'm
not sure about the backporting of the binding.
Please test this series on the Meson board with old and new DTs.
- Mani
[1] https://lore.kernel.org/linux-pci/DM4PR05MB102707B8CDF84D776C39F22F2C7F0A@D…
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)oss.qualcomm.com>
---
Resending as the git sendemail config got messed up
---
Manivannan Sadhasivam (3):
dt-bindings: PCI: amlogic: Fix the register name of the DBI region
arm64: dts: amlogic: Fix the register name of the 'DBI' region
PCI: meson: Fix parsing the DBI register region
.../devicetree/bindings/pci/amlogic,axg-pcie.yaml | 6 +++---
arch/arm64/boot/dts/amlogic/meson-axg.dtsi | 4 ++--
arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi | 2 +-
drivers/pci/controller/dwc/pci-meson.c | 18 +++++++++++++++---
drivers/pci/controller/dwc/pcie-designware.c | 12 +++++++-----
5 files changed, 28 insertions(+), 14 deletions(-)
---
base-commit: 3a8660878839faadb4f1a6dd72c3179c1df56787
change-id: 20251031-pci-meson-fix-c8b651bc6662
Best regards,
--
Manivannan Sadhasivam <manivannan.sadhasivam(a)oss.qualcomm.com>
The quilt patch titled
Subject: kho: warn and exit when unpreserved page wasn't preserved
has been removed from the -mm tree. Its filename was
kho-warn-and-exit-when-unpreserved-page-wasnt-preserved.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Pratyush Yadav <pratyush(a)kernel.org>
Subject: kho: warn and exit when unpreserved page wasn't preserved
Date: Mon, 3 Nov 2025 19:02:32 +0100
Calling __kho_unpreserve() on a pair of (pfn, end_pfn) that wasn't
preserved is a bug. Currently, if that is done, the physxa or bits can be
NULL. This results in a soft lockup since a NULL physxa or bits results
in redoing the loop without ever making any progress.
Return when physxa or bits are not found, but WARN first to loudly
indicate invalid behaviour.
Link: https://lkml.kernel.org/r/20251103180235.71409-3-pratyush@kernel.org
Fixes: fc33e4b44b27 ("kexec: enable KHO support for memory preservation")
Signed-off-by: Pratyush Yadav <pratyush(a)kernel.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt(a)kernel.org>
Cc: Alexander Graf <graf(a)amazon.com>
Cc: Baoquan He <bhe(a)redhat.com>
Cc: Pasha Tatashin <pasha.tatashin(a)soleen.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/kexec_handover.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
--- a/kernel/kexec_handover.c~kho-warn-and-exit-when-unpreserved-page-wasnt-preserved
+++ a/kernel/kexec_handover.c
@@ -171,12 +171,12 @@ static void __kho_unpreserve(struct kho_
const unsigned long pfn_high = pfn >> order;
physxa = xa_load(&track->orders, order);
- if (!physxa)
- continue;
+ if (WARN_ON_ONCE(!physxa))
+ return;
bits = xa_load(&physxa->phys_bits, pfn_high / PRESERVE_BITS);
- if (!bits)
- continue;
+ if (WARN_ON_ONCE(!bits))
+ return;
clear_bit(pfn_high % PRESERVE_BITS, bits->preserve);
_
Patches currently in -mm which might be from pratyush(a)kernel.org are
maintainers-add-myself-as-a-reviewer-for-kho.patch
liveupdate-luo_file-add-private-argument-to-store-runtime-state.patch
The quilt patch titled
Subject: mm/secretmem: fix use-after-free race in fault handler
has been removed from the -mm tree. Its filename was
mm-secretmem-fix-use-after-free-race-in-fault-handler.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Lance Yang <lance.yang(a)linux.dev>
Subject: mm/secretmem: fix use-after-free race in fault handler
Date: Fri, 31 Oct 2025 20:09:55 +0800
When a page fault occurs in a secret memory file created with
`memfd_secret(2)`, the kernel will allocate a new folio for it, mark the
underlying page as not-present in the direct map, and add it to the file
mapping.
If two tasks cause a fault in the same page concurrently, both could end
up allocating a folio and removing the page from the direct map, but only
one would succeed in adding the folio to the file mapping. The task that
failed undoes the effects of its attempt by (a) freeing the folio again
and (b) putting the page back into the direct map. However, by doing
these two operations in this order, the page becomes available to the
allocator again before it is placed back in the direct mapping.
If another task attempts to allocate the page between (a) and (b), and the
kernel tries to access it via the direct map, it would result in a
supervisor not-present page fault.
Fix the ordering to restore the direct map before the folio is freed.
Link: https://lkml.kernel.org/r/20251031120955.92116-1-lance.yang@linux.dev
Fixes: 1507f51255c9 ("mm: introduce memfd_secret system call to create "secret" memory areas")
Signed-off-by: Lance Yang <lance.yang(a)linux.dev>
Reported-by: Google Big Sleep <big-sleep-vuln-reports(a)google.com>
Closes: https://lore.kernel.org/linux-mm/CAEXGt5QeDpiHTu3K9tvjUTPqo+d-=wuCNYPa+6sWK…
Acked-by: David Hildenbrand <david(a)redhat.com>
Reviewed-by: Mike Rapoport (Microsoft) <rppt(a)kernel.org>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/secretmem.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/secretmem.c~mm-secretmem-fix-use-after-free-race-in-fault-handler
+++ a/mm/secretmem.c
@@ -82,13 +82,13 @@ retry:
__folio_mark_uptodate(folio);
err = filemap_add_folio(mapping, folio, offset, gfp);
if (unlikely(err)) {
- folio_put(folio);
/*
* If a split of large page was required, it
* already happened when we marked the page invalid
* which guarantees that this call won't fail
*/
set_direct_map_default_noflush(folio_page(folio, 0));
+ folio_put(folio);
if (err == -EEXIST)
goto retry;
_
Patches currently in -mm which might be from lance.yang(a)linux.dev are
mm-khugepaged-guard-is_zero_pfn-calls-with-pte_present.patch
The quilt patch titled
Subject: nilfs2: avoid having an active sc_timer before freeing sci
has been removed from the -mm tree. Its filename was
nilfs2-avoid-having-an-active-sc_timer-before-freeing-sci.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Edward Adam Davis <eadavis(a)qq.com>
Subject: nilfs2: avoid having an active sc_timer before freeing sci
Date: Thu, 30 Oct 2025 07:51:52 +0900
Because kthread_stop did not stop sc_task properly and returned -EINTR,
the sc_timer was not properly closed, ultimately causing the problem [1]
reported by syzbot when freeing sci due to the sc_timer not being closed.
Because the thread sc_task main function nilfs_segctor_thread() returns 0
when it succeeds, when the return value of kthread_stop() is not 0 in
nilfs_segctor_destroy(), we believe that it has not properly closed
sc_timer.
We use timer_shutdown_sync() to sync wait for sc_timer to shutdown, and
set the value of sc_task to NULL under the protection of lock
sc_state_lock, so as to avoid the issue caused by sc_timer not being
properly shutdowned.
[1]
ODEBUG: free active (active state 0) object: 00000000dacb411a object type: timer_list hint: nilfs_construction_timeout
Call trace:
nilfs_segctor_destroy fs/nilfs2/segment.c:2811 [inline]
nilfs_detach_log_writer+0x668/0x8cc fs/nilfs2/segment.c:2877
nilfs_put_super+0x4c/0x12c fs/nilfs2/super.c:509
Link: https://lkml.kernel.org/r/20251029225226.16044-1-konishi.ryusuke@gmail.com
Fixes: 3f66cc261ccb ("nilfs2: use kthread_create and kthread_stop for the log writer thread")
Signed-off-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Reported-by: syzbot+24d8b70f039151f65590(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=24d8b70f039151f65590
Tested-by: syzbot+24d8b70f039151f65590(a)syzkaller.appspotmail.com
Signed-off-by: Edward Adam Davis <eadavis(a)qq.com>
Cc: <stable(a)vger.kernel.org> [6.12+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/nilfs2/segment.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
--- a/fs/nilfs2/segment.c~nilfs2-avoid-having-an-active-sc_timer-before-freeing-sci
+++ a/fs/nilfs2/segment.c
@@ -2768,7 +2768,12 @@ static void nilfs_segctor_destroy(struct
if (sci->sc_task) {
wake_up(&sci->sc_wait_daemon);
- kthread_stop(sci->sc_task);
+ if (kthread_stop(sci->sc_task)) {
+ spin_lock(&sci->sc_state_lock);
+ sci->sc_task = NULL;
+ timer_shutdown_sync(&sci->sc_timer);
+ spin_unlock(&sci->sc_state_lock);
+ }
}
spin_lock(&sci->sc_state_lock);
_
Patches currently in -mm which might be from eadavis(a)qq.com are
The quilt patch titled
Subject: scripts/decode_stacktrace.sh: fix build ID and PC source parsing
has been removed from the -mm tree. Its filename was
scripts-decode_stacktracesh-fix-build-id-and-pc-source-parsing.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Carlos Llamas <cmllamas(a)google.com>
Subject: scripts/decode_stacktrace.sh: fix build ID and PC source parsing
Date: Thu, 30 Oct 2025 01:03:33 +0000
Support for parsing PC source info in stacktraces (e.g. '(P)') was added
in commit 2bff77c665ed ("scripts/decode_stacktrace.sh: fix decoding of
lines with an additional info"). However, this logic was placed after the
build ID processing. This incorrect order fails to parse lines containing
both elements, e.g.:
drm_gem_mmap_obj+0x114/0x200 [drm 03d0564e0529947d67bb2008c3548be77279fd27] (P)
This patch fixes the problem by extracting the PC source info first and
then processing the module build ID. With this change, the line above is
now properly parsed as such:
drm_gem_mmap_obj (./include/linux/mmap_lock.h:212 ./include/linux/mm.h:811 drivers/gpu/drm/drm_gem.c:1177) drm (P)
While here, also add a brief explanation the build ID section.
Link: https://lkml.kernel.org/r/20251030010347.2731925-1-cmllamas@google.com
Fixes: 2bff77c665ed ("scripts/decode_stacktrace.sh: fix decoding of lines with an additional info")
Signed-off-by: Carlos Llamas <cmllamas(a)google.com>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Reviewed-by: Luca Ceresoli <luca.ceresoli(a)bootlin.com>
Cc: Breno Leitao <leitao(a)debian.org>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Marc Rutland <mark.rutland(a)arm.com>
Cc: Mark Brown <broonie(a)kernel.org>
Cc: Matthieu Baerts <matttbe(a)kernel.org>
Cc: Miroslav Benes <mbenes(a)suse.cz>
Cc: Puranjay Mohan <puranjay(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
scripts/decode_stacktrace.sh | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
--- a/scripts/decode_stacktrace.sh~scripts-decode_stacktracesh-fix-build-id-and-pc-source-parsing
+++ a/scripts/decode_stacktrace.sh
@@ -277,12 +277,6 @@ handle_line() {
fi
done
- if [[ ${words[$last]} =~ ^[0-9a-f]+\] ]]; then
- words[$last-1]="${words[$last-1]} ${words[$last]}"
- unset words[$last] spaces[$last]
- last=$(( $last - 1 ))
- fi
-
# Extract info after the symbol if present. E.g.:
# func_name+0x54/0x80 (P)
# ^^^
@@ -294,6 +288,14 @@ handle_line() {
unset words[$last] spaces[$last]
last=$(( $last - 1 ))
fi
+
+ # Join module name with its build id if present, as these were
+ # split during tokenization (e.g. "[module" and "modbuildid]").
+ if [[ ${words[$last]} =~ ^[0-9a-f]+\] ]]; then
+ words[$last-1]="${words[$last-1]} ${words[$last]}"
+ unset words[$last] spaces[$last]
+ last=$(( $last - 1 ))
+ fi
if [[ ${words[$last]} =~ \[([^]]+)\] ]]; then
module=${words[$last]}
_
Patches currently in -mm which might be from cmllamas(a)google.com are
The quilt patch titled
Subject: mm/damon/sysfs: change next_update_jiffies to a global variable
has been removed from the -mm tree. Its filename was
mm-damon-sysfs-change-next_update_jiffies-to-a-global-variable.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Quanmin Yan <yanquanmin1(a)huawei.com>
Subject: mm/damon/sysfs: change next_update_jiffies to a global variable
Date: Thu, 30 Oct 2025 10:07:46 +0800
In DAMON's damon_sysfs_repeat_call_fn(), time_before() is used to compare
the current jiffies with next_update_jiffies to determine whether to
update the sysfs files at this moment.
On 32-bit systems, the kernel initializes jiffies to "-5 minutes" to make
jiffies wrap bugs appear earlier. However, this causes time_before() in
damon_sysfs_repeat_call_fn() to unexpectedly return true during the first
5 minutes after boot on 32-bit systems (see [1] for more explanation,
which fixes another jiffies-related issue before). As a result, DAMON
does not update sysfs files during that period.
There is also an issue unrelated to the system's word size[2]: if the
user stops DAMON just after next_update_jiffies is updated and restarts
it after 'refresh_ms' or a longer delay, next_update_jiffies will retain
an older value, causing time_before() to return false and the update to
happen earlier than expected.
Fix these issues by making next_update_jiffies a global variable and
initializing it each time DAMON is started.
Link: https://lkml.kernel.org/r/20251030020746.967174-3-yanquanmin1@huawei.com
Link: https://lkml.kernel.org/r/20250822025057.1740854-1-ekffu200098@gmail.com [1]
Link: https://lore.kernel.org/all/20251029013038.66625-1-sj@kernel.org/ [2]
Fixes: d809a7c64ba8 ("mm/damon/sysfs: implement refresh_ms file internal work")
Suggested-by: SeongJae Park <sj(a)kernel.org>
Reviewed-by: SeongJae Park <sj(a)kernel.org>
Signed-off-by: Quanmin Yan <yanquanmin1(a)huawei.com>
Cc: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Cc: ze zuo <zuoze1(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/damon/sysfs.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
--- a/mm/damon/sysfs.c~mm-damon-sysfs-change-next_update_jiffies-to-a-global-variable
+++ a/mm/damon/sysfs.c
@@ -1552,16 +1552,17 @@ static struct damon_ctx *damon_sysfs_bui
return ctx;
}
+static unsigned long damon_sysfs_next_update_jiffies;
+
static int damon_sysfs_repeat_call_fn(void *data)
{
struct damon_sysfs_kdamond *sysfs_kdamond = data;
- static unsigned long next_update_jiffies;
if (!sysfs_kdamond->refresh_ms)
return 0;
- if (time_before(jiffies, next_update_jiffies))
+ if (time_before(jiffies, damon_sysfs_next_update_jiffies))
return 0;
- next_update_jiffies = jiffies +
+ damon_sysfs_next_update_jiffies = jiffies +
msecs_to_jiffies(sysfs_kdamond->refresh_ms);
if (!mutex_trylock(&damon_sysfs_lock))
@@ -1607,6 +1608,9 @@ static int damon_sysfs_turn_damon_on(str
}
kdamond->damon_ctx = ctx;
+ damon_sysfs_next_update_jiffies =
+ jiffies + msecs_to_jiffies(kdamond->refresh_ms);
+
repeat_call_control->fn = damon_sysfs_repeat_call_fn;
repeat_call_control->data = kdamond;
repeat_call_control->repeat = true;
_
Patches currently in -mm which might be from yanquanmin1(a)huawei.com are
mm-damon-add-a-min_sz_region-parameter-to-damon_set_region_biggest_system_ram_default.patch
mm-damon-reclaim-use-min_sz_region-for-core-address-alignment-when-setting-regions.patch
The quilt patch titled
Subject: mm/damon/stat: change last_refresh_jiffies to a global variable
has been removed from the -mm tree. Its filename was
mm-damon-stat-change-last_refresh_jiffies-to-a-global-variable.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Quanmin Yan <yanquanmin1(a)huawei.com>
Subject: mm/damon/stat: change last_refresh_jiffies to a global variable
Date: Thu, 30 Oct 2025 10:07:45 +0800
Patch series "mm/damon: fixes for the jiffies-related issues", v2.
On 32-bit systems, the kernel initializes jiffies to "-5 minutes" to make
jiffies wrap bugs appear earlier. However, this may cause the
time_before() series of functions to return unexpected values, resulting
in DAMON not functioning as intended. Meanwhile, similar issues exist in
some specific user operation scenarios.
This patchset addresses these issues. The first patch is about the
DAMON_STAT module, and the second patch is about the core layer's sysfs.
This patch (of 2):
In DAMON_STAT's damon_stat_damon_call_fn(), time_before_eq() is used to
avoid unnecessarily frequent stat update.
On 32-bit systems, the kernel initializes jiffies to "-5 minutes" to make
jiffies wrap bugs appear earlier. However, this causes time_before_eq()
in DAMON_STAT to unexpectedly return true during the first 5 minutes after
boot on 32-bit systems (see [1] for more explanation, which fixes another
jiffies-related issue before). As a result, DAMON_STAT does not update
any monitoring results during that period, which becomes more confusing
when DAMON_STAT_ENABLED_DEFAULT is enabled.
There is also an issue unrelated to the system's word size[2]: if the user
stops DAMON_STAT just after last_refresh_jiffies is updated and restarts
it after 5 seconds or a longer delay, last_refresh_jiffies will retain an
older value, causing time_before_eq() to return false and the update to
happen earlier than expected.
Fix these issues by making last_refresh_jiffies a global variable and
initializing it each time DAMON_STAT is started.
Link: https://lkml.kernel.org/r/20251030020746.967174-2-yanquanmin1@huawei.com
Link: https://lkml.kernel.org/r/20250822025057.1740854-1-ekffu200098@gmail.com [1]
Link: https://lore.kernel.org/all/20251028143250.50144-1-sj@kernel.org/ [2]
Fixes: fabdd1e911da ("mm/damon/stat: calculate and expose estimated memory bandwidth")
Signed-off-by: Quanmin Yan <yanquanmin1(a)huawei.com>
Suggested-by: SeongJae Park <sj(a)kernel.org>
Reviewed-by: SeongJae Park <sj(a)kernel.org>
Cc: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Cc: ze zuo <zuoze1(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/damon/stat.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
--- a/mm/damon/stat.c~mm-damon-stat-change-last_refresh_jiffies-to-a-global-variable
+++ a/mm/damon/stat.c
@@ -46,6 +46,8 @@ MODULE_PARM_DESC(aggr_interval_us,
static struct damon_ctx *damon_stat_context;
+static unsigned long damon_stat_last_refresh_jiffies;
+
static void damon_stat_set_estimated_memory_bandwidth(struct damon_ctx *c)
{
struct damon_target *t;
@@ -130,13 +132,12 @@ static void damon_stat_set_idletime_perc
static int damon_stat_damon_call_fn(void *data)
{
struct damon_ctx *c = data;
- static unsigned long last_refresh_jiffies;
/* avoid unnecessarily frequent stat update */
- if (time_before_eq(jiffies, last_refresh_jiffies +
+ if (time_before_eq(jiffies, damon_stat_last_refresh_jiffies +
msecs_to_jiffies(5 * MSEC_PER_SEC)))
return 0;
- last_refresh_jiffies = jiffies;
+ damon_stat_last_refresh_jiffies = jiffies;
aggr_interval_us = c->attrs.aggr_interval;
damon_stat_set_estimated_memory_bandwidth(c);
@@ -210,6 +211,8 @@ static int damon_stat_start(void)
err = damon_start(&damon_stat_context, 1, true);
if (err)
return err;
+
+ damon_stat_last_refresh_jiffies = jiffies;
call_control.data = damon_stat_context;
return damon_call(damon_stat_context, &call_control);
}
_
Patches currently in -mm which might be from yanquanmin1(a)huawei.com are
mm-damon-add-a-min_sz_region-parameter-to-damon_set_region_biggest_system_ram_default.patch
mm-damon-reclaim-use-min_sz_region-for-core-address-alignment-when-setting-regions.patch
The quilt patch titled
Subject: mm/mremap: honour writable bit in mremap pte batching
has been removed from the -mm tree. Its filename was
mm-mremap-honour-writable-bit-in-mremap-pte-batching.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Dev Jain <dev.jain(a)arm.com>
Subject: mm/mremap: honour writable bit in mremap pte batching
Date: Tue, 28 Oct 2025 12:09:52 +0530
Currently mremap folio pte batch ignores the writable bit during figuring
out a set of similar ptes mapping the same folio. Suppose that the first
pte of the batch is writable while the others are not - set_ptes will end
up setting the writable bit on the other ptes, which is a violation of
mremap semantics. Therefore, use FPB_RESPECT_WRITE to check the writable
bit while determining the pte batch.
Link: https://lkml.kernel.org/r/20251028063952.90313-1-dev.jain@arm.com
Signed-off-by: Dev Jain <dev.jain(a)arm.com>
Fixes: f822a9a81a31 ("mm: optimize mremap() by PTE batching")
Reported-by: David Hildenbrand <david(a)redhat.com>
Debugged-by: David Hildenbrand <david(a)redhat.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Acked-by: Pedro Falcato <pfalcato(a)suse.de>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Barry Song <baohua(a)kernel.org>
Cc: Jann Horn <jannh(a)google.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org> [6.17+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/mremap.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/mremap.c~mm-mremap-honour-writable-bit-in-mremap-pte-batching
+++ a/mm/mremap.c
@@ -187,7 +187,7 @@ static int mremap_folio_pte_batch(struct
if (!folio || !folio_test_large(folio))
return 1;
- return folio_pte_batch(folio, ptep, pte, max_nr);
+ return folio_pte_batch_flags(folio, NULL, ptep, &pte, max_nr, FPB_RESPECT_WRITE);
}
static int move_ptes(struct pagetable_move_control *pmc,
_
Patches currently in -mm which might be from dev.jain(a)arm.com are
The quilt patch titled
Subject: gcov: add support for GCC 15
has been removed from the -mm tree. Its filename was
gcov-add-support-for-gcc-15.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Peter Oberparleiter <oberpar(a)linux.ibm.com>
Subject: gcov: add support for GCC 15
Date: Tue, 28 Oct 2025 12:51:25 +0100
Using gcov on kernels compiled with GCC 15 results in truncated 16-byte
long .gcda files with no usable data. To fix this, update GCOV_COUNTERS
to match the value defined by GCC 15.
Tested with GCC 14.3.0 and GCC 15.2.0.
Link: https://lkml.kernel.org/r/20251028115125.1319410-1-oberpar@linux.ibm.com
Signed-off-by: Peter Oberparleiter <oberpar(a)linux.ibm.com>
Reported-by: Matthieu Baerts <matttbe(a)kernel.org>
Closes: https://github.com/linux-test-project/lcov/issues/445
Tested-by: Matthieu Baerts <matttbe(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/gcov/gcc_4_7.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
--- a/kernel/gcov/gcc_4_7.c~gcov-add-support-for-gcc-15
+++ a/kernel/gcov/gcc_4_7.c
@@ -18,7 +18,9 @@
#include <linux/mm.h>
#include "gcov.h"
-#if (__GNUC__ >= 14)
+#if (__GNUC__ >= 15)
+#define GCOV_COUNTERS 10
+#elif (__GNUC__ >= 14)
#define GCOV_COUNTERS 9
#elif (__GNUC__ >= 10)
#define GCOV_COUNTERS 8
_
Patches currently in -mm which might be from oberpar(a)linux.ibm.com are
The quilt patch titled
Subject: mm/mm_init: fix hash table order logging in alloc_large_system_hash()
has been removed from the -mm tree. Its filename was
mm-mm_init-fix-hash-table-order-logging-in-alloc_large_system_hash.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: "Isaac J. Manjarres" <isaacmanjarres(a)google.com>
Subject: mm/mm_init: fix hash table order logging in alloc_large_system_hash()
Date: Tue, 28 Oct 2025 12:10:12 -0700
When emitting the order of the allocation for a hash table,
alloc_large_system_hash() unconditionally subtracts PAGE_SHIFT from log
base 2 of the allocation size. This is not correct if the allocation size
is smaller than a page, and yields a negative value for the order as seen
below:
TCP established hash table entries: 32 (order: -4, 256 bytes, linear) TCP
bind hash table entries: 32 (order: -2, 1024 bytes, linear)
Use get_order() to compute the order when emitting the hash table
information to correctly handle cases where the allocation size is smaller
than a page:
TCP established hash table entries: 32 (order: 0, 256 bytes, linear) TCP
bind hash table entries: 32 (order: 0, 1024 bytes, linear)
Link: https://lkml.kernel.org/r/20251028191020.413002-1-isaacmanjarres@google.com
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Isaac J. Manjarres <isaacmanjarres(a)google.com>
Reviewed-by: Mike Rapoport (Microsoft) <rppt(a)kernel.org>
Reviewed-by: David Hildenbrand <david(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/mm_init.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/mm_init.c~mm-mm_init-fix-hash-table-order-logging-in-alloc_large_system_hash
+++ a/mm/mm_init.c
@@ -2469,7 +2469,7 @@ void *__init alloc_large_system_hash(con
panic("Failed to allocate %s hash table\n", tablename);
pr_info("%s hash table entries: %ld (order: %d, %lu bytes, %s)\n",
- tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size,
+ tablename, 1UL << log2qty, get_order(size), size,
virt ? (huge ? "vmalloc hugepage" : "vmalloc") : "linear");
if (_hash_shift)
_
Patches currently in -mm which might be from isaacmanjarres(a)google.com are
The quilt patch titled
Subject: mm/truncate: unmap large folio on split failure
has been removed from the -mm tree. Its filename was
mm-truncate-unmap-large-folio-on-split-failure.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Kiryl Shutsemau <kas(a)kernel.org>
Subject: mm/truncate: unmap large folio on split failure
Date: Mon, 27 Oct 2025 11:56:36 +0000
Accesses within VMA, but beyond i_size rounded up to PAGE_SIZE are
supposed to generate SIGBUS.
This behavior might not be respected on truncation.
During truncation, the kernel splits a large folio in order to reclaim
memory. As a side effect, it unmaps the folio and destroys PMD mappings
of the folio. The folio will be refaulted as PTEs and SIGBUS semantics
are preserved.
However, if the split fails, PMD mappings are preserved and the user will
not receive SIGBUS on any accesses within the PMD.
Unmap the folio on split failure. It will lead to refault as PTEs and
preserve SIGBUS semantics.
Make an exception for shmem/tmpfs that for long time intentionally mapped
with PMDs across i_size.
Link: https://lkml.kernel.org/r/20251027115636.82382-3-kirill@shutemov.name
Fixes: b9a8a4195c7d ("truncate,shmem: Handle truncates that split large folios")
Signed-off-by: Kiryl Shutsemau <kas(a)kernel.org>
Cc: Al Viro <viro(a)zeniv.linux.org.uk>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: "Darrick J. Wong" <djwong(a)kernel.org>
Cc: Dave Chinner <david(a)fromorbit.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: Rik van Riel <riel(a)surriel.com>
Cc: Shakeel Butt <shakeel.butt(a)linux.dev>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/truncate.c | 35 +++++++++++++++++++++++++++++------
1 file changed, 29 insertions(+), 6 deletions(-)
--- a/mm/truncate.c~mm-truncate-unmap-large-folio-on-split-failure
+++ a/mm/truncate.c
@@ -177,6 +177,32 @@ int truncate_inode_folio(struct address_
return 0;
}
+static int try_folio_split_or_unmap(struct folio *folio, struct page *split_at,
+ unsigned long min_order)
+{
+ enum ttu_flags ttu_flags =
+ TTU_SYNC |
+ TTU_SPLIT_HUGE_PMD |
+ TTU_IGNORE_MLOCK;
+ int ret;
+
+ ret = try_folio_split_to_order(folio, split_at, min_order);
+
+ /*
+ * If the split fails, unmap the folio, so it will be refaulted
+ * with PTEs to respect SIGBUS semantics.
+ *
+ * Make an exception for shmem/tmpfs that for long time
+ * intentionally mapped with PMDs across i_size.
+ */
+ if (ret && !shmem_mapping(folio->mapping)) {
+ try_to_unmap(folio, ttu_flags);
+ WARN_ON(folio_mapped(folio));
+ }
+
+ return ret;
+}
+
/*
* Handle partial folios. The folio may be entirely within the
* range if a split has raced with us. If not, we zero the part of the
@@ -226,7 +252,7 @@ bool truncate_inode_partial_folio(struct
min_order = mapping_min_folio_order(folio->mapping);
split_at = folio_page(folio, PAGE_ALIGN_DOWN(offset) / PAGE_SIZE);
- if (!try_folio_split_to_order(folio, split_at, min_order)) {
+ if (!try_folio_split_or_unmap(folio, split_at, min_order)) {
/*
* try to split at offset + length to make sure folios within
* the range can be dropped, especially to avoid memory waste
@@ -250,13 +276,10 @@ bool truncate_inode_partial_folio(struct
if (!folio_trylock(folio2))
goto out;
- /*
- * make sure folio2 is large and does not change its mapping.
- * Its split result does not matter here.
- */
+ /* make sure folio2 is large and does not change its mapping */
if (folio_test_large(folio2) &&
folio2->mapping == folio->mapping)
- try_folio_split_to_order(folio2, split_at2, min_order);
+ try_folio_split_or_unmap(folio2, split_at2, min_order);
folio_unlock(folio2);
out:
_
Patches currently in -mm which might be from kas(a)kernel.org are
The quilt patch titled
Subject: mm/memory: do not populate page table entries beyond i_size
has been removed from the -mm tree. Its filename was
mm-memory-do-not-populate-page-table-entries-beyond-i_size.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Kiryl Shutsemau <kas(a)kernel.org>
Subject: mm/memory: do not populate page table entries beyond i_size
Date: Mon, 27 Oct 2025 11:56:35 +0000
Patch series "Fix SIGBUS semantics with large folios", v3.
Accessing memory within a VMA, but beyond i_size rounded up to the next
page size, is supposed to generate SIGBUS.
Darrick reported[1] an xfstests regression in v6.18-rc1. generic/749
failed due to missing SIGBUS. This was caused by my recent changes that
try to fault in the whole folio where possible:
19773df031bc ("mm/fault: try to map the entire file folio in finish_fault()")
357b92761d94 ("mm/filemap: map entire large folio faultaround")
These changes did not consider i_size when setting up PTEs, leading to
xfstest breakage.
However, the problem has been present in the kernel for a long time -
since huge tmpfs was introduced in 2016. The kernel happily maps
PMD-sized folios as PMD without checking i_size. And huge=always tmpfs
allocates PMD-size folios on any writes.
I considered this corner case when I implemented a large tmpfs, and my
conclusion was that no one in their right mind should rely on receiving a
SIGBUS signal when accessing beyond i_size. I cannot imagine how it could
be useful for the workload.
But apparently filesystem folks care a lot about preserving strict SIGBUS
semantics.
Generic/749 was introduced last year with reference to POSIX, but no real
workloads were mentioned. It also acknowledged the tmpfs deviation from
the test case.
POSIX indeed says[3]:
References within the address range starting at pa and
continuing for len bytes to whole pages following the end of an
object shall result in delivery of a SIGBUS signal.
The patchset fixes the regression introduced by recent changes as well as
more subtle SIGBUS breakage due to split failure on truncation.
This patch (of 2):
Accesses within VMA, but beyond i_size rounded up to PAGE_SIZE are
supposed to generate SIGBUS.
Recent changes attempted to fault in full folio where possible. They did
not respect i_size, which led to populating PTEs beyond i_size and
breaking SIGBUS semantics.
Darrick reported generic/749 breakage because of this.
However, the problem existed before the recent changes. With huge=always
tmpfs, any write to a file leads to PMD-size allocation. Following the
fault-in of the folio will install PMD mapping regardless of i_size.
Fix filemap_map_pages() and finish_fault() to not install:
- PTEs beyond i_size;
- PMD mappings across i_size;
Make an exception for shmem/tmpfs that for long time intentionally
mapped with PMDs across i_size.
Link: https://lkml.kernel.org/r/20251027115636.82382-1-kirill@shutemov.name
Link: https://lkml.kernel.org/r/20251027115636.82382-2-kirill@shutemov.name
Signed-off-by: Kiryl Shutsemau <kas(a)kernel.org>
Fixes: 6795801366da ("xfs: Support large folios")
Reported-by: "Darrick J. Wong" <djwong(a)kernel.org>
Cc: Al Viro <viro(a)zeniv.linux.org.uk>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: Dave Chinner <david(a)fromorbit.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: Rik van Riel <riel(a)surriel.com>
Cc: Shakeel Butt <shakeel.butt(a)linux.dev>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/filemap.c | 28 ++++++++++++++++++++--------
mm/memory.c | 20 +++++++++++++++++++-
2 files changed, 39 insertions(+), 9 deletions(-)
--- a/mm/filemap.c~mm-memory-do-not-populate-page-table-entries-beyond-i_size
+++ a/mm/filemap.c
@@ -3681,7 +3681,8 @@ skip:
static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
struct folio *folio, unsigned long start,
unsigned long addr, unsigned int nr_pages,
- unsigned long *rss, unsigned short *mmap_miss)
+ unsigned long *rss, unsigned short *mmap_miss,
+ bool can_map_large)
{
unsigned int ref_from_caller = 1;
vm_fault_t ret = 0;
@@ -3696,7 +3697,7 @@ static vm_fault_t filemap_map_folio_rang
* The folio must not cross VMA or page table boundary.
*/
addr0 = addr - start * PAGE_SIZE;
- if (folio_within_vma(folio, vmf->vma) &&
+ if (can_map_large && folio_within_vma(folio, vmf->vma) &&
(addr0 & PMD_MASK) == ((addr0 + folio_size(folio) - 1) & PMD_MASK)) {
vmf->pte -= start;
page -= start;
@@ -3811,13 +3812,27 @@ vm_fault_t filemap_map_pages(struct vm_f
unsigned long rss = 0;
unsigned int nr_pages = 0, folio_type;
unsigned short mmap_miss = 0, mmap_miss_saved;
+ bool can_map_large;
rcu_read_lock();
folio = next_uptodate_folio(&xas, mapping, end_pgoff);
if (!folio)
goto out;
- if (filemap_map_pmd(vmf, folio, start_pgoff)) {
+ file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1;
+ end_pgoff = min(end_pgoff, file_end);
+
+ /*
+ * Do not allow to map with PTEs beyond i_size and with PMD
+ * across i_size to preserve SIGBUS semantics.
+ *
+ * Make an exception for shmem/tmpfs that for long time
+ * intentionally mapped with PMDs across i_size.
+ */
+ can_map_large = shmem_mapping(mapping) ||
+ file_end >= folio_next_index(folio);
+
+ if (can_map_large && filemap_map_pmd(vmf, folio, start_pgoff)) {
ret = VM_FAULT_NOPAGE;
goto out;
}
@@ -3830,10 +3845,6 @@ vm_fault_t filemap_map_pages(struct vm_f
goto out;
}
- file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1;
- if (end_pgoff > file_end)
- end_pgoff = file_end;
-
folio_type = mm_counter_file(folio);
do {
unsigned long end;
@@ -3850,7 +3861,8 @@ vm_fault_t filemap_map_pages(struct vm_f
else
ret |= filemap_map_folio_range(vmf, folio,
xas.xa_index - folio->index, addr,
- nr_pages, &rss, &mmap_miss);
+ nr_pages, &rss, &mmap_miss,
+ can_map_large);
folio_unlock(folio);
} while ((folio = next_uptodate_folio(&xas, mapping, end_pgoff)) != NULL);
--- a/mm/memory.c~mm-memory-do-not-populate-page-table-entries-beyond-i_size
+++ a/mm/memory.c
@@ -65,6 +65,7 @@
#include <linux/gfp.h>
#include <linux/migrate.h>
#include <linux/string.h>
+#include <linux/shmem_fs.h>
#include <linux/memory-tiers.h>
#include <linux/debugfs.h>
#include <linux/userfaultfd_k.h>
@@ -5501,8 +5502,25 @@ fallback:
return ret;
}
+ if (!needs_fallback && vma->vm_file) {
+ struct address_space *mapping = vma->vm_file->f_mapping;
+ pgoff_t file_end;
+
+ file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
+
+ /*
+ * Do not allow to map with PTEs beyond i_size and with PMD
+ * across i_size to preserve SIGBUS semantics.
+ *
+ * Make an exception for shmem/tmpfs that for long time
+ * intentionally mapped with PMDs across i_size.
+ */
+ needs_fallback = !shmem_mapping(mapping) &&
+ file_end < folio_next_index(folio);
+ }
+
if (pmd_none(*vmf->pmd)) {
- if (folio_test_pmd_mappable(folio)) {
+ if (!needs_fallback && folio_test_pmd_mappable(folio)) {
ret = do_set_pmd(vmf, folio, page);
if (ret != VM_FAULT_FALLBACK)
return ret;
_
Patches currently in -mm which might be from kas(a)kernel.org are
The quilt patch titled
Subject: fs/proc: fix uaf in proc_readdir_de()
has been removed from the -mm tree. Its filename was
fs-proc-fix-uaf-in-proc_readdir_de.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Wei Yang <albinwyang(a)tencent.com>
Subject: fs/proc: fix uaf in proc_readdir_de()
Date: Sat, 25 Oct 2025 10:42:33 +0800
Pde is erased from subdir rbtree through rb_erase(), but not set the node
to EMPTY, which may result in uaf access. We should use RB_CLEAR_NODE()
set the erased node to EMPTY, then pde_subdir_next() will return NULL to
avoid uaf access.
We found an uaf issue while using stress-ng testing, need to run testcase
getdent and tun in the same time. The steps of the issue is as follows:
1) use getdent to traverse dir /proc/pid/net/dev_snmp6/, and current
pde is tun3;
2) in the [time windows] unregister netdevice tun3 and tun2, and erase
them from rbtree. erase tun3 first, and then erase tun2. the
pde(tun2) will be released to slab;
3) continue to getdent process, then pde_subdir_next() will return
pde(tun2) which is released, it will case uaf access.
CPU 0 | CPU 1
-------------------------------------------------------------------------
traverse dir /proc/pid/net/dev_snmp6/ | unregister_netdevice(tun->dev) //tun3 tun2
sys_getdents64() |
iterate_dir() |
proc_readdir() |
proc_readdir_de() | snmp6_unregister_dev()
pde_get(de); | proc_remove()
read_unlock(&proc_subdir_lock); | remove_proc_subtree()
| write_lock(&proc_subdir_lock);
[time window] | rb_erase(&root->subdir_node, &parent->subdir);
| write_unlock(&proc_subdir_lock);
read_lock(&proc_subdir_lock); |
next = pde_subdir_next(de); |
pde_put(de); |
de = next; //UAF |
rbtree of dev_snmp6
|
pde(tun3)
/ \
NULL pde(tun2)
Link: https://lkml.kernel.org/r/20251025024233.158363-1-albin_yang@163.com
Signed-off-by: Wei Yang <albinwyang(a)tencent.com>
Cc: Al Viro <viro(a)zeniv.linux.org.uk>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: wangzijie <wangzijie1(a)honor.com>
Cc: Alexey Dobriyan <adobriyan(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/proc/generic.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
--- a/fs/proc/generic.c~fs-proc-fix-uaf-in-proc_readdir_de
+++ a/fs/proc/generic.c
@@ -698,6 +698,12 @@ void pde_put(struct proc_dir_entry *pde)
}
}
+static void pde_erase(struct proc_dir_entry *pde, struct proc_dir_entry *parent)
+{
+ rb_erase(&pde->subdir_node, &parent->subdir);
+ RB_CLEAR_NODE(&pde->subdir_node);
+}
+
/*
* Remove a /proc entry and free it if it's not currently in use.
*/
@@ -720,7 +726,7 @@ void remove_proc_entry(const char *name,
WARN(1, "removing permanent /proc entry '%s'", de->name);
de = NULL;
} else {
- rb_erase(&de->subdir_node, &parent->subdir);
+ pde_erase(de, parent);
if (S_ISDIR(de->mode))
parent->nlink--;
}
@@ -764,7 +770,7 @@ int remove_proc_subtree(const char *name
root->parent->name, root->name);
return -EINVAL;
}
- rb_erase(&root->subdir_node, &parent->subdir);
+ pde_erase(root, parent);
de = root;
while (1) {
@@ -776,7 +782,7 @@ int remove_proc_subtree(const char *name
next->parent->name, next->name);
return -EINVAL;
}
- rb_erase(&next->subdir_node, &de->subdir);
+ pde_erase(next, de);
de = next;
continue;
}
_
Patches currently in -mm which might be from albinwyang(a)tencent.com are
The quilt patch titled
Subject: mm/huge_memory: preserve PG_has_hwpoisoned if a folio is split to >0 order
has been removed from the -mm tree. Its filename was
mm-huge_memory-preserve-pg_has_hwpoisoned-if-a-folio-is-split-to-0-order.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Zi Yan <ziy(a)nvidia.com>
Subject: mm/huge_memory: preserve PG_has_hwpoisoned if a folio is split to >0 order
Date: Wed, 22 Oct 2025 23:05:21 -0400
folio split clears PG_has_hwpoisoned, but the flag should be preserved in
after-split folios containing pages with PG_hwpoisoned flag if the folio
is split to >0 order folios. Scan all pages in a to-be-split folio to
determine which after-split folios need the flag.
An alternatives is to change PG_has_hwpoisoned to PG_maybe_hwpoisoned to
avoid the scan and set it on all after-split folios, but resulting false
positive has undesirable negative impact. To remove false positive,
caller of folio_test_has_hwpoisoned() and folio_contain_hwpoisoned_page()
needs to do the scan. That might be causing a hassle for current and
future callers and more costly than doing the scan in the split code.
More details are discussed in [1].
This issue can be exposed via:
1. splitting a has_hwpoisoned folio to >0 order from debugfs interface;
2. truncating part of a has_hwpoisoned folio in
truncate_inode_partial_folio().
And later accesses to a hwpoisoned page could be possible due to the
missing has_hwpoisoned folio flag. This will lead to MCE errors.
Link: https://lore.kernel.org/all/CAHbLzkoOZm0PXxE9qwtF4gKR=cpRXrSrJ9V9Pm2DJexs98… [1]
Link: https://lkml.kernel.org/r/20251023030521.473097-1-ziy@nvidia.com
Fixes: c010d47f107f ("mm: thp: split huge page to any lower order pages")
Signed-off-by: Zi Yan <ziy(a)nvidia.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Reviewed-by: Yang Shi <yang(a)os.amperecomputing.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Reviewed-by: Lance Yang <lance.yang(a)linux.dev>
Reviewed-by: Miaohe Lin <linmiaohe(a)huawei.com>
Reviewed-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Reviewed-by: Wei Yang <richard.weiyang(a)gmail.com>
Cc: Pankaj Raghav <kernel(a)pankajraghav.com>
Cc: Barry Song <baohua(a)kernel.org>
Cc: Dev Jain <dev.jain(a)arm.com>
Cc: Jane Chu <jane.chu(a)oracle.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Luis Chamberalin <mcgrof(a)kernel.org>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Naoya Horiguchi <nao.horiguchi(a)gmail.com>
Cc: Nico Pache <npache(a)redhat.com>
Cc: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/huge_memory.c | 23 ++++++++++++++++++++---
1 file changed, 20 insertions(+), 3 deletions(-)
--- a/mm/huge_memory.c~mm-huge_memory-preserve-pg_has_hwpoisoned-if-a-folio-is-split-to-0-order
+++ a/mm/huge_memory.c
@@ -3263,6 +3263,14 @@ bool can_split_folio(struct folio *folio
caller_pins;
}
+static bool page_range_has_hwpoisoned(struct page *page, long nr_pages)
+{
+ for (; nr_pages; page++, nr_pages--)
+ if (PageHWPoison(page))
+ return true;
+ return false;
+}
+
/*
* It splits @folio into @new_order folios and copies the @folio metadata to
* all the resulting folios.
@@ -3270,17 +3278,24 @@ bool can_split_folio(struct folio *folio
static void __split_folio_to_order(struct folio *folio, int old_order,
int new_order)
{
+ /* Scan poisoned pages when split a poisoned folio to large folios */
+ const bool handle_hwpoison = folio_test_has_hwpoisoned(folio) && new_order;
long new_nr_pages = 1 << new_order;
long nr_pages = 1 << old_order;
long i;
+ folio_clear_has_hwpoisoned(folio);
+
+ /* Check first new_nr_pages since the loop below skips them */
+ if (handle_hwpoison &&
+ page_range_has_hwpoisoned(folio_page(folio, 0), new_nr_pages))
+ folio_set_has_hwpoisoned(folio);
/*
* Skip the first new_nr_pages, since the new folio from them have all
* the flags from the original folio.
*/
for (i = new_nr_pages; i < nr_pages; i += new_nr_pages) {
struct page *new_head = &folio->page + i;
-
/*
* Careful: new_folio is not a "real" folio before we cleared PageTail.
* Don't pass it around before clear_compound_head().
@@ -3322,6 +3337,10 @@ static void __split_folio_to_order(struc
(1L << PG_dirty) |
LRU_GEN_MASK | LRU_REFS_MASK));
+ if (handle_hwpoison &&
+ page_range_has_hwpoisoned(new_head, new_nr_pages))
+ folio_set_has_hwpoisoned(new_folio);
+
new_folio->mapping = folio->mapping;
new_folio->index = folio->index + i;
@@ -3422,8 +3441,6 @@ static int __split_unmapped_folio(struct
if (folio_test_anon(folio))
mod_mthp_stat(order, MTHP_STAT_NR_ANON, -1);
- folio_clear_has_hwpoisoned(folio);
-
/*
* split to new_order one order at a time. For uniform split,
* folio is split to new_order directly.
_
Patches currently in -mm which might be from ziy(a)nvidia.com are
mm-huge_memory-fix-folio-split-check-for-anon-folios-in-swapcache.patch
mm-huge_memory-add-split_huge_page_to_order.patch
mm-memory-failure-improve-large-block-size-folio-handling.patch
mm-huge_memory-fix-kernel-doc-comments-for-folio_split-and-related.patch
mm-huge_memory-fix-kernel-doc-comments-for-folio_split-and-related-fix.patch
mm-huge_memory-fix-kernel-doc-comments-for-folio_split-and-related-fix-2.patch
migrate-optimise-alloc_migration_target-fix.patch
The quilt patch titled
Subject: mm/kmsan: fix kmsan kmalloc hook when no stack depots are allocated yet
has been removed from the -mm tree. Its filename was
mm-kmsan-fix-kmsan-kmalloc-hook-when-no-stack-depots-are-allocated-yet.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Aleksei Nikiforov <aleksei.nikiforov(a)linux.ibm.com>
Subject: mm/kmsan: fix kmsan kmalloc hook when no stack depots are allocated yet
Date: Tue, 30 Sep 2025 13:56:01 +0200
If no stack depot is allocated yet, due to masking out __GFP_RECLAIM flags
kmsan called from kmalloc cannot allocate stack depot. kmsan fails to
record origin and report issues. This may result in KMSAN failing to
report issues.
Reusing flags from kmalloc without modifying them should be safe for kmsan.
For example, such chain of calls is possible:
test_uninit_kmalloc -> kmalloc -> __kmalloc_cache_noprof ->
slab_alloc_node -> slab_post_alloc_hook ->
kmsan_slab_alloc -> kmsan_internal_poison_memory.
Only when it is called in a context without flags present should
__GFP_RECLAIM flags be masked.
With this change all kmsan tests start working reliably.
Eric reported:
: Yes, KMSAN seems to be at least partially broken currently. Besides the
: fact that the kmsan KUnit test is currently failing (which I reported at
: https://lore.kernel.org/r/20250911175145.GA1376@sol), I've confirmed that
: the poly1305 KUnit test causes a KMSAN warning with Aleksei's patch
: applied but does not cause a warning without it. The warning did get
: reached via syzbot somehow
: (https://lore.kernel.org/r/751b3d80293a6f599bb07770afcef24f623c7da0.17610263…),
: so KMSAN must still work in some cases. But it didn't work for me.
Link: https://lkml.kernel.org/r/20250930115600.709776-2-aleksei.nikiforov@linux.i…
Link: https://lkml.kernel.org/r/20251022030213.GA35717@sol
Fixes: 97769a53f117 ("mm, bpf: Introduce try_alloc_pages() for opportunistic page allocation")
Signed-off-by: Aleksei Nikiforov <aleksei.nikiforov(a)linux.ibm.com>
Reviewed-by: Alexander Potapenko <glider(a)google.com>
Tested-by: Eric Biggers <ebiggers(a)kernel.org>
Cc: Alexei Starovoitov <ast(a)kernel.org>
Cc: Dmitriy Vyukov <dvyukov(a)google.com>
Cc: Ilya Leoshkevich <iii(a)linux.ibm.com>
Cc: Marco Elver <elver(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/kmsan/core.c | 3 ---
mm/kmsan/hooks.c | 6 ++++--
mm/kmsan/shadow.c | 2 +-
3 files changed, 5 insertions(+), 6 deletions(-)
--- a/mm/kmsan/core.c~mm-kmsan-fix-kmsan-kmalloc-hook-when-no-stack-depots-are-allocated-yet
+++ a/mm/kmsan/core.c
@@ -72,9 +72,6 @@ depot_stack_handle_t kmsan_save_stack_wi
nr_entries = stack_trace_save(entries, KMSAN_STACK_DEPTH, 0);
- /* Don't sleep. */
- flags &= ~(__GFP_DIRECT_RECLAIM | __GFP_KSWAPD_RECLAIM);
-
handle = stack_depot_save(entries, nr_entries, flags);
return stack_depot_set_extra_bits(handle, extra);
}
--- a/mm/kmsan/hooks.c~mm-kmsan-fix-kmsan-kmalloc-hook-when-no-stack-depots-are-allocated-yet
+++ a/mm/kmsan/hooks.c
@@ -84,7 +84,8 @@ void kmsan_slab_free(struct kmem_cache *
if (s->ctor)
return;
kmsan_enter_runtime();
- kmsan_internal_poison_memory(object, s->object_size, GFP_KERNEL,
+ kmsan_internal_poison_memory(object, s->object_size,
+ GFP_KERNEL & ~(__GFP_RECLAIM),
KMSAN_POISON_CHECK | KMSAN_POISON_FREE);
kmsan_leave_runtime();
}
@@ -114,7 +115,8 @@ void kmsan_kfree_large(const void *ptr)
kmsan_enter_runtime();
page = virt_to_head_page((void *)ptr);
KMSAN_WARN_ON(ptr != page_address(page));
- kmsan_internal_poison_memory((void *)ptr, page_size(page), GFP_KERNEL,
+ kmsan_internal_poison_memory((void *)ptr, page_size(page),
+ GFP_KERNEL & ~(__GFP_RECLAIM),
KMSAN_POISON_CHECK | KMSAN_POISON_FREE);
kmsan_leave_runtime();
}
--- a/mm/kmsan/shadow.c~mm-kmsan-fix-kmsan-kmalloc-hook-when-no-stack-depots-are-allocated-yet
+++ a/mm/kmsan/shadow.c
@@ -208,7 +208,7 @@ void kmsan_free_page(struct page *page,
return;
kmsan_enter_runtime();
kmsan_internal_poison_memory(page_address(page), page_size(page),
- GFP_KERNEL,
+ GFP_KERNEL & ~(__GFP_RECLAIM),
KMSAN_POISON_CHECK | KMSAN_POISON_FREE);
kmsan_leave_runtime();
}
_
Patches currently in -mm which might be from aleksei.nikiforov(a)linux.ibm.com are
The quilt patch titled
Subject: mm/shmem: fix THP allocation and fallback loop
has been removed from the -mm tree. Its filename was
mm-shmem-fix-thp-allocation-and-fallback-loop.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Kairui Song <kasong(a)tencent.com>
Subject: mm/shmem: fix THP allocation and fallback loop
Date: Wed, 22 Oct 2025 18:57:19 +0800
The order check and fallback loop is updating the index value on every
loop. This will cause the index to be wrongly aligned by a larger value
while the loop shrinks the order.
This may result in inserting and returning a folio of the wrong index and
cause data corruption with some userspace workloads [1].
[kasong(a)tencent.com: introduce a temporary variable to improve code]
Link: https://lkml.kernel.org/r/20251023065913.36925-1-ryncsn@gmail.com
Link: https://lore.kernel.org/linux-mm/CAMgjq7DqgAmj25nDUwwu1U2cSGSn8n4-Hqpgotted… [1]
Link: https://lkml.kernel.org/r/20251022105719.18321-1-ryncsn@gmail.com
Link: https://lore.kernel.org/linux-mm/CAMgjq7DqgAmj25nDUwwu1U2cSGSn8n4-Hqpgotted… [1]
Fixes: e7a2ab7b3bb5 ("mm: shmem: add mTHP support for anonymous shmem")
Closes: https://lore.kernel.org/linux-mm/CAMgjq7DqgAmj25nDUwwu1U2cSGSn8n4-Hqpgotted…
Signed-off-by: Kairui Song <kasong(a)tencent.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Acked-by: Zi Yan <ziy(a)nvidia.com>
Reviewed-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Reviewed-by: Barry Song <baohua(a)kernel.org>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Dev Jain <dev.jain(a)arm.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Nico Pache <npache(a)redhat.com>
Cc: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/shmem.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
--- a/mm/shmem.c~mm-shmem-fix-thp-allocation-and-fallback-loop
+++ a/mm/shmem.c
@@ -1882,6 +1882,7 @@ static struct folio *shmem_alloc_and_add
struct shmem_inode_info *info = SHMEM_I(inode);
unsigned long suitable_orders = 0;
struct folio *folio = NULL;
+ pgoff_t aligned_index;
long pages;
int error, order;
@@ -1895,10 +1896,12 @@ static struct folio *shmem_alloc_and_add
order = highest_order(suitable_orders);
while (suitable_orders) {
pages = 1UL << order;
- index = round_down(index, pages);
- folio = shmem_alloc_folio(gfp, order, info, index);
- if (folio)
+ aligned_index = round_down(index, pages);
+ folio = shmem_alloc_folio(gfp, order, info, aligned_index);
+ if (folio) {
+ index = aligned_index;
goto allocated;
+ }
if (pages == HPAGE_PMD_NR)
count_vm_event(THP_FILE_FALLBACK);
_
Patches currently in -mm which might be from kasong(a)tencent.com are
mm-swap-do-not-perform-synchronous-discard-during-allocation.patch
mm-swap-rename-helper-for-setup-bad-slots.patch
mm-swap-cleanup-swap-entry-allocation-parameter.patch
mm-migrate-swap-drop-usage-of-folio_index.patch
mm-swap-remove-redundant-argument-for-isolating-a-cluster.patch
revert-mm-swap-avoid-redundant-swap-device-pinning.patch
The quilt patch titled
Subject: kho: allocate metadata directly from the buddy allocator
has been removed from the -mm tree. Its filename was
liveupdate-kho-allocate-metadata-directly-from-the-buddy-allocator.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Pasha Tatashin <pasha.tatashin(a)soleen.com>
Subject: kho: allocate metadata directly from the buddy allocator
Date: Mon, 20 Oct 2025 20:08:52 -0400
KHO allocates metadata for its preserved memory map using the slab
allocator via kzalloc(). This metadata is temporary and is used by the
next kernel during early boot to find preserved memory.
A problem arises when KFENCE is enabled. kzalloc() calls can be randomly
intercepted by kfence_alloc(), which services the allocation from a
dedicated KFENCE memory pool. This pool is allocated early in boot via
memblock.
When booting via KHO, the memblock allocator is restricted to a "scratch
area", forcing the KFENCE pool to be allocated within it. This creates a
conflict, as the scratch area is expected to be ephemeral and
overwriteable by a subsequent kexec. If KHO metadata is placed in this
KFENCE pool, it leads to memory corruption when the next kernel is loaded.
To fix this, modify KHO to allocate its metadata directly from the buddy
allocator instead of slab.
Link: https://lkml.kernel.org/r/20251021000852.2924827-4-pasha.tatashin@soleen.com
Fixes: fc33e4b44b27 ("kexec: enable KHO support for memory preservation")
Signed-off-by: Pasha Tatashin <pasha.tatashin(a)soleen.com>
Reviewed-by: Pratyush Yadav <pratyush(a)kernel.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt(a)kernel.org>
Reviewed-by: David Matlack <dmatlack(a)google.com>
Cc: Alexander Graf <graf(a)amazon.com>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: Jason Gunthorpe <jgg(a)ziepe.ca>
Cc: Jonathan Corbet <corbet(a)lwn.net>
Cc: Masahiro Yamada <masahiroy(a)kernel.org>
Cc: Miguel Ojeda <ojeda(a)kernel.org>
Cc: Randy Dunlap <rdunlap(a)infradead.org>
Cc: Samiullah Khawaja <skhawaja(a)google.com>
Cc: Tejun Heo <tj(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/gfp.h | 3 +++
kernel/kexec_handover.c | 6 +++---
2 files changed, 6 insertions(+), 3 deletions(-)
--- a/include/linux/gfp.h~liveupdate-kho-allocate-metadata-directly-from-the-buddy-allocator
+++ a/include/linux/gfp.h
@@ -7,6 +7,7 @@
#include <linux/mmzone.h>
#include <linux/topology.h>
#include <linux/alloc_tag.h>
+#include <linux/cleanup.h>
#include <linux/sched.h>
struct vm_area_struct;
@@ -463,4 +464,6 @@ static inline struct folio *folio_alloc_
/* This should be paired with folio_put() rather than free_contig_range(). */
#define folio_alloc_gigantic(...) alloc_hooks(folio_alloc_gigantic_noprof(__VA_ARGS__))
+DEFINE_FREE(free_page, void *, free_page((unsigned long)_T))
+
#endif /* __LINUX_GFP_H */
--- a/kernel/kexec_handover.c~liveupdate-kho-allocate-metadata-directly-from-the-buddy-allocator
+++ a/kernel/kexec_handover.c
@@ -142,7 +142,7 @@ static void *xa_load_or_alloc(struct xar
if (res)
return res;
- void *elm __free(kfree) = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ void *elm __free(free_page) = (void *)get_zeroed_page(GFP_KERNEL);
if (!elm)
return ERR_PTR(-ENOMEM);
@@ -348,9 +348,9 @@ static_assert(sizeof(struct khoser_mem_c
static struct khoser_mem_chunk *new_chunk(struct khoser_mem_chunk *cur_chunk,
unsigned long order)
{
- struct khoser_mem_chunk *chunk __free(kfree) = NULL;
+ struct khoser_mem_chunk *chunk __free(free_page) = NULL;
- chunk = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ chunk = (void *)get_zeroed_page(GFP_KERNEL);
if (!chunk)
return ERR_PTR(-ENOMEM);
_
Patches currently in -mm which might be from pasha.tatashin(a)soleen.com are
lib-test_kho-check-if-kho-is-enabled.patch
kho-make-debugfs-interface-optional.patch
kho-add-interfaces-to-unpreserve-folios-page-ranges-and-vmalloc.patch
memblock-unpreserve-memory-in-case-of-error.patch
test_kho-unpreserve-memory-in-case-of-error.patch
kho-dont-unpreserve-memory-during-abort.patch
liveupdate-kho-move-to-kernel-liveupdate.patch
liveupdate-kho-move-to-kernel-liveupdate-fix.patch
maintainers-update-kho-maintainers.patch
liveupdate-luo_core-luo_ioctl-live-update-orchestrator.patch
liveupdate-luo_core-integrate-with-kho.patch
reboot-call-liveupdate_reboot-before-kexec.patch
liveupdate-kconfig-make-debugfs-optional.patch
liveupdate-kho-when-live-update-add-kho-image-during-kexec-load.patch
liveupdate-luo_session-add-sessions-support.patch
liveupdate-luo_ioctl-add-user-interface.patch
liveupdate-luo_file-implement-file-systems-callbacks.patch
liveupdate-luo_session-add-ioctls-for-file-preservation-and-state-management.patch
liveupdate-luo_flb-introduce-file-lifecycle-bound-global-state.patch
docs-add-luo-documentation.patch
maintainers-add-liveupdate-entry.patch
selftests-liveupdate-add-userspace-api-selftests.patch
selftests-liveupdate-add-kexec-based-selftest-for-session-lifecycle.patch
selftests-liveupdate-add-kexec-test-for-multiple-and-empty-sessions.patch
tests-liveupdate-add-in-kernel-liveupdate-test.patch
The quilt patch titled
Subject: kho: increase metadata bitmap size to PAGE_SIZE
has been removed from the -mm tree. Its filename was
liveupdate-kho-increase-metadata-bitmap-size-to-page_size.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Pasha Tatashin <pasha.tatashin(a)soleen.com>
Subject: kho: increase metadata bitmap size to PAGE_SIZE
Date: Mon, 20 Oct 2025 20:08:51 -0400
KHO memory preservation metadata is preserved in 512 byte chunks which
requires their allocation from slab allocator. Slabs are not safe to be
used with KHO because of kfence, and because partial slabs may lead leaks
to the next kernel. Change the size to be PAGE_SIZE.
The kfence specifically may cause memory corruption, where it randomly
provides slab objects that can be within the scratch area. The reason for
that is that kfence allocates its objects prior to KHO scratch is marked
as CMA region.
While this change could potentially increase metadata overhead on systems
with sparsely preserved memory, this is being mitigated by ongoing work to
reduce sparseness during preservation via 1G guest pages. Furthermore,
this change aligns with future work on a stateless KHO, which will also
use page-sized bitmaps for its radix tree metadata.
Link: https://lkml.kernel.org/r/20251021000852.2924827-3-pasha.tatashin@soleen.com
Fixes: fc33e4b44b27 ("kexec: enable KHO support for memory preservation")
Signed-off-by: Pasha Tatashin <pasha.tatashin(a)soleen.com>
Reviewed-by: Mike Rapoport (Microsoft) <rppt(a)kernel.org>
Reviewed-by: Pratyush Yadav <pratyush(a)kernel.org>
Cc: Alexander Graf <graf(a)amazon.com>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: David Matlack <dmatlack(a)google.com>
Cc: Jason Gunthorpe <jgg(a)ziepe.ca>
Cc: Jonathan Corbet <corbet(a)lwn.net>
Cc: Masahiro Yamada <masahiroy(a)kernel.org>
Cc: Miguel Ojeda <ojeda(a)kernel.org>
Cc: Randy Dunlap <rdunlap(a)infradead.org>
Cc: Samiullah Khawaja <skhawaja(a)google.com>
Cc: Tejun Heo <tj(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/kexec_handover.c | 21 +++++++++++----------
1 file changed, 11 insertions(+), 10 deletions(-)
--- a/kernel/kexec_handover.c~liveupdate-kho-increase-metadata-bitmap-size-to-page_size
+++ a/kernel/kexec_handover.c
@@ -69,10 +69,10 @@ early_param("kho", kho_parse_enable);
* Keep track of memory that is to be preserved across KHO.
*
* The serializing side uses two levels of xarrays to manage chunks of per-order
- * 512 byte bitmaps. For instance if PAGE_SIZE = 4096, the entire 1G order of a
- * 1TB system would fit inside a single 512 byte bitmap. For order 0 allocations
- * each bitmap will cover 16M of address space. Thus, for 16G of memory at most
- * 512K of bitmap memory will be needed for order 0.
+ * PAGE_SIZE byte bitmaps. For instance if PAGE_SIZE = 4096, the entire 1G order
+ * of a 8TB system would fit inside a single 4096 byte bitmap. For order 0
+ * allocations each bitmap will cover 128M of address space. Thus, for 16G of
+ * memory at most 512K of bitmap memory will be needed for order 0.
*
* This approach is fully incremental, as the serialization progresses folios
* can continue be aggregated to the tracker. The final step, immediately prior
@@ -80,12 +80,14 @@ early_param("kho", kho_parse_enable);
* successor kernel to parse.
*/
-#define PRESERVE_BITS (512 * 8)
+#define PRESERVE_BITS (PAGE_SIZE * 8)
struct kho_mem_phys_bits {
DECLARE_BITMAP(preserve, PRESERVE_BITS);
};
+static_assert(sizeof(struct kho_mem_phys_bits) == PAGE_SIZE);
+
struct kho_mem_phys {
/*
* Points to kho_mem_phys_bits, a sparse bitmap array. Each bit is sized
@@ -133,19 +135,19 @@ static struct kho_out kho_out = {
.finalized = false,
};
-static void *xa_load_or_alloc(struct xarray *xa, unsigned long index, size_t sz)
+static void *xa_load_or_alloc(struct xarray *xa, unsigned long index)
{
void *res = xa_load(xa, index);
if (res)
return res;
- void *elm __free(kfree) = kzalloc(sz, GFP_KERNEL);
+ void *elm __free(kfree) = kzalloc(PAGE_SIZE, GFP_KERNEL);
if (!elm)
return ERR_PTR(-ENOMEM);
- if (WARN_ON(kho_scratch_overlap(virt_to_phys(elm), sz)))
+ if (WARN_ON(kho_scratch_overlap(virt_to_phys(elm), PAGE_SIZE)))
return ERR_PTR(-EINVAL);
res = xa_cmpxchg(xa, index, NULL, elm, GFP_KERNEL);
@@ -218,8 +220,7 @@ static int __kho_preserve_order(struct k
}
}
- bits = xa_load_or_alloc(&physxa->phys_bits, pfn_high / PRESERVE_BITS,
- sizeof(*bits));
+ bits = xa_load_or_alloc(&physxa->phys_bits, pfn_high / PRESERVE_BITS);
if (IS_ERR(bits))
return PTR_ERR(bits);
_
Patches currently in -mm which might be from pasha.tatashin(a)soleen.com are
lib-test_kho-check-if-kho-is-enabled.patch
kho-make-debugfs-interface-optional.patch
kho-add-interfaces-to-unpreserve-folios-page-ranges-and-vmalloc.patch
memblock-unpreserve-memory-in-case-of-error.patch
test_kho-unpreserve-memory-in-case-of-error.patch
kho-dont-unpreserve-memory-during-abort.patch
liveupdate-kho-move-to-kernel-liveupdate.patch
liveupdate-kho-move-to-kernel-liveupdate-fix.patch
maintainers-update-kho-maintainers.patch
liveupdate-luo_core-luo_ioctl-live-update-orchestrator.patch
liveupdate-luo_core-integrate-with-kho.patch
reboot-call-liveupdate_reboot-before-kexec.patch
liveupdate-kconfig-make-debugfs-optional.patch
liveupdate-kho-when-live-update-add-kho-image-during-kexec-load.patch
liveupdate-luo_session-add-sessions-support.patch
liveupdate-luo_ioctl-add-user-interface.patch
liveupdate-luo_file-implement-file-systems-callbacks.patch
liveupdate-luo_session-add-ioctls-for-file-preservation-and-state-management.patch
liveupdate-luo_flb-introduce-file-lifecycle-bound-global-state.patch
docs-add-luo-documentation.patch
maintainers-add-liveupdate-entry.patch
selftests-liveupdate-add-userspace-api-selftests.patch
selftests-liveupdate-add-kexec-based-selftest-for-session-lifecycle.patch
selftests-liveupdate-add-kexec-test-for-multiple-and-empty-sessions.patch
tests-liveupdate-add-in-kernel-liveupdate-test.patch
The quilt patch titled
Subject: kho: warn and fail on metadata or preserved memory in scratch area
has been removed from the -mm tree. Its filename was
liveupdate-kho-warn-and-fail-on-metadata-or-preserved-memory-in-scratch-area.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Pasha Tatashin <pasha.tatashin(a)soleen.com>
Subject: kho: warn and fail on metadata or preserved memory in scratch area
Date: Mon, 20 Oct 2025 20:08:50 -0400
Patch series "KHO: kfence + KHO memory corruption fix", v3.
This series fixes a memory corruption bug in KHO that occurs when KFENCE
is enabled.
The root cause is that KHO metadata, allocated via kzalloc(), can be
randomly serviced by kfence_alloc(). When a kernel boots via KHO, the
early memblock allocator is restricted to a "scratch area". This forces
the KFENCE pool to be allocated within this scratch area, creating a
conflict. If KHO metadata is subsequently placed in this pool, it gets
corrupted during the next kexec operation.
Google is using KHO and have had obscure crashes due to this memory
corruption, with stacks all over the place. I would prefer this fix to be
properly backported to stable so we can also automatically consume it once
we switch to the upstream KHO.
Patch 1/3 introduces a debug-only feature (CONFIG_KEXEC_HANDOVER_DEBUG)
that adds checks to detect and fail any operation that attempts to place
KHO metadata or preserved memory within the scratch area. This serves as
a validation and diagnostic tool to confirm the problem without affecting
production builds.
Patch 2/3 Increases bitmap to PAGE_SIZE, so buddy allocator can be used.
Patch 3/3 Provides the fix by modifying KHO to allocate its metadata
directly from the buddy allocator instead of slab. This bypasses the
KFENCE interception entirely.
This patch (of 3):
It is invalid for KHO metadata or preserved memory regions to be located
within the KHO scratch area, as this area is overwritten when the next
kernel is loaded, and used early in boot by the next kernel. This can
lead to memory corruption.
Add checks to kho_preserve_* and KHO's internal metadata allocators
(xa_load_or_alloc, new_chunk) to verify that the physical address of the
memory does not overlap with any defined scratch region. If an overlap is
detected, the operation will fail and a WARN_ON is triggered. To avoid
performance overhead in production kernels, these checks are enabled only
when CONFIG_KEXEC_HANDOVER_DEBUG is selected.
[rppt(a)kernel.org: fix KEXEC_HANDOVER_DEBUG Kconfig dependency]
Link: https://lkml.kernel.org/r/aQHUyyFtiNZhx8jo@kernel.org
[pasha.tatashin(a)soleen.com: build fix]
Link: https://lkml.kernel.org/r/CA+CK2bBnorfsTymKtv4rKvqGBHs=y=MjEMMRg_tE-RME6n-z…
Link: https://lkml.kernel.org/r/20251021000852.2924827-1-pasha.tatashin@soleen.com
Link: https://lkml.kernel.org/r/20251021000852.2924827-2-pasha.tatashin@soleen.com
Fixes: fc33e4b44b27 ("kexec: enable KHO support for memory preservation")
Signed-off-by: Pasha Tatashin <pasha.tatashin(a)soleen.com>
Signed-off-by: Mike Rapoport <rppt(a)kernel.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt(a)kernel.org>
Reviewed-by: Pratyush Yadav <pratyush(a)kernel.org>
Cc: Alexander Graf <graf(a)amazon.com>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: David Matlack <dmatlack(a)google.com>
Cc: Jason Gunthorpe <jgg(a)ziepe.ca>
Cc: Jonathan Corbet <corbet(a)lwn.net>
Cc: Masahiro Yamada <masahiroy(a)kernel.org>
Cc: Miguel Ojeda <ojeda(a)kernel.org>
Cc: Randy Dunlap <rdunlap(a)infradead.org>
Cc: Samiullah Khawaja <skhawaja(a)google.com>
Cc: Tejun Heo <tj(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/Kconfig.kexec | 9 ++++
kernel/Makefile | 1
kernel/kexec_handover.c | 57 +++++++++++++++++++----------
kernel/kexec_handover_debug.c | 25 ++++++++++++
kernel/kexec_handover_internal.h | 20 ++++++++++
5 files changed, 93 insertions(+), 19 deletions(-)
--- a/kernel/Kconfig.kexec~liveupdate-kho-warn-and-fail-on-metadata-or-preserved-memory-in-scratch-area
+++ a/kernel/Kconfig.kexec
@@ -109,6 +109,15 @@ config KEXEC_HANDOVER
to keep data or state alive across the kexec. For this to work,
both source and target kernels need to have this option enabled.
+config KEXEC_HANDOVER_DEBUG
+ bool "Enable Kexec Handover debug checks"
+ depends on KEXEC_HANDOVER
+ help
+ This option enables extra sanity checks for the Kexec Handover
+ subsystem. Since, KHO performance is crucial in live update
+ scenarios and the extra code might be adding overhead it is
+ only optionally enabled.
+
config CRASH_DUMP
bool "kernel crash dumps"
default ARCH_DEFAULT_CRASH_DUMP
--- a/kernel/kexec_handover.c~liveupdate-kho-warn-and-fail-on-metadata-or-preserved-memory-in-scratch-area
+++ a/kernel/kexec_handover.c
@@ -8,6 +8,7 @@
#define pr_fmt(fmt) "KHO: " fmt
+#include <linux/cleanup.h>
#include <linux/cma.h>
#include <linux/count_zeros.h>
#include <linux/debugfs.h>
@@ -22,6 +23,7 @@
#include <asm/early_ioremap.h>
+#include "kexec_handover_internal.h"
/*
* KHO is tightly coupled with mm init and needs access to some of mm
* internal APIs.
@@ -133,26 +135,26 @@ static struct kho_out kho_out = {
static void *xa_load_or_alloc(struct xarray *xa, unsigned long index, size_t sz)
{
- void *elm, *res;
+ void *res = xa_load(xa, index);
- elm = xa_load(xa, index);
- if (elm)
- return elm;
+ if (res)
+ return res;
+
+ void *elm __free(kfree) = kzalloc(sz, GFP_KERNEL);
- elm = kzalloc(sz, GFP_KERNEL);
if (!elm)
return ERR_PTR(-ENOMEM);
+ if (WARN_ON(kho_scratch_overlap(virt_to_phys(elm), sz)))
+ return ERR_PTR(-EINVAL);
+
res = xa_cmpxchg(xa, index, NULL, elm, GFP_KERNEL);
if (xa_is_err(res))
- res = ERR_PTR(xa_err(res));
-
- if (res) {
- kfree(elm);
+ return ERR_PTR(xa_err(res));
+ else if (res)
return res;
- }
- return elm;
+ return no_free_ptr(elm);
}
static void __kho_unpreserve(struct kho_mem_track *track, unsigned long pfn,
@@ -345,15 +347,19 @@ static_assert(sizeof(struct khoser_mem_c
static struct khoser_mem_chunk *new_chunk(struct khoser_mem_chunk *cur_chunk,
unsigned long order)
{
- struct khoser_mem_chunk *chunk;
+ struct khoser_mem_chunk *chunk __free(kfree) = NULL;
chunk = kzalloc(PAGE_SIZE, GFP_KERNEL);
if (!chunk)
- return NULL;
+ return ERR_PTR(-ENOMEM);
+
+ if (WARN_ON(kho_scratch_overlap(virt_to_phys(chunk), PAGE_SIZE)))
+ return ERR_PTR(-EINVAL);
+
chunk->hdr.order = order;
if (cur_chunk)
KHOSER_STORE_PTR(cur_chunk->hdr.next, chunk);
- return chunk;
+ return no_free_ptr(chunk);
}
static void kho_mem_ser_free(struct khoser_mem_chunk *first_chunk)
@@ -374,14 +380,17 @@ static int kho_mem_serialize(struct kho_
struct khoser_mem_chunk *chunk = NULL;
struct kho_mem_phys *physxa;
unsigned long order;
+ int err = -ENOMEM;
xa_for_each(&ser->track.orders, order, physxa) {
struct kho_mem_phys_bits *bits;
unsigned long phys;
chunk = new_chunk(chunk, order);
- if (!chunk)
+ if (IS_ERR(chunk)) {
+ err = PTR_ERR(chunk);
goto err_free;
+ }
if (!first_chunk)
first_chunk = chunk;
@@ -391,8 +400,10 @@ static int kho_mem_serialize(struct kho_
if (chunk->hdr.num_elms == ARRAY_SIZE(chunk->bitmaps)) {
chunk = new_chunk(chunk, order);
- if (!chunk)
+ if (IS_ERR(chunk)) {
+ err = PTR_ERR(chunk);
goto err_free;
+ }
}
elm = &chunk->bitmaps[chunk->hdr.num_elms];
@@ -409,7 +420,7 @@ static int kho_mem_serialize(struct kho_
err_free:
kho_mem_ser_free(first_chunk);
- return -ENOMEM;
+ return err;
}
static void __init deserialize_bitmap(unsigned int order,
@@ -465,8 +476,8 @@ static void __init kho_mem_deserialize(c
* area for early allocations that happen before page allocator is
* initialized.
*/
-static struct kho_scratch *kho_scratch;
-static unsigned int kho_scratch_cnt;
+struct kho_scratch *kho_scratch;
+unsigned int kho_scratch_cnt;
/*
* The scratch areas are scaled by default as percent of memory allocated from
@@ -752,6 +763,9 @@ int kho_preserve_folio(struct folio *fol
const unsigned int order = folio_order(folio);
struct kho_mem_track *track = &kho_out.ser.track;
+ if (WARN_ON(kho_scratch_overlap(pfn << PAGE_SHIFT, PAGE_SIZE << order)))
+ return -EINVAL;
+
return __kho_preserve_order(track, pfn, order);
}
EXPORT_SYMBOL_GPL(kho_preserve_folio);
@@ -775,6 +789,11 @@ int kho_preserve_pages(struct page *page
unsigned long failed_pfn = 0;
int err = 0;
+ if (WARN_ON(kho_scratch_overlap(start_pfn << PAGE_SHIFT,
+ nr_pages << PAGE_SHIFT))) {
+ return -EINVAL;
+ }
+
while (pfn < end_pfn) {
const unsigned int order =
min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn));
diff --git a/kernel/kexec_handover_debug.c a/kernel/kexec_handover_debug.c
new file mode 100644
--- /dev/null
+++ a/kernel/kexec_handover_debug.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * kexec_handover_debug.c - kexec handover optional debug functionality
+ * Copyright (C) 2025 Google LLC, Pasha Tatashin <pasha.tatashin(a)soleen.com>
+ */
+
+#define pr_fmt(fmt) "KHO: " fmt
+
+#include "kexec_handover_internal.h"
+
+bool kho_scratch_overlap(phys_addr_t phys, size_t size)
+{
+ phys_addr_t scratch_start, scratch_end;
+ unsigned int i;
+
+ for (i = 0; i < kho_scratch_cnt; i++) {
+ scratch_start = kho_scratch[i].addr;
+ scratch_end = kho_scratch[i].addr + kho_scratch[i].size;
+
+ if (phys < scratch_end && (phys + size) > scratch_start)
+ return true;
+ }
+
+ return false;
+}
diff --git a/kernel/kexec_handover_internal.h a/kernel/kexec_handover_internal.h
new file mode 100644
--- /dev/null
+++ a/kernel/kexec_handover_internal.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LINUX_KEXEC_HANDOVER_INTERNAL_H
+#define LINUX_KEXEC_HANDOVER_INTERNAL_H
+
+#include <linux/kexec_handover.h>
+#include <linux/types.h>
+
+extern struct kho_scratch *kho_scratch;
+extern unsigned int kho_scratch_cnt;
+
+#ifdef CONFIG_KEXEC_HANDOVER_DEBUG
+bool kho_scratch_overlap(phys_addr_t phys, size_t size);
+#else
+static inline bool kho_scratch_overlap(phys_addr_t phys, size_t size)
+{
+ return false;
+}
+#endif /* CONFIG_KEXEC_HANDOVER_DEBUG */
+
+#endif /* LINUX_KEXEC_HANDOVER_INTERNAL_H */
--- a/kernel/Makefile~liveupdate-kho-warn-and-fail-on-metadata-or-preserved-memory-in-scratch-area
+++ a/kernel/Makefile
@@ -83,6 +83,7 @@ obj-$(CONFIG_KEXEC) += kexec.o
obj-$(CONFIG_KEXEC_FILE) += kexec_file.o
obj-$(CONFIG_KEXEC_ELF) += kexec_elf.o
obj-$(CONFIG_KEXEC_HANDOVER) += kexec_handover.o
+obj-$(CONFIG_KEXEC_HANDOVER_DEBUG) += kexec_handover_debug.o
obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
obj-$(CONFIG_COMPAT) += compat.o
obj-$(CONFIG_CGROUPS) += cgroup/
_
Patches currently in -mm which might be from pasha.tatashin(a)soleen.com are
lib-test_kho-check-if-kho-is-enabled.patch
kho-make-debugfs-interface-optional.patch
kho-add-interfaces-to-unpreserve-folios-page-ranges-and-vmalloc.patch
memblock-unpreserve-memory-in-case-of-error.patch
test_kho-unpreserve-memory-in-case-of-error.patch
kho-dont-unpreserve-memory-during-abort.patch
liveupdate-kho-move-to-kernel-liveupdate.patch
liveupdate-kho-move-to-kernel-liveupdate-fix.patch
maintainers-update-kho-maintainers.patch
liveupdate-luo_core-luo_ioctl-live-update-orchestrator.patch
liveupdate-luo_core-integrate-with-kho.patch
reboot-call-liveupdate_reboot-before-kexec.patch
liveupdate-kconfig-make-debugfs-optional.patch
liveupdate-kho-when-live-update-add-kho-image-during-kexec-load.patch
liveupdate-luo_session-add-sessions-support.patch
liveupdate-luo_ioctl-add-user-interface.patch
liveupdate-luo_file-implement-file-systems-callbacks.patch
liveupdate-luo_session-add-ioctls-for-file-preservation-and-state-management.patch
liveupdate-luo_flb-introduce-file-lifecycle-bound-global-state.patch
docs-add-luo-documentation.patch
maintainers-add-liveupdate-entry.patch
selftests-liveupdate-add-userspace-api-selftests.patch
selftests-liveupdate-add-kexec-based-selftest-for-session-lifecycle.patch
selftests-liveupdate-add-kexec-test-for-multiple-and-empty-sessions.patch
tests-liveupdate-add-in-kernel-liveupdate-test.patch