The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 8a0e4bdddd1c998b894d879a1d22f1e745606215
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122926-sizably-bountiful-a98b@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8a0e4bdddd1c998b894d879a1d22f1e745606215 Mon Sep 17 00:00:00 2001
From: Wei Yang <richard.weiyang(a)gmail.com>
Date: Thu, 6 Nov 2025 03:41:55 +0000
Subject: [PATCH] mm/huge_memory: merge uniform_split_supported() and
non_uniform_split_supported()
uniform_split_supported() and non_uniform_split_supported() share
significantly similar logic.
The only functional difference is that uniform_split_supported() includes
an additional check on the requested @new_order.
The reason for this check comes from two aspects:
* some file systems and the swap cache only support order-0 folios
* the behavioral difference between uniform and non-uniform split
The behavioral difference between uniform and non-uniform split:
* uniform split splits the folio directly to @new_order
* non-uniform split creates after-split folios with orders from
folio_order(folio) - 1 to new_order.
This means that for a non-uniform split, or a uniform split to a non-zero
@new_order, we should check the file system and swap cache support
respectively.
This commit unifies the logic and merges the two functions into a single
combined helper, removing redundant code and simplifying the split
support checking mechanism.
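As a rough illustration of the merged rule, here is a minimal sketch (not the
kernel function shown in the diff below; is_anon, fs_supports_large_folios and
in_swapcache are hypothetical stand-in predicates for the real folio tests):

#include <stdbool.h>

enum split_type { SPLIT_TYPE_UNIFORM, SPLIT_TYPE_NON_UNIFORM };

static bool split_supported(bool is_anon, bool fs_supports_large_folios,
                            bool in_swapcache, unsigned int new_order,
                            enum split_type type)
{
        if (is_anon) {
                /* order-1 is not supported for anonymous THP */
                if (new_order == 1)
                        return false;
        } else if (type == SPLIT_TYPE_NON_UNIFORM || new_order) {
                /*
                 * Every case except a uniform split to order 0 leaves
                 * large after-split folios behind, so the mapping must
                 * support large folios.
                 */
                if (!fs_supports_large_folios)
                        return false;
        }

        /* a swapcache folio can only be split uniformly down to order 0 */
        if ((type == SPLIT_TYPE_NON_UNIFORM || new_order) && in_swapcache)
                return false;

        return true;
}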
Link: https://lkml.kernel.org/r/20251106034155.21398-3-richard.weiyang@gmail.com
Fixes: c010d47f107f ("mm: thp: split huge page to any lower order pages")
Signed-off-by: Wei Yang <richard.weiyang(a)gmail.com>
Reviewed-by: Zi Yan <ziy(a)nvidia.com>
Cc: Zi Yan <ziy(a)nvidia.com>
Cc: "David Hildenbrand (Red Hat)" <david(a)kernel.org>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Barry Song <baohua(a)kernel.org>
Cc: Dev Jain <dev.jain(a)arm.com>
Cc: Lance Yang <lance.yang(a)linux.dev>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Nico Pache <npache(a)redhat.com>
Cc: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index b74708dc5b5f..19d4a5f52ca2 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -374,10 +374,8 @@ int __split_huge_page_to_list_to_order(struct page *page, struct list_head *list
unsigned int new_order, bool unmapped);
int min_order_for_split(struct folio *folio);
int split_folio_to_list(struct folio *folio, struct list_head *list);
-bool uniform_split_supported(struct folio *folio, unsigned int new_order,
- bool warns);
-bool non_uniform_split_supported(struct folio *folio, unsigned int new_order,
- bool warns);
+bool folio_split_supported(struct folio *folio, unsigned int new_order,
+ enum split_type split_type, bool warns);
int folio_split(struct folio *folio, unsigned int new_order, struct page *page,
struct list_head *list);
@@ -408,7 +406,7 @@ static inline int split_huge_page_to_order(struct page *page, unsigned int new_o
static inline int try_folio_split_to_order(struct folio *folio,
struct page *page, unsigned int new_order)
{
- if (!non_uniform_split_supported(folio, new_order, /* warns= */ false))
+ if (!folio_split_supported(folio, new_order, SPLIT_TYPE_NON_UNIFORM, /* warns= */ false))
return split_huge_page_to_order(&folio->page, new_order);
return folio_split(folio, new_order, page, NULL);
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 4118f330c55e..d79a4bb363de 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3593,8 +3593,8 @@ static int __split_unmapped_folio(struct folio *folio, int new_order,
return 0;
}
-bool non_uniform_split_supported(struct folio *folio, unsigned int new_order,
- bool warns)
+bool folio_split_supported(struct folio *folio, unsigned int new_order,
+ enum split_type split_type, bool warns)
{
if (folio_test_anon(folio)) {
/* order-1 is not supported for anonymous THP. */
@@ -3602,48 +3602,41 @@ bool non_uniform_split_supported(struct folio *folio, unsigned int new_order,
"Cannot split to order-1 folio");
if (new_order == 1)
return false;
- } else if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
- !mapping_large_folio_support(folio->mapping)) {
- /*
- * No split if the file system does not support large folio.
- * Note that we might still have THPs in such mappings due to
- * CONFIG_READ_ONLY_THP_FOR_FS. But in that case, the mapping
- * does not actually support large folios properly.
- */
- VM_WARN_ONCE(warns,
- "Cannot split file folio to non-0 order");
- return false;
- }
-
- /* Only swapping a whole PMD-mapped folio is supported */
- if (folio_test_swapcache(folio)) {
- VM_WARN_ONCE(warns,
- "Cannot split swapcache folio to non-0 order");
- return false;
- }
-
- return true;
-}
-
-/* See comments in non_uniform_split_supported() */
-bool uniform_split_supported(struct folio *folio, unsigned int new_order,
- bool warns)
-{
- if (folio_test_anon(folio)) {
- VM_WARN_ONCE(warns && new_order == 1,
- "Cannot split to order-1 folio");
- if (new_order == 1)
- return false;
- } else if (new_order) {
+ } else if (split_type == SPLIT_TYPE_NON_UNIFORM || new_order) {
if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
!mapping_large_folio_support(folio->mapping)) {
+ /*
+ * We can always split a folio down to a single page
+ * (new_order == 0) uniformly.
+ *
+ * For any other scenario
+ * a) uniform split targeting a large folio
+ * (new_order > 0)
+ * b) any non-uniform split
+ * we must confirm that the file system supports large
+ * folios.
+ *
+ * Note that we might still have THPs in such
+ * mappings, which is created from khugepaged when
+ * CONFIG_READ_ONLY_THP_FOR_FS is enabled. But in that
+ * case, the mapping does not actually support large
+ * folios properly.
+ */
VM_WARN_ONCE(warns,
"Cannot split file folio to non-0 order");
return false;
}
}
- if (new_order && folio_test_swapcache(folio)) {
+ /*
+ * swapcache folio could only be split to order 0
+ *
+ * non-uniform split creates after-split folios with orders from
+ * folio_order(folio) - 1 to new_order, making it not suitable for any
+ * swapcache folio split. Only uniform split to order-0 can be used
+ * here.
+ */
+ if ((split_type == SPLIT_TYPE_NON_UNIFORM || new_order) && folio_test_swapcache(folio)) {
VM_WARN_ONCE(warns,
"Cannot split swapcache folio to non-0 order");
return false;
@@ -3711,11 +3704,7 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
if (new_order >= old_order)
return -EINVAL;
- if (split_type == SPLIT_TYPE_UNIFORM && !uniform_split_supported(folio, new_order, true))
- return -EINVAL;
-
- if (split_type == SPLIT_TYPE_NON_UNIFORM &&
- !non_uniform_split_supported(folio, new_order, true))
+ if (!folio_split_supported(folio, new_order, split_type, /* warn = */ true))
return -EINVAL;
is_hzp = is_huge_zero_folio(folio);
The patch below does not apply to the 6.18-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.18.y
git checkout FETCH_HEAD
git cherry-pick -x 8a0e4bdddd1c998b894d879a1d22f1e745606215
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122925-victory-numeral-2346@gregkh' --subject-prefix 'PATCH 6.18.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8a0e4bdddd1c998b894d879a1d22f1e745606215 Mon Sep 17 00:00:00 2001
From: Wei Yang <richard.weiyang(a)gmail.com>
Date: Thu, 6 Nov 2025 03:41:55 +0000
Subject: [PATCH] mm/huge_memory: merge uniform_split_supported() and
non_uniform_split_supported()
uniform_split_supported() and non_uniform_split_supported() share
significantly similar logic.
The only functional difference is that uniform_split_supported() includes
an additional check on the requested @new_order.
The reason for this check comes from two aspects:
* some file systems and the swap cache only support order-0 folios
* the behavioral difference between uniform and non-uniform split
The behavioral difference between uniform and non-uniform split:
* uniform split splits the folio directly to @new_order
* non-uniform split creates after-split folios with orders from
folio_order(folio) - 1 to new_order.
This means that for a non-uniform split, or a uniform split to a non-zero
@new_order, we should check the file system and swap cache support
respectively.
This commit unifies the logic and merges the two functions into a single
combined helper, removing redundant code and simplifying the split
support checking mechanism.
Link: https://lkml.kernel.org/r/20251106034155.21398-3-richard.weiyang@gmail.com
Fixes: c010d47f107f ("mm: thp: split huge page to any lower order pages")
Signed-off-by: Wei Yang <richard.weiyang(a)gmail.com>
Reviewed-by: Zi Yan <ziy(a)nvidia.com>
Cc: Zi Yan <ziy(a)nvidia.com>
Cc: "David Hildenbrand (Red Hat)" <david(a)kernel.org>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Barry Song <baohua(a)kernel.org>
Cc: Dev Jain <dev.jain(a)arm.com>
Cc: Lance Yang <lance.yang(a)linux.dev>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Nico Pache <npache(a)redhat.com>
Cc: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index b74708dc5b5f..19d4a5f52ca2 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -374,10 +374,8 @@ int __split_huge_page_to_list_to_order(struct page *page, struct list_head *list
unsigned int new_order, bool unmapped);
int min_order_for_split(struct folio *folio);
int split_folio_to_list(struct folio *folio, struct list_head *list);
-bool uniform_split_supported(struct folio *folio, unsigned int new_order,
- bool warns);
-bool non_uniform_split_supported(struct folio *folio, unsigned int new_order,
- bool warns);
+bool folio_split_supported(struct folio *folio, unsigned int new_order,
+ enum split_type split_type, bool warns);
int folio_split(struct folio *folio, unsigned int new_order, struct page *page,
struct list_head *list);
@@ -408,7 +406,7 @@ static inline int split_huge_page_to_order(struct page *page, unsigned int new_o
static inline int try_folio_split_to_order(struct folio *folio,
struct page *page, unsigned int new_order)
{
- if (!non_uniform_split_supported(folio, new_order, /* warns= */ false))
+ if (!folio_split_supported(folio, new_order, SPLIT_TYPE_NON_UNIFORM, /* warns= */ false))
return split_huge_page_to_order(&folio->page, new_order);
return folio_split(folio, new_order, page, NULL);
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 4118f330c55e..d79a4bb363de 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3593,8 +3593,8 @@ static int __split_unmapped_folio(struct folio *folio, int new_order,
return 0;
}
-bool non_uniform_split_supported(struct folio *folio, unsigned int new_order,
- bool warns)
+bool folio_split_supported(struct folio *folio, unsigned int new_order,
+ enum split_type split_type, bool warns)
{
if (folio_test_anon(folio)) {
/* order-1 is not supported for anonymous THP. */
@@ -3602,48 +3602,41 @@ bool non_uniform_split_supported(struct folio *folio, unsigned int new_order,
"Cannot split to order-1 folio");
if (new_order == 1)
return false;
- } else if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
- !mapping_large_folio_support(folio->mapping)) {
- /*
- * No split if the file system does not support large folio.
- * Note that we might still have THPs in such mappings due to
- * CONFIG_READ_ONLY_THP_FOR_FS. But in that case, the mapping
- * does not actually support large folios properly.
- */
- VM_WARN_ONCE(warns,
- "Cannot split file folio to non-0 order");
- return false;
- }
-
- /* Only swapping a whole PMD-mapped folio is supported */
- if (folio_test_swapcache(folio)) {
- VM_WARN_ONCE(warns,
- "Cannot split swapcache folio to non-0 order");
- return false;
- }
-
- return true;
-}
-
-/* See comments in non_uniform_split_supported() */
-bool uniform_split_supported(struct folio *folio, unsigned int new_order,
- bool warns)
-{
- if (folio_test_anon(folio)) {
- VM_WARN_ONCE(warns && new_order == 1,
- "Cannot split to order-1 folio");
- if (new_order == 1)
- return false;
- } else if (new_order) {
+ } else if (split_type == SPLIT_TYPE_NON_UNIFORM || new_order) {
if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
!mapping_large_folio_support(folio->mapping)) {
+ /*
+ * We can always split a folio down to a single page
+ * (new_order == 0) uniformly.
+ *
+ * For any other scenario
+ * a) uniform split targeting a large folio
+ * (new_order > 0)
+ * b) any non-uniform split
+ * we must confirm that the file system supports large
+ * folios.
+ *
+ * Note that we might still have THPs in such
+ * mappings, which is created from khugepaged when
+ * CONFIG_READ_ONLY_THP_FOR_FS is enabled. But in that
+ * case, the mapping does not actually support large
+ * folios properly.
+ */
VM_WARN_ONCE(warns,
"Cannot split file folio to non-0 order");
return false;
}
}
- if (new_order && folio_test_swapcache(folio)) {
+ /*
+ * swapcache folio could only be split to order 0
+ *
+ * non-uniform split creates after-split folios with orders from
+ * folio_order(folio) - 1 to new_order, making it not suitable for any
+ * swapcache folio split. Only uniform split to order-0 can be used
+ * here.
+ */
+ if ((split_type == SPLIT_TYPE_NON_UNIFORM || new_order) && folio_test_swapcache(folio)) {
VM_WARN_ONCE(warns,
"Cannot split swapcache folio to non-0 order");
return false;
@@ -3711,11 +3704,7 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
if (new_order >= old_order)
return -EINVAL;
- if (split_type == SPLIT_TYPE_UNIFORM && !uniform_split_supported(folio, new_order, true))
- return -EINVAL;
-
- if (split_type == SPLIT_TYPE_NON_UNIFORM &&
- !non_uniform_split_supported(folio, new_order, true))
+ if (!folio_split_supported(folio, new_order, split_type, /* warn = */ true))
return -EINVAL;
is_hzp = is_huge_zero_folio(folio);
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 590c03ca6a3fbb114396673314e2aa483839608b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122925-designed-overture-2e7d@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 590c03ca6a3fbb114396673314e2aa483839608b Mon Sep 17 00:00:00 2001
From: xu xin <xu.xin16(a)zte.com.cn>
Date: Tue, 7 Oct 2025 18:28:21 +0800
Subject: [PATCH] mm/ksm: fix exec/fork inheritance support for prctl
Patch series "ksm: fix exec/fork inheritance", v2.
This series fixes exec/fork inheritance. See the detailed description of
the issue below.
This patch (of 2):
Background
==========
commit d7597f59d1d33 ("mm: add new api to enable ksm per process")
introduced MMF_VM_MERGE_ANY for mm->flags, and allowed users to set it by
prctl() so that the process's VMAs are forcibly scanned by ksmd.
Subsequently, commit 3c6f33b7273a ("mm/ksm: support fork/exec for prctl")
added support for inheriting the MMF_VM_MERGE_ANY flag when a task calls
execve().
Finally, commit 3a9e567ca45fb ("mm/ksm: fix ksm exec support for prctl")
fixed the issue that ksmd doesn't scan the mm_struct with MMF_VM_MERGE_ANY
by adding the mm_slot to ksm_mm_head in __bprm_mm_init().
Problem
=======
In some extreme scenarios, however, this inheritance of MMF_VM_MERGE_ANY
during exec/fork can fail. For example, when the scanning frequency of
ksmd is tuned extremely high, a process carrying MMF_VM_MERGE_ANY may
still fail to pass it to the newly exec'd process. This happens because
ksm_execve() is executed too early in the do_execve flow (prematurely
adding the new mm_struct to the ksm_mm_slot list).
As a result, before do_execve completes, ksmd may have already performed a
scan and found that this new mm_struct has no VM_MERGEABLE VMAs, thus
clearing its MMF_VM_MERGE_ANY flag. Consequently, when the new program
executes, the MMF_VM_MERGE_ANY flag inheritance is missed.
Root reason
===========
commit d7597f59d1d33 ("mm: add new api to enable ksm per process") clears
the MMF_VM_MERGE_ANY flag when ksmd finds no VM_MERGEABLE VMAs.
Solution
========
Firstly, don't clear MMF_VM_MERGE_ANY when ksmd finds no VM_MERGEABLE
VMAs, because their mm_struct may have just been added to the ksm_mm_slot
list while its process has not yet officially started running or has not
yet performed mmap/brk to allocate anonymous VMAs.
Secondly, recheck MMF_VM_MERGEABLE if a process carries MMF_VM_MERGE_ANY,
and create an mm_slot and add it to ksm_scan_list again.
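As a user-space illustration of the inheritance being fixed, here is a minimal
sketch (not part of the patch; it assumes a kernel providing
PR_SET_MEMORY_MERGE/PR_GET_MEMORY_MERGE, with the fallback values below taken
from include/uapi/linux/prctl.h):

#include <stdio.h>
#include <unistd.h>
#include <sys/prctl.h>

#ifndef PR_SET_MEMORY_MERGE
#define PR_SET_MEMORY_MERGE 67
#define PR_GET_MEMORY_MERGE 68
#endif

int main(int argc, char **argv)
{
        if (argc > 1) {
                /* re-exec'd child: report whether the flag survived exec */
                printf("after exec: MEMORY_MERGE=%d\n",
                       (int)prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0));
                return 0;
        }
        if (prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0)) {
                perror("PR_SET_MEMORY_MERGE");
                return 1;
        }
        /*
         * With the bug, an aggressive ksmd scan could clear the flag before
         * execve() completes; with the fix the child should still print 1.
         */
        execl("/proc/self/exe", argv[0], "child", (char *)NULL);
        perror("execl");
        return 1;
}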
Link: https://lkml.kernel.org/r/20251007182504440BJgK8VXRHh8TD7IGSUIY4@zte.com.cn
Link: https://lkml.kernel.org/r/20251007182821572h_SoFqYZXEP1mvWI4n9VL@zte.com.cn
Fixes: 3c6f33b7273a ("mm/ksm: support fork/exec for prctl")
Fixes: d7597f59d1d3 ("mm: add new api to enable ksm per process")
Signed-off-by: xu xin <xu.xin16(a)zte.com.cn>
Cc: Stefan Roesch <shr(a)devkernel.io>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Jinjiang Tu <tujinjiang(a)huawei.com>
Cc: Wang Yaxin <wang.yaxin(a)zte.com.cn>
Cc: Yang Yang <yang.yang29(a)zte.com.cn>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 067538fc4d58..c982694c987b 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -17,7 +17,7 @@
#ifdef CONFIG_KSM
int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
unsigned long end, int advice, vm_flags_t *vm_flags);
-vm_flags_t ksm_vma_flags(const struct mm_struct *mm, const struct file *file,
+vm_flags_t ksm_vma_flags(struct mm_struct *mm, const struct file *file,
vm_flags_t vm_flags);
int ksm_enable_merge_any(struct mm_struct *mm);
int ksm_disable_merge_any(struct mm_struct *mm);
@@ -103,7 +103,7 @@ bool ksm_process_mergeable(struct mm_struct *mm);
#else /* !CONFIG_KSM */
-static inline vm_flags_t ksm_vma_flags(const struct mm_struct *mm,
+static inline vm_flags_t ksm_vma_flags(struct mm_struct *mm,
const struct file *file, vm_flags_t vm_flags)
{
return vm_flags;
diff --git a/mm/ksm.c b/mm/ksm.c
index cdefba633856..4f672f4f2140 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -2712,8 +2712,14 @@ static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page)
spin_unlock(&ksm_mmlist_lock);
mm_slot_free(mm_slot_cache, mm_slot);
+ /*
+ * Only clear MMF_VM_MERGEABLE. We must not clear
+ * MMF_VM_MERGE_ANY, because for those MMF_VM_MERGE_ANY process,
+ * perhaps their mm_struct has just been added to ksm_mm_slot
+ * list, and its process has not yet officially started running
+ * or has not yet performed mmap/brk to allocate anonymous VMAS.
+ */
mm_flags_clear(MMF_VM_MERGEABLE, mm);
- mm_flags_clear(MMF_VM_MERGE_ANY, mm);
mmap_read_unlock(mm);
mmdrop(mm);
} else {
@@ -2831,12 +2837,20 @@ static int __ksm_del_vma(struct vm_area_struct *vma)
*
* Returns: @vm_flags possibly updated to mark mergeable.
*/
-vm_flags_t ksm_vma_flags(const struct mm_struct *mm, const struct file *file,
+vm_flags_t ksm_vma_flags(struct mm_struct *mm, const struct file *file,
vm_flags_t vm_flags)
{
if (mm_flags_test(MMF_VM_MERGE_ANY, mm) &&
- __ksm_should_add_vma(file, vm_flags))
+ __ksm_should_add_vma(file, vm_flags)) {
vm_flags |= VM_MERGEABLE;
+ /*
+ * Generally, the flags here always include MMF_VM_MERGEABLE.
+ * However, in rare cases, this flag may be cleared by ksmd who
+ * scans a cycle without finding any mergeable vma.
+ */
+ if (unlikely(!mm_flags_test(MMF_VM_MERGEABLE, mm)))
+ __ksm_enter(mm);
+ }
return vm_flags;
}
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 590c03ca6a3fbb114396673314e2aa483839608b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122924-reproach-foster-4189@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 590c03ca6a3fbb114396673314e2aa483839608b Mon Sep 17 00:00:00 2001
From: xu xin <xu.xin16(a)zte.com.cn>
Date: Tue, 7 Oct 2025 18:28:21 +0800
Subject: [PATCH] mm/ksm: fix exec/fork inheritance support for prctl
Patch series "ksm: fix exec/fork inheritance", v2.
This series fixes exec/fork inheritance. See the detailed description of
the issue below.
This patch (of 2):
Background
==========
commit d7597f59d1d33 ("mm: add new api to enable ksm per process")
introduced MMF_VM_MERGE_ANY for mm->flags, and allowed users to set it by
prctl() so that the process's VMAs are forcibly scanned by ksmd.
Subsequently, commit 3c6f33b7273a ("mm/ksm: support fork/exec for prctl")
added support for inheriting the MMF_VM_MERGE_ANY flag when a task calls
execve().
Finally, commit 3a9e567ca45fb ("mm/ksm: fix ksm exec support for prctl")
fixed the issue that ksmd doesn't scan the mm_struct with MMF_VM_MERGE_ANY
by adding the mm_slot to ksm_mm_head in __bprm_mm_init().
Problem
=======
In some extreme scenarios, however, this inheritance of MMF_VM_MERGE_ANY
during exec/fork can fail. For example, when the scanning frequency of
ksmd is tuned extremely high, a process carrying MMF_VM_MERGE_ANY may
still fail to pass it to the newly exec'd process. This happens because
ksm_execve() is executed too early in the do_execve flow (prematurely
adding the new mm_struct to the ksm_mm_slot list).
As a result, before do_execve completes, ksmd may have already performed a
scan and found that this new mm_struct has no VM_MERGEABLE VMAs, thus
clearing its MMF_VM_MERGE_ANY flag. Consequently, when the new program
executes, the MMF_VM_MERGE_ANY flag inheritance is missed.
Root reason
===========
commit d7597f59d1d33 ("mm: add new api to enable ksm per process") clears
the MMF_VM_MERGE_ANY flag when ksmd finds no VM_MERGEABLE VMAs.
Solution
========
Firstly, don't clear MMF_VM_MERGE_ANY when ksmd finds no VM_MERGEABLE
VMAs, because their mm_struct may have just been added to the ksm_mm_slot
list while its process has not yet officially started running or has not
yet performed mmap/brk to allocate anonymous VMAs.
Secondly, recheck MMF_VM_MERGEABLE if a process carries MMF_VM_MERGE_ANY,
and create an mm_slot and add it to ksm_scan_list again.
Link: https://lkml.kernel.org/r/20251007182504440BJgK8VXRHh8TD7IGSUIY4@zte.com.cn
Link: https://lkml.kernel.org/r/20251007182821572h_SoFqYZXEP1mvWI4n9VL@zte.com.cn
Fixes: 3c6f33b7273a ("mm/ksm: support fork/exec for prctl")
Fixes: d7597f59d1d3 ("mm: add new api to enable ksm per process")
Signed-off-by: xu xin <xu.xin16(a)zte.com.cn>
Cc: Stefan Roesch <shr(a)devkernel.io>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Jinjiang Tu <tujinjiang(a)huawei.com>
Cc: Wang Yaxin <wang.yaxin(a)zte.com.cn>
Cc: Yang Yang <yang.yang29(a)zte.com.cn>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 067538fc4d58..c982694c987b 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -17,7 +17,7 @@
#ifdef CONFIG_KSM
int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
unsigned long end, int advice, vm_flags_t *vm_flags);
-vm_flags_t ksm_vma_flags(const struct mm_struct *mm, const struct file *file,
+vm_flags_t ksm_vma_flags(struct mm_struct *mm, const struct file *file,
vm_flags_t vm_flags);
int ksm_enable_merge_any(struct mm_struct *mm);
int ksm_disable_merge_any(struct mm_struct *mm);
@@ -103,7 +103,7 @@ bool ksm_process_mergeable(struct mm_struct *mm);
#else /* !CONFIG_KSM */
-static inline vm_flags_t ksm_vma_flags(const struct mm_struct *mm,
+static inline vm_flags_t ksm_vma_flags(struct mm_struct *mm,
const struct file *file, vm_flags_t vm_flags)
{
return vm_flags;
diff --git a/mm/ksm.c b/mm/ksm.c
index cdefba633856..4f672f4f2140 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -2712,8 +2712,14 @@ static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page)
spin_unlock(&ksm_mmlist_lock);
mm_slot_free(mm_slot_cache, mm_slot);
+ /*
+ * Only clear MMF_VM_MERGEABLE. We must not clear
+ * MMF_VM_MERGE_ANY, because for those MMF_VM_MERGE_ANY process,
+ * perhaps their mm_struct has just been added to ksm_mm_slot
+ * list, and its process has not yet officially started running
+ * or has not yet performed mmap/brk to allocate anonymous VMAS.
+ */
mm_flags_clear(MMF_VM_MERGEABLE, mm);
- mm_flags_clear(MMF_VM_MERGE_ANY, mm);
mmap_read_unlock(mm);
mmdrop(mm);
} else {
@@ -2831,12 +2837,20 @@ static int __ksm_del_vma(struct vm_area_struct *vma)
*
* Returns: @vm_flags possibly updated to mark mergeable.
*/
-vm_flags_t ksm_vma_flags(const struct mm_struct *mm, const struct file *file,
+vm_flags_t ksm_vma_flags(struct mm_struct *mm, const struct file *file,
vm_flags_t vm_flags)
{
if (mm_flags_test(MMF_VM_MERGE_ANY, mm) &&
- __ksm_should_add_vma(file, vm_flags))
+ __ksm_should_add_vma(file, vm_flags)) {
vm_flags |= VM_MERGEABLE;
+ /*
+ * Generally, the flags here always include MMF_VM_MERGEABLE.
+ * However, in rare cases, this flag may be cleared by ksmd who
+ * scans a cycle without finding any mergeable vma.
+ */
+ if (unlikely(!mm_flags_test(MMF_VM_MERGEABLE, mm)))
+ __ksm_enter(mm);
+ }
return vm_flags;
}
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 935a20d1bebf6236076785fac3ff81e3931834e9
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122946-opt-excuse-6662@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 935a20d1bebf6236076785fac3ff81e3931834e9 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche(a)acm.org>
Date: Fri, 14 Nov 2025 13:04:07 -0800
Subject: [PATCH] block: Remove queue freezing from several sysfs store
callbacks
Freezing the request queue from inside sysfs store callbacks may cause a
deadlock in combination with the dm-multipath driver and the
queue_if_no_path option. Additionally, freezing the request queue slows
down system boot on systems where sysfs attributes are set synchronously.
Fix this by removing the blk_mq_freeze_queue() / blk_mq_unfreeze_queue()
calls from the store callbacks that do not strictly need them.
Add the __data_racy annotation to request_queue.rq_timeout to suppress
KCSAN data race reports about the rq_timeout reads.
This patch may cause a small delay in applying the new settings.
For all the attributes affected by this patch, I/O will complete
correctly whether the old or the new value of the attribute is used.
This patch affects the following sysfs attributes:
* io_poll_delay
* io_timeout
* nomerges
* read_ahead_kb
* rq_affinity
Here is an example of a deadlock triggered by running test srp/002
if this patch is not applied:
task:multipathd
Call Trace:
<TASK>
__schedule+0x8c1/0x1bf0
schedule+0xdd/0x270
schedule_preempt_disabled+0x1c/0x30
__mutex_lock+0xb89/0x1650
mutex_lock_nested+0x1f/0x30
dm_table_set_restrictions+0x823/0xdf0
__bind+0x166/0x590
dm_swap_table+0x2a7/0x490
do_resume+0x1b1/0x610
dev_suspend+0x55/0x1a0
ctl_ioctl+0x3a5/0x7e0
dm_ctl_ioctl+0x12/0x20
__x64_sys_ioctl+0x127/0x1a0
x64_sys_call+0xe2b/0x17d0
do_syscall_64+0x96/0x3a0
entry_SYSCALL_64_after_hwframe+0x4b/0x53
</TASK>
task:(udev-worker)
Call Trace:
<TASK>
__schedule+0x8c1/0x1bf0
schedule+0xdd/0x270
blk_mq_freeze_queue_wait+0xf2/0x140
blk_mq_freeze_queue_nomemsave+0x23/0x30
queue_ra_store+0x14e/0x290
queue_attr_store+0x23e/0x2c0
sysfs_kf_write+0xde/0x140
kernfs_fop_write_iter+0x3b2/0x630
vfs_write+0x4fd/0x1390
ksys_write+0xfd/0x230
__x64_sys_write+0x76/0xc0
x64_sys_call+0x276/0x17d0
do_syscall_64+0x96/0x3a0
entry_SYSCALL_64_after_hwframe+0x4b/0x53
</TASK>
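For readers unfamiliar with the pattern, here is a small user-space sketch of
why no freeze is needed for such attributes (an analogy only, not kernel code:
relaxed C11 atomics stand in for the kernel's WRITE_ONCE()/READ_ONCE() and the
__data_racy annotation, and ra_pages is a stand-in for bdi->ra_pages):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned int ra_pages = 32;      /* stand-in for bdi->ra_pages */

static void *reader(void *arg)
{
        (void)arg;
        /* I/O submission path: either the old or the new value is fine */
        for (int i = 0; i < 5; i++)
                printf("readahead = %u pages\n",
                       atomic_load_explicit(&ra_pages, memory_order_relaxed));
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, reader, NULL);
        /* sysfs store path: publish the new value without freezing the queue */
        atomic_store_explicit(&ra_pages, 128, memory_order_relaxed);
        pthread_join(t, NULL);
        return 0;
}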
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Ming Lei <ming.lei(a)redhat.com>
Cc: Nilay Shroff <nilay(a)linux.ibm.com>
Cc: Martin Wilck <mwilck(a)suse.com>
Cc: Benjamin Marzinski <bmarzins(a)redhat.com>
Cc: stable(a)vger.kernel.org
Fixes: af2814149883 ("block: freeze the queue in queue_attr_store")
Signed-off-by: Bart Van Assche <bvanassche(a)acm.org>
Reviewed-by: Nilay Shroff <nilay(a)linux.ibm.com>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 76c47fe9b8d6..8684c57498cc 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -143,21 +143,22 @@ queue_ra_store(struct gendisk *disk, const char *page, size_t count)
{
unsigned long ra_kb;
ssize_t ret;
- unsigned int memflags;
struct request_queue *q = disk->queue;
ret = queue_var_store(&ra_kb, page, count);
if (ret < 0)
return ret;
/*
- * ->ra_pages is protected by ->limits_lock because it is usually
- * calculated from the queue limits by queue_limits_commit_update.
+ * The ->ra_pages change below is protected by ->limits_lock because it
+ * is usually calculated from the queue limits by
+ * queue_limits_commit_update().
+ *
+ * bdi->ra_pages reads are not serialized against bdi->ra_pages writes.
+ * Use WRITE_ONCE() to write bdi->ra_pages once.
*/
mutex_lock(&q->limits_lock);
- memflags = blk_mq_freeze_queue(q);
- disk->bdi->ra_pages = ra_kb >> (PAGE_SHIFT - 10);
+ WRITE_ONCE(disk->bdi->ra_pages, ra_kb >> (PAGE_SHIFT - 10));
mutex_unlock(&q->limits_lock);
- blk_mq_unfreeze_queue(q, memflags);
return ret;
}
@@ -375,21 +376,18 @@ static ssize_t queue_nomerges_store(struct gendisk *disk, const char *page,
size_t count)
{
unsigned long nm;
- unsigned int memflags;
struct request_queue *q = disk->queue;
ssize_t ret = queue_var_store(&nm, page, count);
if (ret < 0)
return ret;
- memflags = blk_mq_freeze_queue(q);
blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
blk_queue_flag_clear(QUEUE_FLAG_NOXMERGES, q);
if (nm == 2)
blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
else if (nm)
blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
- blk_mq_unfreeze_queue(q, memflags);
return ret;
}
@@ -409,7 +407,6 @@ queue_rq_affinity_store(struct gendisk *disk, const char *page, size_t count)
#ifdef CONFIG_SMP
struct request_queue *q = disk->queue;
unsigned long val;
- unsigned int memflags;
ret = queue_var_store(&val, page, count);
if (ret < 0)
@@ -421,7 +418,6 @@ queue_rq_affinity_store(struct gendisk *disk, const char *page, size_t count)
* are accessed individually using atomic test_bit operation. So we
* don't grab any lock while updating these flags.
*/
- memflags = blk_mq_freeze_queue(q);
if (val == 2) {
blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
@@ -432,7 +428,6 @@ queue_rq_affinity_store(struct gendisk *disk, const char *page, size_t count)
blk_queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
}
- blk_mq_unfreeze_queue(q, memflags);
#endif
return ret;
}
@@ -446,11 +441,9 @@ static ssize_t queue_poll_delay_store(struct gendisk *disk, const char *page,
static ssize_t queue_poll_store(struct gendisk *disk, const char *page,
size_t count)
{
- unsigned int memflags;
ssize_t ret = count;
struct request_queue *q = disk->queue;
- memflags = blk_mq_freeze_queue(q);
if (!(q->limits.features & BLK_FEAT_POLL)) {
ret = -EINVAL;
goto out;
@@ -459,7 +452,6 @@ static ssize_t queue_poll_store(struct gendisk *disk, const char *page,
pr_info_ratelimited("writes to the poll attribute are ignored.\n");
pr_info_ratelimited("please use driver specific parameters instead.\n");
out:
- blk_mq_unfreeze_queue(q, memflags);
return ret;
}
@@ -472,7 +464,7 @@ static ssize_t queue_io_timeout_show(struct gendisk *disk, char *page)
static ssize_t queue_io_timeout_store(struct gendisk *disk, const char *page,
size_t count)
{
- unsigned int val, memflags;
+ unsigned int val;
int err;
struct request_queue *q = disk->queue;
@@ -480,9 +472,7 @@ static ssize_t queue_io_timeout_store(struct gendisk *disk, const char *page,
if (err || val == 0)
return -EINVAL;
- memflags = blk_mq_freeze_queue(q);
blk_queue_rq_timeout(q, msecs_to_jiffies(val));
- blk_mq_unfreeze_queue(q, memflags);
return count;
}
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2fff8a80dbd2..cb4ba09959ee 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -495,7 +495,7 @@ struct request_queue {
*/
unsigned long queue_flags;
- unsigned int rq_timeout;
+ unsigned int __data_racy rq_timeout;
unsigned int queue_depth;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x ee5a977b4e771cc181f39d504426dbd31ed701cc
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122959-unawake-devious-8898@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ee5a977b4e771cc181f39d504426dbd31ed701cc Mon Sep 17 00:00:00 2001
From: Fedor Pchelkin <pchelkin(a)ispras.ru>
Date: Sat, 1 Nov 2025 19:04:28 +0300
Subject: [PATCH] ext4: fix string copying in parse_apply_sb_mount_options()
strscpy_pad() can't be used to copy a non-NUL-term string into a NUL-term
string of possibly bigger size. Commit 0efc5990bca5 ("string.h: Introduce
memtostr() and memtostr_pad()") provides additional information in that
regard. So if this happens, the following warning is observed:
strnlen: detected buffer overflow: 65 byte read of buffer size 64
WARNING: CPU: 0 PID: 28655 at lib/string_helpers.c:1032 __fortify_report+0x96/0xc0 lib/string_helpers.c:1032
Modules linked in:
CPU: 0 UID: 0 PID: 28655 Comm: syz-executor.3 Not tainted 6.12.54-syzkaller-00144-g5f0270f1ba00 #0
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
RIP: 0010:__fortify_report+0x96/0xc0 lib/string_helpers.c:1032
Call Trace:
<TASK>
__fortify_panic+0x1f/0x30 lib/string_helpers.c:1039
strnlen include/linux/fortify-string.h:235 [inline]
sized_strscpy include/linux/fortify-string.h:309 [inline]
parse_apply_sb_mount_options fs/ext4/super.c:2504 [inline]
__ext4_fill_super fs/ext4/super.c:5261 [inline]
ext4_fill_super+0x3c35/0xad00 fs/ext4/super.c:5706
get_tree_bdev_flags+0x387/0x620 fs/super.c:1636
vfs_get_tree+0x93/0x380 fs/super.c:1814
do_new_mount fs/namespace.c:3553 [inline]
path_mount+0x6ae/0x1f70 fs/namespace.c:3880
do_mount fs/namespace.c:3893 [inline]
__do_sys_mount fs/namespace.c:4103 [inline]
__se_sys_mount fs/namespace.c:4080 [inline]
__x64_sys_mount+0x280/0x300 fs/namespace.c:4080
do_syscall_x64 arch/x86/entry/common.c:52 [inline]
do_syscall_64+0x64/0x140 arch/x86/entry/common.c:83
entry_SYSCALL_64_after_hwframe+0x76/0x7e
Since userspace is expected to provide an s_mount_opts field that is at
most 63 characters long, with the last byte being the NUL terminator, use
a 64-byte buffer which matches the size of s_mount_opts so that
strscpy_pad() does its job properly. Return an error if the user still
managed to provide a non-NUL-terminated string here.
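To make the over-read mechanism concrete, here is a minimal user-space sketch
(copy_opts() is a hypothetical stand-in that mimics, but is not, the kernel's
strscpy_pad()): with a destination the same size as the 64-byte source field,
the bounded strnlen() never reads past the field, and an unterminated value is
rejected instead of tripping the fortified overflow check:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>

/* Mimics the bounded, NUL-padding copy the fix relies on; returns -E2BIG
 * when no NUL terminator is found within dst_size source bytes. */
static ssize_t copy_opts(char *dst, const char *src, size_t dst_size)
{
        size_t len = strnlen(src, dst_size);    /* never reads past dst_size */

        if (len == dst_size)
                return -E2BIG;                  /* source is not NUL-terminated */
        memcpy(dst, src, len);
        memset(dst + len, 0, dst_size - len);   /* the "_pad" part */
        return (ssize_t)len;
}

int main(void)
{
        char s_mount_opts[64];                  /* same size as the on-disk field */
        char from_disk[64];

        memset(from_disk, 'x', sizeof(from_disk));      /* worst case: no NUL at all */
        if (copy_opts(s_mount_opts, from_disk, sizeof(s_mount_opts)) < 0)
                printf("unterminated s_mount_opts rejected with -E2BIG\n");
        return 0;
}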
Found by Linux Verification Center (linuxtesting.org) with Syzkaller.
Fixes: 8ecb790ea8c3 ("ext4: avoid potential buffer over-read in parse_apply_sb_mount_options()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Fedor Pchelkin <pchelkin(a)ispras.ru>
Reviewed-by: Baokun Li <libaokun1(a)huawei.com>
Reviewed-by: Jan Kara <jack(a)suse.cz>
Message-ID: <20251101160430.222297-1-pchelkin(a)ispras.ru>
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7de15249e826..d1ba894c0e0a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2476,7 +2476,7 @@ static int parse_apply_sb_mount_options(struct super_block *sb,
struct ext4_fs_context *m_ctx)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- char s_mount_opts[65];
+ char s_mount_opts[64];
struct ext4_fs_context *s_ctx = NULL;
struct fs_context *fc = NULL;
int ret = -ENOMEM;
@@ -2484,7 +2484,8 @@ static int parse_apply_sb_mount_options(struct super_block *sb,
if (!sbi->s_es->s_mount_opts[0])
return 0;
- strscpy_pad(s_mount_opts, sbi->s_es->s_mount_opts);
+ if (strscpy_pad(s_mount_opts, sbi->s_es->s_mount_opts) < 0)
+ return -E2BIG;
fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
if (!fc)
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x ee5a977b4e771cc181f39d504426dbd31ed701cc
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122958-ambitious-prenatal-99c2@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ee5a977b4e771cc181f39d504426dbd31ed701cc Mon Sep 17 00:00:00 2001
From: Fedor Pchelkin <pchelkin(a)ispras.ru>
Date: Sat, 1 Nov 2025 19:04:28 +0300
Subject: [PATCH] ext4: fix string copying in parse_apply_sb_mount_options()
strscpy_pad() can't be used to copy a non-NUL-term string into a NUL-term
string of possibly bigger size. Commit 0efc5990bca5 ("string.h: Introduce
memtostr() and memtostr_pad()") provides additional information in that
regard. So if this happens, the following warning is observed:
strnlen: detected buffer overflow: 65 byte read of buffer size 64
WARNING: CPU: 0 PID: 28655 at lib/string_helpers.c:1032 __fortify_report+0x96/0xc0 lib/string_helpers.c:1032
Modules linked in:
CPU: 0 UID: 0 PID: 28655 Comm: syz-executor.3 Not tainted 6.12.54-syzkaller-00144-g5f0270f1ba00 #0
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
RIP: 0010:__fortify_report+0x96/0xc0 lib/string_helpers.c:1032
Call Trace:
<TASK>
__fortify_panic+0x1f/0x30 lib/string_helpers.c:1039
strnlen include/linux/fortify-string.h:235 [inline]
sized_strscpy include/linux/fortify-string.h:309 [inline]
parse_apply_sb_mount_options fs/ext4/super.c:2504 [inline]
__ext4_fill_super fs/ext4/super.c:5261 [inline]
ext4_fill_super+0x3c35/0xad00 fs/ext4/super.c:5706
get_tree_bdev_flags+0x387/0x620 fs/super.c:1636
vfs_get_tree+0x93/0x380 fs/super.c:1814
do_new_mount fs/namespace.c:3553 [inline]
path_mount+0x6ae/0x1f70 fs/namespace.c:3880
do_mount fs/namespace.c:3893 [inline]
__do_sys_mount fs/namespace.c:4103 [inline]
__se_sys_mount fs/namespace.c:4080 [inline]
__x64_sys_mount+0x280/0x300 fs/namespace.c:4080
do_syscall_x64 arch/x86/entry/common.c:52 [inline]
do_syscall_64+0x64/0x140 arch/x86/entry/common.c:83
entry_SYSCALL_64_after_hwframe+0x76/0x7e
Since userspace is expected to provide an s_mount_opts field that is at
most 63 characters long, with the last byte being the NUL terminator, use
a 64-byte buffer which matches the size of s_mount_opts so that
strscpy_pad() does its job properly. Return an error if the user still
managed to provide a non-NUL-terminated string here.
Found by Linux Verification Center (linuxtesting.org) with Syzkaller.
Fixes: 8ecb790ea8c3 ("ext4: avoid potential buffer over-read in parse_apply_sb_mount_options()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Fedor Pchelkin <pchelkin(a)ispras.ru>
Reviewed-by: Baokun Li <libaokun1(a)huawei.com>
Reviewed-by: Jan Kara <jack(a)suse.cz>
Message-ID: <20251101160430.222297-1-pchelkin(a)ispras.ru>
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7de15249e826..d1ba894c0e0a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2476,7 +2476,7 @@ static int parse_apply_sb_mount_options(struct super_block *sb,
struct ext4_fs_context *m_ctx)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- char s_mount_opts[65];
+ char s_mount_opts[64];
struct ext4_fs_context *s_ctx = NULL;
struct fs_context *fc = NULL;
int ret = -ENOMEM;
@@ -2484,7 +2484,8 @@ static int parse_apply_sb_mount_options(struct super_block *sb,
if (!sbi->s_es->s_mount_opts[0])
return 0;
- strscpy_pad(s_mount_opts, sbi->s_es->s_mount_opts);
+ if (strscpy_pad(s_mount_opts, sbi->s_es->s_mount_opts) < 0)
+ return -E2BIG;
fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
if (!fc)
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x ee5a977b4e771cc181f39d504426dbd31ed701cc
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122957-crevice-busily-7e0e@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ee5a977b4e771cc181f39d504426dbd31ed701cc Mon Sep 17 00:00:00 2001
From: Fedor Pchelkin <pchelkin(a)ispras.ru>
Date: Sat, 1 Nov 2025 19:04:28 +0300
Subject: [PATCH] ext4: fix string copying in parse_apply_sb_mount_options()
strscpy_pad() can't be used to copy a non-NUL-term string into a NUL-term
string of possibly bigger size. Commit 0efc5990bca5 ("string.h: Introduce
memtostr() and memtostr_pad()") provides additional information in that
regard. So if this happens, the following warning is observed:
strnlen: detected buffer overflow: 65 byte read of buffer size 64
WARNING: CPU: 0 PID: 28655 at lib/string_helpers.c:1032 __fortify_report+0x96/0xc0 lib/string_helpers.c:1032
Modules linked in:
CPU: 0 UID: 0 PID: 28655 Comm: syz-executor.3 Not tainted 6.12.54-syzkaller-00144-g5f0270f1ba00 #0
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
RIP: 0010:__fortify_report+0x96/0xc0 lib/string_helpers.c:1032
Call Trace:
<TASK>
__fortify_panic+0x1f/0x30 lib/string_helpers.c:1039
strnlen include/linux/fortify-string.h:235 [inline]
sized_strscpy include/linux/fortify-string.h:309 [inline]
parse_apply_sb_mount_options fs/ext4/super.c:2504 [inline]
__ext4_fill_super fs/ext4/super.c:5261 [inline]
ext4_fill_super+0x3c35/0xad00 fs/ext4/super.c:5706
get_tree_bdev_flags+0x387/0x620 fs/super.c:1636
vfs_get_tree+0x93/0x380 fs/super.c:1814
do_new_mount fs/namespace.c:3553 [inline]
path_mount+0x6ae/0x1f70 fs/namespace.c:3880
do_mount fs/namespace.c:3893 [inline]
__do_sys_mount fs/namespace.c:4103 [inline]
__se_sys_mount fs/namespace.c:4080 [inline]
__x64_sys_mount+0x280/0x300 fs/namespace.c:4080
do_syscall_x64 arch/x86/entry/common.c:52 [inline]
do_syscall_64+0x64/0x140 arch/x86/entry/common.c:83
entry_SYSCALL_64_after_hwframe+0x76/0x7e
Since userspace is expected to provide an s_mount_opts field that is at
most 63 characters long, with the last byte being the NUL terminator, use
a 64-byte buffer which matches the size of s_mount_opts so that
strscpy_pad() does its job properly. Return an error if the user still
managed to provide a non-NUL-terminated string here.
Found by Linux Verification Center (linuxtesting.org) with Syzkaller.
Fixes: 8ecb790ea8c3 ("ext4: avoid potential buffer over-read in parse_apply_sb_mount_options()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Fedor Pchelkin <pchelkin(a)ispras.ru>
Reviewed-by: Baokun Li <libaokun1(a)huawei.com>
Reviewed-by: Jan Kara <jack(a)suse.cz>
Message-ID: <20251101160430.222297-1-pchelkin(a)ispras.ru>
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7de15249e826..d1ba894c0e0a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2476,7 +2476,7 @@ static int parse_apply_sb_mount_options(struct super_block *sb,
struct ext4_fs_context *m_ctx)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- char s_mount_opts[65];
+ char s_mount_opts[64];
struct ext4_fs_context *s_ctx = NULL;
struct fs_context *fc = NULL;
int ret = -ENOMEM;
@@ -2484,7 +2484,8 @@ static int parse_apply_sb_mount_options(struct super_block *sb,
if (!sbi->s_es->s_mount_opts[0])
return 0;
- strscpy_pad(s_mount_opts, sbi->s_es->s_mount_opts);
+ if (strscpy_pad(s_mount_opts, sbi->s_es->s_mount_opts) < 0)
+ return -E2BIG;
fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
if (!fc)
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x ee5a977b4e771cc181f39d504426dbd31ed701cc
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025122956-dropkick-outlast-5a49@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ee5a977b4e771cc181f39d504426dbd31ed701cc Mon Sep 17 00:00:00 2001
From: Fedor Pchelkin <pchelkin(a)ispras.ru>
Date: Sat, 1 Nov 2025 19:04:28 +0300
Subject: [PATCH] ext4: fix string copying in parse_apply_sb_mount_options()
strscpy_pad() can't be used to copy a non-NUL-term string into a NUL-term
string of possibly bigger size. Commit 0efc5990bca5 ("string.h: Introduce
memtostr() and memtostr_pad()") provides additional information in that
regard. So if this happens, the following warning is observed:
strnlen: detected buffer overflow: 65 byte read of buffer size 64
WARNING: CPU: 0 PID: 28655 at lib/string_helpers.c:1032 __fortify_report+0x96/0xc0 lib/string_helpers.c:1032
Modules linked in:
CPU: 0 UID: 0 PID: 28655 Comm: syz-executor.3 Not tainted 6.12.54-syzkaller-00144-g5f0270f1ba00 #0
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
RIP: 0010:__fortify_report+0x96/0xc0 lib/string_helpers.c:1032
Call Trace:
<TASK>
__fortify_panic+0x1f/0x30 lib/string_helpers.c:1039
strnlen include/linux/fortify-string.h:235 [inline]
sized_strscpy include/linux/fortify-string.h:309 [inline]
parse_apply_sb_mount_options fs/ext4/super.c:2504 [inline]
__ext4_fill_super fs/ext4/super.c:5261 [inline]
ext4_fill_super+0x3c35/0xad00 fs/ext4/super.c:5706
get_tree_bdev_flags+0x387/0x620 fs/super.c:1636
vfs_get_tree+0x93/0x380 fs/super.c:1814
do_new_mount fs/namespace.c:3553 [inline]
path_mount+0x6ae/0x1f70 fs/namespace.c:3880
do_mount fs/namespace.c:3893 [inline]
__do_sys_mount fs/namespace.c:4103 [inline]
__se_sys_mount fs/namespace.c:4080 [inline]
__x64_sys_mount+0x280/0x300 fs/namespace.c:4080
do_syscall_x64 arch/x86/entry/common.c:52 [inline]
do_syscall_64+0x64/0x140 arch/x86/entry/common.c:83
entry_SYSCALL_64_after_hwframe+0x76/0x7e
Since userspace is expected to provide an s_mount_opts field that is at
most 63 characters long, with the last byte being the NUL terminator, use
a 64-byte buffer which matches the size of s_mount_opts so that
strscpy_pad() does its job properly. Return an error if the user still
managed to provide a non-NUL-terminated string here.
Found by Linux Verification Center (linuxtesting.org) with Syzkaller.
Fixes: 8ecb790ea8c3 ("ext4: avoid potential buffer over-read in parse_apply_sb_mount_options()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Fedor Pchelkin <pchelkin(a)ispras.ru>
Reviewed-by: Baokun Li <libaokun1(a)huawei.com>
Reviewed-by: Jan Kara <jack(a)suse.cz>
Message-ID: <20251101160430.222297-1-pchelkin(a)ispras.ru>
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7de15249e826..d1ba894c0e0a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2476,7 +2476,7 @@ static int parse_apply_sb_mount_options(struct super_block *sb,
struct ext4_fs_context *m_ctx)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- char s_mount_opts[65];
+ char s_mount_opts[64];
struct ext4_fs_context *s_ctx = NULL;
struct fs_context *fc = NULL;
int ret = -ENOMEM;
@@ -2484,7 +2484,8 @@ static int parse_apply_sb_mount_options(struct super_block *sb,
if (!sbi->s_es->s_mount_opts[0])
return 0;
- strscpy_pad(s_mount_opts, sbi->s_es->s_mount_opts);
+ if (strscpy_pad(s_mount_opts, sbi->s_es->s_mount_opts) < 0)
+ return -E2BIG;
fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
if (!fc)