The quilt patch titled
Subject: mm/ksm: fix ksm_zero_pages accounting
has been removed from the -mm tree. Its filename was
mm-ksm-fix-ksm_zero_pages-accounting.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Chengming Zhou <chengming.zhou@linux.dev>
Subject: mm/ksm: fix ksm_zero_pages accounting
Date: Tue, 28 May 2024 13:15:22 +0800
We normally do ksm_zero_pages++ in ksmd when a page is merged with the zero
page, but ksm_zero_pages-- is done from the page table side, where there is
no access protection for ksm_zero_pages.
So in rare cases we can read an exceptional value of ksm_zero_pages, such as
-1, which is very confusing to users.
Fix it by converting the counter to atomic_long_t, and do the same for
mm->ksm_zero_pages.
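The race is easy to demonstrate outside the kernel. Below is a minimal
user-space sketch (my illustration, not kernel code): two threads doing
unsynchronized ++/-- on a plain long lose updates, while the C11 atomic
counter stays consistent, mirroring the atomic_long_t conversion in this
patch.

/* racy_counter.c - build with: cc -O2 -pthread racy_counter.c */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define ITERS 1000000

static long plain_count;          /* unsynchronized, like the old ksm_zero_pages */
static atomic_long atomic_count;  /* like the counter after this patch */

static void *incs(void *arg)
{
	for (int i = 0; i < ITERS; i++) {
		plain_count++;                      /* non-atomic read-modify-write */
		atomic_fetch_add(&atomic_count, 1); /* atomic read-modify-write */
	}
	return NULL;
}

static void *decs(void *arg)
{
	for (int i = 0; i < ITERS; i++) {
		plain_count--;
		atomic_fetch_sub(&atomic_count, 1);
	}
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, incs, NULL);
	pthread_create(&b, NULL, decs, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);

	/* plain_count usually ends up nonzero (possibly negative);
	 * atomic_count is always 0 */
	printf("plain:  %ld\natomic: %ld\n", plain_count,
	       atomic_load(&atomic_count));
	return 0;
}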
Link: https://lkml.kernel.org/r/20240528-b4-ksm-counters-v3-2-34bb358fdc13@linux.…
Fixes: e2942062e01d ("ksm: count all zero pages placed by KSM")
Fixes: 6080d19f0704 ("ksm: add ksm zero pages for each process")
Signed-off-by: Chengming Zhou <chengming.zhou@linux.dev>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ran Xiaokai <ran.xiaokai@zte.com.cn>
Cc: Stefan Roesch <shr@devkernel.io>
Cc: xu xin <xu.xin16@zte.com.cn>
Cc: Yang Yang <yang.yang29@zte.com.cn>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
fs/proc/base.c | 2 +-
include/linux/ksm.h | 17 ++++++++++++++---
include/linux/mm_types.h | 2 +-
mm/ksm.c | 11 +++++------
4 files changed, 21 insertions(+), 11 deletions(-)
--- a/fs/proc/base.c~mm-ksm-fix-ksm_zero_pages-accounting
+++ a/fs/proc/base.c
@@ -3214,7 +3214,7 @@ static int proc_pid_ksm_stat(struct seq_
mm = get_task_mm(task);
if (mm) {
seq_printf(m, "ksm_rmap_items %lu\n", mm->ksm_rmap_items);
- seq_printf(m, "ksm_zero_pages %lu\n", mm->ksm_zero_pages);
+ seq_printf(m, "ksm_zero_pages %ld\n", mm_ksm_zero_pages(mm));
seq_printf(m, "ksm_merging_pages %lu\n", mm->ksm_merging_pages);
seq_printf(m, "ksm_process_profit %ld\n", ksm_process_profit(mm));
mmput(mm);
--- a/include/linux/ksm.h~mm-ksm-fix-ksm_zero_pages-accounting
+++ a/include/linux/ksm.h
@@ -33,16 +33,27 @@ void __ksm_exit(struct mm_struct *mm);
*/
#define is_ksm_zero_pte(pte) (is_zero_pfn(pte_pfn(pte)) && pte_dirty(pte))
-extern unsigned long ksm_zero_pages;
+extern atomic_long_t ksm_zero_pages;
+
+static inline void ksm_map_zero_page(struct mm_struct *mm)
+{
+ atomic_long_inc(&ksm_zero_pages);
+ atomic_long_inc(&mm->ksm_zero_pages);
+}
static inline void ksm_might_unmap_zero_page(struct mm_struct *mm, pte_t pte)
{
if (is_ksm_zero_pte(pte)) {
- ksm_zero_pages--;
- mm->ksm_zero_pages--;
+ atomic_long_dec(&ksm_zero_pages);
+ atomic_long_dec(&mm->ksm_zero_pages);
}
}
+static inline long mm_ksm_zero_pages(struct mm_struct *mm)
+{
+ return atomic_long_read(&mm->ksm_zero_pages);
+}
+
static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
{
if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags))
--- a/include/linux/mm_types.h~mm-ksm-fix-ksm_zero_pages-accounting
+++ a/include/linux/mm_types.h
@@ -985,7 +985,7 @@ struct mm_struct {
* Represent how many empty pages are merged with kernel zero
* pages when enabling KSM use_zero_pages.
*/
- unsigned long ksm_zero_pages;
+ atomic_long_t ksm_zero_pages;
#endif /* CONFIG_KSM */
#ifdef CONFIG_LRU_GEN_WALKS_MMU
struct {
--- a/mm/ksm.c~mm-ksm-fix-ksm_zero_pages-accounting
+++ a/mm/ksm.c
@@ -296,7 +296,7 @@ static bool ksm_use_zero_pages __read_mo
static bool ksm_smart_scan = true;
/* The number of zero pages which is placed by KSM */
-unsigned long ksm_zero_pages;
+atomic_long_t ksm_zero_pages = ATOMIC_LONG_INIT(0);
/* The number of pages that have been skipped due to "smart scanning" */
static unsigned long ksm_pages_skipped;
@@ -1429,8 +1429,7 @@ static int replace_page(struct vm_area_s
* the dirty bit in zero page's PTE is set.
*/
newpte = pte_mkdirty(pte_mkspecial(pfn_pte(page_to_pfn(kpage), vma->vm_page_prot)));
- ksm_zero_pages++;
- mm->ksm_zero_pages++;
+ ksm_map_zero_page(mm);
/*
* We're replacing an anonymous page with a zero page, which is
* not anonymous. We need to do proper accounting otherwise we
@@ -3374,7 +3373,7 @@ static void wait_while_offlining(void)
#ifdef CONFIG_PROC_FS
long ksm_process_profit(struct mm_struct *mm)
{
- return (long)(mm->ksm_merging_pages + mm->ksm_zero_pages) * PAGE_SIZE -
+ return (long)(mm->ksm_merging_pages + mm_ksm_zero_pages(mm)) * PAGE_SIZE -
mm->ksm_rmap_items * sizeof(struct ksm_rmap_item);
}
#endif /* CONFIG_PROC_FS */
@@ -3663,7 +3662,7 @@ KSM_ATTR_RO(pages_skipped);
static ssize_t ksm_zero_pages_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
- return sysfs_emit(buf, "%ld\n", ksm_zero_pages);
+ return sysfs_emit(buf, "%ld\n", atomic_long_read(&ksm_zero_pages));
}
KSM_ATTR_RO(ksm_zero_pages);
@@ -3672,7 +3671,7 @@ static ssize_t general_profit_show(struc
{
long general_profit;
- general_profit = (ksm_pages_sharing + ksm_zero_pages) * PAGE_SIZE -
+ general_profit = (ksm_pages_sharing + atomic_long_read(&ksm_zero_pages)) * PAGE_SIZE -
ksm_rmap_items * sizeof(struct ksm_rmap_item);
return sysfs_emit(buf, "%ld\n", general_profit);
_
The quilt patch titled
Subject: mm/ksm: fix ksm_pages_scanned accounting
has been removed from the -mm tree. Its filename was
mm-ksm-fix-ksm_pages_scanned-accounting.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Chengming Zhou <chengming.zhou@linux.dev>
Subject: mm/ksm: fix ksm_pages_scanned accounting
Date: Tue, 28 May 2024 13:15:21 +0800
Patch series "mm/ksm: fix some accounting problems", v3.
We encountered some abnormal ksm_pages_scanned and ksm_zero_pages values
during some random tests:
1. ksm_pages_scanned stays unchanged even though ksmd scanning makes progress.
2. ksm_zero_pages may be -1 in some rare cases.
This patch (of 2):
During testing, I found ksm_pages_scanned stays unchanged even though
scan_get_next_rmap_item() returns a valid, non-NULL rmap_item.
The reason is that scan_get_next_rmap_item() returns NULL after a full scan,
so ksm_do_scan() just returns without accounting for ksm_pages_scanned.
Fix it by moving the ksm_pages_scanned accounting into the loop itself, which
also makes the accounting more timely when the loop runs for a long time.
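A simplified sketch of the bug pattern and the fix (my illustration, not the
kernel code verbatim): when the accounting sits after the loop, any work done
before the early return is lost.

static long pages_scanned;

/* Buggy shape: accounting happens after the loop, so iterations
 * completed before the early return are never counted. */
static void scan_buggy(int n, int (*next_item)(void))
{
	int done = 0;

	while (done < n) {
		if (!next_item())
			return;		/* pages_scanned update below is skipped */
		done++;
	}
	pages_scanned += done;
}

/* Fixed shape: account each item inside the loop, so progress is
 * recorded no matter how the loop exits. */
static void scan_fixed(int n, int (*next_item)(void))
{
	while (n--) {
		if (!next_item())
			return;
		pages_scanned++;
	}
}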
Link: https://lkml.kernel.org/r/20240528-b4-ksm-counters-v3-0-34bb358fdc13@linux.…
Link: https://lkml.kernel.org/r/20240528-b4-ksm-counters-v3-1-34bb358fdc13@linux.…
Fixes: b348b5fe2b5f ("mm/ksm: add pages scanned metric")
Signed-off-by: Chengming Zhou <chengming.zhou@linux.dev>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: xu xin <xu.xin16@zte.com.cn>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ran Xiaokai <ran.xiaokai@zte.com.cn>
Cc: Stefan Roesch <shr@devkernel.io>
Cc: Yang Yang <yang.yang29@zte.com.cn>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
mm/ksm.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
--- a/mm/ksm.c~mm-ksm-fix-ksm_pages_scanned-accounting
+++ a/mm/ksm.c
@@ -2754,18 +2754,16 @@ static void ksm_do_scan(unsigned int sca
{
struct ksm_rmap_item *rmap_item;
struct page *page;
- unsigned int npages = scan_npages;
- while (npages-- && likely(!freezing(current))) {
+ while (scan_npages-- && likely(!freezing(current))) {
cond_resched();
rmap_item = scan_get_next_rmap_item(&page);
if (!rmap_item)
return;
cmp_and_merge_page(page, rmap_item);
put_page(page);
+ ksm_pages_scanned++;
}
-
- ksm_pages_scanned += scan_npages - npages;
}
static int ksmd_should_run(void)
_
The quilt patch titled
Subject: kmsan: do not wipe out origin when doing partial unpoisoning
has been removed from the -mm tree. Its filename was
kmsan-do-not-wipe-out-origin-when-doing-partial-unpoisoning.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Alexander Potapenko <glider@google.com>
Subject: kmsan: do not wipe out origin when doing partial unpoisoning
Date: Tue, 28 May 2024 12:48:06 +0200
As noticed by Brian, KMSAN should not be zeroing the origin when
unpoisoning parts of a four-byte uninitialized value, e.g.:
char a[4];
kmsan_unpoison_memory(a, 1);
This led to false negatives, as certain poisoned values could receive zero
origins, preventing those values from being reported.
To fix the problem, make kmsan_internal_set_shadow_origin() write zero
origins only to slots whose shadow is also zero.
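The corrected invariant can be sketched on its own (a hypothetical helper,
not the KMSAN runtime itself): a nonzero origin is written unconditionally,
while a zero origin must not clobber the origin of a slot whose shadow says
the value is still poisoned.

#include <stddef.h>
#include <stdint.h>

/* Sketch: write new_origin into every origin slot, except that a zero
 * origin only lands where the corresponding shadow slot is also zero. */
static void set_shadow_origin(const uint32_t *shadow, uint32_t *origin,
			      size_t slots, uint32_t new_origin)
{
	for (size_t i = 0; i < slots; i++) {
		if (new_origin || !shadow[i])
			origin[i] = new_origin;
	}
}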
Link: https://lkml.kernel.org/r/20240528104807.738758-1-glider@google.com
Fixes: f80be4571b19 ("kmsan: add KMSAN runtime core")
Signed-off-by: Alexander Potapenko <glider@google.com>
Reported-by: Brian Johannesmeyer <bjohannesmeyer@gmail.com>
Link: https://lore.kernel.org/lkml/20240524232804.1984355-1-bjohannesmeyer@gmail.…
Reviewed-by: Marco Elver <elver@google.com>
Tested-by: Brian Johannesmeyer <bjohannesmeyer@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
mm/kmsan/core.c | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)
--- a/mm/kmsan/core.c~kmsan-do-not-wipe-out-origin-when-doing-partial-unpoisoning
+++ a/mm/kmsan/core.c
@@ -196,8 +196,7 @@ void kmsan_internal_set_shadow_origin(vo
u32 origin, bool checked)
{
u64 address = (u64)addr;
- void *shadow_start;
- u32 *origin_start;
+ u32 *shadow_start, *origin_start;
size_t pad = 0;
KMSAN_WARN_ON(!kmsan_metadata_is_contiguous(addr, size));
@@ -225,8 +224,16 @@ void kmsan_internal_set_shadow_origin(vo
origin_start =
(u32 *)kmsan_get_metadata((void *)address, KMSAN_META_ORIGIN);
- for (int i = 0; i < size / KMSAN_ORIGIN_SIZE; i++)
- origin_start[i] = origin;
+ /*
+ * If the new origin is non-zero, assume that the shadow byte is also non-zero,
+ * and unconditionally overwrite the old origin slot.
+ * If the new origin is zero, overwrite the old origin slot iff the
+ * corresponding shadow slot is zero.
+ */
+ for (int i = 0; i < size / KMSAN_ORIGIN_SIZE; i++) {
+ if (origin || !shadow_start[i])
+ origin_start[i] = origin;
+ }
}
struct page *kmsan_vmalloc_to_page_or_null(void *vaddr)
_
The quilt patch titled
Subject: nilfs2: fix potential kernel bug due to lack of writeback flag waiting
has been removed from the -mm tree. Its filename was
nilfs2-fix-potential-kernel-bug-due-to-lack-of-writeback-flag-waiting.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
Subject: nilfs2: fix potential kernel bug due to lack of writeback flag waiting
Date: Thu, 30 May 2024 23:15:56 +0900
Destructive writes to a block device on which nilfs2 is mounted can cause
a kernel bug in the folio/page writeback start routine or writeback end
routine (__folio_start_writeback in the log below):
kernel BUG at mm/page-writeback.c:3070!
Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI
...
RIP: 0010:__folio_start_writeback+0xbaa/0x10e0
Code: 25 ff 0f 00 00 0f 84 18 01 00 00 e8 40 ca c6 ff e9 17 f6 ff ff
e8 36 ca c6 ff 4c 89 f7 48 c7 c6 80 c0 12 84 e8 e7 b3 0f 00 90 <0f>
0b e8 1f ca c6 ff 4c 89 f7 48 c7 c6 a0 c6 12 84 e8 d0 b3 0f 00
...
Call Trace:
<TASK>
nilfs_segctor_do_construct+0x4654/0x69d0 [nilfs2]
nilfs_segctor_construct+0x181/0x6b0 [nilfs2]
nilfs_segctor_thread+0x548/0x11c0 [nilfs2]
kthread+0x2f0/0x390
ret_from_fork+0x4b/0x80
ret_from_fork_asm+0x1a/0x30
</TASK>
This is because when the log writer starts writeback for segment summary
blocks or a super root block that use the backing device's page cache, it
does not wait for any ongoing folio/page writeback, resulting in an
inconsistent writeback state.
Fix this issue by waiting for ongoing writebacks when putting
folios/pages on the backing device into writeback state.
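The same four-step sequence appears at three call sites in the diff below;
factored into a helper it would look roughly like this
(nilfs_begin_bd_folio_writeback() is a hypothetical name, not part of the
patch):

/* Put a backing-device folio into writeback state, first waiting out
 * any writeback still in flight so the folio's writeback state stays
 * consistent even under destructive writes to the device. */
static void nilfs_begin_bd_folio_writeback(struct folio *folio)
{
	folio_lock(folio);
	folio_wait_writeback(folio);	/* the step this patch adds */
	folio_clear_dirty_for_io(folio);
	folio_start_writeback(folio);
	folio_unlock(folio);
}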
Link: https://lkml.kernel.org/r/20240530141556.4411-1-konishi.ryusuke@gmail.com
Fixes: 9ff05123e3bf ("nilfs2: segment constructor")
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
Tested-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
fs/nilfs2/segment.c | 3 +++
1 file changed, 3 insertions(+)
--- a/fs/nilfs2/segment.c~nilfs2-fix-potential-kernel-bug-due-to-lack-of-writeback-flag-waiting
+++ a/fs/nilfs2/segment.c
@@ -1652,6 +1652,7 @@ static void nilfs_segctor_prepare_write(
if (bh->b_folio != bd_folio) {
if (bd_folio) {
folio_lock(bd_folio);
+ folio_wait_writeback(bd_folio);
folio_clear_dirty_for_io(bd_folio);
folio_start_writeback(bd_folio);
folio_unlock(bd_folio);
@@ -1665,6 +1666,7 @@ static void nilfs_segctor_prepare_write(
if (bh == segbuf->sb_super_root) {
if (bh->b_folio != bd_folio) {
folio_lock(bd_folio);
+ folio_wait_writeback(bd_folio);
folio_clear_dirty_for_io(bd_folio);
folio_start_writeback(bd_folio);
folio_unlock(bd_folio);
@@ -1681,6 +1683,7 @@ static void nilfs_segctor_prepare_write(
}
if (bd_folio) {
folio_lock(bd_folio);
+ folio_wait_writeback(bd_folio);
folio_clear_dirty_for_io(bd_folio);
folio_start_writeback(bd_folio);
folio_unlock(bd_folio);
_
The patch titled
Subject: mm/page_table_check: fix crash on ZONE_DEVICE
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-page_table_check-fix-crash-on-zone_device.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Peter Xu <peterx@redhat.com>
Subject: mm/page_table_check: fix crash on ZONE_DEVICE
Date: Wed, 5 Jun 2024 17:21:46 -0400
Not all pages are subject to the page table check. One example is
ZONE_DEVICE pages: they map PFNs directly and don't allocate page_ext at
all, even when there is a struct page around. See devm_memremap_pages()
for reference.
With both ZONE_DEVICE and page-table-check enabled, mapping some DAX memory
reliably triggers a kernel BUG when the kernel injects PFN maps on the DAX
device:
kernel BUG at mm/page_table_check.c:55!
Since it is perfectly legal to use set_pxx_at() on ZONE_DEVICE pages for
page fault resolution, make the page table checker skip all checks when
page_ext doesn't exist; this applies to ZONE_DEVICE, but maybe more.
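All three hunks below share the same shape: look up page_ext first and
return quietly when it is absent. A simplified sketch (hypothetical
function name, not the exact patched code):

/* Bail out instead of BUG()ing when the page has no page_ext, as is
 * the case for ZONE_DEVICE pfns. */
static void page_table_check_page(struct page *page)
{
	struct page_ext *page_ext = page_ext_get(page);

	if (!page_ext)		/* e.g. ZONE_DEVICE: no page_ext allocated */
		return;

	/* ... perform the actual checks against page_ext ... */

	page_ext_put(page_ext);
}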
Link: https://lkml.kernel.org/r/20240605212146.994486-1-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
mm/page_table_check.c | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
--- a/mm/page_table_check.c~mm-page_table_check-fix-crash-on-zone_device
+++ a/mm/page_table_check.c
@@ -73,6 +73,9 @@ static void page_table_check_clear(unsig
page = pfn_to_page(pfn);
page_ext = page_ext_get(page);
+ if (!page_ext)
+ return;
+
BUG_ON(PageSlab(page));
anon = PageAnon(page);
@@ -110,6 +113,9 @@ static void page_table_check_set(unsigne
page = pfn_to_page(pfn);
page_ext = page_ext_get(page);
+ if (!page_ext)
+ return;
+
BUG_ON(PageSlab(page));
anon = PageAnon(page);
@@ -140,7 +146,10 @@ void __page_table_check_zero(struct page
BUG_ON(PageSlab(page));
page_ext = page_ext_get(page);
- BUG_ON(!page_ext);
+
+ if (!page_ext)
+ return;
+
for (i = 0; i < (1ul << order); i++) {
struct page_table_check *ptc = get_page_table_check(page_ext);
_
Patches currently in -mm which might be from peterx@redhat.com are
mm-page_table_check-fix-crash-on-zone_device.patch
mm-debug_vm_pgtable-drop-random_orvalue-trick.patch
mm-drop-leftover-comment-references-to-pxx_huge.patch
The following commit has been merged into the x86/urgent branch of tip:
Commit-ID: c625dabbf1c4a8e77e4734014f2fde7aa9071a1f
Gitweb: https://git.kernel.org/tip/c625dabbf1c4a8e77e4734014f2fde7aa9071a1f
Author: Yazen Ghannam <yazen.ghannam@amd.com>
AuthorDate: Mon, 03 Apr 2023 16:42:44
Committer: Borislav Petkov (AMD) <bp@alien8.de>
CommitterDate: Wed, 05 Jun 2024 21:23:34 +02:00
x86/amd_nb: Check for invalid SMN reads
AMD Zen-based systems use a System Management Network (SMN) that
provides access to implementation-specific registers.
SMN accesses are done indirectly through an index/data pair in PCI
config space. The PCI config access may fail and return an error code.
This would prevent the "read" value from being updated.
However, the PCI config access may also succeed while the returned value
is invalid. This is similar to a bad PCI read, i.e. one that returns all
bits set.
Most systems will return 0 for SMN addresses that are not accessible.
This is in line with AMD convention that unavailable registers are
Read-as-Zero/Writes-Ignored.
However, some systems will return a "PCI Error Response" instead. This
value, along with an error code of 0 from the PCI config access, will
confuse callers of the amd_smn_read() function.
Check for this condition, clear the return value, and set a proper error
code.
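From a caller's point of view, an SMN read that hits a "PCI Error Response"
now surfaces as -ENODEV with *value cleared to 0, so error handling becomes
straightforward. A hedged sketch (MY_SMN_REG is a placeholder, not a real
register offset):

static int read_smn_reg(u16 node, u32 *out)
{
	int err = amd_smn_read(node, MY_SMN_REG, out);	/* placeholder offset */

	if (err) {
		/* *out has already been cleared to 0 on error */
		pr_warn("SMN read failed on node %u: %d\n", node, err);
		return err;
	}

	/* *out holds real data or a legitimate Read-as-Zero value */
	return 0;
}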
Fixes: ddfe43cdc0da ("x86/amd_nb: Add SMN and Indirect Data Fabric access for AMD Fam17h")
Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20230403164244.471141-1-yazen.ghannam@amd.com
---
arch/x86/kernel/amd_nb.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 3cf156f..027a8c7 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -215,7 +215,14 @@ out:
int amd_smn_read(u16 node, u32 address, u32 *value)
{
- return __amd_smn_rw(node, address, value, false);
+ int err = __amd_smn_rw(node, address, value, false);
+
+ if (PCI_POSSIBLE_ERROR(*value)) {
+ err = -ENODEV;
+ *value = 0;
+ }
+
+ return err;
}
EXPORT_SYMBOL_GPL(amd_smn_read);