The patch titled
Subject: mm/thp: try_to_unmap() use TTU_SYNC for safe splitting
has been removed from the -mm tree. Its filename was
mm-thp-try_to_unmap-use-ttu_sync-for-safe-splitting.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Hugh Dickins <hughd(a)google.com>
Subject: mm/thp: try_to_unmap() use TTU_SYNC for safe splitting
Stressing huge tmpfs often crashed on unmap_page()'s VM_BUG_ON_PAGE
(!unmap_success): with dump_page() showing mapcount:1, but then its raw
struct page output showing _mapcount ffffffff i.e. mapcount 0.
And even if that particular VM_BUG_ON_PAGE(!unmap_success) is removed, it
is immediately followed by a VM_BUG_ON_PAGE(compound_mapcount(head)), and
further down an IS_ENABLED(CONFIG_DEBUG_VM) total_mapcount BUG(): all
indicative of some mapcount difficulty in development here perhaps. But
the !CONFIG_DEBUG_VM path handles the failures correctly and silently.
I believe the problem is that once a racing unmap has cleared pte or pmd,
try_to_unmap_one() may skip taking the page table lock, and emerge from
try_to_unmap() before the racing task has reached decrementing mapcount.
Instead of abandoning the unsafe VM_BUG_ON_PAGE(), and the ones that
follow, use PVMW_SYNC in try_to_unmap_one() in this case: adding TTU_SYNC
to the options, and passing that from unmap_page().
When CONFIG_DEBUG_VM, or for non-debug too? Consensus is to do the same
for both: the slight overhead added should rarely matter, except perhaps
if splitting sparsely-populated multiply-mapped shmem. Once confident
that bugs are fixed, TTU_SYNC here can be removed, and the race tolerated.
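To see why briefly taking the pmd lock is a sufficient wait, here is an abridged sketch of the racing side (roughly the zap_huge_pmd() shape in this kernel; TLB bookkeeping and error handling elided, so treat it as an illustration rather than the exact code): the pmd clear and the compound_mapcount decrement both happen under the pmd lock, so a PVMW_SYNC walker that takes and drops that lock cannot return before page_remove_rmap() has run.

ptl = __pmd_trans_huge_lock(pmd, vma);
orig_pmd = pmdp_huge_get_and_clear_full(vma, addr, pmd, tlb->fullmm);
if (pmd_present(orig_pmd)) {
        page = pmd_page(orig_pmd);
        page_remove_rmap(page, true);   /* compound_mapcount dropped under ptl */
}
spin_unlock(ptl);                       /* only now can a TTU_SYNC walker proceed */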
Link: https://lkml.kernel.org/r/c1e95853-8bcd-d8fd-55fa-e7f2488e78f@google.com
Fixes: fec89c109f3a ("thp: rewrite freeze_page()/unfreeze_page() with generic rmap walkers")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Cc: Alistair Popple <apopple(a)nvidia.com>
Cc: Jan Kara <jack(a)suse.cz>
Cc: Jue Wang <juew(a)google.com>
Cc: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: "Matthew Wilcox (Oracle)" <willy(a)infradead.org>
Cc: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: Minchan Kim <minchan(a)kernel.org>
Cc: Naoya Horiguchi <naoya.horiguchi(a)nec.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: Ralph Campbell <rcampbell(a)nvidia.com>
Cc: Shakeel Butt <shakeelb(a)google.com>
Cc: Wang Yugui <wangyugui(a)e16-tech.com>
Cc: Yang Shi <shy828301(a)gmail.com>
Cc: Zi Yan <ziy(a)nvidia.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/rmap.h | 1 +
mm/huge_memory.c | 2 +-
mm/page_vma_mapped.c | 11 +++++++++++
mm/rmap.c | 17 ++++++++++++++++-
4 files changed, 29 insertions(+), 2 deletions(-)
--- a/include/linux/rmap.h~mm-thp-try_to_unmap-use-ttu_sync-for-safe-splitting
+++ a/include/linux/rmap.h
@@ -91,6 +91,7 @@ enum ttu_flags {
TTU_SPLIT_HUGE_PMD = 0x4, /* split huge PMD if any */
TTU_IGNORE_MLOCK = 0x8, /* ignore mlock */
+ TTU_SYNC = 0x10, /* avoid racy checks with PVMW_SYNC */
TTU_IGNORE_HWPOISON = 0x20, /* corrupted page is recoverable */
TTU_BATCH_FLUSH = 0x40, /* Batch TLB flushes where possible
* and caller guarantees they will
--- a/mm/huge_memory.c~mm-thp-try_to_unmap-use-ttu_sync-for-safe-splitting
+++ a/mm/huge_memory.c
@@ -2350,7 +2350,7 @@ void vma_adjust_trans_huge(struct vm_are
static void unmap_page(struct page *page)
{
- enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK |
+ enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_SYNC |
TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
bool unmap_success;
--- a/mm/page_vma_mapped.c~mm-thp-try_to_unmap-use-ttu_sync-for-safe-splitting
+++ a/mm/page_vma_mapped.c
@@ -212,6 +212,17 @@ restart:
pvmw->ptl = NULL;
}
} else if (!pmd_present(pmde)) {
+ /*
+ * If PVMW_SYNC, take and drop THP pmd lock so that we
+ * cannot return prematurely, while zap_huge_pmd() has
+ * cleared *pmd but not decremented compound_mapcount().
+ */
+ if ((pvmw->flags & PVMW_SYNC) &&
+ PageTransCompound(pvmw->page)) {
+ spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);
+
+ spin_unlock(ptl);
+ }
return false;
}
if (!map_pte(pvmw))
--- a/mm/rmap.c~mm-thp-try_to_unmap-use-ttu_sync-for-safe-splitting
+++ a/mm/rmap.c
@@ -1405,6 +1405,15 @@ static bool try_to_unmap_one(struct page
struct mmu_notifier_range range;
enum ttu_flags flags = (enum ttu_flags)(long)arg;
+ /*
+ * When racing against e.g. zap_pte_range() on another cpu,
+ * in between its ptep_get_and_clear_full() and page_remove_rmap(),
+ * try_to_unmap() may return false when it is about to become true,
+ * if page table locking is skipped: use TTU_SYNC to wait for that.
+ */
+ if (flags & TTU_SYNC)
+ pvmw.flags = PVMW_SYNC;
+
/* munlock has nothing to gain from examining un-locked vmas */
if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
return true;
@@ -1777,7 +1786,13 @@ bool try_to_unmap(struct page *page, enu
else
rmap_walk(page, &rwc);
- return !page_mapcount(page) ? true : false;
+ /*
+ * When racing against e.g. zap_pte_range() on another cpu,
+ * in between its ptep_get_and_clear_full() and page_remove_rmap(),
+ * try_to_unmap() may return false when it is about to become true,
+ * if page table locking is skipped: use TTU_SYNC to wait for that.
+ */
+ return !page_mapcount(page);
}
/**
_
Patches currently in -mm which might be from hughd(a)google.com are
mm-thp-remap_page-is-only-needed-on-anonymous-thp.patch
mm-hwpoison_user_mappings-try_to_unmap-with-ttu_sync.patch
The patch titled
Subject: mm/thp: make is_huge_zero_pmd() safe and quicker
has been removed from the -mm tree. Its filename was
mm-thp-make-is_huge_zero_pmd-safe-and-quicker.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Hugh Dickins <hughd(a)google.com>
Subject: mm/thp: make is_huge_zero_pmd() safe and quicker
Most callers of is_huge_zero_pmd() supply a pmd already verified present;
but a few (notably zap_huge_pmd()) do not: the pmd might be a migration
entry, in which the pfn is encoded differently from a present pmd, so it
might wrongly pass the is_huge_zero_pmd() test (though not on x86, since L1TF
forced us to protect against that), or perhaps even crash in pmd_page()
applied to a swap-like entry.
Make it safe by adding a pmd_present() check into is_huge_zero_pmd() itself;
and make it quicker by saving huge_zero_pfn, so that is_huge_zero_pmd()
will not need to do that pmd_page() lookup each time.
__split_huge_pmd_locked() checked pmd_trans_huge() before: that worked,
but is unnecessary now that is_huge_zero_pmd() checks present.
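For context, an abridged caller-side sketch (not the exact zap_huge_pmd() code) of why this matters: the pmd seen here may be a migration entry, and only the cached-pfn comparison plus pmd_present() makes the zero-page test safe to ask first.

orig_pmd = pmdp_huge_get_and_clear_full(vma, addr, pmd, tlb->fullmm);
if (is_huge_zero_pmd(orig_pmd)) {
        /* Now safe even if orig_pmd is a migration entry: no pmd_page() */
        zap_deposited_table(mm, pmd);
} else if (unlikely(is_pmd_migration_entry(orig_pmd))) {
        /* pfn bits are encoded differently here; pmd_page() would be bogus */
        page = migration_entry_to_page(pmd_to_swp_entry(orig_pmd));
} else {
        page = pmd_page(orig_pmd);      /* only valid for a present pmd */
}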
Link: https://lkml.kernel.org/r/21ea9ca-a1f5-8b90-5e88-95fb1c49bbfa@google.com
Fixes: e71769ae5260 ("mm: enable thp migration for shmem thp")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Reviewed-by: Yang Shi <shy828301(a)gmail.com>
Cc: Alistair Popple <apopple(a)nvidia.com>
Cc: Jan Kara <jack(a)suse.cz>
Cc: Jue Wang <juew(a)google.com>
Cc: "Matthew Wilcox (Oracle)" <willy(a)infradead.org>
Cc: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: Minchan Kim <minchan(a)kernel.org>
Cc: Naoya Horiguchi <naoya.horiguchi(a)nec.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: Ralph Campbell <rcampbell(a)nvidia.com>
Cc: Shakeel Butt <shakeelb(a)google.com>
Cc: Wang Yugui <wangyugui(a)e16-tech.com>
Cc: Zi Yan <ziy(a)nvidia.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/huge_mm.h | 8 +++++++-
mm/huge_memory.c | 5 ++++-
2 files changed, 11 insertions(+), 2 deletions(-)
--- a/include/linux/huge_mm.h~mm-thp-make-is_huge_zero_pmd-safe-and-quicker
+++ a/include/linux/huge_mm.h
@@ -286,6 +286,7 @@ struct page *follow_devmap_pud(struct vm
vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
extern struct page *huge_zero_page;
+extern unsigned long huge_zero_pfn;
static inline bool is_huge_zero_page(struct page *page)
{
@@ -294,7 +295,7 @@ static inline bool is_huge_zero_page(str
static inline bool is_huge_zero_pmd(pmd_t pmd)
{
- return is_huge_zero_page(pmd_page(pmd));
+ return READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd) && pmd_present(pmd);
}
static inline bool is_huge_zero_pud(pud_t pud)
@@ -439,6 +440,11 @@ static inline bool is_huge_zero_page(str
{
return false;
}
+
+static inline bool is_huge_zero_pmd(pmd_t pmd)
+{
+ return false;
+}
static inline bool is_huge_zero_pud(pud_t pud)
{
--- a/mm/huge_memory.c~mm-thp-make-is_huge_zero_pmd-safe-and-quicker
+++ a/mm/huge_memory.c
@@ -62,6 +62,7 @@ static struct shrinker deferred_split_sh
static atomic_t huge_zero_refcount;
struct page *huge_zero_page __read_mostly;
+unsigned long huge_zero_pfn __read_mostly = ~0UL;
bool transparent_hugepage_enabled(struct vm_area_struct *vma)
{
@@ -98,6 +99,7 @@ retry:
__free_pages(zero_page, compound_order(zero_page));
goto retry;
}
+ WRITE_ONCE(huge_zero_pfn, page_to_pfn(zero_page));
/* We take additional reference here. It will be put back by shrinker */
atomic_set(&huge_zero_refcount, 2);
@@ -147,6 +149,7 @@ static unsigned long shrink_huge_zero_pa
if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) {
struct page *zero_page = xchg(&huge_zero_page, NULL);
BUG_ON(zero_page == NULL);
+ WRITE_ONCE(huge_zero_pfn, ~0UL);
__free_pages(zero_page, compound_order(zero_page));
return HPAGE_PMD_NR;
}
@@ -2071,7 +2074,7 @@ static void __split_huge_pmd_locked(stru
return;
}
- if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) {
+ if (is_huge_zero_pmd(*pmd)) {
/*
* FIXME: Do we want to invalidate secondary mmu by calling
* mmu_notifier_invalidate_range() see comments below inside
_
Patches currently in -mm which might be from hughd(a)google.com are
mm-thp-remap_page-is-only-needed-on-anonymous-thp.patch
mm-hwpoison_user_mappings-try_to_unmap-with-ttu_sync.patch
The patch titled
Subject: mm/thp: fix __split_huge_pmd_locked() on shmem migration entry
has been removed from the -mm tree. Its filename was
mm-thp-fix-__split_huge_pmd_locked-on-shmem-migration-entry.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Hugh Dickins <hughd(a)google.com>
Subject: mm/thp: fix __split_huge_pmd_locked() on shmem migration entry
Patch series "mm/thp: fix THP splitting unmap BUGs and related", v10.
Here is v2 batch of long-standing THP bug fixes that I had not got around
to sending before, but prompted now by Wang Yugui's report
https://lore.kernel.org/linux-mm/20210412180659.B9E3.409509F4@e16-tech.com/
Wang Yugui has tested a rollup of these fixes applied to 5.10.39, and they
have done no harm, but have *not* fixed that issue: something more is
needed and I have no idea of what.
This patch (of 7):
Stressing huge tmpfs page migration racing hole punch often crashed on the
VM_BUG_ON(!pmd_present) in pmdp_huge_clear_flush(), with DEBUG_VM=y
kernel; or shortly afterwards, on a bad dereference in
__split_huge_pmd_locked() when DEBUG_VM=n. They forgot to allow for pmd
migration entries in the non-anonymous case.
Full disclosure: those particular experiments were on a kernel with more
relaxed mmap_lock and i_mmap_rwsem locking, and were not repeated on the
vanilla kernel: it is conceivable that stricter locking happens to avoid
those cases, or makes them less likely; but __split_huge_pmd_locked()
already allowed for pmd migration entries when handling anonymous THPs, so
this commit brings the shmem and file THP handling into line.
And while there: use old_pmd rather than _pmd, as in the following blocks;
and make it clearer to the eye that the !vma_is_anonymous() block is
self-contained, making an early return after accounting for unmapping.
Link: https://lkml.kernel.org/r/af88612-1473-2eaa-903-8d1a448b26@google.com
Link: https://lkml.kernel.org/r/dd221a99-efb3-cd1d-6256-7e646af29314@google.com
Fixes: e71769ae5260 ("mm: enable thp migration for shmem thp")
Signed-off-by: Hugh Dickins <hughd(a)google.com>
Cc: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: Yang Shi <shy828301(a)gmail.com>
Cc: Wang Yugui <wangyugui(a)e16-tech.com>
Cc: "Matthew Wilcox (Oracle)" <willy(a)infradead.org>
Cc: Naoya Horiguchi <naoya.horiguchi(a)nec.com>
Cc: Alistair Popple <apopple(a)nvidia.com>
Cc: Ralph Campbell <rcampbell(a)nvidia.com>
Cc: Zi Yan <ziy(a)nvidia.com>
Cc: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: Minchan Kim <minchan(a)kernel.org>
Cc: Jue Wang <juew(a)google.com>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: Jan Kara <jack(a)suse.cz>
Cc: Shakeel Butt <shakeelb(a)google.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/huge_memory.c | 27 ++++++++++++++++++---------
mm/pgtable-generic.c | 5 ++---
2 files changed, 20 insertions(+), 12 deletions(-)
--- a/mm/huge_memory.c~mm-thp-fix-__split_huge_pmd_locked-on-shmem-migration-entry
+++ a/mm/huge_memory.c
@@ -2044,7 +2044,7 @@ static void __split_huge_pmd_locked(stru
count_vm_event(THP_SPLIT_PMD);
if (!vma_is_anonymous(vma)) {
- _pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
+ old_pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
/*
* We are going to unmap this huge page. So
* just go ahead and zap it
@@ -2053,16 +2053,25 @@ static void __split_huge_pmd_locked(stru
zap_deposited_table(mm, pmd);
if (vma_is_special_huge(vma))
return;
- page = pmd_page(_pmd);
- if (!PageDirty(page) && pmd_dirty(_pmd))
- set_page_dirty(page);
- if (!PageReferenced(page) && pmd_young(_pmd))
- SetPageReferenced(page);
- page_remove_rmap(page, true);
- put_page(page);
+ if (unlikely(is_pmd_migration_entry(old_pmd))) {
+ swp_entry_t entry;
+
+ entry = pmd_to_swp_entry(old_pmd);
+ page = migration_entry_to_page(entry);
+ } else {
+ page = pmd_page(old_pmd);
+ if (!PageDirty(page) && pmd_dirty(old_pmd))
+ set_page_dirty(page);
+ if (!PageReferenced(page) && pmd_young(old_pmd))
+ SetPageReferenced(page);
+ page_remove_rmap(page, true);
+ put_page(page);
+ }
add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR);
return;
- } else if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) {
+ }
+
+ if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) {
/*
* FIXME: Do we want to invalidate secondary mmu by calling
* mmu_notifier_invalidate_range() see comments below inside
--- a/mm/pgtable-generic.c~mm-thp-fix-__split_huge_pmd_locked-on-shmem-migration-entry
+++ a/mm/pgtable-generic.c
@@ -135,9 +135,8 @@ pmd_t pmdp_huge_clear_flush(struct vm_ar
{
pmd_t pmd;
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
- VM_BUG_ON(!pmd_present(*pmdp));
- /* Below assumes pmd_present() is true */
- VM_BUG_ON(!pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
+ VM_BUG_ON(pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) &&
+ !pmd_devmap(*pmdp));
pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
return pmd;
_
Patches currently in -mm which might be from hughd(a)google.com are
mm-thp-remap_page-is-only-needed-on-anonymous-thp.patch
mm-hwpoison_user_mappings-try_to_unmap-with-ttu_sync.patch
The patch titled
Subject: mm, thp: use head page in __migration_entry_wait()
has been removed from the -mm tree. Its filename was
mm-thp-use-head-page-in-__migration_entry_wait.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Xu Yu <xuyu(a)linux.alibaba.com>
Subject: mm, thp: use head page in __migration_entry_wait()
We noticed that a hung task can happen in a corner-case but practical
scenario when CONFIG_PREEMPT_NONE is enabled, as follows.
Process 0                       Process 1                     Process 2..Inf
split_huge_page_to_list
    unmap_page
        split_huge_pmd_address
                                __migration_entry_wait(head)
                                                              __migration_entry_wait(tail)
        remap_page (roll back)
            remove_migration_ptes
                rmap_walk_anon
                    cond_resched
Here __migration_entry_wait(tail) occurs in kernel space, e.g. from
copy_to_user in fstat, which will immediately fault again without
rescheduling, and thus occupy the cpu fully.
When there are too many processes performing __migration_entry_wait on the
tail page, remap_page will never be done after cond_resched.
This makes __migration_entry_wait operate on the compound head page, so it
waits for remap_page to complete, whether the THP is split successfully or
rolled back.
Note that put_and_wait_on_page_locked helps to drop the page reference
acquired with get_page_unless_zero, as soon as the page is on the wait
queue, before actually waiting. So splitting the THP is only prevented
for a brief interval.
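In context, the waiting path now looks roughly like this (v5.13-era function names; the preceding pte-map and migration-entry checks are abridged, so this is a sketch rather than the exact code):

page = migration_entry_to_page(entry);
page = compound_head(page);             /* wait on the head, not a tail */
/*
 * Page cache replacement of page migration may already have dropped
 * page_count to zero: only proceed if a reference can still be taken.
 */
if (!get_page_unless_zero(page))
        goto out;
pte_unmap_unlock(ptep, ptl);
put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE);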
Link: https://lkml.kernel.org/r/b9836c1dd522e903891760af9f0c86a2cce987eb.16231440…
Fixes: ba98828088ad ("thp: add option to setup migration entries during PMD split")
Suggested-by: Hugh Dickins <hughd(a)google.com>
Signed-off-by: Gang Deng <gavin.dg(a)linux.alibaba.com>
Signed-off-by: Xu Yu <xuyu(a)linux.alibaba.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Acked-by: Hugh Dickins <hughd(a)google.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/migrate.c | 1 +
1 file changed, 1 insertion(+)
--- a/mm/migrate.c~mm-thp-use-head-page-in-__migration_entry_wait
+++ a/mm/migrate.c
@@ -295,6 +295,7 @@ void __migration_entry_wait(struct mm_st
goto out;
page = migration_entry_to_page(entry);
+ page = compound_head(page);
/*
* Once page cache replacement of page migration started, page_count
_
Patches currently in -mm which might be from xuyu(a)linux.alibaba.com are
The patch titled
Subject: mm/slub.c: include swab.h
has been removed from the -mm tree. Its filename was
mm-slubc-include-swabh.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Andrew Morton <akpm(a)linux-foundation.org>
Subject: mm/slub.c: include swab.h
Fixes build with CONFIG_SLAB_FREELIST_HARDENED=y.
Hopefully. But it's the right thing to do anyway.
Fixes: 1ad53d9fa3f61 ("slub: improve bit diffusion for freelist ptr obfuscation")
Link: https://bugzilla.kernel.org/show_bug.cgi?id=213417
Reported-by: <vannguye(a)cisco.com>
Acked-by: Kees Cook <keescook(a)chromium.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/slub.c | 1 +
1 file changed, 1 insertion(+)
--- a/mm/slub.c~mm-slubc-include-swabh
+++ a/mm/slub.c
@@ -15,6 +15,7 @@
#include <linux/module.h>
#include <linux/bit_spinlock.h>
#include <linux/interrupt.h>
+#include <linux/swab.h>
#include <linux/bitops.h>
#include <linux/slab.h>
#include "slab.h"
_
Patches currently in -mm which might be from akpm(a)linux-foundation.org are
mm-page_alloc-fix-memory-map-initialization-for-descending-nodes-checkpatch-fixes.patch
mm.patch
mm-slub-kunit-add-a-kunit-test-for-slub-debugging-functionality-fix-2.patch
slub-force-on-no_hash_pointers-when-slub_debug-is-enabled-fix.patch
fs-remove-noop_set_page_dirty-fix.patch
mm-gup-pack-has_pinned-in-mmf_has_pinned-checkpatch-fixes.patch
mm-memcg-optimize-user-context-object-stock-access-checkpatch-fixes.patch
mm-memcg-slab-create-a-new-set-of-kmalloc-cg-n-caches-v5-fix.patch
binfmt-remove-in-tree-usage-of-map_executable-fix.patch
mm-mmap-introduce-unlock_range-for-code-cleanup-fix.patch
lazy-tlb-allow-lazy-tlb-mm-refcounting-to-be-configurable-fix.patch
mm-page_alloc-convert-per-cpu-list-protection-to-local_lock-fix-checkpatch-fixes.patch
mm-memory_hotplug-disable-memmap_on_memory-when-hugetlb_free_vmemmap-enabled-fix.patch
nommu-remove-__gfp_highmem-in-vmalloc-vzalloc-checkpatch-fixes.patch
mm-madvise-introduce-madv_populate_readwrite-to-prefault-page-tables-checkpatch-fixes.patch
mmmemory_hotplug-drop-unneeded-locking-fix.patch
fs-proc-kcorec-add-mmap-interface-fix.patch
kernelh-split-out-panic-and-oops-helpers-fix.patch
lib-math-rational-add-kunit-test-cases-fix.patch
lib-decompressors-remove-set-but-not-used-variabled-level-fix.patch
ipc-utilc-use-binary-search-for-max_idx-fix.patch
linux-next-pre.patch
linux-next-post.patch
linux-next-rejects.patch
linux-next-git-rejects.patch
kernel-cgroup-cgroupc-dont-export-cgroup_get_e_css-twice.patch
mm-introduce-memfd_secret-system-call-to-create-secret-memory-areas-fix.patch
module-add-printk-formats-to-add-module-build-id-to-stacktraces-fix.patch
module-add-printk-formats-to-add-module-build-id-to-stacktraces-fix-fix.patch
kernel-forkc-export-kernel_thread-to-modules.patch
The patch titled
Subject: crash_core, vmcoreinfo: append 'SECTION_SIZE_BITS' to vmcoreinfo
has been removed from the -mm tree. Its filename was
crash_core-vmcoreinfo-append-section_size_bits-to-vmcoreinfo.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Pingfan Liu <kernelfans(a)gmail.com>
Subject: crash_core, vmcoreinfo: append 'SECTION_SIZE_BITS' to vmcoreinfo
As mentioned in kernel commit 1d50e5d0c505 ("crash_core, vmcoreinfo:
Append 'MAX_PHYSMEM_BITS' to vmcoreinfo"), SECTION_SIZE_BITS is used in the
formula:
    #define SECTIONS_SHIFT (MAX_PHYSMEM_BITS - SECTION_SIZE_BITS)
Besides SECTIONS_SHIFT, SECTION_SIZE_BITS is also used to calculate
PAGES_PER_SECTION in makedumpfile, just as in the kernel.
Unfortunately, this arch-dependent macro SECTION_SIZE_BITS changes, e.g.
recently in kernel commit f0b13ee23241 ("arm64/sparsemem: reduce
SECTION_SIZE_BITS"). But user space wants a stable interface to get this
info. Such info is impossible to be deduced from a crashdump vmcore.
Hence append SECTION_SIZE_BITS to vmcoreinfo.
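For illustration, the consumer-side arithmetic this enables (as makedumpfile would compute it from the exported numbers, with PAGE_SHIFT derived from the vmcoreinfo PAGESIZE) is:

/* Userspace-side illustration: derive sparsemem section geometry. */
unsigned long sections_shift    = MAX_PHYSMEM_BITS - SECTION_SIZE_BITS;
unsigned long nr_mem_sections   = 1UL << sections_shift;
unsigned long pages_per_section = 1UL << (SECTION_SIZE_BITS - PAGE_SHIFT);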
Link: https://lkml.kernel.org/r/20210608103359.84907-1-kernelfans@gmail.com
Link: http://lists.infradead.org/pipermail/kexec/2021-June/022676.html
Signed-off-by: Pingfan Liu <kernelfans(a)gmail.com>
Acked-by: Baoquan He <bhe(a)redhat.com>
Cc: Bhupesh Sharma <bhupesh.sharma(a)linaro.org>
Cc: Kazuhito Hagio <k-hagio(a)ab.jp.nec.com>
Cc: Dave Young <dyoung(a)redhat.com>
Cc: Boris Petkov <bp(a)alien8.de>
Cc: Ingo Molnar <mingo(a)kernel.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: James Morse <james.morse(a)arm.com>
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: Will Deacon <will(a)kernel.org>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Michael Ellerman <mpe(a)ellerman.id.au>
Cc: Paul Mackerras <paulus(a)samba.org>
Cc: Benjamin Herrenschmidt <benh(a)kernel.crashing.org>
Cc: Dave Anderson <anderson(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/crash_core.c | 1 +
1 file changed, 1 insertion(+)
--- a/kernel/crash_core.c~crash_core-vmcoreinfo-append-section_size_bits-to-vmcoreinfo
+++ a/kernel/crash_core.c
@@ -464,6 +464,7 @@ static int __init crash_save_vmcoreinfo_
VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
VMCOREINFO_STRUCT_SIZE(mem_section);
VMCOREINFO_OFFSET(mem_section, section_mem_map);
+ VMCOREINFO_NUMBER(SECTION_SIZE_BITS);
VMCOREINFO_NUMBER(MAX_PHYSMEM_BITS);
#endif
VMCOREINFO_STRUCT_SIZE(page);
_
Patches currently in -mm which might be from kernelfans(a)gmail.com are
The patch titled
Subject: mm/hugetlb: expand restore_reserve_on_error functionality
has been removed from the -mm tree. Its filename was
mm-hugetlb-expand-restore_reserve_on_error-functionality.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Mike Kravetz <mike.kravetz(a)oracle.com>
Subject: mm/hugetlb: expand restore_reserve_on_error functionality
The routine restore_reserve_on_error is called to restore reservation
information when an error occurs after page allocation. The routine
alloc_huge_page modifies the mapping reserve map and potentially the
reserve count during allocation. If code calling alloc_huge_page
encounters an error after allocation and needs to free the page, the
reservation information needs to be adjusted.
Currently, restore_reserve_on_error only takes action on pages for which
the reserve count was adjusted (HPageRestoreReserve flag). There is
nothing wrong with these adjustments. However, alloc_huge_page ALWAYS
modifies the reserve map during allocation even if the reserve count is
not adjusted. This can cause issues as observed during development of
this patch [1].
One specific series of operations causing an issue is:
- Create a shared hugetlb mapping
  Reservations for all pages created by default
- Fault in a page in the mapping
  Reservation exists so reservation count is decremented
- Punch a hole in the file/mapping at index previously faulted
  Reservation and any associated pages will be removed
- Allocate a page to fill the hole
  No reservation entry, so reserve count unmodified
  Reservation entry added to map by alloc_huge_page
- Error after allocation and before instantiating the page
  Reservation entry remains in map
- Allocate a page to fill the hole
  Reservation entry exists, so decrement reservation count
This will cause a reservation count underflow as the reservation count was
decremented twice for the same index.
A user would observe a very large number for HugePages_Rsvd in
/proc/meminfo. This would also likely cause subsequent allocations of
hugetlb pages to fail as it would 'appear' that all pages are reserved.
This sequence of operations is unlikely to happen; however, it was
easily reproduced and observed using hacked-up code as described in [1].
Address the issue by having the routine restore_reserve_on_error take
action on pages where HPageRestoreReserve is not set. In this case, we
need to remove any reserve map entry created by alloc_huge_page. A new
helper routine vma_del_reservation assists with this operation.
There are three callers of alloc_huge_page which do not currently call
restore_reserve_on_error before freeing a page on error paths. Add those
missing calls.
[1] https://lore.kernel.org/linux-mm/20210528005029.88088-1-almasrymina@google.…
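Schematically, the caller pattern being made consistent looks like this (a sketch mirroring the hugetlbfs_fallocate() hunk below; locking and error labels abridged):

page = alloc_huge_page(&pseudo_vma, addr, 0);   /* may consume / add a reserve map entry */
if (IS_ERR(page))
        goto out;
__SetPageUptodate(page);
error = huge_add_to_page_cache(page, mapping, index);
if (unlikely(error)) {
        /* Undo reserve map (and, if needed, reserve count) adjustments */
        restore_reserve_on_error(h, &pseudo_vma, addr, page);
        put_page(page);
        goto out;
}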
Link: https://lkml.kernel.org/r/20210607204510.22617-1-mike.kravetz@oracle.com
Fixes: 96b96a96ddee ("mm/hugetlb: fix huge page reservation leak in private mapping error paths")
Signed-off-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Reviewed-by: Mina Almasry <almasrymina(a)google.com>
Cc: Axel Rasmussen <axelrasmussen(a)google.com>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: Muchun Song <songmuchun(a)bytedance.com>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Naoya Horiguchi <naoya.horiguchi(a)nec.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/hugetlbfs/inode.c | 1
include/linux/hugetlb.h | 2
mm/hugetlb.c | 122 ++++++++++++++++++++++++++++++--------
3 files changed, 101 insertions(+), 24 deletions(-)
--- a/fs/hugetlbfs/inode.c~mm-hugetlb-expand-restore_reserve_on_error-functionality
+++ a/fs/hugetlbfs/inode.c
@@ -735,6 +735,7 @@ static long hugetlbfs_fallocate(struct f
__SetPageUptodate(page);
error = huge_add_to_page_cache(page, mapping, index);
if (unlikely(error)) {
+ restore_reserve_on_error(h, &pseudo_vma, addr, page);
put_page(page);
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
goto out;
--- a/include/linux/hugetlb.h~mm-hugetlb-expand-restore_reserve_on_error-functionality
+++ a/include/linux/hugetlb.h
@@ -610,6 +610,8 @@ struct page *alloc_huge_page_vma(struct
unsigned long address);
int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
pgoff_t idx);
+void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
+ unsigned long address, struct page *page);
/* arch callback */
int __init __alloc_bootmem_huge_page(struct hstate *h);
--- a/mm/hugetlb.c~mm-hugetlb-expand-restore_reserve_on_error-functionality
+++ a/mm/hugetlb.c
@@ -2121,12 +2121,18 @@ out:
* be restored when a newly allocated huge page must be freed. It is
* to be called after calling vma_needs_reservation to determine if a
* reservation exists.
+ *
+ * vma_del_reservation is used in error paths where an entry in the reserve
+ * map was created during huge page allocation and must be removed. It is to
+ * be called after calling vma_needs_reservation to determine if a reservation
+ * exists.
*/
enum vma_resv_mode {
VMA_NEEDS_RESV,
VMA_COMMIT_RESV,
VMA_END_RESV,
VMA_ADD_RESV,
+ VMA_DEL_RESV,
};
static long __vma_reservation_common(struct hstate *h,
struct vm_area_struct *vma, unsigned long addr,
@@ -2170,11 +2176,21 @@ static long __vma_reservation_common(str
ret = region_del(resv, idx, idx + 1);
}
break;
+ case VMA_DEL_RESV:
+ if (vma->vm_flags & VM_MAYSHARE) {
+ region_abort(resv, idx, idx + 1, 1);
+ ret = region_del(resv, idx, idx + 1);
+ } else {
+ ret = region_add(resv, idx, idx + 1, 1, NULL, NULL);
+ /* region_add calls of range 1 should never fail. */
+ VM_BUG_ON(ret < 0);
+ }
+ break;
default:
BUG();
}
- if (vma->vm_flags & VM_MAYSHARE)
+ if (vma->vm_flags & VM_MAYSHARE || mode == VMA_DEL_RESV)
return ret;
/*
* We know private mapping must have HPAGE_RESV_OWNER set.
@@ -2222,25 +2238,39 @@ static long vma_add_reservation(struct h
return __vma_reservation_common(h, vma, addr, VMA_ADD_RESV);
}
+static long vma_del_reservation(struct hstate *h,
+ struct vm_area_struct *vma, unsigned long addr)
+{
+ return __vma_reservation_common(h, vma, addr, VMA_DEL_RESV);
+}
+
/*
- * This routine is called to restore a reservation on error paths. In the
- * specific error paths, a huge page was allocated (via alloc_huge_page)
- * and is about to be freed. If a reservation for the page existed,
- * alloc_huge_page would have consumed the reservation and set
- * HPageRestoreReserve in the newly allocated page. When the page is freed
- * via free_huge_page, the global reservation count will be incremented if
- * HPageRestoreReserve is set. However, free_huge_page can not adjust the
- * reserve map. Adjust the reserve map here to be consistent with global
- * reserve count adjustments to be made by free_huge_page.
- */
-static void restore_reserve_on_error(struct hstate *h,
- struct vm_area_struct *vma, unsigned long address,
- struct page *page)
+ * This routine is called to restore reservation information on error paths.
+ * It should ONLY be called for pages allocated via alloc_huge_page(), and
+ * the hugetlb mutex should remain held when calling this routine.
+ *
+ * It handles two specific cases:
+ * 1) A reservation was in place and the page consumed the reservation.
+ * HPageRestoreReserve is set in the page.
+ * 2) No reservation was in place for the page, so HPageRestoreReserve is
+ * not set. However, alloc_huge_page always updates the reserve map.
+ *
+ * In case 1, free_huge_page later in the error path will increment the
+ * global reserve count. But, free_huge_page does not have enough context
+ * to adjust the reservation map. This case deals primarily with private
+ * mappings. Adjust the reserve map here to be consistent with global
+ * reserve count adjustments to be made by free_huge_page. Make sure the
+ * reserve map indicates there is a reservation present.
+ *
+ * In case 2, simply undo reserve map modifications done by alloc_huge_page.
+ */
+void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
+ unsigned long address, struct page *page)
{
- if (unlikely(HPageRestoreReserve(page))) {
- long rc = vma_needs_reservation(h, vma, address);
+ long rc = vma_needs_reservation(h, vma, address);
- if (unlikely(rc < 0)) {
+ if (HPageRestoreReserve(page)) {
+ if (unlikely(rc < 0))
/*
* Rare out of memory condition in reserve map
* manipulation. Clear HPageRestoreReserve so that
@@ -2253,16 +2283,57 @@ static void restore_reserve_on_error(str
* accounting of reserve counts.
*/
ClearHPageRestoreReserve(page);
- } else if (rc) {
- rc = vma_add_reservation(h, vma, address);
- if (unlikely(rc < 0))
+ else if (rc)
+ (void)vma_add_reservation(h, vma, address);
+ else
+ vma_end_reservation(h, vma, address);
+ } else {
+ if (!rc) {
+ /*
+ * This indicates there is an entry in the reserve map
+ * added by alloc_huge_page. We know it was added
+ * before the alloc_huge_page call, otherwise
+ * HPageRestoreReserve would be set on the page.
+ * Remove the entry so that a subsequent allocation
+ * does not consume a reservation.
+ */
+ rc = vma_del_reservation(h, vma, address);
+ if (rc < 0)
/*
- * See above comment about rare out of
- * memory condition.
+ * VERY rare out of memory condition. Since
+ * we can not delete the entry, set
+ * HPageRestoreReserve so that the reserve
+ * count will be incremented when the page
+ * is freed. This reserve will be consumed
+ * on a subsequent allocation.
*/
- ClearHPageRestoreReserve(page);
+ SetHPageRestoreReserve(page);
+ } else if (rc < 0) {
+ /*
+ * Rare out of memory condition from
+ * vma_needs_reservation call. Memory allocation is
+ * only attempted if a new entry is needed. Therefore,
+ * this implies there is not an entry in the
+ * reserve map.
+ *
+ * For shared mappings, no entry in the map indicates
+ * no reservation. We are done.
+ */
+ if (!(vma->vm_flags & VM_MAYSHARE))
+ /*
+ * For private mappings, no entry indicates
+ * a reservation is present. Since we can
+ * not add an entry, set SetHPageRestoreReserve
+ * on the page so reserve count will be
+ * incremented when freed. This reserve will
+ * be consumed on a subsequent allocation.
+ */
+ SetHPageRestoreReserve(page);
} else
- vma_end_reservation(h, vma, address);
+ /*
+ * No reservation present, do nothing
+ */
+ vma_end_reservation(h, vma, address);
}
}
@@ -4037,6 +4108,8 @@ again:
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
entry = huge_ptep_get(src_pte);
if (!pte_same(src_pte_old, entry)) {
+ restore_reserve_on_error(h, vma, addr,
+ new);
put_page(new);
/* dst_entry won't change as in child */
goto again;
@@ -5006,6 +5079,7 @@ out_release_unlock:
if (vm_shared || is_continue)
unlock_page(page);
out_release_nounlock:
+ restore_reserve_on_error(h, dst_vma, dst_addr, page);
put_page(page);
goto out;
}
_
Patches currently in -mm which might be from mike.kravetz(a)oracle.com are
mm-hugetlb-alloc-the-vmemmap-pages-associated-with-each-hugetlb-page-fix.patch
hugetlb-remove-prep_compound_huge_page-cleanup.patch
hugetlb-address-ref-count-racing-in-prep_compound_gigantic_page.patch
The patch titled
Subject: mm/slub: actually fix freelist pointer vs redzoning
has been removed from the -mm tree. Its filename was
mm-slub-actually-fix-freelist-pointer-vs-redzoning.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Kees Cook <keescook(a)chromium.org>
Subject: mm/slub: actually fix freelist pointer vs redzoning
It turns out that SLUB redzoning ("slub_debug=Z") checks from
s->object_size rather than from s->inuse (which is normally bumped to make
room for the freelist pointer), so a cache created with an object size
less than 24 would have the freelist pointer written beyond
s->object_size, causing the redzone to be corrupted by the freelist
pointer. This was very visible with "slub_debug=ZF":
BUG test (Tainted: G B ): Right Redzone overwritten
-----------------------------------------------------------------------------
INFO: 0xffff957ead1c05de-0xffff957ead1c05df @offset=1502. First byte 0x1a instead of 0xbb
INFO: Slab 0xffffef3950b47000 objects=170 used=170 fp=0x0000000000000000 flags=0x8000000000000200
INFO: Object 0xffff957ead1c05d8 @offset=1496 fp=0xffff957ead1c0620
Redzone (____ptrval____): bb bb bb bb bb bb bb bb ........
Object (____ptrval____): 00 00 00 00 00 f6 f4 a5 ........
Redzone (____ptrval____): 40 1d e8 1a aa @....
Padding (____ptrval____): 00 00 00 00 00 00 00 00 ........
Adjust the offset to stay within s->object_size.
(Note that no caches in this size range are known to exist in the
kernel currently.)
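A worked example of the offset change (plain userspace arithmetic; the ALIGN macros mirror the kernel's, and the 12-byte cache size is hypothetical):

#include <stdio.h>

#define ALIGN(x, a)      (((x) + (a) - 1) / (a) * (a))
#define ALIGN_DOWN(x, a) ((x) / (a) * (a))

int main(void)
{
        unsigned int object_size = 12;  /* hypothetical cache, no ctor/RCU/poison */
        unsigned int size = ALIGN(object_size, sizeof(void *));        /* 16 */

        /* Old: computed from the padded size, lands at 8; the 8-byte pointer
         * then runs to 16, past object_size (12), into the right redzone. */
        unsigned int old_offset = ALIGN(size / 2, sizeof(void *));
        /* New: computed down from object_size, lands at 0 and stays inside. */
        unsigned int new_offset = ALIGN_DOWN(object_size / 2, sizeof(void *));

        printf("old s->offset = %u, new s->offset = %u\n", old_offset, new_offset);
        return 0;
}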
Link: https://lkml.kernel.org/r/20210608183955.280836-4-keescook@chromium.org
Link: https://lore.kernel.org/linux-mm/20200807160627.GA1420741@elver.google.com/
Link: https://lore.kernel.org/lkml/0f7dd7b2-7496-5e2d-9488-2ec9f8e90441@suse.cz/
Fixes: 89b83f282d8b ("slub: avoid redzone when choosing freepointer location")
Link: https://lore.kernel.org/lkml/CANpmjNOwZ5VpKQn+SYWovTkFB4VsT-RPwyENBmaK0dLcp…
Signed-off-by: Kees Cook <keescook(a)chromium.org>
Reported-by: Marco Elver <elver(a)google.com>
Reported-by: "Lin, Zhenpeng" <zplin(a)psu.edu>
Tested-by: Marco Elver <elver(a)google.com>
Acked-by: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Christoph Lameter <cl(a)linux.com>
Cc: David Rientjes <rientjes(a)google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim(a)lge.com>
Cc: Pekka Enberg <penberg(a)kernel.org>
Cc: Roman Gushchin <guro(a)fb.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/slub.c | 14 +++-----------
1 file changed, 3 insertions(+), 11 deletions(-)
--- a/mm/slub.c~mm-slub-actually-fix-freelist-pointer-vs-redzoning
+++ a/mm/slub.c
@@ -3689,7 +3689,6 @@ static int calculate_sizes(struct kmem_c
{
slab_flags_t flags = s->flags;
unsigned int size = s->object_size;
- unsigned int freepointer_area;
unsigned int order;
/*
@@ -3698,13 +3697,6 @@ static int calculate_sizes(struct kmem_c
* the possible location of the free pointer.
*/
size = ALIGN(size, sizeof(void *));
- /*
- * This is the area of the object where a freepointer can be
- * safely written. If redzoning adds more to the inuse size, we
- * can't use that portion for writing the freepointer, so
- * s->offset must be limited within this for the general case.
- */
- freepointer_area = size;
#ifdef CONFIG_SLUB_DEBUG
/*
@@ -3730,7 +3722,7 @@ static int calculate_sizes(struct kmem_c
/*
* With that we have determined the number of bytes in actual use
- * by the object. This is the potential offset to the free pointer.
+ * by the object and redzoning.
*/
s->inuse = size;
@@ -3753,13 +3745,13 @@ static int calculate_sizes(struct kmem_c
*/
s->offset = size;
size += sizeof(void *);
- } else if (freepointer_area > sizeof(void *)) {
+ } else {
/*
* Store freelist pointer near middle of object to keep
* it away from the edges of the object to avoid small
* sized over/underflows from neighboring allocations.
*/
- s->offset = ALIGN(freepointer_area / 2, sizeof(void *));
+ s->offset = ALIGN_DOWN(s->object_size / 2, sizeof(void *));
}
#ifdef CONFIG_SLUB_DEBUG
_
Patches currently in -mm which might be from keescook(a)chromium.org are
The patch titled
Subject: mm/slub: fix redzoning for small allocations
has been removed from the -mm tree. Its filename was
mm-slub-fix-redzoning-for-small-allocations.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Kees Cook <keescook(a)chromium.org>
Subject: mm/slub: fix redzoning for small allocations
The redzone area for SLUB exists between s->object_size and s->inuse
(which is at least the word-aligned object_size). If a cache were created
with an object_size smaller than sizeof(void *), the in-object stored
freelist pointer would overwrite the redzone (e.g. with boot param
"slub_debug=ZF"):
BUG test (Tainted: G B ): Right Redzone overwritten
-----------------------------------------------------------------------------
INFO: 0xffff957ead1c05de-0xffff957ead1c05df @offset=1502. First byte 0x1a instead of 0xbb
INFO: Slab 0xffffef3950b47000 objects=170 used=170 fp=0x0000000000000000 flags=0x8000000000000200
INFO: Object 0xffff957ead1c05d8 @offset=1496 fp=0xffff957ead1c0620
Redzone (____ptrval____): bb bb bb bb bb bb bb bb ........
Object (____ptrval____): f6 f4 a5 40 1d e8 ...@..
Redzone (____ptrval____): 1a aa ..
Padding (____ptrval____): 00 00 00 00 00 00 00 00 ........
Store the freelist pointer out of line when object_size is smaller than
sizeof(void *) and redzoning is enabled.
Additionally remove the "smaller than sizeof(void *)" check under
CONFIG_DEBUG_VM in kmem_cache_sanity_check() as it is now redundant:
SLAB and SLOB both handle small sizes.
(Note that no caches within this size range are known to exist in the
kernel currently.)
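As a usage illustration (hypothetical cache and module init, named only for this example): a redzoned cache smaller than a pointer now keeps its freelist pointer out of line rather than on top of the redzone.

#include <linux/init.h>
#include <linux/slab.h>

/* Hypothetical 4-byte redzoned cache: with this fix, calculate_sizes()
 * relocates the free pointer after the object (s->offset >= s->inuse). */
static struct kmem_cache *tiny_demo;

static int __init tiny_demo_init(void)
{
        tiny_demo = kmem_cache_create("tiny-demo", 4, 0, SLAB_RED_ZONE, NULL);
        return tiny_demo ? 0 : -ENOMEM;
}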
Link: https://lkml.kernel.org/r/20210608183955.280836-3-keescook@chromium.org
Fixes: 81819f0fc828 ("SLUB core")
Signed-off-by: Kees Cook <keescook(a)chromium.org>
Acked-by: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Christoph Lameter <cl(a)linux.com>
Cc: David Rientjes <rientjes(a)google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim(a)lge.com>
Cc: "Lin, Zhenpeng" <zplin(a)psu.edu>
Cc: Marco Elver <elver(a)google.com>
Cc: Pekka Enberg <penberg(a)kernel.org>
Cc: Roman Gushchin <guro(a)fb.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/slab_common.c | 3 +--
mm/slub.c | 8 +++++---
2 files changed, 6 insertions(+), 5 deletions(-)
--- a/mm/slab_common.c~mm-slub-fix-redzoning-for-small-allocations
+++ a/mm/slab_common.c
@@ -97,8 +97,7 @@ EXPORT_SYMBOL(kmem_cache_size);
#ifdef CONFIG_DEBUG_VM
static int kmem_cache_sanity_check(const char *name, unsigned int size)
{
- if (!name || in_interrupt() || size < sizeof(void *) ||
- size > KMALLOC_MAX_SIZE) {
+ if (!name || in_interrupt() || size > KMALLOC_MAX_SIZE) {
pr_err("kmem_cache_create(%s) integrity check failed\n", name);
return -EINVAL;
}
--- a/mm/slub.c~mm-slub-fix-redzoning-for-small-allocations
+++ a/mm/slub.c
@@ -3734,15 +3734,17 @@ static int calculate_sizes(struct kmem_c
*/
s->inuse = size;
- if (((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
- s->ctor)) {
+ if ((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
+ ((flags & SLAB_RED_ZONE) && s->object_size < sizeof(void *)) ||
+ s->ctor) {
/*
* Relocate free pointer after the object if it is not
* permitted to overwrite the first word of the object on
* kmem_cache_free.
*
* This is the case if we do RCU, have a constructor or
- * destructor or are poisoning the objects.
+ * destructor, are poisoning the objects, or are
+ * redzoning an object smaller than sizeof(void *).
*
* The assumption that s->offset >= s->inuse means free
* pointer is outside of the object is used in the
_
Patches currently in -mm which might be from keescook(a)chromium.org are