October 2024 - Linux-stable-mirror

[PATCH] efistub/tpm: Use ACPI reclaim memory for event log to avoid corruption

by Ard Biesheuvel

From: Ard Biesheuvel <ardb(a)kernel.org> The TPM event log table is a Linux specific construct, where the data produced by the GetEventLog() boot service is cached in memory, and passed on to the OS using an EFI configuration table. The use of EFI_LOADER_DATA here results in the region being left unreserved in the E820 memory map constructed by the EFI stub, and this is the memory description that is passed on to the incoming kernel by kexec, which is therefore unaware that the region should be reserved. Even though the utility of the TPM2 event log after a kexec is questionable, any corruption might send the parsing code off into the weeds and crash the kernel. So let's use EFI_ACPI_RECLAIM_MEMORY instead, which is always treated as reserved by the E820 conversion logic. Cc: <stable(a)vger.kernel.org> Reported-by: Breno Leitao <leitao(a)debian.org> Tested-by: Usama Arif <usamaarif642(a)gmail.com> Signed-off-by: Ard Biesheuvel <ardb(a)kernel.org> --- drivers/firmware/efi/libstub/tpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/tpm.c b/drivers/firmware/efi/libstub/tpm.c index df3182f2e63a..1fd6823248ab 100644 --- a/drivers/firmware/efi/libstub/tpm.c +++ b/drivers/firmware/efi/libstub/tpm.c @@ -96,7 +96,7 @@ static void efi_retrieve_tcg2_eventlog(int version, efi_physical_addr_t log_loca } /* Allocate space for the logs and copy them. */ - status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, + status = efi_bs_call(allocate_pool, EFI_ACPI_RECLAIM_MEMORY, sizeof(*log_tbl) + log_size, (void **)&log_tbl); if (status != EFI_SUCCESS) { -- 2.46.0.662.g92d0881bb0-goog

1 year, 1 month

7
19
0 0

[PATCH 6.6] Revert "selftests/bpf: Implement get_hw_ring_size function to retrieve current and max interface size"

by Pu Lehui

From: Pu Lehui <pulehui(a)huawei.com> This reverts commit c8c590f07ad7ffaa6ef11e90b81202212077497b which is commit 90a695c3d31e1c9f0adb8c4c80028ed4ea7ed5ab upstream. Commit c8c590f07ad7 ("selftests/bpf: Implement get_hw_ring_size function to retrieve current and max interface size") will cause the following bpf selftests compilation error in the 6.6 stable branch, and it is not the Stable-dep-of of commit 103c0431c7fb ("selftests/bpf: Drop unneeded error.h includes"). So let's revert commit c8c590f07ad7 to fix this compilation error. ./network_helpers.h:66:43: error: 'struct ethtool_ringparam' declared inside parameter list will not be visible outside of this definition or declaration [-Werror] 66 | int get_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param); Signed-off-by: Pu Lehui <pulehui(a)huawei.com> --- tools/testing/selftests/bpf/network_helpers.c | 24 ------------------- tools/testing/selftests/bpf/network_helpers.h | 4 ---- .../selftests/bpf/prog_tests/flow_dissector.c | 1 + tools/testing/selftests/bpf/xdp_hw_metadata.c | 14 +++++++++++ 4 files changed, 15 insertions(+), 28 deletions(-) diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index d2acc8875212..0877b60ec81f 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -465,27 +465,3 @@ int get_socket_local_port(int sock_fd) return -1; } - -int get_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param) -{ - struct ifreq ifr = {0}; - int sockfd, err; - - sockfd = socket(AF_INET, SOCK_DGRAM, 0); - if (sockfd < 0) - return -errno; - - memcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); - - ring_param->cmd = ETHTOOL_GRINGPARAM; - ifr.ifr_data = (char *)ring_param; - - if (ioctl(sockfd, SIOCETHTOOL, &ifr) < 0) { - err = errno; - close(sockfd); - return -err; - } - - close(sockfd); - return 0; -} diff --git a/tools/testing/selftests/bpf/network_helpers.h 
b/tools/testing/selftests/bpf/network_helpers.h index 11cbe194769b..5eccc67d1a99 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -9,11 +9,8 @@ typedef __u16 __sum16; #include <linux/if_packet.h> #include <linux/ip.h> #include <linux/ipv6.h> -#include <linux/ethtool.h> -#include <linux/sockios.h> #include <netinet/tcp.h> #include <bpf/bpf_endian.h> -#include <net/if.h> #define MAGIC_VAL 0x1234 #define NUM_ITER 100000 @@ -63,7 +60,6 @@ int make_sockaddr(int family, const char *addr_str, __u16 port, struct sockaddr_storage *addr, socklen_t *len); char *ping_command(int family); int get_socket_local_port(int sock_fd); -int get_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param); struct nstoken; /** diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index 3171047414a7..b81046806579 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -2,6 +2,7 @@ #define _GNU_SOURCE #include <test_progs.h> #include <network_helpers.h> +#include <linux/if.h> #include <linux/if_tun.h> #include <sys/uio.h> diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index 79f2da8f6ead..adb77c1a6a74 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -288,6 +288,20 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t return 0; } +struct ethtool_channels { + __u32 cmd; + __u32 max_rx; + __u32 max_tx; + __u32 max_other; + __u32 max_combined; + __u32 rx_count; + __u32 tx_count; + __u32 other_count; + __u32 combined_count; +}; + +#define ETHTOOL_GCHANNELS 0x0000003c /* Get no of channels */ + static int rxq_num(const char *ifname) { struct ethtool_channels ch = { -- 2.34.1

1 year, 1 month

1
0
0 0

[merged mm-hotfixes-stable] mm-mmap-limit-thp-aligment-of-anonymous-mappings-to-pmd-aligned-sizes.patch removed from -mm tree

by Andrew Morton

The quilt patch titled Subject: mm, mmap: limit THP alignment of anonymous mappings to PMD-aligned sizes has been removed from the -mm tree. Its filename was mm-mmap-limit-thp-aligment-of-anonymous-mappings-to-pmd-aligned-sizes.patch This patch was dropped because it was merged into the mm-hotfixes-stable branch of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm ------------------------------------------------------ From: Vlastimil Babka <vbabka(a)suse.cz> Subject: mm, mmap: limit THP alignment of anonymous mappings to PMD-aligned sizes Date: Thu, 24 Oct 2024 17:12:29 +0200 Since commit efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries") a mmap() of anonymous memory without a specific address hint and of at least PMD_SIZE will be aligned to PMD so that it can benefit from a THP backing page. However this change has been shown to regress some workloads significantly. [1] reports regressions in various spec benchmarks, with up to 600% slowdown of the cactusBSSN benchmark on some platforms. The benchmark seems to create many mappings of 4632kB, which would have merged to a large THP-backed area before commit efa7df3e3bb5 and now they are fragmented to multiple areas each aligned to PMD boundary with gaps between. The regression then seems to be caused mainly due to the benchmark's memory access pattern suffering from TLB or cache aliasing due to the aligned boundaries of the individual areas. Another known regression bisected to commit efa7df3e3bb5 is darktable [2] [3] and early testing suggests this patch fixes the regression there as well. To fix the regression but still try to benefit from THP-friendly anonymous mapping alignment, add a condition that the size of the mapping must be a multiple of PMD size instead of at least PMD size. In case of many odd-sized mapping like the cactusBSSN creates, those will stop being aligned and with gaps between, and instead naturally merge again. 
Link: https://lkml.kernel.org/r/20241024151228.101841-2-vbabka@suse.cz Fixes: efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries") Signed-off-by: Vlastimil Babka <vbabka(a)suse.cz> Reported-by: Michael Matz <matz(a)suse.de> Debugged-by: Gabriel Krisman Bertazi <gabriel(a)krisman.be> Closes: https://bugzilla.suse.com/show_bug.cgi?id=1229012 [1] Reported-by: Matthias Bodenbinder <matthias(a)bodenbinder.de> Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219366 [2] Closes: https://lore.kernel.org/all/2050f0d4-57b0-481d-bab8-05e8d48fed0c@leemhuis.i… [3] Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com> Reviewed-by: Yang Shi <yang(a)os.amperecomputing.com> Cc: Rik van Riel <riel(a)surriel.com> Cc: Jann Horn <jannh(a)google.com> Cc: Liam R. Howlett <Liam.Howlett(a)Oracle.com> Cc: Petr Tesarik <ptesarik(a)suse.com> Cc: Thorsten Leemhuis <regressions(a)leemhuis.info> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/mmap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) --- a/mm/mmap.c~mm-mmap-limit-thp-aligment-of-anonymous-mappings-to-pmd-aligned-sizes +++ a/mm/mmap.c @@ -900,7 +900,8 @@ __get_unmapped_area(struct file *file, u if (get_area) { addr = get_area(file, addr, len, pgoff, flags); - } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { + } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) + && IS_ALIGNED(len, PMD_SIZE)) { /* Ensures that larger anonymous mappings are THP aligned. */ addr = thp_get_unmapped_area_vmflags(file, addr, len, pgoff, flags, vm_flags); _ Patches currently in -mm which might be from vbabka(a)suse.cz are

1 year, 1 month

1
0
0 0

[merged mm-hotfixes-stable] mm-shrinker-avoid-memleak-in-alloc_shrinker_info.patch removed from -mm tree

by Andrew Morton

The quilt patch titled Subject: mm: shrinker: avoid memleak in alloc_shrinker_info has been removed from the -mm tree. Its filename was mm-shrinker-avoid-memleak-in-alloc_shrinker_info.patch This patch was dropped because it was merged into the mm-hotfixes-stable branch of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm ------------------------------------------------------ From: Chen Ridong <chenridong(a)huawei.com> Subject: mm: shrinker: avoid memleak in alloc_shrinker_info Date: Fri, 25 Oct 2024 06:09:42 +0000 A memleak was found as below: unreferenced object 0xffff8881010d2a80 (size 32): comm "mkdir", pid 1559, jiffies 4294932666 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 40 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 @............... backtrace (crc 2e7ef6fa): [<ffffffff81372754>] __kmalloc_node_noprof+0x394/0x470 [<ffffffff813024ab>] alloc_shrinker_info+0x7b/0x1a0 [<ffffffff813b526a>] mem_cgroup_css_online+0x11a/0x3b0 [<ffffffff81198dd9>] online_css+0x29/0xa0 [<ffffffff811a243d>] cgroup_apply_control_enable+0x20d/0x360 [<ffffffff811a5728>] cgroup_mkdir+0x168/0x5f0 [<ffffffff8148543e>] kernfs_iop_mkdir+0x5e/0x90 [<ffffffff813dbb24>] vfs_mkdir+0x144/0x220 [<ffffffff813e1c97>] do_mkdirat+0x87/0x130 [<ffffffff813e1de9>] __x64_sys_mkdir+0x49/0x70 [<ffffffff81f8c928>] do_syscall_64+0x68/0x140 [<ffffffff8200012f>] entry_SYSCALL_64_after_hwframe+0x76/0x7e In alloc_shrinker_info(), when shrinker_unit_alloc() returns an error, the info won't be freed. Just fix it. Link: https://lkml.kernel.org/r/20241025060942.1049263-1-chenridong@huaweicloud.c… Fixes: 307bececcd12 ("mm: shrinker: add a secondary array for shrinker_info::{map, nr_deferred}") Signed-off-by: Chen Ridong <chenridong(a)huawei.com> Acked-by: Qi Zheng <zhengqi.arch(a)bytedance.com> Acked-by: Roman Gushchin <roman.gushchin(a)linux.dev> Acked-by: Vlastimil Babka <vbabka(a)suse.cz> Acked-by: Kirill A. 
Shutemov <kirill.shutemov(a)linux.intel.com> Reviewed-by: Dave Chinner <dchinner(a)redhat.com> Cc: Anshuman Khandual <anshuman.khandual(a)arm.com> Cc: Muchun Song <muchun.song(a)linux.dev> Cc: Wang Weiyang <wangweiyang2(a)huawei.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/shrinker.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) --- a/mm/shrinker.c~mm-shrinker-avoid-memleak-in-alloc_shrinker_info +++ a/mm/shrinker.c @@ -76,19 +76,21 @@ void free_shrinker_info(struct mem_cgrou int alloc_shrinker_info(struct mem_cgroup *memcg) { - struct shrinker_info *info; int nid, ret = 0; int array_size = 0; mutex_lock(&shrinker_mutex); array_size = shrinker_unit_size(shrinker_nr_max); for_each_node(nid) { - info = kvzalloc_node(sizeof(*info) + array_size, GFP_KERNEL, nid); + struct shrinker_info *info = kvzalloc_node(sizeof(*info) + array_size, + GFP_KERNEL, nid); if (!info) goto err; info->map_nr_max = shrinker_nr_max; - if (shrinker_unit_alloc(info, NULL, nid)) + if (shrinker_unit_alloc(info, NULL, nid)) { + kvfree(info); goto err; + } rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info); } mutex_unlock(&shrinker_mutex); _ Patches currently in -mm which might be from chenridong(a)huawei.com are

1 year, 1 month

1
0
0 0

[merged mm-hotfixes-stable] vmscanmigrate-fix-double-decrement-on-node-stats-when-demoting-pages.patch removed from -mm tree

by Andrew Morton

The quilt patch titled Subject: vmscan,migrate: fix page count imbalance on node stats when demoting pages has been removed from the -mm tree. Its filename was vmscanmigrate-fix-double-decrement-on-node-stats-when-demoting-pages.patch This patch was dropped because it was merged into the mm-hotfixes-stable branch of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm ------------------------------------------------------ From: Gregory Price <gourry(a)gourry.net> Subject: vmscan,migrate: fix page count imbalance on node stats when demoting pages Date: Fri, 25 Oct 2024 10:17:24 -0400 When numa balancing is enabled with demotion, vmscan will call migrate_pages when shrinking LRUs. migrate_pages will decrement the node's isolated page count, leading to an imbalanced count when invoked from (MG)LRU code. The result is dmesg output like such: $ cat /proc/sys/vm/stat_refresh [77383.088417] vmstat_refresh: nr_isolated_anon -103212 [77383.088417] vmstat_refresh: nr_isolated_file -899642 This negative value may impact compaction and reclaim throttling. The following path produces the decrement: shrink_folio_list demote_folio_list migrate_pages migrate_pages_batch migrate_folio_move migrate_folio_done mod_node_page_state(-ve) <- decrement This path happens for SUCCESSFUL migrations, not failures. Typically callers to migrate_pages are required to handle putback/accounting for failures, but this is already handled in the shrink code. When accounting for migrations, instead do not decrement the count when the migration reason is MR_DEMOTION. As of v6.11, this demotion logic is the only source of MR_DEMOTION. 
Link: https://lkml.kernel.org/r/20241025141724.17927-1-gourry@gourry.net Fixes: 26aa2d199d6f ("mm/migrate: demote pages during reclaim") Signed-off-by: Gregory Price <gourry(a)gourry.net> Reviewed-by: Yang Shi <shy828301(a)gmail.com> Reviewed-by: Davidlohr Bueso <dave(a)stgolabs.net> Reviewed-by: Shakeel Butt <shakeel.butt(a)linux.dev> Reviewed-by: "Huang, Ying" <ying.huang(a)intel.com> Reviewed-by: Oscar Salvador <osalvador(a)suse.de> Cc: Dave Hansen <dave.hansen(a)linux.intel.com> Cc: Wei Xu <weixugc(a)google.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/mm/migrate.c~vmscanmigrate-fix-double-decrement-on-node-stats-when-demoting-pages +++ a/mm/migrate.c @@ -1178,7 +1178,7 @@ static void migrate_folio_done(struct fo * not accounted to NR_ISOLATED_*. They can be recognized * as __folio_test_movable */ - if (likely(!__folio_test_movable(src))) + if (likely(!__folio_test_movable(src)) && reason != MR_DEMOTION) mod_node_page_state(folio_pgdat(src), NR_ISOLATED_ANON + folio_is_file_lru(src), -folio_nr_pages(src)); _ Patches currently in -mm which might be from gourry(a)gourry.net are

1 year, 1 month

1
0
0 0

[merged mm-hotfixes-stable] mm-multi-gen-lru-remove-mm_leaf_old-and-mm_nonleaf_total-stats.patch removed from -mm tree

by Andrew Morton

The quilt patch titled Subject: mm: multi-gen LRU: remove MM_LEAF_OLD and MM_NONLEAF_TOTAL stats has been removed from the -mm tree. Its filename was mm-multi-gen-lru-remove-mm_leaf_old-and-mm_nonleaf_total-stats.patch This patch was dropped because it was merged into the mm-hotfixes-stable branch of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm ------------------------------------------------------ From: Yu Zhao <yuzhao(a)google.com> Subject: mm: multi-gen LRU: remove MM_LEAF_OLD and MM_NONLEAF_TOTAL stats Date: Sat, 19 Oct 2024 01:29:38 +0000 Patch series "mm: multi-gen LRU: Have secondary MMUs participate in MM_WALK". Today, the MM_WALK capability causes MGLRU to clear the young bit from PMDs and PTEs during the page table walk before eviction, but MGLRU does not call the clear_young() MMU notifier in this case. By not calling this notifier, the MM walk takes less time/CPU, but it causes pages that are accessed mostly through KVM / secondary MMUs to appear younger than they should be. We do call the clear_young() notifier today, but only when attempting to evict the page, so we end up clearing young/accessed information less frequently for secondary MMUs than for mm PTEs, and therefore they appear younger and are less likely to be evicted. Therefore, memory that is *not* being accessed mostly by KVM will be evicted *more* frequently, worsening performance. ChromeOS observed a tab-open latency regression when enabling MGLRU with a setup that involved running a VM: Tab-open latency histogram (ms) Version p50 mean p95 p99 max base 1315 1198 2347 3454 10319 mglru 2559 1311 7399 12060 43758 fix 1119 926 2470 4211 6947 This series replaces the final non-selftest patches from this series[1], which introduced a similar change (and a new MMU notifier) with KVM optimizations. I'll send a separate series (to Sean and Paolo) for the KVM optimizations. This series also makes proactive reclaim with MGLRU possible for KVM memory. 
I have verified that this functions correctly with the selftest from [1], but given that that test is a KVM selftest, I'll send it with the rest of the KVM optimizations later. Andrew, let me know if you'd like to take the test now anyway. [1]: https://lore.kernel.org/linux-mm/20240926013506.860253-18-jthoughton@google… This patch (of 2): The removed stats, MM_LEAF_OLD and MM_NONLEAF_TOTAL, are not very helpful and become more complicated to properly compute when adding test/clear_young() notifiers in MGLRU's mm walk. Link: https://lkml.kernel.org/r/20241019012940.3656292-1-jthoughton@google.com Link: https://lkml.kernel.org/r/20241019012940.3656292-2-jthoughton@google.com Fixes: bd74fdaea146 ("mm: multi-gen LRU: support page table walks") Signed-off-by: Yu Zhao <yuzhao(a)google.com> Signed-off-by: James Houghton <jthoughton(a)google.com> Cc: Axel Rasmussen <axelrasmussen(a)google.com> Cc: David Matlack <dmatlack(a)google.com> Cc: David Rientjes <rientjes(a)google.com> Cc: David Stevens <stevensd(a)google.com> Cc: Oliver Upton <oliver.upton(a)linux.dev> Cc: Paolo Bonzini <pbonzini(a)redhat.com> Cc: Sean Christopherson <seanjc(a)google.com> Cc: Wei Xu <weixugc(a)google.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- include/linux/mmzone.h | 2 -- mm/vmscan.c | 14 +++++--------- 2 files changed, 5 insertions(+), 11 deletions(-) --- a/include/linux/mmzone.h~mm-multi-gen-lru-remove-mm_leaf_old-and-mm_nonleaf_total-stats +++ a/include/linux/mmzone.h @@ -458,9 +458,7 @@ struct lru_gen_folio { enum { MM_LEAF_TOTAL, /* total leaf entries */ - MM_LEAF_OLD, /* old leaf entries */ MM_LEAF_YOUNG, /* young leaf entries */ - MM_NONLEAF_TOTAL, /* total non-leaf entries */ MM_NONLEAF_FOUND, /* non-leaf entries found in Bloom filters */ MM_NONLEAF_ADDED, /* non-leaf entries added to Bloom filters */ NR_MM_STATS --- a/mm/vmscan.c~mm-multi-gen-lru-remove-mm_leaf_old-and-mm_nonleaf_total-stats +++ a/mm/vmscan.c @@ -3399,7 +3399,6 @@ 
restart: continue; if (!pte_young(ptent)) { - walk->mm_stats[MM_LEAF_OLD]++; continue; } @@ -3552,7 +3551,6 @@ restart: walk->mm_stats[MM_LEAF_TOTAL]++; if (!pmd_young(val)) { - walk->mm_stats[MM_LEAF_OLD]++; continue; } @@ -3564,8 +3562,6 @@ restart: continue; } - walk->mm_stats[MM_NONLEAF_TOTAL]++; - if (!walk->force_scan && should_clear_pmd_young()) { if (!pmd_young(val)) continue; @@ -5254,11 +5250,11 @@ static void lru_gen_seq_show_full(struct for (tier = 0; tier < MAX_NR_TIERS; tier++) { seq_printf(m, " %10d", tier); for (type = 0; type < ANON_AND_FILE; type++) { - const char *s = " "; + const char *s = "xxx"; unsigned long n[3] = {}; if (seq == max_seq) { - s = "RT "; + s = "RTx"; n[0] = READ_ONCE(lrugen->avg_refaulted[type][tier]); n[1] = READ_ONCE(lrugen->avg_total[type][tier]); } else if (seq == min_seq[type] || NR_HIST_GENS > 1) { @@ -5280,14 +5276,14 @@ static void lru_gen_seq_show_full(struct seq_puts(m, " "); for (i = 0; i < NR_MM_STATS; i++) { - const char *s = " "; + const char *s = "xxxx"; unsigned long n = 0; if (seq == max_seq && NR_HIST_GENS == 1) { - s = "LOYNFA"; + s = "TYFA"; n = READ_ONCE(mm_state->stats[hist][i]); } else if (seq != max_seq && NR_HIST_GENS > 1) { - s = "loynfa"; + s = "tyfa"; n = READ_ONCE(mm_state->stats[hist][i]); } _ Patches currently in -mm which might be from yuzhao(a)google.com are mm-page_alloc-keep-track-of-free-highatomic.patch

1 year, 1 month

1
0
0 0

[merged mm-hotfixes-stable] mm-allow-set-clear-page_type-again.patch removed from -mm tree

by Andrew Morton

The quilt patch titled Subject: mm: allow set/clear page_type again has been removed from the -mm tree. Its filename was mm-allow-set-clear-page_type-again.patch This patch was dropped because it was merged into the mm-hotfixes-stable branch of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm ------------------------------------------------------ From: Yu Zhao <yuzhao(a)google.com> Subject: mm: allow set/clear page_type again Date: Sat, 19 Oct 2024 22:22:12 -0600 Some page flags (page->flags) were converted to page types (page->page_types). A recent example is PG_hugetlb. From the exclusive writer's perspective, e.g., a thread doing __folio_set_hugetlb(), there is a difference between the page flag and type APIs: the former allows the same non-atomic operation to be repeated whereas the latter does not. For example, calling __folio_set_hugetlb() twice triggers VM_BUG_ON_FOLIO(), since the second call expects the type (PG_hugetlb) not to be set previously. Using add_hugetlb_folio() as an example, it calls __folio_set_hugetlb() in the following error-handling path. And when that happens, it triggers the aforementioned VM_BUG_ON_FOLIO(). if (folio_test_hugetlb(folio)) { rc = hugetlb_vmemmap_restore_folio(h, folio); if (rc) { spin_lock_irq(&hugetlb_lock); add_hugetlb_folio(h, folio, false); ... It is possible to make hugeTLB comply with the new requirements from the page type API. However, a straightforward fix would be to just allow the same page type to be set or cleared again inside the API, to avoid any changes to its callers. 
Link: https://lkml.kernel.org/r/20241020042212.296781-1-yuzhao@google.com Fixes: d99e3140a4d3 ("mm: turn folio_test_hugetlb into a PageType") Signed-off-by: Yu Zhao <yuzhao(a)google.com> Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org> Cc: Muchun Song <muchun.song(a)linux.dev> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- include/linux/page-flags.h | 8 ++++++++ 1 file changed, 8 insertions(+) --- a/include/linux/page-flags.h~mm-allow-set-clear-page_type-again +++ a/include/linux/page-flags.h @@ -975,12 +975,16 @@ static __always_inline bool folio_test_# } \ static __always_inline void __folio_set_##fname(struct folio *folio) \ { \ + if (folio_test_##fname(folio)) \ + return; \ VM_BUG_ON_FOLIO(data_race(folio->page.page_type) != UINT_MAX, \ folio); \ folio->page.page_type = (unsigned int)PGTY_##lname << 24; \ } \ static __always_inline void __folio_clear_##fname(struct folio *folio) \ { \ + if (folio->page.page_type == UINT_MAX) \ + return; \ VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio); \ folio->page.page_type = UINT_MAX; \ } @@ -993,11 +997,15 @@ static __always_inline int Page##uname(c } \ static __always_inline void __SetPage##uname(struct page *page) \ { \ + if (Page##uname(page)) \ + return; \ VM_BUG_ON_PAGE(data_race(page->page_type) != UINT_MAX, page); \ page->page_type = (unsigned int)PGTY_##lname << 24; \ } \ static __always_inline void __ClearPage##uname(struct page *page) \ { \ + if (page->page_type == UINT_MAX) \ + return; \ VM_BUG_ON_PAGE(!Page##uname(page), page); \ page->page_type = UINT_MAX; \ } _ Patches currently in -mm which might be from yuzhao(a)google.com are mm-page_alloc-keep-track-of-free-highatomic.patch

1 year, 1 month

1
0
0 0

[merged mm-hotfixes-stable] nilfs2-fix-potential-deadlock-with-newly-created-symlinks.patch removed from -mm tree

by Andrew Morton

The quilt patch titled Subject: nilfs2: fix potential deadlock with newly created symlinks has been removed from the -mm tree. Its filename was nilfs2-fix-potential-deadlock-with-newly-created-symlinks.patch This patch was dropped because it was merged into the mm-hotfixes-stable branch of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm ------------------------------------------------------ From: Ryusuke Konishi <konishi.ryusuke(a)gmail.com> Subject: nilfs2: fix potential deadlock with newly created symlinks Date: Sun, 20 Oct 2024 13:51:28 +0900 Syzbot reported that page_symlink(), called by nilfs_symlink(), triggers memory reclamation involving the filesystem layer, which can result in circular lock dependencies among the reader/writer semaphore nilfs->ns_segctor_sem, s_writers percpu_rwsem (intwrite) and the fs_reclaim pseudo lock. This is because after commit 21fc61c73c39 ("don't put symlink bodies in pagecache into highmem"), the gfp flags of the page cache for symbolic links are overwritten to GFP_KERNEL via inode_nohighmem(). This is not a problem for symlinks read from the backing device, because the __GFP_FS flag is dropped after inode_nohighmem() is called. However, when a new symlink is created with nilfs_symlink(), the gfp flags remain overwritten to GFP_KERNEL. Then, memory allocation called from page_symlink() etc. triggers memory reclamation including the FS layer, which may call nilfs_evict_inode() or nilfs_dirty_inode(). And these can cause a deadlock if they are called while nilfs->ns_segctor_sem is held: Fix this issue by dropping the __GFP_FS flag from the page cache GFP flags of newly created symlinks in the same way that nilfs_new_inode() and __nilfs_read_inode() do, as a workaround until we adopt nofs allocation scope consistently or improve the locking constraints. 
Link: https://lkml.kernel.org/r/20241020050003.4308-1-konishi.ryusuke@gmail.com Fixes: 21fc61c73c39 ("don't put symlink bodies in pagecache into highmem") Signed-off-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com> Reported-by: syzbot+9ef37ac20608f4836256(a)syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=9ef37ac20608f4836256 Tested-by: syzbot+9ef37ac20608f4836256(a)syzkaller.appspotmail.com Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- fs/nilfs2/namei.c | 3 +++ 1 file changed, 3 insertions(+) --- a/fs/nilfs2/namei.c~nilfs2-fix-potential-deadlock-with-newly-created-symlinks +++ a/fs/nilfs2/namei.c @@ -157,6 +157,9 @@ static int nilfs_symlink(struct mnt_idma /* slow symlink */ inode->i_op = &nilfs_symlink_inode_operations; inode_nohighmem(inode); + mapping_set_gfp_mask(inode->i_mapping, + mapping_gfp_constraint(inode->i_mapping, + ~__GFP_FS)); inode->i_mapping->a_ops = &nilfs_aops; err = page_symlink(inode, symname, l); if (err) _ Patches currently in -mm which might be from konishi.ryusuke(a)gmail.com are nilfs2-convert-segment-buffer-to-be-folio-based.patch nilfs2-convert-common-metadata-file-code-to-be-folio-based.patch nilfs2-convert-segment-usage-file-to-be-folio-based.patch nilfs2-convert-persistent-object-allocator-to-be-folio-based.patch nilfs2-convert-inode-file-to-be-folio-based.patch nilfs2-convert-dat-file-to-be-folio-based.patch nilfs2-remove-nilfs_palloc_block_get_entry.patch nilfs2-convert-checkpoint-file-to-be-folio-based.patch

1 year, 1 month

1
0
0 0

[merged mm-hotfixes-stable] kasan-remove-vmalloc_percpu-test.patch removed from -mm tree

by Andrew Morton

The quilt patch titled Subject: kasan: remove vmalloc_percpu test has been removed from the -mm tree. Its filename was kasan-remove-vmalloc_percpu-test.patch This patch was dropped because it was merged into the mm-hotfixes-stable branch of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm ------------------------------------------------------ From: Andrey Konovalov <andreyknvl(a)gmail.com> Subject: kasan: remove vmalloc_percpu test Date: Tue, 22 Oct 2024 18:07:06 +0200 Commit 1a2473f0cbc0 ("kasan: improve vmalloc tests") added the vmalloc_percpu KASAN test with the assumption that __alloc_percpu always uses vmalloc internally, which is tagged by KASAN. However, __alloc_percpu might allocate memory from the first per-CPU chunk, which is not allocated via vmalloc(). As a result, the test might fail. Remove the test until proper KASAN annotations for per-CPU allocations are added; tracked in https://bugzilla.kernel.org/show_bug.cgi?id=215019. Link: https://lkml.kernel.org/r/20241022160706.38943-1-andrey.konovalov@linux.dev Fixes: 1a2473f0cbc0 ("kasan: improve vmalloc tests") Signed-off-by: Andrey Konovalov <andreyknvl(a)gmail.com> Reported-by: Samuel Holland <samuel.holland(a)sifive.com> Link: https://lore.kernel.org/all/4a245fff-cc46-44d1-a5f9-fd2f1c3764ae@sifive.com/ Reported-by: Sabyrzhan Tasbolatov <snovitoll(a)gmail.com> Link: https://lore.kernel.org/all/CACzwLxiWzNqPBp4C1VkaXZ2wDwvY3yZeetCi1TLGFipKW7… Cc: Alexander Potapenko <glider(a)google.com> Cc: Andrey Ryabinin <ryabinin.a.a(a)gmail.com> Cc: Dmitry Vyukov <dvyukov(a)google.com> Cc: Marco Elver <elver(a)google.com> Cc: Sabyrzhan Tasbolatov <snovitoll(a)gmail.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/kasan/kasan_test_c.c | 27 --------------------------- 1 file changed, 27 deletions(-) --- a/mm/kasan/kasan_test_c.c~kasan-remove-vmalloc_percpu-test +++ a/mm/kasan/kasan_test_c.c @@ -1810,32 +1810,6 @@ static void vm_map_ram_tags(struct kunit 
free_pages((unsigned long)p_ptr, 1); } -static void vmalloc_percpu(struct kunit *test) -{ - char __percpu *ptr; - int cpu; - - /* - * This test is specifically crafted for the software tag-based mode, - * the only tag-based mode that poisons percpu mappings. - */ - KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_SW_TAGS); - - ptr = __alloc_percpu(PAGE_SIZE, PAGE_SIZE); - - for_each_possible_cpu(cpu) { - char *c_ptr = per_cpu_ptr(ptr, cpu); - - KUNIT_EXPECT_GE(test, (u8)get_tag(c_ptr), (u8)KASAN_TAG_MIN); - KUNIT_EXPECT_LT(test, (u8)get_tag(c_ptr), (u8)KASAN_TAG_KERNEL); - - /* Make sure that in-bounds accesses don't crash the kernel. */ - *c_ptr = 0; - } - - free_percpu(ptr); -} - /* * Check that the assigned pointer tag falls within the [KASAN_TAG_MIN, * KASAN_TAG_KERNEL) range (note: excluding the match-all tag) for tag-based @@ -2023,7 +1997,6 @@ static struct kunit_case kasan_kunit_tes KUNIT_CASE(vmalloc_oob), KUNIT_CASE(vmap_tags), KUNIT_CASE(vm_map_ram_tags), - KUNIT_CASE(vmalloc_percpu), KUNIT_CASE(match_all_not_assigned), KUNIT_CASE(match_all_ptr_tag), KUNIT_CASE(match_all_mem_tag), _ Patches currently in -mm which might be from andreyknvl(a)gmail.com are

1 year, 1 month

1
0
0 0

[merged mm-hotfixes-stable] tools-mm-werror-fixes-in-page-types-slabinfo.patch removed from -mm tree

by Andrew Morton

The quilt patch titled Subject: tools/mm: -Werror fixes in page-types/slabinfo has been removed from the -mm tree. Its filename was tools-mm-werror-fixes-in-page-types-slabinfo.patch This patch was dropped because it was merged into the mm-hotfixes-stable branch of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm ------------------------------------------------------ From: Wladislav Wiebe <wladislav.kw(a)gmail.com> Subject: tools/mm: -Werror fixes in page-types/slabinfo Date: Tue, 22 Oct 2024 19:21:13 +0200 Commit e6d2c436ff693 ("tools/mm: allow users to provide additional cflags/ldflags") now passes CFLAGS to the Makefile. With this, build systems with default -Werror enabled found: slabinfo.c:1300:25: error: ignoring return value of 'chdir' declared with attribute 'warn_unused_result' [-Werror=unused-result] chdir(".."); ^~~~~~~~~~~ page-types.c:397:35: error: format '%lu' expects argument of type 'long unsigned int', but argument 2 has type 'uint64_t' {aka 'long long unsigned int'} [-Werror=format=] printf("%lu\t", mapcnt0); ~~^ ~~~~~~~ .. Fix page-types by using PRIu64 for uint64_t prints and check in slabinfo for return code on chdir(".."). Link: https://lkml.kernel.org/r/c1ceb507-94bc-461c-934d-c19b77edd825@gmail.com Fixes: e6d2c436ff69 ("tools/mm: allow users to provide additional cflags/ldflags") Signed-off-by: Wladislav Wiebe <wladislav.kw(a)gmail.com> Cc: Vlastimil Babka <vbabka(a)suse.cz> Cc: Herton R. 
Krzesinski <herton(a)redhat.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- tools/mm/page-types.c | 9 +++++---- tools/mm/slabinfo.c | 4 +++- 2 files changed, 8 insertions(+), 5 deletions(-) --- a/tools/mm/page-types.c~tools-mm-werror-fixes-in-page-types-slabinfo +++ a/tools/mm/page-types.c @@ -22,6 +22,7 @@ #include <time.h> #include <setjmp.h> #include <signal.h> +#include <inttypes.h> #include <sys/types.h> #include <sys/errno.h> #include <sys/fcntl.h> @@ -391,9 +392,9 @@ static void show_page_range(unsigned lon if (opt_file) printf("%lx\t", voff); if (opt_list_cgroup) - printf("@%llu\t", (unsigned long long)cgroup0); + printf("@%" PRIu64 "\t", cgroup0); if (opt_list_mapcnt) - printf("%lu\t", mapcnt0); + printf("%" PRIu64 "\t", mapcnt0); printf("%lx\t%lx\t%s\n", index, count, page_flag_name(flags0)); } @@ -419,9 +420,9 @@ static void show_page(unsigned long voff if (opt_file) printf("%lx\t", voffset); if (opt_list_cgroup) - printf("@%llu\t", (unsigned long long)cgroup); + printf("@%" PRIu64 "\t", cgroup); if (opt_list_mapcnt) - printf("%lu\t", mapcnt); + printf("%" PRIu64 "\t", mapcnt); printf("%lx\t%s\n", offset, page_flag_name(flags)); } --- a/tools/mm/slabinfo.c~tools-mm-werror-fixes-in-page-types-slabinfo +++ a/tools/mm/slabinfo.c @@ -1297,7 +1297,9 @@ static void read_slab_dir(void) slab->cpu_partial_free = get_obj("cpu_partial_free"); slab->alloc_node_mismatch = get_obj("alloc_node_mismatch"); slab->deactivate_bypass = get_obj("deactivate_bypass"); - chdir(".."); + if (chdir("..")) + fatal("Unable to chdir from slab ../%s\n", + slab->name); if (slab->name[0] == ':') alias_targets++; slab++; _ Patches currently in -mm which might be from wladislav.kw(a)gmail.com are

1 year, 1 month

1
0
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror October 2024