The page allocator tracks the number of zones that have unaccepted memory using static_branch_enc/dec() and uses that static branch in hot paths to determine if it needs to deal with unaccepted memory.
Borisal and Thomas pointed out that the tracking is racy operations on static_branch are not serialized against adding/removing unaccepted pages to/from the zone.
The effect of this static_branch optimization is only visible on microbenchmark.
Instead of adding more complexity around it, remove it altogether.
Signed-off-by: Kirill A. Shutemov kirill.shutemov@linux.intel.com Fixes: dcdfdd40fa82 ("mm: Add support for unaccepted memory") Link: https://lore.kernel.org/all/20250506092445.GBaBnVXXyvnazly6iF@fat_crate.loca... Reported-by: Borislav Petkov bp@alien8.de Reported-by: Thomas Gleixner tglx@linutronix.de Cc: stable@vger.kernel.org # v6.5+ Cc: Andrew Morton akpm@linux-foundation.org Cc: Vlastimil Babka vbabka@suse.cz Cc: Suren Baghdasaryan surenb@google.com Cc: Michal Hocko mhocko@suse.com Cc: Brendan Jackman jackmanb@google.com Cc: Johannes Weiner hannes@cmpxchg.org --- mm/internal.h | 1 - mm/mm_init.c | 1 - mm/page_alloc.c | 47 ----------------------------------------------- 3 files changed, 49 deletions(-)
diff --git a/mm/internal.h b/mm/internal.h index e9695baa5922..50c2f590b2d0 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1595,7 +1595,6 @@ unsigned long move_page_tables(struct pagetable_move_control *pmc);
#ifdef CONFIG_UNACCEPTED_MEMORY void accept_page(struct page *page); -void unaccepted_cleanup_work(struct work_struct *work); #else /* CONFIG_UNACCEPTED_MEMORY */ static inline void accept_page(struct page *page) { diff --git a/mm/mm_init.c b/mm/mm_init.c index 9659689b8ace..84f14fa12d0d 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -1441,7 +1441,6 @@ static void __meminit zone_init_free_lists(struct zone *zone)
#ifdef CONFIG_UNACCEPTED_MEMORY INIT_LIST_HEAD(&zone->unaccepted_pages); - INIT_WORK(&zone->unaccepted_cleanup, unaccepted_cleanup_work); #endif }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5fccf5fce084..a4a4df2daedb 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7175,16 +7175,8 @@ bool has_managed_dma(void)
#ifdef CONFIG_UNACCEPTED_MEMORY
-/* Counts number of zones with unaccepted pages. */ -static DEFINE_STATIC_KEY_FALSE(zones_with_unaccepted_pages); - static bool lazy_accept = true;
-void unaccepted_cleanup_work(struct work_struct *work) -{ - static_branch_dec(&zones_with_unaccepted_pages); -} - static int __init accept_memory_parse(char *p) { if (!strcmp(p, "lazy")) { @@ -7209,11 +7201,7 @@ static bool page_contains_unaccepted(struct page *page, unsigned int order) static void __accept_page(struct zone *zone, unsigned long *flags, struct page *page) { - bool last; - list_del(&page->lru); - last = list_empty(&zone->unaccepted_pages); - account_freepages(zone, -MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE); __mod_zone_page_state(zone, NR_UNACCEPTED, -MAX_ORDER_NR_PAGES); __ClearPageUnaccepted(page); @@ -7222,28 +7210,6 @@ static void __accept_page(struct zone *zone, unsigned long *flags, accept_memory(page_to_phys(page), PAGE_SIZE << MAX_PAGE_ORDER);
__free_pages_ok(page, MAX_PAGE_ORDER, FPI_TO_TAIL); - - if (last) { - /* - * There are two corner cases: - * - * - If allocation occurs during the CPU bring up, - * static_branch_dec() cannot be used directly as - * it causes a deadlock on cpu_hotplug_lock. - * - * Instead, use schedule_work() to prevent deadlock. - * - * - If allocation occurs before workqueues are initialized, - * static_branch_dec() should be called directly. - * - * Workqueues are initialized before CPU bring up, so this - * will not conflict with the first scenario. - */ - if (system_wq) - schedule_work(&zone->unaccepted_cleanup); - else - unaccepted_cleanup_work(&zone->unaccepted_cleanup); - } }
void accept_page(struct page *page) @@ -7280,20 +7246,12 @@ static bool try_to_accept_memory_one(struct zone *zone) return true; }
-static inline bool has_unaccepted_memory(void) -{ - return static_branch_unlikely(&zones_with_unaccepted_pages); -} - static bool cond_accept_memory(struct zone *zone, unsigned int order, int alloc_flags) { long to_accept, wmark; bool ret = false;
- if (!has_unaccepted_memory()) - return false; - if (list_empty(&zone->unaccepted_pages)) return false;
@@ -7331,22 +7289,17 @@ static bool __free_unaccepted(struct page *page) { struct zone *zone = page_zone(page); unsigned long flags; - bool first = false;
if (!lazy_accept) return false;
spin_lock_irqsave(&zone->lock, flags); - first = list_empty(&zone->unaccepted_pages); list_add_tail(&page->lru, &zone->unaccepted_pages); account_freepages(zone, MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE); __mod_zone_page_state(zone, NR_UNACCEPTED, MAX_ORDER_NR_PAGES); __SetPageUnaccepted(page); spin_unlock_irqrestore(&zone->lock, flags);
- if (first) - static_branch_inc(&zones_with_unaccepted_pages); - return true; }
On Tue, May 06, 2025 at 02:25:09PM +0300, Kirill A. Shutemov wrote:
The page allocator tracks the number of zones that have unaccepted memory using static_branch_enc/dec() and uses that static branch in hot paths to determine if it needs to deal with unaccepted memory.
Borisal and Thomas pointed out that the tracking is racy operations on
Boris or Borislav would be nice.
static_branch are not serialized against adding/removing unaccepted pages to/from the zone.
Also, that sentence needs massaging.
The effect of this static_branch optimization is only visible on microbenchmark.
Instead of adding more complexity around it, remove it altogether.
Yah, good idea.
Signed-off-by: Kirill A. Shutemov kirill.shutemov@linux.intel.com Fixes: dcdfdd40fa82 ("mm: Add support for unaccepted memory") Link: https://lore.kernel.org/all/20250506092445.GBaBnVXXyvnazly6iF@fat_crate.loca... Reported-by: Borislav Petkov bp@alien8.de
Tested-by: Borislav Petkov (AMD) bp@alien8.de
Thx for the quick fix.
The page allocator tracks the number of zones that have unaccepted memory using static_branch_enc/dec() and uses that static branch in hot paths to determine if it needs to deal with unaccepted memory.
Borislav and Thomas pointed out that the tracking is racy: operations on static_branch are not serialized against adding/removing unaccepted pages to/from the zone.
Sanity checks inside static_branch machinery detects it:
WARNING: CPU: 0 PID: 10 at kernel/jump_label.c:276 __static_key_slow_dec_cpuslocked+0x8e/0xa0
The comment around the WARN() explains the problem:
/* * Warn about the '-1' case though; since that means a * decrement is concurrent with a first (0->1) increment. IOW * people are trying to disable something that wasn't yet fully * enabled. This suggests an ordering problem on the user side. */
The effect of this static_branch optimization is only visible on microbenchmark.
Instead of adding more complexity around it, remove it altogether.
Signed-off-by: Kirill A. Shutemov kirill.shutemov@linux.intel.com Fixes: dcdfdd40fa82 ("mm: Add support for unaccepted memory") Link: https://lore.kernel.org/all/20250506092445.GBaBnVXXyvnazly6iF@fat_crate.loca... Reported-by: Borislav Petkov bp@alien8.de Tested-by: Borislav Petkov (AMD) bp@alien8.de Reported-by: Thomas Gleixner tglx@linutronix.de Cc: stable@vger.kernel.org # v6.5+ Cc: Andrew Morton akpm@linux-foundation.org Cc: Vlastimil Babka vbabka@suse.cz Cc: Suren Baghdasaryan surenb@google.com Cc: Michal Hocko mhocko@suse.com Cc: Brendan Jackman jackmanb@google.com Cc: Johannes Weiner hannes@cmpxchg.org ---
v2: - Update commit message; - Apply Borislav's Tested-by tag;
--- mm/internal.h | 1 - mm/mm_init.c | 1 - mm/page_alloc.c | 47 ----------------------------------------------- 3 files changed, 49 deletions(-)
diff --git a/mm/internal.h b/mm/internal.h index e9695baa5922..50c2f590b2d0 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1595,7 +1595,6 @@ unsigned long move_page_tables(struct pagetable_move_control *pmc);
#ifdef CONFIG_UNACCEPTED_MEMORY void accept_page(struct page *page); -void unaccepted_cleanup_work(struct work_struct *work); #else /* CONFIG_UNACCEPTED_MEMORY */ static inline void accept_page(struct page *page) { diff --git a/mm/mm_init.c b/mm/mm_init.c index 9659689b8ace..84f14fa12d0d 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -1441,7 +1441,6 @@ static void __meminit zone_init_free_lists(struct zone *zone)
#ifdef CONFIG_UNACCEPTED_MEMORY INIT_LIST_HEAD(&zone->unaccepted_pages); - INIT_WORK(&zone->unaccepted_cleanup, unaccepted_cleanup_work); #endif }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5fccf5fce084..a4a4df2daedb 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7175,16 +7175,8 @@ bool has_managed_dma(void)
#ifdef CONFIG_UNACCEPTED_MEMORY
-/* Counts number of zones with unaccepted pages. */ -static DEFINE_STATIC_KEY_FALSE(zones_with_unaccepted_pages); - static bool lazy_accept = true;
-void unaccepted_cleanup_work(struct work_struct *work) -{ - static_branch_dec(&zones_with_unaccepted_pages); -} - static int __init accept_memory_parse(char *p) { if (!strcmp(p, "lazy")) { @@ -7209,11 +7201,7 @@ static bool page_contains_unaccepted(struct page *page, unsigned int order) static void __accept_page(struct zone *zone, unsigned long *flags, struct page *page) { - bool last; - list_del(&page->lru); - last = list_empty(&zone->unaccepted_pages); - account_freepages(zone, -MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE); __mod_zone_page_state(zone, NR_UNACCEPTED, -MAX_ORDER_NR_PAGES); __ClearPageUnaccepted(page); @@ -7222,28 +7210,6 @@ static void __accept_page(struct zone *zone, unsigned long *flags, accept_memory(page_to_phys(page), PAGE_SIZE << MAX_PAGE_ORDER);
__free_pages_ok(page, MAX_PAGE_ORDER, FPI_TO_TAIL); - - if (last) { - /* - * There are two corner cases: - * - * - If allocation occurs during the CPU bring up, - * static_branch_dec() cannot be used directly as - * it causes a deadlock on cpu_hotplug_lock. - * - * Instead, use schedule_work() to prevent deadlock. - * - * - If allocation occurs before workqueues are initialized, - * static_branch_dec() should be called directly. - * - * Workqueues are initialized before CPU bring up, so this - * will not conflict with the first scenario. - */ - if (system_wq) - schedule_work(&zone->unaccepted_cleanup); - else - unaccepted_cleanup_work(&zone->unaccepted_cleanup); - } }
void accept_page(struct page *page) @@ -7280,20 +7246,12 @@ static bool try_to_accept_memory_one(struct zone *zone) return true; }
-static inline bool has_unaccepted_memory(void) -{ - return static_branch_unlikely(&zones_with_unaccepted_pages); -} - static bool cond_accept_memory(struct zone *zone, unsigned int order, int alloc_flags) { long to_accept, wmark; bool ret = false;
- if (!has_unaccepted_memory()) - return false; - if (list_empty(&zone->unaccepted_pages)) return false;
@@ -7331,22 +7289,17 @@ static bool __free_unaccepted(struct page *page) { struct zone *zone = page_zone(page); unsigned long flags; - bool first = false;
if (!lazy_accept) return false;
spin_lock_irqsave(&zone->lock, flags); - first = list_empty(&zone->unaccepted_pages); list_add_tail(&page->lru, &zone->unaccepted_pages); account_freepages(zone, MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE); __mod_zone_page_state(zone, NR_UNACCEPTED, MAX_ORDER_NR_PAGES); __SetPageUnaccepted(page); spin_unlock_irqrestore(&zone->lock, flags);
- if (first) - static_branch_inc(&zones_with_unaccepted_pages); - return true; }
linux-stable-mirror@lists.linaro.org