Per the discussion with Joonsoo Kim [1], we need to check the return value of lookup_page_ext() for all call sites, since it might return NULL in some cases — although it is unlikely — e.g. during memory hotplug.
Tested with ltp with "page_owner=0".
[1] http://lkml.kernel.org/r/20160519002809.GA10245@js1304-P5Q-DELUXE
Signed-off-by: Yang Shi yang.shi@linaro.org --- include/linux/page_idle.h | 43 ++++++++++++++++++++++++++++++++++++------- mm/page_alloc.c | 6 ++++++ mm/page_owner.c | 27 +++++++++++++++++++++++++++ mm/page_poison.c | 8 +++++++- mm/vmstat.c | 2 ++ 5 files changed, 78 insertions(+), 8 deletions(-)
diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h index bf268fa..8f5d4ad 100644 --- a/include/linux/page_idle.h +++ b/include/linux/page_idle.h @@ -46,33 +46,62 @@ extern struct page_ext_operations page_idle_ops;
static inline bool page_is_young(struct page *page) { - return test_bit(PAGE_EXT_YOUNG, &lookup_page_ext(page)->flags); + struct page_ext *page_ext; + page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + return false; + + return test_bit(PAGE_EXT_YOUNG, &page_ext->flags); }
static inline void set_page_young(struct page *page) { - set_bit(PAGE_EXT_YOUNG, &lookup_page_ext(page)->flags); + struct page_ext *page_ext; + page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + return; + + set_bit(PAGE_EXT_YOUNG, &page_ext->flags); }
static inline bool test_and_clear_page_young(struct page *page) { - return test_and_clear_bit(PAGE_EXT_YOUNG, - &lookup_page_ext(page)->flags); + struct page_ext *page_ext; + page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + return false; + + return test_and_clear_bit(PAGE_EXT_YOUNG, &page_ext->flags); }
static inline bool page_is_idle(struct page *page) { - return test_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags); + struct page_ext *page_ext; + page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + return false; + + return test_bit(PAGE_EXT_IDLE, &page_ext->flags); }
static inline void set_page_idle(struct page *page) { - set_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags); + struct page_ext *page_ext; + page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + return; + + set_bit(PAGE_EXT_IDLE, &page_ext->flags); }
static inline void clear_page_idle(struct page *page) { - clear_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags); + struct page_ext *page_ext; + page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + return; + + clear_bit(PAGE_EXT_IDLE, &page_ext->flags); } #endif /* CONFIG_64BIT */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f8f3bfc..d27e8b9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -656,6 +656,9 @@ static inline void set_page_guard(struct zone *zone, struct page *page, return;
page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + return; + __set_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
INIT_LIST_HEAD(&page->lru); @@ -673,6 +676,9 @@ static inline void clear_page_guard(struct zone *zone, struct page *page, return;
page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + return; + __clear_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
set_page_private(page, 0); diff --git a/mm/page_owner.c b/mm/page_owner.c index 792b56d..902e398 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -55,6 +55,8 @@ void __reset_page_owner(struct page *page, unsigned int order)
for (i = 0; i < (1 << order); i++) { page_ext = lookup_page_ext(page + i); + if (unlikely(!page_ext)) + continue; __clear_bit(PAGE_EXT_OWNER, &page_ext->flags); } } @@ -62,6 +64,10 @@ void __reset_page_owner(struct page *page, unsigned int order) void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) { struct page_ext *page_ext = lookup_page_ext(page); + + if (unlikely(!page_ext)) + return; + struct stack_trace trace = { .nr_entries = 0, .max_entries = ARRAY_SIZE(page_ext->trace_entries), @@ -82,6 +88,8 @@ void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) void __set_page_owner_migrate_reason(struct page *page, int reason) { struct page_ext *page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + return;
page_ext->last_migrate_reason = reason; } @@ -89,6 +97,12 @@ void __set_page_owner_migrate_reason(struct page *page, int reason) gfp_t __get_page_owner_gfp(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + /* + * The caller just returns 0 if no valid gfp + * So return 0 here too. + */ + return 0;
return page_ext->gfp_mask; } @@ -97,6 +111,10 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage) { struct page_ext *old_ext = lookup_page_ext(oldpage); struct page_ext *new_ext = lookup_page_ext(newpage); + + if (unlikely(!old_ext || !new_ext)) + return; + int i;
new_ext->order = old_ext->order; @@ -186,6 +204,11 @@ err: void __dump_page_owner(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) { + pr_alert("There is no page extension available.\n"); + return; + } + struct stack_trace trace = { .nr_entries = page_ext->nr_entries, .entries = &page_ext->trace_entries[0], @@ -251,6 +274,8 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos) }
page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + continue;
/* * Some pages could be missed by concurrent allocation or free, @@ -317,6 +342,8 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone) continue;
page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + continue;
/* Maybe overlapping zone */ if (test_bit(PAGE_EXT_OWNER, &page_ext->flags)) diff --git a/mm/page_poison.c b/mm/page_poison.c index 1eae5fa..2e647c6 100644 --- a/mm/page_poison.c +++ b/mm/page_poison.c @@ -54,6 +54,9 @@ static inline void set_page_poison(struct page *page) struct page_ext *page_ext;
page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + return; + __set_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); }
@@ -62,6 +65,9 @@ static inline void clear_page_poison(struct page *page) struct page_ext *page_ext;
page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + return; + __clear_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); }
@@ -70,7 +76,7 @@ bool page_is_poisoned(struct page *page) struct page_ext *page_ext;
page_ext = lookup_page_ext(page); - if (!page_ext) + if (unlikely(!page_ext)) return false;
return test_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); diff --git a/mm/vmstat.c b/mm/vmstat.c index 77e42ef..cb2a67b 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1061,6 +1061,8 @@ static void pagetypeinfo_showmixedcount_print(struct seq_file *m, continue;
page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + continue;
if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) continue;
On Mon, May 23, 2016 at 10:16:08AM -0700, Yang Shi wrote:
Per the discussion with Joonsoo Kim [1], we need check the return value of lookup_page_ext() for all call sites since it might return NULL in some cases, although it is unlikely, i.e. memory hotplug.
Tested with ltp with "page_owner=0".
[1] http://lkml.kernel.org/r/20160519002809.GA10245@js1304-P5Q-DELUXE
Signed-off-by: Yang Shi yang.shi@linaro.org
I didn't read the code in detail to see how page_ext memory space is allocated in boot code and memory hotplug, but to me, it's not good to check for NULL whenever we call lookup_page_ext.
A more dangerous thing is that page_ext is currently used only by optional features (i.e., not critical for system stability), but if we want to use page_ext as another important tool for the system in the future, this could become a serious problem.
Can we put some page_ext hooks into memory hotplug to guarantee that page_ext memory space is allocated together with memmap space at the same time? IOW, once any PFN walker finds a page is valid, page_ext is valid too, so lookup_page_ext never returns NULL on a valid page by design.
I hope we consider this direction, too.
Thanks.
include/linux/page_idle.h | 43 ++++++++++++++++++++++++++++++++++++------- mm/page_alloc.c | 6 ++++++ mm/page_owner.c | 27 +++++++++++++++++++++++++++ mm/page_poison.c | 8 +++++++- mm/vmstat.c | 2 ++ 5 files changed, 78 insertions(+), 8 deletions(-)
diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h index bf268fa..8f5d4ad 100644 --- a/include/linux/page_idle.h +++ b/include/linux/page_idle.h @@ -46,33 +46,62 @@ extern struct page_ext_operations page_idle_ops; static inline bool page_is_young(struct page *page) {
- return test_bit(PAGE_EXT_YOUNG, &lookup_page_ext(page)->flags);
- struct page_ext *page_ext;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
return false;
- return test_bit(PAGE_EXT_YOUNG, &page_ext->flags);
} static inline void set_page_young(struct page *page) {
- set_bit(PAGE_EXT_YOUNG, &lookup_page_ext(page)->flags);
- struct page_ext *page_ext;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
return;
- set_bit(PAGE_EXT_YOUNG, &page_ext->flags);
} static inline bool test_and_clear_page_young(struct page *page) {
- return test_and_clear_bit(PAGE_EXT_YOUNG,
&lookup_page_ext(page)->flags);
- struct page_ext *page_ext;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
return false;
- return test_and_clear_bit(PAGE_EXT_YOUNG, &page_ext->flags);
} static inline bool page_is_idle(struct page *page) {
- return test_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags);
- struct page_ext *page_ext;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
return false;
- return test_bit(PAGE_EXT_IDLE, &page_ext->flags);
} static inline void set_page_idle(struct page *page) {
- set_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags);
- struct page_ext *page_ext;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
return;
- set_bit(PAGE_EXT_IDLE, &page_ext->flags);
} static inline void clear_page_idle(struct page *page) {
- clear_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags);
- struct page_ext *page_ext;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
return;
- clear_bit(PAGE_EXT_IDLE, &page_ext->flags);
} #endif /* CONFIG_64BIT */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f8f3bfc..d27e8b9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -656,6 +656,9 @@ static inline void set_page_guard(struct zone *zone, struct page *page, return; page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
- __set_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
INIT_LIST_HEAD(&page->lru); @@ -673,6 +676,9 @@ static inline void clear_page_guard(struct zone *zone, struct page *page, return; page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
- __clear_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
set_page_private(page, 0); diff --git a/mm/page_owner.c b/mm/page_owner.c index 792b56d..902e398 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -55,6 +55,8 @@ void __reset_page_owner(struct page *page, unsigned int order) for (i = 0; i < (1 << order); i++) { page_ext = lookup_page_ext(page + i);
if (unlikely(!page_ext))
__clear_bit(PAGE_EXT_OWNER, &page_ext->flags); }continue;
} @@ -62,6 +64,10 @@ void __reset_page_owner(struct page *page, unsigned int order) void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
- struct stack_trace trace = { .nr_entries = 0, .max_entries = ARRAY_SIZE(page_ext->trace_entries),
@@ -82,6 +88,8 @@ void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) void __set_page_owner_migrate_reason(struct page *page, int reason) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
page_ext->last_migrate_reason = reason; } @@ -89,6 +97,12 @@ void __set_page_owner_migrate_reason(struct page *page, int reason) gfp_t __get_page_owner_gfp(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
/*
* The caller just returns 0 if no valid gfp
* So return 0 here too.
*/
return 0;
return page_ext->gfp_mask; } @@ -97,6 +111,10 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage) { struct page_ext *old_ext = lookup_page_ext(oldpage); struct page_ext *new_ext = lookup_page_ext(newpage);
- if (unlikely(!old_ext || !new_ext))
return;
- int i;
new_ext->order = old_ext->order; @@ -186,6 +204,11 @@ err: void __dump_page_owner(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)) {
pr_alert("There is not page extension available.\n");
return;
- }
- struct stack_trace trace = { .nr_entries = page_ext->nr_entries, .entries = &page_ext->trace_entries[0],
@@ -251,6 +274,8 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos) } page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
continue;
/* * Some pages could be missed by concurrent allocation or free, @@ -317,6 +342,8 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone) continue; page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
continue;
/* Maybe overraping zone */ if (test_bit(PAGE_EXT_OWNER, &page_ext->flags)) diff --git a/mm/page_poison.c b/mm/page_poison.c index 1eae5fa..2e647c6 100644 --- a/mm/page_poison.c +++ b/mm/page_poison.c @@ -54,6 +54,9 @@ static inline void set_page_poison(struct page *page) struct page_ext *page_ext; page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
- __set_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags);
} @@ -62,6 +65,9 @@ static inline void clear_page_poison(struct page *page) struct page_ext *page_ext; page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
- __clear_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags);
} @@ -70,7 +76,7 @@ bool page_is_poisoned(struct page *page) struct page_ext *page_ext; page_ext = lookup_page_ext(page);
- if (!page_ext)
- if (unlikely(!page_ext)) return false;
return test_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); diff --git a/mm/vmstat.c b/mm/vmstat.c index 77e42ef..cb2a67b 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1061,6 +1061,8 @@ static void pagetypeinfo_showmixedcount_print(struct seq_file *m, continue; page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
continue;
if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) continue; -- 2.0.2
-- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
On Tue, May 24, 2016 at 11:58:11AM +0900, Minchan Kim wrote:
On Mon, May 23, 2016 at 10:16:08AM -0700, Yang Shi wrote:
Per the discussion with Joonsoo Kim [1], we need check the return value of lookup_page_ext() for all call sites since it might return NULL in some cases, although it is unlikely, i.e. memory hotplug.
Tested with ltp with "page_owner=0".
[1] http://lkml.kernel.org/r/20160519002809.GA10245@js1304-P5Q-DELUXE
Signed-off-by: Yang Shi yang.shi@linaro.org
I didn't read code code in detail to see how page_ext memory space allocated in boot code and memory hotplug but to me, it's not good to check NULL whenever we calls lookup_page_ext.
More dangerous thing is now page_ext is used by optionable feature(ie, not critical for system stability) but if we want to use page_ext as another important tool for the system in future, it could be a serious problem.
Can we put some hooks of page_ext into memory-hotplug so guarantee that page_ext memory space is allocated with memmap space at the same time? IOW, once every PFN wakers find a page is valid, page_ext is valid, too so lookup_page_ext never returns NULL on valid page by design.
I hope we consider this direction, too.
Yang, Could you think about this?
Even, your patch was broken, I think. It doesn't work with !CONFIG_DEBUG_VM && !CONFIG_PAGE_POISONING because lookup_page_ext doesn't return NULL in that case.
Thanks.
include/linux/page_idle.h | 43 ++++++++++++++++++++++++++++++++++++------- mm/page_alloc.c | 6 ++++++ mm/page_owner.c | 27 +++++++++++++++++++++++++++ mm/page_poison.c | 8 +++++++- mm/vmstat.c | 2 ++ 5 files changed, 78 insertions(+), 8 deletions(-)
diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h index bf268fa..8f5d4ad 100644 --- a/include/linux/page_idle.h +++ b/include/linux/page_idle.h @@ -46,33 +46,62 @@ extern struct page_ext_operations page_idle_ops; static inline bool page_is_young(struct page *page) {
- return test_bit(PAGE_EXT_YOUNG, &lookup_page_ext(page)->flags);
- struct page_ext *page_ext;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
return false;
- return test_bit(PAGE_EXT_YOUNG, &page_ext->flags);
} static inline void set_page_young(struct page *page) {
- set_bit(PAGE_EXT_YOUNG, &lookup_page_ext(page)->flags);
- struct page_ext *page_ext;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
return;
- set_bit(PAGE_EXT_YOUNG, &page_ext->flags);
} static inline bool test_and_clear_page_young(struct page *page) {
- return test_and_clear_bit(PAGE_EXT_YOUNG,
&lookup_page_ext(page)->flags);
- struct page_ext *page_ext;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
return false;
- return test_and_clear_bit(PAGE_EXT_YOUNG, &page_ext->flags);
} static inline bool page_is_idle(struct page *page) {
- return test_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags);
- struct page_ext *page_ext;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
return false;
- return test_bit(PAGE_EXT_IDLE, &page_ext->flags);
} static inline void set_page_idle(struct page *page) {
- set_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags);
- struct page_ext *page_ext;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
return;
- set_bit(PAGE_EXT_IDLE, &page_ext->flags);
} static inline void clear_page_idle(struct page *page) {
- clear_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags);
- struct page_ext *page_ext;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
return;
- clear_bit(PAGE_EXT_IDLE, &page_ext->flags);
} #endif /* CONFIG_64BIT */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f8f3bfc..d27e8b9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -656,6 +656,9 @@ static inline void set_page_guard(struct zone *zone, struct page *page, return; page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
- __set_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
INIT_LIST_HEAD(&page->lru); @@ -673,6 +676,9 @@ static inline void clear_page_guard(struct zone *zone, struct page *page, return; page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
- __clear_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
set_page_private(page, 0); diff --git a/mm/page_owner.c b/mm/page_owner.c index 792b56d..902e398 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -55,6 +55,8 @@ void __reset_page_owner(struct page *page, unsigned int order) for (i = 0; i < (1 << order); i++) { page_ext = lookup_page_ext(page + i);
if (unlikely(!page_ext))
__clear_bit(PAGE_EXT_OWNER, &page_ext->flags); }continue;
} @@ -62,6 +64,10 @@ void __reset_page_owner(struct page *page, unsigned int order) void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
- struct stack_trace trace = { .nr_entries = 0, .max_entries = ARRAY_SIZE(page_ext->trace_entries),
@@ -82,6 +88,8 @@ void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) void __set_page_owner_migrate_reason(struct page *page, int reason) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
page_ext->last_migrate_reason = reason; } @@ -89,6 +97,12 @@ void __set_page_owner_migrate_reason(struct page *page, int reason) gfp_t __get_page_owner_gfp(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
/*
* The caller just returns 0 if no valid gfp
* So return 0 here too.
*/
return 0;
return page_ext->gfp_mask; } @@ -97,6 +111,10 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage) { struct page_ext *old_ext = lookup_page_ext(oldpage); struct page_ext *new_ext = lookup_page_ext(newpage);
- if (unlikely(!old_ext || !new_ext))
return;
- int i;
new_ext->order = old_ext->order; @@ -186,6 +204,11 @@ err: void __dump_page_owner(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)) {
pr_alert("There is not page extension available.\n");
return;
- }
- struct stack_trace trace = { .nr_entries = page_ext->nr_entries, .entries = &page_ext->trace_entries[0],
@@ -251,6 +274,8 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos) } page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
continue;
/* * Some pages could be missed by concurrent allocation or free, @@ -317,6 +342,8 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone) continue; page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
continue;
/* Maybe overraping zone */ if (test_bit(PAGE_EXT_OWNER, &page_ext->flags)) diff --git a/mm/page_poison.c b/mm/page_poison.c index 1eae5fa..2e647c6 100644 --- a/mm/page_poison.c +++ b/mm/page_poison.c @@ -54,6 +54,9 @@ static inline void set_page_poison(struct page *page) struct page_ext *page_ext; page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
- __set_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags);
} @@ -62,6 +65,9 @@ static inline void clear_page_poison(struct page *page) struct page_ext *page_ext; page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
- __clear_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags);
} @@ -70,7 +76,7 @@ bool page_is_poisoned(struct page *page) struct page_ext *page_ext; page_ext = lookup_page_ext(page);
- if (!page_ext)
- if (unlikely(!page_ext)) return false;
return test_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); diff --git a/mm/vmstat.c b/mm/vmstat.c index 77e42ef..cb2a67b 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1061,6 +1061,8 @@ static void pagetypeinfo_showmixedcount_print(struct seq_file *m, continue; page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
continue;
if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) continue; -- 2.0.2
-- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
-- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
On 5/25/2016 5:37 PM, Minchan Kim wrote:
On Tue, May 24, 2016 at 11:58:11AM +0900, Minchan Kim wrote:
On Mon, May 23, 2016 at 10:16:08AM -0700, Yang Shi wrote:
Per the discussion with Joonsoo Kim [1], we need check the return value of lookup_page_ext() for all call sites since it might return NULL in some cases, although it is unlikely, i.e. memory hotplug.
Tested with ltp with "page_owner=0".
[1] http://lkml.kernel.org/r/20160519002809.GA10245@js1304-P5Q-DELUXE
Signed-off-by: Yang Shi yang.shi@linaro.org
I didn't read code code in detail to see how page_ext memory space allocated in boot code and memory hotplug but to me, it's not good to check NULL whenever we calls lookup_page_ext.
More dangerous thing is now page_ext is used by optionable feature(ie, not critical for system stability) but if we want to use page_ext as another important tool for the system in future, it could be a serious problem.
Can we put some hooks of page_ext into memory-hotplug so guarantee that page_ext memory space is allocated with memmap space at the same time? IOW, once every PFN wakers find a page is valid, page_ext is valid, too so lookup_page_ext never returns NULL on valid page by design.
I hope we consider this direction, too.
Yang, Could you think about this?
Thanks a lot for the suggestion. Sorry for the late reply, I was busy on preparing patches. I do agree this is a direction we should look into, but I haven't got time to think about it deeper. I hope Joonsoo could chime in too since he is the original author for page extension.
Even, your patch was broken, I think. It doesn't work with !CONFIG_DEBUG_VM && !CONFIG_PAGE_POISONING because lookup_page_ext doesn't return NULL in that case.
Actually, I think the #ifdef should be removed if lookup_page_ext() is possible to return NULL. It sounds not make sense returning NULL only when DEBUG_VM is enabled. It should return NULL no matter what debug config is selected. If Joonsoo agrees with me I'm going to come up with a patch to fix it.
Regards, Yang
Thanks.
include/linux/page_idle.h | 43 ++++++++++++++++++++++++++++++++++++------- mm/page_alloc.c | 6 ++++++ mm/page_owner.c | 27 +++++++++++++++++++++++++++ mm/page_poison.c | 8 +++++++- mm/vmstat.c | 2 ++ 5 files changed, 78 insertions(+), 8 deletions(-)
diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h index bf268fa..8f5d4ad 100644 --- a/include/linux/page_idle.h +++ b/include/linux/page_idle.h @@ -46,33 +46,62 @@ extern struct page_ext_operations page_idle_ops;
static inline bool page_is_young(struct page *page) {
- return test_bit(PAGE_EXT_YOUNG, &lookup_page_ext(page)->flags);
- struct page_ext *page_ext;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
return false;
- return test_bit(PAGE_EXT_YOUNG, &page_ext->flags);
}
static inline void set_page_young(struct page *page) {
- set_bit(PAGE_EXT_YOUNG, &lookup_page_ext(page)->flags);
- struct page_ext *page_ext;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
return;
- set_bit(PAGE_EXT_YOUNG, &page_ext->flags);
}
static inline bool test_and_clear_page_young(struct page *page) {
- return test_and_clear_bit(PAGE_EXT_YOUNG,
&lookup_page_ext(page)->flags);
- struct page_ext *page_ext;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
return false;
- return test_and_clear_bit(PAGE_EXT_YOUNG, &page_ext->flags);
}
static inline bool page_is_idle(struct page *page) {
- return test_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags);
- struct page_ext *page_ext;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
return false;
- return test_bit(PAGE_EXT_IDLE, &page_ext->flags);
}
static inline void set_page_idle(struct page *page) {
- set_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags);
- struct page_ext *page_ext;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
return;
- set_bit(PAGE_EXT_IDLE, &page_ext->flags);
}
static inline void clear_page_idle(struct page *page) {
- clear_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags);
- struct page_ext *page_ext;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
return;
- clear_bit(PAGE_EXT_IDLE, &page_ext->flags);
} #endif /* CONFIG_64BIT */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f8f3bfc..d27e8b9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -656,6 +656,9 @@ static inline void set_page_guard(struct zone *zone, struct page *page, return;
page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
return;
__set_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
INIT_LIST_HEAD(&page->lru);
@@ -673,6 +676,9 @@ static inline void clear_page_guard(struct zone *zone, struct page *page, return;
page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
return;
__clear_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
set_page_private(page, 0);
diff --git a/mm/page_owner.c b/mm/page_owner.c index 792b56d..902e398 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -55,6 +55,8 @@ void __reset_page_owner(struct page *page, unsigned int order)
for (i = 0; i < (1 << order); i++) { page_ext = lookup_page_ext(page + i);
if (unlikely(!page_ext))
__clear_bit(PAGE_EXT_OWNER, &page_ext->flags); }continue;
} @@ -62,6 +64,10 @@ void __reset_page_owner(struct page *page, unsigned int order) void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
- struct stack_trace trace = { .nr_entries = 0, .max_entries = ARRAY_SIZE(page_ext->trace_entries),
@@ -82,6 +88,8 @@ void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) void __set_page_owner_migrate_reason(struct page *page, int reason) { struct page_ext *page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
return;
page_ext->last_migrate_reason = reason;
} @@ -89,6 +97,12 @@ void __set_page_owner_migrate_reason(struct page *page, int reason) gfp_t __get_page_owner_gfp(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
/*
* The caller just returns 0 if no valid gfp
* So return 0 here too.
*/
return 0;
return page_ext->gfp_mask;
} @@ -97,6 +111,10 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage) { struct page_ext *old_ext = lookup_page_ext(oldpage); struct page_ext *new_ext = lookup_page_ext(newpage);
if (unlikely(!old_ext || !new_ext))
return;
int i;
new_ext->order = old_ext->order;
@@ -186,6 +204,11 @@ err: void __dump_page_owner(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)) {
pr_alert("There is not page extension available.\n");
return;
- }
- struct stack_trace trace = { .nr_entries = page_ext->nr_entries, .entries = &page_ext->trace_entries[0],
@@ -251,6 +274,8 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos) }
page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
continue;
/*
- Some pages could be missed by concurrent allocation or free,
@@ -317,6 +342,8 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone) continue;
page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
continue; /* Maybe overraping zone */ if (test_bit(PAGE_EXT_OWNER, &page_ext->flags))
diff --git a/mm/page_poison.c b/mm/page_poison.c index 1eae5fa..2e647c6 100644 --- a/mm/page_poison.c +++ b/mm/page_poison.c @@ -54,6 +54,9 @@ static inline void set_page_poison(struct page *page) struct page_ext *page_ext;
page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
- __set_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags);
}
@@ -62,6 +65,9 @@ static inline void clear_page_poison(struct page *page) struct page_ext *page_ext;
page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
- __clear_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags);
}
@@ -70,7 +76,7 @@ bool page_is_poisoned(struct page *page) struct page_ext *page_ext;
page_ext = lookup_page_ext(page);
- if (!page_ext)
if (unlikely(!page_ext)) return false;
return test_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags);
diff --git a/mm/vmstat.c b/mm/vmstat.c index 77e42ef..cb2a67b 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1061,6 +1061,8 @@ static void pagetypeinfo_showmixedcount_print(struct seq_file *m, continue;
page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
continue; if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) continue;
-- 2.0.2
-- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
-- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
On Thu, May 26, 2016 at 04:15:28PM -0700, Shi, Yang wrote:
On 5/25/2016 5:37 PM, Minchan Kim wrote:
On Tue, May 24, 2016 at 11:58:11AM +0900, Minchan Kim wrote:
On Mon, May 23, 2016 at 10:16:08AM -0700, Yang Shi wrote:
Per the discussion with Joonsoo Kim [1], we need check the return value of lookup_page_ext() for all call sites since it might return NULL in some cases, although it is unlikely, i.e. memory hotplug.
Tested with ltp with "page_owner=0".
[1] http://lkml.kernel.org/r/20160519002809.GA10245@js1304-P5Q-DELUXE
Signed-off-by: Yang Shi yang.shi@linaro.org
I didn't read code code in detail to see how page_ext memory space allocated in boot code and memory hotplug but to me, it's not good to check NULL whenever we calls lookup_page_ext.
More dangerous thing is now page_ext is used by optionable feature(ie, not critical for system stability) but if we want to use page_ext as another important tool for the system in future, it could be a serious problem.
Can we put some hooks of page_ext into memory-hotplug so guarantee that page_ext memory space is allocated with memmap space at the same time? IOW, once every PFN wakers find a page is valid, page_ext is valid, too so lookup_page_ext never returns NULL on valid page by design.
I hope we consider this direction, too.
Yang, Could you think about this?
Thanks a lot for the suggestion. Sorry for the late reply, I was busy on preparing patches. I do agree this is a direction we should look into, but I haven't got time to think about it deeper. I hope Joonsoo could chime in too since he is the original author for page extension.
Even, your patch was broken, I think. It doesn't work with !CONFIG_DEBUG_VM && !CONFIG_PAGE_POISONING because lookup_page_ext doesn't return NULL in that case.
Actually, I think the #ifdef should be removed if lookup_page_ext() is possible to return NULL. It sounds not make sense returning NULL only when DEBUG_VM is enabled. It should return NULL no matter what debug config is selected. If Joonsoo agrees with me I'm going to come up with a patch to fix it.
I don't know what lock protects race section->page_ext storing/tearing during memory hotplug while random thread accesses pege_ext, for example, kpageflags->page_is_idle.
Please consider that, too if you want to go with this approach.
Regards, Yang
Thanks.
include/linux/page_idle.h | 43 ++++++++++++++++++++++++++++++++++++------- mm/page_alloc.c | 6 ++++++ mm/page_owner.c | 27 +++++++++++++++++++++++++++ mm/page_poison.c | 8 +++++++- mm/vmstat.c | 2 ++ 5 files changed, 78 insertions(+), 8 deletions(-)
diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h index bf268fa..8f5d4ad 100644 --- a/include/linux/page_idle.h +++ b/include/linux/page_idle.h @@ -46,33 +46,62 @@ extern struct page_ext_operations page_idle_ops;
 static inline bool page_is_young(struct page *page)
 {
-	return test_bit(PAGE_EXT_YOUNG, &lookup_page_ext(page)->flags);
+	struct page_ext *page_ext;
+
+	page_ext = lookup_page_ext(page);
+	if (unlikely(!page_ext))
+		return false;
+
+	return test_bit(PAGE_EXT_YOUNG, &page_ext->flags);
 }
 static inline void set_page_young(struct page *page)
 {
-	set_bit(PAGE_EXT_YOUNG, &lookup_page_ext(page)->flags);
+	struct page_ext *page_ext;
+
+	page_ext = lookup_page_ext(page);
+	if (unlikely(!page_ext))
+		return;
+
+	set_bit(PAGE_EXT_YOUNG, &page_ext->flags);
 }
 static inline bool test_and_clear_page_young(struct page *page)
 {
-	return test_and_clear_bit(PAGE_EXT_YOUNG,
-				  &lookup_page_ext(page)->flags);
+	struct page_ext *page_ext;
+
+	page_ext = lookup_page_ext(page);
+	if (unlikely(!page_ext))
+		return false;
+
+	return test_and_clear_bit(PAGE_EXT_YOUNG, &page_ext->flags);
 }
 static inline bool page_is_idle(struct page *page)
 {
-	return test_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags);
+	struct page_ext *page_ext;
+
+	page_ext = lookup_page_ext(page);
+	if (unlikely(!page_ext))
+		return false;
+
+	return test_bit(PAGE_EXT_IDLE, &page_ext->flags);
 }
 static inline void set_page_idle(struct page *page)
 {
-	set_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags);
+	struct page_ext *page_ext;
+
+	page_ext = lookup_page_ext(page);
+	if (unlikely(!page_ext))
+		return;
+
+	set_bit(PAGE_EXT_IDLE, &page_ext->flags);
 }
 static inline void clear_page_idle(struct page *page)
 {
-	clear_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags);
+	struct page_ext *page_ext;
+
+	page_ext = lookup_page_ext(page);
+	if (unlikely(!page_ext))
+		return;
+
+	clear_bit(PAGE_EXT_IDLE, &page_ext->flags);
 }
 #endif /* CONFIG_64BIT */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f8f3bfc..d27e8b9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -656,6 +656,9 @@ static inline void set_page_guard(struct zone *zone, struct page *page, return;
page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
return;
__set_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
INIT_LIST_HEAD(&page->lru);
@@ -673,6 +676,9 @@ static inline void clear_page_guard(struct zone *zone, struct page *page, return;
page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
return;
__clear_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
set_page_private(page, 0);
diff --git a/mm/page_owner.c b/mm/page_owner.c index 792b56d..902e398 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -55,6 +55,8 @@ void __reset_page_owner(struct page *page, unsigned int order)
 	for (i = 0; i < (1 << order); i++) {
 		page_ext = lookup_page_ext(page + i);
+		if (unlikely(!page_ext))
+			continue;
 		__clear_bit(PAGE_EXT_OWNER, &page_ext->flags);
} @@ -62,6 +64,10 @@ void __reset_page_owner(struct page *page, unsigned int order) void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
- struct stack_trace trace = { .nr_entries = 0, .max_entries = ARRAY_SIZE(page_ext->trace_entries),
@@ -82,6 +88,8 @@ void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) void __set_page_owner_migrate_reason(struct page *page, int reason) { struct page_ext *page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
return;
page_ext->last_migrate_reason = reason;
} @@ -89,6 +97,12 @@ void __set_page_owner_migrate_reason(struct page *page, int reason) gfp_t __get_page_owner_gfp(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
/*
* The caller just returns 0 if no valid gfp
* So return 0 here too.
*/
return 0;
return page_ext->gfp_mask;
} @@ -97,6 +111,10 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage) { struct page_ext *old_ext = lookup_page_ext(oldpage); struct page_ext *new_ext = lookup_page_ext(newpage);
if (unlikely(!old_ext || !new_ext))
return;
int i;
new_ext->order = old_ext->order;
@@ -186,6 +204,11 @@ err: void __dump_page_owner(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)) {
pr_alert("There is not page extension available.\n");
return;
- }
- struct stack_trace trace = { .nr_entries = page_ext->nr_entries, .entries = &page_ext->trace_entries[0],
@@ -251,6 +274,8 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos) }
page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
continue;
/*
- Some pages could be missed by concurrent allocation or free,
@@ -317,6 +342,8 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone) continue;
page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
continue; /* Maybe overlapping zone */ if (test_bit(PAGE_EXT_OWNER, &page_ext->flags))
diff --git a/mm/page_poison.c b/mm/page_poison.c index 1eae5fa..2e647c6 100644 --- a/mm/page_poison.c +++ b/mm/page_poison.c @@ -54,6 +54,9 @@ static inline void set_page_poison(struct page *page) struct page_ext *page_ext;
page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
- __set_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags);
}
@@ -62,6 +65,9 @@ static inline void clear_page_poison(struct page *page) struct page_ext *page_ext;
page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
- __clear_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags);
}
@@ -70,7 +76,7 @@ bool page_is_poisoned(struct page *page) struct page_ext *page_ext;
page_ext = lookup_page_ext(page);
- if (!page_ext)
if (unlikely(!page_ext)) return false;
return test_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags);
diff --git a/mm/vmstat.c b/mm/vmstat.c index 77e42ef..cb2a67b 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1061,6 +1061,8 @@ static void pagetypeinfo_showmixedcount_print(struct seq_file *m, continue;
page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
continue; if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) continue;
-- 2.0.2
-- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
-- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
On Fri, May 27, 2016 at 02:14:32PM +0900, Minchan Kim wrote:
On Thu, May 26, 2016 at 04:15:28PM -0700, Shi, Yang wrote:
On 5/25/2016 5:37 PM, Minchan Kim wrote:
On Tue, May 24, 2016 at 11:58:11AM +0900, Minchan Kim wrote:
On Mon, May 23, 2016 at 10:16:08AM -0700, Yang Shi wrote:
Per the discussion with Joonsoo Kim [1], we need check the return value of lookup_page_ext() for all call sites since it might return NULL in some cases, although it is unlikely, i.e. memory hotplug.
Tested with ltp with "page_owner=0".
[1] http://lkml.kernel.org/r/20160519002809.GA10245@js1304-P5Q-DELUXE
Signed-off-by: Yang Shi yang.shi@linaro.org
I didn't read code code in detail to see how page_ext memory space allocated in boot code and memory hotplug but to me, it's not good to check NULL whenever we calls lookup_page_ext.
More dangerous thing is now page_ext is used by optionable feature(ie, not critical for system stability) but if we want to use page_ext as another important tool for the system in future, it could be a serious problem.
Hello, Minchan.
I wonder how pages that isn't managed by kernel yet will cause serious problem. Until onlining, these pages are out of our scope. Any information about them would be useless until it is actually activated. I guess that returning NULL for those pages will not hurt any functionality. Do you have any possible scenario that this causes the serious problem?
And, allocation such memory space doesn't come from free. If someone just add the memory device and don't online it, these memory will be wasted. I don't know if there is such a usecase but it's possible scenario.
Can we put some hooks of page_ext into memory-hotplug so guarantee that page_ext memory space is allocated with memmap space at the same time? IOW, once every PFN wakers find a page is valid, page_ext is valid, too so lookup_page_ext never returns NULL on valid page by design.
I hope we consider this direction, too.
Yang, Could you think about this?
Thanks a lot for the suggestion. Sorry for the late reply, I was busy on preparing patches. I do agree this is a direction we should look into, but I haven't got time to think about it deeper. I hope Joonsoo could chime in too since he is the original author for page extension.
Even, your patch was broken, I think. It doesn't work with !CONFIG_DEBUG_VM && !CONFIG_PAGE_POISONING because lookup_page_ext doesn't return NULL in that case.
Actually, I think the #ifdef should be removed if lookup_page_ext() is possible to return NULL. It sounds not make sense returning NULL only when DEBUG_VM is enabled. It should return NULL no matter what debug config is selected. If Joonsoo agrees with me I'm going to come up with a patch to fix it.
Agreed but let's wait for Minchan's response.
Thanks.
On Fri, May 27, 2016 at 03:08:39PM +0900, Joonsoo Kim wrote:
On Fri, May 27, 2016 at 02:14:32PM +0900, Minchan Kim wrote:
On Thu, May 26, 2016 at 04:15:28PM -0700, Shi, Yang wrote:
On 5/25/2016 5:37 PM, Minchan Kim wrote:
On Tue, May 24, 2016 at 11:58:11AM +0900, Minchan Kim wrote:
On Mon, May 23, 2016 at 10:16:08AM -0700, Yang Shi wrote:
Per the discussion with Joonsoo Kim [1], we need check the return value of lookup_page_ext() for all call sites since it might return NULL in some cases, although it is unlikely, i.e. memory hotplug.
Tested with ltp with "page_owner=0".
[1] http://lkml.kernel.org/r/20160519002809.GA10245@js1304-P5Q-DELUXE
Signed-off-by: Yang Shi yang.shi@linaro.org
I didn't read code code in detail to see how page_ext memory space allocated in boot code and memory hotplug but to me, it's not good to check NULL whenever we calls lookup_page_ext.
More dangerous thing is now page_ext is used by optionable feature(ie, not critical for system stability) but if we want to use page_ext as another important tool for the system in future, it could be a serious problem.
Hello, Minchan.
Hi Joonsoo,
I wonder how pages that isn't managed by kernel yet will cause serious problem. Until onlining, these pages are out of our scope. Any information about them would be useless until it is actually activated. I guess that returning NULL for those pages will not hurt any functionality. Do you have any possible scenario that this causes the serious problem?
I don't have any specific usecase now. That's why I said "in future". And I don't want to argue whether there is possible scenario or not to make the feature useful but if you want, I should write novel. One of example, pop up my mind, xen, hv and even memory_hotplug itself might want to use page_ext for their functionality extension to hook guest pages.
My opinion is that page_ext is extension of struct page so it would be better to allow any operation on struct page without any limitation if we can do it. Whether it's useful or useless depend on random usecase and we don't need to limit that way from the beginning.
However, current design allows deferred page_ext population so any user of page_ext should keep it in mind and should either make fallback plan or don't use page_ext for those cases. If we decide go this way through discussion, at least, we should make such limitation more clear to somewhere in this chance, maybe around page_ext_operation->need comment.
My comment's point is that we should consider that way at least. It's worth to discuss pros and cons, what's the best and what makes that way hesitate if we can't.
And, allocation such memory space doesn't come from free. If someone just add the memory device and don't online it, these memory will be
Here goes several questions. Cced hotplug guys
1. If someone just add the memory device without onlining, kernel code can return pfn_valid == true on the offlined page?
2. If so, it means memmap on offline memory is already populated somewhere. Where is the memmap allocated? part of offlined memory space or other memory?
3. Could we allocate page_ext in part of offline memory space so that it doesn't consume online memory.
wasted. I don't know if there is such a usecase but it's possible scenario.
Can we put some hooks of page_ext into memory-hotplug so guarantee that page_ext memory space is allocated with memmap space at the same time? IOW, once every PFN wakers find a page is valid, page_ext is valid, too so lookup_page_ext never returns NULL on valid page by design.
I hope we consider this direction, too.
Yang, Could you think about this?
Thanks a lot for the suggestion. Sorry for the late reply, I was busy on preparing patches. I do agree this is a direction we should look into, but I haven't got time to think about it deeper. I hope Joonsoo could chime in too since he is the original author for page extension.
Even, your patch was broken, I think. It doesn't work with !CONFIG_DEBUG_VM && !CONFIG_PAGE_POISONING because lookup_page_ext doesn't return NULL in that case.
Actually, I think the #ifdef should be removed if lookup_page_ext() is possible to return NULL. It sounds not make sense returning NULL only when DEBUG_VM is enabled. It should return NULL no matter what debug config is selected. If Joonsoo agrees with me I'm going to come up with a patch to fix it.
Agreed but let's wait for Minchan's response.
If we goes this way, how to guarantee this race?
kpageflags_read stable_page_flags page_is_idle lookup_page_ext section = __pfn_to_section(pfn) offline_pages memory_notify(MEM_OFFLINE) offline_page_ext ms->page_ext = NULL section->page_ext + pfn
Thanks.
On 5/27/2016 1:11 AM, Minchan Kim wrote:
On Fri, May 27, 2016 at 03:08:39PM +0900, Joonsoo Kim wrote:
On Fri, May 27, 2016 at 02:14:32PM +0900, Minchan Kim wrote:
On Thu, May 26, 2016 at 04:15:28PM -0700, Shi, Yang wrote:
On 5/25/2016 5:37 PM, Minchan Kim wrote:
On Tue, May 24, 2016 at 11:58:11AM +0900, Minchan Kim wrote:
On Mon, May 23, 2016 at 10:16:08AM -0700, Yang Shi wrote: > Per the discussion with Joonsoo Kim [1], we need check the return value of > lookup_page_ext() for all call sites since it might return NULL in some cases, > although it is unlikely, i.e. memory hotplug. > > Tested with ltp with "page_owner=0". > > [1] http://lkml.kernel.org/r/20160519002809.GA10245@js1304-P5Q-DELUXE > > Signed-off-by: Yang Shi yang.shi@linaro.org
I didn't read code code in detail to see how page_ext memory space allocated in boot code and memory hotplug but to me, it's not good to check NULL whenever we calls lookup_page_ext.
More dangerous thing is now page_ext is used by optionable feature(ie, not critical for system stability) but if we want to use page_ext as another important tool for the system in future, it could be a serious problem.
Hello, Minchan.
Hi Joonsoo,
I wonder how pages that isn't managed by kernel yet will cause serious problem. Until onlining, these pages are out of our scope. Any information about them would be useless until it is actually activated. I guess that returning NULL for those pages will not hurt any functionality. Do you have any possible scenario that this causes the serious problem?
I don't have any specific usecase now. That's why I said "in future". And I don't want to argue whether there is possible scenario or not to make the feature useful but if you want, I should write novel. One of example, pop up my mind, xen, hv and even memory_hotplug itself might want to use page_ext for their functionality extension to hook guest pages.
My opinion is that page_ext is extension of struct page so it would be better to allow any operation on struct page without any limitation if we can do it. Whether it's useful or useless depend on random usecase and we don't need to limit that way from the beginning.
However, current design allows deferred page_ext population so any user of page_ext should keep it in mind and should either make fallback plan or don't use page_ext for those cases. If we decide go this way through discussion, at least, we should make such limitation more clear to somewhere in this chance, maybe around page_ext_operation->need comment.
My comment's point is that we should consider that way at least. It's worth to discuss pros and cons, what's the best and what makes that way hesitate if we can't.
And, allocation such memory space doesn't come from free. If someone just add the memory device and don't online it, these memory will be
Here goes several questions. Cced hotplug guys
If someone just add the memory device without onlining, kernel code can return pfn_valid == true on the offlined page?
If so, it means memmap on offline memory is already populated somewhere. Where is the memmap allocated? part of offlined memory space or other memory?
- Could we allocate page_ext in part of offline memory space so that
it doesn't consume online memory.
wasted. I don't know if there is such a usecase but it's possible scenario.
Can we put some hooks of page_ext into memory-hotplug so guarantee that page_ext memory space is allocated with memmap space at the same time? IOW, once every PFN wakers find a page is valid, page_ext is valid, too so lookup_page_ext never returns NULL on valid page by design.
I hope we consider this direction, too.
Yang, Could you think about this?
Thanks a lot for the suggestion. Sorry for the late reply, I was busy on preparing patches. I do agree this is a direction we should look into, but I haven't got time to think about it deeper. I hope Joonsoo could chime in too since he is the original author for page extension.
Even, your patch was broken, I think. It doesn't work with !CONFIG_DEBUG_VM && !CONFIG_PAGE_POISONING because lookup_page_ext doesn't return NULL in that case.
Actually, I think the #ifdef should be removed if lookup_page_ext() is possible to return NULL. It sounds not make sense returning NULL only when DEBUG_VM is enabled. It should return NULL no matter what debug config is selected. If Joonsoo agrees with me I'm going to come up with a patch to fix it.
Agreed but let's wait for Minchan's response.
If we goes this way, how to guarantee this race?
Thanks for pointing out this. It sounds reasonable. However, this should be only possible to happen on 32 bit since just 32 bit version page_is_idle() calls lookup_page_ext(), it doesn't do it on 64 bit.
And, such race condition should exist regardless of whether DEBUG_VM is enabled or not, right?
rcu might be good enough to protect it.
A quick fix may look like:
diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h index 8f5d4ad..bf0cd6a 100644 --- a/include/linux/page_idle.h +++ b/include/linux/page_idle.h @@ -77,8 +77,12 @@ static inline bool test_and_clear_page_young(struct page *page) static inline bool page_is_idle(struct page *page) { struct page_ext *page_ext; + + rcu_read_lock(); page_ext = lookup_page_ext(page); + rcu_read_unlock(); + if (unlikely(!page_ext)) return false;
diff --git a/mm/page_ext.c b/mm/page_ext.c index 56b160f..94927c9 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -183,7 +183,6 @@ struct page_ext *lookup_page_ext(struct page *page) { unsigned long pfn = page_to_pfn(page); struct mem_section *section = __pfn_to_section(pfn); -#if defined(CONFIG_DEBUG_VM) || defined(CONFIG_PAGE_POISONING) /* * The sanity checks the page allocator does upon freeing a * page can reach here before the page_ext arrays are @@ -195,7 +194,7 @@ struct page_ext *lookup_page_ext(struct page *page) */ if (!section->page_ext) return NULL; -#endif + return section->page_ext + pfn; }
@@ -279,7 +278,8 @@ static void __free_page_ext(unsigned long pfn) return; base = ms->page_ext + pfn; free_page_ext(base); - ms->page_ext = NULL; + rcu_assign_pointer(ms->page_ext, NULL); + synchronize_rcu(); }
static int __meminit online_page_ext(unsigned long start_pfn,
Thanks, Yang
kpageflags_read stable_page_flags page_is_idle lookup_page_ext section = __pfn_to_section(pfn)
offline_pages memory_notify(MEM_OFFLINE) offline_page_ext ms->page_ext = NULL section->page_ext + pfn
Thanks.
On Fri, May 27, 2016 at 11:16:41AM -0700, Shi, Yang wrote:
<snip>
If we goes this way, how to guarantee this race?
Thanks for pointing out this. It sounds reasonable. However, this should be only possible to happen on 32 bit since just 32 bit version page_is_idle() calls lookup_page_ext(), it doesn't do it on 64 bit.
And, such race condition should exist regardless of whether DEBUG_VM is enabled or not, right?
rcu might be good enough to protect it.
A quick fix may look like:
diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h index 8f5d4ad..bf0cd6a 100644 --- a/include/linux/page_idle.h +++ b/include/linux/page_idle.h @@ -77,8 +77,12 @@ static inline bool test_and_clear_page_young(struct page *page) static inline bool page_is_idle(struct page *page) { struct page_ext *page_ext;
rcu_read_lock(); page_ext = lookup_page_ext(page);
rcu_read_unlock();
- if (unlikely(!page_ext)) return false;
diff --git a/mm/page_ext.c b/mm/page_ext.c index 56b160f..94927c9 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -183,7 +183,6 @@ struct page_ext *lookup_page_ext(struct page *page) { unsigned long pfn = page_to_pfn(page); struct mem_section *section = __pfn_to_section(pfn); -#if defined(CONFIG_DEBUG_VM) || defined(CONFIG_PAGE_POISONING) /* * The sanity checks the page allocator does upon freeing a * page can reach here before the page_ext arrays are @@ -195,7 +194,7 @@ struct page_ext *lookup_page_ext(struct page *page) */ if (!section->page_ext) return NULL; -#endif
return section->page_ext + pfn;
}
@@ -279,7 +278,8 @@ static void __free_page_ext(unsigned long pfn) return; base = ms->page_ext + pfn; free_page_ext(base);
ms->page_ext = NULL;
rcu_assign_pointer(ms->page_ext, NULL);
synchronize_rcu();
How does it fix the problem? I cannot understand your point.
}
static int __meminit online_page_ext(unsigned long start_pfn,
Thanks, Yang
kpageflags_read stable_page_flags page_is_idle lookup_page_ext section = __pfn_to_section(pfn)
offline_pages memory_notify(MEM_OFFLINE) offline_page_ext ms->page_ext = NULL section->page_ext + pfn
Thanks.
On 5/29/2016 11:11 PM, Minchan Kim wrote:
On Fri, May 27, 2016 at 11:16:41AM -0700, Shi, Yang wrote:
<snip>
If we goes this way, how to guarantee this race?
Thanks for pointing out this. It sounds reasonable. However, this should be only possible to happen on 32 bit since just 32 bit version page_is_idle() calls lookup_page_ext(), it doesn't do it on 64 bit.
And, such race condition should exist regardless of whether DEBUG_VM is enabled or not, right?
rcu might be good enough to protect it.
A quick fix may look like:
diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h index 8f5d4ad..bf0cd6a 100644 --- a/include/linux/page_idle.h +++ b/include/linux/page_idle.h @@ -77,8 +77,12 @@ static inline bool test_and_clear_page_young(struct page *page) static inline bool page_is_idle(struct page *page) { struct page_ext *page_ext;
rcu_read_lock(); page_ext = lookup_page_ext(page);
rcu_read_unlock();
- if (unlikely(!page_ext)) return false;
diff --git a/mm/page_ext.c b/mm/page_ext.c index 56b160f..94927c9 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -183,7 +183,6 @@ struct page_ext *lookup_page_ext(struct page *page) { unsigned long pfn = page_to_pfn(page); struct mem_section *section = __pfn_to_section(pfn); -#if defined(CONFIG_DEBUG_VM) || defined(CONFIG_PAGE_POISONING) /* * The sanity checks the page allocator does upon freeing a * page can reach here before the page_ext arrays are @@ -195,7 +194,7 @@ struct page_ext *lookup_page_ext(struct page *page) */ if (!section->page_ext) return NULL; -#endif
return section->page_ext + pfn;
}
@@ -279,7 +278,8 @@ static void __free_page_ext(unsigned long pfn) return; base = ms->page_ext + pfn; free_page_ext(base);
ms->page_ext = NULL;
rcu_assign_pointer(ms->page_ext, NULL);
synchronize_rcu();
How does it fix the problem? I cannot understand your point.
Assigning NULL pointer to page_Ext will be blocked until rcu_read_lock critical section is done, so the lookup and writing operations will be serialized. And, rcu_read_lock disables preempt too.
Yang
}
static int __meminit online_page_ext(unsigned long start_pfn,
Thanks, Yang
kpageflags_read stable_page_flags page_is_idle lookup_page_ext section = __pfn_to_section(pfn)
offline_pages memory_notify(MEM_OFFLINE) offline_page_ext ms->page_ext = NULL section->page_ext + pfn
Thanks.
On Wed, Jun 01, 2016 at 01:40:48PM -0700, Shi, Yang wrote:
On 5/29/2016 11:11 PM, Minchan Kim wrote:
On Fri, May 27, 2016 at 11:16:41AM -0700, Shi, Yang wrote:
<snip>
If we goes this way, how to guarantee this race?
Thanks for pointing out this. It sounds reasonable. However, this should be only possible to happen on 32 bit since just 32 bit version page_is_idle() calls lookup_page_ext(), it doesn't do it on 64 bit.
And, such race condition should exist regardless of whether DEBUG_VM is enabled or not, right?
rcu might be good enough to protect it.
A quick fix may look like:
diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h index 8f5d4ad..bf0cd6a 100644 --- a/include/linux/page_idle.h +++ b/include/linux/page_idle.h @@ -77,8 +77,12 @@ static inline bool test_and_clear_page_young(struct page *page) static inline bool page_is_idle(struct page *page) { struct page_ext *page_ext;
rcu_read_lock(); page_ext = lookup_page_ext(page);
rcu_read_unlock();
- if (unlikely(!page_ext)) return false;
diff --git a/mm/page_ext.c b/mm/page_ext.c index 56b160f..94927c9 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -183,7 +183,6 @@ struct page_ext *lookup_page_ext(struct page *page) { unsigned long pfn = page_to_pfn(page); struct mem_section *section = __pfn_to_section(pfn); -#if defined(CONFIG_DEBUG_VM) || defined(CONFIG_PAGE_POISONING) /* * The sanity checks the page allocator does upon freeing a * page can reach here before the page_ext arrays are @@ -195,7 +194,7 @@ struct page_ext *lookup_page_ext(struct page *page) */ if (!section->page_ext) return NULL; -#endif
return section->page_ext + pfn;
}
@@ -279,7 +278,8 @@ static void __free_page_ext(unsigned long pfn) return; base = ms->page_ext + pfn; free_page_ext(base);
ms->page_ext = NULL;
rcu_assign_pointer(ms->page_ext, NULL);
synchronize_rcu();
How does it fix the problem? I cannot understand your point.
Assigning NULL pointer to page_Ext will be blocked until rcu_read_lock critical section is done, so the lookup and writing operations will be serialized. And, rcu_read_lock disables preempt too.
I meant your rcu_read_lock in page_idle should cover test_bit op. One more thing, you should use rcu_dereference.
As well, please cover memory onlining case I mentioned in another thread as well as memory offlining.
Anyway, to me, every caller of page_ext should prepare lookup_page_ext can return NULL anytime and they should use rcu_read_[un]lock, which is not good. :(
On 6/1/2016 10:00 PM, Minchan Kim wrote:
On Wed, Jun 01, 2016 at 01:40:48PM -0700, Shi, Yang wrote:
On 5/29/2016 11:11 PM, Minchan Kim wrote:
On Fri, May 27, 2016 at 11:16:41AM -0700, Shi, Yang wrote:
<snip>
If we goes this way, how to guarantee this race?
Thanks for pointing out this. It sounds reasonable. However, this should be only possible to happen on 32 bit since just 32 bit version page_is_idle() calls lookup_page_ext(), it doesn't do it on 64 bit.
And, such race condition should exist regardless of whether DEBUG_VM is enabled or not, right?
rcu might be good enough to protect it.
A quick fix may look like:
diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h index 8f5d4ad..bf0cd6a 100644 --- a/include/linux/page_idle.h +++ b/include/linux/page_idle.h @@ -77,8 +77,12 @@ static inline bool test_and_clear_page_young(struct page *page) static inline bool page_is_idle(struct page *page) { struct page_ext *page_ext;
rcu_read_lock(); page_ext = lookup_page_ext(page);
rcu_read_unlock();
- if (unlikely(!page_ext)) return false;
diff --git a/mm/page_ext.c b/mm/page_ext.c index 56b160f..94927c9 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -183,7 +183,6 @@ struct page_ext *lookup_page_ext(struct page *page) { unsigned long pfn = page_to_pfn(page); struct mem_section *section = __pfn_to_section(pfn); -#if defined(CONFIG_DEBUG_VM) || defined(CONFIG_PAGE_POISONING) /* * The sanity checks the page allocator does upon freeing a * page can reach here before the page_ext arrays are @@ -195,7 +194,7 @@ struct page_ext *lookup_page_ext(struct page *page) */ if (!section->page_ext) return NULL; -#endif
return section->page_ext + pfn;
}
@@ -279,7 +278,8 @@ static void __free_page_ext(unsigned long pfn) return; base = ms->page_ext + pfn; free_page_ext(base);
ms->page_ext = NULL;
rcu_assign_pointer(ms->page_ext, NULL);
synchronize_rcu();
How does it fix the problem? I cannot understand your point.
Assigning a NULL pointer to page_ext will be blocked until the rcu_read_lock critical section is done, so the lookup and write operations will be serialized. And rcu_read_lock disables preemption too.
I meant your rcu_read_lock in page_idle should cover test_bit op.
Yes, definitely. Thanks for catching it.
One more thing, you should use rcu_dereference.
I will check which one is the best since I saw some use rcu_assign_pointer.
As well, please cover memory onlining case I mentioned in another thread as well as memory offlining.
I will look into it too.
Thanks, Yang
Anyway, to me, every caller of page_ext should prepare lookup_page_ext can return NULL anytime and they should use rcu_read_[un]lock, which is not good. :(
On Fri, May 27, 2016 at 05:11:08PM +0900, Minchan Kim wrote:
On Fri, May 27, 2016 at 03:08:39PM +0900, Joonsoo Kim wrote:
On Fri, May 27, 2016 at 02:14:32PM +0900, Minchan Kim wrote:
On Thu, May 26, 2016 at 04:15:28PM -0700, Shi, Yang wrote:
On 5/25/2016 5:37 PM, Minchan Kim wrote:
On Tue, May 24, 2016 at 11:58:11AM +0900, Minchan Kim wrote:
On Mon, May 23, 2016 at 10:16:08AM -0700, Yang Shi wrote: >Per the discussion with Joonsoo Kim [1], we need check the return value of >lookup_page_ext() for all call sites since it might return NULL in some cases, >although it is unlikely, i.e. memory hotplug. > >Tested with ltp with "page_owner=0". > >[1] http://lkml.kernel.org/r/20160519002809.GA10245@js1304-P5Q-DELUXE > >Signed-off-by: Yang Shi yang.shi@linaro.org
I didn't read the code in detail to see how page_ext memory space is allocated in boot code and memory hotplug, but to me, it's not good to check NULL whenever we call lookup_page_ext.
More dangerous thing is now page_ext is used by optionable feature(ie, not critical for system stability) but if we want to use page_ext as another important tool for the system in future, it could be a serious problem.
Hello, Minchan.
Hi Joonsoo,
I wonder how pages that isn't managed by kernel yet will cause serious problem. Until onlining, these pages are out of our scope. Any information about them would be useless until it is actually activated. I guess that returning NULL for those pages will not hurt any functionality. Do you have any possible scenario that this causes the serious problem?
I don't have any specific usecase now. That's why I said "in future". And I don't want to argue whether there is a possible scenario or not to make the feature useful, but if you want, I would have to write a novel. One example that pops into my mind: xen, hv, and even memory_hotplug itself might want to use page_ext for their functionality extension to hook guest pages.
There is no detail so I can't guess how to use it and how it causes the serious problem. But, we can do it when it is really needed.
My opinion is that page_ext is extension of struct page so it would be better to allow any operation on struct page without any limitation if we can do it. Whether it's useful or useless depend on random usecase and we don't need to limit that way from the beginning.
If there is no drawback, it would be a better design. But, we have trade-off that for some case that the memory is added but not onlined, there is memory saving if we allocate page_ext later. So, in current situation that there is no user to require such guarantee, I don't think it's worth doing right now.
However, the current design allows deferred page_ext population, so any user of page_ext should keep it in mind and should either make a fallback plan or not use page_ext for those cases. If we decide to go this way through discussion, at least, we should make such limitation more clear somewhere in this chance, maybe around the page_ext_operation->need comment.
Agreed.
My comment's point is that we should consider that way at least. It's worth to discuss pros and cons, what's the best and what makes that way hesitate if we can't.
Yes, your suggestion would be good for future direction, but, for now, I think that inserting NULL to all callers is right fix.
1) Current design that page_ext is allocated when online is design decision of page_ext to save memory as much as possible. Fixing possible problem within this design decision looks good to me.
2) Maybe, we need to backport fixes because it would crash older kernels. In this case, fix with NULL is easy to backport.
And, allocation such memory space doesn't come from free. If someone just add the memory device and don't online it, these memory will be
Here goes several questions. Cced hotplug guys
If someone just add the memory device without onlining, kernel code can return pfn_valid == true on the offlined page?
AFAIK, yes.
If so, it means memmap on offline memory is already populated somewhere. Where is the memmap allocated? part of offlined memory space or other memory?
Other memory.
- Could we allocate page_ext in part of offline memory space so that
it doesn't consume online memory.
wasted. I don't know if there is such a usecase but it's possible scenario.
Can we put some hooks of page_ext into memory-hotplug so as to guarantee that page_ext memory space is allocated together with memmap space at the same time? IOW, once any PFN walker finds a page is valid, page_ext is valid too, so lookup_page_ext never returns NULL on a valid page by design.
I hope we consider this direction, too.
Yang, Could you think about this?
Thanks a lot for the suggestion. Sorry for the late reply, I was busy on preparing patches. I do agree this is a direction we should look into, but I haven't got time to think about it deeper. I hope Joonsoo could chime in too since he is the original author for page extension.
Even, your patch was broken, I think. It doesn't work with !CONFIG_DEBUG_VM && !CONFIG_PAGE_POISONING because lookup_page_ext doesn't return NULL in that case.
Actually, I think the #ifdef should be removed if lookup_page_ext() can return NULL. It doesn't make sense to return NULL only when DEBUG_VM is enabled; it should return NULL no matter what debug config is selected. If Joonsoo agrees with me, I'm going to come up with a patch to fix it.
Agreed but let's wait for Minchan's response.
If we go this way, how do we guard against this race?
kpageflags_read stable_page_flags page_is_idle lookup_page_ext section = __pfn_to_section(pfn)
offline_pages memory_notify(MEM_OFFLINE) offline_page_ext ms->page_ext = NULL section->page_ext + pfn
I think that it is a fundamental problem of memory hotplug. There is similar race with struct page for offlined memory.
kpageflags_read pfn_valid remove_memory stable_page_flags crash!
I already reported similar race problem to memory hotplug guys but didn't get any answer.
lkml.kernel.org/r/20151221031501.GA32524@js1304-P5Q-DELUXE
Thanks.
On Mon, May 30, 2016 at 02:39:06PM +0900, Joonsoo Kim wrote:
On Fri, May 27, 2016 at 05:11:08PM +0900, Minchan Kim wrote:
On Fri, May 27, 2016 at 03:08:39PM +0900, Joonsoo Kim wrote:
On Fri, May 27, 2016 at 02:14:32PM +0900, Minchan Kim wrote:
On Thu, May 26, 2016 at 04:15:28PM -0700, Shi, Yang wrote:
On 5/25/2016 5:37 PM, Minchan Kim wrote:
On Tue, May 24, 2016 at 11:58:11AM +0900, Minchan Kim wrote: >On Mon, May 23, 2016 at 10:16:08AM -0700, Yang Shi wrote: >>Per the discussion with Joonsoo Kim [1], we need check the return value of >>lookup_page_ext() for all call sites since it might return NULL in some cases, >>although it is unlikely, i.e. memory hotplug. >> >>Tested with ltp with "page_owner=0". >> >>[1] http://lkml.kernel.org/r/20160519002809.GA10245@js1304-P5Q-DELUXE >> >>Signed-off-by: Yang Shi yang.shi@linaro.org > >I didn't read code code in detail to see how page_ext memory space >allocated in boot code and memory hotplug but to me, it's not good >to check NULL whenever we calls lookup_page_ext. > >More dangerous thing is now page_ext is used by optionable feature(ie, not >critical for system stability) but if we want to use page_ext as >another important tool for the system in future, >it could be a serious problem.
Hello, Minchan.
Hi Joonsoo,
I wonder how pages that isn't managed by kernel yet will cause serious problem. Until onlining, these pages are out of our scope. Any information about them would be useless until it is actually activated. I guess that returning NULL for those pages will not hurt any functionality. Do you have any possible scenario that this causes the serious problem?
I don't have any specific usecase now. That's why I said "in future". And I don't want to argue whether there is possible scenario or not to make the feature useful but if you want, I should write novel. One of example, pop up my mind, xen, hv and even memory_hotplug itself might want to use page_ext for their functionality extension to hook guest pages.
There is no detail so I can't guess how to use it and how it causes the serious problem. But, we can do it when it is really needed.
My opinion is that page_ext is extension of struct page so it would be better to allow any operation on struct page without any limitation if we can do it. Whether it's useful or useless depend on random usecase and we don't need to limit that way from the beginning.
If there is no drawback, it would be a better design. But, we have trade-off that for some case that the memory is added but not onlined, there is memory saving if we allocate page_ext later. So, in current situation that there is no user to require such guarantee, I don't think it's worth doing right now.
However, current design allows deferred page_ext population so any user of page_ext should keep it in mind and should either make fallback plan or don't use page_ext for those cases. If we decide go this way through discussion, at least, we should make such limitation more clear to somewhere in this chance, maybe around page_ext_operation->need comment.
Agreed.
Okay, we realized from this discussion that, by design, users of page_ext at the moment should know that page_ext access from the page can fail, so every caller should prepare for it.
Shi, Yang, please include a comment about that in your patch, so that further reviewers don't waste time repeating the same discussion and clients of page_ext know the limitation.
My comment's point is that we should consider that way at least. It's worth to discuss pros and cons, what's the best and what makes that way hesitate if we can't.
Yes, your suggestion would be good for future direction, but, for now, I think that inserting NULL to all callers is right fix.
- Current design that page_ext is allocated when online is design
decision of page_ext to save memory as much as possible. Fixing possible problem within this design decision looks good to me.
Okay. Shi Yang, please include this comment in your patch, too.
- Maybe, we need to backport fixes because it would crash older
kernels. In this case, fix with NULL is easy to backport.
Agreed.
Then, Shi Yang need to mark the page as stable. Shi, Please resend your patch with hard testing and more better description with marking it as stable. And I know another race problem about Shi's patch. I will reply to the thread.
And, allocation such memory space doesn't come from free. If someone just add the memory device and don't online it, these memory will be
Here goes several questions. Cced hotplug guys
If someone just add the memory device without onlining, kernel code can return pfn_valid == true on the offlined page?
AFAIK, yes.
If so, it means memmap on offline memory is already populated somewhere. Where is the memmap allocated? part of offlined memory space or other memory?
Other memory.
- Could we allocate page_ext in part of offline memory space so that
it doesn't consume online memory.
wasted. I don't know if there is such a usecase but it's possible scenario.
> >Can we put some hooks of page_ext into memory-hotplug so guarantee >that page_ext memory space is allocated with memmap space at the >same time? IOW, once every PFN wakers find a page is valid, page_ext >is valid, too so lookup_page_ext never returns NULL on valid page >by design. > >I hope we consider this direction, too.
Yang, Could you think about this?
Thanks a lot for the suggestion. Sorry for the late reply, I was busy on preparing patches. I do agree this is a direction we should look into, but I haven't got time to think about it deeper. I hope Joonsoo could chime in too since he is the original author for page extension.
Even, your patch was broken, I think. It doesn't work with !CONFIG_DEBUG_VM && !CONFIG_PAGE_POISONING because lookup_page_ext doesn't return NULL in that case.
Actually, I think the #ifdef should be removed if lookup_page_ext() is possible to return NULL. It sounds not make sense returning NULL only when DEBUG_VM is enabled. It should return NULL no matter what debug config is selected. If Joonsoo agrees with me I'm going to come up with a patch to fix it.
Agreed but let's wait for Minchan's response.
If we goes this way, how to guarantee this race?
kpageflags_read stable_page_flags page_is_idle lookup_page_ext section = __pfn_to_section(pfn)
offline_pages memory_notify(MEM_OFFLINE) offline_page_ext ms->page_ext = NULL section->page_ext + pfn
I think that it is a fundamental problem of memory hotplug. There is similar race with struct page for offlined memory.
kpageflags_read pfn_valid
remove_memory stable_page_flags crash!
I already reported similar race problem to memory hotplug guys but didn't get any answer.
lkml.kernel.org/r/20151221031501.GA32524@js1304-P5Q-DELUXE
Who's in charge of memory-hotplug? Kame, Could you nudge him?
Thanks.
On 5/29/2016 11:08 PM, Minchan Kim wrote:
On Mon, May 30, 2016 at 02:39:06PM +0900, Joonsoo Kim wrote:
On Fri, May 27, 2016 at 05:11:08PM +0900, Minchan Kim wrote:
On Fri, May 27, 2016 at 03:08:39PM +0900, Joonsoo Kim wrote:
On Fri, May 27, 2016 at 02:14:32PM +0900, Minchan Kim wrote:
On Thu, May 26, 2016 at 04:15:28PM -0700, Shi, Yang wrote:
On 5/25/2016 5:37 PM, Minchan Kim wrote: > On Tue, May 24, 2016 at 11:58:11AM +0900, Minchan Kim wrote: >> On Mon, May 23, 2016 at 10:16:08AM -0700, Yang Shi wrote: >>> Per the discussion with Joonsoo Kim [1], we need check the return value of >>> lookup_page_ext() for all call sites since it might return NULL in some cases, >>> although it is unlikely, i.e. memory hotplug. >>> >>> Tested with ltp with "page_owner=0". >>> >>> [1] http://lkml.kernel.org/r/20160519002809.GA10245@js1304-P5Q-DELUXE >>> >>> Signed-off-by: Yang Shi yang.shi@linaro.org >> >> I didn't read code code in detail to see how page_ext memory space >> allocated in boot code and memory hotplug but to me, it's not good >> to check NULL whenever we calls lookup_page_ext. >> >> More dangerous thing is now page_ext is used by optionable feature(ie, not >> critical for system stability) but if we want to use page_ext as >> another important tool for the system in future, >> it could be a serious problem.
Hello, Minchan.
Hi Joonsoo,
I wonder how pages that isn't managed by kernel yet will cause serious problem. Until onlining, these pages are out of our scope. Any information about them would be useless until it is actually activated. I guess that returning NULL for those pages will not hurt any functionality. Do you have any possible scenario that this causes the serious problem?
I don't have any specific usecase now. That's why I said "in future". And I don't want to argue whether there is possible scenario or not to make the feature useful but if you want, I should write novel. One of example, pop up my mind, xen, hv and even memory_hotplug itself might want to use page_ext for their functionality extension to hook guest pages.
There is no detail so I can't guess how to use it and how it causes the serious problem. But, we can do it when it is really needed.
My opinion is that page_ext is extension of struct page so it would be better to allow any operation on struct page without any limitation if we can do it. Whether it's useful or useless depend on random usecase and we don't need to limit that way from the beginning.
If there is no drawback, it would be a better design. But, we have trade-off that for some case that the memory is added but not onlined, there is memory saving if we allocate page_ext later. So, in current situation that there is no user to require such guarantee, I don't think it's worth doing right now.
However, current design allows deferred page_ext population so any user of page_ext should keep it in mind and should either make fallback plan or don't use page_ext for those cases. If we decide go this way through discussion, at least, we should make such limitation more clear to somewhere in this chance, maybe around page_ext_operation->need comment.
Agreed.
Okay, We realized from this discussion that by design, guest of page_ext at the meoment should know his page_ext access from the page can be failed so every caller should prepare for it.
Shi, Yang, Please include some comment about that in your patch to prevent further reviewer waste his time with repeating same discussion and client of page_ext can know the limitation.
My comment's point is that we should consider that way at least. It's worth to discuss pros and cons, what's the best and what makes that way hesitate if we can't.
Yes, your suggestion would be good for future direction, but, for now, I think that inserting NULL to all callers is right fix.
- Current design that page_ext is allocated when online is design
decision of page_ext to save memory as much as possible. Fixing possible problem within this design decision looks good to me.
Okay. Shi Yang, please include this comment in your patch, too.
There is already such comment in lookup_page_ext:
* The sanity checks the page allocator does upon freeing a * page can reach here before the page_ext arrays are * allocated when feeding a range of pages to the allocator * for the first time during bootup or memory hotplug.
I will add more details, i.e. newly added but not onlined memory could reach here too.
- Maybe, we need to backport fixes because it would crash older
kernels. In this case, fix with NULL is easy to backport.
Agreed.
Then, Shi Yang need to mark the page as stable.
Agreed.
Shi, please resend your patch with thorough testing and a better description, marking it for stable.
Will come up with an incremental patch to remove the CONFIG_DEBUG #ifdef and improve the comments, since the comments are included by it.
Thanks, Yang
And I know another race problem about Shi's patch. I will reply to the thread.
And, allocation such memory space doesn't come from free. If someone just add the memory device and don't online it, these memory will be
Here goes several questions. Cced hotplug guys
If someone just add the memory device without onlining, kernel code can return pfn_valid == true on the offlined page?
AFAIK, yes.
If so, it means memmap on offline memory is already populated somewhere. Where is the memmap allocated? part of offlined memory space or other memory?
Other memory.
- Could we allocate page_ext in part of offline memory space so that
it doesn't consume online memory.
wasted. I don't know if there is such a usecase but it's possible scenario.
>> >> Can we put some hooks of page_ext into memory-hotplug so guarantee >> that page_ext memory space is allocated with memmap space at the >> same time? IOW, once every PFN wakers find a page is valid, page_ext >> is valid, too so lookup_page_ext never returns NULL on valid page >> by design. >> >> I hope we consider this direction, too. > > Yang, Could you think about this?
Thanks a lot for the suggestion. Sorry for the late reply, I was busy on preparing patches. I do agree this is a direction we should look into, but I haven't got time to think about it deeper. I hope Joonsoo could chime in too since he is the original author for page extension.
> > Even, your patch was broken, I think. > It doesn't work with !CONFIG_DEBUG_VM && !CONFIG_PAGE_POISONING because > lookup_page_ext doesn't return NULL in that case.
Actually, I think the #ifdef should be removed if lookup_page_ext() is possible to return NULL. It sounds not make sense returning NULL only when DEBUG_VM is enabled. It should return NULL no matter what debug config is selected. If Joonsoo agrees with me I'm going to come up with a patch to fix it.
Agreed but let's wait for Minchan's response.
If we goes this way, how to guarantee this race?
kpageflags_read stable_page_flags page_is_idle lookup_page_ext section = __pfn_to_section(pfn)
offline_pages memory_notify(MEM_OFFLINE) offline_page_ext ms->page_ext = NULL section->page_ext + pfn
I think that it is a fundamental problem of memory hotplug. There is similar race with struct page for offlined memory.
kpageflags_read pfn_valid
remove_memory stable_page_flags crash!
I already reported similar race problem to memory hotplug guys but didn't get any answer.
lkml.kernel.org/r/20151221031501.GA32524@js1304-P5Q-DELUXE
Who's in charge of memory-hotplug? Kame, Could you nudge him?
Thanks.
On Thu, 26 May 2016 16:15:28 -0700 "Shi, Yang" yang.shi@linaro.org wrote:
I hope we consider this direction, too.
Yang, Could you think about this?
Thanks a lot for the suggestion. Sorry for the late reply, I was busy on preparing patches. I do agree this is a direction we should look into, but I haven't got time to think about it deeper. I hope Joonsoo could chime in too since he is the original author for page extension.
Even, your patch was broken, I think. It doesn't work with !CONFIG_DEBUG_VM && !CONFIG_PAGE_POISONING because lookup_page_ext doesn't return NULL in that case.
Actually, I think the #ifdef should be removed if lookup_page_ext() is possible to return NULL. It sounds not make sense returning NULL only when DEBUG_VM is enabled. It should return NULL no matter what debug config is selected. If Joonsoo agrees with me I'm going to come up with a patch to fix it.
I've lost the plot here. What is the status of this patch?
Latest version:
From: Yang Shi yang.shi@linaro.org Subject: mm: check the return value of lookup_page_ext for all call sites
Per the discussion with Joonsoo Kim [1], we need check the return value of lookup_page_ext() for all call sites since it might return NULL in some cases, although it is unlikely, i.e. memory hotplug.
Tested with ltp with "page_owner=0".
[1] http://lkml.kernel.org/r/20160519002809.GA10245@js1304-P5Q-DELUXE
[akpm@linux-foundation.org: fix build-breaking typos] [arnd@arndb.de: fix build problems from lookup_page_ext] Link: http://lkml.kernel.org/r/6285269.2CksypHdYp@wuerfel [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/1464023768-31025-1-git-send-email-yang.shi@linaro.o... Signed-off-by: Yang Shi yang.shi@linaro.org Signed-off-by: Arnd Bergmann arnd@arndb.de Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Signed-off-by: Andrew Morton akpm@linux-foundation.org ---
include/linux/page_idle.h | 43 ++++++++++++++++++++++++++++++------ mm/page_alloc.c | 6 +++++ mm/page_owner.c | 26 +++++++++++++++++++++ mm/page_poison.c | 8 +++++- mm/vmstat.c | 2 + 5 files changed, 77 insertions(+), 8 deletions(-)
diff -puN include/linux/page_idle.h~mm-check-the-return-value-of-lookup_page_ext-for-all-call-sites include/linux/page_idle.h --- a/include/linux/page_idle.h~mm-check-the-return-value-of-lookup_page_ext-for-all-call-sites +++ a/include/linux/page_idle.h @@ -46,33 +46,62 @@ extern struct page_ext_operations page_i
static inline bool page_is_young(struct page *page) { - return test_bit(PAGE_EXT_YOUNG, &lookup_page_ext(page)->flags); + struct page_ext *page_ext = lookup_page_ext(page); + + if (unlikely(!page_ext)) + return false; + + return test_bit(PAGE_EXT_YOUNG, &page_ext->flags); }
static inline void set_page_young(struct page *page) { - set_bit(PAGE_EXT_YOUNG, &lookup_page_ext(page)->flags); + struct page_ext *page_ext = lookup_page_ext(page); + + if (unlikely(!page_ext)) + return; + + set_bit(PAGE_EXT_YOUNG, &page_ext->flags); }
static inline bool test_and_clear_page_young(struct page *page) { - return test_and_clear_bit(PAGE_EXT_YOUNG, - &lookup_page_ext(page)->flags); + struct page_ext *page_ext = lookup_page_ext(page); + + if (unlikely(!page_ext)) + return false; + + return test_and_clear_bit(PAGE_EXT_YOUNG, &page_ext->flags); }
static inline bool page_is_idle(struct page *page) { - return test_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags); + struct page_ext *page_ext = lookup_page_ext(page); + + if (unlikely(!page_ext)) + return false; + + return test_bit(PAGE_EXT_IDLE, &page_ext->flags); }
static inline void set_page_idle(struct page *page) { - set_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags); + struct page_ext *page_ext = lookup_page_ext(page); + + if (unlikely(!page_ext)) + return; + + set_bit(PAGE_EXT_IDLE, &page_ext->flags); }
static inline void clear_page_idle(struct page *page) { - clear_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags); + struct page_ext *page_ext = lookup_page_ext(page); + + if (unlikely(!page_ext)) + return; + + clear_bit(PAGE_EXT_IDLE, &page_ext->flags); } #endif /* CONFIG_64BIT */
diff -puN mm/page_alloc.c~mm-check-the-return-value-of-lookup_page_ext-for-all-call-sites mm/page_alloc.c --- a/mm/page_alloc.c~mm-check-the-return-value-of-lookup_page_ext-for-all-call-sites +++ a/mm/page_alloc.c @@ -656,6 +656,9 @@ static inline void set_page_guard(struct return;
page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + return; + __set_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
INIT_LIST_HEAD(&page->lru); @@ -673,6 +676,9 @@ static inline void clear_page_guard(stru return;
page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + return; + __clear_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
set_page_private(page, 0); diff -puN mm/page_owner.c~mm-check-the-return-value-of-lookup_page_ext-for-all-call-sites mm/page_owner.c --- a/mm/page_owner.c~mm-check-the-return-value-of-lookup_page_ext-for-all-call-sites +++ a/mm/page_owner.c @@ -55,6 +55,8 @@ void __reset_page_owner(struct page *pag
for (i = 0; i < (1 << order); i++) { page_ext = lookup_page_ext(page + i); + if (unlikely(!page_ext)) + continue; __clear_bit(PAGE_EXT_OWNER, &page_ext->flags); } } @@ -62,6 +64,7 @@ void __reset_page_owner(struct page *pag void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) { struct page_ext *page_ext = lookup_page_ext(page); + struct stack_trace trace = { .nr_entries = 0, .max_entries = ARRAY_SIZE(page_ext->trace_entries), @@ -69,6 +72,9 @@ void __set_page_owner(struct page *page, .skip = 3, };
+ if (unlikely(!page_ext)) + return; + save_stack_trace(&trace);
page_ext->order = order; @@ -82,6 +88,8 @@ void __set_page_owner(struct page *page, void __set_page_owner_migrate_reason(struct page *page, int reason) { struct page_ext *page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + return;
page_ext->last_migrate_reason = reason; } @@ -89,6 +97,12 @@ void __set_page_owner_migrate_reason(str gfp_t __get_page_owner_gfp(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + /* + * The caller just returns 0 if no valid gfp + * So return 0 here too. + */ + return 0;
return page_ext->gfp_mask; } @@ -99,6 +113,9 @@ void __copy_page_owner(struct page *oldp struct page_ext *new_ext = lookup_page_ext(newpage); int i;
+ if (unlikely(!old_ext || !new_ext)) + return; + new_ext->order = old_ext->order; new_ext->gfp_mask = old_ext->gfp_mask; new_ext->nr_entries = old_ext->nr_entries; @@ -193,6 +210,11 @@ void __dump_page_owner(struct page *page gfp_t gfp_mask = page_ext->gfp_mask; int mt = gfpflags_to_migratetype(gfp_mask);
+ if (unlikely(!page_ext)) { + pr_alert("There is not page extension available.\n"); + return; + } + if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) { pr_alert("page_owner info is not active (free page?)\n"); return; @@ -251,6 +273,8 @@ read_page_owner(struct file *file, char }
page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + continue;
/* * Some pages could be missed by concurrent allocation or free, @@ -317,6 +341,8 @@ static void init_pages_in_zone(pg_data_t continue;
page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + continue;
/* Maybe overraping zone */ if (test_bit(PAGE_EXT_OWNER, &page_ext->flags)) diff -puN mm/page_poison.c~mm-check-the-return-value-of-lookup_page_ext-for-all-call-sites mm/page_poison.c --- a/mm/page_poison.c~mm-check-the-return-value-of-lookup_page_ext-for-all-call-sites +++ a/mm/page_poison.c @@ -54,6 +54,9 @@ static inline void set_page_poison(struc struct page_ext *page_ext;
page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + return; + __set_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); }
@@ -62,6 +65,9 @@ static inline void clear_page_poison(str struct page_ext *page_ext;
page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + return; + __clear_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); }
@@ -70,7 +76,7 @@ bool page_is_poisoned(struct page *page) struct page_ext *page_ext;
page_ext = lookup_page_ext(page); - if (!page_ext) + if (unlikely(!page_ext)) return false;
return test_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); diff -puN mm/vmstat.c~mm-check-the-return-value-of-lookup_page_ext-for-all-call-sites mm/vmstat.c --- a/mm/vmstat.c~mm-check-the-return-value-of-lookup_page_ext-for-all-call-sites +++ a/mm/vmstat.c @@ -1061,6 +1061,8 @@ static void pagetypeinfo_showmixedcount_ continue;
page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + continue;
if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) continue; _
On 5/27/2016 1:02 PM, Andrew Morton wrote:
On Thu, 26 May 2016 16:15:28 -0700 "Shi, Yang" yang.shi@linaro.org wrote:
I hope we consider this direction, too.
Yang, Could you think about this?
Thanks a lot for the suggestion. Sorry for the late reply, I was busy on preparing patches. I do agree this is a direction we should look into, but I haven't got time to think about it deeper. I hope Joonsoo could chime in too since he is the original author for page extension.
Even, your patch was broken, I think. It doesn't work with !CONFIG_DEBUG_VM && !CONFIG_PAGE_POISONING because lookup_page_ext doesn't return NULL in that case.
Actually, I think the #ifdef should be removed if lookup_page_ext() is possible to return NULL. It sounds not make sense returning NULL only when DEBUG_VM is enabled. It should return NULL no matter what debug config is selected. If Joonsoo agrees with me I'm going to come up with a patch to fix it.
I've lost the plot here. What is the status of this patch?
Latest version:
Yes, this is the latest version. We are discussing about some future optimization.
And, Minchan Kim pointed out a possible race condition which exists even before this patch. I proposed a quick fix, as long as they are happy to the fix, I will post it to the mailing list.
Thanks, Yang
From: Yang Shi yang.shi@linaro.org Subject: mm: check the return value of lookup_page_ext for all call sites
Per the discussion with Joonsoo Kim [1], we need check the return value of lookup_page_ext() for all call sites since it might return NULL in some cases, although it is unlikely, i.e. memory hotplug.
Tested with ltp with "page_owner=0".
[1] http://lkml.kernel.org/r/20160519002809.GA10245@js1304-P5Q-DELUXE
[akpm@linux-foundation.org: fix build-breaking typos] [arnd@arndb.de: fix build problems from lookup_page_ext] Link: http://lkml.kernel.org/r/6285269.2CksypHdYp@wuerfel [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/1464023768-31025-1-git-send-email-yang.shi@linaro.o... Signed-off-by: Yang Shi yang.shi@linaro.org Signed-off-by: Arnd Bergmann arnd@arndb.de Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Signed-off-by: Andrew Morton akpm@linux-foundation.org
include/linux/page_idle.h | 43 ++++++++++++++++++++++++++++++------ mm/page_alloc.c | 6 +++++ mm/page_owner.c | 26 +++++++++++++++++++++ mm/page_poison.c | 8 +++++- mm/vmstat.c | 2 + 5 files changed, 77 insertions(+), 8 deletions(-)
diff -puN include/linux/page_idle.h~mm-check-the-return-value-of-lookup_page_ext-for-all-call-sites include/linux/page_idle.h --- a/include/linux/page_idle.h~mm-check-the-return-value-of-lookup_page_ext-for-all-call-sites +++ a/include/linux/page_idle.h @@ -46,33 +46,62 @@ extern struct page_ext_operations page_i
static inline bool page_is_young(struct page *page) {
- return test_bit(PAGE_EXT_YOUNG, &lookup_page_ext(page)->flags);
- struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return false;
- return test_bit(PAGE_EXT_YOUNG, &page_ext->flags);
}
static inline void set_page_young(struct page *page) {
- set_bit(PAGE_EXT_YOUNG, &lookup_page_ext(page)->flags);
- struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
- set_bit(PAGE_EXT_YOUNG, &page_ext->flags);
}
static inline bool test_and_clear_page_young(struct page *page) {
- return test_and_clear_bit(PAGE_EXT_YOUNG,
&lookup_page_ext(page)->flags);
- struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return false;
- return test_and_clear_bit(PAGE_EXT_YOUNG, &page_ext->flags);
}
static inline bool page_is_idle(struct page *page) {
- return test_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags);
- struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return false;
- return test_bit(PAGE_EXT_IDLE, &page_ext->flags);
}
static inline void set_page_idle(struct page *page) {
- set_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags);
- struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
- set_bit(PAGE_EXT_IDLE, &page_ext->flags);
}
static inline void clear_page_idle(struct page *page) {
- clear_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags);
- struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
- clear_bit(PAGE_EXT_IDLE, &page_ext->flags);
} #endif /* CONFIG_64BIT */
diff -puN mm/page_alloc.c~mm-check-the-return-value-of-lookup_page_ext-for-all-call-sites mm/page_alloc.c --- a/mm/page_alloc.c~mm-check-the-return-value-of-lookup_page_ext-for-all-call-sites +++ a/mm/page_alloc.c @@ -656,6 +656,9 @@ static inline void set_page_guard(struct return;
page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
return;
__set_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
INIT_LIST_HEAD(&page->lru);
@@ -673,6 +676,9 @@ static inline void clear_page_guard(stru return;
page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
return;
__clear_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
set_page_private(page, 0);
diff -puN mm/page_owner.c~mm-check-the-return-value-of-lookup_page_ext-for-all-call-sites mm/page_owner.c --- a/mm/page_owner.c~mm-check-the-return-value-of-lookup_page_ext-for-all-call-sites +++ a/mm/page_owner.c @@ -55,6 +55,8 @@ void __reset_page_owner(struct page *pag
for (i = 0; i < (1 << order); i++) { page_ext = lookup_page_ext(page + i);
if (unlikely(!page_ext))
__clear_bit(PAGE_EXT_OWNER, &page_ext->flags); }continue;
} @@ -62,6 +64,7 @@ void __reset_page_owner(struct page *pag void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) { struct page_ext *page_ext = lookup_page_ext(page);
- struct stack_trace trace = { .nr_entries = 0, .max_entries = ARRAY_SIZE(page_ext->trace_entries),
@@ -69,6 +72,9 @@ void __set_page_owner(struct page *page, .skip = 3, };
if (unlikely(!page_ext))
return;
save_stack_trace(&trace);
page_ext->order = order;
@@ -82,6 +88,8 @@ void __set_page_owner(struct page *page, void __set_page_owner_migrate_reason(struct page *page, int reason) { struct page_ext *page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
return;
page_ext->last_migrate_reason = reason;
} @@ -89,6 +97,12 @@ void __set_page_owner_migrate_reason(str gfp_t __get_page_owner_gfp(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
/*
* The caller just returns 0 if no valid gfp
* So return 0 here too.
*/
return 0;
return page_ext->gfp_mask;
} @@ -99,6 +113,9 @@ void __copy_page_owner(struct page *oldp struct page_ext *new_ext = lookup_page_ext(newpage); int i;
- if (unlikely(!old_ext || !new_ext))
return;
- new_ext->order = old_ext->order; new_ext->gfp_mask = old_ext->gfp_mask; new_ext->nr_entries = old_ext->nr_entries;
@@ -193,6 +210,11 @@ void __dump_page_owner(struct page *page gfp_t gfp_mask = page_ext->gfp_mask; int mt = gfpflags_to_migratetype(gfp_mask);
- if (unlikely(!page_ext)) {
pr_alert("There is not page extension available.\n");
return;
- }
- if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) { pr_alert("page_owner info is not active (free page?)\n"); return;
@@ -251,6 +273,8 @@ read_page_owner(struct file *file, char }
page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
continue;
/*
- Some pages could be missed by concurrent allocation or free,
@@ -317,6 +341,8 @@ static void init_pages_in_zone(pg_data_t continue;
page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
continue; /* Maybe overraping zone */ if (test_bit(PAGE_EXT_OWNER, &page_ext->flags))
diff -puN mm/page_poison.c~mm-check-the-return-value-of-lookup_page_ext-for-all-call-sites mm/page_poison.c --- a/mm/page_poison.c~mm-check-the-return-value-of-lookup_page_ext-for-all-call-sites +++ a/mm/page_poison.c @@ -54,6 +54,9 @@ static inline void set_page_poison(struc struct page_ext *page_ext;
page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
- __set_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags);
}
@@ -62,6 +65,9 @@ static inline void clear_page_poison(str struct page_ext *page_ext;
page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
- __clear_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags);
}
@@ -70,7 +76,7 @@ bool page_is_poisoned(struct page *page) struct page_ext *page_ext;
page_ext = lookup_page_ext(page);
- if (!page_ext)
if (unlikely(!page_ext)) return false;
return test_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags);
diff -puN mm/vmstat.c~mm-check-the-return-value-of-lookup_page_ext-for-all-call-sites mm/vmstat.c --- a/mm/vmstat.c~mm-check-the-return-value-of-lookup_page_ext-for-all-call-sites +++ a/mm/vmstat.c @@ -1061,6 +1061,8 @@ static void pagetypeinfo_showmixedcount_ continue;
page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
continue; if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) continue;
_
On Fri, 27 May 2016 13:17:19 -0700 "Shi, Yang" yang.shi@linaro.org wrote:
Actually, I think the #ifdef should be removed if lookup_page_ext() can possibly return NULL. It does not make sense to return NULL only when DEBUG_VM is enabled; it should return NULL no matter what debug config is selected. If Joonsoo agrees with me, I'm going to come up with a patch to fix it.
I've lost the plot here. What is the status of this patch?
Latest version:
Yes, this is the latest version. We are discussing some future optimizations.
And, Minchan Kim pointed out a possible race condition which exists even before this patch. I proposed a quick fix; as long as they are happy with the fix, I will post it to the mailing list.
OK, thanks - I've moved it into the for-Linus-next-week queue.
A patch for lookup_page_ext introduced several build errors and warnings, e.g.
mm/page_owner.c: In function '__set_page_owner': mm/page_owner.c:71:2: error: ISO C90 forbids mixed declarations and code [-Werror=declaration-after-statement] include/linux/page_idle.h: In function 'set_page_young': include/linux/page_idle.h:62:3: error: expected ')' before 'return'
This fixes all of them. Please fold into the original patch.
Signed-off-by: Arnd Bergmann arnd@arndb.de Fixes: 38c4fffbad3c ("mm: check the return value of lookup_page_ext for all call sites")
diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h index 569c3a180625..fec40271339f 100644 --- a/include/linux/page_idle.h +++ b/include/linux/page_idle.h @@ -48,7 +48,7 @@ static inline bool page_is_young(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext) + if (unlikely(!page_ext)) return false;
return test_bit(PAGE_EXT_YOUNG, &page_ext->flags); @@ -58,7 +58,7 @@ static inline void set_page_young(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext) + if (unlikely(!page_ext)) return;
set_bit(PAGE_EXT_YOUNG, &page_ext->flags); @@ -68,7 +68,7 @@ static inline bool test_and_clear_page_young(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext) + if (unlikely(!page_ext)) return false;
return test_and_clear_bit(PAGE_EXT_YOUNG, &page_ext->flags); @@ -78,7 +78,7 @@ static inline bool page_is_idle(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext) + if (unlikely(!page_ext)) return false;
return test_bit(PAGE_EXT_IDLE, &page_ext->flags); @@ -88,7 +88,7 @@ static inline void set_page_idle(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext) + if (unlikely(!page_ext)) return;
set_bit(PAGE_EXT_IDLE, &page_ext->flags); @@ -98,7 +98,7 @@ static inline void clear_page_idle(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext) + if (unlikely(!page_ext)) return;
clear_bit(PAGE_EXT_IDLE, &page_ext->flags); diff --git a/mm/page_owner.c b/mm/page_owner.c index 902e39813295..c6cda3e36212 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -65,9 +65,6 @@ void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)) - return; - struct stack_trace trace = { .nr_entries = 0, .max_entries = ARRAY_SIZE(page_ext->trace_entries), @@ -75,6 +72,9 @@ void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) .skip = 3, };
+ if (unlikely(!page_ext)) + return; + save_stack_trace(&trace);
page_ext->order = order; @@ -111,12 +111,11 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage) { struct page_ext *old_ext = lookup_page_ext(oldpage); struct page_ext *new_ext = lookup_page_ext(newpage); + int i;
if (unlikely(!old_ext || !new_ext)) return;
- int i; - new_ext->order = old_ext->order; new_ext->gfp_mask = old_ext->gfp_mask; new_ext->nr_entries = old_ext->nr_entries; @@ -204,11 +203,6 @@ err: void __dump_page_owner(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page); - if (unlikely(!page_ext)) { - pr_alert("There is not page extension available.\n"); - return; - } - struct stack_trace trace = { .nr_entries = page_ext->nr_entries, .entries = &page_ext->trace_entries[0], @@ -216,6 +210,11 @@ void __dump_page_owner(struct page *page) gfp_t gfp_mask = page_ext->gfp_mask; int mt = gfpflags_to_migratetype(gfp_mask);
+ if (unlikely(!page_ext)) { + pr_alert("There is not page extension available.\n"); + return; + } + if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) { pr_alert("page_owner info is not active (free page?)\n"); return;
Hi Arnd,
Thanks a lot for the patch. My bad, sorry for the inconvenience. I overlooked that the specific page_idle change is for 32-bit only.
And, my host compiler is rather old (still 4.8), so it might not catch the warning. I will update my compiler.
Regards, Yang
On 5/24/2016 3:08 AM, Arnd Bergmann wrote:
A patch for lookup_page_ext introduced several build errors and warnings, e.g.
mm/page_owner.c: In function '__set_page_owner': mm/page_owner.c:71:2: error: ISO C90 forbids mixed declarations and code [-Werror=declaration-after-statement] include/linux/page_idle.h: In function 'set_page_young': include/linux/page_idle.h:62:3: error: expected ')' before 'return'
This fixes all of them. Please fold into the original patch.
Signed-off-by: Arnd Bergmann arnd@arndb.de Fixes: 38c4fffbad3c ("mm: check the return value of lookup_page_ext for all call sites")
diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h index 569c3a180625..fec40271339f 100644 --- a/include/linux/page_idle.h +++ b/include/linux/page_idle.h @@ -48,7 +48,7 @@ static inline bool page_is_young(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
if (unlikely(!page_ext)) return false;
return test_bit(PAGE_EXT_YOUNG, &page_ext->flags);
@@ -58,7 +58,7 @@ static inline void set_page_young(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
if (unlikely(!page_ext)) return;
set_bit(PAGE_EXT_YOUNG, &page_ext->flags);
@@ -68,7 +68,7 @@ static inline bool test_and_clear_page_young(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
if (unlikely(!page_ext)) return false;
return test_and_clear_bit(PAGE_EXT_YOUNG, &page_ext->flags);
@@ -78,7 +78,7 @@ static inline bool page_is_idle(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
if (unlikely(!page_ext)) return false;
return test_bit(PAGE_EXT_IDLE, &page_ext->flags);
@@ -88,7 +88,7 @@ static inline void set_page_idle(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
if (unlikely(!page_ext)) return;
set_bit(PAGE_EXT_IDLE, &page_ext->flags);
@@ -98,7 +98,7 @@ static inline void clear_page_idle(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
if (unlikely(!page_ext)) return;
clear_bit(PAGE_EXT_IDLE, &page_ext->flags);
diff --git a/mm/page_owner.c b/mm/page_owner.c index 902e39813295..c6cda3e36212 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -65,9 +65,6 @@ void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
- struct stack_trace trace = { .nr_entries = 0, .max_entries = ARRAY_SIZE(page_ext->trace_entries),
@@ -75,6 +72,9 @@ void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) .skip = 3, };
if (unlikely(!page_ext))
return;
save_stack_trace(&trace);
page_ext->order = order;
@@ -111,12 +111,11 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage) { struct page_ext *old_ext = lookup_page_ext(oldpage); struct page_ext *new_ext = lookup_page_ext(newpage);
int i;
if (unlikely(!old_ext || !new_ext)) return;
- int i;
- new_ext->order = old_ext->order; new_ext->gfp_mask = old_ext->gfp_mask; new_ext->nr_entries = old_ext->nr_entries;
@@ -204,11 +203,6 @@ err: void __dump_page_owner(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)) {
pr_alert("There is not page extension available.\n");
return;
- }
- struct stack_trace trace = { .nr_entries = page_ext->nr_entries, .entries = &page_ext->trace_entries[0],
@@ -216,6 +210,11 @@ void __dump_page_owner(struct page *page) gfp_t gfp_mask = page_ext->gfp_mask; int mt = gfpflags_to_migratetype(gfp_mask);
- if (unlikely(!page_ext)) {
pr_alert("There is not page extension available.\n");
return;
- }
- if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) { pr_alert("page_owner info is not active (free page?)\n"); return;
On 5/23/2016 10:16 AM, Yang Shi wrote:
Per the discussion with Joonsoo Kim [1], we need to check the return value of lookup_page_ext() for all call sites since it might return NULL in some cases, although it is unlikely, e.g. memory hotplug.
Tested with ltp with "page_owner=0".
[1] http://lkml.kernel.org/r/20160519002809.GA10245@js1304-P5Q-DELUXE
Signed-off-by: Yang Shi yang.shi@linaro.org
include/linux/page_idle.h | 43 ++++++++++++++++++++++++++++++++++++------- mm/page_alloc.c | 6 ++++++ mm/page_owner.c | 27 +++++++++++++++++++++++++++ mm/page_poison.c | 8 +++++++- mm/vmstat.c | 2 ++ 5 files changed, 78 insertions(+), 8 deletions(-)
diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h index bf268fa..8f5d4ad 100644 --- a/include/linux/page_idle.h +++ b/include/linux/page_idle.h @@ -46,33 +46,62 @@ extern struct page_ext_operations page_idle_ops; static inline bool page_is_young(struct page *page) {
- return test_bit(PAGE_EXT_YOUNG, &lookup_page_ext(page)->flags);
- struct page_ext *page_ext;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
return false;
- return test_bit(PAGE_EXT_YOUNG, &page_ext->flags); }
static inline void set_page_young(struct page *page) {
- set_bit(PAGE_EXT_YOUNG, &lookup_page_ext(page)->flags);
- struct page_ext *page_ext;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
return;
- set_bit(PAGE_EXT_YOUNG, &page_ext->flags); }
static inline bool test_and_clear_page_young(struct page *page) {
- return test_and_clear_bit(PAGE_EXT_YOUNG,
&lookup_page_ext(page)->flags);
- struct page_ext *page_ext;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
return false;
- return test_and_clear_bit(PAGE_EXT_YOUNG, &page_ext->flags); }
static inline bool page_is_idle(struct page *page) {
- return test_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags);
- struct page_ext *page_ext;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
return false;
- return test_bit(PAGE_EXT_IDLE, &page_ext->flags); }
static inline void set_page_idle(struct page *page) {
- set_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags);
- struct page_ext *page_ext;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
return;
- set_bit(PAGE_EXT_IDLE, &page_ext->flags); }
static inline void clear_page_idle(struct page *page) {
- clear_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags);
- struct page_ext *page_ext;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
return;
- clear_bit(PAGE_EXT_IDLE, &page_ext->flags); } #endif /* CONFIG_64BIT */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f8f3bfc..d27e8b9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -656,6 +656,9 @@ static inline void set_page_guard(struct zone *zone, struct page *page, return; page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
- __set_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
INIT_LIST_HEAD(&page->lru); @@ -673,6 +676,9 @@ static inline void clear_page_guard(struct zone *zone, struct page *page, return; page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
- __clear_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
set_page_private(page, 0); diff --git a/mm/page_owner.c b/mm/page_owner.c index 792b56d..902e398 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -55,6 +55,8 @@ void __reset_page_owner(struct page *page, unsigned int order) for (i = 0; i < (1 << order); i++) { page_ext = lookup_page_ext(page + i);
if (unlikely(!page_ext))
__clear_bit(PAGE_EXT_OWNER, &page_ext->flags); } }continue;
@@ -62,6 +64,10 @@ void __reset_page_owner(struct page *page, unsigned int order) void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
- struct stack_trace trace = { .nr_entries = 0, .max_entries = ARRAY_SIZE(page_ext->trace_entries),
@@ -82,6 +88,8 @@ void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) void __set_page_owner_migrate_reason(struct page *page, int reason) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
page_ext->last_migrate_reason = reason; } @@ -89,6 +97,12 @@ void __set_page_owner_migrate_reason(struct page *page, int reason) gfp_t __get_page_owner_gfp(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
/*
* The caller just returns 0 if no valid gfp
* So return 0 here too.
*/
return 0;
return page_ext->gfp_mask; } @@ -97,6 +111,10 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage) { struct page_ext *old_ext = lookup_page_ext(oldpage); struct page_ext *new_ext = lookup_page_ext(newpage);
- if (unlikely(!old_ext || !new_ext))
return;
- int i;
new_ext->order = old_ext->order; @@ -186,6 +204,11 @@ err: void __dump_page_owner(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)) {
pr_alert("There is not page extension available.\n");
return;
- }
- struct stack_trace trace = { .nr_entries = page_ext->nr_entries, .entries = &page_ext->trace_entries[0],
@@ -251,6 +274,8 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos) } page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
continue;
/* * Some pages could be missed by concurrent allocation or free, @@ -317,6 +342,8 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone) continue; page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
continue;
/* Maybe overraping zone */ if (test_bit(PAGE_EXT_OWNER, &page_ext->flags)) diff --git a/mm/page_poison.c b/mm/page_poison.c index 1eae5fa..2e647c6 100644 --- a/mm/page_poison.c +++ b/mm/page_poison.c @@ -54,6 +54,9 @@ static inline void set_page_poison(struct page *page) struct page_ext *page_ext; page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
- __set_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); }
@@ -62,6 +65,9 @@ static inline void clear_page_poison(struct page *page) struct page_ext *page_ext; page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
- __clear_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); }
@@ -70,7 +76,7 @@ bool page_is_poisoned(struct page *page) struct page_ext *page_ext; page_ext = lookup_page_ext(page);
- if (!page_ext)
- if (unlikely(!page_ext)) return false;
return test_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); diff --git a/mm/vmstat.c b/mm/vmstat.c index 77e42ef..cb2a67b 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1061,6 +1061,8 @@ static void pagetypeinfo_showmixedcount_print(struct seq_file *m, continue; page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
continue;
if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) continue;
if (unlikely(!page_ext) /* Breaks the build */
static inline bool page_is_young(struct page *page) { - return test_bit(PAGE_EXT_YOUNG, &lookup_page_ext(page)->flags); + struct page_ext *page_ext; + page_ext = lookup_page_ext(page); + if (unlikely(!page_ext) + return false; + + return test_bit(PAGE_EXT_YOUNG, &page_ext->flags); }
Thanks Shakil
On Mon, May 23, 2016 at 10:16:08AM -0700, Yang Shi wrote:
Per the discussion with Joonsoo Kim [1], we need to check the return value of lookup_page_ext() for all call sites since it might return NULL in some cases, although it is unlikely, e.g. memory hotplug.
Tested with ltp with "page_owner=0".
[1] http://lkml.kernel.org/r/20160519002809.GA10245@js1304-P5Q-DELUXE
Signed-off-by: Yang Shi yang.shi@linaro.org
include/linux/page_idle.h | 43 ++++++++++++++++++++++++++++++++++++------- mm/page_alloc.c | 6 ++++++ mm/page_owner.c | 27 +++++++++++++++++++++++++++ mm/page_poison.c | 8 +++++++- mm/vmstat.c | 2 ++ 5 files changed, 78 insertions(+), 8 deletions(-)
diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h index bf268fa..8f5d4ad 100644 --- a/include/linux/page_idle.h +++ b/include/linux/page_idle.h @@ -46,33 +46,62 @@ extern struct page_ext_operations page_idle_ops; static inline bool page_is_young(struct page *page) {
- return test_bit(PAGE_EXT_YOUNG, &lookup_page_ext(page)->flags);
- struct page_ext *page_ext;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
return false;
- return test_bit(PAGE_EXT_YOUNG, &page_ext->flags);
} static inline void set_page_young(struct page *page) {
- set_bit(PAGE_EXT_YOUNG, &lookup_page_ext(page)->flags);
- struct page_ext *page_ext;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
return;
- set_bit(PAGE_EXT_YOUNG, &page_ext->flags);
} static inline bool test_and_clear_page_young(struct page *page) {
- return test_and_clear_bit(PAGE_EXT_YOUNG,
&lookup_page_ext(page)->flags);
- struct page_ext *page_ext;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
return false;
- return test_and_clear_bit(PAGE_EXT_YOUNG, &page_ext->flags);
} static inline bool page_is_idle(struct page *page) {
- return test_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags);
- struct page_ext *page_ext;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
return false;
- return test_bit(PAGE_EXT_IDLE, &page_ext->flags);
} static inline void set_page_idle(struct page *page) {
- set_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags);
- struct page_ext *page_ext;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
return;
- set_bit(PAGE_EXT_IDLE, &page_ext->flags);
} static inline void clear_page_idle(struct page *page) {
- clear_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags);
- struct page_ext *page_ext;
- page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)
return;
- clear_bit(PAGE_EXT_IDLE, &page_ext->flags);
} #endif /* CONFIG_64BIT */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f8f3bfc..d27e8b9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -656,6 +656,9 @@ static inline void set_page_guard(struct zone *zone, struct page *page, return; page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
- __set_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
INIT_LIST_HEAD(&page->lru); @@ -673,6 +676,9 @@ static inline void clear_page_guard(struct zone *zone, struct page *page, return; page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
- __clear_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
set_page_private(page, 0); diff --git a/mm/page_owner.c b/mm/page_owner.c index 792b56d..902e398 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -55,6 +55,8 @@ void __reset_page_owner(struct page *page, unsigned int order) for (i = 0; i < (1 << order); i++) { page_ext = lookup_page_ext(page + i);
if (unlikely(!page_ext))
__clear_bit(PAGE_EXT_OWNER, &page_ext->flags); }continue;
} @@ -62,6 +64,10 @@ void __reset_page_owner(struct page *page, unsigned int order) void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
- struct stack_trace trace = { .nr_entries = 0, .max_entries = ARRAY_SIZE(page_ext->trace_entries),
@@ -82,6 +88,8 @@ void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) void __set_page_owner_migrate_reason(struct page *page, int reason) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
page_ext->last_migrate_reason = reason; } @@ -89,6 +97,12 @@ void __set_page_owner_migrate_reason(struct page *page, int reason) gfp_t __get_page_owner_gfp(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
/*
* The caller just returns 0 if no valid gfp
* So return 0 here too.
*/
return 0;
return page_ext->gfp_mask; } @@ -97,6 +111,10 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage) { struct page_ext *old_ext = lookup_page_ext(oldpage); struct page_ext *new_ext = lookup_page_ext(newpage);
- if (unlikely(!old_ext || !new_ext))
return;
- int i;
new_ext->order = old_ext->order; @@ -186,6 +204,11 @@ err: void __dump_page_owner(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext)) {
pr_alert("There is not page extension available.\n");
return;
- }
- struct stack_trace trace = { .nr_entries = page_ext->nr_entries, .entries = &page_ext->trace_entries[0],
@@ -251,6 +274,8 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos) } page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
continue;
/* * Some pages could be missed by concurrent allocation or free, @@ -317,6 +342,8 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone) continue; page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
continue;
/* Maybe overraping zone */ if (test_bit(PAGE_EXT_OWNER, &page_ext->flags)) diff --git a/mm/page_poison.c b/mm/page_poison.c index 1eae5fa..2e647c6 100644 --- a/mm/page_poison.c +++ b/mm/page_poison.c @@ -54,6 +54,9 @@ static inline void set_page_poison(struct page *page) struct page_ext *page_ext; page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
- __set_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags);
} @@ -62,6 +65,9 @@ static inline void clear_page_poison(struct page *page) struct page_ext *page_ext; page_ext = lookup_page_ext(page);
- if (unlikely(!page_ext))
return;
- __clear_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags);
} @@ -70,7 +76,7 @@ bool page_is_poisoned(struct page *page) struct page_ext *page_ext; page_ext = lookup_page_ext(page);
- if (!page_ext)
- if (unlikely(!page_ext)) return false;
return test_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); diff --git a/mm/vmstat.c b/mm/vmstat.c index 77e42ef..cb2a67b 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1061,6 +1061,8 @@ static void pagetypeinfo_showmixedcount_print(struct seq_file *m, continue; page_ext = lookup_page_ext(page);
if (unlikely(!page_ext))
continue;
if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) continue; --
There is another race with here.
init_section_page_ext < reordering > section->page_ext = base - pfn
lookup_page_ext if (section->page_ext) section->page_ext + pfn base = alloc_page_ext
I guess alloc_page_ext doesn't guarantee prevention of such reordering, so the caller of the memory allocating API should provide the right memory barrier.
linaro-kernel@lists.linaro.org