On 12/17/19 3:16 PM, Mina Almasry wrote:
Augments hugetlb_cgroup_charge_cgroup to be able to charge hugetlb usage or hugetlb reservation counter.
Adds a new interface to uncharge a hugetlb_cgroup counter via hugetlb_cgroup_uncharge_counter.
Integrates the counter with hugetlb_cgroup, via hugetlb_cgroup_init, hugetlb_cgroup_have_usage, and hugetlb_cgroup_css_offline.
Signed-off-by: Mina Almasry almasrymina@google.com
Changes in V9:
- Fixed HUGETLB_CGROUP_MIN_ORDER.
- Minor variable name update.
- Moved some init/cleanup code from later patches in the series to this patch.
- Updated reparenting of reservation accounting.
include/linux/hugetlb_cgroup.h | 68 ++++++++++++++--------- mm/hugetlb.c | 19 ++++--- mm/hugetlb_cgroup.c | 98 +++++++++++++++++++++++++--------- 3 files changed, 127 insertions(+), 58 deletions(-)
diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index 063962f6dfc6a..eab8a70d5bcb5 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -20,29 +20,37 @@ struct hugetlb_cgroup; /*
- Minimum page order trackable by hugetlb cgroup.
- At least 3 pages are necessary for all the tracking information.
*/
- At least 4 pages are necessary for all the tracking information.
#define HUGETLB_CGROUP_MIN_ORDER 2
#ifdef CONFIG_CGROUP_HUGETLB
-static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page) +static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page,
bool reserved)
{ VM_BUG_ON_PAGE(!PageHuge(page), page);
if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER) return NULL;
- return (struct hugetlb_cgroup *)page[2].private;
- if (reserved)
return (struct hugetlb_cgroup *)page[3].private;
- else
return (struct hugetlb_cgroup *)page[2].private;
}
-static inline -int set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg) +static inline int set_hugetlb_cgroup(struct page *page,
struct hugetlb_cgroup *h_cg,
bool reservation)
{ VM_BUG_ON_PAGE(!PageHuge(page), page);
if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER) return -1;
- page[2].private = (unsigned long)h_cg;
- if (reservation)
page[3].private = (unsigned long)h_cg;
- else
return 0;page[2].private = (unsigned long)h_cg;
}
@@ -52,26 +60,34 @@ static inline bool hugetlb_cgroup_disabled(void) }
extern int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
struct hugetlb_cgroup **ptr);
struct hugetlb_cgroup **ptr,
bool reserved);
extern void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, struct hugetlb_cgroup *h_cg,
struct page *page);
struct page *page, bool reserved);
extern void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
struct page *page);
struct page *page, bool reserved);
extern void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg);
struct hugetlb_cgroup *h_cg,
bool reserved);
+extern void hugetlb_cgroup_uncharge_counter(struct page_counter *p,
unsigned long nr_pages,
struct cgroup_subsys_state *css);
Do we need a corresponding stub for the !CONFIG_CGROUP_HUGETLB case?
extern void hugetlb_cgroup_file_init(void) __init; extern void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage);
#else -static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page) +static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page,
bool reserved)
{ return NULL; }
-static inline -int set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg) +static inline int set_hugetlb_cgroup(struct page *page,
struct hugetlb_cgroup *h_cg, bool reserved)
{ return 0; } @@ -81,28 +97,30 @@ static inline bool hugetlb_cgroup_disabled(void) return true; }
-static inline int -hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
struct hugetlb_cgroup **ptr)
+static inline int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
struct hugetlb_cgroup **ptr,
bool reserved)
{ return 0; }
-static inline void -hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg,
struct page *page)
+static inline void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg,
struct page *page,
bool reserved)
{ }
-static inline void -hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, struct page *page) +static inline void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
struct page *page,
bool reserved)
{ }
-static inline void -hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg)
+static inline void hugetlb_cgroup_uncharge_cgroup(int idx,
unsigned long nr_pages,
struct hugetlb_cgroup *h_cg,
bool reserved)
{ }
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index ac65bb5e38ac2..e6e8240f1718c 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1067,7 +1067,8 @@ static void update_and_free_page(struct hstate *h, struct page *page) 1 << PG_active | 1 << PG_private | 1 << PG_writeback); }
- VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page), page);
- VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page, false), page);
- VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page, true), page); set_compound_page_dtor(page, NULL_COMPOUND_DTOR); set_page_refcounted(page); if (hstate_is_gigantic(h)) {
@@ -1177,8 +1178,8 @@ void free_huge_page(struct page *page)
spin_lock(&hugetlb_lock); clear_page_huge_active(page);
- hugetlb_cgroup_uncharge_page(hstate_index(h),
pages_per_huge_page(h), page);
- hugetlb_cgroup_uncharge_page(hstate_index(h), pages_per_huge_page(h),
page, false);
hugetlb code has been restructured a bit and this now resides in the function __free_huge_page().
if (restore_reserve) h->resv_huge_pages++;
@@ -1204,7 +1205,8 @@ static void prep_new_huge_page(struct hstate *h, struct page *page, int nid) INIT_LIST_HEAD(&page->lru); set_compound_page_dtor(page, HUGETLB_PAGE_DTOR); spin_lock(&hugetlb_lock);
- set_hugetlb_cgroup(page, NULL);
- set_hugetlb_cgroup(page, NULL, false);
- set_hugetlb_cgroup(page, NULL, true); h->nr_huge_pages++; h->nr_huge_pages_node[nid]++; spin_unlock(&hugetlb_lock);
@@ -1990,7 +1992,8 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, gbl_chg = 1; }
- ret = hugetlb_cgroup_charge_cgroup(idx, pages_per_huge_page(h), &h_cg);
- ret = hugetlb_cgroup_charge_cgroup(idx, pages_per_huge_page(h), &h_cg,
if (ret) goto out_subpool_put;false);
@@ -2014,7 +2017,8 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, list_move(&page->lru, &h->hugepage_activelist); /* Fall through */ }
- hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), h_cg, page);
hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), h_cg, page,
false);
spin_unlock(&hugetlb_lock);
set_page_private(page, (unsigned long)spool);
@@ -2038,7 +2042,8 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, return page;
out_uncharge_cgroup:
- hugetlb_cgroup_uncharge_cgroup(idx, pages_per_huge_page(h), h_cg);
- hugetlb_cgroup_uncharge_cgroup(idx, pages_per_huge_page(h), h_cg,
false);
out_subpool_put: if (map_chg || avoid_reserve) hugepage_subpool_put_pages(spool, 1); diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index 35415af9ed26f..b03270b0d5833 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -96,8 +96,12 @@ static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg) int idx;
for (idx = 0; idx < hugetlb_max_hstate; idx++) {
if (page_counter_read(&h_cg->hugepage[idx]))
if (page_counter_read(
hugetlb_cgroup_get_counter(h_cg, idx, true)) ||
page_counter_read(
hugetlb_cgroup_get_counter(h_cg, idx, false))) { return true;
} return false;}
} @@ -108,18 +112,32 @@ static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup, int idx;
for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) {
struct page_counter *counter = &h_cgroup->hugepage[idx];
struct page_counter *parent = NULL;
struct page_counter *fault_parent = NULL;
unsigned long limit; int ret;struct page_counter *reserved_parent = NULL;
if (parent_h_cgroup)
parent = &parent_h_cgroup->hugepage[idx];
page_counter_init(counter, parent);
if (parent_h_cgroup) {
fault_parent = hugetlb_cgroup_get_counter(
parent_h_cgroup, idx, false);
reserved_parent = hugetlb_cgroup_get_counter(
parent_h_cgroup, idx, true);
}
page_counter_init(hugetlb_cgroup_get_counter(h_cgroup, idx,
false),
fault_parent);
page_counter_init(hugetlb_cgroup_get_counter(h_cgroup, idx,
true),
reserved_parent);
limit = round_down(PAGE_COUNTER_MAX, 1 << huge_page_order(&hstates[idx]));
ret = page_counter_set_max(counter, limit);
ret = page_counter_set_max(
hugetlb_cgroup_get_counter(h_cgroup, idx, false),
limit);
ret = page_counter_set_max(
VM_BUG_ON(ret);hugetlb_cgroup_get_counter(h_cgroup, idx, true), limit);
The second page_counter_set_max() call overwrites ret before the check in VM_BUG_ON().
} } @@ -149,7 +167,6 @@ static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css) kfree(h_cgroup); }
/*
- Should be called with hugetlb_lock held.
- Since we are holding hugetlb_lock, pages cannot get moved from
@@ -165,7 +182,7 @@ static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg, struct hugetlb_cgroup *page_hcg; struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg);
- page_hcg = hugetlb_cgroup_from_page(page);
- page_hcg = hugetlb_cgroup_from_page(page, false); /*
- We can have pages in active list without any cgroup
- ie, hugepage with less than 3 pages. We can safely
@@ -184,7 +201,7 @@ static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg, /* Take the pages off the local counter */ page_counter_cancel(counter, nr_pages);
- set_hugetlb_cgroup(page, parent);
- set_hugetlb_cgroup(page, parent, false);
out: return; } @@ -227,7 +244,7 @@ static inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx, }
int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
struct hugetlb_cgroup **ptr)
struct hugetlb_cgroup **ptr, bool reserved)
{ int ret = 0; struct page_counter *counter; @@ -250,13 +267,20 @@ int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, } rcu_read_unlock();
- if (!page_counter_try_charge(&h_cg->hugepage[idx], nr_pages,
&counter)) {
- if (!page_counter_try_charge(hugetlb_cgroup_get_counter(h_cg, idx,
reserved),
ret = -ENOMEM; hugetlb_event(hugetlb_cgroup_from_counter(counter, idx), idx, HUGETLB_MAX);nr_pages, &counter)) {
css_put(&h_cg->css);
}goto done;
- css_put(&h_cg->css);
- /* Reservations take a reference to the css because they do not get
* reparented.
I'm hoping someone with more cgroup knowledge can comment on this and any consequences of not reparenting reservations. We previously talked about why reparenting would be very difficult/expensive. I understand why you are nopt doing it. Just do not fully understand what needs to be done from the cgroup side.
Several of the following modifications deal with this. -- Mike kravetz
*/
- if (!reserved)
css_put(&h_cg->css);
done: *ptr = h_cg; return ret; @@ -265,12 +289,12 @@ int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, /* Should be called with hugetlb_lock held */ void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, struct hugetlb_cgroup *h_cg,
struct page *page)
struct page *page, bool reserved)
{ if (hugetlb_cgroup_disabled() || !h_cg) return;
- set_hugetlb_cgroup(page, h_cg);
- set_hugetlb_cgroup(page, h_cg, reserved); return;
}
@@ -278,23 +302,29 @@ void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
- Should be called with hugetlb_lock held
*/ void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
struct page *page)
struct page *page, bool reserved)
{ struct hugetlb_cgroup *h_cg;
if (hugetlb_cgroup_disabled()) return; lockdep_assert_held(&hugetlb_lock);
- h_cg = hugetlb_cgroup_from_page(page);
- h_cg = hugetlb_cgroup_from_page(page, reserved); if (unlikely(!h_cg)) return;
- set_hugetlb_cgroup(page, NULL);
- page_counter_uncharge(&h_cg->hugepage[idx], nr_pages);
- set_hugetlb_cgroup(page, NULL, reserved);
- page_counter_uncharge(hugetlb_cgroup_get_counter(h_cg, idx, reserved),
nr_pages);
- if (reserved)
css_put(&h_cg->css);
- return;
}
void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg)
struct hugetlb_cgroup *h_cg, bool reserved)
{ if (hugetlb_cgroup_disabled() || !h_cg) return; @@ -302,8 +332,22 @@ void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER) return;
- page_counter_uncharge(&h_cg->hugepage[idx], nr_pages);
- return;
- page_counter_uncharge(hugetlb_cgroup_get_counter(h_cg, idx, reserved),
nr_pages);
- if (reserved)
css_put(&h_cg->css);
+}
+void hugetlb_cgroup_uncharge_counter(struct page_counter *p,
unsigned long nr_pages,
struct cgroup_subsys_state *css)
+{
- if (hugetlb_cgroup_disabled() || !p || !css)
return;
- page_counter_uncharge(p, nr_pages);
- css_put(css);
}
enum { @@ -678,6 +722,7 @@ void __init hugetlb_cgroup_file_init(void) void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage) { struct hugetlb_cgroup *h_cg;
struct hugetlb_cgroup *h_cg_reservation; struct hstate *h = page_hstate(oldhpage);
if (hugetlb_cgroup_disabled())
@@ -685,11 +730,12 @@ void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage)
VM_BUG_ON_PAGE(!PageHuge(oldhpage), oldhpage); spin_lock(&hugetlb_lock);
- h_cg = hugetlb_cgroup_from_page(oldhpage);
- set_hugetlb_cgroup(oldhpage, NULL);
h_cg = hugetlb_cgroup_from_page(oldhpage, false);
h_cg_reservation = hugetlb_cgroup_from_page(oldhpage, true);
set_hugetlb_cgroup(oldhpage, NULL, false);
/* move the h_cg details to new cgroup */
- set_hugetlb_cgroup(newhpage, h_cg);
- set_hugetlb_cgroup(newhpage, h_cg_reservation, true); list_move(&newhpage->lru, &h->hugepage_activelist); spin_unlock(&hugetlb_lock); return;
-- 2.24.1.735.g03f4e72817-goog