A new map flag, BPF_F_NO_CHARGE, is introduced in bpf_attr. With it we can choose not to charge a map's memory to the creating task's memcg and instead account it to the root memcg only. At map creation time the no-charge flag is read directly from union bpf_attr; on all other paths it is read from struct bpf_map.
The use case for this flag is that we sometimes create bpf maps from a process running in a container (with a memcg), but these maps are meant for the whole system, so we don't want to charge their memory to this container. That is good for the container's memory resource management, as these shared bpf maps are always pinned and should belong to the system rather than to this container. It also helps make the charging behavior consistent: for example, if we charge the pinned memory to this container, then after the container restarts this memory will no longer belong to it.
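Two helpers, map_flags_no_charge() and bpf_flags_no_charge(), are introduced for follow-up use; each returns the given gfp flags with __GFP_ACCOUNT added unless no-charge is requested.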
Signed-off-by: Yafang Shao laoar.shao@gmail.com --- include/linux/bpf.h | 15 ++++++++++++++- include/uapi/linux/bpf.h | 3 +++ kernel/bpf/syscall.c | 1 + tools/include/uapi/linux/bpf.h | 3 +++ 4 files changed, 21 insertions(+), 1 deletion(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 88449fbbe063..07c6603a6c81 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -184,7 +184,8 @@ struct bpf_map { char name[BPF_OBJ_NAME_LEN]; bool bypass_spec_v1; bool frozen; /* write-once; write-protected by freeze_mutex */ - /* 14 bytes hole */ + bool no_charge; /* Don't charge to memcg */ + /* 13 bytes hole */
/* The 3rd and 4th cacheline with misc members to avoid false sharing * particularly with refcounting. @@ -207,6 +208,18 @@ struct bpf_map { } owner; };
+static inline gfp_t +map_flags_no_charge(gfp_t flags, union bpf_attr *attr) +{ + return flags |= (attr->map_flags & BPF_F_NO_CHARGE) ? 0 : __GFP_ACCOUNT; +} + +static inline gfp_t +bpf_flags_no_charge(gfp_t flags, bool no_charge) +{ + return flags |= no_charge ? 0 : __GFP_ACCOUNT; +} + static inline bool map_value_has_spin_lock(const struct bpf_map *map) { return map->spin_lock_off >= 0; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7604e7d5438f..e2dba6cdd88d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1225,6 +1225,9 @@ enum {
/* Create a map that is suitable to be an inner map with dynamic max entries */ BPF_F_INNER_MAP = (1U << 12), + +/* Don't charge memory to memcg */ + BPF_F_NO_CHARGE = (1U << 13), };
/* Flags for BPF_PROG_QUERY. */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index cdaa1152436a..029f04588b1a 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -368,6 +368,7 @@ void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr) map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags); map->numa_node = bpf_map_attr_numa_node(attr); map->map_extra = attr->map_extra; + map->no_charge = attr->map_flags & BPF_F_NO_CHARGE; }
static int bpf_map_alloc_id(struct bpf_map *map) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 7604e7d5438f..e2dba6cdd88d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1225,6 +1225,9 @@ enum {
/* Create a map that is suitable to be an inner map with dynamic max entries */ BPF_F_INNER_MAP = (1U << 12), + +/* Don't charge memory to memcg */ + BPF_F_NO_CHARGE = (1U << 13), };
/* Flags for BPF_PROG_QUERY. */