From: Shaun Tancheff shaun.tancheff@hpe.com
For users who are unable to update to memcg-v2, this provides a method by which memcg-v1 can apply enough memory pressure to throttle filesystem I/O, or otherwise reduce the likelihood of being memcg OOM-killed, at the expense of reduced performance.
This patch extends the memcg-v1 legacy sysfs entries with: limit_in_bytes.min, limit_in_bytes.low and limit_in_bytes.high. Since old software will need to be updated to take advantage of the new files, a secondary method of setting min, low and high based on a percentage of the limit is also provided. The percentages are determined by module parameters.
The available module parameters can be set at kernel boot time, for example: memcontrol.memcg_min=10 memcontrol.memcg_low=30 memcontrol.memcg_high=80
This would set min to 10%, low to 30% and high to 80% of the value written to: /sys/fs/cgroup/memory/<grp>/memory.limit_in_bytes
Signed-off-by: Shaun Tancheff shaun.tancheff@hpe.com --- mm/memcontrol.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 83 insertions(+), 1 deletion(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 5abffe6f8389..eec6e6ed92f8 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -73,6 +73,18 @@
#include <trace/events/vmscan.h>
+static unsigned int memcg_v1_min_default_percent; +module_param_named(memcg_min, memcg_v1_min_default_percent, uint, 0600); +MODULE_PARM_DESC(memcg_min, "memcg v1 min default percent"); + +static unsigned int memcg_v1_low_default_percent; +module_param_named(memcg_low, memcg_v1_low_default_percent, uint, 0600); +MODULE_PARM_DESC(memcg_low, "memcg v1 low default percent"); + +static unsigned int memcg_v1_high_default_percent; +module_param_named(memcg_high, memcg_v1_high_default_percent, uint, 0600); +MODULE_PARM_DESC(memcg_high, "memcg v1 high default percent"); + struct cgroup_subsys memory_cgrp_subsys __read_mostly; EXPORT_SYMBOL(memory_cgrp_subsys);
@@ -208,6 +220,7 @@ enum res_type { _MEMSWAP, _KMEM, _TCP, + _MEM_V1, };
#define MEMFILE_PRIVATE(x, val) ((x) << 16 | (val)) @@ -3689,6 +3702,9 @@ enum { RES_MAX_USAGE, RES_FAILCNT, RES_SOFT_LIMIT, + RES_LIMIT_MIN, + RES_LIMIT_LOW, + RES_LIMIT_HIGH, };
static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css, @@ -3699,6 +3715,7 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
switch (MEMFILE_TYPE(cft->private)) { case _MEM: + case _MEM_V1: counter = &memcg->memory; break; case _MEMSWAP: @@ -3729,6 +3746,12 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css, return counter->failcnt; case RES_SOFT_LIMIT: return (u64)memcg->soft_limit * PAGE_SIZE; + case RES_LIMIT_MIN: + return (u64)READ_ONCE(memcg->memory.min); + case RES_LIMIT_LOW: + return (u64)READ_ONCE(memcg->memory.low); + case RES_LIMIT_HIGH: + return (u64)READ_ONCE(memcg->memory.high); default: BUG(); } @@ -3828,6 +3851,35 @@ static int memcg_update_tcp_max(struct mem_cgroup *memcg, unsigned long max) return ret; }
+static inline void mem_cgroup_v1_set_defaults(struct mem_cgroup *memcg, + u64 nr_pages) +{ + u64 max = (u64)(PAGE_COUNTER_MAX * PAGE_SIZE) / PAGE_SIZE; + u64 min, low, high; + + if (mem_cgroup_is_root(memcg) || max == nr_pages) + return; + + min = READ_ONCE(memcg->memory.min); + low = READ_ONCE(memcg->memory.low); + if (min || low) + return; + + if (!min && memcg_v1_min_default_percent) { + min = (nr_pages * memcg_v1_min_default_percent) / 100; + page_counter_set_min(&memcg->memory, min); + } + if (!low && memcg_v1_low_default_percent) { + low = (nr_pages * memcg_v1_low_default_percent) / 100; + page_counter_set_low(&memcg->memory, low); + } + high = READ_ONCE(memcg->memory.high); + if (high == PAGE_COUNTER_MAX && memcg_v1_high_default_percent) { + high = (nr_pages * memcg_v1_high_default_percent) / 100; + page_counter_set_high(&memcg->memory, high); + } +} + /* * The user of this function is... * RES_LIMIT. @@ -3851,6 +3903,11 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of, break; } switch (MEMFILE_TYPE(of_cft(of)->private)) { + case _MEM_V1: + ret = mem_cgroup_resize_max(memcg, nr_pages, false); + if (!ret) + mem_cgroup_v1_set_defaults(memcg, nr_pages); + break; case _MEM: ret = mem_cgroup_resize_max(memcg, nr_pages, false); break; @@ -4999,6 +5056,13 @@ static int mem_cgroup_slab_show(struct seq_file *m, void *p) } #endif
+static ssize_t memory_min_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off); +static ssize_t memory_low_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off); +static ssize_t memory_high_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off); + static struct cftype mem_cgroup_legacy_files[] = { { .name = "usage_in_bytes", @@ -5013,10 +5077,28 @@ static struct cftype mem_cgroup_legacy_files[] = { }, { .name = "limit_in_bytes", - .private = MEMFILE_PRIVATE(_MEM, RES_LIMIT), + .private = MEMFILE_PRIVATE(_MEM_V1, RES_LIMIT), .write = mem_cgroup_write, .read_u64 = mem_cgroup_read_u64, }, + { + .name = "limit_in_bytes.min", + .private = MEMFILE_PRIVATE(_MEM_V1, RES_LIMIT_MIN), + .write = memory_min_write, + .read_u64 = mem_cgroup_read_u64, + }, + { + .name = "limit_in_bytes.low", + .private = MEMFILE_PRIVATE(_MEM_V1, RES_LIMIT_LOW), + .write = memory_low_write, + .read_u64 = mem_cgroup_read_u64, + }, + { + .name = "limit_in_bytes.high", + .private = MEMFILE_PRIVATE(_MEM_V1, RES_LIMIT_HIGH), + .write = memory_high_write, + .read_u64 = mem_cgroup_read_u64, + }, { .name = "soft_limit_in_bytes", .private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT),
Hi,
Thanks for your patch.
FYI: kernel test robot notices the stable kernel rule is not satisfied.
Rule: 'Cc: stable@vger.kernel.org' or 'commit <sha1> upstream.' Subject: [PATCH] memcg-v1: Enable setting memory min, low, high Link: https://lore.kernel.org/stable/20230404205013.31520-1-shaun.tancheff%40gmail...
The check is based on https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html
Hi Shaun,
kernel test robot noticed the following build errors:
[auto build test ERROR on v6.3-rc5] [also build test ERROR on linus/master] [cannot apply to akpm-mm/mm-everything next-20230404] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Shaun-Tancheff/memcg-v1-Enabl... patch link: https://lore.kernel.org/r/20230404205013.31520-1-shaun.tancheff%40gmail.com patch subject: [PATCH] memcg-v1: Enable setting memory min, low, high config: i386-randconfig-a011-20230403 (https://download.01.org/0day-ci/archive/20230405/202304051011.6E3fABwV-lkp@i...) compiler: gcc-11 (Debian 11.3.0-8) 11.3.0 reproduce (this is a W=1 build): # https://github.com/intel-lab-lkp/linux/commit/180e4266c809a61c2711599c6462bd... git remote add linux-review https://github.com/intel-lab-lkp/linux git fetch --no-tags linux-review Shaun-Tancheff/memcg-v1-Enable-setting-memory-min-low-high/20230405-045143 git checkout 180e4266c809a61c2711599c6462bd719efed76c # save the config file mkdir build_dir && cp config build_dir/.config make W=1 O=build_dir ARCH=i386 olddefconfig make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash
If you fix the issue, kindly add following tag where applicable | Reported-by: kernel test robot lkp@intel.com | Link: https://lore.kernel.org/oe-kbuild-all/202304051011.6E3fABwV-lkp@intel.com/
All errors (new ones prefixed by >>):
ld: mm/memcontrol.o: in function `mem_cgroup_v1_set_defaults':
mm/memcontrol.c:3878: undefined reference to `__udivdi3'
vim +3878 mm/memcontrol.c
3853 3854 static inline void mem_cgroup_v1_set_defaults(struct mem_cgroup *memcg, 3855 u64 nr_pages) 3856 { 3857 u64 max = (u64)(PAGE_COUNTER_MAX * PAGE_SIZE) / PAGE_SIZE; 3858 u64 min, low, high; 3859 3860 if (mem_cgroup_is_root(memcg) || max == nr_pages) 3861 return; 3862 3863 min = READ_ONCE(memcg->memory.min); 3864 low = READ_ONCE(memcg->memory.low); 3865 if (min || low) 3866 return; 3867 3868 if (!min && memcg_v1_min_default_percent) { 3869 min = (nr_pages * memcg_v1_min_default_percent) / 100; 3870 page_counter_set_min(&memcg->memory, min); 3871 } 3872 if (!low && memcg_v1_low_default_percent) { 3873 low = (nr_pages * memcg_v1_low_default_percent) / 100; 3874 page_counter_set_low(&memcg->memory, low); 3875 } 3876 high = READ_ONCE(memcg->memory.high); 3877 if (high == PAGE_COUNTER_MAX && memcg_v1_high_default_percent) {
3878 high = (nr_pages * memcg_v1_high_default_percent) / 100;
3879 page_counter_set_high(&memcg->memory, high); 3880 } 3881 } 3882
Hi Shaun,
kernel test robot noticed the following build errors:
[auto build test ERROR on v6.3-rc5] [also build test ERROR on linus/master] [cannot apply to akpm-mm/mm-everything next-20230404] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Shaun-Tancheff/memcg-v1-Enabl... patch link: https://lore.kernel.org/r/20230404205013.31520-1-shaun.tancheff%40gmail.com patch subject: [PATCH] memcg-v1: Enable setting memory min, low, high config: m68k-randconfig-r023-20230403 (https://download.01.org/0day-ci/archive/20230405/202304051118.jpLmhRPu-lkp@i...) compiler: m68k-linux-gcc (GCC) 12.1.0 reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/intel-lab-lkp/linux/commit/180e4266c809a61c2711599c6462bd... git remote add linux-review https://github.com/intel-lab-lkp/linux git fetch --no-tags linux-review Shaun-Tancheff/memcg-v1-Enable-setting-memory-min-low-high/20230405-045143 git checkout 180e4266c809a61c2711599c6462bd719efed76c # save the config file mkdir build_dir && cp config build_dir/.config COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=m68k olddefconfig COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=m68k SHELL=/bin/bash
If you fix the issue, kindly add following tag where applicable | Reported-by: kernel test robot lkp@intel.com | Link: https://lore.kernel.org/oe-kbuild-all/202304051118.jpLmhRPu-lkp@intel.com/
All errors (new ones prefixed by >>):
m68k-linux-ld: m68k-linux-ld: DWARF error: could not find abbrev number 99017497 mm/memcontrol.o: in function `mem_cgroup_write':
memcontrol.c:(.text+0x4548): undefined reference to `__udivdi3' m68k-linux-ld: memcontrol.c:(.text+0x45b4): undefined reference to `__udivdi3'
m68k-linux-ld: memcontrol.c:(.text+0x45ec): undefined reference to `__udivdi3'
m68k-linux-ld: mm/memcontrol.o:(.debug_addr+0x1fc): undefined reference to `__udivdi3'
linux-stable-mirror@lists.linaro.org