From: Feng Zhou <zhoufeng.zf@bytedance.com>
When tracing sched-related functions such as enqueue_task_fair(), it is necessary to check whether a specified task, rather than the current task, is within a given cgroup.
Feng Zhou (2):
  bpf: Add bpf_task_under_cgroup() kfunc
  selftests/bpf: Add testcase for bpf_task_under_cgroup
Changelog:

v2->v3: Addressed comments from Alexei Starovoitov
- Modify the function's comment.
- Narrow down the testcase's hook point.
Details here:
https://lore.kernel.org/all/20230421090403.15515-1-zhoufeng.zf@bytedance.com...
v1->v2: Addressed comments from Alexei Starovoitov
- Add a kfunc instead.
Details here:
https://lore.kernel.org/all/20230420072657.80324-1-zhoufeng.zf@bytedance.com...
 kernel/bpf/helpers.c                          | 20 ++++++++
 tools/testing/selftests/bpf/DENYLIST.s390x    |  1 +
 .../bpf/prog_tests/task_under_cgroup.c        | 47 +++++++++++++++++++
 .../selftests/bpf/progs/cgrp_kfunc_common.h   |  1 +
 .../bpf/progs/test_task_under_cgroup.c        | 37 +++++++++++++++
 5 files changed, 106 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
From: Feng Zhou <zhoufeng.zf@bytedance.com>
Add a kfunc that is similar to the bpf_current_task_under_cgroup() helper. The difference is that it checks a designated task rather than the current task.
When hooking sched-related functions, it is sometimes necessary to check a specified task instead of the current task.
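For illustration only (not part of this patch), the sketch below shows how the kfunc could be called on a task taken from a tracepoint argument rather than on the current task. The tp_btf/sched_wakeup hook and the target_cgid variable are assumptions chosen for the example, not something this series adds:

/* Hypothetical usage sketch: count wakeups of tasks that belong to a given
 * cgroup. The woken task @p is not the task running this program.
 */
#include <vmlinux.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>

struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
void bpf_cgroup_release(struct cgroup *cgrp) __ksym;
long bpf_task_under_cgroup(struct task_struct *task, struct cgroup *ancestor) __ksym;

const volatile u64 target_cgid;         /* assumed to be set by user space */
u64 wakeups_in_cgroup;

SEC("tp_btf/sched_wakeup")
int BPF_PROG(on_sched_wakeup, struct task_struct *p)
{
        struct cgroup *cgrp;

        cgrp = bpf_cgroup_from_id(target_cgid);
        if (!cgrp)
                return 0;

        /* @p is the task being woken up, not the current task. */
        if (bpf_task_under_cgroup(p, cgrp))
                __sync_fetch_and_add(&wakeups_in_cgroup, 1);

        bpf_cgroup_release(cgrp);
        return 0;
}

char _license[] SEC("license") = "GPL";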
Signed-off-by: Feng Zhou <zhoufeng.zf@bytedance.com>
---
 kernel/bpf/helpers.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index bb6b4637ebf2..453cbd312366 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2149,6 +2149,25 @@ __bpf_kfunc struct cgroup *bpf_cgroup_from_id(u64 cgid)
                 return NULL;
         return cgrp;
 }
+
+/**
+ * bpf_task_under_cgroup - wrap task_under_cgroup_hierarchy() as a kfunc, test
+ * task's membership of cgroup ancestry.
+ * @task: the task to be tested
+ * @ancestor: possible ancestor of @task's cgroup
+ *
+ * Tests whether @task's default cgroup hierarchy is a descendant of @ancestor.
+ * It follows all the same rules as cgroup_is_descendant, and only applies
+ * to the default hierarchy.
+ */
+__bpf_kfunc long bpf_task_under_cgroup(struct task_struct *task,
+                                       struct cgroup *ancestor)
+{
+        if (unlikely(!ancestor || !task))
+                return -EINVAL;
+
+        return task_under_cgroup_hierarchy(task, ancestor);
+}
 #endif /* CONFIG_CGROUPS */
 
 /**
@@ -2400,6 +2419,7 @@ BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_cgroup_release, KF_RELEASE)
 BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_task_under_cgroup, KF_RCU)
 #endif
 BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL)
 BTF_SET8_END(generic_btf_ids)
From: Feng Zhou <zhoufeng.zf@bytedance.com>
test_progs: Tests new kfunc bpf_task_under_cgroup().
The BPF program saves the pid of the task that calls the getpgid syscall from within the given cgroup to remote_pid, which makes it convenient for the user-mode program to verify the test's correctness.
The user-mode program creates its own mount namespace, mounts the cgroup v2 hierarchy in there, calls the getpgid syscall, and then checks whether remote_pid and local_pid are equal.
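For reference, once the series is applied, the new test should be runnable on its own via test_progs' name filter (the test name is assumed from the prog_tests file added below):

  ./test_progs -t task_under_cgroup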
Signed-off-by: Feng Zhou <zhoufeng.zf@bytedance.com>
---
 tools/testing/selftests/bpf/DENYLIST.s390x    |  1 +
 .../bpf/prog_tests/task_under_cgroup.c        | 47 +++++++++++++++++++
 .../selftests/bpf/progs/cgrp_kfunc_common.h   |  1 +
 .../bpf/progs/test_task_under_cgroup.c        | 37 +++++++++++++++
 4 files changed, 86 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index c7463f3ec3c0..5061d9e24c16 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -26,3 +26,4 @@ user_ringbuf                    # failed to find kernel BTF type ID of
 verif_stats                     # trace_vprintk__open_and_load unexpected error: -9 (?)
 xdp_bonding                     # failed to auto-attach program 'trace_on_entry': -524 (trampoline)
 xdp_metadata                    # JIT does not support calling kernel function (kfunc)
+test_task_under_cgroup          # JIT does not support calling kernel function (kfunc)
diff --git a/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c b/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c
new file mode 100644
index 000000000000..6d5709a8203d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Bytedance */
+
+#include <sys/syscall.h>
+#include <test_progs.h>
+#include <cgroup_helpers.h>
+#include "test_task_under_cgroup.skel.h"
+
+#define FOO "/foo"
+
+void test_task_under_cgroup(void)
+{
+        struct test_task_under_cgroup *skel;
+        int ret, foo = -1;
+
+        foo = test__join_cgroup(FOO);
+        if (!ASSERT_OK(foo < 0, "cgroup_join_foo"))
+                return;
+
+        skel = test_task_under_cgroup__open();
+        if (!ASSERT_OK_PTR(skel, "test_task_under_cgroup__open"))
+                goto cleanup;
+
+        skel->rodata->local_pid = getpid();
+        skel->rodata->cgid = get_cgroup_id(FOO);
+
+        ret = test_task_under_cgroup__load(skel);
+        if (!ASSERT_OK(ret, "test_task_under_cgroup__load"))
+                goto cleanup;
+
+        ret = test_task_under_cgroup__attach(skel);
+        if (!ASSERT_OK(ret, "test_task_under_cgroup__attach"))
+                goto cleanup;
+
+        syscall(SYS_getpgid);
+
+        test_task_under_cgroup__detach(skel);
+
+        ASSERT_EQ(skel->bss->remote_pid, skel->rodata->local_pid,
+                  "test task_under_cgroup");
+
+cleanup:
+        if (foo >= 0)
+                close(foo);
+
+        test_task_under_cgroup__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h
index 22914a70db54..001c416b42bc 100644
--- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h
+++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h
@@ -26,6 +26,7 @@ struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) __ksym;
 struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
 void bpf_rcu_read_lock(void) __ksym;
 void bpf_rcu_read_unlock(void) __ksym;
+int bpf_task_under_cgroup(struct task_struct *task, struct cgroup *ancestor) __ksym;
 
 static inline struct __cgrps_kfunc_map_value *cgrps_kfunc_map_value_lookup(struct cgroup *cgrp)
 {
diff --git a/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
new file mode 100644
index 000000000000..8f23a2933fde
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Bytedance */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+#include "cgrp_kfunc_common.h"
+
+const volatile int local_pid;
+const volatile long cgid;
+int remote_pid;
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int sys_getpgid(void *ctx)
+{
+        struct cgroup *cgrp;
+
+        if (local_pid != (bpf_get_current_pid_tgid() >> 32))
+                return 0;
+
+        cgrp = bpf_cgroup_from_id(cgid);
+        if (!cgrp)
+                return 0;
+
+        if (!bpf_task_under_cgroup(bpf_get_current_task_btf(), cgrp))
+                goto out;
+
+        remote_pid = local_pid;
+
+out:
+        bpf_cgroup_release(cgrp);
+        return 0;
+}
+
+char _license[] SEC("license") = "GPL";
On 4/26/23 7:30 PM, Feng zhou wrote:
From: Feng Zhou <zhoufeng.zf@bytedance.com>
test_progs: Tests new kfunc bpf_task_under_cgroup().
The BPF program saves the pid of the task that calls the getpgid syscall from within the given cgroup to remote_pid, which makes it convenient for the user-mode program to verify the test's correctness.
The user-mode program creates its own mount namespace, mounts the cgroup v2 hierarchy in there, calls the getpgid syscall, and then checks whether remote_pid and local_pid are equal.
Signed-off-by: Feng Zhou <zhoufeng.zf@bytedance.com>
---
 tools/testing/selftests/bpf/DENYLIST.s390x    |  1 +
 .../bpf/prog_tests/task_under_cgroup.c        | 47 +++++++++++++++++++
 .../selftests/bpf/progs/cgrp_kfunc_common.h   |  1 +
 .../bpf/progs/test_task_under_cgroup.c        | 37 +++++++++++++++
 4 files changed, 86 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index c7463f3ec3c0..5061d9e24c16 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -26,3 +26,4 @@ user_ringbuf                    # failed to find kernel BTF type ID of
 verif_stats                     # trace_vprintk__open_and_load unexpected error: -9 (?)
 xdp_bonding                     # failed to auto-attach program 'trace_on_entry': -524 (trampoline)
 xdp_metadata                    # JIT does not support calling kernel function (kfunc)
+test_task_under_cgroup          # JIT does not support calling kernel function (kfunc)
diff --git a/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c b/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c
new file mode 100644
index 000000000000..6d5709a8203d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Bytedance */
+
+#include <sys/syscall.h>
+#include <test_progs.h>
+#include <cgroup_helpers.h>
+#include "test_task_under_cgroup.skel.h"
+
+#define FOO "/foo"
+
+void test_task_under_cgroup(void)
+{
+        struct test_task_under_cgroup *skel;
+        int ret, foo = -1;
+
+        foo = test__join_cgroup(FOO);
+        if (!ASSERT_OK(foo < 0, "cgroup_join_foo"))
+                return;
+
+        skel = test_task_under_cgroup__open();
+        if (!ASSERT_OK_PTR(skel, "test_task_under_cgroup__open"))
+                goto cleanup;
+
+        skel->rodata->local_pid = getpid();
+        skel->rodata->cgid = get_cgroup_id(FOO);
+
+        ret = test_task_under_cgroup__load(skel);
+        if (!ASSERT_OK(ret, "test_task_under_cgroup__load"))
+                goto cleanup;
+
+        ret = test_task_under_cgroup__attach(skel);
+        if (!ASSERT_OK(ret, "test_task_under_cgroup__attach"))
+                goto cleanup;
+
+        syscall(SYS_getpgid);
+
+        test_task_under_cgroup__detach(skel);
+
+        ASSERT_EQ(skel->bss->remote_pid, skel->rodata->local_pid,
+                  "test task_under_cgroup");
+
+cleanup:
+        if (foo >= 0)
+                close(foo);
+
+        test_task_under_cgroup__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h
index 22914a70db54..001c416b42bc 100644
--- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h
+++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h
@@ -26,6 +26,7 @@ struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) __ksym;
 struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
 void bpf_rcu_read_lock(void) __ksym;
 void bpf_rcu_read_unlock(void) __ksym;
+int bpf_task_under_cgroup(struct task_struct *task, struct cgroup *ancestor) __ksym;
return type 'long'?
 
 static inline struct __cgrps_kfunc_map_value *cgrps_kfunc_map_value_lookup(struct cgroup *cgrp)
 {
diff --git a/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
new file mode 100644
index 000000000000..8f23a2933fde
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Bytedance */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+#include "cgrp_kfunc_common.h"
+
+const volatile int local_pid;
+const volatile long cgid;
+int remote_pid;
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int sys_getpgid(void *ctx)
+{
+        struct cgroup *cgrp;
+
+        if (local_pid != (bpf_get_current_pid_tgid() >> 32))
+                return 0;
+
+        cgrp = bpf_cgroup_from_id(cgid);
+        if (!cgrp)
+                return 0;
+
+        if (!bpf_task_under_cgroup(bpf_get_current_task_btf(), cgrp))
+                goto out;
The whole point of using bpf_task_under_cgroup() is to test whether a non-current task is under a particular cgroup.

Tracing the kernel function enqueue_task_fair() is an option, but it may be inlined by certain compilers since enqueue_task_fair() is a static function. Also, the 'task' argument of enqueue_task_fair() is not trusted; it could be a NULL pointer and is not RCU safe either. We could promote 'task' in enqueue_task_fair() by:
- checking for a NULL pointer, and if it is not NULL,
- increasing the reference count by 1 if the current reference count is not 0 (using the bpf_task_acquire kfunc);
- once the reference count has been increased, the pointer becomes trusted, and bpf_task_under_cgroup() can then be used.

Could you try this approach, or even better, find another non-static function that also takes a task (not the current task)? A rough sketch of the acquire-then-test pattern follows below.
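Not part of this thread's patches, but to make the suggested acquire-then-test pattern concrete, here is a rough sketch. It uses tp_btf/task_newtask purely as an example of a hook whose task argument is not the current task; the hook choice and target_cgid are assumptions for illustration, not a claim about enqueue_task_fair():

/* Hypothetical sketch: acquire the non-current task first, then test its
 * cgroup membership with bpf_task_under_cgroup().
 */
#include <vmlinux.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>

struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym;
void bpf_task_release(struct task_struct *p) __ksym;
struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
void bpf_cgroup_release(struct cgroup *cgrp) __ksym;
long bpf_task_under_cgroup(struct task_struct *task, struct cgroup *ancestor) __ksym;

const volatile u64 target_cgid;         /* assumed to be set by user space */
int hits;

SEC("tp_btf/task_newtask")
int BPF_PROG(handle_task_newtask, struct task_struct *task, u64 clone_flags)
{
        struct task_struct *acquired;
        struct cgroup *cgrp;

        /* Take a reference; a successfully acquired pointer is trusted. */
        acquired = bpf_task_acquire(task);
        if (!acquired)
                return 0;

        cgrp = bpf_cgroup_from_id(target_cgid);
        if (!cgrp)
                goto release_task;

        if (bpf_task_under_cgroup(acquired, cgrp))
                __sync_fetch_and_add(&hits, 1);

        bpf_cgroup_release(cgrp);
release_task:
        bpf_task_release(acquired);
        return 0;
}

char _license[] SEC("license") = "GPL";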
+
+        remote_pid = local_pid;
+
+out:
+        bpf_cgroup_release(cgrp);
+        return 0;
+}
+
+char _license[] SEC("license") = "GPL";