- Linux-kselftest-mirror - lists.linaro.org

[PATCH bpf-next v10 1/2] selftests/bpf: trace_helpers.c: optimize kallsyms cache

by Rong Tao

From: Rong Tao <rongtao(a)cestc.cn> Static ksyms often have problems because the number of symbols exceeds the MAX_SYMS limit. Like changing the MAX_SYMS from 300000 to 400000 in commit e76a014334a6("selftests/bpf: Bump and validate MAX_SYMS") solves the problem somewhat, but it's not the perfect way. This commit uses dynamic memory allocation, which completely solves the problem caused by the limitation of the number of kallsyms. At the same time, add APIs: load_kallsyms_local() ksym_search_local() ksym_get_addr_local() There are used to solve the problem of selftests/bpf updating kallsyms after attach new symbols during testmod testing. Acked-by: Stanislav Fomichev <sdf(a)google.com> Signed-off-by: Rong Tao <rongtao(a)cestc.cn> --- v10: Keep the original load_kallsyms(). v9: https://lore.kernel.org/lkml/tencent_254B7015EED7A5D112C45E033DA1822CF107@q… Add load_kallsyms_local,ksym_search_local,ksym_get_addr_local functions. v8: https://lore.kernel.org/lkml/tencent_6D23FE187408D965E95DFAA858BC7E8C760A@q… Resolves inter-thread contention for ksyms global variables. v7: https://lore.kernel.org/lkml/tencent_BD6E19C00BF565CD5C36A9A0BD828CFA210A@q… Fix __must_check macro. v6: https://lore.kernel.org/lkml/tencent_4A09A36F883A06EA428A593497642AF8AF08@q… Apply libbpf_ensure_mem() v5: https://lore.kernel.org/lkml/tencent_0E9E1A1C0981678D5E7EA9E4BDBA8EE2200A@q… Release the allocated memory once the load_kallsyms_refresh() upon error given it's dynamically allocated. v4: https://lore.kernel.org/lkml/tencent_59C74613113F0C728524B2A82FE5540A5E09@q… Make sure most cases we don't need the realloc() path to begin with, and check strdup() return value. v3: https://lore.kernel.org/lkml/tencent_50B4B2622FE7546A5FF9464310650C008509@q… Do not use structs and judge ksyms__add_symbol function return value. v2: https://lore.kernel.org/lkml/tencent_B655EE5E5D463110D70CD2846AB3262EED09@q… Do the usual len/capacity scheme here to amortize the cost of realloc, and don't free symbols. v1: https://lore.kernel.org/lkml/tencent_AB461510B10CD484E0B2F62E3754165F2909@q… --- samples/bpf/Makefile | 4 + .../selftests/bpf/prog_tests/fill_link_info.c | 9 +- .../prog_tests/kprobe_multi_testmod_test.c | 24 ++- tools/testing/selftests/bpf/trace_helpers.c | 137 +++++++++++++----- tools/testing/selftests/bpf/trace_helpers.h | 10 +- 5 files changed, 137 insertions(+), 47 deletions(-) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 4ccf4236031c..6c707ebcebb9 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -175,6 +175,7 @@ TPROGS_CFLAGS += -I$(srctree)/tools/testing/selftests/bpf/ TPROGS_CFLAGS += -I$(LIBBPF_INCLUDE) TPROGS_CFLAGS += -I$(srctree)/tools/include TPROGS_CFLAGS += -I$(srctree)/tools/perf +TPROGS_CFLAGS += -I$(srctree)/tools/lib TPROGS_CFLAGS += -DHAVE_ATTR_TEST=0 ifdef SYSROOT @@ -314,6 +315,9 @@ XDP_SAMPLE_CFLAGS += -Wall -O2 \ $(obj)/$(XDP_SAMPLE): TPROGS_CFLAGS = $(XDP_SAMPLE_CFLAGS) $(obj)/$(XDP_SAMPLE): $(src)/xdp_sample_user.h $(src)/xdp_sample_shared.h +# Override includes for trace_helpers.o because __must_check won't be defined +# in our include path. +$(obj)/$(TRACE_HELPERS): TPROGS_CFLAGS := $(TPROGS_CFLAGS) -D__must_check= -include $(BPF_SAMPLES_PATH)/Makefile.target diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c index 9d768e083714..13e618317c8b 100644 --- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c +++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c @@ -302,16 +302,18 @@ void test_fill_link_info(void) { struct test_fill_link_info *skel; int i; + struct ksyms *ksyms; skel = test_fill_link_info__open_and_load(); if (!ASSERT_OK_PTR(skel, "skel_open")) return; /* load kallsyms to compare the addr */ - if (!ASSERT_OK(load_kallsyms_refresh(), "load_kallsyms_refresh")) + ksyms = load_kallsyms_local(); + if (!ASSERT_OK_PTR(ksyms, "load_kallsyms_local")) goto cleanup; - kprobe_addr = ksym_get_addr(KPROBE_FUNC); + kprobe_addr = ksym_get_addr_local(ksyms, KPROBE_FUNC); if (test__start_subtest("kprobe_link_info")) test_kprobe_fill_link_info(skel, BPF_PERF_EVENT_KPROBE, false); if (test__start_subtest("kretprobe_link_info")) @@ -329,7 +331,7 @@ void test_fill_link_info(void) qsort(kmulti_syms, KMULTI_CNT, sizeof(kmulti_syms[0]), symbols_cmp_r); for (i = 0; i < KMULTI_CNT; i++) - kmulti_addrs[i] = ksym_get_addr(kmulti_syms[i]); + kmulti_addrs[i] = ksym_get_addr_local(ksyms, kmulti_syms[i]); if (test__start_subtest("kprobe_multi_link_info")) test_kprobe_multi_fill_link_info(skel, false, false); if (test__start_subtest("kretprobe_multi_link_info")) @@ -339,4 +341,5 @@ void test_fill_link_info(void) cleanup: test_fill_link_info__destroy(skel); + free_kallsyms_local(ksyms); } diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c index 1fbe7e4ac00a..532b05ae2da4 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c @@ -4,6 +4,8 @@ #include "trace_helpers.h" #include "bpf/libbpf_internal.h" +static struct ksyms *ksyms; + static void kprobe_multi_testmod_check(struct kprobe_multi *skel) { ASSERT_EQ(skel->bss->kprobe_testmod_test1_result, 1, "kprobe_test1_result"); @@ -50,12 +52,12 @@ static void test_testmod_attach_api_addrs(void) LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); unsigned long long addrs[3]; - addrs[0] = ksym_get_addr("bpf_testmod_fentry_test1"); - ASSERT_NEQ(addrs[0], 0, "ksym_get_addr"); - addrs[1] = ksym_get_addr("bpf_testmod_fentry_test2"); - ASSERT_NEQ(addrs[1], 0, "ksym_get_addr"); - addrs[2] = ksym_get_addr("bpf_testmod_fentry_test3"); - ASSERT_NEQ(addrs[2], 0, "ksym_get_addr"); + addrs[0] = ksym_get_addr_local(ksyms, "bpf_testmod_fentry_test1"); + ASSERT_NEQ(addrs[0], 0, "ksym_get_addr_local"); + addrs[1] = ksym_get_addr_local(ksyms, "bpf_testmod_fentry_test2"); + ASSERT_NEQ(addrs[1], 0, "ksym_get_addr_local"); + addrs[2] = ksym_get_addr_local(ksyms, "bpf_testmod_fentry_test3"); + ASSERT_NEQ(addrs[2], 0, "ksym_get_addr_local"); opts.addrs = (const unsigned long *) addrs; opts.cnt = ARRAY_SIZE(addrs); @@ -79,11 +81,19 @@ static void test_testmod_attach_api_syms(void) void serial_test_kprobe_multi_testmod_test(void) { - if (!ASSERT_OK(load_kallsyms_refresh(), "load_kallsyms_refresh")) + ksyms = load_kallsyms_local(); + if (!ASSERT_OK_PTR(ksyms, "load_kallsyms_local")) return; if (test__start_subtest("testmod_attach_api_syms")) test_testmod_attach_api_syms(); + + ksyms = load_kallsyms_refresh(ksyms); + if (!ASSERT_OK_PTR(ksyms, "load_kallsyms_refresh")) + return; + if (test__start_subtest("testmod_attach_api_addrs")) test_testmod_attach_api_addrs(); + + free_kallsyms_local(ksyms); } diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index f83d9f65c65b..d64c4ef336e1 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -14,104 +14,171 @@ #include <linux/limits.h> #include <libelf.h> #include <gelf.h> +#include "bpf/libbpf_internal.h" #define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe" #define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe" -#define MAX_SYMS 400000 -static struct ksym syms[MAX_SYMS]; -static int sym_cnt; +struct ksyms { + struct ksym *syms; + size_t sym_cap; + size_t sym_cnt; +}; + +static struct ksyms *ksyms; + +static int ksyms__add_symbol(struct ksyms *ksyms, const char *name, + unsigned long addr) +{ + void *tmp; + + tmp = strdup(name); + if (!tmp) + return -ENOMEM; + ksyms->syms[ksyms->sym_cnt].addr = addr; + ksyms->syms[ksyms->sym_cnt].name = tmp; + + ksyms->sym_cnt++; + + return 0; +} + +void free_kallsyms_local(struct ksyms *ksyms) +{ + unsigned int i; + + if (!ksyms) + return; + + if (!ksyms->syms) { + free(ksyms); + return; + } + + for (i = 0; i < ksyms->sym_cnt; i++) + free(ksyms->syms[i].name); + free(ksyms->syms); + free(ksyms); +} static int ksym_cmp(const void *p1, const void *p2) { return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr; } -int load_kallsyms_refresh(void) +struct ksyms *load_kallsyms_refresh(struct ksyms *ksyms) { FILE *f; char func[256], buf[256]; char symbol; void *addr; - int i = 0; + int ret; - sym_cnt = 0; + /* flush kallsyms, free the previously allocated dynamic memory */ + free_kallsyms_local(ksyms); f = fopen("/proc/kallsyms", "r"); if (!f) - return -ENOENT; + return NULL; + + ksyms = calloc(1, sizeof(struct ksyms)); + if (!ksyms) + return NULL; while (fgets(buf, sizeof(buf), f)) { if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3) break; if (!addr) continue; - if (i >= MAX_SYMS) - return -EFBIG; - syms[i].addr = (long) addr; - syms[i].name = strdup(func); - i++; + ret = libbpf_ensure_mem((void **) &ksyms->syms, &ksyms->sym_cap, + sizeof(struct ksym), ksyms->sym_cnt + 1); + if (ret) + goto error; + ret = ksyms__add_symbol(ksyms, func, (unsigned long)addr); + if (ret) + goto error; } fclose(f); - sym_cnt = i; - qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp); - return 0; + qsort(ksyms->syms, ksyms->sym_cnt, sizeof(struct ksym), ksym_cmp); + return ksyms; + +error: + free_kallsyms_local(ksyms); + return NULL; +} + +struct ksyms *load_kallsyms_local(void) +{ + return load_kallsyms_refresh(NULL); } int load_kallsyms(void) { - /* - * This is called/used from multiplace places, - * load symbols just once. - */ - if (sym_cnt) - return 0; - return load_kallsyms_refresh(); + if (!ksyms) + ksyms = load_kallsyms_local(); + return ksyms ? 0 : 1; } -struct ksym *ksym_search(long key) +struct ksym *ksym_search_local(struct ksyms *ksyms, long key) { - int start = 0, end = sym_cnt; + int start = 0, end = ksyms->sym_cnt; int result; + if (!ksyms) + return NULL; + /* kallsyms not loaded. return NULL */ - if (sym_cnt <= 0) + if (ksyms->sym_cnt <= 0) return NULL; while (start < end) { size_t mid = start + (end - start) / 2; - result = key - syms[mid].addr; + result = key - ksyms->syms[mid].addr; if (result < 0) end = mid; else if (result > 0) start = mid + 1; else - return &syms[mid]; + return &ksyms->syms[mid]; } - if (start >= 1 && syms[start - 1].addr < key && - key < syms[start].addr) + if (start >= 1 && ksyms->syms[start - 1].addr < key && + key < ksyms->syms[start].addr) /* valid ksym */ - return &syms[start - 1]; + return &ksyms->syms[start - 1]; /* out of range. return _stext */ - return &syms[0]; + return &ksyms->syms[0]; } -long ksym_get_addr(const char *name) +struct ksym *ksym_search(long key) +{ + if (!ksyms) + return NULL; + return ksym_search_local(ksyms, key); +} + +long ksym_get_addr_local(struct ksyms *ksyms, const char *name) { int i; - for (i = 0; i < sym_cnt; i++) { - if (strcmp(syms[i].name, name) == 0) - return syms[i].addr; + for (i = 0; i < ksyms->sym_cnt; i++) { + if (strcmp(ksyms->syms[i].name, name) == 0) + return ksyms->syms[i].addr; } return 0; } +long ksym_get_addr(const char *name) +{ + if (!ksyms) + return 0; + return ksym_get_addr_local(ksyms, name); +} + /* open kallsyms and read symbol addresses on the fly. Without caching all symbols, * this is faster than load + find. */ diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h index 876f3e711df6..1449a107240c 100644 --- a/tools/testing/selftests/bpf/trace_helpers.h +++ b/tools/testing/selftests/bpf/trace_helpers.h @@ -11,13 +11,19 @@ struct ksym { long addr; char *name; }; +struct ksyms; int load_kallsyms(void); -int load_kallsyms_refresh(void); - struct ksym *ksym_search(long key); long ksym_get_addr(const char *name); +struct ksyms *load_kallsyms_local(void); +struct ksym *ksym_search_local(struct ksyms *ksyms, long key); +long ksym_get_addr_local(struct ksyms *ksyms, const char *name); + +struct ksyms *load_kallsyms_refresh(struct ksyms *ksyms); +void free_kallsyms_local(struct ksyms *ksyms); + /* open kallsyms and find addresses on the fly, faster than load + search. */ int kallsyms_find(const char *sym, unsigned long long *addr); -- 2.41.0

2 years, 3 months

2
2
0 0

[PATCH bpf-next v11 2/2] selftests/bpf: trace_helpers.c: Add a global ksyms initialization mutex

by Rong Tao

From: Rong Tao <rongtao(a)cestc.cn> As Jirka said [0], we just need to make sure that global ksyms initialization won't race. [0] https://lore.kernel.org/lkml/ZPCbAs3ItjRd8XVh@krava/ Signed-off-by: Rong Tao <rongtao(a)cestc.cn> --- tools/testing/selftests/bpf/trace_helpers.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 7d026c128252..411f87d5aac7 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -7,6 +7,7 @@ #include <errno.h> #include <fcntl.h> #include <poll.h> +#include <pthread.h> #include <unistd.h> #include <linux/perf_event.h> #include <sys/mman.h> @@ -26,6 +27,7 @@ struct ksyms { }; static struct ksyms *ksyms; +static pthread_mutex_t ksyms_mutex = PTHREAD_MUTEX_INITIALIZER; static int ksyms__add_symbol(struct ksyms *ksyms, const char *name, unsigned long addr) @@ -110,8 +112,10 @@ struct ksyms *load_kallsyms_local(struct ksyms *ksyms) int load_kallsyms(void) { + pthread_mutex_lock(&ksyms_mutex); if (!ksyms) ksyms = load_kallsyms_local(NULL); + pthread_mutex_unlock(&ksyms_mutex); return ksyms ? 0 : 1; } -- 2.41.0

2 years, 3 months

1
0
0 0

[PATCH bpf-next v11 0/2] selftests/bpf: Optimize kallsyms cache

by Rong Tao

From: Rong Tao <rongtao(a)cestc.cn> We need to optimize the kallsyms cache, including optimizations for the number of symbols limit, and, some test cases add new kernel symbols (such as testmods) and we need to refresh kallsyms (reload or refresh). Rong Tao (2): selftests/bpf: trace_helpers.c: optimize kallsyms cache selftests/bpf: trace_helpers.c: Add a global ksyms initialization mutex samples/bpf/Makefile | 4 + .../selftests/bpf/prog_tests/fill_link_info.c | 2 +- .../prog_tests/kprobe_multi_testmod_test.c | 20 ++- tools/testing/selftests/bpf/trace_helpers.c | 136 +++++++++++++----- tools/testing/selftests/bpf/trace_helpers.h | 9 +- 5 files changed, 126 insertions(+), 45 deletions(-) -- 2.41.0

2 years, 3 months

1
0
0 0

Don't fill the kernel log with memfd_create messages

by Alex Xu (Hello71)

Hi all, Recently "memfd: improve userspace warnings for missing exec-related flags" was merged. On my system, this is a regression, not an improvement, because the entire 256k kernel log buffer (default on x86) is filled with these warnings and "__do_sys_memfd_create: 122 callbacks suppressed". I haven't investigated too closely, but the most likely cause is Wayland libraries. This is too serious of a consequence for using an old API, especially considering how recently the flags were added. The vast majority of software has not had time to add the flags: glibc does not define the macros until 2.38 which was released less than one month ago, man-pages does not document the flags, and according to Debian Code Search, only systemd, stress-ng, and strace actually pass either of these flags. Furthermore, since old kernels reject unknown flags, it's not just a matter of defining and passing the flag; every program needs to add logic to handle EINVAL and try again. Some other way needs to be found to encourage userspace to add the flags; otherwise, this message will be patched out because the kernel log becomes unusable after running unupdated programs, which will still exist even after upstreams are fixed. In particular, AppImages, flatpaks, snaps, and similar app bundles contain vendored Wayland libraries which can be difficult or impossible to update. Thanks, Alex.

2 years, 3 months

3
2
0 0

[PATCH iproute2-next v3] iplink: bridge: Add support for bridge FDB learning limits

by Johannes Nixdorf

Support setting the FDB limit through ip link. The arguments is: - fdb_max_learned_entries: A 32-bit unsigned integer specifying the maximum number of learned FDB entries, with 0 disabling the limit. Also support reading back the current number of learned FDB entries in the bridge by this count. The returned value's name is: - fdb_n_learned_entries: A 32-bit unsigned integer specifying the current number of learned FDB entries. Example: # ip -d -j -p link show br0 [ { ... "linkinfo": { "info_kind": "bridge", "info_data": { ... "fdb_n_learned_entries": 2, "fdb_max_learned_entries": 0, ... } }, ... } ] # ip link set br0 type bridge fdb_max_learned_entries 1024 # ip -d -j -p link show br0 [ { ... "linkinfo": { "info_kind": "bridge", "info_data": { ... "fdb_n_learned_entries": 2, "fdb_max_learned_entries": 1024, ... } }, ... } ] Signed-off-by: Johannes Nixdorf <jnixdorf-oss(a)avm.de> --- Changes since v2: - Properly split the net-next and iproute2-next threads. (from review) - Changed to *_n_* instead of *_cur_*. (from review) - Use strcmp() instead of matches(). (from review) - Made names in code and documentation consistent. (from review) - Various documentation fixes. (from review) Changes since v1: - Sent out the first corresponding iproute2 patches. net-next v3: https://lore.kernel.org/netdev/20230905-fdb_limit-v3-0-7597cd500a82@avm.de/ v2: https://lore.kernel.org/netdev/20230619071444.14625-1-jnixdorf-oss@avm.de/ v1: https://lore.kernel.org/netdev/20230515085046.4457-1-jnixdorf-oss@avm.de/ --- include/uapi/linux/if_link.h | 2 ++ ip/iplink_bridge.c | 21 +++++++++++++++++++++ man/man8/ip-link.8.in | 10 ++++++++++ 3 files changed, 33 insertions(+) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index c2ca7a6add0e..51cf58e3171c 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -508,6 +508,8 @@ enum { IFLA_BR_VLAN_STATS_PER_PORT, IFLA_BR_MULTI_BOOLOPT, IFLA_BR_MCAST_QUERIER_STATE, + IFLA_BR_FDB_N_LEARNED_ENTRIES, + IFLA_BR_FDB_MAX_LEARNED_ENTRIES, __IFLA_BR_MAX, }; diff --git a/ip/iplink_bridge.c b/ip/iplink_bridge.c index 7e4e62c81c0c..f08754618e0f 100644 --- a/ip/iplink_bridge.c +++ b/ip/iplink_bridge.c @@ -34,6 +34,7 @@ static void print_explain(FILE *f) " [ group_fwd_mask MASK ]\n" " [ group_address ADDRESS ]\n" " [ no_linklocal_learn NO_LINKLOCAL_LEARN ]\n" + " [ fdb_max_learned_entries FDB_MAX_LEARNED_ENTRIES ]\n" " [ vlan_filtering VLAN_FILTERING ]\n" " [ vlan_protocol VLAN_PROTOCOL ]\n" " [ vlan_default_pvid VLAN_DEFAULT_PVID ]\n" @@ -168,6 +169,14 @@ static int bridge_parse_opt(struct link_util *lu, int argc, char **argv, bm.optval |= no_ll_learn_bit; else bm.optval &= ~no_ll_learn_bit; + } else if (strcmp(*argv, "fdb_max_learned_entries") == 0) { + __u32 fdb_max_learned_entries; + + NEXT_ARG(); + if (get_u32(&fdb_max_learned_entries, *argv, 0)) + invarg("invalid fdb_max_learned_entries", *argv); + + addattr32(n, 1024, IFLA_BR_FDB_MAX_LEARNED_ENTRIES, fdb_max_learned_entries); } else if (matches(*argv, "fdb_flush") == 0) { addattr(n, 1024, IFLA_BR_FDB_FLUSH); } else if (matches(*argv, "vlan_default_pvid") == 0) { @@ -544,6 +553,18 @@ static void bridge_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[]) if (tb[IFLA_BR_GC_TIMER]) _bridge_print_timer(f, "gc_timer", tb[IFLA_BR_GC_TIMER]); + if (tb[IFLA_BR_FDB_N_LEARNED_ENTRIES]) + print_uint(PRINT_ANY, + "fdb_n_learned_entries", + "fdb_n_learned_entries %u ", + rta_getattr_u32(tb[IFLA_BR_FDB_N_LEARNED_ENTRIES])); + + if (tb[IFLA_BR_FDB_MAX_LEARNED_ENTRIES]) + print_uint(PRINT_ANY, + "fdb_max_learned_entries", + "fdb_max_learned_entries %u ", + rta_getattr_u32(tb[IFLA_BR_FDB_MAX_LEARNED_ENTRIES])); + if (tb[IFLA_BR_VLAN_DEFAULT_PVID]) print_uint(PRINT_ANY, "vlan_default_pvid", diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in index 7365d0c6b14f..184ae9183712 100644 --- a/man/man8/ip-link.8.in +++ b/man/man8/ip-link.8.in @@ -1630,6 +1630,8 @@ the following additional arguments are supported: ] [ .BI no_linklocal_learn " NO_LINKLOCAL_LEARN " ] [ +.BI fdb_max_learned_entries " FDB_MAX_LEARNED_ENTRIES " +] [ .BI vlan_filtering " VLAN_FILTERING " ] [ .BI vlan_protocol " VLAN_PROTOCOL " @@ -1741,6 +1743,14 @@ or off When disabled, the bridge will not learn from link-local frames (default: enabled). +.BI fdb_max_learned_entries " FDB_MAX_LEARNED_ENTRIES " +- set the maximum number of learned FDB entries. If +.RI ( FDB_MAX_LEARNED_ENTRIES " == 0) " +the feature is disabled. Default is +.BR 0 . +.I FDB_MAX_LEARNED_ENTRIES +is a 32bit unsigned integer. + .BI vlan_filtering " VLAN_FILTERING " - turn VLAN filtering on .RI ( VLAN_FILTERING " > 0) " --- base-commit: 865dd3ab1580092221c73317a844ee65f032c9e8 change-id: 20230905-fdb_limit-ace1467c6a84 Best regards, -- Johannes Nixdorf <jnixdorf-oss(a)avm.de>

2 years, 3 months

1
0
0 0

[PATCH 0/2] fcntl: add fcntl(F_CHECK_ORIGINAL_MEMFD)

by Michal Clapinski

This change introduces a new fcntl to check if an fd points to a memfd's original open fd (the one created by memfd_create). We encountered an issue with migrating memfds in CRIU (checkpoint restore in userspace - it migrates running processes between machines). Imagine a scenario: 1. Create a memfd. By default it's open with O_RDWR and yet one can exec() to it (unlike with regular files, where one would get ETXTBSY). 2. Reopen that memfd with O_RDWR via /proc/self/fd/<fd>. Now those 2 fds are indistinguishable from userspace. You can't exec() to either of them (since the reopen incremented inode->i_writecount) and their /proc/self/fdinfo/ are exactly the same. Unfortunately they are not the same. If you close the second one, the first one becomes exec()able again. If you close the first one, the other doesn't become exec()able. Therefore during migration it does matter which is recreated first and which is reopened but there is no way for CRIU to tell which was first. Michal Clapinski (2): fcntl: add fcntl(F_CHECK_ORIGINAL_MEMFD) selftests: test fcntl(F_CHECK_ORIGINAL_MEMFD) fs/fcntl.c | 3 ++ include/uapi/linux/fcntl.h | 9 ++++++ tools/testing/selftests/memfd/memfd_test.c | 32 ++++++++++++++++++++++ 3 files changed, 44 insertions(+) -- 2.42.0.283.g2d96d420d3-goog

2 years, 3 months

4
8
0 0

[PATCH v2 0/4] kunit: Fix some bugs in kunit_filter_suites()

by Jinjie Ruan

The state handle in kunit_module_notify() is not correct when the mod->state switch from MODULE_STATE_COMING to MODULE_STATE_GOING. And it's necessary to check NULL for kzalloc() in kunit_parse_glob_filter(). The order in which memory is released in err path in kunit_filter_suites() is also problematic. And there is a possible memory leak in kunit_filter_suites(). This patchset fix the above issues. Changes in v2: - Adjust the 4th patch to be the second. - Add goto labels in kunit_filter_suites() and adapt to it. - Fix the issue in the third patch. - Update the commit message and title. Jinjie Ruan (4): kunit: Fix wild-memory-access bug in kunit_free_suite_set() kunit: Fix the wrong err path and add goto labels in kunit_filter_suites() kunit: Fix possible null-ptr-deref in kunit_parse_glob_filter() kunit: Fix possible memory leak in kunit_filter_suites() lib/kunit/executor.c | 48 ++++++++++++++++++++++++++++++-------------- lib/kunit/test.c | 3 ++- 2 files changed, 35 insertions(+), 16 deletions(-) -- 2.34.1

2 years, 3 months

3
10
0 0

[PATCH v4 10/10] selftests/bpf/sockopt: Add io_uring support

by Breno Leitao

Expand the sockopt test to use also check for io_uring {g,s}etsockopt commands operations. This patch starts by marking each test if they support io_uring support or not. Right now, io_uring cmd getsockopt() has a limitation of only accepting level == SOL_SOCKET, otherwise it returns -EOPNOTSUPP. Since there aren't any test exercising getsockopt(level == SOL_SOCKET), this patch changes two tests to use level == SOL_SOCKET, they are "getsockopt: support smaller ctx->optlen" and "getsockopt: read ctx->optlen". There is no limitation for the setsockopt() part. Later, each test runs using regular {g,s}etsockopt systemcalls, and, if liburing is supported, execute the same test (again), but calling liburing {g,s}setsockopt commands. This patch also changes the level of two tests to use SOL_SOCKET for the following two tests. This is going to help to exercise the io_uring subsystem: * getsockopt: read ctx->optlen * getsockopt: support smaller ctx->optlen Signed-off-by: Breno Leitao <leitao(a)debian.org> --- .../selftests/bpf/prog_tests/sockopt.c | 113 +++++++++++++++++- 1 file changed, 107 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt.c b/tools/testing/selftests/bpf/prog_tests/sockopt.c index 9e6a5e3ed4de..9b1f7f0b1f7e 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include <io_uring/mini_liburing.h> #include "cgroup_helpers.h" static char bpf_log_buf[4096]; @@ -38,6 +39,7 @@ static struct sockopt_test { socklen_t get_optlen_ret; enum sockopt_test_error error; + bool io_uring_support; } tests[] = { /* ==================== getsockopt ==================== */ @@ -251,7 +253,9 @@ static struct sockopt_test { .attach_type = BPF_CGROUP_GETSOCKOPT, .expected_attach_type = BPF_CGROUP_GETSOCKOPT, + .get_level = SOL_SOCKET, .get_optlen = 64, + .io_uring_support = true, }, { .descr = "getsockopt: deny bigger ctx->optlen", @@ -276,6 +280,7 @@ static struct sockopt_test { .get_optlen = 64, .error = EFAULT_GETSOCKOPT, + .io_uring_support = true, }, { .descr = "getsockopt: ignore >PAGE_SIZE optlen", @@ -318,6 +323,7 @@ static struct sockopt_test { .get_optval = {}, /* the changes are ignored */ .get_optlen = PAGE_SIZE + 1, .error = EOPNOTSUPP_GETSOCKOPT, + .io_uring_support = true, }, { .descr = "getsockopt: support smaller ctx->optlen", @@ -337,8 +343,10 @@ static struct sockopt_test { .attach_type = BPF_CGROUP_GETSOCKOPT, .expected_attach_type = BPF_CGROUP_GETSOCKOPT, + .get_level = SOL_SOCKET, .get_optlen = 64, .get_optlen_ret = 32, + .io_uring_support = true, }, { .descr = "getsockopt: deny writing to ctx->optval", @@ -518,6 +526,7 @@ static struct sockopt_test { .set_level = 123, .set_optlen = 1, + .io_uring_support = true, }, { .descr = "setsockopt: allow changing ctx->level", @@ -572,6 +581,7 @@ static struct sockopt_test { .set_optname = 123, .set_optlen = 1, + .io_uring_support = true, }, { .descr = "setsockopt: allow changing ctx->optname", @@ -624,6 +634,7 @@ static struct sockopt_test { .expected_attach_type = BPF_CGROUP_SETSOCKOPT, .set_optlen = 64, + .io_uring_support = true, }, { .descr = "setsockopt: ctx->optlen == -1 is ok", @@ -640,6 +651,7 @@ static struct sockopt_test { .expected_attach_type = BPF_CGROUP_SETSOCKOPT, .set_optlen = 64, + .io_uring_support = true, }, { .descr = "setsockopt: deny ctx->optlen < 0 (except -1)", @@ -658,6 +670,7 @@ static struct sockopt_test { .set_optlen = 4, .error = EFAULT_SETSOCKOPT, + .io_uring_support = true, }, { .descr = "setsockopt: deny ctx->optlen > input optlen", @@ -675,6 +688,7 @@ static struct sockopt_test { .set_optlen = 64, .error = EFAULT_SETSOCKOPT, + .io_uring_support = true, }, { .descr = "setsockopt: ignore >PAGE_SIZE optlen", @@ -940,7 +954,89 @@ static int load_prog(const struct bpf_insn *insns, return fd; } -static int run_test(int cgroup_fd, struct sockopt_test *test) +/* Core function that handles io_uring ring initialization, + * sending SQE with sockopt command and waiting for the CQE. + */ +static int uring_sockopt(int op, int fd, int level, int optname, + const void *optval, socklen_t optlen) +{ + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + struct io_uring ring; + int err; + + err = io_uring_queue_init(1, &ring, 0); + if (!ASSERT_OK(err, "Failed to initialize io_uring ring")) + return err; + + sqe = io_uring_get_sqe(&ring); + if (!ASSERT_NEQ(sqe, NULL, "Failed to get an SQE")) { + err = -1; + goto fail; + } + + io_uring_prep_cmd(sqe, op, fd, level, optname, optval, optlen); + + err = io_uring_submit(&ring); + if (!ASSERT_EQ(err, 1, "Failed to submit SQE")) + goto fail; + + err = io_uring_wait_cqe(&ring, &cqe); + if (!ASSERT_OK(err, "Failed to wait for CQE")) + goto fail; + + err = cqe->res; + +fail: + io_uring_queue_exit(&ring); + + return err; +} + +static int uring_setsockopt(int fd, int level, int optname, const void *optval, + socklen_t optlen) +{ + return uring_sockopt(SOCKET_URING_OP_SETSOCKOPT, fd, level, optname, + optval, optlen); +} + +static int uring_getsockopt(int fd, int level, int optname, void *optval, + socklen_t *optlen) +{ + int ret = uring_sockopt(SOCKET_URING_OP_GETSOCKOPT, fd, level, optname, + optval, *optlen); + if (ret < 0) + return ret; + + /* Populate optlen back to be compatible with systemcall interface, + * and simplify the test. + */ + *optlen = ret; + + return 0; +} + +/* Execute the setsocktopt operation */ +static int call_setsockopt(bool use_io_uring, int fd, int level, int optname, + const void *optval, socklen_t optlen) +{ + if (use_io_uring) + return uring_setsockopt(fd, level, optname, optval, optlen); + + return setsockopt(fd, level, optname, optval, optlen); +} + +/* Execute the getsocktopt operation */ +static int call_getsockopt(bool use_io_uring, int fd, int level, int optname, + void *optval, socklen_t *optlen) +{ + if (use_io_uring) + return uring_getsockopt(fd, level, optname, optval, optlen); + + return getsockopt(fd, level, optname, optval, optlen); +} + +static int run_test(int cgroup_fd, struct sockopt_test *test, bool use_io_uring) { int sock_fd, err, prog_fd; void *optval = NULL; @@ -980,8 +1076,9 @@ static int run_test(int cgroup_fd, struct sockopt_test *test) test->set_optlen = num_pages * sysconf(_SC_PAGESIZE) + remainder; } - err = setsockopt(sock_fd, test->set_level, test->set_optname, - test->set_optval, test->set_optlen); + err = call_setsockopt(use_io_uring, sock_fd, test->set_level, + test->set_optname, test->set_optval, + test->set_optlen); if (err) { if (errno == EPERM && test->error == EPERM_SETSOCKOPT) goto close_sock_fd; @@ -1008,8 +1105,8 @@ static int run_test(int cgroup_fd, struct sockopt_test *test) socklen_t expected_get_optlen = test->get_optlen_ret ?: test->get_optlen; - err = getsockopt(sock_fd, test->get_level, test->get_optname, - optval, &optlen); + err = call_getsockopt(use_io_uring, sock_fd, test->get_level, + test->get_optname, optval, &optlen); if (err) { if (errno == EOPNOTSUPP && test->error == EOPNOTSUPP_GETSOCKOPT) goto free_optval; @@ -1063,7 +1160,11 @@ void test_sockopt(void) if (!test__start_subtest(tests[i].descr)) continue; - ASSERT_OK(run_test(cgroup_fd, &tests[i]), tests[i].descr); + ASSERT_OK(run_test(cgroup_fd, &tests[i], false), + tests[i].descr); + if (tests[i].io_uring_support) + ASSERT_OK(run_test(cgroup_fd, &tests[i], true), + tests[i].descr); } close(cgroup_fd); -- 2.34.1

2 years, 3 months

1
0
0 0

[PATCH v4 06/10] selftests/net: Extract uring helpers to be reusable

by Breno Leitao

Instead of defining basic io_uring functions in the test case, move them to a common directory, so, other tests can use them. This simplify the test code and reuse the common liburing infrastructure. This is basically a copy of what we have in io_uring_zerocopy_tx with some minor improvements to make checkpatch happy. A follow-up test will use the same helpers in a BPF sockopt test. Signed-off-by: Breno Leitao <leitao(a)debian.org> --- tools/include/io_uring/mini_liburing.h | 282 ++++++++++++++++++ tools/testing/selftests/net/Makefile | 1 + .../selftests/net/io_uring_zerocopy_tx.c | 268 +---------------- 3 files changed, 285 insertions(+), 266 deletions(-) create mode 100644 tools/include/io_uring/mini_liburing.h diff --git a/tools/include/io_uring/mini_liburing.h b/tools/include/io_uring/mini_liburing.h new file mode 100644 index 000000000000..9ccb16074eb5 --- /dev/null +++ b/tools/include/io_uring/mini_liburing.h @@ -0,0 +1,282 @@ +/* SPDX-License-Identifier: MIT */ + +#include <linux/io_uring.h> +#include <sys/mman.h> +#include <sys/syscall.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> + +struct io_sq_ring { + unsigned int *head; + unsigned int *tail; + unsigned int *ring_mask; + unsigned int *ring_entries; + unsigned int *flags; + unsigned int *array; +}; + +struct io_cq_ring { + unsigned int *head; + unsigned int *tail; + unsigned int *ring_mask; + unsigned int *ring_entries; + struct io_uring_cqe *cqes; +}; + +struct io_uring_sq { + unsigned int *khead; + unsigned int *ktail; + unsigned int *kring_mask; + unsigned int *kring_entries; + unsigned int *kflags; + unsigned int *kdropped; + unsigned int *array; + struct io_uring_sqe *sqes; + + unsigned int sqe_head; + unsigned int sqe_tail; + + size_t ring_sz; +}; + +struct io_uring_cq { + unsigned int *khead; + unsigned int *ktail; + unsigned int *kring_mask; + unsigned int *kring_entries; + unsigned int *koverflow; + struct io_uring_cqe *cqes; + + size_t ring_sz; +}; + +struct io_uring { + struct io_uring_sq sq; + struct io_uring_cq cq; + int ring_fd; +}; + +#if defined(__x86_64) || defined(__i386__) +#define read_barrier() __asm__ __volatile__("":::"memory") +#define write_barrier() __asm__ __volatile__("":::"memory") +#else +#define read_barrier() __sync_synchronize() +#define write_barrier() __sync_synchronize() +#endif + +static inline int io_uring_mmap(int fd, struct io_uring_params *p, + struct io_uring_sq *sq, struct io_uring_cq *cq) +{ + size_t size; + void *ptr; + int ret; + + sq->ring_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned int); + ptr = mmap(0, sq->ring_sz, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING); + if (ptr == MAP_FAILED) + return -errno; + sq->khead = ptr + p->sq_off.head; + sq->ktail = ptr + p->sq_off.tail; + sq->kring_mask = ptr + p->sq_off.ring_mask; + sq->kring_entries = ptr + p->sq_off.ring_entries; + sq->kflags = ptr + p->sq_off.flags; + sq->kdropped = ptr + p->sq_off.dropped; + sq->array = ptr + p->sq_off.array; + + size = p->sq_entries * sizeof(struct io_uring_sqe); + sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQES); + if (sq->sqes == MAP_FAILED) { + ret = -errno; +err: + munmap(sq->khead, sq->ring_sz); + return ret; + } + + cq->ring_sz = p->cq_off.cqes + p->cq_entries * sizeof(struct io_uring_cqe); + ptr = mmap(0, cq->ring_sz, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING); + if (ptr == MAP_FAILED) { + ret = -errno; + munmap(sq->sqes, p->sq_entries * sizeof(struct io_uring_sqe)); + goto err; + } + cq->khead = ptr + p->cq_off.head; + cq->ktail = ptr + p->cq_off.tail; + cq->kring_mask = ptr + p->cq_off.ring_mask; + cq->kring_entries = ptr + p->cq_off.ring_entries; + cq->koverflow = ptr + p->cq_off.overflow; + cq->cqes = ptr + p->cq_off.cqes; + return 0; +} + +static inline int io_uring_setup(unsigned int entries, + struct io_uring_params *p) +{ + return syscall(__NR_io_uring_setup, entries, p); +} + +static inline int io_uring_enter(int fd, unsigned int to_submit, + unsigned int min_complete, + unsigned int flags, sigset_t *sig) +{ + return syscall(__NR_io_uring_enter, fd, to_submit, min_complete, + flags, sig, _NSIG / 8); +} + +static inline int io_uring_queue_init(unsigned int entries, + struct io_uring *ring, + unsigned int flags) +{ + struct io_uring_params p; + int fd, ret; + + memset(ring, 0, sizeof(*ring)); + memset(&p, 0, sizeof(p)); + p.flags = flags; + + fd = io_uring_setup(entries, &p); + if (fd < 0) + return fd; + ret = io_uring_mmap(fd, &p, &ring->sq, &ring->cq); + if (!ret) + ring->ring_fd = fd; + else + close(fd); + return ret; +} + +/* Get a sqe */ +static inline struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring) +{ + struct io_uring_sq *sq = &ring->sq; + + if (sq->sqe_tail + 1 - sq->sqe_head > *sq->kring_entries) + return NULL; + return &sq->sqes[sq->sqe_tail++ & *sq->kring_mask]; +} + +static inline int io_uring_wait_cqe(struct io_uring *ring, + struct io_uring_cqe **cqe_ptr) +{ + struct io_uring_cq *cq = &ring->cq; + const unsigned int mask = *cq->kring_mask; + unsigned int head = *cq->khead; + int ret; + + *cqe_ptr = NULL; + do { + read_barrier(); + if (head != *cq->ktail) { + *cqe_ptr = &cq->cqes[head & mask]; + break; + } + ret = io_uring_enter(ring->ring_fd, 0, 1, + IORING_ENTER_GETEVENTS, NULL); + if (ret < 0) + return -errno; + } while (1); + + return 0; +} + +static inline int io_uring_submit(struct io_uring *ring) +{ + struct io_uring_sq *sq = &ring->sq; + const unsigned int mask = *sq->kring_mask; + unsigned int ktail, submitted, to_submit; + int ret; + + read_barrier(); + if (*sq->khead != *sq->ktail) { + submitted = *sq->kring_entries; + goto submit; + } + if (sq->sqe_head == sq->sqe_tail) + return 0; + + ktail = *sq->ktail; + to_submit = sq->sqe_tail - sq->sqe_head; + for (submitted = 0; submitted < to_submit; submitted++) { + read_barrier(); + sq->array[ktail++ & mask] = sq->sqe_head++ & mask; + } + if (!submitted) + return 0; + + if (*sq->ktail != ktail) { + write_barrier(); + *sq->ktail = ktail; + write_barrier(); + } +submit: + ret = io_uring_enter(ring->ring_fd, submitted, 0, + IORING_ENTER_GETEVENTS, NULL); + return ret < 0 ? -errno : ret; +} + +static inline void io_uring_queue_exit(struct io_uring *ring) +{ + struct io_uring_sq *sq = &ring->sq; + + munmap(sq->sqes, *sq->kring_entries * sizeof(struct io_uring_sqe)); + munmap(sq->khead, sq->ring_sz); + close(ring->ring_fd); +} + +/* Prepare and send the SQE */ +static inline void io_uring_prep_cmd(struct io_uring_sqe *sqe, int op, + int sockfd, + int level, int optname, + const void *optval, + int optlen) +{ + memset(sqe, 0, sizeof(*sqe)); + sqe->opcode = (__u8)IORING_OP_URING_CMD; + sqe->fd = sockfd; + sqe->cmd_op = op; + + sqe->level = level; + sqe->optname = optname; + sqe->optval = (unsigned long long)optval; + sqe->optlen = optlen; +} + +static inline int io_uring_register_buffers(struct io_uring *ring, + const struct iovec *iovecs, + unsigned int nr_iovecs) +{ + int ret; + + ret = syscall(__NR_io_uring_register, ring->ring_fd, + IORING_REGISTER_BUFFERS, iovecs, nr_iovecs); + return (ret < 0) ? -errno : ret; +} + +static inline void io_uring_prep_send(struct io_uring_sqe *sqe, int sockfd, + const void *buf, size_t len, int flags) +{ + memset(sqe, 0, sizeof(*sqe)); + sqe->opcode = (__u8)IORING_OP_SEND; + sqe->fd = sockfd; + sqe->addr = (unsigned long)buf; + sqe->len = len; + sqe->msg_flags = (__u32)flags; +} + +static inline void io_uring_prep_sendzc(struct io_uring_sqe *sqe, int sockfd, + const void *buf, size_t len, int flags, + unsigned int zc_flags) +{ + io_uring_prep_send(sqe, sockfd, buf, len, flags); + sqe->opcode = (__u8)IORING_OP_SEND_ZC; + sqe->ioprio = zc_flags; +} + +static inline void io_uring_cqe_seen(struct io_uring *ring) +{ + *(&ring->cq)->khead += 1; + write_barrier(); +} diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 8b017070960d..f8d99837b9dc 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -98,6 +98,7 @@ $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma $(OUTPUT)/tcp_mmap: LDLIBS += -lpthread -lcrypto $(OUTPUT)/tcp_inq: LDLIBS += -lpthread $(OUTPUT)/bind_bhash: LDLIBS += -lpthread +$(OUTPUT)/io_uring_zerocopy_tx: CFLAGS += -I../../../include/ # Rules to generate bpf obj nat6to4.o CLANG ?= clang diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.c b/tools/testing/selftests/net/io_uring_zerocopy_tx.c index 154287740172..76e604e4810e 100644 --- a/tools/testing/selftests/net/io_uring_zerocopy_tx.c +++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.c @@ -36,6 +36,8 @@ #include <sys/un.h> #include <sys/wait.h> +#include <io_uring/mini_liburing.h> + #define NOTIF_TAG 0xfffffffULL #define NONZC_TAG 0 #define ZC_TAG 1 @@ -60,272 +62,6 @@ static struct sockaddr_storage cfg_dst_addr; static char payload[IP_MAXPACKET] __attribute__((aligned(4096))); -struct io_sq_ring { - unsigned *head; - unsigned *tail; - unsigned *ring_mask; - unsigned *ring_entries; - unsigned *flags; - unsigned *array; -}; - -struct io_cq_ring { - unsigned *head; - unsigned *tail; - unsigned *ring_mask; - unsigned *ring_entries; - struct io_uring_cqe *cqes; -}; - -struct io_uring_sq { - unsigned *khead; - unsigned *ktail; - unsigned *kring_mask; - unsigned *kring_entries; - unsigned *kflags; - unsigned *kdropped; - unsigned *array; - struct io_uring_sqe *sqes; - - unsigned sqe_head; - unsigned sqe_tail; - - size_t ring_sz; -}; - -struct io_uring_cq { - unsigned *khead; - unsigned *ktail; - unsigned *kring_mask; - unsigned *kring_entries; - unsigned *koverflow; - struct io_uring_cqe *cqes; - - size_t ring_sz; -}; - -struct io_uring { - struct io_uring_sq sq; - struct io_uring_cq cq; - int ring_fd; -}; - -#ifdef __alpha__ -# ifndef __NR_io_uring_setup -# define __NR_io_uring_setup 535 -# endif -# ifndef __NR_io_uring_enter -# define __NR_io_uring_enter 536 -# endif -# ifndef __NR_io_uring_register -# define __NR_io_uring_register 537 -# endif -#else /* !__alpha__ */ -# ifndef __NR_io_uring_setup -# define __NR_io_uring_setup 425 -# endif -# ifndef __NR_io_uring_enter -# define __NR_io_uring_enter 426 -# endif -# ifndef __NR_io_uring_register -# define __NR_io_uring_register 427 -# endif -#endif - -#if defined(__x86_64) || defined(__i386__) -#define read_barrier() __asm__ __volatile__("":::"memory") -#define write_barrier() __asm__ __volatile__("":::"memory") -#else - -#define read_barrier() __sync_synchronize() -#define write_barrier() __sync_synchronize() -#endif - -static int io_uring_setup(unsigned int entries, struct io_uring_params *p) -{ - return syscall(__NR_io_uring_setup, entries, p); -} - -static int io_uring_enter(int fd, unsigned int to_submit, - unsigned int min_complete, - unsigned int flags, sigset_t *sig) -{ - return syscall(__NR_io_uring_enter, fd, to_submit, min_complete, - flags, sig, _NSIG / 8); -} - -static int io_uring_register_buffers(struct io_uring *ring, - const struct iovec *iovecs, - unsigned nr_iovecs) -{ - int ret; - - ret = syscall(__NR_io_uring_register, ring->ring_fd, - IORING_REGISTER_BUFFERS, iovecs, nr_iovecs); - return (ret < 0) ? -errno : ret; -} - -static int io_uring_mmap(int fd, struct io_uring_params *p, - struct io_uring_sq *sq, struct io_uring_cq *cq) -{ - size_t size; - void *ptr; - int ret; - - sq->ring_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned); - ptr = mmap(0, sq->ring_sz, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING); - if (ptr == MAP_FAILED) - return -errno; - sq->khead = ptr + p->sq_off.head; - sq->ktail = ptr + p->sq_off.tail; - sq->kring_mask = ptr + p->sq_off.ring_mask; - sq->kring_entries = ptr + p->sq_off.ring_entries; - sq->kflags = ptr + p->sq_off.flags; - sq->kdropped = ptr + p->sq_off.dropped; - sq->array = ptr + p->sq_off.array; - - size = p->sq_entries * sizeof(struct io_uring_sqe); - sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQES); - if (sq->sqes == MAP_FAILED) { - ret = -errno; -err: - munmap(sq->khead, sq->ring_sz); - return ret; - } - - cq->ring_sz = p->cq_off.cqes + p->cq_entries * sizeof(struct io_uring_cqe); - ptr = mmap(0, cq->ring_sz, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING); - if (ptr == MAP_FAILED) { - ret = -errno; - munmap(sq->sqes, p->sq_entries * sizeof(struct io_uring_sqe)); - goto err; - } - cq->khead = ptr + p->cq_off.head; - cq->ktail = ptr + p->cq_off.tail; - cq->kring_mask = ptr + p->cq_off.ring_mask; - cq->kring_entries = ptr + p->cq_off.ring_entries; - cq->koverflow = ptr + p->cq_off.overflow; - cq->cqes = ptr + p->cq_off.cqes; - return 0; -} - -static int io_uring_queue_init(unsigned entries, struct io_uring *ring, - unsigned flags) -{ - struct io_uring_params p; - int fd, ret; - - memset(ring, 0, sizeof(*ring)); - memset(&p, 0, sizeof(p)); - p.flags = flags; - - fd = io_uring_setup(entries, &p); - if (fd < 0) - return fd; - ret = io_uring_mmap(fd, &p, &ring->sq, &ring->cq); - if (!ret) - ring->ring_fd = fd; - else - close(fd); - return ret; -} - -static int io_uring_submit(struct io_uring *ring) -{ - struct io_uring_sq *sq = &ring->sq; - const unsigned mask = *sq->kring_mask; - unsigned ktail, submitted, to_submit; - int ret; - - read_barrier(); - if (*sq->khead != *sq->ktail) { - submitted = *sq->kring_entries; - goto submit; - } - if (sq->sqe_head == sq->sqe_tail) - return 0; - - ktail = *sq->ktail; - to_submit = sq->sqe_tail - sq->sqe_head; - for (submitted = 0; submitted < to_submit; submitted++) { - read_barrier(); - sq->array[ktail++ & mask] = sq->sqe_head++ & mask; - } - if (!submitted) - return 0; - - if (*sq->ktail != ktail) { - write_barrier(); - *sq->ktail = ktail; - write_barrier(); - } -submit: - ret = io_uring_enter(ring->ring_fd, submitted, 0, - IORING_ENTER_GETEVENTS, NULL); - return ret < 0 ? -errno : ret; -} - -static inline void io_uring_prep_send(struct io_uring_sqe *sqe, int sockfd, - const void *buf, size_t len, int flags) -{ - memset(sqe, 0, sizeof(*sqe)); - sqe->opcode = (__u8) IORING_OP_SEND; - sqe->fd = sockfd; - sqe->addr = (unsigned long) buf; - sqe->len = len; - sqe->msg_flags = (__u32) flags; -} - -static inline void io_uring_prep_sendzc(struct io_uring_sqe *sqe, int sockfd, - const void *buf, size_t len, int flags, - unsigned zc_flags) -{ - io_uring_prep_send(sqe, sockfd, buf, len, flags); - sqe->opcode = (__u8) IORING_OP_SEND_ZC; - sqe->ioprio = zc_flags; -} - -static struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring) -{ - struct io_uring_sq *sq = &ring->sq; - - if (sq->sqe_tail + 1 - sq->sqe_head > *sq->kring_entries) - return NULL; - return &sq->sqes[sq->sqe_tail++ & *sq->kring_mask]; -} - -static int io_uring_wait_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr) -{ - struct io_uring_cq *cq = &ring->cq; - const unsigned mask = *cq->kring_mask; - unsigned head = *cq->khead; - int ret; - - *cqe_ptr = NULL; - do { - read_barrier(); - if (head != *cq->ktail) { - *cqe_ptr = &cq->cqes[head & mask]; - break; - } - ret = io_uring_enter(ring->ring_fd, 0, 1, - IORING_ENTER_GETEVENTS, NULL); - if (ret < 0) - return -errno; - } while (1); - - return 0; -} - -static inline void io_uring_cqe_seen(struct io_uring *ring) -{ - *(&ring->cq)->khead += 1; - write_barrier(); -} - static unsigned long gettimeofday_ms(void) { struct timeval tv; -- 2.34.1

2 years, 3 months

1
0
0 0

[PATCH v2 0/8] Add printf attribute to kselftest functions

by Wieczor-Retman Maciej

Kselftest.h declares many variadic functions that can print some formatted message while also executing selftest logic. These declarations don't have any compiler mechanism to verify if passed arguments are valid in comparison with format specifiers used in printf() calls. Attribute addition can make debugging easier, the code more consistent and prevent mismatched or missing variables. Add a __printf() macro that validates types of variables passed to the format string. The macro is similarly used in other tools in the kernel. Add __printf() attributes to function definitions inside kselftest.h that use printing. Adding the __printf() macro exposes some mismatches in format strings across different selftests. Fix the mismatched format specifiers in multiple tests. Changelog v2: - Add review and fixes tags to patches. - Add two patches with mismatch fixes. - Fix missed attribute in selftests/kvm. (Andrew) - Fix previously missed issues in selftests/mm (Ilpo) Wieczor-Retman Maciej (8): selftests: Add printf attribute to ksefltest prints selftests/cachestat: Fix print_cachestat format selftests/openat2: Fix wrong format specifier selftests/pidfd: Fix ksft print formats selftests/sigaltstack: Fix wrong format specifier selftests/kvm: Replace attribute with macro selftests/mm: Substitute attribute with a macro selftests/resctrl: Fix wrong format specifier .../selftests/cachestat/test_cachestat.c | 2 +- tools/testing/selftests/kselftest.h | 18 ++++++++++-------- .../testing/selftests/kvm/include/test_util.h | 8 ++++---- tools/testing/selftests/mm/mremap_test.c | 2 +- tools/testing/selftests/mm/pkey-helpers.h | 2 +- tools/testing/selftests/openat2/openat2_test.c | 2 +- .../selftests/pidfd/pidfd_fdinfo_test.c | 2 +- tools/testing/selftests/pidfd/pidfd_test.c | 12 ++++++------ tools/testing/selftests/resctrl/cache.c | 2 +- tools/testing/selftests/sigaltstack/sas.c | 2 +- 10 files changed, 27 insertions(+), 25 deletions(-) base-commit: 9b1db732866bee060b9bca9493e5ebf5e8874c48 -- 2.42.0

2 years, 3 months

2
9
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-kselftest-mirror