This patchset introduces batched operations for the per-cpu variant of the array map.
It also introduces a standard way to define per-cpu values via the 'BPF_PERCPU_TYPE()' macro, which handles the alignment requirements transparently. The macro was already implemented in the selftests and is merely refactored out to libbpf here, with some simplifications for reuse.
The tests were updated to reflect all the new changes.
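For illustration, here is roughly how a userspace consumer would combine the macro with the new batched lookup. This is only a sketch along the lines of the selftests, not code from this series; it assumes the macros land in tools/lib/bpf/bpf.h as in patch 2 and uses libbpf's libbpf_num_possible_cpus():

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

typedef BPF_PERCPU_TYPE(int) pcpu_map_value_t;

/* Dump every (key, cpu) pair of a BPF_MAP_TYPE_PERCPU_ARRAY with a
 * single batched lookup; the kernel writes one 8-byte aligned slot
 * per possible CPU for each key. */
static int dump_percpu_array(int map_fd, __u32 max_entries)
{
	int nr_cpus = libbpf_num_possible_cpus();
	pcpu_map_value_t *values;
	__u32 count = max_entries;
	__u64 batch = 0;
	int *keys, i, j, err;
	DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts);

	keys = calloc(max_entries, sizeof(*keys));
	values = calloc(max_entries, sizeof(*values) * nr_cpus);

	err = bpf_map_lookup_batch(map_fd, NULL, &batch, keys, values,
				   &count, &opts);
	if (!err || errno == ENOENT)
		for (i = 0; i < count; i++)
			for (j = 0; j < nr_cpus; j++)
				printf("key %d cpu %d: %d\n", keys[i], j,
				       bpf_percpu(values + i * nr_cpus, j));

	free(keys);
	free(values);
	return err;
}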
Pedro Tammela (3):
  bpf: add batched ops support for percpu array
  libbpf: selftests: refactor 'BPF_PERCPU_TYPE()' and 'bpf_percpu()' macros
  bpf: selftests: update array map tests for per-cpu batched ops
 kernel/bpf/arraymap.c                         |   2 +
 tools/lib/bpf/bpf.h                           |  10 ++
 tools/testing/selftests/bpf/bpf_util.h        |   7 --
 .../bpf/map_tests/array_map_batch_ops.c       | 114 +++++++++++++-----
 .../bpf/map_tests/htab_map_batch_ops.c        |  48 ++++----
 .../selftests/bpf/prog_tests/map_init.c       |   5 +-
 tools/testing/selftests/bpf/test_maps.c       |  16 +--
 7 files changed, 133 insertions(+), 69 deletions(-)
Suggested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
---
 kernel/bpf/arraymap.c | 2 ++
 1 file changed, 2 insertions(+)
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 463d25e1e67e..3c4105603f9d 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -698,6 +698,8 @@ const struct bpf_map_ops percpu_array_map_ops = {
 	.map_delete_elem = array_map_delete_elem,
 	.map_seq_show_elem = percpu_array_map_seq_show_elem,
 	.map_check_btf = array_map_check_btf,
+	.map_lookup_batch = generic_map_lookup_batch,
+	.map_update_batch = generic_map_update_batch,
 	.map_set_for_each_callback_args = map_set_for_each_callback_args,
 	.map_for_each_callback = bpf_for_each_array_elem,
 	.map_btf_name = "bpf_array",
Pedro Tammela wrote:
> Suggested-by: Jamal Hadi Salim <jhs@mojatatu.com>
> Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
A commit message describing some of the change details would be nice, along with a note that this uses the for-each-cpu copies (the same semantics as a normal syscall on a percpu map) and not the per-cpu ones. I at least had to go and check the generic_map_* batch operations.

Also, something about why generic_map_delete_batch is omitted?
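For anyone else who goes digging: per key, the generic batch ops copy one value slot per possible CPU, with each slot rounded up to 8 bytes. Roughly (my own sketch, not from the patch):

/* where the value for (key index i, cpu j) lands in the user buffer
 * filled by a batched lookup on a percpu map with 4-byte values */
static int get_cpu_value(const void *values, unsigned int nr_cpus,
			 unsigned int i, unsigned int j)
{
	const char *base = values;

	/* stride per copy is round_up(value_size, 8) == 8 here */
	return *(const int *)(base + (i * nr_cpus + j) * 8);
}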
>  kernel/bpf/arraymap.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
> index 463d25e1e67e..3c4105603f9d 100644
> --- a/kernel/bpf/arraymap.c
> +++ b/kernel/bpf/arraymap.c
> @@ -698,6 +698,8 @@ const struct bpf_map_ops percpu_array_map_ops = {
>  	.map_delete_elem = array_map_delete_elem,
>  	.map_seq_show_elem = percpu_array_map_seq_show_elem,
>  	.map_check_btf = array_map_check_btf,
> +	.map_lookup_batch = generic_map_lookup_batch,
> +	.map_update_batch = generic_map_update_batch,
>  	.map_set_for_each_callback_args = map_set_for_each_callback_args,
>  	.map_for_each_callback = bpf_for_each_array_elem,
>  	.map_btf_name = "bpf_array",
--
2.25.1
This macro was refactored out of the bpf selftests.
Since percpu values are rounded up to 8 bytes in the kernel, a careless user in userspace might encounter unexpected values when parsing the output of the batched operations.
Now that both array and hash maps have support for batched ops in the percpu variant, let's provide a convenient macro to declare percpu map value types.
The tests are updated to provide a reference usage of the new macro.
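For example (illustrative only, not code from this patch; assumes the macros are available from bpf/bpf.h as added below): a plain 'int' array misparses the per-cpu output because each copy occupies 8 bytes on a 64-bit kernel, while the macro gets the stride right:

#include <bpf/bpf.h>

/* Sum one percpu-array value across CPUs. A bare 'int vals[nr_cpus]'
 * here would read the wrong offsets; BPF_PERCPU_TYPE() pads each
 * element to the kernel's 8-byte stride. */
static long sum_value(int map_fd, int key, unsigned int nr_cpus)
{
	BPF_PERCPU_TYPE(int) vals[nr_cpus];
	long total = 0;
	unsigned int i;

	if (bpf_map_lookup_elem(map_fd, &key, vals))
		return -1;
	for (i = 0; i < nr_cpus; i++)
		total += bpf_percpu(vals, i);
	return total;
}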
Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
---
 tools/lib/bpf/bpf.h                           | 10 ++++
 tools/testing/selftests/bpf/bpf_util.h        |  7 ---
 .../bpf/map_tests/htab_map_batch_ops.c        | 48 ++++++++++---------
 .../selftests/bpf/prog_tests/map_init.c       |  5 +-
 tools/testing/selftests/bpf/test_maps.c       | 16 ++++---
 5 files changed, 46 insertions(+), 40 deletions(-)
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 875dde20d56e..5feace6960e3 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -128,6 +128,16 @@ LIBBPF_API int bpf_map_delete_elem(int fd, const void *key);
 LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key);
 LIBBPF_API int bpf_map_freeze(int fd);

+#define __bpf_percpu_align __attribute__((__aligned__(8)))
+
+#define BPF_PERCPU_TYPE(type)		\
+	struct {			\
+		type v;			\
+		/* padding */		\
+	} __bpf_percpu_align
+
+#define bpf_percpu(name, cpu) ((name)[(cpu)].v)
+
 struct bpf_map_batch_opts {
 	size_t sz; /* size of this struct for forward/backward compatibility */
 	__u64 elem_flags;
diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h
index a3352a64c067..105db3120ab4 100644
--- a/tools/testing/selftests/bpf/bpf_util.h
+++ b/tools/testing/selftests/bpf/bpf_util.h
@@ -20,13 +20,6 @@ static inline unsigned int bpf_num_possible_cpus(void)
 	return possible_cpus;
 }

-#define __bpf_percpu_val_align __attribute__((__aligned__(8)))
-
-#define BPF_DECLARE_PERCPU(type, name)				\
-	struct { type v; /* padding */ } __bpf_percpu_val_align	\
-		name[bpf_num_possible_cpus()]
-#define bpf_percpu(name, cpu) name[(cpu)].v
-
 #ifndef ARRAY_SIZE
 # define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
 #endif
diff --git a/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c
index 976bf415fbdd..3909e3980094 100644
--- a/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c
+++ b/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c
@@ -10,27 +10,31 @@
 #include <bpf_util.h>
 #include <test_maps.h>
+typedef BPF_PERCPU_TYPE(int) pcpu_map_value_t;
+
 static void map_batch_update(int map_fd, __u32 max_entries, int *keys,
 			     void *values, bool is_pcpu)
 {
-	typedef BPF_DECLARE_PERCPU(int, value);
-	value *v = NULL;
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	pcpu_map_value_t *v;
 	int i, j, err;
+	int offset = 0;
 	DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
 		.elem_flags = 0,
 		.flags = 0,
 	);

 	if (is_pcpu)
-		v = (value *)values;
+		v = values;

 	for (i = 0; i < max_entries; i++) {
 		keys[i] = i + 1;
 		if (is_pcpu)
-			for (j = 0; j < bpf_num_possible_cpus(); j++)
-				bpf_percpu(v[i], j) = i + 2 + j;
+			for (j = 0; j < nr_cpus; j++)
+				bpf_percpu(v + offset, j) = i + 2 + j;
 		else
 			((int *)values)[i] = i + 2;
+		offset += nr_cpus;
 	}

 	err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &opts);
@@ -40,22 +44,23 @@ static void map_batch_update(int map_fd, __u32 max_entries, int *keys,
 static void map_batch_verify(int *visited, __u32 max_entries,
 			     int *keys, void *values, bool is_pcpu)
 {
-	typedef BPF_DECLARE_PERCPU(int, value);
-	value *v = NULL;
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	pcpu_map_value_t *v;
 	int i, j;
+	int offset = 0;

 	if (is_pcpu)
-		v = (value *)values;
+		v = values;
 	memset(visited, 0, max_entries * sizeof(*visited));
 	for (i = 0; i < max_entries; i++) {
 		if (is_pcpu) {
-			for (j = 0; j < bpf_num_possible_cpus(); j++) {
-				CHECK(keys[i] + 1 + j != bpf_percpu(v[i], j),
+			for (j = 0; j < nr_cpus; j++) {
+				int value = bpf_percpu(v + offset, j);
+				CHECK(keys[i] + 1 + j != value,
 				      "key/value checking",
-				      "error: i %d j %d key %d value %d\n",
-				      i, j, keys[i], bpf_percpu(v[i], j));
+				      "error: i %d j %d key %d value %d\n", i,
+				      j, keys[i], value);
 			}
 		} else {
 			CHECK(keys[i] + 1 != ((int *)values)[i],
@@ -63,9 +68,8 @@ static void map_batch_verify(int *visited, __u32 max_entries,
 			      "error: i %d key %d value %d\n",
 			      i, keys[i], ((int *)values)[i]);
 		}
-
+		offset += nr_cpus;
 		visited[i] = 1;
-
 	}
 	for (i = 0; i < max_entries; i++) {
 		CHECK(visited[i] != 1, "visited checking",
@@ -75,11 +79,10 @@ static void map_batch_verify(int *visited, __u32 max_entries,
 void __test_map_lookup_and_delete_batch(bool is_pcpu)
 {
+	unsigned int nr_cpus = bpf_num_possible_cpus();
 	__u32 batch, count, total, total_success;
-	typedef BPF_DECLARE_PERCPU(int, value);
 	int map_fd, *keys, *visited, key;
 	const __u32 max_entries = 10;
-	value pcpu_values[max_entries];
 	int err, step, value_size;
 	bool nospace_err;
 	void *values;
@@ -100,12 +103,13 @@ void __test_map_lookup_and_delete_batch(bool is_pcpu)
 	CHECK(map_fd == -1, "bpf_create_map_xattr()", "error:%s\n",
 	      strerror(errno));

-	value_size = is_pcpu ? sizeof(value) : sizeof(int);
-
-	keys = malloc(max_entries * sizeof(int));
 	if (is_pcpu)
-		values = pcpu_values;
+		value_size = sizeof(pcpu_map_value_t) * nr_cpus;
 	else
-		values = malloc(max_entries * sizeof(int));
+		value_size = sizeof(int);
+
+	keys = malloc(max_entries * sizeof(int));
+	values = calloc(max_entries, value_size);
 	visited = malloc(max_entries * sizeof(int));
 	CHECK(!keys || !values || !visited, "malloc()", "error:%s\n",
 	      strerror(errno));
@@ -203,7 +207,7 @@ void __test_map_lookup_and_delete_batch(bool is_pcpu)
 	CHECK(total != max_entries, "delete with steps",
 	      "total = %u, max_entries = %u\n", total, max_entries);

-	/* check map is empty, errono == ENOENT */
+	/* check map is empty, errno == ENOENT */
 	err = bpf_map_get_next_key(map_fd, NULL, &key);
 	CHECK(!err || errno != ENOENT, "bpf_map_get_next_key()",
 	      "error: %s\n", strerror(errno));
diff --git a/tools/testing/selftests/bpf/prog_tests/map_init.c b/tools/testing/selftests/bpf/prog_tests/map_init.c
index 14a31109dd0e..ec3d010319cc 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_init.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_init.c
@@ -12,10 +12,7 @@ static int duration;
 typedef unsigned long long map_key_t;
 typedef unsigned long long map_value_t;
-typedef struct {
-	map_value_t v; /* padding */
-} __bpf_percpu_val_align pcpu_map_value_t;
-
+typedef BPF_PERCPU_TYPE(map_value_t) pcpu_map_value_t;

 static int map_populate(int map_fd, int num)
 {
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 51adc42b2b40..6acbebef5f90 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -30,6 +30,8 @@
 #define ENOTSUPP 524
 #endif

+typedef BPF_PERCPU_TYPE(long) pcpu_map_value_t;
+
 static int skips;

 static int map_flags;
@@ -147,13 +149,13 @@ static void test_hashmap_sizes(unsigned int task, void *data)
 static void test_hashmap_percpu(unsigned int task, void *data)
 {
 	unsigned int nr_cpus = bpf_num_possible_cpus();
-	BPF_DECLARE_PERCPU(long, value);
+	pcpu_map_value_t value[nr_cpus];
 	long long key, next_key, first_key;
 	int expected_key_mask = 0;
 	int fd, i;

-	fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_HASH, sizeof(key),
-			    sizeof(bpf_percpu(value, 0)), 2, map_flags);
+	fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_HASH, sizeof(key), sizeof(long),
+			    2, map_flags);
 	if (fd < 0) {
 		printf("Failed to create hashmap '%s'!\n", strerror(errno));
 		exit(1);
@@ -400,11 +402,11 @@ static void test_arraymap(unsigned int task, void *data)
 static void test_arraymap_percpu(unsigned int task, void *data)
 {
 	unsigned int nr_cpus = bpf_num_possible_cpus();
-	BPF_DECLARE_PERCPU(long, values);
+	pcpu_map_value_t values[nr_cpus];
 	int key, next_key, fd, i;

 	fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
-			    sizeof(bpf_percpu(values, 0)), 2, 0);
+			    sizeof(long), 2, 0);
 	if (fd < 0) {
 		printf("Failed to create arraymap '%s'!\n", strerror(errno));
 		exit(1);
@@ -459,7 +461,7 @@ static void test_arraymap_percpu(unsigned int task, void *data)
 static void test_arraymap_percpu_many_keys(void)
 {
 	unsigned int nr_cpus = bpf_num_possible_cpus();
-	BPF_DECLARE_PERCPU(long, values);
+	pcpu_map_value_t values[nr_cpus];
 	/* nr_keys is not too large otherwise the test stresses percpu
 	 * allocator more than anything else
 	 */
@@ -467,7 +469,7 @@ static void test_arraymap_percpu_many_keys(void)
 	int key, fd, i;

 	fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
-			    sizeof(bpf_percpu(values, 0)), nr_keys, 0);
+			    sizeof(long), nr_keys, 0);
 	if (fd < 0) {
 		printf("Failed to create per-cpu arraymap '%s'!\n",
 		       strerror(errno));
Pedro Tammela wrote:
> This macro was refactored out of the bpf selftests.
>
> Since percpu values are rounded up to 8 bytes in the kernel, a careless
> user in userspace might encounter unexpected values when parsing the
> output of the batched operations.
>
> Now that both array and hash maps have support for batched ops in the
> percpu variant, let's provide a convenient macro to declare percpu map
> value types.
>
> The tests are updated to provide a reference usage of the new macro.
>
> Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
Other than the initial patch needing a bit of description, the series looks good to me. Thanks.
Follows the same logic as the hashtable tests.
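(Illustrative aside, not part of the diff below: per-cpu values live in one flat buffer, so the tests address the slot for key index i and cpu j roughly as in this sketch.)

/* with pcpu_map_value_t *v = values, the tests step an 'offset'
 * by nr_cpus per key, i.e. offset == i * nr_cpus */
static int *slot(pcpu_map_value_t *v, unsigned int nr_cpus,
		 unsigned int i, unsigned int j)
{
	return &bpf_percpu(v + i * nr_cpus, j);
}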
Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
---
 .../bpf/map_tests/array_map_batch_ops.c       | 114 +++++++++++++-----
 1 file changed, 85 insertions(+), 29 deletions(-)
diff --git a/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c
index e42ea1195d18..e71b5fbf41b4 100644
--- a/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c
+++ b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c
@@ -7,35 +7,68 @@
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
+#include <bpf_util.h>
 #include <test_maps.h>
+typedef BPF_PERCPU_TYPE(int) pcpu_map_value_t;
+
 static void map_batch_update(int map_fd, __u32 max_entries, int *keys,
-			     int *values)
+			     void *values, bool is_pcpu)
 {
-	int i, err;
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	pcpu_map_value_t *v;
+	int i, j, err;
+	int offset = 0;
 	DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
 		.elem_flags = 0,
 		.flags = 0,
 	);

+	if (is_pcpu)
+		v = values;
+
 	for (i = 0; i < max_entries; i++) {
 		keys[i] = i;
-		values[i] = i + 1;
+		if (is_pcpu)
+			for (j = 0; j < nr_cpus; j++)
+				bpf_percpu(v + offset, j) = i + 1 + j;
+		else
+			((int *)values)[i] = i + 1;
+		offset += nr_cpus;
 	}

 	err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &opts);
 	CHECK(err, "bpf_map_update_batch()", "error:%s\n", strerror(errno));
 }

-static void map_batch_verify(int *visited, __u32 max_entries,
-			     int *keys, int *values)
+static void map_batch_verify(int *visited, __u32 max_entries, int *keys,
+			     void *values, bool is_pcpu)
 {
-	int i;
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	pcpu_map_value_t *v;
+	int i, j;
+	int offset = 0;
+
+	if (is_pcpu)
+		v = values;

 	memset(visited, 0, max_entries * sizeof(*visited));
 	for (i = 0; i < max_entries; i++) {
-		CHECK(keys[i] + 1 != values[i], "key/value checking",
-		      "error: i %d key %d value %d\n", i, keys[i], values[i]);
+		if (is_pcpu) {
+			for (j = 0; j < nr_cpus; j++) {
+				int value = bpf_percpu(v + offset, j);
+				CHECK(keys[i] + j + 1 != value,
+				      "key/value checking",
+				      "error: i %d j %d key %d value %d\n", i,
+				      j, keys[i], value);
+			}
+		} else {
+			CHECK(keys[i] + 1 != ((int *)values)[i],
+			      "key/value checking",
+			      "error: i %d key %d value %d\n", i, keys[i],
+			      ((int *)values)[i]);
+		}
+		offset += nr_cpus;
 		visited[i] = 1;
 	}
 	for (i = 0; i < max_entries; i++) {
@@ -44,19 +77,22 @@ static void map_batch_verify(int *visited, __u32 max_entries,
 	}
 }
-void test_array_map_batch_ops(void)
+void __test_map_lookup_and_update_batch(bool is_pcpu)
 {
+	unsigned int nr_cpus = bpf_num_possible_cpus();
 	struct bpf_create_map_attr xattr = {
 		.name = "array_map",
-		.map_type = BPF_MAP_TYPE_ARRAY,
+		.map_type = is_pcpu ? BPF_MAP_TYPE_PERCPU_ARRAY :
+				      BPF_MAP_TYPE_ARRAY,
 		.key_size = sizeof(int),
 		.value_size = sizeof(int),
 	};
-	int map_fd, *keys, *values, *visited;
+	int map_fd, *keys, *visited;
 	__u32 count, total, total_success;
 	const __u32 max_entries = 10;
 	__u64 batch = 0;
-	int err, step;
+	int err, step, value_size;
+	void *values;
 	DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
 		.elem_flags = 0,
 		.flags = 0,
@@ -67,22 +103,24 @@ void test_array_map_batch_ops(void)
 	CHECK(map_fd == -1, "bpf_create_map_xattr()", "error:%s\n",
 	      strerror(errno));

-	keys = malloc(max_entries * sizeof(int));
-	values = malloc(max_entries * sizeof(int));
-	visited = malloc(max_entries * sizeof(int));
+	if (is_pcpu)
+		value_size = sizeof(pcpu_map_value_t) * nr_cpus;
+	else
+		value_size = sizeof(int);
+
+	keys = malloc(max_entries * sizeof(*keys));
+	values = calloc(max_entries, value_size);
+	visited = malloc(max_entries * sizeof(*visited));
 	CHECK(!keys || !values || !visited, "malloc()", "error:%s\n",
 	      strerror(errno));

-	/* populate elements to the map */
-	map_batch_update(map_fd, max_entries, keys, values);
-
 	/* test 1: lookup in a loop with various steps. */
 	total_success = 0;
 	for (step = 1; step < max_entries; step++) {
-		map_batch_update(map_fd, max_entries, keys, values);
-		map_batch_verify(visited, max_entries, keys, values);
+		map_batch_update(map_fd, max_entries, keys, values, is_pcpu);
+		map_batch_verify(visited, max_entries, keys, values, is_pcpu);
 		memset(keys, 0, max_entries * sizeof(*keys));
-		memset(values, 0, max_entries * sizeof(*values));
+		memset(values, 0, max_entries * value_size);
 		batch = 0;
 		total = 0;
 		/* iteratively lookup/delete elements with 'step'
@@ -91,10 +129,10 @@
 		count = step;
 		while (true) {
 			err = bpf_map_lookup_batch(map_fd,
-						   total ? &batch : NULL, &batch,
-						   keys + total,
-						   values + total,
-						   &count, &opts);
+						   total ? &batch : NULL,
+						   &batch, keys + total,
+						   values + total * value_size,
+						   &count, &opts);

 			CHECK((err && errno != ENOENT), "lookup with steps",
 			      "error: %s\n", strerror(errno));
@@ -108,7 +146,7 @@
 		CHECK(total != max_entries, "lookup with steps",
 		      "total = %u, max_entries = %u\n", total, max_entries);

-		map_batch_verify(visited, max_entries, keys, values);
+		map_batch_verify(visited, max_entries, keys, values, is_pcpu);

 		total_success++;
 	}
@@ -116,9 +154,27 @@
 	CHECK(total_success == 0, "check total_success",
 	      "unexpected failure\n");

-	printf("%s:PASS\n", __func__);
-
 	free(keys);
-	free(values);
 	free(visited);
+
+	if (!is_pcpu)
+		free(values);
+}
+
+void array_map_batch_ops(void)
+{
+	__test_map_lookup_and_update_batch(false);
+	printf("test_%s:PASS\n", __func__);
+}
+
+void array_percpu_map_batch_ops(void)
+{
+	__test_map_lookup_and_update_batch(true);
+	printf("test_%s:PASS\n", __func__);
+}
+
+void test_array_map_batch_ops(void)
+{
+	array_map_batch_ops();
+	array_percpu_map_batch_ops();
 }