Replace the boolean autoload field in the bpf_program structure with an enum. Currently a bool can only distinguish autoloaded from disabled programs; with an enum, new BPF program loading types can be introduced simply by extending the enum's value range.
This patch is the first in a series of two.
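For illustration, a minimal sketch of how the new getter/setter pair composes with the existing autoload API (the enum values and functions are the ones added by this patch):

#include <errno.h>
#include <bpf/libbpf.h>

/* Minimal sketch: disable loading of one program before bpf_object__load().
 * Uses the enum and APIs introduced by this patch.
 */
static int disable_prog(struct bpf_program *prog)
{
	int err;

	/* equivalent to bpf_program__set_autoload(prog, false) */
	err = bpf_program__set_load_type(prog, BPF_PROG_LOAD_TYPE_DISABLED);
	if (err)
		return err;

	/* the legacy boolean getter keeps working on top of the enum */
	return bpf_program__autoload(prog) ? -EINVAL : 0;
}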
Signed-off-by: Slava Imameev <slava.imameev@crowdstrike.com>
---
 tools/lib/bpf/libbpf.c                        | 50 ++++++++++++-------
 tools/lib/bpf/libbpf.h                        | 16 ++++++
 tools/lib/bpf/libbpf.map                      |  2 +
 .../selftests/bpf/prog_tests/load_type.c      | 40 +++++++++++++++
 .../selftests/bpf/progs/test_load_type.c      | 23 +++++++++
 5 files changed, 112 insertions(+), 19 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/load_type.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_load_type.c
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 194809da5172..9af5c0b08b8b 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -473,7 +473,7 @@ struct bpf_program {
 	struct bpf_object *obj;
 
 	int fd;
-	bool autoload;
+	enum bpf_prog_load_type load_type;
 	bool autoattach;
 	bool sym_global;
 	bool mark_btf_static;
@@ -824,11 +824,11 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
 	 * autoload set to false.
 	 */
 	if (sec_name[0] == '?') {
-		prog->autoload = false;
+		prog->load_type = BPF_PROG_LOAD_TYPE_DISABLED;
 		/* from now on forget there was ? in section name */
 		sec_name++;
 	} else {
-		prog->autoload = true;
+		prog->load_type = BPF_PROG_LOAD_TYPE_AUTO;
 	}
 
 	prog->autoattach = true;
@@ -1117,7 +1117,8 @@ static int bpf_object_adjust_struct_ops_autoload(struct bpf_object *obj)
 			}
 		}
 		if (use_cnt)
-			prog->autoload = should_load;
+			prog->load_type = should_load ? BPF_PROG_LOAD_TYPE_AUTO
+						      : BPF_PROG_LOAD_TYPE_DISABLED;
 	}
 
 	return 0;
@@ -1202,7 +1203,7 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map)
 			 * then bpf_object_adjust_struct_ops_autoload() will update its
 			 * autoload accordingly.
 			 */
-			st_ops->progs[i]->autoload = false;
+			st_ops->progs[i]->load_type = BPF_PROG_LOAD_TYPE_DISABLED;
 			st_ops->progs[i] = NULL;
 		}
 
@@ -1241,7 +1242,7 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map)
 		 * if user replaced it with another program or NULL
 		 */
 		if (st_ops->progs[i] && st_ops->progs[i] != prog)
-			st_ops->progs[i]->autoload = false;
+			st_ops->progs[i]->load_type = BPF_PROG_LOAD_TYPE_DISABLED;
 
 		/* Update the value from the shadow type */
 		st_ops->progs[i] = prog;
@@ -3482,7 +3483,7 @@ static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
 	}
 
 	bpf_object__for_each_program(prog, obj) {
-		if (!prog->autoload)
+		if (prog->load_type == BPF_PROG_LOAD_TYPE_DISABLED)
 			continue;
 		if (prog_needs_vmlinux_btf(prog))
 			return true;
@@ -5973,7 +5974,7 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
 		/* no need to apply CO-RE relocation if the program is
 		 * not going to be loaded
 		 */
-		if (!prog->autoload)
+		if (prog->load_type == BPF_PROG_LOAD_TYPE_DISABLED)
 			continue;
 
 		/* adjust insn_idx from section frame of reference to the local
@@ -7106,7 +7107,7 @@ static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_pat
 		 */
 		if (prog_is_subprog(obj, prog))
 			continue;
-		if (!prog->autoload)
+		if (prog->load_type == BPF_PROG_LOAD_TYPE_DISABLED)
 			continue;
 
 		err = bpf_object__relocate_calls(obj, prog);
@@ -7142,7 +7143,7 @@ static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_pat
 		prog = &obj->programs[i];
 		if (prog_is_subprog(obj, prog))
 			continue;
-		if (!prog->autoload)
+		if (prog->load_type == BPF_PROG_LOAD_TYPE_DISABLED)
 			continue;
 
 		/* Process data relos for main programs */
@@ -7906,8 +7907,8 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)
 		prog = &obj->programs[i];
 		if (prog_is_subprog(obj, prog))
 			continue;
-		if (!prog->autoload) {
-			pr_debug("prog '%s': skipped loading\n", prog->name);
+		if (prog->load_type != BPF_PROG_LOAD_TYPE_AUTO) {
+			pr_debug("prog '%s': skipped auto-loading\n", prog->name);
 			continue;
 		}
 		prog->log_level |= log_level;
@@ -9224,16 +9225,13 @@ const char *bpf_program__section_name(const struct bpf_program *prog)
 
 bool bpf_program__autoload(const struct bpf_program *prog)
 {
-	return prog->autoload;
+	return prog->load_type == BPF_PROG_LOAD_TYPE_AUTO;
 }
 
 int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
 {
-	if (prog->obj->loaded)
-		return libbpf_err(-EINVAL);
-
-	prog->autoload = autoload;
-	return 0;
+	return bpf_program__set_load_type(prog,
+		autoload ? BPF_PROG_LOAD_TYPE_AUTO : BPF_PROG_LOAD_TYPE_DISABLED);
 }
 
 bool bpf_program__autoattach(const struct bpf_program *prog)
@@ -13983,7 +13981,7 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
 		struct bpf_program *prog = *prog_skel->prog;
 		struct bpf_link **link = prog_skel->link;
 
-		if (!prog->autoload || !prog->autoattach)
+		if (prog->load_type != BPF_PROG_LOAD_TYPE_AUTO || !prog->autoattach)
 			continue;
 
 		/* auto-attaching not supported for this program */
@@ -14087,3 +14085,17 @@ void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
 	free(s->progs);
 	free(s);
 }
+
+int bpf_program__set_load_type(struct bpf_program *prog, enum bpf_prog_load_type type)
+{
+	if (prog->obj->loaded)
+		return libbpf_err(-EINVAL);
+
+	prog->load_type = type;
+	return 0;
+}
+
+enum bpf_prog_load_type bpf_program__load_type(const struct bpf_program *prog)
+{
+	return prog->load_type;
+}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 3020ee45303a..21e3d1f51cb3 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -1918,6 +1918,22 @@ LIBBPF_API int libbpf_register_prog_handler(const char *sec,
  */
 LIBBPF_API int libbpf_unregister_prog_handler(int handler_id);
 
+/**
+ * The program load type:
+ *
+ * - BPF_PROG_LOAD_TYPE_DISABLED: the program is not loaded.
+ * - BPF_PROG_LOAD_TYPE_AUTO: the program is autoloaded when the bpf_object is loaded.
+ */
+enum bpf_prog_load_type {
+	BPF_PROG_LOAD_TYPE_DISABLED = 0,
+	BPF_PROG_LOAD_TYPE_AUTO,
+};
+
+LIBBPF_API int bpf_program__set_load_type(struct bpf_program *prog,
+					  enum bpf_prog_load_type loadtype);
+LIBBPF_API enum bpf_prog_load_type bpf_program__load_type(const struct bpf_program *prog);
+
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index a8b2936a1646..08323e7930fd 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -436,4 +436,6 @@ LIBBPF_1.6.0 {
 	bpf_linker__add_buf;
 	bpf_linker__add_fd;
 	bpf_linker__new_fd;
+	bpf_program__load_type;
+	bpf_program__set_load_type;
 } LIBBPF_1.5.0;
diff --git a/tools/testing/selftests/bpf/prog_tests/load_type.c b/tools/testing/selftests/bpf/prog_tests/load_type.c
new file mode 100644
index 000000000000..7c8d55173b2b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/load_type.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include <time.h>
+#include "test_load_type.skel.h"
+
+void test_load_type(void)
+{
+	int duration = 0, err;
+	struct test_load_type *skel;
+
+	skel = test_load_type__open();
+	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+		goto cleanup;
+
+	/* don't load prog1 */
+	bpf_program__set_load_type(skel->progs.prog1, BPF_PROG_LOAD_TYPE_DISABLED);
+
+	/* load and attach prog2 */
+	bpf_program__set_load_type(skel->progs.prog2, BPF_PROG_LOAD_TYPE_AUTO);
+	CHECK(!bpf_program__autoload(skel->progs.prog2), "prog2", "not autoload?!\n");
+
+	err = test_load_type__load(skel);
+	if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
+		goto cleanup;
+
+	CHECK(!bpf_program__autoattach(skel->progs.prog2), "prog2", "not autoattach?!\n");
+
+	err = test_load_type__attach(skel);
+	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+		goto cleanup;
+
+	usleep(1);
+
+	CHECK(skel->bss->prog1_called, "prog1", "called?!\n");
+	CHECK(!skel->bss->prog2_called, "prog2", "not called\n");
+
+cleanup:
+	test_load_type__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_load_type.c b/tools/testing/selftests/bpf/progs/test_load_type.c
new file mode 100644
index 000000000000..a0d39757c5b9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_load_type.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+bool prog1_called = false;
+bool prog2_called = false;
+
+SEC("raw_tp/sys_enter")
+int prog1(const void *ctx)
+{
+	prog1_called = true;
+	return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int prog2(const void *ctx)
+{
+	prog2_called = true;
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
BPF programs designated as dynamically loaded can be loaded and attached independently after the initial bpf_object loading and attaching.
These programs can also be reloaded and reattached multiple times, enabling more flexible management of a resident BPF program set.
A key motivation for this feature is to reduce load times for utilities that include hundreds of BPF programs. When the selection of a resident BPF program set cannot be determined at the time of bpf_object loading and attaching, all BPF programs would otherwise need to be marked as autoload, leading to unnecessary overhead. This patch addresses that inefficiency.
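For illustration, the intended usage flow (a sketch based on the selftest added by this patch; the skeleton type and program name come from that selftest):

#include <errno.h>
#include <bpf/libbpf.h>
#include "test_dynamicload.skel.h"

/* Sketch: mark prog3 dynamic before object load, then load/attach/unload
 * it on demand afterwards. The APIs are the ones proposed in this patch.
 */
static int run_prog3_on_demand(void)
{
	struct test_dynamicload *skel;
	struct bpf_link *link;
	int err;

	skel = test_dynamicload__open();
	if (!skel)
		return -errno;

	/* the load type must be chosen before the object is loaded */
	err = bpf_program__set_load_type(skel->progs.prog3,
					 BPF_PROG_LOAD_TYPE_DYNAMIC);
	if (!err)
		err = test_dynamicload__load(skel); /* prog3 stays unloaded */
	if (!err)
		err = bpf_program__load_dynamically(skel->progs.prog3, 0);
	if (!err) {
		link = bpf_program__attach(skel->progs.prog3);
		if (!link) {
			err = -errno;
		} else {
			bpf_link__destroy(link);
			err = bpf_program__unload_dynamically(skel->progs.prog3);
		}
	}

	test_dynamicload__destroy(skel);
	return err;
}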
Signed-off-by: Slava Imameev <slava.imameev@crowdstrike.com>
---
 tools/lib/bpf/libbpf.c                        | 144 +++++++++++++++--
 tools/lib/bpf/libbpf.h                        |   5 +-
 tools/lib/bpf/libbpf.map                      |   2 +
 .../selftests/bpf/prog_tests/dynamicload.c    | 145 ++++++++++++++++++
 .../selftests/bpf/prog_tests/load_type.c      |  61 ++++++++
 .../selftests/bpf/progs/test_dynamicload.c    |  31 ++++
 .../selftests/bpf/progs/test_load_type.c      |   8 +
 7 files changed, 385 insertions(+), 11 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/dynamicload.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_dynamicload.c
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 9af5c0b08b8b..731a4a09f865 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -689,6 +689,7 @@ struct bpf_object {
 	bool loaded;
 	bool has_subcalls;
 	bool has_rodata;
+	bool has_dynload_progs;
 
 	struct bpf_gen *gen_loader;
 
@@ -7551,13 +7552,15 @@ static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog
 	 * custom log_buf is specified; if the program load fails, then we'll
 	 * bump log_level to 1 and use either custom log_buf or we'll allocate
 	 * our own and retry the load to get details on what failed
+	 * A shared buffer cannot be used for dynamically loaded programs as they
+	 * can be loaded concurrently.
 	 */
 	if (log_level) {
 		if (prog->log_buf) {
 			log_buf = prog->log_buf;
 			log_buf_size = prog->log_size;
 			own_log_buf = false;
-		} else if (obj->log_buf) {
+		} else if (obj->log_buf && prog->load_type != BPF_PROG_LOAD_TYPE_DYNAMIC) {
 			log_buf = obj->log_buf;
 			log_buf_size = obj->log_size;
 			own_log_buf = false;
@@ -7911,6 +7914,7 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)
 			pr_debug("prog '%s': skipped auto-loading\n", prog->name);
 			continue;
 		}
+
 		prog->log_level |= log_level;
 
 		if (obj->gen_loader)
@@ -8588,8 +8592,11 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch
 			err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps);
 	}
 
-	/* clean up fd_array */
-	zfree(&obj->fd_array);
+	/* The fd array is needed for dynamically loaded programs,
+	 * so defer freeing it in that case to the end of the object lifetime.
+	 */
+	if (!obj->has_dynload_progs || !obj->fd_array_cnt)
+		zfree(&obj->fd_array);
 
 	/* clean up module BTFs */
 	for (i = 0; i < obj->btf_module_cnt; i++) {
@@ -8597,11 +8604,17 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch
 		btf__free(obj->btf_modules[i].btf);
 		free(obj->btf_modules[i].name);
 	}
-	free(obj->btf_modules);
+	obj->btf_module_cnt = 0;
+	zfree(&obj->btf_modules);
 
-	/* clean up vmlinux BTF */
-	btf__free(obj->btf_vmlinux);
-	obj->btf_vmlinux = NULL;
+	/* The btf_vmlinux data is needed for dynamically loaded programs,
+	 * so defer freeing it in that case to the end of the object lifetime.
+	 */
+	if (!obj->has_dynload_progs) {
+		/* clean up vmlinux BTF */
+		btf__free(obj->btf_vmlinux);
+		obj->btf_vmlinux = NULL;
+	}
 
 	obj->loaded = true; /* doesn't matter if successfully or not */
 
@@ -9103,6 +9116,8 @@ void bpf_object__close(struct bpf_object *obj)
 
 	zfree(&obj->arena_data);
 
+	zfree(&obj->fd_array);
+
 	free(obj);
 }
 
@@ -9230,8 +9245,16 @@ bool bpf_program__autoload(const struct bpf_program *prog)
 
 int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
 {
-	return bpf_program__set_load_type(prog,
-		autoload ? BPF_PROG_LOAD_TYPE_AUTO : BPF_PROG_LOAD_TYPE_DISABLED);
+	enum bpf_prog_load_type type = prog->load_type;
+
+	if (autoload)
+		type = BPF_PROG_LOAD_TYPE_AUTO;
+	else if (prog->load_type == BPF_PROG_LOAD_TYPE_AUTO)
+		type = BPF_PROG_LOAD_TYPE_DISABLED;
+	else
+		return 0; /* Otherwise, keep the current load type. */
+
+	return bpf_program__set_load_type(prog, type);
 }
 
 bool bpf_program__autoattach(const struct bpf_program *prog)
@@ -14086,12 +14109,67 @@ void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
 	free(s);
 }
 
+static int bpf_program__set_dynamicload(struct bpf_program *prog)
+{
+	struct bpf_object *obj;
+	const char *attach_name;
+
+	obj = prog->obj;
+	if (!obj)
+		return libbpf_err(-EINVAL);
+
+	/* Dynamically loaded programs are not supported for gen_loader.
+	 * This limitation exists because bpf_object_load_prog is not invoked
+	 * for dynamically loaded programs, making them invisible to gen_loader.
+	 * To ensure compatibility, bpf_program__set_dynamicload should not be
+	 * called when gen_loader is used to generate a BPF object loader.
+	 * The gen_loader implementation handles autoloaded programs and follows
+	 * its own model for loading BPF programs. To pass a BPF program to
+	 * gen_loader, set the program's load type to BPF_PROG_LOAD_TYPE_AUTO.
+	 */
+	if (obj->gen_loader)
+		return libbpf_err(-ENOTSUP);
+
+	if (prog_is_subprog(obj, prog))
+		return libbpf_err(-EINVAL);
+
+	attach_name = strchr(prog->sec_name, '/');
+	if (!attach_name || strchr(attach_name, ':')) {
+		/* Dynamic loading is not supported if module's BTF
+		 * data is required for a bpf program.
+		 * The module's BTF data is required in the following cases:
+		 * - If a BPF program is annotated with just SEC("fentry")
+		 *   (or similar) without declaratively specifying
+		 *   target, then it is expected that target will be
+		 *   specified with bpf_program__set_attach_target() at
+		 *   runtime before BPF object load step. The module's
+		 *   BTF data will be required by libbpf_prepare_prog_load and
+		 *   libbpf_find_attach_btf_id.
+		 * - The attach name is prepended with a module name.
+		 */
+		return libbpf_err(-EINVAL);
+	}
+
+	obj->has_dynload_progs = true;
+	prog->load_type = BPF_PROG_LOAD_TYPE_DYNAMIC;
+	prog->autoattach = false;
+
+	return 0;
+}
+
 int bpf_program__set_load_type(struct bpf_program *prog, enum bpf_prog_load_type type)
 {
 	if (prog->obj->loaded)
 		return libbpf_err(-EINVAL);
 
-	prog->load_type = type;
+	switch (type) {
+	case BPF_PROG_LOAD_TYPE_DYNAMIC:
+		return bpf_program__set_dynamicload(prog);
+	default:
+		prog->load_type = type;
+		break;
+	}
+
 	return 0;
 }
 
@@ -14099,3 +14177,49 @@ enum bpf_prog_load_type bpf_program__load_type(const struct bpf_program *prog)
 {
 	return prog->load_type;
 }
+
+/*
+ * This function must be called after bpf_object__load_progs.
+ * Dynamically-loaded program data is initialized on object load.
+ * Post-load initialization is not supported.
+ */
+int
+bpf_program__load_dynamically(struct bpf_program *prog, int extra_log_level)
+{
+	int err;
+	struct bpf_object *obj;
+
+	obj = prog->obj;
+	if (!obj || !obj->loaded)
+		return libbpf_err(-EINVAL);
+
+	if (prog_is_subprog(obj, prog) || prog->load_type != BPF_PROG_LOAD_TYPE_DYNAMIC)
+		return libbpf_err(-EINVAL);
+
+	prog->log_level |= extra_log_level;
+
+	err = bpf_object_load_prog(obj, prog, prog->insns, prog->insns_cnt,
+				   obj->license, obj->kern_version, &prog->fd);
+	if (err) {
+		pr_warn("prog '%s': failed to dynamically load: %d\n", prog->name, err);
+		prog->log_level &= ~extra_log_level;
+		return err;
+	}
+
+	prog->log_level &= ~extra_log_level;
+	return 0;
+}
+
+int bpf_program__unload_dynamically(struct bpf_program *prog)
+{
+	int err;
+
+	if (!prog || prog->load_type != BPF_PROG_LOAD_TYPE_DYNAMIC)
+		return libbpf_err(-EINVAL);
+
+	/* Close the file descriptor but retain the program's data to
+	 * support reloading the program if it is required again.
+	 */
+	err = zclose(prog->fd);
+	return err ? libbpf_err(-errno) : 0;
+}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 21e3d1f51cb3..531f30491f0b 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -1923,16 +1923,19 @@ LIBBPF_API int libbpf_unregister_prog_handler(int handler_id);
  *
  * - BPF_PROG_LOAD_TYPE_DISABLED: the program is not loaded.
  * - BPF_PROG_LOAD_TYPE_AUTO: the program is autoloaded when the bpf_object is loaded.
+ * - BPF_PROG_LOAD_TYPE_DYNAMIC: the program is loaded and attached dynamically.
  */
 enum bpf_prog_load_type {
 	BPF_PROG_LOAD_TYPE_DISABLED = 0,
 	BPF_PROG_LOAD_TYPE_AUTO,
+	BPF_PROG_LOAD_TYPE_DYNAMIC,
 };
 
 LIBBPF_API int bpf_program__set_load_type(struct bpf_program *prog,
 					  enum bpf_prog_load_type loadtype);
 LIBBPF_API enum bpf_prog_load_type bpf_program__load_type(const struct bpf_program *prog);
-
+LIBBPF_API int bpf_program__load_dynamically(struct bpf_program *prog, int extra_log_level);
+LIBBPF_API int bpf_program__unload_dynamically(struct bpf_program *prog);
 
 #ifdef __cplusplus
 } /* extern "C" */
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 08323e7930fd..4d84e4794685 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -438,4 +438,6 @@ LIBBPF_1.6.0 {
 	bpf_linker__new_fd;
 	bpf_program__load_type;
 	bpf_program__set_load_type;
+	bpf_program__load_dynamically;
+	bpf_program__unload_dynamically;
 } LIBBPF_1.5.0;
diff --git a/tools/testing/selftests/bpf/prog_tests/dynamicload.c b/tools/testing/selftests/bpf/prog_tests/dynamicload.c
new file mode 100644
index 000000000000..9cde7dd45608
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/dynamicload.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include <time.h>
+#include "test_dynamicload.skel.h"
+
+void test_dynamicload(void)
+{
+	int duration = 0, err;
+	struct bpf_link *link;
+	struct test_dynamicload *skel;
+
+	skel = test_dynamicload__open();
+	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+		goto cleanup;
+
+	/* don't load prog1 */
+	bpf_program__set_load_type(skel->progs.prog1, BPF_PROG_LOAD_TYPE_DISABLED);
+
+	/* prog2 is autoload */
+	bpf_program__set_load_type(skel->progs.prog2, BPF_PROG_LOAD_TYPE_AUTO);
+
+	/* prog3 is dynamically loaded */
+	bpf_program__set_load_type(skel->progs.prog3, BPF_PROG_LOAD_TYPE_DYNAMIC);
+
+	err = test_dynamicload__load(skel);
+	if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
+		goto cleanup;
+
+	err = test_dynamicload__attach(skel);
+	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+		goto cleanup;
+
+	/* trigger the BPF programs */
+	usleep(1);
+
+	CHECK(skel->bss->prog1_called, "prog1", "called?!\n");
+	CHECK(!skel->bss->prog2_called, "prog2", "not called\n");
+	CHECK(skel->bss->prog3_called, "prog3", "called?!\n");
+
+	/* prog1 is disabled for load */
+	err = bpf_program__load_dynamically(skel->progs.prog1, 0);
+	if (CHECK(!err, "load_dynamically", "disabled program loaded?!\n"))
+		goto cleanup;
+
+	/* prog1 is disabled for load */
+	err = bpf_program__unload_dynamically(skel->progs.prog1);
+	if (CHECK(!err, "load_dynamically", "disabled program unloaded?!\n"))
+		goto cleanup;
+
+	/* prog2 is autoload */
+	err = bpf_program__load_dynamically(skel->progs.prog1, 0);
+	if (CHECK(!err, "load_dynamically", "autoload loaded dynamically?!\n"))
+		goto cleanup;
+
+	/* prog2 is autoload */
+	err = bpf_program__unload_dynamically(skel->progs.prog1);
+	if (CHECK(!err, "load_dynamically", "autoload unloaded dynamically?!\n"))
+		goto cleanup;
+
+	/* reset the call flags */
+	skel->bss->prog2_called = false;
+	skel->bss->prog3_called = false;
+
+	usleep(1);
+
+	CHECK(skel->bss->prog1_called, "prog1", "called?!\n");
+	CHECK(!skel->bss->prog2_called, "prog2", "not called\n");
+	CHECK(skel->bss->prog3_called, "prog3", "called?!\n");
+
+	/* load prog3 */
+	err = bpf_program__load_dynamically(skel->progs.prog3, 0);
+	if (CHECK(err, "load_dynamically", "dynamic loading failed: %d\n", err))
+		goto cleanup;
+
+	/* attach prog3 */
+	link = bpf_program__attach(skel->progs.prog3);
+	if (CHECK(libbpf_get_error(link), "attach", "attaching failed: %ld\n",
+		  libbpf_get_error(link)))
+		goto cleanup;
+
+	usleep(1);
+
+	CHECK(!skel->bss->prog3_called, "prog3", "not called\n");
+
+	/* detach prog3 as test_dynamicload__destroy doesn't detach dynamically loaded programs */
+	err = bpf_link__destroy(link);
+	if (CHECK(err, "link__destroy", "link destroy failed: %d\n", err))
+		goto cleanup;
+
+	/* reset the call flags after detach */
+	skel->bss->prog2_called = false;
+	skel->bss->prog3_called = false;
+
+	usleep(1);
+
+	CHECK(!skel->bss->prog2_called, "prog2", "not called\n");
+	CHECK(skel->bss->prog3_called, "prog3", "called?!\n");
+
+	/* unload prog3 */
+	err = bpf_program__unload_dynamically(skel->progs.prog3);
+	if (CHECK(err, "unload_dynamically", "unload dynamically failed: %d\n", err))
+		goto cleanup;
+
+	/* reload prog3 */
+	err = bpf_program__load_dynamically(skel->progs.prog3, 0);
+	if (CHECK(err, "load_dynamically", "dynamic reloading failed: %d\n", err))
+		goto cleanup;
+
+	/* reattach prog3 */
+	link = bpf_program__attach(skel->progs.prog3);
+	if (CHECK(libbpf_get_error(link), "attach", "reattaching failed: %d\n", err))
+		goto cleanup;
+
+	usleep(1);
+
+	CHECK(!skel->bss->prog3_called, "prog3", "not called\n");
+
+	/* detach prog3 as test_dynamicload__destroy doesn't detach dynamically loaded programs */
+	err = bpf_link__destroy(link);
+	if (CHECK(err, "link__destroy", "link destroy failed: %d\n", err))
+		goto cleanup;
+
+	/* verify regular unload for dynamically loaded program,
+	 * unload prog3 as a regular program
+	 */
+	bpf_program__unload(skel->progs.prog3);
+
+	/* reset the call flags after unload */
+	skel->bss->prog2_called = false;
+	skel->bss->prog3_called = false;
+
+	usleep(1);
+
+	CHECK(!skel->bss->prog2_called, "prog2", "not called\n");
+	CHECK(skel->bss->prog3_called, "prog3", "called?!\n");
+
+	/* reloading prog3 must fail as it was unloaded as a regular program */
+	err = bpf_program__load_dynamically(skel->progs.prog3, 0);
+	if (CHECK(!err, "load_dynamically", "dynamic reloading succeeded?! %d\n", err))
+		goto cleanup;
+
+cleanup:
+	test_dynamicload__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/load_type.c b/tools/testing/selftests/bpf/prog_tests/load_type.c
index 7c8d55173b2b..8bd082b3bc9d 100644
--- a/tools/testing/selftests/bpf/prog_tests/load_type.c
+++ b/tools/testing/selftests/bpf/prog_tests/load_type.c
@@ -7,6 +7,7 @@
 void test_load_type(void)
 {
 	int duration = 0, err;
+	struct bpf_link *link;
 	struct test_load_type *skel;
 
 	skel = test_load_type__open();
@@ -20,11 +21,47 @@ void test_load_type(void)
 	bpf_program__set_load_type(skel->progs.prog2, BPF_PROG_LOAD_TYPE_AUTO);
 	CHECK(!bpf_program__autoload(skel->progs.prog2), "prog2", "not autoload?!\n");
 
+	err = bpf_program__set_load_type(skel->progs.prog3, BPF_PROG_LOAD_TYPE_DYNAMIC);
+	if (CHECK(err, "set_load_type", "set_load_type(DYNAMIC) failed: %d\n", err))
+		goto cleanup;
+	CHECK(bpf_program__load_type(skel->progs.prog3) != BPF_PROG_LOAD_TYPE_DYNAMIC,
+	      "prog3", "didn't set type?!\n");
+
+	/* bpf_program__set_autoload(program, false) doesn't have effect if the program
+	 * type is not BPF_PROG_LOAD_TYPE_AUTO
+	 */
+	err = bpf_program__set_autoload(skel->progs.prog3, false);
+	if (CHECK(err, "set_autoload", "set_autoload(false) failed: %d\n", err))
+		goto cleanup;
+
+	CHECK(bpf_program__load_type(skel->progs.prog3) != BPF_PROG_LOAD_TYPE_DYNAMIC,
+	      "prog3", "changed type?!\n");
+
+	err = bpf_program__set_autoload(skel->progs.prog3, true);
+	if (CHECK(err, "set_autoload", "set_autoload(true) failed: %d\n", err))
+		goto cleanup;
+
+	CHECK(bpf_program__load_type(skel->progs.prog3) != BPF_PROG_LOAD_TYPE_AUTO,
+	      "prog3", "didn't change type to auto?!\n");
+
+	/* change the type back to BPF_PROG_LOAD_TYPE_DYNAMIC */
+	err = bpf_program__set_load_type(skel->progs.prog3, BPF_PROG_LOAD_TYPE_DYNAMIC);
+	if (CHECK(err, "set_load_type", "changing from AUTO to DYNAMIC failed: %d\n", err))
+		goto cleanup;
+
+	CHECK(bpf_program__load_type(skel->progs.prog3) != BPF_PROG_LOAD_TYPE_DYNAMIC,
+	      "prog3", "didn't change type from autoload to dynamic?!\n");
+
 	err = test_load_type__load(skel);
 	if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
 		goto cleanup;
 
 	CHECK(!bpf_program__autoattach(skel->progs.prog2), "prog2", "not autoattach?!\n");
+	CHECK(bpf_program__autoattach(skel->progs.prog3), "prog3", "autoattach?!\n");
+
+	/* loaded program type cannot be changed */
+	err = bpf_program__set_load_type(skel->progs.prog3, BPF_PROG_LOAD_TYPE_DISABLED);
+	CHECK(!err, "prog3", "changed type after load?!\n");
 
 	err = test_load_type__attach(skel);
 	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
@@ -34,6 +71,30 @@ void test_load_type(void)
 
 	CHECK(skel->bss->prog1_called, "prog1", "called?!\n");
 	CHECK(!skel->bss->prog2_called, "prog2", "not called\n");
+	CHECK(skel->bss->prog3_called, "prog3", "called?!\n");
+
+	err = bpf_program__load_dynamically(skel->progs.prog3, 0);
+	if (CHECK(err, "load_dynamically", "load dynamically failed: %d\n", err))
+		goto cleanup;
+
+	err = bpf_program__load_dynamically(skel->progs.prog3, 0);
+	if (CHECK(err, "load_dynamically", "load dynamically failed: %d\n", err))
+		goto cleanup;
+
+	/* attach prog3 */
+	link = bpf_program__attach(skel->progs.prog3);
+	if (CHECK(libbpf_get_error(link), "attach", "attaching failed: %ld\n",
+		  libbpf_get_error(link)))
+		goto cleanup;
+
+	usleep(1);
+
+	CHECK(!skel->bss->prog3_called, "prog3", "not called?!\n");
+
+	/* detach prog3 as test_load_type__destroy doesn't detach dynamically loaded programs */
+	err = bpf_link__destroy(link);
+	if (CHECK(err, "link__destroy", "link destroy failed: %d\n", err))
+		goto cleanup;
 
 cleanup:
 	test_load_type__destroy(skel);
diff --git a/tools/testing/selftests/bpf/progs/test_dynamicload.c b/tools/testing/selftests/bpf/progs/test_dynamicload.c
new file mode 100644
index 000000000000..3d9b81691d7a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_dynamicload.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+bool prog1_called = false;
+bool prog2_called = false;
+bool prog3_called = false;
+
+SEC("raw_tp/sys_enter")
+int prog1(const void *ctx)
+{
+	prog1_called = true;
+	return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int prog2(const void *ctx)
+{
+	prog2_called = true;
+	return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int prog3(const void *ctx)
+{
+	prog3_called = true;
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_load_type.c b/tools/testing/selftests/bpf/progs/test_load_type.c
index a0d39757c5b9..3d9b81691d7a 100644
--- a/tools/testing/selftests/bpf/progs/test_load_type.c
+++ b/tools/testing/selftests/bpf/progs/test_load_type.c
@@ -5,6 +5,7 @@
 
 bool prog1_called = false;
 bool prog2_called = false;
+bool prog3_called = false;
 
 SEC("raw_tp/sys_enter")
 int prog1(const void *ctx)
@@ -20,4 +21,11 @@ int prog2(const void *ctx)
 	return 0;
 }
 
+SEC("raw_tp/sys_enter")
+int prog3(const void *ctx)
+{
+	prog3_called = true;
+	return 0;
+}
+
 char _license[] SEC("license") = "GPL";
On Wed, Jan 22, 2025 at 1:53 PM Slava Imameev slava.imameev@crowdstrike.com wrote:
BPF programs designated as dynamically loaded can be loaded and attached independently after the initial bpf_object loading and attaching.
These programs can also be reloaded and reattached multiple times, enabling more flexible management of a resident BPF program set.
A key motivation for this feature is to reduce load times for utilities that include hundreds of BPF programs. When the selection of a resident BPF program set cannot be determined at the time of bpf_object loading and attaching, all BPF programs would otherwise need to be marked as autoload, leading to unnecessary overhead. This patch addresses that inefficiency.
Can you elaborate on why it's impossible to determine which BPF programs should be loaded before BPF object load step?
In general, I'm not too excited about these complications, it's error-prone enough with just normal autoload (true/false) logic and various interactions between different features. Adding the third "may be loaded much later" state just doesn't seem worth the complexity.
Also, for subsequent submissions, please make sure you have [PATCH bpf-next] subject prefix.
Signed-off-by: Slava Imameev slava.imameev@crowdstrike.com
 tools/lib/bpf/libbpf.c                        | 144 +++++++++++++++--
 tools/lib/bpf/libbpf.h                        |   5 +-
 tools/lib/bpf/libbpf.map                      |   2 +
 .../selftests/bpf/prog_tests/dynamicload.c    | 145 ++++++++++++++++++
 .../selftests/bpf/prog_tests/load_type.c      |  61 ++++++++
 .../selftests/bpf/progs/test_dynamicload.c    |  31 ++++
 .../selftests/bpf/progs/test_load_type.c      |   8 +
and let's keep selftests in a separate patch from kernel and/or libbpf changes
 7 files changed, 385 insertions(+), 11 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/dynamicload.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_dynamicload.c
[...]
On Fri, 2025-01-24 at 10:31 -0800, Andrii Nakryiko wrote:
On Wed, Jan 22, 2025 at 1:53 PM Slava Imameev slava.imameev@crowdstrike.com wrote:
BPF programs designated as dynamically loaded can be loaded and attached independently after the initial bpf_object loading and attaching.
These programs can also be reloaded and reattached multiple times, enabling more flexible management of a resident BPF program set.
A key motivation for this feature is to reduce load times for utilities that include hundreds of BPF programs. When the selection of a resident BPF program set cannot be determined at the time of bpf_object loading and attaching, all BPF programs would otherwise need to be marked as autoload, leading to unnecessary overhead. This patch addresses that inefficiency.
Can you elaborate on why it's impossible to determine which BPF programs should be loaded before BPF object load step?
The main use case for this patch is large applications that need to dynamically load/unload BPF programs. Our specific use case is a continuously-running security application with a dynamically-reconfigurable feature set. As part of that reconfiguration, BPF programs may get loaded/unloaded on-the-fly. Restarting the entire application during reconfiguration is undesirable, as critical state data can be lost and loading hundreds of BPF programs is time-consuming.
The above points apply more generically to *any* application that requires dynamic loading/unloading. Reconfiguration can be done via a restart, but that has drawbacks:
(a) Losing any non-persistent application state on restart. In our case, this creates a lapse in security that could be exploited by adversaries.
(b) In applications with many programs, load+attach can take a long time. We measured load+attach of ~100 BPF programs taking ~10 seconds when done with current libbpf serially. Dynamically loading only the programs needed avoids wasting memory and CPU cycles.
(c) The application itself might take a long time to restart, separate from the BPF load/attach time. By loading dynamically, the BPF programs can take effect much sooner and avoid wasted restart cycles.
This patch set also permits loading BPF programs in parallel if the application wishes. We tested parallel loading with 200+ BPF programs and found the load time dropped from 18 seconds to 5 seconds when done in parallel on a 6.8 kernel.
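For illustration, the application-side parallel load could look roughly like this (a sketch against the bpf_program__load_dynamically() API proposed in this series; it assumes the programs were marked BPF_PROG_LOAD_TYPE_DYNAMIC before object load):

#include <pthread.h>
#include <bpf/libbpf.h>

/* Sketch: one loader thread per dynamically-loadable program. */
static void *load_one(void *arg)
{
	struct bpf_program *prog = arg;

	return (void *)(long)bpf_program__load_dynamically(prog, 0);
}

static int load_parallel(struct bpf_program **progs, int cnt)
{
	pthread_t tids[cnt];
	void *ret;
	int i, err = 0;

	for (i = 0; i < cnt; i++)
		pthread_create(&tids[i], NULL, load_one, progs[i]);
	for (i = 0; i < cnt; i++) {
		pthread_join(tids[i], &ret);
		err = err ?: (int)(long)ret;
	}
	return err;
}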
In the future, this approach could also allow maps to not be autoloaded, further saving on memory if no program needs the underlying map.
In summary, we believe dynamic loading of BPF programs is an important capability that will improve the performance of CrowdStrike's security applications as well as being useful to other applications that want to avoid restarts.
On Tue, Jan 28, 2025 at 3:08 PM Martin Kelly martin.kelly@crowdstrike.com wrote:
On Fri, 2025-01-24 at 10:31 -0800, Andrii Nakryiko wrote:
On Wed, Jan 22, 2025 at 1:53 PM Slava Imameev slava.imameev@crowdstrike.com wrote:
BPF programs designated as dynamically loaded can be loaded and attached independently after the initial bpf_object loading and attaching.
These programs can also be reloaded and reattached multiple times, enabling more flexible management of a resident BPF program set.
A key motivation for this feature is to reduce load times for utilities that include hundreds of BPF programs. When the selection of a resident BPF program set cannot be determined at the time of bpf_object loading and attaching, all BPF programs would otherwise need to be marked as autoload, leading to unnecessary overhead. This patch addresses that inefficiency.
Can you elaborate on why it's impossible to determine which BPF programs should be loaded before BPF object load step?
The main use case for this patch is large applications that need to dynamically load/unload BPF programs. Our specific use case is a continuously-running security application with a dynamically- reconfigurable feature set. As part of that reconfiguration, BPF programs may get loaded/unloaded on-the-fly. Restarting the entire application during reconfiguration is undesirable, as critical state data can be lost and loading hundreds of BPF programs is time- consuming.
Thanks for the details (and sorry for late reply, been traveling lately)!
I don't want to complicate bpf_object internals with this third autoload state, as this creates tons of non-obvious gotchas that have to constantly be checked whenever any new feature is added to bpf_object (or even during refactorings). It does seem like you have a pretty complicated use case, and so maybe some of the alternatives would be just fine for you.
I see two ways forward for you. Either you can break apart your BPF object of ~100 BPF programs into more independent BPF objects (seeing that programs can be independently loaded/unloaded depending on configuration, seems like you do have a bunch of logic independence, right?). I assume shared BPF maps are the biggest reason to keep all those programs together in one BPF object. To share BPF maps between multiple BPF objects libbpf provides two complementary interfaces:
- bpf_map__reuse_fd() for manual control
- BPF map pinning (could be declarative or manual)
This way you can ensure that all BPF objects would use the same BPF map, where necessary.
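For illustration, a minimal sketch of the bpf_map__reuse_fd() route; the map name "shared_map" and the requirement that obj_a is already loaded are assumptions:

#include <errno.h>
#include <bpf/libbpf.h>

/* Make obj_b's "shared_map" reuse the map already created for obj_a.
 * Call after obj_a is loaded and before obj_b is loaded.
 */
static int share_map(struct bpf_object *obj_a, struct bpf_object *obj_b)
{
	struct bpf_map *map_a, *map_b;

	map_a = bpf_object__find_map_by_name(obj_a, "shared_map");
	map_b = bpf_object__find_map_by_name(obj_b, "shared_map");
	if (!map_a || !map_b)
		return -ENOENT;

	return bpf_map__reuse_fd(map_b, bpf_map__fd(map_a));
}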
Alternatively, we can look at this problem as needing libbpf to only prepare BPF program code (doing all the relocations and stuff like that), but then application actually taking care of loading/unloading BPF program with bpf_prog_load() outside of bpf_object abstraction. I've had almost-ready patches splitting bpf_object__load() into two steps: bpf_object__prepare() and bpf_object__load() after that. "prepare" step would create BPF maps, load BTF information, perform necessary relocations and arrive at final state of BPF program code (which you can get with bpf_program__insns() API), but stopping just short of actually doing bpf_prog_load() step.
This seems like it would solve your problem as well. You'd use libbpf to do all the low-level ELF processing and relocation, but then take over managing BPF program lifetime. Loading/unloading as you see fit, including in parallel.
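For illustration, the hand-off could look roughly like this, using only existing libbpf APIs (the "GPL" license string is an assumption):

#include <bpf/bpf.h>
#include <bpf/libbpf.h>

/* Sketch: load one program's final (already relocated) instructions
 * manually, outside bpf_object. Returns a prog fd or a negative error.
 */
static int load_prog_manually(const struct bpf_program *prog)
{
	LIBBPF_OPTS(bpf_prog_load_opts, opts);

	return bpf_prog_load(bpf_program__type(prog),
			     bpf_program__name(prog), "GPL",
			     bpf_program__insns(prog),
			     bpf_program__insn_cnt(prog), &opts);
}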
Is this something that would work for you?
The above points apply more generically to *any* application that requires dynamic loading/unloading. Reconfiguration can be done via a restart, but that has drawbacks:
(a) Losing any non-persistent application state on restart. In our case, this creates a lapse in security that could be exploited by adversaries.
(b) In applications with many programs, load+attach can take a long time. We measured load+attach of ~100 BPF programs taking ~10 seconds when done with current libbpf serially. Dynamically loading only the programs needed avoids wasting memory and CPU cycles.
(c) The application itself might take a long time to restart, separate from the BPF load/attach time. By loading dynamically, the BPF programs can take effect much sooner and avoid wasted restart cycles.
This patch set also permits loading BPF programs in parallel if the application wishes. We tested parallel loading with 200+ BPF programs and found the load time dropped from 18 seconds to 5 seconds when done in parallel on a 6.8 kernel.
bpf_object is intentionally single-threaded, so I don't think we'll be supporting parallel BPF program loading in the paradigm of bpf_object (but see the bpf_object__prepare() proposal). Even from API standpoint this is problematic with logging and log buffers basically assuming single-threaded execution of BPF program loading.
All that could be changed or worked around, but your use case is not really a typical case, so I'm a bit hesitant at this point.
In the future, this approach could also allow maps to not be autoloaded, further saving on memory if no program needs the underlying map.
In summary, we believe dynamic loading of BPF programs is an important capability that will improve the performance of CrowdStrike's security applications as well as being useful to other applications that want to avoid restarts.
On Wed, 2025-02-05 at 14:33 -0800, Andrii Nakryiko wrote:
I see two ways forward for you. Either you can break apart your BPF object of ~100 BPF programs into more independent BPF objects (seeing that programs can be independently loaded/unloaded depending on configuration, seems like you do have a bunch of logic independence, right?). I assume shared BPF maps are the biggest reason to keep all those programs together in one BPF object. To share BPF maps between multiple BPF objects libbpf provides two complementary interfaces:

- bpf_map__reuse_fd() for manual control
- BPF map pinning (could be declarative or manual)

This way you can ensure that all BPF objects would use the same BPF map, where necessary.
I think this approach *could* work but could easily become complex for us because we'd need to track all the dependencies between programs and maps, and anything missed could lead to difficult refcount bugs.
Further, splitting into objects incurs some performance and memory cost because bpf_object__load_vmlinux_btf will be called for each object, and there's currently no way to share BTF data across the objects. Having a single BPF object avoids this issue. Potentially, libbpf could cache some BTF data to lessen the impact.
Alternatively, we can look at this problem as needing libbpf to only prepare BPF program code (doing all the relocations and stuff like that), but then application actually taking care of loading/unloading BPF program with bpf_prog_load() outside of bpf_object abstraction. I've had almost-ready patches splitting bpf_object__load() into two steps: bpf_object__prepare() and bpf_object__load() after that. "prepare" step would create BPF maps, load BTF information, perform necessary relocations and arrive at final state of BPF program code (which you can get with bpf_program__insns() API), but stopping just short of actually doing bpf_prog_load() step.

This seems like it would solve your problem as well. You'd use libbpf to do all the low-level ELF processing and relocation, but then take over managing BPF program lifetime. Loading/unloading as you see fit, including in parallel.

Is this something that would work for you?
I think this API could work, though I think we would need a few other modifications as well in order to correctly handle program/map dependencies and account for relocations. At a high level, I think we'd need something that includes:
1) A way to associate each BPF program with all the maps it will use (association of struct bpf_program * --> list of struct bpf_map * in some form). This is so that we can load/unload associated maps when we load/unload a program.
2) An API to create a BPF map, in case a new map needs to be loaded after initial startup.
3) An API to allow unloading a map while keeping map->fd reserved. This is important because the fd value is used by BPF program instructions, so without something like this, we'd have to redo the relocation process for any other BPF programs that access this map (and thus reload those programs too). This API could be implemented by dup'ing a placeholder fd.
Alternatively, if libbpf could automatically refcount maps across multiple BPF objects to load/unload them on demand, then all of the above work could happen behind the scenes. This would be similar to the other approach you mentioned, but with libbpf doing the refcounting heavy lifting instead of leaving that to each application, thus more robust and elegant. This would mean changing libbpf to (a) synchronize access to some map functions and (b) allowing struct bpf_map * to be shared across BPF objects. Perhaps a concept of a "collection of BPF objects" might allow for this.
This patch set also permits loading BPF programs in parallel if the application wishes. We tested parallel loading with 200+ BPF programs and found the load time dropped from 18 seconds to 5 seconds when done in parallel on a 6.8 kernel.

bpf_object is intentionally single-threaded, so I don't think we'll be supporting parallel BPF program loading in the paradigm of bpf_object (but see the bpf_object__prepare() proposal). Even from API standpoint this is problematic with logging and log buffers basically assuming single-threaded execution of BPF program loading.

All that could be changed or worked around, but your use case is not really a typical case, so I'm a bit hesitant at this point.
I can understand where you're coming from if no one else has mentioned a use case like this. We can do parallel loading by splitting our programs into BPF objects, but unless the objects are split very evenly, this results in less optimal load time. For example, if 100 programs are split into 2 objects and one object has 80 programs while the other has 20, then the one with 80 programs creates a bottleneck.
On Fri, Feb 7, 2025 at 5:13 PM Martin Kelly martin.kelly@crowdstrike.com wrote:
On Wed, 2025-02-05 at 14:33 -0800, Andrii Nakryiko wrote:
I see two ways forward for you. Either you can break apart your BPF object of ~100 BPF programs into more independent BPF objects (seeing that programs can be independently loaded/unloaded depending on configuration, seems like you do have a bunch of logic independence, right?). I assume shared BPF maps are the biggest reason to keep all those programs together in one BPF object. To share BPF maps between multiple BPF objects libbpf provides two complementary interfaces:

- bpf_map__reuse_fd() for manual control
- BPF map pinning (could be declarative or manual)

This way you can ensure that all BPF objects would use the same BPF map, where necessary.
I think this approach *could* work but could easily become complex for us because we'd need to track all the dependencies between programs and maps, and anything missed could lead to difficult refcount bugs.
Further, splitting into objects incurs some performance and memory cost because bpf_object__load_vmlinux_btf will be called for each object, and there's currently no way to share BTF data across the objects. Having a single BPF object avoids this issue. Potentially, libbpf could cache some BTF data to lessen the impact.
Alternatively, we can look at this problem as needing libbpf to only prepare BPF program code (doing all the relocations and stuff like that), but then application actually taking care of loading/unloading BPF program with bpf_prog_load() outside of bpf_object abstraction. I've had almost-ready patches splitting bpf_object__load() into two steps: bpf_object__prepare() and bpf_object__load() after that. "prepare" step would create BPF maps, load BTF information, perform necessary relocations and arrive at final state of BPF program code (which you can get with bpf_program__insns() API), but stopping just short of actually doing bpf_prog_load() step.

This seems like it would solve your problem as well. You'd use libbpf to do all the low-level ELF processing and relocation, but then take over managing BPF program lifetime. Loading/unloading as you see fit, including in parallel.
Is this something that would work for you?
I think this API could work, though I think we would need a few other modifications as well in order to correctly handle program/map dependencies and account for relocations. At a high level, I think we'd need something that includes:
- A way to associate each BPF program with all the maps it will use (association of struct bpf_program * --> list of struct bpf_map * in some form). This is so that we can load/unload associated maps when we load/unload a program.
Tracking associated maps for a program is not necessary. As long as the last BPF program using the BPF map is unloaded, the kernel will automatically free not-anymore-referenced BPF map. Note that bpf_object itself will keep FDs for BPF maps, so you'd need to make sure to do bpf_object__close() to release those references.
But if you are going to ask to re-create BPF maps next time BPF program is loaded... Well, I'll say you are asking for a bit too much, tbh. If you want to be *that* sophisticated, it shouldn't be too hard for you to get all this information from BPF program's instructions.
- An API to create a BPF map, in case a new map needs to be loaded after initial startup.
bpf_map_create()?
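(i.e., roughly the following; the map type, name, and sizes are illustrative:)

#include <bpf/bpf.h>

/* Sketch: create a map directly, outside any bpf_object.
 * Returns a map fd or a negative error.
 */
static int create_hash_map(void)
{
	LIBBPF_OPTS(bpf_map_create_opts, opts);

	return bpf_map_create(BPF_MAP_TYPE_HASH, "my_map",
			      sizeof(__u32), sizeof(__u64),
			      1024, &opts);
}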
- An API to allow unloading a map while keeping map->fd reserved. This is important because the fd value is used by BPF program instructions, so without something like this, we'd have to redo the relocation process for any other BPF programs that access this map (and thus reload those programs too). This API could be implemented by dup'ing a placeholder fd.
dup2() or dup3()? (heh, and yeah, you did ask what I was anticipating above ;) )
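(For illustration, a sketch of that dup-based idea; the helper names and the /dev/null placeholder are illustrative, not an existing libbpf API:)

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>

/* Reserve a stable fd number via a harmless placeholder, then atomically
 * retarget it at a real map fd whenever the map is (re)created. Loaded
 * programs referencing the stable fd number are unaffected.
 */
static int reserve_placeholder_fd(void)
{
	int fd = open("/dev/null", O_RDONLY | O_CLOEXEC);

	return fd < 0 ? -errno : fd;
}

static int retarget_fd(int stable_fd, int new_map_fd)
{
	/* dup3() atomically replaces what stable_fd refers to */
	return dup3(new_map_fd, stable_fd, O_CLOEXEC) < 0 ? -errno : 0;
}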
Alternatively, if libbpf could automatically refcount maps across multiple BPF objects to load/unload them on demand, then all of the above work could happen behind the scenes. This would be similar to the other approach you mentioned, but with libbpf doing the refcounting heavy lifting instead of leaving that to each application, thus more robust and elegant. This would mean changing libbpf to (a) synchronize access to some map functions and (b) allowing struct bpf_map * to be shared across BPF objects. Perhaps a concept of a "collection of BPF objects" might allow for this.
bpf_object is the unit of coherence in libbpf, so I don't see us refcounting maps between bpf_objects. Kernel is doing refcounting based on FDs, so see if you can use that.
This patch set also permits loading BPF programs in parallel if the application wishes. We tested parallel loading with 200+ BPF programs and found the load time dropped from 18 seconds to 5 seconds when done in parallel on a 6.8 kernel.

bpf_object is intentionally single-threaded, so I don't think we'll be supporting parallel BPF program loading in the paradigm of bpf_object (but see the bpf_object__prepare() proposal). Even from API standpoint this is problematic with logging and log buffers basically assuming single-threaded execution of BPF program loading.

All that could be changed or worked around, but your use case is not really a typical case, so I'm a bit hesitant at this point.
I can understand where you're coming from if no one else has mentioned a use case like this. We can do parallel loading by splitting our programs into BPF objects, but unless the objects are split very evenly, this results in less optimal load time. For example, if 100 programs are split into 2 objects and one object has 80 programs while the other has 20, then the one with 80 programs creates a bottleneck.
Is 100 just a nicely looking rather large number, or do you really have 100 different BPF programs? Why so many and are they really all unique?
Asking because if it's just a way to attach BPF program doing more or less uniform set of actions for different hooks, then perhaps there are better ways to do this without having to duplicate BPF programs so much (like BPF cookie, multi-kprobes, etc, etc)
On Mon, 2025-02-10 at 16:06 -0800, Andrii Nakryiko wrote:
Tracking associated maps for a program is not necessary. As long as the last BPF program using the BPF map is unloaded, the kernel will automatically free not-anymore-referenced BPF map. Note that bpf_object itself will keep FDs for BPF maps, so you'd need to make sure to do bpf_object__close() to release those references.
But if you are going to ask to re-create BPF maps next time BPF program is loaded... Well, I'll say you are asking for a bit too much, tbh. If you want to be *that* sophisticated, it shouldn't be too hard for you to get all this information from BPF program's instructions.
We really are that sophisticated (see below for more details). We could scan program instructions, but we'd then tie our logic to BPF implementation details and duplicate logic already present in libbpf (https://elixir.bootlin.com/linux/v6.13.2/source/tools/lib/bpf/libbpf.c#L6087). Obviously this *can* be done but it's not at all ideal from an application perspective.
bpf_object is the unit of coherence in libbpf, so I don't see us refcounting maps between bpf_objects. Kernel is doing refcounting based on FDs, so see if you can use that.
I can understand that. That said, I think if there's no logic across objects, and bpf_object access is not thread-safe, it puts us into a tough situation:
- Complex refcounting, code scanning, etc to keep consistency when manipulating maps used by multiple programs.
- Parallel loading not being well-balanced, if we split programs across objects.
We could alternatively write our own custom loader, but then we’d have to duplicate much of the useful logic that libbpf already implements: skeleton generation, map/program association, embedding programs into ELFs, loading logic and kernel probing, etc. We’d like some way to handle dynamic/parallel loading without having to replicate all the advantages libbpf grants us.
Is 100 just a nicely looking rather large number, or do you really have 100 different BPF programs? Why so many and are they really all unique?
Asking because if it's just a way to attach BPF program doing more or less uniform set of actions for different hooks, then perhaps there are better ways to do this without having to duplicate BPF programs so much (like BPF cookie, multi-kprobes, etc, etc)
100 is not an arbitrary number; we have that and higher (~200 is a good current estimate, and that grows as new product features are added). The programs are really doing different things. We also have to support a wide range of kernels, handling cases like: "on this kernel range, trampolines aren't supported, so use kretprobes with a context map for function args instead of fexit, but on newer kernels just use an fexit hook."
The use case here is that our security monitoring agent leverages eBPF as its foundational technology to gather telemetry from the kernel. As part of that, we hook many different kernel subsystems (process, memory, filesystem, network, etc), tying them together and tracking with maps. So we legitimately have a very large number of programs all doing different work. For products of this scale, it increases security and performance to load this set of programs and their maps in an optimized, parallel fashion and subsequently change the loaded set of programs and maps dynamically without disturbing the rest of the application.
On Wed, Feb 12, 2025 at 2:31 PM Martin Kelly martin.kelly@crowdstrike.com wrote:
On Mon, 2025-02-10 at 16:06 -0800, Andrii Nakryiko wrote:
Tracking associated maps for a program is not necessary. As long as the last BPF program using the BPF map is unloaded, the kernel will automatically free not-anymore-referenced BPF map. Note that bpf_object itself will keep FDs for BPF maps, so you'd need to make sure to do bpf_object__close() to release those references.
But if you are going to ask to re-create BPF maps next time BPF program is loaded... Well, I'll say you are asking for a bit too much, tbh. If you want to be *that* sophisticated, it shouldn't be too hard for you to get all this information from BPF program's instructions.
We really are that sophisticated (see below for more details). We could scan program instructions, but we'd then tie our logic to BPF implementation details and duplicate logic already present in libbpf (https://elixir.bootlin.com/linux/v6.13.2/source/tools/lib/bpf/libbpf.c#L6087). Obviously this *can* be done but it's not at all ideal from an application perspective.
I agree it's not ideal, but it's also not some complicated and bound-to-be-changed logic. What you point out in libbpf source code is a bit different thing, reality is much simpler. Only so-called ldimm64 instruction (BPF_LD | BPF_IMM | BPF_DW opcode) can be referencing map FD, so analysing this is borderline trivial. And this is part of BPF ISA, so not going to change.
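(For illustration, such a scan could look roughly like this; it assumes libbpf has already rewritten the ldimm64 immediates to hold map FDs, which is what the BPF_PSEUDO_MAP_FD/BPF_PSEUDO_MAP_VALUE src_reg markers mean:)

#include <stdio.h>
#include <linux/bpf.h>
#include <bpf/libbpf.h>

/* Sketch: walk a program's final instructions and report referenced map FDs. */
static void print_referenced_map_fds(const struct bpf_program *prog)
{
	const struct bpf_insn *insns = bpf_program__insns(prog);
	size_t i, cnt = bpf_program__insn_cnt(prog);

	for (i = 0; i < cnt; i++) {
		const struct bpf_insn *insn = &insns[i];

		if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
			continue;
		if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
		    insn->src_reg == BPF_PSEUDO_MAP_VALUE)
			printf("insn %zu references map fd %d\n", i, insn->imm);
		i++; /* ldimm64 spans two insn slots; skip the second */
	}
}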
We need to double check, but I think libbpf doesn't use FD_ARRAY approach, unless you are using light skeleton, so if you don't you don't even have to worry about FD_ARRAY thing.
bpf_object is the unit of coherence in libbpf, so I don't see us refcounting maps between bpf_objects. Kernel is doing refcounting based on FDs, so see if you can use that.
I can understand that. That said, I think if there's no logic across objects, and bpf_object access is not thread-safe, it puts us into a tough situation:
- Complex refcounting, code scanning, etc to keep consistency when manipulating maps used by multiple programs.
- Parallel loading not being well-balanced, if we split programs across objects.
We could alternatively write our own custom loader, but then we’d have to duplicate much of the useful logic that libbpf already implements: skeleton generation, map/program association, embedding programs into ELFs, loading logic and kernel probing, etc. We’d like some way to handle dynamic/parallel loading without having to replicate all the advantages libbpf grants us.
Yeah, I can understand that as well, but bpf_object's single-threaded design and the fact that bpf_object__load is kind of the final step where programs are loaded (or not) is pretty backed in. I don't see bpf_object becoming multi-threaded. The dynamic program loading/unloading/loading again is something that I can't yet justify, tbh.
So the best I can propose you is to use libbpf's skeleton and bpf_object concept for, effectively, ELF handling, relocations, all the preparations up to loading BPF programs. And after that you can take over loading and handling program lifetime outside of bpf_object.
Dynamic map creation after bpf_object__load() I think is completely outside of the scope and you'll have to solve this problem for yourself. I would point out, though, that internally libbpf already switched to sort-of pre-creating stable FDs for maps before they are actually created in the kernel. So it's conceivable that we can have more granularity in bpf_object preparation. I.e., first step would be to parse ELF and handle relocations, prepare everything. After that we can have a step to create maps, and then another one to create programs. Usually people would do all that, but you can stop right before maps creation or before program creation, whatever fits your use case better.
The key is that program instructions will be final and won't need adjustments regardless of maps actually being created or not. FDs, as I mentioned, are stable regardless.
So, not ideal for your (very complicated) use case, but you still avoid dealing with all the ELF and relocation stuff (which is the annoying and rather complicated part, and I can see no one wanting to reimplement that). Map and program creation are relatively straightforward matters compared to that.
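(To make that concrete, the intended flow might look roughly like this; bpf_object__prepare() is the proposed, not-yet-existing step, everything else exists today, and the "GPL" license is an assumption:)

#include <errno.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

/* Hypothetical: open + relocate + create maps, then let the application
 * load programs itself (possibly in parallel, keeping the returned fds).
 */
static int prepare_then_load(const char *path)
{
	struct bpf_object *obj;
	struct bpf_program *prog;
	int fd, err = 0;

	obj = bpf_object__open_file(path, NULL);
	if (!obj)
		return -errno;

	err = bpf_object__prepare(obj); /* proposed API, placeholder here */
	if (err)
		goto out;

	bpf_object__for_each_program(prog, obj) {
		fd = bpf_prog_load(bpf_program__type(prog),
				   bpf_program__name(prog), "GPL",
				   bpf_program__insns(prog),
				   bpf_program__insn_cnt(prog), NULL);
		if (fd < 0) {
			err = fd;
			goto out;
		}
		/* a real application would record fd for later use */
	}
out:
	bpf_object__close(obj);
	return err;
}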
Is 100 just a nicely looking rather large number, or do you really have 100 different BPF programs? Why so many and are they really all unique?
Asking because if it's just a way to attach BPF program doing more or less uniform set of actions for different hooks, then perhaps there are better ways to do this without having to duplicate BPF programs so much (like BPF cookie, multi-kprobes, etc, etc)
100 is not an arbitrary number; we have that and higher (~200 is a good current estimate, and that grows as new product features are added). The programs are really doing different things. We also have to support a wide range of kernels, handling cases like: "on this kernel range, trampolines aren't supported, so use kretprobes with a context map for function args instead of fexit, but on newer kernels just use an fexit hook."
Yes, this is typical, and bpf_program__set_autoload() and bpf_map__set_autocreate() are meant to handle that. It's the program loading after bpf_object load that is not supported.
The use case here is that our security monitoring agent leverages eBPF as its foundational technology to gather telemetry from the kernel. As part of that, we hook many different kernel subsystems (process, memory, filesystem, network, etc), tying them together and tracking with maps. So we legitimately have a very large number of programs all doing different work. For products of this scale, it increases security and performance to load this set of programs and their maps in an optimized, parallel fashion and subsequently change the loaded set of programs and maps dynamically without disturbing the rest of the application.
Yes, makes sense. You'll need to decide for yourself if it's actually more meaningful to split those 200 programs into independent bpf_objects by features, and be rigorous about sharing state (maps) through bpf_map__reuse_fd(), which would allow you to parallelize loading within the confines of existing libbpf APIs. Or you can be a bit more low-level with program loading outside of bpf_object API, as I described above.