I'd like to cut down the memory usage of parsing vmlinux BTF in ebpf-go. With some upcoming changes the library is sitting at 5MiB for a parse. Most of that memory is simply copying the BTF blob into user space. By allowing vmlinux BTF to be mmapped read-only into user space I can cut memory usage by about 75%.
Signed-off-by: Lorenz Bauer lmb@isovalent.com --- Lorenz Bauer (2): btf: allow mmap of vmlinux btf selftests: bpf: add a test for mmapable vmlinux BTF
include/asm-generic/vmlinux.lds.h | 3 +- kernel/bpf/sysfs_btf.c | 25 ++++++- tools/testing/selftests/bpf/prog_tests/btf_sysfs.c | 79 ++++++++++++++++++++++ 3 files changed, 104 insertions(+), 3 deletions(-) --- base-commit: 38d976c32d85ef12dcd2b8a231196f7049548477 change-id: 20250501-vmlinux-mmap-2ec5563c3ef1
Best regards,
User space needs access to kernel BTF for many modern features of BPF. Right now each process needs to read the BTF blob either in pieces or as a whole. Allow mmapping the sysfs file so that processes can directly access the memory allocated for it in the kernel.
Signed-off-by: Lorenz Bauer lmb@isovalent.com --- include/asm-generic/vmlinux.lds.h | 3 ++- kernel/bpf/sysfs_btf.c | 25 +++++++++++++++++++++++-- 2 files changed, 25 insertions(+), 3 deletions(-)
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 58a635a6d5bdf0c53c267c2a3d21a5ed8678ce73..1750390735fac7637cc4d2fa05f96cb2a36aa448 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -667,10 +667,11 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) */ #ifdef CONFIG_DEBUG_INFO_BTF #define BTF \ + . = ALIGN(PAGE_SIZE); \ .BTF : AT(ADDR(.BTF) - LOAD_OFFSET) { \ BOUNDED_SECTION_BY(.BTF, _BTF) \ } \ - . = ALIGN(4); \ + . = ALIGN(PAGE_SIZE); \ .BTF_ids : AT(ADDR(.BTF_ids) - LOAD_OFFSET) { \ *(.BTF_ids) \ } diff --git a/kernel/bpf/sysfs_btf.c b/kernel/bpf/sysfs_btf.c index 81d6cf90584a7157929c50f62a5c6862e7a3d081..7651f37b82c78b8afd96078567a5b6612f5f4d97 100644 --- a/kernel/bpf/sysfs_btf.c +++ b/kernel/bpf/sysfs_btf.c @@ -7,18 +7,39 @@ #include <linux/kobject.h> #include <linux/init.h> #include <linux/sysfs.h> +#include <linux/mm.h> +#include <linux/io.h>
/* See scripts/link-vmlinux.sh, gen_btf() func for details */ extern char __start_BTF[]; extern char __stop_BTF[];
+struct kobject *btf_kobj; + +static int btf_vmlinux_mmap(struct file *filp, struct kobject *kobj, + const struct bin_attribute *attr, + struct vm_area_struct *vma) +{ + size_t btf_size = __stop_BTF - __start_BTF; + + if (kobj != btf_kobj) + return -EINVAL; + + if (vma->vm_flags & (VM_WRITE|VM_EXEC|VM_MAYSHARE)) + return -EACCES; + + vm_flags_clear(vma, VM_MAYEXEC); + vm_flags_clear(vma, VM_MAYWRITE); + + return vm_iomap_memory(vma, virt_to_phys(__start_BTF), btf_size); +} + static struct bin_attribute bin_attr_btf_vmlinux __ro_after_init = { .attr = { .name = "vmlinux", .mode = 0444, }, .read_new = sysfs_bin_attr_simple_read, + .mmap = btf_vmlinux_mmap, };
-struct kobject *btf_kobj; - static int __init btf_vmlinux_init(void) { bin_attr_btf_vmlinux.private = __start_BTF;
On Thu, May 1, 2025 at 7:28 AM Lorenz Bauer lmb@isovalent.com wrote:
User space needs access to kernel BTF for many modern features of BPF. Right now each process needs to read the BTF blob either in pieces or as a whole. Allow mmapping the sysfs file so that processes can directly access the memory allocated for it in the kernel.
Signed-off-by: Lorenz Bauer lmb@isovalent.com
include/asm-generic/vmlinux.lds.h | 3 ++- kernel/bpf/sysfs_btf.c | 25 +++++++++++++++++++++++-- 2 files changed, 25 insertions(+), 3 deletions(-)
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 58a635a6d5bdf0c53c267c2a3d21a5ed8678ce73..1750390735fac7637cc4d2fa05f96cb2a36aa448 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -667,10 +667,11 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) */ #ifdef CONFIG_DEBUG_INFO_BTF #define BTF \
. = ALIGN(PAGE_SIZE); \ .BTF : AT(ADDR(.BTF) - LOAD_OFFSET) { \ BOUNDED_SECTION_BY(.BTF, _BTF) \ } \
. = ALIGN(4); \
. = ALIGN(PAGE_SIZE); \ .BTF_ids : AT(ADDR(.BTF_ids) - LOAD_OFFSET) { \ *(.BTF_ids) \ }
diff --git a/kernel/bpf/sysfs_btf.c b/kernel/bpf/sysfs_btf.c index 81d6cf90584a7157929c50f62a5c6862e7a3d081..7651f37b82c78b8afd96078567a5b6612f5f4d97 100644 --- a/kernel/bpf/sysfs_btf.c +++ b/kernel/bpf/sysfs_btf.c @@ -7,18 +7,39 @@ #include <linux/kobject.h> #include <linux/init.h> #include <linux/sysfs.h> +#include <linux/mm.h> +#include <linux/io.h>
/* See scripts/link-vmlinux.sh, gen_btf() func for details */ extern char __start_BTF[]; extern char __stop_BTF[];
+struct kobject *btf_kobj;
+static int btf_vmlinux_mmap(struct file *filp, struct kobject *kobj,
const struct bin_attribute *attr,
struct vm_area_struct *vma)
+{
size_t btf_size = __stop_BTF - __start_BTF;
if (kobj != btf_kobj)
return -EINVAL;
if (vma->vm_flags & (VM_WRITE|VM_EXEC|VM_MAYSHARE))
return -EACCES;
vm_flags_clear(vma, VM_MAYEXEC);
vm_flags_clear(vma, VM_MAYWRITE);
Probably should set VM_DONTDUMP to avoid being in the core dump. vm_flags_mod() can set and clear in one operation.
return vm_iomap_memory(vma, virt_to_phys(__start_BTF), btf_size);
And this one should probably be vm_insert_pages(), since it's not an I/O area.
Overall I think it's a good idea. As Daniel suggested, please make use of it in libbpf too. That exercise will make sure that feature probing works with fallback.
pw-bot: cr
On Thu, May 1, 2025 at 9:26 PM Alexei Starovoitov alexei.starovoitov@gmail.com wrote:
return vm_iomap_memory(vma, virt_to_phys(__start_BTF), btf_size);
And this one should probably be vm_insert_pages(), since it's not an I/O area.
FYI I went with open coding with remap_pfn_range since that allows me to avoid struct page.
Lorenz
Add a basic test for the ability to mmap /sys/kernel/btf/vmlinux. Since libbpf doesn't have an API to parse BTF from memory we do some basic sanity checks ourselves.
Signed-off-by: Lorenz Bauer lmb@isovalent.com --- tools/testing/selftests/bpf/prog_tests/btf_sysfs.c | 79 ++++++++++++++++++++++ 1 file changed, 79 insertions(+)
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_sysfs.c b/tools/testing/selftests/bpf/prog_tests/btf_sysfs.c new file mode 100644 index 0000000000000000000000000000000000000000..8dffed136b4757779028ec0971b56ff541f2218c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/btf_sysfs.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* Copyright (c) 2025 Isovalent */ + +#include <test_progs.h> +#include <bpf/btf.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <fcntl.h> +#include <unistd.h> + +#define BTF_MAGIC 0xeB9F + +static const char *btf_path = "/sys/kernel/btf/vmlinux"; + +void test_btf_sysfs(void) +{ + struct stat st; + __u64 btf_size; + void *raw_data = NULL; + int fd = -1; + size_t trailing; + long page_size; + + page_size = sysconf(_SC_PAGESIZE); + if (!ASSERT_GE(page_size, 0, "get_page_size")) + goto cleanup; + + if (!ASSERT_OK(stat(btf_path, &st), "stat_btf")) + goto cleanup; + + btf_size = st.st_size; + trailing = page_size - (btf_size % page_size) % page_size; + + fd = open(btf_path, O_RDONLY); + if (!ASSERT_GE(fd, 0, "open_btf")) + goto cleanup; + + raw_data = mmap(NULL, btf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_writable")) + goto cleanup; + + raw_data = mmap(NULL, btf_size, PROT_READ, MAP_SHARED, fd, 0); + if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_shared")) + goto cleanup; + + raw_data = mmap(NULL, btf_size + trailing + 1, PROT_READ, MAP_PRIVATE, fd, 0); + if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_invalid_size")) + goto cleanup; + + raw_data = mmap(NULL, btf_size, PROT_READ, MAP_PRIVATE, fd, 0); + if (!ASSERT_NEQ(raw_data, MAP_FAILED, "mmap_btf")) + goto cleanup; + + if (!ASSERT_EQ(mprotect(raw_data, btf_size, PROT_READ | PROT_WRITE), -1, + "mprotect_writable")) + goto cleanup; + + if (!ASSERT_EQ(mprotect(raw_data, btf_size, PROT_READ | PROT_EXEC), -1, + "mprotect_executable")) + goto cleanup; + + /* Check BTF magic value */ + if 
(!ASSERT_EQ(*(__u16 *)raw_data, BTF_MAGIC, "btf_magic")) + goto cleanup; + + /* Check padding is zeroed */ + for (int i = 0; i < trailing; i++) { + if (((__u8 *)raw_data)[btf_size + i] != 0) { + PRINT_FAIL("tail of BTF is not zero at page offset %d\n", i); + goto cleanup; + } + } + +cleanup: + if (raw_data && raw_data != MAP_FAILED) + munmap(raw_data, btf_size); + if (fd >= 0) + close(fd); +}
On Thu, May 1, 2025, at 7:28 AM, Lorenz Bauer wrote:
I'd like to cut down the memory usage of parsing vmlinux BTF in ebpf-go. With some upcoming changes the library is sitting at 5MiB for a parse. Most of that memory is simply copying the BTF blob into user space. By allowing vmlinux BTF to be mmapped read-only into user space I can cut memory usage by about 75%.
Cool! Maybe teach libbpf to use this as well? So everyone else can pick up the win transparently.
Signed-off-by: Lorenz Bauer lmb@isovalent.com
Lorenz Bauer (2): btf: allow mmap of vmlinux btf selftests: bpf: add a test for mmapable vmlinux BTF
include/asm-generic/vmlinux.lds.h | 3 +- kernel/bpf/sysfs_btf.c | 25 ++++++- tools/testing/selftests/bpf/prog_tests/btf_sysfs.c | 79 ++++++++++++++++++++++ 3 files changed, 104 insertions(+), 3 deletions(-)
base-commit: 38d976c32d85ef12dcd2b8a231196f7049548477 change-id: 20250501-vmlinux-mmap-2ec5563c3ef1
Best regards,
Lorenz Bauer lmb@isovalent.com
linux-kselftest-mirror@lists.linaro.org