Until CONFIG_DMABUF_SYSFS_STATS was added [1] it was only possible to perform per-buffer accounting with debugfs which is not suitable for production environments. Eventually we discovered the overhead with per-buffer sysfs file creation/removal was significantly impacting allocation and free times, and exacerbated kernfs lock contention. [2] dma_buf_stats_setup() is responsible for 39% of single-page buffer creation duration, or 74% of single-page dma_buf_export() duration when stressing dmabuf allocations and frees.
I prototyped a change from per-buffer to per-exporter statistics with a RCU protected list of exporter allocations that accommodates most (but not all) of our use-cases and avoids almost all of the sysfs overhead. While that adds less overhead than per-buffer sysfs, and less even than the maintenance of the dmabuf debugfs_list, it's still *additional* overhead on top of the debugfs_list and doesn't give us per-buffer info.
This series uses the existing dmabuf debugfs_list to implement a BPF dmabuf iterator, which adds no overhead to buffer allocation/free and provides per-buffer info. The list has been moved outside of CONFIG_DEBUG_FS scope so that it is always populated. The BPF program loaded by userspace that extracts per-buffer information gets to define its own interface which avoids the lack of ABI stability with debugfs.
As this is a replacement for our use of CONFIG_DMABUF_SYSFS_STATS, the last patch is a RFC for removing it from the kernel. Please see my suggestion there regarding the timeline for that.
[1] https://lore.kernel.org/linux-media/20201210044400.1080308-1-hridya@google.c... [2] https://lore.kernel.org/all/20220516171315.2400578-1-tjmercier@google.com
v1: https://lore.kernel.org/all/20250414225227.3642618-1-tjmercier@google.com
v1 -> v2: Make the DMA buffer list independent of CONFIG_DEBUG_FS per Christian König Add CONFIG_DMA_SHARED_BUFFER check to kernel/bpf/Makefile per kernel test robot Use BTF_ID_LIST_SINGLE instead of BTF_ID_LIST_GLOBAL_SINGLE per Song Liu Fixup comment style, mixing code/declarations, and use ASSERT_OK_FD in selftest per Song Liu Add BPF_ITER_RESCHED feature to bpf_dmabuf_reg_info per Alexei Starovoitov Add open-coded iterator and selftest per Alexei Starovoitov Add a second test buffer from the system dmabuf heap to selftests Use the BPF program we'll use in production for selftest per Alexei Starovoitov https://r.android.com/c/platform/system/bpfprogs/+/3616123/2/dmabufIter.c https://r.android.com/c/platform/system/memory/libmeminfo/+/3614259/1/libdma...
T.J. Mercier (6): dma-buf: Rename and expose debugfs symbols bpf: Add dmabuf iterator bpf: Add open coded dmabuf iterator selftests/bpf: Add test for dmabuf_iter selftests/bpf: Add test for open coded dmabuf_iter RFC: dma-buf: Remove DMA-BUF statistics
.../ABI/testing/sysfs-kernel-dmabuf-buffers | 24 -- Documentation/driver-api/dma-buf.rst | 5 - drivers/dma-buf/Kconfig | 15 - drivers/dma-buf/Makefile | 1 - drivers/dma-buf/dma-buf-sysfs-stats.c | 202 -------------- drivers/dma-buf/dma-buf-sysfs-stats.h | 35 --- drivers/dma-buf/dma-buf.c | 58 +--- include/linux/dma-buf.h | 6 +- kernel/bpf/Makefile | 3 + kernel/bpf/dmabuf_iter.c | 177 ++++++++++++ kernel/bpf/helpers.c | 5 + .../testing/selftests/bpf/bpf_experimental.h | 5 + tools/testing/selftests/bpf/config | 3 + .../selftests/bpf/prog_tests/dmabuf_iter.c | 258 ++++++++++++++++++ .../testing/selftests/bpf/progs/dmabuf_iter.c | 91 ++++++ 15 files changed, 561 insertions(+), 327 deletions(-) delete mode 100644 Documentation/ABI/testing/sysfs-kernel-dmabuf-buffers delete mode 100644 drivers/dma-buf/dma-buf-sysfs-stats.c delete mode 100644 drivers/dma-buf/dma-buf-sysfs-stats.h create mode 100644 kernel/bpf/dmabuf_iter.c create mode 100644 tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c create mode 100644 tools/testing/selftests/bpf/progs/dmabuf_iter.c
base-commit: 0af2f6be1b4281385b618cb86ad946eded089ac8
Expose the debugfs list and mutex so they are usable for the creation of a BPF iterator for dmabufs without the need for CONFIG_DEBUG_FS. Rename the symbols so it's clear debugfs is not required, and that the list contains dmabufs and not some other type.
Signed-off-by: T.J. Mercier tjmercier@google.com --- v2: Make the DMA buffer list independent of CONFIG_DEBUG_FS per Christian König drivers/dma-buf/dma-buf.c | 40 +++++++++++++++------------------------ include/linux/dma-buf.h | 6 ++++-- 2 files changed, 19 insertions(+), 27 deletions(-)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 5baa83b85515..7260bdd77c75 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -35,35 +35,25 @@
static inline int is_dma_buf_file(struct file *);
-#if IS_ENABLED(CONFIG_DEBUG_FS) -static DEFINE_MUTEX(debugfs_list_mutex); -static LIST_HEAD(debugfs_list); +DEFINE_MUTEX(dmabuf_list_mutex); +LIST_HEAD(dmabuf_list);
-static void __dma_buf_debugfs_list_add(struct dma_buf *dmabuf) +static void __dma_buf_list_add(struct dma_buf *dmabuf) { - mutex_lock(&debugfs_list_mutex); - list_add(&dmabuf->list_node, &debugfs_list); - mutex_unlock(&debugfs_list_mutex); + mutex_lock(&dmabuf_list_mutex); + list_add(&dmabuf->list_node, &dmabuf_list); + mutex_unlock(&dmabuf_list_mutex); }
-static void __dma_buf_debugfs_list_del(struct dma_buf *dmabuf) +static void __dma_buf_list_del(struct dma_buf *dmabuf) { if (!dmabuf) return;
- mutex_lock(&debugfs_list_mutex); + mutex_lock(&dmabuf_list_mutex); list_del(&dmabuf->list_node); - mutex_unlock(&debugfs_list_mutex); + mutex_unlock(&dmabuf_list_mutex); } -#else -static void __dma_buf_debugfs_list_add(struct dma_buf *dmabuf) -{ -} - -static void __dma_buf_debugfs_list_del(struct dma_buf *dmabuf) -{ -} -#endif
static char *dmabuffs_dname(struct dentry *dentry, char *buffer, int buflen) { @@ -115,7 +105,7 @@ static int dma_buf_file_release(struct inode *inode, struct file *file) if (!is_dma_buf_file(file)) return -EINVAL;
- __dma_buf_debugfs_list_del(file->private_data); + __dma_buf_list_del(file->private_data);
return 0; } @@ -689,7 +679,7 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) file->f_path.dentry->d_fsdata = dmabuf; dmabuf->file = file;
- __dma_buf_debugfs_list_add(dmabuf); + __dma_buf_list_add(dmabuf);
return dmabuf;
@@ -1630,7 +1620,7 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused) size_t size = 0; int ret;
- ret = mutex_lock_interruptible(&debugfs_list_mutex); + ret = mutex_lock_interruptible(&dmabuf_list_mutex);
if (ret) return ret; @@ -1639,7 +1629,7 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused) seq_printf(s, "%-8s\t%-8s\t%-8s\t%-8s\texp_name\t%-8s\tname\n", "size", "flags", "mode", "count", "ino");
- list_for_each_entry(buf_obj, &debugfs_list, list_node) { + list_for_each_entry(buf_obj, &dmabuf_list, list_node) {
ret = dma_resv_lock_interruptible(buf_obj->resv, NULL); if (ret) @@ -1676,11 +1666,11 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
seq_printf(s, "\nTotal %d objects, %zu bytes\n", count, size);
- mutex_unlock(&debugfs_list_mutex); + mutex_unlock(&dmabuf_list_mutex); return 0;
error_unlock: - mutex_unlock(&debugfs_list_mutex); + mutex_unlock(&dmabuf_list_mutex); return ret; }
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 36216d28d8bd..ebcd208272bf 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -18,6 +18,7 @@ #include <linux/err.h> #include <linux/scatterlist.h> #include <linux/list.h> +#include <linux/mutex.h> #include <linux/dma-mapping.h> #include <linux/fs.h> #include <linux/dma-fence.h> @@ -370,10 +371,8 @@ struct dma_buf { */ struct module *owner;
-#if IS_ENABLED(CONFIG_DEBUG_FS) /** @list_node: node for dma_buf accounting and debugging. */ struct list_head list_node; -#endif
/** @priv: exporter specific private data for this buffer object. */ void *priv; @@ -556,6 +555,9 @@ struct dma_buf_export_info { struct dma_buf_export_info name = { .exp_name = KBUILD_MODNAME, \ .owner = THIS_MODULE }
+extern struct list_head dmabuf_list; +extern struct mutex dmabuf_list_mutex; + /** * get_dma_buf - convenience wrapper for get_file. * @dmabuf: [in] pointer to dma_buf
On 5/5/25 00:41, T.J. Mercier wrote:
Expose the debugfs list and mutex so they are usable for the creation of a BPF iterator for dmabufs without the need for CONFIG_DEBUG_FS. Rename the symbols so it's clear debugfs is not required, and that the list contains dmabufs and not some other type.
Signed-off-by: T.J. Mercier tjmercier@google.com
Reviewed-by: Christian König christian.koenig@amd.com
v2: Make the DMA buffer list independent of CONFIG_DEBUG_FS per Christian König drivers/dma-buf/dma-buf.c | 40 +++++++++++++++------------------------ include/linux/dma-buf.h | 6 ++++-- 2 files changed, 19 insertions(+), 27 deletions(-)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 5baa83b85515..7260bdd77c75 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -35,35 +35,25 @@ static inline int is_dma_buf_file(struct file *); -#if IS_ENABLED(CONFIG_DEBUG_FS) -static DEFINE_MUTEX(debugfs_list_mutex); -static LIST_HEAD(debugfs_list); +DEFINE_MUTEX(dmabuf_list_mutex); +LIST_HEAD(dmabuf_list); -static void __dma_buf_debugfs_list_add(struct dma_buf *dmabuf) +static void __dma_buf_list_add(struct dma_buf *dmabuf) {
- mutex_lock(&debugfs_list_mutex);
- list_add(&dmabuf->list_node, &debugfs_list);
- mutex_unlock(&debugfs_list_mutex);
- mutex_lock(&dmabuf_list_mutex);
- list_add(&dmabuf->list_node, &dmabuf_list);
- mutex_unlock(&dmabuf_list_mutex);
} -static void __dma_buf_debugfs_list_del(struct dma_buf *dmabuf) +static void __dma_buf_list_del(struct dma_buf *dmabuf) { if (!dmabuf) return;
- mutex_lock(&debugfs_list_mutex);
- mutex_lock(&dmabuf_list_mutex); list_del(&dmabuf->list_node);
- mutex_unlock(&debugfs_list_mutex);
- mutex_unlock(&dmabuf_list_mutex);
} -#else -static void __dma_buf_debugfs_list_add(struct dma_buf *dmabuf) -{ -}
-static void __dma_buf_debugfs_list_del(struct dma_buf *dmabuf) -{ -} -#endif static char *dmabuffs_dname(struct dentry *dentry, char *buffer, int buflen) { @@ -115,7 +105,7 @@ static int dma_buf_file_release(struct inode *inode, struct file *file) if (!is_dma_buf_file(file)) return -EINVAL;
- __dma_buf_debugfs_list_del(file->private_data);
- __dma_buf_list_del(file->private_data);
return 0; } @@ -689,7 +679,7 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) file->f_path.dentry->d_fsdata = dmabuf; dmabuf->file = file;
- __dma_buf_debugfs_list_add(dmabuf);
- __dma_buf_list_add(dmabuf);
return dmabuf; @@ -1630,7 +1620,7 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused) size_t size = 0; int ret;
- ret = mutex_lock_interruptible(&debugfs_list_mutex);
- ret = mutex_lock_interruptible(&dmabuf_list_mutex);
if (ret) return ret; @@ -1639,7 +1629,7 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused) seq_printf(s, "%-8s\t%-8s\t%-8s\t%-8s\texp_name\t%-8s\tname\n", "size", "flags", "mode", "count", "ino");
- list_for_each_entry(buf_obj, &debugfs_list, list_node) {
- list_for_each_entry(buf_obj, &dmabuf_list, list_node) {
ret = dma_resv_lock_interruptible(buf_obj->resv, NULL); if (ret) @@ -1676,11 +1666,11 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused) seq_printf(s, "\nTotal %d objects, %zu bytes\n", count, size);
- mutex_unlock(&debugfs_list_mutex);
- mutex_unlock(&dmabuf_list_mutex); return 0;
error_unlock:
- mutex_unlock(&debugfs_list_mutex);
- mutex_unlock(&dmabuf_list_mutex); return ret;
} diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 36216d28d8bd..ebcd208272bf 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -18,6 +18,7 @@ #include <linux/err.h> #include <linux/scatterlist.h> #include <linux/list.h> +#include <linux/mutex.h> #include <linux/dma-mapping.h> #include <linux/fs.h> #include <linux/dma-fence.h> @@ -370,10 +371,8 @@ struct dma_buf { */ struct module *owner; -#if IS_ENABLED(CONFIG_DEBUG_FS) /** @list_node: node for dma_buf accounting and debugging. */ struct list_head list_node; -#endif /** @priv: exporter specific private data for this buffer object. */ void *priv; @@ -556,6 +555,9 @@ struct dma_buf_export_info { struct dma_buf_export_info name = { .exp_name = KBUILD_MODNAME, \ .owner = THIS_MODULE } +extern struct list_head dmabuf_list; +extern struct mutex dmabuf_list_mutex;
/**
- get_dma_buf - convenience wrapper for get_file.
- @dmabuf: [in] pointer to dma_buf
The dmabuf iterator traverses the list of all DMA buffers.
DMA buffers are refcounted through their associated struct file. A reference is taken on each buffer as the list is iterated to ensure each buffer persists for the duration of the bpf program execution without holding the list mutex.
Signed-off-by: T.J. Mercier tjmercier@google.com --- kernel/bpf/Makefile | 3 + kernel/bpf/dmabuf_iter.c | 134 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 kernel/bpf/dmabuf_iter.c
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 70502f038b92..3a335c50e6e3 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -53,6 +53,9 @@ obj-$(CONFIG_BPF_SYSCALL) += relo_core.o obj-$(CONFIG_BPF_SYSCALL) += btf_iter.o obj-$(CONFIG_BPF_SYSCALL) += btf_relocate.o obj-$(CONFIG_BPF_SYSCALL) += kmem_cache_iter.o +ifeq ($(CONFIG_DMA_SHARED_BUFFER),y) +obj-$(CONFIG_BPF_SYSCALL) += dmabuf_iter.o +endif
CFLAGS_REMOVE_percpu_freelist.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_bpf_lru_list.o = $(CC_FLAGS_FTRACE) diff --git a/kernel/bpf/dmabuf_iter.c b/kernel/bpf/dmabuf_iter.c new file mode 100644 index 000000000000..968762e11f73 --- /dev/null +++ b/kernel/bpf/dmabuf_iter.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2025 Google LLC */ +#include <linux/bpf.h> +#include <linux/btf_ids.h> +#include <linux/dma-buf.h> +#include <linux/kernel.h> +#include <linux/seq_file.h> + +BTF_ID_LIST_SINGLE(bpf_dmabuf_btf_id, struct, dma_buf) +DEFINE_BPF_ITER_FUNC(dmabuf, struct bpf_iter_meta *meta, struct dma_buf *dmabuf) + +static struct dma_buf *get_next_dmabuf(struct dma_buf *dmabuf) +{ + struct dma_buf *ret = NULL; + + /* + * Look for the first/next buffer we can obtain a reference to. + * + * The list mutex does not protect a dmabuf's refcount, so it can be + * zeroed while we are iterating. We cannot call get_dma_buf() since the + * caller of this program may not already own a reference to the buffer. + */ + mutex_lock(&dmabuf_list_mutex); + if (dmabuf) { + dma_buf_put(dmabuf); + list_for_each_entry_continue(dmabuf, &dmabuf_list, list_node) { + if (file_ref_get(&dmabuf->file->f_ref)) { + ret = dmabuf; + break; + } + } + } else { + list_for_each_entry(dmabuf, &dmabuf_list, list_node) { + if (file_ref_get(&dmabuf->file->f_ref)) { + ret = dmabuf; + break; + } + } + } + mutex_unlock(&dmabuf_list_mutex); + return ret; +} + +static void *dmabuf_iter_seq_start(struct seq_file *seq, loff_t *pos) +{ + if (*pos) + return NULL; + + return get_next_dmabuf(NULL); +} + +static void *dmabuf_iter_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct dma_buf *dmabuf = v; + + ++*pos; + + return get_next_dmabuf(dmabuf); +} + +struct bpf_iter__dmabuf { + __bpf_md_ptr(struct bpf_iter_meta *, meta); + __bpf_md_ptr(struct dma_buf *, dmabuf); +}; + +static int __dmabuf_seq_show(struct seq_file *seq, void *v, bool in_stop) +{ + struct bpf_iter_meta meta = { + .seq = seq, + }; + struct bpf_iter__dmabuf ctx = { + .meta = &meta, + .dmabuf = v, + }; + struct bpf_prog *prog = bpf_iter_get_info(&meta, in_stop); + + if (prog) + return bpf_iter_run_prog(prog, &ctx); + + return 0; +} + +static int dmabuf_iter_seq_show(struct seq_file *seq, void *v) +{ + return __dmabuf_seq_show(seq, v, false); +} + +static void dmabuf_iter_seq_stop(struct seq_file *seq, void *v) +{ + struct dma_buf *dmabuf = v; + + if (dmabuf) + dma_buf_put(dmabuf); +} + +static const struct seq_operations dmabuf_iter_seq_ops = { + .start = dmabuf_iter_seq_start, + .next = dmabuf_iter_seq_next, + .stop = dmabuf_iter_seq_stop, + .show = dmabuf_iter_seq_show, +}; + +static void bpf_iter_dmabuf_show_fdinfo(const struct bpf_iter_aux_info *aux, + struct seq_file *seq) +{ + seq_puts(seq, "dmabuf iter\n"); +} + +static const struct bpf_iter_seq_info dmabuf_iter_seq_info = { + .seq_ops = &dmabuf_iter_seq_ops, + .init_seq_private = NULL, + .fini_seq_private = NULL, + .seq_priv_size = 0, +}; + +static struct bpf_iter_reg bpf_dmabuf_reg_info = { + .target = "dmabuf", + .feature = BPF_ITER_RESCHED, + .show_fdinfo = bpf_iter_dmabuf_show_fdinfo, + .ctx_arg_info_size = 1, + .ctx_arg_info = { + { offsetof(struct bpf_iter__dmabuf, dmabuf), + PTR_TO_BTF_ID_OR_NULL }, + }, + .seq_info = &dmabuf_iter_seq_info, +}; + +static int __init dmabuf_iter_init(void) +{ + bpf_dmabuf_reg_info.ctx_arg_info[0].btf_id = bpf_dmabuf_btf_id[0]; + return bpf_iter_reg_target(&bpf_dmabuf_reg_info); +} + +late_initcall(dmabuf_iter_init);
On 5/5/25 00:41, T.J. Mercier wrote:
The dmabuf iterator traverses the list of all DMA buffers.
DMA buffers are refcounted through their associated struct file. A reference is taken on each buffer as the list is iterated to ensure each buffer persists for the duration of the bpf program execution without holding the list mutex.
Signed-off-by: T.J. Mercier tjmercier@google.com
kernel/bpf/Makefile | 3 + kernel/bpf/dmabuf_iter.c | 134 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 kernel/bpf/dmabuf_iter.c
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 70502f038b92..3a335c50e6e3 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -53,6 +53,9 @@ obj-$(CONFIG_BPF_SYSCALL) += relo_core.o obj-$(CONFIG_BPF_SYSCALL) += btf_iter.o obj-$(CONFIG_BPF_SYSCALL) += btf_relocate.o obj-$(CONFIG_BPF_SYSCALL) += kmem_cache_iter.o +ifeq ($(CONFIG_DMA_SHARED_BUFFER),y) +obj-$(CONFIG_BPF_SYSCALL) += dmabuf_iter.o +endif CFLAGS_REMOVE_percpu_freelist.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_bpf_lru_list.o = $(CC_FLAGS_FTRACE) diff --git a/kernel/bpf/dmabuf_iter.c b/kernel/bpf/dmabuf_iter.c new file mode 100644 index 000000000000..968762e11f73 --- /dev/null +++ b/kernel/bpf/dmabuf_iter.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2025 Google LLC */ +#include <linux/bpf.h> +#include <linux/btf_ids.h> +#include <linux/dma-buf.h> +#include <linux/kernel.h> +#include <linux/seq_file.h>
+BTF_ID_LIST_SINGLE(bpf_dmabuf_btf_id, struct, dma_buf) +DEFINE_BPF_ITER_FUNC(dmabuf, struct bpf_iter_meta *meta, struct dma_buf *dmabuf)
+static struct dma_buf *get_next_dmabuf(struct dma_buf *dmabuf) +{
- struct dma_buf *ret = NULL;
- /*
* Look for the first/next buffer we can obtain a reference to.
*
* The list mutex does not protect a dmabuf's refcount, so it can be
* zeroed while we are iterating. We cannot call get_dma_buf() since the
* caller of this program may not already own a reference to the buffer.
*/
- mutex_lock(&dmabuf_list_mutex);
- if (dmabuf) {
That looks like you try to mangle the start and next functionality in just one function.
I would just inline that into the dmabuf_iter_seq_start() and dmabuf_iter_seq_next() functions.
dma_buf_put(dmabuf);
list_for_each_entry_continue(dmabuf, &dmabuf_list, list_node) {
That you can put the DMA-buf and then still uses it in list_for_each_entry_continue() only works because the mutex is locked in the destroy path.
I strongly suggest to just put those two functions into drivers/dma-buf/dma-buf.c right next to the __dma_buf_debugfs_list_add() and __dma_buf_debugfs_list_del() functions.
Apart from those style suggestions looks good to me from the technical side, but I'm not an expert for the BPF stuff.
Regards, Christian.
if (file_ref_get(&dmabuf->file->f_ref)) {
ret = dmabuf;
break;
}
}
- } else {
list_for_each_entry(dmabuf, &dmabuf_list, list_node) {
if (file_ref_get(&dmabuf->file->f_ref)) {
ret = dmabuf;
break;
}
}
- }
- mutex_unlock(&dmabuf_list_mutex);
- return ret;
+}
+static void *dmabuf_iter_seq_start(struct seq_file *seq, loff_t *pos) +{
- if (*pos)
return NULL;
- return get_next_dmabuf(NULL);
+}
+static void *dmabuf_iter_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{
- struct dma_buf *dmabuf = v;
- ++*pos;
- return get_next_dmabuf(dmabuf);
+}
+struct bpf_iter__dmabuf {
- __bpf_md_ptr(struct bpf_iter_meta *, meta);
- __bpf_md_ptr(struct dma_buf *, dmabuf);
+};
+static int __dmabuf_seq_show(struct seq_file *seq, void *v, bool in_stop) +{
- struct bpf_iter_meta meta = {
.seq = seq,
- };
- struct bpf_iter__dmabuf ctx = {
.meta = &meta,
.dmabuf = v,
- };
- struct bpf_prog *prog = bpf_iter_get_info(&meta, in_stop);
- if (prog)
return bpf_iter_run_prog(prog, &ctx);
- return 0;
+}
+static int dmabuf_iter_seq_show(struct seq_file *seq, void *v) +{
- return __dmabuf_seq_show(seq, v, false);
+}
+static void dmabuf_iter_seq_stop(struct seq_file *seq, void *v) +{
- struct dma_buf *dmabuf = v;
- if (dmabuf)
dma_buf_put(dmabuf);
+}
+static const struct seq_operations dmabuf_iter_seq_ops = {
- .start = dmabuf_iter_seq_start,
- .next = dmabuf_iter_seq_next,
- .stop = dmabuf_iter_seq_stop,
- .show = dmabuf_iter_seq_show,
+};
+static void bpf_iter_dmabuf_show_fdinfo(const struct bpf_iter_aux_info *aux,
struct seq_file *seq)
+{
- seq_puts(seq, "dmabuf iter\n");
+}
+static const struct bpf_iter_seq_info dmabuf_iter_seq_info = {
- .seq_ops = &dmabuf_iter_seq_ops,
- .init_seq_private = NULL,
- .fini_seq_private = NULL,
- .seq_priv_size = 0,
+};
+static struct bpf_iter_reg bpf_dmabuf_reg_info = {
- .target = "dmabuf",
- .feature = BPF_ITER_RESCHED,
- .show_fdinfo = bpf_iter_dmabuf_show_fdinfo,
- .ctx_arg_info_size = 1,
- .ctx_arg_info = {
{ offsetof(struct bpf_iter__dmabuf, dmabuf),
PTR_TO_BTF_ID_OR_NULL },
- },
- .seq_info = &dmabuf_iter_seq_info,
+};
+static int __init dmabuf_iter_init(void) +{
- bpf_dmabuf_reg_info.ctx_arg_info[0].btf_id = bpf_dmabuf_btf_id[0];
- return bpf_iter_reg_target(&bpf_dmabuf_reg_info);
+}
+late_initcall(dmabuf_iter_init);
On Mon, May 5, 2025 at 4:17 AM Christian König christian.koenig@amd.com wrote:
On 5/5/25 00:41, T.J. Mercier wrote:
The dmabuf iterator traverses the list of all DMA buffers.
DMA buffers are refcounted through their associated struct file. A reference is taken on each buffer as the list is iterated to ensure each buffer persists for the duration of the bpf program execution without holding the list mutex.
Signed-off-by: T.J. Mercier tjmercier@google.com
kernel/bpf/Makefile | 3 + kernel/bpf/dmabuf_iter.c | 134 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 kernel/bpf/dmabuf_iter.c
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 70502f038b92..3a335c50e6e3 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -53,6 +53,9 @@ obj-$(CONFIG_BPF_SYSCALL) += relo_core.o obj-$(CONFIG_BPF_SYSCALL) += btf_iter.o obj-$(CONFIG_BPF_SYSCALL) += btf_relocate.o obj-$(CONFIG_BPF_SYSCALL) += kmem_cache_iter.o +ifeq ($(CONFIG_DMA_SHARED_BUFFER),y) +obj-$(CONFIG_BPF_SYSCALL) += dmabuf_iter.o +endif
CFLAGS_REMOVE_percpu_freelist.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_bpf_lru_list.o = $(CC_FLAGS_FTRACE) diff --git a/kernel/bpf/dmabuf_iter.c b/kernel/bpf/dmabuf_iter.c new file mode 100644 index 000000000000..968762e11f73 --- /dev/null +++ b/kernel/bpf/dmabuf_iter.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2025 Google LLC */ +#include <linux/bpf.h> +#include <linux/btf_ids.h> +#include <linux/dma-buf.h> +#include <linux/kernel.h> +#include <linux/seq_file.h>
+BTF_ID_LIST_SINGLE(bpf_dmabuf_btf_id, struct, dma_buf) +DEFINE_BPF_ITER_FUNC(dmabuf, struct bpf_iter_meta *meta, struct dma_buf *dmabuf)
+static struct dma_buf *get_next_dmabuf(struct dma_buf *dmabuf) +{
struct dma_buf *ret = NULL;
/*
* Look for the first/next buffer we can obtain a reference to.
*
* The list mutex does not protect a dmabuf's refcount, so it can be
* zeroed while we are iterating. We cannot call get_dma_buf() since the
* caller of this program may not already own a reference to the buffer.
*/
mutex_lock(&dmabuf_list_mutex);
if (dmabuf) {
That looks like you try to mangle the start and next functionality in just one function.
I would just inline that into the dmabuf_iter_seq_start() and dmabuf_iter_seq_next() functions.
Primarily this is to share between the open coded iterator (next patch) and this normal iterator since I didn't want to duplicate the same list traversal code across both of them.
dma_buf_put(dmabuf);
list_for_each_entry_continue(dmabuf, &dmabuf_list, list_node) {
That you can put the DMA-buf and then still uses it in list_for_each_entry_continue() only works because the mutex is locked in the destroy path.
Yup, this was deliberate.
I strongly suggest to just put those two functions into drivers/dma-buf/dma-buf.c right next to the __dma_buf_debugfs_list_add() and __dma_buf_debugfs_list_del() functions.
By two functions, you mean a get_first_dmabuf(void) and a get_next_dmabuf(struct dma_buf*)? To make the dma_buf_put() call a little less scary since all the mutex ops are right there?
Apart from those style suggestions looks good to me from the technical side, but I'm not an expert for the BPF stuff.
Regards, Christian.
Thanks for your comments and reviews!
if (file_ref_get(&dmabuf->file->f_ref)) {
ret = dmabuf;
break;
}
}
} else {
list_for_each_entry(dmabuf, &dmabuf_list, list_node) {
if (file_ref_get(&dmabuf->file->f_ref)) {
ret = dmabuf;
break;
}
}
}
mutex_unlock(&dmabuf_list_mutex);
return ret;
+}
+static void *dmabuf_iter_seq_start(struct seq_file *seq, loff_t *pos) +{
if (*pos)
return NULL;
return get_next_dmabuf(NULL);
+}
+static void *dmabuf_iter_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{
struct dma_buf *dmabuf = v;
++*pos;
return get_next_dmabuf(dmabuf);
+}
+struct bpf_iter__dmabuf {
__bpf_md_ptr(struct bpf_iter_meta *, meta);
__bpf_md_ptr(struct dma_buf *, dmabuf);
+};
+static int __dmabuf_seq_show(struct seq_file *seq, void *v, bool in_stop) +{
struct bpf_iter_meta meta = {
.seq = seq,
};
struct bpf_iter__dmabuf ctx = {
.meta = &meta,
.dmabuf = v,
};
struct bpf_prog *prog = bpf_iter_get_info(&meta, in_stop);
if (prog)
return bpf_iter_run_prog(prog, &ctx);
return 0;
+}
+static int dmabuf_iter_seq_show(struct seq_file *seq, void *v) +{
return __dmabuf_seq_show(seq, v, false);
+}
+static void dmabuf_iter_seq_stop(struct seq_file *seq, void *v) +{
struct dma_buf *dmabuf = v;
if (dmabuf)
dma_buf_put(dmabuf);
+}
+static const struct seq_operations dmabuf_iter_seq_ops = {
.start = dmabuf_iter_seq_start,
.next = dmabuf_iter_seq_next,
.stop = dmabuf_iter_seq_stop,
.show = dmabuf_iter_seq_show,
+};
+static void bpf_iter_dmabuf_show_fdinfo(const struct bpf_iter_aux_info *aux,
struct seq_file *seq)
+{
seq_puts(seq, "dmabuf iter\n");
+}
+static const struct bpf_iter_seq_info dmabuf_iter_seq_info = {
.seq_ops = &dmabuf_iter_seq_ops,
.init_seq_private = NULL,
.fini_seq_private = NULL,
.seq_priv_size = 0,
+};
+static struct bpf_iter_reg bpf_dmabuf_reg_info = {
.target = "dmabuf",
.feature = BPF_ITER_RESCHED,
.show_fdinfo = bpf_iter_dmabuf_show_fdinfo,
.ctx_arg_info_size = 1,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__dmabuf, dmabuf),
PTR_TO_BTF_ID_OR_NULL },
},
.seq_info = &dmabuf_iter_seq_info,
+};
+static int __init dmabuf_iter_init(void) +{
bpf_dmabuf_reg_info.ctx_arg_info[0].btf_id = bpf_dmabuf_btf_id[0];
return bpf_iter_reg_target(&bpf_dmabuf_reg_info);
+}
+late_initcall(dmabuf_iter_init);
On 5/5/25 18:33, T.J. Mercier wrote:
On Mon, May 5, 2025 at 4:17 AM Christian König christian.koenig@amd.com wrote:
On 5/5/25 00:41, T.J. Mercier wrote:
The dmabuf iterator traverses the list of all DMA buffers.
DMA buffers are refcounted through their associated struct file. A reference is taken on each buffer as the list is iterated to ensure each buffer persists for the duration of the bpf program execution without holding the list mutex.
Signed-off-by: T.J. Mercier tjmercier@google.com
kernel/bpf/Makefile | 3 + kernel/bpf/dmabuf_iter.c | 134 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 kernel/bpf/dmabuf_iter.c
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 70502f038b92..3a335c50e6e3 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -53,6 +53,9 @@ obj-$(CONFIG_BPF_SYSCALL) += relo_core.o obj-$(CONFIG_BPF_SYSCALL) += btf_iter.o obj-$(CONFIG_BPF_SYSCALL) += btf_relocate.o obj-$(CONFIG_BPF_SYSCALL) += kmem_cache_iter.o +ifeq ($(CONFIG_DMA_SHARED_BUFFER),y) +obj-$(CONFIG_BPF_SYSCALL) += dmabuf_iter.o +endif
CFLAGS_REMOVE_percpu_freelist.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_bpf_lru_list.o = $(CC_FLAGS_FTRACE) diff --git a/kernel/bpf/dmabuf_iter.c b/kernel/bpf/dmabuf_iter.c new file mode 100644 index 000000000000..968762e11f73 --- /dev/null +++ b/kernel/bpf/dmabuf_iter.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2025 Google LLC */ +#include <linux/bpf.h> +#include <linux/btf_ids.h> +#include <linux/dma-buf.h> +#include <linux/kernel.h> +#include <linux/seq_file.h>
+BTF_ID_LIST_SINGLE(bpf_dmabuf_btf_id, struct, dma_buf) +DEFINE_BPF_ITER_FUNC(dmabuf, struct bpf_iter_meta *meta, struct dma_buf *dmabuf)
+static struct dma_buf *get_next_dmabuf(struct dma_buf *dmabuf) +{
struct dma_buf *ret = NULL;
/*
* Look for the first/next buffer we can obtain a reference to.
*
* The list mutex does not protect a dmabuf's refcount, so it can be
* zeroed while we are iterating. We cannot call get_dma_buf() since the
* caller of this program may not already own a reference to the buffer.
*/
mutex_lock(&dmabuf_list_mutex);
if (dmabuf) {
That looks like you try to mangle the start and next functionality in just one function.
I would just inline that into the dmabuf_iter_seq_start() and dmabuf_iter_seq_next() functions.
Primarily this is to share between the open coded iterator (next patch) and this normal iterator since I didn't want to duplicate the same list traversal code across both of them.
Ah, ok that makes a bit more sense. It would still be nicer if it's in two functions since the logic doesn't share anything common except for taking the lock as far as I can seee.
dma_buf_put(dmabuf);
list_for_each_entry_continue(dmabuf, &dmabuf_list, list_node) {
That you can put the DMA-buf and then still uses it in list_for_each_entry_continue() only works because the mutex is locked in the destroy path.
Yup, this was deliberate.
I strongly suggest to just put those two functions into drivers/dma-buf/dma-buf.c right next to the __dma_buf_debugfs_list_add() and __dma_buf_debugfs_list_del() functions.
By two functions, you mean a get_first_dmabuf(void) and a get_next_dmabuf(struct dma_buf*)? To make the dma_buf_put() call a little less scary since all the mutex ops are right there?
Yes, exactly that's the idea. The comment above is good to have as well, but it only works one way.
If somebody changes the DMA-buf code without looking at this here we are busted.
Regards, Christian.
Apart from those style suggestions looks good to me from the technical side, but I'm not an expert for the BPF stuff.
Regards, Christian.
Thanks for your comments and reviews!
if (file_ref_get(&dmabuf->file->f_ref)) {
ret = dmabuf;
break;
}
}
} else {
list_for_each_entry(dmabuf, &dmabuf_list, list_node) {
if (file_ref_get(&dmabuf->file->f_ref)) {
ret = dmabuf;
break;
}
}
}
mutex_unlock(&dmabuf_list_mutex);
return ret;
+}
+static void *dmabuf_iter_seq_start(struct seq_file *seq, loff_t *pos) +{
if (*pos)
return NULL;
return get_next_dmabuf(NULL);
+}
+static void *dmabuf_iter_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{
struct dma_buf *dmabuf = v;
++*pos;
return get_next_dmabuf(dmabuf);
+}
+struct bpf_iter__dmabuf {
__bpf_md_ptr(struct bpf_iter_meta *, meta);
__bpf_md_ptr(struct dma_buf *, dmabuf);
+};
+static int __dmabuf_seq_show(struct seq_file *seq, void *v, bool in_stop) +{
struct bpf_iter_meta meta = {
.seq = seq,
};
struct bpf_iter__dmabuf ctx = {
.meta = &meta,
.dmabuf = v,
};
struct bpf_prog *prog = bpf_iter_get_info(&meta, in_stop);
if (prog)
return bpf_iter_run_prog(prog, &ctx);
return 0;
+}
+static int dmabuf_iter_seq_show(struct seq_file *seq, void *v) +{
return __dmabuf_seq_show(seq, v, false);
+}
+static void dmabuf_iter_seq_stop(struct seq_file *seq, void *v) +{
struct dma_buf *dmabuf = v;
if (dmabuf)
dma_buf_put(dmabuf);
+}
+static const struct seq_operations dmabuf_iter_seq_ops = {
.start = dmabuf_iter_seq_start,
.next = dmabuf_iter_seq_next,
.stop = dmabuf_iter_seq_stop,
.show = dmabuf_iter_seq_show,
+};
+static void bpf_iter_dmabuf_show_fdinfo(const struct bpf_iter_aux_info *aux,
struct seq_file *seq)
+{
seq_puts(seq, "dmabuf iter\n");
+}
+static const struct bpf_iter_seq_info dmabuf_iter_seq_info = {
.seq_ops = &dmabuf_iter_seq_ops,
.init_seq_private = NULL,
.fini_seq_private = NULL,
.seq_priv_size = 0,
+};
+static struct bpf_iter_reg bpf_dmabuf_reg_info = {
.target = "dmabuf",
.feature = BPF_ITER_RESCHED,
.show_fdinfo = bpf_iter_dmabuf_show_fdinfo,
.ctx_arg_info_size = 1,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__dmabuf, dmabuf),
PTR_TO_BTF_ID_OR_NULL },
},
.seq_info = &dmabuf_iter_seq_info,
+};
+static int __init dmabuf_iter_init(void) +{
bpf_dmabuf_reg_info.ctx_arg_info[0].btf_id = bpf_dmabuf_btf_id[0];
return bpf_iter_reg_target(&bpf_dmabuf_reg_info);
+}
+late_initcall(dmabuf_iter_init);
On Mon, May 5, 2025 at 9:56 AM Christian König christian.koenig@amd.com wrote:
On 5/5/25 18:33, T.J. Mercier wrote:
On Mon, May 5, 2025 at 4:17 AM Christian König christian.koenig@amd.com wrote:
On 5/5/25 00:41, T.J. Mercier wrote:
The dmabuf iterator traverses the list of all DMA buffers.
DMA buffers are refcounted through their associated struct file. A reference is taken on each buffer as the list is iterated to ensure each buffer persists for the duration of the bpf program execution without holding the list mutex.
Signed-off-by: T.J. Mercier tjmercier@google.com
kernel/bpf/Makefile | 3 + kernel/bpf/dmabuf_iter.c | 134 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 kernel/bpf/dmabuf_iter.c
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 70502f038b92..3a335c50e6e3 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -53,6 +53,9 @@ obj-$(CONFIG_BPF_SYSCALL) += relo_core.o obj-$(CONFIG_BPF_SYSCALL) += btf_iter.o obj-$(CONFIG_BPF_SYSCALL) += btf_relocate.o obj-$(CONFIG_BPF_SYSCALL) += kmem_cache_iter.o +ifeq ($(CONFIG_DMA_SHARED_BUFFER),y) +obj-$(CONFIG_BPF_SYSCALL) += dmabuf_iter.o +endif
CFLAGS_REMOVE_percpu_freelist.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_bpf_lru_list.o = $(CC_FLAGS_FTRACE) diff --git a/kernel/bpf/dmabuf_iter.c b/kernel/bpf/dmabuf_iter.c new file mode 100644 index 000000000000..968762e11f73 --- /dev/null +++ b/kernel/bpf/dmabuf_iter.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2025 Google LLC */ +#include <linux/bpf.h> +#include <linux/btf_ids.h> +#include <linux/dma-buf.h> +#include <linux/kernel.h> +#include <linux/seq_file.h>
+BTF_ID_LIST_SINGLE(bpf_dmabuf_btf_id, struct, dma_buf) +DEFINE_BPF_ITER_FUNC(dmabuf, struct bpf_iter_meta *meta, struct dma_buf *dmabuf)
+static struct dma_buf *get_next_dmabuf(struct dma_buf *dmabuf) +{
struct dma_buf *ret = NULL;
/*
* Look for the first/next buffer we can obtain a reference to.
*
* The list mutex does not protect a dmabuf's refcount, so it can be
* zeroed while we are iterating. We cannot call get_dma_buf() since the
* caller of this program may not already own a reference to the buffer.
*/
mutex_lock(&dmabuf_list_mutex);
if (dmabuf) {
That looks like you try to mangle the start and next functionality in just one function.
I would just inline that into the dmabuf_iter_seq_start() and dmabuf_iter_seq_next() functions.
Primarily this is to share between the open coded iterator (next patch) and this normal iterator since I didn't want to duplicate the same list traversal code across both of them.
Ah, ok that makes a bit more sense. It would still be nicer if it's in two functions since the logic doesn't share anything common except for taking the lock as far as I can seee.
dma_buf_put(dmabuf);
list_for_each_entry_continue(dmabuf, &dmabuf_list, list_node) {
That you can put the DMA-buf and then still uses it in list_for_each_entry_continue() only works because the mutex is locked in the destroy path.
Yup, this was deliberate.
I strongly suggest to just put those two functions into drivers/dma-buf/dma-buf.c right next to the __dma_buf_debugfs_list_add() and __dma_buf_debugfs_list_del() functions.
By two functions, you mean a get_first_dmabuf(void) and a get_next_dmabuf(struct dma_buf*)? To make the dma_buf_put() call a little less scary since all the mutex ops are right there?
Yes, exactly that's the idea. The comment above is good to have as well, but it only works one way.
If somebody changes the DMA-buf code without looking at this here we are busted.
Sounds good, will do. Thanks.
Regards, Christian.
Apart from those style suggestions looks good to me from the technical side, but I'm not an expert for the BPF stuff.
Regards, Christian.
Thanks for your comments and reviews!
if (file_ref_get(&dmabuf->file->f_ref)) {
ret = dmabuf;
break;
}
}
} else {
list_for_each_entry(dmabuf, &dmabuf_list, list_node) {
if (file_ref_get(&dmabuf->file->f_ref)) {
ret = dmabuf;
break;
}
}
}
mutex_unlock(&dmabuf_list_mutex);
return ret;
+}
+static void *dmabuf_iter_seq_start(struct seq_file *seq, loff_t *pos) +{
if (*pos)
return NULL;
return get_next_dmabuf(NULL);
+}
+static void *dmabuf_iter_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{
struct dma_buf *dmabuf = v;
++*pos;
return get_next_dmabuf(dmabuf);
+}
+struct bpf_iter__dmabuf {
__bpf_md_ptr(struct bpf_iter_meta *, meta);
__bpf_md_ptr(struct dma_buf *, dmabuf);
+};
+static int __dmabuf_seq_show(struct seq_file *seq, void *v, bool in_stop) +{
struct bpf_iter_meta meta = {
.seq = seq,
};
struct bpf_iter__dmabuf ctx = {
.meta = &meta,
.dmabuf = v,
};
struct bpf_prog *prog = bpf_iter_get_info(&meta, in_stop);
if (prog)
return bpf_iter_run_prog(prog, &ctx);
return 0;
+}
+static int dmabuf_iter_seq_show(struct seq_file *seq, void *v) +{
return __dmabuf_seq_show(seq, v, false);
+}
+static void dmabuf_iter_seq_stop(struct seq_file *seq, void *v) +{
struct dma_buf *dmabuf = v;
if (dmabuf)
dma_buf_put(dmabuf);
+}
+static const struct seq_operations dmabuf_iter_seq_ops = {
.start = dmabuf_iter_seq_start,
.next = dmabuf_iter_seq_next,
.stop = dmabuf_iter_seq_stop,
.show = dmabuf_iter_seq_show,
+};
+static void bpf_iter_dmabuf_show_fdinfo(const struct bpf_iter_aux_info *aux,
struct seq_file *seq)
+{
seq_puts(seq, "dmabuf iter\n");
+}
+static const struct bpf_iter_seq_info dmabuf_iter_seq_info = {
.seq_ops = &dmabuf_iter_seq_ops,
.init_seq_private = NULL,
.fini_seq_private = NULL,
.seq_priv_size = 0,
+};
+static struct bpf_iter_reg bpf_dmabuf_reg_info = {
.target = "dmabuf",
.feature = BPF_ITER_RESCHED,
.show_fdinfo = bpf_iter_dmabuf_show_fdinfo,
.ctx_arg_info_size = 1,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__dmabuf, dmabuf),
PTR_TO_BTF_ID_OR_NULL },
},
.seq_info = &dmabuf_iter_seq_info,
+};
+static int __init dmabuf_iter_init(void) +{
bpf_dmabuf_reg_info.ctx_arg_info[0].btf_id = bpf_dmabuf_btf_id[0];
return bpf_iter_reg_target(&bpf_dmabuf_reg_info);
+}
+late_initcall(dmabuf_iter_init);
On Mon, May 5, 2025 at 10:08 AM T.J. Mercier tjmercier@google.com wrote:
Sounds good, will do. Thanks.
looks like the majority of the code will be touching various bpf bits, so let's route the first 5 patches via bpf-next. When you respin, please mention [PATCH bpf-next] in the subject, so that CI can pick it up.
This open coded iterator allows for more flexibility when creating BPF programs. It can support output in formats other than text. With an open coded iterator, a single BPF program can traverse multiple kernel data structures (now including dmabufs), allowing for more efficient analysis of kernel data compared to multiple reads from procfs, sysfs, or multiple traditional BPF iterator invocations.
Signed-off-by: T.J. Mercier tjmercier@google.com --- kernel/bpf/dmabuf_iter.c | 43 ++++++++++++++++++++++++++++++++++++++++ kernel/bpf/helpers.c | 5 +++++ 2 files changed, 48 insertions(+)
diff --git a/kernel/bpf/dmabuf_iter.c b/kernel/bpf/dmabuf_iter.c index 968762e11f73..ebf9794241ef 100644 --- a/kernel/bpf/dmabuf_iter.c +++ b/kernel/bpf/dmabuf_iter.c @@ -132,3 +132,46 @@ static int __init dmabuf_iter_init(void) }
late_initcall(dmabuf_iter_init); + +struct bpf_iter_dmabuf { + /* opaque iterator state; having __u64 here allows to preserve correct + * alignment requirements in vmlinux.h, generated from BTF + */ + __u64 __opaque[1]; +} __aligned(8); + +/* Non-opaque version of bpf_iter_dmabuf */ +struct bpf_iter_dmabuf_kern { + struct dma_buf *dmabuf; +} __aligned(8); + +__bpf_kfunc_start_defs(); + +__bpf_kfunc int bpf_iter_dmabuf_new(struct bpf_iter_dmabuf *it) +{ + struct bpf_iter_dmabuf_kern *kit = (void *)it; + + BUILD_BUG_ON(sizeof(*kit) > sizeof(*it)); + BUILD_BUG_ON(__alignof__(*kit) != __alignof__(*it)); + + kit->dmabuf = NULL; + return 0; +} + +__bpf_kfunc struct dma_buf *bpf_iter_dmabuf_next(struct bpf_iter_dmabuf *it) +{ + struct bpf_iter_dmabuf_kern *kit = (void *)it; + + kit->dmabuf = get_next_dmabuf(kit->dmabuf); + return kit->dmabuf; +} + +__bpf_kfunc void bpf_iter_dmabuf_destroy(struct bpf_iter_dmabuf *it) +{ + struct bpf_iter_dmabuf_kern *kit = (void *)it; + + if (kit->dmabuf) + dma_buf_put(kit->dmabuf); +} + +__bpf_kfunc_end_defs(); diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index e3a2662f4e33..49de5eae44da 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -3294,6 +3294,11 @@ BTF_ID_FLAGS(func, bpf_iter_kmem_cache_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLE BTF_ID_FLAGS(func, bpf_iter_kmem_cache_destroy, KF_ITER_DESTROY | KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_local_irq_save) BTF_ID_FLAGS(func, bpf_local_irq_restore) +#ifdef CONFIG_DMA_SHARED_BUFFER +BTF_ID_FLAGS(func, bpf_iter_dmabuf_new, KF_ITER_NEW | KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_iter_dmabuf_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_iter_dmabuf_destroy, KF_ITER_DESTROY | KF_SLEEPABLE) +#endif BTF_KFUNCS_END(common_btf_ids)
static const struct btf_kfunc_id_set common_kfunc_set = {
This test creates a udmabuf, and a dmabuf from the system dmabuf heap, and uses a BPF program that prints dmabuf metadata with the new dmabuf_iter to verify they can be found.
Signed-off-by: T.J. Mercier tjmercier@google.com --- tools/testing/selftests/bpf/config | 3 + .../selftests/bpf/prog_tests/dmabuf_iter.c | 224 ++++++++++++++++++ .../testing/selftests/bpf/progs/dmabuf_iter.c | 53 +++++ 3 files changed, 280 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c create mode 100644 tools/testing/selftests/bpf/progs/dmabuf_iter.c
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index c378d5d07e02..2bdff2f3285f 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -22,6 +22,8 @@ CONFIG_CRYPTO_AES=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_BTF=y CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_DMABUF_HEAPS=y +CONFIG_DMABUF_HEAPS_SYSTEM=y CONFIG_DUMMY=y CONFIG_DYNAMIC_FTRACE=y CONFIG_FPROBE=y @@ -106,6 +108,7 @@ CONFIG_SECURITY=y CONFIG_SECURITYFS=y CONFIG_SYN_COOKIES=y CONFIG_TEST_BPF=m +CONFIG_UDMABUF=y CONFIG_USERFAULTFD=y CONFIG_VSOCKETS=y CONFIG_VXLAN=y diff --git a/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c b/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c new file mode 100644 index 000000000000..35745f4ce0f8 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Google */ + +#include <test_progs.h> +#include <bpf/libbpf.h> +#include <bpf/btf.h> +#include "dmabuf_iter.skel.h" + +#include <fcntl.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <unistd.h> + +#include <linux/dma-buf.h> +#include <linux/dma-heap.h> +#include <linux/udmabuf.h> + +static int memfd, udmabuf; +static const char udmabuf_test_buffer_name[DMA_BUF_NAME_LEN] = "udmabuf_test_buffer_for_iter"; +static size_t udmabuf_test_buffer_size; +static int sysheap_dmabuf; +static const char sysheap_test_buffer_name[DMA_BUF_NAME_LEN] = "sysheap_test_buffer_for_iter"; +static size_t sysheap_test_buffer_size; + +static int create_udmabuf(void) +{ + struct udmabuf_create create; + int dev_udmabuf; + + udmabuf_test_buffer_size = 10 * getpagesize(); + + if (!ASSERT_LE(sizeof(udmabuf_test_buffer_name), DMA_BUF_NAME_LEN, "NAMETOOLONG")) + return 1; + + memfd = memfd_create("memfd_test", MFD_ALLOW_SEALING); + if (!ASSERT_OK_FD(memfd, "memfd_create")) + return 1; + + if (!ASSERT_OK(ftruncate(memfd, udmabuf_test_buffer_size), "ftruncate")) + return 1; + + if (!ASSERT_OK(fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK), "seal")) + return 1; + + dev_udmabuf = open("/dev/udmabuf", O_RDONLY); + if (!ASSERT_OK_FD(dev_udmabuf, "open udmabuf")) + return 1; + + create.memfd = memfd; + create.flags = UDMABUF_FLAGS_CLOEXEC; + create.offset = 0; + create.size = udmabuf_test_buffer_size; + + udmabuf = ioctl(dev_udmabuf, UDMABUF_CREATE, &create); + close(dev_udmabuf); + if (!ASSERT_OK_FD(udmabuf, "udmabuf_create")) + return 1; + + if (!ASSERT_OK(ioctl(udmabuf, DMA_BUF_SET_NAME_B, udmabuf_test_buffer_name), "name")) + return 1; + + return 0; +} + +static int create_sys_heap_dmabuf(void) +{ + sysheap_test_buffer_size = 20 * getpagesize(); + + struct dma_heap_allocation_data data = { + .len = sysheap_test_buffer_size, + .fd = 0, + .fd_flags = O_RDWR | O_CLOEXEC, + .heap_flags = 0, + }; + int heap_fd, ret; + + if (!ASSERT_LE(sizeof(sysheap_test_buffer_name), DMA_BUF_NAME_LEN, "NAMETOOLONG")) + return 1; + + heap_fd = open("/dev/dma_heap/system", O_RDONLY); + if (!ASSERT_OK_FD(heap_fd, "open dma heap")) + return 1; + + ret = ioctl(heap_fd, DMA_HEAP_IOCTL_ALLOC, &data); + close(heap_fd); + if (!ASSERT_OK(ret, "syheap alloc")) + return 1; + + sysheap_dmabuf = data.fd; + + if (!ASSERT_OK(ioctl(sysheap_dmabuf, DMA_BUF_SET_NAME_B, sysheap_test_buffer_name), "name")) + return 1; + + return 0; +} + +static int create_test_buffers(void) +{ + int ret; + + ret = create_udmabuf(); + if (ret) + return ret; + + return create_sys_heap_dmabuf(); +} + +static void destroy_test_buffers(void) +{ + close(udmabuf); + close(memfd); + close(sysheap_dmabuf); +} + +enum Fields { INODE, SIZE, NAME, EXPORTER, FIELD_COUNT }; +struct DmabufInfo { + unsigned long inode; + unsigned long size; + char name[DMA_BUF_NAME_LEN]; + char exporter[32]; +}; + +static bool check_dmabuf_info(const struct DmabufInfo *bufinfo, + unsigned long size, + const char *name, const char *exporter) +{ + return size == bufinfo->size && + !strcmp(name, bufinfo->name) && + !strcmp(exporter, bufinfo->exporter); +} + +static void subtest_dmabuf_iter_check_default_iter(struct dmabuf_iter *skel) +{ + bool found_test_sysheap_dmabuf = false; + bool found_test_udmabuf = false; + struct DmabufInfo bufinfo; + size_t linesize = 0; + char *line = NULL; + FILE *iter_file; + int iter_fd, f = INODE; + + iter_fd = bpf_iter_create(bpf_link__fd(skel->links.dmabuf_collector)); + ASSERT_OK_FD(iter_fd, "iter_create"); + + iter_file = fdopen(iter_fd, "r"); + ASSERT_OK_PTR(iter_file, "fdopen"); + + while (getline(&line, &linesize, iter_file) != -1) { + if (f % FIELD_COUNT == INODE) { + ASSERT_EQ(sscanf(line, "%ld", &bufinfo.inode), 1, + "read inode"); + } else if (f % FIELD_COUNT == SIZE) { + ASSERT_EQ(sscanf(line, "%ld", &bufinfo.size), 1, + "read size"); + } else if (f % FIELD_COUNT == NAME) { + ASSERT_EQ(sscanf(line, "%s", bufinfo.name), 1, + "read name"); + } else if (f % FIELD_COUNT == EXPORTER) { + ASSERT_EQ(sscanf(line, "%31s", bufinfo.exporter), 1, + "read exporter"); + + if (check_dmabuf_info(&bufinfo, + sysheap_test_buffer_size, + sysheap_test_buffer_name, + "system")) + found_test_sysheap_dmabuf = true; + else if (check_dmabuf_info(&bufinfo, + udmabuf_test_buffer_size, + udmabuf_test_buffer_name, + "udmabuf")) + found_test_udmabuf = true; + } + ++f; + } + + ASSERT_EQ(f % FIELD_COUNT, INODE, "number of fields"); + + ASSERT_TRUE(found_test_sysheap_dmabuf, "found_test_sysheap_dmabuf"); + ASSERT_TRUE(found_test_udmabuf, "found_test_udmabuf"); + + free(line); + fclose(iter_file); + close(iter_fd); +} + +void test_dmabuf_iter(void) +{ + struct dmabuf_iter *skel = NULL; + char buf[256]; + int iter_fd; + + skel = dmabuf_iter__open_and_load(); + if (!ASSERT_OK_PTR(skel, "dmabuf_iter__open_and_load")) + return; + + if (!ASSERT_OK(create_test_buffers(), "create_buffers")) + goto destroy; + + if (!ASSERT_OK(dmabuf_iter__attach(skel), "skel_attach")) + goto destroy; + + iter_fd = bpf_iter_create(bpf_link__fd(skel->links.dmabuf_collector)); + if (!ASSERT_OK_FD(iter_fd, "iter_create")) + goto destroy; + + while (read(iter_fd, buf, sizeof(buf)) > 0) + ; /* Read out all contents */ + + /* Next reads should return 0 */ + ASSERT_EQ(read(iter_fd, buf, sizeof(buf)), 0, "read"); + + if (test__start_subtest("default_iter")) + subtest_dmabuf_iter_check_default_iter(skel); + + close(iter_fd); + +destroy: + destroy_test_buffers(); + dmabuf_iter__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/dmabuf_iter.c b/tools/testing/selftests/bpf/progs/dmabuf_iter.c new file mode 100644 index 000000000000..6e80a53c4670 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/dmabuf_iter.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Google LLC */ +#include <vmlinux.h> +#include <bpf/bpf_core_read.h> +#include <bpf/bpf_helpers.h> + +/* From uapi/linux/dma-buf.h */ +#define DMA_BUF_NAME_LEN 32 + +char _license[] SEC("license") = "GPL"; + +/** + * Fields output by this iterator are delimited by newlines. Convert any + * newlines in user-provided printed strings to spaces. + */ +static void sanitize_string(char *src, size_t size) +{ + for (char *c = src; c && (size_t)(c-src) < size; ++c) + if (*c == '\n') + *c = ' '; +} + +SEC("iter/dmabuf") +int dmabuf_collector(struct bpf_iter__dmabuf *ctx) +{ + const struct dma_buf *dmabuf = ctx->dmabuf; + struct seq_file *seq = ctx->meta->seq; + unsigned long inode = 0; + size_t size; + const char *pname, *exporter; + char name[DMA_BUF_NAME_LEN] = {'\0'}; + + if (!dmabuf) + return 0; + + if (BPF_CORE_READ_INTO(&inode, dmabuf, file, f_inode, i_ino) || + bpf_core_read(&size, sizeof(size), &dmabuf->size) || + bpf_core_read(&pname, sizeof(pname), &dmabuf->name) || + bpf_core_read(&exporter, sizeof(exporter), &dmabuf->exp_name)) + return 1; + + /* Buffers are not required to be named */ + if (pname) { + if (bpf_probe_read_kernel(name, sizeof(name), pname)) + return 1; + + /* Name strings can be provided by userspace */ + sanitize_string(name, sizeof(name)); + } + + BPF_SEQ_PRINTF(seq, "%lu\n%llu\n%s\n%s\n", inode, size, name, exporter); + return 0; +}
Use the same test buffers as the traditional iterator and a new BPF map to verify the test buffers can be found with the open coded dmabuf iterator.
Signed-off-by: T.J. Mercier tjmercier@google.com --- .../testing/selftests/bpf/bpf_experimental.h | 5 ++ .../selftests/bpf/prog_tests/dmabuf_iter.c | 52 +++++++++++++++---- .../testing/selftests/bpf/progs/dmabuf_iter.c | 38 ++++++++++++++ 3 files changed, 86 insertions(+), 9 deletions(-)
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index 6535c8ae3c46..5e512a1d09d1 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -591,4 +591,9 @@ extern int bpf_iter_kmem_cache_new(struct bpf_iter_kmem_cache *it) __weak __ksym extern struct kmem_cache *bpf_iter_kmem_cache_next(struct bpf_iter_kmem_cache *it) __weak __ksym; extern void bpf_iter_kmem_cache_destroy(struct bpf_iter_kmem_cache *it) __weak __ksym;
+struct bpf_iter_dmabuf; +extern int bpf_iter_dmabuf_new(struct bpf_iter_dmabuf *it) __weak __ksym; +extern struct dma_buf *bpf_iter_dmabuf_next(struct bpf_iter_dmabuf *it) __weak __ksym; +extern void bpf_iter_dmabuf_destroy(struct bpf_iter_dmabuf *it) __weak __ksym; + #endif diff --git a/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c b/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c index 35745f4ce0f8..cc02bb555610 100644 --- a/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c @@ -26,10 +26,11 @@ static int sysheap_dmabuf; static const char sysheap_test_buffer_name[DMA_BUF_NAME_LEN] = "sysheap_test_buffer_for_iter"; static size_t sysheap_test_buffer_size;
-static int create_udmabuf(void) +static int create_udmabuf(int map_fd) { struct udmabuf_create create; int dev_udmabuf; + bool f = false;
udmabuf_test_buffer_size = 10 * getpagesize();
@@ -63,10 +64,10 @@ static int create_udmabuf(void) if (!ASSERT_OK(ioctl(udmabuf, DMA_BUF_SET_NAME_B, udmabuf_test_buffer_name), "name")) return 1;
- return 0; + return bpf_map_update_elem(map_fd, udmabuf_test_buffer_name, &f, BPF_ANY); }
-static int create_sys_heap_dmabuf(void) +static int create_sys_heap_dmabuf(int map_fd) { sysheap_test_buffer_size = 20 * getpagesize();
@@ -77,6 +78,7 @@ static int create_sys_heap_dmabuf(void) .heap_flags = 0, }; int heap_fd, ret; + bool f = false;
if (!ASSERT_LE(sizeof(sysheap_test_buffer_name), DMA_BUF_NAME_LEN, "NAMETOOLONG")) return 1; @@ -95,18 +97,18 @@ static int create_sys_heap_dmabuf(void) if (!ASSERT_OK(ioctl(sysheap_dmabuf, DMA_BUF_SET_NAME_B, sysheap_test_buffer_name), "name")) return 1;
- return 0; + return bpf_map_update_elem(map_fd, sysheap_test_buffer_name, &f, BPF_ANY); }
-static int create_test_buffers(void) +static int create_test_buffers(int map_fd) { int ret;
- ret = create_udmabuf(); + ret = create_udmabuf(map_fd); if (ret) return ret;
- return create_sys_heap_dmabuf(); + return create_sys_heap_dmabuf(map_fd); }
static void destroy_test_buffers(void) @@ -187,17 +189,46 @@ static void subtest_dmabuf_iter_check_default_iter(struct dmabuf_iter *skel) close(iter_fd); }
+static void subtest_dmabuf_iter_check_open_coded(struct dmabuf_iter *skel, int map_fd) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + char key[DMA_BUF_NAME_LEN]; + int err, fd; + bool found; + + /* No need to attach it, just run it directly */ + fd = bpf_program__fd(skel->progs.iter_dmabuf_for_each); + + err = bpf_prog_test_run_opts(fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + return; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) + return; + + if (!ASSERT_OK(bpf_map_get_next_key(map_fd, NULL, key), "get next key")) + return; + + do { + ASSERT_OK(bpf_map_lookup_elem(map_fd, key, &found), "lookup elem"); + ASSERT_TRUE(found, "found test buffer"); + } while (bpf_map_get_next_key(map_fd, key, key)); +} + void test_dmabuf_iter(void) { struct dmabuf_iter *skel = NULL; + int iter_fd, map_fd; char buf[256]; - int iter_fd;
skel = dmabuf_iter__open_and_load(); if (!ASSERT_OK_PTR(skel, "dmabuf_iter__open_and_load")) return;
- if (!ASSERT_OK(create_test_buffers(), "create_buffers")) + map_fd = bpf_map__fd(skel->maps.testbuf_hash); + if (!ASSERT_OK_FD(map_fd, "map_fd")) + goto destroy_skel; + + if (!ASSERT_OK(create_test_buffers(map_fd), "create_buffers")) goto destroy;
if (!ASSERT_OK(dmabuf_iter__attach(skel), "skel_attach")) @@ -215,10 +246,13 @@ void test_dmabuf_iter(void)
if (test__start_subtest("default_iter")) subtest_dmabuf_iter_check_default_iter(skel); + if (test__start_subtest("open_coded")) + subtest_dmabuf_iter_check_open_coded(skel, map_fd);
close(iter_fd);
destroy: destroy_test_buffers(); +destroy_skel: dmabuf_iter__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/dmabuf_iter.c b/tools/testing/selftests/bpf/progs/dmabuf_iter.c index 6e80a53c4670..030654ab5717 100644 --- a/tools/testing/selftests/bpf/progs/dmabuf_iter.c +++ b/tools/testing/selftests/bpf/progs/dmabuf_iter.c @@ -9,6 +9,13 @@
char _license[] SEC("license") = "GPL";
+struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, DMA_BUF_NAME_LEN); + __type(value, bool); + __uint(max_entries, 5); +} testbuf_hash SEC(".maps"); + /** * Fields output by this iterator are delimited by newlines. Convert any * newlines in user-provided printed strings to spaces. @@ -51,3 +58,34 @@ int dmabuf_collector(struct bpf_iter__dmabuf *ctx) BPF_SEQ_PRINTF(seq, "%lu\n%llu\n%s\n%s\n", inode, size, name, exporter); return 0; } + +SEC("syscall") +int iter_dmabuf_for_each(const void *ctx) +{ + struct dma_buf *d; + + bpf_for_each(dmabuf, d) { + char name[DMA_BUF_NAME_LEN]; + const char *pname; + bool *found; + + if (bpf_core_read(&pname, sizeof(pname), &d->name)) + return 1; + + /* Buffers are not required to be named */ + if (!pname) + continue; + + if (bpf_probe_read_kernel(name, sizeof(name), pname)) + return 1; + + found = bpf_map_lookup_elem(&testbuf_hash, name); + if (found) { + bool t = true; + + bpf_map_update_elem(&testbuf_hash, name, &t, BPF_EXIST); + } + } + + return 0; +}
I think Android is probably the only remaining user of the dmabuf sysfs files. The BPF infrastructure added earlier in this series will allow us to get the same information much more cheaply.
This patch is a RFC because I'd like to keep this for at least one more longterm stable release (6.18?) before actually removing it so that we can have one longterm stable kernel version that supports both options to facilitate a transition from the sysfs files to a BPF program.
Signed-off-by: T.J. Mercier tjmercier@google.com --- .../ABI/testing/sysfs-kernel-dmabuf-buffers | 24 --- Documentation/driver-api/dma-buf.rst | 5 - drivers/dma-buf/Kconfig | 15 -- drivers/dma-buf/Makefile | 1 - drivers/dma-buf/dma-buf-sysfs-stats.c | 202 ------------------ drivers/dma-buf/dma-buf-sysfs-stats.h | 35 --- drivers/dma-buf/dma-buf.c | 18 -- 7 files changed, 300 deletions(-) delete mode 100644 Documentation/ABI/testing/sysfs-kernel-dmabuf-buffers delete mode 100644 drivers/dma-buf/dma-buf-sysfs-stats.c delete mode 100644 drivers/dma-buf/dma-buf-sysfs-stats.h
diff --git a/Documentation/ABI/testing/sysfs-kernel-dmabuf-buffers b/Documentation/ABI/testing/sysfs-kernel-dmabuf-buffers deleted file mode 100644 index 5d3bc997dc64..000000000000 --- a/Documentation/ABI/testing/sysfs-kernel-dmabuf-buffers +++ /dev/null @@ -1,24 +0,0 @@ -What: /sys/kernel/dmabuf/buffers -Date: May 2021 -KernelVersion: v5.13 -Contact: Hridya Valsaraju hridya@google.com -Description: The /sys/kernel/dmabuf/buffers directory contains a - snapshot of the internal state of every DMA-BUF. - /sys/kernel/dmabuf/buffers/<inode_number> will contain the - statistics for the DMA-BUF with the unique inode number - <inode_number> -Users: kernel memory tuning/debugging tools - -What: /sys/kernel/dmabuf/buffers/<inode_number>/exporter_name -Date: May 2021 -KernelVersion: v5.13 -Contact: Hridya Valsaraju hridya@google.com -Description: This file is read-only and contains the name of the exporter of - the DMA-BUF. - -What: /sys/kernel/dmabuf/buffers/<inode_number>/size -Date: May 2021 -KernelVersion: v5.13 -Contact: Hridya Valsaraju hridya@google.com -Description: This file is read-only and specifies the size of the DMA-BUF in - bytes. diff --git a/Documentation/driver-api/dma-buf.rst b/Documentation/driver-api/dma-buf.rst index 29abf1eebf9f..2f36c21d9948 100644 --- a/Documentation/driver-api/dma-buf.rst +++ b/Documentation/driver-api/dma-buf.rst @@ -125,11 +125,6 @@ Implicit Fence Poll Support .. kernel-doc:: drivers/dma-buf/dma-buf.c :doc: implicit fence polling
-DMA-BUF statistics -~~~~~~~~~~~~~~~~~~ -.. kernel-doc:: drivers/dma-buf/dma-buf-sysfs-stats.c - :doc: overview - DMA Buffer ioctls ~~~~~~~~~~~~~~~~~
diff --git a/drivers/dma-buf/Kconfig b/drivers/dma-buf/Kconfig index fee04fdb0822..03e38c0d1fff 100644 --- a/drivers/dma-buf/Kconfig +++ b/drivers/dma-buf/Kconfig @@ -76,21 +76,6 @@ menuconfig DMABUF_HEAPS allows userspace to allocate dma-bufs that can be shared between drivers.
-menuconfig DMABUF_SYSFS_STATS - bool "DMA-BUF sysfs statistics (DEPRECATED)" - depends on DMA_SHARED_BUFFER - help - Choose this option to enable DMA-BUF sysfs statistics - in location /sys/kernel/dmabuf/buffers. - - /sys/kernel/dmabuf/buffers/<inode_number> will contain - statistics for the DMA-BUF with the unique inode number - <inode_number>. - - This option is deprecated and should sooner or later be removed. - Android is the only user of this and it turned out that this resulted - in quite some performance problems. - source "drivers/dma-buf/heaps/Kconfig"
endmenu diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile index 70ec901edf2c..8ab2bfecb1c9 100644 --- a/drivers/dma-buf/Makefile +++ b/drivers/dma-buf/Makefile @@ -6,7 +6,6 @@ obj-$(CONFIG_DMABUF_HEAPS) += heaps/ obj-$(CONFIG_SYNC_FILE) += sync_file.o obj-$(CONFIG_SW_SYNC) += sw_sync.o sync_debug.o obj-$(CONFIG_UDMABUF) += udmabuf.o -obj-$(CONFIG_DMABUF_SYSFS_STATS) += dma-buf-sysfs-stats.o
dmabuf_selftests-y := \ selftest.o \ diff --git a/drivers/dma-buf/dma-buf-sysfs-stats.c b/drivers/dma-buf/dma-buf-sysfs-stats.c deleted file mode 100644 index b5b62e40ccc1..000000000000 --- a/drivers/dma-buf/dma-buf-sysfs-stats.c +++ /dev/null @@ -1,202 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * DMA-BUF sysfs statistics. - * - * Copyright (C) 2021 Google LLC. - */ - -#include <linux/dma-buf.h> -#include <linux/dma-resv.h> -#include <linux/kobject.h> -#include <linux/printk.h> -#include <linux/slab.h> -#include <linux/sysfs.h> - -#include "dma-buf-sysfs-stats.h" - -#define to_dma_buf_entry_from_kobj(x) container_of(x, struct dma_buf_sysfs_entry, kobj) - -/** - * DOC: overview - * - * ``/sys/kernel/debug/dma_buf/bufinfo`` provides an overview of every DMA-BUF - * in the system. However, since debugfs is not safe to be mounted in - * production, procfs and sysfs can be used to gather DMA-BUF statistics on - * production systems. - * - * The ``/proc/<pid>/fdinfo/<fd>`` files in procfs can be used to gather - * information about DMA-BUF fds. Detailed documentation about the interface - * is present in Documentation/filesystems/proc.rst. - * - * Unfortunately, the existing procfs interfaces can only provide information - * about the DMA-BUFs for which processes hold fds or have the buffers mmapped - * into their address space. This necessitated the creation of the DMA-BUF sysfs - * statistics interface to provide per-buffer information on production systems. - * - * The interface at ``/sys/kernel/dmabuf/buffers`` exposes information about - * every DMA-BUF when ``CONFIG_DMABUF_SYSFS_STATS`` is enabled. - * - * The following stats are exposed by the interface: - * - * * ``/sys/kernel/dmabuf/buffers/<inode_number>/exporter_name`` - * * ``/sys/kernel/dmabuf/buffers/<inode_number>/size`` - * - * The information in the interface can also be used to derive per-exporter - * statistics. The data from the interface can be gathered on error conditions - * or other important events to provide a snapshot of DMA-BUF usage. - * It can also be collected periodically by telemetry to monitor various metrics. - * - * Detailed documentation about the interface is present in - * Documentation/ABI/testing/sysfs-kernel-dmabuf-buffers. - */ - -struct dma_buf_stats_attribute { - struct attribute attr; - ssize_t (*show)(struct dma_buf *dmabuf, - struct dma_buf_stats_attribute *attr, char *buf); -}; -#define to_dma_buf_stats_attr(x) container_of(x, struct dma_buf_stats_attribute, attr) - -static ssize_t dma_buf_stats_attribute_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct dma_buf_stats_attribute *attribute; - struct dma_buf_sysfs_entry *sysfs_entry; - struct dma_buf *dmabuf; - - attribute = to_dma_buf_stats_attr(attr); - sysfs_entry = to_dma_buf_entry_from_kobj(kobj); - dmabuf = sysfs_entry->dmabuf; - - if (!dmabuf || !attribute->show) - return -EIO; - - return attribute->show(dmabuf, attribute, buf); -} - -static const struct sysfs_ops dma_buf_stats_sysfs_ops = { - .show = dma_buf_stats_attribute_show, -}; - -static ssize_t exporter_name_show(struct dma_buf *dmabuf, - struct dma_buf_stats_attribute *attr, - char *buf) -{ - return sysfs_emit(buf, "%s\n", dmabuf->exp_name); -} - -static ssize_t size_show(struct dma_buf *dmabuf, - struct dma_buf_stats_attribute *attr, - char *buf) -{ - return sysfs_emit(buf, "%zu\n", dmabuf->size); -} - -static struct dma_buf_stats_attribute exporter_name_attribute = - __ATTR_RO(exporter_name); -static struct dma_buf_stats_attribute size_attribute = __ATTR_RO(size); - -static struct attribute *dma_buf_stats_default_attrs[] = { - &exporter_name_attribute.attr, - &size_attribute.attr, - NULL, -}; -ATTRIBUTE_GROUPS(dma_buf_stats_default); - -static void dma_buf_sysfs_release(struct kobject *kobj) -{ - struct dma_buf_sysfs_entry *sysfs_entry; - - sysfs_entry = to_dma_buf_entry_from_kobj(kobj); - kfree(sysfs_entry); -} - -static const struct kobj_type dma_buf_ktype = { - .sysfs_ops = &dma_buf_stats_sysfs_ops, - .release = dma_buf_sysfs_release, - .default_groups = dma_buf_stats_default_groups, -}; - -void dma_buf_stats_teardown(struct dma_buf *dmabuf) -{ - struct dma_buf_sysfs_entry *sysfs_entry; - - sysfs_entry = dmabuf->sysfs_entry; - if (!sysfs_entry) - return; - - kobject_del(&sysfs_entry->kobj); - kobject_put(&sysfs_entry->kobj); -} - - -/* Statistics files do not need to send uevents. */ -static int dmabuf_sysfs_uevent_filter(const struct kobject *kobj) -{ - return 0; -} - -static const struct kset_uevent_ops dmabuf_sysfs_no_uevent_ops = { - .filter = dmabuf_sysfs_uevent_filter, -}; - -static struct kset *dma_buf_stats_kset; -static struct kset *dma_buf_per_buffer_stats_kset; -int dma_buf_init_sysfs_statistics(void) -{ - dma_buf_stats_kset = kset_create_and_add("dmabuf", - &dmabuf_sysfs_no_uevent_ops, - kernel_kobj); - if (!dma_buf_stats_kset) - return -ENOMEM; - - dma_buf_per_buffer_stats_kset = kset_create_and_add("buffers", - &dmabuf_sysfs_no_uevent_ops, - &dma_buf_stats_kset->kobj); - if (!dma_buf_per_buffer_stats_kset) { - kset_unregister(dma_buf_stats_kset); - return -ENOMEM; - } - - return 0; -} - -void dma_buf_uninit_sysfs_statistics(void) -{ - kset_unregister(dma_buf_per_buffer_stats_kset); - kset_unregister(dma_buf_stats_kset); -} - -int dma_buf_stats_setup(struct dma_buf *dmabuf, struct file *file) -{ - struct dma_buf_sysfs_entry *sysfs_entry; - int ret; - - if (!dmabuf->exp_name) { - pr_err("exporter name must not be empty if stats needed\n"); - return -EINVAL; - } - - sysfs_entry = kzalloc(sizeof(struct dma_buf_sysfs_entry), GFP_KERNEL); - if (!sysfs_entry) - return -ENOMEM; - - sysfs_entry->kobj.kset = dma_buf_per_buffer_stats_kset; - sysfs_entry->dmabuf = dmabuf; - - dmabuf->sysfs_entry = sysfs_entry; - - /* create the directory for buffer stats */ - ret = kobject_init_and_add(&sysfs_entry->kobj, &dma_buf_ktype, NULL, - "%lu", file_inode(file)->i_ino); - if (ret) - goto err_sysfs_dmabuf; - - return 0; - -err_sysfs_dmabuf: - kobject_put(&sysfs_entry->kobj); - dmabuf->sysfs_entry = NULL; - return ret; -} diff --git a/drivers/dma-buf/dma-buf-sysfs-stats.h b/drivers/dma-buf/dma-buf-sysfs-stats.h deleted file mode 100644 index 7a8a995b75ba..000000000000 --- a/drivers/dma-buf/dma-buf-sysfs-stats.h +++ /dev/null @@ -1,35 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * DMA-BUF sysfs statistics. - * - * Copyright (C) 2021 Google LLC. - */ - -#ifndef _DMA_BUF_SYSFS_STATS_H -#define _DMA_BUF_SYSFS_STATS_H - -#ifdef CONFIG_DMABUF_SYSFS_STATS - -int dma_buf_init_sysfs_statistics(void); -void dma_buf_uninit_sysfs_statistics(void); - -int dma_buf_stats_setup(struct dma_buf *dmabuf, struct file *file); - -void dma_buf_stats_teardown(struct dma_buf *dmabuf); -#else - -static inline int dma_buf_init_sysfs_statistics(void) -{ - return 0; -} - -static inline void dma_buf_uninit_sysfs_statistics(void) {} - -static inline int dma_buf_stats_setup(struct dma_buf *dmabuf, struct file *file) -{ - return 0; -} - -static inline void dma_buf_stats_teardown(struct dma_buf *dmabuf) {} -#endif -#endif // _DMA_BUF_SYSFS_STATS_H diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 7260bdd77c75..adc6a0c96641 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -31,8 +31,6 @@ #include <uapi/linux/dma-buf.h> #include <uapi/linux/magic.h>
-#include "dma-buf-sysfs-stats.h" - static inline int is_dma_buf_file(struct file *);
DEFINE_MUTEX(dmabuf_list_mutex); @@ -88,7 +86,6 @@ static void dma_buf_release(struct dentry *dentry) */ BUG_ON(dmabuf->cb_in.active || dmabuf->cb_out.active);
- dma_buf_stats_teardown(dmabuf); dmabuf->ops->release(dmabuf);
if (dmabuf->resv == (struct dma_resv *)&dmabuf[1]) @@ -671,10 +668,6 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) dmabuf->resv = resv; }
- ret = dma_buf_stats_setup(dmabuf, file); - if (ret) - goto err_dmabuf; - file->private_data = dmabuf; file->f_path.dentry->d_fsdata = dmabuf; dmabuf->file = file; @@ -683,10 +676,6 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
return dmabuf;
-err_dmabuf: - if (!resv) - dma_resv_fini(dmabuf->resv); - kfree(dmabuf); err_file: fput(file); err_module: @@ -1717,12 +1706,6 @@ static inline void dma_buf_uninit_debugfs(void)
static int __init dma_buf_init(void) { - int ret; - - ret = dma_buf_init_sysfs_statistics(); - if (ret) - return ret; - dma_buf_mnt = kern_mount(&dma_buf_fs_type); if (IS_ERR(dma_buf_mnt)) return PTR_ERR(dma_buf_mnt); @@ -1736,6 +1719,5 @@ static void __exit dma_buf_deinit(void) { dma_buf_uninit_debugfs(); kern_unmount(dma_buf_mnt); - dma_buf_uninit_sysfs_statistics(); } __exitcall(dma_buf_deinit);
On 5/5/25 00:41, T.J. Mercier wrote:
I think Android is probably the only remaining user of the dmabuf sysfs files. The BPF infrastructure added earlier in this series will allow us to get the same information much more cheaply.
This patch is a RFC because I'd like to keep this for at least one more longterm stable release (6.18?) before actually removing it so that we can have one longterm stable kernel version that supports both options to facilitate a transition from the sysfs files to a BPF program.
Sounds like a plan to me.
Signed-off-by: T.J. Mercier tjmercier@google.com
Acked-by: Christian König christian.koenig@amd.com
.../ABI/testing/sysfs-kernel-dmabuf-buffers | 24 --- Documentation/driver-api/dma-buf.rst | 5 - drivers/dma-buf/Kconfig | 15 -- drivers/dma-buf/Makefile | 1 - drivers/dma-buf/dma-buf-sysfs-stats.c | 202 ------------------ drivers/dma-buf/dma-buf-sysfs-stats.h | 35 --- drivers/dma-buf/dma-buf.c | 18 -- 7 files changed, 300 deletions(-) delete mode 100644 Documentation/ABI/testing/sysfs-kernel-dmabuf-buffers delete mode 100644 drivers/dma-buf/dma-buf-sysfs-stats.c delete mode 100644 drivers/dma-buf/dma-buf-sysfs-stats.h
diff --git a/Documentation/ABI/testing/sysfs-kernel-dmabuf-buffers b/Documentation/ABI/testing/sysfs-kernel-dmabuf-buffers deleted file mode 100644 index 5d3bc997dc64..000000000000 --- a/Documentation/ABI/testing/sysfs-kernel-dmabuf-buffers +++ /dev/null @@ -1,24 +0,0 @@ -What: /sys/kernel/dmabuf/buffers -Date: May 2021 -KernelVersion: v5.13 -Contact: Hridya Valsaraju hridya@google.com -Description: The /sys/kernel/dmabuf/buffers directory contains a
snapshot of the internal state of every DMA-BUF.
/sys/kernel/dmabuf/buffers/<inode_number> will contain the
statistics for the DMA-BUF with the unique inode number
<inode_number>
-Users: kernel memory tuning/debugging tools
-What: /sys/kernel/dmabuf/buffers/<inode_number>/exporter_name -Date: May 2021 -KernelVersion: v5.13 -Contact: Hridya Valsaraju hridya@google.com -Description: This file is read-only and contains the name of the exporter of
the DMA-BUF.
-What: /sys/kernel/dmabuf/buffers/<inode_number>/size -Date: May 2021 -KernelVersion: v5.13 -Contact: Hridya Valsaraju hridya@google.com -Description: This file is read-only and specifies the size of the DMA-BUF in
bytes.
diff --git a/Documentation/driver-api/dma-buf.rst b/Documentation/driver-api/dma-buf.rst index 29abf1eebf9f..2f36c21d9948 100644 --- a/Documentation/driver-api/dma-buf.rst +++ b/Documentation/driver-api/dma-buf.rst @@ -125,11 +125,6 @@ Implicit Fence Poll Support .. kernel-doc:: drivers/dma-buf/dma-buf.c :doc: implicit fence polling -DMA-BUF statistics -~~~~~~~~~~~~~~~~~~ -.. kernel-doc:: drivers/dma-buf/dma-buf-sysfs-stats.c
- :doc: overview
DMA Buffer ioctls
diff --git a/drivers/dma-buf/Kconfig b/drivers/dma-buf/Kconfig index fee04fdb0822..03e38c0d1fff 100644 --- a/drivers/dma-buf/Kconfig +++ b/drivers/dma-buf/Kconfig @@ -76,21 +76,6 @@ menuconfig DMABUF_HEAPS allows userspace to allocate dma-bufs that can be shared between drivers. -menuconfig DMABUF_SYSFS_STATS - bool "DMA-BUF sysfs statistics (DEPRECATED)" - depends on DMA_SHARED_BUFFER - help - Choose this option to enable DMA-BUF sysfs statistics - in location /sys/kernel/dmabuf/buffers. - - /sys/kernel/dmabuf/buffers/<inode_number> will contain - statistics for the DMA-BUF with the unique inode number - <inode_number>. - - This option is deprecated and should sooner or later be removed. - Android is the only user of this and it turned out that this resulted - in quite some performance problems. - source "drivers/dma-buf/heaps/Kconfig" endmenu diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile index 70ec901edf2c..8ab2bfecb1c9 100644 --- a/drivers/dma-buf/Makefile +++ b/drivers/dma-buf/Makefile @@ -6,7 +6,6 @@ obj-$(CONFIG_DMABUF_HEAPS) += heaps/ obj-$(CONFIG_SYNC_FILE) += sync_file.o obj-$(CONFIG_SW_SYNC) += sw_sync.o sync_debug.o obj-$(CONFIG_UDMABUF) += udmabuf.o -obj-$(CONFIG_DMABUF_SYSFS_STATS) += dma-buf-sysfs-stats.o dmabuf_selftests-y := \ selftest.o \ diff --git a/drivers/dma-buf/dma-buf-sysfs-stats.c b/drivers/dma-buf/dma-buf-sysfs-stats.c deleted file mode 100644 index b5b62e40ccc1..000000000000 --- a/drivers/dma-buf/dma-buf-sysfs-stats.c +++ /dev/null @@ -1,202 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * DMA-BUF sysfs statistics. - * - * Copyright (C) 2021 Google LLC. - */ - -#include <linux/dma-buf.h> -#include <linux/dma-resv.h> -#include <linux/kobject.h> -#include <linux/printk.h> -#include <linux/slab.h> -#include <linux/sysfs.h> - -#include "dma-buf-sysfs-stats.h" - -#define to_dma_buf_entry_from_kobj(x) container_of(x, struct dma_buf_sysfs_entry, kobj) - -/** - * DOC: overview - * - * ``/sys/kernel/debug/dma_buf/bufinfo`` provides an overview of every DMA-BUF - * in the system. However, since debugfs is not safe to be mounted in - * production, procfs and sysfs can be used to gather DMA-BUF statistics on - * production systems. - * - * The ``/proc/<pid>/fdinfo/<fd>`` files in procfs can be used to gather - * information about DMA-BUF fds. Detailed documentation about the interface - * is present in Documentation/filesystems/proc.rst. - * - * Unfortunately, the existing procfs interfaces can only provide information - * about the DMA-BUFs for which processes hold fds or have the buffers mmapped - * into their address space. This necessitated the creation of the DMA-BUF sysfs - * statistics interface to provide per-buffer information on production systems. - * - * The interface at ``/sys/kernel/dmabuf/buffers`` exposes information about - * every DMA-BUF when ``CONFIG_DMABUF_SYSFS_STATS`` is enabled. - * - * The following stats are exposed by the interface: - * - * * ``/sys/kernel/dmabuf/buffers/<inode_number>/exporter_name`` - * * ``/sys/kernel/dmabuf/buffers/<inode_number>/size`` - * - * The information in the interface can also be used to derive per-exporter - * statistics. The data from the interface can be gathered on error conditions - * or other important events to provide a snapshot of DMA-BUF usage. - * It can also be collected periodically by telemetry to monitor various metrics. - * - * Detailed documentation about the interface is present in - * Documentation/ABI/testing/sysfs-kernel-dmabuf-buffers. - */ - -struct dma_buf_stats_attribute { - struct attribute attr; - ssize_t (*show)(struct dma_buf *dmabuf, - struct dma_buf_stats_attribute *attr, char *buf); -}; -#define to_dma_buf_stats_attr(x) container_of(x, struct dma_buf_stats_attribute, attr) - -static ssize_t dma_buf_stats_attribute_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct dma_buf_stats_attribute *attribute; - struct dma_buf_sysfs_entry *sysfs_entry; - struct dma_buf *dmabuf; - - attribute = to_dma_buf_stats_attr(attr); - sysfs_entry = to_dma_buf_entry_from_kobj(kobj); - dmabuf = sysfs_entry->dmabuf; - - if (!dmabuf || !attribute->show) - return -EIO; - - return attribute->show(dmabuf, attribute, buf); -} - -static const struct sysfs_ops dma_buf_stats_sysfs_ops = { - .show = dma_buf_stats_attribute_show, -}; - -static ssize_t exporter_name_show(struct dma_buf *dmabuf, - struct dma_buf_stats_attribute *attr, - char *buf) -{ - return sysfs_emit(buf, "%s\n", dmabuf->exp_name); -} - -static ssize_t size_show(struct dma_buf *dmabuf, - struct dma_buf_stats_attribute *attr, - char *buf) -{ - return sysfs_emit(buf, "%zu\n", dmabuf->size); -} - -static struct dma_buf_stats_attribute exporter_name_attribute = - __ATTR_RO(exporter_name); -static struct dma_buf_stats_attribute size_attribute = __ATTR_RO(size); - -static struct attribute *dma_buf_stats_default_attrs[] = { - &exporter_name_attribute.attr, - &size_attribute.attr, - NULL, -}; -ATTRIBUTE_GROUPS(dma_buf_stats_default); - -static void dma_buf_sysfs_release(struct kobject *kobj) -{ - struct dma_buf_sysfs_entry *sysfs_entry; - - sysfs_entry = to_dma_buf_entry_from_kobj(kobj); - kfree(sysfs_entry); -} - -static const struct kobj_type dma_buf_ktype = { - .sysfs_ops = &dma_buf_stats_sysfs_ops, - .release = dma_buf_sysfs_release, - .default_groups = dma_buf_stats_default_groups, -}; - -void dma_buf_stats_teardown(struct dma_buf *dmabuf) -{ - struct dma_buf_sysfs_entry *sysfs_entry; - - sysfs_entry = dmabuf->sysfs_entry; - if (!sysfs_entry) - return; - - kobject_del(&sysfs_entry->kobj); - kobject_put(&sysfs_entry->kobj); -} - - -/* Statistics files do not need to send uevents. */ -static int dmabuf_sysfs_uevent_filter(const struct kobject *kobj) -{ - return 0; -} - -static const struct kset_uevent_ops dmabuf_sysfs_no_uevent_ops = { - .filter = dmabuf_sysfs_uevent_filter, -}; - -static struct kset *dma_buf_stats_kset; -static struct kset *dma_buf_per_buffer_stats_kset; -int dma_buf_init_sysfs_statistics(void) -{ - dma_buf_stats_kset = kset_create_and_add("dmabuf", - &dmabuf_sysfs_no_uevent_ops, - kernel_kobj); - if (!dma_buf_stats_kset) - return -ENOMEM; - - dma_buf_per_buffer_stats_kset = kset_create_and_add("buffers", - &dmabuf_sysfs_no_uevent_ops, - &dma_buf_stats_kset->kobj); - if (!dma_buf_per_buffer_stats_kset) { - kset_unregister(dma_buf_stats_kset); - return -ENOMEM; - } - - return 0; -} - -void dma_buf_uninit_sysfs_statistics(void) -{ - kset_unregister(dma_buf_per_buffer_stats_kset); - kset_unregister(dma_buf_stats_kset); -} - -int dma_buf_stats_setup(struct dma_buf *dmabuf, struct file *file) -{ - struct dma_buf_sysfs_entry *sysfs_entry; - int ret; - - if (!dmabuf->exp_name) { - pr_err("exporter name must not be empty if stats needed\n"); - return -EINVAL; - } - - sysfs_entry = kzalloc(sizeof(struct dma_buf_sysfs_entry), GFP_KERNEL); - if (!sysfs_entry) - return -ENOMEM; - - sysfs_entry->kobj.kset = dma_buf_per_buffer_stats_kset; - sysfs_entry->dmabuf = dmabuf; - - dmabuf->sysfs_entry = sysfs_entry; - - /* create the directory for buffer stats */ - ret = kobject_init_and_add(&sysfs_entry->kobj, &dma_buf_ktype, NULL, - "%lu", file_inode(file)->i_ino); - if (ret) - goto err_sysfs_dmabuf; - - return 0; - -err_sysfs_dmabuf: - kobject_put(&sysfs_entry->kobj); - dmabuf->sysfs_entry = NULL; - return ret; -} diff --git a/drivers/dma-buf/dma-buf-sysfs-stats.h b/drivers/dma-buf/dma-buf-sysfs-stats.h deleted file mode 100644 index 7a8a995b75ba..000000000000 --- a/drivers/dma-buf/dma-buf-sysfs-stats.h +++ /dev/null @@ -1,35 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * DMA-BUF sysfs statistics. - * - * Copyright (C) 2021 Google LLC. - */ - -#ifndef _DMA_BUF_SYSFS_STATS_H -#define _DMA_BUF_SYSFS_STATS_H - -#ifdef CONFIG_DMABUF_SYSFS_STATS - -int dma_buf_init_sysfs_statistics(void); -void dma_buf_uninit_sysfs_statistics(void); - -int dma_buf_stats_setup(struct dma_buf *dmabuf, struct file *file); - -void dma_buf_stats_teardown(struct dma_buf *dmabuf); -#else - -static inline int dma_buf_init_sysfs_statistics(void) -{ - return 0; -} - -static inline void dma_buf_uninit_sysfs_statistics(void) {} - -static inline int dma_buf_stats_setup(struct dma_buf *dmabuf, struct file *file) -{ - return 0; -} - -static inline void dma_buf_stats_teardown(struct dma_buf *dmabuf) {} -#endif -#endif // _DMA_BUF_SYSFS_STATS_H diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 7260bdd77c75..adc6a0c96641 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -31,8 +31,6 @@ #include <uapi/linux/dma-buf.h> #include <uapi/linux/magic.h> -#include "dma-buf-sysfs-stats.h" - static inline int is_dma_buf_file(struct file *); DEFINE_MUTEX(dmabuf_list_mutex); @@ -88,7 +86,6 @@ static void dma_buf_release(struct dentry *dentry) */ BUG_ON(dmabuf->cb_in.active || dmabuf->cb_out.active); - dma_buf_stats_teardown(dmabuf); dmabuf->ops->release(dmabuf); if (dmabuf->resv == (struct dma_resv *)&dmabuf[1]) @@ -671,10 +668,6 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) dmabuf->resv = resv; } - ret = dma_buf_stats_setup(dmabuf, file); - if (ret) - goto err_dmabuf; - file->private_data = dmabuf; file->f_path.dentry->d_fsdata = dmabuf; dmabuf->file = file; @@ -683,10 +676,6 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) return dmabuf; -err_dmabuf: - if (!resv) - dma_resv_fini(dmabuf->resv); - kfree(dmabuf); err_file: fput(file); err_module: @@ -1717,12 +1706,6 @@ static inline void dma_buf_uninit_debugfs(void) static int __init dma_buf_init(void) { - int ret; - - ret = dma_buf_init_sysfs_statistics(); - if (ret) - return ret; - dma_buf_mnt = kern_mount(&dma_buf_fs_type); if (IS_ERR(dma_buf_mnt)) return PTR_ERR(dma_buf_mnt); @@ -1736,6 +1719,5 @@ static void __exit dma_buf_deinit(void) { dma_buf_uninit_debugfs(); kern_unmount(dma_buf_mnt); - dma_buf_uninit_sysfs_statistics(); } __exitcall(dma_buf_deinit);
linux-kselftest-mirror@lists.linaro.org