Use the paged attachment mappings support to create NET_RX pages. NET_RX pages are pages that can be used in the networking receive path. Creating them does two things:
1. Binds the pages to the driver's rx queues selected by the create_flags param, which is treated as a bitmask (bit N set selects rx queue N). 2. Creates a gen_pool to hold the free pages available for the driver to allocate.
Signed-off-by: Mina Almasry <almasrymina@google.com>
---
 drivers/dma-buf/dma-buf.c    | 174 +++++++++++++++++++++++++++++++++++
 include/linux/dma-buf.h      |  20 ++++
 include/linux/netdevice.h    |   1 +
 include/uapi/linux/dma-buf.h |   2 +
 4 files changed, 197 insertions(+)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 50b1d813cf5c..acb86bf406f4 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -27,6 +27,7 @@ #include <linux/dma-resv.h> #include <linux/mm.h> #include <linux/mount.h> +#include <linux/netdevice.h> #include <linux/pseudo_fs.h>
#include <uapi/linux/dma-buf.h> @@ -1681,6 +1682,8 @@ static void dma_buf_pages_destroy(struct percpu_ref *ref) pci_dev_put(priv->pci_dev); }
+const struct dma_buf_pages_type_ops net_rx_ops; + static long dma_buf_create_pages(struct file *file, struct dma_buf_create_pages_info *create_info) { @@ -1793,6 +1796,9 @@ static long dma_buf_create_pages(struct file *file, priv->create_flags = create_info->create_flags;
switch (priv->type) { + case DMA_BUF_PAGES_NET_RX: + priv->type_ops = &net_rx_ops; + break; default: err = -EINVAL; goto out_put_new_file; @@ -1966,3 +1972,171 @@ static void __exit dma_buf_deinit(void) dma_buf_uninit_sysfs_statistics(); } __exitcall(dma_buf_deinit); + +/******************************** + * dma_buf_pages_net_rx * + ********************************/ + +void dma_buf_pages_net_rx_release(struct dma_buf_pages *priv, struct file *file) +{ + struct netdev_rx_queue *rxq; + unsigned long xa_idx; + + xa_for_each(&priv->net_rx.bound_rxq_list, xa_idx, rxq) + if (rxq->dmabuf_pages == file) + rxq->dmabuf_pages = NULL; +} + +static int dev_is_class(struct device *dev, void *class) +{ + if (dev->class != NULL && !strcmp(dev->class->name, class)) + return 1; + + return 0; +} + +int dma_buf_pages_net_rx_init(struct dma_buf_pages *priv, struct file *file) +{ + struct netdev_rx_queue *rxq; + struct net_device *netdev; + int xa_id, err, rxq_idx; + struct device *device; + + priv->net_rx.page_pool = + gen_pool_create(PAGE_SHIFT, dev_to_node(&priv->pci_dev->dev)); + + if (!priv->net_rx.page_pool) + return -ENOMEM; + + /* + * We start with PAGE_SIZE instead of 0 since gen_pool_alloc_*() returns + * NULL on error + */ + err = gen_pool_add_virt(priv->net_rx.page_pool, PAGE_SIZE, 0, + PAGE_SIZE * priv->num_pages, + dev_to_node(&priv->pci_dev->dev)); + if (err) + goto out_destroy_pool; + + xa_init_flags(&priv->net_rx.bound_rxq_list, XA_FLAGS_ALLOC); + + device = device_find_child(&priv->pci_dev->dev, "net", dev_is_class); + if (!device) { + err = -ENODEV; + goto out_destroy_xarray; + } + + netdev = to_net_dev(device); + if (!netdev) { + err = -ENODEV; + goto out_put_dev; + } + + for (rxq_idx = 0; rxq_idx < (sizeof(priv->create_flags) * 8); + rxq_idx++) { + if (!(priv->create_flags & (1ULL << rxq_idx))) + continue; + + if (rxq_idx >= netdev->num_rx_queues) { + err = -ERANGE; + goto out_release_rx; + } + + rxq = __netif_get_rx_queue(netdev, rxq_idx); + + err = 
xa_alloc(&priv->net_rx.bound_rxq_list, &xa_id, rxq, + xa_limit_32b, GFP_KERNEL); + if (err) + goto out_release_rx; + + /* We previously have done a dma_buf_attach(), which validates + * that the net_device we're trying to attach to can reach the + * dmabuf, so we don't need to check here as well. + */ + rxq->dmabuf_pages = file; + } + put_device(device); + return 0; + +out_release_rx: + dma_buf_pages_net_rx_release(priv, file); +out_put_dev: + put_device(device); +out_destroy_xarray: + xa_destroy(&priv->net_rx.bound_rxq_list); +out_destroy_pool: + gen_pool_destroy(priv->net_rx.page_pool); + return err; +} + +void dma_buf_pages_net_rx_free(struct dma_buf_pages *priv) +{ + xa_destroy(&priv->net_rx.bound_rxq_list); + gen_pool_destroy(priv->net_rx.page_pool); +} + +static unsigned long dma_buf_page_to_gen_pool_addr(struct page *page) +{ + struct dma_buf_pages *priv; + struct dev_pagemap *pgmap; + unsigned long offset; + + pgmap = page->pgmap; + priv = container_of(pgmap, struct dma_buf_pages, pgmap); + offset = page - priv->pages; + /* Offset + 1 is due to the fact that we want to avoid 0 virt address + * returned from the gen_pool. The gen_pool returns 0 on error, and virt + * address 0 is indistinguishable from an error. + */ + return (offset + 1) << PAGE_SHIFT; +} + +static struct page * +dma_buf_gen_pool_addr_to_page(unsigned long addr, struct dma_buf_pages *priv) +{ + /* - 1 is due to the fact that we want to avoid 0 virt address + * returned from the gen_pool. See comment in dma_buf_create_pages() + * for details. 
+ */ + unsigned long offset = (addr >> PAGE_SHIFT) - 1; + return &priv->pages[offset]; +} + +void dma_buf_page_free_net_rx(struct dma_buf_pages *priv, struct page *page) +{ + unsigned long addr = dma_buf_page_to_gen_pool_addr(page); + + if (gen_pool_has_addr(priv->net_rx.page_pool, addr, PAGE_SIZE)) + gen_pool_free(priv->net_rx.page_pool, addr, PAGE_SIZE); +} + +const struct dma_buf_pages_type_ops net_rx_ops = { + .dma_buf_pages_init = dma_buf_pages_net_rx_init, + .dma_buf_pages_release = dma_buf_pages_net_rx_release, + .dma_buf_pages_destroy = dma_buf_pages_net_rx_free, + .dma_buf_page_free = dma_buf_page_free_net_rx, +}; + +struct page *dma_buf_pages_net_rx_alloc(struct dma_buf_pages *priv) +{ + unsigned long gen_pool_addr; + struct page *pg; + + if (!(priv->type & DMA_BUF_PAGES_NET_RX)) + return NULL; + + gen_pool_addr = gen_pool_alloc(priv->net_rx.page_pool, PAGE_SIZE); + if (!gen_pool_addr) + return NULL; + + if (!PAGE_ALIGNED(gen_pool_addr)) { + net_err_ratelimited("dmabuf page pool allocation not aligned"); + gen_pool_free(priv->net_rx.page_pool, gen_pool_addr, PAGE_SIZE); + return NULL; + } + + pg = dma_buf_gen_pool_addr_to_page(gen_pool_addr, priv); + + percpu_ref_get(&priv->pgmap.ref); + return pg; +} diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 5789006180ea..e8e66d6407d0 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -22,6 +22,9 @@ #include <linux/fs.h> #include <linux/dma-fence.h> #include <linux/wait.h> +#include <linux/genalloc.h> +#include <linux/xarray.h> +#include <net/page_pool.h>
struct device; struct dma_buf; @@ -552,6 +555,11 @@ struct dma_buf_pages_type_ops { struct page *page); };
+struct dma_buf_pages_net_rx { + struct gen_pool *page_pool; + struct xarray bound_rxq_list; +}; + struct dma_buf_pages { /* fields for dmabuf */ struct dma_buf *dmabuf; @@ -568,6 +576,10 @@ struct dma_buf_pages { unsigned int type; const struct dma_buf_pages_type_ops *type_ops; __u64 create_flags; + + union { + struct dma_buf_pages_net_rx net_rx; + }; };
/** @@ -671,6 +683,8 @@ static inline bool is_dma_buf_pages_file(struct file *file) return file->f_op == &dma_buf_pages_fops; }
+struct page *dma_buf_pages_net_rx_alloc(struct dma_buf_pages *priv); + static inline bool is_dma_buf_page(struct page *page) { return (is_zone_device_page(page) && page->pgmap && @@ -718,6 +732,12 @@ static inline int dma_buf_map_sg(struct device *dev, struct scatterlist *sg, { return 0; } + +static inline struct page *dma_buf_pages_net_rx_alloc(struct dma_buf_pages *priv) +{ + return NULL; +} + #endif
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c2f0c6002a84..7a087ffa9baa 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -796,6 +796,7 @@ struct netdev_rx_queue { #ifdef CONFIG_XDP_SOCKETS struct xsk_buff_pool *pool; #endif + struct file __rcu *dmabuf_pages; } ____cacheline_aligned_in_smp;
/* diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h index d0f63a2ab7e4..b392cef9d3c6 100644 --- a/include/uapi/linux/dma-buf.h +++ b/include/uapi/linux/dma-buf.h @@ -186,6 +186,8 @@ struct dma_buf_create_pages_info { __u64 create_flags; };
+#define DMA_BUF_PAGES_NET_RX (1 << 0) + #define DMA_BUF_CREATE_PAGES _IOW(DMA_BUF_BASE, 4, struct dma_buf_create_pages_info)
#endif