On Fri, 28 Jun 2024 00:32:40 +0000 Mina Almasry wrote:
> +/* Protected by rtnl_lock() */
> +static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1);
> +
> +void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
> +{
> +	struct netdev_rx_queue *rxq;
> +	unsigned long xa_idx;
> +	unsigned int rxq_idx;
> +
> +	if (!binding)
> +		return;
nit: I don't see how it can happen, no defensive programming, please
> +	if (binding->list.next)
> +		list_del(&binding->list);
> +
> +	xa_for_each(&binding->bound_rxq_list, xa_idx, rxq) {
nit: s/bound_rxq_list/bound_rxqs/ ? it's not a list
> +		if (rxq->mp_params.mp_priv == binding) {
> +			/* We hold the rtnl_lock while binding/unbinding
> +			 * dma-buf, so we can't race with another thread that
> +			 * is also modifying this value. However, the page_pool
> +			 * may read this config while it's creating its
> +			 * rx-queues. WRITE_ONCE() here to match the
> +			 * READ_ONCE() in the page_pool.
> +			 */
> +			WRITE_ONCE(rxq->mp_params.mp_priv, NULL);
Is this really sufficient in terms of locking? @binding is not RCU-protected, and the reader is not guaranteed to be in an RCU critical section either. Actually, the "reader" tries to take a ref and use this struct, so it's not even a pure reader.

Let's add a lock, or use one of the existing locks. Or perhaps it's time to add a mutex to struct net_device.
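Rough sketch of what I have in mind; the field name and placement are made up, treat it as an illustration rather than the final form:

	/* hypothetical field in struct net_device */
	struct mutex		mp_lock;	/* protects rxq->mp_params */

	/* writer side (bind/unbind, still under rtnl_lock()): */
	mutex_lock(&dev->mp_lock);
	rxq->mp_params.mp_priv = binding;
	mutex_unlock(&dev->mp_lock);

The reader would then take dev->mp_lock as well and grab its reference on @binding while holding it, instead of relying on the bare READ_ONCE()/WRITE_ONCE() pair.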
> +			rxq_idx = get_netdev_rx_queue_index(rxq);
> +
> +			netdev_rx_queue_restart(binding->dev, rxq_idx);
> +		}
> +	}
> +
> +	xa_erase(&net_devmem_dmabuf_bindings, binding->id);
> +
> +	net_devmem_dmabuf_binding_put(binding);
> +}
> +int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
> +				    struct net_devmem_dmabuf_binding *binding)
> +{
> +	struct netdev_rx_queue *rxq;
> +	u32 xa_idx;
> +	int err;
> +
> +	if (rxq_idx >= dev->num_rx_queues)
> +		return -ERANGE;
> +
> +	rxq = __netif_get_rx_queue(dev, rxq_idx);
> +	if (rxq->mp_params.mp_priv)
> +		return -EEXIST;
Makes me wonder - do we need an API to unbind, or do we assume the application will only ever have one binding per socket and close the socket every time? I guess that's fine as a future extension.
> +	err = xa_alloc(&binding->bound_rxq_list, &xa_idx, rxq, xa_limit_32b,
> +		       GFP_KERNEL);
> +	if (err)
> +		return err;
> +
> +	/* We hold the rtnl_lock while binding/unbinding dma-buf, so we can't
> +	 * race with another thread that is also modifying this value. However,
> +	 * the driver may read this config while it's creating its rx-queues.
> +	 * WRITE_ONCE() here to match the READ_ONCE() in the driver.
> +	 */
> +	WRITE_ONCE(rxq->mp_params.mp_priv, binding);
> +
> +	err = netdev_rx_queue_restart(dev, rxq_idx);
> +	if (err)
> +		goto err_xa_erase;
> +
> +	return 0;
> +
> +err_xa_erase:
> +	WRITE_ONCE(rxq->mp_params.mp_priv, NULL);
> +	xa_erase(&binding->bound_rxq_list, xa_idx);
> +
> +	return err;
> +}
> +int net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
> +			   struct net_devmem_dmabuf_binding **out)
> +{
> +	struct net_devmem_dmabuf_binding *binding;
> +	static u32 id_alloc_next;
> +	struct scatterlist *sg;
> +	struct dma_buf *dmabuf;
> +	unsigned int sg_idx, i;
> +	unsigned long virtual;
> +	int err;
> +
> +	dmabuf = dma_buf_get(dmabuf_fd);
> +	if (IS_ERR(dmabuf))
> +		return -EBADFD;
nit: I think error pointers are nicer than **out parameters :( You can ERR_CAST() all the DMABUF errors.
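E.g., a sketch of the signature change (untested, error unwinding elided):

	struct net_devmem_dmabuf_binding *
	net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd)
	{
		struct net_devmem_dmabuf_binding *binding;
		struct dma_buf *dmabuf;

		dmabuf = dma_buf_get(dmabuf_fd);
		if (IS_ERR(dmabuf))
			return ERR_CAST(dmabuf);	/* propagate dma-buf's error */

		/* ... allocation and attach as before, returning
		 * ERR_PTR(err) on failure ...
		 */

		return binding;
	}

and the caller checks IS_ERR() on the return value instead of passing &out_binding.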
> +	binding = kzalloc_node(sizeof(*binding), GFP_KERNEL,
> +			       dev_to_node(&dev->dev));
> +	if (!binding) {
> +		err = -ENOMEM;
> +		goto err_put_dmabuf;
> +	}
> +
> +	binding->dev = dev;
> +
> +	err = xa_alloc_cyclic(&net_devmem_dmabuf_bindings, &binding->id,
> +			      binding, xa_limit_32b, &id_alloc_next,
> +			      GFP_KERNEL);
> +	if (err < 0)
> +		goto err_free_binding;
> +
> +	xa_init_flags(&binding->bound_rxq_list, XA_FLAGS_ALLOC);
> +
> +	refcount_set(&binding->ref, 1);
> +
> +	binding->dmabuf = dmabuf;
> +
> +	binding->attachment = dma_buf_attach(binding->dmabuf, dev->dev.parent);
> +	if (IS_ERR(binding->attachment)) {
> +		err = PTR_ERR(binding->attachment);
> +		goto err_free_id;
> +	}
> -/* Stub */
>  int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info)
>  {
> -	return 0;
> +	struct nlattr *tb[ARRAY_SIZE(netdev_queue_dmabuf_nl_policy)];
> +	struct net_devmem_dmabuf_binding *out_binding;
> +	struct list_head *sock_binding_list;
> +	u32 ifindex, dmabuf_fd, rxq_idx;
> +	struct net_device *netdev;
> +	struct sk_buff *rsp;
> +	struct nlattr *attr;
> +	int rem, err = 0;
> +	void *hdr;
> +
> +	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX) ||
> +	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_BIND_DMABUF_DMABUF_FD) ||
> +	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_BIND_DMABUF_QUEUES))
> +		return -EINVAL;
> +
> +	ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]);
> +	dmabuf_fd = nla_get_u32(info->attrs[NETDEV_A_BIND_DMABUF_DMABUF_FD]);
> +
> +	rtnl_lock();
> +
> +	netdev = __dev_get_by_index(genl_info_net(info), ifindex);
> +	if (!netdev) {
We should also fail here if the device is gone, i.e. add || !netif_device_present(netdev) to the check (see below).
> +		err = -ENODEV;
> +		goto err_unlock;
> +	}
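I.e. (assuming -ENODEV is also the right error for a detached device):

	netdev = __dev_get_by_index(genl_info_net(info), ifindex);
	if (!netdev || !netif_device_present(netdev)) {
		err = -ENODEV;
		goto err_unlock;
	}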
> +
> +	err = net_devmem_bind_dmabuf(netdev, dmabuf_fd, &out_binding);
> +	if (err)
> +		goto err_unlock;
> +
> +	nla_for_each_attr(attr, genlmsg_data(info->genlhdr),
> +			  genlmsg_len(info->genlhdr), rem) {
> +		if (nla_type(attr) != NETDEV_A_BIND_DMABUF_QUEUES)
> +			continue;
nit: nla_for_each_attr_type() would save the open-coded type check.
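i.e. something along these lines (loop body unchanged, minus the continue):

	nla_for_each_attr_type(attr, NETDEV_A_BIND_DMABUF_QUEUES,
			       genlmsg_data(info->genlhdr),
			       genlmsg_len(info->genlhdr), rem) {
		/* nla_parse_nested() etc. as in the patch */
	}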
> +		err = nla_parse_nested(
> +			tb, ARRAY_SIZE(netdev_queue_dmabuf_nl_policy) - 1, attr,
> +			netdev_queue_dmabuf_nl_policy, info->extack);
> +		if (err < 0)
> +			goto err_unbind;
> +
> +		rxq_idx = nla_get_u32(tb[NETDEV_A_QUEUE_DMABUF_IDX]);
> +
> +		err = net_devmem_bind_dmabuf_to_queue(netdev, rxq_idx,
> +						      out_binding);
> +		if (err)
> +			goto err_unbind;
> +	}
> +
> +	sock_binding_list = genl_sk_priv_get(&netdev_nl_family,
> +					     NETLINK_CB(skb).sk);
> +	if (IS_ERR(sock_binding_list)) {
> +		err = PTR_ERR(sock_binding_list);
> +		goto err_unbind;
> +	}
> +
> +	list_add(&out_binding->list, sock_binding_list);
> +
> +	rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
> +	if (!rsp) {
> +		err = -ENOMEM;
> +		goto err_unbind;
> +	}
> +
> +	hdr = genlmsg_iput(rsp, info);
> +	if (!hdr) {
> +		err = -EMSGSIZE;
> +		goto err_genlmsg_free;
> +	}
I'd move genl_sk_priv_get(), genlmsg_new() and genlmsg_iput() before we take rtnl_lock(), but I admit it's a bit late for this sort of feedback.. :)
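Something like this (sketch; the error handling gets simpler because there is nothing to unwind before the lock is taken):

	sock_binding_list = genl_sk_priv_get(&netdev_nl_family,
					     NETLINK_CB(skb).sk);
	if (IS_ERR(sock_binding_list))
		return PTR_ERR(sock_binding_list);

	rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!rsp)
		return -ENOMEM;

	hdr = genlmsg_iput(rsp, info);
	if (!hdr) {
		nlmsg_free(rsp);
		return -EMSGSIZE;
	}

	rtnl_lock();
	/* device lookup, bind, queue loop as before; only the
	 * list_add() needs to stay after the bind succeeds
	 */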
> +	nla_put_u32(rsp, NETDEV_A_BIND_DMABUF_DMABUF_ID, out_binding->id);
> +	genlmsg_end(rsp, hdr);
> +
> +	rtnl_unlock();
> +
> +	return genlmsg_reply(rsp, info);
> +
> +err_genlmsg_free:
> +	nlmsg_free(rsp);
> +err_unbind:
> +	net_devmem_unbind_dmabuf(out_binding);
> +err_unlock:
> +	rtnl_unlock();
> +	return err;
> +}