On Fri, 28 Jun 2024 00:32:40 +0000 Mina Almasry wrote:
> +/* Protected by rtnl_lock() */
> +static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1);
> +
> +void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
> +{
> +	struct netdev_rx_queue *rxq;
> +	unsigned long xa_idx;
> +	unsigned int rxq_idx;
> +
> +	if (!binding)
> +		return;
nit: I don't see how it can happen, no defensive programming, please
> +	if (binding->list.next)
> +		list_del(&binding->list);
> +
> +	xa_for_each(&binding->bound_rxq_list, xa_idx, rxq) {
nit: s/bound_rxq_list/bound_rxqs/ ? it's not a list
> +		if (rxq->mp_params.mp_priv == binding) {
> +			/* We hold the rtnl_lock while binding/unbinding
> +			 * dma-buf, so we can't race with another thread that
> +			 * is also modifying this value. However, the page_pool
> +			 * may read this config while it's creating its
> +			 * rx-queues. WRITE_ONCE() here to match the
> +			 * READ_ONCE() in the page_pool.
> +			 */
> +			WRITE_ONCE(rxq->mp_params.mp_priv, NULL);
Is this really sufficient in terms of locking? @binding is not RCU-protected, and the reader is not guaranteed to be in an RCU critical section either. Actually, the "reader" tries to take a ref and use this struct, so it's not even a pure reader.

Let's add a lock, or use one of the existing locks. Or perhaps it's time to add a mutex to struct net_device.
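Rough sketch of what I have in mind; the field name and placement are made up, treat it as an illustration rather than the final form:

	/* hypothetical field in struct net_device */
	struct mutex		mp_lock;	/* protects rxq->mp_params */

	/* writer side (bind/unbind, still under rtnl_lock()): */
	mutex_lock(&dev->mp_lock);
	rxq->mp_params.mp_priv = binding;
	mutex_unlock(&dev->mp_lock);

The reader would then take dev->mp_lock as well and grab its reference on @binding while holding it, instead of relying on the bare READ_ONCE()/WRITE_ONCE() pair.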
> +			rxq_idx = get_netdev_rx_queue_index(rxq);
> +
> +			netdev_rx_queue_restart(binding->dev, rxq_idx);
> +		}
> +	}
> +
> +	xa_erase(&net_devmem_dmabuf_bindings, binding->id);
> +
> +	net_devmem_dmabuf_binding_put(binding);
> +}
> +int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
> +				    struct net_devmem_dmabuf_binding *binding)
> +{
> +	struct netdev_rx_queue *rxq;
> +	u32 xa_idx;
> +	int err;
> +
> +	if (rxq_idx >= dev->num_rx_queues)
> +		return -ERANGE;
> +
> +	rxq = __netif_get_rx_queue(dev, rxq_idx);
> +	if (rxq->mp_params.mp_priv)
> +		return -EEXIST;
Makes me wonder - do we need an API to unbind, or do we assume the application will only ever have one binding per socket and close the socket every time? I guess that's fine as a future extension.
> +	err = xa_alloc(&binding->bound_rxq_list, &xa_idx, rxq, xa_limit_32b,
> +		       GFP_KERNEL);
> +	if (err)
> +		return err;
> +
> +	/* We hold the rtnl_lock while binding/unbinding dma-buf, so we can't
> +	 * race with another thread that is also modifying this value. However,
> +	 * the driver may read this config while it's creating its rx-queues.
> +	 * WRITE_ONCE() here to match the READ_ONCE() in the driver.
> +	 */
> +	WRITE_ONCE(rxq->mp_params.mp_priv, binding);
> +
> +	err = netdev_rx_queue_restart(dev, rxq_idx);
> +	if (err)
> +		goto err_xa_erase;
> +
> +	return 0;
> +
> +err_xa_erase:
> +	WRITE_ONCE(rxq->mp_params.mp_priv, NULL);
> +	xa_erase(&binding->bound_rxq_list, xa_idx);
> +
> +	return err;
> +}
> +int net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
> +			   struct net_devmem_dmabuf_binding **out)
> +{
> +	struct net_devmem_dmabuf_binding *binding;
> +	static u32 id_alloc_next;
> +	struct scatterlist *sg;
> +	struct dma_buf *dmabuf;
> +	unsigned int sg_idx, i;
> +	unsigned long virtual;
> +	int err;
> +
> +	dmabuf = dma_buf_get(dmabuf_fd);
> +	if (IS_ERR(dmabuf))
> +		return -EBADFD;
nit: I think error pointers are nicer than **out parameters :( You can ERR_CAST() all the DMABUF errors.
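E.g., a sketch of the signature change (untested, error unwinding elided):

	struct net_devmem_dmabuf_binding *
	net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd)
	{
		struct net_devmem_dmabuf_binding *binding;
		struct dma_buf *dmabuf;

		dmabuf = dma_buf_get(dmabuf_fd);
		if (IS_ERR(dmabuf))
			return ERR_CAST(dmabuf);	/* propagate dma-buf's error */

		/* ... allocation and attach as before, returning
		 * ERR_PTR(err) on failure ...
		 */

		return binding;
	}

and the caller checks IS_ERR() on the return value instead of passing &out_binding.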
> +	binding = kzalloc_node(sizeof(*binding), GFP_KERNEL,
> +			       dev_to_node(&dev->dev));
> +	if (!binding) {
> +		err = -ENOMEM;
> +		goto err_put_dmabuf;
> +	}
> +
> +	binding->dev = dev;
> +
> +	err = xa_alloc_cyclic(&net_devmem_dmabuf_bindings, &binding->id,
> +			      binding, xa_limit_32b, &id_alloc_next,
> +			      GFP_KERNEL);
> +	if (err < 0)
> +		goto err_free_binding;
> +
> +	xa_init_flags(&binding->bound_rxq_list, XA_FLAGS_ALLOC);
> +
> +	refcount_set(&binding->ref, 1);
> +
> +	binding->dmabuf = dmabuf;
> +
> +	binding->attachment = dma_buf_attach(binding->dmabuf, dev->dev.parent);
> +	if (IS_ERR(binding->attachment)) {
> +		err = PTR_ERR(binding->attachment);
> +		goto err_free_id;
> +	}
> -/* Stub */
>  int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info)
>  {
> -	return 0;
> +	struct nlattr *tb[ARRAY_SIZE(netdev_queue_dmabuf_nl_policy)];
> +	struct net_devmem_dmabuf_binding *out_binding;
> +	struct list_head *sock_binding_list;
> +	u32 ifindex, dmabuf_fd, rxq_idx;
> +	struct net_device *netdev;
> +	struct sk_buff *rsp;
> +	struct nlattr *attr;
> +	int rem, err = 0;
> +	void *hdr;
> +
> +	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX) ||
> +	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_BIND_DMABUF_DMABUF_FD) ||
> +	    GENL_REQ_ATTR_CHECK(info, NETDEV_A_BIND_DMABUF_QUEUES))
> +		return -EINVAL;
> +
> +	ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]);
> +	dmabuf_fd = nla_get_u32(info->attrs[NETDEV_A_BIND_DMABUF_DMABUF_FD]);
> +
> +	rtnl_lock();
> +
> +	netdev = __dev_get_by_index(genl_info_net(info), ifindex);
> +	if (!netdev) {
We should also fail here if the device is gone, i.e. add || !netif_device_present(netdev) to the check (see below).
> +		err = -ENODEV;
> +		goto err_unlock;
> +	}
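I.e. (assuming -ENODEV is also the right error for a detached device):

	netdev = __dev_get_by_index(genl_info_net(info), ifindex);
	if (!netdev || !netif_device_present(netdev)) {
		err = -ENODEV;
		goto err_unlock;
	}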
> +
> +	err = net_devmem_bind_dmabuf(netdev, dmabuf_fd, &out_binding);
> +	if (err)
> +		goto err_unlock;
> +
> +	nla_for_each_attr(attr, genlmsg_data(info->genlhdr),
> +			  genlmsg_len(info->genlhdr), rem) {
> +		if (nla_type(attr) != NETDEV_A_BIND_DMABUF_QUEUES)
> +			continue;
nit: nla_for_each_attr_type() would save the open-coded type check.
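i.e. something along these lines (loop body unchanged, minus the continue):

	nla_for_each_attr_type(attr, NETDEV_A_BIND_DMABUF_QUEUES,
			       genlmsg_data(info->genlhdr),
			       genlmsg_len(info->genlhdr), rem) {
		/* nla_parse_nested() etc. as in the patch */
	}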
> +		err = nla_parse_nested(
> +			tb, ARRAY_SIZE(netdev_queue_dmabuf_nl_policy) - 1, attr,
> +			netdev_queue_dmabuf_nl_policy, info->extack);
> +		if (err < 0)
> +			goto err_unbind;
> +
> +		rxq_idx = nla_get_u32(tb[NETDEV_A_QUEUE_DMABUF_IDX]);
> +
> +		err = net_devmem_bind_dmabuf_to_queue(netdev, rxq_idx,
> +						      out_binding);
> +		if (err)
> +			goto err_unbind;
> +	}
> +
> +	sock_binding_list = genl_sk_priv_get(&netdev_nl_family,
> +					     NETLINK_CB(skb).sk);
> +	if (IS_ERR(sock_binding_list)) {
> +		err = PTR_ERR(sock_binding_list);
> +		goto err_unbind;
> +	}
> +
> +	list_add(&out_binding->list, sock_binding_list);
> +
> +	rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
> +	if (!rsp) {
> +		err = -ENOMEM;
> +		goto err_unbind;
> +	}
> +
> +	hdr = genlmsg_iput(rsp, info);
> +	if (!hdr) {
> +		err = -EMSGSIZE;
> +		goto err_genlmsg_free;
> +	}
I'd move genl_sk_priv_get(), genlmsg_new() and genlmsg_iput() before we take rtnl_lock(), but I admit it's a bit late for this sort of feedback.. :)
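Something like this (sketch; the error handling gets simpler because there is nothing to unwind before the lock is taken):

	sock_binding_list = genl_sk_priv_get(&netdev_nl_family,
					     NETLINK_CB(skb).sk);
	if (IS_ERR(sock_binding_list))
		return PTR_ERR(sock_binding_list);

	rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!rsp)
		return -ENOMEM;

	hdr = genlmsg_iput(rsp, info);
	if (!hdr) {
		nlmsg_free(rsp);
		return -EMSGSIZE;
	}

	rtnl_lock();
	/* device lookup, bind, queue loop as before; only the
	 * list_add() needs to stay after the bind succeeds
	 */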
> +	nla_put_u32(rsp, NETDEV_A_BIND_DMABUF_DMABUF_ID, out_binding->id);
> +	genlmsg_end(rsp, hdr);
> +
> +	rtnl_unlock();
> +
> +	return genlmsg_reply(rsp, info);
> +
> +err_genlmsg_free:
> +	nlmsg_free(rsp);
> +err_unbind:
> +	net_devmem_unbind_dmabuf(out_binding);
> +err_unlock:
> +	rtnl_unlock();
> +	return err;
> +}