2025-01-13, 10:31:39 +0100, Antonio Quartulli wrote:
int ovpn_nl_peer_new_doit(struct sk_buff *skb, struct genl_info *info) {
- return -EOPNOTSUPP;
- struct nlattr *attrs[OVPN_A_PEER_MAX + 1];
- struct ovpn_priv *ovpn = info->user_ptr[0];
- struct ovpn_socket *ovpn_sock;
- struct socket *sock = NULL;
- struct ovpn_peer *peer;
- u32 sockfd, peer_id;
- int ret;
- /* peers can only be added when the interface is up and running */
- if (!netif_running(ovpn->dev))
return -ENETDOWN;
Since we're not under rtnl_lock here, the device could go down while we're creating this peer, and we may end up with a down device that has a peer anyway.
I'm not sure what this (and the peer flushing on NETDEV_DOWN) is trying to accomplish. Is it a problem to keep peers when the netdevice is down?
- if (GENL_REQ_ATTR_CHECK(info, OVPN_A_PEER))
return -EINVAL;
- ret = nla_parse_nested(attrs, OVPN_A_PEER_MAX, info->attrs[OVPN_A_PEER],
ovpn_peer_nl_policy, info->extack);
- if (ret)
return ret;
- ret = ovpn_nl_peer_precheck(ovpn, info, attrs);
- if (ret < 0)
return ret;
- if (NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_PEER], attrs,
OVPN_A_PEER_SOCKET))
return -EINVAL;
- peer_id = nla_get_u32(attrs[OVPN_A_PEER_ID]);
- peer = ovpn_peer_new(ovpn, peer_id);
- if (IS_ERR(peer)) {
NL_SET_ERR_MSG_FMT_MOD(info->extack,
"cannot create new peer object for peer %u: %ld",
peer_id, PTR_ERR(peer));
return PTR_ERR(peer);
- }
- /* lookup the fd in the kernel table and extract the socket object */
- sockfd = nla_get_u32(attrs[OVPN_A_PEER_SOCKET]);
- /* sockfd_lookup() increases sock's refcounter */
- sock = sockfd_lookup(sockfd, &ret);
- if (!sock) {
NL_SET_ERR_MSG_FMT_MOD(info->extack,
"cannot lookup peer socket (fd=%u): %d",
sockfd, ret);
return -ENOTSOCK;
All those returns should be "goto peer_release" (and setting ret) so that we don't leak peer.
- }
- /* Only when using UDP as transport protocol the remote endpoint
* can be configured so that ovpn knows where to send packets to.
*
* In case of TCP, the socket is connected to the peer and ovpn
* will just send bytes over it, without the need to specify a
* destination.
*/
- if (sock->sk->sk_protocol != IPPROTO_UDP &&
(attrs[OVPN_A_PEER_REMOTE_IPV4] ||
attrs[OVPN_A_PEER_REMOTE_IPV6])) {
NL_SET_ERR_MSG_FMT_MOD(info->extack,
"unexpected remote IP address for non UDP socket");
sockfd_put(sock);
return -EINVAL;
goto peer_release
- }
- ovpn_sock = ovpn_socket_new(sock, peer);
- if (IS_ERR(ovpn_sock)) {
NL_SET_ERR_MSG_FMT_MOD(info->extack,
"cannot encapsulate socket: %ld",
PTR_ERR(ovpn_sock));
sockfd_put(sock);
return -ENOTSOCK;
goto peer_release
- }
- peer->sock = ovpn_sock;
- ret = ovpn_nl_peer_modify(peer, info, attrs);
- if (ret < 0)
goto peer_release;
- ret = ovpn_peer_add(ovpn, peer);
- if (ret < 0) {
NL_SET_ERR_MSG_FMT_MOD(info->extack,
"cannot add new peer (id=%u) to hashtable: %d\n",
peer->id, ret);
goto peer_release;
- }
- return 0;
+peer_release:
- /* release right away because peer is not used in any context */
- ovpn_peer_release(peer);
- return ret;
}
[...]
int ovpn_nl_peer_del_doit(struct sk_buff *skb, struct genl_info *info) {
- return -EOPNOTSUPP;
- struct nlattr *attrs[OVPN_A_PEER_MAX + 1];
- struct ovpn_priv *ovpn = info->user_ptr[0];
- struct ovpn_peer *peer;
- u32 peer_id;
- int ret;
- if (GENL_REQ_ATTR_CHECK(info, OVPN_A_PEER))
return -EINVAL;
- ret = nla_parse_nested(attrs, OVPN_A_PEER_MAX, info->attrs[OVPN_A_PEER],
ovpn_peer_nl_policy, info->extack);
- if (ret)
return ret;
- if (NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_PEER], attrs,
OVPN_A_PEER_ID))
return -EINVAL;
- peer_id = nla_get_u32(attrs[OVPN_A_PEER_ID]);
- peer = ovpn_peer_get_by_id(ovpn, peer_id);
- if (!peer) {
NL_SET_ERR_MSG_FMT_MOD(info->extack,
"cannot find peer with id %u", peer_id);
return -ENOENT;
- }
- netdev_dbg(ovpn->dev, "del peer %u\n", peer->id);
- ret = ovpn_peer_del(peer, OVPN_DEL_PEER_REASON_USERSPACE);
With the delayed socket release (which is similar to what was in v11, but now with refcounting on the netdevice which should make rtnl_link_unregister in ovpn_cleanup wait [*]), we may return to userspace as if the peer was gone, but the socket hasn't been detached yet.
A userspace application that tries to remove the peer and immediately re-create it with the same socket could get EBUSY if the workqueue hasn't done its job yet. That would be quite confusing to the application.
So I would add a completion to wait here until the socket has been fully detached. Something like below.
[*] I don't think the current refcounting fully protects against that, I'll comment on 05/25
-------- 8< --------
diff --git a/drivers/net/ovpn/netlink.c b/drivers/net/ovpn/netlink.c index 72357bb5f30b..19aa4ee6d468 100644 --- a/drivers/net/ovpn/netlink.c +++ b/drivers/net/ovpn/netlink.c @@ -733,6 +733,9 @@ int ovpn_nl_peer_del_doit(struct sk_buff *skb, struct genl_info *info)
netdev_dbg(ovpn->dev, "del peer %u\n", peer->id); ret = ovpn_peer_del(peer, OVPN_DEL_PEER_REASON_USERSPACE); + if (ret >= 0 && peer->sock) + wait_for_completion(&peer->sock_detach); + ovpn_peer_put(peer);
return ret; diff --git a/drivers/net/ovpn/peer.c b/drivers/net/ovpn/peer.c index b032390047fe..6120521d0c32 100644 --- a/drivers/net/ovpn/peer.c +++ b/drivers/net/ovpn/peer.c @@ -92,6 +92,7 @@ struct ovpn_peer *ovpn_peer_new(struct ovpn_priv *ovpn, u32 id) ovpn_peer_stats_init(&peer->vpn_stats); ovpn_peer_stats_init(&peer->link_stats); INIT_WORK(&peer->keepalive_work, ovpn_peer_keepalive_send); + init_completion(&peer->sock_detach);
ret = dst_cache_init(&peer->dst_cache, GFP_KERNEL); if (ret < 0) { diff --git a/drivers/net/ovpn/peer.h b/drivers/net/ovpn/peer.h index 7a062cc5a5a4..8c54bf5709ef 100644 --- a/drivers/net/ovpn/peer.h +++ b/drivers/net/ovpn/peer.h @@ -112,6 +112,7 @@ struct ovpn_peer { struct rcu_head rcu; struct work_struct remove_work; struct work_struct keepalive_work; + struct completion sock_detach; };
/** diff --git a/drivers/net/ovpn/socket.c b/drivers/net/ovpn/socket.c index a5c3bc834a35..7cefac42c3be 100644 --- a/drivers/net/ovpn/socket.c +++ b/drivers/net/ovpn/socket.c @@ -31,6 +31,8 @@ static void ovpn_socket_release_kref(struct kref *kref)
sockfd_put(sock->sock); kfree_rcu(sock, rcu); + + complete(&sock->peer->sock_detach); }
/** @@ -181,12 +183,12 @@ struct ovpn_socket *ovpn_socket_new(struct socket *sock, struct ovpn_peer *peer)
ovpn_sock->sock = sock; kref_init(&ovpn_sock->refcount); + ovpn_sock->peer = peer;
/* TCP sockets are per-peer, therefore they are linked to their unique * peer */ if (sock->sk->sk_protocol == IPPROTO_TCP) { - ovpn_sock->peer = peer; ovpn_peer_hold(peer); } else if (sock->sk->sk_protocol == IPPROTO_UDP) { /* in UDP we only link the ovpn instance since the socket is diff --git a/drivers/net/ovpn/socket.h b/drivers/net/ovpn/socket.h index 15827e347f53..3f5a35fd9048 100644 --- a/drivers/net/ovpn/socket.h +++ b/drivers/net/ovpn/socket.h @@ -28,12 +28,12 @@ struct ovpn_peer; * @rcu: member used to schedule RCU destructor callback */ struct ovpn_socket { + struct ovpn_peer *peer; union { struct { struct ovpn_priv *ovpn; netdevice_tracker dev_tracker; }; - struct ovpn_peer *peer; };
struct socket *sock;