On Mon, Nov 17, 2025 at 06:00:27PM -0800, Bobby Eshleman wrote:
From: Bobby Eshleman bobbyeshleman@meta.com
Add netns support to loopback and vhost. Keep netns disabled for virtio-vsock, but add the necessary changes to comply with the common API updates.
Signed-off-by: Bobby Eshleman bobbyeshleman@meta.com
Changes in v10:
- Splitting patches complicates the series with meaningless placeholder values that eventually get replaced anyway,
so to avoid that, this patch combines them into one. Links
Yeah, looking at the result, this is better IMO, thanks!
to previous patches here:
- Link: https://lore.kernel.org/all/20251111-vsock-vmtest-v9-3-852787a37bed@meta.com...
- Link: https://lore.kernel.org/all/20251111-vsock-vmtest-v9-6-852787a37bed@meta.com...
- Link: https://lore.kernel.org/all/20251111-vsock-vmtest-v9-7-852787a37bed@meta.com...
- remove placeholder values (Stefano)
- update the comment to describe net/net_mode for
virtio_transport_reset_no_sock()
drivers/vhost/vsock.c | 45 +++++++++++++++++------ include/linux/virtio_vsock.h | 8 +++-- net/vmw_vsock/virtio_transport.c | 10 ++++-- net/vmw_vsock/virtio_transport_common.c | 63 ++++++++++++++++++++++++--------- net/vmw_vsock/vsock_loopback.c | 8 +++-- 5 files changed, 102 insertions(+), 32 deletions(-)
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index c8319cd1c232..2846076d484f 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -46,6 +46,11 @@ static DEFINE_READ_MOSTLY_HASHTABLE(vhost_vsock_hash, 8); struct vhost_vsock { struct vhost_dev dev; struct vhost_virtqueue vqs[2];
struct net *net;
netns_tracker ns_tracker;
/* The ns mode at the time vhost_vsock was created */
enum vsock_net_mode net_mode;
/* Link to global vhost_vsock_hash, writes use vhost_vsock_mutex */ struct hlist_node hash;
@@ -72,7 +77,8 @@ static bool vhost_transport_supports_local_mode(void) /* Callers that dereference the return value must hold vhost_vsock_mutex or the
- RCU read lock.
*/ -static struct vhost_vsock *vhost_vsock_get(u32 guest_cid) +static struct vhost_vsock *vhost_vsock_get(u32 guest_cid, struct net *net,
enum vsock_net_mode mode){ struct vhost_vsock *vsock;
@@ -83,9 +89,10 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid) if (other_cid == 0) continue;
if (other_cid == guest_cid)
if (other_cid == guest_cid &&vsock_net_check_mode(net, mode, vsock->net,vsock->net_mode)) return vsock;
}
return NULL;
@@ -274,7 +281,8 @@ static void vhost_transport_send_pkt_work(struct vhost_work *work) }
static int -vhost_transport_send_pkt(struct sk_buff *skb) +vhost_transport_send_pkt(struct sk_buff *skb, struct net *net,
enum vsock_net_mode net_mode){ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); struct vhost_vsock *vsock; @@ -283,7 +291,7 @@ vhost_transport_send_pkt(struct sk_buff *skb) rcu_read_lock();
/* Find the vhost_vsock according to guest context id */
- vsock = vhost_vsock_get(le64_to_cpu(hdr->dst_cid));
- vsock = vhost_vsock_get(le64_to_cpu(hdr->dst_cid), net, net_mode); if (!vsock) { rcu_read_unlock(); kfree_skb(skb);
@@ -310,7 +318,8 @@ vhost_transport_cancel_pkt(struct vsock_sock *vsk) rcu_read_lock();
/* Find the vhost_vsock according to guest context id */
- vsock = vhost_vsock_get(vsk->remote_addr.svm_cid);
- vsock = vhost_vsock_get(vsk->remote_addr.svm_cid,
if (!vsock) goto out;sock_net(sk_vsock(vsk)), vsk->net_mode);@@ -470,11 +479,12 @@ static struct virtio_transport vhost_transport = { static bool vhost_transport_seqpacket_allow(struct vsock_sock *vsk, u32 remote_cid) {
struct net *net = sock_net(sk_vsock(vsk)); struct vhost_vsock *vsock; bool seqpacket_allow = false;
rcu_read_lock();
- vsock = vhost_vsock_get(remote_cid);
vsock = vhost_vsock_get(remote_cid, net, vsk->net_mode);
if (vsock) seqpacket_allow = vsock->seqpacket_allow;
@@ -545,7 +555,8 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) if (le64_to_cpu(hdr->src_cid) == vsock->guest_cid && le64_to_cpu(hdr->dst_cid) == vhost_transport_get_local_cid())
virtio_transport_recv_pkt(&vhost_transport, skb);
virtio_transport_recv_pkt(&vhost_transport, skb, else kfree_skb(skb);vsock->net, vsock->net_mode);@@ -662,6 +673,7 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file) { struct vhost_virtqueue **vqs; struct vhost_vsock *vsock;
struct net *net; int ret;
/* This struct is large and allocation could fail, fall back to vmalloc
@@ -677,6 +689,17 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file) goto out; }
- net = current->nsproxy->net_ns;
- vsock->net = get_net_track(net, &vsock->ns_tracker, GFP_KERNEL);
- /* Store the mode of the namespace at the time of creation. If this
* namespace later changes from "global" to "local", we want this vsock* to continue operating normally and not suddenly break. For that* reason, we save the mode here and later use it when performing* socket lookups with vsock_net_check_mode() (see vhost_vsock_get()).*/- vsock->net_mode = vsock_net_mode(net);
- vsock->guest_cid = 0; /* no CID assigned yet */ vsock->seqpacket_allow = false;
@@ -716,7 +739,8 @@ static void vhost_vsock_reset_orphans(struct sock *sk) */
/* If the peer is still valid, no need to reset connection */
- if (vhost_vsock_get(vsk->remote_addr.svm_cid))
if (vhost_vsock_get(vsk->remote_addr.svm_cid, sock_net(sk),
vsk->net_mode))return;
/* If the close timeout is pending, let it expire. This avoids races
@@ -761,6 +785,7 @@ static int vhost_vsock_dev_release(struct inode *inode, struct file *file) virtio_vsock_skb_queue_purge(&vsock->send_pkt_queue);
vhost_dev_cleanup(&vsock->dev);
- put_net_track(vsock->net, &vsock->ns_tracker); kfree(vsock->dev.vqs); vhost_vsock_free(vsock); return 0;
@@ -787,7 +812,7 @@ static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid)
/* Refuse if CID is already in use */ mutex_lock(&vhost_vsock_mutex);
- other = vhost_vsock_get(guest_cid);
- other = vhost_vsock_get(guest_cid, vsock->net, vsock->net_mode); if (other && other != vsock) { mutex_unlock(&vhost_vsock_mutex); return -EADDRINUSE;
diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 0c67543a45c8..5ed6136a4ed4 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -173,6 +173,8 @@ struct virtio_vsock_pkt_info { u32 remote_cid, remote_port; struct vsock_sock *vsk; struct msghdr *msg;
- struct net *net;
- enum vsock_net_mode net_mode; u32 pkt_len; u16 type; u16 op;
@@ -185,7 +187,8 @@ struct virtio_transport { struct vsock_transport transport;
/* Takes ownership of the packet */
- int (*send_pkt)(struct sk_buff *skb);
int (*send_pkt)(struct sk_buff *skb, struct net *net,
enum vsock_net_mode net_mode);/* Used in MSG_ZEROCOPY mode. Checks, that provided data
- (number of buffers) could be transmitted with zerocopy
@@ -280,7 +283,8 @@ virtio_transport_dgram_enqueue(struct vsock_sock *vsk, void virtio_transport_destruct(struct vsock_sock *vsk);
void virtio_transport_recv_pkt(struct virtio_transport *t,
struct sk_buff *skb);
struct sk_buff *skb, struct net *net,enum vsock_net_mode net_mode);void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct sk_buff *skb); u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 wanted); void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit); diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index e585cb66c6f5..bc266bdb7faa 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -243,7 +243,8 @@ static int virtio_transport_send_skb_fast_path(struct virtio_vsock *vsock, struc }
static int -virtio_transport_send_pkt(struct sk_buff *skb) +virtio_transport_send_pkt(struct sk_buff *skb, struct net *net,
enum vsock_net_mode net_mode){ struct virtio_vsock_hdr *hdr; struct virtio_vsock *vsock; @@ -675,7 +676,12 @@ static void virtio_transport_rx_work(struct work_struct *work) virtio_vsock_skb_put(skb, payload_len);
virtio_transport_deliver_tap_pkt(skb);
virtio_transport_recv_pkt(&virtio_transport, skb);
/* Force virtio-transport into global mode since it* does not yet support local-mode namespacing.*/virtio_transport_recv_pkt(&virtio_transport, skb, } } while (!virtqueue_enable_cb(vq));NULL, VSOCK_NET_MODE_GLOBAL);diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index dcc8a1d5851e..168e7517a3f0 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -413,7 +413,7 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
virtio_transport_inc_tx_pkt(vvs, skb);
ret = t_ops->send_pkt(skb);
if (ret < 0) break;ret = t_ops->send_pkt(skb, info->net, info->net_mode);@@ -527,6 +527,8 @@ static int virtio_transport_send_credit_update(struct vsock_sock *vsk) struct virtio_vsock_pkt_info info = { .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE, .vsk = vsk,
.net = sock_net(sk_vsock(vsk)),.net_mode = vsk->net_mode,};
return virtio_transport_send_pkt_info(vsk, &info);
@@ -1067,6 +1069,8 @@ int virtio_transport_connect(struct vsock_sock *vsk) struct virtio_vsock_pkt_info info = { .op = VIRTIO_VSOCK_OP_REQUEST, .vsk = vsk,
.net = sock_net(sk_vsock(vsk)),.net_mode = vsk->net_mode,};
return virtio_transport_send_pkt_info(vsk, &info);
@@ -1082,6 +1086,8 @@ int virtio_transport_shutdown(struct vsock_sock *vsk, int mode) (mode & SEND_SHUTDOWN ? VIRTIO_VSOCK_SHUTDOWN_SEND : 0), .vsk = vsk,
.net = sock_net(sk_vsock(vsk)),.net_mode = vsk->net_mode,};
return virtio_transport_send_pkt_info(vsk, &info);
@@ -1108,6 +1114,8 @@ virtio_transport_stream_enqueue(struct vsock_sock *vsk, .msg = msg, .pkt_len = len, .vsk = vsk,
.net = sock_net(sk_vsock(vsk)),.net_mode = vsk->net_mode,};
return virtio_transport_send_pkt_info(vsk, &info);
@@ -1145,6 +1153,8 @@ static int virtio_transport_reset(struct vsock_sock *vsk, .op = VIRTIO_VSOCK_OP_RST, .reply = !!skb, .vsk = vsk,
.net = sock_net(sk_vsock(vsk)),.net_mode = vsk->net_mode,};
/* Send RST only if the original pkt is not a RST pkt */
@@ -1156,15 +1166,27 @@ static int virtio_transport_reset(struct vsock_sock *vsk,
/* Normally packets are associated with a socket. There may be no socket if an
- attempt was made to connect to a socket that does not exist.
- net and net_mode refer to the namespace of whoever sent the invalid message.
- For loopback, this is the namespace of the socket. For vhost, this is the
- namespace of the VM (i.e., vhost_vsock).
*/ static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
struct sk_buff *skb)
struct sk_buff *skb, struct net *net,enum vsock_net_mode net_mode){ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); struct virtio_vsock_pkt_info info = { .op = VIRTIO_VSOCK_OP_RST, .type = le16_to_cpu(hdr->type), .reply = true,
/* net or net_mode are not defined here because we pass* net and net_mode directly to t->send_pkt(), instead of* relying on virtio_transport_send_pkt_info() to pass them to* t->send_pkt(). They are not needed by* virtio_transport_alloc_skb(). }; struct sk_buff *reply;*/@@ -1183,7 +1205,7 @@ static int virtio_transport_reset_no_sock(const struct virtio_transport *t, if (!reply) return -ENOMEM;
- return t->send_pkt(reply);
- return t->send_pkt(reply, net, net_mode);
}
/* This function should be called with sk_lock held and SOCK_DONE set */ @@ -1465,6 +1487,8 @@ virtio_transport_send_response(struct vsock_sock *vsk, .remote_port = le32_to_cpu(hdr->src_port), .reply = true, .vsk = vsk,
.net = sock_net(sk_vsock(vsk)),.net_mode = vsk->net_mode,};
return virtio_transport_send_pkt_info(vsk, &info);
@@ -1507,12 +1531,14 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb, int ret;
if (le16_to_cpu(hdr->op) != VIRTIO_VSOCK_OP_REQUEST) {
virtio_transport_reset_no_sock(t, skb);
virtio_transport_reset_no_sock(t, skb, sock_net(sk),vsk->net_mode);return -EINVAL; }
if (sk_acceptq_is_full(sk)) {
virtio_transport_reset_no_sock(t, skb);
virtio_transport_reset_no_sock(t, skb, sock_net(sk), return -ENOMEM; }vsk->net_mode);@@ -1520,13 +1546,15 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb, * Subsequent enqueues would lead to a memory leak. */ if (sk->sk_shutdown == SHUTDOWN_MASK) {
virtio_transport_reset_no_sock(t, skb);
virtio_transport_reset_no_sock(t, skb, sock_net(sk),vsk->net_mode);return -ESHUTDOWN; }
child = vsock_create_connected(sk); if (!child) {
virtio_transport_reset_no_sock(t, skb);
virtio_transport_reset_no_sock(t, skb, sock_net(sk), return -ENOMEM; }vsk->net_mode);@@ -1548,7 +1576,8 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb, */ if (ret || vchild->transport != &t->transport) { release_sock(child);
virtio_transport_reset_no_sock(t, skb);
virtio_transport_reset_no_sock(t, skb, sock_net(sk), sock_put(child); return ret; }vsk->net_mode);@@ -1576,7 +1605,8 @@ static bool virtio_transport_valid_type(u16 type)
- lock.
*/ void virtio_transport_recv_pkt(struct virtio_transport *t,
struct sk_buff *skb)
struct sk_buff *skb, struct net *net,enum vsock_net_mode net_mode){ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); struct sockaddr_vm src, dst; @@ -1599,24 +1629,25 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, le32_to_cpu(hdr->fwd_cnt));
if (!virtio_transport_valid_type(le16_to_cpu(hdr->type))) {
(void)virtio_transport_reset_no_sock(t, skb);
(void)virtio_transport_reset_no_sock(t, skb, net, net_mode);goto free_pkt; }
/* The socket must be in connected or bound table
- otherwise send reset back
*/
- sk = vsock_find_connected_socket(&src, &dst);
- sk = vsock_find_connected_socket_net(&src, &dst, net, net_mode); if (!sk) {
sk = vsock_find_bound_socket(&dst);
if (!sk) {sk = vsock_find_bound_socket_net(&dst, net, net_mode);
(void)virtio_transport_reset_no_sock(t, skb);
(void)virtio_transport_reset_no_sock(t, skb, net,net_mode); goto free_pkt;} }
if (virtio_transport_get_type(sk) != le16_to_cpu(hdr->type)) {
(void)virtio_transport_reset_no_sock(t, skb);
sock_put(sk); goto free_pkt; }(void)virtio_transport_reset_no_sock(t, skb, net, net_mode);@@ -1635,7 +1666,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, */ if (sock_flag(sk, SOCK_DONE) || (sk->sk_state != TCP_LISTEN && vsk->transport != &t->transport)) {
(void)virtio_transport_reset_no_sock(t, skb);
release_sock(sk); sock_put(sk); goto free_pkt;(void)virtio_transport_reset_no_sock(t, skb, net, net_mode);@@ -1667,7 +1698,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, kfree_skb(skb); break; default:
(void)virtio_transport_reset_no_sock(t, skb);
kfree_skb(skb); break; }(void)virtio_transport_reset_no_sock(t, skb, net, net_mode);diff --git a/net/vmw_vsock/vsock_loopback.c b/net/vmw_vsock/vsock_loopback.c index 1e25c1a6b43f..a730fa74d2d9 100644 --- a/net/vmw_vsock/vsock_loopback.c +++ b/net/vmw_vsock/vsock_loopback.c @@ -31,7 +31,8 @@ static bool vsock_loopback_supports_local_mode(void) return true; }
-static int vsock_loopback_send_pkt(struct sk_buff *skb) +static int vsock_loopback_send_pkt(struct sk_buff *skb, struct net *net,
enum vsock_net_mode net_mode){ struct vsock_loopback *vsock = &the_vsock_loopback; int len = skb->len; @@ -138,7 +139,10 @@ static void vsock_loopback_work(struct work_struct *work) */ virtio_transport_consume_skb_sent(skb, false); virtio_transport_deliver_tap_pkt(skb);
virtio_transport_recv_pkt(&loopback_transport, skb);
virtio_transport_recv_pkt(&loopback_transport, skb,sock_net(skb->sk),
This will crash without the next patch, right? (I'll also comment there, but I think the order of patches here is important.)
That said, should we add a WARN_ONCE/WARN_ON_ONCE here with a check on skb->sk?
The rest LGTM, Stefano
}vsock_sk(skb->sk)->net_mode);}
-- 2.47.3