On Tue, Nov 11, 2025 at 10:54:45PM -0800, Bobby Eshleman wrote:
From: Bobby Eshleman bobbyeshleman@meta.com
Enable network namespace support in the virtio-vsock and common transport layer.
The changes include:
This list seems to have been generated by AI. I have nothing against it, but I don't think it's important to list all the things that have changed, but rather to explain why.
- Add a 'net' field to virtio_vsock_pkt_info to carry the namespace
pointer for outgoing packets.
Why?
- Add 'net' and 'net_mode' to t->send_pkt() and
virtio_transport_recv_pkt() functions
Why?
- Modify callback functions to accept placeholder values
(NULL and 0) for net and net_mode. The placeholders will be
Why 0 ? I mean VSOCK_NET_MODE_GLOBAL is also 0, no? So I don't understand if you want to specify an invalid value (like NULL) or VSOCK_NET_MODE_GLOBAL.
replaced when later patches in this series add namespace support to transports. 4. Set virtio-vsock to global mode unconditionally, instead of using placeholders. This is done in this patch because virtio-vsock won't have any additional changes to choose the net/net_mode, unlike the other transports. Same complexity as placeholders. 5. Pass net and net_mode to virtio_transport_reset_no_sock() directly. This ensures that the outgoing RST packets are scoped based on the namespace of the receiver of the failed request.
"Receiver" is confusing IMO, see the comment on virtio_transport_reset_no_sock().
- Pass net and net_mode to socket lookup functions using
vsock_find_{bound,connected}_socket_net().
mmmm, are those functions working fine with the placeholders?
If it simplifies, I think we can eventually merge all changes to transports that depends on virtio_transport_common in a single commit. IMO is better to have working commits than better split.
I mean, is this commit working (at runtime) well?
Signed-off-by: Bobby Eshleman bobbyeshleman@meta.com
Changes in v9:
- include/virtio_vsock.h: send_pkt() cb takes net and net_mode
- virtio_transport reset_no_sock() takes net and net_mode
- vhost-vsock: add placeholders to recv_pkt() for compilation
- loopback: add placeholders to recv_pkt() for compilation
- remove skb->cb net/net_mode usage, pass as arguments to
t->send_pkt() and virtio_transport_recv_pkt() functions instead. Note that skb->cb will still be used by loopback, but only internal to loopback and never passing it to virtio common.
- remove virtio_vsock_alloc_rx_skb(), it is not needed after removing
skb->cb usage.
- pass net and net_mode to virtio_transport_reset_no_sock()
Changes in v8:
- add the virtio_vsock_alloc_rx_skb(), to be in same patch that fields
are read (Stefano)
Changes in v7:
- add comment explaining the !vsk case in virtio_transport_alloc_skb()
drivers/vhost/vsock.c | 6 ++-- include/linux/virtio_vsock.h | 8 +++-- net/vmw_vsock/virtio_transport.c | 10 ++++-- net/vmw_vsock/virtio_transport_common.c | 57 ++++++++++++++++++++++++--------- net/vmw_vsock/vsock_loopback.c | 5 +-- 5 files changed, 62 insertions(+), 24 deletions(-)
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 34adf0cf9124..0a0e73405532 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -269,7 +269,8 @@ static void vhost_transport_send_pkt_work(struct vhost_work *work) }
static int -vhost_transport_send_pkt(struct sk_buff *skb) +vhost_transport_send_pkt(struct sk_buff *skb, struct net *net,
enum vsock_net_mode net_mode){ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); struct vhost_vsock *vsock; @@ -537,7 +538,8 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) if (le64_to_cpu(hdr->src_cid) == vsock->guest_cid && le64_to_cpu(hdr->dst_cid) == vhost_transport_get_local_cid())
virtio_transport_recv_pkt(&vhost_transport, skb);
virtio_transport_recv_pkt(&vhost_transport, skb, NULL, else kfree_skb(skb);0);diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 0c67543a45c8..5ed6136a4ed4 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -173,6 +173,8 @@ struct virtio_vsock_pkt_info { u32 remote_cid, remote_port; struct vsock_sock *vsk; struct msghdr *msg;
- struct net *net;
- enum vsock_net_mode net_mode; u32 pkt_len; u16 type; u16 op;
@@ -185,7 +187,8 @@ struct virtio_transport { struct vsock_transport transport;
/* Takes ownership of the packet */
- int (*send_pkt)(struct sk_buff *skb);
int (*send_pkt)(struct sk_buff *skb, struct net *net,
enum vsock_net_mode net_mode);/* Used in MSG_ZEROCOPY mode. Checks, that provided data
- (number of buffers) could be transmitted with zerocopy
@@ -280,7 +283,8 @@ virtio_transport_dgram_enqueue(struct vsock_sock *vsk, void virtio_transport_destruct(struct vsock_sock *vsk);
void virtio_transport_recv_pkt(struct virtio_transport *t,
struct sk_buff *skb);
struct sk_buff *skb, struct net *net,enum vsock_net_mode net_mode);void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct sk_buff *skb); u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 wanted); void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit); diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index f92f23be3f59..9395fd875823 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -231,7 +231,8 @@ static int virtio_transport_send_skb_fast_path(struct virtio_vsock *vsock, struc }
static int -virtio_transport_send_pkt(struct sk_buff *skb) +virtio_transport_send_pkt(struct sk_buff *skb, struct net *net,
enum vsock_net_mode net_mode){ struct virtio_vsock_hdr *hdr; struct virtio_vsock *vsock; @@ -660,7 +661,12 @@ static void virtio_transport_rx_work(struct work_struct *work) virtio_vsock_skb_put(skb, payload_len);
virtio_transport_deliver_tap_pkt(skb);
virtio_transport_recv_pkt(&virtio_transport, skb);
/* Force virtio-transport into global mode since it* does not yet support local-mode namespacing.*/virtio_transport_recv_pkt(&virtio_transport, skb, } } while (!virtqueue_enable_cb(vq));NULL, VSOCK_NET_MODE_GLOBAL);diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index dcc8a1d5851e..f4e09cb1567c 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -413,7 +413,7 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
virtio_transport_inc_tx_pkt(vvs, skb);
ret = t_ops->send_pkt(skb);
if (ret < 0) break;ret = t_ops->send_pkt(skb, info->net, info->net_mode);@@ -527,6 +527,8 @@ static int virtio_transport_send_credit_update(struct vsock_sock *vsk) struct virtio_vsock_pkt_info info = { .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE, .vsk = vsk,
.net = sock_net(sk_vsock(vsk)),.net_mode = vsk->net_mode,};
return virtio_transport_send_pkt_info(vsk, &info);
@@ -1067,6 +1069,8 @@ int virtio_transport_connect(struct vsock_sock *vsk) struct virtio_vsock_pkt_info info = { .op = VIRTIO_VSOCK_OP_REQUEST, .vsk = vsk,
.net = sock_net(sk_vsock(vsk)),.net_mode = vsk->net_mode,};
return virtio_transport_send_pkt_info(vsk, &info);
@@ -1082,6 +1086,8 @@ int virtio_transport_shutdown(struct vsock_sock *vsk, int mode) (mode & SEND_SHUTDOWN ? VIRTIO_VSOCK_SHUTDOWN_SEND : 0), .vsk = vsk,
.net = sock_net(sk_vsock(vsk)),.net_mode = vsk->net_mode,};
return virtio_transport_send_pkt_info(vsk, &info);
@@ -1108,6 +1114,8 @@ virtio_transport_stream_enqueue(struct vsock_sock *vsk, .msg = msg, .pkt_len = len, .vsk = vsk,
.net = sock_net(sk_vsock(vsk)),.net_mode = vsk->net_mode,};
return virtio_transport_send_pkt_info(vsk, &info);
@@ -1145,6 +1153,8 @@ static int virtio_transport_reset(struct vsock_sock *vsk, .op = VIRTIO_VSOCK_OP_RST, .reply = !!skb, .vsk = vsk,
.net = sock_net(sk_vsock(vsk)),.net_mode = vsk->net_mode,};
/* Send RST only if the original pkt is not a RST pkt */
@@ -1156,15 +1166,27 @@ static int virtio_transport_reset(struct vsock_sock *vsk,
/* Normally packets are associated with a socket. There may be no socket if an
- attempt was made to connect to a socket that does not exist.
- net and net_mode refer to the net and mode of the receiving device (e.g.,
- vhost_vsock). For loopback, they refer to the sending socket net/mode. This
- way the RST packet is sent back to the same namespace as the bad request.
Could this be a problem, should we split this function?
BTW, I'm a bit confused. For vhost-vsock, this is the namespace of the device, so the namespace of the guest, so also in that case the namespace of the sender, no?
Maybe sender/receiver are confusing. What you want to highlight with this comment?
*/ static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
struct sk_buff *skb)
struct sk_buff *skb, struct net *net,enum vsock_net_mode net_mode){ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); struct virtio_vsock_pkt_info info = { .op = VIRTIO_VSOCK_OP_RST, .type = le16_to_cpu(hdr->type), .reply = true,
/* net or net_mode are not defined here because we pass* net and net_mode directly to t->send_pkt(), instead of* relying on virtio_transport_send_pkt_info() to pass them to* t->send_pkt(). They are not needed by* virtio_transport_alloc_skb(). }; struct sk_buff *reply;*/@@ -1183,7 +1205,7 @@ static int virtio_transport_reset_no_sock(const struct virtio_transport *t, if (!reply) return -ENOMEM;
- return t->send_pkt(reply);
- return t->send_pkt(reply, net, net_mode);
}
/* This function should be called with sk_lock held and SOCK_DONE set */ @@ -1465,6 +1487,8 @@ virtio_transport_send_response(struct vsock_sock *vsk, .remote_port = le32_to_cpu(hdr->src_port), .reply = true, .vsk = vsk,
.net = sock_net(sk_vsock(vsk)),.net_mode = vsk->net_mode,};
return virtio_transport_send_pkt_info(vsk, &info);
@@ -1507,12 +1531,12 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb, int ret;
if (le16_to_cpu(hdr->op) != VIRTIO_VSOCK_OP_REQUEST) {
virtio_transport_reset_no_sock(t, skb);
virtio_transport_reset_no_sock(t, skb, sock_net(sk), vsk->net_mode);return -EINVAL; }
if (sk_acceptq_is_full(sk)) {
virtio_transport_reset_no_sock(t, skb);
return -ENOMEM; }virtio_transport_reset_no_sock(t, skb, sock_net(sk), vsk->net_mode);@@ -1520,13 +1544,13 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb, * Subsequent enqueues would lead to a memory leak. */ if (sk->sk_shutdown == SHUTDOWN_MASK) {
virtio_transport_reset_no_sock(t, skb);
virtio_transport_reset_no_sock(t, skb, sock_net(sk), vsk->net_mode);return -ESHUTDOWN; }
child = vsock_create_connected(sk); if (!child) {
virtio_transport_reset_no_sock(t, skb);
return -ENOMEM; }virtio_transport_reset_no_sock(t, skb, sock_net(sk), vsk->net_mode);@@ -1548,7 +1572,7 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb, */ if (ret || vchild->transport != &t->transport) { release_sock(child);
virtio_transport_reset_no_sock(t, skb);
sock_put(child); return ret; }virtio_transport_reset_no_sock(t, skb, sock_net(sk), vsk->net_mode);@@ -1576,7 +1600,8 @@ static bool virtio_transport_valid_type(u16 type)
- lock.
*/ void virtio_transport_recv_pkt(struct virtio_transport *t,
struct sk_buff *skb)
struct sk_buff *skb, struct net *net,enum vsock_net_mode net_mode){ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); struct sockaddr_vm src, dst; @@ -1599,24 +1624,24 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, le32_to_cpu(hdr->fwd_cnt));
if (!virtio_transport_valid_type(le16_to_cpu(hdr->type))) {
(void)virtio_transport_reset_no_sock(t, skb);
(void)virtio_transport_reset_no_sock(t, skb, net, net_mode);goto free_pkt; }
/* The socket must be in connected or bound table
- otherwise send reset back
*/
- sk = vsock_find_connected_socket(&src, &dst);
- sk = vsock_find_connected_socket_net(&src, &dst, net, net_mode);
Here `net` can be null, right? Is this okay?
if (!sk) {
sk = vsock_find_bound_socket(&dst);
if (!sk) {sk = vsock_find_bound_socket_net(&dst, net, net_mode);
(void)virtio_transport_reset_no_sock(t, skb);
(void)virtio_transport_reset_no_sock(t, skb, net, net_mode); goto free_pkt;} }
if (virtio_transport_get_type(sk) != le16_to_cpu(hdr->type)) {
(void)virtio_transport_reset_no_sock(t, skb);
sock_put(sk); goto free_pkt; }(void)virtio_transport_reset_no_sock(t, skb, net, net_mode);@@ -1635,7 +1660,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, */ if (sock_flag(sk, SOCK_DONE) || (sk->sk_state != TCP_LISTEN && vsk->transport != &t->transport)) {
(void)virtio_transport_reset_no_sock(t, skb);
release_sock(sk); sock_put(sk); goto free_pkt;(void)virtio_transport_reset_no_sock(t, skb, net, net_mode);@@ -1667,7 +1692,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, kfree_skb(skb); break; default:
(void)virtio_transport_reset_no_sock(t, skb);
kfree_skb(skb); break; }(void)virtio_transport_reset_no_sock(t, skb, net, net_mode);diff --git a/net/vmw_vsock/vsock_loopback.c b/net/vmw_vsock/vsock_loopback.c index a8f218f0c5a3..d3ac056663ea 100644 --- a/net/vmw_vsock/vsock_loopback.c +++ b/net/vmw_vsock/vsock_loopback.c @@ -26,7 +26,8 @@ static u32 vsock_loopback_get_local_cid(void) return VMADDR_CID_LOCAL; }
-static int vsock_loopback_send_pkt(struct sk_buff *skb) +static int vsock_loopback_send_pkt(struct sk_buff *skb, struct net *net,
enum vsock_net_mode net_mode){ struct vsock_loopback *vsock = &the_vsock_loopback; int len = skb->len; @@ -130,7 +131,7 @@ static void vsock_loopback_work(struct work_struct *work) */ virtio_transport_consume_skb_sent(skb, false); virtio_transport_deliver_tap_pkt(skb);
virtio_transport_recv_pkt(&loopback_transport, skb);
}virtio_transport_recv_pkt(&loopback_transport, skb, NULL, 0);}
-- 2.47.3