Signal delivery during connect() may disconnect an already established socket. Problem is that such socket might have been placed in a sockmap before the connection was closed.
PATCH 1 ensures this race won't lead to an unconnected vsock staying in the sockmap. PATCH 2 selftests it.
PATCH 3 fixes a related race. Note that selftest in PATCH 2 does test this code as well, but winning this race variant may take more than 2 seconds, so I'm not advertising it.
Signed-off-by: Michal Luczaj mhal@rbox.co --- Changes in v3: - Selftest: drop unnecessary variable initialization and reorder the calls - Link to v2: https://lore.kernel.org/r/20250314-vsock-trans-signal-race-v2-0-421a41f60f42...
Changes in v2: - Handle one more path of tripping the warning - Add a selftest - Collect R-b [Stefano] - Link to v1: https://lore.kernel.org/r/20250307-vsock-trans-signal-race-v1-1-3aca3f771fbd...
--- Michal Luczaj (3): vsock/bpf: Fix EINTR connect() racing sockmap update selftest/bpf: Add test for AF_VSOCK connect() racing sockmap update vsock/bpf: Fix bpf recvmsg() racing transport reassignment
net/vmw_vsock/af_vsock.c | 10 ++- net/vmw_vsock/vsock_bpf.c | 24 ++++-- .../selftests/bpf/prog_tests/sockmap_basic.c | 97 ++++++++++++++++++++++ 3 files changed, 122 insertions(+), 9 deletions(-) --- base-commit: da9e8efe7ee10e8425dc356a9fc593502c8e3933 change-id: 20250305-vsock-trans-signal-race-d62f7718d099
Best regards,
Signal delivery during connect() may result in a disconnect of an already TCP_ESTABLISHED socket. Problem is that such established socket might have been placed in a sockmap before the connection was closed. We end up with a SS_UNCONNECTED vsock in a sockmap. And this, combined with the ability to reassign (unconnected) vsock's transport to NULL, breaks the sockmap contract. As manifested by WARN_ON_ONCE.
connect / state = SS_CONNECTED / sock_map_update_elem if signal_pending state = SS_UNCONNECTED
connect transport = NULL vsock_bpf_recvmsg WARN_ON_ONCE(!vsk->transport)
Ensure the socket does not stay in sockmap.
WARNING: CPU: 8 PID: 1228 at net/vmw_vsock/vsock_bpf.c:97 vsock_bpf_recvmsg+0xb43/0xe00 CPU: 8 UID: 0 PID: 1228 Comm: a.out Not tainted 6.14.0-rc5+ RIP: 0010:vsock_bpf_recvmsg+0xb43/0xe00 sock_recvmsg+0x1b2/0x220 __sys_recvfrom+0x190/0x270 __x64_sys_recvfrom+0xdc/0x1b0 do_syscall_64+0x93/0x1b0 entry_SYSCALL_64_after_hwframe+0x76/0x7e
Fixes: 634f1a7110b4 ("vsock: support sockmap") Reviewed-by: Stefano Garzarella sgarzare@redhat.com Signed-off-by: Michal Luczaj mhal@rbox.co --- net/vmw_vsock/af_vsock.c | 10 +++++++++- net/vmw_vsock/vsock_bpf.c | 1 + 2 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 7e3db87ae4333cf63327ec105ca99253569bb9fe..81b1b8e9c946a646778367ab78ca180cef75ef72 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1579,7 +1579,15 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr,
if (signal_pending(current)) { err = sock_intr_errno(timeout); - sk->sk_state = sk->sk_state == TCP_ESTABLISHED ? TCP_CLOSING : TCP_CLOSE; + if (sk->sk_state == TCP_ESTABLISHED) { + /* Might have raced with a sockmap update. */ + if (sk->sk_prot->unhash) + sk->sk_prot->unhash(sk); + + sk->sk_state = TCP_CLOSING; + } else { + sk->sk_state = TCP_CLOSE; + } sock->state = SS_UNCONNECTED; vsock_transport_cancel_pkt(vsk); vsock_remove_connected(vsk); diff --git a/net/vmw_vsock/vsock_bpf.c b/net/vmw_vsock/vsock_bpf.c index 07b96d56f3a577af71021b1b8132743554996c4f..c68fdaf09046b68254dac3ea70ffbe73dfa45cef 100644 --- a/net/vmw_vsock/vsock_bpf.c +++ b/net/vmw_vsock/vsock_bpf.c @@ -127,6 +127,7 @@ static void vsock_bpf_rebuild_protos(struct proto *prot, const struct proto *bas { *prot = *base; prot->close = sock_map_close; + prot->unhash = sock_map_unhash; prot->recvmsg = vsock_bpf_recvmsg; prot->sock_is_readable = sk_msg_is_readable; }
Racing signal-interrupted connect() and sockmap update may result in an unconnected (and missing vsock transport) socket in a sockmap.
Test spends 2 seconds attempting to reach WARN_ON_ONCE().
connect / state = SS_CONNECTED / sock_map_update_elem if signal_pending state = SS_UNCONNECTED
connect transport = NULL vsock_bpf_recvmsg WARN_ON_ONCE(!vsk->transport)
Signed-off-by: Michal Luczaj mhal@rbox.co --- .../selftests/bpf/prog_tests/sockmap_basic.c | 97 ++++++++++++++++++++++ 1 file changed, 97 insertions(+)
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 1e3e4392dcca0e1722c1982ecc649a80c27443b2..d9cd20d1fbc25cc56d37f06522c5b805004fa884 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -3,6 +3,7 @@ #include <error.h> #include <netinet/tcp.h> #include <sys/epoll.h> +#include <linux/time64.h>
#include "test_progs.h" #include "test_skmsg_load_helpers.skel.h" @@ -1042,6 +1043,100 @@ static void test_sockmap_vsock_unconnected(void) xclose(map); }
+#define CONNECT_SIGNAL_RACE_TIMEOUT 2 /* seconds */ + +static void sig_handler(int signum) +{ + /* nop */ +} + +static void connect_signal_racer_cleanup(void *map) +{ + xclose(*(int *)map); +} + +static void *connect_signal_racer(void *arg) +{ + int map; + + map = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(int), + sizeof(int), 1, NULL); + if (!ASSERT_OK_FD(map, "bpf_map_create")) + return NULL; + + pthread_cleanup_push(connect_signal_racer_cleanup, &map); + + for (;;) { + int c = *(int *)arg; + int zero = 0; + + (void)bpf_map_update_elem(map, &zero, &c, BPF_ANY); + + if (kill(0, SIGUSR1)) { + FAIL_ERRNO("kill"); + break; + } + + if ((recv(c, NULL, 0, MSG_DONTWAIT) < 0) && errno == ENODEV) { + FAIL_ERRNO("recv"); + break; + } + } + + pthread_cleanup_pop(1); + + return NULL; +} + +static void test_sockmap_vsock_connect_signal_race(void) +{ + struct sockaddr_vm addr, bad_addr; + socklen_t alen = sizeof(addr); + sighandler_t orig_handler; + pthread_t thread; + int s, c, p; + __u64 tout; + + orig_handler = signal(SIGUSR1, sig_handler); + if (!ASSERT_NEQ(orig_handler, SIG_ERR, "signal handler setup")) + return; + + s = socket_loopback(AF_VSOCK, SOCK_SEQPACKET | SOCK_NONBLOCK); + if (s < 0) + goto restore; + + if (xgetsockname(s, (struct sockaddr *)&addr, &alen)) + goto close; + + bad_addr = addr; + bad_addr.svm_cid = 0x42424242; /* non-existing */ + + if (xpthread_create(&thread, 0, connect_signal_racer, &c)) + goto close; + + tout = get_time_ns() + CONNECT_SIGNAL_RACE_TIMEOUT * NSEC_PER_SEC; + do { + c = xsocket(AF_VSOCK, SOCK_SEQPACKET, 0); + if (c < 0) + break; + + if (connect(c, (struct sockaddr *)&addr, alen) && errno == EINTR) + (void)connect(c, (struct sockaddr *)&bad_addr, alen); + + xclose(c); + p = accept(s, NULL, NULL); + if (p >= 0) + xclose(p); + } while (get_time_ns() < tout); + + ASSERT_OK(pthread_cancel(thread), "pthread_cancel"); + xpthread_join(thread, NULL); +close: + xclose(s); +restore: + ASSERT_NEQ(signal(SIGUSR1, orig_handler), SIG_ERR, "handler restore"); +} + void test_sockmap_basic(void) { if (test__start_subtest("sockmap create_update_free")) @@ -1108,4 +1203,6 @@ void test_sockmap_basic(void) test_sockmap_skb_verdict_vsock_poll(); if (test__start_subtest("sockmap vsock unconnected")) test_sockmap_vsock_unconnected(); + if (test__start_subtest("sockmap vsock connect signal race")) + test_sockmap_vsock_connect_signal_race(); }
On 3/16/25 11:45 PM, Michal Luczaj wrote:
Racing signal-interrupted connect() and sockmap update may result in an unconnected (and missing vsock transport) socket in a sockmap.
Test spends 2 seconds attempting to reach WARN_ON_ONCE().
connect / state = SS_CONNECTED / sock_map_update_elem if signal_pending state = SS_UNCONNECTED
connect transport = NULL vsock_bpf_recvmsg WARN_ON_ONCE(!vsk->transport)
Signed-off-by: Michal Luczaj mhal@rbox.co
This is apparently causing some bpf self-test failure. (Timeout? the self-test failure output is not clear to me.)
Could you please have a look?
Thanks!
Paolo
On 3/17/25 09:23, Paolo Abeni wrote:
On 3/16/25 11:45 PM, Michal Luczaj wrote:
Racing signal-interrupted connect() and sockmap update may result in an unconnected (and missing vsock transport) socket in a sockmap.
Test spends 2 seconds attempting to reach WARN_ON_ONCE().
connect / state = SS_CONNECTED / sock_map_update_elem if signal_pending state = SS_UNCONNECTED
connect transport = NULL vsock_bpf_recvmsg WARN_ON_ONCE(!vsk->transport)
Signed-off-by: Michal Luczaj mhal@rbox.co
This is apparently causing some bpf self-test failure. (Timeout? the self-test failure output is not clear to me.)
Could you please have a look?
Sending signal to the whole process group probably isn't the best idea. Not sure how the previous version passed though. Sorry, v4 incoming.
Michal
Signal delivery during connect() may lead to a disconnect of an already established socket. That involves removing socket from any sockmap and resetting state to SS_UNCONNECTED. While it correctly restores socket's proto, a call to vsock_bpf_recvmsg() might have been already under way in another thread. If the connect()ing thread reassigns the vsock transport to NULL, the recvmsg()ing thread may trigger a WARN_ON_ONCE.
connect / state = SS_CONNECTED / sock_map_update_elem vsock_bpf_recvmsg psock = sk_psock_get() lock sk if signal_pending unhash sock_map_remove_links state = SS_UNCONNECTED release sk
connect transport = NULL lock sk WARN_ON_ONCE(!vsk->transport)
Protect recvmsg() from racing against transport reassignment. Enforce the sockmap invariant that psock implies transport: lock socket before getting psock.
WARNING: CPU: 9 PID: 1222 at net/vmw_vsock/vsock_bpf.c:92 vsock_bpf_recvmsg+0xb55/0xe00 CPU: 9 UID: 0 PID: 1222 Comm: a.out Not tainted 6.14.0-rc5+ RIP: 0010:vsock_bpf_recvmsg+0xb55/0xe00 sock_recvmsg+0x1b2/0x220 __sys_recvfrom+0x190/0x270 __x64_sys_recvfrom+0xdc/0x1b0 do_syscall_64+0x93/0x1b0 entry_SYSCALL_64_after_hwframe+0x76/0x7e
Fixes: 634f1a7110b4 ("vsock: support sockmap") Signed-off-by: Michal Luczaj mhal@rbox.co --- net/vmw_vsock/vsock_bpf.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-)
diff --git a/net/vmw_vsock/vsock_bpf.c b/net/vmw_vsock/vsock_bpf.c index c68fdaf09046b68254dac3ea70ffbe73dfa45cef..5138195d91fb258d4bc09b48e80e13651d62863a 100644 --- a/net/vmw_vsock/vsock_bpf.c +++ b/net/vmw_vsock/vsock_bpf.c @@ -73,28 +73,35 @@ static int __vsock_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int return err; }
-static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg, - size_t len, int flags, int *addr_len) +static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int flags, int *addr_len) { struct sk_psock *psock; struct vsock_sock *vsk; int copied;
+ /* Since signal delivery during connect() may reset the state of socket + * that's already in a sockmap, take the lock before checking on psock. + * This serializes a possible transport reassignment, protecting this + * function from running with NULL transport. + */ + lock_sock(sk); + psock = sk_psock_get(sk); - if (unlikely(!psock)) + if (unlikely(!psock)) { + release_sock(sk); return __vsock_recvmsg(sk, msg, len, flags); + }
- lock_sock(sk); vsk = vsock_sk(sk); - if (WARN_ON_ONCE(!vsk->transport)) { copied = -ENODEV; goto out; }
if (vsock_has_data(sk, psock) && sk_psock_queue_empty(psock)) { - release_sock(sk); sk_psock_put(sk, psock); + release_sock(sk); return __vsock_recvmsg(sk, msg, len, flags); }
@@ -108,8 +115,8 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg, }
if (sk_psock_queue_empty(psock)) { - release_sock(sk); sk_psock_put(sk, psock); + release_sock(sk); return __vsock_recvmsg(sk, msg, len, flags); }
@@ -117,8 +124,8 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg, }
out: - release_sock(sk); sk_psock_put(sk, psock); + release_sock(sk);
return copied; }
linux-kselftest-mirror@lists.linaro.org