TLS expects that it owns the receive queue of the TCP socket. This cannot be guaranteed in case the reader of the TCP socket entered before the TLS ULP was installed, or uses some non-standard read API (eg. zerocopy ones). Replace the WARN_ON() and a buggy early exit (which leaves anchor pointing to a freed skb) with real error handling. Wipe the parsing state and tell the reader to retry.
We already reload the anchor every time we (re)acquire the socket lock, so the only condition we need to avoid is an out of bounds read (not having enough bytes in the socket for previously parsed record len).
If some data was read from under TLS but there's enough in the queue we'll reload and decrypt what is most likely not a valid TLS record. Leading to some undefined behavior from TLS perspective (corrupting a stream? missing an alert? missing an attack?) but no kernel crash should take place.
Reported-by: William Liu will@willsroot.io Reported-by: Savino Dicanosa savy@syst3mfailure.io Link: https://lore.kernel.org/tFjq_kf7sWIG3A7CrCg_egb8CVsT_gsmHAK0_wxDPJXfIzxFAMxq... Fixes: 84c61fe1a75b ("tls: rx: do not use the standard strparser") Signed-off-by: Jakub Kicinski kuba@kernel.org --- v2: - fix the reporter tags - drop the copied_seq nonsense, just correct the error handling v1: https://lore.kernel.org/20250806180510.3656677-1-kuba@kernel.org --- net/tls/tls.h | 2 +- net/tls/tls_strp.c | 11 ++++++++--- net/tls/tls_sw.c | 3 ++- 3 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/net/tls/tls.h b/net/tls/tls.h index 774859b63f0d..4e077068e6d9 100644 --- a/net/tls/tls.h +++ b/net/tls/tls.h @@ -196,7 +196,7 @@ void tls_strp_msg_done(struct tls_strparser *strp); int tls_rx_msg_size(struct tls_strparser *strp, struct sk_buff *skb); void tls_rx_msg_ready(struct tls_strparser *strp);
-void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh); +bool tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh); int tls_strp_msg_cow(struct tls_sw_context_rx *ctx); struct sk_buff *tls_strp_msg_detach(struct tls_sw_context_rx *ctx); int tls_strp_msg_hold(struct tls_strparser *strp, struct sk_buff_head *dst); diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c index 095cf31bae0b..d71643b494a1 100644 --- a/net/tls/tls_strp.c +++ b/net/tls/tls_strp.c @@ -475,7 +475,7 @@ static void tls_strp_load_anchor_with_queue(struct tls_strparser *strp, int len) strp->stm.offset = offset; }
-void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh) +bool tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh) { struct strp_msg *rxm; struct tls_msg *tlm; @@ -484,8 +484,11 @@ void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh) DEBUG_NET_WARN_ON_ONCE(!strp->stm.full_len);
if (!strp->copy_mode && force_refresh) { - if (WARN_ON(tcp_inq(strp->sk) < strp->stm.full_len)) - return; + if (unlikely(tcp_inq(strp->sk) < strp->stm.full_len)) { + WRITE_ONCE(strp->msg_ready, 0); + memset(&strp->stm, 0, sizeof(strp->stm)); + return false; + }
tls_strp_load_anchor_with_queue(strp, strp->stm.full_len); } @@ -495,6 +498,8 @@ void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh) rxm->offset = strp->stm.offset; tlm = tls_msg(strp->anchor); tlm->control = strp->mark; + + return true; }
/* Called with lock held on lower socket */ diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 549d1ea01a72..51c98a007dda 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1384,7 +1384,8 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, return sock_intr_errno(timeo); }
- tls_strp_msg_load(&ctx->strp, released); + if (unlikely(!tls_strp_msg_load(&ctx->strp, released))) + return tls_rx_rec_wait(sk, psock, nonblock, false);
return 1; }
Check a race where data disappears from the TCP socket after TLS signaled that its ready to receive.
ok 6 global.data_steal # RUN tls_basic.base_base ... # OK tls_basic.base_base
Signed-off-by: Jakub Kicinski kuba@kernel.org --- tools/testing/selftests/net/tls.c | 63 +++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+)
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 5ded3b3a7538..d8cfcf9bb825 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -2708,6 +2708,69 @@ TEST(prequeue) { close(cfd); }
+TEST(data_steal) { + struct tls_crypto_info_keys tls; + char buf[20000], buf2[20000]; + struct sockaddr_in addr; + int sfd, cfd, ret, fd; + int pid, status; + socklen_t len; + + len = sizeof(addr); + memrnd(buf, sizeof(buf)); + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls, 0); + + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl(INADDR_ANY); + addr.sin_port = 0; + + fd = socket(AF_INET, SOCK_STREAM, 0); + sfd = socket(AF_INET, SOCK_STREAM, 0); + + ASSERT_EQ(bind(sfd, &addr, sizeof(addr)), 0); + ASSERT_EQ(listen(sfd, 10), 0); + ASSERT_EQ(getsockname(sfd, &addr, &len), 0); + ASSERT_EQ(connect(fd, &addr, sizeof(addr)), 0); + ASSERT_GE(cfd = accept(sfd, &addr, &len), 0); + close(sfd); + + ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); + if (ret) { + ASSERT_EQ(errno, ENOENT); + SKIP(return, "no TLS support"); + } + ASSERT_EQ(setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")), 0); + + /* Spawn a child and get it into the read wait path of the underlying + * TCP socket. + */ + pid = fork(); + ASSERT_GE(pid, 0); + if (!pid) { + EXPECT_EQ(recv(cfd, buf, sizeof(buf), MSG_WAITALL), + sizeof(buf)); + exit(!__test_passed(_metadata)); + } + + usleep(2000); + ASSERT_EQ(setsockopt(fd, SOL_TLS, TLS_TX, &tls, tls.len), 0); + ASSERT_EQ(setsockopt(cfd, SOL_TLS, TLS_RX, &tls, tls.len), 0); + + EXPECT_EQ(send(fd, buf, sizeof(buf), 0), sizeof(buf)); + usleep(2000); + EXPECT_EQ(recv(cfd, buf2, sizeof(buf2), MSG_DONTWAIT), -1); + /* Don't check errno, the error will be different depending + * on what random bytes TLS interpreted as the record length. + */ + + close(fd); + close(cfd); + + EXPECT_EQ(wait(&status), pid); + EXPECT_EQ(status, 0); +} + static void __attribute__((constructor)) fips_check(void) { int res; FILE *f;
On Thu, Aug 7, 2025 at 4:29 PM Jakub Kicinski kuba@kernel.org wrote:
Check a race where data disappears from the TCP socket after TLS signaled that its ready to receive.
ok 6 global.data_steal # RUN tls_basic.base_base ... # OK tls_basic.base_base
Signed-off-by: Jakub Kicinski kuba@kernel.org
Reviewed-by: Eric Dumazet edumazet@google.com
On Thu, Aug 7, 2025 at 4:29 PM Jakub Kicinski kuba@kernel.org wrote:
TLS expects that it owns the receive queue of the TCP socket. This cannot be guaranteed in case the reader of the TCP socket entered before the TLS ULP was installed, or uses some non-standard read API (eg. zerocopy ones). Replace the WARN_ON() and a buggy early exit (which leaves anchor pointing to a freed skb) with real error handling. Wipe the parsing state and tell the reader to retry.
We already reload the anchor every time we (re)acquire the socket lock, so the only condition we need to avoid is an out of bounds read (not having enough bytes in the socket for previously parsed record len).
If some data was read from under TLS but there's enough in the queue we'll reload and decrypt what is most likely not a valid TLS record. Leading to some undefined behavior from TLS perspective (corrupting a stream? missing an alert? missing an attack?) but no kernel crash should take place.
Reported-by: William Liu will@willsroot.io Reported-by: Savino Dicanosa savy@syst3mfailure.io Link: https://lore.kernel.org/tFjq_kf7sWIG3A7CrCg_egb8CVsT_gsmHAK0_wxDPJXfIzxFAMxq... Fixes: 84c61fe1a75b ("tls: rx: do not use the standard strparser") Signed-off-by: Jakub Kicinski kuba@kernel.org
v2:
- fix the reporter tags
- drop the copied_seq nonsense, just correct the error handling
v1: https://lore.kernel.org/20250806180510.3656677-1-kuba@kernel.org
Reviewed-by: Eric Dumazet edumazet@google.com
On 8/8/25 1:29 AM, Jakub Kicinski wrote:
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 549d1ea01a72..51c98a007dda 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1384,7 +1384,8 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, return sock_intr_errno(timeo); }
- tls_strp_msg_load(&ctx->strp, released);
- if (unlikely(!tls_strp_msg_load(&ctx->strp, released)))
return tls_rx_rec_wait(sk, psock, nonblock, false);
I'm probably missing something relevant, but I don't see anything preventing the above recursion from going very deep and cause stack overflow.
Perhaps something alike:
released = false; goto <function start>
would be safer?
/P
On Tue, 12 Aug 2025 12:45:56 +0200 Paolo Abeni wrote:
On 8/8/25 1:29 AM, Jakub Kicinski wrote:
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 549d1ea01a72..51c98a007dda 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1384,7 +1384,8 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, return sock_intr_errno(timeo); }
- tls_strp_msg_load(&ctx->strp, released);
- if (unlikely(!tls_strp_msg_load(&ctx->strp, released)))
return tls_rx_rec_wait(sk, psock, nonblock, false);
I'm probably missing something relevant, but I don't see anything preventing the above recursion from going very deep and cause stack overflow.
Perhaps something alike:
released = false; goto <function start>
would be safer?
It's a tail call to the same function, the compiler should do that for us automatically. Can we not trust the compiler to be sensible? Both clang and gcc get it right.
On 8/12/25 3:28 PM, Jakub Kicinski wrote:
On Tue, 12 Aug 2025 12:45:56 +0200 Paolo Abeni wrote:
On 8/8/25 1:29 AM, Jakub Kicinski wrote:
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 549d1ea01a72..51c98a007dda 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1384,7 +1384,8 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, return sock_intr_errno(timeo); }
- tls_strp_msg_load(&ctx->strp, released);
- if (unlikely(!tls_strp_msg_load(&ctx->strp, released)))
return tls_rx_rec_wait(sk, psock, nonblock, false);
I'm probably missing something relevant, but I don't see anything preventing the above recursion from going very deep and cause stack overflow.
Perhaps something alike:
released = false; goto <function start>
would be safer?
It's a tail call to the same function, the compiler should do that for us automatically. Can we not trust the compiler to be sensible? Both clang and gcc get it right.
Sound reasonable, I dumbly did not consider it. I'm fine with the patch in the current form.
/P
Hello:
This series was applied to netdev/net.git (main) by Jakub Kicinski kuba@kernel.org:
On Thu, 7 Aug 2025 16:29:06 -0700 you wrote:
TLS expects that it owns the receive queue of the TCP socket. This cannot be guaranteed in case the reader of the TCP socket entered before the TLS ULP was installed, or uses some non-standard read API (eg. zerocopy ones). Replace the WARN_ON() and a buggy early exit (which leaves anchor pointing to a freed skb) with real error handling. Wipe the parsing state and tell the reader to retry.
[...]
Here is the summary with links: - [net,v2,1/2] tls: handle data disappearing from under the TLS ULP https://git.kernel.org/netdev/net/c/6db015fc4b5d - [net,v2,2/2] selftests: tls: test TCP stealing data from under the TLS socket https://git.kernel.org/netdev/net/c/d7e82594a45c
You are awesome, thank you!
linux-kselftest-mirror@lists.linaro.org