The patch below does not apply to the 6.1-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to stable@vger.kernel.org.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y git checkout FETCH_HEAD git cherry-pick -x 57fc0f1ceaa4016354cf6f88533e20b56190e41a # <resolve conflicts, build, test, etc.> git commit -s git send-email --to 'stable@vger.kernel.org' --in-reply-to '2023062330-scrawny-capture-257c@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 57fc0f1ceaa4016354cf6f88533e20b56190e41a Mon Sep 17 00:00:00 2001 From: Paolo Abeni pabeni@redhat.com Date: Tue, 20 Jun 2023 18:24:23 +0200 Subject: [PATCH] mptcp: ensure listener is unhashed before updating the sk status
The MPTCP protocol access the listener subflow in a lockless manner in a couple of places (poll, diag). That works only if the msk itself leaves the listener status only after that the subflow itself has been closed/disconnected. Otherwise we risk deadlock in diag, as reported by Christoph.
Address the issue ensuring that the first subflow (the listener one) is always disconnected before updating the msk socket status.
Reported-by: Christoph Paasch cpaasch@apple.com Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/407 Fixes: b29fcfb54cd7 ("mptcp: full disconnect implementation") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni pabeni@redhat.com Reviewed-by: Matthieu Baerts matthieu.baerts@tessares.net Signed-off-by: Matthieu Baerts matthieu.baerts@tessares.net Signed-off-by: Jakub Kicinski kuba@kernel.org
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 59f8f3124855..1224dfca5bf3 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1047,6 +1047,7 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, if (err) return err;
+ inet_sk_state_store(newsk, TCP_LISTEN); err = kernel_listen(ssock, backlog); if (err) return err; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a66ec341485e..a6c7f2d24909 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2368,13 +2368,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, kfree_rcu(subflow, rcu); } else { /* otherwise tcp will dispose of the ssk and subflow ctx */ - if (ssk->sk_state == TCP_LISTEN) { - tcp_set_state(ssk, TCP_CLOSE); - mptcp_subflow_queue_clean(sk, ssk); - inet_csk_listen_stop(ssk); - mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED); - } - __tcp_close(ssk, 0);
/* close acquired an extra ref */ @@ -2902,10 +2895,24 @@ static __poll_t mptcp_check_readable(struct mptcp_sock *msk) return EPOLLIN | EPOLLRDNORM; }
-static void mptcp_listen_inuse_dec(struct sock *sk) +static void mptcp_check_listen_stop(struct sock *sk) { - if (inet_sk_state_load(sk) == TCP_LISTEN) - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + struct sock *ssk; + + if (inet_sk_state_load(sk) != TCP_LISTEN) + return; + + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + ssk = mptcp_sk(sk)->first; + if (WARN_ON_ONCE(!ssk || inet_sk_state_load(ssk) != TCP_LISTEN)) + return; + + lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); + mptcp_subflow_queue_clean(sk, ssk); + inet_csk_listen_stop(ssk); + mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED); + tcp_set_state(ssk, TCP_CLOSE); + release_sock(ssk); }
bool __mptcp_close(struct sock *sk, long timeout) @@ -2918,7 +2925,7 @@ bool __mptcp_close(struct sock *sk, long timeout) WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) { - mptcp_listen_inuse_dec(sk); + mptcp_check_listen_stop(sk); inet_sk_state_store(sk, TCP_CLOSE); goto cleanup; } @@ -3035,7 +3042,7 @@ static int mptcp_disconnect(struct sock *sk, int flags) if (msk->fastopening) return -EBUSY;
- mptcp_listen_inuse_dec(sk); + mptcp_check_listen_stop(sk); inet_sk_state_store(sk, TCP_CLOSE);
mptcp_stop_timer(sk);
From: Paolo Abeni pabeni@redhat.com
commit 57fc0f1ceaa4016354cf6f88533e20b56190e41a upstream.
The MPTCP protocol access the listener subflow in a lockless manner in a couple of places (poll, diag). That works only if the msk itself leaves the listener status only after that the subflow itself has been closed/disconnected. Otherwise we risk deadlock in diag, as reported by Christoph.
Address the issue ensuring that the first subflow (the listener one) is always disconnected before updating the msk socket status.
Reported-by: Christoph Paasch cpaasch@apple.com Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/407 Fixes: b29fcfb54cd7 ("mptcp: full disconnect implementation") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni pabeni@redhat.com Reviewed-by: Matthieu Baerts matthieu.baerts@tessares.net Signed-off-by: Matthieu Baerts matthieu.baerts@tessares.net Signed-off-by: Jakub Kicinski kuba@kernel.org Signed-off-by: Matthieu Baerts matthieu.baerts@tessares.net --- Conflicting with c558246ee73e ("mptcp: add statistics for mptcp socket in use"): I took the new modifications without the ones modifying the 'inuse' counters. It was on top of 786eda7a0c19 ("Linux 6.1.36-rc1") --- net/mptcp/pm_netlink.c | 1 + net/mptcp/protocol.c | 26 ++++++++++++++++++++------ 2 files changed, 21 insertions(+), 6 deletions(-)
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 01d34ee4525e..9127a7fd5269 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1039,6 +1039,7 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, return err; }
+ inet_sk_state_store(newsk, TCP_LISTEN); err = kernel_listen(ssock, backlog); if (err) { pr_warn("kernel_listen error, err=%d", err); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a2c6ce40e426..4ca61e80f4bb 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2400,12 +2400,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, kfree_rcu(subflow, rcu); } else { /* otherwise tcp will dispose of the ssk and subflow ctx */ - if (ssk->sk_state == TCP_LISTEN) { - tcp_set_state(ssk, TCP_CLOSE); - mptcp_subflow_queue_clean(sk, ssk); - inet_csk_listen_stop(ssk); - } - __tcp_close(ssk, 0);
/* close acquired an extra ref */ @@ -2939,6 +2933,24 @@ static __poll_t mptcp_check_readable(struct mptcp_sock *msk) return EPOLLIN | EPOLLRDNORM; }
+static void mptcp_check_listen_stop(struct sock *sk) +{ + struct sock *ssk; + + if (inet_sk_state_load(sk) != TCP_LISTEN) + return; + + ssk = mptcp_sk(sk)->first; + if (WARN_ON_ONCE(!ssk || inet_sk_state_load(ssk) != TCP_LISTEN)) + return; + + lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); + mptcp_subflow_queue_clean(sk, ssk); + inet_csk_listen_stop(ssk); + tcp_set_state(ssk, TCP_CLOSE); + release_sock(ssk); +} + bool __mptcp_close(struct sock *sk, long timeout) { struct mptcp_subflow_context *subflow; @@ -2949,6 +2961,7 @@ bool __mptcp_close(struct sock *sk, long timeout) WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) { + mptcp_check_listen_stop(sk); inet_sk_state_store(sk, TCP_CLOSE); goto cleanup; } @@ -3062,6 +3075,7 @@ static int mptcp_disconnect(struct sock *sk, int flags) if (msk->fastopening) return -EBUSY;
+ mptcp_check_listen_stop(sk); inet_sk_state_store(sk, TCP_CLOSE);
mptcp_stop_timer(sk);
linux-stable-mirror@lists.linaro.org