6.1-stable review patch. If anyone has any objections, please let me know.
------------------
From: Eric Dumazet edumazet@google.com
[ Upstream commit 5e514f1cba090e1c8fff03e92a175eccfe46305f ]
tcp_reset() ends with a sequence that is carefuly ordered.
We need to fix [e]poll bugs in the following patches, it makes sense to use a common helper.
Suggested-by: Neal Cardwell ncardwell@google.com Signed-off-by: Eric Dumazet edumazet@google.com Acked-by: Neal Cardwell ncardwell@google.com Link: https://lore.kernel.org/r/20240528125253.1966136-2-edumazet@google.com Signed-off-by: Jakub Kicinski kuba@kernel.org Stable-dep-of: 853c3bd7b791 ("tcp: fix race in tcp_write_err()") Signed-off-by: Sasha Levin sashal@kernel.org --- include/net/tcp.h | 1 + net/ipv4/tcp.c | 2 +- net/ipv4/tcp_input.c | 32 +++++++++++++++++++++----------- 3 files changed, 23 insertions(+), 12 deletions(-)
diff --git a/include/net/tcp.h b/include/net/tcp.h index 8ea1fba84eff9..cc314c383c532 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -633,6 +633,7 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb, /* tcp_input.c */ void tcp_rearm_rto(struct sock *sk); void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req); +void tcp_done_with_error(struct sock *sk, int err); void tcp_reset(struct sock *sk, struct sk_buff *skb); void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb); void tcp_fin(struct sock *sk); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7e162a34baec2..cb79919323a62 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -589,7 +589,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) */ mask |= EPOLLOUT | EPOLLWRNORM; } - /* This barrier is coupled with smp_wmb() in tcp_reset() */ + /* This barrier is coupled with smp_wmb() in tcp_done_with_error() */ smp_rmb(); if (READ_ONCE(sk->sk_err) || !skb_queue_empty_lockless(&sk->sk_error_queue)) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d72255e5262b3..b1b4f44d21370 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4356,9 +4356,26 @@ static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq) !after(seq, tp->rcv_nxt + tcp_receive_window(tp)); }
+ +void tcp_done_with_error(struct sock *sk, int err) +{ + /* This barrier is coupled with smp_rmb() in tcp_poll() */ + WRITE_ONCE(sk->sk_err, err); + smp_wmb(); + + tcp_write_queue_purge(sk); + tcp_done(sk); + + if (!sock_flag(sk, SOCK_DEAD)) + sk_error_report(sk); +} +EXPORT_SYMBOL(tcp_done_with_error); + /* When we get a reset we do this. */ void tcp_reset(struct sock *sk, struct sk_buff *skb) { + int err; + trace_tcp_receive_reset(sk);
/* mptcp can't tell us to ignore reset pkts, @@ -4370,24 +4387,17 @@ void tcp_reset(struct sock *sk, struct sk_buff *skb) /* We want the right error as BSD sees it (and indeed as we do). */ switch (sk->sk_state) { case TCP_SYN_SENT: - WRITE_ONCE(sk->sk_err, ECONNREFUSED); + err = ECONNREFUSED; break; case TCP_CLOSE_WAIT: - WRITE_ONCE(sk->sk_err, EPIPE); + err = EPIPE; break; case TCP_CLOSE: return; default: - WRITE_ONCE(sk->sk_err, ECONNRESET); + err = ECONNRESET; } - /* This barrier is coupled with smp_rmb() in tcp_poll() */ - smp_wmb(); - - tcp_write_queue_purge(sk); - tcp_done(sk); - - if (!sock_flag(sk, SOCK_DEAD)) - sk_error_report(sk); + tcp_done_with_error(sk, err); }
/*