From: Kevin Yang yyd@google.com
[ Upstream commit c11204c78d6966c5bda6dd05c3ac5cbb193f93e3 ]
This code fix a bug that sk->sk_txrehash gets its default enable value from sysctl_txrehash only when the socket is a TCP listener.
We should have sysctl_txrehash to set the default sk->sk_txrehash, no matter TCP, nor listerner/connector.
Tested by following packetdrill: 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +0 socket(..., SOCK_DGRAM, IPPROTO_UDP) = 4 // SO_TXREHASH == 74, default to sysctl_txrehash == 1 +0 getsockopt(3, SOL_SOCKET, 74, [1], [4]) = 0 +0 getsockopt(4, SOL_SOCKET, 74, [1], [4]) = 0
Fixes: 26859240e4ee ("txhash: Add socket option to control TX hash rethink behavior") Signed-off-by: Kevin Yang yyd@google.com Signed-off-by: Eric Dumazet edumazet@google.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- net/core/sock.c | 3 ++- net/ipv4/af_inet.c | 1 + net/ipv4/inet_connection_sock.c | 3 --- net/ipv6/af_inet6.c | 1 + 4 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/net/core/sock.c b/net/core/sock.c index 30407b2dd2ac4..ba6ea61b3458b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1524,6 +1524,8 @@ int sk_setsockopt(struct sock *sk, int level, int optname, ret = -EINVAL; break; } + if ((u8)val == SOCK_TXREHASH_DEFAULT) + val = READ_ONCE(sock_net(sk)->core.sysctl_txrehash); /* Paired with READ_ONCE() in tcp_rtx_synack() */ WRITE_ONCE(sk->sk_txrehash, (u8)val); break; @@ -3428,7 +3430,6 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_pacing_rate = ~0UL; WRITE_ONCE(sk->sk_pacing_shift, 10); sk->sk_incoming_cpu = -1; - sk->sk_txrehash = SOCK_TXREHASH_DEFAULT;
sk_rx_queue_clear(sk); /* diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 92d4237862518..5b19b77d5d759 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -347,6 +347,7 @@ static int inet_create(struct net *net, struct socket *sock, int protocol, sk->sk_destruct = inet_sock_destruct; sk->sk_protocol = protocol; sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; + sk->sk_txrehash = READ_ONCE(net->core.sysctl_txrehash);
inet->uc_ttl = -1; inet->mc_loop = 1; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 647b3c6b575ef..7152ede18f115 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1225,9 +1225,6 @@ int inet_csk_listen_start(struct sock *sk) sk->sk_ack_backlog = 0; inet_csk_delack_init(sk);
- if (sk->sk_txrehash == SOCK_TXREHASH_DEFAULT) - sk->sk_txrehash = READ_ONCE(sock_net(sk)->core.sysctl_txrehash); - /* There is race window here: we announce ourselves listening, * but this transition is still not validated by get_port(). * It is OK, because this socket enters to hash table only diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 7b0cd54da452b..fb1bf6eb0ff8e 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -221,6 +221,7 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol, np->pmtudisc = IPV6_PMTUDISC_WANT; np->repflow = net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ESTABLISHED; sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; + sk->sk_txrehash = READ_ONCE(net->core.sysctl_txrehash);
/* Init the ipv4 part of the socket since we can have sockets * using v6 API for ipv4.