From: Chia-Yu Chang chia-yu.chang@nokia-bell-labs.com
Based on specification: https://tools.ietf.org/id/draft-ietf-tcpm-accurate-ecn-28.txt
Based on Section 3.1.5 of AccECN spec (RFC9768), a TCP Server in AccECN mode MUST NOT set ECT on any packet for the rest of the connection, if it has received or sent at least one valid SYN or Acceptable SYN/ACK with (AE,CWR,ECE) = (0,0,0) during the handshake.
In addition, a host in AccECN mode that is feeding back the IP-ECN field on a SYN or SYN/ACK MUST feed back the IP-ECN field on the latest valid SYN or acceptable SYN/ACK to arrive.
Signed-off-by: Chia-Yu Chang chia-yu.chang@nokia-bell-labs.com
--- v6: - Do not cast const struct request_sock into struct request_sock - Set tcp_accecn_fail_mode after calling tcp_rtx_synack(). --- net/ipv4/inet_connection_sock.c | 4 ++++ net/ipv4/tcp_input.c | 2 ++ net/ipv4/tcp_minisocks.c | 39 ++++++++++++++++++++++++--------- net/ipv4/tcp_output.c | 3 ++- net/ipv4/tcp_timer.c | 3 +++ 5 files changed, 40 insertions(+), 11 deletions(-)
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index b4eae731c9ba..ea5fdbf05b05 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -20,6 +20,7 @@ #include <net/tcp_states.h> #include <net/xfrm.h> #include <net/tcp.h> +#include <net/tcp_ecn.h> #include <net/sock_reuseport.h> #include <net/addrconf.h>
@@ -1103,6 +1104,9 @@ static void reqsk_timer_handler(struct timer_list *t) (!resend || !tcp_rtx_synack(sk_listener, req) || inet_rsk(req)->acked)) { + if (req->num_retrans > 1 && tcp_rsk(req)->accecn_ok) + tcp_accecn_fail_mode_set(tcp_sk(sk_listener), + TCP_ACCECN_ACE_FAIL_SEND); if (req->num_timeout++ == 0) atomic_dec(&queue->young); mod_timer(&req->rsk_timer, jiffies + tcp_reqsk_timeout(req)); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 1f354d3cf26a..7638aaa8befb 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6230,6 +6230,8 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, if (th->syn) { if (tcp_ecn_mode_accecn(tp)) { accecn_reflector = true; + tp->syn_ect_rcv = TCP_SKB_CB(skb)->ip_dsfield & + INET_ECN_MASK; if (tp->rx_opt.accecn && tp->saw_accecn_opt < TCP_ACCECN_OPT_COUNTER_SEEN) { u8 saw_opt = tcp_accecn_option_init(skb, tp->rx_opt.accecn); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 545d3ba0adcf..e6cbd4317fd5 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -750,16 +750,35 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, */ if (!tcp_oow_rate_limited(sock_net(sk), skb, LINUX_MIB_TCPACKSKIPPEDSYNRECV, - &tcp_rsk(req)->last_oow_ack_time) && - - !tcp_rtx_synack(sk, req)) { - unsigned long expires = jiffies; - - expires += tcp_reqsk_timeout(req); - if (!fastopen) - mod_timer_pending(&req->rsk_timer, expires); - else - req->rsk_timer.expires = expires; + &tcp_rsk(req)->last_oow_ack_time)) { + if (tcp_rsk(req)->accecn_ok) { + u8 ect_rcv = TCP_SKB_CB(skb)->ip_dsfield & + INET_ECN_MASK; + + tcp_rsk(req)->syn_ect_rcv = ect_rcv; + if (tcp_accecn_ace(tcp_hdr(skb)) == 0x0) { + u8 fail_mode = TCP_ACCECN_ACE_FAIL_RECV; + + tcp_accecn_fail_mode_set(tcp_sk(sk), + fail_mode); + } + } + if (!tcp_rtx_synack(sk, req)) { + u8 fail_mode = TCP_ACCECN_ACE_FAIL_SEND; + unsigned long expires = jiffies; + + if (req->num_retrans > 1 && + tcp_rsk(req)->accecn_ok) + tcp_accecn_fail_mode_set(tcp_sk(sk), + fail_mode); + + expires += tcp_reqsk_timeout(req); + if (!fastopen) + mod_timer_pending(&req->rsk_timer, + expires); + else + req->rsk_timer.expires = expires; + } } return NULL; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index c6754854ad09..9489cda7322e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -334,7 +334,8 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb, return;
if (tcp_ecn_mode_accecn(tp)) { - if (!tcp_accecn_ace_fail_recv(tp)) + if (!tcp_accecn_ace_fail_recv(tp) && + !tcp_accecn_ace_fail_send(tp)) INET_ECN_xmit(sk); tcp_accecn_set_ace(tp, skb, th); skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ACCECN; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 0672c3d8f4f1..58b9d8af4321 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -22,6 +22,7 @@ #include <linux/module.h> #include <linux/gfp.h> #include <net/tcp.h> +#include <net/tcp_ecn.h> #include <net/rstreason.h>
static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk) @@ -479,6 +480,8 @@ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req) * it's not good to give up too easily. */ tcp_rtx_synack(sk, req); + if (req->num_retrans > 1 && tcp_rsk(req)->accecn_ok) + tcp_accecn_fail_mode_set(tcp_sk(sk), TCP_ACCECN_ACE_FAIL_SEND); req->num_timeout++; tcp_update_rto_stats(sk); if (!tp->retrans_stamp)