From: Chia-Yu Chang <chia-yu.chang@nokia-bell-labs.com>
For Accurate ECN, the first SYN/ACK sent by the TCP server shall set the ACE flags (see Table 1 of RFC9768) and include the AccECN option to complete the capability negotiation. However, if the TCP server needs to retransmit such a SYN/ACK (for example, because it did not receive an ACK acknowledging its SYN/ACK, or because it received a second SYN requesting AccECN support), it retransmits the SYN/ACK without the AccECN option. This is because the original SYN/ACK may have been lost due to congestion, or a middlebox may be blocking the AccECN option. Furthermore, if this retransmission also times out, the TCP server should, to expedite connection establishment, retransmit the SYN/ACK with (AE,CWR,ECE) = (0,0,0) and without the AccECN option, while remaining in AccECN feedback mode.
This complies with Section 3.2.3.2.2 of the AccECN specification (RFC9768).
Signed-off-by: Chia-Yu Chang <chia-yu.chang@nokia-bell-labs.com>
--- v6: - Use new synack_type TCP_SYNACK_RETRANS and num_retrans. --- include/net/tcp_ecn.h | 20 ++++++++++++++------ net/ipv4/tcp_output.c | 4 ++-- 2 files changed, 16 insertions(+), 8 deletions(-)
diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h index a709fb1756eb..57841dfa6705 100644 --- a/include/net/tcp_ecn.h +++ b/include/net/tcp_ecn.h @@ -649,12 +649,20 @@ static inline void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb) }
static inline void -tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th) -{ - if (tcp_rsk(req)->accecn_ok) - tcp_accecn_echo_syn_ect(th, tcp_rsk(req)->syn_ect_rcv); - else if (inet_rsk(req)->ecn_ok) - th->ece = 1; +tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th, + enum tcp_synack_type synack_type) +{ + // num_retrans will be increased after tcp_ecn_make_synack() + if (!req->num_retrans) { + if (tcp_rsk(req)->accecn_ok) + tcp_accecn_echo_syn_ect(th, tcp_rsk(req)->syn_ect_rcv); + else if (inet_rsk(req)->ecn_ok) + th->ece = 1; + } else if (tcp_rsk(req)->accecn_ok) { + th->ae = 0; + th->cwr = 0; + th->ece = 0; + } }
static inline bool tcp_accecn_option_beacon_check(const struct sock *sk) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5fa14a73d03f..c6754854ad09 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1106,7 +1106,7 @@ static unsigned int tcp_synack_options(const struct sock *sk,
if (treq->accecn_ok && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_option) && - req->num_timeout < 1 && remaining >= TCPOLEN_ACCECN_BASE) { + synack_type != TCP_SYNACK_RETRANS && remaining >= TCPOLEN_ACCECN_BASE) { opts->use_synack_ecn_bytes = 1; remaining -= tcp_options_fit_accecn(opts, 0, remaining); } @@ -4004,7 +4004,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, memset(th, 0, sizeof(struct tcphdr)); th->syn = 1; th->ack = 1; - tcp_ecn_make_synack(req, th); + tcp_ecn_make_synack(req, th, synack_type); th->source = htons(ireq->ir_num); th->dest = ireq->ir_rmt_port; skb->mark = ireq->ir_mark;