On Thu, May 22, 2025 at 3:44 PM Sasha Levin sashal@kernel.org wrote:
This is a note to let you know that I've just added the patch titled
tcp: reorganize tcp_in_ack_event() and tcp_count_delivered()
to the 6.6-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git%3Ba=su...
The filename of the patch is: tcp-reorganize-tcp_in_ack_event-and-tcp_count_delive.patch and it can be found in the queue-6.6 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree, please let stable@vger.kernel.org know about it.
May I ask why this patch was backported to stable versions ?
This is causing a packetdrill test to fail.
Ilpo, can you take a look ?
# packetdrill --ip_version=ipv6 --mtu=1520 dctcp-plb-simple.pkt dctcp-plb-simple.pkt:49: error handling packet: inconsistent flowlabels for this packet: expected: 0x1c50c vs actual: 0xb867c script packet: 0.032224 P.W 5001:6001(1000) ack 1 <nop,nop,TS val 100 ecr 100> actual packet: 0.032209 P.W 5001:6001(1000) ack 1 win 255 <nop,nop,TS val 124 ecr 100>
$ cat dctcp-plb-simple.pkt // DCTCP PLB Test // Check that DCTCP rehashes based on PLB congestion conditions
`sysctl -qw net/ipv4/tcp_ecn=1 \ net/ipv4/tcp_congestion_control=dctcp \ net/ipv4/tcp_plb_enabled=1 \ net/ipv4/tcp_rmem="4096 131072 15000000"`
// Initialize connection 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 +0 bind(3, ..., ...) = 0 +0 listen(3, 1) = 0
// ECN handshake: send EW flags in SYN packet, E flag in SYN-ACK response +0 < SEW 0:0(0) win 32792 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 6> +0 > (flowlabel 0x1) SE. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 8> +0 < . 1:1(0) ack 1 win 257 +0 accept(3, ..., ...) = 4 +0 getsockopt(4, IPPROTO_TCP, TCP_CONGESTION, "dctcp", [5]) = 0
+0 write(4, ..., 1000) = 1000 +0 > (flowlabel 0x1) P. 1:1001(1000) ack 1 <nop,nop,TS val 100 ecr 100> +0 < . 1:1(0) ack 1001 win 1000 <nop,nop,TS val 100 ecr 100> // no ECN mark, flowlabel won't change.
+0 write(4, ..., 1000) = 1000 +0 > (flowlabel 0x1) P. 1001:2001(1000) ack 1 <nop,nop,TS val 100 ecr 100> +0 < . 1:1(0) ack 2001 win 1000 <nop,nop,TS val 100 ecr 100> // No packets ECN-marked.
+0 write(4, ..., 1000) = 1000 +0 > (flowlabel 0x1) P. 2001:3001(1000) ack 1 <nop,nop,TS val 100 ecr 100> +0 < E. 1:1(0) ack 3001 win 1000 <nop,nop,TS val 100 ecr 100> // ECN-marked. 1 congested round.
+0 write(4, ..., 1000) = 1000 +0 > (flowlabel 0x1) PW. 3001:4001(1000) ack 1 <nop,nop,TS val 100 ecr 100> +0 < E. 1:1(0) ack 4001 win 1000 <nop,nop,TS val 100 ecr 100> // ECN-marked. 2 congested round.
+0 write(4, ..., 1000) = 1000 +0 > (flowlabel 0x1) PW. 4001:5001(1000) ack 1 <nop,nop,TS val 100 ecr 100> +0 < E. 1:1(0) ack 5001 win 1000 <nop,nop,TS val 100 ecr 100> // ECN-marked. 3 congested round.
+0 write(4, ..., 1000) = 1000 // Flowlabel will change next round +0 > (flowlabel 0x1) PW. 5001:6001(1000) ack 1 <nop,nop,TS val 100 ecr 100> +0 < . 1:1(0) ack 6001 win 1000 <nop,nop,TS val 100 ecr 100> // No packets ECN-marked. 0 congested round.
+0 write(4, ..., 1000) = 1000 +0 > (flowlabel 0x2) P. 6001:7001(1000) ack 1 <nop,nop,TS val 100 ecr 100> +0 < . 1:1(0) ack 7001 win 1000 <nop,nop,TS val 100 ecr 100> // No packets ECN-marked. Verify new flowlabel
+0 write(4, ..., 1000) = 1000 +0 > (flowlabel 0x2) P. 7001:8001(1000) ack 1 <nop,nop,TS val 100 ecr 100> +0 < . 1:1(0) ack 8001 win 1000 <nop,nop,TS val 100 ecr 100> // No packets ECN-marked. New flowlabel should stick
commit c39f5ebea6713c908f64e4682c26d144d9a659de Author: Ilpo Järvinen ij@kernel.org Date: Wed Mar 5 23:38:41 2025 +0100
tcp: reorganize tcp_in_ack_event() and tcp_count_delivered() [ Upstream commit 149dfb31615e22271d2525f078c95ea49bc4db24 ] - Move tcp_count_delivered() earlier and split tcp_count_delivered_ce() out of it - Move tcp_in_ack_event() later - While at it, remove the inline from tcp_in_ack_event() and let the compiler to decide Accurate ECN's heuristics does not know if there is going to be ACE field based CE counter increase or not until after rtx queue has been processed. Only then the number of ACKed bytes/pkts is available. As CE or not affects presence of FLAG_ECE, that information for tcp_in_ack_event is not yet available in the old location of the call to tcp_in_ack_event(). Signed-off-by: Ilpo Järvinen <ij@kernel.org> Signed-off-by: Chia-Yu Chang <chia-yu.chang@nokia-bell-labs.com> Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Sasha Levin <sashal@kernel.org>
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 10d38ec0ff5ac..a172248b66783 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -425,6 +425,20 @@ static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr return false; }
+static void tcp_count_delivered_ce(struct tcp_sock *tp, u32 ecn_count) +{
tp->delivered_ce += ecn_count;
+}
+/* Updates the delivered and delivered_ce counts */ +static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered,
bool ece_ack)
+{
tp->delivered += delivered;
if (ece_ack)
tcp_count_delivered_ce(tp, delivered);
+}
/* Buffer size and advertised window tuning.
- Tuning sk->sk_sndbuf, when connection enters established state.
@@ -1137,15 +1151,6 @@ void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb) } }
-/* Updates the delivered and delivered_ce counts */ -static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered,
bool ece_ack)
-{
tp->delivered += delivered;
if (ece_ack)
tp->delivered_ce += delivered;
-}
/* This procedure tags the retransmission queue when SACKs arrive.
- We have three tag bits: SACKED(S), RETRANS(R) and LOST(L).
@@ -3816,12 +3821,23 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) } }
-static inline void tcp_in_ack_event(struct sock *sk, u32 flags) +static void tcp_in_ack_event(struct sock *sk, int flag) { const struct inet_connection_sock *icsk = inet_csk(sk);
if (icsk->icsk_ca_ops->in_ack_event)
icsk->icsk_ca_ops->in_ack_event(sk, flags);
if (icsk->icsk_ca_ops->in_ack_event) {
u32 ack_ev_flags = 0;
if (flag & FLAG_WIN_UPDATE)
ack_ev_flags |= CA_ACK_WIN_UPDATE;
if (flag & FLAG_SLOWPATH) {
ack_ev_flags |= CA_ACK_SLOWPATH;
if (flag & FLAG_ECE)
ack_ev_flags |= CA_ACK_ECE;
}
icsk->icsk_ca_ops->in_ack_event(sk, ack_ev_flags);
}
}
/* Congestion control has updated the cwnd already. So if we're in @@ -3938,12 +3954,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_snd_una_update(tp, ack); flag |= FLAG_WIN_UPDATE;
tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS); } else {
u32 ack_ev_flags = CA_ACK_SLOWPATH;
if (ack_seq != TCP_SKB_CB(skb)->end_seq) flag |= FLAG_DATA; else
@@ -3955,19 +3967,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, &sack_state);
if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) flag |= FLAG_ECE;
ack_ev_flags |= CA_ACK_ECE;
} if (sack_state.sack_delivered) tcp_count_delivered(tp, sack_state.sack_delivered, flag & FLAG_ECE);
if (flag & FLAG_WIN_UPDATE)
ack_ev_flags |= CA_ACK_WIN_UPDATE;
tcp_in_ack_event(sk, ack_ev_flags); } /* This is a deviation from RFC3168 since it states that:
@@ -3994,6 +3999,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
tcp_rack_update_reo_wnd(sk, &rs);
tcp_in_ack_event(sk, flag);
if (tp->tlp_high_seq) tcp_process_tlp_ack(sk, ack, flag);
@@ -4025,6 +4032,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) return 1;
no_queue:
tcp_in_ack_event(sk, flag); /* If data was DSACKed, see if we can undo a cwnd reduction. */ if (flag & FLAG_DSACKING_ACK) { tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,