On 5/14/25 3:56 PM, chia-yu.chang@nokia-bell-labs.com wrote:
This patch uses the existing 1-byte holes in the tcp_sock_write_txrx group for new u8 members, but adds a 4-byte hole in tcp_sock_write_rx group after the new u32 delivered_ecn_bytes[3] member. Therefore, the group size of tcp_sock_write_rx is increased from 96 to 112.
Note that I'm still concerned by the relevant increase of the cacheline groups size. My fear is that this change could defeat some/most of the benefist from the cacheline reorg for all tcp users.
Some additional feedback from Eric and/or Neal more than welcome!
A possible alternative could be placing all the accounting fields outside of all the fastpath cache groups, i.e. after __cacheline_group_end(tcp_sock_write_rx)
@@ -710,6 +713,8 @@ static __be32 *process_tcp_ao_options(struct tcp_sock *tp, return ptr; } +#define NOP_LEFTOVER ((TCPOPT_NOP << 8) | TCPOPT_NOP)
/* Write previously computed TCP options to the packet.
- Beware: Something in the Internet is very sensitive to the ordering of
 @@ -728,8 +733,10 @@ static void tcp_options_write(struct tcphdr *th, struct tcp_sock *tp, struct tcp_out_options *opts, struct tcp_key *key) {
- u16 leftover_bytes = NOP_LEFTOVER; /* replace next NOPs if avail */ __be32 *ptr = (__be32 *)(th + 1); u16 options = opts->options; /* mungable copy */
 - int leftover_size = 2;
 if (tcp_key_is_md5(key)) { *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | @@ -763,18 +770,64 @@ static void tcp_options_write(struct tcphdr *th, struct tcp_sock *tp, *ptr++ = htonl(opts->tsecr); }
- if (OPTION_ACCECN & options) {
 const u8 ect0_idx = INET_ECN_ECT_0 - 1;const u8 ect1_idx = INET_ECN_ECT_1 - 1;const u8 ce_idx = INET_ECN_CE - 1;u32 e0b;u32 e1b;u32 ceb;u8 len;e0b = opts->ecn_bytes[ect0_idx] + TCP_ACCECN_E0B_INIT_OFFSET;e1b = opts->ecn_bytes[ect1_idx] + TCP_ACCECN_E1B_INIT_OFFSET;ceb = opts->ecn_bytes[ce_idx] + TCP_ACCECN_CEB_INIT_OFFSET;len = TCPOLEN_ACCECN_BASE +opts->num_accecn_fields * TCPOLEN_ACCECN_PERFIELD;if (opts->num_accecn_fields == 2) {*ptr++ = htonl((TCPOPT_ACCECN1 << 24) | (len << 16) |((e1b >> 8) & 0xffff));*ptr++ = htonl(((e1b & 0xff) << 24) |(ceb & 0xffffff));} else if (opts->num_accecn_fields == 1) {*ptr++ = htonl((TCPOPT_ACCECN1 << 24) | (len << 16) |((e1b >> 8) & 0xffff));leftover_bytes = ((e1b & 0xff) << 8) |TCPOPT_NOP;leftover_size = 1;} else if (opts->num_accecn_fields == 0) {leftover_bytes = (TCPOPT_ACCECN1 << 8) | len;leftover_size = 2;} else if (opts->num_accecn_fields == 3) {*ptr++ = htonl((TCPOPT_ACCECN1 << 24) | (len << 16) |((e1b >> 8) & 0xffff));*ptr++ = htonl(((e1b & 0xff) << 24) |(ceb & 0xffffff));*ptr++ = htonl(((e0b & 0xffffff) << 8) |TCPOPT_NOP);}if (tp)tp->accecn_minlen = 0;- }
 - if (unlikely(OPTION_SACK_ADVERTISE & options)) {
 
*ptr++ = htonl((TCPOPT_NOP << 24) |(TCPOPT_NOP << 16) |
*ptr++ = htonl((leftover_bytes << 16) | (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM);leftover_bytes = NOP_LEFTOVER;
AFAICS here leftover_size could be == 1. why is not reset to 2?
More importantly this looks quite fragile and error prone. *Possibly* have a tcp_accecn_write_option() helper that would rewrite the existing option as needed could be simpler and less impacting for the existing code?
/P