From: Willem de Bruijn willemb@google.com
[ Upstream commit a18dfa9925b9ef6107ea3aa5814ca3c704d34a8a ]
When spanning datagram construction over multiple send calls using MSG_MORE, per datagram settings are configured on the first send.
That is when ip(6)_setup_cork stores these settings for subsequent use in __ip(6)_append_data and others.
The only flag that escaped this was dontfrag. As a result, a datagram could be constructed with df=0 on the first sendmsg, but df=1 on a next. Which is what cmsg_ip.sh does in an upcoming MSG_MORE test in the "diff" scenario.
Changing datagram conditions in the middle of constructing an skb makes this already complex code path even more convoluted. It is here unintentional. Bring this flag in line with expected sockopt/cmsg behavior.
And stop passing ipc6 to __ip6_append_data, to avoid such issues in the future. This is already the case for __ip_append_data.
inet6_cork had a 6 byte hole, so the 1B flag has no impact.
Signed-off-by: Willem de Bruijn willemb@google.com Reviewed-by: Eric Dumazet edumazet@google.com Link: https://patch.msgid.link/20250307033620.411611-3-willemdebruijn.kernel@gmail... Signed-off-by: Jakub Kicinski kuba@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- include/linux/ipv6.h | 1 + net/ipv6/ip6_output.c | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index d758c131ed5e1..89b3527e03675 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -189,6 +189,7 @@ struct inet6_cork { struct ipv6_txoptions *opt; u8 hop_limit; u8 tclass; + u8 dontfrag:1; };
/** diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 426330b8dfa47..f024d89cb0f81 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1416,6 +1416,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, cork->fl.u.ip6 = *fl6; v6_cork->hop_limit = ipc6->hlimit; v6_cork->tclass = ipc6->tclass; + v6_cork->dontfrag = ipc6->dontfrag; if (rt->dst.flags & DST_XFRM_TUNNEL) mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst); @@ -1450,7 +1451,7 @@ static int __ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, size_t length, int transhdrlen, - unsigned int flags, struct ipcm6_cookie *ipc6) + unsigned int flags) { struct sk_buff *skb, *skb_prev = NULL; unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu; @@ -1507,7 +1508,7 @@ static int __ip6_append_data(struct sock *sk, if (headersize + transhdrlen > mtu) goto emsgsize;
- if (cork->length + length > mtu - headersize && ipc6->dontfrag && + if (cork->length + length > mtu - headersize && v6_cork->dontfrag && (sk->sk_protocol == IPPROTO_UDP || sk->sk_protocol == IPPROTO_RAW)) { ipv6_local_rxpmtu(sk, fl6, mtu - headersize + @@ -1825,7 +1826,7 @@ int ip6_append_data(struct sock *sk,
return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base, &np->cork, sk_page_frag(sk), getfrag, - from, length, transhdrlen, flags, ipc6); + from, length, transhdrlen, flags); } EXPORT_SYMBOL_GPL(ip6_append_data);
@@ -2020,7 +2021,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk, err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork, ¤t->task_frag, getfrag, from, length + exthdrlen, transhdrlen + exthdrlen, - flags, ipc6); + flags); if (err) { __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork); return ERR_PTR(err);