Hi Greg,
first, we need to cherry-pick another patch first:
From 52a589d51f1008f62569bf89e95b26221ee76690 Mon Sep 17 00:00:00 2001 From: Xin Long lucien.xin@gmail.com Date: Mon, 25 Dec 2017 14:43:58 +0800 Subject: [PATCH] geneve: update skb dst pmtu on tx path
Commit a93bf0ff4490 ("vxlan: update skb dst pmtu on tx path") has fixed a performance issue caused by the change of lower dev's mtu for vxlan.
The same thing needs to be done for geneve as well.
Note that geneve cannot adjust it's mtu according to lower dev's mtu when creating it. The performance is very low later when netperfing over it without fixing the mtu manually. This patch could also avoid this issue.
Signed-off-by: Xin Long lucien.xin@gmail.com Signed-off-by: David S. Miller davem@davemloft.net
Then you can apply the following backport. A backport is required because v4.15 has commit 77552cfa39c48e695c39d0553afc8c6018e411ce which rewrote
skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info);
into
skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info);
in net/ipv6/ip6_tunnel.c which is missing:
From b2fb9a8178660f92c6ab29d3171bc44e2cb1b618 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel nicolas.dichtel@6wind.com Date: Thu, 25 Jan 2018 19:03:03 +0100 Subject: net: don't call update_pmtu unconditionally
commit f15ca723c1ebe6c1a06bc95fda6b62cd87b44559 upstream.
Some dst_ops (e.g. md_dst_ops)) doesn't set this handler. It may result to: "BUG: unable to handle kernel NULL pointer dereference at (null)"
Let's add a helper to check if update_pmtu is available before calling it.
Fixes: 52a589d51f10 ("geneve: update skb dst pmtu on tx path") Fixes: a93bf0ff4490 ("vxlan: update skb dst pmtu on tx path") CC: Roman Kapl code@rkapl.cz CC: Xin Long lucien.xin@gmail.com Signed-off-by: Nicolas Dichtel nicolas.dichtel@6wind.com Signed-off-by: David S. Miller davem@davemloft.net --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 3 +-- drivers/net/geneve.c | 4 ++-- drivers/net/vxlan.c | 6 ++---- include/net/dst.h | 8 ++++++++ net/ipv4/ip_tunnel.c | 3 +-- net/ipv4/ip_vti.c | 2 +- net/ipv6/ip6_tunnel.c | 5 ++--- net/ipv6/ip6_vti.c | 2 +- net/ipv6/sit.c | 4 ++-- 9 files changed, 20 insertions(+), 17 deletions(-)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 7774654c2ccb..7a5ed5a5391e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1447,8 +1447,7 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb, struct ipoib_dev_priv *priv = ipoib_priv(dev); int e = skb_queue_empty(&priv->cm.skb_queue);
- if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + skb_dst_update_pmtu(skb, mtu);
skb_queue_tail(&priv->cm.skb_queue, skb); if (e) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 1b0fcf0b2afa..fbc825ac97ab 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -829,7 +829,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, int mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr) - GENEVE_BASE_HLEN - info->options_len - 14;
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + skb_dst_update_pmtu(skb, mtu); }
sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); @@ -875,7 +875,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, int mtu = dst_mtu(dst) - sizeof(struct ipv6hdr) - GENEVE_BASE_HLEN - info->options_len - 14;
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + skb_dst_update_pmtu(skb, mtu); }
sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index bb44f0c6891f..3d9c5b35a4a7 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2158,8 +2158,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, if (skb_dst(skb)) { int mtu = dst_mtu(ndst) - VXLAN_HEADROOM;
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, - skb, mtu); + skb_dst_update_pmtu(skb, mtu); }
tos = ip_tunnel_ecn_encap(tos, old_iph, skb); @@ -2200,8 +2199,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, if (skb_dst(skb)) { int mtu = dst_mtu(ndst) - VXLAN6_HEADROOM;
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, - skb, mtu); + skb_dst_update_pmtu(skb, mtu); }
tos = ip_tunnel_ecn_encap(tos, old_iph, skb); diff --git a/include/net/dst.h b/include/net/dst.h index 694c2e6ae618..ebfb4328fdb1 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -520,4 +520,12 @@ static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst) } #endif
+static inline void skb_dst_update_pmtu(struct sk_buff *skb, u32 mtu) +{ + struct dst_entry *dst = skb_dst(skb); + + if (dst && dst->ops->update_pmtu) + dst->ops->update_pmtu(dst, NULL, skb, mtu); +} + #endif /* _NET_DST_H */ diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 13f7bbc0168d..a2fcc20774a6 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -521,8 +521,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, else mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
- if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + skb_dst_update_pmtu(skb, mtu);
if (skb->protocol == htons(ETH_P_IP)) { if (!skb_is_gso(skb) && diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 89453cf62158..c9cd891f69c2 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -209,7 +209,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
mtu = dst_mtu(dst); if (skb->len > mtu) { - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + skb_dst_update_pmtu(skb, mtu); if (skb->protocol == htons(ETH_P_IP)) { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 7e11f6a811f5..d61a82fd4b60 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -652,7 +652,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (rel_info > dst_mtu(skb_dst(skb2))) goto out;
- skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info); + skb_dst_update_pmtu(skb2, rel_info); } if (rel_type == ICMP_REDIRECT) skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2); @@ -1141,8 +1141,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, mtu = 576; }
- if (skb_dst(skb) && !t->parms.collect_md) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + skb_dst_update_pmtu(skb, mtu); if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) { *pmtu = mtu; err = -EMSGSIZE; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 7c0f647b5195..2493a40bc4b1 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -486,7 +486,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
mtu = dst_mtu(dst); if (!skb->ignore_df && skb->len > mtu) { - skb_dst(skb)->ops->update_pmtu(dst, NULL, skb, mtu); + skb_dst_update_pmtu(skb, mtu);
if (skb->protocol == htons(ETH_P_IPV6)) { if (mtu < IPV6_MIN_MTU) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index f03c1a562135..b35d8905794c 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -925,8 +925,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, df = 0; }
- if (tunnel->parms.iph.daddr && skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + if (tunnel->parms.iph.daddr) + skb_dst_update_pmtu(skb, mtu);
if (skb->len > mtu && !skb_is_gso(skb)) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);