If a packet needs to be encapsulated towards a local destination IP, the packet will undergo a "local bypass" and be injected into the Rx path as if it was received by the target VXLAN device without undergoing encapsulation. If such a device does not exist, the packet will be dropped.
There are scenarios where we do not want to perform such a bypass, but instead want the packet to be encapsulated and locally received by a user space program for post-processing.
To that end, add a new VXLAN device attribute that controls whether a "local bypass" is performed or not. Default to performing a bypass to maintain existing behavior.
Signed-off-by: Vladimir Nikishkin vladimir@nikishkin.pw Reviewed-by: Ido Schimmel idosch@nvidia.com --- v8=>v9 1. Update commit message to mention "local bypass". 2. Update "summary phrase" to include net: as suggested by https://patchwork.kernel.org/project/netdevbpf/patch/20230510200247.1534793-... 3. Add reviewer's reviewed-by.
drivers/net/vxlan/vxlan_core.c | 21 +++++++++++++++++++-- include/net/vxlan.h | 4 +++- include/uapi/linux/if_link.h | 1 + 3 files changed, 23 insertions(+), 3 deletions(-)
diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 561fe1b314f5..78744549c1b3 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2352,7 +2352,8 @@ static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev, #endif /* Bypass encapsulation if the destination is local */ if (rt_flags & RTCF_LOCAL && - !(rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) { + !(rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) && + vxlan->cfg.flags & VXLAN_F_LOCALBYPASS) { struct vxlan_dev *dst_vxlan;
dst_release(dst); @@ -3172,6 +3173,7 @@ static void vxlan_raw_setup(struct net_device *dev) }
static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { + [IFLA_VXLAN_UNSPEC] = { .strict_start_type = IFLA_VXLAN_LOCALBYPASS }, [IFLA_VXLAN_ID] = { .type = NLA_U32 }, [IFLA_VXLAN_GROUP] = { .len = sizeof_field(struct iphdr, daddr) }, [IFLA_VXLAN_GROUP6] = { .len = sizeof(struct in6_addr) }, @@ -3202,6 +3204,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { [IFLA_VXLAN_TTL_INHERIT] = { .type = NLA_FLAG }, [IFLA_VXLAN_DF] = { .type = NLA_U8 }, [IFLA_VXLAN_VNIFILTER] = { .type = NLA_U8 }, + [IFLA_VXLAN_LOCALBYPASS] = NLA_POLICY_MAX(NLA_U8, 1), };
static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[], @@ -4011,6 +4014,17 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[], conf->flags |= VXLAN_F_UDP_ZERO_CSUM_TX; }
+ if (data[IFLA_VXLAN_LOCALBYPASS]) { + err = vxlan_nl2flag(conf, data, IFLA_VXLAN_LOCALBYPASS, + VXLAN_F_LOCALBYPASS, changelink, + true, extack); + if (err) + return err; + } else if (!changelink) { + /* default to local bypass on a new device */ + conf->flags |= VXLAN_F_LOCALBYPASS; + } + if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]) { err = vxlan_nl2flag(conf, data, IFLA_VXLAN_UDP_ZERO_CSUM6_TX, VXLAN_F_UDP_ZERO_CSUM6_TX, changelink, @@ -4232,6 +4246,7 @@ static size_t vxlan_get_size(const struct net_device *dev) nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_RX */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_TX */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_RX */ + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LOCALBYPASS */ 0; }
@@ -4308,7 +4323,9 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_VXLAN_REMCSUM_TX, !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_TX)) || nla_put_u8(skb, IFLA_VXLAN_REMCSUM_RX, - !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_RX))) + !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_RX)) || + nla_put_u8(skb, IFLA_VXLAN_LOCALBYPASS, + !!(vxlan->cfg.flags & VXLAN_F_LOCALBYPASS))) goto nla_put_failure;
if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports)) diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 20bd7d893e10..0be91ca78d3a 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -328,6 +328,7 @@ struct vxlan_dev { #define VXLAN_F_TTL_INHERIT 0x10000 #define VXLAN_F_VNIFILTER 0x20000 #define VXLAN_F_MDB 0x40000 +#define VXLAN_F_LOCALBYPASS 0x80000
/* Flags that are used in the receive path. These flags must match in * order for a socket to be shareable @@ -348,7 +349,8 @@ struct vxlan_dev { VXLAN_F_UDP_ZERO_CSUM6_TX | \ VXLAN_F_UDP_ZERO_CSUM6_RX | \ VXLAN_F_COLLECT_METADATA | \ - VXLAN_F_VNIFILTER) + VXLAN_F_VNIFILTER | \ + VXLAN_F_LOCALBYPASS)
struct net_device *vxlan_dev_create(struct net *net, const char *name, u8 name_assign_type, struct vxlan_config *conf); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 4ac1000b0ef2..0f6a0fe09bdb 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -828,6 +828,7 @@ enum { IFLA_VXLAN_TTL_INHERIT, IFLA_VXLAN_DF, IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */ + IFLA_VXLAN_LOCALBYPASS, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)