On Thu, Jul 01, 2021 at 09:38:44AM -0600, David Ahern wrote:
On 7/1/21 8:59 AM, Guillaume Nault wrote:
I first tried to write this selftest using VRFs, but there were some problems that made me switch to namespaces (I don't remember precisely which ones, probably virtual tunnel devices in collect_md mode).
if you hit a problem with the test not working, send me the test script and I will take a look.
So I've looked again at what it'd take to make a VRF-based selftest. The problem is that we currently can't create collect_md tunnel interfaces in different VRFs, if the VRFs are part of the same netns.
Most tunnels explicitly refuse to create a collect_md device if another one already exists in the netns, no matter the rest of the tunnel parameters. This is the behaviour of ip_gre, ipip, ip6_gre and ip6_tunnel.
Then there's sit, which allows the creation of the second collect_md device in the other VRF. However, iproute2 doesn't set the IFLA_IPTUN_LINK attribute when it creates an external device, so it can't set up such a configuration.
Bareudp simply doesn't support VRF.
Finally, vxlan allows devices with different IFLA_VXLAN_LINK attributes to be created, but only when VXLAN_F_IPV6_LINKLOCAL is set. Removing the VXLAN_F_IPV6_LINKLOCAL test at the end of vxlan_config_validate() is enough to make two VXLAN-GPE devices work in a multi-VRF setup:
--- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -3767,8 +3767,7 @@ static int vxlan_config_validate(struct net *src_net, struct vxlan_config *conf, (conf->flags & (VXLAN_F_RCV_FLAGS | VXLAN_F_IPV6))) continue;
- if ((conf->flags & VXLAN_F_IPV6_LINKLOCAL) && - tmp->cfg.remote_ifindex != conf->remote_ifindex) + if (tmp->cfg.remote_ifindex != conf->remote_ifindex) continue;
NL_SET_ERR_MSG(extack,
Here's an example of what a full selftest looks like using VXLAN-GPE. Without the patch above, creating the second vxlan interface fails (EEXIST).
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0

# Six interfaces are used below (NETIFS[p1] .. NETIFS[p6]).
# NOTE(review): NUM_NETIFS and NETIFS are presumably consumed/provided by
# lib.sh -- confirm against the library.
NUM_NETIFS=6

source lib.sh

# Interface names. VETH_<X>_<Y> is the interface that sits in <X> and
# faces <Y>.
VETH_H1_RTA=${NETIFS[p1]}
VETH_RTA_H1=${NETIFS[p2]}
VETH_RTA_RTB=${NETIFS[p3]}
VETH_RTB_RTA=${NETIFS[p4]}
VETH_RTB_H2=${NETIFS[p5]}
VETH_H2_RTB=${NETIFS[p6]}

# MAC addresses of the host-facing links, as returned by mac_get.
MAC_H1_RTA=$(mac_get "${VETH_H1_RTA}")
MAC_RTA_H1=$(mac_get "${VETH_RTA_H1}")
MAC_RTB_H2=$(mac_get "${VETH_RTB_H2}")
MAC_H2_RTB=$(mac_get "${VETH_H2_RTB}")

# Names of the four VRFs making up the H1 <-> RTA <-> RTB <-> H2 chain.
VRF_H1="vrf-h1"
VRF_RTA="vrf-rta"
VRF_RTB="vrf-rtb"
VRF_H2="vrf-h2"
# Set up a chain of 4 VRFs connected with the veth interfaces:
# H1 <-> RTA <-> RTB <-> H2
#
# Globals (read): VRF_H1, VRF_RTA, VRF_RTB, VRF_H2,
#                 VETH_H1_RTA, VETH_RTA_H1, VETH_RTA_RTB,
#                 VETH_RTB_RTA, VETH_RTB_H2, VETH_H2_RTB
# Arguments:      none
# Calls vrf_prepare, vrf_create and __simple_if_init, which are not defined
# in this script (presumably provided by the sourced lib.sh).
setup_base_net() {
	# Keep the loop variable local so that calling this function does not
	# create or overwrite a global variable.
	local vrf

	# Initialise VRFs
	vrf_prepare

	for vrf in "${VRF_H1}" "${VRF_RTA}" "${VRF_RTB}" "${VRF_H2}"; do
		vrf_create "${vrf}"
		ip link set dev "${vrf}" up
	done

	# Assign each veth to its VRF
	__simple_if_init "${VETH_H1_RTA}" "${VRF_H1}"

	__simple_if_init "${VETH_RTA_H1}" "${VRF_RTA}"
	__simple_if_init "${VETH_RTA_RTB}" "${VRF_RTA}"

	__simple_if_init "${VETH_RTB_RTA}" "${VRF_RTB}"
	__simple_if_init "${VETH_RTB_H2}" "${VRF_RTB}"

	__simple_if_init "${VETH_H2_RTB}" "${VRF_H2}"

	# Let each veth communicate with its peer: every link gets a pair of
	# point-to-point addresses (local address + /32 peer), mirrored on
	# both ends.
	ip address add dev "${VETH_H1_RTA}" 192.0.2.0x1a peer 192.0.2.0xa1/32
	ip address add dev "${VETH_RTA_H1}" 192.0.2.0xa1 peer 192.0.2.0x1a/32

	ip address add dev "${VETH_RTA_RTB}" 192.0.2.0xab peer 192.0.2.0xba/32
	ip address add dev "${VETH_RTB_RTA}" 192.0.2.0xba peer 192.0.2.0xab/32

	ip address add dev "${VETH_RTB_H2}" 192.0.2.0xb2 peer 192.0.2.0x2b/32
	ip address add dev "${VETH_H2_RTB}" 192.0.2.0x2b peer 192.0.2.0xb2/32

	# Define host IPs for H1 and H2 and route them through RTA and RTB.
	# Don't set up routing inside RTA and RTB yet.
	ip address add 198.51.100.1/32 dev "${VETH_H1_RTA}"
	ip address add 198.51.100.2/32 dev "${VETH_H2_RTB}"

	ip route add 198.51.100.2/32 src 198.51.100.1 via 192.0.2.0xa1 \
		vrf "${VRF_H1}"
	ip route add 198.51.100.1/32 src 198.51.100.2 via 192.0.2.0xb2 \
		vrf "${VRF_H2}"
}
# Route H1 and H2 host IPs inside RTA and RTB using VXLAN-GPE encapsulation.
#
# Globals (read): VRF_RTA, VRF_RTB, VETH_RTA_H1, VETH_RTB_H2,
#                 MAC_H1_RTA, MAC_RTA_H1, MAC_H2_RTB, MAC_RTB_H2
# Arguments:      none
setup_vxlan_gpe() {
	# One external VXLAN-GPE device in each of the intermediate VRFs
	ip link add name tunnel-rta up type vxlan dev "${VRF_RTA}" gpe external
	ip link add name tunnel-rtb up type vxlan dev "${VRF_RTB}" gpe external

	# Encapsulation: packets arriving from an end host and destined to
	# the other host's IP get tunnel metadata attached and are redirected
	# to the local tunnel device.
	tc qdisc add dev "${VETH_RTA_H1}" ingress
	tc filter add dev "${VETH_RTA_H1}" ingress \
		protocol ipv4 flower dst_ip 198.51.100.2 \
		action tunnel_key set \
			src_ip 192.0.2.0xab dst_ip 192.0.2.0xba id 10 \
		action mirred egress redirect dev tunnel-rta

	tc qdisc add dev "${VETH_RTB_H2}" ingress
	tc filter add dev "${VETH_RTB_H2}" ingress \
		protocol ipv4 flower dst_ip 198.51.100.1 \
		action tunnel_key set \
			src_ip 192.0.2.0xba dst_ip 192.0.2.0xab id 10 \
		action mirred egress redirect dev tunnel-rtb

	# Decapsulation: everything received on a tunnel device gets an
	# Ethernet header pushed back on and is sent out towards the end host.
	tc qdisc add dev tunnel-rta ingress
	tc filter add dev tunnel-rta ingress matchall \
		action vlan push_eth \
			dst_mac "${MAC_H1_RTA}" src_mac "${MAC_RTA_H1}" \
		action mirred egress redirect dev "${VETH_RTA_H1}"

	tc qdisc add dev tunnel-rtb ingress
	tc filter add dev tunnel-rtb ingress matchall \
		action vlan push_eth \
			dst_mac "${MAC_H2_RTB}" src_mac "${MAC_RTB_H2}" \
		action mirred egress redirect dev "${VETH_RTB_H2}"
}
# Build the VRF chain first, then add the VXLAN-GPE forwarding on top of it.
setup_base_net
setup_vxlan_gpe

# Ping H2's host address from inside H1's VRF.
# NOTE(review): no packet count is passed to ping, so this presumably keeps
# running until it is interrupted -- confirm whether that is intended.
ip vrf exec "${VRF_H1}" ping 198.51.100.2