This patch set adds support for using FOU or GUE encapsulation with an ipip device operating in collect-metadata mode and a set of kfuncs for controlling encap parameters exposed to a BPF tc-hook.
BPF tc-hooks allow us to read tunnel metadata (like remote IP addresses) in the ingress path of an externally controlled tunnel interface via the bpf_skb_get_tunnel_{key,opt} bpf-helpers. Packets can then be redirected to the same or a different externally controlled tunnel interface by overwriting metadata via the bpf_skb_set_tunnel_{key,opt} helpers and a call to bpf_redirect. This enables us to redirect packets between tunnel interfaces - and potentially change the encapsulation type - using only a single BPF program.
Today this approach works fine for a couple of tunnel combinations. For example: redirecting packets between Geneve and GRE interfaces or GRE and plain ipip interfaces. However, redirecting using FOU or GUE is not supported today. The ip_tunnel module does not allow us to egress packets using additional UDP encapsulation from an ipip device in collect-metadata mode.
Patch 1 lifts this restriction by adding a struct ip_tunnel_encap to the tunnel metadata. It can be filled by a new BPF kfunc introduced in Patch 2 and evaluated by the ip_tunnel egress path. This will allow us to use FOU and GUE encap with externally controlled ipip devices.
Patch 2 introduces two new BPF kfuncs: bpf_skb_{set,get}_fou_encap. These helpers can be used to set and get UDP encap parameters from the BPF tc-hook doing the packet redirect.
Patch 3 adds BPF tunnel selftests using the two kfuncs.
Christian Ehrig (3): ipip,ip_tunnel,sit: Add FOU support for externally controlled ipip devices bpf,fou: Add bpf_skb_{set,get}_fou_encap kfuncs selftests/bpf: Test FOU kfuncs for externally controlled ipip devices
include/net/fou.h | 2 + include/net/ip_tunnels.h | 27 ++-- net/ipv4/Makefile | 2 +- net/ipv4/fou_bpf.c | 118 ++++++++++++++++++ net/ipv4/fou_core.c | 5 + net/ipv4/ip_tunnel.c | 22 +++- net/ipv4/ipip.c | 1 + net/ipv6/sit.c | 2 +- .../selftests/bpf/progs/test_tunnel_kern.c | 117 +++++++++++++++++ tools/testing/selftests/bpf/test_tunnel.sh | 81 ++++++++++++ 10 files changed, 360 insertions(+), 17 deletions(-) create mode 100644 net/ipv4/fou_bpf.c
Add tests for FOU and GUE encapsulation via the bpf_skb_{set,get}_fou_encap kfuncs, using ipip devices in collect-metadata mode.
These tests make sure that we can successfully set and obtain FOU and GUE encap parameters using ingress / egress BPF tc-hooks.
Signed-off-by: Christian Ehrig cehrig@cloudflare.com --- .../selftests/bpf/progs/test_tunnel_kern.c | 117 ++++++++++++++++++ tools/testing/selftests/bpf/test_tunnel.sh | 81 ++++++++++++ 2 files changed, 198 insertions(+)
diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index 9ab2d55ab7c0..f66af753bbbb 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -52,6 +52,21 @@ struct vxlan_metadata { __u32 gbp; };
+struct bpf_fou_encap { + __be16 sport; + __be16 dport; +}; + +enum bpf_fou_encap_type { + FOU_BPF_ENCAP_FOU, + FOU_BPF_ENCAP_GUE, +}; + +int bpf_skb_set_fou_encap(struct __sk_buff *skb_ctx, + struct bpf_fou_encap *encap, int type) __ksym; +int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx, + struct bpf_fou_encap *encap) __ksym; + struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, 1); @@ -749,6 +764,108 @@ int ipip_get_tunnel(struct __sk_buff *skb) return TC_ACT_OK; }
+SEC("tc") +int ipip_gue_set_tunnel(struct __sk_buff *skb) +{ + struct bpf_tunnel_key key = {}; + struct bpf_fou_encap encap = {}; + void *data = (void *)(long)skb->data; + struct iphdr *iph = data; + void *data_end = (void *)(long)skb->data_end; + int ret; + + if (data + sizeof(*iph) > data_end) { + log_err(1); + return TC_ACT_SHOT; + } + + key.tunnel_ttl = 64; + if (iph->protocol == IPPROTO_ICMP) + key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0); + if (ret < 0) { + log_err(ret); + return TC_ACT_SHOT; + } + + encap.sport = 0; + encap.dport = bpf_htons(5555); + + ret = bpf_skb_set_fou_encap(skb, &encap, FOU_BPF_ENCAP_GUE); + if (ret < 0) { + log_err(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("tc") +int ipip_fou_set_tunnel(struct __sk_buff *skb) +{ + struct bpf_tunnel_key key = {}; + struct bpf_fou_encap encap = {}; + void *data = (void *)(long)skb->data; + struct iphdr *iph = data; + void *data_end = (void *)(long)skb->data_end; + int ret; + + if (data + sizeof(*iph) > data_end) { + log_err(1); + return TC_ACT_SHOT; + } + + key.tunnel_ttl = 64; + if (iph->protocol == IPPROTO_ICMP) + key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0); + if (ret < 0) { + log_err(ret); + return TC_ACT_SHOT; + } + + encap.sport = 0; + encap.dport = bpf_htons(5555); + + ret = bpf_skb_set_fou_encap(skb, &encap, FOU_BPF_ENCAP_FOU); + if (ret < 0) { + log_err(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("tc") +int ipip_encap_get_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key = {}; + struct bpf_fou_encap encap = {}; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); + if (ret < 0) { + log_err(ret); + return TC_ACT_SHOT; + } + + ret = bpf_skb_get_fou_encap(skb, &encap); + if (ret < 0) { + log_err(ret); + return TC_ACT_SHOT; + } + + if (bpf_ntohs(encap.dport) != 5555) + return TC_ACT_SHOT; + + bpf_printk("%d remote ip 0x%x, sport %d, dport %d\n", ret, + key.remote_ipv4, bpf_ntohs(encap.sport), + bpf_ntohs(encap.dport)); + return TC_ACT_OK; +} + SEC("tc") int ipip6_set_tunnel(struct __sk_buff *skb) { diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh index 2dec7dbf29a2..f2379414a887 100755 --- a/tools/testing/selftests/bpf/test_tunnel.sh +++ b/tools/testing/selftests/bpf/test_tunnel.sh @@ -212,6 +212,24 @@ add_ipip_tunnel() ip addr add dev $DEV 10.1.1.200/24 }
+add_ipip_encap_tunnel() +{ + # at_ns0 namespace + ip netns exec at_ns0 ip fou add port 5555 $IPPROTO + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE \ + local 172.16.1.100 remote 172.16.1.200 \ + encap $ENCAP encap-sport auto encap-dport 5555 noencap-csum + ip netns exec at_ns0 ip link set dev $DEV_NS up + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + + # root namespace + ip fou add port 5555 $IPPROTO + ip link add dev $DEV type $TYPE external + ip link set dev $DEV up + ip addr add dev $DEV 10.1.1.200/24 +} + add_ip6tnl_tunnel() { ip netns exec at_ns0 ip addr add ::11/96 dev veth0 @@ -461,6 +479,60 @@ test_ipip() echo -e ${GREEN}"PASS: $TYPE"${NC} }
+test_ipip_gue() +{ + TYPE=ipip + DEV_NS=ipip00 + DEV=ipip11 + ret=0 + ENCAP=gue + IPPROTO=$ENCAP + + check $TYPE + config_device + add_ipip_encap_tunnel + ip link set dev veth1 mtu 1500 + attach_bpf $DEV ipip_gue_set_tunnel ipip_encap_get_tunnel + ping $PING_ARG 10.1.1.100 + check_err $? + ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 + check_err $? + cleanup + + if [ $ret -ne 0 ]; then + echo -e ${RED}"FAIL: $TYPE (GUE)"${NC} + return 1 + fi + echo -e ${GREEN}"PASS: $TYPE (GUE)"${NC} +} + +test_ipip_fou() +{ + TYPE=ipip + DEV_NS=ipip00 + DEV=ipip11 + ret=0 + ENCAP=fou + IPPROTO="ipproto 4" + + check $TYPE + config_device + add_ipip_encap_tunnel + ip link set dev veth1 mtu 1500 + attach_bpf $DEV ipip_fou_set_tunnel ipip_encap_get_tunnel + ping $PING_ARG 10.1.1.100 + check_err $? + ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 + check_err $? + cleanup + + if [ $ret -ne 0 ]; then + echo -e ${RED}"FAIL: $TYPE (FOU)"${NC} + return 1 + fi + echo -e ${GREEN}"PASS: $TYPE (FOU)"${NC} +} + test_ipip6() { TYPE=ip6tnl @@ -634,6 +706,7 @@ cleanup() ip xfrm policy delete dir in src 10.1.1.100/32 dst 10.1.1.200/32 2> /dev/null ip xfrm state delete src 172.16.1.100 dst 172.16.1.200 proto esp spi 0x1 2> /dev/null ip xfrm state delete src 172.16.1.200 dst 172.16.1.100 proto esp spi 0x2 2> /dev/null + ip fou del port 5555 gue 2> /dev/null }
cleanup_exit() @@ -708,6 +781,14 @@ bpf_tunnel_test() test_ipip errors=$(( $errors + $? ))
+ echo "Testing IPIP (GUE) tunnel..." + test_ipip_gue + errors=$(( $errors + $? )) + + echo "Testing IPIP (FOU) tunnel..." + test_ipip_fou + errors=$(( $errors + $? )) + echo "Testing IPIP6 tunnel..." test_ipip6 errors=$(( $errors + $? ))
linux-kselftest-mirror@lists.linaro.org