This patch series adds xfrm metadata helpers using the unstable kfunc call interface for the TC-BPF hooks.
This allows steering traffic towards different IPsec connections based on logic implemented in bpf programs.
The helpers are integrated into the xfrm_interface module. For this purpose the main functionality of this module is moved to xfrm_interface_core.c.
---
Series changes in v3: - tag bpf-next tree instead of ipsec-next - add IFLA_XFRM_COLLECT_METADATA sync patch
Eyal Birger (4): xfrm: interface: rename xfrm_interface.c to xfrm_interface_core.c xfrm: interface: Add unstable helpers for setting/getting XFRM metadata from TC-BPF tools: add IFLA_XFRM_COLLECT_METADATA to uapi/linux/if_link.h selftests/bpf: add xfrm_info tests
include/net/dst_metadata.h | 1 + include/net/xfrm.h | 20 + net/core/dst.c | 8 +- net/xfrm/Makefile | 8 + net/xfrm/xfrm_interface_bpf.c | 99 +++++ ...xfrm_interface.c => xfrm_interface_core.c} | 15 + tools/include/uapi/linux/if_link.h | 1 + tools/testing/selftests/bpf/config | 2 + .../selftests/bpf/prog_tests/xfrm_info.c | 365 ++++++++++++++++++ tools/testing/selftests/bpf/progs/xfrm_info.c | 40 ++ 10 files changed, 557 insertions(+), 2 deletions(-) create mode 100644 net/xfrm/xfrm_interface_bpf.c rename net/xfrm/{xfrm_interface.c => xfrm_interface_core.c} (98%) create mode 100644 tools/testing/selftests/bpf/prog_tests/xfrm_info.c create mode 100644 tools/testing/selftests/bpf/progs/xfrm_info.c
This change allows adding additional files to the xfrm_interface module.
Signed-off-by: Eyal Birger eyal.birger@gmail.com --- net/xfrm/Makefile | 2 ++ net/xfrm/{xfrm_interface.c => xfrm_interface_core.c} | 0 2 files changed, 2 insertions(+) rename net/xfrm/{xfrm_interface.c => xfrm_interface_core.c} (100%)
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index 494aa744bfb9..08a2870fdd36 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -3,6 +3,8 @@ # Makefile for the XFRM subsystem. #
+xfrm_interface-$(CONFIG_XFRM_INTERFACE) += xfrm_interface_core.o + obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ xfrm_input.o xfrm_output.o \ xfrm_sysctl.o xfrm_replay.o xfrm_device.o diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface_core.c similarity index 100% rename from net/xfrm/xfrm_interface.c rename to net/xfrm/xfrm_interface_core.c
This change adds xfrm metadata helpers using the unstable kfunc call interface for the TC-BPF hooks. This allows steering traffic towards different IPsec connections based on logic implemented in bpf programs.
This object is built based on the availabilty of BTF debug info.
The metadata percpu dsts used on TX take ownership of the original skb dsts so that they may be used as part of the xfrm transmittion logic - e.g. for MTU calculations.
Signed-off-by: Eyal Birger eyal.birger@gmail.com
---
v3: - remove redunant memset() as suggested by Martin KaFai Lau - remove __exit annotation from cleanup() function as it's used from an __init function
v2: changed added following points raised by Martin KaFai Lau: - make sure dst is refcounted prior to caching - free dst_orig regardless of CONFIG_DST_CACHE - call xfrm interface bpf cleanup in case of kfunc registration errors --- include/net/dst_metadata.h | 1 + include/net/xfrm.h | 20 +++++++ net/core/dst.c | 8 ++- net/xfrm/Makefile | 6 +++ net/xfrm/xfrm_interface_bpf.c | 99 ++++++++++++++++++++++++++++++++++ net/xfrm/xfrm_interface_core.c | 15 ++++++ 6 files changed, 147 insertions(+), 2 deletions(-) create mode 100644 net/xfrm/xfrm_interface_bpf.c
diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index a454cf4327fe..1b7fae4c6b24 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -26,6 +26,7 @@ struct macsec_info { struct xfrm_md_info { u32 if_id; int link; + struct dst_entry *dst_orig; };
struct metadata_dst { diff --git a/include/net/xfrm.h b/include/net/xfrm.h index e0cc6791c001..5e5fea3087b6 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -2086,4 +2086,24 @@ static inline bool xfrm6_local_dontfrag(const struct sock *sk) return false; } #endif + +#if (IS_BUILTIN(CONFIG_XFRM_INTERFACE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \ + (IS_MODULE(CONFIG_XFRM_INTERFACE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) + +extern int register_xfrm_interface_bpf(void); +extern void cleanup_xfrm_interface_bpf(void); + +#else + +static inline int register_xfrm_interface_bpf(void) +{ + return 0; +} + +static inline void cleanup_xfrm_interface_bpf(void) +{ +} + +#endif + #endif /* _NET_XFRM_H */ diff --git a/net/core/dst.c b/net/core/dst.c index bc9c9be4e080..bb14a0392388 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -316,6 +316,8 @@ void metadata_dst_free(struct metadata_dst *md_dst) if (md_dst->type == METADATA_IP_TUNNEL) dst_cache_destroy(&md_dst->u.tun_info.dst_cache); #endif + if (md_dst->type == METADATA_XFRM) + dst_release(md_dst->u.xfrm_info.dst_orig); kfree(md_dst); } EXPORT_SYMBOL_GPL(metadata_dst_free); @@ -340,16 +342,18 @@ EXPORT_SYMBOL_GPL(metadata_dst_alloc_percpu);
void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst) { -#ifdef CONFIG_DST_CACHE int cpu;
for_each_possible_cpu(cpu) { struct metadata_dst *one_md_dst = per_cpu_ptr(md_dst, cpu);
+#ifdef CONFIG_DST_CACHE if (one_md_dst->type == METADATA_IP_TUNNEL) dst_cache_destroy(&one_md_dst->u.tun_info.dst_cache); - } #endif + if (one_md_dst->type == METADATA_XFRM) + dst_release(one_md_dst->u.xfrm_info.dst_orig); + } free_percpu(md_dst); } EXPORT_SYMBOL_GPL(metadata_dst_free_percpu); diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index 08a2870fdd36..cd47f88921f5 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -5,6 +5,12 @@
xfrm_interface-$(CONFIG_XFRM_INTERFACE) += xfrm_interface_core.o
+ifeq ($(CONFIG_XFRM_INTERFACE),m) +xfrm_interface-$(CONFIG_DEBUG_INFO_BTF_MODULES) += xfrm_interface_bpf.o +else ifeq ($(CONFIG_XFRM_INTERFACE),y) +xfrm_interface-$(CONFIG_DEBUG_INFO_BTF) += xfrm_interface_bpf.o +endif + obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ xfrm_input.o xfrm_output.o \ xfrm_sysctl.o xfrm_replay.o xfrm_device.o diff --git a/net/xfrm/xfrm_interface_bpf.c b/net/xfrm/xfrm_interface_bpf.c new file mode 100644 index 000000000000..7938bf6cbb32 --- /dev/null +++ b/net/xfrm/xfrm_interface_bpf.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Unstable XFRM Helpers for TC-BPF hook + * + * These are called from SCHED_CLS BPF programs. Note that it is + * allowed to break compatibility for these functions since the interface they + * are exposed through to BPF programs is explicitly unstable. + */ + +#include <linux/bpf.h> +#include <linux/btf_ids.h> + +#include <net/dst_metadata.h> +#include <net/xfrm.h> + +struct bpf_xfrm_info { + u32 if_id; + int link; +}; + +static struct metadata_dst __percpu *xfrm_md_dst; +__diag_push(); +__diag_ignore_all("-Wmissing-prototypes", + "Global functions as their definitions will be in xfrm_interface BTF"); + +__used noinline +int bpf_skb_get_xfrm_info(struct __sk_buff *skb_ctx, struct bpf_xfrm_info *to) +{ + struct sk_buff *skb = (struct sk_buff *)skb_ctx; + struct xfrm_md_info *info; + + memset(to, 0, sizeof(*to)); + + info = skb_xfrm_md_info(skb); + if (!info) + return -EINVAL; + + to->if_id = info->if_id; + to->link = info->link; + return 0; +} + +__used noinline +int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx, + const struct bpf_xfrm_info *from) +{ + struct sk_buff *skb = (struct sk_buff *)skb_ctx; + struct metadata_dst *md_dst; + struct xfrm_md_info *info; + + if (unlikely(skb_metadata_dst(skb))) + return -EINVAL; + + md_dst = this_cpu_ptr(xfrm_md_dst); + + info = &md_dst->u.xfrm_info; + + info->if_id = from->if_id; + info->link = from->link; + skb_dst_force(skb); + info->dst_orig = skb_dst(skb); + + dst_hold((struct dst_entry *)md_dst); + skb_dst_set(skb, (struct dst_entry *)md_dst); + return 0; +} + +__diag_pop() + +BTF_SET8_START(xfrm_ifc_kfunc_set) +BTF_ID_FLAGS(func, bpf_skb_get_xfrm_info) +BTF_ID_FLAGS(func, bpf_skb_set_xfrm_info) +BTF_SET8_END(xfrm_ifc_kfunc_set) + +static const struct btf_kfunc_id_set xfrm_interface_kfunc_set = { + .owner = THIS_MODULE, + .set = &xfrm_ifc_kfunc_set, +}; + +int __init register_xfrm_interface_bpf(void) +{ + int err; + + xfrm_md_dst = metadata_dst_alloc_percpu(0, METADATA_XFRM, + GFP_KERNEL); + if (!xfrm_md_dst) + return -ENOMEM; + err = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, + &xfrm_interface_kfunc_set); + if (err < 0) { + cleanup_xfrm_interface_bpf(); + return err; + } + return 0; +} + +void cleanup_xfrm_interface_bpf(void) +{ + metadata_dst_free_percpu(xfrm_md_dst); +} diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c index 5a67b120c4db..1e1e8e965939 100644 --- a/net/xfrm/xfrm_interface_core.c +++ b/net/xfrm/xfrm_interface_core.c @@ -396,6 +396,14 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
if_id = md_info->if_id; fl->flowi_oif = md_info->link; + if (md_info->dst_orig) { + struct dst_entry *tmp_dst = dst; + + dst = md_info->dst_orig; + skb_dst_set(skb, dst); + md_info->dst_orig = NULL; + dst_release(tmp_dst); + } } else { if_id = xi->p.if_id; } @@ -1162,12 +1170,18 @@ static int __init xfrmi_init(void) if (err < 0) goto rtnl_link_failed;
+ err = register_xfrm_interface_bpf(); + if (err < 0) + goto kfunc_failed; + lwtunnel_encap_add_ops(&xfrmi_encap_ops, LWTUNNEL_ENCAP_XFRM);
xfrm_if_register_cb(&xfrm_if_cb);
return err;
+kfunc_failed: + rtnl_link_unregister(&xfrmi_link_ops); rtnl_link_failed: xfrmi6_fini(); xfrmi6_failed: @@ -1183,6 +1197,7 @@ static void __exit xfrmi_fini(void) { xfrm_if_unregister_cb(); lwtunnel_encap_del_ops(&xfrmi_encap_ops, LWTUNNEL_ENCAP_XFRM); + cleanup_xfrm_interface_bpf(); rtnl_link_unregister(&xfrmi_link_ops); xfrmi4_fini(); xfrmi6_fini();
On 12/1/22 1:14 PM, Eyal Birger wrote:
This change adds xfrm metadata helpers using the unstable kfunc call interface for the TC-BPF hooks. This allows steering traffic towards different IPsec connections based on logic implemented in bpf programs.
This object is built based on the availabilty of BTF debug info.
s/availabilty/availability/
The metadata percpu dsts used on TX take ownership of the original skb dsts so that they may be used as part of the xfrm transmittion logic -
s/transmittion/transmission/
[ ... ]
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index 08a2870fdd36..cd47f88921f5 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -5,6 +5,12 @@ xfrm_interface-$(CONFIG_XFRM_INTERFACE) += xfrm_interface_core.o +ifeq ($(CONFIG_XFRM_INTERFACE),m) +xfrm_interface-$(CONFIG_DEBUG_INFO_BTF_MODULES) += xfrm_interface_bpf.o +else ifeq ($(CONFIG_XFRM_INTERFACE),y) +xfrm_interface-$(CONFIG_DEBUG_INFO_BTF) += xfrm_interface_bpf.o +endif
- obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ xfrm_input.o xfrm_output.o \ xfrm_sysctl.o xfrm_replay.o xfrm_device.o
diff --git a/net/xfrm/xfrm_interface_bpf.c b/net/xfrm/xfrm_interface_bpf.c new file mode 100644 index 000000000000..7938bf6cbb32 --- /dev/null +++ b/net/xfrm/xfrm_interface_bpf.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Unstable XFRM Helpers for TC-BPF hook
- These are called from SCHED_CLS BPF programs. Note that it is
- allowed to break compatibility for these functions since the interface they
- are exposed through to BPF programs is explicitly unstable.
- */
+#include <linux/bpf.h> +#include <linux/btf_ids.h>
+#include <net/dst_metadata.h> +#include <net/xfrm.h>
+struct bpf_xfrm_info {
Documentation is needed here and...
- u32 if_id;
- int link;
+};
+static struct metadata_dst __percpu *xfrm_md_dst; +__diag_push(); +__diag_ignore_all("-Wmissing-prototypes",
"Global functions as their definitions will be in xfrm_interface BTF");
+__used noinline +int bpf_skb_get_xfrm_info(struct __sk_buff *skb_ctx, struct bpf_xfrm_info *to)
... also this get and the following set kfunc.
+{
- struct sk_buff *skb = (struct sk_buff *)skb_ctx;
- struct xfrm_md_info *info;
- memset(to, 0, sizeof(*to));
- info = skb_xfrm_md_info(skb);
- if (!info)
return -EINVAL;
- to->if_id = info->if_id;
- to->link = info->link;
- return 0;
+}
+__used noinline +int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx,
const struct bpf_xfrm_info *from)
+{
- struct sk_buff *skb = (struct sk_buff *)skb_ctx;
- struct metadata_dst *md_dst;
- struct xfrm_md_info *info;
- if (unlikely(skb_metadata_dst(skb)))
return -EINVAL;
- md_dst = this_cpu_ptr(xfrm_md_dst);
- info = &md_dst->u.xfrm_info;
- info->if_id = from->if_id;
- info->link = from->link;
- skb_dst_force(skb);
- info->dst_orig = skb_dst(skb);
- dst_hold((struct dst_entry *)md_dst);
- skb_dst_set(skb, (struct dst_entry *)md_dst);
- return 0;
+}
+__diag_pop()
+BTF_SET8_START(xfrm_ifc_kfunc_set) +BTF_ID_FLAGS(func, bpf_skb_get_xfrm_info) +BTF_ID_FLAGS(func, bpf_skb_set_xfrm_info) +BTF_SET8_END(xfrm_ifc_kfunc_set)
+static const struct btf_kfunc_id_set xfrm_interface_kfunc_set = {
- .owner = THIS_MODULE,
- .set = &xfrm_ifc_kfunc_set,
+};
+int __init register_xfrm_interface_bpf(void) +{
- int err;
- xfrm_md_dst = metadata_dst_alloc_percpu(0, METADATA_XFRM,
GFP_KERNEL);
- if (!xfrm_md_dst)
return -ENOMEM;
- err = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS,
&xfrm_interface_kfunc_set);
- if (err < 0) {
cleanup_xfrm_interface_bpf();
nit. Directly call metadata_dst_free_percpu(xfrm_md_dst), easier to read.
return err;
- }
- return 0;
+}
+void cleanup_xfrm_interface_bpf(void) +{
- metadata_dst_free_percpu(xfrm_md_dst);
+}
On 12/1/22 1:14 PM, Eyal Birger wrote:
+int bpf_skb_get_xfrm_info(struct __sk_buff *skb_ctx, struct bpf_xfrm_info *to) +{
- struct sk_buff *skb = (struct sk_buff *)skb_ctx;
- struct xfrm_md_info *info;
- memset(to, 0, sizeof(*to));
This zero-ing is also not needed. verifier ensures "to" is initialized before calling this kfunc.
- info = skb_xfrm_md_info(skb);
- if (!info)
return -EINVAL;
- to->if_id = info->if_id;
- to->link = info->link;
- return 0;
+}
Needed for XFRM metadata tests.
Signed-off-by: Eyal Birger eyal.birger@gmail.com --- tools/include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+)
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 0242f31e339c..901d98b865a1 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -673,6 +673,7 @@ enum { IFLA_XFRM_UNSPEC, IFLA_XFRM_LINK, IFLA_XFRM_IF_ID, + IFLA_XFRM_COLLECT_METADATA, __IFLA_XFRM_MAX };
Test the xfrm_info kfunc helpers.
Note: the tests require support for xfrmi "external" mode in iproute2.
The test setup creates three name spaces - NS0, NS1, NS2.
XFRM tunnels are setup between NS0 and the two other NSs.
The kfunc helpers are used to steer traffic from NS0 to the other NSs based on a userspace populated bpf global variable and validate that the return traffic had arrived from the desired NS.
Signed-off-by: Eyal Birger eyal.birger@gmail.com
--- v3: changes suggested by Martin KaFai Lau: - rename test_xfrm_info{,_kern}.c -> xfrm_info.c - avoid running in a separate thread - simplify test setup - verify underlay/overlay success - create NS0 xfrmi external device using netlink to avoid dependency on newer iproute2 - remove use of bpf_trace_printk() in test programs - add "preserve_access_index" attribute annotation in test program struct definition - use bss variables access instead of a map for userspace/bpf interface
v2: - use an lwt route in NS1 for testing that flow as well - indendation fix --- tools/testing/selftests/bpf/config | 2 + .../selftests/bpf/prog_tests/xfrm_info.c | 365 ++++++++++++++++++ tools/testing/selftests/bpf/progs/xfrm_info.c | 40 ++ 3 files changed, 407 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/xfrm_info.c create mode 100644 tools/testing/selftests/bpf/progs/xfrm_info.c
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index f9034ea00bc9..3543c76cef56 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -23,6 +23,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_IMA=y CONFIG_IMA_READ_POLICY=y CONFIG_IMA_WRITE_POLICY=y +CONFIG_INET_ESP=y CONFIG_IP_NF_FILTER=y CONFIG_IP_NF_RAW=y CONFIG_IP_NF_TARGET_SYNPROXY=y @@ -74,3 +75,4 @@ CONFIG_TEST_BPF=y CONFIG_USERFAULTFD=y CONFIG_VXLAN=y CONFIG_XDP_SOCKETS=y +CONFIG_XFRM_INTERFACE=y diff --git a/tools/testing/selftests/bpf/prog_tests/xfrm_info.c b/tools/testing/selftests/bpf/prog_tests/xfrm_info.c new file mode 100644 index 000000000000..bc5badaeb7fb --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xfrm_info.c @@ -0,0 +1,365 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +/* + * Topology: + * --------- + * NS0 namespace | NS1 namespace | NS2 namespace + * | | + * +---------------+ | +---------------+ | + * | ipsec0 |---------| ipsec0 | | + * | 192.168.1.100 | | | 192.168.1.200 | | + * | if_id: bpf | | +---------------+ | + * +---------------+ | | + * | | | +---------------+ + * | | | | ipsec0 | + * ------------------------------------------| 192.168.1.200 | + * | | +---------------+ + * | | + * | | (overlay network) + * ------------------------------------------------------ + * | | (underlay network) + * +--------------+ | +--------------+ | + * | veth01 |----------| veth10 | | + * | 172.16.1.100 | | | 172.16.1.200 | | + * ---------------+ | +--------------+ | + * | | + * +--------------+ | | +--------------+ + * | veth02 |-----------------------------------| veth20 | + * | 172.16.2.100 | | | | 172.16.2.200 | + * +--------------+ | | +--------------+ + * + * + * Test Packet flow + * ----------- + * The tests perform 'ping 192.168.1.200' from the NS0 namespace: + * 1) request is routed to NS0 ipsec0 + * 2) NS0 ipsec0 tc egress BPF program is triggered and sets the if_id based + * on a map value. This makes the ipsec0 device in external mode select the + * destination tunnel + * 3) ping reaches the other namespace (NS1 or NS2 based on which if_id was + * used) and response is sent + * 4) response is received on NS0 ipsec0, tc ingress program is triggered and + * records the response if_id in the map + * 5) requested if_id is compared with received if_id + */ + +#include <net/if.h> +#include <linux/rtnetlink.h> +#include <linux/if_link.h> + +#include "test_progs.h" +#include "network_helpers.h" +#include "xfrm_info.skel.h" + +#define NS0 "xfrm_test_ns0" +#define NS1 "xfrm_test_ns1" +#define NS2 "xfrm_test_ns2" + +#define IF_ID_0_TO_1 1 +#define IF_ID_0_TO_2 2 +#define IF_ID_1 3 +#define IF_ID_2 4 + +#define IP4_ADDR_VETH01 "172.16.1.100" +#define IP4_ADDR_VETH10 "172.16.1.200" +#define IP4_ADDR_VETH02 "172.16.2.100" +#define IP4_ADDR_VETH20 "172.16.2.200" + +#define ESP_DUMMY_PARAMS \ + "proto esp aead 'rfc4106(gcm(aes))' " \ + "0xe4d8f4b4da1df18a3510b3781496daa82488b713 128 mode tunnel " + +#define PING_ARGS "-i 0.01 -c 3 -w 10 -q" + +#define SYS(fmt, ...) \ + ({ \ + char cmd[1024]; \ + snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ + if (!ASSERT_OK(system(cmd), cmd)) \ + goto fail; \ + }) + +#define SYS_NOFAIL(fmt, ...) \ + ({ \ + char cmd[1024]; \ + snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ + system(cmd); \ + }) + +static int attach_tc_prog(struct bpf_tc_hook *hook, int igr_fd, int egr_fd) +{ + DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1, + .priority = 1, .prog_fd = igr_fd); + DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1, + .priority = 1, .prog_fd = egr_fd); + int ret; + + ret = bpf_tc_hook_create(hook); + if (!ASSERT_OK(ret, "create tc hook")) + return ret; + + if (igr_fd >= 0) { + hook->attach_point = BPF_TC_INGRESS; + ret = bpf_tc_attach(hook, &opts1); + if (!ASSERT_OK(ret, "bpf_tc_attach")) { + bpf_tc_hook_destroy(hook); + return ret; + } + } + + if (egr_fd >= 0) { + hook->attach_point = BPF_TC_EGRESS; + ret = bpf_tc_attach(hook, &opts2); + if (!ASSERT_OK(ret, "bpf_tc_attach")) { + bpf_tc_hook_destroy(hook); + return ret; + } + } + + return 0; +} + +static void cleanup(void) +{ + SYS_NOFAIL("test -f /var/run/netns/" NS0 " && ip netns delete " NS0); + SYS_NOFAIL("test -f /var/run/netns/" NS1 " && ip netns delete " NS1); + SYS_NOFAIL("test -f /var/run/netns/" NS2 " && ip netns delete " NS2); +} + +static int config_underlay(void) +{ + SYS("ip netns add " NS0); + SYS("ip netns add " NS1); + SYS("ip netns add " NS2); + + /* NS0 <-> NS1 [veth01 <-> veth10] */ + SYS("ip link add veth01 netns " NS0 " type veth peer name veth10 netns " NS1); + SYS("ip -net " NS0 " addr add " IP4_ADDR_VETH01 "/24 dev veth01"); + SYS("ip -net " NS0 " link set dev veth01 up"); + SYS("ip -net " NS1 " addr add " IP4_ADDR_VETH10 "/24 dev veth10"); + SYS("ip -net " NS1 " link set dev veth10 up"); + + /* NS0 <-> NS2 [veth02 <-> veth20] */ + SYS("ip link add veth02 netns " NS0 " type veth peer name veth20 netns " NS2); + SYS("ip -net " NS0 " addr add " IP4_ADDR_VETH02 "/24 dev veth02"); + SYS("ip -net " NS0 " link set dev veth02 up"); + SYS("ip -net " NS2 " addr add " IP4_ADDR_VETH20 "/24 dev veth20"); + SYS("ip -net " NS2 " link set dev veth20 up"); + + return 0; +fail: + return -1; +} + +static int setup_xfrm_tunnel_ns(const char *ns, const char *ipv4_local, + const char *ipv4_remote, int if_id) +{ + /* State: local -> remote */ + SYS("ip -net %s xfrm state add src %s dst %s spi 1 " + ESP_DUMMY_PARAMS "if_id %d", ns, ipv4_local, ipv4_remote, if_id); + + /* State: local <- remote */ + SYS("ip -net %s xfrm state add src %s dst %s spi 1 " + ESP_DUMMY_PARAMS "if_id %d", ns, ipv4_remote, ipv4_local, if_id); + + /* Policy: local -> remote */ + SYS("ip -net %s xfrm policy add dir out src 0.0.0.0/0 dst 0.0.0.0/0 " + "if_id %d tmpl src %s dst %s proto esp mode tunnel if_id %d", ns, + if_id, ipv4_local, ipv4_remote, if_id); + + /* Policy: local <- remote */ + SYS("ip -net %s xfrm policy add dir in src 0.0.0.0/0 dst 0.0.0.0/0 " + "if_id %d tmpl src %s dst %s proto esp mode tunnel if_id %d", ns, + if_id, ipv4_remote, ipv4_local, if_id); + + return 0; +fail: + return -1; +} + +static int setup_xfrm_tunnel(const char *ns_a, const char *ns_b, + const char *ipv4_a, const char *ipv4_b, + int if_id_a, int if_id_b) +{ + return setup_xfrm_tunnel_ns(ns_a, ipv4_a, ipv4_b, if_id_a) || + setup_xfrm_tunnel_ns(ns_b, ipv4_b, ipv4_a, if_id_b); +} + +static struct rtattr *rtattr_add(struct nlmsghdr *nh, unsigned short type, + unsigned short len) +{ + struct rtattr *rta = + (struct rtattr *)((uint8_t *)nh + RTA_ALIGN(nh->nlmsg_len)); + rta->rta_type = type; + rta->rta_len = RTA_LENGTH(len); + nh->nlmsg_len = RTA_ALIGN(nh->nlmsg_len) + RTA_ALIGN(rta->rta_len); + return rta; +} + +static struct rtattr *rtattr_add_str(struct nlmsghdr *nh, unsigned short type, + const char *s) +{ + struct rtattr *rta = rtattr_add(nh, type, strlen(s)); + + memcpy(RTA_DATA(rta), s, strlen(s)); + return rta; +} + +static struct rtattr *rtattr_begin(struct nlmsghdr *nh, unsigned short type) +{ + return rtattr_add(nh, type, 0); +} + +static void rtattr_end(struct nlmsghdr *nh, struct rtattr *attr) +{ + uint8_t *end = (uint8_t *)nh + nh->nlmsg_len; + + attr->rta_len = end - (uint8_t *)attr; +} + +static int setup_xfrmi_external_dev(const char *ns) +{ + struct { + struct nlmsghdr nh; + struct ifinfomsg info; + unsigned char data[128]; + } req; + struct rtattr *link_info, *info_data; + struct nstoken *nstoken; + int ret = -1, sock = 0; + struct nlmsghdr *nh; + + memset(&req, 0, sizeof(req)); + nh = &req.nh; + nh->nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + nh->nlmsg_type = RTM_NEWLINK; + nh->nlmsg_flags |= NLM_F_CREATE | NLM_F_REQUEST; + + rtattr_add_str(nh, IFLA_IFNAME, "ipsec0"); + link_info = rtattr_begin(nh, IFLA_LINKINFO); + rtattr_add_str(nh, IFLA_INFO_KIND, "xfrm"); + info_data = rtattr_begin(nh, IFLA_INFO_DATA); + rtattr_add(nh, IFLA_XFRM_COLLECT_METADATA, 0); + rtattr_end(nh, info_data); + rtattr_end(nh, link_info); + + nstoken = open_netns(ns); + + sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE); + if (!ASSERT_GT(sock, 0, "netlink socket")) + goto Exit; + ret = send(sock, nh, nh->nlmsg_len, 0); + if (!ASSERT_EQ(ret, nh->nlmsg_len, "netlink send length")) + goto Exit; + + ret = 0; +Exit: + if (sock) + close(sock); + close_netns(nstoken); + return ret; +} + +static int config_overlay(void) +{ + if (setup_xfrm_tunnel(NS0, NS1, IP4_ADDR_VETH01, IP4_ADDR_VETH10, + IF_ID_0_TO_1, IF_ID_1)) + goto fail; + if (setup_xfrm_tunnel(NS0, NS2, IP4_ADDR_VETH02, IP4_ADDR_VETH20, + IF_ID_0_TO_2, IF_ID_2)) + goto fail; + + /* Older iproute2 doesn't support this option */ + if (!ASSERT_OK(setup_xfrmi_external_dev(NS0), "xfrmi")) + goto fail; + + SYS("ip -net " NS0 " addr add 192.168.1.100/24 dev ipsec0"); + SYS("ip -net " NS0 " link set dev ipsec0 up"); + + SYS("ip -net " NS1 " link add ipsec0 type xfrm if_id %d", IF_ID_1); + SYS("ip -net " NS1 " addr add 192.168.1.200/24 dev ipsec0"); + SYS("ip -net " NS1 " link set dev ipsec0 up"); + + SYS("ip -net " NS2 " link add ipsec0 type xfrm if_id %d", IF_ID_2); + SYS("ip -net " NS2 " addr add 192.168.1.200/24 dev ipsec0"); + SYS("ip -net " NS2 " link set dev ipsec0 up"); + + return 0; +fail: + return -1; +} + +static int test_ping(int family, const char *addr) +{ + SYS("%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr); + return 0; +fail: + return -1; +} + +static int test_xfrm_ping(struct xfrm_info *skel, u32 if_id) +{ + skel->bss->req_if_id = if_id; + + if (test_ping(AF_INET, "192.168.1.200")) + return -1; + + if (!ASSERT_EQ(skel->bss->resp_if_id, if_id, "if_id")) + return -1; + + return 0; +} + +static void _test_xfrm_info(void) +{ + int get_xfrm_info_prog_fd, set_xfrm_info_prog_fd; + struct xfrm_info *skel = NULL; + struct nstoken *nstoken = NULL; + int ifindex = -1; + DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, + .attach_point = BPF_TC_INGRESS); + + /* load and attach bpf progs to ipsec dev tc hook point */ + skel = xfrm_info__open_and_load(); + if (!ASSERT_OK_PTR(skel, "xfrm_info__open_and_load")) + goto done; + nstoken = open_netns(NS0); + ifindex = if_nametoindex("ipsec0"); + if (!ASSERT_NEQ(ifindex, 0, "ipsec0 ifindex")) + goto done; + tc_hook.ifindex = ifindex; + set_xfrm_info_prog_fd = bpf_program__fd(skel->progs.set_xfrm_info); + get_xfrm_info_prog_fd = bpf_program__fd(skel->progs.get_xfrm_info); + if (!ASSERT_GE(set_xfrm_info_prog_fd, 0, "bpf_program__fd")) + goto done; + if (!ASSERT_GE(get_xfrm_info_prog_fd, 0, "bpf_program__fd")) + goto done; + if (attach_tc_prog(&tc_hook, get_xfrm_info_prog_fd, + set_xfrm_info_prog_fd)) + goto done; + if (!ASSERT_EQ(test_xfrm_ping(skel, IF_ID_0_TO_1), 0, "ping " NS1)) + goto done; + if (!ASSERT_EQ(test_xfrm_ping(skel, IF_ID_0_TO_2), 0, "ping " NS2)) + goto done; + +done: + if (nstoken) + close_netns(nstoken); + if (skel) + xfrm_info__destroy(skel); +} + +void test_xfrm_info(void) +{ + cleanup(); + + if (!ASSERT_OK(config_underlay(), "config_underlay")) + return; + if (!ASSERT_OK(config_overlay(), "config_overlay")) + return; + + if (test__start_subtest("xfrm_info")) + _test_xfrm_info(); + + cleanup(); +} diff --git a/tools/testing/selftests/bpf/progs/xfrm_info.c b/tools/testing/selftests/bpf/progs/xfrm_info.c new file mode 100644 index 000000000000..908579310bf9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/xfrm_info.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <linux/pkt_cls.h> +#include <bpf/bpf_helpers.h> + +__u32 req_if_id; +__u32 resp_if_id; + +struct bpf_xfrm_info { + __u32 if_id; + int link; +} __attribute__((preserve_access_index)); + +int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx, + const struct bpf_xfrm_info *from) __ksym; +int bpf_skb_get_xfrm_info(struct __sk_buff *skb_ctx, + struct bpf_xfrm_info *to) __ksym; + +SEC("tc") +int set_xfrm_info(struct __sk_buff *skb) +{ + struct bpf_xfrm_info info = { .if_id = req_if_id }; + + return bpf_skb_set_xfrm_info(skb, &info) ? TC_ACT_SHOT : TC_ACT_UNSPEC; +} + +SEC("tc") +int get_xfrm_info(struct __sk_buff *skb) +{ + struct bpf_xfrm_info info = {}; + + if (bpf_skb_get_xfrm_info(skb, &info) < 0) + return TC_ACT_SHOT; + + resp_if_id = info.if_id; + + return TC_ACT_UNSPEC; +} + +char _license[] SEC("license") = "GPL";
On 12/1/22 1:14 PM, Eyal Birger wrote:
Test the xfrm_info kfunc helpers.
Note: the tests require support for xfrmi "external" mode in iproute2.
Not needed now. Please update the commit message.
The test is failing on platform that has no kfunc support yet. Please check the BPF CI result in patchwork after posting to ensure the tests run well: https://patchwork.kernel.org/project/netdevbpf/patch/20221201211425.1528197-...
This test needs to be added to the DENYLIST.<arch> for the not yet supported platform. Please refer to selftests/bpf/README.rst for details.
[ ... ]
+#define SYS_NOFAIL(fmt, ...) \
- ({ \
char cmd[1024]; \
snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
system(cmd); \
- })
+static int attach_tc_prog(struct bpf_tc_hook *hook, int igr_fd, int egr_fd) +{
- DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1,
.priority = 1, .prog_fd = igr_fd);
- DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1,
.priority = 1, .prog_fd = egr_fd);
s/DECLARE_LIBBPF_OPTS/LIBBPF_OPTS/
DECLARE_ is a legacy naming in libbpf_legacy.h
[ ... ]
+static int setup_xfrmi_external_dev(const char *ns) +{
- struct {
struct nlmsghdr nh;
struct ifinfomsg info;
unsigned char data[128];
- } req;
- struct rtattr *link_info, *info_data;
- struct nstoken *nstoken;
- int ret = -1, sock = 0;
sock = -1;
- struct nlmsghdr *nh;
- memset(&req, 0, sizeof(req));
- nh = &req.nh;
- nh->nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
- nh->nlmsg_type = RTM_NEWLINK;
- nh->nlmsg_flags |= NLM_F_CREATE | NLM_F_REQUEST;
- rtattr_add_str(nh, IFLA_IFNAME, "ipsec0");
- link_info = rtattr_begin(nh, IFLA_LINKINFO);
- rtattr_add_str(nh, IFLA_INFO_KIND, "xfrm");
- info_data = rtattr_begin(nh, IFLA_INFO_DATA);
- rtattr_add(nh, IFLA_XFRM_COLLECT_METADATA, 0);
- rtattr_end(nh, info_data);
- rtattr_end(nh, link_info);
- nstoken = open_netns(ns);
Please check error.
- sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
- if (!ASSERT_GT(sock, 0, "netlink socket"))
s/_GT/_GE/
goto Exit;
Please run checkpatch.pl...
CHECK: Avoid CamelCase: <Exit> #301: FILE: tools/testing/selftests/bpf/prog_tests/xfrm_info.c:250: + goto Exit;
- ret = send(sock, nh, nh->nlmsg_len, 0);
- if (!ASSERT_EQ(ret, nh->nlmsg_len, "netlink send length"))
goto Exit;
- ret = 0;
+Exit:
- if (sock)
if (sock != -1) ...
close(sock);
- close_netns(nstoken);
- return ret;
+}
+static int config_overlay(void) +{
- if (setup_xfrm_tunnel(NS0, NS1, IP4_ADDR_VETH01, IP4_ADDR_VETH10,
IF_ID_0_TO_1, IF_ID_1))
goto fail;
- if (setup_xfrm_tunnel(NS0, NS2, IP4_ADDR_VETH02, IP4_ADDR_VETH20,
IF_ID_0_TO_2, IF_ID_2))
goto fail;
- /* Older iproute2 doesn't support this option */
- if (!ASSERT_OK(setup_xfrmi_external_dev(NS0), "xfrmi"))
goto fail;
- SYS("ip -net " NS0 " addr add 192.168.1.100/24 dev ipsec0");
- SYS("ip -net " NS0 " link set dev ipsec0 up");
- SYS("ip -net " NS1 " link add ipsec0 type xfrm if_id %d", IF_ID_1);
- SYS("ip -net " NS1 " addr add 192.168.1.200/24 dev ipsec0");
- SYS("ip -net " NS1 " link set dev ipsec0 up");
- SYS("ip -net " NS2 " link add ipsec0 type xfrm if_id %d", IF_ID_2);
- SYS("ip -net " NS2 " addr add 192.168.1.200/24 dev ipsec0");
- SYS("ip -net " NS2 " link set dev ipsec0 up");
- return 0;
+fail:
- return -1;
+}
+static int test_ping(int family, const char *addr) +{
- SYS("%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr);
- return 0;
+fail:
- return -1;
+}
+static int test_xfrm_ping(struct xfrm_info *skel, u32 if_id) +{
- skel->bss->req_if_id = if_id;
- if (test_ping(AF_INET, "192.168.1.200"))
nit. Directly do SYS() here to avoid another reading detour to test_ping() which is almost a one liner and only used once here.
return -1;
- if (!ASSERT_EQ(skel->bss->resp_if_id, if_id, "if_id"))
return -1;
- return 0;
+}
+static void _test_xfrm_info(void) +{
- int get_xfrm_info_prog_fd, set_xfrm_info_prog_fd;
- struct xfrm_info *skel = NULL;
- struct nstoken *nstoken = NULL;
- int ifindex = -1;
nit. Unnecessary init. Nonthing to cleanup at the label "done:" and it is not a return value also. will be easier to review.
- DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
.attach_point = BPF_TC_INGRESS);
- /* load and attach bpf progs to ipsec dev tc hook point */
- skel = xfrm_info__open_and_load();
- if (!ASSERT_OK_PTR(skel, "xfrm_info__open_and_load"))
goto done;
- nstoken = open_netns(NS0);
Check error.
- ifindex = if_nametoindex("ipsec0");
- if (!ASSERT_NEQ(ifindex, 0, "ipsec0 ifindex"))
goto done;
- tc_hook.ifindex = ifindex;
- set_xfrm_info_prog_fd = bpf_program__fd(skel->progs.set_xfrm_info);
- get_xfrm_info_prog_fd = bpf_program__fd(skel->progs.get_xfrm_info);
- if (!ASSERT_GE(set_xfrm_info_prog_fd, 0, "bpf_program__fd"))
goto done;
- if (!ASSERT_GE(get_xfrm_info_prog_fd, 0, "bpf_program__fd"))
goto done;
- if (attach_tc_prog(&tc_hook, get_xfrm_info_prog_fd,
set_xfrm_info_prog_fd))
goto done;
- if (!ASSERT_EQ(test_xfrm_ping(skel, IF_ID_0_TO_1), 0, "ping " NS1))
goto done;
- if (!ASSERT_EQ(test_xfrm_ping(skel, IF_ID_0_TO_2), 0, "ping " NS2))
goto done;
+done:
- if (nstoken)
close_netns(nstoken);
- if (skel)
xfrm_info__destroy(skel);
+}
+void test_xfrm_info(void) +{
- cleanup();
- if (!ASSERT_OK(config_underlay(), "config_underlay"))
cleanup() is needed on error.
return;
- if (!ASSERT_OK(config_overlay(), "config_overlay"))
Same here.
return;
- if (test__start_subtest("xfrm_info"))
_test_xfrm_info();
- cleanup();
+} diff --git a/tools/testing/selftests/bpf/progs/xfrm_info.c b/tools/testing/selftests/bpf/progs/xfrm_info.c new file mode 100644 index 000000000000..908579310bf9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/xfrm_info.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <linux/pkt_cls.h> +#include <bpf/bpf_helpers.h>
+__u32 req_if_id; +__u32 resp_if_id;
+struct bpf_xfrm_info {
- __u32 if_id;
- int link;
+} __attribute__((preserve_access_index));
+int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx,
const struct bpf_xfrm_info *from) __ksym;
+int bpf_skb_get_xfrm_info(struct __sk_buff *skb_ctx,
struct bpf_xfrm_info *to) __ksym;
+SEC("tc") +int set_xfrm_info(struct __sk_buff *skb) +{
- struct bpf_xfrm_info info = { .if_id = req_if_id };
- return bpf_skb_set_xfrm_info(skb, &info) ? TC_ACT_SHOT : TC_ACT_UNSPEC;
Add these TC_ACT_* to bpf_tracing_net.h and then vmlinux.h can be used. Take a look at some of the bpf_tracing_net.h in selftests.
+}
+SEC("tc") +int get_xfrm_info(struct __sk_buff *skb) +{
- struct bpf_xfrm_info info = {};
- if (bpf_skb_get_xfrm_info(skb, &info) < 0)
return TC_ACT_SHOT;
- resp_if_id = info.if_id;
- return TC_ACT_UNSPEC;
+}
+char _license[] SEC("license") = "GPL";
linux-kselftest-mirror@lists.linaro.org