This patch series adds tests to validate native XDP support for the PASS, DROP, ABORT, and TX actions, as well as headroom and tailroom adjustment. The adjustment tests validate support for both the extension and shrinking cases across various packet sizes and offset values.
The pass criteria for the head/tail adjustment tests require that at least one adjustment value works for at least one packet size. This ensures that the variability in the maximum supported head/tail adjustment offset across different drivers is accommodated.
The results reported in this series are based on netdevsim. However, the series has also been tested against multiple other drivers, including fbnic.
Note: XDP support for fbnic will be added later.

---
Change-log:
- Force checksum computation in the netdevsim XDP hook
- Update the checksum when modifying the packet in the head/tail adjustment cases
- Use 1 as the minimum value for data growth when adjusting the tail
V5: https://lore.kernel.org/netdev/20250715210553.1568963-6-mohsin.bashr@gmail.c...
V4: https://lore.kernel.org/netdev/20250714210352.1115230-1-mohsin.bashr@gmail.c...
V3: https://lore.kernel.org/netdev/20250712002648.2385849-1-mohsin.bashr@gmail.c...
V2: https://lore.kernel.org/netdev/20250710184351.63797-1-mohsin.bashr@gmail.com
V1: https://lore.kernel.org/netdev/20250709173707.3177206-1-mohsin.bashr@gmail.c...
Jakub Kicinski (1):
  net: netdevsim: hook in XDP handling

Mohsin Bashir (4):
  selftests: drv-net: Test XDP_PASS/DROP support
  selftests: drv-net: Test XDP_TX support
  selftests: drv-net: Test tail-adjustment support
  selftests: drv-net: Test head-adjustment support

 drivers/net/netdevsim/netdev.c                |  21 +-
 tools/testing/selftests/drivers/net/Makefile  |   1 +
 tools/testing/selftests/drivers/net/xdp.py    | 656 ++++++++++++++++++
 .../selftests/net/lib/xdp_native.bpf.c        | 621 +++++++++++++++++
 4 files changed, 1298 insertions(+), 1 deletion(-)
 create mode 100755 tools/testing/selftests/drivers/net/xdp.py
 create mode 100644 tools/testing/selftests/net/lib/xdp_native.bpf.c
From: Jakub Kicinski <kuba@kernel.org>
Add basic XDP support by hooking in do_xdp_generic(). This should be enough to validate most basic XDP tests.
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Mohsin Bashir <mohsin.bashr@gmail.com>
---
 drivers/net/netdevsim/netdev.c | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
index 611e7f65291c..a7628f5c09af 100644
--- a/drivers/net/netdevsim/netdev.c
+++ b/drivers/net/netdevsim/netdev.c
@@ -387,15 +387,34 @@ static int nsim_get_iflink(const struct net_device *dev)
 static int nsim_rcv(struct nsim_rq *rq, int budget)
 {
 	struct net_device *dev = rq->napi.dev;
+	struct bpf_prog *xdp_prog;
+	struct netdevsim *ns;
 	struct sk_buff *skb;
 	unsigned int skblen;
 	int i, ret;
 
+	ns = netdev_priv(dev);
+	xdp_prog = READ_ONCE(ns->xdp.prog);
+
 	for (i = 0; i < budget; i++) {
 		if (skb_queue_empty(&rq->skb_queue))
 			break;
 
 		skb = skb_dequeue(&rq->skb_queue);
+
+		if (xdp_prog) {
+			/* skb might be freed directly by XDP, save the len */
+			skblen = skb->len;
+
+			if (skb->ip_summed == CHECKSUM_PARTIAL)
+				skb_checksum_help(skb);
+			ret = do_xdp_generic(xdp_prog, &skb);
+			if (ret != XDP_PASS) {
+				dev_dstats_rx_add(dev, skblen);
+				continue;
+			}
+		}
+
 		/* skb might be discard at netif_receive_skb, save the len */
 		skblen = skb->len;
 		skb_mark_napi_id(skb, &rq->napi);
@@ -936,7 +955,7 @@ static void nsim_setup(struct net_device *dev)
 			  NETIF_F_TSO;
 	dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS;
 	dev->max_mtu = ETH_MAX_MTU;
-	dev->xdp_features = NETDEV_XDP_ACT_HW_OFFLOAD;
+	dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_HW_OFFLOAD;
 }
static int nsim_queue_init(struct netdevsim *ns)
Test XDP_PASS/DROP in single-buffer and multi-buffer mode when native XDP support is available.
./drivers/net/xdp.py
TAP version 13
1..4
ok 1 xdp.test_xdp_native_pass_sb
ok 2 xdp.test_xdp_native_pass_mb
ok 3 xdp.test_xdp_native_drop_sb
ok 4 xdp.test_xdp_native_drop_mb
# Totals: pass:4 fail:0 xfail:0 xpass:0 skip:0 error:0
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Mohsin Bashir <mohsin.bashr@gmail.com>
---
 tools/testing/selftests/drivers/net/Makefile  |   1 +
 tools/testing/selftests/drivers/net/xdp.py    | 303 ++++++++++++++++++
 .../selftests/net/lib/xdp_native.bpf.c        | 158 +++++++++
 3 files changed, 462 insertions(+)
 create mode 100755 tools/testing/selftests/drivers/net/xdp.py
 create mode 100644 tools/testing/selftests/net/lib/xdp_native.bpf.c
diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile
index 9bd84d6b542e..3556f3563e08 100644
--- a/tools/testing/selftests/drivers/net/Makefile
+++ b/tools/testing/selftests/drivers/net/Makefile
@@ -22,6 +22,7 @@ TEST_PROGS := \
 	stats.py \
 	shaper.py \
 	hds.py \
+	xdp.py \
 # end of TEST_PROGS
 
 include ../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/xdp.py b/tools/testing/selftests/drivers/net/xdp.py
new file mode 100755
index 000000000000..8ab1607edcd8
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/xdp.py
@@ -0,0 +1,303 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+This file contains tests to verify native XDP support in network drivers.
+The tests utilize the BPF program `xdp_native.bpf.o` from the `selftests.net.lib`
+directory, with each test focusing on a specific aspect of XDP functionality.
+"""
+import random
+import string
+from dataclasses import dataclass
+from enum import Enum
+
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ne
+from lib.py import KsftFailEx, NetDrvEpEnv
+from lib.py import bkg, cmd, rand_port
+from lib.py import ip, bpftool, defer
+
+
+class TestConfig(Enum):
+    """Enum for XDP configuration options."""
+    MODE = 0  # Configures the BPF program for a specific test
+    PORT = 1  # Port configuration to communicate with the remote host
+
+
+class XDPAction(Enum):
+    """Enum for XDP actions."""
+    PASS = 0  # Pass the packet up to the stack
+    DROP = 1  # Drop the packet
+
+
+class XDPStats(Enum):
+    """Enum for XDP statistics."""
+    RX = 0    # Count of valid packets received for testing
+    PASS = 1  # Count of packets passed up to the stack
+    DROP = 2  # Count of packets dropped
+
+
+@dataclass
+class BPFProgInfo:
+    """Data class to store information about a BPF program."""
+    name: str              # Name of the BPF program
+    file: str              # BPF program object file
+    xdp_sec: str = "xdp"   # XDP section name (e.g., "xdp" or "xdp.frags")
+    mtu: int = 1500        # Maximum Transmission Unit, default is 1500
+
+
+def _exchg_udp(cfg, port, test_string):
+    """
+    Exchanges UDP packets between a local and remote host using the socat tool.
+
+    Args:
+        cfg: Configuration object containing network settings.
+        port: Port number to use for the UDP communication.
+        test_string: String that the remote host will send.
+
+    Returns:
+        The string received by the test host.
+    """
+    cfg.require_cmd("socat", remote=True)
+
+    rx_udp_cmd = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport STDOUT"
+    tx_udp_cmd = f"echo -n {test_string} | socat -t 2 -u STDIN UDP:{cfg.baddr}:{port}"
+
+    with bkg(rx_udp_cmd, exit_wait=True) as nc:
+        cmd(tx_udp_cmd, host=cfg.remote, shell=True)
+
+    return nc.stdout.strip()
+
+
+def _test_udp(cfg, port, size=256):
+    """
+    Tests UDP packet exchange between a local and remote host.
+
+    Args:
+        cfg: Configuration object containing network settings.
+        port: Port number to use for the UDP communication.
+        size: The length of the test string to be exchanged, default is 256 characters.
+
+    Returns:
+        bool: True if the received string matches the sent string, False otherwise.
+    """
+    test_str = "".join(random.choice(string.ascii_lowercase) for _ in range(size))
+    recvd_str = _exchg_udp(cfg, port, test_str)
+
+    return recvd_str == test_str
+
+
+def _load_xdp_prog(cfg, bpf_info):
+    """
+    Loads an XDP program onto a network interface.
+
+    Args:
+        cfg: Configuration object containing network settings.
+        bpf_info: BPFProgInfo object containing information about the BPF program.
+
+    Returns:
+        dict: A dictionary containing the XDP program ID, name, and associated map IDs.
+    """
+    abs_path = cfg.net_lib_dir / bpf_info.file
+    prog_info = {}
+
+    cmd(f"ip link set dev {cfg.remote_ifname} mtu {bpf_info.mtu}", shell=True, host=cfg.remote)
+    defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote)
+
+    cmd(
+        f"ip link set dev {cfg.ifname} mtu {bpf_info.mtu} xdp obj {abs_path} sec {bpf_info.xdp_sec}",
+        shell=True
+    )
+    defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off")
+
+    xdp_info = ip(f"-d link show dev {cfg.ifname}", json=True)[0]
+    prog_info["id"] = xdp_info["xdp"]["prog"]["id"]
+    prog_info["name"] = xdp_info["xdp"]["prog"]["name"]
+    prog_id = prog_info["id"]
+
+    map_ids = bpftool(f"prog show id {prog_id}", json=True)["map_ids"]
+    prog_info["maps"] = {}
+    for map_id in map_ids:
+        name = bpftool(f"map show id {map_id}", json=True)["name"]
+        prog_info["maps"][name] = map_id
+
+    return prog_info
+
+
+def format_hex_bytes(value):
+    """
+    Helper function that converts an integer into a formatted hexadecimal byte string.
+
+    Args:
+        value: An integer representing the number to be converted.
+
+    Returns:
+        A string representing hexadecimal equivalent of value, with bytes separated by spaces.
+    """
+    hex_str = value.to_bytes(4, byteorder='little', signed=True)
+    return ' '.join(f'{byte:02x}' for byte in hex_str)
+
+
+def _set_xdp_map(map_name, key, value):
+    """
+    Updates an XDP map with a given key-value pair using bpftool.
+
+    Args:
+        map_name: The name of the XDP map to update.
+        key: The key to update in the map, formatted as a hexadecimal string.
+        value: The value to associate with the key, formatted as a hexadecimal string.
+    """
+    key_formatted = format_hex_bytes(key)
+    value_formatted = format_hex_bytes(value)
+    bpftool(
+        f"map update name {map_name} key hex {key_formatted} value hex {value_formatted}"
+    )
+
+
+def _get_stats(xdp_map_id):
+    """
+    Retrieves and formats statistics from an XDP map.
+
+    Args:
+        xdp_map_id: The ID of the XDP map from which to retrieve statistics.
+
+    Returns:
+        A dictionary containing formatted packet statistics for various XDP actions.
+        The keys are based on the XDPStats Enum values.
+
+    Raises:
+        KsftFailEx: If the stats retrieval fails.
+    """
+    stats_dump = bpftool(f"map dump id {xdp_map_id}", json=True)
+    if not stats_dump:
+        raise KsftFailEx(f"Failed to get stats for map {xdp_map_id}")
+
+    stats_formatted = {}
+    for key in range(0, 4):
+        val = stats_dump[key]["formatted"]["value"]
+        if stats_dump[key]["formatted"]["key"] == XDPStats.RX.value:
+            stats_formatted[XDPStats.RX.value] = val
+        elif stats_dump[key]["formatted"]["key"] == XDPStats.PASS.value:
+            stats_formatted[XDPStats.PASS.value] = val
+        elif stats_dump[key]["formatted"]["key"] == XDPStats.DROP.value:
+            stats_formatted[XDPStats.DROP.value] = val
+
+    return stats_formatted
+
+
+def _test_pass(cfg, bpf_info, msg_sz):
+    """
+    Tests the XDP_PASS action by exchanging UDP packets.
+
+    Args:
+        cfg: Configuration object containing network settings.
+        bpf_info: BPFProgInfo object containing information about the BPF program.
+        msg_sz: Size of the test message to send.
+    """
+
+    prog_info = _load_xdp_prog(cfg, bpf_info)
+    port = rand_port()
+
+    _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.PASS.value)
+    _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+    ksft_eq(_test_udp(cfg, port, msg_sz), True, "UDP packet exchange failed")
+    stats = _get_stats(prog_info["maps"]["map_xdp_stats"])
+
+    ksft_ne(stats[XDPStats.RX.value], 0, "RX stats should not be zero")
+    ksft_eq(stats[XDPStats.RX.value], stats[XDPStats.PASS.value], "RX and PASS stats mismatch")
+
+
+def test_xdp_native_pass_sb(cfg):
+    """
+    Tests the XDP_PASS action for a single-buffer case.
+
+    Args:
+        cfg: Configuration object containing network settings.
+    """
+    bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500)
+
+    _test_pass(cfg, bpf_info, 256)
+
+
+def test_xdp_native_pass_mb(cfg):
+    """
+    Tests the XDP_PASS action for a multi-buffer case.
+
+    Args:
+        cfg: Configuration object containing network settings.
+    """
+    bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000)
+
+    _test_pass(cfg, bpf_info, 8000)
+
+
+def _test_drop(cfg, bpf_info, msg_sz):
+    """
+    Tests the XDP_DROP action by exchanging UDP packets.
+
+    Args:
+        cfg: Configuration object containing network settings.
+        bpf_info: BPFProgInfo object containing information about the BPF program.
+        msg_sz: Size of the test message to send.
+    """
+
+    prog_info = _load_xdp_prog(cfg, bpf_info)
+    port = rand_port()
+
+    _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.DROP.value)
+    _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+    ksft_eq(_test_udp(cfg, port, msg_sz), False, "UDP packet exchange should fail")
+    stats = _get_stats(prog_info["maps"]["map_xdp_stats"])
+
+    ksft_ne(stats[XDPStats.RX.value], 0, "RX stats should not be zero")
+    ksft_eq(stats[XDPStats.RX.value], stats[XDPStats.DROP.value], "RX and DROP stats mismatch")
+
+
+def test_xdp_native_drop_sb(cfg):
+    """
+    Tests the XDP_DROP action for a single-buffer case.
+
+    Args:
+        cfg: Configuration object containing network settings.
+    """
+    bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500)
+
+    _test_drop(cfg, bpf_info, 256)
+
+
+def test_xdp_native_drop_mb(cfg):
+    """
+    Tests the XDP_DROP action for a multi-buffer case.
+
+    Args:
+        cfg: Configuration object containing network settings.
+    """
+    bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000)
+
+    _test_drop(cfg, bpf_info, 8000)
+
+
+def main():
+    """
+    Main function to execute the XDP tests.
+
+    This function runs a series of tests to validate XDP support for
+    both the single- and multi-buffer cases. It uses the NetDrvEpEnv
+    context manager to manage the network driver environment and the
+    ksft_run function to execute the tests.
+    """
+    with NetDrvEpEnv(__file__) as cfg:
+        ksft_run(
+            [
+                test_xdp_native_pass_sb,
+                test_xdp_native_pass_mb,
+                test_xdp_native_drop_sb,
+                test_xdp_native_drop_mb,
+            ],
+            args=(cfg,))
+        ksft_exit()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/net/lib/xdp_native.bpf.c b/tools/testing/selftests/net/lib/xdp_native.bpf.c
new file mode 100644
index 000000000000..90b34b2a4fef
--- /dev/null
+++ b/tools/testing/selftests/net/lib/xdp_native.bpf.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/udp.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+enum {
+	XDP_MODE = 0,
+	XDP_PORT = 1,
+} xdp_map_setup_keys;
+
+enum {
+	XDP_MODE_PASS = 0,
+	XDP_MODE_DROP = 1,
+} xdp_map_modes;
+
+enum {
+	STATS_RX = 0,
+	STATS_PASS = 1,
+	STATS_DROP = 2,
+} xdp_stats;
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 2);
+	__type(key, __u32);
+	__type(value, __s32);
+} map_xdp_setup SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 4);
+	__type(key, __u32);
+	__type(value, __u64);
+} map_xdp_stats SEC(".maps");
+
+static void record_stats(struct xdp_md *ctx, __u32 stat_type)
+{
+	__u64 *count;
+
+	count = bpf_map_lookup_elem(&map_xdp_stats, &stat_type);
+
+	if (count)
+		__sync_fetch_and_add(count, 1);
+}
+
+static struct udphdr *filter_udphdr(struct xdp_md *ctx, __u16 port)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	struct udphdr *udph = NULL;
+	struct ethhdr *eth = data;
+
+	if (data + sizeof(*eth) > data_end)
+		return NULL;
+
+	if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+		struct iphdr *iph = data + sizeof(*eth);
+
+		if (iph + 1 > (struct iphdr *)data_end ||
+		    iph->protocol != IPPROTO_UDP)
+			return NULL;
+
+		udph = (void *)eth + sizeof(*iph) + sizeof(*eth);
+	} else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+		struct ipv6hdr *ipv6h = data + sizeof(*eth);
+
+		if (ipv6h + 1 > (struct ipv6hdr *)data_end ||
+		    ipv6h->nexthdr != IPPROTO_UDP)
+			return NULL;
+
+		udph = (void *)eth + sizeof(*ipv6h) + sizeof(*eth);
+	} else {
+		return NULL;
+	}
+
+	if (udph + 1 > (struct udphdr *)data_end)
+		return NULL;
+
+	if (udph->dest != bpf_htons(port))
+		return NULL;
+
+	record_stats(ctx, STATS_RX);
+
+	return udph;
+}
+
+static int xdp_mode_pass(struct xdp_md *ctx, __u16 port)
+{
+	struct udphdr *udph = NULL;
+
+	udph = filter_udphdr(ctx, port);
+	if (!udph)
+		return XDP_PASS;
+
+	record_stats(ctx, STATS_PASS);
+
+	return XDP_PASS;
+}
+
+static int xdp_mode_drop_handler(struct xdp_md *ctx, __u16 port)
+{
+	struct udphdr *udph = NULL;
+
+	udph = filter_udphdr(ctx, port);
+	if (!udph)
+		return XDP_PASS;
+
+	record_stats(ctx, STATS_DROP);
+
+	return XDP_DROP;
+}
+
+static int xdp_prog_common(struct xdp_md *ctx)
+{
+	__u32 key, *port;
+	__s32 *mode;
+
+	key = XDP_MODE;
+	mode = bpf_map_lookup_elem(&map_xdp_setup, &key);
+	if (!mode)
+		return XDP_PASS;
+
+	key = XDP_PORT;
+	port = bpf_map_lookup_elem(&map_xdp_setup, &key);
+	if (!port)
+		return XDP_PASS;
+
+	switch (*mode) {
+	case XDP_MODE_PASS:
+		return xdp_mode_pass(ctx, (__u16)(*port));
+	case XDP_MODE_DROP:
+		return xdp_mode_drop_handler(ctx, (__u16)(*port));
+	}
+
+	/* Default action is to simply pass */
+	return XDP_PASS;
+}
+
+SEC("xdp")
+int xdp_prog(struct xdp_md *ctx)
+{
+	return xdp_prog_common(ctx);
+}
+
+SEC("xdp.frags")
+int xdp_prog_frags(struct xdp_md *ctx)
+{
+	return xdp_prog_common(ctx);
+}
+
+char _license[] SEC("license") = "GPL";
On 19/07/2025 11:30, Mohsin Bashir wrote:
Test XDP_PASS/DROP in single-buffer and multi-buffer mode when native XDP support is available.
./drivers/net/xdp.py
TAP version 13
1..4
ok 1 xdp.test_xdp_native_pass_sb
ok 2 xdp.test_xdp_native_pass_mb
ok 3 xdp.test_xdp_native_drop_sb
ok 4 xdp.test_xdp_native_drop_mb
# Totals: pass:4 fail:0 xfail:0 xpass:0 skip:0 error:0
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Mohsin Bashir <mohsin.bashr@gmail.com>
---
 tools/testing/selftests/drivers/net/Makefile  |   1 +
 tools/testing/selftests/drivers/net/xdp.py    | 303 ++++++++++++++++++
 .../selftests/net/lib/xdp_native.bpf.c        | 158 +++++++++
 3 files changed, 462 insertions(+)
 create mode 100755 tools/testing/selftests/drivers/net/xdp.py
 create mode 100644 tools/testing/selftests/net/lib/xdp_native.bpf.c
...
+static struct udphdr *filter_udphdr(struct xdp_md *ctx, __u16 port)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	struct udphdr *udph = NULL;
+	struct ethhdr *eth = data;
+
+	if (data + sizeof(*eth) > data_end)
+		return NULL;
This check assumes that the packet headers reside in the linear part of the xdp_buff. However, this assumption does not hold across all drivers. For example, in mlx5, the linear part is empty when using multi-buffer mode with striding rq configuration. This causes all multi-buffer test cases to fail over mlx5.
To ensure correctness across all drivers, all direct accesses to packet data should use these safer helper functions instead: bpf_xdp_load_bytes() and bpf_xdp_store_bytes().
Related discussion and context can be found here: https://github.com/xdp-project/xdp-tools/pull/409
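For illustration, a minimal sketch of that approach (a hypothetical rewrite of the parse above, not taken from this series; the L4 protocol checks and the store path are elided):

	struct ethhdr eth;
	struct udphdr udph;
	__u32 l3_len;

	/* Copy headers out of the (possibly non-linear) xdp_buff */
	if (bpf_xdp_load_bytes(ctx, 0, &eth, sizeof(eth)) < 0)
		return NULL;

	if (eth.h_proto == bpf_htons(ETH_P_IP))
		l3_len = sizeof(struct iphdr);
	else if (eth.h_proto == bpf_htons(ETH_P_IPV6))
		l3_len = sizeof(struct ipv6hdr);
	else
		return NULL;

	if (bpf_xdp_load_bytes(ctx, sizeof(eth) + l3_len, &udph, sizeof(udph)) < 0)
		return NULL;

	/* Writes would go through bpf_xdp_store_bytes(ctx, off, buf, len) */

Note that filter_udphdr() currently returns a pointer into the packet; with the copy-based helpers it would need to return the header by value (or just the fields it needs) instead.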
+	if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+		struct iphdr *iph = data + sizeof(*eth);
+
+		if (iph + 1 > (struct iphdr *)data_end ||
+		    iph->protocol != IPPROTO_UDP)
+			return NULL;
+
+		udph = (void *)eth + sizeof(*iph) + sizeof(*eth);
+	} else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+		struct ipv6hdr *ipv6h = data + sizeof(*eth);
+
+		if (ipv6h + 1 > (struct ipv6hdr *)data_end ||
+		    ipv6h->nexthdr != IPPROTO_UDP)
+			return NULL;
+
+		udph = (void *)eth + sizeof(*ipv6h) + sizeof(*eth);
+	} else {
+		return NULL;
+	}
+
+	if (udph + 1 > (struct udphdr *)data_end)
+		return NULL;
+
+	if (udph->dest != bpf_htons(port))
+		return NULL;
+
+	record_stats(ctx, STATS_RX);
+
+	return udph;
+}
On Mon, 21 Jul 2025 14:43:15 +0300 Nimrod Oren wrote:
+static struct udphdr *filter_udphdr(struct xdp_md *ctx, __u16 port)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	struct udphdr *udph = NULL;
+	struct ethhdr *eth = data;
+
+	if (data + sizeof(*eth) > data_end)
+		return NULL;
This check assumes that the packet headers reside in the linear part of the xdp_buff. However, this assumption does not hold across all drivers. For example, in mlx5, the linear part is empty when using multi-buffer mode with striding rq configuration. This causes all multi-buffer test cases to fail over mlx5.
To ensure correctness across all drivers, all direct accesses to packet data should use these safer helper functions instead: bpf_xdp_load_bytes() and bpf_xdp_store_bytes().
Related discussion and context can be found here: https://github.com/xdp-project/xdp-tools/pull/409
That's a reasonable way to modify the test. But I'm not sure it's something that should be blocking merging the patches. Or for that matter whether it's Mohsin's responsibility to make the test cater to quirks of mlx5, which is not even part of NIPA testing - we have no way of knowing what passes for mlx5, what regresses it etc.
Not related to this series, but I'd be curious what the perf hit is for having to load the headers.
On 21/07/2025 18:40, Jakub Kicinski wrote:
On Mon, 21 Jul 2025 14:43:15 +0300 Nimrod Oren wrote:
+static struct udphdr *filter_udphdr(struct xdp_md *ctx, __u16 port)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	struct udphdr *udph = NULL;
+	struct ethhdr *eth = data;
+
+	if (data + sizeof(*eth) > data_end)
+		return NULL;
This check assumes that the packet headers reside in the linear part of the xdp_buff. However, this assumption does not hold across all drivers. For example, in mlx5, the linear part is empty when using multi-buffer mode with striding rq configuration. This causes all multi-buffer test cases to fail over mlx5.
To ensure correctness across all drivers, all direct accesses to packet data should use these safer helper functions instead: bpf_xdp_load_bytes() and bpf_xdp_store_bytes().
Related discussion and context can be found here: https://github.com/xdp-project/xdp-tools/pull/409
That's a reasonable way to modify the test. But I'm not sure it's something that should be blocking merging the patches. Or for that matter whether it's Mohsin's responsibility to make the test cater to quirks of mlx5,
Definitely not a quirk, you cannot assume the headers are in the linear part, especially if you're going to put this program as reference in the kernel tree.
This issue has nothing to do with mlx5, but a buggy XDP program.
which is not even part of NIPA testing - we have no way of knowing what passes for mlx5, what regresses it etc.
People have been developing XDP code that runs on mlx5 long before NIPA even existed 🤷‍♂️.. And as you know, we run these selftests on mlx5 hardware, as evident by Nimrod's mail, and others you've seen on the list. You know what regresses.
On Mon, 21 Jul 2025 21:34:05 +0300 Gal Pressman wrote:
That's a reasonable way to modify the test. But I'm not sure it's something that should be blocking merging the patches. Or for that matter whether it's Mohsin's responsibility to make the test cater to quirks of mlx5,
Definitely not a quirk, you cannot assume the headers are in the linear part, especially if you're going to put this program as reference in the kernel tree.
This issue has nothing to do with mlx5, but a buggy XDP program.
We put the tests in the tree to foster collaboration. If you think the test should be improved please send patches. I don't think the kernel will allow pulling headers if they are not in the linear section. But that's your problem to solve.
which is not even part of NIPA testing - we have no way of knowing what passes for mlx5, what regresses it etc.
People have been developing XDP code that runs on mlx5 long before NIPA even existed 🤷‍♂️.. And as you know we run these selftests on mlx5 hardware, as evident by Nimrod's mail, and others you've seen on the list. You know what regresses.
No, please don't try to dispute facts. It's not integrated, if you go on a vacation upstream will have no idea what broke in mlx5. Either you are reporting the results upstream or our guarantees on regressions are best effort. BTW I don't understand how you can claim that a new test regresses something. It never passed on mlx5 == not a regression.
On 21/07/2025 23:33, Jakub Kicinski wrote:
IOn Mon, 21 Jul 2025 21:34:05 +0300 Gal Pressman wrote:
That's a reasonable way to modify the test. But I'm not sure it's something that should be blocking merging the patches. Or for that matter whether it's Mohsin's responsibility to make the test cater to quirks of mlx5,
Definitely not a quirk, you cannot assume the headers are in the linear part, especially if you're going to put this program as reference in the kernel tree.
This issue has nothing to do with mlx5, but a buggy XDP program.
We put the tests in the tree to foster collaboration. If you think the test should be improved please send patches. I don't think the kernel will allow pulling headers if they are not in the linear section. But that's your problem to solve.
Why send patches to fix something that shouldn't be merged broken in the first place?
Mohsin made an incorrect *assumption*; it is not based on anything that is guaranteed by XDP. Fixing this is trivial, and I don't see any technical reason not to.
which is not even part of NIPA testing - we have no way of knowing what passes for mlx5, what regresses it etc.
People have been developing XDP code that runs on mlx5 long before NIPA even existed 🤷♂️.. And as you know we run these selftests on mlx5 hardware, as evident by Nimrod's mail, and others you've seen on the list. You know what regresses.
No, please don't try to dispute facts. It's not integrated, if you go on a vacation upstream will have no idea what broke in mlx5. Either you are reporting the results upstream or our guarantees on regressions are best effort. BTW I don't understand how you can claim that a new test regresses something. It never passed on mlx5 == not a regression.
This thread is more about pushing the NIPA agenda than a technical discussion.
We're not part of NIPA as can be clearly observed by our MAINTAINERS entry. Don't confuse our NICs with others that are not accessible to the public, many people (inside and outside of NVIDIA) are developing and testing mlx5 without NIPA for years.
For the record, we run these selftests internally whether I go on vacation or not. These tests are running as part of our regression and failures are either reported to the list, or fixed by the team. You know all that.
On Tue, 22 Jul 2025 10:21:22 +0300 Gal Pressman wrote:
This thread is more about pushing the NIPA agenda than a technical discussion.
What is the "NIPA agenda"? Please explain, this is getting funny :D
On 7/21/25 8:34 PM, Gal Pressman wrote:
On 21/07/2025 18:40, Jakub Kicinski wrote:
On Mon, 21 Jul 2025 14:43:15 +0300 Nimrod Oren wrote:
+static struct udphdr *filter_udphdr(struct xdp_md *ctx, __u16 port)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	struct udphdr *udph = NULL;
+	struct ethhdr *eth = data;
+
+	if (data + sizeof(*eth) > data_end)
+		return NULL;
This check assumes that the packet headers reside in the linear part of the xdp_buff. However, this assumption does not hold across all drivers. For example, in mlx5, the linear part is empty when using multi-buffer mode with striding rq configuration. This causes all multi-buffer test cases to fail over mlx5.
To ensure correctness across all drivers, all direct accesses to packet data should use these safer helper functions instead: bpf_xdp_load_bytes() and bpf_xdp_store_bytes().
Related discussion and context can be found here: https://github.com/xdp-project/xdp-tools/pull/409
That's a reasonable way to modify the test. But I'm not sure it's something that should be blocking merging the patches. Or for that matter whether it's Mohsin's responsibility to make the test cater to quirks of mlx5,
Definitely not a quirk, you cannot assume the headers are in the linear part, especially if you're going to put this program as reference in the kernel tree.
This issue has nothing to do with mlx5, but a buggy XDP program.
Note that with the self-tests we have a slightly different premise WRT the actual kernel code. We prefer on-boarding test cases that work for some/most of the possible setups vs perfect ones, and eventually improving them incrementally as needed: the goal is to increase code coverage _and_ encourage people to contribute new tests upstream.
We try to avoid breaking existing tests (at least the ones actually reporting into the infrastructure), but for new ones the bar is intentionally different from the one for kernel code.
Cheers,
Paolo
On 7/21/25 4:43 AM, Nimrod Oren wrote:
To ensure correctness across all drivers, all direct accesses to packet data should use these safer helper functions instead: bpf_xdp_load_bytes() and bpf_xdp_store_bytes().
Not sure if the test will be run on older kernels. FWIW, there is a bpf_dynptr_slice_rdwr() kfunc which was added in commit 66e3a13e7c2c. It uses bpf_xdp_pointer() underneath, so it can avoid a copy if the requested data is contained in a single frag.
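Roughly, a sketch of that approach (assuming the XDP dynptr kfuncs are available; kfunc declarations abbreviated, untested):

	extern int bpf_dynptr_from_xdp(struct xdp_md *x, __u64 flags,
				       struct bpf_dynptr *ptr) __ksym;
	extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u32 offset,
					   void *buffer, __u32 buffer__szk) __ksym;

	struct bpf_dynptr ptr;
	struct ethhdr buf, *eth;

	if (bpf_dynptr_from_xdp(ctx, 0, &ptr))
		return XDP_PASS;

	/* Direct pointer into the frag when contiguous, else a copy in buf */
	eth = bpf_dynptr_slice_rdwr(&ptr, 0, &buf, sizeof(buf));
	if (!eth)
		return XDP_PASS;

	/* After modifying through a copied slice, flush it back */
	if (eth == &buf)
		bpf_dynptr_write(&ptr, 0, &buf, sizeof(buf), 0);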
On Mon, 21 Jul 2025 12:43:57 -0700 Martin KaFai Lau wrote:
On 7/21/25 4:43 AM, Nimrod Oren wrote:
To ensure correctness across all drivers, all direct accesses to packet data should use these safer helper functions instead: bpf_xdp_load_bytes() and bpf_xdp_store_bytes().
Not sure if the test will be run on the older kernel. fwiw, there is a bpf_dynptr_slice_rdwr() kfunc which was added in commit 66e3a13e7c2c. It uses bpf_xdp_pointer() underneath, so it can avoid a copy if the requested data is contained in a single frag.
We discussed running the tests on stable but nothing materialized, yet.
Add a test to verify the XDP_TX functionality by generating traffic from a remote node on a specific UDP port and redirecting it back to the sender.
./drivers/net/xdp.py
TAP version 13
1..5
ok 1 xdp.test_xdp_native_pass_sb
ok 2 xdp.test_xdp_native_pass_mb
ok 3 xdp.test_xdp_native_drop_sb
ok 4 xdp.test_xdp_native_drop_mb
ok 5 xdp.test_xdp_native_tx_mb
# Totals: pass:5 fail:0 xfail:0 xpass:0 skip:0 error:0
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Mohsin Bashir <mohsin.bashr@gmail.com>
---
 tools/testing/selftests/drivers/net/xdp.py    | 34 ++++++++
 .../selftests/net/lib/xdp_native.bpf.c        | 80 +++++++++++++++++++
 2 files changed, 114 insertions(+)
diff --git a/tools/testing/selftests/drivers/net/xdp.py b/tools/testing/selftests/drivers/net/xdp.py
index 8ab1607edcd8..d42dbee78431 100755
--- a/tools/testing/selftests/drivers/net/xdp.py
+++ b/tools/testing/selftests/drivers/net/xdp.py
@@ -27,6 +27,7 @@ class XDPAction(Enum):
     """Enum for XDP actions."""
     PASS = 0  # Pass the packet up to the stack
     DROP = 1  # Drop the packet
+    TX = 2    # Route the packet to the remote host
 
 
 class XDPStats(Enum):
@@ -34,6 +35,7 @@ class XDPStats(Enum):
     RX = 0    # Count of valid packets received for testing
     PASS = 1  # Count of packets passed up to the stack
     DROP = 2  # Count of packets dropped
+    TX = 3    # Count of incoming packets routed to the remote host
 
 
 @dataclass
@@ -180,6 +182,8 @@ def _get_stats(xdp_map_id):
             stats_formatted[XDPStats.PASS.value] = val
         elif stats_dump[key]["formatted"]["key"] == XDPStats.DROP.value:
             stats_formatted[XDPStats.DROP.value] = val
+        elif stats_dump[key]["formatted"]["key"] == XDPStats.TX.value:
+            stats_formatted[XDPStats.TX.value] = val
 
     return stats_formatted
 
@@ -278,6 +282,35 @@ def test_xdp_native_drop_mb(cfg):
     _test_drop(cfg, bpf_info, 8000)
 
 
+def test_xdp_native_tx_mb(cfg):
+    """
+    Tests the XDP_TX action for a multi-buff case.
+
+    Args:
+        cfg: Configuration object containing network settings.
+    """
+    cfg.require_cmd("socat", remote=True)
+
+    bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000)
+    prog_info = _load_xdp_prog(cfg, bpf_info)
+    port = rand_port()
+
+    _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.TX.value)
+    _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+    test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(8000))
+    rx_udp = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport STDOUT"
+    tx_udp = f"echo {test_string} | socat -t 2 -u STDIN UDP:{cfg.baddr}:{port}"
+
+    with bkg(rx_udp, host=cfg.remote, exit_wait=True) as rnc:
+        cmd(tx_udp, host=cfg.remote, shell=True)
+
+    stats = _get_stats(prog_info['maps']['map_xdp_stats'])
+
+    ksft_eq(rnc.stdout.strip(), test_string, "UDP packet exchange failed")
+    ksft_eq(stats[XDPStats.TX.value], 1, "TX stats mismatch")
+
+
 def main():
     """
     Main function to execute the XDP tests.
@@ -294,6 +327,7 @@ def main():
                 test_xdp_native_pass_mb,
                 test_xdp_native_drop_sb,
                 test_xdp_native_drop_mb,
+                test_xdp_native_tx_mb,
             ],
             args=(cfg,))
         ksft_exit()
diff --git a/tools/testing/selftests/net/lib/xdp_native.bpf.c b/tools/testing/selftests/net/lib/xdp_native.bpf.c
index 90b34b2a4fef..d3c66c891589 100644
--- a/tools/testing/selftests/net/lib/xdp_native.bpf.c
+++ b/tools/testing/selftests/net/lib/xdp_native.bpf.c
@@ -18,12 +18,14 @@ enum {
 enum {
 	XDP_MODE_PASS = 0,
 	XDP_MODE_DROP = 1,
+	XDP_MODE_TX = 2,
 } xdp_map_modes;
 
 enum {
 	STATS_RX = 0,
 	STATS_PASS = 1,
 	STATS_DROP = 2,
+	STATS_TX = 3,
 } xdp_stats;
 
 struct {
@@ -117,6 +119,82 @@ static int xdp_mode_drop_handler(struct xdp_md *ctx, __u16 port)
 	return XDP_DROP;
 }
 
+static void swap_machdr(void *data)
+{
+	struct ethhdr *eth = data;
+	__u8 tmp_mac[ETH_ALEN];
+
+	__builtin_memcpy(tmp_mac, eth->h_source, ETH_ALEN);
+	__builtin_memcpy(eth->h_source, eth->h_dest, ETH_ALEN);
+	__builtin_memcpy(eth->h_dest, tmp_mac, ETH_ALEN);
+}
+
+static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	struct udphdr *udph = NULL;
+	struct ethhdr *eth = data;
+
+	if (data + sizeof(*eth) > data_end)
+		return XDP_PASS;
+
+	if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+		struct iphdr *iph = data + sizeof(*eth);
+		__be32 tmp_ip = iph->saddr;
+
+		if (iph + 1 > (struct iphdr *)data_end ||
+		    iph->protocol != IPPROTO_UDP)
+			return XDP_PASS;
+
+		udph = data + sizeof(*iph) + sizeof(*eth);
+
+		if (udph + 1 > (struct udphdr *)data_end)
+			return XDP_PASS;
+		if (udph->dest != bpf_htons(port))
+			return XDP_PASS;
+
+		record_stats(ctx, STATS_RX);
+		swap_machdr((void *)eth);
+
+		iph->saddr = iph->daddr;
+		iph->daddr = tmp_ip;
+
+		record_stats(ctx, STATS_TX);
+
+		return XDP_TX;
+
+	} else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+		struct ipv6hdr *ipv6h = data + sizeof(*eth);
+		struct in6_addr tmp_ipv6;
+
+		if (ipv6h + 1 > (struct ipv6hdr *)data_end ||
+		    ipv6h->nexthdr != IPPROTO_UDP)
+			return XDP_PASS;
+
+		udph = data + sizeof(*ipv6h) + sizeof(*eth);
+
+		if (udph + 1 > (struct udphdr *)data_end)
+			return XDP_PASS;
+		if (udph->dest != bpf_htons(port))
+			return XDP_PASS;
+
+		record_stats(ctx, STATS_RX);
+		swap_machdr((void *)eth);
+
+		__builtin_memcpy(&tmp_ipv6, &ipv6h->saddr, sizeof(tmp_ipv6));
+		__builtin_memcpy(&ipv6h->saddr, &ipv6h->daddr,
+				 sizeof(tmp_ipv6));
+		__builtin_memcpy(&ipv6h->daddr, &tmp_ipv6, sizeof(tmp_ipv6));
+
+		record_stats(ctx, STATS_TX);
+
+		return XDP_TX;
+	}
+
+	return XDP_PASS;
+}
+
 static int xdp_prog_common(struct xdp_md *ctx)
 {
 	__u32 key, *port;
@@ -137,6 +215,8 @@ static int xdp_prog_common(struct xdp_md *ctx)
 		return xdp_mode_pass(ctx, (__u16)(*port));
 	case XDP_MODE_DROP:
 		return xdp_mode_drop_handler(ctx, (__u16)(*port));
+	case XDP_MODE_TX:
+		return xdp_mode_tx_handler(ctx, (__u16)(*port));
 	}
 
 	/* Default action is to simply pass */
Add a test to validate support for the two cases of tail adjustment: 1) tail extension, and 2) tail shrinking, across different frame sizes and offset values. For each of the two cases, test both the single- and multi-buffer cases by choosing appropriate packet sizes.
A negative offset value results in growing the tailroom (shrinking the payload), while a positive offset results in shrinking the tailroom (growing the payload).
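For reference, the offset maps directly onto bpf_xdp_adjust_tail() (illustrative fragment only, error handling reduced to an abort):

	/* offset < 0: shrink the payload, growing the tailroom */
	if (bpf_xdp_adjust_tail(ctx, -16) < 0)
		return XDP_ABORTED;

	/* offset > 0: grow the payload, consuming the tailroom */
	if (bpf_xdp_adjust_tail(ctx, 16) < 0)
		return XDP_ABORTED;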
Since the support for tail adjustment varies across drivers, classify the test as pass if at least one combination of packet size and offset from a pre-selected list results in a successful run. In case of an unsuccessful run, report the failure and highlight the packet size and offset values that caused the test to fail, as well as the values that resulted in the last successful run.
Note: The growing part of this test for netdevsim may appear flaky when the offset value is larger than 1. This behavior occurs because tailroom is not explicitly reserved for netdevsim, with 1 being the typical tailroom value. However, in certain cases, such as the payload being the last in the page with additional available space, the truesize is expanded. This also increases the tailroom, causing the test to pass intermittently. In contrast, when tailroom is explicitly reserved, such as in the case of fbnic, the test results are deterministic.
./drivers/net/xdp.py
TAP version 13
1..7
ok 1 xdp.test_xdp_native_pass_sb
ok 2 xdp.test_xdp_native_pass_mb
ok 3 xdp.test_xdp_native_drop_sb
ok 4 xdp.test_xdp_native_drop_mb
ok 5 xdp.test_xdp_native_tx_mb
# Failed run: ... successful run: ... offset 1. Reason: Adjustment failed
ok 6 xdp.test_xdp_native_adjst_tail_grow_data
ok 7 xdp.test_xdp_native_adjst_tail_shrnk_data
# Totals: pass:7 fail:0 xfail:0 xpass:0 skip:0 error:0
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Mohsin Bashir <mohsin.bashr@gmail.com>
---
 tools/testing/selftests/drivers/net/xdp.py    | 178 ++++++++++++++-
 .../selftests/net/lib/xdp_native.bpf.c        | 206 +++++++++++++++++-
 2 files changed, 380 insertions(+), 4 deletions(-)
diff --git a/tools/testing/selftests/drivers/net/xdp.py b/tools/testing/selftests/drivers/net/xdp.py
index d42dbee78431..85b5db0a1121 100755
--- a/tools/testing/selftests/drivers/net/xdp.py
+++ b/tools/testing/selftests/drivers/net/xdp.py
@@ -11,7 +11,7 @@ import string
 from dataclasses import dataclass
 from enum import Enum
 
-from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ne
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ne, ksft_pr
 from lib.py import KsftFailEx, NetDrvEpEnv
 from lib.py import bkg, cmd, rand_port
 from lib.py import ip, bpftool, defer
@@ -21,6 +21,8 @@ class TestConfig(Enum):
     """Enum for XDP configuration options."""
     MODE = 0  # Configures the BPF program for a specific test
     PORT = 1  # Port configuration to communicate with the remote host
+    ADJST_OFFSET = 2  # Tail/Head adjustment offset for extension/shrinking
+    ADJST_TAG = 3     # Adjustment tag to annotate the start and end of extension
 
 
 class XDPAction(Enum):
@@ -28,6 +30,7 @@ class XDPAction(Enum):
     PASS = 0  # Pass the packet up to the stack
     DROP = 1  # Drop the packet
     TX = 2    # Route the packet to the remote host
+    TAIL_ADJST = 3  # Adjust the tail of the packet
 
 
 class XDPStats(Enum):
@@ -36,6 +39,7 @@ class XDPStats(Enum):
     PASS = 1  # Count of packets passed up to the stack
     DROP = 2  # Count of packets dropped
     TX = 3    # Count of incoming packets routed to the remote host
+    ABORT = 4  # Count of packets that were aborted
 
 
 @dataclass
@@ -174,7 +178,7 @@ def _get_stats(xdp_map_id):
         raise KsftFailEx(f"Failed to get stats for map {xdp_map_id}")
 
     stats_formatted = {}
-    for key in range(0, 4):
+    for key in range(0, 5):
         val = stats_dump[key]["formatted"]["value"]
         if stats_dump[key]["formatted"]["key"] == XDPStats.RX.value:
             stats_formatted[XDPStats.RX.value] = val
@@ -184,6 +188,8 @@ def _get_stats(xdp_map_id):
             stats_formatted[XDPStats.DROP.value] = val
         elif stats_dump[key]["formatted"]["key"] == XDPStats.TX.value:
             stats_formatted[XDPStats.TX.value] = val
+        elif stats_dump[key]["formatted"]["key"] == XDPStats.ABORT.value:
+            stats_formatted[XDPStats.ABORT.value] = val
 
     return stats_formatted
 
@@ -311,6 +317,172 @@ def test_xdp_native_tx_mb(cfg):
     ksft_eq(stats[XDPStats.TX.value], 1, "TX stats mismatch")
 
 
+def _validate_res(res, offset_lst, pkt_sz_lst):
+    """
+    Validates the result of a test.
+
+    Args:
+        res: The result of the test, which should be a dictionary with a "status" key.
+
+    Raises:
+        KsftFailEx: If the test fails to pass any combination of offset and packet size.
+    """
+    if "status" not in res:
+        raise KsftFailEx("Missing 'status' key in result dictionary")
+
+    # Validate that not a single case was successful
+    if res["status"] == "fail":
+        if res["offset"] == offset_lst[0] and res["pkt_sz"] == pkt_sz_lst[0]:
+            raise KsftFailEx(f"{res['reason']}")
+
+        # Get the previous offset and packet size to report the successful run
+        tmp_idx = offset_lst.index(res["offset"])
+        prev_offset = offset_lst[tmp_idx - 1]
+        if tmp_idx == 0:
+            tmp_idx = pkt_sz_lst.index(res["pkt_sz"])
+            prev_pkt_sz = pkt_sz_lst[tmp_idx - 1]
+        else:
+            prev_pkt_sz = res["pkt_sz"]
+
+        # Use these values for error reporting
+        ksft_pr(
+            f"Failed run: pkt_sz {res['pkt_sz']}, offset {res['offset']}. "
+            f"Last successful run: pkt_sz {prev_pkt_sz}, offset {prev_offset}. "
+            f"Reason: {res['reason']}"
+        )
+
+
+def _check_for_failures(recvd_str, stats):
+    """
+    Checks for common failures while adjusting headroom or tailroom.
+
+    Args:
+        recvd_str: The string received from the remote host after sending a test string.
+        stats: A dictionary containing formatted packet statistics for various XDP actions.
+
+    Returns:
+        str: A string describing the failure reason if a failure is detected, otherwise None.
+    """
+
+    # Any adjustment failure results in an abort; hence, we track this counter
+    if stats[XDPStats.ABORT.value] != 0:
+        return "Adjustment failed"
+
+    # Since we are using aggregate stats for a single test across all offsets and packet sizes
+    # we can't use RX stats only to track data exchange failure without taking a previous
+    # snapshot. An easier way is to simply check for non-zero length of received string.
+    if len(recvd_str) == 0:
+        return "Data exchange failed"
+
+    # Check for RX and PASS stats mismatch. Ideally, they should be equal for a successful run
+    if stats[XDPStats.RX.value] != stats[XDPStats.PASS.value]:
+        return "RX stats mismatch"
+
+    return None
+
+
+def _test_xdp_native_tail_adjst(cfg, pkt_sz_lst, offset_lst):
+    """
+    Tests the XDP tail adjustment functionality.
+
+    This function loads the appropriate XDP program based on the provided
+    program name and configures the XDP map for tail adjustment. It then
+    validates the tail adjustment by sending and receiving UDP packets
+    with specified packet sizes and offsets.
+
+    Args:
+        cfg: Configuration object containing network settings.
+        pkt_sz_lst: List of packet sizes to test.
+        offset_lst: List of offsets to validate support for tail adjustment.
+
+    Returns:
+        dict: A dictionary with test status and failure details if applicable.
+    """
+    port = rand_port()
+    bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000)
+
+    prog_info = _load_xdp_prog(cfg, bpf_info)
+
+    # Configure the XDP map for tail adjustment
+    _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.TAIL_ADJST.value)
+    _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+    for offset in offset_lst:
+        tag = format(random.randint(65, 90), "02x")
+
+        _set_xdp_map("map_xdp_setup", TestConfig.ADJST_OFFSET.value, offset)
+        if offset > 0:
+            _set_xdp_map("map_xdp_setup", TestConfig.ADJST_TAG.value, int(tag, 16))
+
+        for pkt_sz in pkt_sz_lst:
+            test_str = "".join(random.choice(string.ascii_lowercase) for _ in range(pkt_sz))
+            recvd_str = _exchg_udp(cfg, port, test_str)
+            stats = _get_stats(prog_info["maps"]["map_xdp_stats"])
+
+            failure = _check_for_failures(recvd_str, stats)
+            if failure is not None:
+                return {
+                    "status": "fail",
+                    "reason": failure,
+                    "offset": offset,
+                    "pkt_sz": pkt_sz,
+                }
+
+            # Validate data content based on offset direction
+            expected_data = None
+            if offset > 0:
+                expected_data = test_str + (offset * chr(int(tag, 16)))
+            else:
+                expected_data = test_str[0:pkt_sz + offset]
+
+            if recvd_str != expected_data:
+                return {
+                    "status": "fail",
+                    "reason": "Data mismatch",
+                    "offset": offset,
+                    "pkt_sz": pkt_sz,
+                }
+
+    return {"status": "pass"}
+
+
+def test_xdp_native_adjst_tail_grow_data(cfg):
+    """
+    Tests the XDP tail adjustment by growing packet data.
+
+    Args:
+        cfg: Configuration object containing network settings.
+    """
+    pkt_sz_lst = [512, 1024, 2048]
+    offset_lst = [1, 16, 32, 64, 128, 256]
+    res = _test_xdp_native_tail_adjst(
+        cfg,
+        pkt_sz_lst,
+        offset_lst,
+    )
+
+    _validate_res(res, offset_lst, pkt_sz_lst)
+
+
+def test_xdp_native_adjst_tail_shrnk_data(cfg):
+    """
+    Tests the XDP tail adjustment by shrinking packet data.
+
+    Args:
+        cfg: Configuration object containing network settings.
+    """
+    pkt_sz_lst = [512, 1024, 2048]
+    offset_lst = [-16, -32, -64, -128, -256]
+    res = _test_xdp_native_tail_adjst(
+        cfg,
+        pkt_sz_lst,
+        offset_lst,
+    )
+
+    _validate_res(res, offset_lst, pkt_sz_lst)
+
+
 def main():
     """
     Main function to execute the XDP tests.
@@ -328,6 +500,8 @@ def main():
                 test_xdp_native_drop_sb,
                 test_xdp_native_drop_mb,
                 test_xdp_native_tx_mb,
+                test_xdp_native_adjst_tail_grow_data,
+                test_xdp_native_adjst_tail_shrnk_data,
             ],
             args=(cfg,))
         ksft_exit()
diff --git a/tools/testing/selftests/net/lib/xdp_native.bpf.c b/tools/testing/selftests/net/lib/xdp_native.bpf.c
index d3c66c891589..7bb2dff86d8f 100644
--- a/tools/testing/selftests/net/lib/xdp_native.bpf.c
+++ b/tools/testing/selftests/net/lib/xdp_native.bpf.c
@@ -10,15 +10,22 @@
 #include <bpf/bpf_endian.h>
 #include <bpf/bpf_helpers.h>
 
+#define MAX_ADJST_OFFSET 256
+#define MAX_PAYLOAD_LEN 5000
+#define MAX_HDR_LEN 64
+
 enum {
 	XDP_MODE = 0,
 	XDP_PORT = 1,
+	XDP_ADJST_OFFSET = 2,
+	XDP_ADJST_TAG = 3,
 } xdp_map_setup_keys;
 
 enum {
 	XDP_MODE_PASS = 0,
 	XDP_MODE_DROP = 1,
 	XDP_MODE_TX = 2,
+	XDP_MODE_TAIL_ADJST = 3,
 } xdp_map_modes;
 
 enum {
@@ -26,22 +33,28 @@ enum {
 	STATS_PASS = 1,
 	STATS_DROP = 2,
 	STATS_TX = 3,
+	STATS_ABORT = 4,
 } xdp_stats;
 
 struct {
 	__uint(type, BPF_MAP_TYPE_ARRAY);
-	__uint(max_entries, 2);
+	__uint(max_entries, 5);
 	__type(key, __u32);
 	__type(value, __s32);
 } map_xdp_setup SEC(".maps");
 
 struct {
 	__uint(type, BPF_MAP_TYPE_ARRAY);
-	__uint(max_entries, 4);
+	__uint(max_entries, 5);
 	__type(key, __u32);
 	__type(value, __u64);
 } map_xdp_stats SEC(".maps");
 
+static __u32 min(__u32 a, __u32 b)
+{
+	return a < b ? a : b;
+}
+
 static void record_stats(struct xdp_md *ctx, __u32 stat_type)
 {
 	__u64 *count;
@@ -195,6 +208,193 @@ static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port)
 	return XDP_PASS;
 }
 
+static void *update_pkt(struct xdp_md *ctx, __s16 offset, __u32 *udp_csum)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	struct udphdr *udph = NULL;
+	struct ethhdr *eth = data;
+	__u32 len, len_new;
+
+	if (data + sizeof(*eth) > data_end)
+		return NULL;
+
+	if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+		struct iphdr *iph = data + sizeof(*eth);
+		__u16 total_len;
+
+		if (iph + 1 > (struct iphdr *)data_end)
+			return NULL;
+
+		iph->tot_len = bpf_htons(bpf_ntohs(iph->tot_len) + offset);
+
+		udph = (void *)eth + sizeof(*iph) + sizeof(*eth);
+		if (!udph || udph + 1 > (struct udphdr *)data_end)
+			return NULL;
+
+		len_new = bpf_htons(bpf_ntohs(udph->len) + offset);
+	} else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+		struct ipv6hdr *ipv6h = data + sizeof(*eth);
+		__u16 payload_len;
+
+		if (ipv6h + 1 > (struct ipv6hdr *)data_end)
+			return NULL;
+
+		udph = (void *)eth + sizeof(*ipv6h) + sizeof(*eth);
+		if (!udph || udph + 1 > (struct udphdr *)data_end)
+			return NULL;
+
+		*udp_csum = ~((__u32)udph->check);
+
+		len = ipv6h->payload_len;
+		len_new = bpf_htons(bpf_ntohs(len) + offset);
+		ipv6h->payload_len = len_new;
+
+		*udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new,
+					  sizeof(len_new), *udp_csum);
+
+		len = udph->len;
+		len_new = bpf_htons(bpf_ntohs(udph->len) + offset);
+		*udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new,
+					  sizeof(len_new), *udp_csum);
+	} else {
+		return NULL;
+	}
+
+	udph->len = len_new;
+
+	return udph;
+}
+
+static __u16 csum_fold_helper(__u32 csum)
+{
+	return ~((csum & 0xffff) + (csum >> 16)) ? : 0xffff;
+}
+
+static int xdp_adjst_tail_shrnk_data(struct xdp_md *ctx, __u16 offset,
+				     __u32 hdr_len)
+{
+	char tmp_buff[MAX_ADJST_OFFSET];
+	__u32 buff_pos, udp_csum = 0;
+	struct udphdr *udph = NULL;
+	__u32 buff_len;
+
+	udph = update_pkt(ctx, 0 - offset, &udp_csum);
+	if (!udph)
+		return -1;
+
+	buff_len = bpf_xdp_get_buff_len(ctx);
+
+	offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET :
+		 offset & 0xff;
+	if (offset == 0)
+		return -1;
+
+	/* Make sure we have enough data to avoid eating the header */
+	if (buff_len - offset < hdr_len)
+		return -1;
+
+	buff_pos = buff_len - offset;
+	if (bpf_xdp_load_bytes(ctx, buff_pos, tmp_buff, offset) < 0)
+		return -1;
+
+	udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum);
+	udph->check = (__u16)csum_fold_helper(udp_csum);
+
+	if (bpf_xdp_adjust_tail(ctx, 0 - offset) < 0)
+		return -1;
+
+	return 0;
+}
+
+static int xdp_adjst_tail_grow_data(struct xdp_md *ctx, __u16 offset)
+{
+	char tmp_buff[MAX_ADJST_OFFSET];
+	__u32 buff_pos, udp_csum = 0;
+	__u32 buff_len, hdr_len, key;
+	struct udphdr *udph;
+	__s32 *val;
+	__u8 tag;
+
+	/* Proceed to update the packet headers before attempting to adjust
+	 * the tail. Once the tail is adjusted we lose access to the offset
+	 * amount of data at the end of the packet which is crucial to update
+	 * the checksum.
+	 * Since any failure beyond this would abort the packet, we should
+	 * not worry about passing a packet up the stack with wrong headers.
+	 */
+	udph = update_pkt(ctx, offset, &udp_csum);
+	if (!udph)
+		return -1;
+
+	key = XDP_ADJST_TAG;
+	val = bpf_map_lookup_elem(&map_xdp_setup, &key);
+	if (!val)
+		return -1;
+
+	tag = (__u8)(*val);
+
+	for (int i = 0; i < MAX_ADJST_OFFSET; i++)
+		__builtin_memcpy(&tmp_buff[i], &tag, 1);
+
+	offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET :
+		 offset & 0xff;
+	if (offset == 0)
+		return -1;
+
+	udp_csum = bpf_csum_diff(0, 0, (__be32 *)tmp_buff, offset, udp_csum);
+	udph->check = (__u16)csum_fold_helper(udp_csum);
+
+	buff_len = bpf_xdp_get_buff_len(ctx);
+
+	if (bpf_xdp_adjust_tail(ctx, offset) < 0) {
+		bpf_printk("Failed to adjust tail\n");
+		return -1;
+	}
+
+	if (bpf_xdp_store_bytes(ctx, buff_len, tmp_buff, offset) < 0)
+		return -1;
+
+	return 0;
+}
+
+static int xdp_adjst_tail(struct xdp_md *ctx, __u16 port)
+{
+	void *data = (void *)(long)ctx->data;
+	struct udphdr *udph = NULL;
+	__s32 *adjust_offset, *val;
+	__u32 key, hdr_len;
+	void *offset_ptr;
+	__u8 tag;
+	int ret;
+
+	udph = filter_udphdr(ctx, port);
+	if (!udph)
+		return XDP_PASS;
+
+	hdr_len = (void *)udph - data + sizeof(struct udphdr);
+	key = XDP_ADJST_OFFSET;
+	adjust_offset = bpf_map_lookup_elem(&map_xdp_setup, &key);
+	if (!adjust_offset)
+		return XDP_PASS;
+
+	if (*adjust_offset < 0)
+		ret = xdp_adjst_tail_shrnk_data(ctx,
+						(__u16)(0 - *adjust_offset),
+						hdr_len);
+	else
+		ret = xdp_adjst_tail_grow_data(ctx, (__u16)(*adjust_offset));
+	if (ret)
+		goto abort_pkt;
+
+	record_stats(ctx, STATS_PASS);
+	return XDP_PASS;
+
+abort_pkt:
+	record_stats(ctx, STATS_ABORT);
+	return XDP_ABORTED;
+}
+
 static int xdp_prog_common(struct xdp_md *ctx)
 {
 	__u32 key, *port;
@@ -217,6 +417,8 @@ static int xdp_prog_common(struct xdp_md *ctx)
 		return xdp_mode_drop_handler(ctx, (__u16)(*port));
 	case XDP_MODE_TX:
 		return xdp_mode_tx_handler(ctx, (__u16)(*port));
+	case XDP_MODE_TAIL_ADJST:
+		return xdp_adjst_tail(ctx, (__u16)(*port));
 	}
 
 	/* Default action is to simply pass */
Add a test to validate headroom adjustment support for both the extension and shrinking cases. For the extension case, eat up space from the start of the payload data, whereas for the shrinking case, populate the newly available space with a tag. In user space, validate that a test string is manipulated accordingly. Negative and positive offset values result in shrinking and growing of the headroom (growing and shrinking of the payload), respectively.
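For reference, the offset maps directly onto bpf_xdp_adjust_head() (illustrative fragment only, error handling reduced to an abort):

	/* offset > 0: grow the headroom, eating bytes off the front of the payload */
	if (bpf_xdp_adjust_head(ctx, 16) < 0)
		return XDP_ABORTED;

	/* offset < 0: shrink the headroom, prepending bytes to the payload */
	if (bpf_xdp_adjust_head(ctx, -16) < 0)
		return XDP_ABORTED;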
TAP version 13
1..9
ok 1 xdp.test_xdp_native_pass_sb
ok 2 xdp.test_xdp_native_pass_mb
ok 3 xdp.test_xdp_native_drop_sb
ok 4 xdp.test_xdp_native_drop_mb
ok 5 xdp.test_xdp_native_tx_mb
# Failed run: pkt_sz 512, ... offset 1. Reason: Adjustment failed
ok 6 xdp.test_xdp_native_adjst_tail_grow_data
ok 7 xdp.test_xdp_native_adjst_tail_shrnk_data
# Failed run: pkt_sz 512, ... offset -128. Reason: Adjustment failed
ok 8 xdp.test_xdp_native_adjst_head_grow_data
# Failed run: pkt_sz (512) > HDS threshold (0) and offset 64 > 48
ok 9 xdp.test_xdp_native_adjst_head_shrnk_data
# Totals: pass:9 fail:0 xfail:0 xpass:0 skip:0 error:0
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Mohsin Bashir <mohsin.bashr@gmail.com>
---
 tools/testing/selftests/drivers/net/xdp.py    | 147 +++++++++++++-
 .../selftests/net/lib/xdp_native.bpf.c        | 181 ++++++++++++++++++
 2 files changed, 327 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/drivers/net/xdp.py b/tools/testing/selftests/drivers/net/xdp.py
index 85b5db0a1121..887d662ad128 100755
--- a/tools/testing/selftests/drivers/net/xdp.py
+++ b/tools/testing/selftests/drivers/net/xdp.py
@@ -12,7 +12,7 @@ from dataclasses import dataclass
 from enum import Enum
 
 from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ne, ksft_pr
-from lib.py import KsftFailEx, NetDrvEpEnv
+from lib.py import KsftFailEx, NetDrvEpEnv, EthtoolFamily, NlError
 from lib.py import bkg, cmd, rand_port
 from lib.py import ip, bpftool, defer
 
@@ -31,6 +31,7 @@ class XDPAction(Enum):
     DROP = 1  # Drop the packet
     TX = 2    # Route the packet to the remote host
     TAIL_ADJST = 3  # Adjust the tail of the packet
+    HEAD_ADJST = 4  # Adjust the head of the packet
 
 
 class XDPStats(Enum):
@@ -483,6 +484,147 @@ def test_xdp_native_adjst_tail_shrnk_data(cfg):
     _validate_res(res, offset_lst, pkt_sz_lst)
 
 
+def get_hds_thresh(cfg):
+    """
+    Retrieves the header data split (HDS) threshold for a network interface.
+
+    Args:
+        cfg: Configuration object containing network settings.
+
+    Returns:
+        The HDS threshold value. If the threshold is not supported or an error occurs,
+        a default value of 1500 is returned.
+    """
+    netnl = cfg.netnl
+    hds_thresh = 1500
+
+    try:
+        rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+        if 'hds-thresh' not in rings:
+            ksft_pr(f'hds-thresh not supported. Using default: {hds_thresh}')
+            return hds_thresh
+        hds_thresh = rings['hds-thresh']
+    except NlError as e:
+        ksft_pr(f"Failed to get rings: {e}. Using default: {hds_thresh}")
+
+    return hds_thresh
+
+
+def _test_xdp_native_head_adjst(cfg, prog, pkt_sz_lst, offset_lst):
+    """
+    Tests the XDP head adjustment action for a multi-buffer case.
+
+    Args:
+        cfg: Configuration object containing network settings.
+        prog: Name of the XDP program to load.
+        pkt_sz_lst: List of packet sizes to test.
+        offset_lst: List of offsets to validate support for head adjustment.
+
+    This function performs the head adjustment test by sending and
+    receiving UDP packets for the given packet sizes and offsets.
+    """
+    cfg.require_cmd("socat", remote=True)
+
+    prog_info = _load_xdp_prog(cfg, BPFProgInfo(prog, "xdp_native.bpf.o", "xdp.frags", 9000))
+    port = rand_port()
+
+    _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.HEAD_ADJST.value)
+    _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+    hds_thresh = get_hds_thresh(cfg)
+    for offset in offset_lst:
+        for pkt_sz in pkt_sz_lst:
+            # The "head" buffer must contain at least the Ethernet header
+            # after we eat into it. We send large-enough packets, but if HDS
+            # is enabled head will only contain headers. Don't try to eat
+            # more than 28 bytes (UDPv4 + eth hdr left: (14 + 20 + 8) - 14)
+            l2_cut_off = 28 if cfg.addr_ipver == 4 else 48
+            if pkt_sz > hds_thresh and offset > l2_cut_off:
+                ksft_pr(
+                    f"Failed run: pkt_sz ({pkt_sz}) > HDS threshold ({hds_thresh}) and "
+                    f"offset {offset} > {l2_cut_off}"
+                )
+                return {"status": "pass"}
+
+            test_str = ''.join(random.choice(string.ascii_lowercase) for _ in range(pkt_sz))
+            tag = format(random.randint(65, 90), '02x')
+
+            _set_xdp_map("map_xdp_setup",
+                         TestConfig.ADJST_OFFSET.value,
+                         offset)
+            _set_xdp_map("map_xdp_setup", TestConfig.ADJST_TAG.value, int(tag, 16))
+            _set_xdp_map("map_xdp_setup", TestConfig.ADJST_OFFSET.value, offset)
+
+            recvd_str = _exchg_udp(cfg, port, test_str)
+
+            # Check for failures around adjustment and data exchange
+            failure = _check_for_failures(recvd_str, _get_stats(prog_info['maps']['map_xdp_stats']))
+            if failure is not None:
+                return {
+                    "status": "fail",
+                    "reason": failure,
+                    "offset": offset,
+                    "pkt_sz": pkt_sz
+                }
+
+            # Validate data content based on offset direction
+            expected_data = None
+            if offset < 0:
+                expected_data = chr(int(tag, 16)) * (0 - offset) + test_str
+            else:
+                expected_data = test_str[offset:]
+
+            if recvd_str != expected_data:
+                return {
+                    "status": "fail",
+                    "reason": "Data mismatch",
+                    "offset": offset,
+                    "pkt_sz": pkt_sz
+                }
+
+    return {"status": "pass"}
+
+
+def test_xdp_native_adjst_head_grow_data(cfg):
+    """
+    Tests the XDP headroom growth support.
+
+    Args:
+        cfg: Configuration object containing network settings.
+
+    This function sets up the packet size and offset lists, then calls the
+    _test_xdp_native_head_adjst function to perform the actual test. The
+    test is passed if the headroom is successfully extended for given packet
+    sizes and offsets.
+    """
+    pkt_sz_lst = [512, 1024, 2048]
+
+    # Negative values result in headroom shrinking, resulting in growing of payload
+    offset_lst = [-16, -32, -64, -128, -256]
+    res = _test_xdp_native_head_adjst(cfg, "xdp_prog_frags", pkt_sz_lst, offset_lst)
+
+    _validate_res(res, offset_lst, pkt_sz_lst)
+
+
+def test_xdp_native_adjst_head_shrnk_data(cfg):
+    """
+    Tests the XDP headroom shrinking support.
+
+    Args:
+        cfg: Configuration object containing network settings.
+
+    This function sets up the packet size and offset lists, then calls the
+    _test_xdp_native_head_adjst function to perform the actual test. The
+    test is passed if the headroom is successfully shrunk for given packet
+    sizes and offsets.
+    """
+    pkt_sz_lst = [512, 1024, 2048]
+
+    # Positive values result in headroom growing, resulting in shrinking of payload
+    offset_lst = [16, 32, 64, 128, 256]
+    res = _test_xdp_native_head_adjst(cfg, "xdp_prog_frags", pkt_sz_lst, offset_lst)
+
+    _validate_res(res, offset_lst, pkt_sz_lst)
+
+
 def main():
     """
     Main function to execute the XDP tests.
@@ -493,6 +635,7 @@ def main():
     function to execute the tests.
     """
     with NetDrvEpEnv(__file__) as cfg:
+        cfg.netnl = EthtoolFamily()
         ksft_run(
             [
                 test_xdp_native_pass_sb,
@@ -502,6 +645,8 @@ def main():
                 test_xdp_native_tx_mb,
                 test_xdp_native_adjst_tail_grow_data,
                 test_xdp_native_adjst_tail_shrnk_data,
+                test_xdp_native_adjst_head_grow_data,
+                test_xdp_native_adjst_head_shrnk_data,
             ],
             args=(cfg,))
         ksft_exit()
diff --git a/tools/testing/selftests/net/lib/xdp_native.bpf.c b/tools/testing/selftests/net/lib/xdp_native.bpf.c
index 7bb2dff86d8f..521ba38f2ddd 100644
--- a/tools/testing/selftests/net/lib/xdp_native.bpf.c
+++ b/tools/testing/selftests/net/lib/xdp_native.bpf.c
@@ -26,6 +26,7 @@ enum {
 	XDP_MODE_DROP = 1,
 	XDP_MODE_TX = 2,
 	XDP_MODE_TAIL_ADJST = 3,
+	XDP_MODE_HEAD_ADJST = 4,
 } xdp_map_modes;
 
 enum {
@@ -395,6 +396,184 @@ static int xdp_adjst_tail(struct xdp_md *ctx, __u16 port)
 	return XDP_ABORTED;
 }
 
+static int xdp_adjst_head_shrnk_data(struct xdp_md *ctx, __u64 hdr_len,
+				     __u32 offset)
+{
+	char tmp_buff[MAX_ADJST_OFFSET];
+	struct udphdr *udph;
+	void *offset_ptr;
+	__u32 udp_csum = 0;
+
+	/* Update the length information in the IP and UDP headers before
+	 * adjusting the headroom. This simplifies accessing the relevant
+	 * fields in the IP and UDP headers for fragmented packets. Any
+	 * failure beyond this point will result in the packet being aborted,
+	 * so we don't need to worry about incorrect length information for
+	 * passed packets.
+	 */
+	udph = update_pkt(ctx, (__s16)(0 - offset), &udp_csum);
+	if (!udph)
+		return -1;
+
+	offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET :
+		 offset & 0xff;
+	if (offset == 0)
+		return -1;
+
+	if (bpf_xdp_load_bytes(ctx, hdr_len, tmp_buff, offset) < 0)
+		return -1;
+
+	udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum);
+
+	udph->check = (__u16)csum_fold_helper(udp_csum);
+
+	if (bpf_xdp_load_bytes(ctx, 0, tmp_buff, MAX_ADJST_OFFSET) < 0)
+		return -1;
+
+	if (bpf_xdp_adjust_head(ctx, offset) < 0)
+		return -1;
+
+	if (offset > MAX_ADJST_OFFSET)
+		return -1;
+
+	if (hdr_len > MAX_ADJST_OFFSET || hdr_len == 0)
+		return -1;
+
+	/* Added here to handle clang complain about negative value */
+	hdr_len = hdr_len & 0xff;
+
+	if (hdr_len == 0)
+		return -1;
+
+	if (bpf_xdp_store_bytes(ctx, 0, tmp_buff, hdr_len) < 0)
+		return -1;
+
+	return 0;
+}
+
+static int xdp_adjst_head_grow_data(struct xdp_md *ctx, __u64 hdr_len,
+				    __u32 offset)
+{
+	char hdr_buff[MAX_HDR_LEN];
+	char data_buff[MAX_ADJST_OFFSET];
+	void *offset_ptr;
+	__s32 *val;
+	__u32 key;
+	__u8 tag;
+	__u32 udp_csum = 0;
+	struct udphdr *udph;
+
+	udph = update_pkt(ctx, (__s16)(offset), &udp_csum);
+	if (!udph)
+		return -1;
+
+	key = XDP_ADJST_TAG;
+	val = bpf_map_lookup_elem(&map_xdp_setup, &key);
+	if (!val)
+		return -1;
+
+	tag = (__u8)(*val);
+	for (int i = 0; i < MAX_ADJST_OFFSET; i++)
+		__builtin_memcpy(&data_buff[i], &tag, 1);
+
+	offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET :
+		 offset & 0xff;
+	if (offset == 0)
+		return -1;
+
+	udp_csum = bpf_csum_diff(0, 0, (__be32 *)data_buff, offset, udp_csum);
+	udph->check = (__u16)csum_fold_helper(udp_csum);
+
+	if (hdr_len > MAX_ADJST_OFFSET || hdr_len == 0)
+		return -1;
+
+	/* Added here to handle clang complain about negative value */
+	hdr_len = hdr_len & 0xff;
+
+	if (hdr_len == 0)
+		return -1;
+
+	if (bpf_xdp_load_bytes(ctx, 0, hdr_buff, hdr_len) < 0)
+		return -1;
+
+	if (offset > MAX_ADJST_OFFSET)
+		return -1;
+
+	if (bpf_xdp_adjust_head(ctx, 0 - offset) < 0)
+		return -1;
+
+	if (bpf_xdp_store_bytes(ctx, 0, hdr_buff, hdr_len) < 0)
+		return -1;
+
+	if (bpf_xdp_store_bytes(ctx, hdr_len, data_buff, offset) < 0)
+		return -1;
+
+	return 0;
+}
+
+static int xdp_head_adjst(struct xdp_md *ctx, __u16 port)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	struct udphdr *udph_ptr = NULL;
+	__u32 key, size, hdr_len;
+	__s32 *val;
+	int res;
+
+	/* Filter packets based on UDP port */
+	udph_ptr = filter_udphdr(ctx, port);
+	if (!udph_ptr)
+		return XDP_PASS;
+
+	hdr_len = (void *)udph_ptr - data + sizeof(struct udphdr);
+
+	key = XDP_ADJST_OFFSET;
+	val = bpf_map_lookup_elem(&map_xdp_setup, &key);
+	if (!val)
+		return XDP_PASS;
+
+	switch (*val) {
+	case -16:
+	case 16:
+		size = 16;
+		break;
+	case -32:
+	case 32:
+		size = 32;
+		break;
+	case -64:
+	case 64:
+		size = 64;
+		break;
+	case -128:
+	case 128:
+		size = 128;
+		break;
+	case -256:
+	case 256:
+		size = 256;
+		break;
+	default:
+		bpf_printk("Invalid adjustment offset: %d\n", *val);
+		goto abort;
+	}
+
+	if (*val < 0)
+		res = xdp_adjst_head_grow_data(ctx, hdr_len, size);
+	else
+		res = xdp_adjst_head_shrnk_data(ctx, hdr_len, size);
+
+	if (res)
+		goto abort;
+
+	record_stats(ctx, STATS_PASS);
+	return XDP_PASS;
+
+abort:
+	record_stats(ctx, STATS_ABORT);
+	return XDP_ABORTED;
+}
+
 static int xdp_prog_common(struct xdp_md *ctx)
 {
 	__u32 key, *port;
@@ -419,6 +598,8 @@ static int xdp_prog_common(struct xdp_md *ctx)
 		return xdp_mode_tx_handler(ctx, (__u16)(*port));
 	case XDP_MODE_TAIL_ADJST:
 		return xdp_adjst_tail(ctx, (__u16)(*port));
+	case XDP_MODE_HEAD_ADJST:
+		return xdp_head_adjst(ctx, (__u16)(*port));
 	}
 
 	/* Default action is to simply pass */