- Linux-kselftest-mirror - lists.linaro.org

[PATCH net 2/2] selftests: drv-net: test installing XDP with HDS set to auto

by Jakub Kicinski

Testing bnxt: # NETIF=eth0 ./ksft-net-drv/drivers/net/hds.py KTAP version 1 1..9 ok 1 hds.get_hds ok 2 hds.get_hds_thresh ok 3 hds.set_hds_disable # SKIP disabling of HDS not supported by the device ok 4 hds.set_hds_enable ok 5 hds.set_hds_thresh_zero ok 6 hds.set_hds_thresh_max ok 7 hds.set_hds_thresh_gt ok 8 hds.set_xdp ok 9 hds.set_xdp_enabled # Totals: pass:8 fail:0 xfail:0 xpass:0 skip:1 error:0 Signed-off-by: Jakub Kicinski <kuba(a)kernel.org> --- CC: shuah(a)kernel.org CC: hawk(a)kernel.org CC: petrm(a)nvidia.com CC: willemb(a)google.com CC: jstancek(a)redhat.com CC: linux-kselftest(a)vger.kernel.org --- tools/testing/selftests/net/lib/Makefile | 3 + .../testing/selftests/net/lib/xdp_dummy.bpf.c | 13 +++++ tools/testing/selftests/drivers/net/hds.py | 55 ++++++++++++++++++- 3 files changed, 68 insertions(+), 3 deletions(-) create mode 100644 tools/testing/selftests/net/lib/xdp_dummy.bpf.c diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile index bc6b6762baf3..c22623b9a2a5 100644 --- a/tools/testing/selftests/net/lib/Makefile +++ b/tools/testing/selftests/net/lib/Makefile @@ -9,7 +9,10 @@ TEST_FILES := ../../../../../Documentation/netlink/specs TEST_FILES += ../../../../net/ynl TEST_GEN_FILES += csum +TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c)) TEST_INCLUDES := $(wildcard py/*.py sh/*.sh) include ../../lib.mk + +include ../bpf.mk diff --git a/tools/testing/selftests/net/lib/xdp_dummy.bpf.c b/tools/testing/selftests/net/lib/xdp_dummy.bpf.c new file mode 100644 index 000000000000..d988b2e0cee8 --- /dev/null +++ b/tools/testing/selftests/net/lib/xdp_dummy.bpf.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define KBUILD_MODNAME "xdp_dummy" +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +SEC("xdp") +int xdp_dummy_prog(struct xdp_md *ctx) +{ + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/drivers/net/hds.py 
b/tools/testing/selftests/drivers/net/hds.py index 394971b25c0b..cd1a0eea39a8 100755 --- a/tools/testing/selftests/drivers/net/hds.py +++ b/tools/testing/selftests/drivers/net/hds.py @@ -2,17 +2,26 @@ # SPDX-License-Identifier: GPL-2.0 import errno +import os from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_raises, KsftSkipEx -from lib.py import EthtoolFamily, NlError +from lib.py import CmdExitFailure, EthtoolFamily, NlError from lib.py import NetDrvEnv +from lib.py import defer, ip -def get_hds(cfg, netnl) -> None: + +def _get_hds_mode(cfg, netnl) -> str: try: rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) except NlError as e: raise KsftSkipEx('ring-get not supported by device') if 'tcp-data-split' not in rings: raise KsftSkipEx('tcp-data-split not supported by device') + return rings['tcp-data-split'] + + +def get_hds(cfg, netnl) -> None: + _get_hds_mode(cfg, netnl) + def get_hds_thresh(cfg, netnl) -> None: try: @@ -104,6 +113,44 @@ from lib.py import NetDrvEnv netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': hds_gt}) ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + +def set_xdp(cfg, netnl) -> None: + """ + Enable single-buffer XDP on the device. + When HDS is in "auto" / UNKNOWN mode, XDP installation should work. + """ + mode = _get_hds_mode(cfg, netnl) + if mode == 'enabled': + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'unknown'}) + + test_dir = os.path.dirname(os.path.realpath(__file__)) + prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + ip(f"link set dev %s xdp obj %s sec xdp" % + (cfg.ifname, prog)) + ip(f"link set dev %s xdp off" % cfg.ifname) + + +def set_xdp_enabled(cfg, netnl) -> None: + """ + Enable single-buffer XDP on the device. + When HDS is in "enabled" mode, XDP installation should not work. 
+ """ + _get_hds_mode(cfg, netnl) + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'enabled'}) + + defer(netnl.rings_set, {'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'unknown'}) + + test_dir = os.path.dirname(os.path.realpath(__file__)) + prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + with ksft_raises(CmdExitFailure) as e: + ip(f"link set dev %s xdp obj %s sec xdp" % + (cfg.ifname, prog)) + ip(f"link set dev %s xdp off" % cfg.ifname) + + def main() -> None: with NetDrvEnv(__file__, queue_count=3) as cfg: ksft_run([get_hds, @@ -112,7 +159,9 @@ from lib.py import NetDrvEnv set_hds_enable, set_hds_thresh_zero, set_hds_thresh_max, - set_hds_thresh_gt], + set_hds_thresh_gt, + set_xdp, + set_xdp_enabled], args=(cfg, EthtoolFamily())) ksft_exit() -- 2.48.1

6 months

1
0
0 0

[PATCH] selftests/bpf: DENYLIST.aarch64: enable kprobe_multi tests for ARM64

by Alexis Lothoré (eBPF Foundation)

The kprobe_multi feature was disabled on ARM64 due to the lack of fprobe support. The fprobe rewrite on function_graph has been recently merged and thus brought support for fprobes on arm64. This then enables kprobe_multi support on arm64, and so the corresponding tests can now be run on this architecture. Remove the tests depending on kprobe_multi from DENYLIST.aarch64 to allow those to run in CI. CONFIG_FPROBE is already correctly set in tools/testing/selftests/bpf/config Signed-off-by: Alexis Lothoré (eBPF Foundation) <alexis.lothore(a)bootlin.com> --- The tests being enabled with this series have been run locally in an ARM64 qemu environment, and in Github CI. I only did some testing to ensure that the tests depending on kprobe_multi now run correctly on arm64, it is fair to stress that all the hard work has actually been done by M. Hiramatsu ([0]) [0] https://lore.kernel.org/bpf/173518987627.391279.3307342580035322889.stgit@d… --- tools/testing/selftests/bpf/DENYLIST.aarch64 | 9 --------- 1 file changed, 9 deletions(-) diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 index 901349da680fa67896d279d184db78e964d9ae27..6d8feda27ce9de07d77d6e384666082923e3dc76 100644 --- a/tools/testing/selftests/bpf/DENYLIST.aarch64 +++ b/tools/testing/selftests/bpf/DENYLIST.aarch64 @@ -1,12 +1,3 @@ -bpf_cookie/multi_kprobe_attach_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3 -bpf_cookie/multi_kprobe_link_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3 -kprobe_multi_bench_attach # needs CONFIG_FPROBE -kprobe_multi_test # needs CONFIG_FPROBE -module_attach # prog 'kprobe_multi': failed to auto-attach: -95 fentry_test/fentry_many_args # fentry_many_args:FAIL:fentry_many_args_attach unexpected error: -524 fexit_test/fexit_many_args # fexit_many_args:FAIL:fexit_many_args_attach unexpected error: -524 tracing_struct/struct_many_args # 
struct_many_args:FAIL:tracing_struct_many_args__attach unexpected error: -524 -fill_link_info/kprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95 -fill_link_info/kretprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95 -fill_link_info/kprobe_multi_invalid_ubuff # bpf_program__attach_kprobe_multi_opts unexpected error: -95 -missed/kprobe_recursion # missed_kprobe_recursion__attach unexpected error: -95 (errno 95) --- base-commit: d3417ac824b98e8773bc04b93e09c4b93c2c6cad change-id: 20250219-enable_kprobe_multi_tests-c8d53336e5cd Best regards, -- Alexis Lothoré, Bootlin Embedded Linux and Kernel engineering https://bootlin.com

6 months

2
1
0 0

[PATCH v8] Kunit to check the longest symbol length

by Sergio González Collado

The longest length of a symbol (KSYM_NAME_LEN) was increased to 512 in the reference [1]. This patch adds a kunit test suite to check the longest symbol length. These tests verify that the longest symbol length defined is supported. This test can also help other efforts for longer symbol length, like [2]. The test suite defines one symbol with the longest possible length. The first test verifies whether functions with the name of the created symbol can be called or not. The second test verifies whether the symbols are created (or not) in the kernel symbol table. [1] https://lore.kernel.org/lkml/20220802015052.10452-6-ojeda@kernel.org/ [2] https://lore.kernel.org/lkml/20240605032120.3179157-1-song@kernel.org/ Tested-by: Martin Rodriguez Reboredo <yakoyoku(a)gmail.com> Reviewed-by: Shuah Khan <skhan(a)linuxfoundation.org> Reviewed-by: Rae Moar <rmoar(a)google.com> Signed-off-by: Sergio González Collado <sergio.collado(a)gmail.com> Link: https://github.com/Rust-for-Linux/linux/issues/504 --- V7 -> V8: typo fixed & rebased --- V6 -> V7: rebased --- V5 -> V6: remove tests with symbols of length KSYM_NAME_LEN+1 --- V4 -> V5: fixed typo, added improved description --- V3 -> V4: add x86 maintainers, add new reference. --- V2 -> V3: updated base and added MODULE_DESCRIPTION() and MODULE_AUTHOR() --- V1 -> V2: corrected CI tests. Added fix proposed at [3] [3] https://lore.kernel.org/lkml/Y9ES4UKl%2F+DtvAVS@gmail.com/T/#m3ef0e12bb834d… The test execution should result in something like: ``` [20:04:35] =============== longest-symbol (4 subtests) ================ [20:04:35] [PASSED] test_longest_symbol [20:04:35] [PASSED] test_longest_symbol_kallsyms [20:04:35] ================= [PASSED] longest-symbol ================== [20:04:35] ============================================================ [20:04:35] Testing complete.
Ran 4 tests: passed: 4 ``` --- arch/x86/tools/insn_decoder_test.c | 3 +- lib/Kconfig.debug | 9 ++++ lib/Makefile | 2 + lib/longest_symbol_kunit.c | 84 ++++++++++++++++++++++++++++++ 4 files changed, 97 insertions(+), 1 deletion(-) create mode 100644 lib/longest_symbol_kunit.c diff --git a/arch/x86/tools/insn_decoder_test.c b/arch/x86/tools/insn_decoder_test.c index 472540aeabc2..6c2986d2ad11 100644 --- a/arch/x86/tools/insn_decoder_test.c +++ b/arch/x86/tools/insn_decoder_test.c @@ -10,6 +10,7 @@ #include <assert.h> #include <unistd.h> #include <stdarg.h> +#include <linux/kallsyms.h> #define unlikely(cond) (cond) @@ -106,7 +107,7 @@ static void parse_args(int argc, char **argv) } } -#define BUFSIZE 256 +#define BUFSIZE (256 + KSYM_NAME_LEN) int main(int argc, char **argv) { diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1af972a92d06..62d43aa9e8f0 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2838,6 +2838,15 @@ config FORTIFY_KUNIT_TEST by the str*() and mem*() family of functions. For testing runtime traps of FORTIFY_SOURCE, see LKDTM's "FORTIFY_*" tests. +config LONGEST_SYM_KUNIT_TEST + tristate "Test the longest symbol possible" if !KUNIT_ALL_TESTS + depends on KUNIT && KPROBES + default KUNIT_ALL_TESTS + help + Tests the longest symbol possible + + If unsure, say N. 
+ config HW_BREAKPOINT_KUNIT_TEST bool "Test hw_breakpoint constraints accounting" if !KUNIT_ALL_TESTS depends on HAVE_HW_BREAKPOINT diff --git a/lib/Makefile b/lib/Makefile index d5cfc7afbbb8..e8fec9defec2 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -393,6 +393,8 @@ obj-$(CONFIG_FORTIFY_KUNIT_TEST) += fortify_kunit.o obj-$(CONFIG_CRC_KUNIT_TEST) += crc_kunit.o obj-$(CONFIG_SIPHASH_KUNIT_TEST) += siphash_kunit.o obj-$(CONFIG_USERCOPY_KUNIT_TEST) += usercopy_kunit.o +obj-$(CONFIG_LONGEST_SYM_KUNIT_TEST) += longest_symbol_kunit.o +CFLAGS_longest_symbol_kunit.o += $(call cc-disable-warning, missing-prototypes) obj-$(CONFIG_GENERIC_LIB_DEVMEM_IS_ALLOWED) += devmem_is_allowed.o diff --git a/lib/longest_symbol_kunit.c b/lib/longest_symbol_kunit.c new file mode 100644 index 000000000000..2a2dd1151097 --- /dev/null +++ b/lib/longest_symbol_kunit.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test the longest symbol length. Execute with: + * ./tools/testing/kunit/kunit.py run longest-symbol + * --arch=x86_64 --kconfig_add CONFIG_KPROBES=y --kconfig_add CONFIG_MODULES=y + * --kconfig_add CONFIG_RETPOLINE=n --kconfig_add CONFIG_CFI_CLANG=n + * --kconfig_add CONFIG_MITIGATION_RETPOLINE=n + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <kunit/test.h> +#include <linux/stringify.h> +#include <linux/kprobes.h> +#include <linux/kallsyms.h> + +#define DI(name) s##name##name +#define DDI(name) DI(n##name##name) +#define DDDI(name) DDI(n##name##name) +#define DDDDI(name) DDDI(n##name##name) +#define DDDDDI(name) DDDDI(n##name##name) + +#define PLUS1(name) __PASTE(name, e) + +/*Generate a symbol whose name length is 511 */ +#define LONGEST_SYM_NAME DDDDDI(g1h2i3j4k5l6m7n) + +#define RETURN_LONGEST_SYM 0xAAAAA + +noinline int LONGEST_SYM_NAME(void); +noinline int LONGEST_SYM_NAME(void) +{ + return RETURN_LONGEST_SYM; +} + +_Static_assert(sizeof(__stringify(LONGEST_SYM_NAME)) == KSYM_NAME_LEN, +"Incorrect symbol length found. 
Expected KSYM_NAME_LEN: " +__stringify(KSYM_NAME) ", but found: " +__stringify(sizeof(LONGEST_SYM_NAME))); + +static void test_longest_symbol(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, RETURN_LONGEST_SYM, LONGEST_SYM_NAME()); +}; + +static void test_longest_symbol_kallsyms(struct kunit *test) +{ + unsigned long (*kallsyms_lookup_name)(const char *name); + static int (*longest_sym)(void); + + struct kprobe kp = { + .symbol_name = "kallsyms_lookup_name", + }; + + if (register_kprobe(&kp) < 0) { + pr_info("%s: kprobe not registered\n", __func__); + KUNIT_FAIL(test, "test_longest_symbol kallsyms: kprobe not registered\n"); + return; + } + + kunit_warn(test, "test_longest_symbol kallsyms: kprobe registered\n"); + kallsyms_lookup_name = (unsigned long (*)(const char *name))kp.addr; + unregister_kprobe(&kp); + + longest_sym = + (void *) kallsyms_lookup_name(__stringify(LONGEST_SYM_NAME)); + KUNIT_EXPECT_EQ(test, RETURN_LONGEST_SYM, longest_sym()); +}; + +static struct kunit_case longest_symbol_test_cases[] = { + KUNIT_CASE(test_longest_symbol), + KUNIT_CASE(test_longest_symbol_kallsyms), + {} +}; + +static struct kunit_suite longest_symbol_test_suite = { + .name = "longest-symbol", + .test_cases = longest_symbol_test_cases, +}; +kunit_test_suite(longest_symbol_test_suite); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Test the longest symbol length"); +MODULE_AUTHOR("Sergio González Collado"); base-commit: a86bf2283d2c9769205407e2b54777c03d012939 -- 2.39.2

6 months

2
3
0 0

[PATCH v4 0/3] printf: convert self-test to KUnit

by Tamir Duberstein

This is one of just 3 remaining "Test Module" kselftests (the others being bitmap and scanf), the rest having been converted to KUnit. I tested this using: $ tools/testing/kunit/kunit.py run --arch arm64 --make_options LLVM=1 printf I have also sent out a series converting scanf[0]. Link: https://lore.kernel.org/all/20250204-scanf-kunit-convert-v3-0-386d7c3ee714@… [0] Signed-off-by: Tamir Duberstein <tamird(a)gmail.com> --- Changes in v4: - Add patch "implicate test line in failure messages". - Rebase on linux-next, move scanf_kunit.c into lib/tests/. - Link to v3: https://lore.kernel.org/r/20250210-printf-kunit-convert-v3-0-ee6ac5500f5e@g… Changes in v3: - Remove extraneous trailing newlines from failure messages. - Replace `pr_warn` with `kunit_warn`. - Drop arch changes. - Remove KUnit boilerplate from CONFIG_PRINTF_KUNIT_TEST help text. - Restore `total_tests` counting. - Remove tc_fail macro in last patch. - Link to v2: https://lore.kernel.org/r/20250207-printf-kunit-convert-v2-0-057b23860823@g… Changes in v2: - Incorporate code review from prior work[0] by Arpitha Raghunandan. - Link to v1: https://lore.kernel.org/r/20250204-printf-kunit-convert-v1-0-ecf1b846a4de@g… Link: https://lore.kernel.org/lkml/20200817043028.76502-1-98.arpi@gmail.com/t/#u [0] --- Tamir Duberstein (3): printf: convert self-test to KUnit printf: break kunit into test cases printf: implicate test line in failure messages Documentation/core-api/printk-formats.rst | 4 +- MAINTAINERS | 2 +- lib/Kconfig.debug | 12 +- lib/Makefile | 1 - lib/tests/Makefile | 1 + lib/{test_printf.c => tests/printf_kunit.c} | 437 ++++++++++++---------------- tools/testing/selftests/lib/config | 1 - tools/testing/selftests/lib/printf.sh | 4 - 8 files changed, 200 insertions(+), 262 deletions(-) --- base-commit: 7b7a883c7f4de1ee5040bd1c32aabaafde54d209 change-id: 20250131-printf-kunit-convert-fd4012aa2ec6 Best regards, -- Tamir Duberstein <tamird(a)gmail.com>

6 months

2
5
0 0

[PATCH] spelling mistake

by goralbaris

Signed-off-by: goralbaris <goralbaris(a)gmail.com> --- tools/testing/selftests/kvm/x86_64/hyperv_features.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c index 068e9c69710d..db442bf3b8d3 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c @@ -457,7 +457,7 @@ static void guest_test_msrs_access(void) msr->fault_expected = true; break; case 45: - /* MSR is vailable when CPUID feature bit is set */ + /* MSR is available when CPUID feature bit is set */ if (!has_invtsc) goto next_stage; vcpu_set_cpuid_feature(vcpu, HV_ACCESS_TSC_INVARIANT); -- 2.34.1

6 months

1
0
0 0

[PATCH bpf-next v2 0/4] selftests/bpf: tc_links/tc_opts: Unserialize tests

by Bastien Curutchet (eBPF Foundation)

Hi all, Both tc_links.c and tc_opts.c do their tests on the loopback interface. This prevents their executions from being parallelized. Add a new behaviour to the test_progs framework that creates and opens a new network namespace to run a test in it. This is done automatically on tests whose names start with 'ns_'. One test already has a name starting with 'ns_', so PATCH 1 renames it to avoid conflicts. PATCH 2 introduces the test_progs 'feature'. PATCH 3 & 4 convert some tests to use these dedicated namespaces. Signed-off-by: Bastien Curutchet (eBPF Foundation) <bastien.curutchet(a)bootlin.com> --- Changes in v2: - Handle the netns creation / opening directly in test_progs - Link to v1: https://lore.kernel.org/bpf/e3838d93-04e3-4e96-af53-e9e63550d7ba@bootlin.com --- Bastien Curutchet (eBPF Foundation) (4): selftests/bpf: ns_current_pid_tgid: Rename the test function selftests/bpf: Optionally open a dedicated namespace to run test in it selftests/bpf: tc_links/tc_opts: Unserialize tests selftests/bpf: ns_current_pid_tgid: Use test_progs's ns_ feature .../selftests/bpf/prog_tests/ns_current_pid_tgid.c | 47 ++++++++-------------- tools/testing/selftests/bpf/prog_tests/tc_links.c | 28 ++++++------- tools/testing/selftests/bpf/prog_tests/tc_opts.c | 40 +++++++++--------- tools/testing/selftests/bpf/test_progs.c | 12 ++++++ 4 files changed, 63 insertions(+), 64 deletions(-) --- base-commit: a814b9be27fb3c3f49343aee4b015b76f5875558 change-id: 20250219-b4-tc_links-b6d5bf709e1f Best regards, -- Bastien Curutchet (eBPF Foundation) <bastien.curutchet(a)bootlin.com>

6 months

3
6
0 0

[PATCH v1 00/13] iommu: Add MSI mapping support with nested SMMU

by Nicolin Chen

[ Background ] On ARM GIC systems and others, the target address of the MSI is translated by the IOMMU. For GIC, the MSI address page is called "ITS" page. When the IOMMU is disabled, the MSI address is programmed to the physical location of the GIC ITS page (e.g. 0x20200000). When the IOMMU is enabled, the ITS page is behind the IOMMU, so the MSI address is programmed to an allocated IO virtual address (a.k.a IOVA), e.g. 0xFFFF0000, which must be mapped to the physical ITS page: IOVA (0xFFFF0000) ===> PA (0x20200000). When a 2-stage translation is enabled, IOVA will be still used to program the MSI address, though the mappings will be in two stages: IOVA (0xFFFF0000) ===> IPA (e.g. 0x80900000) ===> PA (0x20200000) (IPA stands for Intermediate Physical Address). If the device that generates MSI is attached to an IOMMU_DOMAIN_DMA, the IOVA is dynamically allocated from the top of the IOVA space. If attached to an IOMMU_DOMAIN_UNMANAGED (e.g. a VFIO passthrough device), the IOVA is fixed to an MSI window reported by the IOMMU driver via IOMMU_RESV_SW_MSI, which is hardwired to MSI_IOVA_BASE (IOVA==0x8000000) for ARM IOMMUs. So far, this IOMMU_RESV_SW_MSI works well as kernel is entirely in charge of the IOMMU translation (1-stage translation), since the IOVA for the ITS page is fixed and known by kernel. However, with virtual machine enabling a nested IOMMU translation (2-stage), a guest kernel directly controls the stage-1 translation with an IOMMU_DOMAIN_DMA, mapping a vITS page (at an IPA 0x80900000) onto its own IOVA space (e.g. 0xEEEE0000). Then, the host kernel can't know that guest-level IOVA to program the MSI address. There have been two approaches to solve this problem: 1. Create an identity mapping in the stage-1. VMM could insert a few RMRs (Reserved Memory Regions) in guest's IORT. Then the guest kernel would fetch these RMR entries from the IORT and create an IOMMU_RESV_DIRECT region per iommu group for a direct mapping. 
Eventually, the mappings would look like: IOVA (0x8000000) === IPA (0x8000000) ===> 0x20200000 This requires an IOMMUFD ioctl for kernel and VMM to agree on the IPA. 2. Forward the guest-level MSI IOVA captured by VMM to the host-level GIC driver, to program the correct MSI IOVA. Forward the VMM-defined vITS page location (IPA) to the kernel for the stage-2 mapping. Eventually: IOVA (0xFFFF0000) ===> IPA (0x80900000) ===> PA (0x20200000) This requires a VFIO ioctl (for IOVA) and an IOMMUFD ioctl (for IPA). Worth mentioning that when Eric Auger was working on the same topic with the VFIO iommu uAPI, he had the approach (2) first, and then switched to the approach (1), suggested by Jean-Philippe for reduction of complexity. The approach (1) basically feels like the existing VFIO passthrough that has a 1-stage mapping for the unmanaged domain, yet only by shifting the MSI mapping from stage 1 (guest-has-no-iommu case) to stage 2 (guest-has- iommu case). So, it could reuse the existing IOMMU_RESV_SW_MSI piece, by sharing the same idea of "VMM leaving everything to the kernel". The approach (2) is an ideal solution, yet it requires additional effort for kernel to be aware of the 1-stage gIOVA(s) and 2-stage IPAs for vITS page(s), which demands VMM to closely cooperate. * It also brings some complicated use cases to the table where the host or/and guest system(s) has/have multiple ITS pages. [ Execution ] Though these two approaches feel very different on the surface, they can share some underlying common infrastructure. Currently, only one pair of sw_msi functions (prepare/compose) are provided by dma-iommu for irqchip drivers to directly use. 
There could be different versions of functions from different domain owners: for existing VFIO passthrough cases and in-kernel DMA domain cases, reuse the existing dma-iommu's version of sw_msi functions; for nested translation use cases, there can be another version of sw_msi functions to handle mapping and msi_msg(s) differently. As a part-1 supporting the approach (1), i.e. the RMR solution: - Get rid of the duplication in the "compose" function - Introduce a function pointer for the previously "prepare" function - Allow different domain owners to set their own "sw_msi" implementations - Implement an iommufd_sw_msi function to additionally support a nested translation use case using the approach (1) - Add a pair of IOMMUFD options for a SW_MSI window for kernel and VMM to agree on (for approach 1) [ Future Plan ] Part-2 and beyond will continue the effort of supporting the approach (2) for a complete vITS-to-pITS mapping: 1) Map the physical ITS page (potentially via IOMMUFD_CMD_IOAS_MAP_MSI) 2) Convey the IOVAs per-irq (potentially via VFIO_IRQ_SET_ACTION_PREPARE) (Note that the set_option uAPI in this series might not fit, since this requires an array of MSI IOVAs.) --- This is a joint effort that includes Jason's rework in irq/iommu/iommufd base level and my additional patches on top of that for new uAPIs. This series is on github: https://github.com/nicolinc/iommufd/commits/iommufd_msi_p1-v1 Pairing QEMU branch for testing (approach 1): https://github.com/nicolinc/qemu/commits/wip/for_iommufd_msi_p1-v1-rmr (Note: QEMU virt command no longer requires iommufd object vs.
RFCv2) Changelog v1 * Rebase on v6.14-rc1 and iommufd_attach_handle-v1 series https://lore.kernel.org/all/cover.1738645017.git.nicolinc@nvidia.com/ * Correct typos * Replace set_bit with __set_bit * Use a common helper to get iommufd_handle * Add kdoc for iommu_msi_iova/iommu_msi_page_shift * Rename msi_msg_set_msi_addr() to msi_msg_set_addr() * Update selftest for a better coverage for the new options * Change IOMMU_OPTION_SW_MSI_START/SIZE to be per-idev and properly check against device's reserved region list RFCv2 https://lore.kernel.org/kvm/cover.1736550979.git.nicolinc@nvidia.com/ * Rebase on v6.13-rc6 * Drop all the irq/pci patches and rework the compose function instead * Add a new sw_msi op to iommu_domain for a per type implementation and let iommufd core has its own implementation to support both approaches * Add RMR-solution (approach 1) support since it is straightforward and have been used in some out-of-tree projects widely RFCv1 https://lore.kernel.org/kvm/cover.1731130093.git.nicolinc@nvidia.com/ Thanks! 
Nicolin Jason Gunthorpe (5): genirq/msi: Store the IOMMU IOVA directly in msi_desc instead of iommu_cookie genirq/msi: Rename iommu_dma_compose_msi_msg() to msi_msg_set_addr() iommu: Make iommu_dma_prepare_msi() into a generic operation irqchip: Have CONFIG_IRQ_MSI_IOMMU be selected by the irqchips that need it iommufd: Implement sw_msi support natively Nicolin Chen (8): iommu: Turn fault_data to iommufd private pointer iommu: Turn iova_cookie to dma-iommu private pointer iommufd/device: Move sw_msi_start from igroup to idev iommufd: Pass in idev to iopt_table_enforce_dev_resv_regions iommufd: Add IOMMU_OPTION_SW_MSI_START/SIZE ioctls iommufd/selftest: Add MOCK_FLAGS_DEVICE_NO_ATTACH iommufd/selftest: Add a testing reserved region iommufd/selftest: Add coverage for IOMMU_OPTION_SW_MSI_START/SIZE drivers/iommu/Kconfig | 1 - drivers/irqchip/Kconfig | 4 + kernel/irq/Kconfig | 1 + drivers/iommu/iommufd/iommufd_private.h | 29 ++- drivers/iommu/iommufd/iommufd_test.h | 4 + include/linux/iommu.h | 58 ++++-- include/linux/msi.h | 47 +++-- include/uapi/linux/iommufd.h | 20 +- drivers/iommu/dma-iommu.c | 63 ++---- drivers/iommu/iommu.c | 29 +++ drivers/iommu/iommufd/device.c | 196 ++++++++++++++---- drivers/iommu/iommufd/fault.c | 2 +- drivers/iommu/iommufd/hw_pagetable.c | 5 +- drivers/iommu/iommufd/io_pagetable.c | 18 +- drivers/iommu/iommufd/ioas.c | 97 +++++++++ drivers/iommu/iommufd/main.c | 13 ++ drivers/iommu/iommufd/selftest.c | 41 +++- drivers/irqchip/irq-gic-v2m.c | 5 +- drivers/irqchip/irq-gic-v3-its.c | 13 +- drivers/irqchip/irq-gic-v3-mbi.c | 12 +- drivers/irqchip/irq-ls-scfg-msi.c | 5 +- tools/testing/selftests/iommu/iommufd.c | 97 +++++++++ .../selftests/iommu/iommufd_fail_nth.c | 21 ++ 23 files changed, 608 insertions(+), 173 deletions(-) base-commit: 2b5bc8c9425fd87e094a08f72498536133da80e1 -- 2.43.0

6 months

4
27
0 0

[PATCH bpf-next v2 0/6] XDP metadata support for tun driver

by Marcus Wichelmann

Hi all, Thank you for your review comments. Here is an updated patch series with the requested changes. To add a selftest for the metadata support of the tun driver, I refactored an existing "xdp_context_functional" test which already tested something similar but for the veth driver. I made the testing logic behind it more reusable so that it also works for the tun driver and possibly other drivers in the future. The last patch ("fix file descriptor assertion in open_tuntap helper") fixes an assertion in an existing helper function that I just moved and reused. Somehow the file descriptor for /dev/net/tun turned out to be 0 when running in the BPF kernel-patches GitHub CI, so the assert condition needed adjustment: https://github.com/kernel-patches/bpf/actions/runs/13339140896 Successful pipeline: https://github.com/kernel-patches/bpf/actions/runs/13372306548 --- v2: - submit against bpf-next subtree - split commits and improved commit messages - remove redundant metasize check and add clarifying comment instead - use max() instead of ternary operator - add selftest for metadata support in the tun driver v1: https://lore.kernel.org/all/20250130171614.1657224-1-marcus.wichelmann@hetz… Marcus Wichelmann (6): net: tun: enable XDP metadata support net: tun: enable transfer of XDP metadata to skb selftests/bpf: move open_tuntap to network helpers selftests/bpf: refactor xdp_context_functional test and bpf program selftests/bpf: add test for XDP metadata support in tun driver selftests/bpf: fix file descriptor assertion in open_tuntap helper drivers/net/tun.c | 24 ++- tools/testing/selftests/bpf/network_helpers.c | 28 ++++ tools/testing/selftests/bpf/network_helpers.h | 3 + .../selftests/bpf/prog_tests/lwt_helpers.h | 29 ---- .../bpf/prog_tests/xdp_context_test_run.c | 152 +++++++++++++++--- .../selftests/bpf/progs/test_xdp_meta.c | 56 ++++--- 6 files changed, 215 insertions(+), 77 deletions(-) -- 2.43.0

6 months

3
15
0 0

[PATCH] selftests/mm: run_vmtests.sh: fix half_ufd_size_MB calculation

by Rafael Aquini

From: Rafael Aquini <raquini(a)redhat.com> We noticed that uffd-stress test was always failing to run when invoked for the hugetlb profiles on x86_64 systems with a processor count of 64 or bigger: ... # ------------------------------------ # running ./uffd-stress hugetlb 128 32 # ------------------------------------ # ERROR: invalid MiB (errno=9, @uffd-stress.c:459) ... # [FAIL] not ok 3 uffd-stress hugetlb 128 32 # exit=1 ... The problem boils down to how run_vmtests.sh (mis)calculates the size of the region it feeds to uffd-stress. The latter expects to see an amount of MiB while the former is just giving out the number of free hugepages halved down. This measurement discrepancy ends up violating uffd-stress' assertion on number of hugetlb pages allocated per CPU, causing it to bail out with the error above. This commit fixes that issue by adjusting run_vmtests.sh's half_ufd_size_MB calculation so it properly renders the region size in MiB, as expected, while maintaining all of its original constraints in place. Fixes: 2e47a445d7b3 ("selftests/mm: run_vmtests.sh: fix hugetlb mem size calculation") Signed-off-by: Rafael Aquini <raquini(a)redhat.com> --- tools/testing/selftests/mm/run_vmtests.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 333c468c2699..157d07e5aaa3 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -304,7 +304,9 @@ uffd_stress_bin=./uffd-stress CATEGORY="userfaultfd" run_test ${uffd_stress_bin} anon 20 16 # Hugetlb tests require source and destination huge pages. Pass in half # the size of the free pages we have, which is used for *each*. -half_ufd_size_MB=$((freepgs / 2)) +# uffd-stress expects a region expressed in MiB, so we adjust +# half_ufd_size_MB accordingly. 
+half_ufd_size_MB=$(((freepgs * hpgsize_KB) / 1024 / 2)) CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb "$half_ufd_size_MB" 32 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb-private "$half_ufd_size_MB" 32 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem 20 16 -- 2.47.0

6 months

3
2
0 0

[RFC PATCH 0/2] Add NV Selftest cases

by Ganapatrao Kulkarni

In this patch series, the kvm selftests code is modified to enable guest code to run in vEL2 (as guest hypervisor). Test cases are also added to test guest code booting in vEL2 and register access of VNCR mapped registers. This patchset is created as per discussions over ml[1]. Posting RFC patch for the early feedback and to further explore requirements and test cases. Ganapatrao Kulkarni (2): KVM: arm64: nv: selftests: Add guest hypervisor test KVM: arm64: nv: selftests: Access VNCR mapped registers tools/testing/selftests/kvm/Makefile.kvm | 2 + .../selftests/kvm/arm64/nv_guest_hypervisor.c | 83 ++++++ .../selftests/kvm/arm64/nv_vncr_regs_test.c | 255 ++++++++++++++++++ .../kvm/include/arm64/kvm_util_arch.h | 3 + .../selftests/kvm/include/arm64/nv_util.h | 28 ++ .../testing/selftests/kvm/include/kvm_util.h | 1 + .../selftests/kvm/lib/arm64/processor.c | 59 +++- 7 files changed, 417 insertions(+), 14 deletions(-) create mode 100644 tools/testing/selftests/kvm/arm64/nv_guest_hypervisor.c create mode 100644 tools/testing/selftests/kvm/arm64/nv_vncr_regs_test.c create mode 100644 tools/testing/selftests/kvm/include/arm64/nv_util.h -- 2.48.1

6 months

2
9
0 0

[PATCH v7 2/2] selftests/rseq: Add test for mm_cid compaction

by Gabriele Monaco

A task in the kernel (task_mm_cid_work) runs somewhat periodically to compact the mm_cid for each process. Add a test to validate that it runs correctly and timely. The test spawns 1 thread pinned to each CPU, then each thread, including the main one, runs in short bursts for some time. During this period, the mm_cids should be spanning all numbers between 0 and nproc. At the end of this phase, a thread with high enough mm_cid (>= nproc/2) is selected to be the new leader, all other threads terminate. After some time, the only running thread should see 0 as mm_cid, if that doesn't happen, the compaction mechanism didn't work and the test fails. The test never fails if only 1 core is available, in which case, we cannot test anything as the only available mm_cid is 0. Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com> Signed-off-by: Gabriele Monaco <gmonaco(a)redhat.com> --- tools/testing/selftests/rseq/.gitignore | 1 + tools/testing/selftests/rseq/Makefile | 2 +- .../selftests/rseq/mm_cid_compaction_test.c | 200 ++++++++++++++++++ 3 files changed, 202 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/rseq/mm_cid_compaction_test.c diff --git a/tools/testing/selftests/rseq/.gitignore b/tools/testing/selftests/rseq/.gitignore index 16496de5f6ce4..2c89f97e4f737 100644 --- a/tools/testing/selftests/rseq/.gitignore +++ b/tools/testing/selftests/rseq/.gitignore @@ -3,6 +3,7 @@ basic_percpu_ops_test basic_percpu_ops_mm_cid_test basic_test basic_rseq_op_test +mm_cid_compaction_test param_test param_test_benchmark param_test_compare_twice diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile index 5a3432fceb586..ce1b38f46a355 100644 --- a/tools/testing/selftests/rseq/Makefile +++ b/tools/testing/selftests/rseq/Makefile @@ -16,7 +16,7 @@ OVERRIDE_TARGETS = 1 TEST_GEN_PROGS = basic_test basic_percpu_ops_test basic_percpu_ops_mm_cid_test param_test \ param_test_benchmark param_test_compare_twice 
param_test_mm_cid \ - param_test_mm_cid_benchmark param_test_mm_cid_compare_twice + param_test_mm_cid_benchmark param_test_mm_cid_compare_twice mm_cid_compaction_test TEST_GEN_PROGS_EXTENDED = librseq.so diff --git a/tools/testing/selftests/rseq/mm_cid_compaction_test.c b/tools/testing/selftests/rseq/mm_cid_compaction_test.c new file mode 100644 index 0000000000000..7ddde3b657dd6 --- /dev/null +++ b/tools/testing/selftests/rseq/mm_cid_compaction_test.c @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: LGPL-2.1 +#define _GNU_SOURCE +#include <assert.h> +#include <pthread.h> +#include <sched.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stddef.h> + +#include "../kselftest.h" +#include "rseq.h" + +#define VERBOSE 0 +#define printf_verbose(fmt, ...) \ + do { \ + if (VERBOSE) \ + printf(fmt, ##__VA_ARGS__); \ + } while (0) + +/* 0.5 s */ +#define RUNNER_PERIOD 500000 +/* Number of runs before we terminate or get the token */ +#define THREAD_RUNS 5 + +/* + * Number of times we check that the mm_cid were compacted. + * Checks are repeated every RUNNER_PERIOD. + */ +#define MM_CID_COMPACT_TIMEOUT 10 + +struct thread_args { + int cpu; + int num_cpus; + pthread_mutex_t *token; + pthread_barrier_t *barrier; + pthread_t *tinfo; + struct thread_args *args_head; +}; + +static void __noreturn *thread_runner(void *arg) +{ + struct thread_args *args = arg; + int i, ret, curr_mm_cid; + cpu_set_t cpumask; + + CPU_ZERO(&cpumask); + CPU_SET(args->cpu, &cpumask); + ret = pthread_setaffinity_np(pthread_self(), sizeof(cpumask), &cpumask); + if (ret) { + errno = ret; + perror("Error: failed to set affinity"); + abort(); + } + pthread_barrier_wait(args->barrier); + + for (i = 0; i < THREAD_RUNS; i++) + usleep(RUNNER_PERIOD); + curr_mm_cid = rseq_current_mm_cid(); + /* + * We select one thread with high enough mm_cid to be the new leader. + * All other threads (including the main thread) will terminate. 
+ * After some time, the mm_cid of the only remaining thread should + * converge to 0, if not, the test fails. + */ + if (curr_mm_cid >= args->num_cpus / 2 && + !pthread_mutex_trylock(args->token)) { + printf_verbose( + "cpu%d has mm_cid=%d and will be the new leader.\n", + sched_getcpu(), curr_mm_cid); + for (i = 0; i < args->num_cpus; i++) { + if (args->tinfo[i] == pthread_self()) + continue; + ret = pthread_join(args->tinfo[i], NULL); + if (ret) { + errno = ret; + perror("Error: failed to join thread"); + abort(); + } + } + pthread_barrier_destroy(args->barrier); + free(args->tinfo); + free(args->token); + free(args->barrier); + free(args->args_head); + + for (i = 0; i < MM_CID_COMPACT_TIMEOUT; i++) { + curr_mm_cid = rseq_current_mm_cid(); + printf_verbose("run %d: mm_cid=%d on cpu%d.\n", i, + curr_mm_cid, sched_getcpu()); + if (curr_mm_cid == 0) + exit(EXIT_SUCCESS); + usleep(RUNNER_PERIOD); + } + exit(EXIT_FAILURE); + } + printf_verbose("cpu%d has mm_cid=%d and is going to terminate.\n", + sched_getcpu(), curr_mm_cid); + pthread_exit(NULL); +} + +int test_mm_cid_compaction(void) +{ + cpu_set_t affinity; + int i, j, ret = 0, num_threads; + pthread_t *tinfo; + pthread_mutex_t *token; + pthread_barrier_t *barrier; + struct thread_args *args; + + sched_getaffinity(0, sizeof(affinity), &affinity); + num_threads = CPU_COUNT(&affinity); + tinfo = calloc(num_threads, sizeof(*tinfo)); + if (!tinfo) { + perror("Error: failed to allocate tinfo"); + return -1; + } + args = calloc(num_threads, sizeof(*args)); + if (!args) { + perror("Error: failed to allocate args"); + ret = -1; + goto out_free_tinfo; + } + token = malloc(sizeof(*token)); + if (!token) { + perror("Error: failed to allocate token"); + ret = -1; + goto out_free_args; + } + barrier = malloc(sizeof(*barrier)); + if (!barrier) { + perror("Error: failed to allocate barrier"); + ret = -1; + goto out_free_token; + } + if (num_threads == 1) { + fprintf(stderr, "Cannot test on a single cpu. 
" + "Skipping mm_cid_compaction test.\n"); + /* only skipping the test, this is not a failure */ + goto out_free_barrier; + } + pthread_mutex_init(token, NULL); + ret = pthread_barrier_init(barrier, NULL, num_threads); + if (ret) { + errno = ret; + perror("Error: failed to initialise barrier"); + goto out_free_barrier; + } + for (i = 0, j = 0; i < CPU_SETSIZE && j < num_threads; i++) { + if (!CPU_ISSET(i, &affinity)) + continue; + args[j].num_cpus = num_threads; + args[j].tinfo = tinfo; + args[j].token = token; + args[j].barrier = barrier; + args[j].cpu = i; + args[j].args_head = args; + if (!j) { + /* The first thread is the main one */ + tinfo[0] = pthread_self(); + ++j; + continue; + } + ret = pthread_create(&tinfo[j], NULL, thread_runner, &args[j]); + if (ret) { + errno = ret; + perror("Error: failed to create thread"); + abort(); + } + ++j; + } + printf_verbose("Started %d threads.\n", num_threads); + + /* Also main thread will terminate if it is not selected as leader */ + thread_runner(&args[0]); + + /* only reached in case of errors */ +out_free_barrier: + free(barrier); +out_free_token: + free(token); +out_free_args: + free(args); +out_free_tinfo: + free(tinfo); + + return ret; +} + +int main(int argc, char **argv) +{ + if (!rseq_mm_cid_available()) { + fprintf(stderr, "Error: rseq_mm_cid unavailable\n"); + return -1; + } + if (test_mm_cid_compaction()) + return -1; + return 0; +} -- 2.48.1

6 months

1
0
0 0

[PATCH v5 3/3] rseq/selftests: Add test for mm_cid compaction

by Gabriele Monaco

A task in the kernel (task_mm_cid_work) runs somewhat periodically to compact the mm_cid for each process. Add a test to validate that it runs correctly and timely. The test spawns 1 thread pinned to each CPU, then each thread, including the main one, runs in short bursts for some time. During this period, the mm_cids should be spanning all numbers between 0 and nproc. At the end of this phase, a thread with high enough mm_cid (>= nproc/2) is selected to be the new leader, all other threads terminate. After some time, the only running thread should see 0 as mm_cid, if that doesn't happen, the compaction mechanism didn't work and the test fails. The test never fails if only 1 core is available, in which case, we cannot test anything as the only available mm_cid is 0. To: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com> Signed-off-by: Gabriele Monaco <gmonaco(a)redhat.com> --- tools/testing/selftests/rseq/.gitignore | 1 + tools/testing/selftests/rseq/Makefile | 2 +- .../selftests/rseq/mm_cid_compaction_test.c | 200 ++++++++++++++++++ 3 files changed, 202 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/rseq/mm_cid_compaction_test.c diff --git a/tools/testing/selftests/rseq/.gitignore b/tools/testing/selftests/rseq/.gitignore index 16496de5f6ce4..2c89f97e4f737 100644 --- a/tools/testing/selftests/rseq/.gitignore +++ b/tools/testing/selftests/rseq/.gitignore @@ -3,6 +3,7 @@ basic_percpu_ops_test basic_percpu_ops_mm_cid_test basic_test basic_rseq_op_test +mm_cid_compaction_test param_test param_test_benchmark param_test_compare_twice diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile index 5a3432fceb586..ce1b38f46a355 100644 --- a/tools/testing/selftests/rseq/Makefile +++ b/tools/testing/selftests/rseq/Makefile @@ -16,7 +16,7 @@ OVERRIDE_TARGETS = 1 TEST_GEN_PROGS = basic_test basic_percpu_ops_test basic_percpu_ops_mm_cid_test param_test \ param_test_benchmark param_test_compare_twice param_test_mm_cid \ 
- param_test_mm_cid_benchmark param_test_mm_cid_compare_twice + param_test_mm_cid_benchmark param_test_mm_cid_compare_twice mm_cid_compaction_test TEST_GEN_PROGS_EXTENDED = librseq.so diff --git a/tools/testing/selftests/rseq/mm_cid_compaction_test.c b/tools/testing/selftests/rseq/mm_cid_compaction_test.c new file mode 100644 index 0000000000000..701719b320049 --- /dev/null +++ b/tools/testing/selftests/rseq/mm_cid_compaction_test.c @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: LGPL-2.1 +#define _GNU_SOURCE +#include <assert.h> +#include <pthread.h> +#include <sched.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stddef.h> + +#include "../kselftest.h" +#include "rseq.h" + +#define VERBOSE 0 +#define printf_verbose(fmt, ...) \ + do { \ + if (VERBOSE) \ + printf(fmt, ##__VA_ARGS__); \ + } while (0) + +/* 0.5 s */ +#define RUNNER_PERIOD 500000 +/* Number of runs before we terminate or get the token */ +#define THREAD_RUNS 5 + +/* + * Number of times we check that the mm_cid were compacted. + * Checks are repeated every RUNNER_PERIOD. + */ +#define MM_CID_COMPACT_TIMEOUT 10 + +struct thread_args { + int cpu; + int num_cpus; + pthread_mutex_t *token; + pthread_barrier_t *barrier; + pthread_t *tinfo; + struct thread_args *args_head; +}; + +static void __noreturn *thread_runner(void *arg) +{ + struct thread_args *args = arg; + int i, ret, curr_mm_cid; + cpu_set_t cpumask; + + CPU_ZERO(&cpumask); + CPU_SET(args->cpu, &cpumask); + ret = pthread_setaffinity_np(pthread_self(), sizeof(cpumask), &cpumask); + if (ret) { + errno = ret; + perror("Error: failed to set affinity"); + abort(); + } + pthread_barrier_wait(args->barrier); + + for (i = 0; i < THREAD_RUNS; i++) + usleep(RUNNER_PERIOD); + curr_mm_cid = rseq_current_mm_cid(); + /* + * We select one thread with high enough mm_cid to be the new leader + * all other threads (including the main thread) will terminate. 
+ * After some time, the mm_cid of the only remaining thread should + * converge to 0, if not, the test fails. + */ + if (curr_mm_cid >= args->num_cpus / 2 && + !pthread_mutex_trylock(args->token)) { + printf_verbose( + "cpu%d has mm_cid=%d and will be the new leader.\n", + sched_getcpu(), curr_mm_cid); + for (i = 0; i < args->num_cpus; i++) { + if (args->tinfo[i] == pthread_self()) + continue; + ret = pthread_join(args->tinfo[i], NULL); + if (ret) { + errno = ret; + perror("Error: failed to join thread"); + abort(); + } + } + pthread_barrier_destroy(args->barrier); + free(args->tinfo); + free(args->token); + free(args->barrier); + free(args->args_head); + + for (i = 0; i < MM_CID_COMPACT_TIMEOUT; i++) { + curr_mm_cid = rseq_current_mm_cid(); + printf_verbose("run %d: mm_cid=%d on cpu%d.\n", i, + curr_mm_cid, sched_getcpu()); + if (curr_mm_cid == 0) + exit(EXIT_SUCCESS); + usleep(RUNNER_PERIOD); + } + exit(EXIT_FAILURE); + } + printf_verbose("cpu%d has mm_cid=%d and is going to terminate.\n", + sched_getcpu(), curr_mm_cid); + pthread_exit(NULL); +} + +int test_mm_cid_compaction(void) +{ + cpu_set_t affinity; + int i, j, ret = 0, num_threads; + pthread_t *tinfo; + pthread_mutex_t *token; + pthread_barrier_t *barrier; + struct thread_args *args; + + sched_getaffinity(0, sizeof(affinity), &affinity); + num_threads = CPU_COUNT(&affinity); + tinfo = calloc(num_threads, sizeof(*tinfo)); + if (!tinfo) { + perror("Error: failed to allocate tinfo"); + return -1; + } + args = calloc(num_threads, sizeof(*args)); + if (!args) { + perror("Error: failed to allocate args"); + ret = -1; + goto out_free_tinfo; + } + token = malloc(sizeof(*token)); + if (!token) { + perror("Error: failed to allocate token"); + ret = -1; + goto out_free_args; + } + barrier = malloc(sizeof(*barrier)); + if (!barrier) { + perror("Error: failed to allocate barrier"); + ret = -1; + goto out_free_token; + } + if (num_threads == 1) { + fprintf(stderr, "Cannot test on a single cpu. 
" + "Skipping mm_cid_compaction test.\n"); + /* only skipping the test, this is not a failure */ + goto out_free_barrier; + } + pthread_mutex_init(token, NULL); + ret = pthread_barrier_init(barrier, NULL, num_threads); + if (ret) { + errno = ret; + perror("Error: failed to initialise barrier"); + goto out_free_barrier; + } + for (i = 0, j = 0; i < CPU_SETSIZE && j < num_threads; i++) { + if (!CPU_ISSET(i, &affinity)) + continue; + args[j].num_cpus = num_threads; + args[j].tinfo = tinfo; + args[j].token = token; + args[j].barrier = barrier; + args[j].cpu = i; + args[j].args_head = args; + if (!j) { + /* The first thread is the main one */ + tinfo[0] = pthread_self(); + ++j; + continue; + } + ret = pthread_create(&tinfo[j], NULL, thread_runner, &args[j]); + if (ret) { + errno = ret; + perror("Error: failed to create thread"); + abort(); + } + ++j; + } + printf_verbose("Started %d threads.\n", num_threads); + + /* Also main thread will terminate if it is not selected as leader */ + thread_runner(&args[0]); + + /* only reached in case of errors */ +out_free_barrier: + free(barrier); +out_free_token: + free(token); +out_free_args: + free(args); +out_free_tinfo: + free(tinfo); + + return ret; +} + +int main(int argc, char **argv) +{ + if (!rseq_mm_cid_available()) { + fprintf(stderr, "Error: rseq_mm_cid unavailable\n"); + return -1; + } + if (test_mm_cid_compaction()) + return -1; + return 0; +} -- 2.48.1

6 months

3
4
0 0

[PATCH net-next v5 0/8] Some pktgen fixes/improvments (part I)

by Peter Seiderer

While taking a look at '[PATCH net] pktgen: Avoid out-of-range in get_imix_entries' ([1]) and '[PATCH net v2] pktgen: Avoid out-of-bounds access in get_imix_entries' ([2], [3]) and doing some tests and code review I detected that the /proc/net/pktgen/... parsing logic does not honour the user given buffer bounds (resulting in out-of-bounds access). This can be observed e.g. by the following simple test (sometimes the old/'longer' previous value is re-read from the buffer): $ echo add_device lo@0 > /proc/net/pktgen/kpktgend_0 $ echo "min_pkt_size 12345" > /proc/net/pktgen/lo\@0 && grep min_pkt_size /proc/net/pktgen/lo\@0 Params: count 1000 min_pkt_size: 12345 max_pkt_size: 0 Result: OK: min_pkt_size=12345 $ echo -n "min_pkt_size 123" > /proc/net/pktgen/lo\@0 && grep min_pkt_size /proc/net/pktgen/lo\@0 Params: count 1000 min_pkt_size: 12345 max_pkt_size: 0 Result: OK: min_pkt_size=12345 $ echo "min_pkt_size 123" > /proc/net/pktgen/lo\@0 && grep min_pkt_size /proc/net/pktgen/lo\@0 Params: count 1000 min_pkt_size: 123 max_pkt_size: 0 Result: OK: min_pkt_size=123 So fix the out-of-bounds access (and some minor findings) and add a simple proc_net_pktgen selftest... 
Patch set splited into part I (this one) - net: pktgen: replace ENOTSUPP with EOPNOTSUPP - net: pktgen: enable 'param=value' parsing - net: pktgen: fix hex32_arg parsing for short reads - net: pktgen: fix 'rate 0' error handling (return -EINVAL) - net: pktgen: fix 'ratep 0' error handling (return -EINVAL) - net: pktgen: fix ctrl interface command parsing - net: pktgen: fix access outside of user given buffer in pktgen_thread_write() And part II (will follow): - net: pktgen: use defines for the various dec/hex number parsing digits lengths - net: pktgen: fix mix of int/long - net: pktgen: remove extra tmp variable (re-use len instead) - net: pktgen: remove some superfluous variable initializing - net: pktgen: fix mpls maximum labels list parsing - net: pktgen: fix access outside of user given buffer in pktgen_if_write() - net: pktgen: fix mpls reset parsing - net: pktgen: remove all superfluous index assignements - selftest: net: add proc_net_pktgen Regards, Peter Changes v4 -> v5: - split up patchset into part i/ii (suggested by Simon Horman) Changes v3 -> v4: - add rev-by Simon Horman - new patch 'net: pktgen: use defines for the various dec/hex number parsing digits lengths' (suggested by Simon Horman) - replace C99 comment (suggested by Paolo Abeni) - drop available characters check in strn_len() (suggested by Paolo Abeni) - factored out patch 'net: pktgen: align some variable declarations to the most common pattern' (suggested by Paolo Abeni) - factored out patch 'net: pktgen: remove extra tmp variable (re-use len instead)' (suggested by Paolo Abeni) - factored out patch 'net: pktgen: remove some superfluous variable initializing' (suggested by Paolo Abeni) - factored out patch 'net: pktgen: fix mpls maximum labels list parsing' (suggested by Paolo Abeni) - factored out 'net: pktgen: hex32_arg/num_arg error out in case no characters are available' (suggested by Paolo Abeni) - factored out 'net: pktgen: num_arg error out in case no valid character is parsed' 
(suggested by Paolo Abeni) Changes v2 -> v3: - new patch: 'net: pktgen: fix ctrl interface command parsing' - new patch: 'net: pktgen: fix mpls reset parsing' - tools/testing/selftests/net/proc_net_pktgen.c: - fix typo in change description ('v1 -> v1' and tyop) - rename some vars to better match usage add_loopback_0 -> thr_cmd_add_loopback_0 rm_loopback_0 -> thr_cmd_rm_loopback_0 wrong_ctrl_cmd -> wrong_thr_cmd legacy_ctrl_cmd -> legacy_thr_cmd ctrl_fd -> thr_fd - add ctrl interface tests Changes v1 -> v2: - new patch: 'net: pktgen: fix hex32_arg parsing for short reads' - new patch: 'net: pktgen: fix 'rate 0' error handling (return -EINVAL)' - new patch: 'net: pktgen: fix 'ratep 0' error handling (return -EINVAL)' - net/core/pktgen.c: additional fix get_imix_entries() and get_labels() - tools/testing/selftests/net/proc_net_pktgen.c: - fix tyop not vs. nod (suggested by Jakub Kicinski) - fix misaligned line (suggested by Jakub Kicinski) - enable fomerly commented out CONFIG_XFRM dependent test (command spi), as CONFIG_XFRM is enabled via tools/testing/selftests/net/config CONFIG_XFRM_INTERFACE/CONFIG_XFRM_USER (suggestex by Jakub Kicinski) - add CONFIG_NET_PKTGEN=m to tools/testing/selftests/net/config (suggested by Jakub Kicinski) - add modprobe pktgen to FIXTURE_SETUP() (suggested by Jakub Kicinski) - fix some checkpatch warnings (Missing a blank line after declarations) - shrink line length by re-naming some variables (command -> cmd, device -> dev) - add 'rate 0' testcase - add 'ratep 0' testcase [1] https://lore.kernel.org/netdev/20241006221221.3744995-1-artem.chernyshev@re… [2] https://lore.kernel.org/netdev/20250109083039.14004-1-pchelkin@ispras.ru/ [3] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?… Peter Seiderer (8): net: pktgen: replace ENOTSUPP with EOPNOTSUPP net: pktgen: enable 'param=value' parsing net: pktgen: fix hex32_arg parsing for short reads net: pktgen: fix 'rate 0' error handling (return -EINVAL) net: pktgen: 
fix 'ratep 0' error handling (return -EINVAL) net: pktgen: fix ctrl interface command parsing net: pktgen: fix access outside of user given buffer in pktgen_thread_write() net: pktgen: use defines for the various dec/hex number parsing digits lengths net/core/pktgen.c | 119 +++++++++++++++++++++++++--------------------- 1 file changed, 66 insertions(+), 53 deletions(-) -- 2.48.1

6 months

4
15
0 0

[PATCH bpf-next 0/3] selftests/bpf: tc_links/tc_opts: Unserialize tests

by Bastien Curutchet (eBPF Foundation)

Hi all, Both tc_links.c and tc_opts.c do their tests on the loopback interface. This prevents their executions from being parallelized. Use namespaces and the new append_tid() helper to allow this parallelization. Signed-off-by: Bastien Curutchet (eBPF Foundation) <bastien.curutchet(a)bootlin.com> --- Bastien Curutchet (eBPF Foundation) (3): selftests/bpf: tc_helpers: Add create_and_open_tid_ns() selftests/bpf: tc_link/tc_opts: Use unique namespace selftests/bpf: tc_links/tc_opts: Serialize tests .../testing/selftests/bpf/prog_tests/tc_helpers.h | 12 ++ tools/testing/selftests/bpf/prog_tests/tc_links.c | 164 +++++++++++++-- tools/testing/selftests/bpf/prog_tests/tc_opts.c | 230 ++++++++++++++++++--- 3 files changed, 361 insertions(+), 45 deletions(-) --- base-commit: cfed0f474a4bb2f12b54de5d6a7301cfb7dc0dbd change-id: 20250128-tc_links-d894a23b7063 Best regards, -- Bastien Curutchet (eBPF Foundation) <bastien.curutchet(a)bootlin.com>

6 months

4
6
0 0

[PATCH v2 Linux-6.12.y Linux-6.13.y 1/1] selftests/mm: build with -O2

by Yifei Liu

From: Kevin Brodsky <kevin.brodsky(a)arm.com> [ Upstream commit 46036188ea1f5266df23a6149dea0df1c77cd1c7 ] The mm kselftests are currently built with no optimisation (-O0). It's unclear why, and besides being obviously suboptimal, this also prevents the pkeys tests from working as intended. Let's build all the tests with -O2. [kevin.brodsky(a)arm.com: silence unused-result warnings] Link: https://lkml.kernel.org/r/20250107170110.2819685-1-kevin.brodsky@arm.com Link: https://lkml.kernel.org/r/20241209095019.1732120-6-kevin.brodsky@arm.com Signed-off-by: Kevin Brodsky <kevin.brodsky(a)arm.com> Cc: Aruna Ramakrishna <aruna.ramakrishna(a)oracle.com> Cc: Catalin Marinas <catalin.marinas(a)arm.com> Cc: Dave Hansen <dave.hansen(a)linux.intel.com> Cc: Joey Gouly <joey.gouly(a)arm.com> Cc: Keith Lucas <keith.lucas(a)oracle.com> Cc: Ryan Roberts <ryan.roberts(a)arm.com> Cc: Shuah Khan <shuah(a)kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> (cherry picked from commit 46036188ea1f5266df23a6149dea0df1c77cd1c7) [Yifei: This commit also fix the failure of pkey_sighandler_tests_64, which is also in linux-6.12.y and linux-6.13.y, thus backport this commit] Signed-off-by: Yifei Liu <yifei.l.liu(a)oracle.com> --- tools/testing/selftests/mm/Makefile | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 02e1204971b0..c0138cb19705 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -33,9 +33,16 @@ endif # LDLIBS. MAKEFLAGS += --no-builtin-rules -CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) $(TOOLS_INCLUDES) +CFLAGS = -Wall -O2 -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) $(TOOLS_INCLUDES) LDLIBS = -lrt -lpthread -lm +# Some distributions (such as Ubuntu) configure GCC so that _FORTIFY_SOURCE is +# automatically enabled at -O1 or above. 
This triggers various unused-result +# warnings where functions such as read() or write() are called and their +# return value is not checked. Disable _FORTIFY_SOURCE to silence those +# warnings. +CFLAGS += -U_FORTIFY_SOURCE + TEST_GEN_FILES = cow TEST_GEN_FILES += compaction_test TEST_GEN_FILES += gup_longterm -- 2.46.0

6 months

3
6
0 0

[PATCH] selftests: firmware: Improve readability in fallback mechanism message

by Brian Ochoa

This patch adds commas to clarify sentence structure: - "To confirm look for" --> "To confirm, look for" - "If you do remove this file" --> "If you do, remove this file" Signed-off-by: Brian Ochoa <brianeochoa(a)gmail.com> --- tools/testing/selftests/firmware/fw_fallback.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/firmware/fw_fallback.sh b/tools/testing/selftests/firmware/fw_fallback.sh index 70d18be46af5..cd1ff88feb28 100755 --- a/tools/testing/selftests/firmware/fw_fallback.sh +++ b/tools/testing/selftests/firmware/fw_fallback.sh @@ -173,13 +173,13 @@ test_syfs_timeout() echo "" echo "This might be a distribution udev rule setup by your distribution" echo "to immediately cancel all fallback requests, this must be" - echo "removed before running these tests. To confirm look for" + echo "removed before running these tests. To confirm, look for" echo "a firmware rule like /lib/udev/rules.d/50-firmware.rules" echo "and see if you have something like this:" echo "" echo "SUBSYSTEM==\"firmware\", ACTION==\"add\", ATTR{loading}=\"-1\"" echo "" - echo "If you do remove this file or comment out this line before" + echo "If you do, remove this file or comment out this line before" echo "proceeding with these tests." exit 1 fi -- 2.34.1

6 months

1
0
0 0

[PATCH] Fix SC2181 warning

by ritvikfoss＠gmail.com

From: Ritvik Gupta <ritvikfoss(a)gmail.com> Check exit-code directly with '!' instead of '$?' to improve readability and fix SC2181 warning, reported by shellcheck. Signed-off-by: Ritvik Gupta <ritvikfoss(a)gmail.com> --- tools/testing/selftests/kselftest_deps.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kselftest_deps.sh b/tools/testing/selftests/kselftest_deps.sh index 487e49fdf2a6..2a4720723ebf 100755 --- a/tools/testing/selftests/kselftest_deps.sh +++ b/tools/testing/selftests/kselftest_deps.sh @@ -262,8 +262,8 @@ then for lib in $test_libs; do let total_cnt+=1 - $CC -o $tmp_file.bin $lib $tmp_file > /dev/null 2>&1 - if [ $? -ne 0 ]; then + if ! $CC -o $tmp_file.bin $lib $tmp_file > /dev/null 2>&1 + then echo "FAIL: $test dependency check: $lib" >> $fail let fail_cnt+=1 fail_libs+="$lib " -- 2.48.1

6 months

1
0
0 0

[PATCH] selftests: net : Fix few spelling mistakes

by Chandra Mohan Sundar

Fix few spelling mistakes in net selftests Signed-off-by: Chandra Mohan Sundar <chandru.dav(a)gmail.com> --- tools/testing/selftests/net/fcnal-test.sh | 4 ++-- tools/testing/selftests/net/fdb_flush.sh | 2 +- tools/testing/selftests/net/fib_nexthops.sh | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 899dbad0104b..4fcc38907e48 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -3667,7 +3667,7 @@ ipv6_addr_bind_novrf() # when it really should not a=${NSA_LO_IP6} log_start - show_hint "Tecnically should fail since address is not on device but kernel allows" + show_hint "Technically should fail since address is not on device but kernel allows" run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to out of scope local address" } @@ -3724,7 +3724,7 @@ ipv6_addr_bind_vrf() # passes when it really should not a=${VRF_IP6} log_start - show_hint "Tecnically should fail since address is not on device but kernel allows" + show_hint "Technically should fail since address is not on device but kernel allows" run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to VRF address with device bind" diff --git a/tools/testing/selftests/net/fdb_flush.sh b/tools/testing/selftests/net/fdb_flush.sh index d5e3abb8658c..9931a1e36e3d 100755 --- a/tools/testing/selftests/net/fdb_flush.sh +++ b/tools/testing/selftests/net/fdb_flush.sh @@ -583,7 +583,7 @@ vxlan_test_flush_by_remote_attributes() $IP link del dev vx10 $IP link add name vx10 type vxlan dstport "$VXPORT" external - # For multicat FDB entries, the VXLAN driver stores a linked list of + # For multicast FDB entries, the VXLAN driver stores a linked list of # remotes for a given key. Verify that only the expected remotes are # flushed. 
multicast_fdb_entries_add diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index 77c83d9508d3..bea1282e0281 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -741,7 +741,7 @@ ipv6_fcnal() run_cmd "$IP nexthop add id 52 via 2001:db8:92::3" log_test $? 2 "Create nexthop - gw only" - # gw is not reachable throught given dev + # gw is not reachable through given dev run_cmd "$IP nexthop add id 53 via 2001:db8:3::3 dev veth1" log_test $? 2 "Create nexthop - invalid gw+dev combination" -- 2.43.0

6 months

3
2
0 0

[PATCH v3 bpf 0/2] bpf: skip non exist keys in generic_map_lookup_batch

by Yan Zhai

The generic_map_lookup_batch currently returns EINTR if it fails with ENOENT and retries several times on bpf_map_copy_value. The next batch would start from the same location, presuming it's a transient issue. This is incorrect if a map can actually have "holes", i.e. "get_next_key" can return a key that does not point to a valid value. At least the array of maps type may legitimately contain such holes. Right now, when these holes show up, generic batch lookup cannot proceed any more. It will always fail with EINTR errors. This patch fixes this behavior by skipping the non-existing key, and does not return EINTR any more. V2->V3: deleted an unused macro V1->V2: split the fix and selftests; fixed a few selftests issues. V2: https://lore.kernel.org/bpf/cover.1738905497.git.yan@cloudflare.com/ V1: https://lore.kernel.org/bpf/Z6OYbS4WqQnmzi2z@debian.debian/ Yan Zhai (2): bpf: skip non exist keys in generic_map_lookup_batch selftests: bpf: test batch lookup on array of maps with holes kernel/bpf/syscall.c | 18 ++---- .../bpf/map_tests/map_in_map_batch_ops.c | 62 +++++++++++++------ 2 files changed, 49 insertions(+), 31 deletions(-) -- 2.39.5

6 months

4
7
0 0

[PATCH bpf-next v5 0/6] selftests/bpf: Migrate test_xdp_redirect_multi.sh to test_progs

by Bastien Curutchet (eBPF Foundation)

Hi all, This patch series continues the work to migrate the *.sh tests into prog_tests framework. test_xdp_redirect_multi.sh tests the XDP redirections done through bpf_redirect_map(). This is already partly covered by test_xdp_veth.c that already tests map redirections at XDP level. What isn't covered yet by test_xdp_veth is the use of the broadcast flags (BPF_F_BROADCAST or BPF_F_EXCLUDE_INGRESS) and XDP egress programs. Hence, this patch series add test cases to test_xdp_veth.c to get rid of the test_xdp_redirect_multi.sh: - PATCH 1 & 2 Rework test_xdp_veth.c to avoid using the root namespace - PATCH 3 and 4 cover the broadcast flags - PATCH 5 covers the XDP egress programs NOTE: While working on this iteration I ran into a memory leak in net/core/rtnetlink.c that leads to oom-kill when running ./test_progs in a loop. This leak has been fixed by commit 1438f5d07b9a ("rtnetlink: fix netns leak with rtnl_setlink()") in the net tree. Signed-off-by: Bastien Curutchet (eBPF Foundation) <bastien.curutchet(a)bootlin.com> --- Changes in v5: - Remove the patches that were applied from previous iteration - Add PATCH 1 & 2 to avoid using the root namespace so the veth indexes don't get incremented on every ./test_progs call - PATCH 3: Remove unnecessary <linux/ip.h> header - Link to v4: https://lore.kernel.org/r/20250131-redirect-multi-v4-0-970b33678512@bootlin… Changes in v4: - Remove the NO_IP #define - append_tid() takes string's size as input to ensure there is enough space to fit the thread ID at the end - Fix PATCH 12's commit log - Link to v3: https://lore.kernel.org/r/20250128-redirect-multi-v3-0-c1ce69997c01@bootlin… Changes in v3: - Add append_tid() helper and use unique names to allow parallel testing - Check create_network()'s return value through ASSERT_OK() - Remove check_ping() and unused defines - Change next_veth type (from string to int) - Link to v2: https://lore.kernel.org/r/20250121-redirect-multi-v2-0-fc9cacabc6b2@bootlin… Changes in v2: - Use 
serial_test_* to avoid conflict between tests - Link to v1: https://lore.kernel.org/r/20250121-redirect-multi-v1-0-b215e35ff505@bootlin… --- Bastien Curutchet (eBPF Foundation) (6): selftests/bpf: test_xdp_veth: Create struct net_configuration selftests/bpf: test_xdp_veth: Use a dedicated namespace selftests/bpf: Optionally select broadcasting flags selftests/bpf: test_xdp_veth: Add XDP broadcast redirection tests selftests/bpf: test_xdp_veth: Add XDP program on egress test selftests/bpf: Remove test_xdp_redirect_multi.sh tools/testing/selftests/bpf/Makefile | 2 - .../selftests/bpf/prog_tests/test_xdp_veth.c | 435 ++++++++++++++++++--- .../testing/selftests/bpf/progs/xdp_redirect_map.c | 88 +++++ .../selftests/bpf/progs/xdp_redirect_multi_kern.c | 41 +- .../selftests/bpf/test_xdp_redirect_multi.sh | 214 ---------- tools/testing/selftests/bpf/xdp_redirect_multi.c | 226 ----------- 6 files changed, 491 insertions(+), 515 deletions(-) --- base-commit: 36ab3d3de536753a4b9b2b4c4ce26af41917a378 change-id: 20250103-redirect-multi-245d6eafb5d1 Best regards, -- Bastien Curutchet (eBPF Foundation) <bastien.curutchet(a)bootlin.com>

6 months

2
7
0 0

[PATCH v4 00/27] KVM: arm64: Implement support for SME in non-protected guests

by Mark Brown

I've removed the RFC tag from this version of the series, but the items that I'm looking for feedback on remains the same: - The userspace ABI, in particular: - The vector length used for the SVE registers, access to the SVE registers and access to ZA and (if available) ZT0 depending on the current state of PSTATE.{SM,ZA}. - The use of a single finalisation for both SVE and SME. - The addition of control for enabling fine grained traps in a similar manner to FGU but without the UNDEF, I'm not clear if this is desired at all and at present this requires symmetric read and write traps like FGU. That seemed like it might be desired from an implementation point of view but we already have one case where we enable an asymmetric trap (for ARM64_WORKAROUND_AMPERE_AC03_CPU_38) and it seems generally useful to enable asymmetrically. This series implements support for SME use in non-protected KVM guests. Much of this is very similar to SVE, the main additional challenge that SME presents is that it introduces a new vector length similar to the SVE vector length and two new controls which change the registers seen by guests: - PSTATE.ZA enables the ZA matrix register and, if SME2 is supported, the ZT0 LUT register. - PSTATE.SM enables streaming mode, a new floating point mode which uses the SVE register set with the separately configured SME vector length. In streaming mode implementation of the FFR register is optional. It is also permitted to build systems which support SME without SVE, in this case when not in streaming mode no SVE registers or instructions are available. Further, there is no requirement that there be any overlap in the set of vector lengths supported by SVE and SME in a system, this is expected to be a common situation in practical systems. Since there is a new vector length to configure we introduce a new feature parallel to the existing SVE one with a new pseudo register for the streaming mode vector length. 
Due to the overlap with SVE caused by streaming mode rather than finalising SME as a separate feature we use the existing SVE finalisation to also finalise SME, a new define KVM_ARM_VCPU_VEC is provided to help make user code clearer. Finalising SVE and SME separately would introduce complication with register access since finalising SVE makes the SVE registers writeable by userspace and doing multiple finalisations results in an error being reported. Dealing with a state where the SVE registers are writeable due to one of SVE or SME being finalised but may have their VL changed by the other being finalised seems like needless complexity with minimal practical utility, it seems clearer to just express directly that only one finalisation can be done in the ABI. Access to the floating point registers follows the architecture: - When both SVE and SME are present: - If PSTATE.SM == 0 the vector length used for the Z and P registers is the SVE vector length. - If PSTATE.SM == 1 the vector length used for the Z and P registers is the SME vector length. - If only SME is present: - If PSTATE.SM == 0 the Z and P registers are inaccessible and the floating point state is accessed via the encodings for the V registers. - If PSTATE.SM == 1 the vector length used for the Z and P registers is the SME vector length. - The SME specific ZA and ZT0 registers are only accessible if SVCR.ZA is 1. The VMM must understand this, in particular when loading state SVCR should be configured before other state. There are a large number of subfeatures for SME, most of which only offer additional instructions but some of which (SME2 and FA64) add architectural state. These are configured via the ID registers as per usual. The new KVM_ARM_VCPU_VEC feature and ZA and ZT0 registers have not been added to the get-reg-list selftest, the idea of supporting additional features there without restructuring the program to generate all possible feature combinations has been rejected. 
I will post a separate series which does that restructuring. No support is present for protected guests, this is expected to be added separately, the series is already rather large and pKVM in general offers a subset of features. This series is based on Mark Rutland's fix series: https://lore.kernel.org/r/20250210195226.1215254-1-mark.rutland@arm.com Signed-off-by: Mark Brown <broonie(a)kernel.org> --- Changes in v4: - Rebase onto v6.14-rc2 and Mark Rutland's fixes. - Expose SME to nested guests. - Additional cleanups and test fixes following on from the rebase. - Link to v3: https://lore.kernel.org/r/20241220-kvm-arm64-sme-v3-0-05b018c1ffeb@kernel.o… Changes in v3: - Rebase onto v6.12-rc2. - Link to v2: https://lore.kernel.org/r/20231222-kvm-arm64-sme-v2-0-da226cb180bb@kernel.o… Changes in v2: - Rebase onto v6.7-rc3. - Configure subfeatures based on host system only. - Complete nVHE support. - There was some snafu with sending v1 out, it didn't make it to the lists but in case it hit people's inboxes I'm sending as v2. 
--- Mark Brown (27): arm64/fpsimd: Update FA64 and ZT0 enables when loading SME state arm64/fpsimd: Decide to save ZT0 and streaming mode FFR at bind time arm64/fpsimd: Check enable bit for FA64 when saving EFI state arm64/fpsimd: Determine maximum virtualisable SME vector length KVM: arm64: Introduce non-UNDEF FGT control KVM: arm64: Pay attention to FFR parameter in SVE save and load KVM: arm64: Pull ctxt_has_ helpers to start of sysreg-sr.h KVM: arm64: Move SVE state access macros after feature test macros KVM: arm64: Rename SVE finalization constants to be more general KVM: arm64: Document the KVM ABI for SME KVM: arm64: Define internal features for SME KVM: arm64: Rename sve_state_reg_region KVM: arm64: Store vector lengths in an array KVM: arm64: Implement SME vector length configuration KVM: arm64: Support SME control registers KVM: arm64: Support TPIDR2_EL0 KVM: arm64: Support SME identification registers for guests KVM: arm64: Support SME priority registers KVM: arm64: Provide assembly for SME state restore KVM: arm64: Support userspace access to streaming mode Z and P registers KVM: arm64: Expose SME specific state to userspace KVM: arm64: Context switch SME state for normal guests KVM: arm64: Handle SME exceptions KVM: arm64: Expose SME to nested guests KVM: arm64: Provide interface for configuring and enabling SME for guests KVM: arm64: selftests: Add SME system registers to get-reg-list KVM: arm64: selftests: Add SME to set_id_regs test Documentation/virt/kvm/api.rst | 117 +++++++--- arch/arm64/include/asm/fpsimd.h | 22 ++ arch/arm64/include/asm/kvm_emulate.h | 12 +- arch/arm64/include/asm/kvm_host.h | 143 ++++++++++--- arch/arm64/include/asm/kvm_hyp.h | 4 +- arch/arm64/include/asm/kvm_pkvm.h | 2 +- arch/arm64/include/asm/vncr_mapping.h | 2 + arch/arm64/include/uapi/asm/kvm.h | 33 +++ arch/arm64/kernel/cpufeature.c | 2 - arch/arm64/kernel/fpsimd.c | 86 ++++---- arch/arm64/kvm/arm.c | 10 + arch/arm64/kvm/fpsimd.c | 19 +- arch/arm64/kvm/guest.c | 262 
++++++++++++++++++++--- arch/arm64/kvm/handle_exit.c | 14 ++ arch/arm64/kvm/hyp/fpsimd.S | 18 +- arch/arm64/kvm/hyp/include/hyp/switch.h | 141 ++++++++++-- arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 77 ++++--- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 9 +- arch/arm64/kvm/hyp/nvhe/pkvm.c | 4 +- arch/arm64/kvm/hyp/nvhe/switch.c | 11 +- arch/arm64/kvm/hyp/vhe/switch.c | 21 +- arch/arm64/kvm/nested.c | 3 +- arch/arm64/kvm/reset.c | 154 +++++++++---- arch/arm64/kvm/sys_regs.c | 118 +++++++++- include/uapi/linux/kvm.h | 1 + tools/testing/selftests/kvm/arm64/get-reg-list.c | 32 ++- tools/testing/selftests/kvm/arm64/set_id_regs.c | 29 ++- 27 files changed, 1078 insertions(+), 268 deletions(-) --- base-commit: 6a25088d268ce4c2163142ead7fe1975bb687cb7 change-id: 20230301-kvm-arm64-sme-06a1246d3636 prerequisite-message-id: 20250210195226.1215254-1-mark.rutland(a)arm.com prerequisite-patch-id: 615ab9c526e9f6242bd5b8d7188e96fb66fb0e64 prerequisite-patch-id: e5c4f2ff9c9ba01a0f659dd1e8bf6396de46e197 prerequisite-patch-id: 0794d28526755180847841c045a6b7cb3d800c16 prerequisite-patch-id: 079d3a8a680f793b593268eeba000acc55a0b4ec prerequisite-patch-id: a3428f67a5ee49f2b01208f30b57984d5409d8f5 prerequisite-patch-id: 26393e401e9eae7cff5bb1d3bdb18b4e29ffc8fe prerequisite-patch-id: 64f9819f751da4a1c73b9d1b292ccee6afda89f6 prerequisite-patch-id: 0355baaa8ceb31dc85d015b56084c33416f78041 Best regards, -- Mark Brown <broonie(a)kernel.org>

6 months

2
31
0 0

[PATCH AUTOSEL 6.12 01/31] sched_ext: selftests/dsp_local_on: Fix sporadic failures

by Sasha Levin

From: Tejun Heo <tj(a)kernel.org> [ Upstream commit e9fe182772dcb2630964724fd93e9c90b68ea0fd ] dsp_local_on has several incorrect assumptions, one of which is that p->nr_cpus_allowed always tracks p->cpus_ptr. This is not true when a task is scheduled out while migration is disabled - p->cpus_ptr is temporarily overridden to the previous CPU while p->nr_cpus_allowed remains unchanged. This led to sporadic test failures when dsp_local_on_dispatch() tries to put a migration disabled task to a different CPU. Fix it by keeping the previous CPU when migration is disabled. There are SCX schedulers that make use of p->nr_cpus_allowed. They should also implement explicit handling for p->migration_disabled. Signed-off-by: Tejun Heo <tj(a)kernel.org> Reported-by: Ihor Solodrai <ihor.solodrai(a)pm.me> Cc: Andrea Righi <arighi(a)nvidia.com> Cc: Changwoo Min <changwoo(a)igalia.com> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/sched_ext/dsp_local_on.bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c index c9a2da0575a0f..eea06decb6f59 100644 --- a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c +++ b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c @@ -43,7 +43,7 @@ void BPF_STRUCT_OPS(dsp_local_on_dispatch, s32 cpu, struct task_struct *prev) if (!p) return; - if (p->nr_cpus_allowed == nr_cpus) + if (p->nr_cpus_allowed == nr_cpus && !p->migration_disabled) target = bpf_get_prandom_u32() % nr_cpus; else target = scx_bpf_task_cpu(p); -- 2.39.5

6 months

1
1
0 0

[PATCH AUTOSEL 6.13 01/31] sched_ext: selftests/dsp_local_on: Fix sporadic failures

by Sasha Levin

From: Tejun Heo <tj(a)kernel.org> [ Upstream commit e9fe182772dcb2630964724fd93e9c90b68ea0fd ] dsp_local_on has several incorrect assumptions, one of which is that p->nr_cpus_allowed always tracks p->cpus_ptr. This is not true when a task is scheduled out while migration is disabled - p->cpus_ptr is temporarily overridden to the previous CPU while p->nr_cpus_allowed remains unchanged. This led to sporadic test failures when dsp_local_on_dispatch() tries to put a migration disabled task to a different CPU. Fix it by keeping the previous CPU when migration is disabled. There are SCX schedulers that make use of p->nr_cpus_allowed. They should also implement explicit handling for p->migration_disabled. Signed-off-by: Tejun Heo <tj(a)kernel.org> Reported-by: Ihor Solodrai <ihor.solodrai(a)pm.me> Cc: Andrea Righi <arighi(a)nvidia.com> Cc: Changwoo Min <changwoo(a)igalia.com> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/sched_ext/dsp_local_on.bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c index fbda6bf546712..758b479bd1ee1 100644 --- a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c +++ b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c @@ -43,7 +43,7 @@ void BPF_STRUCT_OPS(dsp_local_on_dispatch, s32 cpu, struct task_struct *prev) if (!p) return; - if (p->nr_cpus_allowed == nr_cpus) + if (p->nr_cpus_allowed == nr_cpus && !p->migration_disabled) target = bpf_get_prandom_u32() % nr_cpus; else target = scx_bpf_task_cpu(p); -- 2.39.5

6 months

1
1
0 0

[PATCH 0/2] tools: Unify top-level quiet infrastructure

by Charlie Jenkins

The quiet infrastructure was moved out of Makefile.build to accommodate the new syscall table generation scripts in perf. Syscall table generation wanted to also be able to be quiet, so instead of again copying the code to set the quiet variables, the code was moved into Makefile.perf to be used globally. This was not the right solution. It should have been moved even further upwards in the call chain. Makefile.include is imported in many files so this seems like a proper place to put it. To: Signed-off-by: Charlie Jenkins <charlie(a)rivosinc.com> --- Charlie Jenkins (2): tools: Unify top-level quiet infrastructure tools: Remove redundant quiet setup tools/arch/arm64/tools/Makefile | 6 ----- tools/bpf/Makefile | 6 ----- tools/bpf/bpftool/Documentation/Makefile | 6 ----- tools/bpf/bpftool/Makefile | 6 ----- tools/bpf/resolve_btfids/Makefile | 2 -- tools/bpf/runqslower/Makefile | 5 +--- tools/build/Makefile | 8 +----- tools/lib/bpf/Makefile | 13 ---------- tools/lib/perf/Makefile | 13 ---------- tools/lib/thermal/Makefile | 13 ---------- tools/objtool/Makefile | 6 ----- tools/perf/Makefile.perf | 41 ------------------------------- tools/scripts/Makefile.include | 31 ++++++++++++++++++++++- tools/testing/selftests/bpf/Makefile.docs | 6 ----- tools/testing/selftests/hid/Makefile | 2 -- tools/thermal/lib/Makefile | 13 ---------- tools/tracing/latency/Makefile | 6 ----- tools/tracing/rtla/Makefile | 6 ----- tools/verification/rv/Makefile | 6 ----- 19 files changed, 32 insertions(+), 163 deletions(-) --- base-commit: 2014c95afecee3e76ca4a56956a936e23283f05b change-id: 20250203-quiet_tools-9a6ea9d65a19 -- - Charlie

6 months

5
12
0 0

[PATCH v7 0/8] Buddy allocator like (or non-uniform) folio split

by Zi Yan

Hi Matthew, Can you please take a look at Patch 1 and let me know if the new xarray function looks good to you? I will send __filemap_add_folio() and shmem_split_large_entry() changes separately. Hi all, This patchset adds a new buddy allocator like (or non-uniform) large folio split from a order-n folio to order-m with m < n. It reduces 1. the total number of after-split folios from 2^(n-m) to n-m+1; 2. the amount of memory needed for multi-index xarray split from 2^(n/6-m/6) to n/6-m/6, assuming XA_CHUNK_SHIFT=6; 3. keep more large folios after a split from all order-m folios to order-(n-1) to order-m folios. For example, to split an order-9 to order-0, folio split generates 10 (or 11 for anonymous memory) folios instead of 512, allocates 1 xa_node instead of 8, and leaves 1 order-8, 1 order-7, ..., 1 order-1 and 2 order-0 folios (or 4 order-0 for anonymous memory) instead of 512 order-0 folios. It is on top of mm-everything-2025-02-07-05-27 with V6 reverted. It is ready to be merged. Instead of duplicating existing split_huge_page*() code, __folio_split() is introduced as the shared backend code for both split_huge_page_to_list_to_order() and folio_split(). __folio_split() can support both uniform split and buddy allocator like (or non-uniform) split. All existing split_huge_page*() users can be gradually converted to use folio_split() if possible. In this patchset, I converted truncate_inode_partial_folio() to use folio_split(). xfstests quick group passed for both tmpfs and xfs. Changelog === From V6[8]: 1. Added an xarray function xas_try_split() to support iterative folio split, removing the need of using xas_split_alloc() and xas_split(). The function guarantees that at most one xa_node is allocated for each call. 2. Added concrete numbers of after-split folios and xa_node savings to cover letter, commit log. (per Andrew) From V5[7]: 1. Split shmem to any lower order patches are in mm tree, so dropped from this series. 2. 
Rename split_folio_at() to try_folio_split() to clarify that non-uniform split will not be used if it is not supported. From V4[6]: 1. Enabled shmem support in both uniform and buddy allocator like split and added selftests for it. 2. Added functions to check if uniform split and buddy allocator like split are supported for the given folio and order. 3. Made truncate fall back to uniform split if buddy allocator split is not supported (CONFIG_READ_ONLY_THP_FOR_FS and FS without large folio). 4. Added the missing folio_clear_has_hwpoisoned() to __split_unmapped_folio(). From V3[5]: 1. Used xas_split_alloc(GFP_NOWAIT) instead of xas_nomem(), since extra operations inside xas_split_alloc() are needed for correctness. 2. Enabled folio_split() for shmem and no issue was found with xfstests quick test group. 3. Split both ends of a truncate range in truncate_inode_partial_folio() to avoid wasting memory in shmem truncate (per David Hildenbrand). 4. Removed page_in_folio_offset() since page_folio() does the same thing. 5. Finished truncate related tests from xfstests quick test group on XFS and tmpfs without issues. 6. Disabled buddy allocator like split on CONFIG_READ_ONLY_THP_FOR_FS and FS without large folio. This check was missed in the prior versions. From V2[3]: 1. Incorporated all the feedback from Kirill[4]. 2. Used GFP_NOWAIT for xas_nomem(). 3. Tested the code path when xas_nomem() fails. 4. Added selftests for folio_split(). 5. Fixed no THP config build error. From V1[2]: 1. Split the original patch 1 into multiple ones for easy review (per Kirill). 2. Added xas_destroy() to avoid memory leak. 3. Fixed nr_dropped not used error (per kernel test robot). 4. Added proper error handling when xas_nomem() fails to allocate memory for xas_split() during buddy allocator like split. From RFC[1]: 1. Merged backend code of split_huge_page_to_list_to_order() and folio_split(). The same code is used for both uniform split and buddy allocator like split. 2. 
Use xas_nomem() instead of xas_split_alloc() for folio_split(). 3. folio_split() now leaves the first after-split folio unlocked, instead of the one containing the given page, since the caller of truncate_inode_partial_folio() locks and unlocks the first folio. 4. Extended split_huge_page debugfs to use folio_split(). 5. Added truncate_inode_partial_folio() as first user of folio_split(). Design === folio_split() splits a large folio in the same way as buddy allocator splits a large free page for allocation. The purpose is to minimize the number of folios after the split. For example, if user wants to free the 3rd subpage in an order-9 folio, folio_split() will split the order-9 folio as: O-0, O-0, O-0, O-0, O-2, O-3, O-4, O-5, O-6, O-7, O-8 if it is anon O-1, O-0, O-0, O-2, O-3, O-4, O-5, O-6, O-7, O-8 if it is pagecache Since anon folio does not support order-1 yet. The split process is similar to existing approach: 1. Unmap all page mappings (split PMD mappings if exist); 2. Split meta data like memcg, page owner, page alloc tag; 3. Copy meta data in struct folio to sub pages, but instead of splitting the whole folio into multiple smaller ones with the same order in a shot, this approach splits the folio iteratively. Taking the example above, this approach first splits the original order-9 into two order-8, then splits left part of order-8 to two order-7 and so on; 4. Post-process split folios, like write mapping->i_pages for pagecache, adjust folio refcounts, add split folios to corresponding list; 5. Remap split folios 6. Unlock split folios. __split_unmapped_folio() and __split_folio_to_order() replace __split_huge_page() and __split_huge_page_tail() respectively. __split_unmapped_folio() uses different approaches to perform uniform split and buddy allocator like split: 1. uniform split: one single call to __split_folio_to_order() is used to uniformly split the given folio. All resulting folios are put back to the list after split. 
The folio containing the given page is left to caller to unlock and others are unlocked. 2. buddy allocator like (or non-uniform) split: (old_order - new_order) calls to __split_folio_to_order() are used to split the given folio at order N to order N-1. After each call, the target folio is changed to the one containing the page, which is given as a folio_split() parameter. After each call, folios not containing the page are put back to the list. The folio containing the page is put back to the list when its order is new_order. All folios are unlocked except the first folio, which is left to caller to unlock. Patch Overview === 1. Patch 1 added a new xarray function xas_try_split() to perform iterative xarray split. 2. Patch 2 added __split_unmapped_folio() and __split_folio_to_order() to prepare for moving to new backend split code. 3. Patch 3 moved common code in split_huge_page_to_list_to_order() to __folio_split(). 4. Patch 4 added new folio_split() and made split_huge_page_to_list_to_order() share the new __split_unmapped_folio() with folio_split(). 5. Patch 5 removed no longer used __split_huge_page() and __split_huge_page_tail(). 6. Patch 6 added a new in_folio_offset to split_huge_page debugfs for folio_split() test. 7. Patch 7 used try_folio_split() for truncate operation. 8. Patch 8 added folio_split() tests. Any comments and/or suggestions are welcome. Thanks. 
[1] https://lore.kernel.org/linux-mm/20241008223748.555845-1-ziy@nvidia.com/ [2] https://lore.kernel.org/linux-mm/20241028180932.1319265-1-ziy@nvidia.com/ [3] https://lore.kernel.org/linux-mm/20241101150357.1752726-1-ziy@nvidia.com/ [4] https://lore.kernel.org/linux-mm/e6ppwz5t4p4kvir6eqzoto4y5fmdjdxdyvxvtw43nc… [5] https://lore.kernel.org/linux-mm/20241205001839.2582020-1-ziy@nvidia.com/ [6] https://lore.kernel.org/linux-mm/20250106165513.104899-1-ziy@nvidia.com/ [7] https://lore.kernel.org/linux-mm/20250116211042.741543-1-ziy@nvidia.com/ [8] https://lore.kernel.org/linux-mm/20250205031417.1771278-1-ziy@nvidia.com/ Zi Yan (8): xarray: add xas_try_split() to split a multi-index entry. mm/huge_memory: add two new (not yet used) functions for folio_split() mm/huge_memory: move folio split common code to __folio_split() mm/huge_memory: add buddy allocator like (non-uniform) folio_split() mm/huge_memory: remove the old, unused __split_huge_page() mm/huge_memory: add folio_split() to debugfs testing interface. mm/truncate: use buddy allocator like folio split for truncate operation. selftests/mm: add tests for folio_split(), buddy allocator like split. Documentation/core-api/xarray.rst | 14 +- include/linux/huge_mm.h | 36 + include/linux/xarray.h | 7 + lib/test_xarray.c | 47 ++ lib/xarray.c | 136 +++- mm/huge_memory.c | 751 ++++++++++++------ mm/truncate.c | 31 +- tools/testing/radix-tree/Makefile | 1 + .../selftests/mm/split_huge_page_test.c | 34 +- 9 files changed, 772 insertions(+), 285 deletions(-) -- 2.47.2

6 months

3
25
0 0

[PATCH v6 0/3] rust: kunit: Support KUnit tests with a user-space like syntax

by David Gow

Hi all, After much delay, v6 of the KUnit/Rust integration patchset is here. This change incorporates most of Miguel's suggestions from v5 (save for some of the copyright headers I wasn't comfortable unilaterally changing). This means the documentation is much improved, and it should work more cleanly on Rust 1.83 and 1.84, no longer requiring static_mut_refs or const_mut_refs. (I'm not 100% sure I understand all of the details of this, but I'm comfortable enough with how it's ended up.) This has been rebased against 6.14-rc1/rust-next, and should be able to comfortably go in via either the KUnit or Rust trees. My suspicion is that there's more likely to be conflicts with the Rust work (due to the changes in rust/macros/lib.rs) than with KUnit, where there are no current patches which would break the API, so maybe it makes the most sense for it to go in via Rust for 6.15. This series was originally written by José Expósito, and has been modified and updated by Matt Gilbride, Miguel Ojeda, and myself. The original version can be found here: https://github.com/Rust-for-Linux/linux/pull/950 Add support for writing KUnit tests in Rust. While Rust doctests are already converted to KUnit tests and run, they're really better suited for examples, rather than as first-class unit tests. This series implements a series of direct Rust bindings for KUnit tests, as well as a new macro which allows KUnit tests to be written using a close variant of normal Rust unit test syntax. 
The only change required is replacing '#[cfg(test)]' with '#[kunit_tests(kunit_test_suite_name)]' An example test would look like: #[kunit_tests(rust_kernel_hid_driver)] mod tests { use super::*; use crate::{c_str, driver, hid, prelude::*}; use core::ptr; struct SimpleTestDriver; impl Driver for SimpleTestDriver { type Data = (); } #[test] fn rust_test_hid_driver_adapter() { let mut hid = bindings::hid_driver::default(); let name = c_str!("SimpleTestDriver"); static MODULE: ThisModule = unsafe { ThisModule::from_ptr(ptr::null_mut()) }; let res = unsafe { <hid::Adapter<SimpleTestDriver> as driver::DriverOps>::register(&mut hid, name, &MODULE) }; assert_eq!(res, Err(ENODEV)); // The mock returns -19 } } Please give this a go, and make sure I haven't broken it! There's almost certainly a lot of improvements which can be made -- and there's a fair case to be made for replacing some of this with generated C code which can use the C macros -- but this is hopefully an adequate implementation for now, and the interface can (with luck) remain the same even if the implementation changes. A few small notable missing features: - Attributes (like the speed of a test) are hardcoded to the default value. - Similarly, the module name attribute is hardcoded to NULL. In C, we use the KBUILD_MODNAME macro, but I couldn't find a way to use this from Rust which wasn't more ugly than just disabling it. - Assertions are not automatically rewritten to use KUnit assertions. --- Changes since v5: https://lore.kernel.org/all/20241213081035.2069066-1-davidgow@google.com/ - Rebased against 6.14-rc1 - Fixed a bunch of warnings / clippy lints introduced in Rust 1.83 and 1.84. - No longer needs static_mut_refs / const_mut_refs, and is much cleaned up as a result. (Thanks, Miguel) - Major documentation and example fixes. 
(Thanks, Miguel) Changes since v4: https://lore.kernel.org/linux-kselftest/20241101064505.3820737-1-davidgow@g… - Rebased against 6.13-rc1 - Allowed an unused_unsafe warning after the behaviour of addr_of_mut!() changed in Rust 1.82. (Thanks Boqun, Miguel) - "Expect" that the sample assert_eq!(1+1, 2) produces a clippy warning due to a redundant assertion. (Thanks Boqun, Miguel) - Fix some missing safety comments, and remove some unneeded 'unsafe' blocks. (Thanks Boqun) - Fix a couple of minor rustfmt issues which were triggering checkpatch warnings. Changes since v3: https://lore.kernel.org/linux-kselftest/20241030045719.3085147-2-davidgow@g… - The kunit_unsafe_test_suite!() macro now panic!s if the suite name is too long, triggering a compile error. (Thanks, Alice!) - The #[kunit_tests()] macro now preserves span information, so errors can be better reported. (Thanks, Boqun!) - The example tests have been updated to no longer use assert_eq!() with a constant bool argument (which triggered a clippy warning now we have the span info). Changes since v2: https://lore.kernel.org/linux-kselftest/20241029092422.2884505-1-davidgow@g… - Include missing rust/macros/kunit.rs file from v2. (Thanks Boqun!) - The kunit_unsafe_test_suite!() macro will truncate the name of the suite if it is too long. (Thanks Alice!) - The proc macro now emits an error if the suite name is too long. - We no longer needlessly use UnsafeCell<> in kunit_unsafe_test_suite!(). (Thanks Alice!) Changes since v1: https://lore.kernel.org/lkml/20230720-rustbind-v1-0-c80db349e3b5@google.com… - Rebase on top of the latest rust-next (commit 718c4069896c) - Make kunit_case a const fn, rather than a macro (Thanks Boqun) - As a result, the null terminator is now created with kernel::kunit::kunit_case_null() - Use the C kunit_get_current_test() function to implement in_kunit_test(), rather than re-implementing it (less efficiently) ourselves. 
Changes since the GitHub PR: - Rebased on top of kselftest/kunit - Add const_mut_refs feature This may conflict with https://lore.kernel.org/lkml/20230503090708.2524310-6-nmi@metaspace.dk/ - Add rust/macros/kunit.rs to the KUnit MAINTAINERS entry --- José Expósito (3): rust: kunit: add KUnit case and suite macros rust: macros: add macro to easily run KUnit tests rust: kunit: allow to know if we are in a test MAINTAINERS | 1 + rust/kernel/kunit.rs | 199 +++++++++++++++++++++++++++++++++++++++++++ rust/macros/kunit.rs | 161 ++++++++++++++++++++++++++++++++++ rust/macros/lib.rs | 29 +++++++ 4 files changed, 390 insertions(+) create mode 100644 rust/macros/kunit.rs -- 2.48.1.601.g30ceb7b040-goog

6 months

3
17
0 0

[PATCH net 0/4] sockmap, vsock: For connectible sockets allow only connected

by Michal Luczaj

Series deals with one more case of vsock surprising BPF/sockmap by being inconsistent about (having an) assigned transport. KASAN: null-ptr-deref in range [0x0000000000000120-0x0000000000000127] CPU: 7 UID: 0 PID: 56 Comm: kworker/7:0 Not tainted 6.14.0-rc1+ Workqueue: vsock-loopback vsock_loopback_work RIP: 0010:vsock_read_skb+0x4b/0x90 Call Trace: sk_psock_verdict_data_ready+0xa4/0x2e0 virtio_transport_recv_pkt+0x1ca8/0x2acc vsock_loopback_work+0x27d/0x3f0 process_one_work+0x846/0x1420 worker_thread+0x5b3/0xf80 kthread+0x35a/0x700 ret_from_fork+0x2d/0x70 ret_from_fork_asm+0x1a/0x30 This bug, similarly to commit f6abafcd32f9 ("vsock/bpf: return early if transport is not assigned"), could be fixed with a single NULL check. But instead, let's explore another approach: take a hint from vsock_bpf_update_proto() and teach sockmap to accept only vsocks that are already connected (no risk of transport being dropped or reassigned). At the same time straight reject the listeners (vsock listening sockets do not carry any transport anyway). This way BPF does not have to worry about vsk->transport becoming NULL. Signed-off-by: Michal Luczaj <mhal(a)rbox.co> --- Michal Luczaj (4): sockmap, vsock: For connectible sockets allow only connected vsock/bpf: Warn on socket without transport selftest/bpf: Adapt vsock_delete_on_close to sockmap rejecting unconnected selftest/bpf: Add vsock test for sockmap rejecting unconnected net/core/sock_map.c | 3 + net/vmw_vsock/af_vsock.c | 3 + net/vmw_vsock/vsock_bpf.c | 2 +- .../selftests/bpf/prog_tests/sockmap_basic.c | 70 ++++++++++++++++------ 4 files changed, 59 insertions(+), 19 deletions(-) --- base-commit: 9c01a177c2e4b55d2bcce8a1f6bdd1d46a8320e3 change-id: 20250210-vsock-listen-sockmap-nullptr-e6e82ca79611 Best regards, -- Michal Luczaj <mhal(a)rbox.co>

6 months

3
13
0 0

[RESEND v4 0/3] mm/pkey: Add PKEY_UNRESTRICTED macro

by Yury Khrustalev

Add PKEY_UNRESTRICTED macro to mman.h and use it in selftests. For context, this change will also allow for more consistent update of the Glibc manual which in turn will help with introducing memory protection keys on AArch64 targets. Applies to 5bc55a333a2f (tag: v6.13-rc7). Note that I couldn't build ppc tests so I would appreciate if someone could check the 3rd patch. Thank you! Signed-off-by: Yury Khrustalev <yury.khrustalev(a)arm.com> --- Changes in v4: - Removed change to tools/include/uapi/asm-generic/mman-common.h as it is not necessary. Link to v3: https://lore.kernel.org/all/20241028090715.509527-1-yury.khrustalev@arm.com/ Changes in v3: - Replaced previously missed 0-s tools/testing/selftests/mm/mseal_test.c - Replaced previously missed 0-s in tools/testing/selftests/mm/mseal_test.c Link to v2: https://lore.kernel.org/linux-arch/20241027170006.464252-2-yury.khrustalev@… Changes in v2: - Update tools/include/uapi/asm-generic/mman-common.h as well - Add usages of the new macro to selftests. Link to v1: https://lore.kernel.org/linux-arch/20241022120128.359652-1-yury.khrustalev@… --- Yury Khrustalev (3): mm/pkey: Add PKEY_UNRESTRICTED macro selftests/mm: Use PKEY_UNRESTRICTED macro selftests/powerpc: Use PKEY_UNRESTRICTED macro include/uapi/asm-generic/mman-common.h | 1 + tools/testing/selftests/mm/mseal_test.c | 6 +++--- tools/testing/selftests/mm/pkey-helpers.h | 3 ++- tools/testing/selftests/mm/pkey_sighandler_tests.c | 4 ++-- tools/testing/selftests/mm/protection_keys.c | 2 +- tools/testing/selftests/powerpc/include/pkeys.h | 2 +- tools/testing/selftests/powerpc/mm/pkey_exec_prot.c | 2 +- tools/testing/selftests/powerpc/mm/pkey_siginfo.c | 2 +- tools/testing/selftests/powerpc/ptrace/core-pkey.c | 6 +++--- tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c | 6 +++--- 10 files changed, 18 insertions(+), 16 deletions(-) -- 2.39.5

6 months

3
7
0 0

[PATCH] kunit: Clarify kunit_skip() argument name

by Kevin Brodsky

kunit_skip() and kunit_mark_skipped() can only be passed a pointer to a struct kunit, not struct kunit_suite (only kunit_log() actually supports both). Rename their first argument accordingly. Signed-off-by: Kevin Brodsky <kevin.brodsky(a)arm.com> --- Cc: Brendan Higgins <brendan.higgins(a)linux.dev> Cc: David Gow <davidgow(a)google.com> Cc: Rae Moar <rmoar(a)google.com> Cc: linux-kselftest(a)vger.kernel.org --- include/kunit/test.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/include/kunit/test.h b/include/kunit/test.h index 58dbab60f853..0ffb97c78566 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -553,9 +553,9 @@ void kunit_cleanup(struct kunit *test); void __printf(2, 3) kunit_log_append(struct string_stream *log, const char *fmt, ...); /** - * kunit_mark_skipped() - Marks @test_or_suite as skipped + * kunit_mark_skipped() - Marks @test as skipped * - * @test_or_suite: The test context object. + * @test: The test context object. * @fmt: A printk() style format string. * * Marks the test as skipped. @fmt is given output as the test status @@ -563,18 +563,18 @@ void __printf(2, 3) kunit_log_append(struct string_stream *log, const char *fmt, * * Test execution continues after kunit_mark_skipped() is called. */ -#define kunit_mark_skipped(test_or_suite, fmt, ...) \ +#define kunit_mark_skipped(test, fmt, ...) \ do { \ - WRITE_ONCE((test_or_suite)->status, KUNIT_SKIPPED); \ - scnprintf((test_or_suite)->status_comment, \ + WRITE_ONCE((test)->status, KUNIT_SKIPPED); \ + scnprintf((test)->status_comment, \ KUNIT_STATUS_COMMENT_SIZE, \ fmt, ##__VA_ARGS__); \ } while (0) /** - * kunit_skip() - Marks @test_or_suite as skipped + * kunit_skip() - Marks @test as skipped * - * @test_or_suite: The test context object. + * @test: The test context object. * @fmt: A printk() style format string. * * Skips the test. 
@fmt is given output as the test status @@ -582,10 +582,10 @@ void __printf(2, 3) kunit_log_append(struct string_stream *log, const char *fmt, * * Test execution is halted after kunit_skip() is called. */ -#define kunit_skip(test_or_suite, fmt, ...) \ +#define kunit_skip(test, fmt, ...) \ do { \ - kunit_mark_skipped((test_or_suite), fmt, ##__VA_ARGS__);\ - kunit_try_catch_throw(&((test_or_suite)->try_catch)); \ + kunit_mark_skipped((test), fmt, ##__VA_ARGS__); \ + kunit_try_catch_throw(&((test)->try_catch)); \ } while (0) /* base-commit: 0ad2507d5d93f39619fc42372c347d6006b64319 -- 2.47.0

6 months

2
1
0 0

[PATCH] kunit: tool: Build GDB scripts

by Brendan Jackman

Following a similar rationale as commit e4835f1da425f ("kunit: tool: Build compile_commands.json"), make a common developer tool available by default for KUnit users. Compared to compile_commands.json, there is a little more work to be done to build the GDB scripts. Is it enough to affect development cycle duration? Unscientific evaluation: rm -rf .kunit; time tools/testing/kunit/kunit.py build --kunitconfig ./lib/kunit/.kunitconfig --jobs 96 Without this patch it took 14.77s, with this patch it took 14.83. So, although `make scripts_gdb` is pretty slow, presumably most of that is just the overhead of running Kbuild at all, actually building the scripts is approximately free. Note also, to actually get the GDB scripts the user needs to enable CONFIG_SCRIPTS_GDB, but building the scripts_gdb target without that is still harmless. Signed-off-by: Brendan Jackman <jackmanb(a)google.com> --- tools/testing/kunit/kunit_kernel.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index e76d7894b6c5195ece49f0d8c7ac35130df428a9..33b5f7351cbb5d0be240cb52db2bc1fa94aeb75e 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -72,8 +72,8 @@ class LinuxSourceTreeOperations: raise ConfigError(e.output.decode()) def make(self, jobs: int, build_dir: str, make_options: Optional[List[str]]) -> None: - command = ['make', 'all', 'compile_commands.json', 'ARCH=' + self._linux_arch, - 'O=' + build_dir, '--jobs=' + str(jobs)] + command = ['make', 'all', 'compile_commands.json', 'scripts_gdb', + 'ARCH=' + self._linux_arch, 'O=' + build_dir, '--jobs=' + str(jobs)] if make_options: command.extend(make_options) if self._cross_compile: --- base-commit: 521d60e196ecb215f425e04e9ab33e02beaffbe3 change-id: 20250121-kunit-gdb-b27315b4f2d8 Best regards, -- Brendan Jackman <jackmanb(a)google.com>

6 months

2
1
0 0

[PATCH] tools/nolibc: add support for N64 and N32 ABIs

by Thomas Weißschuh

Add support for the MIPS 64bit N64 and ILP32 N32 ABIs. In addition to different byte orders and ABIs there are also different releases of the MIPS architecture. To avoid blowing up the test matrix, only add a subset of all possible test combinations. Signed-off-by: Thomas Weißschuh <linux(a)weissschuh.net> --- tools/include/nolibc/arch-mips.h | 119 ++++++++++++++++++++++++---- tools/testing/selftests/nolibc/Makefile | 28 ++++++- tools/testing/selftests/nolibc/run-tests.sh | 2 +- 3 files changed, 131 insertions(+), 18 deletions(-) diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h index 753a8ed2cf695f0b5eac4b5e4d317fdb383ebf93..638520a3427a985fdbd5f5a49b55853bbadeee75 100644 --- a/tools/include/nolibc/arch-mips.h +++ b/tools/include/nolibc/arch-mips.h @@ -10,7 +10,7 @@ #include "compiler.h" #include "crt.h" -#if !defined(_ABIO32) +#if !defined(_ABIO32) && !defined(_ABIN32) && !defined(_ABI64) #error Unsupported MIPS ABI #endif @@ -32,11 +32,32 @@ * - the arguments are cast to long and assigned into the target registers * which are then simply passed as registers to the asm code, so that we * don't have to experience issues with register constraints. + * + * Syscalls for MIPS ABI N32, same as ABI O32 with the following differences : + * - arguments are in a0, a1, a2, a3, t0, t1, t2, t3. + * t0..t3 are also known as a4..a7. + * - stack is 16-byte aligned */ +#if defined(_ABIO32) + #define _NOLIBC_SYSCALL_CLOBBERLIST \ "memory", "cc", "at", "v1", "hi", "lo", \ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" +#define _NOLIBC_SYSCALL_STACK_RESERVE "addiu $sp, $sp, -32\n" +#define _NOLIBC_SYSCALL_STACK_UNRESERVE "addiu $sp, $sp, 32\n" + +#elif defined(_ABIN32) || defined(_ABI64) + +/* binutils, GCC and clang disagree about register aliases, use numbers instead. 
*/ +#define _NOLIBC_SYSCALL_CLOBBERLIST \ + "memory", "cc", "at", "v1", \ + "10", "11", "12", "13", "14", "15", "24", "25" + +#define _NOLIBC_SYSCALL_STACK_RESERVE +#define _NOLIBC_SYSCALL_STACK_UNRESERVE + +#endif #define my_syscall0(num) \ ({ \ @@ -44,9 +65,9 @@ register long _arg4 __asm__ ("a3"); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r"(_num), "=r"(_arg4) \ : "r"(_num) \ : _NOLIBC_SYSCALL_CLOBBERLIST \ @@ -61,9 +82,9 @@ register long _arg4 __asm__ ("a3"); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r"(_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1) \ @@ -80,9 +101,9 @@ register long _arg4 __asm__ ("a3"); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r"(_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2) \ @@ -100,9 +121,9 @@ register long _arg4 __asm__ ("a3"); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r"(_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2), "r"(_arg3) \ @@ -120,9 +141,9 @@ register long _arg4 __asm__ ("a3") = (long)(arg4); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r" (_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \ @@ -131,6 +152,8 @@ _arg4 ? 
-_num : _num; \ }) +#if defined(_ABIO32) + #define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ ({ \ register long _num __asm__ ("v0") = (num); \ @@ -141,10 +164,10 @@ register long _arg5 = (long)(arg5); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "sw %7, 16($sp)\n" \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r" (_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \ @@ -164,11 +187,11 @@ register long _arg6 = (long)(arg6); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "sw %7, 16($sp)\n" \ "sw %8, 20($sp)\n" \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r" (_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ @@ -178,6 +201,50 @@ _arg4 ? -_num : _num; \ }) +#else + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num __asm__ ("v0") = (num); \ + register long _arg1 __asm__ ("$4") = (long)(arg1); \ + register long _arg2 __asm__ ("$5") = (long)(arg2); \ + register long _arg3 __asm__ ("$6") = (long)(arg3); \ + register long _arg4 __asm__ ("$7") = (long)(arg4); \ + register long _arg5 __asm__ ("$8") = (long)(arg5); \ + \ + __asm__ volatile ( \ + "syscall\n" \ + : "=r" (_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _arg4 ? 
-_num : _num; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num __asm__ ("v0") = (num); \ + register long _arg1 __asm__ ("$4") = (long)(arg1); \ + register long _arg2 __asm__ ("$5") = (long)(arg2); \ + register long _arg3 __asm__ ("$6") = (long)(arg3); \ + register long _arg4 __asm__ ("$7") = (long)(arg4); \ + register long _arg5 __asm__ ("$8") = (long)(arg5); \ + register long _arg6 __asm__ ("$9") = (long)(arg6); \ + \ + __asm__ volatile ( \ + "syscall\n" \ + : "=r" (_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_arg6) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#endif + /* startup code, note that it's called __start on MIPS */ void __start(void); void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector __start(void) @@ -190,13 +257,33 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector __ "1:\n" ".cpload $ra\n" "move $a0, $sp\n" /* save stack pointer to $a0, as arg1 of _start_c */ + +#if defined(_ABIO32) "addiu $sp, $sp, -4\n" /* space for .cprestore to store $gp */ ".cprestore 0\n" "li $t0, -8\n" "and $sp, $sp, $t0\n" /* $sp must be 8-byte aligned */ "addiu $sp, $sp, -16\n" /* the callee expects to save a0..a3 there */ - "lui $t9, %hi(_start_c)\n" /* ABI requires current function address in $t9 */ +#else + "daddiu $sp, $sp, -8\n" /* space for .cprestore to store $gp */ + ".cpsetup $ra, 0, 1b\n" + "li $t0, -16\n" + "and $sp, $sp, $t0\n" /* $sp must be 16-byte aligned */ +#endif + + /* ABI requires current function address in $t9 */ +#if defined(_ABIO32) || defined(_ABIN32) + "lui $t9, %hi(_start_c)\n" "ori $t9, %lo(_start_c)\n" +#else + "lui $t9, %highest(_start_c)\n" + "ori $t9, %higher(_start_c)\n" + "dsll $t9, 0x10\n" + "ori $t9, %hi(_start_c)\n" + "dsll $t9, 0x10\n" + "ori $t9, %lo(_start_c)\n" +#endif + "jalr $t9\n" /* transfer to c runtime */ " nop\n" /* delayed slot 
*/ ".set pop\n" diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index 983985b7529b65b7ce4a00c28f3f915d83974eea..2dec6ab9596c974b6aac439685e17f5c10a76948 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ b/tools/testing/selftests/nolibc/Makefile @@ -52,6 +52,10 @@ ARCH_ppc64 = powerpc ARCH_ppc64le = powerpc ARCH_mips32le = mips ARCH_mips32be = mips +ARCH_mipsn32le = mips +ARCH_mipsn32be = mips +ARCH_mips64le = mips +ARCH_mips64be = mips ARCH_riscv32 = riscv ARCH_riscv64 = riscv ARCH := $(or $(ARCH_$(XARCH)),$(XARCH)) @@ -64,6 +68,10 @@ IMAGE_arm64 = arch/arm64/boot/Image IMAGE_arm = arch/arm/boot/zImage IMAGE_mips32le = vmlinuz IMAGE_mips32be = vmlinuz +IMAGE_mipsn32le = vmlinuz +IMAGE_mipsn32be = vmlinuz +IMAGE_mips64le = vmlinuz +IMAGE_mips64be = vmlinuz IMAGE_ppc = vmlinux IMAGE_ppc64 = vmlinux IMAGE_ppc64le = arch/powerpc/boot/zImage @@ -83,6 +91,10 @@ DEFCONFIG_arm64 = defconfig DEFCONFIG_arm = multi_v7_defconfig DEFCONFIG_mips32le = malta_defconfig DEFCONFIG_mips32be = malta_defconfig generic/eb.config +DEFCONFIG_mipsn32le = malta_defconfig generic/64r2.config +DEFCONFIG_mipsn32be = malta_defconfig generic/64r6.config generic/eb.config +DEFCONFIG_mips64le = malta_defconfig generic/64r6.config +DEFCONFIG_mips64be = malta_defconfig generic/64r2.config generic/eb.config DEFCONFIG_ppc = pmac32_defconfig DEFCONFIG_ppc64 = powernv_be_defconfig DEFCONFIG_ppc64le = powernv_defconfig @@ -105,7 +117,11 @@ QEMU_ARCH_x86 = x86_64 QEMU_ARCH_arm64 = aarch64 QEMU_ARCH_arm = arm QEMU_ARCH_mips32le = mipsel # works with malta_defconfig -QEMU_ARCH_mips32be = mips +QEMU_ARCH_mips32be = mips +QEMU_ARCH_mipsn32le = mips64el +QEMU_ARCH_mipsn32be = mips64 +QEMU_ARCH_mips64le = mips64el +QEMU_ARCH_mips64be = mips64 QEMU_ARCH_ppc = ppc QEMU_ARCH_ppc64 = ppc64 QEMU_ARCH_ppc64le = ppc64 @@ -117,6 +133,8 @@ QEMU_ARCH_loongarch = loongarch64 QEMU_ARCH = $(QEMU_ARCH_$(XARCH)) QEMU_ARCH_USER_ppc64le = ppc64le +QEMU_ARCH_USER_mipsn32le = 
mipsn32el +QEMU_ARCH_USER_mipsn32be = mipsn32 QEMU_ARCH_USER = $(or $(QEMU_ARCH_USER_$(XARCH)),$(QEMU_ARCH_$(XARCH))) QEMU_BIOS_DIR = /usr/share/edk2/ @@ -134,6 +152,10 @@ QEMU_ARGS_arm64 = -M virt -cpu cortex-a53 -append "panic=-1 $(TEST:%=NOLIBC QEMU_ARGS_arm = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_mips32le = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_mips32be = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_mipsn32le = -M malta -cpu 5KEc -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_mipsn32be = -M malta -cpu I6400 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_mips64le = -M malta -cpu I6400 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_mips64be = -M malta -cpu 5KEc -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_ppc = -M g3beige -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_ppc64 = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_ppc64le = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" @@ -161,6 +183,10 @@ CFLAGS_ppc64le = -m64 -mlittle-endian -mno-vsx $(call cc-option,-mabi=elfv2) CFLAGS_s390 = -m64 CFLAGS_mips32le = -EL -mabi=32 -fPIC CFLAGS_mips32be = -EB -mabi=32 +CFLAGS_mipsn32le = -EL -mabi=n32 -fPIC -march=mips64r2 +CFLAGS_mipsn32be = -EB -mabi=n32 -march=mips64r6 +CFLAGS_mips64le = -EL -mabi=64 -march=mips64r6 +CFLAGS_mips64be = -EB -mabi=64 -march=mips64r2 CFLAGS_STACKPROTECTOR ?= $(call cc-option,-mstack-protector-guard=global $(call cc-option,-fstack-protector-all)) CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wextra \ $(call cc-option,-fno-stack-protector) $(call cc-option,-Wmissing-prototypes) \ diff --git a/tools/testing/selftests/nolibc/run-tests.sh b/tools/testing/selftests/nolibc/run-tests.sh index 6db01115276888bc89f6ec5532153c37e55c83d3..f0f3890fb5fa8196cd33aa8681ed30b00d8f474e 100755 --- a/tools/testing/selftests/nolibc/run-tests.sh +++ 
b/tools/testing/selftests/nolibc/run-tests.sh @@ -20,7 +20,7 @@ llvm= all_archs=( i386 x86_64 arm64 arm - mips32le mips32be + mips32le mips32be mipsn32le mipsn32be mips64le mips64be ppc ppc64 ppc64le riscv32 riscv64 s390 --- base-commit: 16681bea9a80080765c98b545ad74c17de2d513c change-id: 20231105-nolibc-mips-n32-234901bd910d Best regards, -- Thomas Weißschuh <linux(a)weissschuh.net>

6 months

3
6
0 0

[PATCH net-next v3] selftests: net: add support for testing SO_RCVMARK and SO_RCVPRIORITY

by Anna Emese Nyiri

Introduce tests to verify the correct functionality of the SO_RCVMARK and SO_RCVPRIORITY socket options. Key changes include: - so_rcv_listener.c: Implements a receiver application to test the correct behavior of the SO_RCVMARK and SO_RCVPRIORITY options. - test_so_rcv.sh: Provides a shell script to automate testing for these options. - Makefile: Integrates test_so_rcv.sh into the kernel selftests. v3: - Add the C part to TEST_GEN_FILES. - Ensure the test fails if no cmsg of type opt.name is received in so_rcv_listener.c - Rebased on net-next. v2: https://lore.kernel.org/netdev/20250210192216.37756-1-annaemesenyiri@gmail.… - Add the C part to TEST_GEN_PROGS and .gitignore. - Modify buffer space and add IPv6 testing option in so_rcv_listener.c. - Add IPv6 testing, remove unnecessary comment, add kselftest exit codes, run both binaries in a namespace, and add sleep in test_so_rcv.sh. The sleep was added to ensure that the listener process has enough time to start before the sender attempts to connect. - Rebased on net-next. 
v1: https://lore.kernel.org/netdev/20250129143601.16035-2-annaemesenyiri@gmail.… Suggested-by: Jakub Kicinski <kuba(a)kernel.org> Suggested-by: Ferenc Fejes <fejes(a)inf.elte.hu> Signed-off-by: Anna Emese Nyiri <annaemesenyiri(a)gmail.com> --- tools/testing/selftests/net/.gitignore | 1 + tools/testing/selftests/net/Makefile | 2 + tools/testing/selftests/net/so_rcv_listener.c | 168 ++++++++++++++++++ tools/testing/selftests/net/test_so_rcv.sh | 73 ++++++++ 4 files changed, 244 insertions(+) create mode 100644 tools/testing/selftests/net/so_rcv_listener.c create mode 100755 tools/testing/selftests/net/test_so_rcv.sh diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 28a715a8ef2b..80dcae53ef55 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -42,6 +42,7 @@ socket so_incoming_cpu so_netns_cookie so_txtime +so_rcv_listener stress_reuseport_listen tap tcp_fastopen_backup_key diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index b6271714504d..8d6116b80cf1 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -33,6 +33,7 @@ TEST_PROGS += gro.sh TEST_PROGS += gre_gso.sh TEST_PROGS += cmsg_so_mark.sh TEST_PROGS += cmsg_so_priority.sh +TEST_PROGS += test_so_rcv.sh TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh TEST_PROGS += netns-name.sh TEST_PROGS += nl_netdev.py @@ -76,6 +77,7 @@ TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun tap epoll_busy_ TEST_GEN_FILES += toeplitz TEST_GEN_FILES += cmsg_sender TEST_GEN_FILES += stress_reuseport_listen +TEST_GEN_FILES += so_rcv_listener TEST_PROGS += test_vxlan_vnifiltering.sh TEST_GEN_FILES += io_uring_zerocopy_tx TEST_PROGS += io_uring_zerocopy_tx.sh diff --git a/tools/testing/selftests/net/so_rcv_listener.c b/tools/testing/selftests/net/so_rcv_listener.c new file mode 100644 index 000000000000..4b0b14edce61 --- /dev/null +++ 
b/tools/testing/selftests/net/so_rcv_listener.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <errno.h> +#include <netdb.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <linux/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> + +#ifndef SO_RCVPRIORITY +#define SO_RCVPRIORITY 82 +#endif + +struct options { + __u32 val; + int name; + int rcvname; + const char *host; + const char *service; +} opt; + +static void __attribute__((noreturn)) usage(const char *bin) +{ + printf("Usage: %s [opts] <dst host> <dst port / service>\n", bin); + printf("Options:\n" + "\t\t-M val Test SO_RCVMARK\n" + "\t\t-P val Test SO_RCVPRIORITY\n" + ""); + exit(EXIT_FAILURE); +} + +static void parse_args(int argc, char *argv[]) +{ + int o; + + while ((o = getopt(argc, argv, "M:P:")) != -1) { + switch (o) { + case 'M': + opt.val = atoi(optarg); + opt.name = SO_MARK; + opt.rcvname = SO_RCVMARK; + break; + case 'P': + opt.val = atoi(optarg); + opt.name = SO_PRIORITY; + opt.rcvname = SO_RCVPRIORITY; + break; + default: + usage(argv[0]); + break; + } + } + + if (optind != argc - 2) + usage(argv[0]); + + opt.host = argv[optind]; + opt.service = argv[optind + 1]; +} + +int main(int argc, char *argv[]) +{ + int err = 0; + int recv_fd = -1; + int ret_value = 0; + __u32 recv_val; + struct cmsghdr *cmsg; + char cbuf[CMSG_SPACE(sizeof(__u32))]; + char recv_buf[CMSG_SPACE(sizeof(__u32))]; + struct iovec iov[1]; + struct msghdr msg; + struct sockaddr_in recv_addr4; + struct sockaddr_in6 recv_addr6; + + parse_args(argc, argv); + + int family = strchr(opt.host, ':') ? 
AF_INET6 : AF_INET; + + recv_fd = socket(family, SOCK_DGRAM, IPPROTO_UDP); + if (recv_fd < 0) { + perror("Can't open recv socket"); + ret_value = -errno; + goto cleanup; + } + + err = setsockopt(recv_fd, SOL_SOCKET, opt.rcvname, &opt.val, sizeof(opt.val)); + if (err < 0) { + perror("Recv setsockopt error"); + ret_value = -errno; + goto cleanup; + } + + if (family == AF_INET) { + memset(&recv_addr4, 0, sizeof(recv_addr4)); + recv_addr4.sin_family = family; + recv_addr4.sin_port = htons(atoi(opt.service)); + + if (inet_pton(family, opt.host, &recv_addr4.sin_addr) <= 0) { + perror("Invalid IPV4 address"); + ret_value = -errno; + goto cleanup; + } + + err = bind(recv_fd, (struct sockaddr *)&recv_addr4, sizeof(recv_addr4)); + } else { + memset(&recv_addr6, 0, sizeof(recv_addr6)); + recv_addr6.sin6_family = family; + recv_addr6.sin6_port = htons(atoi(opt.service)); + + if (inet_pton(family, opt.host, &recv_addr6.sin6_addr) <= 0) { + perror("Invalid IPV6 address"); + ret_value = -errno; + goto cleanup; + } + + err = bind(recv_fd, (struct sockaddr *)&recv_addr6, sizeof(recv_addr6)); + } + + if (err < 0) { + perror("Recv bind error"); + ret_value = -errno; + goto cleanup; + } + + iov[0].iov_base = recv_buf; + iov[0].iov_len = sizeof(recv_buf); + + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = iov; + msg.msg_iovlen = 1; + msg.msg_control = cbuf; + msg.msg_controllen = sizeof(cbuf); + + err = recvmsg(recv_fd, &msg, 0); + if (err < 0) { + perror("Message receive error"); + ret_value = -errno; + goto cleanup; + } + + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; cmsg = CMSG_NXTHDR(&msg, cmsg)) { + if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == opt.name) { + recv_val = *(__u32 *)CMSG_DATA(cmsg); + printf("Received value: %u\n", recv_val); + + if (recv_val != opt.val) { + fprintf(stderr, "Error: expected value: %u, got: %u\n", + opt.val, recv_val); + ret_value = -EINVAL; + } + goto cleanup; + } + } + + fprintf(stderr, "Error: No matching cmsg received\n"); + ret_value 
= -ENOMSG; + +cleanup: + if (recv_fd >= 0) + close(recv_fd); + + return ret_value; +} diff --git a/tools/testing/selftests/net/test_so_rcv.sh b/tools/testing/selftests/net/test_so_rcv.sh new file mode 100755 index 000000000000..d8aa4362879d --- /dev/null +++ b/tools/testing/selftests/net/test_so_rcv.sh @@ -0,0 +1,73 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +HOSTS=("127.0.0.1" "::1") +PORT=1234 +TOTAL_TESTS=0 +FAILED_TESTS=0 + +declare -A TESTS=( + ["SO_RCVPRIORITY"]="-P 2" + ["SO_RCVMARK"]="-M 3" +) + +check_result() { + ((TOTAL_TESTS++)) + if [ "$1" -ne 0 ]; then + ((FAILED_TESTS++)) + fi +} + +cleanup() +{ + cleanup_ns $NS +} + +trap cleanup EXIT + +setup_ns NS + +for HOST in "${HOSTS[@]}"; do + PROTOCOL="IPv4" + if [[ "$HOST" == "::1" ]]; then + PROTOCOL="IPv6" + fi + for test_name in "${!TESTS[@]}"; do + echo "Running $test_name test, $PROTOCOL" + arg=${TESTS[$test_name]} + + ip netns exec $NS ./so_rcv_listener $arg $HOST $PORT & + LISTENER_PID=$! + + sleep 0.5 + + if ! ip netns exec $NS ./cmsg_sender $arg $HOST $PORT; then + echo "Sender failed for $test_name, $PROTOCOL" + kill "$LISTENER_PID" 2>/dev/null + wait "$LISTENER_PID" + check_result 1 + continue + fi + + wait "$LISTENER_PID" + LISTENER_EXIT_CODE=$? + + if [ "$LISTENER_EXIT_CODE" -eq 0 ]; then + echo "Rcv test OK for $test_name, $PROTOCOL" + check_result 0 + else + echo "Rcv test FAILED for $test_name, $PROTOCOL" + check_result 1 + fi + done +done + +if [ "$FAILED_TESTS" -ne 0 ]; then + echo "FAIL - $FAILED_TESTS/$TOTAL_TESTS tests failed" + exit ${KSFT_FAIL} +else + echo "OK - All $TOTAL_TESTS tests passed" + exit ${KSFT_PASS} +fi -- 2.43.0

6 months, 1 week

4
3
0 0

[PATCH net-next v8 0/3] netdev-genl: Add an xsk attribute to queues

by Joe Damato

Greetings: Welcome to v8. Minor change, see changelog below. Re-tested on my mlx5 system both with and without CONFIG_XDP_SOCKETS enabled and both with and without NETIF set. This is an attempt to follow up on something Jakub asked me about [1], adding an xsk attribute to queues and more clearly documenting which queues are linked to NAPIs... After the RFC [2], Jakub suggested creating an empty nest for queues which have a pool, so I've adjusted this version to work that way. The nest can be extended in the future to express attributes about XSK as needed. Queues which are not used for AF_XDP do not have the xsk attribute present. I've run the included test on: - my mlx5 machine (via NETIF=) - without setting NETIF And the test seems to pass in both cases. Thanks, Joe [1]: https://lore.kernel.org/netdev/20250113143109.60afa59a@kernel.org/ [2]: https://lore.kernel.org/netdev/20250129172431.65773-1-jdamato@fastly.com/ v8: - Update the Makefile in patch 3 to use TEST_GEN_FILES instead of TEST_GEN_PROGS. - Fix a codespell complaint in xdp_helper.c. v7: https://lore.kernel.org/netdev/20250213192336.42156-1-jdamato@fastly.com/ - Added CONFIG_XDP_SOCKETS to selftests/drivers/net/config as suggested by Stanislav. - Updated xdp_helper.c to return -1 for AF_XDP non-existence, but 1 for other failures. - Updated queues.py to mark test as skipped if AF_XDP does not exist. v6: https://lore.kernel.org/bpf/20250210193903.16235-1-jdamato@fastly.com/ - Added ifdefs for CONFIG_XDP_SOCKETS in patch 2 as Stanislav suggested. v5: https://lore.kernel.org/bpf/20250208041248.111118-1-jdamato@fastly.com/ - Removed unused ret variable from patch 2 as Simon suggested. v4: https://lore.kernel.org/lkml/20250207030916.32751-1-jdamato@fastly.com/ - Add patch 1, as suggested by Jakub, which adds an empty nest helper. - Use the helper in patch 2, which makes the code cleaner and prevents a possible bug.
v3: https://lore.kernel.org/netdev/20250204191108.161046-1-jdamato@fastly.com/ - Change comment format in patch 2 to avoid kdoc warnings. No other changes. v2: https://lore.kernel.org/all/20250203185828.19334-1-jdamato@fastly.com/ - Switched from RFC to actual submission now that net-next is open - Adjusted patch 1 to include an empty nest as suggested by Jakub - Adjusted patch 2 to update the test based on changes to patch 1, and to incorporate some Python feedback from Jakub :) rfc: https://lore.kernel.org/netdev/20250129172431.65773-1-jdamato@fastly.com/ Joe Damato (3): netlink: Add nla_put_empty_nest helper netdev-genl: Add an XSK attribute to queues selftests: drv-net: Test queue xsk attribute Documentation/netlink/specs/netdev.yaml | 13 ++- include/net/netlink.h | 15 +++ include/uapi/linux/netdev.h | 6 ++ net/core/netdev-genl.c | 12 +++ tools/include/uapi/linux/netdev.h | 6 ++ .../testing/selftests/drivers/net/.gitignore | 2 + tools/testing/selftests/drivers/net/Makefile | 3 + tools/testing/selftests/drivers/net/config | 1 + tools/testing/selftests/drivers/net/queues.py | 42 +++++++- .../selftests/drivers/net/xdp_helper.c | 98 +++++++++++++++++++ 10 files changed, 194 insertions(+), 4 deletions(-) create mode 100644 tools/testing/selftests/drivers/net/.gitignore create mode 100644 tools/testing/selftests/drivers/net/xdp_helper.c base-commit: 7a7e0197133d18cfd9931e7d3a842d0f5730223f -- 2.43.0

6 months, 1 week

2
2
0 0

[PATCH 0/2] selftests/mm: Allow execution on systems without huge pages

by Mark Brown

Currently the mm selftests refuse to run if we don't have huge page support, but there are plenty of tests that don't depend on this feature. Relax this requirement to allow coverage on relevant systems (e.g., most 32-bit Arm ones). While doing this I noticed a bug in an existing check for whether we're running THP tests; the fix overlaps with the above change so is sent as part of a series. Signed-off-by: Mark Brown <broonie(a)kernel.org> --- Mark Brown (2): selftests/mm: Fix check for running THP tests selftests/mm: Allow tests to run with no huge pages support tools/testing/selftests/mm/run_vmtests.sh | 68 +++++++++++++++++++------------ 1 file changed, 43 insertions(+), 25 deletions(-) --- base-commit: a64dcfb451e254085a7daee5fe51bf22959d52d3 change-id: 20250211-kselftest-mm-no-hugepages-ee5917a170eb Best regards, -- Mark Brown <broonie(a)kernel.org>

6 months, 1 week

2
4
0 0

[PATCH 2/2] rseq/selftests: Add test for mm_cid compaction

by Gabriele Monaco

A task in the kernel (task_mm_cid_work) runs somewhat periodically to compact the mm_cid for each process. Add a test to validate that it runs correctly and in a timely manner. The test spawns 1 thread pinned to each CPU, then each thread, including the main one, runs in short bursts for some time. During this period, the mm_cids should be spanning all numbers between 0 and nproc. At the end of this phase, a thread with high enough mm_cid (>= nproc/2) is selected to be the new leader; all other threads terminate. After some time, the only running thread should see 0 as mm_cid; if that doesn't happen, the compaction mechanism didn't work and the test fails. Since mm_cid compaction is less likely for tasks running in short bursts, we increase the likelihood by just running a busy loop at every iteration. This compaction is best-effort work and this behaviour is currently acceptable. The test never fails if only 1 core is available, in which case, we cannot test anything as the only available mm_cid is 0. 
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com> Signed-off-by: Gabriele Monaco <gmonaco(a)redhat.com> --- tools/testing/selftests/rseq/.gitignore | 1 + tools/testing/selftests/rseq/Makefile | 2 +- .../selftests/rseq/mm_cid_compaction_test.c | 208 ++++++++++++++++++ 3 files changed, 210 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/rseq/mm_cid_compaction_test.c diff --git a/tools/testing/selftests/rseq/.gitignore b/tools/testing/selftests/rseq/.gitignore index 16496de5f6ce4..2c89f97e4f737 100644 --- a/tools/testing/selftests/rseq/.gitignore +++ b/tools/testing/selftests/rseq/.gitignore @@ -3,6 +3,7 @@ basic_percpu_ops_test basic_percpu_ops_mm_cid_test basic_test basic_rseq_op_test +mm_cid_compaction_test param_test param_test_benchmark param_test_compare_twice diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile index 5a3432fceb586..ce1b38f46a355 100644 --- a/tools/testing/selftests/rseq/Makefile +++ b/tools/testing/selftests/rseq/Makefile @@ -16,7 +16,7 @@ OVERRIDE_TARGETS = 1 TEST_GEN_PROGS = basic_test basic_percpu_ops_test basic_percpu_ops_mm_cid_test param_test \ param_test_benchmark param_test_compare_twice param_test_mm_cid \ - param_test_mm_cid_benchmark param_test_mm_cid_compare_twice + param_test_mm_cid_benchmark param_test_mm_cid_compare_twice mm_cid_compaction_test TEST_GEN_PROGS_EXTENDED = librseq.so diff --git a/tools/testing/selftests/rseq/mm_cid_compaction_test.c b/tools/testing/selftests/rseq/mm_cid_compaction_test.c new file mode 100644 index 0000000000000..8808500466d02 --- /dev/null +++ b/tools/testing/selftests/rseq/mm_cid_compaction_test.c @@ -0,0 +1,208 @@ +// SPDX-License-Identifier: LGPL-2.1 +#define _GNU_SOURCE +#include <assert.h> +#include <pthread.h> +#include <sched.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stddef.h> + +#include "../kselftest.h" +#include "rseq.h" + +#define VERBOSE 0 +#define 
printf_verbose(fmt, ...) \ + do { \ + if (VERBOSE) \ + printf(fmt, ##__VA_ARGS__); \ + } while (0) + +/* 0.5 s */ +#define RUNNER_PERIOD 500000 +/* Number of runs before we terminate or get the token */ +#define THREAD_RUNS 5 + +/* + * Number of times we check that the mm_cid were compacted. + * Checks are repeated every RUNNER_PERIOD. + */ +#define MM_CID_COMPACT_TIMEOUT 10 + +struct thread_args { + int cpu; + int num_cpus; + pthread_mutex_t *token; + pthread_barrier_t *barrier; + pthread_t *tinfo; + struct thread_args *args_head; +}; + +static void __noreturn *thread_runner(void *arg) +{ + struct thread_args *args = arg; + int i, ret, curr_mm_cid; + cpu_set_t cpumask; + + CPU_ZERO(&cpumask); + CPU_SET(args->cpu, &cpumask); + ret = pthread_setaffinity_np(pthread_self(), sizeof(cpumask), &cpumask); + if (ret) { + errno = ret; + perror("Error: failed to set affinity"); + abort(); + } + pthread_barrier_wait(args->barrier); + + for (i = 0; i < THREAD_RUNS; i++) + usleep(RUNNER_PERIOD); + curr_mm_cid = rseq_current_mm_cid(); + /* + * We select one thread with high enough mm_cid to be the new leader. + * All other threads (including the main thread) will terminate. + * After some time, the mm_cid of the only remaining thread should + * converge to 0, if not, the test fails. 
+ */ + if (curr_mm_cid >= args->num_cpus / 2 && + !pthread_mutex_trylock(args->token)) { + printf_verbose( + "cpu%d has mm_cid=%d and will be the new leader.\n", + sched_getcpu(), curr_mm_cid); + for (i = 0; i < args->num_cpus; i++) { + if (args->tinfo[i] == pthread_self()) + continue; + ret = pthread_join(args->tinfo[i], NULL); + if (ret) { + errno = ret; + perror("Error: failed to join thread"); + abort(); + } + } + pthread_barrier_destroy(args->barrier); + free(args->tinfo); + free(args->token); + free(args->barrier); + free(args->args_head); + + for (i = 0; i < MM_CID_COMPACT_TIMEOUT; i++) { + curr_mm_cid = rseq_current_mm_cid(); + printf_verbose("run %d: mm_cid=%d on cpu%d.\n", i, + curr_mm_cid, sched_getcpu()); + if (curr_mm_cid == 0) + exit(EXIT_SUCCESS); + /* + * Currently mm_cid compaction is less likely for tasks + * running in short bursts: increase likelihood by just + * running for some time doing nothing. + */ + for (int j = 0; j < 0xffff; j++) + for (int k = 0; k < 0xffff; k++) + asm(""); + usleep(RUNNER_PERIOD); + } + exit(EXIT_FAILURE); + } + printf_verbose("cpu%d has mm_cid=%d and is going to terminate.\n", + sched_getcpu(), curr_mm_cid); + pthread_exit(NULL); +} + +int test_mm_cid_compaction(void) +{ + cpu_set_t affinity; + int i, j, ret = 0, num_threads; + pthread_t *tinfo; + pthread_mutex_t *token; + pthread_barrier_t *barrier; + struct thread_args *args; + + sched_getaffinity(0, sizeof(affinity), &affinity); + num_threads = CPU_COUNT(&affinity); + tinfo = calloc(num_threads, sizeof(*tinfo)); + if (!tinfo) { + perror("Error: failed to allocate tinfo"); + return -1; + } + args = calloc(num_threads, sizeof(*args)); + if (!args) { + perror("Error: failed to allocate args"); + ret = -1; + goto out_free_tinfo; + } + token = malloc(sizeof(*token)); + if (!token) { + perror("Error: failed to allocate token"); + ret = -1; + goto out_free_args; + } + barrier = malloc(sizeof(*barrier)); + if (!barrier) { + perror("Error: failed to allocate barrier"); + 
ret = -1; + goto out_free_token; + } + if (num_threads == 1) { + fprintf(stderr, "Cannot test on a single cpu. " + "Skipping mm_cid_compaction test.\n"); + /* only skipping the test, this is not a failure */ + goto out_free_barrier; + } + pthread_mutex_init(token, NULL); + ret = pthread_barrier_init(barrier, NULL, num_threads); + if (ret) { + errno = ret; + perror("Error: failed to initialise barrier"); + goto out_free_barrier; + } + for (i = 0, j = 0; i < CPU_SETSIZE && j < num_threads; i++) { + if (!CPU_ISSET(i, &affinity)) + continue; + args[j].num_cpus = num_threads; + args[j].tinfo = tinfo; + args[j].token = token; + args[j].barrier = barrier; + args[j].cpu = i; + args[j].args_head = args; + if (!j) { + /* The first thread is the main one */ + tinfo[0] = pthread_self(); + ++j; + continue; + } + ret = pthread_create(&tinfo[j], NULL, thread_runner, &args[j]); + if (ret) { + errno = ret; + perror("Error: failed to create thread"); + abort(); + } + ++j; + } + printf_verbose("Started %d threads.\n", num_threads); + + /* Also main thread will terminate if it is not selected as leader */ + thread_runner(&args[0]); + + /* only reached in case of errors */ +out_free_barrier: + free(barrier); +out_free_token: + free(token); +out_free_args: + free(args); +out_free_tinfo: + free(tinfo); + + return ret; +} + +int main(int argc, char **argv) +{ + if (!rseq_mm_cid_available()) { + fprintf(stderr, "Error: rseq_mm_cid unavailable\n"); + return -1; + } + if (test_mm_cid_compaction()) + return -1; + return 0; +} -- 2.48.1

6 months, 1 week

2
1
0 0

[RFC v2 0/5] mm: introduce THP deferred setting

by Nico Pache

This series is a follow-up to [1], which adds mTHP support to khugepaged. mTHP khugepaged support was necessary for the global="defer" and mTHP="inherit" case (and others) to make sense. We've seen cases where customers switching from RHEL7 to RHEL8 see a significant increase in the memory footprint for the same workloads. Through our investigations we found that a large contributing factor to the increase in RSS was an increase in THP usage. For workloads like MySQL, or when using allocators like jemalloc, it is often recommended to set /transparent_hugepages/enabled=never. This is in part due to performance degradations and increased memory waste. This series introduces enabled=defer; this setting acts as a middle ground between always and madvise. If the mapping is MADV_HUGEPAGE, the page fault handler will act normally, making a hugepage if possible. If the allocation is not MADV_HUGEPAGE, then the page fault handler will default to the base size allocation. The caveat is that khugepaged can still operate on pages that are not MADV_HUGEPAGE. This allows for two things... one, applications specifically designed to use hugepages will get them, and two, applications that don't use hugepages can still benefit from them without aggressively inserting THPs at every possible chance. This curbs the memory waste, and defers the use of hugepages to khugepaged. Khugepaged can then scan the memory for eligible collapsing. Admins may want to lower max_ptes_none; if not, khugepaged may aggressively collapse single allocations into hugepages. TESTING: - Built for x86_64, aarch64, ppc64le, and s390x - selftests mm - In [1] I provided a script [2] that has multiple access patterns - lots of general use. These changes have been running in my VM for some time - redis testing. This test was my original case for the defer mode. What I was able to prove was that THP=always leads to increased max_latency cases; hence why it is recommended to disable THPs for redis servers. 
However, with 'defer' we don't have the max_latency spikes and can still get the system to utilize THPs. I further tested this with the mTHP defer setting and found that redis (and probably other jemalloc users) can utilize THPs via defer (+mTHP defer) without a large latency penalty and some potential gains. I uploaded some mmtest results here [3] which compares: stock+thp=never stock+(m)thp=always khugepaged-mthp + defer (max_ptes_none=64) The results show that (m)THPs can cause some throughput regression in some cases, but also have gains in other cases. The mTHP+defer results have more gains and fewer losses over the (m)THP=always case. V2 Changes: - base changes on mTHP khugepaged support - Fix selftests parsing issue - add mTHP defer option - add mTHP defer Documentation [1] - https://lkml.org/lkml/2025/2/10/1982 [2] - https://gitlab.com/npache/khugepaged_mthp_test [3] - https://people.redhat.com/npache/mthp_khugepaged_defer/testoutput2/output.h… Nico Pache (5): mm: defer THP insertion to khugepaged mm: document transparent_hugepage=defer usage selftests: mm: add defer to thp setting parser khugepaged: add defer option to mTHP options mm: document mTHP defer setting Documentation/admin-guide/mm/transhuge.rst | 40 ++++++++++--- include/linux/huge_mm.h | 18 +++++- mm/huge_memory.c | 69 +++++++++++++++++++--- mm/khugepaged.c | 10 ++-- tools/testing/selftests/mm/thp_settings.c | 1 + tools/testing/selftests/mm/thp_settings.h | 1 + 6 files changed, 115 insertions(+), 24 deletions(-) -- 2.48.1

6 months, 1 week

2
13
0 0

[PATCH] selftests/damon/damos_quota_goal: handle minimum quota that cannot be further reduced

by SeongJae Park

The damos_quota_goal.py selftest checks whether the DAMOS quota goals tuning feature increases or reduces the effective size quota for a given score as expected. The tuning feature sets the minimum quota size to one byte, so if the effective size quota is already one, we cannot expect it to be reduced further. However, the test is not aware of this edge case, and fails because it sees no change in the effective quota. Handle the case by updating the failure logic for no change to check whether this was the case, and simply skip to the next test input. Fixes: f1c07c0a1662b ("selftests/damon: add a test for DAMOS quota goal") Cc: <stable(a)vger.kernel.org> # 6.10.x Reported-by: kernel test robot <oliver.sang(a)intel.com> Closes: https://lore.kernel.org/oe-lkp/202502171423.b28a918d-lkp@intel.com Signed-off-by: SeongJae Park <sj(a)kernel.org> --- tools/testing/selftests/damon/damos_quota_goal.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/damon/damos_quota_goal.py b/tools/testing/selftests/damon/damos_quota_goal.py index 18246f3b62f7..f76e0412b564 100755 --- a/tools/testing/selftests/damon/damos_quota_goal.py +++ b/tools/testing/selftests/damon/damos_quota_goal.py @@ -63,6 +63,9 @@ def main(): if last_effective_bytes != 0 else -1.0)) if last_effective_bytes == goal.effective_bytes: + # effective quota was already minimum that cannot be more reduced + if expect_increase is False and last_effective_bytes == 1: + continue print('efective bytes not changed: %d' % goal.effective_bytes) exit(1) base-commit: 20017459916819f8ae15ca3840e71fbf0ea8354e -- 2.39.5

6 months, 1 week

1
0
0 0

[PATCH 0/2] selftests/ftrace: Fix ftracetest test cases

by Masami Hiramatsu (Google)

Here are a couple of patches to fix issues. I think the mount_options.tc one is a real bug (I'm not sure how it worked), but the other one is an enhancement for (my) execution environment. Anyway, those should go through the kselftests tree. Thank you, --- Masami Hiramatsu (Google) (2): selftests/ftrace: Fix to use remount when testing mount GID option selftests/ftrace: Make uprobe test more robust against binary name .../ftrace/test.d/00basic/mount_options.tc | 8 ++++---- .../ftrace/test.d/dynevent/add_remove_uprobe.tc | 4 +++- 2 files changed, 7 insertions(+), 5 deletions(-) -- Masami Hiramatsu (Google) <mhiramat(a)kernel.org>

6 months, 1 week

4
5
0 0

Re: [PATCH v8 3/4] scanf: convert self-test to KUnit

by Andy Shevchenko

On Sat, Feb 15, 2025 at 02:52:22PM -0500, Tamir Duberstein wrote: > On Sat, Feb 15, 2025 at 1:51 PM kernel test robot <lkp(a)intel.com> wrote: > I am not able to reproduce these warnings with clang 19.1.7. They also > don't obviously make sense to me. Please, when replying, remove boilerplate stuff! I have just wasted a couple of minutes to understand what's going on in the message that is 2700 lines of text as the reply to the bot message which was ~700 lines. -- With Best Regards, Andy Shevchenko

6 months, 1 week

2
1
0 0

[PATCH v6 0/6] ptrace: introduce PTRACE_SET_SYSCALL_INFO API

by Dmitry V. Levin

PTRACE_SET_SYSCALL_INFO is a generic ptrace API that complements PTRACE_GET_SYSCALL_INFO by letting the ptracer modify details of system calls the tracee is blocked in. This API allows ptracers to obtain and modify system call details in a straightforward and architecture-agnostic way, providing a consistent way of manipulating the system call number and arguments across architectures. As in case of PTRACE_GET_SYSCALL_INFO, PTRACE_SET_SYSCALL_INFO also does not aim to address numerous architecture-specific system call ABI peculiarities, like differences in the number of system call arguments for such system calls as pread64 and preadv. The current implementation supports changing only those bits of system call information that are used by strace system call tampering, namely, syscall number, syscall arguments, and syscall return value. Support of changing additional details returned by PTRACE_GET_SYSCALL_INFO, such as instruction pointer and stack pointer, could be added later if needed, by using struct ptrace_syscall_info.flags to specify the additional details that should be set. Currently, "flags" and "reserved" fields of struct ptrace_syscall_info must be initialized with zeroes; "arch", "instruction_pointer", and "stack_pointer" fields are currently ignored. PTRACE_SET_SYSCALL_INFO currently supports only PTRACE_SYSCALL_INFO_ENTRY, PTRACE_SYSCALL_INFO_EXIT, and PTRACE_SYSCALL_INFO_SECCOMP operations. Other operations could be added later if needed. Ideally, PTRACE_SET_SYSCALL_INFO should have been introduced along with PTRACE_GET_SYSCALL_INFO, but it didn't happen. The last straw that convinced me to implement PTRACE_SET_SYSCALL_INFO was apparent failure to provide an API of changing the first system call argument on riscv architecture [1]. ptrace(2) man page: long ptrace(enum __ptrace_request request, pid_t pid, void *addr, void *data); ... PTRACE_SET_SYSCALL_INFO Modify information about the system call that caused the stop. 
The "data" argument is a pointer to struct ptrace_syscall_info that specifies the system call information to be set. The "addr" argument should be set to sizeof(struct ptrace_syscall_info)). [1] https://lore.kernel.org/all/59505464-c84a-403d-972f-d4b2055eeaac@gmail.com/ Notes: v6: * mips: Submit mips_get_syscall_arg() o32 fix via mips tree to get it merged into v6.14-rc3 * Rebase to v6.14-rc3 * v5: https://lore.kernel.org/all/20250210113336.GA887@strace.io/ v5: * ptrace: Extend the commit message to say that the new API does not aim to address numerous architecture-specific syscall ABI peculiarities * selftests: Add a workaround for s390 16-bit syscall numbers * Add more Acked-by * v4: https://lore.kernel.org/all/20250203065849.GA14120@strace.io/ v4: * Split out syscall_set_return_value() for hexagon into a separate patch * s390: Change the style of syscall_set_arguments() implementation as requested * Add more Reviewed-by * v3: https://lore.kernel.org/all/20250128091445.GA8257@strace.io/ v3: * powerpc: Submit syscall_set_return_value() fix for "sc" case separately * mips: Do not introduce erroneous argument truncation on mips n32, add a detailed description to the commit message of the mips_get_syscall_arg() change * ptrace: Add explicit padding to the end of struct ptrace_syscall_info, simplify obtaining of user ptrace_syscall_info, do not introduce PTRACE_SYSCALL_INFO_SIZE_VER0 * ptrace: Change the return type of ptrace_set_syscall_info_* functions from "unsigned long" to "int" * ptrace: Add -ERANGE check to ptrace_set_syscall_info_exit(), add comments to -ERANGE checks * ptrace: Update comments about supported syscall stops * selftests: Extend set_syscall_info test, fix for mips n32 * Add Tested-by and Reviewed-by v2: * Add patch to fix syscall_set_return_value() on powerpc * Add patch to fix mips_get_syscall_arg() on mips * Add syscall_set_return_value() implementation on hexagon * Add syscall_set_return_value() invocation to syscall_set_nr() on arm and arm64. 
* Fix syscall_set_nr() and mips_set_syscall_arg() on mips * Add a comment to syscall_set_nr() on arc, powerpc, s390, sh, and sparc * Remove redundant ptrace_syscall_info.op assignments in ptrace_get_syscall_info_* * Minor style tweaks in ptrace_get_syscall_info_op() * Remove syscall_set_return_value() invocation from ptrace_set_syscall_info_entry() * Skip syscall_set_arguments() invocation in case of syscall number -1 in ptrace_set_syscall_info_entry() * Split ptrace_syscall_info.reserved into ptrace_syscall_info.reserved and ptrace_syscall_info.flags * Use __kernel_ulong_t instead of unsigned long in set_syscall_info test Dmitry V. Levin (6): hexagon: add syscall_set_return_value() syscall.h: add syscall_set_arguments() syscall.h: introduce syscall_set_nr() ptrace_get_syscall_info: factor out ptrace_get_syscall_info_op ptrace: introduce PTRACE_SET_SYSCALL_INFO request selftests/ptrace: add a test case for PTRACE_SET_SYSCALL_INFO arch/arc/include/asm/syscall.h | 25 + arch/arm/include/asm/syscall.h | 37 ++ arch/arm64/include/asm/syscall.h | 29 + arch/csky/include/asm/syscall.h | 13 + arch/hexagon/include/asm/syscall.h | 21 + arch/loongarch/include/asm/syscall.h | 15 + arch/m68k/include/asm/syscall.h | 7 + arch/microblaze/include/asm/syscall.h | 7 + arch/mips/include/asm/syscall.h | 46 ++ arch/nios2/include/asm/syscall.h | 16 + arch/openrisc/include/asm/syscall.h | 13 + arch/parisc/include/asm/syscall.h | 19 + arch/powerpc/include/asm/syscall.h | 20 + arch/riscv/include/asm/syscall.h | 16 + arch/s390/include/asm/syscall.h | 21 + arch/sh/include/asm/syscall_32.h | 24 + arch/sparc/include/asm/syscall.h | 22 + arch/um/include/asm/syscall-generic.h | 19 + arch/x86/include/asm/syscall.h | 43 ++ arch/xtensa/include/asm/syscall.h | 18 + include/asm-generic/syscall.h | 30 + include/uapi/linux/ptrace.h | 7 +- kernel/ptrace.c | 179 +++++- tools/testing/selftests/ptrace/Makefile | 2 +- .../selftests/ptrace/set_syscall_info.c | 519 ++++++++++++++++++ 25 files changed, 1141 
insertions(+), 27 deletions(-) create mode 100644 tools/testing/selftests/ptrace/set_syscall_info.c base-commit: 0ad2507d5d93f39619fc42372c347d6006b64319 -- ldv

6 months, 1 week

1
0
0 0

[PATCH v7 0/6] introduce PIDFD_SELF* sentinels

by Lorenzo Stoakes

If you wish to utilise a pidfd interface to refer to the current process or thread it is rather cumbersome, requiring something like: int pidfd = pidfd_open(getpid(), 0 or PIDFD_THREAD); ... close(pidfd); Or the equivalent call opening /proc/self. It is more convenient to use a sentinel value to indicate to an interface that accepts a pidfd that we simply wish to refer to the current process or thread. This series introduces sentinels for this purpose which can be passed as the pidfd in this instance rather than having to establish a dummy fd for this purpose. It is useful to refer to both the current thread from the userland's perspective for which we use PIDFD_SELF, and the current process from the userland's perspective, for which we use PIDFD_SELF_PROCESS. There is unfortunately some confusion between the kernel and userland as to what constitutes a process - a thread from the userland perspective is a process in the kernel, and a userland process is a thread group (more specifically the thread group leader from the kernel perspective). We therefore alias things thusly: * PIDFD_SELF_THREAD aliased by PIDFD_SELF - use PIDTYPE_PID. * PIDFD_SELF_THREAD_GROUP aliased by PIDFD_SELF_PROCESS - use PIDTYPE_TGID. In all of the kernel code we refer to PIDFD_SELF_THREAD and PIDFD_SELF_THREAD_GROUP. However we expect users to use PIDFD_SELF and PIDFD_SELF_PROCESS. This matters for cases where, for instance, a user unshare()'s FDs or does thread-specific signal handling and where the user would be hugely confused if the FDs referenced or signal processed referred to the thread group leader rather than the individual thread. For now we only adjust pidfd_get_task() and the pidfd_send_signal() system call with specific handling for this, implementing this functionality for process_madvise(), process_mrelease() (albeit, using it here wouldn't really make sense) and pidfd_send_signal(). 
We defer making further changes, as this would require a significant rework of the pidfd mechanism. The motivating case here is to support PIDFD_SELF in process_madvise(), so this suffices for immediate uses. Moving forward, this can be further expanded to other uses. v7: * Reworked implementation according to Christian's requirements. We now only support process_madvise() and pidfd_send_signal() system calls with PIDFD_SELF as specified. * Updated tests to account for broken pidfd_open_test.c implementation. * Fixed missing includes in pidfd self tests. * Removed tests relating to functionality no longer supported. * Update guard pages test to use PIDFD_SELF. v6: * Avoid static inline in UAPI header as suggested by Pedro. * Place PIDFD_SELF values out of range of errors and any other sentinel as suggested by Pedro. https://lore.kernel.org/linux-mm/cover.1729926229.git.lorenzo.stoakes@oracl… v5: * Fixup self test dependencies on pidfd/pidfd.h. https://lore.kernel.org/linux-mm/cover.1729848252.git.lorenzo.stoakes@oracl… v4: * Avoid returning an fd in the __pidfd_get_pid() function as pointed out by Christian, instead simply always pin the pid and maintain fd scope in the helper alone. * Add wrapper header file in tools/include/linux to allow for import of UAPI pidfd.h header without encountering the collision between system fcntl.h and linux/fcntl.h as discussed with Shuah and John. * Fixup tests to import the UAPI pidfd.h header working around conflicts between system fcntl.h and linux/fcntl.h which the UAPI pidfd.h imports, as reported by Shuah. * Use an int for pidfd_is_self_sentinel() to avoid any dependency on stdbool.h in userland. https://lore.kernel.org/linux-mm/cover.1729198898.git.lorenzo.stoakes@oracl… v3: * Do not fput() an invalid fd as reported by kernel test bot. * Fix unintended churn from moving variable declaration. https://lore.kernel.org/linux-mm/cover.1729073310.git.lorenzo.stoakes@oracl… v2: * Fix tests as reported by Shuah. 
* Correct RFC version lore link. https://lore.kernel.org/linux-mm/cover.1728643714.git.lorenzo.stoakes@oracl… Non-RFC v1: * Removed RFC tag - there seems to be general consensus that this change is a good idea, but perhaps some debate to be had on implementation. It seems sensible then to move forward with the RFC flag removed. * Introduced PIDFD_SELF_THREAD, PIDFD_SELF_THREAD_GROUP and their aliases PIDFD_SELF and PIDFD_SELF_PROCESS respectively. * Updated testing accordingly. https://lore.kernel.org/linux-mm/cover.1728578231.git.lorenzo.stoakes@oracl… RFC version: https://lore.kernel.org/linux-mm/cover.1727644404.git.lorenzo.stoakes@oracl… Lorenzo Stoakes (6): pidfd: add PIDFD_SELF* sentinels to refer to own thread/process selftests/pidfd: add missing system header imcludes to pidfd tests tools: testing: separate out wait_for_pid() into helper header selftests: pidfd: add pidfd.h UAPI wrapper selftests: pidfd: add tests for PIDFD_SELF_* selftests/mm: use PIDFD_SELF in guard pages test include/uapi/linux/pidfd.h | 24 ++++ kernel/pid.c | 24 +++- kernel/signal.c | 106 +++++++++++------- tools/include/linux/pidfd.h | 14 +++ tools/testing/selftests/cgroup/test_kill.c | 2 +- tools/testing/selftests/mm/Makefile | 4 + tools/testing/selftests/mm/guard-pages.c | 15 +-- .../pid_namespace/regression_enomem.c | 2 +- tools/testing/selftests/pidfd/Makefile | 3 +- tools/testing/selftests/pidfd/pidfd.h | 28 +---- .../selftests/pidfd/pidfd_fdinfo_test.c | 1 + tools/testing/selftests/pidfd/pidfd_helpers.h | 39 +++++++ .../testing/selftests/pidfd/pidfd_open_test.c | 6 +- .../selftests/pidfd/pidfd_setns_test.c | 1 + tools/testing/selftests/pidfd/pidfd_test.c | 76 +++++++++++-- 15 files changed, 242 insertions(+), 103 deletions(-) create mode 100644 tools/include/linux/pidfd.h create mode 100644 tools/testing/selftests/pidfd/pidfd_helpers.h -- 2.48.1

6 months, 1 week

8
26
0 0

[linux-next:master] [lib/prime_numbers] 313b38a6ec: kernel-selftests.lib.prime_numbers.sh.fail

by kernel test robot

Hello, kernel test robot noticed "kernel-selftests.lib.prime_numbers.sh.fail" on: commit: 313b38a6ecb46db4002925af91b64df4f2b76d1f ("lib/prime_numbers: convert self-test to KUnit") https://git.kernel.org/cgit/linux/kernel/git/next/linux-next.git master [test failed on linux-next/master 0ae0fa3bf0b44c8611d114a9f69985bf451010c3] in testcase: kernel-selftests version: kernel-selftests-x86_64-78a632a2086c-1_20250215 with following parameters: group: lib config: x86_64-rhel-9.4-kselftests compiler: gcc-12 test machine: 4 threads Intel(R) Xeon(R) CPU E3-1225 v5 @ 3.30GHz (Skylake) with 16G memory (please refer to attached dmesg/kmsg for entire log/backtrace) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <oliver.sang(a)intel.com> | Closes: https://lore.kernel.org/oe-lkp/202502171110.708d965a-lkp@intel.com # timeout set to 300 # selftests: lib: prime_numbers.sh # Warning: file prime_numbers.sh is missing! not ok 3 selftests: lib: prime_numbers.sh (missed the change for tools/testing/selftests/lib/Makefile?) The kernel config and materials to reproduce are available at: https://download.01.org/0day-ci/archive/20250217/202502171110.708d965a-lkp@… -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki

6 months, 1 week

1
0
0 0

[PATCH v6 0/4] Migrate PCI Endpoint Subsystem tests to Kselftest

by Manivannan Sadhasivam

Hi, This series carries forward the effort to add Kselftest for PCI Endpoint Subsystem started by Aman Gupta [1] a while ago. I reworked the initial version based on another patch that fixes the return values of IOCTLs in pci_endpoint_test driver and did many cleanups. Since the resulting work modified the initial version substantially, I took over the authorship. This series also incorporates the review comment by Shuah Khan [2] to move the existing tests from 'tools/pci' to 'tools/testing/kselftest/pci_endpoint' before migrating to Kselftest framework. I made sure that the tests are executable in each commit and updated documentation accordingly. - Mani [1] https://lore.kernel.org/linux-pci/20221007053934.5188-1-aman1.gupta@samsung… [2] https://lore.kernel.org/linux-pci/b2a5db97-dc59-33ab-71cd-f591e0b1b34d@linu… Changes in v6: * Fixed the documentation to pass max MSI and MSI-X count to configfs * Collected tags Changes in v5: * Incorporated comments from Niklas * Added a patch to fix the DMA MEMCPY check in pci-epf-test driver * Collected tags * Rebased on top of pci/next 0333f56dbbf7ef6bb46d2906766c3e1b2a04a94d Changes in v4: * Dropped the BAR fix patches and submitted them separately: https://lore.kernel.org/linux-pci/20241231130224.38206-1-manivannan.sadhasi… * Rebased on top of pci/next 9e1b45d7a5bc0ad20f6b5267992da422884b916e Changes in v3: * Collected tags. * Added a note about failing testcase 10 and command to skip it in documentation. * Removed Aman Gupta and Padmanabhan Rajanbabu from CC as their addresses are bouncing. 
Changes in v2: * Added a patch that fixes return values of IOCTL in pci_endpoint_test driver * Moved the existing tests to new location before migrating * Added a fix for BARs on Qcom devices * Updated documentation and also added fixture variants for memcpy & DMA modes Manivannan Sadhasivam (4): PCI: endpoint: pci-epf-test: Fix the check for DMA MEMCPY test misc: pci_endpoint_test: Fix the return value of IOCTL selftests: Move PCI Endpoint tests from tools/pci to Kselftests selftests: pci_endpoint: Migrate to Kselftest framework Documentation/PCI/endpoint/pci-test-howto.rst | 174 +++++------- MAINTAINERS | 2 +- drivers/misc/pci_endpoint_test.c | 255 +++++++++-------- drivers/pci/endpoint/functions/pci-epf-test.c | 4 +- tools/pci/Build | 1 - tools/pci/Makefile | 58 ---- tools/pci/pcitest.c | 264 ------------------ tools/pci/pcitest.sh | 73 ----- tools/testing/selftests/Makefile | 1 + .../testing/selftests/pci_endpoint/.gitignore | 2 + tools/testing/selftests/pci_endpoint/Makefile | 7 + tools/testing/selftests/pci_endpoint/config | 4 + .../pci_endpoint/pci_endpoint_test.c | 221 +++++++++++++++ 13 files changed, 437 insertions(+), 629 deletions(-) delete mode 100644 tools/pci/Build delete mode 100644 tools/pci/Makefile delete mode 100644 tools/pci/pcitest.c delete mode 100644 tools/pci/pcitest.sh create mode 100644 tools/testing/selftests/pci_endpoint/.gitignore create mode 100644 tools/testing/selftests/pci_endpoint/Makefile create mode 100644 tools/testing/selftests/pci_endpoint/config create mode 100644 tools/testing/selftests/pci_endpoint/pci_endpoint_test.c -- 2.25.1

6 months, 1 week

3
6
0 0

[PATCH net-next] tun: Pad virtio headers

by Akihiko Odaki

tun used to simply advance iov_iter when it needs to pad virtio header, which leaves the garbage in the buffer as is. This is especially problematic when tun starts to allow enabling the hash reporting feature; even if the feature is enabled, the packet may lack a hash value and may contain a hole in the virtio header because the packet arrived before the feature gets enabled or does not contain the header fields to be hashed. If the hole is not filled with zero, it is impossible to tell if the packet lacks a hash value. In theory, a user of tun can fill the buffer with zero before calling read() to avoid such a problem, but leaving the garbage in the buffer is awkward anyway so fill the buffer in tun. The specification also says the device MUST set num_buffers to 1 when the field is present so set it when the specified header size is big enough to contain the field. Signed-off-by: Akihiko Odaki <akihiko.odaki(a)daynix.com> --- drivers/net/tap.c | 2 +- drivers/net/tun.c | 6 ++++-- drivers/net/tun_vnet.h | 14 +++++++++----- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 1287e241f4454fb8ec4975bbaded5fbaa88e3cc8..d96009153c316f669e626d95002e5fe8add3a1b2 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -711,7 +711,7 @@ static ssize_t tap_put_user(struct tap_queue *q, int total; if (q->flags & IFF_VNET_HDR) { - struct virtio_net_hdr vnet_hdr; + struct virtio_net_hdr_v1 vnet_hdr; vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index b14231a743915c2851eaae49d757b763ec4a8841..a3aed7e42c63d8b8f523c0141c7d970ab185178c 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1987,7 +1987,9 @@ static ssize_t tun_put_user_xdp(struct tun_struct *tun, ssize_t ret; if (tun->flags & IFF_VNET_HDR) { - struct virtio_net_hdr gso = { 0 }; + struct virtio_net_hdr_v1 gso = { + .num_buffers = cpu_to_tun_vnet16(tun->flags, 1) + }; vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz); 
ret = tun_vnet_hdr_put(vnet_hdr_sz, iter, &gso); @@ -2040,7 +2042,7 @@ static ssize_t tun_put_user(struct tun_struct *tun, } if (vnet_hdr_sz) { - struct virtio_net_hdr gso; + struct virtio_net_hdr_v1 gso; ret = tun_vnet_hdr_from_skb(tun->flags, tun->dev, skb, &gso); if (ret) diff --git a/drivers/net/tun_vnet.h b/drivers/net/tun_vnet.h index fd7411c4447ffb180e032fe3e22f6709c30da8e9..b4f406f522728f92266898969831c26a87930f6a 100644 --- a/drivers/net/tun_vnet.h +++ b/drivers/net/tun_vnet.h @@ -135,15 +135,17 @@ static inline int tun_vnet_hdr_get(int sz, unsigned int flags, } static inline int tun_vnet_hdr_put(int sz, struct iov_iter *iter, - const struct virtio_net_hdr *hdr) + const struct virtio_net_hdr_v1 *hdr) { + int content_sz = MIN(sizeof(*hdr), sz); + if (unlikely(iov_iter_count(iter) < sz)) return -EINVAL; - if (unlikely(copy_to_iter(hdr, sizeof(*hdr), iter) != sizeof(*hdr))) + if (unlikely(copy_to_iter(hdr, content_sz, iter) != content_sz)) return -EFAULT; - iov_iter_advance(iter, sz - sizeof(*hdr)); + iov_iter_zero(sz - content_sz, iter); return 0; } @@ -157,11 +159,11 @@ static inline int tun_vnet_hdr_to_skb(unsigned int flags, struct sk_buff *skb, static inline int tun_vnet_hdr_from_skb(unsigned int flags, const struct net_device *dev, const struct sk_buff *skb, - struct virtio_net_hdr *hdr) + struct virtio_net_hdr_v1 *hdr) { int vlan_hlen = skb_vlan_tag_present(skb) ? 
VLAN_HLEN : 0; - if (virtio_net_hdr_from_skb(skb, hdr, + if (virtio_net_hdr_from_skb(skb, (struct virtio_net_hdr *)hdr, tun_vnet_is_little_endian(flags), true, vlan_hlen)) { struct skb_shared_info *sinfo = skb_shinfo(skb); @@ -179,6 +181,8 @@ static inline int tun_vnet_hdr_from_skb(unsigned int flags, return -EINVAL; } + hdr->num_buffers = cpu_to_tun_vnet16(flags, 1); + return 0; } --- base-commit: f54eab84fc17ef79b701e29364b7d08ca3a1d2f6 change-id: 20250116-buffers-96e14bf023fc prerequisite-change-id: 20241230-tun-66e10a49b0c7:v6 prerequisite-patch-id: 871dc5f146fb6b0e3ec8612971a8e8190472c0fb prerequisite-patch-id: 2797ed249d32590321f088373d4055ff3f430a0e prerequisite-patch-id: ea3370c72d4904e2f0536ec76ba5d26784c0cede prerequisite-patch-id: 837e4cf5d6b451424f9b1639455e83a260c4440d prerequisite-patch-id: ea701076f57819e844f5a35efe5cbc5712d3080d prerequisite-patch-id: 701646fb43ad04cc64dd2bf13c150ccbe6f828ce prerequisite-patch-id: 53176dae0c003f5b6c114d43f936cf7140d31bb5 Best regards, -- Akihiko Odaki <akihiko.odaki(a)daynix.com>

6 months, 1 week

3
5
0 0

[PATCH net-next] selftest:net: fixed spelling mistakes

by Andres Urian Florez

Fixed spelling errors in test_redirect6() error message and test_port_shadowing() comments Signed-off-by: Andres Urian Florez <andres.emb.sys(a)gmail.com> --- tools/testing/selftests/net/netfilter/nft_nat.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/netfilter/nft_nat.sh b/tools/testing/selftests/net/netfilter/nft_nat.sh index 9e39de26455f..8143b877ae7d 100755 --- a/tools/testing/selftests/net/netfilter/nft_nat.sh +++ b/tools/testing/selftests/net/netfilter/nft_nat.sh @@ -569,7 +569,7 @@ test_redirect6() ip netns exec "$ns0" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then - echo "ERROR: cannnot ping $ns1 from $ns2 via ipv6" + echo "ERROR: cannot ping $ns1 from $ns2 via ipv6" lret=1 fi @@ -859,7 +859,7 @@ EOF # from router:service bypass connection tracking. test_port_shadow_notrack "$family" - # test nat based mitigation: fowarded packets coming from service port + # test nat based mitigation: forwarded packets coming from service port # are masqueraded with random highport. test_port_shadow_pat "$family" -- 2.43.0

6 months, 1 week

2
1
0 0

[PATCH] selftests/core: fix repeated word in close_range_test.c comment

by Imanol

Fixes a minor grammatical issue in a comment in close_range_test.c where "and and" was mistakenly repeated. Signed-off-by: Imanol <imvalient(a)protonmail.com> --- tools/testing/selftests/core/close_range_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c index e0d9851fe1c9..c19e8d037211 100644 --- a/tools/testing/selftests/core/close_range_test.c +++ b/tools/testing/selftests/core/close_range_test.c @@ -506,7 +506,7 @@ TEST(close_range_cloexec_unshare_syzbot) /* * Create a huge gap in the fd table. When we now call - * CLOSE_RANGE_UNSHARE with a shared fd table and and with ~0U as upper + * CLOSE_RANGE_UNSHARE with a shared fd table and with ~0U as upper * bound the kernel will only copy up to fd1 file descriptors into the * new fd table. If the kernel is buggy and doesn't handle * CLOSE_RANGE_CLOEXEC correctly it will not have copied all file -- 2.43.0

6 months, 1 week

1
0
0 0

[PATCH net-next v5 0/8] Some pktgen fixes/improvments (part II)

by Peter Seiderer

While taking a look at '[PATCH net] pktgen: Avoid out-of-range in get_imix_entries' ([1]) and '[PATCH net v2] pktgen: Avoid out-of-bounds access in get_imix_entries' ([2], [3]) and doing some tests and code review I detected that the /proc/net/pktgen/... parsing logic does not honour the user given buffer bounds (resulting in out-of-bounds access). This can be observed e.g. by the following simple test (sometimes the old/'longer' previous value is re-read from the buffer): $ echo add_device lo@0 > /proc/net/pktgen/kpktgend_0 $ echo "min_pkt_size 12345" > /proc/net/pktgen/lo\@0 && grep min_pkt_size /proc/net/pktgen/lo\@0 Params: count 1000 min_pkt_size: 12345 max_pkt_size: 0 Result: OK: min_pkt_size=12345 $ echo -n "min_pkt_size 123" > /proc/net/pktgen/lo\@0 && grep min_pkt_size /proc/net/pktgen/lo\@0 Params: count 1000 min_pkt_size: 12345 max_pkt_size: 0 Result: OK: min_pkt_size=12345 $ echo "min_pkt_size 123" > /proc/net/pktgen/lo\@0 && grep min_pkt_size /proc/net/pktgen/lo\@0 Params: count 1000 min_pkt_size: 123 max_pkt_size: 0 Result: OK: min_pkt_size=123 So fix the out-of-bounds access (and some minor findings) and add a simple proc_net_pktgen selftest... 
Patch set split into part I: - net: pktgen: replace ENOTSUPP with EOPNOTSUPP - net: pktgen: enable 'param=value' parsing - net: pktgen: fix hex32_arg parsing for short reads - net: pktgen: fix 'rate 0' error handling (return -EINVAL) - net: pktgen: fix 'ratep 0' error handling (return -EINVAL) - net: pktgen: fix ctrl interface command parsing - net: pktgen: fix access outside of user given buffer in pktgen_thread_write() And part II (this one): - net: pktgen: use defines for the various dec/hex number parsing digits lengths - net: pktgen: fix mix of int/long - net: pktgen: remove extra tmp variable (re-use len instead) - net: pktgen: remove some superfluous variable initializing - net: pktgen: fix mpls maximum labels list parsing - net: pktgen: fix access outside of user given buffer in pktgen_if_write() - net: pktgen: fix mpls reset parsing - net: pktgen: remove all superfluous index assignements - selftest: net: add proc_net_pktgen Regards, Peter Changes v4 -> v5: - split up patchset into part i/ii (suggested by Simon Horman) - add rev-by Simon Horman - net: pktgen: align some variable declarations to the most common pattern -> net: pktgen: fix mix of int/long - instead of aligning to most common pattern (int) adjust all usages to size_t for i and max and ssize_t for len and adjust function signatures of hex32_arg(), count_trail_chars(), num_arg() and strn_len() accordingly - respect reverse xmas tree order for local variable declarations (where possible without too much code churn) - update subject line and patch description - dropped net: pktgen: hex32_arg/num_arg error out in case no characters are available - keep empty hex/num arg is implicitly assumed as zero value - dropped net: pktgen: num_arg error out in case no valid character is parsed - keep empty hex/num arg is implicitly assumed as zero value - Change patch description ('Fixes:' -> 'Addresses the following:', suggested by Simon Horman) - net: pktgen: remove all superfluous index assignements - new patch 
(suggested by Simon Horman) - selftest: net: add proc_net_pktgen - adapt to dropped patch 'net: pktgen: hex32_arg/num_arg error out in case no characters are available', empty hex/num arg is now implicitly assumed as zero value (instead of failure) Changes v3 -> v4: - add rev-by Simon Horman - new patch 'net: pktgen: use defines for the various dec/hex number parsing digits lengths' (suggested by Simon Horman) - replace C99 comment (suggested by Paolo Abeni) - drop available characters check in strn_len() (suggested by Paolo Abeni) - factored out patch 'net: pktgen: align some variable declarations to the most common pattern' (suggested by Paolo Abeni) - factored out patch 'net: pktgen: remove extra tmp variable (re-use len instead)' (suggested by Paolo Abeni) - factored out patch 'net: pktgen: remove some superfluous variable initializing' (suggested by Paolo Abeni) - factored out patch 'net: pktgen: fix mpls maximum labels list parsing' (suggested by Paolo Abeni) - factored out 'net: pktgen: hex32_arg/num_arg error out in case no characters are available' (suggested by Paolo Abeni) - factored out 'net: pktgen: num_arg error out in case no valid character is parsed' (suggested by Paolo Abeni) Changes v2 -> v3: - new patch: 'net: pktgen: fix ctrl interface command parsing' - new patch: 'net: pktgen: fix mpls reset parsing' - tools/testing/selftests/net/proc_net_pktgen.c: - fix typo in change description ('v1 -> v1' and tyop) - rename some vars to better match usage add_loopback_0 -> thr_cmd_add_loopback_0 rm_loopback_0 -> thr_cmd_rm_loopback_0 wrong_ctrl_cmd -> wrong_thr_cmd legacy_ctrl_cmd -> legacy_thr_cmd ctrl_fd -> thr_fd - add ctrl interface tests Changes v1 -> v2: - new patch: 'net: pktgen: fix hex32_arg parsing for short reads' - new patch: 'net: pktgen: fix 'rate 0' error handling (return -EINVAL)' - new patch: 'net: pktgen: fix 'ratep 0' error handling (return -EINVAL)' - net/core/pktgen.c: additional fix get_imix_entries() and get_labels() - 
tools/testing/selftests/net/proc_net_pktgen.c: - fix typo not vs. nod (suggested by Jakub Kicinski) - fix misaligned line (suggested by Jakub Kicinski) - enable formerly commented out CONFIG_XFRM dependent test (command spi), as CONFIG_XFRM is enabled via tools/testing/selftests/net/config CONFIG_XFRM_INTERFACE/CONFIG_XFRM_USER (suggested by Jakub Kicinski) - add CONFIG_NET_PKTGEN=m to tools/testing/selftests/net/config (suggested by Jakub Kicinski) - add modprobe pktgen to FIXTURE_SETUP() (suggested by Jakub Kicinski) - fix some checkpatch warnings (Missing a blank line after declarations) - shrink line length by re-naming some variables (command -> cmd, device -> dev) - add 'rate 0' testcase - add 'ratep 0' testcase [1] https://lore.kernel.org/netdev/20241006221221.3744995-1-artem.chernyshev@re… [2] https://lore.kernel.org/netdev/20250109083039.14004-1-pchelkin@ispras.ru/ [3] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?… Peter Seiderer (8): net: pktgen: fix mix of int/long net: pktgen: remove extra tmp variable (re-use len instead) net: pktgen: remove some superfluous variable initializing net: pktgen: fix mpls maximum labels list parsing net: pktgen: fix access outside of user given buffer in pktgen_if_write() net: pktgen: fix mpls reset parsing net: pktgen: remove all superfluous index assignements selftest: net: add proc_net_pktgen net/core/pktgen.c | 288 ++++---- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/config | 1 + tools/testing/selftests/net/proc_net_pktgen.c | 646 ++++++++++++++++++ 4 files changed, 806 insertions(+), 130 deletions(-) create mode 100644 tools/testing/selftests/net/proc_net_pktgen.c -- 2.48.1

6 months, 1 week

3
12
0 0

[PATCH bpf-next v11 0/5] xsk: TX metadata Launch Time support

by Song Yoong Siang

This series expands the XDP TX metadata framework to allow user applications to pass per packet 64-bit launch time directly to the kernel driver, requesting launch time hardware offload support. The XDP TX metadata framework will not perform any clock conversion or packet reordering. Please note that the role of Tx metadata is just to pass the launch time, not to enable the offload feature. Users will need to enable the launch time hardware offload feature of the device by using the respective command, such as the tc-etf command. Although some devices use the tc-etf command to enable their launch time hardware offload feature, xsk packets will not go through the etf qdisc. Therefore, in my opinion, the launch time should always be based on the PTP Hardware Clock (PHC). Thus, I did not include a clock ID to indicate the clock source. To simplify the test steps, I modified the xdp_hw_metadata bpf self-test tool in such a way that it will set the launch time based on the offset provided by the user and the value of the Receive Hardware Timestamp, which is against the PHC. This will eliminate the need to discipline System Clock with the PHC and then use clock_gettime() to get the time. Please note that AF_XDP lacks a feedback mechanism to inform the application if the requested launch time is invalid. So, users are expected to be familiar with the horizon of the launch time of the device they use and not request a launch time that is beyond the horizon. Otherwise, the driver might interpret the launch time incorrectly and react wrongly. For stmmac and igc, where modulo computation is used, a launch time larger than the horizon will cause the device to transmit the packet earlier than the requested launch time. Although there is no feedback mechanism for the launch time request for now, users can still check whether the requested launch time is working or not, by requesting the Transmit Completion Hardware Timestamp. 
v11: - regenerate netdev_xsk_flags based on latest netdev.yaml (Jakub) v10: https://lore.kernel.org/netdev/20250207021943.814768-1-yoong.siang.song@int… - use net_err_ratelimited(), instead of net_ratelimit() (Maciej) - accumulate the amount of used descs in local variable and update the igc_metadata_request::used_desc once (Maciej) - Ensure reverse christmas tree rule (Maciej) V9: https://lore.kernel.org/netdev/20250206060408.808325-1-yoong.siang.song@int… - Remove the igc_desc_unused() checking (Maciej) - Ensure that skb allocation and DMA mapping work before proceeding to fill in igc_tx_buffer info, context desc, and data desc (Maciej) - Rate limit the error messages (Maciej) - Update the comment to indicate that the 2 descriptors needed by the empty frame are already taken into consideration (Maciej) - Handle the case where the insertion of an empty frame fails and explain the reason behind (Maciej) - put self SOB tag as last tag (Maciej) V8: https://lore.kernel.org/netdev/20250205024116.798862-1-yoong.siang.song@int… - check the number of used descriptor in xsk_tx_metadata_request() by using used_desc of struct igc_metadata_request, and then decreases the budget with it (Maciej) - submit another bug fix patch to set the buffer type for empty frame (Maciej): https://lore.kernel.org/netdev/20250205023603.798819-1-yoong.siang.song@int… V7: https://lore.kernel.org/netdev/20250204004907.789330-1-yoong.siang.song@int… - split the refactoring code of igc empty packet insertion into a separate commit (Faizal) - add explanation on why the value "4" is used as igc transmit budget (Faizal) - perform a stress test by sending 1000 packets with 10ms interval and launch time set to 500us in the future (Faizal & Yong Liang) V6: https://lore.kernel.org/netdev/20250116155350.555374-1-yoong.siang.song@int… - fix selftest build errors by using asprintf() and realloc(), instead of managing the buffer sizes manually (Daniel, Stanislav) V5: 
https://lore.kernel.org/netdev/20250114152718.120588-1-yoong.siang.song@int… - change netdev feature name from tx-launch-time to tx-launch-time-fifo to explicitly state the FIFO behaviour (Stanislav) - improve the looping of xdp_hw_metadata app to wait for packet tx completion to be more readable by using clock_gettime() (Stanislav) - add launch time setup steps into xdp_hw_metadata app (Stanislav) V4: https://lore.kernel.org/netdev/20250106135506.9687-1-yoong.siang.song@intel… - added XDP launch time support to the igc driver (Jesper & Florian) - added per-driver launch time limitation on xsk-tx-metadata.rst (Jesper) - added explanation on FIFO behavior on xsk-tx-metadata.rst (Jakub) - added step to enable launch time in the commit message (Jesper & Willem) - explicitly documented the type of launch_time and which clock source it is against (Willem) V3: https://lore.kernel.org/netdev/20231203165129.1740512-1-yoong.siang.song@in… - renamed to use launch time (Jesper & Willem) - changed the default launch time in xdp_hw_metadata apps from 1s to 0.1s because some NICs do not support such a large future time. 
V2: https://lore.kernel.org/netdev/20231201062421.1074768-1-yoong.siang.song@in… - renamed to use Earliest TxTime First (Willem) - renamed to use txtime (Willem) V1: https://lore.kernel.org/netdev/20231130162028.852006-1-yoong.siang.song@int… Song Yoong Siang (5): xsk: Add launch time hardware offload support to XDP Tx metadata selftests/bpf: Add launch time request to xdp_hw_metadata net: stmmac: Add launch time support to XDP ZC igc: Refactor empty frame insertion for launch time support igc: Add launch time support to XDP ZC Documentation/netlink/specs/netdev.yaml | 4 + Documentation/networking/xsk-tx-metadata.rst | 62 +++++++ drivers/net/ethernet/intel/igc/igc.h | 1 + drivers/net/ethernet/intel/igc/igc_main.c | 143 +++++++++++---- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 2 + .../net/ethernet/stmicro/stmmac/stmmac_main.c | 13 ++ include/net/xdp_sock.h | 10 ++ include/net/xdp_sock_drv.h | 1 + include/uapi/linux/if_xdp.h | 10 ++ include/uapi/linux/netdev.h | 3 + net/core/netdev-genl.c | 2 + net/xdp/xsk.c | 3 + tools/include/uapi/linux/if_xdp.h | 10 ++ tools/include/uapi/linux/netdev.h | 3 + tools/testing/selftests/bpf/xdp_hw_metadata.c | 168 +++++++++++++++++- 15 files changed, 396 insertions(+), 39 deletions(-) -- 2.34.1

6 months, 1 week

1
5
0 0

[PATCH bpf-next v10 0/5] xsk: TX metadata Launch Time support

by Song Yoong Siang

This series expands the XDP TX metadata framework to allow user applications to pass per packet 64-bit launch time directly to the kernel driver, requesting launch time hardware offload support. The XDP TX metadata framework will not perform any clock conversion or packet reordering. Please note that the role of Tx metadata is just to pass the launch time, not to enable the offload feature. Users will need to enable the launch time hardware offload feature of the device by using the respective command, such as the tc-etf command. Although some devices use the tc-etf command to enable their launch time hardware offload feature, xsk packets will not go through the etf qdisc. Therefore, in my opinion, the launch time should always be based on the PTP Hardware Clock (PHC). Thus, I did not include a clock ID to indicate the clock source. To simplify the test steps, I modified the xdp_hw_metadata bpf self-test tool in such a way that it will set the launch time based on the offset provided by the user and the value of the Receive Hardware Timestamp, which is against the PHC. This will eliminate the need to discipline System Clock with the PHC and then use clock_gettime() to get the time. Please note that AF_XDP lacks a feedback mechanism to inform the application if the requested launch time is invalid. So, users are expected to be familiar with the horizon of the launch time of the device they use and not request a launch time that is beyond the horizon. Otherwise, the driver might interpret the launch time incorrectly and react wrongly. For stmmac and igc, where modulo computation is used, a launch time larger than the horizon will cause the device to transmit the packet earlier than the requested launch time. Although there is no feedback mechanism for the launch time request for now, users can still check whether the requested launch time is working or not, by requesting the Transmit Completion Hardware Timestamp. 
v10: - use net_err_ratelimited(), instead of net_ratelimit() (Maciej) - accumulate the amount of used descs in local variable and update the igc_metadata_request::used_desc once (Maciej) - Ensure reverse christmas tree rule (Maciej) V9: https://lore.kernel.org/netdev/20250206060408.808325-1-yoong.siang.song@int… - Remove the igc_desc_unused() checking (Maciej) - Ensure that skb allocation and DMA mapping work before proceeding to fill in igc_tx_buffer info, context desc, and data desc (Maciej) - Rate limit the error messages (Maciej) - Update the comment to indicate that the 2 descriptors needed by the empty frame are already taken into consideration (Maciej) - Handle the case where the insertion of an empty frame fails and explain the reason behind (Maciej) - put self SOB tag as last tag (Maciej) V8: https://lore.kernel.org/netdev/20250205024116.798862-1-yoong.siang.song@int… - check the number of used descriptor in xsk_tx_metadata_request() by using used_desc of struct igc_metadata_request, and then decreases the budget with it (Maciej) - submit another bug fix patch to set the buffer type for empty frame (Maciej): https://lore.kernel.org/netdev/20250205023603.798819-1-yoong.siang.song@int… V7: https://lore.kernel.org/netdev/20250204004907.789330-1-yoong.siang.song@int… - split the refactoring code of igc empty packet insertion into a separate commit (Faizal) - add explanation on why the value "4" is used as igc transmit budget (Faizal) - perform a stress test by sending 1000 packets with 10ms interval and launch time set to 500us in the future (Faizal & Yong Liang) V6: https://lore.kernel.org/netdev/20250116155350.555374-1-yoong.siang.song@int… - fix selftest build errors by using asprintf() and realloc(), instead of managing the buffer sizes manually (Daniel, Stanislav) V5: https://lore.kernel.org/netdev/20250114152718.120588-1-yoong.siang.song@int… - change netdev feature name from tx-launch-time to tx-launch-time-fifo to explicitly state the FIFO behaviour 
(Stanislav) - improve the looping of xdp_hw_metadata app to wait for packet tx completion to be more readable by using clock_gettime() (Stanislav) - add launch time setup steps into xdp_hw_metadata app (Stanislav) V4: https://lore.kernel.org/netdev/20250106135506.9687-1-yoong.siang.song@intel… - added XDP launch time support to the igc driver (Jesper & Florian) - added per-driver launch time limitation on xsk-tx-metadata.rst (Jesper) - added explanation on FIFO behavior on xsk-tx-metadata.rst (Jakub) - added step to enable launch time in the commit message (Jesper & Willem) - explicitly documented the type of launch_time and which clock source it is against (Willem) V3: https://lore.kernel.org/netdev/20231203165129.1740512-1-yoong.siang.song@in… - renamed to use launch time (Jesper & Willem) - changed the default launch time in xdp_hw_metadata apps from 1s to 0.1s because some NICs do not support such a large future time. V2: https://lore.kernel.org/netdev/20231201062421.1074768-1-yoong.siang.song@in… - renamed to use Earliest TxTime First (Willem) - renamed to use txtime (Willem) V1: https://lore.kernel.org/netdev/20231130162028.852006-1-yoong.siang.song@int… Song Yoong Siang (5): xsk: Add launch time hardware offload support to XDP Tx metadata selftests/bpf: Add launch time request to xdp_hw_metadata net: stmmac: Add launch time support to XDP ZC igc: Refactor empty frame insertion for launch time support igc: Add launch time support to XDP ZC Documentation/netlink/specs/netdev.yaml | 4 + Documentation/networking/xsk-tx-metadata.rst | 62 +++++++ drivers/net/ethernet/intel/igc/igc.h | 1 + drivers/net/ethernet/intel/igc/igc_main.c | 143 +++++++++++---- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 2 + .../net/ethernet/stmicro/stmmac/stmmac_main.c | 13 ++ include/net/xdp_sock.h | 10 ++ include/net/xdp_sock_drv.h | 1 + include/uapi/linux/if_xdp.h | 10 ++ include/uapi/linux/netdev.h | 3 + net/core/netdev-genl.c | 2 + net/xdp/xsk.c | 3 + 
tools/include/uapi/linux/if_xdp.h | 10 ++ tools/include/uapi/linux/netdev.h | 3 + tools/testing/selftests/bpf/xdp_hw_metadata.c | 168 +++++++++++++++++- 15 files changed, 396 insertions(+), 39 deletions(-) -- 2.34.1

6 months, 1 week

3
8
0 0

[PATCH] selftests: netfilter: Fix typo in conntrack_icmp_related.sh comment

by Marcelo Moreira

Correct the typo "adress" to "address" in a comment in conntrack_icmp_related.sh to improve clarity. Signed-off-by: Marcelo Moreira <marcelomoreira1905(a)gmail.com> --- tools/testing/selftests/net/netfilter/conntrack_icmp_related.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/netfilter/conntrack_icmp_related.sh b/tools/testing/selftests/net/netfilter/conntrack_icmp_related.sh index c63d840ead61..f63b7f12b36a 100755 --- a/tools/testing/selftests/net/netfilter/conntrack_icmp_related.sh +++ b/tools/testing/selftests/net/netfilter/conntrack_icmp_related.sh @@ -171,7 +171,7 @@ table inet filter { } EOF -# make sure NAT core rewrites adress of icmp error if nat is used according to +# make sure NAT core rewrites address of icmp error if nat is used according to # conntrack nat information (icmp error will be directed at nsrouter1 address, # but it needs to be routed to nsclient1 address). ip netns exec "$nsrouter1" nft -f - <<EOF -- 2.48.1

6 months, 1 week

2
1
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-kselftest-mirror