- Linux-kselftest-mirror - lists.linaro.org

[PATCH v14 3/3] selftests/rseq: Add test for mm_cid compaction

by Gabriele Monaco

A task in the kernel (task_mm_cid_work) runs somewhat periodically to compact the mm_cid for each process. Add a test to validate that it runs correctly and timely. The test spawns 1 thread pinned to each CPU, then each thread, including the main one, runs in short bursts for some time. During this period, the mm_cids should be spanning all numbers between 0 and nproc. At the end of this phase, a thread with high enough mm_cid (>= nproc/2) is selected to be the new leader, all other threads terminate. After some time, the only running thread should see 0 as mm_cid, if that doesn't happen, the compaction mechanism didn't work and the test fails. The test never fails if only 1 core is available, in which case, we cannot test anything as the only available mm_cid is 0. Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com> Acked-by: Shuah Khan <skhan(a)linuxfoundation.org> Signed-off-by: Gabriele Monaco <gmonaco(a)redhat.com> --- tools/testing/selftests/rseq/.gitignore | 1 + tools/testing/selftests/rseq/Makefile | 2 +- .../selftests/rseq/mm_cid_compaction_test.c | 200 ++++++++++++++++++ 3 files changed, 202 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/rseq/mm_cid_compaction_test.c diff --git a/tools/testing/selftests/rseq/.gitignore b/tools/testing/selftests/rseq/.gitignore index 0fda241fa62b0..b3920c59bf401 100644 --- a/tools/testing/selftests/rseq/.gitignore +++ b/tools/testing/selftests/rseq/.gitignore @@ -3,6 +3,7 @@ basic_percpu_ops_test basic_percpu_ops_mm_cid_test basic_test basic_rseq_op_test +mm_cid_compaction_test param_test param_test_benchmark param_test_compare_twice diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile index 0d0a5fae59547..bc4d940f66d40 100644 --- a/tools/testing/selftests/rseq/Makefile +++ b/tools/testing/selftests/rseq/Makefile @@ -17,7 +17,7 @@ OVERRIDE_TARGETS = 1 TEST_GEN_PROGS = basic_test basic_percpu_ops_test basic_percpu_ops_mm_cid_test param_test \ 
param_test_benchmark param_test_compare_twice param_test_mm_cid \ param_test_mm_cid_benchmark param_test_mm_cid_compare_twice \ - syscall_errors_test + syscall_errors_test mm_cid_compaction_test TEST_GEN_PROGS_EXTENDED = librseq.so diff --git a/tools/testing/selftests/rseq/mm_cid_compaction_test.c b/tools/testing/selftests/rseq/mm_cid_compaction_test.c new file mode 100644 index 0000000000000..7ddde3b657dd6 --- /dev/null +++ b/tools/testing/selftests/rseq/mm_cid_compaction_test.c @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: LGPL-2.1 +#define _GNU_SOURCE +#include <assert.h> +#include <pthread.h> +#include <sched.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stddef.h> + +#include "../kselftest.h" +#include "rseq.h" + +#define VERBOSE 0 +#define printf_verbose(fmt, ...) \ + do { \ + if (VERBOSE) \ + printf(fmt, ##__VA_ARGS__); \ + } while (0) + +/* 0.5 s */ +#define RUNNER_PERIOD 500000 +/* Number of runs before we terminate or get the token */ +#define THREAD_RUNS 5 + +/* + * Number of times we check that the mm_cid were compacted. + * Checks are repeated every RUNNER_PERIOD. + */ +#define MM_CID_COMPACT_TIMEOUT 10 + +struct thread_args { + int cpu; + int num_cpus; + pthread_mutex_t *token; + pthread_barrier_t *barrier; + pthread_t *tinfo; + struct thread_args *args_head; +}; + +static void __noreturn *thread_runner(void *arg) +{ + struct thread_args *args = arg; + int i, ret, curr_mm_cid; + cpu_set_t cpumask; + + CPU_ZERO(&cpumask); + CPU_SET(args->cpu, &cpumask); + ret = pthread_setaffinity_np(pthread_self(), sizeof(cpumask), &cpumask); + if (ret) { + errno = ret; + perror("Error: failed to set affinity"); + abort(); + } + pthread_barrier_wait(args->barrier); + + for (i = 0; i < THREAD_RUNS; i++) + usleep(RUNNER_PERIOD); + curr_mm_cid = rseq_current_mm_cid(); + /* + * We select one thread with high enough mm_cid to be the new leader. + * All other threads (including the main thread) will terminate. 
+ * After some time, the mm_cid of the only remaining thread should + * converge to 0, if not, the test fails. + */ + if (curr_mm_cid >= args->num_cpus / 2 && + !pthread_mutex_trylock(args->token)) { + printf_verbose( + "cpu%d has mm_cid=%d and will be the new leader.\n", + sched_getcpu(), curr_mm_cid); + for (i = 0; i < args->num_cpus; i++) { + if (args->tinfo[i] == pthread_self()) + continue; + ret = pthread_join(args->tinfo[i], NULL); + if (ret) { + errno = ret; + perror("Error: failed to join thread"); + abort(); + } + } + pthread_barrier_destroy(args->barrier); + free(args->tinfo); + free(args->token); + free(args->barrier); + free(args->args_head); + + for (i = 0; i < MM_CID_COMPACT_TIMEOUT; i++) { + curr_mm_cid = rseq_current_mm_cid(); + printf_verbose("run %d: mm_cid=%d on cpu%d.\n", i, + curr_mm_cid, sched_getcpu()); + if (curr_mm_cid == 0) + exit(EXIT_SUCCESS); + usleep(RUNNER_PERIOD); + } + exit(EXIT_FAILURE); + } + printf_verbose("cpu%d has mm_cid=%d and is going to terminate.\n", + sched_getcpu(), curr_mm_cid); + pthread_exit(NULL); +} + +int test_mm_cid_compaction(void) +{ + cpu_set_t affinity; + int i, j, ret = 0, num_threads; + pthread_t *tinfo; + pthread_mutex_t *token; + pthread_barrier_t *barrier; + struct thread_args *args; + + sched_getaffinity(0, sizeof(affinity), &affinity); + num_threads = CPU_COUNT(&affinity); + tinfo = calloc(num_threads, sizeof(*tinfo)); + if (!tinfo) { + perror("Error: failed to allocate tinfo"); + return -1; + } + args = calloc(num_threads, sizeof(*args)); + if (!args) { + perror("Error: failed to allocate args"); + ret = -1; + goto out_free_tinfo; + } + token = malloc(sizeof(*token)); + if (!token) { + perror("Error: failed to allocate token"); + ret = -1; + goto out_free_args; + } + barrier = malloc(sizeof(*barrier)); + if (!barrier) { + perror("Error: failed to allocate barrier"); + ret = -1; + goto out_free_token; + } + if (num_threads == 1) { + fprintf(stderr, "Cannot test on a single cpu. 
" + "Skipping mm_cid_compaction test.\n"); + /* only skipping the test, this is not a failure */ + goto out_free_barrier; + } + pthread_mutex_init(token, NULL); + ret = pthread_barrier_init(barrier, NULL, num_threads); + if (ret) { + errno = ret; + perror("Error: failed to initialise barrier"); + goto out_free_barrier; + } + for (i = 0, j = 0; i < CPU_SETSIZE && j < num_threads; i++) { + if (!CPU_ISSET(i, &affinity)) + continue; + args[j].num_cpus = num_threads; + args[j].tinfo = tinfo; + args[j].token = token; + args[j].barrier = barrier; + args[j].cpu = i; + args[j].args_head = args; + if (!j) { + /* The first thread is the main one */ + tinfo[0] = pthread_self(); + ++j; + continue; + } + ret = pthread_create(&tinfo[j], NULL, thread_runner, &args[j]); + if (ret) { + errno = ret; + perror("Error: failed to create thread"); + abort(); + } + ++j; + } + printf_verbose("Started %d threads.\n", num_threads); + + /* Also main thread will terminate if it is not selected as leader */ + thread_runner(&args[0]); + + /* only reached in case of errors */ +out_free_barrier: + free(barrier); +out_free_token: + free(token); +out_free_args: + free(args); +out_free_tinfo: + free(tinfo); + + return ret; +} + +int main(int argc, char **argv) +{ + if (!rseq_mm_cid_available()) { + fprintf(stderr, "Error: rseq_mm_cid unavailable\n"); + return -1; + } + if (test_mm_cid_compaction()) + return -1; + return 0; +} -- 2.50.0

5 months, 1 week

1
0
0 0

[PATCH 0/5] selftests: vDSO: Clean up vdso_test_abi and drop vdso_test_clock_getres

by Thomas Weißschuh

Some cleanups for the vDSO selftests. Signed-off-by: Thomas Weißschuh <thomas.weissschuh(a)linutronix.de> --- Thomas Weißschuh (5): selftests: vDSO: vdso_test_abi: Use ksft_finished() selftests: vDSO: vdso_test_abi: Drop clock availability tests selftests: vDSO: vdso_test_abi: Use explicit indices for name array selftests: vDSO: vdso_test_abi: Test CPUTIME clocks selftests: vDSO: Drop vdso_test_clock_getres tools/testing/selftests/vDSO/Makefile | 2 - tools/testing/selftests/vDSO/vdso_test_abi.c | 57 +++------- .../selftests/vDSO/vdso_test_clock_getres.c | 123 --------------------- 3 files changed, 17 insertions(+), 165 deletions(-) --- base-commit: 437079605c26dc7c98586580a8c01b5f7f746a79 change-id: 20250707-vdso-tests-fixes-7e4ddffd7f27 Best regards, -- Thomas Weißschuh <thomas.weissschuh(a)linutronix.de>

5 months, 1 week

1
5
0 0

[PATCH] selftests/arm64: Prevent build warnings from -Wmaybe-uninitialized

by Anshuman Khandual

Arguments passed into WEXITSTATUS() should be initialized before use. Otherwise the following warning shows up while building platform selftests on arm64. Hence just zero out all the relevant local variables to avoid the build warning. Warning: ‘status’ may be used uninitialized in this function [-Wmaybe-uninitialized] Cc: Catalin Marinas <catalin.marinas(a)arm.com> Cc: Will Deacon <will(a)kernel.org> Cc: Shuah Khan <shuah(a)kernel.org> Cc: Mark Brown <broonie(a)kernel.org> Cc: linux-arm-kernel(a)lists.infradead.org Cc: linux-kselftest(a)vger.kernel.org Cc: linux-kernel(a)vger.kernel.org Signed-off-by: Anshuman Khandual <anshuman.khandual(a)arm.com> --- This applies on v6.16-rc3 tools/testing/selftests/arm64/abi/tpidr2.c | 2 +- tools/testing/selftests/arm64/fp/za-fork.c | 2 +- tools/testing/selftests/arm64/gcs/basic-gcs.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/arm64/abi/tpidr2.c b/tools/testing/selftests/arm64/abi/tpidr2.c index eb19dcc37a755..389a60e5feabf 100644 --- a/tools/testing/selftests/arm64/abi/tpidr2.c +++ b/tools/testing/selftests/arm64/abi/tpidr2.c @@ -96,7 +96,7 @@ static int write_sleep_read(void) static int write_fork_read(void) { pid_t newpid, waiting, oldpid; - int status; + int status = 0; set_tpidr2(getpid()); diff --git a/tools/testing/selftests/arm64/fp/za-fork.c b/tools/testing/selftests/arm64/fp/za-fork.c index 587b946482226..6098beb3515a0 100644 --- a/tools/testing/selftests/arm64/fp/za-fork.c +++ b/tools/testing/selftests/arm64/fp/za-fork.c @@ -24,7 +24,7 @@ int verify_fork(void); int fork_test_c(void) { pid_t newpid, waiting; - int child_status, parent_result; + int child_status = 0, parent_result; newpid = fork(); if (newpid == 0) { diff --git a/tools/testing/selftests/arm64/gcs/basic-gcs.c b/tools/testing/selftests/arm64/gcs/basic-gcs.c index 3fb9742342a34..2b350b6b7e12c 100644 --- a/tools/testing/selftests/arm64/gcs/basic-gcs.c +++ b/tools/testing/selftests/arm64/gcs/basic-gcs.c @@ 
-240,7 +240,7 @@ static bool map_guarded_stack(void) static bool test_fork(void) { unsigned long child_mode; - int ret, status; + int ret, status = 0; pid_t pid; bool pass = true; -- 2.30.2

5 months, 1 week

3
4
0 0

LTP syscalls mseal02 and shmctl03 fail in compat mode (64-bit kernel with 32-bit rootfs)

by Naresh Kamboju

The LTP syscalls mseal02 and shmctl03 failed only with compat mode testing with 64-bit kernel with 32-bit rootfs combination. Would it be possible to detect compat mode test environment and handle the test expectation in LTP test development ? Test case: - ltp-syscalls/mseal02 - ltp-syscalls/shmctl03 Test environments: - qemu-arm64-compat - qemu-x86_64-compat - x86-compat Regression Analysis: - New regression? Yes - Reproducibility? Yes Test regression: LTP mseal02.c:45: TFAIL: mseal(0xf7a8e000, 4294963201, 0) expected EINVAL: ENOMEM (12) Test regression: LTP shmctl03.c:33: TFAIL: /proc/sys/kernel/shmmax != 2147483647 got 4294967295 Reported-by: Linux Kernel Functional Testing <lkft(a)linaro.org> ## Test log tst_test.c:1953: TINFO: LTP version: 20250530 tst_test.c:1956: TINFO: Tested kernel: 6.16.0-rc4-next-20250701 #1 SMP PREEMPT @1751364932 aarch64 tst_kconfig.c:88: TINFO: Parsing kernel config '/proc/config.gz' tst_kconfig.c:676: TINFO: CONFIG_TRACE_IRQFLAGS kernel option detected which might slow the execution tst_test.c:1774: TINFO: Overall timeout per run is 0h 21m 36s mseal02.c:45: TPASS: mseal(0xf7a8e000, 4096, 4294967295) : EINVAL (22) mseal02.c:45: TPASS: mseal(0xf7a8e001, 4096, 0) : EINVAL (22) mseal02.c:45: TFAIL: mseal(0xf7a8e000, 4294963201, 0) expected EINVAL: ENOMEM (12) mseal02.c:45: TPASS: mseal(0xf7a90000, 8192, 0) : ENOMEM (12) mseal02.c:45: TPASS: mseal(0xf7a8f000, 8192, 0) : ENOMEM (12) mseal02.c:45: TPASS: mseal(0xf7a8e000, 16384, 0) : ENOMEM (12) tst_test.c:1953: TINFO: LTP version: 20250530 tst_test.c:1956: TINFO: Tested kernel: 6.16.0-rc3-next-20250627 #1 SMP PREEMPT @1751015729 aarch64 tst_kconfig.c:88: TINFO: Parsing kernel config '/proc/config.gz' tst_kconfig.c:676: TINFO: CONFIG_TRACE_IRQFLAGS kernel option detected which might slow the execution tst_test.c:1774: TINFO: Overall timeout per run is 0h 21m 36s shmctl03.c:31: TPASS: shmmin = 1 shmctl03.c:33: TFAIL: /proc/sys/kernel/shmmax != 2147483647 got 4294967295 shmctl03.c:34: TPASS: 
/proc/sys/kernel/shmmni = 4096 shmctl03.c:35: TFAIL: /proc/sys/kernel/shmall != 4278190079 got 4294967295 ## Source * Kernel version: 6.16.0-rc4-next-20250701 * Git tree: https://kernel.googlesource.com/pub/scm/linux/kernel/git/next/linux-next.git * Git sha: 3f804361f3b9af33e00b90ec9cb5afcc96831e60 * Git describe: 6.16.0-rc4-next-20250701 * Project details: https://qa-reports.linaro.org/lkft/linux-next-master/build/next-20250701/ * Architectures: arm64 * Toolchains: gcc-13 * Build name: gcc-13-lkftconfig-compat ## Build arm64 * Test log: https://qa-reports.linaro.org/api/testruns/28971382/log_file/ * Test details 1: https://qa-reports.linaro.org/lkft/linux-next-master/build/next-20250701/te… * Test details 2: https://qa-reports.linaro.org/lkft/linux-next-master/build/next-20250627/te… * Test results compare 1: https://qa-reports.linaro.org/lkft/linux-next-master/build/next-20250701/te… * Test results compare 2: https://qa-reports.linaro.org/lkft/linux-next-master/build/next-20250627/te… * Build link: https://storage.tuxsuite.com/public/linaro/lkft/builds/2zGk12rggXwQHzqasQsW… * Kernel config: https://storage.tuxsuite.com/public/linaro/lkft/builds/2zGk12rggXwQHzqasQsW… -- Linaro LKFT https://lkft.linaro.org

5 months, 1 week

2
2
0 0

[PATCH v3] selftests/mm: pagemap_scan ioctl: add PFN ZERO test cases

by Muhammad Usama Anjum

Add test cases to test the correctness of PFN ZERO flag of pagemap_scan ioctl. Test with normal pages backed memory and huge pages backed memory. Cc: David Hildenbrand <david(a)redhat.com> Signed-off-by: Muhammad Usama Anjum <usama.anjum(a)collabora.com> --- The bug has been fixed [1]. [1] https://lore.kernel.org/all/20250617143532.2375383-1-david@redhat.com Changes since v1: - Skip if madvise() fails - Skip test if use_zero_page isn't set to 1 - Keep on using memalign()+free() to allocate huge pages Changes since v2: - Move zero page detection code out to vm_util and use that - Use mmap instead of memalign to allocate hugepage --- tools/testing/selftests/mm/cow.c | 27 +------- tools/testing/selftests/mm/pagemap_ioctl.c | 72 +++++++++++++++++++++- tools/testing/selftests/mm/vm_util.c | 23 +++++++ tools/testing/selftests/mm/vm_util.h | 2 + 4 files changed, 95 insertions(+), 29 deletions(-) diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index b6cfe0a4b7dfd..b26bbf6ec4617 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -72,31 +72,6 @@ static int detect_thp_sizes(size_t sizes[], int max) return count; } -static void detect_huge_zeropage(void) -{ - int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page", - O_RDONLY); - size_t enabled = 0; - char buf[15]; - int ret; - - if (fd < 0) - return; - - ret = pread(fd, buf, sizeof(buf), 0); - if (ret > 0 && ret < sizeof(buf)) { - buf[ret] = 0; - - enabled = strtoul(buf, NULL, 10); - if (enabled == 1) { - has_huge_zeropage = true; - ksft_print_msg("[INFO] huge zeropage is enabled\n"); - } - } - - close(fd); -} - static bool range_is_swapped(void *addr, size_t size) { for (; size; addr += pagesize, size -= pagesize) @@ -1791,7 +1766,7 @@ int main(int argc, char **argv) } nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes, ARRAY_SIZE(hugetlbsizes)); - detect_huge_zeropage(); + has_huge_zeropage = detect_huge_zeropage(); 
ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() + ARRAY_SIZE(anon_thp_test_cases) * tests_per_anon_thp_test_case() + diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c index 57b4bba2b45f3..059c6d5f971e7 100644 --- a/tools/testing/selftests/mm/pagemap_ioctl.c +++ b/tools/testing/selftests/mm/pagemap_ioctl.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 + #define _GNU_SOURCE #include <stdio.h> #include <fcntl.h> @@ -34,8 +35,8 @@ #define PAGEMAP "/proc/self/pagemap" int pagemap_fd; int uffd; -unsigned int page_size; -unsigned int hpage_size; +size_t page_size; +size_t hpage_size; const char *progname; #define LEN(region) ((region.end - region.start)/page_size) @@ -1480,6 +1481,68 @@ static void transact_test(int page_size) extra_thread_faults); } +void zeropfn_tests(void) +{ + unsigned long long mem_size; + struct page_region vec; + int i, ret; + char *mmap_mem, *mem; + + /* Test with normal memory */ + mem_size = 10 * page_size; + mem = mmap(NULL, mem_size, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0); + if (mem == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + + /* Touch each page to ensure it's mapped */ + for (i = 0; i < mem_size; i += page_size) + (void)((volatile char *)mem)[i]; + + ret = pagemap_ioctl(mem, mem_size, &vec, 1, 0, + (mem_size / page_size), PAGE_IS_PFNZERO, 0, 0, PAGE_IS_PFNZERO); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 1 && LEN(vec) == (mem_size / page_size), + "%s all pages must have PFNZERO set\n", __func__); + + munmap(mem, mem_size); + + /* Test with huge page if user_zero_page is set to 1 */ + if (!detect_huge_zeropage()) { + ksft_test_result_skip("%s use_zero_page not supported or set to 1\n", __func__); + return; + } + + mem_size = 2 * hpage_size; + mmap_mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mmap_mem == MAP_FAILED) + 
ksft_exit_fail_msg("error nomem\n"); + + /* We need a THP-aligned memory area. */ + mem = (char *)(((uintptr_t)mmap_mem + hpage_size) & ~(hpage_size - 1)); + + ret = madvise(mem, hpage_size, MADV_HUGEPAGE); + if (!ret) { + char tmp = *mem; + + asm volatile("" : "+r" (tmp)); + + ret = pagemap_ioctl(mem, hpage_size, &vec, 1, 0, + 0, PAGE_IS_PFNZERO, 0, 0, PAGE_IS_PFNZERO); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 1 && LEN(vec) == (hpage_size / page_size), + "%s all huge pages must have PFNZERO set\n", __func__); + } else { + ksft_test_result_skip("%s huge page not supported\n", __func__); + } + + munmap(mmap_mem, mem_size); +} + int main(int __attribute__((unused)) argc, char *argv[]) { int shmid, buf_size, fd, i, ret; @@ -1494,7 +1557,7 @@ int main(int __attribute__((unused)) argc, char *argv[]) if (init_uffd()) ksft_exit_pass(); - ksft_set_plan(115); + ksft_set_plan(117); page_size = getpagesize(); hpage_size = read_pmd_pagesize(); @@ -1669,6 +1732,9 @@ int main(int __attribute__((unused)) argc, char *argv[]) /* 16. Userfaultfd tests */ userfaultfd_tests(); + /* 17. 
ZEROPFN tests */ + zeropfn_tests(); + close(pagemap_fd); ksft_exit_pass(); } diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c index a36734fb62f38..dde9e8ab4dc46 100644 --- a/tools/testing/selftests/mm/vm_util.c +++ b/tools/testing/selftests/mm/vm_util.c @@ -424,3 +424,26 @@ bool check_vmflag_io(void *addr) flags += flaglen; } } + +bool detect_huge_zeropage(void) +{ + int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page", + O_RDONLY); + bool enabled = 0; + char buf[15]; + int ret; + + if (fd < 0) + return 0; + + ret = pread(fd, buf, sizeof(buf), 0); + if (ret > 0 && ret < sizeof(buf)) { + buf[ret] = 0; + + if (strtoul(buf, NULL, 10) == 1) + enabled = 1; + } + + close(fd); + return enabled; +} diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index 6effafdc4d8a2..ca4c1f78ce18c 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -74,6 +74,8 @@ int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len, unsigned long get_free_hugepages(void); bool check_vmflag_io(void *addr); +bool detect_huge_zeropage(void); + /* * On ppc64 this will only work with radix 2M hugepage size */ -- 2.43.0

5 months, 1 week

1
0
0 0

[PATCH] tools/nolibc: add support for clock_nanosleep() and nanosleep()

by Thomas Weißschuh

Also add some tests. Signed-off-by: Thomas Weißschuh <thomas.weissschuh(a)linutronix.de> --- tools/include/nolibc/time.h | 34 ++++++++++++++++++++++++++++ tools/testing/selftests/nolibc/nolibc-test.c | 1 + 2 files changed, 35 insertions(+) diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h index fc387940d51f389d4233bd5712588dced31ae6e5..d02bc44d2643a5e39afa808841f7175bfab5ff7e 100644 --- a/tools/include/nolibc/time.h +++ b/tools/include/nolibc/time.h @@ -36,6 +36,8 @@ void __nolibc_timespec_kernel_to_user(const struct __kernel_timespec *kts, struc * int clock_getres(clockid_t clockid, struct timespec *res); * int clock_gettime(clockid_t clockid, struct timespec *tp); * int clock_settime(clockid_t clockid, const struct timespec *tp); + * int clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqtp, + * struct timespec *rmtp) */ static __attribute__((unused)) @@ -107,6 +109,32 @@ int clock_settime(clockid_t clockid, struct timespec *tp) return __sysret(sys_clock_settime(clockid, tp)); } +static __attribute__((unused)) +int sys_clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqtp, + struct timespec *rmtp) +{ +#if defined(__NR_clock_nanosleep) + return my_syscall4(__NR_clock_nanosleep, clockid, flags, rqtp, rmtp); +#elif defined(__NR_clock_nanosleep_time64) + struct __kernel_timespec krqtp, krmtp; + int ret; + + __nolibc_timespec_user_to_kernel(rqtp, &krqtp); + ret = my_syscall4(__NR_clock_nanosleep_time64, clockid, flags, &krqtp, &krmtp); + if (rmtp) + __nolibc_timespec_kernel_to_user(&krmtp, rmtp); + return ret; +#else + return __nolibc_enosys(__func__, clockid, flags, rqtp, rmtp); +#endif +} + +static __attribute__((unused)) +int clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqtp, + struct timespec *rmtp) +{ + return __sysret(sys_clock_nanosleep(clockid, flags, rqtp, rmtp)); +} static __inline__ double difftime(time_t time1, time_t time2) @@ -114,6 +142,12 @@ double difftime(time_t 
time1, time_t time2) return time1 - time2; } +static __inline__ +int nanosleep(const struct timespec *rqtp, struct timespec *rmtp) +{ + return clock_nanosleep(CLOCK_REALTIME, 0, rqtp, rmtp); +} + static __attribute__((unused)) time_t time(time_t *tptr) diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index b5bca1dcf36e95a576ca9ffba4f7c213978a3f35..315229233930265501296dfeb9bc2838bb6fef84 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -1363,6 +1363,7 @@ int run_syscall(int min, int max) CASE_TEST(mmap_bad); EXPECT_PTRER(1, mmap(NULL, 0, PROT_READ, MAP_PRIVATE, 0, 0), MAP_FAILED, EINVAL); break; CASE_TEST(munmap_bad); EXPECT_SYSER(1, munmap(NULL, 0), -1, EINVAL); break; CASE_TEST(mmap_munmap_good); EXPECT_SYSZR(1, test_mmap_munmap()); break; + CASE_TEST(nanosleep); ts.tv_nsec = -1; EXPECT_SYSER(1, nanosleep(&ts, NULL), -1, EINVAL); break; CASE_TEST(open_tty); EXPECT_SYSNE(1, tmp = open("/dev/null", O_RDONLY), -1); if (tmp != -1) close(tmp); break; CASE_TEST(open_blah); EXPECT_SYSER(1, tmp = open("/proc/self/blah", O_RDONLY), -1, ENOENT); if (tmp != -1) close(tmp); break; CASE_TEST(openat_dir); EXPECT_SYSZR(1, test_openat()); break; --- base-commit: 1536aa0fb1e09cb50f401ec4852c60f38173d751 change-id: 20250704-nolibc-nanosleep-2476b806b0d5 Best regards, -- Thomas Weißschuh <thomas.weissschuh(a)linutronix.de>

5 months, 1 week

3
2
0 0

[PATCH v2] selftests: futex: define SYS_futex on 32-bit architectures with 64-bit time_t

by Ben Zong-You Xie

glibc does not define SYS_futex for 32-bit architectures using 64-bit time_t e.g. riscv32, therefore this test fails to compile since it does not find SYS_futex in C library headers. Define SYS_futex as SYS_futex_time64 in this situation to ensure successful compilation and compatibility. Signed-off-by: Cynthia Huang <cynthia(a)andestech.com> Signed-off-by: Ben Zong-You Xie <ben717(a)andestech.com> --- Changes since v1: - Fix the SOB chain v1 : https://lore.kernel.org/all/20250527093536.3646143-1-ben717@andestech.com/ --- tools/testing/selftests/futex/include/futextest.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/testing/selftests/futex/include/futextest.h b/tools/testing/selftests/futex/include/futextest.h index ddbcfc9b7bac..7a5fd1d5355e 100644 --- a/tools/testing/selftests/futex/include/futextest.h +++ b/tools/testing/selftests/futex/include/futextest.h @@ -47,6 +47,17 @@ typedef volatile u_int32_t futex_t; FUTEX_PRIVATE_FLAG) #endif +/* + * SYS_futex is expected from system C library, in glibc some 32-bit + * architectures (e.g. RV32) are using 64-bit time_t, therefore it doesn't have + * SYS_futex defined but just SYS_futex_time64. Define SYS_futex as + * SYS_futex_time64 in this situation to ensure the compilation and the + * compatibility. + */ +#if !defined(SYS_futex) && defined(SYS_futex_time64) +#define SYS_futex SYS_futex_time64 +#endif + /** * futex() - SYS_futex syscall wrapper * @uaddr: address of first futex -- 2.34.1

5 months, 1 week

3
2
0 0

[PATCH 0/2] selftests/nolibc: correctly report errors from printf() and friends

by Thomas Weißschuh

When an error is encountered by printf() it needs to be reported. Signed-off-by: Thomas Weißschuh <thomas.weissschuh(a)linutronix.de> --- Thomas Weißschuh (2): selftests/nolibc: create /dev/full when running as PID 1 selftests/nolibc: correctly report errors from printf() and friends tools/include/nolibc/stdio.h | 4 ++-- tools/testing/selftests/nolibc/nolibc-test.c | 27 ++++++++++++++++++++++++++- 2 files changed, 28 insertions(+), 3 deletions(-) --- base-commit: 1536aa0fb1e09cb50f401ec4852c60f38173d751 change-id: 20250704-nolibc-printf-error-aa54f951c0a4 Best regards, -- Thomas Weißschuh <thomas.weissschuh(a)linutronix.de>

5 months, 1 week

2
3
0 0

[PATCH v13 0/5] rust: replace kernel::str::CStr w/ core::ffi::CStr

by Tamir Duberstein

This picks up from Michal Rostecki's work[0]. Per Michal's guidance I have omitted Co-authored tags, as the end result is quite different. Link: https://lore.kernel.org/rust-for-linux/20240819153656.28807-2-vadorovsky@pr… [0] Closes: https://github.com/Rust-for-Linux/linux/issues/1075 Signed-off-by: Tamir Duberstein <tamird(a)gmail.com> --- Changes in v13: - Rebase on v6.16-rc4. - Link to v12: https://lore.kernel.org/r/20250619-cstr-core-v12-0-80c9c7b45900@gmail.com Changes in v12: - Introduce `kernel::fmt::Display` to allow implementations on foreign types. - Tidy up doc comment on `str_to_cstr`. (Alice Ryhl). - Link to v11: https://lore.kernel.org/r/20250530-cstr-core-v11-0-cd9c0cbcb902@gmail.com Changes in v11: - Use `quote_spanned!` to avoid `use<'a, T>` and generally reduce manual token construction. - Add a commit to simplify `quote_spanned!`. - Drop first commit in favor of https://lore.kernel.org/rust-for-linux/20240906164448.2268368-1-paddymills@…. (Miguel Ojeda) - Correctly handle expressions such as `pr_info!("{a}", a = a = a)`. (Benno Lossin) - Avoid dealing with `}}` escapes, which is not needed. (Benno Lossin) - Revert some unnecessary changes. (Benno Lossin) - Rename `c_str_avoid_literals!` to `str_to_cstr!`. (Benno Lossin & Alice Ryhl). - Link to v10: https://lore.kernel.org/r/20250524-cstr-core-v10-0-6412a94d9d75@gmail.com Changes in v10: - Rebase on cbeaa41dfe26b72639141e87183cb23e00d4b0dd. - Implement Alice's suggestion to use a proc macro to work around orphan rules otherwise preventing `core::ffi::CStr` to be directly printed with `{}`. - Link to v9: https://lore.kernel.org/r/20250317-cstr-core-v9-0-51d6cc522f62@gmail.com Changes in v9: - Rebase on rust-next. - Restore `impl Display for BStr` which exists upstream[1]. 
- Link: https://doc.rust-lang.org/nightly/std/bstr/struct.ByteStr.html#impl-Display… [1] - Link to v8: https://lore.kernel.org/r/20250203-cstr-core-v8-0-cb3f26e78686@gmail.com Changes in v8: - Move `{from,as}_char_ptr` back to `CStrExt`. This reduces the diff some. - Restore `from_bytes_with_nul_unchecked_mut`, `to_cstring`. - Link to v7: https://lore.kernel.org/r/20250202-cstr-core-v7-0-da1802520438@gmail.com Changes in v7: - Rebased on mainline. - Restore functionality added in commit a321f3ad0a5d ("rust: str: add {make,to}_{upper,lower}case() to CString"). - Used `diff.algorithm patience` to improve diff readability. - Link to v6: https://lore.kernel.org/r/20250202-cstr-core-v6-0-8469cd6d29fd@gmail.com Changes in v6: - Split the work into several commits for ease of review. - Restore `{from,as}_char_ptr` to allow building on ARM (see commit message). - Add `CStrExt` to `kernel::prelude`. (Alice Ryhl) - Remove `CStrExt::from_bytes_with_nul_unchecked_mut` and restore `DerefMut for CString`. (Alice Ryhl) - Rename and hide `kernel::c_str!` to encourage use of C-String literals. - Drop implementation and invocation changes in kunit.rs. (Trevor Gross) - Drop docs on `Display` impl. (Trevor Gross) - Rewrite docs in the style of the standard library. - Restore the `test_cstr_debug` unit tests to demonstrate that the implementation has changed. Changes in v5: - Keep the `test_cstr_display*` unit tests. Changes in v4: - Provide the `CStrExt` trait with `display()` method, which returns a `CStrDisplay` wrapper with `Display` implementation. This addresses the lack of `Display` implementation for `core::ffi::CStr`. - Provide `from_bytes_with_nul_unchecked_mut()` method in `CStrExt`, which might be useful and is going to prevent manual, unsafe casts. - Fix a typo (s/preffered/prefered/). Changes in v3: - Fix the commit message. - Remove redundant braces in `use`, when only one item is imported. Changes in v2: - Do not remove `c_str` macro. 
While it's preferred to use C-string literals, there are two cases where `c_str` is helpful: - When working with macros, which already return a Rust string literal (e.g. `stringify!`). - When building macros, where we want to take a Rust string literal as an argument (for caller's convenience), but still use it as a C-string internally. - Use Rust literals as arguments in macros (`new_mutex`, `new_condvar`, `new_mutex`). Use the `c_str` macro to convert these literals to C-string literals. - Use `c_str` in kunit.rs for converting the output of `stringify!` to a `CStr`. - Remove `DerefMut` implementation for `CString`. --- Tamir Duberstein (5): rust: macros: reduce collections in `quote!` macro rust: support formatting of foreign types rust: replace `CStr` with `core::ffi::CStr` rust: replace `kernel::c_str!` with C-Strings rust: remove core::ffi::CStr reexport drivers/block/rnull.rs | 4 +- drivers/cpufreq/rcpufreq_dt.rs | 5 +- drivers/gpu/drm/drm_panic_qr.rs | 5 +- drivers/gpu/drm/nova/driver.rs | 10 +- drivers/gpu/nova-core/driver.rs | 6 +- drivers/gpu/nova-core/firmware.rs | 2 +- drivers/gpu/nova-core/gpu.rs | 4 +- drivers/gpu/nova-core/nova_core.rs | 2 +- drivers/net/phy/ax88796b_rust.rs | 8 +- drivers/net/phy/qt2025.rs | 6 +- rust/kernel/auxiliary.rs | 6 +- rust/kernel/block/mq.rs | 2 +- rust/kernel/clk.rs | 9 +- rust/kernel/configfs.rs | 14 +- rust/kernel/cpufreq.rs | 6 +- rust/kernel/device.rs | 9 +- rust/kernel/devres.rs | 2 +- rust/kernel/driver.rs | 4 +- rust/kernel/drm/device.rs | 4 +- rust/kernel/drm/driver.rs | 3 +- rust/kernel/drm/ioctl.rs | 2 +- rust/kernel/error.rs | 10 +- rust/kernel/faux.rs | 5 +- rust/kernel/firmware.rs | 16 +- rust/kernel/fmt.rs | 89 +++++++ rust/kernel/kunit.rs | 28 +-- rust/kernel/lib.rs | 3 +- rust/kernel/miscdevice.rs | 5 +- rust/kernel/net/phy.rs | 12 +- rust/kernel/of.rs | 5 +- rust/kernel/pci.rs | 2 +- rust/kernel/platform.rs | 6 +- rust/kernel/prelude.rs | 5 +- rust/kernel/print.rs | 4 +- rust/kernel/seq_file.rs | 6 +- 
rust/kernel/str.rs | 444 ++++++++++------------------------ rust/kernel/sync.rs | 7 +- rust/kernel/sync/completion.rs | 2 +- rust/kernel/sync/condvar.rs | 4 +- rust/kernel/sync/lock.rs | 4 +- rust/kernel/sync/lock/global.rs | 6 +- rust/kernel/sync/poll.rs | 1 + rust/kernel/workqueue.rs | 9 +- rust/macros/fmt.rs | 99 ++++++++ rust/macros/kunit.rs | 10 +- rust/macros/lib.rs | 19 ++ rust/macros/module.rs | 2 +- rust/macros/quote.rs | 111 ++++----- samples/rust/rust_configfs.rs | 9 +- samples/rust/rust_driver_auxiliary.rs | 7 +- samples/rust/rust_driver_faux.rs | 4 +- samples/rust/rust_driver_pci.rs | 4 +- samples/rust/rust_driver_platform.rs | 4 +- samples/rust/rust_misc_device.rs | 3 +- scripts/rustdoc_test_gen.rs | 6 +- 55 files changed, 543 insertions(+), 521 deletions(-) --- base-commit: 769e324b66b0d92d04f315d0c45a0f72737c7494 change-id: 20250201-cstr-core-d4b9b69120cf Best regards, -- Tamir Duberstein <tamird(a)gmail.com>

5 months, 2 weeks

4
32
0 0

[PATCH v5] selftests/futex: Convert 32bit timespec struct to 64bit version for 32bit compatibility mode

by Terry Tritton

sys_futex_wait() can not accept old_timespec32 struct, so userspace should convert it from 32bit to 64bit before syscall to support 32bit compatible mode. This fix is based off [1] Link: https://lore.kernel.org/all/20231203235117.29677-1-wegao@suse.com/ [1] Originally-by: Wei Gao <wegao(a)suse.com> Signed-off-by: Terry Tritton <terry.tritton(a)linaro.org> --- Changes in v5: - Fixed checkpatch errors Changes in v4: - Change to use __kernel_timespec as suggested by tglx Changes in v3: - Fix signed-off-by chain but for real this time Changes in v2: - Fix signed-off-by chain tools/testing/selftests/futex/include/futex2test.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/futex/include/futex2test.h b/tools/testing/selftests/futex/include/futex2test.h index ea79662405bc..ff9eebcc270c 100644 --- a/tools/testing/selftests/futex/include/futex2test.h +++ b/tools/testing/selftests/futex/include/futex2test.h @@ -5,6 +5,7 @@ * Copyright 2021 Collabora Ltd. */ #include <stdint.h> +#include <linux/time_types.h> #define u64_to_ptr(x) ((void *)(uintptr_t)(x)) @@ -65,7 +66,12 @@ struct futex32_numa { static inline int futex_waitv(volatile struct futex_waitv *waiters, unsigned long nr_waiters, unsigned long flags, struct timespec *timo, clockid_t clockid) { - return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, timo, clockid); + struct __kernel_timespec ts = { + .tv_sec = timo->tv_sec, + .tv_nsec = timo->tv_nsec, + }; + + return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, &ts, clockid); } /* -- 2.39.5

5 months, 2 weeks

1
0
0 0

[PATCH v4] selftests/futex: Convert 32bit timespec struct to 64bit version for 32bit compatibility mode

by Terry Tritton

sys_futex_wait() can not accept old_timespec32 struct, so userspace should convert it from 32bit to 64bit before syscall to support 32bit compatible mode. This fix is based off [1] Link: https://lore.kernel.org/all/20231203235117.29677-1-wegao@suse.com/ [1] Originally-by: Wei Gao <wegao(a)suse.com> Signed-off-by: Terry Tritton <terry.tritton(a)linaro.org> --- Changes in v4: - Change to use __kernel_timespec as suggested by tglx Changes in v3: - Fix signed-off-by chain but for real this time Changes in v2: - Fix signed-off-by chain tools/testing/selftests/futex/include/futex2test.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/futex/include/futex2test.h b/tools/testing/selftests/futex/include/futex2test.h index ea79662405bc..af5b92ba04ad 100644 --- a/tools/testing/selftests/futex/include/futex2test.h +++ b/tools/testing/selftests/futex/include/futex2test.h @@ -5,6 +5,7 @@ * Copyright 2021 Collabora Ltd. */ #include <stdint.h> +#include <linux/time_types.h> #define u64_to_ptr(x) ((void *)(uintptr_t)(x)) @@ -65,7 +66,12 @@ struct futex32_numa { static inline int futex_waitv(volatile struct futex_waitv *waiters, unsigned long nr_waiters, unsigned long flags, struct timespec *timo, clockid_t clockid) { - return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, timo, clockid); + struct __kernel_timespec ts = { + .tv_sec = timo->tv_sec, + .tv_nsec = timo->tv_nsec, + }; + + return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, &ts, clockid); } /* -- 2.39.5

5 months, 2 weeks

2
2
0 0

[PATCH net-next v4] ipv6: add `force_forwarding` sysctl to enable per-interface forwarding

by Gabriel Goller

It is currently impossible to enable ipv6 forwarding on a per-interface basis like in ipv4. To enable forwarding on an ipv6 interface we need to enable it on all interfaces and disable it on the other interfaces using a netfilter rule. This is especially cumbersome if you have lots of interfaces and only want to enable forwarding on a few. According to the sysctl docs [0] the `net.ipv6.conf.all.forwarding` enables forwarding for all interfaces, while the interface-specific `net.ipv6.conf.<interface>.forwarding` configures the interface Host/Router configuration. Introduce a new sysctl flag `force_forwarding`, which can be set on every interface. The ip6_forward() function will then check if the global forwarding flag OR the force_forwarding flag is active and forward the packet. To preserve backwards-compatibility reset the flag (on all interfaces) to 0 if the net.ipv6.conf.all.forwarding flag is set to 0. Add a short selftest that checks if a packet gets forwarded with and without `force_forwarding`. [0]: https://www.kernel.org/doc/Documentation/networking/ip-sysctl.txt Signed-off-by: Gabriel Goller <g.goller(a)proxmox.com> --- First time writing a selftest, so LMK if I did something wrong or if there is something I can improve. Thanks! v4: * actually write the sysctl value to the table * use ASSERT_RTNL() when forwarding the sysctl change * remove useless comments in function body * simplify forwarding and force_forwarding check in ip6_output.c * fix code backticks in Documentation (double instead of single) * add selftests v3: https://lore.kernel.org/netdev/20250702074619.139031-1-g.goller@proxmox.com/ * remove forwarding=0 setting force_forwarding=0 globally. * add min and max (0 and 1) value to sysctl. v2: https://lore.kernel.org/netdev/20250701140423.487411-1-g.goller@proxmox.com/ * rename from `do_forwarding` to `force_forwarding`. 
* add global `force_forwarding` flag which will enable `force_forwarding` on every interface like the `ipv4.all.forwarding` flag. * `forwarding`=0 will disable global and per-interface `force_forwarding`. * export option as NETCONFA_FORCE_FORWARDING. v1: https://lore.kernel.org/netdev/20250702074619.139031-1-g.goller@proxmox.com/ Documentation/networking/ip-sysctl.rst | 5 + include/linux/ipv6.h | 1 + include/uapi/linux/ipv6.h | 1 + include/uapi/linux/netconf.h | 1 + include/uapi/linux/sysctl.h | 1 + net/ipv6/addrconf.c | 91 +++++++++++++++ net/ipv6/ip6_output.c | 3 +- tools/testing/selftests/net/Makefile | 1 + .../selftests/net/ipv6_force_forwarding.sh | 105 ++++++++++++++++++ 9 files changed, 208 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/net/ipv6_force_forwarding.sh diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 0f1251cce314..ec7fa1e890f1 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -2292,6 +2292,11 @@ conf/all/forwarding - BOOLEAN proxy_ndp - BOOLEAN Do proxy ndp. +force_forwarding - BOOLEAN + Enable forwarding on this interface only -- regardless of the setting on + ``conf/all/forwarding``. When setting ``conf.all.forwarding`` to 0, + the ``force_forwarding`` flag will be reset on all interfaces. + fwmark_reflect - BOOLEAN Controls the fwmark of kernel-generated IPv6 reply packets that are not associated with a socket for example, TCP RSTs or ICMPv6 echo replies). 
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 5aeeed22f35b..5380107e466c 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -19,6 +19,7 @@ struct ipv6_devconf { __s32 forwarding; __s32 disable_policy; __s32 proxy_ndp; + __s32 force_forwarding; __cacheline_group_end(ipv6_devconf_read_txrx); __s32 accept_ra; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index cf592d7b630f..d4d3ae774b26 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -199,6 +199,7 @@ enum { DEVCONF_NDISC_EVICT_NOCARRIER, DEVCONF_ACCEPT_UNTRACKED_NA, DEVCONF_ACCEPT_RA_MIN_LFT, + DEVCONF_FORCE_FORWARDING, DEVCONF_MAX }; diff --git a/include/uapi/linux/netconf.h b/include/uapi/linux/netconf.h index fac4edd55379..1c8c84d65ae3 100644 --- a/include/uapi/linux/netconf.h +++ b/include/uapi/linux/netconf.h @@ -19,6 +19,7 @@ enum { NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, NETCONFA_INPUT, NETCONFA_BC_FORWARDING, + NETCONFA_FORCE_FORWARDING, __NETCONFA_MAX }; #define NETCONFA_MAX (__NETCONFA_MAX - 1) diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h index 8981f00204db..63d1464cb71c 100644 --- a/include/uapi/linux/sysctl.h +++ b/include/uapi/linux/sysctl.h @@ -573,6 +573,7 @@ enum { NET_IPV6_ACCEPT_RA_FROM_LOCAL=26, NET_IPV6_ACCEPT_RA_RT_INFO_MIN_PLEN=27, NET_IPV6_RA_DEFRTR_METRIC=28, + NET_IPV6_FORCE_FORWARDING=29, __NET_IPV6_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ba2ec7c870cc..dcf4e8bf8cf8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -239,6 +239,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .ndisc_evict_nocarrier = 1, .ra_honor_pio_life = 0, .ra_honor_pio_pflag = 0, + .force_forwarding = 0, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -303,6 +304,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .ndisc_evict_nocarrier = 1, .ra_honor_pio_life = 0, .ra_honor_pio_pflag = 0, + .force_forwarding = 0, }; /* Check if link 
is ready: is it up and is a valid qdisc available */ @@ -857,6 +859,15 @@ static void addrconf_forward_change(struct net *net, __s32 newf) idev = __in6_dev_get_rtnl_net(dev); if (idev) { int changed = (!idev->cnf.forwarding) ^ (!newf); + /* + * With the introduction of force_forwarding, we need to be backwards + * compatible, so that means we need to set the force_forwarding flag + * on every interface to 0 if net.ipv6.conf.all.forwarding is set to 0. + * This allows the global forwarding flag to disable forwarding for + * all interfaces. + */ + if (newf == 0) + WRITE_ONCE(idev->cnf.force_forwarding, newf); WRITE_ONCE(idev->cnf.forwarding, newf); if (changed) @@ -5719,6 +5730,7 @@ static void ipv6_store_devconf(const struct ipv6_devconf *cnf, array[DEVCONF_ACCEPT_UNTRACKED_NA] = READ_ONCE(cnf->accept_untracked_na); array[DEVCONF_ACCEPT_RA_MIN_LFT] = READ_ONCE(cnf->accept_ra_min_lft); + array[DEVCONF_FORCE_FORWARDING] = READ_ONCE(cnf->force_forwarding); } static inline size_t inet6_ifla6_size(void) @@ -6747,6 +6759,78 @@ static int addrconf_sysctl_disable_policy(const struct ctl_table *ctl, int write return ret; } +static void addrconf_force_forward_change(struct net *net, __s32 newf) +{ + ASSERT_RTNL(); + struct net_device *dev; + struct inet6_dev *idev; + + for_each_netdev(net, dev) { + idev = __in6_dev_get_rtnl_net(dev); + if (idev) { + int changed = (!idev->cnf.force_forwarding) ^ (!newf); + + WRITE_ONCE(idev->cnf.force_forwarding, newf); + if (changed) { + inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, + NETCONFA_FORCE_FORWARDING, + dev->ifindex, &idev->cnf); + } + } + } +} + +static int addrconf_sysctl_force_forwarding(const struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct inet6_dev *idev = ctl->extra1; + struct net *net = ctl->extra2; + int *valp = ctl->data; + loff_t pos = *ppos; + int new_val = *valp; + int old_val = *valp; + int ret; + + struct ctl_table tmp_ctl = *ctl; + + tmp_ctl.extra1 = SYSCTL_ZERO; 
+ tmp_ctl.extra2 = SYSCTL_ONE; + tmp_ctl.data = &new_val; + + ret = proc_douintvec_minmax(&tmp_ctl, write, buffer, lenp, ppos); + + if (write && old_val != new_val) { + if (!rtnl_net_trylock(net)) + return restart_syscall(); + + if (valp == &net->ipv6.devconf_dflt->force_forwarding) { + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_FORCE_FORWARDING, + NETCONFA_IFINDEX_DEFAULT, + net->ipv6.devconf_dflt); + } else if (valp == &net->ipv6.devconf_all->force_forwarding) { + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_FORCE_FORWARDING, + NETCONFA_IFINDEX_ALL, + net->ipv6.devconf_all); + + addrconf_force_forward_change(net, new_val); + } else { + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_FORCE_FORWARDING, + idev->dev->ifindex, + &idev->cnf); + } + rtnl_net_unlock(net); + } + + if (write) + WRITE_ONCE(*valp, new_val); + if (ret) + *ppos = pos; + return ret; +} + static int minus_one = -1; static const int two_five_five = 255; static u32 ioam6_if_id_max = U16_MAX; @@ -7217,6 +7301,13 @@ static const struct ctl_table addrconf_sysctl[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_TWO, }, + { + .procname = "force_forwarding", + .data = &ipv6_devconf.force_forwarding, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = addrconf_sysctl_force_forwarding, + }, }; static int __addrconf_sysctl_register(struct net *net, char *dev_name, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 7bd29a9ff0db..440b9efced72 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -509,7 +509,8 @@ int ip6_forward(struct sk_buff *skb) u32 mtu; idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif)); - if (READ_ONCE(net->ipv6.devconf_all->forwarding) == 0) + if (idev && !READ_ONCE(idev->cnf.force_forwarding) && + !READ_ONCE(net->ipv6.devconf_all->forwarding)) goto error; if (skb->pkt_type != PACKET_HOST) diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 
332f387615d7..f64ec8a15a77 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -112,6 +112,7 @@ TEST_PROGS += skf_net_off.sh TEST_GEN_FILES += skf_net_off TEST_GEN_FILES += tfo TEST_PROGS += tfo_passive.sh +TEST_PROGS += ipv6_force_forwarding.sh # YNL files, must be before "include ..lib.mk" YNL_GEN_FILES := busy_poller netlink-dumps diff --git a/tools/testing/selftests/net/ipv6_force_forwarding.sh b/tools/testing/selftests/net/ipv6_force_forwarding.sh new file mode 100644 index 000000000000..62adc9d4afc9 --- /dev/null +++ b/tools/testing/selftests/net/ipv6_force_forwarding.sh @@ -0,0 +1,105 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test IPv6 force_forwarding interface property +# +# This test verifies that the force_forwarding property works correctly: +# - When global forwarding is disabled, packets are not forwarded normally +# - When force_forwarding is enabled on an interface, packets are forwarded +# regardless of the global forwarding setting + +source lib.sh + +cleanup() { + cleanup_ns $ns1 $ns2 $ns3 +} + +trap cleanup EXIT + +setup_test() { + # Create three namespaces: sender, router, receiver + setup_ns ns1 ns2 ns3 + + # Create veth pairs: ns1 <-> ns2 <-> ns3 + ip link add name veth12 type veth peer name veth21 + ip link add name veth23 type veth peer name veth32 + + # Move interfaces to namespaces + ip link set veth12 netns $ns1 + ip link set veth21 netns $ns2 + ip link set veth23 netns $ns2 + ip link set veth32 netns $ns3 + + # Configure interfaces + ip -n $ns1 addr add 2001:db8:1::1/64 dev veth12 + ip -n $ns2 addr add 2001:db8:1::2/64 dev veth21 + ip -n $ns2 addr add 2001:db8:2::1/64 dev veth23 + ip -n $ns3 addr add 2001:db8:2::2/64 dev veth32 + + # Bring up interfaces + ip -n $ns1 link set veth12 up + ip -n $ns2 link set veth21 up + ip -n $ns2 link set veth23 up + ip -n $ns3 link set veth32 up + + # Add routes + ip -n $ns1 route add 2001:db8:2::/64 via 2001:db8:1::2 + ip -n $ns3 route add 
2001:db8:1::/64 via 2001:db8:2::1 + + # Disable global forwarding + ip netns exec $ns2 sysctl -qw net.ipv6.conf.all.forwarding=0 +} + +test_force_forwarding() { + local ret=0 + + echo "TEST: force_forwarding functionality" + + # Check if force_forwarding sysctl exists + if ! ip netns exec $ns2 test -f /proc/sys/net/ipv6/conf/veth21/force_forwarding; then + echo "SKIP: force_forwarding not available" + return $ksft_skip + fi + + # Test 1: Without force_forwarding, ping should fail + ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth21.force_forwarding=0 + ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth23.force_forwarding=0 + + if ip netns exec $ns1 ping -6 -c 1 -W 2 2001:db8:2::2 &>/dev/null; then + echo "FAIL: ping succeeded when forwarding disabled" + ret=1 + else + echo "PASS: forwarding disabled correctly" + fi + + # Test 2: With force_forwarding enabled, ping should succeed + ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth21.force_forwarding=1 + ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth23.force_forwarding=1 + + if ip netns exec $ns1 ping -6 -c 1 -W 2 2001:db8:2::2 &>/dev/null; then + echo "PASS: force_forwarding enabled forwarding" + else + echo "FAIL: ping failed with force_forwarding enabled" + ret=1 + fi + + return $ret +} + +echo "IPv6 force_forwarding test" +echo "==========================" + +setup_test +test_force_forwarding +ret=$? + +if [ $ret -eq 0 ]; then + echo "OK" + exit 0 +elif [ $ret -eq $ksft_skip ]; then + echo "SKIP" + exit $ksft_skip +else + echo "FAIL" + exit 1 +fi -- 2.39.5

5 months, 2 weeks

3
5
0 0

[PATCH v2] selftests/futex: Add futex_numa to .gitignore

by Terry Tritton

futex_numa was never added to the .gitignore file. Add it. Fixes: 9140f57c1c13 ("futex,selftests: Add another FUTEX2_NUMA selftest") Signed-off-by: Terry Tritton <terry.tritton(a)linaro.org> --- Changes in v2: - Add Fixes tag tools/testing/selftests/futex/functional/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore index 7b24ae89594a..776ad658f75e 100644 --- a/tools/testing/selftests/futex/functional/.gitignore +++ b/tools/testing/selftests/futex/functional/.gitignore @@ -11,3 +11,4 @@ futex_wait_timeout futex_wait_uninitialized_heap futex_wait_wouldblock futex_waitv +futex_numa -- 2.39.5

5 months, 2 weeks

2
1
0 0

[PATCH v2] selftests/mm: pagemap_scan ioctl: add PFN ZERO test cases

by Muhammad Usama Anjum

Add test cases to test the correctness of PFN ZERO flag of pagemap_scan ioctl. Test with normal pages backed memory and huge pages backed memory. Cc: David Hildenbrand <david(a)redhat.com> Signed-off-by: Muhammad Usama Anjum <usama.anjum(a)collabora.com> --- The bug has been fixed [1]. [1] https://lore.kernel.org/all/20250617143532.2375383-1-david@redhat.com Changes since v1: - Skip if madvise() fails - Skip test if use_zero_page isn't set to 1 - Keep on using memalign()+free() to allocate huge pages --- tools/testing/selftests/mm/pagemap_ioctl.c | 86 +++++++++++++++++++++- 1 file changed, 85 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c index 57b4bba2b45f3..976ab357f4651 100644 --- a/tools/testing/selftests/mm/pagemap_ioctl.c +++ b/tools/testing/selftests/mm/pagemap_ioctl.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 + #define _GNU_SOURCE #include <stdio.h> #include <fcntl.h> @@ -1480,6 +1481,86 @@ static void transact_test(int page_size) extra_thread_faults); } +bool is_use_zero_page_set(void) +{ + ssize_t bytes_read; + char buffer[2]; + int fd; + + fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page", O_RDONLY); + if (fd < 0) + return 0; + + bytes_read = read(fd, buffer, sizeof(buffer) - 1); + if (bytes_read <= 0) { + close(fd); + return 0; + } + + close(fd); + if (atoi(buffer) != 1) + return 0; + + return 1; +} + +void zeropfn_tests(void) +{ + unsigned long long mem_size; + struct page_region vec; + int i, ret; + char *mem; + + /* Test with normal memory */ + mem_size = 10 * page_size; + mem = mmap(NULL, mem_size, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0); + if (mem == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + + /* Touch each page to ensure it's mapped */ + for (i = 0; i < mem_size; i += page_size) + (void)((volatile char *)mem)[i]; + + ret = pagemap_ioctl(mem, mem_size, &vec, 1, 0, + (mem_size / page_size), PAGE_IS_PFNZERO, 0, 0, PAGE_IS_PFNZERO); + 
if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 1 && LEN(vec) == (mem_size / page_size), + "%s all pages must have PFNZERO set\n", __func__); + + munmap(mem, mem_size); + + /* Test with huge page if user_zero_page is set to 1 */ + if (!is_use_zero_page_set()) { + ksft_test_result_skip("%s use_zero_page not supported or set to 1\n", __func__); + return; + } + + mem_size = 10 * hpage_size; + mem = memalign(hpage_size, mem_size); + if (!mem) + ksft_exit_fail_msg("error nomem\n"); + + ret = madvise(mem, mem_size, MADV_HUGEPAGE); + if (!ret) { + for (i = 0; i < mem_size; i += hpage_size) + (void)((volatile char *)mem)[i]; + + ret = pagemap_ioctl(mem, mem_size, &vec, 1, 0, + (mem_size / page_size), PAGE_IS_PFNZERO, 0, 0, PAGE_IS_PFNZERO); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 1 && LEN(vec) == (mem_size / page_size), + "%s all huge pages must have PFNZERO set\n", __func__); + + free(mem); + } else { + ksft_test_result_skip("%s huge page not supported\n", __func__); + } +} + int main(int __attribute__((unused)) argc, char *argv[]) { int shmid, buf_size, fd, i, ret; @@ -1494,7 +1575,7 @@ int main(int __attribute__((unused)) argc, char *argv[]) if (init_uffd()) ksft_exit_pass(); - ksft_set_plan(115); + ksft_set_plan(117); page_size = getpagesize(); hpage_size = read_pmd_pagesize(); @@ -1669,6 +1750,9 @@ int main(int __attribute__((unused)) argc, char *argv[]) /* 16. Userfaultfd tests */ userfaultfd_tests(); + /* 17. ZEROPFN tests */ + zeropfn_tests(); + close(pagemap_fd); ksft_exit_pass(); } -- 2.43.0

5 months, 2 weeks

2
1
0 0

[PATCH v3 0/4] kselftest/arm64: Add coverage for the interaction of vfork() and GCS

by Mark Brown

I had cause to look at the vfork() support for GCS and realised that we don't have any direct test coverage. This series adds some by adding vfork() to nolibc and then using that in basic-gcs to provide some simple vfork() coverage. Signed-off-by: Mark Brown <broonie(a)kernel.org> --- Changes in v3: - Stylistic nits in the GCS vfork() test. - SPARC has a non-standard vfork() ABI which needs handling. - Link to v2: https://lore.kernel.org/r/20250610-arm64-gcs-vfork-exit-v2-0-929443dfcf82@k… Changes in v2: - Add replacement of ifdef with if defined() in nolibc since the code doesn't reflect the coding style. - Remove check for arch specific vfork(). - Link to v1: https://lore.kernel.org/r/20250609-arm64-gcs-vfork-exit-v1-0-baad0f085747@k… --- Mark Brown (4): tools/nolibc: Replace ifdef with if defined() in sys.h tools/nolibc: Provide vfork() kselftest/arm64: Add a test for vfork() with GCS selftests/nolibc: Add coverage of vfork() tools/include/nolibc/arch-sparc.h | 16 +++++++ tools/include/nolibc/sys.h | 59 ++++++++++++++++++------- tools/testing/selftests/arm64/gcs/basic-gcs.c | 63 +++++++++++++++++++++++++++ tools/testing/selftests/nolibc/nolibc-test.c | 23 ++++++++-- 4 files changed, 142 insertions(+), 19 deletions(-) --- base-commit: 86731a2a651e58953fc949573895f2fa6d456841 change-id: 20250528-arm64-gcs-vfork-exit-4a7daf7652ee Best regards, -- Mark Brown <broonie(a)kernel.org>

5 months, 2 weeks

2
6
0 0

[kvm-unit-tests PATCH] riscv: Use norvc over arch, -c

by Jesse Taube

The Linux kernel main tree uses "norvc" over "arch, -c"; change to match this. GCC 15 started to add _zca_zcd to the assembler flags causing a bug which made "arch, -c" generate a compressed instruction. Link: https://sourceware.org/bugzilla/show_bug.cgi?id=33128 Cc: Clément Léger <cleger(a)rivosinc.com> Signed-off-by: Jesse Taube <jesse(a)rivosinc.com> --- riscv/isa-dbltrp.c | 2 +- riscv/sbi-dbtr.c | 2 +- riscv/sbi-fwft.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/riscv/isa-dbltrp.c b/riscv/isa-dbltrp.c index b7e21589..af12860c 100644 --- a/riscv/isa-dbltrp.c +++ b/riscv/isa-dbltrp.c @@ -26,7 +26,7 @@ do { \ unsigned long value = 0; \ asm volatile( \ " .option push\n" \ - " .option arch,-c\n" \ + " .option norvc\n" \ " sw %0, 0(%1)\n" \ " .option pop\n" \ : : "r" (value), "r" (ptr) : "memory"); \ diff --git a/riscv/sbi-dbtr.c b/riscv/sbi-dbtr.c index c4ccd81d..129f79b8 100644 --- a/riscv/sbi-dbtr.c +++ b/riscv/sbi-dbtr.c @@ -134,7 +134,7 @@ static __attribute__((naked)) void exec_call(void) { /* skip over nop when triggered instead of ret. */ asm volatile (".option push\n" - ".option arch, -c\n" + ".option norvc\n" "nop\n" "ret\n" ".option pop\n"); diff --git a/riscv/sbi-fwft.c b/riscv/sbi-fwft.c index 8920bcb5..fda7eb52 100644 --- a/riscv/sbi-fwft.c +++ b/riscv/sbi-fwft.c @@ -174,7 +174,7 @@ static void fwft_check_misaligned_exc_deleg(void) * Disable compression so the lw takes exactly 4 bytes and thus * can be skipped reliably from the exception handler. */ - ".option arch,-c\n" + ".option norvc\n" "lw %[val], 1(%[val_addr])\n" ".option pop\n" : [val] "+r" (ret.value) -- 2.43.0

5 months, 2 weeks

2
1
0 0

[PATCH v3] kunit: fix longest symbol length test

by Sergio González Collado

The kunit test that checks the longest symbol length [1] has triggered warnings in some pipelines when symbol prefixes are used [2][3]. The test will now depend on !PREFIX_SYMBOLS and !CFI_CLANG as suggested in [4] and on !GCOV_KERNEL. [1] https://lore.kernel.org/rust-for-linux/CABVgOSm=5Q0fM6neBhxSbOUHBgNzmwf2V22… [2] https://lore.kernel.org/all/20250328112156.2614513-1-arnd@kernel.org/T/#u [3] https://lore.kernel.org/rust-for-linux/bbd03b37-c4d9-4a92-9be2-75aaf8c19815… [4] https://lore.kernel.org/linux-kselftest/20250427200916.GA1661412@ax162/T/#t Reviewed-by: Rae Moar <rmoar(a)google.com> Signed-off-by: Sergio González Collado <sergio.collado(a)gmail.com> --- v2 -> v3: added dependency on !GCOV_KERNEL (to avoid __gcov_ prefix) --- v1 -> v2: added dependency on !CFI_CLANG as suggested in [3], removed CONFIG_ prefix --- lib/Kconfig.debug | 1 + lib/tests/longest_symbol_kunit.c | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index f9051ab610d5..e55c761eae20 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2886,6 +2886,7 @@ config FORTIFY_KUNIT_TEST config LONGEST_SYM_KUNIT_TEST tristate "Test the longest symbol possible" if !KUNIT_ALL_TESTS depends on KUNIT && KPROBES + depends on !PREFIX_SYMBOLS && !CFI_CLANG && !GCOV_KERNEL default KUNIT_ALL_TESTS help Tests the longest symbol possible diff --git a/lib/tests/longest_symbol_kunit.c b/lib/tests/longest_symbol_kunit.c index e3c28ff1807f..9b4de3050ba7 100644 --- a/lib/tests/longest_symbol_kunit.c +++ b/lib/tests/longest_symbol_kunit.c @@ -3,8 +3,7 @@ * Test the longest symbol length. 
Execute with: * ./tools/testing/kunit/kunit.py run longest-symbol * --arch=x86_64 --kconfig_add CONFIG_KPROBES=y --kconfig_add CONFIG_MODULES=y - * --kconfig_add CONFIG_RETPOLINE=n --kconfig_add CONFIG_CFI_CLANG=n - * --kconfig_add CONFIG_MITIGATION_RETPOLINE=n + * --kconfig_add CONFIG_CPU_MITIGATIONS=n --kconfig_add CONFIG_GCOV_KERNEL=n */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt base-commit: 1a80a098c606b285fb0a13aa992af4f86da1ff06 -- 2.39.2

5 months, 2 weeks

2
2
0 0

[PATCH v3] selftests/futex: Convert 32bit timespec struct to 64bit version for 32bit compatibility mode

by Terry Tritton

Futex_waitv can not accept old_timespec32 struct, so userspace should convert it from 32bit to 64bit before syscall in 32bit compatible mode. This fix is based off [1] Link: https://lore.kernel.org/all/20231203235117.29677-1-wegao@suse.com/ [1] Originally-by: Wei Gao <wegao(a)suse.com> Signed-off-by: Terry Tritton <terry.tritton(a)linaro.org> --- Changes in v3: - Fix signed-off-by chain but for real this time Changes in v2: - Fix signed-off-by chain .../testing/selftests/futex/include/futex2test.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tools/testing/selftests/futex/include/futex2test.h b/tools/testing/selftests/futex/include/futex2test.h index ea79662405bc..6780e51eb2d6 100644 --- a/tools/testing/selftests/futex/include/futex2test.h +++ b/tools/testing/selftests/futex/include/futex2test.h @@ -55,6 +55,13 @@ struct futex32_numa { futex_t numa; }; +#if !defined(__LP64__) +struct timespec64 { + int64_t tv_sec; + int64_t tv_nsec; +}; +#endif + /** * futex_waitv - Wait at multiple futexes, wake on any * @waiters: Array of waiters @@ -65,7 +72,15 @@ struct futex32_numa { static inline int futex_waitv(volatile struct futex_waitv *waiters, unsigned long nr_waiters, unsigned long flags, struct timespec *timo, clockid_t clockid) { +#if !defined(__LP64__) + struct timespec64 timo64 = {0}; + + timo64.tv_sec = timo->tv_sec; + timo64.tv_nsec = timo->tv_nsec; + return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, &timo64, clockid); +#else return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, timo, clockid); +#endif } /* -- 2.39.5

5 months, 2 weeks

3
2
0 0

[PATCH] selftests: print installation complete message

by Shuah Khan

Add installation complete message to Makefile install logic. Signed-off-by: Shuah Khan <skhan(a)linuxfoundation.org> --- tools/testing/selftests/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 9dae84a74e7f..b95de208265a 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -300,6 +300,7 @@ ifdef INSTALL_PATH else \ printf "Unable to get version from git describe\n"; \ fi + @echo "**Kselftest Installation is complete: $(INSTALL_PATH)**" else $(error Error: set INSTALL_PATH to use install) endif -- 2.47.2

5 months, 2 weeks

1
0
0 0

[PATCH net-next v3 7/7] selftests: net: extend SCM_PIDFD test to cover stale pidfds

by Alexander Mikhalitsyn

Extend SCM_PIDFD test scenarios to also cover dead task's pidfd retrieval and reading its exit info. Cc: linux-kselftest(a)vger.kernel.org Cc: linux-kernel(a)vger.kernel.org Cc: netdev(a)vger.kernel.org Cc: Shuah Khan <shuah(a)kernel.org> Cc: "David S. Miller" <davem(a)davemloft.net> Cc: Eric Dumazet <edumazet(a)google.com> Cc: Jakub Kicinski <kuba(a)kernel.org> Cc: Paolo Abeni <pabeni(a)redhat.com> Cc: Simon Horman <horms(a)kernel.org> Cc: Christian Brauner <brauner(a)kernel.org> Cc: Kuniyuki Iwashima <kuniyu(a)google.com> Cc: Lennart Poettering <mzxreary(a)0pointer.de> Cc: Luca Boccassi <bluca(a)debian.org> Cc: David Rheinsberg <david(a)readahead.eu> Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn(a)canonical.com> Reviewed-by: Christian Brauner <brauner(a)kernel.org> --- .../testing/selftests/net/af_unix/scm_pidfd.c | 217 ++++++++++++++---- 1 file changed, 173 insertions(+), 44 deletions(-) diff --git a/tools/testing/selftests/net/af_unix/scm_pidfd.c b/tools/testing/selftests/net/af_unix/scm_pidfd.c index 7e534594167e..37e034874034 100644 --- a/tools/testing/selftests/net/af_unix/scm_pidfd.c +++ b/tools/testing/selftests/net/af_unix/scm_pidfd.c @@ -15,6 +15,7 @@ #include <sys/types.h> #include <sys/wait.h> +#include "../../pidfd/pidfd.h" #include "../../kselftest_harness.h" #define clean_errno() (errno == 0 ? 
"None" : strerror(errno)) @@ -26,6 +27,8 @@ #define SCM_PIDFD 0x04 #endif +#define CHILD_EXIT_CODE_OK 123 + static void child_die() { exit(1); @@ -126,16 +129,65 @@ static pid_t get_pid_from_fdinfo_file(int pidfd, const char *key, size_t keylen) return result; } +struct cmsg_data { + struct ucred *ucred; + int *pidfd; +}; + +static int parse_cmsg(struct msghdr *msg, struct cmsg_data *res) +{ + struct cmsghdr *cmsg; + int data = 0; + + if (msg->msg_flags & (MSG_TRUNC | MSG_CTRUNC)) { + log_err("recvmsg: truncated"); + return 1; + } + + for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL; + cmsg = CMSG_NXTHDR(msg, cmsg)) { + if (cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SCM_PIDFD) { + if (cmsg->cmsg_len < sizeof(*res->pidfd)) { + log_err("CMSG parse: SCM_PIDFD wrong len"); + return 1; + } + + res->pidfd = (void *)CMSG_DATA(cmsg); + } + + if (cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SCM_CREDENTIALS) { + if (cmsg->cmsg_len < sizeof(*res->ucred)) { + log_err("CMSG parse: SCM_CREDENTIALS wrong len"); + return 1; + } + + res->ucred = (void *)CMSG_DATA(cmsg); + } + } + + if (!res->pidfd) { + log_err("CMSG parse: SCM_PIDFD not found"); + return 1; + } + + if (!res->ucred) { + log_err("CMSG parse: SCM_CREDENTIALS not found"); + return 1; + } + + return 0; +} + static int cmsg_check(int fd) { struct msghdr msg = { 0 }; - struct cmsghdr *cmsg; + struct cmsg_data res; struct iovec iov; - struct ucred *ucred = NULL; int data = 0; char control[CMSG_SPACE(sizeof(struct ucred)) + CMSG_SPACE(sizeof(int))] = { 0 }; - int *pidfd = NULL; pid_t parent_pid; int err; @@ -158,53 +210,99 @@ static int cmsg_check(int fd) return 1; } - for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; - cmsg = CMSG_NXTHDR(&msg, cmsg)) { - if (cmsg->cmsg_level == SOL_SOCKET && - cmsg->cmsg_type == SCM_PIDFD) { - if (cmsg->cmsg_len < sizeof(*pidfd)) { - log_err("CMSG parse: SCM_PIDFD wrong len"); - return 1; - } + /* send(pfd, "x", sizeof(char), 0) */ + if (data != 'x') { + log_err("recvmsg: 
data corruption"); + return 1; + } - pidfd = (void *)CMSG_DATA(cmsg); - } + if (parse_cmsg(&msg, &res)) { + log_err("CMSG parse: parse_cmsg() failed"); + return 1; + } - if (cmsg->cmsg_level == SOL_SOCKET && - cmsg->cmsg_type == SCM_CREDENTIALS) { - if (cmsg->cmsg_len < sizeof(*ucred)) { - log_err("CMSG parse: SCM_CREDENTIALS wrong len"); - return 1; - } + /* pidfd from SCM_PIDFD should point to the parent process PID */ + parent_pid = + get_pid_from_fdinfo_file(*res.pidfd, "Pid:", sizeof("Pid:") - 1); + if (parent_pid != getppid()) { + log_err("wrong SCM_PIDFD %d != %d", parent_pid, getppid()); + close(*res.pidfd); + return 1; + } - ucred = (void *)CMSG_DATA(cmsg); - } + close(*res.pidfd); + return 0; +} + +static int cmsg_check_dead(int fd, int expected_pid) +{ + int err; + struct msghdr msg = { 0 }; + struct cmsg_data res; + struct iovec iov; + int data = 0; + char control[CMSG_SPACE(sizeof(struct ucred)) + + CMSG_SPACE(sizeof(int))] = { 0 }; + pid_t client_pid; + struct pidfd_info info = { + .mask = PIDFD_INFO_EXIT, + }; + + iov.iov_base = &data; + iov.iov_len = sizeof(data); + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + err = recvmsg(fd, &msg, 0); + if (err < 0) { + log_err("recvmsg"); + return 1; } - /* send(pfd, "x", sizeof(char), 0) */ - if (data != 'x') { + if (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) { + log_err("recvmsg: truncated"); + return 1; + } + + /* send(cfd, "y", sizeof(char), 0) */ + if (data != 'y') { log_err("recvmsg: data corruption"); return 1; } - if (!pidfd) { - log_err("CMSG parse: SCM_PIDFD not found"); + if (parse_cmsg(&msg, &res)) { + log_err("CMSG parse: parse_cmsg() failed"); return 1; } - if (!ucred) { - log_err("CMSG parse: SCM_CREDENTIALS not found"); + /* + * pidfd from SCM_PIDFD should point to the client_pid. + * Let's read exit information and check if it's what + * we expect to see. 
+ */ + if (ioctl(*res.pidfd, PIDFD_GET_INFO, &info)) { + log_err("%s: ioctl(PIDFD_GET_INFO) failed", __func__); + close(*res.pidfd); return 1; } - /* pidfd from SCM_PIDFD should point to the parent process PID */ - parent_pid = - get_pid_from_fdinfo_file(*pidfd, "Pid:", sizeof("Pid:") - 1); - if (parent_pid != getppid()) { - log_err("wrong SCM_PIDFD %d != %d", parent_pid, getppid()); + if (!(info.mask & PIDFD_INFO_EXIT)) { + log_err("%s: No exit information from ioctl(PIDFD_GET_INFO)", __func__); + close(*res.pidfd); return 1; } + err = WIFEXITED(info.exit_code) ? WEXITSTATUS(info.exit_code) : 1; + if (err != CHILD_EXIT_CODE_OK) { + log_err("%s: wrong exit_code %d != %d", __func__, err, CHILD_EXIT_CODE_OK); + close(*res.pidfd); + return 1; + } + + close(*res.pidfd); return 0; } @@ -291,6 +389,24 @@ static void fill_sockaddr(struct sock_addr *addr, bool abstract) memcpy(sun_path_buf, addr->sock_name, strlen(addr->sock_name)); } +static int sk_enable_cred_pass(int sk) +{ + int on = 0; + + on = 1; + if (setsockopt(sk, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on))) { + log_err("Failed to set SO_PASSCRED"); + return 1; + } + + if (setsockopt(sk, SOL_SOCKET, SO_PASSPIDFD, &on, sizeof(on))) { + log_err("Failed to set SO_PASSPIDFD"); + return 1; + } + + return 0; +} + static void client(FIXTURE_DATA(scm_pidfd) *self, const FIXTURE_VARIANT(scm_pidfd) *variant) { @@ -299,7 +415,6 @@ static void client(FIXTURE_DATA(scm_pidfd) *self, struct ucred peer_cred; int peer_pidfd; pid_t peer_pid; - int on = 0; cfd = socket(AF_UNIX, variant->type, 0); if (cfd < 0) { @@ -322,14 +437,8 @@ static void client(FIXTURE_DATA(scm_pidfd) *self, child_die(); } - on = 1; - if (setsockopt(cfd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on))) { - log_err("Failed to set SO_PASSCRED"); - child_die(); - } - - if (setsockopt(cfd, SOL_SOCKET, SO_PASSPIDFD, &on, sizeof(on))) { - log_err("Failed to set SO_PASSPIDFD"); + if (sk_enable_cred_pass(cfd)) { + log_err("sk_enable_cred_pass() failed"); child_die(); } @@ 
-340,6 +449,12 @@ static void client(FIXTURE_DATA(scm_pidfd) *self, child_die(); } + /* send something to the parent so it can receive SCM_PIDFD too and validate it */ + if (send(cfd, "y", sizeof(char), 0) == -1) { + log_err("Failed to send(cfd, \"y\", sizeof(char), 0)"); + child_die(); + } + /* skip further for SOCK_DGRAM as it's not applicable */ if (variant->type == SOCK_DGRAM) return; @@ -398,7 +513,13 @@ TEST_F(scm_pidfd, test) close(self->server); close(self->startup_pipe[0]); client(self, variant); - exit(0); + + /* + * It's a bit unusual, but in case of success we return non-zero + * exit code (CHILD_EXIT_CODE_OK) and then we expect to read it + * from ioctl(PIDFD_GET_INFO) in cmsg_check_dead(). + */ + exit(CHILD_EXIT_CODE_OK); } close(self->startup_pipe[1]); @@ -421,9 +542,17 @@ TEST_F(scm_pidfd, test) ASSERT_NE(-1, err); } - close(pfd); waitpid(self->client_pid, &child_status, 0); - ASSERT_EQ(0, WIFEXITED(child_status) ? WEXITSTATUS(child_status) : 1); + /* see comment before exit(CHILD_EXIT_CODE_OK) */ + ASSERT_EQ(CHILD_EXIT_CODE_OK, WIFEXITED(child_status) ? WEXITSTATUS(child_status) : 1); + + err = sk_enable_cred_pass(pfd); + ASSERT_EQ(0, err); + + err = cmsg_check_dead(pfd, self->client_pid); + ASSERT_EQ(0, err); + + close(pfd); } TEST_HARNESS_MAIN -- 2.43.0

5 months, 2 weeks

1
0
0 0

[PATCH v2] selftests/powerpc: fix "for a while" typo

by Ahelenia Ziemiańska

Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli(a)nabijaczleweli.xyz> --- v1: https://lore.kernel.org/lkml/h2ieddqja5jfrnuh3mvlxt6njrvp352t5rfzp2cvnrufop… tools/testing/selftests/powerpc/tm/tm-tmspr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/tm/tm-tmspr.c b/tools/testing/selftests/powerpc/tm/tm-tmspr.c index dd5ddffa28b7..0d64988ffb40 100644 --- a/tools/testing/selftests/powerpc/tm/tm-tmspr.c +++ b/tools/testing/selftests/powerpc/tm/tm-tmspr.c @@ -14,7 +14,7 @@ * (1) create more threads than cpus * (2) in each thread: * (a) set TFIAR and TFHAR a unique value - * (b) loop for awhile, continually checking to see if + * (b) loop for a while, continually checking to see if * either register has been corrupted. * * (3) Loop: -- 2.39.5

5 months, 2 weeks

1
0
0 0

[PATCH v6 00/28] KVM: arm64: Implement support for SME

by Mark Brown

I've removed the RFC tag from this version of the series, but the items that I'm looking for feedback on remain the same: - The userspace ABI, in particular: - The vector length used for the SVE registers, access to the SVE registers and access to ZA and (if available) ZT0 depending on the current state of PSTATE.{SM,ZA}. - The use of a single finalisation for both SVE and SME. - The addition of control for enabling fine grained traps in a similar manner to FGU but without the UNDEF, I'm not clear if this is desired at all and at present this requires symmetric read and write traps like FGU. That seemed like it might be desired from an implementation point of view but we already have one case where we enable an asymmetric trap (for ARM64_WORKAROUND_AMPERE_AC03_CPU_38) and it seems generally useful to enable asymmetrically. This series implements support for SME use in non-protected KVM guests. Much of this is very similar to SVE, the main additional challenge that SME presents is that it introduces a new vector length similar to the SVE vector length and two new controls which change the registers seen by guests: - PSTATE.ZA enables the ZA matrix register and, if SME2 is supported, the ZT0 LUT register. - PSTATE.SM enables streaming mode, a new floating point mode which uses the SVE register set with the separately configured SME vector length. In streaming mode implementation of the FFR register is optional. It is also permitted to build systems which support SME without SVE, in this case when not in streaming mode no SVE registers or instructions are available. Further, there is no requirement that there be any overlap in the set of vector lengths supported by SVE and SME in a system, this is expected to be a common situation in practical systems. Since there is a new vector length to configure we introduce a new feature parallel to the existing SVE one with a new pseudo register for the streaming mode vector length. 
Due to the overlap with SVE caused by streaming mode rather than finalising SME as a separate feature we use the existing SVE finalisation to also finalise SME, a new define KVM_ARM_VCPU_VEC is provided to help make user code clearer. Finalising SVE and SME separately would introduce complication with register access since finalising SVE makes the SVE registers writeable by userspace and doing multiple finalisations results in an error being reported. Dealing with a state where the SVE registers are writeable due to one of SVE or SME being finalised but may have their VL changed by the other being finalised seems like needless complexity with minimal practical utility, it seems clearer to just express directly that only one finalisation can be done in the ABI. Access to the floating point registers follows the architecture: - When both SVE and SME are present: - If PSTATE.SM == 0 the vector length used for the Z and P registers is the SVE vector length. - If PSTATE.SM == 1 the vector length used for the Z and P registers is the SME vector length. - If only SME is present: - If PSTATE.SM == 0 the Z and P registers are inaccessible and the floating point state is accessed via the encodings for the V registers. - If PSTATE.SM == 1 the vector length used for the Z and P registers is the SME vector length. - The SME specific ZA and ZT0 registers are only accessible if SVCR.ZA is 1. The VMM must understand this, in particular when loading state SVCR should be configured before other state. It should be noted that while the architecture refers to PSTATE.SM and PSTATE.ZA these PSTATE bits are not preserved in SPSR_ELx, they are only accessible via SVCR. There are a large number of subfeatures for SME, most of which only offer additional instructions but some of which (SME2 and FA64) add architectural state. These are configured via the ID registers as per usual. 
Protected KVM is supported, with the implementation maintaining the existing restriction that the hypervisor will refuse to run if streaming mode or ZA is enabled. This both simplifies the code and avoids the need to allocate storage for host ZA and ZT0 state, there seems to be little practical use case for supporting this and the memory usage would be non-trivial. The new KVM_ARM_VCPU_VEC feature and ZA and ZT0 registers have not been added to the get-reg-list selftest, the idea of supporting additional features there without restructuring the program to generate all possible feature combinations has been rejected. I will post a separate series which does that restructuring. Signed-off-by: Mark Brown <broonie(a)kernel.org> --- Changes in v6: - Rebase onto v6.16-rc3. - Link to v5: https://lore.kernel.org/r/20250417-kvm-arm64-sme-v5-0-f469a2d5f574@kernel.o… Changes in v5: - Rebase onto v6.15-rc2. - Add pKVM guest support. - Always restore SVCR. - Link to v4: https://lore.kernel.org/r/20250214-kvm-arm64-sme-v4-0-d64a681adcc2@kernel.o… Changes in v4: - Rebase onto v6.14-rc2 and Mark Rutland's fixes. - Expose SME to nested guests. - Additional cleanups and test fixes following on from the rebase. - Flush register state on VMM PSTATE.{SM,ZA}. - Link to v3: https://lore.kernel.org/r/20241220-kvm-arm64-sme-v3-0-05b018c1ffeb@kernel.o… Changes in v3: - Rebase onto v6.12-rc2. - Link to v2: https://lore.kernel.org/r/20231222-kvm-arm64-sme-v2-0-da226cb180bb@kernel.o… Changes in v2: - Rebase onto v6.7-rc3. - Configure subfeatures based on host system only. - Complete nVHE support. - There was some snafu with sending v1 out, it didn't make it to the lists but in case it hit people's inboxes I'm sending as v2. 
--- Mark Brown (28): arm64/fpsimd: Update FA64 and ZT0 enables when loading SME state arm64/fpsimd: Decide to save ZT0 and streaming mode FFR at bind time arm64/fpsimd: Check enable bit for FA64 when saving EFI state arm64/fpsimd: Determine maximum virtualisable SME vector length KVM: arm64: Introduce non-UNDEF FGT control KVM: arm64: Pay attention to FFR parameter in SVE save and load KVM: arm64: Pull ctxt_has_ helpers to start of sysreg-sr.h KVM: arm64: Move SVE state access macros after feature test macros KVM: arm64: Rename SVE finalization constants to be more general KVM: arm64: Document the KVM ABI for SME KVM: arm64: Define internal features for SME KVM: arm64: Rename sve_state_reg_region KVM: arm64: Store vector lengths in an array KVM: arm64: Implement SME vector length configuration KVM: arm64: Support SME control registers KVM: arm64: Support TPIDR2_EL0 KVM: arm64: Support SME identification registers for guests KVM: arm64: Support SME priority registers KVM: arm64: Provide assembly for SME register access KVM: arm64: Support userspace access to streaming mode Z and P registers KVM: arm64: Flush register state on writes to SVCR.SM and SVCR.ZA KVM: arm64: Expose SME specific state to userspace KVM: arm64: Context switch SME state for guests KVM: arm64: Handle SME exceptions KVM: arm64: Expose SME to nested guests KVM: arm64: Provide interface for configuring and enabling SME for guests KVM: arm64: selftests: Add SME system registers to get-reg-list KVM: arm64: selftests: Add SME to set_id_regs test Documentation/virt/kvm/api.rst | 117 +++++++---- arch/arm64/include/asm/fpsimd.h | 26 +++ arch/arm64/include/asm/kvm_emulate.h | 6 + arch/arm64/include/asm/kvm_host.h | 168 ++++++++++++--- arch/arm64/include/asm/kvm_hyp.h | 5 +- arch/arm64/include/asm/kvm_pkvm.h | 2 +- arch/arm64/include/asm/vncr_mapping.h | 2 + arch/arm64/include/uapi/asm/kvm.h | 33 +++ arch/arm64/kernel/cpufeature.c | 2 - arch/arm64/kernel/fpsimd.c | 89 ++++---- arch/arm64/kvm/arm.c | 10 + 
arch/arm64/kvm/fpsimd.c | 28 ++- arch/arm64/kvm/guest.c | 252 ++++++++++++++++++++--- arch/arm64/kvm/handle_exit.c | 14 ++ arch/arm64/kvm/hyp/fpsimd.S | 28 ++- arch/arm64/kvm/hyp/include/hyp/switch.h | 175 ++++++++++++++-- arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 97 +++++---- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 86 ++++++-- arch/arm64/kvm/hyp/nvhe/pkvm.c | 81 ++++++-- arch/arm64/kvm/hyp/nvhe/switch.c | 4 +- arch/arm64/kvm/hyp/nvhe/sys_regs.c | 6 + arch/arm64/kvm/hyp/vhe/switch.c | 17 +- arch/arm64/kvm/nested.c | 3 +- arch/arm64/kvm/reset.c | 156 ++++++++++---- arch/arm64/kvm/sys_regs.c | 140 ++++++++++++- include/uapi/linux/kvm.h | 1 + tools/testing/selftests/kvm/arm64/get-reg-list.c | 32 ++- tools/testing/selftests/kvm/arm64/set_id_regs.c | 29 ++- 28 files changed, 1315 insertions(+), 294 deletions(-) --- base-commit: 7204503c922cfdb4fcfce4a4ab61f4558a01a73b change-id: 20230301-kvm-arm64-sme-06a1246d3636 Best regards, -- Mark Brown <broonie(a)kernel.org>

5 months, 2 weeks

2
31
0 0

[PATCH v2 0/3] kselftest/arm64: Update sve-ptrace for ABI changes

by Mark Brown

Mark Rutland's recent SME fixes updated the SME ABI to reject any attempt to write FPSIMD register data via the streaming mode SVE register set but did not update the sve-ptrace test to take account of this, resulting in spurious failures. Update the test for this, and also fix another preexisting issue I noticed while looking at this. Signed-off-by: Mark Brown <broonie(a)kernel.org> --- Changes in v2: - Rebase onto v6.16-rc1. - Update fixes tag for patch 1. - Link to v1: https://lore.kernel.org/r/20250523-kselftest-arm64-ssve-fixups-v1-0-65069a2… --- Mark Brown (3): kselftest/arm64: Fix check for setting new VLs in sve-ptrace kselftest/arm64: Fix test for streaming FPSIMD write in sve-ptrace kselftest/arm64: Specify SVE data when testing VL set in sve-ptrace tools/testing/selftests/arm64/fp/sve-ptrace.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) --- base-commit: 19272b37aa4f83ca52bdf9c16d5d81bdd1354494 change-id: 20250523-kselftest-arm64-ssve-fixups-b68ae61c1ebf Best regards, -- Mark Brown <broonie(a)kernel.org>

5 months, 2 weeks

2
4
0 0

[PATCH] kselftest/arm64: Convert tpidr2 test to use kselftest.h

by Mark Brown

Recent work by Thomas Weißschuh means that it is now possible to use kselftest.h with nolibc. Convert the tpidr2 test which is nolibc specific to use kselftest.h, making it look more standard and ensuring it gets the benefit of any work done on kselftest.h. Signed-off-by: Mark Brown <broonie(a)kernel.org> --- tools/testing/selftests/arm64/abi/Makefile | 2 +- tools/testing/selftests/arm64/abi/tpidr2.c | 140 ++++++++--------------------- 2 files changed, 38 insertions(+), 104 deletions(-) diff --git a/tools/testing/selftests/arm64/abi/Makefile b/tools/testing/selftests/arm64/abi/Makefile index a6d30c620908..483488f8c2ad 100644 --- a/tools/testing/selftests/arm64/abi/Makefile +++ b/tools/testing/selftests/arm64/abi/Makefile @@ -12,4 +12,4 @@ $(OUTPUT)/syscall-abi: syscall-abi.c syscall-abi-asm.S $(OUTPUT)/tpidr2: tpidr2.c $(CC) -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \ -static -include ../../../../include/nolibc/nolibc.h \ - -ffreestanding -Wall $^ -o $@ -lgcc + -I../.. 
-ffreestanding -Wall $^ -o $@ -lgcc diff --git a/tools/testing/selftests/arm64/abi/tpidr2.c b/tools/testing/selftests/arm64/abi/tpidr2.c index eb19dcc37a75..f58a9f89b952 100644 --- a/tools/testing/selftests/arm64/abi/tpidr2.c +++ b/tools/testing/selftests/arm64/abi/tpidr2.c @@ -3,31 +3,12 @@ #include <linux/sched.h> #include <linux/wait.h> +#include "kselftest.h" + #define SYS_TPIDR2 "S3_3_C13_C0_5" #define EXPECTED_TESTS 5 -static void putstr(const char *str) -{ - write(1, str, strlen(str)); -} - -static void putnum(unsigned int num) -{ - char c; - - if (num / 10) - putnum(num / 10); - - c = '0' + (num % 10); - write(1, &c, 1); -} - -static int tests_run; -static int tests_passed; -static int tests_failed; -static int tests_skipped; - static void set_tpidr2(uint64_t val) { asm volatile ( @@ -50,20 +31,6 @@ static uint64_t get_tpidr2(void) return val; } -static void print_summary(void) -{ - if (tests_passed + tests_failed + tests_skipped != EXPECTED_TESTS) - putstr("# UNEXPECTED TEST COUNT: "); - - putstr("# Totals: pass:"); - putnum(tests_passed); - putstr(" fail:"); - putnum(tests_failed); - putstr(" xfail:0 xpass:0 skip:"); - putnum(tests_skipped); - putstr(" error:0\n"); -} - /* Processes should start with TPIDR2 == 0 */ static int default_value(void) { @@ -105,9 +72,8 @@ static int write_fork_read(void) if (newpid == 0) { /* In child */ if (get_tpidr2() != oldpid) { - putstr("# TPIDR2 changed in child: "); - putnum(get_tpidr2()); - putstr("\n"); + ksft_print_msg("TPIDR2 changed in child: %llx\n", + get_tpidr2()); exit(0); } @@ -115,14 +81,12 @@ static int write_fork_read(void) if (get_tpidr2() == getpid()) { exit(1); } else { - putstr("# Failed to set TPIDR2 in child\n"); + ksft_print_msg("Failed to set TPIDR2 in child\n"); exit(0); } } if (newpid < 0) { - putstr("# fork() failed: -"); - putnum(-newpid); - putstr("\n"); + ksft_print_msg("fork() failed: %d\n", newpid); return 0; } @@ -132,23 +96,22 @@ static int write_fork_read(void) if (waiting < 0) { if 
(errno == EINTR) continue; - putstr("# waitpid() failed: "); - putnum(errno); - putstr("\n"); + ksft_print_msg("waitpid() failed: %d\n", errno); return 0; } if (waiting != newpid) { - putstr("# waitpid() returned wrong PID\n"); + ksft_print_msg("waitpid() returned wrong PID: %d != %d\n", + waiting, newpid); return 0; } if (!WIFEXITED(status)) { - putstr("# child did not exit\n"); + ksft_print_msg("child did not exit\n"); return 0; } if (getpid() != get_tpidr2()) { - putstr("# TPIDR2 corrupted in parent\n"); + ksft_print_msg("TPIDR2 corrupted in parent\n"); return 0; } @@ -188,35 +151,32 @@ static int write_clone_read(void) stack = malloc(__STACK_SIZE); if (!stack) { - putstr("# malloc() failed\n"); + ksft_print_msg("malloc() failed\n"); return 0; } ret = sys_clone(CLONE_VM, (unsigned long)stack + __STACK_SIZE, &parent_tid, 0, &child_tid); if (ret == -1) { - putstr("# clone() failed\n"); - putnum(errno); - putstr("\n"); + ksft_print_msg("clone() failed: %d\n", errno); return 0; } if (ret == 0) { /* In child */ if (get_tpidr2() != 0) { - putstr("# TPIDR2 non-zero in child: "); - putnum(get_tpidr2()); - putstr("\n"); + ksft_print_msg("TPIDR2 non-zero in child: %llx\n", + get_tpidr2()); exit(0); } if (gettid() == 0) - putstr("# Child TID==0\n"); + ksft_print_msg("Child TID==0\n"); set_tpidr2(gettid()); if (get_tpidr2() == gettid()) { exit(1); } else { - putstr("# Failed to set TPIDR2 in child\n"); + ksft_print_msg("Failed to set TPIDR2 in child\n"); exit(0); } } @@ -227,25 +187,22 @@ static int write_clone_read(void) if (waiting < 0) { if (errno == EINTR) continue; - putstr("# wait4() failed: "); - putnum(errno); - putstr("\n"); + ksft_print_msg("wait4() failed: %d\n", errno); return 0; } if (waiting != ret) { - putstr("# wait4() returned wrong PID "); - putnum(waiting); - putstr("\n"); + ksft_print_msg("wait4() returned wrong PID %d\n", + waiting); return 0; } if (!WIFEXITED(status)) { - putstr("# child did not exit\n"); + ksft_print_msg("child did not exit\n"); 
return 0; } if (parent != get_tpidr2()) { - putstr("# TPIDR2 corrupted in parent\n"); + ksft_print_msg("TPIDR2 corrupted in parent\n"); return 0; } @@ -253,35 +210,14 @@ static int write_clone_read(void) } } -#define run_test(name) \ - if (name()) { \ - tests_passed++; \ - } else { \ - tests_failed++; \ - putstr("not "); \ - } \ - putstr("ok "); \ - putnum(++tests_run); \ - putstr(" " #name "\n"); - -#define skip_test(name) \ - tests_skipped++; \ - putstr("ok "); \ - putnum(++tests_run); \ - putstr(" # SKIP " #name "\n"); - int main(int argc, char **argv) { int ret; - putstr("TAP version 13\n"); - putstr("1.."); - putnum(EXPECTED_TESTS); - putstr("\n"); + ksft_print_header(); + ksft_set_plan(5); - putstr("# PID: "); - putnum(getpid()); - putstr("\n"); + ksft_print_msg("PID: %d\n", getpid()); /* * This test is run with nolibc which doesn't support hwcap and @@ -290,23 +226,21 @@ int main(int argc, char **argv) */ ret = open("/proc/sys/abi/sme_default_vector_length", O_RDONLY, 0); if (ret >= 0) { - run_test(default_value); - run_test(write_read); - run_test(write_sleep_read); - run_test(write_fork_read); - run_test(write_clone_read); + ksft_test_result(default_value(), "default_value\n"); + ksft_test_result(write_read, "write_read\n"); + ksft_test_result(write_sleep_read, "write_sleep_read\n"); + ksft_test_result(write_fork_read, "write_fork_read\n"); + ksft_test_result(write_clone_read, "write_clone_read\n"); } else { - putstr("# SME support not present\n"); + ksft_print_msg("SME support not present\n"); - skip_test(default_value); - skip_test(write_read); - skip_test(write_sleep_read); - skip_test(write_fork_read); - skip_test(write_clone_read); + ksft_test_result_skip("default_value\n"); + ksft_test_result_skip("write_read\n"); + ksft_test_result_skip("write_sleep_read\n"); + ksft_test_result_skip("write_fork_read\n"); + ksft_test_result_skip("write_clone_read\n"); } - print_summary(); - - return 0; + ksft_finished(); } --- base-commit: 
19272b37aa4f83ca52bdf9c16d5d81bdd1354494 change-id: 20250527-kselftest-arm64-nolibc-header-b650a457f7f4 Best regards, -- Mark Brown <broonie(a)kernel.org>

5 months, 2 weeks

2
1
0 0

[PATCH v2 0/3] arm64: Support FEAT_LSFE (Large System Float Extension)

by Mark Brown

FEAT_LSFE is optional from v9.5, it adds new instructions for atomic memory operations with floating point values. We have no immediate use for it in kernel, provide a hwcap so userspace can discover it and allow the ID register field to be exposed to KVM guests. Signed-off-by: Mark Brown <broonie(a)kernel.org> --- Changes in v2: - Fix result of vi dropping in hwcap test. - Link to v1: https://lore.kernel.org/r/20250627-arm64-lsfe-v1-0-68351c4bf741@kernel.org --- Mark Brown (3): arm64/hwcap: Add hwcap for FEAT_LSFE KVM: arm64: Expose FEAT_LSFE to guests kselftest/arm64: Add lsfe to the hwcaps test Documentation/arch/arm64/elf_hwcaps.rst | 4 ++++ arch/arm64/include/asm/hwcap.h | 1 + arch/arm64/include/uapi/asm/hwcap.h | 1 + arch/arm64/kernel/cpufeature.c | 2 ++ arch/arm64/kernel/cpuinfo.c | 1 + arch/arm64/kvm/sys_regs.c | 4 +++- tools/testing/selftests/arm64/abi/hwcap.c | 21 +++++++++++++++++++++ 7 files changed, 33 insertions(+), 1 deletion(-) --- base-commit: 86731a2a651e58953fc949573895f2fa6d456841 change-id: 20250625-arm64-lsfe-0810cf98adc2 Best regards, -- Mark Brown <broonie(a)kernel.org>

5 months, 2 weeks

1
3
0 0

[RFC PATCH v1 0/2] kselftest/resctrl: CAT functional tests

by Ilpo Järvinen

Hi all, Last fall, Reinette mentioned that functional tests of resctrl would be preferred over selftests that are based on performance measurement. This series tries to address that shortcoming by adding some functional tests for the resctrl FS interface and another that checks that the MSRs match what is written through resctrl FS. The MSR test is only available for Intel CPUs at the moment. Why RFC? The new functional selftest itself works, AFAIK. However, calling ksft_test_result_skip() in cat.c if MSR reading is found to be unavailable is problematic because of how the kselftest harness is architected. The kselftest.h header itself defines some variables, so including it into different .c files results in duplicating the test framework related variables (duplication of ksft_count matters in this case). The duplication problem could be worked around by creating a resctrl selftest specific wrapper for ksft_test_result_skip() into resctrl_tests.c so the accounting would occur in the "correct" .c file, but perhaps that is considered hacky and the selftest framework/build systems should be reworked to avoid duplicating variables? Ilpo Järvinen (2): kselftest/resctrl: CAT L3 resctrl FS function tests kselftest/resctrl: Add CAT L3 CBM functional test for Intel RDT tools/testing/selftests/resctrl/cat_test.c | 210 ++++++++++++++++++ tools/testing/selftests/resctrl/msr.c | 55 +++++ tools/testing/selftests/resctrl/resctrl.h | 6 + .../testing/selftests/resctrl/resctrl_tests.c | 2 + tools/testing/selftests/resctrl/resctrlfs.c | 48 ++++ 5 files changed, 321 insertions(+) create mode 100644 tools/testing/selftests/resctrl/msr.c base-commit: c1d7e19c70cbb8a19f63c190cf53e71b5f970514 -- 2.39.5

5 months, 2 weeks

2
5
0 0

[PATCH v2 0/7] selftests/mm: Fix false positives and skip unsupported tests

by Aboorva Devarajan

Hi all, This patch series addresses false positives in the generic mm selftests and skips tests that cannot run correctly due to missing features or system limitations. --- v1: https://lore.kernel.org/all/20250616160632.35250-1-aboorvad@linux.ibm.com/ Changes in v2: - Rebased onto the mm-new branch, top commit of the base is 3b4a8ad89f7e ("mm: add zblock allocator"). - Split some patches for clarity. - Updated virtual_address_range test to support testing 4PB VA on PPC64. - Added proper Fixes: tags. - Included a patch to skip a failing userfaultfd test when unsupported, instead of reporting a failure. --- Please let us know if you have any further comments. Thanks, Aboorva Aboorva Devarajan (3): selftests/mm: Fix child process exit codes in ksm_functional_tests selftests/mm: Skip thuge-gen if shmmax is too small or no 1G huge pages selftests/mm: Skip hugepage-mremap test if userfaultfd unavailable Donet Tom (4): mm/selftests: Fix incorrect pointer being passed to mark_range() selftests/mm: Add support to test 4PB VA on PPC64 selftest/mm: Fix ksm_funtional_test failures mm/selftests: Fix split_huge_page_test failure on systems with 64KB page size tools/testing/selftests/mm/hugepage-mremap.c | 16 ++++++++++--- .../selftests/mm/ksm_functional_tests.c | 24 +++++++++++++------ .../selftests/mm/split_huge_page_test.c | 23 +++++++++++++----- tools/testing/selftests/mm/thuge-gen.c | 11 +++++---- .../selftests/mm/virtual_address_range.c | 8 ++++++- 5 files changed, 61 insertions(+), 21 deletions(-) -- 2.43.5

5 months, 2 weeks

5
33
0 0

[PATCH net-next v2 0/2] Add IPIP flowtable SW acceleration

by Lorenzo Bianconi

Introduce SW acceleration for IPIP tunnels in the netfilter flowtable infrastructure. --- Changes in v2: - Introduce IPIP flowtable selftest - Link to v1: https://lore.kernel.org/r/20250623-nf-flowtable-ipip-v1-1-2853596e3941@kern… --- Lorenzo Bianconi (2): net: netfilter: Add IPIP flowtable SW acceleration selftests: netfilter: nft_flowtable.sh: Add IPIP flowtable selftest net/ipv4/ipip.c | 21 ++++++++++++ net/netfilter/nf_flow_table_ip.c | 28 +++++++++++++-- .../selftests/net/netfilter/nft_flowtable.sh | 40 ++++++++++++++++++++++ 3 files changed, 87 insertions(+), 2 deletions(-) --- base-commit: 8efa26fcbf8a7f783fd1ce7dd2a409e9b7758df0 change-id: 20250623-nf-flowtable-ipip-1b3d7b08d067 Best regards, -- Lorenzo Bianconi <lorenzo(a)kernel.org>

5 months, 2 weeks

4
6
0 0

[PATCH bpf-next v2 18/18] selftests/bpf: add bench tests for tracing_multi

by Menglong Dong

Add bench testcase for fentry_multi, fexit_multi and fmodret_multi in bench_trigger.c. Signed-off-by: Menglong Dong <dongml2(a)chinatelecom.cn> --- v2: - use the existing bpf bench framework instead of introducing new one --- tools/testing/selftests/bpf/bench.c | 8 +++ .../selftests/bpf/benchs/bench_trigger.c | 72 +++++++++++++++++++ .../selftests/bpf/benchs/run_bench_trigger.sh | 1 + .../selftests/bpf/progs/trigger_bench.c | 22 ++++++ 4 files changed, 103 insertions(+) diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index ddd73d06a1eb..32f1e2e936c0 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -510,8 +510,12 @@ extern const struct bench bench_trig_kretprobe; extern const struct bench bench_trig_kprobe_multi; extern const struct bench bench_trig_kretprobe_multi; extern const struct bench bench_trig_fentry; +extern const struct bench bench_trig_fentry_multi; +extern const struct bench bench_trig_fentry_multi_all; extern const struct bench bench_trig_fexit; +extern const struct bench bench_trig_fexit_multi; extern const struct bench bench_trig_fmodret; +extern const struct bench bench_trig_fmodret_multi; extern const struct bench bench_trig_tp; extern const struct bench bench_trig_rawtp; @@ -578,8 +582,12 @@ static const struct bench *benchs[] = { &bench_trig_kprobe_multi, &bench_trig_kretprobe_multi, &bench_trig_fentry, + &bench_trig_fentry_multi, + &bench_trig_fentry_multi_all, &bench_trig_fexit, + &bench_trig_fexit_multi, &bench_trig_fmodret, + &bench_trig_fmodret_multi, &bench_trig_tp, &bench_trig_rawtp, /* uprobes */ diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c index 82327657846e..a1844ee358f1 100644 --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c @@ -226,6 +226,54 @@ static void trigger_fentry_setup(void) 
attach_bpf(ctx.skel->progs.bench_trigger_fentry); } +static void trigger_fentry_multi_setup(void) +{ + setup_ctx(); + bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fentry_multi, true); + load_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_fentry_multi); +} + +static void trigger_fentry_multi_all_setup(void) +{ + LIBBPF_OPTS(bpf_trace_multi_opts, opts); + struct bpf_program *prog; + struct bpf_link *link; + char **syms = NULL; + size_t cnt = 0; + int i; + + setup_ctx(); + prog = ctx.skel->progs.bench_trigger_fentry_multi; + bpf_program__set_autoload(prog, true); + load_ctx(); + + if (bpf_get_ksyms(&syms, &cnt, true)) { + printf("failed to get ksyms\n"); + exit(1); + } + + for (i = 0; i < cnt; i++) { + if (strcmp(syms[i], "bpf_get_numa_node_id") == 0) + break; + } + if (i == cnt) { + printf("bpf_get_numa_node_id not found in ksyms\n"); + exit(1); + } + + printf("found %zu ksyms\n", cnt); + opts.syms = (const char **) syms; + opts.cnt = cnt; + opts.skip_invalid = true; + link = bpf_program__attach_trace_multi_opts(prog, &opts); + if (!link) { + printf("failed to attach bench_trigger_fentry_multi to all\n"); + exit(1); + } + ctx.skel->links.bench_trigger_fentry_multi = link; +} + static void trigger_fexit_setup(void) { setup_ctx(); @@ -234,6 +282,14 @@ static void trigger_fexit_setup(void) attach_bpf(ctx.skel->progs.bench_trigger_fexit); } +static void trigger_fexit_multi_setup(void) +{ + setup_ctx(); + bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fexit_multi, true); + load_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_fexit_multi); +} + static void trigger_fmodret_setup(void) { setup_ctx(); @@ -246,6 +302,18 @@ static void trigger_fmodret_setup(void) attach_bpf(ctx.skel->progs.bench_trigger_fmodret); } +static void trigger_fmodret_multi_setup(void) +{ + setup_ctx(); + bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false); + bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true); + 
bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fmodret_multi, true); + load_ctx(); + /* override driver program */ + ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc); + attach_bpf(ctx.skel->progs.bench_trigger_fmodret_multi); +} + static void trigger_tp_setup(void) { setup_ctx(); @@ -512,8 +580,12 @@ BENCH_TRIG_KERNEL(kretprobe, "kretprobe"); BENCH_TRIG_KERNEL(kprobe_multi, "kprobe-multi"); BENCH_TRIG_KERNEL(kretprobe_multi, "kretprobe-multi"); BENCH_TRIG_KERNEL(fentry, "fentry"); +BENCH_TRIG_KERNEL(fentry_multi, "fentry-multi"); +BENCH_TRIG_KERNEL(fentry_multi_all, "fentry-multi-all"); BENCH_TRIG_KERNEL(fexit, "fexit"); +BENCH_TRIG_KERNEL(fexit_multi, "fexit-multi"); BENCH_TRIG_KERNEL(fmodret, "fmodret"); +BENCH_TRIG_KERNEL(fmodret_multi, "fmodret-multi"); BENCH_TRIG_KERNEL(tp, "tp"); BENCH_TRIG_KERNEL(rawtp, "rawtp"); diff --git a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh index a690f5a68b6b..48a7f809d053 100755 --- a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh +++ b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh @@ -5,6 +5,7 @@ set -eufo pipefail def_tests=( \ usermode-count kernel-count syscall-count \ fentry fexit fmodret \ + fentry-multi fentry-multi-all fexit-multi fmodret-multi \ rawtp tp \ kprobe kprobe-multi \ kretprobe kretprobe-multi \ diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c index 044a6d78923e..2ff1a7568080 100644 --- a/tools/testing/selftests/bpf/progs/trigger_bench.c +++ b/tools/testing/selftests/bpf/progs/trigger_bench.c @@ -111,6 +111,13 @@ int bench_trigger_fentry(void *ctx) return 0; } +SEC("?fentry.multi/bpf_get_numa_node_id") +int bench_trigger_fentry_multi(void *ctx) +{ + inc_counter(); + return 0; +} + SEC("?fexit/bpf_get_numa_node_id") int bench_trigger_fexit(void *ctx) { @@ -118,6 +125,14 @@ int bench_trigger_fexit(void *ctx) return 0; } 
+SEC("?fexit.multi/bpf_get_numa_node_id") +int bench_trigger_fexit_multi(void *ctx) +{ + inc_counter(); + + return 0; +} + SEC("?fmod_ret/bpf_modify_return_test_tp") int bench_trigger_fmodret(void *ctx) { @@ -125,6 +140,13 @@ int bench_trigger_fmodret(void *ctx) return -22; } +SEC("?fmod_ret.multi/bpf_modify_return_test_tp") +int bench_trigger_fmodret_multi(void *ctx) +{ + inc_counter(); + return -22; +} + SEC("?tp/bpf_test_run/bpf_trigger_tp") int bench_trigger_tp(void *ctx) { -- 2.39.5

5 months, 2 weeks

1
0
0 0

[PATCH bpf-next v2 17/18] selftests/bpf: add basic testcases for tracing_multi

by Menglong Dong

In this commit, we add some testcases for the following attach types: BPF_TRACE_FENTRY_MULTI BPF_TRACE_FEXIT_MULTI BPF_MODIFY_RETURN_MULTI We reuse the tests in fentry_test.c, fexit_test.c and modify_return.c by attaching the tracing bpf prog as tracing_multi. We add some functions to skip for tracing progs to bpf_get_ksyms(). The functions that are in "btf_id_deny" should be skipped. What's more, the kernel can't find the right function address according to the btf type id when duplicate function names exist, so we skip such functions that we encounter. The list is not complete, so we can still fail when attaching FENTRY_MULTI to all the kernel functions. This is something that we need to fix in the future. Signed-off-by: Menglong Dong <dongml2(a)chinatelecom.cn> --- tools/testing/selftests/bpf/Makefile | 2 +- .../selftests/bpf/prog_tests/fentry_fexit.c | 22 +- .../selftests/bpf/prog_tests/fentry_test.c | 79 +++++-- .../selftests/bpf/prog_tests/fexit_test.c | 79 +++++-- .../selftests/bpf/prog_tests/modify_return.c | 60 +++++ .../bpf/prog_tests/tracing_multi_link.c | 210 ++++++++++++++++++ .../selftests/bpf/progs/fentry_multi_empty.c | 13 ++ .../selftests/bpf/progs/tracing_multi_test.c | 181 +++++++++++++++ .../selftests/bpf/test_kmods/bpf_testmod.c | 24 ++ tools/testing/selftests/bpf/test_progs.c | 50 +++++ tools/testing/selftests/bpf/test_progs.h | 3 + tools/testing/selftests/bpf/trace_helpers.c | 69 ++++++ 12 files changed, 744 insertions(+), 48 deletions(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/tracing_multi_link.c create mode 100644 tools/testing/selftests/bpf/progs/fentry_multi_empty.c create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_test.c diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 4863106034df..1fa0da096262 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -496,7 +496,7 @@ LINKED_SKELS := test_static_linked.skel.h 
linked_funcs.skel.h \ test_subskeleton.skel.h test_subskeleton_lib.skel.h \ test_usdt.skel.h -LSKELS := fentry_test.c fexit_test.c fexit_sleep.c atomics.c \ +LSKELS := fexit_sleep.c atomics.c \ trace_printk.c trace_vprintk.c map_ptr_kern.c \ core_kern.c core_kern_overflow.c test_ringbuf.c \ test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c index 130f5b82d2e6..84cc8b669684 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c @@ -1,32 +1,32 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ #include <test_progs.h> -#include "fentry_test.lskel.h" -#include "fexit_test.lskel.h" +#include "fentry_test.skel.h" +#include "fexit_test.skel.h" void test_fentry_fexit(void) { - struct fentry_test_lskel *fentry_skel = NULL; - struct fexit_test_lskel *fexit_skel = NULL; + struct fentry_test *fentry_skel = NULL; + struct fexit_test *fexit_skel = NULL; __u64 *fentry_res, *fexit_res; int err, prog_fd, i; LIBBPF_OPTS(bpf_test_run_opts, topts); - fentry_skel = fentry_test_lskel__open_and_load(); + fentry_skel = fentry_test__open_and_load(); if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load")) goto close_prog; - fexit_skel = fexit_test_lskel__open_and_load(); + fexit_skel = fexit_test__open_and_load(); if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load")) goto close_prog; - err = fentry_test_lskel__attach(fentry_skel); + err = fentry_test__attach(fentry_skel); if (!ASSERT_OK(err, "fentry_attach")) goto close_prog; - err = fexit_test_lskel__attach(fexit_skel); + err = fexit_test__attach(fexit_skel); if (!ASSERT_OK(err, "fexit_attach")) goto close_prog; - prog_fd = fexit_skel->progs.test1.prog_fd; + prog_fd = bpf_program__fd(fexit_skel->progs.test1); err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "ipv6 test_run"); ASSERT_OK(topts.retval, "ipv6 test 
retval"); @@ -40,6 +40,6 @@ void test_fentry_fexit(void) } close_prog: - fentry_test_lskel__destroy(fentry_skel); - fexit_test_lskel__destroy(fexit_skel); + fentry_test__destroy(fentry_skel); + fexit_test__destroy(fexit_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c index aee1bc77a17f..9edd383feabd 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c @@ -1,26 +1,16 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ #include <test_progs.h> -#include "fentry_test.lskel.h" +#include "fentry_test.skel.h" #include "fentry_many_args.skel.h" -static int fentry_test_common(struct fentry_test_lskel *fentry_skel) +static int fentry_test_check(struct fentry_test *fentry_skel) { + LIBBPF_OPTS(bpf_test_run_opts, topts); int err, prog_fd, i; - int link_fd; __u64 *result; - LIBBPF_OPTS(bpf_test_run_opts, topts); - - err = fentry_test_lskel__attach(fentry_skel); - if (!ASSERT_OK(err, "fentry_attach")) - return err; - /* Check that already linked program can't be attached again. */ - link_fd = fentry_test_lskel__test1__attach(fentry_skel); - if (!ASSERT_LT(link_fd, 0, "fentry_attach_link")) - return -1; - - prog_fd = fentry_skel->progs.test1.prog_fd; + prog_fd = bpf_program__fd(fentry_skel->progs.test1); err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "test_run"); ASSERT_EQ(topts.retval, 0, "test_run"); @@ -31,7 +21,28 @@ static int fentry_test_common(struct fentry_test_lskel *fentry_skel) return -1; } - fentry_test_lskel__detach(fentry_skel); + return 0; +} + +static int fentry_test_common(struct fentry_test *fentry_skel) +{ + struct bpf_link *link; + int err; + + err = fentry_test__attach(fentry_skel); + if (!ASSERT_OK(err, "fentry_attach")) + return err; + + /* Check that already linked program can't be attached again. 
*/ + link = bpf_program__attach(fentry_skel->progs.test1); + if (!ASSERT_ERR_PTR(link, "fentry_attach_link")) + return -1; + + err = fentry_test_check(fentry_skel); + if (!ASSERT_OK(err, "fentry_test_check")) + return err; + + fentry_test__detach(fentry_skel); /* zero results for re-attach test */ memset(fentry_skel->bss, 0, sizeof(*fentry_skel->bss)); @@ -40,10 +51,10 @@ static int fentry_test_common(struct fentry_test_lskel *fentry_skel) static void fentry_test(void) { - struct fentry_test_lskel *fentry_skel = NULL; + struct fentry_test *fentry_skel = NULL; int err; - fentry_skel = fentry_test_lskel__open_and_load(); + fentry_skel = fentry_test__open_and_load(); if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load")) goto cleanup; @@ -55,7 +66,7 @@ static void fentry_test(void) ASSERT_OK(err, "fentry_second_attach"); cleanup: - fentry_test_lskel__destroy(fentry_skel); + fentry_test__destroy(fentry_skel); } static void fentry_many_args(void) @@ -84,10 +95,42 @@ static void fentry_many_args(void) fentry_many_args__destroy(fentry_skel); } +static void fentry_multi_test(void) +{ + struct fentry_test *fentry_skel = NULL; + int err, prog_cnt; + + fentry_skel = fentry_test__open(); + if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_open")) + goto cleanup; + + prog_cnt = sizeof(fentry_skel->progs) / sizeof(long); + err = bpf_to_tracing_multi((void *)&fentry_skel->progs, prog_cnt); + if (!ASSERT_OK(err, "fentry_to_multi")) + goto cleanup; + + err = fentry_test__load(fentry_skel); + if (!ASSERT_OK(err, "fentry_skel_load")) + goto cleanup; + + err = bpf_attach_as_tracing_multi((void *)&fentry_skel->progs, + prog_cnt, + (void *)&fentry_skel->links); + if (!ASSERT_OK(err, "fentry_attach_multi")) + goto cleanup; + + err = fentry_test_check(fentry_skel); + ASSERT_OK(err, "fentry_first_attach"); +cleanup: + fentry_test__destroy(fentry_skel); +} + void test_fentry_test(void) { if (test__start_subtest("fentry")) fentry_test(); + if (test__start_subtest("fentry_multi")) + 
fentry_multi_test(); if (test__start_subtest("fentry_many_args")) fentry_many_args(); } diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c index 1c13007e37dd..5652d02b3ad9 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c @@ -1,26 +1,16 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ #include <test_progs.h> -#include "fexit_test.lskel.h" +#include "fexit_test.skel.h" #include "fexit_many_args.skel.h" -static int fexit_test_common(struct fexit_test_lskel *fexit_skel) +static int fexit_test_check(struct fexit_test *fexit_skel) { + LIBBPF_OPTS(bpf_test_run_opts, topts); int err, prog_fd, i; - int link_fd; __u64 *result; - LIBBPF_OPTS(bpf_test_run_opts, topts); - - err = fexit_test_lskel__attach(fexit_skel); - if (!ASSERT_OK(err, "fexit_attach")) - return err; - /* Check that already linked program can't be attached again. */ - link_fd = fexit_test_lskel__test1__attach(fexit_skel); - if (!ASSERT_LT(link_fd, 0, "fexit_attach_link")) - return -1; - - prog_fd = fexit_skel->progs.test1.prog_fd; + prog_fd = bpf_program__fd(fexit_skel->progs.test1); err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "test_run"); ASSERT_EQ(topts.retval, 0, "test_run"); @@ -31,7 +21,28 @@ static int fexit_test_common(struct fexit_test_lskel *fexit_skel) return -1; } - fexit_test_lskel__detach(fexit_skel); + return 0; +} + +static int fexit_test_common(struct fexit_test *fexit_skel) +{ + struct bpf_link *link; + int err; + + err = fexit_test__attach(fexit_skel); + if (!ASSERT_OK(err, "fexit_attach")) + return err; + + /* Check that already linked program can't be attached again. 
*/ + link = bpf_program__attach(fexit_skel->progs.test1); + if (!ASSERT_ERR_PTR(link, "fexit_attach_link")) + return -1; + + err = fexit_test_check(fexit_skel); + if (!ASSERT_OK(err, "fexit_test_check")) + return err; + + fexit_test__detach(fexit_skel); /* zero results for re-attach test */ memset(fexit_skel->bss, 0, sizeof(*fexit_skel->bss)); @@ -40,10 +51,10 @@ static int fexit_test_common(struct fexit_test_lskel *fexit_skel) static void fexit_test(void) { - struct fexit_test_lskel *fexit_skel = NULL; + struct fexit_test *fexit_skel = NULL; int err; - fexit_skel = fexit_test_lskel__open_and_load(); + fexit_skel = fexit_test__open_and_load(); if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load")) goto cleanup; @@ -55,7 +66,7 @@ static void fexit_test(void) ASSERT_OK(err, "fexit_second_attach"); cleanup: - fexit_test_lskel__destroy(fexit_skel); + fexit_test__destroy(fexit_skel); } static void fexit_many_args(void) @@ -84,10 +95,42 @@ static void fexit_many_args(void) fexit_many_args__destroy(fexit_skel); } +static void fexit_test_multi(void) +{ + struct fexit_test *fexit_skel = NULL; + int err, prog_cnt; + + fexit_skel = fexit_test__open(); + if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_open")) + goto cleanup; + + prog_cnt = sizeof(fexit_skel->progs) / sizeof(long); + err = bpf_to_tracing_multi((void *)&fexit_skel->progs, prog_cnt); + if (!ASSERT_OK(err, "fexit_to_multi")) + goto cleanup; + + err = fexit_test__load(fexit_skel); + if (!ASSERT_OK(err, "fexit_skel_load")) + goto cleanup; + + err = bpf_attach_as_tracing_multi((void *)&fexit_skel->progs, + prog_cnt, + (void *)&fexit_skel->links); + if (!ASSERT_OK(err, "fexit_attach_multi")) + goto cleanup; + + err = fexit_test_check(fexit_skel); + ASSERT_OK(err, "fexit_first_attach"); +cleanup: + fexit_test__destroy(fexit_skel); +} + void test_fexit_test(void) { if (test__start_subtest("fexit")) fexit_test(); + if (test__start_subtest("fexit_multi")) + fexit_test_multi(); if (test__start_subtest("fexit_many_args")) 
fexit_many_args(); } diff --git a/tools/testing/selftests/bpf/prog_tests/modify_return.c b/tools/testing/selftests/bpf/prog_tests/modify_return.c index a70c99c2f8c8..3ca454379e90 100644 --- a/tools/testing/selftests/bpf/prog_tests/modify_return.c +++ b/tools/testing/selftests/bpf/prog_tests/modify_return.c @@ -49,6 +49,56 @@ static void run_test(__u32 input_retval, __u16 want_side_effect, __s16 want_ret) modify_return__destroy(skel); } +static void run_multi_test(__u32 input_retval, __u16 want_side_effect, __s16 want_ret) +{ + struct modify_return *skel = NULL; + int err, prog_fd, prog_cnt; + __u16 side_effect; + __s16 ret; + LIBBPF_OPTS(bpf_test_run_opts, topts); + + skel = modify_return__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + /* stack function args is not supported by tracing multi-link yet, + * so we only enable the bpf progs without stack function args. + */ + bpf_program__set_expected_attach_type(skel->progs.fentry_test, + BPF_TRACE_FENTRY_MULTI); + bpf_program__set_expected_attach_type(skel->progs.fexit_test, + BPF_TRACE_FEXIT_MULTI); + bpf_program__set_expected_attach_type(skel->progs.fmod_ret_test, + BPF_MODIFY_RETURN_MULTI); + + err = modify_return__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + prog_cnt = sizeof(skel->progs) / sizeof(long); + err = bpf_attach_as_tracing_multi((void *)&skel->progs, + prog_cnt, + (void *)&skel->links); + if (!ASSERT_OK(err, "modify_return__attach failed")) + goto cleanup; + + skel->bss->input_retval = input_retval; + prog_fd = bpf_program__fd(skel->progs.fmod_ret_test); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + + side_effect = UPPER(topts.retval); + ret = LOWER(topts.retval); + + ASSERT_EQ(ret, want_ret, "test_run ret"); + ASSERT_EQ(side_effect, want_side_effect, "modify_return side_effect"); + ASSERT_EQ(skel->bss->fentry_result, 1, "modify_return fentry_result"); + ASSERT_EQ(skel->bss->fexit_result, 1, "modify_return fexit_result"); + 
ASSERT_EQ(skel->bss->fmod_ret_result, 1, "modify_return fmod_ret_result"); +cleanup: + modify_return__destroy(skel); +} + /* TODO: conflict with get_func_ip_test */ void serial_test_modify_return(void) { @@ -59,3 +109,13 @@ void serial_test_modify_return(void) 0 /* want_side_effect */, -EINVAL * 2 /* want_ret */); } + +void serial_test_modify_return_multi(void) +{ + run_multi_test(0 /* input_retval */, + 2 /* want_side_effect */, + 33 /* want_ret */); + run_multi_test(-EINVAL /* input_retval */, + 1 /* want_side_effect */, + -EINVAL + 29 /* want_ret */); +} diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_multi_link.c b/tools/testing/selftests/bpf/prog_tests/tracing_multi_link.c new file mode 100644 index 000000000000..1cbe6089472f --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/tracing_multi_link.c @@ -0,0 +1,210 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 ChinaTelecom */ + +#include <test_progs.h> + +#include "tracing_multi_test.skel.h" +#include "fentry_multi_empty.skel.h" + +static void test_run(struct tracing_multi_test *skel) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + int err, prog_fd; + + skel->bss->pid = getpid(); + prog_fd = bpf_program__fd(skel->progs.fentry_cookie_test1); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, 0, "test_run"); + + ASSERT_EQ(skel->bss->fentry_test1_result, 1, "fentry_test1_result"); + ASSERT_EQ(skel->bss->fentry_test2_result, 1, "fentry_test2_result"); + ASSERT_EQ(skel->bss->fentry_test3_result, 1, "fentry_test3_result"); + ASSERT_EQ(skel->bss->fentry_test4_result, 1, "fentry_test4_result"); + ASSERT_EQ(skel->bss->fentry_test5_result, 1, "fentry_test5_result"); + ASSERT_EQ(skel->bss->fentry_test6_result, 1, "fentry_test6_result"); + ASSERT_EQ(skel->bss->fentry_test7_result, 1, "fentry_test7_result"); + ASSERT_EQ(skel->bss->fentry_test8_result, 1, "fentry_test8_result"); +} + +static void test_skel_auto_api(void) +{ + struct 
tracing_multi_test *skel; + int err; + + skel = tracing_multi_test__open_and_load(); + if (!ASSERT_OK_PTR(skel, "tracing_multi_test__open_and_load")) + return; + + /* disable all programs that should fail */ + bpf_program__set_autoattach(skel->progs.fentry_fail_test1, false); + bpf_program__set_autoattach(skel->progs.fentry_fail_test2, false); + bpf_program__set_autoattach(skel->progs.fentry_fail_test3, false); + bpf_program__set_autoattach(skel->progs.fentry_fail_test4, false); + bpf_program__set_autoattach(skel->progs.fentry_fail_test5, false); + bpf_program__set_autoattach(skel->progs.fentry_fail_test6, false); + + bpf_program__set_autoattach(skel->progs.fexit_fail_test1, false); + bpf_program__set_autoattach(skel->progs.fexit_fail_test2, false); + bpf_program__set_autoattach(skel->progs.fexit_fail_test3, false); + + err = tracing_multi_test__attach(skel); + if (!ASSERT_OK(err, "tracing_multi_test__attach")) + goto cleanup; + + test_run(skel); + +cleanup: + tracing_multi_test__destroy(skel); +} + +static int attach_bpf(struct bpf_program *prog, struct bpf_link **link_ptr, + bool success) +{ + struct bpf_link *link; + int err; + + link = bpf_program__attach(prog); + err = libbpf_get_error(link); + if (!ASSERT_OK(success ? 
err : !err, "attach_bpf")) + return err; + *link_ptr = link; + + return 0; +} + +#define attach_skel_bpf(name, success) \ + attach_bpf(skel->progs.name, &skel->links.name, success) + +static void test_skel_manual_api(void) +{ + struct tracing_multi_test *skel; + + skel = tracing_multi_test__open_and_load(); + if (!ASSERT_OK_PTR(skel, "tracing_multi_test__open_and_load")) + return; + + if (attach_skel_bpf(fentry_success_test1, true) || + attach_skel_bpf(fentry_success_test2, true) || + attach_skel_bpf(fentry_success_test3, true) || + attach_skel_bpf(fentry_success_test4, true) || + attach_skel_bpf(fexit_success_test1, true) || + attach_skel_bpf(fexit_success_test2, true) || + attach_skel_bpf(fentry_fail_test1, false) || + attach_skel_bpf(fentry_fail_test2, false) || + attach_skel_bpf(fentry_fail_test3, false) || + attach_skel_bpf(fentry_fail_test4, false) || + attach_skel_bpf(fentry_fail_test5, false) || + attach_skel_bpf(fentry_fail_test6, false) || + attach_skel_bpf(fexit_fail_test1, false) || + attach_skel_bpf(fexit_fail_test2, false) || + attach_skel_bpf(fexit_fail_test3, false) || + attach_skel_bpf(fentry_cookie_test1, true)) + goto cleanup; + + test_run(skel); + +cleanup: + tracing_multi_test__destroy(skel); +} + +static void test_attach_api(void) +{ + LIBBPF_OPTS(bpf_trace_multi_opts, opts); + struct tracing_multi_test *skel; + struct bpf_link *link; + const char *syms[8] = { + "bpf_fentry_test1", + "bpf_fentry_test2", + "bpf_fentry_test3", + "bpf_fentry_test4", + "bpf_fentry_test5", + "bpf_fentry_test6", + "bpf_fentry_test7", + "bpf_fentry_test8", + }; + __u64 cookies[] = {1, 7, 2, 3, 4, 5, 6, 8}; + + skel = tracing_multi_test__open_and_load(); + if (!ASSERT_OK_PTR(skel, "tracing_multi_test__open_and_load")) + return; + + opts.syms = syms; + opts.cookies = cookies; + opts.cnt = ARRAY_SIZE(syms); + link = bpf_program__attach_trace_multi_opts(skel->progs.fentry_cookie_test1, + &opts); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_trace_multi_opts")) + goto 
cleanup; + skel->links.fentry_cookie_test1 = link; + + skel->bss->test_cookie = true; + test_run(skel); +cleanup: + tracing_multi_test__destroy(skel); +} + +static void test_attach_bench(bool kernel) +{ + LIBBPF_OPTS(bpf_trace_multi_opts, opts); + struct fentry_multi_empty *skel; + long attach_start_ns, attach_end_ns; + long detach_start_ns, detach_end_ns; + double attach_delta, detach_delta; + struct bpf_link *link = NULL; + char **syms = NULL; + size_t cnt = 0; + + if (!ASSERT_OK(bpf_get_ksyms(&syms, &cnt, kernel), "get_syms")) + return; + + skel = fentry_multi_empty__open_and_load(); + if (!ASSERT_OK_PTR(skel, "fentry_multi_empty__open_and_load")) + goto cleanup; + + opts.syms = (const char **) syms; + opts.cnt = cnt; + opts.skip_invalid = true; + + attach_start_ns = get_time_ns(); + link = bpf_program__attach_trace_multi_opts(skel->progs.fentry_multi_empty, + &opts); + attach_end_ns = get_time_ns(); + + if (!ASSERT_OK_PTR(link, "bpf_program__attach_trace_multi_opts")) + return; + + detach_start_ns = get_time_ns(); + bpf_link__destroy(link); + detach_end_ns = get_time_ns(); + + attach_delta = (attach_end_ns - attach_start_ns) / 1000000000.0; + detach_delta = (detach_end_ns - detach_start_ns) / 1000000000.0; + + printf("%s: found %lu functions\n", __func__, opts.cnt); + printf("%s: attached in %7.3lfs\n", __func__, attach_delta); + printf("%s: detached in %7.3lfs\n", __func__, detach_delta); + +cleanup: + fentry_multi_empty__destroy(skel); + if (syms) + free(syms); +} + +void serial_test_tracing_multi_attach_bench(void) +{ + if (test__start_subtest("kernel")) + test_attach_bench(true); + if (test__start_subtest("modules")) + test_attach_bench(false); +} + +void test_tracing_multi_attach_test(void) +{ + if (test__start_subtest("skel_auto_api")) + test_skel_auto_api(); + if (test__start_subtest("skel_manual_api")) + test_skel_manual_api(); + if (test__start_subtest("attach_api")) + test_attach_api(); +} diff --git 
a/tools/testing/selftests/bpf/progs/fentry_multi_empty.c b/tools/testing/selftests/bpf/progs/fentry_multi_empty.c new file mode 100644 index 000000000000..a09ba216dff8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/fentry_multi_empty.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 ChinaTelecom */ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +SEC("fentry.multi/bpf_fentry_test1") +int BPF_PROG(fentry_multi_empty) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/tracing_multi_test.c b/tools/testing/selftests/bpf/progs/tracing_multi_test.c new file mode 100644 index 000000000000..fa27851896b9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tracing_multi_test.c @@ -0,0 +1,181 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 ChinaTelecom */ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct bpf_testmod_struct_arg_1 { + int a; +}; +struct bpf_testmod_struct_arg_2 { + long a; + long b; +}; + +__u64 test_result = 0; + +int pid = 0; +int test_cookie = 0; + +__u64 fentry_test1_result = 0; +__u64 fentry_test2_result = 0; +__u64 fentry_test3_result = 0; +__u64 fentry_test4_result = 0; +__u64 fentry_test5_result = 0; +__u64 fentry_test6_result = 0; +__u64 fentry_test7_result = 0; +__u64 fentry_test8_result = 0; + +extern const void bpf_fentry_test1 __ksym; +extern const void bpf_fentry_test2 __ksym; +extern const void bpf_fentry_test3 __ksym; +extern const void bpf_fentry_test4 __ksym; +extern const void bpf_fentry_test5 __ksym; +extern const void bpf_fentry_test6 __ksym; +extern const void bpf_fentry_test7 __ksym; +extern const void bpf_fentry_test8 __ksym; + +SEC("fentry.multi/bpf_testmod_test_struct_arg_1,bpf_testmod_test_struct_arg_13") +int BPF_PROG2(fentry_success_test1, struct bpf_testmod_struct_arg_2, a) +{ + test_result = a.a + 
a.b; + return 0; +} + +SEC("fentry.multi/bpf_testmod_test_struct_arg_2,bpf_testmod_test_struct_arg_10") +int BPF_PROG2(fentry_success_test2, int, a, struct bpf_testmod_struct_arg_2, b) +{ + test_result = a + b.a + b.b; + return 0; +} + +SEC("fentry.multi/bpf_testmod_test_struct_arg_1,bpf_testmod_test_struct_arg_4") +int BPF_PROG2(fentry_success_test3, struct bpf_testmod_struct_arg_2, a, int, b, + int, c) +{ + test_result = c; + return 0; +} + +SEC("fentry.multi/bpf_testmod_test_struct_arg_1,bpf_testmod_test_struct_arg_2") +int BPF_PROG2(fentry_success_test4, struct bpf_testmod_struct_arg_2, a, int, b, + int, c) +{ + test_result = c; + return 0; +} + +SEC("fentry.multi/bpf_testmod_test_struct_arg_1,bpf_testmod_test_struct_arg_1") +int BPF_PROG2(fentry_fail_test1, struct bpf_testmod_struct_arg_2, a) +{ + test_result = a.a + a.b; + return 0; +} + +SEC("fentry.multi/bpf_testmod_test_struct_arg_1,bpf_testmod_test_struct_arg_2") +int BPF_PROG2(fentry_fail_test2, struct bpf_testmod_struct_arg_2, a) +{ + test_result = a.a + a.b; + return 0; +} + +SEC("fentry.multi/bpf_testmod_test_struct_arg_1,bpf_testmod_test_struct_arg_4") +int BPF_PROG2(fentry_fail_test3, struct bpf_testmod_struct_arg_2, a) +{ + test_result = a.a + a.b; + return 0; +} + +SEC("fentry.multi/bpf_testmod_test_struct_arg_2,bpf_testmod_test_struct_arg_2") +int BPF_PROG2(fentry_fail_test4, int, a, struct bpf_testmod_struct_arg_2, b) +{ + test_result = a + b.a + b.b; + return 0; +} + +SEC("fentry.multi/bpf_testmod_test_struct_arg_2,bpf_testmod_test_struct_arg_13") +int BPF_PROG2(fentry_fail_test5, int, a, struct bpf_testmod_struct_arg_2, b) +{ + test_result = a + b.a + b.b; + return 0; +} + +SEC("fentry.multi/bpf_testmod_test_struct_arg_1,bpf_testmod_test_struct_arg_12") +int BPF_PROG2(fentry_fail_test6, struct bpf_testmod_struct_arg_2, a, int, b, + int, c) +{ + test_result = c; + return 0; +} + +SEC("fexit.multi/bpf_testmod_test_struct_arg_1,bpf_testmod_test_struct_arg_2,bpf_testmod_test_struct_arg_3") +int 
BPF_PROG2(fexit_success_test1, struct bpf_testmod_struct_arg_2, a, int, b, + int, c, int, retval) +{ + test_result = retval; + return 0; +} + +SEC("fexit.multi/bpf_testmod_test_struct_arg_2,bpf_testmod_test_struct_arg_12") +int BPF_PROG2(fexit_success_test2, int, a, struct bpf_testmod_struct_arg_2, b, + int, c, int, retval) +{ + test_result = a + b.a + b.b + retval; + return 0; +} + +SEC("fexit.multi/bpf_testmod_test_struct_arg_1,bpf_testmod_test_struct_arg_4") +int BPF_PROG2(fexit_fail_test1, struct bpf_testmod_struct_arg_2, a, int, b, + int, c, int, retval) +{ + test_result = retval; + return 0; +} + +SEC("fexit.multi/bpf_testmod_test_struct_arg_2,bpf_testmod_test_struct_arg_10") +int BPF_PROG2(fexit_fail_test2, int, a, struct bpf_testmod_struct_arg_2, b, + int, c, int, retval) +{ + test_result = a + b.a + b.b + retval; + return 0; +} + +SEC("fexit.multi/bpf_testmod_test_struct_arg_2,bpf_testmod_test_struct_arg_11") +int BPF_PROG2(fexit_fail_test3, int, a, struct bpf_testmod_struct_arg_2, b, + int, c, int, retval) +{ + test_result = a + b.a + b.b + retval; + return 0; +} + +static void tracing_multi_check_cookie(unsigned long long *ctx) +{ + if (bpf_get_current_pid_tgid() >> 32 != pid) + return; + + __u64 cookie = test_cookie ? 
bpf_get_attach_cookie(ctx) : 0; + __u64 addr = bpf_get_func_ip(ctx); + +#define SET(__var, __addr, __cookie) ({ \ + if (((const void *) addr == __addr) && \ + (!test_cookie || (cookie == __cookie))) \ + __var = 1; \ +}) + SET(fentry_test1_result, &bpf_fentry_test1, 1); + SET(fentry_test2_result, &bpf_fentry_test2, 7); + SET(fentry_test3_result, &bpf_fentry_test3, 2); + SET(fentry_test4_result, &bpf_fentry_test4, 3); + SET(fentry_test5_result, &bpf_fentry_test5, 4); + SET(fentry_test6_result, &bpf_fentry_test6, 5); + SET(fentry_test7_result, &bpf_fentry_test7, 6); + SET(fentry_test8_result, &bpf_fentry_test8, 8); +} + +SEC("fentry.multi/bpf_fentry_test1,bpf_fentry_test2,bpf_fentry_test3,bpf_fentry_test4,bpf_fentry_test5,bpf_fentry_test6,bpf_fentry_test7,bpf_fentry_test8") +int BPF_PROG(fentry_cookie_test1) +{ + tracing_multi_check_cookie(ctx); + return 0; +} diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c index e9e918cdf31f..07ea1d5d3795 100644 --- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c @@ -128,6 +128,30 @@ bpf_testmod_test_struct_arg_9(u64 a, void *b, short c, int d, void *e, char f, return bpf_testmod_test_struct_arg_result; } +noinline int +bpf_testmod_test_struct_arg_10(int a, struct bpf_testmod_struct_arg_2 b) { + bpf_testmod_test_struct_arg_result = a + b.a + b.b; + return bpf_testmod_test_struct_arg_result; +} + +noinline struct bpf_testmod_struct_arg_2 * +bpf_testmod_test_struct_arg_11(int a, struct bpf_testmod_struct_arg_2 b, int c) { + bpf_testmod_test_struct_arg_result = a + b.a + b.b + c; + return (void *)bpf_testmod_test_struct_arg_result; +} + +noinline int +bpf_testmod_test_struct_arg_12(int a, struct bpf_testmod_struct_arg_2 b, int *c) { + bpf_testmod_test_struct_arg_result = a + b.a + b.b + *c; + return bpf_testmod_test_struct_arg_result; +} + +noinline int +bpf_testmod_test_struct_arg_13(struct 
bpf_testmod_struct_arg_2 b) { + bpf_testmod_test_struct_arg_result = b.a + b.b; + return bpf_testmod_test_struct_arg_result; +} + noinline int bpf_testmod_test_arg_ptr_to_struct(struct bpf_testmod_struct_arg_1 *a) { bpf_testmod_test_struct_arg_result = a->a; diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 309d9d4a8ace..533b714f1ca6 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -667,6 +667,56 @@ int bpf_find_map(const char *test, struct bpf_object *obj, const char *name) return bpf_map__fd(map); } +int bpf_to_tracing_multi(struct bpf_program **progs, int prog_cnt) +{ + enum bpf_attach_type type; + int i, err; + + for (i = 0; i < prog_cnt; i++) { + type = bpf_program__get_expected_attach_type(progs[i]); + if (type == BPF_TRACE_FENTRY) + type = BPF_TRACE_FENTRY_MULTI; + else if (type == BPF_TRACE_FEXIT) + type = BPF_TRACE_FEXIT_MULTI; + else if (type == BPF_MODIFY_RETURN) + type = BPF_MODIFY_RETURN_MULTI; + else + continue; + err = bpf_program__set_expected_attach_type(progs[i], type); + if (err) + return err; + } + + return 0; +} + +int bpf_attach_as_tracing_multi(struct bpf_program **progs, int prog_cnt, + struct bpf_link **link) +{ + struct bpf_link *__link; + int err, type; + + for (int i = 0; i < prog_cnt; i++) { + LIBBPF_OPTS(bpf_trace_multi_opts, opts); + + type = bpf_program__get_expected_attach_type(progs[i]); + if (type != BPF_TRACE_FENTRY_MULTI && + type != BPF_TRACE_FEXIT_MULTI && + type != BPF_MODIFY_RETURN_MULTI) + continue; + + opts.attach_tracing = true; + __link = bpf_program__attach_trace_multi_opts(progs[i], &opts); + err = libbpf_get_error(link); + if (err) + return err; + + link[i] = __link; + } + + return 0; +} + int compare_map_keys(int map1_fd, int map2_fd) { __u32 key, next_key; diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index df2222a1806f..7e30c6dbf35c 100644 --- 
a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -496,6 +496,9 @@ int trigger_module_test_write(int write_sz); int write_sysctl(const char *sysctl, const char *value); int get_bpf_max_tramp_links_from(struct btf *btf); int get_bpf_max_tramp_links(void); +int bpf_to_tracing_multi(struct bpf_program **progs, int prog_cnt); +int bpf_attach_as_tracing_multi(struct bpf_program **progs, int prog_cnt, + struct bpf_link **link); struct netns_obj; struct netns_obj *netns_new(const char *name, bool open); diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index d24baf244d1f..a9e9dd3be226 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -559,6 +559,75 @@ static bool skip_entry(char *name) if (!strncmp(name, "__ftrace_invalid_address__", sizeof("__ftrace_invalid_address__") - 1)) return true; + + /* skip functions in "btf_id_deny" */ + if (!strcmp(name, "migrate_disable")) + return true; + if (!strcmp(name, "migrate_enable")) + return true; + if (!strcmp(name, "rcu_read_unlock_strict")) + return true; + if (!strcmp(name, "preempt_count_add")) + return true; + if (!strcmp(name, "preempt_count_sub")) + return true; + if (!strcmp(name, "__rcu_read_lock")) + return true; + if (!strcmp(name, "__rcu_read_unlock")) + return true; + + /* Following symbols have multi definition in kallsyms, take + * "t_next" for example: + * + * ffffffff813c10d0 t t_next + * ffffffff813d31b0 t t_next + * ffffffff813e06b0 t t_next + * ffffffff813eb360 t t_next + * ffffffff81613360 t t_next + * + * but only one of them have corresponding mrecord: + * ffffffff81613364 t_next + * + * The kernel search the target function address by the symbol + * name "t_next" with kallsyms_lookup_name() during attaching + * and the function "0xffffffff813c10d0" can be matched, which + * doesn't have a corresponding mrecord. And this will make + * the attach failing. 
Skip the functions like this. + * + * The list maybe not whole, so we still can fail......We need a + * way to make the whole things right. Yes, we need fix it :/ + */ + if (!strcmp(name, "kill_pid_usb_asyncio")) + return true; + if (!strcmp(name, "t_next")) + return true; + if (!strcmp(name, "t_stop")) + return true; + if (!strcmp(name, "t_start")) + return true; + if (!strcmp(name, "p_next")) + return true; + if (!strcmp(name, "p_stop")) + return true; + if (!strcmp(name, "p_start")) + return true; + if (!strcmp(name, "mem32_serial_out")) + return true; + if (!strcmp(name, "mem32_serial_in")) + return true; + if (!strcmp(name, "io_serial_in")) + return true; + if (!strcmp(name, "io_serial_out")) + return true; + if (!strcmp(name, "event_callback")) + return true; + if (!strcmp(name, "amd_pmu_init")) + return true; + if (!strcmp(name, "sync_regs")) + return true; + if (!strcmp(name, "empty")) + return true; + return false; } -- 2.39.5

5 months, 2 weeks

1
0
0 0

[PATCH bpf-next v2 16/18] selftests/bpf: move get_ksyms and get_addrs to trace_helpers.c

by Menglong Dong

We need to get all the kernel function that can be traced sometimes, so we move the get_syms() and get_addrs() in kprobe_multi_test.c to trace_helpers.c and rename it to bpf_get_ksyms() and bpf_get_addrs(). Signed-off-by: Menglong Dong <dongml2(a)chinatelecom.cn> --- .../bpf/prog_tests/kprobe_multi_test.c | 220 +----------------- tools/testing/selftests/bpf/trace_helpers.c | 214 +++++++++++++++++ tools/testing/selftests/bpf/trace_helpers.h | 3 + 3 files changed, 220 insertions(+), 217 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index e19ef509ebf8..171706e78da8 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -422,220 +422,6 @@ static void test_unique_match(void) kprobe_multi__destroy(skel); } -static size_t symbol_hash(long key, void *ctx __maybe_unused) -{ - return str_hash((const char *) key); -} - -static bool symbol_equal(long key1, long key2, void *ctx __maybe_unused) -{ - return strcmp((const char *) key1, (const char *) key2) == 0; -} - -static bool is_invalid_entry(char *buf, bool kernel) -{ - if (kernel && strchr(buf, '[')) - return true; - if (!kernel && !strchr(buf, '[')) - return true; - return false; -} - -static bool skip_entry(char *name) -{ - /* - * We attach to almost all kernel functions and some of them - * will cause 'suspicious RCU usage' when fprobe is attached - * to them. Filter out the current culprits - arch_cpu_idle - * default_idle and rcu_* functions. 
- */ - if (!strcmp(name, "arch_cpu_idle")) - return true; - if (!strcmp(name, "default_idle")) - return true; - if (!strncmp(name, "rcu_", 4)) - return true; - if (!strcmp(name, "bpf_dispatcher_xdp_func")) - return true; - if (!strncmp(name, "__ftrace_invalid_address__", - sizeof("__ftrace_invalid_address__") - 1)) - return true; - return false; -} - -/* Do comparision by ignoring '.llvm.<hash>' suffixes. */ -static int compare_name(const char *name1, const char *name2) -{ - const char *res1, *res2; - int len1, len2; - - res1 = strstr(name1, ".llvm."); - res2 = strstr(name2, ".llvm."); - len1 = res1 ? res1 - name1 : strlen(name1); - len2 = res2 ? res2 - name2 : strlen(name2); - - if (len1 == len2) - return strncmp(name1, name2, len1); - if (len1 < len2) - return strncmp(name1, name2, len1) <= 0 ? -1 : 1; - return strncmp(name1, name2, len2) >= 0 ? 1 : -1; -} - -static int load_kallsyms_compare(const void *p1, const void *p2) -{ - return compare_name(((const struct ksym *)p1)->name, ((const struct ksym *)p2)->name); -} - -static int search_kallsyms_compare(const void *p1, const struct ksym *p2) -{ - return compare_name(p1, p2->name); -} - -static int get_syms(char ***symsp, size_t *cntp, bool kernel) -{ - size_t cap = 0, cnt = 0; - char *name = NULL, *ksym_name, **syms = NULL; - struct hashmap *map; - struct ksyms *ksyms; - struct ksym *ks; - char buf[256]; - FILE *f; - int err = 0; - - ksyms = load_kallsyms_custom_local(load_kallsyms_compare); - if (!ASSERT_OK_PTR(ksyms, "load_kallsyms_custom_local")) - return -EINVAL; - - /* - * The available_filter_functions contains many duplicates, - * but other than that all symbols are usable in kprobe multi - * interface. - * Filtering out duplicates by using hashmap__add, which won't - * add existing entry. 
- */ - - if (access("/sys/kernel/tracing/trace", F_OK) == 0) - f = fopen("/sys/kernel/tracing/available_filter_functions", "r"); - else - f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r"); - - if (!f) - return -EINVAL; - - map = hashmap__new(symbol_hash, symbol_equal, NULL); - if (IS_ERR(map)) { - err = libbpf_get_error(map); - goto error; - } - - while (fgets(buf, sizeof(buf), f)) { - if (is_invalid_entry(buf, kernel)) - continue; - - free(name); - if (sscanf(buf, "%ms$*[^\n]\n", &name) != 1) - continue; - if (skip_entry(name)) - continue; - - ks = search_kallsyms_custom_local(ksyms, name, search_kallsyms_compare); - if (!ks) { - err = -EINVAL; - goto error; - } - - ksym_name = ks->name; - err = hashmap__add(map, ksym_name, 0); - if (err == -EEXIST) { - err = 0; - continue; - } - if (err) - goto error; - - err = libbpf_ensure_mem((void **) &syms, &cap, - sizeof(*syms), cnt + 1); - if (err) - goto error; - - syms[cnt++] = ksym_name; - } - - *symsp = syms; - *cntp = cnt; - -error: - free(name); - fclose(f); - hashmap__free(map); - if (err) - free(syms); - return err; -} - -static int get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel) -{ - unsigned long *addr, *addrs, *tmp_addrs; - int err = 0, max_cnt, inc_cnt; - char *name = NULL; - size_t cnt = 0; - char buf[256]; - FILE *f; - - if (access("/sys/kernel/tracing/trace", F_OK) == 0) - f = fopen("/sys/kernel/tracing/available_filter_functions_addrs", "r"); - else - f = fopen("/sys/kernel/debug/tracing/available_filter_functions_addrs", "r"); - - if (!f) - return -ENOENT; - - /* In my local setup, the number of entries is 50k+ so Let us initially - * allocate space to hold 64k entries. If 64k is not enough, incrementally - * increase 1k each time. 
- */ - max_cnt = 65536; - inc_cnt = 1024; - addrs = malloc(max_cnt * sizeof(long)); - if (addrs == NULL) { - err = -ENOMEM; - goto error; - } - - while (fgets(buf, sizeof(buf), f)) { - if (is_invalid_entry(buf, kernel)) - continue; - - free(name); - if (sscanf(buf, "%p %ms$*[^\n]\n", &addr, &name) != 2) - continue; - if (skip_entry(name)) - continue; - - if (cnt == max_cnt) { - max_cnt += inc_cnt; - tmp_addrs = realloc(addrs, max_cnt); - if (!tmp_addrs) { - err = -ENOMEM; - goto error; - } - addrs = tmp_addrs; - } - - addrs[cnt++] = (unsigned long)addr; - } - - *addrsp = addrs; - *cntp = cnt; - -error: - free(name); - fclose(f); - if (err) - free(addrs); - return err; -} - static void do_bench_test(struct kprobe_multi_empty *skel, struct bpf_kprobe_multi_opts *opts) { long attach_start_ns, attach_end_ns; @@ -670,7 +456,7 @@ static void test_kprobe_multi_bench_attach(bool kernel) char **syms = NULL; size_t cnt = 0; - if (!ASSERT_OK(get_syms(&syms, &cnt, kernel), "get_syms")) + if (!ASSERT_OK(bpf_get_ksyms(&syms, &cnt, kernel), "bpf_get_ksyms")) return; skel = kprobe_multi_empty__open_and_load(); @@ -696,13 +482,13 @@ static void test_kprobe_multi_bench_attach_addr(bool kernel) size_t cnt = 0; int err; - err = get_addrs(&addrs, &cnt, kernel); + err = bpf_get_addrs(&addrs, &cnt, kernel); if (err == -ENOENT) { test__skip(); return; } - if (!ASSERT_OK(err, "get_addrs")) + if (!ASSERT_OK(err, "bpf_get_addrs")) return; skel = kprobe_multi_empty__open_and_load(); diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 81943c6254e6..d24baf244d1f 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -17,6 +17,7 @@ #include <linux/limits.h> #include <libelf.h> #include <gelf.h> +#include "bpf/hashmap.h" #include "bpf/libbpf_internal.h" #define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe" @@ -519,3 +520,216 @@ void read_trace_pipe(void) { 
read_trace_pipe_iter(trace_pipe_cb, NULL, 0); } + +static size_t symbol_hash(long key, void *ctx __maybe_unused) +{ + return str_hash((const char *) key); +} + +static bool symbol_equal(long key1, long key2, void *ctx __maybe_unused) +{ + return strcmp((const char *) key1, (const char *) key2) == 0; +} + +static bool is_invalid_entry(char *buf, bool kernel) +{ + if (kernel && strchr(buf, '[')) + return true; + if (!kernel && !strchr(buf, '[')) + return true; + return false; +} + +static bool skip_entry(char *name) +{ + /* + * We attach to almost all kernel functions and some of them + * will cause 'suspicious RCU usage' when fprobe is attached + * to them. Filter out the current culprits - arch_cpu_idle + * default_idle and rcu_* functions. + */ + if (!strcmp(name, "arch_cpu_idle")) + return true; + if (!strcmp(name, "default_idle")) + return true; + if (!strncmp(name, "rcu_", 4)) + return true; + if (!strcmp(name, "bpf_dispatcher_xdp_func")) + return true; + if (!strncmp(name, "__ftrace_invalid_address__", + sizeof("__ftrace_invalid_address__") - 1)) + return true; + return false; +} + +/* Do comparison by ignoring '.llvm.<hash>' suffixes. */ +static int compare_name(const char *name1, const char *name2) +{ + const char *res1, *res2; + int len1, len2; + + res1 = strstr(name1, ".llvm."); + res2 = strstr(name2, ".llvm."); + len1 = res1 ? res1 - name1 : strlen(name1); + len2 = res2 ? res2 - name2 : strlen(name2); + + if (len1 == len2) + return strncmp(name1, name2, len1); + if (len1 < len2) + return strncmp(name1, name2, len1) <= 0 ? -1 : 1; + return strncmp(name1, name2, len2) >= 0 ? 
1 : -1; +} + +static int load_kallsyms_compare(const void *p1, const void *p2) +{ + return compare_name(((const struct ksym *)p1)->name, ((const struct ksym *)p2)->name); +} + +static int search_kallsyms_compare(const void *p1, const struct ksym *p2) +{ + return compare_name(p1, p2->name); +} + +int bpf_get_ksyms(char ***symsp, size_t *cntp, bool kernel) +{ + size_t cap = 0, cnt = 0; + char *name = NULL, *ksym_name, **syms = NULL; + struct hashmap *map; + struct ksyms *ksyms; + struct ksym *ks; + char buf[256]; + FILE *f; + int err = 0; + + ksyms = load_kallsyms_custom_local(load_kallsyms_compare); + if (!ksyms) + return -EINVAL; + + /* + * The available_filter_functions contains many duplicates, + * but other than that all symbols are usable to trace. + * Filtering out duplicates by using hashmap__add, which won't + * add existing entry. + */ + + if (access("/sys/kernel/tracing/trace", F_OK) == 0) + f = fopen("/sys/kernel/tracing/available_filter_functions", "r"); + else + f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r"); + + if (!f) + return -EINVAL; + + map = hashmap__new(symbol_hash, symbol_equal, NULL); + if (IS_ERR(map)) { + err = libbpf_get_error(map); + goto error; + } + + while (fgets(buf, sizeof(buf), f)) { + if (is_invalid_entry(buf, kernel)) + continue; + + free(name); + if (sscanf(buf, "%ms$*[^\n]\n", &name) != 1) + continue; + if (skip_entry(name)) + continue; + + ks = search_kallsyms_custom_local(ksyms, name, search_kallsyms_compare); + if (!ks) { + err = -EINVAL; + goto error; + } + + ksym_name = ks->name; + err = hashmap__add(map, ksym_name, 0); + if (err == -EEXIST) { + err = 0; + continue; + } + if (err) + goto error; + + err = libbpf_ensure_mem((void **) &syms, &cap, + sizeof(*syms), cnt + 1); + if (err) + goto error; + + syms[cnt++] = ksym_name; + } + + *symsp = syms; + *cntp = cnt; + +error: + free(name); + fclose(f); + hashmap__free(map); + if (err) + free(syms); + return err; +} + +int bpf_get_addrs(unsigned long 
**addrsp, size_t *cntp, bool kernel) +{ + unsigned long *addr, *addrs, *tmp_addrs; + int err = 0, max_cnt, inc_cnt; + char *name = NULL; + size_t cnt = 0; + char buf[256]; + FILE *f; + + if (access("/sys/kernel/tracing/trace", F_OK) == 0) + f = fopen("/sys/kernel/tracing/available_filter_functions_addrs", "r"); + else + f = fopen("/sys/kernel/debug/tracing/available_filter_functions_addrs", "r"); + + if (!f) + return -ENOENT; + + /* In my local setup, the number of entries is 50k+ so Let us initially + * allocate space to hold 64k entries. If 64k is not enough, incrementally + * increase 1k each time. + */ + max_cnt = 65536; + inc_cnt = 1024; + addrs = malloc(max_cnt * sizeof(long)); + if (addrs == NULL) { + err = -ENOMEM; + goto error; + } + + while (fgets(buf, sizeof(buf), f)) { + if (is_invalid_entry(buf, kernel)) + continue; + + free(name); + if (sscanf(buf, "%p %ms$*[^\n]\n", &addr, &name) != 2) + continue; + if (skip_entry(name)) + continue; + + if (cnt == max_cnt) { + max_cnt += inc_cnt; + tmp_addrs = realloc(addrs, max_cnt); + if (!tmp_addrs) { + err = -ENOMEM; + goto error; + } + addrs = tmp_addrs; + } + + addrs[cnt++] = (unsigned long)addr; + } + + *addrsp = addrs; + *cntp = cnt; + +error: + free(name); + fclose(f); + if (err) + free(addrs); + return err; +} diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h index 2ce873c9f9aa..9437bdd4afa5 100644 --- a/tools/testing/selftests/bpf/trace_helpers.h +++ b/tools/testing/selftests/bpf/trace_helpers.h @@ -41,4 +41,7 @@ ssize_t get_rel_offset(uintptr_t addr); int read_build_id(const char *path, char *build_id, size_t size); +int bpf_get_ksyms(char ***symsp, size_t *cntp, bool kernel); +int bpf_get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel); + #endif -- 2.39.5

5 months, 2 weeks

1
0
0 0

[PATCH v2 0/4] kselftest/arm64: Add coverage for the interaction of vfork() and GCS

by Mark Brown

I had cause to look at the vfork() support for GCS and realised that we don't have any direct test coverage. This series adds some by adding vfork() to nolibc and then using that in basic-gcs to provide some simple vfork() coverage. Signed-off-by: Mark Brown <broonie(a)kernel.org> --- Changes in v2: - Add replacement of ifdef with if defined() in nolibc since the code doesn't reflect the coding style. - Remove check for arch-specific vfork(). - Link to v1: https://lore.kernel.org/r/20250609-arm64-gcs-vfork-exit-v1-0-baad0f085747@k… --- Mark Brown (4): tools/nolibc: Replace ifdef with if defined() in sys.h tools/nolibc: Provide vfork() kselftest/arm64: Add a test for vfork() with GCS selftests/nolibc: Add coverage of vfork() tools/include/nolibc/sys.h | 57 +++++++++++++++++------- tools/testing/selftests/arm64/gcs/basic-gcs.c | 63 +++++++++++++++++++++++++++ tools/testing/selftests/nolibc/nolibc-test.c | 23 ++++++++-- 3 files changed, 124 insertions(+), 19 deletions(-) --- base-commit: 19272b37aa4f83ca52bdf9c16d5d81bdd1354494 change-id: 20250528-arm64-gcs-vfork-exit-4a7daf7652ee Best regards, -- Mark Brown <broonie(a)kernel.org>

5 months, 2 weeks

3
7
0 0

[PATCH] selftests: net: fix resource leak in napi_id_helper.c

by Malaya Kumar Rout

Resolve minor resource leaks reported by cppcheck in napi_id_helper.c cppcheck output before this patch: tools/testing/selftests/drivers/net/napi_id_helper.c:37:3: error: Resource leak: server [resourceLeak] tools/testing/selftests/drivers/net/napi_id_helper.c:46:3: error: Resource leak: server [resourceLeak] tools/testing/selftests/drivers/net/napi_id_helper.c:51:3: error: Resource leak: server [resourceLeak] tools/testing/selftests/drivers/net/napi_id_helper.c:59:3: error: Resource leak: server [resourceLeak] tools/testing/selftests/drivers/net/napi_id_helper.c:67:3: error: Resource leak: server [resourceLeak] tools/testing/selftests/drivers/net/napi_id_helper.c:76:3: error: Resource leak: server [resourceLeak] cppcheck output after this patch: No resource leaks found Signed-off-by: Malaya Kumar Rout <malayarout91(a)gmail.com> --- tools/testing/selftests/drivers/net/napi_id_helper.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/testing/selftests/drivers/net/napi_id_helper.c b/tools/testing/selftests/drivers/net/napi_id_helper.c index eecd610c2109..1441b8d49b91 100644 --- a/tools/testing/selftests/drivers/net/napi_id_helper.c +++ b/tools/testing/selftests/drivers/net/napi_id_helper.c @@ -34,6 +34,7 @@ int main(int argc, char *argv[]) if (setsockopt(server, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt))) { perror("setsockopt"); + close(server); return 1; } @@ -43,11 +44,13 @@ int main(int argc, char *argv[]) if (bind(server, (struct sockaddr *)&address, sizeof(address)) < 0) { perror("bind failed"); + close(server); return 1; } if (listen(server, 1) < 0) { perror("listen"); + close(server); return 1; } @@ -56,6 +59,7 @@ int main(int argc, char *argv[]) client = accept(server, NULL, 0); if (client < 0) { perror("accept"); + close(server); return 1; } @@ -64,6 +68,7 @@ int main(int argc, char *argv[]) &optlen); if (ret != 0) { perror("getsockopt"); + close(server); return 1; } @@ -73,6 +78,7 @@ int main(int argc, char *argv[]) if (napi_id == 0) { 
fprintf(stderr, "napi ID is 0\n"); + close(server); return 1; } -- 2.43.0

5 months, 2 weeks

5
6
0 0

[PATCH] global: fix misapplications of "awhile"

by Ahelenia Ziemiańska

Of these: 7 "for a while" typos 5 "take a while" typos 1 misreading of "once in a while"? 3 awhiles used correctly remain in the tree Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli(a)nabijaczleweli.xyz> --- Documentation/trace/histogram.rst | 2 +- arch/sh/drivers/pci/common.c | 2 +- arch/sh/drivers/pci/pci-sh7780.c | 2 +- drivers/atm/lanai.c | 2 +- drivers/md/bcache/bcache.h | 2 +- drivers/md/bcache/request.c | 2 +- drivers/net/ethernet/google/gve/gve_rx_dqo.c | 2 +- drivers/scsi/hpsa.c | 2 +- drivers/tty/serial/jsm/jsm_neo.c | 2 +- fs/ocfs2/dlm/dlmrecovery.c | 2 +- sound/pci/emu10k1/emu10k1_main.c | 2 +- sound/pci/emu10k1/emupcm.c | 2 +- tools/testing/selftests/powerpc/tm/tm-tmspr.c | 2 +- 13 files changed, 13 insertions(+), 13 deletions(-) diff --git a/Documentation/trace/histogram.rst b/Documentation/trace/histogram.rst index 0aada18c38c6..2b98c1720a54 100644 --- a/Documentation/trace/histogram.rst +++ b/Documentation/trace/histogram.rst @@ -249,7 +249,7 @@ Extended error information table, it should keep a running total of the number of bytes requested by that call_site. 
- We'll let it run for awhile and then dump the contents of the 'hist' + We'll let it run for a while and then dump the contents of the 'hist' file in the kmalloc event's subdirectory (for readability, a number of entries have been omitted):: diff --git a/arch/sh/drivers/pci/common.c b/arch/sh/drivers/pci/common.c index 9633b6147a05..f95004c67e6c 100644 --- a/arch/sh/drivers/pci/common.c +++ b/arch/sh/drivers/pci/common.c @@ -148,7 +148,7 @@ unsigned int pcibios_handle_status_errors(unsigned long addr, cmd |= PCI_STATUS_PARITY | PCI_STATUS_DETECTED_PARITY; - /* Now back off of the IRQ for awhile */ + /* Now back off of the IRQ for a while */ if (hose->err_irq) { disable_irq_nosync(hose->err_irq); hose->err_timer.expires = jiffies + HZ; diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c index 9a624a6ee354..f41d6939a3d9 100644 --- a/arch/sh/drivers/pci/pci-sh7780.c +++ b/arch/sh/drivers/pci/pci-sh7780.c @@ -153,7 +153,7 @@ static irqreturn_t sh7780_pci_serr_irq(int irq, void *dev_id) /* Deassert SERR */ __raw_writel(SH4_PCIINTM_SDIM, hose->reg_base + SH4_PCIINTM); - /* Back off the IRQ for awhile */ + /* Back off the IRQ for a while */ disable_irq_nosync(irq); hose->serr_timer.expires = jiffies + HZ; add_timer(&hose->serr_timer); diff --git a/drivers/atm/lanai.c b/drivers/atm/lanai.c index 2a1fe3080712..0dfa2cdc897c 100644 --- a/drivers/atm/lanai.c +++ b/drivers/atm/lanai.c @@ -755,7 +755,7 @@ static void lanai_shutdown_rx_vci(const struct lanai_vcc *lvcc) /* Shutdown transmitting on card. * Unfortunately the lanai needs us to wait until all the data * drains out of the buffer before we can dealloc it, so this - * can take awhile -- up to 370ms for a full 128KB buffer + * can take a while -- up to 370ms for a full 128KB buffer * assuming everone else is quiet. In theory the time is * boundless if there's a CBR VCC holding things up. 
*/ diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 1d33e40d26ea..7318d9800370 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -499,7 +499,7 @@ struct gc_stat { * won't automatically reattach). * * CACHE_SET_STOPPING always gets set first when we're closing down a cache set; - * we'll continue to run normally for awhile with CACHE_SET_STOPPING set (i.e. + * we'll continue to run normally for a while with CACHE_SET_STOPPING set (i.e. * flushing dirty data). * * CACHE_SET_RUNNING means all cache devices have been registered and journal diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index af345dc6fde1..87b4341cb42c 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -257,7 +257,7 @@ static CLOSURE_CALLBACK(bch_data_insert_start) /* * But if it's not a writeback write we'd rather just bail out if - * there aren't any buckets ready to write to - it might take awhile and + * there aren't any buckets ready to write to - it might take a while and * we might be starving btree writes for gc or something. */ diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index dcb0545baa50..6a0be54f1c81 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -608,7 +608,7 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, buf_len = compl_desc->packet_len; hdr_len = compl_desc->header_len; - /* Page might have not been used for awhile and was likely last written + /* Page might have not been used for a while and was likely last written * by a different thread. 
*/ if (rx->dqo.page_pool) { diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index c73a71ac3c29..0066f15153a7 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -7795,7 +7795,7 @@ static int hpsa_wait_for_mode_change_ack(struct ctlr_info *h) u32 doorbell_value; unsigned long flags; - /* under certain very rare conditions, this can take awhile. + /* under certain very rare conditions, this can take a while. * (e.g.: hot replace a failed 144GB drive in a RAID 5 set right * as we enter this code.) */ diff --git a/drivers/tty/serial/jsm/jsm_neo.c b/drivers/tty/serial/jsm/jsm_neo.c index e8e13bf056e2..2eb9ff26d6e8 100644 --- a/drivers/tty/serial/jsm/jsm_neo.c +++ b/drivers/tty/serial/jsm/jsm_neo.c @@ -1189,7 +1189,7 @@ static irqreturn_t neo_intr(int irq, void *voidbrd) /* * The UART triggered us with a bogus interrupt type. * It appears the Exar chip, when REALLY bogged down, will throw - * these once and awhile. + * these periodically. * Its harmless, just ignore it and move on. 
*/ jsm_dbg(INTR, &brd->pci_dev, diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 67fc62a49a76..00f52812dbb0 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -2632,7 +2632,7 @@ static int dlm_pick_recovery_master(struct dlm_ctxt *dlm) dlm_reco_master_ready(dlm), msecs_to_jiffies(1000)); if (!dlm_reco_master_ready(dlm)) { - mlog(0, "%s: reco master taking awhile\n", + mlog(0, "%s: reco master taking a while\n", dlm->name); goto again; } diff --git a/sound/pci/emu10k1/emu10k1_main.c b/sound/pci/emu10k1/emu10k1_main.c index bbe252b8916c..6050201851b1 100644 --- a/sound/pci/emu10k1/emu10k1_main.c +++ b/sound/pci/emu10k1/emu10k1_main.c @@ -606,7 +606,7 @@ static int snd_emu10k1_ecard_init(struct snd_emu10k1 *emu) /* Step 2: Calibrate the ADC and DAC */ snd_emu10k1_ecard_write(emu, EC_DACCAL | EC_LEDN | EC_TRIM_CSN); - /* Step 3: Wait for awhile; XXX We can't get away with this + /* Step 3: Wait for a while; XXX We can't get away with this * under a real operating system; we'll need to block and wait that * way. 
*/ snd_emu10k1_wait(emu, 48000); diff --git a/sound/pci/emu10k1/emupcm.c b/sound/pci/emu10k1/emupcm.c index 1bf6e3d652f8..ca4b03317539 100644 --- a/sound/pci/emu10k1/emupcm.c +++ b/sound/pci/emu10k1/emupcm.c @@ -991,7 +991,7 @@ static snd_pcm_uframes_t snd_emu10k1_capture_pointer(struct snd_pcm_substream *s if (!epcm->running) return 0; if (epcm->first_ptr) { - udelay(50); /* hack, it takes awhile until capture is started */ + udelay(50); /* hack, it takes a while until capture is started */ epcm->first_ptr = 0; } ptr = snd_emu10k1_ptr_read(emu, epcm->capture_idx_reg, 0) & 0x0000ffff; diff --git a/tools/testing/selftests/powerpc/tm/tm-tmspr.c b/tools/testing/selftests/powerpc/tm/tm-tmspr.c index dd5ddffa28b7..0d64988ffb40 100644 --- a/tools/testing/selftests/powerpc/tm/tm-tmspr.c +++ b/tools/testing/selftests/powerpc/tm/tm-tmspr.c @@ -14,7 +14,7 @@ * (1) create more threads than cpus * (2) in each thread: * (a) set TFIAR and TFHAR a unique value - * (b) loop for awhile, continually checking to see if + * (b) loop for a while, continually checking to see if * either register has been corrupted. * * (3) Loop: -- 2.39.5

5 months, 2 weeks

2
1
0 0

[PATCH v11 net-next 00/15] AccECN protocol patch series

by chia-yu.chang＠nokia-bell-labs.com

From: Chia-Yu Chang <chia-yu.chang(a)nokia-bell-labs.com> Hello, Please find the v11 AccECN protocol patch series, which covers the core functionality of Accurate ECN, AccECN negotiation, AccECN TCP options, and AccECN failure handling. The Accurate ECN draft can be found in https://datatracker.ietf.org/doc/html/draft-ietf-tcpm-accurate-ecn-28 This patch series is part of the full AccECN patch series, which is available at https://github.com/L4STeam/linux-net-next/commits/upstream_l4steam/ Best Regards, Chia-Yu --- v11 (04-Jul-2025) - Fix compilation issue of some intermediate patches in v10 v10 (03-Jul-2025) - Add new patch of separated header file include/net/tcp_ecn.h to include ECN and AccECN functions (Eric Dumazet <edumazet(a)google.com>) - Add comments on the AccECN helper functions in tcp_ecn.h (Eric Dumazet <edumazet(a)google.com>) - Add documentation of tcp_ecn, tcp_ecn_option, tcp_ecn_beacon in ip-sysctl.rst to the corresponding patch (Eric Dumazet <edumazet(a)google.com>) - Split wait third ACK functionality into a separated patch from AccECN negotiation patch (Eric Dumazet <edumazet(a)google.com>) - Add READ_ONCE() over every read of sysctl for all patches in the series (Eric Dumazet <edumazet(a)google.com>) - Merge heuristics of AccECN option ceb/cep and ACE field multi-wrap into a single patch - Add a table of SACK block reduction and required AccECN field in patch #15 commit message (Eric Dumazet <edumazet(a)google.com>) v9 (21-Jun-2025) - Use tcp_data_ecn_check() to set TCP_ECN_SEEN flag only for RFC3168 ECN (Paolo Abeni <pabeni(a)redhat.com>) - Add comments about setting TCP_ECN_SEEN flag for RFC3168 and Accurate ECN (Paolo Abeni <pabeni(a)redhat.com>) - Restructure the code in the for loop of tcp_accecn_process_option() (Paolo Abeni <pabeni(a)redhat.com>) - Remove ecn_bytes and add use_synack_ecn_bytes flag to identify whether syn_ack_bytes or received_ecn_bytes is used (Paolo Abeni <pabeni(a)redhat.com>) - Replace leftover_bytes and leftover_size 
with leftover_highbyte and leftover_lowbyte and add comments in tcp_options_write() (Paolo Abeni <pabeni(a)redhat.com>) - Add comments and commit message about the 1st retx SYN still attempting AccECN negotiation (Paolo Abeni <pabeni(a)redhat.com>) v8 (10-Jun-2025) - Add new helper function tcp_ecn_received_counters_payload() in #6 (Paolo Abeni <pabeni(a)redhat.com>) - Set opts->num_sack_blocks=0 to avoid potential undefined value in #8 (Paolo Abeni <pabeni(a)redhat.com>) - Reset leftover_size to 2 once leftover_bytes is used in #9 (Paolo Abeni <pabeni(a)redhat.com>) - Add new helper function tcp_accecn_opt_demand_min() in #10 (Paolo Abeni <pabeni(a)redhat.com>) - Add new helper function tcp_accecn_saw_opt_fail_recv() in #11 (Paolo Abeni <pabeni(a)redhat.com>) - Update tcp_options_fit_accecn() to avoid using recursion in #14 (Paolo Abeni <pabeni(a)redhat.com>) v7 (14-May-2025) - Modify group sizes of tcp_sock_write_txrx and tcp_sock_write_rx in #3 based on pahole results (Paolo Abeni <pabeni(a)redhat.com>) - Fix the issue in #4 and #5 where the RFC3168 ECN behavior in tcp_ecn_send() is changed (Paolo Abeni <pabeni(a)redhat.com>) - Modify group size of tcp_sock_write_txrx in #4 and #6 based on pahole results (Paolo Abeni <pabeni(a)redhat.com>) - Update commit message for #9 to explain the increase in tcp_sock_write_rx group size - Modify group size of tcp_sock_write_tx in #10 based on pahole results v6 (09-May-2025) - Add #3 to utilize existing holes of tcp_sock_write_txrx group for later patches (#4, #9, #10) with new u8 members (Paolo Abeni <pabeni(a)redhat.com>) - Add pahole outcomes before and after commit in #4, #5, #6, #9, #10, #15 (Paolo Abeni <pabeni(a)redhat.com>) - Define new helper function tcp_send_ack_reflect_ect() for sending ACK with reflected ECT in #5 (Paolo Abeni <pabeni(a)redhat.com>) - Add comments for function tcp_ecn_rcv_synack() in #5 (Paolo Abeni <pabeni(a)redhat.com>) - Add enum/define to be used by sysctl_tcp_ecn in #5, sysctl_tcp_ecn_option 
in #9, and sysctl_tcp_ecn_option_beacon in #10 (Paolo Abeni <pabeni(a)redhat.com>) - Move accecn_fail_mode and saw_accecn_opt in #5 and #11 to use existing holes of tcp_sock (Paolo Abeni <pabeni(a)redhat.com>) - Change data type of new members of tcp_request_sock and move them to the end of struct in #5 and #11 (Paolo Abeni <pabeni(a)redhat.com>) - Move new members of tcp_info to the end of struct in #6 (Paolo Abeni <pabeni(a)redhat.com>) - Merge previous #7 into #9 (Paolo Abeni <pabeni(a)redhat.com>) - Mask ecnfield with INET_ECN_MASK to remove WARN_ONCE in #9 (Paolo Abeni <pabeni(a)redhat.com>) - Reduce the indentation levels for readability in #9 and #10 (Paolo Abeni <pabeni(a)redhat.com>) - Move delivered_ecn_bytes to the RX group in #9, accecn_opt_tstamp to the TX group in #10, pkts_acked_ewma to the RX group in #15 (Paolo Abeni <pabeni(a)redhat.com>) - Add changes in Documentation/networking/net_cachelines/tcp_sock.rst for new tcp_sock members in #3, #5, #6, #9, #10, #15 v5 (22-Apr-2025) - Further fix for 32-bit ARM alignment in tcp.c (Simon Horman <horms(a)kernel.org>) v4 (18-Apr-2025) - Fix 32-bit ARM assertion for alignment requirement (Simon Horman <horms(a)kernel.org>) v3 (14-Apr-2025) - Fix patch apply issue in v2 (Jakub Kicinski <kuba(a)kernel.org>) v2 (18-Mar-2025) - Add one missing patch from the previous AccECN protocol preparation patch series to this patch series. 
--- Chia-Yu Chang (6): tcp: reorganize tcp_sock_write_txrx group for variables later tcp: ecn functions in separated include file tcp: Add wait_third_ack flag for ECN negotiation in simultaneous connect tcp: accecn: AccECN option send control tcp: accecn: AccECN option failure handling tcp: accecn: try to fit AccECN option with SACK Ilpo Järvinen (9): tcp: reorganize SYN ECN code tcp: fast path functions later tcp: AccECN core tcp: accecn: AccECN negotiation tcp: accecn: add AccECN rx byte counters tcp: accecn: AccECN needs to know delivered bytes tcp: sack option handling improvements tcp: accecn: AccECN option tcp: accecn: AccECN option ceb/cep and ACE field multi-wrap heuristics Documentation/networking/ip-sysctl.rst | 55 +- .../networking/net_cachelines/tcp_sock.rst | 13 + include/linux/tcp.h | 33 +- include/net/netns/ipv4.h | 2 + include/net/tcp.h | 87 ++- include/net/tcp_ecn.h | 648 ++++++++++++++++++ include/uapi/linux/tcp.h | 7 + net/ipv4/syncookies.c | 4 + net/ipv4/sysctl_net_ipv4.c | 19 + net/ipv4/tcp.c | 29 +- net/ipv4/tcp_input.c | 371 ++++++++-- net/ipv4/tcp_ipv4.c | 8 +- net/ipv4/tcp_minisocks.c | 40 +- net/ipv4/tcp_output.c | 297 ++++++-- net/ipv6/syncookies.c | 2 + net/ipv6/tcp_ipv6.c | 1 + 16 files changed, 1429 insertions(+), 187 deletions(-) create mode 100644 include/net/tcp_ecn.h -- 2.34.1

5 months, 2 weeks

1
15
0 0

[PATCH v6 14/14] selftests/sched_ext: Add test for DL server total_bw consistency

by Joel Fernandes

Add a new kselftest to verify that the total_bw value in /sys/kernel/debug/sched/debug remains consistent across all CPUs under different sched_ext BPF program states: 1. Before a BPF scheduler is loaded 2. While a BPF scheduler is loaded and active 3. After a BPF scheduler is unloaded The test runs CPU stress threads to ensure DL server bandwidth values stabilize before checking consistency. This helps catch potential issues with DL server bandwidth accounting during sched_ext transitions. Signed-off-by: Joel Fernandes <joelagnelf(a)nvidia.com> --- tools/testing/selftests/sched_ext/Makefile | 1 + tools/testing/selftests/sched_ext/total_bw.c | 286 +++++++++++++++++++ 2 files changed, 287 insertions(+) create mode 100644 tools/testing/selftests/sched_ext/total_bw.c diff --git a/tools/testing/selftests/sched_ext/Makefile b/tools/testing/selftests/sched_ext/Makefile index f0a8cba3a99f..d48be158b0a1 100644 --- a/tools/testing/selftests/sched_ext/Makefile +++ b/tools/testing/selftests/sched_ext/Makefile @@ -184,6 +184,7 @@ auto-test-targets := \ select_cpu_vtime \ rt_stall \ test_example \ + total_bw \ testcase-targets := $(addsuffix .o,$(addprefix $(SCXOBJ_DIR)/,$(auto-test-targets))) diff --git a/tools/testing/selftests/sched_ext/total_bw.c b/tools/testing/selftests/sched_ext/total_bw.c new file mode 100644 index 000000000000..6b81d6c51054 --- /dev/null +++ b/tools/testing/selftests/sched_ext/total_bw.c @@ -0,0 +1,286 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test to verify that total_bw value remains consistent across all CPUs + * in different BPF program states. + * + * Copyright (C) 2025 Nvidia Corporation. 
+ */ +#include <bpf/bpf.h> +#include <errno.h> +#include <pthread.h> +#include <scx/common.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/wait.h> +#include <unistd.h> +#include "minimal.bpf.skel.h" +#include "scx_test.h" + +#define MAX_CPUS 512 +#define STABILIZATION_TIME_SEC 5 +#define STRESS_DURATION_SEC 5 + +struct total_bw_ctx { + struct minimal *skel; + long baseline_bw[MAX_CPUS]; + int nr_cpus; +}; + +static void *cpu_stress_thread(void *arg) +{ + volatile int i; + time_t end_time = time(NULL) + STRESS_DURATION_SEC; + + while (time(NULL) < end_time) { + for (i = 0; i < 1000000; i++); + } + + return NULL; +} + +/* + * The first enqueue on a CPU causes the DL server to start, for that + * reason run stressor threads in the hopes it schedules on all CPUs. + */ +static int run_cpu_stress(int nr_cpus) +{ + pthread_t *threads; + int i, ret = 0; + + threads = calloc(nr_cpus, sizeof(pthread_t)); + if (!threads) + return -ENOMEM; + + /* Create threads to run on each CPU */ + for (i = 0; i < nr_cpus; i++) { + if (pthread_create(&threads[i], NULL, cpu_stress_thread, NULL)) { + ret = -errno; + fprintf(stderr, "Failed to create thread %d: %s\n", i, strerror(-ret)); + break; + } + } + + /* Wait for all threads to complete */ + for (i = 0; i < nr_cpus; i++) { + if (threads[i]) + pthread_join(threads[i], NULL); + } + + free(threads); + return ret; +} + +static int read_total_bw_values(long *bw_values, int max_cpus) +{ + FILE *fp; + char line[256]; + int cpu_count = 0; + + fp = fopen("/sys/kernel/debug/sched/debug", "r"); + if (!fp) { + SCX_ERR("Failed to open debug file"); + return -1; + } + + while (fgets(line, sizeof(line), fp)) { + char *bw_str = strstr(line, "total_bw"); + if (bw_str) { + bw_str = strchr(bw_str, ':'); + if (bw_str) { + /* Only store up to max_cpus values */ + if (cpu_count < max_cpus) { + bw_values[cpu_count] = atol(bw_str + 1); + } + cpu_count++; + } + } + } + + fclose(fp); + return cpu_count; +} + +static bool 
verify_total_bw_consistency(long *bw_values, int count) +{ + int i; + long first_value; + + if (count <= 0) + return false; + + first_value = bw_values[0]; + + for (i = 1; i < count; i++) { + if (bw_values[i] != first_value) { + SCX_ERR("Inconsistent total_bw: CPU0=%ld, CPU%d=%ld", + first_value, i, bw_values[i]); + return false; + } + } + + return true; +} + +static int fetch_verify_total_bw(long *bw_values, int nr_cpus) +{ + int attempts = 0; + int max_attempts = 10; + int count; + + /* + * The first enqueue on a CPU causes the DL server to start, for that + * reason run stressor threads in the hopes it schedules on all CPUs. + */ + if (run_cpu_stress(nr_cpus) < 0) { + SCX_ERR("Failed to run CPU stress"); + return -1; + } + + /* Let things settle down */ + sleep(STABILIZATION_TIME_SEC); + + /* Try multiple times to get stable values */ + while (attempts < max_attempts) { + count = read_total_bw_values(bw_values, nr_cpus); + fprintf(stderr, "Read %d total_bw values (testing %d CPUs)\n", count, nr_cpus); + /* If system has more CPUs than we're testing, that's OK */ + if (count < nr_cpus) { + SCX_ERR("Expected at least %d CPUs, got %d", nr_cpus, count); + attempts++; + sleep(1); + continue; + } + + /* Only verify the CPUs we're testing */ + if (verify_total_bw_consistency(bw_values, nr_cpus)) { + fprintf(stderr, "Values are consistent: %ld\n", bw_values[0]); + return 0; + } + + attempts++; + sleep(1); + } + + return -1; +} + +static enum scx_test_status setup(void **ctx) +{ + struct total_bw_ctx *test_ctx; + + if (access("/sys/kernel/debug/sched/debug", R_OK) != 0) { + fprintf(stderr, "Skipping test: debugfs sched/debug not accessible\n"); + return SCX_TEST_SKIP; + } + + test_ctx = calloc(1, sizeof(*test_ctx)); + if (!test_ctx) + return SCX_TEST_FAIL; + + test_ctx->nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); + if (test_ctx->nr_cpus <= 0) { + free(test_ctx); + return SCX_TEST_FAIL; + } + + /* If system has more CPUs than MAX_CPUS, just test the first MAX_CPUS */ + if 
(test_ctx->nr_cpus > MAX_CPUS) { + test_ctx->nr_cpus = MAX_CPUS; + } + + /* Test scenario 1: BPF program not loaded */ + /* Read and verify baseline total_bw before loading BPF program */ + fprintf(stderr, "BPF prog initially not loaded, reading total_bw values\n"); + if (fetch_verify_total_bw(test_ctx->baseline_bw, test_ctx->nr_cpus) < 0) { + SCX_ERR("Failed to get stable baseline values"); + free(test_ctx); + return SCX_TEST_FAIL; + } + + /* Load the BPF skeleton */ + test_ctx->skel = minimal__open(); + if (!test_ctx->skel) { + free(test_ctx); + return SCX_TEST_FAIL; + } + + SCX_ENUM_INIT(test_ctx->skel); + if (minimal__load(test_ctx->skel)) { + minimal__destroy(test_ctx->skel); + free(test_ctx); + return SCX_TEST_FAIL; + } + + *ctx = test_ctx; + return SCX_TEST_PASS; +} + +static enum scx_test_status run(void *ctx) +{ + struct total_bw_ctx *test_ctx = ctx; + struct bpf_link *link; + long loaded_bw[MAX_CPUS]; + long unloaded_bw[MAX_CPUS]; + int i; + + /* Test scenario 2: BPF program loaded */ + link = bpf_map__attach_struct_ops(test_ctx->skel->maps.minimal_ops); + if (!link) { + SCX_ERR("Failed to attach scheduler"); + return SCX_TEST_FAIL; + } + + fprintf(stderr, "BPF program loaded, reading total_bw values\n"); + if (fetch_verify_total_bw(loaded_bw, test_ctx->nr_cpus) < 0) { + SCX_ERR("Failed to get stable values with BPF loaded"); + bpf_link__destroy(link); + return SCX_TEST_FAIL; + } + bpf_link__destroy(link); + + /* Test scenario 3: BPF program unloaded */ + fprintf(stderr, "BPF program unloaded, reading total_bw values\n"); + if (fetch_verify_total_bw(unloaded_bw, test_ctx->nr_cpus) < 0) { + SCX_ERR("Failed to get stable values after BPF unload"); + return SCX_TEST_FAIL; + } + + /* Verify all three scenarios have the same total_bw values */ + for (i = 0; i < test_ctx->nr_cpus; i++) { + if (test_ctx->baseline_bw[i] != loaded_bw[i]) { + SCX_ERR("CPU%d: baseline_bw=%ld != loaded_bw=%ld", + i, test_ctx->baseline_bw[i], loaded_bw[i]); + return SCX_TEST_FAIL; + } 
+ + if (test_ctx->baseline_bw[i] != unloaded_bw[i]) { + SCX_ERR("CPU%d: baseline_bw=%ld != unloaded_bw=%ld", + i, test_ctx->baseline_bw[i], unloaded_bw[i]); + return SCX_TEST_FAIL; + } + } + + fprintf(stderr, "All total_bw values are consistent across all scenarios\n"); + return SCX_TEST_PASS; +} + +static void cleanup(void *ctx) +{ + struct total_bw_ctx *test_ctx = ctx; + + if (test_ctx) { + if (test_ctx->skel) + minimal__destroy(test_ctx->skel); + free(test_ctx); + } +} + +struct scx_test total_bw = { + .name = "total_bw", + .description = "Verify total_bw consistency across BPF program states", + .setup = setup, + .run = run, + .cleanup = cleanup, +}; +REGISTER_SCX_TEST(&total_bw) -- 2.34.1

5 months, 2 weeks

1
0
0 0

[PATCH v6 12/14] selftests/sched_ext: Add test for sched_ext dl_server

by Joel Fernandes

From: Andrea Righi <arighi(a)nvidia.com> Add a selftest to validate the correct behavior of the deadline server for the ext_sched_class. [ Joel: Replaced occurrences of CFS in the test with EXT. ] Signed-off-by: Joel Fernandes <joelagnelf(a)nvidia.com> Signed-off-by: Andrea Righi <arighi(a)nvidia.com> --- tools/testing/selftests/sched_ext/Makefile | 1 + .../selftests/sched_ext/rt_stall.bpf.c | 23 ++ tools/testing/selftests/sched_ext/rt_stall.c | 213 ++++++++++++++++++ 3 files changed, 237 insertions(+) create mode 100644 tools/testing/selftests/sched_ext/rt_stall.bpf.c create mode 100644 tools/testing/selftests/sched_ext/rt_stall.c diff --git a/tools/testing/selftests/sched_ext/Makefile b/tools/testing/selftests/sched_ext/Makefile index 9d9d6b4c38b0..f0a8cba3a99f 100644 --- a/tools/testing/selftests/sched_ext/Makefile +++ b/tools/testing/selftests/sched_ext/Makefile @@ -182,6 +182,7 @@ auto-test-targets := \ select_cpu_dispatch_bad_dsq \ select_cpu_dispatch_dbl_dsp \ select_cpu_vtime \ + rt_stall \ test_example \ testcase-targets := $(addsuffix .o,$(addprefix $(SCXOBJ_DIR)/,$(auto-test-targets))) diff --git a/tools/testing/selftests/sched_ext/rt_stall.bpf.c b/tools/testing/selftests/sched_ext/rt_stall.bpf.c new file mode 100644 index 000000000000..80086779dd1e --- /dev/null +++ b/tools/testing/selftests/sched_ext/rt_stall.bpf.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * A scheduler that verified if RT tasks can stall SCHED_EXT tasks. + * + * Copyright (c) 2025 NVIDIA Corporation. 
+ */ + +#include <scx/common.bpf.h> + +char _license[] SEC("license") = "GPL"; + +UEI_DEFINE(uei); + +void BPF_STRUCT_OPS(rt_stall_exit, struct scx_exit_info *ei) +{ + UEI_RECORD(uei, ei); +} + +SEC(".struct_ops.link") +struct sched_ext_ops rt_stall_ops = { + .exit = (void *)rt_stall_exit, + .name = "rt_stall", +}; diff --git a/tools/testing/selftests/sched_ext/rt_stall.c b/tools/testing/selftests/sched_ext/rt_stall.c new file mode 100644 index 000000000000..d4cb545ebfd8 --- /dev/null +++ b/tools/testing/selftests/sched_ext/rt_stall.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2025 NVIDIA Corporation. + */ +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sched.h> +#include <sys/prctl.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <time.h> +#include <linux/sched.h> +#include <signal.h> +#include <bpf/bpf.h> +#include <scx/common.h> +#include <sys/wait.h> +#include <unistd.h> +#include "rt_stall.bpf.skel.h" +#include "scx_test.h" +#include "../kselftest.h" + +#define CORE_ID 0 /* CPU to pin tasks to */ +#define RUN_TIME 5 /* How long to run the test in seconds */ + +/* Simple busy-wait function for test tasks */ +static void process_func(void) +{ + while (1) { + /* Busy wait */ + for (volatile unsigned long i = 0; i < 10000000UL; i++); + } +} + +/* Set CPU affinity to a specific core */ +static void set_affinity(int cpu) +{ + cpu_set_t mask; + + CPU_ZERO(&mask); + CPU_SET(cpu, &mask); + if (sched_setaffinity(0, sizeof(mask), &mask) != 0) { + perror("sched_setaffinity"); + exit(EXIT_FAILURE); + } +} + +/* Set task scheduling policy and priority */ +static void set_sched(int policy, int priority) +{ + struct sched_param param; + + param.sched_priority = priority; + if (sched_setscheduler(0, policy, &param) != 0) { + perror("sched_setscheduler"); + exit(EXIT_FAILURE); + } +} + +/* Get process runtime from /proc/<pid>/stat */ +static float get_process_runtime(int pid) +{ + char 
path[256]; + FILE *file; + long utime, stime; + int fields; + + snprintf(path, sizeof(path), "/proc/%d/stat", pid); + file = fopen(path, "r"); + if (file == NULL) { + perror("Failed to open stat file"); + return -1; + } + + /* Skip the first 13 fields and read the 14th and 15th */ + fields = fscanf(file, + "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %lu %lu", + &utime, &stime); + fclose(file); + + if (fields != 2) { + fprintf(stderr, "Failed to read stat file\n"); + return -1; + } + + /* Calculate the total time spent in the process */ + long total_time = utime + stime; + long ticks_per_second = sysconf(_SC_CLK_TCK); + float runtime_seconds = total_time * 1.0 / ticks_per_second; + + return runtime_seconds; +} + +static enum scx_test_status setup(void **ctx) +{ + struct rt_stall *skel; + + skel = rt_stall__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(rt_stall__load(skel), "Failed to load skel"); + + *ctx = skel; + + return SCX_TEST_PASS; +} + +static bool sched_stress_test(void) +{ + float cfs_runtime, rt_runtime; + int cfs_pid, rt_pid; + float expected_min_ratio = 0.04; /* 4% */ + + ksft_print_header(); + ksft_set_plan(1); + + /* Create and set up a EXT task */ + cfs_pid = fork(); + if (cfs_pid == 0) { + set_affinity(CORE_ID); + process_func(); + exit(0); + } else if (cfs_pid < 0) { + perror("fork for EXT task"); + ksft_exit_fail(); + } + + /* Create an RT task */ + rt_pid = fork(); + if (rt_pid == 0) { + set_affinity(CORE_ID); + set_sched(SCHED_FIFO, 50); + process_func(); + exit(0); + } else if (rt_pid < 0) { + perror("fork for RT task"); + ksft_exit_fail(); + } + + /* Let the processes run for the specified time */ + sleep(RUN_TIME); + + /* Get runtime for the EXT task */ + cfs_runtime = get_process_runtime(cfs_pid); + if (cfs_runtime != -1) + ksft_print_msg("Runtime of EXT task (PID %d) is %f seconds\n", cfs_pid, cfs_runtime); + else + ksft_exit_fail_msg("Error getting runtime for EXT task (PID %d)\n", cfs_pid); 
+ + /* Get runtime for the RT task */ + rt_runtime = get_process_runtime(rt_pid); + if (rt_runtime != -1) + ksft_print_msg("Runtime of RT task (PID %d) is %f seconds\n", rt_pid, rt_runtime); + else + ksft_exit_fail_msg("Error getting runtime for RT task (PID %d)\n", rt_pid); + + /* Kill the processes */ + kill(cfs_pid, SIGKILL); + kill(rt_pid, SIGKILL); + waitpid(cfs_pid, NULL, 0); + waitpid(rt_pid, NULL, 0); + + /* Verify that the scx task got enough runtime */ + float actual_ratio = cfs_runtime / (cfs_runtime + rt_runtime); + ksft_print_msg("EXT task got %.2f%% of total runtime\n", actual_ratio * 100); + + if (actual_ratio >= expected_min_ratio) { + ksft_test_result_pass("PASS: EXT task got more than %.2f%% of runtime\n", + expected_min_ratio * 100); + return true; + } else { + ksft_test_result_fail("FAIL: EXT task got less than %.2f%% of runtime\n", + expected_min_ratio * 100); + return false; + } +} + +static enum scx_test_status run(void *ctx) +{ + struct rt_stall *skel = ctx; + struct bpf_link *link; + bool res; + + link = bpf_map__attach_struct_ops(skel->maps.rt_stall_ops); + SCX_FAIL_IF(!link, "Failed to attach scheduler"); + + res = sched_stress_test(); + + SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_NONE)); + bpf_link__destroy(link); + + if (!res) + ksft_exit_fail(); + + return SCX_TEST_PASS; +} + +static void cleanup(void *ctx) +{ + struct rt_stall *skel = ctx; + + rt_stall__destroy(skel); +} + +struct scx_test rt_stall = { + .name = "rt_stall", + .description = "Verify that RT tasks cannot stall SCHED_EXT tasks", + .setup = setup, + .run = run, + .cleanup = cleanup, +}; +REGISTER_SCX_TEST(&rt_stall) -- 2.34.1

5 months, 2 weeks

1
0
0 0

[PATCH net-next v12 0/8] Support rate management on traffic classes in devlink and mlx5

by Mark Bloch

V12: - Fixed YAML indentation in devlink.yaml. - Removed unused total variable from devlink_nl_rate_tc_bw_set(). - Quoted shell variables in devlink.sh and split declarations to fix shellcheck warnings. - Added missing DevlinkFamily imports in selftests to fix pylint warnings. - Pulled changes from net-next to enable these adjustments: Inclusion of DevlinkFamily in YNL test libs. Introduction of nlmsg_for_each_attr_type() macro and its use in nfsd. V11: - Refactored the devlink code to accept relative TC bandwidth share values instead of percentages. - Updated documentation to clarify that values are interpreted as relative shares. - Refactored the logic in mlx5 to support proportional scaling for tc-bw values. - Switched to `nlmsg_for_each_attr_type()` for cleaner attribute parsing. - Added a hardware selftest to validate TC bandwidth behavior. - Refactored esw_qos_is_node_empty for readability. V10: - Added netdevsim selftest for tc-bw ops. - Dropped header: field as it’s unnecessary for local constants in devlink.yaml. V9: - Defined DEVLINK_RATE_TCS_MAX as 8 in uapi/linux/devlink.h. - Replaced IEEE_8021QAZ_MAX_TCS with DEVLINK_RATE_TCS_MAX throughout the code. - Updated devlink-rate-tc-index-max spec to reference the correct UAPI header. V8: - Limit line width to 80 characters in mlx5 changes instead of 100. - Increase the scheduling node levels to support TC arbitration. - Ensure parent nodes are set correctly in all code paths that extend the hierarchy depth for TC arbitration. - Extended the cover letter with the ongoing discussion on devlink-rate and net-shapers. - Extended the cover letter with the Netdev talk link on this series. V7: - Fixed disabling tc-bw on leaf nodes that did not have tc-bw configured. - Fixed an issue where tc-bw was disabled on a node with assigned vports, ensuring that vport->qos.sched_node->parent is correctly updated with the cloned node. - Declared a constant for the maximum allowed Traffic Class index in devlink rate. 
- Added a range check to validate rate-tc-index. - Added documentation for the tc-bw argument. - Add a validation check to ensure that the total bandwidth assigned to all traffic classes sums to 100. V6: - Addressed comments on devlink patch #3. - Removed first 4 IFC patches, to be pulled from mlx5-next. V5: - Fix warning in devlink_nl_rate_tc_bw_set(). - Fix target branch of patch #4. V4: - Renamed the nested attribute for traffic class bandwidth to DEVLINK_ATTR_RATE_TC_BWS. - Changed the order of the attributes in `devlink.h`. - Refactored the initialization tc-bw array in devlink_nl_rate_tc_bw_set(). - Added extack messages to provide clear feedback on issues with tc-bw arguments. - Updated `rate-tc-bws` to support a multi-attr set, where each attribute includes an index and the corresponding bandwidth for that traffic class. - Handled the issue where the user could provide DEVLINK_ATTR_RATE_TC_BWS with duplicate indices. - Provided ynl examples in patch [1/5] commit message. - Take IFC patches to beginning of the series, targeted for mlx5-next. V3: - Dropped rate-tc-index, using tc-bw array index instead. - Renamed rate-bw to rate-tc-bw. - Documented what the rate-tc-bw represents and added a range check for validation. - Introduced devlink_nl_rate_tc_bw_set() to parse and set the TC bandwidth values. - Updated the user API in the commit message of patch 1/6 to ensure bandwidths sum equals 100. - Fixed missing filling of rate-parent in devlink_nl_rate_fill(). V2: - Included <linux/dcbnl.h> in devlink.h to resolve missing IEEE_8021QAZ_MAX_TCS definition. - Refactored the rate-tc-bw attribute structure to use a separate rate-tc-index. - Updated patch 2/6 title. This patch series extends the devlink-rate API to support traffic class (TC) bandwidth management, enabling more granular control over traffic shaping and rate limiting across multiple TCs. The API now allows users to specify bandwidth proportions for different traffic classes in a single command. 
This is particularly useful for managing Enhanced Transmission Selection (ETS) for groups of Virtual Functions (VFs), allowing precise bandwidth allocation across traffic classes. Additionally the series refines the QoS handling in net/mlx5 to support TC arbitration and bandwidth management on vports and rate nodes. Discussions on traffic class shaping in net-shapers began in V5 [1], where we discussed with maintainers whether net-shapers should support traffic classes and how this could be implemented. Later, after further conversations with Paolo Abeni and Simon Horman, Cosmin provided an update [2], confirming that net-shapers' tree-based hierarchy aligns well with traffic classes when treated as distinct subsets of netdev queues. Since mlx5 enforces a 1:1 mapping between TX queues and traffic classes, this approach seems feasible, though some open questions remain regarding queue reconfiguration and certain mlx5 scheduling behaviors. Building on that discussion, Cosmin has now shared a concrete implementation plan on the netdev mailing list [3]. The plan, developed in collaboration with Paolo and Simon, outlines how net-shapers can be extended to support the same use cases currently covered by devlink-rate, with the eventual goal of aligning both and simplifying the shaping infrastructure in the kernel. This work was presented at Netdev 0x19 in Zagreb [4]. There we presented how TC scheduling is enforced in mlx5 hardware, which led to discussions on the mailing list. A summary of how things work: Classification means labeling a packet with a traffic class based on the packet's DSCP or VLAN PCP field, then treating packets with different traffic classes differently during transmit processing. In a virtualized setup, VFs are untrusted and do not control classification or shaping. Classification is done by the hardware using a prio-to-TC mapping set by the hypervisor. 
VFs only select which send queue to use and are expected to respect the classification logic by sending each traffic class on its dedicated queue. As stated in the net-shapers plan [3], each transmit queue should carry only a single traffic class. Mixing classes in a single queue can lead to HOL blocking. In the mlx5 implementation, if the queue used does not match the classified traffic class, the hardware moves the queue to the correct TC scheduler. This movement is not a reclassification; it’s a necessary enforcement step to ensure traffic class isolation is maintained. Extend devlink-rate API to support rate management on TCs: - devlink: Extend the devlink rate API to support traffic class bandwidth management Introduce a no-op implementation: - net/mlx5: Add no-op implementation for setting tc-bw on rate objects Add support for enabling and disabling TC QoS on vports and nodes: - net/mlx5: Add support for setting tc-bw on nodes - net/mlx5: Add traffic class scheduling support for vport QoS Support for setting tc-bw on rate objects: - net/mlx5: Manage TC arbiter nodes and implement full support for tc-bw [1] https://lore.kernel.org/netdev/20241204220931.254964-1-tariqt@nvidia.com/ [2] https://lore.kernel.org/netdev/67df1a562614b553dcab043f347a0d7c5393ff83.cam… [3] https://lore.kernel.org/netdev/d9831d0c940a7b77419abe7c7330e822bbfd1cfb.cam… [4] https://netdevconf.info/0x19/sessions/talk/optimizing-bandwidth-allocation-… Carolina Jubran (8): netlink: introduce type-checking attribute iteration for nlmsg devlink: Extend devlink rate API with traffic classes bandwidth management selftest: netdevsim: Add devlink rate tc-bw test net/mlx5: Add no-op implementation for setting tc-bw on rate objects net/mlx5: Add support for setting tc-bw on nodes net/mlx5: Add traffic class scheduling support for vport QoS net/mlx5: Manage TC arbiter nodes and implement full support for tc-bw selftests: drv-net: Add test for devlink-rate traffic class bandwidth distribution 
Documentation/netlink/specs/devlink.yaml | 32 +- .../networking/devlink/devlink-port.rst | 8 + .../net/ethernet/mellanox/mlx5/core/devlink.c | 2 + .../net/ethernet/mellanox/mlx5/core/esw/qos.c | 1037 ++++++++++++++++- .../net/ethernet/mellanox/mlx5/core/esw/qos.h | 8 + .../net/ethernet/mellanox/mlx5/core/eswitch.h | 14 +- drivers/net/netdevsim/dev.c | 43 + drivers/net/netdevsim/netdevsim.h | 1 + drivers/net/vxlan/vxlan_vnifilter.c | 13 +- fs/nfsd/nfsctl.c | 36 +- include/net/devlink.h | 8 + include/net/netlink.h | 14 + include/uapi/linux/devlink.h | 9 + net/devlink/netlink_gen.c | 15 +- net/devlink/netlink_gen.h | 1 + net/devlink/rate.c | 127 ++ .../drivers/net/hw/devlink_rate_tc_bw.py | 466 ++++++++ .../drivers/net/hw/lib/py/__init__.py | 2 +- .../selftests/drivers/net/lib/py/__init__.py | 2 +- .../drivers/net/netdevsim/devlink.sh | 53 + .../testing/selftests/net/lib/py/__init__.py | 2 +- tools/testing/selftests/net/lib/py/ynl.py | 5 + 22 files changed, 1825 insertions(+), 73 deletions(-) create mode 100755 tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py base-commit: 20a0c20f82acf46d5731a11743e7c7ac4de25db8 -- 2.34.1

5 months, 2 weeks

2
9
0 0

[PATCH] selftests/futex: Add futex_numa to .gitignore

by Terry Tritton

futex_numa was never added to the .gitignore file. Add it. Signed-off-by: Terry Tritton <terry.tritton(a)linaro.org> --- tools/testing/selftests/futex/functional/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore index 7b24ae89594a..776ad658f75e 100644 --- a/tools/testing/selftests/futex/functional/.gitignore +++ b/tools/testing/selftests/futex/functional/.gitignore @@ -11,3 +11,4 @@ futex_wait_timeout futex_wait_uninitialized_heap futex_wait_wouldblock futex_waitv +futex_numa -- 2.39.5

5 months, 2 weeks

2
1
0 0

[PATCH] selftests/kexec: fix test_kexec_jump build and ignore generated binary

by Moon Hee Lee

The test_kexec_jump program builds correctly when invoked from the top-level selftests/Makefile, which explicitly sets the OUTPUT variable. However, building directly in tools/testing/selftests/kexec fails with: make: *** No rule to make target '/test_kexec_jump', needed by 'test_kexec_jump.sh'. Stop. This failure occurs because the Makefile rule relies on $(OUTPUT), which is undefined in direct builds. Fix this by listing test_kexec_jump in TEST_GEN_PROGS, the standard way to declare generated test binaries in the kselftest framework. This ensures the binary is built regardless of invocation context and properly removed by make clean. Also add the binary to .gitignore to avoid tracking it in version control. Signed-off-by: Moon Hee Lee <moonhee.lee.ca(a)gmail.com> --- tools/testing/selftests/kexec/.gitignore | 2 ++ tools/testing/selftests/kexec/Makefile | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/kexec/.gitignore diff --git a/tools/testing/selftests/kexec/.gitignore b/tools/testing/selftests/kexec/.gitignore new file mode 100644 index 000000000000..5f3d9e089ae8 --- /dev/null +++ b/tools/testing/selftests/kexec/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +test_kexec_jump diff --git a/tools/testing/selftests/kexec/Makefile b/tools/testing/selftests/kexec/Makefile index e3000ccb9a5d..874cfdd3b75b 100644 --- a/tools/testing/selftests/kexec/Makefile +++ b/tools/testing/selftests/kexec/Makefile @@ -12,7 +12,7 @@ include ../../../scripts/Makefile.arch ifeq ($(IS_64_BIT)$(ARCH_PROCESSED),1x86) TEST_PROGS += test_kexec_jump.sh -test_kexec_jump.sh: $(OUTPUT)/test_kexec_jump +TEST_GEN_PROGS := test_kexec_jump endif include ../lib.mk -- 2.43.0

5 months, 2 weeks

3
2
0 0

[kvm-unit-tests PATCH v3 1/2] riscv: Add RV_INSN_LEN to processor.h

by Jesse Taube

When handling traps and faults it is often necessary to know the size of the instruction at epc. Add RV_INSN_LEN to calculate the instruction size. Signed-off-by: Jesse Taube <jesse(a)rivosinc.com> --- lib/riscv/asm/processor.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/riscv/asm/processor.h b/lib/riscv/asm/processor.h index 40104272..631ce226 100644 --- a/lib/riscv/asm/processor.h +++ b/lib/riscv/asm/processor.h @@ -7,6 +7,8 @@ #define EXCEPTION_CAUSE_MAX 24 #define INTERRUPT_CAUSE_MAX 16 +#define RV_INSN_LEN(insn) ((((insn) & 0x3) < 0x3) ? 2 : 4) + typedef void (*exception_fn)(struct pt_regs *); struct thread_info { -- 2.43.0

5 months, 2 weeks

2
4
0 0

[kvm-unit-tests PATCH v2] riscv: Allow SBI_CONSOLE with no uart in device tree

by Jesse Taube

When CONFIG_SBI_CONSOLE is enabled and there is no uart defined in the device tree kvm-unit-tests fails to start. Only abort when uart is not found in device tree if SBI_CONSOLE is false Signed-off-by: Jesse Taube <jesse(a)rivosinc.com> --- lib/riscv/io.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/lib/riscv/io.c b/lib/riscv/io.c index fb40adb7..e64e9253 100644 --- a/lib/riscv/io.c +++ b/lib/riscv/io.c @@ -30,7 +30,6 @@ static u32 uart0_reg_width = 1; static u32 uart0_reg_shift; static struct spinlock uart_lock; -#ifndef CONFIG_SBI_CONSOLE static u32 uart0_read(u32 num) { u32 offset = num << uart0_reg_shift; @@ -54,7 +53,6 @@ static void uart0_write(u32 num, u32 val) else writel(val, uart0_base + offset); } -#endif static void uart0_init_fdt(void) { @@ -73,11 +71,16 @@ static void uart0_init_fdt(void) break; } +#ifdef CONFIG_SBI_CONSOLE + uart0_base = NULL; + return; +#else if (ret) { printf("%s: Compatible uart not found in the device tree, aborting...\n", __func__); abort(); } +#endif } else { const fdt32_t *val; int len; @@ -116,8 +119,8 @@ void io_init(void) } } -#ifdef CONFIG_SBI_CONSOLE -void puts(const char *s) +void sbi_puts(const char *s); +void sbi_puts(const char *s) { phys_addr_t addr = virt_to_phys((void *)s); unsigned long hi = upper_32_bits(addr); @@ -127,9 +130,11 @@ void puts(const char *s) sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_WRITE, strlen(s), lo, hi, 0, 0, 0); spin_unlock(&uart_lock); } -#else -void puts(const char *s) + +void uart0_puts(const char *s); +void uart0_puts(const char *s) { + assert(uart0_base); spin_lock(&uart_lock); while (*s) { while (!(uart0_read(UART_LSR_OFFSET) & UART_LSR_THRE)) @@ -138,7 +143,15 @@ void puts(const char *s) } spin_unlock(&uart_lock); } + +void puts(const char *s) +{ +#ifdef CONFIG_SBI_CONSOLE + sbi_puts(s); +#else + uart0_puts(s); #endif +} /* * Defining halt to take 'code' as an argument guarantees that it will -- 2.43.0

5 months, 2 weeks

2
2
0 0

[kvm-unit-tests PATCH v2] riscv: lib: sbi_shutdown add pass/fail exit code.

by Jesse Taube

When exiting it may be useful for the sbi implementation to know if kvm-unit-tests passed or failed. Add exit code to sbi_shutdown, and use it in exit() to pass success/failure (0/1) to sbi. Signed-off-by: Jesse Taube <jesse(a)rivosinc.com> --- lib/riscv/asm/sbi.h | 2 +- lib/riscv/io.c | 2 +- lib/riscv/sbi.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/riscv/asm/sbi.h b/lib/riscv/asm/sbi.h index a5738a5c..de11c109 100644 --- a/lib/riscv/asm/sbi.h +++ b/lib/riscv/asm/sbi.h @@ -250,7 +250,7 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, unsigned long arg3, unsigned long arg4, unsigned long arg5); -void sbi_shutdown(void); +void sbi_shutdown(unsigned int code); struct sbiret sbi_hart_start(unsigned long hartid, unsigned long entry, unsigned long sp); struct sbiret sbi_hart_stop(void); struct sbiret sbi_hart_get_status(unsigned long hartid); diff --git a/lib/riscv/io.c b/lib/riscv/io.c index fb40adb7..0bde25d4 100644 --- a/lib/riscv/io.c +++ b/lib/riscv/io.c @@ -150,7 +150,7 @@ void halt(int code); void exit(int code) { printf("\nEXIT: STATUS=%d\n", ((code) << 1) | 1); - sbi_shutdown(); + sbi_shutdown(!!code); halt(code); __builtin_unreachable(); } diff --git a/lib/riscv/sbi.c b/lib/riscv/sbi.c index 2959378f..9dd11e9d 100644 --- a/lib/riscv/sbi.c +++ b/lib/riscv/sbi.c @@ -107,9 +107,9 @@ struct sbiret sbi_sse_inject(unsigned long event_id, unsigned long hart_id) return sbi_ecall(SBI_EXT_SSE, SBI_EXT_SSE_INJECT, event_id, hart_id, 0, 0, 0, 0); } -void sbi_shutdown(void) +void sbi_shutdown(unsigned int code) { - sbi_ecall(SBI_EXT_SRST, 0, 0, 0, 0, 0, 0, 0); + sbi_ecall(SBI_EXT_SRST, 0, 0, code, 0, 0, 0, 0); puts("SBI shutdown failed!\n"); } -- 2.43.0

5 months, 2 weeks

2
4
0 0

[kvm-unit-tests PATCH v8] riscv: sbi: Add SBI Debug Triggers Extension tests

by Jesse Taube

Add tests for the DBTR SBI extension. Signed-off-by: Jesse Taube <jesse(a)rivosinc.com> Reviewed-by: Charlie Jenkins <charlie(a)rivosinc.com> Tested-by: Charlie Jenkins <charlie(a)rivosinc.com> --- V1 -> V2: - Call report_prefix_pop before returning - Disable compressed instructions in exec_call, update related comment - Remove extra "| 1" in dbtr_test_load - Remove extra newlines - Remove extra tabs in check_exec - Remove typedefs from enums - Return when dbtr_install_trigger fails - s/avalible/available/g - s/unistall/uninstall/g V2 -> V3: - Change SBI_DBTR_SHMEM_INVALID_ADDR to -1UL - Move all dbtr functions to sbi-dbtr.c - Move INSN_LEN to processor.h - Update include list - Use C-style comments V3 -> V4: - Include libcflat.h - Remove #define SBI_DBTR_SHMEM_INVALID_ADDR V4 -> V5: - Sort includes - Add kfail for update triggers V5 -> V6: - Add assert in gen_tdata1 - Add prefix to dbtr_test_type - Add TRIG_STATE_DMODE - Add TRIG_STATE_RESERVED - Align function parameters with opening parenthesis - Change OpenSBI < v1.7 to < v1.5 - Consistently use spaces in prefix rather than _ - Export split_phys_addr - Fix MCONTROL_U and MCONTROL_M mix up - Fix swapped VU and VS - Move /* to own line - Print type in dbtr_test_type - Remove _BIT suffix from macros - Remove duplicate MODE_S - Remove spaces before include - Rename tdata1,2 to trigger and control in dbtr_install_trigger - Report skip in dbtr_test_multiple - Report variables in info not pass or fail - s/save/store/g - sbi_debug_set_shmem use split_phys_addr - Use if (!report(... 
in dbtr_test_disable_enable V6 -> V7: - Alphabetize Makefile - Only print read info on failure - Remove return after assert - Remove unnecessary OpenSBI version check - Rename error to exit_test in check_dbtr - Use prefix in dbtr_test_num_triggers and dbtr_test_type V7 -> V8: - Add mcontrol_size mcontrol6_size - Cast McontrolType to int for assert - Set trigger size to 32BIT using mcontrol_size Debug triggers allow traps on different access sizes, which was set to any. QEMU was triggering on a 32BIT access next to the trigger address causing an erroneous trap. Set the size to 32BITs to avoid this. --- lib/riscv/asm/sbi.h | 1 + riscv/Makefile | 1 + riscv/sbi-dbtr.c | 867 ++++++++++++++++++++++++++++++++++++++++++++ riscv/sbi-tests.h | 2 + riscv/sbi.c | 3 +- 5 files changed, 873 insertions(+), 1 deletion(-) create mode 100644 riscv/sbi-dbtr.c diff --git a/lib/riscv/asm/sbi.h b/lib/riscv/asm/sbi.h index a5738a5c..78fd6e2a 100644 --- a/lib/riscv/asm/sbi.h +++ b/lib/riscv/asm/sbi.h @@ -51,6 +51,7 @@ enum sbi_ext_id { SBI_EXT_SUSP = 0x53555350, SBI_EXT_FWFT = 0x46574654, SBI_EXT_SSE = 0x535345, + SBI_EXT_DBTR = 0x44425452, }; enum sbi_ext_base_fid { diff --git a/riscv/Makefile b/riscv/Makefile index 11e68eae..9309ac12 100644 --- a/riscv/Makefile +++ b/riscv/Makefile @@ -18,6 +18,7 @@ tests += $(TEST_DIR)/sieve.$(exe) all: $(tests) $(TEST_DIR)/sbi-deps += $(TEST_DIR)/sbi-asm.o +$(TEST_DIR)/sbi-deps += $(TEST_DIR)/sbi-dbtr.o $(TEST_DIR)/sbi-deps += $(TEST_DIR)/sbi-fwft.o $(TEST_DIR)/sbi-deps += $(TEST_DIR)/sbi-sse.o diff --git a/riscv/sbi-dbtr.c b/riscv/sbi-dbtr.c new file mode 100644 index 00000000..13f64015 --- /dev/null +++ b/riscv/sbi-dbtr.c @@ -0,0 +1,867 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * SBI DBTR testsuite + * + * Copyright (C) 2025, Rivos Inc., Jesse Taube <jesse(a)rivosinc.com> + */ + +#include <libcflat.h> +#include <bitops.h> + +#include <asm/io.h> +#include <asm/processor.h> + +#include "sbi-tests.h" + +#define RV_MAX_TRIGGERS 32 + +#define 
SBI_DBTR_TRIG_STATE_MAPPED BIT(0) +#define SBI_DBTR_TRIG_STATE_U BIT(1) +#define SBI_DBTR_TRIG_STATE_S BIT(2) +#define SBI_DBTR_TRIG_STATE_VU BIT(3) +#define SBI_DBTR_TRIG_STATE_VS BIT(4) +#define SBI_DBTR_TRIG_STATE_HAVE_HW_TRIG BIT(5) +#define SBI_DBTR_TRIG_STATE_RESERVED GENMASK(7, 6) + +#define SBI_DBTR_TRIG_STATE_HW_TRIG_IDX_SHIFT 8 +#define SBI_DBTR_TRIG_STATE_HW_TRIG_IDX(trig_state) (trig_state >> SBI_DBTR_TRIG_STATE_HW_TRIG_IDX_SHIFT) + +#define SBI_DBTR_TDATA1_TYPE_SHIFT (__riscv_xlen - 4) +#define SBI_DBTR_TDATA1_DMODE BIT_UL(__riscv_xlen - 5) + +#define SBI_DBTR_TDATA1_MCONTROL6_LOAD BIT(0) +#define SBI_DBTR_TDATA1_MCONTROL6_STORE BIT(1) +#define SBI_DBTR_TDATA1_MCONTROL6_EXECUTE BIT(2) +#define SBI_DBTR_TDATA1_MCONTROL6_U BIT(3) +#define SBI_DBTR_TDATA1_MCONTROL6_S BIT(4) +#define SBI_DBTR_TDATA1_MCONTROL6_M BIT(6) +#define SBI_DBTR_TDATA1_MCONTROL6_SIZE_SHIFT 16 +#define SBI_DBTR_TDATA1_MCONTROL6_SIZE_MASK 0x7 +#define SBI_DBTR_TDATA1_MCONTROL6_SELECT BIT(21) +#define SBI_DBTR_TDATA1_MCONTROL6_VU BIT(23) +#define SBI_DBTR_TDATA1_MCONTROL6_VS BIT(24) + +#define SBI_DBTR_TDATA1_MCONTROL_LOAD BIT(0) +#define SBI_DBTR_TDATA1_MCONTROL_STORE BIT(1) +#define SBI_DBTR_TDATA1_MCONTROL_EXECUTE BIT(2) +#define SBI_DBTR_TDATA1_MCONTROL_U BIT(3) +#define SBI_DBTR_TDATA1_MCONTROL_S BIT(4) +#define SBI_DBTR_TDATA1_MCONTROL_M BIT(6) +#define SBI_DBTR_TDATA1_MCONTROL_SIZELO_SHIFT 16 +#define SBI_DBTR_TDATA1_MCONTROL_SIZELO_MASK 0x3 +#define SBI_DBTR_TDATA1_MCONTROL_SELECT BIT(19) +#define SBI_DBTR_TDATA1_MCONTROL_SIZEHI_SHIFT 21 +#define SBI_DBTR_TDATA1_MCONTROL_SIZEHI_MASK 0x3 + +enum McontrolType { + SBI_DBTR_TDATA1_TYPE_NONE = (0UL << SBI_DBTR_TDATA1_TYPE_SHIFT), + SBI_DBTR_TDATA1_TYPE_LEGACY = (1UL << SBI_DBTR_TDATA1_TYPE_SHIFT), + SBI_DBTR_TDATA1_TYPE_MCONTROL = (2UL << SBI_DBTR_TDATA1_TYPE_SHIFT), + SBI_DBTR_TDATA1_TYPE_ICOUNT = (3UL << SBI_DBTR_TDATA1_TYPE_SHIFT), + SBI_DBTR_TDATA1_TYPE_ITRIGGER = (4UL << SBI_DBTR_TDATA1_TYPE_SHIFT), + 
SBI_DBTR_TDATA1_TYPE_ETRIGGER = (5UL << SBI_DBTR_TDATA1_TYPE_SHIFT), + SBI_DBTR_TDATA1_TYPE_MCONTROL6 = (6UL << SBI_DBTR_TDATA1_TYPE_SHIFT), + SBI_DBTR_TDATA1_TYPE_TMEXTTRIGGER = (7UL << SBI_DBTR_TDATA1_TYPE_SHIFT), + SBI_DBTR_TDATA1_TYPE_RESERVED0 = (8UL << SBI_DBTR_TDATA1_TYPE_SHIFT), + SBI_DBTR_TDATA1_TYPE_RESERVED1 = (9UL << SBI_DBTR_TDATA1_TYPE_SHIFT), + SBI_DBTR_TDATA1_TYPE_RESERVED2 = (10UL << SBI_DBTR_TDATA1_TYPE_SHIFT), + SBI_DBTR_TDATA1_TYPE_RESERVED3 = (11UL << SBI_DBTR_TDATA1_TYPE_SHIFT), + SBI_DBTR_TDATA1_TYPE_CUSTOM0 = (12UL << SBI_DBTR_TDATA1_TYPE_SHIFT), + SBI_DBTR_TDATA1_TYPE_CUSTOM1 = (13UL << SBI_DBTR_TDATA1_TYPE_SHIFT), + SBI_DBTR_TDATA1_TYPE_CUSTOM2 = (14UL << SBI_DBTR_TDATA1_TYPE_SHIFT), + SBI_DBTR_TDATA1_TYPE_DISABLED = (15UL << SBI_DBTR_TDATA1_TYPE_SHIFT), +}; + +enum Tdata1Size { + SIZE_ANY = 0, + SIZE_8BIT, + SIZE_16BIT, + SIZE_32BIT, + SIZE_48BIT, + SIZE_64BIT, +}; + +enum Tdata1Value { + VALUE_NONE = 0, + VALUE_LOAD = BIT(0), + VALUE_STORE = BIT(1), + VALUE_EXECUTE = BIT(2), +}; + +enum Tdata1Mode { + MODE_NONE = 0, + MODE_M = BIT(0), + MODE_U = BIT(1), + MODE_S = BIT(2), + MODE_VU = BIT(3), + MODE_VS = BIT(4), +}; + +enum sbi_ext_dbtr_fid { + SBI_EXT_DBTR_NUM_TRIGGERS = 0, + SBI_EXT_DBTR_SETUP_SHMEM, + SBI_EXT_DBTR_TRIGGER_READ, + SBI_EXT_DBTR_TRIGGER_INSTALL, + SBI_EXT_DBTR_TRIGGER_UPDATE, + SBI_EXT_DBTR_TRIGGER_UNINSTALL, + SBI_EXT_DBTR_TRIGGER_ENABLE, + SBI_EXT_DBTR_TRIGGER_DISABLE, +}; + +struct sbi_dbtr_data_msg { + unsigned long tstate; + unsigned long tdata1; + unsigned long tdata2; + unsigned long tdata3; +}; + +struct sbi_dbtr_id_msg { + unsigned long idx; +}; + +/* SBI shared mem messages layout */ +struct sbi_dbtr_shmem_entry { + union { + struct sbi_dbtr_data_msg data; + struct sbi_dbtr_id_msg id; + }; +}; + +static bool dbtr_handled; + +/* Expected to be leaf function as not to disrupt frame-pointer */ +static __attribute__((naked)) void exec_call(void) +{ + /* skip over nop when triggered instead of ret. 
*/ + asm volatile (".option push\n" + ".option arch, -c\n" + "nop\n" + "ret\n" + ".option pop\n"); +} + +static void dbtr_exception_handler(struct pt_regs *regs) +{ + dbtr_handled = true; + + /* Reading *epc may cause a fault, skip over nop */ + if ((void *)regs->epc == exec_call) { + regs->epc += 4; + return; + } + + /* WARNING: Skips over the trapped intruction */ + regs->epc += RV_INSN_LEN(readw((void *)regs->epc)); +} + +static bool do_store(void *tdata2) +{ + bool ret; + + writel(0, tdata2); + + ret = dbtr_handled; + dbtr_handled = false; + + return ret; +} + +static bool do_load(void *tdata2) +{ + bool ret; + + readl(tdata2); + + ret = dbtr_handled; + dbtr_handled = false; + + return ret; +} + +static bool do_exec(void) +{ + bool ret; + + exec_call(); + + ret = dbtr_handled; + dbtr_handled = false; + + return ret; +} + +static unsigned long mcontrol_size(enum Tdata1Size mode) +{ + unsigned long ret = 0; + + ret |= ((mode >> 2) & SBI_DBTR_TDATA1_MCONTROL_SIZEHI_MASK) + << SBI_DBTR_TDATA1_MCONTROL_SIZEHI_SHIFT; + ret |= (mode & SBI_DBTR_TDATA1_MCONTROL_SIZELO_MASK) + << SBI_DBTR_TDATA1_MCONTROL_SIZELO_SHIFT; + + return ret; +} + +static unsigned long mcontrol6_size(enum Tdata1Size mode) +{ + return (mode & SBI_DBTR_TDATA1_MCONTROL6_SIZE_MASK) + << SBI_DBTR_TDATA1_MCONTROL6_SIZE_SHIFT; +} + +static unsigned long gen_tdata1_mcontrol(enum Tdata1Mode mode, enum Tdata1Value value) +{ + unsigned long tdata1 = SBI_DBTR_TDATA1_TYPE_MCONTROL; + + if (value & VALUE_LOAD) + tdata1 |= SBI_DBTR_TDATA1_MCONTROL_LOAD; + + if (value & VALUE_STORE) + tdata1 |= SBI_DBTR_TDATA1_MCONTROL_STORE; + + if (value & VALUE_EXECUTE) + tdata1 |= SBI_DBTR_TDATA1_MCONTROL_EXECUTE; + + if (mode & MODE_M) + tdata1 |= SBI_DBTR_TDATA1_MCONTROL_M; + + if (mode & MODE_U) + tdata1 |= SBI_DBTR_TDATA1_MCONTROL_U; + + if (mode & MODE_S) + tdata1 |= SBI_DBTR_TDATA1_MCONTROL_S; + + return tdata1; +} + +static unsigned long gen_tdata1_mcontrol6(enum Tdata1Mode mode, enum Tdata1Value value) +{ + unsigned 
long tdata1 = SBI_DBTR_TDATA1_TYPE_MCONTROL6; + + if (value & VALUE_LOAD) + tdata1 |= SBI_DBTR_TDATA1_MCONTROL6_LOAD; + + if (value & VALUE_STORE) + tdata1 |= SBI_DBTR_TDATA1_MCONTROL6_STORE; + + if (value & VALUE_EXECUTE) + tdata1 |= SBI_DBTR_TDATA1_MCONTROL6_EXECUTE; + + if (mode & MODE_M) + tdata1 |= SBI_DBTR_TDATA1_MCONTROL6_M; + + if (mode & MODE_U) + tdata1 |= SBI_DBTR_TDATA1_MCONTROL6_U; + + if (mode & MODE_S) + tdata1 |= SBI_DBTR_TDATA1_MCONTROL6_S; + + if (mode & MODE_VU) + tdata1 |= SBI_DBTR_TDATA1_MCONTROL6_VU; + + if (mode & MODE_VS) + tdata1 |= SBI_DBTR_TDATA1_MCONTROL6_VS; + + return tdata1; +} + +static unsigned long gen_tdata1(enum McontrolType type, enum Tdata1Value value, enum Tdata1Mode mode) +{ + switch (type) { + case SBI_DBTR_TDATA1_TYPE_MCONTROL: + return gen_tdata1_mcontrol(mode, value) | mcontrol_size(SIZE_32BIT); + case SBI_DBTR_TDATA1_TYPE_MCONTROL6: + return gen_tdata1_mcontrol6(mode, value) | mcontrol6_size(SIZE_32BIT); + default: + assert_msg(false, "Invalid mcontrol type: %u", (int)type); + } +} + +static struct sbiret sbi_debug_num_triggers(unsigned long trig_tdata1) +{ + return sbi_ecall(SBI_EXT_DBTR, SBI_EXT_DBTR_NUM_TRIGGERS, trig_tdata1, 0, 0, 0, 0, 0); +} + +static struct sbiret sbi_debug_set_shmem_raw(unsigned long shmem_phys_lo, + unsigned long shmem_phys_hi, + unsigned long flags) +{ + return sbi_ecall(SBI_EXT_DBTR, SBI_EXT_DBTR_SETUP_SHMEM, shmem_phys_lo, + shmem_phys_hi, flags, 0, 0, 0); +} + +static struct sbiret sbi_debug_set_shmem(void *shmem) +{ + unsigned long base_addr_lo, base_addr_hi; + + split_phys_addr(virt_to_phys(shmem), &base_addr_hi, &base_addr_lo); + return sbi_debug_set_shmem_raw(base_addr_lo, base_addr_hi, 0); +} + +static struct sbiret sbi_debug_read_triggers(unsigned long trig_idx_base, + unsigned long trig_count) +{ + return sbi_ecall(SBI_EXT_DBTR, SBI_EXT_DBTR_TRIGGER_READ, trig_idx_base, + trig_count, 0, 0, 0, 0); +} + +static struct sbiret sbi_debug_install_triggers(unsigned long trig_count) +{ + 
return sbi_ecall(SBI_EXT_DBTR, SBI_EXT_DBTR_TRIGGER_INSTALL, trig_count, 0, 0, 0, 0, 0); +} + +static struct sbiret sbi_debug_update_triggers(unsigned long trig_count) +{ + return sbi_ecall(SBI_EXT_DBTR, SBI_EXT_DBTR_TRIGGER_UPDATE, trig_count, 0, 0, 0, 0, 0); +} + +static struct sbiret sbi_debug_uninstall_triggers(unsigned long trig_idx_base, + unsigned long trig_idx_mask) +{ + return sbi_ecall(SBI_EXT_DBTR, SBI_EXT_DBTR_TRIGGER_UNINSTALL, trig_idx_base, + trig_idx_mask, 0, 0, 0, 0); +} + +static struct sbiret sbi_debug_enable_triggers(unsigned long trig_idx_base, + unsigned long trig_idx_mask) +{ + return sbi_ecall(SBI_EXT_DBTR, SBI_EXT_DBTR_TRIGGER_ENABLE, trig_idx_base, + trig_idx_mask, 0, 0, 0, 0); +} + +static struct sbiret sbi_debug_disable_triggers(unsigned long trig_idx_base, + unsigned long trig_idx_mask) +{ + return sbi_ecall(SBI_EXT_DBTR, SBI_EXT_DBTR_TRIGGER_DISABLE, trig_idx_base, + trig_idx_mask, 0, 0, 0, 0); +} + +static bool dbtr_install_trigger(struct sbi_dbtr_shmem_entry *shmem, void *trigger, + unsigned long control) +{ + struct sbiret sbi_ret; + bool ret; + + shmem->data.tdata1 = control; + shmem->data.tdata2 = (unsigned long)trigger; + + sbi_ret = sbi_debug_install_triggers(1); + ret = sbiret_report_error(&sbi_ret, SBI_SUCCESS, "sbi_debug_install_triggers"); + if (ret) + install_exception_handler(EXC_BREAKPOINT, dbtr_exception_handler); + + return ret; +} + +static bool dbtr_uninstall_trigger(void) +{ + struct sbiret ret; + + install_exception_handler(EXC_BREAKPOINT, NULL); + + ret = sbi_debug_uninstall_triggers(0, 1); + return sbiret_report_error(&ret, SBI_SUCCESS, "sbi_debug_uninstall_triggers"); +} + +static unsigned long dbtr_test_num_triggers(void) +{ + struct sbiret ret; + unsigned long tdata1 = 0; + /* sbi_debug_num_triggers will return trig_max in sbiret.value when trig_tdata1 == 0 */ + + report_prefix_push("available triggers"); + + /* should be at least one trigger. 
*/ + ret = sbi_debug_num_triggers(tdata1); + sbiret_report_error(&ret, SBI_SUCCESS, "sbi_debug_num_triggers"); + + if (ret.value == 0) { + report_fail("Returned 0 triggers available"); + } else { + report_pass("Returned triggers available"); + report_info("Returned %lu triggers available", ret.value); + } + + report_prefix_pop(); + return ret.value; +} + +static enum McontrolType dbtr_test_type(unsigned long *num_trig) +{ + struct sbiret ret; + unsigned long tdata1 = SBI_DBTR_TDATA1_TYPE_MCONTROL6; + + report_prefix_push("test type"); + report_prefix_push("sbi_debug_num_triggers"); + + ret = sbi_debug_num_triggers(tdata1); + sbiret_report_error(&ret, SBI_SUCCESS, "mcontrol6"); + *num_trig = ret.value; + if (ret.value > 0) { + report_pass("Returned mcontrol6 triggers available"); + report_info("Returned %lu mcontrol6 triggers available", + ret.value); + report_prefix_popn(2); + return tdata1; + } + + tdata1 = SBI_DBTR_TDATA1_TYPE_MCONTROL; + + ret = sbi_debug_num_triggers(tdata1); + sbiret_report_error(&ret, SBI_SUCCESS, "mcontrol"); + *num_trig = ret.value; + if (ret.value > 0) { + report_pass("Returned mcontrol triggers available"); + report_info("Returned %lu mcontrol triggers available", + ret.value); + report_prefix_popn(2); + return tdata1; + } + + report_fail("Returned 0 mcontrol(6) triggers available"); + report_prefix_popn(2); + + return SBI_DBTR_TDATA1_TYPE_NONE; +} + +static struct sbiret dbtr_test_store_install_uninstall(struct sbi_dbtr_shmem_entry *shmem, + enum McontrolType type) +{ + static unsigned long test; + struct sbiret ret; + + report_prefix_push("store trigger"); + + shmem->data.tdata1 = gen_tdata1(type, VALUE_STORE, MODE_S); + shmem->data.tdata2 = (unsigned long)&test; + + ret = sbi_debug_install_triggers(1); + if (!sbiret_report_error(&ret, SBI_SUCCESS, "sbi_debug_install_triggers")) { + report_prefix_pop(); + return ret; + } + + install_exception_handler(EXC_BREAKPOINT, dbtr_exception_handler); + + report(do_store(&test), "triggered"); + + 
if (do_load(&test)) + report_fail("triggered by load"); + + ret = sbi_debug_uninstall_triggers(0, 1); + sbiret_report_error(&ret, SBI_SUCCESS, "sbi_debug_uninstall_triggers"); + + if (do_store(&test)) + report_fail("triggered after uninstall"); + + install_exception_handler(EXC_BREAKPOINT, NULL); + report_prefix_pop(); + + return ret; +} + +static void dbtr_test_update(struct sbi_dbtr_shmem_entry *shmem, enum McontrolType type) +{ + static unsigned long test; + struct sbiret ret; + bool kfail; + + report_prefix_push("update trigger"); + + if (!dbtr_install_trigger(shmem, NULL, gen_tdata1(type, VALUE_NONE, MODE_NONE))) { + report_prefix_pop(); + return; + } + + shmem->id.idx = 0; + shmem->data.tdata1 = gen_tdata1(type, VALUE_STORE, MODE_S); + shmem->data.tdata2 = (unsigned long)&test; + + ret = sbi_debug_update_triggers(1); + sbiret_report_error(&ret, SBI_SUCCESS, "sbi_debug_update_triggers"); + + /* + * Known broken update_triggers. + * https://lore.kernel.org/opensbi/aDdp1UeUh7GugeHp@ghost/T/#t + */ + kfail = __sbi_get_imp_id() == SBI_IMPL_OPENSBI && + __sbi_get_imp_version() < sbi_impl_opensbi_mk_version(1, 7); + report_kfail(kfail, do_store(&test), "triggered"); + + dbtr_uninstall_trigger(); + report_prefix_pop(); +} + +static void dbtr_test_load(struct sbi_dbtr_shmem_entry *shmem, enum McontrolType type) +{ + static unsigned long test; + + report_prefix_push("load trigger"); + if (!dbtr_install_trigger(shmem, &test, gen_tdata1(type, VALUE_LOAD, MODE_S))) { + report_prefix_pop(); + return; + } + + report(do_load(&test), "triggered"); + + if (do_store(&test)) + report_fail("triggered by store"); + + dbtr_uninstall_trigger(); + report_prefix_pop(); +} + +static void dbtr_test_disable_enable(struct sbi_dbtr_shmem_entry *shmem, enum McontrolType type) +{ + static unsigned long test; + struct sbiret ret; + + report_prefix_push("disable trigger"); + if (!dbtr_install_trigger(shmem, &test, gen_tdata1(type, VALUE_STORE, MODE_S))) { + report_prefix_pop(); + return; + } + 
+ ret = sbi_debug_disable_triggers(0, 1); + sbiret_report_error(&ret, SBI_SUCCESS, "sbi_debug_disable_triggers"); + + if (!report(!do_store(&test), "should not trigger")) { + dbtr_uninstall_trigger(); + report_prefix_pop(); + report_skip("enable trigger: no disable"); + + return; + } + + report_prefix_pop(); + report_prefix_push("enable trigger"); + + ret = sbi_debug_enable_triggers(0, 1); + sbiret_report_error(&ret, SBI_SUCCESS, "sbi_debug_enable_triggers"); + + report(do_store(&test), "triggered"); + + dbtr_uninstall_trigger(); + report_prefix_pop(); +} + +static void dbtr_test_exec(struct sbi_dbtr_shmem_entry *shmem, enum McontrolType type) +{ + static unsigned long test; + + report_prefix_push("exec trigger"); + /* check if loads and stores trigger exec */ + if (!dbtr_install_trigger(shmem, &test, gen_tdata1(type, VALUE_EXECUTE, MODE_S))) { + report_prefix_pop(); + return; + } + + if (do_load(&test)) + report_fail("triggered by load"); + + if (do_store(&test)) + report_fail("triggered by store"); + + dbtr_uninstall_trigger(); + + /* Check if exec works */ + if (!dbtr_install_trigger(shmem, exec_call, gen_tdata1(type, VALUE_EXECUTE, MODE_S))) { + report_prefix_pop(); + return; + } + report(do_exec(), "triggered"); + + dbtr_uninstall_trigger(); + report_prefix_pop(); +} + +static void dbtr_test_read(struct sbi_dbtr_shmem_entry *shmem, enum McontrolType type) +{ + const unsigned long tstatus_expected = SBI_DBTR_TRIG_STATE_S | SBI_DBTR_TRIG_STATE_MAPPED; + const unsigned long tdata1 = gen_tdata1(type, VALUE_STORE, MODE_S); + static unsigned long test; + struct sbiret ret; + + report_prefix_push("read trigger"); + if (!dbtr_install_trigger(shmem, &test, tdata1)) { + report_prefix_pop(); + return; + } + + ret = sbi_debug_read_triggers(0, 1); + sbiret_report_error(&ret, SBI_SUCCESS, "sbi_debug_read_triggers"); + + if (!report(shmem->data.tdata1 == tdata1, "tdata1 expected: 0x%016lx", tdata1)) + report_info("tdata1 found: 0x%016lx", shmem->data.tdata1); + if 
(!report(shmem->data.tdata2 == ((unsigned long)&test), "tdata2 expected: 0x%016lx", + (unsigned long)&test)) + report_info("tdata2 found: 0x%016lx", shmem->data.tdata2); + if (!report(shmem->data.tstate == tstatus_expected, "tstate expected: 0x%016lx", tstatus_expected)) + report_info("tstate found: 0x%016lx", shmem->data.tstate); + + dbtr_uninstall_trigger(); + report_prefix_pop(); +} + +static void check_exec(unsigned long base) +{ + struct sbiret ret; + + report(do_exec(), "exec triggered"); + + ret = sbi_debug_uninstall_triggers(base, 1); + sbiret_report_error(&ret, SBI_SUCCESS, "sbi_debug_uninstall_triggers"); +} + +static void dbtr_test_multiple(struct sbi_dbtr_shmem_entry *shmem, enum McontrolType type, + unsigned long num_trigs) +{ + static unsigned long test[2]; + struct sbiret ret; + bool have_three = num_trigs > 2; + + if (num_trigs < 2) { + report_skip("test multiple"); + return; + } + + report_prefix_push("test multiple"); + + if (!dbtr_install_trigger(shmem, &test[0], gen_tdata1(type, VALUE_STORE, MODE_S))) { + report_prefix_pop(); + return; + } + if (!dbtr_install_trigger(shmem, &test[1], gen_tdata1(type, VALUE_LOAD, MODE_S))) + goto error; + if (have_three && + !dbtr_install_trigger(shmem, exec_call, gen_tdata1(type, VALUE_EXECUTE, MODE_S))) { + ret = sbi_debug_uninstall_triggers(1, 1); + sbiret_report_error(&ret, SBI_SUCCESS, "sbi_debug_uninstall_triggers"); + goto error; + } + + report(do_store(&test[0]), "store triggered"); + + if (do_load(&test[0])) + report_fail("store triggered by load"); + + report(do_load(&test[1]), "load triggered"); + + if (do_store(&test[1])) + report_fail("load triggered by store"); + + if (have_three) + check_exec(2); + + ret = sbi_debug_uninstall_triggers(1, 1); + sbiret_report_error(&ret, SBI_SUCCESS, "sbi_debug_uninstall_triggers"); + + if (do_load(&test[1])) + report_fail("load triggered after uninstall"); + + report(do_store(&test[0]), "store triggered"); + + if (!have_three && + dbtr_install_trigger(shmem, 
exec_call, gen_tdata1(type, VALUE_EXECUTE, MODE_S))) + check_exec(1); + +error: + ret = sbi_debug_uninstall_triggers(0, 1); + sbiret_report_error(&ret, SBI_SUCCESS, "sbi_debug_uninstall_triggers"); + + install_exception_handler(EXC_BREAKPOINT, NULL); + report_prefix_pop(); +} + +static void dbtr_test_multiple_types(struct sbi_dbtr_shmem_entry *shmem, unsigned long type) +{ + static unsigned long test; + + report_prefix_push("test multiple types"); + + /* check if loads and stores trigger exec */ + if (!dbtr_install_trigger(shmem, &test, + gen_tdata1(type, VALUE_EXECUTE | VALUE_LOAD | VALUE_STORE, MODE_S))) { + report_prefix_pop(); + return; + } + + report(do_load(&test), "load triggered"); + + report(do_store(&test), "store triggered"); + + dbtr_uninstall_trigger(); + + /* Check if exec works */ + if (!dbtr_install_trigger(shmem, exec_call, + gen_tdata1(type, VALUE_EXECUTE | VALUE_LOAD | VALUE_STORE, MODE_S))) { + report_prefix_pop(); + return; + } + + report(do_exec(), "exec triggered"); + + dbtr_uninstall_trigger(); + report_prefix_pop(); +} + +static void dbtr_test_disable_uninstall(struct sbi_dbtr_shmem_entry *shmem, enum McontrolType type) +{ + static unsigned long test; + struct sbiret ret; + + report_prefix_push("disable uninstall"); + if (!dbtr_install_trigger(shmem, &test, gen_tdata1(type, VALUE_STORE, MODE_S))) { + report_prefix_pop(); + return; + } + + ret = sbi_debug_disable_triggers(0, 1); + sbiret_report_error(&ret, SBI_SUCCESS, "sbi_debug_disable_triggers"); + + dbtr_uninstall_trigger(); + + if (!dbtr_install_trigger(shmem, &test, gen_tdata1(type, VALUE_STORE, MODE_S))) { + report_prefix_pop(); + return; + } + + report(do_store(&test), "triggered"); + + dbtr_uninstall_trigger(); + report_prefix_pop(); +} + +static void dbtr_test_uninstall_enable(struct sbi_dbtr_shmem_entry *shmem, enum McontrolType type) +{ + static unsigned long test; + struct sbiret ret; + + report_prefix_push("uninstall enable"); + if (!dbtr_install_trigger(shmem, &test, 
gen_tdata1(type, VALUE_STORE, MODE_S))) { + report_prefix_pop(); + return; + } + dbtr_uninstall_trigger(); + + ret = sbi_debug_enable_triggers(0, 1); + sbiret_report_error(&ret, SBI_SUCCESS, "sbi_debug_enable_triggers"); + + install_exception_handler(EXC_BREAKPOINT, dbtr_exception_handler); + + report(!do_store(&test), "should not trigger"); + + install_exception_handler(EXC_BREAKPOINT, NULL); + report_prefix_pop(); +} + +static void dbtr_test_uninstall_update(struct sbi_dbtr_shmem_entry *shmem, enum McontrolType type) +{ + static unsigned long test; + struct sbiret ret; + bool kfail; + + report_prefix_push("uninstall update"); + if (!dbtr_install_trigger(shmem, NULL, gen_tdata1(type, VALUE_NONE, MODE_NONE))) { + report_prefix_pop(); + return; + } + + dbtr_uninstall_trigger(); + + shmem->id.idx = 0; + shmem->data.tdata1 = gen_tdata1(type, VALUE_STORE, MODE_S); + shmem->data.tdata2 = (unsigned long)&test; + + /* + * Known broken update_triggers. + * https://lore.kernel.org/opensbi/aDdp1UeUh7GugeHp@ghost/T/#t + */ + kfail = __sbi_get_imp_id() == SBI_IMPL_OPENSBI && + __sbi_get_imp_version() < sbi_impl_opensbi_mk_version(1, 7); + ret = sbi_debug_update_triggers(1); + sbiret_kfail_error(kfail, &ret, SBI_ERR_FAILURE, "sbi_debug_update_triggers"); + + install_exception_handler(EXC_BREAKPOINT, dbtr_exception_handler); + + report(!do_store(&test), "should not trigger"); + + install_exception_handler(EXC_BREAKPOINT, NULL); + report_prefix_pop(); +} + +static void dbtr_test_disable_read(struct sbi_dbtr_shmem_entry *shmem, enum McontrolType type) +{ + const unsigned long tstatus_expected = SBI_DBTR_TRIG_STATE_S | SBI_DBTR_TRIG_STATE_MAPPED; + const unsigned long tdata1 = gen_tdata1(type, VALUE_STORE, MODE_NONE); + static unsigned long test; + struct sbiret ret; + + report_prefix_push("disable read"); + if (!dbtr_install_trigger(shmem, &test, gen_tdata1(type, VALUE_STORE, MODE_S))) { + report_prefix_pop(); + return; + } + + ret = sbi_debug_disable_triggers(0, 1); + 
sbiret_report_error(&ret, SBI_SUCCESS, "sbi_debug_disable_triggers"); + + ret = sbi_debug_read_triggers(0, 1); + sbiret_report_error(&ret, SBI_SUCCESS, "sbi_debug_read_triggers"); + + if (!report(shmem->data.tdata1 == tdata1, "tdata1 expected: 0x%016lx", tdata1)) + report_info("tdata1 found: 0x%016lx", shmem->data.tdata1); + if (!report(shmem->data.tdata2 == ((unsigned long)&test), "tdata2 expected: 0x%016lx", + (unsigned long)&test)) + report_info("tdata2 found: 0x%016lx", shmem->data.tdata2); + if (!report(shmem->data.tstate == tstatus_expected, "tstate expected: 0x%016lx", tstatus_expected)) + report_info("tstate found: 0x%016lx", shmem->data.tstate); + + dbtr_uninstall_trigger(); + report_prefix_pop(); +} + +void check_dbtr(void) +{ + static struct sbi_dbtr_shmem_entry shmem[RV_MAX_TRIGGERS] = {}; + unsigned long num_trigs; + enum McontrolType trig_type; + struct sbiret ret; + + report_prefix_push("dbtr"); + + if (!sbi_probe(SBI_EXT_DBTR)) { + report_skip("extension not available"); + goto exit_test; + } + + num_trigs = dbtr_test_num_triggers(); + if (!num_trigs) + goto exit_test; + + trig_type = dbtr_test_type(&num_trigs); + if (trig_type == SBI_DBTR_TDATA1_TYPE_NONE) + goto exit_test; + + ret = sbi_debug_set_shmem(shmem); + sbiret_report_error(&ret, SBI_SUCCESS, "sbi_debug_set_shmem"); + + ret = dbtr_test_store_install_uninstall(&shmem[0], trig_type); + /* install or uninstall failed */ + if (ret.error != SBI_SUCCESS) + goto exit_test; + + dbtr_test_load(&shmem[0], trig_type); + dbtr_test_exec(&shmem[0], trig_type); + dbtr_test_read(&shmem[0], trig_type); + dbtr_test_disable_enable(&shmem[0], trig_type); + dbtr_test_update(&shmem[0], trig_type); + dbtr_test_multiple_types(&shmem[0], trig_type); + dbtr_test_multiple(shmem, trig_type, num_trigs); + dbtr_test_disable_uninstall(&shmem[0], trig_type); + dbtr_test_uninstall_enable(&shmem[0], trig_type); + dbtr_test_uninstall_update(&shmem[0], trig_type); + dbtr_test_disable_read(&shmem[0], trig_type); + +exit_test: 
+ report_prefix_pop(); +} diff --git a/riscv/sbi-tests.h b/riscv/sbi-tests.h index d5c4ae70..c1ebf016 100644 --- a/riscv/sbi-tests.h +++ b/riscv/sbi-tests.h @@ -97,8 +97,10 @@ static inline bool env_enabled(const char *env) return s && (*s == '1' || *s == 'y' || *s == 'Y'); } +void split_phys_addr(phys_addr_t paddr, unsigned long *hi, unsigned long *lo); void sbi_bad_fid(int ext); void check_sse(void); +void check_dbtr(void); #endif /* __ASSEMBLER__ */ #endif /* _RISCV_SBI_TESTS_H_ */ diff --git a/riscv/sbi.c b/riscv/sbi.c index edb1a6be..3b8aadce 100644 --- a/riscv/sbi.c +++ b/riscv/sbi.c @@ -105,7 +105,7 @@ static int rand_online_cpu(prng_state *ps) return cpu; } -static void split_phys_addr(phys_addr_t paddr, unsigned long *hi, unsigned long *lo) +void split_phys_addr(phys_addr_t paddr, unsigned long *hi, unsigned long *lo) { *lo = (unsigned long)paddr; *hi = 0; @@ -1561,6 +1561,7 @@ int main(int argc, char **argv) check_susp(); check_sse(); check_fwft(); + check_dbtr(); return report_summary(); } -- 2.43.0

5 months, 2 weeks

2
2
0 0

[PATCH] selftests/mm: pagemap_scan ioctl: add PFN ZERO test cases

by Muhammad Usama Anjum

Add test cases to test the correctness of PFN ZERO flag of pagemap_scan ioctl. Test with normal pages backed memory and huge pages backed memory. Cc: David Hildenbrand <david(a)redhat.com> Signed-off-by: Muhammad Usama Anjum <usama.anjum(a)collabora.com> --- The bug has been fixed [1]. [1] https://lore.kernel.org/all/20250617143532.2375383-1-david@redhat.com --- tools/testing/selftests/mm/pagemap_ioctl.c | 57 +++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c index 57b4bba2b45f3..6138de0087edf 100644 --- a/tools/testing/selftests/mm/pagemap_ioctl.c +++ b/tools/testing/selftests/mm/pagemap_ioctl.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 + #define _GNU_SOURCE #include <stdio.h> #include <fcntl.h> @@ -1480,6 +1481,57 @@ static void transact_test(int page_size) extra_thread_faults); } +void zeropfn_tests(void) +{ + unsigned long long mem_size; + struct page_region vec; + int i, ret; + char *mem; + + /* Test with page backed memory */ + mem_size = 10 * page_size; + mem = mmap(NULL, mem_size, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0); + if (mem == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + + /* Touch each page to ensure it's mapped */ + for (i = 0; i < mem_size; i += page_size) + (void)((volatile char *)mem)[i]; + + ret = pagemap_ioctl(mem, mem_size, &vec, 1, 0, + (mem_size / page_size), PAGE_IS_PFNZERO, 0, 0, PAGE_IS_PFNZERO); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 1 && LEN(vec) == (mem_size / page_size), + "%s all pages must have PFNZERO set\n", __func__); + + munmap(mem, mem_size); + + /* Test with huge page */ + mem_size = 10 * hpage_size; + mem = memalign(hpage_size, mem_size); + if (!mem) + ksft_exit_fail_msg("error nomem\n"); + + ret = madvise(mem, mem_size, MADV_HUGEPAGE); + if (ret) + ksft_exit_fail_msg("madvise failed %d %s\n", errno, 
strerror(errno)); + + for (i = 0; i < mem_size; i += hpage_size) + (void)((volatile char *)mem)[i]; + + ret = pagemap_ioctl(mem, mem_size, &vec, 1, 0, + (mem_size / page_size), PAGE_IS_PFNZERO, 0, 0, PAGE_IS_PFNZERO); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 1 && LEN(vec) == (mem_size / page_size), + "%s all huge pages must have PFNZERO set\n", __func__); + + free(mem); +} + int main(int __attribute__((unused)) argc, char *argv[]) { int shmid, buf_size, fd, i, ret; @@ -1494,7 +1546,7 @@ int main(int __attribute__((unused)) argc, char *argv[]) if (init_uffd()) ksft_exit_pass(); - ksft_set_plan(115); + ksft_set_plan(117); page_size = getpagesize(); hpage_size = read_pmd_pagesize(); @@ -1669,6 +1721,9 @@ int main(int __attribute__((unused)) argc, char *argv[]) /* 16. Userfaultfd tests */ userfaultfd_tests(); + /* 17. ZEROPFN tests */ + zeropfn_tests(); + close(pagemap_fd); ksft_exit_pass(); } -- 2.43.0

5 months, 2 weeks

2
4
0 0

[PATCH v4 0/2] kunit: qemu_configs: Add MIPS configurations

by Thomas Weißschuh

Add basic support to run various MIPS variants via kunit_tool using the virtualized malta platform. Signed-off-by: Thomas Weißschuh <thomas.weissschuh(a)linutronix.de> --- Changes in v4: - Rebase on v6.16-rc1 - Pick up reviews from David - Clarify that GIC page is linked to vDSO - Link to v3: https://lore.kernel.org/r/20250415-kunit-mips-v3-0-4ec2461b5a7e@linutronix.… Changes in v3: - Also skip VDSO_RANDOMIZE_SIZE adjustment for kthreads - Link to v2: https://lore.kernel.org/r/20250414-kunit-mips-v2-0-4cf01e1a29e6@linutronix.… Changes in v2: - Fix usercopy kunit test by handling ABI-less tasks in stack_top() - Drop change to mm initialization. The broken test is not built by default anymore. - Link to v1: https://lore.kernel.org/r/20250212-kunit-mips-v1-0-eb49c9d76615@linutronix.… --- Thomas Weißschuh (2): MIPS: Don't crash in stack_top() for tasks without ABI or vDSO kunit: qemu_configs: Add MIPS configurations arch/mips/kernel/process.c | 16 +++++++++------- tools/testing/kunit/qemu_configs/mips.py | 18 ++++++++++++++++++ tools/testing/kunit/qemu_configs/mips64.py | 19 +++++++++++++++++++ tools/testing/kunit/qemu_configs/mips64el.py | 19 +++++++++++++++++++ tools/testing/kunit/qemu_configs/mipsel.py | 18 ++++++++++++++++++ 5 files changed, 83 insertions(+), 7 deletions(-) --- base-commit: 19272b37aa4f83ca52bdf9c16d5d81bdd1354494 change-id: 20241014-kunit-mips-e4fe1c265ed7 Best regards, -- Thomas Weißschuh <thomas.weissschuh(a)linutronix.de>

5 months, 2 weeks

3
4
0 0

[PATCH v2] selftests/futex: Convert 32bit timespec struct to 64bit version for 32bit compatibility mode

by Terry Tritton

Futex_waitv can not accept old_timespec32 struct, so userspace should convert it from 32bit to 64bit before syscall in 32bit compatible mode. This fix is based off [1] Link: https://lore.kernel.org/all/20231203235117.29677-1-wegao@suse.com/ [1] Signed-off-by: Wei Gao <wegao(a)suse.com> Signed-off-by: Terry Tritton <terry.tritton(a)linaro.org> --- .../testing/selftests/futex/include/futex2test.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tools/testing/selftests/futex/include/futex2test.h b/tools/testing/selftests/futex/include/futex2test.h index ea79662405bc..6780e51eb2d6 100644 --- a/tools/testing/selftests/futex/include/futex2test.h +++ b/tools/testing/selftests/futex/include/futex2test.h @@ -55,6 +55,13 @@ struct futex32_numa { futex_t numa; }; +#if !defined(__LP64__) +struct timespec64 { + int64_t tv_sec; + int64_t tv_nsec; +}; +#endif + /** * futex_waitv - Wait at multiple futexes, wake on any * @waiters: Array of waiters @@ -65,7 +72,15 @@ struct futex32_numa { static inline int futex_waitv(volatile struct futex_waitv *waiters, unsigned long nr_waiters, unsigned long flags, struct timespec *timo, clockid_t clockid) { +#if !defined(__LP64__) + struct timespec64 timo64 = {0}; + + timo64.tv_sec = timo->tv_sec; + timo64.tv_nsec = timo->tv_nsec; + return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, &timo64, clockid); +#else return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, timo, clockid); +#endif } /* -- 2.39.5

5 months, 2 weeks

3
5
0 0

[PATCH net-next 0/7] netpoll: Factor out functions from netpoll_send_udp() and add ipv6 selftest

by Breno Leitao

Refactors the netpoll UDP transmit path to improve code clarity, maintainability, and protocol-layer encapsulation. Function netpoll_send_udp() has more than 100 LoC, which is hard to understand and review. After this patchset, it has only 32 LoC, which is more manageable. The series systematically moves the construction of protocol headers (UDP, IPv4, IPv6, Ethernet) out of the core `netpoll_send_udp()` function into dedicated static helpers: - `push_udp()` for UDP header setup - `push_ipv4()` and `push_ipv6()` for IP header setup - `push_eth()` for Ethernet header setup This results in a clean, layered abstraction that mirrors the protocol stack, reduces code duplication, and improves readability. Also, to make sure this is not breaking anything, add IPv6 selftest to netconsole tests, which will exercise this code. This test would also pick up problems similar to the one fixed by f599020702698 ("net: netpoll: Initialize UDP checksum field before checksumming"), which was embarrassing, as we didn't have a selftest to catch it. Anyway, there are **no functional changes** intended in this patchset. Signed-off-by: Breno Leitao <leitao(a)debian.org> --- Breno Leitao (7): netpoll: Improve code clarity with explicit struct size calculations netpoll: factor out UDP checksum calculation into helper netpoll: factor out IPv6 header setup into push_ipv6() helper netpoll: factor out IPv4 header setup into push_ipv4() helper netpoll: factor out UDP header setup into push_udp() helper netpoll: move Ethernet setup to push_eth() helper selftests: net: Add IPv6 support to netconsole basic tests net/core/netpoll.c | 196 +++++++++++++-------- .../selftests/drivers/net/lib/sh/lib_netcons.sh | 74 +++++++- .../testing/selftests/drivers/net/netcons_basic.sh | 52 +++--- 3 files changed, 216 insertions(+), 106 deletions(-) --- base-commit: 8efa26fcbf8a7f783fd1ce7dd2a409e9b7758df0 change-id: 20250620-netpoll_untagle_ip-e37c799a6925 Best regards, -- Breno Leitao <leitao(a)debian.org>

5 months, 2 weeks

2
9
0 0

[PATCH bpf-next v2 0/3] bpf: Fix and test aux usage after do_check_insn()

by Luis Gerhorst

Fix cur_aux()->nospec_result test after do_check_insn() referring to the to-be-analyzed (potentially unsafe) instruction, not the already-analyzed (safe) instruction. This might allow an unsafe insn to slip through on a speculative path. Create some tests from the reproducer [1]. Commit d6f1c85f2253 ("bpf: Fall back to nospec for Spectre v1") should not be in any stable kernel yet, therefore bpf-next should suffice. [1] https://lore.kernel.org/bpf/685b3c1b.050a0220.2303ee.0010.GAE@google.com/ Changes since v1: - Fix compiler error due to missed rename of prev_insn_idx in first patch - v1: https://lore.kernel.org/bpf/20250628125927.763088-1-luis.gerhorst@fau.de/ Changes since RFC: - Introduce prev_aux() as suggested by Alexei. For this, we must move the env->prev_insn_idx assignment to happen directly after do_check_insn(), for which I have created a separate commit. This patch could be simplified by using a local prev_aux variable as suggested by Eduard, but I figured one might find the new assignment-strategy easier to understand (before, prev_insn_idx and env->prev_insn_idx were out-of-sync for the latter part of the loop). Also, like this we do not have an additional prev_* variable that must be kept in-sync and the local variable's usage (old prev_insn_idx, new tmp) is much more local. If you think it would be better to not take the risk and keep the fix simple by just introducing the prev_aux variable, let me know. 
- Change WARN_ON_ONCE() to verifier_bug_if() as suggested by Alexei - Change assertion to check instruction is BPF_JMP[32] as suggested by Eduard - RFC: https://lore.kernel.org/bpf/8734bmoemx.fsf@fau.de/ Luis Gerhorst (3): bpf: Update env->prev_insn_idx after do_check_insn() bpf: Fix aux usage after do_check_insn() selftests/bpf: Add Spectre v4 tests kernel/bpf/verifier.c | 30 ++-- tools/testing/selftests/bpf/progs/bpf_misc.h | 4 + .../selftests/bpf/progs/verifier_unpriv.c | 149 ++++++++++++++++++ 3 files changed, 174 insertions(+), 9 deletions(-) base-commit: d69bafe6ee2b5eff6099fa26626ecc2963f0f363 -- 2.49.0

5 months, 2 weeks

2
5
0 0

[PATCH] tools/testing/selftests: add mremap() unfaulted/faulted test cases

by Lorenzo Stoakes

Assert that mremap() behaviour is as expected when moving around unfaulted VMAs immediately adjacent to faulted ones, as well as moving around faulted VMAs and placing them back immediately adjacent to the VMA from which they were moved. This also introduces a shared helper for the syscall version of mremap() so we don't encounter any issues with libc filtering parameters. Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com> --- tools/testing/selftests/mm/merge.c | 599 ++++++++++++++++++++++++++- tools/testing/selftests/mm/vm_util.c | 8 + tools/testing/selftests/mm/vm_util.h | 3 + 3 files changed, 608 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/mm/merge.c b/tools/testing/selftests/mm/merge.c index 150dd5baed2b..cc4253f47f10 100644 --- a/tools/testing/selftests/mm/merge.c +++ b/tools/testing/selftests/mm/merge.c @@ -13,6 +13,7 @@ #include <sys/wait.h> #include <linux/perf_event.h> #include "vm_util.h" +#include <linux/mman.h> FIXTURE(merge) { @@ -25,7 +26,7 @@ FIXTURE_SETUP(merge) { self->page_size = psize(); /* Carve out PROT_NONE region to map over. */ - self->carveout = mmap(NULL, 12 * self->page_size, PROT_NONE, + self->carveout = mmap(NULL, 30 * self->page_size, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); ASSERT_NE(self->carveout, MAP_FAILED); /* Setup PROCMAP_QUERY interface. */ @@ -34,7 +35,7 @@ FIXTURE_SETUP(merge) FIXTURE_TEARDOWN(merge) { - ASSERT_EQ(munmap(self->carveout, 12 * self->page_size), 0); + ASSERT_EQ(munmap(self->carveout, 30 * self->page_size), 0); ASSERT_EQ(close_procmap(&self->procmap), 0); /* * Clear unconditionally, as some tests set this. 
It is no issue if this @@ -576,4 +577,598 @@ TEST_F(merge, ksm_merge) ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 2 * page_size); } +TEST_F(merge, mremap_unfaulted_to_faulted) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr, *ptr2; + + /* + * Map two distinct areas: + * + * |-----------| |-----------| + * | unfaulted | | unfaulted | + * |-----------| |-----------| + * ptr ptr2 + */ + ptr = mmap(&carveout[page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + ptr2 = mmap(&carveout[7 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + + /* Offset ptr2 further away. */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr2 + page_size * 1000); + ASSERT_NE(ptr2, MAP_FAILED); + + /* + * Fault in ptr: + * \ + * |-----------| / |-----------| + * | faulted | \ | unfaulted | + * |-----------| / |-----------| + * ptr \ ptr2 + */ + ptr[0] = 'x'; + + /* + * Now move ptr2 adjacent to ptr: + * + * |-----------|-----------| + * | faulted | unfaulted | + * |-----------|-----------| + * ptr ptr2 + * + * It should merge: + * + * |----------------------| + * | faulted | + * |----------------------| + * ptr + */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[5 * page_size]); + ASSERT_NE(ptr2, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size); +} + +TEST_F(merge, mremap_unfaulted_behind_faulted) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr, *ptr2; + + /* + * Map two distinct areas: + * + * |-----------| 
|-----------| + * | unfaulted | | unfaulted | + * |-----------| |-----------| + * ptr ptr2 + */ + ptr = mmap(&carveout[6 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + ptr2 = mmap(&carveout[14 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + + /* Offset ptr2 further away. */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr2 + page_size * 1000); + ASSERT_NE(ptr2, MAP_FAILED); + + /* + * Fault in ptr: + * \ + * |-----------| / |-----------| + * | faulted | \ | unfaulted | + * |-----------| / |-----------| + * ptr \ ptr2 + */ + ptr[0] = 'x'; + + /* + * Now move ptr2 adjacent, but behind, ptr: + * + * |-----------|-----------| + * | unfaulted | faulted | + * |-----------|-----------| + * ptr2 ptr + * + * It should merge: + * + * |----------------------| + * | faulted | + * |----------------------| + * ptr2 + */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &carveout[page_size]); + ASSERT_NE(ptr2, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr2)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr2); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr2 + 10 * page_size); +} + +TEST_F(merge, mremap_unfaulted_between_faulted) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr, *ptr2, *ptr3; + + /* + * Map three distinct areas: + * + * |-----------| |-----------| |-----------| + * | unfaulted | | unfaulted | | unfaulted | + * |-----------| |-----------| |-----------| + * ptr ptr2 ptr3 + */ + ptr = mmap(&carveout[page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + ptr2 = mmap(&carveout[7 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, 
+ MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + ptr3 = mmap(&carveout[14 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr3, MAP_FAILED); + + /* Offset ptr3 further away. */ + ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr3 + page_size * 2000); + ASSERT_NE(ptr3, MAP_FAILED); + + /* Offset ptr2 further away. */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr2 + page_size * 1000); + ASSERT_NE(ptr2, MAP_FAILED); + + /* + * Fault in ptr, ptr3: + * \ \ + * |-----------| / |-----------| / |-----------| + * | faulted | \ | unfaulted | \ | faulted | + * |-----------| / |-----------| / |-----------| + * ptr \ ptr2 \ ptr3 + */ + ptr[0] = 'x'; + ptr3[0] = 'x'; + + /* + * Move ptr3 back into place, leaving a place for ptr2: + * \ + * |-----------| |-----------| / |-----------| + * | faulted | | faulted | \ | unfaulted | + * |-----------| |-----------| / |-----------| + * ptr ptr3 \ ptr2 + */ + ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[10 * page_size]); + ASSERT_NE(ptr3, MAP_FAILED); + + /* + * Finally, move ptr2 into place: + * + * |-----------|-----------|-----------| + * | faulted | unfaulted | faulted | + * |-----------|-----------|-----------| + * ptr ptr2 ptr3 + * + * It should merge, but only ptr, ptr2: + * + * |-----------------------|-----------| + * | faulted | unfaulted | + * |-----------------------|-----------| + */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[5 * page_size]); + ASSERT_NE(ptr2, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr3)); + ASSERT_EQ(procmap->query.vma_start, (unsigned 
long)ptr3); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr3 + 5 * page_size); +} + +TEST_F(merge, mremap_unfaulted_between_faulted_unfaulted) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr, *ptr2, *ptr3; + + /* + * Map three distinct areas: + * + * |-----------| |-----------| |-----------| + * | unfaulted | | unfaulted | | unfaulted | + * |-----------| |-----------| |-----------| + * ptr ptr2 ptr3 + */ + ptr = mmap(&carveout[page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + ptr2 = mmap(&carveout[7 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + ptr3 = mmap(&carveout[14 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr3, MAP_FAILED); + + /* Offset ptr3 further away. */ + ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr3 + page_size * 2000); + ASSERT_NE(ptr3, MAP_FAILED); + + + /* Offset ptr2 further away. 
*/ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr2 + page_size * 1000); + ASSERT_NE(ptr2, MAP_FAILED); + + /* + * Fault in ptr: + * \ \ + * |-----------| / |-----------| / |-----------| + * | faulted | \ | unfaulted | \ | unfaulted | + * |-----------| / |-----------| / |-----------| + * ptr \ ptr2 \ ptr3 + */ + ptr[0] = 'x'; + + /* + * Move ptr3 back into place, leaving a place for ptr2: + * \ + * |-----------| |-----------| / |-----------| + * | faulted | | unfaulted | \ | unfaulted | + * |-----------| |-----------| / |-----------| + * ptr ptr3 \ ptr2 + */ + ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[10 * page_size]); + ASSERT_NE(ptr3, MAP_FAILED); + + /* + * Finally, move ptr2 into place: + * + * |-----------|-----------|-----------| + * | faulted | unfaulted | unfaulted | + * |-----------|-----------|-----------| + * ptr ptr2 ptr3 + * + * It should merge: + * + * |-----------------------------------| + * | faulted | + * |-----------------------------------| + */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[5 * page_size]); + ASSERT_NE(ptr2, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 15 * page_size); +} + +TEST_F(merge, mremap_unfaulted_between_correctly_placed_faulted) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr, *ptr2; + + /* + * Map one larger area: + * + * |-----------------------------------| + * | unfaulted | + * |-----------------------------------| + */ + ptr = mmap(&carveout[page_size], 15 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* + * Fault in ptr: + * + * |-----------------------------------| + * | faulted | 
+ * |-----------------------------------| + */ + ptr[0] = 'x'; + + /* + * Unmap middle: + * + * |-----------| |-----------| + * | faulted | | faulted | + * |-----------| |-----------| + * + * Now the faulted areas are compatible with each other (anon_vma the + * same, vma->vm_pgoff equal to virtual page offset). + */ + ASSERT_EQ(munmap(&ptr[5 * page_size], 5 * page_size), 0); + + /* + * Map a new area, ptr2: + * \ + * |-----------| |-----------| / |-----------| + * | faulted | | faulted | \ | unfaulted | + * |-----------| |-----------| / |-----------| + * ptr \ ptr2 + */ + ptr2 = mmap(&carveout[20 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + + /* + * Finally, move ptr2 into place: + * + * |-----------|-----------|-----------| + * | faulted | unfaulted | faulted | + * |-----------|-----------|-----------| + * ptr ptr2 ptr3 + * + * It should merge: + * + * |-----------------------------------| + * | faulted | + * |-----------------------------------| + */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[5 * page_size]); + ASSERT_NE(ptr2, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 15 * page_size); +} + +TEST_F(merge, mremap_correct_placed_faulted) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr, *ptr2, *ptr3; + + /* + * Map one larger area: + * + * |-----------------------------------| + * | unfaulted | + * |-----------------------------------| + */ + ptr = mmap(&carveout[page_size], 15 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* + * Fault in ptr: + * + * |-----------------------------------| + * | faulted | + * 
|-----------------------------------| + */ + ptr[0] = 'x'; + + /* + * Offset the final and middle 5 pages further away: + * \ \ + * |-----------| / |-----------| / |-----------| + * | faulted | \ | faulted | \ | faulted | + * |-----------| / |-----------| / |-----------| + * ptr \ ptr2 \ ptr3 + */ + ptr3 = &ptr[10 * page_size]; + ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr3 + page_size * 2000); + ASSERT_NE(ptr3, MAP_FAILED); + ptr2 = &ptr[5 * page_size]; + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr2 + page_size * 1000); + ASSERT_NE(ptr2, MAP_FAILED); + + /* + * Move ptr2 into its correct place: + * \ + * |-----------|-----------| / |-----------| + * | faulted | faulted | \ | faulted | + * |-----------|-----------| / |-----------| + * ptr ptr2 \ ptr3 + * + * It should merge: + * \ + * |-----------------------| / |-----------| + * | faulted | \ | faulted | + * |-----------------------| / |-----------| + * ptr \ ptr3 + */ + + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[5 * page_size]); + ASSERT_NE(ptr2, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size); + + /* + * Now move ptr out of place: + * \ \ + * |-----------| / |-----------| / |-----------| + * | faulted | \ | faulted | \ | faulted | + * |-----------| / |-----------| / |-----------| + * ptr2 \ ptr \ ptr3 + */ + ptr = sys_mremap(ptr, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr + page_size * 1000); + ASSERT_NE(ptr, MAP_FAILED); + + /* + * Now move ptr back into place: + * \ + * |-----------|-----------| / |-----------| + * | faulted | faulted | \ | faulted | + * |-----------|-----------| / |-----------| + * ptr ptr2 \ ptr3 + * + * It should merge: + * \ + * |-----------------------| / |-----------| + * | 
faulted | \ | faulted | + * |-----------------------| / |-----------| + * ptr \ ptr3 + */ + ptr = sys_mremap(ptr, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &carveout[page_size]); + ASSERT_NE(ptr, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size); + + /* + * Now move ptr out of place again: + * \ \ + * |-----------| / |-----------| / |-----------| + * | faulted | \ | faulted | \ | faulted | + * |-----------| / |-----------| / |-----------| + * ptr2 \ ptr \ ptr3 + */ + ptr = sys_mremap(ptr, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr + page_size * 1000); + ASSERT_NE(ptr, MAP_FAILED); + + /* + * Now move ptr3 back into place: + * \ + * |-----------|-----------| / |-----------| + * | faulted | faulted | \ | faulted | + * |-----------|-----------| / |-----------| + * ptr2 ptr3 \ ptr + * + * It should merge: + * \ + * |-----------------------| / |-----------| + * | faulted | \ | faulted | + * |-----------------------| / |-----------| + * ptr2 \ ptr + */ + ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr2[5 * page_size]); + ASSERT_NE(ptr3, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr2)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr2); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr2 + 10 * page_size); + + /* + * Now move ptr back into place: + * + * |-----------|-----------------------| + * | faulted | faulted | + * |-----------|-----------------------| + * ptr ptr2 + * + * It should merge: + * + * |-----------------------------------| + * | faulted | + * |-----------------------------------| + * ptr + */ + ptr = sys_mremap(ptr, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &carveout[page_size]); + ASSERT_NE(ptr, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + 
ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 15 * page_size); + + /* + * Now move ptr2 out of the way: + * \ + * |-----------| |-----------| / |-----------| + * | faulted | | faulted | \ | faulted | + * |-----------| |-----------| / |-----------| + * ptr ptr3 \ ptr2 + */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr2 + page_size * 1000); + ASSERT_NE(ptr2, MAP_FAILED); + + /* + * Now move it back: + * + * |-----------|-----------|-----------| + * | faulted | faulted | faulted | + * |-----------|-----------|-----------| + * ptr ptr2 ptr3 + * + * It should merge: + * + * |-----------------------------------| + * | faulted | + * |-----------------------------------| + * ptr + */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[5 * page_size]); + ASSERT_NE(ptr2, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 15 * page_size); + + /* + * Move ptr3 out of place: + * \ + * |-----------------------| / |-----------| + * | faulted | \ | faulted | + * |-----------------------| / |-----------| + * ptr \ ptr3 + */ + ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr3 + page_size * 1000); + ASSERT_NE(ptr3, MAP_FAILED); + + /* + * Now move it back: + * + * |-----------|-----------|-----------| + * | faulted | faulted | faulted | + * |-----------|-----------|-----------| + * ptr ptr2 ptr3 + * + * It should merge: + * + * |-----------------------------------| + * | faulted | + * |-----------------------------------| + * ptr + */ + ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[10 * page_size]); + ASSERT_NE(ptr3, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, 
(unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 15 * page_size); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c index 5492e3f784df..1d434772fa54 100644 --- a/tools/testing/selftests/mm/vm_util.c +++ b/tools/testing/selftests/mm/vm_util.c @@ -524,3 +524,11 @@ int read_sysfs(const char *file_path, unsigned long *val) return 0; } + +void *sys_mremap(void *old_address, unsigned long old_size, + unsigned long new_size, int flags, void *new_address) +{ + return (void *)syscall(__NR_mremap, (unsigned long)old_address, + old_size, new_size, flags, + (unsigned long)new_address); +} diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index b8136d12a0f8..797c24215b17 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -117,6 +117,9 @@ static inline void log_test_result(int result) ksft_test_result_report(result, "%s\n", test_name); } +void *sys_mremap(void *old_address, unsigned long old_size, + unsigned long new_size, int flags, void *new_address); + /* * On ppc64 this will only work with radix 2M hugepage size */ -- 2.50.0

5 months, 2 weeks

1
0
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-kselftest-mirror