September 2021 - Linux-kselftest-mirror

[PATCH v3] lib: Convert test_user_copy to KUnit test

by Vitor Massaru Iha

This adds the conversion of the runtime tests of test_user_copy functions, from `lib/test_user_copy.c` to KUnit tests. Signed-off-by: Vitor Massaru Iha <vitor(a)massaru.org> --- v2: * split patch in 3: - Allows to install and load modules in root filesystem; - Provides a userspace memory context when tests are compiled as module; - Convert test_user_copy to KUnit test; * removed entry for CONFIG_TEST_USER_COPY; * replaced pr_warn with KUNIT_EXPECT_FALSE_MSG in test macro to decrease the diff; v3: * rebased with last kunit branch * Please apply this commit from kunit-fixes: 3f37d14b8a3152441f36b6bc74000996679f0998 And these from patchwork: https://patchwork.kernel.org/patch/11676331/ https://patchwork.kernel.org/patch/11676335/ --- lib/Kconfig.debug | 28 ++++++++------ lib/Makefile | 2 +- lib/{test_user_copy.c => user_copy_kunit.c} | 42 +++++++++------------ 3 files changed, 35 insertions(+), 37 deletions(-) rename lib/{test_user_copy.c => user_copy_kunit.c} (91%) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 9ad9210d70a1..f699a3624ae7 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2078,18 +2078,6 @@ config TEST_VMALLOC If unsure, say N. -config TEST_USER_COPY - tristate "Test user/kernel boundary protections" - depends on m - help - This builds the "test_user_copy" module that runs sanity checks - on the copy_to/from_user infrastructure, making sure basic - user/kernel boundary testing is working. If it fails to load, - a regression has been detected in the user/kernel memory boundary - protections. - - If unsure, say N. - config TEST_BPF tristate "Test BPF filter functionality" depends on m && NET @@ -2154,6 +2142,22 @@ config SYSCTL_KUNIT_TEST If unsure, say N. 
+config USER_COPY_KUNIT + tristate "KUnit Test for user/kernel boundary protections" + depends on KUNIT + depends on m + help + This builds the "user_copy_kunit" module that runs sanity checks + on the copy_to/from_user infrastructure, making sure basic + user/kernel boundary testing is working. If it fails to load, + a regression has been detected in the user/kernel memory boundary + protections. + + For more information on KUnit and unit tests in general please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. + + If unsure, say N. + config LIST_KUNIT_TEST tristate "KUnit Test for Kernel Linked-list structures" if !KUNIT_ALL_TESTS depends on KUNIT diff --git a/lib/Makefile b/lib/Makefile index b1c42c10073b..8c145f85accc 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -78,7 +78,6 @@ obj-$(CONFIG_TEST_VMALLOC) += test_vmalloc.o obj-$(CONFIG_TEST_OVERFLOW) += test_overflow.o obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o obj-$(CONFIG_TEST_SORT) += test_sort.o -obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o obj-$(CONFIG_TEST_PRINTF) += test_printf.o @@ -318,3 +317,4 @@ obj-$(CONFIG_OBJAGG) += objagg.o # KUnit tests obj-$(CONFIG_LIST_KUNIT_TEST) += list-test.o obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o +obj-$(CONFIG_USER_COPY_KUNIT) += user_copy_kunit.o diff --git a/lib/test_user_copy.c b/lib/user_copy_kunit.c similarity index 91% rename from lib/test_user_copy.c rename to lib/user_copy_kunit.c index 5ff04d8fe971..a10ddd15b4cd 100644 --- a/lib/test_user_copy.c +++ b/lib/user_copy_kunit.c @@ -16,6 +16,7 @@ #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/vmalloc.h> +#include <kunit/test.h> /* * Several 32-bit architectures support 64-bit {get,put}_user() calls. 
@@ -35,7 +36,7 @@ ({ \ int cond = (condition); \ if (cond) \ - pr_warn("[%d] " msg "\n", __LINE__, ##__VA_ARGS__); \ + KUNIT_EXPECT_FALSE_MSG(test, cond, msg, ##__VA_ARGS__); \ cond; \ }) @@ -44,7 +45,7 @@ static bool is_zeroed(void *from, size_t size) return memchr_inv(from, 0x0, size) == NULL; } -static int test_check_nonzero_user(char *kmem, char __user *umem, size_t size) +static int test_check_nonzero_user(struct kunit *test, char *kmem, char __user *umem, size_t size) { int ret = 0; size_t start, end, i, zero_start, zero_end; @@ -102,7 +103,7 @@ static int test_check_nonzero_user(char *kmem, char __user *umem, size_t size) return ret; } -static int test_copy_struct_from_user(char *kmem, char __user *umem, +static int test_copy_struct_from_user(struct kunit *test, char *kmem, char __user *umem, size_t size) { int ret = 0; @@ -177,7 +178,7 @@ static int test_copy_struct_from_user(char *kmem, char __user *umem, return ret; } -static int __init test_user_copy_init(void) +static void user_copy_test(struct kunit *test) { int ret = 0; char *kmem; @@ -192,16 +193,14 @@ static int __init test_user_copy_init(void) #endif kmem = kmalloc(PAGE_SIZE * 2, GFP_KERNEL); - if (!kmem) - return -ENOMEM; + KUNIT_EXPECT_FALSE_MSG(test, kmem == NULL, "kmalloc failed"); user_addr = vm_mmap(NULL, 0, PAGE_SIZE * 2, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANONYMOUS | MAP_PRIVATE, 0); if (user_addr >= (unsigned long)(TASK_SIZE)) { - pr_warn("Failed to allocate user memory\n"); kfree(kmem); - return -ENOMEM; + KUNIT_FAIL(test, "Failed to allocate user memory"); } usermem = (char __user *)user_addr; @@ -245,9 +244,9 @@ static int __init test_user_copy_init(void) #undef test_legit /* Test usage of check_nonzero_user(). */ - ret |= test_check_nonzero_user(kmem, usermem, 2 * PAGE_SIZE); + ret |= test_check_nonzero_user(test, kmem, usermem, 2 * PAGE_SIZE); /* Test usage of copy_struct_from_user(). 
*/ - ret |= test_copy_struct_from_user(kmem, usermem, 2 * PAGE_SIZE); + ret |= test_copy_struct_from_user(test, kmem, usermem, 2 * PAGE_SIZE); /* * Invalid usage: none of these copies should succeed. @@ -309,23 +308,18 @@ static int __init test_user_copy_init(void) vm_munmap(user_addr, PAGE_SIZE * 2); kfree(kmem); - - if (ret == 0) { - pr_info("tests passed.\n"); - return 0; - } - - return -EINVAL; } -module_init(test_user_copy_init); - -static void __exit test_user_copy_exit(void) -{ - pr_info("unloaded.\n"); -} +static struct kunit_case user_copy_test_cases[] = { + KUNIT_CASE(user_copy_test), + {} +}; -module_exit(test_user_copy_exit); +static struct kunit_suite user_copy_test_suite = { + .name = "user_copy", + .test_cases = user_copy_test_cases, +}; +kunit_test_suites(&user_copy_test_suite); MODULE_AUTHOR("Kees Cook <keescook(a)chromium.org>"); MODULE_LICENSE("GPL"); base-commit: d43c7fb05765152d4d4a39a8ef957c4ea14d8847 -- 2.26.2

11 months, 1 week

4
11
0 0

[PATCH v4 00/10] Add support for synchronous signals on perf events

by Marco Elver

The perf subsystem today unifies various tracing and monitoring features, from both software and hardware. One benefit of the perf subsystem is automatically inheriting events to child tasks, which enables process-wide events monitoring with low overheads. By default perf events are non-intrusive, not affecting behaviour of the tasks being monitored. For certain use-cases, however, it makes sense to leverage the generality of the perf events subsystem and optionally allow the tasks being monitored to receive signals on events they are interested in. This patch series adds the option to synchronously signal user space on events. To better support process-wide synchronous self-monitoring, without events propagating to children that do not share the current process's shared environment, two pre-requisite patches are added to optionally restrict inheritance to CLONE_THREAD, and remove events on exec (without affecting the parent). Examples how to use these features can be found in the tests added at the end of the series. In addition to the tests added, the series has also been subjected to syzkaller fuzzing (focus on 'kernel/events/' coverage). Motivation and Example Uses --------------------------- 1. Our immediate motivation is low-overhead sampling-based race detection for user space [1]. By using perf_event_open() at process initialization, we can create hardware breakpoint/watchpoint events that are propagated automatically to all threads in a process. As far as we are aware, today no existing kernel facility (such as ptrace) allows us to set up process-wide watchpoints with minimal overheads (that are comparable to mprotect() of whole pages). 2. Other low-overhead error detectors that rely on detecting accesses to certain memory locations or code, process-wide and also only in a specific set of subtasks or threads. 
[1] https://llvm.org/devmtg/2020-09/slides/Morehouse-GWP-Tsan.pdf Other ideas for use-cases we found interesting, but should only illustrate the range of potential to further motivate the utility (we're sure there are more): 3. Code hot patching without full stop-the-world. Specifically, by setting a code breakpoint to entry to the patched routine, then send signals to threads and check that they are not in the routine, but without stopping them further. If any of the threads will enter the routine, it will receive SIGTRAP and pause. 4. Safepoints without mprotect(). Some Java implementations use "load from a known memory location" as a safepoint. When threads need to be stopped, the page containing the location is mprotect()ed and threads get a signal. This could be replaced with a watchpoint, which does not require a whole page nor DTLB shootdowns. 5. Threads receiving signals on performance events to throttle/unthrottle themselves. 6. Tracking data flow globally. Changelog --------- v4: * Fix for parent and child racing to exit in sync_child_event(). * Fix race between irq_work running and task's sighand being released by release_task(). * Generalize setting si_perf and si_addr independent of event type; introduces perf_event_attr::sig_data, which can be set by user space to be propagated to si_perf. * Warning in perf_sigtrap() if ctx->task and current mismatch; we expect this on architectures that do not properly implement arch_irq_work_raise(). * Require events that want sigtrap to be associated with a task. * Dropped "perf: Add breakpoint information to siginfo on SIGTRAP" in favor of more generic solution (perf_event_attr::sig_data). v3: * Add patch "perf: Rework perf_event_exit_event()" to beginning of series, courtesy of Peter Zijlstra. * Rework "perf: Add support for event removal on exec" based on the added "perf: Rework perf_event_exit_event()". * Fix kselftests to work with more recent libc, due to the way it forces using the kernel's own siginfo_t. 
* Add basic perf-tool built-in test. v2/RFC: https://lkml.kernel.org/r/20210310104139.679618-1-elver@google.com * Patch "Support only inheriting events if cloned with CLONE_THREAD" added to series. * Patch "Add support for event removal on exec" added to series. * Patch "Add kselftest for process-wide sigtrap handling" added to series. * Patch "Add kselftest for remove_on_exec" added to series. * Implicitly restrict inheriting events if sigtrap, but the child was cloned with CLONE_CLEAR_SIGHAND, because it is not generally safe if the child cleared all signal handlers to continue sending SIGTRAP. * Various minor fixes (see details in patches). v1/RFC: https://lkml.kernel.org/r/20210223143426.2412737-1-elver@google.com Pre-series: The discussion at [2] led to the changes in this series. The approach taken in "Add support for SIGTRAP on perf events" to trigger the signal was suggested by Peter Zijlstra in [3]. [2] https://lore.kernel.org/lkml/CACT4Y+YPrXGw+AtESxAgPyZ84TYkNZdP0xpocX2jwVAbZ… [3] https://lore.kernel.org/lkml/YBv3rAT566k+6zjg@hirez.programming.kicks-ass.n… Marco Elver (9): perf: Apply PERF_EVENT_IOC_MODIFY_ATTRIBUTES to children perf: Support only inheriting events if cloned with CLONE_THREAD perf: Add support for event removal on exec signal: Introduce TRAP_PERF si_code and si_perf to siginfo perf: Add support for SIGTRAP on perf events selftests/perf_events: Add kselftest for process-wide sigtrap handling selftests/perf_events: Add kselftest for remove_on_exec tools headers uapi: Sync tools/include/uapi/linux/perf_event.h perf test: Add basic stress test for sigtrap handling Peter Zijlstra (1): perf: Rework perf_event_exit_event() arch/m68k/kernel/signal.c | 3 + arch/x86/kernel/signal_compat.c | 5 +- fs/signalfd.c | 4 + include/linux/compat.h | 2 + include/linux/perf_event.h | 9 +- include/linux/signal.h | 1 + include/uapi/asm-generic/siginfo.h | 6 +- include/uapi/linux/perf_event.h | 12 +- include/uapi/linux/signalfd.h | 4 +- kernel/events/core.c | 
302 +++++++++++++----- kernel/fork.c | 2 +- kernel/signal.c | 11 + tools/include/uapi/linux/perf_event.h | 12 +- tools/perf/tests/Build | 1 + tools/perf/tests/builtin-test.c | 5 + tools/perf/tests/sigtrap.c | 150 +++++++++ tools/perf/tests/tests.h | 1 + .../testing/selftests/perf_events/.gitignore | 3 + tools/testing/selftests/perf_events/Makefile | 6 + tools/testing/selftests/perf_events/config | 1 + .../selftests/perf_events/remove_on_exec.c | 260 +++++++++++++++ tools/testing/selftests/perf_events/settings | 1 + .../selftests/perf_events/sigtrap_threads.c | 210 ++++++++++++ 23 files changed, 924 insertions(+), 87 deletions(-) create mode 100644 tools/perf/tests/sigtrap.c create mode 100644 tools/testing/selftests/perf_events/.gitignore create mode 100644 tools/testing/selftests/perf_events/Makefile create mode 100644 tools/testing/selftests/perf_events/config create mode 100644 tools/testing/selftests/perf_events/remove_on_exec.c create mode 100644 tools/testing/selftests/perf_events/settings create mode 100644 tools/testing/selftests/perf_events/sigtrap_threads.c -- 2.31.0.208.g409f899ff0-goog

1 year, 1 month

5
29
0 0

[PATCH v6] lib: add basic KUnit test for lib/math

by Daniel Latypov

Add basic test coverage for files that don't require any config options: * part of math.h (what seem to be the most commonly used macros) * gcd.c * lcm.c * int_sqrt.c * reciprocal_div.c (Ignored int_pow.c since it's a simple textbook algorithm.) These tests aren't particularly interesting, but they * provide short and simple examples of parameterized tests * provide a place to add tests for any new files in this dir * are written so adding new test cases to cover edge cases should be easy * looking at code coverage, we hit all the branches in the .c files Signed-off-by: Daniel Latypov <dlatypov(a)google.com> Reviewed-by: David Gow <davidgow(a)google.com> --- Changes since v5: * add in test cases for roundup/rounddown * address misc comments from David Changes since v4: * add in test cases for some math.h macros (abs, round_up/round_down, div_round_down/closest) * use parameterized testing less to keep things terser Changes since v3: * fix `checkpatch.pl --strict` warnings * add test cases for gcd(0,0) and lcm(0,0) * minor: don't test both gcd(a,b) and gcd(b,a) when a == b Changes since v2: mv math_test.c => math_kunit.c Changes since v1: * Rebase and rewrite to use the new parameterized testing support. * misc: fix overflow in literal and inline int_sqrt format string. * related: commit 1f0e943df68a ("Documentation: kunit: provide guidance for testing many inputs") was merged explaining the patterns shown here. * there's an in-flight patch to update it for parameterized testing. 
--- lib/math/Kconfig | 12 ++ lib/math/Makefile | 2 + lib/math/math_kunit.c | 291 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 305 insertions(+) create mode 100644 lib/math/math_kunit.c diff --git a/lib/math/Kconfig b/lib/math/Kconfig index f19bc9734fa7..a974d4db0f9c 100644 --- a/lib/math/Kconfig +++ b/lib/math/Kconfig @@ -15,3 +15,15 @@ config PRIME_NUMBERS config RATIONAL bool + +config MATH_KUNIT_TEST + tristate "KUnit test for lib/math and math.h" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + This builds unit tests for lib/math and math.h. + + For more information on KUnit and unit tests in general, please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. + + If unsure, say N. diff --git a/lib/math/Makefile b/lib/math/Makefile index be6909e943bd..30abb7a8d564 100644 --- a/lib/math/Makefile +++ b/lib/math/Makefile @@ -4,3 +4,5 @@ obj-y += div64.o gcd.o lcm.o int_pow.o int_sqrt.o reciprocal_div.o obj-$(CONFIG_CORDIC) += cordic.o obj-$(CONFIG_PRIME_NUMBERS) += prime_numbers.o obj-$(CONFIG_RATIONAL) += rational.o + +obj-$(CONFIG_MATH_KUNIT_TEST) += math_kunit.o diff --git a/lib/math/math_kunit.c b/lib/math/math_kunit.c new file mode 100644 index 000000000000..556c23b17c3c --- /dev/null +++ b/lib/math/math_kunit.c @@ -0,0 +1,291 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Simple KUnit suite for math helper funcs that are always enabled. + * + * Copyright (C) 2020, Google LLC. + * Author: Daniel Latypov <dlatypov(a)google.com> + */ + +#include <kunit/test.h> +#include <linux/gcd.h> +#include <linux/kernel.h> +#include <linux/lcm.h> +#include <linux/reciprocal_div.h> + +static void abs_test(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, abs((char)0), (char)0); + KUNIT_EXPECT_EQ(test, abs((char)42), (char)42); + KUNIT_EXPECT_EQ(test, abs((char)-42), (char)42); + + /* The expression in the macro is actually promoted to an int. 
*/ + KUNIT_EXPECT_EQ(test, abs((short)0), 0); + KUNIT_EXPECT_EQ(test, abs((short)42), 42); + KUNIT_EXPECT_EQ(test, abs((short)-42), 42); + + KUNIT_EXPECT_EQ(test, abs(0), 0); + KUNIT_EXPECT_EQ(test, abs(42), 42); + KUNIT_EXPECT_EQ(test, abs(-42), 42); + + KUNIT_EXPECT_EQ(test, abs(0L), 0L); + KUNIT_EXPECT_EQ(test, abs(42L), 42L); + KUNIT_EXPECT_EQ(test, abs(-42L), 42L); + + KUNIT_EXPECT_EQ(test, abs(0LL), 0LL); + KUNIT_EXPECT_EQ(test, abs(42LL), 42LL); + KUNIT_EXPECT_EQ(test, abs(-42LL), 42LL); + + /* Unsigned types get casted to signed. */ + KUNIT_EXPECT_EQ(test, abs(0ULL), 0LL); + KUNIT_EXPECT_EQ(test, abs(42ULL), 42LL); +} + +static void int_sqrt_test(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, int_sqrt(0UL), 0UL); + KUNIT_EXPECT_EQ(test, int_sqrt(1UL), 1UL); + KUNIT_EXPECT_EQ(test, int_sqrt(4UL), 2UL); + KUNIT_EXPECT_EQ(test, int_sqrt(5UL), 2UL); + KUNIT_EXPECT_EQ(test, int_sqrt(8UL), 2UL); + KUNIT_EXPECT_EQ(test, int_sqrt(1UL << 30), 1UL << 15); +} + +static void round_up_test(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, round_up(0, 1), 0); + KUNIT_EXPECT_EQ(test, round_up(1, 2), 2); + KUNIT_EXPECT_EQ(test, round_up(3, 2), 4); + KUNIT_EXPECT_EQ(test, round_up((1 << 30) - 1, 2), 1 << 30); + KUNIT_EXPECT_EQ(test, round_up((1 << 30) - 1, 1 << 29), 1 << 30); +} + +static void round_down_test(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, round_down(0, 1), 0); + KUNIT_EXPECT_EQ(test, round_down(1, 2), 0); + KUNIT_EXPECT_EQ(test, round_down(3, 2), 2); + KUNIT_EXPECT_EQ(test, round_down((1 << 30) - 1, 2), (1 << 30) - 2); + KUNIT_EXPECT_EQ(test, round_down((1 << 30) - 1, 1 << 29), 1 << 29); +} + +/* These versions can round to numbers that aren't a power of two */ +static void roundup_test(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, roundup(0, 1), 0); + KUNIT_EXPECT_EQ(test, roundup(1, 2), 2); + KUNIT_EXPECT_EQ(test, roundup(3, 2), 4); + KUNIT_EXPECT_EQ(test, roundup((1 << 30) - 1, 2), 1 << 30); + KUNIT_EXPECT_EQ(test, roundup((1 << 30) - 1, 1 << 29), 1 << 
30); + + KUNIT_EXPECT_EQ(test, roundup(3, 2), 4); + KUNIT_EXPECT_EQ(test, roundup(4, 3), 6); +} + +static void rounddown_test(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, rounddown(0, 1), 0); + KUNIT_EXPECT_EQ(test, rounddown(1, 2), 0); + KUNIT_EXPECT_EQ(test, rounddown(3, 2), 2); + KUNIT_EXPECT_EQ(test, rounddown((1 << 30) - 1, 2), (1 << 30) - 2); + KUNIT_EXPECT_EQ(test, rounddown((1 << 30) - 1, 1 << 29), 1 << 29); + + KUNIT_EXPECT_EQ(test, rounddown(3, 2), 2); + KUNIT_EXPECT_EQ(test, rounddown(4, 3), 3); +} + +static void div_round_up_test(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, DIV_ROUND_UP(0, 1), 0); + KUNIT_EXPECT_EQ(test, DIV_ROUND_UP(20, 10), 2); + KUNIT_EXPECT_EQ(test, DIV_ROUND_UP(21, 10), 3); + KUNIT_EXPECT_EQ(test, DIV_ROUND_UP(21, 20), 2); + KUNIT_EXPECT_EQ(test, DIV_ROUND_UP(21, 99), 1); +} + +static void div_round_closest_test(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, DIV_ROUND_CLOSEST(0, 1), 0); + KUNIT_EXPECT_EQ(test, DIV_ROUND_CLOSEST(20, 10), 2); + KUNIT_EXPECT_EQ(test, DIV_ROUND_CLOSEST(21, 10), 2); + KUNIT_EXPECT_EQ(test, DIV_ROUND_CLOSEST(25, 10), 3); +} + +/* Generic test case for unsigned long inputs. 
*/ +struct test_case { + unsigned long a, b; + unsigned long result; +}; + +static struct test_case gcd_cases[] = { + { + .a = 0, .b = 0, + .result = 0, + }, + { + .a = 0, .b = 1, + .result = 1, + }, + { + .a = 2, .b = 2, + .result = 2, + }, + { + .a = 2, .b = 4, + .result = 2, + }, + { + .a = 3, .b = 5, + .result = 1, + }, + { + .a = 3 * 9, .b = 3 * 5, + .result = 3, + }, + { + .a = 3 * 5 * 7, .b = 3 * 5 * 11, + .result = 15, + }, + { + .a = 1 << 21, + .b = (1 << 21) - 1, + .result = 1, + }, +}; + +KUNIT_ARRAY_PARAM(gcd, gcd_cases, NULL); + +static void gcd_test(struct kunit *test) +{ + const char *message_fmt = "gcd(%lu, %lu)"; + const struct test_case *test_param = test->param_value; + + KUNIT_EXPECT_EQ_MSG(test, test_param->result, + gcd(test_param->a, test_param->b), + message_fmt, test_param->a, + test_param->b); + + if (test_param->a == test_param->b) + return; + + /* gcd(a,b) == gcd(b,a) */ + KUNIT_EXPECT_EQ_MSG(test, test_param->result, + gcd(test_param->b, test_param->a), + message_fmt, test_param->b, + test_param->a); +} + +static struct test_case lcm_cases[] = { + { + .a = 0, .b = 0, + .result = 0, + }, + { + .a = 0, .b = 1, + .result = 0, + }, + { + .a = 1, .b = 2, + .result = 2, + }, + { + .a = 2, .b = 2, + .result = 2, + }, + { + .a = 3 * 5, .b = 3 * 7, + .result = 3 * 5 * 7, + }, +}; + +KUNIT_ARRAY_PARAM(lcm, lcm_cases, NULL); + +static void lcm_test(struct kunit *test) +{ + const char *message_fmt = "lcm(%lu, %lu)"; + const struct test_case *test_param = test->param_value; + + KUNIT_EXPECT_EQ_MSG(test, test_param->result, + lcm(test_param->a, test_param->b), + message_fmt, test_param->a, + test_param->b); + + if (test_param->a == test_param->b) + return; + + /* lcm(a,b) == lcm(b,a) */ + KUNIT_EXPECT_EQ_MSG(test, test_param->result, + lcm(test_param->b, test_param->a), + message_fmt, test_param->b, + test_param->a); +} + +struct u32_test_case { + u32 a, b; + u32 result; +}; + +static struct u32_test_case reciprocal_div_cases[] = { + { + .a = 0, .b = 
1, + .result = 0, + }, + { + .a = 42, .b = 20, + .result = 2, + }, + { + .a = 42, .b = 9999, + .result = 0, + }, + { + .a = (1 << 16), .b = (1 << 14), + .result = 1 << 2, + }, +}; + +KUNIT_ARRAY_PARAM(reciprocal_div, reciprocal_div_cases, NULL); + +static void reciprocal_div_test(struct kunit *test) +{ + const struct u32_test_case *test_param = test->param_value; + struct reciprocal_value rv = reciprocal_value(test_param->b); + + KUNIT_EXPECT_EQ_MSG(test, test_param->result, + reciprocal_divide(test_param->a, rv), + "reciprocal_divide(%u, %u)", + test_param->a, test_param->b); +} + +static void reciprocal_scale_test(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, reciprocal_scale(0u, 100), 0u); + KUNIT_EXPECT_EQ(test, reciprocal_scale(1u, 100), 0u); + KUNIT_EXPECT_EQ(test, reciprocal_scale(1u << 4, 1 << 28), 1u); + KUNIT_EXPECT_EQ(test, reciprocal_scale(1u << 16, 1 << 28), 1u << 12); + KUNIT_EXPECT_EQ(test, reciprocal_scale(~0u, 1 << 28), (1u << 28) - 1); +} + +static struct kunit_case math_test_cases[] = { + KUNIT_CASE(abs_test), + KUNIT_CASE(int_sqrt_test), + KUNIT_CASE(round_up_test), + KUNIT_CASE(round_down_test), + KUNIT_CASE(roundup_test), + KUNIT_CASE(rounddown_test), + KUNIT_CASE(div_round_up_test), + KUNIT_CASE(div_round_closest_test), + KUNIT_CASE_PARAM(gcd_test, gcd_gen_params), + KUNIT_CASE_PARAM(lcm_test, lcm_gen_params), + KUNIT_CASE_PARAM(reciprocal_div_test, reciprocal_div_gen_params), + KUNIT_CASE(reciprocal_scale_test), + {} +}; + +static struct kunit_suite math_test_suite = { + .name = "lib-math", + .test_cases = math_test_cases, +}; + +kunit_test_suites(&math_test_suite); + +MODULE_LICENSE("GPL v2"); base-commit: 7e25f40eab52c57ff6772d27d2aef3640a3237d7 -- 2.31.1.368.gbe11c130af-goog

1 year, 1 month

5
5
0 0

[PATCH v2] selftests/kselftest/runner/run_one(): Allow running non-executable files

by SeongJae Park

From: SeongJae Park <sjpark(a)amazon.de> When running a test program, 'run_one()' checks if the program has the execution permission and fails if it doesn't. However, it's easy to mistakenly miss the permission, as some common tools like 'diff' don't support the permission change well[1]. Compared to that, making mistakes in the test program's path would only be rare, as those are explicitly listed in 'TEST_PROGS'. Therefore, it might make more sense to resolve the situation on our own and run the program. For that reason, this commit makes the test program runner function still print the warning message but try parsing the interpreter of the program and explicitly run it with the interpreter, in the case. [1] https://lore.kernel.org/mm-commits/YRJisBs9AunccCD4@kroah.com/ Suggested-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> Signed-off-by: SeongJae Park <sjpark(a)amazon.de> --- Changes from v1 (https://lore.kernel.org/linux-kselftest/20210810140459.23990-1-sj38.park@gm…) - Parse and use the interpreter instead of changing the file tools/testing/selftests/kselftest/runner.sh | 28 +++++++++++++-------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh index cc9c846585f0..a9ba782d8ca0 100644 --- a/tools/testing/selftests/kselftest/runner.sh +++ b/tools/testing/selftests/kselftest/runner.sh @@ -33,9 +33,9 @@ tap_timeout() { # Make sure tests will time out if utility is available. if [ -x /usr/bin/timeout ] ; then - /usr/bin/timeout --foreground "$kselftest_timeout" "$1" + /usr/bin/timeout --foreground "$kselftest_timeout" $1 else - "$1" + $1 fi } @@ -65,17 +65,25 @@ run_one() TEST_HDR_MSG="selftests: $DIR: $BASENAME_TEST" echo "# $TEST_HDR_MSG" - if [ ! -x "$TEST" ]; then - echo -n "# Warning: file $TEST is " - if [ ! -e "$TEST" ]; then - echo "missing!" - else - echo "not executable, correct this." - fi + if [ ! 
-e "$TEST" ]; then + echo "# Warning: file $TEST is missing!" echo "not ok $test_num $TEST_HDR_MSG" else + cmd="./$BASENAME_TEST" + if [ ! -x "$TEST" ]; then + echo "# Warning: file $TEST is not executable" + + if [ $(head -n 1 "$TEST" | cut -c -2) = "#!" ] + then + interpreter=$(head -n 1 "$TEST" | cut -c 3-) + cmd="$interpreter ./$BASENAME_TEST" + else + echo "not ok $test_num $TEST_HDR_MSG" + return + fi + fi cd `dirname $TEST` > /dev/null - ((((( tap_timeout ./$BASENAME_TEST 2>&1; echo $? >&3) | + ((((( tap_timeout "$cmd" 2>&1; echo $? >&3) | tap_prefix >&4) 3>&1) | (read xs; exit $xs)) 4>>"$logfile" && echo "ok $test_num $TEST_HDR_MSG") || -- 2.17.1

2 years, 2 months

3
6
0 0

[PATCH net-next v2 0/3] net: introduce rps_default_mask

by Paolo Abeni

Real-time setups try hard to ensure proper isolation between time critical applications and e.g. network processing performed by the network stack in softirq, and RPS is used to move the softirq activity away from the isolated core. If the network configuration is dynamic, with netns and devices routinely created at run-time, enforcing the correct RPS setting on each newly created device without allowing transient bad configurations becomes complex. This series tries to address the above, introducing a new sysctl knob: rps_default_mask. The new sysctl entry allows configuring a systemwide RPS mask, to be enforced since receive queue creation time without any further per device configuration required. Additionally, a simple self-test is introduced to check the rps_default_mask behavior. v1 -> v2: - fix sparse warning in patch 2/3 Paolo Abeni (3): net/sysctl: factor-out netdev_rx_queue_set_rps_mask() helper net/core: introduce default_rps_mask netns attribute self-tests: introduce self-tests for RPS default mask Documentation/admin-guide/sysctl/net.rst | 6 ++ include/linux/netdevice.h | 1 + net/core/net-sysfs.c | 73 +++++++++++-------- net/core/sysctl_net_core.c | 58 +++++++++++++++ tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/config | 3 + .../testing/selftests/net/rps_default_mask.sh | 57 +++++++++++++++ 7 files changed, 169 insertions(+), 30 deletions(-) create mode 100755 tools/testing/selftests/net/rps_default_mask.sh -- 2.26.2

2 years, 5 months

4
12
0 0

[PATCH v2 net-next 0/3] ptp: Add adjust phase to support phase offset.

by vincent.cheng.xh＠renesas.com

From: Vincent Cheng <vincent.cheng.xh(a)renesas.com> This series adds adjust phase to the PTP Hardware Clock device interface. Some PTP hardware clocks have a write phase mode that has a built-in hardware filtering capability. The write phase mode utilizes a phase offset control word instead of a frequency offset control word. Add adjust phase function to take advantage of this capability. Changes since v1: - As suggested by Richard Cochran: 1. ops->adjphase is new so need to check for non-null function pointer. 2. Kernel coding style uses lower_case_underscores. 3. Use existing PTP clock API for delayed worker. Vincent Cheng (3): ptp: Add adjphase function to support phase offset control. ptp: Add adjust_phase to ptp_clock_caps capability. ptp: ptp_clockmatrix: Add adjphase() to support PHC write phase mode. drivers/ptp/ptp_chardev.c | 1 + drivers/ptp/ptp_clock.c | 3 ++ drivers/ptp/ptp_clockmatrix.c | 92 +++++++++++++++++++++++++++++++++++ drivers/ptp/ptp_clockmatrix.h | 8 ++- include/linux/ptp_clock_kernel.h | 6 ++- include/uapi/linux/ptp_clock.h | 4 +- tools/testing/selftests/ptp/testptp.c | 6 ++- 7 files changed, 114 insertions(+), 6 deletions(-) -- 2.7.4

2 years, 11 months

5
8
0 0

Re: [PATCH 2/2] kvm: rename KVM_MAX_VCPU_ID to, KVM_MAX_VCPU_IDS

by Christian Zigotzky

Hello Juergen, Hello All, Since the RC1 of kernel 5.13, -smp 2 and -smp 4 don't work with a virtual e5500 QEMU KVM-HV machine anymore. [1] I see in the serial console, that the uImage doesn't load. I use the following QEMU command for booting: qemu-system-ppc64 -M ppce500 -cpu e5500 -enable-kvm -m 1024 -kernel uImage -drive format=raw,file=MintPPC32-X5000.img,index=0,if=virtio -netdev user,id=mynet0 -device virtio-net,netdev=mynet0 -append "rw root=/dev/vda" -device virtio-vga -device virtio-mouse-pci -device virtio-keyboard-pci -device pci-ohci,id=newusb -device usb-audio,bus=newusb.0 -smp 4 The kernels boot without KVM-HV. Summary for KVM-HV: -smp 1 -> works -smp 2 -> doesn't work -smp 3 -> works -smp 4 -> doesn't work I used -smp 4 before the RC1 of kernel 5.13 because my FSL P5040 BookE machine [2] has 4 cores. Does this patch solve this issue? [3] Thanks, Christian [1] https://lists.ozlabs.org/pipermail/linuxppc-dev/2021-May/229103.html [2] http://wiki.amiga.org/index.php?title=X5000 [3] https://lists.ozlabs.org/pipermail/linuxppc-dev/2021-September/234152.html

3 years

1
1
0 0

[PATCH v3 0/6] KVM: my debug patch queue

by Maxim Levitsky

Hi! I would like to publish two debug features which were needed for other stuff I work on. One is the reworked lx-symbols script which now actually works on at least gdb 9.1 (gdb 9.2 was reported to fail to load the debug symbols from the kernel for some reason, not related to this patch) and upstream qemu. The other feature is the ability to trap all guest exceptions (on SVM for now) and see them in kvmtrace prior to potential merge to double/triple fault. This can be very useful and I already had to manually patch KVM a few times for this. I will, once time permits, implement this feature on Intel as well. V2: * Some more refactoring and workarounds for lx-symbols script * added KVM_GUESTDBG_BLOCKIRQ flag to enable 'block interrupts on single step' together with KVM_CAP_SET_GUEST_DEBUG2 capability to indicate which guest debug flags are supported. This is a replacement for unconditional block of interrupts on single step that was done in previous version of this patch set. Patches to qemu to use that feature will be sent soon. * Reworked the 'intercept all exceptions for debug' feature according to the review feedback: - renamed the parameter that enables the feature and moved it to common kvm module. (only SVM part is currently implemented though) - disable the feature for SEV guests as was suggested during the review - made the vmexit table const again, as was suggested in the review as well. 
V3: * Modified a selftest to cover the KVM_GUESTDBG_BLOCKIRQ * Rebased on kvm/queue Best regards, Maxim Levitsky Maxim Levitsky (6): KVM: SVM: split svm_handle_invalid_exit KVM: x86: add force_intercept_exceptions_mask KVM: SVM: implement force_intercept_exceptions_mask scripts/gdb: rework lx-symbols gdb script KVM: x86: implement KVM_GUESTDBG_BLOCKIRQ KVM: selftests: test KVM_GUESTDBG_BLOCKIRQ Documentation/virt/kvm/api.rst | 1 + arch/x86/include/asm/kvm_host.h | 5 +- arch/x86/include/uapi/asm/kvm.h | 1 + arch/x86/kvm/svm/svm.c | 87 +++++++- arch/x86/kvm/svm/svm.h | 6 +- arch/x86/kvm/x86.c | 12 +- arch/x86/kvm/x86.h | 2 + kernel/module.c | 8 +- scripts/gdb/linux/symbols.py | 203 ++++++++++++------ .../testing/selftests/kvm/x86_64/debug_regs.c | 24 ++- 10 files changed, 266 insertions(+), 83 deletions(-) -- 2.26.3

3 years, 4 months

4
27
0 0

RFC - kernel test result specification (KTAP)

by Rae Moar

We are looking to further standardise the output format used by kernel test frameworks like kselftest and KUnit. Thus far we have used the TAP (Test Anything Protocol) specification, but it has been extended in many different ways, so we would like to agree on a common "Kernel TAP" (KTAP) format to resolve these differences. Thus, below is a draft of a specification of KTAP. Note that this specification is largely based on the current format of test results for KUnit tests. Additionally, this specification was heavily inspired by the KTAP specification draft by Tim Bird (https://lore.kernel.org/linux-kselftest/CY4PR13MB1175B804E31E502221BC8163FD…). However, there are some notable differences to his specification. One such difference is that the format of nested tests is more fully specified in the following specification. However, they are specified in a way which may not be compatible with many kselftest nested tests. ===================== Specification of KTAP ===================== TAP, or the Test Anything Protocol is a format for specifying test results used by a number of projects. Its website and specification are found at: https://testanything.org/. The Linux Kernel uses TAP output for test results. However, KUnit (and other Kernel testing frameworks such as kselftest) have some special needs for test results which don't gel perfectly with the original TAP specification. Thus, a "Kernel TAP" (KTAP) format is specified to extend and alter TAP to support these use-cases. KTAP Output consists of 5 major elements (all line-based): - The version line - Plan lines - Test case result lines - Diagnostic lines - A bail out line An important component in this specification of KTAP is the specification of the format of nested tests. This can be found in the section on nested tests below. The version line ---------------- The first line of KTAP output must be the version line.
As this specification documents the first version of KTAP, the recommended version line is "KTAP version 1". However, since all kernel testing frameworks use TAP version lines, "TAP version 14" and "TAP version 13" are all acceptable version lines. Version lines with other versions of TAP or KTAP will not cause the parsing of the test results to fail but it will produce an error. Plan lines ---------- Plan lines must follow the format of "1..N" where N is the number of subtests. The second line of KTAP output must be a plan line, which indicates the number of tests at the highest level, such that the tests do not have a parent. Also, in the instance of a test having subtests, the second line of the test after the subtest header must be a plan line which indicates the number of subtests within that test. Test case result lines ---------------------- Test case result lines must have the format: <result> <number> [-] [<description>] [<directive>] [<diagnostic data>] The result can be either "ok", which indicates the test case passed, or "not ok", which indicates that the test case failed. The number represents the number of the test case or suite being performed. The first test case or suite must have the number 1 and the number must increase by 1 for each additional test case or result at the same level and within the same testing suite. The "-" character is optional. The description is a description of the test, generally the name of the test, and can be any string of words (can't include #). The description is optional. The directive is used to indicate if a test was skipped. The format for the directive is: "# SKIP [<skip_description>]". The skip_description is optional and can be any string of words to describe why the test was skipped. The result of the test case result line can be either "ok" or "not ok" if the skip directive is used. Finally, note that TAP 14 specification includes TODO directives but these are not supported for KTAP. 
Examples of test case result lines: Test passed: ok 1 - test_case_name Test was skipped: not ok 1 - test_case_name # SKIP test_case_name should be skipped Test failed: not ok 1 - test_case_name Diagnostic lines ---------------- Diagnostic lines are used for description of testing operations. Diagnostic lines are generally formatted as "# <diagnostic_description>", where the description can be any string. However, in practice, diagnostic lines are all lines that don't follow any other KTAP line format. Diagnostic lines can be anywhere in the test output after the first two lines. There are a few special diagnostic lines. Diagnostic lines of the format "# Subtest: <test_name>" indicate the start of a test with subtests. Also, diagnostic lines of the format "# <test_name>: <description>" refer to a specific test and tend to occur before the test result line of that test but are optional. Bail out line ------------- A bail out line can occur anywhere in the KTAP output and will indicate that a test has crashed. The format of a bail out line is "Bail out! [<description>]", where the description can give information on why the bail out occurred and can be any string. Nested tests ------------ The new specification for KTAP will support an arbitrary number of nested subtests. Thus, tests can now have subtests and those subtests can have subtests. This can be useful to further categorize tests and organize test results. The new required format for a test with subtests consists of: a subtest header line, a plan line, all subtests, and a final test result line. The first line of the test must be the subtest header line with the format: "# Subtest: <test_name>". The second line of the test must be the plan line, which is formatted as "1..N", where N is the number of subtests. Following the plan line, all lines pertaining to the subtests will follow.
Finally, the last line of the test is a final test result line with the format: "(ok|not ok) <number> [-] [<description>] [<directive>] [<diagnostic data>]", which follows the same format as the general test result lines described in this section. The result line should indicate the result of the subtests. Thus, if one of the subtests fails, the test should fail. The description in the final test result line should match the name of the test in the subtest header. An example format: KTAP version 1 1..1 # Subtest: test_suite 1..2 ok 1 - test_1 ok 2 - test_2 ok 1 - test_suite An example format with multiple levels of nested testing: KTAP version 1 1..1 # Subtest: test_suite 1..2 # Subtest: sub_test_suite 1..2 ok 1 - test_1 ok 2 test_2 ok 1 - sub_test_suite ok 2 - test ok 1 - test_suite In the instance that the plan line is missing, the end of the test will be denoted by the final result line containing a description that matches the name of the test given in the subtest header. Note that, as a result, if the plan line is missing and one of the subtests has a name matching that of the test suite, this will cause errors. Lastly, indentation is also recommended for improved readability. Major differences between TAP 14 and KTAP specification ------------------------------------------------------- Note the major differences between the TAP 14 and KTAP specifications: - yaml and json are not allowed in diagnostic messages - TODO directive not allowed - KTAP allows for an arbitrary number of tests to be nested with specified nested test format Example of KTAP --------------- KTAP version 1 1..1 # Subtest: test_suite 1..1 # Subtest: sub_test_suite 1..2 ok 1 - test_1 ok 2 test_2 ok 1 - sub_test_suite ok 1 - test_suite ========================================= Note on incompatibilities with kselftests ========================================= To my knowledge, the above specification seems to generally accept the TAP format of many non-nested test results of kselftests.
An example of a common kselftests TAP format for non-nested test results that are accepted by the above specification: TAP version 13 1..2 # selftests: vDSO: vdso_test_gettimeofday # The time is 1628024856.096879 ok 1 selftests: vDSO: vdso_test_gettimeofday # selftests: vDSO: vdso_test_getcpu # Could not find __vdso_getcpu ok 2 selftests: vDSO: vdso_test_getcpu # SKIP However, one major difference noted with kselftests is the use of more directives than the "# SKIP" directive. kselftest also supports XPASS and XFAIL directives. Some additional examples found in kselftests: not ok 5 selftests: netfilter: nft_concat_range.sh # TIMEOUT 45 seconds not ok 45 selftests: kvm: kvm_binary_stats_test # exit=127 Should the specification be expanded to include these directives? However, the general format for kselftests with nested test results seems to differ from the above specification. It seems that a general format for nested tests is as follows: TAP version 13 1..2 # selftests: membarrier: membarrier_test_single_thread # TAP version 13 # 1..2 # ok 1 sys_membarrier available # ok 2 sys membarrier invalid command test: command = -1, flags = 0, errno = 22. Failed as expected ok 1 selftests: membarrier: membarrier_test_single_thread # selftests: membarrier: membarrier_test_multi_thread # TAP version 13 # 1..2 # ok 1 sys_membarrier available # ok 2 sys membarrier invalid command test: command = -1, flags = 0, errno = 22. Failed as expected ok 2 selftests: membarrier: membarrier_test_multi_thread The major differences here, that do not match the above specification, are use of "# " as an indentation and using a TAP version line to denote a new test with subtests rather than the subtest header line described above. If these are widely utilized formats in kselftests, should we include both versions in the specification or should we attempt to agree on a single format for nested tests? I personally believe we should try to agree on a single format for nested tests. 
This would allow for a cleaner specification of KTAP and would reduce possible confusion. ==== So what do people think about the above specification? How should we handle the differences with kselftests? If this specification is accepted, where should the specification be documented?

3 years, 5 months

7
13
0 0

[RFC PATCH 00/13] x86 User Interrupts support

by Sohil Mehta

User Interrupts Introduction ============================ User Interrupts (Uintr) is a hardware technology that enables delivering interrupts directly to user space. Today, virtually all communication across privilege boundaries happens by going through the kernel. These include signals, pipes, remote procedure calls and hardware interrupt based notifications. User interrupts provide the foundation for more efficient (low latency and low CPU utilization) versions of these common operations by avoiding transitions through the kernel. In the User Interrupts hardware architecture, a receiver is always expected to be a user space task. However, a user interrupt can be sent by another user space task, kernel or an external source (like a device). In addition to the general infrastructure to receive user interrupts, this series introduces a single source: interrupts from another user task. These are referred to as User IPIs. The first implementation of User IPIs will be in the Intel processor code-named Sapphire Rapids. Refer Chapter 11 of the Intel Architecture instruction set extensions for details of the hardware architecture [1]. Series-reviewed-by: Tony Luck <tony.luck(a)intel.com> Main goals of this RFC ====================== - Introduce this upcoming technology to the community. This cover letter includes a hardware architecture summary along with the software architecture and kernel design choices. This post is a bit long as a result. Hopefully, it helps answer more questions than it creates :) I am also planning to talk about User Interrupts next week at the LPC Kernel summit. - Discuss potential use cases. We are starting to look at actual usages and libraries (like libevent[2] and liburing[3]) that can take advantage of this technology. Unfortunately, we don't have much to share on this right now. We need some help from the community to identify usages that can benefit from this. We would like to make sure the proposed APIs work for the eventual consumers. 
- Get early feedback on the software architecture. We are hoping to get some feedback on the direction of overall software architecture - starting with User IPI, extending it for kernel-to-user interrupt notifications and external interrupts in the future. - Discuss some of the main architecture opens. There is a lot of work that still needs to happen to enable this technology. We are looking for some input on future patches that would be of interest. Here are some of the big opens that we are looking to resolve. * Should Uintr interrupt all blocking system calls like sleep(), read(), poll(), etc? If so, should we implement an SA_RESTART type of mechanism similar to signals? - Refer to the Blocking for interrupts section below. * Should the User Interrupt Target table (UITT) be shared between threads of a multi-threaded application or maybe even across processes? - Refer to the Sharing the UITT section below. Why care about this? - Micro benchmark performance ================================================== There is a ~9x or higher performance improvement using User IPI over other IPC mechanisms for event signaling. Below is the average normalized latency for 1M ping-pong IPC notifications with message size=1. +------------+-------------------------+ | IPC type | Relative Latency | | |(normalized to User IPI) | +------------+-------------------------+ | User IPI | 1.0 | | Signal | 14.8 | | Eventfd | 9.7 | | Pipe | 16.3 | | Domain | 17.3 | +------------+-------------------------+ Results have been estimated based on tests on internal hardware with Linux v5.14 + User IPI patches. Original benchmark: https://github.com/goldsborough/ipc-bench Updated benchmark: https://github.com/intel/uintr-ipc-bench/tree/linux-rfc-v1 *Performance varies by use, configuration and other factors. How it works underneath? - Hardware Summary =========================================== User Interrupts is a posted interrupt delivery mechanism.
The interrupts are first posted to a memory location and then delivered to the receiver when they are running with CPL=3. Kernel managed architectural data structures -------------------------------------------- UPID: User Posted Interrupt Descriptor - Holds receiver interrupt vector information and notification state (like an ongoing notification, suppressed notifications). UITT: User Interrupt Target Table - Stores UPID pointer and vector information for interrupt routing on the sender side. Referred by the senduipi instruction. The interrupt state of each task is referenced via MSRs which are saved and restored by the kernel during context switch. Instructions ------------ senduipi <index> - send a user IPI to a target task based on the UITT index. clui - Mask user interrupts by clearing UIF (User Interrupt Flag). stui - Unmask user interrupts by setting UIF. testui - Test current value of UIF. uiret - return from a user interrupt handler. User IPI -------- When a User IPI sender executes 'senduipi <index>', the hardware refers the UITT table entry pointed by the index and posts the interrupt vector (63-0) into the receiver's UPID. If the receiver is running (CPL=3), the sender cpu would send a physical IPI to the receiver's cpu. On the receiver side this IPI is detected as a User Interrupt. The User Interrupt handler for the receiver is invoked and the vector number (63-0) is pushed onto the stack. Upon execution of 'uiret' in the interrupt handler, the control is transferred back to instruction that was interrupted. Refer Chapter 11 of the Intel Architecture instruction set extensions [1] for more details. Application interface - Software Architecture ============================================= User Interrupts (Uintr) is an opt-in feature (unlike signals). Applications wanting to use Uintr are expected to register themselves with the kernel using the Uintr related system calls. A Uintr receiver is always a userspace task. 
A Uintr sender can be another userspace task, kernel or a device. 1) A receiver can register/unregister an interrupt handler using the Uintr receiver related syscalls. uintr_register_handler(handler, flags) uintr_unregister_handler(flags) 2) A syscall also allows a receiver to register a vector and create a user interrupt file descriptor - uintr_fd. uintr_fd = uintr_create_fd(vector, flags) Uintr can be useful in some of the usages where eventfd or signals are used for frequent userspace event notifications. The semantics of uintr_fd are somewhat similar to an eventfd() or the write end of a pipe. 3) Any sender with access to uintr_fd can use it to deliver events (in this case - interrupts) to a receiver. A sender task can manage its connection with the receiver using the sender related syscalls based on uintr_fd. uipi_index = uintr_register_sender(uintr_fd, flags) Using an FD abstraction provides a secure mechanism to connect with a receiver. The FD sharing and isolation mechanisms put in place by the kernel would extend to Uintr as well. 4a) After the initial setup, a sender task can use the SENDUIPI instruction along with the uipi_index to generate user IPIs without any kernel intervention. SENDUIPI <uipi_index> If the receiver is running (CPL=3), then the user interrupt is delivered directly without a kernel transition. If the receiver isn't running the interrupt is delivered when the receiver gets context switched back. If the receiver is blocked in the kernel, the user interrupt is delivered to the kernel which then unblocks the intended receiver to deliver the interrupt. 4b) If the sender is the kernel or a device, the uintr_fd can be passed onto the related kernel entity to allow them to setup a connection and then generate a user interrupt for event delivery. 
<The exact details of this API are still being worked upon.> For details of the user interface and associated system calls refer the Uintr man-pages draft: https://github.com/intel/uintr-linux-kernel/tree/rfc-v1/tools/uintr/manpages. We have also included the same content as patch 1 of this series to make it easier to review. Refer the Uintr compiler programming guide [4] for details on Uintr integration with GCC and Binutils. Kernel design choices ===================== Here are some of the reasons and trade-offs for the current design of the APIs. System call interface --------------------- Why a system call interface?: The 2 options we considered are using a char device at /dev or use system calls (current approach). A syscall approach avoids exposing a core cpu feature through a driver model. Also, we want to have a user interrupt FD per vector and share a single common interrupt handler among all vectors. This seems easier for the kernel and userspace to accomplish using a syscall based approach. Data sharing using user interrupts: Uintr doesn't include a mechanism to share/transmit data. The expectation is applications use existing data sharing mechanisms to share data and use Uintr only for signaling. An FD for each vector: A uintr_fd is assigned to each vector to allow fine grained priority and event management by the receiver. The alternative we considered was to allocate an FD to the interrupt handler and having that shared with the sender. However, that approach relies on the sender selecting the vector and moves the vector priority management to the sender. Also, if multiple senders want to send unique user interrupts they would need to coordinate the vector selection amongst them. Extending the APIs: Currently, the system calls are only extendable using the flags argument. We can add a variable size struct to some of the syscalls if needed. 
Extending existing mechanisms ----------------------------- Uintr can be beneficial in some of the usages where eventfd() or signals are used. Since Uintr is hardware-dependent, thread-specific and bypasses the kernel in the fast path, it makes extending existing mechanisms harder. Main issues with extending signals: Signal handlers are defined significantly differently than a User interrupt handler. An application needs to save/restore registers in a user interrupt handler and call uiret to return from it. Also, signals can be process directed (or thread directed) but user interrupts are always thread directed. Comparison of signals with User Interrupts: +=====================+===========================+===========================+ | | Signals | User Interrupts | +=====================+===========================+===========================+ | Stacks | Has alt stacks | Uses application stack | | | | (alternate stack option | | | | not yet enabled) | +---------------------+---------------------------+---------------------------+ | Registers state | Kernel manages incl. | App responsible (Use GCC | | | FPU/XSTATE area | 'interrupt' attribute for | | | | general purpose registers)| +---------------------+---------------------------+---------------------------+ | Blocking/Masking | sigprocmask(2)/sa_mask | CLUI instruction (No per | | | | vector masking) | +---------------------+---------------------------+---------------------------+ | Direction | Uni-directional | Uni-directional | +---------------------+---------------------------+---------------------------+ | Post event | kill(), signal(), | SENDUIPI <index> - index | | | sigqueue(), etc. 
| derived from uintr_fd | +---------------------+---------------------------+---------------------------+ | Target | Process-directed or | Thread-directed | | | thread-directed | | +---------------------+---------------------------+---------------------------+ | Fork/inheritance | Empty signal set | Nothing is inherited | +---------------------+---------------------------+---------------------------+ | Execv | Pending signals preserved | Nothing is inherited | +---------------------+---------------------------+---------------------------+ | Order of delivery | Undetermined | High to low vector numbers| | for multiple signals| | | +---------------------+---------------------------+---------------------------+ | Handler re-entry | All signals except the | No interrupts can cause | | | one being handled | handler re-entry. | +---------------------+---------------------------+---------------------------+ | Delivery feedback | 0 or -1 based on whether | No feedback on whether the| | | the signal was sent | interrupt was sent or | | | | received. | +---------------------+---------------------------+---------------------------+ Main issues with extending eventfd(): eventfd() has a counter value that is core to the API. User interrupts can't have an associated counter since the signaling happens at the user level and the hardware doesn't have a memory counter mechanism. Also, eventfd can be used for bi-directional signaling where as uintr_fd is uni-directional. 
Comparison of eventfd with uintr_fd: +====================+======================+==============================+ | | Eventfd | uintr_fd (User Interrupt FD) | +====================+======================+==============================+ | Object | Counter - uint64 | Receiver vector information | +--------------------+----------------------+------------------------------+ | Post event | write() to eventfd | SENDUIPI <index> - index | | | | derived from uintr_fd | +--------------------+----------------------+------------------------------+ | Receive event | read() on eventfd | Implicit - Handler is | | | | invoked with associated | | | | vector. | +--------------------+----------------------+------------------------------+ | Direction | Bi-directional | Uni-directional | +--------------------+----------------------+------------------------------+ | Data transmitted | Counter - uint64 | None | +--------------------+----------------------+------------------------------+ | Waiting for events | Poll() family of | No per vector wait. | | | syscalls | uintr_wait() allows waiting | | | | for all user interrupts | +--------------------+----------------------+------------------------------+ Security Model ============== User Interrupts is designed as an opt-in feature (unlike signals). The security model for user interrupts is intended to be similar to eventfd(). The general idea is that any sender with access to uintr_fd would be able to generate the associated interrupt vector for the receiver task that created the fd. Untrusted processes ------------------- The current implementation expects only trusted and cooperating processes to communicate using user interrupts. Coordination is expected between processes for a connection teardown. In situations where coordination doesn't happen (say, due to abrupt process exit), the kernel would end up keeping shared resources (like UPID) allocated to avoid faults. 
Currently, a sender can easily cause a denial of service for the receiver by generating a storm of user interrupts. A user interrupt handler is invoked with interrupts disabled, but upon execution of uiret, interrupts get enabled again by the hardware. This can lead to the handler being invoked again before normal execution can resume. There isn't a hardware mechanism to mask specific interrupt vectors. To enable untrusted processes to communicate, we need to add a per-vector masking option through another syscall (or maybe IOCTL). However, this can add some complexity to the kernel code. A vector can only be masked by modifying the UITT entries at the source. We need to be careful about races while removing and restoring the UPID from the UITT. Resource limits --------------- The maximum number of receiver-sender connections would be limited by the maximum number of open file descriptors and the size of the UITT. The UITT size is chosen as 4kB fixed size arbitrarily right now. We plan to make it dynamic and configurable in size. RLIMIT_MEMLOCK or ENOMEM should be triggered when the size limits have been hit. Main Opens ========== Blocking for interrupts ----------------------- User interrupts are delivered to applications immediately if they are running in userspace. If a receiver task has blocked in the kernel using the placeholder uintr_wait() syscall, the task would be woken up to deliver the user interrupt. However, if the task is blocked due to any other blocking calls like read(), sleep(), etc; the interrupt will only get delivered when the application gets scheduled again. We need to consider if applications need to receive User Interrupts as soon as they are posted (similar to signals) when they are blocked due to some other reason. Adding this capability would likely make the kernel implementation more complex. Interrupting system calls using User Interrupts would also mean we need to consider an SA_RESTART type of mechanism. 
We also need to evaluate if some of the signal handler related semantics in the kernel can be reused for User Interrupts. Sharing the User Interrupt Target Table (UITT) ---------------------------------------------- The current implementation assigns a unique UITT to each task. This assumes that User interrupts are used for point-to-point communication between 2 tasks. Also, this keeps the kernel implementation relatively simple. However, there are benefits to sharing the UITT between threads of a multi-threaded application. One, they would see a consistent view of the UITT, i.e. SENDUIPI <index> would mean the same on all threads of the application. Also, each thread doesn't have to register itself using the common uintr_fd. This would simplify the userspace setup and make efficient use of kernel memory. The potential downside is that the kernel implementation to allocate, modify, expand and free the UITT would be more complex. A similar argument can be made for a set of processes that do a lot of IPC amongst them. They would prefer to have a shared UITT that lets them target any process from any process. With the current file descriptor based approach, the connection setup can be time consuming and somewhat cumbersome. We need to evaluate if this can be made simpler as well. Kernel page table isolation (KPTI) ---------------------------------- SENDUIPI is a special ring-3 instruction that makes a supervisor mode memory access to the UPID and UITT memory. The current patches need KPTI to be disabled for User IPIs to work. To make User IPI work with KPTI, we need to allocate these structures from a special memory region that has supervisor access but is mapped into userspace. The plan is to implement a mechanism similar to LDT. Processors that support user interrupts are not affected by Meltdown so the auto mode of KPTI will default to off. Users who want to force enable KPTI will need to wait for a later version of this patch series to use user interrupts.
Please let us know if you want the development of these patches to be prioritized (or deprioritized). FAQs ==== Q: What happens if a process is "surprised" by a user interrupt? A: Tasks that haven't registered with the kernel and requested user interrupts aren't expected or able to receive user interrupts. Q: Do user interrupts affect kernel scheduling? A: No. If a task is blocked waiting for user interrupts, when the kernel receives a notification on behalf of that task we only put it back on the runqueue. Delivery of a user interrupt in no way changes the scheduling priorities of a task. Q: Does the sender get to know if the interrupt was delivered? A: No. User interrupts only provide a posted interrupt delivery mechanism. If applications need to rely on whether the interrupt was delivered they should consider a userspace mechanism for feedback (like a shared memory counter or a user interrupt back to the sender). Q: Why is there no feedback on interrupt delivery? A: Being a posted interrupt delivery mechanism, the interrupt delivery happens in 2 steps: 1) The interrupt information is stored in a memory location (UPID). 2) The physical interrupt is delivered to the interrupt receiver. The 2nd step could happen immediately, after an extended period, or it might never happen based on the state of the receiver after step 1. (The receiver could have disabled interrupts, have been context switched out or it might have crashed during that time.) This makes it very hard for the hardware to reliably provide feedback upon execution of SENDUIPI. Q: Can user interrupts be nested? A: Yes. Using the STUI instruction in the interrupt handler would allow new user interrupts to be delivered. However, there is no TPR (thread priority register)-like mechanism to allow only higher priority interrupts. Any user interrupt can be taken when nesting is enabled. Q: Can a task receive all pending user interrupts in one go? A: No.
The hardware allows only one vector to be processed at a time. If a task is interested in knowing all the interrupts that are pending then we could add a syscall that provides the pending interrupts information. Q: Do the processes need to be pinned to a cpu? A: No. User interrupts will be routed correctly to whichever cpu the receiver is running on. The kernel updates the cpu information in the UPID during context switch. Q: Why are UPID and UITT allocated by the kernel? A: If allocated by user space, applications could misuse the UPID and UITT to write to unauthorized memory and generate interrupts on any cpu. The UPID and UITT are allocated by the kernel and accessed by the hardware with supervisor privilege. Patch structure for this series =============================== - Man-pages and Kernel documentation (patch 1,2) - Hardware enumeration (patch 3, 4) - User IPI kernel vector reservation (patch 5) - Syscall interface for interrupt receiver, sender and vector management(uintr_fd) (patch 6-12) - Basic selftests (patch 13) Along with the patches in this RFC, there are additional tests and samples that are available at: https://github.com/intel/uintr-linux-kernel/tree/rfc-v1 Links ===== [1]: https://software.intel.com/content/www/us/en/develop/download/intel-archite… [2]: https://libevent.org/ [3]: https://github.com/axboe/liburing [4]: https://github.com/intel/uintr-compiler-guide/blob/uintr-gcc-11.1/UINTR-com… Sohil Mehta (13): x86/uintr/man-page: Include man pages draft for reference Documentation/x86: Add documentation for User Interrupts x86/cpu: Enumerate User Interrupts support x86/fpu/xstate: Enumerate User Interrupts supervisor state x86/irq: Reserve a user IPI notification vector x86/uintr: Introduce uintr receiver syscalls x86/process/64: Add uintr task context switch support x86/process/64: Clean up uintr task fork and exit paths x86/uintr: Introduce vector registration and uintr_fd syscall x86/uintr: Introduce user IPI sender syscalls x86/uintr: 
Introduce uintr_wait() syscall x86/uintr: Wire up the user interrupt syscalls selftests/x86: Add basic tests for User IPI .../admin-guide/kernel-parameters.txt | 2 + Documentation/x86/index.rst | 1 + Documentation/x86/user-interrupts.rst | 107 +++ arch/x86/Kconfig | 12 + arch/x86/entry/syscalls/syscall_32.tbl | 6 + arch/x86/entry/syscalls/syscall_64.tbl | 6 + arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/disabled-features.h | 8 +- arch/x86/include/asm/entry-common.h | 4 + arch/x86/include/asm/fpu/types.h | 20 +- arch/x86/include/asm/fpu/xstate.h | 3 +- arch/x86/include/asm/hardirq.h | 4 + arch/x86/include/asm/idtentry.h | 5 + arch/x86/include/asm/irq_vectors.h | 6 +- arch/x86/include/asm/msr-index.h | 8 + arch/x86/include/asm/processor.h | 8 + arch/x86/include/asm/uintr.h | 76 ++ arch/x86/include/uapi/asm/processor-flags.h | 2 + arch/x86/kernel/Makefile | 1 + arch/x86/kernel/cpu/common.c | 61 ++ arch/x86/kernel/cpu/cpuid-deps.c | 1 + arch/x86/kernel/fpu/core.c | 17 + arch/x86/kernel/fpu/xstate.c | 20 +- arch/x86/kernel/idt.c | 4 + arch/x86/kernel/irq.c | 51 + arch/x86/kernel/process.c | 10 + arch/x86/kernel/process_64.c | 4 + arch/x86/kernel/uintr_core.c | 880 ++++++++++++++++++ arch/x86/kernel/uintr_fd.c | 300 ++++++ include/linux/syscalls.h | 8 + include/uapi/asm-generic/unistd.h | 15 +- kernel/sys_ni.c | 8 + scripts/checksyscalls.sh | 6 + tools/testing/selftests/x86/Makefile | 10 + tools/testing/selftests/x86/uintr.c | 147 +++ tools/uintr/manpages/0_overview.txt | 265 ++++++ tools/uintr/manpages/1_register_receiver.txt | 122 +++ .../uintr/manpages/2_unregister_receiver.txt | 62 ++ tools/uintr/manpages/3_create_fd.txt | 104 +++ tools/uintr/manpages/4_register_sender.txt | 121 +++ tools/uintr/manpages/5_unregister_sender.txt | 79 ++ tools/uintr/manpages/6_wait.txt | 59 ++ 42 files changed, 2626 insertions(+), 8 deletions(-) create mode 100644 Documentation/x86/user-interrupts.rst create mode 100644 arch/x86/include/asm/uintr.h create mode 100644 
arch/x86/kernel/uintr_core.c create mode 100644 arch/x86/kernel/uintr_fd.c create mode 100644 tools/testing/selftests/x86/uintr.c create mode 100644 tools/uintr/manpages/0_overview.txt create mode 100644 tools/uintr/manpages/1_register_receiver.txt create mode 100644 tools/uintr/manpages/2_unregister_receiver.txt create mode 100644 tools/uintr/manpages/3_create_fd.txt create mode 100644 tools/uintr/manpages/4_register_sender.txt create mode 100644 tools/uintr/manpages/5_unregister_sender.txt create mode 100644 tools/uintr/manpages/6_wait.txt base-commit: 6880fa6c56601bb8ed59df6c30fd390cc5f6dd8f -- 2.33.0

3 years, 5 months

13
86
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-kselftest-mirror September 2021