April 2023 - Linux-kselftest-mirror

[PATCH] tools: Copy linux/align.h into tools/

by Ackerley Tng

This provides alignment macros for use in selftests. Also clean up tools/include/linux/bitmap.h's inline definition of IS_ALIGNED(). Signed-off-by: Ackerley Tng <ackerleytng(a)google.com> --- tools/include/linux/align.h | 15 +++++++++++++++ tools/include/linux/bitmap.h | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 tools/include/linux/align.h diff --git a/tools/include/linux/align.h b/tools/include/linux/align.h new file mode 100644 index 000000000000..2b4acec7b95a --- /dev/null +++ b/tools/include/linux/align.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_ALIGN_H +#define _LINUX_ALIGN_H + +#include <linux/const.h> + +/* @a is a power of 2 value */ +#define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) +#define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a)) +#define __ALIGN_MASK(x, mask) __ALIGN_KERNEL_MASK((x), (mask)) +#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) +#define PTR_ALIGN_DOWN(p, a) ((typeof(p))ALIGN_DOWN((unsigned long)(p), (a))) +#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) + +#endif /* _LINUX_ALIGN_H */ diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h index f3566ea0f932..8c6852dba04f 100644 --- a/tools/include/linux/bitmap.h +++ b/tools/include/linux/bitmap.h @@ -3,6 +3,7 @@ #define _TOOLS_LINUX_BITMAP_H #include <string.h> +#include <linux/align.h> #include <linux/bitops.h> #include <linux/find.h> #include <stdlib.h> @@ -126,7 +127,6 @@ static inline bool bitmap_and(unsigned long *dst, const unsigned long *src1, #define BITMAP_MEM_ALIGNMENT (8 * sizeof(unsigned long)) #endif #define BITMAP_MEM_MASK (BITMAP_MEM_ALIGNMENT - 1) -#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) static inline bool bitmap_equal(const unsigned long *src1, const unsigned long *src2, unsigned int nbits) -- 2.39.2.722.g9855ee24e9-goog

10 months, 1 week

2
3
0 0

[PATCH v3] lib: Convert test_user_copy to KUnit test

by Vitor Massaru Iha

This adds the conversion of the runtime tests of test_user_copy functions, from `lib/test_user_copy.c` to KUnit tests. Signed-off-by: Vitor Massaru Iha <vitor(a)massaru.org> --- v2: * split patch in 3: - Allows to install and load modules in root filesystem; - Provides an userspace memory context when tests are compiled as module; - Convert test_user_copy to KUnit test; * removed entry for CONFIG_TEST_USER_COPY; * replaced pr_warn to KUNIT_EXPECT_FALSE_MSG in test macro to decrease the diff; v3: * rebased with last kunit branch * Please apply this commit from kunit-fixes: 3f37d14b8a3152441f36b6bc74000996679f0998 And these from patchwork: https://patchwork.kernel.org/patch/11676331/ https://patchwork.kernel.org/patch/11676335/ --- lib/Kconfig.debug | 28 ++++++++------ lib/Makefile | 2 +- lib/{test_user_copy.c => user_copy_kunit.c} | 42 +++++++++------------ 3 files changed, 35 insertions(+), 37 deletions(-) rename lib/{test_user_copy.c => user_copy_kunit.c} (91%) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 9ad9210d70a1..f699a3624ae7 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2078,18 +2078,6 @@ config TEST_VMALLOC If unsure, say N. -config TEST_USER_COPY - tristate "Test user/kernel boundary protections" - depends on m - help - This builds the "test_user_copy" module that runs sanity checks - on the copy_to/from_user infrastructure, making sure basic - user/kernel boundary testing is working. If it fails to load, - a regression has been detected in the user/kernel memory boundary - protections. - - If unsure, say N. - config TEST_BPF tristate "Test BPF filter functionality" depends on m && NET @@ -2154,6 +2142,22 @@ config SYSCTL_KUNIT_TEST If unsure, say N. 
+config USER_COPY_KUNIT + tristate "KUnit Test for user/kernel boundary protections" + depends on KUNIT + depends on m + help + This builds the "user_copy_kunit" module that runs sanity checks + on the copy_to/from_user infrastructure, making sure basic + user/kernel boundary testing is working. If it fails to load, + a regression has been detected in the user/kernel memory boundary + protections. + + For more information on KUnit and unit tests in general please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. + + If unsure, say N. + config LIST_KUNIT_TEST tristate "KUnit Test for Kernel Linked-list structures" if !KUNIT_ALL_TESTS depends on KUNIT diff --git a/lib/Makefile b/lib/Makefile index b1c42c10073b..8c145f85accc 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -78,7 +78,6 @@ obj-$(CONFIG_TEST_VMALLOC) += test_vmalloc.o obj-$(CONFIG_TEST_OVERFLOW) += test_overflow.o obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o obj-$(CONFIG_TEST_SORT) += test_sort.o -obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o obj-$(CONFIG_TEST_PRINTF) += test_printf.o @@ -318,3 +317,4 @@ obj-$(CONFIG_OBJAGG) += objagg.o # KUnit tests obj-$(CONFIG_LIST_KUNIT_TEST) += list-test.o obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o +obj-$(CONFIG_USER_COPY_KUNIT) += user_copy_kunit.o diff --git a/lib/test_user_copy.c b/lib/user_copy_kunit.c similarity index 91% rename from lib/test_user_copy.c rename to lib/user_copy_kunit.c index 5ff04d8fe971..a10ddd15b4cd 100644 --- a/lib/test_user_copy.c +++ b/lib/user_copy_kunit.c @@ -16,6 +16,7 @@ #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/vmalloc.h> +#include <kunit/test.h> /* * Several 32-bit architectures support 64-bit {get,put}_user() calls. 
@@ -35,7 +36,7 @@ ({ \ int cond = (condition); \ if (cond) \ - pr_warn("[%d] " msg "\n", __LINE__, ##__VA_ARGS__); \ + KUNIT_EXPECT_FALSE_MSG(test, cond, msg, ##__VA_ARGS__); \ cond; \ }) @@ -44,7 +45,7 @@ static bool is_zeroed(void *from, size_t size) return memchr_inv(from, 0x0, size) == NULL; } -static int test_check_nonzero_user(char *kmem, char __user *umem, size_t size) +static int test_check_nonzero_user(struct kunit *test, char *kmem, char __user *umem, size_t size) { int ret = 0; size_t start, end, i, zero_start, zero_end; @@ -102,7 +103,7 @@ static int test_check_nonzero_user(char *kmem, char __user *umem, size_t size) return ret; } -static int test_copy_struct_from_user(char *kmem, char __user *umem, +static int test_copy_struct_from_user(struct kunit *test, char *kmem, char __user *umem, size_t size) { int ret = 0; @@ -177,7 +178,7 @@ static int test_copy_struct_from_user(char *kmem, char __user *umem, return ret; } -static int __init test_user_copy_init(void) +static void user_copy_test(struct kunit *test) { int ret = 0; char *kmem; @@ -192,16 +193,14 @@ static int __init test_user_copy_init(void) #endif kmem = kmalloc(PAGE_SIZE * 2, GFP_KERNEL); - if (!kmem) - return -ENOMEM; + KUNIT_EXPECT_FALSE_MSG(test, kmem == NULL, "kmalloc failed"); user_addr = vm_mmap(NULL, 0, PAGE_SIZE * 2, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANONYMOUS | MAP_PRIVATE, 0); if (user_addr >= (unsigned long)(TASK_SIZE)) { - pr_warn("Failed to allocate user memory\n"); kfree(kmem); - return -ENOMEM; + KUNIT_FAIL(test, "Failed to allocate user memory"); } usermem = (char __user *)user_addr; @@ -245,9 +244,9 @@ static int __init test_user_copy_init(void) #undef test_legit /* Test usage of check_nonzero_user(). */ - ret |= test_check_nonzero_user(kmem, usermem, 2 * PAGE_SIZE); + ret |= test_check_nonzero_user(test, kmem, usermem, 2 * PAGE_SIZE); /* Test usage of copy_struct_from_user(). 
*/ - ret |= test_copy_struct_from_user(kmem, usermem, 2 * PAGE_SIZE); + ret |= test_copy_struct_from_user(test, kmem, usermem, 2 * PAGE_SIZE); /* * Invalid usage: none of these copies should succeed. @@ -309,23 +308,18 @@ static int __init test_user_copy_init(void) vm_munmap(user_addr, PAGE_SIZE * 2); kfree(kmem); - - if (ret == 0) { - pr_info("tests passed.\n"); - return 0; - } - - return -EINVAL; } -module_init(test_user_copy_init); - -static void __exit test_user_copy_exit(void) -{ - pr_info("unloaded.\n"); -} +static struct kunit_case user_copy_test_cases[] = { + KUNIT_CASE(user_copy_test), + {} +}; -module_exit(test_user_copy_exit); +static struct kunit_suite user_copy_test_suite = { + .name = "user_copy", + .test_cases = user_copy_test_cases, +}; +kunit_test_suites(&user_copy_test_suite); MODULE_AUTHOR("Kees Cook <keescook(a)chromium.org>"); MODULE_LICENSE("GPL"); base-commit: d43c7fb05765152d4d4a39a8ef957c4ea14d8847 -- 2.26.2

10 months, 4 weeks

4
11
0 0

[RFC] ktap_v2: KTAP specification transition method

by Frank Rowand

In the middle of the thread about a patch to add the skip test result, I suggested documenting the process of deprecating the KTAP v1 Specification method of marking a skipped test: https://lore.kernel.org/all/490271eb-1429-2217-6e38-837c6e5e328b@gmail.com/… In a reply to that email I suggested that we ought to have a process to transition the KTAP Specification from v1 to v2, and possibly v3 and future versions. This email is meant to be the root of that discussion. My initial thinking is that there are at least three different types of project and/or community that may have different needs in this area. Type 1 - project controls both the test output generation and the test output parsing tool. Both generation and parsing code are in the same repository and/or synchronized versions are distributed together. Devicetree unittests are an example of Type 1. I plan to maintain changes of test output to KTAP v2 format in coordination with updating the parser to process KTAP v2 data. Type 2 - project controls both the test output generation and the test output parsing tool. The test output generation and parser modifications may be controlled by the project BUT there are one or more external testing projects that (1) may have their own parsers, and (2) may have a single framework that tests multiple versions of the tests. I think that kselftest and kunit tests are probably examples of Type 2. I also think that DT unittests will become a Type 2 project as a result of converting to KTAP v2 data. Type 3 - project may create and maintain some tests, but is primarily a consumer of tests created by other projects. Type 3 projects typically have a single framework that is able to execute and process multiple versions of the tests. The Fuego test project is an example of Type 3. Maybe adding all of this complexity of different Types in my initial thinking was silly -- maybe everything in this topic is governed by the more complex Type 3. 
My thinking was that the three different Types of project would be impacted in different ways by transition plans. Type 3 would be the most impacted, so I wanted to be sure that any transition plan especially considered their needs. There is an important aspect of the KTAP format that might ease the transition from one version to another: All KTAP formatted results begin with a "version line", so as soon as a parser has processed the first line of a test, it can apply the appropriate KTAP Specification version to all subsequent lines of test output. A parser implementation could choose to process all versions, could choose to invoke a version specific parser, or some other approach altogether. In the "add skip test results" thread, I suggested deprecating the v1 method of marking a skipped test in v2, with a scheduled removal of the v1 method in v3. But since the KTAP format version is available in the very first line of test output, is it necessary to do a slow deprecation and removal over two versions? One argument for doing a two version deprecation/removal process is that a parser that is one version older than the test output _might_ be able to process the test output without error, but would not be able to take advantage of features added in the newer version of the Specification. My opinion is that a two version deprecation/removal process will slow the Specification update process and lead to more versions of the Specification over a given time interval. A one version deprecation/removal process puts more of a burden on Type 3 projects and external parsers for Type 2 projects to implement parsers that can process the newer Specification more quickly and puts a burden on test maintainers to delay a move to the newer Specification, or possibly pressure to support selection of more than one Specification version format for output data. One additional item... 
On the KTAP Specification version 2 process wiki page, I suggested that it is "desirable for test result parsers that understand the KTAP Specification version 2 data also be able to parse version 1 data." With the implication "Converting version 1 compliant data to version 2 compliant data should not require a "flag day" switch of test result parsers." If this thread discussion results in a different decision, I will update the wiki. Thoughts? -Frank

11 months, 3 weeks

3
2
0 0

[PATCH v4 00/10] Add support for synchronous signals on perf events

by Marco Elver

The perf subsystem today unifies various tracing and monitoring features, from both software and hardware. One benefit of the perf subsystem is automatically inheriting events to child tasks, which enables process-wide events monitoring with low overheads. By default perf events are non-intrusive, not affecting behaviour of the tasks being monitored. For certain use-cases, however, it makes sense to leverage the generality of the perf events subsystem and optionally allow the tasks being monitored to receive signals on events they are interested in. This patch series adds the option to synchronously signal user space on events. To better support process-wide synchronous self-monitoring, without events propagating to children that do not share the current process's shared environment, two pre-requisite patches are added to optionally restrict inheritance to CLONE_THREAD, and remove events on exec (without affecting the parent). Examples how to use these features can be found in the tests added at the end of the series. In addition to the tests added, the series has also been subjected to syzkaller fuzzing (focus on 'kernel/events/' coverage). Motivation and Example Uses --------------------------- 1. Our immediate motivation is low-overhead sampling-based race detection for user space [1]. By using perf_event_open() at process initialization, we can create hardware breakpoint/watchpoint events that are propagated automatically to all threads in a process. As far as we are aware, today no existing kernel facility (such as ptrace) allows us to set up process-wide watchpoints with minimal overheads (that are comparable to mprotect() of whole pages). 2. Other low-overhead error detectors that rely on detecting accesses to certain memory locations or code, process-wide and also only in a specific set of subtasks or threads. 
[1] https://llvm.org/devmtg/2020-09/slides/Morehouse-GWP-Tsan.pdf Other ideas for use-cases we found interesting, but should only illustrate the range of potential to further motivate the utility (we're sure there are more): 3. Code hot patching without full stop-the-world. Specifically, by setting a code breakpoint to entry to the patched routine, then send signals to threads and check that they are not in the routine, but without stopping them further. If any of the threads will enter the routine, it will receive SIGTRAP and pause. 4. Safepoints without mprotect(). Some Java implementations use "load from a known memory location" as a safepoint. When threads need to be stopped, the page containing the location is mprotect()ed and threads get a signal. This could be replaced with a watchpoint, which does not require a whole page nor DTLB shootdowns. 5. Threads receiving signals on performance events to throttle/unthrottle themselves. 6. Tracking data flow globally. Changelog --------- v4: * Fix for parent and child racing to exit in sync_child_event(). * Fix race between irq_work running and task's sighand being released by release_task(). * Generalize setting si_perf and si_addr independent of event type; introduces perf_event_attr::sig_data, which can be set by user space to be propagated to si_perf. * Warning in perf_sigtrap() if ctx->task and current mismatch; we expect this on architectures that do not properly implement arch_irq_work_raise(). * Require events that want sigtrap to be associated with a task. * Dropped "perf: Add breakpoint information to siginfo on SIGTRAP" in favor of more generic solution (perf_event_attr::sig_data). v3: * Add patch "perf: Rework perf_event_exit_event()" to beginning of series, courtesy of Peter Zijlstra. * Rework "perf: Add support for event removal on exec" based on the added "perf: Rework perf_event_exit_event()". * Fix kselftests to work with more recent libc, due to the way it forces using the kernel's own siginfo_t. 
* Add basic perf-tool built-in test. v2/RFC: https://lkml.kernel.org/r/20210310104139.679618-1-elver@google.com * Patch "Support only inheriting events if cloned with CLONE_THREAD" added to series. * Patch "Add support for event removal on exec" added to series. * Patch "Add kselftest for process-wide sigtrap handling" added to series. * Patch "Add kselftest for remove_on_exec" added to series. * Implicitly restrict inheriting events if sigtrap, but the child was cloned with CLONE_CLEAR_SIGHAND, because it is not generally safe if the child cleared all signal handlers to continue sending SIGTRAP. * Various minor fixes (see details in patches). v1/RFC: https://lkml.kernel.org/r/20210223143426.2412737-1-elver@google.com Pre-series: The discussion at [2] led to the changes in this series. The approach taken in "Add support for SIGTRAP on perf events" to trigger the signal was suggested by Peter Zijlstra in [3]. [2] https://lore.kernel.org/lkml/CACT4Y+YPrXGw+AtESxAgPyZ84TYkNZdP0xpocX2jwVAbZ… [3] https://lore.kernel.org/lkml/YBv3rAT566k+6zjg@hirez.programming.kicks-ass.n… Marco Elver (9): perf: Apply PERF_EVENT_IOC_MODIFY_ATTRIBUTES to children perf: Support only inheriting events if cloned with CLONE_THREAD perf: Add support for event removal on exec signal: Introduce TRAP_PERF si_code and si_perf to siginfo perf: Add support for SIGTRAP on perf events selftests/perf_events: Add kselftest for process-wide sigtrap handling selftests/perf_events: Add kselftest for remove_on_exec tools headers uapi: Sync tools/include/uapi/linux/perf_event.h perf test: Add basic stress test for sigtrap handling Peter Zijlstra (1): perf: Rework perf_event_exit_event() arch/m68k/kernel/signal.c | 3 + arch/x86/kernel/signal_compat.c | 5 +- fs/signalfd.c | 4 + include/linux/compat.h | 2 + include/linux/perf_event.h | 9 +- include/linux/signal.h | 1 + include/uapi/asm-generic/siginfo.h | 6 +- include/uapi/linux/perf_event.h | 12 +- include/uapi/linux/signalfd.h | 4 +- kernel/events/core.c | 
302 +++++++++++++----- kernel/fork.c | 2 +- kernel/signal.c | 11 + tools/include/uapi/linux/perf_event.h | 12 +- tools/perf/tests/Build | 1 + tools/perf/tests/builtin-test.c | 5 + tools/perf/tests/sigtrap.c | 150 +++++++++ tools/perf/tests/tests.h | 1 + .../testing/selftests/perf_events/.gitignore | 3 + tools/testing/selftests/perf_events/Makefile | 6 + tools/testing/selftests/perf_events/config | 1 + .../selftests/perf_events/remove_on_exec.c | 260 +++++++++++++++ tools/testing/selftests/perf_events/settings | 1 + .../selftests/perf_events/sigtrap_threads.c | 210 ++++++++++++ 23 files changed, 924 insertions(+), 87 deletions(-) create mode 100644 tools/perf/tests/sigtrap.c create mode 100644 tools/testing/selftests/perf_events/.gitignore create mode 100644 tools/testing/selftests/perf_events/Makefile create mode 100644 tools/testing/selftests/perf_events/config create mode 100644 tools/testing/selftests/perf_events/remove_on_exec.c create mode 100644 tools/testing/selftests/perf_events/settings create mode 100644 tools/testing/selftests/perf_events/sigtrap_threads.c -- 2.31.0.208.g409f899ff0-goog

1 year

5
29
0 0

selftests: clone3: clone3_cap_checkpoint_restore fails - Could not set CAP_CHECKPOINT_RESTORE

by Naresh Kamboju

selftests clone3 cap_checkpoint_restore fails on all devices. Reported-by: Linux Kernel Functional Testing <lkft(a)linaro.org> [ 97.198602] audit: type=1701 audit(1651167820.383:12): auid=4294967295 uid=0 gid=0 ses=4294967295 pid=1732 comm=\"clone3_cap_chec\" exe=\"/opt/kselftests/default-in-kernel/clone3/clone3_cap_checkpoint_restore\" sig=6 res=1 # selftests: clone3: clone3_cap_checkpoint_restore # TAP version 13 # 1..1 # # Starting 1 tests from 1 test cases. # # RUN global.clone3_cap_checkpoint_restore ... # # clone3_cap_checkpoint_restore.c:155:clone3_cap_checkpoint_restore:Child has PID 1733 # # clone3() syscall supported # cap_set_proc: Operation not permitted # # clone3_cap_checkpoint_restore.c:164:clone3_cap_checkpoint_restore:Expected set_capability() (-1) == 0 (0) # # clone3_cap_checkpoint_restore.c:165:clone3_cap_checkpoint_restore:Could not set CAP_CHECKPOINT_RESTORE # # clone3_cap_checkpoint_restore: Test terminated by assertion # # FAIL global.clone3_cap_checkpoint_restore # not ok 1 global.clone3_cap_checkpoint_restore # # FAILED: 0 / 1 tests passed. 
# # Totals: pass:0 fail:1 xfail:0 xpass:0 skip:0 error:0 not ok 4 selftests: clone3: clone3_cap_checkpoint_restore # exit=1 Test details links, https://lkft.validation.linaro.org/scheduler/job/5812724#L2074 Test results comparison link, https://qa-reports.linaro.org/lkft/linux-next-master/build/next-20221107/te… https://qa-reports.linaro.org/lkft/linux-next-master/build/next-20221107/te… https://qa-reports.linaro.org/lkft/linux-next-master/build/next-20221107/te… metadata: git_ref: master git_repo: https://gitlab.com/Linaro/lkft/mirrors/next/linux-next git_sha: d8e87774068af213ab5b058b1b114dc397b577aa git_describe: next-20221107 kernel_version: 6.1.0-rc3 kernel-config: https://builds.tuxbuild.com/2HChVOSNxUpvfAfLTnaK7zyTdUi/config build-url: https://gitlab.com/Linaro/lkft/mirrors/next/linux-next/-/pipelines/687092361 artifact-location: https://builds.tuxbuild.com/2HChVOSNxUpvfAfLTnaK7zyTdUi toolchain: gcc-11 -- Linaro LKFT https://lkft.linaro.org

1 year, 1 month

3
3
0 0

[syzbot] WARNING in btrfs_free_reserved_data_space_noquota

by syzbot

Hello, syzbot found the following issue on: HEAD commit: b7b275e60bcd Linux 6.1-rc7 git tree: upstream console+strace: https://syzkaller.appspot.com/x/log.txt?x=158a7b73880000 kernel config: https://syzkaller.appspot.com/x/.config?x=2325e409a9a893e1 dashboard link: https://syzkaller.appspot.com/bug?extid=adec8406ad17413d4c06 compiler: Debian clang version 13.0.1-++20220126092033+75e33f71c2da-1~exp1~20220126212112.63, GNU ld (GNU Binutils for Debian) 2.35.2 syz repro: https://syzkaller.appspot.com/x/repro.syz?x=169ccb75880000 C reproducer: https://syzkaller.appspot.com/x/repro.c?x=17bf7153880000 Downloadable assets: disk image: https://storage.googleapis.com/syzbot-assets/525233126d34/disk-b7b275e6.raw… vmlinux: https://storage.googleapis.com/syzbot-assets/e8299bf41400/vmlinux-b7b275e6.… kernel image: https://storage.googleapis.com/syzbot-assets/eebf691dbf6f/bzImage-b7b275e6.… mounted in repro: https://storage.googleapis.com/syzbot-assets/5423c2d2ad62/mount_0.gz The issue was bisected to: commit c814bf958926ff45a9c1e899bd001006ab6cfbae Author: ye xingchen <ye.xingchen(a)zte.com.cn> Date: Tue Aug 16 10:51:06 2022 +0000 powerpc/selftests: Use timersub() for gettimeofday() bisection log: https://syzkaller.appspot.com/x/bisect.txt?x=118c3d03880000 final oops: https://syzkaller.appspot.com/x/report.txt?x=138c3d03880000 console output: https://syzkaller.appspot.com/x/log.txt?x=158c3d03880000 IMPORTANT: if you fix the issue, please add the following tag to the commit: Reported-by: syzbot+adec8406ad17413d4c06(a)syzkaller.appspotmail.com Fixes: c814bf958926 ("powerpc/selftests: Use timersub() for gettimeofday()") RDX: 0000000000000001 RSI: 0000000020000280 RDI: 0000000000000005 RBP: 00007ffd32e91c70 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000008000000 R11: 0000000000000246 R12: 0000000000000006 R13: 00007ffd32e91cb0 R14: 00007ffd32e91c90 R15: 0000000000000006 </TASK> ------------[ cut here ]------------ WARNING: CPU: 1 PID: 3764 at fs/btrfs/space-info.h:122 
btrfs_space_info_free_bytes_may_use fs/btrfs/space-info.h:154 [inline] WARNING: CPU: 1 PID: 3764 at fs/btrfs/space-info.h:122 btrfs_free_reserved_data_space_noquota+0x219/0x2b0 fs/btrfs/delalloc-space.c:179 Modules linked in: CPU: 1 PID: 3764 Comm: syz-executor759 Not tainted 6.1.0-rc7-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022 RIP: 0010:btrfs_space_info_update_bytes_may_use fs/btrfs/space-info.h:122 [inline] RIP: 0010:btrfs_space_info_free_bytes_may_use fs/btrfs/space-info.h:154 [inline] RIP: 0010:btrfs_free_reserved_data_space_noquota+0x219/0x2b0 fs/btrfs/delalloc-space.c:179 Code: 2f 00 74 08 4c 89 ef e8 b5 98 32 fe 49 8b 5d 00 48 89 df 4c 8b 74 24 08 4c 89 f6 e8 21 81 de fd 4c 39 f3 73 16 e8 d7 7e de fd <0f> 0b 31 db 4c 8b 34 24 41 80 3c 2f 00 75 8c eb 92 e8 c1 7e de fd RSP: 0018:ffffc9000443f410 EFLAGS: 00010293 RAX: ffffffff83ac1919 RBX: 00000000005cb000 RCX: ffff888027989d40 RDX: 0000000000000000 RSI: 0000000000800000 RDI: 00000000005cb000 RBP: dffffc0000000000 R08: ffffffff83ac190f R09: fffffbfff1cebe0e R10: fffffbfff1cebe0e R11: 1ffffffff1cebe0d R12: ffff8880774f3800 R13: ffff8880774f3860 R14: 0000000000800000 R15: 1ffff1100ee9e70c FS: 0000555555aaa300(0000) GS:ffff8880b9900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f0d98f20140 CR3: 0000000025ccf000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: <TASK> btrfs_free_reserved_data_space+0x9d/0xd0 fs/btrfs/delalloc-space.c:199 btrfs_dio_iomap_begin+0x8f7/0x1070 fs/btrfs/inode.c:7762 iomap_iter+0x606/0x8a0 fs/iomap/iter.c:74 __iomap_dio_rw+0xd91/0x20d0 fs/iomap/direct-io.c:601 btrfs_dio_write+0x9c/0xe0 fs/btrfs/inode.c:8094 btrfs_direct_write fs/btrfs/file.c:1835 [inline] btrfs_do_write_iter+0x871/0x1260 fs/btrfs/file.c:1980 do_iter_write+0x6c2/0xc20 fs/read_write.c:861 vfs_writev 
fs/read_write.c:934 [inline] do_pwritev+0x200/0x350 fs/read_write.c:1031 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f0d98ea8ea9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 41 15 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 c0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffd32e91c38 EFLAGS: 00000246 ORIG_RAX: 0000000000000148 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f0d98ea8ea9 RDX: 0000000000000001 RSI: 0000000020000280 RDI: 0000000000000005 RBP: 00007ffd32e91c70 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000008000000 R11: 0000000000000246 R12: 0000000000000006 R13: 00007ffd32e91cb0 R14: 00007ffd32e91c90 R15: 0000000000000006 </TASK> --- This report is generated by a bot. It may contain errors. See https://goo.gl/tpsmEJ for more information about syzbot. syzbot engineers can be reached at syzkaller(a)googlegroups.com. syzbot will keep track of this issue. See: https://goo.gl/tpsmEJ#status for how to communicate with syzbot. For information about bisection process see: https://goo.gl/tpsmEJ#bisection syzbot can test patches for this issue, for details see: https://goo.gl/tpsmEJ#testing-patches

1 year, 1 month

3
3
0 0

[PATCH v6] lib: add basic KUnit test for lib/math

by Daniel Latypov

Add basic test coverage for files that don't require any config options: * part of math.h (what seem to be the most commonly used macros) * gcd.c * lcm.c * int_sqrt.c * reciprocal_div.c (Ignored int_pow.c since it's a simple textbook algorithm.) These tests aren't particularly interesting, but they * provide short and simple examples of parameterized tests * provide a place to add tests for any new files in this dir * are written so adding new test cases to cover edge cases should be easy * looking at code coverage, we hit all the branches in the .c files Signed-off-by: Daniel Latypov <dlatypov(a)google.com> Reviewed-by: David Gow <davidgow(a)google.com> --- Changes since v5: * add in test cases for roundup/rounddown * address misc comments from David Changes since v4: * add in test cases for some math.h macros (abs, round_up/round_down, div_round_down/closest) * use parameterized testing less to keep things terser Changes since v3: * fix `checkpatch.pl --strict` warnings * add test cases for gcd(0,0) and lcm(0,0) * minor: don't test both gcd(a,b) and gcd(b,a) when a == b Changes since v2: mv math_test.c => math_kunit.c Changes since v1: * Rebase and rewrite to use the new parameterized testing support. * misc: fix overflow in literal and inline int_sqrt format string. * related: commit 1f0e943df68a ("Documentation: kunit: provide guidance for testing many inputs") was merged explaining the patterns shown here. * there's an in-flight patch to update it for parameterized testing. 
--- lib/math/Kconfig | 12 ++ lib/math/Makefile | 2 + lib/math/math_kunit.c | 291 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 305 insertions(+) create mode 100644 lib/math/math_kunit.c diff --git a/lib/math/Kconfig b/lib/math/Kconfig index f19bc9734fa7..a974d4db0f9c 100644 --- a/lib/math/Kconfig +++ b/lib/math/Kconfig @@ -15,3 +15,15 @@ config PRIME_NUMBERS config RATIONAL bool + +config MATH_KUNIT_TEST + tristate "KUnit test for lib/math and math.h" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + This builds unit tests for lib/math and math.h. + + For more information on KUnit and unit tests in general, please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. + + If unsure, say N. diff --git a/lib/math/Makefile b/lib/math/Makefile index be6909e943bd..30abb7a8d564 100644 --- a/lib/math/Makefile +++ b/lib/math/Makefile @@ -4,3 +4,5 @@ obj-y += div64.o gcd.o lcm.o int_pow.o int_sqrt.o reciprocal_div.o obj-$(CONFIG_CORDIC) += cordic.o obj-$(CONFIG_PRIME_NUMBERS) += prime_numbers.o obj-$(CONFIG_RATIONAL) += rational.o + +obj-$(CONFIG_MATH_KUNIT_TEST) += math_kunit.o diff --git a/lib/math/math_kunit.c b/lib/math/math_kunit.c new file mode 100644 index 000000000000..556c23b17c3c --- /dev/null +++ b/lib/math/math_kunit.c @@ -0,0 +1,291 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Simple KUnit suite for math helper funcs that are always enabled. + * + * Copyright (C) 2020, Google LLC. + * Author: Daniel Latypov <dlatypov(a)google.com> + */ + +#include <kunit/test.h> +#include <linux/gcd.h> +#include <linux/kernel.h> +#include <linux/lcm.h> +#include <linux/reciprocal_div.h> + +static void abs_test(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, abs((char)0), (char)0); + KUNIT_EXPECT_EQ(test, abs((char)42), (char)42); + KUNIT_EXPECT_EQ(test, abs((char)-42), (char)42); + + /* The expression in the macro is actually promoted to an int. 
*/ + KUNIT_EXPECT_EQ(test, abs((short)0), 0); + KUNIT_EXPECT_EQ(test, abs((short)42), 42); + KUNIT_EXPECT_EQ(test, abs((short)-42), 42); + + KUNIT_EXPECT_EQ(test, abs(0), 0); + KUNIT_EXPECT_EQ(test, abs(42), 42); + KUNIT_EXPECT_EQ(test, abs(-42), 42); + + KUNIT_EXPECT_EQ(test, abs(0L), 0L); + KUNIT_EXPECT_EQ(test, abs(42L), 42L); + KUNIT_EXPECT_EQ(test, abs(-42L), 42L); + + KUNIT_EXPECT_EQ(test, abs(0LL), 0LL); + KUNIT_EXPECT_EQ(test, abs(42LL), 42LL); + KUNIT_EXPECT_EQ(test, abs(-42LL), 42LL); + + /* Unsigned types get casted to signed. */ + KUNIT_EXPECT_EQ(test, abs(0ULL), 0LL); + KUNIT_EXPECT_EQ(test, abs(42ULL), 42LL); +} + +static void int_sqrt_test(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, int_sqrt(0UL), 0UL); + KUNIT_EXPECT_EQ(test, int_sqrt(1UL), 1UL); + KUNIT_EXPECT_EQ(test, int_sqrt(4UL), 2UL); + KUNIT_EXPECT_EQ(test, int_sqrt(5UL), 2UL); + KUNIT_EXPECT_EQ(test, int_sqrt(8UL), 2UL); + KUNIT_EXPECT_EQ(test, int_sqrt(1UL << 30), 1UL << 15); +} + +static void round_up_test(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, round_up(0, 1), 0); + KUNIT_EXPECT_EQ(test, round_up(1, 2), 2); + KUNIT_EXPECT_EQ(test, round_up(3, 2), 4); + KUNIT_EXPECT_EQ(test, round_up((1 << 30) - 1, 2), 1 << 30); + KUNIT_EXPECT_EQ(test, round_up((1 << 30) - 1, 1 << 29), 1 << 30); +} + +static void round_down_test(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, round_down(0, 1), 0); + KUNIT_EXPECT_EQ(test, round_down(1, 2), 0); + KUNIT_EXPECT_EQ(test, round_down(3, 2), 2); + KUNIT_EXPECT_EQ(test, round_down((1 << 30) - 1, 2), (1 << 30) - 2); + KUNIT_EXPECT_EQ(test, round_down((1 << 30) - 1, 1 << 29), 1 << 29); +} + +/* These versions can round to numbers that aren't a power of two */ +static void roundup_test(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, roundup(0, 1), 0); + KUNIT_EXPECT_EQ(test, roundup(1, 2), 2); + KUNIT_EXPECT_EQ(test, roundup(3, 2), 4); + KUNIT_EXPECT_EQ(test, roundup((1 << 30) - 1, 2), 1 << 30); + KUNIT_EXPECT_EQ(test, roundup((1 << 30) - 1, 1 << 29), 1 << 
30); + + KUNIT_EXPECT_EQ(test, roundup(3, 2), 4); + KUNIT_EXPECT_EQ(test, roundup(4, 3), 6); +} + +static void rounddown_test(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, rounddown(0, 1), 0); + KUNIT_EXPECT_EQ(test, rounddown(1, 2), 0); + KUNIT_EXPECT_EQ(test, rounddown(3, 2), 2); + KUNIT_EXPECT_EQ(test, rounddown((1 << 30) - 1, 2), (1 << 30) - 2); + KUNIT_EXPECT_EQ(test, rounddown((1 << 30) - 1, 1 << 29), 1 << 29); + + KUNIT_EXPECT_EQ(test, rounddown(3, 2), 2); + KUNIT_EXPECT_EQ(test, rounddown(4, 3), 3); +} + +static void div_round_up_test(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, DIV_ROUND_UP(0, 1), 0); + KUNIT_EXPECT_EQ(test, DIV_ROUND_UP(20, 10), 2); + KUNIT_EXPECT_EQ(test, DIV_ROUND_UP(21, 10), 3); + KUNIT_EXPECT_EQ(test, DIV_ROUND_UP(21, 20), 2); + KUNIT_EXPECT_EQ(test, DIV_ROUND_UP(21, 99), 1); +} + +static void div_round_closest_test(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, DIV_ROUND_CLOSEST(0, 1), 0); + KUNIT_EXPECT_EQ(test, DIV_ROUND_CLOSEST(20, 10), 2); + KUNIT_EXPECT_EQ(test, DIV_ROUND_CLOSEST(21, 10), 2); + KUNIT_EXPECT_EQ(test, DIV_ROUND_CLOSEST(25, 10), 3); +} + +/* Generic test case for unsigned long inputs. 
*/ +struct test_case { + unsigned long a, b; + unsigned long result; +}; + +static struct test_case gcd_cases[] = { + { + .a = 0, .b = 0, + .result = 0, + }, + { + .a = 0, .b = 1, + .result = 1, + }, + { + .a = 2, .b = 2, + .result = 2, + }, + { + .a = 2, .b = 4, + .result = 2, + }, + { + .a = 3, .b = 5, + .result = 1, + }, + { + .a = 3 * 9, .b = 3 * 5, + .result = 3, + }, + { + .a = 3 * 5 * 7, .b = 3 * 5 * 11, + .result = 15, + }, + { + .a = 1 << 21, + .b = (1 << 21) - 1, + .result = 1, + }, +}; + +KUNIT_ARRAY_PARAM(gcd, gcd_cases, NULL); + +static void gcd_test(struct kunit *test) +{ + const char *message_fmt = "gcd(%lu, %lu)"; + const struct test_case *test_param = test->param_value; + + KUNIT_EXPECT_EQ_MSG(test, test_param->result, + gcd(test_param->a, test_param->b), + message_fmt, test_param->a, + test_param->b); + + if (test_param->a == test_param->b) + return; + + /* gcd(a,b) == gcd(b,a) */ + KUNIT_EXPECT_EQ_MSG(test, test_param->result, + gcd(test_param->b, test_param->a), + message_fmt, test_param->b, + test_param->a); +} + +static struct test_case lcm_cases[] = { + { + .a = 0, .b = 0, + .result = 0, + }, + { + .a = 0, .b = 1, + .result = 0, + }, + { + .a = 1, .b = 2, + .result = 2, + }, + { + .a = 2, .b = 2, + .result = 2, + }, + { + .a = 3 * 5, .b = 3 * 7, + .result = 3 * 5 * 7, + }, +}; + +KUNIT_ARRAY_PARAM(lcm, lcm_cases, NULL); + +static void lcm_test(struct kunit *test) +{ + const char *message_fmt = "lcm(%lu, %lu)"; + const struct test_case *test_param = test->param_value; + + KUNIT_EXPECT_EQ_MSG(test, test_param->result, + lcm(test_param->a, test_param->b), + message_fmt, test_param->a, + test_param->b); + + if (test_param->a == test_param->b) + return; + + /* lcm(a,b) == lcm(b,a) */ + KUNIT_EXPECT_EQ_MSG(test, test_param->result, + lcm(test_param->b, test_param->a), + message_fmt, test_param->b, + test_param->a); +} + +struct u32_test_case { + u32 a, b; + u32 result; +}; + +static struct u32_test_case reciprocal_div_cases[] = { + { + .a = 0, .b = 
1, + .result = 0, + }, + { + .a = 42, .b = 20, + .result = 2, + }, + { + .a = 42, .b = 9999, + .result = 0, + }, + { + .a = (1 << 16), .b = (1 << 14), + .result = 1 << 2, + }, +}; + +KUNIT_ARRAY_PARAM(reciprocal_div, reciprocal_div_cases, NULL); + +static void reciprocal_div_test(struct kunit *test) +{ + const struct u32_test_case *test_param = test->param_value; + struct reciprocal_value rv = reciprocal_value(test_param->b); + + KUNIT_EXPECT_EQ_MSG(test, test_param->result, + reciprocal_divide(test_param->a, rv), + "reciprocal_divide(%u, %u)", + test_param->a, test_param->b); +} + +static void reciprocal_scale_test(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, reciprocal_scale(0u, 100), 0u); + KUNIT_EXPECT_EQ(test, reciprocal_scale(1u, 100), 0u); + KUNIT_EXPECT_EQ(test, reciprocal_scale(1u << 4, 1 << 28), 1u); + KUNIT_EXPECT_EQ(test, reciprocal_scale(1u << 16, 1 << 28), 1u << 12); + KUNIT_EXPECT_EQ(test, reciprocal_scale(~0u, 1 << 28), (1u << 28) - 1); +} + +static struct kunit_case math_test_cases[] = { + KUNIT_CASE(abs_test), + KUNIT_CASE(int_sqrt_test), + KUNIT_CASE(round_up_test), + KUNIT_CASE(round_down_test), + KUNIT_CASE(roundup_test), + KUNIT_CASE(rounddown_test), + KUNIT_CASE(div_round_up_test), + KUNIT_CASE(div_round_closest_test), + KUNIT_CASE_PARAM(gcd_test, gcd_gen_params), + KUNIT_CASE_PARAM(lcm_test, lcm_gen_params), + KUNIT_CASE_PARAM(reciprocal_div_test, reciprocal_div_gen_params), + KUNIT_CASE(reciprocal_scale_test), + {} +}; + +static struct kunit_suite math_test_suite = { + .name = "lib-math", + .test_cases = math_test_cases, +}; + +kunit_test_suites(&math_test_suite); + +MODULE_LICENSE("GPL v2"); base-commit: 7e25f40eab52c57ff6772d27d2aef3640a3237d7 -- 2.31.1.368.gbe11c130af-goog

1 year, 1 month

5
5
0 0

[PATCH v6 1/2] posix-timers: Prefer delivery of signals to the current thread

by Marco Elver

From: Dmitry Vyukov <dvyukov(a)google.com> POSIX timers using the CLOCK_PROCESS_CPUTIME_ID clock prefer the main thread of a thread group for signal delivery. However, this has a significant downside: it requires waking up a potentially idle thread. Instead, prefer to deliver signals to the current thread (in the same thread group) if SIGEV_THREAD_ID is not set by the user. This does not change guaranteed semantics, since POSIX process CPU time timers have never guaranteed that signal delivery is to a specific thread (without SIGEV_THREAD_ID set). The effect is that we no longer wake up potentially idle threads, and the kernel is no longer biased towards delivering the timer signal to any particular thread (which better distributes the timer signals esp. when multiple timers fire concurrently). Signed-off-by: Dmitry Vyukov <dvyukov(a)google.com> Suggested-by: Oleg Nesterov <oleg(a)redhat.com> Reviewed-by: Oleg Nesterov <oleg(a)redhat.com> Signed-off-by: Marco Elver <elver(a)google.com> --- v6: - Split test from this patch. - Update wording on what this patch aims to improve. v5: - Rebased onto v6.2. v4: - Restructured checks in send_sigqueue() as suggested. v3: - Switched to a completely different implementation (much simpler) based on Oleg's idea. RFC v2: - Added additional Cc as Thomas asked. --- kernel/signal.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/kernel/signal.c b/kernel/signal.c index 8cb28f1df294..605445fa27d4 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1003,8 +1003,7 @@ static void complete_signal(int sig, struct task_struct *p, enum pid_type type) /* * Now find a thread we can wake up to take the signal off the queue. * - * If the main thread wants the signal, it gets first crack. - * Probably the least surprising to the average bear. + * Try the suggested task first (may or may not be the main thread). 
*/ if (wants_signal(sig, p)) t = p; @@ -1970,8 +1969,23 @@ int send_sigqueue(struct sigqueue *q, struct pid *pid, enum pid_type type) ret = -1; rcu_read_lock(); + /* + * This function is used by POSIX timers to deliver a timer signal. + * Where type is PIDTYPE_PID (such as for timers with SIGEV_THREAD_ID + * set), the signal must be delivered to the specific thread (queues + * into t->pending). + * + * Where type is not PIDTYPE_PID, signals must just be delivered to the + * current process. In this case, prefer to deliver to current if it is + * in the same thread group as the target, as it avoids unnecessarily + * waking up a potentially idle task. + */ t = pid_task(pid, type); - if (!t || !likely(lock_task_sighand(t, &flags))) + if (!t) + goto ret; + if (type != PIDTYPE_PID && same_thread_group(t, current)) + t = current; + if (!likely(lock_task_sighand(t, &flags))) goto ret; ret = 1; /* the signal is ignored */ @@ -1993,6 +2007,11 @@ int send_sigqueue(struct sigqueue *q, struct pid *pid, enum pid_type type) q->info.si_overrun = 0; signalfd_notify(t, sig); + /* + * If the type is not PIDTYPE_PID, we just use shared_pending, which + * won't guarantee that the specified task will receive the signal, but + * is sufficient if t==current in the common case. + */ pending = (type != PIDTYPE_PID) ? &t->signal->shared_pending : &t->pending; list_add_tail(&q->list, &pending->list); sigaddset(&pending->signal, sig); -- 2.40.0.rc1.284.g88254d51c5-goog

1 year, 2 months

9
50
0 0

[PATCH v3 0/7] Split a folio to any lower order folios

by Zi Yan

From: Zi Yan <ziy(a)nvidia.com> Hi all, File folio supports any order and people would like to support flexible orders for anonymous folio[1] too. Currently, split_huge_page() only splits a huge page to order-0 pages, but splitting to orders higher than 0 is also useful. This patchset adds support for splitting a huge page to any lower order pages and uses it during file folio truncate operations. The patchset is on top of mm-everything-2023-03-27-21-20. Changelog === Since v2 --- 1. Fixed an issue in __split_page_owner() introduced during my rebase Since v1 --- 1. Changed split_page_memcg() and split_page_owner() parameter to use order 2. Used folio_test_pmd_mappable() in place of the equivalent code Details === * Patch 1 changes split_page_memcg() to use order instead of nr_pages * Patch 2 changes split_page_owner() to use order instead of nr_pages * Patches 3 and 4 add a new_order parameter to split_page_memcg() and split_page_owner() and prepare for upcoming changes. * Patch 5 adds split_huge_page_to_list_to_order() to split a huge page to any lower order. The original split_huge_page_to_list() calls split_huge_page_to_list_to_order() with new_order = 0. * Patch 6 uses split_huge_page_to_list_to_order() in large pagecache folio truncation instead of splitting the large folio all the way down to order-0. * Patch 7 adds a test API to debugfs and test cases in split_huge_page_test selftests. Comments and/or suggestions are welcome. [1] https://lore.kernel.org/linux-mm/Y%2FblF0GIunm+pRIC@casper.infradead.org/ Zi Yan (7): mm/memcg: use order instead of nr in split_page_memcg() mm/page_owner: use order instead of nr in split_page_owner() mm: memcg: make memcg huge page split support any order split. mm: page_owner: add support for splitting to any order in split page_owner. mm: thp: split huge page to any lower order pages. mm: truncate: split huge page cache page to a non-zero order if possible. mm: huge_memory: enable debugfs to split huge pages to any order. 
include/linux/huge_mm.h | 10 +- include/linux/memcontrol.h | 4 +- include/linux/page_owner.h | 10 +- mm/huge_memory.c | 137 ++++++++--- mm/memcontrol.c | 10 +- mm/page_alloc.c | 8 +- mm/page_owner.c | 8 +- mm/truncate.c | 21 +- .../selftests/mm/split_huge_page_test.c | 225 +++++++++++++++++- 9 files changed, 365 insertions(+), 68 deletions(-) -- 2.39.2

1 year, 4 months

6
22
0 0

[PATCH v1 0/9] x86/resctrl: Use soft RMIDs for reliable MBM on AMD

by Peter Newman

Hi Reinette, Fenghua, This series introduces a new mount option enabling an alternate mode for MBM to work around an issue on present AMD implementations and any other resctrl implementation where there are more RMIDs (or equivalent) than hardware counters. The L3 External Bandwidth Monitoring feature of the AMD PQoS extension[1] only guarantees that RMIDs currently assigned to a processor will be tracked by hardware. The counters of any other RMIDs which are no longer being tracked will be reset to zero. The MBM event counters return "Unavailable" to indicate when this has happened. An interval for effectively measuring memory bandwidth typically needs to be multiple seconds long. In Google's workloads, it is not feasible to bound the number of jobs with different RMIDs which will run in a cache domain over any period of time. Consequently, on a fully-committed system where all RMIDs are allocated, few groups' counters return non-zero values. To demonstrate the underlying issue, the first patch provides a test case in tools/testing/selftests/resctrl/test_rmids.sh. On an AMD EPYC 7B12 64-Core Processor with the default behavior: # ./test_rmids.sh Created 255 monitoring groups. g1: mbm_total_bytes: Unavailable -> Unavailable (FAIL) g2: mbm_total_bytes: Unavailable -> Unavailable (FAIL) g3: mbm_total_bytes: Unavailable -> Unavailable (FAIL) [..] 
g238: mbm_total_bytes: Unavailable -> Unavailable (FAIL) g239: mbm_total_bytes: Unavailable -> Unavailable (FAIL) g240: mbm_total_bytes: Unavailable -> Unavailable (FAIL) g241: mbm_total_bytes: Unavailable -> 660497472 g242: mbm_total_bytes: Unavailable -> 660793344 g243: mbm_total_bytes: Unavailable -> 660477312 g244: mbm_total_bytes: Unavailable -> 660495360 g245: mbm_total_bytes: Unavailable -> 660775360 g246: mbm_total_bytes: Unavailable -> 660645504 g247: mbm_total_bytes: Unavailable -> 660696128 g248: mbm_total_bytes: Unavailable -> 660605248 g249: mbm_total_bytes: Unavailable -> 660681280 g250: mbm_total_bytes: Unavailable -> 660834240 g251: mbm_total_bytes: Unavailable -> 660440064 g252: mbm_total_bytes: Unavailable -> 660501504 g253: mbm_total_bytes: Unavailable -> 660590720 g254: mbm_total_bytes: Unavailable -> 660548352 g255: mbm_total_bytes: Unavailable -> 660607296 255 groups, 0 returned counts in first pass, 15 in second successfully measured bandwidth from 15/255 groups To compare, here is the output from an Intel(R) Xeon(R) Platinum 8173M CPU: # ./test_rmids.sh Created 223 monitoring groups. g1: mbm_total_bytes: 0 -> 606126080 g2: mbm_total_bytes: 0 -> 613236736 g3: mbm_total_bytes: 0 -> 610254848 [..] g221: mbm_total_bytes: 0 -> 584679424 g222: mbm_total_bytes: 0 -> 588808192 g223: mbm_total_bytes: 0 -> 587317248 223 groups, 223 returned counts in first pass, 223 in second successfully measured bandwidth from 223/223 groups To make better use of the hardware in such a use case, this patchset introduces a "soft" RMID implementation, where each CPU is permanently assigned a "hard" RMID. On context switches which change the current soft RMID, the difference between each CPU's current event counts and most recent counts is added to the totals for the current or outgoing soft RMID. This technique does not work for cache occupancy counters, so this patch series disables cache occupancy events when soft RMIDs are enabled. 
This series adds the "mbm_soft_rmid" mount option to allow users to opt in to the functionality when they deem it helpful. When the same system from the earlier AMD example enables the mbm_soft_rmid mount option: # ./test_rmids.sh Created 255 monitoring groups. g1: mbm_total_bytes: 0 -> 686560576 g2: mbm_total_bytes: 0 -> 668204416 [..] g252: mbm_total_bytes: 0 -> 672651200 g253: mbm_total_bytes: 0 -> 666956800 g254: mbm_total_bytes: 0 -> 665917056 g255: mbm_total_bytes: 0 -> 671049600 255 groups, 255 returned counts in first pass, 255 in second successfully measured bandwidth from 255/255 groups (patches are based on tip/master) [1] https://www.amd.com/system/files/TechDocs/56375_1.03_PUB.pdf Peter Newman (8): selftests/resctrl: Verify all RMIDs count together x86/resctrl: Add resctrl_mbm_flush_cpu() to collect CPUs' MBM events x86/resctrl: Flush MBM event counts on soft RMID change x86/resctrl: Call mon_event_count() directly for soft RMIDs x86/resctrl: Create soft RMID version of __mon_event_count() x86/resctrl: Assign HW RMIDs to CPUs for soft RMID x86/resctrl: Use mbm_update() to push soft RMID counts x86/resctrl: Add mount option to enable soft RMID Stephane Eranian (1): x86/resctrl: Hold a spinlock in __rmid_read() on AMD arch/x86/include/asm/resctrl.h | 29 +++- arch/x86/kernel/cpu/resctrl/core.c | 80 ++++++++- arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 9 +- arch/x86/kernel/cpu/resctrl/internal.h | 19 ++- arch/x86/kernel/cpu/resctrl/monitor.c | 158 +++++++++++++++++- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 52 ++++++ tools/testing/selftests/resctrl/test_rmids.sh | 93 +++++++++++ 7 files changed, 425 insertions(+), 15 deletions(-) create mode 100755 tools/testing/selftests/resctrl/test_rmids.sh base-commit: dd806e2f030e57dd5bac973372aa252b6c175b73 -- 2.40.0.634.g4ca3ef3211-goog

1 year, 6 months

2
39
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-kselftest-mirror April 2023