- Linux-kselftest-mirror - lists.linaro.org

Re: [PATCH] procfs: block chmod on /proc/thread-self/comm

by Christian Brauner

> > diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c > > index 486334981e60..08f0969208eb 100644 > > --- a/tools/testing/selftests/nolibc/nolibc-test.c > > +++ b/tools/testing/selftests/nolibc/nolibc-test.c > > @@ -580,6 +580,10 @@ int run_syscall(int min, int max) > > CASE_TEST(chmod_net); EXPECT_SYSZR(proc, chmod("/proc/self/net", 0555)); break; > > CASE_TEST(chmod_self); EXPECT_SYSER(proc, chmod("/proc/self", 0555), -1, EPERM); break; > > CASE_TEST(chown_self); EXPECT_SYSER(proc, chown("/proc/self", 0, 0), -1, EPERM); break; > > + CASE_TEST(chmod_self_comm); EXPECT_SYSER(proc, chmod("/proc/self/comm", 0777), -1, EPERM); break; > > + CASE_TEST(chmod_tid_comm); EXPECT_SYSER(proc, chmod("/proc/thread-self/comm", 0777), -1, EPERM); break; > > + CASE_TEST(chmod_self_environ);EXPECT_SYSER(proc, chmod("/proc/self/environ", 0777), -1, EPERM); break; > > + CASE_TEST(chmod_tid_environ); EXPECT_SYSER(proc, chmod("/proc/thread-self/environ", 0777), -1, EPERM); break; > > I'm not a big fan of this, it abuses the nolibc testsuite to test core > kernel functionality. Yes, this should be dropped. We need a minimal patch to fix this. This just makes backporting harder and any test doesn't need to be backported.

2 years, 5 months

4
4
0 0

[PATCH] procfs: block chmod on /proc/thread-self/comm

by Aleksa Sarai

Due to an oversight in commit 1b3044e39a89 ("procfs: fix pthread cross-thread naming if !PR_DUMPABLE") in switching from REG to NOD, chmod operations on /proc/thread-self/comm were no longer blocked as they are on almost all other procfs files. A very similar situation with /proc/self/environ was used as a root exploit a long time ago, but procfs has SB_I_NOEXEC so this is simply a correctness issue. Ref: https://lwn.net/Articles/191954/ Ref: 6d76fa58b050 ("Don't allow chmod() on the /proc/<pid>/ files") Fixes: 1b3044e39a89 ("procfs: fix pthread cross-thread naming if !PR_DUMPABLE") Cc: stable(a)vger.kernel.org # v4.7+ Signed-off-by: Aleksa Sarai <cyphar(a)cyphar.com> --- fs/proc/base.c | 3 ++- tools/testing/selftests/nolibc/nolibc-test.c | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 05452c3b9872..7394229816f3 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3583,7 +3583,8 @@ static int proc_tid_comm_permission(struct mnt_idmap *idmap, } static const struct inode_operations proc_tid_comm_inode_operations = { - .permission = proc_tid_comm_permission, + .setattr = proc_setattr, + .permission = proc_tid_comm_permission, }; /* diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index 486334981e60..08f0969208eb 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -580,6 +580,10 @@ int run_syscall(int min, int max) CASE_TEST(chmod_net); EXPECT_SYSZR(proc, chmod("/proc/self/net", 0555)); break; CASE_TEST(chmod_self); EXPECT_SYSER(proc, chmod("/proc/self", 0555), -1, EPERM); break; CASE_TEST(chown_self); EXPECT_SYSER(proc, chown("/proc/self", 0, 0), -1, EPERM); break; + CASE_TEST(chmod_self_comm); EXPECT_SYSER(proc, chmod("/proc/self/comm", 0777), -1, EPERM); break; + CASE_TEST(chmod_tid_comm); EXPECT_SYSER(proc, chmod("/proc/thread-self/comm", 0777), -1, EPERM); break; + 
CASE_TEST(chmod_self_environ);EXPECT_SYSER(proc, chmod("/proc/self/environ", 0777), -1, EPERM); break; + CASE_TEST(chmod_tid_environ); EXPECT_SYSER(proc, chmod("/proc/thread-self/environ", 0777), -1, EPERM); break; CASE_TEST(chroot_root); EXPECT_SYSZR(euid0, chroot("/")); break; CASE_TEST(chroot_blah); EXPECT_SYSER(1, chroot("/proc/self/blah"), -1, ENOENT); break; CASE_TEST(chroot_exe); EXPECT_SYSER(proc, chroot("/proc/self/exe"), -1, ENOTDIR); break; -- 2.41.0

2 years, 5 months

4
4
0 0

[PATCH] procfs: block chmod on /proc/thread-self/comm

by Aleksa Sarai

Due to an oversight in commit 1b3044e39a89 ("procfs: fix pthread cross-thread naming if !PR_DUMPABLE") in switching from REG to NOD, chmod operations on /proc/thread-self/comm were no longer blocked as they are on almost all other procfs files. A very similar situation with /proc/self/environ was used as a root exploit a long time ago, but procfs has SB_I_NOEXEC so this is simply a correctness issue. Ref: https://lwn.net/Articles/191954/ Ref: 6d76fa58b050 ("Don't allow chmod() on the /proc/<pid>/ files") Fixes: 1b3044e39a89 ("procfs: fix pthread cross-thread naming if !PR_DUMPABLE") Cc: stable(a)vger.kernel.org # v4.7+ Signed-off-by: Aleksa Sarai <cyphar(a)cyphar.com> --- fs/proc/base.c | 3 ++- tools/testing/selftests/nolibc/nolibc-test.c | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 05452c3b9872..7394229816f3 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3583,7 +3583,8 @@ static int proc_tid_comm_permission(struct mnt_idmap *idmap, } static const struct inode_operations proc_tid_comm_inode_operations = { - .permission = proc_tid_comm_permission, + .setattr = proc_setattr, + .permission = proc_tid_comm_permission, }; /* diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index 486334981e60..08f0969208eb 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -580,6 +580,10 @@ int run_syscall(int min, int max) CASE_TEST(chmod_net); EXPECT_SYSZR(proc, chmod("/proc/self/net", 0555)); break; CASE_TEST(chmod_self); EXPECT_SYSER(proc, chmod("/proc/self", 0555), -1, EPERM); break; CASE_TEST(chown_self); EXPECT_SYSER(proc, chown("/proc/self", 0, 0), -1, EPERM); break; + CASE_TEST(chmod_self_comm); EXPECT_SYSER(proc, chmod("/proc/self/comm", 0777), -1, EPERM); break; + CASE_TEST(chmod_tid_comm); EXPECT_SYSER(proc, chmod("/proc/thread-self/comm", 0777), -1, EPERM); break; + 
CASE_TEST(chmod_self_environ);EXPECT_SYSER(proc, chmod("/proc/self/environ", 0777), -1, EPERM); break; + CASE_TEST(chmod_tid_environ); EXPECT_SYSER(proc, chmod("/proc/thread-self/environ", 0777), -1, EPERM); break; CASE_TEST(chroot_root); EXPECT_SYSZR(euid0, chroot("/")); break; CASE_TEST(chroot_blah); EXPECT_SYSER(1, chroot("/proc/self/blah"), -1, ENOENT); break; CASE_TEST(chroot_exe); EXPECT_SYSER(proc, chroot("/proc/self/exe"), -1, ENOTDIR); break; -- 2.41.0

2 years, 5 months

1
0
0 0

[PATCH 0/2] proc: proc_setattr for /proc/$PID/net

by Thomas Weißschuh

/proc/$PID/net currently allows the setting of file attributes, in contrast to other /proc/$PID/ files and directories. Fixing that would break the nolibc testsuite, so the first patch in the series removes the offending testcase. The "fix" for nolibc-test is intentionally kept trivial as the series will most likely go through the filesystem tree and, if conflicts arise, it is obvious how to resolve them. Technically this can lead to breakage of nolibc-test if an old nolibc-test is used with a newer kernel containing the fix. Note: Except for /proc itself this is the only "struct inode_operations" in fs/proc/ that is missing an implementation of setattr(). Signed-off-by: Thomas Weißschuh <linux(a)weissschuh.net> --- Thomas Weißschuh (2): selftests/nolibc: drop test chmod_net proc: use generic setattr() for /proc/$PID/net fs/proc/proc_net.c | 1 + tools/testing/selftests/nolibc/nolibc-test.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) --- base-commit: a92b7d26c743b9dc06d520f863d624e94978a1d9 change-id: 20230624-proc-net-setattr-8f0a6b8eb2f5 Best regards, -- Thomas Weißschuh <linux(a)weissschuh.net>

2 years, 5 months

5
15
0 0

[PATCH][next] selftests: ALSA: Fix fclose on an already fclosed file pointer

by Colin Ian King

In the case where a sysfs file cannot be opened, the error return path fcloses file pointer fpl; however, fpl has already been closed in the previous stanza. Fix the double fclose by removing it. Fixes: 10b98a4db11a ("selftests: ALSA: Add test for the 'pcmtest' driver") Signed-off-by: Colin Ian King <colin.i.king(a)gmail.com> --- tools/testing/selftests/alsa/test-pcmtest-driver.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/testing/selftests/alsa/test-pcmtest-driver.c b/tools/testing/selftests/alsa/test-pcmtest-driver.c index 71931b240a83..357adc722cba 100644 --- a/tools/testing/selftests/alsa/test-pcmtest-driver.c +++ b/tools/testing/selftests/alsa/test-pcmtest-driver.c @@ -47,10 +47,8 @@ static int read_patterns(void) sprintf(pf, "/sys/kernel/debug/pcmtest/fill_pattern%d", i); fp = fopen(pf, "r"); - if (!fp) { - fclose(fpl); + if (!fp) return -1; - } fread(patterns[i].buf, 1, patterns[i].len, fp); fclose(fp); } -- 2.39.2

2 years, 5 months

3
2
0 0

[PATCH] x86: checksum: Fix unaligned checksums on < i686

by David Gow

The checksum_32 code was originally written to only handle 2-byte aligned buffers, but was later extended to support arbitrary alignment. However, the non-PPro variant doesn't apply the carry before jumping to the 2- or 4-byte aligned versions, which clear CF. This causes the new checksum_kunit test to fail, as it runs with a large number of different possible alignments and both with and without carries. For example: ./tools/testing/kunit/kunit.py run --arch i386 --kconfig_add CONFIG_M486=y checksum Gives: KTAP version 1 # Subtest: checksum 1..3 ok 1 test_csum_fixed_random_inputs # test_csum_all_carry_inputs: ASSERTION FAILED at lib/checksum_kunit.c:267 Expected result == expec, but result == 65281 (0xff01) expec == 65280 (0xff00) not ok 2 test_csum_all_carry_inputs # test_csum_no_carry_inputs: ASSERTION FAILED at lib/checksum_kunit.c:314 Expected result == expec, but result == 65535 (0xffff) expec == 65534 (0xfffe) not ok 3 test_csum_no_carry_inputs With this patch, it passes. KTAP version 1 # Subtest: checksum 1..3 ok 1 test_csum_fixed_random_inputs ok 2 test_csum_all_carry_inputs ok 3 test_csum_no_carry_inputs I also tested it on a real 486DX2, with the same results. Signed-off-by: David Gow <davidgow(a)google.com> --- This is a follow-up to the UML patch to use the common 32-bit x86 checksum implementations: https://lore.kernel.org/linux-um/20230704083022.692368-2-davidgow@google.co… --- arch/x86/lib/checksum_32.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S index 23318c338db0..128287cea42d 100644 --- a/arch/x86/lib/checksum_32.S +++ b/arch/x86/lib/checksum_32.S @@ -62,6 +62,7 @@ SYM_FUNC_START(csum_partial) jl 8f movzbl (%esi), %ebx adcl %ebx, %eax + adcl $0, %eax roll $8, %eax inc %esi testl $2, %esi -- 2.41.0.255.g8b1d071c50-goog

2 years, 5 months

2
2
0 0

[PATCH] kunit: qemu_configs: Enable all architectural features for arm64

by Mark Brown

While it probably doesn't make a huge difference given the current KUnit coverage, we will get the best coverage of arm64 architecture features if we specify -cpu=max rather than picking a specific CPU. This will include all architecture features that qemu supports, including many which have not yet made it into physical implementations. Due to performance issues emulating the architected pointer authentication algorithm, it is recommended to use the implementation-defined algorithm that qemu has instead; this should make no meaningful difference to the coverage and will run the tests faster. Signed-off-by: Mark Brown <broonie(a)kernel.org> --- tools/testing/kunit/qemu_configs/arm64.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/kunit/qemu_configs/arm64.py b/tools/testing/kunit/qemu_configs/arm64.py index 67d04064f785..d3ff27024755 100644 --- a/tools/testing/kunit/qemu_configs/arm64.py +++ b/tools/testing/kunit/qemu_configs/arm64.py @@ -9,4 +9,4 @@ CONFIG_SERIAL_AMBA_PL011_CONSOLE=y''', qemu_arch='aarch64', kernel_path='arch/arm64/boot/Image.gz', kernel_command_line='console=ttyAMA0', - extra_qemu_params=['-machine', 'virt', '-cpu', 'cortex-a57']) + extra_qemu_params=['-machine', 'virt', '-cpu', 'max,pauth-impdef=on']) --- base-commit: 06c2afb862f9da8dc5efa4b6076a0e48c3fbaaa5 change-id: 20230702-kunit-arm64-cpu-max-7e3aa5f02fb2 Best regards, -- Mark Brown <broonie(a)kernel.org>

2 years, 5 months

2
1
0 0

Re: [PATCH 6.4 0/6] 6.4.3-rc2 review

by Naresh Kamboju

On Mon, 10 Jul 2023 at 02:15, Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> wrote: > > This is the start of the stable review cycle for the 6.4.3 release. > There are 6 patches in this series, all will be posted as a response > to this one. If anyone has any issues with these being applied, please > let me know. > > Responses should be made by Tue, 11 Jul 2023 20:38:10 +0000. > Anything received after that time might be too late. > > The whole patch series can be found in one patch at: > https://www.kernel.org/pub/linux/kernel/v6.x/stable-review/patch-6.4.3-rc2.… > or in the git tree and branch at: > git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-6.4.y > and the diffstat can be found below. > > thanks, > > greg k-h Results from Linaro’s test farm. No regressions on arm64, arm, x86_64, and i386. Tested-by: Linux Kernel Functional Testing <lkft(a)linaro.org> NOTE: The following two issues are not specific to this round of review; we have been noticing them on the stable-rc 6.4.1-rc1 release with kselftest merge-config builds testing the selftests: net tests. 1) While running selftests: net: pmtu.sh on arm64 ARM juno-r2, qemu-arm64 and Raspberry Pi 4 Model B, the following kernel crash was reported. 
# selftests: net: pmtu.sh # TEST: ipv4: PMTU exceptions [ OK ] # TEST: ipv4: PMTU exceptions - nexthop objects [ OK ] # TEST: ipv6: PMTU exceptions [FAIL] # PMTU exception wasn't created after exceeding MTU # TEST: ipv6: PMTU exceptions - nexthop objects [FAIL] # PMTU exception wasn't created after exceeding MTU # TEST: ICMPv4 with DSCP and ECN: PMTU exceptions [ OK ] # TEST: ICMPv4 with DSCP and ECN: PMTU exceptions - nexthop objects [ OK ] # TEST: UDPv4 with DSCP and ECN: PMTU exceptions [ OK ] # TEST: UDPv4 with DSCP and ECN: PMTU exceptions - nexthop objects [ OK ] # TEST: IPv4 over vxlan4: PMTU exceptions [ OK ] # TEST: IPv4 over vxlan4: PMTU exceptions - nexthop objects [ OK ] # TEST: IPv6 over vxlan4: PMTU exceptions [FAIL] # PMTU exception wasn't created after exceeding link layer MTU on vxlan interface # TEST: IPv6 over vxlan4: PMTU exceptions - nexthop objects [FAIL] # PMTU exception wasn't created after exceeding link layer MTU on vxlan interface # TEST: IPv4 over vxlan6: PMTU exceptions [ OK ] [ 226.478625] Unable to handle kernel paging request at virtual address ffff800974529000 [ 226.486721] Mem abort info: [ 226.489534] ESR = 0x0000000096000005 [ 226.493304] EC = 0x25: DABT (current EL), IL = 32 bits [ 226.498643] SET = 0, FnV = 0 [ 226.501716] EA = 0, S1PTW = 0 [ 226.504876] FSC = 0x05: level 1 translation fault [ 226.509778] Data abort info: [ 226.512675] ISV = 0, ISS = 0x00000005 [ 226.516531] CM = 0, WnR = 0 [ 226.519518] swapper pgtable: 4k pages, 48-bit VAs, pgdp=0000000082721000 [ 226.526249] [ffff800974529000] pgd=10000009fffff003, p4d=10000009fffff003, pud=0000000000000000 [ 226.535343] Internal error: Oops: 0000000096000005 [#1] PREEMPT SMP [ 226.541633] Modules linked in: act_csum libcrc32c act_pedit cls_flower sch_prio bpfilter ip_tables x_tables veth macvtap tap tun cfg80211 bluetooth rfkill tda998x hdlcd cec drm_dma_helper onboard_usb_hub drm_kms_helper crct10dif_ce sch_fq_codel fuse drm [last unloaded: test_blackhole_dev] [ 
226.567412] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.4.3-rc2 #1 [ 226.573614] Hardware name: ARM Juno development board (r2) (DT) [ 226.579549] pstate: 000000c5 (nzcv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 226.586534] pc : percpu_counter_add_batch+0x40/0x148 [ 226.591529] lr : percpu_counter_add_batch+0x124/0x148 [ 226.596603] sp : ffff800008003dc0 [ 226.599926] x29: ffff800008003dc0 x28: ffff80000b282040 x27: ffff800008199b00 [ 226.607101] x26: ffff800008199ab4 x25: 0000000000000000 x24: ffff80000b318888 [ 226.614275] x23: ffff80000b274b80 x22: 0000000000000020 x21: 0000000000000000 [ 226.621449] x20: ffffffffffffffff x19: ffff00082892d740 x18: 0000000000000000 [ 226.628623] x17: ffff800974529000 x16: 0000000000000000 x15: 00000000000001da [ 226.635796] x14: 0000000000000400 x13: ffff8009745ce000 x12: 0000000000000000 [ 226.642969] x11: ffff80000a7cac24 x10: 0000000000000000 x9 : ffff8000094984cc [ 226.650143] x8 : ffff800008003cd8 x7 : 0000000000000000 x6 : 0000000000000101 [ 226.657317] x5 : 0000000000000000 x4 : ffff800008003da0 x3 : 0000000000000000 [ 226.664489] x2 : 0000000100000101 x1 : ffff800974529000 x0 : 0000000000000000 [ 226.671663] Call trace: [ 226.674116] percpu_counter_add_batch+0x40/0x148 [ 226.678756] dst_destroy+0x18c/0x240 [ 226.682351] dst_destroy_rcu+0x24/0x40 [ 226.686118] rcu_core+0x464/0xe80 [ 226.689453] rcu_core_si+0x18/0x30 [ 226.692872] __do_softirq+0x130/0x4f4 [ 226.696549] ____do_softirq+0x18/0x30 [ 226.700228] call_on_irq_stack+0x24/0x58 [ 226.704167] do_softirq_own_stack+0x24/0x38 [ 226.708367] __irq_exit_rcu+0x17c/0x1b0 [ 226.712220] irq_exit_rcu+0x18/0x48 [ 226.715723] el1_interrupt+0x38/0x68 [ 226.719314] el1h_64_irq_handler+0x18/0x28 [ 226.723426] el1h_64_irq+0x64/0x68 [ 226.726841] cpuidle_enter_state+0x148/0x5b0 [ 226.731129] cpuidle_enter+0x40/0x60 [ 226.734723] do_idle+0x22c/0x2b8 [ 226.737970] cpu_startup_entry+0x30/0x40 [ 226.741912] rest_init+0x114/0x1c0 [ 226.745329] arch_call_rest_init+0x18/0x20 [ 226.749444] 
start_kernel+0x5e0/0x6f8 [ 226.753121] __primary_switched+0xbc/0xd0 [ 226.757152] Code: 927902b9 36380775 d538d081 f9402e60 (b8616817) [ 226.763268] ---[ end trace 0000000000000000 ]--- [ 226.767898] Kernel panic - not syncing: Oops: Fatal exception in interrupt [ 226.774790] SMP: stopping secondary CPUs [ 226.778932] Kernel Offset: disabled [ 226.782428] CPU features: 0x800004,18780800,0000421b [ 226.787408] Memory Limit: none [ 226.790479] ---[ end Kernel panic - not syncing: Oops: Fatal exception in interrupt ]--- Links: - https://qa-reports.linaro.org/lkft/linux-stable-rc-linux-6.4.y/build/v6.4.1… - https://qa-reports.linaro.org/lkft/linux-stable-rc-linux-6.4.y/build/v6.4.1… - https://lkft.validation.linaro.org/scheduler/job/6578667#L4613 - https://storage.tuxsuite.com/public/linaro/lkft/builds/2SLr5Fizk6DFnf66nxHo… And 2) While running selftests: net: vrf-xfrm-tests.sh on arm64 Raspberry Pi 4 Model B and arm64 Qualcomm Technologies APQ 8016 SBC devices. # selftests: net: vrf-xfrm-tests.sh # # No qdisc on VRF device # TEST: IPv6 no xfrm policy [ OK ] # Cannot open netlink socket: Protocol not supported [ 3310.312970] audit: type=1334 audit(1651171022.511:197): prog-id=59 op=LOAD # TEST: IPv4 xfrm policy based on address [ OK ] # TEST: IPv6 xfrm policy based on address [ OK ] # Cannot open netlink socket: Protocol not supported # TEST: IPv6 xfrm policy with VRF in selector [ OK ] # TEST: IPv4 xfrm policy with xfrm device [FAIL] # TEST: IPv6 xfrm policy with xfrm device [FAIL] # # netem qdisc on VRF device # Cannot open netlink socket: Protocol not supported ... [ 3326.099841] [ 3326.108605] ============================= [ 3326.117159] WARNING: suspicious RCU usage [ 3326.125672] 6.4.3-rc2 #1 Not tainted [ 3326.134070] ----------------------------- [ 3326.142459] include/net/neighbour.h:302 suspicious rcu_dereference_check() usage! 
[ 3326.150984] [ 3326.150984] other info that might help us debug this: [ 3326.150984] [ 3326.175738] [ 3326.175738] rcu_scheduler_active = 2, debug_locks = 1 [ 3326.192048] 2 locks held by ping/19436: [ 3326.200354] #0: ffff00000bce3570 (sk_lock-AF_INET){+.+.}-{0:0}, at: raw_sendmsg+0x25c/0xf18 [ 3326.208962] #1: ffff80000b397b98 (rcu_read_lock_bh){....}-{1:2}, at: vrf_finish_output+0x70/0x8d0 [ 3326.217663] [ 3326.217663] stack backtrace: [ 3326.234476] CPU: 1 PID: 19436 Comm: ping Not tainted 6.4.3-rc2 #1 [ 3326.243032] Hardware name: Qualcomm Technologies, Inc. APQ 8016 SBC (DT) [ 3326.251626] Call trace: [ 3326.260137] dump_backtrace+0xa0/0x128 [ 3326.268713] show_stack+0x30/0x48 [ 3326.277235] dump_stack_lvl+0x90/0xb0 [ 3326.285718] dump_stack+0x18/0x28 [ 3326.294123] lockdep_rcu_suspicious+0x16c/0x230 [ 3326.302589] vrf_finish_output+0x664/0x8d0 [ 3326.311049] vrf_output+0x104/0x410 [ 3326.319488] ip_send_skb+0x60/0x128 [ 3326.327928] ip_push_pending_frames+0x44/0x70 [ 3326.336395] raw_sendmsg+0x584/0xf18 [ 3326.344815] inet_sendmsg+0x50/0x88 [ 3326.353251] __sys_sendto+0xfc/0x198 [ 3326.361650] __arm64_sys_sendto+0x34/0x50 [ 3326.369890] invoke_syscall+0x8c/0x120 [ 3326.377899] el0_svc_common.constprop.0+0x104/0x130 [ 3326.385824] do_el0_svc+0x44/0xb8 [ 3326.393644] el0_svc+0x40/0xa8 [ 3326.401277] el0t_64_sync_handler+0xbc/0x138 [ 3326.408721] el0t_64_sync+0x190/0x198 [ 3326.416111] [ 3326.423395] ============================= [ 3326.430605] WARNING: suspicious RCU usage [ 3326.437613] 6.4.3-rc2 #1 Not tainted [ 3326.444481] ----------------------------- [ 3326.451275] include/net/neighbour.h:307 suspicious rcu_dereference_check() usage! 
[ 3326.458148] [ 3326.458148] other info that might help us debug this: [ 3326.458148] [ 3326.477416] [ 3326.477416] rcu_scheduler_active = 2, debug_locks = 1 [ 3326.489637] 2 locks held by ping/19436: [ 3326.495704] #0: ffff00000bce3570 (sk_lock-AF_INET){+.+.}-{0:0}, at: raw_sendmsg+0x25c/0xf18 [ 3326.501817] #1: ffff80000b397b98 (rcu_read_lock_bh){....}-{1:2}, at: vrf_finish_output+0x70/0x8d0 [ 3326.509274] [ 3326.509274] stack backtrace: [ 3326.523226] CPU: 1 PID: 19436 Comm: ping Not tainted 6.4.3-rc2 #1 [ 3326.528565] Hardware name: Qualcomm Technologies, Inc. APQ 8016 SBC (DT) [ 3326.533774] Call trace: [ 3326.540093] dump_backtrace+0xa0/0x128 [ 3326.545130] show_stack+0x30/0x48 [ 3326.550107] dump_stack_lvl+0x90/0xb0 [ 3326.554925] dump_stack+0x18/0x28 [ 3326.559493] lockdep_rcu_suspicious+0x16c/0x230 [ 3326.563974] vrf_finish_output+0x618/0x8d0 [ 3326.568383] vrf_output+0x104/0x410 [ 3326.572598] ip_send_skb+0x60/0x128 [ 3326.576589] ip_push_pending_frames+0x44/0x70 [ 3326.580477] raw_sendmsg+0x584/0xf18 [ 3326.584239] inet_sendmsg+0x50/0x88 [ 3326.587983] __sys_sendto+0xfc/0x198 [ 3326.591723] __arm64_sys_sendto+0x34/0x50 [ 3326.595464] invoke_syscall+0x8c/0x120 [ 3326.599199] el0_svc_common.constprop.0+0x104/0x130 [ 3326.602929] do_el0_svc+0x44/0xb8 [ 3326.606753] el0_svc+0x40/0xa8 [ 3326.610378] el0t_64_sync_handler+0xbc/0x138 [ 3326.614029] el0t_64_sync+0x190/0x198 # TEST: IPv4 no xfrm policy [ OK ] Links: - https://qa-reports.linaro.org/lkft/linux-stable-rc-linux-6.4.y/build/v6.4.1… - https://qa-reports.linaro.org/lkft/linux-stable-rc-linux-6.4.y/build/v6.4.1… - https://qa-reports.linaro.org/lkft/linux-stable-rc-linux-6.4.y/build/v6.4.1… metadata: git_ref: linux-6.4.y git_repo: https://gitlab.com/Linaro/lkft/mirrors/stable/linux-stable-rc git_sha: 3e37df3ffd9a648c9f88f6bbca158e43d5077bef git_describe: v6.4.1-22-g3e37df3ffd9a kernel_version: 6.4.3-rc2 kernel-config: https://storage.tuxsuite.com/public/linaro/lkft/builds/2SLr5Fizk6DFnf66nxHo… 
artifact-location: https://storage.tuxsuite.com/public/linaro/lkft/builds/2SLr5Fizk6DFnf66nxHo… toolchain: gcc-11 ## Build * kernel: 6.4.3-rc2 * git: https://gitlab.com/Linaro/lkft/mirrors/stable/linux-stable-rc * git branch: linux-6.4.y * git commit: 3e37df3ffd9a648c9f88f6bbca158e43d5077bef * git describe: v6.4.1-22-g3e37df3ffd9a * test details: https://qa-reports.linaro.org/lkft/linux-stable-rc-linux-6.4.y/build/v6.4.1… ## Test Regressions (compared to v6.4.1) ## Metric Regressions (compared to v6.4.1) ## Test Fixes (compared to v6.4.1) ## Metric Fixes (compared to v6.4.1) ## Test result summary total: 173253, pass: 148435, fail: 2272, skip: 22546, xfail: 0 ## Build Summary * arc: 5 total, 5 passed, 0 failed * arm: 145 total, 145 passed, 0 failed * arm64: 54 total, 53 passed, 1 failed * i386: 41 total, 41 passed, 0 failed * mips: 30 total, 28 passed, 2 failed * parisc: 4 total, 4 passed, 0 failed * powerpc: 38 total, 36 passed, 2 failed * riscv: 26 total, 25 passed, 1 failed * s390: 16 total, 14 passed, 2 failed * sh: 14 total, 12 passed, 2 failed * sparc: 8 total, 8 passed, 0 failed * x86_64: 46 total, 46 passed, 0 failed ## Test suites summary * boot * fwts * kselftest-android * kselftest-arm64 * kselftest-breakpoints * kselftest-capabilities * kselftest-cgroup * kselftest-clone3 * kselftest-core * kselftest-cpu-hotplug * kselftest-cpufreq * kselftest-drivers-dma-buf * kselftest-efivarfs * kselftest-exec * kselftest-filesystems * kselftest-filesystems-binderfs * kselftest-firmware * kselftest-fpu * kselftest-ftrace * kselftest-futex * kselftest-gpio * kselftest-intel_pstate * kselftest-ipc * kselftest-ir * kselftest-kcmp * kselftest-kexec * kselftest-kvm * kselftest-lib * kselftest-livepatch * kselftest-membarrier * kselftest-memfd * kselftest-memory-hotplug * kselftest-mincore * kselftest-mount * kselftest-mqueue * kselftest-net * kselftest-net-forwarding * kselftest-net-mptcp * kselftest-netfilter * kselftest-nsfs * kselftest-openat2 * kselftest-pid_namespace 
* kselftest-pidfd * kselftest-proc * kselftest-pstore * kselftest-ptrace * kselftest-rseq * kselftest-rtc * kselftest-seccomp * kselftest-sigaltstack * kselftest-size * kselftest-splice * kselftest-static_keys * kselftest-sync * kselftest-sysctl * kselftest-tc-testing * kselftest-timens * kselftest-timers * kselftest-tmpfs * kselftest-tpm2 * kselftest-user * kselftest-user_events * kselftest-vDSO * kselftest-watchdog * kselftest-x86 * kselftest-zram * kunit * kvm-unit-tests * libgpiod * libhugetlbfs * log-parser-boot * log-parser-test * ltp-cap_bounds * ltp-commands * ltp-containers * ltp-controllers * ltp-cpuhotplug * ltp-crypto * ltp-cve * ltp-dio * ltp-fcntl-locktests * ltp-filecaps * ltp-fs * ltp-fs_bind * ltp-fs_perms_simple * ltp-fsx * ltp-hugetlb * ltp-io * ltp-ipc * ltp-math * ltp-mm * ltp-nptl * ltp-pty * ltp-sched * ltp-securebits * ltp-smoke * ltp-syscalls * ltp-tracing * network-basic-tests * perf * rcutorture * v4l2-compliance * vdso -- Linaro LKFT https://lkft.linaro.org

2 years, 5 months

4
5
0 0

[PATCH bpf-next v4 0/6] Support defragmenting IPv(4|6) packets in BPF

by Daniel Xu

=== Context === In the context of a middlebox, fragmented packets are tricky to handle. The full 5-tuple of a packet is often only available in the first fragment which makes enforcing consistent policy difficult. There are really only two stateless options, neither of which are very nice: 1. Enforce policy on first fragment and accept all subsequent fragments. This works but may let in certain attacks or allow data exfiltration. 2. Enforce policy on first fragment and drop all subsequent fragments. This does not really work b/c some protocols may rely on fragmentation. For example, DNS may rely on oversized UDP packets for large responses. So stateful tracking is the only sane option. RFC 8900 [0] calls this out as well in section 6.3: Middleboxes [...] should process IP fragments in a manner that is consistent with [RFC0791] and [RFC8200]. In many cases, middleboxes must maintain state in order to achieve this goal. === BPF related bits === Policy has traditionally been enforced from XDP/TC hooks. Both hooks run before kernel reassembly facilities. However, with the new BPF_PROG_TYPE_NETFILTER, we can rather easily hook into existing netfilter reassembly infra. The basic idea is we bump a refcnt on the netfilter defrag module and then run the bpf prog after the defrag module runs. This allows bpf progs to transparently see full, reassembled packets. The nice thing about this is that progs don't have to carry around logic to detect fragments. 
=== Changelog === Changes from v3: * Correctly initialize `addrlen` stack var for recvmsg() Changes from v2: * module_put() if ->enable() fails * Fix CI build errors Changes from v1: * Drop bpf_program__attach_netfilter() patches * static -> static const where appropriate * Fix callback assignment order during registration * Only request_module() if callbacks are missing * Fix retval when modprobe fails in userspace * Fix v6 defrag module name (nf_defrag_ipv6_hooks -> nf_defrag_ipv6) * Simplify priority checking code * Add warning if module doesn't assign callbacks in the future * Take refcnt on module while defrag link is active [0]: https://datatracker.ietf.org/doc/html/rfc8900 Daniel Xu (6): netfilter: defrag: Add glue hooks for enabling/disabling defrag netfilter: bpf: Support BPF_F_NETFILTER_IP_DEFRAG in netfilter link netfilter: bpf: Prevent defrag module unload while link active bpf: selftests: Support not connecting client socket bpf: selftests: Support custom type and proto for client sockets bpf: selftests: Add defrag selftests include/linux/netfilter.h | 15 + include/uapi/linux/bpf.h | 5 + net/ipv4/netfilter/nf_defrag_ipv4.c | 17 +- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 11 + net/netfilter/core.c | 6 + net/netfilter/nf_bpf_link.c | 150 +++++++++- tools/include/uapi/linux/bpf.h | 5 + tools/testing/selftests/bpf/Makefile | 4 +- .../selftests/bpf/generate_udp_fragments.py | 90 ++++++ .../selftests/bpf/ip_check_defrag_frags.h | 57 ++++ tools/testing/selftests/bpf/network_helpers.c | 26 +- tools/testing/selftests/bpf/network_helpers.h | 3 + .../bpf/prog_tests/ip_check_defrag.c | 283 ++++++++++++++++++ .../selftests/bpf/progs/ip_check_defrag.c | 104 +++++++ 14 files changed, 754 insertions(+), 22 deletions(-) create mode 100755 tools/testing/selftests/bpf/generate_udp_fragments.py create mode 100644 tools/testing/selftests/bpf/ip_check_defrag_frags.h create mode 100644 tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c create mode 100644 
tools/testing/selftests/bpf/progs/ip_check_defrag.c -- 2.41.0

2 years, 5 months

1
3
0 0

[for-linus][PATCH 1/5] selftests/user_events: Test struct size match cases

by Steven Rostedt

From: Beau Belgrave <beaub(a)linux.microsoft.com> The selftests for user_events currently do not ensure that the edge cases for struct types work properly with size differences. Add cases for mismatching struct names and sizes to ensure they work properly. Link: https://lkml.kernel.org/r/20230629235049.581-3-beaub@linux.microsoft.com Cc: Shuah Khan <skhan(a)linuxfoundation.org> Cc: linux-kselftest(a)vger.kernel.org Signed-off-by: Beau Belgrave <beaub(a)linux.microsoft.com> Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org> --- tools/testing/selftests/user_events/dyn_test.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/testing/selftests/user_events/dyn_test.c b/tools/testing/selftests/user_events/dyn_test.c index d6979a48478f..91a4444ad42b 100644 --- a/tools/testing/selftests/user_events/dyn_test.c +++ b/tools/testing/selftests/user_events/dyn_test.c @@ -217,6 +217,18 @@ TEST_F(user, matching) { /* Types don't match */ TEST_NMATCH("__test_event u64 a; u64 b", "__test_event u32 a; u32 b"); + + /* Struct name and size matches */ + TEST_MATCH("__test_event struct my_struct a 20", + "__test_event struct my_struct a 20"); + + /* Struct name don't match */ + TEST_NMATCH("__test_event struct my_struct a 20", + "__test_event struct my_struct b 20"); + + /* Struct size don't match */ + TEST_NMATCH("__test_event struct my_struct a 20", + "__test_event struct my_struct a 21"); } int main(int argc, char **argv) -- 2.40.1

2 years, 5 months

1
0
0 0

[PATCH][next] selftests/mm: mkdirty: Fix incorrect position of #endif

by Colin Ian King

The #endif is on the wrong side of a }, causing a build failure when __NR_userfaultfd is not defined. Fix this by moving the #endif to enclose the }. Fixes: 9eac40fc0cc7 ("selftests/mm: mkdirty: test behavior of (pte|pmd)_mkdirty on VMAs without write permissions") Signed-off-by: Colin Ian King <colin.i.king(a)gmail.com> --- tools/testing/selftests/mm/mkdirty.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/mkdirty.c b/tools/testing/selftests/mm/mkdirty.c index 6d71d972997b..301abb99e027 100644 --- a/tools/testing/selftests/mm/mkdirty.c +++ b/tools/testing/selftests/mm/mkdirty.c @@ -321,8 +321,8 @@ static void test_uffdio_copy(void) munmap: munmap(dst, pagesize); free(src); -#endif /* __NR_userfaultfd */ } +#endif /* __NR_userfaultfd */ int main(void) { -- 2.39.2

2 years, 5 months

2
1
0 0

[linux-next:master] BUILD REGRESSION 40b055fe7f276cf2c1da47316c52f2ff9255a68a

by kernel test robot

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master branch HEAD: 40b055fe7f276cf2c1da47316c52f2ff9255a68a Add linux-next specific files for 20230712 Error/Warning reports: https://lore.kernel.org/oe-kbuild-all/202306122223.HHER4zOo-lkp@intel.com https://lore.kernel.org/oe-kbuild-all/202306210212.N0BipYQd-lkp@intel.com Error/Warning: (recently discovered and may have been fixed) arch/parisc/kernel/pdt.c:67:6: warning: no previous prototype for 'arch_report_meminfo' [-Wmissing-prototypes] kernel/bpf/verifier.c:3959:12: warning: stack frame size (2064) exceeds limit (2048) in '__mark_chain_precision' [-Wframe-larger-than] lib/kunit/executor_test.c:138:4: warning: cast from 'void (*)(const void *)' to 'kunit_action_t *' (aka 'void (*)(void *)') converts to incompatible function type [-Wcast-function-type-strict] lib/kunit/test.c:775:38: warning: cast from 'void (*)(const void *)' to 'kunit_action_t *' (aka 'void (*)(void *)') converts to incompatible function type [-Wcast-function-type-strict] Unverified Error/Warning (likely false positive, please contact us if interested): drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c:98 mlx5_devcom_register_device() error: uninitialized symbol 'tmp_dev'. kernel/trace/trace_functions_graph.c:1012 print_graph_return() warn: bitwise AND condition is false here kernel/trace/trace_functions_graph.c:726 print_graph_entry_leaf() warn: bitwise AND condition is false here net/wireless/scan.c:373 cfg80211_gen_new_ie() warn: potential spectre issue 'sub->data' [r] net/wireless/scan.c:397 cfg80211_gen_new_ie() warn: possible spectre second half. 'ext_id' {standard input}: Error: local label `"2" (instance number 9 of a fb label)' is not defined Error/Warning ids grouped by kconfigs: gcc_recent_errors |-- microblaze-randconfig-m041-20230710 | |-- drivers-net-ethernet-mellanox-mlx5-core-lib-devcom.c-mlx5_devcom_register_device()-error:uninitialized-symbol-tmp_dev-. 
| |-- net-wireless-scan.c-cfg80211_gen_new_ie()-warn:possible-spectre-second-half.-ext_id | `-- net-wireless-scan.c-cfg80211_gen_new_ie()-warn:potential-spectre-issue-sub-data-r |-- mips-randconfig-m031-20230710 | |-- kernel-trace-trace_functions_graph.c-print_graph_entry_leaf()-warn:bitwise-AND-condition-is-false-here | `-- kernel-trace-trace_functions_graph.c-print_graph_return()-warn:bitwise-AND-condition-is-false-here |-- parisc-randconfig-r082-20230710 | `-- arch-parisc-kernel-pdt.c:warning:no-previous-prototype-for-arch_report_meminfo `-- sh-allmodconfig `-- standard-input:Error:local-label-(instance-number-of-a-fb-label)-is-not-defined clang_recent_errors |-- hexagon-randconfig-r013-20230712 | |-- lib-kunit-executor_test.c:warning:cast-from-void-(-)(const-void-)-to-kunit_action_t-(aka-void-(-)(void-)-)-converts-to-incompatible-function-type | `-- lib-kunit-test.c:warning:cast-from-void-(-)(const-void-)-to-kunit_action_t-(aka-void-(-)(void-)-)-converts-to-incompatible-function-type |-- riscv-randconfig-r023-20230712 | `-- kernel-bpf-verifier.c:warning:stack-frame-size-()-exceeds-limit-()-in-__mark_chain_precision `-- s390-randconfig-r044-20230712 |-- lib-kunit-executor_test.c:warning:cast-from-void-(-)(const-void-)-to-kunit_action_t-(aka-void-(-)(void-)-)-converts-to-incompatible-function-type `-- lib-kunit-test.c:warning:cast-from-void-(-)(const-void-)-to-kunit_action_t-(aka-void-(-)(void-)-)-converts-to-incompatible-function-type elapsed time: 798m configs tested: 123 configs skipped: 8 tested configs: alpha allyesconfig gcc alpha defconfig gcc alpha randconfig-r003-20230712 gcc alpha randconfig-r015-20230712 gcc alpha randconfig-r025-20230712 gcc arc allyesconfig gcc arc defconfig gcc arc haps_hs_smp_defconfig gcc arc randconfig-r043-20230712 gcc arc vdk_hs38_smp_defconfig gcc arm allmodconfig gcc arm allyesconfig gcc arm am200epdkit_defconfig clang arm davinci_all_defconfig clang arm defconfig gcc arm milbeaut_m10v_defconfig clang arm multi_v7_defconfig 
gcc arm randconfig-r046-20230712 gcc arm rpc_defconfig gcc arm spear13xx_defconfig clang arm64 allyesconfig gcc arm64 defconfig gcc csky defconfig gcc csky randconfig-r034-20230712 gcc hexagon randconfig-r013-20230712 clang hexagon randconfig-r035-20230712 clang hexagon randconfig-r041-20230712 clang hexagon randconfig-r045-20230712 clang i386 allyesconfig gcc i386 buildonly-randconfig-r004-20230712 gcc i386 buildonly-randconfig-r005-20230712 gcc i386 buildonly-randconfig-r006-20230712 gcc i386 debian-10.3 gcc i386 defconfig gcc i386 randconfig-i001-20230712 gcc i386 randconfig-i002-20230712 gcc i386 randconfig-i003-20230712 gcc i386 randconfig-i004-20230712 gcc i386 randconfig-i005-20230712 gcc i386 randconfig-i006-20230712 gcc i386 randconfig-i011-20230712 clang i386 randconfig-i012-20230712 clang i386 randconfig-i013-20230712 clang i386 randconfig-i014-20230712 clang i386 randconfig-i015-20230712 clang i386 randconfig-i016-20230712 clang loongarch allmodconfig gcc loongarch allnoconfig gcc loongarch defconfig gcc m68k allmodconfig gcc m68k allyesconfig gcc m68k defconfig gcc microblaze randconfig-r021-20230712 gcc mips allmodconfig gcc mips allyesconfig gcc mips randconfig-r031-20230712 clang mips randconfig-r032-20230712 clang nios2 alldefconfig gcc nios2 defconfig gcc nios2 randconfig-r001-20230712 gcc parisc allyesconfig gcc parisc defconfig gcc parisc randconfig-r036-20230712 gcc parisc64 defconfig gcc powerpc allmodconfig gcc powerpc allnoconfig gcc powerpc bamboo_defconfig gcc powerpc cm5200_defconfig gcc powerpc powernv_defconfig clang powerpc storcenter_defconfig gcc riscv allmodconfig gcc riscv allnoconfig gcc riscv allyesconfig gcc riscv defconfig gcc riscv randconfig-r023-20230712 clang riscv randconfig-r042-20230712 clang riscv rv32_defconfig gcc s390 allmodconfig gcc s390 allyesconfig gcc s390 defconfig gcc s390 randconfig-r044-20230712 clang sh allmodconfig gcc sh ecovec24-romimage_defconfig gcc sh landisk_defconfig gcc sh randconfig-r033-20230712 
gcc sh sh7757lcr_defconfig gcc sh urquell_defconfig gcc sparc allyesconfig gcc sparc defconfig gcc sparc64 alldefconfig gcc sparc64 randconfig-r011-20230712 gcc sparc64 randconfig-r014-20230712 gcc um allmodconfig clang um allnoconfig clang um allyesconfig clang um defconfig gcc um i386_defconfig gcc um randconfig-r012-20230712 gcc um x86_64_defconfig gcc x86_64 allyesconfig gcc x86_64 buildonly-randconfig-r001-20230712 gcc x86_64 buildonly-randconfig-r002-20230712 gcc x86_64 buildonly-randconfig-r003-20230712 gcc x86_64 defconfig gcc x86_64 kexec gcc x86_64 randconfig-r026-20230712 clang x86_64 randconfig-x001-20230712 clang x86_64 randconfig-x002-20230712 clang x86_64 randconfig-x003-20230712 clang x86_64 randconfig-x004-20230712 clang x86_64 randconfig-x005-20230712 clang x86_64 randconfig-x006-20230712 clang x86_64 randconfig-x011-20230712 gcc x86_64 randconfig-x012-20230712 gcc x86_64 randconfig-x013-20230712 gcc x86_64 randconfig-x014-20230712 gcc x86_64 randconfig-x015-20230712 gcc x86_64 randconfig-x016-20230712 gcc x86_64 rhel-8.3-rust clang x86_64 rhel-8.3 gcc xtensa defconfig gcc xtensa randconfig-r006-20230712 gcc xtensa randconfig-r016-20230712 gcc -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki

2 years, 5 months

1
0
0 0

[PATCH v2] selftests/zram: avoid subshells and bc for ratio calculations

by David Disseldorp

Awk is already called for /sys/block/zram#/mm_stat parsing, so use it to also perform the floating point capacity vs consumption ratio calculations. The test output is unchanged. This allows bc to be dropped as a dependency for the zram selftests. The documented free dependency can also be removed following d18da7ec37195 ("selftests/zram01.sh: Fix compression ratio calculation") Signed-off-by: David Disseldorp <ddiss(a)suse.de> --- tools/testing/selftests/zram/README | 2 -- tools/testing/selftests/zram/zram01.sh | 18 ++++++++---------- 2 files changed, 8 insertions(+), 12 deletions(-) v2: drop unused dependencies from selftests/zram/README diff --git a/tools/testing/selftests/zram/README b/tools/testing/selftests/zram/README index 110b34834a6fa..510ca5a1087f5 100644 --- a/tools/testing/selftests/zram/README +++ b/tools/testing/selftests/zram/README @@ -27,9 +27,7 @@ zram01.sh: creates general purpose ram disks with ext4 filesystems zram02.sh: creates block device for swap Commands required for testing: - - bc - dd - - free - awk - mkswap - swapon diff --git a/tools/testing/selftests/zram/zram01.sh b/tools/testing/selftests/zram/zram01.sh index 8f4affe34f3e4..df1b1d4158989 100755 --- a/tools/testing/selftests/zram/zram01.sh +++ b/tools/testing/selftests/zram/zram01.sh @@ -33,7 +33,7 @@ zram_algs="lzo" zram_fill_fs() { - for i in $(seq $dev_start $dev_end); do + for ((i = $dev_start; i <= $dev_end && !ERR_CODE; i++)); do echo "fill zram$i..." 
local b=0 while [ true ]; do @@ -44,15 +44,13 @@ zram_fill_fs() done echo "zram$i can be filled with '$b' KB" - local mem_used_total=`awk '{print $3}' "/sys/block/zram$i/mm_stat"` - local v=$((100 * 1024 * $b / $mem_used_total)) - if [ "$v" -lt 100 ]; then - echo "FAIL compression ratio: 0.$v:1" - ERR_CODE=-1 - return - fi - - echo "zram compression ratio: $(echo "scale=2; $v / 100 " | bc):1: OK" + awk -v b="$b" '{ v = (100 * 1024 * b / $3) } END { + if (v < 100) { + printf "FAIL compression ratio: 0.%u:1\n", v + exit 1 + } + printf "zram compression ratio: %.2f:1: OK\n", v / 100 + }' "/sys/block/zram$i/mm_stat" || ERR_CODE=-1 done } -- 2.35.3

2 years, 5 months

1
0
0 0

Słowa kluczowe do wypozycjonowania

by Adam Charachuta

Dzień dobry, zapoznałem się z Państwa ofertą i z przyjemnością przyznaję, że przyciąga uwagę i zachęca do dalszych rozmów. Pomyślałem, że może mógłbym mieć swój wkład w Państwa rozwój i pomóc dotrzeć z tą ofertą do większego grona odbiorców. Pozycjonuję strony www, dzięki czemu generują świetny ruch w sieci. Możemy porozmawiać w najbliższym czasie? Pozdrawiam Adam Charachuta

2 years, 5 months

1
0
0 0

[PATCH v24 0/5] Implement IOCTL to get and optionally clear info about PTEs

by Muhammad Usama Anjum

*Changes in v24*: - Rebase on top of next-20230710 - Place WP markers in case of hole as well *Changes in v23*: - Set vec_buf_index in loop only when vec_buf_index is set - Return -EFAULT instead of -EINVAL if vec is NULL - Correctly return the walk ending address to the page granularity *Changes in v22*: - Interface change: - Replace [start start + len) with [start, end) - Return the ending address of the address walk in start *Changes in v21*: - Abort walk instead of returning error if WP is to be performed on partial hugetlb *Changes in v20* - Correct PAGE_IS_FILE and add PAGE_IS_PFNZERO *Changes in v19* - Minor changes and interface updates *Changes in v18* - Rebase on top of next-20230613 - Minor updates *Changes in v17* - Rebase on top of next-20230606 - Minor improvements in PAGEMAP_SCAN IOCTL patch *Changes in v16* - Fix a corner case - Add exclusive PM_SCAN_OP_WP back *Changes in v15* - Build fix (Add missed build fix in RESEND) *Changes in v14* - Fix build error caused by #ifdef added at last minute in some configs *Changes in v13* - Rebase on top of next-20230414 - Give-up on using uffd_wp_range() and write new helpers, flush tlb only once *Changes in v12* - Update and other memory types to UFFD_FEATURE_WP_ASYNC - Rebaase on top of next-20230406 - Review updates *Changes in v11* - Rebase on top of next-20230307 - Base patches on UFFD_FEATURE_WP_UNPOPULATED - Do a lot of cosmetic changes and review updates - Remove ENGAGE_WP + !GET operation as it can be performed with UFFDIO_WRITEPROTECT *Changes in v10* - Add specific condition to return error if hugetlb is used with wp async - Move changes in tools/include/uapi/linux/fs.h to separate patch - Add documentation *Changes in v9:* - Correct fault resolution for userfaultfd wp async - Fix build warnings and errors which were happening on some configs - Simplify pagemap ioctl's code *Changes in v8:* - Update uffd async wp implementation - Improve PAGEMAP_IOCTL implementation *Changes in v7:* - Add uffd wp 
async - Update the IOCTL to use uffd under the hood instead of soft-dirty flags *Motivation* The real motivation for adding PAGEMAP_SCAN IOCTL is to emulate Windows GetWriteWatch() syscall [1]. The GetWriteWatch{} retrieves the addresses of the pages that are written to in a region of virtual memory. This syscall is used in Windows applications and games etc. This syscall is being emulated in pretty slow manner in userspace. Our purpose is to enhance the kernel such that we translate it efficiently in a better way. Currently some out of tree hack patches are being used to efficiently emulate it in some kernels. We intend to replace those with these patches. So the whole gaming on Linux can effectively get benefit from this. It means there would be tons of users of this code. CRIU use case [2] was mentioned by Andrei and Danylo: > Use cases for migrating sparse VMAs are binaries sanitized with ASAN, > MSAN or TSAN [3]. All of these sanitizers produce sparse mappings of > shadow memory [4]. Being able to migrate such binaries allows to highly > reduce the amount of work needed to identify and fix post-migration > crashes, which happen constantly. Andrei's defines the following uses of this code: * it is more granular and allows us to track changed pages more effectively. The current interface can clear dirty bits for the entire process only. In addition, reading info about pages is a separate operation. It means we must freeze the process to read information about all its pages, reset dirty bits, only then we can start dumping pages. The information about pages becomes more and more outdated, while we are processing pages. The new interface solves both these downsides. First, it allows us to read pte bits and clear the soft-dirty bit atomically. It means that CRIU will not need to freeze processes to pre-dump their memory. Second, it clears soft-dirty bits for a specified region of memory. It means CRIU will have actual info about pages to the moment of dumping them. 
* The new interface has to be much faster because basic page filtering is happening in the kernel. With the old interface, we have to read pagemap for each page. *Implementation Evolution (Short Summary)* From the definition of GetWriteWatch(), we feel like kernel's soft-dirty feature can be used under the hood with some additions like: * reset soft-dirty flag for only a specific region of memory instead of clearing the flag for the entire process * get and clear soft-dirty flag for a specific region atomically So we decided to use ioctl on pagemap file to read or/and reset soft-dirty flag. But using soft-dirty flag, sometimes we get extra pages which weren't even written. They had become soft-dirty because of VMA merging and VM_SOFTDIRTY flag. This breaks the definition of GetWriteWatch(). We were able to by-pass this shortcoming by ignoring VM_SOFTDIRTY until David reported that mprotect etc messes up the soft-dirty flag while ignoring VM_SOFTDIRTY [5]. This wasn't happening until [6] got introduced. We discussed if we can revert these patches. But we could not reach any conclusion. So at this point, I made a couple of attempts to solve this whole VM_SOFTDIRTY issue by correcting the soft-dirty implementation: * [7] Correct the bug fixed wrongly back in 2014. It had potential to cause regression. We left it behind. * [8] Keep a list of soft-dirty part of a VMA across splits and merges. I got the reply don't increase the size of the VMA by 8 bytes. At this point, we left soft-dirty considering it is too delicate and userfaultfd [9] seemed like the only way forward. From there onward, we have been basing soft-dirty emulation on userfaultfd wp feature where kernel resolves the faults itself when WP_ASYNC feature is used. It was straightforward to add WP_ASYNC feature in userfaultfd. Now we get only those pages dirty or written-to which are really written in reality. 
(PS: Another userfaultfd feature, WP_UNPOPULATED, is required to avoid pre-faulting memory before write-protecting [9].) All the different masks were added at the request of CRIU devs to make the interface more generic and better. [1] https://learn.microsoft.com/en-us/windows/win32/api/memoryapi/nf-memoryapi-… [2] https://lore.kernel.org/all/20221014134802.1361436-1-mdanylo@google.com [3] https://github.com/google/sanitizers [4] https://github.com/google/sanitizers/wiki/AddressSanitizerAlgorithm#64-bit [5] https://lore.kernel.org/all/bfcae708-db21-04b4-0bbe-712badd03071@redhat.com [6] https://lore.kernel.org/all/20220725142048.30450-1-peterx@redhat.com/ [7] https://lore.kernel.org/all/20221122115007.2787017-1-usama.anjum@collabora.… [8] https://lore.kernel.org/all/20221220162606.1595355-1-usama.anjum@collabora.… [9] https://lore.kernel.org/all/20230306213925.617814-1-peterx@redhat.com [10] https://lore.kernel.org/all/20230125144529.1630917-1-mdanylo@google.com * Original Cover letter from v8* Hello, Note: Soft-dirty pages and pages which have been written-to are synonyms. As the kernel already has a soft-dirty feature, which we have given up on using, we are using written-to terminology while using UFFD async WP under the hood. This IOCTL, PAGEMAP_SCAN on pagemap file can be used to get and/or clear the info about page table entries. The following operations are supported in this ioctl: - Get the information if the pages have been written-to (PAGE_IS_WRITTEN), file mapped (PAGE_IS_FILE), present (PAGE_IS_PRESENT) or swapped (PAGE_IS_SWAPPED). - Write-protect the pages (PAGEMAP_WP_ENGAGE) to start finding which pages have been written-to. - Find pages which have been written-to and write protect the pages (atomic PAGE_IS_WRITTEN + PAGEMAP_WP_ENGAGE) It is possible to find and clear soft-dirty pages entirely in userspace. 
But it isn't efficient: - The mprotect and SIGSEGV handler for bookkeeping - The userfaultfd wp (synchronous) with the handler for bookkeeping Some benchmarks can be seen here[1]. This series adds features that weren't present earlier: - There is no atomic get soft-dirty/Written-to status and clear present in the kernel. - The pages which have been written-to can not be found in accurate way. (Kernel's soft-dirty PTE bit + soft-dirty VMA bit shows more soft-dirty pages than there actually are.) Historically, soft-dirty PTE bit tracking has been used in the CRIU project. The procfs interface is enough for finding the soft-dirty bit status and clearing the soft-dirty bit of all the pages of a process. We have the use case where we need to track the soft-dirty PTE bit for only specific pages on-demand. We need this tracking and clear mechanism of a region of memory while the process is running to emulate the getWriteWatch() syscall of Windows. *(Moved to using UFFD instead of soft-dirty feature to find pages which have been written-to from v7 patch series)*: Stop using the soft-dirty flags for finding which pages have been written to. It is too delicate and wrong as it shows more soft-dirty pages than the actual soft-dirty pages. There is no interest in correcting it [2][3] as this is how the feature was written years ago. It shouldn't be updated to changed behaviour. Peter Xu has suggested using the async version of the UFFD WP [4] as it is based inherently on the PTEs. So in this patch series, I've added a new mode to the UFFD which is asynchronous version of the write protect. When this variant of the UFFD WP is used, the page faults are resolved automatically by the kernel. The pages which have been written-to can be found by reading pagemap file (!PM_UFFD_WP). This feature can be used successfully to find which pages have been written to from the time the pages were write protected. 
This works just like the soft-dirty flag without showing any extra pages which aren't soft-dirty in reality. The information related to pages if the page is file mapped, present and swapped is required for the CRIU project [5][6]. The addition of the required mask, any mask, excluded mask and return masks are also required for the CRIU project [5]. The IOCTL returns the addresses of the pages which match the specific masks. The page addresses are returned in struct page_region in a compact form. The max_pages is needed to support a use case where user only wants to get a specific number of pages. So there is no need to find all the pages of interest in the range when max_pages is specified. The IOCTL returns when the maximum number of the pages are found. The max_pages is optional. If max_pages is specified, it must be equal or greater than the vec_size. This restriction is needed to handle worse case when one page_region only contains info of one page and it cannot be compacted. This is needed to emulate the Windows getWriteWatch() syscall. The patch series include the detailed selftest which can be used as an example for the uffd async wp test and PAGEMAP_IOCTL. It shows the interface usages as well. 
[1] https://lore.kernel.org/lkml/54d4c322-cd6e-eefd-b161-2af2b56aae24@collabora… [2] https://lore.kernel.org/all/20221220162606.1595355-1-usama.anjum@collabora.… [3] https://lore.kernel.org/all/20221122115007.2787017-1-usama.anjum@collabora.… [4] https://lore.kernel.org/all/Y6Hc2d+7eTKs7AiH@x1n [5] https://lore.kernel.org/all/YyiDg79flhWoMDZB@gmail.com/ [6] https://lore.kernel.org/all/20221014134802.1361436-1-mdanylo@google.com/ Regards, Muhammad Usama Anjum Muhammad Usama Anjum (4): fs/proc/task_mmu: Implement IOCTL to get and optionally clear info about PTEs tools headers UAPI: Update linux/fs.h with the kernel sources mm/pagemap: add documentation of PAGEMAP_SCAN IOCTL selftests: mm: add pagemap ioctl tests Peter Xu (1): userfaultfd: UFFD_FEATURE_WP_ASYNC Documentation/admin-guide/mm/pagemap.rst | 58 + Documentation/admin-guide/mm/userfaultfd.rst | 35 + fs/proc/task_mmu.c | 583 +++++++ fs/userfaultfd.c | 26 +- include/linux/hugetlb.h | 1 + include/linux/userfaultfd_k.h | 21 +- include/uapi/linux/fs.h | 55 + include/uapi/linux/userfaultfd.h | 9 +- mm/hugetlb.c | 34 +- mm/memory.c | 27 +- tools/include/uapi/linux/fs.h | 55 + tools/testing/selftests/mm/.gitignore | 2 + tools/testing/selftests/mm/Makefile | 3 +- tools/testing/selftests/mm/config | 1 + tools/testing/selftests/mm/pagemap_ioctl.c | 1464 ++++++++++++++++++ tools/testing/selftests/mm/run_vmtests.sh | 4 + 16 files changed, 2354 insertions(+), 24 deletions(-) create mode 100644 tools/testing/selftests/mm/pagemap_ioctl.c mode change 100644 => 100755 tools/testing/selftests/mm/run_vmtests.sh -- 2.39.2

2 years, 5 months

2
7
0 0

[PATCH v2 00/12] tools/nolibc: shrink arch support

by Zhangjin Wu

Hi, Willy This is v2 of the "tools/nolibc: shrink arch support" [1]. This v2 has no core code logic change, but applies some suggestions from Willy and Thomas, one is using post-whitespaces instead of post-tab, another is restructuring the arch support directory and files [2]. Like musl, this v2 creates <ARCH> directory for every arch and splits the old arch-<ARCH>.h to <ARCH>/{crt.h, sys.h} and at the same time, splits the old arch.h to crt_arch.h and sys_arch.h. at last, only need to include crt_arch.h in crt.h and sys_arch.h in sys.h respectively, and no longer need to include arch.h in the other common headers: crt.h <-- crt_arch.h <-- <ARCH>/crt.h sys.h <-- sys_arch.h <-- <ARCH>/sys.h It is based on the 20230705-nolibc-series2 branch of nolibc repo [3]. It should be applied after the v6 __sysret helper series [4] and the v4 min config support series [5]. Here is the test report for all of the supported architectures: arch/board | result ------------|------------ arm/vexpress-a9 | 142 test(s) passed, 1 skipped, 0 failed. arm/virt | 142 test(s) passed, 1 skipped, 0 failed. aarch64/virt | 142 test(s) passed, 1 skipped, 0 failed. ppc/g3beige | not supported ppc/ppce500 | not supported i386/pc | 142 test(s) passed, 1 skipped, 0 failed. x86_64/pc | 142 test(s) passed, 1 skipped, 0 failed. mipsel/malta | 142 test(s) passed, 1 skipped, 0 failed. loongarch64/virt | 142 test(s) passed, 1 skipped, 0 failed. riscv64/virt | 142 test(s) passed, 1 skipped, 0 failed. riscv32/virt | 0 test(s) passed, 0 skipped, 0 failed. s390x/s390-ccw-virtio | 142 test(s) passed, 1 skipped, 0 failed. Changes from v1 --> v2: * tools/nolibc: rename arch-<ARCH>.h to <ARCH>/arch.h tools/nolibc: split arch.h to crt.h and sys.h Restruct the arch support directory and files. Fix up the errors reported by scripts/checkpatch.pl. * tools/nolibc: sys.h: remove the old sys_stat support Rebase on the new arch support directory and files. 
* tools/nolibc: crt.h: add _start_c Move #include "compiler.h" in the common crt.h too. * tools/nolibc: arm/crt.h: shrink _start with _start_c tools/nolibc: aarch64/crt.h: shrink _start with _start_c tools/nolibc: i386/crt.h: shrink _start with _start_c tools/nolibc: x86_64/crt.h: shrink _start with _start_c tools/nolibc: mips/crt.h: shrink _start with _start_c tools/nolibc: loongarch/crt.h: shrink _start with _start_c tools/nolibc: riscv/crt.h: shrink _start with _start_c tools/nolibc: s390/crt.h: shrink _start with _start_c Rebase on the new arch support directory and files. Use post-whitespaces instead of post-tab. Best regards, Zhangjin --- [1]: https://lore.kernel.org/lkml/cover.1687976753.git.falcon@tinylab.org/ [2]: https://lore.kernel.org/lkml/20230703145500.500460-1-falcon@tinylab.org/ [3]: https://git.kernel.org/pub/scm/linux/kernel/git/wtarreau/nolibc.git [4]: https://lore.kernel.org/lkml/cover.1688739492.git.falcon@tinylab.org/ [5]: https://lore.kernel.org/lkml/cover.1688750763.git.falcon@tinylab.org/ Zhangjin Wu (12): tools/nolibc: rename arch-<ARCH>.h to <ARCH>/arch.h tools/nolibc: split arch.h to crt.h and sys.h tools/nolibc: sys.h: remove the old sys_stat support tools/nolibc: crt.h: add _start_c tools/nolibc: arm/crt.h: shrink _start with _start_c tools/nolibc: aarch64/crt.h: shrink _start with _start_c tools/nolibc: i386/crt.h: shrink _start with _start_c tools/nolibc: x86_64/crt.h: shrink _start with _start_c tools/nolibc: mips/crt.h: shrink _start with _start_c tools/nolibc: loongarch/crt.h: shrink _start with _start_c tools/nolibc: riscv/crt.h: shrink _start with _start_c tools/nolibc: s390/crt.h: shrink _start with _start_c tools/include/nolibc/Makefile | 36 ++++--- tools/include/nolibc/aarch64/crt.h | 24 +++++ .../nolibc/{arch-aarch64.h => aarch64/sys.h} | 68 +------------ tools/include/nolibc/arch.h | 36 ------- tools/include/nolibc/arm/crt.h | 25 +++++ .../include/nolibc/{arch-arm.h => arm/sys.h} | 96 +------------------ 
tools/include/nolibc/crt.h | 60 ++++++++++++ tools/include/nolibc/crt_arch.h | 32 +++++++ tools/include/nolibc/i386/crt.h | 33 +++++++ .../nolibc/{arch-i386.h => i386/sys.h} | 77 +-------------- tools/include/nolibc/loongarch/crt.h | 30 ++++++ .../{arch-loongarch.h => loongarch/sys.h} | 64 +------------ tools/include/nolibc/mips/crt.h | 32 +++++++ .../nolibc/{arch-mips.h => mips/sys.h} | 87 +---------------- tools/include/nolibc/nolibc.h | 2 +- tools/include/nolibc/riscv/crt.h | 28 ++++++ .../nolibc/{arch-riscv.h => riscv/sys.h} | 83 +--------------- tools/include/nolibc/s390/crt.h | 21 ++++ .../nolibc/{arch-s390.h => s390/sys.h} | 74 +------------- tools/include/nolibc/signal.h | 1 - tools/include/nolibc/stdio.h | 1 - tools/include/nolibc/stdlib.h | 2 +- tools/include/nolibc/sys.h | 65 +++---------- tools/include/nolibc/sys_arch.h | 32 +++++++ tools/include/nolibc/time.h | 1 - tools/include/nolibc/types.h | 4 +- tools/include/nolibc/unistd.h | 1 - tools/include/nolibc/x86_64/crt.h | 33 +++++++ .../nolibc/{arch-x86_64.h => x86_64/sys.h} | 74 +------------- 29 files changed, 421 insertions(+), 701 deletions(-) create mode 100644 tools/include/nolibc/aarch64/crt.h rename tools/include/nolibc/{arch-aarch64.h => aarch64/sys.h} (76%) delete mode 100644 tools/include/nolibc/arch.h create mode 100644 tools/include/nolibc/arm/crt.h rename tools/include/nolibc/{arch-arm.h => arm/sys.h} (74%) create mode 100644 tools/include/nolibc/crt.h create mode 100644 tools/include/nolibc/crt_arch.h create mode 100644 tools/include/nolibc/i386/crt.h rename tools/include/nolibc/{arch-i386.h => i386/sys.h} (73%) create mode 100644 tools/include/nolibc/loongarch/crt.h rename tools/include/nolibc/{arch-loongarch.h => loongarch/sys.h} (73%) create mode 100644 tools/include/nolibc/mips/crt.h rename tools/include/nolibc/{arch-mips.h => mips/sys.h} (74%) create mode 100644 tools/include/nolibc/riscv/crt.h rename tools/include/nolibc/{arch-riscv.h => riscv/sys.h} (70%) create mode 100644 
tools/include/nolibc/s390/crt.h rename tools/include/nolibc/{arch-s390.h => s390/sys.h} (68%) create mode 100644 tools/include/nolibc/sys_arch.h create mode 100644 tools/include/nolibc/x86_64/crt.h rename tools/include/nolibc/{arch-x86_64.h => x86_64/sys.h} (76%) -- 2.25.1

2 years, 5 months

3
23
0 0

[PATCH 0/4] selftests/nolibc: simplify conditions and testcases

by Thomas Weißschuh

A few cleanups to the existing test logic. Signed-off-by: Thomas Weißschuh <linux(a)weissschuh.net> --- Thomas Weißschuh (4): selftests/nolibc: make evaluation of test conditions selftests/nolibc: simplify status printing selftests/nolibc: simplify status argument selftests/nolibc: avoid gaps in test numbers tools/testing/selftests/nolibc/nolibc-test.c | 201 +++++++++++---------------- 1 file changed, 85 insertions(+), 116 deletions(-) --- base-commit: 078cda365b3f47f61047a08230925a1478e9a1c8 change-id: 20230711-nolibc-sizeof-long-gaps-0f28cba7ee4d Best regards, -- Thomas Weißschuh <linux(a)weissschuh.net>

2 years, 5 months

2
5
0 0

[PATCH bpf-next v5 0/7] Add SO_REUSEPORT support for TC bpf_sk_assign

by Lorenz Bauer

We want to replace iptables TPROXY with a BPF program at TC ingress. To make this work in all cases we need to assign a SO_REUSEPORT socket to an skb, which is currently prohibited. This series adds support for such sockets to bpf_sk_assign. I did some refactoring to cut down on the amount of duplicate code. The key to this is to use INDIRECT_CALL in the reuseport helpers. To show that this approach is not just beneficial to TC sk_assign I removed duplicate code for bpf_sk_lookup as well. Joint work with Daniel Borkmann. Signed-off-by: Lorenz Bauer <lmb(a)isovalent.com> --- Changes in v5: - Drop reuse_sk == sk check in inet[6]_steal_sock (Kuniyuki) - Link to v4: https://lore.kernel.org/r/20230613-so-reuseport-v4-0-4ece76708bba@isovalent… Changes in v4: - WARN_ON_ONCE if reuseport socket is refcounted (Kuniyuki) - Use inet[6]_ehashfn_t to shorten function declarations (Kuniyuki) - Shuffle documentation patch around (Kuniyuki) - Update commit message to explain why IPv6 needs EXPORT_SYMBOL - Link to v3: https://lore.kernel.org/r/20230613-so-reuseport-v3-0-907b4cbb7b99@isovalent… Changes in v3: - Fix warning re udp_ehashfn and udp6_ehashfn (Simon) - Return higher scoring connected UDP reuseport sockets (Kuniyuki) - Fix ipv6 module builds - Link to v2: https://lore.kernel.org/r/20230613-so-reuseport-v2-0-b7c69a342613@isovalent… Changes in v2: - Correct commit abbrev length (Kuniyuki) - Reduce duplication (Kuniyuki) - Add checks on sk_state (Martin) - Split exporting inet[6]_lookup_reuseport into separate patch (Eric) --- Daniel Borkmann (1): selftests/bpf: Test that SO_REUSEPORT can be used with sk_assign helper Lorenz Bauer (6): udp: re-score reuseport groups when connected sockets are present net: export inet_lookup_reuseport and inet6_lookup_reuseport net: remove duplicate reuseport_lookup functions net: document inet[6]_lookup_reuseport sk_state requirements net: remove duplicate sk_lookup helpers bpf, net: Support SO_REUSEPORT sockets with bpf_sk_assign 
include/net/inet6_hashtables.h | 81 ++++++++- include/net/inet_hashtables.h | 74 +++++++- include/net/sock.h | 7 +- include/uapi/linux/bpf.h | 3 - net/core/filter.c | 2 - net/ipv4/inet_hashtables.c | 68 ++++--- net/ipv4/udp.c | 88 ++++----- net/ipv6/inet6_hashtables.c | 71 +++++--- net/ipv6/udp.c | 98 ++++------ tools/include/uapi/linux/bpf.h | 3 - tools/testing/selftests/bpf/network_helpers.c | 3 + .../selftests/bpf/prog_tests/assign_reuse.c | 197 +++++++++++++++++++++ .../selftests/bpf/progs/test_assign_reuse.c | 142 +++++++++++++++ 13 files changed, 658 insertions(+), 179 deletions(-) --- base-commit: c20f9cef725bc6b19efe372696e8000fb5af0d46 change-id: 20230613-so-reuseport-e92c526173ee Best regards, -- Lorenz Bauer <lmb(a)isovalent.com>

2 years, 5 months

2
9
0 0

[PATCH] selftests/arm64: fix build failure during the "emit_tests" step

by John Hubbard

The build failure reported in [1] occurred because commit 9fc96c7c19df ("selftests: error out if kernel header files are not yet built") added a new "kernel_header_files" dependency to "all", and that triggered another, pre-existing problem. Specifically, the arm64 selftests override the emit_tests target, and that override improperly declares itself to depend upon the "all" target. This is a problem because the "emit_tests" target in lib.mk was not intended to be overridden. emit_tests is a very simple, sequential build target that was originally invoked from the "install" target, which in turn, depends upon "all". That approach worked for years. But with 9fc96c7c19df in place, emit_tests failed, because it does not set up all of the elaborate things that "install" does. And that caused the new "kernel_header_files" target (which depends upon $(KBUILD_OUTPUT) being correct) to fail. Some detail: The "all" target is .PHONY. Therefore, each target that depends on "all" will cause it to be invoked again, and because dependencies are managed quite loosely in the selftests Makefiles, many things will run, even if "all" is invoked several times in immediate succession. So this is not a "real" failure, as far as build steps go: everything gets built, but "all" reports a problem when invoked a second time from a bad environment. To fix this, simply remove the unnecessary "all" dependency from the overridden emit_tests target. The dependency is still effectively honored, because again, invocation is via "install", which also depends upon "all". An alternative approach would be to harden the emit_tests target so that it can depend upon "all", but that's a lot more complicated and hard to get right, and doesn't seem worth it, especially given that emit_tests should probably not be overridden at all. 
[1] https://lore.kernel.org/20230710-kselftest-fix-arm64-v1-1-48e872844f25@kern… Fixes: 9fc96c7c19df ("selftests: error out if kernel header files are not yet built") Reported-by: Mark Brown <broonie(a)kernel.org> Signed-off-by: John Hubbard <jhubbard(a)nvidia.com> --- tools/testing/selftests/arm64/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile index 9460cbe81bcc..ace8b67fb22d 100644 --- a/tools/testing/selftests/arm64/Makefile +++ b/tools/testing/selftests/arm64/Makefile @@ -42,7 +42,7 @@ run_tests: all done # Avoid any output on non arm64 on emit_tests -emit_tests: all +emit_tests: @for DIR in $(ARM64_SUBTARGETS); do \ BUILD_TARGET=$(OUTPUT)/$$DIR; \ make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \ base-commit: d5fe758c21f4770763ae4c05580be239be18947d -- 2.41.0

2 years, 6 months

2
2
0 0

[linux-next:master] BUILD REGRESSION 8e4b7f2f3d6071665b1dfd70786229c8a5d6c256

by kernel test robot

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master branch HEAD: 8e4b7f2f3d6071665b1dfd70786229c8a5d6c256 Add linux-next specific files for 20230711 Error/Warning reports: https://lore.kernel.org/oe-kbuild-all/202306122223.HHER4zOo-lkp@intel.com https://lore.kernel.org/oe-kbuild-all/202306260401.qZlYQpV2-lkp@intel.com https://lore.kernel.org/oe-kbuild-all/202307111309.401QvMTN-lkp@intel.com Error/Warning: (recently discovered and may have been fixed) arch/parisc/kernel/pdt.c:67:6: warning: no previous prototype for 'arch_report_meminfo' [-Wmissing-prototypes] arch/s390/include/asm/io.h:29:17: error: implicit declaration of function 'iounmap'; did you mean 'vunmap'? [-Werror=implicit-function-declaration] drivers/mfd/max77541.c:176:18: warning: cast to smaller integer type 'enum max7754x_ids' from 'const void *' [-Wvoid-pointer-to-enum-cast] drivers/net/arcnet/arc-rimi.c:107:13: error: implicit declaration of function 'ioremap'; did you mean 'ifr_map'? [-Werror=implicit-function-declaration] drivers/net/arcnet/com90xx.c:225:24: error: implicit declaration of function 'ioremap'; did you mean 'ifr_map'? [-Werror=implicit-function-declaration] drivers/net/ethernet/8390/pcnet_cs.c:290:12: error: implicit declaration of function 'ioremap'; did you mean 'ifr_map'? [-Werror=implicit-function-declaration] drivers/net/ethernet/fujitsu/fmvj18x_cs.c:549:12: error: implicit declaration of function 'ioremap'; did you mean 'iounmap'? [-Werror=implicit-function-declaration] drivers/net/ethernet/smsc/smc91c92_cs.c:447:17: error: implicit declaration of function 'ioremap'; did you mean 'ifr_map'? [-Werror=implicit-function-declaration] drivers/net/ethernet/xircom/xirc2ps_cs.c:843:28: error: implicit declaration of function 'ioremap'; did you mean 'iounmap'? [-Werror=implicit-function-declaration] drivers/pcmcia/cistpl.c:103:31: error: implicit declaration of function 'ioremap'; did you mean 'iounmap'? 
[-Werror=implicit-function-declaration] drivers/tty/ipwireless/main.c:115:30: error: implicit declaration of function 'ioremap'; did you mean 'iounmap'? [-Werror=implicit-function-declaration] lib/kunit/executor_test.c:138:4: warning: cast from 'void (*)(const void *)' to 'kunit_action_t *' (aka 'void (*)(void *)') converts to incompatible function type [-Wcast-function-type-strict] lib/kunit/test.c:775:38: warning: cast from 'void (*)(const void *)' to 'kunit_action_t *' (aka 'void (*)(void *)') converts to incompatible function type [-Wcast-function-type-strict] Unverified Error/Warning (likely false positive, please contact us if interested): drivers/clk/imx/clk-imx93.c:294 imx93_clocks_probe() error: uninitialized symbol 'base'. drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c:98 mlx5_devcom_register_device() error: uninitialized symbol 'tmp_dev'. net/wireless/scan.c:373 cfg80211_gen_new_ie() warn: potential spectre issue 'sub->data' [r] net/wireless/scan.c:397 cfg80211_gen_new_ie() warn: possible spectre second half. 'ext_id' {standard input}: Error: local label `"2" (instance number 9 of a fb label)' is not defined Error/Warning ids grouped by kconfigs: gcc_recent_errors |-- arm64-randconfig-m041-20230710 | `-- drivers-clk-imx-clk-imx93.c-imx93_clocks_probe()-error:uninitialized-symbol-base-. 
|-- parisc-randconfig-r083-20230710 | `-- arch-parisc-kernel-pdt.c:warning:no-previous-prototype-for-arch_report_meminfo |-- s390-allmodconfig | |-- arch-s390-include-asm-io.h:error:implicit-declaration-of-function-iounmap | |-- drivers-net-arcnet-arc-rimi.c:error:implicit-declaration-of-function-ioremap | |-- drivers-net-arcnet-com9x.c:error:implicit-declaration-of-function-ioremap | |-- drivers-net-ethernet-fujitsu-fmvj18x_cs.c:error:implicit-declaration-of-function-ioremap | |-- drivers-net-ethernet-pcnet_cs.c:error:implicit-declaration-of-function-ioremap | |-- drivers-net-ethernet-smsc-smc91c92_cs.c:error:implicit-declaration-of-function-ioremap | |-- drivers-net-ethernet-xircom-xirc2ps_cs.c:error:implicit-declaration-of-function-ioremap | |-- drivers-pcmcia-cistpl.c:error:implicit-declaration-of-function-ioremap | `-- drivers-tty-ipwireless-main.c:error:implicit-declaration-of-function-ioremap |-- sh-allmodconfig | `-- standard-input:Error:local-label-(instance-number-of-a-fb-label)-is-not-defined `-- x86_64-randconfig-m001-20230710 |-- drivers-net-ethernet-mellanox-mlx5-core-lib-devcom.c-mlx5_devcom_register_device()-error:uninitialized-symbol-tmp_dev-. 
|-- net-wireless-scan.c-cfg80211_gen_new_ie()-warn:possible-spectre-second-half.-ext_id `-- net-wireless-scan.c-cfg80211_gen_new_ie()-warn:potential-spectre-issue-sub-data-r clang_recent_errors |-- arm-randconfig-r001-20230710 | |-- lib-kunit-executor_test.c:warning:cast-from-void-(-)(const-void-)-to-kunit_action_t-(aka-void-(-)(void-)-)-converts-to-incompatible-function-type | `-- lib-kunit-test.c:warning:cast-from-void-(-)(const-void-)-to-kunit_action_t-(aka-void-(-)(void-)-)-converts-to-incompatible-function-type |-- arm64-randconfig-r013-20230710 | `-- lib-kunit-test.c:warning:cast-from-void-(-)(const-void-)-to-kunit_action_t-(aka-void-(-)(void-)-)-converts-to-incompatible-function-type |-- arm64-randconfig-r024-20230710 | |-- drivers-mfd-max77541.c:warning:cast-to-smaller-integer-type-enum-max7754x_ids-from-const-void | |-- lib-kunit-executor_test.c:warning:cast-from-void-(-)(const-void-)-to-kunit_action_t-(aka-void-(-)(void-)-)-converts-to-incompatible-function-type | `-- lib-kunit-test.c:warning:cast-from-void-(-)(const-void-)-to-kunit_action_t-(aka-void-(-)(void-)-)-converts-to-incompatible-function-type |-- hexagon-randconfig-r041-20230710 | `-- lib-kunit-test.c:warning:cast-from-void-(-)(const-void-)-to-kunit_action_t-(aka-void-(-)(void-)-)-converts-to-incompatible-function-type |-- hexagon-randconfig-r045-20230710 | `-- lib-kunit-test.c:warning:cast-from-void-(-)(const-void-)-to-kunit_action_t-(aka-void-(-)(void-)-)-converts-to-incompatible-function-type |-- riscv-randconfig-r042-20230710 | |-- lib-kunit-executor_test.c:warning:cast-from-void-(-)(const-void-)-to-kunit_action_t-(aka-void-(-)(void-)-)-converts-to-incompatible-function-type | `-- lib-kunit-test.c:warning:cast-from-void-(-)(const-void-)-to-kunit_action_t-(aka-void-(-)(void-)-)-converts-to-incompatible-function-type `-- x86_64-buildonly-randconfig-r002-20230711 `-- drivers-mfd-max77541.c:warning:cast-to-smaller-integer-type-enum-max7754x_ids-from-const-void elapsed time: 720m configs tested: 
140 configs skipped: 4 tested configs: alpha allyesconfig gcc alpha defconfig gcc alpha randconfig-r004-20230710 gcc alpha randconfig-r005-20230710 gcc alpha randconfig-r034-20230710 gcc arc alldefconfig gcc arc allyesconfig gcc arc axs103_defconfig gcc arc defconfig gcc arc randconfig-r043-20230710 gcc arm allmodconfig gcc arm allyesconfig gcc arm aspeed_g4_defconfig clang arm defconfig gcc arm dove_defconfig clang arm lpc18xx_defconfig gcc arm mvebu_v7_defconfig gcc arm netwinder_defconfig clang arm omap2plus_defconfig gcc arm randconfig-r001-20230710 clang arm randconfig-r026-20230710 gcc arm randconfig-r046-20230710 gcc arm sama5_defconfig gcc arm spear3xx_defconfig clang arm stm32_defconfig gcc arm versatile_defconfig clang arm64 allyesconfig gcc arm64 defconfig gcc arm64 randconfig-r013-20230710 clang arm64 randconfig-r024-20230710 clang csky defconfig gcc csky randconfig-r006-20230710 gcc csky randconfig-r016-20230710 gcc csky randconfig-r036-20230710 gcc hexagon defconfig clang hexagon randconfig-r041-20230710 clang hexagon randconfig-r045-20230710 clang i386 allyesconfig gcc i386 buildonly-randconfig-r004-20230711 clang i386 buildonly-randconfig-r005-20230711 clang i386 buildonly-randconfig-r006-20230711 clang i386 debian-10.3 gcc i386 defconfig gcc i386 randconfig-i001-20230710 gcc i386 randconfig-i002-20230710 gcc i386 randconfig-i003-20230710 gcc i386 randconfig-i004-20230710 gcc i386 randconfig-i005-20230710 gcc i386 randconfig-i006-20230710 gcc i386 randconfig-i011-20230710 clang i386 randconfig-i012-20230710 clang i386 randconfig-i013-20230710 clang i386 randconfig-i014-20230710 clang i386 randconfig-i015-20230710 clang i386 randconfig-i016-20230710 clang i386 randconfig-r011-20230710 clang loongarch allmodconfig gcc loongarch allnoconfig gcc loongarch defconfig gcc m68k allmodconfig gcc m68k allyesconfig gcc m68k amcore_defconfig gcc m68k defconfig gcc m68k m5307c3_defconfig gcc m68k randconfig-r021-20230710 gcc m68k stmark2_defconfig gcc m68k 
virt_defconfig gcc microblaze mmu_defconfig gcc mips allmodconfig gcc mips allyesconfig gcc mips cobalt_defconfig gcc mips maltaup_defconfig clang nios2 defconfig gcc parisc allyesconfig gcc parisc defconfig gcc parisc randconfig-r015-20230710 gcc parisc64 defconfig gcc powerpc allmodconfig gcc powerpc allnoconfig gcc powerpc asp8347_defconfig gcc powerpc linkstation_defconfig gcc powerpc mvme5100_defconfig clang powerpc ppc64_defconfig gcc powerpc randconfig-r035-20230710 gcc powerpc tqm8560_defconfig clang powerpc walnut_defconfig clang riscv allmodconfig gcc riscv allnoconfig gcc riscv allyesconfig gcc riscv defconfig gcc riscv randconfig-r002-20230710 gcc riscv randconfig-r031-20230710 gcc riscv randconfig-r032-20230710 gcc riscv randconfig-r042-20230710 clang riscv rv32_defconfig gcc s390 alldefconfig clang s390 allmodconfig gcc s390 allyesconfig gcc s390 defconfig gcc s390 randconfig-r044-20230710 clang sh allmodconfig gcc sh j2_defconfig gcc sh migor_defconfig gcc sh rts7751r2dplus_defconfig gcc sh se7750_defconfig gcc sparc allyesconfig gcc sparc defconfig gcc sparc64 randconfig-r022-20230710 gcc um allmodconfig clang um allnoconfig clang um allyesconfig clang um defconfig gcc um i386_defconfig gcc um randconfig-r023-20230710 gcc um x86_64_defconfig gcc x86_64 allyesconfig gcc x86_64 buildonly-randconfig-r001-20230711 clang x86_64 buildonly-randconfig-r002-20230711 clang x86_64 buildonly-randconfig-r003-20230711 clang x86_64 defconfig gcc x86_64 kexec gcc x86_64 randconfig-r025-20230710 clang x86_64 randconfig-r033-20230710 gcc x86_64 randconfig-x001-20230710 clang x86_64 randconfig-x002-20230710 clang x86_64 randconfig-x003-20230710 clang x86_64 randconfig-x004-20230710 clang x86_64 randconfig-x005-20230710 clang x86_64 randconfig-x006-20230710 clang x86_64 randconfig-x011-20230710 gcc x86_64 randconfig-x012-20230710 gcc x86_64 randconfig-x013-20230710 gcc x86_64 randconfig-x014-20230710 gcc x86_64 randconfig-x015-20230710 gcc x86_64 
randconfig-x016-20230710 gcc x86_64 rhel-8.3-rust clang x86_64 rhel-8.3 gcc xtensa alldefconfig gcc xtensa generic_kc705_defconfig gcc xtensa randconfig-r012-20230710 gcc -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki

2 years, 6 months

1
0
0 0

Re: [PATCH v3 03/10] eventfs: adding eventfs dir add functions

by Ajay Kaher

> On 10-Jul-2023, at 7:24 AM, Steven Rostedt <rostedt(a)goodmis.org> wrote: > > !! External Email > > On Mon, 3 Jul 2023 15:52:26 -0400 > Steven Rostedt <rostedt(a)goodmis.org> wrote: > >> On Mon, 3 Jul 2023 18:51:22 +0000 >> Ajay Kaher <akaher(a)vmware.com> wrote: >> >>>> >>>> We can also look to see if we can implement this with RCU. What exactly >>>> is this rwsem protecting? >>>> >>> >>> - struct eventfs_file holds the meta-data for file or dir. >>> https://github.com/intel-lab-lkp/linux/blob/dfe0dc15a73261ed83cdc728e43f4b3… >>> - eventfs_rwsem is supposed to protect the 'link-list which is made of struct eventfs_file >>> ' and elements of struct eventfs_file. >> >> RCU is usually the perfect solution for protecting link lists though. I'll >> take a look at this when I get back to work. >> > > So I did the below patch on top of this series. If you could fold this > into the appropriate patches, it should get us closer to an acceptable > solution. > > What I did was: > > 1. Moved the struct eventfs_file and eventfs_inode into event_inode.c as it > really should not be exposed to all users. > > 2. Added a recursion check to eventfs_remove_rec() as it is really > dangerous to have unchecked recursion in the kernel (we do have a fixed > size stack). > > 3. Removed all the eventfs_rwsem code and replaced it with an srcu lock for > the readers, and a mutex to synchronize the writers of the list. > > 4. Added a eventfs_mutex that is used for the modifications of the > dentry itself (as well as modifying the list from 3 above). > > 5. Have the free use srcu callbacks. After the srcu grace periods are done, > it adds the eventfs_file onto a llist (lockless link list) and wakes up a > work queue. Then the work queue does the freeing (this needs to be done in > task/workqueue context, as srcu callbacks are done in softirq context). > > This appears to pass through some of my instance stress tests as well as > the in tree ftrace selftests. 
> Awesome :) I have manually applied the patches and ftracetest results are same as v3. No more complains from lockdep. I will merge this into appropriate patches of v3 and soon send v4. You have renamed eventfs_create_dir() to create_dir(), and kept eventfs_create_dir() just a wrapper with lock, same for eventfs_create_file(). However these wrapper no where used, I will drop these wrappers. I was trying to have independent lock for each instance of events. As common lock for every instance of events is not must. Something was broken in your mail (I guess cc list) and couldn’t reach to lkml or ignored by lkml. I just wanted to track the auto test results from linux-kselftest. -Ajay > > --- > fs/tracefs/event_inode.c | 333 ++++++++++++++++++++++---------------------- > include/linux/tracefs.h | 26 --- > kernel/trace/trace.h | 1 > kernel/trace/trace_events.c | 6 > 4 files changed, 179 insertions(+), 187 deletions(-) > > Index: linux-trace.git/fs/tracefs/event_inode.c > =================================================================== > --- linux-trace.git.orig/fs/tracefs/event_inode.c 2023-07-07 22:04:44.490812310 -0400 > +++ linux-trace.git/fs/tracefs/event_inode.c 2023-07-09 21:48:28.162874719 -0400 > @@ -16,71 +16,69 @@ > #include <linux/fsnotify.h> > #include <linux/fs.h> > #include <linux/namei.h> > +#include <linux/workqueue.h> > #include <linux/security.h> > #include <linux/tracefs.h> > #include <linux/kref.h> > #include <linux/delay.h> > #include "internal.h" > > -/** > - * eventfs_dentry_to_rwsem - Return corresponding eventfs_rwsem > - * @dentry: a pointer to dentry > - * > - * helper function to return crossponding eventfs_rwsem for given dentry > - */ > -static struct rw_semaphore *eventfs_dentry_to_rwsem(struct dentry *dentry) > -{ > - if (S_ISDIR(dentry->d_inode->i_mode)) > - return (struct rw_semaphore *)dentry->d_inode->i_private; > - else > - return (struct rw_semaphore *)dentry->d_parent->d_inode->i_private; > -} > +struct eventfs_inode { > + 
struct list_head e_top_files; > +}; > > -/** > - * eventfs_down_read - acquire read lock function > - * @eventfs_rwsem: a pointer to rw_semaphore > - * > - * helper function to perform read lock. Nested locking requires because > - * lookup(), release() requires read lock, these could be called directly > - * or from open(), remove() which already hold the read/write lock. > - */ > -static void eventfs_down_read(struct rw_semaphore *eventfs_rwsem) > -{ > - down_read_nested(eventfs_rwsem, SINGLE_DEPTH_NESTING); > -} > +struct eventfs_file { > + const char *name; > + struct dentry *d_parent; > + struct dentry *dentry; > + struct list_head list; > + struct eventfs_inode *ei; > + const struct file_operations *fop; > + const struct inode_operations *iop; > + union { > + struct rcu_head rcu; > + struct llist_node llist; /* For freeing after RCU */ > + }; > + void *data; > + umode_t mode; > + bool created; > +}; > > -/** > - * eventfs_up_read - release read lock function > - * @eventfs_rwsem: a pointer to rw_semaphore > - * > - * helper function to release eventfs_rwsem lock if locked > - */ > -static void eventfs_up_read(struct rw_semaphore *eventfs_rwsem) > -{ > - up_read(eventfs_rwsem); > -} > +static DEFINE_MUTEX(eventfs_mutex); > +DEFINE_STATIC_SRCU(eventfs_srcu); > > -/** > - * eventfs_down_write - acquire write lock function > - * @eventfs_rwsem: a pointer to rw_semaphore > - * > - * helper function to perform write lock on eventfs_rwsem > - */ > -static void eventfs_down_write(struct rw_semaphore *eventfs_rwsem) > +static struct dentry *create_file(const char *name, umode_t mode, > + struct dentry *parent, void *data, > + const struct file_operations *fop) > { > - while (!down_write_trylock(eventfs_rwsem)) > - msleep(10); > -} > + struct tracefs_inode *ti; > + struct dentry *dentry; > + struct inode *inode; > > -/** > - * eventfs_up_write - release write lock function > - * @eventfs_rwsem: a pointer to rw_semaphore > - * > - * helper function to perform write lock 
on eventfs_rwsem > - */ > -static void eventfs_up_write(struct rw_semaphore *eventfs_rwsem) > -{ > - up_write(eventfs_rwsem); > + if (!(mode & S_IFMT)) > + mode |= S_IFREG; > + > + if (WARN_ON_ONCE(!S_ISREG(mode))) > + return NULL; > + > + dentry = eventfs_start_creating(name, parent); > + > + if (IS_ERR(dentry)) > + return dentry; > + > + inode = tracefs_get_inode(dentry->d_sb); > + if (unlikely(!inode)) > + return eventfs_failed_creating(dentry); > + > + inode->i_mode = mode; > + inode->i_fop = fop; > + inode->i_private = data; > + > + ti = get_tracefs(inode); > + ti->flags |= TRACEFS_EVENT_INODE; > + d_instantiate(dentry, inode); > + fsnotify_create(dentry->d_parent->d_inode, dentry); > + return eventfs_end_creating(dentry); > } > > /** > @@ -111,21 +109,30 @@ static struct dentry *eventfs_create_fil > struct dentry *parent, void *data, > const struct file_operations *fop) > { > - struct tracefs_inode *ti; > struct dentry *dentry; > - struct inode *inode; > > if (security_locked_down(LOCKDOWN_TRACEFS)) > return NULL; > > - if (!(mode & S_IFMT)) > - mode |= S_IFREG; > + mutex_lock(&eventfs_mutex); > + dentry = create_file(name, mode, parent, data, fop); > + mutex_unlock(&eventfs_mutex); > > - if (WARN_ON_ONCE(!S_ISREG(mode))) > - return NULL; > + return dentry; > +} > > - dentry = eventfs_start_creating(name, parent); > +static struct dentry *create_dir(const char *name, umode_t mode, > + struct dentry *parent, void *data, > + const struct file_operations *fop, > + const struct inode_operations *iop) > +{ > + struct tracefs_inode *ti; > + struct dentry *dentry; > + struct inode *inode; > > + WARN_ON(!S_ISDIR(mode)); > + > + dentry = eventfs_start_creating(name, parent); > if (IS_ERR(dentry)) > return dentry; > > @@ -134,13 +141,17 @@ static struct dentry *eventfs_create_fil > return eventfs_failed_creating(dentry); > > inode->i_mode = mode; > + inode->i_op = iop; > inode->i_fop = fop; > inode->i_private = data; > > ti = get_tracefs(inode); > ti->flags |= 
TRACEFS_EVENT_INODE; > + > + inc_nlink(inode); > d_instantiate(dentry, inode); > - fsnotify_create(dentry->d_parent->d_inode, dentry); > + inc_nlink(dentry->d_parent->d_inode); > + fsnotify_mkdir(dentry->d_parent->d_inode, dentry); > return eventfs_end_creating(dentry); > } > > @@ -175,37 +186,18 @@ static struct dentry *eventfs_create_dir > const struct file_operations *fop, > const struct inode_operations *iop) > { > - struct tracefs_inode *ti; > struct dentry *dentry; > - struct inode *inode; > > if (security_locked_down(LOCKDOWN_TRACEFS)) > return NULL; > > WARN_ON(!S_ISDIR(mode)); > > - dentry = eventfs_start_creating(name, parent); > - > - if (IS_ERR(dentry)) > - return dentry; > - > - inode = tracefs_get_inode(dentry->d_sb); > - if (unlikely(!inode)) > - return eventfs_failed_creating(dentry); > + mutex_lock(&eventfs_mutex); > + dentry = create_dir(name, mode, parent, data, fop, iop); > + mutex_unlock(&eventfs_mutex); > > - inode->i_mode = mode; > - inode->i_op = iop; > - inode->i_fop = fop; > - inode->i_private = data; > - > - ti = get_tracefs(inode); > - ti->flags |= TRACEFS_EVENT_INODE; > - > - inc_nlink(inode); > - d_instantiate(dentry, inode); > - inc_nlink(dentry->d_parent->d_inode); > - fsnotify_mkdir(dentry->d_parent->d_inode, dentry); > - return eventfs_end_creating(dentry); > + return dentry; > } > > /** > @@ -241,13 +233,14 @@ static void eventfs_post_create_dir(stru > { > struct eventfs_file *ef_child; > struct tracefs_inode *ti; > + int idx; > > - eventfs_down_read((struct rw_semaphore *) ef->data); > + /* srcu lock already held */ > /* fill parent-child relation */ > - list_for_each_entry(ef_child, &ef->ei->e_top_files, list) { > + list_for_each_entry_srcu(ef_child, &ef->ei->e_top_files, list, > + srcu_read_lock_held(&eventfs_srcu)) { > ef_child->d_parent = ef->dentry; > } > - eventfs_up_read((struct rw_semaphore *) ef->data); > > ti = get_tracefs(ef->dentry->d_inode); > ti->private = ef->ei; > @@ -271,40 +264,43 @@ static struct dentry 
*eventfs_root_looku > struct eventfs_inode *ei; > struct eventfs_file *ef; > struct dentry *ret = NULL; > - struct rw_semaphore *eventfs_rwsem; > + int idx; > > ti = get_tracefs(dir); > if (!(ti->flags & TRACEFS_EVENT_INODE)) > return NULL; > > ei = ti->private; > - eventfs_rwsem = (struct rw_semaphore *) dir->i_private; > - eventfs_down_read(eventfs_rwsem); > - list_for_each_entry(ef, &ei->e_top_files, list) { > + idx = srcu_read_lock(&eventfs_srcu); > + list_for_each_entry_srcu(ef, &ei->e_top_files, list, > + srcu_read_lock_held(&eventfs_srcu)) { > if (strcmp(ef->name, dentry->d_name.name)) > continue; > ret = simple_lookup(dir, dentry, flags); > if (ef->created) > continue; > + mutex_lock(&eventfs_mutex); > ef->created = true; > if (ef->ei) > - ef->dentry = eventfs_create_dir(ef->name, ef->mode, ef->d_parent, > - ef->data, ef->fop, ef->iop); > + ef->dentry = create_dir(ef->name, ef->mode, ef->d_parent, > + ef->data, ef->fop, ef->iop); > else > - ef->dentry = eventfs_create_file(ef->name, ef->mode, ef->d_parent, > - ef->data, ef->fop); > + ef->dentry = create_file(ef->name, ef->mode, ef->d_parent, > + ef->data, ef->fop); > > if (IS_ERR_OR_NULL(ef->dentry)) { > ef->created = false; > + mutex_unlock(&eventfs_mutex); > } else { > if (ef->ei) > eventfs_post_create_dir(ef); > ef->dentry->d_fsdata = ef; > + mutex_unlock(&eventfs_mutex); > dput(ef->dentry); > } > break; > } > - eventfs_up_read(eventfs_rwsem); > + srcu_read_unlock(&eventfs_srcu, idx); > return ret; > } > > @@ -318,21 +314,20 @@ static int eventfs_release(struct inode > struct tracefs_inode *ti; > struct eventfs_inode *ei; > struct eventfs_file *ef; > - struct dentry *dentry = file_dentry(file); > - struct rw_semaphore *eventfs_rwsem; > + int idx; > > ti = get_tracefs(inode); > if (!(ti->flags & TRACEFS_EVENT_INODE)) > return -EINVAL; > > ei = ti->private; > - eventfs_rwsem = eventfs_dentry_to_rwsem(dentry); > - eventfs_down_read(eventfs_rwsem); > - list_for_each_entry(ef, &ei->e_top_files, list) { > + 
idx = srcu_read_lock(&eventfs_srcu); > + list_for_each_entry_srcu(ef, &ei->e_top_files, list, > + srcu_read_lock_held(&eventfs_srcu)) { > if (ef->created) > dput(ef->dentry); > } > - eventfs_up_read(eventfs_rwsem); > + srcu_read_unlock(&eventfs_srcu, idx); > return dcache_dir_close(inode, file); > } > > @@ -352,30 +347,30 @@ static int dcache_dir_open_wrapper(struc > struct eventfs_file *ef; > struct inode *f_inode = file_inode(file); > struct dentry *dentry = file_dentry(file); > - struct rw_semaphore *eventfs_rwsem; > + int idx; > > ti = get_tracefs(f_inode); > if (!(ti->flags & TRACEFS_EVENT_INODE)) > return -EINVAL; > > ei = ti->private; > - eventfs_rwsem = eventfs_dentry_to_rwsem(dentry); > - eventfs_down_read(eventfs_rwsem); > - list_for_each_entry(ef, &ei->e_top_files, list) { > + idx = srcu_read_lock(&eventfs_srcu); > + list_for_each_entry_rcu(ef, &ei->e_top_files, list) { > if (ef->created) { > dget(ef->dentry); > continue; > } > > + mutex_lock(&eventfs_mutex); > ef->created = true; > > inode_lock(dentry->d_inode); > if (ef->ei) > - ef->dentry = eventfs_create_dir(ef->name, ef->mode, dentry, > - ef->data, ef->fop, ef->iop); > + ef->dentry = create_dir(ef->name, ef->mode, dentry, > + ef->data, ef->fop, ef->iop); > else > - ef->dentry = eventfs_create_file(ef->name, ef->mode, dentry, > - ef->data, ef->fop); > + ef->dentry = create_file(ef->name, ef->mode, dentry, > + ef->data, ef->fop); > inode_unlock(dentry->d_inode); > > if (IS_ERR_OR_NULL(ef->dentry)) { > @@ -385,8 +380,9 @@ static int dcache_dir_open_wrapper(struc > eventfs_post_create_dir(ef); > ef->dentry->d_fsdata = ef; > } > + mutex_unlock(&eventfs_mutex); > } > - eventfs_up_read(eventfs_rwsem); > + srcu_read_unlock(&eventfs_srcu, idx); > return dcache_dir_open(inode, file); > } > > @@ -463,13 +459,11 @@ static struct eventfs_file *eventfs_prep > * @parent: a pointer to the parent dentry for this file. This should be a > * directory dentry if set. 
If this parameter is NULL, then the > * directory will be created in the root of the tracefs filesystem. > - * @eventfs_rwsem: a pointer to rw_semaphore > * > * This function creates the top of the trace event directory. > */ > struct dentry *eventfs_create_events_dir(const char *name, > - struct dentry *parent, > - struct rw_semaphore *eventfs_rwsem) > + struct dentry *parent) > { > struct dentry *dentry = tracefs_start_creating(name, parent); > struct eventfs_inode *ei; > @@ -489,7 +483,6 @@ struct dentry *eventfs_create_events_dir > return ERR_PTR(-ENOMEM); > } > > - init_rwsem(eventfs_rwsem); > INIT_LIST_HEAD(&ei->e_top_files); > > ti = get_tracefs(inode); > @@ -499,7 +492,6 @@ struct dentry *eventfs_create_events_dir > inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; > inode->i_op = &eventfs_root_dir_inode_operations; > inode->i_fop = &eventfs_file_operations; > - inode->i_private = eventfs_rwsem; > > /* directory inodes start off with i_nlink == 2 (for "." entry) */ > inc_nlink(inode); > @@ -513,15 +505,13 @@ struct dentry *eventfs_create_events_dir > * eventfs_add_subsystem_dir - add eventfs subsystem_dir to list to create later > * @name: a pointer to a string containing the name of the file to create. > * @parent: a pointer to the parent dentry for this dir. > - * @eventfs_rwsem: a pointer to rw_semaphore > * > * This function adds eventfs subsystem dir to list. > * And all these dirs are created on the fly when they are looked up, > * and the dentry and inodes will be removed when they are done. 
> */ > struct eventfs_file *eventfs_add_subsystem_dir(const char *name, > - struct dentry *parent, > - struct rw_semaphore *eventfs_rwsem) > + struct dentry *parent) > { > struct tracefs_inode *ti_parent; > struct eventfs_inode *ei_parent; > @@ -536,16 +526,15 @@ struct eventfs_file *eventfs_add_subsyst > ef = eventfs_prepare_ef(name, > S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, > &eventfs_file_operations, > - &eventfs_root_dir_inode_operations, > - (void *) eventfs_rwsem); > + &eventfs_root_dir_inode_operations, NULL); > > if (IS_ERR(ef)) > return ef; > > - eventfs_down_write(eventfs_rwsem); > + mutex_lock(&eventfs_mutex); > list_add_tail(&ef->list, &ei_parent->e_top_files); > ef->d_parent = parent; > - eventfs_up_write(eventfs_rwsem); > + mutex_unlock(&eventfs_mutex); > return ef; > } > > @@ -553,15 +542,13 @@ struct eventfs_file *eventfs_add_subsyst > * eventfs_add_dir - add eventfs dir to list to create later > * @name: a pointer to a string containing the name of the file to create. > * @ef_parent: a pointer to the parent eventfs_file for this dir. > - * @eventfs_rwsem: a pointer to rw_semaphore > * > * This function adds eventfs dir to list. > * And all these dirs are created on the fly when they are looked up, > * and the dentry and inodes will be removed when they are done. 
> */ > struct eventfs_file *eventfs_add_dir(const char *name, > - struct eventfs_file *ef_parent, > - struct rw_semaphore *eventfs_rwsem) > + struct eventfs_file *ef_parent) > { > struct eventfs_file *ef; > > @@ -571,16 +558,15 @@ struct eventfs_file *eventfs_add_dir(con > ef = eventfs_prepare_ef(name, > S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, > &eventfs_file_operations, > - &eventfs_root_dir_inode_operations, > - (void *) eventfs_rwsem); > + &eventfs_root_dir_inode_operations, NULL); > > if (IS_ERR(ef)) > return ef; > > - eventfs_down_write(eventfs_rwsem); > + mutex_lock(&eventfs_mutex); > list_add_tail(&ef->list, &ef_parent->ei->e_top_files); > ef->d_parent = ef_parent->dentry; > - eventfs_up_write(eventfs_rwsem); > + mutex_unlock(&eventfs_mutex); > return ef; > } > > @@ -608,7 +594,6 @@ int eventfs_add_top_file(const char *nam > struct tracefs_inode *ti; > struct eventfs_inode *ei; > struct eventfs_file *ef; > - struct rw_semaphore *eventfs_rwsem; > > if (!parent) > return -EINVAL; > @@ -629,11 +614,10 @@ int eventfs_add_top_file(const char *nam > if (IS_ERR(ef)) > return -ENOMEM; > > - eventfs_rwsem = (struct rw_semaphore *) parent->d_inode->i_private; > - eventfs_down_write(eventfs_rwsem); > + mutex_lock(&eventfs_mutex); > list_add_tail(&ef->list, &ei->e_top_files); > ef->d_parent = parent; > - eventfs_up_write(eventfs_rwsem); > + mutex_unlock(&eventfs_mutex); > return 0; > } > > @@ -658,7 +642,6 @@ int eventfs_add_file(const char *name, u > const struct file_operations *fop) > { > struct eventfs_file *ef; > - struct rw_semaphore *eventfs_rwsem; > > if (!ef_parent) > return -EINVAL; > @@ -670,14 +653,42 @@ int eventfs_add_file(const char *name, u > if (IS_ERR(ef)) > return -ENOMEM; > > - eventfs_rwsem = (struct rw_semaphore *) ef_parent->data; > - eventfs_down_write(eventfs_rwsem); > + mutex_lock(&eventfs_mutex); > list_add_tail(&ef->list, &ef_parent->ei->e_top_files); > ef->d_parent = ef_parent->dentry; > - eventfs_up_write(eventfs_rwsem); > + 
mutex_unlock(&eventfs_mutex); > return 0; > } > > +static LLIST_HEAD(free_list); > + > +static void eventfs_workfn(struct work_struct *work) > +{ > + struct eventfs_file *ef, *tmp; > + struct llist_node *llnode; > + > + llnode = llist_del_all(&free_list); > + llist_for_each_entry_safe(ef, tmp, llnode, llist) { > + if (ef->created && ef->dentry) > + dput(ef->dentry); > + kfree(ef->name); > + kfree(ef->ei); > + kfree(ef); > + } > +} > + > +DECLARE_WORK(eventfs_work, eventfs_workfn); > + > +static void free_ef(struct rcu_head *head) > +{ > + struct eventfs_file *ef = container_of(head, struct eventfs_file, rcu); > + > + if (!llist_add(&ef->llist, &free_list)) > + return; > + > + queue_work(system_unbound_wq, &eventfs_work); > +} > + > /** > * eventfs_remove_rec - remove eventfs dir or file from list > * @ef: a pointer to eventfs_file to be removed. > @@ -685,51 +696,51 @@ int eventfs_add_file(const char *name, u > * This function recursively remove eventfs_file which > * contains info of file or dir. > */ > -static void eventfs_remove_rec(struct eventfs_file *ef) > +static void eventfs_remove_rec(struct eventfs_file *ef, int level) > { > - struct eventfs_file *ef_child, *n; > + struct eventfs_file *ef_child; > > if (!ef) > return; > + /* > + * Check recursion depth. 
It should never be greater than 3: > + * 0 - events/ > + * 1 - events/group/ > + * 2 - events/group/event/ > + * 3 - events/group/event/file > + */ > + if (WARN_ON_ONCE(level > 3)) > + return; > > if (ef->ei) { > /* search for nested folders or files */ > - list_for_each_entry_safe(ef_child, n, &ef->ei->e_top_files, list) { > - eventfs_remove_rec(ef_child); > + list_for_each_entry_srcu(ef_child, &ef->ei->e_top_files, list, > + lockdep_is_held(&eventfs_mutex)) { > + eventfs_remove_rec(ef_child, level + 1); > } > - kfree(ef->ei); > } > > - if (ef->created && ef->dentry) { > + if (ef->created && ef->dentry) > d_invalidate(ef->dentry); > - dput(ef->dentry); > - } > - list_del(&ef->list); > - kfree(ef->name); > - kfree(ef); > + > + list_del_rcu(&ef->list); > + call_srcu(&eventfs_srcu, &ef->rcu, free_ef); > } > > /** > * eventfs_remove - remove eventfs dir or file from list > * @ef: a pointer to eventfs_file to be removed. > * > - * This function acquire the eventfs_rwsem lock and call eventfs_remove_rec() > + * This function acquire the eventfs_mutex lock and calls eventfs_remove_rec() > */ > void eventfs_remove(struct eventfs_file *ef) > { > - struct rw_semaphore *eventfs_rwsem; > - > if (!ef) > return; > > - if (ef->ei) > - eventfs_rwsem = (struct rw_semaphore *) ef->data; > - else > - eventfs_rwsem = (struct rw_semaphore *) ef->d_parent->d_inode->i_private; > - > - eventfs_down_write(eventfs_rwsem); > - eventfs_remove_rec(ef); > - eventfs_up_write(eventfs_rwsem); > + mutex_lock(&eventfs_mutex); > + eventfs_remove_rec(ef, 0); > + mutex_unlock(&eventfs_mutex); > } > > /** > Index: linux-trace.git/include/linux/tracefs.h > =================================================================== > --- linux-trace.git.orig/include/linux/tracefs.h 2023-07-07 22:04:44.490812310 -0400 > +++ linux-trace.git/include/linux/tracefs.h 2023-07-07 22:04:44.486812271 -0400 > @@ -21,22 +21,7 @@ struct file_operations; > > #ifdef CONFIG_TRACING > > -struct eventfs_inode { > - struct 
list_head e_top_files; > -}; > - > -struct eventfs_file { > - const char *name; > - struct dentry *d_parent; > - struct dentry *dentry; > - struct list_head list; > - struct eventfs_inode *ei; > - const struct file_operations *fop; > - const struct inode_operations *iop; > - void *data; > - umode_t mode; > - bool created; > -}; > +struct eventfs_file; > > struct dentry *eventfs_start_creating(const char *name, struct dentry *parent); > > @@ -45,16 +30,13 @@ struct dentry *eventfs_failed_creating(s > struct dentry *eventfs_end_creating(struct dentry *dentry); > > struct dentry *eventfs_create_events_dir(const char *name, > - struct dentry *parent, > - struct rw_semaphore *eventfs_rwsem); > + struct dentry *parent); > > struct eventfs_file *eventfs_add_subsystem_dir(const char *name, > - struct dentry *parent, > - struct rw_semaphore *eventfs_rwsem); > + struct dentry *parent); > > struct eventfs_file *eventfs_add_dir(const char *name, > - struct eventfs_file *ef_parent, > - struct rw_semaphore *eventfs_rwsem); > + struct eventfs_file *ef_parent); > > int eventfs_add_file(const char *name, umode_t mode, > struct eventfs_file *ef_parent, void *data, > Index: linux-trace.git/kernel/trace/trace.h > =================================================================== > --- linux-trace.git.orig/kernel/trace/trace.h 2023-07-07 22:04:44.490812310 -0400 > +++ linux-trace.git/kernel/trace/trace.h 2023-07-07 22:04:44.486812271 -0400 > @@ -359,7 +359,6 @@ struct trace_array { > struct dentry *options; > struct dentry *percpu_dir; > struct dentry *event_dir; > - struct rw_semaphore eventfs_rwsem; > struct trace_options *topts; > struct list_head systems; > struct list_head events; > Index: linux-trace.git/kernel/trace/trace_events.c > =================================================================== > --- linux-trace.git.orig/kernel/trace/trace_events.c 2023-07-07 22:04:44.490812310 -0400 > +++ linux-trace.git/kernel/trace/trace_events.c 2023-07-07 22:04:44.486812271 -0400 > @@ 
-2337,7 +2337,7 @@ event_subsystem_dir(struct trace_array * > } else > __get_system(system); > > - dir->ef = eventfs_add_subsystem_dir(name, parent, &tr->eventfs_rwsem); > + dir->ef = eventfs_add_subsystem_dir(name, parent); > if (IS_ERR(dir->ef)) { > pr_warn("Failed to create system directory %s\n", name); > __put_system(system); > @@ -2439,7 +2439,7 @@ event_create_dir(struct dentry *parent, > return -ENOMEM; > > name = trace_event_name(call); > - file->ef = eventfs_add_dir(name, ef_subsystem, &tr->eventfs_rwsem); > + file->ef = eventfs_add_dir(name, ef_subsystem); > if (IS_ERR(file->ef)) { > pr_warn("Could not create tracefs '%s' directory\n", name); > return -1; > @@ -3647,7 +3647,7 @@ create_event_toplevel_files(struct dentr > if (!entry) > return -ENOMEM; > > - d_events = eventfs_create_events_dir("events", parent, &tr->eventfs_rwsem); > + d_events = eventfs_create_events_dir("events", parent); > if (IS_ERR(d_events)) { > pr_warn("Could not create tracefs 'events' directory\n"); > return -ENOMEM; > > !! External Email: This email originated from outside of the organization. Do not click links or open attachments unless you recognize the sender.

2 years, 6 months

2
4
0 0

[PATCH v4 0/9] cgroup/cpuset: Support remote partitions

by Waiman Long

v4: - [v3] https://lore.kernel.org/lkml/20230627005529.1564984-1-longman@redhat.com/ - Fix compilation problem reported by kernel test robot. v3: - [v2] https://lore.kernel.org/lkml/20230531163405.2200292-1-longman@redhat.com/ - Change the new control file from root-only "cpuset.cpus.reserve" to non-root "cpuset.cpus.exclusive" which lists the set of exclusive CPUs distributed down the hierarchy. - Add a patch to restrict boot-time isolated CPUs to isolated partitions only. - Update the test_cpuset_prs.sh test script and documentation accordingly. This patch series introduces a new cpuset control file "cpuset.cpus.exclusive" which must be a subset of "cpuset.cpus" and the parent's "cpuset.cpus.exclusive". This control file lists the exclusive CPUs to be distributed down the hierarchy. Any one of the exclusive CPUs can only be distributed to at most one child cpuset. Unlike "cpuset.cpus", invalid input to "cpuset.cpus.exclusive" will be rejected with an error. This new control file has no effect on the behavior of the cpuset until it turns into a partition root. At that point, its effective CPUs will be set to its exclusive CPUs unless some of them are offline. This patch series also introduces a new category of cpuset partition called remote partitions. The existing partition category where the partition roots have to be clustered around the root cgroup in a hierarchical way is now referred to as local partitions. A remote partition can be formed far from the root cgroup with no partition root parent. While local partitions can be created without touching "cpuset.cpus.exclusive" (as it can be set automatically if a cpuset becomes a local partition root), properly set "cpuset.cpus.exclusive" values down the hierarchy are required to create a remote partition. Both scheduling and isolated partitions can be formed in a remote partition. A local partition can be created under a remote partition. 
A remote partition, however, cannot be formed under a local partition for now. Modern container orchestration tools like Kubernetes use the cgroup hierarchy to manage different containers, and they rely on other middleware like systemd to help manage it. If a container needs to use isolated CPUs, it is hard to get those with the local partitions as it will require the administrative parent cgroup to be a partition root too, which tools like systemd may not be ready to manage. With this patch series, we allow the creation of a remote partition far from the root. The container management tool can manage the "cpuset.cpus.exclusive" file without impacting the other cpuset files that are managed by other middlewares. Of course, invalid "cpuset.cpus.exclusive" values will be rejected and changes to "cpuset.cpus" can affect the value of "cpuset.cpus.exclusive" due to the requirement that it has to be a subset of the former control file. Waiman Long (9): cgroup/cpuset: Inherit parent's load balance state in v2 cgroup/cpuset: Extract out CS_CPU_EXCLUSIVE & CS_SCHED_LOAD_BALANCE handling cgroup/cpuset: Improve temporary cpumasks handling cgroup/cpuset: Allow suppression of sched domain rebuild in update_cpumasks_hier() cgroup/cpuset: Add cpuset.cpus.exclusive for v2 cgroup/cpuset: Introduce remote partition cgroup/cpuset: Check partition conflict with housekeeping setup cgroup/cpuset: Documentation update for partition cgroup/cpuset: Extend test_cpuset_prs.sh to test remote partition Documentation/admin-guide/cgroup-v2.rst | 100 +- kernel/cgroup/cpuset.c | 1347 ++++++++++++----- .../selftests/cgroup/test_cpuset_prs.sh | 398 +++-- 3 files changed, 1291 insertions(+), 554 deletions(-) -- 2.31.1

2 years, 6 months

2
21
0 0

[PATCH bpf-next v4 0/7] Add SO_REUSEPORT support for TC bpf_sk_assign

by Lorenz Bauer

We want to replace iptables TPROXY with a BPF program at TC ingress. To make this work in all cases we need to assign a SO_REUSEPORT socket to an skb, which is currently prohibited. This series adds support for such sockets to bpf_sk_assign. I did some refactoring to cut down on the amount of duplicate code. The key to this is to use INDIRECT_CALL in the reuseport helpers. To show that this approach is not just beneficial to TC sk_assign I removed duplicate code for bpf_sk_lookup as well. Joint work with Daniel Borkmann. Signed-off-by: Lorenz Bauer <lmb(a)isovalent.com> --- Changes in v4: - WARN_ON_ONCE if reuseport socket is refcounted (Kuniyuki) - Use inet[6]_ehashfn_t to shorten function declarations (Kuniyuki) - Shuffle documentation patch around (Kuniyuki) - Update commit message to explain why IPv6 needs EXPORT_SYMBOL - Link to v3: https://lore.kernel.org/r/20230613-so-reuseport-v3-0-907b4cbb7b99@isovalent… Changes in v3: - Fix warning re udp_ehashfn and udp6_ehashfn (Simon) - Return higher scoring connected UDP reuseport sockets (Kuniyuki) - Fix ipv6 module builds - Link to v2: https://lore.kernel.org/r/20230613-so-reuseport-v2-0-b7c69a342613@isovalent… Changes in v2: - Correct commit abbrev length (Kuniyuki) - Reduce duplication (Kuniyuki) - Add checks on sk_state (Martin) - Split exporting inet[6]_lookup_reuseport into separate patch (Eric) --- Daniel Borkmann (1): selftests/bpf: Test that SO_REUSEPORT can be used with sk_assign helper Lorenz Bauer (6): udp: re-score reuseport groups when connected sockets are present net: export inet_lookup_reuseport and inet6_lookup_reuseport net: remove duplicate reuseport_lookup functions net: document inet[6]_lookup_reuseport sk_state requirements net: remove duplicate sk_lookup helpers bpf, net: Support SO_REUSEPORT sockets with bpf_sk_assign include/net/inet6_hashtables.h | 81 ++++++++- include/net/inet_hashtables.h | 74 +++++++- include/net/sock.h | 7 +- include/uapi/linux/bpf.h | 3 - net/core/filter.c | 2 - 
net/ipv4/inet_hashtables.c | 67 ++++--- net/ipv4/udp.c | 88 ++++----- net/ipv6/inet6_hashtables.c | 70 +++++--- net/ipv6/udp.c | 98 ++++------ tools/include/uapi/linux/bpf.h | 3 - tools/testing/selftests/bpf/network_helpers.c | 3 + .../selftests/bpf/prog_tests/assign_reuse.c | 197 +++++++++++++++++++++ .../selftests/bpf/progs/test_assign_reuse.c | 142 +++++++++++++++ 13 files changed, 656 insertions(+), 179 deletions(-) --- base-commit: 970308a7b544fa1c7ee98a2721faba3765be8dd8 change-id: 20230613-so-reuseport-e92c526173ee Best regards, -- Lorenz Bauer <lmb(a)isovalent.com>

2 years, 6 months

3
19
0 0

[PATCH bpf-next v3 0/6] Support defragmenting IPv(4|6) packets in BPF

by Daniel Xu

=== Context === In the context of a middlebox, fragmented packets are tricky to handle. The full 5-tuple of a packet is often only available in the first fragment which makes enforcing consistent policy difficult. There are really only two stateless options, neither of which is very nice: 1. Enforce policy on first fragment and accept all subsequent fragments. This works but may let in certain attacks or allow data exfiltration. 2. Enforce policy on first fragment and drop all subsequent fragments. This does not really work because some protocols may rely on fragmentation. For example, DNS may rely on oversized UDP packets for large responses. So stateful tracking is the only sane option. RFC 8900 [0] calls this out as well in section 6.3: Middleboxes [...] should process IP fragments in a manner that is consistent with [RFC0791] and [RFC8200]. In many cases, middleboxes must maintain state in order to achieve this goal. === BPF related bits === Policy has traditionally been enforced from XDP/TC hooks. Both hooks run before kernel reassembly facilities. However, with the new BPF_PROG_TYPE_NETFILTER, we can rather easily hook into existing netfilter reassembly infra. The basic idea is we bump a refcnt on the netfilter defrag module and then run the bpf prog after the defrag module runs. This allows bpf progs to transparently see full, reassembled packets. The nice thing about this is that progs don't have to carry around logic to detect fragments. 
=== Changelog === Changes from v2: * module_put() if ->enable() fails * Fix CI build errors Changes from v1: * Drop bpf_program__attach_netfilter() patches * static -> static const where appropriate * Fix callback assignment order during registration * Only request_module() if callbacks are missing * Fix retval when modprobe fails in userspace * Fix v6 defrag module name (nf_defrag_ipv6_hooks -> nf_defrag_ipv6) * Simplify priority checking code * Add warning if module doesn't assign callbacks in the future * Take refcnt on module while defrag link is active [0]: https://datatracker.ietf.org/doc/html/rfc8900 Daniel Xu (6): netfilter: defrag: Add glue hooks for enabling/disabling defrag netfilter: bpf: Support BPF_F_NETFILTER_IP_DEFRAG in netfilter link netfilter: bpf: Prevent defrag module unload while link active bpf: selftests: Support not connecting client socket bpf: selftests: Support custom type and proto for client sockets bpf: selftests: Add defrag selftests include/linux/netfilter.h | 15 + include/uapi/linux/bpf.h | 5 + net/ipv4/netfilter/nf_defrag_ipv4.c | 17 +- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 11 + net/netfilter/core.c | 6 + net/netfilter/nf_bpf_link.c | 150 +++++++++- tools/include/uapi/linux/bpf.h | 5 + tools/testing/selftests/bpf/Makefile | 4 +- .../selftests/bpf/generate_udp_fragments.py | 90 ++++++ .../selftests/bpf/ip_check_defrag_frags.h | 57 ++++ tools/testing/selftests/bpf/network_helpers.c | 26 +- tools/testing/selftests/bpf/network_helpers.h | 3 + .../bpf/prog_tests/ip_check_defrag.c | 282 ++++++++++++++++++ .../selftests/bpf/progs/ip_check_defrag.c | 104 +++++++ 14 files changed, 753 insertions(+), 22 deletions(-) create mode 100755 tools/testing/selftests/bpf/generate_udp_fragments.py create mode 100644 tools/testing/selftests/bpf/ip_check_defrag_frags.h create mode 100644 tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c create mode 100644 tools/testing/selftests/bpf/progs/ip_check_defrag.c -- 2.41.0

2 years, 6 months

2
4
0 0

[PATCH v3 0/3] cpuset: Allow setscheduler regardless of manipulated task

by Michal Koutný

Changes in v3: - only skip permissions check when effective affinity doesn't change - update commit message accordingly Changes in v2 (https://lore.kernel.org/r/20230630183908.32148-1-mkoutny@suse.com): - rebased on mainline - drop is_in_v2_mode() Changes in v1 (https://lore.kernel.org/r/20230629091146.28801-1-mkoutny@suse.com): - added selftests - comments rewording RFC in https://lore.kernel.org/r/20220623124944.2753-1-mkoutny@suse.com Michal Koutný (3): cpuset: Allow setscheduler regardless of manipulated task selftests: cgroup: Minor code reorganizations selftests: cgroup: Add cpuset migrations testcase MAINTAINERS | 2 + kernel/cgroup/cpuset.c | 19 +- tools/testing/selftests/cgroup/.gitignore | 1 + tools/testing/selftests/cgroup/Makefile | 2 + tools/testing/selftests/cgroup/cgroup_util.c | 2 + tools/testing/selftests/cgroup/cgroup_util.h | 2 + tools/testing/selftests/cgroup/test_core.c | 2 +- tools/testing/selftests/cgroup/test_cpuset.c | 275 ++++++++++++++++++ .../selftests/cgroup/test_cpuset_prs.sh | 2 +- 9 files changed, 302 insertions(+), 5 deletions(-) create mode 100644 tools/testing/selftests/cgroup/test_cpuset.c base-commit: e55e5df193d247a38a5e1ac65a5316a0adcc22fa -- 2.41.0

2 years, 6 months

3
6
0 0

Re: [PATCH v3 03/10] eventfs: adding eventfs dir add functions

by Steven Rostedt

On Mon, 10 Jul 2023 15:07:30 -0400 Steven Rostedt <rostedt(a)goodmis.org> wrote: > On Mon, 10 Jul 2023 15:06:06 -0400 > Steven Rostedt <rostedt(a)goodmis.org> wrote: > > > > Something was broken in your mail (I guess cc list) and couldn’t reach to lkml or > > > ignored by lkml. I just wanted to track the auto test results from linux-kselftest. > > > > Yeah, claws-mail has an issue with some emails with quotes in it (sometimes > > drops the second quote). Sad part is, it happens after I hit send, and it > > is not part of the email. I'll send this reply now, but I bet it's going to happen again. > > > > Let's see :-/ I checked the To and Cc's and they all have the proper > > quotes. Let's see what ends up in my "Sent" folder. > > This time it worked! > But this reply did not :-p It was fine before I sent, but the email in my Sent folder shows: Cc: "mhiramat(a)kernel.org" <mhiramat(a)kernel.org>, "shuah(a)kernel.org" <shuah(a)kernel.org>, "linux-kernel(a)vger.kernel.org" <linux-kernel(a)vger.kernel.org>, "linux-trace-kernel(a)vger.kernel.org\" <linux-trace-kernel(a)vger.kernel.org>, "linux-kselftest(a)vger.kernel.org" <linux-kselftest(a)vger.kernel.org>, Ching-lin Yu <chinglinyu(a)google.com>, Nadav Amit <namit(a)vmware.com>, "srivatsa(a)csail.mit.edu" <srivatsa(a)csail.mit.edu>, Alexey Makhalov <amakhalov(a)vmware.com>, Vasavi Sirnapalli <vsirnapalli(a)vmware.com>, Tapas Kundu <tkundu(a)vmware.com>, "er.ajay.kaher(a)gmail.com" <er.ajay.kaher(a)gmail.com> Claws injected a backslash into: "linux-trace-kernel(a)vger.kernel.org\" <linux-trace-kernel(a)vger.kernel.org> I have my own build of claws-mail, let me update it and perhaps this will go away. -- Steve

2 years, 6 months

1
0
0 0

[PATCH v7 00/19] Add iommufd physical device operations for replace and alloc hwpt

by Jason Gunthorpe

This is the basic functionality for iommufd to support iommufd_device_replace() and IOMMU_HWPT_ALLOC for physical devices. iommufd_device_replace() allows changing the HWPT associated with the device to a new IOAS or HWPT. Replace does this in a way that failure leaves things unchanged, and utilizes the iommu iommu_group_replace_domain() API to allow the iommu driver to perform an optional non-disruptive change. IOMMU_HWPT_ALLOC allows HWPTs to be explicitly allocated by the user and used by attach or replace. At this point it isn't very useful since the HWPT is the same as the automatically managed HWPT from the IOAS. However a following series will allow userspace to customize the created HWPT. The implementation is complicated because we have to introduce some per-iommu_group memory in iommufd and redo how we think about multi-device groups to be more explicit. This solves all the locking problems in the prior attempts. This series is infrastructure work for the following series which: - Add replace for attach - Expose replace through VFIO APIs - Implement driver parameters for HWPT creation (nesting) Once review of this is complete I will keep it on a side branch and accumulate the following series when they are ready so we can have a stable base and make more incremental progress. When we have all the parts together to get a full implementation it can go to Linus. This is on github: https://github.com/jgunthorpe/linux/commits/iommufd_hwpt v7: - Rebase to v6.4-rc2, update to new signature of iommufd_get_ioas() v6: https://lore.kernel.org/r/0-v6-fdb604df649a+369-iommufd_alloc_jgg@nvidia.com - Go back to the v4 locking arrangement with now both the attach/detach igroup->locks inside the functions, Kevin says he needs this for a followup series. This still fixes the syzkaller bug - Fix two more error unwind locking bugs where iommufd_object_abort_and_destroy(hwpt) would deadlock or be mislocked. 
Make sure fail_nth will catch these mistakes - Add a patch allowing objects to have different abort than destroy function, it allows hwpt abort to require the caller to continue to hold the lock and enforces this with lockdep. v5: https://lore.kernel.org/r/0-v5-6716da355392+c5-iommufd_alloc_jgg@nvidia.com - Go back to the v3 version of the code, keep the comment changes from v4. Syzkaller says the group lock change in v4 didn't work. - Adjust the fail_nth test to cover the path syzkaller found. We need to have an ioas with a mapped page installed to inject a failure during domain attachment. v4: https://lore.kernel.org/r/0-v4-9cd79ad52ee8+13f5-iommufd_alloc_jgg@nvidia.c… - Refine comments and commit messages - Move the group lock into iommufd_hw_pagetable_attach() - Fix error unwind in iommufd_device_do_replace() v3: https://lore.kernel.org/r/0-v3-61d41fd9e13e+1f5-iommufd_alloc_jgg@nvidia.com - Refine comments and commit messages - Adjust the flow in iommufd_device_auto_get_domain() so pt_id is only set on success - Reject replace on non-attached devices - Add missing __reserved check for IOMMU_HWPT_ALLOC v2: https://lore.kernel.org/r/0-v2-51b9896e7862+8a8c-iommufd_alloc_jgg@nvidia.c… - Use WARN_ON for the igroup->group test and move that logic to a function iommufd_group_try_get() - Change igroup->devices to igroup->device list Replace will need to iterate over all attached idevs - Rename to iommufd_group_setup_msi() - New patch to export iommu_get_resv_regions() - New patch to use per-device reserved regions instead of per-group regions - Split out the reorganizing of iommufd_device_change_pt() from the replace patch - Replace uses the per-dev reserved regions - Use stdev_id in a few more places in the selftest - Fix error handling in IOMMU_HWPT_ALLOC - Clarify comments - Rebase on v6.3-rc1 v1: https://lore.kernel.org/all/0-v1-7612f88c19f5+2f21-iommufd_alloc_jgg@nvidia… Jason Gunthorpe (17): iommufd: Move isolated msi enforcement to iommufd_device_bind() iommufd: 
Add iommufd_group iommufd: Replace the hwpt->devices list with iommufd_group iommu: Export iommu_get_resv_regions() iommufd: Keep track of each device's reserved regions instead of groups iommufd: Use the iommufd_group to avoid duplicate MSI setup iommufd: Make sw_msi_start a group global iommufd: Move putting a hwpt to a helper function iommufd: Add enforced_cache_coherency to iommufd_hw_pagetable_alloc() iommufd: Allow a hwpt to be aborted after allocation iommufd: Fix locking around hwpt allocation iommufd: Reorganize iommufd_device_attach into iommufd_device_change_pt iommufd: Add iommufd_device_replace() iommufd: Make destroy_rwsem use a lock class per object type iommufd: Add IOMMU_HWPT_ALLOC iommufd/selftest: Return the real idev id from selftest mock_domain iommufd/selftest: Add a selftest for IOMMU_HWPT_ALLOC Nicolin Chen (2): iommu: Introduce a new iommu_group_replace_domain() API iommufd/selftest: Test iommufd_device_replace() drivers/iommu/iommu-priv.h | 10 + drivers/iommu/iommu.c | 41 +- drivers/iommu/iommufd/device.c | 553 +++++++++++++----- drivers/iommu/iommufd/hw_pagetable.c | 112 +++- drivers/iommu/iommufd/io_pagetable.c | 32 +- drivers/iommu/iommufd/iommufd_private.h | 52 +- drivers/iommu/iommufd/iommufd_test.h | 6 + drivers/iommu/iommufd/main.c | 24 +- drivers/iommu/iommufd/selftest.c | 40 ++ include/linux/iommufd.h | 1 + include/uapi/linux/iommufd.h | 26 + tools/testing/selftests/iommu/iommufd.c | 67 ++- .../selftests/iommu/iommufd_fail_nth.c | 67 ++- tools/testing/selftests/iommu/iommufd_utils.h | 63 +- 14 files changed, 868 insertions(+), 226 deletions(-) create mode 100644 drivers/iommu/iommu-priv.h base-commit: f1fcbaa18b28dec10281551dfe6ed3a3ed80e3d6 -- 2.40.1

2 years, 6 months

5
36
0 0

Re: [PATCH v4 33/35] maple_tree: Update testing code for mas_{next,prev,walk}

by Geert Uytterhoeven

Hi Liam, On Thu, May 18, 2023 at 9:37 PM Liam R. Howlett <Liam.Howlett(a)oracle.com> wrote: > Now that the functions have changed the limits, update the testing of > the maple tree to test these new settings. > > Signed-off-by: Liam R. Howlett <Liam.Howlett(a)oracle.com> Thanks for your patch, which is now commit eb2e817f38cafbf7 ("maple_tree: update testing code for mas_{next,prev,walk}") in > --- a/lib/test_maple_tree.c > +++ b/lib/test_maple_tree.c > @@ -2011,7 +2011,7 @@ static noinline void __init next_prev_test(struct maple_tree *mt) > > val = mas_next(&mas, ULONG_MAX); > MT_BUG_ON(mt, val != NULL); > - MT_BUG_ON(mt, mas.index != ULONG_MAX); > + MT_BUG_ON(mt, mas.index != 0x7d6); On m68k (ARAnyM): TEST STARTING BUG at next_prev_test:2014 (1) Pass: 3749128 Run:3749129 And after that it seems to hang[*]. After adding a debug print (thus shifting all line numbers by +1): next_prev_test:mas.index = 0x138e BUG at next_prev_test:2015 (1) 0x138e = 5006, while the expected value is 0x7d6 = 2006. I guess converting this test to the KUnit framework would make it a bit easier to investigate failures... [*] Left the debug one running, and I got a few more: BUG at check_empty_area_window:2656 (1) Pass: 3754275 Run:3754277 BUG at check_empty_area_window:2657 (1) Pass: 3754275 Run:3754278 BUG at check_empty_area_window:2658 (1) Pass: 3754275 Run:3754279 BUG at check_empty_area_window:2662 (1) Pass: 3754275 Run:3754280 BUG at check_empty_area_window:2663 (1) Pass: 3754275 Run:3754281 maple_tree: 3804518 of 3804524 tests passed So the full test took more than 20 minutes... > MT_BUG_ON(mt, mas.last != ULONG_MAX); > > val = mas_prev(&mas, 0); Gr{oetje,eeting}s, Geert -- Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert(a)linux-m68k.org In personal conversations with technical people, I call myself a hacker. But when I'm talking to journalists I just say "programmer" or something like that. -- Linus Torvalds

2 years, 6 months

3
4
0 0

[linux-next:master] BUILD REGRESSION fe57d0d86f03a8b2afe2869a95477d0ed1824c96

by kernel test robot

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master branch HEAD: fe57d0d86f03a8b2afe2869a95477d0ed1824c96 Add linux-next specific files for 20230710 Error/Warning reports: https://lore.kernel.org/oe-kbuild-all/202306122223.HHER4zOo-lkp@intel.com https://lore.kernel.org/oe-kbuild-all/202306141719.MJHClSrC-lkp@intel.com https://lore.kernel.org/oe-kbuild-all/202306291857.nyJjYwqk-lkp@intel.com Error/Warning: (recently discovered and may have been fixed) arch/parisc/kernel/pdt.c:67:6: warning: no previous prototype for 'arch_report_meminfo' [-Wmissing-prototypes] drivers/gpu/drm/i915/soc/intel_gmch.c:41:13: error: variable 'mchbar_addr' set but not used [-Werror=unused-but-set-variable] lib/kunit/executor_test.c:138:4: warning: cast from 'void (*)(const void *)' to 'kunit_action_t *' (aka 'void (*)(void *)') converts to incompatible function type [-Wcast-function-type-strict] lib/kunit/test.c:775:38: warning: cast from 'void (*)(const void *)' to 'kunit_action_t *' (aka 'void (*)(void *)') converts to incompatible function type [-Wcast-function-type-strict] Unverified Error/Warning (likely false positive, please contact us if interested): drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c:98 mlx5_devcom_register_device() error: uninitialized symbol 'tmp_dev'. net/wireless/scan.c:373 cfg80211_gen_new_ie() warn: potential spectre issue 'sub->data' [r] net/wireless/scan.c:397 cfg80211_gen_new_ie() warn: possible spectre second half. 
'ext_id' {standard input}: Error: local label `"2" (instance number 9 of a fb label)' is not defined Error/Warning ids grouped by kconfigs: gcc_recent_errors |-- i386-buildonly-randconfig-r006-20230710 | `-- drivers-gpu-drm-i915-soc-intel_gmch.c:error:variable-mchbar_addr-set-but-not-used |-- parisc-randconfig-r004-20230710 | `-- arch-parisc-kernel-pdt.c:warning:no-previous-prototype-for-arch_report_meminfo |-- parisc-randconfig-r024-20230710 | `-- arch-parisc-kernel-pdt.c:warning:no-previous-prototype-for-arch_report_meminfo |-- parisc-randconfig-r031-20230710 | `-- arch-parisc-kernel-pdt.c:warning:no-previous-prototype-for-arch_report_meminfo |-- riscv-randconfig-r071-20230710 | |-- arch-riscv-kernel-signal.c:sparse:sparse:incorrect-type-in-assignment-(different-address-spaces)-expected-void-noderef-__user-datap-got-void | `-- arch-riscv-kernel-signal.c:sparse:sparse:incorrect-type-in-initializer-(different-address-spaces)-expected-void-__x-got-void-noderef-__user-assigned-datap |-- sh-allmodconfig | `-- standard-input:Error:local-label-(instance-number-of-a-fb-label)-is-not-defined `-- x86_64-randconfig-m001-20230710 |-- drivers-net-ethernet-mellanox-mlx5-core-lib-devcom.c-mlx5_devcom_register_device()-error:uninitialized-symbol-tmp_dev-. 
|-- net-wireless-scan.c-cfg80211_gen_new_ie()-warn:possible-spectre-second-half.-ext_id `-- net-wireless-scan.c-cfg80211_gen_new_ie()-warn:potential-spectre-issue-sub-data-r clang_recent_errors |-- hexagon-randconfig-r041-20230710 | `-- lib-kunit-test.c:warning:cast-from-void-(-)(const-void-)-to-kunit_action_t-(aka-void-(-)(void-)-)-converts-to-incompatible-function-type |-- hexagon-randconfig-r045-20230710 | `-- lib-kunit-test.c:warning:cast-from-void-(-)(const-void-)-to-kunit_action_t-(aka-void-(-)(void-)-)-converts-to-incompatible-function-type `-- riscv-randconfig-r042-20230710 |-- lib-kunit-executor_test.c:warning:cast-from-void-(-)(const-void-)-to-kunit_action_t-(aka-void-(-)(void-)-)-converts-to-incompatible-function-type `-- lib-kunit-test.c:warning:cast-from-void-(-)(const-void-)-to-kunit_action_t-(aka-void-(-)(void-)-)-converts-to-incompatible-function-type elapsed time: 725m configs tested: 133 configs skipped: 5 tested configs: alpha allyesconfig gcc alpha defconfig gcc alpha randconfig-r014-20230710 gcc alpha randconfig-r016-20230710 gcc alpha randconfig-r021-20230710 gcc arc alldefconfig gcc arc allyesconfig gcc arc defconfig gcc arc hsdk_defconfig gcc arc nsimosci_defconfig gcc arc randconfig-r002-20230710 gcc arc randconfig-r011-20230710 gcc arc randconfig-r035-20230710 gcc arc randconfig-r043-20230710 gcc arc vdk_hs38_smp_defconfig gcc arm allmodconfig gcc arm allyesconfig gcc arm bcm2835_defconfig clang arm defconfig gcc arm integrator_defconfig gcc arm jornada720_defconfig gcc arm mmp2_defconfig clang arm multi_v4t_defconfig gcc arm pxa3xx_defconfig gcc arm randconfig-r025-20230710 gcc arm randconfig-r046-20230710 gcc arm socfpga_defconfig clang arm spear13xx_defconfig clang arm vt8500_v6_v7_defconfig clang arm64 allyesconfig gcc arm64 defconfig gcc arm64 randconfig-r006-20230710 gcc arm64 randconfig-r035-20230710 gcc arm64 randconfig-r036-20230710 gcc csky defconfig gcc csky randconfig-r015-20230710 gcc csky randconfig-r034-20230710 gcc hexagon 
randconfig-r041-20230710 clang hexagon randconfig-r045-20230710 clang i386 allyesconfig gcc i386 buildonly-randconfig-r006-20230710 gcc i386 debian-10.3 gcc i386 defconfig gcc i386 randconfig-i006-20230710 gcc i386 randconfig-i011-20230710 clang i386 randconfig-i012-20230710 clang i386 randconfig-i013-20230710 clang i386 randconfig-i014-20230710 clang i386 randconfig-i015-20230710 clang i386 randconfig-i016-20230710 clang i386 randconfig-r036-20230710 gcc loongarch allmodconfig gcc loongarch allnoconfig gcc loongarch defconfig gcc loongarch randconfig-r005-20230710 gcc m68k allmodconfig gcc m68k allyesconfig gcc m68k amiga_defconfig gcc m68k defconfig gcc m68k m5275evb_defconfig gcc m68k m5475evb_defconfig gcc m68k randconfig-r033-20230710 gcc microblaze randconfig-r026-20230710 gcc mips allmodconfig gcc mips allyesconfig gcc mips decstation_64_defconfig gcc mips jazz_defconfig gcc mips malta_defconfig clang mips maltaup_defconfig clang mips vocore2_defconfig gcc nios2 10m50_defconfig gcc nios2 defconfig gcc parisc allyesconfig gcc parisc defconfig gcc parisc randconfig-r004-20230710 gcc parisc randconfig-r024-20230710 gcc parisc randconfig-r031-20230710 gcc parisc64 defconfig gcc powerpc allmodconfig gcc powerpc allnoconfig gcc powerpc ge_imp3a_defconfig clang powerpc icon_defconfig clang powerpc iss476-smp_defconfig gcc powerpc kilauea_defconfig clang powerpc maple_defconfig gcc powerpc mpc512x_defconfig clang powerpc mpc83xx_defconfig gcc powerpc pasemi_defconfig gcc powerpc ppc40x_defconfig gcc powerpc ppc44x_defconfig clang powerpc randconfig-r032-20230710 gcc powerpc sam440ep_defconfig gcc riscv allmodconfig gcc riscv allnoconfig gcc riscv allyesconfig gcc riscv defconfig gcc riscv randconfig-r031-20230710 gcc riscv randconfig-r042-20230710 clang riscv rv32_defconfig gcc s390 alldefconfig clang s390 allmodconfig gcc s390 allyesconfig gcc s390 defconfig gcc s390 randconfig-r044-20230710 clang sh allmodconfig gcc sh j2_defconfig gcc sh randconfig-r023-20230710 
gcc sh rsk7203_defconfig gcc sh rsk7269_defconfig gcc sh rts7751r2d1_defconfig gcc sh se7721_defconfig gcc sh sh7710voipgw_defconfig gcc sparc allyesconfig gcc sparc defconfig gcc sparc64 randconfig-r012-20230710 gcc um allmodconfig clang um allnoconfig clang um allyesconfig clang um defconfig gcc um i386_defconfig gcc um randconfig-r003-20230710 clang um randconfig-r033-20230710 clang um x86_64_defconfig gcc x86_64 allyesconfig gcc x86_64 defconfig gcc x86_64 kexec gcc x86_64 randconfig-r013-20230710 clang x86_64 randconfig-r032-20230710 gcc x86_64 randconfig-x006-20230710 clang x86_64 randconfig-x016-20230710 gcc x86_64 rhel-8.3-rust clang x86_64 rhel-8.3 gcc xtensa randconfig-r035-20230710 gcc -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki

2 years, 6 months

1
0
0 0

[PATCH v0] selftests/bpf: replace manual array size calc with ARRAYSIZE.

by Mahmoud Maatuq

fixes coccinelle warnings due to manual calculation of array size. Signed-off-by: Mahmoud Maatuq <mahmoudmatook.mm(a)gmail.com> --- tools/testing/selftests/bpf/progs/syscall.c | 6 +++++- tools/testing/selftests/bpf/progs/test_rdonly_maps.c | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/syscall.c b/tools/testing/selftests/bpf/progs/syscall.c index e550f728962d..9cc25d93c601 100644 --- a/tools/testing/selftests/bpf/progs/syscall.c +++ b/tools/testing/selftests/bpf/progs/syscall.c @@ -27,6 +27,10 @@ struct args { BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \ BTF_INT_ENC(encoding, bits_offset, bits) +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) +#endif + static int btf_load(void) { struct btf_blob { @@ -82,7 +86,7 @@ int bpf_prog(struct args *ctx) static __u64 value = 34; static union bpf_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, - .insn_cnt = sizeof(insns) / sizeof(insns[0]), + .insn_cnt = ARRAY_SIZE(insns), }; int ret; diff --git a/tools/testing/selftests/bpf/progs/test_rdonly_maps.c b/tools/testing/selftests/bpf/progs/test_rdonly_maps.c index fc8e8a34a3db..7431936ab26c 100644 --- a/tools/testing/selftests/bpf/progs/test_rdonly_maps.c +++ b/tools/testing/selftests/bpf/progs/test_rdonly_maps.c @@ -21,6 +21,10 @@ struct { unsigned sum; } res = {}; +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) +#endif + SEC("raw_tracepoint/sys_enter:skip_loop") int skip_loop(struct pt_regs *ctx) { @@ -64,7 +68,7 @@ int full_loop(struct pt_regs *ctx) { /* prevent compiler to optimize everything out */ unsigned * volatile p = (void *)&rdonly_values.a; - int i = sizeof(rdonly_values.a) / sizeof(rdonly_values.a[0]); + int i = ARRAY_SIZE(rdonly_values.a); unsigned iters = 0, sum = 0; /* validate verifier can allow full loop as well */ -- 2.34.1

2 years, 6 months

1
0
0 0

[PATCH v4 00/18] selftests/nolibc: allow run with minimal kernel config

by Zhangjin Wu

Hi, Willy This v4 mainly uses the argv0 suggested by you, at the same time, a new run-libc-test target is added for glibc and musl, and the RB_ flags are added for nolibc to allow compiling nolibc-test.c without <linux/reboot.h> for glibc, musl and nolibc (mainly for musl-gcc, without -I /path/to/sysroot). This patchset is based on the 20230705-nolibc-series2 branch of nolibc repo [2], it must be applied after our v6 __sysret series [3] (argv0 exported there) and Thomas' chmod_net removal patchset [4] (the new chmod_argv0 is added at the same line of chmod_net, will conflict). This patchset assumes the chmod_net removal patchset will be applied first, if not, the chmod_argv0 added alphabetically will not be applied. Since our new chmod_argv0 is added precisely to replace chmod_net, Willy, is it ok for you to at least apply the chmod_net removal patch [5] before this patchset? selftests/nolibc: drop test chmod_net This patchset is tested together with the v6 __sysret series [3]: arch/board | result ------------|------------ arm/vexpress-a9 | 142 test(s) passed, 1 skipped, 0 failed. arm/virt | 142 test(s) passed, 1 skipped, 0 failed. aarch64/virt | 142 test(s) passed, 1 skipped, 0 failed. ppc/g3beige | not supported ppc/ppce500 | not supported i386/pc | 142 test(s) passed, 1 skipped, 0 failed. x86_64/pc | 142 test(s) passed, 1 skipped, 0 failed. mipsel/malta | 142 test(s) passed, 1 skipped, 0 failed. loongarch64/virt | 142 test(s) passed, 1 skipped, 0 failed. riscv64/virt | 142 test(s) passed, 1 skipped, 0 failed. riscv32/virt | 0 test(s) passed, 0 skipped, 0 failed. s390x/s390-ccw-virtio | 142 test(s) passed, 1 skipped, 0 failed. If using tinyconfig + basic console options (meaning all of the other options are disabled, including procfs, shmem, tmpfs, net and memfd_create; to save test time, only 4 randomly chosen archs were tested): ... 
LOG: testing report for loongarch64/virt: 15 chmod_self [SKIPPED] 16 chown_self [SKIPPED] 40 link_cross [SKIPPED] 0 -fstackprotector not supported [SKIPPED] 139 test(s) passed, 4 skipped, 0 failed. See all results in /labs/linux-lab/logging/nolibc/loongarch64-virt-nolibc-test.log LOG: testing summary: arch/board | result ------------|------------ arm/vexpress-a9 | 139 test(s) passed, 4 skipped, 0 failed. x86_64/pc | 139 test(s) passed, 4 skipped, 0 failed. mipsel/malta | 139 test(s) passed, 4 skipped, 0 failed. loongarch64/virt | 139 test(s) passed, 4 skipped, 0 failed. Changes from v3 --> v4: * selftests/nolibc: stat_fault: silence NULL argument warning with glibc selftests/nolibc: gettid: restore for glibc and musl selftests/nolibc: add _LARGEFILE64_SOURCE for musl selftests/nolibc: fix up int_fast16/32_t test cases for musl selftests/nolibc: fix up kernel parameters support selftests/nolibc: link_cross: use /proc/self/cmdline tools/nolibc: add rmdir() support selftests/nolibc: add a new rmdir() test case selftests/nolibc: fix up failures when CONFIG_PROC_FS=n selftests/nolibc: prepare /tmp for tmpfs or ramfs selftests/nolibc: vfprintf: remove MEMFD_CREATE dependency No change. * selftests/nolibc: add run-libc-test target New run and report for glibc or musl. for musl, we can simply issue: $ make run-libc-test CC=/path/to/musl-install/bin/musl-gcc * tools/nolibc: types.h: add RB_ flags for reboot() selftests/nolibc: prefer <sys/reboot.h> to <linux/reboot.h> Required by musl to compile nolibc-test.c without -I/path/to/sysroot * selftests/nolibc: chdir_root: restore current path after test restore current path to prevent breakage of using relative path * selftests/nolibc: stat_timestamps: remove procfs dependency selftests/nolibc: chroot_exe: remove procfs dependency selftests/nolibc: add chmod_argv0 test use argv0 instead of '/init' as before. 
Best regards, Zhangjin --- [1]: https://lore.kernel.org/lkml/cover.1688134399.git.falcon@tinylab.org/ [2]: https://git.kernel.org/pub/scm/linux/kernel/git/wtarreau/nolibc.git [3]: https://lore.kernel.org/lkml/cover.1688739492.git.falcon@tinylab.org/ [4]: https://lore.kernel.org/lkml/20230624-proc-net-setattr-v1-0-73176812adee@we… [5]: https://lore.kernel.org/lkml/20230624-proc-net-setattr-v1-1-73176812adee@we… Zhangjin Wu (18): selftests/nolibc: add run-libc-test target selftests/nolibc: stat_fault: silence NULL argument warning with glibc selftests/nolibc: gettid: restore for glibc and musl selftests/nolibc: add _LARGEFILE64_SOURCE for musl selftests/nolibc: fix up int_fast16/32_t test cases for musl tools/nolibc: types.h: add RB_ flags for reboot() selftests/nolibc: prefer <sys/reboot.h> to <linux/reboot.h> selftests/nolibc: fix up kernel parameters support selftests/nolibc: link_cross: use /proc/self/cmdline tools/nolibc: add rmdir() support selftests/nolibc: add a new rmdir() test case selftests/nolibc: fix up failures when CONFIG_PROC_FS=n selftests/nolibc: prepare /tmp for tmpfs or ramfs selftests/nolibc: vfprintf: remove MEMFD_CREATE dependency selftests/nolibc: chdir_root: restore current path after test selftests/nolibc: stat_timestamps: remove procfs dependency selftests/nolibc: chroot_exe: remove procfs dependency selftests/nolibc: add chmod_argv0 test tools/include/nolibc/sys.h | 23 ++++- tools/include/nolibc/types.h | 12 ++- tools/testing/selftests/nolibc/Makefile | 4 + tools/testing/selftests/nolibc/nolibc-test.c | 88 +++++++++++++++----- 4 files changed, 104 insertions(+), 23 deletions(-) -- 2.25.1

2 years, 6 months

2
24
0 0

[PATCH] selftests: hid: fix vmtests.sh not running make headers

by Benjamin Tissoires

According to commit 01d6c48a828b ("Documentation: kselftest: "make headers" is a prerequisite"), running the kselftests requires running "make headers" first. Do that in "vmtest.sh" as well to fix the HID CI. Signed-off-by: Benjamin Tissoires <bentiss(a)kernel.org> --- Looks like the new master branch (v6.5-rc1) broke my CI. And given that `make headers` is now a prerequisite for running the kselftests, also include that command in vmtest.sh. Broken CI job: https://gitlab.freedesktop.org/bentiss/hid/-/jobs/44704436 Fixed CI job: https://gitlab.freedesktop.org/bentiss/hid/-/jobs/45151040 --- tools/testing/selftests/hid/vmtest.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/hid/vmtest.sh b/tools/testing/selftests/hid/vmtest.sh index 681b906b4853..4da48bf6b328 100755 --- a/tools/testing/selftests/hid/vmtest.sh +++ b/tools/testing/selftests/hid/vmtest.sh @@ -79,6 +79,7 @@ recompile_kernel() cd "${kernel_checkout}" ${make_command} olddefconfig + ${make_command} headers ${make_command} } --- base-commit: 0e382fa72bbf0610be40af9af9b03b0cd149df82 change-id: 20230709-fix-selftests-c8b0bdff1d20 Best regards, -- Benjamin Tissoires <bentiss(a)kernel.org>

2 years, 6 months

2
2
0 0

[PATCH v1 0/5] selftests/nolibc: report: print test status

by Zhangjin Wu

Hi, Willy As you suggested, the 'status: [success|warning|failure]' info is added to the summary line, with additional newlines around this line to make the status info stand out. At the same time, the total number of tests is printed, and the passed, skipped and failed values are aligned with '%03d'. This patchset is based on 20230705-nolibc-series2 of nolibc repo[1]. The test result looks like: ... 138 test(s): 135 passed, 002 skipped, 001 failed => status: failure See all results in /labs/linux-lab/src/linux-stable/tools/testing/selftests/nolibc/run.out Or: ... 137 test(s): 134 passed, 003 skipped, 000 failed => status: warning See all results in /labs/linux-lab/src/linux-stable/tools/testing/selftests/nolibc/run.out Best regards, Zhangjin --- [1]: https://git.kernel.org/pub/scm/linux/kernel/git/wtarreau/nolibc.git Zhangjin Wu (5): selftests/nolibc: report: print a summarized test status selftests/nolibc: report: print total tests selftests/nolibc: report: align passed, skipped and failed selftests/nolibc: report: extrude the test status line selftests/nolibc: report: add newline before test failures tools/testing/selftests/nolibc/Makefile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) -- 2.25.1

2 years, 6 months

2
13
0 0

[linux-next:master] BUILD REGRESSION 296d53d8f84ce50ffaee7d575487058c8d437335

by kernel test robot

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master branch HEAD: 296d53d8f84ce50ffaee7d575487058c8d437335 Add linux-next specific files for 20230703 Error/Warning reports: https://lore.kernel.org/oe-kbuild-all/202306122223.HHER4zOo-lkp@intel.com https://lore.kernel.org/oe-kbuild-all/202306151954.Rsz6HP7h-lkp@intel.com https://lore.kernel.org/oe-kbuild-all/202306301709.lvrxzyCj-lkp@intel.com https://lore.kernel.org/oe-kbuild-all/202306301756.x8dgyYnL-lkp@intel.com Error/Warning: (recently discovered and may have been fixed) arch/parisc/kernel/pdt.c:66:6: warning: no previous prototype for 'arch_report_meminfo' [-Wmissing-prototypes] drivers/bluetooth/btmtk.c:386:32: error: no member named 'dump' in 'struct hci_dev' drivers/bluetooth/btmtk.c:386:44: error: 'struct hci_dev' has no member named 'dump' drivers/char/mem.c:164:25: error: implicit declaration of function 'unxlate_dev_mem_ptr'; did you mean 'xlate_dev_mem_ptr'? [-Werror=implicit-function-declaration] lib/kunit/executor_test.c:138:4: warning: cast from 'void (*)(const void *)' to 'kunit_action_t *' (aka 'void (*)(void *)') converts to incompatible function type [-Wcast-function-type-strict] lib/kunit/test.c:775:38: warning: cast from 'void (*)(const void *)' to 'kunit_action_t *' (aka 'void (*)(void *)') converts to incompatible function type [-Wcast-function-type-strict] Unverified Error/Warning (likely false positive, please contact us if interested): arch/arm64/kvm/mmu.c:147:3-9: preceding lock on line 140 drivers/clk/qcom/gpucc-sm8550.c:37:22: sparse: sparse: decimal constant 2300000000 is between LONG_MAX and ULONG_MAX. For C99 that means long long, C90 compilers are very likely to produce unsigned long (and a warning) here drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c:98 mlx5_devcom_register_device() error: uninitialized symbol 'tmp_dev'. drivers/usb/cdns3/cdns3-starfive.c:23: warning: expecting prototype for cdns3(). 
Prototype was for USB_STRAP_HOST() instead {standard input}: Error: local label `"2" (instance number 9 of a fb label)' is not defined Error/Warning ids grouped by kconfigs: gcc_recent_errors |-- alpha-randconfig-r025-20230703 | `-- drivers-bluetooth-btmtk.c:error:struct-hci_dev-has-no-member-named-dump |-- arm-randconfig-r073-20230703 | `-- drivers-clk-qcom-gpucc-sm8550.c:sparse:sparse:decimal-constant-is-between-LONG_MAX-and-ULONG_MAX.-For-C99-that-means-long-long-C90-compilers-are-very-likely-to-produce-unsigned-long-(and-a-warning)-he |-- arm64-randconfig-r054-20230703 | `-- arch-arm64-kvm-mmu.c:preceding-lock-on-line |-- i386-randconfig-m031-20230703 | `-- drivers-net-ethernet-mellanox-mlx5-core-lib-devcom.c-mlx5_devcom_register_device()-error:uninitialized-symbol-tmp_dev-. |-- parisc-allnoconfig | `-- arch-parisc-kernel-pdt.c:warning:no-previous-prototype-for-arch_report_meminfo |-- parisc-allyesconfig | `-- arch-parisc-kernel-pdt.c:warning:no-previous-prototype-for-arch_report_meminfo |-- parisc-defconfig | `-- arch-parisc-kernel-pdt.c:warning:no-previous-prototype-for-arch_report_meminfo |-- parisc-randconfig-r011-20230703 | `-- arch-parisc-kernel-pdt.c:warning:no-previous-prototype-for-arch_report_meminfo |-- parisc-randconfig-r035-20230703 | `-- arch-parisc-kernel-pdt.c:warning:no-previous-prototype-for-arch_report_meminfo |-- parisc64-defconfig | `-- arch-parisc-kernel-pdt.c:warning:no-previous-prototype-for-arch_report_meminfo |-- riscv-allmodconfig | `-- drivers-usb-cdns3-cdns3-starfive.c:warning:expecting-prototype-for-cdns3().-Prototype-was-for-USB_STRAP_HOST()-instead |-- riscv-allyesconfig | `-- drivers-usb-cdns3-cdns3-starfive.c:warning:expecting-prototype-for-cdns3().-Prototype-was-for-USB_STRAP_HOST()-instead |-- riscv-randconfig-r091-20230703 | |-- arch-riscv-kernel-signal.c:sparse:sparse:incorrect-type-in-initializer-(different-address-spaces)-expected-void-__val-got-void-noderef-__user-assigned-datap | `-- 
drivers-bluetooth-btmtk.c:error:struct-hci_dev-has-no-member-named-dump |-- sh-allmodconfig | |-- drivers-char-mem.c:error:implicit-declaration-of-function-unxlate_dev_mem_ptr | `-- standard-input:Error:local-label-(instance-number-of-a-fb-label)-is-not-defined |-- sh-randconfig-r015-20230703 | `-- drivers-char-mem.c:error:implicit-declaration-of-function-unxlate_dev_mem_ptr |-- sh-randconfig-r024-20230703 | `-- drivers-char-mem.c:error:implicit-declaration-of-function-unxlate_dev_mem_ptr |-- sh-se7619_defconfig | `-- drivers-char-mem.c:error:implicit-declaration-of-function-unxlate_dev_mem_ptr `-- x86_64-buildonly-randconfig-r003-20230703 `-- drivers-bluetooth-btmtk.c:error:struct-hci_dev-has-no-member-named-dump clang_recent_errors |-- arm-randconfig-r005-20230703 | `-- lib-kunit-test.c:warning:cast-from-void-(-)(const-void-)-to-kunit_action_t-(aka-void-(-)(void-)-)-converts-to-incompatible-function-type |-- arm-randconfig-r035-20230703 | |-- lib-kunit-executor_test.c:warning:cast-from-void-(-)(const-void-)-to-kunit_action_t-(aka-void-(-)(void-)-)-converts-to-incompatible-function-type | `-- lib-kunit-test.c:warning:cast-from-void-(-)(const-void-)-to-kunit_action_t-(aka-void-(-)(void-)-)-converts-to-incompatible-function-type |-- arm64-randconfig-r026-20230703 | `-- lib-kunit-test.c:warning:cast-from-void-(-)(const-void-)-to-kunit_action_t-(aka-void-(-)(void-)-)-converts-to-incompatible-function-type |-- hexagon-randconfig-r041-20230703 | |-- drivers-bluetooth-btmtk.c:error:no-member-named-dump-in-struct-hci_dev | |-- lib-kunit-executor_test.c:warning:cast-from-void-(-)(const-void-)-to-kunit_action_t-(aka-void-(-)(void-)-)-converts-to-incompatible-function-type | `-- lib-kunit-test.c:warning:cast-from-void-(-)(const-void-)-to-kunit_action_t-(aka-void-(-)(void-)-)-converts-to-incompatible-function-type |-- hexagon-randconfig-r045-20230703 | |-- 
lib-kunit-executor_test.c:warning:cast-from-void-(-)(const-void-)-to-kunit_action_t-(aka-void-(-)(void-)-)-converts-to-incompatible-function-type | `-- lib-kunit-test.c:warning:cast-from-void-(-)(const-void-)-to-kunit_action_t-(aka-void-(-)(void-)-)-converts-to-incompatible-function-type `-- i386-randconfig-i011-20230703 `-- drivers-bluetooth-btmtk.c:error:no-member-named-dump-in-struct-hci_dev elapsed time: 747m configs tested: 136 configs skipped: 5 tested configs: alpha alldefconfig gcc alpha allyesconfig gcc alpha defconfig gcc alpha randconfig-r002-20230703 gcc alpha randconfig-r025-20230703 gcc arc allyesconfig gcc arc defconfig gcc arc nsimosci_hs_defconfig gcc arc randconfig-r014-20230703 gcc arc randconfig-r043-20230703 gcc arm allmodconfig gcc arm allyesconfig gcc arm defconfig clang arm defconfig gcc arm lpc32xx_defconfig clang arm omap1_defconfig clang arm pxa168_defconfig clang arm randconfig-r005-20230703 clang arm randconfig-r035-20230703 clang arm randconfig-r046-20230703 gcc arm socfpga_defconfig clang arm wpcm450_defconfig gcc arm64 alldefconfig gcc arm64 allyesconfig gcc arm64 defconfig gcc arm64 randconfig-r026-20230703 clang csky defconfig gcc csky randconfig-r006-20230703 gcc csky randconfig-r021-20230703 gcc hexagon randconfig-r041-20230703 clang hexagon randconfig-r045-20230703 clang i386 allyesconfig gcc i386 buildonly-randconfig-r004-20230703 gcc i386 buildonly-randconfig-r005-20230703 gcc i386 buildonly-randconfig-r006-20230703 gcc i386 debian-10.3 gcc i386 defconfig gcc i386 randconfig-i001-20230703 gcc i386 randconfig-i002-20230703 gcc i386 randconfig-i003-20230703 gcc i386 randconfig-i004-20230703 gcc i386 randconfig-i005-20230703 gcc i386 randconfig-i006-20230703 gcc i386 randconfig-i011-20230703 clang i386 randconfig-i012-20230703 clang i386 randconfig-i013-20230703 clang i386 randconfig-i014-20230703 clang i386 randconfig-i016-20230703 clang i386 randconfig-r036-20230703 gcc loongarch allmodconfig gcc loongarch allnoconfig gcc 
loongarch defconfig gcc m68k allmodconfig gcc m68k allyesconfig gcc m68k apollo_defconfig gcc m68k defconfig gcc m68k randconfig-r013-20230703 gcc microblaze defconfig gcc mips allmodconfig gcc mips allyesconfig gcc mips ar7_defconfig gcc mips gcw0_defconfig gcc mips ip32_defconfig gcc mips jazz_defconfig gcc mips lemote2f_defconfig clang mips randconfig-r023-20230703 gcc mips randconfig-r031-20230703 clang nios2 defconfig gcc openrisc randconfig-r034-20230703 gcc parisc allyesconfig gcc parisc defconfig gcc parisc randconfig-r011-20230703 gcc parisc randconfig-r035-20230703 gcc parisc64 defconfig gcc powerpc allmodconfig gcc powerpc allnoconfig gcc powerpc canyonlands_defconfig gcc powerpc ep8248e_defconfig gcc powerpc icon_defconfig clang powerpc klondike_defconfig gcc powerpc ksi8560_defconfig clang powerpc mpc5200_defconfig clang powerpc mpc83xx_defconfig gcc powerpc storcenter_defconfig gcc powerpc wii_defconfig gcc riscv alldefconfig clang riscv allmodconfig gcc riscv allnoconfig gcc riscv allyesconfig gcc riscv defconfig gcc riscv randconfig-r042-20230703 clang riscv rv32_defconfig gcc s390 allmodconfig gcc s390 allyesconfig gcc s390 defconfig gcc s390 randconfig-r004-20230703 gcc s390 randconfig-r033-20230703 gcc s390 randconfig-r044-20230703 clang sh allmodconfig gcc sh apsh4a3a_defconfig gcc sh randconfig-r015-20230703 gcc sh randconfig-r024-20230703 gcc sh randconfig-r031-20230703 gcc sh rts7751r2dplus_defconfig gcc sh se7619_defconfig gcc sh se7751_defconfig gcc sh sh7785lcr_32bit_defconfig gcc sparc allyesconfig gcc sparc defconfig gcc sparc randconfig-r022-20230703 gcc sparc64 randconfig-r001-20230703 gcc sparc64 randconfig-r033-20230703 gcc um allmodconfig clang um allnoconfig clang um allyesconfig clang um defconfig gcc um i386_defconfig gcc um randconfig-r034-20230703 clang um randconfig-r036-20230703 clang um x86_64_defconfig gcc x86_64 allyesconfig gcc x86_64 buildonly-randconfig-r001-20230703 gcc x86_64 buildonly-randconfig-r002-20230703 gcc 
x86_64 buildonly-randconfig-r003-20230703 gcc x86_64 defconfig gcc x86_64 kexec gcc x86_64 randconfig-x001-20230703 clang x86_64 randconfig-x002-20230703 clang x86_64 randconfig-x003-20230703 clang x86_64 randconfig-x004-20230703 clang x86_64 randconfig-x005-20230703 clang x86_64 randconfig-x006-20230703 clang x86_64 rhel-8.3-rust clang x86_64 rhel-8.3 gcc xtensa randconfig-r003-20230703 gcc xtensa randconfig-r012-20230703 gcc xtensa randconfig-r032-20230703 gcc -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki

2 years, 6 months

5
4
0 0

Re: [PATCH v3 03/10] eventfs: adding eventfs dir add functions

by Steven Rostedt

On Mon, 10 Jul 2023 02:17:01 +0000 Nadav Amit <namit(a)vmware.com> wrote: > > On Jul 9, 2023, at 6:54 PM, Steven Rostedt <rostedt(a)goodmis.org> wrote: > > > > + union { > > + struct rcu_head rcu; > > + struct llist_node llist; /* For freeing after RCU */ > > + }; > > The memory savings from using a union might not be worth the potential impact > of type confusion and bugs. It's also documentation. The two are related, as one is the hand off to the other. It's not a random union, and I'd like to leave it that way. -- Steve

2 years, 6 months

1
0
0 0

[PATCH] selftests/nolibc: simplify call to ioperm

by Thomas Weißschuh

Since commit 53fcfafa8c5c ("tools/nolibc/unistd: add syscall()") nolibc has support for syscall(2). Use it to get rid of some ifdef-ery. Signed-off-by: Thomas Weißschuh <linux(a)weissschuh.net> --- tools/testing/selftests/nolibc/nolibc-test.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index 486334981e60..c02d89953679 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -1051,11 +1051,7 @@ int main(int argc, char **argv, char **envp) * exit with status code 2N+1 when N is written to 0x501. We * hard-code the syscall here as it's arch-dependent. */ -#if defined(_NOLIBC_SYS_H) - else if (my_syscall3(__NR_ioperm, 0x501, 1, 1) == 0) -#else - else if (ioperm(0x501, 1, 1) == 0) -#endif + else if (syscall(__NR_ioperm, 0x501, 1, 1) == 0) __asm__ volatile ("outb %%al, %%dx" :: "d"(0x501), "a"(0)); /* if it does nothing, fall back to the regular panic */ #endif --- base-commit: a901a3568fd26ca9c4a82d8bc5ed5b3ed844d451 change-id: 20230703-nolibc-ioperm-88d87ae6d5e9 Best regards, -- Thomas Weißschuh <linux(a)weissschuh.net>

2 years, 6 months

2
1
0 0

[PATCH v4 0/4] RISC-V: mm: Make SV48 the default address space

by Charlie Jenkins

Make sv48 the default address space for mmap as some applications currently depend on this assumption. Also enable users to select desired address space using a non-zero hint address to mmap. Previous kernel changes caused Java and other applications to be broken on sv57 which this patch fixes. Documentation is also added to the RISC-V virtual memory section to explain these changes. -Charlie --- v4: - Split testcases/document patch into test cases, in-code documentation, and formal documentation patches - Modified the mmap_base macro to be more legible and better represent memory layout - Fixed documentation to better reflect the implementation - Renamed DEFAULT_VA_BITS to MMAP_VA_BITS - Added additional test case for rlimit changes --- Charlie Jenkins (4): RISC-V: mm: Restrict address space for sv39,sv48,sv57 RISC-V: mm: Add tests for RISC-V mm RISC-V: mm: Update pgtable comment documentation RISC-V: mm: Document mmap changes Documentation/riscv/vm-layout.rst | 22 +++ arch/riscv/include/asm/elf.h | 2 +- arch/riscv/include/asm/pgtable.h | 21 ++- arch/riscv/include/asm/processor.h | 43 +++++- tools/testing/selftests/riscv/Makefile | 2 +- tools/testing/selftests/riscv/mm/.gitignore | 1 + tools/testing/selftests/riscv/mm/Makefile | 21 +++ .../selftests/riscv/mm/testcases/mmap.c | 133 ++++++++++++++++++ 8 files changed, 232 insertions(+), 13 deletions(-) create mode 100644 tools/testing/selftests/riscv/mm/.gitignore create mode 100644 tools/testing/selftests/riscv/mm/Makefile create mode 100644 tools/testing/selftests/riscv/mm/testcases/mmap.c -- 2.41.0

2 years, 6 months

2
5
0 0

[PATCH v3 0/8] add UFFDIO_POISON to simulate memory poisoning with UFFD

by Axel Rasmussen

This series adds a new userfaultfd feature, UFFDIO_POISON. See commit 4 for a detailed description of the feature. The series is based on Linus master (partial 6.5 merge window), and structured like this: - Patches 1-3 are preparation / refactoring - Patches 4-6 implement and advertise the new feature - Patches 7-8 implement a unit test for the new feature Changelog: v2 -> v3: - Rebase onto current Linus master. - Don't overwrite existing PTE markers for non-hugetlb UFFDIO_POISON. Before, non-hugetlb would override them, but hugetlb would not. I don't think there's a use case where we *want* to override a UFFD_WP marker for example, so take the more conservative behavior for all kinds of memory. - [Peter] Drop hugetlb mfill atomic refactoring, since it isn't needed for this series (we don't touch that code directly anyway). - [Peter] Switch to re-using PTE_MARKER_SWAPIN_ERROR instead of defining new PTE_MARKER_UFFD_POISON. - [Peter] Extract start / len range overflow check into existing validate_range helper; this fixes the style issue of unnecessary braces in the UFFDIO_POISON implementation, because this code is just deleted. - [Peter] Extract file size check out into a new helper. - [Peter] Defer actually "enabling" the new feature until the last commit in the series; combine this with adding the documentation. As a consequence, move the selftest commits after this one. - [Randy] Fix typo in documentation. v1 -> v2: - [Peter] Return VM_FAULT_HWPOISON not VM_FAULT_SIGBUS, to yield the correct behavior for KVM (guest MCE). - [Peter] Rename UFFDIO_SIGBUS to UFFDIO_POISON. - [Peter] Implement hugetlbfs support for UFFDIO_POISON. 
Axel Rasmussen (8): mm: make PTE_MARKER_SWAPIN_ERROR more general mm: userfaultfd: check for start + len overflow in validate_range mm: userfaultfd: extract file size check out into a helper mm: userfaultfd: add new UFFDIO_POISON ioctl mm: userfaultfd: support UFFDIO_POISON for hugetlbfs mm: userfaultfd: document and enable new UFFDIO_POISON feature selftests/mm: refactor uffd_poll_thread to allow custom fault handlers selftests/mm: add uffd unit test for UFFDIO_POISON Documentation/admin-guide/mm/userfaultfd.rst | 15 +++ fs/userfaultfd.c | 73 ++++++++++-- include/linux/mm_inline.h | 19 +++ include/linux/swapops.h | 10 +- include/linux/userfaultfd_k.h | 4 + include/uapi/linux/userfaultfd.h | 25 +++- mm/hugetlb.c | 51 ++++++-- mm/madvise.c | 2 +- mm/memory.c | 15 ++- mm/mprotect.c | 4 +- mm/shmem.c | 4 +- mm/swapfile.c | 2 +- mm/userfaultfd.c | 83 ++++++++++--- tools/testing/selftests/mm/uffd-common.c | 5 +- tools/testing/selftests/mm/uffd-common.h | 3 + tools/testing/selftests/mm/uffd-stress.c | 12 +- tools/testing/selftests/mm/uffd-unit-tests.c | 117 +++++++++++++++++++ 17 files changed, 377 insertions(+), 67 deletions(-) -- 2.41.0.255.g8b1d071c50-goog

2 years, 6 months

2
18
0 0

ww_mutex.sh hangs since v5.16-rc1

by Li Zhijian

Hi Folks LKP/0Day found that ww_mutex.sh cannot complete since v5.16-rc1, but I'm pretty sorry that we failed to bisect the FBC, instead, the bisection pointed to a/below merge commit(91e1c99e17) finally. Due to this hang, other tests in the same group are also blocked in 0Day, we hope we can fix this hang ASAP. So if you have any idea about this, or need more debug information, feel free to let me know :) BTW, ww_mutex.sh was failed in v5.15 without hang, and looks it cannot reproduce on a vm. Our box: root@lkp-knm01 ~# lscpu Architecture: x86_64 CPU op-mode(s): 32-bit, 64-bit Byte Order: Little Endian Address sizes: 46 bits physical, 48 bits virtual CPU(s): 288 On-line CPU(s) list: 0-287 Thread(s) per core: 4 Core(s) per socket: 72 Socket(s): 1 NUMA node(s): 2 Vendor ID: GenuineIntel CPU family: 6 Model: 133 Model name: Intel(R) Xeon Phi(TM) CPU 7295 @ 1.50GHz Stepping: 0 CPU MHz: 1385.255 CPU max MHz: 1600.0000 CPU min MHz: 1000.0000 BogoMIPS: 2992.76 Virtualization: VT-x L1d cache: 32K L1i cache: 32K L2 cache: 1024K NUMA node0 CPU(s): 0-287 NUMA node1 CPU(s): Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx est tm2 ssse3 fma cx16 xtpr pdcm sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch ring3mwait cpuid_fault epb pti tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms avx512f rdseed adx avx512pf avx512er avx512cd xsaveopt dtherm ida arat pln pts avx512_vpopcntdq avx512_4vnniw avx512_4fmaps Below the call stack in v5.16-rc2 [ 1000.374954][ T2713] make: Leaving directory '/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-136057256686de39cc3a07c2e39ef6bc43003ff6/tools/testing/selftests/locking' [ 1000.375030][ T2713] [ 1000.428791][ T2713] 
2021-11-22 22:21:27 make run_tests -C locking [ 1000.428864][ T2713] [ 1000.491043][ T2713] make: Entering directory '/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-136057256686de39cc3a07c2e39ef6bc43003ff6/tools/testing/selftests/locking' [ 1000.491121][ T2713] [ 1000.540807][ T2713] TAP version 13 [ 1000.540882][ T2713] [ 1000.576050][ T2713] 1..1 [ 1000.576282][ T2713] [ 1000.612980][ T2713] # selftests: locking: ww_mutex.sh [ 1000.613288][ T2713] [ 1495.201324][ T1577] INFO: task kworker/u576:16:1470 blocked for more than 491 seconds. [ 1495.220059][ T1577] Tainted: G B 5.16.0-rc2 #1 [ 1495.240902][ T1577] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 1495.265617][ T1577] task:kworker/u576:16 state:D stack: 0 pid: 1470 ppid: 2 flags:0x00004000 [ 1495.289054][ T1577] Workqueue: test-ww_mutex test_cycle_work [test_ww_mutex] [ 1495.310936][ T1577] Call Trace: [ 1495.327809][ T1577] <TASK> [ 1495.344735][ T1577] __schedule+0xdb0/0x25c0 [ 1495.362764][ T1577] ? io_schedule_timeout+0x180/0x180 [ 1495.382013][ T1577] ? lock_downgrade+0x680/0x680 [ 1495.400894][ T1577] ? do_raw_spin_lock+0x125/0x2c0 [ 1495.418866][ T1577] schedule+0xe4/0x280 [ 1495.435597][ T1577] schedule_preempt_disabled+0x18/0x40 [ 1495.454588][ T1577] __ww_mutex_lock+0x1248/0x34c0 [ 1495.476189][ T1577] ? test_cycle_work+0x1bb/0x500 [test_ww_mutex] [ 1495.497763][ T1577] ? mutex_lock_interruptible_nested+0x40/0x40 [ 1495.518959][ T1577] ? lock_downgrade+0x680/0x680 [ 1495.536861][ T1577] ? wait_for_completion_interruptible+0x340/0x340 [ 1495.556253][ T1577] ? ww_mutex_lock+0x3e/0x380 [ 1495.574003][ T1577] ww_mutex_lock+0x3e/0x380 [ 1495.591958][ T1577] test_cycle_work+0x1bb/0x500 [test_ww_mutex] [ 1495.612260][ T1577] ? stress_reorder_work+0xa00/0xa00 [test_ww_mutex] [ 1495.632857][ T1577] ? 0xffffffff81000000 [ 1495.649027][ T1577] ? rcu_read_lock_sched_held+0x5f/0x100 [ 1495.668211][ T1577] ? 
rcu_read_lock_bh_held+0xc0/0xc0 [ 1495.687010][ T1577] process_one_work+0x817/0x13c0 [ 1495.704991][ T1577] ? rcu_read_unlock+0x40/0x40 [ 1495.723024][ T1577] ? pwq_dec_nr_in_flight+0x280/0x280 [ 1495.740211][ T1577] ? rwlock_bug+0xc0/0xc0 [ 1495.758038][ T1577] worker_thread+0x8b/0xd80 [ 1495.775008][ T1577] ? process_one_work+0x13c0/0x13c0 [ 1495.793017][ T1577] kthread+0x3b9/0x4c0 [ 1495.810782][ T1577] ? set_kthread_struct+0x100/0x100 [ 1495.829988][ T1577] ret_from_fork+0x22/0x30 [ 1495.845811][ T1577] </TASK> [ 1495.859087][ T1577] INFO: task kworker/u576:36:1490 blocked for more than 492 seconds. [ 1495.879048][ T1577] Tainted: G B 5.16.0-rc2 #1 [ 1495.897879][ T1577] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 1495.919582][ T1577] task:kworker/u576:36 state:D stack: 0 pid: 1490 ppid: 2 flags:0x00004000 [ 1495.941865][ T1577] Workqueue: test-ww_mutex test_cycle_work [test_ww_mutex] [ 1495.959889][ T1577] Call Trace: [ 1495.974816][ T1577] <TASK> [ 1495.988759][ T1577] __schedule+0xdb0/0x25c0 [ 1495.988759][ T1577] __schedule+0xdb0/0x25c0 [ 1496.003849][ T1577] ? io_schedule_timeout+0x180/0x180 [ 1496.020839][ T1577] ? lock_downgrade+0x680/0x680 [ 1496.036854][ T1577] ? do_raw_spin_lock+0x125/0x2c0 [ 1496.051976][ T1577] schedule+0xe4/0x280 [ 1496.067780][ T1577] schedule_preempt_disabled+0x18/0x40 [ 1496.085004][ T1577] __ww_mutex_lock+0x1248/0x34c0 [ 1496.101895][ T1577] ? test_cycle_work+0x1bb/0x500 [test_ww_mutex] [ 1496.119889][ T1577] ? mutex_lock_interruptible_nested+0x40/0x40 [ 1496.137873][ T1577] ? lock_downgrade+0x680/0x680 [ 1496.152657][ T1577] ? wait_for_completion_interruptible+0x340/0x340 [ 1496.168773][ T1577] ? ww_mutex_lock+0x3e/0x380 [ 1496.184862][ T1577] ww_mutex_lock+0x3e/0x380 [ 1496.199979][ T1577] test_cycle_work+0x1bb/0x500 [test_ww_mutex] [ 1496.216277][ T1577] ? stress_reorder_work+0xa00/0xa00 [test_ww_mutex] [ 1496.234904][ T1577] ? 0xffffffff81000000 [ 1496.249856][ T1577] ? 
rcu_read_lock_sched_held+0x5f/0x100 [ 1496.265951][ T1577] ? rcu_read_lock_bh_held+0xc0/0xc0 [ 1496.282815][ T1577] process_one_work+0x817/0x13c0 [ 1496.299791][ T1577] ? rcu_read_unlock+0x40/0x40 [ 1496.314754][ T1577] ? pwq_dec_nr_in_flight+0x280/0x280 [ 1496.331779][ T1577] ? rwlock_bug+0xc0/0xc0 [ 1496.348007][ T1577] worker_thread+0x8b/0xd80 [ 1496.362905][ T1577] ? process_one_work+0x13c0/0x13c0 [ 1496.378975][ T1577] kthread+0x3b9/0x4c0 [ 1496.393866][ T1577] ? set_kthread_struct+0x100/0x100 [ 1496.408827][ T1577] ret_from_fork+0x22/0x30 [ 1496.423901][ T1577] </TASK> [ 1496.437994][ T1577] INFO: task kworker/u576:0:15113 blocked for more than 492 seconds. [ 1496.455862][ T1577] Tainted: G B 5.16.0-rc2 #1 [ 1496.473759][ T1577] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 1496.494808][ T1577] task:kworker/u576:0 state:D stack: 0 pid:15113 ppid: 2 flags:0x00004000 [ 1496.517000][ T1577] Workqueue: test-ww_mutex test_cycle_work [test_ww_mutex] [ 1496.537035][ T1577] Call Trace: [ 1496.551187][ T1577] <TASK> [ 1496.566405][ T1577] __schedule+0xdb0/0x25c0 [ 1496.582012][ T1577] ? io_schedule_timeout+0x180/0x180 [ 1496.598049][ T1577] ? lock_downgrade+0x680/0x680 [ 1496.615360][ T1577] ? do_raw_spin_lock+0x125/0x2c0 [ 1496.631835][ T1577] schedule+0xe4/0x280 [ 1496.645972][ T1577] schedule_preempt_disabled+0x18/0x40 [ 1496.663774][ T1577] __ww_mutex_lock+0x1248/0x34c0 [ 1496.681795][ T1577] ? test_cycle_work+0x1bb/0x500 [test_ww_mutex] [ 1496.698731][ T1577] ? mutex_lock_interruptible_nested+0x40/0x40 [ 1496.714996][ T1577] ? lock_downgrade+0x680/0x680 [ 1496.730888][ T1577] ? wait_for_completion_interruptible+0x340/0x340 [ 1496.747926][ T1577] ? ww_mutex_lock+0x3e/0x380 [ 1496.762482][ T1577] ww_mutex_lock+0x3e/0x380 [ 1496.778844][ T1577] test_cycle_work+0x1bb/0x500 [test_ww_mutex] And, we found that it occasionally hangs on v5.16-rc3 (1/3 runs), below is a good dmesg. 
[ 962.136756][ T2950] make: Entering directory '/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-d58071a8a76d779eedab38033ae4c821c30295a5/tools/testing/selftests/locking' [ 962.136831][ T2950]- [ 962.205036][ T2950] TAP version 13 [ 962.206003][ T2950]- [ 962.298458][ T2950] 1..1 [ 962.299657][ T2950]- [ 962.345588][ T2950] # selftests: locking: ww_mutex.sh [ 962.345657][ T2950]- [ 973.641869][T25509] All ww mutex selftests passed [ 973.773996][ T2950] # locking/ww_mutex: ok [ 973.774068][ T2950]- [ 973.774236][ T2960] # locking/ww_mutex: ok [ 973.802355][ T2960]- [ 973.829966][ T2950] ok 1 selftests: locking: ww_mutex.sh [ 973.834748][ T2950]- [ 973.838302][ T2960] ok 1 selftests: locking: ww_mutex.sh [ 973.899815][ T2960]- [ 973.921431][ T2950] make: Leaving directory '/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-d58071a8a76d779eedab38033ae4c821c30295a5/tools/testing/selftests/locking' [ 973.932312][ T2950]- [ 973.957345][ T2960] make: Leaving directory '/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-d58071a8a76d779eedab38033ae4c821c30295a5/tools/testing/selftests/locking' Thanks Zhijian@0Day

2 years, 6 months

3
3
0 0

[PATCH v2 bpf-next] selftests/bpf: Corrected two typos

by Lu Hongfei

When wrapping code, using ';' is better than using ',', which is more in line with the coding habits of most engineers. Signed-off-by: Lu Hongfei <luhongfei(a)vivo.com> --- Compared to the previous version, the modifications made are: 1. Modified the subject to make it clearer and more accurate 2. Additionally fixed the same typo in tcp_hdr_options.c tools/testing/selftests/bpf/benchs/bench_ringbufs.c | 2 +- tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c index 3ca14ad36607..e1ee979e6acc 100644 --- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c +++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c @@ -399,7 +399,7 @@ static void perfbuf_libbpf_setup(void) ctx->skel = perfbuf_setup_skeleton(); memset(&attr, 0, sizeof(attr)); - attr.config = PERF_COUNT_SW_BPF_OUTPUT, + attr.config = PERF_COUNT_SW_BPF_OUTPUT; attr.type = PERF_TYPE_SOFTWARE; attr.sample_type = PERF_SAMPLE_RAW; /* notify only every Nth sample */ diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c index 13bcaeb028b8..56685fc03c7e 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c @@ -347,7 +347,7 @@ static void syncookie_estab(void) exp_active_estab_in.max_delack_ms = 22; exp_passive_hdr_stg.syncookie = true; - exp_active_hdr_stg.resend_syn = true, + exp_active_hdr_stg.resend_syn = true; prepare_out(); -- 2.39.0

2 years, 6 months

4
3
0 0

[linux-next:master] BUILD REGRESSION 123212f53f3e394c1ae69a58c05dfdda56fec8c6

by kernel test robot

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master branch HEAD: 123212f53f3e394c1ae69a58c05dfdda56fec8c6 Add linux-next specific files for 20230707 Error/Warning reports: https://lore.kernel.org/oe-kbuild-all/202306122223.HHER4zOo-lkp@intel.com https://lore.kernel.org/oe-kbuild-all/202306240021.qNRc4iHW-lkp@intel.com Error/Warning: (recently discovered and may have been fixed) arch/arm64/kernel/entry-ftrace.S:59: Error: undefined symbol FTRACE_OPS_DIRECT_CALL used as an immediate value arch/parisc/kernel/pdt.c:67:6: warning: no previous prototype for 'arch_report_meminfo' [-Wmissing-prototypes] drivers/net/arcnet/arc-rimi.c:107:4: error: incompatible integer to pointer conversion assigning to 'void *' from 'int' [-Wint-conversion] drivers/net/arcnet/arc-rimi.c:107:6: error: call to undeclared function 'ioremap'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] drivers/net/arcnet/arc-rimi.c:113:3: error: call to undeclared function 'iounmap'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] drivers/pcmcia/cistpl.c:103:15: error: incompatible integer to pointer conversion assigning to 'void *' from 'int' [-Wint-conversion] drivers/pcmcia/cistpl.c:103:17: error: call to undeclared function 'ioremap'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] drivers/pcmcia/cistpl.c:72:3: error: call to undeclared function 'iounmap'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] drivers/tty/ipwireless/main.c:115:21: error: incompatible integer to pointer conversion assigning to 'void *' from 'int' [-Wint-conversion] drivers/tty/ipwireless/main.c:115:23: error: call to undeclared function 'ioremap'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] drivers/tty/ipwireless/main.c:155:2: error: call to 
undeclared function 'iounmap'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] lib/kunit/executor_test.c:138:4: warning: cast from 'void (*)(const void *)' to 'kunit_action_t *' (aka 'void (*)(void *)') converts to incompatible function type [-Wcast-function-type-strict] lib/kunit/test.c:775:38: warning: cast from 'void (*)(const void *)' to 'kunit_action_t *' (aka 'void (*)(void *)') converts to incompatible function type [-Wcast-function-type-strict] Unverified Error/Warning (likely false positive, please contact us if interested): {standard input}: Error: local label `"2" (instance number 9 of a fb label)' is not defined Error/Warning ids grouped by kconfigs: gcc_recent_errors |-- arm64-randconfig-r034-20230707 | `-- arch-arm64-kernel-entry-ftrace.S:Error:undefined-symbol-FTRACE_OPS_DIRECT_CALL-used-as-an-immediate-value |-- parisc-randconfig-r034-20230707 | `-- arch-parisc-kernel-pdt.c:warning:no-previous-prototype-for-arch_report_meminfo |-- parisc-randconfig-r081-20230703 | `-- arch-parisc-kernel-pdt.c:warning:no-previous-prototype-for-arch_report_meminfo `-- sh-allmodconfig `-- standard-input:Error:local-label-(instance-number-of-a-fb-label)-is-not-defined clang_recent_errors |-- arm-randconfig-r036-20230707 | `-- lib-kunit-test.c:warning:cast-from-void-(-)(const-void-)-to-kunit_action_t-(aka-void-(-)(void-)-)-converts-to-incompatible-function-type |-- hexagon-randconfig-r041-20230707 | |-- lib-kunit-executor_test.c:warning:cast-from-void-(-)(const-void-)-to-kunit_action_t-(aka-void-(-)(void-)-)-converts-to-incompatible-function-type | `-- lib-kunit-test.c:warning:cast-from-void-(-)(const-void-)-to-kunit_action_t-(aka-void-(-)(void-)-)-converts-to-incompatible-function-type `-- s390-randconfig-r013-20230707 |-- drivers-net-arcnet-arc-rimi.c:error:call-to-undeclared-function-ioremap-ISO-C99-and-later-do-not-support-implicit-function-declarations |-- 
drivers-net-arcnet-arc-rimi.c:error:call-to-undeclared-function-iounmap-ISO-C99-and-later-do-not-support-implicit-function-declarations |-- drivers-net-arcnet-arc-rimi.c:error:incompatible-integer-to-pointer-conversion-assigning-to-void-from-int |-- drivers-pcmcia-cistpl.c:error:call-to-undeclared-function-ioremap-ISO-C99-and-later-do-not-support-implicit-function-declarations |-- drivers-pcmcia-cistpl.c:error:call-to-undeclared-function-iounmap-ISO-C99-and-later-do-not-support-implicit-function-declarations |-- drivers-pcmcia-cistpl.c:error:incompatible-integer-to-pointer-conversion-assigning-to-void-from-int |-- drivers-tty-ipwireless-main.c:error:call-to-undeclared-function-ioremap-ISO-C99-and-later-do-not-support-implicit-function-declarations |-- drivers-tty-ipwireless-main.c:error:call-to-undeclared-function-iounmap-ISO-C99-and-later-do-not-support-implicit-function-declarations `-- drivers-tty-ipwireless-main.c:error:incompatible-integer-to-pointer-conversion-assigning-to-void-from-int elapsed time: 737m configs tested: 137 configs skipped: 5 tested configs: alpha alldefconfig gcc alpha allyesconfig gcc alpha defconfig gcc alpha randconfig-r036-20230707 gcc arc allyesconfig gcc arc axs103_defconfig gcc arc defconfig gcc arc randconfig-r035-20230707 gcc arc randconfig-r043-20230707 gcc arm allmodconfig gcc arm allyesconfig gcc arm assabet_defconfig gcc arm defconfig gcc arm h3600_defconfig gcc arm integrator_defconfig gcc arm randconfig-r021-20230707 gcc arm randconfig-r036-20230707 clang arm randconfig-r046-20230707 gcc arm64 allyesconfig gcc arm64 defconfig gcc arm64 randconfig-r023-20230707 clang arm64 randconfig-r034-20230707 gcc csky defconfig gcc csky randconfig-r002-20230707 gcc csky randconfig-r003-20230707 gcc csky randconfig-r032-20230707 gcc csky randconfig-r033-20230707 gcc hexagon randconfig-r041-20230707 clang hexagon randconfig-r045-20230707 clang i386 allyesconfig gcc i386 buildonly-randconfig-r004-20230707 gcc i386 
buildonly-randconfig-r005-20230707 gcc i386 buildonly-randconfig-r006-20230707 gcc i386 debian-10.3 gcc i386 defconfig gcc i386 randconfig-i001-20230707 gcc i386 randconfig-i002-20230707 gcc i386 randconfig-i003-20230707 gcc i386 randconfig-i004-20230707 gcc i386 randconfig-i005-20230707 gcc i386 randconfig-i006-20230707 gcc i386 randconfig-i011-20230707 clang i386 randconfig-i012-20230707 clang i386 randconfig-i013-20230707 clang i386 randconfig-i014-20230707 clang i386 randconfig-i015-20230707 clang i386 randconfig-i016-20230707 clang loongarch allmodconfig gcc loongarch allnoconfig gcc loongarch defconfig gcc loongarch loongson3_defconfig gcc m68k allmodconfig gcc m68k allyesconfig gcc m68k amcore_defconfig gcc m68k defconfig gcc m68k randconfig-r015-20230707 gcc microblaze randconfig-r024-20230707 gcc mips allmodconfig gcc mips allyesconfig gcc mips ar7_defconfig gcc mips ip22_defconfig clang mips ip28_defconfig clang mips malta_defconfig clang mips rbtx49xx_defconfig clang nios2 defconfig gcc openrisc defconfig gcc openrisc randconfig-r011-20230707 gcc openrisc randconfig-r035-20230707 gcc parisc allyesconfig gcc parisc defconfig gcc parisc randconfig-r034-20230707 gcc parisc64 defconfig gcc powerpc allmodconfig gcc powerpc allnoconfig gcc powerpc chrp32_defconfig gcc powerpc ep88xc_defconfig gcc powerpc fsp2_defconfig clang powerpc motionpro_defconfig gcc powerpc ppa8548_defconfig clang powerpc randconfig-r031-20230707 gcc powerpc storcenter_defconfig gcc powerpc warp_defconfig gcc riscv allmodconfig gcc riscv allnoconfig gcc riscv allyesconfig gcc riscv defconfig gcc riscv randconfig-r005-20230707 gcc riscv randconfig-r042-20230707 clang riscv rv32_defconfig gcc s390 allmodconfig gcc s390 allyesconfig gcc s390 defconfig gcc s390 randconfig-r013-20230707 clang s390 randconfig-r014-20230707 clang s390 randconfig-r022-20230707 clang s390 randconfig-r033-20230707 gcc s390 randconfig-r044-20230707 clang sh allmodconfig gcc sh defconfig gcc sh 
randconfig-r006-20230707 gcc sh randconfig-r026-20230707 gcc sh randconfig-r031-20230707 gcc sparc allyesconfig gcc sparc defconfig gcc sparc randconfig-r001-20230707 gcc sparc sparc32_defconfig gcc sparc64 randconfig-r025-20230707 gcc um allmodconfig clang um allnoconfig clang um allyesconfig clang um defconfig gcc um i386_defconfig gcc um randconfig-r032-20230707 clang um x86_64_defconfig gcc x86_64 allyesconfig gcc x86_64 buildonly-randconfig-r001-20230707 gcc x86_64 buildonly-randconfig-r002-20230707 gcc x86_64 buildonly-randconfig-r003-20230707 gcc x86_64 defconfig gcc x86_64 kexec gcc x86_64 randconfig-x001-20230707 clang x86_64 randconfig-x002-20230707 clang x86_64 randconfig-x003-20230707 clang x86_64 randconfig-x004-20230707 clang x86_64 randconfig-x005-20230707 clang x86_64 randconfig-x006-20230707 clang x86_64 randconfig-x011-20230707 gcc x86_64 randconfig-x012-20230707 gcc x86_64 randconfig-x013-20230707 gcc x86_64 randconfig-x014-20230707 gcc x86_64 randconfig-x015-20230707 gcc x86_64 randconfig-x016-20230707 gcc x86_64 rhel-8.3-rust clang x86_64 rhel-8.3 gcc xtensa alldefconfig gcc xtensa randconfig-r016-20230707 gcc xtensa smp_lx200_defconfig gcc -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki

2 years, 6 months

1
0
0 0

[PATCH] tools: testing: Corrected a clerical error

by Lu Hongfei

When wrapping code, using ';' is better than using ',', which is more in line with the coding habits of most engineers. Signed-off-by: Lu Hongfei <luhongfei(a)vivo.com> --- tools/testing/selftests/bpf/benchs/bench_ringbufs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c index 3ca14ad36607..e1ee979e6acc 100644 --- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c +++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c @@ -399,7 +399,7 @@ static void perfbuf_libbpf_setup(void) ctx->skel = perfbuf_setup_skeleton(); memset(&attr, 0, sizeof(attr)); - attr.config = PERF_COUNT_SW_BPF_OUTPUT, + attr.config = PERF_COUNT_SW_BPF_OUTPUT; attr.type = PERF_TYPE_SOFTWARE; attr.sample_type = PERF_SAMPLE_RAW; /* notify only every Nth sample */ -- 2.39.0

2 years, 6 months

2
1
0 0

[RFC][PATCH 00/10] KEYS: Introduce user asymmetric keys and signatures

by Roberto Sassu

From: Roberto Sassu <roberto.sassu(a)huawei.com> Define a new TLV-based format for keys and signatures, aiming to store and use in the kernel the crypto material from other unsupported formats (e.g. PGP). TLV fields have been defined to fill the corresponding kernel structures public_key, public_key_signature and key_preparsed_payload. Keys: struct public_key { struct key_preparsed_payload { KEY_PUB --> void *key; u32 keylen; --> prep->payload.data[asym_crypto] KEY_ALGO --> const char *pkey_algo; KEY_KID0 KEY_KID1 --> prep->payload.data[asym_key_ids] KEY_KID2 KEY_DESC --> prep->description Signatures: struct public_key_signature { SIG_S --> u8 *s; u32 s_size; SIG_KEY_ALGO --> const char *pkey_algo; SIG_HASH_ALGO --> const char *hash_algo; u32 digest_size; SIG_ENC --> const char *encoding; SIG_KID0 SIG_KID1 --> struct asymmetric_key_id *auth_ids[3]; SIG_KID2 For keys, since the format conversion has to be done in user space, user space is assumed to be trusted, in this proposal. Without this assumption, a malicious conversion tool could make a user load to the kernel a different key than the one expected. That should not be a particular problem for keys that are embedded in the kernel image and loaded at boot, since the conversion happens in a trusted environment such as the building infrastructure of the Linux distribution vendor. In the other cases, such as enrolling a key through the Machine Owner Key (MOK) mechanism, the user is responsible to ensure that the crypto material carried in the original format remains the same after the conversion. For signatures, assuming the strength of the crypto algorithms, altering the crypto material is simply a Denial-of-Service (DoS), as data can be validated only with the right signature. 
This patch set also offers the following contributions: - An API similar to the PKCS#7 one, to verify the authenticity of system data through user asymmetric keys and signatures - A mechanism to store a keyring blob in the kernel image and to extract and load the keys at system boot - eBPF binding, so that data authenticity verification with user asymmetric keys and signatures can be carried out also with eBPF programs - A new command for gnupg (in user space), to convert keys and signatures from PGP to the new kernel format The primary use case for this patch set is to verify the authenticity of RPM package headers with the PGP keys of the Linux distribution. Once their authenticity is verified, file digests can be extracted from those RPM headers and used as reference values for IMA Appraisal. Compared to the previous patch set, the main difference is not relying on User Mode Drivers (UMDs) for the conversion from the original format to the kernel format, due to the concern that full isolation of the UMD process cannot be achieved against a fully privileged system user (root). The discussion is still ongoing here: https://lore.kernel.org/linux-integrity/eb31920bd00e2c921b0aa6ebed8745cb013… This however does not prevent the goal mentioned above of verifying the authenticity of RPM headers to be achieved. The fact that Linux distribution vendors do the conversion in their infrastructure is a good enough guarantee. A very quick way to test the patch set is to execute: # gpg --conv-kernel /etc/pki/rpm-gpg/RPM-GPG-KEY-fedora-rawhide-primary | keyctl padd asymmetric "" @u # keyctl show @u Keyring 762357580 --alswrv 0 65534 keyring: _uid.0 567216072 --als--v 0 0 \_ asymmetric: PGP: 18b8e74c Patches 1-2 preliminarly export some definitions to user space so that conversion tools can specify the right public key algorithms and signature encodings (digest algorithms are already exported). Patches 3-5 introduce the user asymmetric keys and signatures. 
Patch 6 introduces a system API for verifying the authenticity of system data through user asymmetric keys and signatures. Patches 7-8 introduce a mechanism to store a keyring blob with user asymmetric keys in the kernel image, and load them at system boot. Patches 9-10 introduce the eBPF binding and corresponding test (which can be enabled only after the gnupg patches are upstreamed). Patches 1-2 [GNUPG] introduce the new gpg command --conv-kernel to convert PGP keys and signatures to the new kernel format. Changelog v1: - Remove useless check in validate_key() (suggested by Yonghong) - Don't rely on User Mode Drivers for the conversion from the original format to the kernel format - Use the more extensible TLV format, instead of a fixed structure Roberto Sassu (10): crypto: Export public key algorithm information crypto: Export signature encoding information KEYS: asymmetric: Introduce a parser for user asymmetric keys and sigs KEYS: asymmetric: Introduce the user asymmetric key parser KEYS: asymmetric: Introduce the user asymmetric key signature parser verification: Add verify_uasym_signature() and verify_uasym_sig_message() KEYS: asymmetric: Preload user asymmetric keys from a keyring blob KEYS: Introduce load_uasym_keyring() bpf: Introduce bpf_verify_uasym_signature() kfunc selftests/bpf: Prepare a test for user asymmetric key signatures MAINTAINERS | 1 + certs/Kconfig | 11 + certs/Makefile | 7 + certs/system_certificates.S | 18 + certs/system_keyring.c | 166 +++++- crypto/Kconfig | 6 + crypto/Makefile | 2 + crypto/asymmetric_keys/Kconfig | 14 + crypto/asymmetric_keys/Makefile | 10 + crypto/asymmetric_keys/asymmetric_type.c | 3 +- crypto/asymmetric_keys/uasym_key_parser.c | 229 ++++++++ crypto/asymmetric_keys/uasym_key_preload.c | 99 ++++ crypto/asymmetric_keys/uasym_parser.c | 201 +++++++ crypto/asymmetric_keys/uasym_parser.h | 43 ++ crypto/asymmetric_keys/uasym_sig_parser.c | 491 ++++++++++++++++++ crypto/pub_key_info.c | 20 + crypto/sig_enc_info.c | 16 + 
include/crypto/pub_key_info.h | 15 + include/crypto/sig_enc_info.h | 15 + include/crypto/uasym_keys_sigs.h | 82 +++ include/keys/asymmetric-type.h | 1 + include/linux/verification.h | 50 ++ include/uapi/linux/pub_key_info.h | 22 + include/uapi/linux/sig_enc_info.h | 18 + include/uapi/linux/uasym_parser.h | 107 ++++ kernel/trace/bpf_trace.c | 68 ++- ...y_pkcs7_sig.c => verify_pkcs7_uasym_sig.c} | 159 +++++- ...s7_sig.c => test_verify_pkcs7_uasym_sig.c} | 18 +- .../testing/selftests/bpf/verify_sig_setup.sh | 82 ++- 29 files changed, 1924 insertions(+), 50 deletions(-) create mode 100644 crypto/asymmetric_keys/uasym_key_parser.c create mode 100644 crypto/asymmetric_keys/uasym_key_preload.c create mode 100644 crypto/asymmetric_keys/uasym_parser.c create mode 100644 crypto/asymmetric_keys/uasym_parser.h create mode 100644 crypto/asymmetric_keys/uasym_sig_parser.c create mode 100644 crypto/pub_key_info.c create mode 100644 crypto/sig_enc_info.c create mode 100644 include/crypto/pub_key_info.h create mode 100644 include/crypto/sig_enc_info.h create mode 100644 include/crypto/uasym_keys_sigs.h create mode 100644 include/uapi/linux/pub_key_info.h create mode 100644 include/uapi/linux/sig_enc_info.h create mode 100644 include/uapi/linux/uasym_parser.h rename tools/testing/selftests/bpf/prog_tests/{verify_pkcs7_sig.c => verify_pkcs7_uasym_sig.c} (69%) rename tools/testing/selftests/bpf/progs/{test_verify_pkcs7_sig.c => test_verify_pkcs7_uasym_sig.c} (82%) -- 2.34.1

2 years, 6 months

2
14
0 0

[RESEND PATCH v3 0/2] RISC-V: mm: Make SV48 the default address space

by Charlie Jenkins

Make sv48 the default address space for mmap as some applications currently depend on this assumption. Also enable users to select desired address space using a non-zero hint address to mmap. Previous kernel changes caused Java and other applications to be broken on sv57 which this patch fixes. Documentation is also added to the RISC-V virtual memory section to explain these changes. Charlie Jenkins (2): RISC-V: mm: Restrict address space for sv39,sv48,sv57 RISC-V: mm: Update documentation and include test Documentation/riscv/vm-layout.rst | 22 +++++++++ arch/riscv/include/asm/elf.h | 2 +- arch/riscv/include/asm/pgtable.h | 21 ++++++-- arch/riscv/include/asm/processor.h | 34 ++++++++++--- tools/testing/selftests/riscv/Makefile | 2 +- tools/testing/selftests/riscv/mm/.gitignore | 1 + tools/testing/selftests/riscv/mm/Makefile | 21 ++++++++ .../selftests/riscv/mm/testcases/mmap.c | 49 +++++++++++++++++++ 8 files changed, 139 insertions(+), 13 deletions(-) create mode 100644 tools/testing/selftests/riscv/mm/.gitignore create mode 100644 tools/testing/selftests/riscv/mm/Makefile create mode 100644 tools/testing/selftests/riscv/mm/testcases/mmap.c -- 2.41.0

2 years, 6 months

5
10
0 0

[PATCH bpf-next v2 0/6] Support defragmenting IPv(4|6) packets in BPF

by Daniel Xu

=== Context === In the context of a middlebox, fragmented packets are tricky to handle. The full 5-tuple of a packet is often only available in the first fragment which makes enforcing consistent policy difficult. There are really only two stateless options, neither of which is very nice: 1. Enforce policy on first fragment and accept all subsequent fragments. This works but may let in certain attacks or allow data exfiltration. 2. Enforce policy on first fragment and drop all subsequent fragments. This does not really work because some protocols may rely on fragmentation. For example, DNS may rely on oversized UDP packets for large responses. So stateful tracking is the only sane option. RFC 8900 [0] calls this out as well in section 6.3: Middleboxes [...] should process IP fragments in a manner that is consistent with [RFC0791] and [RFC8200]. In many cases, middleboxes must maintain state in order to achieve this goal. === BPF related bits === Policy has traditionally been enforced from XDP/TC hooks. Both hooks run before kernel reassembly facilities. However, with the new BPF_PROG_TYPE_NETFILTER, we can rather easily hook into existing netfilter reassembly infra. The basic idea is we bump a refcnt on the netfilter defrag module and then run the bpf prog after the defrag module runs. This allows bpf progs to transparently see full, reassembled packets. The nice thing about this is that progs don't have to carry around logic to detect fragments. 
=== Changelog === Changes from v1: * Drop bpf_program__attach_netfilter() patches * static -> static const where appropriate * Fix callback assignment order during registration * Only request_module() if callbacks are missing * Fix retval when modprobe fails in userspace * Fix v6 defrag module name (nf_defrag_ipv6_hooks -> nf_defrag_ipv6) * Simplify priority checking code * Add warning if module doesn't assign callbacks in the future * Take refcnt on module while defrag link is active [0]: https://datatracker.ietf.org/doc/html/rfc8900 Daniel Xu (6): netfilter: defrag: Add glue hooks for enabling/disabling defrag netfilter: bpf: Support BPF_F_NETFILTER_IP_DEFRAG in netfilter link netfilter: bpf: Prevent defrag module unload while link active bpf: selftests: Support not connecting client socket bpf: selftests: Support custom type and proto for client sockets bpf: selftests: Add defrag selftests include/linux/netfilter.h | 15 + include/uapi/linux/bpf.h | 5 + net/ipv4/netfilter/nf_defrag_ipv4.c | 17 +- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 11 + net/netfilter/core.c | 6 + net/netfilter/nf_bpf_link.c | 149 ++++++++- tools/include/uapi/linux/bpf.h | 5 + tools/testing/selftests/bpf/Makefile | 4 +- .../selftests/bpf/generate_udp_fragments.py | 90 ++++++ .../selftests/bpf/ip_check_defrag_frags.h | 57 ++++ tools/testing/selftests/bpf/network_helpers.c | 26 +- tools/testing/selftests/bpf/network_helpers.h | 3 + .../bpf/prog_tests/ip_check_defrag.c | 282 ++++++++++++++++++ .../selftests/bpf/progs/ip_check_defrag.c | 104 +++++++ 14 files changed, 752 insertions(+), 22 deletions(-) create mode 100755 tools/testing/selftests/bpf/generate_udp_fragments.py create mode 100644 tools/testing/selftests/bpf/ip_check_defrag_frags.h create mode 100644 tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c create mode 100644 tools/testing/selftests/bpf/progs/ip_check_defrag.c -- 2.41.0

2 years, 6 months

2
4
0 0

[PATCH bpf-next] selftests/bpf: Bump and validate MAX_SYMS

by Björn Töpel

From: Björn Töpel <bjorn(a)rivosinc.com> BPF tests that load /proc/kallsyms, e.g. bpf_cookie, will perform a buffer overrun if the number of syms on the system is larger than MAX_SYMS. Bump the MAX_SYMS to 400000, and add a runtime check that bails out if the maximum is reached. Signed-off-by: Björn Töpel <bjorn(a)rivosinc.com> --- tools/testing/selftests/bpf/trace_helpers.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 9b070cdf44ac..f83d9f65c65b 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -18,7 +18,7 @@ #define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe" #define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe" -#define MAX_SYMS 300000 +#define MAX_SYMS 400000 static struct ksym syms[MAX_SYMS]; static int sym_cnt; @@ -46,6 +46,9 @@ int load_kallsyms_refresh(void) break; if (!addr) continue; + if (i >= MAX_SYMS) + return -EFBIG; + syms[i].addr = (long) addr; syms[i].name = strdup(func); i++; base-commit: fd283ab196a867f8f65f36913e0fadd031fcb823 -- 2.39.2

2 years, 6 months

3
2
0 0

[linux-next:master] BUILD REGRESSION c36ac601a98fb148147640bae219108ee81566f8

by kernel test robot

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master branch HEAD: c36ac601a98fb148147640bae219108ee81566f8 Add linux-next specific files for 20230706 Error/Warning reports: https://lore.kernel.org/oe-kbuild-all/202306122223.HHER4zOo-lkp@intel.com https://lore.kernel.org/oe-kbuild-all/202307050034.tAJSN9qy-lkp@intel.com Error/Warning: (recently discovered and may have been fixed) arch/parisc/kernel/pdt.c:67:6: warning: no previous prototype for 'arch_report_meminfo' [-Wmissing-prototypes] arch/riscv/kernel/crash_core.c:14:64: error: 'VMEMMAP_START' undeclared (first use in this function) arch/riscv/kernel/crash_core.c:15:62: error: 'VMEMMAP_END' undeclared (first use in this function); did you mean 'MEMREMAP_ENC'? arch/riscv/kernel/crash_core.c:8:27: error: 'VA_BITS' undeclared (first use in this function) lib/kunit/executor_test.c:138:4: warning: cast from 'void (*)(const void *)' to 'kunit_action_t *' (aka 'void (*)(void *)') converts to incompatible function type [-Wcast-function-type-strict] lib/kunit/test.c:775:38: warning: cast from 'void (*)(const void *)' to 'kunit_action_t *' (aka 'void (*)(void *)') converts to incompatible function type [-Wcast-function-type-strict] Unverified Error/Warning (likely false positive, please contact us if interested): drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c:98 mlx5_devcom_register_device() error: uninitialized symbol 'tmp_dev'. 
kernel/trace/trace_functions_graph.c:1012 print_graph_return() warn: bitwise AND condition is false here kernel/trace/trace_functions_graph.c:726 print_graph_entry_leaf() warn: bitwise AND condition is false here {standard input}: Error: local label `"2" (instance number 9 of a fb label)' is not defined Error/Warning ids grouped by kconfigs: gcc_recent_errors |-- i386-randconfig-m021-20230705 | |-- kernel-trace-trace_functions_graph.c-print_graph_entry_leaf()-warn:bitwise-AND-condition-is-false-here | `-- kernel-trace-trace_functions_graph.c-print_graph_return()-warn:bitwise-AND-condition-is-false-here |-- parisc-randconfig-r003-20230706 | `-- arch-parisc-kernel-pdt.c:warning:no-previous-prototype-for-arch_report_meminfo |-- parisc-randconfig-r081-20230703 | `-- arch-parisc-kernel-pdt.c:warning:no-previous-prototype-for-arch_report_meminfo |-- riscv-randconfig-r042-20230706 | |-- arch-riscv-kernel-crash_core.c:error:VA_BITS-undeclared-(first-use-in-this-function) | |-- arch-riscv-kernel-crash_core.c:error:VMEMMAP_END-undeclared-(first-use-in-this-function) | `-- arch-riscv-kernel-crash_core.c:error:VMEMMAP_START-undeclared-(first-use-in-this-function) |-- s390-randconfig-m041-20230705 | `-- drivers-net-ethernet-mellanox-mlx5-core-lib-devcom.c-mlx5_devcom_register_device()-error:uninitialized-symbol-tmp_dev-. 
`-- sh-allmodconfig `-- standard-input:Error:local-label-(instance-number-of-a-fb-label)-is-not-defined clang_recent_errors |-- arm64-randconfig-r004-20230706 | `-- lib-kunit-test.c:warning:cast-from-void-(-)(const-void-)-to-kunit_action_t-(aka-void-(-)(void-)-)-converts-to-incompatible-function-type |-- hexagon-randconfig-r041-20230706 | |-- lib-kunit-executor_test.c:warning:cast-from-void-(-)(const-void-)-to-kunit_action_t-(aka-void-(-)(void-)-)-converts-to-incompatible-function-type | `-- lib-kunit-test.c:warning:cast-from-void-(-)(const-void-)-to-kunit_action_t-(aka-void-(-)(void-)-)-converts-to-incompatible-function-type `-- powerpc-allyesconfig `-- clang:error:unsupported-option-fsanitize-thread-for-target-powerpc-unknown-linux-gnu elapsed time: 735m configs tested: 144 configs skipped: 8 tested configs: alpha allyesconfig gcc alpha defconfig gcc alpha randconfig-r006-20230706 gcc arc allyesconfig gcc arc defconfig gcc arc randconfig-r043-20230706 gcc arm allmodconfig gcc arm allyesconfig gcc arm defconfig gcc arm gemini_defconfig gcc arm imx_v4_v5_defconfig clang arm jornada720_defconfig gcc arm milbeaut_m10v_defconfig clang arm mps2_defconfig gcc arm mv78xx0_defconfig clang arm pxa910_defconfig gcc arm randconfig-r046-20230706 clang arm s5pv210_defconfig clang arm spear3xx_defconfig clang arm64 allyesconfig gcc arm64 defconfig gcc arm64 randconfig-r004-20230706 clang arm64 randconfig-r024-20230706 gcc csky defconfig gcc hexagon alldefconfig clang hexagon randconfig-r041-20230706 clang hexagon randconfig-r045-20230706 clang i386 allyesconfig gcc i386 buildonly-randconfig-r004-20230706 clang i386 buildonly-randconfig-r005-20230706 clang i386 buildonly-randconfig-r006-20230706 clang i386 debian-10.3 gcc i386 defconfig gcc i386 randconfig-i001-20230706 clang i386 randconfig-i002-20230706 clang i386 randconfig-i003-20230706 clang i386 randconfig-i004-20230706 clang i386 randconfig-i005-20230706 clang i386 randconfig-i006-20230706 clang i386 
randconfig-i011-20230706 gcc i386 randconfig-i012-20230706 gcc i386 randconfig-i013-20230706 gcc i386 randconfig-i014-20230706 gcc i386 randconfig-i015-20230706 gcc i386 randconfig-i016-20230706 gcc i386 randconfig-r035-20230706 clang loongarch allmodconfig gcc loongarch allnoconfig gcc loongarch defconfig gcc loongarch randconfig-r001-20230706 gcc loongarch randconfig-r025-20230706 gcc loongarch randconfig-r031-20230706 gcc m68k allmodconfig gcc m68k allyesconfig gcc m68k defconfig gcc m68k sun3_defconfig gcc m68k sun3x_defconfig gcc microblaze randconfig-r005-20230706 gcc mips allmodconfig gcc mips allyesconfig gcc mips ci20_defconfig gcc mips db1xxx_defconfig gcc mips rs90_defconfig clang nios2 defconfig gcc openrisc or1klitex_defconfig gcc openrisc randconfig-r015-20230706 gcc parisc allyesconfig gcc parisc defconfig gcc parisc randconfig-r003-20230706 gcc parisc randconfig-r005-20230706 gcc parisc randconfig-r032-20230705 gcc parisc randconfig-r036-20230706 gcc parisc64 defconfig gcc powerpc allmodconfig gcc powerpc allnoconfig gcc powerpc g5_defconfig clang powerpc mpc5200_defconfig clang powerpc mpc834x_itx_defconfig gcc powerpc pcm030_defconfig gcc powerpc randconfig-r013-20230706 gcc powerpc randconfig-r036-20230705 gcc powerpc skiroot_defconfig clang powerpc walnut_defconfig clang powerpc xes_mpc85xx_defconfig clang riscv allmodconfig gcc riscv allnoconfig gcc riscv allyesconfig gcc riscv defconfig gcc riscv randconfig-r003-20230706 clang riscv randconfig-r021-20230706 gcc riscv randconfig-r023-20230706 gcc riscv randconfig-r042-20230706 gcc riscv rv32_defconfig gcc s390 allmodconfig gcc s390 allyesconfig gcc s390 defconfig gcc s390 randconfig-r031-20230705 gcc s390 randconfig-r044-20230706 gcc sh allmodconfig gcc sh ecovec24_defconfig gcc sh rsk7264_defconfig gcc sh titan_defconfig gcc sparc allyesconfig gcc sparc defconfig gcc sparc randconfig-r016-20230706 gcc sparc sparc64_defconfig gcc sparc64 randconfig-r002-20230706 gcc sparc64 
randconfig-r035-20230705 gcc um allmodconfig clang um allnoconfig clang um allyesconfig clang um defconfig gcc um i386_defconfig gcc um randconfig-r011-20230706 clang um randconfig-r034-20230706 gcc um x86_64_defconfig gcc x86_64 allyesconfig gcc x86_64 buildonly-randconfig-r001-20230706 clang x86_64 buildonly-randconfig-r002-20230706 clang x86_64 buildonly-randconfig-r003-20230706 clang x86_64 defconfig gcc x86_64 kexec gcc x86_64 randconfig-r026-20230706 gcc x86_64 randconfig-r033-20230706 clang x86_64 randconfig-x001-20230706 gcc x86_64 randconfig-x002-20230706 gcc x86_64 randconfig-x003-20230706 gcc x86_64 randconfig-x004-20230706 gcc x86_64 randconfig-x005-20230706 gcc x86_64 randconfig-x006-20230706 gcc x86_64 randconfig-x011-20230706 clang x86_64 randconfig-x012-20230706 clang x86_64 randconfig-x013-20230706 clang x86_64 randconfig-x014-20230706 clang x86_64 randconfig-x015-20230706 clang x86_64 randconfig-x016-20230706 clang x86_64 rhel-8.3-rust clang x86_64 rhel-8.3 gcc xtensa audio_kc705_defconfig gcc xtensa cadence_csp_defconfig gcc xtensa randconfig-r002-20230706 gcc xtensa randconfig-r004-20230706 gcc xtensa randconfig-r022-20230706 gcc xtensa randconfig-r034-20230705 gcc -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki

2 years, 6 months

1
0
0 0

[PATCH v23 0/5] Implement IOCTL to get and optionally clear info about PTEs

by Muhammad Usama Anjum

*Changes in v23*: - Set vec_buf_index in loop only when vec_buf_index is set - Return -EFAULT instead of -EINVAL if vec is NULL - Correctly return the walk ending address to the page granularity *Changes in v22*: - Interface change: - Replace [start start + len) with [start, end) - Return the ending address of the address walk in start *Changes in v21*: - Abort walk instead of returning error if WP is to be performed on partial hugetlb *Changes in v20* - Correct PAGE_IS_FILE and add PAGE_IS_PFNZERO *Changes in v19* - Minor changes and interface updates *Changes in v18* - Rebase on top of next-20230613 - Minor updates *Changes in v17* - Rebase on top of next-20230606 - Minor improvements in PAGEMAP_SCAN IOCTL patch *Changes in v16* - Fix a corner case - Add exclusive PM_SCAN_OP_WP back *Changes in v15* - Build fix (Add missed build fix in RESEND) *Changes in v14* - Fix build error caused by #ifdef added at last minute in some configs *Changes in v13* - Rebase on top of next-20230414 - Give-up on using uffd_wp_range() and write new helpers, flush tlb only once *Changes in v12* - Update and other memory types to UFFD_FEATURE_WP_ASYNC - Rebaase on top of next-20230406 - Review updates *Changes in v11* - Rebase on top of next-20230307 - Base patches on UFFD_FEATURE_WP_UNPOPULATED - Do a lot of cosmetic changes and review updates - Remove ENGAGE_WP + !GET operation as it can be performed with UFFDIO_WRITEPROTECT *Changes in v10* - Add specific condition to return error if hugetlb is used with wp async - Move changes in tools/include/uapi/linux/fs.h to separate patch - Add documentation *Changes in v9:* - Correct fault resolution for userfaultfd wp async - Fix build warnings and errors which were happening on some configs - Simplify pagemap ioctl's code *Changes in v8:* - Update uffd async wp implementation - Improve PAGEMAP_IOCTL implementation *Changes in v7:* - Add uffd wp async - Update the IOCTL to use uffd under the hood instead of soft-dirty flags *Motivation* The 
real motivation for adding PAGEMAP_SCAN IOCTL is to emulate Windows GetWriteWatch() syscall [1]. The GetWriteWatch() retrieves the addresses of the pages that are written to in a region of virtual memory. This syscall is used in Windows applications and games etc. This syscall is being emulated in a pretty slow manner in userspace. Our purpose is to enhance the kernel such that we translate it efficiently in a better way. Currently some out of tree hack patches are being used to efficiently emulate it in some kernels. We intend to replace those with these patches. So the whole gaming on Linux can effectively get benefit from this. It means there would be tons of users of this code. CRIU use case [2] was mentioned by Andrei and Danylo: > Use cases for migrating sparse VMAs are binaries sanitized with ASAN, > MSAN or TSAN [3]. All of these sanitizers produce sparse mappings of > shadow memory [4]. Being able to migrate such binaries allows to highly > reduce the amount of work needed to identify and fix post-migration > crashes, which happen constantly. Andrei defines the following uses of this code: * it is more granular and allows us to track changed pages more effectively. The current interface can clear dirty bits for the entire process only. In addition, reading info about pages is a separate operation. It means we must freeze the process to read information about all its pages, reset dirty bits, only then we can start dumping pages. The information about pages becomes more and more outdated, while we are processing pages. The new interface solves both these downsides. First, it allows us to read pte bits and clear the soft-dirty bit atomically. It means that CRIU will not need to freeze processes to pre-dump their memory. Second, it clears soft-dirty bits for a specified region of memory. It means CRIU will have actual info about pages to the moment of dumping them. 
* The new interface has to be much faster because basic page filtering is happening in the kernel. With the old interface, we have to read pagemap for each page. *Implementation Evolution (Short Summary)* From the definition of GetWriteWatch(), we feel like kernel's soft-dirty feature can be used under the hood with some additions like: * reset soft-dirty flag for only a specific region of memory instead of clearing the flag for the entire process * get and clear soft-dirty flag for a specific region atomically So we decided to use ioctl on pagemap file to read or/and reset soft-dirty flag. But using soft-dirty flag, sometimes we get extra pages which weren't even written. They had become soft-dirty because of VMA merging and VM_SOFTDIRTY flag. This breaks the definition of GetWriteWatch(). We were able to bypass this shortcoming by ignoring VM_SOFTDIRTY until David reported that mprotect etc messes up the soft-dirty flag while ignoring VM_SOFTDIRTY [5]. This wasn't happening until [6] got introduced. We discussed if we can revert these patches. But we could not reach any conclusion. So at this point, I made a couple of attempts to solve this whole VM_SOFTDIRTY issue by correcting the soft-dirty implementation: * [7] Correct the bug fixed wrongly back in 2014. It had potential to cause regression. We left it behind. * [8] Keep a list of soft-dirty part of a VMA across splits and merges. I got the reply don't increase the size of the VMA by 8 bytes. At this point, we left soft-dirty considering it is too delicate and userfaultfd [9] seemed like the only way forward. From there onward, we have been basing soft-dirty emulation on userfaultfd wp feature where kernel resolves the faults itself when WP_ASYNC feature is used. It was straight forward to add WP_ASYNC feature in userfaultfd. Now we get only those pages dirty or written-to which are really written in reality. 
(PS: Another userfaultfd feature, WP_UNPOPULATED, is also required to avoid pre-faulting memory before write-protecting [9].) All the different masks were added on the request of CRIU devs to create interface more generic and better. [1] https://learn.microsoft.com/en-us/windows/win32/api/memoryapi/nf-memoryapi-… [2] https://lore.kernel.org/all/20221014134802.1361436-1-mdanylo@google.com [3] https://github.com/google/sanitizers [4] https://github.com/google/sanitizers/wiki/AddressSanitizerAlgorithm#64-bit [5] https://lore.kernel.org/all/bfcae708-db21-04b4-0bbe-712badd03071@redhat.com [6] https://lore.kernel.org/all/20220725142048.30450-1-peterx@redhat.com/ [7] https://lore.kernel.org/all/20221122115007.2787017-1-usama.anjum@collabora.… [8] https://lore.kernel.org/all/20221220162606.1595355-1-usama.anjum@collabora.… [9] https://lore.kernel.org/all/20230306213925.617814-1-peterx@redhat.com [10] https://lore.kernel.org/all/20230125144529.1630917-1-mdanylo@google.com * Original Cover letter from v8* Hello, Note: Soft-dirty pages and pages which have been written-to are synonyms. As kernel already has soft-dirty feature inside which we have given up to use, we are using written-to terminology while using UFFD async WP under the hood. This IOCTL, PAGEMAP_SCAN on pagemap file can be used to get and/or clear the info about page table entries. The following operations are supported in this ioctl: - Get the information if the pages have been written-to (PAGE_IS_WRITTEN), file mapped (PAGE_IS_FILE), present (PAGE_IS_PRESENT) or swapped (PAGE_IS_SWAPPED). - Write-protect the pages (PAGEMAP_WP_ENGAGE) to start finding which pages have been written-to. - Find pages which have been written-to and write protect the pages (atomic PAGE_IS_WRITTEN + PAGEMAP_WP_ENGAGE) It is possible to find and clear soft-dirty pages entirely in userspace. 
But it isn't efficient: - The mprotect and SIGSEGV handler for bookkeeping - The userfaultfd wp (synchronous) with the handler for bookkeeping Some benchmarks can be seen here[1]. This series adds features that weren't present earlier: - There is no atomic get soft-dirty/Written-to status and clear present in the kernel. - The pages which have been written-to cannot be found in an accurate way. (Kernel's soft-dirty PTE bit + soft-dirty VMA bit shows more soft-dirty pages than there actually are.) Historically, soft-dirty PTE bit tracking has been used in the CRIU project. The procfs interface is enough for finding the soft-dirty bit status and clearing the soft-dirty bit of all the pages of a process. We have the use case where we need to track the soft-dirty PTE bit for only specific pages on-demand. We need this tracking and clear mechanism of a region of memory while the process is running to emulate the getWriteWatch() syscall of Windows. *(Moved to using UFFD instead of soft-dirty feature to find pages which have been written-to from v7 patch series)*: Stop using the soft-dirty flags for finding which pages have been written to. It is too delicate and wrong as it shows more soft-dirty pages than the actual soft-dirty pages. There is no interest in correcting it [2][3] as this is how the feature was written years ago. It shouldn't be updated to change behaviour. Peter Xu has suggested using the async version of the UFFD WP [4] as it is based inherently on the PTEs. So in this patch series, I've added a new mode to the UFFD which is asynchronous version of the write protect. When this variant of the UFFD WP is used, the page faults are resolved automatically by the kernel. The pages which have been written-to can be found by reading pagemap file (!PM_UFFD_WP). This feature can be used successfully to find which pages have been written to from the time the pages were write protected. 
This works just like the soft-dirty flag without showing any extra pages which aren't soft-dirty in reality. The information related to pages if the page is file mapped, present and swapped is required for the CRIU project [5][6]. The addition of the required mask, any mask, excluded mask and return masks are also required for the CRIU project [5]. The IOCTL returns the addresses of the pages which match the specific masks. The page addresses are returned in struct page_region in a compact form. The max_pages is needed to support a use case where user only wants to get a specific number of pages. So there is no need to find all the pages of interest in the range when max_pages is specified. The IOCTL returns when the maximum number of the pages are found. The max_pages is optional. If max_pages is specified, it must be equal or greater than the vec_size. This restriction is needed to handle worse case when one page_region only contains info of one page and it cannot be compacted. This is needed to emulate the Windows getWriteWatch() syscall. The patch series include the detailed selftest which can be used as an example for the uffd async wp test and PAGEMAP_IOCTL. It shows the interface usages as well. 
[1] https://lore.kernel.org/lkml/54d4c322-cd6e-eefd-b161-2af2b56aae24@collabora… [2] https://lore.kernel.org/all/20221220162606.1595355-1-usama.anjum@collabora.… [3] https://lore.kernel.org/all/20221122115007.2787017-1-usama.anjum@collabora.… [4] https://lore.kernel.org/all/Y6Hc2d+7eTKs7AiH@x1n [5] https://lore.kernel.org/all/YyiDg79flhWoMDZB@gmail.com/ [6] https://lore.kernel.org/all/20221014134802.1361436-1-mdanylo@google.com/ Regards, Muhammad Usama Anjum Muhammad Usama Anjum (4): fs/proc/task_mmu: Implement IOCTL to get and optionally clear info about PTEs tools headers UAPI: Update linux/fs.h with the kernel sources mm/pagemap: add documentation of PAGEMAP_SCAN IOCTL selftests: mm: add pagemap ioctl tests Peter Xu (1): userfaultfd: UFFD_FEATURE_WP_ASYNC Documentation/admin-guide/mm/pagemap.rst | 58 + Documentation/admin-guide/mm/userfaultfd.rst | 35 + fs/proc/task_mmu.c | 577 +++++++ fs/userfaultfd.c | 26 +- include/linux/hugetlb.h | 1 + include/linux/userfaultfd_k.h | 21 +- include/uapi/linux/fs.h | 55 + include/uapi/linux/userfaultfd.h | 9 +- mm/hugetlb.c | 34 +- mm/memory.c | 27 +- tools/include/uapi/linux/fs.h | 55 + tools/testing/selftests/mm/.gitignore | 2 + tools/testing/selftests/mm/Makefile | 3 +- tools/testing/selftests/mm/config | 1 + tools/testing/selftests/mm/pagemap_ioctl.c | 1464 ++++++++++++++++++ tools/testing/selftests/mm/run_vmtests.sh | 4 + 16 files changed, 2348 insertions(+), 24 deletions(-) create mode 100644 tools/testing/selftests/mm/pagemap_ioctl.c mode change 100644 => 100755 tools/testing/selftests/mm/run_vmtests.sh -- 2.39.2

2 years, 6 months

1
5
0 0

[PATCH v22 0/5] Implement IOCTL to get and optionally clear info about PTEs

by Muhammad Usama Anjum

Changes in v22: - Interface change: - Replace [start start + len) with [start, end) - Return the ending address of the address walk in start Changes in v21: - Abort walk instead of returning error if WP is to be performed on partial hugetlb *Changes in v20* - Correct PAGE_IS_FILE and add PAGE_IS_PFNZERO *Changes in v19* - Minor changes and interface updates *Changes in v18* - Rebase on top of next-20230613 - Minor updates *Changes in v17* - Rebase on top of next-20230606 - Minor improvements in PAGEMAP_SCAN IOCTL patch *Changes in v16* - Fix a corner case - Add exclusive PM_SCAN_OP_WP back *Changes in v15* - Build fix (Add missed build fix in RESEND) *Changes in v14* - Fix build error caused by #ifdef added at last minute in some configs *Changes in v13* - Rebase on top of next-20230414 - Give-up on using uffd_wp_range() and write new helpers, flush tlb only once *Changes in v12* - Update and other memory types to UFFD_FEATURE_WP_ASYNC - Rebaase on top of next-20230406 - Review updates *Changes in v11* - Rebase on top of next-20230307 - Base patches on UFFD_FEATURE_WP_UNPOPULATED - Do a lot of cosmetic changes and review updates - Remove ENGAGE_WP + !GET operation as it can be performed with UFFDIO_WRITEPROTECT *Changes in v10* - Add specific condition to return error if hugetlb is used with wp async - Move changes in tools/include/uapi/linux/fs.h to separate patch - Add documentation *Changes in v9:* - Correct fault resolution for userfaultfd wp async - Fix build warnings and errors which were happening on some configs - Simplify pagemap ioctl's code *Changes in v8:* - Update uffd async wp implementation - Improve PAGEMAP_IOCTL implementation *Changes in v7:* - Add uffd wp async - Update the IOCTL to use uffd under the hood instead of soft-dirty flags *Motivation* The real motivation for adding PAGEMAP_SCAN IOCTL is to emulate Windows GetWriteWatch() syscall [1]. 
GetWriteWatch() retrieves the addresses of the pages that are written to in a region of virtual memory. This syscall is used in Windows applications and games etc. This syscall is being emulated in a pretty slow manner in userspace. Our purpose is to enhance the kernel so that it can be emulated efficiently. Currently some out-of-tree hack patches are being used to efficiently emulate it in some kernels. We intend to replace those with these patches. So gaming on Linux as a whole can benefit from this. It means there would be tons of users of this code. The CRIU use case [2] was mentioned by Andrei and Danylo: > Use cases for migrating sparse VMAs are binaries sanitized with ASAN, > MSAN or TSAN [3]. All of these sanitizers produce sparse mappings of > shadow memory [4]. Being able to migrate such binaries allows to highly > reduce the amount of work needed to identify and fix post-migration > crashes, which happen constantly. Andrei defines the following uses of this code: * it is more granular and allows us to track changed pages more effectively. The current interface can clear dirty bits for the entire process only. In addition, reading info about pages is a separate operation. It means we must freeze the process to read information about all its pages, reset dirty bits, and only then can we start dumping pages. The information about pages becomes more and more outdated while we are processing pages. The new interface solves both these downsides. First, it allows us to read pte bits and clear the soft-dirty bit atomically. It means that CRIU will not need to freeze processes to pre-dump their memory. Second, it clears soft-dirty bits for a specified region of memory. It means CRIU will have up-to-date info about pages at the moment of dumping them. * The new interface has to be much faster because basic page filtering is happening in the kernel. With the old interface, we have to read pagemap for each page. 
*Implementation Evolution (Short Summary)* From the definition of GetWriteWatch(), we feel like kernel's soft-dirty feature can be used under the hood with some additions like: * reset soft-dirty flag for only a specific region of memory instead of clearing the flag for the entire process * get and clear soft-dirty flag for a specific region atomically So we decided to use ioctl on pagemap file to read and/or reset soft-dirty flag. But using soft-dirty flag, sometimes we get extra pages which weren't even written. They had become soft-dirty because of VMA merging and VM_SOFTDIRTY flag. This breaks the definition of GetWriteWatch(). We were able to bypass this shortcoming by ignoring VM_SOFTDIRTY until David reported that mprotect etc messes up the soft-dirty flag while ignoring VM_SOFTDIRTY [5]. This wasn't happening until [6] got introduced. We discussed if we can revert these patches. But we could not reach any conclusion. So at this point, I made a couple of attempts to solve this whole VM_SOFTDIRTY issue by correcting the soft-dirty implementation: * [7] Correct the bug fixed wrongly back in 2014. It had potential to cause regression. We left it behind. * [8] Keep a list of soft-dirty part of a VMA across splits and merges. The reply I got was not to increase the size of the VMA by 8 bytes. At this point, we left soft-dirty considering it is too delicate and userfaultfd [9] seemed like the only way forward. From there onward, we have been basing soft-dirty emulation on userfaultfd wp feature where kernel resolves the faults itself when WP_ASYNC feature is used. It was straightforward to add WP_ASYNC feature in userfaultfd. Now we get only those pages dirty or written-to which are really written in reality. (PS: Another userfaultfd feature, WP_UNPOPULATED, is also required to avoid pre-faulting memory before write-protecting [9].) All the different masks were added on the request of CRIU devs to make the interface more generic and better. 
[1] https://learn.microsoft.com/en-us/windows/win32/api/memoryapi/nf-memoryapi-… [2] https://lore.kernel.org/all/20221014134802.1361436-1-mdanylo@google.com [3] https://github.com/google/sanitizers [4] https://github.com/google/sanitizers/wiki/AddressSanitizerAlgorithm#64-bit [5] https://lore.kernel.org/all/bfcae708-db21-04b4-0bbe-712badd03071@redhat.com [6] https://lore.kernel.org/all/20220725142048.30450-1-peterx@redhat.com/ [7] https://lore.kernel.org/all/20221122115007.2787017-1-usama.anjum@collabora.… [8] https://lore.kernel.org/all/20221220162606.1595355-1-usama.anjum@collabora.… [9] https://lore.kernel.org/all/20230306213925.617814-1-peterx@redhat.com [10] https://lore.kernel.org/all/20230125144529.1630917-1-mdanylo@google.com * Original Cover letter from v8* Hello, Note: Soft-dirty pages and pages which have been written-to are synonyms. As kernel already has soft-dirty feature inside which we have given up to use, we are using written-to terminology while using UFFD async WP under the hood. This IOCTL, PAGEMAP_SCAN on pagemap file can be used to get and/or clear the info about page table entries. The following operations are supported in this ioctl: - Get the information if the pages have been written-to (PAGE_IS_WRITTEN), file mapped (PAGE_IS_FILE), present (PAGE_IS_PRESENT) or swapped (PAGE_IS_SWAPPED). - Write-protect the pages (PAGEMAP_WP_ENGAGE) to start finding which pages have been written-to. - Find pages which have been written-to and write protect the pages (atomic PAGE_IS_WRITTEN + PAGEMAP_WP_ENGAGE) It is possible to find and clear soft-dirty pages entirely in userspace. But it isn't efficient: - The mprotect and SIGSEGV handler for bookkeeping - The userfaultfd wp (synchronous) with the handler for bookkeeping Some benchmarks can be seen here[1]. This series adds features that weren't present earlier: - There is no atomic get soft-dirty/Written-to status and clear present in the kernel. 
- The pages which have been written-to cannot be found in an accurate way. (Kernel's soft-dirty PTE bit + soft-dirty VMA bit shows more soft-dirty pages than there actually are.) Historically, soft-dirty PTE bit tracking has been used in the CRIU project. The procfs interface is enough for finding the soft-dirty bit status and clearing the soft-dirty bit of all the pages of a process. We have the use case where we need to track the soft-dirty PTE bit for only specific pages on-demand. We need this tracking and clear mechanism of a region of memory while the process is running to emulate the GetWriteWatch() syscall of Windows. *(Moved to using UFFD instead of soft-dirty feature to find pages which have been written-to from v7 patch series)*: Stop using the soft-dirty flags for finding which pages have been written to. It is too delicate and wrong as it shows more soft-dirty pages than the actual soft-dirty pages. There is no interest in correcting it [2][3] as this is how the feature was written years ago. It shouldn't be updated to change its behaviour. Peter Xu has suggested using the async version of the UFFD WP [4] as it is based inherently on the PTEs. So in this patch series, I've added a new mode to the UFFD which is an asynchronous version of write protect. When this variant of the UFFD WP is used, the page faults are resolved automatically by the kernel. The pages which have been written-to can be found by reading the pagemap file (!PM_UFFD_WP). This feature can be used successfully to find which pages have been written to from the time the pages were write protected. This works just like the soft-dirty flag without showing any extra pages which aren't soft-dirty in reality. Information about whether a page is file mapped, present or swapped is required for the CRIU project [5][6]. The addition of the required mask, any mask, excluded mask and return mask is also required for the CRIU project [5]. 
The IOCTL returns the addresses of the pages which match the specific masks. The page addresses are returned in struct page_region in a compact form. The max_pages is needed to support a use case where user only wants to get a specific number of pages. So there is no need to find all the pages of interest in the range when max_pages is specified. The IOCTL returns when the maximum number of the pages are found. The max_pages is optional. If max_pages is specified, it must be equal or greater than the vec_size. This restriction is needed to handle worse case when one page_region only contains info of one page and it cannot be compacted. This is needed to emulate the Windows getWriteWatch() syscall. The patch series include the detailed selftest which can be used as an example for the uffd async wp test and PAGEMAP_IOCTL. It shows the interface usages as well. [1] https://lore.kernel.org/lkml/54d4c322-cd6e-eefd-b161-2af2b56aae24@collabora… [2] https://lore.kernel.org/all/20221220162606.1595355-1-usama.anjum@collabora.… [3] https://lore.kernel.org/all/20221122115007.2787017-1-usama.anjum@collabora.… [4] https://lore.kernel.org/all/Y6Hc2d+7eTKs7AiH@x1n [5] https://lore.kernel.org/all/YyiDg79flhWoMDZB@gmail.com/ [6] https://lore.kernel.org/all/20221014134802.1361436-1-mdanylo@google.com/ Regards, Muhammad Usama Anjum Muhammad Usama Anjum (4): fs/proc/task_mmu: Implement IOCTL to get and optionally clear info about PTEs tools headers UAPI: Update linux/fs.h with the kernel sources mm/pagemap: add documentation of PAGEMAP_SCAN IOCTL selftests: mm: add pagemap ioctl tests Peter Xu (1): userfaultfd: UFFD_FEATURE_WP_ASYNC Documentation/admin-guide/mm/pagemap.rst | 58 + Documentation/admin-guide/mm/userfaultfd.rst | 35 + fs/proc/task_mmu.c | 565 +++++++ fs/userfaultfd.c | 26 +- include/linux/hugetlb.h | 1 + include/linux/userfaultfd_k.h | 21 +- include/uapi/linux/fs.h | 55 + include/uapi/linux/userfaultfd.h | 9 +- mm/hugetlb.c | 34 +- mm/memory.c | 27 +- 
tools/include/uapi/linux/fs.h | 55 + tools/testing/selftests/mm/.gitignore | 2 + tools/testing/selftests/mm/Makefile | 3 +- tools/testing/selftests/mm/config | 1 + tools/testing/selftests/mm/pagemap_ioctl.c | 1464 ++++++++++++++++++ tools/testing/selftests/mm/run_vmtests.sh | 4 + 16 files changed, 2336 insertions(+), 24 deletions(-) create mode 100644 tools/testing/selftests/mm/pagemap_ioctl.c mode change 100644 => 100755 tools/testing/selftests/mm/run_vmtests.sh -- 2.39.2

2 years, 6 months

4
13
0 0

[PATCH v2 1/6] mm: userfaultfd: add new UFFDIO_POISON ioctl

by Axel Rasmussen

The basic idea here is to "simulate" memory poisoning for VMs. A VM running on some host might encounter a memory error, after which some page(s) are poisoned (i.e., future accesses SIGBUS). They expect that once poisoned, pages can never become "un-poisoned". So, when we live migrate the VM, we need to preserve the poisoned status of these pages. When live migrating, we try to get the guest running on its new host as quickly as possible. So, we start it running before all memory has been copied, and before we're certain which pages should be poisoned or not. So the basic way to use this new feature is: - On the new host, the guest's memory is registered with userfaultfd, in either MISSING or MINOR mode (doesn't really matter for this purpose). - On any first access, we get a userfaultfd event. At this point we can communicate with the old host to find out if the page was poisoned. - If so, we can respond with a UFFDIO_POISON - this places a swap marker so any future accesses will SIGBUS. Because the pte is now "present", future accesses won't generate more userfaultfd events, they'll just SIGBUS directly. UFFDIO_POISON does not handle unmapping previously-present PTEs. This isn't needed, because during live migration we want to intercept all accesses with userfaultfd (not just writes, so WP mode isn't useful for this). So whether minor or missing mode is being used (or both), the PTE won't be present in any case, so handling that case isn't needed. Why return VM_FAULT_HWPOISON instead of VM_FAULT_SIGBUS when one of these markers is encountered? For "normal" userspace programs there isn't a big difference, both yield a SIGBUS. The difference for KVM is key though: VM_FAULT_HWPOISON will result in an MCE being injected into the guest (which is the behavior we want). With VM_FAULT_SIGBUS, the hypervisor would need to catch the SIGBUS and deal with the MCE injection itself. 
Signed-off-by: Axel Rasmussen <axelrasmussen(a)google.com> --- fs/userfaultfd.c | 63 ++++++++++++++++++++++++++++++++ include/linux/swapops.h | 3 +- include/linux/userfaultfd_k.h | 4 ++ include/uapi/linux/userfaultfd.h | 25 +++++++++++-- mm/memory.c | 4 ++ mm/userfaultfd.c | 62 ++++++++++++++++++++++++++++++- 6 files changed, 156 insertions(+), 5 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 7cecd49e078b..c26a883399c9 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1965,6 +1965,66 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg) return ret; } +static inline int userfaultfd_poison(struct userfaultfd_ctx *ctx, unsigned long arg) +{ + __s64 ret; + struct uffdio_poison uffdio_poison; + struct uffdio_poison __user *user_uffdio_poison; + struct userfaultfd_wake_range range; + + user_uffdio_poison = (struct uffdio_poison __user *)arg; + + ret = -EAGAIN; + if (atomic_read(&ctx->mmap_changing)) + goto out; + + ret = -EFAULT; + if (copy_from_user(&uffdio_poison, user_uffdio_poison, + /* don't copy the output fields */ + sizeof(uffdio_poison) - (sizeof(__s64)))) + goto out; + + ret = validate_range(ctx->mm, uffdio_poison.range.start, + uffdio_poison.range.len); + if (ret) + goto out; + + ret = -EINVAL; + /* double check for wraparound just in case. 
*/ + if (uffdio_poison.range.start + uffdio_poison.range.len <= + uffdio_poison.range.start) { + goto out; + } + if (uffdio_poison.mode & ~UFFDIO_POISON_MODE_DONTWAKE) + goto out; + + if (mmget_not_zero(ctx->mm)) { + ret = mfill_atomic_poison(ctx->mm, uffdio_poison.range.start, + uffdio_poison.range.len, + &ctx->mmap_changing, 0); + mmput(ctx->mm); + } else { + return -ESRCH; + } + + if (unlikely(put_user(ret, &user_uffdio_poison->updated))) + return -EFAULT; + if (ret < 0) + goto out; + + /* len == 0 would wake all */ + BUG_ON(!ret); + range.len = ret; + if (!(uffdio_poison.mode & UFFDIO_POISON_MODE_DONTWAKE)) { + range.start = uffdio_poison.range.start; + wake_userfault(ctx, &range); + } + ret = range.len == uffdio_poison.range.len ? 0 : -EAGAIN; + +out: + return ret; +} + static inline unsigned int uffd_ctx_features(__u64 user_features) { /* @@ -2066,6 +2126,9 @@ static long userfaultfd_ioctl(struct file *file, unsigned cmd, case UFFDIO_CONTINUE: ret = userfaultfd_continue(ctx, arg); break; + case UFFDIO_POISON: + ret = userfaultfd_poison(ctx, arg); + break; } return ret; } diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 4c932cb45e0b..8259fee32421 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -394,7 +394,8 @@ typedef unsigned long pte_marker; #define PTE_MARKER_UFFD_WP BIT(0) #define PTE_MARKER_SWAPIN_ERROR BIT(1) -#define PTE_MARKER_MASK (BIT(2) - 1) +#define PTE_MARKER_UFFD_POISON BIT(2) +#define PTE_MARKER_MASK (BIT(3) - 1) static inline swp_entry_t make_pte_marker_entry(pte_marker marker) { diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index ac7b0c96d351..ac8c6854097c 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -46,6 +46,7 @@ enum mfill_atomic_mode { MFILL_ATOMIC_COPY, MFILL_ATOMIC_ZEROPAGE, MFILL_ATOMIC_CONTINUE, + MFILL_ATOMIC_POISON, NR_MFILL_ATOMIC_MODES, }; @@ -83,6 +84,9 @@ extern ssize_t mfill_atomic_zeropage(struct mm_struct *dst_mm, extern 
ssize_t mfill_atomic_continue(struct mm_struct *dst_mm, unsigned long dst_start, unsigned long len, atomic_t *mmap_changing, uffd_flags_t flags); +extern ssize_t mfill_atomic_poison(struct mm_struct *dst_mm, unsigned long start, + unsigned long len, atomic_t *mmap_changing, + uffd_flags_t flags); extern int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start, unsigned long len, bool enable_wp, atomic_t *mmap_changing); diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index 66dd4cd277bd..62151706c5a3 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -39,7 +39,8 @@ UFFD_FEATURE_MINOR_SHMEM | \ UFFD_FEATURE_EXACT_ADDRESS | \ UFFD_FEATURE_WP_HUGETLBFS_SHMEM | \ - UFFD_FEATURE_WP_UNPOPULATED) + UFFD_FEATURE_WP_UNPOPULATED | \ + UFFD_FEATURE_POISON) #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -49,12 +50,14 @@ (__u64)1 << _UFFDIO_COPY | \ (__u64)1 << _UFFDIO_ZEROPAGE | \ (__u64)1 << _UFFDIO_WRITEPROTECT | \ - (__u64)1 << _UFFDIO_CONTINUE) + (__u64)1 << _UFFDIO_CONTINUE | \ + (__u64)1 << _UFFDIO_POISON) #define UFFD_API_RANGE_IOCTLS_BASIC \ ((__u64)1 << _UFFDIO_WAKE | \ (__u64)1 << _UFFDIO_COPY | \ + (__u64)1 << _UFFDIO_WRITEPROTECT | \ (__u64)1 << _UFFDIO_CONTINUE | \ - (__u64)1 << _UFFDIO_WRITEPROTECT) + (__u64)1 << _UFFDIO_POISON) /* * Valid ioctl command number range with this API is from 0x00 to @@ -71,6 +74,7 @@ #define _UFFDIO_ZEROPAGE (0x04) #define _UFFDIO_WRITEPROTECT (0x06) #define _UFFDIO_CONTINUE (0x07) +#define _UFFDIO_POISON (0x08) #define _UFFDIO_API (0x3F) /* userfaultfd ioctl ids */ @@ -91,6 +95,8 @@ struct uffdio_writeprotect) #define UFFDIO_CONTINUE _IOWR(UFFDIO, _UFFDIO_CONTINUE, \ struct uffdio_continue) +#define UFFDIO_POISON _IOWR(UFFDIO, _UFFDIO_POISON, \ + struct uffdio_poison) /* read() structure */ struct uffd_msg { @@ -225,6 +231,7 @@ struct uffdio_api { #define UFFD_FEATURE_EXACT_ADDRESS (1<<11) #define 
UFFD_FEATURE_WP_HUGETLBFS_SHMEM (1<<12) #define UFFD_FEATURE_WP_UNPOPULATED (1<<13) +#define UFFD_FEATURE_POISON (1<<14) __u64 features; __u64 ioctls; @@ -321,6 +328,18 @@ struct uffdio_continue { __s64 mapped; }; +struct uffdio_poison { + struct uffdio_range range; +#define UFFDIO_POISON_MODE_DONTWAKE ((__u64)1<<0) + __u64 mode; + + /* + * Fields below here are written by the ioctl and must be at the end: + * the copy_from_user will not read past here. + */ + __s64 updated; +}; + /* * Flags for the userfaultfd(2) system call itself. */ diff --git a/mm/memory.c b/mm/memory.c index d8a9a770b1f1..7fbda39e060d 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3692,6 +3692,10 @@ static vm_fault_t handle_pte_marker(struct vm_fault *vmf) if (WARN_ON_ONCE(!marker)) return VM_FAULT_SIGBUS; + /* Poison emulation explicitly requested for this PTE. */ + if (marker & PTE_MARKER_UFFD_POISON) + return VM_FAULT_HWPOISON; + /* Higher priority than uffd-wp when data corrupted */ if (marker & PTE_MARKER_SWAPIN_ERROR) return VM_FAULT_SIGBUS; diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index a2bf37ee276d..87b62ca1e09e 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -286,6 +286,51 @@ static int mfill_atomic_pte_continue(pmd_t *dst_pmd, goto out; } +/* Handles UFFDIO_POISON for all non-hugetlb VMAs. 
*/ +static int mfill_atomic_pte_poison(pmd_t *dst_pmd, + struct vm_area_struct *dst_vma, + unsigned long dst_addr, + uffd_flags_t flags) +{ + int ret; + struct mm_struct *dst_mm = dst_vma->vm_mm; + pte_t _dst_pte, *dst_pte; + spinlock_t *ptl; + + _dst_pte = make_pte_marker(PTE_MARKER_UFFD_POISON); + dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl); + + if (vma_is_shmem(dst_vma)) { + struct inode *inode; + pgoff_t offset, max_off; + + /* serialize against truncate with the page table lock */ + inode = dst_vma->vm_file->f_inode; + offset = linear_page_index(dst_vma, dst_addr); + max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + ret = -EFAULT; + if (unlikely(offset >= max_off)) + goto out_unlock; + } + + ret = -EEXIST; + /* + * For now, we don't handle unmapping pages, so only support filling in + * none PTEs, or replacing PTE markers. + */ + if (!pte_none_mostly(*dst_pte)) + goto out_unlock; + + set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte); + + /* No need to invalidate - it was non-present before */ + update_mmu_cache(dst_vma, dst_addr, dst_pte); + ret = 0; +out_unlock: + pte_unmap_unlock(dst_pte, ptl); + return ret; +} + static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address) { pgd_t *pgd; @@ -336,8 +381,12 @@ static __always_inline ssize_t mfill_atomic_hugetlb( * supported by hugetlb. A PMD_SIZE huge pages may exist as used * by THP. Since we can not reliably insert a zero page, this * feature is not supported. + * + * PTE marker handling for hugetlb is a bit special, so for now + * UFFDIO_POISON is not supported. 
*/ - if (uffd_flags_mode_is(flags, MFILL_ATOMIC_ZEROPAGE)) { + if (uffd_flags_mode_is(flags, MFILL_ATOMIC_ZEROPAGE) || + uffd_flags_mode_is(flags, MFILL_ATOMIC_POISON)) { mmap_read_unlock(dst_mm); return -EINVAL; } @@ -481,6 +530,9 @@ static __always_inline ssize_t mfill_atomic_pte(pmd_t *dst_pmd, if (uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE)) { return mfill_atomic_pte_continue(dst_pmd, dst_vma, dst_addr, flags); + } else if (uffd_flags_mode_is(flags, MFILL_ATOMIC_POISON)) { + return mfill_atomic_pte_poison(dst_pmd, dst_vma, + dst_addr, flags); } /* @@ -702,6 +754,14 @@ ssize_t mfill_atomic_continue(struct mm_struct *dst_mm, unsigned long start, uffd_flags_set_mode(flags, MFILL_ATOMIC_CONTINUE)); } +ssize_t mfill_atomic_poison(struct mm_struct *dst_mm, unsigned long start, + unsigned long len, atomic_t *mmap_changing, + uffd_flags_t flags) +{ + return mfill_atomic(dst_mm, start, 0, len, mmap_changing, + uffd_flags_set_mode(flags, MFILL_ATOMIC_POISON)); +} + long uffd_wp_range(struct vm_area_struct *dst_vma, unsigned long start, unsigned long len, bool enable_wp) { -- 2.41.0.255.g8b1d071c50-goog

2 years, 6 months

5
22
0 0

2026

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-kselftest-mirror