- Linux-kselftest-mirror - lists.linaro.org

[PATCH AUTOSEL 6.14 17/23] selftests/bpf: Fix cap_enable_effective() return code

by Sasha Levin

From: Feng Yang <yangfeng(a)kylinos.cn> [ Upstream commit 339c1f8ea11cc042c30c315c1a8f61e4b8a90117 ] The caller of cap_enable_effective() expects negative error code. Fix it. Before: failed to restore CAP_SYS_ADMIN: -1, Unknown error -1 After: failed to restore CAP_SYS_ADMIN: -3, No such process failed to restore CAP_SYS_ADMIN: -22, Invalid argument Signed-off-by: Feng Yang <yangfeng(a)kylinos.cn> Acked-by: Eduard Zingerman <eddyz87(a)gmail.com> Link: https://lore.kernel.org/r/20250305022234.44932-1-yangfeng59949@163.com Signed-off-by: Alexei Starovoitov <ast(a)kernel.org> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/bpf/cap_helpers.c | 8 ++++---- tools/testing/selftests/bpf/cap_helpers.h | 1 + tools/testing/selftests/bpf/prog_tests/verifier.c | 4 ++-- tools/testing/selftests/bpf/test_loader.c | 6 +++--- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/bpf/cap_helpers.c b/tools/testing/selftests/bpf/cap_helpers.c index d5ac507401d7c..98f840c3a38f7 100644 --- a/tools/testing/selftests/bpf/cap_helpers.c +++ b/tools/testing/selftests/bpf/cap_helpers.c @@ -19,7 +19,7 @@ int cap_enable_effective(__u64 caps, __u64 *old_caps) err = capget(&hdr, data); if (err) - return err; + return -errno; if (old_caps) *old_caps = (__u64)(data[1].effective) << 32 | data[0].effective; @@ -32,7 +32,7 @@ int cap_enable_effective(__u64 caps, __u64 *old_caps) data[1].effective |= cap1; err = capset(&hdr, data); if (err) - return err; + return -errno; return 0; } @@ -49,7 +49,7 @@ int cap_disable_effective(__u64 caps, __u64 *old_caps) err = capget(&hdr, data); if (err) - return err; + return -errno; if (old_caps) *old_caps = (__u64)(data[1].effective) << 32 | data[0].effective; @@ -61,7 +61,7 @@ int cap_disable_effective(__u64 caps, __u64 *old_caps) data[1].effective &= ~cap1; err = capset(&hdr, data); if (err) - return err; + return -errno; return 0; } diff --git a/tools/testing/selftests/bpf/cap_helpers.h 
b/tools/testing/selftests/bpf/cap_helpers.h index 6d163530cb0fd..8dcb28557f762 100644 --- a/tools/testing/selftests/bpf/cap_helpers.h +++ b/tools/testing/selftests/bpf/cap_helpers.h @@ -4,6 +4,7 @@ #include <linux/types.h> #include <linux/capability.h> +#include <errno.h> #ifndef CAP_PERFMON #define CAP_PERFMON 38 diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 8a0e1ff8a2dc6..ecc320e045513 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -121,7 +121,7 @@ static void run_tests_aux(const char *skel_name, /* test_verifier tests are executed w/o CAP_SYS_ADMIN, do the same here */ err = cap_disable_effective(1ULL << CAP_SYS_ADMIN, &old_caps); if (err) { - PRINT_FAIL("failed to drop CAP_SYS_ADMIN: %i, %s\n", err, strerror(err)); + PRINT_FAIL("failed to drop CAP_SYS_ADMIN: %i, %s\n", err, strerror(-err)); return; } @@ -131,7 +131,7 @@ static void run_tests_aux(const char *skel_name, err = cap_enable_effective(old_caps, NULL); if (err) - PRINT_FAIL("failed to restore CAP_SYS_ADMIN: %i, %s\n", err, strerror(err)); + PRINT_FAIL("failed to restore CAP_SYS_ADMIN: %i, %s\n", err, strerror(-err)); } #define RUN(skel) run_tests_aux(#skel, skel##__elf_bytes, NULL) diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 53b06647cf57d..8a403e5aa3145 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -773,7 +773,7 @@ static int drop_capabilities(struct cap_state *caps) err = cap_disable_effective(caps_to_drop, &caps->old_caps); if (err) { - PRINT_FAIL("failed to drop capabilities: %i, %s\n", err, strerror(err)); + PRINT_FAIL("failed to drop capabilities: %i, %s\n", err, strerror(-err)); return err; } @@ -790,7 +790,7 @@ static int restore_capabilities(struct cap_state *caps) err = cap_enable_effective(caps->old_caps, NULL); if (err) - 
PRINT_FAIL("failed to restore capabilities: %i, %s\n", err, strerror(err)); + PRINT_FAIL("failed to restore capabilities: %i, %s\n", err, strerror(-err)); caps->initialized = false; return err; } @@ -959,7 +959,7 @@ void run_subtest(struct test_loader *tester, if (subspec->caps) { err = cap_enable_effective(subspec->caps, NULL); if (err) { - PRINT_FAIL("failed to set capabilities: %i, %s\n", err, strerror(err)); + PRINT_FAIL("failed to set capabilities: %i, %s\n", err, strerror(-err)); goto subtest_cleanup; } } -- 2.39.5

6 months, 4 weeks

1
0
0 0

[PATCH AUTOSEL 6.14 02/23] selftests/bpf: Fix stdout race condition in traffic monitor

by Sasha Levin

From: Amery Hung <ameryhung(a)gmail.com> [ Upstream commit b99f27e90268b1a814c13f8bd72ea1db448ea257 ] Fix a race condition between the main test_progs thread and the traffic monitoring thread. The traffic monitor thread tries to print a line using multiple printf calls and uses flockfile() to prevent the line from being torn apart. Meanwhile, the main thread doing io redirection can reassign or close stdout when going through tests. A deadlock as shown below can happen. main traffic_monitor_thread ==== ====================== show_transport() -> flockfile(stdout) stdio_hijack_init() -> stdout = open_memstream(log_buf, log_cnt); ... env.subtest_state->stdout_saved = stdout; ... funlockfile(stdout) stdio_restore_cleanup() -> fclose(env.subtest_state->stdout_saved); After the traffic monitor thread locks stdout, a new memstream can be assigned to stdout by the main thread. Therefore, the traffic monitor thread later will not be able to unlock the original stdout. As the main thread tries to access the old stdout, it will hang indefinitely as it is still locked by the traffic monitor thread. The deadlock can be reproduced by running test_progs repeatedly with traffic monitor enabled: for ((i=1;i<=100;i++)); do ./test_progs -a flow_dissector_skb* -m '*' done Fix this by only calling printf once and removing flockfile()/funlockfile(). 
Signed-off-by: Amery Hung <ameryhung(a)gmail.com> Signed-off-by: Martin KaFai Lau <martin.lau(a)kernel.org> Link: https://patch.msgid.link/20250213233217.553258-1-ameryhung@gmail.com Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/bpf/network_helpers.c | 33 ++++++++----------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 80844a5fb1fee..95e943270f359 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -771,12 +771,13 @@ static const char *pkt_type_str(u16 pkt_type) return "Unknown"; } +#define MAX_FLAGS_STRLEN 21 /* Show the information of the transport layer in the packet */ static void show_transport(const u_char *packet, u16 len, u32 ifindex, const char *src_addr, const char *dst_addr, u16 proto, bool ipv6, u8 pkt_type) { - char *ifname, _ifname[IF_NAMESIZE]; + char *ifname, _ifname[IF_NAMESIZE], flags[MAX_FLAGS_STRLEN] = ""; const char *transport_str; u16 src_port, dst_port; struct udphdr *udp; @@ -817,29 +818,21 @@ static void show_transport(const u_char *packet, u16 len, u32 ifindex, /* TCP or UDP*/ - flockfile(stdout); + if (proto == IPPROTO_TCP) + snprintf(flags, MAX_FLAGS_STRLEN, "%s%s%s%s", + tcp->fin ? ", FIN" : "", + tcp->syn ? ", SYN" : "", + tcp->rst ? ", RST" : "", + tcp->ack ? 
", ACK" : ""); + if (ipv6) - printf("%-7s %-3s IPv6 %s.%d > %s.%d: %s, length %d", + printf("%-7s %-3s IPv6 %s.%d > %s.%d: %s, length %d%s\n", ifname, pkt_type_str(pkt_type), src_addr, src_port, - dst_addr, dst_port, transport_str, len); + dst_addr, dst_port, transport_str, len, flags); else - printf("%-7s %-3s IPv4 %s:%d > %s:%d: %s, length %d", + printf("%-7s %-3s IPv4 %s:%d > %s:%d: %s, length %d%s\n", ifname, pkt_type_str(pkt_type), src_addr, src_port, - dst_addr, dst_port, transport_str, len); - - if (proto == IPPROTO_TCP) { - if (tcp->fin) - printf(", FIN"); - if (tcp->syn) - printf(", SYN"); - if (tcp->rst) - printf(", RST"); - if (tcp->ack) - printf(", ACK"); - } - - printf("\n"); - funlockfile(stdout); + dst_addr, dst_port, transport_str, len, flags); } static void show_ipv6_packet(const u_char *packet, u32 ifindex, u8 pkt_type) -- 2.39.5

6 months, 4 weeks

1
0
0 0

[PATCH net-next] net/selftests: Add loopback link local route for self-connect

by Dmitry Safonov via B4 Relay

From: Dmitry Safonov <0x7f454c46(a)gmail.com> self-connect-ipv6 got slightly flaky on netdev: > # timeout set to 120 > # selftests: net/tcp_ao: self-connect_ipv6 > # 1..5 > # # 708[lib/setup.c:250] rand seed 1742872572 > # TAP version 13 > # # 708[lib/proc.c:213] Snmp6 Ip6OutNoRoutes: 0 => 1 > # not ok 1 # error 708[self-connect.c:70] failed to connect() > # ok 2 No unexpected trace events during the test run > # # Planned tests != run tests (5 != 2) > # # Totals: pass:1 fail:0 xfail:0 xpass:0 skip:0 error:1 > ok 1 selftests: net/tcp_ao: self-connect_ipv6 I cannot reproduce it on my machines, but judging by "Ip6OutNoRoutes" there is no route to the local_addr (::1). Looking at the kernel code, I see that the kernel does add the link-local address automatically in init_loopback(), but that is called from the ipv6 notifier block. So, in turn, the userspace that brought up the loopback interface may see the rtnetlink ACK earlier than addrconf_notify() does its job (at least, on a slow VM such as netdev). Probably, for ipv4 it's the same, judging by inetdev_event(). The fix is quite simple: set the link-local route straight after bringing up the loopback interface. That will make it synchronous. Signed-off-by: Dmitry Safonov <0x7f454c46(a)gmail.com> --- Sorry to send this during the merge window, it's a test stability fix. It seems that the netdev build bot has hit the issue a couple of times, but does not seem to be hitting it constantly at this moment: https://netdev.bots.linux.dev/flakes.html?br-cnt=150&tn-needle=tcp-ao I'm marking it net-next, so that the build bot carries it until the merge closes. If it's not fine, I can re-send it after the merge window. 
--- tools/testing/selftests/net/tcp_ao/self-connect.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/net/tcp_ao/self-connect.c b/tools/testing/selftests/net/tcp_ao/self-connect.c index 73b2f2276f3f5410aaa74bede7f366f81761bd6e..2c73bea698a677f9aedd7bec28f6e7fee7845d2e 100644 --- a/tools/testing/selftests/net/tcp_ao/self-connect.c +++ b/tools/testing/selftests/net/tcp_ao/self-connect.c @@ -16,6 +16,9 @@ static void __setup_lo_intf(const char *lo_intf, if (link_set_up(lo_intf)) test_error("Failed to bring %s up", lo_intf); + + if (ip_route_add(lo_intf, TEST_FAMILY, local_addr, local_addr)) + test_error("Failed to add a local route %s", lo_intf); } static void setup_lo_intf(const char *lo_intf) --- base-commit: 1a9239bb4253f9076b5b4b2a1a4e8d7defd77a95 change-id: 20250402-tcp-ao-selfconnect-flake-e0aabc03c076 Best regards, -- Dmitry Safonov <0x7f454c46(a)gmail.com>

6 months, 4 weeks

2
1
0 0

[PATCH bpf-next 00/11] bpf: Mitigate Spectre v1 using barriers

by Luis Gerhorst

This improves the expressiveness of unprivileged BPF by inserting speculation barriers instead of rejecting the programs. The approach was previously presented at LPC'24 [1] and RAID'24 [2]. To mitigate the Spectre v1 (PHT) vulnerability, the kernel rejects potentially-dangerous unprivileged BPF programs as of commit 9183671af6db ("bpf: Fix leakage under speculation on mispredicted branches"). In [2], we have analyzed 364 object files from open source projects (Linux Samples and Selftests, BCC, Loxilb, Cilium, libbpf Examples, Parca, and Prevail) and found that this affects 31% to 54% of programs. To resolve this in the majority of cases this patchset adds a fall-back for mitigating Spectre v1 using speculation barriers. The kernel still optimistically attempts to verify all speculative paths but uses speculation barriers against v1 when unsafe behavior is detected. This allows for more programs to be accepted without disabling the BPF Spectre mitigations (e.g., by setting cpu_mitigations_off()). In [1] we have measured the overhead of this approach relative to having mitigations off and including the upstream Spectre v4 mitigations. For event tracing and stack-sampling profilers, we found that mitigations increase BPF program execution time by 0% to 62%. For the Loxilb network load balancer, we have measured a 14% slowdown in SCTP performance but no significant slowdown for TCP. This overhead only applies to programs that were previously rejected. I reran the expressiveness-evaluation with v6.14 and made sure the main results still match those from [1] and [2] (which used v6.5). Main design decisions are: * Do not use separate bytecode insns for v1 and v4 barriers. This simplifies the verifier significantly and has the only downside that performance on PowerPC is not as high as it could be. * Allow archs to still disable v1/v4 mitigations separately by setting bpf_jit_bypass_spec_v1/v4(). 
This has the benefit that archs can benefit from improved BPF expressiveness / performance if they are not vulnerable (e.g., ARM64 for v4 in the kernel). * Do not remove the empty BPF_NOSPEC implementation for backends for which it is unknown whether they are vulnerable to Spectre v1. [1] https://lpc.events/event/18/contributions/1954/ ("Mitigating Spectre-PHT using Speculation Barriers in Linux eBPF") [2] https://arxiv.org/pdf/2405.00078 ("VeriFence: Lightweight and Precise Spectre Defenses for Untrusted Linux Kernel Extensions") Changes: * RFC -> v1: - rebase to bpf-next-250313 - tests: mark expected successes/new errors - add bpf_jit_bypass_spec_v1/v4() to avoid #ifdef in bpf_bypass_spec_v1/v4() - ensure that nospec with v1-support is implemented for archs for which GCC supports speculation barriers, except for MIPS - arm64: emit speculation barrier - powerpc: change nospec to include v1 barrier - discuss potential security (archs that do not impl. BPF nospec) and performance (only PowerPC) regressions RFC: https://lore.kernel.org/bpf/20250224203619.594724-1-luis.gerhorst@fau.de/ Luis Gerhorst (11): bpf: Move insn if/else into do_check_insn() bpf: Return -EFAULT on misconfigurations bpf: Return -EFAULT on internal errors bpf, arm64, powerpc: Add bpf_jit_bypass_spec_v1/v4() bpf, arm64, powerpc: Change nospec to include v1 barrier bpf: Rename sanitize_stack_spill to nospec_result bpf: Fall back to nospec for Spectre v1 bpf: Allow nospec-protected var-offset stack access bpf: Return PTR_ERR from push_stack() bpf: Fall back to nospec for sanitization-failures bpf: Fall back to nospec for spec path verification arch/arm64/net/bpf_jit.h | 5 + arch/arm64/net/bpf_jit_comp.c | 28 +- arch/powerpc/net/bpf_jit_comp64.c | 79 +- include/linux/bpf.h | 11 +- include/linux/bpf_verifier.h | 3 +- include/linux/filter.h | 2 +- kernel/bpf/core.c | 32 +- kernel/bpf/verifier.c | 723 ++++++++++-------- .../selftests/bpf/progs/verifier_and.c | 3 +- 
.../selftests/bpf/progs/verifier_bounds.c | 35 +- .../bpf/progs/verifier_bounds_deduction.c | 43 +- .../selftests/bpf/progs/verifier_map_ptr.c | 12 +- .../selftests/bpf/progs/verifier_movsx.c | 6 +- .../selftests/bpf/progs/verifier_unpriv.c | 3 +- .../bpf/progs/verifier_value_ptr_arith.c | 50 +- .../selftests/bpf/verifier/dead_code.c | 3 +- tools/testing/selftests/bpf/verifier/jmp32.c | 33 +- tools/testing/selftests/bpf/verifier/jset.c | 10 +- 18 files changed, 630 insertions(+), 451 deletions(-) base-commit: 46d38f489ef02175dcff1e03a849c226eb0729a6 -- 2.48.1

6 months, 4 weeks

3
22
0 0

[PATCH v2 0/5] KVM: guest_memfd: support for uffd minor

by Nikita Kalyazin

This series is built on top of Fuad's v7 "mapping guest_memfd backed memory at the host" [1]. With James's KVM userfault [2], it is possible to handle stage-2 faults in guest_memfd in userspace. However, KVM itself also triggers faults in guest_memfd in some cases, for example: PV interfaces like kvmclock, PV EOI and page table walking code when fetching the MMIO instruction on x86. It was agreed in the guest_memfd upstream call on 23 Jan 2025 [3] that KVM would be accessing those pages via userspace page tables. In order for such faults to be handled in userspace, guest_memfd needs to support userfaultfd. Changes since v1 [4]: - James, Peter: implement a full minor trap instead of a hybrid missing/minor trap - James, Peter: to avoid shmem- and guest_memfd-specific code in the UFFDIO_CONTINUE implementation, make it generic by calling vm_ops->fault() While generalising the UFFDIO_CONTINUE implementation helped avoid guest_memfd-specific code in mm/userfaultfd, userfaultfd still needs access to KVM code to be able to verify the VMA type when handling UFFDIO_REGISTER_MODE_MINOR, so I used a similar approach to what Fuad did for now [5]. In v1, Peter mentioned a potential for eliminating taking a folio lock [6]. I did not implement that, but according to my testing, the performance of shmem minor fault handling stayed the same after the migration to calling vm_ops->fault() (tested on an x86). 
Before: ./demand_paging_test -u MINOR -s shmem Random seed: 0x6b8b4567 Testing guest mode: PA-bits:ANY, VA-bits:48, 4K pages guest physical test memory: [0x3fffbffff000, 0x3ffffffff000) Finished creating vCPUs and starting uffd threads Started all vCPUs All vCPU threads joined Total guest execution time: 10.979277020s Per-vcpu demand paging rate: 23876.253375 pgs/sec/vcpu Overall demand paging rate: 23876.253375 pgs/sec After: ./demand_paging_test -u MINOR -s shmem Random seed: 0x6b8b4567 Testing guest mode: PA-bits:ANY, VA-bits:48, 4K pages guest physical test memory: [0x3fffbffff000, 0x3ffffffff000) Finished creating vCPUs and starting uffd threads Started all vCPUs All vCPU threads joined Total guest execution time: 10.978893504s Per-vcpu demand paging rate: 23877.087423 pgs/sec/vcpu Overall demand paging rate: 23877.087423 pgs/sec Nikita [1] https://lore.kernel.org/kvm/20250318161823.4005529-1-tabba@google.com/T/ [2] https://lore.kernel.org/kvm/20250109204929.1106563-1-jthoughton@google.com/… [3] https://docs.google.com/document/d/1M6766BzdY1Lhk7LiR5IqVR8B8mG3cr-cxTxOrAo… [4] https://lore.kernel.org/kvm/20250303133011.44095-1-kalyazin@amazon.com/T/ [5] https://lore.kernel.org/kvm/20250318161823.4005529-1-tabba@google.com/T/#Z2… [6] https://lore.kernel.org/kvm/20250303133011.44095-1-kalyazin@amazon.com/T/#m… Nikita Kalyazin (5): mm: userfaultfd: generic continue for non hugetlbfs KVM: guest_memfd: add kvm_gmem_vma_is_gmem mm: userfaultfd: allow to register continue for guest_memfd KVM: guest_memfd: add support for userfaultfd minor KVM: selftests: test userfaultfd minor for guest_memfd include/linux/mm_types.h | 3 + include/linux/userfaultfd_k.h | 13 ++- mm/hugetlb.c | 2 +- mm/shmem.c | 3 +- mm/userfaultfd.c | 25 +++-- .../testing/selftests/kvm/guest_memfd_test.c | 94 +++++++++++++++++++ virt/kvm/guest_memfd.c | 15 +++ virt/kvm/kvm_mm.h | 1 + 8 files changed, 146 insertions(+), 10 deletions(-) base-commit: 3cc51efc17a2c41a480eed36b31c1773936717e0 -- 2.47.1

6 months, 4 weeks

2
11
0 0

[PATCH v2] selftests: riscv: fix v_exec_initval_nolibc.c

by Ignacio Encinas

Vector registers are zero initialized by the kernel. Stop accepting "all ones" as a clean value. Note that this was not working as expected given that value == 0xff can be assumed to be always false by the compiler as value's range is [-128, 127]. Both GCC (-Wtype-limits) and clang (-Wtautological-constant-out-of-range-compare) warn about this. Reviewed-by: Charlie Jenkins <charlie(a)rivosinc.com> Tested-by: Charlie Jenkins <charlie(a)rivosinc.com> Signed-off-by: Ignacio Encinas <ignacio(a)iencinas.com> --- Changes in v2: Remove code that becomes useless now that the only "clean" value for vector registers is 0. - Link to v1: https://lore.kernel.org/r/20250305-fix-v_exec_initval_nolibc-v1-1-b87b60e43… --- tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c b/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c index 35c0812e32de0c82a54f84bd52c4272507121e35..4dde05e45a04122b566cedc36d20b072413b00e2 100644 --- a/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c +++ b/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c @@ -6,7 +6,7 @@ * the values. To further ensure consistency, this file is compiled without * libc and without auto-vectorization. * - * To be "clean" all values must be either all ones or all zeroes. + * To be "clean" all values must be all zeroes. */ #define __stringify_1(x...) 
#x @@ -14,9 +14,8 @@ int main(int argc, char **argv) { - char prev_value = 0, value; + char value = 0; unsigned long vl; - int first = 1; if (argc > 2 && strcmp(argv[2], "x")) asm volatile ( @@ -44,14 +43,11 @@ int main(int argc, char **argv) "vsrl.vi " __stringify(register) ", " __stringify(register) ", 8\n\t" \ ".option pop\n\t" \ : "=r" (value)); \ - if (first) { \ - first = 0; \ - } else if (value != prev_value || !(value == 0x00 || value == 0xff)) { \ + if (value != 0x00) { \ printf("Register " __stringify(register) \ " values not clean! value: %u\n", value); \ exit(-1); \ } \ - prev_value = value; \ } \ }) --- base-commit: 03d38806a902b36bf364cae8de6f1183c0a35a67 change-id: 20250301-fix-v_exec_initval_nolibc-498d976c372d Best regards, -- Ignacio Encinas <ignacio(a)iencinas.com>

6 months, 4 weeks

4
3
0 0

[PATCH 00/10] cgroup/cpuset: Miscellaneous partition bug fixes and enhancements

by Waiman Long

This patch series fixes a number of bugs in the cpuset partition code as well as improvement in remote partition handling. The test_cpuset_prs.sh is also enhanced to allow more vigorous remote partition testing. Waiman Long (10): cgroup/cpuset: Fix race between newly created partition and dying one cgroup/cpuset: Fix incorrect isolated_cpus update in update_parent_effective_cpumask() cgroup/cpuset: Fix error handling in remote_partition_disable() cgroup/cpuset: Remove remote_partition_check() & make update_cpumasks_hier() handle remote partition cgroup/cpuset: Don't allow creation of local partition over a remote one cgroup/cpuset: Code cleanup and comment update cgroup/cpuset: Remove unneeded goto in sched_partition_write() and rename it selftest/cgroup: Update test_cpuset_prs.sh to use | as effective CPUs and state separator selftest/cgroup: Clean up and restructure test_cpuset_prs.sh selftest/cgroup: Add a remote partition transition test to test_cpuset_prs.sh include/linux/cgroup-defs.h | 1 + include/linux/cgroup.h | 2 +- kernel/cgroup/cgroup.c | 6 + kernel/cgroup/cpuset-internal.h | 1 + kernel/cgroup/cpuset.c | 401 +++++++----- .../selftests/cgroup/test_cpuset_prs.sh | 617 ++++++++++++------ 6 files changed, 649 insertions(+), 379 deletions(-) -- 2.48.1

6 months, 4 weeks

4
23
0 0

[RFC PATCH security-next 0/4] Introducing Hornet LSM

by Blaise Boscaccy

This patch series introduces the Hornet LSM. Hornet takes a simple approach to light-skeleton-based eBPF signature verification. Signature data can be easily generated for the binary data that is generated via bpftool gen -L. This signature can be appended to a skeleton executable via scripts/sign-ebpf. Hornet checks the signature against a binary buffer containing the lskel instructions that the loader maps use. Maps are frozen to prevent TOCTOU bugs where a sufficiently privileged user could rewrite map data between the calls to BPF_PROG_LOAD and BPF_PROG_RUN. Additionally, both sparse-array-based and fd_array_cnt-based map fd arrays are supported for signature verification. Blaise Boscaccy (4): security: Hornet LSM hornet: Introduce sign-ebpf hornet: Add an example lskel data extactor script selftests/hornet: Add a selftest for the hornet LSM Documentation/admin-guide/LSM/Hornet.rst | 51 +++ crypto/asymmetric_keys/pkcs7_verify.c | 10 + include/linux/kernel_read_file.h | 1 + include/linux/verification.h | 1 + include/uapi/linux/lsm.h | 1 + scripts/Makefile | 1 + scripts/hornet/Makefile | 5 + scripts/hornet/extract-skel.sh | 29 ++ scripts/hornet/sign-ebpf.c | 420 +++++++++++++++++++ security/Kconfig | 3 +- security/Makefile | 1 + security/hornet/Kconfig | 11 + security/hornet/Makefile | 4 + security/hornet/hornet_lsm.c | 239 +++++++++++ tools/testing/selftests/Makefile | 1 + tools/testing/selftests/hornet/Makefile | 51 +++ tools/testing/selftests/hornet/loader.c | 21 + tools/testing/selftests/hornet/trivial.bpf.c | 33 ++ 18 files changed, 882 insertions(+), 1 deletion(-) create mode 100644 Documentation/admin-guide/LSM/Hornet.rst create mode 100644 scripts/hornet/Makefile create mode 100755 scripts/hornet/extract-skel.sh create mode 100644 scripts/hornet/sign-ebpf.c create mode 100644 security/hornet/Kconfig create mode 100644 security/hornet/Makefile create mode 100644 security/hornet/hornet_lsm.c create mode 100644 tools/testing/selftests/hornet/Makefile create 
mode 100644 tools/testing/selftests/hornet/loader.c create mode 100644 tools/testing/selftests/hornet/trivial.bpf.c -- 2.48.1

6 months, 4 weeks

5
22
0 0

[PATCH v3] selftests/mm: Convert page_size to unsigned long

by Siddarth G

Cppcheck warning: int result is assigned to long long variable. If the variable is long long to avoid loss of information, then you have loss of information. This patch changes the type of page_size from 'unsigned int' to 'unsigned long' instead of using ULL suffixes. Changing hpage_size to 'unsigned long' was considered, but since gethugepage() expects an int, this change was avoided. Reported-by: David Binderman <dcb314(a)hotmail.com> Closes: https://lore.kernel.org/all/AS8PR02MB10217315060BBFDB21F19643E9CA62@AS8PR02… Signed-off-by: Siddarth G <siddarthsgml(a)gmail.com> --- Changes since v2: - v2 had conflict with current mainline, so this is a fresh patch tools/testing/selftests/mm/pagemap_ioctl.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c index 57b4bba2b45f..fe5ae8b25ff6 100644 --- a/tools/testing/selftests/mm/pagemap_ioctl.c +++ b/tools/testing/selftests/mm/pagemap_ioctl.c @@ -34,7 +34,7 @@ #define PAGEMAP "/proc/self/pagemap" int pagemap_fd; int uffd; -unsigned int page_size; +unsigned long page_size; unsigned int hpage_size; const char *progname; @@ -184,7 +184,7 @@ void *gethugetlb_mem(int size, int *shmid) int userfaultfd_tests(void) { - int mem_size, vec_size, written, num_pages = 16; + long mem_size, vec_size, written, num_pages = 16; char *mem, *vec; mem_size = num_pages * page_size; @@ -213,7 +213,7 @@ int userfaultfd_tests(void) written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, vec_size - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (written < 0) - ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", written, errno, strerror(errno)); ksft_test_result(written == 0, "%s all new pages must not be written (dirty)\n", __func__); @@ -995,7 +995,7 @@ int unmapped_region_tests(void) { void *start = (void *)0x10000000; int written, 
len = 0x00040000; - int vec_size = len / page_size; + long vec_size = len / page_size; struct page_region *vec = malloc(sizeof(struct page_region) * vec_size); /* 1. Get written pages */ @@ -1051,7 +1051,7 @@ static void test_simple(void) int sanity_tests(void) { unsigned long long mem_size, vec_size; - int ret, fd, i, buf_size; + long ret, fd, i, buf_size; struct page_region *vec; char *mem, *fmem; struct stat sbuf; @@ -1160,7 +1160,7 @@ int sanity_tests(void) ret = stat(progname, &sbuf); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); fmem = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0); if (fmem == MAP_FAILED) -- 2.43.0

6 months, 4 weeks

1
0
0 0

[PATCH v2] ublk: improve detection and handling of ublk server exit

by Uday Shankar

There are currently two ways in which ublk server exit is detected by ublk_drv: 1. uring_cmd cancellation. If there are any outstanding uring_cmds which have not been completed to the ublk server when it exits, io_uring calls the uring_cmd callback with a special cancellation flag as the issuing task is exiting. 2. I/O timeout. This is needed in addition to the above to handle the "saturated queue" case, when all I/Os for a given queue are in the ublk server, and therefore there are no outstanding uring_cmds to cancel when the ublk server exits. There are a couple of issues with this approach: - It is complex and inelegant to have two methods to detect the same condition - The second method detects ublk server exit only after a long delay (~30s, the default timeout assigned by the block layer). This delays the nosrv behavior from kicking in and potential subsequent recovery of the device. The second issue is brought to light with the new test_generic_04. It fails before this fix: selftests: ublk: test_generic_04.sh dev id is 0 dd: error writing '/dev/ublkb0': Input/output error 1+0 records in 0+0 records out 0 bytes copied, 30.0611 s, 0.0 kB/s DEAD dd took 31 seconds to exit (>= 5s tolerance)! generic_04 : [FAIL] Fix this by instead detecting and handling ublk server exit in the character file release callback. This has several advantages: - This one place can handle both saturated and unsaturated queues. Thus, it replaces both preexisting methods of detecting ublk server exit. - It runs quickly on ublk server exit - there is no 30s delay. - It starts the process of removing task references in ublk_drv. This is needed if we want to relax restrictions in the driver like letting only one thread serve each queue There is also the disadvantage that the character file release callback can also be triggered by intentional close of the file, which is a significant behavior change. 
Preexisting ublk servers (libublksrv) are dependent on the ability to open/close the file multiple times. To address this, only transition to a nosrv state if the file is released while the ublk device is live. This allows for programs to open/close the file multiple times during setup. It is still a behavior change if a ublk server decides to close/reopen the file while the device is LIVE (i.e. while it is responsible for serving I/O), but that would be highly unusual. This behavior is in line with what is done by FUSE, which is very similar to ublk in that a userspace daemon is providing services traditionally provided by the kernel. With this change in, the new test (and all other selftests, and all ublksrv tests) pass: selftests: ublk: test_generic_04.sh dev id is 0 dd: error writing '/dev/ublkb0': Input/output error 1+0 records in 0+0 records out 0 bytes copied, 0.0376731 s, 0.0 kB/s DEAD generic_04 : [PASS] Signed-off-by: Uday Shankar <ushankar(a)purestorage.com> --- Changes in v2: - Leave null ublk selftests target untouched, instead create new fault_inject target for injecting per-I/O delay (Ming Lei) - Allow multiple open/close of ublk character device with some restrictions - Drop patches which made it in separately at https://lore.kernel.org/r/20250401-ublk_selftests-v1-1-98129c9bc8bb@puresto… - Consolidate more nosrv logic in ublk character device release, and associated code cleanup - Link to v1: https://lore.kernel.org/r/20250325-ublk_timeout-v1-0-262f0121a7bd@purestora… --- drivers/block/ublk_drv.c | 187 +++++++----------------- tools/testing/selftests/ublk/Makefile | 4 +- tools/testing/selftests/ublk/fault_inject.c | 58 ++++++++ tools/testing/selftests/ublk/kublk.c | 6 +- tools/testing/selftests/ublk/kublk.h | 4 + tools/testing/selftests/ublk/test_generic_04.sh | 43 ++++++ 6 files changed, 165 insertions(+), 137 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 
2fd05c1bd30b03343cb6f357f8c08dd92ff47af9..d06f8a9aa23f8b846928247fc9e29002c10a49e3 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -162,7 +162,6 @@ struct ublk_queue { bool force_abort; bool timeout; - bool canceling; bool fail_io; /* copy of dev->state == UBLK_S_DEV_FAIL_IO */ unsigned short nr_io_ready; /* how many ios setup */ spinlock_t cancel_lock; @@ -199,8 +198,6 @@ struct ublk_device { struct completion completion; unsigned int nr_queues_ready; unsigned int nr_privileged_daemon; - - struct work_struct nosrv_work; }; /* header of ublk_params */ @@ -209,8 +206,9 @@ struct ublk_params_header { __u32 types; }; -static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq); - +static void ublk_stop_dev_unlocked(struct ublk_device *ub); +static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq); +static void __ublk_quiesce_dev(struct ublk_device *ub); static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, struct ublk_queue *ubq, int tag, size_t offset); static inline unsigned int ublk_req_build_flags(struct request *req); @@ -1314,8 +1312,6 @@ static void ublk_queue_cmd_list(struct ublk_queue *ubq, struct rq_list *l) static enum blk_eh_timer_return ublk_timeout(struct request *rq) { struct ublk_queue *ubq = rq->mq_hctx->driver_data; - unsigned int nr_inflight = 0; - int i; if (ubq->flags & UBLK_F_UNPRIVILEGED_DEV) { if (!ubq->timeout) { @@ -1326,26 +1322,6 @@ static enum blk_eh_timer_return ublk_timeout(struct request *rq) return BLK_EH_DONE; } - if (!ubq_daemon_is_dying(ubq)) - return BLK_EH_RESET_TIMER; - - for (i = 0; i < ubq->q_depth; i++) { - struct ublk_io *io = &ubq->ios[i]; - - if (!(io->flags & UBLK_IO_FLAG_ACTIVE)) - nr_inflight++; - } - - /* cancelable uring_cmd can't help us if all commands are in-flight */ - if (nr_inflight == ubq->q_depth) { - struct ublk_device *ub = ubq->dev; - - if (ublk_abort_requests(ub, ubq)) { - schedule_work(&ub->nosrv_work); - } - return 
BLK_EH_DONE; - } - return BLK_EH_RESET_TIMER; } @@ -1368,9 +1344,6 @@ static blk_status_t ublk_prep_req(struct ublk_queue *ubq, struct request *rq) if (ublk_nosrv_should_queue_io(ubq) && unlikely(ubq->force_abort)) return BLK_STS_IOERR; - if (unlikely(ubq->canceling)) - return BLK_STS_IOERR; - /* fill iod to slot in io cmd buffer */ res = ublk_setup_iod(ubq, rq); if (unlikely(res != BLK_STS_OK)) @@ -1391,16 +1364,6 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, if (res != BLK_STS_OK) return res; - /* - * ->canceling has to be handled after ->force_abort and ->fail_io - * is dealt with, otherwise this request may not be failed in case - * of recovery, and cause hang when deleting disk - */ - if (unlikely(ubq->canceling)) { - __ublk_abort_rq(ubq, rq); - return BLK_STS_OK; - } - ublk_queue_cmd(ubq, rq); return BLK_STS_OK; } @@ -1461,8 +1424,52 @@ static int ublk_ch_open(struct inode *inode, struct file *filp) static int ublk_ch_release(struct inode *inode, struct file *filp) { struct ublk_device *ub = filp->private_data; + int i; + mutex_lock(&ub->mutex); + /* + * If the device is not live, we will not transition to a nosrv + * state. This protects against: + * - accidental poking of the ublk character device + * - some ublk servers which may open/close the ublk character + * device during startup + */ + if (ub->dev_info.state != UBLK_S_DEV_LIVE) + goto out; + + /* + * Since we are releasing the ublk character file descriptor, we + * know that there cannot be any concurrent file-related + * activity (e.g. uring_cmds or reads/writes). However, I/O + * might still be getting dispatched. Quiesce that too so that + * we don't need to worry about anything concurrent + */ + blk_mq_quiesce_queue(ub->ub_disk->queue); + + /* + * Handle any requests outstanding to the ublk server + */ + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) + ublk_abort_queue(ub, ublk_get_queue(ub, i)); + + /* + * Transition the device to the nosrv state. 
What exactly this + * means depends on the recovery flags + */ + if (ublk_nosrv_should_stop_dev(ub)) { + ublk_stop_dev_unlocked(ub); + } else if (ublk_nosrv_dev_should_queue_io(ub)) { + __ublk_quiesce_dev(ub); + } else { + ub->dev_info.state = UBLK_S_DEV_FAIL_IO; + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) + ublk_get_queue(ub, i)->fail_io = true; + } + + blk_mq_unquiesce_queue(ub->ub_disk->queue); +out: clear_bit(UB_STATE_OPEN, &ub->state); + mutex_unlock(&ub->mutex); return 0; } @@ -1556,57 +1563,6 @@ static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq) } } -/* Must be called when queue is frozen */ -static bool ublk_mark_queue_canceling(struct ublk_queue *ubq) -{ - bool canceled; - - spin_lock(&ubq->cancel_lock); - canceled = ubq->canceling; - if (!canceled) - ubq->canceling = true; - spin_unlock(&ubq->cancel_lock); - - return canceled; -} - -static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq) -{ - bool was_canceled = ubq->canceling; - struct gendisk *disk; - - if (was_canceled) - return false; - - spin_lock(&ub->lock); - disk = ub->ub_disk; - if (disk) - get_device(disk_to_dev(disk)); - spin_unlock(&ub->lock); - - /* Our disk has been dead */ - if (!disk) - return false; - - /* - * Now we are serialized with ublk_queue_rq() - * - * Make sure that ubq->canceling is set when queue is frozen, - * because ublk_queue_rq() has to rely on this flag for avoiding to - * touch completed uring_cmd - */ - blk_mq_quiesce_queue(disk->queue); - was_canceled = ublk_mark_queue_canceling(ubq); - if (!was_canceled) { - /* abort queue is for making forward progress */ - ublk_abort_queue(ub, ubq); - } - blk_mq_unquiesce_queue(disk->queue); - put_device(disk_to_dev(disk)); - - return !was_canceled; -} - static void ublk_cancel_cmd(struct ublk_queue *ubq, struct ublk_io *io, unsigned int issue_flags) { @@ -1635,8 +1591,6 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd, struct ublk_uring_cmd_pdu *pdu = 
ublk_get_uring_cmd_pdu(cmd); struct ublk_queue *ubq = pdu->ubq; struct task_struct *task; - struct ublk_device *ub; - bool need_schedule; struct ublk_io *io; if (WARN_ON_ONCE(!ubq)) @@ -1649,16 +1603,9 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd, if (WARN_ON_ONCE(task && task != ubq->ubq_daemon)) return; - ub = ubq->dev; - need_schedule = ublk_abort_requests(ub, ubq); - io = &ubq->ios[pdu->tag]; WARN_ON_ONCE(io->cmd != cmd); ublk_cancel_cmd(ubq, io, issue_flags); - - if (need_schedule) { - schedule_work(&ub->nosrv_work); - } } static inline bool ublk_queue_ready(struct ublk_queue *ubq) @@ -1756,13 +1703,13 @@ static struct gendisk *ublk_detach_disk(struct ublk_device *ub) return disk; } -static void ublk_stop_dev(struct ublk_device *ub) +static void ublk_stop_dev_unlocked(struct ublk_device *ub) + __must_hold(&ub->mutex) { struct gendisk *disk; - mutex_lock(&ub->mutex); if (ub->dev_info.state == UBLK_S_DEV_DEAD) - goto unlock; + return; if (ublk_nosrv_dev_should_queue_io(ub)) { if (ub->dev_info.state == UBLK_S_DEV_LIVE) __ublk_quiesce_dev(ub); @@ -1771,38 +1718,12 @@ static void ublk_stop_dev(struct ublk_device *ub) del_gendisk(ub->ub_disk); disk = ublk_detach_disk(ub); put_disk(disk); - unlock: - mutex_unlock(&ub->mutex); - ublk_cancel_dev(ub); } -static void ublk_nosrv_work(struct work_struct *work) +static void ublk_stop_dev(struct ublk_device *ub) { - struct ublk_device *ub = - container_of(work, struct ublk_device, nosrv_work); - int i; - - if (ublk_nosrv_should_stop_dev(ub)) { - ublk_stop_dev(ub); - return; - } - mutex_lock(&ub->mutex); - if (ub->dev_info.state != UBLK_S_DEV_LIVE) - goto unlock; - - if (ublk_nosrv_dev_should_queue_io(ub)) { - __ublk_quiesce_dev(ub); - } else { - blk_mq_quiesce_queue(ub->ub_disk->queue); - ub->dev_info.state = UBLK_S_DEV_FAIL_IO; - for (i = 0; i < ub->dev_info.nr_hw_queues; i++) { - ublk_get_queue(ub, i)->fail_io = true; - } - blk_mq_unquiesce_queue(ub->ub_disk->queue); - } - - unlock: + 
ublk_stop_dev_unlocked(ub); mutex_unlock(&ub->mutex); ublk_cancel_dev(ub); } @@ -2388,7 +2309,6 @@ static void ublk_remove(struct ublk_device *ub) bool unprivileged; ublk_stop_dev(ub); - cancel_work_sync(&ub->nosrv_work); cdev_device_del(&ub->cdev, &ub->cdev_dev); unprivileged = ub->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV; ublk_put_device(ub); @@ -2675,7 +2595,6 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd) goto out_unlock; mutex_init(&ub->mutex); spin_lock_init(&ub->lock); - INIT_WORK(&ub->nosrv_work, ublk_nosrv_work); ret = ublk_alloc_dev_number(ub, header->dev_id); if (ret < 0) @@ -2807,7 +2726,6 @@ static inline void ublk_ctrl_cmd_dump(struct io_uring_cmd *cmd) static int ublk_ctrl_stop_dev(struct ublk_device *ub) { ublk_stop_dev(ub); - cancel_work_sync(&ub->nosrv_work); return 0; } @@ -2927,7 +2845,6 @@ static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq) /* We have to reset it to NULL, otherwise ub won't accept new FETCH_REQ */ ubq->ubq_daemon = NULL; ubq->timeout = false; - ubq->canceling = false; for (i = 0; i < ubq->q_depth; i++) { struct ublk_io *io = &ubq->ios[i]; diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile index c7781efea0f33c02f340f90f547d3a37c1d1b8a0..afee027cccdd1b8f13f1cb9a90a3348cd54b18bc 100644 --- a/tools/testing/selftests/ublk/Makefile +++ b/tools/testing/selftests/ublk/Makefile @@ -6,6 +6,7 @@ LDLIBS += -lpthread -lm -luring TEST_PROGS := test_generic_01.sh TEST_PROGS += test_generic_02.sh TEST_PROGS += test_generic_03.sh +TEST_PROGS += test_generic_04.sh TEST_PROGS += test_null_01.sh TEST_PROGS += test_null_02.sh @@ -26,7 +27,8 @@ TEST_GEN_PROGS_EXTENDED = kublk include ../lib.mk -$(TEST_GEN_PROGS_EXTENDED): kublk.c null.c file_backed.c common.c stripe.c +$(TEST_GEN_PROGS_EXTENDED): kublk.c null.c file_backed.c common.c stripe.c \ + fault_inject.c check: shellcheck -x -f gcc *.sh diff --git a/tools/testing/selftests/ublk/fault_inject.c 
b/tools/testing/selftests/ublk/fault_inject.c new file mode 100644 index 0000000000000000000000000000000000000000..e92d01e88e478a23df987ebff2a997212b831d31 --- /dev/null +++ b/tools/testing/selftests/ublk/fault_inject.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Fault injection ublk target. Hack this up however you like for + * testing specific behaviors of ublk_drv. Currently is a null target + * with a configurable delay before completing each I/O. This delay can + * be used to test ublk_drv's handling of I/O outstanding to the ublk + * server when it dies. + */ + +#include "kublk.h" + +static int ublk_fault_inject_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) +{ + const struct ublksrv_ctrl_dev_info *info = &dev->dev_info; + unsigned long dev_size = 250UL << 30; + + dev->tgt.dev_size = dev_size; + dev->tgt.params = (struct ublk_params) { + .types = UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DMA_ALIGN | + UBLK_PARAM_TYPE_SEGMENT, + .basic = { + .logical_bs_shift = 9, + .physical_bs_shift = 12, + .io_opt_shift = 12, + .io_min_shift = 9, + .max_sectors = info->max_io_buf_bytes >> 9, + .dev_sectors = dev_size >> 9, + }, + .dma = { + .alignment = 4095, + }, + .seg = { + .seg_boundary_mask = 4095, + .max_segment_size = 32 << 10, + .max_segments = 32, + }, + }; + + dev->private_data = (void *)ctx->delay_us; + return 0; +} + +static int ublk_fault_inject_queue_io(struct ublk_queue *q, int tag) +{ + const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); + + usleep((unsigned long)q->dev->private_data); + + ublk_complete_io(q, tag, iod->nr_sectors << 9); + return 0; +} + +const struct ublk_tgt_ops fault_inject_tgt_ops = { + .name = "fault_inject", + .init_tgt = ublk_fault_inject_tgt_init, + .queue_io = ublk_fault_inject_queue_io, +}; diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 91c282bc767449a418cce7fc816dc8e9fc732d6a..0fbfa43864453471219703451271540d5dfef593 100644 --- 
a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -10,6 +10,7 @@ static const struct ublk_tgt_ops *tgt_ops_list[] = { &null_tgt_ops, &loop_tgt_ops, &stripe_tgt_ops, + &fault_inject_tgt_ops, }; static const struct ublk_tgt_ops *ublk_find_tgt(const char *name) @@ -1041,7 +1042,7 @@ static int cmd_dev_get_features(void) static int cmd_dev_help(char *exe) { - printf("%s add -t [null|loop] [-q nr_queues] [-d depth] [-n dev_id] [backfile1] [backfile2] ...\n", exe); + printf("%s add -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id] [backfile1] [backfile2] ...\n", exe); printf("\t default: nr_queues=2(max 4), depth=128(max 128), dev_id=-1(auto allocation)\n"); printf("%s del [-n dev_id] -a \n", exe); printf("\t -a delete all devices -n delete specified device\n"); @@ -1064,6 +1065,7 @@ int main(int argc, char *argv[]) { "zero_copy", 0, NULL, 'z' }, { "foreground", 0, NULL, 0 }, { "chunk_size", 1, NULL, 0 }, + { "delay_us", 1, NULL, 0 }, { 0, 0, 0, 0 } }; int option_idx, opt; @@ -1112,6 +1114,8 @@ int main(int argc, char *argv[]) ctx.fg = 1; if (!strcmp(longopts[option_idx].name, "chunk_size")) ctx.chunk_size = strtol(optarg, NULL, 10); + if (!strcmp(longopts[option_idx].name, "delay_us")) + ctx.delay_us = strtoul(optarg, NULL, 10); } } diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 760ff8ffb8107037a19a8fb7ab408818845e010d..3750e67727eed89991158add49d30615ea012dae 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -70,6 +70,9 @@ struct dev_ctx { /* stripe */ unsigned int chunk_size; + /* fault_inject */ + unsigned long delay_us; + int _evtfd; }; @@ -357,6 +360,7 @@ static inline int ublk_queue_use_zc(const struct ublk_queue *q) extern const struct ublk_tgt_ops null_tgt_ops; extern const struct ublk_tgt_ops loop_tgt_ops; extern const struct ublk_tgt_ops stripe_tgt_ops; +extern const struct ublk_tgt_ops fault_inject_tgt_ops; 
void backing_file_tgt_deinit(struct ublk_dev *dev); int backing_file_tgt_init(struct ublk_dev *dev); diff --git a/tools/testing/selftests/ublk/test_generic_04.sh b/tools/testing/selftests/ublk/test_generic_04.sh new file mode 100755 index 0000000000000000000000000000000000000000..48af48164aa444d8ac6a58fef1743d2a16a56a14 --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_04.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_04" +ERR_CODE=0 + +_prep_test "fault_inject" "fast cleanup when all I/Os of one hctx are in server" + +# configure ublk server to sleep 2s before completing each I/O +dev_id=$(_add_ublk_dev -t fault_inject -q 2 -d 1 --delay_us 2000000) +_check_add_dev $TID $? + +echo "dev id is ${dev_id}" + +STARTTIME=${SECONDS} + +dd if=/dev/urandom of=/dev/ublkb${dev_id} oflag=direct bs=4k count=1 & +dd_pid=$! + +__ublk_kill_daemon ${dev_id} "DEAD" + +wait $dd_pid +dd_exitcode=$? + +ENDTIME=${SECONDS} +ELAPSED=$(($ENDTIME - $STARTTIME)) + +# assert that dd sees an error and exits quickly after ublk server is +# killed. previously this relied on seeing an I/O timeout and so would +# take ~30s +if [ $dd_exitcode -eq 0 ]; then + echo "dd unexpectedly exited successfully!" + ERR_CODE=255 +fi +if [ $ELAPSED -ge 5 ]; then + echo "dd took $ELAPSED seconds to exit (>= 5s tolerance)!" + ERR_CODE=255 +fi + +_cleanup_test "fault_inject" +_show_result $TID $ERR_CODE --- base-commit: 710e2c687a16b28a873a282517a85faf02a8b7cc change-id: 20250325-ublk_timeout-b06b9b51c591 Best regards, -- Uday Shankar <ushankar(a)purestorage.com>

7 months

2
1
0 0

[PATCH net v2] selftests: tc-testing: fix nat regex matching

by Pedro Tammela

In iproute 6.14, the nat ip mask logic was fixed to remove an undefined behaviour[1]. So now instead of reporting '0.0.0.0/32' on x86 and potentially '0.0.0.0/0' in other platforms, it reports '0.0.0.0/0' in all platforms. [1] https://lore.kernel.org/netdev/20250306112520.188728-1-torben.nielsen@preva… Reviewed-by: Simon Horman <horms(a)kernel.org> Signed-off-by: Pedro Tammela <pctammela(a)mojatatu.com> --- v2: Add reference to iproute2 commit and collect Simon's reviewed-by --- .../selftests/tc-testing/tc-tests/actions/nat.json | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json b/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json index ee2792998c89..4f21aeb8a3fb 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json @@ -305,7 +305,7 @@ "cmdUnderTest": "$TC actions add action nat ingress default 10.10.10.1 index 12", "expExitCode": "0", "verifyCmd": "$TC actions get action nat index 12", - "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/32 10.10.10.1 pass.*index 12 ref", + "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/0 10.10.10.1 pass.*index 12 ref", "matchCount": "1", "teardown": [ "$TC actions flush action nat" @@ -332,7 +332,7 @@ "cmdUnderTest": "$TC actions add action nat ingress any 10.10.10.1 index 12", "expExitCode": "0", "verifyCmd": "$TC actions get action nat index 12", - "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/32 10.10.10.1 pass.*index 12 ref", + "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/0 10.10.10.1 pass.*index 12 ref", "matchCount": "1", "teardown": [ "$TC actions flush action nat" @@ -359,7 +359,7 @@ "cmdUnderTest": "$TC actions add action nat ingress all 10.10.10.1 index 12", "expExitCode": "0", "verifyCmd": "$TC actions get action nat index 12", - "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/32 
10.10.10.1 pass.*index 12 ref", + "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/0 10.10.10.1 pass.*index 12 ref", "matchCount": "1", "teardown": [ "$TC actions flush action nat" @@ -548,7 +548,7 @@ "cmdUnderTest": "$TC actions add action nat egress default 20.20.20.1 pipe index 10", "expExitCode": "0", "verifyCmd": "$TC actions get action nat index 10", - "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref", + "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/0 20.20.20.1 pipe.*index 10 ref", "matchCount": "1", "teardown": [ "$TC actions flush action nat" @@ -575,7 +575,7 @@ "cmdUnderTest": "$TC actions add action nat egress any 20.20.20.1 pipe index 10", "expExitCode": "0", "verifyCmd": "$TC actions get action nat index 10", - "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref", + "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/0 20.20.20.1 pipe.*index 10 ref", "matchCount": "1", "teardown": [ "$TC actions flush action nat" @@ -602,7 +602,7 @@ "cmdUnderTest": "$TC actions add action nat egress all 20.20.20.1 pipe index 10", "expExitCode": "0", "verifyCmd": "$TC actions get action nat index 10", - "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref", + "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/0 20.20.20.1 pipe.*index 10 ref", "matchCount": "1", "teardown": [ "$TC actions flush action nat" @@ -629,7 +629,7 @@ "cmdUnderTest": "$TC actions add action nat egress all 20.20.20.1 pipe index 10 cookie aa1bc2d3eeff112233445566778800a1", "expExitCode": "0", "verifyCmd": "$TC actions get action nat index 10", - "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref.*cookie aa1bc2d3eeff112233445566778800a1", + "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/0 20.20.20.1 pipe.*index 10 ref.*cookie aa1bc2d3eeff112233445566778800a1", "matchCount": "1", "teardown": [ "$TC actions 
flush action nat" -- 2.43.0

7 months

2
1
0 0

[PATCH net-next v24 00/23] Introducing OpenVPN Data Channel Offload

by Antonio Quartulli

Notable changes since v23: * dropped call to netif_tx_start/stop_all_queues() * dropped NETIF_F_HW_CSUM and NETIF_F_RXCSUM dev flags * dropped conditional call to skb_checksum_help() due to the point above * added call to dst_cache_reset() in nl_peer_modify() * dropped obsolete comment in ovpn_peer_keepalive_work() * reversed scheduling delay computation in ovpn_peer_keepalive_work() Please note that some patches were already reviewed/tested by a few people. These patches have retained the tags as they have hardly been touched. The latest code can also be found at: https://github.com/OpenVPN/ovpn-net-next Thanks a lot! Best Regards, Antonio Quartulli OpenVPN Inc. --- Antonio Quartulli (23): net: introduce OpenVPN Data Channel Offload (ovpn) ovpn: add basic netlink support ovpn: add basic interface creation/destruction/management routines ovpn: keep carrier always on for MP interfaces ovpn: introduce the ovpn_peer object ovpn: introduce the ovpn_socket object ovpn: implement basic TX path (UDP) ovpn: implement basic RX path (UDP) ovpn: implement packet processing ovpn: store tunnel and transport statistics ovpn: implement TCP transport skb: implement skb_send_sock_locked_with_flags() ovpn: add support for MSG_NOSIGNAL in tcp_sendmsg ovpn: implement multi-peer support ovpn: implement peer lookup logic ovpn: implement keepalive mechanism ovpn: add support for updating local or remote UDP endpoint ovpn: implement peer add/get/dump/delete via netlink ovpn: implement key add/get/del/swap via netlink ovpn: kill key and notify userspace in case of IV exhaustion ovpn: notify userspace when a peer is deleted ovpn: add basic ethtool support testing/selftests: add test tool and scripts for ovpn module Documentation/netlink/specs/ovpn.yaml | 367 +++ Documentation/netlink/specs/rt_link.yaml | 16 + MAINTAINERS | 11 + drivers/net/Kconfig | 15 + drivers/net/Makefile | 1 + drivers/net/ovpn/Makefile | 22 + drivers/net/ovpn/bind.c | 55 + drivers/net/ovpn/bind.h | 101 + 
drivers/net/ovpn/crypto.c | 211 ++ drivers/net/ovpn/crypto.h | 145 ++ drivers/net/ovpn/crypto_aead.c | 409 ++++ drivers/net/ovpn/crypto_aead.h | 29 + drivers/net/ovpn/io.c | 455 ++++ drivers/net/ovpn/io.h | 34 + drivers/net/ovpn/main.c | 330 +++ drivers/net/ovpn/main.h | 14 + drivers/net/ovpn/netlink-gen.c | 213 ++ drivers/net/ovpn/netlink-gen.h | 41 + drivers/net/ovpn/netlink.c | 1250 ++++++++++ drivers/net/ovpn/netlink.h | 18 + drivers/net/ovpn/ovpnpriv.h | 57 + drivers/net/ovpn/peer.c | 1364 +++++++++++ drivers/net/ovpn/peer.h | 163 ++ drivers/net/ovpn/pktid.c | 129 ++ drivers/net/ovpn/pktid.h | 87 + drivers/net/ovpn/proto.h | 118 + drivers/net/ovpn/skb.h | 61 + drivers/net/ovpn/socket.c | 244 ++ drivers/net/ovpn/socket.h | 49 + drivers/net/ovpn/stats.c | 21 + drivers/net/ovpn/stats.h | 47 + drivers/net/ovpn/tcp.c | 592 +++++ drivers/net/ovpn/tcp.h | 36 + drivers/net/ovpn/udp.c | 442 ++++ drivers/net/ovpn/udp.h | 25 + include/linux/skbuff.h | 2 + include/uapi/linux/if_link.h | 15 + include/uapi/linux/ovpn.h | 109 + include/uapi/linux/udp.h | 1 + net/core/skbuff.c | 18 +- net/ipv6/af_inet6.c | 1 + net/ipv6/udp.c | 1 + tools/testing/selftests/Makefile | 1 + tools/testing/selftests/net/ovpn/.gitignore | 2 + tools/testing/selftests/net/ovpn/Makefile | 31 + tools/testing/selftests/net/ovpn/common.sh | 92 + tools/testing/selftests/net/ovpn/config | 10 + tools/testing/selftests/net/ovpn/data64.key | 5 + tools/testing/selftests/net/ovpn/ovpn-cli.c | 2395 ++++++++++++++++++++ tools/testing/selftests/net/ovpn/tcp_peers.txt | 5 + .../testing/selftests/net/ovpn/test-chachapoly.sh | 9 + .../selftests/net/ovpn/test-close-socket-tcp.sh | 9 + .../selftests/net/ovpn/test-close-socket.sh | 45 + tools/testing/selftests/net/ovpn/test-float.sh | 9 + tools/testing/selftests/net/ovpn/test-tcp.sh | 9 + tools/testing/selftests/net/ovpn/test.sh | 113 + tools/testing/selftests/net/ovpn/udp_peers.txt | 5 + 57 files changed, 10054 insertions(+), 5 deletions(-) --- base-commit: 
702e3fa16cd42ba712825e8d6171ea4755bc0491 change-id: 20241002-b4-ovpn-eeee35c694a2 Best regards, -- Antonio Quartulli <antonio(a)openvpn.net>

7 months

3
50
0 0

RE: ISC West 2025 Attendee Data to Drive Sales and Networking Efforts

by Sara Wood

Hi , I wanted to confirm if you got my last email. I can provide more information on the numbers and costs - just say the word! Regards Sara Wood Demand Generation Manager Leads Data Inc., Please reply with REMOVE if you don't wish to receive further emails -----Original Message----- From: Sara Wood To: Subject: ISC West 2025 Attendee Data to Drive Sales and Networking Efforts Hi , Are you considering getting the ISC West 2025 attendees list? Expo Name: International Security Conference & Exposition West 2025 Total Number of records: 23,000 records List includes: Company Name, Contact Name, Job Title, Mailing Address, Phone, Emails, etc. Do you want to acquire these leads? If so, I'm happy to send the pricing details. Eager for your response Regards Sara Wood Demand Generation Manager Leads Data Inc., Please reply with REMOVE if you don't wish to receive further emails

7 months

1
0
0 0

[PATCH 0/2] ublk: fixes for selftests

by Uday Shankar

Fix a couple of issues I saw when developing selftests for ublk. These patches are split out from the following series: https://lore.kernel.org/linux-block/20250325-ublk_timeout-v1-0-262f0121a7bd… Signed-off-by: Uday Shankar <ushankar(a)purestorage.com> --- Uday Shankar (2): selftests: ublk: kublk: use ioctl-encoded opcodes selftests: ublk: kublk: fix an error log line tools/testing/selftests/ublk/kublk.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) --- base-commit: 4cfcc398357b0fb3d4c97d47d4a9e3c0653b7903 change-id: 20250325-ublk_selftests-6a055dfbc55b Best regards, -- Uday Shankar <ushankar(a)purestorage.com>

7 months

2
5
0 0

[PATCH 0/4] ublk: improve handling of saturated queues when ublk server exits

by Uday Shankar

This set aims to reduce the long delay in applications reacting to ublk server exit in the case of a "fully saturated" queue, i.e. one for which all I/Os are outstanding to the ublk server. The first few patches fix some minor issues in the ublk selftests, and the last patch contains the main work and a test to validate it. Signed-off-by: Uday Shankar <ushankar(a)purestorage.com> --- Uday Shankar (4): selftests: ublk: kublk: use ioctl-encoded opcodes selftests: ublk: kublk: fix an error log line selftests: ublk: kublk: ignore SIGCHLD ublk: improve handling of saturated queues when ublk server exits drivers/block/ublk_drv.c | 40 +++++++++++------------ tools/testing/selftests/ublk/Makefile | 1 + tools/testing/selftests/ublk/kublk.c | 10 ++++-- tools/testing/selftests/ublk/kublk.h | 3 ++ tools/testing/selftests/ublk/null.c | 4 +++ tools/testing/selftests/ublk/test_generic_02.sh | 43 +++++++++++++++++++++++++ 6 files changed, 76 insertions(+), 25 deletions(-) --- base-commit: 648154b1c78c9e00b6934082cae48bb38714de20 change-id: 20250325-ublk_timeout-b06b9b51c591 Best regards, -- Uday Shankar <ushankar(a)purestorage.com>

7 months

2
19
0 0

[PATCH net] selftests: tc-testing: fix nat regex matching

by Pedro Tammela

In iproute 6.14, the nat ip mask logic was fixed to remove an undefined behaviour. So now instead of reporting '0.0.0.0/32' on x86 and potentially '0.0.0.0/0' in other platforms, it reports '0.0.0.0/0' in all platforms. Signed-off-by: Pedro Tammela <pctammela(a)mojatatu.com> --- .../selftests/tc-testing/tc-tests/actions/nat.json | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json b/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json index ee2792998c89..4f21aeb8a3fb 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json @@ -305,7 +305,7 @@ "cmdUnderTest": "$TC actions add action nat ingress default 10.10.10.1 index 12", "expExitCode": "0", "verifyCmd": "$TC actions get action nat index 12", - "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/32 10.10.10.1 pass.*index 12 ref", + "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/0 10.10.10.1 pass.*index 12 ref", "matchCount": "1", "teardown": [ "$TC actions flush action nat" @@ -332,7 +332,7 @@ "cmdUnderTest": "$TC actions add action nat ingress any 10.10.10.1 index 12", "expExitCode": "0", "verifyCmd": "$TC actions get action nat index 12", - "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/32 10.10.10.1 pass.*index 12 ref", + "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/0 10.10.10.1 pass.*index 12 ref", "matchCount": "1", "teardown": [ "$TC actions flush action nat" @@ -359,7 +359,7 @@ "cmdUnderTest": "$TC actions add action nat ingress all 10.10.10.1 index 12", "expExitCode": "0", "verifyCmd": "$TC actions get action nat index 12", - "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/32 10.10.10.1 pass.*index 12 ref", + "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/0 10.10.10.1 pass.*index 12 ref", "matchCount": "1", "teardown": [ "$TC actions flush action nat" @@ -548,7 +548,7 
@@ "cmdUnderTest": "$TC actions add action nat egress default 20.20.20.1 pipe index 10", "expExitCode": "0", "verifyCmd": "$TC actions get action nat index 10", - "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref", + "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/0 20.20.20.1 pipe.*index 10 ref", "matchCount": "1", "teardown": [ "$TC actions flush action nat" @@ -575,7 +575,7 @@ "cmdUnderTest": "$TC actions add action nat egress any 20.20.20.1 pipe index 10", "expExitCode": "0", "verifyCmd": "$TC actions get action nat index 10", - "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref", + "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/0 20.20.20.1 pipe.*index 10 ref", "matchCount": "1", "teardown": [ "$TC actions flush action nat" @@ -602,7 +602,7 @@ "cmdUnderTest": "$TC actions add action nat egress all 20.20.20.1 pipe index 10", "expExitCode": "0", "verifyCmd": "$TC actions get action nat index 10", - "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref", + "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/0 20.20.20.1 pipe.*index 10 ref", "matchCount": "1", "teardown": [ "$TC actions flush action nat" @@ -629,7 +629,7 @@ "cmdUnderTest": "$TC actions add action nat egress all 20.20.20.1 pipe index 10 cookie aa1bc2d3eeff112233445566778800a1", "expExitCode": "0", "verifyCmd": "$TC actions get action nat index 10", - "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref.*cookie aa1bc2d3eeff112233445566778800a1", + "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/0 20.20.20.1 pipe.*index 10 ref.*cookie aa1bc2d3eeff112233445566778800a1", "matchCount": "1", "teardown": [ "$TC actions flush action nat" -- 2.43.0

7 months

2
2
0 0

[RFC PATCH] rtc: remove 'setdate' test program

by Wolfram Sang

The tool is not embedded in the testing framework. 'rtc' from rtc-tools serves as a much better programming example. No need to carry this tool in the kernel tree. Signed-off-by: Wolfram Sang <wsa+renesas(a)sang-engineering.com> --- Alexandre, what is you opinion on this? tools/testing/selftests/rtc/.gitignore | 1 - tools/testing/selftests/rtc/Makefile | 2 - tools/testing/selftests/rtc/setdate.c | 77 -------------------------- 3 files changed, 80 deletions(-) delete mode 100644 tools/testing/selftests/rtc/setdate.c diff --git a/tools/testing/selftests/rtc/.gitignore b/tools/testing/selftests/rtc/.gitignore index fb2d533aa575..a2afe7994e85 100644 --- a/tools/testing/selftests/rtc/.gitignore +++ b/tools/testing/selftests/rtc/.gitignore @@ -1,3 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only rtctest -setdate diff --git a/tools/testing/selftests/rtc/Makefile b/tools/testing/selftests/rtc/Makefile index 9dbb395c5c79..547c244a2ca5 100644 --- a/tools/testing/selftests/rtc/Makefile +++ b/tools/testing/selftests/rtc/Makefile @@ -4,8 +4,6 @@ LDLIBS += -lrt -lpthread -lm TEST_GEN_PROGS = rtctest -TEST_GEN_PROGS_EXTENDED = setdate - TEST_FILES := settings include ../lib.mk diff --git a/tools/testing/selftests/rtc/setdate.c b/tools/testing/selftests/rtc/setdate.c deleted file mode 100644 index b303890b3de2..000000000000 --- a/tools/testing/selftests/rtc/setdate.c +++ /dev/null @@ -1,77 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* Real Time Clock Driver Test - * by: Benjamin Gaignard (benjamin.gaignard(a)linaro.org) - * - * To build - * gcc rtctest_setdate.c -o rtctest_setdate - */ - -#include <stdio.h> -#include <linux/rtc.h> -#include <sys/ioctl.h> -#include <sys/time.h> -#include <sys/types.h> -#include <fcntl.h> -#include <unistd.h> -#include <stdlib.h> -#include <errno.h> - -static const char default_time[] = "00:00:00"; - -int main(int argc, char **argv) -{ - int fd, retval; - struct rtc_time new, current; - const char *rtc, *date; - const char *time = 
default_time; - - switch (argc) { - case 4: - time = argv[3]; - /* FALLTHROUGH */ - case 3: - date = argv[2]; - rtc = argv[1]; - break; - default: - fprintf(stderr, "usage: rtctest_setdate <rtcdev> <DD-MM-YYYY> [HH:MM:SS]\n"); - return 1; - } - - fd = open(rtc, O_RDONLY); - if (fd == -1) { - perror(rtc); - exit(errno); - } - - sscanf(date, "%d-%d-%d", &new.tm_mday, &new.tm_mon, &new.tm_year); - new.tm_mon -= 1; - new.tm_year -= 1900; - sscanf(time, "%d:%d:%d", &new.tm_hour, &new.tm_min, &new.tm_sec); - - fprintf(stderr, "Test will set RTC date/time to %d-%d-%d, %02d:%02d:%02d.\n", - new.tm_mday, new.tm_mon + 1, new.tm_year + 1900, - new.tm_hour, new.tm_min, new.tm_sec); - - /* Write the new date in RTC */ - retval = ioctl(fd, RTC_SET_TIME, &new); - if (retval == -1) { - perror("RTC_SET_TIME ioctl"); - close(fd); - exit(errno); - } - - /* Read back */ - retval = ioctl(fd, RTC_RD_TIME, &current); - if (retval == -1) { - perror("RTC_RD_TIME ioctl"); - exit(errno); - } - - fprintf(stderr, "\n\nCurrent RTC date/time is %d-%d-%d, %02d:%02d:%02d.\n", - current.tm_mday, current.tm_mon + 1, current.tm_year + 1900, - current.tm_hour, current.tm_min, current.tm_sec); - - close(fd); - return 0; -} -- 2.47.2

7 months

3
5
0 0

[PATCH] selftest: rtc: skip some tests if the alarm only supports minutes

by Wolfram Sang

There are alarms which have only minute-granularity. The RTC core already has a flag to describe them. Use this flag to skip tests which require the alarm to support seconds. Signed-off-by: Wolfram Sang <wsa+renesas(a)sang-engineering.com> --- Tested with a Renesas RZ-N1D board. This RTC obviously has only minute resolution for the alarms. Output now looks like this: # RUN rtc.alarm_alm_set ... # SKIP Skipping test since alarms has only minute granularity. # OK rtc.alarm_alm_set ok 5 rtc.alarm_alm_set # SKIP Skipping test since alarms has only minute granularity. Before it was like this: # RUN rtc.alarm_alm_set ... # rtctest.c:255:alarm_alm_set:Alarm time now set to 09:40:00. # rtctest.c:275:alarm_alm_set:data: 1a0 # rtctest.c:281:alarm_alm_set:Expected new (1489743644) == secs (1489743647) tools/testing/selftests/rtc/rtctest.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/rtc/rtctest.c b/tools/testing/selftests/rtc/rtctest.c index 3e4f0d5c5329..e0a148261e6f 100644 --- a/tools/testing/selftests/rtc/rtctest.c +++ b/tools/testing/selftests/rtc/rtctest.c @@ -29,6 +29,7 @@ enum rtc_alarm_state { RTC_ALARM_UNKNOWN, RTC_ALARM_ENABLED, RTC_ALARM_DISABLED, + RTC_ALARM_RES_MINUTE, }; FIXTURE(rtc) { @@ -88,7 +89,7 @@ static void nanosleep_with_retries(long ns) } } -static enum rtc_alarm_state get_rtc_alarm_state(int fd) +static enum rtc_alarm_state get_rtc_alarm_state(int fd, int need_seconds) { struct rtc_param param = { 0 }; int rc; @@ -103,6 +104,10 @@ static enum rtc_alarm_state get_rtc_alarm_state(int fd) if ((param.uvalue & _BITUL(RTC_FEATURE_ALARM)) == 0) return RTC_ALARM_DISABLED; + /* Check if alarm has desired granularity */ + if (need_seconds && (param.uvalue & _BITUL(RTC_FEATURE_ALARM_RES_MINUTE))) + return RTC_ALARM_RES_MINUTE; + return RTC_ALARM_ENABLED; } @@ -227,9 +232,11 @@ TEST_F(rtc, alarm_alm_set) { SKIP(return, "Skipping test since %s does not exist", rtc_file); ASSERT_NE(-1, self->fd); - 
alarm_state = get_rtc_alarm_state(self->fd); + alarm_state = get_rtc_alarm_state(self->fd, 1); if (alarm_state == RTC_ALARM_DISABLED) SKIP(return, "Skipping test since alarms are not supported."); + if (alarm_state == RTC_ALARM_RES_MINUTE) + SKIP(return, "Skipping test since alarms has only minute granularity."); rc = ioctl(self->fd, RTC_RD_TIME, &tm); ASSERT_NE(-1, rc); @@ -295,9 +302,11 @@ TEST_F(rtc, alarm_wkalm_set) { SKIP(return, "Skipping test since %s does not exist", rtc_file); ASSERT_NE(-1, self->fd); - alarm_state = get_rtc_alarm_state(self->fd); + alarm_state = get_rtc_alarm_state(self->fd, 1); if (alarm_state == RTC_ALARM_DISABLED) SKIP(return, "Skipping test since alarms are not supported."); + if (alarm_state == RTC_ALARM_RES_MINUTE) + SKIP(return, "Skipping test since alarms has only minute granularity."); rc = ioctl(self->fd, RTC_RD_TIME, &alarm.time); ASSERT_NE(-1, rc); @@ -357,7 +366,7 @@ TEST_F_TIMEOUT(rtc, alarm_alm_set_minute, 65) { SKIP(return, "Skipping test since %s does not exist", rtc_file); ASSERT_NE(-1, self->fd); - alarm_state = get_rtc_alarm_state(self->fd); + alarm_state = get_rtc_alarm_state(self->fd, 0); if (alarm_state == RTC_ALARM_DISABLED) SKIP(return, "Skipping test since alarms are not supported."); @@ -425,7 +434,7 @@ TEST_F_TIMEOUT(rtc, alarm_wkalm_set_minute, 65) { SKIP(return, "Skipping test since %s does not exist", rtc_file); ASSERT_NE(-1, self->fd); - alarm_state = get_rtc_alarm_state(self->fd); + alarm_state = get_rtc_alarm_state(self->fd, 0); if (alarm_state == RTC_ALARM_DISABLED) SKIP(return, "Skipping test since alarms are not supported."); -- 2.39.2

7 months

2
1
0 0

[PATCH] selftest: rtc: skip alarm test if RTC is minute resolution

by Weizhao Ouyang

Skip second resolution alarm test if RTC is minute resolution. Signed-off-by: Weizhao Ouyang <o451686892(a)gmail.com> --- tools/testing/selftests/rtc/rtctest.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/testing/selftests/rtc/rtctest.c b/tools/testing/selftests/rtc/rtctest.c index e103097d0b5b..b8d196f1c5f6 100644 --- a/tools/testing/selftests/rtc/rtctest.c +++ b/tools/testing/selftests/rtc/rtctest.c @@ -28,6 +28,7 @@ static char *rtc_file = "/dev/rtc0"; enum rtc_alarm_state { RTC_ALARM_UNKNOWN, RTC_ALARM_ENABLED, + RTC_ALARM_MINUTE, RTC_ALARM_DISABLED, }; @@ -100,6 +101,9 @@ static enum rtc_alarm_state get_rtc_alarm_state(int fd) if (rc < 0) return RTC_ALARM_UNKNOWN; + if (param.uvalue & _BITUL(RTC_FEATURE_ALARM_RES_MINUTE)) + return RTC_ALARM_MINUTE; + if ((param.uvalue & _BITUL(RTC_FEATURE_ALARM)) == 0) return RTC_ALARM_DISABLED; @@ -230,6 +234,8 @@ TEST_F(rtc, alarm_alm_set) { alarm_state = get_rtc_alarm_state(self->fd); if (alarm_state == RTC_ALARM_DISABLED) SKIP(return, "Skipping test since alarms are not supported."); + if (alarm_state == RTC_ALARM_MINUTE) + SKIP(return, "Skipping test since alarms have minute resolution."); rc = ioctl(self->fd, RTC_RD_TIME, &tm); ASSERT_NE(-1, rc); @@ -298,6 +304,8 @@ TEST_F(rtc, alarm_wkalm_set) { alarm_state = get_rtc_alarm_state(self->fd); if (alarm_state == RTC_ALARM_DISABLED) SKIP(return, "Skipping test since alarms are not supported."); + if (alarm_state == RTC_ALARM_MINUTE) + SKIP(return, "Skipping test since alarms have minute resolution."); rc = ioctl(self->fd, RTC_RD_TIME, &alarm.time); ASSERT_NE(-1, rc); -- 2.45.2

7 months

1
0
0 0

[PATCH 0/3] selftests: coredump: Some bug fixes

by Nam Cao

Hi, While trying the coredump test on qemu-system-riscv64, I observed test failures for various reasons. This series makes the test work on qemu-system-riscv64. Best regards, Nam Nam Cao (3): selftests: coredump: Properly initialize pointer selftests: coredump: Use waitpid() instead of busy-wait selftests: coredump: Raise timeout to 2 minutes tools/testing/selftests/coredump/stackdump | 6 +----- .../testing/selftests/coredump/stackdump_test.c | 17 +++++++++-------- 2 files changed, 10 insertions(+), 13 deletions(-) -- 2.39.5

7 months

2
5
0 0

[PATCH] unshare_test: set nr_open using soft limit

by lufei

Set maximum file descriptor number limit by rlimit.rlim_max than nr_open(hard limit). Hard limit may cause dup2 fail. Signed-off-by: lufei <lufei(a)uniontech.com> --- tools/testing/selftests/core/unshare_test.c | 28 +++++++++++++-------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/core/unshare_test.c b/tools/testing/selftests/core/unshare_test.c index 7fec9dfb1b0e..2c4e7104b0d9 100644 --- a/tools/testing/selftests/core/unshare_test.c +++ b/tools/testing/selftests/core/unshare_test.c @@ -26,10 +26,11 @@ TEST(unshare_EMFILE) .exit_signal = SIGCHLD, }; int fd; - ssize_t n, n2; - static char buf[512], buf2[512]; + ssize_t n, n2, n3; + static char buf[512], buf2[512], buf3[512]; struct rlimit rlimit; int nr_open; + int rlimit_max; fd = open("/proc/sys/fs/nr_open", O_RDWR); ASSERT_GE(fd, 0); @@ -42,22 +43,24 @@ TEST(unshare_EMFILE) ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit)); - /* bump fs.nr_open */ - n2 = sprintf(buf2, "%d\n", nr_open + 1024); + rlimit_max = rlimit.rlim_max; + + /* bump rlimit.rlim_max */ + n2 = sprintf(buf2, "%d\n", rlimit_max + 1024); lseek(fd, 0, SEEK_SET); write(fd, buf2, n2); /* bump ulimit -n */ - rlimit.rlim_cur = nr_open + 1024; - rlimit.rlim_max = nr_open + 1024; + rlimit.rlim_cur = rlimit_max + 1024; + rlimit.rlim_max = rlimit_max + 1024; EXPECT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit)) { lseek(fd, 0, SEEK_SET); write(fd, buf, n); exit(EXIT_FAILURE); } - /* get a descriptor past the old fs.nr_open */ - EXPECT_GE(dup2(2, nr_open + 64), 0) { + /* get a descriptor past the old rlimit.rlim_max */ + EXPECT_GE(dup2(2, rlimit_max + 64), 0) { lseek(fd, 0, SEEK_SET); write(fd, buf, n); exit(EXIT_FAILURE); @@ -74,15 +77,20 @@ TEST(unshare_EMFILE) if (pid == 0) { int err; - /* restore fs.nr_open */ + n3 = sprintf(buf3, "%d\n", rlimit_max); lseek(fd, 0, SEEK_SET); - write(fd, buf, n); + write(fd, buf3, n3); + /* ... 
and now unshare(CLONE_FILES) must fail with EMFILE */ err = unshare(CLONE_FILES); EXPECT_EQ(err, -1) exit(EXIT_FAILURE); EXPECT_EQ(errno, EMFILE) exit(EXIT_FAILURE); + + /* restore fs.nr_open */ + lseek(fd, 0, SEEK_SET); + write(fd, buf, n); exit(EXIT_SUCCESS); } -- 2.39.3

7 months

3
2
0 0

[PATCH net 0/4] mptcp: misc. fixes for 6.15-rc0

by Matthieu Baerts (NGI0)

Here are 4 unrelated patches: - Patch 1: fix a NULL pointer when two SYN-ACK for the same request are handled in parallel. A fix for up to v5.9. - Patch 2: selftests: fix check for the wrong FD. A fix for up to v5.17. - Patch 3: selftests: close all FDs in case of error. A fix for up to v5.17. - Patch 4: selftests: ignore a new generated file. A fix for 6.15-rc0. Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org> --- Cong Liu (1): selftests: mptcp: fix incorrect fd checks in main_loop Gang Yan (1): mptcp: fix NULL pointer in can_accept_new_subflow Geliang Tang (1): selftests: mptcp: close fd_in before returning in main_loop Matthieu Baerts (NGI0) (1): selftests: mptcp: ignore mptcp_diag binary net/mptcp/subflow.c | 15 ++++++++------- tools/testing/selftests/net/mptcp/.gitignore | 1 + tools/testing/selftests/net/mptcp/mptcp_connect.c | 11 +++++++---- 3 files changed, 16 insertions(+), 11 deletions(-) --- base-commit: 2ea396448f26d0d7d66224cb56500a6789c7ed07 change-id: 20250328-net-mptcp-misc-fixes-6-15-98bfbeaa15ac Best regards, -- Matthieu Baerts (NGI0) <matttbe(a)kernel.org>

7 months

2
5
0 0

[PATCH] kunit: fixes Compilation error on s390

by Alessandro Carminati

The current implementation of suppressing warning backtraces uses __func__, which is a compile-time constant only for non -fPIC compilation. GCC's support for this situation in position-independent code varies across versions and architectures. On the s390 architecture, -fPIC is required for compilation, and support for this scenario is available in GCC 11 and later. Fixes: d8b14a2 ("bug/kunit: core support for suppressing warning backtraces") Signed-off-by: Alessandro Carminati <acarmina(a)redhat.com> --- lib/kunit/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/kunit/Kconfig b/lib/kunit/Kconfig index 201402f0ab49..6c937144dcea 100644 --- a/lib/kunit/Kconfig +++ b/lib/kunit/Kconfig @@ -17,6 +17,7 @@ if KUNIT config KUNIT_SUPPRESS_BACKTRACE bool "KUnit - Enable backtrace suppression" + depends on (!S390 && CC_IS_GCC) || (CC_IS_GCC && GCC_VERSION >= 110000) default y help Enable backtrace suppression for KUnit. If enabled, backtraces -- 2.34.1

7 months

4
3
0 0

[PATCH] kunit: fixes backtrace suppression test module description

by Alessandro Carminati

Adds module description to the backtrace suppression test. Fixes: 19f3496 ("kunit: add test cases for backtrace warning suppression") Signed-off-by: Alessandro Carminati <acarmina(a)redhat.com> --- lib/kunit/backtrace-suppression-test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/kunit/backtrace-suppression-test.c b/lib/kunit/backtrace-suppression-test.c index 8b4125af2481..d4c5df09bba6 100644 --- a/lib/kunit/backtrace-suppression-test.c +++ b/lib/kunit/backtrace-suppression-test.c @@ -102,3 +102,4 @@ static struct kunit_suite backtrace_suppression_test_suite = { kunit_test_suites(&backtrace_suppression_test_suite); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("KUnit test to verify warning backtrace suppression"); -- 2.34.1

7 months

3
2
0 0

[PATCH v2 0/4] tools/nolibc: MIPS: entrypoint cleanups and N32/N64 ABIs

by Thomas Weißschuh

Introduce support for the N32 and N64 ABIs. As preparation, the entrypoint is first simplified significantly. Thanks to Maciej for all the valuable information. Signed-off-by: Thomas Weißschuh <linux(a)weissschuh.net> --- Changes in v2: - Clean up entrypoint first - Annotate #endifs - Link to v1: https://lore.kernel.org/r/20250212-nolibc-mips-n32-v1-1-6892e58d1321@weisss… --- Thomas Weißschuh (4): tools/nolibc: MIPS: drop $gp setup tools/nolibc: MIPS: drop manual stack pointer alignment tools/nolibc: MIPS: drop noreorder option tools/nolibc: MIPS: add support for N64 and N32 ABIs tools/include/nolibc/arch-mips.h | 117 +++++++++++++++++++++------- tools/testing/selftests/nolibc/Makefile | 28 ++++++- tools/testing/selftests/nolibc/run-tests.sh | 2 +- 3 files changed, 118 insertions(+), 29 deletions(-) --- base-commit: 9c812b01f13d37410ea103e00bc47e5e0f6d2bad change-id: 20231105-nolibc-mips-n32-234901bd910d Best regards, -- Thomas Weißschuh <linux(a)weissschuh.net>

7 months

4
13
0 0

[PATCH v2] selftests/mm: Convert page_size to unsigned long

by Siddarth G

Cppcheck warning: int result is assigned to long long variable. If the variable is long long to avoid loss of information, then you have loss of information. This patch changes the type of page_size from 'unsigned int' to 'unsigned long' instead of using ULL suffixes. Changing hpage_size to 'unsigned long' was considered, but since gethugepage() expects an int, this change was avoided. Similarly, wp_init(), wp_free(), wp_addr_range() are designed to accept int parameters, even though they are assigned with long values. Future updates to these functions might be required ? Reported-by: David Binderman <dcb314(a)hotmail.com> Closes: https://lore.kernel.org/all/AS8PR02MB10217315060BBFDB21F19643E9CA62@AS8PR02… Signed-off-by: Siddarth G <siddarthsgml(a)gmail.com> --- Changes since v1: - Instead of using ULL suffixes, converted page_size and related variables involving it from 'int' to 'long' tools/testing/selftests/mm/pagemap_ioctl.c | 24 +++++++++++----------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c index f3b12402ca89..fe5ae8b25ff6 100644 --- a/tools/testing/selftests/mm/pagemap_ioctl.c +++ b/tools/testing/selftests/mm/pagemap_ioctl.c @@ -34,7 +34,7 @@ #define PAGEMAP "/proc/self/pagemap" int pagemap_fd; int uffd; -unsigned int page_size; +unsigned long page_size; unsigned int hpage_size; const char *progname; @@ -184,7 +184,7 @@ void *gethugetlb_mem(int size, int *shmid) int userfaultfd_tests(void) { - int mem_size, vec_size, written, num_pages = 16; + long mem_size, vec_size, written, num_pages = 16; char *mem, *vec; mem_size = num_pages * page_size; @@ -213,7 +213,7 @@ int userfaultfd_tests(void) written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, vec_size - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (written < 0) - ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); + 
ksft_exit_fail_msg("error %ld %d %s\n", written, errno, strerror(errno)); ksft_test_result(written == 0, "%s all new pages must not be written (dirty)\n", __func__); @@ -244,7 +244,7 @@ int sanity_tests_sd(void) long walk_end; vec_size = num_pages/2; - mem_size = num_pages * (long long)page_size; + mem_size = num_pages * page_size; vec = malloc(sizeof(struct page_region) * vec_size); if (!vec) @@ -432,7 +432,7 @@ int sanity_tests_sd(void) free(vec2); /* 8. Smaller vec */ - mem_size = 1050ULL * page_size; + mem_size = 1050 * page_size; vec_size = mem_size/(page_size*2); vec = malloc(sizeof(struct page_region) * vec_size); @@ -487,7 +487,7 @@ int sanity_tests_sd(void) total_pages = 0; /* 9. Smaller vec */ - mem_size = 10000ULL * page_size; + mem_size = 10000 * page_size; vec_size = 50; vec = malloc(sizeof(struct page_region) * vec_size); @@ -995,7 +995,7 @@ int unmapped_region_tests(void) { void *start = (void *)0x10000000; int written, len = 0x00040000; - int vec_size = len / page_size; + long vec_size = len / page_size; struct page_region *vec = malloc(sizeof(struct page_region) * vec_size); /* 1. Get written pages */ @@ -1051,14 +1051,14 @@ static void test_simple(void) int sanity_tests(void) { unsigned long long mem_size, vec_size; - int ret, fd, i, buf_size; + long ret, fd, i, buf_size; struct page_region *vec; char *mem, *fmem; struct stat sbuf; char *tmp_buf; /* 1. wrong operation */ - mem_size = 10ULL * page_size; + mem_size = 10 * page_size; vec_size = mem_size / page_size; vec = malloc(sizeof(struct page_region) * vec_size); @@ -1160,7 +1160,7 @@ int sanity_tests(void) ret = stat(progname, &sbuf); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); fmem = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0); if (fmem == MAP_FAILED) @@ -1507,7 +1507,7 @@ int main(int __attribute__((unused)) argc, char *argv[]) sanity_tests_sd(); /* 2. 
Normal page testing */ - mem_size = 10ULL * page_size; + mem_size = 10 * page_size; mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); if (mem == MAP_FAILED) ksft_exit_fail_msg("error nomem\n"); @@ -1520,7 +1520,7 @@ int main(int __attribute__((unused)) argc, char *argv[]) munmap(mem, mem_size); /* 3. Large page testing */ - mem_size = 512ULL * 10 * page_size; + mem_size = 512 * 10 * page_size; mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); if (mem == MAP_FAILED) ksft_exit_fail_msg("error nomem\n"); -- 2.43.0

7 months

1
0
0 0

ISC West 2025 Attendee Data to Drive Sales and Networking Efforts

by Sara Wood

Hi, Are you considering getting the ISC West 2025 attendees list? Expo Name: International Security Conference & Exposition West 2025 Total Number of records: 23,000 records List includes: Company Name, Contact Name, Job Title, Mailing Address, Phone, Emails, etc. Do you want to acquire these leads? If so, I'm happy to send the pricing details. Eager for your response Regards Sara Wood Demand Generation Manager Leads Data Inc., Please reply with REMOVE if you don't wish to receive further emails

7 months

1
0
0 0

[PATCH] selftests/nolibc: drop unnecessary sys/io.h include

by Thomas Weißschuh

The include of sys/io.h is not necessary anymore since commit 67eb617a8e1e ("selftests/nolibc: simplify call to ioperm"). Its existence is also problematic as the header does not exist on all architectures. Reported-by: Sebastian Andrzej Siewior <sebastian(a)breakpoint.cc> Signed-off-by: Thomas Weißschuh <linux(a)weissschuh.net> --- tools/testing/selftests/nolibc/nolibc-test.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index 5884a891c491544050fc35b07322c73a1a9dbaf3..7a60b6ac1457e8d862ab1a6a26c9e46abec92111 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -16,7 +16,6 @@ #ifndef _NOLIBC_STDIO_H /* standard libcs need more includes */ #include <sys/auxv.h> -#include <sys/io.h> #include <sys/ioctl.h> #include <sys/mman.h> #include <sys/mount.h> --- base-commit: bceb73904c855c78402dca94c82915f078f259dd change-id: 20250324-nolibc-ioperm-155646560b95 Best regards, -- Thomas Weißschuh <linux(a)weissschuh.net>

7 months

3
2
0 0

[PATCH 0/2] ublk: specify io_cmd_buf pointer type

by Caleb Sander Mateos

io_cmd_buf points to an array of ublksrv_io_desc structs but its type is char *. Indexing the array requires an explicit multiplication and cast. The compiler also can't check the pointer types. Change io_cmd_buf's type to struct ublksrv_io_desc * so it can be indexed directly and the compiler can type-check the code. Make the same change to the ublk selftests. Caleb Sander Mateos (2): ublk: specify io_cmd_buf pointer type selftests: ublk: specify io_cmd_buf pointer type drivers/block/ublk_drv.c | 8 ++++---- tools/testing/selftests/ublk/kublk.c | 2 +- tools/testing/selftests/ublk/kublk.h | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) -- 2.45.2

7 months

4
6
0 0

[PATCH net-next v9 0/6] tun: Introduce virtio-net hashing feature

by Akihiko Odaki

virtio-net has two uses of hashes: one is RSS and another is hash reporting. Conventionally the hash calculation was done by the VMM. However, computing the hash after the queue was chosen defeats the purpose of RSS. Another approach is to use an eBPF steering program. This approach has another downside: it cannot report the calculated hash due to the restrictive nature of eBPF. Introduce the code to compute hashes to the kernel in order to overcome these challenges. An alternative solution is to extend the eBPF steering program so that it will be able to report to the userspace, but it is based on context rewrites, which is in feature freeze. We can adopt kfuncs, but they will not be UAPIs. We opt to ioctl to align with other relevant UAPIs (KVM and vhost_net). The patches for QEMU to use this new feature were submitted as RFC and are available at: https://patchew.org/QEMU/20240915-hash-v3-0-79cb08d28647@daynix.com/ This work was presented at LPC 2024: https://lpc.events/event/18/contributions/1963/ V1 -> V2: Changed to introduce a new BPF program type. Signed-off-by: Akihiko Odaki <akihiko.odaki(a)daynix.com> --- Changes in v9: - Added a missing return statement in patch "tun: Introduce virtio-net hash feature". - Link to v8: https://lore.kernel.org/r/20250306-rss-v8-0-7ab4f56ff423@daynix.com Changes in v8: - Disabled IPv6 to eliminate noises in tests. - Added a branch in tap to avoid unnecessary dissection when hash reporting is disabled. - Removed unnecessary rtnl_lock(). - Extracted code to handle new ioctls into separate functions to avoid adding extra NULL checks to the code handling other ioctls. - Introduced variable named "fd" to __tun_chr_ioctl(). - s/-/=/g in a patch message to avoid confusing Git. - Link to v7: https://lore.kernel.org/r/20250228-rss-v7-0-844205cbbdd6@daynix.com Changes in v7: - Ensured to set hash_report to VIRTIO_NET_HASH_REPORT_NONE for VHOST_NET_F_VIRTIO_NET_HDR. - s/4/sizeof(u32)/ in patch "virtio_net: Add functions for hashing". 
- Added tap_skb_cb type. - Rebased. - Link to v6: https://lore.kernel.org/r/20250109-rss-v6-0-b1c90ad708f6@daynix.com Changes in v6: - Extracted changes to fill vnet header holes into another series. - Squashed patches "skbuff: Introduce SKB_EXT_TUN_VNET_HASH", "tun: Introduce virtio-net hash reporting feature", and "tun: Introduce virtio-net RSS" into patch "tun: Introduce virtio-net hash feature". - Dropped the RFC tag. - Link to v5: https://lore.kernel.org/r/20241008-rss-v5-0-f3cf68df005d@daynix.com Changes in v5: - Fixed a compilation error with CONFIG_TUN_VNET_CROSS_LE. - Optimized the calculation of the hash value according to: https://git.dpdk.org/dpdk/commit/?id=3fb1ea032bd6ff8317af5dac9af901f1f324ca… - Added patch "tun: Unify vnet implementation". - Dropped patch "tap: Pad virtio header with zero". - Added patch "selftest: tun: Test vnet ioctls without device". - Reworked selftests to skip for older kernels. - Documented the case when the underlying device is deleted and packets have queue_mapping set by TC. - Reordered test harness arguments. - Added code to handle fragmented packets. - Link to v4: https://lore.kernel.org/r/20240924-rss-v4-0-84e932ec0e6c@daynix.com Changes in v4: - Moved tun_vnet_hash_ext to if_tun.h. - Renamed virtio_net_toeplitz() to virtio_net_toeplitz_calc(). - Replaced htons() with cpu_to_be16(). - Changed virtio_net_hash_rss() to return void. - Reordered variable declarations in virtio_net_hash_rss(). - Removed virtio_net_hdr_v1_hash_from_skb(). - Updated messages of "tap: Pad virtio header with zero" and "tun: Pad virtio header with zero". - Fixed vnet_hash allocation size. - Ensured to free vnet_hash when destructing tun_struct. - Link to v3: https://lore.kernel.org/r/20240915-rss-v3-0-c630015db082@daynix.com Changes in v3: - Reverted back to add ioctl. - Split patch "tun: Introduce virtio-net hashing feature" into "tun: Introduce virtio-net hash reporting feature" and "tun: Introduce virtio-net RSS". 
- Changed to reuse hash values computed for automq instead of performing RSS hashing when hash reporting is requested but RSS is not. - Extracted relevant data from struct tun_struct to keep it minimal. - Added kernel-doc. - Changed to allow calling TUNGETVNETHASHCAP before TUNSETIFF. - Initialized num_buffers with 1. - Added a test case for unclassified packets. - Fixed error handling in tests. - Changed tests to verify that the queue index will not overflow. - Rebased. - Link to v2: https://lore.kernel.org/r/20231015141644.260646-1-akihiko.odaki@daynix.com --- Akihiko Odaki (6): virtio_net: Add functions for hashing net: flow_dissector: Export flow_keys_dissector_symmetric tun: Introduce virtio-net hash feature selftest: tun: Test vnet ioctls without device selftest: tun: Add tests for virtio-net hashing vhost/net: Support VIRTIO_NET_F_HASH_REPORT Documentation/networking/tuntap.rst | 7 + drivers/net/Kconfig | 1 + drivers/net/tap.c | 68 +++- drivers/net/tun.c | 98 +++++- drivers/net/tun_vnet.h | 159 ++++++++- drivers/vhost/net.c | 49 +-- include/linux/if_tap.h | 2 + include/linux/skbuff.h | 3 + include/linux/virtio_net.h | 188 ++++++++++ include/net/flow_dissector.h | 1 + include/uapi/linux/if_tun.h | 75 ++++ net/core/flow_dissector.c | 3 +- net/core/skbuff.c | 4 + tools/testing/selftests/net/Makefile | 2 +- tools/testing/selftests/net/tun.c | 656 ++++++++++++++++++++++++++++++++++- 15 files changed, 1255 insertions(+), 61 deletions(-) --- base-commit: dd83757f6e686a2188997cb58b5975f744bb7786 change-id: 20240403-rss-e737d89efa77 prerequisite-change-id: 20241230-tun-66e10a49b0c7:v6 prerequisite-patch-id: 871dc5f146fb6b0e3ec8612971a8e8190472c0fb prerequisite-patch-id: 2797ed249d32590321f088373d4055ff3f430a0e prerequisite-patch-id: ea3370c72d4904e2f0536ec76ba5d26784c0cede prerequisite-patch-id: 837e4cf5d6b451424f9b1639455e83a260c4440d prerequisite-patch-id: ea701076f57819e844f5a35efe5cbc5712d3080d prerequisite-patch-id: 701646fb43ad04cc64dd2bf13c150ccbe6f828ce 
prerequisite-patch-id: 53176dae0c003f5b6c114d43f936cf7140d31bb5 prerequisite-change-id: 20250116-buffers-96e14bf023fc:v2 prerequisite-patch-id: 25fd4f99d4236a05a5ef16ab79f3e85ee57e21cc Best regards, -- Akihiko Odaki <akihiko.odaki(a)daynix.com>

7 months

4
53
0 0

[PATCH] selftests/mm: Fix loss of information warnings

by Siddarth G

Cppcheck reported a style warning: int result is assigned to long long variable. If the variable is long long to avoid loss of information, then you have loss of information. Changing the type of page_size from 'unsigned int' to 'unsigned long long' was considered. But that might cause new conversion issues in other parts of the code where calculations involving 'page_size' are assigned to int variables. So we approach by appending ULL suffixes Reported-by: David Binderman <dcb314(a)hotmail.com> Closes: https://lore.kernel.org/all/AS8PR02MB10217315060BBFDB21F19643E9CA62@AS8PR02… Signed-off-by: Siddarth G <siddarthsgml(a)gmail.com> --- tools/testing/selftests/mm/pagemap_ioctl.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c index 57b4bba2b45f..f3b12402ca89 100644 --- a/tools/testing/selftests/mm/pagemap_ioctl.c +++ b/tools/testing/selftests/mm/pagemap_ioctl.c @@ -244,7 +244,7 @@ int sanity_tests_sd(void) long walk_end; vec_size = num_pages/2; - mem_size = num_pages * page_size; + mem_size = num_pages * (long long)page_size; vec = malloc(sizeof(struct page_region) * vec_size); if (!vec) @@ -432,7 +432,7 @@ int sanity_tests_sd(void) free(vec2); /* 8. Smaller vec */ - mem_size = 1050 * page_size; + mem_size = 1050ULL * page_size; vec_size = mem_size/(page_size*2); vec = malloc(sizeof(struct page_region) * vec_size); @@ -487,7 +487,7 @@ int sanity_tests_sd(void) total_pages = 0; /* 9. Smaller vec */ - mem_size = 10000 * page_size; + mem_size = 10000ULL * page_size; vec_size = 50; vec = malloc(sizeof(struct page_region) * vec_size); @@ -1058,7 +1058,7 @@ int sanity_tests(void) char *tmp_buf; /* 1. 
wrong operation */ - mem_size = 10 * page_size; + mem_size = 10ULL * page_size; vec_size = mem_size / page_size; vec = malloc(sizeof(struct page_region) * vec_size); @@ -1507,7 +1507,7 @@ int main(int __attribute__((unused)) argc, char *argv[]) sanity_tests_sd(); /* 2. Normal page testing */ - mem_size = 10 * page_size; + mem_size = 10ULL * page_size; mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); if (mem == MAP_FAILED) ksft_exit_fail_msg("error nomem\n"); @@ -1520,7 +1520,7 @@ int main(int __attribute__((unused)) argc, char *argv[]) munmap(mem, mem_size); /* 3. Large page testing */ - mem_size = 512 * 10 * page_size; + mem_size = 512ULL * 10 * page_size; mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); if (mem == MAP_FAILED) ksft_exit_fail_msg("error nomem\n"); -- 2.43.0

7 months

2
1
0 0

bug report for linux-6.14/tools/testing/selftests/mm/pagemap_ioctl.c

by David Binderman

Hello there, Static analyser cppcheck says: > linux-6.14/tools/testing/selftests/mm/pagemap_ioctl.c:1061:11: style: int result is assigned to long long variable. If the variable is long long to avoid loss of information, then you have loss of information. [truncLongCastAssignment] > linux-6.14/tools/testing/selftests/mm/pagemap_ioctl.c:1510:11: style: int result is assigned to long long variable. If the variable is long long to avoid loss of information, then you have loss of information. [truncLongCastAssignment] > linux-6.14/tools/testing/selftests/mm/pagemap_ioctl.c:1523:11: style: int result is assigned to long long variable. If the variable is long long to avoid loss of information, then you have loss of information. [truncLongCastAssignment] > linux-6.14/tools/testing/selftests/mm/pagemap_ioctl.c:247:11: style: int result is assigned to long long variable. If the variable is long long to avoid loss of information, then you have loss of information. [truncLongCastAssignment] > linux-6.14/tools/testing/selftests/mm/pagemap_ioctl.c:435:11: style: int result is assigned to long long variable. If the variable is long long to avoid loss of information, then you have loss of information. [truncLongCastAssignment] > linux-6.14/tools/testing/selftests/mm/pagemap_ioctl.c:490:11: style: int result is assigned to long long variable. If the variable is long long to avoid loss of information, then you have loss of information. [truncLongCastAssignment] The source code of the first one is mem_size = 10 * page_size; Maybe better code: mem_size = 10ULL * page_size; Regards David Binderman

7 months

2
1
0 0

Symbol too long for allsyms warnings on KSYM_NAME_LEN

by Arnd Bergmann

My randconfig builds sometimes (around one in every 700 configs) run into this warning on x86: Symbol __pfx_snnnng1h2i3j4k5l6m7ng1h2i3j4k5l6m7nng1h2i3j4k5l6m7ng1h2i3j4k5l6m7nnng1h2i3j4k5l6m7ng1h2i3j4k5l6m7nng1h2i3j4k5l6m7ng1h2i3j4k5l6m7nnnng1h2i3j4k5l6m7ng1h2i3j4k5l6m7nng1h2i3j4k5l6m7ng1h2i3j4k5l6m7nnng1h2i3j4k5l6m7ng1h2i3j4k5l6m7nng1h2i3j4k5l6m7ng1h2i3j4k5l6m7nnnnng1h2i3j4k5l6m7ng1h2i3j4k5l6m7nng1h2i3j4k5l6m7ng1h2i3j4k5l6m7nnng1h2i3j4k5l6m7ng1h2i3j4k5l6m7nng1h2i3j4k5l6m7ng1h2i3j4k5l6m7nnnng1h2i3j4k5l6m7ng1h2i3j4k5l6m7nng1h2i3j4k5l6m7ng1h2i3j4k5l6m7nnng1h2i3j4k5l6m7ng1h2i3j4k5l6m7nng1h2i3j4k5l6m7ng1h2i3j4k5l6m7n too long for kallsyms (517 >= 512). Please increase KSYM_NAME_LEN both in kernel and kallsyms.c The check that gets triggered was added in commit c104c16073b ("Kunit to check the longest symbol length"), see https://lore.kernel.org/all/20241117195923.222145-1-sergio.collado@gmail.co… and the overlong identifier seems to be the result of objtool adding the six-byte "__pfx_" string to a symbol in elf_create_prefix_symbol() when CONFIG_FUNCTION_PADDING_CFI is set. I think the suggestion to "Please increase KSYM_NAME_LEN both in kernel and kallsyms.c" is misleading here and should probably be changed. I don't know if this something that objtool should work around, or something that needs to be adapted in the test. Arnd

7 months

2
2
0 0

Re: [PATCH v7 09/28] landlock: Add AUDIT_LANDLOCK_ACCESS and log ptrace denials

by Mickaël Salaün

On Thu, Mar 27, 2025 at 09:38:05PM +0000, Tingmao Wang wrote: > Hi Mickaël, Hi, thanks for the report. > > On 3/20/25 19:06, Mickaël Salaün wrote: > [...] > > +static struct landlock_hierarchy * > > +get_hierarchy(const struct landlock_ruleset *const domain, const size_t layer) > > +{ > > + struct landlock_hierarchy *hierarchy = domain->hierarchy; > > + ssize_t i; > > + > > + if (WARN_ON_ONCE(layer >= domain->num_layers)) > > + return hierarchy; > > + > > + for (i = domain->num_layers - 1; i > layer; i--) { > > + if (WARN_ON_ONCE(!hierarchy->parent)) > > + break; > > + > > + hierarchy = hierarchy->parent; > > + } > > + > > + return hierarchy; > > +} > > + > > +#ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST > > + > > +static void test_get_hierarchy(struct kunit *const test) > > +{ > > + struct landlock_hierarchy dom0_hierarchy = { > > + .id = 10, > > + }; > > + struct landlock_hierarchy dom1_hierarchy = { > > + .parent = &dom0_hierarchy, > > + .id = 20, > > + }; > > + struct landlock_hierarchy dom2_hierarchy = { > > + .parent = &dom1_hierarchy, > > + .id = 30, > > + }; > > + struct landlock_ruleset dom2 = { > > + .hierarchy = &dom2_hierarchy, > > + .num_layers = 3, > > + }; > > + > > + KUNIT_EXPECT_EQ(test, 10, get_hierarchy(&dom2, 0)->id); > > + KUNIT_EXPECT_EQ(test, 20, get_hierarchy(&dom2, 1)->id); > > + KUNIT_EXPECT_EQ(test, 30, get_hierarchy(&dom2, 2)->id); > > + KUNIT_EXPECT_EQ(test, 30, get_hierarchy(&dom2, -1)->id); > > This causes a warning from WARN_ON_ONCE(layer >= domain->num_layers) > when running this test, I guess because layer is unsigned. Interestingly this doesn't make the test to fail (because the result is still correct), nor to show up when using tools/testing/kunit/kunit.py, which is why I didn't see that. > Should it > be ssize_t, if this is an expected usage? The get_hierarchy() code is correct, and the KUnit test is correct too. Using a ssize_t would introduce a bug. 
The issue is that I wanted to test a case that should never happen, hence the WARN_ON_ONCE(). I guess the best "fix" for now would be to remove the KUnit test with -1, but there is a new KUnit feature to hide this kind of warning: https://lore.kernel.org/linux-kselftest/20250313114329.284104-1-acarmina@re… It is currently in linux-next, but I'm not sure it will be merged in Linux 6.15 . For now I'll keep this commit but I'll send a fix/update to either remove the test or use the new DEFINE_SUPPRESSED_WARNING macros depending on its merge status. > > ------------[ cut here ]------------ > WARNING: CPU: 7 PID: 145 at security/landlock/audit.c:142 get_hierarchy (security/landlock/audit.c:142) > Modules linked in: > CPU: 7 UID: 0 PID: 145 Comm: kunit_try_catch Tainted: G N 6.14.0-next-20250326-dev-00004-g4e57edc3e062-dirty #5 PREEMPT(undef) > Tainted: [N]=TEST > Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 > RIP: 0010:get_hierarchy (security/landlock/audit.c:142) > Code: 83 e8 02 e8 18 00 84 c0 75 02 0f 0b 48 83 c4 08 48 89 d8 5b 41 5c 41 5e 5d c3 48 c7 c7 00 f3 21 83 e8 e2 e7 18 00 84 c0 75 e2 <0f> 0b eb de 48 89 75 e0 e8 a1 a9 a7 ff 48 8b 75 e0 e9 76 ff ff ff > // snip // > Call Trace: > <TASK> > test_get_hierarchy (security/landlock/audit.c:178 (discriminator 5)) > ? test_get_denied_layer (security/landlock/audit.c:158) > ? lock_repin_lock (kernel/locking/lockdep.c:5649 kernel/locking/lockdep.c:5978) > ? __lock_acquire (kernel/locking/lockdep.c:4675 kernel/locking/lockdep.c:5189) > ? _raw_spin_unlock_irqrestore (./include/linux/spinlock_api_smp.h:151 kernel/locking/spinlock.c:194) > ? find_held_lock (kernel/locking/lockdep.c:5348) > ? trace_irq_enable (./include/trace/events/preemptirq.h:40 (discriminator 17)) > ? trace_hardirqs_on (kernel/trace/trace_preemptirq.c:80) > ? kvm_clock_get_cycles (./arch/x86/include/asm/preempt.h:95 arch/x86/kernel/kvmclock.c:80 arch/x86/kernel/kvmclock.c:86) > ? 
ktime_get_ts64 (kernel/time/timekeeping.c:318 (discriminator 4) kernel/time/timekeeping.c:335 (discriminator 4) kernel/time/timekeeping.c:907 (discriminator 4)) > kunit_try_run_case (lib/kunit/test.c:400 lib/kunit/test.c:443) > ? kunit_try_run_case_cleanup (lib/kunit/test.c:430) > >

7 months

1
0
0 0

[PATCH] kunit: Spelling s/slowm/slow/

by Geert Uytterhoeven

Fix a misspelling of "slow". Signed-off-by: Geert Uytterhoeven <geert(a)linux-m68k.org> --- include/kunit/test.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/kunit/test.h b/include/kunit/test.h index 58dbab60f8530588..9b773406e01f3c43 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -67,7 +67,7 @@ enum kunit_status { /* * Speed Attribute is stored as an enum and separated into categories of - * speed: very_slowm, slow, and normal. These speeds are relative to + * speed: very_slow, slow, and normal. These speeds are relative to * other KUnit tests. * * Note: unset speed attribute acts as default of KUNIT_SPEED_NORMAL. -- 2.43.0

7 months

2
1
0 0

[GIT PULL] Kselftest next update for Linux 6.15-rc1

by Shuah Khan

Hi Linus, Please pull the following kselftest next update for Linux 6.15-rc1. Fixes bugs and cleans up code in tracing, ftrace, and user_events tests. Adds missing executables to ftrace gitignore. diff is attached. thanks, -- Shuah ---------------------------------------------------------------- The following changes since commit a64dcfb451e254085a7daee5fe51bf22959d52d3: Linux 6.14-rc2 (2025-02-09 12:45:03 -0800) are available in the Git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest tags/linux_kselftest-next-6.15-rc1 for you to fetch changes up to 82ef781f24ac26f4aa71f02d7624c439ab8389a7: selftests/ftrace: add 'poll' binary to gitignore (2025-03-04 08:51:17 -0700) ---------------------------------------------------------------- linux_kselftest-next-6.15-rc1 Fixes bugs and cleans up code in tracing, ftrace, and user_events tests. Adds missing executables to ftrace gitignore. ---------------------------------------------------------------- Bharadwaj Raju (1): selftests/ftrace: add 'poll' binary to gitignore Heiko Carstens (1): selftests/ftrace: Use readelf to find entry point in uprobe test Steven Rostedt (3): selftests/tracing: Test only toplevel README file not the instances selftests/ftrace: Clean up triggers after setting them selftests/tracing: Allow some more tests to run in instances Yiqian Xun (1): selftests/user_events: Fix failures caused by test code tools/testing/selftests/ftrace/.gitignore | 1 + .../selftests/ftrace/test.d/dynevent/add_remove_uprobe.tc | 10 +++++++--- tools/testing/selftests/ftrace/test.d/functions | 8 +++++++- .../test.d/trigger/inter-event/trigger-action-hist-xfail.tc | 1 + .../test.d/trigger/inter-event/trigger-onchange-action-hist.tc | 3 +++ .../test.d/trigger/inter-event/trigger-snapshot-action-hist.tc | 3 +++ .../ftrace/test.d/trigger/trigger-hist-expressions.tc | 1 + tools/testing/selftests/user_events/dyn_test.c | 2 ++ 8 files changed, 25 insertions(+), 4 deletions(-) 
----------------------------------------------------------------

7 months

2
1
0 0

[GIT PULL] kunit next update for Linux 6.15-rc1

by Shuah Khan

Hi Linus, Please pull the following kunit next update for Linux 6.15-rc1. kunit tool: - Changes to kunit tool to use qboot on QEMU x86_64, and build GDB scripts. - Fixes kunit tool bug in parsing test plan. - Adds test to kunit tool to check parsing late test plan. kunit: - Clarifies kunit_skip() argument name. - Adds Kunit check for the longest symbol length. - Changes qemu_configs for sparc to use Zilog console. Conflicts in lib/Makefile between commit: b341f6fd45ab ("blackhole_dev: convert self-test to KUnit") from the net-next tree and commit: c104c16073b7 ("Kunit to check the longest symbol length") The commit c104c16073b7 conflicts with the mainline now with 62f3802332ed ("vdso: add generic time data storage") from kspp is now in the mainline. Stephen has the fixes for these two conflicts in next. (Thank you Stephen) diff is attached. thanks, -- Shuah ---------------------------------------------------------------- The following changes since commit a64dcfb451e254085a7daee5fe51bf22959d52d3: Linux 6.14-rc2 (2025-02-09 12:45:03 -0800) are available in the Git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest tags/linux_kselftest-kunit-6.15-rc1 for you to fetch changes up to 2e0cf2b32f72b20b0db5cc665cd8465d0f257278: kunit: tool: add test to check parsing late test plan (2025-03-15 18:13:43 -0600) ---------------------------------------------------------------- linux_kselftest-kunit-6.15-rc1 kunit tool: - Changes to kunit tool to use qboot on QEMU x86_64, and build GDB scripts. - Fixes kunit tool bug in parsing test plan. - Adds test to kunit tool to check parsing late test plan. kunit: - Clarifies kunit_skip() argument name. - Adds Kunit check for the longest symbol length. - Changes qemu_configs for sparc to use Zilog console. 
---------------------------------------------------------------- Brendan Jackman (2): kunit: tool: Use qboot on QEMU x86_64 kunit: tool: Build GDB scripts Kevin Brodsky (1): kunit: Clarify kunit_skip() argument name Rae Moar (2): kunit: tool: Fix bug in parsing test plan kunit: tool: add test to check parsing late test plan Sergio González Collado (1): Kunit to check the longest symbol length Thomas Weißschuh (1): kunit: qemu_configs: sparc: use Zilog console arch/x86/tools/insn_decoder_test.c | 3 +- include/kunit/test.h | 20 ++++---- lib/Kconfig.debug | 9 ++++ lib/Makefile | 2 + lib/longest_symbol_kunit.c | 82 ++++++++++++++++++++++++++++++ tools/testing/kunit/kunit_kernel.py | 4 +- tools/testing/kunit/kunit_parser.py | 9 ++-- tools/testing/kunit/kunit_tool_test.py | 11 ++++ tools/testing/kunit/qemu_configs/sparc.py | 5 +- tools/testing/kunit/qemu_configs/x86_64.py | 4 +- 10 files changed, 128 insertions(+), 21 deletions(-) create mode 100644 lib/longest_symbol_kunit.c ----------------------------------------------------------------

7 months

2
1
0 0

[PATCH v7 0/7] rust: reduce `as` casts, enable related lints

by Tamir Duberstein

This started with a patch that enabled `clippy::ptr_as_ptr`. Benno Lossin suggested I also look into `clippy::ptr_cast_constness` and I discovered `clippy::as_ptr_cast_mut`. This series now enables all 3 lints. It also enables `clippy::as_underscore` which ensures other pointer casts weren't missed. The first commit reduces the need for pointer casts and is shared with another series[1]. As a later addition, `clippy::cast_lossless` and `clippy::ref_as_ptr` are also enabled. Link: https://lore.kernel.org/all/20250307-no-offset-v1-0-0c728f63b69c@gmail.com/ [1] Signed-off-by: Tamir Duberstein <tamird(a)gmail.com> --- Changes in v7: - Add patch to enable `clippy::ref_as_ptr`. - Link to v6: https://lore.kernel.org/r/20250324-ptr-as-ptr-v6-0-49d1b7fd4290@gmail.com Changes in v6: - Drop strict provenance patch. - Fix URLs in doc comments. - Add patch to enable `clippy::cast_lossless`. - Rebase on rust-next. - Link to v5: https://lore.kernel.org/r/20250317-ptr-as-ptr-v5-0-5b5f21fa230a@gmail.com Changes in v5: - Use `pointer::addr` in OF. (Boqun Feng) - Add documentation on stubs. (Benno Lossin) - Mark stubs `#[inline]`. - Pick up Alice's RB on a shared commit from https://lore.kernel.org/all/Z9f-3Aj3_FWBZRrm@google.com/. - Link to v4: https://lore.kernel.org/r/20250315-ptr-as-ptr-v4-0-b2d72c14dc26@gmail.com Changes in v4: - Add missing SoB. (Benno Lossin) - Use `without_provenance_mut` in alloc. (Boqun Feng) - Limit strict provenance lints to the `kernel` crate to avoid complex logic in the build system. This can be revisited on MSRV >= 1.84.0. - Rebase on rust-next. - Link to v3: https://lore.kernel.org/r/20250314-ptr-as-ptr-v3-0-e7ba61048f4a@gmail.com Changes in v3: - Fixed clippy warning in rust/kernel/firmware.rs. (kernel test robot) Link: https://lore.kernel.org/all/202503120332.YTCpFEvv-lkp@intel.com/ - s/as u64/as bindings::phys_addr_t/g. (Benno Lossin) - Use strict provenance APIs and enable lints. 
(Benno Lossin) - Link to v2: https://lore.kernel.org/r/20250309-ptr-as-ptr-v2-0-25d60ad922b7@gmail.com Changes in v2: - Fixed typo in first commit message. - Added additional patches, converted to series. - Link to v1: https://lore.kernel.org/r/20250307-ptr-as-ptr-v1-1-582d06514c98@gmail.com --- Tamir Duberstein (7): rust: retain pointer mut-ness in `container_of!` rust: enable `clippy::ptr_as_ptr` lint rust: enable `clippy::ptr_cast_constness` lint rust: enable `clippy::as_ptr_cast_mut` lint rust: enable `clippy::as_underscore` lint rust: enable `clippy::cast_lossless` lint rust: enable `clippy::ref_as_ptr` lint Makefile | 6 ++++++ drivers/gpu/drm/drm_panic_qr.rs | 10 +++++----- rust/bindings/lib.rs | 3 +++ rust/kernel/alloc/allocator_test.rs | 2 +- rust/kernel/alloc/kvec.rs | 4 ++-- rust/kernel/block/mq/operations.rs | 2 +- rust/kernel/block/mq/request.rs | 7 ++++--- rust/kernel/device.rs | 5 +++-- rust/kernel/device_id.rs | 5 +++-- rust/kernel/devres.rs | 19 ++++++++++--------- rust/kernel/dma.rs | 6 +++--- rust/kernel/error.rs | 2 +- rust/kernel/firmware.rs | 3 ++- rust/kernel/fs/file.rs | 3 ++- rust/kernel/io.rs | 18 +++++++++--------- rust/kernel/kunit.rs | 15 +++++++-------- rust/kernel/lib.rs | 5 ++--- rust/kernel/list/impl_list_item_mod.rs | 2 +- rust/kernel/miscdevice.rs | 2 +- rust/kernel/net/phy.rs | 4 ++-- rust/kernel/of.rs | 6 +++--- rust/kernel/pci.rs | 13 ++++++++----- rust/kernel/platform.rs | 6 ++++-- rust/kernel/print.rs | 11 +++++------ rust/kernel/rbtree.rs | 23 ++++++++++------------- rust/kernel/seq_file.rs | 3 ++- rust/kernel/str.rs | 14 +++++++------- rust/kernel/sync/poll.rs | 2 +- rust/kernel/uaccess.rs | 5 +++-- rust/kernel/workqueue.rs | 12 ++++++------ rust/uapi/lib.rs | 3 +++ 31 files changed, 120 insertions(+), 101 deletions(-) --- base-commit: 28bb48c4cb34f65a9aa602142e76e1426da31293 change-id: 20250307-ptr-as-ptr-21b1867fc4d4 Best regards, -- Tamir Duberstein <tamird(a)gmail.com>

7 months

3
26
0 0

[PATCH] rtc: Rename lib_test to rtc_lib_test

by Geert Uytterhoeven

When compiling the RTC library functions test as a module, the module has the non-descriptive name "lib_test.ko". Fix this by adding the subsystem's name as a prefix. Signed-off-by: Geert Uytterhoeven <geert(a)linux-m68k.org> --- drivers/rtc/Makefile | 2 +- drivers/rtc/{lib_test.c => rtc_lib_test.c} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename drivers/rtc/{lib_test.c => rtc_lib_test.c} (100%) diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index 489b4ab07068c758..c0ccbbfe2739c1aa 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -15,7 +15,7 @@ rtc-core-$(CONFIG_RTC_INTF_DEV) += dev.o rtc-core-$(CONFIG_RTC_INTF_PROC) += proc.o rtc-core-$(CONFIG_RTC_INTF_SYSFS) += sysfs.o -obj-$(CONFIG_RTC_LIB_KUNIT_TEST) += lib_test.o +obj-$(CONFIG_RTC_LIB_KUNIT_TEST) += rtc_lib_test.o # Keep the list ordered. diff --git a/drivers/rtc/lib_test.c b/drivers/rtc/rtc_lib_test.c similarity index 100% rename from drivers/rtc/lib_test.c rename to drivers/rtc/rtc_lib_test.c -- 2.43.0

7 months

1
0
0 0

[PATCH] selftest/mm: va_high_addr_switch: add ppc64 support check

by Li Wang

This patch adds PPC64 Radix MMU support to va_high_addr_switch.sh by introducing check_supported_ppc64(). The function verifies: - 5-level paging (PGTABLE_LEVELS >= 5) is enabled in the kernel config - Radix MMU (required for PPC64 5-level translation) - HugePages availability (needed for some tests) If any check fails, the test is skipped (ksft_skip). This ensures compatibility with Power9/Power10 systems running in Radix MMU mode. This avoids failures like the following on a 4-level paging system: # mmap(NULL, MAP_HUGETLB): 0xffffffffffffffff - FAILED # mmap(LOW_ADDR, MAP_HUGETLB): 0xffffffffffffffff - FAILED # mmap(HIGH_ADDR, MAP_HUGETLB): 0xffffffffffffffff - FAILED # mmap(HIGH_ADDR, MAP_HUGETLB) again: 0xffffffffffffffff - FAILED # mmap(HIGH_ADDR, MAP_FIXED | MAP_HUGETLB): 0xffffffffffffffff - FAILED # mmap(-1, MAP_HUGETLB): 0xffffffffffffffff - FAILED # mmap(-1, MAP_HUGETLB) again: 0xffffffffffffffff - FAILED # mmap(ADDR_SWITCH_HINT - PAGE_SIZE, 2*HUGETLB_SIZE, MAP_HUGETLB): 0xffffffffffffffff - FAILED # mmap(ADDR_SWITCH_HINT , 2*HUGETLB_SIZE, MAP_FIXED | MAP_HUGETLB): 0xffffffffffffffff - FAILED Signed-off-by: Li Wang <liwang(a)redhat.com> --- .../selftests/mm/va_high_addr_switch.sh | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tools/testing/selftests/mm/va_high_addr_switch.sh b/tools/testing/selftests/mm/va_high_addr_switch.sh index 2c725773cd79..1f92e8caceac 100755 --- a/tools/testing/selftests/mm/va_high_addr_switch.sh +++ b/tools/testing/selftests/mm/va_high_addr_switch.sh @@ -41,6 +41,31 @@ check_supported_x86_64() fi } +check_supported_ppc64() +{ + local config="/proc/config.gz" + [[ -f "${config}" ]] || config="/boot/config-$(uname -r)" + [[ -f "${config}" ]] || fail "Cannot find kernel config in /proc or /boot" + + local pg_table_levels=$(gzip -dcfq "${config}" | grep PGTABLE_LEVELS | cut -d'=' -f 2) + if [[ "${pg_table_levels}" -lt 5 ]]; then + echo "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test" + exit $ksft_skip + fi + + local
mmu_support=$(grep -m1 "mmu" /proc/cpuinfo | awk '{print $3}') + if [[ "$mmu_support" != "radix" ]]; then + echo "$0: System does not use Radix MMU, required for 5-level paging" + exit $ksft_skip + fi + + local hugepages_total=$(awk '/HugePages_Total/ {print $2}' /proc/meminfo) + if [[ "${hugepages_total}" -eq 0 ]]; then + echo "$0: HugePages are not enabled, required for some tests" + exit $ksft_skip + fi +} + check_test_requirements() { # The test supports x86_64 and powerpc64. We currently have no useful @@ -50,6 +75,9 @@ check_test_requirements() "x86_64") check_supported_x86_64 ;; + "ppc64le"|"ppc64") + check_supported_ppc64 + ;; *) return 0 ;; -- 2.48.1

7 months

1
0
0 0

[RFC PATCH v3 0/8] PMU partitioning driver support

by Colton Lewis

This series introduces support in the KVM and ARM PMUv3 driver for partitioning PMU counters into two separate ranges by taking advantage of the MDCR_EL2.HPMN register field. The advantage of a partitioned PMU would be to allow KVM guests direct access to a subset of PMU functionality, greatly reducing the overhead of performance monitoring in guests. While this feature could be accepted on its own merits, practically there is a lot more to be done before it will be fully useful, so I'm sending as an RFC for now. v3: * Include cpucap definition for FEAT_HPMN0 to allow for setting HPMN to 0 * Include PMU header cleanup provided by Marc [1] with some minor changes so compilation works * Pull functions out of pmu-emul.c that aren't specific to the emulated PMU. This and the previous item aren't strictly needed but they provide a nicer starting point. * As suggested by Oliver, start a file for partitioned PMU functions and move the reserved_host_counters parameter and MDCR handling into KVM so the driver does not have to know about it and we need fewer hacks to keep the driver working on 32-bit ARM. This was not a complete separation because the driver still needs to start and stop the host counters all at once and needs to toggle MDCR_EL2.HPME to do that. Introduce kvm_pmu_host_counters_{enable,disable}() functions to handle this and define them as no ops on 32-bit ARM. * As suggested by Oliver, don't limit PMCR.N on emulated PMU. 
This value will be read correctly when the right traps are disabled to use the partitioned PMU v2: https://lore.kernel.org/kvm/20250208020111.2068239-1-coltonlewis@google.com/ v1: https://lore.kernel.org/kvm/20250127222031.3078945-1-coltonlewis@google.com/ [1] https://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git/log/?… Colton Lewis (7): arm64: cpufeature: Add cap for HPMN0 arm64: Generate sign macro for sysreg Enums KVM: arm64: Reorganize PMU functions KVM: arm64: Introduce module param to partition the PMU perf: arm_pmuv3: Generalize counter bitmasks perf: arm_pmuv3: Keep out of guest counter partition KVM: arm64: selftests: Reword selftests error Marc Zyngier (1): KVM: arm64: Cleanup PMU includes arch/arm/include/asm/arm_pmuv3.h | 2 + arch/arm64/include/asm/arm_pmuv3.h | 2 +- arch/arm64/include/asm/kvm_host.h | 199 +++++++- arch/arm64/include/asm/kvm_pmu.h | 47 ++ arch/arm64/kernel/cpufeature.c | 8 + arch/arm64/kvm/Makefile | 2 +- arch/arm64/kvm/arm.c | 1 - arch/arm64/kvm/debug.c | 10 +- arch/arm64/kvm/hyp/include/hyp/switch.h | 1 + arch/arm64/kvm/pmu-emul.c | 464 +----------------- arch/arm64/kvm/pmu-part.c | 63 +++ arch/arm64/kvm/pmu.c | 454 +++++++++++++++++ arch/arm64/kvm/sys_regs.c | 2 + arch/arm64/tools/cpucaps | 1 + arch/arm64/tools/gen-sysreg.awk | 1 + arch/arm64/tools/sysreg | 6 +- drivers/perf/arm_pmuv3.c | 73 ++- include/kvm/arm_pmu.h | 204 -------- include/linux/perf/arm_pmu.h | 16 +- include/linux/perf/arm_pmuv3.h | 27 +- .../selftests/kvm/arm64/vpmu_counter_access.c | 2 +- virt/kvm/kvm_main.c | 1 + 22 files changed, 882 insertions(+), 704 deletions(-) create mode 100644 arch/arm64/include/asm/kvm_pmu.h create mode 100644 arch/arm64/kvm/pmu-part.c delete mode 100644 include/kvm/arm_pmu.h base-commit: 2014c95afecee3e76ca4a56956a936e23283f05b -- 2.48.1.601.g30ceb7b040-goog

7 months

3
16
0 0

[PATCH v7 0/3] Enable Zicbom in usermode

by Yunhui Cui

v1/v2: There is only the first patch: RISC-V: Enable cbo.clean/flush in usermode, which mainly removes the enabling of cbo.inval in user mode. v3: Add the functionality to expose Zicbom, and add selftests for Zicbom. v4: Modify the order of macros; the test_no_cbo_inval function is added separately. v5: 1. Modify the order of RISCV_HWPROBE_KEY_ZICBOM_BLOCK_SIZE in hwprobe.rst 2. "TEST_NO_ZICBOINVAL" -> "TEST_NO_CBO_INVAL" v6: Change hwprobe_ext0_has's second param to u64. v7: Rebase to the latest code of linux-next. Yunhui Cui (3): RISC-V: Enable cbo.clean/flush in usermode RISC-V: hwprobe: Expose Zicbom extension and its block size RISC-V: selftests: Add TEST_ZICBOM into CBO tests Documentation/arch/riscv/hwprobe.rst | 6 ++ arch/riscv/include/asm/hwprobe.h | 2 +- arch/riscv/include/uapi/asm/hwprobe.h | 2 + arch/riscv/kernel/cpufeature.c | 8 +++ arch/riscv/kernel/sys_hwprobe.c | 8 ++- tools/testing/selftests/riscv/hwprobe/cbo.c | 66 +++++++++++++++++---- 6 files changed, 79 insertions(+), 13 deletions(-) -- 2.39.2

7 months, 1 week

2
4
0 0

[PATCH] selftests/filesystems: remove duplicate sys/types.h header

by Chen Ni

Remove duplicate header which is included twice. Signed-off-by: Chen Ni <nichen(a)iscas.ac.cn> --- tools/testing/selftests/filesystems/utils.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/filesystems/utils.c b/tools/testing/selftests/filesystems/utils.c index e553c89c5b19..06419bf4ba19 100644 --- a/tools/testing/selftests/filesystems/utils.c +++ b/tools/testing/selftests/filesystems/utils.c @@ -3,7 +3,6 @@ #define _GNU_SOURCE #endif #include <fcntl.h> -#include <sys/types.h> #include <dirent.h> #include <grp.h> #include <linux/limits.h> -- 2.25.1

7 months, 1 week

1
0
0 0

[PATCH net-next v2 0/7] selftests/net: Mixed select()+polling mode for TCP-AO tests

by Dmitry Safonov via B4 Relay

Should fix flaky tcp-ao/connect-deny-ipv6 test. Begging pardon for the delay since the report and for sending it this late in the release cycle. To: David S. Miller <davem(a)davemloft.net> To: Eric Dumazet <edumazet(a)google.com> To: Jakub Kicinski <kuba(a)kernel.org> To: Paolo Abeni <pabeni(a)redhat.com> To: Simon Horman <horms(a)kernel.org> To: Shuah Khan <shuah(a)kernel.org> Cc: netdev(a)vger.kernel.org Cc: linux-kselftest(a)vger.kernel.org Cc: linux-kernel(a)vger.kernel.org Signed-off-by: Dmitry Safonov <0x7f454c46(a)gmail.com> Changes in v2: - Base on net-next (Paolo) - Add a missing Fixes tag (Paolo) - Link to v1: https://lore.kernel.org/r/20250312-tcp-ao-selftests-polling-v1-0-72a642b855… --- Dmitry Safonov (7): selftests/net: Print TCP flags in more common format selftests/net: Provide tcp-ao counters comparison helper selftests/net: Fetch and check TCP-MD5 counters selftests/net: Add mixed select()+polling mode to TCP-AO tests selftests/net: Print the testing side in unsigned-md5 selftests/net: Delete timeout from test_connect_socket() selftests/net: Drop timeout argument from test_client_verify() tools/testing/selftests/net/tcp_ao/connect-deny.c | 58 ++-- tools/testing/selftests/net/tcp_ao/connect.c | 22 +- tools/testing/selftests/net/tcp_ao/icmps-discard.c | 17 +- .../testing/selftests/net/tcp_ao/key-management.c | 76 ++--- tools/testing/selftests/net/tcp_ao/lib/aolib.h | 114 ++++++-- .../testing/selftests/net/tcp_ao/lib/ftrace-tcp.c | 7 +- tools/testing/selftests/net/tcp_ao/lib/sock.c | 315 +++++++++++++++------ tools/testing/selftests/net/tcp_ao/restore.c | 75 +++-- tools/testing/selftests/net/tcp_ao/rst.c | 47 ++- tools/testing/selftests/net/tcp_ao/self-connect.c | 18 +- tools/testing/selftests/net/tcp_ao/seq-ext.c | 30 +- tools/testing/selftests/net/tcp_ao/unsigned-md5.c | 118 ++++---- 12 files changed, 552 insertions(+), 345 deletions(-) --- base-commit: 23c9ff659140f97d44bf6fb59f89526a168f2b86 change-id: 
20250312-tcp-ao-selftests-polling-21b6bbdf77b6 Best regards, -- Dmitry Safonov <0x7f454c46(a)gmail.com>

7 months, 1 week

4
12
0 0

[PATCH v2 0/2] KVM: selftests: access_tracking_perf_test: skip the test when NUMA balancing is active

by Maxim Levitsky

Due to several issues which are unlikely to be fixed in the near future, the access_tracking_perf_test sanity check for how many pages are still clean after an iteration is not reliable when NUMA balancing is active. This patch series refactors this test to skip this check by default automatically. V2: adopted Sean's suggestions. Best regards, Maxim Levitsky Maxim Levitsky (1): KVM: selftests: access_tracking_perf_test: add option to skip the sanity check Sean Christopherson (1): KVM: selftests: Extract guts of THP accessor to standalone sysfs helpers .../selftests/kvm/access_tracking_perf_test.c | 33 +++++++++++++-- .../testing/selftests/kvm/include/test_util.h | 1 + tools/testing/selftests/kvm/lib/test_util.c | 42 ++++++++++++++----- 3 files changed, 61 insertions(+), 15 deletions(-) -- 2.26.3

7 months, 1 week

3
7
0 0

[PATCH v1] selftests/mm/cow: fix minor memory leak in child_vmsplice_memcmp_fn()

by Malaya Kumar Rout

Static analysis of cow.c reports an error: an exception branch exits without releasing the memory 'old' and 'new'. Fix the issue by releasing the allocated memory. Signed-off-by: Malaya Kumar Rout <malayarout91(a)gmail.com> --- tools/testing/selftests/mm/cow.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index 9446673645eb..fdee42850548 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -168,18 +168,18 @@ static int child_vmsplice_memcmp_fn(char *mem, size_t size, memcpy(old, mem, size); if (pipe(fds) < 0) - return -errno; + goto out; /* Trigger a read-only pin. */ transferred = vmsplice(fds[1], &iov, 1, 0); if (transferred < 0) - return -errno; + goto out; if (transferred == 0) return -EINVAL; /* Unmap it from our page tables. */ if (munmap(mem, size) < 0) - return -errno; + goto out; /* Wait until the parent modified it. */ write(comm_pipes->child_ready[1], "0", 1); @@ -190,10 +190,15 @@ static int child_vmsplice_memcmp_fn(char *mem, size_t size, for (total = 0; total < transferred; total += cur) { cur = read(fds[0], new + total, transferred - total); if (cur < 0) - return -errno; + goto out; } return memcmp(old, new, transferred); + +out: + free(old); + free(new); + return -errno; } typedef int (*child_fn)(char *mem, size_t size, struct comm_pipes *comm_pipes); -- 2.43.0

7 months, 1 week

2
1
0 0

[PATCH] selftests: vDSO: vdso_standalone_test_x86: Replace source file with symlink

by Thomas Weißschuh

With the switch over to nolibc the source file vdso_standalone_test_x86.c was intended to be replaced with a symlink to vdso_test_gettimeofday.c. This was the patch that was submitted to LKML, but during application the symlink was replaced by a textual copy of the linked-to file. Having two copies introduces the possibility of divergence and increases maintenance burden, switch back to a symlink. Link: https://lore.kernel.org/lkml/20250226-parse_vdso-nolibc-v2-16-28e14e031ed8@… Fixes: 8770a9183fe1 ("selftests: vDSO: vdso_standalone_test_x86: Switch to nolibc") Signed-off-by: Thomas Weißschuh <thomas.weissschuh(a)linutronix.de> --- If symlinks are problematic an #include shim would also work. These are not handled really well by the kselftests build system though, as #include dependencies are not tracked by it. --- .../selftests/vDSO/vdso_standalone_test_x86.c | 59 +--------------------- 1 file changed, 1 insertion(+), 58 deletions(-) diff --git a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c deleted file mode 100644 index 9ce795b806f0992b83cef78c7e16fac0e54750da..0000000000000000000000000000000000000000 --- a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c +++ /dev/null @@ -1,58 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * vdso_test_gettimeofday.c: Sample code to test parse_vdso.c and - * vDSO gettimeofday() - * Copyright (c) 2014 Andy Lutomirski - * - * Compile with: - * gcc -std=gnu99 vdso_test_gettimeofday.c parse_vdso_gettimeofday.c - * - * Tested on x86, 32-bit and 64-bit. It may work on other architectures, too. 
- */ - -#include <stdio.h> -#ifndef NOLIBC -#include <sys/auxv.h> -#include <sys/time.h> -#endif - -#include "../kselftest.h" -#include "parse_vdso.h" -#include "vdso_config.h" -#include "vdso_call.h" - -int main(int argc, char **argv) -{ - const char *version = versions[VDSO_VERSION]; - const char **name = (const char **)&names[VDSO_NAMES]; - - unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR); - if (!sysinfo_ehdr) { - printf("AT_SYSINFO_EHDR is not present!\n"); - return KSFT_SKIP; - } - - vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR)); - - /* Find gettimeofday. */ - typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz); - gtod_t gtod = (gtod_t)vdso_sym(version, name[0]); - - if (!gtod) { - printf("Could not find %s\n", name[0]); - return KSFT_SKIP; - } - - struct timeval tv; - long ret = VDSO_CALL(gtod, 2, &tv, 0); - - if (ret == 0) { - printf("The time is %lld.%06lld\n", - (long long)tv.tv_sec, (long long)tv.tv_usec); - } else { - printf("%s failed\n", name[0]); - return KSFT_FAIL; - } - - return 0; -} diff --git a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c new file mode 120000 index 0000000000000000000000000000000000000000..4d3d96f1e440c965474681a6f35375a60b3921be --- /dev/null +++ b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c @@ -0,0 +1 @@ +vdso_test_gettimeofday.c \ No newline at end of file --- base-commit: 1e26c5e28ca5821a824e90dd359556f5e9e7b89f change-id: 20250326-vdso-selftests-fix-vdso_standalone_test_x86-c3a77b57ccbd Best regards, -- Thomas Weißschuh <thomas.weissschuh(a)linutronix.de>

7 months, 1 week

1
0
0 0

[PATCH v4 00/30] context_tracking,x86: Defer some IPIs until a user->kernel transition

by Valentin Schneider

Context ======= We've observed within Red Hat that isolated, NOHZ_FULL CPUs running a pure-userspace application get regularly interrupted by IPIs sent from housekeeping CPUs. Those IPIs are caused by activity on the housekeeping CPUs leading to various on_each_cpu() calls, e.g.: 64359.052209596 NetworkManager 0 1405 smp_call_function_many_cond (cpu=0, func=do_kernel_range_flush) smp_call_function_many_cond+0x1 smp_call_function+0x39 on_each_cpu+0x2a flush_tlb_kernel_range+0x7b __purge_vmap_area_lazy+0x70 _vm_unmap_aliases.part.42+0xdf change_page_attr_set_clr+0x16a set_memory_ro+0x26 bpf_int_jit_compile+0x2f9 bpf_prog_select_runtime+0xc6 bpf_prepare_filter+0x523 sk_attach_filter+0x13 sock_setsockopt+0x92c __sys_setsockopt+0x16a __x64_sys_setsockopt+0x20 do_syscall_64+0x87 entry_SYSCALL_64_after_hwframe+0x65 The heart of this series is the thought that while we cannot remove NOHZ_FULL CPUs from the list of CPUs targeted by these IPIs, they may not have to execute the callbacks immediately. Anything that only affects kernelspace can wait until the next user->kernel transition, providing it can be executed "early enough" in the entry code. The original implementation is from Peter [1]. Nicolas then added kernel TLB invalidation deferral to that [2], and I picked it up from there. Deferral approach ================= Storing each and every callback, like a secondary call_single_queue turned out to be a no-go: the whole point of deferral is to keep NOHZ_FULL CPUs in userspace for as long as possible - no signal of any form would be sent when deferring an IPI. This means that any form of queuing for deferred callbacks would end up as a convoluted memory leak. Deferred IPIs must thus be coalesced, which this series achieves by assigning IPIs a "type" and having a mapping of IPI type to callback, leveraged upon kernel entry. What about IPIs whose callback take a parameter, you may ask? 
Peter suggested during OSPM23 [3] that since on_each_cpu() targets housekeeping CPUs *and* isolated CPUs, isolated CPUs can access either global or housekeeping-CPU-local state to "reconstruct" the data that would have been sent via the IPI. This series does not affect any IPI callback that requires an argument, but the approach would remain the same (one coalescable callback executed on kernel entry). Kernel entry vs execution of the deferred operation =================================================== This is what I've referred to as the "Danger Zone" during my LPC24 talk [4]. There is a non-zero length of code that is executed upon kernel entry before the deferred operation can be itself executed (i.e. before we start getting into context_tracking.c proper), i.e.: idtentry_func_foo() <--- we're in the kernel irqentry_enter() enter_from_user_mode() __ct_user_exit() ct_kernel_enter_state() ct_work_flush() <--- deferred operation is executed here This means one must take extra care to what can happen in the early entry code, and that <bad things> cannot happen. For instance, we really don't want to hit instructions that have been modified by a remote text_poke() while we're on our way to execute a deferred sync_core(). Patches doing the actual deferral have more detail on this. Patches ======= o Patches 1-2 are standalone objtool cleanups. o Patches 3-4 add an RCU testing feature. o Patches 5-6 add infrastructure for annotating static keys and static calls that may be used in noinstr code (courtesy of Josh). o Patches 7-19 use said annotations on relevant keys / calls. o Patch 20 enforces proper usage of said annotations (courtesy of Josh). o Patches 21-23 fiddle with CT_STATE* within context tracking o Patches 24-29 add the actual IPI deferral faff o Patch 30 adds a freebie: deferring IPIs for NOHZ_IDLE. Not tested that much! if you care about battery-powered devices and energy consumption, go give it a try! 
Patches are also available at: https://gitlab.com/vschneid/linux.git -b redhat/isolirq/defer/v4 Stuff I'd like eyes and neurons on ================================== Context-tracking vs idle. Patch 22 "improves" the situation by adding an IDLE->KERNEL transition when getting an IRQ while idle, but it leaves the following window: ~> IRQ ct_nmi_enter() state = state + CT_STATE_KERNEL - CT_STATE_IDLE [...] ct_nmi_exit() state = state - CT_STATE_KERNEL + CT_STATE_IDLE [...] /!\ CT_STATE_IDLE here while we're really in kernelspace! /!\ ct_cpuidle_exit() state = state + CT_STATE_KERNEL - CT_STATE_IDLE Said window is contained within cpu_idle_poll() and the cpuidle call within cpuidle_enter_state(), both being noinstr (the former is __cpuidle which is noinstr itself). Thus objtool will consider it as early entry and will warn accordingly of any static key / call misuse, so the damage is somewhat contained, but it's not ideal. I tried fiddling with this but idle polling likes being annoying, as it is shaped like so: ct_cpuidle_enter(); raw_local_irq_enable(); while (!tif_need_resched() && (cpu_idle_force_poll || tick_check_broadcast_expired())) cpu_relax(); raw_local_irq_disable(); ct_cpuidle_exit(); IOW, getting an IRQ that doesn't end up setting NEED_RESCHED while idle-polling doesn't come near ct_cpuidle_exit(), which prevents me from having the outermost ct_nmi_exit() leave the state as CT_STATE_KERNEL (rather than CT_STATE_IDLE). Testing ======= Xeon E5-2699 system with SMToff, NOHZ_FULL, isolated CPUs. RHEL9 userspace. Workload is using rteval (kernel compilation + hackbench) on housekeeping CPUs and a dummy stay-in-userspace loop on the isolated CPUs. 
The main invocation is: $ trace-cmd record -e "csd_queue_cpu" -f "cpu & CPUS{$ISOL_CPUS}" \ -e "ipi_send_cpumask" -f "cpumask & CPUS{$ISOL_CPUS}" \ -e "ipi_send_cpu" -f "cpu & CPUS{$ISOL_CPUS}" \ rteval --onlyload --loads-cpulist=$HK_CPUS \ --hackbench-runlowmem=True --duration=$DURATION This only records IPIs sent to isolated CPUs, so any event there is interference (with a bit of fuzz at the start/end of the workload when spawning the processes). All tests were done with a duration of 6 hours. v6.13-rc6 # This is the actual IPI count $ trace-cmd report | grep callback | awk '{ print $(NF) }' | sort | uniq -c | sort -nr 531 callback=generic_smp_call_function_single_interrupt+0x0 # These are the different CSD's that caused IPIs $ trace-cmd report | grep csd_queue | awk '{ print $(NF-1) }' | sort | uniq -c | sort -nr 12818 func=do_flush_tlb_all 910 func=do_kernel_range_flush 78 func=do_sync_core v6.13-rc6 + patches: # This is the actual IPI count $ trace-cmd report | grep callback | awk '{ print $(NF) }' | sort | uniq -c | sort -nr # Zilch! # These are the different CSD's that caused IPIs $ trace-cmd report | grep csd_queue | awk '{ print $(NF-1) }' | sort | uniq -c | sort -nr # Nada! Note that tlb_remove_table_smp_sync() showed up during testing of v3, and has gone as mysteriously as it showed up. Yair had a series addressing this [5] which would be worth revisiting. Acknowledgements ================ Special thanks to: o Clark Williams for listening to my ramblings about this and throwing ideas my way o Josh Poimboeuf for all his help with everything objtool-related o All of the folks who attended various (too many?) talks about this and provided precious feedback. 
Links ===== [1]: https://lore.kernel.org/all/20210929151723.162004989@infradead.org/ [2]: https://github.com/vianpl/linux.git -b ct-work-defer-wip [3]: https://youtu.be/0vjE6fjoVVE [4]: https://lpc.events/event/18/contributions/1889/ [5]: https://lore.kernel.org/lkml/20230620144618.125703-1-ypodemsk@redhat.com/ Revisions ========= RFCv3 -> v4 ++++++++++++++ o Rebased onto v6.13-rc6 o New objtool patches from Josh o More .noinstr static key/call patches o Static calls now handled as well (again thanks to Josh) o Fixed clearing the work bits on kernel exit o Messed with IRQ hitting an idle CPU vs context tracking o Various comment and naming cleanups o Made RCU_DYNTICKS_TORTURE depend on !COMPILE_TEST (PeterZ) o Fixed the CT_STATE_KERNEL check when setting a deferred work (Frederic) o Cleaned up the __flush_tlb_all() mess thanks to PeterZ RFCv2 -> RFCv3 ++++++++++++++ o Rebased onto v6.12-rc6 o Added objtool documentation for the new warning (Josh) o Added low-size RCU watching counter to TREE04 torture scenario (Paul) o Added FORCEFUL jump label and static key types o Added noinstr-compliant helpers for tlb flush deferral RFCv1 -> RFCv2 ++++++++++++++ o Rebased onto v6.5-rc1 o Updated the trace filter patches (Steven) o Fixed __ro_after_init keys used in modules (Peter) o Dropped the extra context_tracking atomic, squashed the new bits in the existing .state field (Peter, Frederic) o Added an RCU_EXPERT config for the RCU dynticks counter size, and added an rcutorture case for a low-size counter (Paul) o Fixed flush_tlb_kernel_range_deferrable() definition Josh Poimboeuf (3): jump_label: Add annotations for validating noinstr usage static_call: Add read-only-after-init static calls objtool: Add noinstr validation for static branches/calls Peter Zijlstra (1): x86,tlb: Make __flush_tlb_global() noinstr-compliant Valentin Schneider (26): objtool: Make validate_call() recognize indirect calls to pv_ops[] objtool: Flesh out warning related to pv_ops[] calls rcu: Add a 
small-width RCU watching counter debug option rcutorture: Make TREE04 use CONFIG_RCU_DYNTICKS_TORTURE x86/paravirt: Mark pv_sched_clock static call as __ro_after_init x86/idle: Mark x86_idle static call as __ro_after_init x86/paravirt: Mark pv_steal_clock static call as __ro_after_init riscv/paravirt: Mark pv_steal_clock static call as __ro_after_init loongarch/paravirt: Mark pv_steal_clock static call as __ro_after_init arm64/paravirt: Mark pv_steal_clock static call as __ro_after_init arm/paravirt: Mark pv_steal_clock static call as __ro_after_init perf/x86/amd: Mark perf_lopwr_cb static call as __ro_after_init sched/clock: Mark sched_clock_running key as __ro_after_init x86/speculation/mds: Mark mds_idle_clear key as allowed in .noinstr sched/clock, x86: Mark __sched_clock_stable key as allowed in .noinstr x86/kvm/vmx: Mark vmx_l1d_should flush and vmx_l1d_flush_cond keys as allowed in .noinstr stackleack: Mark stack_erasing_bypass key as allowed in .noinstr context_tracking: Explicitely use CT_STATE_KERNEL where it is missing context_tracking: Exit CT_STATE_IDLE upon irq/nmi entry context_tracking: Turn CT_STATE_* into bits context-tracking: Introduce work deferral infrastructure context_tracking,x86: Defer kernel text patching IPIs x86/tlb: Make __flush_tlb_local() noinstr-compliant x86/tlb: Make __flush_tlb_all() noinstr x86/mm, mm/vmalloc: Defer flush_tlb_kernel_range() targeting NOHZ_FULL CPUs context-tracking: Add a Kconfig to enable IPI deferral for NO_HZ_IDLE arch/Kconfig | 9 ++ arch/arm/kernel/paravirt.c | 2 +- arch/arm64/kernel/paravirt.c | 2 +- arch/loongarch/kernel/paravirt.c | 2 +- arch/riscv/kernel/paravirt.c | 2 +- arch/x86/Kconfig | 1 + arch/x86/events/amd/brs.c | 2 +- arch/x86/include/asm/context_tracking_work.h | 22 ++++ arch/x86/include/asm/invpcid.h | 13 +-- arch/x86/include/asm/paravirt.h | 4 +- arch/x86/include/asm/text-patching.h | 1 + arch/x86/include/asm/tlbflush.h | 3 +- arch/x86/include/asm/xen/hypercall.h | 11 +- 
arch/x86/kernel/alternative.c | 38 ++++++- arch/x86/kernel/cpu/bugs.c | 9 +- arch/x86/kernel/kprobes/core.c | 4 +- arch/x86/kernel/kprobes/opt.c | 4 +- arch/x86/kernel/module.c | 2 +- arch/x86/kernel/paravirt.c | 4 +- arch/x86/kernel/process.c | 2 +- arch/x86/kvm/vmx/vmx.c | 11 +- arch/x86/mm/tlb.c | 46 ++++++-- arch/x86/xen/mmu_pv.c | 10 +- arch/x86/xen/xen-ops.h | 12 +- include/asm-generic/sections.h | 15 +++ include/linux/context_tracking.h | 21 ++++ include/linux/context_tracking_state.h | 64 +++++++++-- include/linux/context_tracking_work.h | 28 +++++ include/linux/jump_label.h | 30 ++++- include/linux/objtool.h | 7 ++ include/linux/static_call.h | 19 ++++ kernel/context_tracking.c | 98 ++++++++++++++-- kernel/rcu/Kconfig.debug | 15 +++ kernel/sched/clock.c | 7 +- kernel/stackleak.c | 6 +- kernel/time/Kconfig | 19 ++++ mm/vmalloc.c | 35 +++++- tools/objtool/Documentation/objtool.txt | 34 ++++++ tools/objtool/check.c | 106 +++++++++++++++--- tools/objtool/include/objtool/check.h | 1 + tools/objtool/include/objtool/elf.h | 1 + tools/objtool/include/objtool/special.h | 1 + tools/objtool/special.c | 18 ++- .../selftests/rcutorture/configs/rcu/TREE04 | 1 + 44 files changed, 635 insertions(+), 107 deletions(-) create mode 100644 arch/x86/include/asm/context_tracking_work.h create mode 100644 include/linux/context_tracking_work.h -- 2.43.0

7 months, 1 week

11
85
0 0

[PATCHv5 net-next 0/2] wireguard: selftests: use nftables for testing

by Hangbin Liu

This patch set converts iptables to nftables for wireguard testing, as iptables is deprecated and nftables is the default framework of most releases. v5: remove the counter in nft rules and link nft statically (Jason A. Donenfeld) v4: no update, just re-send v3: drop iptables directly (Jason A. Donenfeld) Also convert to using nft for qemu testing (Jason A. Donenfeld) v2: use one nft table for testing (Phil Sutter) Hangbin Liu (2): wireguard: selftests: convert iptables to nft wireguard: selftests: update to using nft for qemu test tools/testing/selftests/wireguard/netns.sh | 29 +++++++++------ .../testing/selftests/wireguard/qemu/Makefile | 36 ++++++++++++++----- .../selftests/wireguard/qemu/kernel.config | 7 ++-- 3 files changed, 49 insertions(+), 23 deletions(-) -- 2.46.0

7 months, 1 week

2
5
0 0

[PATCH rcu 00/11] RCU torture changes for v6.15

by Boqun Feng

Hi, Please find the upcoming changes in rcutorture for v6.15. The changes can also be found at: git://git.kernel.org/pub/scm/linux/kernel/git/rcu/linux.git torture.2025.02.05a Regards, Boqun Paul E. McKenney (11): torture: Add get_torture_init_jiffies() for test-start time rcutorture: Add a test_boost_holdoff module parameter rcutorture: Include grace-period sequence numbers in failure/close-call rcutorture: Expand failure/close-call grace-period output rcu: Trace expedited grace-period numbers in hexadecimal rcutorture: Add ftrace-compatible timestamp to GP# failure/close-call output rcutorture: Make cur_ops->format_gp_seqs take buffer length rcutorture: Move RCU_TORTURE_TEST_{CHK_RDR_STATE,LOG_CPU} to bool rcutorture: Complain when invalid SRCU reader_flavor is specified srcu: Add FORCE_NEED_SRCU_NMI_SAFE Kconfig for testing torture: Make SRCU lockdep testing use srcu_read_lock_nmisafe() .../admin-guide/kernel-parameters.txt | 5 ++ include/linux/torture.h | 1 + include/trace/events/rcu.h | 2 +- kernel/rcu/Kconfig | 11 ++++ kernel/rcu/Kconfig.debug | 18 ++++- kernel/rcu/rcu.h | 2 + kernel/rcu/rcutorture.c | 65 +++++++++++++++++-- kernel/rcu/tiny.c | 14 ++++ kernel/rcu/tree.c | 20 ++++++ kernel/torture.c | 12 ++++ .../selftests/rcutorture/bin/srcu_lockdep.sh | 2 +- 11 files changed, 144 insertions(+), 8 deletions(-) -- 2.39.5 (Apple Git-154)

7 months, 1 week

3
18
0 0

Exclusive Proposal

by Gen Johan Bruntink

Hello I am keen to exchange thoughts on financial opportunities with you. If it aligns with your preferences, I would appreciate your WhatsApp contact details for a more fluid dialogue. Should that not suit, please let me know a convenient time for us to correspond directly. Thank you for your kind consideration. I eagerly anticipate your reply. Warm regards, General Johanne

7 months, 1 week

1
0
0 0

[PATCH v10 net-next 0/5] DUALPI2 patch

by chia-yu.chang＠nokia-bell-labs.com

From: Chia-Yu Chang <chia-yu.chang(a)nokia-bell-labs.com> Hello, Please find DUALPI2 patch v10. v10 - Remove leftover include in include/linux/netdevice.h and anonymous struct in sch_dualpi2.c (Paolo Abeni <pabeni(a)redhat.com>) - Use kfree_skb_reason() and add SKB_DROP_REASON_DUALPI2_STEP_DROP drop reason (Paolo Abeni <pabeni(a)redhat.com>) - Split DualPI2 patch into 3 patches: Struct definition & parsing, Dump statistics & configuration, Enqueue/Dequeue (Paolo Abeni <pabeni(a)redhat.com>) v9 - Fix mem_usage error in previous version - Add min_qlen_step to dualpi2 attribute as the minimum queue length in number of packets in the L-queue to start step threshold marking. In previous versions, this value was fixed to 2, so only when the queue length of the L queue was greater than or equal to 2 packets step threshold was applied to mark packets in the L-queue. This will cause larger queuing delays for L4S traffic at low rates (<20Mbps). So we parameterize it and change the default value to 0. Comparison of tcp_1down run 'HTB 20Mbit + DUALPI2 + 10ms base delay' Old versions: avg median # data pts Ping (ms) ICMP : 11.55 11.70 ms 350 TCP upload avg : 18.96 N/A Mbits/s 350 TCP upload sum : 18.96 N/A Mbits/s 350 New version (v9): avg median # data pts Ping (ms) ICMP : 10.81 10.70 ms 350 TCP upload avg : 18.91 N/A Mbits/s 350 TCP upload sum : 18.91 N/A Mbits/s 350 Comparison of tcp_1down run 'HTB 10Mbit + DUALPI2 + 10ms base delay' Old versions: avg median # data pts Ping (ms) ICMP : 12.61 12.80 ms 350 TCP upload avg : 9.48 N/A Mbits/s 350 TCP upload sum : 9.48 N/A Mbits/s 350 New version (v9): avg median # data pts Ping (ms) ICMP : 11.06 10.80 ms 350 TCP upload avg : 9.43 N/A Mbits/s 350 TCP upload sum : 9.43 N/A Mbits/s 350 Comparison of tcp_1down run 'HTB 10Mbit + DUALPI2 + 10ms base delay' Old versions: avg median # data pts Ping (ms) ICMP : 40.86 37.45 ms 350 TCP upload avg : 0.88 N/A Mbits/s 350 TCP upload sum : 0.88 N/A Mbits/s 350 TCP upload::1 : 0.88 0.97 Mbits/s 350 
New version (v9): avg median # data pts Ping (ms) ICMP : 11.07 10.40 ms 350 TCP upload avg : 0.55 N/A Mbits/s 350 TCP upload sum : 0.55 N/A Mbits/s 350 TCP upload::1 : 0.55 0.59 Mbits/s 350 v8 - Fix warning messages in v7 v7 - Separate into 3 patches to avoid mixing changes of documentation, selftest, and code. (Cong Wang <xiyou.wangcong(a)gmail.com>) v6 - Add modprobe for dulapi2 in tc-testing script tc-testing/tdc.sh (Jakub Kicinski <kuba(a)kernel.org>) - Update test cases in dualpi2.json - Update commit message v5 - A comparison was done between MQ + DUALPI2, MQ + FQ_PIE, MQ + FQ_CODEL: Unshaped 1gigE with 4 download streams test: - Summary of tcp_4down run 'MQ + FQ_CODEL': avg median # data pts Ping (ms) ICMP : 1.19 1.34 ms 349 TCP download avg : 235.42 N/A Mbits/s 349 TCP download sum : 941.68 N/A Mbits/s 349 TCP download::1 : 235.19 235.39 Mbits/s 349 TCP download::2 : 235.03 235.35 Mbits/s 349 TCP download::3 : 236.89 235.44 Mbits/s 349 TCP download::4 : 234.57 235.19 Mbits/s 349 - Summary of tcp_4down run 'MQ + FQ_PIE' avg median # data pts Ping (ms) ICMP : 1.21 1.37 ms 350 TCP download avg : 235.42 N/A Mbits/s 350 TCP download sum : 941.61 N/A Mbits/s 350 TCP download::1 : 232.54 233.13 Mbits/s 350 TCP download::2 : 232.52 232.80 Mbits/s 350 TCP download::3 : 233.14 233.78 Mbits/s 350 TCP download::4 : 243.41 241.48 Mbits/s 350 - Summary of tcp_4down run 'MQ + DUALPI2' avg median # data pts Ping (ms) ICMP : 1.19 1.34 ms 349 TCP download avg : 235.42 N/A Mbits/s 349 TCP download sum : 941.68 N/A Mbits/s 349 TCP download::1 : 235.19 235.39 Mbits/s 349 TCP download::2 : 235.03 235.35 Mbits/s 349 TCP download::3 : 236.89 235.44 Mbits/s 349 TCP download::4 : 234.57 235.19 Mbits/s 349 Unshaped 1gigE with 128 download streams test: - Summary of tcp_128down run 'MQ + FQ_CODEL': avg median # data pts Ping (ms) ICMP : 1.88 1.86 ms 350 TCP download avg : 7.39 N/A Mbits/s 350 TCP download sum : 946.47 N/A Mbits/s 350 - Summary of tcp_128down run 'MQ + FQ_PIE': avg 
median # data pts Ping (ms) ICMP : 1.88 1.86 ms 350 TCP download avg : 7.39 N/A Mbits/s 350 TCP download sum : 946.47 N/A Mbits/s 350 - Summary of tcp_128down run 'MQ + DUALPI2': avg median # data pts Ping (ms) ICMP : 1.88 1.86 ms 350 TCP download avg : 7.39 N/A Mbits/s 350 TCP download sum : 946.47 N/A Mbits/s 350 Unshaped 10gigE with 4 download streams test: - Summary of tcp_4down run 'MQ + FQ_CODEL': avg median # data pts Ping (ms) ICMP : 0.22 0.23 ms 350 TCP download avg : 2354.08 N/A Mbits/s 350 TCP download sum : 9416.31 N/A Mbits/s 350 TCP download::1 : 2353.65 2352.81 Mbits/s 350 TCP download::2 : 2354.54 2354.21 Mbits/s 350 TCP download::3 : 2353.56 2353.78 Mbits/s 350 TCP download::4 : 2354.56 2354.45 Mbits/s 350 - Summary of tcp_4down run 'MQ + FQ_PIE': avg median # data pts Ping (ms) ICMP : 0.20 0.19 ms 350 TCP download avg : 2354.76 N/A Mbits/s 350 TCP download sum : 9419.04 N/A Mbits/s 350 TCP download::1 : 2354.77 2353.89 Mbits/s 350 TCP download::2 : 2353.41 2354.29 Mbits/s 350 TCP download::3 : 2356.18 2354.19 Mbits/s 350 TCP download::4 : 2354.68 2353.15 Mbits/s 350 - Summary of tcp_4down run 'MQ + DUALPI2': avg median # data pts Ping (ms) ICMP : 0.24 0.24 ms 350 TCP download avg : 2354.11 N/A Mbits/s 350 TCP download sum : 9416.43 N/A Mbits/s 350 TCP download::1 : 2354.75 2353.93 Mbits/s 350 TCP download::2 : 2353.15 2353.75 Mbits/s 350 TCP download::3 : 2353.49 2353.72 Mbits/s 350 TCP download::4 : 2355.04 2353.73 Mbits/s 350 Unshaped 10gigE with 128 download streams test: - Summary of tcp_128down run 'MQ + FQ_CODEL': avg median # data pts Ping (ms) ICMP : 7.57 8.69 ms 350 TCP download avg : 73.97 N/A Mbits/s 350 TCP download sum : 9467.82 N/A Mbits/s 350 - Summary of tcp_128down run 'MQ + FQ_PIE': avg median # data pts Ping (ms) ICMP : 7.82 8.91 ms 350 TCP download avg : 73.97 N/A Mbits/s 350 TCP download sum : 9468.42 N/A Mbits/s 350 - Summary of tcp_128down run 'MQ + DUALPI2': avg median # data pts Ping (ms) ICMP : 6.87 7.93 ms 350 TCP 
download avg : 73.95 N/A Mbits/s 350 TCP download sum : 9465.87 N/A Mbits/s 350 From the results shown above, we see small differences between combinations. - Update commit message to include results of no_split_gso and split_gso (Dave Taht <dave.taht(a)gmail.com> and Paolo Abeni <pabeni(a)redhat.com>) - Add memlimit in dualpi2 attribute, and add memory_used, max_memory_used, memory_limit in dualpi2 stats (Dave Taht <dave.taht(a)gmail.com>) - Update note in sch_dualpi2.c related to BBRv3 status (Dave Taht <dave.taht(a)gmail.com>) - Update license identifier (Dave Taht <dave.taht(a)gmail.com>) - Add selftest in tools/testing/selftests/tc-testing (Cong Wang <xiyou.wangcong(a)gmail.com>) - Use netlink policies for parameter checks (Jamal Hadi Salim <jhs(a)mojatatu.com>) - Modify texts & fix typos in Documentation/netlink/specs/tc.yaml (Dave Taht <dave.taht(a)gmail.com>) - Add descriptions of packet counter statistics and reset function of sch_dualpi2.c - Fix step_thresh in packets - Update code comments in sch_dualpi2.c v4 - Update statement in Kconfig for DualPI2 (Stephen Hemminger <stephen(a)networkplumber.org>) - Put a blank line after #define in sch_dualpi2.c (Stephen Hemminger <stephen(a)networkplumber.org>) - Fix line length warning v3 - Fix compilation error - Update Documentation/netlink/specs/tc.yaml (Jakub Kicinski <kuba(a)kernel.org>) v2 - Add Documentation/netlink/specs/tc.yaml (Jakub Kicinski <kuba(a)kernel.org>) - Use dualpi2 instead of skb prefix (Jamal Hadi Salim <jhs(a)mojatatu.com>) - Replace nla_parse_nested_deprecated with nla_parse_nested (Jamal Hadi Salim <jhs(a)mojatatu.com>) - Fix line length warning For more details of DualPI2, please refer to IETF RFC9332 (https://datatracker.ietf.org/doc/html/rfc9332). 
Best regards, Chia-Yu Chia-Yu Chang (4): Documentation: netlink: specs: tc: Add DualPI2 specification selftests/tc-testing: Add selftests for qdisc DualPI2 sched: Struct definition and parsing of dualpi2 qdisc sched: Dump configuration and statistics of dualpi2 qdisc Koen De Schepper (1): sched: Add enqueue/dequeue of dualpi2 qdisc Documentation/netlink/specs/tc.yaml | 144 +++ include/net/dropreason-core.h | 6 + include/uapi/linux/pkt_sched.h | 39 + net/sched/Kconfig | 12 + net/sched/Makefile | 1 + net/sched/sch_dualpi2.c | 1092 +++++++++++++++++ tools/testing/selftests/tc-testing/config | 1 + .../tc-testing/tc-tests/qdiscs/dualpi2.json | 149 +++ tools/testing/selftests/tc-testing/tdc.sh | 1 + 9 files changed, 1445 insertions(+) create mode 100644 net/sched/sch_dualpi2.c create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/dualpi2.json -- 2.34.1

7 months, 1 week

2
6
0 0

[PATCH v5 0/6] rust: reduce pointer casts, enable related lints

by Tamir Duberstein

This started with a patch that enabled `clippy::ptr_as_ptr`. Benno Lossin suggested I also look into `clippy::ptr_cast_constness` and I discovered `clippy::as_ptr_cast_mut`. This series now enables all 3 lints. It also enables `clippy::as_underscore` which ensures other pointer casts weren't missed. The first commit reduces the need for pointer casts and is shared with another series[1]. The final patch also enables pointer provenance lints and fixes violations. See that commit message for details. The build system portion of that commit is pretty messy but I couldn't find a better way to convincingly ensure that these lints were applied globally. Suggestions would be very welcome. Link: https://lore.kernel.org/all/20250307-no-offset-v1-0-0c728f63b69c@gmail.com/ [1] Signed-off-by: Tamir Duberstein <tamird(a)gmail.com> --- Changes in v5: - Use `pointer::addr` in OF. (Boqun Feng) - Add documentation on stubs. (Benno Lossin) - Mark stubs `#[inline]`. - Pick up Alice's RB on a shared commit from https://lore.kernel.org/all/Z9f-3Aj3_FWBZRrm@google.com/. - Link to v4: https://lore.kernel.org/r/20250315-ptr-as-ptr-v4-0-b2d72c14dc26@gmail.com Changes in v4: - Add missing SoB. (Benno Lossin) - Use `without_provenance_mut` in alloc. (Boqun Feng) - Limit strict provenance lints to the `kernel` crate to avoid complex logic in the build system. This can be revisited on MSRV >= 1.84.0. - Rebase on rust-next. - Link to v3: https://lore.kernel.org/r/20250314-ptr-as-ptr-v3-0-e7ba61048f4a@gmail.com Changes in v3: - Fixed clippy warning in rust/kernel/firmware.rs. (kernel test robot) Link: https://lore.kernel.org/all/202503120332.YTCpFEvv-lkp@intel.com/ - s/as u64/as bindings::phys_addr_t/g. (Benno Lossin) - Use strict provenance APIs and enable lints. (Benno Lossin) - Link to v2: https://lore.kernel.org/r/20250309-ptr-as-ptr-v2-0-25d60ad922b7@gmail.com Changes in v2: - Fixed typo in first commit message. - Added additional patches, converted to series. 
- Link to v1: https://lore.kernel.org/r/20250307-ptr-as-ptr-v1-1-582d06514c98@gmail.com --- Tamir Duberstein (6): rust: retain pointer mut-ness in `container_of!` rust: enable `clippy::ptr_as_ptr` lint rust: enable `clippy::ptr_cast_constness` lint rust: enable `clippy::as_ptr_cast_mut` lint rust: enable `clippy::as_underscore` lint rust: use strict provenance APIs Makefile | 4 ++ init/Kconfig | 3 + rust/bindings/lib.rs | 1 + rust/kernel/alloc.rs | 2 +- rust/kernel/alloc/allocator_test.rs | 2 +- rust/kernel/alloc/kvec.rs | 4 +- rust/kernel/block/mq/operations.rs | 2 +- rust/kernel/block/mq/request.rs | 7 +- rust/kernel/device.rs | 5 +- rust/kernel/device_id.rs | 2 +- rust/kernel/devres.rs | 19 +++--- rust/kernel/error.rs | 2 +- rust/kernel/firmware.rs | 3 +- rust/kernel/fs/file.rs | 2 +- rust/kernel/io.rs | 16 ++--- rust/kernel/kunit.rs | 15 ++--- rust/kernel/lib.rs | 113 ++++++++++++++++++++++++++++++++- rust/kernel/list/impl_list_item_mod.rs | 2 +- rust/kernel/miscdevice.rs | 2 +- rust/kernel/of.rs | 6 +- rust/kernel/pci.rs | 15 +++-- rust/kernel/platform.rs | 6 +- rust/kernel/print.rs | 11 ++-- rust/kernel/rbtree.rs | 23 +++---- rust/kernel/seq_file.rs | 3 +- rust/kernel/str.rs | 18 ++---- rust/kernel/sync/poll.rs | 2 +- rust/kernel/uaccess.rs | 12 ++-- rust/kernel/workqueue.rs | 12 ++-- rust/uapi/lib.rs | 1 + 30 files changed, 218 insertions(+), 97 deletions(-) --- base-commit: 498f7ee4773f22924f00630136da8575f38954e8 change-id: 20250307-ptr-as-ptr-21b1867fc4d4 Best regards, -- Tamir Duberstein <tamird(a)gmail.com>

7 months, 1 week

5
54
0 0

kself test fail for "kci_test_bridge_parent_id"

by ALOK TIWARI

Hi, "kci_test_bridge_parent_id" test failed with error "as device can not be enslaved while up". Here is Error log. ------------------- > ./rtnetlink.sh -t kci_test_bridge_parent_id -v COMMAND: ip link add name test-dummy0 type dummy COMMAND: ip link set test-dummy0 up COMMAND: modprobe -q netdevsim COMMAND: ip link add name test-bond0 type bond mode 802.3ad COMMAND: ip link set dev eni10np1 master test-bond0 Error: Device can not be enslaved while up. COMMAND: ip link set dev eni20np1 master test-bond0 Error: Device can not be enslaved while up. COMMAND: ip link add name test-br0 type bridge COMMAND: ip link set dev test-bond0 master test-br0 FAIL: bridge_parent_id ------------------- upstream commit ec4ffd100ffb ("Revert "net: rtnetlink: Enslave device before bringing it up""), suggest the following scenario! $ ip link set dummy0 up $ ip link set dummy0 master bond0 down According to last commit, do we need to modify "kci_test_bridge_parent_id" test set to down. --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -1129,8 +1129,8 @@ kci_test_bridge_parent_id() dev10=`ls ${sysfsnet}10/net/` dev20=`ls ${sysfsnet}20/net/` run_cmd ip link add name test-bond0 type bond mode 802.3ad - run_cmd ip link set dev $dev10 master test-bond0 - run_cmd ip link set dev $dev20 master test-bond0 + run_cmd ip link set dev $dev10 master test-bond0 down + run_cmd ip link set dev $dev20 master test-bond0 down run_cmd ip link add name test-br0 type bridge Success log with modified test case > ./rtnetlink.sh -t kci_test_bridge_parent_id -v COMMAND: ip link add name test-dummy0 type dummy COMMAND: ip link set test-dummy0 up COMMAND: modprobe -q netdevsim COMMAND: ip link add name test-bond0 type bond mode 802.3ad COMMAND: ip link set dev eni10np1 master test-bond0 down COMMAND: ip link set dev eni20np1 master test-bond0 down COMMAND: ip link add name test-br0 type bridge COMMAND: ip link set dev test-bond0 master test-br0 PASS: bridge_parent_id 
Thanks, Alok

7 months, 1 week

1
0
0 0

[PATCH] firmware: cs_dsp: Ensure cs_dsp_load[_coeff]() returns 0 on success

by Richard Fitzgerald

Set ret = 0 on successful completion of the processing loop in cs_dsp_load() and cs_dsp_load_coeff() to ensure that the function returns 0 on success. All normal firmware files will have at least one data block, and processing this block will set ret == 0, from the result of either regmap_raw_write() or cs_dsp_parse_coeff(). The kunit tests create a dummy firmware file that contains only the header, without any data blocks. This gives cs_dsp a file to "load" that will not cause any side-effects. As there aren't any data blocks, the processing loop will not set ret == 0. Originally there was a line after the processing loop: ret = regmap_async_complete(regmap); which would set ret == 0 before the function returned. Commit fe08b7d5085a ("firmware: cs_dsp: Remove async regmap writes") changed the regmap write to a normal sync write, so the call to regmap_async_complete() wasn't necessary and was removed. It was overlooked that the ret here wasn't only to check the result of regmap_async_complete(), it also set the final return value of the function. Fixes: fe08b7d5085a ("firmware: cs_dsp: Remove async regmap writes") Signed-off-by: Richard Fitzgerald <rf(a)opensource.cirrus.com> --- drivers/firmware/cirrus/cs_dsp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/firmware/cirrus/cs_dsp.c b/drivers/firmware/cirrus/cs_dsp.c index 42433c19eb30..560724ce21aa 100644 --- a/drivers/firmware/cirrus/cs_dsp.c +++ b/drivers/firmware/cirrus/cs_dsp.c @@ -1631,6 +1631,7 @@ static int cs_dsp_load(struct cs_dsp *dsp, const struct firmware *firmware, cs_dsp_debugfs_save_wmfwname(dsp, file); + ret = 0; out_fw: cs_dsp_buf_free(&buf_list); @@ -2338,6 +2339,7 @@ static int cs_dsp_load_coeff(struct cs_dsp *dsp, const struct firmware *firmware cs_dsp_debugfs_save_binname(dsp, file); + ret = 0; out_fw: cs_dsp_buf_free(&buf_list); -- 2.43.0

7 months, 1 week

2
1
0 0

[PATCH v5 iproute2-next 0/1] DualPI2 iproute2 patch

by chia-yu.chang＠nokia-bell-labs.com

From: Chia-Yu Chang <chia-yu.chang(a)nokia-bell-labs.com> Hello, Please find DUALPI2 iproute2 patch v5. v5 (25-Mar-25) - Use matches() to replace current strcmp() (Stephen Hemminger <stephen(a)networkplumber.org>) - Use general parse_percent() for handling scaled percentage values (Stephen Hemminger <stephen(a)networkplumber.org>) - Add print function for JSON of dualpi2 stats (Stephen Hemminger <stephen(a)networkplumber.org>) v4 (16-Mar-25) - Add min_qlen_step to dualpi2 attribute as the minimum queue length in number of packets in the L-queue to start step marking. v3 (21-Feb-25) - Add memlimit to dualpi2 attribute, and add memory_used, max_memory_used, memory_limit in dualpi2 stats (Dave Taht <dave.taht(a)gmail.com>) - Update manual to align with the latest implementation and clarify the queue naming and default unit - Use common "get_scaled_alpha_beta" and clean print_opt for Dualpi2 v2 (23-Oct-24) - Rename get_float in dualpi2 to get_float_min_max in utils.c - Move get_float from iplink_can.c to utils.c (Stephen Hemminger <stephen(a)networkplumber.org>) - Add print function for JSON of dualpi2 (Stephen Hemminger <stephen(a)networkplumber.org>) For more details of DualPI2, please refer to IETF RFC9332 (https://datatracker.ietf.org/doc/html/rfc9332). Best Regards, Chia-Yu Chia-Yu Chang (1): tc: add dualpi2 scheduler module bash-completion/tc | 11 +- include/uapi/linux/pkt_sched.h | 39 +++ include/utils.h | 2 + ip/iplink_can.c | 14 - lib/utils.c | 30 ++ man/man8/tc-dualpi2.8 | 249 ++++++++++++++++ tc/Makefile | 1 + tc/q_dualpi2.c | 519 +++++++++++++++++++++++++++++++++ 8 files changed, 850 insertions(+), 15 deletions(-) create mode 100644 man/man8/tc-dualpi2.8 create mode 100644 tc/q_dualpi2.c -- 2.34.1

7 months, 1 week

1
1
0 0

[PATCH v3 0/2] selftests: livepatch: test if ftrace can trace a livepatched function

by Filipe Xavier

This patchset adds ftrace helper functions and a new test that makes sure ftrace can trace a function that was introduced by a livepatch. Signed-off-by: Filipe Xavier <felipeaggger(a)gmail.com> Suggested-by: Marcos Paulo de Souza <mpdesouza(a)suse.com> Reviewed-by: Marcos Paulo de Souza <mpdesouza(a)suse.com> Acked-by: Miroslav Benes <mbenes(a)suse.cz> --- Changes in v3: - functions.sh: fixed sed to remove warning from shellcheck and add grep -Fw params. - test-ftrace.sh: change constant to use common SYSFS_KLP_DIR. - Link to v2: https://lore.kernel.org/r/20250318-ftrace-sftest-livepatch-v2-0-60cb0aa95cc… Changes in v2: - functions.sh: change check traced function to accept a list of functions. - Link to v1: https://lore.kernel.org/r/20250306-ftrace-sftest-livepatch-v1-0-a6f1dfc30e1… --- Filipe Xavier (2): selftests: livepatch: add new ftrace helpers functions selftests: livepatch: test if ftrace can trace a livepatched function tools/testing/selftests/livepatch/functions.sh | 49 ++++++++++++++++++++++++ tools/testing/selftests/livepatch/test-ftrace.sh | 34 ++++++++++++++++ 2 files changed, 83 insertions(+) --- base-commit: 848e076317446f9c663771ddec142d7c2eb4cb43 change-id: 20250306-ftrace-sftest-livepatch-60d9dc472235 Best regards, -- Filipe Xavier <felipeaggger(a)gmail.com>

7 months, 1 week

3
6
0 0

[PATCH] selftests/x86/lam: fix resource leak in do_uring() and allocate_dsa_pasid()

by Malaya Kumar Rout

Exception branch returns without closing the file descriptors 'file_fd' and 'fd' Signed-off-by: Malaya Kumar Rout <malayarout91(a)gmail.com> --- tools/testing/selftests/x86/lam.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c index 18d736640ece..eaba0a921322 100644 --- a/tools/testing/selftests/x86/lam.c +++ b/tools/testing/selftests/x86/lam.c @@ -682,7 +682,7 @@ int do_uring(unsigned long lam) return 1; if (fstat(file_fd, &st) < 0) - return 1; + goto cleanup; off_t file_sz = st.st_size; @@ -690,7 +690,7 @@ int do_uring(unsigned long lam) fi = malloc(sizeof(*fi) + sizeof(struct iovec) * blocks); if (!fi) - return 1; + goto cleanup; fi->file_sz = file_sz; fi->file_fd = file_fd; @@ -698,7 +698,7 @@ int do_uring(unsigned long lam) ring = malloc(sizeof(*ring)); if (!ring) { free(fi); - return 1; + goto cleanup; } memset(ring, 0, sizeof(struct io_ring)); @@ -730,6 +730,9 @@ int do_uring(unsigned long lam) free(fi); +cleanup: + close(file_fd); + return ret; } @@ -1189,8 +1192,10 @@ void *allocate_dsa_pasid(void) wq = mmap(NULL, 0x1000, PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, 0); - if (wq == MAP_FAILED) + if (wq == MAP_FAILED) { + close(fd); perror("mmap"); + } return wq; } -- 2.43.0

7 months, 1 week

1
0
0 0

[PATCH 0/3] selftests: vDSO: chacha: Bugfixes

by Thomas Weißschuh

Bugfixes for a few issues I ran into. Signed-off-by: Thomas Weißschuh <thomas.weissschuh(a)linutronix.de> --- Thomas Weißschuh (3): selftests: vDSO: chacha: Correctly skip test if necessary selftests: vDSO: chacha: Include asm/hwcap.h for arm64 selftests: vDSO: chacha: Provide default definition of HWCAP_S390_VXRS tools/testing/selftests/vDSO/vdso_test_chacha.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) --- base-commit: 586de92313fcab8ed84ac5f78f4d2aae2db92c59 change-id: 20250324-s390-vdso-hwcap-0914c0f7c7f1 Best regards, -- Thomas Weißschuh <thomas.weissschuh(a)linutronix.de>

7 months, 1 week

2
7
0 0

[PATCH v6 0/6] rust: reduce `as` casts, enable related lints

by Tamir Duberstein

This started with a patch that enabled `clippy::ptr_as_ptr`. Benno Lossin suggested I also look into `clippy::ptr_cast_constness` and I discovered `clippy::as_ptr_cast_mut`. This series now enables all 3 lints. It also enables `clippy::as_underscore` which ensures other pointer casts weren't missed. The first commit reduces the need for pointer casts and is shared with another series[1]. As a late addition, `clippy::cast_lossless` is also enabled. Link: https://lore.kernel.org/all/20250307-no-offset-v1-0-0c728f63b69c@gmail.com/ [1] Signed-off-by: Tamir Duberstein <tamird(a)gmail.com> --- Changes in v6: - Drop strict provenance patch. - Fix URLs in doc comments. - Add patch to enable `clippy::cast_lossless`. - Rebase on rust-next. - Link to v5: https://lore.kernel.org/r/20250317-ptr-as-ptr-v5-0-5b5f21fa230a@gmail.com Changes in v5: - Use `pointer::addr` in OF. (Boqun Feng) - Add documentation on stubs. (Benno Lossin) - Mark stubs `#[inline]`. - Pick up Alice's RB on a shared commit from https://lore.kernel.org/all/Z9f-3Aj3_FWBZRrm@google.com/. - Link to v4: https://lore.kernel.org/r/20250315-ptr-as-ptr-v4-0-b2d72c14dc26@gmail.com Changes in v4: - Add missing SoB. (Benno Lossin) - Use `without_provenance_mut` in alloc. (Boqun Feng) - Limit strict provenance lints to the `kernel` crate to avoid complex logic in the build system. This can be revisited on MSRV >= 1.84.0. - Rebase on rust-next. - Link to v3: https://lore.kernel.org/r/20250314-ptr-as-ptr-v3-0-e7ba61048f4a@gmail.com Changes in v3: - Fixed clippy warning in rust/kernel/firmware.rs. (kernel test robot) Link: https://lore.kernel.org/all/202503120332.YTCpFEvv-lkp@intel.com/ - s/as u64/as bindings::phys_addr_t/g. (Benno Lossin) - Use strict provenance APIs and enable lints. (Benno Lossin) - Link to v2: https://lore.kernel.org/r/20250309-ptr-as-ptr-v2-0-25d60ad922b7@gmail.com Changes in v2: - Fixed typo in first commit message. - Added additional patches, converted to series. 
- Link to v1: https://lore.kernel.org/r/20250307-ptr-as-ptr-v1-1-582d06514c98@gmail.com --- Tamir Duberstein (6): rust: retain pointer mut-ness in `container_of!` rust: enable `clippy::ptr_as_ptr` lint rust: enable `clippy::ptr_cast_constness` lint rust: enable `clippy::as_ptr_cast_mut` lint rust: enable `clippy::as_underscore` lint rust: enable `clippy::cast_lossless` lint Makefile | 5 +++++ drivers/gpu/drm/drm_panic_qr.rs | 10 +++++----- rust/bindings/lib.rs | 1 + rust/kernel/alloc/allocator_test.rs | 2 +- rust/kernel/alloc/kvec.rs | 4 ++-- rust/kernel/block/mq/operations.rs | 2 +- rust/kernel/block/mq/request.rs | 7 ++++--- rust/kernel/device.rs | 5 +++-- rust/kernel/device_id.rs | 2 +- rust/kernel/devres.rs | 19 ++++++++++--------- rust/kernel/dma.rs | 6 +++--- rust/kernel/error.rs | 2 +- rust/kernel/firmware.rs | 3 ++- rust/kernel/fs/file.rs | 2 +- rust/kernel/io.rs | 18 +++++++++--------- rust/kernel/kunit.rs | 15 +++++++-------- rust/kernel/lib.rs | 5 ++--- rust/kernel/list/impl_list_item_mod.rs | 2 +- rust/kernel/miscdevice.rs | 2 +- rust/kernel/net/phy.rs | 4 ++-- rust/kernel/of.rs | 6 +++--- rust/kernel/pci.rs | 13 ++++++++----- rust/kernel/platform.rs | 6 ++++-- rust/kernel/print.rs | 11 +++++------ rust/kernel/rbtree.rs | 23 ++++++++++------------- rust/kernel/seq_file.rs | 3 ++- rust/kernel/str.rs | 10 +++++----- rust/kernel/sync/poll.rs | 2 +- rust/kernel/workqueue.rs | 12 ++++++------ rust/uapi/lib.rs | 1 + 30 files changed, 107 insertions(+), 96 deletions(-) --- base-commit: 28bb48c4cb34f65a9aa602142e76e1426da31293 change-id: 20250307-ptr-as-ptr-21b1867fc4d4 Best regards, -- Tamir Duberstein <tamird(a)gmail.com>

7 months, 1 week

2
8
0 0

[PATCH v2 0/2] selftests: livepatch: test if ftrace can trace a livepatched function

by Filipe Xavier

This patchset adds ftrace helper functions and a new test that makes sure ftrace can trace a function that was introduced by a livepatch. Signed-off-by: Filipe Xavier <felipeaggger(a)gmail.com> Suggested-by: Marcos Paulo de Souza <mpdesouza(a)suse.com> Reviewed-by: Marcos Paulo de Souza <mpdesouza(a)suse.com> --- Changes in v2: - functions.sh: change check traced function to accept a list of functions. - Link to v1: https://lore.kernel.org/r/20250306-ftrace-sftest-livepatch-v1-0-a6f1dfc30e1… --- Filipe Xavier (2): selftests: livepatch: add new ftrace helpers functions selftests: livepatch: test if ftrace can trace a livepatched function tools/testing/selftests/livepatch/functions.sh | 49 ++++++++++++++++++++++++ tools/testing/selftests/livepatch/test-ftrace.sh | 34 ++++++++++++++++ 2 files changed, 83 insertions(+) --- base-commit: 848e076317446f9c663771ddec142d7c2eb4cb43 change-id: 20250306-ftrace-sftest-livepatch-60d9dc472235 Best regards, -- Filipe Xavier <felipeaggger(a)gmail.com>

7 months, 1 week

3
7
0 0

[PATCH net-next v7 0/9] Device memory TCP TX

by Mina Almasry

v7: https://lore.kernel.org/netdev/20250227041209.2031104-1-almasrymina@google.… === Changelog: - Check the dmabuf net_iov binding belongs to the device the TX is going out on. (Jakub) - Provide detailed inspection of callsites of __skb_frag_ref/skb_page_unref in patch 2's changelog (Jakub) v6: https://lore.kernel.org/netdev/20250222191517.743530-1-almasrymina@google.c… === v6 has no major changes. Addressed a few issues from Paolo and David, and collected Acks from Stan. Thank you everyone for the review! Changes: - retain behavior to process MSG_FASTOPEN even if the provided cmsg is invalid (Paolo). - Rework the freeing of tx_vec slightly (it now has its own err label). (Paolo). - Squash the commit that makes dmabuf unbinding scheduled work into the same one which implements the TX path so we don't run into future errors on bisecting (Paolo). - Fix/add comments to explain how dmabuf binding refcounting works (David). v5: https://lore.kernel.org/netdev/20250220020914.895431-1-almasrymina@google.c… === v5 has no major changes; it clears up the relatively minor issues pointed out to in v4, and rebases the series on top of net-next to resolve the conflict with a patch that raced to the tree. It also collects the review tags from v4. Changes: - Rebase to net-next - Fix issues in selftest (Stan). - Address comments in the devmem and netmem driver docs (Stan and Bagas) - Fix zerocopy_fill_skb_from_devmem return error code (Stan). v4: https://lore.kernel.org/netdev/20250203223916.1064540-1-almasrymina@google.… === v4 mainly addresses the critical driver support issue surfaced in v3 by Paolo and Stan. Drivers aiming to support netmem_tx should make sure not to pass the netmem dma-addrs to the dma-mapping APIs, as these dma-addrs may come from dma-bufs. Additionally other feedback from v3 is addressed. Major changes: - Add helpers to handle netmem dma-addrs. Add GVE support for netmem_tx. - Fix binding->tx_vec not being freed on error paths during the tx binding. 
- Add a minimal devmem_tx test to devmem.py. - Clean up everything obsolete from the cover letter (Paolo). v3: https://patchwork.kernel.org/project/netdevbpf/list/?series=929401&state=* === Address minor comments from RFCv2 and fix a few build warnings and ynl-regen issues. No major changes. RFC v2: https://patchwork.kernel.org/project/netdevbpf/list/?series=920056&state=* ======= RFC v2 addresses much of the feedback from RFC v1. I plan on sending something close to this as net-next reopens, sending it slightly early to get feedback if any. Major changes: -------------- - much improved UAPI as suggested by Stan. We now interpret the iov_base of the passed in iov from userspace as the offset into the dmabuf to send from. This removes the need to set iov.iov_base = NULL which may be confusing to users, and enables us to send multiple iovs in the same sendmsg() call. ncdevmem and the docs show a sample use of that. - Removed the duplicate dmabuf iov_iter in binding->iov_iter. I think this is a good improvement as it was confusing to keep track of 2 iterators for the same sendmsg, and mistracking both iterators caused a couple of bugs reported in the last iteration that are now resolved with this streamlining. - Improved test coverage in ncdevmem. Now multiple sendmsg() are tested, and sending multiple iovs in the same sendmsg() is tested. - Fixed issue where dmabuf unmapping was happening in invalid context (Stan). ==================================================================== The TX path had been dropped from the Device Memory TCP patch series post RFCv1 [1], to make that series slightly easier to review. This series rebases the implementation of the TX path on top of the net_iov/netmem framework agreed upon and merged. The motivation for the feature is thoroughly described in the docs & cover letter of the original proposal, so I don't repeat the lengthy descriptions here, but they are available in [1]. 
Full outline on usage of the TX path is detailed in the documentation included with this series. Test example is available via the kselftest included in the series as well. The series is relatively small, as the TX path for this feature largely piggybacks on the existing MSG_ZEROCOPY implementation. Patch Overview: --------------- 1. Documentation & tests to give high level overview of the feature being added. 1. Add netmem refcounting needed for the TX path. 2. Devmem TX netlink API. 3. Devmem TX net stack implementation. 4. Make dma-buf unbinding scheduled work to handle TX cases where it gets freed from contexts where we can't sleep. 5. Add devmem TX documentation. 6. Add scaffolding enabling driver support for netmem_tx. Add helpers, driver feature flag, and docs to enable drivers to declare netmem_tx support. 7. Guard netmem_tx against being enabled against drivers that don't support it. 8. Add devmem_tx selftests. Add TX path to ncdevmem and add a test to devmem.py. Testing: -------- Testing is very similar to the devmem TCP RX path. The ncdevmem test used for the RX path is now augmented with client functionality to test the TX path. * Test Setup: Kernel: net-next with this RFC and memory provider API cherry-picked locally. Hardware: Google Cloud A3 VMs. NIC: GVE with header split & RSS & flow steering support. Performance results are not included with this version, unfortunately. I'm having issues running the dma-buf exporter driver against the upstream kernel on my test setup. The issues are specific to that dma-buf exporter and do not affect this patch series. I plan to follow up this series with perf fixes if the tests point to issues once they're up and running. Special thanks to Stan who took a stab at rebasing the TX implementation on top of the netmem/net_iov framework merged. Parts of his proposal [2] that are reused as-is are forked off into their own patches to give full credit. 
[1] https://lore.kernel.org/netdev/20240909054318.1809580-1-almasrymina@google.… [2] https://lore.kernel.org/netdev/20240913150913.1280238-2-sdf@fomichev.me/T/#… Cc: sdf(a)fomichev.me Cc: asml.silence(a)gmail.com Cc: dw(a)davidwei.uk Cc: Jamal Hadi Salim <jhs(a)mojatatu.com> Cc: Victor Nogueira <victor(a)mojatatu.com> Cc: Pedro Tammela <pctammela(a)mojatatu.com> Cc: Samiullah Khawaja <skhawaja(a)google.com> Mina Almasry (8): netmem: add niov->type attribute to distinguish different net_iov types net: add get_netmem/put_netmem support net: devmem: Implement TX path net: add devmem TCP TX documentation net: enable driver support for netmem TX gve: add netmem TX support to GVE DQO-RDA mode net: check for driver support in netmem TX selftests: ncdevmem: Implement devmem TCP TX Stanislav Fomichev (1): net: devmem: TCP tx netlink api Documentation/netlink/specs/netdev.yaml | 12 + Documentation/networking/devmem.rst | 150 ++++++++- .../networking/net_cachelines/net_device.rst | 1 + Documentation/networking/netdev-features.rst | 5 + Documentation/networking/netmem.rst | 23 +- drivers/net/ethernet/google/gve/gve_main.c | 4 + drivers/net/ethernet/google/gve/gve_tx_dqo.c | 8 +- include/linux/netdevice.h | 2 + include/linux/skbuff.h | 17 +- include/linux/skbuff_ref.h | 4 +- include/net/netmem.h | 38 ++- include/net/sock.h | 1 + include/uapi/linux/netdev.h | 1 + net/core/datagram.c | 48 ++- net/core/dev.c | 33 ++ net/core/devmem.c | 118 ++++++- net/core/devmem.h | 83 ++++- net/core/netdev-genl-gen.c | 13 + net/core/netdev-genl-gen.h | 1 + net/core/netdev-genl.c | 73 ++++- net/core/skbuff.c | 48 ++- net/core/sock.c | 6 + net/ipv4/ip_output.c | 3 +- net/ipv4/tcp.c | 50 ++- net/ipv6/ip6_output.c | 3 +- net/vmw_vsock/virtio_transport_common.c | 5 +- tools/include/uapi/linux/netdev.h | 1 + .../selftests/drivers/net/hw/devmem.py | 26 +- .../selftests/drivers/net/hw/ncdevmem.c | 300 +++++++++++++++++- 29 files changed, 1002 insertions(+), 75 deletions(-) base-commit: 
8ef890df4031121a94407c84659125cbccd3fdbe -- 2.49.0.rc0.332.g42c0ae87b1-goog

7 months, 1 week

5
21
0 0

[PATCH net-next] selftests: drv-net: rss_ctx: Don't assume indirection table is present

by Gal Pressman

The test_rss_context_dump() test assumes the indirection table is always supported, which is not true for all drivers, e.g., virtio_net when VIRTIO_NET_F_RSS is disabled. Skip the check if 'indir' is not present. Reviewed-by: Nimrod Oren <noren(a)nvidia.com> Signed-off-by: Gal Pressman <gal(a)nvidia.com> --- tools/testing/selftests/drivers/net/hw/rss_ctx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index d6e69d7d5e43..ca60ae325c22 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -392,7 +392,7 @@ def test_rss_context_dump(cfg): # Sanity-check the results for data in ctxs: - ksft_ne(set(data['indir']), {0}, "indir table is all zero") + ksft_ne(set(data.get('indir', [1])), {0}, "indir table is all zero") ksft_ne(set(data.get('hkey', [1])), {0}, "key is all zero") # More specific checks -- 2.40.1

7 months, 1 week

3
2
0 0

[PATCH net-next v1 1/2] net: pktgen: add strict buffer parsing index check

by Peter Seiderer

Add strict buffer parsing index check to avoid the following Smatch warning: net/core/pktgen.c:877 get_imix_entries() warn: check that incremented offset 'i' is capped Checking the buffer index i after every get_user/i++ step and returning with error code immediately avoids the current indirect (but correct) error handling. Reported-by: Dan Carpenter <dan.carpenter(a)linaro.org> Closes: https://lore.kernel.org/netdev/36cf3ee2-38b1-47e5-a42a-363efeb0ace3@stanley… Signed-off-by: Peter Seiderer <ps.report(a)gmx.net> --- net/core/pktgen.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index e850598db3e7..fe7fdefab994 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -856,6 +856,9 @@ static ssize_t get_imix_entries(const char __user *buffer, if (pkt_dev->n_imix_entries >= MAX_IMIX_ENTRIES) return -E2BIG; + if (i >= maxlen) + return -EINVAL; + max = min(10, maxlen - i); len = num_arg(&buffer[i], max, &size); if (len < 0) @@ -869,6 +872,8 @@ static ssize_t get_imix_entries(const char __user *buffer, if (c != ',') return -EINVAL; i++; + if (i >= maxlen) + return -EINVAL; if (size < 14 + 20 + 8) size = 14 + 20 + 8; @@ -911,6 +916,9 @@ static ssize_t get_labels(const char __user *buffer, if (n >= MAX_MPLS_LABELS) return -E2BIG; + if (i >= maxlen) + return -EINVAL; + max = min(8, maxlen - i); len = hex32_arg(&buffer[i], max, &tmp); if (len < 0) -- 2.48.1

7 months, 1 week

3
4
0 0

[PATCH v2 net-next 00/15] AccECN protocol patch series

by chia-yu.chang＠nokia-bell-labs.com

From: Chia-Yu Chang <chia-yu.chang(a)nokia-bell-labs.com> Hello, Please find v2: v2 (18-03-2025) - Add one missing patch from previous AccECN protocol preparation patch series to this patch series The full patch series can be found in https://github.com/L4STeam/linux-net-next/commits/upstream_l4steam/ The Accurate ECN draft can be found in https://datatracker.ietf.org/doc/html/draft-ietf-tcpm-accurate-ecn-28 Best regards, Chia-Yu Chia-Yu Chang (1): tcp: accecn: AccECN option failure handling Ilpo Järvinen (14): tcp: fast path functions later tcp: AccECN core tcp: accecn: AccECN negotiation tcp: accecn: add AccECN rx byte counters tcp: accecn: AccECN needs to know delivered bytes tcp: allow embedding leftover into option padding tcp: sack option handling improvements tcp: accecn: AccECN option tcp: accecn: AccECN option send control tcp: accecn: AccECN option ceb/cep heuristic tcp: accecn: AccECN ACE field multi-wrap heuristic tcp: accecn: try to fit AccECN option with SACK tcp: try to avoid safer when ACKs are thinned gro: flushing when CWR is set negatively affects AccECN include/linux/tcp.h | 27 +- include/net/netns/ipv4.h | 2 + include/net/tcp.h | 198 +++++++++++-- include/uapi/linux/tcp.h | 7 + net/ipv4/syncookies.c | 3 + net/ipv4/sysctl_net_ipv4.c | 19 ++ net/ipv4/tcp.c | 26 +- net/ipv4/tcp_input.c | 591 +++++++++++++++++++++++++++++++++++-- net/ipv4/tcp_ipv4.c | 5 +- net/ipv4/tcp_minisocks.c | 92 +++++- net/ipv4/tcp_offload.c | 3 +- net/ipv4/tcp_output.c | 299 +++++++++++++++++-- net/ipv6/syncookies.c | 1 + net/ipv6/tcp_ipv6.c | 1 + 14 files changed, 1177 insertions(+), 97 deletions(-) -- 2.34.1

7 months, 1 week

4
18
0 0

[PATCH net-next] selftests: drv-net: rss_input_xfrm: Check test prerequisites before running

by Gal Pressman

Ensure the following prerequisites before executing the test: 1. 'socat' is installed on the remote host. 2. Python version supports socket.SO_INCOMING_CPU (available since v3.11). Skip the test if either prerequisite is not met. Reviewed-by: Nimrod Oren <noren(a)nvidia.com> Signed-off-by: Gal Pressman <gal(a)nvidia.com> --- .../testing/selftests/drivers/net/hw/rss_input_xfrm.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py index 53bb08cc29ec..58d74ba6c343 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py +++ b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py @@ -7,7 +7,7 @@ from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, cmd, fd_read_timeout from lib.py import NetDrvEpEnv from lib.py import EthtoolFamily, NetdevFamily from lib.py import KsftSkipEx, KsftFailEx -from lib.py import rand_port +from lib.py import rand_port, CmdExitFailure def traffic(cfg, local_port, remote_port, ipver): @@ -32,6 +32,14 @@ def test_rss_input_xfrm(cfg, ipver): if multiprocessing.cpu_count() < 2: raise KsftSkipEx("Need at least two CPUs to test symmetric RSS hash") + try: + cmd("hash socat", host=cfg.remote) + except CmdExitFailure: + raise KsftSkipEx("socat not installed on remote") + + if not hasattr(socket, "SO_INCOMING_CPU"): + raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11") + input_xfrm = cfg.ethnl.rss_get( {'header': {'dev-name': cfg.ifname}}).get('input_xfrm') -- 2.40.1

7 months, 1 week

3
3
0 0

[PATCH 08/10] kunit: add module description for backtrace suppression test

by Arnd Bergmann

From: Arnd Bergmann <arnd(a)arndb.de> The module was added but has no description: WARNING: modpost: missing MODULE_DESCRIPTION() in lib/kunit/backtrace-suppression-test.o Fixes: ebf1c420422a ("kunit: add test cases for backtrace warning suppression") Signed-off-by: Arnd Bergmann <arnd(a)arndb.de> --- lib/kunit/backtrace-suppression-test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/kunit/backtrace-suppression-test.c b/lib/kunit/backtrace-suppression-test.c index 8b4125af2481..1ceb30c736d7 100644 --- a/lib/kunit/backtrace-suppression-test.c +++ b/lib/kunit/backtrace-suppression-test.c @@ -102,3 +102,4 @@ static struct kunit_suite backtrace_suppression_test_suite = { kunit_test_suites(&backtrace_suppression_test_suite); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("KUnit test for suppressing warning tracebacks"); -- 2.39.5

7 months, 1 week

1
0
0 0

[PATCH net-next v11 00/10] tun: Introduce virtio-net hashing feature

by Akihiko Odaki

virtio-net has two uses for hashes: one is RSS and another is hash reporting. Conventionally the hash calculation was done by the VMM. However, computing the hash after the queue was chosen defeats the purpose of RSS. Another approach is to use an eBPF steering program. This approach has another downside: it cannot report the calculated hash due to the restrictive nature of eBPF. Introduce code to compute hashes in the kernel in order to overcome these challenges. An alternative solution is to extend the eBPF steering program so that it will be able to report to the userspace, but it is based on context rewrites, which is in feature freeze. We can adopt kfuncs, but they will not be UAPIs. We opt for ioctl to align with other relevant UAPIs (KVM and vhost_net). The patches for QEMU to use this new feature were submitted as an RFC and are available at: https://patchew.org/QEMU/20250313-hash-v4-0-c75c494b495e@daynix.com/ This work was presented at LPC 2024: https://lpc.events/event/18/contributions/1963/ V1 -> V2: Changed to introduce a new BPF program type. Signed-off-by: Akihiko Odaki <akihiko.odaki(a)daynix.com> --- Changes in v11: - Added the missing code to free vnet_hash in patch "tap: Introduce virtio-net hash feature". - Link to v10: https://lore.kernel.org/r/20250313-rss-v10-0-3185d73a9af0@daynix.com Changes in v10: - Split common code and TUN/TAP-specific code into separate patches. - Reverted a spurious style change in patch "tun: Introduce virtio-net hash feature". - Added a comment explaining disable_ipv6 in tests. - Used AF_PACKET for patch "selftest: tun: Add tests for virtio-net hashing". I also added the usage of FIXTURE_VARIANT() as the testing function now needs access to more variant-specific variables. - Corrected the message of patch "selftest: tun: Add tests for virtio-net hashing"; it mentioned validation of configuration but it is not scope of this patch. - Expanded the description of patch "selftest: tun: Add tests for virtio-net hashing". 
- Added patch "tun: Allow steering eBPF program to fall back". - Changed to handle TUNGETVNETHASHCAP before taking the rtnl lock. - Removed redundant tests for tun_vnet_ioctl(). - Added patch "selftest: tap: Add tests for virtio-net ioctls". - Added a design explanation of ioctls for extensibility and migration. - Removed a few branches in patch "vhost/net: Support VIRTIO_NET_F_HASH_REPORT". - Link to v9: https://lore.kernel.org/r/20250307-rss-v9-0-df76624025eb@daynix.com Changes in v9: - Added a missing return statement in patch "tun: Introduce virtio-net hash feature". - Link to v8: https://lore.kernel.org/r/20250306-rss-v8-0-7ab4f56ff423@daynix.com Changes in v8: - Disabled IPv6 to eliminate noises in tests. - Added a branch in tap to avoid unnecessary dissection when hash reporting is disabled. - Removed unnecessary rtnl_lock(). - Extracted code to handle new ioctls into separate functions to avoid adding extra NULL checks to the code handling other ioctls. - Introduced variable named "fd" to __tun_chr_ioctl(). - s/-/=/g in a patch message to avoid confusing Git. - Link to v7: https://lore.kernel.org/r/20250228-rss-v7-0-844205cbbdd6@daynix.com Changes in v7: - Ensured to set hash_report to VIRTIO_NET_HASH_REPORT_NONE for VHOST_NET_F_VIRTIO_NET_HDR. - s/4/sizeof(u32)/ in patch "virtio_net: Add functions for hashing". - Added tap_skb_cb type. - Rebased. - Link to v6: https://lore.kernel.org/r/20250109-rss-v6-0-b1c90ad708f6@daynix.com Changes in v6: - Extracted changes to fill vnet header holes into another series. - Squashed patches "skbuff: Introduce SKB_EXT_TUN_VNET_HASH", "tun: Introduce virtio-net hash reporting feature", and "tun: Introduce virtio-net RSS" into patch "tun: Introduce virtio-net hash feature". - Dropped the RFC tag. - Link to v5: https://lore.kernel.org/r/20241008-rss-v5-0-f3cf68df005d@daynix.com Changes in v5: - Fixed a compilation error with CONFIG_TUN_VNET_CROSS_LE. 
- Optimized the calculation of the hash value according to: https://git.dpdk.org/dpdk/commit/?id=3fb1ea032bd6ff8317af5dac9af901f1f324ca… - Added patch "tun: Unify vnet implementation". - Dropped patch "tap: Pad virtio header with zero". - Added patch "selftest: tun: Test vnet ioctls without device". - Reworked selftests to skip for older kernels. - Documented the case when the underlying device is deleted and packets have queue_mapping set by TC. - Reordered test harness arguments. - Added code to handle fragmented packets. - Link to v4: https://lore.kernel.org/r/20240924-rss-v4-0-84e932ec0e6c@daynix.com Changes in v4: - Moved tun_vnet_hash_ext to if_tun.h. - Renamed virtio_net_toeplitz() to virtio_net_toeplitz_calc(). - Replaced htons() with cpu_to_be16(). - Changed virtio_net_hash_rss() to return void. - Reordered variable declarations in virtio_net_hash_rss(). - Removed virtio_net_hdr_v1_hash_from_skb(). - Updated messages of "tap: Pad virtio header with zero" and "tun: Pad virtio header with zero". - Fixed vnet_hash allocation size. - Ensured to free vnet_hash when destructing tun_struct. - Link to v3: https://lore.kernel.org/r/20240915-rss-v3-0-c630015db082@daynix.com Changes in v3: - Reverted back to add ioctl. - Split patch "tun: Introduce virtio-net hashing feature" into "tun: Introduce virtio-net hash reporting feature" and "tun: Introduce virtio-net RSS". - Changed to reuse hash values computed for automq instead of performing RSS hashing when hash reporting is requested but RSS is not. - Extracted relevant data from struct tun_struct to keep it minimal. - Added kernel-doc. - Changed to allow calling TUNGETVNETHASHCAP before TUNSETIFF. - Initialized num_buffers with 1. - Added a test case for unclassified packets. - Fixed error handling in tests. - Changed tests to verify that the queue index will not overflow. - Rebased. 
- Link to v2: https://lore.kernel.org/r/20231015141644.260646-1-akihiko.odaki@daynix.com --- Akihiko Odaki (10): virtio_net: Add functions for hashing net: flow_dissector: Export flow_keys_dissector_symmetric tun: Allow steering eBPF program to fall back tun: Add common virtio-net hash feature code tun: Introduce virtio-net hash feature tap: Introduce virtio-net hash feature selftest: tun: Test vnet ioctls without device selftest: tun: Add tests for virtio-net hashing selftest: tap: Add tests for virtio-net ioctls vhost/net: Support VIRTIO_NET_F_HASH_REPORT Documentation/networking/tuntap.rst | 7 + drivers/net/Kconfig | 1 + drivers/net/ipvlan/ipvtap.c | 2 +- drivers/net/macvtap.c | 2 +- drivers/net/tap.c | 78 +++++- drivers/net/tun.c | 90 +++++-- drivers/net/tun_vnet.h | 155 ++++++++++- drivers/vhost/net.c | 68 ++--- include/linux/if_tap.h | 4 +- include/linux/skbuff.h | 3 + include/linux/virtio_net.h | 188 ++++++++++++++ include/net/flow_dissector.h | 1 + include/uapi/linux/if_tun.h | 82 ++++++ net/core/flow_dissector.c | 3 +- net/core/skbuff.c | 4 + tools/testing/selftests/net/Makefile | 2 +- tools/testing/selftests/net/tap.c | 97 ++++++- tools/testing/selftests/net/tun.c | 491 ++++++++++++++++++++++++++++++++++- 18 files changed, 1194 insertions(+), 84 deletions(-) --- base-commit: dd83757f6e686a2188997cb58b5975f744bb7786 change-id: 20240403-rss-e737d89efa77 prerequisite-change-id: 20241230-tun-66e10a49b0c7:v6 prerequisite-patch-id: 871dc5f146fb6b0e3ec8612971a8e8190472c0fb prerequisite-patch-id: 2797ed249d32590321f088373d4055ff3f430a0e prerequisite-patch-id: ea3370c72d4904e2f0536ec76ba5d26784c0cede prerequisite-patch-id: 837e4cf5d6b451424f9b1639455e83a260c4440d prerequisite-patch-id: ea701076f57819e844f5a35efe5cbc5712d3080d prerequisite-patch-id: 701646fb43ad04cc64dd2bf13c150ccbe6f828ce prerequisite-patch-id: 53176dae0c003f5b6c114d43f936cf7140d31bb5 prerequisite-change-id: 20250116-buffers-96e14bf023fc:v2 prerequisite-patch-id: 
25fd4f99d4236a05a5ef16ab79f3e85ee57e21cc Best regards, -- Akihiko Odaki <akihiko.odaki(a)daynix.com>

7 months, 1 week

4
13
0 0

[PATCH v4 00/18] riscv: add SBI FWFT misaligned exception delegation support

by Clément Léger

The SBI Firmware Feature extension allows the S-mode to request some specific features (either hardware or software) to be enabled. This series uses this extension to request misaligned access exception delegation to S-mode in order to let the kernel handle it. It also adds support for the KVM FWFT SBI extension based on the misaligned access handling infrastructure. FWFT SBI extension is part of the SBI V3.0 specifications [1]. It can be tested using the qemu provided at [2] which contains the series from [3]. kvm-unit-tests [4] can be used inside kvm to tests the correct delegation of misaligned exceptions. Upstream OpenSBI can be used. Note: Since SBI V3.0 is not yet ratified, FWFT extension API is split between interface only and implementation, allowing to pick only the interface which do not have hard dependencies on SBI. The tests can be run using the included kselftest: $ qemu-system-riscv64 \ -cpu rv64,trap-misaligned-access=true,v=true \ -M virt \ -m 1024M \ -bios fw_dynamic.bin \ -kernel Image ... # ./misaligned TAP version 13 1..23 # Starting 23 tests from 1 test cases. # RUN global.gp_load_lh ... # OK global.gp_load_lh ok 1 global.gp_load_lh # RUN global.gp_load_lhu ... # OK global.gp_load_lhu ok 2 global.gp_load_lhu # RUN global.gp_load_lw ... # OK global.gp_load_lw ok 3 global.gp_load_lw # RUN global.gp_load_lwu ... # OK global.gp_load_lwu ok 4 global.gp_load_lwu # RUN global.gp_load_ld ... # OK global.gp_load_ld ok 5 global.gp_load_ld # RUN global.gp_load_c_lw ... # OK global.gp_load_c_lw ok 6 global.gp_load_c_lw # RUN global.gp_load_c_ld ... # OK global.gp_load_c_ld ok 7 global.gp_load_c_ld # RUN global.gp_load_c_ldsp ... # OK global.gp_load_c_ldsp ok 8 global.gp_load_c_ldsp # RUN global.gp_load_sh ... # OK global.gp_load_sh ok 9 global.gp_load_sh # RUN global.gp_load_sw ... # OK global.gp_load_sw ok 10 global.gp_load_sw # RUN global.gp_load_sd ... # OK global.gp_load_sd ok 11 global.gp_load_sd # RUN global.gp_load_c_sw ... 
# OK global.gp_load_c_sw ok 12 global.gp_load_c_sw # RUN global.gp_load_c_sd ... # OK global.gp_load_c_sd ok 13 global.gp_load_c_sd # RUN global.gp_load_c_sdsp ... # OK global.gp_load_c_sdsp ok 14 global.gp_load_c_sdsp # RUN global.fpu_load_flw ... # OK global.fpu_load_flw ok 15 global.fpu_load_flw # RUN global.fpu_load_fld ... # OK global.fpu_load_fld ok 16 global.fpu_load_fld # RUN global.fpu_load_c_fld ... # OK global.fpu_load_c_fld ok 17 global.fpu_load_c_fld # RUN global.fpu_load_c_fldsp ... # OK global.fpu_load_c_fldsp ok 18 global.fpu_load_c_fldsp # RUN global.fpu_store_fsw ... # OK global.fpu_store_fsw ok 19 global.fpu_store_fsw # RUN global.fpu_store_fsd ... # OK global.fpu_store_fsd ok 20 global.fpu_store_fsd # RUN global.fpu_store_c_fsd ... # OK global.fpu_store_c_fsd ok 21 global.fpu_store_c_fsd # RUN global.fpu_store_c_fsdsp ... # OK global.fpu_store_c_fsdsp ok 22 global.fpu_store_c_fsdsp # RUN global.gen_sigbus ... [12797.988647] misaligned[618]: unhandled signal 7 code 0x1 at 0x0000000000014dc0 in misaligned[4dc0,10000+76000] [12797.988990] CPU: 0 UID: 0 PID: 618 Comm: misaligned Not tainted 6.13.0-rc6-00008-g4ec4468967c9-dirty #51 [12797.989169] Hardware name: riscv-virtio,qemu (DT) [12797.989264] epc : 0000000000014dc0 ra : 0000000000014d00 sp : 00007fffe165d100 [12797.989407] gp : 000000000008f6e8 tp : 0000000000095760 t0 : 0000000000000008 [12797.989544] t1 : 00000000000965d8 t2 : 000000000008e830 s0 : 00007fffe165d160 [12797.989692] s1 : 000000000000001a a0 : 0000000000000000 a1 : 0000000000000002 [12797.989831] a2 : 0000000000000000 a3 : 0000000000000000 a4 : ffffffffdeadbeef [12797.989964] a5 : 000000000008ef61 a6 : 626769735f6e0000 a7 : fffffffffffff000 [12797.990094] s2 : 0000000000000001 s3 : 00007fffe165d838 s4 : 00007fffe165d848 [12797.990238] s5 : 000000000000001a s6 : 0000000000010442 s7 : 0000000000010200 [12797.990391] s8 : 000000000000003a s9 : 0000000000094508 s10: 0000000000000000 [12797.990526] s11: 0000555567460668 t3 : 
00007fffe165d070 t4 : 00000000000965d0 [12797.990656] t5 : fefefefefefefeff t6 : 0000000000000073 [12797.990756] status: 0000000200004020 badaddr: 000000000008ef61 cause: 0000000000000006 [12797.990911] Code: 8793 8791 3423 fcf4 3783 fc84 c737 dead 0713 eef7 (c398) 0001 # OK global.gen_sigbus ok 23 global.gen_sigbus # PASSED: 23 / 23 tests passed. # Totals: pass:23 fail:0 xfail:0 xpass:0 skip:0 error:0 With kvm-tools: # lkvm run -k sbi.flat -m 128 Info: # lkvm run -k sbi.flat -m 128 -c 1 --name guest-97 Info: Removed ghost socket file "/root/.lkvm//guest-97.sock". ########################################################################## # kvm-unit-tests ########################################################################## ... [test messages elided] PASS: sbi: fwft: FWFT extension probing no error PASS: sbi: fwft: get/set reserved feature 0x6 error == SBI_ERR_DENIED PASS: sbi: fwft: get/set reserved feature 0x3fffffff error == SBI_ERR_DENIED PASS: sbi: fwft: get/set reserved feature 0x80000000 error == SBI_ERR_DENIED PASS: sbi: fwft: get/set reserved feature 0xbfffffff error == SBI_ERR_DENIED PASS: sbi: fwft: misaligned_deleg: Get misaligned deleg feature no error PASS: sbi: fwft: misaligned_deleg: Set misaligned deleg feature invalid value error PASS: sbi: fwft: misaligned_deleg: Set misaligned deleg feature invalid value error PASS: sbi: fwft: misaligned_deleg: Set misaligned deleg feature value no error PASS: sbi: fwft: misaligned_deleg: Set misaligned deleg feature value 0 PASS: sbi: fwft: misaligned_deleg: Set misaligned deleg feature value no error PASS: sbi: fwft: misaligned_deleg: Set misaligned deleg feature value 1 PASS: sbi: fwft: misaligned_deleg: Verify misaligned load exception trap in supervisor SUMMARY: 50 tests, 2 unexpected failures, 12 skipped This series is available at [6]. 
Link: https://github.com/riscv-non-isa/riscv-sbi-doc/releases/download/vv3.0-rc2/… [1] Link: https://github.com/rivosinc/qemu/tree/dev/cleger/misaligned [2] Link: https://lore.kernel.org/all/20241211211933.198792-3-fkonrad@amd.com/T/ [3] Link: https://github.com/clementleger/kvm-unit-tests/tree/dev/cleger/fwft_v1 [4] Link: https://github.com/clementleger/unaligned_test [5] Link: https://github.com/rivosinc/linux/tree/dev/cleger/fwft_v1 [6] --- V4: - Check SBI version 3.0 instead of 2.0 for FWFT presence - Use long for kvm_sbi_fwft operation return value - Init KVM sbi extension even if default_disabled - Remove revert_on_fail parameter for sbi_fwft_feature_set(). - Fix comments for sbi_fwft_set/get() - Only handle local features (there are no globals yet in the spec) - Add new SBI errors to sbi_err_map_linux_errno() V3: - Added comment about kvm sbi fwft supported/set/get callback requirements - Move struct kvm_sbi_fwft_feature in kvm_sbi_fwft.c - Add a FWFT interface V2: - Added Kselftest for misaligned testing - Added get_user() usage instead of __get_user() - Reenable interrupt when possible in misaligned access handling - Document that riscv supports unaligned-traps - Fix KVM extension state when an init function is present - Rework SBI misaligned accesses trap delegation code - Added support for CPU hotplugging - Added KVM SBI reset callback - Added reset for KVM SBI FWFT lock - Return SBI_ERR_DENIED_LOCKED when LOCK flag is set Clément Léger (18): riscv: add Firmware Feature (FWFT) SBI extensions definitions riscv: sbi: add new SBI error mappings riscv: sbi: add FWFT extension interface riscv: sbi: add SBI FWFT extension calls riscv: misaligned: request misaligned exception from SBI riscv: misaligned: use on_each_cpu() for scalar misaligned access probing riscv: misaligned: use correct CONFIG_ ifdef for misaligned_access_speed riscv: misaligned: move emulated access uniformity check in a function riscv: misaligned: add a function to check misalign trap 
delegability riscv: misaligned: factorize trap handling riscv: misaligned: enable IRQs while handling misaligned accesses riscv: misaligned: use get_user() instead of __get_user() Documentation/sysctl: add riscv to unaligned-trap supported archs selftests: riscv: add misaligned access testing RISC-V: KVM: add SBI extension init()/deinit() functions RISC-V: KVM: add SBI extension reset callback RISC-V: KVM: add support for FWFT SBI extension RISC-V: KVM: add support for SBI_FWFT_MISALIGNED_DELEG Documentation/admin-guide/sysctl/kernel.rst | 4 +- arch/riscv/include/asm/cpufeature.h | 8 +- arch/riscv/include/asm/kvm_host.h | 5 +- arch/riscv/include/asm/kvm_vcpu_sbi.h | 12 + arch/riscv/include/asm/kvm_vcpu_sbi_fwft.h | 29 ++ arch/riscv/include/asm/sbi.h | 62 +++++ arch/riscv/include/uapi/asm/kvm.h | 1 + arch/riscv/kernel/sbi.c | 95 +++++++ arch/riscv/kernel/traps.c | 57 ++-- arch/riscv/kernel/traps_misaligned.c | 118 +++++++- arch/riscv/kernel/unaligned_access_speed.c | 11 +- arch/riscv/kvm/Makefile | 1 + arch/riscv/kvm/vcpu.c | 7 +- arch/riscv/kvm/vcpu_sbi.c | 54 ++++ arch/riscv/kvm/vcpu_sbi_fwft.c | 252 +++++++++++++++++ arch/riscv/kvm/vcpu_sbi_sta.c | 3 +- .../selftests/riscv/misaligned/.gitignore | 1 + .../selftests/riscv/misaligned/Makefile | 12 + .../selftests/riscv/misaligned/common.S | 33 +++ .../testing/selftests/riscv/misaligned/fpu.S | 180 +++++++++++++ tools/testing/selftests/riscv/misaligned/gp.S | 103 +++++++ .../selftests/riscv/misaligned/misaligned.c | 254 ++++++++++++++++++ 22 files changed, 1255 insertions(+), 47 deletions(-) create mode 100644 arch/riscv/include/asm/kvm_vcpu_sbi_fwft.h create mode 100644 arch/riscv/kvm/vcpu_sbi_fwft.c create mode 100644 tools/testing/selftests/riscv/misaligned/.gitignore create mode 100644 tools/testing/selftests/riscv/misaligned/Makefile create mode 100644 tools/testing/selftests/riscv/misaligned/common.S create mode 100644 tools/testing/selftests/riscv/misaligned/fpu.S create mode 100644 
tools/testing/selftests/riscv/misaligned/gp.S create mode 100644 tools/testing/selftests/riscv/misaligned/misaligned.c -- 2.47.2

7 months, 1 week

2
27
0 0

[PATCH v5 08/17] rust: kunit: refactor to use `&raw [const|mut]`

by Antonio Hickey

Replacing all occurrences of `addr_of!(place)` with `&raw const place`. This will allow us to reduce macro complexity, and improve consistency with existing reference syntax as `&raw const` is similar to `&` making it fit more naturally with other existing code. Suggested-by: Benno Lossin <benno.lossin(a)proton.me> Link: https://github.com/Rust-for-Linux/linux/issues/1148 Signed-off-by: Antonio Hickey <contact(a)antoniohickey.com> --- rust/kernel/kunit.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/kernel/kunit.rs b/rust/kernel/kunit.rs index 824da0e9738a..a17ef3b2e860 100644 --- a/rust/kernel/kunit.rs +++ b/rust/kernel/kunit.rs @@ -128,9 +128,9 @@ unsafe impl Sync for UnaryAssert {} unsafe { $crate::bindings::__kunit_do_failed_assertion( kunit_test, - core::ptr::addr_of!(LOCATION.0), + &raw const LOCATION.0, $crate::bindings::kunit_assert_type_KUNIT_ASSERTION, - core::ptr::addr_of!(ASSERTION.0.assert), + &raw const ASSERTION.0.assert, Some($crate::bindings::kunit_unary_assert_format), core::ptr::null(), );

7 months, 1 week

3
2
0 0

[PATCHv4 RESEND net-next 0/2] selftests: wireguards: use nftables for testing

by Hangbin Liu

This patch set converts iptables to nftables for wireguard testing, as iptables is deprecated and nftables is the default framework of most releases. v3: drop iptables directly (Jason A. Donenfeld) Also convert to using nft for qemu testing (Jason A. Donenfeld) v2: use one nft table for testing (Phil Sutter) Hangbin Liu (2): selftests: wireguards: convert iptables to nft selftests: wireguard: update to using nft for qemu test tools/testing/selftests/wireguard/netns.sh | 29 +++++++++----- .../testing/selftests/wireguard/qemu/Makefile | 40 ++++++++++++++----- .../selftests/wireguard/qemu/kernel.config | 7 ++-- 3 files changed, 53 insertions(+), 23 deletions(-) -- 2.46.0

7 months, 1 week

3
13
0 0

[PATCH v2] tools/nolibc: Add support for SPARC

by Thomas Weißschuh

Add support for 32bit and 64bit SPARC to nolibc. Signed-off-by: Thomas Weißschuh <linux(a)weissschuh.net> Acked-by: Willy Tarreau <w(a)1wt.eu> Tested-by: Sebastian Andrzej Siewior <sebastian(a)breakpoint.cc> # UltraSparc T4 (Niagara4) --- This is only tested on QEMU. Any tests on real hardware would be very welcome. --- Changes in v2: - Pick up tags - Fix comment about syscall return register - Reword comment about stackpointer offsets - Link to v1: https://lore.kernel.org/r/20250316-nolibc-sparc-v1-1-2e97022d5e2c@weissschu… --- tools/include/nolibc/arch-sparc.h | 191 ++++++++++++++++++++++++++++ tools/include/nolibc/arch.h | 2 + tools/testing/selftests/nolibc/Makefile | 11 ++ tools/testing/selftests/nolibc/run-tests.sh | 2 + 4 files changed, 206 insertions(+) diff --git a/tools/include/nolibc/arch-sparc.h b/tools/include/nolibc/arch-sparc.h new file mode 100644 index 0000000000000000000000000000000000000000..1435172f3dfe33566cd8c61d726455702c804f6a --- /dev/null +++ b/tools/include/nolibc/arch-sparc.h @@ -0,0 +1,191 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * SPARC (32bit and 64bit) specific definitions for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh <linux(a)weissschuh.net> + */ + +#ifndef _NOLIBC_ARCH_SPARC_H +#define _NOLIBC_ARCH_SPARC_H + +#include <linux/unistd.h> + +#include "compiler.h" +#include "crt.h" + +/* + * Syscalls for SPARC: + * - registers are native word size + * - syscall number is passed in g1 + * - arguments are in o0-o5 + * - the system call is performed by calling a trap instruction + * - syscall return value is in o0 + * - syscall error flag is in the carry bit of the processor status register + */ + +#ifdef __arch64__ + +#define _NOLIBC_SYSCALL "t 0x6d\n" \ + "bcs,a %%xcc, 1f\n" \ + "sub %%g0, %%o0, %%o0\n" \ + "1:\n" + +#else + +#define _NOLIBC_SYSCALL "t 0x10\n" \ + "bcs,a 1f\n" \ + "sub %%g0, %%o0, %%o0\n" \ + "1:\n" + +#endif /* __arch64__ */ + +#define my_syscall0(num) \ +({ \ + register long _num __asm__ ("g1") = 
(num); \ + register long _arg1 __asm__ ("o0"); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); \ + register long _arg3 __asm__ ("o2") = (long)(arg3); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); \ + register long _arg3 __asm__ ("o2") = (long)(arg3); \ + register long _arg4 __asm__ ("o3") = (long)(arg4); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); \ + register long _arg3 __asm__ ("o2") = (long)(arg3); \ + register long _arg4 __asm__ ("o3") = 
(long)(arg4); \ + register long _arg5 __asm__ ("o4") = (long)(arg5); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); \ + register long _arg3 __asm__ ("o2") = (long)(arg3); \ + register long _arg4 __asm__ ("o3") = (long)(arg4); \ + register long _arg5 __asm__ ("o4") = (long)(arg5); \ + register long _arg6 __asm__ ("o5") = (long)(arg6); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +/* startup code */ +void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) +{ + __asm__ volatile ( + /* + * Save argc pointer to o0, as arg1 of _start_c. + * Account for the window save area, which is 16 registers wide. 
+ */ +#ifdef __arch64__ + "add %sp, 128 + 2047, %o0\n" /* on sparc64 / v9 the stack is offset by 2047 */ +#else + "add %sp, 64, %o0\n" +#endif + "b,a _start_c\n" /* transfer to c runtime */ + ); + __nolibc_entrypoint_epilogue(); +} + +static pid_t getpid(void); + +static __attribute__((unused)) +pid_t sys_fork(void) +{ + pid_t parent, ret; + + parent = getpid(); + ret = my_syscall0(__NR_fork); + + /* The syscall returns the parent pid in the child instead of 0 */ + if (ret == parent) + return 0; + else + return ret; +} +#define sys_fork sys_fork + +#endif /* _NOLIBC_ARCH_SPARC_H */ diff --git a/tools/include/nolibc/arch.h b/tools/include/nolibc/arch.h index 8a2c143c0fba288147e5a7bf9db38ffb08367616..b8c1da9a88d1593d5a97f60909ede5d0c17699eb 100644 --- a/tools/include/nolibc/arch.h +++ b/tools/include/nolibc/arch.h @@ -33,6 +33,8 @@ #include "arch-s390.h" #elif defined(__loongarch__) #include "arch-loongarch.h" +#elif defined(__sparc__) +#include "arch-sparc.h" #else #error Unsupported Architecture #endif diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index 58bcbbd029bc3ad9ccac968191b703ccf5df0717..5060e189dc842d761dd13d70b8afdb2ff3390bc5 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ b/tools/testing/selftests/nolibc/Makefile @@ -56,6 +56,8 @@ ARCH_mips32be = mips ARCH_riscv32 = riscv ARCH_riscv64 = riscv ARCH_s390x = s390 +ARCH_sparc32 = sparc +ARCH_sparc64 = sparc ARCH := $(or $(ARCH_$(XARCH)),$(XARCH)) # kernel image names by architecture @@ -76,6 +78,8 @@ IMAGE_riscv64 = arch/riscv/boot/Image IMAGE_s390x = arch/s390/boot/bzImage IMAGE_s390 = arch/s390/boot/bzImage IMAGE_loongarch = arch/loongarch/boot/vmlinuz.efi +IMAGE_sparc32 = arch/sparc/boot/image +IMAGE_sparc64 = arch/sparc/boot/image IMAGE = $(objtree)/$(IMAGE_$(XARCH)) IMAGE_NAME = $(notdir $(IMAGE)) @@ -97,6 +101,8 @@ DEFCONFIG_riscv64 = defconfig DEFCONFIG_s390x = defconfig DEFCONFIG_s390 = defconfig compat.config DEFCONFIG_loongarch = defconfig 
+DEFCONFIG_sparc32 = sparc32_defconfig +DEFCONFIG_sparc64 = sparc64_defconfig DEFCONFIG = $(DEFCONFIG_$(XARCH)) EXTRACONFIG = $(EXTRACONFIG_$(XARCH)) @@ -122,6 +128,8 @@ QEMU_ARCH_riscv64 = riscv64 QEMU_ARCH_s390x = s390x QEMU_ARCH_s390 = s390x QEMU_ARCH_loongarch = loongarch64 +QEMU_ARCH_sparc32 = sparc +QEMU_ARCH_sparc64 = sparc64 QEMU_ARCH = $(QEMU_ARCH_$(XARCH)) QEMU_ARCH_USER_ppc64le = ppc64le @@ -152,6 +160,8 @@ QEMU_ARGS_riscv64 = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_T QEMU_ARGS_s390x = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_s390 = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_loongarch = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_sparc32 = -M SS-5 -m 256M -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_sparc64 = -M sun4u -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS = -m 1G $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_BIOS) $(QEMU_ARGS_EXTRA) # OUTPUT is only set when run from the main makefile, otherwise @@ -174,6 +184,7 @@ CFLAGS_s390x = -m64 CFLAGS_s390 = -m31 CFLAGS_mips32le = -EL -mabi=32 -fPIC CFLAGS_mips32be = -EB -mabi=32 +CFLAGS_sparc32 = $(call cc-option,-m32) CFLAGS_STACKPROTECTOR ?= $(call cc-option,-mstack-protector-guard=global $(call cc-option,-fstack-protector-all)) CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wextra \ $(call cc-option,-fno-stack-protector) $(call cc-option,-Wmissing-prototypes) \ diff --git a/tools/testing/selftests/nolibc/run-tests.sh b/tools/testing/selftests/nolibc/run-tests.sh index 0299a0912d4049dd12217f9835b81d231e1d2bfd..040956a9f5b8dda3e78abc0d4b6073f4fcd9e3ee 100755 --- a/tools/testing/selftests/nolibc/run-tests.sh +++ b/tools/testing/selftests/nolibc/run-tests.sh @@ -25,6 +25,7 @@ all_archs=( riscv32 riscv64 s390x s390 loongarch + sparc32 sparc64 ) archs="${all_archs[@]}" @@ -111,6 +112,7 @@ 
crosstool_arch() { loongarch) echo loongarch64;; mips*) echo mips;; s390*) echo s390;; + sparc*) echo sparc64;; *) echo "$1";; esac } --- base-commit: bceb73904c855c78402dca94c82915f078f259dd change-id: 20250226-nolibc-sparc-abf4775dc813 Best regards, -- Thomas Weißschuh <linux(a)weissschuh.net>

7 months, 1 week

2
1
0 0

[RFC -next 00/10] Add ZC notifications to splice and sendfile

by Joe Damato

Greetings: Welcome to the RFC. Currently, when a user app uses sendfile the user app has no way to know if the bytes were transmitted; sendfile simply returns, but it is possible that a slow client on the other side may take time to receive and ACK the bytes. In the meantime, the user app which called sendfile has no way to know whether it can overwrite the data on disk that it just sendfile'd. One way to fix this is to add zerocopy notifications to sendfile similar to how MSG_ZEROCOPY works with sendmsg. This is possible thanks to the extensive work done by Pavel [1]. To support this, two important user ABI changes are proposed: - A new splice flag, SPLICE_F_ZC, which allows users to signal that splice should generate zerocopy notifications if possible. - A new system call, sendfile2, which is similar to sendfile64 except that it takes an additional argument, flags, which allows the user to specify either a "regular" sendfile or a sendfile with zerocopy notifications enabled. In either case, user apps can read notifications from the error queue (like they would with MSG_ZEROCOPY) to determine when their call to sendfile has completed. I tested this RFC using the selftest modified in the last patch and also by using the selftest between two different physical hosts: # server ./msg_zerocopy -4 -i eth0 -t 2 -v -r tcp # client (does the sendfiling) dd if=/dev/zero of=sendfile_data bs=1M count=8 ./msg_zerocopy -4 -i eth0 -D $SERVER_IP -v -l 1 -t 2 -z -f sendfile_data tcp I would love to get high level feedback from folks on a few things: - Is this functionality, at a high level, something that would be desirable / useful? I think so, but of course I am biased ;) - Is this approach generally headed in the right direction? Are the proposed user ABI changes reasonable? 
If the above two points are generally agreed upon then I'd welcome feedback on the patches themselves :) This is kind of a net thing, but also kind of a splice thing so hope I am sending this to right places to get appropriate feedback. I based my code on the vfs/for-next tree, but am happy to rebase on another tree if desired. The cc-list got a little out of control, so I manually trimmed it down quite a bit; sorry if I missed anyone I should have CC'd in the process. Thanks, Joe [1]: https://lore.kernel.org/netdev/cover.1657643355.git.asml.silence@gmail.com/ Joe Damato (10): splice: Add ubuf_info to prepare for ZC splice: Add helper that passes through splice_desc splice: Factor splice_socket into a helper splice: Add SPLICE_F_ZC and attach ubuf fs: Add splice_write_sd to file operations fs: Extend do_sendfile to take a flags argument fs: Add sendfile2 which accepts a flags argument fs: Add sendfile flags for sendfile2 fs: Add sendfile2 syscall selftests: Add sendfile zerocopy notification test arch/alpha/kernel/syscalls/syscall.tbl | 1 + arch/arm/tools/syscall.tbl | 1 + arch/arm64/tools/syscall_32.tbl | 1 + arch/m68k/kernel/syscalls/syscall.tbl | 1 + arch/microblaze/kernel/syscalls/syscall.tbl | 1 + arch/mips/kernel/syscalls/syscall_n32.tbl | 1 + arch/mips/kernel/syscalls/syscall_n64.tbl | 1 + arch/mips/kernel/syscalls/syscall_o32.tbl | 1 + arch/parisc/kernel/syscalls/syscall.tbl | 1 + arch/powerpc/kernel/syscalls/syscall.tbl | 1 + arch/s390/kernel/syscalls/syscall.tbl | 1 + arch/sh/kernel/syscalls/syscall.tbl | 1 + arch/sparc/kernel/syscalls/syscall.tbl | 1 + arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + arch/xtensa/kernel/syscalls/syscall.tbl | 1 + fs/read_write.c | 40 +++++++--- fs/splice.c | 87 +++++++++++++++++---- include/linux/fs.h | 2 + include/linux/sendfile.h | 10 +++ include/linux/splice.h | 7 +- include/linux/syscalls.h | 2 + include/uapi/asm-generic/unistd.h | 4 +- net/socket.c | 1 + scripts/syscall.tbl | 
1 + tools/testing/selftests/net/msg_zerocopy.c | 54 ++++++++++++- tools/testing/selftests/net/msg_zerocopy.sh | 5 ++ 27 files changed, 200 insertions(+), 29 deletions(-) create mode 100644 include/linux/sendfile.h base-commit: 2e72b1e0aac24a12f3bf3eec620efaca7ab7d4de -- 2.43.0

7 months, 1 week

5
37
0 0

[PATCH v9 net-next 0/3] DUALPI2 patch

by chia-yu.chang＠nokia-bell-labs.com

From: Chia-Yu Chang <chia-yu.chang(a)nokia-bell-labs.com> Hello, Please find DUALPI2 patch v9. v9 - Fix mem_usage error in previous version - Add min_qlen_step to dualpi2 attribute as the minimum queue length in number of packets in the L-queue to start step threshold marking. In previous versions, this value was fixed to 2, so only when the queue length of the L queue was greater than or equal to 2 packets step threshold was applied to mark packets in the L-queue. This will cause larger queuing delays for L4S traffic at low rates (<20Mbps). So we parameterize it and change the default value to 0. Comparison of tcp_1down run 'HTB 20Mbit + DUALPI2 + 10ms base delay' Old versions: avg median # data pts Ping (ms) ICMP : 11.55 11.70 ms 350 TCP upload avg : 18.96 N/A Mbits/s 350 TCP upload sum : 18.96 N/A Mbits/s 350 New version (v9): avg median # data pts Ping (ms) ICMP : 10.81 10.70 ms 350 TCP upload avg : 18.91 N/A Mbits/s 350 TCP upload sum : 18.91 N/A Mbits/s 350 Comparison of tcp_1down run 'HTB 10Mbit + DUALPI2 + 10ms base delay' Old versions: avg median # data pts Ping (ms) ICMP : 12.61 12.80 ms 350 TCP upload avg : 9.48 N/A Mbits/s 350 TCP upload sum : 9.48 N/A Mbits/s 350 New version (v9): avg median # data pts Ping (ms) ICMP : 11.06 10.80 ms 350 TCP upload avg : 9.43 N/A Mbits/s 350 TCP upload sum : 9.43 N/A Mbits/s 350 Comparison of tcp_1down run 'HTB 10Mbit + DUALPI2 + 10ms base delay' Old versions: avg median # data pts Ping (ms) ICMP : 40.86 37.45 ms 350 TCP upload avg : 0.88 N/A Mbits/s 350 TCP upload sum : 0.88 N/A Mbits/s 350 TCP upload::1 : 0.88 0.97 Mbits/s 350 New version (v9): avg median # data pts Ping (ms) ICMP : 11.07 10.40 ms 350 TCP upload avg : 0.55 N/A Mbits/s 350 TCP upload sum : 0.55 N/A Mbits/s 350 TCP upload::1 : 0.55 0.59 Mbits/s 350 v8 - Fix warning messages in v7 v7 - Separate into 3 patches to avoid mixing changes of documentation, selftest, and code. 
(Cong Wang <xiyou.wangcong(a)gmail.com>) v6 - Add modprobe for dulapi2 in tc-testing script tc-testing/tdc.sh (Jakub Kicinski <kuba(a)kernel.org>) - Update test cases in dualpi2.json - Update commit message v5 - A comparison was done between MQ + DUALPI2, MQ + FQ_PIE, MQ + FQ_CODEL: Unshaped 1gigE with 4 download streams test: - Summary of tcp_4down run 'MQ + FQ_CODEL': avg median # data pts Ping (ms) ICMP : 1.19 1.34 ms 349 TCP download avg : 235.42 N/A Mbits/s 349 TCP download sum : 941.68 N/A Mbits/s 349 TCP download::1 : 235.19 235.39 Mbits/s 349 TCP download::2 : 235.03 235.35 Mbits/s 349 TCP download::3 : 236.89 235.44 Mbits/s 349 TCP download::4 : 234.57 235.19 Mbits/s 349 - Summary of tcp_4down run 'MQ + FQ_PIE' avg median # data pts Ping (ms) ICMP : 1.21 1.37 ms 350 TCP download avg : 235.42 N/A Mbits/s 350 TCP download sum : 941.61 N/A Mbits/s 350 TCP download::1 : 232.54 233.13 Mbits/s 350 TCP download::2 : 232.52 232.80 Mbits/s 350 TCP download::3 : 233.14 233.78 Mbits/s 350 TCP download::4 : 243.41 241.48 Mbits/s 350 - Summary of tcp_4down run 'MQ + DUALPI2' avg median # data pts Ping (ms) ICMP : 1.19 1.34 ms 349 TCP download avg : 235.42 N/A Mbits/s 349 TCP download sum : 941.68 N/A Mbits/s 349 TCP download::1 : 235.19 235.39 Mbits/s 349 TCP download::2 : 235.03 235.35 Mbits/s 349 TCP download::3 : 236.89 235.44 Mbits/s 349 TCP download::4 : 234.57 235.19 Mbits/s 349 Unshaped 1gigE with 128 download streams test: - Summary of tcp_128down run 'MQ + FQ_CODEL': avg median # data pts Ping (ms) ICMP : 1.88 1.86 ms 350 TCP download avg : 7.39 N/A Mbits/s 350 TCP download sum : 946.47 N/A Mbits/s 350 - Summary of tcp_128down run 'MQ + FQ_PIE': avg median # data pts Ping (ms) ICMP : 1.88 1.86 ms 350 TCP download avg : 7.39 N/A Mbits/s 350 TCP download sum : 946.47 N/A Mbits/s 350 - Summary of tcp_128down run 'MQ + DUALPI2': avg median # data pts Ping (ms) ICMP : 1.88 1.86 ms 350 TCP download avg : 7.39 N/A Mbits/s 350 TCP download sum : 946.47 N/A Mbits/s 350 
Unshaped 10gigE with 4 download streams test: - Summary of tcp_4down run 'MQ + FQ_CODEL': avg median # data pts Ping (ms) ICMP : 0.22 0.23 ms 350 TCP download avg : 2354.08 N/A Mbits/s 350 TCP download sum : 9416.31 N/A Mbits/s 350 TCP download::1 : 2353.65 2352.81 Mbits/s 350 TCP download::2 : 2354.54 2354.21 Mbits/s 350 TCP download::3 : 2353.56 2353.78 Mbits/s 350 TCP download::4 : 2354.56 2354.45 Mbits/s 350 - Summary of tcp_4down run 'MQ + FQ_PIE': avg median # data pts Ping (ms) ICMP : 0.20 0.19 ms 350 TCP download avg : 2354.76 N/A Mbits/s 350 TCP download sum : 9419.04 N/A Mbits/s 350 TCP download::1 : 2354.77 2353.89 Mbits/s 350 TCP download::2 : 2353.41 2354.29 Mbits/s 350 TCP download::3 : 2356.18 2354.19 Mbits/s 350 TCP download::4 : 2354.68 2353.15 Mbits/s 350 - Summary of tcp_4down run 'MQ + DUALPI2': avg median # data pts Ping (ms) ICMP : 0.24 0.24 ms 350 TCP download avg : 2354.11 N/A Mbits/s 350 TCP download sum : 9416.43 N/A Mbits/s 350 TCP download::1 : 2354.75 2353.93 Mbits/s 350 TCP download::2 : 2353.15 2353.75 Mbits/s 350 TCP download::3 : 2353.49 2353.72 Mbits/s 350 TCP download::4 : 2355.04 2353.73 Mbits/s 350 Unshaped 10gigE with 128 download streams test: - Summary of tcp_128down run 'MQ + FQ_CODEL': avg median # data pts Ping (ms) ICMP : 7.57 8.69 ms 350 TCP download avg : 73.97 N/A Mbits/s 350 TCP download sum : 9467.82 N/A Mbits/s 350 - Summary of tcp_128down run 'MQ + FQ_PIE': avg median # data pts Ping (ms) ICMP : 7.82 8.91 ms 350 TCP download avg : 73.97 N/A Mbits/s 350 TCP download sum : 9468.42 N/A Mbits/s 350 - Summary of tcp_128down run 'MQ + DUALPI2': avg median # data pts Ping (ms) ICMP : 6.87 7.93 ms 350 TCP download avg : 73.95 N/A Mbits/s 350 TCP download sum : 9465.87 N/A Mbits/s 350 From the results shown above, we see small differences between combinations. 
- Update commit message to include results of no_split_gso and split_gso (Dave Taht <dave.taht(a)gmail.com> and Paolo Abeni <pabeni(a)redhat.com>) - Add memlimit in dualpi2 attribute, and add memory_used, max_memory_used, memory_limit in dualpi2 stats (Dave Taht <dave.taht(a)gmail.com>) - Update note in sch_dualpi2.c related to BBRv3 status (Dave Taht <dave.taht(a)gmail.com>) - Update license identifier (Dave Taht <dave.taht(a)gmail.com>) - Add selftest in tools/testing/selftests/tc-testing (Cong Wang <xiyou.wangcong(a)gmail.com>) - Use netlink policies for parameter checks (Jamal Hadi Salim <jhs(a)mojatatu.com>) - Modify texts & fix typos in Documentation/netlink/specs/tc.yaml (Dave Taht <dave.taht(a)gmail.com>) - Add descriptions of packet counter statistics and reset function of sch_dualpi2.c - Fix step_thresh in packets - Update code comments in sch_dualpi2.c v4 - Update statement in Kconfig for DualPI2 (Stephen Hemminger <stephen(a)networkplumber.org>) - Put a blank line after #define in sch_dualpi2.c (Stephen Hemminger <stephen(a)networkplumber.org>) - Fix line length warning v3 - Fix compilation error - Update Documentation/netlink/specs/tc.yaml (Jakub Kicinski <kuba(a)kernel.org>) v2 - Add Documentation/netlink/specs/tc.yaml (Jakub Kicinski <kuba(a)kernel.org>) - Use dualpi2 instead of skb prefix (Jamal Hadi Salim <jhs(a)mojatatu.com>) - Replace nla_parse_nested_deprecated with nla_parse_nested (Jamal Hadi Salim <jhs(a)mojatatu.com>) - Fix line length warning For more details of DualPI2, please refer to IETF RFC9332 (https://datatracker.ietf.org/doc/html/rfc9332). 
Best regards, Chia-Yu Chia-Yu Chang (2): Documentation: netlink: specs: tc: Add DualPI2 specification selftests/tc-testing: Add selftests for qdisc DualPI2 Koen De Schepper (1): sched: Add dualpi2 qdisc Documentation/netlink/specs/tc.yaml | 144 +++ include/linux/netdevice.h | 1 + include/uapi/linux/pkt_sched.h | 39 + net/sched/Kconfig | 12 + net/sched/Makefile | 1 + net/sched/sch_dualpi2.c | 1090 +++++++++++++++++ tools/testing/selftests/tc-testing/config | 1 + .../tc-testing/tc-tests/qdiscs/dualpi2.json | 149 +++ tools/testing/selftests/tc-testing/tdc.sh | 1 + 9 files changed, 1438 insertions(+) create mode 100644 net/sched/sch_dualpi2.c create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/dualpi2.json -- 2.34.1

7 months, 1 week

2
4
0 0

[PATCH 0/3] bitmap: convert self-test to KUnit

by Tamir Duberstein

This is one of just 3 remaining "Test Module" kselftests (the others being printf and scanf), the rest having been converted to KUnit. I tested this using: $ tools/testing/kunit/kunit.py run --arch arm64 --make_options LLVM=1 bitmap. I've already sent out a conversion series for each of printf[0] and scanf[1]. There was a previous attempt[2] to do this in July 2024. Please bear with me as I try to understand and address the objections from that time. I've spoken with Muhammad Usama Anjum, the author of that series, and received their approval to "take over" this work. Here we go... On 7/26/24 11:45 PM, John Hubbard wrote: > > This changes the situation from "works for Linus' tab completion > case", to "causes a tab completion problem"! :) > > I think a tests/ subdir is how we eventually decided to do this [1], > right? > > So: > > lib/tests/bitmap_kunit.c > > [1] https://lore.kernel.org/20240724201354.make.730-kees@kernel.org This is true and unfortunate, but not trivial to fix because new kallsyms tests were placed in lib/tests in commit 84b4a51fce4c ("selftests: add new kallsyms selftests") *after* the KUnit filename best practices were adopted. I propose that the KUnit maintainers blaze this trail using `string_kunit.c` which currently still lives in lib/ despite the KUnit docs giving it as an example at lib/tests/. On 7/27/24 12:24 AM, Shuah Khan wrote: > > This change will take away the ability to run bitmap tests during > boot on a non-kunit kernel. > > Nack on this change. I wan to see all tests that are being removed > from lib because they have been converted - also it doesn't make > sense to convert some tests like this one that add the ability test > during boot. This point was also discussed in another thread[3] in which: On 7/27/24 12:35 AM, Shuah Khan wrote: > > Please make sure you aren't taking away the ability to run these tests during > boot. 
> > It doesn't make sense to convert every single test especially when it > is intended to be run during boot without dependencies - not as a kunit test > but a regression test during boot. > > bitmap is one example - pay attention to the config help test - bitmap > one clearly states it runs regression testing during boot. Any test that > says that isn't a candidate for conversion. > > I am going to nack any such conversions. The crux of the argument seems to be that the config help text is taken to describe the author's intent with the fragment "at boot". I think this may be a case of confirmation bias: I see at least the following KUnit tests with "at boot" in their help text: - CPUMASK_KUNIT_TEST - BITFIELD_KUNIT - CHECKSUM_KUNIT - UTIL_MACROS_KUNIT It seems to me that the inference being made is that any test that runs "at boot" is intended to be run by both developers and users, but I find no evidence that bitmap in particular would ever provide additional value when run by users. There's further discussion about KUnit not being "ideal for cases where people would want to check a subsystem on a running kernel", but I find no evidence that bitmap in particular is actually testing the running kernel; it is a unit test of the bitmap functions, which is also stated in the config help text. David Gow made many of the same points in his final reply[4], which was never replied to. Link: https://lore.kernel.org/all/20250207-printf-kunit-convert-v2-0-057b23860823… [0] Link: https://lore.kernel.org/all/20250207-scanf-kunit-convert-v4-0-a23e2afaede8@… [1] Link: https://lore.kernel.org/all/20240726110658.2281070-1-usama.anjum@collabora.… [2] Link: https://lore.kernel.org/all/327831fb-47ab-4555-8f0b-19a8dbcaacd7@collabora.… [3] Link: https://lore.kernel.org/all/CABVgOSmMoPD3JfzVd4VTkzGL2fZCo8LfwzaVSzeFimPrhg… [4] Thanks for your attention. 
Signed-off-by: Tamir Duberstein <tamird(a)gmail.com> --- Tamir Duberstein (3): bitmap: remove _check_eq_u32_array bitmap: convert self-test to KUnit bitmap: break kunit into test cases MAINTAINERS | 2 +- arch/m68k/configs/amiga_defconfig | 1 - arch/m68k/configs/apollo_defconfig | 1 - arch/m68k/configs/atari_defconfig | 1 - arch/m68k/configs/bvme6000_defconfig | 1 - arch/m68k/configs/hp300_defconfig | 1 - arch/m68k/configs/mac_defconfig | 1 - arch/m68k/configs/multi_defconfig | 1 - arch/m68k/configs/mvme147_defconfig | 1 - arch/m68k/configs/mvme16x_defconfig | 1 - arch/m68k/configs/q40_defconfig | 1 - arch/m68k/configs/sun3_defconfig | 1 - arch/m68k/configs/sun3x_defconfig | 1 - arch/powerpc/configs/ppc64_defconfig | 1 - lib/Kconfig.debug | 24 +- lib/Makefile | 2 +- lib/{test_bitmap.c => bitmap_kunit.c} | 454 +++++++++++++--------------------- tools/testing/selftests/lib/bitmap.sh | 3 - tools/testing/selftests/lib/config | 1 - 19 files changed, 195 insertions(+), 304 deletions(-) --- base-commit: 2014c95afecee3e76ca4a56956a936e23283f05b change-id: 20250207-bitmap-kunit-convert-92d3147b2eee Best regards, -- Tamir Duberstein <tamird(a)gmail.com>

7 months, 1 week

8
26
0 0

[PATCH v4 08/16] rust: kunit: refactor to use `&raw [const|mut]`

by Antonio Hickey

Replacing all occurrences of `addr_of!(place)` and `addr_of_mut!(place)` with `&raw const place` and `&raw mut place` respectively. This will allow us to reduce macro complexity, and improve consistency with existing reference syntax as `&raw const`, `&raw mut` are similar to `&`, `&mut` making it fit more naturally with other existing code. Suggested-by: Benno Lossin <benno.lossin(a)proton.me> Link: https://github.com/Rust-for-Linux/linux/issues/1148 Signed-off-by: Antonio Hickey <contact(a)antoniohickey.com> --- rust/kernel/kunit.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/kernel/kunit.rs b/rust/kernel/kunit.rs index 824da0e9738a..a17ef3b2e860 100644 --- a/rust/kernel/kunit.rs +++ b/rust/kernel/kunit.rs @@ -128,9 +128,9 @@ unsafe impl Sync for UnaryAssert {} unsafe { $crate::bindings::__kunit_do_failed_assertion( kunit_test, - core::ptr::addr_of!(LOCATION.0), + &raw const LOCATION.0, $crate::bindings::kunit_assert_type_KUNIT_ASSERTION, - core::ptr::addr_of!(ASSERTION.0.assert), + &raw const ASSERTION.0.assert, Some($crate::bindings::kunit_unary_assert_format), core::ptr::null(), ); -- 2.48.1

7 months, 1 week

5
7
0 0

[PATCH net-next 0/6] netconsole: Add support for userdata release

by Breno Leitao

I am submitting a series of patches that introduce a new feature for the netconsole subsystem, specifically the addition of the 'release' field to the sysdata structure. This feature allows the kernel release/version to be appended to the userdata dictionary in every message sent, enhancing the information available for debugging and monitoring purposes. This complements the already supported release prepend feature, which was added some time ago. The release prepend appends the release information at the message header, which is not ideal for two reasons: 1) It is difficult to determine if a message includes this information, making it hard and resource-intensive to parse. 2) When a message is fragmented, the release information is appended to every message fragment, consuming valuable space in the packet. The "release prepend" feature was created before the concept of userdata and sysdata. Now that this format has proven successful, we are implementing the release feature as part of this enhanced structure. This patch series aims to improve the netconsole subsystem by providing a more efficient and user-friendly way to include kernel release information in messages. I believe these changes will significantly aid in system analysis and troubleshooting. 
Suggested-by: Manu Bretelle <chantr4(a)gmail.com> Signed-off-by: Breno Leitao <leitao(a)debian.org> --- Breno Leitao (6): netconsole: introduce 'release' as a new sysdata field netconsole: implement configfs for release_enabled netconsole: add 'sysdata' suffix to related functions netconsole: append release to sysdata selftests: netconsole: Add tests for 'release' feature in sysdata docs: netconsole: document release feature Documentation/networking/netconsole.rst | 25 ++++++++ drivers/net/netconsole.c | 71 ++++++++++++++++++++-- .../selftests/drivers/net/netcons_sysdata.sh | 44 +++++++++++++- 3 files changed, 133 insertions(+), 7 deletions(-) --- base-commit: 941defcea7e11ad7ff8f0d4856716dd637d757dd change-id: 20250314-netcons_release-dc1f1f5ca0f7 Best regards, -- Breno Leitao <leitao(a)debian.org>

7 months, 1 week

3
13
0 0

[PATCH] KVM: selftests: Fix a couple "prio" signedness bugs

by Dan Carpenter

There is an assert which relies on "prio" to be signed. GUEST_ASSERT(prio >= 0); Change the type from uint32_t to int. Fixes: 728fcc46d2c2 ("KVM: selftests: aarch64: Add test for restoring active IRQs") Fixes: 0ad3ff4a6adc ("KVM: selftests: aarch64: Add preemption tests in vgic_irq") Signed-off-by: Dan Carpenter <dan.carpenter(a)linaro.org> --- --- tools/testing/selftests/kvm/arm64/vgic_irq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/arm64/vgic_irq.c b/tools/testing/selftests/kvm/arm64/vgic_irq.c index f4ac28d53747..e89c0fc5eef3 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_irq.c +++ b/tools/testing/selftests/kvm/arm64/vgic_irq.c @@ -294,7 +294,8 @@ static void guest_restore_active(struct test_args *args, uint32_t first_intid, uint32_t num, kvm_inject_cmd cmd) { - uint32_t prio, intid, ap1r; + uint32_t intid, ap1r; + int prio; int i; /* @@ -362,7 +363,8 @@ static void test_inject_preemption(struct test_args *args, uint32_t first_intid, int num, kvm_inject_cmd cmd) { - uint32_t intid, prio, step = KVM_PRIO_STEPS; + uint32_t intid, step = KVM_PRIO_STEPS; + int prio; int i; /* Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs -- 2.47.2

7 months, 1 week

2
1
0 0

[RFC PATCH 1/2] time/timekeeping: Fix possible inconsistencies in _COARSE clockids

by John Stultz

Lei Chen raised an issue with CLOCK_MONOTONIC_COARSE seeing time inconsistencies. Lei tracked down that this was being caused by the adjustment tk->tkr_mono.xtime_nsec -= offset; which is made to compensate for the unaccumulated cycles in offset when the mult value is adjusted forward, so that the non-_COARSE clockids don't see inconsistencies. However, the _COARSE clockids don't use the mult*offset value in their calculations, so this subtraction can cause the _COARSE clock ids to jump back a bit. Now, by design, this negative adjustment should be fine, because the logic run from timekeeping_adjust() is done after we accumulate approx mult*interval_cycles into xtime_nsec. The accumulated (mult*interval_cycles) will be larger than the (mult_adj*offset) value subtracted from xtime_nsec, and both operations are done together under the tk_core.lock, so the net change to xtime_nsec should always be positive. However, do_adjtimex() calls into timekeeping_advance() as well, since we want to apply the ntp freq adjustment immediately. In this case, we don't return early when the offset is smaller than interval_cycles, so we don't end up accumulating any time into xtime_nsec. But we do go on to call timekeeping_adjust(), which modifies the mult value, and subtracts from xtime_nsec to correct for the new mult value. Here because we did not accumulate anything, we have a window where the _COARSE clockids that don't utilize the mult*offset value, can see an inconsistency. So to fix this, rework the timekeeping_advance() logic a bit so that when we are called from do_adjtimex() and the offset is smaller than cycle_interval, that we call timekeeping_forward(), to first accumulate the sub-interval time into xtime_nsec. Then with no unaccumulated cycles in offset, we can do the mult adjustment without worry of the subtraction having an impact. 
NOTE: This was implemented as a potential alternative to Thomas' approach here: https://lore.kernel.org/lkml/87cyej5rid.ffs@tglx/ And similarly, it needs some additional review and testing, as it was developed while packing for conference travel. Cc: Thomas Gleixner <tglx(a)linutronix.de> Cc: Stephen Boyd <sboyd(a)kernel.org> Cc: Anna-Maria Behnsen <anna-maria(a)linutronix.de> Cc: Frederic Weisbecker <frederic(a)kernel.org> Cc: Shuah Khan <shuah(a)kernel.org> Cc: Miroslav Lichvar <mlichvar(a)redhat.com> Cc: linux-kselftest(a)vger.kernel.org Cc: kernel-team(a)android.com Cc: Lei Chen <lei.chen(a)smartx.com> Fixes: da15cfdae033 ("time: Introduce CLOCK_REALTIME_COARSE") Reported-by: Lei Chen <lei.chen(a)smartx.com> Closes: https://lore.kernel.org/lkml/20250310030004.3705801-1-lei.chen@smartx.com/ Diagnosed-by: Thomas Gleixner <tglx(a)linutronix.de> Signed-off-by: John Stultz <jstultz(a)google.com> --- kernel/time/timekeeping.c | 87 ++++++++++++++++++++++++++++----------- 1 file changed, 62 insertions(+), 25 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 1e67d076f1955..6f3a145e7b113 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -682,18 +682,18 @@ static void timekeeping_update_from_shadow(struct tk_data *tkd, unsigned int act } /** - * timekeeping_forward_now - update clock to the current time + * timekeeping_forward - update clock to given cycle now value * @tk: Pointer to the timekeeper to update + * @cycle_now: Current clocksource read value * * Forward the current clock to update its state since the last call to * update_wall_time(). This is useful before significant clock changes, * as it avoids having to deal with this time offset explicitly. 
*/ -static void timekeeping_forward_now(struct timekeeper *tk) +static void timekeeping_forward(struct timekeeper *tk, u64 cycle_now) { - u64 cycle_now, delta; + u64 delta; - cycle_now = tk_clock_read(&tk->tkr_mono); delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask, tk->tkr_mono.clock->max_raw_delta); tk->tkr_mono.cycle_last = cycle_now; @@ -710,6 +710,21 @@ static void timekeeping_forward_now(struct timekeeper *tk) } } +/** + * timekeeping_forward_now - update clock to the current time + * @tk: Pointer to the timekeeper to update + * + * Forward the current clock to update its state since the last call to + * update_wall_time(). This is useful before significant clock changes, + * as it avoids having to deal with this time offset explicitly. + */ +static void timekeeping_forward_now(struct timekeeper *tk) +{ + u64 cycle_now = tk_clock_read(&tk->tkr_mono); + + timekeeping_forward(tk, cycle_now); +} + /** * ktime_get_real_ts64 - Returns the time of day in a timespec64. * @ts: pointer to the timespec to be set @@ -2151,6 +2166,45 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset, return offset; } +static u64 timekeeping_accumulate(struct timekeeper *tk, u64 now, u64 offset, + unsigned int *clock_set) +{ + struct timekeeper *real_tk = &tk_core.timekeeper; + int shift = 0, maxshift; + + /* + * If we have a sub-cycle_interval offset, we + * are likely doing a TK_FREQ_ADJ, so accumulate + * everything so we don't have a remainder offset + * when later adjusting the multiplier + */ + if (offset < real_tk->cycle_interval) { + timekeeping_forward(tk, now); + *clock_set = 1; + return 0; + } + + /* + * With NO_HZ we may have to accumulate many cycle_intervals + * (think "ticks") worth of time at once. To do this efficiently, + * we calculate the largest doubling multiple of cycle_intervals + * that is smaller than the offset. 
We then accumulate that + * chunk in one go, and then try to consume the next smaller + * doubled multiple. + */ + shift = ilog2(offset) - ilog2(tk->cycle_interval); + shift = max(0, shift); + /* Bound shift to one less than what overflows tick_length */ + maxshift = (64 - (ilog2(ntp_tick_length()) + 1)) - 1; + shift = min(shift, maxshift); + while (offset >= tk->cycle_interval) { + offset = logarithmic_accumulation(tk, offset, shift, clock_set); + if (offset < tk->cycle_interval << shift) + shift--; + } + return offset; +} + /* * timekeeping_advance - Updates the timekeeper to the current time and * current NTP tick length @@ -2160,8 +2214,7 @@ static bool timekeeping_advance(enum timekeeping_adv_mode mode) struct timekeeper *tk = &tk_core.shadow_timekeeper; struct timekeeper *real_tk = &tk_core.timekeeper; unsigned int clock_set = 0; - int shift = 0, maxshift; - u64 offset; + u64 cycle_now, offset; guard(raw_spinlock_irqsave)(&tk_core.lock); @@ -2169,7 +2222,8 @@ static bool timekeeping_advance(enum timekeeping_adv_mode mode) if (unlikely(timekeeping_suspended)) return false; - offset = clocksource_delta(tk_clock_read(&tk->tkr_mono), + cycle_now = tk_clock_read(&tk->tkr_mono); + offset = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask, tk->tkr_mono.clock->max_raw_delta); @@ -2177,24 +2231,7 @@ static bool timekeeping_advance(enum timekeeping_adv_mode mode) if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK) return false; - /* - * With NO_HZ we may have to accumulate many cycle_intervals - * (think "ticks") worth of time at once. To do this efficiently, - * we calculate the largest doubling multiple of cycle_intervals - * that is smaller than the offset. We then accumulate that - * chunk in one go, and then try to consume the next smaller - * doubled multiple. 
- */ - shift = ilog2(offset) - ilog2(tk->cycle_interval); - shift = max(0, shift); - /* Bound shift to one less than what overflows tick_length */ - maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1; - shift = min(shift, maxshift); - while (offset >= tk->cycle_interval) { - offset = logarithmic_accumulation(tk, offset, shift, &clock_set); - if (offset < tk->cycle_interval<<shift) - shift--; - } + offset = timekeeping_accumulate(tk, cycle_now, offset, &clock_set); /* Adjust the multiplier to correct NTP error */ timekeeping_adjust(tk, offset); -- 2.49.0.rc1.451.g8f38331e32-goog

7 months, 1 week

2
6
0 0

[PATCH net 0/7] selftests/net: Mixed select()+polling mode for TCP-AO tests

by Dmitry Safonov via B4 Relay

Should fix flaky tcp-ao/connect-deny-ipv6 test. Begging pardon for the delay since the report and for sending it this late in the release cycle. Signed-off-by: Dmitry Safonov <0x7f454c46(a)gmail.com> --- Dmitry Safonov (7): selftests/net: Print TCP flags in more common format selftests/net: Provide tcp-ao counters comparison helper selftests/net: Fetch and check TCP-MD5 counters selftests/net: Add mixed select()+polling mode to TCP-AO tests selftests/net: Print the testing side in unsigned-md5 selftests/net: Delete timeout from test_connect_socket() selftests/net: Drop timeout argument from test_client_verify() tools/testing/selftests/net/tcp_ao/connect-deny.c | 58 ++-- tools/testing/selftests/net/tcp_ao/connect.c | 22 +- tools/testing/selftests/net/tcp_ao/icmps-discard.c | 17 +- .../testing/selftests/net/tcp_ao/key-management.c | 76 ++--- tools/testing/selftests/net/tcp_ao/lib/aolib.h | 114 ++++++-- .../testing/selftests/net/tcp_ao/lib/ftrace-tcp.c | 7 +- tools/testing/selftests/net/tcp_ao/lib/sock.c | 315 +++++++++++++++------ tools/testing/selftests/net/tcp_ao/restore.c | 75 +++-- tools/testing/selftests/net/tcp_ao/rst.c | 47 ++- tools/testing/selftests/net/tcp_ao/self-connect.c | 18 +- tools/testing/selftests/net/tcp_ao/seq-ext.c | 30 +- tools/testing/selftests/net/tcp_ao/unsigned-md5.c | 118 ++++---- 12 files changed, 552 insertions(+), 345 deletions(-) --- base-commit: 0fed89a961ea851945d23cc35beb59d6e56c0964 change-id: 20250312-tcp-ao-selftests-polling-21b6bbdf77b6 Best regards, -- Dmitry Safonov <0x7f454c46(a)gmail.com>

7 months, 1 week

4
14
0 0

[PATCH v8 0/3] rust: kunit: Support KUnit tests with a user-space like syntax

by David Gow

Hi all, This is v8 of the Rust/KUnit integration patch. I think all of the suggestions have at least been responded to (even if there are a few I'm leaving as either future projects or matters of taste). Hopefully this is good-to-go for 6.15, so we can start using it concurrently with making any additional improvements we may wish. This series was originally written by José Expósito, and has been modified and updated by Matt Gilbride, Miguel Ojeda, and myself. The original version can be found here: https://github.com/Rust-for-Linux/linux/pull/950 Add support for writing KUnit tests in Rust. While Rust doctests are already converted to KUnit tests and run, they're really better suited for examples, rather than as first-class unit tests. This series implements a series of direct Rust bindings for KUnit tests, as well as a new macro which allows KUnit tests to be written using a close variant of normal Rust unit test syntax. The only change required is replacing '#[cfg(test)]' with '#[kunit_tests(kunit_test_suite_name)]' An example test would look like: #[kunit_tests(rust_kernel_hid_driver)] mod tests { use super::*; use crate::{c_str, driver, hid, prelude::*}; use core::ptr; struct SimpleTestDriver; impl Driver for SimpleTestDriver { type Data = (); } #[test] fn rust_test_hid_driver_adapter() { let mut hid = bindings::hid_driver::default(); let name = c_str!("SimpleTestDriver"); static MODULE: ThisModule = unsafe { ThisModule::from_ptr(ptr::null_mut()) }; let res = unsafe { <hid::Adapter<SimpleTestDriver> as driver::DriverOps>::register(&mut hid, name, &MODULE) }; assert_eq!(res, Err(ENODEV)); // The mock returns -19 } } Please give this a go, and make sure I haven't broken it! 
There's almost certainly a lot of improvements which can be made -- and there's a fair case to be made for replacing some of this with generated C code which can use the C macros -- but this is hopefully an adequate implementation for now, and the interface can (with luck) remain the same even if the implementation changes. A few small notable missing features: - Attributes (like the speed of a test) are hardcoded to the default value. - Similarly, the module name attribute is hardcoded to NULL. In C, we use the KBUILD_MODNAME macro, but I couldn't find a way to use this from Rust which wasn't more ugly than just disabling it. - Assertions are not automatically rewritten to use KUnit assertions. --- Changes since v7: https://lore.kernel.org/rust-for-linux/20250214074051.1619256-1-davidgow@go… - Reworked the SAFETY comment for addr_of_mut! use with statics in kunit_unsafe_test_suite!() (again) - Removed the second mocking example, which was causing confusion. The first example of in_kunit_test() should be clear enough. Changes since v6: https://lore.kernel.org/rust-for-linux/20250214074051.1619256-1-davidgow@go… - Fixed an [allow(unused_unsafe)] which ended up in patch 2 instead of patch 1. (Thanks, Tamir!) - Doc comments now have several useful links. (Thanks, Tamir!) - Fix a potential compile error under macos. (Thanks, Tamir!) - Several small tidy-ups to limit unsafe usage. (Thanks, Tamir!) Changes since v5: https://lore.kernel.org/all/20241213081035.2069066-1-davidgow@google.com/ - Rebased against 6.14-rc1 - Fixed a bunch of warnings / clippy lints introduced in Rust 1.83 and 1.84. - No longer needs static_mut_refs / const_mut_refs, and is much cleaned up as a result. (Thanks, Miguel) - Major documentation and example fixes. (Thanks, Miguel) Changes since v4: https://lore.kernel.org/linux-kselftest/20241101064505.3820737-1-davidgow@g… - Rebased against 6.13-rc1 - Allowed an unused_unsafe warning after the behaviour of addr_of_mut!() changed in Rust 1.82. 
(Thanks Boqun, Miguel) - "Expect" that the sample assert_eq!(1+1, 2) produces a clippy warning due to a redundant assertion. (Thanks Boqun, Miguel) - Fix some missing safety comments, and remove some unneeded 'unsafe' blocks. (Thanks Boqun) - Fix a couple of minor rustfmt issues which were triggering checkpatch warnings. Changes since v3: https://lore.kernel.org/linux-kselftest/20241030045719.3085147-2-davidgow@g… - The kunit_unsafe_test_suite!() macro now panic!s if the suite name is too long, triggering a compile error. (Thanks, Alice!) - The #[kunit_tests()] macro now preserves span information, so errors can be better reported. (Thanks, Boqun!) - The example tests have been updated to no longer use assert_eq!() with a constant bool argument (which triggered a clippy warning now we have the span info). Changes since v2: https://lore.kernel.org/linux-kselftest/20241029092422.2884505-1-davidgow@g… - Include missing rust/macros/kunit.rs file from v2. (Thanks Boqun!) - The kunit_unsafe_test_suite!() macro will truncate the name of the suite if it is too long. (Thanks Alice!) - The proc macro now emits an error if the suite name is too long. - We no longer needlessly use UnsafeCell<> in kunit_unsafe_test_suite!(). (Thanks Alice!) Changes since v1: https://lore.kernel.org/lkml/20230720-rustbind-v1-0-c80db349e3b5@google.com… - Rebase on top of the latest rust-next (commit 718c4069896c) - Make kunit_case a const fn, rather than a macro (Thanks Boqun) - As a result, the null terminator is now created with kernel::kunit::kunit_case_null() - Use the C kunit_get_current_test() function to implement in_kunit_test(), rather than re-implementing it (less efficiently) ourselves. 
Changes since the GitHub PR: - Rebased on top of kselftest/kunit - Add const_mut_refs feature This may conflict with https://lore.kernel.org/lkml/20230503090708.2524310-6-nmi@metaspace.dk/ - Add rust/macros/kunit.rs to the KUnit MAINTAINERS entry --- José Expósito (3): rust: kunit: add KUnit case and suite macros rust: macros: add macro to easily run KUnit tests rust: kunit: allow to know if we are in a test MAINTAINERS | 1 + rust/kernel/kunit.rs | 171 +++++++++++++++++++++++++++++++++++++++++++ rust/macros/kunit.rs | 161 ++++++++++++++++++++++++++++++++++++++++ rust/macros/lib.rs | 29 ++++++++ 4 files changed, 362 insertions(+) create mode 100644 rust/macros/kunit.rs -- 2.49.0.rc0.332.g42c0ae87b1-goog

7 months, 1 week

2
7
0 0

[PATCH net v4 0/3] vsock/bpf: Handle races between sockmap update and connect() disconnecting

by Michal Luczaj

Signal delivery during connect() may disconnect an already established socket. Problem is that such socket might have been placed in a sockmap before the connection was closed. PATCH 1 ensures this race won't lead to an unconnected vsock staying in the sockmap. PATCH 2 selftests it. PATCH 3 fixes a related race. Note that selftest in PATCH 2 does test this code as well, but winning this race variant may take more than 2 seconds, so I'm not advertising it. Signed-off-by: Michal Luczaj <mhal(a)rbox.co> --- Changes in v4: - Selftest: send signal to only our own process - Link to v3: https://lore.kernel.org/r/20250316-vsock-trans-signal-race-v3-0-17a6862277c… Changes in v3: - Selftest: drop unnecessary variable initialization and reorder the calls - Link to v2: https://lore.kernel.org/r/20250314-vsock-trans-signal-race-v2-0-421a41f60f4… Changes in v2: - Handle one more path of tripping the warning - Add a selftest - Collect R-b [Stefano] - Link to v1: https://lore.kernel.org/r/20250307-vsock-trans-signal-race-v1-1-3aca3f771fb… --- Michal Luczaj (3): vsock/bpf: Fix EINTR connect() racing sockmap update selftest/bpf: Add test for AF_VSOCK connect() racing sockmap update vsock/bpf: Fix bpf recvmsg() racing transport reassignment net/vmw_vsock/af_vsock.c | 10 ++- net/vmw_vsock/vsock_bpf.c | 24 ++++-- .../selftests/bpf/prog_tests/sockmap_basic.c | 99 ++++++++++++++++++++++ 3 files changed, 124 insertions(+), 9 deletions(-) --- base-commit: da9e8efe7ee10e8425dc356a9fc593502c8e3933 change-id: 20250305-vsock-trans-signal-race-d62f7718d099 Best regards, -- Michal Luczaj <mhal(a)rbox.co>

7 months, 1 week

4
11
0 0

[PATCH bpf-next] selftests/bpf: Sanitize pointer prior fclose()

by Björn Töpel

From: Björn Töpel <bjorn(a)rivosinc.com> There are scenarios where env.{sub,}test_state->stdout_saved can be NULL, e.g. sometimes when the watchdog timeout kicks in, or if the open_memstream syscall is not available. Avoid crashing test_progs by adding an explicit NULL check prior to the fclose() call. Signed-off-by: Björn Töpel <bjorn(a)rivosinc.com> --- tools/testing/selftests/bpf/test_progs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index d4ec9586b98c..309d9d4a8ace 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -103,12 +103,14 @@ static void stdio_restore(void) pthread_mutex_lock(&stdout_lock); if (env.subtest_state) { - fclose(env.subtest_state->stdout_saved); + if (env.subtest_state->stdout_saved) + fclose(env.subtest_state->stdout_saved); env.subtest_state->stdout_saved = NULL; stdout = env.test_state->stdout_saved; stderr = env.test_state->stdout_saved; } else { - fclose(env.test_state->stdout_saved); + if (env.test_state->stdout_saved) + fclose(env.test_state->stdout_saved); env.test_state->stdout_saved = NULL; stdout = env.stdout_saved; stderr = env.stderr_saved; base-commit: f3f8649585a445414521a6d5b76f41b51205086d -- 2.45.2

7 months, 1 week

3
2
0 0

[PATCH net v2 3/3] selftests: net: test for lwtunnel dst ref loops

by Justin Iurman

As recently specified by commit 0ea09cbf8350 ("docs: netdev: add a note on selftest posting") in net-next, the selftest is therefore shipped in this series. However, this selftest does not really test this series. It needs this series to avoid crashing the kernel. What it really tests, thanks to kmemleak, is what was fixed by the following commits: - commit c71a192976de ("net: ipv6: fix dst refleaks in rpl, seg6 and ioam6 lwtunnels") - commit 92191dd10730 ("net: ipv6: fix dst ref loops in rpl, seg6 and ioam6 lwtunnels") - commit c64a0727f9b1 ("net: ipv6: fix dst ref loop on input in seg6 lwt") - commit 13e55fbaec17 ("net: ipv6: fix dst ref loop on input in rpl lwt") - commit 0e7633d7b95b ("net: ipv6: fix dst ref loop in ila lwtunnel") - commit 5da15a9c11c1 ("net: ipv6: fix missing dst ref drop in ila lwtunnel") Signed-off-by: Justin Iurman <justin.iurman(a)uliege.be> --- Cc: Shuah Khan <shuah(a)kernel.org> Cc: linux-kselftest(a)vger.kernel.org --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/config | 2 + .../selftests/net/lwt_dst_cache_ref_loop.sh | 246 ++++++++++++++++++ 3 files changed, 249 insertions(+) create mode 100755 tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 5916f3b81c39..843ab747645d 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -101,6 +101,7 @@ TEST_PROGS += vlan_bridge_binding.sh TEST_PROGS += bpf_offload.py TEST_PROGS += ipv6_route_update_soft_lockup.sh TEST_PROGS += busy_poll_test.sh +TEST_PROGS += lwt_dst_cache_ref_loop.sh # YNL files, must be before "include ..lib.mk" YNL_GEN_FILES := busy_poller netlink-dumps diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 5b9baf708950..61e5116987f3 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -107,3 +107,5 @@ CONFIG_XFRM_INTERFACE=m 
CONFIG_XFRM_USER=m CONFIG_IP_NF_MATCH_RPFILTER=m CONFIG_IP6_NF_MATCH_RPFILTER=m +CONFIG_IPV6_ILA=m +CONFIG_IPV6_RPL_LWTUNNEL=y diff --git a/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh b/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh new file mode 100755 index 000000000000..881eb399798f --- /dev/null +++ b/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh @@ -0,0 +1,246 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Author: Justin Iurman <justin.iurman(a)uliege.be> +# +# WARNING +# ------- +# This is just a dummy script that triggers encap cases with possible dst cache +# reference loops in affected lwt users (see list below). Some cases are +# pathological configurations for simplicity, others are valid. Overall, we +# don't want this issue to happen, no matter what. In order to catch any +# reference loops, kmemleak MUST be used. The results alone are always blindly +# successful, don't rely on them. Note that the following tests may crash the +# kernel if the fix to prevent lwtunnel_{input|output|xmit}() reentry loops is +# not present. +# +# Affected lwt users so far (please update accordingly if needed): +# - ila_lwt (output only) +# - ioam6_iptunnel (output only) +# - rpl_iptunnel (both input and output) +# - seg6_iptunnel (both input and output) + +source lib.sh + +check_compatibility() +{ + setup_ns tmp_node &>/dev/null + if [ $? != 0 ]; then + echo "SKIP: Cannot create netns." + exit $ksft_skip + fi + + ip link add name veth0 netns $tmp_node type veth \ + peer name veth1 netns $tmp_node &>/dev/null + local ret=$? + + ip -netns $tmp_node link set veth0 up &>/dev/null + ret=$((ret + $?)) + + ip -netns $tmp_node link set veth1 up &>/dev/null + ret=$((ret + $?)) + + if [ $ret != 0 ]; then + echo "SKIP: Cannot configure links." + cleanup_ns $tmp_node + exit $ksft_skip + fi + + lsmod 2>/dev/null | grep -q "ila" + ila_lsmod=$? 
+ [ $ila_lsmod != 0 ] && modprobe ila &>/dev/null + + ip -netns $tmp_node route add 2001:db8:1::/64 \ + encap ila 1:2:3:4 csum-mode no-action ident-type luid \ + hook-type output \ + dev veth0 &>/dev/null + + ip -netns $tmp_node route add 2001:db8:2::/64 \ + encap ioam6 trace prealloc type 0x800000 ns 0 size 4 \ + dev veth0 &>/dev/null + + ip -netns $tmp_node route add 2001:db8:3::/64 \ + encap rpl segs 2001:db8:3::1 dev veth0 &>/dev/null + + ip -netns $tmp_node route add 2001:db8:4::/64 \ + encap seg6 mode inline segs 2001:db8:4::1 dev veth0 &>/dev/null + + ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap ila" + skip_ila=$? + + ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap ioam6" + skip_ioam6=$? + + ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap rpl" + skip_rpl=$? + + ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap seg6" + skip_seg6=$? + + cleanup_ns $tmp_node +} + +setup() +{ + setup_ns alpha beta gamma &>/dev/null + + ip link add name veth-alpha netns $alpha type veth \ + peer name veth-betaL netns $beta &>/dev/null + + ip link add name veth-betaR netns $beta type veth \ + peer name veth-gamma netns $gamma &>/dev/null + + ip -netns $alpha link set veth-alpha name veth0 &>/dev/null + ip -netns $beta link set veth-betaL name veth0 &>/dev/null + ip -netns $beta link set veth-betaR name veth1 &>/dev/null + ip -netns $gamma link set veth-gamma name veth0 &>/dev/null + + ip -netns $alpha addr add 2001:db8:1::2/64 dev veth0 &>/dev/null + ip -netns $alpha link set veth0 up &>/dev/null + ip -netns $alpha link set lo up &>/dev/null + ip -netns $alpha route add 2001:db8:2::/64 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + ip -netns $beta addr add 2001:db8:1::1/64 dev veth0 &>/dev/null + ip -netns $beta addr add 2001:db8:2::1/64 dev veth1 &>/dev/null + ip -netns $beta link set veth0 up &>/dev/null + ip -netns $beta link set veth1 up &>/dev/null + ip -netns $beta link set lo up &>/dev/null + ip -netns $beta route del 
2001:db8:2::/64 + ip -netns $beta route add 2001:db8:2::/64 dev veth1 + ip netns exec $beta \ + sysctl -wq net.ipv6.conf.all.forwarding=1 &>/dev/null + + ip -netns $gamma addr add 2001:db8:2::2/64 dev veth0 &>/dev/null + ip -netns $gamma link set veth0 up &>/dev/null + ip -netns $gamma link set lo up &>/dev/null + ip -netns $gamma route add 2001:db8:1::/64 \ + via 2001:db8:2::1 dev veth0 &>/dev/null + + sleep 1 + + ip netns exec $alpha ping6 -c 5 -W 1 2001:db8:2::2 &>/dev/null + if [ $? != 0 ]; then + echo "SKIP: Setup failed." + exit $ksft_skip + fi + + sleep 1 +} + +cleanup() +{ + cleanup_ns $alpha $beta $gamma + [ $ila_lsmod != 0 ] && modprobe -r ila &>/dev/null +} + +run_ila() +{ + if [ $skip_ila != 0 ]; then + echo "SKIP: ila (output)" + return + fi + + ip -netns $beta route del 2001:db8:2::/64 + ip -netns $beta route add 2001:db8:2:0:0:0:0:2/128 \ + encap ila 2001:db8:2:0 csum-mode no-action ident-type luid \ + hook-type output \ + dev veth1 &>/dev/null + sleep 1 + + echo "TEST: ila (output)" + ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 + + ip -netns $beta route del 2001:db8:2:0:0:0:0:2/128 + ip -netns $beta route add 2001:db8:2::/64 dev veth1 + sleep 1 +} + +run_ioam6() +{ + if [ $skip_ioam6 != 0 ]; then + echo "SKIP: ioam6 (output)" + return + fi + + ip -netns $beta route change 2001:db8:2::/64 \ + encap ioam6 trace prealloc type 0x800000 ns 1 size 4 \ + dev veth1 &>/dev/null + sleep 1 + + echo "TEST: ioam6 (output)" + ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 +} + +run_rpl() +{ + if [ $skip_rpl != 0 ]; then + echo "SKIP: rpl (input)" + echo "SKIP: rpl (output)" + return + fi + + ip -netns $beta route change 2001:db8:2::/64 \ + encap rpl segs 2001:db8:2::2 \ + dev veth1 &>/dev/null + sleep 1 + + echo "TEST: rpl (input)" + ip netns exec $alpha ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 + + echo "TEST: rpl (output)" + ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 +} 
+ +run_seg6() +{ + if [ $skip_seg6 != 0 ]; then + echo "SKIP: seg6 (input)" + echo "SKIP: seg6 (output)" + return + fi + + ip -netns $beta route change 2001:db8:2::/64 \ + encap seg6 mode inline segs 2001:db8:2::2 \ + dev veth1 &>/dev/null + sleep 1 + + echo "TEST: seg6 (input)" + ip netns exec $alpha ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 + + echo "TEST: seg6 (output)" + ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 +} + +run() +{ + run_ila + run_ioam6 + run_rpl + run_seg6 +} + +if [ "$(id -u)" -ne 0 ]; then + echo "SKIP: Need root privileges." + exit $ksft_skip +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool." + exit $ksft_skip +fi + +check_compatibility + +trap cleanup EXIT + +setup +run + +exit $ksft_pass -- 2.34.1

7 months, 1 week

3
3
0 0

[PATCH bpf-next v3 0/3] bpf: Fix use-after-free of sockmap

by Jiayuan Chen

Hi all, this is the v3 version. === Syzkaller reported this issue [1]. The current sockmap has a dependency on sk_socket in both read and write stages, but there is a possibility that sk->sk_socket is released during the process, leading to panic situations. For a detailed reproduction, please refer to the description in the v2: https://lore.kernel.org/bpf/20250228055106.58071-1-jiayuan.chen@linux.dev/ The corresponding fix approaches are described in the commit messages of each patch. By the way, the current sockmap lacks statistical information, especially global statistics, such as the number of successful or failed rx and tx operations. These statistics cannot be obtained from the socket interface itself. These data will be of great help in troubleshooting issues and observing sockmap behavior. If the maintainer/reviewer does not object, I think we can provide this statistical information in the future, either through proc/trace/bpftool. [1] https://syzkaller.appspot.com/bug?extid=dd90a702f518e0eac072 --- v2 -> v3: 1. Michal Luczaj reported a similar race issue in the sockmap sending path. 2. The RCU lock conflicts with mutex_lock in the unix socket read implementation. https://lore.kernel.org/bpf/20250228055106.58071-1-jiayuan.chen@linux.dev/ v1 -> v2: 1. Add Fixes tag. 2. Extend selftest of edge case for TCP/UDP sockets. 3. Add Reviewed-by and Acked-by tag. https://lore.kernel.org/bpf/20250226132242.52663-1-jiayuan.chen@linux.dev/T… Jiayuan Chen (3): bpf, sockmap: avoid using sk_socket after free when sending bpf, sockmap: avoid using sk_socket after free when reading selftests/bpf: Add edge case tests for sockmap net/core/skmsg.c | 22 ++++++- .../selftests/bpf/prog_tests/socket_helpers.h | 13 +++- .../selftests/bpf/prog_tests/sockmap_basic.c | 60 +++++++++++++++++++ 3 files changed, 91 insertions(+), 4 deletions(-) -- 2.47.1

7 months, 1 week

3
11
0 0

[PATCH] kunit: tool: fix count of tests if late test plan

by Rae Moar

Fix test count with late test plan. For example, TAP version 13 ok 1 test1 1..4 Returns a count of 1 passed, 1 crashed (because it expects tests after the test plan): returning the total count of 2 tests Change this to be 1 passed, 1 error: total count of 1 test Signed-off-by: Rae Moar <rmoar(a)google.com> --- tools/testing/kunit/kunit_parser.py | 4 ++++ tools/testing/kunit/kunit_tool_test.py | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index da53a709773a..c176487356e6 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -809,6 +809,10 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: test.log.extend(parse_diagnostic(lines)) if test.name != "" and not peek_test_name_match(lines, test): test.add_error(printer, 'missing subtest result line!') + elif not lines: + print_log(test.log, printer) + test.status = TestStatus.NO_TESTS + test.add_error(printer, 'No more test results!') else: parse_test_result(lines, test, expected_num, printer) diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 5ff4f6ffd873..bbba921e0eac 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -371,8 +371,8 @@ class KUnitParserTest(unittest.TestCase): """ result = kunit_parser.parse_run_tests(output.splitlines(), stdout) # Missing test results after test plan should alert a suspected test crash. 
- self.assertEqual(kunit_parser.TestStatus.TEST_CRASHED, result.status) - self.assertEqual(result.counts, kunit_parser.TestCounts(passed=1, crashed=1, errors=1)) + self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) + self.assertEqual(result.counts, kunit_parser.TestCounts(passed=1, errors=2)) def line_stream_from_strs(strs: Iterable[str]) -> kunit_parser.LineStream: return kunit_parser.LineStream(enumerate(strs, start=1)) base-commit: 2e0cf2b32f72b20b0db5cc665cd8465d0f257278 -- 2.49.0.395.g12beb8f557-goog

7 months, 1 week

2
1
0 0

[PATCH v3 1/2] kunit: tool: Fix bug in parsing test plan

by Rae Moar

A bug was identified where the KTAP below caused an infinite loop: TAP version 13 ok 4 test_case 1..4 The infinite loop was caused by the parser not parsing a test plan if following a test result line. Fix this bug by parsing test plan line to avoid the infinite loop. Signed-off-by: Rae Moar <rmoar(a)google.com> --- Changes since v2: - None, adds test in second patch tools/testing/kunit/kunit_parser.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index 29fc27e8949b..da53a709773a 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -759,7 +759,7 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: # If parsing the main/top-level test, parse KTAP version line and # test plan test.name = "main" - ktap_line = parse_ktap_header(lines, test, printer) + parse_ktap_header(lines, test, printer) test.log.extend(parse_diagnostic(lines)) parse_test_plan(lines, test) parent_test = True @@ -768,13 +768,12 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: # the KTAP version line and/or subtest header line ktap_line = parse_ktap_header(lines, test, printer) subtest_line = parse_test_header(lines, test) + test.log.extend(parse_diagnostic(lines)) + parse_test_plan(lines, test) parent_test = (ktap_line or subtest_line) if parent_test: - # If KTAP version line and/or subtest header is found, attempt - # to parse test plan and print test header - test.log.extend(parse_diagnostic(lines)) - parse_test_plan(lines, test) print_test_header(test, printer) + expected_count = test.expected_count subtests = [] test_num = 1 base-commit: 0619a4868fc1b32b07fb9ed6c69adc5e5cf4e4b2 -- 2.49.0.rc1.451.g8f38331e32-goog

7 months, 1 week

3
6
0 0

[PATCH net-next 00/12] mptcp: pm: prep work for new ops and sysctl knobs

by Matthieu Baerts (NGI0)

Here are a few cleanups, preparation work for the new PM ops, and sysctl knobs. - Patch 1: reorg: move generic NL code used by all PMs to pm_netlink.c. - Patch 2: use kmemdup() instead of kmalloc + copy. - Patch 3: small cleanup to use pm var instead of msk->pm. - Patch 4: reorg: id_avail_bitmap is only used by the in-kernel PM. - Patch 5: use struct_group to easily reset a subset of PM data vars. - Patch 6: introduce the minimal skeleton for the new PM ops. - Patch 7: register in-kernel and userspace PM ops. - Patch 8: new net.mptcp.path_manager sysctl knob, deprecating pm_type. - Patch 9: map the new path_manager sysctl knob with pm_type. - Patch 10: map the old pm_type sysctl knob with path_manager. - Patch 11: new net.mptcp.available_path_managers sysctl knob. - Patch 12: new test to validate path_manager and pm_type mapping. Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org> --- Geliang Tang (11): mptcp: pm: in-kernel: use kmemdup helper mptcp: pm: use pm variable instead of msk->pm mptcp: pm: only fill id_avail_bitmap for in-kernel pm mptcp: pm: add struct_group in mptcp_pm_data mptcp: pm: define struct mptcp_pm_ops mptcp: pm: register in-kernel and userspace PM mptcp: sysctl: set path manager by name mptcp: sysctl: map path_manager to pm_type mptcp: sysctl: map pm_type to path_manager mptcp: sysctl: add available_path_managers selftests: mptcp: add pm sysctl mapping tests Matthieu Baerts (NGI0) (1): mptcp: pm: split netlink and in-kernel init Documentation/networking/mptcp-sysctl.rst | 23 +++++ include/net/mptcp.h | 14 +++ net/mptcp/ctrl.c | 113 +++++++++++++++++++++- net/mptcp/pm.c | 97 ++++++++++++++++--- net/mptcp/pm_kernel.c | 16 +-- net/mptcp/pm_netlink.c | 6 ++ net/mptcp/pm_userspace.c | 10 ++ net/mptcp/protocol.h | 17 ++++ tools/testing/selftests/net/mptcp/userspace_pm.sh | 30 +++++- 9 files changed, 301 insertions(+), 25 deletions(-) --- base-commit: e016cf5f39e9c53e274a7b7122a949d8839b8782 change-id: 
20250312-net-next-mptcp-pm-ops-intro-01510135cd5e Best regards, -- Matthieu Baerts (NGI0) <matttbe(a)kernel.org>

7 months, 1 week

3
25
0 0

[PATCH v2 0/2] Fix xdp_adjust_frags_tail_grow selftest on powerpc

by Saket Kumar Bhaskar

For platforms on powerpc architecture with a default page size greater than 4096, there was an inconsistency in fragment size calculation. This caused the BPF selftest xdp_adjust_tail/xdp_adjust_frags_tail_grow to fail on powerpc. The issue occurred because the fragment buffer size in bpf_prog_test_run_xdp() was set to 4096, while the actual data size in the fragment within the shared skb was checked against PAGE_SIZE (65536 on powerpc) in min_t, causing it to exceed 4096 and be set accordingly. This discrepancy led to an overflow when bpf_xdp_frags_increase_tail() checked for tailroom, as skb_frag_size(frag) could be greater than rxq->frag_size (when PAGE_SIZE > 4096). This change fixes: 1. test_run by getting the correct arch dependent PAGE_SIZE. 2. selftest by calculating tailroom and getting correct PAGE_SIZE. Changes: v1 -> v2: * Address comments from Alexander * Use dynamic page size, cacheline size and size of struct skb_shared_info to calculate parameters. * Fixed both test_run and selftest. v1: https://lore.kernel.org/all/20250122183720.1411176-1-skb99@linux.ibm.com/ Saket Kumar Bhaskar (2): bpf, test_run: Replace hardcoded page size with dynamic PAGE_SIZE in test_run selftests/bpf: Refactor xdp_adjust_tail selftest with dynamic sizing .../bpf/prog_tests/xdp_adjust_tail.c | 160 +++++++++++++----- .../bpf/progs/test_xdp_adjust_tail_grow.c | 41 +++-- 2 files changed, 149 insertions(+), 52 deletions(-) -- 2.43.5

7 months, 1 week

2
5
0 0

[PATCH 0/2] fs/proc/task_mmu: add guard region bit to pagemap

by Lorenzo Stoakes

Currently there is no means of determining whether a given page in a mapping range is designated a guard region (as installed via madvise() using the MADV_GUARD_INSTALL flag). This is generally not an issue, but in some instances users may wish to determine whether this is the case. This series adds this ability via /proc/$pid/pagemap, updates the documentation and adds a self test to assert that this functions correctly. Lorenzo Stoakes (2): fs/proc/task_mmu: add guard region bit to pagemap tools/selftests: add guard region test for /proc/$pid/pagemap Documentation/admin-guide/mm/pagemap.rst | 3 +- fs/proc/task_mmu.c | 6 ++- tools/testing/selftests/mm/guard-regions.c | 47 ++++++++++++++++++++++ tools/testing/selftests/mm/vm_util.h | 1 + 4 files changed, 55 insertions(+), 2 deletions(-) -- 2.48.1

7 months, 2 weeks

5
14
0 0

[PATCH bpf-next 0/2] selftests/bpf: Migrate test_xdp_vlan.sh into test_progs

by Bastien Curutchet (eBPF Foundation)

Hi all, This patch series continues the work to migrate the script tests into prog_tests. test_xdp_vlan.sh tests the ability of an XDP program to modify the VLAN ids on the fly. This isn't currently covered by another test in the test_progs framework so I add a new file prog_tests/xdp_vlan.c that does the exact same tests (same network topology, same BPF programs) and remove the script. Signed-off-by: Bastien Curutchet (eBPF Foundation) <bastien.curutchet(a)bootlin.com> --- Bastien Curutchet (eBPF Foundation) (2): selftests/bpf: test_xdp_vlan: Rename BPF sections selftests/bpf: Migrate test_xdp_vlan.sh into test_progs tools/testing/selftests/bpf/Makefile | 4 +- tools/testing/selftests/bpf/prog_tests/xdp_vlan.c | 175 ++++++++++++++++ tools/testing/selftests/bpf/progs/test_xdp_vlan.c | 20 +- tools/testing/selftests/bpf/test_xdp_vlan.sh | 233 --------------------- .../selftests/bpf/test_xdp_vlan_mode_generic.sh | 9 - .../selftests/bpf/test_xdp_vlan_mode_native.sh | 9 - 6 files changed, 186 insertions(+), 264 deletions(-) --- base-commit: a814b9be27fb3c3f49343aee4b015b76f5875558 change-id: 20250130-xdp_vlan-e825cc4df14a Best regards, -- Bastien Curutchet (eBPF Foundation) <bastien.curutchet(a)bootlin.com>

7 months, 2 weeks

4
8
0 0

[PATCH] rseq/selftests: fix name clash with rseq UAPI header

by Michael Jeanson

When the rseq UAPI header is included 'union rseq' clashes with 'struct rseq', it's not the case in the rseq selftests but it does break the KVM selftests that also include this file. Rename 'union rseq' to 'union rseq_tls' to fix this. Fixes: e6644c967d3c ("rseq/selftests: Ensure the rseq ABI TLS is actually 1024 bytes") Reported-by: Mark Brown <broonie(a)kernel.org> Signed-off-by: Michael Jeanson <mjeanson(a)efficios.com> Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com> --- tools/testing/selftests/rseq/rseq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index 6d8997d815ef..663a9cef1952 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -75,13 +75,13 @@ static int rseq_ownership; * Use a union to ensure we allocate a TLS area of 1024 bytes to accomodate an * rseq registration that is larger than the current rseq ABI. */ -union rseq { +union rseq_tls { struct rseq_abi abi; char dummy[RSEQ_THREAD_AREA_ALLOC_SIZE]; }; static -__thread union rseq __rseq __attribute__((tls_model("initial-exec"))) = { +__thread union rseq_tls __rseq __attribute__((tls_model("initial-exec"))) = { .abi = { .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED, }, -- 2.43.0

7 months, 2 weeks

1
0
0 0

[PATCH v10 nf-next 0/3] conntrack: bridge: add double vlan, pppoe and pppoe-in-q

by Eric Woudstra

Conntrack bridge only tracks untagged and 802.1q. To make the bridge-fastpath experience more similar to the forward-fastpath experience, add double vlan, pppoe and pppoe-in-q tagged packets to bridge conntrack and to bridge filter chain. Split from patch-set: bridge-fastpath and related improvements v9 Eric Woudstra (3): netfilter: bridge: Add conntrack double vlan and pppoe netfilter: nft_chain_filter: Add bridge double vlan and pppoe selftests: netfilter: Add conntrack_bridge.sh net/bridge/netfilter/nf_conntrack_bridge.c | 83 +++++++-- net/netfilter/nft_chain_filter.c | 20 +- .../testing/selftests/net/netfilter/Makefile | 1 + .../net/netfilter/conntrack_bridge.sh | 176 ++++++++++++++++++ 4 files changed, 267 insertions(+), 13 deletions(-) create mode 100755 tools/testing/selftests/net/netfilter/conntrack_bridge.sh -- 2.47.1

7 months, 2 weeks

2
7
0 0

[PATCH net] selftests: drv-net: use defer in the ping test

by Jakub Kicinski

Make sure the test cleans up after itself. The XDP off statements at the end of the test may not be reached. Fixes: 75cc19c8ff89 ("selftests: drv-net: add xdp cases for ping.py") Signed-off-by: Jakub Kicinski <kuba(a)kernel.org> --- CC: shuah(a)kernel.org CC: ap420073(a)gmail.com CC: linux-kselftest(a)vger.kernel.org --- tools/testing/selftests/drivers/net/ping.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py index 93f4b411b378..fc69bfcc37c4 100755 --- a/tools/testing/selftests/drivers/net/ping.py +++ b/tools/testing/selftests/drivers/net/ping.py @@ -7,7 +7,7 @@ from lib.py import ksft_run, ksft_exit from lib.py import ksft_eq, KsftSkipEx, KsftFailEx from lib.py import EthtoolFamily, NetDrvEpEnv from lib.py import bkg, cmd, wait_port_listen, rand_port -from lib.py import ethtool, ip +from lib.py import defer, ethtool, ip remote_ifname="" no_sleep=False @@ -60,6 +60,7 @@ no_sleep=False prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) cmd(f"ip link set dev {cfg.ifname} mtu 1500 xdpgeneric obj {prog} sec xdp", shell=True) + defer(cmd, f"ip link set dev {cfg.ifname} xdpgeneric off") if no_sleep != True: time.sleep(10) @@ -68,7 +69,9 @@ no_sleep=False test_dir = os.path.dirname(os.path.realpath(__file__)) prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote) + defer(ip, f"link set dev {remote_ifname} mtu 1500", host=cfg.remote) ip("link set dev %s mtu 9000 xdpgeneric obj %s sec xdp.frags" % (cfg.ifname, prog)) + defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdpgeneric off") if no_sleep != True: time.sleep(10) @@ -78,6 +81,7 @@ no_sleep=False prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) cmd(f"ip -j link set dev 
{cfg.ifname} mtu 1500 xdp obj {prog} sec xdp", shell=True) + defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off") xdp_info = ip("-d link show %s" % (cfg.ifname), json=True)[0] if xdp_info['xdp']['mode'] != 1: """ @@ -94,10 +98,11 @@ no_sleep=False test_dir = os.path.dirname(os.path.realpath(__file__)) prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote) + defer(ip, f"link set dev {remote_ifname} mtu 1500", host=cfg.remote) try: cmd(f"ip link set dev {cfg.ifname} mtu 9000 xdp obj {prog} sec xdp.frags", shell=True) + defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off") except Exception as e: - cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) raise KsftSkipEx('device does not support native-multi-buffer XDP') if no_sleep != True: @@ -111,6 +116,7 @@ no_sleep=False cmd(f"ip link set dev {cfg.ifname} xdpoffload obj {prog} sec xdp", shell=True) except Exception as e: raise KsftSkipEx('device does not support offloaded XDP') + defer(ip, f"link set dev {cfg.ifname} xdpoffload off") cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) if no_sleep != True: @@ -157,7 +163,6 @@ no_sleep=False _test_v4(cfg) _test_v6(cfg) _test_tcp(cfg) - ip("link set dev %s xdpgeneric off" % cfg.ifname) def test_xdp_generic_mb(cfg, netnl) -> None: _set_xdp_generic_mb_on(cfg) @@ -169,7 +174,6 @@ no_sleep=False _test_v4(cfg) _test_v6(cfg) _test_tcp(cfg) - ip("link set dev %s xdpgeneric off" % cfg.ifname) def test_xdp_native_sb(cfg, netnl) -> None: _set_xdp_native_sb_on(cfg) @@ -181,7 +185,6 @@ no_sleep=False _test_v4(cfg) _test_v6(cfg) _test_tcp(cfg) - ip("link set dev %s xdp off" % cfg.ifname) def test_xdp_native_mb(cfg, netnl) -> None: _set_xdp_native_mb_on(cfg) @@ -193,14 +196,12 @@ no_sleep=False _test_v4(cfg) _test_v6(cfg) _test_tcp(cfg) - ip("link set dev %s xdp off" % cfg.ifname) def test_xdp_offload(cfg, netnl) -> None: _set_xdp_offload_on(cfg) 
_test_v4(cfg) _test_v6(cfg) _test_tcp(cfg) - ip("link set dev %s xdpoffload off" % cfg.ifname) def main() -> None: with NetDrvEpEnv(__file__) as cfg: @@ -213,7 +214,6 @@ no_sleep=False test_xdp_native_mb, test_xdp_offload], args=(cfg, EthtoolFamily())) - set_interface_init(cfg) ksft_exit() -- 2.48.1

7 months, 2 weeks

3
2
0 0

[PATCH v4 00/12] Direct Map Removal for guest_memfd

by Patrick Roy

Unmapping virtual machine guest memory from the host kernel's direct map is a successful mitigation against Spectre-style transient execution issues: If the kernel page tables do not contain entries pointing to guest memory, then any attempted speculative read through the direct map will necessarily be blocked by the MMU before any observable microarchitectural side-effects happen. This means that Spectre-gadgets and similar cannot be used to target virtual machine memory. Roughly 60% of speculative execution issues fall into this category [1, Table 1]. This patch series extends guest_memfd with the ability to remove its memory from the host kernel's direct map, to be able to attain the above protection for KVM guests running inside guest_memfd. === Changes to RFC v3 === - Settle relationship between direct map removal and shared/private memory in guest_memfd (David H.) - Omit TLB flushes upon direct map removal again - Settle uABI for how KVM accesses guest memory in non-CoCo guest_memfd VMs (upstream guest_memfd calls) - Add selftests that exercise the codepaths of non-CoCo guest_memfd VMs Lastly, this series is rebased on top of Fuad's v4 for shared mapping of guest_memfd [2]. The KVM parts should also apply on top of 0ad2507d5d93 ("Linux 6.14-rc3"), but the selftest patches need Fuad's series as base. === Overview === guest_memfd should be usable for "non-CoCo" VMs - virtual machines where host userspace is trusted (e.g. can have access to all of guest memory), but which should still be hardened against speculative execution attacks (Spectre, etc.) staged through potentially existing gadgets in the host kernel. To attain this hardening, unmap guest memory from the host kernels address space (e.g. zap direct map entries), while allowing KVM to continue accessing guest memory through userspace mappings. 
This works because KVM already almost always uses userspace mappings whenever KVM needs to access guest memory - the only parts that require direct map entries (because they use GUP) are KVM's MMU, and kvm-clock on x86. Building on top of guest_memfd sidesteps the MMU problem, as for memslots with KVM_MEM_GUEST_MEMFD set, the MMU consumes fd + offset directly without going through any VMAs. kvm-clock on the other hand is not strictly needed (guests boot fine without it), so ignore it for now. === Implementation === Make KVM_CREATE_GUEST_MEMFD accept a flag (KVM_GMEM_NO_DIRECT_MAP) that instructs it to remove newly allocated folios from the host kernels direct map immediately after preparation. Nothing further is needed to make non-CoCo VMs work - particularly, KVM does not need to be taught any special ways of accessing guest memory if it is in guest_memfd. Userspace can simply mmap guest_memfd (via KVM_GMEM_SHARED_MEM added in Fuad's series), and set the memslot's userspace_addr to this userspace mapping of guest_memfd. === Open Questions === In this patch series, stale TLB entries do not get flushed after direct map entries are marked as not present. This is fine from a functional point of view (as the mapping is still valid, it's just temporarily not supposed to be used), but pokes a theoretical hole into the speculation protection: Something could try to keep alive stale TLB entries for specific pages until the guest starts using them for sensitive information, and then stage a Spectre attack on that memory, despite it being unmapped. In practice, this would require knowing in advance, at gmem fault-time, which pages will eventually contain information of interest, and then preventing these specific TLB entries from getting naturally evicted (where the number of pages that can be targeted like this is limited by the size of the TLB). These seem to be fairly difficult requisites to fulfill, but we were wondering what the community thinks. 
=== Summary === Patch 1 adds a struct address_space flag that indicates that folios in a mapping are direct map removed, and threads it through mm code to ensure direct map removed folios don't end up in places where they can cause mayhem (particularly, we reject them in get_user_pages). Since these checks end up being duplicates of already existing checks for secretmem folios, patch 2 unifies the two by using the new address_space flag for secretmem mappings. Patches 3 through 5 are about support for direct map removal in guest_memfd, while patches 6 through 12 are about testing the non-CoCo setup in KVM selftests, with patches 6 through 9 being preparatory, and patches 10 through 12 adding the actual test cases. [1]: https://download.vusec.net/papers/quarantine_raid23.pdf [2]: https://lore.kernel.org/kvm/20250218172500.807733-1-tabba@google.com/ [RFC v1]: https://lore.kernel.org/kvm/20240709132041.3625501-1-roypat@amazon.co.uk/ [RFC v2]: https://lore.kernel.org/kvm/20240910163038.1298452-1-roypat@amazon.co.uk/ [RFC v3]: https://lore.kernel.org/kvm/20241030134912.515725-1-roypat@amazon.co.uk/ Patrick Roy (12): mm: introduce AS_NO_DIRECT_MAP mm/secretmem: set AS_NO_DIRECT_MAP instead of special-casing KVM: guest_memfd: Add flag to remove from direct map KVM: Add capability to discover KVM_GMEM_NO_DIRECT_MAP support KVM: Documentation: document KVM_GMEM_NO_DIRECT_MAP flag KVM: selftests: load elf via bounce buffer KVM: selftests: set KVM_MEM_GUEST_MEMFD in vm_mem_add() if guest_memfd != -1 KVM: selftests: Add guest_memfd based vm_mem_backing_src_types KVM: selftests: stuff vm_mem_backing_src_type into vm_shape KVM: selftests: adjust test_create_guest_memfd_invalid KVM: selftests: set KVM_GMEM_NO_DIRECT_MAP in mem conversion tests KVM: selftests: Test guest execution from direct map removed gmem Documentation/virt/kvm/api.rst | 13 ++++ include/linux/pagemap.h | 16 +++++ include/linux/secretmem.h | 18 ------ include/uapi/linux/kvm.h | 3 + lib/buildid.c | 4 +- mm/gup.c | 
14 +--- mm/mlock.c | 2 +- mm/secretmem.c | 6 +- .../testing/selftests/kvm/guest_memfd_test.c | 2 +- .../testing/selftests/kvm/include/kvm_util.h | 29 ++++++--- .../testing/selftests/kvm/include/test_util.h | 8 +++ tools/testing/selftests/kvm/lib/elf.c | 8 +-- tools/testing/selftests/kvm/lib/io.c | 23 +++++++ tools/testing/selftests/kvm/lib/kvm_util.c | 64 +++++++++++-------- tools/testing/selftests/kvm/lib/test_util.c | 8 +++ tools/testing/selftests/kvm/lib/x86/sev.c | 1 + .../selftests/kvm/pre_fault_memory_test.c | 1 + .../selftests/kvm/set_memory_region_test.c | 40 ++++++++++++ .../kvm/x86/private_mem_conversions_test.c | 7 +- virt/kvm/guest_memfd.c | 24 ++++++- virt/kvm/kvm_main.c | 5 ++ 21 files changed, 214 insertions(+), 82 deletions(-) base-commit: da40655874b54a2b563f8ceb3ed839c6cd38e0b4 -- 2.48.1

7 months, 2 weeks

3
25
0 0

[PATCH v1 1/3] selftests/mm: Fix half_ufd_size_MB calculation

by Ryan Roberts

$half_ufd_size_MB is supposed to be half of the available hugetlb memory expressed in MB. But previously it was calculated in pages since $freepgs is the number of free pages. When huge pages are 2M it doesn't make a whole lot of difference; the number of pages that get used is just halved. But on arm64 with 16K or 64K base pages, the PMD size (and default hugetlb size) is 32M and 512M respectively. So in this case we end up passing a number of MB that is smaller than a single hugetlb page and the test raises an error. Fixes: 2e47a445d7b3 ("selftests/mm: run_vmtests.sh: fix hugetlb mem size calculation") Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com> --- tools/testing/selftests/mm/run_vmtests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index da7e26668103..14fa9d40d574 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -304,7 +304,7 @@ uffd_stress_bin=./uffd-stress CATEGORY="userfaultfd" run_test ${uffd_stress_bin} anon 20 16 # Hugetlb tests require source and destination huge pages. Pass in half # the size of the free pages we have, which is used for *each*. -half_ufd_size_MB=$((freepgs / 2)) +half_ufd_size_MB=$(((freepgs * hpgsize_KB / 2) / 1024)) CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb "$half_ufd_size_MB" 32 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb-private "$half_ufd_size_MB" 32 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem 20 16 -- 2.43.0

7 months, 2 weeks

3
9
0 0

[PATCH] rseq/selftests: ensure the rseq abi TLS is actually 1024 bytes

by Michael Jeanson

Adding the aligned(1024) attribute to the definition of __rseq_abi did not increase its size to 1024, for this attribute to impact the size of __rseq_abi it would need to be added to the declaration of 'struct rseq_abi'. We only want to increase the size of the TLS allocation to ensure registration will succeed with future extended ABI. Use a union with a dummy member to ensure we allocate 1024 bytes. Signed-off-by: Michael Jeanson <mjeanson(a)efficios.com> Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com> --- tools/testing/selftests/rseq/rseq.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index f6156790c3b4..aa9ae866bc1a 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -71,9 +71,20 @@ static int rseq_ownership; /* Original struct rseq allocation size is 32 bytes. */ #define ORIG_RSEQ_ALLOC_SIZE 32 +/* + * Use a union to ensure we allocate a TLS area of 1024 bytes to accomodate an + * rseq registration that is larger than the current rseq ABI. + */ +union rseq { + struct rseq_abi abi; + char dummy[RSEQ_THREAD_AREA_ALLOC_SIZE]; +}; + static -__thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"), aligned(RSEQ_THREAD_AREA_ALLOC_SIZE))) = { - .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED, +__thread union rseq __rseq __attribute__((tls_model("initial-exec"))) = { + .abi = { + .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED, + }, }; static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len, @@ -149,7 +160,7 @@ int rseq_register_current_thread(void) /* Treat libc's ownership as a successful registration. 
*/ return 0; } - rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), 0, RSEQ_SIG); + rc = sys_rseq(&__rseq.abi, get_rseq_min_alloc_size(), 0, RSEQ_SIG); if (rc) { /* * After at least one thread has registered successfully @@ -183,7 +194,7 @@ int rseq_unregister_current_thread(void) /* Treat libc's ownership as a successful unregistration. */ return 0; } - rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); + rc = sys_rseq(&__rseq.abi, get_rseq_min_alloc_size(), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); if (rc) return -1; return 0; @@ -249,7 +260,7 @@ void rseq_init(void) rseq_ownership = 1; /* Calculate the offset of the rseq area from the thread pointer. */ - rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer(); + rseq_offset = (void *)&__rseq.abi - rseq_thread_pointer(); /* rseq flags are deprecated, always set to 0. */ rseq_flags = 0; -- 2.43.0

7 months, 2 weeks

2
3
0 0

[PATCH 00/13] selftests/bpf: Integrate test_xsk.c to test_progs framework

by Bastien Curutchet (eBPF Foundation)

Hi all, This patch series continues the work to migrate the script tests into prog_tests. The test_xsk.sh script tests lots of AF_XDP use cases. The tests it uses are defined in xskxceiver.c. As this script is used to test real hardware, the goal here is to keep it as is and only integrate the tests on veth peers into the test_progs framework. Three tests are flaky on s390 so they won't be integrated to test_progs yet (I'm currently trying to make them more robust). PATCH 1 & 2 fix some small issues in xskxceiver.c PATCH 3 to 9 rework the xskxceiver to ease the integration in the test_progs framework. Two main points are addressed in them: - wrap kselftest calls behind macros to ease their replacement later - handle all errors to release resources instead of calling exit() when any error occurs. PATCH 10 extracts test_xsk[.c/.h] from xskxceiver[.c/.h] to make the tests available to test_progs PATCH 11 enables kselftest de-activation PATCH 12 isolates the flaky tests PATCH 13 integrates the non-flaky tests to the test_progs framework Signed-off-by: Bastien Curutchet (eBPF Foundation) <bastien.curutchet(a)bootlin.com> --- Bastien Curutchet (eBPF Foundation) (13): selftests/bpf: test_xsk: Initialize bitmap before use selftests/bpf: test_xsk: Fix memory leaks selftests/bpf: test_xsk: Wrap ksft_*() behind macros selftests/bpf: test_xsk: Add return value to init_iface() selftests/bpf: test_xsk: Don't exit immediately when xsk_attach fails selftests/bpf: test_xsk: Don't exit immediately when gettimeofday fails selftests/bpf: test_xsk: Don't exit immediately when workers fail selftests/bpf: test_xsk: Don't exit immediately if validate_traffic fails selftests/bpf: test_xsk: Don't exit immediately on allocation failures selftests/bpf: test_xsk: Split xskxceiver selftests/bpf: test_xsk: Make kselftest dependency optional selftests/bpf: test_xsk: Isolate flaky tests selftests/bpf: test_xsk: Integrate test_xsk.c to test_progs framework tools/testing/selftests/bpf/Makefile | 13 +- 
tools/testing/selftests/bpf/prog_tests/test_xsk.c | 2416 ++++++++++++++++++++ tools/testing/selftests/bpf/prog_tests/test_xsk.h | 299 +++ tools/testing/selftests/bpf/prog_tests/xsk.c | 178 ++ tools/testing/selftests/bpf/xskxceiver.c | 2543 +-------------------- tools/testing/selftests/bpf/xskxceiver.h | 153 -- 6 files changed, 3021 insertions(+), 2581 deletions(-) --- base-commit: 720c696b16a1b1680f64cac9b3bb9e312a23ac47 change-id: 20250218-xsk-0cf90e975d14 Best regards, -- Bastien Curutchet (eBPF Foundation) <bastien.curutchet(a)bootlin.com>

7 months, 2 weeks

3
18
0 0

[PATCH 1/2] selftests/pid_namespace: fix building with clang-20

by Dmitry Antipov

When using 'make LLVM=1 W=1 -C tools/testing/selftests/pid_namespace' with clang-20, I've noticed the following: pid_max.c:42:8: error: call to undeclared function 'mount'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] 42 | ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0); | ^ pid_max.c:42:29: error: use of undeclared identifier 'MS_PRIVATE' 42 | ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0); | ^ ... So include '<sys/mount.h>' to add all of the required declarations. Signed-off-by: Dmitry Antipov <dmantipov(a)yandex.ru> --- tools/testing/selftests/pid_namespace/pid_max.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/pid_namespace/pid_max.c b/tools/testing/selftests/pid_namespace/pid_max.c index 51c414faabb0..972bedc475f1 100644 --- a/tools/testing/selftests/pid_namespace/pid_max.c +++ b/tools/testing/selftests/pid_namespace/pid_max.c @@ -11,6 +11,7 @@ #include <string.h> #include <syscall.h> #include <sys/wait.h> +#include <sys/mount.h> #include "../kselftest_harness.h" #include "../pidfd/pidfd.h" -- 2.48.1

7 months, 2 weeks

2
2
0 0

[PATCH v4 iproute2-next 0/1] DualPI2 iproute2 patch

by chia-yu.chang＠nokia-bell-labs.com

From: Chia-Yu Chang <chia-yu.chang(a)nokia-bell-labs.com> Hello, Please find DUALPI2 iproute2 patch v4. v4 - Add min_qlen_step to dualpi2 attribute as the minimum queue length in number of packets in the L-queue to start step marking. v3 - Add memlimit to dualpi2 attribute, and add memory_used, max_memory_used, memory_limit in dualpi2 stats (Dave Taht <dave.taht(a)gmail.com>) - Update manual to align with the latest implementation and clarify the queue naming and default unit - Use common "get_scaled_alpha_beta" and clean print_opt for Dualpi2 v2 - Rename get_float in dualpi2 to get_float_min_max in utils.c - Move get_float from iplink_can.c to utils.c (Stephen Hemminger <stephen(a)networkplumber.org>) - Add print function for JSON of dualpi2 (Stephen Hemminger <stephen(a)networkplumber.org>) For more details of DualPI2, please refer to IETF RFC9332 (https://datatracker.ietf.org/doc/html/rfc9332). Best Regards, Chia-Yu Chia-Yu Chang (1): tc: add dualpi2 scheduler module bash-completion/tc | 11 +- include/uapi/linux/pkt_sched.h | 39 +++ include/utils.h | 2 + ip/iplink_can.c | 14 - lib/utils.c | 30 ++ man/man8/tc-dualpi2.8 | 249 ++++++++++++++++ tc/Makefile | 1 + tc/q_dualpi2.c | 508 +++++++++++++++++++++++++++++++++ 8 files changed, 839 insertions(+), 15 deletions(-) create mode 100644 man/man8/tc-dualpi2.8 create mode 100644 tc/q_dualpi2.c -- 2.34.1

7 months, 2 weeks

3
7
0 0

[PATCH 0/2] selftests: livepatch: test if ftrace can trace a livepatched function

by Filipe Xavier

This patchset adds ftrace helper functions and adds a new test that makes sure that ftrace can trace a function that was introduced by a livepatch. Signed-off-by: Filipe Xavier <felipeaggger(a)gmail.com> --- Filipe Xavier (2): selftests: livepatch: add new ftrace helpers functions selftests: livepatch: test if ftrace can trace a livepatched function tools/testing/selftests/livepatch/functions.sh | 45 ++++++++++++++++++++++++ tools/testing/selftests/livepatch/test-ftrace.sh | 35 ++++++++++++++++++ 2 files changed, 80 insertions(+) --- base-commit: 848e076317446f9c663771ddec142d7c2eb4cb43 change-id: 20250306-ftrace-sftest-livepatch-60d9dc472235 Best regards, -- Filipe Xavier <felipeaggger(a)gmail.com>

7 months, 2 weeks

3
7
0 0

[PATCH] tools/nolibc: Add support for SPARC

by Thomas Weißschuh

Add support for 32bit and 64bit SPARC to nolibc. Signed-off-by: Thomas Weißschuh <linux(a)weissschuh.net> --- This is only tested on QEMU. Any tests on real hardware would be very welcome. --- tools/include/nolibc/arch-sparc.h | 191 ++++++++++++++++++++++++++++ tools/include/nolibc/arch.h | 2 + tools/testing/selftests/nolibc/Makefile | 11 ++ tools/testing/selftests/nolibc/run-tests.sh | 2 + 4 files changed, 206 insertions(+) diff --git a/tools/include/nolibc/arch-sparc.h b/tools/include/nolibc/arch-sparc.h new file mode 100644 index 0000000000000000000000000000000000000000..cb5543eca87bb4d52cfba4c0668e35cbbf6dd124 --- /dev/null +++ b/tools/include/nolibc/arch-sparc.h @@ -0,0 +1,191 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * SPARC (32bit and 64bit) specific definitions for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh <linux(a)weissschuh.net> + */ + +#ifndef _NOLIBC_ARCH_SPARC_H +#define _NOLIBC_ARCH_SPARC_H + +#include <linux/unistd.h> + +#include "compiler.h" +#include "crt.h" + +/* + * Syscalls for SPARC: + * - registers are native word size + * - syscall number is passed in g1 + * - arguments are in o0-o5 + * - the system call is performed by calling a trap instruction + * - syscall return value is in 0a + * - syscall error flag is in the carry bit of the processor status register + */ + +#ifdef __arch64__ + +#define _NOLIBC_SYSCALL "t 0x6d\n" \ + "bcs,a %%xcc, 1f\n" \ + "sub %%g0, %%o0, %%o0\n" \ + "1:\n" + +#else + +#define _NOLIBC_SYSCALL "t 0x10\n" \ + "bcs,a 1f\n" \ + "sub %%g0, %%o0, %%o0\n" \ + "1:\n" + +#endif /* __arch64__ */ + +#define my_syscall0(num) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0"); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + \ + __asm__ volatile ( \ 
+ _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); \ + register long _arg3 __asm__ ("o2") = (long)(arg3); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); \ + register long _arg3 __asm__ ("o2") = (long)(arg3); \ + register long _arg4 __asm__ ("o3") = (long)(arg4); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); \ + register long _arg3 __asm__ ("o2") = (long)(arg3); \ + register long _arg4 __asm__ ("o3") = (long)(arg4); \ + register long _arg5 __asm__ ("o4") = (long)(arg5); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num __asm__ 
("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); \ + register long _arg3 __asm__ ("o2") = (long)(arg3); \ + register long _arg4 __asm__ ("o3") = (long)(arg4); \ + register long _arg5 __asm__ ("o4") = (long)(arg5); \ + register long _arg6 __asm__ ("o5") = (long)(arg6); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +/* startup code */ +void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) +{ + __asm__ volatile ( + /* + * Save stack pointer to o0, as arg1 of _start_c. + * Account for window save area and stack bias. + */ +#ifdef __arch64__ + "add %sp, 128 + 2047, %o0\n" +#else + "add %sp, 64, %o0\n" +#endif + "b,a _start_c\n" /* transfer to c runtime */ + ); + __nolibc_entrypoint_epilogue(); +} + +static pid_t getpid(void); + +static __attribute__((unused)) +pid_t sys_fork(void) +{ + pid_t parent, ret; + + parent = getpid(); + ret = my_syscall0(__NR_fork); + + /* The syscall returns the parent pid in the child instead of 0 */ + if (ret == parent) + return 0; + else + return ret; +} +#define sys_fork sys_fork + +#endif /* _NOLIBC_ARCH_SPARC_H */ diff --git a/tools/include/nolibc/arch.h b/tools/include/nolibc/arch.h index 8a2c143c0fba288147e5a7bf9db38ffb08367616..b8c1da9a88d1593d5a97f60909ede5d0c17699eb 100644 --- a/tools/include/nolibc/arch.h +++ b/tools/include/nolibc/arch.h @@ -33,6 +33,8 @@ #include "arch-s390.h" #elif defined(__loongarch__) #include "arch-loongarch.h" +#elif defined(__sparc__) +#include "arch-sparc.h" #else #error Unsupported Architecture #endif diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index 58bcbbd029bc3ad9ccac968191b703ccf5df0717..5060e189dc842d761dd13d70b8afdb2ff3390bc5 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ 
b/tools/testing/selftests/nolibc/Makefile @@ -56,6 +56,8 @@ ARCH_mips32be = mips ARCH_riscv32 = riscv ARCH_riscv64 = riscv ARCH_s390x = s390 +ARCH_sparc32 = sparc +ARCH_sparc64 = sparc ARCH := $(or $(ARCH_$(XARCH)),$(XARCH)) # kernel image names by architecture @@ -76,6 +78,8 @@ IMAGE_riscv64 = arch/riscv/boot/Image IMAGE_s390x = arch/s390/boot/bzImage IMAGE_s390 = arch/s390/boot/bzImage IMAGE_loongarch = arch/loongarch/boot/vmlinuz.efi +IMAGE_sparc32 = arch/sparc/boot/image +IMAGE_sparc64 = arch/sparc/boot/image IMAGE = $(objtree)/$(IMAGE_$(XARCH)) IMAGE_NAME = $(notdir $(IMAGE)) @@ -97,6 +101,8 @@ DEFCONFIG_riscv64 = defconfig DEFCONFIG_s390x = defconfig DEFCONFIG_s390 = defconfig compat.config DEFCONFIG_loongarch = defconfig +DEFCONFIG_sparc32 = sparc32_defconfig +DEFCONFIG_sparc64 = sparc64_defconfig DEFCONFIG = $(DEFCONFIG_$(XARCH)) EXTRACONFIG = $(EXTRACONFIG_$(XARCH)) @@ -122,6 +128,8 @@ QEMU_ARCH_riscv64 = riscv64 QEMU_ARCH_s390x = s390x QEMU_ARCH_s390 = s390x QEMU_ARCH_loongarch = loongarch64 +QEMU_ARCH_sparc32 = sparc +QEMU_ARCH_sparc64 = sparc64 QEMU_ARCH = $(QEMU_ARCH_$(XARCH)) QEMU_ARCH_USER_ppc64le = ppc64le @@ -152,6 +160,8 @@ QEMU_ARGS_riscv64 = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_T QEMU_ARGS_s390x = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_s390 = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_loongarch = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_sparc32 = -M SS-5 -m 256M -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_sparc64 = -M sun4u -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS = -m 1G $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_BIOS) $(QEMU_ARGS_EXTRA) # OUTPUT is only set when run from the main makefile, otherwise @@ -174,6 +184,7 @@ CFLAGS_s390x = -m64 CFLAGS_s390 = -m31 CFLAGS_mips32le = -EL -mabi=32 -fPIC CFLAGS_mips32be = -EB -mabi=32 +CFLAGS_sparc32 = 
$(call cc-option,-m32) CFLAGS_STACKPROTECTOR ?= $(call cc-option,-mstack-protector-guard=global $(call cc-option,-fstack-protector-all)) CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wextra \ $(call cc-option,-fno-stack-protector) $(call cc-option,-Wmissing-prototypes) \ diff --git a/tools/testing/selftests/nolibc/run-tests.sh b/tools/testing/selftests/nolibc/run-tests.sh index 0299a0912d4049dd12217f9835b81d231e1d2bfd..040956a9f5b8dda3e78abc0d4b6073f4fcd9e3ee 100755 --- a/tools/testing/selftests/nolibc/run-tests.sh +++ b/tools/testing/selftests/nolibc/run-tests.sh @@ -25,6 +25,7 @@ all_archs=( riscv32 riscv64 s390x s390 loongarch + sparc32 sparc64 ) archs="${all_archs[@]}" @@ -111,6 +112,7 @@ crosstool_arch() { loongarch) echo loongarch64;; mips*) echo mips;; s390*) echo s390;; + sparc*) echo sparc64;; *) echo "$1";; esac } --- base-commit: bceb73904c855c78402dca94c82915f078f259dd change-id: 20250226-nolibc-sparc-abf4775dc813 Best regards, -- Thomas Weißschuh <linux(a)weissschuh.net>

7 months, 2 weeks

3
5
0 0

[PATCH v9 00/14] iommufd: Add vIOMMU infrastructure (Part-3: vEVENTQ)

by Nicolin Chen

As the vIOMMU infrastructure series part-3, this introduces a new vEVENTQ object. The existing FAULT object provides a nice notification pathway to the user space with a queue already, so let vEVENTQ reuse that. Mimicking the HWPT structure, add a common EVENTQ structure to support its derivatives: IOMMUFD_OBJ_FAULT (existing) and IOMMUFD_OBJ_VEVENTQ (new). An IOMMUFD_CMD_VEVENTQ_ALLOC is introduced to allocate vEVENTQ object for vIOMMUs. One vIOMMU can have multiple vEVENTQs in different types but can not support multiple vEVENTQs in the same type. The forwarding part is fairly simple but might need to replace a physical device ID with a virtual device ID in a driver-level event data structure. So, this also adds some helpers for drivers to use. As usual, this series comes with the selftest coverage for this new ioctl and with a real world use case in the ARM SMMUv3 driver. This is on Github: https://github.com/nicolinc/iommufd/commits/iommufd_veventq-v9 Pairing QEMU branch for testing: https://github.com/nicolinc/qemu/commits/wip/for_iommufd_veventq-v9 Changelog v9 * Add Acked-by from Will * Fix typo in commit logs and reviewer name * Allow invalid nested STE for C_BAD_STE report * Drop extra indentation in arm_smmu_handle_event() * Drop comments in arm_smmu_attach_prepare_vmaster() v8 https://lore.kernel.org/all/cover.1740504232.git.nicolinc@nvidia.com/ * Add Reviewed-by from Jason and Pranjal * Fix errno returned in arm_smmu_handle_event() * Validate domain->type outside of arm_smmu_attach_prepare_vmaster() * Drop unnecessary vmaster comparison in arm_smmu_attach_commit_vmaster() v7 https://lore.kernel.org/all/cover.1740238876.git.nicolinc@nvidia.com/ * Rebase on Jason's for-next tree for latest fault.c * Add Reviewed-by * Update commit logs * Add __reserved field sanity * Skip kfree() on the static header * Replace "bool on_list" with list_is_last() * Use u32 for flags in iommufd_vevent_header * Drop casting in iommufd_viommu_get_vdev_id() * Update the bounding 
logic to veventq->sequence * Add missing cpu_to_le64() around STRTAB_STE_1_MEV * Reuse veventq->common.lock to fence sequence and num_events * Rename overflow to lost_events and log it in upon kmalloc failure * Correct the error handling part in iommufd_veventq_deliver_fetch() * Add an arm_smmu_clear_vmaster() to simplify identity/blocked domain attach ops * Add additional four event records to forward to user space VM, and update the uAPI doc * Reuse the existing smmu->streams_mutex lock to fence master->vmaster pointer, instead of adding a new rwsem v6 https://lore.kernel.org/all/cover.1737754129.git.nicolinc@nvidia.com/ * Drop supports_veventq viommu op * Split bug/cosmetics fixes out of the series * Drop the blocking mutex around copy_to_user() * Add veventq_depth in uAPI to limit vEVENTQ size * Revise the documentation for a clear description * Fix sparse warnings in arm_vmaster_report_event() * Rework iommufd_viommu_get_vdev_id() to return -ENOENT v.s. 0 * Allow Abort/Bypass STEs to allocate vEVENTQ and set STE.MEV for DoS mitigations v5 https://lore.kernel.org/all/cover.1736237481.git.nicolinc@nvidia.com/ * Add Reviewed-by from Baolu * Reorder the OBJ list as well * Fix alphabetical order after renaming in v4 * Add supports_veventq viommu op for vEVENTQ type validation v4 https://lore.kernel.org/all/cover.1735933254.git.nicolinc@nvidia.com/ * Rename "vIRQ" to "vEVENTQ" * Use flexible array in struct iommufd_vevent * Add the new ioctl command to union ucmd_buffer * Fix the alphabetical order in union ucmd_buffer too * Rename _TYPE_NONE to _TYPE_DEFAULT aligning with vIOMMU naming v3 https://lore.kernel.org/all/cover.1734477608.git.nicolinc@nvidia.com/ * Rebase on Will's for-joerg/arm-smmu/updates for arm_smmu_event series * Add "Reviewed-by" lines from Kevin * Fix typos in comments, kdocs, and jump tags * Add a patch to sort struct iommufd_ioctl_op * Update iommufd's userpsace-api documentation * Update uAPI kdoc to quote SMMUv3 offical spec * Drop the unused 
workqueue in struct iommufd_virq * Drop might_sleep() in iommufd_viommu_report_irq() helper * Add missing "break" in iommufd_viommu_get_vdev_id() helper * Shrink the scope of the vmaster's read lock in SMMUv3 driver * Pass in two arguments to iommufd_eventq_virq_handler() helper * Move "!ops || !ops->read" validation into iommufd_eventq_init() * Move "fault->ictx = ictx" closer to iommufd_ctx_get(fault->ictx) * Update commit message for arm_smmu_attach_prepare/commit_vmaster() * Keep "iommufd_fault" as-is and rename "iommufd_eventq_virq" to just "iommufd_virq" v2 https://lore.kernel.org/all/cover.1733263737.git.nicolinc@nvidia.com/ * Rebase on v6.13-rc1 * Add IOPF and vIRQ in iommufd.rst (userspace-api) * Add a proper locking in iommufd_event_virq_destroy * Add iommufd_event_virq_abort with a lockdep_assert_held * Rename "EVENT_*" to "EVENTQ_*" to describe the objects better * Reorganize flows in iommufd_eventq_virq_alloc for abort() to work * Adde struct arm_smmu_vmaster to store vSID upon attaching to a nested domain, calling a newly added iommufd_viommu_get_vdev_id helper * Adde an arm_vmaster_report_event helper in arm-smmu-v3-iommufd file to simplify the routine in arm_smmu_handle_evt() of the main driver v1 https://lore.kernel.org/all/cover.1724777091.git.nicolinc@nvidia.com/ Thanks! 
Nicolin Nicolin Chen (14): iommufd/fault: Move two fault functions out of the header iommufd/fault: Add an iommufd_fault_init() helper iommufd: Abstract an iommufd_eventq from iommufd_fault iommufd: Rename fault.c to eventq.c iommufd: Add IOMMUFD_OBJ_VEVENTQ and IOMMUFD_CMD_VEVENTQ_ALLOC iommufd/viommu: Add iommufd_viommu_get_vdev_id helper iommufd/viommu: Add iommufd_viommu_report_event helper iommufd/selftest: Require vdev_id when attaching to a nested domain iommufd/selftest: Add IOMMU_TEST_OP_TRIGGER_VEVENT for vEVENTQ coverage iommufd/selftest: Add IOMMU_VEVENTQ_ALLOC test coverage Documentation: userspace-api: iommufd: Update FAULT and VEVENTQ iommu/arm-smmu-v3: Introduce struct arm_smmu_vmaster iommu/arm-smmu-v3: Report events that belong to devices attached to vIOMMU iommu/arm-smmu-v3: Set MEV bit in nested STE for DoS mitigations drivers/iommu/iommufd/Makefile | 2 +- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 36 ++ drivers/iommu/iommufd/iommufd_private.h | 135 +++- drivers/iommu/iommufd/iommufd_test.h | 10 + include/linux/iommufd.h | 23 + include/uapi/linux/iommufd.h | 105 +++ tools/testing/selftests/iommu/iommufd_utils.h | 115 ++++ .../arm/arm-smmu-v3/arm-smmu-v3-iommufd.c | 60 ++ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 80 ++- drivers/iommu/iommufd/driver.c | 72 +++ drivers/iommu/iommufd/eventq.c | 597 ++++++++++++++++++ drivers/iommu/iommufd/fault.c | 342 ---------- drivers/iommu/iommufd/hw_pagetable.c | 6 +- drivers/iommu/iommufd/main.c | 7 + drivers/iommu/iommufd/selftest.c | 54 ++ drivers/iommu/iommufd/viommu.c | 2 + tools/testing/selftests/iommu/iommufd.c | 36 ++ .../selftests/iommu/iommufd_fail_nth.c | 7 + Documentation/userspace-api/iommufd.rst | 17 + 19 files changed, 1298 insertions(+), 408 deletions(-) create mode 100644 drivers/iommu/iommufd/eventq.c delete mode 100644 drivers/iommu/iommufd/fault.c base-commit: a05df03a88bc1088be8e9d958f208d6484691e43 -- 2.43.0

7 months, 2 weeks

2
15
0 0

[PATCH] selftests/bpf: Replace deprecated strncpy() with strscpy()

by feng.wei8＠zte.com.cn

From: FengWei <feng.wei8(a)zte.com.cn> strncpy() is deprecated for NUL-terminated destination buffers. Use strscpy() instead and remove the manual NUL-termination. Signed-off-by: FengWei <feng.wei8(a)zte.com.cn> --- tools/testing/selftests/bpf/test_verifier.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 447b68509d76..dfe64c6d4f87 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1320,8 +1320,7 @@ static bool cmp_str_seq(const char *log, const char *exp) printf("FAIL\nTestcase bug\n"); return false; } - strncpy(needle, exp, len); - needle[len] = 0; + strscpy(needle, exp, len); q = strstr(log, needle); if (!q) { printf("FAIL\nUnexpected verifier log!\n" -- 2.25.1

7 months, 2 weeks

2
1
0 0

[PATCH AUTOSEL 6.12 12/13] selftests: netfilter: skip br_netfilter queue tests if kernel is tainted

by Sasha Levin

From: Florian Westphal <fw(a)strlen.de> [ Upstream commit c21b02fd9cbf15aed6e32c89e0fd70070281e3d1 ] These scripts fail if the kernel is tainted which leads to wrong test failure reports in CI environments when an unrelated test triggers some splat. Check taint state at start of script and SKIP if its already dodgy. Signed-off-by: Florian Westphal <fw(a)strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo(a)netfilter.org> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/net/netfilter/br_netfilter.sh | 7 +++++++ .../testing/selftests/net/netfilter/br_netfilter_queue.sh | 7 +++++++ tools/testing/selftests/net/netfilter/nft_queue.sh | 1 + 3 files changed, 15 insertions(+) diff --git a/tools/testing/selftests/net/netfilter/br_netfilter.sh b/tools/testing/selftests/net/netfilter/br_netfilter.sh index c28379a965d83..1559ba275105e 100755 --- a/tools/testing/selftests/net/netfilter/br_netfilter.sh +++ b/tools/testing/selftests/net/netfilter/br_netfilter.sh @@ -13,6 +13,12 @@ source lib.sh checktool "nft --version" "run test without nft tool" +read t < /proc/sys/kernel/tainted +if [ "$t" -ne 0 ];then + echo SKIP: kernel is tainted + exit $ksft_skip +fi + cleanup() { cleanup_all_ns } @@ -165,6 +171,7 @@ if [ "$t" -eq 0 ];then echo PASS: kernel not tainted else echo ERROR: kernel is tainted + dmesg ret=1 fi diff --git a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh index 6a764d70ab06f..4788641717d93 100755 --- a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh +++ b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh @@ -4,6 +4,12 @@ source lib.sh checktool "nft --version" "run test without nft tool" +read t < /proc/sys/kernel/tainted +if [ "$t" -ne 0 ];then + echo SKIP: kernel is tainted + exit $ksft_skip +fi + cleanup() { cleanup_all_ns } @@ -72,6 +78,7 @@ if [ "$t" -eq 0 ];then echo PASS: kernel not tainted else echo ERROR: kernel is tainted + dmesg exit 
1 fi diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh index a9d109fcc15c2..00fe1a6c1f30c 100755 --- a/tools/testing/selftests/net/netfilter/nft_queue.sh +++ b/tools/testing/selftests/net/netfilter/nft_queue.sh @@ -593,6 +593,7 @@ EOF echo "PASS: queue program exiting while packets queued" else echo "TAINT: queue program exiting while packets queued" + dmesg ret=1 fi } -- 2.39.5

7 months, 2 weeks

1
0
0 0

[PATCH AUTOSEL 6.13 15/16] selftests: netfilter: skip br_netfilter queue tests if kernel is tainted

by Sasha Levin

From: Florian Westphal <fw(a)strlen.de> [ Upstream commit c21b02fd9cbf15aed6e32c89e0fd70070281e3d1 ] These scripts fail if the kernel is tainted which leads to wrong test failure reports in CI environments when an unrelated test triggers some splat. Check taint state at start of script and SKIP if its already dodgy. Signed-off-by: Florian Westphal <fw(a)strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo(a)netfilter.org> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/net/netfilter/br_netfilter.sh | 7 +++++++ .../testing/selftests/net/netfilter/br_netfilter_queue.sh | 7 +++++++ tools/testing/selftests/net/netfilter/nft_queue.sh | 1 + 3 files changed, 15 insertions(+) diff --git a/tools/testing/selftests/net/netfilter/br_netfilter.sh b/tools/testing/selftests/net/netfilter/br_netfilter.sh index c28379a965d83..1559ba275105e 100755 --- a/tools/testing/selftests/net/netfilter/br_netfilter.sh +++ b/tools/testing/selftests/net/netfilter/br_netfilter.sh @@ -13,6 +13,12 @@ source lib.sh checktool "nft --version" "run test without nft tool" +read t < /proc/sys/kernel/tainted +if [ "$t" -ne 0 ];then + echo SKIP: kernel is tainted + exit $ksft_skip +fi + cleanup() { cleanup_all_ns } @@ -165,6 +171,7 @@ if [ "$t" -eq 0 ];then echo PASS: kernel not tainted else echo ERROR: kernel is tainted + dmesg ret=1 fi diff --git a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh index 6a764d70ab06f..4788641717d93 100755 --- a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh +++ b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh @@ -4,6 +4,12 @@ source lib.sh checktool "nft --version" "run test without nft tool" +read t < /proc/sys/kernel/tainted +if [ "$t" -ne 0 ];then + echo SKIP: kernel is tainted + exit $ksft_skip +fi + cleanup() { cleanup_all_ns } @@ -72,6 +78,7 @@ if [ "$t" -eq 0 ];then echo PASS: kernel not tainted else echo ERROR: kernel is tainted + dmesg exit 
1 fi diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh index 785e3875a6da4..784d1b46912b0 100755 --- a/tools/testing/selftests/net/netfilter/nft_queue.sh +++ b/tools/testing/selftests/net/netfilter/nft_queue.sh @@ -593,6 +593,7 @@ EOF echo "PASS: queue program exiting while packets queued" else echo "TAINT: queue program exiting while packets queued" + dmesg ret=1 fi } -- 2.39.5

7 months, 2 weeks

1
0
0 0

[PATCH v9 0/4] rust: replace kernel::str::CStr w/ core::ffi::CStr

by Tamir Duberstein

This picks up from Michal Rostecki's work[0]. Per Michal's guidance I have omitted Co-authored tags, as the end result is quite different. Link: https://lore.kernel.org/rust-for-linux/20240819153656.28807-2-vadorovsky@pr… [0] Closes: https://github.com/Rust-for-Linux/linux/issues/1075 Signed-off-by: Tamir Duberstein <tamird(a)gmail.com> --- Changes in v9: - Rebase on rust-next. - Restore `impl Display for BStr` which exists upstream[1]. - Link: https://doc.rust-lang.org/nightly/std/bstr/struct.ByteStr.html#impl-Display… [1] - Link to v8: https://lore.kernel.org/r/20250203-cstr-core-v8-0-cb3f26e78686@gmail.com Changes in v8: - Move `{from,as}_char_ptr` back to `CStrExt`. This reduces the diff some. - Restore `from_bytes_with_nul_unchecked_mut`, `to_cstring`. - Link to v7: https://lore.kernel.org/r/20250202-cstr-core-v7-0-da1802520438@gmail.com Changes in v7: - Rebased on mainline. - Restore functionality added in commit a321f3ad0a5d ("rust: str: add {make,to}_{upper,lower}case() to CString"). - Used `diff.algorithm patience` to improve diff readability. - Link to v6: https://lore.kernel.org/r/20250202-cstr-core-v6-0-8469cd6d29fd@gmail.com Changes in v6: - Split the work into several commits for ease of review. - Restore `{from,as}_char_ptr` to allow building on ARM (see commit message). - Add `CStrExt` to `kernel::prelude`. (Alice Ryhl) - Remove `CStrExt::from_bytes_with_nul_unchecked_mut` and restore `DerefMut for CString`. (Alice Ryhl) - Rename and hide `kernel::c_str!` to encourage use of C-String literals. - Drop implementation and invocation changes in kunit.rs. (Trevor Gross) - Drop docs on `Display` impl. (Trevor Gross) - Rewrite docs in the style of the standard library. - Restore the `test_cstr_debug` unit tests to demonstrate that the implementation has changed. Changes in v5: - Keep the `test_cstr_display*` unit tests. Changes in v4: - Provide the `CStrExt` trait with `display()` method, which returns a `CStrDisplay` wrapper with `Display` implementation. 
This addresses the lack of `Display` implementation for `core::ffi::CStr`. - Provide `from_bytes_with_nul_unchecked_mut()` method in `CStrExt`, which might be useful and is going to prevent manual, unsafe casts. - Fix a typo (s/preffered/prefered/). Changes in v3: - Fix the commit message. - Remove redundant braces in `use`, when only one item is imported. Changes in v2: - Do not remove `c_str` macro. While it's preferred to use C-string literals, there are two cases where `c_str` is helpful: - When working with macros, which already return a Rust string literal (e.g. `stringify!`). - When building macros, where we want to take a Rust string literal as an argument (for caller's convenience), but still use it as a C-string internally. - Use Rust literals as arguments in macros (`new_mutex`, `new_condvar`, `new_mutex`). Use the `c_str` macro to convert these literals to C-string literals. - Use `c_str` in kunit.rs for converting the output of `stringify!` to a `CStr`. - Remove `DerefMut` implementation for `CString`. 
--- Tamir Duberstein (4): rust: move `CStr`'s `Display` to helper struct rust: replace `CStr` with `core::ffi::CStr` rust: replace `kernel::c_str!` with C-Strings rust: remove core::ffi::CStr reexport drivers/gpu/drm/drm_panic_qr.rs | 6 +- drivers/net/phy/ax88796b_rust.rs | 8 +- drivers/net/phy/qt2025.rs | 6 +- rust/kernel/device.rs | 7 +- rust/kernel/devres.rs | 2 +- rust/kernel/driver.rs | 4 +- rust/kernel/error.rs | 10 +- rust/kernel/faux.rs | 5 +- rust/kernel/firmware.rs | 8 +- rust/kernel/kunit.rs | 18 +- rust/kernel/lib.rs | 2 +- rust/kernel/miscdevice.rs | 5 +- rust/kernel/net/phy.rs | 12 +- rust/kernel/of.rs | 5 +- rust/kernel/pci.rs | 3 +- rust/kernel/platform.rs | 7 +- rust/kernel/prelude.rs | 2 +- rust/kernel/seq_file.rs | 4 +- rust/kernel/str.rs | 499 +++++++++++++---------------------- rust/kernel/sync.rs | 4 +- rust/kernel/sync/condvar.rs | 3 +- rust/kernel/sync/lock.rs | 4 +- rust/kernel/sync/lock/global.rs | 6 +- rust/kernel/sync/poll.rs | 1 + rust/kernel/workqueue.rs | 1 + rust/macros/module.rs | 2 +- samples/rust/rust_driver_faux.rs | 4 +- samples/rust/rust_driver_pci.rs | 4 +- samples/rust/rust_driver_platform.rs | 4 +- samples/rust/rust_misc_device.rs | 3 +- 30 files changed, 256 insertions(+), 393 deletions(-) --- base-commit: 433b1bd6e0a98938105c43c0553f24e0747ef52c change-id: 20250201-cstr-core-d4b9b69120cf Best regards, -- Tamir Duberstein <tamird(a)gmail.com>

7 months, 2 weeks

1
4
0 0

[PATCH v4 0/6] rust: reduce pointer casts, enable related lints

by Tamir Duberstein

This started with a patch that enabled `clippy::ptr_as_ptr`. Benno Lossin suggested I also look into `clippy::ptr_cast_constness` and I discovered `clippy::as_ptr_cast_mut`. This series now enables all 3 lints. It also enables `clippy::as_underscore` which ensures other pointer casts weren't missed. The first commit reduces the need for pointer casts and is shared with another series[1]. The final patch also enables pointer provenance lints and fixes violations. See that commit message for details. The build system portion of that commit is pretty messy but I couldn't find a better way to convincingly ensure that these lints were applied globally. Suggestions would be very welcome. Link: https://lore.kernel.org/all/20250307-no-offset-v1-0-0c728f63b69c@gmail.com/ [1] Signed-off-by: Tamir Duberstein <tamird(a)gmail.com> --- Changes in v4: - Add missing SoB. (Benno Lossin) - Use `without_provenance_mut` in alloc. (Boqun Feng) - Limit strict provenance lints to the `kernel` crate to avoid complex logic in the build system. This can be revisited on MSRV >= 1.84.0. - Rebase on rust-next. - Link to v3: https://lore.kernel.org/r/20250314-ptr-as-ptr-v3-0-e7ba61048f4a@gmail.com Changes in v3: - Fixed clippy warning in rust/kernel/firmware.rs. (kernel test robot) Link: https://lore.kernel.org/all/202503120332.YTCpFEvv-lkp@intel.com/ - s/as u64/as bindings::phys_addr_t/g. (Benno Lossin) - Use strict provenance APIs and enable lints. (Benno Lossin) - Link to v2: https://lore.kernel.org/r/20250309-ptr-as-ptr-v2-0-25d60ad922b7@gmail.com Changes in v2: - Fixed typo in first commit message. - Added additional patches, converted to series. 
- Link to v1: https://lore.kernel.org/r/20250307-ptr-as-ptr-v1-1-582d06514c98@gmail.com --- Tamir Duberstein (6): rust: retain pointer mut-ness in `container_of!` rust: enable `clippy::ptr_as_ptr` lint rust: enable `clippy::ptr_cast_constness` lint rust: enable `clippy::as_ptr_cast_mut` lint rust: enable `clippy::as_underscore` lint rust: use strict provenance APIs Makefile | 4 +++ init/Kconfig | 3 ++ rust/bindings/lib.rs | 1 + rust/kernel/alloc.rs | 2 +- rust/kernel/alloc/allocator_test.rs | 2 +- rust/kernel/alloc/kvec.rs | 4 +-- rust/kernel/block/mq/operations.rs | 2 +- rust/kernel/block/mq/request.rs | 7 +++-- rust/kernel/device.rs | 5 +-- rust/kernel/device_id.rs | 2 +- rust/kernel/devres.rs | 19 ++++++------ rust/kernel/error.rs | 2 +- rust/kernel/firmware.rs | 3 +- rust/kernel/fs/file.rs | 2 +- rust/kernel/io.rs | 16 +++++----- rust/kernel/kunit.rs | 15 +++++---- rust/kernel/lib.rs | 57 ++++++++++++++++++++++++++++++++-- rust/kernel/list/impl_list_item_mod.rs | 2 +- rust/kernel/miscdevice.rs | 2 +- rust/kernel/of.rs | 6 ++-- rust/kernel/pci.rs | 15 +++++---- rust/kernel/platform.rs | 6 ++-- rust/kernel/print.rs | 11 +++---- rust/kernel/rbtree.rs | 23 ++++++-------- rust/kernel/seq_file.rs | 3 +- rust/kernel/str.rs | 18 +++++------ rust/kernel/sync/poll.rs | 2 +- rust/kernel/uaccess.rs | 12 ++++--- rust/kernel/workqueue.rs | 12 +++---- rust/uapi/lib.rs | 1 + 30 files changed, 162 insertions(+), 97 deletions(-) --- base-commit: 2aadc0fc1f85d7a9ed2822ba7ee9f06775eb6d84 change-id: 20250307-ptr-as-ptr-21b1867fc4d4 Best regards, -- Tamir Duberstein <tamird(a)gmail.com>

7 months, 2 weeks

3
13
0 0

[PATCH net-next v23 00/23] Introducing OpenVPN Data Channel Offload

by Antonio Quartulli

Notable changes since v22: * ensure local endpoint is copied only if one is specified (compile error) Notable changes since v21: * accessed crypto_slot->primary_idx via READ/WRITE_ONCE * made ovpn_aead_init() static * converted link tx/rx packet counters from u32 to uint * ensured all u32 NL attributes are read by nla_get_u32() * ensured all u32 NL attributes are written by nla_put_u32() * reset cache upon float or local endpoint change * dropped check for delta > 0 in keepalive worker scheduling * improved comments in update endpoints logic * converted local_ip to void* to avoid useless casts Please note that some patches were already reviewed/tested by a few people. These patches have retained the tags as they have hardly been touched. The latest code can also be found at: https://github.com/OpenVPN/ovpn-net-next Thanks a lot! Best Regards, Antonio Quartulli OpenVPN Inc. --- Antonio Quartulli (23): net: introduce OpenVPN Data Channel Offload (ovpn) ovpn: add basic netlink support ovpn: add basic interface creation/destruction/management routines ovpn: keep carrier always on for MP interfaces ovpn: introduce the ovpn_peer object ovpn: introduce the ovpn_socket object ovpn: implement basic TX path (UDP) ovpn: implement basic RX path (UDP) ovpn: implement packet processing ovpn: store tunnel and transport statistics ovpn: implement TCP transport skb: implement skb_send_sock_locked_with_flags() ovpn: add support for MSG_NOSIGNAL in tcp_sendmsg ovpn: implement multi-peer support ovpn: implement peer lookup logic ovpn: implement keepalive mechanism ovpn: add support for updating local or remote UDP endpoint ovpn: implement peer add/get/dump/delete via netlink ovpn: implement key add/get/del/swap via netlink ovpn: kill key and notify userspace in case of IV exhaustion ovpn: notify userspace when a peer is deleted ovpn: add basic ethtool support testing/selftests: add test tool and scripts for ovpn module Documentation/netlink/specs/ovpn.yaml | 367 +++ 
Documentation/netlink/specs/rt_link.yaml | 16 + MAINTAINERS | 11 + drivers/net/Kconfig | 15 + drivers/net/Makefile | 1 + drivers/net/ovpn/Makefile | 22 + drivers/net/ovpn/bind.c | 55 + drivers/net/ovpn/bind.h | 101 + drivers/net/ovpn/crypto.c | 211 ++ drivers/net/ovpn/crypto.h | 145 ++ drivers/net/ovpn/crypto_aead.c | 409 ++++ drivers/net/ovpn/crypto_aead.h | 29 + drivers/net/ovpn/io.c | 462 ++++ drivers/net/ovpn/io.h | 34 + drivers/net/ovpn/main.c | 339 +++ drivers/net/ovpn/main.h | 14 + drivers/net/ovpn/netlink-gen.c | 213 ++ drivers/net/ovpn/netlink-gen.h | 41 + drivers/net/ovpn/netlink.c | 1249 ++++++++++ drivers/net/ovpn/netlink.h | 18 + drivers/net/ovpn/ovpnpriv.h | 57 + drivers/net/ovpn/peer.c | 1367 +++++++++++ drivers/net/ovpn/peer.h | 163 ++ drivers/net/ovpn/pktid.c | 129 ++ drivers/net/ovpn/pktid.h | 87 + drivers/net/ovpn/proto.h | 118 + drivers/net/ovpn/skb.h | 61 + drivers/net/ovpn/socket.c | 244 ++ drivers/net/ovpn/socket.h | 49 + drivers/net/ovpn/stats.c | 21 + drivers/net/ovpn/stats.h | 47 + drivers/net/ovpn/tcp.c | 592 +++++ drivers/net/ovpn/tcp.h | 36 + drivers/net/ovpn/udp.c | 442 ++++ drivers/net/ovpn/udp.h | 25 + include/linux/skbuff.h | 2 + include/uapi/linux/if_link.h | 15 + include/uapi/linux/ovpn.h | 109 + include/uapi/linux/udp.h | 1 + net/core/skbuff.c | 18 +- net/ipv6/af_inet6.c | 1 + net/ipv6/udp.c | 1 + tools/testing/selftests/Makefile | 1 + tools/testing/selftests/net/ovpn/.gitignore | 2 + tools/testing/selftests/net/ovpn/Makefile | 31 + tools/testing/selftests/net/ovpn/common.sh | 92 + tools/testing/selftests/net/ovpn/config | 10 + tools/testing/selftests/net/ovpn/data64.key | 5 + tools/testing/selftests/net/ovpn/ovpn-cli.c | 2395 ++++++++++++++++++++ tools/testing/selftests/net/ovpn/tcp_peers.txt | 5 + .../testing/selftests/net/ovpn/test-chachapoly.sh | 9 + .../selftests/net/ovpn/test-close-socket-tcp.sh | 9 + .../selftests/net/ovpn/test-close-socket.sh | 45 + tools/testing/selftests/net/ovpn/test-float.sh | 9 + 
tools/testing/selftests/net/ovpn/test-tcp.sh | 9 + tools/testing/selftests/net/ovpn/test.sh | 113 + tools/testing/selftests/net/ovpn/udp_peers.txt | 5 + 57 files changed, 10072 insertions(+), 5 deletions(-) --- base-commit: 40587f749df216889163dd6e02d88ad53e759e66 change-id: 20241002-b4-ovpn-eeee35c694a2 Best regards, -- Antonio Quartulli <antonio(a)openvpn.net>

7 months, 2 weeks

3
32
0 0

[PATCH v9 0/6] scanf: convert self-test to KUnit

by Tamir Duberstein

This is one of just 3 remaining "Test Module" kselftests (the others being bitmap and printf), the rest having been converted to KUnit. In addition to the enclosed patch, please consider this an RFC on the removal of the "Test Module" kselftest machinery. I tested this using: $ tools/testing/kunit/kunit.py run --arch arm64 --make_options LLVM=1 scanf Failure output before this series: [ 383.100048] test_scanf: vsscanf("1574 9 64ca 935b 7 142d ff58 0", "%4hx %1hx %4hx %4hx %1hx %4hx %4hx %1hx", ...) expected 2472240330 got 1690959881 [ 383.102843] test_scanf: vsscanf("f12:2:d:2:c166:1:36b:1906", "%3hx:%1hx:%1hx:%1hx:%4hx:%1hx:%3hx:%4hx", ...) expected 131085 got 851970 [ 383.105376] test_scanf: vsscanf("4,b2fe,3,593,6,0,3bde,0", "%1hx,%4hx,%1hx,%3hx,%1hx,%1hx,%4hx,%1hx", ...) expected 93519875 got 242430 [ 383.105659] test_scanf: vsscanf("6-1-2-1-d9e6-f-93e-e567", "%1hx-%1hx-%1hx-%1hx-%4hx-%1hx-%3hx-%4hx", ...) expected 65538 got 131073 [ 383.106127] test_scanf: vsscanf("72d6/35/e88d/1/0/6c8c/7/1", "%4hx/%2hx/%4hx/%1hx/%1hx/%4hx/%1hx/%1hx", ...) expected 125069 got 3901554741 [ 383.106235] test_scanf: vsscanf("c9bea1b8122113e9a168df573", "%4hx%4hx%1hx%4hx%4hx%1hx%4hx%3hx", ...) expected 571539457 got 106936 ... [ 383.106398] test_scanf: failed 6 out of 2545 tests Failure output after this series: # numbers_list_field_width_val_width: ASSERTION FAILED at lib/scanf_kunit.c:94 lib/scanf_kunit.c:555: vsscanf("0 1e 3e43 31f0 0 0 5797 9c70", "%1hx %2hx %4hx %4hx %1hx %1hx %4hx %4hx", ...) expected 837828163 got 1044578334 not ok 1 " " # numbers_list_field_width_val_width: ASSERTION FAILED at lib/scanf_kunit.c:94 lib/scanf_kunit.c:555: vsscanf("dc2:1c:0:3531:2621:5172:1:7", "%3hx:%2hx:%1hx:%4hx:%4hx:%4hx:%1hx:%1hx", ...) expected 892403712 got 28 not ok 2 ":" # numbers_list_field_width_val_width: ASSERTION FAILED at lib/scanf_kunit.c:94 lib/scanf_kunit.c:555: vsscanf("e083,8f6e,b,70ca,1,1,aab1,10e4", "%4hx,%4hx,%1hx,%4hx,%1hx,%1hx,%4hx,%4hx", ...) 
expected 1892286475 got 757614 not ok 3 "," # numbers_list_field_width_val_width: ASSERTION FAILED at lib/scanf_kunit.c:94 lib/scanf_kunit.c:555: vsscanf("2e72-8435-1-2fc-7cbd-c2f1-7158-2b41", "%4hx-%4hx-%1hx-%3hx-%4hx-%4hx-%4hx-%4hx", ...) expected 50069505 got 99381 not ok 4 "-" # numbers_list_field_width_val_width: ASSERTION FAILED at lib/scanf_kunit.c:94 lib/scanf_kunit.c:555: vsscanf("403/0/17/1/11e7/1/1fe8/34ba", "%3hx/%1hx/%2hx/%1hx/%4hx/%1hx/%4hx/%4hx", ...) expected 65559 got 1507328 not ok 5 "/" Signed-off-by: Tamir Duberstein <tamird(a)gmail.com> --- Changes in v9: - Use __scanf correctly on `_test`. (Thanks to Petr Mladek). - Split header `#include` cleanup into separate patch. (Petr Mladek) - In "scanf: break kunit into test cases", revert to KUNIT_FAIL. (Petr Mladek) - Rebase on linux-next. - Add provisional patch "scanf: further break kunit into test cases". Please feel free to take this series without this patch if you prefer. - Link to v8: https://lore.kernel.org/r/20250214-scanf-kunit-convert-v8-0-5ea50f95f83c@gm… Changes in v8: - Expand "scanf: remove redundant debug logs" commit message. (Andy Shevchenko) - Add patch "implicate test line in failure messages". - Rebase on linux-next, move scanf_kunit.c into lib/tests/. - Link to v7: https://lore.kernel.org/r/20250211-scanf-kunit-convert-v7-0-c057f0a3d9d8@gm… Changes in v7: - Remove redundant debug logs. (Petr Mladek) - Drop Petr's Acked-by. - Use original test assertions as KUNIT_*_EQ_MSG produces hard-to-parse messages. The new failure output is: - Link to v6: https://lore.kernel.org/r/20250210-scanf-kunit-convert-v6-0-4d583d07f92d@gm… Changes in v6: - s/at boot/at runtime/ for consistency with the printf series. - Go back to kmalloc. (Geert Uytterhoeven) - Link to v5: https://lore.kernel.org/r/20250210-scanf-kunit-convert-v5-0-8e64f3a7de99@gm… Changes in v5: - Remove extraneous trailing newlines from failure messages. - Replace `pr_debug` with `kunit_printk`. 
- Use static char arrays instead of kmalloc. - Drop KUnit boilerplate from CONFIG_SCANF_KUNIT_TEST help text. - Drop arch changes. - Link to v4: https://lore.kernel.org/r/20250207-scanf-kunit-convert-v4-0-a23e2afaede8@gm… Changes in v4: - Bake `test` into various macros, greatly reducing diff noise. - Revert control flow changes. - Link to v3: https://lore.kernel.org/r/20250204-scanf-kunit-convert-v3-0-386d7c3ee714@gm… Changes in v3: - Reduce diff noise in lib/Makefile. (Petr Mladek) - Split `scanf_test` into a few test cases. New output: : =================== scanf (10 subtests) ==================== : [PASSED] numbers_simple : ====================== numbers_list ======================= : [PASSED] delim=" " : [PASSED] delim=":" : [PASSED] delim="," : [PASSED] delim="-" : [PASSED] delim="/" : ================== [PASSED] numbers_list =================== : ============ numbers_list_field_width_typemax ============= : [PASSED] delim=" " : [PASSED] delim=":" : [PASSED] delim="," : [PASSED] delim="-" : [PASSED] delim="/" : ======== [PASSED] numbers_list_field_width_typemax ========= : =========== numbers_list_field_width_val_width ============ : [PASSED] delim=" " : [PASSED] delim=":" : [PASSED] delim="," : [PASSED] delim="-" : [PASSED] delim="/" : ======= [PASSED] numbers_list_field_width_val_width ======== : [PASSED] numbers_slice : [PASSED] numbers_prefix_overflow : [PASSED] test_simple_strtoull : [PASSED] test_simple_strtoll : [PASSED] test_simple_strtoul : [PASSED] test_simple_strtol : ====================== [PASSED] scanf ====================== : ============================================================ : Testing complete. Ran 22 tests: passed: 22 : Elapsed time: 5.517s total, 0.001s configuring, 5.440s building, 0.067s running - Link to v2: https://lore.kernel.org/r/20250203-scanf-kunit-convert-v2-1-277a618d804e@gm… Changes in v2: - Rename lib/{test_scanf.c => scanf_kunit.c}. 
(Andy Shevchenko) - Link to v1: https://lore.kernel.org/r/20250131-scanf-kunit-convert-v1-1-0976524f0eba@gm… --- Tamir Duberstein (6): scanf: implicate test line in failure messages scanf: remove redundant debug logs scanf: convert self-test to KUnit scanf: break kunit into test cases scanf: tidy header `#include`s scanf: further break kunit into test cases MAINTAINERS | 2 +- lib/Kconfig.debug | 12 +- lib/Makefile | 1 - lib/tests/Makefile | 1 + lib/{test_scanf.c => tests/scanf_kunit.c} | 360 ++++++++++++++++-------------- tools/testing/selftests/lib/Makefile | 2 +- tools/testing/selftests/lib/config | 1 - tools/testing/selftests/lib/scanf.sh | 4 - 8 files changed, 201 insertions(+), 182 deletions(-) --- base-commit: 7ec162622e66a4ff886f8f28712ea1b13069e1aa change-id: 20250131-scanf-kunit-convert-f70dc33bb34c Best regards, -- Tamir Duberstein <tamird(a)gmail.com>

7 months, 2 weeks

4
19
0 0

[PATCH] selftests/mm: Add commentary about 9pfs bugs

by Brendan Jackman

As discussed here: https://lore.kernel.org/lkml/Z9RRkL1hom48z3Tt@google.com/ This code could benefit from some more commentary. To avoid needing to comment the same thing in multiple places (I guess more of these SKIPs will need to be added over time, for now I am only like 20% of the way through Project Run run_vmtests.sh Successfully), add a dummy "skip tests for this specific reason" function that basically just serves as a hook to hang comments on. Signed-off-by: Brendan Jackman <jackmanb(a)google.com> --- To: David Hildenbrand <david(a)redhat.com> --- tools/testing/selftests/mm/gup_longterm.c | 6 +----- tools/testing/selftests/mm/map_populate.c | 8 +++----- tools/testing/selftests/mm/vm_util.h | 18 ++++++++++++++++++ 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c index 03271442aae5aed060fd44010df552a2eedcdafc..21595b20bbc391a0e5d0ab0563ac4ce5e1e0069f 100644 --- a/tools/testing/selftests/mm/gup_longterm.c +++ b/tools/testing/selftests/mm/gup_longterm.c @@ -97,11 +97,7 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) if (ftruncate(fd, size)) { if (errno == ENOENT) { - /* - * This can happen if the file has been unlinked and the - * filesystem doesn't support truncating unlinked files. 
- */ - ksft_test_result_skip("ftruncate() failed with ENOENT\n"); + skip_test_dodgy_fs("ftruncate()"); } else { ksft_test_result_fail("ftruncate() failed (%s)\n", strerror(errno)); } diff --git a/tools/testing/selftests/mm/map_populate.c b/tools/testing/selftests/mm/map_populate.c index 433e54fb634f793f2eb4c53ba6b791045c9f4986..9df2636c829bf34d6d0517e126b3deda1f3ba834 100644 --- a/tools/testing/selftests/mm/map_populate.c +++ b/tools/testing/selftests/mm/map_populate.c @@ -18,6 +18,8 @@ #include <unistd.h> #include "../kselftest.h" +#include "vm_util.h" + #define MMAP_SZ 4096 #define BUG_ON(condition, description) \ @@ -88,11 +90,7 @@ int main(int argc, char **argv) ret = ftruncate(fileno(ftmp), MMAP_SZ); if (ret < 0 && errno == ENOENT) { - /* - * This probably means tmpfile() made a file on a filesystem - * that doesn't handle temporary files the way we want. - */ - ksft_exit_skip("ftruncate(fileno(tmpfile())) gave ENOENT, weird filesystem?\n"); + skip_test_dodgy_fs("ftruncate()"); } BUG_ON(ret, "ftruncate()"); diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index 0e629586556b5aae580d8e4ce7491bc93adcc4d6..6effafdc4d8a23f91f0adcb9e43d6196d651ba88 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -5,6 +5,7 @@ #include <err.h> #include <strings.h> /* ffsl() */ #include <unistd.h> /* _SC_PAGESIZE */ +#include "../kselftest.h" #define BIT_ULL(nr) (1ULL << (nr)) #define PM_SOFT_DIRTY BIT_ULL(55) @@ -32,6 +33,23 @@ static inline unsigned int pshift(void) return __page_shift; } +/* + * Plan 9 FS has bugs (at least on QEMU) where certain operations fail with + * ENOENT on unlinked files. See + * https://gitlab.com/qemu-project/qemu/-/issues/103 for some info about such + * bugs. There are rumours of NFS implementations with similar bugs. + * + * Ideally, tests should just detect filesystems known to have such issues and + * bail early. 
But 9pfs has the additional "feature" that it causes fstatfs to + * pass through the f_type field from the host filesystem. To avoid having to + * scrape /proc/mounts or some other hackery, tests can call this function when + * it seems such a bug might have been encountered. + */ +static inline void skip_test_dodgy_fs(const char *op_name) +{ + ksft_test_result_skip("%s failed with ENOENT. Filesystem might be buggy (9pfs?)\n", op_name); +} + uint64_t pagemap_get_entry(int fd, char *start); bool pagemap_is_softdirty(int fd, char *start); bool pagemap_is_swapped(int fd, char *start); --- base-commit: a91aaf8dd549dcee9caab227ecaa6cbc243bbc5a change-id: 20250317-9pfs-comments-24b6fa5417cd Best regards, -- Brendan Jackman <jackmanb(a)google.com>

7 months, 2 weeks

1
0
0 0

[PATCH net v3 0/3] vsock/bpf: Handle races between sockmap update and connect() disconnecting

by Michal Luczaj

Signal delivery during connect() may disconnect an already established socket. Problem is that such socket might have been placed in a sockmap before the connection was closed. PATCH 1 ensures this race won't lead to an unconnected vsock staying in the sockmap. PATCH 2 selftests it. PATCH 3 fixes a related race. Note that selftest in PATCH 2 does test this code as well, but winning this race variant may take more than 2 seconds, so I'm not advertising it. Signed-off-by: Michal Luczaj <mhal(a)rbox.co> --- Changes in v3: - Selftest: drop unnecessary variable initialization and reorder the calls - Link to v2: https://lore.kernel.org/r/20250314-vsock-trans-signal-race-v2-0-421a41f60f4… Changes in v2: - Handle one more path of tripping the warning - Add a selftest - Collect R-b [Stefano] - Link to v1: https://lore.kernel.org/r/20250307-vsock-trans-signal-race-v1-1-3aca3f771fb… --- Michal Luczaj (3): vsock/bpf: Fix EINTR connect() racing sockmap update selftest/bpf: Add test for AF_VSOCK connect() racing sockmap update vsock/bpf: Fix bpf recvmsg() racing transport reassignment net/vmw_vsock/af_vsock.c | 10 ++- net/vmw_vsock/vsock_bpf.c | 24 ++++-- .../selftests/bpf/prog_tests/sockmap_basic.c | 97 ++++++++++++++++++++++ 3 files changed, 122 insertions(+), 9 deletions(-) --- base-commit: da9e8efe7ee10e8425dc356a9fc593502c8e3933 change-id: 20250305-vsock-trans-signal-race-d62f7718d099 Best regards, -- Michal Luczaj <mhal(a)rbox.co>

7 months, 2 weeks

2
5
0 0

[PATCH net-next v10 00/10] tun: Introduce virtio-net hashing feature

by Akihiko Odaki

virtio-net has two uses for hashes: one is RSS and another is hash reporting. Conventionally the hash calculation was done by the VMM. However, computing the hash after the queue was chosen defeats the purpose of RSS. Another approach is to use an eBPF steering program. This approach has another downside: it cannot report the calculated hash due to the restrictive nature of eBPF. Introduce code to compute hashes in the kernel in order to overcome these challenges. An alternative solution is to extend the eBPF steering program so that it will be able to report to the userspace, but it is based on context rewrites, which is in feature freeze. We can adopt kfuncs, but they will not be UAPIs. We opt to ioctl to align with other relevant UAPIs (KVM and vhost_net). The patches for QEMU to use this new feature were submitted as RFC and are available at: https://patchew.org/QEMU/20250313-hash-v4-0-c75c494b495e@daynix.com/ This work was presented at LPC 2024: https://lpc.events/event/18/contributions/1963/ V1 -> V2: Changed to introduce a new BPF program type. Signed-off-by: Akihiko Odaki <akihiko.odaki(a)daynix.com> --- Changes in v10: - Split common code and TUN/TAP-specific code into separate patches. - Reverted a spurious style change in patch "tun: Introduce virtio-net hash feature". - Added a comment explaining disable_ipv6 in tests. - Used AF_PACKET for patch "selftest: tun: Add tests for virtio-net hashing". I also added the usage of FIXTURE_VARIANT() as the testing function now needs access to more variant-specific variables. - Corrected the message of patch "selftest: tun: Add tests for virtio-net hashing"; it mentioned validation of configuration but it is not in the scope of this patch. - Expanded the description of patch "selftest: tun: Add tests for virtio-net hashing". - Added patch "tun: Allow steering eBPF program to fall back". - Changed to handle TUNGETVNETHASHCAP before taking the rtnl lock. - Removed redundant tests for tun_vnet_ioctl(). 
- Added patch "selftest: tap: Add tests for virtio-net ioctls". - Added a design explanation of ioctls for extensibility and migration. - Removed a few branches in patch "vhost/net: Support VIRTIO_NET_F_HASH_REPORT". - Link to v9: https://lore.kernel.org/r/20250307-rss-v9-0-df76624025eb@daynix.com Changes in v9: - Added a missing return statement in patch "tun: Introduce virtio-net hash feature". - Link to v8: https://lore.kernel.org/r/20250306-rss-v8-0-7ab4f56ff423@daynix.com Changes in v8: - Disabled IPv6 to eliminate noises in tests. - Added a branch in tap to avoid unnecessary dissection when hash reporting is disabled. - Removed unnecessary rtnl_lock(). - Extracted code to handle new ioctls into separate functions to avoid adding extra NULL checks to the code handling other ioctls. - Introduced variable named "fd" to __tun_chr_ioctl(). - s/-/=/g in a patch message to avoid confusing Git. - Link to v7: https://lore.kernel.org/r/20250228-rss-v7-0-844205cbbdd6@daynix.com Changes in v7: - Ensured to set hash_report to VIRTIO_NET_HASH_REPORT_NONE for VHOST_NET_F_VIRTIO_NET_HDR. - s/4/sizeof(u32)/ in patch "virtio_net: Add functions for hashing". - Added tap_skb_cb type. - Rebased. - Link to v6: https://lore.kernel.org/r/20250109-rss-v6-0-b1c90ad708f6@daynix.com Changes in v6: - Extracted changes to fill vnet header holes into another series. - Squashed patches "skbuff: Introduce SKB_EXT_TUN_VNET_HASH", "tun: Introduce virtio-net hash reporting feature", and "tun: Introduce virtio-net RSS" into patch "tun: Introduce virtio-net hash feature". - Dropped the RFC tag. - Link to v5: https://lore.kernel.org/r/20241008-rss-v5-0-f3cf68df005d@daynix.com Changes in v5: - Fixed a compilation error with CONFIG_TUN_VNET_CROSS_LE. - Optimized the calculation of the hash value according to: https://git.dpdk.org/dpdk/commit/?id=3fb1ea032bd6ff8317af5dac9af901f1f324ca… - Added patch "tun: Unify vnet implementation". - Dropped patch "tap: Pad virtio header with zero". 
- Added patch "selftest: tun: Test vnet ioctls without device". - Reworked selftests to skip for older kernels. - Documented the case when the underlying device is deleted and packets have queue_mapping set by TC. - Reordered test harness arguments. - Added code to handle fragmented packets. - Link to v4: https://lore.kernel.org/r/20240924-rss-v4-0-84e932ec0e6c@daynix.com Changes in v4: - Moved tun_vnet_hash_ext to if_tun.h. - Renamed virtio_net_toeplitz() to virtio_net_toeplitz_calc(). - Replaced htons() with cpu_to_be16(). - Changed virtio_net_hash_rss() to return void. - Reordered variable declarations in virtio_net_hash_rss(). - Removed virtio_net_hdr_v1_hash_from_skb(). - Updated messages of "tap: Pad virtio header with zero" and "tun: Pad virtio header with zero". - Fixed vnet_hash allocation size. - Ensured to free vnet_hash when destructing tun_struct. - Link to v3: https://lore.kernel.org/r/20240915-rss-v3-0-c630015db082@daynix.com Changes in v3: - Reverted back to add ioctl. - Split patch "tun: Introduce virtio-net hashing feature" into "tun: Introduce virtio-net hash reporting feature" and "tun: Introduce virtio-net RSS". - Changed to reuse hash values computed for automq instead of performing RSS hashing when hash reporting is requested but RSS is not. - Extracted relevant data from struct tun_struct to keep it minimal. - Added kernel-doc. - Changed to allow calling TUNGETVNETHASHCAP before TUNSETIFF. - Initialized num_buffers with 1. - Added a test case for unclassified packets. - Fixed error handling in tests. - Changed tests to verify that the queue index will not overflow. - Rebased. 
- Link to v2: https://lore.kernel.org/r/20231015141644.260646-1-akihiko.odaki@daynix.com --- Akihiko Odaki (10): virtio_net: Add functions for hashing net: flow_dissector: Export flow_keys_dissector_symmetric tun: Allow steering eBPF program to fall back tun: Add common virtio-net hash feature code tun: Introduce virtio-net hash feature tap: Introduce virtio-net hash feature selftest: tun: Test vnet ioctls without device selftest: tun: Add tests for virtio-net hashing selftest: tap: Add tests for virtio-net ioctls vhost/net: Support VIRTIO_NET_F_HASH_REPORT Documentation/networking/tuntap.rst | 7 + drivers/net/Kconfig | 1 + drivers/net/tap.c | 68 ++++- drivers/net/tun.c | 90 +++++-- drivers/net/tun_vnet.h | 155 ++++++++++- drivers/vhost/net.c | 68 ++--- include/linux/if_tap.h | 2 + include/linux/skbuff.h | 3 + include/linux/virtio_net.h | 188 ++++++++++++++ include/net/flow_dissector.h | 1 + include/uapi/linux/if_tun.h | 82 ++++++ net/core/flow_dissector.c | 3 +- net/core/skbuff.c | 4 + tools/testing/selftests/net/Makefile | 2 +- tools/testing/selftests/net/tap.c | 97 ++++++- tools/testing/selftests/net/tun.c | 491 ++++++++++++++++++++++++++++++++++- 16 files changed, 1185 insertions(+), 77 deletions(-) --- base-commit: dd83757f6e686a2188997cb58b5975f744bb7786 change-id: 20240403-rss-e737d89efa77 prerequisite-change-id: 20241230-tun-66e10a49b0c7:v6 prerequisite-patch-id: 871dc5f146fb6b0e3ec8612971a8e8190472c0fb prerequisite-patch-id: 2797ed249d32590321f088373d4055ff3f430a0e prerequisite-patch-id: ea3370c72d4904e2f0536ec76ba5d26784c0cede prerequisite-patch-id: 837e4cf5d6b451424f9b1639455e83a260c4440d prerequisite-patch-id: ea701076f57819e844f5a35efe5cbc5712d3080d prerequisite-patch-id: 701646fb43ad04cc64dd2bf13c150ccbe6f828ce prerequisite-patch-id: 53176dae0c003f5b6c114d43f936cf7140d31bb5 prerequisite-change-id: 20250116-buffers-96e14bf023fc:v2 prerequisite-patch-id: 25fd4f99d4236a05a5ef16ab79f3e85ee57e21cc Best regards, -- Akihiko Odaki <akihiko.odaki(a)daynix.com>

7 months, 2 weeks

3
12
0 0

Re: [PATCH 1/4] selftests: ntsync: fix the wrong condition in wake_all

by Elizabeth Figura

On Friday, 14 March 2025 05:14:30 CDT Su Hui wrote: > On 2025/3/14 17:21, Dan Carpenter wrote: > > On Fri, Mar 14, 2025 at 03:14:51PM +0800, Su Hui wrote: > >> When 'manual=false' and 'signaled=true', then expected value when using > >> NTSYNC_IOC_CREATE_EVENT should be greater than zero. Fix this typo error. > >> > >> Signed-off-by: Su Hui<suhui(a)nfschina.com> > >> --- > >> tools/testing/selftests/drivers/ntsync/ntsync.c | 2 +- > >> 1 file changed, 1 insertion(+), 1 deletion(-) > >> > >> diff --git a/tools/testing/selftests/drivers/ntsync/ntsync.c b/tools/testing/selftests/drivers/ntsync/ntsync.c > >> index 3aad311574c4..bfb6fad653d0 100644 > >> --- a/tools/testing/selftests/drivers/ntsync/ntsync.c > >> +++ b/tools/testing/selftests/drivers/ntsync/ntsync.c > >> @@ -968,7 +968,7 @@ TEST(wake_all) > >> auto_event_args.manual = false; > >> auto_event_args.signaled = true; > >> objs[3] = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &auto_event_args); > >> - EXPECT_EQ(0, objs[3]); > >> + EXPECT_LE(0, objs[3]); > > It's kind of weird how these macros put the constant on the left. > > It returns an "fd" on success. So this look reasonable. It probably > > won't return the zero fd so we could probably check EXPECT_LT()? > Agreed, there are about 29 items that can be changed to EXPECT_LT(). > I can send a v2 patchset with this change if there is no more other > suggestions. I personally think it looks wrong to use EXPECT_LT(), but I'll certainly defer to a higher maintainer on this point.

7 months, 2 weeks

3
3
0 0

[PATCH v3 2/3] rust: replace `addr_of[_mut]!` with `&raw [mut]`

by Antonio Hickey

Replacing all occurrences of `addr_of!(place)` with `&raw const place`, and all occurrences of `addr_of_mut!(place)` with `&raw mut place`. Utilizing the new feature will allow us to reduce macro complexity, and improve consistency with existing reference syntax as `&raw const`, `&raw mut` is very similar to `&`, `&mut` making it fit more naturally with other existing code. Suggested-by: Benno Lossin <benno.lossin(a)proton.me> Link: https://github.com/Rust-for-Linux/linux/issues/1148 Signed-off-by: Antonio Hickey <contact(a)antoniohickey.com> --- rust/kernel/block/mq/request.rs | 4 ++-- rust/kernel/faux.rs | 4 ++-- rust/kernel/fs/file.rs | 2 +- rust/kernel/init.rs | 8 ++++---- rust/kernel/init/macros.rs | 28 +++++++++++++------------- rust/kernel/jump_label.rs | 4 ++-- rust/kernel/kunit.rs | 4 ++-- rust/kernel/list.rs | 2 +- rust/kernel/list/impl_list_item_mod.rs | 6 +++--- rust/kernel/net/phy.rs | 4 ++-- rust/kernel/pci.rs | 4 ++-- rust/kernel/platform.rs | 4 +--- rust/kernel/rbtree.rs | 22 ++++++++++---------- rust/kernel/sync/arc.rs | 2 +- rust/kernel/task.rs | 4 ++-- rust/kernel/workqueue.rs | 8 ++++---- 16 files changed, 54 insertions(+), 56 deletions(-) diff --git a/rust/kernel/block/mq/request.rs b/rust/kernel/block/mq/request.rs index 7943f43b9575..4a5b7ec914ef 100644 --- a/rust/kernel/block/mq/request.rs +++ b/rust/kernel/block/mq/request.rs @@ -12,7 +12,7 @@ }; use core::{ marker::PhantomData, - ptr::{addr_of_mut, NonNull}, + ptr::NonNull, sync::atomic::{AtomicU64, Ordering}, }; @@ -187,7 +187,7 @@ pub(crate) fn refcount(&self) -> &AtomicU64 { pub(crate) unsafe fn refcount_ptr(this: *mut Self) -> *mut AtomicU64 { // SAFETY: Because of the safety requirements of this function, the // field projection is safe. 
- unsafe { addr_of_mut!((*this).refcount) } + unsafe { &raw mut (*this).refcount } } } diff --git a/rust/kernel/faux.rs b/rust/kernel/faux.rs index 5acc0c02d451..52ac554c1119 100644 --- a/rust/kernel/faux.rs +++ b/rust/kernel/faux.rs @@ -7,7 +7,7 @@ //! C header: [`include/linux/device/faux.h`] use crate::{bindings, device, error::code::*, prelude::*}; -use core::ptr::{addr_of_mut, null, null_mut, NonNull}; +use core::ptr::{null, null_mut, NonNull}; /// The registration of a faux device. /// @@ -45,7 +45,7 @@ impl AsRef<device::Device> for Registration { fn as_ref(&self) -> &device::Device { // SAFETY: The underlying `device` in `faux_device` is guaranteed by the C API to be // a valid initialized `device`. - unsafe { device::Device::as_ref(addr_of_mut!((*self.as_raw()).dev)) } + unsafe { device::Device::as_ref((&raw mut (*self.as_raw()).dev)) } } } diff --git a/rust/kernel/fs/file.rs b/rust/kernel/fs/file.rs index ed57e0137cdb..7ee4830b67f3 100644 --- a/rust/kernel/fs/file.rs +++ b/rust/kernel/fs/file.rs @@ -331,7 +331,7 @@ pub fn flags(&self) -> u32 { // SAFETY: The file is valid because the shared reference guarantees a nonzero refcount. // // FIXME(read_once): Replace with `read_once` when available on the Rust side. - unsafe { core::ptr::addr_of!((*self.as_ptr()).f_flags).read_volatile() } + unsafe { (&raw const (*self.as_ptr()).f_flags).read_volatile() } } } diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs index 7fd1ea8265a5..a8fac6558671 100644 --- a/rust/kernel/init.rs +++ b/rust/kernel/init.rs @@ -122,7 +122,7 @@ //! ```rust //! # #![expect(unreachable_pub, clippy::disallowed_names)] //! use kernel::{init, types::Opaque}; -//! use core::{ptr::addr_of_mut, marker::PhantomPinned, pin::Pin}; +//! use core::{marker::PhantomPinned, pin::Pin}; //! # mod bindings { //! # #![expect(non_camel_case_types)] //! # #![expect(clippy::missing_safety_doc)] @@ -159,7 +159,7 @@ //! unsafe { //! init::pin_init_from_closure(move |slot: *mut Self| { //! 
// `slot` contains uninit memory, avoid creating a reference. -//! let foo = addr_of_mut!((*slot).foo); +//! let foo = &raw mut (*slot).foo; //! //! // Initialize the `foo` //! bindings::init_foo(Opaque::raw_get(foo)); @@ -541,7 +541,7 @@ macro_rules! stack_try_pin_init { /// /// ```rust /// # use kernel::{macros::{Zeroable, pin_data}, pin_init}; -/// # use core::{ptr::addr_of_mut, marker::PhantomPinned}; +/// # use core::marker::PhantomPinned; /// #[pin_data] /// #[derive(Zeroable)] /// struct Buf { @@ -554,7 +554,7 @@ macro_rules! stack_try_pin_init { /// pin_init!(&this in Buf { /// buf: [0; 64], /// // SAFETY: TODO. -/// ptr: unsafe { addr_of_mut!((*this.as_ptr()).buf).cast() }, +/// ptr: unsafe { &raw mut (*this.as_ptr()).buf.cast() }, /// pin: PhantomPinned, /// }); /// pin_init!(Buf { diff --git a/rust/kernel/init/macros.rs b/rust/kernel/init/macros.rs index 1fd146a83241..af525fbb2f01 100644 --- a/rust/kernel/init/macros.rs +++ b/rust/kernel/init/macros.rs @@ -244,25 +244,25 @@ //! struct __InitOk; //! // This is the expansion of `t,`, which is syntactic sugar for `t: t,`. //! { -//! unsafe { ::core::ptr::write(::core::addr_of_mut!((*slot).t), t) }; +//! unsafe { ::core::ptr::write(&raw mut (*slot).t, t) }; //! } //! // Since initialization could fail later (not in this case, since the //! // error type is `Infallible`) we will need to drop this field if there //! // is an error later. This `DropGuard` will drop the field when it gets //! // dropped and has not yet been forgotten. //! let __t_guard = unsafe { -//! ::pinned_init::__internal::DropGuard::new(::core::addr_of_mut!((*slot).t)) +//! ::pinned_init::__internal::DropGuard::new(&raw mut (*slot).t) //! }; //! // Expansion of `x: 0,`: //! // Since this can be an arbitrary expression we cannot place it inside //! // of the `unsafe` block, so we bind it here. //! { //! let x = 0; -//! unsafe { ::core::ptr::write(::core::addr_of_mut!((*slot).x), x) }; +//! 
unsafe { ::core::ptr::write(&raw mut (*slot).x, x) }; //! } //! // We again create a `DropGuard`. //! let __x_guard = unsafe { -//! ::kernel::init::__internal::DropGuard::new(::core::addr_of_mut!((*slot).x)) +//! ::kernel::init::__internal::DropGuard::new(&raw mut (*slot).x) //! }; //! // Since initialization has successfully completed, we can now forget //! // the guards. This is not `mem::forget`, since we only have @@ -459,15 +459,15 @@ //! { //! struct __InitOk; //! { -//! unsafe { ::core::ptr::write(::core::addr_of_mut!((*slot).a), a) }; +//! unsafe { ::core::ptr::write(&raw mut (*slot).a, a) }; //! } //! let __a_guard = unsafe { -//! ::kernel::init::__internal::DropGuard::new(::core::addr_of_mut!((*slot).a)) +//! ::kernel::init::__internal::DropGuard::new(&raw mut (*slot).a) //! }; //! let init = Bar::new(36); -//! unsafe { data.b(::core::addr_of_mut!((*slot).b), b)? }; +//! unsafe { data.b(&raw mut (*slot).b, b)? }; //! let __b_guard = unsafe { -//! ::kernel::init::__internal::DropGuard::new(::core::addr_of_mut!((*slot).b)) +//! ::kernel::init::__internal::DropGuard::new(&raw mut (*slot).b) //! }; //! ::core::mem::forget(__b_guard); //! ::core::mem::forget(__a_guard); @@ -1210,7 +1210,7 @@ fn assert_zeroable<T: $crate::init::Zeroable>(_: *mut T) {} // SAFETY: `slot` is valid, because we are inside of an initializer closure, we // return when an error/panic occurs. // We also use the `data` to require the correct trait (`Init` or `PinInit`) for `$field`. - unsafe { $data.$field(::core::ptr::addr_of_mut!((*$slot).$field), init)? }; + unsafe { $data.$field(&raw mut (*$slot).$field, init)? }; // Create the drop guard: // // We rely on macro hygiene to make it impossible for users to access this local variable. @@ -1218,7 +1218,7 @@ fn assert_zeroable<T: $crate::init::Zeroable>(_: *mut T) {} ::kernel::macros::paste! { // SAFETY: We forget the guard later when initialization has succeeded. 
let [< __ $field _guard >] = unsafe { - $crate::init::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field)) + $crate::init::__internal::DropGuard::new(&raw mut (*$slot).$field) }; $crate::__init_internal!(init_slot($use_data): @@ -1241,7 +1241,7 @@ fn assert_zeroable<T: $crate::init::Zeroable>(_: *mut T) {} // // SAFETY: `slot` is valid, because we are inside of an initializer closure, we // return when an error/panic occurs. - unsafe { $crate::init::Init::__init(init, ::core::ptr::addr_of_mut!((*$slot).$field))? }; + unsafe { $crate::init::Init::__init(init, &raw mut (*$slot).$field)? }; // Create the drop guard: // // We rely on macro hygiene to make it impossible for users to access this local variable. @@ -1249,7 +1249,7 @@ fn assert_zeroable<T: $crate::init::Zeroable>(_: *mut T) {} ::kernel::macros::paste! { // SAFETY: We forget the guard later when initialization has succeeded. let [< __ $field _guard >] = unsafe { - $crate::init::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field)) + $crate::init::__internal::DropGuard::new(&raw mut (*$slot).$field) }; $crate::__init_internal!(init_slot(): @@ -1272,7 +1272,7 @@ fn assert_zeroable<T: $crate::init::Zeroable>(_: *mut T) {} // Initialize the field. // // SAFETY: The memory at `slot` is uninitialized. - unsafe { ::core::ptr::write(::core::ptr::addr_of_mut!((*$slot).$field), $field) }; + unsafe { ::core::ptr::write(&raw mut (*$slot).$field, $field) }; } // Create the drop guard: // @@ -1281,7 +1281,7 @@ fn assert_zeroable<T: $crate::init::Zeroable>(_: *mut T) {} ::kernel::macros::paste! { // SAFETY: We forget the guard later when initialization has succeeded. 
let [< __ $field _guard >] = unsafe { - $crate::init::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field)) + $crate::init::__internal::DropGuard::new(&raw mut (*$slot).$field) }; $crate::__init_internal!(init_slot($($use_data)?): diff --git a/rust/kernel/jump_label.rs b/rust/kernel/jump_label.rs index 4e974c768dbd..ca10abae0eee 100644 --- a/rust/kernel/jump_label.rs +++ b/rust/kernel/jump_label.rs @@ -20,8 +20,8 @@ #[macro_export] macro_rules! static_branch_unlikely { ($key:path, $keytyp:ty, $field:ident) => {{ - let _key: *const $keytyp = ::core::ptr::addr_of!($key); - let _key: *const $crate::bindings::static_key_false = ::core::ptr::addr_of!((*_key).$field); + let _key: *const $keytyp = &raw const $key; + let _key: *const $crate::bindings::static_key_false = &raw const (*_key).$field; let _key: *const $crate::bindings::static_key = _key.cast(); #[cfg(not(CONFIG_JUMP_LABEL))] diff --git a/rust/kernel/kunit.rs b/rust/kernel/kunit.rs index 824da0e9738a..a17ef3b2e860 100644 --- a/rust/kernel/kunit.rs +++ b/rust/kernel/kunit.rs @@ -128,9 +128,9 @@ unsafe impl Sync for UnaryAssert {} unsafe { $crate::bindings::__kunit_do_failed_assertion( kunit_test, - core::ptr::addr_of!(LOCATION.0), + &raw const LOCATION.0, $crate::bindings::kunit_assert_type_KUNIT_ASSERTION, - core::ptr::addr_of!(ASSERTION.0.assert), + &raw const ASSERTION.0.assert, Some($crate::bindings::kunit_unary_assert_format), core::ptr::null(), ); diff --git a/rust/kernel/list.rs b/rust/kernel/list.rs index c0ed227b8a4f..e98f0820f002 100644 --- a/rust/kernel/list.rs +++ b/rust/kernel/list.rs @@ -176,7 +176,7 @@ pub fn new() -> impl PinInit<Self> { #[inline] unsafe fn fields(me: *mut Self) -> *mut ListLinksFields { // SAFETY: The caller promises that the pointer is valid. 
- unsafe { Opaque::raw_get(ptr::addr_of!((*me).inner)) } + unsafe { Opaque::raw_get(&raw const (*me).inner) } } /// # Safety diff --git a/rust/kernel/list/impl_list_item_mod.rs b/rust/kernel/list/impl_list_item_mod.rs index a0438537cee1..014b6713d59d 100644 --- a/rust/kernel/list/impl_list_item_mod.rs +++ b/rust/kernel/list/impl_list_item_mod.rs @@ -49,7 +49,7 @@ macro_rules! impl_has_list_links { // SAFETY: The implementation of `raw_get_list_links` only compiles if the field has the // right type. // - // The behavior of `raw_get_list_links` is not changed since the `addr_of_mut!` macro is + // The behavior of `raw_get_list_links` is not changed since the `&raw mut` op is // equivalent to the pointer offset operation in the trait definition. unsafe impl$(<$($implarg),*>)? $crate::list::HasListLinks$(<$id>)? for $self $(<$($selfarg),*>)? @@ -61,7 +61,7 @@ unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut $crate::list::ListLinks$(<$ // SAFETY: The caller promises that the pointer is not dangling. We know that this // expression doesn't follow any pointers, as the `offset_of!` invocation above // would otherwise not compile. - unsafe { ::core::ptr::addr_of_mut!((*ptr)$(.$field)*) } + unsafe { &raw mut (*ptr)$(.$field)* } } } )*}; @@ -103,7 +103,7 @@ macro_rules! impl_has_list_links_self_ptr { unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut $crate::list::ListLinks$(<$id>)? { // SAFETY: The caller promises that the pointer is not dangling. let ptr: *mut $crate::list::ListLinksSelfPtr<$item_type $(, $id)?> = - unsafe { ::core::ptr::addr_of_mut!((*ptr).$field) }; + unsafe { &raw mut (*ptr).$field }; ptr.cast() } } diff --git a/rust/kernel/net/phy.rs b/rust/kernel/net/phy.rs index a59469c785e3..757db052cc09 100644 --- a/rust/kernel/net/phy.rs +++ b/rust/kernel/net/phy.rs @@ -7,7 +7,7 @@ //! C headers: [`include/linux/phy.h`](srctree/include/linux/phy.h). 
use crate::{error::*, prelude::*, types::Opaque}; -use core::{marker::PhantomData, ptr::addr_of_mut}; +use core::marker::PhantomData; pub mod reg; @@ -285,7 +285,7 @@ impl AsRef<kernel::device::Device> for Device { fn as_ref(&self) -> &kernel::device::Device { let phydev = self.0.get(); // SAFETY: The struct invariant ensures that `mdio.dev` is valid. - unsafe { kernel::device::Device::as_ref(addr_of_mut!((*phydev).mdio.dev)) } + unsafe { kernel::device::Device::as_ref(&raw mut (*phydev).mdio.dev) } } } diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs index f7b2743828ae..6cb9ed1e7cbf 100644 --- a/rust/kernel/pci.rs +++ b/rust/kernel/pci.rs @@ -17,7 +17,7 @@ types::{ARef, ForeignOwnable, Opaque}, ThisModule, }; -use core::{ops::Deref, ptr::addr_of_mut}; +use core::ops::Deref; use kernel::prelude::*; /// An adapter for the registration of PCI drivers. @@ -60,7 +60,7 @@ extern "C" fn probe_callback( ) -> kernel::ffi::c_int { // SAFETY: The PCI bus only ever calls the probe callback with a valid pointer to a // `struct pci_dev`. - let dev = unsafe { device::Device::get_device(addr_of_mut!((*pdev).dev)) }; + let dev = unsafe { device::Device::get_device(&raw mut (*pdev).dev) }; // SAFETY: `dev` is guaranteed to be embedded in a valid `struct pci_dev` by the call // above. let mut pdev = unsafe { Device::from_dev(dev) }; diff --git a/rust/kernel/platform.rs b/rust/kernel/platform.rs index 1297f5292ba9..344875ad7b82 100644 --- a/rust/kernel/platform.rs +++ b/rust/kernel/platform.rs @@ -14,8 +14,6 @@ ThisModule, }; -use core::ptr::addr_of_mut; - /// An adapter for the registration of platform drivers. pub struct Adapter<T: Driver>(T); @@ -55,7 +53,7 @@ unsafe fn unregister(pdrv: &Opaque<Self::RegType>) { impl<T: Driver + 'static> Adapter<T> { extern "C" fn probe_callback(pdev: *mut bindings::platform_device) -> kernel::ffi::c_int { // SAFETY: The platform bus only ever calls the probe callback with a valid `pdev`. 
- let dev = unsafe { device::Device::get_device(addr_of_mut!((*pdev).dev)) }; + let dev = unsafe { device::Device::get_device(&raw mut (*pdev).dev) }; // SAFETY: `dev` is guaranteed to be embedded in a valid `struct platform_device` by the // call above. let mut pdev = unsafe { Device::from_dev(dev) }; diff --git a/rust/kernel/rbtree.rs b/rust/kernel/rbtree.rs index 1ea25c7092fb..b0ad35663cb0 100644 --- a/rust/kernel/rbtree.rs +++ b/rust/kernel/rbtree.rs @@ -11,7 +11,7 @@ cmp::{Ord, Ordering}, marker::PhantomData, mem::MaybeUninit, - ptr::{addr_of_mut, from_mut, NonNull}, + ptr::{from_mut, NonNull}, }; /// A red-black tree with owned nodes. @@ -238,7 +238,7 @@ pub fn values_mut(&mut self) -> impl Iterator<Item = &'_ mut V> { /// Returns a cursor over the tree nodes, starting with the smallest key. pub fn cursor_front(&mut self) -> Option<Cursor<'_, K, V>> { - let root = addr_of_mut!(self.root); + let root = &raw mut self.root; // SAFETY: `self.root` is always a valid root node let current = unsafe { bindings::rb_first(root) }; NonNull::new(current).map(|current| { @@ -253,7 +253,7 @@ pub fn cursor_front(&mut self) -> Option<Cursor<'_, K, V>> { /// Returns a cursor over the tree nodes, starting with the largest key. pub fn cursor_back(&mut self) -> Option<Cursor<'_, K, V>> { - let root = addr_of_mut!(self.root); + let root = &raw mut self.root; // SAFETY: `self.root` is always a valid root node let current = unsafe { bindings::rb_last(root) }; NonNull::new(current).map(|current| { @@ -459,7 +459,7 @@ pub fn cursor_lower_bound(&mut self, key: &K) -> Option<Cursor<'_, K, V>> let best = best_match?; // SAFETY: `best` is a non-null node so it is valid by the type invariants. 
- let links = unsafe { addr_of_mut!((*best.as_ptr()).links) }; + let links = unsafe { &raw mut (*best.as_ptr()).links }; NonNull::new(links).map(|current| { // INVARIANT: @@ -767,7 +767,7 @@ pub fn remove_current(self) -> (Option<Self>, RBTreeNode<K, V>) { let node = RBTreeNode { node }; // SAFETY: The reference to the tree used to create the cursor outlives the cursor, so // the tree cannot change. By the tree invariant, all nodes are valid. - unsafe { bindings::rb_erase(&mut (*this).links, addr_of_mut!(self.tree.root)) }; + unsafe { bindings::rb_erase(&mut (*this).links, &raw mut self.tree.root) }; let current = match (prev, next) { (_, Some(next)) => next, @@ -803,7 +803,7 @@ fn remove_neighbor(&mut self, direction: Direction) -> Option<RBTreeNode<K, V>> let neighbor = neighbor.as_ptr(); // SAFETY: The reference to the tree used to create the cursor outlives the cursor, so // the tree cannot change. By the tree invariant, all nodes are valid. - unsafe { bindings::rb_erase(neighbor, addr_of_mut!(self.tree.root)) }; + unsafe { bindings::rb_erase(neighbor, &raw mut self.tree.root) }; // SAFETY: By the type invariant of `Self`, all non-null `rb_node` pointers stored in `self` // point to the links field of `Node<K, V>` objects. let this = unsafe { container_of!(neighbor, Node<K, V>, links) }.cast_mut(); @@ -918,7 +918,7 @@ unsafe fn to_key_value_raw<'b>(node: NonNull<bindings::rb_node>) -> (&'b K, *mut let k = unsafe { &(*this).key }; // SAFETY: The passed `node` is the current node or a non-null neighbor, // thus `this` is valid by the type invariants. - let v = unsafe { addr_of_mut!((*this).value) }; + let v = unsafe { &raw mut (*this).value }; (k, v) } } @@ -1027,7 +1027,7 @@ fn next(&mut self) -> Option<Self::Item> { self.next = unsafe { bindings::rb_next(self.next) }; // SAFETY: By the same reasoning above, it is safe to dereference the node. 
- Some(unsafe { (addr_of_mut!((*cur).key), addr_of_mut!((*cur).value)) }) + Some(unsafe { (&raw mut (*cur).key, &raw mut (*cur).value) }) } } @@ -1170,7 +1170,7 @@ fn insert(self, node: RBTreeNode<K, V>) -> &'a mut V { // SAFETY: `node` is valid at least until we call `Box::from_raw`, which only happens when // the node is removed or replaced. - let node_links = unsafe { addr_of_mut!((*node).links) }; + let node_links = unsafe { &raw mut (*node).links }; // INVARIANT: We are linking in a new node, which is valid. It remains valid because we // "forgot" it with `Box::into_raw`. @@ -1178,7 +1178,7 @@ fn insert(self, node: RBTreeNode<K, V>) -> &'a mut V { unsafe { bindings::rb_link_node(node_links, self.parent, self.child_field_of_parent) }; // SAFETY: All pointers are valid. `node` has just been inserted into the tree. - unsafe { bindings::rb_insert_color(node_links, addr_of_mut!((*self.rbtree).root)) }; + unsafe { bindings::rb_insert_color(node_links, &raw mut (*self.rbtree).root) }; // SAFETY: The node is valid until we remove it from the tree. unsafe { &mut (*node).value } @@ -1261,7 +1261,7 @@ fn replace(self, node: RBTreeNode<K, V>) -> RBTreeNode<K, V> { // SAFETY: `node` is valid at least until we call `Box::from_raw`, which only happens when // the node is removed or replaced. - let new_node_links = unsafe { addr_of_mut!((*node).links) }; + let new_node_links = unsafe { &raw mut (*node).links }; // SAFETY: This updates the pointers so that `new_node_links` is in the tree where // `self.node_links` used to be. diff --git a/rust/kernel/sync/arc.rs b/rust/kernel/sync/arc.rs index 3cefda7a4372..81d8b0f84957 100644 --- a/rust/kernel/sync/arc.rs +++ b/rust/kernel/sync/arc.rs @@ -243,7 +243,7 @@ pub fn into_raw(self) -> *const T { let ptr = self.ptr.as_ptr(); core::mem::forget(self); // SAFETY: The pointer is valid. 
- unsafe { core::ptr::addr_of!((*ptr).data) } + unsafe { &raw const (*ptr).data } } /// Recreates an [`Arc`] instance previously deconstructed via [`Arc::into_raw`]. diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs index 49012e711942..b2ac768eed23 100644 --- a/rust/kernel/task.rs +++ b/rust/kernel/task.rs @@ -257,7 +257,7 @@ pub fn as_ptr(&self) -> *mut bindings::task_struct { pub fn group_leader(&self) -> &Task { // SAFETY: The group leader of a task never changes after initialization, so reading this // field is not a data race. - let ptr = unsafe { *ptr::addr_of!((*self.as_ptr()).group_leader) }; + let ptr = unsafe { *(&raw const (*self.as_ptr()).group_leader) }; // SAFETY: The lifetime of the returned task reference is tied to the lifetime of `self`, // and given that a task has a reference to its group leader, we know it must be valid for @@ -269,7 +269,7 @@ pub fn group_leader(&self) -> &Task { pub fn pid(&self) -> Pid { // SAFETY: The pid of a task never changes after initialization, so reading this field is // not a data race. - unsafe { *ptr::addr_of!((*self.as_ptr()).pid) } + unsafe { *(&raw const (*self.as_ptr()).pid) } } /// Returns the UID of the given task. diff --git a/rust/kernel/workqueue.rs b/rust/kernel/workqueue.rs index 0cd100d2aefb..34e8abb38974 100644 --- a/rust/kernel/workqueue.rs +++ b/rust/kernel/workqueue.rs @@ -401,9 +401,9 @@ pub fn new(name: &'static CStr, key: &'static LockClassKey) -> impl PinInit<Self pub unsafe fn raw_get(ptr: *const Self) -> *mut bindings::work_struct { // SAFETY: The caller promises that the pointer is aligned and not dangling. // - // A pointer cast would also be ok due to `#[repr(transparent)]`. We use `addr_of!` so that - // the compiler does not complain that the `work` field is unused. - unsafe { Opaque::raw_get(core::ptr::addr_of!((*ptr).work)) } + // A pointer cast would also be ok due to `#[repr(transparent)]`. 
We use `&raw const (*ptr).work` + // so that the compiler does not complain that the `work` field is unused. + unsafe { Opaque::raw_get(&raw const (*ptr).work) } } } @@ -510,7 +510,7 @@ macro_rules! impl_has_work { unsafe fn raw_get_work(ptr: *mut Self) -> *mut $crate::workqueue::Work<$work_type $(, $id)?> { // SAFETY: The caller promises that the pointer is not dangling. unsafe { - ::core::ptr::addr_of_mut!((*ptr).$field) + &raw mut (*ptr).$field } } } -- 2.48.1

7 months, 2 weeks

3
6
0 0

[PATCH v2] selftests/mm/cow: Fix the incorrect error handling

by Cyan Yang

One error-handling path does not check the correct return value: after mmap() assigns mremap_mem, the code tests mem instead of mremap_mem. This patch fixes it. Fixes: f4b5fd6946e244cdedc3bbb9a1f24c8133b2077a ("selftests/vm: anon_cow: THP tests") Signed-off-by: Cyan Yang <cyan.yang(a)sifive.com> --- tools/testing/selftests/mm/cow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index 9446673645eb..f0cb14ea8608 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -876,7 +876,7 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) mremap_size = thpsize / 2; mremap_mem = mmap(NULL, mremap_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (mem == MAP_FAILED) { + if (mremap_mem == MAP_FAILED) { ksft_test_result_fail("mmap() failed\n"); goto munmap; } -- 2.39.5 (Apple Git-154)

7 months, 2 weeks

4
3
0 0

[PATCH v3 0/6] rust: reduce pointer casts, enable related lints

by Tamir Duberstein

This started with a patch that enabled `clippy::ptr_as_ptr`. Benno Lossin suggested I also look into `clippy::ptr_cast_constness` and I discovered `clippy::as_ptr_cast_mut`. This series now enables all 3 lints. It also enables `clippy::as_underscore` which ensures other pointer casts weren't missed. The first commit reduces the need for pointer casts and is shared with another series[1]. The final patch also enables pointer provenance lints and fixes violations. See that commit message for details. The build system portion of that commit is pretty messy but I couldn't find a better way to convincingly ensure that these lints were applied globally. Suggestions would be very welcome. Link: https://lore.kernel.org/all/20250307-no-offset-v1-0-0c728f63b69c@gmail.com/ [1] Signed-off-by: Tamir Duberstein <tamird(a)gmail.com> --- Changes in v3: - Fixed clippy warning in rust/kernel/firmware.rs. (kernel test robot) Link: https://lore.kernel.org/all/202503120332.YTCpFEvv-lkp@intel.com/ - s/as u64/as bindings::phys_addr_t/g. (Benno Lossin) - Use strict provenance APIs and enable lints. (Benno Lossin) - Link to v2: https://lore.kernel.org/r/20250309-ptr-as-ptr-v2-0-25d60ad922b7@gmail.com Changes in v2: - Fixed typo in first commit message. - Added additional patches, converted to series. 
- Link to v1: https://lore.kernel.org/r/20250307-ptr-as-ptr-v1-1-582d06514c98@gmail.com --- Tamir Duberstein (6): rust: retain pointer mut-ness in `container_of!` rust: enable `clippy::ptr_as_ptr` lint rust: enable `clippy::ptr_cast_constness` lint rust: enable `clippy::as_ptr_cast_mut` lint rust: enable `clippy::as_underscore` lint rust: use strict provenance APIs Makefile | 13 ++++++++++++- init/Kconfig | 3 +++ rust/Makefile | 26 ++++++++++++++++++++------ rust/bindings/lib.rs | 1 + rust/kernel/alloc.rs | 2 +- rust/kernel/alloc/allocator_test.rs | 2 +- rust/kernel/alloc/kvec.rs | 4 ++-- rust/kernel/block/mq/operations.rs | 2 +- rust/kernel/block/mq/request.rs | 7 ++++--- rust/kernel/device.rs | 5 +++-- rust/kernel/device_id.rs | 2 +- rust/kernel/devres.rs | 19 ++++++++++--------- rust/kernel/error.rs | 2 +- rust/kernel/firmware.rs | 3 ++- rust/kernel/fs/file.rs | 2 +- rust/kernel/io.rs | 16 ++++++++-------- rust/kernel/kunit.rs | 15 +++++++-------- rust/kernel/lib.rs | 25 ++++++++++++++++++++++--- rust/kernel/list/impl_list_item_mod.rs | 2 +- rust/kernel/miscdevice.rs | 2 +- rust/kernel/of.rs | 6 +++--- rust/kernel/pci.rs | 15 +++++++++------ rust/kernel/platform.rs | 6 ++++-- rust/kernel/print.rs | 11 +++++------ rust/kernel/rbtree.rs | 23 ++++++++++------------- rust/kernel/seq_file.rs | 3 ++- rust/kernel/str.rs | 18 +++++++----------- rust/kernel/sync/poll.rs | 2 +- rust/kernel/uaccess.rs | 12 ++++++++---- rust/kernel/workqueue.rs | 12 ++++++------ rust/uapi/lib.rs | 1 + scripts/Makefile.build | 2 +- scripts/Makefile.host | 4 ++++ 33 files changed, 163 insertions(+), 105 deletions(-) --- base-commit: a1eb95d6b5f4cf5cc7b081e85e374d1dd98a213b change-id: 20250307-ptr-as-ptr-21b1867fc4d4 Best regards, -- Tamir Duberstein <tamird(a)gmail.com>

7 months, 2 weeks

5
19
0 0

[PATCH RFC 8/8] selftests/sched_ext: Add test for sched_ext dl_server

by Joel Fernandes

From: Andrea Righi <arighi(a)nvidia.com> Add a selftest to validate the correct behavior of the deadline server for the ext_sched_class. [ Joel: Replaced occurrences of CFS in the test with EXT. ] Signed-off-by: Joel Fernandes <joelagnelf(a)nvidia.com> Signed-off-by: Andrea Righi <arighi(a)nvidia.com> --- tools/testing/selftests/sched_ext/Makefile | 1 + .../selftests/sched_ext/rt_stall.bpf.c | 23 ++ tools/testing/selftests/sched_ext/rt_stall.c | 213 ++++++++++++++++++ 3 files changed, 237 insertions(+) create mode 100644 tools/testing/selftests/sched_ext/rt_stall.bpf.c create mode 100644 tools/testing/selftests/sched_ext/rt_stall.c diff --git a/tools/testing/selftests/sched_ext/Makefile b/tools/testing/selftests/sched_ext/Makefile index 011762224600..802e3d8d038f 100644 --- a/tools/testing/selftests/sched_ext/Makefile +++ b/tools/testing/selftests/sched_ext/Makefile @@ -180,6 +180,7 @@ auto-test-targets := \ select_cpu_dispatch_bad_dsq \ select_cpu_dispatch_dbl_dsp \ select_cpu_vtime \ + rt_stall \ test_example \ testcase-targets := $(addsuffix .o,$(addprefix $(SCXOBJ_DIR)/,$(auto-test-targets))) diff --git a/tools/testing/selftests/sched_ext/rt_stall.bpf.c b/tools/testing/selftests/sched_ext/rt_stall.bpf.c new file mode 100644 index 000000000000..80086779dd1e --- /dev/null +++ b/tools/testing/selftests/sched_ext/rt_stall.bpf.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * A scheduler that verified if RT tasks can stall SCHED_EXT tasks. + * + * Copyright (c) 2025 NVIDIA Corporation. 
+ */ + +#include <scx/common.bpf.h> + +char _license[] SEC("license") = "GPL"; + +UEI_DEFINE(uei); + +void BPF_STRUCT_OPS(rt_stall_exit, struct scx_exit_info *ei) +{ + UEI_RECORD(uei, ei); +} + +SEC(".struct_ops.link") +struct sched_ext_ops rt_stall_ops = { + .exit = (void *)rt_stall_exit, + .name = "rt_stall", +}; diff --git a/tools/testing/selftests/sched_ext/rt_stall.c b/tools/testing/selftests/sched_ext/rt_stall.c new file mode 100644 index 000000000000..d4cb545ebfd8 --- /dev/null +++ b/tools/testing/selftests/sched_ext/rt_stall.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2025 NVIDIA Corporation. + */ +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sched.h> +#include <sys/prctl.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <time.h> +#include <linux/sched.h> +#include <signal.h> +#include <bpf/bpf.h> +#include <scx/common.h> +#include <sys/wait.h> +#include <unistd.h> +#include "rt_stall.bpf.skel.h" +#include "scx_test.h" +#include "../kselftest.h" + +#define CORE_ID 0 /* CPU to pin tasks to */ +#define RUN_TIME 5 /* How long to run the test in seconds */ + +/* Simple busy-wait function for test tasks */ +static void process_func(void) +{ + while (1) { + /* Busy wait */ + for (volatile unsigned long i = 0; i < 10000000UL; i++); + } +} + +/* Set CPU affinity to a specific core */ +static void set_affinity(int cpu) +{ + cpu_set_t mask; + + CPU_ZERO(&mask); + CPU_SET(cpu, &mask); + if (sched_setaffinity(0, sizeof(mask), &mask) != 0) { + perror("sched_setaffinity"); + exit(EXIT_FAILURE); + } +} + +/* Set task scheduling policy and priority */ +static void set_sched(int policy, int priority) +{ + struct sched_param param; + + param.sched_priority = priority; + if (sched_setscheduler(0, policy, &param) != 0) { + perror("sched_setscheduler"); + exit(EXIT_FAILURE); + } +} + +/* Get process runtime from /proc/<pid>/stat */ +static float get_process_runtime(int pid) +{ + char 
path[256]; + FILE *file; + long utime, stime; + int fields; + + snprintf(path, sizeof(path), "/proc/%d/stat", pid); + file = fopen(path, "r"); + if (file == NULL) { + perror("Failed to open stat file"); + return -1; + } + + /* Skip the first 13 fields and read the 14th and 15th */ + fields = fscanf(file, + "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %lu %lu", + &utime, &stime); + fclose(file); + + if (fields != 2) { + fprintf(stderr, "Failed to read stat file\n"); + return -1; + } + + /* Calculate the total time spent in the process */ + long total_time = utime + stime; + long ticks_per_second = sysconf(_SC_CLK_TCK); + float runtime_seconds = total_time * 1.0 / ticks_per_second; + + return runtime_seconds; +} + +static enum scx_test_status setup(void **ctx) +{ + struct rt_stall *skel; + + skel = rt_stall__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(rt_stall__load(skel), "Failed to load skel"); + + *ctx = skel; + + return SCX_TEST_PASS; +} + +static bool sched_stress_test(void) +{ + float cfs_runtime, rt_runtime; + int cfs_pid, rt_pid; + float expected_min_ratio = 0.04; /* 4% */ + + ksft_print_header(); + ksft_set_plan(1); + + /* Create and set up a EXT task */ + cfs_pid = fork(); + if (cfs_pid == 0) { + set_affinity(CORE_ID); + process_func(); + exit(0); + } else if (cfs_pid < 0) { + perror("fork for EXT task"); + ksft_exit_fail(); + } + + /* Create an RT task */ + rt_pid = fork(); + if (rt_pid == 0) { + set_affinity(CORE_ID); + set_sched(SCHED_FIFO, 50); + process_func(); + exit(0); + } else if (rt_pid < 0) { + perror("fork for RT task"); + ksft_exit_fail(); + } + + /* Let the processes run for the specified time */ + sleep(RUN_TIME); + + /* Get runtime for the EXT task */ + cfs_runtime = get_process_runtime(cfs_pid); + if (cfs_runtime != -1) + ksft_print_msg("Runtime of EXT task (PID %d) is %f seconds\n", cfs_pid, cfs_runtime); + else + ksft_exit_fail_msg("Error getting runtime for EXT task (PID %d)\n", cfs_pid); 
+ + /* Get runtime for the RT task */ + rt_runtime = get_process_runtime(rt_pid); + if (rt_runtime != -1) + ksft_print_msg("Runtime of RT task (PID %d) is %f seconds\n", rt_pid, rt_runtime); + else + ksft_exit_fail_msg("Error getting runtime for RT task (PID %d)\n", rt_pid); + + /* Kill the processes */ + kill(cfs_pid, SIGKILL); + kill(rt_pid, SIGKILL); + waitpid(cfs_pid, NULL, 0); + waitpid(rt_pid, NULL, 0); + + /* Verify that the scx task got enough runtime */ + float actual_ratio = cfs_runtime / (cfs_runtime + rt_runtime); + ksft_print_msg("EXT task got %.2f%% of total runtime\n", actual_ratio * 100); + + if (actual_ratio >= expected_min_ratio) { + ksft_test_result_pass("PASS: EXT task got more than %.2f%% of runtime\n", + expected_min_ratio * 100); + return true; + } else { + ksft_test_result_fail("FAIL: EXT task got less than %.2f%% of runtime\n", + expected_min_ratio * 100); + return false; + } +} + +static enum scx_test_status run(void *ctx) +{ + struct rt_stall *skel = ctx; + struct bpf_link *link; + bool res; + + link = bpf_map__attach_struct_ops(skel->maps.rt_stall_ops); + SCX_FAIL_IF(!link, "Failed to attach scheduler"); + + res = sched_stress_test(); + + SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_NONE)); + bpf_link__destroy(link); + + if (!res) + ksft_exit_fail(); + + return SCX_TEST_PASS; +} + +static void cleanup(void *ctx) +{ + struct rt_stall *skel = ctx; + + rt_stall__destroy(skel); +} + +struct scx_test rt_stall = { + .name = "rt_stall", + .description = "Verify that RT tasks cannot stall SCHED_EXT tasks", + .setup = setup, + .run = run, + .cleanup = cleanup, +}; +REGISTER_SCX_TEST(&rt_stall) -- 2.43.0

7 months, 2 weeks

1
0
0 0

[PATCH 0/4] ntsync: some small fixes for doc and selftests

by Su Hui

This series contains four small fixes for the ntsync selftests and documentation. I split them into four separate patches because they address different kinds of errors. If a single combined patch is preferred, I can do that too. Su Hui (4): selftests: ntsync: fix the wrong condition in wake_all selftests: ntsync: avoid possible overflow in 32-bit machine selftests: ntsync: update config docs: ntsync: update NTSYNC_IOC_* Documentation/userspace-api/ntsync.rst | 18 +++++++++--------- tools/testing/selftests/drivers/ntsync/config | 2 +- .../testing/selftests/drivers/ntsync/ntsync.c | 6 +++--- 3 files changed, 13 insertions(+), 13 deletions(-) -- 2.30.2

7 months, 2 weeks

3
7
0 0

[PATCH v3 00/10] selftests/mm: Some cleanups from trying to run them

by Brendan Jackman

I never had much luck running mm selftests so I spent a few hours digging into why. Looks like most of the reason is missing SKIP checks, so this series is just adding a bunch of those that I found. I did not do anything like all of them, just the ones I spotted in gup_longterm, gup_test, mmap, userfaultfd and memfd_secret. It's a bit unfortunate to have to skip those tests when ftruncate() fails, but I don't have time to dig deep enough into it to actually make them pass. I have observed the issue on 9pfs and heard rumours that NFS has a similar problem. I'm now able to run these test groups successfully: - mmap - gup_test - compaction - migration - page_frag - userfaultfd Signed-off-by: Brendan Jackman <jackmanb(a)google.com> --- Changes in v3: - Added fix for userfaultfd tests. - Dropped attempts to use sudo. - Fixed garbage printf in uffd-stress. (Added EXTRA_CFLAGS=-Werror FORCE_TARGETS=1 to my scripts to prevent such errors happening again). - Fixed missing newlines in ksft_test_result_skip() calls. 
- Link to v2: https://lore.kernel.org/r/20250221-mm-selftests-v2-0-28c4d66383c5@google.com Changes in v2 (Thanks to Dev for the reviews): - Improve and cleanup some error messages - Add some extra SKIPs - Fix misnaming of nr_cpus variable in uffd tests - Link to v1: https://lore.kernel.org/r/20250220-mm-selftests-v1-0-9bbf57d64463@google.com --- Brendan Jackman (10): selftests/mm: Report errno when things fail in gup_longterm selftests/mm: Skip uffd-stress if userfaultfd not available selftests/mm: Skip uffd-wp-mremap if userfaultfd not available selftests/mm/uffd: Rename nr_cpus -> nr_threads selftests/mm: Print some details when uffd-stress gets bad params selftests/mm: Don't fail uffd-stress if too many CPUs selftests/mm: Skip map_populate on weird filesystems selftests/mm: Skip gup_longerm tests on weird filesystems selftests/mm: Drop unnecessary sudo usage selftests/mm: Ensure uffd-wp-mremap gets pages of each size tools/testing/selftests/mm/gup_longterm.c | 45 ++++++++++++++++++---------- tools/testing/selftests/mm/map_populate.c | 7 +++++ tools/testing/selftests/mm/run_vmtests.sh | 25 ++++++++++++++-- tools/testing/selftests/mm/uffd-common.c | 8 ++--- tools/testing/selftests/mm/uffd-common.h | 2 +- tools/testing/selftests/mm/uffd-stress.c | 42 ++++++++++++++++---------- tools/testing/selftests/mm/uffd-unit-tests.c | 2 +- tools/testing/selftests/mm/uffd-wp-mremap.c | 5 +++- 8 files changed, 95 insertions(+), 41 deletions(-) --- base-commit: 76544811c850a1f4c055aa182b513b7a843868ea change-id: 20250220-mm-selftests-2d7d0542face Best regards, -- Brendan Jackman <jackmanb(a)google.com>

7 months, 2 weeks

4
36
0 0

[RFC PATCH 0/5] KVM: guest_memfd: support for uffd missing

by Nikita Kalyazin

This series is built on top of the v3 write syscall support [1]. With James's KVM userfault [2], it is possible to handle stage-2 faults in guest_memfd in userspace. However, KVM itself also triggers faults in guest_memfd in some cases, for example: PV interfaces like kvmclock, PV EOI and page table walking code when fetching the MMIO instruction on x86. It was agreed in the guest_memfd upstream call on 23 Jan 2025 [3] that KVM would be accessing those pages via userspace page tables. In order for such faults to be handled in userspace, guest_memfd needs to support userfaultfd. This series proposes a limited support for userfaultfd in guest_memfd: - userfaultfd support is conditional to `CONFIG_KVM_GMEM_SHARED_MEM` (as is fault support in general) - Only `page missing` event is currently supported - Userspace is supposed to respond to the event with the `write` syscall followed by `UFFDIO_CONTINUE` ioctl to unblock the faulting process. Note that we can't use `UFFDIO_COPY` here because userfaultfd code does not know how to prepare guest_memfd pages, eg remove them from direct map [4]. Not included in this series: - Proper interface for userfaultfd to recognise guest_memfd mappings - Proper handling of truncation cases after locking the page Request for comments: - Is it a sensible workflow for guest_memfd to resolve a userfault `page missing` event with `write` syscall + `UFFDIO_CONTINUE`? One of the alternatives is teaching `UFFDIO_COPY` how to deal with guest_memfd pages. - What is a way forward to make userfaultfd code aware of guest_memfd? I saw that Patrick hit a somewhat similar problem in [5] when trying to use direct map manipulation functions in KVM and was pointed by David at Elliot's guestmem library [6] that might include a shim for that. Would the library be the right place to expose required interfaces like `vma_is_gmem`?
Nikita [1] https://lore.kernel.org/kvm/20250303130838.28812-1-kalyazin@amazon.com/T/ [2] https://lore.kernel.org/kvm/20250109204929.1106563-1-jthoughton@google.com/… [3] https://docs.google.com/document/d/1M6766BzdY1Lhk7LiR5IqVR8B8mG3cr-cxTxOrAo… [4] https://lore.kernel.org/kvm/20250221160728.1584559-1-roypat@amazon.co.uk/T/ [4] https://lore.kernel.org/kvm/20250221160728.1584559-1-roypat@amazon.co.uk/T/… [5] https://lore.kernel.org/kvm/20241122-guestmem-library-v5-2-450e92951a15@qui… Nikita Kalyazin (5): KVM: guest_memfd: add kvm_gmem_vma_is_gmem KVM: guest_memfd: add support for uffd missing mm: userfaultfd: allow to register userfaultfd for guest_memfd mm: userfaultfd: support continue for guest_memfd KVM: selftests: add uffd missing test for guest_memfd include/linux/userfaultfd_k.h | 9 ++ mm/userfaultfd.c | 23 ++++- .../testing/selftests/kvm/guest_memfd_test.c | 88 +++++++++++++++++++ virt/kvm/guest_memfd.c | 17 +++- virt/kvm/kvm_mm.h | 1 + 5 files changed, 136 insertions(+), 2 deletions(-) base-commit: 592e7531753dc4b711f96cd1daf808fd493d3223 -- 2.47.1

7 months, 2 weeks

3
21
0 0

[PATCH v11 00/27] riscv control-flow integrity for usermode

by Deepak Gupta

Basics and overview =================== Software with larger attack surfaces (e.g. network facing apps like databases, browsers or apps relying on browser runtimes) suffer from memory corruption issues which can be utilized by attackers to bend control flow of the program to eventually gain control (by making their payload executable). Attackers are able to perform such attacks by leveraging call-sites which rely on indirect calls or return sites which rely on obtaining return address from stack memory. To mitigate such attacks, risc-v extension zicfilp enforces that all indirect calls must land on a landing pad instruction `lpad` else cpu will raise software check exception (a new cpu exception cause code on riscv). Similarly for return flow, risc-v extension zicfiss extends architecture with - `sspush` instruction to push return address on a shadow stack - `sspopchk` instruction to pop return address from shadow stack and compare with input operand (i.e. return address on stack) - `sspopchk` to raise software check exception if comparison above was a mismatch - Protection mechanism using which shadow stack is not writeable via regular store instructions More information and details can be found at extensions github repo [1]. Equivalent to landing pad (zicfilp) on x86 is `ENDBRANCH` instruction in Intel CET [3] and branch target identification (BTI) [4] on arm. Similarly x86's Intel CET has shadow stack [5] and arm64 has guarded control stack (GCS) [6] which are very similar to risc-v's zicfiss shadow stack. x86 and arm64 support for user mode shadow stack is already in mainline. Kernel awareness for user control flow integrity ================================================ This series picks up Samuel Holland's envcfg changes [2] as well. So if those are being applied independently, they should be removed from this series.
Enabling: In order to maintain compatibility and not break anything in user mode, kernel doesn't enable control flow integrity cpu extensions on binary by default. Instead exposes a prctl interface to enable, disable and lock the shadow stack or landing pad feature for a task. This allows userspace (loader) to enumerate if all objects in its address space are compiled with shadow stack and landing pad support and accordingly enable the feature. Additionally if a subsequent `dlopen` happens on a library, user mode can take a decision again to disable the feature (if incoming library is not compiled with support) OR terminate the task (if user mode policy is strict to have all objects in address space to be compiled with control flow integrity cpu feature). prctl to enable shadow stack results in allocating shadow stack from virtual memory and activating for user address space. x86 and arm64 are also following same direction due to similar reason(s). clone/fork: On clone and fork, cfi state for task is inherited by child. Shadow stack is part of virtual memory and is a writeable memory from kernel perspective (writeable via a restricted set of instructions aka shadow stack instructions) Thus kernel changes ensure that this memory is converted into read-only when fork/clone happens and COWed when fault is taken due to sspush, sspopchk or ssamoswap. In case `CLONE_VM` is specified and shadow stack is to be enabled, kernel will automatically allocate a shadow stack for that clone call. map_shadow_stack: x86 introduced `map_shadow_stack` system call to allow user space to explicitly map shadow stack memory in its address space. It is useful to allocate shadow for different contexts managed by a single thread (green threads or contexts) risc-v implements this system call as well.
signal management: If shadow stack is enabled for a task, kernel performs an asynchronous control flow diversion to deliver the signal and eventually expects userspace to issue sigreturn so that original execution can be resumed. Even though resume context is prepared by kernel, it is in user space memory and is subject to memory corruption and corruption bugs can be utilized by attacker in this race window to perform arbitrary sigreturn and eventually bypass cfi mechanism. Another issue is how to ensure that cfi related state on sigcontext area is not trampled by legacy apps or apps compiled with old kernel headers. In order to mitigate control-flow hijacking, kernel prepares a token and place it on shadow stack before signal delivery and places address of token in sigcontext structure. During sigreturn, kernel obtains address of token from sigcontext structure, reads token from shadow stack and validates it and only then allow sigreturn to succeed. Compatibility issue is solved by adopting dynamic sigcontext management introduced for vector extension. This series re-factor the code little bit to allow future sigcontext management easy (as proposed by Andy Chiu from SiFive) config and compilation: Introduce a new risc-v config option `CONFIG_RISCV_USER_CFI`. Selecting this config option picks the kernel support for user control flow integrity. This option is presented only if toolchain has shadow stack and landing pad support. And is on purpose guarded by toolchain support. Reason being that eventually vDSO also needs to be compiled in with shadow stack and landing pad support. vDSO compile patches are not included as of now because landing pad labeling scheme is yet to settle for usermode runtime.
To get more information on kernel interactions with respect to zicfilp and zicfiss, patch series adds documentation for `zicfilp` and `zicfiss` in following: Documentation/arch/riscv/zicfiss.rst Documentation/arch/riscv/zicfilp.rst How to test this series ======================= Toolchain --------- $ git clone git@github.com:sifive/riscv-gnu-toolchain.git -b cfi-dev $ riscv-gnu-toolchain/configure --prefix=<path-to-where-to-build> --with-arch=rv64gc_zicfilp_zicfiss --enable-linux --disable-gdb --with-extra-multilib-test="rv64gc_zicfilp_zicfiss-lp64d:-static" $ make -j$(nproc) Qemu ---- Get the lastest qemu $ cd qemu $ mkdir build $ cd build $ ../configure --target-list=riscv64-softmmu $ make -j$(nproc) Opensbi ------- $ git clone git@github.com:deepak0414/opensbi.git -b v6_cfi_spec_split_opensbi $ make CROSS_COMPILE=<your riscv toolchain> -j$(nproc) PLATFORM=generic Linux ----- Running defconfig is fine. CFI is enabled by default if the toolchain supports it. $ make ARCH=riscv CROSS_COMPILE=<path-to-cfi-riscv-gnu-toolchain>/build/bin/riscv64-unknown-linux-gnu- -j$(nproc) defconfig $ make ARCH=riscv CROSS_COMPILE=<path-to-cfi-riscv-gnu-toolchain>/build/bin/riscv64-unknown-linux-gnu- -j$(nproc) In case you're building your own rootfs using toolchain, please make sure you pick following patch to ensure that vDSO compiled with lpad and shadow stack. "arch/riscv: compile vdso with landing pad" Branch where above patch can be picked https://github.com/deepak0414/linux-riscv-cfi/tree/vdso_user_cfi_v6.12-rc1 Running ------- Modify your qemu command to have: -bios <path-to-cfi-opensbi>/build/platform/generic/firmware/fw_dynamic.bin -cpu rv64,zicfilp=true,zicfiss=true,zimop=true,zcmop=true vDSO related Opens (in the flux) ================================= I am listing these opens for laying out plan and what to expect in future patch sets. And of course for the sake of discussion. 
Shadow stack and landing pad enabling in vDSO ---------------------------------------------- vDSO must have shadow stack and landing pad support compiled in for task to have shadow stack and landing pad support. This patch series doesn't enable that (yet). Enabling shadow stack support in vDSO should be straight forward (intend to do that in next versions of patch set). Enabling landing pad support in vDSO requires some collaboration with toolchain folks to follow a single label scheme for all object binaries. This is necessary to ensure that all indirect call-sites are setting correct label and target landing pads are decorated with same label scheme. How many vDSOs --------------- Shadow stack instructions are carved out of zimop (may be operations) and if CPU doesn't implement zimop, they're illegal instructions. Kernel could be running on a CPU which may or may not implement zimop. And thus kernel will have to carry 2 different vDSOs and expose the appropriate one depending on whether CPU implements zimop or not. References ========== [1] - https://github.com/riscv/riscv-cfi [2] - https://lore.kernel.org/all/20240814081126.956287-1-samuel.holland@sifive.c… [3] - https://lwn.net/Articles/889475/ [4] - https://developer.arm.com/documentation/109576/0100/Branch-Target-Identific… [5] - https://www.intel.com/content/dam/develop/external/us/en/documents/catc17-i… [6] - https://lwn.net/Articles/940403/ --- changelog --------- v11: - patch "arch/riscv: compile vdso with landing pad" was unconditionally selecting `_zicfilp` for vDSO compile. fixed that. Changed `lpad 1` to to `lpad 0`. v10: - dropped "mm: helper `is_shadow_stack_vma` to check shadow stack vma". This patch is not that interesting to this patch series for risc-v. There are instances in arch directories where VM_SHADOW_STACK flag is anyways used. Dropping this patch to expedite merging in riscv tree. 
- Took suggestions from `Clement` on "riscv: zicfiss / zicfilp enumeration" to validate presence of cfi based on config. - Added a patch for vDSO to have `lpad 0`. I had omitted this earlier to make sure we add single vdso object with cfi enabled. But a vdso object with scheme of zero labeled landing pad is least common denominator and should work with all objects of zero labeled as well as function-signature labeled objects. v9: - rebased on master (39a803b754d5 fix braino in "9p: fix ->rename_sem exclusion") - dropped "mm: Introduce ARCH_HAS_USER_SHADOW_STACK" (master has it from arm64/gcs) - dropped "prctl: arch-agnostic prctl for shadow stack" (master has it from arm64/gcs) v8: - rebased on palmer/for-next - dropped samuel holland's `envcfg` context switch patches. they are in parlmer/for-next v7: - Removed "riscv/Kconfig: enable HAVE_EXIT_THREAD for riscv" Instead using `deactivate_mm` flow to clean up. see here for more context https://lore.kernel.org/all/20230908203655.543765-1-rick.p.edgecombe@intel.… - Changed the header include in `kselftest`. Hopefully this fixes compile issue faced by Zong Li at SiFive. - Cleaned up an orphaned change to `mm/mmap.c` in below patch "riscv/mm : ensure PROT_WRITE leads to VM_READ | VM_WRITE" - Lock interfaces for shadow stack and indirect branch tracking expect arg == 0 Any future evolution of this interface should accordingly define how arg should be setup. - `mm/map.c` has an instance of using `VM_SHADOW_STACK`. Fixed it to use helper `is_shadow_stack_vma`. 
- Link to v6: https://lore.kernel.org/r/20241008-v5_user_cfi_series-v6-0-60d9fe073f37@riv… v6: - Picked up Samuel Holland's changes as is with `envcfg` placed in `thread` instead of `thread_info` - fixed unaligned newline escapes in kselftest - cleaned up messages in kselftest and included test output in commit message - fixed a bug in clone path reported by Zong Li - fixed a build issue if CONFIG_RISCV_ISA_V is not selected (this was introduced due to re-factoring signal context management code) v5: - rebased on v6.12-rc1 - Fixed schema related issues in device tree file - Fixed some of the documentation related issues in zicfilp/ss.rst (style issues and added index) - added `SHADOW_STACK_SET_MARKER` so that implementation can define base of shadow stack. - Fixed warnings on definitions added in usercfi.h when CONFIG_RISCV_USER_CFI is not selected. - Adopted context header based signal handling as proposed by Andy Chiu - Added support for enabling kernel mode access to shadow stack using FWFT (https://github.com/riscv-non-isa/riscv-sbi-doc/blob/master/src/ext-firmware…) - Link to v5: https://lore.kernel.org/r/20241001-v5_user_cfi_series-v1-0-3ba65b6e550f@riv… (Note: I had an issue in my workflow due to which version number wasn't picked up correctly while sending out patches) v4: - rebased on 6.11-rc6 - envcfg: Converged with Samuel Holland's patches for envcfg management on per- thread basis. - vma_is_shadow_stack is renamed to is_vma_shadow_stack - picked up Mark Brown's `ARCH_HAS_USER_SHADOW_STACK` patch - signal context: using extended context management to maintain compatibility. - fixed `-Wmissing-prototypes` compiler warnings for prctl functions - Documentation fixes and amending typos. - Link to v4: https://lore.kernel.org/all/20240912231650.3740732-1-debug@rivosinc.com/ v3: - envcfg logic to pick up base envcfg had a bug where `ENVCFG_CBZE` could have been picked on per task basis, even though CPU didn't implement it. Fixed in this series. 
- dt-bindings As suggested, split into separate commit. fixed the messaging that spec is in public review - arch_is_shadow_stack change arch_is_shadow_stack changed to vma_is_shadow_stack - hwprobe zicfiss / zicfilp if present will get enumerated in hwprobe - selftests As suggested, added object and binary filenames to .gitignore Selftest binary anyways need to be compiled with cfi enabled compiler which will make sure that landing pad and shadow stack are enabled. Thus removed separate enable/disable tests. Cleaned up tests a bit. - Link to v3: https://lore.kernel.org/lkml/20240403234054.2020347-1-debug@rivosinc.com/ v2: - Using config `CONFIG_RISCV_USER_CFI`, kernel support for riscv control flow integrity for user mode programs can be compiled in the kernel. - Enabling of control flow integrity for user programs is left to user runtime - This patch series introduces arch agnostic `prctls` to enable shadow stack and indirect branch tracking. And implements them on riscv. --- --- Changes in v11: - EDITME: describe what is new in this series revision. - EDITME: use bulletpoints and terse descriptions. 
- Link to v10: https://lore.kernel.org/r/20250210-v5_user_cfi_series-v10-0-163dcfa31c60@ri… --- Andy Chiu (1): riscv: signal: abstract header saving for setup_sigcontext Clément Léger (1): riscv: Add Firmware Feature SBI extensions definitions Deepak Gupta (24): mm: VM_SHADOW_STACK definition for riscv dt-bindings: riscv: zicfilp and zicfiss in dt-bindings (extensions.yaml) riscv: zicfiss / zicfilp enumeration riscv: zicfiss / zicfilp extension csr and bit definitions riscv: usercfi state for task and save/restore of CSR_SSP on trap entry/exit riscv/mm : ensure PROT_WRITE leads to VM_READ | VM_WRITE riscv mm: manufacture shadow stack pte riscv mmu: teach pte_mkwrite to manufacture shadow stack PTEs riscv mmu: write protect and shadow stack riscv/mm: Implement map_shadow_stack() syscall riscv/shstk: If needed allocate a new shadow stack on clone riscv: Implements arch agnostic shadow stack prctls prctl: arch-agnostic prctl for indirect branch tracking riscv/traps: Introduce software check exception riscv/signal: save and restore of shadow stack for signal riscv/kernel: update __show_regs to print shadow stack register riscv/ptrace: riscv cfi status and state via ptrace and in core files riscv/hwprobe: zicfilp / zicfiss enumeration in hwprobe riscv: enable kernel access to shadow stack memory via FWFT sbi call riscv: kernel command line option to opt out of user cfi riscv: create a config for shadow stack and landing pad instr support riscv: Documentation for landing pad / indirect branch tracking riscv: Documentation for shadow stack on riscv kselftest/riscv: kselftest for user mode cfi Jim Shu (1): arch/riscv: compile vdso with landing pad Documentation/arch/riscv/index.rst | 2 + Documentation/arch/riscv/zicfilp.rst | 115 +++++ Documentation/arch/riscv/zicfiss.rst | 176 +++++++ .../devicetree/bindings/riscv/extensions.yaml | 14 + arch/riscv/Kconfig | 20 + arch/riscv/Makefile | 7 +- arch/riscv/include/asm/asm-prototypes.h | 1 + arch/riscv/include/asm/assembler.h | 
44 ++ arch/riscv/include/asm/cpufeature.h | 13 + arch/riscv/include/asm/csr.h | 16 + arch/riscv/include/asm/entry-common.h | 2 + arch/riscv/include/asm/hwcap.h | 2 + arch/riscv/include/asm/mman.h | 25 + arch/riscv/include/asm/mmu_context.h | 7 + arch/riscv/include/asm/pgtable.h | 30 +- arch/riscv/include/asm/processor.h | 2 + arch/riscv/include/asm/sbi.h | 26 + arch/riscv/include/asm/thread_info.h | 3 + arch/riscv/include/asm/usercfi.h | 89 ++++ arch/riscv/include/asm/vector.h | 3 + arch/riscv/include/uapi/asm/hwprobe.h | 2 + arch/riscv/include/uapi/asm/ptrace.h | 22 + arch/riscv/include/uapi/asm/sigcontext.h | 1 + arch/riscv/kernel/Makefile | 1 + arch/riscv/kernel/asm-offsets.c | 8 + arch/riscv/kernel/cpufeature.c | 13 + arch/riscv/kernel/entry.S | 31 +- arch/riscv/kernel/head.S | 12 + arch/riscv/kernel/process.c | 26 +- arch/riscv/kernel/ptrace.c | 83 ++++ arch/riscv/kernel/signal.c | 142 +++++- arch/riscv/kernel/sys_hwprobe.c | 2 + arch/riscv/kernel/sys_riscv.c | 10 + arch/riscv/kernel/traps.c | 43 ++ arch/riscv/kernel/usercfi.c | 524 +++++++++++++++++++++ arch/riscv/kernel/vdso/Makefile | 12 + arch/riscv/kernel/vdso/flush_icache.S | 4 + arch/riscv/kernel/vdso/getcpu.S | 4 + arch/riscv/kernel/vdso/rt_sigreturn.S | 4 + arch/riscv/kernel/vdso/sys_hwprobe.S | 4 + arch/riscv/mm/init.c | 2 +- arch/riscv/mm/pgtable.c | 17 + include/linux/cpu.h | 4 + include/linux/mm.h | 7 + include/uapi/linux/elf.h | 1 + include/uapi/linux/prctl.h | 27 ++ kernel/sys.c | 30 ++ tools/testing/selftests/riscv/Makefile | 2 +- tools/testing/selftests/riscv/cfi/.gitignore | 3 + tools/testing/selftests/riscv/cfi/Makefile | 10 + tools/testing/selftests/riscv/cfi/cfi_rv_test.h | 84 ++++ tools/testing/selftests/riscv/cfi/riscv_cfi_test.c | 78 +++ tools/testing/selftests/riscv/cfi/shadowstack.c | 375 +++++++++++++++ tools/testing/selftests/riscv/cfi/shadowstack.h | 37 ++ 54 files changed, 2193 insertions(+), 29 deletions(-) --- base-commit: 39a803b754d5224a3522016b564113ee1e4091b2 change-id: 
20240930-v5_user_cfi_series-3dc332f8f5b2 -- - debug

7 months, 2 weeks

2
47
0 0

[PATCH] rust: enable `clippy::ptr_as_ptr` lint

by Tamir Duberstein

In Rust 1.51.0, Clippy introduced the `ptr_as_ptr` lint [1]: > Though `as` casts between raw pointers are not terrible, > `pointer::cast` is safer because it cannot accidentally change the > pointer's mutability, nor cast the pointer to other types like `usize`. There are a few classes of changes required: - Modules generated by bindgen are marked `#[allow(clippy::ptr_as_ptr)]`. - Inferred casts (` as _`) are replaced with `.cast()`. - Ascribed casts (` as *... T`) are replaced with `.cast::<T>()`. - Multistep casts from references (` as *const _ as *const T`) are replaced with `let x: *const _ = &x;` and `.cast()` or `.cast::<T>()` according to the previous rules. The intermediate `let` binding is required because `(x as *const _).cast::<T>()` results in inference failure. - Native literal C strings are replaced with `c_str!().as_char_ptr()`. Apply these changes and enable the lint -- no functional change intended. Link: https://rust-lang.github.io/rust-clippy/master/index.html#ptr_as_ptr [1] Signed-off-by: Tamir Duberstein <tamird(a)gmail.com> --- Makefile | 1 + rust/bindings/lib.rs | 1 + rust/kernel/alloc/allocator_test.rs | 2 +- rust/kernel/alloc/kvec.rs | 4 ++-- rust/kernel/device.rs | 5 +++-- rust/kernel/devres.rs | 2 +- rust/kernel/error.rs | 2 +- rust/kernel/fs/file.rs | 2 +- rust/kernel/kunit.rs | 15 +++++++-------- rust/kernel/lib.rs | 4 ++-- rust/kernel/list/impl_list_item_mod.rs | 2 +- rust/kernel/pci.rs | 2 +- rust/kernel/platform.rs | 4 +++- rust/kernel/print.rs | 11 +++++------ rust/kernel/seq_file.rs | 3 ++- rust/kernel/str.rs | 2 +- rust/kernel/sync/poll.rs | 2 +- rust/kernel/workqueue.rs | 10 +++++----- rust/uapi/lib.rs | 1 + 19 files changed, 40 insertions(+), 35 deletions(-) diff --git a/Makefile b/Makefile index 70bdbf2218fc..ec8efc8e23ba 100644 --- a/Makefile +++ b/Makefile @@ -483,6 +483,7 @@ export rust_common_flags := --edition=2021 \ -Wclippy::needless_continue \ -Aclippy::needless_lifetimes \ -Wclippy::no_mangle_with_rust_abi \
+ -Wclippy::ptr_as_ptr \ -Wclippy::undocumented_unsafe_blocks \ -Wclippy::unnecessary_safety_comment \ -Wclippy::unnecessary_safety_doc \ diff --git a/rust/bindings/lib.rs b/rust/bindings/lib.rs index 014af0d1fc70..0486a32ed314 100644 --- a/rust/bindings/lib.rs +++ b/rust/bindings/lib.rs @@ -25,6 +25,7 @@ )] #[allow(dead_code)] +#[allow(clippy::ptr_as_ptr)] #[allow(clippy::undocumented_unsafe_blocks)] mod bindings_raw { // Manual definition for blocklisted types. diff --git a/rust/kernel/alloc/allocator_test.rs b/rust/kernel/alloc/allocator_test.rs index c37d4c0c64e9..8017aa9d5213 100644 --- a/rust/kernel/alloc/allocator_test.rs +++ b/rust/kernel/alloc/allocator_test.rs @@ -82,7 +82,7 @@ unsafe fn realloc( // SAFETY: Returns either NULL or a pointer to a memory allocation that satisfies or // exceeds the given size and alignment requirements. - let dst = unsafe { libc_aligned_alloc(layout.align(), layout.size()) } as *mut u8; + let dst = unsafe { libc_aligned_alloc(layout.align(), layout.size()) }.cast::<u8>(); let dst = NonNull::new(dst).ok_or(AllocError)?; if flags.contains(__GFP_ZERO) { diff --git a/rust/kernel/alloc/kvec.rs b/rust/kernel/alloc/kvec.rs index ae9d072741ce..c12844764671 100644 --- a/rust/kernel/alloc/kvec.rs +++ b/rust/kernel/alloc/kvec.rs @@ -262,7 +262,7 @@ pub fn spare_capacity_mut(&mut self) -> &mut [MaybeUninit<T>] { // - `self.len` is smaller than `self.capacity` and hence, the resulting pointer is // guaranteed to be part of the same allocated object. // - `self.len` can not overflow `isize`. - let ptr = unsafe { self.as_mut_ptr().add(self.len) } as *mut MaybeUninit<T>; + let ptr = unsafe { self.as_mut_ptr().add(self.len) }.cast::<MaybeUninit<T>>(); // SAFETY: The memory between `self.len` and `self.capacity` is guaranteed to be allocated // and valid, but uninitialized. 
@@ -554,7 +554,7 @@ fn drop(&mut self) { // - `ptr` points to memory with at least a size of `size_of::<T>() * len`, // - all elements within `b` are initialized values of `T`, // - `len` does not exceed `isize::MAX`. - unsafe { Vec::from_raw_parts(ptr as _, len, len) } + unsafe { Vec::from_raw_parts(ptr.cast(), len, len) } } } diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs index db2d9658ba47..9e500498835d 100644 --- a/rust/kernel/device.rs +++ b/rust/kernel/device.rs @@ -168,16 +168,17 @@ pub fn pr_dbg(&self, args: fmt::Arguments<'_>) { /// `KERN_*`constants, for example, `KERN_CRIT`, `KERN_ALERT`, etc. #[cfg_attr(not(CONFIG_PRINTK), allow(unused_variables))] unsafe fn printk(&self, klevel: &[u8], msg: fmt::Arguments<'_>) { + let msg: *const _ = &msg; // SAFETY: `klevel` is null-terminated and one of the kernel constants. `self.as_raw` // is valid because `self` is valid. The "%pA" format string expects a pointer to // `fmt::Arguments`, which is what we're passing as the last argument. #[cfg(CONFIG_PRINTK)] unsafe { bindings::_dev_printk( - klevel as *const _ as *const crate::ffi::c_char, + klevel.as_ptr().cast::<crate::ffi::c_char>(), self.as_raw(), c_str!("%pA").as_char_ptr(), - &msg as *const _ as *const crate::ffi::c_void, + msg.cast::<crate::ffi::c_void>(), ) }; } diff --git a/rust/kernel/devres.rs b/rust/kernel/devres.rs index 942376f6f3af..3a9d998ec371 100644 --- a/rust/kernel/devres.rs +++ b/rust/kernel/devres.rs @@ -157,7 +157,7 @@ fn remove_action(this: &Arc<Self>) { #[allow(clippy::missing_safety_doc)] unsafe extern "C" fn devres_callback(ptr: *mut kernel::ffi::c_void) { - let ptr = ptr as *mut DevresInner<T>; + let ptr = ptr.cast::<DevresInner<T>>(); // Devres owned this memory; now that we received the callback, drop the `Arc` and hence the // reference. 
// SAFETY: Safe, since we leaked an `Arc` reference to devm_add_action() in diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs index f6ecf09cb65f..8654d52b0bb9 100644 --- a/rust/kernel/error.rs +++ b/rust/kernel/error.rs @@ -152,7 +152,7 @@ pub(crate) fn to_blk_status(self) -> bindings::blk_status_t { /// Returns the error encoded as a pointer. pub fn to_ptr<T>(self) -> *mut T { // SAFETY: `self.0` is a valid error due to its invariant. - unsafe { bindings::ERR_PTR(self.0.get() as _) as *mut _ } + unsafe { bindings::ERR_PTR(self.0.get() as _).cast() } } /// Returns a string representing the error, if one exists. diff --git a/rust/kernel/fs/file.rs b/rust/kernel/fs/file.rs index e03dbe14d62a..8936afc234a4 100644 --- a/rust/kernel/fs/file.rs +++ b/rust/kernel/fs/file.rs @@ -364,7 +364,7 @@ fn deref(&self) -> &LocalFile { // // By the type invariants, there are no `fdget_pos` calls that did not take the // `f_pos_lock` mutex. - unsafe { LocalFile::from_raw_file(self as *const File as *const bindings::file) } + unsafe { LocalFile::from_raw_file((self as *const Self).cast()) } } } diff --git a/rust/kernel/kunit.rs b/rust/kernel/kunit.rs index 824da0e9738a..7ed2063c1af0 100644 --- a/rust/kernel/kunit.rs +++ b/rust/kernel/kunit.rs @@ -8,19 +8,20 @@ use core::{ffi::c_void, fmt}; +#[cfg(CONFIG_PRINTK)] +use crate::c_str; + /// Prints a KUnit error-level message. /// /// Public but hidden since it should only be used from KUnit generated code. #[doc(hidden)] pub fn err(args: fmt::Arguments<'_>) { + let args: *const _ = &args; // SAFETY: The format string is null-terminated and the `%pA` specifier matches the argument we // are passing. 
#[cfg(CONFIG_PRINTK)] unsafe { - bindings::_printk( - c"\x013%pA".as_ptr() as _, - &args as *const _ as *const c_void, - ); + bindings::_printk(c_str!("\x013%pA").as_char_ptr(), args.cast::<c_void>()); } } @@ -29,14 +30,12 @@ pub fn err(args: fmt::Arguments<'_>) { /// Public but hidden since it should only be used from KUnit generated code. #[doc(hidden)] pub fn info(args: fmt::Arguments<'_>) { + let args: *const _ = &args; // SAFETY: The format string is null-terminated and the `%pA` specifier matches the argument we // are passing. #[cfg(CONFIG_PRINTK)] unsafe { - bindings::_printk( - c"\x016%pA".as_ptr() as _, - &args as *const _ as *const c_void, - ); + bindings::_printk(c_str!("\x016%pA").as_char_ptr(), args.cast::<c_void>()); } } diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index 7697c60b2d1a..01264e459c92 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -196,9 +196,9 @@ fn panic(info: &core::panic::PanicInfo<'_>) -> ! { #[macro_export] macro_rules! container_of { ($ptr:expr, $type:ty, $($f:tt)*) => {{ - let ptr = $ptr as *const _ as *const u8; + let ptr: *const _ = $ptr; let offset: usize = ::core::mem::offset_of!($type, $($f)*); - ptr.sub(offset) as *const $type + ptr.cast::<u8>().sub(offset).cast::<$type>() }} } diff --git a/rust/kernel/list/impl_list_item_mod.rs b/rust/kernel/list/impl_list_item_mod.rs index a0438537cee1..1f9498c1458f 100644 --- a/rust/kernel/list/impl_list_item_mod.rs +++ b/rust/kernel/list/impl_list_item_mod.rs @@ -34,7 +34,7 @@ pub unsafe trait HasListLinks<const ID: u64 = 0> { unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut ListLinks<ID> { // SAFETY: The caller promises that the pointer is valid. The implementer promises that the // `OFFSET` constant is correct. 
- unsafe { (ptr as *mut u8).add(Self::OFFSET) as *mut ListLinks<ID> } + unsafe { ptr.cast::<u8>().add(Self::OFFSET).cast() } } } diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs index 4c98b5b9aa1e..206f71d33ab2 100644 --- a/rust/kernel/pci.rs +++ b/rust/kernel/pci.rs @@ -75,7 +75,7 @@ extern "C" fn probe_callback( // Let the `struct pci_dev` own a reference of the driver's private data. // SAFETY: By the type invariant `pdev.as_raw` returns a valid pointer to a // `struct pci_dev`. - unsafe { bindings::pci_set_drvdata(pdev.as_raw(), data.into_foreign() as _) }; + unsafe { bindings::pci_set_drvdata(pdev.as_raw(), data.into_foreign().cast()) }; } Err(err) => return Error::to_errno(err), } diff --git a/rust/kernel/platform.rs b/rust/kernel/platform.rs index 50e6b0421813..8f9e6b125faf 100644 --- a/rust/kernel/platform.rs +++ b/rust/kernel/platform.rs @@ -66,7 +66,9 @@ extern "C" fn probe_callback(pdev: *mut bindings::platform_device) -> kernel::ff // Let the `struct platform_device` own a reference of the driver's private data. // SAFETY: By the type invariant `pdev.as_raw` returns a valid pointer to a // `struct platform_device`. - unsafe { bindings::platform_set_drvdata(pdev.as_raw(), data.into_foreign() as _) }; + unsafe { + bindings::platform_set_drvdata(pdev.as_raw(), data.into_foreign().cast()) + }; } Err(err) => return Error::to_errno(err), } diff --git a/rust/kernel/print.rs b/rust/kernel/print.rs index b19ee490be58..0245c145ea32 100644 --- a/rust/kernel/print.rs +++ b/rust/kernel/print.rs @@ -25,7 +25,7 @@ // SAFETY: The C contract guarantees that `buf` is valid if it's less than `end`. let mut w = unsafe { RawFormatter::from_ptrs(buf.cast(), end.cast()) }; // SAFETY: TODO. 
- let _ = w.write_fmt(unsafe { *(ptr as *const fmt::Arguments<'_>) }); + let _ = w.write_fmt(unsafe { *ptr.cast::<fmt::Arguments<'_>>() }); w.pos().cast() } @@ -102,6 +102,7 @@ pub unsafe fn call_printk( module_name: &[u8], args: fmt::Arguments<'_>, ) { + let args: *const _ = &args; // `_printk` does not seem to fail in any path. #[cfg(CONFIG_PRINTK)] // SAFETY: TODO. @@ -109,7 +110,7 @@ pub unsafe fn call_printk( bindings::_printk( format_string.as_ptr(), module_name.as_ptr(), - &args as *const _ as *const c_void, + args.cast::<c_void>(), ); } } @@ -122,15 +123,13 @@ pub unsafe fn call_printk( #[doc(hidden)] #[cfg_attr(not(CONFIG_PRINTK), allow(unused_variables))] pub fn call_printk_cont(args: fmt::Arguments<'_>) { + let args: *const _ = &args; // `_printk` does not seem to fail in any path. // // SAFETY: The format string is fixed. #[cfg(CONFIG_PRINTK)] unsafe { - bindings::_printk( - format_strings::CONT.as_ptr(), - &args as *const _ as *const c_void, - ); + bindings::_printk(format_strings::CONT.as_ptr(), args.cast::<c_void>()); } } diff --git a/rust/kernel/seq_file.rs b/rust/kernel/seq_file.rs index 04947c672979..90545d28e6b7 100644 --- a/rust/kernel/seq_file.rs +++ b/rust/kernel/seq_file.rs @@ -31,12 +31,13 @@ pub unsafe fn from_raw<'a>(ptr: *mut bindings::seq_file) -> &'a SeqFile { /// Used by the [`seq_print`] macro. pub fn call_printf(&self, args: core::fmt::Arguments<'_>) { + let args: *const _ = &args; // SAFETY: Passing a void pointer to `Arguments` is valid for `%pA`. unsafe { bindings::seq_printf( self.inner.get(), c_str!("%pA").as_char_ptr(), - &args as *const _ as *const crate::ffi::c_void, + args.cast::<crate::ffi::c_void>(), ); } } diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs index 28e2201604d6..6a1a982b946d 100644 --- a/rust/kernel/str.rs +++ b/rust/kernel/str.rs @@ -191,7 +191,7 @@ pub unsafe fn from_char_ptr<'a>(ptr: *const crate::ffi::c_char) -> &'a Self { // to a `NUL`-terminated C string. 
let len = unsafe { bindings::strlen(ptr) } + 1; // SAFETY: Lifetime guaranteed by the safety precondition. - let bytes = unsafe { core::slice::from_raw_parts(ptr as _, len) }; + let bytes = unsafe { core::slice::from_raw_parts(ptr.cast(), len) }; // SAFETY: As `len` is returned by `strlen`, `bytes` does not contain interior `NUL`. // As we have added 1 to `len`, the last byte is known to be `NUL`. unsafe { Self::from_bytes_with_nul_unchecked(bytes) } diff --git a/rust/kernel/sync/poll.rs b/rust/kernel/sync/poll.rs index d5f17153b424..a151f54cde91 100644 --- a/rust/kernel/sync/poll.rs +++ b/rust/kernel/sync/poll.rs @@ -73,7 +73,7 @@ pub fn register_wait(&mut self, file: &File, cv: &PollCondVar) { // be destroyed, the destructor must run. That destructor first removes all waiters, // and then waits for an rcu grace period. Therefore, `cv.wait_queue_head` is valid for // long enough. - unsafe { qproc(file.as_ptr() as _, cv.wait_queue_head.get(), self.0.get()) }; + unsafe { qproc(file.as_ptr().cast(), cv.wait_queue_head.get(), self.0.get()) }; } } } diff --git a/rust/kernel/workqueue.rs b/rust/kernel/workqueue.rs index 0cd100d2aefb..8ff54105be3f 100644 --- a/rust/kernel/workqueue.rs +++ b/rust/kernel/workqueue.rs @@ -170,7 +170,7 @@ impl Queue { pub unsafe fn from_raw<'a>(ptr: *const bindings::workqueue_struct) -> &'a Queue { // SAFETY: The `Queue` type is `#[repr(transparent)]`, so the pointer cast is valid. The // caller promises that the pointer is not dangling. - unsafe { &*(ptr as *const Queue) } + unsafe { &*ptr.cast::<Queue>() } } /// Enqueues a work item. @@ -457,7 +457,7 @@ fn get_work_offset(&self) -> usize { #[inline] unsafe fn raw_get_work(ptr: *mut Self) -> *mut Work<T, ID> { // SAFETY: The caller promises that the pointer is valid. - unsafe { (ptr as *mut u8).add(Self::OFFSET) as *mut Work<T, ID> } + unsafe { ptr.cast::<u8>().add(Self::OFFSET).cast::<Work<T, ID>>() } } /// Returns a pointer to the struct containing the [`Work<T, ID>`] field. 
@@ -472,7 +472,7 @@ unsafe fn work_container_of(ptr: *mut Work<T, ID>) -> *mut Self { // SAFETY: The caller promises that the pointer points at a field of the right type in the // right kind of struct. - unsafe { (ptr as *mut u8).sub(Self::OFFSET) as *mut Self } + unsafe { ptr.cast::<u8>().sub(Self::OFFSET).cast::<Self>() } } } @@ -538,7 +538,7 @@ unsafe impl<T, const ID: u64> WorkItemPointer<ID> for Arc<T> { unsafe extern "C" fn run(ptr: *mut bindings::work_struct) { // The `__enqueue` method always uses a `work_struct` stored in a `Work<T, ID>`. - let ptr = ptr as *mut Work<T, ID>; + let ptr = ptr.cast::<Work<T, ID>>(); // SAFETY: This computes the pointer that `__enqueue` got from `Arc::into_raw`. let ptr = unsafe { T::work_container_of(ptr) }; // SAFETY: This pointer comes from `Arc::into_raw` and we've been given back ownership. @@ -591,7 +591,7 @@ unsafe impl<T, const ID: u64> WorkItemPointer<ID> for Pin<KBox<T>> { unsafe extern "C" fn run(ptr: *mut bindings::work_struct) { // The `__enqueue` method always uses a `work_struct` stored in a `Work<T, ID>`. - let ptr = ptr as *mut Work<T, ID>; + let ptr = ptr.cast::<Work<T, ID>>(); // SAFETY: This computes the pointer that `__enqueue` got from `Arc::into_raw`. let ptr = unsafe { T::work_container_of(ptr) }; // SAFETY: This pointer comes from `Arc::into_raw` and we've been given back ownership. diff --git a/rust/uapi/lib.rs b/rust/uapi/lib.rs index 13495910271f..fe9bf7b5a306 100644 --- a/rust/uapi/lib.rs +++ b/rust/uapi/lib.rs @@ -15,6 +15,7 @@ #![allow( clippy::all, clippy::undocumented_unsafe_blocks, + clippy::ptr_as_ptr, dead_code, missing_docs, non_camel_case_types, --- base-commit: ff64846bee0e7e3e7bc9363ebad3bab42dd27e24 change-id: 20250307-ptr-as-ptr-21b1867fc4d4 Best regards, -- Tamir Duberstein <tamird(a)gmail.com>

7 months, 2 weeks

5
12
0 0

[PATCH net v2 0/3] vsock/bpf: Handle races between sockmap update and connect() disconnecting

by Michal Luczaj

Signal delivery during connect() may disconnect an already established socket. The problem is that such a socket might have been placed in a sockmap before the connection was closed. PATCH 1 ensures this race won't lead to an unconnected vsock staying in the sockmap. PATCH 2 adds a selftest for it. PATCH 3 fixes a related race. Note that here the race window is rather difficult to hit and I can't think of an easy way of testing it. Signed-off-by: Michal Luczaj <mhal(a)rbox.co> --- Changes in v2: - Handle one more path of tripping the warning - Add a selftest - Collect R-b [Stefano] - Link to v1: https://lore.kernel.org/r/20250307-vsock-trans-signal-race-v1-1-3aca3f771fb… --- Michal Luczaj (3): vsock/bpf: Fix EINTR connect() racing sockmap update selftest/bpf: Add test for AF_VSOCK connect() racing sockmap update vsock/bpf: Fix bpf recvmsg() racing transport reassignment net/vmw_vsock/af_vsock.c | 10 +- net/vmw_vsock/vsock_bpf.c | 24 +++-- .../selftests/bpf/prog_tests/sockmap_basic.c | 111 +++++++++++++++++++++ 3 files changed, 136 insertions(+), 9 deletions(-) --- base-commit: da9e8efe7ee10e8425dc356a9fc593502c8e3933 change-id: 20250305-vsock-trans-signal-race-d62f7718d099 Best regards, -- Michal Luczaj <mhal(a)rbox.co>

7 months, 2 weeks

1
3
0 0

[PATCH v3 00/17] riscv: add SBI FWFT misaligned exception delegation support

by Clément Léger

The SBI Firmware Feature extension allows the S-mode to request some specific features (either hardware or software) to be enabled. This series uses this extension to request misaligned access exception delegation to S-mode in order to let the kernel handle it. It also adds support for the KVM FWFT SBI extension based on the misaligned access handling infrastructure. The FWFT SBI extension is part of the SBI V3.0 specification [1]. It can be tested using the qemu provided at [2] which contains the series from [3]. kvm-unit-tests [4] can be used inside kvm to test the correct delegation of misaligned exceptions. Upstream OpenSBI can be used. Note: Since SBI V3.0 is not yet ratified, the FWFT extension API is split between interface only and implementation, making it possible to pick only the interface, which does not have hard dependencies on SBI. The tests can be run using the included kselftest: $ qemu-system-riscv64 \ -cpu rv64,trap-misaligned-access=true,v=true \ -M virt \ -m 1024M \ -bios fw_dynamic.bin \ -kernel Image ... # ./misaligned TAP version 13 1..23 # Starting 23 tests from 1 test cases. # RUN global.gp_load_lh ... # OK global.gp_load_lh ok 1 global.gp_load_lh # RUN global.gp_load_lhu ... # OK global.gp_load_lhu ok 2 global.gp_load_lhu # RUN global.gp_load_lw ... # OK global.gp_load_lw ok 3 global.gp_load_lw # RUN global.gp_load_lwu ... # OK global.gp_load_lwu ok 4 global.gp_load_lwu # RUN global.gp_load_ld ... # OK global.gp_load_ld ok 5 global.gp_load_ld # RUN global.gp_load_c_lw ... # OK global.gp_load_c_lw ok 6 global.gp_load_c_lw # RUN global.gp_load_c_ld ... # OK global.gp_load_c_ld ok 7 global.gp_load_c_ld # RUN global.gp_load_c_ldsp ... # OK global.gp_load_c_ldsp ok 8 global.gp_load_c_ldsp # RUN global.gp_load_sh ... # OK global.gp_load_sh ok 9 global.gp_load_sh # RUN global.gp_load_sw ... # OK global.gp_load_sw ok 10 global.gp_load_sw # RUN global.gp_load_sd ... # OK global.gp_load_sd ok 11 global.gp_load_sd # RUN global.gp_load_c_sw ... 
# OK global.gp_load_c_sw ok 12 global.gp_load_c_sw # RUN global.gp_load_c_sd ... # OK global.gp_load_c_sd ok 13 global.gp_load_c_sd # RUN global.gp_load_c_sdsp ... # OK global.gp_load_c_sdsp ok 14 global.gp_load_c_sdsp # RUN global.fpu_load_flw ... # OK global.fpu_load_flw ok 15 global.fpu_load_flw # RUN global.fpu_load_fld ... # OK global.fpu_load_fld ok 16 global.fpu_load_fld # RUN global.fpu_load_c_fld ... # OK global.fpu_load_c_fld ok 17 global.fpu_load_c_fld # RUN global.fpu_load_c_fldsp ... # OK global.fpu_load_c_fldsp ok 18 global.fpu_load_c_fldsp # RUN global.fpu_store_fsw ... # OK global.fpu_store_fsw ok 19 global.fpu_store_fsw # RUN global.fpu_store_fsd ... # OK global.fpu_store_fsd ok 20 global.fpu_store_fsd # RUN global.fpu_store_c_fsd ... # OK global.fpu_store_c_fsd ok 21 global.fpu_store_c_fsd # RUN global.fpu_store_c_fsdsp ... # OK global.fpu_store_c_fsdsp ok 22 global.fpu_store_c_fsdsp # RUN global.gen_sigbus ... [12797.988647] misaligned[618]: unhandled signal 7 code 0x1 at 0x0000000000014dc0 in misaligned[4dc0,10000+76000] [12797.988990] CPU: 0 UID: 0 PID: 618 Comm: misaligned Not tainted 6.13.0-rc6-00008-g4ec4468967c9-dirty #51 [12797.989169] Hardware name: riscv-virtio,qemu (DT) [12797.989264] epc : 0000000000014dc0 ra : 0000000000014d00 sp : 00007fffe165d100 [12797.989407] gp : 000000000008f6e8 tp : 0000000000095760 t0 : 0000000000000008 [12797.989544] t1 : 00000000000965d8 t2 : 000000000008e830 s0 : 00007fffe165d160 [12797.989692] s1 : 000000000000001a a0 : 0000000000000000 a1 : 0000000000000002 [12797.989831] a2 : 0000000000000000 a3 : 0000000000000000 a4 : ffffffffdeadbeef [12797.989964] a5 : 000000000008ef61 a6 : 626769735f6e0000 a7 : fffffffffffff000 [12797.990094] s2 : 0000000000000001 s3 : 00007fffe165d838 s4 : 00007fffe165d848 [12797.990238] s5 : 000000000000001a s6 : 0000000000010442 s7 : 0000000000010200 [12797.990391] s8 : 000000000000003a s9 : 0000000000094508 s10: 0000000000000000 [12797.990526] s11: 0000555567460668 t3 : 
00007fffe165d070 t4 : 00000000000965d0 [12797.990656] t5 : fefefefefefefeff t6 : 0000000000000073 [12797.990756] status: 0000000200004020 badaddr: 000000000008ef61 cause: 0000000000000006 [12797.990911] Code: 8793 8791 3423 fcf4 3783 fc84 c737 dead 0713 eef7 (c398) 0001 # OK global.gen_sigbus ok 23 global.gen_sigbus # PASSED: 23 / 23 tests passed. # Totals: pass:23 fail:0 xfail:0 xpass:0 skip:0 error:0 With kvm-tools: # lkvm run -k sbi.flat -m 128 Info: # lkvm run -k sbi.flat -m 128 -c 1 --name guest-97 Info: Removed ghost socket file "/root/.lkvm//guest-97.sock". ########################################################################## # kvm-unit-tests ########################################################################## ... [test messages elided] PASS: sbi: fwft: FWFT extension probing no error PASS: sbi: fwft: get/set reserved feature 0x6 error == SBI_ERR_DENIED PASS: sbi: fwft: get/set reserved feature 0x3fffffff error == SBI_ERR_DENIED PASS: sbi: fwft: get/set reserved feature 0x80000000 error == SBI_ERR_DENIED PASS: sbi: fwft: get/set reserved feature 0xbfffffff error == SBI_ERR_DENIED PASS: sbi: fwft: misaligned_deleg: Get misaligned deleg feature no error PASS: sbi: fwft: misaligned_deleg: Set misaligned deleg feature invalid value error PASS: sbi: fwft: misaligned_deleg: Set misaligned deleg feature invalid value error PASS: sbi: fwft: misaligned_deleg: Set misaligned deleg feature value no error PASS: sbi: fwft: misaligned_deleg: Set misaligned deleg feature value 0 PASS: sbi: fwft: misaligned_deleg: Set misaligned deleg feature value no error PASS: sbi: fwft: misaligned_deleg: Set misaligned deleg feature value 1 PASS: sbi: fwft: misaligned_deleg: Verify misaligned load exception trap in supervisor SUMMARY: 50 tests, 2 unexpected failures, 12 skipped This series is available at [6]. 
Link: https://github.com/riscv-non-isa/riscv-sbi-doc/releases/download/vv3.0-rc2/… [1] Link: https://github.com/rivosinc/qemu/tree/dev/cleger/misaligned [2] Link: https://lore.kernel.org/all/20241211211933.198792-3-fkonrad@amd.com/T/ [3] Link: https://github.com/clementleger/kvm-unit-tests/tree/dev/cleger/fwft_v1 [4] Link: https://github.com/clementleger/unaligned_test [5] Link: https://github.com/rivosinc/linux/tree/dev/cleger/fwft_v1 [6] --- V3: - Added comment about kvm sbi fwft supported/set/get callback requirements - Move struct kvm_sbi_fwft_feature in kvm_sbi_fwft.c - Add a FWFT interface V2: - Added Kselftest for misaligned testing - Added get_user() usage instead of __get_user() - Reenable interrupt when possible in misaligned access handling - Document that riscv supports unaligned-traps - Fix KVM extension state when an init function is present - Rework SBI misaligned accesses trap delegation code - Added support for CPU hotplugging - Added KVM SBI reset callback - Added reset for KVM SBI FWFT lock - Return SBI_ERR_DENIED_LOCKED when LOCK flag is set Clément Léger (17): riscv: add Firmware Feature (FWFT) SBI extensions definitions riscv: sbi: add FWFT extension interface riscv: sbi: add SBI FWFT extension calls riscv: misaligned: request misaligned exception from SBI riscv: misaligned: use on_each_cpu() for scalar misaligned access probing riscv: misaligned: use correct CONFIG_ ifdef for misaligned_access_speed riscv: misaligned: move emulated access uniformity check in a function riscv: misaligned: add a function to check misalign trap delegability riscv: misaligned: factorize trap handling riscv: misaligned: enable IRQs while handling misaligned accesses riscv: misaligned: use get_user() instead of __get_user() Documentation/sysctl: add riscv to unaligned-trap supported archs selftests: riscv: add misaligned access testing RISC-V: KVM: add SBI extension init()/deinit() functions RISC-V: KVM: add SBI extension reset callback RISC-V: KVM: add support for 
FWFT SBI extension RISC-V: KVM: add support for SBI_FWFT_MISALIGNED_DELEG Documentation/admin-guide/sysctl/kernel.rst | 4 +- arch/riscv/include/asm/cpufeature.h | 8 +- arch/riscv/include/asm/kvm_host.h | 5 +- arch/riscv/include/asm/kvm_vcpu_sbi.h | 12 + arch/riscv/include/asm/kvm_vcpu_sbi_fwft.h | 31 +++ arch/riscv/include/asm/sbi.h | 38 +++ arch/riscv/include/uapi/asm/kvm.h | 1 + arch/riscv/kernel/sbi.c | 123 +++++++++ arch/riscv/kernel/traps.c | 57 ++-- arch/riscv/kernel/traps_misaligned.c | 119 +++++++- arch/riscv/kernel/unaligned_access_speed.c | 11 +- arch/riscv/kvm/Makefile | 1 + arch/riscv/kvm/vcpu.c | 7 +- arch/riscv/kvm/vcpu_sbi.c | 57 ++++ arch/riscv/kvm/vcpu_sbi_fwft.c | 251 +++++++++++++++++ arch/riscv/kvm/vcpu_sbi_sta.c | 3 +- .../selftests/riscv/misaligned/.gitignore | 1 + .../selftests/riscv/misaligned/Makefile | 12 + .../selftests/riscv/misaligned/common.S | 33 +++ .../testing/selftests/riscv/misaligned/fpu.S | 180 +++++++++++++ tools/testing/selftests/riscv/misaligned/gp.S | 103 +++++++ .../selftests/riscv/misaligned/misaligned.c | 254 ++++++++++++++++++ 22 files changed, 1264 insertions(+), 47 deletions(-) create mode 100644 arch/riscv/include/asm/kvm_vcpu_sbi_fwft.h create mode 100644 arch/riscv/kvm/vcpu_sbi_fwft.c create mode 100644 tools/testing/selftests/riscv/misaligned/.gitignore create mode 100644 tools/testing/selftests/riscv/misaligned/Makefile create mode 100644 tools/testing/selftests/riscv/misaligned/common.S create mode 100644 tools/testing/selftests/riscv/misaligned/fpu.S create mode 100644 tools/testing/selftests/riscv/misaligned/gp.S create mode 100644 tools/testing/selftests/riscv/misaligned/misaligned.c -- 2.47.2

7 months, 2 weeks

2
37
0 0

[PATCH v2 0/5] rust: reduce pointer casts, enable related lints

by Tamir Duberstein

This started with a patch that enabled `clippy::ptr_as_ptr`. Benno Lossin suggested I also look into `clippy::ptr_cast_constness` and I discovered `clippy::as_ptr_cast_mut`. This series now enables all 3 lints. It also enables `clippy::as_underscore` which ensures other pointer casts weren't missed. The first commit reduces the need for pointer casts and is shared with another series[1]. Link: https://lore.kernel.org/all/20250307-no-offset-v1-0-0c728f63b69c@gmail.com/ [1] Signed-off-by: Tamir Duberstein <tamird(a)gmail.com> --- Changes in v2: - Fixed typo in first commit message. - Added additional patches, converted to series. - Link to v1: https://lore.kernel.org/r/20250307-ptr-as-ptr-v1-1-582d06514c98@gmail.com --- Tamir Duberstein (5): rust: retain pointer mut-ness in `container_of!` rust: enable `clippy::ptr_as_ptr` lint rust: enable `clippy::ptr_cast_constness` lint rust: enable `clippy::as_ptr_cast_mut` lint rust: enable `clippy::as_underscore` lint Makefile | 4 ++++ rust/bindings/lib.rs | 1 + rust/kernel/alloc/allocator_test.rs | 2 +- rust/kernel/alloc/kvec.rs | 4 ++-- rust/kernel/block/mq/operations.rs | 2 +- rust/kernel/block/mq/request.rs | 7 ++++--- rust/kernel/device.rs | 5 +++-- rust/kernel/device_id.rs | 2 +- rust/kernel/devres.rs | 19 ++++++++++--------- rust/kernel/error.rs | 2 +- rust/kernel/firmware.rs | 2 +- rust/kernel/fs/file.rs | 2 +- rust/kernel/io.rs | 18 +++++++++--------- rust/kernel/kunit.rs | 15 +++++++-------- rust/kernel/lib.rs | 5 ++--- rust/kernel/list/impl_list_item_mod.rs | 2 +- rust/kernel/miscdevice.rs | 2 +- rust/kernel/of.rs | 6 +++--- rust/kernel/pci.rs | 13 ++++++++----- rust/kernel/platform.rs | 6 ++++-- rust/kernel/print.rs | 11 +++++------ rust/kernel/rbtree.rs | 23 ++++++++++------------- rust/kernel/seq_file.rs | 3 ++- rust/kernel/str.rs | 10 +++++----- rust/kernel/sync/poll.rs | 2 +- rust/kernel/workqueue.rs | 12 ++++++------ rust/uapi/lib.rs | 1 + 27 files changed, 95 insertions(+), 86 deletions(-) --- base-commit: 
ff64846bee0e7e3e7bc9363ebad3bab42dd27e24 change-id: 20250307-ptr-as-ptr-21b1867fc4d4 Best regards, -- Tamir Duberstein <tamird(a)gmail.com>

7 months, 2 weeks

3
37
0 0

Re: [PATCH 1/4] selftests: ntsync: fix the wrong condition in wake_all

by Su Hui

On 2025/3/14 18:14, Su Hui wrote: > On 2025/3/14 17:21, Dan Carpenter wrote: >> On Fri, Mar 14, 2025 at 03:14:51PM +0800, Su Hui wrote: >>> When 'manual=false' and 'signaled=true', then expected value when using >>> NTSYNC_IOC_CREATE_EVENT should be greater than zero. Fix this typo error. >>> >>> Signed-off-by: Su Hui<suhui(a)nfschina.com> >>> --- >>> tools/testing/selftests/drivers/ntsync/ntsync.c | 2 +- >>> 1 file changed, 1 insertion(+), 1 deletion(-) >>> >>> diff --git a/tools/testing/selftests/drivers/ntsync/ntsync.c b/tools/testing/selftests/drivers/ntsync/ntsync.c >>> index 3aad311574c4..bfb6fad653d0 100644 >>> --- a/tools/testing/selftests/drivers/ntsync/ntsync.c >>> +++ b/tools/testing/selftests/drivers/ntsync/ntsync.c >>> @@ -968,7 +968,7 @@ TEST(wake_all) >>> auto_event_args.manual = false; >>> auto_event_args.signaled = true; >>> objs[3] = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &auto_event_args); >>> - EXPECT_EQ(0, objs[3]); >>> + EXPECT_LE(0, objs[3]); >> It's kind of weird how these macros put the constant on the left. >> It returns an "fd" on success. So this looks reasonable. It probably >> won't return the zero fd so we could probably check EXPECT_LT()? > Agreed, there are about 29 items that can be changed to EXPECT_LT(). > I can send a v2 patchset with this change if there are no other > suggestions. Sorry for the wrong email style :(. Su Hui

7 months, 2 weeks

1
0
0 0

[PATCH net-next] selftests: drv-net: fix merge conflicts resolution

by Matthieu Baerts (NGI0)

After the recent merge between net-next and net, I got some conflicts on my side because the merge resolution was different from Stephen's one [1] I applied on my side in the MPTCP tree. It looks like the code that is now in net-next is using the old way to retrieve the local and remote addresses. This patch is now using the new way, like what was in Stephen's email [1]. Also, in get_interface_info(), there were no conflicts in this area, because that was new code from 'net', but a small adaptation was needed there as well to get the remote address. Fixes: 941defcea7e1 ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net") Link: https://lore.kernel.org/20250311115758.17a1d414@canb.auug.org.au [1] Suggested-by: Stephen Rothwell <sfr(a)canb.auug.org.au> Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org> --- tools/testing/selftests/drivers/net/ping.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py index 7a1026a073681d159202015fc6945e91368863fe..79f07e0510ecc14d3bc2716e14f49f9381bb919f 100755 --- a/tools/testing/selftests/drivers/net/ping.py +++ b/tools/testing/selftests/drivers/net/ping.py @@ -15,18 +15,18 @@ no_sleep=False def _test_v4(cfg) -> None: cfg.require_ipver("4") - cmd(f"ping -c 1 -W0.5 {cfg.remote_v4}") - cmd(f"ping -c 1 -W0.5 {cfg.v4}", host=cfg.remote) - cmd(f"ping -s 65000 -c 1 -W0.5 {cfg.remote_v4}") - cmd(f"ping -s 65000 -c 1 -W0.5 {cfg.v4}", host=cfg.remote) + cmd("ping -c 1 -W0.5 " + cfg.remote_addr_v["4"]) + cmd("ping -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote) + cmd("ping -s 65000 -c 1 -W0.5 " + cfg.remote_addr_v["4"]) + cmd("ping -s 65000 -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote) def _test_v6(cfg) -> None: cfg.require_ipver("6") - cmd(f"ping -c 1 -W5 {cfg.remote_v6}") - cmd(f"ping -c 1 -W5 {cfg.v6}", host=cfg.remote) - cmd(f"ping -s 65000 -c 1 -W0.5 {cfg.remote_v6}") - cmd(f"ping -s 65000 
-c 1 -W0.5 {cfg.v6}", host=cfg.remote) + cmd("ping -c 1 -W5 " + cfg.remote_addr_v["6"]) + cmd("ping -c 1 -W5 " + cfg.addr_v["6"], host=cfg.remote) + cmd("ping -s 65000 -c 1 -W0.5 " + cfg.remote_addr_v["6"]) + cmd("ping -s 65000 -c 1 -W0.5 " + cfg.addr_v["6"], host=cfg.remote) def _test_tcp(cfg) -> None: cfg.require_cmd("socat", remote=True) @@ -120,7 +120,7 @@ def get_interface_info(cfg) -> None: global remote_ifname global no_sleep - remote_info = cmd(f"ip -4 -o addr show to {cfg.remote_v4} | awk '{{print $2}}'", shell=True, host=cfg.remote).stdout + remote_info = cmd(f"ip -4 -o addr show to {cfg.remote_addr_v['4']} | awk '{{print $2}}'", shell=True, host=cfg.remote).stdout remote_ifname = remote_info.rstrip('\n') if remote_ifname == "": raise KsftFailEx('Can not get remote interface') --- base-commit: 941defcea7e11ad7ff8f0d4856716dd637d757dd change-id: 20250314-net-next-drv-net-ping-fix-merge-b303167fde16 Best regards, -- Matthieu Baerts (NGI0) <matttbe(a)kernel.org>

7 months, 2 weeks

3
2
0 0

[PATCH 3/3] rust: replace `addr_of[_mut]!` with `&raw [mut]`

by Antonio Hickey

Replacing all occurrences of `addr_of!(place)` with `&raw place`, and all occurrences of `addr_of_mut!(place)` with `&raw mut place`. Utilizing the new feature will allow us to reduce macro complexity, and improve consistency with existing reference syntax as `&raw`, `&raw mut` is very similar to `&`, `&mut` making it fit more naturally with other existing code. Depends on: Patch 1/3 0001-rust-enable-raw_ref_op-feature.patch Suggested-by: Benno Lossin <y86-dev(a)protonmail.com> Link: https://github.com/Rust-for-Linux/linux/issues/1148 Signed-off-by: Antonio Hickey <contact(a)antoniohickey.com> --- rust/kernel/block/mq/request.rs | 4 ++-- rust/kernel/faux.rs | 4 ++-- rust/kernel/fs/file.rs | 2 +- rust/kernel/init.rs | 8 ++++---- rust/kernel/init/macros.rs | 28 +++++++++++++------------- rust/kernel/jump_label.rs | 4 ++-- rust/kernel/kunit.rs | 4 ++-- rust/kernel/list.rs | 2 +- rust/kernel/list/impl_list_item_mod.rs | 6 +++--- rust/kernel/net/phy.rs | 4 ++-- rust/kernel/pci.rs | 4 ++-- rust/kernel/platform.rs | 4 +--- rust/kernel/rbtree.rs | 22 ++++++++++---------- rust/kernel/sync/arc.rs | 2 +- rust/kernel/task.rs | 4 ++-- rust/kernel/workqueue.rs | 8 ++++---- 16 files changed, 54 insertions(+), 56 deletions(-) diff --git a/rust/kernel/block/mq/request.rs b/rust/kernel/block/mq/request.rs index 7943f43b9575..4a5b7ec914ef 100644 --- a/rust/kernel/block/mq/request.rs +++ b/rust/kernel/block/mq/request.rs @@ -12,7 +12,7 @@ }; use core::{ marker::PhantomData, - ptr::{addr_of_mut, NonNull}, + ptr::NonNull, sync::atomic::{AtomicU64, Ordering}, }; @@ -187,7 +187,7 @@ pub(crate) fn refcount(&self) -> &AtomicU64 { pub(crate) unsafe fn refcount_ptr(this: *mut Self) -> *mut AtomicU64 { // SAFETY: Because of the safety requirements of this function, the // field projection is safe. 
- unsafe { addr_of_mut!((*this).refcount) } + unsafe { &raw mut (*this).refcount } } } diff --git a/rust/kernel/faux.rs b/rust/kernel/faux.rs index 5acc0c02d451..52ac554c1119 100644 --- a/rust/kernel/faux.rs +++ b/rust/kernel/faux.rs @@ -7,7 +7,7 @@ //! C header: [`include/linux/device/faux.h`] use crate::{bindings, device, error::code::*, prelude::*}; -use core::ptr::{addr_of_mut, null, null_mut, NonNull}; +use core::ptr::{null, null_mut, NonNull}; /// The registration of a faux device. /// @@ -45,7 +45,7 @@ impl AsRef<device::Device> for Registration { fn as_ref(&self) -> &device::Device { // SAFETY: The underlying `device` in `faux_device` is guaranteed by the C API to be // a valid initialized `device`. - unsafe { device::Device::as_ref(addr_of_mut!((*self.as_raw()).dev)) } + unsafe { device::Device::as_ref((&raw mut (*self.as_raw()).dev)) } } } diff --git a/rust/kernel/fs/file.rs b/rust/kernel/fs/file.rs index ed57e0137cdb..7ee4830b67f3 100644 --- a/rust/kernel/fs/file.rs +++ b/rust/kernel/fs/file.rs @@ -331,7 +331,7 @@ pub fn flags(&self) -> u32 { // SAFETY: The file is valid because the shared reference guarantees a nonzero refcount. // // FIXME(read_once): Replace with `read_once` when available on the Rust side. - unsafe { core::ptr::addr_of!((*self.as_ptr()).f_flags).read_volatile() } + unsafe { (&raw const (*self.as_ptr()).f_flags).read_volatile() } } } diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs index 7fd1ea8265a5..a8fac6558671 100644 --- a/rust/kernel/init.rs +++ b/rust/kernel/init.rs @@ -122,7 +122,7 @@ //! ```rust //! # #![expect(unreachable_pub, clippy::disallowed_names)] //! use kernel::{init, types::Opaque}; -//! use core::{ptr::addr_of_mut, marker::PhantomPinned, pin::Pin}; +//! use core::{marker::PhantomPinned, pin::Pin}; //! # mod bindings { //! # #![expect(non_camel_case_types)] //! # #![expect(clippy::missing_safety_doc)] @@ -159,7 +159,7 @@ //! unsafe { //! init::pin_init_from_closure(move |slot: *mut Self| { //! 
// `slot` contains uninit memory, avoid creating a reference. -//! let foo = addr_of_mut!((*slot).foo); +//! let foo = &raw mut (*slot).foo; //! //! // Initialize the `foo` //! bindings::init_foo(Opaque::raw_get(foo)); @@ -541,7 +541,7 @@ macro_rules! stack_try_pin_init { /// /// ```rust /// # use kernel::{macros::{Zeroable, pin_data}, pin_init}; -/// # use core::{ptr::addr_of_mut, marker::PhantomPinned}; +/// # use core::marker::PhantomPinned; /// #[pin_data] /// #[derive(Zeroable)] /// struct Buf { @@ -554,7 +554,7 @@ macro_rules! stack_try_pin_init { /// pin_init!(&this in Buf { /// buf: [0; 64], /// // SAFETY: TODO. -/// ptr: unsafe { addr_of_mut!((*this.as_ptr()).buf).cast() }, +/// ptr: unsafe { &raw mut (*this.as_ptr()).buf.cast() }, /// pin: PhantomPinned, /// }); /// pin_init!(Buf { diff --git a/rust/kernel/init/macros.rs b/rust/kernel/init/macros.rs index 1fd146a83241..af525fbb2f01 100644 --- a/rust/kernel/init/macros.rs +++ b/rust/kernel/init/macros.rs @@ -244,25 +244,25 @@ //! struct __InitOk; //! // This is the expansion of `t,`, which is syntactic sugar for `t: t,`. //! { -//! unsafe { ::core::ptr::write(::core::addr_of_mut!((*slot).t), t) }; +//! unsafe { ::core::ptr::write(&raw mut (*slot).t, t) }; //! } //! // Since initialization could fail later (not in this case, since the //! // error type is `Infallible`) we will need to drop this field if there //! // is an error later. This `DropGuard` will drop the field when it gets //! // dropped and has not yet been forgotten. //! let __t_guard = unsafe { -//! ::pinned_init::__internal::DropGuard::new(::core::addr_of_mut!((*slot).t)) +//! ::pinned_init::__internal::DropGuard::new(&raw mut (*slot).t) //! }; //! // Expansion of `x: 0,`: //! // Since this can be an arbitrary expression we cannot place it inside //! // of the `unsafe` block, so we bind it here. //! { //! let x = 0; -//! unsafe { ::core::ptr::write(::core::addr_of_mut!((*slot).x), x) }; +//! 
unsafe { ::core::ptr::write(&raw mut (*slot).x, x) }; //! } //! // We again create a `DropGuard`. //! let __x_guard = unsafe { -//! ::kernel::init::__internal::DropGuard::new(::core::addr_of_mut!((*slot).x)) +//! ::kernel::init::__internal::DropGuard::new(&raw mut (*slot).x) //! }; //! // Since initialization has successfully completed, we can now forget //! // the guards. This is not `mem::forget`, since we only have @@ -459,15 +459,15 @@ //! { //! struct __InitOk; //! { -//! unsafe { ::core::ptr::write(::core::addr_of_mut!((*slot).a), a) }; +//! unsafe { ::core::ptr::write(&raw mut (*slot).a, a) }; //! } //! let __a_guard = unsafe { -//! ::kernel::init::__internal::DropGuard::new(::core::addr_of_mut!((*slot).a)) +//! ::kernel::init::__internal::DropGuard::new(&raw mut (*slot).a) //! }; //! let init = Bar::new(36); -//! unsafe { data.b(::core::addr_of_mut!((*slot).b), b)? }; +//! unsafe { data.b(&raw mut (*slot).b, b)? }; //! let __b_guard = unsafe { -//! ::kernel::init::__internal::DropGuard::new(::core::addr_of_mut!((*slot).b)) +//! ::kernel::init::__internal::DropGuard::new(&raw mut (*slot).b) //! }; //! ::core::mem::forget(__b_guard); //! ::core::mem::forget(__a_guard); @@ -1210,7 +1210,7 @@ fn assert_zeroable<T: $crate::init::Zeroable>(_: *mut T) {} // SAFETY: `slot` is valid, because we are inside of an initializer closure, we // return when an error/panic occurs. // We also use the `data` to require the correct trait (`Init` or `PinInit`) for `$field`. - unsafe { $data.$field(::core::ptr::addr_of_mut!((*$slot).$field), init)? }; + unsafe { $data.$field(&raw mut (*$slot).$field, init)? }; // Create the drop guard: // // We rely on macro hygiene to make it impossible for users to access this local variable. @@ -1218,7 +1218,7 @@ fn assert_zeroable<T: $crate::init::Zeroable>(_: *mut T) {} ::kernel::macros::paste! { // SAFETY: We forget the guard later when initialization has succeeded. 
let [< __ $field _guard >] = unsafe { - $crate::init::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field)) + $crate::init::__internal::DropGuard::new(&raw mut (*$slot).$field) }; $crate::__init_internal!(init_slot($use_data): @@ -1241,7 +1241,7 @@ fn assert_zeroable<T: $crate::init::Zeroable>(_: *mut T) {} // // SAFETY: `slot` is valid, because we are inside of an initializer closure, we // return when an error/panic occurs. - unsafe { $crate::init::Init::__init(init, ::core::ptr::addr_of_mut!((*$slot).$field))? }; + unsafe { $crate::init::Init::__init(init, &raw mut (*$slot).$field)? }; // Create the drop guard: // // We rely on macro hygiene to make it impossible for users to access this local variable. @@ -1249,7 +1249,7 @@ fn assert_zeroable<T: $crate::init::Zeroable>(_: *mut T) {} ::kernel::macros::paste! { // SAFETY: We forget the guard later when initialization has succeeded. let [< __ $field _guard >] = unsafe { - $crate::init::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field)) + $crate::init::__internal::DropGuard::new(&raw mut (*$slot).$field) }; $crate::__init_internal!(init_slot(): @@ -1272,7 +1272,7 @@ fn assert_zeroable<T: $crate::init::Zeroable>(_: *mut T) {} // Initialize the field. // // SAFETY: The memory at `slot` is uninitialized. - unsafe { ::core::ptr::write(::core::ptr::addr_of_mut!((*$slot).$field), $field) }; + unsafe { ::core::ptr::write(&raw mut (*$slot).$field, $field) }; } // Create the drop guard: // @@ -1281,7 +1281,7 @@ fn assert_zeroable<T: $crate::init::Zeroable>(_: *mut T) {} ::kernel::macros::paste! { // SAFETY: We forget the guard later when initialization has succeeded. 
let [< __ $field _guard >] = unsafe { - $crate::init::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field)) + $crate::init::__internal::DropGuard::new(&raw mut (*$slot).$field) }; $crate::__init_internal!(init_slot($($use_data)?): diff --git a/rust/kernel/jump_label.rs b/rust/kernel/jump_label.rs index 4e974c768dbd..05d4564714c7 100644 --- a/rust/kernel/jump_label.rs +++ b/rust/kernel/jump_label.rs @@ -20,8 +20,8 @@ #[macro_export] macro_rules! static_branch_unlikely { ($key:path, $keytyp:ty, $field:ident) => {{ - let _key: *const $keytyp = ::core::ptr::addr_of!($key); - let _key: *const $crate::bindings::static_key_false = ::core::ptr::addr_of!((*_key).$field); + let _key: *const $keytyp = &raw $key; + let _key: *const $crate::bindings::static_key_false = &raw (*_key).$field; let _key: *const $crate::bindings::static_key = _key.cast(); #[cfg(not(CONFIG_JUMP_LABEL))] diff --git a/rust/kernel/kunit.rs b/rust/kernel/kunit.rs index 824da0e9738a..18357dd782ed 100644 --- a/rust/kernel/kunit.rs +++ b/rust/kernel/kunit.rs @@ -128,9 +128,9 @@ unsafe impl Sync for UnaryAssert {} unsafe { $crate::bindings::__kunit_do_failed_assertion( kunit_test, - core::ptr::addr_of!(LOCATION.0), + &raw LOCATION.0, $crate::bindings::kunit_assert_type_KUNIT_ASSERTION, - core::ptr::addr_of!(ASSERTION.0.assert), + &raw ASSERTION.0.assert, Some($crate::bindings::kunit_unary_assert_format), core::ptr::null(), ); diff --git a/rust/kernel/list.rs b/rust/kernel/list.rs index c0ed227b8a4f..e98f0820f002 100644 --- a/rust/kernel/list.rs +++ b/rust/kernel/list.rs @@ -176,7 +176,7 @@ pub fn new() -> impl PinInit<Self> { #[inline] unsafe fn fields(me: *mut Self) -> *mut ListLinksFields { // SAFETY: The caller promises that the pointer is valid. 
- unsafe { Opaque::raw_get(ptr::addr_of!((*me).inner)) } + unsafe { Opaque::raw_get(&raw const (*me).inner) } } /// # Safety diff --git a/rust/kernel/list/impl_list_item_mod.rs b/rust/kernel/list/impl_list_item_mod.rs index a0438537cee1..014b6713d59d 100644 --- a/rust/kernel/list/impl_list_item_mod.rs +++ b/rust/kernel/list/impl_list_item_mod.rs @@ -49,7 +49,7 @@ macro_rules! impl_has_list_links { // SAFETY: The implementation of `raw_get_list_links` only compiles if the field has the // right type. // - // The behavior of `raw_get_list_links` is not changed since the `addr_of_mut!` macro is + // The behavior of `raw_get_list_links` is not changed since the `&raw mut` op is // equivalent to the pointer offset operation in the trait definition. unsafe impl$(<$($implarg),*>)? $crate::list::HasListLinks$(<$id>)? for $self $(<$($selfarg),*>)? @@ -61,7 +61,7 @@ unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut $crate::list::ListLinks$(<$ // SAFETY: The caller promises that the pointer is not dangling. We know that this // expression doesn't follow any pointers, as the `offset_of!` invocation above // would otherwise not compile. - unsafe { ::core::ptr::addr_of_mut!((*ptr)$(.$field)*) } + unsafe { &raw mut (*ptr)$(.$field)* } } } )*}; @@ -103,7 +103,7 @@ macro_rules! impl_has_list_links_self_ptr { unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut $crate::list::ListLinks$(<$id>)? { // SAFETY: The caller promises that the pointer is not dangling. let ptr: *mut $crate::list::ListLinksSelfPtr<$item_type $(, $id)?> = - unsafe { ::core::ptr::addr_of_mut!((*ptr).$field) }; + unsafe { &raw mut (*ptr).$field }; ptr.cast() } } diff --git a/rust/kernel/net/phy.rs b/rust/kernel/net/phy.rs index a59469c785e3..757db052cc09 100644 --- a/rust/kernel/net/phy.rs +++ b/rust/kernel/net/phy.rs @@ -7,7 +7,7 @@ //! C headers: [`include/linux/phy.h`](srctree/include/linux/phy.h). 
use crate::{error::*, prelude::*, types::Opaque}; -use core::{marker::PhantomData, ptr::addr_of_mut}; +use core::marker::PhantomData; pub mod reg; @@ -285,7 +285,7 @@ impl AsRef<kernel::device::Device> for Device { fn as_ref(&self) -> &kernel::device::Device { let phydev = self.0.get(); // SAFETY: The struct invariant ensures that `mdio.dev` is valid. - unsafe { kernel::device::Device::as_ref(addr_of_mut!((*phydev).mdio.dev)) } + unsafe { kernel::device::Device::as_ref(&raw mut (*phydev).mdio.dev) } } } diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs index f7b2743828ae..6cb9ed1e7cbf 100644 --- a/rust/kernel/pci.rs +++ b/rust/kernel/pci.rs @@ -17,7 +17,7 @@ types::{ARef, ForeignOwnable, Opaque}, ThisModule, }; -use core::{ops::Deref, ptr::addr_of_mut}; +use core::ops::Deref; use kernel::prelude::*; /// An adapter for the registration of PCI drivers. @@ -60,7 +60,7 @@ extern "C" fn probe_callback( ) -> kernel::ffi::c_int { // SAFETY: The PCI bus only ever calls the probe callback with a valid pointer to a // `struct pci_dev`. - let dev = unsafe { device::Device::get_device(addr_of_mut!((*pdev).dev)) }; + let dev = unsafe { device::Device::get_device(&raw mut (*pdev).dev) }; // SAFETY: `dev` is guaranteed to be embedded in a valid `struct pci_dev` by the call // above. let mut pdev = unsafe { Device::from_dev(dev) }; diff --git a/rust/kernel/platform.rs b/rust/kernel/platform.rs index 1297f5292ba9..344875ad7b82 100644 --- a/rust/kernel/platform.rs +++ b/rust/kernel/platform.rs @@ -14,8 +14,6 @@ ThisModule, }; -use core::ptr::addr_of_mut; - /// An adapter for the registration of platform drivers. pub struct Adapter<T: Driver>(T); @@ -55,7 +53,7 @@ unsafe fn unregister(pdrv: &Opaque<Self::RegType>) { impl<T: Driver + 'static> Adapter<T> { extern "C" fn probe_callback(pdev: *mut bindings::platform_device) -> kernel::ffi::c_int { // SAFETY: The platform bus only ever calls the probe callback with a valid `pdev`. 
- let dev = unsafe { device::Device::get_device(addr_of_mut!((*pdev).dev)) }; + let dev = unsafe { device::Device::get_device(&raw mut (*pdev).dev) }; // SAFETY: `dev` is guaranteed to be embedded in a valid `struct platform_device` by the // call above. let mut pdev = unsafe { Device::from_dev(dev) }; diff --git a/rust/kernel/rbtree.rs b/rust/kernel/rbtree.rs index 1ea25c7092fb..b0ad35663cb0 100644 --- a/rust/kernel/rbtree.rs +++ b/rust/kernel/rbtree.rs @@ -11,7 +11,7 @@ cmp::{Ord, Ordering}, marker::PhantomData, mem::MaybeUninit, - ptr::{addr_of_mut, from_mut, NonNull}, + ptr::{from_mut, NonNull}, }; /// A red-black tree with owned nodes. @@ -238,7 +238,7 @@ pub fn values_mut(&mut self) -> impl Iterator<Item = &'_ mut V> { /// Returns a cursor over the tree nodes, starting with the smallest key. pub fn cursor_front(&mut self) -> Option<Cursor<'_, K, V>> { - let root = addr_of_mut!(self.root); + let root = &raw mut self.root; // SAFETY: `self.root` is always a valid root node let current = unsafe { bindings::rb_first(root) }; NonNull::new(current).map(|current| { @@ -253,7 +253,7 @@ pub fn cursor_front(&mut self) -> Option<Cursor<'_, K, V>> { /// Returns a cursor over the tree nodes, starting with the largest key. pub fn cursor_back(&mut self) -> Option<Cursor<'_, K, V>> { - let root = addr_of_mut!(self.root); + let root = &raw mut self.root; // SAFETY: `self.root` is always a valid root node let current = unsafe { bindings::rb_last(root) }; NonNull::new(current).map(|current| { @@ -459,7 +459,7 @@ pub fn cursor_lower_bound(&mut self, key: &K) -> Option<Cursor<'_, K, V>> let best = best_match?; // SAFETY: `best` is a non-null node so it is valid by the type invariants. 
- let links = unsafe { addr_of_mut!((*best.as_ptr()).links) }; + let links = unsafe { &raw mut (*best.as_ptr()).links }; NonNull::new(links).map(|current| { // INVARIANT: @@ -767,7 +767,7 @@ pub fn remove_current(self) -> (Option<Self>, RBTreeNode<K, V>) { let node = RBTreeNode { node }; // SAFETY: The reference to the tree used to create the cursor outlives the cursor, so // the tree cannot change. By the tree invariant, all nodes are valid. - unsafe { bindings::rb_erase(&mut (*this).links, addr_of_mut!(self.tree.root)) }; + unsafe { bindings::rb_erase(&mut (*this).links, &raw mut self.tree.root) }; let current = match (prev, next) { (_, Some(next)) => next, @@ -803,7 +803,7 @@ fn remove_neighbor(&mut self, direction: Direction) -> Option<RBTreeNode<K, V>> let neighbor = neighbor.as_ptr(); // SAFETY: The reference to the tree used to create the cursor outlives the cursor, so // the tree cannot change. By the tree invariant, all nodes are valid. - unsafe { bindings::rb_erase(neighbor, addr_of_mut!(self.tree.root)) }; + unsafe { bindings::rb_erase(neighbor, &raw mut self.tree.root) }; // SAFETY: By the type invariant of `Self`, all non-null `rb_node` pointers stored in `self` // point to the links field of `Node<K, V>` objects. let this = unsafe { container_of!(neighbor, Node<K, V>, links) }.cast_mut(); @@ -918,7 +918,7 @@ unsafe fn to_key_value_raw<'b>(node: NonNull<bindings::rb_node>) -> (&'b K, *mut let k = unsafe { &(*this).key }; // SAFETY: The passed `node` is the current node or a non-null neighbor, // thus `this` is valid by the type invariants. - let v = unsafe { addr_of_mut!((*this).value) }; + let v = unsafe { &raw mut (*this).value }; (k, v) } } @@ -1027,7 +1027,7 @@ fn next(&mut self) -> Option<Self::Item> { self.next = unsafe { bindings::rb_next(self.next) }; // SAFETY: By the same reasoning above, it is safe to dereference the node. 
- Some(unsafe { (addr_of_mut!((*cur).key), addr_of_mut!((*cur).value)) }) + Some(unsafe { (&raw mut (*cur).key, &raw mut (*cur).value) }) } } @@ -1170,7 +1170,7 @@ fn insert(self, node: RBTreeNode<K, V>) -> &'a mut V { // SAFETY: `node` is valid at least until we call `Box::from_raw`, which only happens when // the node is removed or replaced. - let node_links = unsafe { addr_of_mut!((*node).links) }; + let node_links = unsafe { &raw mut (*node).links }; // INVARIANT: We are linking in a new node, which is valid. It remains valid because we // "forgot" it with `Box::into_raw`. @@ -1178,7 +1178,7 @@ fn insert(self, node: RBTreeNode<K, V>) -> &'a mut V { unsafe { bindings::rb_link_node(node_links, self.parent, self.child_field_of_parent) }; // SAFETY: All pointers are valid. `node` has just been inserted into the tree. - unsafe { bindings::rb_insert_color(node_links, addr_of_mut!((*self.rbtree).root)) }; + unsafe { bindings::rb_insert_color(node_links, &raw mut (*self.rbtree).root) }; // SAFETY: The node is valid until we remove it from the tree. unsafe { &mut (*node).value } @@ -1261,7 +1261,7 @@ fn replace(self, node: RBTreeNode<K, V>) -> RBTreeNode<K, V> { // SAFETY: `node` is valid at least until we call `Box::from_raw`, which only happens when // the node is removed or replaced. - let new_node_links = unsafe { addr_of_mut!((*node).links) }; + let new_node_links = unsafe { &raw mut (*node).links }; // SAFETY: This updates the pointers so that `new_node_links` is in the tree where // `self.node_links` used to be. diff --git a/rust/kernel/sync/arc.rs b/rust/kernel/sync/arc.rs index 3cefda7a4372..81d8b0f84957 100644 --- a/rust/kernel/sync/arc.rs +++ b/rust/kernel/sync/arc.rs @@ -243,7 +243,7 @@ pub fn into_raw(self) -> *const T { let ptr = self.ptr.as_ptr(); core::mem::forget(self); // SAFETY: The pointer is valid. 
- unsafe { core::ptr::addr_of!((*ptr).data) } + unsafe { &raw const (*ptr).data } } /// Recreates an [`Arc`] instance previously deconstructed via [`Arc::into_raw`]. diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs index 49012e711942..b2ac768eed23 100644 --- a/rust/kernel/task.rs +++ b/rust/kernel/task.rs @@ -257,7 +257,7 @@ pub fn as_ptr(&self) -> *mut bindings::task_struct { pub fn group_leader(&self) -> &Task { // SAFETY: The group leader of a task never changes after initialization, so reading this // field is not a data race. - let ptr = unsafe { *ptr::addr_of!((*self.as_ptr()).group_leader) }; + let ptr = unsafe { *(&raw const (*self.as_ptr()).group_leader) }; // SAFETY: The lifetime of the returned task reference is tied to the lifetime of `self`, // and given that a task has a reference to its group leader, we know it must be valid for @@ -269,7 +269,7 @@ pub fn group_leader(&self) -> &Task { pub fn pid(&self) -> Pid { // SAFETY: The pid of a task never changes after initialization, so reading this field is // not a data race. - unsafe { *ptr::addr_of!((*self.as_ptr()).pid) } + unsafe { *(&raw const (*self.as_ptr()).pid) } } /// Returns the UID of the given task. diff --git a/rust/kernel/workqueue.rs b/rust/kernel/workqueue.rs index 0cd100d2aefb..34e8abb38974 100644 --- a/rust/kernel/workqueue.rs +++ b/rust/kernel/workqueue.rs @@ -401,9 +401,9 @@ pub fn new(name: &'static CStr, key: &'static LockClassKey) -> impl PinInit<Self pub unsafe fn raw_get(ptr: *const Self) -> *mut bindings::work_struct { // SAFETY: The caller promises that the pointer is aligned and not dangling. // - // A pointer cast would also be ok due to `#[repr(transparent)]`. We use `addr_of!` so that - // the compiler does not complain that the `work` field is unused. - unsafe { Opaque::raw_get(core::ptr::addr_of!((*ptr).work)) } + // A pointer cast would also be ok due to `#[repr(transparent)]`. 
We use `&raw const (*ptr).work` + // so that the compiler does not complain that the `work` field is unused. + unsafe { Opaque::raw_get(&raw const (*ptr).work) } } } @@ -510,7 +510,7 @@ macro_rules! impl_has_work { unsafe fn raw_get_work(ptr: *mut Self) -> *mut $crate::workqueue::Work<$work_type $(, $id)?> { // SAFETY: The caller promises that the pointer is not dangling. unsafe { - ::core::ptr::addr_of_mut!((*ptr).$field) + &raw mut (*ptr).$field } } } -- 2.48.1

7 months, 2 weeks

2
1
0 0

[PATCH v2 3/3] rust: replace `addr_of[_mut]!` with `&raw [const | mut]`

by Antonio Hickey

Replace all occurrences of `addr_of!(place)` with `&raw const place`, and all occurrences of `addr_of_mut!(place)` with `&raw mut place`. Utilizing the new feature will allow us to reduce macro complexity and improve consistency with existing reference syntax, as `&raw const` and `&raw mut` are very similar to `&` and `&mut`, making them fit more naturally with other existing code than the previously used macros. Suggested-by: Benno Lossin <benno.lossin(a)proton.me> Link: https://github.com/Rust-for-Linux/linux/issues/1148 Signed-off-by: Antonio Hickey <contact(a)antoniohickey.com> --- rust/kernel/block/mq/request.rs | 4 ++-- rust/kernel/faux.rs | 4 ++-- rust/kernel/fs/file.rs | 2 +- rust/kernel/init.rs | 8 ++++---- rust/kernel/init/macros.rs | 28 +++++++++++++------------- rust/kernel/jump_label.rs | 4 ++-- rust/kernel/kunit.rs | 4 ++-- rust/kernel/list.rs | 2 +- rust/kernel/list/impl_list_item_mod.rs | 6 +++--- rust/kernel/net/phy.rs | 4 ++-- rust/kernel/pci.rs | 4 ++-- rust/kernel/platform.rs | 4 +--- rust/kernel/rbtree.rs | 22 ++++++++++---------- rust/kernel/sync/arc.rs | 2 +- rust/kernel/task.rs | 4 ++-- rust/kernel/workqueue.rs | 8 ++++---- 16 files changed, 54 insertions(+), 56 deletions(-) diff --git a/rust/kernel/block/mq/request.rs b/rust/kernel/block/mq/request.rs index 7943f43b9575..4a5b7ec914ef 100644 --- a/rust/kernel/block/mq/request.rs +++ b/rust/kernel/block/mq/request.rs @@ -12,7 +12,7 @@ }; use core::{ marker::PhantomData, - ptr::{addr_of_mut, NonNull}, + ptr::NonNull, sync::atomic::{AtomicU64, Ordering}, }; @@ -187,7 +187,7 @@ pub(crate) fn refcount(&self) -> &AtomicU64 { pub(crate) unsafe fn refcount_ptr(this: *mut Self) -> *mut AtomicU64 { // SAFETY: Because of the safety requirements of this function, the // field projection is safe. 
- unsafe { addr_of_mut!((*this).refcount) } + unsafe { &raw mut (*this).refcount } } } diff --git a/rust/kernel/faux.rs b/rust/kernel/faux.rs index 5acc0c02d451..52ac554c1119 100644 --- a/rust/kernel/faux.rs +++ b/rust/kernel/faux.rs @@ -7,7 +7,7 @@ //! C header: [`include/linux/device/faux.h`] use crate::{bindings, device, error::code::*, prelude::*}; -use core::ptr::{addr_of_mut, null, null_mut, NonNull}; +use core::ptr::{null, null_mut, NonNull}; /// The registration of a faux device. /// @@ -45,7 +45,7 @@ impl AsRef<device::Device> for Registration { fn as_ref(&self) -> &device::Device { // SAFETY: The underlying `device` in `faux_device` is guaranteed by the C API to be // a valid initialized `device`. - unsafe { device::Device::as_ref(addr_of_mut!((*self.as_raw()).dev)) } + unsafe { device::Device::as_ref((&raw mut (*self.as_raw()).dev)) } } } diff --git a/rust/kernel/fs/file.rs b/rust/kernel/fs/file.rs index ed57e0137cdb..7ee4830b67f3 100644 --- a/rust/kernel/fs/file.rs +++ b/rust/kernel/fs/file.rs @@ -331,7 +331,7 @@ pub fn flags(&self) -> u32 { // SAFETY: The file is valid because the shared reference guarantees a nonzero refcount. // // FIXME(read_once): Replace with `read_once` when available on the Rust side. - unsafe { core::ptr::addr_of!((*self.as_ptr()).f_flags).read_volatile() } + unsafe { (&raw const (*self.as_ptr()).f_flags).read_volatile() } } } diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs index 7fd1ea8265a5..a8fac6558671 100644 --- a/rust/kernel/init.rs +++ b/rust/kernel/init.rs @@ -122,7 +122,7 @@ //! ```rust //! # #![expect(unreachable_pub, clippy::disallowed_names)] //! use kernel::{init, types::Opaque}; -//! use core::{ptr::addr_of_mut, marker::PhantomPinned, pin::Pin}; +//! use core::{marker::PhantomPinned, pin::Pin}; //! # mod bindings { //! # #![expect(non_camel_case_types)] //! # #![expect(clippy::missing_safety_doc)] @@ -159,7 +159,7 @@ //! unsafe { //! init::pin_init_from_closure(move |slot: *mut Self| { //! 
// `slot` contains uninit memory, avoid creating a reference. -//! let foo = addr_of_mut!((*slot).foo); +//! let foo = &raw mut (*slot).foo; //! //! // Initialize the `foo` //! bindings::init_foo(Opaque::raw_get(foo)); @@ -541,7 +541,7 @@ macro_rules! stack_try_pin_init { /// /// ```rust /// # use kernel::{macros::{Zeroable, pin_data}, pin_init}; -/// # use core::{ptr::addr_of_mut, marker::PhantomPinned}; +/// # use core::marker::PhantomPinned; /// #[pin_data] /// #[derive(Zeroable)] /// struct Buf { @@ -554,7 +554,7 @@ macro_rules! stack_try_pin_init { /// pin_init!(&this in Buf { /// buf: [0; 64], /// // SAFETY: TODO. -/// ptr: unsafe { addr_of_mut!((*this.as_ptr()).buf).cast() }, +/// ptr: unsafe { &raw mut (*this.as_ptr()).buf.cast() }, /// pin: PhantomPinned, /// }); /// pin_init!(Buf { diff --git a/rust/kernel/init/macros.rs b/rust/kernel/init/macros.rs index 1fd146a83241..af525fbb2f01 100644 --- a/rust/kernel/init/macros.rs +++ b/rust/kernel/init/macros.rs @@ -244,25 +244,25 @@ //! struct __InitOk; //! // This is the expansion of `t,`, which is syntactic sugar for `t: t,`. //! { -//! unsafe { ::core::ptr::write(::core::addr_of_mut!((*slot).t), t) }; +//! unsafe { ::core::ptr::write(&raw mut (*slot).t, t) }; //! } //! // Since initialization could fail later (not in this case, since the //! // error type is `Infallible`) we will need to drop this field if there //! // is an error later. This `DropGuard` will drop the field when it gets //! // dropped and has not yet been forgotten. //! let __t_guard = unsafe { -//! ::pinned_init::__internal::DropGuard::new(::core::addr_of_mut!((*slot).t)) +//! ::pinned_init::__internal::DropGuard::new(&raw mut (*slot).t) //! }; //! // Expansion of `x: 0,`: //! // Since this can be an arbitrary expression we cannot place it inside //! // of the `unsafe` block, so we bind it here. //! { //! let x = 0; -//! unsafe { ::core::ptr::write(::core::addr_of_mut!((*slot).x), x) }; +//! 
unsafe { ::core::ptr::write(&raw mut (*slot).x, x) }; //! } //! // We again create a `DropGuard`. //! let __x_guard = unsafe { -//! ::kernel::init::__internal::DropGuard::new(::core::addr_of_mut!((*slot).x)) +//! ::kernel::init::__internal::DropGuard::new(&raw mut (*slot).x) //! }; //! // Since initialization has successfully completed, we can now forget //! // the guards. This is not `mem::forget`, since we only have @@ -459,15 +459,15 @@ //! { //! struct __InitOk; //! { -//! unsafe { ::core::ptr::write(::core::addr_of_mut!((*slot).a), a) }; +//! unsafe { ::core::ptr::write(&raw mut (*slot).a, a) }; //! } //! let __a_guard = unsafe { -//! ::kernel::init::__internal::DropGuard::new(::core::addr_of_mut!((*slot).a)) +//! ::kernel::init::__internal::DropGuard::new(&raw mut (*slot).a) //! }; //! let init = Bar::new(36); -//! unsafe { data.b(::core::addr_of_mut!((*slot).b), b)? }; +//! unsafe { data.b(&raw mut (*slot).b, b)? }; //! let __b_guard = unsafe { -//! ::kernel::init::__internal::DropGuard::new(::core::addr_of_mut!((*slot).b)) +//! ::kernel::init::__internal::DropGuard::new(&raw mut (*slot).b) //! }; //! ::core::mem::forget(__b_guard); //! ::core::mem::forget(__a_guard); @@ -1210,7 +1210,7 @@ fn assert_zeroable<T: $crate::init::Zeroable>(_: *mut T) {} // SAFETY: `slot` is valid, because we are inside of an initializer closure, we // return when an error/panic occurs. // We also use the `data` to require the correct trait (`Init` or `PinInit`) for `$field`. - unsafe { $data.$field(::core::ptr::addr_of_mut!((*$slot).$field), init)? }; + unsafe { $data.$field(&raw mut (*$slot).$field, init)? }; // Create the drop guard: // // We rely on macro hygiene to make it impossible for users to access this local variable. @@ -1218,7 +1218,7 @@ fn assert_zeroable<T: $crate::init::Zeroable>(_: *mut T) {} ::kernel::macros::paste! { // SAFETY: We forget the guard later when initialization has succeeded. 
let [< __ $field _guard >] = unsafe { - $crate::init::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field)) + $crate::init::__internal::DropGuard::new(&raw mut (*$slot).$field) }; $crate::__init_internal!(init_slot($use_data): @@ -1241,7 +1241,7 @@ fn assert_zeroable<T: $crate::init::Zeroable>(_: *mut T) {} // // SAFETY: `slot` is valid, because we are inside of an initializer closure, we // return when an error/panic occurs. - unsafe { $crate::init::Init::__init(init, ::core::ptr::addr_of_mut!((*$slot).$field))? }; + unsafe { $crate::init::Init::__init(init, &raw mut (*$slot).$field)? }; // Create the drop guard: // // We rely on macro hygiene to make it impossible for users to access this local variable. @@ -1249,7 +1249,7 @@ fn assert_zeroable<T: $crate::init::Zeroable>(_: *mut T) {} ::kernel::macros::paste! { // SAFETY: We forget the guard later when initialization has succeeded. let [< __ $field _guard >] = unsafe { - $crate::init::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field)) + $crate::init::__internal::DropGuard::new(&raw mut (*$slot).$field) }; $crate::__init_internal!(init_slot(): @@ -1272,7 +1272,7 @@ fn assert_zeroable<T: $crate::init::Zeroable>(_: *mut T) {} // Initialize the field. // // SAFETY: The memory at `slot` is uninitialized. - unsafe { ::core::ptr::write(::core::ptr::addr_of_mut!((*$slot).$field), $field) }; + unsafe { ::core::ptr::write(&raw mut (*$slot).$field, $field) }; } // Create the drop guard: // @@ -1281,7 +1281,7 @@ fn assert_zeroable<T: $crate::init::Zeroable>(_: *mut T) {} ::kernel::macros::paste! { // SAFETY: We forget the guard later when initialization has succeeded. 
let [< __ $field _guard >] = unsafe { - $crate::init::__internal::DropGuard::new(::core::ptr::addr_of_mut!((*$slot).$field)) + $crate::init::__internal::DropGuard::new(&raw mut (*$slot).$field) }; $crate::__init_internal!(init_slot($($use_data)?): diff --git a/rust/kernel/jump_label.rs b/rust/kernel/jump_label.rs index 4e974c768dbd..ca10abae0eee 100644 --- a/rust/kernel/jump_label.rs +++ b/rust/kernel/jump_label.rs @@ -20,8 +20,8 @@ #[macro_export] macro_rules! static_branch_unlikely { ($key:path, $keytyp:ty, $field:ident) => {{ - let _key: *const $keytyp = ::core::ptr::addr_of!($key); - let _key: *const $crate::bindings::static_key_false = ::core::ptr::addr_of!((*_key).$field); + let _key: *const $keytyp = &raw const $key; + let _key: *const $crate::bindings::static_key_false = &raw const (*_key).$field; let _key: *const $crate::bindings::static_key = _key.cast(); #[cfg(not(CONFIG_JUMP_LABEL))] diff --git a/rust/kernel/kunit.rs b/rust/kernel/kunit.rs index 824da0e9738a..a17ef3b2e860 100644 --- a/rust/kernel/kunit.rs +++ b/rust/kernel/kunit.rs @@ -128,9 +128,9 @@ unsafe impl Sync for UnaryAssert {} unsafe { $crate::bindings::__kunit_do_failed_assertion( kunit_test, - core::ptr::addr_of!(LOCATION.0), + &raw const LOCATION.0, $crate::bindings::kunit_assert_type_KUNIT_ASSERTION, - core::ptr::addr_of!(ASSERTION.0.assert), + &raw const ASSERTION.0.assert, Some($crate::bindings::kunit_unary_assert_format), core::ptr::null(), ); diff --git a/rust/kernel/list.rs b/rust/kernel/list.rs index c0ed227b8a4f..e98f0820f002 100644 --- a/rust/kernel/list.rs +++ b/rust/kernel/list.rs @@ -176,7 +176,7 @@ pub fn new() -> impl PinInit<Self> { #[inline] unsafe fn fields(me: *mut Self) -> *mut ListLinksFields { // SAFETY: The caller promises that the pointer is valid. 
- unsafe { Opaque::raw_get(ptr::addr_of!((*me).inner)) } + unsafe { Opaque::raw_get(&raw const (*me).inner) } } /// # Safety diff --git a/rust/kernel/list/impl_list_item_mod.rs b/rust/kernel/list/impl_list_item_mod.rs index a0438537cee1..014b6713d59d 100644 --- a/rust/kernel/list/impl_list_item_mod.rs +++ b/rust/kernel/list/impl_list_item_mod.rs @@ -49,7 +49,7 @@ macro_rules! impl_has_list_links { // SAFETY: The implementation of `raw_get_list_links` only compiles if the field has the // right type. // - // The behavior of `raw_get_list_links` is not changed since the `addr_of_mut!` macro is + // The behavior of `raw_get_list_links` is not changed since the `&raw mut` op is // equivalent to the pointer offset operation in the trait definition. unsafe impl$(<$($implarg),*>)? $crate::list::HasListLinks$(<$id>)? for $self $(<$($selfarg),*>)? @@ -61,7 +61,7 @@ unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut $crate::list::ListLinks$(<$ // SAFETY: The caller promises that the pointer is not dangling. We know that this // expression doesn't follow any pointers, as the `offset_of!` invocation above // would otherwise not compile. - unsafe { ::core::ptr::addr_of_mut!((*ptr)$(.$field)*) } + unsafe { &raw mut (*ptr)$(.$field)* } } } )*}; @@ -103,7 +103,7 @@ macro_rules! impl_has_list_links_self_ptr { unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut $crate::list::ListLinks$(<$id>)? { // SAFETY: The caller promises that the pointer is not dangling. let ptr: *mut $crate::list::ListLinksSelfPtr<$item_type $(, $id)?> = - unsafe { ::core::ptr::addr_of_mut!((*ptr).$field) }; + unsafe { &raw mut (*ptr).$field }; ptr.cast() } } diff --git a/rust/kernel/net/phy.rs b/rust/kernel/net/phy.rs index a59469c785e3..757db052cc09 100644 --- a/rust/kernel/net/phy.rs +++ b/rust/kernel/net/phy.rs @@ -7,7 +7,7 @@ //! C headers: [`include/linux/phy.h`](srctree/include/linux/phy.h). 
use crate::{error::*, prelude::*, types::Opaque}; -use core::{marker::PhantomData, ptr::addr_of_mut}; +use core::marker::PhantomData; pub mod reg; @@ -285,7 +285,7 @@ impl AsRef<kernel::device::Device> for Device { fn as_ref(&self) -> &kernel::device::Device { let phydev = self.0.get(); // SAFETY: The struct invariant ensures that `mdio.dev` is valid. - unsafe { kernel::device::Device::as_ref(addr_of_mut!((*phydev).mdio.dev)) } + unsafe { kernel::device::Device::as_ref(&raw mut (*phydev).mdio.dev) } } } diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs index f7b2743828ae..6cb9ed1e7cbf 100644 --- a/rust/kernel/pci.rs +++ b/rust/kernel/pci.rs @@ -17,7 +17,7 @@ types::{ARef, ForeignOwnable, Opaque}, ThisModule, }; -use core::{ops::Deref, ptr::addr_of_mut}; +use core::ops::Deref; use kernel::prelude::*; /// An adapter for the registration of PCI drivers. @@ -60,7 +60,7 @@ extern "C" fn probe_callback( ) -> kernel::ffi::c_int { // SAFETY: The PCI bus only ever calls the probe callback with a valid pointer to a // `struct pci_dev`. - let dev = unsafe { device::Device::get_device(addr_of_mut!((*pdev).dev)) }; + let dev = unsafe { device::Device::get_device(&raw mut (*pdev).dev) }; // SAFETY: `dev` is guaranteed to be embedded in a valid `struct pci_dev` by the call // above. let mut pdev = unsafe { Device::from_dev(dev) }; diff --git a/rust/kernel/platform.rs b/rust/kernel/platform.rs index 1297f5292ba9..344875ad7b82 100644 --- a/rust/kernel/platform.rs +++ b/rust/kernel/platform.rs @@ -14,8 +14,6 @@ ThisModule, }; -use core::ptr::addr_of_mut; - /// An adapter for the registration of platform drivers. pub struct Adapter<T: Driver>(T); @@ -55,7 +53,7 @@ unsafe fn unregister(pdrv: &Opaque<Self::RegType>) { impl<T: Driver + 'static> Adapter<T> { extern "C" fn probe_callback(pdev: *mut bindings::platform_device) -> kernel::ffi::c_int { // SAFETY: The platform bus only ever calls the probe callback with a valid `pdev`. 
- let dev = unsafe { device::Device::get_device(addr_of_mut!((*pdev).dev)) }; + let dev = unsafe { device::Device::get_device(&raw mut (*pdev).dev) }; // SAFETY: `dev` is guaranteed to be embedded in a valid `struct platform_device` by the // call above. let mut pdev = unsafe { Device::from_dev(dev) }; diff --git a/rust/kernel/rbtree.rs b/rust/kernel/rbtree.rs index 1ea25c7092fb..b0ad35663cb0 100644 --- a/rust/kernel/rbtree.rs +++ b/rust/kernel/rbtree.rs @@ -11,7 +11,7 @@ cmp::{Ord, Ordering}, marker::PhantomData, mem::MaybeUninit, - ptr::{addr_of_mut, from_mut, NonNull}, + ptr::{from_mut, NonNull}, }; /// A red-black tree with owned nodes. @@ -238,7 +238,7 @@ pub fn values_mut(&mut self) -> impl Iterator<Item = &'_ mut V> { /// Returns a cursor over the tree nodes, starting with the smallest key. pub fn cursor_front(&mut self) -> Option<Cursor<'_, K, V>> { - let root = addr_of_mut!(self.root); + let root = &raw mut self.root; // SAFETY: `self.root` is always a valid root node let current = unsafe { bindings::rb_first(root) }; NonNull::new(current).map(|current| { @@ -253,7 +253,7 @@ pub fn cursor_front(&mut self) -> Option<Cursor<'_, K, V>> { /// Returns a cursor over the tree nodes, starting with the largest key. pub fn cursor_back(&mut self) -> Option<Cursor<'_, K, V>> { - let root = addr_of_mut!(self.root); + let root = &raw mut self.root; // SAFETY: `self.root` is always a valid root node let current = unsafe { bindings::rb_last(root) }; NonNull::new(current).map(|current| { @@ -459,7 +459,7 @@ pub fn cursor_lower_bound(&mut self, key: &K) -> Option<Cursor<'_, K, V>> let best = best_match?; // SAFETY: `best` is a non-null node so it is valid by the type invariants. 
- let links = unsafe { addr_of_mut!((*best.as_ptr()).links) }; + let links = unsafe { &raw mut (*best.as_ptr()).links }; NonNull::new(links).map(|current| { // INVARIANT: @@ -767,7 +767,7 @@ pub fn remove_current(self) -> (Option<Self>, RBTreeNode<K, V>) { let node = RBTreeNode { node }; // SAFETY: The reference to the tree used to create the cursor outlives the cursor, so // the tree cannot change. By the tree invariant, all nodes are valid. - unsafe { bindings::rb_erase(&mut (*this).links, addr_of_mut!(self.tree.root)) }; + unsafe { bindings::rb_erase(&mut (*this).links, &raw mut self.tree.root) }; let current = match (prev, next) { (_, Some(next)) => next, @@ -803,7 +803,7 @@ fn remove_neighbor(&mut self, direction: Direction) -> Option<RBTreeNode<K, V>> let neighbor = neighbor.as_ptr(); // SAFETY: The reference to the tree used to create the cursor outlives the cursor, so // the tree cannot change. By the tree invariant, all nodes are valid. - unsafe { bindings::rb_erase(neighbor, addr_of_mut!(self.tree.root)) }; + unsafe { bindings::rb_erase(neighbor, &raw mut self.tree.root) }; // SAFETY: By the type invariant of `Self`, all non-null `rb_node` pointers stored in `self` // point to the links field of `Node<K, V>` objects. let this = unsafe { container_of!(neighbor, Node<K, V>, links) }.cast_mut(); @@ -918,7 +918,7 @@ unsafe fn to_key_value_raw<'b>(node: NonNull<bindings::rb_node>) -> (&'b K, *mut let k = unsafe { &(*this).key }; // SAFETY: The passed `node` is the current node or a non-null neighbor, // thus `this` is valid by the type invariants. - let v = unsafe { addr_of_mut!((*this).value) }; + let v = unsafe { &raw mut (*this).value }; (k, v) } } @@ -1027,7 +1027,7 @@ fn next(&mut self) -> Option<Self::Item> { self.next = unsafe { bindings::rb_next(self.next) }; // SAFETY: By the same reasoning above, it is safe to dereference the node. 
- Some(unsafe { (addr_of_mut!((*cur).key), addr_of_mut!((*cur).value)) }) + Some(unsafe { (&raw mut (*cur).key, &raw mut (*cur).value) }) } } @@ -1170,7 +1170,7 @@ fn insert(self, node: RBTreeNode<K, V>) -> &'a mut V { // SAFETY: `node` is valid at least until we call `Box::from_raw`, which only happens when // the node is removed or replaced. - let node_links = unsafe { addr_of_mut!((*node).links) }; + let node_links = unsafe { &raw mut (*node).links }; // INVARIANT: We are linking in a new node, which is valid. It remains valid because we // "forgot" it with `Box::into_raw`. @@ -1178,7 +1178,7 @@ fn insert(self, node: RBTreeNode<K, V>) -> &'a mut V { unsafe { bindings::rb_link_node(node_links, self.parent, self.child_field_of_parent) }; // SAFETY: All pointers are valid. `node` has just been inserted into the tree. - unsafe { bindings::rb_insert_color(node_links, addr_of_mut!((*self.rbtree).root)) }; + unsafe { bindings::rb_insert_color(node_links, &raw mut (*self.rbtree).root) }; // SAFETY: The node is valid until we remove it from the tree. unsafe { &mut (*node).value } @@ -1261,7 +1261,7 @@ fn replace(self, node: RBTreeNode<K, V>) -> RBTreeNode<K, V> { // SAFETY: `node` is valid at least until we call `Box::from_raw`, which only happens when // the node is removed or replaced. - let new_node_links = unsafe { addr_of_mut!((*node).links) }; + let new_node_links = unsafe { &raw mut (*node).links }; // SAFETY: This updates the pointers so that `new_node_links` is in the tree where // `self.node_links` used to be. diff --git a/rust/kernel/sync/arc.rs b/rust/kernel/sync/arc.rs index 3cefda7a4372..81d8b0f84957 100644 --- a/rust/kernel/sync/arc.rs +++ b/rust/kernel/sync/arc.rs @@ -243,7 +243,7 @@ pub fn into_raw(self) -> *const T { let ptr = self.ptr.as_ptr(); core::mem::forget(self); // SAFETY: The pointer is valid. 
- unsafe { core::ptr::addr_of!((*ptr).data) } + unsafe { &raw const (*ptr).data } } /// Recreates an [`Arc`] instance previously deconstructed via [`Arc::into_raw`]. diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs index 49012e711942..b2ac768eed23 100644 --- a/rust/kernel/task.rs +++ b/rust/kernel/task.rs @@ -257,7 +257,7 @@ pub fn as_ptr(&self) -> *mut bindings::task_struct { pub fn group_leader(&self) -> &Task { // SAFETY: The group leader of a task never changes after initialization, so reading this // field is not a data race. - let ptr = unsafe { *ptr::addr_of!((*self.as_ptr()).group_leader) }; + let ptr = unsafe { *(&raw const (*self.as_ptr()).group_leader) }; // SAFETY: The lifetime of the returned task reference is tied to the lifetime of `self`, // and given that a task has a reference to its group leader, we know it must be valid for @@ -269,7 +269,7 @@ pub fn group_leader(&self) -> &Task { pub fn pid(&self) -> Pid { // SAFETY: The pid of a task never changes after initialization, so reading this field is // not a data race. - unsafe { *ptr::addr_of!((*self.as_ptr()).pid) } + unsafe { *(&raw const (*self.as_ptr()).pid) } } /// Returns the UID of the given task. diff --git a/rust/kernel/workqueue.rs b/rust/kernel/workqueue.rs index 0cd100d2aefb..34e8abb38974 100644 --- a/rust/kernel/workqueue.rs +++ b/rust/kernel/workqueue.rs @@ -401,9 +401,9 @@ pub fn new(name: &'static CStr, key: &'static LockClassKey) -> impl PinInit<Self pub unsafe fn raw_get(ptr: *const Self) -> *mut bindings::work_struct { // SAFETY: The caller promises that the pointer is aligned and not dangling. // - // A pointer cast would also be ok due to `#[repr(transparent)]`. We use `addr_of!` so that - // the compiler does not complain that the `work` field is unused. - unsafe { Opaque::raw_get(core::ptr::addr_of!((*ptr).work)) } + // A pointer cast would also be ok due to `#[repr(transparent)]`. 
We use `&raw const (*ptr).work` + // so that the compiler does not complain that the `work` field is unused. + unsafe { Opaque::raw_get(&raw const (*ptr).work) } } } @@ -510,7 +510,7 @@ macro_rules! impl_has_work { unsafe fn raw_get_work(ptr: *mut Self) -> *mut $crate::workqueue::Work<$work_type $(, $id)?> { // SAFETY: The caller promises that the pointer is not dangling. unsafe { - ::core::ptr::addr_of_mut!((*ptr).$field) + &raw mut (*ptr).$field } } } -- 2.48.1

7 months, 2 weeks

1
0
0 0

[RFC PATCH v1 0/2] mseal: allow noop mprotect

by jeffxu＠chromium.org

From: Jeff Xu <jeffxu(a)chromium.org> Initially, when mseal was introduced in 6.10, semantically, when a VMA within the specified address range is sealed, the mprotect will be rejected, leaving all of VMA unmodified. However, adding an extra loop to check the mseal flag for every VMA slows things down a bit, therefore in 6.12, this issue was solved by removing can_modify_mm and checking each VMA’s mseal flag directly without an extra loop [1]. This is a semantic change, i.e. partial update is allowed, VMAs can be updated until a sealed VMA is found. The new semantic also means, we could allow mprotect on a sealed VMA if the new attribute of VMA remains the same as the old one. Relaxing this avoids unnecessary impacts for applications that want to seal a particular mapping. Doing this also has no security impact. The mseal_test is also modified by this patch to adapt to the new semantic. Please note, mseal_test is currently undergoing refactoring, and eventually will be replaced with a new memory sealing selftest. In this patch, I only make a minimum change to make it pass. I considered adding a new testcase in mseal_test to cover this new behavior, however, the existing mseal_test is using wrong patterns and won’t pass the review. Such a new test is better to be added in the new refactored memory sealing tests. The refactoring is currently pending review [2]. [1] https://lore.kernel.org/all/20240817-mseal-depessimize-v3-0-d8d2e037df30@gm… [2] https://lore.kernel.org/all/20241211053311.245636-1-jeffxu@google.com/ Jeff Xu (2): selftests/mm: mseal_test: avoid using no-op mprotect mseal: allow noop mprotect mm/mprotect.c | 6 +++--- tools/testing/selftests/mm/mseal_test.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) -- 2.49.0.rc0.332.g42c0ae87b1-goog

7 months, 2 weeks

5
10
0 0

[PATCH] selftests/bpf: Fix sockopt selftest failure on powerpc

by Saket Kumar Bhaskar

The SO_RCVLOWAT option is defined as 18 in the selftest header, which matches the generic definition. However, on powerpc, SO_RCVLOWAT is defined as 16. This discrepancy causes sol_socket_sockopt() to fail with the default switch case on powerpc. This commit fixes this by defining SO_RCVLOWAT as 16 for powerpc. Signed-off-by: Saket Kumar Bhaskar <skb99(a)linux.ibm.com> --- tools/testing/selftests/bpf/progs/bpf_tracing_net.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h index 59843b430f76..bcd44d5018bf 100644 --- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h +++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h @@ -15,7 +15,11 @@ #define SO_KEEPALIVE 9 #define SO_PRIORITY 12 #define SO_REUSEPORT 15 +#if defined(__TARGET_ARCH_powerpc) +#define SO_RCVLOWAT 16 +#else #define SO_RCVLOWAT 18 +#endif #define SO_BINDTODEVICE 25 #define SO_MARK 36 #define SO_MAX_PACING_RATE 47 -- 2.43.5

7 months, 2 weeks

3
2
0 0

Error during --arch x86_64 kunit test run

by Shuah Khan

David, Brendan, Rae, I am seeing the following error when I run ./tools/testing/kunit/kunit.py run --arch x86_64 ERROR:root:ld:arch/x86/realmode/rm/realmode.lds:236: undefined symbol `sev_es_trampoline_start' referenced in expression I isolated it to dependency on CONFIG_AMD_MEM_ENCRYPT I added the option using --kconfig_add ./tools/testing/kunit/kunit.py run --arch x86_64 --kconfig_add CONFIG_AMD_MEM_ENCRYPT=y I see the following ERROR:root:Not all Kconfig options selected in kunitconfig were in the generated .config. This is probably due to unsatisfied dependencies. Missing: CONFIG_AMD_MEM_ENCRYPT=y Is there a better way to fix the dependencies? Does kunit default config need changing for x86_64? thanks, -- Shuah

7 months, 2 weeks

2
4
0 0

[PATCH v6 0/3] printf: convert self-test to KUnit

by Tamir Duberstein

This is one of just 3 remaining "Test Module" kselftests (the others being bitmap and scanf), the rest having been converted to KUnit. I tested this using: $ tools/testing/kunit/kunit.py run --arch arm64 --make_options LLVM=1 printf I have also sent out a series converting scanf[0]. Link: https://lore.kernel.org/all/20250204-scanf-kunit-convert-v3-0-386d7c3ee714@… [0] Signed-off-by: Tamir Duberstein <tamird(a)gmail.com> --- Changes in v6: - Use __printf correctly on `__test`. (Petr Mladek) - Rebase on linux-next. - Remove leftover references to `printf.sh`. - Update comment in `hash_pointer`. (Petr Mladek) - Avoid overrun in `KUNIT_EXPECT_MEMNEQ`. (Petr Mladek) - Restore trailing newlines on printk strings and add some missing ones. (Petr Mladek) - Use `kunit_skip` on not-yet-initialized crng. (Petr Mladek) - Link to v5: https://lore.kernel.org/r/20250221-printf-kunit-convert-v5-0-5db840301730@g… Changes in v5: - Update `do_test` `__printf` annotation (Rasmus Villemoes). - Link to v4: https://lore.kernel.org/r/20250214-printf-kunit-convert-v4-0-c254572f1565@g… Changes in v4: - Add patch "implicate test line in failure messages". - Rebase on linux-next, move scanf_kunit.c into lib/tests/. - Link to v3: https://lore.kernel.org/r/20250210-printf-kunit-convert-v3-0-ee6ac5500f5e@g… Changes in v3: - Remove extraneous trailing newlines from failure messages. - Replace `pr_warn` with `kunit_warn`. - Drop arch changes. - Remove KUnit boilerplate from CONFIG_PRINTF_KUNIT_TEST help text. - Restore `total_tests` counting. - Remove tc_fail macro in last patch. - Link to v2: https://lore.kernel.org/r/20250207-printf-kunit-convert-v2-0-057b23860823@g… Changes in v2: - Incorporate code review from prior work[0] by Arpitha Raghunandan. 
- Link to v1: https://lore.kernel.org/r/20250204-printf-kunit-convert-v1-0-ecf1b846a4de@g… Link: https://lore.kernel.org/lkml/20200817043028.76502-1-98.arpi@gmail.com/t/#u [0] --- Tamir Duberstein (3): printf: convert self-test to KUnit printf: break kunit into test cases printf: implicate test line in failure messages Documentation/core-api/printk-formats.rst | 4 +- Documentation/dev-tools/kselftest.rst | 2 +- MAINTAINERS | 2 +- lib/Kconfig.debug | 12 +- lib/Makefile | 1 - lib/tests/Makefile | 1 + lib/{test_printf.c => tests/printf_kunit.c} | 442 ++++++++++++---------------- tools/testing/selftests/kselftest/module.sh | 2 +- tools/testing/selftests/lib/Makefile | 2 +- tools/testing/selftests/lib/config | 1 - tools/testing/selftests/lib/printf.sh | 4 - 11 files changed, 207 insertions(+), 266 deletions(-) --- base-commit: 7ec162622e66a4ff886f8f28712ea1b13069e1aa change-id: 20250131-printf-kunit-convert-fd4012aa2ec6 Best regards, -- Tamir Duberstein <tamird(a)gmail.com>

7 months, 2 weeks

3
9
0 0

[PATCH v4 0/6] Fix some issues related to an interrupt type in pci_endpoint_test

by Kunihiko Hayashi

This series solves some issues about global "irq_type" that is used for indicating the current type for users. In addition, avoid an unexpected warning that occurs due to interrupts remaining after displaying an error caused by devm_request_irq(). Patch 1 includes adding GET_IRQTYPE test (check for failure). Patch 2-4 include fixes for stable kernels that have global "irq_type". Patch 5-6 include improvements for the latest. Changes since v3: - Add GET_IRQTYPE check to pci_endpoint test in selftests - Add the reason why global variables aren't necessary (patch 5/6) - Add Reviewed-by: lines (patch {2, 4, 6}/6) Changes since v2: - Rebase to v6.14-rc1 - Update message to clarify, and add result of call trace (patch 1/5) - Add Reviewed-by: lines (patch 2/5) - Add new patch to remove global "irq_type" variable (patch 4/5) - Add new patch to replace "devm" version of IRQ functions (patch 5/5) Changes since v1: - Divide original patch into two - Add an error message example - Add "pcitest" display example - Add a patch to fix an interrupt remaining issue Kunihiko Hayashi (6): selftests: pci_endpoint: Add GET_IRQTYPE checks to each interrupt test misc: pci_endpoint_test: Avoid issue of interrupts remaining after request_irq error misc: pci_endpoint_test: Fix displaying irq_type after request_irq error misc: pci_endpoint_test: Fix irq_type to convey the correct type misc: pci_endpoint_test: Remove global 'irq_type' and 'no_msi' misc: pci_endpoint_test: Do not use managed irq functions drivers/misc/pci_endpoint_test.c | 31 +++++++------------ .../pci_endpoint/pci_endpoint_test.c | 11 ++++++- 2 files changed, 21 insertions(+), 21 deletions(-) -- 2.25.1

7 months, 2 weeks

4
11
0 0

[PATCH v10 0/8] Buddy allocator like (or non-uniform) folio split

by Zi Yan

Hi all, This patchset adds a new buddy allocator like (or non-uniform) large folio split from a order-n folio to order-m with m < n. It reduces 1. the total number of after-split folios from 2^(n-m) to n-m+1; 2. the amount of memory needed for multi-index xarray split from 2^(n/6-m/6) to n/6-m/6, assuming XA_CHUNK_SHIFT=6; 3. keep more large folios after a split from all order-m folios to order-(n-1) to order-m folios. For example, to split an order-9 to order-0, folio split generates 10 (or 11 for anonymous memory) folios instead of 512, allocates 1 xa_node instead of 8, and leaves 1 order-8, 1 order-7, ..., 1 order-1 and 2 order-0 folios (or 4 order-0 for anonymous memory) instead of 512 order-0 folios. Instead of duplicating existing split_huge_page*() code, __folio_split() is introduced as the shared backend code for both split_huge_page_to_list_to_order() and folio_split(). __folio_split() can support both uniform split and buddy allocator like (or non-uniform) split. All existing split_huge_page*() users can be gradually converted to use folio_split() if possible. In this patchset, I converted truncate_inode_partial_folio() to use folio_split(). xfstests quick group passed for both tmpfs and xfs. I also semi-replicated Hugh's test[12] and ran it without any issue for almost 24 hours. It is on top of mm-everything-2025-03-07-07-55. It is ready to be merged. Changelog === From V9[13] 1. Incorporated Hugh's fixes[14] (Thanks Hugh): a) moved folio_set_order() in __split_folio_to_order() to be called only once for the input folio, b) used folio_test_swapcache() to catch both anon and shmem in swap cache cases, c) moved folio_next() out of for(;;), d) used mapping instead of origin_folio->mapping. 2. Added a TODO in __folio_split(), since large in-swap-cache shmem folio split is not supported yet. 3. Changed __split_folio_to_order() based on David Hildenbrand's MM owner tracking for large folios patchset[15], due to rebasing. From V8[11]: 1. 
Removed gfp parameter from xas_try_split() and GFP_NOWAIT is used all the time. (per Baolin Wang) 2. Used __xas_init_node_for_split() instead of __xas_alloc_node_for_split() and moved node allocation out. It fixed a bug when xa_node is pre-allocated by xas_nomem() before xas_try_split() is called without being initialized for split. From V7[9]: 1. Fixed a wrong function name in lib/test_xarray.c. 2. Made __split_folio_to_order() never fail, since the old order check is already done in __folio_split(). (per David Hildenbrand) 3. Fixed an issue reported by syzbot[10] by not dropping the original folio during truncate. 4. Fixed a WARNING when READ_ONLY_THP_FOR_FS is enabled. (Thank David Hildenbrand for reporting the issue) 5. Used two separate struct page* parameters, split_at and lock_at, to specify at which subpage the non-uniform split happens and which subpage to keep locked after the split, respectively. It improves code readability. From V6[8]: 1. Added an xarray function xas_try_split() to support iterative folio split, removing the need of using xas_split_alloc() and xas_split(). The function guarantees that at most one xa_node is allocated for each call. 2. Added concrete numbers of after-split folios and xa_node savings to cover letter, commit log. (per Andrew) From V5[7]: 1. Split shmem to any lower order patches are in mm tree, so dropped from this series. 2. Rename split_folio_at() to try_folio_split() to clarify that non-uniform split will not be used if it is not supported. From V4[6]: 1. Enabled shmem support in both uniform and buddy allocator like split and added selftests for it. 2. Added functions to check if uniform split and buddy allocator like split are supported for the given folio and order. 3. Made truncate fall back to uniform split if buddy allocator split is not supported (CONFIG_READ_ONLY_THP_FOR_FS and FS without large folio). 4. Added the missing folio_clear_has_hwpoisoned() to __split_unmapped_folio(). From V3[5]: 1. 
Used xas_split_alloc(GFP_NOWAIT) instead of xas_nomem(), since extra operations inside xas_split_alloc() are needed for correctness. 2. Enabled folio_split() for shmem and no issue was found with xfstests quick test group. 3. Split both ends of a truncate range in truncate_inode_partial_folio() to avoid wasting memory in shmem truncate (per David Hildenbrand). 4. Removed page_in_folio_offset() since page_folio() does the same thing. 5. Finished truncate related tests from xfstests quick test group on XFS and tmpfs without issues. 6. Disabled buddy allocator like split on CONFIG_READ_ONLY_THP_FOR_FS and FS without large folio. This check was missed in the prior versions. From V2[3]: 1. Incorporated all the feedback from Kirill[4]. 2. Used GFP_NOWAIT for xas_nomem(). 3. Tested the code path when xas_nomem() fails. 4. Added selftests for folio_split(). 5. Fixed no THP config build error. From V1[2]: 1. Split the original patch 1 into multiple ones for easy review (per Kirill). 2. Added xas_destroy() to avoid memory leak. 3. Fixed nr_dropped not used error (per kernel test robot). 4. Added proper error handling when xas_nomem() fails to allocate memory for xas_split() during buddy allocator like split. From RFC[1]: 1. Merged backend code of split_huge_page_to_list_to_order() and folio_split(). The same code is used for both uniform split and buddy allocator like split. 2. Use xas_nomem() instead of xas_split_alloc() for folio_split(). 3. folio_split() now leaves the first after-split folio unlocked, instead of the one containing the given page, since the caller of truncate_inode_partial_folio() locks and unlocks the first folio. 4. Extended split_huge_page debugfs to use folio_split(). 5. Added truncate_inode_partial_folio() as first user of folio_split(). Design === folio_split() splits a large folio in the same way as buddy allocator splits a large free page for allocation. The purpose is to minimize the number of folios after the split. 
For example, if user wants to free the 3rd subpage in an order-9 folio, folio_split() will split the order-9 folio as: O-0, O-0, O-0, O-0, O-2, O-3, O-4, O-5, O-6, O-7, O-8 if it is anon O-1, O-0, O-0, O-2, O-3, O-4, O-5, O-6, O-7, O-8 if it is pagecache Since anon folio does not support order-1 yet. The split process is similar to existing approach: 1. Unmap all page mappings (split PMD mappings if exist); 2. Split meta data like memcg, page owner, page alloc tag; 3. Copy meta data in struct folio to sub pages, but instead of splitting the whole folio into multiple smaller ones with the same order in one shot, this approach splits the folio iteratively. Taking the example above, this approach first splits the original order-9 into two order-8, then splits left part of order-8 to two order-7 and so on; 4. Post-process split folios, like write mapping->i_pages for pagecache, adjust folio refcounts, add split folios to corresponding list; 5. Remap split folios 6. Unlock split folios. __split_unmapped_folio() and __split_folio_to_order() replace __split_huge_page() and __split_huge_page_tail() respectively. __split_unmapped_folio() uses different approaches to perform uniform split and buddy allocator like split: 1. uniform split: one single call to __split_folio_to_order() is used to uniformly split the given folio. All resulting folios are put back to the list after split. The folio containing the given page is left to caller to unlock and others are unlocked. 2. buddy allocator like (or non-uniform) split: (old_order - new_order) calls to __split_folio_to_order() are used to split the given folio at order N to order N-1. After each call, the target folio is changed to the one containing the page, which is given as a folio_split() parameter. After each call, folios not containing the page are put back to the list. The folio containing the page is put back to the list when its order is new_order.
All folios are unlocked except the first folio, which is left to caller to unlock. Patch Overview === 1. Patch 1 added a new xarray function xas_try_split() to perform iterative xarray split. 2. Patch 2 added __split_unmapped_folio() and __split_folio_to_order() to prepare for moving to new backend split code. 3. Patch 3 moved common code in split_huge_page_to_list_to_order() to __folio_split(). 4. Patch 4 added new folio_split() and made split_huge_page_to_list_to_order() share the new __split_unmapped_folio() with folio_split(). 5. Patch 5 removed no longer used __split_huge_page() and __split_huge_page_tail(). 6. Patch 6 added a new in_folio_offset to split_huge_page debugfs for folio_split() test. 7. Patch 7 used try_folio_split() for truncate operation. 8. Patch 8 added folio_split() tests. Any comments and/or suggestions are welcome. Thanks. [1] https://lore.kernel.org/linux-mm/20241008223748.555845-1-ziy@nvidia.com/ [2] https://lore.kernel.org/linux-mm/20241028180932.1319265-1-ziy@nvidia.com/ [3] https://lore.kernel.org/linux-mm/20241101150357.1752726-1-ziy@nvidia.com/ [4] https://lore.kernel.org/linux-mm/e6ppwz5t4p4kvir6eqzoto4y5fmdjdxdyvxvtw43nc… [5] https://lore.kernel.org/linux-mm/20241205001839.2582020-1-ziy@nvidia.com/ [6] https://lore.kernel.org/linux-mm/20250106165513.104899-1-ziy@nvidia.com/ [7] https://lore.kernel.org/linux-mm/20250116211042.741543-1-ziy@nvidia.com/ [8] https://lore.kernel.org/linux-mm/20250205031417.1771278-1-ziy@nvidia.com/ [9] https://lore.kernel.org/linux-mm/20250211155034.268962-1-ziy@nvidia.com/ [10] https://lore.kernel.org/all/67af65cb.050a0220.21dd3.004a.GAE@google.com/ [11] https://lore.kernel.org/linux-mm/20250218235012.1542225-1-ziy@nvidia.com/ [12] https://lore.kernel.org/linux-mm/D45D4F01-E5A5-47E6-8724-01610CC192CC@nvidi… [13] https://lore.kernel.org/linux-mm/20250226210032.2044041-1-ziy@nvidia.com/ [14] https://lore.kernel.org/linux-mm/2fae27fe-6e2e-3587-4b68-072118d80cf8@googl… [15] 
https://lore.kernel.org/all/20250303163014.1128035-4-david@redhat.com/ Zi Yan (8): xarray: add xas_try_split() to split a multi-index entry mm/huge_memory: add two new (not yet used) functions for folio_split() mm/huge_memory: move folio split common code to __folio_split() mm/huge_memory: add buddy allocator like (non-uniform) folio_split() mm/huge_memory: remove the old, unused __split_huge_page() mm/huge_memory: add folio_split() to debugfs testing interface mm/truncate: use folio_split() in truncate operation selftests/mm: add tests for folio_split(), buddy allocator like split Documentation/core-api/xarray.rst | 14 +- include/linux/huge_mm.h | 36 + include/linux/xarray.h | 6 + lib/test_xarray.c | 52 ++ lib/xarray.c | 132 ++- mm/huge_memory.c | 786 ++++++++++++------ mm/truncate.c | 37 +- tools/testing/radix-tree/Makefile | 1 + .../selftests/mm/split_huge_page_test.c | 34 +- 9 files changed, 809 insertions(+), 289 deletions(-) -- 2.47.2

7 months, 2 weeks

2
16
0 0

[PATCH net 7/7] selftests: net: test for lwtunnel dst ref loops

by Justin Iurman

As recently specified by commit 0ea09cbf8350 ("docs: netdev: add a note on selftest posting") in net-next, the selftest is therefore shipped in this series. However, this selftest does not really test this series. It needs this series to avoid crashing the kernel. What it really tests, thanks to kmemleak, is what was fixed by the following commits: - commit c71a192976de ("net: ipv6: fix dst refleaks in rpl, seg6 and ioam6 lwtunnels") - commit 92191dd10730 ("net: ipv6: fix dst ref loops in rpl, seg6 and ioam6 lwtunnels") - commit c64a0727f9b1 ("net: ipv6: fix dst ref loop on input in seg6 lwt") - commit 13e55fbaec17 ("net: ipv6: fix dst ref loop on input in rpl lwt") - commit 0e7633d7b95b ("net: ipv6: fix dst ref loop in ila lwtunnel") - commit 5da15a9c11c1 ("net: ipv6: fix missing dst ref drop in ila lwtunnel") Cc: Shuah Khan <shuah(a)kernel.org> Cc: linux-kselftest(a)vger.kernel.org Signed-off-by: Justin Iurman <justin.iurman(a)uliege.be> --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/config | 2 + .../selftests/net/lwt_dst_cache_ref_loop.sh | 250 ++++++++++++++++++ 3 files changed, 253 insertions(+) create mode 100755 tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 73ee88d6b043..8f32b4f01aee 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -100,6 +100,7 @@ TEST_PROGS += vlan_bridge_binding.sh TEST_PROGS += bpf_offload.py TEST_PROGS += ipv6_route_update_soft_lockup.sh TEST_PROGS += busy_poll_test.sh +TEST_PROGS += lwt_dst_cache_ref_loop.sh # YNL files, must be before "include ..lib.mk" YNL_GEN_FILES := busy_poller netlink-dumps diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 5b9baf708950..61e5116987f3 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -107,3 +107,5 @@ CONFIG_XFRM_INTERFACE=m CONFIG_XFRM_USER=m 
CONFIG_IP_NF_MATCH_RPFILTER=m CONFIG_IP6_NF_MATCH_RPFILTER=m +CONFIG_IPV6_ILA=m +CONFIG_IPV6_RPL_LWTUNNEL=y diff --git a/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh b/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh new file mode 100755 index 000000000000..9161f16154a5 --- /dev/null +++ b/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh @@ -0,0 +1,250 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Author: Justin Iurman <justin.iurman(a)uliege.be> +# +# WARNING +# ------- +# This is just a dummy script that triggers encap cases with possible dst cache +# reference loops in affected lwt users (see list below). Some cases are +# pathological configurations for simplicity, others are valid. Overall, we +# don't want this issue to happen, no matter what. In order to catch any +# reference loops, kmemleak MUST be used. The results alone are always blindly +# successful, don't rely on them. Note that the following tests may crash the +# kernel if the fix to prevent lwtunnel_{input|output|xmit}() reentry loops is +# not present. +# +# Affected lwt users so far (please update accordingly if needed): +# - ila_lwt (output only) +# - ioam6_iptunnel (output only) +# - rpl_iptunnel (both input and output) +# - seg6_iptunnel (both input and output) + +source lib.sh + +check_compatibility() +{ + setup_ns tmp_node &>/dev/null + if [ $? != 0 ] + then + echo "SKIP: Cannot create netns." + exit $ksft_skip + fi + + ip link add name veth0 netns $tmp_node type veth \ + peer name veth1 netns $tmp_node &>/dev/null + local ret=$? + + ip -netns $tmp_node link set veth0 up &>/dev/null + ret=$((ret + $?)) + + ip -netns $tmp_node link set veth1 up &>/dev/null + ret=$((ret + $?)) + + if [ $ret != 0 ] + then + echo "SKIP: Cannot configure links." + cleanup_ns $tmp_node + exit $ksft_skip + fi + + lsmod 2>/dev/null | grep -q "ila" + ila_lsmod=$? 
+ [ $ila_lsmod != 0 ] && modprobe ila &>/dev/null + + ip -netns $tmp_node route add 2001:db8:1::/64 \ + encap ila 1:2:3:4 csum-mode no-action ident-type luid hook-type output \ + dev veth0 &>/dev/null + + ip -netns $tmp_node route add 2001:db8:2::/64 \ + encap ioam6 trace prealloc type 0x800000 ns 0 size 4 dev veth0 &>/dev/null + + ip -netns $tmp_node route add 2001:db8:3::/64 \ + encap rpl segs 2001:db8:3::1 dev veth0 &>/dev/null + + ip -netns $tmp_node route add 2001:db8:4::/64 \ + encap seg6 mode inline segs 2001:db8:4::1 dev veth0 &>/dev/null + + ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap ila" + skip_ila=$? + + ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap ioam6" + skip_ioam6=$? + + ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap rpl" + skip_rpl=$? + + ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap seg6" + skip_seg6=$? + + cleanup_ns $tmp_node +} + +setup() +{ + setup_ns alpha beta gamma &>/dev/null + + ip link add name veth-alpha netns $alpha type veth \ + peer name veth-betaL netns $beta &>/dev/null + ip link add name veth-betaR netns $beta type veth \ + peer name veth-gamma netns $gamma &>/dev/null + + ip -netns $alpha link set veth-alpha name veth0 &>/dev/null + ip -netns $beta link set veth-betaL name veth0 &>/dev/null + ip -netns $beta link set veth-betaR name veth1 &>/dev/null + ip -netns $gamma link set veth-gamma name veth0 &>/dev/null + + ip -netns $alpha addr add 2001:db8:1::2/64 dev veth0 &>/dev/null + ip -netns $alpha link set veth0 up &>/dev/null + ip -netns $alpha link set lo up &>/dev/null + ip -netns $alpha route add 2001:db8:2::/64 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + ip -netns $beta addr add 2001:db8:1::1/64 dev veth0 &>/dev/null + ip -netns $beta addr add 2001:db8:2::1/64 dev veth1 &>/dev/null + ip -netns $beta link set veth0 up &>/dev/null + ip -netns $beta link set veth1 up &>/dev/null + ip -netns $beta link set lo up &>/dev/null + ip -netns $beta route del 2001:db8:2::/64 + ip 
-netns $beta route add 2001:db8:2::/64 dev veth1 + ip netns exec $beta sysctl -wq net.ipv6.conf.all.forwarding=1 &>/dev/null + + ip -netns $gamma addr add 2001:db8:2::2/64 dev veth0 &>/dev/null + ip -netns $gamma link set veth0 up &>/dev/null + ip -netns $gamma link set lo up &>/dev/null + ip -netns $gamma route add 2001:db8:1::/64 \ + via 2001:db8:2::1 dev veth0 &>/dev/null + + sleep 1 + + ip netns exec $alpha ping6 -c 5 -W 1 2001:db8:2::2 &>/dev/null + if [ $? != 0 ] + then + echo "SKIP: Setup failed." + cleanup + exit $ksft_skip + fi + + sleep 1 +} + +cleanup() +{ + cleanup_ns $alpha $beta $gamma + [ $ila_lsmod != 0 ] && modprobe -r ila &>/dev/null +} + +run_ila() +{ + if [ $skip_ila != 0 ] + then + echo "SKIP: ila (output)" + return + fi + + ip -netns $beta route del 2001:db8:2::/64 + + ip -netns $beta route add 2001:db8:2:0:0:0:0:2/128 \ + encap ila 2001:db8:2:0 csum-mode no-action ident-type luid hook-type output \ + dev veth1 &>/dev/null + sleep 1 + + echo "TEST: ila (output)" + ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 + + ip -netns $beta route del 2001:db8:2:0:0:0:0:2/128 + ip -netns $beta route add 2001:db8:2::/64 dev veth1 + sleep 1 +} + +run_ioam6() +{ + if [ $skip_ioam6 != 0 ] + then + echo "SKIP: ioam6 (output)" + return + fi + + ip -netns $beta route change 2001:db8:2::/64 \ + encap ioam6 trace prealloc type 0x800000 ns 1 size 4 \ + dev veth1 &>/dev/null + sleep 1 + + echo "TEST: ioam6 (output)" + ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 +} + +run_rpl() +{ + if [ $skip_rpl != 0 ] + then + echo "SKIP: rpl (input)" + echo "SKIP: rpl (output)" + return + fi + + ip -netns $beta route change 2001:db8:2::/64 \ + encap rpl segs 2001:db8:2::2 \ + dev veth1 &>/dev/null + sleep 1 + + echo "TEST: rpl (input)" + ip netns exec $alpha ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 + + echo "TEST: rpl (output)" + ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 +} + +run_seg6() 
+{ + if [ $skip_seg6 != 0 ] + then + echo "SKIP: seg6 (input)" + echo "SKIP: seg6 (output)" + return + fi + + ip -netns $beta route change 2001:db8:2::/64 \ + encap seg6 mode inline segs 2001:db8:2::2 \ + dev veth1 &>/dev/null + sleep 1 + + echo "TEST: seg6 (input)" + ip netns exec $alpha ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 + + echo "TEST: seg6 (output)" + ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 +} + +run() +{ + run_ila + run_ioam6 + run_rpl + run_seg6 +} + +if [ "$(id -u)" -ne 0 ] +then + echo "SKIP: Need root privileges." + exit $ksft_skip +fi + +if [ ! -x "$(command -v ip)" ] +then + echo "SKIP: Could not run test without ip tool." + exit $ksft_skip +fi + +check_compatibility +setup +run +cleanup + +exit $ksft_pass -- 2.34.1

7 months, 2 weeks

2
1
0 0

Re: [PATCH 3/3] rust: replace `addr_of[_mut]!` with `&raw [mut]`

by Benno Lossin

On Thu Mar 13, 2025 at 6:33 AM CET, Antonio Hickey wrote: > Replacing all occurrences of `addr_of!(place)` with `&raw place`, and > all occurrences of `addr_of_mut!(place)` with `&raw mut place`. > > Utilizing the new feature will allow us to reduce macro complexity, and > improve consistency with existing reference syntax as `&raw`, `&raw mut` > is very similar to `&`, `&mut` making it fit more naturally with other > existing code. > > Depends on: Patch 1/3 0001-rust-enable-raw_ref_op-feature.patch This information shouldn't be in the commit message. You can put it below the `---` (that won't end up in the commit message). But since you sent this as part of a series, you don't need to mention it. > Suggested-by: Benno Lossin <y86-dev(a)protonmail.com> > Link: https://github.com/Rust-for-Linux/linux/issues/1148 > Signed-off-by: Antonio Hickey <contact(a)antoniohickey.com> > --- > rust/kernel/block/mq/request.rs | 4 ++-- > rust/kernel/faux.rs | 4 ++-- > rust/kernel/fs/file.rs | 2 +- > rust/kernel/init.rs | 8 ++++---- > rust/kernel/init/macros.rs | 28 +++++++++++++------------- > rust/kernel/jump_label.rs | 4 ++-- > rust/kernel/kunit.rs | 4 ++-- > rust/kernel/list.rs | 2 +- > rust/kernel/list/impl_list_item_mod.rs | 6 +++--- > rust/kernel/net/phy.rs | 4 ++-- > rust/kernel/pci.rs | 4 ++-- > rust/kernel/platform.rs | 4 +--- > rust/kernel/rbtree.rs | 22 ++++++++++---------- > rust/kernel/sync/arc.rs | 2 +- > rust/kernel/task.rs | 4 ++-- > rust/kernel/workqueue.rs | 8 ++++---- > 16 files changed, 54 insertions(+), 56 deletions(-) [...] > diff --git a/rust/kernel/jump_label.rs b/rust/kernel/jump_label.rs > index 4e974c768dbd..05d4564714c7 100644 > --- a/rust/kernel/jump_label.rs > +++ b/rust/kernel/jump_label.rs > @@ -20,8 +20,8 @@ > #[macro_export] > macro_rules! 
static_branch_unlikely { > ($key:path, $keytyp:ty, $field:ident) => {{ > - let _key: *const $keytyp = ::core::ptr::addr_of!($key); > - let _key: *const $crate::bindings::static_key_false = ::core::ptr::addr_of!((*_key).$field); > + let _key: *const $keytyp = &raw $key; This should be `&raw const $key`. I wrote that wrongly in the issue. > + let _key: *const $crate::bindings::static_key_false = &raw (*_key).$field; Same here. > let _key: *const $crate::bindings::static_key = _key.cast(); > > #[cfg(not(CONFIG_JUMP_LABEL))] > diff --git a/rust/kernel/kunit.rs b/rust/kernel/kunit.rs > index 824da0e9738a..18357dd782ed 100644 > --- a/rust/kernel/kunit.rs > +++ b/rust/kernel/kunit.rs > @@ -128,9 +128,9 @@ unsafe impl Sync for UnaryAssert {} > unsafe { > $crate::bindings::__kunit_do_failed_assertion( > kunit_test, > - core::ptr::addr_of!(LOCATION.0), > + &raw LOCATION.0, And here. > $crate::bindings::kunit_assert_type_KUNIT_ASSERTION, > - core::ptr::addr_of!(ASSERTION.0.assert), > + &raw ASSERTION.0.assert, Lastly here as well. --- Cheers, Benno

7 months, 2 weeks

1
0
0 0

arm64: Kernel crash at devm_kmalloc include/linux/device.h: drivers/base/devres.c

by Naresh Kamboju

Regression on arm64 FVP and rock-pi-4-b while booting the Linux next-20250312 and next-20250313. the following crash noticed with KVM Kconfigs. First seen on next-20250312. Good: next-20250311 Bad: 6.14.0-rc6-next-20250312 and 6.14.0-rc6-next-20250313 Boot regression: arm64 devm_kmalloc rk_iommu_of_xlate kernel panic Reported-by: Linux Kernel Functional Testing <lkft(a)linaro.org> ## Boot log [ 0.000000] Booting Linux on physical CPU 0x0000000000 [0x410fd034] [ 0.000000] Linux version 6.14.0-rc6-next-20250313 (tuxmake@tuxmake) (aarch64-linux-gnu-gcc (Debian 13.3.0-12) 13.3.0, GNU ld (GNU Binutils for Debian) 2.44) #1 SMP PREEMPT @1741850074 [ 0.000000] KASLR disabled due to lack of seed [ 0.000000] Machine model: Radxa ROCK Pi 4B <trim> [ 1.028830] SuperH (H)SCI(F) driver initialized [ 1.030168] STM32 USART driver initialized [ 1.052086] Unable to handle kernel NULL pointer dereference at virtual address 00000000000002a0 [ 1.052877] Mem abort info: [ 1.053137] ESR = 0x0000000096000004 [ 1.053481] EC = 0x25: DABT (current EL), IL = 32 bits [ 1.053960] SET = 0, FnV = 0 [ 1.054241] EA = 0, S1PTW = 0 [ 1.054530] FSC = 0x04: level 0 translation fault [ 1.054971] Data abort info: [ 1.055236] ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000 [ 1.055760] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 [ 1.056216] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [ 1.056693] [00000000000002a0] user address but active_mm is swapper [ 1.057264] Internal error: Oops: 0000000096000004 [#1] PREEMPT SMP [ 1.057821] Modules linked in: [ 1.058105] CPU: 4 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.14.0-rc6-next-20250313 #1 [ 1.058823] Hardware name: Radxa ROCK Pi 4B (DT) [ 1.059236] pstate: 00000005 (nzcv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 1.059854] pc : devm_kmalloc (include/linux/device.h:805 drivers/base/devres.c:853) [ 1.060225] lr : rk_iommu_of_xlate (drivers/iommu/rockchip-iommu.c:1152) [ 1.060614] sp : ffff80008306b7c0 [ 1.060913] x29: ffff80008306b7c0 x28: ffff8000825a9068 x27: 
ffff80008225ce00 [ 1.061560] x26: 0000000000000001 x25: ffff80008225e188 x24: ffff000000eafa80 [ 1.062206] x23: 0000000000000000 x22: ffff800081af6a98 x21: 0000000000000000 [ 1.062850] x20: 0000000000000010 x19: 0000000000000000 x18: 00000000ffffffff [ 1.063493] x17: ffff0000019abc00 x16: ffff000001985400 x15: ffff80008306b820 [ 1.064136] x14: ffff000002736a1c x13: ffff00000273627c x12: 0101010101010101 [ 1.064780] x11: 7f7f7f7f7f7f7f7f x10: 000000000016f8a0 x9 : ffff800080b65870 [ 1.065424] x8 : ffff80008306b718 x7 : 0000000000000000 x6 : 0000000000000001 [ 1.066066] x5 : ffff800082890000 x4 : ffff8000828905f0 x3 : 0000000000000000 [ 1.066708] x2 : 0000000000000dc0 x1 : 0000000000000010 x0 : 0000000000000090 [ 1.067351] Call trace: [ 1.067576] devm_kmalloc (include/linux/device.h:805 drivers/base/devres.c:853) (P) [ 1.067937] rk_iommu_of_xlate (drivers/iommu/rockchip-iommu.c:1152) [ 1.068295] of_iommu_xlate (drivers/iommu/of_iommu.c:39) [ 1.068629] of_iommu_configure (drivers/iommu/of_iommu.c:71 drivers/iommu/of_iommu.c:98 drivers/iommu/of_iommu.c:149) [ 1.069008] of_dma_configure_id (drivers/of/device.c:161) [ 1.069392] platform_dma_configure (drivers/base/platform.c:1455) [ 1.069796] __iommu_probe_device (drivers/iommu/iommu.c:430 drivers/iommu/iommu.c:569) [ 1.070194] probe_iommu_group (drivers/iommu/iommu.c:1722) [ 1.070553] bus_for_each_dev (drivers/base/bus.c:370) [ 1.070906] iommu_device_register (drivers/iommu/iommu.c:1875 drivers/iommu/iommu.c:276) [ 1.071304] rk_iommu_probe (drivers/iommu/rockchip-iommu.c:1263) [ 1.071652] platform_probe (drivers/base/platform.c:1404) [ 1.071986] really_probe (drivers/base/dd.c:579 drivers/base/dd.c:658) [ 1.072316] __driver_probe_device (drivers/base/dd.c:800) [ 1.072716] driver_probe_device (drivers/base/dd.c:830) [ 1.073100] __driver_attach (drivers/base/dd.c:1217) [ 1.073453] bus_for_each_dev (drivers/base/bus.c:370) [ 1.073805] driver_attach (drivers/base/dd.c:1235) [ 1.074135] bus_add_driver 
(drivers/base/bus.c:678) [ 1.074488] driver_register (drivers/base/driver.c:249) [ 1.074836] __platform_driver_register (drivers/base/platform.c:868) [ 1.075261] rk_iommu_driver_init (drivers/iommu/rockchip-iommu.c:1380) [ 1.075644] do_one_initcall (init/main.c:1257) [ 1.075996] kernel_init_freeable (init/main.c:1318 (discriminator 1) init/main.c:1335 (discriminator 1) init/main.c:1354 (discriminator 1) init/main.c:1567 (discriminator 1)) [ 1.076393] kernel_init (init/main.c:1461) [ 1.076720] ret_from_fork (arch/arm64/kernel/entry.S:863) [ 1.077053] Code: aa0003f5 b1020020 a90153f3 aa0103f4 (b942a2b7) All code ======== 0: aa0003f5 mov x21, x0 4: b1020020 adds x0, x1, #0x80 8: a90153f3 stp x19, x20, [sp, #16] c: aa0103f4 mov x20, x1 10:* b942a2b7 ldr w23, [x21, #672] <-- trapping instruction Code starting with the faulting instruction =========================================== 0: b942a2b7 ldr w23, [x21, #672] [ 1.077594] ---[ end trace 0000000000000000 ]--- [ 1.078059] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b [ 1.078736] SMP: stopping secondary CPUs [ 1.079099] Kernel Offset: disabled [ 1.079412] CPU features: 0x0400,00041058,01000400,8200421b [ 1.079909] Memory Limit: none [ 1.080188] ---[ end Kernel panic - not syncing: Attempted to kill init! 
exitcode=0x0000000b ]--- ## Source * Kernel version: 6.14.0-rc6-next-20250313 * Git tree: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git * Git sha: 613af589b566093ce7388bf3202fca70d742c166 * Git describe: 6.14.0-rc6-next-20250313 * Project details: https://qa-reports.linaro.org/lkft/linux-next-master/build/next-20250305/ * Architectures: arm64 * Compilers: gcc-13 ## Test data * Test log: https://qa-reports.linaro.org/lkft/linux-next-master/build/next-20250313/te… * Test history: https://qa-reports.linaro.org/lkft/linux-next-master/build/next-20250313/te… * Test details: https://qa-reports.linaro.org/lkft/linux-next-master/build/next-20250313/te… * Build link: https://storage.tuxsuite.com/public/linaro/lkft/builds/2uFgT27J8KGNrdNXzKgU… * Kernel config: https://storage.tuxsuite.com/public/linaro/lkft/builds/2uFgT27J8KGNrdNXzKgU… -- Linaro LKFT https://lkft.linaro.org

7 months, 2 weeks

2
1
0 0

[PATCH net-next] selftests: net: bump GRO timeout for gro/setup_veth

by Jakub Kicinski

Commit 51bef03e1a71 ("selftests/net: deflake GRO tests") recently switched to NAPI suspension, and lowered the timeout from 1ms to 100us. This started causing flakes in netdev-run CI. Let's bump it to 200us. In a quick test of a debug kernel I see failures with 100us, with 200us in 5 runs I see 2 completely clean runs and 3 with a single retry (GRO test will retry up to 5 times). Signed-off-by: Jakub Kicinski <kuba(a)kernel.org> --- CC: krakauer(a)google.com CC: willemb(a)google.com CC: shuah(a)kernel.org CC: linux-kselftest(a)vger.kernel.org --- tools/testing/selftests/net/setup_veth.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/setup_veth.sh b/tools/testing/selftests/net/setup_veth.sh index eb3182066d12..152bf4c65747 100644 --- a/tools/testing/selftests/net/setup_veth.sh +++ b/tools/testing/selftests/net/setup_veth.sh @@ -11,7 +11,7 @@ setup_veth_ns() { local -r ns_mac="$4" [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}" - echo 100000 > "/sys/class/net/${ns_dev}/gro_flush_timeout" + echo 200000 > "/sys/class/net/${ns_dev}/gro_flush_timeout" echo 1 > "/sys/class/net/${ns_dev}/napi_defer_hard_irqs" ip link set dev "${ns_dev}" netns "${ns_name}" mtu 65535 ip -netns "${ns_name}" link set dev "${ns_dev}" up -- 2.48.1

7 months, 3 weeks

3
2
0 0

[PATCH net-next v22 00/23] Introducing OpenVPN Data Channel Offload

by Antonio Quartulli

Notable changes since v21: * accessed crypto_slot->primary_idx via READ/WRITE_ONCE * made ovpn_aead_init() static * converted link tx/rx packet counters from u32 to uint * ensured all u32 NL attributes are read by nla_get_u32() * ensured all u32 NL attributes are written by nla_put_u32() * reset cache upon float or local endpoint change * dropped check for delta > 0 in keepalive worker scheduling * improved comments in update endpoints logic * converted local_ip to void* to avoid useless casts Please note that some patches were already reviewed/tested by a few people. These patches have retained the tags as they have hardly been touched. The latest code can also be found at: https://github.com/OpenVPN/ovpn-net-next Thanks a lot! Best Regards, Antonio Quartulli OpenVPN Inc. --- Changes in v21: - EDITME: describe what is new in this series revision. - EDITME: use bulletpoints and terse descriptions. - Link to v20: https://lore.kernel.org/r/20250227-b4-ovpn-v20-0-93f363310834@openvpn.net --- Antonio Quartulli (23): net: introduce OpenVPN Data Channel Offload (ovpn) ovpn: add basic netlink support ovpn: add basic interface creation/destruction/management routines ovpn: keep carrier always on for MP interfaces ovpn: introduce the ovpn_peer object ovpn: introduce the ovpn_socket object ovpn: implement basic TX path (UDP) ovpn: implement basic RX path (UDP) ovpn: implement packet processing ovpn: store tunnel and transport statistics ovpn: implement TCP transport skb: implement skb_send_sock_locked_with_flags() ovpn: add support for MSG_NOSIGNAL in tcp_sendmsg ovpn: implement multi-peer support ovpn: implement peer lookup logic ovpn: implement keepalive mechanism ovpn: add support for updating local or remote UDP endpoint ovpn: implement peer add/get/dump/delete via netlink ovpn: implement key add/get/del/swap via netlink ovpn: kill key and notify userspace in case of IV exhaustion ovpn: notify userspace when a peer is deleted ovpn: add basic ethtool support 
testing/selftests: add test tool and scripts for ovpn module Documentation/netlink/specs/ovpn.yaml | 367 +++ Documentation/netlink/specs/rt_link.yaml | 16 + MAINTAINERS | 11 + drivers/net/Kconfig | 15 + drivers/net/Makefile | 1 + drivers/net/ovpn/Makefile | 22 + drivers/net/ovpn/bind.c | 55 + drivers/net/ovpn/bind.h | 101 + drivers/net/ovpn/crypto.c | 211 ++ drivers/net/ovpn/crypto.h | 145 ++ drivers/net/ovpn/crypto_aead.c | 409 ++++ drivers/net/ovpn/crypto_aead.h | 29 + drivers/net/ovpn/io.c | 462 ++++ drivers/net/ovpn/io.h | 34 + drivers/net/ovpn/main.c | 339 +++ drivers/net/ovpn/main.h | 14 + drivers/net/ovpn/netlink-gen.c | 213 ++ drivers/net/ovpn/netlink-gen.h | 41 + drivers/net/ovpn/netlink.c | 1249 ++++++++++ drivers/net/ovpn/netlink.h | 18 + drivers/net/ovpn/ovpnpriv.h | 57 + drivers/net/ovpn/peer.c | 1367 +++++++++++ drivers/net/ovpn/peer.h | 163 ++ drivers/net/ovpn/pktid.c | 129 ++ drivers/net/ovpn/pktid.h | 87 + drivers/net/ovpn/proto.h | 118 + drivers/net/ovpn/skb.h | 61 + drivers/net/ovpn/socket.c | 244 ++ drivers/net/ovpn/socket.h | 49 + drivers/net/ovpn/stats.c | 21 + drivers/net/ovpn/stats.h | 47 + drivers/net/ovpn/tcp.c | 592 +++++ drivers/net/ovpn/tcp.h | 36 + drivers/net/ovpn/udp.c | 442 ++++ drivers/net/ovpn/udp.h | 25 + include/linux/skbuff.h | 2 + include/uapi/linux/if_link.h | 15 + include/uapi/linux/ovpn.h | 109 + include/uapi/linux/udp.h | 1 + net/core/skbuff.c | 18 +- net/ipv6/af_inet6.c | 1 + net/ipv6/udp.c | 1 + tools/testing/selftests/Makefile | 1 + tools/testing/selftests/net/ovpn/.gitignore | 2 + tools/testing/selftests/net/ovpn/Makefile | 31 + tools/testing/selftests/net/ovpn/common.sh | 92 + tools/testing/selftests/net/ovpn/config | 10 + tools/testing/selftests/net/ovpn/data64.key | 5 + tools/testing/selftests/net/ovpn/ovpn-cli.c | 2395 ++++++++++++++++++++ tools/testing/selftests/net/ovpn/tcp_peers.txt | 5 + .../testing/selftests/net/ovpn/test-chachapoly.sh | 9 + .../selftests/net/ovpn/test-close-socket-tcp.sh | 9 + 
.../selftests/net/ovpn/test-close-socket.sh | 45 + tools/testing/selftests/net/ovpn/test-float.sh | 9 + tools/testing/selftests/net/ovpn/test-tcp.sh | 9 + tools/testing/selftests/net/ovpn/test.sh | 113 + tools/testing/selftests/net/ovpn/udp_peers.txt | 5 + 57 files changed, 10072 insertions(+), 5 deletions(-) --- base-commit: 40587f749df216889163dd6e02d88ad53e759e66 change-id: 20241002-b4-ovpn-eeee35c694a2 Best regards, -- Antonio Quartulli <antonio(a)openvpn.net>

7 months, 3 weeks

2
24
0 0

[PATCH v7 00/10] iommufd: Add vIOMMU infrastructure (Part-2: vDEVICE)

by Nicolin Chen

Following the previous vIOMMU series, this adds another vDEVICE structure, representing the association from an iommufd_device to an iommufd_viommu. This gives the whole architecture a new "v" layer: _______________________________________________________________________ | iommufd (with vIOMMU/vDEVICE) | | _____________ _____________ | | | | | | | | |----------------| vIOMMU |<---| vDEVICE |<------| | | | | | |_____________| | | | | ______ | | _____________ ___|____ | | | | | | | | | | | | | | | IOAS |<---|(HWPT_PAGING)|<---| HWPT_NESTED |<--| DEVICE | | | | |______| |_____________| |_____________| |________| | |______|________|______________|__________________|_______________|_____| | | | | | ______v_____ | ______v_____ ______v_____ ___v__ | struct | | PFN | (paging) | | (nested) | |struct| |iommu_device| |------>|iommu_domain|<----|iommu_domain|<----|device| |____________| storage|____________| |____________| |______| This vDEVICE object is used to collect and store all vIOMMU-related device information/attributes in a VM. As an initial series for vDEVICE, add only the virt_id to the vDEVICE, which is a vIOMMU specific device ID in a VM: e.g. vSID of ARM SMMUv3, vDeviceID of AMD IOMMU, and vRID of Intel VT-d to a Context Table. This virt_id helps IOMMU drivers to link the vID to a pID of the device against the physical IOMMU instance. This is essential for a vIOMMU-based invalidation, where the request contains a device's vID for a device cache flush, e.g. ATC invalidation. Therefore, with this vDEVICE object, support a vIOMMU-based invalidation, by reusing IOMMUFD_CMD_HWPT_INVALIDATE for a vIOMMU object to flush cache with a given driver data. As for the implementation of the series, add driver support in ARM SMMUv3 for a real world use case. 
This series is on Github: https://github.com/nicolinc/iommufd/commits/iommufd_viommu_p2-v7 (QEMU branch for testing will be provided in Jason's nesting series) Changelog v7 * Added "Reviewed-by" from Jason * Corrected a line of comments in iommufd_vdevice_destroy() v6 https://lore.kernel.org/all/cover.1730313494.git.nicolinc@nvidia.com/ * Fixed kdoc in the uAPI header * Fixed indentations in iommufd.rst * Replaced vdev->idev with vdev->dev * Added "Reviewed-by" from Kevin and Jason * Updated kdoc of struct iommu_vdevice_alloc * Fixed lockdep function call in iommufd_viommu_find_dev * Added missing iommu_dev validation between viommu and idev * Skipped SMMUv3 driver changes (to post in a separate series) * Replaced !cache_invalidate_user in WARN_ON of the allocation path with cache_invalidate_user validation in iommufd_hwpt_invalidate v5 https://lore.kernel.org/all/cover.1729897278.git.nicolinc@nvidia.com/ * Dropped driver-allocated vDEVICE support * Changed vdev_to_dev helper to iommufd_viommu_find_dev v4 https://lore.kernel.org/all/cover.1729555967.git.nicolinc@nvidia.com/ * Added missing brackets in switch-case * Fixed the unreleased idev refcount issue * Reworked the iommufd_vdevice_alloc allocator * Dropped support for IOMMU_VIOMMU_TYPE_DEFAULT * Added missing TEST_LENGTH and fail_nth coverages * Added a verification to the driver-allocated vDEVICE object * Added an iommufd_vdevice_abort for a missing mutex protection * Added a u64 structure arm_vsmmu_invalidation_cmd for user command conversion v3 https://lore.kernel.org/all/cover.1728491532.git.nicolinc@nvidia.com/ * Added Jason's Reviewed-by * Split this invalidation part out of the part-1 series * Repurposed VDEV_ID ioctl to a wider vDEVICE structure and ioctl * Reduced viommu_api functions by allowing drivers to access viommu and vdevice structure directly * Dropped vdevs_rwsem by using xa_lock instead * Dropped arm_smmu_cache_invalidate_user v2 
https://lore.kernel.org/all/cover.1724776335.git.nicolinc@nvidia.com/ * Limited vdev_id to one per idev * Added a rw_sem to protect the vdev_id list * Reworked driver-level APIs with proper lockings * Added a new viommu_api file for IOMMUFD_DRIVER config * Dropped useless iommu_dev pointer from the viommu structure * Added missing index numbers to new types in the uAPI header * Dropped IOMMU_VIOMMU_INVALIDATE uAPI; Instead, reuse the HWPT one * Reworked mock_viommu_cache_invalidate() using the new iommu helper * Reordered details of set/unset_vdev_id handlers for proper lockings v1 https://lore.kernel.org/all/cover.1723061377.git.nicolinc@nvidia.com/ Thanks! Nicolin Jason Gunthorpe (1): iommu: Add iommu_copy_struct_from_full_user_array helper Nicolin Chen (9): iommufd/viommu: Add IOMMUFD_OBJ_VDEVICE and IOMMU_VDEVICE_ALLOC ioctl iommufd/selftest: Add IOMMU_VDEVICE_ALLOC test coverage iommu/viommu: Add cache_invalidate to iommufd_viommu_ops iommufd: Allow hwpt_id to carry viommu_id for IOMMU_HWPT_INVALIDATE iommufd/viommu: Add iommufd_viommu_find_dev helper iommufd/selftest: Add mock_viommu_cache_invalidate iommufd/selftest: Add IOMMU_TEST_OP_DEV_CHECK_CACHE test command iommufd/selftest: Add vIOMMU coverage for IOMMU_HWPT_INVALIDATE ioctl Documentation: userspace-api: iommufd: Update vDEVICE drivers/iommu/iommufd/iommufd_private.h | 18 ++ drivers/iommu/iommufd/iommufd_test.h | 30 +++ include/linux/iommu.h | 48 ++++- include/linux/iommufd.h | 22 ++ include/uapi/linux/iommufd.h | 31 ++- tools/testing/selftests/iommu/iommufd_utils.h | 83 +++++++ drivers/iommu/iommufd/driver.c | 13 ++ drivers/iommu/iommufd/hw_pagetable.c | 40 +++- drivers/iommu/iommufd/main.c | 6 + drivers/iommu/iommufd/selftest.c | 98 ++++++++- drivers/iommu/iommufd/viommu.c | 76 +++++++ tools/testing/selftests/iommu/iommufd.c | 204 +++++++++++++++++- .../selftests/iommu/iommufd_fail_nth.c | 4 + Documentation/userspace-api/iommufd.rst | 41 +++- 14 files changed, 688 insertions(+), 26 deletions(-) 
base-commit: 0780dd4af09a5360392f5c376c35ffc2599a9c0e -- 2.43.0

7 months, 3 weeks

4
14
0 0

[PATCHv5 net 0/3] bond: fix xfrm offload issues

by Hangbin Liu

The first patch fixes incorrect lock usage in the bond driver. The second patch fixes the xfrm offload feature setup in active-backup mode. The third patch adds an ipsec offload test. v5: use list_for_each_entry_safe() when del item in list (Nikolay Aleksandrov) do not call spin_lock_bh in sleep function xdo_dev_state_free (Nikolay Aleksandrov) set xso.real_dev = NULL to avoid __xfrm_state_delete() is called in parallel() (Cosmin Ratiu) remove spin lock in bond_ipsec_add_sa_all() as it doesn't resolve the race condition. v4: hold xs->lock for bond_ipsec_{del, add}_sa_all (Cosmin Ratiu) use the defer helpers in lib.sh for selftest (Petr Machata) v3: move the ipsec deletion to bond_ipsec_free_sa (Cosmin Ratiu) v2: do not turn carrier on if bond change link failed (Nikolay Aleksandrov) move the mutex lock to a work queue (Cosmin Ratiu) Hangbin Liu (3): bonding: fix calling sleeping function in spin lock and some race conditions bonding: fix xfrm offload feature setup on active-backup mode selftests: bonding: add ipsec offload test drivers/net/bonding/bond_main.c | 71 +++++--- drivers/net/bonding/bond_netlink.c | 16 +- include/net/bonding.h | 1 + .../selftests/drivers/net/bonding/Makefile | 3 +- .../drivers/net/bonding/bond_ipsec_offload.sh | 154 ++++++++++++++++++ .../selftests/drivers/net/bonding/config | 4 + 6 files changed, 222 insertions(+), 27 deletions(-) create mode 100755 tools/testing/selftests/drivers/net/bonding/bond_ipsec_offload.sh -- 2.46.0

7 months, 3 weeks

5
13
0 0

[PATCH] selftests/mm/cow: Fix the incorrect error handling

by Cyan Yang

Two error-handling checks did not test the correct return value. This patch fixes them. Fixes: f4b5fd6946e244cdedc3bbb9a1f24c8133b2077a ("selftests/vm: anon_cow: THP tests") Signed-off-by: Cyan Yang <cyan.yang(a)sifive.com> --- tools/testing/selftests/mm/cow.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index 9446673645eb..16fcadc090a4 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -876,13 +876,13 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) mremap_size = thpsize / 2; mremap_mem = mmap(NULL, mremap_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (mem == MAP_FAILED) { + if (mremap_mem == MAP_FAILED) { ksft_test_result_fail("mmap() failed\n"); goto munmap; } tmp = mremap(mem + mremap_size, mremap_size, mremap_size, MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem); - if (tmp != mremap_mem) { + if (tmp == MAP_FAILED) { ksft_test_result_fail("mremap() failed\n"); goto munmap; } -- 2.39.5 (Apple Git-154)

7 months, 3 weeks

3
4
0 0

[PATCH] selftest/powerpc/mm/pkey: fix build-break introduced by commit 00894c3fc917

by Madhavan Srinivasan

Build break was reported in the powerpc mailing list for next-20250218 with below errors make[1]: Nothing to be done for 'all'. BUILD_TARGET=/root/venkat/linux-next/tools/testing/selftests/powerpc/mm; mkdir -p $BUILD_TARGET; make OUTPUT=$BUILD_TARGET -k -C mm all CC pkey_exec_prot In file included from pkey_exec_prot.c:18: /root/venkat/linux-next/tools/testing/selftests/powerpc/include/pkeys.h: In function ‘pkeys_unsupported’: /root/venkat/linux-next/tools/testing/selftests/powerpc/include/pkeys.h:96:34: error: ‘PKEY_UNRESTRICTED’ undeclared (first use in this function) 96 | pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED); | ^~~~~~~~~~~~~~~~~ https://lore.kernel.org/all/20250113170619.484698-2-yury.khrustalev@arm.com/ patchset has been queued to arm64/for-next/pkey_unrestricted which is causing a build break in the selftest/powerpc builds. Commit 6d61527d931ba ("mm/pkey: Add PKEY_UNRESTRICTED macro") added a macro PKEY_UNRESTRICTED to handle implicit literal value of 0x0 (which is "unrestricted"). Add the same to selftest/powerpc/pkeys.h to fix the reported build break. Reported-by: Venkat Rao Bagalkote <venkat88(a)linux.ibm.com> Closes: https://lore.kernel.org/lkml/3267ea6e-5a1a-4752-96ef-8351c912d386@linux.ibm… Tested-by: Venkat Rao Bagalkote <venkat88(a)linux.ibm.com> Signed-off-by: Madhavan Srinivasan <maddy(a)linux.ibm.com> --- Catalin, can you take this fix via arm64/for-next/pkey_unrestricted? 
Patch applies clean on top of arm64/for-next/pkey_unrestricted tools/testing/selftests/powerpc/include/pkeys.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/powerpc/include/pkeys.h b/tools/testing/selftests/powerpc/include/pkeys.h index c6d4063dd4f6..d6deb6ffa1b9 100644 --- a/tools/testing/selftests/powerpc/include/pkeys.h +++ b/tools/testing/selftests/powerpc/include/pkeys.h @@ -24,6 +24,9 @@ #undef PKEY_DISABLE_EXECUTE #define PKEY_DISABLE_EXECUTE 0x4 +#undef PKEY_UNRESTRICTED +#define PKEY_UNRESTRICTED 0x0 + /* Older versions of libc do not define this */ #ifndef SEGV_PKUERR #define SEGV_PKUERR 4 -- 2.48.1

7 months, 3 weeks

2
2
0 0

[PATCH 6.1.y] selftests/vm: fix undefined reference of the `default_huge_page_size`

by Andrey Kalachev

The commit a584c7734a4d ("selftests: mm: fix map_hugetlb failure on 64K page size systems") backported the fix from v6.8 to stable v6.1. The patch uses the default_huge_page_size() function, whose definition was moved into vm_util.[ch] by commit bd4d67e76f699 ("selftests/mm: merge default_huge_page_size() into one"), merged upstream in v6.4. However, v6.1 has no common definition/declaration of default_huge_page_size(), so the following build error is seen: map_hugetlb.c:79:25: warning: implicit declaration of function ‘default_huge_page_size’ [-Wimplicit-function-declaration] 79 | hugepage_size = default_huge_page_size(); | ^~~~~~~~~~~~~~~~~~~~~~ /usr/bin/ld: /tmp/ccx95BZz.o: in function `main': map_hugetlb.c:(.text+0x104): undefined reference to `default_huge_page_size' Place default_huge_page_size() function body into map_hugetlb.c to fix this issue. Fixes: a584c7734a4d ("selftests: mm: fix map_hugetlb failure on 64K page size systems") Signed-off-by: Andrey Kalachev <kalachev(a)swemel.ru> --- tools/testing/selftests/vm/map_hugetlb.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c index c65c55b7a789..5826c50b6736 100644 --- a/tools/testing/selftests/vm/map_hugetlb.c +++ b/tools/testing/selftests/vm/map_hugetlb.c @@ -67,6 +67,30 @@ static int read_bytes(char *addr, size_t length) return 0; } +/* + * default_huge_page_size copied from mlock2-tests.c + */ +unsigned long default_huge_page_size(void) +{ + unsigned long hps = 0; + char *line = NULL; + size_t linelen = 0; + FILE *f = fopen("/proc/meminfo", "r"); + + if (!f) + return 0; + while (getline(&line, &linelen, f) > 0) { + if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) { + hps <<= 10; + break; + } + } + + free(line); + fclose(f); + return hps; +} + int main(int argc, char **argv) { void *addr; -- 2.39.5

7 months, 3 weeks

1
0
0 0

[PATCH 00/11] selftests: ublk: bug fixes & consolidation

by Ming Lei

Hello Jens and guys, This patchset fixes several issues (1, 2, 4) and consolidates & improves the tests in the following ways: - support shellcheck and fix all warnings - misc cleanup - improve cleanup code path (module load/unload, cleanup temp files) - help to reuse the same test source code and scripts for other projects (liburing[1], blktest, ...) - add two stress tests for covering IO workloads vs. removing device & killing ublk server, given buffer lifetime is one big thing for ublk-zc [1] https://github.com/ming1/liburing/commits/ublk-zc - just need one line change for overriding skip_code, liburing uses 77 and kselftests takes 4 Ming Lei (11): selftests: ublk: make ublk_stop_io_daemon() more reliable selftests: ublk: fix build failure selftests: ublk: add --foreground command line selftests: ublk: fix parsing '-a' argument selftests: ublk: support shellcheck and fix all warning selftests: ublk: don't pass ${dev_id} to _cleanup_test() selftests: ublk: move zero copy feature check into _add_ublk_dev() selftests: ublk: load/unload ublk_drv when preparing & cleaning up tests selftests: ublk: add one stress test for covering IO vs. removing device selftests: ublk: add stress test for covering IO vs. 
killing ublk server selftests: ublk: improve test usability tools/testing/selftests/ublk/Makefile | 6 + tools/testing/selftests/ublk/kublk.c | 43 +++-- tools/testing/selftests/ublk/kublk.h | 2 + tools/testing/selftests/ublk/test_common.sh | 167 ++++++++++++++---- tools/testing/selftests/ublk/test_loop_01.sh | 13 +- tools/testing/selftests/ublk/test_loop_02.sh | 14 +- tools/testing/selftests/ublk/test_loop_03.sh | 16 +- tools/testing/selftests/ublk/test_loop_04.sh | 14 +- tools/testing/selftests/ublk/test_null_01.sh | 9 +- .../testing/selftests/ublk/test_stress_01.sh | 47 +++++ .../testing/selftests/ublk/test_stress_02.sh | 47 +++++ 11 files changed, 300 insertions(+), 78 deletions(-) create mode 100755 tools/testing/selftests/ublk/test_stress_01.sh create mode 100755 tools/testing/selftests/ublk/test_stress_02.sh -- 2.47.0

7 months, 3 weeks

3
20
0 0

[PATCH v4 00/12] selftests/mm: Some cleanups from trying to run them

by Brendan Jackman

I never had much luck running mm selftests so I spent a few hours digging into why. Looks like most of the reason is missing SKIP checks, so this series is just adding a bunch of those that I found. I did not do anything like all of them, just the ones I spotted in gup_longterm, gup_test, mmap, userfaultfd and memfd_secret. It's a bit unfortunate to have to skip those tests when ftruncate() fails, but I don't have time to dig deep enough into it to actually make them pass. I have observed the issue on 9pfs and heard rumours that NFS has a similar problem. I'm now able to run these test groups successfully: - mmap - gup_test - compaction - migration - page_frag - userfaultfd - mlock I've never gone past "Waiting for hugetlb memory to get depleted", in the hugetlb tests. I don't know if they are stuck or if they would eventually work if I was patient enough (testing on a 1G machine). I have not investigated further. I had some issues with mlock tests failing due to -ENOSRCH from mlock2(), I can no longer reproduce that though, things work OK now. Of the remaining tests there may be others that work fine, but there's no convenient way to survey the whole output of run_vmtests.sh so I'm just going test by test here. In my spare moments I am slowly chipping away at a setup to run these tests continuously in a reasonably hermetic QEMU environment via virtme-ng: https://github.com/bjackman/linux/blob/5fad4b9c592290f38e0f8bc73c9abb9c99d8… Hopefully that will eventually offer a way to provide a "canned" environment where the tests are known to work, which can be fairly easily reproduced by any developer. Signed-off-by: Brendan Jackman <jackmanb(a)google.com> --- Changes in v4: - NOT ADDRESSED: still using errno==ENOENT as a hacky way to detect buggy filesystems: https://lore.kernel.org/all/CA+i-1C3srkh44tN8dMQ5aD-jhoksUkdEpa+mMfdDtDrPAU… - Added some incomplete cleanups for the mlock tests. - Fixed divide-by-zero error when running uffd-stress on <32cpu systems. 
- Fixed misnamed nr_threads variable (now nr_parallel). - Fixed reporting io_uring errors (retval instead of errno). - Link to v3: https://lore.kernel.org/r/20250228-mm-selftests-v3-0-958e3b6f0203@google.com Changes in v3: - Added fix for userfaultfd tests. - Dropped attempts to use sudo. - Fixed garbage printf in uffd-stress. (Added EXTRA_CFLAGS=-Werror FORCE_TARGETS=1 to my scripts to prevent such errors happening again). - Fixed missing newlines in ksft_test_result_skip() calls. - Link to v2: https://lore.kernel.org/r/20250221-mm-selftests-v2-0-28c4d66383c5@google.com Changes in v2 (Thanks to Dev for the reviews): - Improve and cleanup some error messages - Add some extra SKIPs - Fix misnaming of nr_cpus variable in uffd tests - Link to v1: https://lore.kernel.org/r/20250220-mm-selftests-v1-0-9bbf57d64463@google.com --- Brendan Jackman (12): selftests/mm: Report errno when things fail in gup_longterm selftests/mm: Skip uffd-stress if userfaultfd not available selftests/mm: Skip uffd-wp-mremap if userfaultfd not available selftests/mm/uffd: Rename nr_cpus -> nr_parallel selftests/mm: Print some details when uffd-stress gets bad params selftests/mm: Don't fail uffd-stress if too many CPUs selftests/mm: Skip map_populate on weird filesystems selftests/mm: Skip gup_longterm tests on weird filesystems selftests/mm: Drop unnecessary sudo usage selftests/mm: Ensure uffd-wp-mremap gets pages of each size selftests/mm: Skip mlock tests if nobody user can't read it selftests/mm/mlock: Print error on failure tools/testing/selftests/mm/gup_longterm.c | 45 +++++++++++++++++--------- tools/testing/selftests/mm/map_populate.c | 7 ++++ tools/testing/selftests/mm/mlock-random-test.c | 4 +-- tools/testing/selftests/mm/mlock2.h | 8 ++++- tools/testing/selftests/mm/run_vmtests.sh | 27 ++++++++++++++-- tools/testing/selftests/mm/uffd-common.c | 8 ++--- tools/testing/selftests/mm/uffd-common.h | 2 +- tools/testing/selftests/mm/uffd-stress.c | 42 +++++++++++++++--------- 
tools/testing/selftests/mm/uffd-unit-tests.c | 2 +- tools/testing/selftests/mm/uffd-wp-mremap.c | 5 ++- 10 files changed, 105 insertions(+), 45 deletions(-) --- base-commit: dcb38e6757f1b7944af9347ce6b54263d3666478 change-id: 20250220-mm-selftests-2d7d0542face Best regards, -- Brendan Jackman <jackmanb(a)google.com>

7 months, 3 weeks

1
12
0 0

[PATCHv4 net 0/2] bonding: fix incorrect mac address setting

by Hangbin Liu

The MAC address on the backup slave should be converted from the Solicited-Node Multicast address, not from the bonding unicast target address. v4: no change, just repost. v3: also fix the mac setting for slave_set_ns_maddr. (Jay) Add function description for slave_set_ns_maddr/slave_set_ns_maddrs (Jay) v2: fix patch 01's subject Hangbin Liu (2): bonding: fix incorrect MAC address setting to receive NS messages selftests: bonding: fix incorrect mac address drivers/net/bonding/bond_options.c | 55 ++++++++++++++++--- .../drivers/net/bonding/bond_options.sh | 4 +- 2 files changed, 49 insertions(+), 10 deletions(-) -- 2.46.0

7 months, 3 weeks

3
5
0 0

[PATCH v7 bpf-next 0/2] security: Propagate caller information in bpf hooks

by Blaise Boscaccy

Hello, While trying to implement an eBPF gatekeeper program, we ran into an issue where the LSM hooks are missing some relevant data. Certain subcommands passed to the bpf() syscall can be invoked from either the kernel or userspace. Additionally, some fields in the bpf_attr struct contain pointers, and depending on where the subcommand was invoked, they could point to either user or kernel memory. One example of this is the bpf_prog_load subcommand and its fd_array. This data is made available and used by the verifier but not made available to the LSM subsystem. This patchset simply exposes that information to applicable LSM hooks. Change list: - v6 -> v7 - use gettid/pid in lieu of getpid/tgid in test condition - v5 -> v6 - fix regression caused by is_kernel renaming - simplify test logic - v4 -> v5 - merge v4 selftest breakout patch back into a single patch - change "is_kernel" to "kernel" - add selftest using new kernel flag - v3 -> v4 - split out selftest changes into a separate patch - v2 -> v3 - reorder params so that the new boolean flag is the last param - fixup function signatures in bpf selftests - v1 -> v2 - Pass a boolean flag in lieu of bpfptr_t Revisions: - v6 https://lore.kernel.org/bpf/20250308013314.719150-1-bboscaccy@linux.microso… - v5 https://lore.kernel.org/bpf/20250307213651.3065714-1-bboscaccy@linux.micros… - v4 https://lore.kernel.org/bpf/20250304203123.3935371-1-bboscaccy@linux.micros… - v3 https://lore.kernel.org/bpf/20250303222416.3909228-1-bboscaccy@linux.micros… - v2 https://lore.kernel.org/bpf/20250228165322.3121535-1-bboscaccy@linux.micros… - v1 https://lore.kernel.org/bpf/20250226003055.1654837-1-bboscaccy@linux.micros… Blaise Boscaccy (2): security: Propagate caller information in bpf hooks selftests/bpf: Add a kernel flag test for LSM bpf hook include/linux/lsm_hook_defs.h | 6 +-- include/linux/security.h | 12 +++--- kernel/bpf/syscall.c | 10 ++--- security/security.c | 15 ++++--- security/selinux/hooks.c | 6 +-- 
.../selftests/bpf/prog_tests/kernel_flag.c | 43 +++++++++++++++++++ .../selftests/bpf/progs/rcu_read_lock.c | 3 +- .../bpf/progs/test_cgroup1_hierarchy.c | 4 +- .../selftests/bpf/progs/test_kernel_flag.c | 28 ++++++++++++ .../bpf/progs/test_kfunc_dynptr_param.c | 6 +-- .../selftests/bpf/progs/test_lookup_key.c | 2 +- .../selftests/bpf/progs/test_ptr_untrusted.c | 2 +- .../bpf/progs/test_task_under_cgroup.c | 2 +- .../bpf/progs/test_verify_pkcs7_sig.c | 2 +- 14 files changed, 108 insertions(+), 33 deletions(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/kernel_flag.c create mode 100644 tools/testing/selftests/bpf/progs/test_kernel_flag.c -- 2.48.1

7 months, 3 weeks

4
5
0 0

[PATCH net-next 4/4] tools/testing/selftests/cgroup: add test for SO_PEERCGROUPID

by Alexander Mikhalitsyn

Cc: linux-kselftest(a)vger.kernel.org Cc: linux-kernel(a)vger.kernel.org Cc: netdev(a)vger.kernel.org Cc: cgroups(a)vger.kernel.org Cc: "David S. Miller" <davem(a)davemloft.net> Cc: Eric Dumazet <edumazet(a)google.com> Cc: Jakub Kicinski <kuba(a)kernel.org> Cc: Paolo Abeni <pabeni(a)redhat.com> Cc: Willem de Bruijn <willemb(a)google.com> Cc: Leon Romanovsky <leon(a)kernel.org> Cc: Arnd Bergmann <arnd(a)arndb.de> Cc: Christian Brauner <brauner(a)kernel.org> Cc: Kuniyuki Iwashima <kuniyu(a)amazon.com> Cc: Lennart Poettering <mzxreary(a)0pointer.de> Cc: Luca Boccassi <bluca(a)debian.org> Cc: Tejun Heo <tj(a)kernel.org> Cc: Johannes Weiner <hannes(a)cmpxchg.org> Cc: "Michal Koutný" <mkoutny(a)suse.com> Cc: Shuah Khan <shuah(a)kernel.org> Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn(a)canonical.com> --- tools/testing/selftests/cgroup/Makefile | 2 + .../selftests/cgroup/test_so_peercgroupid.c | 308 ++++++++++++++++++ 2 files changed, 310 insertions(+) create mode 100644 tools/testing/selftests/cgroup/test_so_peercgroupid.c diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile index 1b897152bab6..a932ff068081 100644 --- a/tools/testing/selftests/cgroup/Makefile +++ b/tools/testing/selftests/cgroup/Makefile @@ -16,6 +16,7 @@ TEST_GEN_PROGS += test_kill TEST_GEN_PROGS += test_kmem TEST_GEN_PROGS += test_memcontrol TEST_GEN_PROGS += test_pids +TEST_GEN_PROGS += test_so_peercgroupid TEST_GEN_PROGS += test_zswap LOCAL_HDRS += $(selfdir)/clone3/clone3_selftests.h $(selfdir)/pidfd/pidfd.h @@ -31,4 +32,5 @@ $(OUTPUT)/test_kill: cgroup_util.c $(OUTPUT)/test_kmem: cgroup_util.c $(OUTPUT)/test_memcontrol: cgroup_util.c $(OUTPUT)/test_pids: cgroup_util.c +$(OUTPUT)/test_so_peercgroupid: cgroup_util.c $(OUTPUT)/test_zswap: cgroup_util.c diff --git a/tools/testing/selftests/cgroup/test_so_peercgroupid.c b/tools/testing/selftests/cgroup/test_so_peercgroupid.c new file mode 100644 index 000000000000..2bf1f00a45c7 --- /dev/null +++ 
b/tools/testing/selftests/cgroup/test_so_peercgroupid.c @@ -0,0 +1,308 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +#define _GNU_SOURCE +#include <error.h> +#include <inttypes.h> +#include <limits.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/socket.h> +#include <linux/socket.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <sys/un.h> +#include <sys/signal.h> +#include <sys/types.h> +#include <sys/wait.h> + +#include "../kselftest_harness.h" +#include "cgroup_util.h" + +#define clean_errno() (errno == 0 ? "None" : strerror(errno)) +#define log_err(MSG, ...) \ + fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", __FILE__, __LINE__, \ + clean_errno(), ##__VA_ARGS__) + +#ifndef SO_PEERCGROUPID +#define SO_PEERCGROUPID 83 +#endif + +static void child_die() +{ + exit(1); +} + +struct sock_addr { + char sock_name[32]; + struct sockaddr_un listen_addr; + socklen_t addrlen; +}; + +FIXTURE(so_peercgroupid) +{ + int server; + pid_t client_pid; + int sync_sk[2]; + struct sock_addr server_addr; + struct sock_addr *client_addr; + char cgroup_root[PATH_MAX]; + char *test_cgroup1; + char *test_cgroup2; +}; + +FIXTURE_VARIANT(so_peercgroupid) +{ + int type; + bool abstract; +}; + +FIXTURE_VARIANT_ADD(so_peercgroupid, stream_pathname) +{ + .type = SOCK_STREAM, + .abstract = 0, +}; + +FIXTURE_VARIANT_ADD(so_peercgroupid, stream_abstract) +{ + .type = SOCK_STREAM, + .abstract = 1, +}; + +FIXTURE_VARIANT_ADD(so_peercgroupid, seqpacket_pathname) +{ + .type = SOCK_SEQPACKET, + .abstract = 0, +}; + +FIXTURE_VARIANT_ADD(so_peercgroupid, seqpacket_abstract) +{ + .type = SOCK_SEQPACKET, + .abstract = 1, +}; + +FIXTURE_VARIANT_ADD(so_peercgroupid, dgram_pathname) +{ + .type = SOCK_DGRAM, + .abstract = 0, +}; + +FIXTURE_VARIANT_ADD(so_peercgroupid, dgram_abstract) +{ + .type = SOCK_DGRAM, + .abstract = 1, +}; + +FIXTURE_SETUP(so_peercgroupid) +{ + self->client_addr = mmap(NULL, sizeof(*self->client_addr), PROT_READ | PROT_WRITE, + 
MAP_SHARED | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(MAP_FAILED, self->client_addr); + + self->cgroup_root[0] = '\0'; +} + +FIXTURE_TEARDOWN(so_peercgroupid) +{ + close(self->server); + + kill(self->client_pid, SIGKILL); + waitpid(self->client_pid, NULL, 0); + + if (!variant->abstract) { + unlink(self->server_addr.sock_name); + unlink(self->client_addr->sock_name); + } + + if (strlen(self->cgroup_root) > 0) { + cg_enter_current(self->cgroup_root); + + if (self->test_cgroup1) + cg_destroy(self->test_cgroup1); + free(self->test_cgroup1); + + if (self->test_cgroup2) + cg_destroy(self->test_cgroup2); + free(self->test_cgroup2); + } +} + +static void fill_sockaddr(struct sock_addr *addr, bool abstract) +{ + char *sun_path_buf = (char *)&addr->listen_addr.sun_path; + + addr->listen_addr.sun_family = AF_UNIX; + addr->addrlen = offsetof(struct sockaddr_un, sun_path); + snprintf(addr->sock_name, sizeof(addr->sock_name), "so_peercgroupid_%d", getpid()); + addr->addrlen += strlen(addr->sock_name); + if (abstract) { + *sun_path_buf = '\0'; + addr->addrlen++; + sun_path_buf++; + } else { + unlink(addr->sock_name); + } + memcpy(sun_path_buf, addr->sock_name, strlen(addr->sock_name)); +} + +static void client(FIXTURE_DATA(so_peercgroupid) *self, + const FIXTURE_VARIANT(so_peercgroupid) *variant) +{ + int cfd, err; + socklen_t len; + uint64_t peer_cgroup_id = 0, test_cgroup1_id = 0, test_cgroup2_id = 0; + char state; + + cfd = socket(AF_UNIX, variant->type, 0); + if (cfd < 0) { + log_err("socket"); + child_die(); + } + + if (variant->type == SOCK_DGRAM) { + fill_sockaddr(self->client_addr, variant->abstract); + + if (bind(cfd, (struct sockaddr *)&self->client_addr->listen_addr, self->client_addr->addrlen)) { + log_err("bind"); + child_die(); + } + } + + /* negative testcase: no peer for socket yet */ + len = sizeof(peer_cgroup_id); + err = getsockopt(cfd, SOL_SOCKET, SO_PEERCGROUPID, &peer_cgroup_id, &len); + if (!err || (errno != ENODATA)) { + log_err("getsockopt must fail with errno 
== ENODATA when socket has no peer"); + child_die(); + } + + if (connect(cfd, (struct sockaddr *)&self->server_addr.listen_addr, + self->server_addr.addrlen) != 0) { + log_err("connect"); + child_die(); + } + + state = 'R'; + write(self->sync_sk[1], &state, sizeof(state)); + + read(self->sync_sk[1], &test_cgroup1_id, sizeof(uint64_t)); + read(self->sync_sk[1], &test_cgroup2_id, sizeof(uint64_t)); + + len = sizeof(peer_cgroup_id); + if (getsockopt(cfd, SOL_SOCKET, SO_PEERCGROUPID, &peer_cgroup_id, &len)) { + log_err("Failed to get SO_PEERCGROUPID"); + child_die(); + } + + /* + * There is a difference between connection-oriented sockets + * and connectionless ones from the perspective of SO_PEERCGROUPID. + * + * sk->sk_cgrp_data is getting filled when we allocate struct sock (see call to cgroup_sk_alloc()). + * For DGRAM socket, self->server socket is our peer and by the time when we allocate it, + * parent process sits in a test_cgroup1. Then it changes cgroup to test_cgroup2, but it does not + * affect anything. + * For STREAM/SEQPACKET socket, self->server is not our peer, but that one we get from accept() + * syscall. And by the time when we call accept(), parent process sits in test_cgroup2. + * + * Let's ensure that it works like that and if it get changed then we should detect it + * as it's a clear UAPI change. 
+ */ + if (variant->type == SOCK_DGRAM) { + /* cgroup id from SO_PEERCGROUPID should be equal to the test_cgroup1_id */ + if (peer_cgroup_id != test_cgroup1_id) { + log_err("peer_cgroup_id != test_cgroup1_id: %" PRId64 " != %" PRId64, peer_cgroup_id, test_cgroup1_id); + child_die(); + } + } else { + /* cgroup id from SO_PEERCGROUPID should be equal to the test_cgroup2_id */ + if (peer_cgroup_id != test_cgroup2_id) { + log_err("peer_cgroup_id != test_cgroup2_id: %" PRId64 " != %" PRId64, peer_cgroup_id, test_cgroup2_id); + child_die(); + } + } +} + +TEST_F(so_peercgroupid, test) +{ + uint64_t test_cgroup1_id, test_cgroup2_id; + int err; + int pfd; + char state; + int child_status = 0; + + if (cg_find_unified_root(self->cgroup_root, sizeof(self->cgroup_root), NULL)) + ksft_exit_skip("cgroup v2 isn't mounted\n"); + + self->test_cgroup1 = cg_name(self->cgroup_root, "so_peercgroupid_cg1"); + ASSERT_NE(NULL, self->test_cgroup1); + + self->test_cgroup2 = cg_name(self->cgroup_root, "so_peercgroupid_cg2"); + ASSERT_NE(NULL, self->test_cgroup2); + + err = cg_create(self->test_cgroup1); + ASSERT_EQ(0, err); + + err = cg_create(self->test_cgroup2); + ASSERT_EQ(0, err); + + test_cgroup1_id = cg_get_id(self->test_cgroup1); + ASSERT_LT(0, test_cgroup1_id); + + test_cgroup2_id = cg_get_id(self->test_cgroup2); + ASSERT_LT(0, test_cgroup2_id); + + /* enter test_cgroup1 before allocating a socket */ + err = cg_enter_current(self->test_cgroup1); + ASSERT_EQ(0, err); + + self->server = socket(AF_UNIX, variant->type, 0); + ASSERT_NE(-1, self->server); + + /* enter test_cgroup2 after allocating a socket */ + err = cg_enter_current(self->test_cgroup2); + ASSERT_EQ(0, err); + + fill_sockaddr(&self->server_addr, variant->abstract); + + err = bind(self->server, (struct sockaddr *)&self->server_addr.listen_addr, self->server_addr.addrlen); + ASSERT_EQ(0, err); + + if (variant->type != SOCK_DGRAM) { + err = listen(self->server, 1); + ASSERT_EQ(0, err); + } + + err = socketpair(AF_UNIX, 
SOCK_DGRAM | SOCK_CLOEXEC, 0, self->sync_sk); + EXPECT_EQ(err, 0); + + self->client_pid = fork(); + ASSERT_NE(-1, self->client_pid); + if (self->client_pid == 0) { + close(self->server); + close(self->sync_sk[0]); + client(self, variant); + exit(0); + } + close(self->sync_sk[1]); + + if (variant->type != SOCK_DGRAM) { + pfd = accept(self->server, NULL, NULL); + ASSERT_NE(-1, pfd); + } else { + pfd = self->server; + } + + /* wait until the child arrives at checkpoint */ + read(self->sync_sk[0], &state, sizeof(state)); + ASSERT_EQ(state, 'R'); + + write(self->sync_sk[0], &test_cgroup1_id, sizeof(uint64_t)); + write(self->sync_sk[0], &test_cgroup2_id, sizeof(uint64_t)); + + close(pfd); + waitpid(self->client_pid, &child_status, 0); + ASSERT_EQ(0, WIFEXITED(child_status) ? WEXITSTATUS(child_status) : 1); +} + +TEST_HARNESS_MAIN -- 2.43.0

7 months, 3 weeks

3
2
0 0

[PATCH v12 3/3] selftests/rseq: Add test for mm_cid compaction

by Gabriele Monaco

A task in the kernel (task_mm_cid_work) runs somewhat periodically to compact the mm_cid for each process. Add a test to validate that it runs correctly and timely. The test spawns 1 thread pinned to each CPU, then each thread, including the main one, runs in short bursts for some time. During this period, the mm_cids should be spanning all numbers between 0 and nproc. At the end of this phase, a thread with high enough mm_cid (>= nproc/2) is selected to be the new leader, all other threads terminate. After some time, the only running thread should see 0 as mm_cid, if that doesn't happen, the compaction mechanism didn't work and the test fails. The test never fails if only 1 core is available, in which case, we cannot test anything as the only available mm_cid is 0. Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com> Signed-off-by: Gabriele Monaco <gmonaco(a)redhat.com> --- tools/testing/selftests/rseq/.gitignore | 1 + tools/testing/selftests/rseq/Makefile | 2 +- .../selftests/rseq/mm_cid_compaction_test.c | 200 ++++++++++++++++++ 3 files changed, 202 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/rseq/mm_cid_compaction_test.c diff --git a/tools/testing/selftests/rseq/.gitignore b/tools/testing/selftests/rseq/.gitignore index 16496de5f6ce4..2c89f97e4f737 100644 --- a/tools/testing/selftests/rseq/.gitignore +++ b/tools/testing/selftests/rseq/.gitignore @@ -3,6 +3,7 @@ basic_percpu_ops_test basic_percpu_ops_mm_cid_test basic_test basic_rseq_op_test +mm_cid_compaction_test param_test param_test_benchmark param_test_compare_twice diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile index 5a3432fceb586..ce1b38f46a355 100644 --- a/tools/testing/selftests/rseq/Makefile +++ b/tools/testing/selftests/rseq/Makefile @@ -16,7 +16,7 @@ OVERRIDE_TARGETS = 1 TEST_GEN_PROGS = basic_test basic_percpu_ops_test basic_percpu_ops_mm_cid_test param_test \ param_test_benchmark param_test_compare_twice 
param_test_mm_cid \ - param_test_mm_cid_benchmark param_test_mm_cid_compare_twice + param_test_mm_cid_benchmark param_test_mm_cid_compare_twice mm_cid_compaction_test TEST_GEN_PROGS_EXTENDED = librseq.so diff --git a/tools/testing/selftests/rseq/mm_cid_compaction_test.c b/tools/testing/selftests/rseq/mm_cid_compaction_test.c new file mode 100644 index 0000000000000..7ddde3b657dd6 --- /dev/null +++ b/tools/testing/selftests/rseq/mm_cid_compaction_test.c @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: LGPL-2.1 +#define _GNU_SOURCE +#include <assert.h> +#include <pthread.h> +#include <sched.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stddef.h> + +#include "../kselftest.h" +#include "rseq.h" + +#define VERBOSE 0 +#define printf_verbose(fmt, ...) \ + do { \ + if (VERBOSE) \ + printf(fmt, ##__VA_ARGS__); \ + } while (0) + +/* 0.5 s */ +#define RUNNER_PERIOD 500000 +/* Number of runs before we terminate or get the token */ +#define THREAD_RUNS 5 + +/* + * Number of times we check that the mm_cid were compacted. + * Checks are repeated every RUNNER_PERIOD. + */ +#define MM_CID_COMPACT_TIMEOUT 10 + +struct thread_args { + int cpu; + int num_cpus; + pthread_mutex_t *token; + pthread_barrier_t *barrier; + pthread_t *tinfo; + struct thread_args *args_head; +}; + +static void __noreturn *thread_runner(void *arg) +{ + struct thread_args *args = arg; + int i, ret, curr_mm_cid; + cpu_set_t cpumask; + + CPU_ZERO(&cpumask); + CPU_SET(args->cpu, &cpumask); + ret = pthread_setaffinity_np(pthread_self(), sizeof(cpumask), &cpumask); + if (ret) { + errno = ret; + perror("Error: failed to set affinity"); + abort(); + } + pthread_barrier_wait(args->barrier); + + for (i = 0; i < THREAD_RUNS; i++) + usleep(RUNNER_PERIOD); + curr_mm_cid = rseq_current_mm_cid(); + /* + * We select one thread with high enough mm_cid to be the new leader. + * All other threads (including the main thread) will terminate. 
+ * After some time, the mm_cid of the only remaining thread should + * converge to 0, if not, the test fails. + */ + if (curr_mm_cid >= args->num_cpus / 2 && + !pthread_mutex_trylock(args->token)) { + printf_verbose( + "cpu%d has mm_cid=%d and will be the new leader.\n", + sched_getcpu(), curr_mm_cid); + for (i = 0; i < args->num_cpus; i++) { + if (args->tinfo[i] == pthread_self()) + continue; + ret = pthread_join(args->tinfo[i], NULL); + if (ret) { + errno = ret; + perror("Error: failed to join thread"); + abort(); + } + } + pthread_barrier_destroy(args->barrier); + free(args->tinfo); + free(args->token); + free(args->barrier); + free(args->args_head); + + for (i = 0; i < MM_CID_COMPACT_TIMEOUT; i++) { + curr_mm_cid = rseq_current_mm_cid(); + printf_verbose("run %d: mm_cid=%d on cpu%d.\n", i, + curr_mm_cid, sched_getcpu()); + if (curr_mm_cid == 0) + exit(EXIT_SUCCESS); + usleep(RUNNER_PERIOD); + } + exit(EXIT_FAILURE); + } + printf_verbose("cpu%d has mm_cid=%d and is going to terminate.\n", + sched_getcpu(), curr_mm_cid); + pthread_exit(NULL); +} + +int test_mm_cid_compaction(void) +{ + cpu_set_t affinity; + int i, j, ret = 0, num_threads; + pthread_t *tinfo; + pthread_mutex_t *token; + pthread_barrier_t *barrier; + struct thread_args *args; + + sched_getaffinity(0, sizeof(affinity), &affinity); + num_threads = CPU_COUNT(&affinity); + tinfo = calloc(num_threads, sizeof(*tinfo)); + if (!tinfo) { + perror("Error: failed to allocate tinfo"); + return -1; + } + args = calloc(num_threads, sizeof(*args)); + if (!args) { + perror("Error: failed to allocate args"); + ret = -1; + goto out_free_tinfo; + } + token = malloc(sizeof(*token)); + if (!token) { + perror("Error: failed to allocate token"); + ret = -1; + goto out_free_args; + } + barrier = malloc(sizeof(*barrier)); + if (!barrier) { + perror("Error: failed to allocate barrier"); + ret = -1; + goto out_free_token; + } + if (num_threads == 1) { + fprintf(stderr, "Cannot test on a single cpu. 
" + "Skipping mm_cid_compaction test.\n"); + /* only skipping the test, this is not a failure */ + goto out_free_barrier; + } + pthread_mutex_init(token, NULL); + ret = pthread_barrier_init(barrier, NULL, num_threads); + if (ret) { + errno = ret; + perror("Error: failed to initialise barrier"); + goto out_free_barrier; + } + for (i = 0, j = 0; i < CPU_SETSIZE && j < num_threads; i++) { + if (!CPU_ISSET(i, &affinity)) + continue; + args[j].num_cpus = num_threads; + args[j].tinfo = tinfo; + args[j].token = token; + args[j].barrier = barrier; + args[j].cpu = i; + args[j].args_head = args; + if (!j) { + /* The first thread is the main one */ + tinfo[0] = pthread_self(); + ++j; + continue; + } + ret = pthread_create(&tinfo[j], NULL, thread_runner, &args[j]); + if (ret) { + errno = ret; + perror("Error: failed to create thread"); + abort(); + } + ++j; + } + printf_verbose("Started %d threads.\n", num_threads); + + /* Also main thread will terminate if it is not selected as leader */ + thread_runner(&args[0]); + + /* only reached in case of errors */ +out_free_barrier: + free(barrier); +out_free_token: + free(token); +out_free_args: + free(args); +out_free_tinfo: + free(tinfo); + + return ret; +} + +int main(int argc, char **argv) +{ + if (!rseq_mm_cid_available()) { + fprintf(stderr, "Error: rseq_mm_cid unavailable\n"); + return -1; + } + if (test_mm_cid_compaction()) + return -1; + return 0; +} -- 2.48.1

7 months, 3 weeks

1
0
0 0

[PATCH v8 net-next 0/3] DUALPI2 patch

by chia-yu.chang＠nokia-bell-labs.com

From: Chia-Yu Chang <chia-yu.chang(a)nokia-bell-labs.com> Hello, Please find DUALPI2 patch v8. v8 - Fix warning messages in v7 v7 - Separate into 3 patches to avoid mixing changes of documentation, selftest, and code. (Cong Wang <xiyou.wangcong(a)gmail.com>) v6 - Add modprobe for dulapi2 in tc-testing script tc-testing/tdc.sh (Jakub Kicinski <kuba(a)kernel.org>) - Update test cases in dualpi2.json - Update commit message v5 - A comparison was done between MQ + DUALPI2, MQ + FQ_PIE, MQ + FQ_CODEL: Unshaped 1gigE with 4 download streams test: - Summary of tcp_4down run 'MQ + FQ_CODEL': avg median # data pts Ping (ms) ICMP : 1.19 1.34 ms 349 TCP download avg : 235.42 N/A Mbits/s 349 TCP download sum : 941.68 N/A Mbits/s 349 TCP download::1 : 235.19 235.39 Mbits/s 349 TCP download::2 : 235.03 235.35 Mbits/s 349 TCP download::3 : 236.89 235.44 Mbits/s 349 TCP download::4 : 234.57 235.19 Mbits/s 349 - Summary of tcp_4down run 'MQ + FQ_PIE' avg median # data pts Ping (ms) ICMP : 1.21 1.37 ms 350 TCP download avg : 235.42 N/A Mbits/s 350 TCP download sum : 941.61 N/A Mbits/s 350 TCP download::1 : 232.54 233.13 Mbits/s 350 TCP download::2 : 232.52 232.80 Mbits/s 350 TCP download::3 : 233.14 233.78 Mbits/s 350 TCP download::4 : 243.41 241.48 Mbits/s 350 - Summary of tcp_4down run 'MQ + DUALPI2' avg median # data pts Ping (ms) ICMP : 1.19 1.34 ms 349 TCP download avg : 235.42 N/A Mbits/s 349 TCP download sum : 941.68 N/A Mbits/s 349 TCP download::1 : 235.19 235.39 Mbits/s 349 TCP download::2 : 235.03 235.35 Mbits/s 349 TCP download::3 : 236.89 235.44 Mbits/s 349 TCP download::4 : 234.57 235.19 Mbits/s 349 Unshaped 1gigE with 128 download streams test: - Summary of tcp_128down run 'MQ + FQ_CODEL': avg median # data pts Ping (ms) ICMP : 1.88 1.86 ms 350 TCP download avg : 7.39 N/A Mbits/s 350 TCP download sum : 946.47 N/A Mbits/s 350 - Summary of tcp_128down run 'MQ + FQ_PIE': avg median # data pts Ping (ms) ICMP : 1.88 1.86 ms 350 TCP download avg : 7.39 N/A Mbits/s 350 TCP 
download sum : 946.47 N/A Mbits/s 350 - Summary of tcp_128down run 'MQ + DUALPI2': avg median # data pts Ping (ms) ICMP : 1.88 1.86 ms 350 TCP download avg : 7.39 N/A Mbits/s 350 TCP download sum : 946.47 N/A Mbits/s 350 Unshaped 10gigE with 4 download streams test: - Summary of tcp_4down run 'MQ + FQ_CODEL': avg median # data pts Ping (ms) ICMP : 0.22 0.23 ms 350 TCP download avg : 2354.08 N/A Mbits/s 350 TCP download sum : 9416.31 N/A Mbits/s 350 TCP download::1 : 2353.65 2352.81 Mbits/s 350 TCP download::2 : 2354.54 2354.21 Mbits/s 350 TCP download::3 : 2353.56 2353.78 Mbits/s 350 TCP download::4 : 2354.56 2354.45 Mbits/s 350 - Summary of tcp_4down run 'MQ + FQ_PIE': avg median # data pts Ping (ms) ICMP : 0.20 0.19 ms 350 TCP download avg : 2354.76 N/A Mbits/s 350 TCP download sum : 9419.04 N/A Mbits/s 350 TCP download::1 : 2354.77 2353.89 Mbits/s 350 TCP download::2 : 2353.41 2354.29 Mbits/s 350 TCP download::3 : 2356.18 2354.19 Mbits/s 350 TCP download::4 : 2354.68 2353.15 Mbits/s 350 - Summary of tcp_4down run 'MQ + DUALPI2': avg median # data pts Ping (ms) ICMP : 0.24 0.24 ms 350 TCP download avg : 2354.11 N/A Mbits/s 350 TCP download sum : 9416.43 N/A Mbits/s 350 TCP download::1 : 2354.75 2353.93 Mbits/s 350 TCP download::2 : 2353.15 2353.75 Mbits/s 350 TCP download::3 : 2353.49 2353.72 Mbits/s 350 TCP download::4 : 2355.04 2353.73 Mbits/s 350 Unshaped 10gigE with 128 download streams test: - Summary of tcp_128down run 'MQ + FQ_CODEL': avg median # data pts Ping (ms) ICMP : 7.57 8.69 ms 350 TCP download avg : 73.97 N/A Mbits/s 350 TCP download sum : 9467.82 N/A Mbits/s 350 - Summary of tcp_128down run 'MQ + FQ_PIE': avg median # data pts Ping (ms) ICMP : 7.82 8.91 ms 350 TCP download avg : 73.97 N/A Mbits/s 350 TCP download sum : 9468.42 N/A Mbits/s 350 - Summary of tcp_128down run 'MQ + DUALPI2': avg median # data pts Ping (ms) ICMP : 6.87 7.93 ms 350 TCP download avg : 73.95 N/A Mbits/s 350 TCP download sum : 9465.87 N/A Mbits/s 350 From the results shown 
above, we see small differences between combinations. - Update commit message to include results of no_split_gso and split_gso (Dave Taht <dave.taht(a)gmail.com> and Paolo Abeni <pabeni(a)redhat.com>) - Add memlimit in dualpi2 attribute, and add memory_used, max_memory_used, memory_limit in dualpi2 stats (Dave Taht <dave.taht(a)gmail.com>) - Update note in sch_dualpi2.c related to BBRv3 status (Dave Taht <dave.taht(a)gmail.com>) - Update license identifier (Dave Taht <dave.taht(a)gmail.com>) - Add selftest in tools/testing/selftests/tc-testing (Cong Wang <xiyou.wangcong(a)gmail.com>) - Use netlink policies for parameter checks (Jamal Hadi Salim <jhs(a)mojatatu.com>) - Modify texts & fix typos in Documentation/netlink/specs/tc.yaml (Dave Taht <dave.taht(a)gmail.com>) - Add descriptions of packet counter statistics and reset function of sch_dualpi2.c - Fix step_thresh in packets - Update code comments in sch_dualpi2.c v4 - Update statement in Kconfig for DualPI2 (Stephen Hemminger <stephen(a)networkplumber.org>) - Put a blank line after #define in sch_dualpi2.c (Stephen Hemminger <stephen(a)networkplumber.org>) - Fix line length warning v3 - Fix compilation error - Update Documentation/netlink/specs/tc.yaml (Jakub Kicinski <kuba(a)kernel.org>) v2 - Add Documentation/netlink/specs/tc.yaml (Jakub Kicinski <kuba(a)kernel.org>) - Use dualpi2 instead of skb prefix (Jamal Hadi Salim <jhs(a)mojatatu.com>) - Replace nla_parse_nested_deprecated with nla_parse_nested (Jamal Hadi Salim <jhs(a)mojatatu.com>) - Fix line length warning For more details of DualPI2, please refer to IETF RFC9332 (https://datatracker.ietf.org/doc/html/rfc9332). 
Best regards, Chia-Yu Chia-Yu Chang (2): Documentation: netlink: specs: tc: Add DualPI2 specification selftests/tc-testing: Add selftests for qdisc DualPI2 Koen De Schepper (1): sched: Add dualpi2 qdisc Documentation/netlink/specs/tc.yaml | 140 +++ include/linux/netdevice.h | 1 + include/uapi/linux/pkt_sched.h | 38 + net/sched/Kconfig | 12 + net/sched/Makefile | 1 + net/sched/sch_dualpi2.c | 1082 +++++++++++++++++ tools/testing/selftests/tc-testing/config | 1 + .../tc-testing/tc-tests/qdiscs/dualpi2.json | 149 +++ tools/testing/selftests/tc-testing/tdc.sh | 1 + 9 files changed, 1425 insertions(+) create mode 100644 net/sched/sch_dualpi2.c create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/dualpi2.json -- 2.34.1

7 months, 3 weeks

2
4
0 0

[PATCH] selftests/bpf: Convert comma to semicolon

by Chen Ni

Replace comma between expressions with semicolons. Using a ',' in place of a ';' can have unintended side effects. Although that is not the case here, it seems best to use ';' unless ',' is intended. Found by inspection. No functional change intended. Compile tested only. Signed-off-by: Chen Ni <nichen(a)iscas.ac.cn> --- tools/testing/selftests/bpf/prog_tests/fd_array.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/fd_array.c b/tools/testing/selftests/bpf/prog_tests/fd_array.c index a1d52e73fb16..9add890c2d37 100644 --- a/tools/testing/selftests/bpf/prog_tests/fd_array.c +++ b/tools/testing/selftests/bpf/prog_tests/fd_array.c @@ -83,8 +83,8 @@ static inline int bpf_prog_get_map_ids(int prog_fd, __u32 *nr_map_ids, __u32 *ma int err; memset(&info, 0, len); - info.nr_map_ids = *nr_map_ids, - info.map_ids = ptr_to_u64(map_ids), + info.nr_map_ids = *nr_map_ids; + info.map_ids = ptr_to_u64(map_ids); err = bpf_prog_get_info_by_fd(prog_fd, &info, &len); if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd")) -- 2.25.1

7 months, 3 weeks

3
2
0 0

[PATCH v21 00/24] Introducing OpenVPN Data Channel Offload

by Antonio Quartulli

Notable changes since v20: * removed newline at the end of message in NL_SET_ERR_MSG_FMT_MOD * dropped udp_init() and related build_protos() and directly use .encap_destroy [instead of overriding sk->close()] * deferred peer_del() call to worker in case of transport errors, as we may be in non-sleepable context * used kfree() instead of kfree_rcu() when releasing socket as we just invoked synchronize_rcu() * fix comment in ovpn_tcp_parse() * invoked peer->tcp.sk_cb.prot->release_cb instead of explicitly calling tcp_release_cb. This way we don't need to export it again * moved call to peer_put() after call to peer->tcp.sk_cb.prot->close * moved switch(mode) inside ovpn_peers_free() * simplified skip logic in ovpn_peers_free() * fixed check to avoid passing a negative delay to keepalive's schedule_work() Please note that some patches were already reviewed/tested by a few people. These patches have retained the tags as they have hardly been touched. The latest code can also be found at: https://github.com/OpenVPN/ovpn-net-next Thanks a lot! Best Regards, Antonio Quartulli OpenVPN Inc. 
To: netdev(a)vger.kernel.org To: Eric Dumazet <edumazet(a)google.com> To: Jakub Kicinski <kuba(a)kernel.org> To: Paolo Abeni <pabeni(a)redhat.com> To: Donald Hunter <donald.hunter(a)gmail.com> To: Antonio Quartulli <antonio(a)openvpn.net> To: Shuah Khan <shuah(a)kernel.org> To: sd(a)queasysnail.net To: ryazanov.s.a(a)gmail.com To: Andrew Lunn <andrew+netdev(a)lunn.ch> Cc: Simon Horman <horms(a)kernel.org> Cc: linux-kernel(a)vger.kernel.org Cc: linux-kselftest(a)vger.kernel.org Cc: Xiao Liang <shaw.leon(a)gmail.com> Signed-off-by: Antonio Quartulli <antonio(a)openvpn.net> --- Antonio Quartulli (24): net: introduce OpenVPN Data Channel Offload (ovpn) ovpn: add basic netlink support ovpn: add basic interface creation/destruction/management routines ovpn: keep carrier always on for MP interfaces ovpn: introduce the ovpn_peer object ovpn: introduce the ovpn_socket object ovpn: implement basic TX path (UDP) ovpn: implement basic RX path (UDP) ovpn: implement packet processing ovpn: store tunnel and transport statistics ovpn: implement TCP transport skb: implement skb_send_sock_locked_with_flags() ovpn: add support for MSG_NOSIGNAL in tcp_sendmsg ovpn: implement multi-peer support ovpn: implement peer lookup logic ovpn: implement keepalive mechanism ovpn: add support for updating local UDP endpoint ovpn: add support for peer floating ovpn: implement peer add/get/dump/delete via netlink ovpn: implement key add/get/del/swap via netlink ovpn: kill key and notify userspace in case of IV exhaustion ovpn: notify userspace when a peer is deleted ovpn: add basic ethtool support testing/selftests: add test tool and scripts for ovpn module Documentation/netlink/specs/ovpn.yaml | 371 +++ Documentation/netlink/specs/rt_link.yaml | 16 + MAINTAINERS | 11 + drivers/net/Kconfig | 15 + drivers/net/Makefile | 1 + drivers/net/ovpn/Makefile | 22 + drivers/net/ovpn/bind.c | 55 + drivers/net/ovpn/bind.h | 101 + drivers/net/ovpn/crypto.c | 211 ++ drivers/net/ovpn/crypto.h | 145 ++ 
drivers/net/ovpn/crypto_aead.c | 408 ++++ drivers/net/ovpn/crypto_aead.h | 33 + drivers/net/ovpn/io.c | 462 ++++ drivers/net/ovpn/io.h | 34 + drivers/net/ovpn/main.c | 339 +++ drivers/net/ovpn/main.h | 14 + drivers/net/ovpn/netlink-gen.c | 213 ++ drivers/net/ovpn/netlink-gen.h | 41 + drivers/net/ovpn/netlink.c | 1249 ++++++++++ drivers/net/ovpn/netlink.h | 18 + drivers/net/ovpn/ovpnpriv.h | 57 + drivers/net/ovpn/peer.c | 1352 +++++++++++ drivers/net/ovpn/peer.h | 163 ++ drivers/net/ovpn/pktid.c | 129 ++ drivers/net/ovpn/pktid.h | 87 + drivers/net/ovpn/proto.h | 118 + drivers/net/ovpn/skb.h | 61 + drivers/net/ovpn/socket.c | 244 ++ drivers/net/ovpn/socket.h | 49 + drivers/net/ovpn/stats.c | 21 + drivers/net/ovpn/stats.h | 47 + drivers/net/ovpn/tcp.c | 592 +++++ drivers/net/ovpn/tcp.h | 36 + drivers/net/ovpn/udp.c | 442 ++++ drivers/net/ovpn/udp.h | 25 + include/linux/skbuff.h | 2 + include/uapi/linux/if_link.h | 15 + include/uapi/linux/ovpn.h | 110 + include/uapi/linux/udp.h | 1 + net/core/skbuff.c | 18 +- net/ipv6/af_inet6.c | 1 + net/ipv6/udp.c | 1 + tools/testing/selftests/Makefile | 1 + tools/testing/selftests/net/ovpn/.gitignore | 2 + tools/testing/selftests/net/ovpn/Makefile | 31 + tools/testing/selftests/net/ovpn/common.sh | 92 + tools/testing/selftests/net/ovpn/config | 10 + tools/testing/selftests/net/ovpn/data64.key | 5 + tools/testing/selftests/net/ovpn/ovpn-cli.c | 2395 ++++++++++++++++++++ tools/testing/selftests/net/ovpn/tcp_peers.txt | 5 + .../testing/selftests/net/ovpn/test-chachapoly.sh | 9 + .../selftests/net/ovpn/test-close-socket-tcp.sh | 9 + .../selftests/net/ovpn/test-close-socket.sh | 45 + tools/testing/selftests/net/ovpn/test-float.sh | 9 + tools/testing/selftests/net/ovpn/test-tcp.sh | 9 + tools/testing/selftests/net/ovpn/test.sh | 113 + tools/testing/selftests/net/ovpn/udp_peers.txt | 5 + 57 files changed, 10065 insertions(+), 5 deletions(-) --- base-commit: fb05579a176f7bccc8d279665fc0e46dfed43dfb change-id: 
20250304-b4-ovpn-tmp-153379e78603 Best regards, -- Antonio Quartulli <antonio(a)openvpn.net>

7 months, 3 weeks

2
45
0 0

[PATCH v15 00/15] PCI: EP: Add RC-to-EP doorbell with platform MSI controller

by Frank Li

┌────────────┐ ┌───────────────────────────────────┐ ┌────────────────┐ │ │ │ │ │ │ │ │ │ PCI Endpoint │ │ PCI Host │ │ │ │ │ │ │ │ │◄──┤ 1.platform_msi_domain_alloc_irqs()│ │ │ │ │ │ │ │ │ │ MSI ├──►│ 2.write_msi_msg() ├──►├─BAR<n> │ │ Controller │ │ update doorbell register address│ │ │ │ │ │ for BAR │ │ │ │ │ │ │ │ 3. Write BAR<n>│ │ │◄──┼───────────────────────────────────┼───┤ │ │ │ │ │ │ │ │ ├──►│ 4.Irq Handle │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └────────────┘ └───────────────────────────────────┘ └────────────────┘ This patches based on old https://lore.kernel.org/imx/20221124055036.1630573-1-Frank.Li@nxp.com/ Original patch only target to vntb driver. But actually it is common method. This patches add new API to pci-epf-core, so any EP driver can use it. Previous v2 discussion here. https://lore.kernel.org/imx/20230911220920.1817033-1-Frank.Li@nxp.com/ Changes in v15: - rebase to v6.14-rc1 - fix build issue find by kernel test robot - Link to v14: https://lore.kernel.org/r/20250207-ep-msi-v14-0-9671b136f2b8@nxp.com Changes in v14: Marc Zyngier raised concerns about adding DOMAIN_BUS_DEVICE_PCI_EP_MSI. As a result, the approach has been reverted to the v9 method. However, there are several improvements: MSI now supports msi-map in addition to msi-parent. - The struct device: id is used as the endpoint function (EPF) device identity to map to the stream ID (sideband information). - The EPC device tree source (DTS) utilizes msi-map to provide such information. - The EPF device's of_node is set to the EPC controller’s node. This approach is commonly used for multi-function device (MFD) platform child devices, allowing them to inherit properties from the MFD device’s DTS, such as reset-cells and gpio-cells. This method is well-suited for the current case, as the EPF is inherently created/binded to the EPC and should inherit the EPC’s DTS node properties. 
Additionally: Since the basic IMX95 LUT support has already been merged into the mainline, a DTS and driver increment patch is added to complete the solution. The patch is rebased onto the latest linux-next tree and aligned with the new pcitest framework. - Link to v13: https://lore.kernel.org/r/20241218-ep-msi-v13-0-646e2192dc24@nxp.com Changes in v13: - Change to use DOMAIN_BUS_PCI_DEVICE_EP_MSI - Change request id as func | vfunc << 3 - Remove IRQ_DOMAIN_MSI_IMMUTABLE Thomas Gleixner: I hope capture all your points in review comments. If missed, let me know. - Link to v12: https://lore.kernel.org/r/20241211-ep-msi-v12-0-33d4532fa520@nxp.com Changes in v12: - Change to use IRQ_DOMAIN_MSI_IMMUTABLE and add help function irq_domain_msi_is_immuatble(). - split PCI: endpoint: pci-ep-msi: Add MSI address/data pair mutable check to 3 patches - Link to v11: https://lore.kernel.org/r/20241209-ep-msi-v11-0-7434fa8397bd@nxp.com Changes in v11: - Change to use MSI_FLAG_MSG_IMMUTABLE - Link to v10: https://lore.kernel.org/r/20241204-ep-msi-v10-0-87c378dbcd6d@nxp.com Changes in v10: Thomas Gleixner: There are big change in pci-ep-msi.c. I am sure if go on the correct path. The key improvement is remove only 1 function devices's limitation. I use new patch for immutable check, which relative additional feature compared to base enablement patch. - Remove patch Add msi_remove_device_irq_domain() in platform_device_msi_free_irqs_all() - Add new patch irqchip/gic-v3-its: Avoid overwriting msi_prepare callback if provided by msi_domain_info - Remove only support 1 endpoint function limitation. - Create one MSI domain for each endpoint function devices. - Use "msi-map" in pci ep controller node, instead of msi-parent. 
first argument is (func_no << 8 | vfunc_no) - Link to v9: https://lore.kernel.org/r/20241203-ep-msi-v9-0-a60dbc3f15dd@nxp.com Changes in v9 - Add patch platform-msi: Add msi_remove_device_irq_domain() in platform_device_msi_free_irqs_all() - Remove patch PCI: endpoint: Add pci_epc_get_fn() API for customizable filtering - Remove API pci_epf_align_inbound_addr_lo_hi - Move doorbell_alloc in to doorbell_enable function. - Link to v8: https://lore.kernel.org/r/20241116-ep-msi-v8-0-6f1f68ffd1bb@nxp.com Changes in v8: - update helper function name to pci_epf_align_inbound_addr() - Link to v7: https://lore.kernel.org/r/20241114-ep-msi-v7-0-d4ac7aafbd2c@nxp.com Changes in v7: - Add helper function pci_epf_align_addr(); - Link to v6: https://lore.kernel.org/r/20241112-ep-msi-v6-0-45f9722e3c2a@nxp.com Changes in v6: - change doorbell_addr to doorbell_offset - use round_down() - add Niklas's test by tag - rebase to pci/endpoint - Link to v5: https://lore.kernel.org/r/20241108-ep-msi-v5-0-a14951c0d007@nxp.com Changes in v5: - Move request_irq to epf test function driver for more flexible use cases - Add fixed size bar handler - Some minor improvements; see each patch's changelog. - Link to v4: https://lore.kernel.org/r/20241031-ep-msi-v4-0-717da2d99b28@nxp.com Changes in v4: - Remove patch genirq/msi: Add cleanup guard define for msi_lock_descs()/msi_unlock_descs() - Use new method to avoid compatible problem. Add new command DOORBELL_ENABLE and DOORBELL_DISABLE. pcitest -B send DOORBELL_ENABLE first, EP test function driver try to remap one of BAR_N (except test register bar) to ITS MSI MMIO space. Old driver don't support new command, so failure return, not side effect. After test, DOORBELL_DISABLE command send out to recover original map, so pcitest bar test can pass as normal. 
- Other detail change see each patches's change log - Link to v3: https://lore.kernel.org/r/20241015-ep-msi-v3-0-cedc89a16c1a@nxp.com Change from v2 to v3 - Fixed manivannan's comments - Move common part to pci-ep-msi.c and pci-ep-msi.h - rebase to 6.12-rc1 - use RevID to distinguish old version mkdir /sys/kernel/config/pci_ep/functions/pci_epf_test/func1 echo 16 > /sys/kernel/config/pci_ep/functions/pci_epf_test/func1/msi_interrupts echo 0x080c > /sys/kernel/config/pci_ep/functions/pci_epf_test/func1/deviceid echo 0x1957 > /sys/kernel/config/pci_ep/functions/pci_epf_test/func1/vendorid echo 1 > /sys/kernel/config/pci_ep/functions/pci_epf_test/func1/revid ^^^^^^ to enable platform msi support. ln -s /sys/kernel/config/pci_ep/functions/pci_epf_test/func1 /sys/kernel/config/pci_ep/controllers/4c380000.pcie-ep - use new device ID, which identify support doorbell to avoid broken compatibility. Enable doorbell support only for PCI_DEVICE_ID_IMX8_DB, while other devices keep the same behavior as before. EP side RC with old driver RC with new driver PCI_DEVICE_ID_IMX8_DB no probe doorbell enabled Other device ID doorbell disabled* doorbell disabled* * Behavior remains unchanged. Change from v1 to v2 - Add missed patch for endpoint/pci-epf-test.c - Move alloc and free to epc driver from epf. - Provide general help function for EPC driver to alloc platform msi irq. - Fixed manivannan's comments. 
Signed-off-by: Frank Li <Frank.Li(a)nxp.com> --- Frank Li (15): platform-msi: Add msi_remove_device_irq_domain() in platform_device_msi_free_irqs_all() irqdomain: Add IRQ_DOMAIN_FLAG_MSI_IMMUTABLE and irq_domain_is_msi_immutable() irqchip/gic-v3-its: Set IRQ_DOMAIN_FLAG_MSI_IMMUTABLE for ITS irqchip/gic-v3-its: Add support for device tree msi-map and msi-mask PCI: endpoint: Set ID and of_node for function driver PCI: endpoint: Add RC-to-EP doorbell support using platform MSI controller PCI: endpoint: pci-ep-msi: Add MSI address/data pair mutable check PCI: endpoint: Add pci_epf_align_inbound_addr() helper for address alignment PCI: endpoint: pci-epf-test: Add doorbell test support misc: pci_endpoint_test: Add doorbell test case selftests: pci_endpoint: Add doorbell test case pci: imx6: Add helper function imx_pcie_add_lut_by_rid() pci: imx6: Add LUT setting for MSI/IOMMU in Endpoint mode arm64: dts: imx95: Add msi-map for pci-ep device arm64: dts: imx95-19x19-evk: Add PCIe1 endpoint function overlay file arch/arm64/boot/dts/freescale/Makefile | 3 + .../dts/freescale/imx95-19x19-evk-pcie1-ep.dtso | 21 ++++ arch/arm64/boot/dts/freescale/imx95.dtsi | 1 + drivers/base/platform-msi.c | 1 + drivers/irqchip/irq-gic-v3-its-msi-parent.c | 8 ++ drivers/irqchip/irq-gic-v3-its.c | 2 +- drivers/misc/pci_endpoint_test.c | 81 +++++++++++++ drivers/pci/controller/dwc/pci-imx6.c | 25 ++-- drivers/pci/endpoint/Makefile | 1 + drivers/pci/endpoint/functions/pci-epf-test.c | 132 +++++++++++++++++++++ drivers/pci/endpoint/pci-ep-msi.c | 90 ++++++++++++++ drivers/pci/endpoint/pci-epf-core.c | 48 ++++++++ include/linux/irqdomain.h | 7 ++ include/linux/pci-ep-msi.h | 28 +++++ include/linux/pci-epf.h | 21 ++++ include/uapi/linux/pcitest.h | 1 + .../selftests/pci_endpoint/pci_endpoint_test.c | 25 ++++ 17 files changed, 486 insertions(+), 9 deletions(-) --- base-commit: 2014c95afecee3e76ca4a56956a936e23283f05b change-id: 20241010-ep-msi-8b4cab33b1be Best regards, --- Frank Li 
<Frank.Li(a)nxp.com>

7 months, 3 weeks

4
28
0 0

[PATCH v3 net 0/8] eth: bnxt: fix several bugs in the bnxt module

by Taehee Yoo

The first fixes setting incorrect skb->truesize. When xdp-mb prog returns XDP_PASS, skb is allocated and initialized. Currently, The truesize is calculated as BNXT_RX_PAGE_SIZE * sinfo->nr_frags, but sinfo->nr_frags is flushed by napi_build_skb(). So, it stores sinfo before calling napi_build_skb() and then use it for calculate truesize. The second fixes kernel panic in the bnxt_queue_mem_alloc(). The bnxt_queue_mem_alloc() accesses rx ring descriptor. rx ring descriptors are allocated when the interface is up and it's freed when the interface is down. So, if bnxt_queue_mem_alloc() is called when the interface is down, kernel panic occurs. This patch makes the bnxt_queue_mem_alloc() return -ENETDOWN if rx ring descriptors are not allocated. The third patch fixes kernel panic in the bnxt_queue_{start | stop}(). When a queue is restarted bnxt_queue_{start | stop}() are called. These functions set MRU to 0 to stop packet flow and then to set up the remaining things. MRU variable is a member of vnic_info[] the first vnic_info is for default and the second is for ntuple. The first vnic_info is always allocated when interface is up, but the second is allocated only when ntuple is enabled. (ethtool -K eth0 ntuple <on | off>). Currently, the bnxt_queue_{start | stop}() access vnic_info[BNXT_VNIC_NTUPLE] regardless of whether ntuple is enabled or not. So kernel panic occurs. This patch make the bnxt_queue_{start | stop}() use bp->nr_vnics instead of BNXT_VNIC_NTUPLE. The fourth patch fixes a warning due to checksum state. The bnxt_rx_pkt() checks whether skb->ip_summed is not CHECKSUM_NONE before updating ip_summed. if ip_summed is not CHECKSUM_NONE, it WARNS about it. However, the bnxt_xdp_build_skb() is called in XDP-MB-PASS path and it updates ip_summed earlier than bnxt_rx_pkt(). So, in the XDP-MB-PASS path, the bnxt_rx_pkt() always warns about checksum. Updating ip_summed at the bnxt_xdp_build_skb() is unnecessary and duplicate, so it is removed. 
The fifth patch fixes a kernel panic in the bnxt_get_queue_stats{rx | tx}(). The bnxt_get_queue_stats{rx | tx}() callback functions are called when a queue is resetting. These internally access rx and tx rings without null check, but rings are allocated and initialized when the interface is up. So, if these functions are called when the interface is down, a kernel panic occurs. The sixth patch fixes memory leak in queue reset logic. When a queue is resetting, tpa_info is allocated for the new queue and tpa_info for an old queue is not used anymore. So it should be freed, but it is not. The seventh patch makes net_devmem_unbind_dmabuf() ignore -ENETDOWN. When devmem socket is closed, net_devmem_unbind_dmabuf() is called to unbind/release resources. If interface is down, the driver returns -ENETDOWN. The -ENETDOWN return value is not an actual error, because the interface will release resources when the interface is down. So, net_devmem_unbind_dmabuf() needs to ignore -ENETDOWN. The last patch adds XDP testcases to tools/testing/selftests/drivers/net/ping.py. v3: - Copy nr_frags instead of full copy. (1/8) - Add Review tags from Somnath. (3/8) - Add new patch for fixing kernel panic in the bnxt_get_queue_stats{rx | tx}(). (5/8) - Add new patch for fixing memory leak in queue reset. (6/8) v2: - Do not use num_frags in the bnxt_xdp_build_skb(). (1/6) - Add Review tags from Somnath and Jakub. (2/6) - Add new patch for fixing checksum warning. (4/6) - Add new patch for fixing warning in net_devmem_unbind_dmabuf(). 
(5/6) - Add new XDP testcases to ping.py (6/6) Taehee Yoo (8): eth: bnxt: fix truesize for mb-xdp-pass case eth: bnxt: return fail if interface is down in bnxt_queue_mem_alloc() eth: bnxt: do not use BNXT_VNIC_NTUPLE unconditionally in queue restart logic eth: bnxt: do not update checksum in bnxt_xdp_build_skb() eth: bnxt: fix kernel panic in the bnxt_get_queue_stats{rx | tx} eth: bnxt: fix memory leak in queue reset net: devmem: do not WARN conditionally after netdev_rx_queue_restart() selftests: drv-net: add xdp cases for ping.py drivers/net/ethernet/broadcom/bnxt/bnxt.c | 25 ++- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 13 +- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h | 3 +- net/core/devmem.c | 4 +- tools/testing/selftests/drivers/net/ping.py | 200 ++++++++++++++++-- .../testing/selftests/net/lib/xdp_dummy.bpf.c | 6 + 6 files changed, 220 insertions(+), 31 deletions(-) -- 2.34.1

7 months, 3 weeks

5
15
0 0

[PATCH v6 bpf-next 0/2] security: Propagate caller information in bpf hooks

by Blaise Boscaccy

Hello, While trying to implement an eBPF gatekeeper program, we ran into an issue whereas the LSM hooks are missing some relevant data. Certain subcommands passed to the bpf() syscall can be invoked from either the kernel or userspace. Additionally, some fields in the bpf_attr struct contain pointers, and depending on where the subcommand was invoked, they could point to either user or kernel memory. One example of this is the bpf_prog_load subcommand and its fd_array. This data is made available and used by the verifier but not made available to the LSM subsystem. This patchset simply exposes that information to applicable LSM hooks. Change list: - v5 -> v6 - fix regression caused by is_kernel renaming - simplify test logic - v4 -> v5 - merge v4 selftest breakout patch back into a single patch - change "is_kernel" to "kernel" - add selftest using new kernel flag - v3 -> v4 - split out selftest changes into a separate patch - v2 -> v3 - reorder params so that the new boolean flag is the last param - fixup function signatures in bpf selftests - v1 -> v2 - Pass a boolean flag in lieu of bpfptr_t Revisions: - v5 https://lore.kernel.org/bpf/20250307213651.3065714-1-bboscaccy@linux.micros… - v4 https://lore.kernel.org/bpf/20250304203123.3935371-1-bboscaccy@linux.micros… - v3 https://lore.kernel.org/bpf/20250303222416.3909228-1-bboscaccy@linux.micros… - v2 https://lore.kernel.org/bpf/20250228165322.3121535-1-bboscaccy@linux.micros… - v1 https://lore.kernel.org/bpf/20250226003055.1654837-1-bboscaccy@linux.micros… Blaise Boscaccy (2): security: Propagate caller information in bpf hooks selftests/bpf: Add a kernel flag test for LSM bpf hook include/linux/lsm_hook_defs.h | 6 +-- include/linux/security.h | 12 +++--- kernel/bpf/syscall.c | 10 ++--- security/security.c | 15 ++++--- security/selinux/hooks.c | 6 +-- .../selftests/bpf/prog_tests/kernel_flag.c | 43 +++++++++++++++++++ .../selftests/bpf/progs/rcu_read_lock.c | 3 +- .../bpf/progs/test_cgroup1_hierarchy.c | 4 +- 
.../selftests/bpf/progs/test_kernel_flag.c | 28 ++++++++++++ .../bpf/progs/test_kfunc_dynptr_param.c | 6 +-- .../selftests/bpf/progs/test_lookup_key.c | 2 +- .../selftests/bpf/progs/test_ptr_untrusted.c | 2 +- .../bpf/progs/test_task_under_cgroup.c | 2 +- .../bpf/progs/test_verify_pkcs7_sig.c | 2 +- 14 files changed, 108 insertions(+), 33 deletions(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/kernel_flag.c create mode 100644 tools/testing/selftests/bpf/progs/test_kernel_flag.c -- 2.48.1

7 months, 3 weeks

3
8
0 0

[PATCH 6.1.y] KVM: selftests: Fix build error due to assert in dirty_log_test

by Andrey Kalachev

Hi all. Please apply that forgotten patch to fix v6.1 KVM selftests broken build. Origin of the patch can be found here [1] Regards, AK [1] https://lore.kernel.org/stable/20240403164230.1722018-1-rananta@google.com/ -- 2.30.2

7 months, 3 weeks

1
1
0 0

[PATCH] mm/huge_memory: drop beyond-EOF folios with the right number of refs.

by Zi Yan

When an after-split folio is large and needs to be dropped due to EOF, folio_put_refs(folio, folio_nr_pages(folio)) should be used to drop all page cache refs. Otherwise, the folio will not be freed, causing memory leak. This leak would happen on a filesystem with blocksize > page_size and a truncate is performed, where the blocksize makes folios split to >0 order ones, causing truncated folios not being freed. Fixes: c010d47f107f ("mm: thp: split huge page to any lower order pages") Reported-by: Hugh Dickins <hughd(a)google.com> Closes: https://lore.kernel.org/all/fcbadb7f-dd3e-21df-f9a7-2853b53183c4@google.com/ Cc: stable(a)vger.kernel.org Signed-off-by: Zi Yan <ziy(a)nvidia.com> --- mm/huge_memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 3d3ebdc002d5..373781b21e5c 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3304,7 +3304,7 @@ static void __split_huge_page(struct page *page, struct list_head *list, folio_account_cleaned(tail, inode_to_wb(folio->mapping->host)); __filemap_remove_folio(tail, NULL); - folio_put(tail); + folio_put_refs(tail, folio_nr_pages(tail)); } else if (!folio_test_anon(folio)) { __xa_store(&folio->mapping->i_pages, tail->index, tail, 0); -- 2.47.2

7 months, 3 weeks

1
0
0 0

[PATCH v9 0/8] Buddy allocator like (or non-uniform) folio split

by Zi Yan

Hi all, This patchset adds a new buddy allocator like (or non-uniform) large folio split from a order-n folio to order-m with m < n. It reduces 1. the total number of after-split folios from 2^(n-m) to n-m+1; 2. the amount of memory needed for multi-index xarray split from 2^(n/6-m/6) to n/6-m/6, assuming XA_CHUNK_SHIFT=6; 3. keep more large folios after a split from all order-m folios to order-(n-1) to order-m folios. For example, to split an order-9 to order-0, folio split generates 10 (or 11 for anonymous memory) folios instead of 512, allocates 1 xa_node instead of 8, and leaves 1 order-8, 1 order-7, ..., 1 order-1 and 2 order-0 folios (or 4 order-0 for anonymous memory) instead of 512 order-0 folios. Instead of duplicating existing split_huge_page*() code, __folio_split() is introduced as the shared backend code for both split_huge_page_to_list_to_order() and folio_split(). __folio_split() can support both uniform split and buddy allocator like (or non-uniform) split. All existing split_huge_page*() users can be gradually converted to use folio_split() if possible. In this patchset, I converted truncate_inode_partial_folio() to use folio_split(). xfstests quick group passed for both tmpfs and xfs. It is on top of mm-everything-2025-02-26-03-56 with V8 reverted. It is ready to be merged. Changelog === From V8[11]: 1. Removed gfp parameter from xas_try_split() and GFP_NOWAIT is used all the time. (per Baolin Wang) 2. Used __xas_init_node_for_split() instead of __xas_alloc_node_for_split() and moved node allocation out. It fixed a bug when xa_node is pre-allocated by xas_nomem() before xas_try_split() is called without being initialized for split. From V7[9]: 1. Fixed a wrong function name in lib/test_xarray.c. 2. Made __split_folio_to_order() never fail, since the old order check is already done in __folio_split(). (per David Hildenbrand) 3. Fixed an issue reported by syzbot[10] by not dropping the original folio during truncate. 4. 
Fixed a WARNING when READ_ONLY_THP_FOR_FS is enabled. (Thank David Hildenbrand for reporting the issue) 5. Used two separate struct page* parameters, split_at and lock_at, to specify at which subpage the non-uniform split happens and which subpage to keep locked after the split, respectively. It improves code readability. From V6[8]: 1. Added an xarray function xas_try_split() to support iterative folio split, removing the need of using xas_split_alloc() and xas_split(). The function guarantees that at most one xa_node is allocated for each call. 2. Added concrete numbers of after-split folios and xa_node savings to cover letter, commit log. (per Andrew) From V5[7]: 1. Split shmem to any lower order patches are in mm tree, so dropped from this series. 2. Rename split_folio_at() to try_folio_split() to clarify that non-uniform split will not be used if it is not supported. From V4[6]: 1. Enabled shmem support in both uniform and buddy allocator like split and added selftests for it. 2. Added functions to check if uniform split and buddy allocator like split are supported for the given folio and order. 3. Made truncate fall back to uniform split if buddy allocator split is not supported (CONFIG_READ_ONLY_THP_FOR_FS and FS without large folio). 4. Added the missing folio_clear_has_hwpoisoned() to __split_unmapped_folio(). From V3[5]: 1. Used xas_split_alloc(GFP_NOWAIT) instead of xas_nomem(), since extra operations inside xas_split_alloc() are needed for correctness. 2. Enabled folio_split() for shmem and no issue was found with xfstests quick test group. 3. Split both ends of a truncate range in truncate_inode_partial_folio() to avoid wasting memory in shmem truncate (per David Hildenbrand). 4. Removed page_in_folio_offset() since page_folio() does the same thing. 5. Finished truncate related tests from xfstests quick test group on XFS and tmpfs without issues. 6. Disabled buddy allocator like split on CONFIG_READ_ONLY_THP_FOR_FS and FS without large folio. 
This check was missed in the prior versions. From V2[3]: 1. Incorporated all the feedback from Kirill[4]. 2. Used GFP_NOWAIT for xas_nomem(). 3. Tested the code path when xas_nomem() fails. 4. Added selftests for folio_split(). 5. Fixed no THP config build error. From V1[2]: 1. Split the original patch 1 into multiple ones for easy review (per Kirill). 2. Added xas_destroy() to avoid memory leak. 3. Fixed nr_dropped not used error (per kernel test robot). 4. Added proper error handling when xas_nomem() fails to allocate memory for xas_split() during buddy allocator like split. From RFC[1]: 1. Merged backend code of split_huge_page_to_list_to_order() and folio_split(). The same code is used for both uniform split and buddy allocator like split. 2. Use xas_nomem() instead of xas_split_alloc() for folio_split(). 3. folio_split() now leaves the first after-split folio unlocked, instead of the one containing the given page, since the caller of truncate_inode_partial_folio() locks and unlocks the first folio. 4. Extended split_huge_page debugfs to use folio_split(). 5. Added truncate_inode_partial_folio() as first user of folio_split(). Design === folio_split() splits a large folio in the same way as buddy allocator splits a large free page for allocation. The purpose is to minimize the number of folios after the split. For example, if user wants to free the 3rd subpage in a order-9 folio, folio_split() will split the order-9 folio as: O-0, O-0, O-0, O-0, O-2, O-3, O-4, O-5, O-6, O-7, O-8 if it is anon O-1, O-0, O-0, O-2, O-3, O-4, O-5, O-6, O-7, O-9 if it is pagecache Since anon folio does not support order-1 yet. The split process is similar to existing approach: 1. Unmap all page mappings (split PMD mappings if exist); 2. Split meta data like memcg, page owner, page alloc tag; 3. Copy meta data in struct folio to sub pages, but instead of spliting the whole folio into multiple smaller ones with the same order in a shot, this approach splits the folio iteratively. 
Taking the example above, this approach first splits the original order-9 into two order-8, then splits left part of order-8 to two order-7 and so on; 4. Post-process split folios, like write mapping->i_pages for pagecache, adjust folio refcounts, add split folios to corresponding list; 5. Remap split folios 6. Unlock split folios. __split_unmapped_folio() and __split_folio_to_order() replace __split_huge_page() and __split_huge_page_tail() respectively. __split_unmapped_folio() uses different approaches to perform uniform split and buddy allocator like split: 1. uniform split: one single call to __split_folio_to_order() is used to uniformly split the given folio. All resulting folios are put back to the list after split. The folio containing the given page is left to caller to unlock and others are unlocked. 2. buddy allocator like (or non-uniform) split: (old_order - new_order) calls to __split_folio_to_order() are used to split the given folio at order N to order N-1. After each call, the target folio is changed to the one containing the page, which is given as a folio_split() parameter. After each call, folios not containing the page are put back to the list. The folio containing the page is put back to the list when its order is new_order. All folios are unlocked except the first folio, which is left to caller to unlock. Patch Overview === 1. Patch 1 added a new xarray function xas_try_split() to perform iterative xarray split. 2. Patch 2 added __split_unmapped_folio() and __split_folio_to_order() to prepare for moving to new backend split code. 3. Patch 3 moved common code in split_huge_page_to_list_to_order() to __folio_split(). 4. Patch 4 added new folio_split() and made split_huge_page_to_list_to_order() share the new __split_unmapped_folio() with folio_split(). 5. Patch 5 removed no longer used __split_huge_page() and __split_huge_page_tail(). 6. Patch 6 added a new in_folio_offset to split_huge_page debugfs for folio_split() test. 7. 
Patch 7 used try_folio_split() for truncate operation. 8. Patch 8 added folio_split() tests. Any comments and/or suggestions are welcome. Thanks. [1] https://lore.kernel.org/linux-mm/20241008223748.555845-1-ziy@nvidia.com/ [2] https://lore.kernel.org/linux-mm/20241028180932.1319265-1-ziy@nvidia.com/ [3] https://lore.kernel.org/linux-mm/20241101150357.1752726-1-ziy@nvidia.com/ [4] https://lore.kernel.org/linux-mm/e6ppwz5t4p4kvir6eqzoto4y5fmdjdxdyvxvtw43nc… [5] https://lore.kernel.org/linux-mm/20241205001839.2582020-1-ziy@nvidia.com/ [6] https://lore.kernel.org/linux-mm/20250106165513.104899-1-ziy@nvidia.com/ [7] https://lore.kernel.org/linux-mm/20250116211042.741543-1-ziy@nvidia.com/ [8] https://lore.kernel.org/linux-mm/20250205031417.1771278-1-ziy@nvidia.com/ [9] https://lore.kernel.org/linux-mm/20250211155034.268962-1-ziy@nvidia.com/ [10] https://lore.kernel.org/all/67af65cb.050a0220.21dd3.004a.GAE@google.com/ [11] https://lore.kernel.org/linux-mm/20250218235012.1542225-1-ziy@nvidia.com/ Zi Yan (8): xarray: add xas_try_split() to split a multi-index entry mm/huge_memory: add two new (not yet used) functions for folio_split() mm/huge_memory: move folio split common code to __folio_split() mm/huge_memory: add buddy allocator like (non-uniform) folio_split() mm/huge_memory: remove the old, unused __split_huge_page() mm/huge_memory: add folio_split() to debugfs testing interface mm/truncate: use buddy allocator like folio split for truncate operation selftests/mm: add tests for folio_split(), buddy allocator like split Documentation/core-api/xarray.rst | 14 +- include/linux/huge_mm.h | 36 + include/linux/xarray.h | 6 + lib/test_xarray.c | 52 ++ lib/xarray.c | 131 ++- mm/huge_memory.c | 755 ++++++++++++------ mm/truncate.c | 31 +- tools/testing/radix-tree/Makefile | 1 + .../selftests/mm/split_huge_page_test.c | 34 +- 9 files changed, 783 insertions(+), 277 deletions(-) -- 2.47.2

7 months, 3 weeks

5
30
0 0

[PATCH bpf-next v2 0/3] bpf: Fix use-after-free of sockmap

by Jiayuan Chen

1. Issue Syzkaller reported this issue [1]. 2. Reproduce We can reproduce this issue by using the test_sockmap_with_close_on_write() test I provided in selftest, also you need to apply the following patch to ensure 100% reproducibility (sleep after checking sock): ''' static void sk_psock_verdict_data_ready(struct sock *sk) { ....... if (unlikely(!sock)) return; + if (!strcmp("test_progs", current->comm)) { + printk("sleep 2s to wait socket freed\n"); + mdelay(2000); + printk("sleep end\n"); + } ops = READ_ONCE(sock->ops); if (!ops || !ops->read_skb) return; } ''' Then running './test_progs -v sockmap_basic', and if the kernel has KASAN enabled [2], you will see the following warning: ''' BUG: KASAN: slab-use-after-free in sk_psock_verdict_data_ready+0x29b/0x2d0 Read of size 8 at addr ffff88813a777020 by task test_progs/47055 Tainted: [O]=OOT_MODULE Call Trace: <TASK> dump_stack_lvl+0x53/0x70 print_address_description.constprop.0+0x30/0x420 ? sk_psock_verdict_data_ready+0x29b/0x2d0 print_report+0xb7/0x270 ? sk_psock_verdict_data_ready+0x29b/0x2d0 ? kasan_addr_to_slab+0xd/0xa0 ? sk_psock_verdict_data_ready+0x29b/0x2d0 kasan_report+0xca/0x100 ? sk_psock_verdict_data_ready+0x29b/0x2d0 sk_psock_verdict_data_ready+0x29b/0x2d0 unix_stream_sendmsg+0x4a6/0xa40 ? __pfx_unix_stream_sendmsg+0x10/0x10 ? fdget+0x2c1/0x3a0 __sys_sendto+0x39c/0x410 ''' 3. Reason ''' CPU0 CPU1 unix_stream_sendmsg(sk): other = unix_peer(sk) other->sk_data_ready(other): socket *sock = sk->sk_socket if (unlikely(!sock)) return; close(other): ... other->close() free(socket) READ_ONCE(sock->ops) ^ use 'sock' after free ''' For TCP, UDP, or other protocols, we have already performed rcu_read_lock() when the network stack receives packets in ip_input.c: ''' ip_local_deliver_finish(): rcu_read_lock() ip_protocol_deliver_rcu() xxx_rcv rcu_read_unlock() ''' However, for Unix sockets, sk_data_ready is called directly from the process context without rcu_read_lock() protection. 4. 
Solution Based on the fact that the 'struct socket' is released using call_rcu(), We add rcu_read_{un}lock() at the entrance and exit of our sk_data_ready. It will not increase performance overhead, at least for TCP and UDP, they are already in a relatively large critical section. Of course, we can also add a custom callback for Unix sockets and call rcu_read_lock() before calling _verdict_data_ready like this: ''' if (sk_is_unix(sk)) sk->sk_data_ready = sk_psock_verdict_data_ready_rcu; else sk->sk_data_ready = sk_psock_verdict_data_ready; sk_psock_verdict_data_ready_rcu(): rcu_read_lock() sk_psock_verdict_data_ready() rcu_read_unlock() ''' However, this will cause too many branches, and it's not suitable to distinguish network protocols in skmsg.c. [1] https://syzkaller.appspot.com/bug?extid=dd90a702f518e0eac072 [2] https://syzkaller.appspot.com/text?tag=KernelConfig&x=1362a5aee630ff34 --- v1 -> v2: 1. Add Fixes tag. 2. Extend selftest of edge case for TCP/UDP sockets. 3. Add Reviewed-by and Acked-by tag. https://lore.kernel.org/bpf/20250226132242.52663-1-jiayuan.chen@linux.dev/T… Jiayuan Chen (3): bpf, sockmap: avoid using sk_socket after free selftests/bpf: Add socketpair to create_pair to support unix socket selftests/bpf: Add edge case tests for sockmap net/core/skmsg.c | 18 ++++-- .../selftests/bpf/prog_tests/socket_helpers.h | 13 +++- .../selftests/bpf/prog_tests/sockmap_basic.c | 59 +++++++++++++++++++ 3 files changed, 84 insertions(+), 6 deletions(-) -- 2.47.1

7 months, 3 weeks

2
7
0 0

[PATCH bpf-next v12 0/5] xsk: TX metadata Launch Time support

by Song Yoong Siang

This series expands the XDP TX metadata framework to allow user applications to pass per packet 64-bit launch time directly to the kernel driver, requesting launch time hardware offload support. The XDP TX metadata framework will not perform any clock conversion or packet reordering. Please note that the role of Tx metadata is just to pass the launch time, not to enable the offload feature. Users will need to enable the launch time hardware offload feature of the device by using the respective command, such as the tc-etf command. Although some devices use the tc-etf command to enable their launch time hardware offload feature, xsk packets will not go through the etf qdisc. Therefore, in my opinion, the launch time should always be based on the PTP Hardware Clock (PHC). Thus, I did not include a clock ID to indicate the clock source. To simplify the test steps, I modified the xdp_hw_metadata bpf self-test tool in such a way that it will set the launch time based on the offset provided by the user and the value of the Receive Hardware Timestamp, which is against the PHC. This will eliminate the need to discipline System Clock with the PHC and then use clock_gettime() to get the time. Please note that AF_XDP lacks a feedback mechanism to inform the application if the requested launch time is invalid. So, users are expected to be familiar with the horizon of the launch time of the device they use and not request a launch time that is beyond the horizon. Otherwise, the driver might interpret the launch time incorrectly and react wrongly. For stmmac and igc, where modulo computation is used, a launch time larger than the horizon will cause the device to transmit the packet earlier than the requested launch time. Although there is no feedback mechanism for the launch time request for now, users can still check whether the requested launch time is working or not, by requesting the Transmit Completion Hardware Timestamp. 
v12: - Fix the comment in include/uapi/linux/if_xdp.h to allign with what is generated by ./tools/net/ynl/ynl-regen.sh to avoid dirty tree error in the netdev/ynl checks. v11: https://lore.kernel.org/netdev/20250216074302.956937-1-yoong.siang.song@int… - regenerate netdev_xsk_flags based on latest netdev.yaml (Jakub) v10: https://lore.kernel.org/netdev/20250207021943.814768-1-yoong.siang.song@int… - use net_err_ratelimited(), instead of net_ratelimit() (Maciej) - accumulate the amount of used descs in local variable and update the igc_metadata_request::used_desc once (Maciej) - Ensure reverse christmas tree rule (Maciej) V9: https://lore.kernel.org/netdev/20250206060408.808325-1-yoong.siang.song@int… - Remove the igc_desc_unused() checking (Maciej) - Ensure that skb allocation and DMA mapping work before proceeding to fill in igc_tx_buffer info, context desc, and data desc (Maciej) - Rate limit the error messages (Maciej) - Update the comment to indicate that the 2 descriptors needed by the empty frame are already taken into consideration (Maciej) - Handle the case where the insertion of an empty frame fails and explain the reason behind (Maciej) - put self SOB tag as last tag (Maciej) V8: https://lore.kernel.org/netdev/20250205024116.798862-1-yoong.siang.song@int… - check the number of used descriptor in xsk_tx_metadata_request() by using used_desc of struct igc_metadata_request, and then decreases the budget with it (Maciej) - submit another bug fix patch to set the buffer type for empty frame (Maciej): https://lore.kernel.org/netdev/20250205023603.798819-1-yoong.siang.song@int… V7: https://lore.kernel.org/netdev/20250204004907.789330-1-yoong.siang.song@int… - split the refactoring code of igc empty packet insertion into a separate commit (Faizal) - add explanation on why the value "4" is used as igc transmit budget (Faizal) - perform a stress test by sending 1000 packets with 10ms interval and launch time set to 500us in the future (Faizal & Yong Liang) V6: 
https://lore.kernel.org/netdev/20250116155350.555374-1-yoong.siang.song@int… - fix selftest build errors by using asprintf() and realloc(), instead of managing the buffer sizes manually (Daniel, Stanislav) V5: https://lore.kernel.org/netdev/20250114152718.120588-1-yoong.siang.song@int… - change netdev feature name from tx-launch-time to tx-launch-time-fifo to explicitly state the FIFO behaviour (Stanislav) - improve the looping of xdp_hw_metadata app to wait for packet tx completion to be more readable by using clock_gettime() (Stanislav) - add launch time setup steps into xdp_hw_metadata app (Stanislav) V4: https://lore.kernel.org/netdev/20250106135506.9687-1-yoong.siang.song@intel… - added XDP launch time support to the igc driver (Jesper & Florian) - added per-driver launch time limitation on xsk-tx-metadata.rst (Jesper) - added explanation on FIFO behavior on xsk-tx-metadata.rst (Jakub) - added step to enable launch time in the commit message (Jesper & Willem) - explicitly documented the type of launch_time and which clock source it is against (Willem) V3: https://lore.kernel.org/netdev/20231203165129.1740512-1-yoong.siang.song@in… - renamed to use launch time (Jesper & Willem) - changed the default launch time in xdp_hw_metadata apps from 1s to 0.1s because some NICs do not support such a large future time. 
V2: https://lore.kernel.org/netdev/20231201062421.1074768-1-yoong.siang.song@in… - renamed to use Earliest TxTime First (Willem) - renamed to use txtime (Willem) V1: https://lore.kernel.org/netdev/20231130162028.852006-1-yoong.siang.song@int… Song Yoong Siang (5): xsk: Add launch time hardware offload support to XDP Tx metadata selftests/bpf: Add launch time request to xdp_hw_metadata net: stmmac: Add launch time support to XDP ZC igc: Refactor empty frame insertion for launch time support igc: Add launch time support to XDP ZC Documentation/netlink/specs/netdev.yaml | 4 + Documentation/networking/xsk-tx-metadata.rst | 62 +++++++ drivers/net/ethernet/intel/igc/igc.h | 1 + drivers/net/ethernet/intel/igc/igc_main.c | 143 +++++++++++---- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 2 + .../net/ethernet/stmicro/stmmac/stmmac_main.c | 13 ++ include/net/xdp_sock.h | 10 ++ include/net/xdp_sock_drv.h | 1 + include/uapi/linux/if_xdp.h | 10 ++ include/uapi/linux/netdev.h | 3 + net/core/netdev-genl.c | 2 + net/xdp/xsk.c | 3 + tools/include/uapi/linux/if_xdp.h | 10 ++ tools/include/uapi/linux/netdev.h | 3 + tools/testing/selftests/bpf/xdp_hw_metadata.c | 168 +++++++++++++++++- 15 files changed, 396 insertions(+), 39 deletions(-) -- 2.34.1

7 months, 3 weeks

5
10
0 0

[PATCH net-next 3/4] tools/testing/selftests/cgroup/cgroup_util: add cg_get_id helper

by Alexander Mikhalitsyn

Cc: linux-kselftest(a)vger.kernel.org Cc: linux-kernel(a)vger.kernel.org Cc: netdev(a)vger.kernel.org Cc: cgroups(a)vger.kernel.org Cc: "David S. Miller" <davem(a)davemloft.net> Cc: Eric Dumazet <edumazet(a)google.com> Cc: Jakub Kicinski <kuba(a)kernel.org> Cc: Paolo Abeni <pabeni(a)redhat.com> Cc: Willem de Bruijn <willemb(a)google.com> Cc: Leon Romanovsky <leon(a)kernel.org> Cc: Arnd Bergmann <arnd(a)arndb.de> Cc: Christian Brauner <brauner(a)kernel.org> Cc: Kuniyuki Iwashima <kuniyu(a)amazon.com> Cc: Lennart Poettering <mzxreary(a)0pointer.de> Cc: Luca Boccassi <bluca(a)debian.org> Cc: Tejun Heo <tj(a)kernel.org> Cc: Johannes Weiner <hannes(a)cmpxchg.org> Cc: "Michal Koutný" <mkoutny(a)suse.com> Cc: Shuah Khan <shuah(a)kernel.org> Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn(a)canonical.com> --- tools/testing/selftests/cgroup/cgroup_util.c | 15 +++++++++++++++ tools/testing/selftests/cgroup/cgroup_util.h | 2 ++ 2 files changed, 17 insertions(+) diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c index 1e2d46636a0c..b60e0e1433f4 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.c +++ b/tools/testing/selftests/cgroup/cgroup_util.c @@ -205,6 +205,21 @@ int cg_open(const char *cgroup, const char *control, int flags) return open(path, flags); } +/* + * Returns cgroup id on success, or -1 on failure. 
+ */ +uint64_t cg_get_id(const char *cgroup) +{ + struct stat st; + int ret; + + ret = stat(cgroup, &st); + if (ret) + return -1; + + return st.st_ino; +} + int cg_write_numeric(const char *cgroup, const char *control, long value) { char buf[64]; diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h index 19b131ee7707..3f2d9676ceda 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.h +++ b/tools/testing/selftests/cgroup/cgroup_util.h @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include <stdbool.h> +#include <stdint.h> #include <stdlib.h> #include "../kselftest.h" @@ -39,6 +40,7 @@ long cg_read_key_long(const char *cgroup, const char *control, const char *key); extern long cg_read_lc(const char *cgroup, const char *control); extern int cg_write(const char *cgroup, const char *control, char *buf); extern int cg_open(const char *cgroup, const char *control, int flags); +extern uint64_t cg_get_id(const char *cgroup); int cg_write_numeric(const char *cgroup, const char *control, long value); extern int cg_run(const char *cgroup, int (*fn)(const char *cgroup, void *arg), -- 2.43.0

7 months, 3 weeks

1
0
0 0

🎁 Yes, a Truly Free SEO Cleanup Awaits!

by Free Backlinks Clean up

Hi there, I understand the skepticism—“free” offers often come with strings attached. But we’re genuinely committed to supporting webmasters and giving back to the community with no hidden catches. Simply fill out the form, and our team will deliver a comprehensive SEO cleanup within a day—no cost, no commitments. Get Your Free SEO Cleanup Here: https://www.1free-seo.com/get-started/ (https://www.1free-seo.com/get-started/) Cheers, Mike Larson WhatsApp: +1 (833) 854-6783 (https://wa.me/18338546783) Telegram: https://t.me/freeseosupport (https://t.me/freeseosupport) Book appointment: https://1free-seo.com/free-seo-consultaition/ (https://1free-seo.com/free-seo-consultaition/) Unsubscribe (https://clicks.1free-seo.com/?na=u&nk=2080001-9a11fc6fe9&nek=24-) | Manage your subscription (https://clicks.1free-seo.com/?na=p&nk=2080001-9a11fc6fe9&nek=24-) | View online (https://clicks.1free-seo.com/?na=v&nk=2080001-9a11fc6fe9&id=24)

7 months, 3 weeks

1
0
0 0

[PATCH v3 0/2] scanf: convert self-test to KUnit

by Tamir Duberstein

This is one of just 3 remaining "Test Module" kselftests (the others being bitmap and printf), the rest having been converted to KUnit. In addition to the enclosed patch, please consider this an RFC on the removal of the "Test Module" kselftest machinery. I tested this using: $ tools/testing/kunit/kunit.py run --arch arm64 --make_options LLVM=1 scanf Signed-off-by: Tamir Duberstein <tamird(a)gmail.com> --- Changes in v3: - Reduce diff noise in lib/Makefile. (Petr Mladek) - Split `scanf_test` into a few test cases. New output: : =================== scanf (10 subtests) ==================== : [PASSED] numbers_simple : ====================== numbers_list ======================= : [PASSED] delim=" " : [PASSED] delim=":" : [PASSED] delim="," : [PASSED] delim="-" : [PASSED] delim="/" : ================== [PASSED] numbers_list =================== : ============ numbers_list_field_width_typemax ============= : [PASSED] delim=" " : [PASSED] delim=":" : [PASSED] delim="," : [PASSED] delim="-" : [PASSED] delim="/" : ======== [PASSED] numbers_list_field_width_typemax ========= : =========== numbers_list_field_width_val_width ============ : [PASSED] delim=" " : [PASSED] delim=":" : [PASSED] delim="," : [PASSED] delim="-" : [PASSED] delim="/" : ======= [PASSED] numbers_list_field_width_val_width ======== : [PASSED] numbers_slice : [PASSED] numbers_prefix_overflow : [PASSED] test_simple_strtoull : [PASSED] test_simple_strtoll : [PASSED] test_simple_strtoul : [PASSED] test_simple_strtol : ====================== [PASSED] scanf ====================== : ============================================================ : Testing complete. Ran 22 tests: passed: 22 : Elapsed time: 5.517s total, 0.001s configuring, 5.440s building, 0.067s running - Link to v2: https://lore.kernel.org/r/20250203-scanf-kunit-convert-v2-1-277a618d804e@gm… Changes in v2: - Rename lib/{test_scanf.c => scanf_kunit.c}. 
(Andy Shevchenko) - Link to v1: https://lore.kernel.org/r/20250131-scanf-kunit-convert-v1-1-0976524f0eba@gm… --- Tamir Duberstein (2): scanf: convert self-test to KUnit scanf: break kunit into test cases MAINTAINERS | 2 +- arch/m68k/configs/amiga_defconfig | 1 - arch/m68k/configs/apollo_defconfig | 1 - arch/m68k/configs/atari_defconfig | 1 - arch/m68k/configs/bvme6000_defconfig | 1 - arch/m68k/configs/hp300_defconfig | 1 - arch/m68k/configs/mac_defconfig | 1 - arch/m68k/configs/multi_defconfig | 1 - arch/m68k/configs/mvme147_defconfig | 1 - arch/m68k/configs/mvme16x_defconfig | 1 - arch/m68k/configs/q40_defconfig | 1 - arch/m68k/configs/sun3_defconfig | 1 - arch/m68k/configs/sun3x_defconfig | 1 - arch/powerpc/configs/ppc64_defconfig | 1 - lib/Kconfig.debug | 20 +- lib/Makefile | 2 +- lib/scanf_kunit.c | 800 ++++++++++++++++++++++++++++++++++ lib/test_scanf.c | 814 ----------------------------------- tools/testing/selftests/lib/Makefile | 2 +- tools/testing/selftests/lib/config | 1 - tools/testing/selftests/lib/scanf.sh | 4 - 21 files changed, 820 insertions(+), 838 deletions(-) --- base-commit: a86bf2283d2c9769205407e2b54777c03d012939 change-id: 20250131-scanf-kunit-convert-f70dc33bb34c Best regards, -- Tamir Duberstein <tamird(a)gmail.com>

7 months, 3 weeks

3
6
0 0

[PATCH bpf-next 0/2] selftests/bpf: Move test_lwt_seg6local to test_progs

by Bastien Curutchet (eBPF Foundation)

Hi all, This patch series continues the work to migrate the script tests into prog_tests. test_lwt_seg6local.sh tests some bpf_lwt_* helpers. It contains only one test that uses a network topology quite different from the ones that can be found in the other prog_tests/lwt_*.c files, so I add a new prog_tests/lwt_seg6local.c file. While working on the migration I noticed that some routes present in the script weren't needed so PATCH 1 deletes them and then PATCH 2 migrates the test into the test_progs framework. Signed-off-by: Bastien Curutchet (eBPF Foundation) <bastien.curutchet(a)bootlin.com> --- Bastien Curutchet (eBPF Foundation) (2): selftests/bpf: lwt_seg6local: Remove unused routes selftests/bpf: lwt_seg6local: Move test to test_progs tools/testing/selftests/bpf/Makefile | 1 - .../selftests/bpf/prog_tests/lwt_seg6local.c | 176 +++++++++++++++++++++ tools/testing/selftests/bpf/test_lwt_seg6local.sh | 156 ------------------ 3 files changed, 176 insertions(+), 157 deletions(-) --- base-commit: 86eb3a47230a41c6ccf5cdae8ee0a7e7292aa29d change-id: 20250214-seg6local-64bcde44b66e Best regards, -- Bastien Curutchet (eBPF Foundation) <bastien.curutchet(a)bootlin.com>

7 months, 3 weeks

3
4
0 0

[PATCH v2] kunit: tool: Fix bug in parsing test plan

by Rae Moar

A bug was identified where the KTAP below caused an infinite loop: TAP version 13 ok 4 test_case 1..4 The infinite loop was caused by the parser not parsing a test plan if following a test result line. Fix this bug to correctly parse test plan line. Signed-off-by: Rae Moar <rmoar(a)google.com> --- Changes since v1: - Remove error reported when test plan is missing. tools/testing/kunit/kunit_parser.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index 29fc27e8949b..da53a709773a 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -759,7 +759,7 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: # If parsing the main/top-level test, parse KTAP version line and # test plan test.name = "main" - ktap_line = parse_ktap_header(lines, test, printer) + parse_ktap_header(lines, test, printer) test.log.extend(parse_diagnostic(lines)) parse_test_plan(lines, test) parent_test = True @@ -768,13 +768,12 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: # the KTAP version line and/or subtest header line ktap_line = parse_ktap_header(lines, test, printer) subtest_line = parse_test_header(lines, test) + test.log.extend(parse_diagnostic(lines)) + parse_test_plan(lines, test) parent_test = (ktap_line or subtest_line) if parent_test: - # If KTAP version line and/or subtest header is found, attempt - # to parse test plan and print test header - test.log.extend(parse_diagnostic(lines)) - parse_test_plan(lines, test) print_test_header(test, printer) + expected_count = test.expected_count subtests = [] test_num = 1 base-commit: 0619a4868fc1b32b07fb9ed6c69adc5e5cf4e4b2 -- 2.49.0.rc0.332.g42c0ae87b1-goog

7 months, 3 weeks

3
2
0 0

[PATCH v5 bpf-next 0/2] security: Propagate caller information in bpf hooks

by Blaise Boscaccy

Hello, While trying to implement an eBPF gatekeeper program, we ran into an issue where the LSM hooks are missing some relevant data. Certain subcommands passed to the bpf() syscall can be invoked from either the kernel or userspace. Additionally, some fields in the bpf_attr struct contain pointers, and depending on where the subcommand was invoked, they could point to either user or kernel memory. One example of this is the bpf_prog_load subcommand and its fd_array. This data is made available and used by the verifier but not made available to the LSM subsystem. This patchset simply exposes that information to applicable LSM hooks. Change list: - v4 -> v5 - merge v4 selftest breakout patch back into a single patch - change "is_kernel" to "kernel" - add selftest using new kernel flag - v3 -> v4 - split out selftest changes into a separate patch - v2 -> v3 - reorder params so that the new boolean flag is the last param - fixup function signatures in bpf selftests - v1 -> v2 - Pass a boolean flag in lieu of bpfptr_t Revisions: - v4 https://lore.kernel.org/bpf/20250304203123.3935371-1-bboscaccy@linux.micros… - v3 https://lore.kernel.org/bpf/20250303222416.3909228-1-bboscaccy@linux.micros… - v2 https://lore.kernel.org/bpf/20250228165322.3121535-1-bboscaccy@linux.micros… - v1 https://lore.kernel.org/bpf/20250226003055.1654837-1-bboscaccy@linux.micros… Blaise Boscaccy (2): security: Propagate caller information in bpf hooks selftests/bpf: Add a kernel flag test for LSM bpf hook include/linux/lsm_hook_defs.h | 6 +-- include/linux/security.h | 12 +++--- kernel/bpf/syscall.c | 10 ++--- security/security.c | 15 ++++--- security/selinux/hooks.c | 6 +-- .../selftests/bpf/prog_tests/kernel_flag.c | 43 +++++++++++++++++++ .../selftests/bpf/progs/rcu_read_lock.c | 3 +- .../bpf/progs/test_cgroup1_hierarchy.c | 4 +- .../selftests/bpf/progs/test_kernel_flag.c | 32 ++++++++++++++ .../bpf/progs/test_kfunc_dynptr_param.c | 6 +-- .../selftests/bpf/progs/test_lookup_key.c | 2 +- 
.../selftests/bpf/progs/test_ptr_untrusted.c | 2 +- .../bpf/progs/test_task_under_cgroup.c | 2 +- .../bpf/progs/test_verify_pkcs7_sig.c | 2 +- 14 files changed, 112 insertions(+), 33 deletions(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/kernel_flag.c create mode 100644 tools/testing/selftests/bpf/progs/test_kernel_flag.c -- 2.48.1

7 months, 3 weeks

2
3
0 0

[PATCH v5 0/3] printf: convert self-test to KUnit

by Tamir Duberstein

This is one of just 3 remaining "Test Module" kselftests (the others being bitmap and scanf), the rest having been converted to KUnit. I tested this using: $ tools/testing/kunit/kunit.py run --arch arm64 --make_options LLVM=1 printf I have also sent out a series converting scanf[0]. Link: https://lore.kernel.org/all/20250204-scanf-kunit-convert-v3-0-386d7c3ee714@… [0] Signed-off-by: Tamir Duberstein <tamird(a)gmail.com> --- Changes in v5: - Update `do_test` `__printf` annotation (Rasmus Villemoes). - Link to v4: https://lore.kernel.org/r/20250214-printf-kunit-convert-v4-0-c254572f1565@g… Changes in v4: - Add patch "implicate test line in failure messages". - Rebase on linux-next, move scanf_kunit.c into lib/tests/. - Link to v3: https://lore.kernel.org/r/20250210-printf-kunit-convert-v3-0-ee6ac5500f5e@g… Changes in v3: - Remove extraneous trailing newlines from failure messages. - Replace `pr_warn` with `kunit_warn`. - Drop arch changes. - Remove KUnit boilerplate from CONFIG_PRINTF_KUNIT_TEST help text. - Restore `total_tests` counting. - Remove tc_fail macro in last patch. - Link to v2: https://lore.kernel.org/r/20250207-printf-kunit-convert-v2-0-057b23860823@g… Changes in v2: - Incorporate code review from prior work[0] by Arpitha Raghunandan. 
- Link to v1: https://lore.kernel.org/r/20250204-printf-kunit-convert-v1-0-ecf1b846a4de@g… Link: https://lore.kernel.org/lkml/20200817043028.76502-1-98.arpi@gmail.com/t/#u [0] --- Tamir Duberstein (3): printf: convert self-test to KUnit printf: break kunit into test cases printf: implicate test line in failure messages Documentation/core-api/printk-formats.rst | 4 +- MAINTAINERS | 2 +- lib/Kconfig.debug | 12 +- lib/Makefile | 1 - lib/tests/Makefile | 1 + lib/{test_printf.c => tests/printf_kunit.c} | 437 ++++++++++++---------------- tools/testing/selftests/lib/config | 1 - tools/testing/selftests/lib/printf.sh | 4 - 8 files changed, 200 insertions(+), 262 deletions(-) --- base-commit: d4b0fd87ff0d4338b259dc79b2b3c6f7e70e8afa change-id: 20250131-printf-kunit-convert-fd4012aa2ec6 Best regards, -- Tamir Duberstein <tamird(a)gmail.com>

7 months, 3 weeks

2
16
0 0

[PATCH net-next v8 0/6] tun: Introduce virtio-net hashing feature

by Akihiko Odaki

virtio-net has two uses for hashes: one is RSS and another is hash reporting. Conventionally the hash calculation was done by the VMM. However, computing the hash after the queue was chosen defeats the purpose of RSS. Another approach is to use an eBPF steering program. This approach has another downside: it cannot report the calculated hash due to the restrictive nature of eBPF. Introduce the code to compute hashes to the kernel in order to overcome these challenges. An alternative solution is to extend the eBPF steering program so that it will be able to report to the userspace, but it is based on context rewrites, which is in feature freeze. We can adopt kfuncs, but they will not be UAPIs. We opt to ioctl to align with other relevant UAPIs (KVM and vhost_net). The patches for QEMU to use this new feature were submitted as an RFC and are available at: https://patchew.org/QEMU/20240915-hash-v3-0-79cb08d28647@daynix.com/ This work was presented at LPC 2024: https://lpc.events/event/18/contributions/1963/ V1 -> V2: Changed to introduce a new BPF program type. Signed-off-by: Akihiko Odaki <akihiko.odaki(a)daynix.com> --- Changes in v8: - Disabled IPv6 to eliminate noises in tests. - Added a branch in tap to avoid unnecessary dissection when hash reporting is disabled. - Removed unnecessary rtnl_lock(). - Extracted code to handle new ioctls into separate functions to avoid adding extra NULL checks to the code handling other ioctls. - Introduced variable named "fd" to __tun_chr_ioctl(). - s/-/=/g in a patch message to avoid confusing Git. - Link to v7: https://lore.kernel.org/r/20250228-rss-v7-0-844205cbbdd6@daynix.com Changes in v7: - Ensured to set hash_report to VIRTIO_NET_HASH_REPORT_NONE for VHOST_NET_F_VIRTIO_NET_HDR. - s/4/sizeof(u32)/ in patch "virtio_net: Add functions for hashing". - Added tap_skb_cb type. - Rebased. 
- Link to v6: https://lore.kernel.org/r/20250109-rss-v6-0-b1c90ad708f6@daynix.com Changes in v6: - Extracted changes to fill vnet header holes into another series. - Squashed patches "skbuff: Introduce SKB_EXT_TUN_VNET_HASH", "tun: Introduce virtio-net hash reporting feature", and "tun: Introduce virtio-net RSS" into patch "tun: Introduce virtio-net hash feature". - Dropped the RFC tag. - Link to v5: https://lore.kernel.org/r/20241008-rss-v5-0-f3cf68df005d@daynix.com Changes in v5: - Fixed a compilation error with CONFIG_TUN_VNET_CROSS_LE. - Optimized the calculation of the hash value according to: https://git.dpdk.org/dpdk/commit/?id=3fb1ea032bd6ff8317af5dac9af901f1f324ca… - Added patch "tun: Unify vnet implementation". - Dropped patch "tap: Pad virtio header with zero". - Added patch "selftest: tun: Test vnet ioctls without device". - Reworked selftests to skip for older kernels. - Documented the case when the underlying device is deleted and packets have queue_mapping set by TC. - Reordered test harness arguments. - Added code to handle fragmented packets. - Link to v4: https://lore.kernel.org/r/20240924-rss-v4-0-84e932ec0e6c@daynix.com Changes in v4: - Moved tun_vnet_hash_ext to if_tun.h. - Renamed virtio_net_toeplitz() to virtio_net_toeplitz_calc(). - Replaced htons() with cpu_to_be16(). - Changed virtio_net_hash_rss() to return void. - Reordered variable declarations in virtio_net_hash_rss(). - Removed virtio_net_hdr_v1_hash_from_skb(). - Updated messages of "tap: Pad virtio header with zero" and "tun: Pad virtio header with zero". - Fixed vnet_hash allocation size. - Ensured to free vnet_hash when destructing tun_struct. - Link to v3: https://lore.kernel.org/r/20240915-rss-v3-0-c630015db082@daynix.com Changes in v3: - Reverted back to add ioctl. - Split patch "tun: Introduce virtio-net hashing feature" into "tun: Introduce virtio-net hash reporting feature" and "tun: Introduce virtio-net RSS". 
- Changed to reuse hash values computed for automq instead of performing RSS hashing when hash reporting is requested but RSS is not. - Extracted relevant data from struct tun_struct to keep it minimal. - Added kernel-doc. - Changed to allow calling TUNGETVNETHASHCAP before TUNSETIFF. - Initialized num_buffers with 1. - Added a test case for unclassified packets. - Fixed error handling in tests. - Changed tests to verify that the queue index will not overflow. - Rebased. - Link to v2: https://lore.kernel.org/r/20231015141644.260646-1-akihiko.odaki@daynix.com --- Akihiko Odaki (6): virtio_net: Add functions for hashing net: flow_dissector: Export flow_keys_dissector_symmetric tun: Introduce virtio-net hash feature selftest: tun: Test vnet ioctls without device selftest: tun: Add tests for virtio-net hashing vhost/net: Support VIRTIO_NET_F_HASH_REPORT Documentation/networking/tuntap.rst | 7 + drivers/net/Kconfig | 1 + drivers/net/tap.c | 67 +++- drivers/net/tun.c | 98 +++++- drivers/net/tun_vnet.h | 159 ++++++++- drivers/vhost/net.c | 49 +-- include/linux/if_tap.h | 2 + include/linux/skbuff.h | 3 + include/linux/virtio_net.h | 188 ++++++++++ include/net/flow_dissector.h | 1 + include/uapi/linux/if_tun.h | 75 ++++ net/core/flow_dissector.c | 3 +- net/core/skbuff.c | 4 + tools/testing/selftests/net/Makefile | 2 +- tools/testing/selftests/net/tun.c | 656 ++++++++++++++++++++++++++++++++++- 15 files changed, 1254 insertions(+), 61 deletions(-) --- base-commit: dd83757f6e686a2188997cb58b5975f744bb7786 change-id: 20240403-rss-e737d89efa77 prerequisite-change-id: 20241230-tun-66e10a49b0c7:v6 prerequisite-patch-id: 871dc5f146fb6b0e3ec8612971a8e8190472c0fb prerequisite-patch-id: 2797ed249d32590321f088373d4055ff3f430a0e prerequisite-patch-id: ea3370c72d4904e2f0536ec76ba5d26784c0cede prerequisite-patch-id: 837e4cf5d6b451424f9b1639455e83a260c4440d prerequisite-patch-id: ea701076f57819e844f5a35efe5cbc5712d3080d prerequisite-patch-id: 701646fb43ad04cc64dd2bf13c150ccbe6f828ce 
prerequisite-patch-id: 53176dae0c003f5b6c114d43f936cf7140d31bb5 prerequisite-change-id: 20250116-buffers-96e14bf023fc:v2 prerequisite-patch-id: 25fd4f99d4236a05a5ef16ab79f3e85ee57e21cc Best regards, -- Akihiko Odaki <akihiko.odaki(a)daynix.com>

7 months, 3 weeks

4
9
0 0

[PATCH v2 0/4] RISC-V KVM PMU fix and selftest improvement

by Atish Patra

This series adds a fix for the KVM PMU code and improves the pmu selftest by allowing it to generate a precise number of interrupts. It also provides an additional option to the overflow test that allows the user to generate a custom number of LCOFI interrupts. Signed-off-by: Atish Patra <atishp(a)rivosinc.com> --- Changes in v2: - Initialized the local overflow irq variable to 0 to indicate that it's not an allowed value. - Moved the introduction of argument option `n` to the last patch. - Link to v1: https://lore.kernel.org/r/20250226-kvm_pmu_improve-v1-0-74c058c2bf6d@rivosi… --- Atish Patra (4): RISC-V: KVM: Disable the kernel perf counter during configure KVM: riscv: selftests: Do not start the counter in the overflow handler KVM: riscv: selftests: Change command line option KVM: riscv: selftests: Allow number of interrupts to be configurable arch/riscv/kvm/vcpu_pmu.c | 1 + tools/testing/selftests/kvm/riscv/sbi_pmu_test.c | 81 ++++++++++++++++-------- 2 files changed, 57 insertions(+), 25 deletions(-) --- base-commit: 0ad2507d5d93f39619fc42372c347d6006b64319 change-id: 20250225-kvm_pmu_improve-fffd038b2404 -- Regards, Atish patra

7 months, 3 weeks

3
6
0 0

[PATCH v2 net 0/6] eth: bnxt: fix several bugs in the bnxt module

by Taehee Yoo

The first patch fixes the setting of an incorrect skb->truesize. When an xdp-mb prog returns XDP_PASS, an skb is allocated and initialized. Currently, the truesize is calculated as BNXT_RX_PAGE_SIZE * sinfo->nr_frags, but sinfo->nr_frags is flushed by napi_build_skb(). So, it stores sinfo before calling napi_build_skb() and then uses it to calculate truesize. The second patch fixes a kernel panic in bnxt_queue_mem_alloc(). bnxt_queue_mem_alloc() accesses rx ring descriptors. rx ring descriptors are allocated when the interface is up and they are freed when the interface is down. So, if bnxt_queue_mem_alloc() is called when the interface is down, a kernel panic occurs. This patch makes bnxt_queue_mem_alloc() return -ENETDOWN if rx ring descriptors are not allocated. The third patch fixes a kernel panic in bnxt_queue_{start | stop}(). When a queue is restarted, bnxt_queue_{start | stop}() are called. These functions set MRU to 0 to stop packet flow and then set up the remaining things. The MRU variable is a member of vnic_info[]; the first vnic_info is for default and the second is for ntuple. The first vnic_info is always allocated when the interface is up, but the second is allocated only when ntuple is enabled (ethtool -K eth0 ntuple <on | off>). Currently, bnxt_queue_{start | stop}() access vnic_info[BNXT_VNIC_NTUPLE] regardless of whether ntuple is enabled or not. So a kernel panic occurs. This patch makes bnxt_queue_{start | stop}() use bp->nr_vnics instead of BNXT_VNIC_NTUPLE. The fourth patch fixes a warning due to checksum state. bnxt_rx_pkt() checks whether skb->ip_summed is not CHECKSUM_NONE before updating ip_summed. If ip_summed is not CHECKSUM_NONE, it WARNs about it. However, bnxt_xdp_build_skb() is called in the XDP-MB-PASS path and it updates ip_summed earlier than bnxt_rx_pkt(). So, in the XDP-MB-PASS path, bnxt_rx_pkt() always warns about the checksum. Updating ip_summed in bnxt_xdp_build_skb() is unnecessary and duplicate, so it is removed. 
The fifth patch makes net_devmem_unbind_dmabuf() ignore -ENETDOWN. When devmem socket is closed, net_devmem_unbind_dmabuf() is called to unbind/release resources. If interface is down, the driver returns -ENETDOWN. The -ENETDOWN return value is not an actual error, because the interface will release resources when the interface is down. So, net_devmem_unbind_dmabuf() needs to ignore -ENETDOWN. The last patch adds XDP testcases to tools/testing/selftests/drivers/net/ping.py. v2: - Do not use num_frags in the bnxt_xdp_build_skb(). (1/6) - Add Review tags from Somnath and Jakub. (2/6) - Add new patch for fixing checksum warning. (4/6) - Add new patch for fixing warning in net_devmem_unbind_dmabuf(). (5/6) - Add new XDP testcases to ping.py (6/6) Taehee Yoo (6): eth: bnxt: fix truesize for mb-xdp-pass case eth: bnxt: return fail if interface is down in bnxt_queue_mem_alloc() eth: bnxt: do not use BNXT_VNIC_NTUPLE unconditionally in queue restart logic eth: bnxt: do not update checksum in bnxt_xdp_build_skb() net: devmem: do not WARN conditionally after netdev_rx_queue_restart() selftests: drv-net: add xdp cases for ping.py drivers/net/ethernet/broadcom/bnxt/bnxt.c | 36 ++-- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 18 +- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h | 6 +- net/core/devmem.c | 4 +- tools/testing/selftests/drivers/net/ping.py | 200 ++++++++++++++++-- .../testing/selftests/net/lib/xdp_dummy.bpf.c | 6 + 6 files changed, 221 insertions(+), 49 deletions(-) -- 2.34.1

7 months, 3 weeks

3
9
0 0

[PATCHv4 net 0/3] bond: fix xfrm offload issues

by Hangbin Liu

The first patch fixes the incorrect lock usage in the bond driver. The second patch fixes the xfrm offload feature setup in active-backup mode. The third patch adds an ipsec offload test. v4: hold xs->lock for bond_ipsec_{del, add}_sa_all (Cosmin Ratiu) use the defer helpers in lib.sh for selftest (Petr Machata) v3: move the ipsec deletion to bond_ipsec_free_sa (Cosmin Ratiu) v2: do not turn carrier on if bond change link failed (Nikolay Aleksandrov) move the mutex lock to a work queue (Cosmin Ratiu) Hangbin Liu (3): bonding: move IPsec deletion to bond_ipsec_free_sa bonding: fix xfrm offload feature setup on active-backup mode selftests: bonding: add ipsec offload test drivers/net/bonding/bond_main.c | 55 +++++-- drivers/net/bonding/bond_netlink.c | 16 +- include/net/bonding.h | 1 + .../selftests/drivers/net/bonding/Makefile | 3 +- .../drivers/net/bonding/bond_ipsec_offload.sh | 154 ++++++++++++++++++ .../selftests/drivers/net/bonding/config | 4 + 6 files changed, 208 insertions(+), 25 deletions(-) create mode 100755 tools/testing/selftests/drivers/net/bonding/bond_ipsec_offload.sh -- 2.46.0

7 months, 3 weeks

4
13
0 0

[PATCH net-next v2 1/2] selftests: drv-net: add path helper for net/lib

by Jakub Kicinski

Looks like a lot of users of recently added env.rpath() actually want to access stuff under net/lib. Add another helper. Signed-off-by: Jakub Kicinski <kuba(a)kernel.org> --- tools/testing/selftests/drivers/net/hds.py | 2 +- tools/testing/selftests/drivers/net/hw/csum.py | 2 +- tools/testing/selftests/drivers/net/lib/py/env.py | 7 +++++++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py index 7cc74faed743..def44c10349a 100755 --- a/tools/testing/selftests/drivers/net/hds.py +++ b/tools/testing/selftests/drivers/net/hds.py @@ -20,7 +20,7 @@ from lib.py import defer, ethtool, ip def _xdp_onoff(cfg): - prog = cfg.rpath("../../net/lib/xdp_dummy.bpf.o") + prog = cfg.lpath("xdp_dummy.bpf.o") ip("link set dev %s xdp obj %s sec xdp" % (cfg.ifname, prog)) ip("link set dev %s xdp off" % cfg.ifname) diff --git a/tools/testing/selftests/drivers/net/hw/csum.py b/tools/testing/selftests/drivers/net/hw/csum.py index 701aca1361e0..49ec98aef579 100755 --- a/tools/testing/selftests/drivers/net/hw/csum.py +++ b/tools/testing/selftests/drivers/net/hw/csum.py @@ -88,7 +88,7 @@ from lib.py import bkg, cmd, wait_port_listen with NetDrvEpEnv(__file__, nsim_test=False) as cfg: check_nic_features(cfg) - cfg.bin_local = cfg.rpath("../../../net/lib/csum") + cfg.bin_local = cfg.lpath("csum") cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) cases = [] diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index fd4d674e6c72..2a1f8bd0ec19 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -30,6 +30,13 @@ from .remote import Remote src_dir = Path(self.src_path).parent.resolve() return (src_dir / path).as_posix() + def lpath(self, path): + """ + Similar to rpath, but for files in net/lib TARGET. 
+ """ + lib_dir = (Path(__file__).parent / "../../../../net/lib").resolve() + return (lib_dir / path).as_posix() + def _load_env_file(self): env = os.environ.copy() -- 2.48.1

7 months, 3 weeks

3
7
0 0

[PATCH net-next] selftests: openvswitch: don't hardcode the drop reason subsys

by Jakub Kicinski

WiFi removed one of their subsys entries from drop reasons, in commit 286e69677065 ("wifi: mac80211: Drop cooked monitor support") SKB_DROP_REASON_SUBSYS_OPENVSWITCH is now 2 not 3. The drop reasons are not uAPI, read the correct value from debug info. We need to enable vmlinux BTF, otherwise pahole needs a few GB of memory to decode the enum name. Signed-off-by: Jakub Kicinski <kuba(a)kernel.org> --- CC: shuah(a)kernel.org CC: pshelar(a)ovn.org CC: aconole(a)redhat.com CC: amorenoz(a)redhat.com CC: linux-kselftest(a)vger.kernel.org CC: dev(a)openvswitch.org --- tools/testing/selftests/net/config | 2 ++ .../testing/selftests/net/openvswitch/openvswitch.sh | 11 ++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 5b9baf708950..3365bcc35304 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -18,6 +18,8 @@ CONFIG_DUMMY=y CONFIG_BRIDGE_VLAN_FILTERING=y CONFIG_BRIDGE=y CONFIG_CRYPTO_CHACHA20POLY1305=m +CONFIG_DEBUG_INFO_BTF=y +CONFIG_DEBUG_INFO_BTF_MODULES=n CONFIG_VLAN_8021Q=y CONFIG_GENEVE=m CONFIG_IFB=y diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index 960e1ab4dd04..3c8d3455d8e7 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -330,6 +330,11 @@ test_psample() { # - drop packets and verify the right drop reason is reported test_drop_reason() { which perf >/dev/null 2>&1 || return $ksft_skip + which pahole >/dev/null 2>&1 || return $ksft_skip + + ovs_drop_subsys=$(pahole -C skb_drop_reason_subsys | + awk '/OPENVSWITCH/ { print $3; }' | + tr -d ,) sbx_add "test_drop_reason" || return $? 
@@ -373,7 +378,7 @@ test_drop_reason() { "in_port(2),eth(),eth_type(0x0800),ipv4(src=172.31.110.20,proto=1),icmp()" 'drop' ovs_drop_record_and_run "test_drop_reason" ip netns exec client ping -c 2 172.31.110.20 - ovs_drop_reason_count 0x30001 # OVS_DROP_FLOW_ACTION + ovs_drop_reason_count 0x${ovs_drop_subsys}0001 # OVS_DROP_FLOW_ACTION if [[ "$?" -ne "2" ]]; then info "Did not detect expected drops: $?" return 1 @@ -390,7 +395,7 @@ test_drop_reason() { ovs_drop_record_and_run \ "test_drop_reason" ip netns exec client nc -i 1 -zuv 172.31.110.20 6000 - ovs_drop_reason_count 0x30004 # OVS_DROP_EXPLICIT_ACTION_ERROR + ovs_drop_reason_count 0x${ovs_drop_subsys}0004 # OVS_DROP_EXPLICIT_ACTION_ERROR if [[ "$?" -ne "1" ]]; then info "Did not detect expected explicit error drops: $?" return 1 @@ -398,7 +403,7 @@ test_drop_reason() { ovs_drop_record_and_run \ "test_drop_reason" ip netns exec client nc -i 1 -zuv 172.31.110.20 7000 - ovs_drop_reason_count 0x30003 # OVS_DROP_EXPLICIT_ACTION + ovs_drop_reason_count 0x${ovs_drop_subsys}0003 # OVS_DROP_EXPLICIT_ACTION if [[ "$?" -ne "1" ]]; then info "Did not detect expected explicit drops: $?" return 1 -- 2.48.1

7 months, 3 weeks

4
3
0 0

[PATCH net-next 1/2] selftests: net: fix error message in bpf_offload

by Jakub Kicinski

We hit the following exception on timeout, because nmaps is never set: Test bpftool bound info reporting (own ns)... Traceback (most recent call last): File "/home/virtme/testing-1/tools/testing/selftests/net/./bpf_offload.py", line 1128, in <module> check_dev_info(False, "") File "/home/virtme/testing-1/tools/testing/selftests/net/./bpf_offload.py", line 583, in check_dev_info maps = bpftool_map_list_wait(expected=2, ns=ns) File "/home/virtme/testing-1/tools/testing/selftests/net/./bpf_offload.py", line 215, in bpftool_map_list_wait raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps)) NameError: name 'nmaps' is not defined Signed-off-by: Jakub Kicinski <kuba(a)kernel.org> --- CC: shuah(a)kernel.org CC: linux-kselftest(a)vger.kernel.org CC: bpf(a)vger.kernel.org --- tools/testing/selftests/net/bpf_offload.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/bpf_offload.py b/tools/testing/selftests/net/bpf_offload.py index fd0d959914e4..4a9be8c49561 100755 --- a/tools/testing/selftests/net/bpf_offload.py +++ b/tools/testing/selftests/net/bpf_offload.py @@ -207,9 +207,11 @@ netns = [] # net namespaces to be removed raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs)) def bpftool_map_list_wait(expected=0, n_retry=20, ns=""): + nmaps = None for i in range(n_retry): maps = bpftool_map_list(ns=ns) - if len(maps) == expected: + nmaps = len(maps) + if nmaps == expected: return maps time.sleep(0.05) raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps)) -- 2.48.1

7 months, 3 weeks

3
4
0 0

[PATCH net-next v6 0/8] Device memory TCP TX

by Mina Almasry

v6: https://lore.kernel.org/netdev/20250222191517.743530-1-almasrymina@google.c… === v6 has no major changes. Addressed a few issues from Paolo and David, and collected Acks from Stan. Thank you everyone for the review! Changes: - retain behavior to process MSG_FASTOPEN even if the provided cmsg is invalid (Paolo). - Rework the freeing of tx_vec slightly (it now has its own err label). (Paolo). - Squash the commit that makes dmabuf unbinding scheduled work into the same one which implements the TX path so we don't run into future errors on bisecting (Paolo). - Fix/add comments to explain how dmabuf binding refcounting works (David). v5: https://lore.kernel.org/netdev/20250220020914.895431-1-almasrymina@google.c… === v5 has no major changes; it clears up the relatively minor issues pointed out to in v4, and rebases the series on top of net-next to resolve the conflict with a patch that raced to the tree. It also collects the review tags from v4. Changes: - Rebase to net-next - Fix issues in selftest (Stan). - Address comments in the devmem and netmem driver docs (Stan and Bagas) - Fix zerocopy_fill_skb_from_devmem return error code (Stan). v4: https://lore.kernel.org/netdev/20250203223916.1064540-1-almasrymina@google.… === v4 mainly addresses the critical driver support issue surfaced in v3 by Paolo and Stan. Drivers aiming to support netmem_tx should make sure not to pass the netmem dma-addrs to the dma-mapping APIs, as these dma-addrs may come from dma-bufs. Additionally other feedback from v3 is addressed. Major changes: - Add helpers to handle netmem dma-addrs. Add GVE support for netmem_tx. - Fix binding->tx_vec not being freed on error paths during the tx binding. - Add a minimal devmem_tx test to devmem.py. - Clean up everything obsolete from the cover letter (Paolo). v3: https://patchwork.kernel.org/project/netdevbpf/list/?series=929401&state=* === Address minor comments from RFCv2 and fix a few build warnings and ynl-regen issues. No major changes. 
RFC v2: https://patchwork.kernel.org/project/netdevbpf/list/?series=920056&state=* ======= RFC v2 addresses much of the feedback from RFC v1. I plan on sending something close to this as net-next reopens, sending it slightly early to get feedback if any. Major changes: -------------- - much improved UAPI as suggested by Stan. We now interpret the iov_base of the passed in iov from userspace as the offset into the dmabuf to send from. This removes the need to set iov.iov_base = NULL which may be confusing to users, and enables us to send multiple iovs in the same sendmsg() call. ncdevmem and the docs show a sample use of that. - Removed the duplicate dmabuf iov_iter in binding->iov_iter. I think this is a good improvement as it was confusing to keep track of 2 iterators for the same sendmsg, and mistracking both iterators caused a couple of bugs reported in the last iteration that are now resolved with this streamlining. - Improved test coverage in ncdevmem. Now multiple sendmsg() are tested, and sending multiple iovs in the same sendmsg() is tested. - Fixed issue where dmabuf unmapping was happening in invalid context (Stan). ==================================================================== The TX path had been dropped from the Device Memory TCP patch series post RFCv1 [1], to make that series slightly easier to review. This series rebases the implementation of the TX path on top of the net_iov/netmem framework agreed upon and merged. The motivation for the feature is thoroughly described in the docs & cover letter of the original proposal, so I don't repeat the lengthy descriptions here, but they are available in [1]. Full outline on usage of the TX path is detailed in the documentation included with this series. Test example is available via the kselftest included in the series as well. The series is relatively small, as the TX path for this feature largely piggybacks on the existing MSG_ZEROCOPY implementation. Patch Overview: --------------- 1. 
Documentation & tests to give high level overview of the feature being added. 1. Add netmem refcounting needed for the TX path. 2. Devmem TX netlink API. 3. Devmem TX net stack implementation. 4. Make dma-buf unbinding scheduled work to handle TX cases where it gets freed from contexts where we can't sleep. 5. Add devmem TX documentation. 6. Add scaffolding enabling driver support for netmem_tx. Add helpers, driver feature flag, and docs to enable drivers to declare netmem_tx support. 7. Guard netmem_tx against being enabled against drivers that don't support it. 8. Add devmem_tx selftests. Add TX path to ncdevmem and add a test to devmem.py. Testing: -------- Testing is very similar to the devmem TCP RX path. The ncdevmem test used for the RX path is now augmented with client functionality to test the TX path. * Test Setup: Kernel: net-next with this RFC and memory provider API cherry-picked locally. Hardware: Google Cloud A3 VMs. NIC: GVE with header split & RSS & flow steering support. Performance results are not included with this version, unfortunately. I'm having issues running the dma-buf exporter driver against the upstream kernel on my test setup. The issues are specific to that dma-buf exporter and do not affect this patch series. I plan to follow up this series with perf fixes if the tests point to issues once they're up and running. Special thanks to Stan who took a stab at rebasing the TX implementation on top of the netmem/net_iov framework merged. Parts of his proposal [2] that are reused as-is are forked off into their own patches to give full credit. 
[1] https://lore.kernel.org/netdev/20240909054318.1809580-1-almasrymina@google.… [2] https://lore.kernel.org/netdev/20240913150913.1280238-2-sdf@fomichev.me/T/#… Cc: sdf(a)fomichev.me Cc: asml.silence(a)gmail.com Cc: dw(a)davidwei.uk Cc: Jamal Hadi Salim <jhs(a)mojatatu.com> Cc: Victor Nogueira <victor(a)mojatatu.com> Cc: Pedro Tammela <pctammela(a)mojatatu.com> Cc: Samiullah Khawaja <skhawaja(a)google.com> Mina Almasry (7): net: add get_netmem/put_netmem support net: devmem: Implement TX path net: add devmem TCP TX documentation net: enable driver support for netmem TX gve: add netmem TX support to GVE DQO-RDA mode net: check for driver support in netmem TX selftests: ncdevmem: Implement devmem TCP TX Stanislav Fomichev (1): net: devmem: TCP tx netlink api Documentation/netlink/specs/netdev.yaml | 12 + Documentation/networking/devmem.rst | 150 ++++++++- .../networking/net_cachelines/net_device.rst | 1 + Documentation/networking/netdev-features.rst | 5 + Documentation/networking/netmem.rst | 23 +- drivers/net/ethernet/google/gve/gve_main.c | 4 + drivers/net/ethernet/google/gve/gve_tx_dqo.c | 8 +- include/linux/netdevice.h | 2 + include/linux/skbuff.h | 17 +- include/linux/skbuff_ref.h | 4 +- include/net/netmem.h | 23 ++ include/net/sock.h | 1 + include/uapi/linux/netdev.h | 1 + net/core/datagram.c | 48 ++- net/core/dev.c | 3 + net/core/devmem.c | 115 ++++++- net/core/devmem.h | 77 ++++- net/core/netdev-genl-gen.c | 13 + net/core/netdev-genl-gen.h | 1 + net/core/netdev-genl.c | 73 ++++- net/core/skbuff.c | 48 ++- net/core/sock.c | 6 + net/ipv4/ip_output.c | 3 +- net/ipv4/tcp.c | 50 ++- net/ipv6/ip6_output.c | 3 +- net/vmw_vsock/virtio_transport_common.c | 5 +- tools/include/uapi/linux/netdev.h | 1 + .../selftests/drivers/net/hw/devmem.py | 26 +- .../selftests/drivers/net/hw/ncdevmem.c | 300 +++++++++++++++++- 29 files changed, 950 insertions(+), 73 deletions(-) base-commit: 80c4a0015ce249cf0917a04dbb3cc652a6811079 -- 2.48.1.658.g4767266eb4-goog

7 months, 3 weeks

6
24
0 0

[PATCH] selftests/bpf: Move test_lwt_ip_encap to test_progs

by Bastien Curutchet (eBPF Foundation)

test_lwt_ip_encap.sh isn't used by the BPF CI. Add a new file in the test_progs framework to migrate the tests done by test_lwt_ip_encap.sh. It uses the same network topology and the same BPF programs located in progs/test_lwt_ip_encap.c. Rework the GSO part to avoid using nc and dd. Remove test_lwt_ip_encap.sh and its Makefile entry. Signed-off-by: Bastien Curutchet (eBPF Foundation) <bastien.curutchet(a)bootlin.com> --- tools/testing/selftests/bpf/Makefile | 3 +- .../selftests/bpf/prog_tests/lwt_ip_encap.c | 540 +++++++++++++++++++++ tools/testing/selftests/bpf/test_lwt_ip_encap.sh | 476 ------------------ 3 files changed, 541 insertions(+), 478 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index e6a02d5b87d123cef7e6b41bfbc96d34083f38e1..df4814b5200a5a0e732b19ab3a5975957fb7cbc9 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -95,7 +95,7 @@ TEST_GEN_PROGS += test_progs-cpuv4 TEST_INST_SUBDIRS += cpuv4 endif -TEST_GEN_FILES = test_lwt_ip_encap.bpf.o test_tc_edt.bpf.o +TEST_GEN_FILES = test_tc_edt.bpf.o TEST_FILES = xsk_prereqs.sh $(wildcard progs/btf_dump_test_case_*.c) # Order correspond to 'make run_tests' order @@ -104,7 +104,6 @@ TEST_PROGS := test_kmod.sh \ test_lirc_mode2.sh \ test_xdp_vlan_mode_generic.sh \ test_xdp_vlan_mode_native.sh \ - test_lwt_ip_encap.sh \ test_tc_tunnel.sh \ test_tc_edt.sh \ test_xdping.sh \ diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_ip_encap.c b/tools/testing/selftests/bpf/prog_tests/lwt_ip_encap.c new file mode 100644 index 0000000000000000000000000000000000000000..61fcded43b46cab7775237c6d85de07b5df7d87e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/lwt_ip_encap.c @@ -0,0 +1,540 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <netinet/in.h> + +#include "network_helpers.h" +#include "test_progs.h" + +#define BPF_FILE "test_lwt_ip_encap.bpf.o" + +#define NETNS_NAME_SIZE 32 +#define NETNS_BASE 
"ns-lwt-ip-encap" + +#define IP4_ADDR_1 "172.16.1.100" +#define IP4_ADDR_2 "172.16.2.100" +#define IP4_ADDR_3 "172.16.3.100" +#define IP4_ADDR_4 "172.16.4.100" +#define IP4_ADDR_5 "172.16.5.100" +#define IP4_ADDR_6 "172.16.6.100" +#define IP4_ADDR_7 "172.16.7.100" +#define IP4_ADDR_8 "172.16.8.100" +#define IP4_ADDR_GRE "172.16.16.100" + +#define IP4_ADDR_SRC IP4_ADDR_1 +#define IP4_ADDR_DST IP4_ADDR_4 + +#define IP6_ADDR_1 "fb01::1" +#define IP6_ADDR_2 "fb02::1" +#define IP6_ADDR_3 "fb03::1" +#define IP6_ADDR_4 "fb04::1" +#define IP6_ADDR_5 "fb05::1" +#define IP6_ADDR_6 "fb06::1" +#define IP6_ADDR_7 "fb07::1" +#define IP6_ADDR_8 "fb08::1" +#define IP6_ADDR_GRE "fb10::1" + +#define IP6_ADDR_SRC IP6_ADDR_1 +#define IP6_ADDR_DST IP6_ADDR_4 + +/* Setup/topology: + * + * NS1 NS2 NS3 + * veth1 <---> veth2 veth3 <---> veth4 (the top route) + * veth5 <---> veth6 veth7 <---> veth8 (the bottom route) + * + * Each vethN gets IP[4|6]_ADDR_N address. + * + * IP*_ADDR_SRC = IP*_ADDR_1 + * IP*_ADDR_DST = IP*_ADDR_4 + * + * All tests test pings from IP*_ADDR__SRC to IP*_ADDR_DST. + * + * By default, routes are configured to allow packets to go + * IP*_ADDR_1 <=> IP*_ADDR_2 <=> IP*_ADDR_3 <=> IP*_ADDR_4 (the top route). + * + * A GRE device is installed in NS3 with IP*_ADDR_GRE, and + * NS1/NS2 are configured to route packets to IP*_ADDR_GRE via IP*_ADDR_8 + * (the bottom route). + * + * Tests: + * + * 1. Routes NS2->IP*_ADDR_DST are brought down, so the only way a ping + * from IP*_ADDR_SRC to IP*_ADDR_DST can work is via IP*_ADDR_GRE. + * + * 2a. In an egress test, a bpf LWT_XMIT program is installed on veth1 + * that encaps the packets with an IP/GRE header to route to IP*_ADDR_GRE. + * + * ping: SRC->[encap at veth1:egress]->GRE:decap->DST + * ping replies go DST->SRC directly + * + * 2b. In an ingress test, a bpf LWT_IN program is installed on veth2 + * that encaps the packets with an IP/GRE header to route to IP*_ADDR_GRE. 
+ * + * ping: SRC->[encap at veth2:ingress]->GRE:decap->DST + * ping replies go DST->SRC directly + */ + +static int create_ns(char *name, size_t name_sz) +{ + if (!name) + goto fail; + + if (!ASSERT_OK(append_tid(name, name_sz), "append TID")) + goto fail; + + SYS(fail, "ip netns add %s", name); + + /* rp_filter gets confused by what these tests are doing, so disable it */ + SYS(fail, "ip netns exec %s sysctl -wq net.ipv4.conf.all.rp_filter=0", name); + SYS(fail, "ip netns exec %s sysctl -wq net.ipv4.conf.default.rp_filter=0", name); + /* Disable IPv6 DAD because it sometimes takes too long and fails tests */ + SYS(fail, "ip netns exec %s sysctl -wq net.ipv6.conf.all.accept_dad=0", name); + SYS(fail, "ip netns exec %s sysctl -wq net.ipv6.conf.default.accept_dad=0", name); + + return 0; +fail: + return -1; +} + +static int set_top_addr(const char *ns1, const char *ns2, const char *ns3) +{ + SYS(fail, "ip -n %s a add %s/24 dev veth1", ns1, IP4_ADDR_1); + SYS(fail, "ip -n %s a add %s/24 dev veth2", ns2, IP4_ADDR_2); + SYS(fail, "ip -n %s a add %s/24 dev veth3", ns2, IP4_ADDR_3); + SYS(fail, "ip -n %s a add %s/24 dev veth4", ns3, IP4_ADDR_4); + SYS(fail, "ip -n %s -6 a add %s/128 dev veth1", ns1, IP6_ADDR_1); + SYS(fail, "ip -n %s -6 a add %s/128 dev veth2", ns2, IP6_ADDR_2); + SYS(fail, "ip -n %s -6 a add %s/128 dev veth3", ns2, IP6_ADDR_3); + SYS(fail, "ip -n %s -6 a add %s/128 dev veth4", ns3, IP6_ADDR_4); + + SYS(fail, "ip -n %s link set dev veth1 up", ns1); + SYS(fail, "ip -n %s link set dev veth2 up", ns2); + SYS(fail, "ip -n %s link set dev veth3 up", ns2); + SYS(fail, "ip -n %s link set dev veth4 up", ns3); + + return 0; +fail: + return 1; +} + +static int set_bottom_addr(const char *ns1, const char *ns2, const char *ns3) +{ + SYS(fail, "ip -n %s a add %s/24 dev veth5", ns1, IP4_ADDR_5); + SYS(fail, "ip -n %s a add %s/24 dev veth6", ns2, IP4_ADDR_6); + SYS(fail, "ip -n %s a add %s/24 dev veth7", ns2, IP4_ADDR_7); + SYS(fail, "ip -n %s a add %s/24 dev veth8", 
ns3, IP4_ADDR_8); + SYS(fail, "ip -n %s -6 a add %s/128 dev veth5", ns1, IP6_ADDR_5); + SYS(fail, "ip -n %s -6 a add %s/128 dev veth6", ns2, IP6_ADDR_6); + SYS(fail, "ip -n %s -6 a add %s/128 dev veth7", ns2, IP6_ADDR_7); + SYS(fail, "ip -n %s -6 a add %s/128 dev veth8", ns3, IP6_ADDR_8); + + SYS(fail, "ip -n %s link set dev veth5 up", ns1); + SYS(fail, "ip -n %s link set dev veth6 up", ns2); + SYS(fail, "ip -n %s link set dev veth7 up", ns2); + SYS(fail, "ip -n %s link set dev veth8 up", ns3); + + return 0; +fail: + return 1; +} + +static int configure_vrf(const char *ns1, const char *ns2) +{ + if (!ns1 || !ns2) + goto fail; + + SYS(fail, "ip -n %s link add red type vrf table 1001", ns1); + SYS(fail, "ip -n %s link set red up", ns1); + SYS(fail, "ip -n %s route add table 1001 unreachable default metric 8192", ns1); + SYS(fail, "ip -n %s -6 route add table 1001 unreachable default metric 8192", ns1); + SYS(fail, "ip -n %s link set veth1 vrf red", ns1); + SYS(fail, "ip -n %s link set veth5 vrf red", ns1); + + SYS(fail, "ip -n %s link add red type vrf table 1001", ns2); + SYS(fail, "ip -n %s link set red up", ns2); + SYS(fail, "ip -n %s route add table 1001 unreachable default metric 8192", ns2); + SYS(fail, "ip -n %s -6 route add table 1001 unreachable default metric 8192", ns2); + SYS(fail, "ip -n %s link set veth2 vrf red", ns2); + SYS(fail, "ip -n %s link set veth3 vrf red", ns2); + SYS(fail, "ip -n %s link set veth6 vrf red", ns2); + SYS(fail, "ip -n %s link set veth7 vrf red", ns2); + + return 0; +fail: + return -1; +} + +static int configure_ns1(const char *ns1, const char *vrf) +{ + struct nstoken *nstoken = NULL; + + if (!ns1 || !vrf) + goto fail; + + nstoken = open_netns(ns1); + if (!ASSERT_OK_PTR(nstoken, "open ns1")) + goto fail; + + /* Top route */ + SYS(fail, "ip route add %s/32 dev veth1 %s", IP4_ADDR_2, vrf); + SYS(fail, "ip route add default dev veth1 via %s %s", IP4_ADDR_2, vrf); + SYS(fail, "ip -6 route add %s/128 dev veth1 %s", IP6_ADDR_2, vrf); + 
SYS(fail, "ip -6 route add default dev veth1 via %s %s", IP6_ADDR_2, vrf); + /* Bottom route */ + SYS(fail, "ip route add %s/32 dev veth5 %s", IP4_ADDR_6, vrf); + SYS(fail, "ip route add %s/32 dev veth5 via %s %s", IP4_ADDR_7, IP4_ADDR_6, vrf); + SYS(fail, "ip route add %s/32 dev veth5 via %s %s", IP4_ADDR_8, IP4_ADDR_6, vrf); + SYS(fail, "ip -6 route add %s/128 dev veth5 %s", IP6_ADDR_6, vrf); + SYS(fail, "ip -6 route add %s/128 dev veth5 via %s %s", IP6_ADDR_7, IP6_ADDR_6, vrf); + SYS(fail, "ip -6 route add %s/128 dev veth5 via %s %s", IP6_ADDR_8, IP6_ADDR_6, vrf); + + close_netns(nstoken); + return 0; +fail: + close_netns(nstoken); + return -1; +} + +static int configure_ns2(const char *ns2, const char *vrf) +{ + struct nstoken *nstoken = NULL; + + if (!ns2 || !vrf) + goto fail; + + nstoken = open_netns(ns2); + if (!ASSERT_OK_PTR(nstoken, "open ns2")) + goto fail; + + SYS(fail, "ip netns exec %s sysctl -wq net.ipv4.ip_forward=1", ns2); + SYS(fail, "ip netns exec %s sysctl -wq net.ipv6.conf.all.forwarding=1", ns2); + + /* Top route */ + SYS(fail, "ip route add %s/32 dev veth2 %s", IP4_ADDR_1, vrf); + SYS(fail, "ip route add %s/32 dev veth3 %s", IP4_ADDR_4, vrf); + SYS(fail, "ip -6 route add %s/128 dev veth2 %s", IP6_ADDR_1, vrf); + SYS(fail, "ip -6 route add %s/128 dev veth3 %s", IP6_ADDR_4, vrf); + /* Bottom route */ + SYS(fail, "ip route add %s/32 dev veth6 %s", IP4_ADDR_5, vrf); + SYS(fail, "ip route add %s/32 dev veth7 %s", IP4_ADDR_8, vrf); + SYS(fail, "ip -6 route add %s/128 dev veth6 %s", IP6_ADDR_5, vrf); + SYS(fail, "ip -6 route add %s/128 dev veth7 %s", IP6_ADDR_8, vrf); + + close_netns(nstoken); + return 0; +fail: + close_netns(nstoken); + return -1; +} + +static int configure_ns3(const char *ns3) +{ + struct nstoken *nstoken = NULL; + + if (!ns3) + goto fail; + + nstoken = open_netns(ns3); + if (!ASSERT_OK_PTR(nstoken, "open ns3")) + goto fail; + + /* Top route */ + SYS(fail, "ip route add %s/32 dev veth4", IP4_ADDR_3); + SYS(fail, "ip route add %s/32 
dev veth4 via %s", IP4_ADDR_1, IP4_ADDR_3); + SYS(fail, "ip route add %s/32 dev veth4 via %s", IP4_ADDR_2, IP4_ADDR_3); + SYS(fail, "ip -6 route add %s/128 dev veth4", IP6_ADDR_3); + SYS(fail, "ip -6 route add %s/128 dev veth4 via %s", IP6_ADDR_1, IP6_ADDR_3); + SYS(fail, "ip -6 route add %s/128 dev veth4 via %s", IP6_ADDR_2, IP6_ADDR_3); + /* Bottom route */ + SYS(fail, "ip route add %s/32 dev veth8", IP4_ADDR_7); + SYS(fail, "ip route add %s/32 dev veth8 via %s", IP4_ADDR_5, IP4_ADDR_7); + SYS(fail, "ip route add %s/32 dev veth8 via %s", IP4_ADDR_6, IP4_ADDR_7); + SYS(fail, "ip -6 route add %s/128 dev veth8", IP6_ADDR_7); + SYS(fail, "ip -6 route add %s/128 dev veth8 via %s", IP6_ADDR_5, IP6_ADDR_7); + SYS(fail, "ip -6 route add %s/128 dev veth8 via %s", IP6_ADDR_6, IP6_ADDR_7); + + /* Configure IPv4 GRE device */ + SYS(fail, "ip tunnel add gre_dev mode gre remote %s local %s ttl 255", + IP4_ADDR_1, IP4_ADDR_GRE); + SYS(fail, "ip link set gre_dev up"); + SYS(fail, "ip a add %s dev gre_dev", IP4_ADDR_GRE); + + /* Configure IPv6 GRE device */ + SYS(fail, "ip tunnel add gre6_dev mode ip6gre remote %s local %s ttl 255", + IP6_ADDR_1, IP6_ADDR_GRE); + SYS(fail, "ip link set gre6_dev up"); + SYS(fail, "ip a add %s dev gre6_dev", IP6_ADDR_GRE); + + close_netns(nstoken); + return 0; +fail: + close_netns(nstoken); + return -1; +} + +static int setup_network(char *ns1, char *ns2, char *ns3, const char *vrf) +{ + if (!ns1 || !ns2 || !ns3 || !vrf) + goto fail; + + SYS(fail, "ip -n %s link add veth1 type veth peer name veth2 netns %s", ns1, ns2); + SYS(fail, "ip -n %s link add veth3 type veth peer name veth4 netns %s", ns2, ns3); + SYS(fail, "ip -n %s link add veth5 type veth peer name veth6 netns %s", ns1, ns2); + SYS(fail, "ip -n %s link add veth7 type veth peer name veth8 netns %s", ns2, ns3); + + if (vrf[0]) { + if (!ASSERT_OK(configure_vrf(ns1, ns2), "configure vrf")) + goto fail; + } + if (!ASSERT_OK(set_top_addr(ns1, ns2, ns3), "set top addresses")) + goto fail; + + if 
(!ASSERT_OK(set_bottom_addr(ns1, ns2, ns3), "set bottom addresses")) + goto fail; + + if (!ASSERT_OK(configure_ns1(ns1, vrf), "configure ns1 routes")) + goto fail; + + if (!ASSERT_OK(configure_ns2(ns2, vrf), "configure ns2 routes")) + goto fail; + + if (!ASSERT_OK(configure_ns3(ns3), "configure ns3 routes")) + goto fail; + + /* Link bottom route to the GRE tunnels */ + SYS(fail, "ip -n %s route add %s/32 dev veth5 via %s %s", + ns1, IP4_ADDR_GRE, IP4_ADDR_6, vrf); + SYS(fail, "ip -n %s route add %s/32 dev veth7 via %s %s", + ns2, IP4_ADDR_GRE, IP4_ADDR_8, vrf); + SYS(fail, "ip -n %s -6 route add %s/128 dev veth5 via %s %s", + ns1, IP6_ADDR_GRE, IP6_ADDR_6, vrf); + SYS(fail, "ip -n %s -6 route add %s/128 dev veth7 via %s %s", + ns2, IP6_ADDR_GRE, IP6_ADDR_8, vrf); + + return 0; +fail: + return -1; +} + +int remove_routes_to_gredev(const char *ns1, const char *ns2, const char *vrf) +{ + SYS(fail, "ip -n %s route del %s dev veth5 %s", ns1, IP4_ADDR_GRE, vrf); + SYS(fail, "ip -n %s route del %s dev veth7 %s", ns2, IP4_ADDR_GRE, vrf); + SYS(fail, "ip -n %s -6 route del %s/128 dev veth5 %s", ns1, IP6_ADDR_GRE, vrf); + SYS(fail, "ip -n %s -6 route del %s/128 dev veth7 %s", ns2, IP6_ADDR_GRE, vrf); + + return 0; +fail: + return -1; +} + +int add_unreachable_routes_to_gredev(const char *ns1, const char *ns2, const char *vrf) +{ + SYS(fail, "ip -n %s route add unreachable %s/32 %s", ns1, IP4_ADDR_GRE, vrf); + SYS(fail, "ip -n %s route add unreachable %s/32 %s", ns2, IP4_ADDR_GRE, vrf); + SYS(fail, "ip -n %s -6 route add unreachable %s/128 %s", ns1, IP6_ADDR_GRE, vrf); + SYS(fail, "ip -n %s -6 route add unreachable %s/128 %s", ns2, IP6_ADDR_GRE, vrf); + + return 0; +fail: + return -1; +} + +#define GSO_SIZE 5000 +#define GSO_TCP_PORT 9000 +/* This tests the fix from commit ea0371f78799 ("net: fix GSO in bpf_lwt_push_ip_encap") */ +static int test_gso_fix(const char *ns1, const char *ns3, int family) +{ + const char *ip_addr = family == AF_INET ? 
IP4_ADDR_DST : IP6_ADDR_DST; + char gso_packet[GSO_SIZE] = {}; + struct nstoken *nstoken = NULL; + int sfd, cfd, afd; + ssize_t bytes; + int ret = -1; + + if (!ns1 || !ns3) + return ret; + + nstoken = open_netns(ns3); + if (!ASSERT_OK_PTR(nstoken, "open ns3")) + return ret; + + sfd = start_server_str(family, SOCK_STREAM, ip_addr, GSO_TCP_PORT, NULL); + if (!ASSERT_OK_FD(sfd, "start server")) + goto close_netns; + + close_netns(nstoken); + + nstoken = open_netns(ns1); + if (!ASSERT_OK_PTR(nstoken, "open ns1")) + goto close_server; + + cfd = connect_to_addr_str(family, SOCK_STREAM, ip_addr, GSO_TCP_PORT, NULL); + if (!ASSERT_OK_FD(cfd, "connect to server")) + goto close_server; + + close_netns(nstoken); + nstoken = NULL; + + afd = accept(sfd, NULL, NULL); + if (!ASSERT_OK_FD(afd, "accept")) + goto close_client; + + /* Send a packet larger than MTU */ + bytes = send(cfd, gso_packet, GSO_SIZE, 0); + if (!ASSERT_EQ(bytes, GSO_SIZE, "send packet")) + goto close_accept; + + /* Verify we received all expected bytes */ + bytes = read(afd, gso_packet, GSO_SIZE); + if (!ASSERT_EQ(bytes, GSO_SIZE, "receive packet")) + goto close_accept; + + ret = 0; + +close_accept: + close(afd); +close_client: + close(cfd); +close_server: + close(sfd); +close_netns: + close_netns(nstoken); + + return ret; +} + +static int check_ping_ok(const char *ns1) +{ + SYS(fail, "ip netns exec %s ping -c 1 -W1 -I veth1 %s > /dev/null", ns1, IP4_ADDR_DST); + SYS(fail, "ip netns exec %s ping6 -c 1 -W1 -I veth1 %s > /dev/null", ns1, IP6_ADDR_DST); + return 0; +fail: + return -1; +} + +static int check_ping_fails(const char *ns1) +{ + int ret; + + ret = SYS_NOFAIL("ip netns exec %s ping -c 1 -W1 -I veth1 %s", ns1, IP4_ADDR_DST); + if (!ret) + return -1; + + ret = SYS_NOFAIL("ip netns exec %s ping6 -c 1 -W1 -I veth1 %s", ns1, IP6_ADDR_DST); + if (!ret) + return -1; + + return 0; +} + +#define EGRESS true +#define INGRESS false +#define IPV4_ENCAP true +#define IPV6_ENCAP false +static void lwt_ip_encap(bool 
ipv4_encap, bool egress, const char *vrf) +{ + char ns1[NETNS_NAME_SIZE] = NETNS_BASE "-1-"; + char ns2[NETNS_NAME_SIZE] = NETNS_BASE "-2-"; + char ns3[NETNS_NAME_SIZE] = NETNS_BASE "-3-"; + char *sec = ipv4_encap ? "encap_gre" : "encap_gre6"; + + if (!vrf) + return; + + if (!ASSERT_OK(create_ns(ns1, NETNS_NAME_SIZE), "create ns1")) + goto out; + if (!ASSERT_OK(create_ns(ns2, NETNS_NAME_SIZE), "create ns2")) + goto out; + if (!ASSERT_OK(create_ns(ns3, NETNS_NAME_SIZE), "create ns3")) + goto out; + + if (!ASSERT_OK(setup_network(ns1, ns2, ns3, vrf), "setup network")) + goto out; + + /* By default, pings work */ + if (!ASSERT_OK(check_ping_ok(ns1), "ping OK")) + goto out; + + /* Remove NS2->DST routes, ping fails */ + SYS(out, "ip -n %s route del %s/32 dev veth3 %s", ns2, IP4_ADDR_DST, vrf); + SYS(out, "ip -n %s -6 route del %s/128 dev veth3 %s", ns2, IP6_ADDR_DST, vrf); + if (!ASSERT_OK(check_ping_fails(ns1), "ping expected fail")) + goto out; + + /* Install replacement routes (LWT/eBPF), pings succeed */ + if (egress) { + SYS(out, "ip -n %s route add %s encap bpf xmit obj %s sec %s dev veth1 %s", + ns1, IP4_ADDR_DST, BPF_FILE, sec, vrf); + SYS(out, "ip -n %s -6 route add %s encap bpf xmit obj %s sec %s dev veth1 %s", + ns1, IP6_ADDR_DST, BPF_FILE, sec, vrf); + } else { + SYS(out, "ip -n %s route add %s encap bpf in obj %s sec %s dev veth2 %s", + ns2, IP4_ADDR_DST, BPF_FILE, sec, vrf); + SYS(out, "ip -n %s -6 route add %s encap bpf in obj %s sec %s dev veth2 %s", + ns2, IP6_ADDR_DST, BPF_FILE, sec, vrf); + } + + if (!ASSERT_OK(check_ping_ok(ns1), "ping OK")) + goto out; + + /* Skip GSO tests with VRF: VRF routing needs properly assigned + * source IP/device, which is easy to do with ping but hard with TCP. 
+ */ + if (egress && !vrf[0]) { + if (!ASSERT_OK(test_gso_fix(ns1, ns3, AF_INET), "test GSO")) + goto out; + } + + /* Negative test: remove routes to GRE devices: ping fails */ + if (!ASSERT_OK(remove_routes_to_gredev(ns1, ns2, vrf), "remove routes to gredev")) + goto out; + if (!ASSERT_OK(check_ping_fails(ns1), "ping expected fail")) + goto out; + + /* Another negative test */ + if (!ASSERT_OK(add_unreachable_routes_to_gredev(ns1, ns2, vrf), + "add unreachable routes")) + goto out; + ASSERT_OK(check_ping_fails(ns1), "ping expected fail"); + +out: + SYS_NOFAIL("ip netns del %s", ns1); + SYS_NOFAIL("ip netns del %s", ns2); + SYS_NOFAIL("ip netns del %s", ns3); +} + +void test_lwt_ip_encap_vrf_ipv6(void) +{ + if (test__start_subtest("egress")) + lwt_ip_encap(IPV6_ENCAP, EGRESS, "vrf red"); + + if (test__start_subtest("ingress")) + lwt_ip_encap(IPV6_ENCAP, INGRESS, "vrf red"); +} + +void test_lwt_ip_encap_vrf_ipv4(void) +{ + if (test__start_subtest("egress")) + lwt_ip_encap(IPV4_ENCAP, EGRESS, "vrf red"); + + if (test__start_subtest("ingress")) + lwt_ip_encap(IPV4_ENCAP, INGRESS, "vrf red"); +} + +void test_lwt_ip_encap_ipv6(void) +{ + if (test__start_subtest("egress")) + lwt_ip_encap(IPV6_ENCAP, EGRESS, ""); + + if (test__start_subtest("ingress")) + lwt_ip_encap(IPV6_ENCAP, INGRESS, ""); +} + +void test_lwt_ip_encap_ipv4(void) +{ + if (test__start_subtest("egress")) + lwt_ip_encap(IPV4_ENCAP, EGRESS, ""); + + if (test__start_subtest("ingress")) + lwt_ip_encap(IPV4_ENCAP, INGRESS, ""); +} diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh deleted file mode 100755 index 1e565f47aca940d8dc7235d823c48537d7a708b8..0000000000000000000000000000000000000000 --- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh +++ /dev/null @@ -1,476 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# Setup/topology: -# -# NS1 NS2 NS3 -# veth1 <---> veth2 veth3 <---> veth4 (the top route) -# veth5 <---> veth6 veth7 
<---> veth8 (the bottom route) -# -# each vethN gets IPv[4|6]_N address -# -# IPv*_SRC = IPv*_1 -# IPv*_DST = IPv*_4 -# -# all tests test pings from IPv*_SRC to IPv*_DST -# -# by default, routes are configured to allow packets to go -# IP*_1 <=> IP*_2 <=> IP*_3 <=> IP*_4 (the top route) -# -# a GRE device is installed in NS3 with IPv*_GRE, and -# NS1/NS2 are configured to route packets to IPv*_GRE via IP*_8 -# (the bottom route) -# -# Tests: -# -# 1. routes NS2->IPv*_DST are brought down, so the only way a ping -# from IP*_SRC to IP*_DST can work is via IPv*_GRE -# -# 2a. in an egress test, a bpf LWT_XMIT program is installed on veth1 -# that encaps the packets with an IP/GRE header to route to IPv*_GRE -# -# ping: SRC->[encap at veth1:egress]->GRE:decap->DST -# ping replies go DST->SRC directly -# -# 2b. in an ingress test, a bpf LWT_IN program is installed on veth2 -# that encaps the packets with an IP/GRE header to route to IPv*_GRE -# -# ping: SRC->[encap at veth2:ingress]->GRE:decap->DST -# ping replies go DST->SRC directly - -BPF_FILE="test_lwt_ip_encap.bpf.o" -if [[ $EUID -ne 0 ]]; then - echo "This script must be run as root" - echo "FAIL" - exit 1 -fi - -readonly NS1="ns1-$(mktemp -u XXXXXX)" -readonly NS2="ns2-$(mktemp -u XXXXXX)" -readonly NS3="ns3-$(mktemp -u XXXXXX)" - -readonly IPv4_1="172.16.1.100" -readonly IPv4_2="172.16.2.100" -readonly IPv4_3="172.16.3.100" -readonly IPv4_4="172.16.4.100" -readonly IPv4_5="172.16.5.100" -readonly IPv4_6="172.16.6.100" -readonly IPv4_7="172.16.7.100" -readonly IPv4_8="172.16.8.100" -readonly IPv4_GRE="172.16.16.100" - -readonly IPv4_SRC=$IPv4_1 -readonly IPv4_DST=$IPv4_4 - -readonly IPv6_1="fb01::1" -readonly IPv6_2="fb02::1" -readonly IPv6_3="fb03::1" -readonly IPv6_4="fb04::1" -readonly IPv6_5="fb05::1" -readonly IPv6_6="fb06::1" -readonly IPv6_7="fb07::1" -readonly IPv6_8="fb08::1" -readonly IPv6_GRE="fb10::1" - -readonly IPv6_SRC=$IPv6_1 -readonly IPv6_DST=$IPv6_4 - -TEST_STATUS=0 -TESTS_SUCCEEDED=0 
-TESTS_FAILED=0 - -TMPFILE="" - -process_test_results() -{ - if [[ "${TEST_STATUS}" -eq 0 ]] ; then - echo "PASS" - TESTS_SUCCEEDED=$((TESTS_SUCCEEDED+1)) - else - echo "FAIL" - TESTS_FAILED=$((TESTS_FAILED+1)) - fi -} - -print_test_summary_and_exit() -{ - echo "passed tests: ${TESTS_SUCCEEDED}" - echo "failed tests: ${TESTS_FAILED}" - if [ "${TESTS_FAILED}" -eq "0" ] ; then - exit 0 - else - exit 1 - fi -} - -setup() -{ - set -e # exit on error - TEST_STATUS=0 - - # create devices and namespaces - ip netns add "${NS1}" - ip netns add "${NS2}" - ip netns add "${NS3}" - - # rp_filter gets confused by what these tests are doing, so disable it - ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec ${NS1} sysctl -wq net.ipv4.conf.default.rp_filter=0 - ip netns exec ${NS2} sysctl -wq net.ipv4.conf.default.rp_filter=0 - ip netns exec ${NS3} sysctl -wq net.ipv4.conf.default.rp_filter=0 - - # disable IPv6 DAD because it sometimes takes too long and fails tests - ip netns exec ${NS1} sysctl -wq net.ipv6.conf.all.accept_dad=0 - ip netns exec ${NS2} sysctl -wq net.ipv6.conf.all.accept_dad=0 - ip netns exec ${NS3} sysctl -wq net.ipv6.conf.all.accept_dad=0 - ip netns exec ${NS1} sysctl -wq net.ipv6.conf.default.accept_dad=0 - ip netns exec ${NS2} sysctl -wq net.ipv6.conf.default.accept_dad=0 - ip netns exec ${NS3} sysctl -wq net.ipv6.conf.default.accept_dad=0 - - ip link add veth1 type veth peer name veth2 - ip link add veth3 type veth peer name veth4 - ip link add veth5 type veth peer name veth6 - ip link add veth7 type veth peer name veth8 - - ip netns exec ${NS2} sysctl -wq net.ipv4.ip_forward=1 - ip netns exec ${NS2} sysctl -wq net.ipv6.conf.all.forwarding=1 - - ip link set veth1 netns ${NS1} - ip link set veth2 netns ${NS2} - ip link set veth3 netns ${NS2} - ip link set veth4 netns ${NS3} - ip link set veth5 netns ${NS1} - 
ip link set veth6 netns ${NS2} - ip link set veth7 netns ${NS2} - ip link set veth8 netns ${NS3} - - if [ ! -z "${VRF}" ] ; then - ip -netns ${NS1} link add red type vrf table 1001 - ip -netns ${NS1} link set red up - ip -netns ${NS1} route add table 1001 unreachable default metric 8192 - ip -netns ${NS1} -6 route add table 1001 unreachable default metric 8192 - ip -netns ${NS1} link set veth1 vrf red - ip -netns ${NS1} link set veth5 vrf red - - ip -netns ${NS2} link add red type vrf table 1001 - ip -netns ${NS2} link set red up - ip -netns ${NS2} route add table 1001 unreachable default metric 8192 - ip -netns ${NS2} -6 route add table 1001 unreachable default metric 8192 - ip -netns ${NS2} link set veth2 vrf red - ip -netns ${NS2} link set veth3 vrf red - ip -netns ${NS2} link set veth6 vrf red - ip -netns ${NS2} link set veth7 vrf red - fi - - # configure addesses: the top route (1-2-3-4) - ip -netns ${NS1} addr add ${IPv4_1}/24 dev veth1 - ip -netns ${NS2} addr add ${IPv4_2}/24 dev veth2 - ip -netns ${NS2} addr add ${IPv4_3}/24 dev veth3 - ip -netns ${NS3} addr add ${IPv4_4}/24 dev veth4 - ip -netns ${NS1} -6 addr add ${IPv6_1}/128 nodad dev veth1 - ip -netns ${NS2} -6 addr add ${IPv6_2}/128 nodad dev veth2 - ip -netns ${NS2} -6 addr add ${IPv6_3}/128 nodad dev veth3 - ip -netns ${NS3} -6 addr add ${IPv6_4}/128 nodad dev veth4 - - # configure addresses: the bottom route (5-6-7-8) - ip -netns ${NS1} addr add ${IPv4_5}/24 dev veth5 - ip -netns ${NS2} addr add ${IPv4_6}/24 dev veth6 - ip -netns ${NS2} addr add ${IPv4_7}/24 dev veth7 - ip -netns ${NS3} addr add ${IPv4_8}/24 dev veth8 - ip -netns ${NS1} -6 addr add ${IPv6_5}/128 nodad dev veth5 - ip -netns ${NS2} -6 addr add ${IPv6_6}/128 nodad dev veth6 - ip -netns ${NS2} -6 addr add ${IPv6_7}/128 nodad dev veth7 - ip -netns ${NS3} -6 addr add ${IPv6_8}/128 nodad dev veth8 - - ip -netns ${NS1} link set dev veth1 up - ip -netns ${NS2} link set dev veth2 up - ip -netns ${NS2} link set dev veth3 up - ip -netns ${NS3} 
link set dev veth4 up - ip -netns ${NS1} link set dev veth5 up - ip -netns ${NS2} link set dev veth6 up - ip -netns ${NS2} link set dev veth7 up - ip -netns ${NS3} link set dev veth8 up - - # configure routes: IP*_SRC -> veth1/IP*_2 (= top route) default; - # the bottom route to specific bottom addresses - - # NS1 - # top route - ip -netns ${NS1} route add ${IPv4_2}/32 dev veth1 ${VRF} - ip -netns ${NS1} route add default dev veth1 via ${IPv4_2} ${VRF} # go top by default - ip -netns ${NS1} -6 route add ${IPv6_2}/128 dev veth1 ${VRF} - ip -netns ${NS1} -6 route add default dev veth1 via ${IPv6_2} ${VRF} # go top by default - # bottom route - ip -netns ${NS1} route add ${IPv4_6}/32 dev veth5 ${VRF} - ip -netns ${NS1} route add ${IPv4_7}/32 dev veth5 via ${IPv4_6} ${VRF} - ip -netns ${NS1} route add ${IPv4_8}/32 dev veth5 via ${IPv4_6} ${VRF} - ip -netns ${NS1} -6 route add ${IPv6_6}/128 dev veth5 ${VRF} - ip -netns ${NS1} -6 route add ${IPv6_7}/128 dev veth5 via ${IPv6_6} ${VRF} - ip -netns ${NS1} -6 route add ${IPv6_8}/128 dev veth5 via ${IPv6_6} ${VRF} - - # NS2 - # top route - ip -netns ${NS2} route add ${IPv4_1}/32 dev veth2 ${VRF} - ip -netns ${NS2} route add ${IPv4_4}/32 dev veth3 ${VRF} - ip -netns ${NS2} -6 route add ${IPv6_1}/128 dev veth2 ${VRF} - ip -netns ${NS2} -6 route add ${IPv6_4}/128 dev veth3 ${VRF} - # bottom route - ip -netns ${NS2} route add ${IPv4_5}/32 dev veth6 ${VRF} - ip -netns ${NS2} route add ${IPv4_8}/32 dev veth7 ${VRF} - ip -netns ${NS2} -6 route add ${IPv6_5}/128 dev veth6 ${VRF} - ip -netns ${NS2} -6 route add ${IPv6_8}/128 dev veth7 ${VRF} - - # NS3 - # top route - ip -netns ${NS3} route add ${IPv4_3}/32 dev veth4 - ip -netns ${NS3} route add ${IPv4_1}/32 dev veth4 via ${IPv4_3} - ip -netns ${NS3} route add ${IPv4_2}/32 dev veth4 via ${IPv4_3} - ip -netns ${NS3} -6 route add ${IPv6_3}/128 dev veth4 - ip -netns ${NS3} -6 route add ${IPv6_1}/128 dev veth4 via ${IPv6_3} - ip -netns ${NS3} -6 route add ${IPv6_2}/128 dev veth4 via 
${IPv6_3} - # bottom route - ip -netns ${NS3} route add ${IPv4_7}/32 dev veth8 - ip -netns ${NS3} route add ${IPv4_5}/32 dev veth8 via ${IPv4_7} - ip -netns ${NS3} route add ${IPv4_6}/32 dev veth8 via ${IPv4_7} - ip -netns ${NS3} -6 route add ${IPv6_7}/128 dev veth8 - ip -netns ${NS3} -6 route add ${IPv6_5}/128 dev veth8 via ${IPv6_7} - ip -netns ${NS3} -6 route add ${IPv6_6}/128 dev veth8 via ${IPv6_7} - - # configure IPv4 GRE device in NS3, and a route to it via the "bottom" route - ip -netns ${NS3} tunnel add gre_dev mode gre remote ${IPv4_1} local ${IPv4_GRE} ttl 255 - ip -netns ${NS3} link set gre_dev up - ip -netns ${NS3} addr add ${IPv4_GRE} dev gre_dev - ip -netns ${NS1} route add ${IPv4_GRE}/32 dev veth5 via ${IPv4_6} ${VRF} - ip -netns ${NS2} route add ${IPv4_GRE}/32 dev veth7 via ${IPv4_8} ${VRF} - - - # configure IPv6 GRE device in NS3, and a route to it via the "bottom" route - ip -netns ${NS3} -6 tunnel add name gre6_dev mode ip6gre remote ${IPv6_1} local ${IPv6_GRE} ttl 255 - ip -netns ${NS3} link set gre6_dev up - ip -netns ${NS3} -6 addr add ${IPv6_GRE} nodad dev gre6_dev - ip -netns ${NS1} -6 route add ${IPv6_GRE}/128 dev veth5 via ${IPv6_6} ${VRF} - ip -netns ${NS2} -6 route add ${IPv6_GRE}/128 dev veth7 via ${IPv6_8} ${VRF} - - TMPFILE=$(mktemp /tmp/test_lwt_ip_encap.XXXXXX) - - sleep 1 # reduce flakiness - set +e -} - -cleanup() -{ - if [ -f ${TMPFILE} ] ; then - rm ${TMPFILE} - fi - - ip netns del ${NS1} 2> /dev/null - ip netns del ${NS2} 2> /dev/null - ip netns del ${NS3} 2> /dev/null -} - -trap cleanup EXIT - -remove_routes_to_gredev() -{ - ip -netns ${NS1} route del ${IPv4_GRE} dev veth5 ${VRF} - ip -netns ${NS2} route del ${IPv4_GRE} dev veth7 ${VRF} - ip -netns ${NS1} -6 route del ${IPv6_GRE}/128 dev veth5 ${VRF} - ip -netns ${NS2} -6 route del ${IPv6_GRE}/128 dev veth7 ${VRF} -} - -add_unreachable_routes_to_gredev() -{ - ip -netns ${NS1} route add unreachable ${IPv4_GRE}/32 ${VRF} - ip -netns ${NS2} route add unreachable ${IPv4_GRE}/32 
${VRF} - ip -netns ${NS1} -6 route add unreachable ${IPv6_GRE}/128 ${VRF} - ip -netns ${NS2} -6 route add unreachable ${IPv6_GRE}/128 ${VRF} -} - -test_ping() -{ - local readonly PROTO=$1 - local readonly EXPECTED=$2 - local RET=0 - - if [ "${PROTO}" == "IPv4" ] ; then - ip netns exec ${NS1} ping -c 1 -W 1 -I veth1 ${IPv4_DST} 2>&1 > /dev/null - RET=$? - elif [ "${PROTO}" == "IPv6" ] ; then - ip netns exec ${NS1} ping6 -c 1 -W 1 -I veth1 ${IPv6_DST} 2>&1 > /dev/null - RET=$? - else - echo " test_ping: unknown PROTO: ${PROTO}" - TEST_STATUS=1 - fi - - if [ "0" != "${RET}" ]; then - RET=1 - fi - - if [ "${EXPECTED}" != "${RET}" ] ; then - echo " test_ping failed: expected: ${EXPECTED}; got ${RET}" - TEST_STATUS=1 - fi -} - -test_gso() -{ - local readonly PROTO=$1 - local readonly PKT_SZ=5000 - local IP_DST="" - : > ${TMPFILE} # trim the capture file - - # check that nc is present - command -v nc >/dev/null 2>&1 || \ - { echo >&2 "nc is not available: skipping TSO tests"; return; } - - # listen on port 9000, capture TCP into $TMPFILE - if [ "${PROTO}" == "IPv4" ] ; then - IP_DST=${IPv4_DST} - ip netns exec ${NS3} bash -c \ - "nc -4 -l -p 9000 > ${TMPFILE} &" - elif [ "${PROTO}" == "IPv6" ] ; then - IP_DST=${IPv6_DST} - ip netns exec ${NS3} bash -c \ - "nc -6 -l -p 9000 > ${TMPFILE} &" - RET=$? 
- else - echo " test_gso: unknown PROTO: ${PROTO}" - TEST_STATUS=1 - fi - sleep 1 # let nc start listening - - # send a packet larger than MTU - ip netns exec ${NS1} bash -c \ - "dd if=/dev/zero bs=$PKT_SZ count=1 > /dev/tcp/${IP_DST}/9000 2>/dev/null" - sleep 2 # let the packet get delivered - - # verify we received all expected bytes - SZ=$(stat -c %s ${TMPFILE}) - if [ "$SZ" != "$PKT_SZ" ] ; then - echo " test_gso failed: ${PROTO}" - TEST_STATUS=1 - fi -} - -test_egress() -{ - local readonly ENCAP=$1 - echo "starting egress ${ENCAP} encap test ${VRF}" - setup - - # by default, pings work - test_ping IPv4 0 - test_ping IPv6 0 - - # remove NS2->DST routes, ping fails - ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3 ${VRF} - ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3 ${VRF} - test_ping IPv4 1 - test_ping IPv6 1 - - # install replacement routes (LWT/eBPF), pings succeed - if [ "${ENCAP}" == "IPv4" ] ; then - ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj \ - ${BPF_FILE} sec encap_gre dev veth1 ${VRF} - ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj \ - ${BPF_FILE} sec encap_gre dev veth1 ${VRF} - elif [ "${ENCAP}" == "IPv6" ] ; then - ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj \ - ${BPF_FILE} sec encap_gre6 dev veth1 ${VRF} - ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj \ - ${BPF_FILE} sec encap_gre6 dev veth1 ${VRF} - else - echo " unknown encap ${ENCAP}" - TEST_STATUS=1 - fi - test_ping IPv4 0 - test_ping IPv6 0 - - # skip GSO tests with VRF: VRF routing needs properly assigned - # source IP/device, which is easy to do with ping and hard with dd/nc. 
- if [ -z "${VRF}" ] ; then - test_gso IPv4 - test_gso IPv6 - fi - - # a negative test: remove routes to GRE devices: ping fails - remove_routes_to_gredev - test_ping IPv4 1 - test_ping IPv6 1 - - # another negative test - add_unreachable_routes_to_gredev - test_ping IPv4 1 - test_ping IPv6 1 - - cleanup - process_test_results -} - -test_ingress() -{ - local readonly ENCAP=$1 - echo "starting ingress ${ENCAP} encap test ${VRF}" - setup - - # need to wait a bit for IPv6 to autoconf, otherwise - # ping6 sometimes fails with "unable to bind to address" - - # by default, pings work - test_ping IPv4 0 - test_ping IPv6 0 - - # remove NS2->DST routes, pings fail - ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3 ${VRF} - ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3 ${VRF} - test_ping IPv4 1 - test_ping IPv6 1 - - # install replacement routes (LWT/eBPF), pings succeed - if [ "${ENCAP}" == "IPv4" ] ; then - ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj \ - ${BPF_FILE} sec encap_gre dev veth2 ${VRF} - ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj \ - ${BPF_FILE} sec encap_gre dev veth2 ${VRF} - elif [ "${ENCAP}" == "IPv6" ] ; then - ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj \ - ${BPF_FILE} sec encap_gre6 dev veth2 ${VRF} - ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj \ - ${BPF_FILE} sec encap_gre6 dev veth2 ${VRF} - else - echo "FAIL: unknown encap ${ENCAP}" - TEST_STATUS=1 - fi - test_ping IPv4 0 - test_ping IPv6 0 - - # a negative test: remove routes to GRE devices: ping fails - remove_routes_to_gredev - test_ping IPv4 1 - test_ping IPv6 1 - - # another negative test - add_unreachable_routes_to_gredev - test_ping IPv4 1 - test_ping IPv6 1 - - cleanup - process_test_results -} - -VRF="" -test_egress IPv4 -test_egress IPv6 -test_ingress IPv4 -test_ingress IPv6 - -VRF="vrf red" -test_egress IPv4 -test_egress IPv6 -test_ingress IPv4 -test_ingress IPv6 - -print_test_summary_and_exit --- base-commit: 
5fd21aaac37919abc5c5d0df1eb06a9f02518f27 change-id: 20250206-lwt_ip-b6a91d2787bf Best regards, -- Bastien Curutchet (eBPF Foundation) <bastien.curutchet(a)bootlin.com>

7 months, 3 weeks

2
1
0 0

[PATCH] selftests/nolibc: stop testing constructor order

by Thomas Weißschuh

The execution order of constructors is undefined and depends on the toolchain. While recent toolchains seem to have a stable order, it doesn't work for older ones and may also change at any time. Stop validating the order and instead only validate that all constructors are executed. Reported-by: Willy Tarreau <w(a)1wt.eu> Closes: https://lore.kernel.org/lkml/20250301110735.GA18621@1wt.eu/ Signed-off-by: Thomas Weißschuh <linux(a)weissschuh.net> --- tools/testing/selftests/nolibc/nolibc-test-linkage.c | 6 +++--- tools/testing/selftests/nolibc/nolibc-test.c | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/nolibc/nolibc-test-linkage.c b/tools/testing/selftests/nolibc/nolibc-test-linkage.c index 5ff4c8a1db2a46cf3f8cb55bdabaa5e8819b344c..a7ca8325863face9cd4134a717fe4c7761bdeb7f 100644 --- a/tools/testing/selftests/nolibc/nolibc-test-linkage.c +++ b/tools/testing/selftests/nolibc/nolibc-test-linkage.c @@ -11,16 +11,16 @@ void *linkage_test_errno_addr(void) return &errno; } -int linkage_test_constructor_test_value; +int linkage_test_constructor_test_value = 0; __attribute__((constructor)) static void constructor1(void) { - linkage_test_constructor_test_value = 2; + linkage_test_constructor_test_value |= 1 << 0; } __attribute__((constructor)) static void constructor2(void) { - linkage_test_constructor_test_value *= 3; + linkage_test_constructor_test_value |= 1 << 1; } diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index a5abf16dbfe0f2aed286964fdfc391bc6201ef3b..5884a891c491544050fc35b07322c73a1a9dbaf3 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -692,14 +692,14 @@ int expect_strtox(int llen, void *func, const char *input, int base, intmax_t ex __attribute__((constructor)) static void constructor1(void) { - constructor_test_value = 1; + constructor_test_value |= 1 << 0; } __attribute__((constructor)) 
static void constructor2(int argc, char **argv, char **envp) { if (argc && argv && envp) - constructor_test_value *= 2; + constructor_test_value |= 1 << 1; } int run_startup(int min, int max) @@ -738,9 +738,9 @@ int run_startup(int min, int max) CASE_TEST(environ_HOME); EXPECT_PTRNZ(1, getenv("HOME")); break; CASE_TEST(auxv_addr); EXPECT_PTRGT(test_auxv != (void *)-1, test_auxv, brk); break; CASE_TEST(auxv_AT_UID); EXPECT_EQ(1, getauxval(AT_UID), getuid()); break; - CASE_TEST(constructor); EXPECT_EQ(is_nolibc, constructor_test_value, 2); break; + CASE_TEST(constructor); EXPECT_EQ(is_nolibc, constructor_test_value, 0x3); break; CASE_TEST(linkage_errno); EXPECT_PTREQ(1, linkage_test_errno_addr(), &errno); break; - CASE_TEST(linkage_constr); EXPECT_EQ(is_nolibc, linkage_test_constructor_test_value, 6); break; + CASE_TEST(linkage_constr); EXPECT_EQ(1, linkage_test_constructor_test_value, 0x3); break; case __LINE__: return ret; /* must be last */ /* note: do not set any defaults so as to permit holes above */ --- base-commit: 6e406202a44a1a37176da0333cec10d5320c4b33 change-id: 20250306-nolibc-constructor-order-6921e8c93591 Best regards, -- Thomas Weißschuh <linux(a)weissschuh.net>

7 months, 3 weeks

2
1
0 0

[PATCH v2] selftests: livepatch: test if ftrace can trace a livepatched function

by Filipe Xavier

This new test makes sure that ftrace can trace a function that was introduced by a livepatch. Signed-off-by: Filipe Xavier <felipeaggger(a)gmail.com> --- Changes in v2: - functions.sh: added reset tracing on push and pop_config. - test-ftrace.sh: enabled tracing_on before test init. - nitpick: added double quotations on filenames and fixed some wording. - Link to v1: https://lore.kernel.org/r/20250102-ftrace-selftest-livepatch-v1-1-84880baef… --- tools/testing/selftests/livepatch/functions.sh | 14 ++++++++++ tools/testing/selftests/livepatch/test-ftrace.sh | 33 ++++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh index e5d06fb402335d85959bafe099087effc6ddce12..e6c13514002dae5f8d7461f90b8241ab43024ea4 100644 --- a/tools/testing/selftests/livepatch/functions.sh +++ b/tools/testing/selftests/livepatch/functions.sh @@ -62,6 +62,9 @@ function push_config() { awk -F'[: ]' '{print "file " $1 " line " $2 " " $4}') FTRACE_ENABLED=$(sysctl --values kernel.ftrace_enabled) KPROBE_ENABLED=$(cat "$SYSFS_KPROBES_DIR/enabled") + TRACING_ON=$(cat "$SYSFS_DEBUG_DIR/tracing/tracing_on") + CURRENT_TRACER=$(cat "$SYSFS_DEBUG_DIR/tracing/current_tracer") + FTRACE_FILTER=$(cat "$SYSFS_DEBUG_DIR/tracing/set_ftrace_filter") } function pop_config() { @@ -74,6 +77,17 @@ function pop_config() { if [[ -n "$KPROBE_ENABLED" ]]; then echo "$KPROBE_ENABLED" > "$SYSFS_KPROBES_DIR/enabled" fi + if [[ -n "$TRACING_ON" ]]; then + echo "$TRACING_ON" > "$SYSFS_DEBUG_DIR/tracing/tracing_on" + fi + if [[ -n "$CURRENT_TRACER" ]]; then + echo "$CURRENT_TRACER" > "$SYSFS_DEBUG_DIR/tracing/current_tracer" + fi + if [[ "$FTRACE_FILTER" == *"#"* ]]; then + echo > "$SYSFS_DEBUG_DIR/tracing/set_ftrace_filter" + elif [[ -n "$FTRACE_FILTER" ]]; then + echo "$FTRACE_FILTER" > "$SYSFS_DEBUG_DIR/tracing/set_ftrace_filter" + fi } function set_dynamic_debug() { diff --git 
a/tools/testing/selftests/livepatch/test-ftrace.sh b/tools/testing/selftests/livepatch/test-ftrace.sh index fe14f248913acbec46fb6c0fec38a2fc84209d39..66af5d726c52e48e5177804e182b4ff31784d5ac 100755 --- a/tools/testing/selftests/livepatch/test-ftrace.sh +++ b/tools/testing/selftests/livepatch/test-ftrace.sh @@ -61,4 +61,37 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete % rmmod $MOD_LIVEPATCH" +# - verify livepatch can load +# - check if traces have a patched function +# - unload livepatch and reset trace + +start_test "trace livepatched function and check that the live patch remains in effect" + +TRACE_FILE="$SYSFS_DEBUG_DIR/tracing/trace" +FUNCTION_NAME="livepatch_cmdline_proc_show" + +load_lp $MOD_LIVEPATCH + +echo 1 > "$SYSFS_DEBUG_DIR/tracing/tracing_on" +echo $FUNCTION_NAME > "$SYSFS_DEBUG_DIR/tracing/set_ftrace_filter" +echo "function" > "$SYSFS_DEBUG_DIR/tracing/current_tracer" +echo "" > "$TRACE_FILE" + +if [[ "$(cat /proc/cmdline)" != "$MOD_LIVEPATCH: this has been live patched" ]] ; then + echo -e "FAIL\n\n" + die "livepatch kselftest(s) failed" +fi + +grep -q $FUNCTION_NAME "$TRACE_FILE" +FOUND=$? + +disable_lp $MOD_LIVEPATCH +unload_lp $MOD_LIVEPATCH + +if [ "$FOUND" -eq 1 ]; then + echo -e "FAIL\n\n" + die "livepatch kselftest(s) failed" +fi + + exit 0 --- base-commit: fc033cf25e612e840e545f8d5ad2edd6ba613ed5 change-id: 20250101-ftrace-selftest-livepatch-161fb77dbed8 Best regards, -- Filipe Xavier <felipeaggger(a)gmail.com>

7 months, 3 weeks

4
4
0 0

[PATCH v3 1/5] tools/nolibc: add support for openat(2)

by Louis Taylor

openat is useful to avoid needing to construct relative paths, so expose a wrapper for using it directly. Signed-off-by: Louis Taylor <louis(a)kragniz.eu> --- tools/include/nolibc/sys.h | 25 ++++++++++++++++++++ tools/testing/selftests/nolibc/nolibc-test.c | 17 +++++++++++++ 2 files changed, 42 insertions(+) diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index 8f44c33b1213..3cd938f9abda 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -765,6 +765,31 @@ int mount(const char *src, const char *tgt, return __sysret(sys_mount(src, tgt, fst, flags, data)); } +/* + * int openat(int dirfd, const char *path, int flags[, mode_t mode]); + */ + +static __attribute__((unused)) +int sys_openat(int dirfd, const char *path, int flags, mode_t mode) +{ + return my_syscall4(__NR_openat, dirfd, path, flags, mode); +} + +static __attribute__((unused)) +int openat(int dirfd, const char *path, int flags, ...) +{ + mode_t mode = 0; + + if (flags & O_CREAT) { + va_list args; + + va_start(args, flags); + mode = va_arg(args, mode_t); + va_end(args); + } + + return __sysret(sys_openat(dirfd, path, flags, mode)); +} /* * int open(const char *path, int flags[, mode_t mode]); diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index 79c3e6a845f3..e8faddcecf9d 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -1028,6 +1028,22 @@ int test_rlimit(void) return 0; } +int test_openat(void) +{ + int dev, null; + + dev = openat(AT_FDCWD, "/dev", O_DIRECTORY); + if (dev < 0) + return -1; + + null = openat(dev, "null", O_RDONLY); + close(dev); + if (null < 0) + return -1; + + close(null); + return 0; +} /* Run syscall tests between IDs <min> and <max>. * Return 0 on success, non-zero on failure. 
@@ -1116,6 +1132,7 @@ int run_syscall(int min, int max) CASE_TEST(mmap_munmap_good); EXPECT_SYSZR(1, test_mmap_munmap()); break; CASE_TEST(open_tty); EXPECT_SYSNE(1, tmp = open("/dev/null", 0), -1); if (tmp != -1) close(tmp); break; CASE_TEST(open_blah); EXPECT_SYSER(1, tmp = open("/proc/self/blah", 0), -1, ENOENT); if (tmp != -1) close(tmp); break; + CASE_TEST(openat_dir); EXPECT_SYSZR(1, test_openat()); break; CASE_TEST(pipe); EXPECT_SYSZR(1, test_pipe()); break; CASE_TEST(poll_null); EXPECT_SYSZR(1, poll(NULL, 0, 0)); break; CASE_TEST(poll_stdout); EXPECT_SYSNE(1, ({ struct pollfd fds = { 1, POLLOUT, 0}; poll(&fds, 1, 0); }), -1); break; -- 2.45.2

7 months, 3 weeks

2
2
0 0

[PATCH bpf-next v5 0/6] XDP metadata support for tun driver

by Marcus Wichelmann

Hi all, this v5 of the patch series is very similar to v4, but rebased onto the bpf-next/net branch instead of bpf-next/master. Because the commit c047e0e0e435 ("selftests/bpf: Optionally open a dedicated namespace to run test in it") is not yet included in this branch, I changed the xdp_context_tuntap test to manually create a namespace to run the test in. Not so successful pipeline: https://github.com/kernel-patches/bpf/actions/runs/13682405154 The CI pipeline failed because of veristat changes in seemingly unrelated eBPF programs. I don't think this has to do with the changes from this patch series, but if it does, please let me know what I may have to do different to make the CI pass. --- v5: - rebase onto bpf-next/net - resolve rebase conflicts - change xdp_context_tuntap test to manually create and open a network namespace using netns_new v4: https://lore.kernel.org/bpf/20250227142330.1605996-1-marcus.wichelmann@hetz… - strip unrelated changes from the selftest patches - extend commit message for "selftests/bpf: refactor xdp_context_functional test and bpf program" - the NOARP flag was not effective to prevent other packets from interfering with the tests, add a filter to the XDP program instead - run xdp_context_tuntap in a separate namespace to avoid conflicts with other tests v3: https://lore.kernel.org/bpf/20250224152909.3911544-1-marcus.wichelmann@hetz… - change the condition to handle xdp_buffs without metadata support, as suggested by Willem de Bruijn <willemb(a)google.com> - add clarifying comment why that condition is needed - set NOARP flag in selftests to ensure that the kernel does not send packets on the test interfaces that may interfere with the tests v2: https://lore.kernel.org/bpf/20250217172308.3291739-1-marcus.wichelmann@hetz… - submit against bpf-next subtree - split commits and improved commit messages - remove redundant metasize check and add clarifying comment instead - use max() instead of ternary operator - add selftest for metadata 
support in the tun driver v1: https://lore.kernel.org/all/20250130171614.1657224-1-marcus.wichelmann@hetz… Marcus Wichelmann (6): net: tun: enable XDP metadata support net: tun: enable transfer of XDP metadata to skb selftests/bpf: move open_tuntap to network helpers selftests/bpf: refactor xdp_context_functional test and bpf program selftests/bpf: add test for XDP metadata support in tun driver selftests/bpf: fix file descriptor assertion in open_tuntap helper drivers/net/tun.c | 28 +++- tools/testing/selftests/bpf/network_helpers.c | 28 ++++ tools/testing/selftests/bpf/network_helpers.h | 3 + .../selftests/bpf/prog_tests/lwt_helpers.h | 29 ---- .../bpf/prog_tests/xdp_context_test_run.c | 145 +++++++++++++++++- .../selftests/bpf/progs/test_xdp_meta.c | 53 +++++-- 6 files changed, 230 insertions(+), 56 deletions(-) -- 2.43.0

7 months, 3 weeks

2
7
0 0

[PATCH v2 1/5] tools/nolibc: add support for openat(2)

by Louis Taylor

openat is useful to avoid needing to construct relative paths, so expose a wrapper for using it directly. Signed-off-by: Louis Taylor <louis(a)kragniz.eu> --- tools/include/nolibc/sys.h | 25 ++++++++++++++++++++ tools/testing/selftests/nolibc/nolibc-test.c | 21 ++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index 8f44c33b1213..3cd938f9abda 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -765,6 +765,31 @@ int mount(const char *src, const char *tgt, return __sysret(sys_mount(src, tgt, fst, flags, data)); } +/* + * int openat(int dirfd, const char *path, int flags[, mode_t mode]); + */ + +static __attribute__((unused)) +int sys_openat(int dirfd, const char *path, int flags, mode_t mode) +{ + return my_syscall4(__NR_openat, dirfd, path, flags, mode); +} + +static __attribute__((unused)) +int openat(int dirfd, const char *path, int flags, ...) +{ + mode_t mode = 0; + + if (flags & O_CREAT) { + va_list args; + + va_start(args, flags); + mode = va_arg(args, mode_t); + va_end(args); + } + + return __sysret(sys_openat(dirfd, path, flags, mode)); +} /* * int open(const char *path, int flags[, mode_t mode]); diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index 79c3e6a845f3..2a1629938dd6 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -1028,6 +1028,26 @@ int test_rlimit(void) return 0; } +static int test_openat(void) +{ + int dev; + int null; + + dev = openat(AT_FDCWD, "/dev", O_DIRECTORY); + if (dev < 0) + return -1; + + null = openat(dev, "null", 0); + if (null < 0) { + close(dev); + return -1; + } + + close(dev); + close(null); + + return 0; +} /* Run syscall tests between IDs <min> and <max>. * Return 0 on success, non-zero on failure. 
@@ -1116,6 +1136,7 @@ int run_syscall(int min, int max) CASE_TEST(mmap_munmap_good); EXPECT_SYSZR(1, test_mmap_munmap()); break; CASE_TEST(open_tty); EXPECT_SYSNE(1, tmp = open("/dev/null", 0), -1); if (tmp != -1) close(tmp); break; CASE_TEST(open_blah); EXPECT_SYSER(1, tmp = open("/proc/self/blah", 0), -1, ENOENT); if (tmp != -1) close(tmp); break; + CASE_TEST(openat_dir); EXPECT_SYSNE(1, test_openat(), -1); break; CASE_TEST(pipe); EXPECT_SYSZR(1, test_pipe()); break; CASE_TEST(poll_null); EXPECT_SYSZR(1, poll(NULL, 0, 0)); break; CASE_TEST(poll_stdout); EXPECT_SYSNE(1, ({ struct pollfd fds = { 1, POLLOUT, 0}; poll(&fds, 1, 0); }), -1); break; -- 2.45.2

7 months, 3 weeks

3
8
0 0

[PATCH] kunit: tool: Fix bug in parsing test plan

by Rae Moar

A bug was identified where the KTAP below caused an infinite loop: TAP version 13 ok 4 test_case 1..4 The infinite loop was caused by the parser not parsing a test plan if following a test result line. Fix bug to correctly parse test plan and add error if test plan is missing. Signed-off-by: Rae Moar <rmoar(a)google.com> --- tools/testing/kunit/kunit_parser.py | 12 +++++++----- tools/testing/kunit/kunit_tool_test.py | 5 ++--- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index 29fc27e8949b..5dcbc670e1dc 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -761,20 +761,22 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: test.name = "main" ktap_line = parse_ktap_header(lines, test, printer) test.log.extend(parse_diagnostic(lines)) - parse_test_plan(lines, test) + plan_line = parse_test_plan(lines, test) parent_test = True else: # If not the main test, attempt to parse a test header containing # the KTAP version line and/or subtest header line ktap_line = parse_ktap_header(lines, test, printer) subtest_line = parse_test_header(lines, test) + test.log.extend(parse_diagnostic(lines)) + plan_line = parse_test_plan(lines, test) parent_test = (ktap_line or subtest_line) if parent_test: - # If KTAP version line and/or subtest header is found, attempt - # to parse test plan and print test header - test.log.extend(parse_diagnostic(lines)) - parse_test_plan(lines, test) print_test_header(test, printer) + + if parent_test and not plan_line: + test.add_error(printer, 'missing test plan!') + expected_count = test.expected_count subtests = [] test_num = 1 diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 0bcb0cc002f8..e1e142c1a850 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -181,8 +181,7 @@ class 
KUnitParserTest(unittest.TestCase): result = kunit_parser.parse_run_tests( kunit_parser.extract_tap_lines( file.readlines()), stdout) - # A missing test plan is not an error. - self.assertEqual(result.counts, kunit_parser.TestCounts(passed=10, errors=0)) + self.assertEqual(result.counts, kunit_parser.TestCounts(passed=10, errors=2)) self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) def test_no_tests(self): @@ -203,7 +202,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual( kunit_parser.TestStatus.NO_TESTS, result.subtests[0].subtests[0].status) - self.assertEqual(result.counts, kunit_parser.TestCounts(passed=1, errors=1)) + self.assertEqual(result.counts, kunit_parser.TestCounts(passed=1, errors=2)) def test_no_kunit_output(self): base-commit: 0619a4868fc1b32b07fb9ed6c69adc5e5cf4e4b2 -- 2.48.1.711.g2feabab25a-goog

7 months, 3 weeks

3
5
0 0

[PATCH v8 0/4] scanf: convert self-test to KUnit

by Tamir Duberstein

This is one of just 3 remaining "Test Module" kselftests (the others being bitmap and printf), the rest having been converted to KUnit. In addition to the enclosed patch, please consider this an RFC on the removal of the "Test Module" kselftest machinery. I tested this using: $ tools/testing/kunit/kunit.py run --arch arm64 --make_options LLVM=1 scanf Failure output before this series: [ 383.100048] test_scanf: vsscanf("1574 9 64ca 935b 7 142d ff58 0", "%4hx %1hx %4hx %4hx %1hx %4hx %4hx %1hx", ...) expected 2472240330 got 1690959881 [ 383.102843] test_scanf: vsscanf("f12:2:d:2:c166:1:36b:1906", "%3hx:%1hx:%1hx:%1hx:%4hx:%1hx:%3hx:%4hx", ...) expected 131085 got 851970 [ 383.105376] test_scanf: vsscanf("4,b2fe,3,593,6,0,3bde,0", "%1hx,%4hx,%1hx,%3hx,%1hx,%1hx,%4hx,%1hx", ...) expected 93519875 got 242430 [ 383.105659] test_scanf: vsscanf("6-1-2-1-d9e6-f-93e-e567", "%1hx-%1hx-%1hx-%1hx-%4hx-%1hx-%3hx-%4hx", ...) expected 65538 got 131073 [ 383.106127] test_scanf: vsscanf("72d6/35/e88d/1/0/6c8c/7/1", "%4hx/%2hx/%4hx/%1hx/%1hx/%4hx/%1hx/%1hx", ...) expected 125069 got 3901554741 [ 383.106235] test_scanf: vsscanf("c9bea1b8122113e9a168df573", "%4hx%4hx%1hx%4hx%4hx%1hx%4hx%3hx", ...) expected 571539457 got 106936 ... [ 383.106398] test_scanf: failed 6 out of 2545 tests Failure output after this series: # numbers_list_field_width_val_width: ASSERTION FAILED at lib/scanf_kunit.c:94 lib/scanf_kunit.c:555: vsscanf("0 1e 3e43 31f0 0 0 5797 9c70", "%1hx %2hx %4hx %4hx %1hx %1hx %4hx %4hx", ...) expected 837828163 got 1044578334 not ok 1 " " # numbers_list_field_width_val_width: ASSERTION FAILED at lib/scanf_kunit.c:94 lib/scanf_kunit.c:555: vsscanf("dc2:1c:0:3531:2621:5172:1:7", "%3hx:%2hx:%1hx:%4hx:%4hx:%4hx:%1hx:%1hx", ...) expected 892403712 got 28 not ok 2 ":" # numbers_list_field_width_val_width: ASSERTION FAILED at lib/scanf_kunit.c:94 lib/scanf_kunit.c:555: vsscanf("e083,8f6e,b,70ca,1,1,aab1,10e4", "%4hx,%4hx,%1hx,%4hx,%1hx,%1hx,%4hx,%4hx", ...) 
expected 1892286475 got 757614 not ok 3 "," # numbers_list_field_width_val_width: ASSERTION FAILED at lib/scanf_kunit.c:94 lib/scanf_kunit.c:555: vsscanf("2e72-8435-1-2fc-7cbd-c2f1-7158-2b41", "%4hx-%4hx-%1hx-%3hx-%4hx-%4hx-%4hx-%4hx", ...) expected 50069505 got 99381 not ok 4 "-" # numbers_list_field_width_val_width: ASSERTION FAILED at lib/scanf_kunit.c:94 lib/scanf_kunit.c:555: vsscanf("403/0/17/1/11e7/1/1fe8/34ba", "%3hx/%1hx/%2hx/%1hx/%4hx/%1hx/%4hx/%4hx", ...) expected 65559 got 1507328 not ok 5 "/" Signed-off-by: Tamir Duberstein <tamird(a)gmail.com> --- Changes in v8: - Expand "scanf: remove redundant debug logs" commit message. (Andy Shevchenko) - Add patch "implicate test line in failure messages". - Rebase on linux-next, move scanf_kunit.c into lib/tests/. - Link to v7: https://lore.kernel.org/r/20250211-scanf-kunit-convert-v7-0-c057f0a3d9d8@gm… Changes in v7: - Remove redundant debug logs. (Petr Mladek) - Drop Petr's Acked-by. - Use original test assertions as KUNIT_*_EQ_MSG produces hard-to-parse messages. The new failure output is: - Link to v6: https://lore.kernel.org/r/20250210-scanf-kunit-convert-v6-0-4d583d07f92d@gm… Changes in v6: - s/at boot/at runtime/ for consistency with the printf series. - Go back to kmalloc. (Geert Uytterhoeven) - Link to v5: https://lore.kernel.org/r/20250210-scanf-kunit-convert-v5-0-8e64f3a7de99@gm… Changes in v5: - Remove extraneous trailing newlines from failure messages. - Replace `pr_debug` with `kunit_printk`. - Use static char arrays instead of kmalloc. - Drop KUnit boilerplate from CONFIG_SCANF_KUNIT_TEST help text. - Drop arch changes. - Link to v4: https://lore.kernel.org/r/20250207-scanf-kunit-convert-v4-0-a23e2afaede8@gm… Changes in v4: - Bake `test` into various macros, greatly reducing diff noise. - Revert control flow changes. - Link to v3: https://lore.kernel.org/r/20250204-scanf-kunit-convert-v3-0-386d7c3ee714@gm… Changes in v3: - Reduce diff noise in lib/Makefile. 
(Petr Mladek) - Split `scanf_test` into a few test cases. New output: : =================== scanf (10 subtests) ==================== : [PASSED] numbers_simple : ====================== numbers_list ======================= : [PASSED] delim=" " : [PASSED] delim=":" : [PASSED] delim="," : [PASSED] delim="-" : [PASSED] delim="/" : ================== [PASSED] numbers_list =================== : ============ numbers_list_field_width_typemax ============= : [PASSED] delim=" " : [PASSED] delim=":" : [PASSED] delim="," : [PASSED] delim="-" : [PASSED] delim="/" : ======== [PASSED] numbers_list_field_width_typemax ========= : =========== numbers_list_field_width_val_width ============ : [PASSED] delim=" " : [PASSED] delim=":" : [PASSED] delim="," : [PASSED] delim="-" : [PASSED] delim="/" : ======= [PASSED] numbers_list_field_width_val_width ======== : [PASSED] numbers_slice : [PASSED] numbers_prefix_overflow : [PASSED] test_simple_strtoull : [PASSED] test_simple_strtoll : [PASSED] test_simple_strtoul : [PASSED] test_simple_strtol : ====================== [PASSED] scanf ====================== : ============================================================ : Testing complete. Ran 22 tests: passed: 22 : Elapsed time: 5.517s total, 0.001s configuring, 5.440s building, 0.067s running - Link to v2: https://lore.kernel.org/r/20250203-scanf-kunit-convert-v2-1-277a618d804e@gm… Changes in v2: - Rename lib/{test_scanf.c => scanf_kunit.c}. 
(Andy Shevchenko) - Link to v1: https://lore.kernel.org/r/20250131-scanf-kunit-convert-v1-1-0976524f0eba@gm… --- Tamir Duberstein (4): scanf: implicate test line in failure messages scanf: remove redundant debug logs scanf: convert self-test to KUnit scanf: break kunit into test cases MAINTAINERS | 2 +- lib/Kconfig.debug | 12 +- lib/Makefile | 1 - lib/tests/Makefile | 1 + lib/{test_scanf.c => tests/scanf_kunit.c} | 299 +++++++++++++++--------------- tools/testing/selftests/lib/Makefile | 2 +- tools/testing/selftests/lib/config | 1 - tools/testing/selftests/lib/scanf.sh | 4 - 8 files changed, 160 insertions(+), 162 deletions(-) --- base-commit: 7b7a883c7f4de1ee5040bd1c32aabaafde54d209 change-id: 20250131-scanf-kunit-convert-f70dc33bb34c Best regards, -- Tamir Duberstein <tamird(a)gmail.com>

7 months, 3 weeks

4
19
0 0

[PATCH net-next v7 0/6] tun: Introduce virtio-net hashing feature

by Akihiko Odaki

virtio-net has two usages of hashes: one is RSS and the other is hash reporting. Conventionally the hash calculation was done by the VMM. However, computing the hash after the queue was chosen defeats the purpose of RSS. Another approach is to use an eBPF steering program. This approach has another downside: it cannot report the calculated hash due to the restrictive nature of eBPF. Introduce the code to compute hashes into the kernel in order to overcome these challenges. An alternative solution is to extend the eBPF steering program so that it will be able to report to the userspace, but it is based on context rewrites, which is in feature freeze. We can adopt kfuncs, but they will not be UAPIs. We opt for an ioctl to align with other relevant UAPIs (KVM and vhost_net). The patches for QEMU to use this new feature were submitted as an RFC and are available at: https://patchew.org/QEMU/20240915-hash-v3-0-79cb08d28647@daynix.com/ This work was presented at LPC 2024: https://lpc.events/event/18/contributions/1963/ V1 -> V2: Changed to introduce a new BPF program type. Signed-off-by: Akihiko Odaki <akihiko.odaki(a)daynix.com> --- Changes in v7: - Ensured to set hash_report to VIRTIO_NET_HASH_REPORT_NONE for VHOST_NET_F_VIRTIO_NET_HDR. - s/4/sizeof(u32)/ in patch "virtio_net: Add functions for hashing". - Added tap_skb_cb type. - Rebased. - Link to v6: https://lore.kernel.org/r/20250109-rss-v6-0-b1c90ad708f6@daynix.com Changes in v6: - Extracted changes to fill vnet header holes into another series. - Squashed patches "skbuff: Introduce SKB_EXT_TUN_VNET_HASH", "tun: Introduce virtio-net hash reporting feature", and "tun: Introduce virtio-net RSS" into patch "tun: Introduce virtio-net hash feature". - Dropped the RFC tag. - Link to v5: https://lore.kernel.org/r/20241008-rss-v5-0-f3cf68df005d@daynix.com Changes in v5: - Fixed a compilation error with CONFIG_TUN_VNET_CROSS_LE.
- Optimized the calculation of the hash value according to: https://git.dpdk.org/dpdk/commit/?id=3fb1ea032bd6ff8317af5dac9af901f1f324ca… - Added patch "tun: Unify vnet implementation". - Dropped patch "tap: Pad virtio header with zero". - Added patch "selftest: tun: Test vnet ioctls without device". - Reworked selftests to skip for older kernels. - Documented the case when the underlying device is deleted and packets have queue_mapping set by TC. - Reordered test harness arguments. - Added code to handle fragmented packets. - Link to v4: https://lore.kernel.org/r/20240924-rss-v4-0-84e932ec0e6c@daynix.com Changes in v4: - Moved tun_vnet_hash_ext to if_tun.h. - Renamed virtio_net_toeplitz() to virtio_net_toeplitz_calc(). - Replaced htons() with cpu_to_be16(). - Changed virtio_net_hash_rss() to return void. - Reordered variable declarations in virtio_net_hash_rss(). - Removed virtio_net_hdr_v1_hash_from_skb(). - Updated messages of "tap: Pad virtio header with zero" and "tun: Pad virtio header with zero". - Fixed vnet_hash allocation size. - Ensured to free vnet_hash when destructing tun_struct. - Link to v3: https://lore.kernel.org/r/20240915-rss-v3-0-c630015db082@daynix.com Changes in v3: - Reverted back to add ioctl. - Split patch "tun: Introduce virtio-net hashing feature" into "tun: Introduce virtio-net hash reporting feature" and "tun: Introduce virtio-net RSS". - Changed to reuse hash values computed for automq instead of performing RSS hashing when hash reporting is requested but RSS is not. - Extracted relevant data from struct tun_struct to keep it minimal. - Added kernel-doc. - Changed to allow calling TUNGETVNETHASHCAP before TUNSETIFF. - Initialized num_buffers with 1. - Added a test case for unclassified packets. - Fixed error handling in tests. - Changed tests to verify that the queue index will not overflow. - Rebased. 
- Link to v2: https://lore.kernel.org/r/20231015141644.260646-1-akihiko.odaki@daynix.com --- Akihiko Odaki (6): virtio_net: Add functions for hashing net: flow_dissector: Export flow_keys_dissector_symmetric tun: Introduce virtio-net hash feature selftest: tun: Test vnet ioctls without device selftest: tun: Add tests for virtio-net hashing vhost/net: Support VIRTIO_NET_F_HASH_REPORT Documentation/networking/tuntap.rst | 7 + drivers/net/Kconfig | 1 + drivers/net/tap.c | 62 +++- drivers/net/tun.c | 89 ++++- drivers/net/tun_vnet.h | 180 +++++++++- drivers/vhost/net.c | 49 +-- include/linux/if_tap.h | 2 + include/linux/skbuff.h | 3 + include/linux/virtio_net.h | 188 +++++++++++ include/net/flow_dissector.h | 1 + include/uapi/linux/if_tun.h | 75 +++++ net/core/flow_dissector.c | 3 +- net/core/skbuff.c | 4 + tools/testing/selftests/net/Makefile | 2 +- tools/testing/selftests/net/tun.c | 627 ++++++++++++++++++++++++++++++++++- 15 files changed, 1231 insertions(+), 62 deletions(-) --- base-commit: dd83757f6e686a2188997cb58b5975f744bb7786 change-id: 20240403-rss-e737d89efa77 prerequisite-change-id: 20241230-tun-66e10a49b0c7:v6 prerequisite-patch-id: 871dc5f146fb6b0e3ec8612971a8e8190472c0fb prerequisite-patch-id: 2797ed249d32590321f088373d4055ff3f430a0e prerequisite-patch-id: ea3370c72d4904e2f0536ec76ba5d26784c0cede prerequisite-patch-id: 837e4cf5d6b451424f9b1639455e83a260c4440d prerequisite-patch-id: ea701076f57819e844f5a35efe5cbc5712d3080d prerequisite-patch-id: 701646fb43ad04cc64dd2bf13c150ccbe6f828ce prerequisite-patch-id: 53176dae0c003f5b6c114d43f936cf7140d31bb5 prerequisite-change-id: 20250116-buffers-96e14bf023fc:v2 prerequisite-patch-id: 25fd4f99d4236a05a5ef16ab79f3e85ee57e21cc Best regards, -- Akihiko Odaki <akihiko.odaki(a)daynix.com>

7 months, 3 weeks

4
12
0 0

Re: [brauner-github:vfs.all 205/231] WARNING: modpost: vmlinux: section mismatch in reference: initramfs_test_cases+0x0 (section: .data) -> initramfs_test_extract (section: .init.text)

by David Disseldorp

[cc'ing linux-kselftest and kunit-dev] Hi, On Wed, 5 Mar 2025 01:47:55 +0800, kernel test robot wrote: > tree: https://github.com/brauner/linux.git vfs.all > head: ea47e99a3a234837d5fea0d1a20bb2ad1eaa6dd4 > commit: b6736cfccb582b7c016cba6cd484fbcf30d499af [205/231] initramfs_test: kunit tests for initramfs unpacking > config: x86_64-buildonly-randconfig-002-20250304 (https://download.01.org/0day-ci/archive/20250305/202503050109.t5Ab93hX-lkp@…) > compiler: gcc-12 (Debian 12.2.0-14) 12.2.0 > reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250305/202503050109.t5Ab93hX-lkp@…) > > If you fix the issue in a separate patch/commit (i.e. not just a new version of > the same patch/commit), kindly add following tags > | Reported-by: kernel test robot <lkp(a)intel.com> > | Closes: https://lore.kernel.org/oe-kbuild-all/202503050109.t5Ab93hX-lkp@intel.com/ > > All warnings (new ones prefixed by >>, old ones prefixed by <<): > > >> WARNING: modpost: vmlinux: section mismatch in reference: initramfs_test_cases+0x0 (section: .data) -> initramfs_test_extract (section: .init.text) > >> WARNING: modpost: vmlinux: section mismatch in reference: initramfs_test_cases+0x30 (section: .data) -> initramfs_test_fname_overrun (section: .init.text) > >> WARNING: modpost: vmlinux: section mismatch in reference: initramfs_test_cases+0x60 (section: .data) -> initramfs_test_data (section: .init.text) > >> WARNING: modpost: vmlinux: section mismatch in reference: initramfs_test_cases+0x90 (section: .data) -> initramfs_test_csum (section: .init.text) > >> WARNING: modpost: vmlinux: section mismatch in reference: initramfs_test_cases+0xc0 (section: .data) -> initramfs_test_hardlink (section: .init.text) > >> WARNING: modpost: vmlinux: section mismatch in reference: initramfs_test_cases+0xf0 (section: .data) -> initramfs_test_many (section: .init.text) These new warnings are covered in the commit message. 
The kunit_test_init_section_suites() registered tests aren't in the .init section as debugfs entries are retained for results reporting (without an ability to rerun them). IIUC, the __kunit_init_test_suites->CONCATENATE(..., _probe) suffix is intended to suppress the modpost warning - @kunit-dev: any ideas why this isn't working as intended? Thanks, David

7 months, 3 weeks

3
3
0 0

[PATCH] selftests: riscv: fix v_exec_initval_nolibc.c

by Ignacio Encinas

Vector registers are zero initialized by the kernel. Stop accepting "all ones" as a clean value. Note that this was not working as expected given that value == 0xff can be assumed to be always false by the compiler as value's range is [-128, 127]. Both GCC (-Wtype-limits) and clang (-Wtautological-constant-out-of-range-compare) warn about this. Signed-off-by: Ignacio Encinas <ignacio(a)iencinas.com> --- I tried looking why "all ones" was previously deemed a "clean" value but couldn't find any information. It looks like the kernel always zero-initializes the vector registers. If "all ones" is still acceptable for any reason, my intention is to spin a v2 changing the types of `value` and `prev_value` to unsigned char. --- tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c b/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c index 35c0812e32de0c82a54f84bd52c4272507121e35..b712c4d258a6cb045aa96de4a75299714866f5e6 100644 --- a/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c +++ b/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c @@ -6,7 +6,7 @@ * the values. To further ensure consistency, this file is compiled without * libc and without auto-vectorization. * - * To be "clean" all values must be either all ones or all zeroes. + * To be "clean" all values must be all zeroes. */ #define __stringify_1(x...) #x @@ -46,7 +46,7 @@ int main(int argc, char **argv) : "=r" (value)); \ if (first) { \ first = 0; \ - } else if (value != prev_value || !(value == 0x00 || value == 0xff)) { \ + } else if (value != prev_value || value != 0x00) { \ printf("Register " __stringify(register) \ " values not clean! value: %u\n", value); \ exit(-1); \ --- base-commit: 03d38806a902b36bf364cae8de6f1183c0a35a67 change-id: 20250301-fix-v_exec_initval_nolibc-498d976c372d Best regards, -- Ignacio Encinas <ignacio(a)iencinas.com>

7 months, 3 weeks

3
3
0 0

[PATCH v4 0/8] initramfs: kunit tests and cleanups

by David Disseldorp

This patchset adds basic kunit test coverage for initramfs unpacking and cleans up some minor buffer handling issues / inefficiencies. Changes since v3: - Drop shared unpack buffer changes + rework into initramfs: allocate heap buffers together (patch 5/8) + extra review complexity wasn't worth the tiny boot-time heap saving - move hardlink hash leak repro into first initramfs_test patch - add note regarding kunit section=.data -> section=.init.text warning Changes since v2 (patch 2 only): - fix !CONFIG_INITRAMFS_PRESERVE_MTIME kunit test checks - add test MODULE_DESCRIPTION(), as suggested by Jeff Johnson - add some missing headers, reported by kernel test robot Changes since v1 (RFC): - rebase atop v6.12-rc6 and filename field overrun fix from https://lore.kernel.org/r/20241030035509.20194-2-ddiss@suse.de - add unit test coverage (new patches 1 and 2) - add patch: fix hardlink hash leak without TRAILER - rework patch: avoid static buffer for error message + drop unnecessary message propagation - drop patch: cpio_buf reuse for built-in and bootloader initramfs + no good justification for the change Feedback appreciated. David Disseldorp (8): init: add initramfs_internal.h initramfs_test: kunit tests for initramfs unpacking vsprintf: add simple_strntoul initramfs: avoid memcpy for hex header fields initramfs: allocate heap buffers together initramfs: reuse name_len for dir mtime tracking initramfs: fix hardlink hash leak without TRAILER initramfs: avoid static buffer for error message include/linux/kstrtox.h | 1 + init/.kunitconfig | 3 + init/Kconfig | 7 + init/Makefile | 1 + init/initramfs.c | 66 ++++---- init/initramfs_internal.h | 8 + init/initramfs_test.c | 407 ++++++++++++++++++++++++++++++++++++++++++++++ lib/vsprintf.c | 7 + 8 files changed, 472 insertions(+), 28 deletions(-) create mode 100644 init/.kunitconfig create mode 100644 init/initramfs_internal.h create mode 100644 init/initramfs_test.c

7 months, 4 weeks

2
11
0 0

[PATCH] selftests: Override command line in lib.mk

by Akihiko Odaki

Documentation/dev-tools/kselftest.rst says you can use the "TARGETS" variable on the make command line to run only tests targeted for a single subsystem: $ make TARGETS="size timers" kselftest A natural way to narrow down further to a particular test in a subsystem is to specify e.g., TEST_GEN_PROGS: $ make TARGETS=net TEST_PROGS= TEST_GEN_PROGS=tun kselftest However, this does not work well because the following statement in tools/testing/selftests/lib.mk gets ignored: TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS)) Add the override directive so that it and similar statements take effect even when TEST_GEN_PROGS and similar variables are specified on the command line. Signed-off-by: Akihiko Odaki <akihiko.odaki(a)daynix.com> --- tools/testing/selftests/lib.mk | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index d6edcfcb5be832ddee4c3d34b5ad221e9295f878..68116e51f97d62376c63f727ba3fd1f616c67562 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -93,9 +93,9 @@ TOOLS_INCLUDES := -isystem $(top_srcdir)/tools/include/uapi # TEST_PROGS are for test shell scripts. # TEST_CUSTOM_PROGS and TEST_PROGS will be run by common run_tests # and install targets. Common clean doesn't touch them. 
-TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS)) -TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED)) -TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES)) +override TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS)) +override TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED)) +override TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES)) all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) \ $(if $(TEST_GEN_MODS_DIR),gen_mods_dir) --- base-commit: dd83757f6e686a2188997cb58b5975f744bb7786 change-id: 20250306-lib-4ac9711c10a2 Best regards, -- Akihiko Odaki <akihiko.odaki(a)daynix.com>

7 months, 4 weeks

1
0
0 0

[PATCHv3 net 0/2] bonding: fix incorrect mac address setting

by Hangbin Liu

The MAC address on the backup slave should be converted from the Solicited-Node Multicast address, not from the bonding unicast target address. v3: also fix the mac setting for slave_set_ns_maddr. (Jay) Add function description for slave_set_ns_maddr/slave_set_ns_maddrs (Jay) v2: fix patch 01's subject Hangbin Liu (2): bonding: fix incorrect MAC address setting to receive NS messages selftests: bonding: fix incorrect mac address drivers/net/bonding/bond_options.c | 55 ++++++++++++++++--- .../drivers/net/bonding/bond_options.sh | 4 +- 2 files changed, 49 insertions(+), 10 deletions(-) -- 2.46.0

7 months, 4 weeks

4
9
0 0

[PATCH bpf-next v3] selftests/Makefile: override the srctree for out-of-tree builds

by Li Zhijian

Fixes an issue where out-of-tree kselftest builds fail when building the BPF and bpftools components. The failure occurs because the top-level Makefile passes a relative srctree path to its sub-Makefiles, which leads to errors in locating necessary files. For example, the following error is encountered: ``` $ make V=1 O=$build/ TARGETS=hid kselftest-all ... make -C ../tools/testing/selftests all make[4]: Entering directory '/path/to/linux/tools/testing/selftests/hid' make -C /path/to/linux/tools/testing/selftests/../../../tools/lib/bpf OUTPUT=/path/to/linux/O/kselftest/hid/tools/build/libbpf/ \ EXTRA_CFLAGS='-g -O0' \ DESTDIR=/path/to/linux/O/kselftest/hid/tools prefix= all install_headers make[5]: Entering directory '/path/to/linux/tools/lib/bpf' ... make[5]: Entering directory '/path/to/linux/tools/bpf/bpftool' Makefile:127: ../tools/build/Makefile.feature: No such file or directory make[5]: *** No rule to make target '../tools/build/Makefile.feature'. Stop. ``` To resolve this, override the srctree in the kselftests's top Makefile when performing an out-of-tree build. This ensures that all sub-Makefiles have the correct path to the source tree, preventing directory resolution errors. 
Cc: Andrii Nakryiko <andrii.nakryiko(a)gmail.com> Signed-off-by: Li Zhijian <lizhijian(a)fujitsu.com> Tested-by: Quentin Monnet <qmo(a)kernel.org> --- Cc: Masahiro Yamada <masahiroy(a)kernel.org> V3: collected Tested-by and rebased on bpf-next V2: - handle srctree in selftests itself rather than the linux' top Makefile # Masahiro Yamada <masahiroy(a)kernel.org> V1: https://lore.kernel.org/lkml/20241217031052.69744-1-lizhijian@fujitsu.com/ --- tools/testing/selftests/Makefile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 2401e973c359..f04a3b0003f6 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -154,15 +154,19 @@ override LDFLAGS = override MAKEFLAGS = endif +top_srcdir ?= ../../.. + # Append kselftest to KBUILD_OUTPUT and O to avoid cluttering # KBUILD_OUTPUT with selftest objects and headers installed # by selftests Makefile or lib.mk. +# Override the `srctree` variable to ensure it is correctly resolved in +# sub-Makefiles, such as those within `bpf`, when managing targets like +# `net` and `hid`. ifdef building_out_of_srctree override LDFLAGS = +override srctree := $(top_srcdir) endif -top_srcdir ?= ../../.. - ifeq ("$(origin O)", "command line") KBUILD_OUTPUT := $(O) endif -- 2.44.0

7 months, 4 weeks

3
2
0 0

[PATCH v9 0/7] mseal system mappings

by jeffxu＠chromium.org

From: Jeff Xu <jeffxu(a)chromium.org> This is V9 version, addressing comments from V8, without code logic change. ------------------------------------------------------------------- As discussed during mseal() upstream process [1], mseal() protects the VMAs of a given virtual memory range against modifications, such as the read/write (RW) and no-execute (NX) bits. For complete descriptions of memory sealing, please see mseal.rst [2]. The mseal() is useful to mitigate memory corruption issues where a corrupted pointer is passed to a memory management system. For example, such an attacker primitive can break control-flow integrity guarantees since read-only memory that is supposed to be trusted can become writable or .text pages can get remapped. The system mappings are read-only; memory sealing can protect them from ever becoming writable or from being unmapped/remapped with different attributes. System mappings such as vdso, vvar, vvar_vclock, vectors (arm compat-mode), sigpage (arm compat-mode), are created by the kernel during program initialization, and could be sealed after creation. Unlike the aforementioned mappings, the uprobe mapping is not established during program startup. However, its lifetime is the same as the process's lifetime [3]. It could be sealed from creation. The vsyscall on x86-64 uses a special address (0xffffffffff600000), which is outside the mm managed range. This means mprotect, munmap, and mremap won't work on the vsyscall. Since sealing doesn't enhance the vsyscall's security, it is skipped in this patch. If we ever seal the vsyscall, it is probably only for decorative purpose, i.e. showing the 'sl' flag in the /proc/pid/smaps. For this patch, it is ignored. It is important to note that the CHECKPOINT_RESTORE feature (CRIU) may alter the system mappings during restore operations. UML (User Mode Linux), gVisor, and rr are also known to change the vdso/vvar mappings. Consequently, this feature cannot be universally enabled across all systems. 
As such, CONFIG_MSEAL_SYSTEM_MAPPINGS is disabled by default. To support mseal of system mappings, architectures must define CONFIG_ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS and update their special mappings calls to pass mseal flag. Additionally, architectures must confirm they do not unmap/remap system mappings during the process lifetime. The existence of this flag for an architecture implies that it does not require the remapping of these system mappings during process lifetime, so sealing these mappings is safe from a kernel perspective. This version covers the x86-64 and arm64 architectures as a minimum viable feature. While no specific CPU hardware features are required to enable this feature on an architecture, memory sealing requires a 64-bit kernel. Other architectures can choose whether or not to adopt this feature. Currently, I'm not aware of any instances in the kernel code that actively munmap/mremap a system mapping without a request from userspace. The PPC does call munmap when _install_special_mapping fails for vdso; however, it's uncertain if this will ever fail for PPC - this needs to be investigated by PPC in the future [4]. The UML kernel can add this support when KUnit tests require it [5]. In this version, we've improved the handling of system mapping sealing from previous versions: instead of modifying the _install_special_mapping function itself, which would affect all architectures, we now call _install_special_mapping with a sealing flag only within the specific architecture that requires it. This targeted approach offers two key advantages: 1) It limits the code change's impact to the necessary architectures, and 2) It aligns with the software architecture by keeping the core memory management within the mm layer, while delegating the decision of sealing system mappings to the individual architecture, which is particularly relevant since 32-bit architectures never require sealing. 
Prior to this patch series, we explored sealing special mappings from userspace using glibc's dynamic linker. This approach revealed several issues: - The PT_LOAD header may report an incorrect length for vdso (smaller than its actual size). The dynamic linker, which relies on PT_LOAD information to determine mapping size, would then split and partially seal the vdso mapping. Since each architecture has its own vdso/vvar code, fixing this in the kernel would require going through each architecture. Our initial goal was to enable sealing readonly mappings, e.g. .text, across all architectures; sealing vdso from kernel since creation appears to be simpler than sealing vdso at glibc. - The [vvar] mapping header only contains address information, not length information. Similar issues might exist for other special mappings. - Mappings like uprobe are not covered by the dynamic linker, and there is no effective solution for them. This feature's security enhancements will benefit ChromeOS, Android, and other high security systems. Testing: This feature was tested on ChromeOS and Android for both x86-64 and ARM64. - Enable sealing and verify vdso/vvar, sigpage, vector are sealed properly, i.e. "sl" shown in the smaps for those mappings, and mremap is blocked. - Passing various automation tests (e.g. pre-checkin) on ChromeOS and Android to ensure the sealing doesn't affect the functionality of Chromebook and Android phone. I also tested the feature on Ubuntu on x86-64: - With config disabled, vdso/vvar is not sealed, - with config enabled, vdso/vvar is sealed, and booting up Ubuntu is OK, normal operations such as browsing the web, open/edit doc are OK. 
Link: https://lore.kernel.org/all/20240415163527.626541-1-jeffxu@chromium.org/ [1] Link: Documentation/userspace-api/mseal.rst [2] Link: https://lore.kernel.org/all/CABi2SkU9BRUnqf70-nksuMCQ+yyiWjo3fM4XkRkL-NrCZx… [3] Link: https://lore.kernel.org/all/CABi2SkV6JJwJeviDLsq9N4ONvQ=EFANsiWkgiEOjyT9TQS… [4] Link: https://lore.kernel.org/all/202502251035.239B85A93@keescook/ [5] ------------------------------------------- History: V9: - Add negative test in selftest (Kees Cook) - fx typos in text (Kees Cook) V8: - Change ARCH_SUPPORTS_MSEAL_X to ARCH_SUPPORTS_MSEAL_X (Liam R. Howlett) - Update comments in Kconfig and mseal.rst (Lorenzo Stoakes, Liam R. Howlett) - Change patch header perfix to "mseal sysmap" (Lorenzo Stoakes) - Remove "vm_flags =" (Kees Cook, Liam R. Howlett, Oleg Nesterov) - Drop uml architecture (Lorenzo Stoakes, Kees Cook) - Add a selftest to verify system mappings are sealed (Lorenzo Stoakes) V7: https://lore.kernel.org/all/20250224225246.3712295-1-jeffxu@google.com/ - Remove cover letter from the first patch (Liam R. Howlett) - Change macro name to VM_SEALED_SYSMAP (Liam R. Howlett) - logging and fclose() in selftest (Liam R. Howlett) V6: https://lore.kernel.org/all/20250224174513.3600914-1-jeffxu@google.com/ - mseal.rst: fix a typo (Randy Dunlap) - security/Kconfig: add rr into note (Liam R. Howlett) - remove mseal_system_mappings() and use macro instead (Liam R. Howlett) - mseal.rst: add incompatible userland software (Lorenzo Stoakes) - remove RFC from title (Kees Cook) V5 https://lore.kernel.org/all/20250212032155.1276806-1-jeffxu@google.com/ - Remove kernel cmd line (Lorenzo Stoakes) - Add test info (Lorenzo Stoakes) - Add threat model info (Lorenzo Stoakes) - Fix x86 selftest: test_mremap_vdso - Restrict code change to ARM64/x86-64/UM arch only. - Add userprocess.h to include seal_system_mapping(). - Remove sealing vsyscall. - Split the patch. 
V4: https://lore.kernel.org/all/20241125202021.3684919-1-jeffxu@google.com/ - ARCH_HAS_SEAL_SYSTEM_MAPPINGS (Lorenzo Stoakes) - test info (Lorenzo Stoakes) - Update mseal.rst (Liam R. Howlett) - Update test_mremap_vdso.c (Liam R. Howlett) - Misc. style, comments, doc update (Liam R. Howlett) V3: https://lore.kernel.org/all/20241113191602.3541870-1-jeffxu@google.com/ - Revert uprobe to v1 logic (Oleg Nesterov) - use CONFIG_SEAL_SYSTEM_MAPPINGS instead of _ALWAYS/_NEVER (Kees Cook) - Move kernel cmd line from fs/exec.c to mm/mseal.c and misc. (Liam R. Howlett) V2: https://lore.kernel.org/all/20241014215022.68530-1-jeffxu@google.com/ - Seal uprobe always (Oleg Nesterov) - Update comments and description (Randy Dunlap, Liam R.Howlett, Oleg Nesterov) - Rebase to linux_main V1: - https://lore.kernel.org/all/20241004163155.3493183-1-jeffxu@google.com/ -------------------------------------------------- Jeff Xu (7): mseal sysmap: kernel config and header change selftests: x86: test_mremap_vdso: skip if vdso is msealed mseal sysmap: enable x86-64 mseal sysmap: enable arm64 mseal sysmap: uprobe mapping mseal sysmap: update mseal.rst selftest: test system mappings are sealed. 
Documentation/userspace-api/mseal.rst | 20 +++ arch/arm64/Kconfig | 1 + arch/arm64/kernel/vdso.c | 12 +- arch/x86/Kconfig | 1 + arch/x86/entry/vdso/vma.c | 7 +- include/linux/mm.h | 10 ++ init/Kconfig | 22 ++++ kernel/events/uprobes.c | 3 +- security/Kconfig | 21 ++++ tools/testing/selftests/Makefile | 1 + .../mseal_system_mappings/.gitignore | 2 + .../selftests/mseal_system_mappings/Makefile | 6 + .../selftests/mseal_system_mappings/config | 1 + .../mseal_system_mappings/sysmap_is_sealed.c | 119 ++++++++++++++++++ .../testing/selftests/x86/test_mremap_vdso.c | 43 +++++++ 15 files changed, 261 insertions(+), 8 deletions(-) create mode 100644 tools/testing/selftests/mseal_system_mappings/.gitignore create mode 100644 tools/testing/selftests/mseal_system_mappings/Makefile create mode 100644 tools/testing/selftests/mseal_system_mappings/config create mode 100644 tools/testing/selftests/mseal_system_mappings/sysmap_is_sealed.c -- 2.48.1.711.g2feabab25a-goog

7 months, 4 weeks

5
15
0 0

[PATCH 0/2] kselftest/arm64: mte: Minor fixes to the MTE hugetlb test

by Catalin Marinas

The first patch makes use of the correct terminology for synchronous and asynchronous errors. The second patch checks whether PROT_MTE is supported on hugetlb mappings before continuing with the tests. Such support was added in 6.13 but people tend to use current kselftests on older kernels. Avoid the failure reporting on such kernels, just skip the tests. Catalin Marinas (2): kselftest/arm64: mte: Use the correct naming for tag check modes in check_hugetlb_options.c kselftest/arm64: mte: Skip the hugetlb tests if MTE not supported on such mappings .../arm64/mte/check_hugetlb_options.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-)

7 months, 4 weeks

3
6
0 0

Re: [PATCH v8 3/4] scanf: convert self-test to KUnit

by Petr Mladek

On Sat 2025-02-15 14:52:22, Tamir Duberstein wrote: > On Sat, Feb 15, 2025 at 1:51 PM kernel test robot <lkp(a)intel.com> wrote: > > > > Hi Tamir, > > > > kernel test robot noticed the following build warnings: > > > > [auto build test WARNING on 7b7a883c7f4de1ee5040bd1c32aabaafde54d209] > > > > url: > https://github.com/intel-lab-lkp/linux/commits/Tamir-Duberstein/scanf-impli… > > base: 7b7a883c7f4de1ee5040bd1c32aabaafde54d209 > > patch link: > https://lore.kernel.org/r/20250214-scanf-kunit-convert-v8-3-5ea50f95f83c%40… > > patch subject: [PATCH v8 3/4] scanf: convert self-test to KUnit > > config: sh-randconfig-002-20250216 ( > https://download.01.org/0day-ci/archive/20250216/202502160245.KUrryBJR-lkp@… > ) > > compiler: sh4-linux-gcc (GCC) 14.2.0 > > reproduce (this is a W=1 build): ( > https://download.01.org/0day-ci/archive/20250216/202502160245.KUrryBJR-lkp@… > ) > > > > If you fix the issue in a separate patch/commit (i.e. not just a new > version of > > the same patch/commit), kindly add following tags > > | Reported-by: kernel test robot <lkp(a)intel.com> > > | Closes: > https://lore.kernel.org/oe-kbuild-all/202502160245.KUrryBJR-lkp@intel.com/ > > > > All warnings (new ones prefixed by >>): > > > > In file included from <command-line>: > > lib/tests/scanf_kunit.c: In function 'numbers_list_ll': > > >> include/linux/compiler.h:197:61: warning: function 'numbers_list_ll' > might be a candidate for 'gnu_scanf' format attribute > [-Wsuggest-attribute=format] > > I am not able to reproduce these warnings with clang 19.1.7. They also > don't obviously make sense to me. 
I have reproduced the problem with gcc: $> gcc --version gcc (SUSE Linux) 14.2.1 20250220 [revision 9ffecde121af883b60bbe60d00425036bc873048] $> make W=1 lib/test_scanf.ko CALL scripts/checksyscalls.sh DESCEND objtool INSTALL libsubcmd_headers CC [M] lib/test_scanf.o In file included from <command-line>: lib/test_scanf.c: In function ‘numbers_list_ll’: ./include/linux/compiler.h:197:61: warning: function ‘numbers_list_ll’ might be a candidate for ‘gnu_scanf’ format attribute [-Wsuggest-attribute=format] 197 | #define __BUILD_BUG_ON_ZERO_MSG(e, msg) ((int)sizeof(struct {_Static_assert(!(e), msg);})) | ^ [...] It seems that it is a regression introduced by the first patch of this patch set. And the fix is: diff --git a/lib/test_scanf.c b/lib/test_scanf.c index d1664e0d0138..e65b10c3dc11 100644 --- a/lib/test_scanf.c +++ b/lib/test_scanf.c @@ -27,7 +27,7 @@ static struct rnd_state rnd_state __initdata; typedef int (*check_fn)(const char *file, const int line, const void *check_data, const char *string, const char *fmt, int n_args, va_list ap); -static void __scanf(6, 0) __init +static void __scanf(6, 8) __init _test(const char *file, const int line, check_fn fn, const void *check_data, const char *string, const char *fmt, int n_args, ...) { Best Regards, Petr

7 months, 4 weeks

2
1
0 0

[PATCH 00/32] kselftest harness and nolibc compatibility

by Thomas Weißschuh

Nolibc is useful for selftests as the test programs can be very small, and compiled with just a kernel crosscompiler, without userspace support. Currently nolibc is only usable with kselftest.h, not with the more convenient kselftest_harness.h. This series provides this compatibility by adding new features to nolibc and removing the usage of problematic features from the harness. The first half of the series consists of changes to the harness, the second half of changes to nolibc. Both parts are independent of each other and can go through different trees. The last patch is not meant to be applied and serves as a test that everything works correctly. Based on the next branch of the nolibc tree: https://web.git.kernel.org/pub/scm/linux/kernel/git/nolibc/linux-nolibc.git… Signed-off-by: Thomas Weißschuh <thomas.weissschuh(a)linutronix.de> --- Thomas Weißschuh (32): selftests: harness: Add harness selftest selftests: harness: Use C89 comment style selftests: harness: Ignore unused variant argument warning selftests: harness: Mark functions without prototypes static selftests: harness: Remove inline qualifier for wrappers selftests: harness: Guard includes on nolibc selftests: harness: Remove dependency on libatomic selftests: harness: Implement test timeouts through pidfd selftests: harness: Don't set setup_completed for fixtureless tests selftests: harness: Always provide "self" and "variant" selftests: harness: Move teardown conditional into test metadata selftests: harness: Add teardown callback to test metadata selftests: harness: Stop using setjmp()/longjmp() tools/nolibc: handle intmax_t/uintmax_t in printf tools/nolibc: use intmax definitions from compiler tools/nolibc: use pselect6_time64 if available tools/nolibc: use ppoll_time64 if available tools/nolibc: add tolower() and toupper() tools/nolibc: add _exit() tools/nolibc: add setpgrp() tools/nolibc: implement waitpid() in terms of waitid() Revert "selftests/nolibc: use waitid() over waitpid()" tools/nolibc: add dprintf() and 
vdprintf() tools/nolibc: add getopt() tools/nolibc: allow different write callbacks in printf tools/nolibc: allow limiting of printf destination size tools/nolibc: add snprintf() and friends selftests/nolibc: use snprintf() for printf tests selftests/nolibc: rename vfprintf test suite selftests/nolibc: add test for snprintf() truncation tools/nolibc: implement width padding in printf() HACK: selftests/nolibc: demonstrate usage of the kselftest harness tools/include/nolibc/Makefile | 1 + tools/include/nolibc/getopt.h | 105 ++ tools/include/nolibc/nolibc.h | 1 + tools/include/nolibc/stdint.h | 4 +- tools/include/nolibc/stdio.h | 127 +- tools/include/nolibc/string.h | 17 + tools/include/nolibc/sys.h | 102 +- tools/testing/selftests/Makefile | 1 + tools/testing/selftests/kselftest/.gitignore | 1 + tools/testing/selftests/kselftest/Makefile | 6 + .../testing/selftests/kselftest/harness-selftest.c | 129 ++ .../selftests/kselftest/harness-selftest.expected | 62 + .../selftests/kselftest/harness-selftest.sh | 14 + tools/testing/selftests/kselftest_harness.h | 188 +-- tools/testing/selftests/nolibc/Makefile | 17 +- tools/testing/selftests/nolibc/harness-selftest.c | 1 + tools/testing/selftests/nolibc/nolibc-test.c | 1712 +------------------- tools/testing/selftests/nolibc/run-tests.sh | 2 +- 18 files changed, 639 insertions(+), 1851 deletions(-) --- base-commit: cb839e0cc881b4abd4a2e64cd06c2e313987a189 change-id: 20250130-nolibc-kselftest-harness-8b2c8cac43bf Best regards, -- Thomas Weißschuh <thomas.weissschuh(a)linutronix.de>

7 months, 4 weeks

2
42
0 0

[PATCH rcu 00/10] Miscellaneous RCU changes for v6.15

by Boqun Feng

Hi, Please find the upcoming miscellaneous RCU changes. The changes can also be found at: git://git.kernel.org/pub/scm/linux/kernel/git/rcu/linux.git misc.2025.03.04a Regards, Boqun Paul E. McKenney (6): rcu: Split rcu_report_exp_cpu_mult() mask parameter and use for tracing rcu: Fix get_state_synchronize_rcu_full() GP-start detection rcu-tasks: Move RCU Tasks self-tests to core_initcall() rcu/nocb: Print segment lengths in show_rcu_nocb_gp_state() context_tracking: Make RCU watch ct_kernel_exit_state() warning Flush console log from kernel_power_off() Uladzislau Rezki (Sony) (3): rcutorture: Allow a negative value for nfakewriters rcu: Update TREE05.boot to test normal synchronize_rcu() rcu: Use _full() API to debug synchronize_rcu() Zilin Guan (1): rcu: Remove READ_ONCE() for rdp->gpwrap access in __note_gp_changes() include/linux/printk.h | 6 ++++ include/linux/rcupdate.h | 6 ---- include/linux/rcupdate_wait.h | 3 ++ init/main.c | 1 - kernel/context_tracking.c | 9 +++--- kernel/printk/printk.c | 4 +-- kernel/rcu/rcu.h | 2 +- kernel/rcu/rcutorture.c | 22 ++++++++++---- kernel/rcu/tasks.h | 5 +++- kernel/rcu/tree.c | 29 +++++++++++-------- kernel/rcu/tree_exp.h | 6 ++-- kernel/rcu/tree_nocb.h | 20 +++++++++---- kernel/reboot.c | 1 + .../rcutorture/configs/rcu/TREE05.boot | 6 ++++ 14 files changed, 78 insertions(+), 42 deletions(-) -- 2.48.1

7 months, 4 weeks

1
10
0 0

[PATCH rcu 00/11] Lazy Preempt changes for v6.15

by Boqun Feng

Hi, Please find the upcoming changes for CONFIG_PREEMPT_LAZY in RCU. The changes can also be found at: git://git.kernel.org/pub/scm/linux/kernel/git/rcu/linux.git lazypreempt.2025.02.24a Paul & Ankur, I put patch #7 and #8 (bug fixes in rcutorture) before patch #9 (which is the one that enables non-preemptible RCU in preemptible kernel), because I want to avoid introducing a bug in the middle of the series; I would appreciate it if you could double-check this. Thanks! Regards, Boqun Ankur Arora (7): rcu: fix header guard for rcu_all_qs() rcu: rename PREEMPT_AUTO to PREEMPT_LAZY sched: update __cond_resched comment about RCU quiescent states rcu: handle unstable rdp in rcu_read_unlock_strict() rcu: handle quiescent states for PREEMPT_RCU=n, PREEMPT_COUNT=y osnoise: provide quiescent states rcu: limit PREEMPT_RCU configurations Boqun Feng (1): rcutorture: Update ->extendables check for lazy preemption Paul E. McKenney (3): rcutorture: Update rcutorture_one_extend_check() for lazy preemption rcutorture: Make scenario TREE10 build CONFIG_PREEMPT_LAZY=y rcutorture: Make scenario TREE07 build CONFIG_PREEMPT_LAZY=y include/linux/rcupdate.h | 2 +- include/linux/rcutree.h | 2 +- include/linux/srcutiny.h | 2 +- kernel/rcu/Kconfig | 4 +-- kernel/rcu/rcutorture.c | 26 ++++++++++++--- kernel/rcu/srcutiny.c | 14 ++++---- kernel/rcu/tree_plugin.h | 22 ++++++++++--- kernel/sched/core.c | 4 ++- kernel/trace/trace_osnoise.c | 32 +++++++++---------- .../selftests/rcutorture/configs/rcu/TREE07 | 3 +- .../selftests/rcutorture/configs/rcu/TREE10 | 3 +- 11 files changed, 73 insertions(+), 41 deletions(-) -- 2.39.5 (Apple Git-154)

7 months, 4 weeks

4
15
0 0

[PATCH net-next 1/2] selftests: drv-net: use env.rpath in the HDS test

by Jakub Kicinski

Commit 29b036be1b0b ("selftests: drv-net: test XDP, HDS auto and the ioctl path") added a new test case in the net tree, now that this code has made its way to net-next convert it to use the env.rpath() helper instead of manually computing the relative path. Signed-off-by: Jakub Kicinski <kuba(a)kernel.org> --- tools/testing/selftests/drivers/net/hds.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py index 873f5219e41d..7cc74faed743 100755 --- a/tools/testing/selftests/drivers/net/hds.py +++ b/tools/testing/selftests/drivers/net/hds.py @@ -20,8 +20,7 @@ from lib.py import defer, ethtool, ip def _xdp_onoff(cfg): - test_dir = os.path.dirname(os.path.realpath(__file__)) - prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + prog = cfg.rpath("../../net/lib/xdp_dummy.bpf.o") ip("link set dev %s xdp obj %s sec xdp" % (cfg.ifname, prog)) ip("link set dev %s xdp off" % cfg.ifname) -- 2.48.1

7 months, 4 weeks

3
4
0 0