- Linux-kselftest-mirror - lists.linaro.org

[PATCH v2 6/7] selftests: Fix stdbuf compatibility in mixed libc environments

by Aqib Faruqui

The original stdbuf use only checked if /usr/bin/stdbuf exists in the host's system but failed to verify compatibility between stdbuf and the target test binary. The issue occurs when: - Host system has glibc-based stdbuf from coreutils - Selftest binaries are compiled with a non-glibc toolchain (cross compilation) The fix adds a runtime compatibility test against the target test binary before enabling stdbuf, enabling cross-compiled selftests to run successfully. Signed-off-by: Aqib Faruqui <aqibaf(a)amazon.com> --- tools/testing/selftests/kselftest/runner.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh index 2c3c58e65..8d4e33bd5 100644 --- a/tools/testing/selftests/kselftest/runner.sh +++ b/tools/testing/selftests/kselftest/runner.sh @@ -107,7 +107,7 @@ run_one() echo "# Warning: file $TEST is missing!" echo "not ok $test_num $TEST_HDR_MSG" else - if [ -x /usr/bin/stdbuf ]; then + if [ -x /usr/bin/stdbuf ] && [ -x "$TEST" ] && /usr/bin/stdbuf --output=L ldd "$TEST" >/dev/null 2>&1; then stdbuf="/usr/bin/stdbuf --output=L " fi eval kselftest_cmd_args="\$${kselftest_cmd_args_ref:-}" -- 2.47.3

4 months, 1 week

1
0
0 0

[PATCH v2 5/7] rseq: selftests: Add non-glibc compatibility fixes

by Aqib Faruqui

The rseq selftests rely on features provided by glibc that may not be available in non-glibc C libraries: 1. The __GNUC_PREREQ macro and glibc's thread pointer implementation are not available in non-glibc libraries 2. The __NR_rseq syscall number may not be defined in non-glibc headers Add a fallback thread pointer implementation for non-glibc systems using the pre-existing inline assembly to access thread-local storage directly via %fs/%gs registers. Also provide a fallback definition for __NR_rseq when not already defined by the C library headers: 527 for alpha and 293 for other architectures. Signed-off-by: Aqib Faruqui <aqibaf(a)amazon.com> --- .../selftests/rseq/rseq-x86-thread-pointer.h | 14 ++++++++++++++ tools/testing/selftests/rseq/rseq.c | 8 ++++++++ 2 files changed, 22 insertions(+) diff --git a/tools/testing/selftests/rseq/rseq-x86-thread-pointer.h b/tools/testing/selftests/rseq/rseq-x86-thread-pointer.h index d3133587d..a7c402926 100644 --- a/tools/testing/selftests/rseq/rseq-x86-thread-pointer.h +++ b/tools/testing/selftests/rseq/rseq-x86-thread-pointer.h @@ -14,6 +14,7 @@ extern "C" { #endif +#ifdef __GLIBC__ #if __GNUC_PREREQ (11, 1) static inline void *rseq_thread_pointer(void) { @@ -32,6 +33,19 @@ static inline void *rseq_thread_pointer(void) return __result; } #endif /* !GCC 11 */ +#else +static inline void *rseq_thread_pointer(void) +{ + void *__result; + +# ifdef __x86_64__ + __asm__ ("mov %%fs:0, %0" : "=r" (__result)); +# else + __asm__ ("mov %%gs:0, %0" : "=r" (__result)); +# endif + return __result; +} +#endif /* !__GLIBC__ */ #ifdef __cplusplus } diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index 663a9cef1..1a6f73c98 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -36,6 +36,14 @@ #include "../kselftest.h" #include "rseq.h" +#ifndef __NR_rseq +#ifdef __alpha__ +#define __NR_rseq 527 +#else +#define __NR_rseq 293 +#endif +#endif + /* * Define weak 
versions to play nice with binaries that are statically linked * against a libc that doesn't support registering its own rseq. -- 2.47.3

4 months, 1 week

1
0
0 0

[PATCH v2 4/7] KVM: selftests: Add backtrace fallback

by Aqib Faruqui

The backtrace() function is a GNU extension available in glibc but may not be present in non-glibc libraries. KVM selftests use backtrace() for error reporting and debugging. Add conditional inclusion of execinfo.h only for glibc builds and provide a weak stub implementation of backtrace() that returns 0 (stack trace empty) for non-glibc systems. Signed-off-by: Aqib Faruqui <aqibaf(a)amazon.com> --- tools/testing/selftests/kvm/lib/assert.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c index b49690658..c9778dc6c 100644 --- a/tools/testing/selftests/kvm/lib/assert.c +++ b/tools/testing/selftests/kvm/lib/assert.c @@ -6,11 +6,19 @@ */ #include "test_util.h" -#include <execinfo.h> #include <sys/syscall.h> +#ifdef __GLIBC__ +#include <execinfo.h> /* backtrace */ +#endif + #include "kselftest.h" +int __attribute__((weak)) backtrace(void **buffer, int size) +{ + return 0; +} + /* Dumps the current stack trace to stderr. */ static void __attribute__((noinline)) test_dump_stack(void); static void test_dump_stack(void) -- 2.47.3

4 months, 1 week

1
0
0 0

[PATCH v2 3/7] selftests: kselftest: Add memfd_create syscall compatibility

by Aqib Faruqui

The memfd_create function and related MFD_* flags may not be available in non-glibc C libraries. Some selftests use memfd_create for memory backing operations. Add fallback definitions for MFD_CLOEXEC and MFD_HUGETLB flags, and provide a memfd_create wrapper. Signed-off-by: Aqib Faruqui <aqibaf(a)amazon.com> --- tools/testing/selftests/kselftest.h | 19 +++++++++++++++++++ tools/testing/selftests/kvm/lib/kvm_util.c | 1 + 2 files changed, 20 insertions(+) diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index c3b6d2604..f362c6766 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -57,6 +57,7 @@ #include <string.h> #include <stdio.h> #include <sys/utsname.h> +#include <sys/syscall.h> #endif #ifndef ARRAY_SIZE @@ -80,6 +81,24 @@ #endif #endif /* end arch */ +#ifndef MFD_CLOEXEC +#define MFD_CLOEXEC 0x0001U +#endif + +#ifndef MFD_HUGETLB +#define MFD_HUGETLB 0x0004U +#endif + +static inline int memfd_create(const char *name, unsigned int flags) +{ +#ifdef __NR_memfd_create + return syscall(__NR_memfd_create, name, flags); +#else + errno = ENOSYS; + return -1; +#endif +} + /* define kselftest exit codes */ #define KSFT_PASS 0 #define KSFT_FAIL 1 diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index c3f5142b0..a78b64117 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -15,6 +15,7 @@ #include <sys/resource.h> #include <sys/types.h> #include <sys/stat.h> +#include <sys/syscall.h> #include <unistd.h> #include <linux/kernel.h> -- 2.47.3

4 months, 1 week

1
0
0 0

[PATCH v2 2/7] selftests: harness: Include pidfd.h to get syscall definitions from tools/

by Aqib Faruqui

The kselftest harness uses pidfd_open() for test timeout handling but may not have access to the syscall definitions in non-glibc environments. Include pidfd.h to ensure the syscall numbers are available. Signed-off-by: Aqib Faruqui <aqibaf(a)amazon.com> --- tools/testing/selftests/kselftest_harness.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 2925e47db..1dd3e5a1b 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -69,6 +69,7 @@ #include <unistd.h> #include "kselftest.h" +#include "pidfd/pidfd.h" #define TEST_TIMEOUT_DEFAULT 30 -- 2.47.3

4 months, 1 week

1
0
0 0

[PATCH v2 1/7] selftests/pidfd: Add architecture-specific fallback definitions for pidfd_open

by Aqib Faruqui

The pidfd_open syscall number varies by architecture. Add fallback definitions for Alpha (544) and other architectures (434) to ensure compatibility with non-glibc C libraries that may not define these syscall numbers. Signed-off-by: Aqib Faruqui <aqibaf(a)amazon.com> --- tools/testing/selftests/pidfd/pidfd.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h index f87993def..c373ff18e 100644 --- a/tools/testing/selftests/pidfd/pidfd.h +++ b/tools/testing/selftests/pidfd/pidfd.h @@ -45,8 +45,12 @@ #endif #ifndef __NR_pidfd_open +#ifdef __alpha__ +#define __NR_pidfd_open 544 +#else #define __NR_pidfd_open 434 #endif +#endif #ifndef __NR_pidfd_send_signal #define __NR_pidfd_send_signal 424 -- 2.47.3

4 months, 1 week

1
0
0 0

[Patch v2] selftests/mm: check content to see whether mremap corrupt data

by Wei Yang

After mremap(), add a check on content to see whether mremap corrupts data. Signed-off-by: Wei Yang <richard.weiyang(a)gmail.com> --- v2: add check on content instead of just testing the backing folio --- tools/testing/selftests/mm/split_huge_page_test.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c index 10ae65ea032f..229b6dcabece 100644 --- a/tools/testing/selftests/mm/split_huge_page_test.c +++ b/tools/testing/selftests/mm/split_huge_page_test.c @@ -423,10 +423,14 @@ static void split_pte_mapped_thp(void) /* smap does not show THPs after mremap, use kpageflags instead */ thp_size = 0; - for (i = 0; i < pagesize * 4; i++) + for (i = 0; i < pagesize * 4; i++) { + if (pte_mapped[i] != (char)i) + ksft_exit_fail_msg("%ld byte corrupted\n", i); + if (i % pagesize == 0 && is_backed_by_folio(&pte_mapped[i], pmd_order, pagemap_fd, kpageflags_fd)) thp_size++; + } if (thp_size != 4) ksft_exit_fail_msg("Some THPs are missing during mremap\n"); -- 2.34.1

4 months, 1 week

4
22
0 0

[PATCH net-next v5] selftests: net: add test for ipv6 fragmentation

by Brett A C Sheffield

Add selftest for the IPv6 fragmentation regression which affected several stable kernels. Commit a18dfa9925b9 ("ipv6: save dontfrag in cork") was backported to stable without some prerequisite commits. This caused a regression when sending IPv6 UDP packets by preventing fragmentation and instead returning -1 (EMSGSIZE). Add selftest to check for this issue by attempting to send a packet larger than the interface MTU. The packet will be fragmented on a working kernel, with sendmsg(2) correctly returning the expected number of bytes sent. When the regression is present, sendmsg returns -1 and sets errno to EMSGSIZE. Link: https://lore.kernel.org/stable/aElivdUXqd1OqgMY@karahi.gladserv.com Signed-off-by: Brett A C Sheffield <bacs(a)librecast.net> Reviewed-by: Willem de Bruijn <willemb(a)google.com> --- Thanks for the reviews Willem and Jakub. On 2025-09-01 09:45, Willem de Bruijn wrote: > > +int main(void) > > +{ > > + struct in6_addr addr = { > > + .s6_addr[15] = 0x01, /* ::1 */ > > + }; > > + struct sockaddr_in6 sa = { > > + .sin6_family = AF_INET6, > > + .sin6_addr = addr, > > + .sin6_port = 9 /* port 9/udp (DISCARD) */ > > htons addr is already initialized in network byte order (BE) here. Verified with: char ip6[INET6_ADDRSTRLEN]; inet_ntop(AF_INET6, &(sa.sin6_addr), ip6, INET6_ADDRSTRLEN); printf("The address is %s\n", ip6); which prints "The address is ::1" All other suggestions adopted in v5. 
v5 changes: - disable_dad: delete - not needed for lo - main: simplify failure paths - main: char -> static char buf - setup: remove pointless return value - setup: remove unused variable fd - setup: merge with interface_up() to simplify - setup: check all system call return values - remove no longer used headers v4 changes: - fix "else should follow close brace" (checkpatch ERROR) v3 changes: - add usleep instead of busy polling on sendmsg - simplify error handling by using error() and leaving cleanup to O/S - use loopback interface - don't bother creating TAP - send to localhost (::1) v2 changes: - remove superfluous namespace calls - unshare(2) suffices - remove usleep(). Don't wait for the interface to be ready, just send, and handle the (less likely) error case by retrying. - set destination address only once - document our use of the IPv6 link-local source address - send to port 9 (DISCARD) instead of 4242 (DONT PANIC) - ensure sockets are closed on failure paths - use KSFT exit codes for clarity v4: https://lore.kernel.org/netdev/20250901123757.13112-1-bacs@librecast.net v3: https://lore.kernel.org/netdev/20250901112248.5218-1-bacs@librecast.net v2: https://lore.kernel.org/netdev/20250831102908.14655-1-bacs@librecast.net v1: https://lore.kernel.org/netdev/20250825092548.4436-3-bacs@librecast.net tools/testing/selftests/net/.gitignore | 1 + tools/testing/selftests/net/Makefile | 1 + .../selftests/net/ipv6_fragmentation.c | 115 ++++++++++++++++++ 3 files changed, 117 insertions(+) create mode 100644 tools/testing/selftests/net/ipv6_fragmentation.c diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 47c293c2962f..3d4b4a53dfda 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -16,6 +16,7 @@ ip_local_port_range ipsec ipv6_flowlabel ipv6_flowlabel_mgr +ipv6_fragmentation log.txt msg_oob msg_zerocopy diff --git a/tools/testing/selftests/net/Makefile 
b/tools/testing/selftests/net/Makefile index eef0b8f8a7b0..276e0481d996 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -117,6 +117,7 @@ TEST_GEN_FILES += tfo TEST_PROGS += tfo_passive.sh TEST_PROGS += broadcast_pmtu.sh TEST_PROGS += ipv6_force_forwarding.sh +TEST_GEN_PROGS += ipv6_fragmentation TEST_PROGS += route_hint.sh # YNL files, must be before "include ..lib.mk" diff --git a/tools/testing/selftests/net/ipv6_fragmentation.c b/tools/testing/selftests/net/ipv6_fragmentation.c new file mode 100644 index 000000000000..b76ce7b713fc --- /dev/null +++ b/tools/testing/selftests/net/ipv6_fragmentation.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Brett A C Sheffield <bacs(a)librecast.net> + * + * Kernel selftest for the IPv6 fragmentation regression which affected stable + * kernels: + * + * https://lore.kernel.org/stable/aElivdUXqd1OqgMY@karahi.gladserv.com + * + * Commit: a18dfa9925b9 ("ipv6: save dontfrag in cork") was backported to stable + * without some prerequisite commits. + * + * This caused a regression when sending IPv6 UDP packets by preventing + * fragmentation and instead returning -1 (EMSGSIZE). + * + * This selftest demonstrates the issue by sending an IPv6 UDP packet to + * localhost (::1) on the loopback interface from the autoconfigured link-local + * address. + * + * sendmsg(2) returns bytes sent correctly on a working kernel, and returns -1 + * (EMSGSIZE) when the regression is present. + * + * The regression was not present in the mainline kernel, but add this test to + * catch similar breakage in future. 
+ */ + +#define _GNU_SOURCE + +#include <error.h> +#include <net/if.h> +#include <netinet/in.h> +#include <sched.h> +#include <stdio.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <unistd.h> +#include "../kselftest.h" + +#define MTU 1500 +#define LARGER_THAN_MTU 8192 + +static void setup(void) +{ + struct ifreq ifr = { + .ifr_name = "lo" + }; + int ctl; + + /* we need to set MTU, so do this in a namespace to play nicely */ + if (unshare(CLONE_NEWNET) == -1) + error(KSFT_FAIL, errno, "unshare"); + + ctl = socket(AF_LOCAL, SOCK_STREAM, 0); + if (ctl == -1) + error(KSFT_FAIL, errno, "socket"); + + /* ensure MTU is smaller than what we plan to send */ + ifr.ifr_mtu = MTU; + if (ioctl(ctl, SIOCSIFMTU, &ifr) == -1) + error(KSFT_FAIL, errno, "ioctl: set MTU"); + + /* bring up interface */ + if (ioctl(ctl, SIOCGIFFLAGS, &ifr) == -1) + error(KSFT_FAIL, errno, "ioctl SIOCGIFFLAGS"); + ifr.ifr_flags = ifr.ifr_flags | IFF_UP; + if (ioctl(ctl, SIOCSIFFLAGS, &ifr) == -1) + error(KSFT_FAIL, errno, "ioctl: bring interface up"); + + if (close(ctl) == -1) + error(KSFT_FAIL, errno, "close"); +} + +int main(void) +{ + struct in6_addr addr = { + .s6_addr[15] = 0x01, /* ::1 */ + }; + struct sockaddr_in6 sa = { + .sin6_family = AF_INET6, + .sin6_addr = addr, + .sin6_port = 9 /* port 9/udp (DISCARD) */ + }; + static char buf[LARGER_THAN_MTU] = {0}; + struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) }; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_name = (struct sockaddr *)&sa, + .msg_namelen = sizeof(sa), + }; + ssize_t rc; + int err = KSFT_FAIL; + int s; + + printf("Testing IPv6 fragmentation\n"); + setup(); + s = socket(AF_INET6, SOCK_DGRAM, 0); +send_again: + rc = sendmsg(s, &msg, 0); + if (rc == -1) { + /* if interface wasn't ready, try again */ + if (errno == EADDRNOTAVAIL) { + usleep(1000); + goto send_again; + } + error(KSFT_FAIL, errno, "sendmsg"); + } else if (rc != LARGER_THAN_MTU) { + error(KSFT_FAIL, errno, "sendmsg returned 
%zi, expected %i", + rc, LARGER_THAN_MTU); + } + printf("[PASS] sendmsg() returned %zi\n", rc); + err = KSFT_PASS; + close(s); + return err; +} base-commit: cd8a4cfa6bb43a441901e82f5c222dddc75a18a3 -- 2.49.1

4 months, 1 week

2
1
0 0

[PATCH bpf-next v2 00/14] selftests/bpf: Integrate test_xsk.c to test_progs framework

by Bastien Curutchet (eBPF Foundation)

Hi all, This is a second version of a series I sent some time ago, it continues the work of migrating the script tests into prog_tests. The test_xsk.sh script covers many AF_XDP use cases. The tests it runs are defined in xskxceiver.c. Since this script is used to test real hardware, the goal here is to leave it as it is, and only integrate the tests that run on veth peers into the test_progs framework. Some tests are flaky so they can't be integrated in the CI as they are. I think that fixing their flakiness would require a significant amount of work. So, as a first step, I've excluded them from the list of tests migrated to the CI (see PATCH 13). If these tests get fixed at some point, integrating them into the CI will be straightforward. PATCH 1 extracts test_xsk[.c/.h] from xskxceiver[.c/.h] to make the tests available to test_progs. PATCH 2 to 5 fix small issues in the current test. PATCH 7 to 12 handle all errors to release resources instead of calling exit() when any error occurs. PATCH 13 isolates some flaky tests. PATCH 14 integrates the non-flaky tests to the test_progs framework. Maciej, I've fixed the bug you found in the initial series. I've looked for any hardware able to run test_xsk.sh in my office, but I couldn't find one ... So here again, only the veth part has been tested, sorry about that. Signed-off-by: Bastien Curutchet (eBPF Foundation) <bastien.curutchet(a)bootlin.com> --- Changes in v2: - Rebase on the latest bpf-next_base and integrate the newly added tests to the work (adjust_tail* and tx_queue_consumer tests) - Re-order patches to split xskxceiver sooner. - Fix the bug reported by Maciej. 
- Fix verbose mode in test_xsk.sh by keeping kselftest (remove PATCH 1, 7 and 8) - Link to v1: https://lore.kernel.org/r/20250313-xsk-v1-0-7374729a93b9@bootlin.com --- Bastien Curutchet (eBPF Foundation) (14): selftests/bpf: test_xsk: Split xskxceiver selftests/bpf: test_xsk: Initialize bitmap before use selftests/bpf: test_xsk: Fix memory leaks selftests/bpf: test_xsk: Wrap test clean-up in functions selftests/bpf: test_xsk: Release resources when swap fails selftests/bpf: test_xsk: Add return value to init_iface() selftests/bpf: test_xsk: Don't exit immediately when xsk_attach fails selftests/bpf: test_xsk: Don't exit immediately when gettimeofday fails selftests/bpf: test_xsk: Don't exit immediately when workers fail selftests/bpf: test_xsk: Don't exit immediately if validate_traffic fails selftests/bpf: test_xsk: Don't exit immediately on allocation failures selftests/bpf: test_xsk: Move exit_with_error to xskxceiver.c selftests/bpf: test_xsk: Isolate flaky tests selftests/bpf: test_xsk: Integrate test_xsk.c to test_progs framework tools/testing/selftests/bpf/Makefile | 11 +- tools/testing/selftests/bpf/prog_tests/test_xsk.c | 2616 ++++++++++++++++++++ tools/testing/selftests/bpf/prog_tests/test_xsk.h | 294 +++ tools/testing/selftests/bpf/prog_tests/xsk.c | 146 ++ tools/testing/selftests/bpf/xskxceiver.c | 2698 +-------------------- tools/testing/selftests/bpf/xskxceiver.h | 156 -- 6 files changed, 3183 insertions(+), 2738 deletions(-) --- base-commit: 1e6c91221f429972767f073295e2dd0d372520e7 change-id: 20250218-xsk-0cf90e975d14 Best regards, -- Bastien Curutchet (eBPF Foundation) <bastien.curutchet(a)bootlin.com>

4 months, 1 week

1
14
0 0

[PATCH v15 net-next 00/14] AccECN protocol patch series

by chia-yu.chang＠nokia-bell-labs.com

From: Chia-Yu Chang <chia-yu.chang(a)nokia-bell-labs.com> Hello, Please find the v15 AccECN protocol patch series, which covers the core functionality of Accurate ECN, AccECN negotiation, AccECN TCP options, and AccECN failure handling. The Accurate ECN draft can be found in https://datatracker.ietf.org/doc/html/draft-ietf-tcpm-accurate-ecn-28, and it will become RFC9768. This patch series is part of the full AccECN patch series, which is available at https://github.com/L4STeam/linux-net-next/commits/upstream_l4steam/ Best Regards, Chia-Yu --- v15 (14-Aug-2025) - Update pahole results in commit messages - Accurate ECN will become RFC9768 v14 (22-Jul-2025) - Add missing const for struct tcp_sock of tcp_accecn_option_beacon_check() of #11 (Simon Horman <horms(a)kernel.org>) v13 (18-Jul-2025) - Implement tcp_accecn_extract_syn_ect() and tcp_accecn_reflector_flags() with static array lookup of patch #6 (Paolo Abeni <pabeni(a)redhat.com>) - Fix typos in comments of #6 and remove patch #7 of v12 about simultaneous connect (Paolo Abeni <pabeni(a)redhat.com>) - Move TCP_ACCECN_E1B_INIT_OFFSET, TCP_ACCECN_E0B_INIT_OFFSET, and TCP_ACCECN_CEB_INIT_OFFSET from patch #7 to #11 (Paolo Abeni <pabeni(a)redhat.com>) - Use static array lookup in tcp_accecn_optfield_to_ecnfield() of patch #11 (Paolo Abeni <pabeni(a)redhat.com>) - Return false when WARN_ON_ONCE() is true in tcp_accecn_process_option() of patch #11 (Paolo Abeni <pabeni(a)redhat.com>) - Make synack_ecn_bytes as static const array and use const u32 pointer in tcp_options_write() of #11 (Paolo Abeni <pabeni(a)redhat.com>) - Use ALIGN() and ALIGN_DOWN() in tcp_options_fit_accecn() to pad TCP AccECN option to dword of #11 (Paolo Abeni <pabeni(a)redhat.com>) - Return TCP_ACCECN_OPT_FAIL_SEEN if WARN_ON_ONCE() is true in tcp_accecn_option_init() of #12 (Paolo Abeni <pabeni(a)redhat.com>) v12 (04-Jul-2025) - Fix compilation issues with some intermediate patches in v11 - Add more comments for AccECN helpers of tcp_ecn.h v11 
(03-Jul-2025) - Fix compilation issues with some intermediate patches in v10 v10 (02-Jul-2025) - Add new patch of separated header file include/net/tcp_ecn.h to include ECN and AccECN functions (Eric Dumazet <edumazet(a)google.com>) - Add comments on the AccECN helper functions in tcp_ecn.h (Eric Dumazet <edumazet(a)google.com>) - Add documentation of tcp_ecn, tcp_ecn_option, tcp_ecn_beacon in ip-sysctl.rst to the corresponding patch (Eric Dumazet <edumazet(a)google.com>) - Split wait third ACK functionality into a separated patch from AccECN negotiation patch (Eric Dumazet <edumazet(a)google.com>) - Add READ_ONCE() over every reads of sysctl for all patches in the series (Eric Dumazet <edumazet(a)google.com>) - Merge heuristics of AccECN option ceb/cep and ACE field multi-wrap into a single patch - Add a table of SACK block reduction and required AccECN field in patch #15 commit message (Eric Dumazet <edumazet(a)google.com>) v9 (21-Jun-2025) - Use tcp_data_ecn_check() to set TCP_ECN_SEEN flag only for RFC3168 ECN (Paolo Abeni <pabeni(a)redhat.com>) - Add comments about setting TCP_ECN_SEEN flag for RFC3168 and Accurate ECN (Paolo Abeni <pabeni(a)redhat.com>) - Restructure the code in the for loop of tcp_accecn_process_option() (Paolo Abeni <pabeni(a)redhat.com>) - Remove ecn_bytes and add use_synack_ecn_bytes flag to identify whether syn_ack_bytes or received_ecn_bytes is used (Paolo Abeni <pabeni(a)redhat.com>) - Replace leftover_bytes and leftover_size with leftover_highbyte and leftover_lowbyte and add comments in tcp_options_write() (Paolo Abeni <pabeni(a)redhat.com>) - Add comments and commit message about the 1st retx SYN still attempt AccECN negotiation (Paolo Abeni <pabeni(a)redhat.com>) v8 (10-Jun-2025) - Add new helper function tcp_ecn_received_counters_payload() in #6 (Paolo Abeni <pabeni(a)redhat.com>) - Set opts->num_sack_blocks=0 to avoid potential undefined value in #8 (Paolo Abeni <pabeni(a)redhat.com>) - Reset leftover_size to 2 once leftover_bytes is 
used in #9 (Paolo Abeni <pabeni(a)redhat.com>) - Add new helper function tcp_accecn_opt_demand_min() in #10 (Paolo Abeni <pabeni(a)redhat.com>) - Add new helper function tcp_accecn_saw_opt_fail_recv() in #11 (Paolo Abeni <pabeni(a)redhat.com>) - Update tcp_options_fit_accecn() to avoid using recursion in #14 (Paolo Abeni <pabeni(a)redhat.com>) v7 (14-May-2025) - Modify group sizes of tcp_sock_write_txrx and tcp_sock_write_rx in #3 based on pahole results (Paolo Abeni <pabeni(a)redhat.com>) - Fix the issue in #4 and #5 where the RFC3168 ECN behavior in tcp_ecn_send() is changed (Paolo Abeni <pabeni(a)redhat.com>) - Modify group size of tcp_sock_write_txrx in #4 and #6 based on pahole results (Paolo Abeni <pabeni(a)redhat.com>) - Update commit message for #9 to explain the increase in tcp_sock_write_rx group size - Modify group size of tcp_sock_write_tx in #10 based on pahole results v6 (09-May-2025) - Add #3 to utilize existing holes of tcp_sock_write_txrx group for later patches (#4, #9, #10) with new u8 members (Paolo Abeni <pabeni(a)redhat.com>) - Add pahole outcomes before and after commit in #4, #5, #6, #9, #10, #15 (Paolo Abeni <pabeni(a)redhat.com>) - Define new helper function tcp_send_ack_reflect_ect() for sending ACK with reflected ECT in #5 (Paolo Abeni <pabeni(a)redhat.com>) - Add comments for function tcp_ecn_rcv_synack() in #5 (Paolo Abeni <pabeni(a)redhat.com>) - Add enum/define to be used by sysctl_tcp_ecn in #5, sysctl_tcp_ecn_option in #9, and sysctl_tcp_ecn_option_beacon in #10 (Paolo Abeni <pabeni(a)redhat.com>) - Move accecn_fail_mode and saw_accecn_opt in #5 and #11 to use existing holes of tcp_sock (Paolo Abeni <pabeni(a)redhat.com>) - Change data type of new members of tcp_request_sock and move them to the end of struct in #5 and #11 (Paolo Abeni <pabeni(a)redhat.com>) - Move new members of tcp_info to the end of struct in #6 (Paolo Abeni <pabeni(a)redhat.com>) - Merge previous #7 into #9 (Paolo Abeni <pabeni(a)redhat.com>) - Mask ecnfield 
with INET_ECN_MASK to remove WARN_ONCE in #9 (Paolo Abeni <pabeni(a)redhat.com>) - Reduce the indentation levels for readability in #9 and #10 (Paolo Abeni <pabeni(a)redhat.com>) - Move delivered_ecn_bytes to the RX group in #9, accecn_opt_tstamp to the TX group in #10, pkts_acked_ewma to the RX group in #15 (Paolo Abeni <pabeni(a)redhat.com>) - Add changes in Documentation/networking/net_cachelines/tcp_sock.rst for new tcp_sock members in #3, #5, #6, #9, #10, #15 v5 (22-Apr-2025) - Further fix for 32-bit ARM alignment in tcp.c (Simon Horman <horms(a)kernel.org>) v4 (18-Apr-2025) - Fix 32-bit ARM assertion for alignment requirement (Simon Horman <horms(a)kernel.org>) v3 (14-Apr-2025) - Fix patch apply issue in v2 (Jakub Kicinski <kuba(a)kernel.org>) v2 (18-Mar-2025) - Add one missing patch from the previous AccECN protocol preparation patch series to this patch series. --- Chia-Yu Chang (5): tcp: reorganize tcp_sock_write_txrx group for variables later tcp: ecn functions in separated include file tcp: accecn: AccECN option send control tcp: accecn: AccECN option failure handling tcp: accecn: try to fit AccECN option with SACK Ilpo Järvinen (9): tcp: reorganize SYN ECN code tcp: fast path functions later tcp: AccECN core tcp: accecn: AccECN negotiation tcp: accecn: add AccECN rx byte counters tcp: accecn: AccECN needs to know delivered bytes tcp: sack option handling improvements tcp: accecn: AccECN option tcp: accecn: AccECN option ceb/cep and ACE field multi-wrap heuristics Documentation/networking/ip-sysctl.rst | 55 +- .../networking/net_cachelines/tcp_sock.rst | 12 + include/linux/tcp.h | 32 +- include/net/netns/ipv4.h | 2 + include/net/tcp.h | 87 ++- include/net/tcp_ecn.h | 649 ++++++++++++++++++ include/uapi/linux/tcp.h | 7 + net/ipv4/syncookies.c | 4 + net/ipv4/sysctl_net_ipv4.c | 19 + net/ipv4/tcp.c | 28 +- net/ipv4/tcp_input.c | 353 ++++++++-- net/ipv4/tcp_ipv4.c | 8 +- net/ipv4/tcp_minisocks.c | 40 +- net/ipv4/tcp_output.c | 294 ++++++-- 
net/ipv6/syncookies.c | 2 + net/ipv6/tcp_ipv6.c | 1 + 16 files changed, 1409 insertions(+), 184 deletions(-) create mode 100644 include/net/tcp_ecn.h -- 2.34.1

4 months, 1 week

4
29
0 0

[PATCH bpf-next v2 2/2] selftests/bpf: Test kfunc bpf_strcasecmp

by Rong Tao

From: Rong Tao <rongtao(a)cestc.cn> Add testsuites for kfunc bpf_strcasecmp. Signed-off-by: Rong Tao <rongtao(a)cestc.cn> --- tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c | 6 ++++++ tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c | 1 + tools/testing/selftests/bpf/progs/string_kfuncs_success.c | 5 +++++ 3 files changed, 12 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c index 53af438bd998..99d72c68f76a 100644 --- a/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c +++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c @@ -31,6 +31,8 @@ char *invalid_kern_ptr = (char *)-1; /* Passing NULL to string kfuncs (treated as a userspace ptr) */ SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_null1(void *ctx) { return bpf_strcmp(NULL, "hello"); } SEC("syscall") __retval(USER_PTR_ERR)int test_strcmp_null2(void *ctx) { return bpf_strcmp("hello", NULL); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_null1(void *ctx) { return bpf_strcasecmp(NULL, "HELLO"); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strcasecmp_null2(void *ctx) { return bpf_strcasecmp("HELLO", NULL); } SEC("syscall") __retval(USER_PTR_ERR)int test_strchr_null(void *ctx) { return bpf_strchr(NULL, 'a'); } SEC("syscall") __retval(USER_PTR_ERR)int test_strchrnul_null(void *ctx) { return bpf_strchrnul(NULL, 'a'); } SEC("syscall") __retval(USER_PTR_ERR)int test_strnchr_null(void *ctx) { return bpf_strnchr(NULL, 1, 'a'); } @@ -49,6 +51,8 @@ SEC("syscall") __retval(USER_PTR_ERR)int test_strnstr_null2(void *ctx) { return /* Passing userspace ptr to string kfuncs */ SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr1(void *ctx) { return bpf_strcmp(user_ptr, "hello"); } SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr2(void *ctx) { return bpf_strcmp("hello", user_ptr); } +SEC("syscall") __retval(USER_PTR_ERR) int 
test_strcasecmp_user_ptr1(void *ctx) { return bpf_strcasecmp(user_ptr, "HELLO"); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_user_ptr2(void *ctx) { return bpf_strcasecmp("HELLO", user_ptr); } SEC("syscall") __retval(USER_PTR_ERR) int test_strchr_user_ptr(void *ctx) { return bpf_strchr(user_ptr, 'a'); } SEC("syscall") __retval(USER_PTR_ERR) int test_strchrnul_user_ptr(void *ctx) { return bpf_strchrnul(user_ptr, 'a'); } SEC("syscall") __retval(USER_PTR_ERR) int test_strnchr_user_ptr(void *ctx) { return bpf_strnchr(user_ptr, 1, 'a'); } @@ -69,6 +73,8 @@ SEC("syscall") __retval(USER_PTR_ERR) int test_strnstr_user_ptr2(void *ctx) { re /* Passing invalid kernel ptr to string kfuncs should always return -EFAULT */ SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault1(void *ctx) { return bpf_strcmp(invalid_kern_ptr, "hello"); } SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault2(void *ctx) { return bpf_strcmp("hello", invalid_kern_ptr); } +SEC("syscall") __retval(-EFAULT) int test_strcasecmp_pagefault1(void *ctx) { return bpf_strcasecmp(invalid_kern_ptr, "HELLO"); } +SEC("syscall") __retval(-EFAULT) int test_strcasecmp_pagefault2(void *ctx) { return bpf_strcasecmp("HELLO", invalid_kern_ptr); } SEC("syscall") __retval(-EFAULT) int test_strchr_pagefault(void *ctx) { return bpf_strchr(invalid_kern_ptr, 'a'); } SEC("syscall") __retval(-EFAULT) int test_strchrnul_pagefault(void *ctx) { return bpf_strchrnul(invalid_kern_ptr, 'a'); } SEC("syscall") __retval(-EFAULT) int test_strnchr_pagefault(void *ctx) { return bpf_strnchr(invalid_kern_ptr, 1, 'a'); } diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c index 89fb4669b0e9..e41cc5601994 100644 --- a/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c +++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c @@ -7,6 +7,7 @@ char long_str[XATTR_SIZE_MAX + 1]; SEC("syscall") int test_strcmp_too_long(void *ctx) 
{ return bpf_strcmp(long_str, long_str); } +SEC("syscall") int test_strcasecmp_too_long(void *ctx) { return bpf_strcasecmp(long_str, long_str); } SEC("syscall") int test_strchr_too_long(void *ctx) { return bpf_strchr(long_str, 'b'); } SEC("syscall") int test_strchrnul_too_long(void *ctx) { return bpf_strchrnul(long_str, 'b'); } SEC("syscall") int test_strnchr_too_long(void *ctx) { return bpf_strnchr(long_str, sizeof(long_str), 'b'); } diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c index 46697f381878..67830456637b 100644 --- a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c +++ b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c @@ -12,6 +12,11 @@ char str[] = "hello world"; /* Functional tests */ __test(0) int test_strcmp_eq(void *ctx) { return bpf_strcmp(str, "hello world"); } __test(1) int test_strcmp_neq(void *ctx) { return bpf_strcmp(str, "hello"); } +__test(0) int test_strcasecmp_eq1(void *ctx) { return bpf_strcasecmp(str, "hello world"); } +__test(0) int test_strcasecmp_eq2(void *ctx) { return bpf_strcasecmp(str, "HELLO WORLD"); } +__test(0) int test_strcasecmp_eq3(void *ctx) { return bpf_strcasecmp(str, "HELLO world"); } +__test(1) int test_strcasecmp_neq1(void *ctx) { return bpf_strcasecmp(str, "hello"); } +__test(1) int test_strcasecmp_neq2(void *ctx) { return bpf_strcasecmp(str, "HELLO"); } __test(1) int test_strchr_found(void *ctx) { return bpf_strchr(str, 'e'); } __test(11) int test_strchr_null(void *ctx) { return bpf_strchr(str, '\0'); } __test(-ENOENT) int test_strchr_notfound(void *ctx) { return bpf_strchr(str, 'x'); } -- 2.51.0

4 months, 1 week

2
2
0 0

[PATCH bpf-next v3 2/2] selftests/bpf: Test kfunc bpf_strcasecmp

by Rong Tao

From: Rong Tao <rongtao(a)cestc.cn> Add testsuites for kfunc bpf_strcasecmp. Signed-off-by: Rong Tao <rongtao(a)cestc.cn> --- tools/testing/selftests/bpf/prog_tests/string_kfuncs.c | 1 + tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c | 6 ++++++ tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c | 1 + tools/testing/selftests/bpf/progs/string_kfuncs_success.c | 5 +++++ 4 files changed, 13 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c index 35af8044d059..4d66fad3c8bd 100644 --- a/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c +++ b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c @@ -8,6 +8,7 @@ static const char * const test_cases[] = { "strcmp", + "strcasecmp", "strchr", "strchrnul", "strnchr", diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c index 53af438bd998..99d72c68f76a 100644 --- a/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c +++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c @@ -31,6 +31,8 @@ char *invalid_kern_ptr = (char *)-1; /* Passing NULL to string kfuncs (treated as a userspace ptr) */ SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_null1(void *ctx) { return bpf_strcmp(NULL, "hello"); } SEC("syscall") __retval(USER_PTR_ERR)int test_strcmp_null2(void *ctx) { return bpf_strcmp("hello", NULL); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_null1(void *ctx) { return bpf_strcasecmp(NULL, "HELLO"); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strcasecmp_null2(void *ctx) { return bpf_strcasecmp("HELLO", NULL); } SEC("syscall") __retval(USER_PTR_ERR)int test_strchr_null(void *ctx) { return bpf_strchr(NULL, 'a'); } SEC("syscall") __retval(USER_PTR_ERR)int test_strchrnul_null(void *ctx) { return bpf_strchrnul(NULL, 'a'); } SEC("syscall") __retval(USER_PTR_ERR)int test_strnchr_null(void *ctx) { return 
bpf_strnchr(NULL, 1, 'a'); } @@ -49,6 +51,8 @@ SEC("syscall") __retval(USER_PTR_ERR)int test_strnstr_null2(void *ctx) { return /* Passing userspace ptr to string kfuncs */ SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr1(void *ctx) { return bpf_strcmp(user_ptr, "hello"); } SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr2(void *ctx) { return bpf_strcmp("hello", user_ptr); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_user_ptr1(void *ctx) { return bpf_strcasecmp(user_ptr, "HELLO"); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_user_ptr2(void *ctx) { return bpf_strcasecmp("HELLO", user_ptr); } SEC("syscall") __retval(USER_PTR_ERR) int test_strchr_user_ptr(void *ctx) { return bpf_strchr(user_ptr, 'a'); } SEC("syscall") __retval(USER_PTR_ERR) int test_strchrnul_user_ptr(void *ctx) { return bpf_strchrnul(user_ptr, 'a'); } SEC("syscall") __retval(USER_PTR_ERR) int test_strnchr_user_ptr(void *ctx) { return bpf_strnchr(user_ptr, 1, 'a'); } @@ -69,6 +73,8 @@ SEC("syscall") __retval(USER_PTR_ERR) int test_strnstr_user_ptr2(void *ctx) { re /* Passing invalid kernel ptr to string kfuncs should always return -EFAULT */ SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault1(void *ctx) { return bpf_strcmp(invalid_kern_ptr, "hello"); } SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault2(void *ctx) { return bpf_strcmp("hello", invalid_kern_ptr); } +SEC("syscall") __retval(-EFAULT) int test_strcasecmp_pagefault1(void *ctx) { return bpf_strcasecmp(invalid_kern_ptr, "HELLO"); } +SEC("syscall") __retval(-EFAULT) int test_strcasecmp_pagefault2(void *ctx) { return bpf_strcasecmp("HELLO", invalid_kern_ptr); } SEC("syscall") __retval(-EFAULT) int test_strchr_pagefault(void *ctx) { return bpf_strchr(invalid_kern_ptr, 'a'); } SEC("syscall") __retval(-EFAULT) int test_strchrnul_pagefault(void *ctx) { return bpf_strchrnul(invalid_kern_ptr, 'a'); } SEC("syscall") __retval(-EFAULT) int test_strnchr_pagefault(void *ctx) { 
return bpf_strnchr(invalid_kern_ptr, 1, 'a'); } diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c index 89fb4669b0e9..e41cc5601994 100644 --- a/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c +++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c @@ -7,6 +7,7 @@ char long_str[XATTR_SIZE_MAX + 1]; SEC("syscall") int test_strcmp_too_long(void *ctx) { return bpf_strcmp(long_str, long_str); } +SEC("syscall") int test_strcasecmp_too_long(void *ctx) { return bpf_strcasecmp(long_str, long_str); } SEC("syscall") int test_strchr_too_long(void *ctx) { return bpf_strchr(long_str, 'b'); } SEC("syscall") int test_strchrnul_too_long(void *ctx) { return bpf_strchrnul(long_str, 'b'); } SEC("syscall") int test_strnchr_too_long(void *ctx) { return bpf_strnchr(long_str, sizeof(long_str), 'b'); } diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c index 46697f381878..67830456637b 100644 --- a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c +++ b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c @@ -12,6 +12,11 @@ char str[] = "hello world"; /* Functional tests */ __test(0) int test_strcmp_eq(void *ctx) { return bpf_strcmp(str, "hello world"); } __test(1) int test_strcmp_neq(void *ctx) { return bpf_strcmp(str, "hello"); } +__test(0) int test_strcasecmp_eq1(void *ctx) { return bpf_strcasecmp(str, "hello world"); } +__test(0) int test_strcasecmp_eq2(void *ctx) { return bpf_strcasecmp(str, "HELLO WORLD"); } +__test(0) int test_strcasecmp_eq3(void *ctx) { return bpf_strcasecmp(str, "HELLO world"); } +__test(1) int test_strcasecmp_neq1(void *ctx) { return bpf_strcasecmp(str, "hello"); } +__test(1) int test_strcasecmp_neq2(void *ctx) { return bpf_strcasecmp(str, "HELLO"); } __test(1) int test_strchr_found(void *ctx) { return bpf_strchr(str, 'e'); } __test(11) int test_strchr_null(void *ctx) { 
return bpf_strchr(str, '\0'); } __test(-ENOENT) int test_strchr_notfound(void *ctx) { return bpf_strchr(str, 'x'); } -- 2.51.0

4 months, 1 week

1
0
0 0

[PATCH bpf-next v3 0/2] bpf: Add kfunc bpf_strcasecmp()

by Rong Tao

The string kfuncs already support bpf_strcmp; this patchset introduces bpf_strcasecmp and adds some selftests. Rong Tao (2): bpf: add bpf_strcasecmp kfunc selftests/bpf: Test kfunc bpf_strcasecmp kernel/bpf/helpers.c | 68 +++++++++++++------ .../selftests/bpf/prog_tests/string_kfuncs.c | 1 + .../bpf/progs/string_kfuncs_failure1.c | 6 ++ .../bpf/progs/string_kfuncs_failure2.c | 1 + .../bpf/progs/string_kfuncs_success.c | 5 ++ 5 files changed, 61 insertions(+), 20 deletions(-) --- v3: Update prog_tests/string_kfuncs.c for "strcasecmp"; v2: Remove the __ign suffix from the __bpf_strcasecmp parameters and add an E2BIG failure test; https://lore.kernel.org/lkml/tencent_8646158457D4511C447C833B21B3ACF6CB07@q… v1: https://lore.kernel.org/lkml/tencent_5AE811A28781BE106AD6CDE59F4ADD2BFA06@q… -- 2.51.0

4 months, 1 week

1
0
0 0

[PATCH net-next 1/2] selftests: drv-net: rss_ctx: use Netlink for timed reconfig

by Jakub Kicinski

The rss_ctx test has gotten pretty flaky after I increased the queue count in NIPA 2->3. Not 100% clear why. We get a lot of failures in the rss_ctx.test_hitless_key_update case. Looking closer it appears that the failures are mostly due to startup costs. I measured the following timing for ethtool -X: - python cmd(shell=True) : 150-250msec - python cmd(shell=False) : 50- 70msec - timed in bash : 45- 55msec - YNL Netlink call : 2- 4msec - .set_rxfh callback : 1- 2msec The target in the test was set to 200msec. We were mostly measuring ethtool startup cost it seems. Switch to YNL since it's 100x faster. Lower the pass criteria to ~75msec, no real science behind this number but we removed ~150msec of overhead, and the old target was 200msec. So any driver that was passing previously should still pass with 75msec. Separately we should probably follow up on defaulting to shell=False, when script doesn't explicitly ask for True, because the overhead is rather significant. Signed-off-by: Jakub Kicinski <kuba(a)kernel.org> --- tools/testing/selftests/drivers/net/hw/rss_ctx.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index 9838b8457e5a..3fc5688605b5 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -335,19 +335,20 @@ from lib.py import ethtool, ip, defer, GenerateTraffic, CmdExitFailure data = get_rss(cfg) key_len = len(data['rss-hash-key']) - key = _rss_key_rand(key_len) + ethnl = EthtoolFamily() + key = random.randbytes(key_len) tgen = GenerateTraffic(cfg) try: errors0, carrier0 = get_drop_err_sum(cfg) t0 = datetime.datetime.now() - ethtool(f"-X {cfg.ifname} hkey " + _rss_key_str(key)) + ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, "hkey": key}) t1 = datetime.datetime.now() errors1, carrier1 = get_drop_err_sum(cfg) finally: tgen.wait_pkts_and_stop(5000) - 
ksft_lt((t1 - t0).total_seconds(), 0.2) + ksft_lt((t1 - t0).total_seconds(), 0.075) ksft_eq(errors1 - errors1, 0) ksft_eq(carrier1 - carrier0, 0) -- 2.51.0

4 months, 1 week

2
4
0 0

[PATCH] selftests/fs/mount-notify: Fix compilation failure.

by Xing Guo

Commit c6d9775c2066 ("selftests/fs/mount-notify: build with tools include dir") introduces the struct __kernel_fsid_t to decouple dependency with headers_install. The commit forgets to define a macro for __kernel_fsid_t and it will cause type re-definition issue. Signed-off-by: Xing Guo <higuoxing(a)gmail.com> --- .../mount-notify/mount-notify_test.c | 17 ++++++++--------- .../mount-notify/mount-notify_test_ns.c | 18 ++++++++---------- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c index 63ce708d93ed..e4b7c2b457ee 100644 --- a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c +++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c @@ -2,6 +2,13 @@ // Copyright (c) 2025 Miklos Szeredi <miklos(a)szeredi.hu> #define _GNU_SOURCE + +// Needed for linux/fanotify.h +typedef struct { + int val[2]; +} __kernel_fsid_t; +#define __kernel_fsid_t __kernel_fsid_t + #include <fcntl.h> #include <sched.h> #include <stdio.h> @@ -10,20 +17,12 @@ #include <sys/mount.h> #include <unistd.h> #include <sys/syscall.h> +#include <sys/fanotify.h> #include "../../kselftest_harness.h" #include "../statmount/statmount.h" #include "../utils.h" -// Needed for linux/fanotify.h -#ifndef __kernel_fsid_t -typedef struct { - int val[2]; -} __kernel_fsid_t; -#endif - -#include <sys/fanotify.h> - static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX"; static const int mark_cmds[] = { diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c index 090a5ca65004..9f57ca46e3af 100644 --- a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c +++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c @@ -2,6 +2,13 @@ // Copyright (c) 2025 Miklos Szeredi 
<miklos(a)szeredi.hu> #define _GNU_SOURCE + +// Needed for linux/fanotify.h +typedef struct { + int val[2]; +} __kernel_fsid_t; +#define __kernel_fsid_t __kernel_fsid_t + #include <fcntl.h> #include <sched.h> #include <stdio.h> @@ -10,21 +17,12 @@ #include <sys/mount.h> #include <unistd.h> #include <sys/syscall.h> +#include <sys/fanotify.h> #include "../../kselftest_harness.h" -#include "../../pidfd/pidfd.h" #include "../statmount/statmount.h" #include "../utils.h" -// Needed for linux/fanotify.h -#ifndef __kernel_fsid_t -typedef struct { - int val[2]; -} __kernel_fsid_t; -#endif - -#include <sys/fanotify.h> - static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX"; static const int mark_types[] = { -- 2.50.1

4 months, 1 week

4
7
0 0

[PATCH bpf-next v2 1/2] bpf: add bpf_strcasecmp kfunc

by Rong Tao

From: Rong Tao <rongtao(a)cestc.cn> bpf_strcasecmp() function performs same like bpf_strcmp() except ignoring the case of the characters. Signed-off-by: Rong Tao <rongtao(a)cestc.cn> --- kernel/bpf/helpers.c | 68 +++++++++++++++++++++++++++++++------------- 1 file changed, 48 insertions(+), 20 deletions(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 401b4932cc49..238fd992c786 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -3349,45 +3349,72 @@ __bpf_kfunc void __bpf_trap(void) * __get_kernel_nofault instead of plain dereference to make them safe. */ -/** - * bpf_strcmp - Compare two strings - * @s1__ign: One string - * @s2__ign: Another string - * - * Return: - * * %0 - Strings are equal - * * %-1 - @s1__ign is smaller - * * %1 - @s2__ign is smaller - * * %-EFAULT - Cannot read one of the strings - * * %-E2BIG - One of strings is too large - * * %-ERANGE - One of strings is outside of kernel address space - */ -__bpf_kfunc int bpf_strcmp(const char *s1__ign, const char *s2__ign) +int __bpf_strcasecmp(const char *s1, const char *s2, bool ignore_case) { char c1, c2; int i; - if (!copy_from_kernel_nofault_allowed(s1__ign, 1) || - !copy_from_kernel_nofault_allowed(s2__ign, 1)) { + if (!copy_from_kernel_nofault_allowed(s1, 1) || + !copy_from_kernel_nofault_allowed(s2, 1)) { return -ERANGE; } guard(pagefault)(); for (i = 0; i < XATTR_SIZE_MAX; i++) { - __get_kernel_nofault(&c1, s1__ign, char, err_out); - __get_kernel_nofault(&c2, s2__ign, char, err_out); + __get_kernel_nofault(&c1, s1, char, err_out); + __get_kernel_nofault(&c2, s2, char, err_out); + if (ignore_case) { + c1 = tolower(c1); + c2 = tolower(c2); + } if (c1 != c2) return c1 < c2 ? 
-1 : 1; if (c1 == '\0') return 0; - s1__ign++; - s2__ign++; + s1++; + s2++; } return -E2BIG; err_out: return -EFAULT; } +/** + * bpf_strcmp - Compare two strings + * @s1__ign: One string + * @s2__ign: Another string + * + * Return: + * * %0 - Strings are equal + * * %-1 - @s1__ign is smaller + * * %1 - @s2__ign is smaller + * * %-EFAULT - Cannot read one of the strings + * * %-E2BIG - One of strings is too large + * * %-ERANGE - One of strings is outside of kernel address space + */ +__bpf_kfunc int bpf_strcmp(const char *s1__ign, const char *s2__ign) +{ + return __bpf_strcasecmp(s1__ign, s2__ign, false); +} + +/** + * bpf_strcasecmp - Compare two strings, ignoring the case of the characters + * @s1__ign: One string + * @s2__ign: Another string + * + * Return: + * * %0 - Strings are equal + * * %-1 - @s1__ign is smaller + * * %1 - @s2__ign is smaller + * * %-EFAULT - Cannot read one of the strings + * * %-E2BIG - One of strings is too large + * * %-ERANGE - One of strings is outside of kernel address space + */ +__bpf_kfunc int bpf_strcasecmp(const char *s1__ign, const char *s2__ign) +{ + return __bpf_strcasecmp(s1__ign, s2__ign, true); +} + /** * bpf_strnchr - Find a character in a length limited string * @s__ign: The string to be searched @@ -3832,6 +3859,7 @@ BTF_ID_FLAGS(func, bpf_iter_dmabuf_destroy, KF_ITER_DESTROY | KF_SLEEPABLE) #endif BTF_ID_FLAGS(func, __bpf_trap) BTF_ID_FLAGS(func, bpf_strcmp); +BTF_ID_FLAGS(func, bpf_strcasecmp); BTF_ID_FLAGS(func, bpf_strchr); BTF_ID_FLAGS(func, bpf_strchrnul); BTF_ID_FLAGS(func, bpf_strnchr); -- 2.51.0

4 months, 1 week

1
0
0 0

[PATCH bpf-next v2 0/2] bpf: Add kfunc bpf_strcasecmp()

by Rong Tao

The string kfuncs already support bpf_strcmp; this patchset introduces bpf_strcasecmp and adds some selftests. Rong Tao (2): bpf: add bpf_strcasecmp kfunc selftests/bpf: Test kfunc bpf_strcasecmp kernel/bpf/helpers.c | 68 +++++++++++++------ .../bpf/progs/string_kfuncs_failure1.c | 6 ++ .../bpf/progs/string_kfuncs_failure2.c | 1 + .../bpf/progs/string_kfuncs_success.c | 5 ++ 4 files changed, 60 insertions(+), 20 deletions(-) --- v2: Remove the __ign suffix from the __bpf_strcasecmp parameters and add an E2BIG failure test; v1: https://lore.kernel.org/lkml/tencent_5AE811A28781BE106AD6CDE59F4ADD2BFA06@q… -- 2.51.0

4 months, 1 week

1
0
0 0

[PATCH bpf-next 1/2] bpf: add bpf_strcasecmp kfunc

by Rong Tao

From: Rong Tao <rongtao(a)cestc.cn> bpf_strcasecmp() function performs same like bpf_strcmp() except ignoring the case of the characters. Signed-off-by: Rong Tao <rongtao(a)cestc.cn> --- kernel/bpf/helpers.c | 56 +++++++++++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 14 deletions(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 401b4932cc49..e807a708e5fc 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -3349,20 +3349,7 @@ __bpf_kfunc void __bpf_trap(void) * __get_kernel_nofault instead of plain dereference to make them safe. */ -/** - * bpf_strcmp - Compare two strings - * @s1__ign: One string - * @s2__ign: Another string - * - * Return: - * * %0 - Strings are equal - * * %-1 - @s1__ign is smaller - * * %1 - @s2__ign is smaller - * * %-EFAULT - Cannot read one of the strings - * * %-E2BIG - One of strings is too large - * * %-ERANGE - One of strings is outside of kernel address space - */ -__bpf_kfunc int bpf_strcmp(const char *s1__ign, const char *s2__ign) +int __bpf_strcasecmp(const char *s1__ign, const char *s2__ign, bool ignore_case) { char c1, c2; int i; @@ -3376,6 +3363,10 @@ __bpf_kfunc int bpf_strcmp(const char *s1__ign, const char *s2__ign) for (i = 0; i < XATTR_SIZE_MAX; i++) { __get_kernel_nofault(&c1, s1__ign, char, err_out); __get_kernel_nofault(&c2, s2__ign, char, err_out); + if (ignore_case) { + c1 = tolower(c1); + c2 = tolower(c2); + } if (c1 != c2) return c1 < c2 ? 
-1 : 1; if (c1 == '\0') @@ -3388,6 +3379,42 @@ __bpf_kfunc int bpf_strcmp(const char *s1__ign, const char *s2__ign) return -EFAULT; } +/** + * bpf_strcmp - Compare two strings + * @s1__ign: One string + * @s2__ign: Another string + * + * Return: + * * %0 - Strings are equal + * * %-1 - @s1__ign is smaller + * * %1 - @s2__ign is smaller + * * %-EFAULT - Cannot read one of the strings + * * %-E2BIG - One of strings is too large + * * %-ERANGE - One of strings is outside of kernel address space + */ +__bpf_kfunc int bpf_strcmp(const char *s1__ign, const char *s2__ign) +{ + return __bpf_strcasecmp(s1__ign, s2__ign, false); +} + +/** + * bpf_strcasecmp - Compare two strings, ignoring the case of the characters + * @s1__ign: One string + * @s2__ign: Another string + * + * Return: + * * %0 - Strings are equal + * * %-1 - @s1__ign is smaller + * * %1 - @s2__ign is smaller + * * %-EFAULT - Cannot read one of the strings + * * %-E2BIG - One of strings is too large + * * %-ERANGE - One of strings is outside of kernel address space + */ +__bpf_kfunc int bpf_strcasecmp(const char *s1__ign, const char *s2__ign) +{ + return __bpf_strcasecmp(s1__ign, s2__ign, true); +} + /** * bpf_strnchr - Find a character in a length limited string * @s__ign: The string to be searched @@ -3832,6 +3859,7 @@ BTF_ID_FLAGS(func, bpf_iter_dmabuf_destroy, KF_ITER_DESTROY | KF_SLEEPABLE) #endif BTF_ID_FLAGS(func, __bpf_trap) BTF_ID_FLAGS(func, bpf_strcmp); +BTF_ID_FLAGS(func, bpf_strcasecmp); BTF_ID_FLAGS(func, bpf_strchr); BTF_ID_FLAGS(func, bpf_strchrnul); BTF_ID_FLAGS(func, bpf_strnchr); -- 2.51.0

4 months, 1 week

2
2
0 0

[PATCH bpf-next 2/2] selftests/bpf: Test kfunc bpf_strcasecmp

by Rong Tao

From: Rong Tao <rongtao(a)cestc.cn> Add testsuites for kfunc bpf_strcasecmp. Signed-off-by: Rong Tao <rongtao(a)cestc.cn> --- tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c | 6 ++++++ tools/testing/selftests/bpf/progs/string_kfuncs_success.c | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c index 53af438bd998..99d72c68f76a 100644 --- a/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c +++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c @@ -31,6 +31,8 @@ char *invalid_kern_ptr = (char *)-1; /* Passing NULL to string kfuncs (treated as a userspace ptr) */ SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_null1(void *ctx) { return bpf_strcmp(NULL, "hello"); } SEC("syscall") __retval(USER_PTR_ERR)int test_strcmp_null2(void *ctx) { return bpf_strcmp("hello", NULL); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_null1(void *ctx) { return bpf_strcasecmp(NULL, "HELLO"); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strcasecmp_null2(void *ctx) { return bpf_strcasecmp("HELLO", NULL); } SEC("syscall") __retval(USER_PTR_ERR)int test_strchr_null(void *ctx) { return bpf_strchr(NULL, 'a'); } SEC("syscall") __retval(USER_PTR_ERR)int test_strchrnul_null(void *ctx) { return bpf_strchrnul(NULL, 'a'); } SEC("syscall") __retval(USER_PTR_ERR)int test_strnchr_null(void *ctx) { return bpf_strnchr(NULL, 1, 'a'); } @@ -49,6 +51,8 @@ SEC("syscall") __retval(USER_PTR_ERR)int test_strnstr_null2(void *ctx) { return /* Passing userspace ptr to string kfuncs */ SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr1(void *ctx) { return bpf_strcmp(user_ptr, "hello"); } SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr2(void *ctx) { return bpf_strcmp("hello", user_ptr); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_user_ptr1(void *ctx) { return bpf_strcasecmp(user_ptr, "HELLO"); } 
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_user_ptr2(void *ctx) { return bpf_strcasecmp("HELLO", user_ptr); } SEC("syscall") __retval(USER_PTR_ERR) int test_strchr_user_ptr(void *ctx) { return bpf_strchr(user_ptr, 'a'); } SEC("syscall") __retval(USER_PTR_ERR) int test_strchrnul_user_ptr(void *ctx) { return bpf_strchrnul(user_ptr, 'a'); } SEC("syscall") __retval(USER_PTR_ERR) int test_strnchr_user_ptr(void *ctx) { return bpf_strnchr(user_ptr, 1, 'a'); } @@ -69,6 +73,8 @@ SEC("syscall") __retval(USER_PTR_ERR) int test_strnstr_user_ptr2(void *ctx) { re /* Passing invalid kernel ptr to string kfuncs should always return -EFAULT */ SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault1(void *ctx) { return bpf_strcmp(invalid_kern_ptr, "hello"); } SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault2(void *ctx) { return bpf_strcmp("hello", invalid_kern_ptr); } +SEC("syscall") __retval(-EFAULT) int test_strcasecmp_pagefault1(void *ctx) { return bpf_strcasecmp(invalid_kern_ptr, "HELLO"); } +SEC("syscall") __retval(-EFAULT) int test_strcasecmp_pagefault2(void *ctx) { return bpf_strcasecmp("HELLO", invalid_kern_ptr); } SEC("syscall") __retval(-EFAULT) int test_strchr_pagefault(void *ctx) { return bpf_strchr(invalid_kern_ptr, 'a'); } SEC("syscall") __retval(-EFAULT) int test_strchrnul_pagefault(void *ctx) { return bpf_strchrnul(invalid_kern_ptr, 'a'); } SEC("syscall") __retval(-EFAULT) int test_strnchr_pagefault(void *ctx) { return bpf_strnchr(invalid_kern_ptr, 1, 'a'); } diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c index 46697f381878..67830456637b 100644 --- a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c +++ b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c @@ -12,6 +12,11 @@ char str[] = "hello world"; /* Functional tests */ __test(0) int test_strcmp_eq(void *ctx) { return bpf_strcmp(str, "hello world"); } __test(1) int test_strcmp_neq(void 
*ctx) { return bpf_strcmp(str, "hello"); } +__test(0) int test_strcasecmp_eq1(void *ctx) { return bpf_strcasecmp(str, "hello world"); } +__test(0) int test_strcasecmp_eq2(void *ctx) { return bpf_strcasecmp(str, "HELLO WORLD"); } +__test(0) int test_strcasecmp_eq3(void *ctx) { return bpf_strcasecmp(str, "HELLO world"); } +__test(1) int test_strcasecmp_neq1(void *ctx) { return bpf_strcasecmp(str, "hello"); } +__test(1) int test_strcasecmp_neq2(void *ctx) { return bpf_strcasecmp(str, "HELLO"); } __test(1) int test_strchr_found(void *ctx) { return bpf_strchr(str, 'e'); } __test(11) int test_strchr_null(void *ctx) { return bpf_strchr(str, '\0'); } __test(-ENOENT) int test_strchr_notfound(void *ctx) { return bpf_strchr(str, 'x'); } -- 2.51.0

4 months, 1 week

2
1
0 0

[PATCH bpf-next 0/2] bpf: Add kfunc bpf_strcasecmp()

by Rong Tao

From: Rong Tao <rongtao(a)cestc.cn> The string kfuncs already support bpf_strcmp; this patchset introduces bpf_strcasecmp and adds some selftests. Rong Tao (2): bpf: add bpf_strcasecmp kfunc selftests/bpf: Test kfunc bpf_strcasecmp kernel/bpf/helpers.c | 56 ++++++++++++++----- .../bpf/progs/string_kfuncs_failure1.c | 6 ++ .../bpf/progs/string_kfuncs_success.c | 5 ++ 3 files changed, 53 insertions(+), 14 deletions(-) -- 2.51.0

4 months, 1 week

1
0
0 0

[PATCH] selftests: net: fix spelling and grammar mistakes

by Praveen Balakrishnan

Fix several spelling and grammatical mistakes in output messages from the net selftests to improve readability. Only the message strings for the test output have been modified. No changes to the functional logic of the tests have been made. Signed-off-by: Praveen Balakrishnan <praveen.balakrishnan(a)magd.ox.ac.uk> --- tools/testing/selftests/net/openvswitch/ovs-dpctl.py | 2 +- tools/testing/selftests/net/rps_default_mask.sh | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index 8a0396bfaf99..b521e0dea506 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -1877,7 +1877,7 @@ class OvsPacket(GenericNetlinkSocket): elif msg["cmd"] == OvsPacket.OVS_PACKET_CMD_EXECUTE: up.execute(msg) else: - print("Unkonwn cmd: %d" % msg["cmd"]) + print("Unknown cmd: %d" % msg["cmd"]) except NetlinkError as ne: raise ne diff --git a/tools/testing/selftests/net/rps_default_mask.sh b/tools/testing/selftests/net/rps_default_mask.sh index 4287a8529890..b200019b3c80 100755 --- a/tools/testing/selftests/net/rps_default_mask.sh +++ b/tools/testing/selftests/net/rps_default_mask.sh @@ -54,16 +54,16 @@ cleanup echo 1 > /proc/sys/net/core/rps_default_mask setup -chk_rps "changing rps_default_mask dont affect existing devices" "" lo $INITIAL_RPS_DEFAULT_MASK +chk_rps "changing rps_default_mask doesn't affect existing devices" "" lo $INITIAL_RPS_DEFAULT_MASK echo 3 > /proc/sys/net/core/rps_default_mask -chk_rps "changing rps_default_mask dont affect existing netns" $NETNS lo 0 +chk_rps "changing rps_default_mask doesn't affect existing netns" $NETNS lo 0 ip link add name $VETH type veth peer netns $NETNS name $VETH ip link set dev $VETH up ip -n $NETNS link set dev $VETH up -chk_rps "changing rps_default_mask affect newly created devices" "" $VETH 3 -chk_rps "changing rps_default_mask 
don't affect newly child netns[II]" $NETNS $VETH 0 +chk_rps "changing rps_default_mask affects newly created devices" "" $VETH 3 +chk_rps "changing rps_default_mask doesn't affect newly child netns[II]" $NETNS $VETH 0 ip link del dev $VETH ip netns del $NETNS @@ -72,8 +72,8 @@ chk_rps "rps_default_mask is 0 by default in child netns" "$NETNS" lo 0 ip netns exec $NETNS sysctl -qw net.core.rps_default_mask=1 ip link add name $VETH type veth peer netns $NETNS name $VETH -chk_rps "changing rps_default_mask in child ns don't affect the main one" "" lo $INITIAL_RPS_DEFAULT_MASK +chk_rps "changing rps_default_mask in child ns doesn't affect the main one" "" lo $INITIAL_RPS_DEFAULT_MASK chk_rps "changing rps_default_mask in child ns affects new childns devices" $NETNS $VETH 1 -chk_rps "changing rps_default_mask in child ns don't affect existing devices" $NETNS lo 0 +chk_rps "changing rps_default_mask in child ns doesn't affect existing devices" $NETNS lo 0 exit $ret -- 2.39.5

4 months, 1 week

3
2
0 0

[RFC PATCH v2 bpf-next 0/3] bpf: cgroup: support writing and freezing cgroups from BPF

by Djalal Harouni

This patch series adds support for writing cgroup interfaces from BPF. It is useful to freeze a cgroup hierarchy on suspicious activity for a more thorough analysis before killing it. Planned users of this feature are systemd and BPF tools, where the cgroup hierarchy could be a system service, user session, k8s pod or a container. The writing happens via kernfs nodes and the cgroup must be on the default hierarchy. It implements the requests and feedback from v1 [1]: we now use a unified path for user-space and BPF writes to cgroup interfaces. So I want to validate that this is the right approach first. Todo: * Limit size of data to be written. * Further tests. * Add cgroup kill support. # RFC v1 -> v2 * Implemented Alexei's and Tejun's requests [1]. * Unified path where user-space or BPF writes end up directly taking a kernfs_node, with an example on the "cgroup.freeze" interface. [1] https://lore.kernel.org/bpf/20240327225334.58474-1-tixxdz@gmail.com/ Djalal Harouni (3): kernfs: cgroup: support writing cgroup interfaces from a kernfs node bpf: cgroup: Add BPF Kfunc to write cgroup interfaces selftests/bpf: add selftest for bpf_cgroup_write_interface include/linux/cgroup.h | 3 ++ kernel/bpf/helpers.c | 45 +++++ kernel/cgroup/cgroup.c | 102 +++++++ tools/testing/selftests/bpf/prog_tests/task_freeze_cgroup.c | 172 ++++++++++++ tools/testing/selftests/bpf/progs/test_task_freeze_cgroup.c | 155 ++++++++++ 5 files changed, 471 insertions(+), 6 deletions(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/task_freeze_cgroup.c create mode 100644 tools/testing/selftests/bpf/progs/test_task_freeze_cgroup.c -- 2.34.1

4 months, 1 week

4
15
0 0

[PATCH net] selftests: drv-net: csum: fix interface name for remote host

by Jakub Kicinski

Use cfg.remote_ifname for arguments of remote command. Without this UDP tests fail in NIPA where local interface is called enp1s0 and remote enp0s4. Fixes: 1d0dc857b5d8 ("selftests: drv-net: add checksum tests") Signed-off-by: Jakub Kicinski <kuba(a)kernel.org> --- CC: shuah(a)kernel.org CC: willemb(a)google.com CC: petrm(a)nvidia.com CC: linux-kselftest(a)vger.kernel.org --- tools/testing/selftests/drivers/net/hw/csum.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/drivers/net/hw/csum.py b/tools/testing/selftests/drivers/net/hw/csum.py index cd23af875317..3e3a89a34afe 100755 --- a/tools/testing/selftests/drivers/net/hw/csum.py +++ b/tools/testing/selftests/drivers/net/hw/csum.py @@ -17,7 +17,7 @@ from lib.py import bkg, cmd, wait_port_listen ip_args = f"-{ipver} -S {cfg.remote_addr_v[ipver]} -D {cfg.addr_v[ipver]}" rx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -n 100 {ip_args} -r 1 -R {extra_args}" - tx_cmd = f"{cfg.bin_remote} -i {cfg.ifname} -n 100 {ip_args} -r 1 -T {extra_args}" + tx_cmd = f"{cfg.bin_remote} -i {cfg.remote_ifname} -n 100 {ip_args} -r 1 -T {extra_args}" with bkg(rx_cmd, exit_wait=True): wait_port_listen(34000, proto="udp") @@ -37,7 +37,7 @@ from lib.py import bkg, cmd, wait_port_listen if extra_args != "-U -Z": extra_args += " -r 1" - rx_cmd = f"{cfg.bin_remote} -i {cfg.ifname} -L 1 -n 100 {ip_args} -R {extra_args}" + rx_cmd = f"{cfg.bin_remote} -i {cfg.remote_ifname} -L 1 -n 100 {ip_args} -R {extra_args}" tx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -L 1 -n 100 {ip_args} -T {extra_args}" with bkg(rx_cmd, host=cfg.remote, exit_wait=True): -- 2.51.0

4 months, 1 week

4
3
0 0

[PATCH v4 0/2] KVM: guest_memfd: use write for population

by Kalyazin, Nikita

[ based on kvm/next ] Implement guest_memfd allocation and population via the write syscall. This is useful in non-CoCo use cases where the host can access guest memory. Even though the same can also be achieved via userspace mapping and memcpying from userspace, write provides a more performant option because it does not need to set page tables and it does not cause a page fault for every page like memcpy would. Note that memcpy cannot be accelerated via MADV_POPULATE_WRITE as it is not supported by guest_memfd and relies on GUP. Populating 512MiB of guest_memfd on a x86 machine: - via memcpy: 436 ms - via write: 202 ms (-54%) v4: - Switch from implementing the write callback to write_iter - Remove conditional compilation - Rebase to kvm/next v3: - https://lore.kernel.org/kvm/20250303130838.28812-1-kalyazin@amazon.com - David/Mike D: Only compile support for the write syscall if CONFIG_KVM_GMEM_SHARED_MEM (now gone) is enabled. v2: - https://lore.kernel.org/kvm/20241129123929.64790-1-kalyazin@amazon.com - Switch from an ioctl to the write syscall to implement population v1: - https://lore.kernel.org/kvm/20241024095429.54052-1-kalyazin@amazon.com Nikita Kalyazin (2): KVM: guest_memfd: add generic population via write KVM: selftests: update guest_memfd write tests .../testing/selftests/kvm/guest_memfd_test.c | 85 +++++++++++++++++-- virt/kvm/guest_memfd.c | 64 +++++++++++++- 2 files changed, 142 insertions(+), 7 deletions(-) base-commit: a6ad54137af92535cfe32e19e5f3bc1bb7dbd383 -- 2.50.1

4 months, 1 week

4
9
0 0

[PATCH net-next v4] selftests: net: add test for ipv6 fragmentation

by Brett A C Sheffield

Add selftest for the IPv6 fragmentation regression which affected several stable kernels. Commit a18dfa9925b9 ("ipv6: save dontfrag in cork") was backported to stable without some prerequisite commits. This caused a regression when sending IPv6 UDP packets by preventing fragmentation and instead returning -1 (EMSGSIZE). Add selftest to check for this issue by attempting to send a packet larger than the interface MTU. The packet will be fragmented on a working kernel, with sendmsg(2) correctly returning the expected number of bytes sent. When the regression is present, sendmsg returns -1 and sets errno to EMSGSIZE. Link: https://lore.kernel.org/stable/aElivdUXqd1OqgMY@karahi.gladserv.com Signed-off-by: Brett A C Sheffield <bacs(a)librecast.net> --- v4 changes: - fix "else should follow close brace" (checkpatch ERROR) v3 changes: - add usleep instead of busy polling on sendmsg - simplify error handling by using error() and leaving cleanup to O/S - use loopback interface - don't bother creating TAP - send to localhost (::1) v2 changes: - remove superfluous namespace calls - unshare(2) suffices - remove usleep(). Don't wait for the interface to be ready, just send, and handle the (less likely) error case by retrying. 
- set destination address only once - document our use of the IPv6 link-local source address - send to port 9 (DISCARD) instead of 4242 (DONT PANIC) - ensure sockets are closed on failure paths - use KSFT exit codes for clarity v3: https://lore.kernel.org/netdev/20250901112248.5218-1-bacs@librecast.net v2: https://lore.kernel.org/netdev/20250831102908.14655-1-bacs@librecast.net v1: https://lore.kernel.org/netdev/20250825092548.4436-3-bacs@librecast.net tools/testing/selftests/net/.gitignore | 1 + tools/testing/selftests/net/Makefile | 1 + .../selftests/net/ipv6_fragmentation.c | 144 ++++++++++++++++++ 3 files changed, 146 insertions(+) create mode 100644 tools/testing/selftests/net/ipv6_fragmentation.c diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 47c293c2962f..3d4b4a53dfda 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -16,6 +16,7 @@ ip_local_port_range ipsec ipv6_flowlabel ipv6_flowlabel_mgr +ipv6_fragmentation log.txt msg_oob msg_zerocopy diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index eef0b8f8a7b0..276e0481d996 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -117,6 +117,7 @@ TEST_GEN_FILES += tfo TEST_PROGS += tfo_passive.sh TEST_PROGS += broadcast_pmtu.sh TEST_PROGS += ipv6_force_forwarding.sh +TEST_GEN_PROGS += ipv6_fragmentation TEST_PROGS += route_hint.sh # YNL files, must be before "include ..lib.mk" diff --git a/tools/testing/selftests/net/ipv6_fragmentation.c b/tools/testing/selftests/net/ipv6_fragmentation.c new file mode 100644 index 000000000000..6d1311e26501 --- /dev/null +++ b/tools/testing/selftests/net/ipv6_fragmentation.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Brett A C Sheffield <bacs(a)librecast.net> + * + * Kernel selftest for the IPv6 fragmentation regression which affected stable + * kernels: + * + * 
https://lore.kernel.org/stable/aElivdUXqd1OqgMY@karahi.gladserv.com + * + * Commit: a18dfa9925b9 ("ipv6: save dontfrag in cork") was backported to stable + * without some prerequisite commits. + * + * This caused a regression when sending IPv6 UDP packets by preventing + * fragmentation and instead returning -1 (EMSGSIZE). + * + * This selftest demonstrates the issue by sending an IPv6 UDP packet to + * localhost (::1) on the loopback interface from the autoconfigured link-local + * address. + * + * sendmsg(2) returns bytes sent correctly on a working kernel, and returns -1 + * (EMSGSIZE) when the regression is present. + * + * The regression was not present in the mainline kernel, but add this test to + * catch similar breakage in future. + */ + +#define _GNU_SOURCE + +#include <error.h> +#include <fcntl.h> +#include <linux/if_tun.h> +#include <net/if.h> +#include <netinet/in.h> +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <unistd.h> +#include "../kselftest.h" + +#define MTU 1500 +#define LARGER_THAN_MTU 8192 + +/* bring up interface */ +static int interface_up(int ctl, struct ifreq *ifr) +{ + if (ioctl(ctl, SIOCGIFFLAGS, ifr) == -1) + error(KSFT_FAIL, errno, "ioctl SIOCGIFFLAGS"); + ifr->ifr_flags = ifr->ifr_flags | IFF_UP; + return ioctl(ctl, SIOCSIFFLAGS, ifr); +} + +/* no need to wait for DAD in our namespace */ +static int disable_dad(char *ifname) +{ + char sysvar[] = "/proc/sys/net/ipv6/conf/%s/accept_dad"; + char fname[IFNAMSIZ + sizeof(sysvar)]; + int fd; + + snprintf(fname, sizeof(fname), sysvar, ifname); + fd = open(fname, O_WRONLY); + if (fd == -1) + error(KSFT_FAIL, errno, "open accept_dad"); + if (write(fd, "0", 1) != 1) + error(KSFT_FAIL, errno, "write accept_dad"); + + return close(fd); +} + +static int setup(void) +{ + struct ifreq ifr = { + .ifr_name = "lo" + }; + int fd = -1; + int ctl; + + /* we need to set MTU, so do this in a namespace to play 
nicely */ + if (unshare(CLONE_NEWNET) == -1) + error(KSFT_FAIL, errno, "unshare"); + + ctl = socket(AF_LOCAL, SOCK_STREAM, 0); + if (ctl == -1) + error(KSFT_FAIL, errno, "socket"); + + /* ensure MTU is smaller than what we plan to send */ + ifr.ifr_mtu = MTU; + if (ioctl(ctl, SIOCSIFMTU, &ifr) == -1) + error(KSFT_FAIL, errno, "ioctl: set MTU"); + + disable_dad("lo"); + interface_up(ctl, &ifr); + + close(ctl); + return fd; +} + +int main(void) +{ + struct in6_addr addr = { + .s6_addr[15] = 0x01, /* ::1 */ + }; + struct sockaddr_in6 sa = { + .sin6_family = AF_INET6, + .sin6_addr = addr, + .sin6_port = 9 /* port 9/udp (DISCARD) */ + }; + char buf[LARGER_THAN_MTU] = {0}; + struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf)}; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_name = (struct sockaddr *)&sa, + .msg_namelen = sizeof(sa), + }; + ssize_t rc; + int ns_fd; + int err = KSFT_FAIL; + int s; + + printf("Testing IPv6 fragmentation\n"); + ns_fd = setup(); + s = socket(AF_INET6, SOCK_DGRAM, 0); +send_again: + rc = sendmsg(s, &msg, 0); + if (rc == -1) { + /* if interface wasn't ready, try again */ + if (errno == EADDRNOTAVAIL) { + usleep(1000); + goto send_again; + } + printf("[FAIL] sendmsg: %s\n", strerror(errno)); + } else if (rc != LARGER_THAN_MTU) { + printf("[FAIL] sendmsg() returned %zi, expected %i\n", rc, LARGER_THAN_MTU); + } else { + printf("[PASS] sendmsg() returned %zi\n", rc); + err = KSFT_PASS; + } + close(s); + close(ns_fd); + return err; +} base-commit: 864ecc4a6dade82d3f70eab43dad0e277aa6fc78 -- 2.49.1

4 months, 1 week

3
2
0 0

[PATCH 2/9] KVM: selftests: Add __packed attribute fallback

by Aqib Faruqui

Kernel UAPI headers use __packed but don't provide the definition in userspace builds. Add a fallback definition matching the kernel's implementation. This follows the same pattern used by BPF and SGX selftests. Signed-off-by: Aqib Faruqui <aqibaf(a)amazon.com> --- tools/testing/selftests/kvm/include/kvm_util.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 23a506d7e..7fae7f5e7 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -5,6 +5,10 @@ #ifndef SELFTEST_KVM_UTIL_H #define SELFTEST_KVM_UTIL_H +#ifndef __packed +#define __packed __attribute__((__packed__)) +#endif + #include "test_util.h" #include <linux/compiler.h> -- 2.47.3

4 months, 1 week

3
2
0 0

[PATCH] selftests/mm: directly add pagesize instead of increase until page size

by Wei Yang

The is_backed_by_folio() check is done once per page. Move the pointer directly to the next page instead of incrementing it by one and checking whether it is page-size aligned. Signed-off-by: Wei Yang <richard.weiyang(a)gmail.com> --- tools/testing/selftests/mm/split_huge_page_test.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c index 10ae65ea032f..7f7016ba4054 100644 --- a/tools/testing/selftests/mm/split_huge_page_test.c +++ b/tools/testing/selftests/mm/split_huge_page_test.c @@ -423,9 +423,8 @@ static void split_pte_mapped_thp(void) /* smap does not show THPs after mremap, use kpageflags instead */ thp_size = 0; - for (i = 0; i < pagesize * 4; i++) - if (i % pagesize == 0 && - is_backed_by_folio(&pte_mapped[i], pmd_order, pagemap_fd, kpageflags_fd)) + for (i = 0; i < pagesize * 4; i += pagesize) + if (is_backed_by_folio(&pte_mapped[i], pmd_order, pagemap_fd, kpageflags_fd)) thp_size++; if (thp_size != 4) -- 2.34.1

4 months, 1 week

4
8
0 0

[PATCH v7 00/29] KVM: arm64: Implement support for SME

by Mark Brown

I've removed the RFC tag from this version of the series, but the items that I'm looking for feedback on remain the same: - The userspace ABI, in particular: - The vector length used for the SVE registers, access to the SVE registers and access to ZA and (if available) ZT0 depending on the current state of PSTATE.{SM,ZA}. - The use of a single finalisation for both SVE and SME. - The addition of control for enabling fine grained traps in a similar manner to FGU but without the UNDEF, I'm not clear if this is desired at all and at present this requires symmetric read and write traps like FGU. That seemed like it might be desired from an implementation point of view but we already have one case where we enable an asymmetric trap (for ARM64_WORKAROUND_AMPERE_AC03_CPU_38) and it seems generally useful to enable asymmetrically. This series implements support for SME use in non-protected KVM guests. Much of this is very similar to SVE, the main additional challenge that SME presents is that it introduces a new vector length similar to the SVE vector length and two new controls which change the registers seen by guests: - PSTATE.ZA enables the ZA matrix register and, if SME2 is supported, the ZT0 LUT register. - PSTATE.SM enables streaming mode, a new floating point mode which uses the SVE register set with the separately configured SME vector length. In streaming mode implementation of the FFR register is optional. It is also permitted to build systems which support SME without SVE, in this case when not in streaming mode no SVE registers or instructions are available. Further, there is no requirement that there be any overlap in the set of vector lengths supported by SVE and SME in a system, this is expected to be a common situation in practical systems. Since there is a new vector length to configure we introduce a new feature parallel to the existing SVE one with a new pseudo register for the streaming mode vector length.
Due to the overlap with SVE caused by streaming mode rather than finalising SME as a separate feature we use the existing SVE finalisation to also finalise SME, a new define KVM_ARM_VCPU_VEC is provided to help make user code clearer. Finalising SVE and SME separately would introduce complication with register access since finalising SVE makes the SVE registers writeable by userspace and doing multiple finalisations results in an error being reported. Dealing with a state where the SVE registers are writeable due to one of SVE or SME being finalised but may have their VL changed by the other being finalised seems like needless complexity with minimal practical utility, it seems clearer to just express directly that only one finalisation can be done in the ABI. Access to the floating point registers follows the architecture: - When both SVE and SME are present: - If PSTATE.SM == 0 the vector length used for the Z and P registers is the SVE vector length. - If PSTATE.SM == 1 the vector length used for the Z and P registers is the SME vector length. - If only SME is present: - If PSTATE.SM == 0 the Z and P registers are inaccessible and the floating point state is accessed via the encodings for the V registers. - If PSTATE.SM == 1 the vector length used for the Z and P registers is the SME vector length. - The SME specific ZA and ZT0 registers are only accessible if SVCR.ZA is 1. The VMM must understand this, in particular when loading state SVCR should be configured before other state. It should be noted that while the architecture refers to PSTATE.SM and PSTATE.ZA these PSTATE bits are not preserved in SPSR_ELx, they are only accessible via SVCR. There are a large number of subfeatures for SME, most of which only offer additional instructions but some of which (SME2 and FA64) add architectural state. These are configured via the ID registers as per usual.
Protected KVM is supported, with the implementation maintaining the existing restriction that the hypervisor will refuse to run if streaming mode or ZA is enabled. This both simplifies the code and avoids the need to allocate storage for host ZA and ZT0 state, there seems to be little practical use case for supporting this and the memory usage would be non-trivial. The new KVM_ARM_VCPU_VEC feature and ZA and ZT0 registers have not been added to the get-reg-list selftest, the idea of supporting additional features there without restructuring the program to generate all possible feature combinations has been rejected. I will post a separate series which does that restructuring. Signed-off-by: Mark Brown <broonie(a)kernel.org> --- Changes in v7: - Rebase onto v6.17-rc1. - Handle SMIDR_EL1 as a VM wide ID register and use this in feat_sme_smps(). - Expose affinity fields in SMIDR_EL1. - Remove SMPRI_EL1 from vcpu_sysreg, the value is always 0 currently. - Prevent userspace writes to SMPRIMAP_EL2. - Link to v6: https://lore.kernel.org/r/20250625-kvm-arm64-sme-v6-0-114cff4ffe04@kernel.o… Changes in v6: - Rebase onto v6.16-rc3. - Link to v5: https://lore.kernel.org/r/20250417-kvm-arm64-sme-v5-0-f469a2d5f574@kernel.o… Changes in v5: - Rebase onto v6.15-rc2. - Add pKVM guest support. - Always restore SVCR. - Link to v4: https://lore.kernel.org/r/20250214-kvm-arm64-sme-v4-0-d64a681adcc2@kernel.o… Changes in v4: - Rebase onto v6.14-rc2 and Mark Rutland's fixes. - Expose SME to nested guests. - Additional cleanups and test fixes following on from the rebase. - Flush register state on VMM PSTATE.{SM,ZA}. - Link to v3: https://lore.kernel.org/r/20241220-kvm-arm64-sme-v3-0-05b018c1ffeb@kernel.o… Changes in v3: - Rebase onto v6.12-rc2. - Link to v2: https://lore.kernel.org/r/20231222-kvm-arm64-sme-v2-0-da226cb180bb@kernel.o… Changes in v2: - Rebase onto v6.7-rc3. - Configure subfeatures based on host system only. - Complete nVHE support.
- There was some snafu with sending v1 out, it didn't make it to the lists but in case it hit people's inboxes I'm sending as v2. --- Mark Brown (29): arm64/sysreg: Update SMIDR_EL1 to DDI0601 2025-06 arm64/fpsimd: Update FA64 and ZT0 enables when loading SME state arm64/fpsimd: Decide to save ZT0 and streaming mode FFR at bind time arm64/fpsimd: Check enable bit for FA64 when saving EFI state arm64/fpsimd: Determine maximum virtualisable SME vector length KVM: arm64: Introduce non-UNDEF FGT control KVM: arm64: Pay attention to FFR parameter in SVE save and load KVM: arm64: Pull ctxt_has_ helpers to start of sysreg-sr.h KVM: arm64: Move SVE state access macros after feature test macros KVM: arm64: Rename SVE finalization constants to be more general KVM: arm64: Document the KVM ABI for SME KVM: arm64: Define internal features for SME KVM: arm64: Rename sve_state_reg_region KVM: arm64: Store vector lengths in an array KVM: arm64: Implement SME vector length configuration KVM: arm64: Support SME control registers KVM: arm64: Support TPIDR2_EL0 KVM: arm64: Support SME identification registers for guests KVM: arm64: Support SME priority registers KVM: arm64: Provide assembly for SME register access KVM: arm64: Support userspace access to streaming mode Z and P registers KVM: arm64: Flush register state on writes to SVCR.SM and SVCR.ZA KVM: arm64: Expose SME specific state to userspace KVM: arm64: Context switch SME state for guests KVM: arm64: Handle SME exceptions KVM: arm64: Expose SME to nested guests KVM: arm64: Provide interface for configuring and enabling SME for guests KVM: arm64: selftests: Add SME system registers to get-reg-list KVM: arm64: selftests: Add SME to set_id_regs test Documentation/virt/kvm/api.rst | 117 +++++++---- arch/arm64/include/asm/fpsimd.h | 26 +++ arch/arm64/include/asm/kvm_emulate.h | 6 + arch/arm64/include/asm/kvm_host.h | 169 ++++++++++++--- arch/arm64/include/asm/kvm_hyp.h | 5 +- arch/arm64/include/asm/kvm_pkvm.h | 2 +- 
arch/arm64/include/asm/vncr_mapping.h | 2 + arch/arm64/include/uapi/asm/kvm.h | 33 +++ arch/arm64/kernel/cpufeature.c | 2 - arch/arm64/kernel/fpsimd.c | 89 ++++---- arch/arm64/kvm/arm.c | 10 + arch/arm64/kvm/config.c | 8 +- arch/arm64/kvm/fpsimd.c | 28 ++- arch/arm64/kvm/guest.c | 252 ++++++++++++++++++++--- arch/arm64/kvm/handle_exit.c | 14 ++ arch/arm64/kvm/hyp/fpsimd.S | 28 ++- arch/arm64/kvm/hyp/include/hyp/switch.h | 175 ++++++++++++++-- arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 110 ++++++---- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 86 ++++++-- arch/arm64/kvm/hyp/nvhe/pkvm.c | 85 ++++++-- arch/arm64/kvm/hyp/nvhe/switch.c | 4 +- arch/arm64/kvm/hyp/nvhe/sys_regs.c | 6 + arch/arm64/kvm/hyp/vhe/switch.c | 17 +- arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 7 + arch/arm64/kvm/nested.c | 3 +- arch/arm64/kvm/reset.c | 156 ++++++++++---- arch/arm64/kvm/sys_regs.c | 141 ++++++++++++- arch/arm64/tools/sysreg | 8 +- include/uapi/linux/kvm.h | 1 + tools/testing/selftests/kvm/arm64/get-reg-list.c | 15 +- tools/testing/selftests/kvm/arm64/set_id_regs.c | 27 ++- 31 files changed, 1328 insertions(+), 304 deletions(-) --- base-commit: 062b3e4a1f880f104a8d4b90b767788786aa7b78 change-id: 20230301-kvm-arm64-sme-06a1246d3636 Best regards, -- Mark Brown <broonie(a)kernel.org>

4 months, 1 week

3
31
0 0

[PATCH v1 00/36] mm: remove nth_page()

by David Hildenbrand

This is based on mm-unstable. I will only CC non-MM folks on the cover letter and the respective patch to not flood too many inboxes (the lists receive all patches). -- As discussed recently with Linus, nth_page() is just nasty and we would like to remove it. To recap, the reason we currently need nth_page() within a folio is because on some kernel configs (SPARSEMEM without SPARSEMEM_VMEMMAP), the memmap is allocated per memory section. While buddy allocations cannot cross memory section boundaries, hugetlb and dax folios can. So crossing a memory section means that "page++" could do the wrong thing. Instead, nth_page() on these problematic configs always goes from page->pfn, to then go from (++pfn)->page, which is rather nasty. Likely, many people have no idea when nth_page() is required and when it might be dropped. We refer to such problematic PFN ranges as "non-contiguous pages". If we only deal with "contiguous pages", there is no need for nth_page(). Besides that "obvious" folio case, we might end up using nth_page() within CMA allocations (again, could span memory sections), and in one corner case (kfence) when processing memblock allocations (again, could span memory sections). So let's handle all that, add sanity checks, and remove nth_page(). Patch #1 -> #5 : stop making SPARSEMEM_VMEMMAP user-selectable + cleanups Patch #6 -> #13 : disallow folios to have non-contiguous pages Patch #14 -> #20 : remove nth_page() usage within folios Patch #21 : disallow CMA allocations of non-contiguous pages Patch #22 -> #32 : sanity-check + remove nth_page() usage within SG entry Patch #33 : sanity-check + remove nth_page() usage in unpin_user_page_range_dirty_lock() Patch #34 : remove nth_page() in kfence Patch #35 : adjust stale comment regarding nth_page Patch #36 : mm: remove nth_page() A lot of this is inspired from the discussion at [1] between Linus, Jason and me, so kudos to them.
[1] https://lore.kernel.org/all/CAHk-=wiCYfNp4AJLBORU-c7ZyRBUp66W2-Et6cdQ4REx-G… RFC -> v1: * "wireguard: selftests: remove CONFIG_SPARSEMEM_VMEMMAP=y from qemu kernel config" -> Mention that it was never really relevant for the test * "mm/mm_init: make memmap_init_compound() look more like prep_compound_page()" -> Mention the setup of page links * "mm: limit folio/compound page sizes in problematic kernel configs" -> Improve comment for PUD handling, mentioning hugetlb and dax * "mm: simplify folio_page() and folio_page_idx()" -> Call variable "n" * "mm/hugetlb: cleanup hugetlb_folio_init_tail_vmemmap()" -> Keep __init_single_page() and refer to the usage of memblock_reserved_mark_noinit() * "fs: hugetlbfs: cleanup folio in adjust_range_hwpoison()" * "fs: hugetlbfs: remove nth_page() usage within folio in adjust_range_hwpoison()" -> Separate nth_page() removal from cleanups -> Further improve cleanups * "io_uring/zcrx: remove nth_page() usage within folio" -> Keep the io_copy_cache for now and limit to nth_page() removal * "mm/gup: drop nth_page() usage within folio when recording subpages" -> Cleanup record_subpages as bit * "mm/cma: refuse handing out non-contiguous page ranges" -> Replace another instance of "pfn_to_page(pfn)" where we already have the page * "scatterlist: disallow non-contigous page ranges in a single SG entry" -> We have to EXPORT the symbol. 
I thought about moving it to mm_inline.h, but I really don't want to include that in include/linux/scatterlist.h * "ata: libata-eh: drop nth_page() usage within SG entry" * "mspro_block: drop nth_page() usage within SG entry" * "memstick: drop nth_page() usage within SG entry" * "mmc: drop nth_page() usage within SG entry" -> Keep PAGE_SHIFT * "scsi: scsi_lib: drop nth_page() usage within SG entry" * "scsi: sg: drop nth_page() usage within SG entry" -> Split patches, Keep PAGE_SHIFT * "crypto: remove nth_page() usage within SG entry" -> Keep PAGE_SHIFT * "kfence: drop nth_page() usage" -> Keep modifying i and use "start_pfn" only instead Cc: Andrew Morton <akpm(a)linux-foundation.org> Cc: Linus Torvalds <torvalds(a)linux-foundation.org> Cc: Jason Gunthorpe <jgg(a)nvidia.com> Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com> Cc: "Liam R. Howlett" <Liam.Howlett(a)oracle.com> Cc: Vlastimil Babka <vbabka(a)suse.cz> Cc: Mike Rapoport <rppt(a)kernel.org> Cc: Suren Baghdasaryan <surenb(a)google.com> Cc: Michal Hocko <mhocko(a)suse.com> Cc: Jens Axboe <axboe(a)kernel.dk> Cc: Marek Szyprowski <m.szyprowski(a)samsung.com> Cc: Robin Murphy <robin.murphy(a)arm.com> Cc: John Hubbard <jhubbard(a)nvidia.com> Cc: Peter Xu <peterx(a)redhat.com> Cc: Alexander Potapenko <glider(a)google.com> Cc: Marco Elver <elver(a)google.com> Cc: Dmitry Vyukov <dvyukov(a)google.com> Cc: Brendan Jackman <jackmanb(a)google.com> Cc: Johannes Weiner <hannes(a)cmpxchg.org> Cc: Zi Yan <ziy(a)nvidia.com> Cc: Dennis Zhou <dennis(a)kernel.org> Cc: Tejun Heo <tj(a)kernel.org> Cc: Christoph Lameter <cl(a)gentwo.org> Cc: Muchun Song <muchun.song(a)linux.dev> Cc: Oscar Salvador <osalvador(a)suse.de> Cc: x86(a)kernel.org Cc: linux-arm-kernel(a)lists.infradead.org Cc: linux-mips(a)vger.kernel.org Cc: linux-s390(a)vger.kernel.org Cc: linux-crypto(a)vger.kernel.org Cc: linux-ide(a)vger.kernel.org Cc: intel-gfx(a)lists.freedesktop.org Cc: dri-devel(a)lists.freedesktop.org Cc: linux-mmc(a)vger.kernel.org Cc: 
linux-arm-kernel(a)axis.com Cc: linux-scsi(a)vger.kernel.org Cc: kvm(a)vger.kernel.org Cc: virtualization(a)lists.linux.dev Cc: linux-mm(a)kvack.org Cc: io-uring(a)vger.kernel.org Cc: iommu(a)lists.linux.dev Cc: kasan-dev(a)googlegroups.com Cc: wireguard(a)lists.zx2c4.com Cc: netdev(a)vger.kernel.org Cc: linux-kselftest(a)vger.kernel.org Cc: linux-riscv(a)lists.infradead.org David Hildenbrand (36): mm: stop making SPARSEMEM_VMEMMAP user-selectable arm64: Kconfig: drop superfluous "select SPARSEMEM_VMEMMAP" s390/Kconfig: drop superfluous "select SPARSEMEM_VMEMMAP" x86/Kconfig: drop superfluous "select SPARSEMEM_VMEMMAP" wireguard: selftests: remove CONFIG_SPARSEMEM_VMEMMAP=y from qemu kernel config mm/page_alloc: reject unreasonable folio/compound page sizes in alloc_contig_range_noprof() mm/memremap: reject unreasonable folio/compound page sizes in memremap_pages() mm/hugetlb: check for unreasonable folio sizes when registering hstate mm/mm_init: make memmap_init_compound() look more like prep_compound_page() mm: sanity-check maximum folio size in folio_set_order() mm: limit folio/compound page sizes in problematic kernel configs mm: simplify folio_page() and folio_page_idx() mm/hugetlb: cleanup hugetlb_folio_init_tail_vmemmap() mm/mm/percpu-km: drop nth_page() usage within single allocation fs: hugetlbfs: remove nth_page() usage within folio in adjust_range_hwpoison() fs: hugetlbfs: cleanup folio in adjust_range_hwpoison() mm/pagewalk: drop nth_page() usage within folio in folio_walk_start() mm/gup: drop nth_page() usage within folio when recording subpages io_uring/zcrx: remove nth_page() usage within folio mips: mm: convert __flush_dcache_pages() to __flush_dcache_folio_pages() mm/cma: refuse handing out non-contiguous page ranges dma-remap: drop nth_page() in dma_common_contiguous_remap() scatterlist: disallow non-contigous page ranges in a single SG entry ata: libata-eh: drop nth_page() usage within SG entry drm/i915/gem: drop nth_page() usage within SG entry 
mspro_block: drop nth_page() usage within SG entry memstick: drop nth_page() usage within SG entry mmc: drop nth_page() usage within SG entry scsi: scsi_lib: drop nth_page() usage within SG entry scsi: sg: drop nth_page() usage within SG entry vfio/pci: drop nth_page() usage within SG entry crypto: remove nth_page() usage within SG entry mm/gup: drop nth_page() usage in unpin_user_page_range_dirty_lock() kfence: drop nth_page() usage block: update comment of "struct bio_vec" regarding nth_page() mm: remove nth_page() arch/arm64/Kconfig | 1 - arch/mips/include/asm/cacheflush.h | 11 +++-- arch/mips/mm/cache.c | 8 ++-- arch/s390/Kconfig | 1 - arch/x86/Kconfig | 1 - crypto/ahash.c | 4 +- crypto/scompress.c | 8 ++-- drivers/ata/libata-sff.c | 6 +-- drivers/gpu/drm/i915/gem/i915_gem_pages.c | 2 +- drivers/memstick/core/mspro_block.c | 3 +- drivers/memstick/host/jmb38x_ms.c | 3 +- drivers/memstick/host/tifm_ms.c | 3 +- drivers/mmc/host/tifm_sd.c | 4 +- drivers/mmc/host/usdhi6rol0.c | 4 +- drivers/scsi/scsi_lib.c | 3 +- drivers/scsi/sg.c | 3 +- drivers/vfio/pci/pds/lm.c | 3 +- drivers/vfio/pci/virtio/migrate.c | 3 +- fs/hugetlbfs/inode.c | 33 +++++-------- include/crypto/scatterwalk.h | 4 +- include/linux/bvec.h | 7 +-- include/linux/mm.h | 48 +++++++++++++++---- include/linux/page-flags.h | 5 +- include/linux/scatterlist.h | 3 +- io_uring/zcrx.c | 4 +- kernel/dma/remap.c | 2 +- mm/Kconfig | 3 +- mm/cma.c | 39 +++++++++------ mm/gup.c | 14 ++++-- mm/hugetlb.c | 22 +++++---- mm/internal.h | 1 + mm/kfence/core.c | 12 +++-- mm/memremap.c | 3 ++ mm/mm_init.c | 15 +++--- mm/page_alloc.c | 5 +- mm/pagewalk.c | 2 +- mm/percpu-km.c | 2 +- mm/util.c | 34 +++++++++++++ tools/testing/scatterlist/linux/mm.h | 1 - .../selftests/wireguard/qemu/kernel.config | 1 - 40 files changed, 202 insertions(+), 129 deletions(-) base-commit: efa7612003b44c220551fd02466bfbad5180fc83 -- 2.50.1

4 months, 1 week

14
141
0 0

[PATCH net-next v3] selftests: net: add test for ipv6 fragmentation

by Brett A C Sheffield

Add selftest for the IPv6 fragmentation regression which affected several stable kernels. Commit a18dfa9925b9 ("ipv6: save dontfrag in cork") was backported to stable without some prerequisite commits. This caused a regression when sending IPv6 UDP packets by preventing fragmentation and instead returning -1 (EMSGSIZE). Add selftest to check for this issue by attempting to send a packet larger than the interface MTU. The packet will be fragmented on a working kernel, with sendmsg(2) correctly returning the expected number of bytes sent. When the regression is present, sendmsg returns -1 and sets errno to EMSGSIZE. Link: https://lore.kernel.org/stable/aElivdUXqd1OqgMY@karahi.gladserv.com Signed-off-by: Brett A C Sheffield <bacs(a)librecast.net> --- Thanks again Willem for the prompt review. I've incorporated your suggested changes into v3. v3 changes: - add usleep instead of busy polling on sendmsg - simplify error handling by using error() and leaving cleanup to O/S - use loopback interface - don't bother creating TAP - send to localhost (::1) v2 changes: - remove superfluous namespace calls - unshare(2) suffices - remove usleep(). Don't wait for the interface to be ready, just send, and handle the (less likely) error case by retrying. 
- set destination address only once - document our use of the IPv6 link-local source address - send to port 9 (DISCARD) instead of 4242 (DONT PANIC) - ensure sockets are closed on failure paths - use KSFT exit codes for clarity v2: https://lore.kernel.org/netdev/20250831102908.14655-1-bacs@librecast.net v1: https://lore.kernel.org/netdev/20250825092548.4436-3-bacs@librecast.net tools/testing/selftests/net/.gitignore | 1 + tools/testing/selftests/net/Makefile | 1 + .../selftests/net/ipv6_fragmentation.c | 145 ++++++++++++++++++ 3 files changed, 147 insertions(+) create mode 100644 tools/testing/selftests/net/ipv6_fragmentation.c diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 47c293c2962f..3d4b4a53dfda 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -16,6 +16,7 @@ ip_local_port_range ipsec ipv6_flowlabel ipv6_flowlabel_mgr +ipv6_fragmentation log.txt msg_oob msg_zerocopy diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index eef0b8f8a7b0..276e0481d996 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -117,6 +117,7 @@ TEST_GEN_FILES += tfo TEST_PROGS += tfo_passive.sh TEST_PROGS += broadcast_pmtu.sh TEST_PROGS += ipv6_force_forwarding.sh +TEST_GEN_PROGS += ipv6_fragmentation TEST_PROGS += route_hint.sh # YNL files, must be before "include ..lib.mk" diff --git a/tools/testing/selftests/net/ipv6_fragmentation.c b/tools/testing/selftests/net/ipv6_fragmentation.c new file mode 100644 index 000000000000..be79a0340826 --- /dev/null +++ b/tools/testing/selftests/net/ipv6_fragmentation.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Brett A C Sheffield <bacs(a)librecast.net> + * + * Kernel selftest for the IPv6 fragmentation regression which affected stable + * kernels: + * + * https://lore.kernel.org/stable/aElivdUXqd1OqgMY@karahi.gladserv.com + * + * Commit: 
a18dfa9925b9 ("ipv6: save dontfrag in cork") was backported to stable + * without some prerequisite commits. + * + * This caused a regression when sending IPv6 UDP packets by preventing + * fragmentation and instead returning -1 (EMSGSIZE). + * + * This selftest demonstrates the issue by sending an IPv6 UDP packet to + * localhost (::1) on the loopback interface from the autoconfigured link-local + * address. + * + * sendmsg(2) returns bytes sent correctly on a working kernel, and returns -1 + * (EMSGSIZE) when the regression is present. + * + * The regression was not present in the mainline kernel, but add this test to + * catch similar breakage in future. + */ + +#define _GNU_SOURCE + +#include <error.h> +#include <fcntl.h> +#include <linux/if_tun.h> +#include <net/if.h> +#include <netinet/in.h> +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <unistd.h> +#include "../kselftest.h" + +#define MTU 1500 +#define LARGER_THAN_MTU 8192 + +/* bring up interface */ +static int interface_up(int ctl, struct ifreq *ifr) +{ + if (ioctl(ctl, SIOCGIFFLAGS, ifr) == -1) + error(KSFT_FAIL, errno, "ioctl SIOCGIFFLAGS"); + ifr->ifr_flags = ifr->ifr_flags | IFF_UP; + return ioctl(ctl, SIOCSIFFLAGS, ifr); +} + +/* no need to wait for DAD in our namespace */ +static int disable_dad(char *ifname) +{ + char sysvar[] = "/proc/sys/net/ipv6/conf/%s/accept_dad"; + char fname[IFNAMSIZ + sizeof(sysvar)]; + int fd; + + snprintf(fname, sizeof(fname), sysvar, ifname); + fd = open(fname, O_WRONLY); + if (fd == -1) + error(KSFT_FAIL, errno, "open accept_dad"); + if (write(fd, "0", 1) != 1) + error(KSFT_FAIL, errno, "write accept_dad"); + + return close(fd); +} + +static int setup(void) +{ + struct ifreq ifr = { + .ifr_name = "lo" + }; + int fd = -1; + int ctl; + + /* we need to set MTU, so do this in a namespace to play nicely */ + if (unshare(CLONE_NEWNET) == -1) + error(KSFT_FAIL, errno, "unshare"); + + 
ctl = socket(AF_LOCAL, SOCK_STREAM, 0); + if (ctl == -1) + error(KSFT_FAIL, errno, "socket"); + + /* ensure MTU is smaller than what we plan to send */ + ifr.ifr_mtu = MTU; + if (ioctl(ctl, SIOCSIFMTU, &ifr) == -1) + error(KSFT_FAIL, errno, "ioctl: set MTU"); + + disable_dad("lo"); + interface_up(ctl, &ifr); + + close(ctl); + return fd; +} + +int main(void) +{ + struct in6_addr addr = { + .s6_addr[15] = 0x01, /* ::1 */ + }; + struct sockaddr_in6 sa = { + .sin6_family = AF_INET6, + .sin6_addr = addr, + .sin6_port = 9 /* port 9/udp (DISCARD) */ + }; + char buf[LARGER_THAN_MTU] = {0}; + struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf)}; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_name = (struct sockaddr *)&sa, + .msg_namelen = sizeof(sa), + }; + ssize_t rc; + int ns_fd; + int err = KSFT_FAIL; + int s; + + printf("Testing IPv6 fragmentation\n"); + ns_fd = setup(); + s = socket(AF_INET6, SOCK_DGRAM, 0); +send_again: + rc = sendmsg(s, &msg, 0); + if (rc == -1) { + /* if interface wasn't ready, try again */ + if (errno == EADDRNOTAVAIL) { + usleep(1000); + goto send_again; + } + printf("[FAIL] sendmsg: %s\n", strerror(errno)); + } else if (rc != LARGER_THAN_MTU) { + printf("[FAIL] sendmsg() returned %zi, expected %i\n", rc, LARGER_THAN_MTU); + } + else { + printf("[PASS] sendmsg() returned %zi\n", rc); + err = KSFT_PASS; + } + close(s); + close(ns_fd); + return err; +} base-commit: 864ecc4a6dade82d3f70eab43dad0e277aa6fc78 -- 2.49.1

4 months, 1 week

1
0
0 0

[PATCH v5 0/7] Add support for FEAT_{LS64, LS64_V} and related tests

by Yicong Yang

From: Yicong Yang <yangyicong(a)hisilicon.com> Armv8.7 introduces single-copy atomic 64-byte loads and stores instructions and its variants named under FEAT_{LS64, LS64_V}. Add support for Armv8.7 FEAT_{LS64, LS64_V}: - Add identifying and enabling in the cpufeature list - Expose the support of these features to userspace through HWCAP3 and cpuinfo - Add related hwcap test - Handle the trap of unsupported memory (normal/uncacheable) access in a VM A real scenario for this feature is that the userspace driver can make use of this to implement direct WQE (workqueue entry) - a mechanism to fill WQE directly into the hardware. Picked Marc's 2 patches from [1] for handling the LS64 trap in a VM on emulated MMIO and the introduction of KVM_EXIT_ARM_LDST64B. [1] https://lore.kernel.org/linux-arm-kernel/20240815125959.2097734-1-maz@kerne… Tested with updated hwcap test: [root@localhost tmp]# dmesg | grep "All CPU(s) started" [ 14.789859] CPU: All CPU(s) started at EL2 [root@localhost tmp]# ./hwcap # LS64 present ok 217 cpuinfo_match_LS64 ok 218 sigill_LS64 ok 219 # SKIP sigbus_LS64_V # LS64_V present ok 220 cpuinfo_match_LS64_V ok 221 sigill_LS64_V ok 222 # SKIP sigbus_LS64_V # 115 skipped test(s) detected. Consider enabling relevant config options to improve coverage. # Totals: pass:107 fail:0 xfail:0 xpass:0 skip:115 error:0 root@localhost:/mnt# dmesg | grep "All CPU(s) started" [ 0.281152] CPU: All CPU(s) started at EL1 root@localhost:/mnt# ./hwcap # LS64 present ok 217 cpuinfo_match_LS64 ok 218 sigill_LS64 ok 219 # SKIP sigbus_LS64 # LS64_V present ok 220 cpuinfo_match_LS64_V ok 221 sigill_LS64_V ok 222 # SKIP sigbus_LS64_V # 115 skipped test(s) detected. Consider enabling relevant config options to improve coverage. 
# Totals: pass:107 fail:0 xfail:0 xpass:0 skip:115 error:0 Change since v4: - Rebase on v6.17-rc2 and fix the conflicts Link: https://lore.kernel.org/linux-arm-kernel/20250715081356.12442-1-yangyicong@… Change since v3: - Inject DABT fault for LS64 fault on unsupported memory but with valid memslot Link: https://lore.kernel.org/linux-arm-kernel/20250626080906.64230-1-yangyicong@… Change since v2: - Handle the LS64 fault to userspace and allow userspace to inject LS64 fault - Reorder the patches to make KVM handling prior to feature support Link: https://lore.kernel.org/linux-arm-kernel/20250331094320.35226-1-yangyicong@… Change since v1: - Drop the support for LS64_ACCDATA - handle the DABT of unsupported memory type after checking the memory attributes Link: https://lore.kernel.org/linux-arm-kernel/20241202135504.14252-1-yangyicong@… Marc Zyngier (2): KVM: arm64: Add exit to userspace on {LD,ST}64B* outside of memslots KVM: arm64: Add documentation for KVM_EXIT_ARM_LDST64B Yicong Yang (5): KVM: arm64: Handle DABT caused by LS64* instructions on unsupported memory arm64: Provide basic EL2 setup for FEAT_{LS64, LS64_V} usage at EL0/1 arm64: Add support for FEAT_{LS64, LS64_V} KVM: arm64: Enable FEAT_{LS64, LS64_V} in the supported guest kselftest/arm64: Add HWCAP test for FEAT_{LS64, LS64_V} Documentation/arch/arm64/booting.rst | 12 +++ Documentation/arch/arm64/elf_hwcaps.rst | 6 ++ Documentation/virt/kvm/api.rst | 43 +++++++++-- arch/arm64/include/asm/el2_setup.h | 12 ++- arch/arm64/include/asm/esr.h | 8 ++ arch/arm64/include/asm/hwcap.h | 2 + arch/arm64/include/asm/kvm_emulate.h | 7 ++ arch/arm64/include/uapi/asm/hwcap.h | 2 + arch/arm64/kernel/cpufeature.c | 51 +++++++++++++ arch/arm64/kernel/cpuinfo.c | 2 + arch/arm64/kvm/inject_fault.c | 22 ++++++ arch/arm64/kvm/mmio.c | 27 ++++++- arch/arm64/kvm/mmu.c | 14 +++- arch/arm64/tools/cpucaps | 2 + include/uapi/linux/kvm.h | 3 +- tools/testing/selftests/arm64/abi/hwcap.c | 90 +++++++++++++++++++++++ 16 files changed, 
292 insertions(+), 11 deletions(-) -- 2.24.0

4 months, 1 week

1
8
0 0

[PATCH v2] KVM: loongarch: selftests: Remove common tests built by TEST_GEN_PROGS_COMMON

by dayss1224＠gmail.com

From: Dong Yang <dayss1224(a)gmail.com> Remove the common KVM test cases already added to TEST_GEN_PROGS_COMMON as following: demand_paging_test dirty_log_test guest_print_test kvm_binary_stats_test kvm_create_max_vcpus kvm_page_table_test set_memory_region_test Fixes: a867688c8cbb ("KVM: selftests: Add supported test cases for LoongArch") Signed-off-by: Quan Zhou <zhouquan(a)iscas.ac.cn> Signed-off-by: Dong Yang <dayss1224(a)gmail.com> Changes in v2: - Add "TEST_GEN_PROGS_loongarch = $(TEST_GEN_PROGS_COMMON)" to include common tests --- tools/testing/selftests/kvm/Makefile.kvm | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index 38b95998e1e6..3fd1c2ae68d0 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -198,18 +198,12 @@ TEST_GEN_PROGS_riscv += coalesced_io_test TEST_GEN_PROGS_riscv += get-reg-list TEST_GEN_PROGS_riscv += steal_time +TEST_GEN_PROGS_loongarch = $(TEST_GEN_PROGS_COMMON) TEST_GEN_PROGS_loongarch += coalesced_io_test -TEST_GEN_PROGS_loongarch += demand_paging_test TEST_GEN_PROGS_loongarch += dirty_log_perf_test -TEST_GEN_PROGS_loongarch += dirty_log_test -TEST_GEN_PROGS_loongarch += guest_print_test TEST_GEN_PROGS_loongarch += hardware_disable_test -TEST_GEN_PROGS_loongarch += kvm_binary_stats_test -TEST_GEN_PROGS_loongarch += kvm_create_max_vcpus -TEST_GEN_PROGS_loongarch += kvm_page_table_test TEST_GEN_PROGS_loongarch += memslot_modification_stress_test TEST_GEN_PROGS_loongarch += memslot_perf_test -TEST_GEN_PROGS_loongarch += set_memory_region_test SPLIT_TESTS += arch_timer SPLIT_TESTS += get-reg-list base-commit: b23ff7e52a79f4fe2382e3564719b97b718166d1 -- 2.34.1

4 months, 1 week

1
0
0 0

[PATCHSET V1 0/2] cpuidle, bpf: Introduce BPF-based extensible cpuidle policy via struct_ops

by Lin Yikai

Summary ---------- Hi, everyone, This patch set introduces an extensible cpuidle governor framework using BPF struct_ops, enabling dynamic implementation of idle-state selection policies via BPF programs. Motivation ---------- As is well-known, CPUs support multiple idle states (e.g., C0, C1, C2, ...), where deeper states reduce power consumption, but result in longer wakeup latency, potentially affecting performance. Existing generic cpuidle governors operate effectively in common scenarios but exhibit suboptimal behavior in specific Android phone use cases. Our testing reveals that during low-utilization scenarios (e.g., screen-off background tasks like music playback with CPU utilization <10%), the C0 state occupies ~50% of idle time, causing significant energy inefficiency. Reducing C0 to ≤20% could yield ≥5% power savings on mobile phones. To address this, we expect: 1.Dynamic governor switching to power-saved policies for low cpu utilization scenarios (e.g., screen-off mode) 2.Dynamic switching to alternate governors for high-performance scenarios (e.g., gaming) Overview ---------- The BPF cpuidle ext governor registers at postcore_initcall() but remains disabled by default due to its low priority "rating" with value "1". Activation requires setting a higher "rating" than other governors from within BPF. Core Components: 1.**struct cpuidle_gov_ext_ops** – BPF-overridable operations: - ops.enable()/ops.disable(): enable or disable callback - ops.select(): cpu Idle-state selection logic - ops.set_stop_tick(): Scheduler tick management after state selection - ops.reflect(): feedback info about previous idle state. - ops.init()/ops.deinit(): Initialization or cleanup. 
2.**Critical kfuncs for kernel state access**: - bpf_cpuidle_ext_gov_update_rating(): Activate ext governor by raising rating must be called from "ops.init()" - bpf_cpuidle_ext_gov_latency_req(): get idle-state latency constraints - bpf_tick_nohz_get_sleep_length(): get CPU sleep duration in tickless mode Future work ---------- 1. Scenario detection: Identifying low-utilization states (e.g., screen-off + background music) 2. Policy optimization: Optimizing state-selection algorithms for specific scenarios Lin Yikai (2): Subject: [PATCH v1 1/2] cpuidle: Implement BPF extensible cpuidle class Subject: [PATCH v1 2/2] selftests/bpf: Add selftests drivers/cpuidle/Kconfig | 12 + drivers/cpuidle/governors/Makefile | 1 + drivers/cpuidle/governors/ext.c | 537 ++++++++++++++++++ .../bpf/prog_tests/test_cpuidle_gov_ext.c | 28 + .../selftests/bpf/progs/cpuidle_gov_ext.c | 208 +++++++ 5 files changed, 786 insertions(+) create mode 100644 drivers/cpuidle/governors/ext.c create mode 100644 tools/testing/selftests/bpf/prog_tests/test_cpuidle_gov_ext.c create mode 100644 tools/testing/selftests/bpf/progs/cpuidle_gov_ext.c -- 2.43.0

4 months, 1 week

4
6
0 0

[PATCH net-next] selftests: net: add test for ipv6 fragmentation

by Brett A C Sheffield

Add selftest for the IPv6 fragmentation regression which affected several stable kernels. Commit a18dfa9925b9 ("ipv6: save dontfrag in cork") was backported to stable without some prerequisite commits. This caused a regression when sending IPv6 UDP packets by preventing fragmentation and instead returning -1 (EMSGSIZE). Add selftest to check for this issue by attempting to send a packet larger than the interface MTU. The packet will be fragmented on a working kernel, with sendmsg(2) correctly returning the expected number of bytes sent. When the regression is present, sendmsg returns -1 and sets errno to EMSGSIZE. Signed-off-by: Brett A C Sheffield <bacs(a)librecast.net> Link: https://lore.kernel.org/stable/aElivdUXqd1OqgMY@karahi.gladserv.com --- tools/testing/selftests/net/.gitignore | 1 + tools/testing/selftests/net/Makefile | 1 + .../selftests/net/ipv6_fragmentation.c | 204 ++++++++++++++++++ 3 files changed, 206 insertions(+) create mode 100644 tools/testing/selftests/net/ipv6_fragmentation.c diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 47c293c2962f..3d4b4a53dfda 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -16,6 +16,7 @@ ip_local_port_range ipsec ipv6_flowlabel ipv6_flowlabel_mgr +ipv6_fragmentation log.txt msg_oob msg_zerocopy diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index b31a71f2b372..f83f91b758ae 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -117,6 +117,7 @@ TEST_GEN_FILES += tfo TEST_PROGS += tfo_passive.sh TEST_PROGS += broadcast_pmtu.sh TEST_PROGS += ipv6_force_forwarding.sh +TEST_GEN_PROGS += ipv6_fragmentation # YNL files, must be before "include ..lib.mk" YNL_GEN_FILES := busy_poller netlink-dumps diff --git a/tools/testing/selftests/net/ipv6_fragmentation.c b/tools/testing/selftests/net/ipv6_fragmentation.c new file mode 100644 index 
000000000000..21e1a3cdc63d --- /dev/null +++ b/tools/testing/selftests/net/ipv6_fragmentation.c @@ -0,0 +1,204 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Brett A C Sheffield <bacs(a)librecast.net> + * + * Kernel selftest for the IPv6 fragmentation regression which affected + * stable kernels: + * + * https://lore.kernel.org/stable/aElivdUXqd1OqgMY@karahi.gladserv.com + * + * Commit: + * a18dfa9925b9 ("ipv6: save dontfrag in cork") + * was backported to stable without some prerequisite commits. + * + * This caused a regression when sending IPv6 UDP packets by preventing + * fragmentation and instead returning -1 (EMSGSIZE). + * + * This selftest demonstrates the issue. sendmsg returns correctly (8192) + * on a working kernel, and returns -1 (EMSGSIZE) when the regression is + * present. + * + * The regression was not present in the mainline kernel, but add this test to + * catch similar breakage in future. + */ + +#define _GNU_SOURCE + +#include <fcntl.h> +#include <linux/if_tun.h> +#include <net/if.h> +#include <netinet/in.h> +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <unistd.h> + +#define MTU 1500 +#define LARGER_THAN_MTU 8192 + +/* ensure MTU is smaller than what we plan to send */ +static int set_mtu(int ctl, char *ifname, struct ifreq *ifr) +{ + ifr->ifr_mtu = MTU; + return ioctl(ctl, SIOCSIFMTU, ifr); +} + +/* bring up interface */ +static int interface_up(int ctl, char *ifname, struct ifreq *ifr) +{ + if (ioctl(ctl, SIOCGIFFLAGS, ifr) == -1) { + perror("ioctl SIOCGIFFLAGS"); + return -1; + } + ifr->ifr_flags = ifr->ifr_flags | IFF_UP; + return ioctl(ctl, SIOCSIFFLAGS, ifr); +} + +/* no need to wait for DAD in our namespace */ +static int disable_dad(char *ifname) +{ + char sysvar[] = "/proc/sys/net/ipv6/conf/%s/accept_dad"; + char fname[IFNAMSIZ + sizeof(sysvar)]; + int fd; + + snprintf(fname, sizeof(fname), sysvar, ifname); + fd = open(fname, 
O_WRONLY); + if (fd == -1) { + perror("open accept_dad"); + return -1; + } + if (write(fd, "0", 1) != 1) { + perror("write"); + return -1; + } + return close(fd); +} + +/* create TAP interface that will be deleted when this process exits */ +static int create_interface(int ctl, char *ifname, struct ifreq *ifr) +{ + int fd; + + fd = open("/dev/net/tun", O_RDWR); + if (fd == -1) { + perror("open tun"); + return -1; + } + + ifr->ifr_flags = IFF_TAP | IFF_NO_PI; + if (ioctl(fd, TUNSETIFF, (void *)ifr) == -1) { + close(fd); + perror("ioctl: TUNSETIFF"); + return -1; + } + strcpy(ifname, ifr->ifr_name); + + return fd; +} + +/* we need to set MTU, so do this in a namespace to play nicely */ +static int create_namespace(void) +{ + const char *netns_path = "/proc/self/ns/net"; + int fd; + + if (unshare(CLONE_NEWNET) != 0) { + perror("unshare"); + return -1; + } + + fd = open(netns_path, O_RDONLY); + if (fd == -1) { + perror("open"); + return -1; + } + + if (setns(fd, CLONE_NEWNET)) { + perror("setns"); + return -1; + } + + return 0; +} + +static int setup(void) +{ + struct ifreq ifr = {0}; + char ifname[IFNAMSIZ]; + int fd = -1; + int ctl; + + if (create_namespace() == -1) + return -1; + + ctl = socket(AF_LOCAL, SOCK_STREAM, 0); + if (ctl == -1) + return -1; + + memset(ifname, 0, sizeof(ifname)); + fd = create_interface(ctl, ifname, &ifr); + if (fd == -1) + goto err_close_ctl; + if (disable_dad(ifname) == -1) + goto err_close_fd; + if (interface_up(ctl, ifname, &ifr) == -1) + goto err_close_fd; + if (set_mtu(ctl, ifname, &ifr) == -1) + goto err_close_fd; + usleep(10000); /* give interface a moment to wake up */ + goto err_close_ctl; +err_close_fd: + close(fd); + fd = -1; +err_close_ctl: + close(ctl); + return fd; +} + +int main(void) +{ + /* address doesn't matter, use an IPv6 multicast address for simplicity */ + struct in6_addr addr = { + .s6_addr[0] = 0xff, /* multicast */ + .s6_addr[1] = 0x12, /* set flags (T, link-local) */ + }; + struct sockaddr_in6 sa = { + 
.sin6_family = AF_INET6, + .sin6_addr = addr, + .sin6_port = 4242 + }; + char buf[LARGER_THAN_MTU] = {0}; + struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf)}; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_name = (struct sockaddr *)&sa, + .msg_namelen = sizeof(sa), + }; + ssize_t rc; + int ns_fd; + int s; + + printf("Testing IPv6 fragmentation\n"); + ns_fd = setup(); + if (ns_fd == -1) + return 1; + s = socket(AF_INET6, SOCK_DGRAM, 0); + msg.msg_name = (struct sockaddr *)&sa; + msg.msg_namelen = sizeof(sa); + rc = sendmsg(s, &msg, 0); + if (rc == -1) { + perror("send"); + return 1; + } else if (rc != LARGER_THAN_MTU) { + fprintf(stderr, "send() returned %zi\n", rc); + return 1; + } + close(s); + close(ns_fd); + + return 0; +} -- 2.49.1

4 months, 1 week

3
5
0 0

[PATCH] kselftest/arm64: Don't open code SVE_PT_SIZE() in fp-ptrace

by Mark Brown

In fp-ptrace, when allocating a buffer to write SVE register data we open code the addition of the header size to the VL dependent register data size, which led to an underallocation bug when we cut'n'pasted the code for FPSIMD format writes. Use the SVE_PT_SIZE() macro that the kernel UAPI provides for this. Fixes: b84d2b27954f ("kselftest/arm64: Test FPSIMD format data writes via NT_ARM_SVE in fp-ptrace") Signed-off-by: Mark Brown <broonie(a)kernel.org> --- tools/testing/selftests/arm64/fp/fp-ptrace.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.c b/tools/testing/selftests/arm64/fp/fp-ptrace.c index 124bc883365e..cdd7a45c045d 100644 --- a/tools/testing/selftests/arm64/fp/fp-ptrace.c +++ b/tools/testing/selftests/arm64/fp/fp-ptrace.c @@ -1187,7 +1187,7 @@ static void sve_write_sve(pid_t child, struct test_config *config) if (!vl) return; - iov.iov_len = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE); + iov.iov_len = SVE_PT_SIZE(vq, SVE_PT_REGS_SVE); iov.iov_base = malloc(iov.iov_len); if (!iov.iov_base) { ksft_print_msg("Failed allocating %lu byte SVE write buffer\n", @@ -1234,8 +1234,7 @@ static void sve_write_fpsimd(pid_t child, struct test_config *config) if (!vl) return; - iov.iov_len = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, - SVE_PT_REGS_FPSIMD); + iov.iov_len = SVE_PT_SIZE(vq, SVE_PT_REGS_FPSIMD); iov.iov_base = malloc(iov.iov_len); if (!iov.iov_base) { ksft_print_msg("Failed allocating %lu byte SVE write buffer\n", --- base-commit: 8f5ae30d69d7543eee0d70083daf4de8fe15d585 change-id: 20250808-arm64-fp-trace-macro-02ede083da51 Best regards, -- Mark Brown <broonie(a)kernel.org>

4 months, 1 week

2
1
0 0

[PATCHv4 iproute2-next] iplink: bond_slave: add support for actor_port_prio

by Hangbin Liu

Add support for the actor_port_prio option for bond slaves. This per-port priority can be used by the bonding driver in ad_select to choose the higher-priority aggregator during failover. Signed-off-by: Hangbin Liu <liuhangbin(a)gmail.com> --- v4: no update v3: rename ad_actor_port_prio to actor_port_prio v2: no update --- ip/iplink_bond.c | 1 + ip/iplink_bond_slave.c | 18 ++++++++++++++++-- man/man8/ip-link.8.in | 6 ++++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/ip/iplink_bond.c b/ip/iplink_bond.c index d6960f6d9b03..1a2c1b3042a0 100644 --- a/ip/iplink_bond.c +++ b/ip/iplink_bond.c @@ -91,6 +91,7 @@ static const char *ad_select_tbl[] = { "stable", "bandwidth", "count", + "prio", NULL, }; diff --git a/ip/iplink_bond_slave.c b/ip/iplink_bond_slave.c index ad6875006950..c88100e248dd 100644 --- a/ip/iplink_bond_slave.c +++ b/ip/iplink_bond_slave.c @@ -15,7 +15,9 @@ static void print_explain(FILE *f) { - fprintf(f, "Usage: ... bond_slave [ queue_id ID ] [ prio PRIORITY ]\n"); + fprintf(f, "Usage: ... 
bond_slave [ queue_id ID ] [ prio PRIORITY ]\n" + " [ actor_port_prio PRIORITY ]\n" + ); } static void explain(void) @@ -145,12 +147,18 @@ static void bond_slave_print_opt(struct link_util *lu, FILE *f, struct rtattr *t state); print_slave_oper_state(f, "ad_partner_oper_port_state_str", state); } + + if (tb[IFLA_BOND_SLAVE_ACTOR_PORT_PRIO]) + print_int(PRINT_ANY, + "actor_port_prio", + "actor_port_prio %d ", + rta_getattr_u16(tb[IFLA_BOND_SLAVE_ACTOR_PORT_PRIO])); } static int bond_slave_parse_opt(struct link_util *lu, int argc, char **argv, struct nlmsghdr *n) { - __u16 queue_id; + __u16 queue_id, actor_port_prio; int prio; while (argc > 0) { @@ -164,6 +172,12 @@ static int bond_slave_parse_opt(struct link_util *lu, int argc, char **argv, if (get_s32(&prio, *argv, 0)) invarg("prio is invalid", *argv); addattr32(n, 1024, IFLA_BOND_SLAVE_PRIO, prio); + } else if (strcmp(*argv, "actor_port_prio") == 0) { + NEXT_ARG(); + if (get_u16(&actor_port_prio, *argv, 0)) + invarg("actor prio is invalid", *argv); + addattr16(n, 1024, IFLA_BOND_SLAVE_ACTOR_PORT_PRIO, + actor_port_prio); } else { if (matches(*argv, "help") != 0) fprintf(stderr, diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in index e3297c577152..7995943ab11f 100644 --- a/man/man8/ip-link.8.in +++ b/man/man8/ip-link.8.in @@ -2846,6 +2846,12 @@ the following additional arguments are supported: (a 32bit signed value). This option only valid for active-backup(1), balance-tlb (5) and balance-alb (6) mode. +.sp +.BI actor_port_prio " PRIORITY" +- set the slave's ad actor port priority for 802.3ad aggregation selection +logic during failover (a 16bit unsigned value). This option only valid for +802.3ad (4) mode. + .in -8 .TP -- 2.50.1

4 months, 1 week

2
5
0 0

[PATCH 1/9] KVM: selftests: Add pidfd_open syscall number fallback

by Aqib Faruqui

The kselftest-harness uses pidfd_open() for test timeout handling, but non-glibc C library headers may not define this syscall number. Add architecture-specific fallback definitions to pidfd.h, including support for Alpha (544) and other architectures (434). Update kselftest_harness.h to include pidfd.h for the syscall definitions. Signed-off-by: Aqib Faruqui <aqibaf(a)amazon.com> --- tools/testing/selftests/kselftest_harness.h | 1 + tools/testing/selftests/pidfd/pidfd.h | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 2925e47db..1dd3e5a1b 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -69,6 +69,7 @@ #include <unistd.h> #include "kselftest.h" +#include "pidfd/pidfd.h" #define TEST_TIMEOUT_DEFAULT 30 diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h index f87993def..c373ff18e 100644 --- a/tools/testing/selftests/pidfd/pidfd.h +++ b/tools/testing/selftests/pidfd/pidfd.h @@ -45,8 +45,12 @@ #endif #ifndef __NR_pidfd_open +#ifdef __alpha__ +#define __NR_pidfd_open 544 +#else #define __NR_pidfd_open 434 #endif +#endif #ifndef __NR_pidfd_send_signal #define __NR_pidfd_send_signal 424 -- 2.47.3

4 months, 1 week

2
1
0 0

[PATCH v4 00/15] Consolidate iommu page table implementations (AMD)

by Jason Gunthorpe

[All the precursor patches are merged now and AMD/RISCV/VTD conversions are written] Currently each of the iommu page table formats duplicates all of the logic to maintain the page table and perform map/unmap/etc operations. There are several different versions of the algorithms between all the different formats. The io-pgtable system provides an interface to help isolate the page table code from the iommu driver, but doesn't provide tools to implement the common algorithms. This makes it very hard to improve the state of the pagetable code under the iommu domains as any proposed improvement needs to alter a large number of different driver code paths. Combined with a lack of software based testing this makes improvement in this area very hard. iommufd wants several new page table operations: - More efficient map/unmap operations, using iommufd's batching logic - unmap that returns the physical addresses into a batch as it progresses - cut that allows splitting areas so large pages can have holes poked in them dynamically (ie guestmemfd hitless shared/private transitions) - More aggressive freeing of table memory to avoid waste - Fragmenting large pages so that dirty tracking can be more granular - Reassembling large pages so that VMs can run at full IO performance in migration/dirty tracking error flows - KHO integration for kernel live upgrade Together these are algorithmically complex enough to be a very significant task to go and implement in all the page table formats we support. Just the "server" focused drivers use almost all the formats (ARMv8 S1&S2 / x86 PAE / AMDv1 / VT-D SS / RISCV) Instead of doing the duplicated work, this series takes the first step to consolidate the algorithms into one place. In spirit it is similar to the work Christoph did a few years back to pull the redundant get_user_pages() implementations out of the arch code into core MM. This unlocked a great deal of improvement in that space in the following years. 
I would like to see the same benefit in iommu as well. My first RFC showed a bigger picture with almost all formats and more algorithms. This series reorganizes that to be narrowly focused on just enough to convert the AMD driver to use the new mechanism. kunit tests are provided that allow good testing of the algorithms and all formats on x86, nothing is arch specific. AMD is one of the simpler options as the HW is quite uniform with few different options/bugs while still requiring the complicated contiguous pages support. The HW also has a very simple range based invalidation approach that is easy to implement. The AMD v1 and AMD v2 page table formats are implemented bit for bit identical to the current code, tested using a compare kunit test that checks against the io-pgtable version (on github, see below). Updating the AMD driver to replace the io-pgtable layer with the new stuff is fairly straightforward now. The layering is fixed up in the new version so that all the invalidation goes through function pointers. Several small fixing patches have come out of this as I've been fixing the problems that the test suite uncovers in the current code, and implementing the fixed version in iommupt. On performance, there is a quite wide variety of implementation designs across all the drivers. 
Looking at some key performance across the main formats: iommu_map(): pgsz ,avg new,old ns, min new,old ns , min % (+ve is better) 2^12, 53,66 , 51,63 , 19.19 (AMDV1) 256*2^12, 386,1909 , 367,1795 , 79.79 256*2^21, 362,1633 , 355,1556 , 77.77 2^12, 56,62 , 52,59 , 11.11 (AMDv2) 256*2^12, 405,1355 , 357,1292 , 72.72 256*2^21, 393,1160 , 358,1114 , 67.67 2^12, 55,65 , 53,62 , 14.14 (VTD second stage) 256*2^12, 391,518 , 332,512 , 35.35 256*2^21, 383,635 , 336,624 , 46.46 2^12, 57,65 , 55,63 , 12.12 (ARM 64 bit) 256*2^12, 380,389 , 361,369 , 2.02 256*2^21, 358,419 , 345,400 , 13.13 iommu_unmap(): pgsz ,avg new,old ns, min new,old ns , min % (+ve is better) 2^12, 69,88 , 65,85 , 23.23 (AMDv1) 256*2^12, 353,6498 , 331,6029 , 94.94 256*2^21, 373,6014 , 360,5706 , 93.93 2^12, 71,72 , 66,69 , 4.04 (AMDv2) 256*2^12, 228,891 , 206,871 , 76.76 256*2^21, 254,721 , 245,711 , 65.65 2^12, 69,87 , 65,82 , 20.20 (VTD second stage) 256*2^12, 210,321 , 200,315 , 36.36 256*2^21, 255,349 , 238,342 , 30.30 2^12, 72,77 , 68,74 , 8.08 (ARM 64 bit) 256*2^12, 521,357 , 447,346 , -29.29 256*2^21, 489,358 , 433,345 , -25.25 * Above numbers include additional patches to remove the iommu_pgsize() overheads. gcc 13.3.0, i7-12700 This version provides fairly consistent performance across formats. ARM unmap performance is quite different because this version supports contiguous pages and uses a very different algorithm for unmapping. Though why it is so worse compared to AMDv1 I haven't figured out yet. The per-format commits include a more detailed chart. 
There is a second branch: https://github.com/jgunthorpe/linux/commits/iommu_pt_all Containing supporting work and future steps: - ARM short descriptor (32 bit), ARM long descriptor (64 bit) formats - RISCV format and RISCV conversion https://github.com/jgunthorpe/linux/commits/iommu_pt_riscv - Support for a DMA incoherent HW page table walker - VT-D second stage format and VT-D conversion https://github.com/jgunthorpe/linux/commits/iommu_pt_vtd - DART v1 & v2 format - Draft of a iommufd 'cut' operation to break down huge pages - A compare test that checks the iommupt formats against the iopgtable interface, including updating AMD to have a working iopgtable and patches to make VT-D have an iopgtable for testing. - A performance test to micro-benchmark map and unmap against iogptable My strategy is to go one by one for the drivers: - AMD driver conversion - RISCV page table and driver - Intel VT-D driver and VTDSS page table - Flushing improvements for RISCV - ARM SMMUv3 And concurrently work on the algorithm side: - debugfs content dump, like VT-D has - Cut support - Increase/Decrease page size support - map/unmap batching - KHO As we make more algorithm improvements the value to convert the drivers increases. 
This is on github: https://github.com/jgunthorpe/linux/commits/iommu_pt v3: - Rebase on v6.16-rc3 - Integrate the HATS/HATDis changes - Remove 'default n' from kconfig - Remove unused 'PT_FIXED_TOP_LEVEL' - Improve comments and documentation - Fix some compile warnings from kbuild robots v2: https://patch.msgid.link/r/0-v3-a93aab628dbc+521-iommu_pt_jgg@nvidia.com - Rebase on v6.16-rc2 - s/PT_ENTRY_WORD_SIZE/PT_ITEM_WORD_SIZE/s to follow the language better - Comment and documentation updates - Add PT_TOP_PHYS_MASK to help manage alignment restrictions on the top pointer - Add missed force_aperture = true - Make pt_iommu_deinit() take care of the not-yet-inited error case internally as AMD/RISCV/VTD all shared this logic - Change gather_range() into gather_range_pages() so it also deals with the page list. This makes the following cache flushing series simpler - Fix missed update of unmap->unmapped in some error cases - Change clear_contig() to order the gather more logically - Remove goto from the error handling in __map_range_leaf() - s/log2_/oalog2_/ in places where the argument is an oaddr_t - Pass the pts to pt_table_install64/32() - Do not use SIGN_EXTEND for the AMDv2 page table because of Vasant's information on how PASID 0 works. 
v1: https://patch.msgid.link/r/0-v2-5c26bde5c22d+58b-iommu_pt_jgg@nvidia.com - AMD driver only, many code changes RFC: https://lore.kernel.org/all/0-v1-01fa10580981+1d-iommu_pt_jgg@nvidia.com/ Cc: Michael Roth <michael.roth(a)amd.com> Cc: Alexey Kardashevskiy <aik(a)amd.com> Cc: Pasha Tatashin <pasha.tatashin(a)soleen.com> Cc: James Gowans <jgowans(a)amazon.com> Signed-off-by: Jason Gunthorpe <jgg(a)nvidia.com> Alejandro Jimenez (1): iommu/amd: Use the generic iommu page table Jason Gunthorpe (14): genpt: Generic Page Table base API genpt: Add Documentation/ files iommupt: Add the basic structure of the iommu implementation iommupt: Add the AMD IOMMU v1 page table format iommupt: Add iova_to_phys op iommupt: Add unmap_pages op iommupt: Add map_pages op iommupt: Add read_and_clear_dirty op iommupt: Add a kunit test for Generic Page Table iommupt: Add a mock pagetable format for iommufd selftest to use iommufd: Change the selftest to use iommupt instead of xarray iommupt: Add the x86 64 bit page table format iommu/amd: Remove AMD io_pgtable support iommupt: Add a kunit test for the IOMMU implementation .clang-format | 1 + Documentation/driver-api/generic_pt.rst | 140 ++ Documentation/driver-api/index.rst | 1 + drivers/iommu/Kconfig | 2 + drivers/iommu/Makefile | 1 + drivers/iommu/amd/Kconfig | 5 +- drivers/iommu/amd/Makefile | 2 +- drivers/iommu/amd/amd_iommu.h | 1 - drivers/iommu/amd/amd_iommu_types.h | 109 +- drivers/iommu/amd/io_pgtable.c | 560 -------- drivers/iommu/amd/io_pgtable_v2.c | 370 ------ drivers/iommu/amd/iommu.c | 538 ++++---- drivers/iommu/generic_pt/.kunitconfig | 13 + drivers/iommu/generic_pt/Kconfig | 67 + drivers/iommu/generic_pt/fmt/Makefile | 26 + drivers/iommu/generic_pt/fmt/amdv1.h | 409 ++++++ drivers/iommu/generic_pt/fmt/defs_amdv1.h | 21 + drivers/iommu/generic_pt/fmt/defs_x86_64.h | 21 + drivers/iommu/generic_pt/fmt/iommu_amdv1.c | 15 + drivers/iommu/generic_pt/fmt/iommu_mock.c | 10 + drivers/iommu/generic_pt/fmt/iommu_template.h | 48 + 
drivers/iommu/generic_pt/fmt/iommu_x86_64.c | 11 + drivers/iommu/generic_pt/fmt/x86_64.h | 248 ++++ drivers/iommu/generic_pt/iommu_pt.h | 1146 +++++++++++++++++ drivers/iommu/generic_pt/kunit_generic_pt.h | 717 +++++++++++ drivers/iommu/generic_pt/kunit_iommu.h | 183 +++ drivers/iommu/generic_pt/kunit_iommu_pt.h | 451 +++++++ drivers/iommu/generic_pt/pt_common.h | 354 +++++ drivers/iommu/generic_pt/pt_defs.h | 323 +++++ drivers/iommu/generic_pt/pt_fmt_defaults.h | 193 +++ drivers/iommu/generic_pt/pt_iter.h | 636 +++++++++ drivers/iommu/generic_pt/pt_log2.h | 130 ++ drivers/iommu/io-pgtable.c | 4 - drivers/iommu/iommufd/Kconfig | 1 + drivers/iommu/iommufd/iommufd_test.h | 11 +- drivers/iommu/iommufd/selftest.c | 438 +++---- include/linux/generic_pt/common.h | 166 +++ include/linux/generic_pt/iommu.h | 270 ++++ include/linux/io-pgtable.h | 2 - tools/testing/selftests/iommu/iommufd.c | 60 +- tools/testing/selftests/iommu/iommufd_utils.h | 12 + 41 files changed, 6124 insertions(+), 1592 deletions(-) create mode 100644 Documentation/driver-api/generic_pt.rst delete mode 100644 drivers/iommu/amd/io_pgtable.c delete mode 100644 drivers/iommu/amd/io_pgtable_v2.c create mode 100644 drivers/iommu/generic_pt/.kunitconfig create mode 100644 drivers/iommu/generic_pt/Kconfig create mode 100644 drivers/iommu/generic_pt/fmt/Makefile create mode 100644 drivers/iommu/generic_pt/fmt/amdv1.h create mode 100644 drivers/iommu/generic_pt/fmt/defs_amdv1.h create mode 100644 drivers/iommu/generic_pt/fmt/defs_x86_64.h create mode 100644 drivers/iommu/generic_pt/fmt/iommu_amdv1.c create mode 100644 drivers/iommu/generic_pt/fmt/iommu_mock.c create mode 100644 drivers/iommu/generic_pt/fmt/iommu_template.h create mode 100644 drivers/iommu/generic_pt/fmt/iommu_x86_64.c create mode 100644 drivers/iommu/generic_pt/fmt/x86_64.h create mode 100644 drivers/iommu/generic_pt/iommu_pt.h create mode 100644 drivers/iommu/generic_pt/kunit_generic_pt.h create mode 100644 
drivers/iommu/generic_pt/kunit_iommu.h create mode 100644 drivers/iommu/generic_pt/kunit_iommu_pt.h create mode 100644 drivers/iommu/generic_pt/pt_common.h create mode 100644 drivers/iommu/generic_pt/pt_defs.h create mode 100644 drivers/iommu/generic_pt/pt_fmt_defaults.h create mode 100644 drivers/iommu/generic_pt/pt_iter.h create mode 100644 drivers/iommu/generic_pt/pt_log2.h create mode 100644 include/linux/generic_pt/common.h create mode 100644 include/linux/generic_pt/iommu.h base-commit: 8da0d63bd5726ff656bfa1eacb45d6f5cce65616 -- 2.43.0

4 months, 1 week

2
36
0 0

[PATCH 3/9] KVM: selftests: Add pthread_attr_setaffinity_np fallback

by Aqib Faruqui

The pthread_attr_setaffinity_np function is a GNU extension that may not be available in non-glibc C libraries. Some KVM selftests use this function for CPU affinity control. Add a function declaration and weak stub implementation for non-glibc builds. This allows tests to build, with the affinity setting being a no-op and errno set for the caller when the actual function is not available. Signed-off-by: Aqib Faruqui <aqibaf(a)amazon.com> --- tools/testing/selftests/kvm/include/kvm_util.h | 4 ++++ tools/testing/selftests/kvm/lib/kvm_util.c | 11 +++++++++++ 2 files changed, 15 insertions(+) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 7fae7f5e7..8177178b5 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -31,6 +31,10 @@ #include "kvm_util_types.h" #include "sparsebit.h" +#ifndef __GLIBC__ +int pthread_attr_setaffinity_np(pthread_attr_t *attr, size_t cpusetsize, const cpu_set_t *cpuset); +#endif /* __GLIBC__ */ + #define KVM_DEV_PATH "/dev/kvm" #define KVM_MAX_VCPUS 512 diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index c3f5142b0..5ce80303d 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -20,6 +20,17 @@ #define KVM_UTIL_MIN_PFN 2 +#ifndef __GLIBC__ +int __attribute__((weak)) +pthread_attr_setaffinity_np(pthread_attr_t *__attr, + size_t __cpusetsize, + const cpu_set_t *__cpuset) +{ + errno = ENOSYS; + return -1; +} +#endif + uint32_t guest_random_seed; struct guest_random_state guest_rng; static uint32_t last_guest_seed; -- 2.47.3

4 months, 1 week

2
1
0 0

[PATCH 5/9] KVM: selftests: Prevent PAGE_SIZE redefinition on x86

by Aqib Faruqui

Prevent PAGE_SIZE redefinition warnings that can occur due to namespace pollution from included headers. Add an #ifndef directive before defining PAGE_SIZE to avoid redefinition conflicts. Signed-off-by: Aqib Faruqui <aqibaf(a)amazon.com> --- tools/testing/selftests/kvm/include/x86/processor.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index 2efb05c2f..3f93d1b4f 100644 --- a/tools/testing/selftests/kvm/include/x86/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -368,7 +368,9 @@ static inline unsigned int x86_model(unsigned int eax) #define PHYSICAL_PAGE_MASK GENMASK_ULL(51, 12) #define PAGE_SHIFT 12 +#ifndef PAGE_SIZE #define PAGE_SIZE (1ULL << PAGE_SHIFT) +#endif #define PAGE_MASK (~(PAGE_SIZE-1) & PHYSICAL_PAGE_MASK) #define HUGEPAGE_SHIFT(x) (PAGE_SHIFT + (((x) - 1) * 9)) -- 2.47.3

4 months, 1 week

2
1
0 0

[PATCH bpf-next v5 0/2] Fix bpf_strnstr len error

by Rong Tao

From: Rong Tao <rongtao(a)cestc.cn> Fix bpf_strnstr() handling of the 'len' parameter: bpf_strnstr("open", "open", 4) should return 0 instead of -ENOENT. Also fix the more general case where s2 is a suffix of the first len characters of s1. Rong Tao (2): bpf/helpers: bpf_strnstr: Exact match length selftests/bpf: Add tests for bpf_strnstr kernel/bpf/helpers.c | 9 ++++++++- .../testing/selftests/bpf/progs/string_kfuncs_success.c | 8 ++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) --- v5: Expand the commit message and change the comment in patch 0001; v4: Add comment and more selftests; https://lore.kernel.org/all/tencent_CBD40091C14056E8298BE3725B65EE156405@qq… v3: Fix selftests/bpf error in v2, sorry about that; https://lore.kernel.org/lkml/tencent_69BD268FBA201219240B51661D5E96A8D80A@q… v2: Follow Andrii Nakryiko's advice, fix the 'wrong fix'; https://lore.kernel.org/lkml/tencent_DF4DA83EEBFB9246E5E3357BB40911CCA005@q… v1: https://lore.kernel.org/lkml/tencent_65E5988AD52BEC280D22964189505CD6ED06@q… -- 2.51.0

4 months, 1 week

2
1
0 0

[PATCH bpf-next v5 1/2] bpf/helpers: bpf_strnstr: Exact match length

by Rong Tao

From: Rong Tao <rongtao(a)cestc.cn> strnstr should not treat the ending '\0' of s2 as a matching character when the parameter 'len' is equal to the length of s2, for example: 1. bpf_strnstr("openat", "open", 4) = -ENOENT 2. bpf_strnstr("openat", "open", 5) = 0 This patch makes (1) return 0, fixing the `len == strlen(s2)` case. It also fixes the more general case where s2 is a suffix of the first len characters of s1. Fixes: e91370550f1f ("bpf: Add kfuncs for read-only string operations") Signed-off-by: Rong Tao <rongtao(a)cestc.cn> --- kernel/bpf/helpers.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 401b4932cc49..91ad124844ae 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -3672,10 +3672,17 @@ __bpf_kfunc int bpf_strnstr(const char *s1__ign, const char *s2__ign, size_t len guard(pagefault)(); for (i = 0; i < XATTR_SIZE_MAX; i++) { - for (j = 0; i + j < len && j < XATTR_SIZE_MAX; j++) { + for (j = 0; i + j <= len && j < XATTR_SIZE_MAX; j++) { __get_kernel_nofault(&c2, s2__ign + j, char, err_out); if (c2 == '\0') return i; + /** + * We allow reading an extra byte from s2 (note the + * `i + j <= len` above) to cover the case when s2 is + * a suffix of the first len chars of s1. + */ + if (i + j == len) + break; __get_kernel_nofault(&c1, s1__ign + j, char, err_out); if (c1 == '\0') return -ENOENT; -- 2.51.0

4 months, 1 week

2
1
0 0

[PATCH bpf-next v5 2/2] selftests/bpf: Add tests for bpf_strnstr

by Rong Tao

From: Rong Tao <rongtao(a)cestc.cn> Add tests for bpf_strnstr(): bpf_strnstr("", "", 0) = 0 bpf_strnstr("hello world", "hello", 5) = 0 bpf_strnstr(str, "hello", 4) = -ENOENT bpf_strnstr("", "a", 0) = -ENOENT Signed-off-by: Rong Tao <rongtao(a)cestc.cn> --- tools/testing/selftests/bpf/progs/string_kfuncs_success.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c index 46697f381878..a47690174e0e 100644 --- a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c +++ b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c @@ -30,8 +30,12 @@ __test(2) int test_strcspn(void *ctx) { return bpf_strcspn(str, "lo"); } __test(6) int test_strstr_found(void *ctx) { return bpf_strstr(str, "world"); } __test(-ENOENT) int test_strstr_notfound(void *ctx) { return bpf_strstr(str, "hi"); } __test(0) int test_strstr_empty(void *ctx) { return bpf_strstr(str, ""); } -__test(0) int test_strnstr_found(void *ctx) { return bpf_strnstr(str, "hello", 6); } -__test(-ENOENT) int test_strnstr_notfound(void *ctx) { return bpf_strnstr(str, "hi", 10); } +__test(0) int test_strnstr_found1(void *ctx) { return bpf_strnstr("", "", 0); } +__test(0) int test_strnstr_found2(void *ctx) { return bpf_strnstr(str, "hello", 5); } +__test(0) int test_strnstr_found3(void *ctx) { return bpf_strnstr(str, "hello", 6); } +__test(-ENOENT) int test_strnstr_notfound1(void *ctx) { return bpf_strnstr(str, "hi", 10); } +__test(-ENOENT) int test_strnstr_notfound2(void *ctx) { return bpf_strnstr(str, "hello", 4); } +__test(-ENOENT) int test_strnstr_notfound3(void *ctx) { return bpf_strnstr("", "a", 0); } __test(0) int test_strnstr_empty(void *ctx) { return bpf_strnstr(str, "", 1); } char _license[] SEC("license") = "GPL"; -- 2.51.0

4 months, 1 week

1
0
0 0

[PATCH v8 1/1] selftests/mm/uffd: Refactor non-composite global vars into struct

by Ujwal Kundur

Refactor macros and non-composite global variable definitions into a struct that is defined at the start of a test and is passed around instead of relying on global vars. Signed-off-by: Ujwal Kundur <ujwal.kundur(a)gmail.com> Acked-by: Peter Xu <peterx(a)redhat.com> Reviewed-by: Brendan Jackman <jackmanb(a)google.com> --- v7: https://lore.kernel.org/all/20250817065211.855-1-ujwal.kundur@gmail.com/ Changes since v7: - rebase on mm-new again after cd79a1d9b08a was dropped - address minor formatting nits (David) Changes since v6: - rebased on 6.17-rc1 changes (cd79a1d9b08a) - removes unused args and adds the __unused attribute; since change is cosmetic-only, carry forward Acked-by tag - verified output remains unchanged using virtme-ng Changes since v5: - ensure uffd_global_test_opts_t instances are initialized - verified output remains unchanged using virtme-ng Changes since v4: - define gopts as global within uffd-stress.c to retain existing sigalrm handler logic Changes since v3: - more formatting fixes Changes since v2: - redo patch on mm-new branch Changes since v1: - indentation fixes - squash into single patch to assist bisections tools/testing/selftests/mm/uffd-common.c | 271 ++++----- tools/testing/selftests/mm/uffd-common.h | 78 +-- tools/testing/selftests/mm/uffd-stress.c | 228 ++++---- tools/testing/selftests/mm/uffd-unit-tests.c | 559 ++++++++++--------- tools/testing/selftests/mm/uffd-wp-mremap.c | 20 +- 5 files changed, 615 insertions(+), 541 deletions(-) diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c index a37088a23ffe..994fe8c03923 100644 --- a/tools/testing/selftests/mm/uffd-common.c +++ b/tools/testing/selftests/mm/uffd-common.c @@ -7,18 +7,29 @@ #include "uffd-common.h" -#define BASE_PMD_ADDR ((void *)(1UL << 30)) - -volatile bool test_uffdio_copy_eexist = true; -unsigned long nr_parallel, nr_pages, nr_pages_per_cpu, page_size; -char *area_src, *area_src_alias, *area_dst, *area_dst_alias, 
*area_remap; -int uffd = -1, uffd_flags, finished, *pipefd, test_type; -bool map_shared; -bool test_uffdio_wp = true; -unsigned long long *count_verify; uffd_test_ops_t *uffd_test_ops; uffd_test_case_ops_t *uffd_test_case_ops; -atomic_bool ready_for_fork; + +#define BASE_PMD_ADDR ((void *)(1UL << 30)) + +/* pthread_mutex_t starts at page offset 0 */ +pthread_mutex_t *area_mutex(char *area, unsigned long nr, uffd_global_test_opts_t *gopts) +{ + return (pthread_mutex_t *) (area + nr * gopts->page_size); +} + +/* + * count is placed in the page after pthread_mutex_t naturally aligned + * to avoid non alignment faults on non-x86 archs. + */ +volatile unsigned long long *area_count(char *area, unsigned long nr, + uffd_global_test_opts_t *gopts) +{ + return (volatile unsigned long long *) + ((unsigned long)(area + nr * gopts->page_size + + sizeof(pthread_mutex_t) + sizeof(unsigned long long) - 1) & + ~(unsigned long)(sizeof(unsigned long long) - 1)); +} static int uffd_mem_fd_create(off_t mem_size, bool hugetlb) { @@ -40,15 +51,15 @@ static int uffd_mem_fd_create(off_t mem_size, bool hugetlb) return mem_fd; } -static void anon_release_pages(char *rel_area) +static void anon_release_pages(uffd_global_test_opts_t *gopts, char *rel_area) { - if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED)) + if (madvise(rel_area, gopts->nr_pages * gopts->page_size, MADV_DONTNEED)) err("madvise(MADV_DONTNEED) failed"); } -static int anon_allocate_area(void **alloc_area, bool is_src) +static int anon_allocate_area(uffd_global_test_opts_t *gopts, void **alloc_area, bool is_src) { - *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, + *alloc_area = mmap(NULL, gopts->nr_pages * gopts->page_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); if (*alloc_area == MAP_FAILED) { *alloc_area = NULL; @@ -57,31 +68,32 @@ static int anon_allocate_area(void **alloc_area, bool is_src) return 0; } -static void noop_alias_mapping(__u64 *start, size_t len, unsigned 
long offset) +static void noop_alias_mapping(uffd_global_test_opts_t *gopts, __u64 *start, + size_t len, unsigned long offset) { } -static void hugetlb_release_pages(char *rel_area) +static void hugetlb_release_pages(uffd_global_test_opts_t *gopts, char *rel_area) { - if (!map_shared) { - if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED)) + if (!gopts->map_shared) { + if (madvise(rel_area, gopts->nr_pages * gopts->page_size, MADV_DONTNEED)) err("madvise(MADV_DONTNEED) failed"); } else { - if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE)) + if (madvise(rel_area, gopts->nr_pages * gopts->page_size, MADV_REMOVE)) err("madvise(MADV_REMOVE) failed"); } } -static int hugetlb_allocate_area(void **alloc_area, bool is_src) +static int hugetlb_allocate_area(uffd_global_test_opts_t *gopts, void **alloc_area, bool is_src) { - off_t size = nr_pages * page_size; + off_t size = gopts->nr_pages * gopts->page_size; off_t offset = is_src ? 0 : size; void *area_alias = NULL; char **alloc_area_alias; int mem_fd = uffd_mem_fd_create(size * 2, true); *alloc_area = mmap(NULL, size, PROT_READ | PROT_WRITE, - (map_shared ? MAP_SHARED : MAP_PRIVATE) | + (gopts->map_shared ? MAP_SHARED : MAP_PRIVATE) | (is_src ? 
0 : MAP_NORESERVE), mem_fd, offset); if (*alloc_area == MAP_FAILED) { @@ -89,7 +101,7 @@ static int hugetlb_allocate_area(void **alloc_area, bool is_src) return -errno; } - if (map_shared) { + if (gopts->map_shared) { area_alias = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, mem_fd, offset); if (area_alias == MAP_FAILED) @@ -97,9 +109,9 @@ static int hugetlb_allocate_area(void **alloc_area, bool is_src) } if (is_src) { - alloc_area_alias = &area_src_alias; + alloc_area_alias = &gopts->area_src_alias; } else { - alloc_area_alias = &area_dst_alias; + alloc_area_alias = &gopts->area_dst_alias; } if (area_alias) *alloc_area_alias = area_alias; @@ -108,24 +120,25 @@ static int hugetlb_allocate_area(void **alloc_area, bool is_src) return 0; } -static void hugetlb_alias_mapping(__u64 *start, size_t len, unsigned long offset) +static void hugetlb_alias_mapping(uffd_global_test_opts_t *gopts, __u64 *start, + size_t len, unsigned long offset) { - if (!map_shared) + if (!gopts->map_shared) return; - *start = (unsigned long) area_dst_alias + offset; + *start = (unsigned long) gopts->area_dst_alias + offset; } -static void shmem_release_pages(char *rel_area) +static void shmem_release_pages(uffd_global_test_opts_t *gopts, char *rel_area) { - if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE)) + if (madvise(rel_area, gopts->nr_pages * gopts->page_size, MADV_REMOVE)) err("madvise(MADV_REMOVE) failed"); } -static int shmem_allocate_area(void **alloc_area, bool is_src) +static int shmem_allocate_area(uffd_global_test_opts_t *gopts, void **alloc_area, bool is_src) { void *area_alias = NULL; - size_t bytes = nr_pages * page_size, hpage_size = read_pmd_pagesize(); + size_t bytes = gopts->nr_pages * gopts->page_size, hpage_size = read_pmd_pagesize(); unsigned long offset = is_src ? 
0 : bytes; char *p = NULL, *p_alias = NULL; int mem_fd = uffd_mem_fd_create(bytes * 2, false); @@ -159,22 +172,23 @@ static int shmem_allocate_area(void **alloc_area, bool is_src) err("mmap of anonymous memory failed at %p", p_alias); if (is_src) - area_src_alias = area_alias; + gopts->area_src_alias = area_alias; else - area_dst_alias = area_alias; + gopts->area_dst_alias = area_alias; close(mem_fd); return 0; } -static void shmem_alias_mapping(__u64 *start, size_t len, unsigned long offset) +static void shmem_alias_mapping(uffd_global_test_opts_t *gopts, __u64 *start, + size_t len, unsigned long offset) { - *start = (unsigned long)area_dst_alias + offset; + *start = (unsigned long)gopts->area_dst_alias + offset; } -static void shmem_check_pmd_mapping(void *p, int expect_nr_hpages) +static void shmem_check_pmd_mapping(uffd_global_test_opts_t *gopts, void *p, int expect_nr_hpages) { - if (!check_huge_shmem(area_dst_alias, expect_nr_hpages, + if (!check_huge_shmem(gopts->area_dst_alias, expect_nr_hpages, read_pmd_pagesize())) err("Did not find expected %d number of hugepages", expect_nr_hpages); @@ -234,18 +248,18 @@ void uffd_stats_report(struct uffd_args *args, int n_cpus) printf("\n"); } -int userfaultfd_open(uint64_t *features) +int userfaultfd_open(uffd_global_test_opts_t *gopts, uint64_t *features) { struct uffdio_api uffdio_api; - uffd = uffd_open(UFFD_FLAGS); - if (uffd < 0) + gopts->uffd = uffd_open(UFFD_FLAGS); + if (gopts->uffd < 0) return -1; - uffd_flags = fcntl(uffd, F_GETFD, NULL); + gopts->uffd_flags = fcntl(gopts->uffd, F_GETFD, NULL); uffdio_api.api = UFFD_API; uffdio_api.features = *features; - if (ioctl(uffd, UFFDIO_API, &uffdio_api)) + if (ioctl(gopts->uffd, UFFDIO_API, &uffdio_api)) /* Probably lack of CAP_PTRACE? 
*/ return -1; if (uffdio_api.api != UFFD_API) @@ -255,59 +269,63 @@ int userfaultfd_open(uint64_t *features) return 0; } -static inline void munmap_area(void **area) +static inline void munmap_area(uffd_global_test_opts_t *gopts, void **area) { if (*area) - if (munmap(*area, nr_pages * page_size)) + if (munmap(*area, gopts->nr_pages * gopts->page_size)) err("munmap"); *area = NULL; } -void uffd_test_ctx_clear(void) +void uffd_test_ctx_clear(uffd_global_test_opts_t *gopts) { size_t i; - if (pipefd) { - for (i = 0; i < nr_parallel * 2; ++i) { - if (close(pipefd[i])) + if (gopts->pipefd) { + for (i = 0; i < gopts->nr_parallel * 2; ++i) { + if (close(gopts->pipefd[i])) err("close pipefd"); } - free(pipefd); - pipefd = NULL; + free(gopts->pipefd); + gopts->pipefd = NULL; } - if (count_verify) { - free(count_verify); - count_verify = NULL; + if (gopts->count_verify) { + free(gopts->count_verify); + gopts->count_verify = NULL; } - if (uffd != -1) { - if (close(uffd)) + if (gopts->uffd != -1) { + if (close(gopts->uffd)) err("close uffd"); - uffd = -1; + gopts->uffd = -1; } - munmap_area((void **)&area_src); - munmap_area((void **)&area_src_alias); - munmap_area((void **)&area_dst); - munmap_area((void **)&area_dst_alias); - munmap_area((void **)&area_remap); + munmap_area(gopts, (void **)&gopts->area_src); + munmap_area(gopts, (void **)&gopts->area_src_alias); + munmap_area(gopts, (void **)&gopts->area_dst); + munmap_area(gopts, (void **)&gopts->area_dst_alias); + munmap_area(gopts, (void **)&gopts->area_remap); } -int uffd_test_ctx_init(uint64_t features, const char **errmsg) +int uffd_test_ctx_init(uffd_global_test_opts_t *gopts, uint64_t features, const char **errmsg) { unsigned long nr, cpu; int ret; + gopts->area_src_alias = NULL; + gopts->area_dst_alias = NULL; + gopts->area_remap = NULL; + if (uffd_test_case_ops && uffd_test_case_ops->pre_alloc) { - ret = uffd_test_case_ops->pre_alloc(errmsg); + ret = uffd_test_case_ops->pre_alloc(gopts, errmsg); if (ret) return 
ret; } - ret = uffd_test_ops->allocate_area((void **)&area_src, true); - ret |= uffd_test_ops->allocate_area((void **)&area_dst, false); + ret = uffd_test_ops->allocate_area(gopts, (void **) &gopts->area_src, true); + ret |= uffd_test_ops->allocate_area(gopts, (void **) &gopts->area_dst, false); if (ret) { if (errmsg) *errmsg = "memory allocation failed"; @@ -315,26 +333,26 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg) } if (uffd_test_case_ops && uffd_test_case_ops->post_alloc) { - ret = uffd_test_case_ops->post_alloc(errmsg); + ret = uffd_test_case_ops->post_alloc(gopts, errmsg); if (ret) return ret; } - ret = userfaultfd_open(&features); + ret = userfaultfd_open(gopts, &features); if (ret) { if (errmsg) *errmsg = "possible lack of privilege"; return ret; } - count_verify = malloc(nr_pages * sizeof(unsigned long long)); - if (!count_verify) + gopts->count_verify = malloc(gopts->nr_pages * sizeof(unsigned long long)); + if (!gopts->count_verify) err("count_verify"); - for (nr = 0; nr < nr_pages; nr++) { - *area_mutex(area_src, nr) = + for (nr = 0; nr < gopts->nr_pages; nr++) { + *area_mutex(gopts->area_src, nr, gopts) = (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER; - count_verify[nr] = *area_count(area_src, nr) = 1; + gopts->count_verify[nr] = *area_count(gopts->area_src, nr, gopts) = 1; /* * In the transition between 255 to 256, powerpc will * read out of order in my_bcmp and see both bytes as @@ -342,7 +360,7 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg) * after the count, to avoid my_bcmp to trigger false * positives. */ - *(area_count(area_src, nr) + 1) = 1; + *(area_count(gopts->area_src, nr, gopts) + 1) = 1; } /* @@ -363,13 +381,13 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg) * proactively split the thp and drop any accidentally initialized * pages within area_dst. 
*/ - uffd_test_ops->release_pages(area_dst); + uffd_test_ops->release_pages(gopts, gopts->area_dst); - pipefd = malloc(sizeof(int) * nr_parallel * 2); - if (!pipefd) + gopts->pipefd = malloc(sizeof(int) * gopts->nr_parallel * 2); + if (!gopts->pipefd) err("pipefd"); - for (cpu = 0; cpu < nr_parallel; cpu++) - if (pipe2(&pipefd[cpu * 2], O_CLOEXEC | O_NONBLOCK)) + for (cpu = 0; cpu < gopts->nr_parallel; cpu++) + if (pipe2(&gopts->pipefd[cpu * 2], O_CLOEXEC | O_NONBLOCK)) err("pipe"); return 0; @@ -416,9 +434,9 @@ static void continue_range(int ufd, __u64 start, __u64 len, bool wp) ret, (int64_t) req.mapped); } -int uffd_read_msg(int ufd, struct uffd_msg *msg) +int uffd_read_msg(uffd_global_test_opts_t *gopts, struct uffd_msg *msg) { - int ret = read(uffd, msg, sizeof(*msg)); + int ret = read(gopts->uffd, msg, sizeof(*msg)); if (ret != sizeof(*msg)) { if (ret < 0) { @@ -433,7 +451,8 @@ int uffd_read_msg(int ufd, struct uffd_msg *msg) return 0; } -void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args) +void uffd_handle_page_fault(uffd_global_test_opts_t *gopts, struct uffd_msg *msg, + struct uffd_args *args) { unsigned long offset; @@ -442,7 +461,7 @@ void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args) if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP) { /* Write protect page faults */ - wp_range(uffd, msg->arg.pagefault.address, page_size, false); + wp_range(gopts->uffd, msg->arg.pagefault.address, gopts->page_size, false); args->wp_faults++; } else if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) { uint8_t *area; @@ -460,12 +479,12 @@ void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args) * (UFFD-registered). 
*/ - area = (uint8_t *)(area_dst + - ((char *)msg->arg.pagefault.address - - area_dst_alias)); - for (b = 0; b < page_size; ++b) + area = (uint8_t *)(gopts->area_dst + + ((char *)msg->arg.pagefault.address - + gopts->area_dst_alias)); + for (b = 0; b < gopts->page_size; ++b) area[b] = ~area[b]; - continue_range(uffd, msg->arg.pagefault.address, page_size, + continue_range(gopts->uffd, msg->arg.pagefault.address, gopts->page_size, args->apply_wp); args->minor_faults++; } else { @@ -493,10 +512,10 @@ void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args) if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) err("unexpected write fault"); - offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst; - offset &= ~(page_size-1); + offset = (char *)(unsigned long)msg->arg.pagefault.address - gopts->area_dst; + offset &= ~(gopts->page_size-1); - if (copy_page(uffd, offset, args->apply_wp)) + if (copy_page(gopts, offset, args->apply_wp)) args->missing_faults++; } } @@ -504,6 +523,7 @@ void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args) void *uffd_poll_thread(void *arg) { struct uffd_args *args = (struct uffd_args *)arg; + uffd_global_test_opts_t *gopts = args->gopts; unsigned long cpu = args->cpu; struct pollfd pollfd[2]; struct uffd_msg msg; @@ -514,12 +534,12 @@ void *uffd_poll_thread(void *arg) if (!args->handle_fault) args->handle_fault = uffd_handle_page_fault; - pollfd[0].fd = uffd; + pollfd[0].fd = gopts->uffd; pollfd[0].events = POLLIN; - pollfd[1].fd = pipefd[cpu*2]; + pollfd[1].fd = gopts->pipefd[cpu*2]; pollfd[1].events = POLLIN; - ready_for_fork = true; + gopts->ready_for_fork = true; for (;;) { ret = poll(pollfd, 2, -1); @@ -537,30 +557,30 @@ void *uffd_poll_thread(void *arg) } if (!(pollfd[0].revents & POLLIN)) err("pollfd[0].revents %d", pollfd[0].revents); - if (uffd_read_msg(uffd, &msg)) + if (uffd_read_msg(gopts, &msg)) continue; switch (msg.event) { default: err("unexpected msg event %u\n", 
msg.event); break; case UFFD_EVENT_PAGEFAULT: - args->handle_fault(&msg, args); + args->handle_fault(gopts, &msg, args); break; case UFFD_EVENT_FORK: - close(uffd); - uffd = msg.arg.fork.ufd; - pollfd[0].fd = uffd; + close(gopts->uffd); + gopts->uffd = msg.arg.fork.ufd; + pollfd[0].fd = gopts->uffd; break; case UFFD_EVENT_REMOVE: uffd_reg.range.start = msg.arg.remove.start; uffd_reg.range.len = msg.arg.remove.end - msg.arg.remove.start; - if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range)) + if (ioctl(gopts->uffd, UFFDIO_UNREGISTER, &uffd_reg.range)) err("remove failure"); break; case UFFD_EVENT_REMAP: - area_remap = area_dst; /* save for later unmap */ - area_dst = (char *)(unsigned long)msg.arg.remap.to; + gopts->area_remap = gopts->area_dst; /* save for later unmap */ + gopts->area_dst = (char *)(unsigned long)msg.arg.remap.to; break; } } @@ -568,17 +588,18 @@ void *uffd_poll_thread(void *arg) return NULL; } -static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy, +static void retry_copy_page(uffd_global_test_opts_t *gopts, struct uffdio_copy *uffdio_copy, unsigned long offset) { - uffd_test_ops->alias_mapping(&uffdio_copy->dst, + uffd_test_ops->alias_mapping(gopts, + &uffdio_copy->dst, uffdio_copy->len, offset); - if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) { + if (ioctl(gopts->uffd, UFFDIO_COPY, uffdio_copy)) { /* real retval in ufdio_copy.copy */ if (uffdio_copy->copy != -EEXIST) err("UFFDIO_COPY retry error: %"PRId64, - (int64_t)uffdio_copy->copy); + (int64_t)uffdio_copy->copy); } else { err("UFFDIO_COPY retry unexpected: %"PRId64, (int64_t)uffdio_copy->copy); @@ -597,60 +618,60 @@ static void wake_range(int ufd, unsigned long addr, unsigned long len) addr), exit(1); } -int __copy_page(int ufd, unsigned long offset, bool retry, bool wp) +int __copy_page(uffd_global_test_opts_t *gopts, unsigned long offset, bool retry, bool wp) { struct uffdio_copy uffdio_copy; - if (offset >= nr_pages * page_size) + if (offset >= gopts->nr_pages * 
gopts->page_size) err("unexpected offset %lu\n", offset); - uffdio_copy.dst = (unsigned long) area_dst + offset; - uffdio_copy.src = (unsigned long) area_src + offset; - uffdio_copy.len = page_size; + uffdio_copy.dst = (unsigned long) gopts->area_dst + offset; + uffdio_copy.src = (unsigned long) gopts->area_src + offset; + uffdio_copy.len = gopts->page_size; if (wp) uffdio_copy.mode = UFFDIO_COPY_MODE_WP; else uffdio_copy.mode = 0; uffdio_copy.copy = 0; - if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) { + if (ioctl(gopts->uffd, UFFDIO_COPY, &uffdio_copy)) { /* real retval in ufdio_copy.copy */ if (uffdio_copy.copy != -EEXIST) err("UFFDIO_COPY error: %"PRId64, (int64_t)uffdio_copy.copy); - wake_range(ufd, uffdio_copy.dst, page_size); - } else if (uffdio_copy.copy != page_size) { + wake_range(gopts->uffd, uffdio_copy.dst, gopts->page_size); + } else if (uffdio_copy.copy != gopts->page_size) { err("UFFDIO_COPY error: %"PRId64, (int64_t)uffdio_copy.copy); } else { - if (test_uffdio_copy_eexist && retry) { - test_uffdio_copy_eexist = false; - retry_copy_page(ufd, &uffdio_copy, offset); + if (gopts->test_uffdio_copy_eexist && retry) { + gopts->test_uffdio_copy_eexist = false; + retry_copy_page(gopts, &uffdio_copy, offset); } return 1; } return 0; } -int copy_page(int ufd, unsigned long offset, bool wp) +int copy_page(uffd_global_test_opts_t *gopts, unsigned long offset, bool wp) { - return __copy_page(ufd, offset, false, wp); + return __copy_page(gopts, offset, false, wp); } -int move_page(int ufd, unsigned long offset, unsigned long len) +int move_page(uffd_global_test_opts_t *gopts, unsigned long offset, unsigned long len) { struct uffdio_move uffdio_move; - if (offset + len > nr_pages * page_size) + if (offset + len > gopts->nr_pages * gopts->page_size) err("unexpected offset %lu and length %lu\n", offset, len); - uffdio_move.dst = (unsigned long) area_dst + offset; - uffdio_move.src = (unsigned long) area_src + offset; + uffdio_move.dst = (unsigned long) gopts->area_dst 
+ offset; + uffdio_move.src = (unsigned long) gopts->area_src + offset; uffdio_move.len = len; uffdio_move.mode = UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES; uffdio_move.move = 0; - if (ioctl(ufd, UFFDIO_MOVE, &uffdio_move)) { + if (ioctl(gopts->uffd, UFFDIO_MOVE, &uffdio_move)) { /* real retval in uffdio_move.move */ if (uffdio_move.move != -EEXIST) err("UFFDIO_MOVE error: %"PRId64, (int64_t)uffdio_move.move); - wake_range(ufd, uffdio_move.dst, len); + wake_range(gopts->uffd, uffdio_move.dst, len); } else if (uffdio_move.move != len) { err("UFFDIO_MOVE error: %"PRId64, (int64_t)uffdio_move.move); } else diff --git a/tools/testing/selftests/mm/uffd-common.h b/tools/testing/selftests/mm/uffd-common.h index 7700cbfa3975..37d3ca55905f 100644 --- a/tools/testing/selftests/mm/uffd-common.h +++ b/tools/testing/selftests/mm/uffd-common.h @@ -56,20 +56,17 @@ #define err(fmt, ...) errexit(1, fmt, ##__VA_ARGS__) -/* pthread_mutex_t starts at page offset 0 */ -#define area_mutex(___area, ___nr) \ - ((pthread_mutex_t *) ((___area) + (___nr)*page_size)) -/* - * count is placed in the page after pthread_mutex_t naturally aligned - * to avoid non alignment faults on non-x86 archs. 
- */ -#define area_count(___area, ___nr) \ - ((volatile unsigned long long *) ((unsigned long) \ - ((___area) + (___nr)*page_size + \ - sizeof(pthread_mutex_t) + \ - sizeof(unsigned long long) - 1) & \ - ~(unsigned long)(sizeof(unsigned long long) \ - - 1))) +struct uffd_global_test_opts { + unsigned long nr_parallel, nr_pages, nr_pages_per_cpu, page_size; + char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap; + int uffd, uffd_flags, finished, *pipefd, test_type; + bool map_shared; + bool test_uffdio_wp; + unsigned long long *count_verify; + volatile bool test_uffdio_copy_eexist; + atomic_bool ready_for_fork; +}; +typedef struct uffd_global_test_opts uffd_global_test_opts_t; /* Userfaultfd test statistics */ struct uffd_args { @@ -79,50 +76,55 @@ struct uffd_args { unsigned long missing_faults; unsigned long wp_faults; unsigned long minor_faults; + struct uffd_global_test_opts *gopts; /* A custom fault handler; defaults to uffd_handle_page_fault. */ - void (*handle_fault)(struct uffd_msg *msg, struct uffd_args *args); + void (*handle_fault)(struct uffd_global_test_opts *gopts, + struct uffd_msg *msg, + struct uffd_args *args); }; struct uffd_test_ops { - int (*allocate_area)(void **alloc_area, bool is_src); - void (*release_pages)(char *rel_area); - void (*alias_mapping)(__u64 *start, size_t len, unsigned long offset); - void (*check_pmd_mapping)(void *p, int expect_nr_hpages); + int (*allocate_area)(uffd_global_test_opts_t *gopts, void **alloc_area, bool is_src); + void (*release_pages)(uffd_global_test_opts_t *gopts, char *rel_area); + void (*alias_mapping)(uffd_global_test_opts_t *gopts, + __u64 *start, + size_t len, + unsigned long offset); + void (*check_pmd_mapping)(uffd_global_test_opts_t *gopts, void *p, int expect_nr_hpages); }; typedef struct uffd_test_ops uffd_test_ops_t; struct uffd_test_case_ops { - int (*pre_alloc)(const char **errmsg); - int (*post_alloc)(const char **errmsg); + int (*pre_alloc)(uffd_global_test_opts_t *gopts, 
const char **errmsg); + int (*post_alloc)(uffd_global_test_opts_t *gopts, const char **errmsg); }; typedef struct uffd_test_case_ops uffd_test_case_ops_t; -extern unsigned long nr_parallel, nr_pages, nr_pages_per_cpu, page_size; -extern char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap; -extern int uffd, uffd_flags, finished, *pipefd, test_type; -extern bool map_shared; -extern bool test_uffdio_wp; -extern unsigned long long *count_verify; -extern volatile bool test_uffdio_copy_eexist; -extern atomic_bool ready_for_fork; - +extern uffd_global_test_opts_t *uffd_gtest_opts; extern uffd_test_ops_t anon_uffd_test_ops; extern uffd_test_ops_t shmem_uffd_test_ops; extern uffd_test_ops_t hugetlb_uffd_test_ops; extern uffd_test_ops_t *uffd_test_ops; extern uffd_test_case_ops_t *uffd_test_case_ops; +pthread_mutex_t *area_mutex(char *area, unsigned long nr, uffd_global_test_opts_t *gopts); +volatile unsigned long long *area_count(char *area, + unsigned long nr, + uffd_global_test_opts_t *gopts); + void uffd_stats_report(struct uffd_args *args, int n_cpus); -int uffd_test_ctx_init(uint64_t features, const char **errmsg); -void uffd_test_ctx_clear(void); -int userfaultfd_open(uint64_t *features); -int uffd_read_msg(int ufd, struct uffd_msg *msg); +int uffd_test_ctx_init(uffd_global_test_opts_t *gopts, uint64_t features, const char **errmsg); +void uffd_test_ctx_clear(uffd_global_test_opts_t *gopts); +int userfaultfd_open(uffd_global_test_opts_t *gopts, uint64_t *features); +int uffd_read_msg(uffd_global_test_opts_t *gopts, struct uffd_msg *msg); void wp_range(int ufd, __u64 start, __u64 len, bool wp); -void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args); -int __copy_page(int ufd, unsigned long offset, bool retry, bool wp); -int copy_page(int ufd, unsigned long offset, bool wp); -int move_page(int ufd, unsigned long offset, unsigned long len); +void uffd_handle_page_fault(uffd_global_test_opts_t *gopts, + struct uffd_msg *msg, + 
struct uffd_args *args); +int __copy_page(uffd_global_test_opts_t *gopts, unsigned long offset, bool retry, bool wp); +int copy_page(uffd_global_test_opts_t *gopts, unsigned long offset, bool wp); +int move_page(uffd_global_test_opts_t *gopts, unsigned long offset, unsigned long len); void *uffd_poll_thread(void *arg); int uffd_open_dev(unsigned int flags); diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c index 991db8f90727..10154800a73f 100644 --- a/tools/testing/selftests/mm/uffd-stress.c +++ b/tools/testing/selftests/mm/uffd-stress.c @@ -44,6 +44,12 @@ uint64_t features; #define BOUNCE_VERIFY (1<<2) #define BOUNCE_POLL (1<<3) static int bounces; +/* defined globally for this particular test as the sigalrm handler + * depends on test_uffdio_*_eexist. + * XXX: define gopts in main() when we figure out a way to deal with + * test_uffdio_*_eexist. + */ +static uffd_global_test_opts_t *gopts; /* exercise the test_uffdio_*_eexist every ALARM_INTERVAL_SECS */ #define ALARM_INTERVAL_SECS 10 @@ -76,54 +82,58 @@ static void usage(void) exit(1); } -static void uffd_stats_reset(struct uffd_args *args, unsigned long n_cpus) +static void uffd_stats_reset(uffd_global_test_opts_t *gopts, struct uffd_args *args, + unsigned long n_cpus) { int i; for (i = 0; i < n_cpus; i++) { args[i].cpu = i; - args[i].apply_wp = test_uffdio_wp; + args[i].apply_wp = gopts->test_uffdio_wp; args[i].missing_faults = 0; args[i].wp_faults = 0; args[i].minor_faults = 0; + args[i].gopts = gopts; } } static void *locking_thread(void *arg) { - unsigned long cpu = (unsigned long) arg; + struct uffd_args *args = (struct uffd_args *) arg; + uffd_global_test_opts_t *gopts = args->gopts; + unsigned long cpu = (unsigned long) args->cpu; unsigned long page_nr; unsigned long long count; if (!(bounces & BOUNCE_RANDOM)) { page_nr = -bounces; if (!(bounces & BOUNCE_RACINGFAULTS)) - page_nr += cpu * nr_pages_per_cpu; + page_nr += cpu * gopts->nr_pages_per_cpu; } - while 
(!finished) { + while (!gopts->finished) { if (bounces & BOUNCE_RANDOM) { if (getrandom(&page_nr, sizeof(page_nr), 0) != sizeof(page_nr)) err("getrandom failed"); } else page_nr += 1; - page_nr %= nr_pages; - pthread_mutex_lock(area_mutex(area_dst, page_nr)); - count = *area_count(area_dst, page_nr); - if (count != count_verify[page_nr]) + page_nr %= gopts->nr_pages; + pthread_mutex_lock(area_mutex(gopts->area_dst, page_nr, gopts)); + count = *area_count(gopts->area_dst, page_nr, gopts); + if (count != gopts->count_verify[page_nr]) err("page_nr %lu memory corruption %llu %llu", - page_nr, count, count_verify[page_nr]); + page_nr, count, gopts->count_verify[page_nr]); count++; - *area_count(area_dst, page_nr) = count_verify[page_nr] = count; - pthread_mutex_unlock(area_mutex(area_dst, page_nr)); + *area_count(gopts->area_dst, page_nr, gopts) = gopts->count_verify[page_nr] = count; + pthread_mutex_unlock(area_mutex(gopts->area_dst, page_nr, gopts)); } return NULL; } -static int copy_page_retry(int ufd, unsigned long offset) +static int copy_page_retry(uffd_global_test_opts_t *gopts, unsigned long offset) { - return __copy_page(ufd, offset, true, test_uffdio_wp); + return __copy_page(gopts, offset, true, gopts->test_uffdio_wp); } pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER; @@ -131,15 +141,16 @@ pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER; static void *uffd_read_thread(void *arg) { struct uffd_args *args = (struct uffd_args *)arg; + uffd_global_test_opts_t *gopts = args->gopts; struct uffd_msg msg; pthread_mutex_unlock(&uffd_read_mutex); /* from here cancellation is ok */ for (;;) { - if (uffd_read_msg(uffd, &msg)) + if (uffd_read_msg(gopts, &msg)) continue; - uffd_handle_page_fault(&msg, args); + uffd_handle_page_fault(gopts, &msg, args); } return NULL; @@ -147,32 +158,34 @@ static void *uffd_read_thread(void *arg) static void *background_thread(void *arg) { - unsigned long cpu = (unsigned long) arg; + struct uffd_args *args = (struct 
uffd_args *) arg; + uffd_global_test_opts_t *gopts = args->gopts; + unsigned long cpu = (unsigned long) args->cpu; unsigned long page_nr, start_nr, mid_nr, end_nr; - start_nr = cpu * nr_pages_per_cpu; - end_nr = (cpu+1) * nr_pages_per_cpu; + start_nr = cpu * gopts->nr_pages_per_cpu; + end_nr = (cpu+1) * gopts->nr_pages_per_cpu; mid_nr = (start_nr + end_nr) / 2; /* Copy the first half of the pages */ for (page_nr = start_nr; page_nr < mid_nr; page_nr++) - copy_page_retry(uffd, page_nr * page_size); + copy_page_retry(gopts, page_nr * gopts->page_size); /* * If we need to test uffd-wp, set it up now. Then we'll have * at least the first half of the pages mapped already which * can be write-protected for testing */ - if (test_uffdio_wp) - wp_range(uffd, (unsigned long)area_dst + start_nr * page_size, - nr_pages_per_cpu * page_size, true); + if (gopts->test_uffdio_wp) + wp_range(gopts->uffd, (unsigned long)gopts->area_dst + start_nr * gopts->page_size, + gopts->nr_pages_per_cpu * gopts->page_size, true); /* * Continue the 2nd half of the page copying, handling write * protection faults if any */ for (page_nr = mid_nr; page_nr < end_nr; page_nr++) - copy_page_retry(uffd, page_nr * page_size); + copy_page_retry(gopts, page_nr * gopts->page_size); return NULL; } @@ -180,17 +193,21 @@ static void *background_thread(void *arg) static int stress(struct uffd_args *args) { unsigned long cpu; - pthread_t locking_threads[nr_parallel]; - pthread_t uffd_threads[nr_parallel]; - pthread_t background_threads[nr_parallel]; + uffd_global_test_opts_t *gopts = args->gopts; + pthread_t locking_threads[gopts->nr_parallel]; + pthread_t uffd_threads[gopts->nr_parallel]; + pthread_t background_threads[gopts->nr_parallel]; - finished = 0; - for (cpu = 0; cpu < nr_parallel; cpu++) { + gopts->finished = 0; + for (cpu = 0; cpu < gopts->nr_parallel; cpu++) { if (pthread_create(&locking_threads[cpu], &attr, - locking_thread, (void *)cpu)) + locking_thread, (void *)&args[cpu])) return 1; if (bounces 
& BOUNCE_POLL) { - if (pthread_create(&uffd_threads[cpu], &attr, uffd_poll_thread, &args[cpu])) + if (pthread_create(&uffd_threads[cpu], + &attr, + uffd_poll_thread, + (void *) &args[cpu])) err("uffd_poll_thread create"); } else { if (pthread_create(&uffd_threads[cpu], &attr, @@ -200,10 +217,10 @@ static int stress(struct uffd_args *args) pthread_mutex_lock(&uffd_read_mutex); } if (pthread_create(&background_threads[cpu], &attr, - background_thread, (void *)cpu)) + background_thread, (void *)&args[cpu])) return 1; } - for (cpu = 0; cpu < nr_parallel; cpu++) + for (cpu = 0; cpu < gopts->nr_parallel; cpu++) if (pthread_join(background_threads[cpu], NULL)) return 1; @@ -216,17 +233,17 @@ static int stress(struct uffd_args *args) * UFFDIO_COPY without writing zero pages into area_dst * because the background threads already completed). */ - uffd_test_ops->release_pages(area_src); + uffd_test_ops->release_pages(gopts, gopts->area_src); - finished = 1; - for (cpu = 0; cpu < nr_parallel; cpu++) + gopts->finished = 1; + for (cpu = 0; cpu < gopts->nr_parallel; cpu++) if (pthread_join(locking_threads[cpu], NULL)) return 1; - for (cpu = 0; cpu < nr_parallel; cpu++) { + for (cpu = 0; cpu < gopts->nr_parallel; cpu++) { char c; if (bounces & BOUNCE_POLL) { - if (write(pipefd[cpu*2+1], &c, 1) != 1) + if (write(gopts->pipefd[cpu*2+1], &c, 1) != 1) err("pipefd write error"); if (pthread_join(uffd_threads[cpu], (void *)&args[cpu])) @@ -242,26 +259,26 @@ static int stress(struct uffd_args *args) return 0; } -static int userfaultfd_stress(void) +static int userfaultfd_stress(uffd_global_test_opts_t *gopts) { void *area; unsigned long nr; - struct uffd_args args[nr_parallel]; - uint64_t mem_size = nr_pages * page_size; + struct uffd_args args[gopts->nr_parallel]; + uint64_t mem_size = gopts->nr_pages * gopts->page_size; int flags = 0; - memset(args, 0, sizeof(struct uffd_args) * nr_parallel); + memset(args, 0, sizeof(struct uffd_args) * gopts->nr_parallel); - if (features & 
UFFD_FEATURE_WP_UNPOPULATED && test_type == TEST_ANON) + if (features & UFFD_FEATURE_WP_UNPOPULATED && gopts->test_type == TEST_ANON) flags = UFFD_FEATURE_WP_UNPOPULATED; - if (uffd_test_ctx_init(flags, NULL)) + if (uffd_test_ctx_init(gopts, flags, NULL)) err("context init failed"); - if (posix_memalign(&area, page_size, page_size)) + if (posix_memalign(&area, gopts->page_size, gopts->page_size)) err("out of memory"); zeropage = area; - bzero(zeropage, page_size); + bzero(zeropage, gopts->page_size); pthread_mutex_lock(&uffd_read_mutex); @@ -284,18 +301,18 @@ static int userfaultfd_stress(void) fflush(stdout); if (bounces & BOUNCE_POLL) - fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); + fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags | O_NONBLOCK); else - fcntl(uffd, F_SETFL, uffd_flags & ~O_NONBLOCK); + fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags & ~O_NONBLOCK); /* register */ - if (uffd_register(uffd, area_dst, mem_size, - true, test_uffdio_wp, false)) + if (uffd_register(gopts->uffd, gopts->area_dst, mem_size, + true, gopts->test_uffdio_wp, false)) err("register failure"); - if (area_dst_alias) { - if (uffd_register(uffd, area_dst_alias, mem_size, - true, test_uffdio_wp, false)) + if (gopts->area_dst_alias) { + if (uffd_register(gopts->uffd, gopts->area_dst_alias, mem_size, + true, gopts->test_uffdio_wp, false)) err("register failure alias"); } @@ -323,87 +340,88 @@ static int userfaultfd_stress(void) * MADV_DONTNEED only after the UFFDIO_REGISTER, so it's * required to MADV_DONTNEED here. 
*/ - uffd_test_ops->release_pages(area_dst); + uffd_test_ops->release_pages(gopts, gopts->area_dst); - uffd_stats_reset(args, nr_parallel); + uffd_stats_reset(gopts, args, gopts->nr_parallel); /* bounce pass */ if (stress(args)) { - uffd_test_ctx_clear(); + uffd_test_ctx_clear(gopts); return 1; } /* Clear all the write protections if there is any */ - if (test_uffdio_wp) - wp_range(uffd, (unsigned long)area_dst, - nr_pages * page_size, false); + if (gopts->test_uffdio_wp) + wp_range(gopts->uffd, (unsigned long)gopts->area_dst, + gopts->nr_pages * gopts->page_size, false); /* unregister */ - if (uffd_unregister(uffd, area_dst, mem_size)) + if (uffd_unregister(gopts->uffd, gopts->area_dst, mem_size)) err("unregister failure"); - if (area_dst_alias) { - if (uffd_unregister(uffd, area_dst_alias, mem_size)) + if (gopts->area_dst_alias) { + if (uffd_unregister(gopts->uffd, gopts->area_dst_alias, mem_size)) err("unregister failure alias"); } /* verification */ if (bounces & BOUNCE_VERIFY) - for (nr = 0; nr < nr_pages; nr++) - if (*area_count(area_dst, nr) != count_verify[nr]) + for (nr = 0; nr < gopts->nr_pages; nr++) + if (*area_count(gopts->area_dst, nr, gopts) != + gopts->count_verify[nr]) err("error area_count %llu %llu %lu\n", - *area_count(area_src, nr), - count_verify[nr], nr); + *area_count(gopts->area_src, nr, gopts), + gopts->count_verify[nr], nr); /* prepare next bounce */ - swap(area_src, area_dst); + swap(gopts->area_src, gopts->area_dst); - swap(area_src_alias, area_dst_alias); + swap(gopts->area_src_alias, gopts->area_dst_alias); - uffd_stats_report(args, nr_parallel); + uffd_stats_report(args, gopts->nr_parallel); } - uffd_test_ctx_clear(); + uffd_test_ctx_clear(gopts); return 0; } -static void set_test_type(const char *type) +static void set_test_type(uffd_global_test_opts_t *gopts, const char *type) { if (!strcmp(type, "anon")) { - test_type = TEST_ANON; + gopts->test_type = TEST_ANON; uffd_test_ops = &anon_uffd_test_ops; } else if (!strcmp(type, 
"hugetlb")) { - test_type = TEST_HUGETLB; + gopts->test_type = TEST_HUGETLB; uffd_test_ops = &hugetlb_uffd_test_ops; - map_shared = true; + gopts->map_shared = true; } else if (!strcmp(type, "hugetlb-private")) { - test_type = TEST_HUGETLB; + gopts->test_type = TEST_HUGETLB; uffd_test_ops = &hugetlb_uffd_test_ops; } else if (!strcmp(type, "shmem")) { - map_shared = true; - test_type = TEST_SHMEM; + gopts->map_shared = true; + gopts->test_type = TEST_SHMEM; uffd_test_ops = &shmem_uffd_test_ops; } else if (!strcmp(type, "shmem-private")) { - test_type = TEST_SHMEM; + gopts->test_type = TEST_SHMEM; uffd_test_ops = &shmem_uffd_test_ops; } } -static void parse_test_type_arg(const char *raw_type) +static void parse_test_type_arg(uffd_global_test_opts_t *gopts, const char *raw_type) { - set_test_type(raw_type); + set_test_type(gopts, raw_type); - if (!test_type) + if (!gopts->test_type) err("failed to parse test type argument: '%s'", raw_type); - if (test_type == TEST_HUGETLB) - page_size = default_huge_page_size(); + if (gopts->test_type == TEST_HUGETLB) + gopts->page_size = default_huge_page_size(); else - page_size = sysconf(_SC_PAGE_SIZE); + gopts->page_size = sysconf(_SC_PAGE_SIZE); - if (!page_size) + if (!gopts->page_size) err("Unable to determine page size"); - if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2 - > page_size) + if ((unsigned long) area_count(NULL, 0, gopts) + sizeof(unsigned long long) * 2 + > gopts->page_size) err("Impossible to run this test"); /* @@ -415,21 +433,21 @@ static void parse_test_type_arg(const char *raw_type) if (uffd_get_features(&features) && errno == ENOENT) ksft_exit_skip("failed to get available features (%d)\n", errno); - test_uffdio_wp = test_uffdio_wp && + gopts->test_uffdio_wp = gopts->test_uffdio_wp && (features & UFFD_FEATURE_PAGEFAULT_FLAG_WP); - if (test_type != TEST_ANON && !(features & UFFD_FEATURE_WP_HUGETLBFS_SHMEM)) - test_uffdio_wp = false; + if (gopts->test_type != TEST_ANON && !(features & 
UFFD_FEATURE_WP_HUGETLBFS_SHMEM)) + gopts->test_uffdio_wp = false; - close(uffd); - uffd = -1; + close(gopts->uffd); + gopts->uffd = -1; } static void sigalrm(int sig) { if (sig != SIGALRM) abort(); - test_uffdio_copy_eexist = true; + gopts->test_uffdio_copy_eexist = true; alarm(ALARM_INTERVAL_SECS); } @@ -438,6 +456,8 @@ int main(int argc, char **argv) unsigned long nr_cpus; size_t bytes; + gopts = (uffd_global_test_opts_t *) malloc(sizeof(uffd_global_test_opts_t)); + if (argc < 4) usage(); @@ -445,11 +465,11 @@ int main(int argc, char **argv) err("failed to arm SIGALRM"); alarm(ALARM_INTERVAL_SECS); - parse_test_type_arg(argv[1]); + parse_test_type_arg(gopts, argv[1]); bytes = atol(argv[2]) * 1024 * 1024; - if (test_type == TEST_HUGETLB && - get_free_hugepages() < 2 * (bytes / page_size) + 10) { + if (gopts->test_type == TEST_HUGETLB && + get_free_hugepages() < 2 * (bytes / gopts->page_size) + 10) { printf("skip: Skipping userfaultfd... not enough hugepages\n"); return KSFT_SKIP; } @@ -459,15 +479,15 @@ int main(int argc, char **argv) /* Don't let calculation below go to zero. 
*/ ksft_print_msg("_SC_NPROCESSORS_ONLN (%lu) too large, capping nr_threads to 32\n", nr_cpus); - nr_parallel = 32; + gopts->nr_parallel = 32; } else { - nr_parallel = nr_cpus; + gopts->nr_parallel = nr_cpus; } - nr_pages_per_cpu = bytes / page_size / nr_parallel; - if (!nr_pages_per_cpu) { + gopts->nr_pages_per_cpu = bytes / gopts->page_size / gopts->nr_parallel; + if (!gopts->nr_pages_per_cpu) { _err("pages_per_cpu = 0, cannot test (%lu / %lu / %lu)", - bytes, page_size, nr_parallel); + bytes, gopts->page_size, gopts->nr_parallel); usage(); } @@ -476,11 +496,11 @@ int main(int argc, char **argv) _err("invalid bounces"); usage(); } - nr_pages = nr_pages_per_cpu * nr_parallel; + gopts->nr_pages = gopts->nr_pages_per_cpu * gopts->nr_parallel; printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n", - nr_pages, nr_pages_per_cpu); - return userfaultfd_stress(); + gopts->nr_pages, gopts->nr_pages_per_cpu); + return userfaultfd_stress(gopts); } #else /* __NR_userfaultfd */ diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index 50501b38e34e..9e3be2ee7f1b 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -76,7 +76,7 @@ struct uffd_test_args { typedef struct uffd_test_args uffd_test_args_t; /* Returns: UFFD_TEST_* */ -typedef void (*uffd_test_fn)(uffd_test_args_t *); +typedef void (*uffd_test_fn)(uffd_global_test_opts_t *, uffd_test_args_t *); typedef struct { const char *name; @@ -181,33 +181,6 @@ static int test_uffd_api(bool use_dev) return 1; } -/* - * This function initializes the global variables. TODO: remove global - * vars and then remove this. 
- */ -static int -uffd_setup_environment(uffd_test_args_t *args, uffd_test_case_t *test, - mem_type_t *mem_type, const char **errmsg) -{ - map_shared = mem_type->shared; - uffd_test_ops = mem_type->mem_ops; - uffd_test_case_ops = test->test_case_ops; - - if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB)) - page_size = default_huge_page_size(); - else - page_size = psize(); - - /* Ensure we have at least 2 pages */ - nr_pages = MAX(UFFD_TEST_MEM_SIZE, page_size * 2) / page_size; - /* TODO: remove this global var.. it's so ugly */ - nr_parallel = 1; - - /* Initialize test arguments */ - args->mem_type = mem_type; - - return uffd_test_ctx_init(test->uffd_feature_required, errmsg); -} static bool uffd_feature_supported(uffd_test_case_t *test) { @@ -237,7 +210,8 @@ static int pagemap_open(void) } while (0) typedef struct { - int parent_uffd, child_uffd; + uffd_global_test_opts_t *gopts; + int child_uffd; } fork_event_args; static void *fork_event_consumer(void *data) @@ -245,10 +219,10 @@ static void *fork_event_consumer(void *data) fork_event_args *args = data; struct uffd_msg msg = { 0 }; - ready_for_fork = true; + args->gopts->ready_for_fork = true; /* Read until a full msg received */ - while (uffd_read_msg(args->parent_uffd, &msg)); + while (uffd_read_msg(args->gopts, &msg)); if (msg.event != UFFD_EVENT_FORK) err("wrong message: %u\n", msg.event); @@ -304,9 +278,9 @@ static void unpin_pages(pin_args *args) args->pinned = false; } -static int pagemap_test_fork(int uffd, bool with_event, bool test_pin) +static int pagemap_test_fork(uffd_global_test_opts_t *gopts, bool with_event, bool test_pin) { - fork_event_args args = { .parent_uffd = uffd, .child_uffd = -1 }; + fork_event_args args = { .gopts = gopts, .child_uffd = -1 }; pthread_t thread; pid_t child; uint64_t value; @@ -314,10 +288,10 @@ static int pagemap_test_fork(int uffd, bool with_event, bool test_pin) /* Prepare a thread to resolve EVENT_FORK */ if (with_event) { - ready_for_fork = false; + 
gopts->ready_for_fork = false; if (pthread_create(&thread, NULL, fork_event_consumer, &args)) err("pthread_create()"); - while (!ready_for_fork) + while (!gopts->ready_for_fork) ; /* Wait for the poll_thread to start executing before forking */ } @@ -328,14 +302,14 @@ static int pagemap_test_fork(int uffd, bool with_event, bool test_pin) fd = pagemap_open(); - if (test_pin && pin_pages(&args, area_dst, page_size)) + if (test_pin && pin_pages(&args, gopts->area_dst, gopts->page_size)) /* * Normally when reach here we have pinned in * previous tests, so shouldn't fail anymore */ err("pin page failed in child"); - value = pagemap_get_entry(fd, area_dst); + value = pagemap_get_entry(fd, gopts->area_dst); /* * After fork(), we should handle uffd-wp bit differently: * @@ -361,70 +335,70 @@ static int pagemap_test_fork(int uffd, bool with_event, bool test_pin) return result; } -static void uffd_wp_unpopulated_test(uffd_test_args_t *args) +static void uffd_wp_unpopulated_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { uint64_t value; int pagemap_fd; - if (uffd_register(uffd, area_dst, nr_pages * page_size, + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, false, true, false)) err("register failed"); pagemap_fd = pagemap_open(); /* Test applying pte marker to anon unpopulated */ - wp_range(uffd, (uint64_t)area_dst, page_size, true); - value = pagemap_get_entry(pagemap_fd, area_dst); + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true); + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, true); /* Test unprotect on anon pte marker */ - wp_range(uffd, (uint64_t)area_dst, page_size, false); - value = pagemap_get_entry(pagemap_fd, area_dst); + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, false); + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, false); /* Test zap on anon marker */ - wp_range(uffd, (uint64_t)area_dst, 
page_size, true); - if (madvise(area_dst, page_size, MADV_DONTNEED)) + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true); + if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED)) err("madvise(MADV_DONTNEED) failed"); - value = pagemap_get_entry(pagemap_fd, area_dst); + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, false); /* Test fault in after marker removed */ - *area_dst = 1; - value = pagemap_get_entry(pagemap_fd, area_dst); + *gopts->area_dst = 1; + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, false); /* Drop it to make pte none again */ - if (madvise(area_dst, page_size, MADV_DONTNEED)) + if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED)) err("madvise(MADV_DONTNEED) failed"); /* Test read-zero-page upon pte marker */ - wp_range(uffd, (uint64_t)area_dst, page_size, true); - *(volatile char *)area_dst; + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true); + *(volatile char *)gopts->area_dst; /* Drop it to make pte none again */ - if (madvise(area_dst, page_size, MADV_DONTNEED)) + if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED)) err("madvise(MADV_DONTNEED) failed"); uffd_test_pass(); } -static void uffd_wp_fork_test_common(uffd_test_args_t *args, +static void uffd_wp_fork_test_common(uffd_global_test_opts_t *gopts, uffd_test_args_t *args, bool with_event) { int pagemap_fd; uint64_t value; - if (uffd_register(uffd, area_dst, nr_pages * page_size, + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, false, true, false)) err("register failed"); pagemap_fd = pagemap_open(); /* Touch the page */ - *area_dst = 1; - wp_range(uffd, (uint64_t)area_dst, page_size, true); - value = pagemap_get_entry(pagemap_fd, area_dst); + *gopts->area_dst = 1; + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true); + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); 
pagemap_check_wp(value, true); - if (pagemap_test_fork(uffd, with_event, false)) { + if (pagemap_test_fork(gopts, with_event, false)) { uffd_test_fail("Detected %s uffd-wp bit in child in present pte", with_event ? "missing" : "stall"); goto out; @@ -442,79 +416,80 @@ static void uffd_wp_fork_test_common(uffd_test_args_t *args, * to expose pte markers. */ if (args->mem_type->shared) { - if (madvise(area_dst, page_size, MADV_DONTNEED)) + if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED)) err("MADV_DONTNEED"); } else { /* * NOTE: ignore retval because private-hugetlb doesn't yet * support swapping, so it could fail. */ - madvise(area_dst, page_size, MADV_PAGEOUT); + madvise(gopts->area_dst, gopts->page_size, MADV_PAGEOUT); } /* Uffd-wp should persist even swapped out */ - value = pagemap_get_entry(pagemap_fd, area_dst); + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, true); - if (pagemap_test_fork(uffd, with_event, false)) { + if (pagemap_test_fork(gopts, with_event, false)) { uffd_test_fail("Detected %s uffd-wp bit in child in zapped pte", with_event ? 
"missing" : "stall"); goto out; } /* Unprotect; this tests swap pte modifications */ - wp_range(uffd, (uint64_t)area_dst, page_size, false); - value = pagemap_get_entry(pagemap_fd, area_dst); + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, false); + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, false); /* Fault in the page from disk */ - *area_dst = 2; - value = pagemap_get_entry(pagemap_fd, area_dst); + *gopts->area_dst = 2; + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, false); uffd_test_pass(); out: - if (uffd_unregister(uffd, area_dst, nr_pages * page_size)) + if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size)) err("unregister failed"); close(pagemap_fd); } -static void uffd_wp_fork_test(uffd_test_args_t *args) +static void uffd_wp_fork_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_wp_fork_test_common(args, false); + uffd_wp_fork_test_common(gopts, args, false); } -static void uffd_wp_fork_with_event_test(uffd_test_args_t *args) +static void uffd_wp_fork_with_event_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_wp_fork_test_common(args, true); + uffd_wp_fork_test_common(gopts, args, true); } -static void uffd_wp_fork_pin_test_common(uffd_test_args_t *args, +static void uffd_wp_fork_pin_test_common(uffd_global_test_opts_t *gopts, + uffd_test_args_t *args, bool with_event) { int pagemap_fd; pin_args pin_args = {}; - if (uffd_register(uffd, area_dst, page_size, false, true, false)) + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->page_size, false, true, false)) err("register failed"); pagemap_fd = pagemap_open(); /* Touch the page */ - *area_dst = 1; - wp_range(uffd, (uint64_t)area_dst, page_size, true); + *gopts->area_dst = 1; + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true); /* * 1. First pin, then fork(). 
This tests fork() special path when * doing early CoW if the page is private. */ - if (pin_pages(&pin_args, area_dst, page_size)) { + if (pin_pages(&pin_args, gopts->area_dst, gopts->page_size)) { uffd_test_skip("Possibly CONFIG_GUP_TEST missing " "or unprivileged"); close(pagemap_fd); - uffd_unregister(uffd, area_dst, page_size); + uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size); return; } - if (pagemap_test_fork(uffd, with_event, false)) { + if (pagemap_test_fork(gopts, with_event, false)) { uffd_test_fail("Detected %s uffd-wp bit in early CoW of fork()", with_event ? "missing" : "stall"); unpin_pages(&pin_args); @@ -527,49 +502,50 @@ static void uffd_wp_fork_pin_test_common(uffd_test_args_t *args, * 2. First fork(), then pin (in the child, where test_pin==true). * This tests COR, aka, page unsharing on private memories. */ - if (pagemap_test_fork(uffd, with_event, true)) { + if (pagemap_test_fork(gopts, with_event, true)) { uffd_test_fail("Detected %s uffd-wp bit when RO pin", with_event ? 
"missing" : "stall"); goto out; } uffd_test_pass(); out: - if (uffd_unregister(uffd, area_dst, page_size)) + if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size)) err("register failed"); close(pagemap_fd); } -static void uffd_wp_fork_pin_test(uffd_test_args_t *args) +static void uffd_wp_fork_pin_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_wp_fork_pin_test_common(args, false); + uffd_wp_fork_pin_test_common(gopts, args, false); } -static void uffd_wp_fork_pin_with_event_test(uffd_test_args_t *args) +static void uffd_wp_fork_pin_with_event_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_wp_fork_pin_test_common(args, true); + uffd_wp_fork_pin_test_common(gopts, args, true); } -static void check_memory_contents(char *p) +static void check_memory_contents(uffd_global_test_opts_t *gopts, char *p) { unsigned long i, j; uint8_t expected_byte; - for (i = 0; i < nr_pages; ++i) { + for (i = 0; i < gopts->nr_pages; ++i) { expected_byte = ~((uint8_t)(i % ((uint8_t)-1))); - for (j = 0; j < page_size; j++) { - uint8_t v = *(uint8_t *)(p + (i * page_size) + j); + for (j = 0; j < gopts->page_size; j++) { + uint8_t v = *(uint8_t *)(p + (i * gopts->page_size) + j); if (v != expected_byte) err("unexpected page contents"); } } } -static void uffd_minor_test_common(bool test_collapse, bool test_wp) +static void uffd_minor_test_common(uffd_global_test_opts_t *gopts, bool test_collapse, bool test_wp) { unsigned long p; pthread_t uffd_mon; char c; struct uffd_args args = { 0 }; + args.gopts = gopts; /* * NOTE: MADV_COLLAPSE is not yet compatible with WP, so testing @@ -577,7 +553,7 @@ static void uffd_minor_test_common(bool test_collapse, bool test_wp) */ assert(!(test_collapse && test_wp)); - if (uffd_register(uffd, area_dst_alias, nr_pages * page_size, + if (uffd_register(gopts->uffd, gopts->area_dst_alias, gopts->nr_pages * gopts->page_size, /* NOTE! 
MADV_COLLAPSE may not work with uffd-wp */ false, test_wp, true)) err("register failure"); @@ -586,9 +562,9 @@ static void uffd_minor_test_common(bool test_collapse, bool test_wp) * After registering with UFFD, populate the non-UFFD-registered side of * the shared mapping. This should *not* trigger any UFFD minor faults. */ - for (p = 0; p < nr_pages; ++p) - memset(area_dst + (p * page_size), p % ((uint8_t)-1), - page_size); + for (p = 0; p < gopts->nr_pages; ++p) + memset(gopts->area_dst + (p * gopts->page_size), p % ((uint8_t)-1), + gopts->page_size); args.apply_wp = test_wp; if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) @@ -600,50 +576,51 @@ static void uffd_minor_test_common(bool test_collapse, bool test_wp) * fault. uffd_poll_thread will resolve the fault by bit-flipping the * page's contents, and then issuing a CONTINUE ioctl. */ - check_memory_contents(area_dst_alias); + check_memory_contents(gopts, gopts->area_dst_alias); - if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c)) err("pipe write"); if (pthread_join(uffd_mon, NULL)) err("join() failed"); if (test_collapse) { - if (madvise(area_dst_alias, nr_pages * page_size, + if (madvise(gopts->area_dst_alias, gopts->nr_pages * gopts->page_size, MADV_COLLAPSE)) { /* It's fine to fail for this one... */ uffd_test_skip("MADV_COLLAPSE failed"); return; } - uffd_test_ops->check_pmd_mapping(area_dst, - nr_pages * page_size / + uffd_test_ops->check_pmd_mapping(gopts, + gopts->area_dst, + gopts->nr_pages * gopts->page_size / read_pmd_pagesize()); /* * This won't cause uffd-fault - it purely just makes sure there * was no corruption. 
*/ - check_memory_contents(area_dst_alias); + check_memory_contents(gopts, gopts->area_dst_alias); } - if (args.missing_faults != 0 || args.minor_faults != nr_pages) + if (args.missing_faults != 0 || args.minor_faults != gopts->nr_pages) uffd_test_fail("stats check error"); else uffd_test_pass(); } -void uffd_minor_test(uffd_test_args_t *args) +void uffd_minor_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_minor_test_common(false, false); + uffd_minor_test_common(gopts, false, false); } -void uffd_minor_wp_test(uffd_test_args_t *args) +void uffd_minor_wp_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_minor_test_common(false, true); + uffd_minor_test_common(gopts, false, true); } -void uffd_minor_collapse_test(uffd_test_args_t *args) +void uffd_minor_collapse_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_minor_test_common(true, false); + uffd_minor_test_common(gopts, true, false); } static sigjmp_buf jbuf, *sigbuf; @@ -678,7 +655,7 @@ static void sighndl(int sig, siginfo_t *siginfo, void *ptr) * This also tests UFFD_FEATURE_EVENT_FORK event along with the signal * feature. Using monitor thread, verify no userfault events are generated. 
*/ -static int faulting_process(int signal_test, bool wp) +static int faulting_process(uffd_global_test_opts_t *gopts, int signal_test, bool wp) { unsigned long nr, i; unsigned long long count; @@ -687,7 +664,7 @@ static int faulting_process(int signal_test, bool wp) struct sigaction act; volatile unsigned long signalled = 0; - split_nr_pages = (nr_pages + 1) / 2; + split_nr_pages = (gopts->nr_pages + 1) / 2; if (signal_test) { sigbuf = &jbuf; @@ -701,7 +678,7 @@ static int faulting_process(int signal_test, bool wp) for (nr = 0; nr < split_nr_pages; nr++) { volatile int steps = 1; - unsigned long offset = nr * page_size; + unsigned long offset = nr * gopts->page_size; if (signal_test) { if (sigsetjmp(*sigbuf, 1) != 0) { @@ -713,15 +690,15 @@ static int faulting_process(int signal_test, bool wp) if (steps == 1) { /* This is a MISSING request */ steps++; - if (copy_page(uffd, offset, wp)) + if (copy_page(gopts, offset, wp)) signalled++; } else { /* This is a WP request */ assert(steps == 2); - wp_range(uffd, - (__u64)area_dst + + wp_range(gopts->uffd, + (__u64)gopts->area_dst + offset, - page_size, false); + gopts->page_size, false); } } else { signalled++; @@ -730,51 +707,53 @@ static int faulting_process(int signal_test, bool wp) } } - count = *area_count(area_dst, nr); - if (count != count_verify[nr]) + count = *area_count(gopts->area_dst, nr, gopts); + if (count != gopts->count_verify[nr]) err("nr %lu memory corruption %llu %llu\n", - nr, count, count_verify[nr]); + nr, count, gopts->count_verify[nr]); /* * Trigger write protection if there is by writing * the same value back. 
*/ - *area_count(area_dst, nr) = count; + *area_count(gopts->area_dst, nr, gopts) = count; } if (signal_test) return signalled != split_nr_pages; - area_dst = mremap(area_dst, nr_pages * page_size, nr_pages * page_size, - MREMAP_MAYMOVE | MREMAP_FIXED, area_src); - if (area_dst == MAP_FAILED) + gopts->area_dst = mremap(gopts->area_dst, gopts->nr_pages * gopts->page_size, + gopts->nr_pages * gopts->page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, + gopts->area_src); + if (gopts->area_dst == MAP_FAILED) err("mremap"); /* Reset area_src since we just clobbered it */ - area_src = NULL; + gopts->area_src = NULL; - for (; nr < nr_pages; nr++) { - count = *area_count(area_dst, nr); - if (count != count_verify[nr]) { + for (; nr < gopts->nr_pages; nr++) { + count = *area_count(gopts->area_dst, nr, gopts); + if (count != gopts->count_verify[nr]) { err("nr %lu memory corruption %llu %llu\n", - nr, count, count_verify[nr]); + nr, count, gopts->count_verify[nr]); } /* * Trigger write protection if there is by writing * the same value back. 
*/ - *area_count(area_dst, nr) = count; + *area_count(gopts->area_dst, nr, gopts) = count; } - uffd_test_ops->release_pages(area_dst); + uffd_test_ops->release_pages(gopts, gopts->area_dst); - for (nr = 0; nr < nr_pages; nr++) - for (i = 0; i < page_size; i++) - if (*(area_dst + nr * page_size + i) != 0) + for (nr = 0; nr < gopts->nr_pages; nr++) + for (i = 0; i < gopts->page_size; i++) + if (*(gopts->area_dst + nr * gopts->page_size + i) != 0) err("page %lu offset %lu is not zero", nr, i); return 0; } -static void uffd_sigbus_test_common(bool wp) +static void uffd_sigbus_test_common(uffd_global_test_opts_t *gopts, bool wp) { unsigned long userfaults; pthread_t uffd_mon; @@ -782,25 +761,26 @@ static void uffd_sigbus_test_common(bool wp) int err; char c; struct uffd_args args = { 0 }; + args.gopts = gopts; - ready_for_fork = false; + gopts->ready_for_fork = false; - fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); + fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags | O_NONBLOCK); - if (uffd_register(uffd, area_dst, nr_pages * page_size, + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, true, wp, false)) err("register failure"); - if (faulting_process(1, wp)) + if (faulting_process(gopts, 1, wp)) err("faulting process failed"); - uffd_test_ops->release_pages(area_dst); + uffd_test_ops->release_pages(gopts, gopts->area_dst); args.apply_wp = wp; if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) err("uffd_poll_thread create"); - while (!ready_for_fork) + while (!gopts->ready_for_fork) ; /* Wait for the poll_thread to start executing before forking */ pid = fork(); @@ -808,12 +788,12 @@ static void uffd_sigbus_test_common(bool wp) err("fork"); if (!pid) - exit(faulting_process(2, wp)); + exit(faulting_process(gopts, 2, wp)); waitpid(pid, &err, 0); if (err) err("faulting process failed"); - if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c)) err("pipe write"); if 
(pthread_join(uffd_mon, (void **)&userfaults)) err("pthread_join()"); @@ -824,28 +804,29 @@ static void uffd_sigbus_test_common(bool wp) uffd_test_pass(); } -static void uffd_sigbus_test(uffd_test_args_t *args) +static void uffd_sigbus_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_sigbus_test_common(false); + uffd_sigbus_test_common(gopts, false); } -static void uffd_sigbus_wp_test(uffd_test_args_t *args) +static void uffd_sigbus_wp_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_sigbus_test_common(true); + uffd_sigbus_test_common(gopts, true); } -static void uffd_events_test_common(bool wp) +static void uffd_events_test_common(uffd_global_test_opts_t *gopts, bool wp) { pthread_t uffd_mon; pid_t pid; int err; char c; struct uffd_args args = { 0 }; + args.gopts = gopts; - ready_for_fork = false; + gopts->ready_for_fork = false; - fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); - if (uffd_register(uffd, area_dst, nr_pages * page_size, + fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags | O_NONBLOCK); + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, true, wp, false)) err("register failure"); @@ -853,7 +834,7 @@ static void uffd_events_test_common(bool wp) if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) err("uffd_poll_thread create"); - while (!ready_for_fork) + while (!gopts->ready_for_fork) ; /* Wait for the poll_thread to start executing before forking */ pid = fork(); @@ -861,39 +842,39 @@ static void uffd_events_test_common(bool wp) err("fork"); if (!pid) - exit(faulting_process(0, wp)); + exit(faulting_process(gopts, 0, wp)); waitpid(pid, &err, 0); if (err) err("faulting process failed"); - if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c)) err("pipe write"); if (pthread_join(uffd_mon, NULL)) err("pthread_join()"); - if (args.missing_faults != nr_pages) + if (args.missing_faults != gopts->nr_pages) 
uffd_test_fail("Fault counts wrong"); else uffd_test_pass(); } -static void uffd_events_test(uffd_test_args_t *args) +static void uffd_events_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_events_test_common(false); + uffd_events_test_common(gopts, false); } -static void uffd_events_wp_test(uffd_test_args_t *args) +static void uffd_events_wp_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_events_test_common(true); + uffd_events_test_common(gopts, true); } -static void retry_uffdio_zeropage(int ufd, +static void retry_uffdio_zeropage(uffd_global_test_opts_t *gopts, struct uffdio_zeropage *uffdio_zeropage) { - uffd_test_ops->alias_mapping(&uffdio_zeropage->range.start, + uffd_test_ops->alias_mapping(gopts, &uffdio_zeropage->range.start, uffdio_zeropage->range.len, 0); - if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) { + if (ioctl(gopts->uffd, UFFDIO_ZEROPAGE, uffdio_zeropage)) { if (uffdio_zeropage->zeropage != -EEXIST) err("UFFDIO_ZEROPAGE error: %"PRId64, (int64_t)uffdio_zeropage->zeropage); @@ -903,16 +884,16 @@ static void retry_uffdio_zeropage(int ufd, } } -static bool do_uffdio_zeropage(int ufd, bool has_zeropage) +static bool do_uffdio_zeropage(uffd_global_test_opts_t *gopts, bool has_zeropage) { struct uffdio_zeropage uffdio_zeropage = { 0 }; int ret; __s64 res; - uffdio_zeropage.range.start = (unsigned long) area_dst; - uffdio_zeropage.range.len = page_size; + uffdio_zeropage.range.start = (unsigned long) gopts->area_dst; + uffdio_zeropage.range.len = gopts->page_size; uffdio_zeropage.mode = 0; - ret = ioctl(ufd, UFFDIO_ZEROPAGE, &uffdio_zeropage); + ret = ioctl(gopts->uffd, UFFDIO_ZEROPAGE, &uffdio_zeropage); res = uffdio_zeropage.zeropage; if (ret) { /* real retval in ufdio_zeropage.zeropage */ @@ -921,10 +902,10 @@ static bool do_uffdio_zeropage(int ufd, bool has_zeropage) else if (res != -EINVAL) err("UFFDIO_ZEROPAGE not -EINVAL"); } else if (has_zeropage) { - if (res != page_size) + if (res != 
gopts->page_size) err("UFFDIO_ZEROPAGE unexpected size"); else - retry_uffdio_zeropage(ufd, &uffdio_zeropage); + retry_uffdio_zeropage(gopts, &uffdio_zeropage); return true; } else err("UFFDIO_ZEROPAGE succeeded"); @@ -950,25 +931,29 @@ uffd_register_detect_zeropage(int uffd, void *addr, uint64_t len) } /* exercise UFFDIO_ZEROPAGE */ -static void uffd_zeropage_test(uffd_test_args_t *args) +static void uffd_zeropage_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { bool has_zeropage; int i; - has_zeropage = uffd_register_detect_zeropage(uffd, area_dst, page_size); - if (area_dst_alias) + has_zeropage = uffd_register_detect_zeropage(gopts->uffd, + gopts->area_dst, + gopts->page_size); + if (gopts->area_dst_alias) /* Ignore the retval; we already have it */ - uffd_register_detect_zeropage(uffd, area_dst_alias, page_size); + uffd_register_detect_zeropage(gopts->uffd, gopts->area_dst_alias, gopts->page_size); - if (do_uffdio_zeropage(uffd, has_zeropage)) - for (i = 0; i < page_size; i++) - if (area_dst[i] != 0) + if (do_uffdio_zeropage(gopts, has_zeropage)) + for (i = 0; i < gopts->page_size; i++) + if (gopts->area_dst[i] != 0) err("data non-zero at offset %d\n", i); - if (uffd_unregister(uffd, area_dst, page_size)) + if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size)) err("unregister"); - if (area_dst_alias && uffd_unregister(uffd, area_dst_alias, page_size)) + if (gopts->area_dst_alias && uffd_unregister(gopts->uffd, + gopts->area_dst_alias, + gopts->page_size)) err("unregister"); uffd_test_pass(); @@ -987,26 +972,27 @@ static void uffd_register_poison(int uffd, void *addr, uint64_t len) err("registered area doesn't support COPY and POISON ioctls"); } -static void do_uffdio_poison(int uffd, unsigned long offset) +static void do_uffdio_poison(uffd_global_test_opts_t *gopts, unsigned long offset) { struct uffdio_poison uffdio_poison = { 0 }; int ret; __s64 res; - uffdio_poison.range.start = (unsigned long) area_dst + offset; - 
uffdio_poison.range.len = page_size; + uffdio_poison.range.start = (unsigned long) gopts->area_dst + offset; + uffdio_poison.range.len = gopts->page_size; uffdio_poison.mode = 0; - ret = ioctl(uffd, UFFDIO_POISON, &uffdio_poison); + ret = ioctl(gopts->uffd, UFFDIO_POISON, &uffdio_poison); res = uffdio_poison.updated; if (ret) err("UFFDIO_POISON error: %"PRId64, (int64_t)res); - else if (res != page_size) + else if (res != gopts->page_size) err("UFFDIO_POISON unexpected size: %"PRId64, (int64_t)res); } -static void uffd_poison_handle_fault( - struct uffd_msg *msg, struct uffd_args *args) +static void uffd_poison_handle_fault(uffd_global_test_opts_t *gopts, + struct uffd_msg *msg, + struct uffd_args *args) { unsigned long offset; @@ -1017,20 +1003,20 @@ static void uffd_poison_handle_fault( (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR)) err("unexpected fault type %llu", msg->arg.pagefault.flags); - offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst; - offset &= ~(page_size-1); + offset = (char *)(unsigned long)msg->arg.pagefault.address - gopts->area_dst; + offset &= ~(gopts->page_size-1); /* Odd pages -> copy zeroed page; even pages -> poison. 
*/ - if (offset & page_size) - copy_page(uffd, offset, false); + if (offset & gopts->page_size) + copy_page(gopts, offset, false); else - do_uffdio_poison(uffd, offset); + do_uffdio_poison(gopts, offset); } /* Make sure to cover odd/even, and minimum duplications */ #define UFFD_POISON_TEST_NPAGES 4 -static void uffd_poison_test(uffd_test_args_t *targs) +static void uffd_poison_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs) { pthread_t uffd_mon; char c; @@ -1039,15 +1025,17 @@ static void uffd_poison_test(uffd_test_args_t *targs) unsigned long nr_sigbus = 0; unsigned long nr, poison_pages = UFFD_POISON_TEST_NPAGES; - if (nr_pages < poison_pages) { - uffd_test_skip("Too few pages for POISON test"); + if (gopts->nr_pages < poison_pages) { + uffd_test_skip("Too few pages for POISON test"); return; } - fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); + args.gopts = gopts; + + fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags | O_NONBLOCK); - uffd_register_poison(uffd, area_dst, poison_pages * page_size); - memset(area_src, 0, poison_pages * page_size); + uffd_register_poison(gopts->uffd, gopts->area_dst, poison_pages * gopts->page_size); + memset(gopts->area_src, 0, poison_pages * gopts->page_size); args.handle_fault = uffd_poison_handle_fault; if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) @@ -1060,8 +1048,8 @@ static void uffd_poison_test(uffd_test_args_t *targs) err("sigaction"); for (nr = 0; nr < poison_pages; ++nr) { - unsigned long offset = nr * page_size; - const char *bytes = (const char *) area_dst + offset; + unsigned long offset = nr * gopts->page_size; + const char *bytes = (const char *) gopts->area_dst + offset; const char *i; if (sigsetjmp(*sigbuf, 1)) { @@ -1074,14 +1062,14 @@ static void uffd_poison_test(uffd_test_args_t *targs) continue; } - for (i = bytes; i < bytes + page_size; ++i) { + for (i = bytes; i < bytes + gopts->page_size; ++i) { if (*i) err("nonzero byte in area_dst (%p) at %p: %u", - area_dst, i, *i); + 
gopts->area_dst, i, *i); } } - if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c)) err("pipe write"); if (pthread_join(uffd_mon, NULL)) err("pthread_join()"); @@ -1094,7 +1082,9 @@ static void uffd_poison_test(uffd_test_args_t *targs) } static void -uffd_move_handle_fault_common(struct uffd_msg *msg, struct uffd_args *args, +uffd_move_handle_fault_common(uffd_global_test_opts_t *gopts, + struct uffd_msg *msg, + struct uffd_args *args, unsigned long len) { unsigned long offset; @@ -1106,28 +1096,32 @@ uffd_move_handle_fault_common(struct uffd_msg *msg, struct uffd_args *args, (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR | UFFD_PAGEFAULT_FLAG_WRITE)) err("unexpected fault type %llu", msg->arg.pagefault.flags); - offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst; + offset = (char *)(unsigned long)msg->arg.pagefault.address - gopts->area_dst; offset &= ~(len-1); - if (move_page(uffd, offset, len)) + if (move_page(gopts, offset, len)) args->missing_faults++; } -static void uffd_move_handle_fault(struct uffd_msg *msg, +static void uffd_move_handle_fault(uffd_global_test_opts_t *gopts, struct uffd_msg *msg, struct uffd_args *args) { - uffd_move_handle_fault_common(msg, args, page_size); + uffd_move_handle_fault_common(gopts, msg, args, gopts->page_size); } -static void uffd_move_pmd_handle_fault(struct uffd_msg *msg, +static void uffd_move_pmd_handle_fault(uffd_global_test_opts_t *gopts, struct uffd_msg *msg, struct uffd_args *args) { - uffd_move_handle_fault_common(msg, args, read_pmd_pagesize()); + uffd_move_handle_fault_common(gopts, msg, args, read_pmd_pagesize()); } static void -uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size, - void (*handle_fault)(struct uffd_msg *msg, struct uffd_args *args)) +uffd_move_test_common(uffd_global_test_opts_t *gopts, + uffd_test_args_t *targs, + unsigned long chunk_size, + void (*handle_fault)(struct uffd_global_test_opts 
*gopts, + struct uffd_msg *msg, struct uffd_args *args) +) { unsigned long nr; pthread_t uffd_mon; @@ -1139,11 +1133,13 @@ uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size, unsigned long src_offs = 0; unsigned long dst_offs = 0; + args.gopts = gopts; + /* Prevent source pages from being mapped more than once */ - if (madvise(area_src, nr_pages * page_size, MADV_DONTFORK)) + if (madvise(gopts->area_src, gopts->nr_pages * gopts->page_size, MADV_DONTFORK)) err("madvise(MADV_DONTFORK) failure"); - if (uffd_register(uffd, area_dst, nr_pages * page_size, + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, true, false, false)) err("register failure"); @@ -1151,22 +1147,22 @@ uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size, if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) err("uffd_poll_thread create"); - step_size = chunk_size / page_size; - step_count = nr_pages / step_size; + step_size = chunk_size / gopts->page_size; + step_count = gopts->nr_pages / step_size; - if (chunk_size > page_size) { - char *aligned_src = ALIGN_UP(area_src, chunk_size); - char *aligned_dst = ALIGN_UP(area_dst, chunk_size); + if (chunk_size > gopts->page_size) { + char *aligned_src = ALIGN_UP(gopts->area_src, chunk_size); + char *aligned_dst = ALIGN_UP(gopts->area_dst, chunk_size); - if (aligned_src != area_src || aligned_dst != area_dst) { - src_offs = (aligned_src - area_src) / page_size; - dst_offs = (aligned_dst - area_dst) / page_size; + if (aligned_src != gopts->area_src || aligned_dst != gopts->area_dst) { + src_offs = (aligned_src - gopts->area_src) / gopts->page_size; + dst_offs = (aligned_dst - gopts->area_dst) / gopts->page_size; step_count--; } - orig_area_src = area_src; - orig_area_dst = area_dst; - area_src = aligned_src; - area_dst = aligned_dst; + orig_area_src = gopts->area_src; + orig_area_dst = gopts->area_dst; + gopts->area_src = aligned_src; + gopts->area_dst = aligned_dst; } /* @@ 
-1180,34 +1176,34 @@ uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size, /* Check area_src content */ for (i = 0; i < step_size; i++) { - count = *area_count(area_src, nr + i); - if (count != count_verify[src_offs + nr + i]) + count = *area_count(gopts->area_src, nr + i, gopts); + if (count != gopts->count_verify[src_offs + nr + i]) err("nr %lu source memory invalid %llu %llu\n", - nr + i, count, count_verify[src_offs + nr + i]); + nr + i, count, gopts->count_verify[src_offs + nr + i]); } /* Faulting into area_dst should move the page or the huge page */ for (i = 0; i < step_size; i++) { - count = *area_count(area_dst, nr + i); - if (count != count_verify[dst_offs + nr + i]) + count = *area_count(gopts->area_dst, nr + i, gopts); + if (count != gopts->count_verify[dst_offs + nr + i]) err("nr %lu memory corruption %llu %llu\n", - nr, count, count_verify[dst_offs + nr + i]); + nr, count, gopts->count_verify[dst_offs + nr + i]); } /* Re-check area_src content which should be empty */ for (i = 0; i < step_size; i++) { - count = *area_count(area_src, nr + i); + count = *area_count(gopts->area_src, nr + i, gopts); if (count != 0) err("nr %lu move failed %llu %llu\n", - nr, count, count_verify[src_offs + nr + i]); + nr, count, gopts->count_verify[src_offs + nr + i]); } } - if (chunk_size > page_size) { - area_src = orig_area_src; - area_dst = orig_area_dst; + if (chunk_size > gopts->page_size) { + gopts->area_src = orig_area_src; + gopts->area_dst = orig_area_dst; } - if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c)) err("pipe write"); if (pthread_join(uffd_mon, NULL)) err("join() failed"); @@ -1218,24 +1214,24 @@ uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size, uffd_test_pass(); } -static void uffd_move_test(uffd_test_args_t *targs) +static void uffd_move_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs) { - uffd_move_test_common(targs, page_size, 
uffd_move_handle_fault); + uffd_move_test_common(gopts, targs, gopts->page_size, uffd_move_handle_fault); } -static void uffd_move_pmd_test(uffd_test_args_t *targs) +static void uffd_move_pmd_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs) { - if (madvise(area_dst, nr_pages * page_size, MADV_HUGEPAGE)) + if (madvise(gopts->area_dst, gopts->nr_pages * gopts->page_size, MADV_HUGEPAGE)) err("madvise(MADV_HUGEPAGE) failure"); - uffd_move_test_common(targs, read_pmd_pagesize(), + uffd_move_test_common(gopts, targs, read_pmd_pagesize(), uffd_move_pmd_handle_fault); } -static void uffd_move_pmd_split_test(uffd_test_args_t *targs) +static void uffd_move_pmd_split_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs) { - if (madvise(area_dst, nr_pages * page_size, MADV_NOHUGEPAGE)) + if (madvise(gopts->area_dst, gopts->nr_pages * gopts->page_size, MADV_NOHUGEPAGE)) err("madvise(MADV_NOHUGEPAGE) failure"); - uffd_move_test_common(targs, read_pmd_pagesize(), + uffd_move_test_common(gopts, targs, read_pmd_pagesize(), uffd_move_pmd_handle_fault); } @@ -1295,6 +1291,11 @@ typedef enum { THR_STATE_UNINTERRUPTIBLE, } thread_state; +typedef struct { + uffd_global_test_opts_t *gopts; + volatile pid_t *pid; +} mmap_changing_thread_args; + static void sleep_short(void) { usleep(1000); @@ -1337,7 +1338,9 @@ static void thread_state_until(pid_t tid, thread_state state) static void *uffd_mmap_changing_thread(void *opaque) { - volatile pid_t *pid = opaque; + mmap_changing_thread_args *args = opaque; + uffd_global_test_opts_t *gopts = args->gopts; + volatile pid_t *pid = args->pid; int ret; /* Unfortunately, it's only fetch-able from the thread itself.. 
*/ @@ -1345,21 +1348,21 @@ static void *uffd_mmap_changing_thread(void *opaque) *pid = syscall(SYS_gettid); /* Inject an event, this will hang solid until the event read */ - ret = madvise(area_dst, page_size, MADV_REMOVE); + ret = madvise(gopts->area_dst, gopts->page_size, MADV_REMOVE); if (ret) err("madvise(MADV_REMOVE) failed"); return NULL; } -static void uffd_consume_message(int fd) +static void uffd_consume_message(uffd_global_test_opts_t *gopts) { struct uffd_msg msg = { 0 }; - while (uffd_read_msg(fd, &msg)); + while (uffd_read_msg(gopts, &msg)); } -static void uffd_mmap_changing_test(uffd_test_args_t *targs) +static void uffd_mmap_changing_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs) { /* * This stores the real PID (which can be different from how tid is @@ -1368,13 +1371,14 @@ static void uffd_mmap_changing_test(uffd_test_args_t *targs) pid_t pid = 0; pthread_t tid; int ret; + mmap_changing_thread_args args = { gopts, &pid }; - if (uffd_register(uffd, area_dst, nr_pages * page_size, + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, true, false, false)) err("uffd_register() failed"); /* Create a thread to generate the racy event */ - ret = pthread_create(&tid, NULL, uffd_mmap_changing_thread, &pid); + ret = pthread_create(&tid, NULL, uffd_mmap_changing_thread, &args); if (ret) err("pthread_create() failed"); @@ -1388,26 +1392,26 @@ static void uffd_mmap_changing_test(uffd_test_args_t *targs) /* Wait until the thread hangs at REMOVE event */ thread_state_until(pid, THR_STATE_UNINTERRUPTIBLE); - if (!uffdio_mmap_changing_test_copy(uffd)) + if (!uffdio_mmap_changing_test_copy(gopts->uffd)) return; - if (!uffdio_mmap_changing_test_zeropage(uffd)) + if (!uffdio_mmap_changing_test_zeropage(gopts->uffd)) return; - if (!uffdio_mmap_changing_test_move(uffd)) + if (!uffdio_mmap_changing_test_move(gopts->uffd)) return; - if (!uffdio_mmap_changing_test_poison(uffd)) + if 
(!uffdio_mmap_changing_test_poison(gopts->uffd)) return; - if (!uffdio_mmap_changing_test_continue(uffd)) + if (!uffdio_mmap_changing_test_continue(gopts->uffd)) return; /* * All succeeded above! Recycle everything. Start by reading the * event so as to kick the thread roll again.. */ - uffd_consume_message(uffd); + uffd_consume_message(gopts); ret = pthread_join(tid, NULL); assert(ret == 0); @@ -1415,10 +1419,10 @@ static void uffd_mmap_changing_test(uffd_test_args_t *targs) uffd_test_pass(); } -static int prevent_hugepages(const char **errmsg) +static int prevent_hugepages(uffd_global_test_opts_t *gopts, const char **errmsg) { /* This should be done before source area is populated */ - if (madvise(area_src, nr_pages * page_size, MADV_NOHUGEPAGE)) { + if (madvise(gopts->area_src, gopts->nr_pages * gopts->page_size, MADV_NOHUGEPAGE)) { /* Ignore only if CONFIG_TRANSPARENT_HUGEPAGE=n */ if (errno != EINVAL) { if (errmsg) @@ -1429,10 +1433,10 @@ static int prevent_hugepages(const char **errmsg) return 0; } -static int request_hugepages(const char **errmsg) +static int request_hugepages(uffd_global_test_opts_t *gopts, const char **errmsg) { /* This should be done before source area is populated */ - if (madvise(area_src, nr_pages * page_size, MADV_HUGEPAGE)) { + if (madvise(gopts->area_src, gopts->nr_pages * gopts->page_size, MADV_HUGEPAGE)) { if (errmsg) { *errmsg = (errno == EINVAL) ? "CONFIG_TRANSPARENT_HUGEPAGE is not set" : @@ -1456,13 +1460,17 @@ struct uffd_test_case_ops uffd_move_test_pmd_case_ops = { * Note that _UFFDIO_ZEROPAGE is tested separately in the zeropage test. 
*/ static void -do_register_ioctls_test(uffd_test_args_t *args, bool miss, bool wp, bool minor) +do_register_ioctls_test(uffd_global_test_opts_t *gopts, + uffd_test_args_t *args, + bool miss, + bool wp, + bool minor) { uint64_t ioctls = 0, expected = BIT_ULL(_UFFDIO_WAKE); mem_type_t *mem_type = args->mem_type; int ret; - ret = uffd_register_with_ioctls(uffd, area_dst, page_size, + ret = uffd_register_with_ioctls(gopts->uffd, gopts->area_dst, gopts->page_size, miss, wp, minor, &ioctls); /* @@ -1493,18 +1501,18 @@ do_register_ioctls_test(uffd_test_args_t *args, bool miss, bool wp, bool minor) "(miss=%d, wp=%d, minor=%d): expected=0x%"PRIx64", " "returned=0x%"PRIx64, miss, wp, minor, expected, ioctls); - if (uffd_unregister(uffd, area_dst, page_size)) + if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size)) err("unregister"); } -static void uffd_register_ioctls_test(uffd_test_args_t *args) +static void uffd_register_ioctls_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { int miss, wp, minor; for (miss = 0; miss <= 1; miss++) for (wp = 0; wp <= 1; wp++) for (minor = 0; minor <= 1; minor++) - do_register_ioctls_test(args, miss, wp, minor); + do_register_ioctls_test(gopts, args, miss, wp, minor); uffd_test_pass(); } @@ -1742,6 +1750,28 @@ int main(int argc, char *argv[]) } for (j = 0; j < n_mems; j++) { mem_type = &mem_types[j]; + + /* Initialize global test options */ + uffd_global_test_opts_t gopts = { 0 }; + + gopts.map_shared = mem_type->shared; + uffd_test_ops = mem_type->mem_ops; + uffd_test_case_ops = test->test_case_ops; + + if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB)) + gopts.page_size = default_huge_page_size(); + else + gopts.page_size = psize(); + + /* Ensure we have at least 2 pages */ + gopts.nr_pages = MAX(UFFD_TEST_MEM_SIZE, gopts.page_size * 2) + / gopts.page_size; + + gopts.nr_parallel = 1; + + /* Initialize test arguments */ + args.mem_type = mem_type; + if (!(test->mem_targets & mem_type->mem_flag)) 
continue; @@ -1756,13 +1786,12 @@ int main(int argc, char *argv[]) uffd_test_skip("feature missing"); continue; } - if (uffd_setup_environment(&args, test, mem_type, - &errmsg)) { + if (uffd_test_ctx_init(&gopts, test->uffd_feature_required, &errmsg)) { uffd_test_skip(errmsg); continue; } - test->uffd_fn(&args); - uffd_test_ctx_clear(); + test->uffd_fn(&gopts, &args); + uffd_test_ctx_clear(&gopts); } } diff --git a/tools/testing/selftests/mm/uffd-wp-mremap.c b/tools/testing/selftests/mm/uffd-wp-mremap.c index 78038c40aaaf..4e4a591cf527 100644 --- a/tools/testing/selftests/mm/uffd-wp-mremap.c +++ b/tools/testing/selftests/mm/uffd-wp-mremap.c @@ -152,7 +152,8 @@ static bool range_is_swapped(void *addr, size_t size) return true; } -static void test_one_folio(size_t size, bool private, bool swapout, bool hugetlb) +static void test_one_folio(uffd_global_test_opts_t *gopts, size_t size, bool private, + bool swapout, bool hugetlb) { struct uffdio_writeprotect wp_prms; uint64_t features = 0; @@ -176,21 +177,21 @@ static void test_one_folio(size_t size, bool private, bool swapout, bool hugetlb } /* Register range for uffd-wp. 
*/ - if (userfaultfd_open(&features)) { + if (userfaultfd_open(gopts, &features)) { if (errno == ENOENT) ksft_test_result_skip("userfaultfd not available\n"); else ksft_test_result_fail("userfaultfd_open() failed\n"); goto out; } - if (uffd_register(uffd, mem, size, false, true, false)) { + if (uffd_register(gopts->uffd, mem, size, false, true, false)) { ksft_test_result_fail("uffd_register() failed\n"); goto out; } wp_prms.mode = UFFDIO_WRITEPROTECT_MODE_WP; wp_prms.range.start = (uintptr_t)mem; wp_prms.range.len = size; - if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp_prms)) { + if (ioctl(gopts->uffd, UFFDIO_WRITEPROTECT, &wp_prms)) { ksft_test_result_fail("ioctl(UFFDIO_WRITEPROTECT) failed\n"); goto out; } @@ -237,9 +238,9 @@ static void test_one_folio(size_t size, bool private, bool swapout, bool hugetlb out: if (mem) munmap(mem, size); - if (uffd >= 0) { - close(uffd); - uffd = -1; + if (gopts->uffd >= 0) { + close(gopts->uffd); + gopts->uffd = -1; } } @@ -331,6 +332,7 @@ static const struct testcase testcases[] = { int main(int argc, char **argv) { + uffd_global_test_opts_t gopts = { 0 }; struct thp_settings settings; int i, j, plan = 0; @@ -362,8 +364,8 @@ int main(int argc, char **argv) const struct testcase *tc = &testcases[i]; for (j = 0; j < *tc->nr_sizes; j++) - test_one_folio(tc->sizes[j], tc->private, tc->swapout, - tc->hugetlb); + test_one_folio(&gopts, tc->sizes[j], tc->private, + tc->swapout, tc->hugetlb); } /* If THP is supported, restore original THP settings. */ -- 2.30.2

4 months, 1 week

1
0
0 0

[PATCH v7 1/1] selftests/mm/uffd: Refactor non-composite global vars into struct

by Ujwal Kundur

Refactor macros and non-composite global variable definitions into a struct that is defined at the start of a test and is passed around instead of relying on global vars. Signed-off-by: Ujwal Kundur <ujwal.kundur(a)gmail.com> Acked-by: Peter Xu <peterx(a)redhat.com> --- Previous versions and discussion at: https://lore.kernel.org/all/20250702152057.4067-1-ujwal.kundur@gmail.com/ Changes since v6: - rebased on 6.17-rc1 changes (cd79a1d9b08a) - removes unused args and adds the __unused attribute; since change is cosmetic-only, carry forward Acked-by tag - verified output remains unchanged using virtme-ng Changes since v5: - ensure uffd_global_test_opts_t instances are initialized - verified output remains unchanged using virtme-ng Changes since v4: - define gopts as global within uffd-stress.c to retain existing sigalrm handler logic Changes since v3: - more formatting fixes Changes since v2: - redo patch on mm-new branch Changes since v1: - indentation fixes - squash into single patch to assist bisections tools/testing/selftests/mm/uffd-common.c | 275 ++++----- tools/testing/selftests/mm/uffd-common.h | 78 +-- tools/testing/selftests/mm/uffd-stress.c | 228 ++++---- tools/testing/selftests/mm/uffd-unit-tests.c | 561 ++++++++++--------- tools/testing/selftests/mm/uffd-wp-mremap.c | 23 +- 5 files changed, 623 insertions(+), 542 deletions(-) diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c index e309ec886fa7..f4e9a5f43e24 100644 --- a/tools/testing/selftests/mm/uffd-common.c +++ b/tools/testing/selftests/mm/uffd-common.c @@ -7,18 +7,30 @@ #include "uffd-common.h" -#define BASE_PMD_ADDR ((void *)(1UL << 30)) - -volatile bool test_uffdio_copy_eexist = true; -unsigned long nr_parallel, nr_pages, nr_pages_per_cpu, page_size; -char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap; -int uffd = -1, uffd_flags, finished, *pipefd, test_type; -bool map_shared; -bool test_uffdio_wp = true; -unsigned long long 
*count_verify; uffd_test_ops_t *uffd_test_ops; uffd_test_case_ops_t *uffd_test_case_ops; -atomic_bool ready_for_fork; + +#define BASE_PMD_ADDR ((void *)(1UL << 30)) + +/* pthread_mutex_t starts at page offset 0 */ +pthread_mutex_t *area_mutex(char *area, unsigned long nr, uffd_global_test_opts_t *gopts) +{ + return (pthread_mutex_t *) (area + nr * gopts->page_size); +} + +/* + * count is placed in the page after pthread_mutex_t naturally aligned + * to avoid non alignment faults on non-x86 archs. + */ +volatile unsigned long long *area_count( + char *area, unsigned long nr, + uffd_global_test_opts_t *gopts) +{ + return (volatile unsigned long long *) + ((unsigned long)(area + nr * gopts->page_size + + sizeof(pthread_mutex_t) + sizeof(unsigned long long) - 1) & + ~(unsigned long)(sizeof(unsigned long long) - 1)); +} static int uffd_mem_fd_create(off_t mem_size, bool hugetlb) { @@ -40,15 +52,16 @@ static int uffd_mem_fd_create(off_t mem_size, bool hugetlb) return mem_fd; } -static void anon_release_pages(char *rel_area) +static void anon_release_pages(uffd_global_test_opts_t *gopts, char *rel_area) { - if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED)) + if (madvise(rel_area, gopts->nr_pages * gopts->page_size, MADV_DONTNEED)) err("madvise(MADV_DONTNEED) failed"); } -static int anon_allocate_area(void **alloc_area, bool __unused is_src) +static int anon_allocate_area(uffd_global_test_opts_t *gopts, void **alloc_area, + bool __unused is_src) { - *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, + *alloc_area = mmap(NULL, gopts->nr_pages * gopts->page_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); if (*alloc_area == MAP_FAILED) { *alloc_area = NULL; @@ -57,32 +70,32 @@ static int anon_allocate_area(void **alloc_area, bool __unused is_src) return 0; } -static void noop_alias_mapping(__u64 __unused *start, size_t __unused len, - unsigned long __unused offset) +static void noop_alias_mapping(uffd_global_test_opts_t *gopts, 
__u64 __unused *start, + size_t __unused len, unsigned long __unused offset) { } -static void hugetlb_release_pages(char *rel_area) +static void hugetlb_release_pages(uffd_global_test_opts_t *gopts, char *rel_area) { - if (!map_shared) { - if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED)) + if (!gopts->map_shared) { + if (madvise(rel_area, gopts->nr_pages * gopts->page_size, MADV_DONTNEED)) err("madvise(MADV_DONTNEED) failed"); } else { - if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE)) + if (madvise(rel_area, gopts->nr_pages * gopts->page_size, MADV_REMOVE)) err("madvise(MADV_REMOVE) failed"); } } -static int hugetlb_allocate_area(void **alloc_area, bool is_src) +static int hugetlb_allocate_area(uffd_global_test_opts_t *gopts, void **alloc_area, bool is_src) { - off_t size = nr_pages * page_size; + off_t size = gopts->nr_pages * gopts->page_size; off_t offset = is_src ? 0 : size; void *area_alias = NULL; char **alloc_area_alias; int mem_fd = uffd_mem_fd_create(size * 2, true); *alloc_area = mmap(NULL, size, PROT_READ | PROT_WRITE, - (map_shared ? MAP_SHARED : MAP_PRIVATE) | + (gopts->map_shared ? MAP_SHARED : MAP_PRIVATE) | (is_src ? 
0 : MAP_NORESERVE), mem_fd, offset); if (*alloc_area == MAP_FAILED) { @@ -90,7 +103,7 @@ static int hugetlb_allocate_area(void **alloc_area, bool is_src) return -errno; } - if (map_shared) { + if (gopts->map_shared) { area_alias = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, mem_fd, offset); if (area_alias == MAP_FAILED) @@ -98,9 +111,9 @@ static int hugetlb_allocate_area(void **alloc_area, bool is_src) } if (is_src) { - alloc_area_alias = &area_src_alias; + alloc_area_alias = &gopts->area_src_alias; } else { - alloc_area_alias = &area_dst_alias; + alloc_area_alias = &gopts->area_dst_alias; } if (area_alias) *alloc_area_alias = area_alias; @@ -109,24 +122,25 @@ static int hugetlb_allocate_area(void **alloc_area, bool is_src) return 0; } -static void hugetlb_alias_mapping(__u64 *start, size_t __unused len, unsigned long offset) +static void hugetlb_alias_mapping(uffd_global_test_opts_t *gopts, __u64 *start, + size_t __unused len, unsigned long offset) { - if (!map_shared) + if (!gopts->map_shared) return; - *start = (unsigned long) area_dst_alias + offset; + *start = (unsigned long) gopts->area_dst_alias + offset; } -static void shmem_release_pages(char *rel_area) +static void shmem_release_pages(uffd_global_test_opts_t *gopts, char *rel_area) { - if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE)) + if (madvise(rel_area, gopts->nr_pages * gopts->page_size, MADV_REMOVE)) err("madvise(MADV_REMOVE) failed"); } -static int shmem_allocate_area(void **alloc_area, bool is_src) +static int shmem_allocate_area(uffd_global_test_opts_t *gopts, void **alloc_area, bool is_src) { void *area_alias = NULL; - size_t bytes = nr_pages * page_size, hpage_size = read_pmd_pagesize(); + size_t bytes = gopts->nr_pages * gopts->page_size, hpage_size = read_pmd_pagesize(); unsigned long offset = is_src ? 
0 : bytes; char *p = NULL, *p_alias = NULL; int mem_fd = uffd_mem_fd_create(bytes * 2, false); @@ -160,22 +174,24 @@ static int shmem_allocate_area(void **alloc_area, bool is_src) err("mmap of anonymous memory failed at %p", p_alias); if (is_src) - area_src_alias = area_alias; + gopts->area_src_alias = area_alias; else - area_dst_alias = area_alias; + gopts->area_dst_alias = area_alias; close(mem_fd); return 0; } -static void shmem_alias_mapping(__u64 *start, size_t __unused len, unsigned long offset) +static void shmem_alias_mapping(uffd_global_test_opts_t *gopts, __u64 *start, + size_t __unused len, unsigned long offset) { - *start = (unsigned long)area_dst_alias + offset; + *start = (unsigned long)gopts->area_dst_alias + offset; } -static void shmem_check_pmd_mapping(void __unused *p, int expect_nr_hpages) +static void shmem_check_pmd_mapping(uffd_global_test_opts_t *gopts, void __unused *p, + int expect_nr_hpages) { - if (!check_huge_shmem(area_dst_alias, expect_nr_hpages, + if (!check_huge_shmem(gopts->area_dst_alias, expect_nr_hpages, read_pmd_pagesize())) err("Did not find expected %d number of hugepages", expect_nr_hpages); @@ -235,18 +251,18 @@ void uffd_stats_report(struct uffd_args *args, int n_cpus) printf("\n"); } -int userfaultfd_open(uint64_t *features) +int userfaultfd_open(uffd_global_test_opts_t *gopts, uint64_t *features) { struct uffdio_api uffdio_api; - uffd = uffd_open(UFFD_FLAGS); - if (uffd < 0) + gopts->uffd = uffd_open(UFFD_FLAGS); + if (gopts->uffd < 0) return -1; - uffd_flags = fcntl(uffd, F_GETFD, NULL); + gopts->uffd_flags = fcntl(gopts->uffd, F_GETFD, NULL); uffdio_api.api = UFFD_API; uffdio_api.features = *features; - if (ioctl(uffd, UFFDIO_API, &uffdio_api)) + if (ioctl(gopts->uffd, UFFDIO_API, &uffdio_api)) /* Probably lack of CAP_PTRACE? 
*/ return -1; if (uffdio_api.api != UFFD_API) @@ -256,59 +272,63 @@ int userfaultfd_open(uint64_t *features) return 0; } -static inline void munmap_area(void **area) +static inline void munmap_area(uffd_global_test_opts_t *gopts, void **area) { if (*area) - if (munmap(*area, nr_pages * page_size)) + if (munmap(*area, gopts->nr_pages * gopts->page_size)) err("munmap"); *area = NULL; } -void uffd_test_ctx_clear(void) +void uffd_test_ctx_clear(uffd_global_test_opts_t *gopts) { size_t i; - if (pipefd) { - for (i = 0; i < nr_parallel * 2; ++i) { - if (close(pipefd[i])) + if (gopts->pipefd) { + for (i = 0; i < gopts->nr_parallel * 2; ++i) { + if (close(gopts->pipefd[i])) err("close pipefd"); } - free(pipefd); - pipefd = NULL; + free(gopts->pipefd); + gopts->pipefd = NULL; } - if (count_verify) { - free(count_verify); - count_verify = NULL; + if (gopts->count_verify) { + free(gopts->count_verify); + gopts->count_verify = NULL; } - if (uffd != -1) { - if (close(uffd)) + if (gopts->uffd != -1) { + if (close(gopts->uffd)) err("close uffd"); - uffd = -1; + gopts->uffd = -1; } - munmap_area((void **)&area_src); - munmap_area((void **)&area_src_alias); - munmap_area((void **)&area_dst); - munmap_area((void **)&area_dst_alias); - munmap_area((void **)&area_remap); + munmap_area(gopts, (void **)&gopts->area_src); + munmap_area(gopts, (void **)&gopts->area_src_alias); + munmap_area(gopts, (void **)&gopts->area_dst); + munmap_area(gopts, (void **)&gopts->area_dst_alias); + munmap_area(gopts, (void **)&gopts->area_remap); } -int uffd_test_ctx_init(uint64_t features, const char **errmsg) +int uffd_test_ctx_init(uffd_global_test_opts_t *gopts, uint64_t features, const char **errmsg) { unsigned long nr, cpu; int ret; + gopts->area_src_alias = NULL; + gopts->area_dst_alias = NULL; + gopts->area_remap = NULL; + if (uffd_test_case_ops && uffd_test_case_ops->pre_alloc) { - ret = uffd_test_case_ops->pre_alloc(errmsg); + ret = uffd_test_case_ops->pre_alloc(gopts, errmsg); if (ret) return 
ret; } - ret = uffd_test_ops->allocate_area((void **)&area_src, true); - ret |= uffd_test_ops->allocate_area((void **)&area_dst, false); + ret = uffd_test_ops->allocate_area(gopts, (void **) &gopts->area_src, true); + ret |= uffd_test_ops->allocate_area(gopts, (void **) &gopts->area_dst, false); if (ret) { if (errmsg) *errmsg = "memory allocation failed"; @@ -316,26 +336,26 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg) } if (uffd_test_case_ops && uffd_test_case_ops->post_alloc) { - ret = uffd_test_case_ops->post_alloc(errmsg); + ret = uffd_test_case_ops->post_alloc(gopts, errmsg); if (ret) return ret; } - ret = userfaultfd_open(&features); + ret = userfaultfd_open(gopts, &features); if (ret) { if (errmsg) *errmsg = "possible lack of privilege"; return ret; } - count_verify = malloc(nr_pages * sizeof(unsigned long long)); - if (!count_verify) + gopts->count_verify = malloc(gopts->nr_pages * sizeof(unsigned long long)); + if (!gopts->count_verify) err("count_verify"); - for (nr = 0; nr < nr_pages; nr++) { - *area_mutex(area_src, nr) = + for (nr = 0; nr < gopts->nr_pages; nr++) { + *area_mutex(gopts->area_src, nr, gopts) = (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER; - count_verify[nr] = *area_count(area_src, nr) = 1; + gopts->count_verify[nr] = *area_count(gopts->area_src, nr, gopts) = 1; /* * In the transition between 255 to 256, powerpc will * read out of order in my_bcmp and see both bytes as @@ -343,7 +363,7 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg) * after the count, to avoid my_bcmp to trigger false * positives. */ - *(area_count(area_src, nr) + 1) = 1; + *(area_count(gopts->area_src, nr, gopts) + 1) = 1; } /* @@ -364,13 +384,13 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg) * proactively split the thp and drop any accidentally initialized * pages within area_dst. 
*/ - uffd_test_ops->release_pages(area_dst); + uffd_test_ops->release_pages(gopts, gopts->area_dst); - pipefd = malloc(sizeof(int) * nr_parallel * 2); - if (!pipefd) + gopts->pipefd = malloc(sizeof(int) * gopts->nr_parallel * 2); + if (!gopts->pipefd) err("pipefd"); - for (cpu = 0; cpu < nr_parallel; cpu++) - if (pipe2(&pipefd[cpu * 2], O_CLOEXEC | O_NONBLOCK)) + for (cpu = 0; cpu < gopts->nr_parallel; cpu++) + if (pipe2(&gopts->pipefd[cpu * 2], O_CLOEXEC | O_NONBLOCK)) err("pipe"); return 0; @@ -417,9 +437,9 @@ static void continue_range(int ufd, __u64 start, __u64 len, bool wp) ret, (int64_t) req.mapped); } -int uffd_read_msg(struct uffd_msg *msg) +int uffd_read_msg(uffd_global_test_opts_t *gopts, struct uffd_msg *msg) { - int ret = read(uffd, msg, sizeof(*msg)); + int ret = read(gopts->uffd, msg, sizeof(*msg)); if (ret != sizeof(*msg)) { if (ret < 0) { @@ -434,7 +454,8 @@ int uffd_read_msg(struct uffd_msg *msg) return 0; } -void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args) +void uffd_handle_page_fault(uffd_global_test_opts_t *gopts, struct uffd_msg *msg, + struct uffd_args *args) { unsigned long offset; @@ -443,7 +464,7 @@ void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args) if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP) { /* Write protect page faults */ - wp_range(uffd, msg->arg.pagefault.address, page_size, false); + wp_range(gopts->uffd, msg->arg.pagefault.address, gopts->page_size, false); args->wp_faults++; } else if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) { uint8_t *area; @@ -461,12 +482,12 @@ void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args) * (UFFD-registered). 
*/ - area = (uint8_t *)(area_dst + - ((char *)msg->arg.pagefault.address - - area_dst_alias)); - for (b = 0; b < page_size; ++b) + area = (uint8_t *)(gopts->area_dst + + ((char *)msg->arg.pagefault.address - + gopts->area_dst_alias)); + for (b = 0; b < gopts->page_size; ++b) area[b] = ~area[b]; - continue_range(uffd, msg->arg.pagefault.address, page_size, + continue_range(gopts->uffd, msg->arg.pagefault.address, gopts->page_size, args->apply_wp); args->minor_faults++; } else { @@ -494,10 +515,10 @@ void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args) if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) err("unexpected write fault"); - offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst; - offset &= ~(page_size-1); + offset = (char *)(unsigned long)msg->arg.pagefault.address - gopts->area_dst; + offset &= ~(gopts->page_size-1); - if (copy_page(uffd, offset, args->apply_wp)) + if (copy_page(gopts, offset, args->apply_wp)) args->missing_faults++; } } @@ -505,6 +526,7 @@ void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args) void *uffd_poll_thread(void *arg) { struct uffd_args *args = (struct uffd_args *)arg; + uffd_global_test_opts_t *gopts = args->gopts; unsigned long cpu = args->cpu; struct pollfd pollfd[2]; struct uffd_msg msg; @@ -515,12 +537,12 @@ void *uffd_poll_thread(void *arg) if (!args->handle_fault) args->handle_fault = uffd_handle_page_fault; - pollfd[0].fd = uffd; + pollfd[0].fd = gopts->uffd; pollfd[0].events = POLLIN; - pollfd[1].fd = pipefd[cpu*2]; + pollfd[1].fd = gopts->pipefd[cpu*2]; pollfd[1].events = POLLIN; - ready_for_fork = true; + gopts->ready_for_fork = true; for (;;) { ret = poll(pollfd, 2, -1); @@ -538,30 +560,30 @@ void *uffd_poll_thread(void *arg) } if (!(pollfd[0].revents & POLLIN)) err("pollfd[0].revents %d", pollfd[0].revents); - if (uffd_read_msg(&msg)) + if (uffd_read_msg(gopts, &msg)) continue; switch (msg.event) { default: err("unexpected msg event %u\n", msg.event); 
break; case UFFD_EVENT_PAGEFAULT: - args->handle_fault(&msg, args); + args->handle_fault(gopts, &msg, args); break; case UFFD_EVENT_FORK: - close(uffd); - uffd = msg.arg.fork.ufd; - pollfd[0].fd = uffd; + close(gopts->uffd); + gopts->uffd = msg.arg.fork.ufd; + pollfd[0].fd = gopts->uffd; break; case UFFD_EVENT_REMOVE: uffd_reg.range.start = msg.arg.remove.start; uffd_reg.range.len = msg.arg.remove.end - msg.arg.remove.start; - if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range)) + if (ioctl(gopts->uffd, UFFDIO_UNREGISTER, &uffd_reg.range)) err("remove failure"); break; case UFFD_EVENT_REMAP: - area_remap = area_dst; /* save for later unmap */ - area_dst = (char *)(unsigned long)msg.arg.remap.to; + gopts->area_remap = gopts->area_dst; /* save for later unmap */ + gopts->area_dst = (char *)(unsigned long)msg.arg.remap.to; break; } } @@ -569,17 +591,18 @@ void *uffd_poll_thread(void *arg) return NULL; } -static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy, +static void retry_copy_page(uffd_global_test_opts_t *gopts, struct uffdio_copy *uffdio_copy, unsigned long offset) { - uffd_test_ops->alias_mapping(&uffdio_copy->dst, + uffd_test_ops->alias_mapping(gopts, + &uffdio_copy->dst, uffdio_copy->len, offset); - if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) { + if (ioctl(gopts->uffd, UFFDIO_COPY, uffdio_copy)) { /* real retval in ufdio_copy.copy */ if (uffdio_copy->copy != -EEXIST) err("UFFDIO_COPY retry error: %"PRId64, - (int64_t)uffdio_copy->copy); + (int64_t)uffdio_copy->copy); } else { err("UFFDIO_COPY retry unexpected: %"PRId64, (int64_t)uffdio_copy->copy); @@ -598,60 +621,60 @@ static void wake_range(int ufd, unsigned long addr, unsigned long len) addr), exit(1); } -int __copy_page(int ufd, unsigned long offset, bool retry, bool wp) +int __copy_page(uffd_global_test_opts_t *gopts, unsigned long offset, bool retry, bool wp) { struct uffdio_copy uffdio_copy; - if (offset >= nr_pages * page_size) + if (offset >= gopts->nr_pages * gopts->page_size) 
err("unexpected offset %lu\n", offset); - uffdio_copy.dst = (unsigned long) area_dst + offset; - uffdio_copy.src = (unsigned long) area_src + offset; - uffdio_copy.len = page_size; + uffdio_copy.dst = (unsigned long) gopts->area_dst + offset; + uffdio_copy.src = (unsigned long) gopts->area_src + offset; + uffdio_copy.len = gopts->page_size; if (wp) uffdio_copy.mode = UFFDIO_COPY_MODE_WP; else uffdio_copy.mode = 0; uffdio_copy.copy = 0; - if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) { + if (ioctl(gopts->uffd, UFFDIO_COPY, &uffdio_copy)) { /* real retval in ufdio_copy.copy */ if (uffdio_copy.copy != -EEXIST) err("UFFDIO_COPY error: %"PRId64, (int64_t)uffdio_copy.copy); - wake_range(ufd, uffdio_copy.dst, page_size); - } else if (uffdio_copy.copy != page_size) { + wake_range(gopts->uffd, uffdio_copy.dst, gopts->page_size); + } else if (uffdio_copy.copy != gopts->page_size) { err("UFFDIO_COPY error: %"PRId64, (int64_t)uffdio_copy.copy); } else { - if (test_uffdio_copy_eexist && retry) { - test_uffdio_copy_eexist = false; - retry_copy_page(ufd, &uffdio_copy, offset); + if (gopts->test_uffdio_copy_eexist && retry) { + gopts->test_uffdio_copy_eexist = false; + retry_copy_page(gopts, &uffdio_copy, offset); } return 1; } return 0; } -int copy_page(int ufd, unsigned long offset, bool wp) +int copy_page(uffd_global_test_opts_t *gopts, unsigned long offset, bool wp) { - return __copy_page(ufd, offset, false, wp); + return __copy_page(gopts, offset, false, wp); } -int move_page(int ufd, unsigned long offset, unsigned long len) +int move_page(uffd_global_test_opts_t *gopts, unsigned long offset, unsigned long len) { struct uffdio_move uffdio_move; - if (offset + len > nr_pages * page_size) + if (offset + len > gopts->nr_pages * gopts->page_size) err("unexpected offset %lu and length %lu\n", offset, len); - uffdio_move.dst = (unsigned long) area_dst + offset; - uffdio_move.src = (unsigned long) area_src + offset; + uffdio_move.dst = (unsigned long) gopts->area_dst + offset; + 
uffdio_move.src = (unsigned long) gopts->area_src + offset; uffdio_move.len = len; uffdio_move.mode = UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES; uffdio_move.move = 0; - if (ioctl(ufd, UFFDIO_MOVE, &uffdio_move)) { + if (ioctl(gopts->uffd, UFFDIO_MOVE, &uffdio_move)) { /* real retval in uffdio_move.move */ if (uffdio_move.move != -EEXIST) err("UFFDIO_MOVE error: %"PRId64, (int64_t)uffdio_move.move); - wake_range(ufd, uffdio_move.dst, len); + wake_range(gopts->uffd, uffdio_move.dst, len); } else if (uffdio_move.move != len) { err("UFFDIO_MOVE error: %"PRId64, (int64_t)uffdio_move.move); } else diff --git a/tools/testing/selftests/mm/uffd-common.h b/tools/testing/selftests/mm/uffd-common.h index 2e7066d69103..37d3ca55905f 100644 --- a/tools/testing/selftests/mm/uffd-common.h +++ b/tools/testing/selftests/mm/uffd-common.h @@ -56,20 +56,17 @@ #define err(fmt, ...) errexit(1, fmt, ##__VA_ARGS__) -/* pthread_mutex_t starts at page offset 0 */ -#define area_mutex(___area, ___nr) \ - ((pthread_mutex_t *) ((___area) + (___nr)*page_size)) -/* - * count is placed in the page after pthread_mutex_t naturally aligned - * to avoid non alignment faults on non-x86 archs. 
- */ -#define area_count(___area, ___nr) \ - ((volatile unsigned long long *) ((unsigned long) \ - ((___area) + (___nr)*page_size + \ - sizeof(pthread_mutex_t) + \ - sizeof(unsigned long long) - 1) & \ - ~(unsigned long)(sizeof(unsigned long long) \ - - 1))) +struct uffd_global_test_opts { + unsigned long nr_parallel, nr_pages, nr_pages_per_cpu, page_size; + char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap; + int uffd, uffd_flags, finished, *pipefd, test_type; + bool map_shared; + bool test_uffdio_wp; + unsigned long long *count_verify; + volatile bool test_uffdio_copy_eexist; + atomic_bool ready_for_fork; +}; +typedef struct uffd_global_test_opts uffd_global_test_opts_t; /* Userfaultfd test statistics */ struct uffd_args { @@ -79,50 +76,55 @@ struct uffd_args { unsigned long missing_faults; unsigned long wp_faults; unsigned long minor_faults; + struct uffd_global_test_opts *gopts; /* A custom fault handler; defaults to uffd_handle_page_fault. */ - void (*handle_fault)(struct uffd_msg *msg, struct uffd_args *args); + void (*handle_fault)(struct uffd_global_test_opts *gopts, + struct uffd_msg *msg, + struct uffd_args *args); }; struct uffd_test_ops { - int (*allocate_area)(void **alloc_area, bool is_src); - void (*release_pages)(char *rel_area); - void (*alias_mapping)(__u64 *start, size_t len, unsigned long offset); - void (*check_pmd_mapping)(void *p, int expect_nr_hpages); + int (*allocate_area)(uffd_global_test_opts_t *gopts, void **alloc_area, bool is_src); + void (*release_pages)(uffd_global_test_opts_t *gopts, char *rel_area); + void (*alias_mapping)(uffd_global_test_opts_t *gopts, + __u64 *start, + size_t len, + unsigned long offset); + void (*check_pmd_mapping)(uffd_global_test_opts_t *gopts, void *p, int expect_nr_hpages); }; typedef struct uffd_test_ops uffd_test_ops_t; struct uffd_test_case_ops { - int (*pre_alloc)(const char **errmsg); - int (*post_alloc)(const char **errmsg); + int (*pre_alloc)(uffd_global_test_opts_t *gopts, 
const char **errmsg); + int (*post_alloc)(uffd_global_test_opts_t *gopts, const char **errmsg); }; typedef struct uffd_test_case_ops uffd_test_case_ops_t; -extern unsigned long nr_parallel, nr_pages, nr_pages_per_cpu, page_size; -extern char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap; -extern int uffd, uffd_flags, finished, *pipefd, test_type; -extern bool map_shared; -extern bool test_uffdio_wp; -extern unsigned long long *count_verify; -extern volatile bool test_uffdio_copy_eexist; -extern atomic_bool ready_for_fork; - +extern uffd_global_test_opts_t *uffd_gtest_opts; extern uffd_test_ops_t anon_uffd_test_ops; extern uffd_test_ops_t shmem_uffd_test_ops; extern uffd_test_ops_t hugetlb_uffd_test_ops; extern uffd_test_ops_t *uffd_test_ops; extern uffd_test_case_ops_t *uffd_test_case_ops; +pthread_mutex_t *area_mutex(char *area, unsigned long nr, uffd_global_test_opts_t *gopts); +volatile unsigned long long *area_count(char *area, + unsigned long nr, + uffd_global_test_opts_t *gopts); + void uffd_stats_report(struct uffd_args *args, int n_cpus); -int uffd_test_ctx_init(uint64_t features, const char **errmsg); -void uffd_test_ctx_clear(void); -int userfaultfd_open(uint64_t *features); -int uffd_read_msg(struct uffd_msg *msg); +int uffd_test_ctx_init(uffd_global_test_opts_t *gopts, uint64_t features, const char **errmsg); +void uffd_test_ctx_clear(uffd_global_test_opts_t *gopts); +int userfaultfd_open(uffd_global_test_opts_t *gopts, uint64_t *features); +int uffd_read_msg(uffd_global_test_opts_t *gopts, struct uffd_msg *msg); void wp_range(int ufd, __u64 start, __u64 len, bool wp); -void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args); -int __copy_page(int ufd, unsigned long offset, bool retry, bool wp); -int copy_page(int ufd, unsigned long offset, bool wp); -int move_page(int ufd, unsigned long offset, unsigned long len); +void uffd_handle_page_fault(uffd_global_test_opts_t *gopts, + struct uffd_msg *msg, + struct 
uffd_args *args); +int __copy_page(uffd_global_test_opts_t *gopts, unsigned long offset, bool retry, bool wp); +int copy_page(uffd_global_test_opts_t *gopts, unsigned long offset, bool wp); +int move_page(uffd_global_test_opts_t *gopts, unsigned long offset, unsigned long len); void *uffd_poll_thread(void *arg); int uffd_open_dev(unsigned int flags); diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c index 24aac0ae96c6..9b216e842bbb 100644 --- a/tools/testing/selftests/mm/uffd-stress.c +++ b/tools/testing/selftests/mm/uffd-stress.c @@ -44,6 +44,12 @@ uint64_t features; #define BOUNCE_VERIFY (1<<2) #define BOUNCE_POLL (1<<3) static int bounces; +/* defined globally for this particular test as the sigalrm handler + * depends on test_uffdio_*_eexist. + * XXX: define gopts in main() when we figure out a way to deal with + * test_uffdio_*_eexist. + */ +static uffd_global_test_opts_t *gopts; /* exercise the test_uffdio_*_eexist every ALARM_INTERVAL_SECS */ #define ALARM_INTERVAL_SECS 10 @@ -76,54 +82,58 @@ static void usage(void) exit(1); } -static void uffd_stats_reset(struct uffd_args *args, unsigned long n_cpus) +static void uffd_stats_reset(uffd_global_test_opts_t *gopts, struct uffd_args *args, + unsigned long n_cpus) { int i; for (i = 0; i < n_cpus; i++) { args[i].cpu = i; - args[i].apply_wp = test_uffdio_wp; + args[i].apply_wp = gopts->test_uffdio_wp; args[i].missing_faults = 0; args[i].wp_faults = 0; args[i].minor_faults = 0; + args[i].gopts = gopts; } } static void *locking_thread(void *arg) { - unsigned long cpu = (unsigned long) arg; + struct uffd_args *args = (struct uffd_args *) arg; + uffd_global_test_opts_t *gopts = args->gopts; + unsigned long cpu = (unsigned long) args->cpu; unsigned long page_nr; unsigned long long count; if (!(bounces & BOUNCE_RANDOM)) { page_nr = -bounces; if (!(bounces & BOUNCE_RACINGFAULTS)) - page_nr += cpu * nr_pages_per_cpu; + page_nr += cpu * gopts->nr_pages_per_cpu; } - while 
(!finished) { + while (!gopts->finished) { if (bounces & BOUNCE_RANDOM) { if (getrandom(&page_nr, sizeof(page_nr), 0) != sizeof(page_nr)) err("getrandom failed"); } else page_nr += 1; - page_nr %= nr_pages; - pthread_mutex_lock(area_mutex(area_dst, page_nr)); - count = *area_count(area_dst, page_nr); - if (count != count_verify[page_nr]) + page_nr %= gopts->nr_pages; + pthread_mutex_lock(area_mutex(gopts->area_dst, page_nr, gopts)); + count = *area_count(gopts->area_dst, page_nr, gopts); + if (count != gopts->count_verify[page_nr]) err("page_nr %lu memory corruption %llu %llu", - page_nr, count, count_verify[page_nr]); + page_nr, count, gopts->count_verify[page_nr]); count++; - *area_count(area_dst, page_nr) = count_verify[page_nr] = count; - pthread_mutex_unlock(area_mutex(area_dst, page_nr)); + *area_count(gopts->area_dst, page_nr, gopts) = gopts->count_verify[page_nr] = count; + pthread_mutex_unlock(area_mutex(gopts->area_dst, page_nr, gopts)); } return NULL; } -static int copy_page_retry(int ufd, unsigned long offset) +static int copy_page_retry(uffd_global_test_opts_t *gopts, unsigned long offset) { - return __copy_page(ufd, offset, true, test_uffdio_wp); + return __copy_page(gopts, offset, true, gopts->test_uffdio_wp); } pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER; @@ -131,15 +141,16 @@ pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER; static void *uffd_read_thread(void *arg) { struct uffd_args *args = (struct uffd_args *)arg; + uffd_global_test_opts_t *gopts = args->gopts; struct uffd_msg msg; pthread_mutex_unlock(&uffd_read_mutex); /* from here cancellation is ok */ for (;;) { - if (uffd_read_msg(&msg)) + if (uffd_read_msg(gopts, &msg)) continue; - uffd_handle_page_fault(&msg, args); + uffd_handle_page_fault(gopts, &msg, args); } return NULL; @@ -147,32 +158,34 @@ static void *uffd_read_thread(void *arg) static void *background_thread(void *arg) { - unsigned long cpu = (unsigned long) arg; + struct uffd_args *args = (struct 
uffd_args *) arg; + uffd_global_test_opts_t *gopts = args->gopts; + unsigned long cpu = (unsigned long) args->cpu; unsigned long page_nr, start_nr, mid_nr, end_nr; - start_nr = cpu * nr_pages_per_cpu; - end_nr = (cpu+1) * nr_pages_per_cpu; + start_nr = cpu * gopts->nr_pages_per_cpu; + end_nr = (cpu+1) * gopts->nr_pages_per_cpu; mid_nr = (start_nr + end_nr) / 2; /* Copy the first half of the pages */ for (page_nr = start_nr; page_nr < mid_nr; page_nr++) - copy_page_retry(uffd, page_nr * page_size); + copy_page_retry(gopts, page_nr * gopts->page_size); /* * If we need to test uffd-wp, set it up now. Then we'll have * at least the first half of the pages mapped already which * can be write-protected for testing */ - if (test_uffdio_wp) - wp_range(uffd, (unsigned long)area_dst + start_nr * page_size, - nr_pages_per_cpu * page_size, true); + if (gopts->test_uffdio_wp) + wp_range(gopts->uffd, (unsigned long)gopts->area_dst + start_nr * gopts->page_size, + gopts->nr_pages_per_cpu * gopts->page_size, true); /* * Continue the 2nd half of the page copying, handling write * protection faults if any */ for (page_nr = mid_nr; page_nr < end_nr; page_nr++) - copy_page_retry(uffd, page_nr * page_size); + copy_page_retry(gopts, page_nr * gopts->page_size); return NULL; } @@ -180,17 +193,21 @@ static void *background_thread(void *arg) static int stress(struct uffd_args *args) { unsigned long cpu; - pthread_t locking_threads[nr_parallel]; - pthread_t uffd_threads[nr_parallel]; - pthread_t background_threads[nr_parallel]; + uffd_global_test_opts_t *gopts = args->gopts; + pthread_t locking_threads[gopts->nr_parallel]; + pthread_t uffd_threads[gopts->nr_parallel]; + pthread_t background_threads[gopts->nr_parallel]; - finished = 0; - for (cpu = 0; cpu < nr_parallel; cpu++) { + gopts->finished = 0; + for (cpu = 0; cpu < gopts->nr_parallel; cpu++) { if (pthread_create(&locking_threads[cpu], &attr, - locking_thread, (void *)cpu)) + locking_thread, (void *)&args[cpu])) return 1; if (bounces 
& BOUNCE_POLL) { - if (pthread_create(&uffd_threads[cpu], &attr, uffd_poll_thread, &args[cpu])) + if (pthread_create(&uffd_threads[cpu], + &attr, + uffd_poll_thread, + (void *) &args[cpu])) err("uffd_poll_thread create"); } else { if (pthread_create(&uffd_threads[cpu], &attr, @@ -200,10 +217,10 @@ static int stress(struct uffd_args *args) pthread_mutex_lock(&uffd_read_mutex); } if (pthread_create(&background_threads[cpu], &attr, - background_thread, (void *)cpu)) + background_thread, (void *)&args[cpu])) return 1; } - for (cpu = 0; cpu < nr_parallel; cpu++) + for (cpu = 0; cpu < gopts->nr_parallel; cpu++) if (pthread_join(background_threads[cpu], NULL)) return 1; @@ -216,17 +233,17 @@ static int stress(struct uffd_args *args) * UFFDIO_COPY without writing zero pages into area_dst * because the background threads already completed). */ - uffd_test_ops->release_pages(area_src); + uffd_test_ops->release_pages(gopts, gopts->area_src); - finished = 1; - for (cpu = 0; cpu < nr_parallel; cpu++) + gopts->finished = 1; + for (cpu = 0; cpu < gopts->nr_parallel; cpu++) if (pthread_join(locking_threads[cpu], NULL)) return 1; - for (cpu = 0; cpu < nr_parallel; cpu++) { + for (cpu = 0; cpu < gopts->nr_parallel; cpu++) { char c; if (bounces & BOUNCE_POLL) { - if (write(pipefd[cpu*2+1], &c, 1) != 1) + if (write(gopts->pipefd[cpu*2+1], &c, 1) != 1) err("pipefd write error"); if (pthread_join(uffd_threads[cpu], (void *)&args[cpu])) @@ -242,26 +259,26 @@ static int stress(struct uffd_args *args) return 0; } -static int userfaultfd_stress(void) +static int userfaultfd_stress(uffd_global_test_opts_t *gopts) { void *area; unsigned long nr; - struct uffd_args args[nr_parallel]; - uint64_t mem_size = nr_pages * page_size; + struct uffd_args args[gopts->nr_parallel]; + uint64_t mem_size = gopts->nr_pages * gopts->page_size; int flags = 0; - memset(args, 0, sizeof(struct uffd_args) * nr_parallel); + memset(args, 0, sizeof(struct uffd_args) * gopts->nr_parallel); - if (features & 
UFFD_FEATURE_WP_UNPOPULATED && test_type == TEST_ANON) + if (features & UFFD_FEATURE_WP_UNPOPULATED && gopts->test_type == TEST_ANON) flags = UFFD_FEATURE_WP_UNPOPULATED; - if (uffd_test_ctx_init(flags, NULL)) + if (uffd_test_ctx_init(gopts, flags, NULL)) err("context init failed"); - if (posix_memalign(&area, page_size, page_size)) + if (posix_memalign(&area, gopts->page_size, gopts->page_size)) err("out of memory"); zeropage = area; - bzero(zeropage, page_size); + bzero(zeropage, gopts->page_size); pthread_mutex_lock(&uffd_read_mutex); @@ -284,18 +301,18 @@ static int userfaultfd_stress(void) fflush(stdout); if (bounces & BOUNCE_POLL) - fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); + fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags | O_NONBLOCK); else - fcntl(uffd, F_SETFL, uffd_flags & ~O_NONBLOCK); + fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags & ~O_NONBLOCK); /* register */ - if (uffd_register(uffd, area_dst, mem_size, - true, test_uffdio_wp, false)) + if (uffd_register(gopts->uffd, gopts->area_dst, mem_size, + true, gopts->test_uffdio_wp, false)) err("register failure"); - if (area_dst_alias) { - if (uffd_register(uffd, area_dst_alias, mem_size, - true, test_uffdio_wp, false)) + if (gopts->area_dst_alias) { + if (uffd_register(gopts->uffd, gopts->area_dst_alias, mem_size, + true, gopts->test_uffdio_wp, false)) err("register failure alias"); } @@ -323,87 +340,88 @@ static int userfaultfd_stress(void) * MADV_DONTNEED only after the UFFDIO_REGISTER, so it's * required to MADV_DONTNEED here. 
*/ - uffd_test_ops->release_pages(area_dst); + uffd_test_ops->release_pages(gopts, gopts->area_dst); - uffd_stats_reset(args, nr_parallel); + uffd_stats_reset(gopts, args, gopts->nr_parallel); /* bounce pass */ if (stress(args)) { - uffd_test_ctx_clear(); + uffd_test_ctx_clear(gopts); return 1; } /* Clear all the write protections if there is any */ - if (test_uffdio_wp) - wp_range(uffd, (unsigned long)area_dst, - nr_pages * page_size, false); + if (gopts->test_uffdio_wp) + wp_range(gopts->uffd, (unsigned long)gopts->area_dst, + gopts->nr_pages * gopts->page_size, false); /* unregister */ - if (uffd_unregister(uffd, area_dst, mem_size)) + if (uffd_unregister(gopts->uffd, gopts->area_dst, mem_size)) err("unregister failure"); - if (area_dst_alias) { - if (uffd_unregister(uffd, area_dst_alias, mem_size)) + if (gopts->area_dst_alias) { + if (uffd_unregister(gopts->uffd, gopts->area_dst_alias, mem_size)) err("unregister failure alias"); } /* verification */ if (bounces & BOUNCE_VERIFY) - for (nr = 0; nr < nr_pages; nr++) - if (*area_count(area_dst, nr) != count_verify[nr]) + for (nr = 0; nr < gopts->nr_pages; nr++) + if (*area_count(gopts->area_dst, nr, gopts) != + gopts->count_verify[nr]) err("error area_count %llu %llu %lu\n", - *area_count(area_src, nr), - count_verify[nr], nr); + *area_count(gopts->area_src, nr, gopts), + gopts->count_verify[nr], nr); /* prepare next bounce */ - swap(area_src, area_dst); + swap(gopts->area_src, gopts->area_dst); - swap(area_src_alias, area_dst_alias); + swap(gopts->area_src_alias, gopts->area_dst_alias); - uffd_stats_report(args, nr_parallel); + uffd_stats_report(args, gopts->nr_parallel); } - uffd_test_ctx_clear(); + uffd_test_ctx_clear(gopts); return 0; } -static void set_test_type(const char *type) +static void set_test_type(uffd_global_test_opts_t *gopts, const char *type) { if (!strcmp(type, "anon")) { - test_type = TEST_ANON; + gopts->test_type = TEST_ANON; uffd_test_ops = &anon_uffd_test_ops; } else if (!strcmp(type, 
"hugetlb")) { - test_type = TEST_HUGETLB; + gopts->test_type = TEST_HUGETLB; uffd_test_ops = &hugetlb_uffd_test_ops; - map_shared = true; + gopts->map_shared = true; } else if (!strcmp(type, "hugetlb-private")) { - test_type = TEST_HUGETLB; + gopts->test_type = TEST_HUGETLB; uffd_test_ops = &hugetlb_uffd_test_ops; } else if (!strcmp(type, "shmem")) { - map_shared = true; - test_type = TEST_SHMEM; + gopts->map_shared = true; + gopts->test_type = TEST_SHMEM; uffd_test_ops = &shmem_uffd_test_ops; } else if (!strcmp(type, "shmem-private")) { - test_type = TEST_SHMEM; + gopts->test_type = TEST_SHMEM; uffd_test_ops = &shmem_uffd_test_ops; } } -static void parse_test_type_arg(const char *raw_type) +static void parse_test_type_arg(uffd_global_test_opts_t *gopts, const char *raw_type) { - set_test_type(raw_type); + set_test_type(gopts, raw_type); - if (!test_type) + if (!gopts->test_type) err("failed to parse test type argument: '%s'", raw_type); - if (test_type == TEST_HUGETLB) - page_size = default_huge_page_size(); + if (gopts->test_type == TEST_HUGETLB) + gopts->page_size = default_huge_page_size(); else - page_size = sysconf(_SC_PAGE_SIZE); + gopts->page_size = sysconf(_SC_PAGE_SIZE); - if (!page_size) + if (!gopts->page_size) err("Unable to determine page size"); - if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2 - > page_size) + if ((unsigned long) area_count(NULL, 0, gopts) + sizeof(unsigned long long) * 2 + > gopts->page_size) err("Impossible to run this test"); /* @@ -415,21 +433,21 @@ static void parse_test_type_arg(const char *raw_type) if (uffd_get_features(&features) && errno == ENOENT) ksft_exit_skip("failed to get available features (%d)\n", errno); - test_uffdio_wp = test_uffdio_wp && + gopts->test_uffdio_wp = gopts->test_uffdio_wp && (features & UFFD_FEATURE_PAGEFAULT_FLAG_WP); - if (test_type != TEST_ANON && !(features & UFFD_FEATURE_WP_HUGETLBFS_SHMEM)) - test_uffdio_wp = false; + if (gopts->test_type != TEST_ANON && !(features & 
UFFD_FEATURE_WP_HUGETLBFS_SHMEM)) + gopts->test_uffdio_wp = false; - close(uffd); - uffd = -1; + close(gopts->uffd); + gopts->uffd = -1; } static void sigalrm(int sig) { if (sig != SIGALRM) abort(); - test_uffdio_copy_eexist = true; + gopts->test_uffdio_copy_eexist = true; alarm(ALARM_INTERVAL_SECS); } @@ -438,6 +456,8 @@ int main(int argc, char **argv) unsigned long nr_cpus; size_t bytes; + gopts = (uffd_global_test_opts_t *) malloc(sizeof(uffd_global_test_opts_t)); + if (argc < 4) usage(); @@ -445,11 +465,11 @@ int main(int argc, char **argv) err("failed to arm SIGALRM"); alarm(ALARM_INTERVAL_SECS); - parse_test_type_arg(argv[1]); + parse_test_type_arg(gopts, argv[1]); bytes = atol(argv[2]) * 1024 * 1024; - if (test_type == TEST_HUGETLB && - get_free_hugepages() < bytes / page_size) { + if (gopts->test_type == TEST_HUGETLB && + get_free_hugepages() < bytes / gopts->page_size) { printf("skip: Skipping userfaultfd... not enough hugepages\n"); return KSFT_SKIP; } @@ -459,15 +479,15 @@ int main(int argc, char **argv) /* Don't let calculation below go to zero. 
*/ ksft_print_msg("_SC_NPROCESSORS_ONLN (%lu) too large, capping nr_threads to 32\n", nr_cpus); - nr_parallel = 32; + gopts->nr_parallel = 32; } else { - nr_parallel = nr_cpus; + gopts->nr_parallel = nr_cpus; } - nr_pages_per_cpu = bytes / page_size / nr_parallel; - if (!nr_pages_per_cpu) { + gopts->nr_pages_per_cpu = bytes / gopts->page_size / gopts->nr_parallel; + if (!gopts->nr_pages_per_cpu) { _err("pages_per_cpu = 0, cannot test (%lu / %lu / %lu)", - bytes, page_size, nr_parallel); + bytes, gopts->page_size, gopts->nr_parallel); usage(); } @@ -476,11 +496,11 @@ int main(int argc, char **argv) _err("invalid bounces"); usage(); } - nr_pages = nr_pages_per_cpu * nr_parallel; + gopts->nr_pages = gopts->nr_pages_per_cpu * gopts->nr_parallel; printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n", - nr_pages, nr_pages_per_cpu); - return userfaultfd_stress(); + gopts->nr_pages, gopts->nr_pages_per_cpu); + return userfaultfd_stress(gopts); } #else /* __NR_userfaultfd */ diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index ee9b407e893e..101eaca2662e 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -76,7 +76,7 @@ struct uffd_test_args { typedef struct uffd_test_args uffd_test_args_t; /* Returns: UFFD_TEST_* */ -typedef void (*uffd_test_fn)(uffd_test_args_t *); +typedef void (*uffd_test_fn)(uffd_global_test_opts_t *, uffd_test_args_t *); typedef struct { const char *name; @@ -181,33 +181,6 @@ static int test_uffd_api(bool use_dev) return 1; } -/* - * This function initializes the global variables. TODO: remove global - * vars and then remove this. 
- */ -static int -uffd_setup_environment(uffd_test_args_t *args, uffd_test_case_t *test, - mem_type_t *mem_type, const char **errmsg) -{ - map_shared = mem_type->shared; - uffd_test_ops = mem_type->mem_ops; - uffd_test_case_ops = test->test_case_ops; - - if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB)) - page_size = default_huge_page_size(); - else - page_size = psize(); - - /* Ensure we have at least 2 pages */ - nr_pages = MAX(UFFD_TEST_MEM_SIZE, page_size * 2) / page_size; - /* TODO: remove this global var.. it's so ugly */ - nr_parallel = 1; - - /* Initialize test arguments */ - args->mem_type = mem_type; - - return uffd_test_ctx_init(test->uffd_feature_required, errmsg); -} static bool uffd_feature_supported(uffd_test_case_t *test) { @@ -237,7 +210,8 @@ static int pagemap_open(void) } while (0) typedef struct { - int parent_uffd, child_uffd; + uffd_global_test_opts_t *gopts; + int child_uffd; } fork_event_args; static void *fork_event_consumer(void *data) @@ -245,10 +219,10 @@ static void *fork_event_consumer(void *data) fork_event_args *args = data; struct uffd_msg msg = { 0 }; - ready_for_fork = true; + args->gopts->ready_for_fork = true; /* Read until a full msg received */ - while (uffd_read_msg(&msg)); + while (uffd_read_msg(args->gopts, &msg)); if (msg.event != UFFD_EVENT_FORK) err("wrong message: %u\n", msg.event); @@ -304,9 +278,9 @@ static void unpin_pages(pin_args *args) args->pinned = false; } -static int pagemap_test_fork(int uffd, bool with_event, bool test_pin) +static int pagemap_test_fork(uffd_global_test_opts_t *gopts, bool with_event, bool test_pin) { - fork_event_args args = { .parent_uffd = uffd, .child_uffd = -1 }; + fork_event_args args = { .gopts = gopts, .child_uffd = -1 }; pthread_t thread; pid_t child; uint64_t value; @@ -314,10 +288,10 @@ static int pagemap_test_fork(int uffd, bool with_event, bool test_pin) /* Prepare a thread to resolve EVENT_FORK */ if (with_event) { - ready_for_fork = false; + gopts->ready_for_fork = 
false; if (pthread_create(&thread, NULL, fork_event_consumer, &args)) err("pthread_create()"); - while (!ready_for_fork) + while (!gopts->ready_for_fork) ; /* Wait for the poll_thread to start executing before forking */ } @@ -328,14 +302,14 @@ static int pagemap_test_fork(int uffd, bool with_event, bool test_pin) fd = pagemap_open(); - if (test_pin && pin_pages(&args, area_dst, page_size)) + if (test_pin && pin_pages(&args, gopts->area_dst, gopts->page_size)) /* * Normally when reach here we have pinned in * previous tests, so shouldn't fail anymore */ err("pin page failed in child"); - value = pagemap_get_entry(fd, area_dst); + value = pagemap_get_entry(fd, gopts->area_dst); /* * After fork(), we should handle uffd-wp bit differently: * @@ -361,70 +335,71 @@ static int pagemap_test_fork(int uffd, bool with_event, bool test_pin) return result; } -static void uffd_wp_unpopulated_test(uffd_test_args_t __unused *args) +static void uffd_wp_unpopulated_test(uffd_global_test_opts_t *gopts, + uffd_test_args_t __unused *args) { uint64_t value; int pagemap_fd; - if (uffd_register(uffd, area_dst, nr_pages * page_size, + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, false, true, false)) err("register failed"); pagemap_fd = pagemap_open(); /* Test applying pte marker to anon unpopulated */ - wp_range(uffd, (uint64_t)area_dst, page_size, true); - value = pagemap_get_entry(pagemap_fd, area_dst); + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true); + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, true); /* Test unprotect on anon pte marker */ - wp_range(uffd, (uint64_t)area_dst, page_size, false); - value = pagemap_get_entry(pagemap_fd, area_dst); + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, false); + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, false); /* Test zap on anon marker */ - wp_range(uffd, (uint64_t)area_dst, 
page_size, true); - if (madvise(area_dst, page_size, MADV_DONTNEED)) + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true); + if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED)) err("madvise(MADV_DONTNEED) failed"); - value = pagemap_get_entry(pagemap_fd, area_dst); + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, false); /* Test fault in after marker removed */ - *area_dst = 1; - value = pagemap_get_entry(pagemap_fd, area_dst); + *gopts->area_dst = 1; + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, false); /* Drop it to make pte none again */ - if (madvise(area_dst, page_size, MADV_DONTNEED)) + if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED)) err("madvise(MADV_DONTNEED) failed"); /* Test read-zero-page upon pte marker */ - wp_range(uffd, (uint64_t)area_dst, page_size, true); - *(volatile char *)area_dst; + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true); + *(volatile char *)gopts->area_dst; /* Drop it to make pte none again */ - if (madvise(area_dst, page_size, MADV_DONTNEED)) + if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED)) err("madvise(MADV_DONTNEED) failed"); uffd_test_pass(); } -static void uffd_wp_fork_test_common(uffd_test_args_t *args, +static void uffd_wp_fork_test_common(uffd_global_test_opts_t *gopts, uffd_test_args_t *args, bool with_event) { int pagemap_fd; uint64_t value; - if (uffd_register(uffd, area_dst, nr_pages * page_size, + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, false, true, false)) err("register failed"); pagemap_fd = pagemap_open(); /* Touch the page */ - *area_dst = 1; - wp_range(uffd, (uint64_t)area_dst, page_size, true); - value = pagemap_get_entry(pagemap_fd, area_dst); + *gopts->area_dst = 1; + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true); + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); 
pagemap_check_wp(value, true); - if (pagemap_test_fork(uffd, with_event, false)) { + if (pagemap_test_fork(gopts, with_event, false)) { uffd_test_fail("Detected %s uffd-wp bit in child in present pte", with_event ? "missing" : "stall"); goto out; @@ -442,79 +417,80 @@ static void uffd_wp_fork_test_common(uffd_test_args_t *args, * to expose pte markers. */ if (args->mem_type->shared) { - if (madvise(area_dst, page_size, MADV_DONTNEED)) + if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED)) err("MADV_DONTNEED"); } else { /* * NOTE: ignore retval because private-hugetlb doesn't yet * support swapping, so it could fail. */ - madvise(area_dst, page_size, MADV_PAGEOUT); + madvise(gopts->area_dst, gopts->page_size, MADV_PAGEOUT); } /* Uffd-wp should persist even swapped out */ - value = pagemap_get_entry(pagemap_fd, area_dst); + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, true); - if (pagemap_test_fork(uffd, with_event, false)) { + if (pagemap_test_fork(gopts, with_event, false)) { uffd_test_fail("Detected %s uffd-wp bit in child in zapped pte", with_event ? 
"missing" : "stall"); goto out; } /* Unprotect; this tests swap pte modifications */ - wp_range(uffd, (uint64_t)area_dst, page_size, false); - value = pagemap_get_entry(pagemap_fd, area_dst); + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, false); + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, false); /* Fault in the page from disk */ - *area_dst = 2; - value = pagemap_get_entry(pagemap_fd, area_dst); + *gopts->area_dst = 2; + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, false); uffd_test_pass(); out: - if (uffd_unregister(uffd, area_dst, nr_pages * page_size)) + if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size)) err("unregister failed"); close(pagemap_fd); } -static void uffd_wp_fork_test(uffd_test_args_t *args) +static void uffd_wp_fork_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_wp_fork_test_common(args, false); + uffd_wp_fork_test_common(gopts, args, false); } -static void uffd_wp_fork_with_event_test(uffd_test_args_t *args) +static void uffd_wp_fork_with_event_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_wp_fork_test_common(args, true); + uffd_wp_fork_test_common(gopts, args, true); } -static void uffd_wp_fork_pin_test_common(uffd_test_args_t __unused *args, +static void uffd_wp_fork_pin_test_common(uffd_global_test_opts_t *gopts, + uffd_test_args_t __unused *args, bool with_event) { int pagemap_fd; pin_args pin_args = {}; - if (uffd_register(uffd, area_dst, page_size, false, true, false)) + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->page_size, false, true, false)) err("register failed"); pagemap_fd = pagemap_open(); /* Touch the page */ - *area_dst = 1; - wp_range(uffd, (uint64_t)area_dst, page_size, true); + *gopts->area_dst = 1; + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true); /* * 1. First pin, then fork(). 
This tests fork() special path when * doing early CoW if the page is private. */ - if (pin_pages(&pin_args, area_dst, page_size)) { + if (pin_pages(&pin_args, gopts->area_dst, gopts->page_size)) { uffd_test_skip("Possibly CONFIG_GUP_TEST missing " "or unprivileged"); close(pagemap_fd); - uffd_unregister(uffd, area_dst, page_size); + uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size); return; } - if (pagemap_test_fork(uffd, with_event, false)) { + if (pagemap_test_fork(gopts, with_event, false)) { uffd_test_fail("Detected %s uffd-wp bit in early CoW of fork()", with_event ? "missing" : "stall"); unpin_pages(&pin_args); @@ -527,49 +503,50 @@ static void uffd_wp_fork_pin_test_common(uffd_test_args_t __unused *args, * 2. First fork(), then pin (in the child, where test_pin==true). * This tests COR, aka, page unsharing on private memories. */ - if (pagemap_test_fork(uffd, with_event, true)) { + if (pagemap_test_fork(gopts, with_event, true)) { uffd_test_fail("Detected %s uffd-wp bit when RO pin", with_event ? 
"missing" : "stall"); goto out; } uffd_test_pass(); out: - if (uffd_unregister(uffd, area_dst, page_size)) + if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size)) err("register failed"); close(pagemap_fd); } -static void uffd_wp_fork_pin_test(uffd_test_args_t *args) +static void uffd_wp_fork_pin_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_wp_fork_pin_test_common(args, false); + uffd_wp_fork_pin_test_common(gopts, args, false); } -static void uffd_wp_fork_pin_with_event_test(uffd_test_args_t *args) +static void uffd_wp_fork_pin_with_event_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_wp_fork_pin_test_common(args, true); + uffd_wp_fork_pin_test_common(gopts, args, true); } -static void check_memory_contents(char *p) +static void check_memory_contents(uffd_global_test_opts_t *gopts, char *p) { unsigned long i, j; uint8_t expected_byte; - for (i = 0; i < nr_pages; ++i) { + for (i = 0; i < gopts->nr_pages; ++i) { expected_byte = ~((uint8_t)(i % ((uint8_t)-1))); - for (j = 0; j < page_size; j++) { - uint8_t v = *(uint8_t *)(p + (i * page_size) + j); + for (j = 0; j < gopts->page_size; j++) { + uint8_t v = *(uint8_t *)(p + (i * gopts->page_size) + j); if (v != expected_byte) err("unexpected page contents"); } } } -static void uffd_minor_test_common(bool test_collapse, bool test_wp) +static void uffd_minor_test_common(uffd_global_test_opts_t *gopts, bool test_collapse, bool test_wp) { unsigned long p; pthread_t uffd_mon; char c; struct uffd_args args = { 0 }; + args.gopts = gopts; /* * NOTE: MADV_COLLAPSE is not yet compatible with WP, so testing @@ -577,7 +554,7 @@ static void uffd_minor_test_common(bool test_collapse, bool test_wp) */ assert(!(test_collapse && test_wp)); - if (uffd_register(uffd, area_dst_alias, nr_pages * page_size, + if (uffd_register(gopts->uffd, gopts->area_dst_alias, gopts->nr_pages * gopts->page_size, /* NOTE! 
MADV_COLLAPSE may not work with uffd-wp */ false, test_wp, true)) err("register failure"); @@ -586,9 +563,9 @@ static void uffd_minor_test_common(bool test_collapse, bool test_wp) * After registering with UFFD, populate the non-UFFD-registered side of * the shared mapping. This should *not* trigger any UFFD minor faults. */ - for (p = 0; p < nr_pages; ++p) - memset(area_dst + (p * page_size), p % ((uint8_t)-1), - page_size); + for (p = 0; p < gopts->nr_pages; ++p) + memset(gopts->area_dst + (p * gopts->page_size), p % ((uint8_t)-1), + gopts->page_size); args.apply_wp = test_wp; if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) @@ -600,50 +577,51 @@ static void uffd_minor_test_common(bool test_collapse, bool test_wp) * fault. uffd_poll_thread will resolve the fault by bit-flipping the * page's contents, and then issuing a CONTINUE ioctl. */ - check_memory_contents(area_dst_alias); + check_memory_contents(gopts, gopts->area_dst_alias); - if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c)) err("pipe write"); if (pthread_join(uffd_mon, NULL)) err("join() failed"); if (test_collapse) { - if (madvise(area_dst_alias, nr_pages * page_size, + if (madvise(gopts->area_dst_alias, gopts->nr_pages * gopts->page_size, MADV_COLLAPSE)) { /* It's fine to fail for this one... */ uffd_test_skip("MADV_COLLAPSE failed"); return; } - uffd_test_ops->check_pmd_mapping(area_dst, - nr_pages * page_size / + uffd_test_ops->check_pmd_mapping(gopts, + gopts->area_dst, + gopts->nr_pages * gopts->page_size / read_pmd_pagesize()); /* * This won't cause uffd-fault - it purely just makes sure there * was no corruption. 
*/ - check_memory_contents(area_dst_alias); + check_memory_contents(gopts, gopts->area_dst_alias); } - if (args.missing_faults != 0 || args.minor_faults != nr_pages) + if (args.missing_faults != 0 || args.minor_faults != gopts->nr_pages) uffd_test_fail("stats check error"); else uffd_test_pass(); } -void uffd_minor_test(uffd_test_args_t __unused *args) +void uffd_minor_test(uffd_global_test_opts_t *gopts, uffd_test_args_t __unused *args) { - uffd_minor_test_common(false, false); + uffd_minor_test_common(gopts, false, false); } -void uffd_minor_wp_test(uffd_test_args_t __unused *args) +void uffd_minor_wp_test(uffd_global_test_opts_t *gopts, uffd_test_args_t __unused *args) { - uffd_minor_test_common(false, true); + uffd_minor_test_common(gopts, false, true); } -void uffd_minor_collapse_test(uffd_test_args_t __unused *args) +void uffd_minor_collapse_test(uffd_global_test_opts_t *gopts, uffd_test_args_t __unused *args) { - uffd_minor_test_common(true, false); + uffd_minor_test_common(gopts, true, false); } static sigjmp_buf jbuf, *sigbuf; @@ -678,7 +656,7 @@ static void sighndl(int sig, siginfo_t __unused *siginfo, void __unused *ptr) * This also tests UFFD_FEATURE_EVENT_FORK event along with the signal * feature. Using monitor thread, verify no userfault events are generated. 
*/ -static int faulting_process(int signal_test, bool wp) +static int faulting_process(uffd_global_test_opts_t *gopts, int signal_test, bool wp) { unsigned long nr, i; unsigned long long count; @@ -687,7 +665,7 @@ static int faulting_process(int signal_test, bool wp) struct sigaction act; volatile unsigned long signalled = 0; - split_nr_pages = (nr_pages + 1) / 2; + split_nr_pages = (gopts->nr_pages + 1) / 2; if (signal_test) { sigbuf = &jbuf; @@ -701,7 +679,7 @@ static int faulting_process(int signal_test, bool wp) for (nr = 0; nr < split_nr_pages; nr++) { volatile int steps = 1; - unsigned long offset = nr * page_size; + unsigned long offset = nr * gopts->page_size; if (signal_test) { if (sigsetjmp(*sigbuf, 1) != 0) { @@ -713,15 +691,15 @@ static int faulting_process(int signal_test, bool wp) if (steps == 1) { /* This is a MISSING request */ steps++; - if (copy_page(uffd, offset, wp)) + if (copy_page(gopts, offset, wp)) signalled++; } else { /* This is a WP request */ assert(steps == 2); - wp_range(uffd, - (__u64)area_dst + + wp_range(gopts->uffd, + (__u64)gopts->area_dst + offset, - page_size, false); + gopts->page_size, false); } } else { signalled++; @@ -730,51 +708,53 @@ static int faulting_process(int signal_test, bool wp) } } - count = *area_count(area_dst, nr); - if (count != count_verify[nr]) + count = *area_count(gopts->area_dst, nr, gopts); + if (count != gopts->count_verify[nr]) err("nr %lu memory corruption %llu %llu\n", - nr, count, count_verify[nr]); + nr, count, gopts->count_verify[nr]); /* * Trigger write protection if there is by writing * the same value back. 
*/ - *area_count(area_dst, nr) = count; + *area_count(gopts->area_dst, nr, gopts) = count; } if (signal_test) return signalled != split_nr_pages; - area_dst = mremap(area_dst, nr_pages * page_size, nr_pages * page_size, - MREMAP_MAYMOVE | MREMAP_FIXED, area_src); - if (area_dst == MAP_FAILED) + gopts->area_dst = mremap(gopts->area_dst, gopts->nr_pages * gopts->page_size, + gopts->nr_pages * gopts->page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, + gopts->area_src); + if (gopts->area_dst == MAP_FAILED) err("mremap"); /* Reset area_src since we just clobbered it */ - area_src = NULL; + gopts->area_src = NULL; - for (; nr < nr_pages; nr++) { - count = *area_count(area_dst, nr); - if (count != count_verify[nr]) { + for (; nr < gopts->nr_pages; nr++) { + count = *area_count(gopts->area_dst, nr, gopts); + if (count != gopts->count_verify[nr]) { err("nr %lu memory corruption %llu %llu\n", - nr, count, count_verify[nr]); + nr, count, gopts->count_verify[nr]); } /* * Trigger write protection if there is by writing * the same value back. 
*/ - *area_count(area_dst, nr) = count; + *area_count(gopts->area_dst, nr, gopts) = count; } - uffd_test_ops->release_pages(area_dst); + uffd_test_ops->release_pages(gopts, gopts->area_dst); - for (nr = 0; nr < nr_pages; nr++) - for (i = 0; i < page_size; i++) - if (*(area_dst + nr * page_size + i) != 0) + for (nr = 0; nr < gopts->nr_pages; nr++) + for (i = 0; i < gopts->page_size; i++) + if (*(gopts->area_dst + nr * gopts->page_size + i) != 0) err("page %lu offset %lu is not zero", nr, i); return 0; } -static void uffd_sigbus_test_common(bool wp) +static void uffd_sigbus_test_common(uffd_global_test_opts_t *gopts, bool wp) { unsigned long userfaults; pthread_t uffd_mon; @@ -782,25 +762,26 @@ static void uffd_sigbus_test_common(bool wp) int err; char c; struct uffd_args args = { 0 }; + args.gopts = gopts; - ready_for_fork = false; + gopts->ready_for_fork = false; - fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); + fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags | O_NONBLOCK); - if (uffd_register(uffd, area_dst, nr_pages * page_size, + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, true, wp, false)) err("register failure"); - if (faulting_process(1, wp)) + if (faulting_process(gopts, 1, wp)) err("faulting process failed"); - uffd_test_ops->release_pages(area_dst); + uffd_test_ops->release_pages(gopts, gopts->area_dst); args.apply_wp = wp; if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) err("uffd_poll_thread create"); - while (!ready_for_fork) + while (!gopts->ready_for_fork) ; /* Wait for the poll_thread to start executing before forking */ pid = fork(); @@ -808,12 +789,12 @@ static void uffd_sigbus_test_common(bool wp) err("fork"); if (!pid) - exit(faulting_process(2, wp)); + exit(faulting_process(gopts, 2, wp)); waitpid(pid, &err, 0); if (err) err("faulting process failed"); - if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c)) err("pipe write"); if 
(pthread_join(uffd_mon, (void **)&userfaults)) err("pthread_join()"); @@ -824,28 +805,29 @@ static void uffd_sigbus_test_common(bool wp) uffd_test_pass(); } -static void uffd_sigbus_test(uffd_test_args_t __unused *args) +static void uffd_sigbus_test(uffd_global_test_opts_t *gopts, uffd_test_args_t __unused *args) { - uffd_sigbus_test_common(false); + uffd_sigbus_test_common(gopts, false); } -static void uffd_sigbus_wp_test(uffd_test_args_t __unused *args) +static void uffd_sigbus_wp_test(uffd_global_test_opts_t *gopts, uffd_test_args_t __unused *args) { - uffd_sigbus_test_common(true); + uffd_sigbus_test_common(gopts, true); } -static void uffd_events_test_common(bool wp) +static void uffd_events_test_common(uffd_global_test_opts_t *gopts, bool wp) { pthread_t uffd_mon; pid_t pid; int err; char c; struct uffd_args args = { 0 }; + args.gopts = gopts; - ready_for_fork = false; + gopts->ready_for_fork = false; - fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); - if (uffd_register(uffd, area_dst, nr_pages * page_size, + fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags | O_NONBLOCK); + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, true, wp, false)) err("register failure"); @@ -853,7 +835,7 @@ static void uffd_events_test_common(bool wp) if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) err("uffd_poll_thread create"); - while (!ready_for_fork) + while (!gopts->ready_for_fork) ; /* Wait for the poll_thread to start executing before forking */ pid = fork(); @@ -861,39 +843,39 @@ static void uffd_events_test_common(bool wp) err("fork"); if (!pid) - exit(faulting_process(0, wp)); + exit(faulting_process(gopts, 0, wp)); waitpid(pid, &err, 0); if (err) err("faulting process failed"); - if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c)) err("pipe write"); if (pthread_join(uffd_mon, NULL)) err("pthread_join()"); - if (args.missing_faults != nr_pages) + if (args.missing_faults 
!= gopts->nr_pages) uffd_test_fail("Fault counts wrong"); else uffd_test_pass(); } -static void uffd_events_test(uffd_test_args_t __unused *args) +static void uffd_events_test(uffd_global_test_opts_t *gopts, uffd_test_args_t __unused *args) { - uffd_events_test_common(false); + uffd_events_test_common(gopts, false); } -static void uffd_events_wp_test(uffd_test_args_t __unused *args) +static void uffd_events_wp_test(uffd_global_test_opts_t *gopts, uffd_test_args_t __unused *args) { - uffd_events_test_common(true); + uffd_events_test_common(gopts, true); } -static void retry_uffdio_zeropage(int ufd, +static void retry_uffdio_zeropage(uffd_global_test_opts_t *gopts, struct uffdio_zeropage *uffdio_zeropage) { - uffd_test_ops->alias_mapping(&uffdio_zeropage->range.start, + uffd_test_ops->alias_mapping(gopts, &uffdio_zeropage->range.start, uffdio_zeropage->range.len, 0); - if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) { + if (ioctl(gopts->uffd, UFFDIO_ZEROPAGE, uffdio_zeropage)) { if (uffdio_zeropage->zeropage != -EEXIST) err("UFFDIO_ZEROPAGE error: %"PRId64, (int64_t)uffdio_zeropage->zeropage); @@ -903,16 +885,16 @@ static void retry_uffdio_zeropage(int ufd, } } -static bool do_uffdio_zeropage(int ufd, bool has_zeropage) +static bool do_uffdio_zeropage(uffd_global_test_opts_t *gopts, bool has_zeropage) { struct uffdio_zeropage uffdio_zeropage = { 0 }; int ret; __s64 res; - uffdio_zeropage.range.start = (unsigned long) area_dst; - uffdio_zeropage.range.len = page_size; + uffdio_zeropage.range.start = (unsigned long) gopts->area_dst; + uffdio_zeropage.range.len = gopts->page_size; uffdio_zeropage.mode = 0; - ret = ioctl(ufd, UFFDIO_ZEROPAGE, &uffdio_zeropage); + ret = ioctl(gopts->uffd, UFFDIO_ZEROPAGE, &uffdio_zeropage); res = uffdio_zeropage.zeropage; if (ret) { /* real retval in ufdio_zeropage.zeropage */ @@ -921,10 +903,10 @@ static bool do_uffdio_zeropage(int ufd, bool has_zeropage) else if (res != -EINVAL) err("UFFDIO_ZEROPAGE not -EINVAL"); } else if 
(has_zeropage) { - if (res != page_size) + if (res != gopts->page_size) err("UFFDIO_ZEROPAGE unexpected size"); else - retry_uffdio_zeropage(ufd, &uffdio_zeropage); + retry_uffdio_zeropage(gopts, &uffdio_zeropage); return true; } else err("UFFDIO_ZEROPAGE succeeded"); @@ -950,25 +932,29 @@ uffd_register_detect_zeropage(int uffd, void *addr, uint64_t len) } /* exercise UFFDIO_ZEROPAGE */ -static void uffd_zeropage_test(uffd_test_args_t __unused *args) +static void uffd_zeropage_test(uffd_global_test_opts_t *gopts, uffd_test_args_t __unused *args) { bool has_zeropage; int i; - has_zeropage = uffd_register_detect_zeropage(uffd, area_dst, page_size); - if (area_dst_alias) + has_zeropage = uffd_register_detect_zeropage(gopts->uffd, + gopts->area_dst, + gopts->page_size); + if (gopts->area_dst_alias) /* Ignore the retval; we already have it */ - uffd_register_detect_zeropage(uffd, area_dst_alias, page_size); + uffd_register_detect_zeropage(gopts->uffd, gopts->area_dst_alias, gopts->page_size); - if (do_uffdio_zeropage(uffd, has_zeropage)) - for (i = 0; i < page_size; i++) - if (area_dst[i] != 0) + if (do_uffdio_zeropage(gopts, has_zeropage)) + for (i = 0; i < gopts->page_size; i++) + if (gopts->area_dst[i] != 0) err("data non-zero at offset %d\n", i); - if (uffd_unregister(uffd, area_dst, page_size)) + if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size)) err("unregister"); - if (area_dst_alias && uffd_unregister(uffd, area_dst_alias, page_size)) + if (gopts->area_dst_alias && uffd_unregister(gopts->uffd, + gopts->area_dst_alias, + gopts->page_size)) err("unregister"); uffd_test_pass(); @@ -987,26 +973,27 @@ static void uffd_register_poison(int uffd, void *addr, uint64_t len) err("registered area doesn't support COPY and POISON ioctls"); } -static void do_uffdio_poison(int uffd, unsigned long offset) +static void do_uffdio_poison(uffd_global_test_opts_t *gopts, unsigned long offset) { struct uffdio_poison uffdio_poison = { 0 }; int ret; __s64 res; - 
uffdio_poison.range.start = (unsigned long) area_dst + offset; - uffdio_poison.range.len = page_size; + uffdio_poison.range.start = (unsigned long) gopts->area_dst + offset; + uffdio_poison.range.len = gopts->page_size; uffdio_poison.mode = 0; - ret = ioctl(uffd, UFFDIO_POISON, &uffdio_poison); + ret = ioctl(gopts->uffd, UFFDIO_POISON, &uffdio_poison); res = uffdio_poison.updated; if (ret) err("UFFDIO_POISON error: %"PRId64, (int64_t)res); - else if (res != page_size) + else if (res != gopts->page_size) err("UFFDIO_POISON unexpected size: %"PRId64, (int64_t)res); } -static void uffd_poison_handle_fault( - struct uffd_msg *msg, struct uffd_args __unused *args) +static void uffd_poison_handle_fault(uffd_global_test_opts_t *gopts, + struct uffd_msg *msg, + struct uffd_args __unused *args) { unsigned long offset; @@ -1017,20 +1004,20 @@ static void uffd_poison_handle_fault( (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR)) err("unexpected fault type %llu", msg->arg.pagefault.flags); - offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst; - offset &= ~(page_size-1); + offset = (char *)(unsigned long)msg->arg.pagefault.address - gopts->area_dst; + offset &= ~(gopts->page_size-1); /* Odd pages -> copy zeroed page; even pages -> poison. 
*/ - if (offset & page_size) - copy_page(uffd, offset, false); + if (offset & gopts->page_size) + copy_page(gopts, offset, false); else - do_uffdio_poison(uffd, offset); + do_uffdio_poison(gopts, offset); } /* Make sure to cover odd/even, and minimum duplications */ #define UFFD_POISON_TEST_NPAGES 4 -static void uffd_poison_test(uffd_test_args_t __unused *targs) +static void uffd_poison_test(uffd_global_test_opts_t *gopts, uffd_test_args_t __unused *targs) { pthread_t uffd_mon; char c; @@ -1039,15 +1026,17 @@ static void uffd_poison_test(uffd_test_args_t __unused *targs) unsigned long nr_sigbus = 0; unsigned long nr, poison_pages = UFFD_POISON_TEST_NPAGES; - if (nr_pages < poison_pages) { - uffd_test_skip("Too few pages for POISON test"); + if (gopts->nr_pages < poison_pages) { + uffd_test_skip("Too less pages for POISON test"); return; } - fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); + args.gopts = gopts; + + fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags | O_NONBLOCK); - uffd_register_poison(uffd, area_dst, poison_pages * page_size); - memset(area_src, 0, poison_pages * page_size); + uffd_register_poison(gopts->uffd, gopts->area_dst, poison_pages * gopts->page_size); + memset(gopts->area_src, 0, poison_pages * gopts->page_size); args.handle_fault = uffd_poison_handle_fault; if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) @@ -1060,8 +1049,8 @@ static void uffd_poison_test(uffd_test_args_t __unused *targs) err("sigaction"); for (nr = 0; nr < poison_pages; ++nr) { - unsigned long offset = nr * page_size; - const char *bytes = (const char *) area_dst + offset; + unsigned long offset = nr * gopts->page_size; + const char *bytes = (const char *) gopts->area_dst + offset; const char *i; if (sigsetjmp(*sigbuf, 1)) { @@ -1074,14 +1063,14 @@ static void uffd_poison_test(uffd_test_args_t __unused *targs) continue; } - for (i = bytes; i < bytes + page_size; ++i) { + for (i = bytes; i < bytes + gopts->page_size; ++i) { if (*i) err("nonzero byte in area_dst (%p) 
at %p: %u", - area_dst, i, *i); + gopts->area_dst, i, *i); } } - if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c)) err("pipe write"); if (pthread_join(uffd_mon, NULL)) err("pthread_join()"); @@ -1094,7 +1083,9 @@ static void uffd_poison_test(uffd_test_args_t __unused *targs) } static void -uffd_move_handle_fault_common(struct uffd_msg *msg, struct uffd_args *args, +uffd_move_handle_fault_common(uffd_global_test_opts_t *gopts, + struct uffd_msg *msg, + struct uffd_args *args, unsigned long len) { unsigned long offset; @@ -1106,28 +1097,32 @@ uffd_move_handle_fault_common(struct uffd_msg *msg, struct uffd_args *args, (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR | UFFD_PAGEFAULT_FLAG_WRITE)) err("unexpected fault type %llu", msg->arg.pagefault.flags); - offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst; + offset = (char *)(unsigned long)msg->arg.pagefault.address - gopts->area_dst; offset &= ~(len-1); - if (move_page(uffd, offset, len)) + if (move_page(gopts, offset, len)) args->missing_faults++; } -static void uffd_move_handle_fault(struct uffd_msg *msg, +static void uffd_move_handle_fault(uffd_global_test_opts_t *gopts, struct uffd_msg *msg, struct uffd_args *args) { - uffd_move_handle_fault_common(msg, args, page_size); + uffd_move_handle_fault_common(gopts, msg, args, gopts->page_size); } -static void uffd_move_pmd_handle_fault(struct uffd_msg *msg, +static void uffd_move_pmd_handle_fault(uffd_global_test_opts_t *gopts, struct uffd_msg *msg, struct uffd_args *args) { - uffd_move_handle_fault_common(msg, args, read_pmd_pagesize()); + uffd_move_handle_fault_common(gopts, msg, args, read_pmd_pagesize()); } static void -uffd_move_test_common(uffd_test_args_t __unused *targs, unsigned long chunk_size, - void (*handle_fault)(struct uffd_msg *msg, struct uffd_args *args)) +uffd_move_test_common(uffd_global_test_opts_t *gopts, + uffd_test_args_t __unused *targs, + unsigned long 
chunk_size, + void (*handle_fault)(struct uffd_global_test_opts *gopts, + struct uffd_msg *msg, struct uffd_args *args) +) { unsigned long nr; pthread_t uffd_mon; @@ -1139,11 +1134,13 @@ uffd_move_test_common(uffd_test_args_t __unused *targs, unsigned long chunk_size unsigned long src_offs = 0; unsigned long dst_offs = 0; + args.gopts = gopts; + /* Prevent source pages from being mapped more than once */ - if (madvise(area_src, nr_pages * page_size, MADV_DONTFORK)) + if (madvise(gopts->area_src, gopts->nr_pages * gopts->page_size, MADV_DONTFORK)) err("madvise(MADV_DONTFORK) failure"); - if (uffd_register(uffd, area_dst, nr_pages * page_size, + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, true, false, false)) err("register failure"); @@ -1151,22 +1148,22 @@ uffd_move_test_common(uffd_test_args_t __unused *targs, unsigned long chunk_size if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) err("uffd_poll_thread create"); - step_size = chunk_size / page_size; - step_count = nr_pages / step_size; + step_size = chunk_size / gopts->page_size; + step_count = gopts->nr_pages / step_size; - if (chunk_size > page_size) { - char *aligned_src = ALIGN_UP(area_src, chunk_size); - char *aligned_dst = ALIGN_UP(area_dst, chunk_size); + if (chunk_size > gopts->page_size) { + char *aligned_src = ALIGN_UP(gopts->area_src, chunk_size); + char *aligned_dst = ALIGN_UP(gopts->area_dst, chunk_size); - if (aligned_src != area_src || aligned_dst != area_dst) { - src_offs = (aligned_src - area_src) / page_size; - dst_offs = (aligned_dst - area_dst) / page_size; + if (aligned_src != gopts->area_src || aligned_dst != gopts->area_dst) { + src_offs = (aligned_src - gopts->area_src) / gopts->page_size; + dst_offs = (aligned_dst - gopts->area_dst) / gopts->page_size; step_count--; } - orig_area_src = area_src; - orig_area_dst = area_dst; - area_src = aligned_src; - area_dst = aligned_dst; + orig_area_src = gopts->area_src; + orig_area_dst = 
gopts->area_dst; + gopts->area_src = aligned_src; + gopts->area_dst = aligned_dst; } /* @@ -1180,34 +1177,34 @@ uffd_move_test_common(uffd_test_args_t __unused *targs, unsigned long chunk_size /* Check area_src content */ for (i = 0; i < step_size; i++) { - count = *area_count(area_src, nr + i); - if (count != count_verify[src_offs + nr + i]) + count = *area_count(gopts->area_src, nr + i, gopts); + if (count != gopts->count_verify[src_offs + nr + i]) err("nr %lu source memory invalid %llu %llu\n", - nr + i, count, count_verify[src_offs + nr + i]); + nr + i, count, gopts->count_verify[src_offs + nr + i]); } /* Faulting into area_dst should move the page or the huge page */ for (i = 0; i < step_size; i++) { - count = *area_count(area_dst, nr + i); - if (count != count_verify[dst_offs + nr + i]) + count = *area_count(gopts->area_dst, nr + i, gopts); + if (count != gopts->count_verify[dst_offs + nr + i]) err("nr %lu memory corruption %llu %llu\n", - nr, count, count_verify[dst_offs + nr + i]); + nr, count, gopts->count_verify[dst_offs + nr + i]); } /* Re-check area_src content which should be empty */ for (i = 0; i < step_size; i++) { - count = *area_count(area_src, nr + i); + count = *area_count(gopts->area_src, nr + i, gopts); if (count != 0) err("nr %lu move failed %llu %llu\n", - nr, count, count_verify[src_offs + nr + i]); + nr, count, gopts->count_verify[src_offs + nr + i]); } } - if (chunk_size > page_size) { - area_src = orig_area_src; - area_dst = orig_area_dst; + if (chunk_size > gopts->page_size) { + gopts->area_src = orig_area_src; + gopts->area_dst = orig_area_dst; } - if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c)) err("pipe write"); if (pthread_join(uffd_mon, NULL)) err("join() failed"); @@ -1218,24 +1215,24 @@ uffd_move_test_common(uffd_test_args_t __unused *targs, unsigned long chunk_size uffd_test_pass(); } -static void uffd_move_test(uffd_test_args_t *targs) +static void 
uffd_move_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs) { - uffd_move_test_common(targs, page_size, uffd_move_handle_fault); + uffd_move_test_common(gopts, targs, gopts->page_size, uffd_move_handle_fault); } -static void uffd_move_pmd_test(uffd_test_args_t *targs) +static void uffd_move_pmd_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs) { - if (madvise(area_dst, nr_pages * page_size, MADV_HUGEPAGE)) + if (madvise(gopts->area_dst, gopts->nr_pages * gopts->page_size, MADV_HUGEPAGE)) err("madvise(MADV_HUGEPAGE) failure"); - uffd_move_test_common(targs, read_pmd_pagesize(), + uffd_move_test_common(gopts, targs, read_pmd_pagesize(), uffd_move_pmd_handle_fault); } -static void uffd_move_pmd_split_test(uffd_test_args_t *targs) +static void uffd_move_pmd_split_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs) { - if (madvise(area_dst, nr_pages * page_size, MADV_NOHUGEPAGE)) + if (madvise(gopts->area_dst, gopts->nr_pages * gopts->page_size, MADV_NOHUGEPAGE)) err("madvise(MADV_NOHUGEPAGE) failure"); - uffd_move_test_common(targs, read_pmd_pagesize(), + uffd_move_test_common(gopts, targs, read_pmd_pagesize(), uffd_move_pmd_handle_fault); } @@ -1295,6 +1292,11 @@ typedef enum { THR_STATE_UNINTERRUPTIBLE, } thread_state; +typedef struct { + uffd_global_test_opts_t *gopts; + volatile pid_t *pid; +} mmap_changing_thread_args; + static void sleep_short(void) { usleep(1000); @@ -1337,7 +1339,9 @@ static void thread_state_until(pid_t tid, thread_state state) static void *uffd_mmap_changing_thread(void *opaque) { - volatile pid_t *pid = opaque; + mmap_changing_thread_args *args = opaque; + uffd_global_test_opts_t *gopts = args->gopts; + volatile pid_t *pid = args->pid; int ret; /* Unfortunately, it's only fetch-able from the thread itself.. 
*/ @@ -1345,21 +1349,22 @@ static void *uffd_mmap_changing_thread(void *opaque) *pid = syscall(SYS_gettid); /* Inject an event, this will hang solid until the event read */ - ret = madvise(area_dst, page_size, MADV_REMOVE); + ret = madvise(gopts->area_dst, gopts->page_size, MADV_REMOVE); if (ret) err("madvise(MADV_REMOVE) failed"); return NULL; } -static void uffd_consume_message(void) +static void uffd_consume_message(uffd_global_test_opts_t *gopts) { struct uffd_msg msg = { 0 }; - while (uffd_read_msg(&msg)); + while (uffd_read_msg(gopts, &msg)); } -static void uffd_mmap_changing_test(uffd_test_args_t __unused *targs) +static void uffd_mmap_changing_test(uffd_global_test_opts_t *gopts, + uffd_test_args_t __unused *targs) { /* * This stores the real PID (which can be different from how tid is @@ -1368,13 +1373,14 @@ static void uffd_mmap_changing_test(uffd_test_args_t __unused *targs) pid_t pid = 0; pthread_t tid; int ret; + mmap_changing_thread_args args = { gopts, &pid }; - if (uffd_register(uffd, area_dst, nr_pages * page_size, + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, true, false, false)) err("uffd_register() failed"); /* Create a thread to generate the racy event */ - ret = pthread_create(&tid, NULL, uffd_mmap_changing_thread, &pid); + ret = pthread_create(&tid, NULL, uffd_mmap_changing_thread, &args); if (ret) err("pthread_create() failed"); @@ -1388,26 +1394,26 @@ static void uffd_mmap_changing_test(uffd_test_args_t __unused *targs) /* Wait until the thread hangs at REMOVE event */ thread_state_until(pid, THR_STATE_UNINTERRUPTIBLE); - if (!uffdio_mmap_changing_test_copy(uffd)) + if (!uffdio_mmap_changing_test_copy(gopts->uffd)) return; - if (!uffdio_mmap_changing_test_zeropage(uffd)) + if (!uffdio_mmap_changing_test_zeropage(gopts->uffd)) return; - if (!uffdio_mmap_changing_test_move(uffd)) + if (!uffdio_mmap_changing_test_move(gopts->uffd)) return; - if (!uffdio_mmap_changing_test_poison(uffd)) + if 
(!uffdio_mmap_changing_test_poison(gopts->uffd)) return; - if (!uffdio_mmap_changing_test_continue(uffd)) + if (!uffdio_mmap_changing_test_continue(gopts->uffd)) return; /* * All succeeded above! Recycle everything. Start by reading the * event so as to kick the thread roll again.. */ - uffd_consume_message(); + uffd_consume_message(gopts); ret = pthread_join(tid, NULL); assert(ret == 0); @@ -1415,10 +1421,10 @@ static void uffd_mmap_changing_test(uffd_test_args_t __unused *targs) uffd_test_pass(); } -static int prevent_hugepages(const char **errmsg) +static int prevent_hugepages(uffd_global_test_opts_t *gopts, const char **errmsg) { /* This should be done before source area is populated */ - if (madvise(area_src, nr_pages * page_size, MADV_NOHUGEPAGE)) { + if (madvise(gopts->area_src, gopts->nr_pages * gopts->page_size, MADV_NOHUGEPAGE)) { /* Ignore only if CONFIG_TRANSPARENT_HUGEPAGE=n */ if (errno != EINVAL) { if (errmsg) @@ -1429,10 +1435,10 @@ static int prevent_hugepages(const char **errmsg) return 0; } -static int request_hugepages(const char **errmsg) +static int request_hugepages(uffd_global_test_opts_t *gopts, const char **errmsg) { /* This should be done before source area is populated */ - if (madvise(area_src, nr_pages * page_size, MADV_HUGEPAGE)) { + if (madvise(gopts->area_src, gopts->nr_pages * gopts->page_size, MADV_HUGEPAGE)) { if (errmsg) { *errmsg = (errno == EINVAL) ? "CONFIG_TRANSPARENT_HUGEPAGE is not set" : @@ -1456,13 +1462,17 @@ struct uffd_test_case_ops uffd_move_test_pmd_case_ops = { * Note that _UFFDIO_ZEROPAGE is tested separately in the zeropage test. 
*/ static void -do_register_ioctls_test(uffd_test_args_t *args, bool miss, bool wp, bool minor) +do_register_ioctls_test(uffd_global_test_opts_t *gopts, + uffd_test_args_t *args, + bool miss, + bool wp, + bool minor) { uint64_t ioctls = 0, expected = BIT_ULL(_UFFDIO_WAKE); mem_type_t *mem_type = args->mem_type; int ret; - ret = uffd_register_with_ioctls(uffd, area_dst, page_size, + ret = uffd_register_with_ioctls(gopts->uffd, gopts->area_dst, gopts->page_size, miss, wp, minor, &ioctls); /* @@ -1493,18 +1503,18 @@ do_register_ioctls_test(uffd_test_args_t *args, bool miss, bool wp, bool minor) "(miss=%d, wp=%d, minor=%d): expected=0x%"PRIx64", " "returned=0x%"PRIx64, miss, wp, minor, expected, ioctls); - if (uffd_unregister(uffd, area_dst, page_size)) + if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size)) err("unregister"); } -static void uffd_register_ioctls_test(uffd_test_args_t *args) +static void uffd_register_ioctls_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { int miss, wp, minor; for (miss = 0; miss <= 1; miss++) for (wp = 0; wp <= 1; wp++) for (minor = 0; minor <= 1; minor++) - do_register_ioctls_test(args, miss, wp, minor); + do_register_ioctls_test(gopts, args, miss, wp, minor); uffd_test_pass(); } @@ -1742,6 +1752,28 @@ int main(int argc, char *argv[]) } for (j = 0; j < n_mems; j++) { mem_type = &mem_types[j]; + + /* Initialize global test options */ + uffd_global_test_opts_t gopts = { 0 }; + + gopts.map_shared = mem_type->shared; + uffd_test_ops = mem_type->mem_ops; + uffd_test_case_ops = test->test_case_ops; + + if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB)) + gopts.page_size = default_huge_page_size(); + else + gopts.page_size = psize(); + + /* Ensure we have at least 2 pages */ + gopts.nr_pages = MAX(UFFD_TEST_MEM_SIZE, gopts.page_size * 2) + / gopts.page_size; + + gopts.nr_parallel = 1; + + /* Initialize test arguments */ + args.mem_type = mem_type; + if (!(test->mem_targets & mem_type->mem_flag)) 
continue; @@ -1756,13 +1788,12 @@ int main(int argc, char *argv[]) uffd_test_skip("feature missing"); continue; } - if (uffd_setup_environment(&args, test, mem_type, - &errmsg)) { + if (uffd_test_ctx_init(&gopts, test->uffd_feature_required, &errmsg)) { uffd_test_skip(errmsg); continue; } - test->uffd_fn(&args); - uffd_test_ctx_clear(); + test->uffd_fn(&gopts, &args); + uffd_test_ctx_clear(&gopts); } } diff --git a/tools/testing/selftests/mm/uffd-wp-mremap.c b/tools/testing/selftests/mm/uffd-wp-mremap.c index b2b6116e6580..ec860625b25b 100644 --- a/tools/testing/selftests/mm/uffd-wp-mremap.c +++ b/tools/testing/selftests/mm/uffd-wp-mremap.c @@ -152,7 +152,11 @@ static bool range_is_swapped(void *addr, size_t size) return true; } -static void test_one_folio(size_t size, bool private, bool swapout, bool hugetlb) +static void test_one_folio(uffd_global_test_opts_t *gopts, + size_t size, + bool private, + bool swapout, + bool hugetlb) { struct uffdio_writeprotect wp_prms; uint64_t features = 0; @@ -176,21 +180,21 @@ static void test_one_folio(size_t size, bool private, bool swapout, bool hugetlb } /* Register range for uffd-wp. 
*/ - if (userfaultfd_open(&features)) { + if (userfaultfd_open(gopts, &features)) { if (errno == ENOENT) ksft_test_result_skip("userfaultfd not available\n"); else ksft_test_result_fail("userfaultfd_open() failed\n"); goto out; } - if (uffd_register(uffd, mem, size, false, true, false)) { + if (uffd_register(gopts->uffd, mem, size, false, true, false)) { ksft_test_result_fail("uffd_register() failed\n"); goto out; } wp_prms.mode = UFFDIO_WRITEPROTECT_MODE_WP; wp_prms.range.start = (uintptr_t)mem; wp_prms.range.len = size; - if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp_prms)) { + if (ioctl(gopts->uffd, UFFDIO_WRITEPROTECT, &wp_prms)) { ksft_test_result_fail("ioctl(UFFDIO_WRITEPROTECT) failed\n"); goto out; } @@ -237,9 +241,9 @@ static void test_one_folio(size_t size, bool private, bool swapout, bool hugetlb out: if (mem) munmap(mem, size); - if (uffd >= 0) { - close(uffd); - uffd = -1; + if (gopts->uffd >= 0) { + close(gopts->uffd); + gopts->uffd = -1; } } @@ -331,6 +335,7 @@ static const struct testcase testcases[] = { int main(void) { + uffd_global_test_opts_t gopts = { 0 }; struct thp_settings settings; int i, j, plan = 0; @@ -362,8 +367,8 @@ int main(void) const struct testcase *tc = &testcases[i]; for (j = 0; j < *tc->nr_sizes; j++) - test_one_folio(tc->sizes[j], tc->private, tc->swapout, - tc->hugetlb); + test_one_folio(&gopts, tc->sizes[j], tc->private, + tc->swapout, tc->hugetlb); } /* If THP is supported, restore original THP settings. */ -- 2.30.2

4 months, 1 week

5
7
0 0

[PATCH 9/9] selftests: kselftest: Add ulong typedef for non-glibc compatibility

by Aqib Faruqui

Some C libraries may not define the ulong typedef that is commonly available as a BSD/GNU extension. Add a fallback typedef to ensure ulong is available across all selftest environments. Signed-off-by: Aqib Faruqui <aqibaf(a)amazon.com> --- tools/testing/selftests/kselftest.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index f362c6766..a1088a2af 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -58,6 +58,11 @@ #include <stdio.h> #include <sys/utsname.h> #include <sys/syscall.h> +#include <sys/types.h> +#endif + +#ifndef ulong +typedef unsigned long ulong; #endif #ifndef ARRAY_SIZE -- 2.47.3

4 months, 1 week

1
0
0 0

[PATCH 8/9] selftests: Fix stdbuf compatibility in mixed libc environments

by Aqib Faruqui

The original stdbuf check only tested whether /usr/bin/stdbuf exists on the host system; it did not verify that stdbuf is compatible with the target test binary. The issue occurs when: - Host system has glibc-based stdbuf from coreutils - Selftest binaries are compiled with a non-glibc toolchain (cross compilation) The fix adds a runtime compatibility test against the target test binary before enabling stdbuf, so that cross-compiled selftests run successfully. Signed-off-by: Aqib Faruqui <aqibaf(a)amazon.com> --- tools/testing/selftests/kselftest/runner.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh index 2c3c58e65..8d4e33bd5 100644 --- a/tools/testing/selftests/kselftest/runner.sh +++ b/tools/testing/selftests/kselftest/runner.sh @@ -107,7 +107,7 @@ run_one() echo "# Warning: file $TEST is missing!" echo "not ok $test_num $TEST_HDR_MSG" else - if [ -x /usr/bin/stdbuf ]; then + if [ -x /usr/bin/stdbuf ] && [ -x "$TEST" ] && /usr/bin/stdbuf --output=L ldd "$TEST" >/dev/null 2>&1; then stdbuf="/usr/bin/stdbuf --output=L " fi eval kselftest_cmd_args="\$${kselftest_cmd_args_ref:-}" -- 2.47.3

4 months, 1 week

1
0
0 0

[PATCH 7/9] rseq: selftests: Add non-glibc compatibility fixes

by Aqib Faruqui

The rseq selftests rely on features provided by glibc that may not be available in non-glibc C libraries: 1. The __GNUC_PREREQ macro and glibc's thread pointer implementation are not available in non-glibc libraries 2. The __NR_rseq syscall number may not be defined in non-glibc headers Add a fallback thread pointer implementation for non-glibc systems using the pre-existing inline assembly to access thread-local storage directly via %fs/%gs registers. Also provide a fallback definition for __NR_rseq when not already defined by the C library headers: 527 for alpha and 293 for other architectures. Signed-off-by: Aqib Faruqui <aqibaf(a)amazon.com> --- .../selftests/rseq/rseq-x86-thread-pointer.h | 14 ++++++++++++++ tools/testing/selftests/rseq/rseq.c | 8 ++++++++ 2 files changed, 22 insertions(+) diff --git a/tools/testing/selftests/rseq/rseq-x86-thread-pointer.h b/tools/testing/selftests/rseq/rseq-x86-thread-pointer.h index d3133587d..a7c402926 100644 --- a/tools/testing/selftests/rseq/rseq-x86-thread-pointer.h +++ b/tools/testing/selftests/rseq/rseq-x86-thread-pointer.h @@ -14,6 +14,7 @@ extern "C" { #endif +#ifdef __GLIBC__ #if __GNUC_PREREQ (11, 1) static inline void *rseq_thread_pointer(void) { @@ -32,6 +33,19 @@ static inline void *rseq_thread_pointer(void) return __result; } #endif /* !GCC 11 */ +#else +static inline void *rseq_thread_pointer(void) +{ + void *__result; + +# ifdef __x86_64__ + __asm__ ("mov %%fs:0, %0" : "=r" (__result)); +# else + __asm__ ("mov %%gs:0, %0" : "=r" (__result)); +# endif + return __result; +} +#endif /* !__GLIBC__ */ #ifdef __cplusplus } diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index 663a9cef1..1a6f73c98 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -36,6 +36,14 @@ #include "../kselftest.h" #include "rseq.h" +#ifndef __NR_rseq +#ifdef __alpha__ +#define __NR_rseq 527 +#else +#define __NR_rseq 293 +#endif +#endif + /* * Define weak 
versions to play nice with binaries that are statically linked * against a libc that doesn't support registering its own rseq. -- 2.47.3

4 months, 1 week

1
0
0 0

[PATCH 6/9] KVM: selftests: Add backtrace fallback

by Aqib Faruqui

The backtrace() function is a GNU extension available in glibc but may not be present in non-glibc libraries. KVM selftests use backtrace() for error reporting and debugging. Add conditional inclusion of execinfo.h only for glibc builds and provide a weak stub implementation of backtrace() that returns 0 (stack trace empty) for non-glibc systems. Signed-off-by: Aqib Faruqui <aqibaf(a)amazon.com> --- tools/testing/selftests/kvm/lib/assert.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c index b49690658..c9778dc6c 100644 --- a/tools/testing/selftests/kvm/lib/assert.c +++ b/tools/testing/selftests/kvm/lib/assert.c @@ -6,11 +6,19 @@ */ #include "test_util.h" -#include <execinfo.h> #include <sys/syscall.h> +#ifdef __GLIBC__ +#include <execinfo.h> /* backtrace */ +#endif + #include "kselftest.h" +int __attribute__((weak)) backtrace(void **buffer, int size) +{ + return 0; +} + /* Dumps the current stack trace to stderr. */ static void __attribute__((noinline)) test_dump_stack(void); static void test_dump_stack(void) -- 2.47.3

4 months, 1 week

1
0
0 0

[PATCH 4/9] selftests: kselftest: Add memfd_create syscall compatibility

by Aqib Faruqui

The memfd_create function and related MFD_* flags may not be available in non-glibc C libraries. Some selftests use memfd_create for memory backing operations. Add fallback definitions for MFD_CLOEXEC and MFD_HUGETLB flags, and provide a memfd_create wrapper. Signed-off-by: Aqib Faruqui <aqibaf(a)amazon.com> --- tools/testing/selftests/kselftest.h | 19 +++++++++++++++++++ tools/testing/selftests/kvm/lib/kvm_util.c | 1 + 2 files changed, 20 insertions(+) diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index c3b6d2604..f362c6766 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -57,6 +57,7 @@ #include <string.h> #include <stdio.h> #include <sys/utsname.h> +#include <sys/syscall.h> #endif #ifndef ARRAY_SIZE @@ -80,6 +81,24 @@ #endif #endif /* end arch */ +#ifndef MFD_CLOEXEC +#define MFD_CLOEXEC 0x0001U +#endif + +#ifndef MFD_HUGETLB +#define MFD_HUGETLB 0x0004U +#endif + +static inline int memfd_create(const char *name, unsigned int flags) +{ +#ifdef __NR_memfd_create + return syscall(__NR_memfd_create, name, flags); +#else + errno = ENOSYS; + return -1; +#endif +} + /* define kselftest exit codes */ #define KSFT_PASS 0 #define KSFT_FAIL 1 diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 5ce80303d..cb5209f6a 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -15,6 +15,7 @@ #include <sys/resource.h> #include <sys/types.h> #include <sys/stat.h> +#include <sys/syscall.h> #include <unistd.h> #include <linux/kernel.h> -- 2.47.3

4 months, 1 week

1
0
0 0

2026

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-kselftest-mirror