March 2024 - Linux-kselftest-mirror

[PATCH] selftests/mm: Confirm VA exhaustion without reliance on correctness of mmap()

by Dev Jain

Currently, VA exhaustion is being checked by passing a hint to mmap() and expecting it to fail. This patch makes a stricter test by successful write() calls from /proc/self/maps to a dump file, confirming that a free chunk is indeed not available. Signed-off-by: Dev Jain <dev.jain(a)arm.com> --- Merge dependency: https://lore.kernel.org/all/20240314122250.68534-1-dev.jain@arm.com/ .../selftests/mm/virtual_address_range.c | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/tools/testing/selftests/mm/virtual_address_range.c b/tools/testing/selftests/mm/virtual_address_range.c index 7bcf8d48256a..31063613dfd9 100644 --- a/tools/testing/selftests/mm/virtual_address_range.c +++ b/tools/testing/selftests/mm/virtual_address_range.c @@ -12,6 +12,8 @@ #include <errno.h> #include <sys/mman.h> #include <sys/time.h> +#include <fcntl.h> + #include "../kselftest.h" /* @@ -93,6 +95,69 @@ static int validate_lower_address_hint(void) return 1; } +static int validate_complete_va_space(void) +{ + unsigned long start_addr, end_addr, prev_end_addr; + char line[400]; + char prot[6]; + FILE *file; + int fd; + + fd = open("va_dump", O_CREAT | O_WRONLY, 0600); + unlink("va_dump"); + if (fd < 0) { + ksft_test_result_skip("cannot create or open dump file\n"); + ksft_finished(); + } + + file = fopen("/proc/self/maps", "r"); + if (file == NULL) + ksft_exit_fail_msg("cannot open /proc/self/maps\n"); + + prev_end_addr = 0; + while (fgets(line, sizeof(line), file)) { + unsigned long hop; + int ret; + + ret = sscanf(line, "%lx-%lx %s[rwxp-]", + &start_addr, &end_addr, prot); + if (ret != 3) + ksft_exit_fail_msg("sscanf failed, cannot parse\n"); + + /* end of userspace mappings; ignore vsyscall mapping */ + if (start_addr & (1UL << 63)) + return 0; + + /* /proc/self/maps must have gaps less than 1GB only */ + if (start_addr - prev_end_addr >= SZ_1GB) + return 1; + + prev_end_addr = end_addr; + + if (prot[0] != 'r') + continue; + + /* + * Confirm whether MAP_CHUNK_SIZE chunk can 
be found or not. + * If write succeeds, no need to check MAP_CHUNK_SIZE - 1 + * addresses after that. If the address was not held by this + * process, write would fail with errno set to EFAULT. + * Anyways, if write returns anything apart from 1, exit the + * program since that would mean a bug in /proc/self/maps. + */ + hop = 0; + while (start_addr + hop < end_addr) { + if (write(fd, (void *)(start_addr + hop), 1) != 1) + return 1; + else + lseek(fd, 0, SEEK_SET); + + hop += MAP_CHUNK_SIZE; + } + } + return 0; +} + int main(int argc, char *argv[]) { char *ptr[NR_CHUNKS_LOW]; @@ -135,6 +200,10 @@ int main(int argc, char *argv[]) validate_addr(hptr[i], 1); } hchunks = i; + if (validate_complete_va_space()) { + ksft_test_result_fail("BUG in mmap() or /proc/self/maps\n"); + ksft_finished(); + } for (i = 0; i < lchunks; i++) munmap(ptr[i], MAP_CHUNK_SIZE); -- 2.34.1

1 year, 3 months

2
3
0 0

[PATCH v1 0/5] Add support for the Idle HLT intercept feature

by Manali Shukla

The upcoming new Idle HLT Intercept feature allows for the HLT instruction execution by a vCPU to be intercepted by the hypervisor only if there are no pending V_INTR and V_NMI events for the vCPU. When the vCPU is expected to service the pending V_INTR and V_NMI events, the Idle HLT intercept won’t trigger. The feature allows the hypervisor to determine if the vCPU is actually idle and reduces wasteful VMEXITs. Presence of the Idle HLT Intercept feature is indicated via CPUID function Fn8000_000A_EDX[30]. Document for the Idle HLT intercept feature will be available in the next version of "AMD64 Architecture Programmer’s Manual". Testing Done: Added a selftest to test the Idle HLT intercept functionality. Tested SEV and SEV-ES guest for the Idle HLT intercept functionality. Manali Shukla (5): x86/cpufeatures: Add CPUID feature bit for Idle HLT intercept KVM: SVM: Add Idle HLT intercept support tools: Add KVM exit reason for the Idle HLT selftests: Add an interface to read the data of named vcpu stat selftests: KVM: SVM: Add Idle HLT intercept test arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/svm.h | 1 + arch/x86/include/uapi/asm/svm.h | 2 + arch/x86/kvm/svm/svm.c | 11 +- tools/arch/x86/include/uapi/asm/svm.h | 2 + tools/testing/selftests/kvm/Makefile | 1 + .../selftests/kvm/include/kvm_util_base.h | 11 ++ tools/testing/selftests/kvm/lib/kvm_util.c | 41 ++++++ .../selftests/kvm/x86_64/svm_idlehlt_test.c | 119 ++++++++++++++++++ 9 files changed, 186 insertions(+), 3 deletions(-) create mode 100644 tools/testing/selftests/kvm/x86_64/svm_idlehlt_test.c base-commit: fdd58834d132046149699b88a27a0db26829f4fb -- 2.34.1

1 year, 3 months

2
12
0 0

[PATCH] selftests/mm: Parse VMA range in one go

by Dev Jain

Use sscanf() to directly parse the VMA range. No functional change is intended. Signed-off-by: Dev Jain <dev.jain(a)arm.com> --- tools/testing/selftests/mm/mlock2-tests.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/tools/testing/selftests/mm/mlock2-tests.c b/tools/testing/selftests/mm/mlock2-tests.c index 26f744188ad0..7f0d50fa361d 100644 --- a/tools/testing/selftests/mm/mlock2-tests.c +++ b/tools/testing/selftests/mm/mlock2-tests.c @@ -20,8 +20,6 @@ static int get_vm_area(unsigned long addr, struct vm_boundaries *area) FILE *file; int ret = 1; char line[1024] = {0}; - char *end_addr; - char *stop; unsigned long start; unsigned long end; @@ -37,21 +35,10 @@ static int get_vm_area(unsigned long addr, struct vm_boundaries *area) memset(area, 0, sizeof(struct vm_boundaries)); while(fgets(line, 1024, file)) { - end_addr = strchr(line, '-'); - if (!end_addr) { + if (sscanf(line, "%lx-%lx", &start, &end) != 2) { ksft_print_msg("cannot parse /proc/self/maps\n"); goto out; } - *end_addr = '\0'; - end_addr++; - stop = strchr(end_addr, ' '); - if (!stop) { - ksft_print_msg("cannot parse /proc/self/maps\n"); - goto out; - } - - sscanf(line, "%lx", &start); - sscanf(end_addr, "%lx", &end); if (start <= addr && end > addr) { area->start = start; -- 2.34.1

1 year, 3 months

1
0
0 0

[PATCH v1] selftests/mm: sigbus-wp test requires UFFD_FEATURE_WP_HUGETLBFS_SHMEM

by Edward Liaw

The sigbus-wp test requires the UFFD_FEATURE_WP_HUGETLBFS_SHMEM flag for shmem and hugetlb targets. Otherwise it is not backwards compatible with kernels <5.19 and fails with EINVAL. Signed-off-by: Edward Liaw <edliaw(a)google.com> --- tools/testing/selftests/mm/uffd-unit-tests.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index 4a48dc617c6b..21ec23206ab4 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -1437,7 +1437,8 @@ uffd_test_case_t uffd_tests[] = { .uffd_fn = uffd_sigbus_wp_test, .mem_targets = MEM_ALL, .uffd_feature_required = UFFD_FEATURE_SIGBUS | - UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_PAGEFAULT_FLAG_WP, + UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_PAGEFAULT_FLAG_WP | + UFFD_FEATURE_WP_HUGETLBFS_SHMEM, }, { .name = "events", -- 2.44.0.396.g6e790dbe36-goog

1 year, 3 months

2
1
0 0

[PATCH v2] uffd-unit-tests: Fix ARM related issue with fork after pthread_create

by Edward Liaw

Following issue was observed while running the uffd-unit-tests selftest on ARM devices. On x86_64 no issues were detected: pthread_create followed by fork caused deadlock in certain cases wherein fork required some work to be completed by the created thread. Used synchronization to ensure that created thread's start function has started before invoking fork. Signed-off-by: Lokesh Gidra <lokeshgidra(a)google.com> [edliaw: Refactored to use atomic_bool] Signed-off-by: Edward Liaw <edliaw(a)google.com> --- tools/testing/selftests/mm/uffd-common.c | 3 +++ tools/testing/selftests/mm/uffd-common.h | 2 ++ tools/testing/selftests/mm/uffd-unit-tests.c | 10 ++++++++++ 3 files changed, 15 insertions(+) diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c index b0ac0ec2356d..7ad6ba660c7d 100644 --- a/tools/testing/selftests/mm/uffd-common.c +++ b/tools/testing/selftests/mm/uffd-common.c @@ -18,6 +18,7 @@ bool test_uffdio_wp = true; unsigned long long *count_verify; uffd_test_ops_t *uffd_test_ops; uffd_test_case_ops_t *uffd_test_case_ops; +atomic_bool ready_for_fork; static int uffd_mem_fd_create(off_t mem_size, bool hugetlb) { @@ -518,6 +519,8 @@ void *uffd_poll_thread(void *arg) pollfd[1].fd = pipefd[cpu*2]; pollfd[1].events = POLLIN; + ready_for_fork = true; + for (;;) { ret = poll(pollfd, 2, -1); if (ret <= 0) { diff --git a/tools/testing/selftests/mm/uffd-common.h b/tools/testing/selftests/mm/uffd-common.h index cb055282c89c..cc5629c3d2aa 100644 --- a/tools/testing/selftests/mm/uffd-common.h +++ b/tools/testing/selftests/mm/uffd-common.h @@ -32,6 +32,7 @@ #include <inttypes.h> #include <stdint.h> #include <sys/random.h> +#include <stdatomic.h> #include "../kselftest.h" #include "vm_util.h" @@ -103,6 +104,7 @@ extern bool map_shared; extern bool test_uffdio_wp; extern unsigned long long *count_verify; extern volatile bool test_uffdio_copy_eexist; +extern atomic_bool ready_for_fork; extern uffd_test_ops_t anon_uffd_test_ops; extern 
uffd_test_ops_t shmem_uffd_test_ops; diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index 2b9f8cc52639..4a48dc617c6b 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -775,6 +775,8 @@ static void uffd_sigbus_test_common(bool wp) char c; struct uffd_args args = { 0 }; + ready_for_fork = false; + fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); if (uffd_register(uffd, area_dst, nr_pages * page_size, @@ -790,6 +792,9 @@ static void uffd_sigbus_test_common(bool wp) if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) err("uffd_poll_thread create"); + while (!ready_for_fork) + ; /* Wait for the poll_thread to start executing before forking */ + pid = fork(); if (pid < 0) err("fork"); @@ -829,6 +834,8 @@ static void uffd_events_test_common(bool wp) char c; struct uffd_args args = { 0 }; + ready_for_fork = false; + fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); if (uffd_register(uffd, area_dst, nr_pages * page_size, true, wp, false)) @@ -838,6 +845,9 @@ static void uffd_events_test_common(bool wp) if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) err("uffd_poll_thread create"); + while (!ready_for_fork) + ; /* Wait for the poll_thread to start executing before forking */ + pid = fork(); if (pid < 0) err("fork"); -- 2.44.0.396.g6e790dbe36-goog

1 year, 3 months

1
0
0 0

[BUG] selftests/net: test_vxlan_mdb.sh: 84 out of 642 tests [FAIL]

by Mirsad Todorovac

Hi, While running kselftest on vanilla torvalds tree kernel commit v6.8-11167-g4438a810f396, the test suite reported a number of errors. I was using the latest iproute2-next suite on an Ubuntu 22.04 LTS box. # Tests passed: 558 # Tests failed: 84 not ok 90 selftests: net: test_vxlan_mdb.sh # exit=1 495:# TEST: Destination IP - match [FAIL] 496:# TEST: Destination IP - no match [FAIL] 497:# TEST: Default destination port - match [FAIL] 498:# TEST: Default destination port - no match [FAIL] 499:# TEST: Non-default destination port - match [FAIL] 500:# TEST: Non-default destination port - no match [FAIL] 501:# TEST: Default destination VNI - match [FAIL] 502:# TEST: Default destination VNI - no match [FAIL] 503:# TEST: Non-default destination VNI - match [FAIL] 504:# TEST: Non-default destination VNI - no match [FAIL] 521:# TEST: Destination IP - match [FAIL] 522:# TEST: Destination IP - no match [FAIL] 523:# TEST: Default destination port - match [FAIL] 524:# TEST: Default destination port - no match [FAIL] 525:# TEST: Non-default destination port - match [FAIL] 526:# TEST: Non-default destination port - no match [FAIL] 527:# TEST: Default destination VNI - match [FAIL] 528:# TEST: Default destination VNI - no match [FAIL] 529:# TEST: Non-default destination VNI - match [FAIL] 530:# TEST: Non-default destination VNI - no match [FAIL] 549:# TEST: Forward valid source - first VTEP [FAIL] 550:# TEST: Forward valid source - second VTEP [FAIL] 551:# TEST: Block excluded source after removal - first VTEP [FAIL] 552:# TEST: Block excluded source after removal - second VTEP [FAIL] 553:# TEST: Forward valid source after removal - first VTEP [FAIL] 554:# TEST: Forward valid source after removal - second VTEP [FAIL] 571:# TEST: Forward valid source - first VTEP [FAIL] 572:# TEST: Forward valid source - second VTEP [FAIL] 573:# TEST: Block excluded source after removal - first VTEP [FAIL] 574:# TEST: Block excluded source after removal - second VTEP [FAIL] 575:# TEST: Forward 
valid source after removal - first VTEP [FAIL] 576:# TEST: Forward valid source after removal - second VTEP [FAIL] 593:# TEST: Forward valid source - first VTEP [FAIL] 594:# TEST: Forward valid source - second VTEP [FAIL] 595:# TEST: Block excluded source after removal - first VTEP [FAIL] 596:# TEST: Block excluded source after removal - second VTEP [FAIL] 597:# TEST: Forward valid source after removal - first VTEP [FAIL] 598:# TEST: Forward valid source after removal - second VTEP [FAIL] 615:# TEST: Forward valid source - first VTEP [FAIL] 616:# TEST: Forward valid source - second VTEP [FAIL] 617:# TEST: Block excluded source after removal - first VTEP [FAIL] 618:# TEST: Block excluded source after removal - second VTEP [FAIL] 619:# TEST: Forward valid source after removal - first VTEP [FAIL] 620:# TEST: Forward valid source after removal - second VTEP [FAIL] 636:# TEST: Forward valid source [FAIL] 637:# TEST: Receive of valid source after removal from group [FAIL] 648:# TEST: Forward valid source [FAIL] 649:# TEST: Receive of valid source after removal from group [FAIL] 660:# TEST: Forward valid source [FAIL] 661:# TEST: Receive of valid source after removal from group [FAIL] 672:# TEST: Forward valid source [FAIL] 673:# TEST: Receive of valid source after removal from group [FAIL] 683:# TEST: Egress VNI translation - PVID configured [FAIL] 684:# TEST: Egress VNI translation - no PVID configured [FAIL] 685:# TEST: Egress VNI translation - PVID reconfigured [FAIL] 695:# TEST: Egress VNI translation - PVID configured [FAIL] 696:# TEST: Egress VNI translation - no PVID configured [FAIL] 697:# TEST: Egress VNI translation - PVID reconfigured [FAIL] 707:# TEST: Registered IPv4 multicast - first VTEP [FAIL] 709:# TEST: Unregistered IPv4 multicast - first VTEP [FAIL] 710:# TEST: Unregistered IPv4 multicast - second VTEP [FAIL] 711:# TEST: Link-local IPv4 multicast - first VTEP [FAIL] 712:# TEST: Link-local IPv4 multicast - second VTEP [FAIL] 713:# TEST: Registered IPv4 
multicast with a unicast MAC - first VTEP [FAIL] 714:# TEST: Registered IPv4 multicast with a unicast MAC - second VTEP [FAIL] 715:# TEST: Registered IPv4 multicast with a broadcast MAC - first VTEP [FAIL] 716:# TEST: Registered IPv4 multicast with a broadcast MAC - second VTEP [FAIL] 734:# TEST: Registered IPv4 multicast - first VTEP [FAIL] 736:# TEST: Unregistered IPv4 multicast - first VTEP [FAIL] 737:# TEST: Unregistered IPv4 multicast - second VTEP [FAIL] 738:# TEST: Link-local IPv4 multicast - first VTEP [FAIL] 739:# TEST: Link-local IPv4 multicast - second VTEP [FAIL] 740:# TEST: Registered IPv4 multicast with a unicast MAC - first VTEP [FAIL] 741:# TEST: Registered IPv4 multicast with a unicast MAC - second VTEP [FAIL] 742:# TEST: Registered IPv4 multicast with a broadcast MAC - first VTEP [FAIL] 743:# TEST: Registered IPv4 multicast with a broadcast MAC - second VTEP [FAIL] 761:# TEST: IP multicast - first VTEP [FAIL] 763:# TEST: Broadcast - first VTEP [FAIL] 765:# TEST: IP multicast after removal - first VTEP [FAIL] 766:# TEST: IP multicast after removal - second VTEP [FAIL] 779:# TEST: IP multicast - first VTEP [FAIL] 781:# TEST: Broadcast - first VTEP [FAIL] 783:# TEST: IP multicast after removal - first VTEP [FAIL] 784:# TEST: IP multicast after removal - second VTEP [FAIL] The problem is present at least since 6.8-rc7. Please find attached the config and the full output of test_vxlan_mdb.sh. Hope this helps. Best regards, Mirsad Todorovac

1 year, 3 months

3
7
0 0

[PATCH v1] uffd-unit-tests: Fix ARM related issue with fork after pthread_create

by Edward Liaw

Following issue was observed while running the uffd-unit-tests selftest on ARM devices. On x86_64 no issues were detected: pthread_create followed by fork caused deadlock in certain cases wherein fork required some work to be completed by the created thread. Used synchronization to ensure that created thread's start function has started before invoking fork. Signed-off-by: Lokesh Gidra <lokeshgidra(a)google.com> [edliaw: Refactored to use atomic_bool] Signed-off-by: Edward Liaw <edliaw(a)google.com> --- tools/testing/selftests/mm/uffd-common.c | 4 +++- tools/testing/selftests/mm/uffd-common.h | 2 ++ tools/testing/selftests/mm/uffd-unit-tests.c | 10 ++++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c index b0ac0ec2356d..14ed98c3a389 100644 --- a/tools/testing/selftests/mm/uffd-common.c +++ b/tools/testing/selftests/mm/uffd-common.c @@ -17,7 +17,7 @@ bool map_shared; bool test_uffdio_wp = true; unsigned long long *count_verify; uffd_test_ops_t *uffd_test_ops; -uffd_test_case_ops_t *uffd_test_case_ops; +atomic_bool ready_for_fork; static int uffd_mem_fd_create(off_t mem_size, bool hugetlb) { @@ -518,6 +518,8 @@ void *uffd_poll_thread(void *arg) pollfd[1].fd = pipefd[cpu*2]; pollfd[1].events = POLLIN; + ready_for_fork = true; + for (;;) { ret = poll(pollfd, 2, -1); if (ret <= 0) { diff --git a/tools/testing/selftests/mm/uffd-common.h b/tools/testing/selftests/mm/uffd-common.h index cb055282c89c..cc5629c3d2aa 100644 --- a/tools/testing/selftests/mm/uffd-common.h +++ b/tools/testing/selftests/mm/uffd-common.h @@ -32,6 +32,7 @@ #include <inttypes.h> #include <stdint.h> #include <sys/random.h> +#include <stdatomic.h> #include "../kselftest.h" #include "vm_util.h" @@ -103,6 +104,7 @@ extern bool map_shared; extern bool test_uffdio_wp; extern unsigned long long *count_verify; extern volatile bool test_uffdio_copy_eexist; +extern atomic_bool ready_for_fork; extern 
uffd_test_ops_t anon_uffd_test_ops; extern uffd_test_ops_t shmem_uffd_test_ops; diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index 2b9f8cc52639..4a48dc617c6b 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -775,6 +775,8 @@ static void uffd_sigbus_test_common(bool wp) char c; struct uffd_args args = { 0 }; + ready_for_fork = false; + fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); if (uffd_register(uffd, area_dst, nr_pages * page_size, @@ -790,6 +792,9 @@ static void uffd_sigbus_test_common(bool wp) if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) err("uffd_poll_thread create"); + while (!ready_for_fork) + ; /* Wait for the poll_thread to start executing before forking */ + pid = fork(); if (pid < 0) err("fork"); @@ -829,6 +834,8 @@ static void uffd_events_test_common(bool wp) char c; struct uffd_args args = { 0 }; + ready_for_fork = false; + fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); if (uffd_register(uffd, area_dst, nr_pages * page_size, true, wp, false)) @@ -838,6 +845,9 @@ static void uffd_events_test_common(bool wp) if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) err("uffd_poll_thread create"); + while (!ready_for_fork) + ; /* Wait for the poll_thread to start executing before forking */ + pid = fork(); if (pid < 0) err("fork"); -- 2.44.0.396.g6e790dbe36-goog

1 year, 3 months

1
0
0 0

[PATCH bpf-next v4 0/6] sleepable bpf_timer (was: allow HID-BPF to do device IOs)

by Benjamin Tissoires

New version of the sleepable bpf_timer code, without the HID changes, as they can now go through the HID tree independently. For reference, the use cases I have in mind: --- Basically, I need to be able to defer a HID-BPF program for the following reasons (from the aforementioned patch): 1. defer an event: Sometimes we receive an out of proximity event, but the device can not be trusted enough, and we need to ensure that we won't receive another one in the following n milliseconds. So we need to wait those n milliseconds, and eventually re-inject that event in the stack. 2. inject new events in reaction to one given event: We might want to transform one given event into several. This is the case for macro keys where a single key press is supposed to send a sequence of key presses. But this could also be used to patch a faulty behavior, if a device forgets to send a release event. 3. communicate with the device in reaction to one event: We might want to communicate back to the device after a given event. For example a device might send us an event saying that it came back from sleeping state and needs to be re-initialized. Currently we can achieve that by keeping a userspace program around, raise a bpf event, and let that userspace program inject the events and commands. However, we are just keeping that program alive as a daemon for just scheduling commands. There is no logic in it, so it doesn't really justify an actual userspace wakeup. So a kernel workqueue seems simpler to handle. bpf_timers are currently running in a soft IRQ context, this patch series implements a sleepable context for them. 
Cheers, Benjamin To: Alexei Starovoitov <ast(a)kernel.org> To: Daniel Borkmann <daniel(a)iogearbox.net> To: Andrii Nakryiko <andrii(a)kernel.org> To: Martin KaFai Lau <martin.lau(a)linux.dev> To: Eduard Zingerman <eddyz87(a)gmail.com> To: Song Liu <song(a)kernel.org> To: Yonghong Song <yonghong.song(a)linux.dev> To: John Fastabend <john.fastabend(a)gmail.com> To: KP Singh <kpsingh(a)kernel.org> To: Stanislav Fomichev <sdf(a)google.com> To: Hao Luo <haoluo(a)google.com> To: Jiri Olsa <jolsa(a)kernel.org> To: Mykola Lysenko <mykolal(a)fb.com> To: Shuah Khan <shuah(a)kernel.org> Cc: Benjamin Tissoires <bentiss(a)kernel.org> Cc: <bpf(a)vger.kernel.org> Cc: <linux-kernel(a)vger.kernel.org> Cc: <linux-kselftest(a)vger.kernel.org> --- Changes in v4: - dropped the HID changes, they can go independently from bpf-core - addressed Alexei's and Eduard's remarks - added selftests - Link to v3: https://lore.kernel.org/r/20240221-hid-bpf-sleepable-v3-0-1fb378ca6301@kern… Changes in v3: - fixed the crash from v2 - changed the API to have only BPF_F_TIMER_SLEEPABLE for bpf_timer_start() - split the new kfuncs/verifier patch into several sub-patches, for easier reviews - Link to v2: https://lore.kernel.org/r/20240214-hid-bpf-sleepable-v2-0-5756b054724d@kern… Changes in v2: - make use of bpf_timer (and dropped the custom HID handling) - implemented bpf_timer_set_sleepable_cb as a kfunc - still not implemented global subprogs - no sleepable bpf_timer selftests yet - Link to v1: https://lore.kernel.org/r/20240209-hid-bpf-sleepable-v1-0-4cc895b5adbd@kern… --- Benjamin Tissoires (6): bpf/helpers: introduce sleepable bpf_timers bpf/verifier: add bpf_timer as a kfunc capable type bpf/helpers: introduce bpf_timer_set_sleepable_cb() kfunc bpf/helpers: mark the callback of bpf_timer_set_sleepable_cb() as sleepable tools: sync include/uapi/linux/bpf.h selftests/bpf: add sleepable timer tests include/linux/bpf_verifier.h | 1 + include/uapi/linux/bpf.h | 4 + kernel/bpf/helpers.c | 132 
++++++++++++++++++++- kernel/bpf/verifier.c | 92 +++++++++++++- tools/include/uapi/linux/bpf.h | 4 + tools/testing/selftests/bpf/bpf_experimental.h | 4 + .../selftests/bpf/bpf_testmod/bpf_testmod.c | 5 + .../selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h | 1 + tools/testing/selftests/bpf/prog_tests/timer.c | 1 + tools/testing/selftests/bpf/progs/timer.c | 40 ++++++- tools/testing/selftests/bpf/progs/timer_failure.c | 114 +++++++++++++++++- 11 files changed, 387 insertions(+), 11 deletions(-) --- base-commit: 9187210eee7d87eea37b45ea93454a88681894a4 change-id: 20240205-hid-bpf-sleepable-c01260fd91c4 Best regards, -- Benjamin Tissoires <bentiss(a)kernel.org>

1 year, 3 months

3
14
0 0

[PATCH 0/8] iommufd support pasid attach/replace

by Yi Liu

PASID (Process Address Space ID) is a PCIe extension to tag the DMA transactions out of a physical device, and most modern IOMMU hardware have supported PASID granular address translation. So a PASID-capable device can be attached to multiple hwpts (a.k.a. domains), each attachment is tagged with a pasid. This series first adds a missing iommu API to replace domain for a pasid, then adds iommufd APIs for device drivers to attach/replace/detach pasid to/from hwpt per userspace's request, and adds selftest to validate the iommufd APIs. pasid attach/replace is mandatory on Intel VT-d given the PASID table locates in the physical address space hence must be managed by the kernel, both for supporting vSVA and coming SIOV. But it's optional on ARM/AMD which allow configuring the PASID/CD table either in host physical address space or nested on top of a GPA address space. This series only adds VT-d support as the minimal requirement. Complete code can be found in below link: https://github.com/yiliu1765/iommufd/tree/iommufd_pasid Change log: v1: - Implement iommu_replace_device_pasid() to fall back to the original domain if this replacement failed (Kevin) - Add check in do_attach() to check corresponding attach_fn per the pasid value. 
rfc: https://lore.kernel.org/linux-iommu/20230926092651.17041-1-yi.l.liu@intel.c… Regards, Yi Liu Kevin Tian (1): iommufd: Support attach/replace hwpt per pasid Lu Baolu (2): iommu: Introduce a replace API for device pasid iommu/vt-d: Add set_dev_pasid callback for nested domain Yi Liu (5): iommufd: replace attach_fn with a structure iommufd/selftest: Add set_dev_pasid and remove_dev_pasid in mock iommu iommufd/selftest: Add a helper to get test device iommufd/selftest: Add test ops to test pasid attach/detach iommufd/selftest: Add coverage for iommufd pasid attach/detach drivers/iommu/intel/nested.c | 47 +++++ drivers/iommu/iommu-priv.h | 2 + drivers/iommu/iommu.c | 82 ++++++-- drivers/iommu/iommufd/Makefile | 1 + drivers/iommu/iommufd/device.c | 50 +++-- drivers/iommu/iommufd/iommufd_private.h | 23 +++ drivers/iommu/iommufd/iommufd_test.h | 24 +++ drivers/iommu/iommufd/pasid.c | 138 ++++++++++++++ drivers/iommu/iommufd/selftest.c | 176 ++++++++++++++++-- include/linux/iommufd.h | 6 + tools/testing/selftests/iommu/iommufd.c | 172 +++++++++++++++++ .../selftests/iommu/iommufd_fail_nth.c | 28 ++- tools/testing/selftests/iommu/iommufd_utils.h | 78 ++++++++ 13 files changed, 785 insertions(+), 42 deletions(-) create mode 100644 drivers/iommu/iommufd/pasid.c -- 2.34.1

1 year, 3 months

5
35
0 0

[PATCH V3 bpf-next 1/2] bpf: add bpf_task_get_cgroup kfunc

by Jose Fernandez

This patch enhances the BPF helpers by adding a kfunc to retrieve the cgroup v2 of a task, addressing a previous limitation where only bpf_task_get_cgroup1 was available for cgroup v1. The new kfunc is particularly useful for scenarios where obtaining the cgroup ID of a task other than the "current" one is necessary, which the existing bpf_get_current_cgroup_id helper cannot accommodate. A specific use case at Netflix involved the sched_switch tracepoint, where we had to get the cgroup IDs of both the prev and next tasks. The bpf_task_get_cgroup kfunc acquires and returns a reference to a task's default cgroup, ensuring thread-safe access by correctly implementing RCU read locking and unlocking. It leverages the existing cgroup.h helper, and cgroup_tryget to safely acquire a reference to it. Signed-off-by: Jose Fernandez <josef(a)netflix.com> Reviewed-by: Tycho Andersen <tycho(a)tycho.pizza> Acked-by: Yonghong Song <yonghong.song(a)linux.dev> Acked-by: Stanislav Fomichev <sdf(a)google.com> --- V2 -> V3: No changes V1 -> V2: Return a pointer to the cgroup instead of the cgroup ID kernel/bpf/helpers.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index a89587859571..bbd19d5eedb6 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2266,6 +2266,31 @@ bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) return NULL; return cgrp; } + +/** + * bpf_task_get_cgroup - Acquire a reference to the default cgroup of a task. + * @task: The target task + * + * This function returns the task's default cgroup, primarily + * designed for use with cgroup v2. In cgroup v1, the concept of default + * cgroup varies by subsystem, and while this function will work with + * cgroup v1, it's recommended to use bpf_task_get_cgroup1 instead. + * A cgroup returned by this kfunc which is not subsequently stored in a + * map, must be released by calling bpf_cgroup_release(). 
+ * + * Return: On success, the cgroup is returned. On failure, NULL is returned. + */ +__bpf_kfunc struct cgroup *bpf_task_get_cgroup(struct task_struct *task) +{ + struct cgroup *cgrp; + + rcu_read_lock(); + cgrp = task_dfl_cgroup(task); + if (!cgroup_tryget(cgrp)) + cgrp = NULL; + rcu_read_unlock(); + return cgrp; +} #endif /* CONFIG_CGROUPS */ /** @@ -2573,6 +2598,7 @@ BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_RCU | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_task_under_cgroup, KF_RCU) BTF_ID_FLAGS(func, bpf_task_get_cgroup1, KF_ACQUIRE | KF_RCU | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_task_get_cgroup, KF_ACQUIRE | KF_RCU | KF_RET_NULL) #endif BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_throw) base-commit: c733239f8f530872a1f80d8c45dcafbaff368737 -- 2.40.1

1 year, 3 months

4
5
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-kselftest-mirror March 2024