- Linux-kselftest-mirror - lists.linaro.org

[PATCH v4 1/2] selftests/sgx: Rename 'eenter' and 'sgx_call_vdso'

by Jarkko Sakkinen

Rename symbols for better clarity: * 'eenter' -> 'vdso_sgx_enter_enclave' * 'sgx_call_vdso' -> 'sgx_enter_enclave' Signed-off-by: Jarkko Sakkinen <jarkko(a)kernel.org> --- v2: Refined the renames just a bit. tools/testing/selftests/sgx/call.S | 6 +++--- tools/testing/selftests/sgx/main.c | 25 +++++++++++++------------ tools/testing/selftests/sgx/main.h | 4 ++-- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/sgx/call.S b/tools/testing/selftests/sgx/call.S index 4ecadc7490f4..b09a25890f3b 100644 --- a/tools/testing/selftests/sgx/call.S +++ b/tools/testing/selftests/sgx/call.S @@ -5,8 +5,8 @@ .text - .global sgx_call_vdso -sgx_call_vdso: + .global sgx_enter_enclave +sgx_enter_enclave: .cfi_startproc push %r15 .cfi_adjust_cfa_offset 8 @@ -27,7 +27,7 @@ sgx_call_vdso: .cfi_adjust_cfa_offset 8 push 0x38(%rsp) .cfi_adjust_cfa_offset 8 - call *eenter(%rip) + call *vdso_sgx_enter_enclave(%rip) add $0x10, %rsp .cfi_adjust_cfa_offset -0x10 pop %rbx diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c index d304a4044eb9..43da68388e25 100644 --- a/tools/testing/selftests/sgx/main.c +++ b/tools/testing/selftests/sgx/main.c @@ -21,7 +21,7 @@ #include "../kselftest.h" static const uint64_t MAGIC = 0x1122334455667788ULL; -vdso_sgx_enter_enclave_t eenter; +vdso_sgx_enter_enclave_t vdso_sgx_enter_enclave; struct vdso_symtab { Elf64_Sym *elf_symtab; @@ -149,7 +149,7 @@ int main(int argc, char *argv[]) { struct sgx_enclave_run run; struct vdso_symtab symtab; - Elf64_Sym *eenter_sym; + Elf64_Sym *sgx_enter_enclave_sym; uint64_t result = 0; struct encl encl; unsigned int i; @@ -194,29 +194,30 @@ int main(int argc, char *argv[]) if (!vdso_get_symtab(addr, &symtab)) goto err; - eenter_sym = vdso_symtab_get(&symtab, "__vdso_sgx_enter_enclave"); - if (!eenter_sym) + sgx_enter_enclave_sym = vdso_symtab_get(&symtab, "__vdso_sgx_enter_enclave"); + if (!sgx_enter_enclave_sym) goto err; - eenter = addr + 
eenter_sym->st_value; + vdso_sgx_enter_enclave = addr + sgx_enter_enclave_sym->st_value; - ret = sgx_call_vdso((void *)&MAGIC, &result, 0, EENTER, NULL, NULL, &run); - if (!report_results(&run, ret, result, "sgx_call_vdso")) + ret = sgx_enter_enclave((void *)&MAGIC, &result, 0, EENTER, + NULL, NULL, &run); + if (!report_results(&run, ret, result, "sgx_enter_enclave_unclobbered")) goto err; /* Invoke the vDSO directly. */ result = 0; - ret = eenter((unsigned long)&MAGIC, (unsigned long)&result, 0, EENTER, - 0, 0, &run); - if (!report_results(&run, ret, result, "eenter")) + ret = vdso_sgx_enter_enclave((unsigned long)&MAGIC, (unsigned long)&result, + 0, EENTER, 0, 0, &run); + if (!report_results(&run, ret, result, "sgx_enter_enclave")) goto err; /* And with an exit handler. */ run.user_handler = (__u64)user_handler; run.user_data = 0xdeadbeef; - ret = eenter((unsigned long)&MAGIC, (unsigned long)&result, 0, EENTER, - 0, 0, &run); + ret = vdso_sgx_enter_enclave((unsigned long)&MAGIC, (unsigned long)&result, + 0, EENTER, 0, 0, &run); if (!report_results(&run, ret, result, "user_handler")) goto err; diff --git a/tools/testing/selftests/sgx/main.h b/tools/testing/selftests/sgx/main.h index 67211a708f04..68672fd86cf9 100644 --- a/tools/testing/selftests/sgx/main.h +++ b/tools/testing/selftests/sgx/main.h @@ -35,7 +35,7 @@ bool encl_load(const char *path, struct encl *encl); bool encl_measure(struct encl *encl); bool encl_build(struct encl *encl); -int sgx_call_vdso(void *rdi, void *rsi, long rdx, u32 function, void *r8, void *r9, - struct sgx_enclave_run *run); +int sgx_enter_enclave(void *rdi, void *rsi, long rdx, u32 function, void *r8, void *r9, + struct sgx_enclave_run *run); #endif /* MAIN_H */ -- 2.31.1

4 years, 1 month

3
7
0 0

[PATCHv2] selftests: xfrm: put cleanup code into an exit trap

by Po-Hsu Lin

If the xfrm_policy.sh script gets terminated for any reason, the netns namespace files created by the test will be left behind. In this case a second attempt to run this test will fail with: # Cannot create namespace file "/run/netns/ns1": File exists Move the netns cleanup code into an exit trap so that we can ensure these files will be removed in the end. Changes in V2: - Update commit message and patch title. Signed-off-by: Po-Hsu Lin <po-hsu.lin(a)canonical.com> --- tools/testing/selftests/net/xfrm_policy.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/xfrm_policy.sh b/tools/testing/selftests/net/xfrm_policy.sh index bdf450e..bb4632b 100755 --- a/tools/testing/selftests/net/xfrm_policy.sh +++ b/tools/testing/selftests/net/xfrm_policy.sh @@ -28,6 +28,11 @@ KEY_AES=0x0123456789abcdef0123456789012345 SPI1=0x1 SPI2=0x2 +cleanup() { + for i in 1 2 3 4;do ip netns del ns$i 2>/dev/null ;done +} +trap cleanup EXIT + do_esp_policy() { local ns=$1 local me=$2 @@ -481,6 +486,4 @@ check_hthresh_repeat "policies with repeated htresh change" check_random_order ns3 "policies inserted in random order" -for i in 1 2 3 4;do ip netns del ns$i;done - exit $ret -- 2.7.4

4 years, 1 month

1
0
0 0

[PATCH resend v2 3/5] MAINTAINERS: add tools/testing/selftests/vm/ to MEMORY MANAGEMENT

by David Hildenbrand

MEMORY MANAGEMENT seems to be a good fit. Cc: Andrew Morton <akpm(a)linux-foundation.org> Cc: Michal Hocko <mhocko(a)suse.com> Cc: Oscar Salvador <osalvador(a)suse.de> Cc: Jason Gunthorpe <jgg(a)ziepe.ca> Cc: Peter Xu <peterx(a)redhat.com> Cc: Shuah Khan <shuah(a)kernel.org> Cc: linux-kselftest(a)vger.kernel.org Signed-off-by: David Hildenbrand <david(a)redhat.com> --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 9450e052f1b1..cd267d218e08 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11566,6 +11566,7 @@ F: include/linux/mm.h F: include/linux/mmzone.h F: include/linux/vmalloc.h F: mm/ +F: tools/testing/selftests/vm/ MEMORY TECHNOLOGY DEVICES (MTD) M: Miquel Raynal <miquel.raynal(a)bootlin.com> -- 2.30.2

4 years, 1 month

2
1
0 0

[PATCH v4 0/4] KVM statistics data fd-based binary interface

by Jing Zhang

This patchset provides a file descriptor for every VM and VCPU to read KVM statistics data in binary format. It is meant to provide a lightweight, flexible, scalable and efficient lock-free solution for user space telemetry applications to pull the statistics data periodically for large scale systems. The pulling frequency could be as high as a few times per second. In this patchset, every statistics data are treated to have some attributes as below: * architecture dependent or common * VM statistics data or VCPU statistics data * type: cumulative, instantaneous, * unit: none for simple counter, nanosecond, microsecond, millisecond, second, Byte, KiByte, MiByte, GiByte. Clock Cycles Since no lock/synchronization is used, the consistency between all the statistics data is not guaranteed. That means not all statistics data are read out at the exact same time, since the statistics data are still being updated by KVM subsystems while they are read out. --- * v3 -> v4 - Rebase to kvm/queue, commit 9f242010c3b4 ("KVM: avoid "deadlock" between install_new_memslots and MMU notifier") - Use C-style comments in the whole patch - Fix wrong count for x86 VCPU stats descriptors - Fix KVM stats data size counting and validity check in selftest * v2 -> v3 - Rebase to kvm/queue, commit edf408f5257b ("KVM: avoid "deadlock" between install_new_memslots and MMU notifier") - Resolve some nitpicks about format * v1 -> v2 - Use ARRAY_SIZE to count the number of stats descriptors - Fix missing `size` field initialization in macro STATS_DESC [1] https://lore.kernel.org/kvm/20210402224359.2297157-1-jingzhangos@google.com [2] https://lore.kernel.org/kvm/20210415151741.1607806-1-jingzhangos@google.com [3] https://lore.kernel.org/kvm/20210423181727.596466-1-jingzhangos@google.com --- Jing Zhang (4): KVM: stats: Separate common stats from architecture specific ones KVM: stats: Add fd-based API to read binary stats data KVM: stats: Add documentation for statistics data binary interface KVM: 
selftests: Add selftest for KVM statistics data binary interface Documentation/virt/kvm/api.rst | 171 ++++++++ arch/arm64/include/asm/kvm_host.h | 9 +- arch/arm64/kvm/guest.c | 42 +- arch/mips/include/asm/kvm_host.h | 9 +- arch/mips/kvm/mips.c | 67 ++- arch/powerpc/include/asm/kvm_host.h | 9 +- arch/powerpc/kvm/book3s.c | 68 +++- arch/powerpc/kvm/book3s_hv.c | 12 +- arch/powerpc/kvm/book3s_pr.c | 2 +- arch/powerpc/kvm/book3s_pr_papr.c | 2 +- arch/powerpc/kvm/booke.c | 63 ++- arch/s390/include/asm/kvm_host.h | 9 +- arch/s390/kvm/kvm-s390.c | 133 +++++- arch/x86/include/asm/kvm_host.h | 9 +- arch/x86/kvm/x86.c | 71 +++- include/linux/kvm_host.h | 132 +++++- include/linux/kvm_types.h | 12 + include/uapi/linux/kvm.h | 50 +++ tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 3 + .../testing/selftests/kvm/include/kvm_util.h | 3 + .../selftests/kvm/kvm_bin_form_stats.c | 380 ++++++++++++++++++ tools/testing/selftests/kvm/lib/kvm_util.c | 11 + virt/kvm/kvm_main.c | 237 ++++++++++- 24 files changed, 1415 insertions(+), 90 deletions(-) create mode 100644 tools/testing/selftests/kvm/kvm_bin_form_stats.c base-commit: 9f242010c3b46e63bc62f08fff42cef992d3801b -- 2.31.1.527.g47e6f16901-goog

4 years, 1 month

4
10
0 0

[PATCH resend v2 5/5] selftests/vm: add test for MADV_POPULATE_(READ|WRITE)

by David Hildenbrand

Let's add a simple test for MADV_POPULATE_READ and MADV_POPULATE_WRITE, verifying some error handling, that population works, and that softdirty tracking works as expected. For now, limit the test to private anonymous memory. Cc: Andrew Morton <akpm(a)linux-foundation.org> Cc: Arnd Bergmann <arnd(a)arndb.de> Cc: Michal Hocko <mhocko(a)suse.com> Cc: Oscar Salvador <osalvador(a)suse.de> Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org> Cc: Andrea Arcangeli <aarcange(a)redhat.com> Cc: Minchan Kim <minchan(a)kernel.org> Cc: Jann Horn <jannh(a)google.com> Cc: Jason Gunthorpe <jgg(a)ziepe.ca> Cc: Dave Hansen <dave.hansen(a)intel.com> Cc: Hugh Dickins <hughd(a)google.com> Cc: Rik van Riel <riel(a)surriel.com> Cc: Michael S. Tsirkin <mst(a)redhat.com> Cc: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com> Cc: Vlastimil Babka <vbabka(a)suse.cz> Cc: Richard Henderson <rth(a)twiddle.net> Cc: Ivan Kokshaysky <ink(a)jurassic.park.msu.ru> Cc: Matt Turner <mattst88(a)gmail.com> Cc: Thomas Bogendoerfer <tsbogend(a)alpha.franken.de> Cc: "James E.J. 
Bottomley" <James.Bottomley(a)HansenPartnership.com> Cc: Helge Deller <deller(a)gmx.de> Cc: Chris Zankel <chris(a)zankel.net> Cc: Max Filippov <jcmvbkbc(a)gmail.com> Cc: Mike Kravetz <mike.kravetz(a)oracle.com> Cc: Peter Xu <peterx(a)redhat.com> Cc: Rolf Eike Beer <eike-kernel(a)sf-tec.de> Cc: Shuah Khan <shuah(a)kernel.org> Cc: linux-alpha(a)vger.kernel.org Cc: linux-mips(a)vger.kernel.org Cc: linux-parisc(a)vger.kernel.org Cc: linux-xtensa(a)linux-xtensa.org Cc: linux-arch(a)vger.kernel.org Cc: linux-kselftest(a)vger.kernel.org Cc: Linux API <linux-api(a)vger.kernel.org> Signed-off-by: David Hildenbrand <david(a)redhat.com> --- tools/testing/selftests/vm/.gitignore | 1 + tools/testing/selftests/vm/Makefile | 1 + tools/testing/selftests/vm/madv_populate.c | 342 +++++++++++++++++++++ tools/testing/selftests/vm/run_vmtests.sh | 16 + 4 files changed, 360 insertions(+) create mode 100644 tools/testing/selftests/vm/madv_populate.c diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index b4fc0148360e..c9a5dd1adf7d 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -24,3 +24,4 @@ hmm-tests local_config.* protection_keys_32 protection_keys_64 +madv_populate diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 8b0cd421ebd3..04b6650c1924 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -42,6 +42,7 @@ TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += userfaultfd +TEST_GEN_FILES += madv_populate ifeq ($(MACHINE),x86_64) CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32) diff --git a/tools/testing/selftests/vm/madv_populate.c b/tools/testing/selftests/vm/madv_populate.c new file mode 100644 index 000000000000..b959e4ebdad4 --- /dev/null +++ b/tools/testing/selftests/vm/madv_populate.c @@ -0,0 +1,342 @@ +// 
SPDX-License-Identifier: GPL-2.0-only +/* + * MADV_POPULATE_READ and MADV_POPULATE_WRITE tests + * + * Copyright 2021, Red Hat, Inc. + * + * Author(s): David Hildenbrand <david(a)redhat.com> + */ +#define _GNU_SOURCE +#include <stdlib.h> +#include <string.h> +#include <stdbool.h> +#include <stdint.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <sys/mman.h> + +#include "../kselftest.h" + +#if defined(MADV_POPULATE_READ) && defined(MADV_POPULATE_WRITE) + +/* + * For now, we're using 2 MiB of private anonymous memory for all tests. + */ +#define SIZE (2 * 1024 * 1024) + +static size_t pagesize; + +static uint64_t pagemap_get_entry(int fd, char *start) +{ + const unsigned long pfn = (unsigned long)start / pagesize; + uint64_t entry; + int ret; + + ret = pread(fd, &entry, sizeof(entry), pfn * sizeof(entry)); + if (ret != sizeof(entry)) + ksft_exit_fail_msg("reading pagemap failed\n"); + return entry; +} + +static bool pagemap_is_populated(int fd, char *start) +{ + uint64_t entry = pagemap_get_entry(fd, start); + + /* Present or swapped. 
*/ + return entry & 0xc000000000000000ull; +} + +static bool pagemap_is_softdirty(int fd, char *start) +{ + uint64_t entry = pagemap_get_entry(fd, start); + + return entry & 0x0080000000000000ull; +} + +static void sense_support(void) +{ + char *addr; + int ret; + + addr = mmap(0, pagesize, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + if (!addr) + ksft_exit_fail_msg("mmap failed\n"); + + ret = madvise(addr, pagesize, MADV_POPULATE_READ); + if (ret) + ksft_exit_skip("MADV_POPULATE_READ is not available\n"); + + ret = madvise(addr, pagesize, MADV_POPULATE_WRITE); + if (ret) + ksft_exit_skip("MADV_POPULATE_WRITE is not available\n"); + + munmap(addr, pagesize); +} + +static void test_prot_read(void) +{ + char *addr; + int ret; + + ksft_print_msg("[RUN] %s\n", __func__); + + addr = mmap(0, SIZE, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + if (addr == MAP_FAILED) + ksft_exit_fail_msg("mmap failed\n"); + + ret = madvise(addr, SIZE, MADV_POPULATE_READ); + ksft_test_result(!ret, "MADV_POPULATE_READ with PROT_READ\n"); + + ret = madvise(addr, SIZE, MADV_POPULATE_WRITE); + ksft_test_result(ret == -1 && errno == EINVAL, + "MADV_POPULATE_WRITE with PROT_READ\n"); + + munmap(addr, SIZE); +} + +static void test_prot_write(void) +{ + char *addr; + int ret; + + ksft_print_msg("[RUN] %s\n", __func__); + + addr = mmap(0, SIZE, PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + if (addr == MAP_FAILED) + ksft_exit_fail_msg("mmap failed\n"); + + ret = madvise(addr, SIZE, MADV_POPULATE_READ); + ksft_test_result(ret == -1 && errno == EINVAL, + "MADV_POPULATE_READ with PROT_WRITE\n"); + + ret = madvise(addr, SIZE, MADV_POPULATE_WRITE); + ksft_test_result(!ret, "MADV_POPULATE_WRITE with PROT_WRITE\n"); + + munmap(addr, SIZE); +} + +static void test_holes(void) +{ + char *addr; + int ret; + + ksft_print_msg("[RUN] %s\n", __func__); + + addr = mmap(0, SIZE, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + if (addr == MAP_FAILED) + 
ksft_exit_fail_msg("mmap failed\n"); + ret = munmap(addr + pagesize, pagesize); + if (ret) + ksft_exit_fail_msg("munmap failed\n"); + + /* Hole in the middle */ + ret = madvise(addr, SIZE, MADV_POPULATE_READ); + ksft_test_result(ret == -1 && errno == ENOMEM, + "MADV_POPULATE_READ with holes in the middle\n"); + ret = madvise(addr, SIZE, MADV_POPULATE_WRITE); + ksft_test_result(ret == -1 && errno == ENOMEM, + "MADV_POPULATE_WRITE with holes in the middle\n"); + + /* Hole at end */ + ret = madvise(addr, 2 * pagesize, MADV_POPULATE_READ); + ksft_test_result(ret == -1 && errno == ENOMEM, + "MADV_POPULATE_READ with holes at the end\n"); + ret = madvise(addr, 2 * pagesize, MADV_POPULATE_WRITE); + ksft_test_result(ret == -1 && errno == ENOMEM, + "MADV_POPULATE_WRITE with holes at the end\n"); + + /* Hole at beginning */ + ret = madvise(addr + pagesize, pagesize, MADV_POPULATE_READ); + ksft_test_result(ret == -1 && errno == ENOMEM, + "MADV_POPULATE_READ with holes at the beginning\n"); + ret = madvise(addr + pagesize, pagesize, MADV_POPULATE_WRITE); + ksft_test_result(ret == -1 && errno == ENOMEM, + "MADV_POPULATE_WRITE with holes at the beginning\n"); + + munmap(addr, SIZE); +} + +static bool range_is_populated(char *start, ssize_t size) +{ + int fd = open("/proc/self/pagemap", O_RDONLY); + bool ret = true; + + if (fd < 0) + ksft_exit_fail_msg("opening pagemap failed\n"); + for (; size > 0 && ret; size -= pagesize, start += pagesize) + if (!pagemap_is_populated(fd, start)) + ret = false; + close(fd); + return ret; +} + +static bool range_is_not_populated(char *start, ssize_t size) +{ + int fd = open("/proc/self/pagemap", O_RDONLY); + bool ret = true; + + if (fd < 0) + ksft_exit_fail_msg("opening pagemap failed\n"); + for (; size > 0 && ret; size -= pagesize, start += pagesize) + if (pagemap_is_populated(fd, start)) + ret = false; + close(fd); + return ret; +} + +static void test_populate_read(void) +{ + char *addr; + int ret; + + ksft_print_msg("[RUN] %s\n", __func__); + 
+ addr = mmap(0, SIZE, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + if (addr == MAP_FAILED) + ksft_exit_fail_msg("mmap failed\n"); + ksft_test_result(range_is_not_populated(addr, SIZE), + "range initially not populated\n"); + + ret = madvise(addr, SIZE, MADV_POPULATE_READ); + ksft_test_result(!ret, "MADV_POPULATE_READ\n"); + ksft_test_result(range_is_populated(addr, SIZE), + "range is populated\n"); + + munmap(addr, SIZE); +} + +static void test_populate_write(void) +{ + char *addr; + int ret; + + ksft_print_msg("[RUN] %s\n", __func__); + + addr = mmap(0, SIZE, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + if (addr == MAP_FAILED) + ksft_exit_fail_msg("mmap failed\n"); + ksft_test_result(range_is_not_populated(addr, SIZE), + "range initially not populated\n"); + + ret = madvise(addr, SIZE, MADV_POPULATE_WRITE); + ksft_test_result(!ret, "MADV_POPULATE_WRITE\n"); + ksft_test_result(range_is_populated(addr, SIZE), + "range is populated\n"); + + munmap(addr, SIZE); +} + +static bool range_is_softdirty(char *start, ssize_t size) +{ + int fd = open("/proc/self/pagemap", O_RDONLY); + bool ret = true; + + if (fd < 0) + ksft_exit_fail_msg("opening pagemap failed\n"); + for (; size > 0 && ret; size -= pagesize, start += pagesize) + if (!pagemap_is_softdirty(fd, start)) + ret = false; + close(fd); + return ret; +} + +static bool range_is_not_softdirty(char *start, ssize_t size) +{ + int fd = open("/proc/self/pagemap", O_RDONLY); + bool ret = true; + + if (fd < 0) + ksft_exit_fail_msg("opening pagemap failed\n"); + for (; size > 0 && ret; size -= pagesize, start += pagesize) + if (pagemap_is_softdirty(fd, start)) + ret = false; + close(fd); + return ret; +} + +static void clear_softdirty(void) +{ + int fd = open("/proc/self/clear_refs", O_WRONLY); + const char *ctrl = "4"; + int ret; + + if (fd < 0) + ksft_exit_fail_msg("opening clear_refs failed\n"); + ret = write(fd, ctrl, strlen(ctrl)); + if (ret != strlen(ctrl)) + 
ksft_exit_fail_msg("writing clear_refs failed\n"); + close(fd); +} + +static void test_softdirty(void) +{ + char *addr; + int ret; + + ksft_print_msg("[RUN] %s\n", __func__); + + addr = mmap(0, SIZE, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + if (addr == MAP_FAILED) + ksft_exit_fail_msg("mmap failed\n"); + + /* Clear any softdirty bits. */ + clear_softdirty(); + ksft_test_result(range_is_not_softdirty(addr, SIZE), + "range is not softdirty\n"); + + /* Populating READ should set softdirty. */ + ret = madvise(addr, SIZE, MADV_POPULATE_READ); + ksft_test_result(!ret, "MADV_POPULATE_READ\n"); + ksft_test_result(range_is_not_softdirty(addr, SIZE), + "range is not softdirty\n"); + + /* Populating WRITE should set softdirty. */ + ret = madvise(addr, SIZE, MADV_POPULATE_WRITE); + ksft_test_result(!ret, "MADV_POPULATE_WRITE\n"); + ksft_test_result(range_is_softdirty(addr, SIZE), + "range is softdirty\n"); + + munmap(addr, SIZE); +} + +int main(int argc, char **argv) +{ + int err; + + pagesize = getpagesize(); + + ksft_print_header(); + ksft_set_plan(21); + + sense_support(); + test_prot_read(); + test_prot_write(); + test_holes(); + test_populate_read(); + test_populate_write(); + test_softdirty(); + + err = ksft_get_fail_cnt(); + if (err) + ksft_exit_fail_msg("%d out of %d tests failed\n", + err, ksft_test_num()); + return ksft_exit_pass(); +} + +#else /* defined(MADV_POPULATE_READ) && defined(MADV_POPULATE_WRITE) */ + +#warning "missing MADV_POPULATE_READ or MADV_POPULATE_WRITE definition" + +int main(int argc, char **argv) +{ + ksft_print_header(); + ksft_exit_skip("MADV_POPULATE_READ or MADV_POPULATE_WRITE not defined\n"); +} + +#endif /* defined(MADV_POPULATE_READ) && defined(MADV_POPULATE_WRITE) */ diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh index e953f3cd9664..955782d138ab 100755 --- a/tools/testing/selftests/vm/run_vmtests.sh +++ b/tools/testing/selftests/vm/run_vmtests.sh @@ -346,4 +346,20 
@@ else exitcode=1 fi +echo "--------------------------------------------------------" +echo "running MADV_POPULATE_READ and MADV_POPULATE_WRITE tests" +echo "--------------------------------------------------------" +./madv_populate +ret_val=$? + +if [ $ret_val -eq 0 ]; then + echo "[PASS]" +elif [ $ret_val -eq $ksft_skip ]; then + echo "[SKIP]" + exitcode=$ksft_skip +else + echo "[FAIL]" + exitcode=1 +fi + exit $exitcode -- 2.30.2

4 years, 1 month

1
0
0 0

[PATCH resend v2 4/5] selftests/vm: add protection_keys_32 / protection_keys_64 to gitignore

by David Hildenbrand

We forgot to add two binaries to gitignore. Cc: Andrew Morton <akpm(a)linux-foundation.org> Cc: Michal Hocko <mhocko(a)suse.com> Cc: Oscar Salvador <osalvador(a)suse.de> Cc: Jason Gunthorpe <jgg(a)ziepe.ca> Cc: Peter Xu <peterx(a)redhat.com> Cc: Ram Pai <linuxram(a)us.ibm.com> Cc: Shuah Khan <shuah(a)kernel.org> Cc: linux-kselftest(a)vger.kernel.org Signed-off-by: David Hildenbrand <david(a)redhat.com> --- tools/testing/selftests/vm/.gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index 9a35c3f6a557..b4fc0148360e 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -22,3 +22,5 @@ map_fixed_noreplace write_to_hugetlbfs hmm-tests local_config.* +protection_keys_32 +protection_keys_64 -- 2.30.2

4 years, 1 month

1
0
0 0

[PATCH v18 0/9] mm: introduce memfd_secret system call to create "secret" memory areas

by Mike Rapoport

From: Mike Rapoport <rppt(a)linux.ibm.com> Hi, @Andrew, this is based on v5.12-rc1, I can rebase whatever way you prefer. This is an implementation of "secret" mappings backed by a file descriptor. The file descriptor backing secret memory mappings is created using a dedicated memfd_secret system call. The desired protection mode for the memory is configured using flags parameter of the system call. The mmap() of the file descriptor created with memfd_secret() will create a "secret" memory mapping. The pages in that mapping will be marked as not present in the direct map and will be present only in the page table of the owning mm. Although normally Linux userspace mappings are protected from other users, such secret mappings are useful for environments where a hostile tenant is trying to trick the kernel into giving them access to other tenants mappings. Additionally, in the future the secret mappings may be used as a means to protect guest memory in a virtual machine host. For demonstration of secret memory usage we've created a userspace library https://git.kernel.org/pub/scm/linux/kernel/git/jejb/secret-memory-preloade… that does two things: the first is act as a preloader for openssl to redirect all the OPENSSL_malloc calls to secret memory meaning any secret keys get automatically protected this way and the other thing it does is expose the API to the user who needs it. We anticipate that a lot of the use cases would be like the openssl one: many toolkits that deal with secret keys already have special handling for the memory to try to give them greater protection, so this would simply be pluggable into the toolkits without any need for user application modification. Hiding secret memory mappings behind an anonymous file allows usage of the page cache for tracking pages allocated for the "secret" mappings as well as using address_space_operations for e.g. page migration callbacks. 
The anonymous file may be also used implicitly, like hugetlb files, to implement mmap(MAP_SECRET) and use the secret memory areas with "native" mm ABIs in the future. Removing of the pages from the direct map may cause its fragmentation on architectures that use large pages to map the physical memory which affects the system performance. However, the original Kconfig text for CONFIG_DIRECT_GBPAGES said that gigabyte pages in the direct map "... can improve the kernel's performance a tiny bit ..." (commit 00d1c5e05736 ("x86: add gbpages switches")) and the recent report [1] showed that "... although 1G mappings are a good default choice, there is no compelling evidence that it must be the only choice". Hence, it is sufficient to have secretmem disabled by default with the ability of a system administrator to enable it at boot time. In addition, there is also a long term goal to improve management of the direct map. [1] https://lore.kernel.org/linux-mm/213b4567-46ce-f116-9cdf-bbd0c884eb3c@linux… v18: * rebase on v5.12-rc1 * merge kfence fix into the original patch * massage commit message of the patch introducing the memfd_secret syscall v17: https://lore.kernel.org/lkml/20210208084920.2884-1-rppt@kernel.org * Remove pool of large pages backing secretmem allocations, per Michal Hocko * Add secretmem pages to unevictable LRU, per Michal Hocko * Use GFP_HIGHUSER as secretmem mapping mask, per Michal Hocko * Make secretmem an opt-in feature that is disabled by default v16: https://lore.kernel.org/lkml/20210121122723.3446-1-rppt@kernel.org * Fix memory leak intorduced in v15 * Clean the data left from previous page user before handing the page to the userspace v15: https://lore.kernel.org/lkml/20210120180612.1058-1-rppt@kernel.org * Add riscv/Kconfig update to disable set_memory operations for nommu builds (patch 3) * Update the code around add_to_page_cache() per Matthew's comments (patches 6,7) * Add fixups for build/checkpatch errors discovered by CI systems v14: 
https://lore.kernel.org/lkml/20201203062949.5484-1-rppt@kernel.org * Finally s/mod_node_page_state/mod_lruvec_page_state/ v13: https://lore.kernel.org/lkml/20201201074559.27742-1-rppt@kernel.org * Added Reviewed-by, thanks Catalin and David * s/mod_node_page_state/mod_lruvec_page_state/ as Shakeel suggested Older history: v12: https://lore.kernel.org/lkml/20201125092208.12544-1-rppt@kernel.org v11: https://lore.kernel.org/lkml/20201124092556.12009-1-rppt@kernel.org v10: https://lore.kernel.org/lkml/20201123095432.5860-1-rppt@kernel.org v9: https://lore.kernel.org/lkml/20201117162932.13649-1-rppt@kernel.org v8: https://lore.kernel.org/lkml/20201110151444.20662-1-rppt@kernel.org v7: https://lore.kernel.org/lkml/20201026083752.13267-1-rppt@kernel.org v6: https://lore.kernel.org/lkml/20200924132904.1391-1-rppt@kernel.org v5: https://lore.kernel.org/lkml/20200916073539.3552-1-rppt@kernel.org v4: https://lore.kernel.org/lkml/20200818141554.13945-1-rppt@kernel.org v3: https://lore.kernel.org/lkml/20200804095035.18778-1-rppt@kernel.org v2: https://lore.kernel.org/lkml/20200727162935.31714-1-rppt@kernel.org v1: https://lore.kernel.org/lkml/20200720092435.17469-1-rppt@kernel.org rfc-v2: https://lore.kernel.org/lkml/20200706172051.19465-1-rppt@kernel.org/ rfc-v1: https://lore.kernel.org/lkml/20200130162340.GA14232@rapoport-lnx/ rfc-v0: https://lore.kernel.org/lkml/1572171452-7958-1-git-send-email-rppt@kernel.o… Mike Rapoport (9): mm: add definition of PMD_PAGE_ORDER mmap: make mlock_future_check() global riscv/Kconfig: make direct map manipulation options depend on MMU set_memory: allow set_direct_map_*_noflush() for multiple pages set_memory: allow querying whether set_direct_map_*() is actually enabled mm: introduce memfd_secret system call to create "secret" memory areas PM: hibernate: disable when there are active secretmem users arch, mm: wire up memfd_secret system call where relevant secretmem: test: add basic selftest for memfd_secret(2) arch/arm64/include/asm/Kbuild 
| 1 - arch/arm64/include/asm/cacheflush.h | 6 - arch/arm64/include/asm/kfence.h | 2 +- arch/arm64/include/asm/set_memory.h | 17 ++ arch/arm64/include/uapi/asm/unistd.h | 1 + arch/arm64/kernel/machine_kexec.c | 1 + arch/arm64/mm/mmu.c | 6 +- arch/arm64/mm/pageattr.c | 23 +- arch/riscv/Kconfig | 4 +- arch/riscv/include/asm/set_memory.h | 4 +- arch/riscv/include/asm/unistd.h | 1 + arch/riscv/mm/pageattr.c | 8 +- arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + arch/x86/include/asm/set_memory.h | 4 +- arch/x86/mm/pat/set_memory.c | 8 +- fs/dax.c | 11 +- include/linux/pgtable.h | 3 + include/linux/secretmem.h | 30 +++ include/linux/set_memory.h | 16 +- include/linux/syscalls.h | 1 + include/uapi/asm-generic/unistd.h | 6 +- include/uapi/linux/magic.h | 1 + kernel/power/hibernate.c | 5 +- kernel/power/snapshot.c | 4 +- kernel/sys_ni.c | 2 + mm/Kconfig | 3 + mm/Makefile | 1 + mm/gup.c | 10 + mm/internal.h | 3 + mm/mlock.c | 3 +- mm/mmap.c | 5 +- mm/secretmem.c | 261 +++++++++++++++++++ mm/vmalloc.c | 5 +- scripts/checksyscalls.sh | 4 + tools/testing/selftests/vm/.gitignore | 1 + tools/testing/selftests/vm/Makefile | 3 +- tools/testing/selftests/vm/memfd_secret.c | 296 ++++++++++++++++++++++ tools/testing/selftests/vm/run_vmtests.sh | 17 ++ 39 files changed, 726 insertions(+), 53 deletions(-) create mode 100644 arch/arm64/include/asm/set_memory.h create mode 100644 include/linux/secretmem.h create mode 100644 mm/secretmem.c create mode 100644 tools/testing/selftests/vm/memfd_secret.c -- 2.28.0

4 years, 1 month

6
20
0 0

[PATCH] kselftest/arm64: Add missing stddef.h include to BTI tests

by Mark Brown

Explicitly include stddef.h when building the BTI tests so that we have a definition of NULL, with at least some toolchains this is not done implicitly by anything else: test.c: In function ‘start’: test.c:214:25: error: ‘NULL’ undeclared (first use in this function) 214 | sigaction(SIGILL, &sa, NULL); | ^~~~ test.c:20:1: note: ‘NULL’ is defined in header ‘<stddef.h>’; did you forget to ‘#include <stddef.h>’? Signed-off-by: Mark Brown <broonie(a)kernel.org> --- tools/testing/selftests/arm64/bti/test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/arm64/bti/test.c b/tools/testing/selftests/arm64/bti/test.c index 656b04976ccc..67b77ab83c20 100644 --- a/tools/testing/selftests/arm64/bti/test.c +++ b/tools/testing/selftests/arm64/bti/test.c @@ -6,6 +6,7 @@ #include "system.h" +#include <stddef.h> #include <linux/errno.h> #include <linux/auxvec.h> #include <linux/signal.h> -- 2.20.1

4 years, 1 month

2
1
0 0

[PATCH 1/1] mnt: Delete two unneeded bool conversions

by Zhen Lei

The result of an expression consisting of a single relational operator is already of the bool type and does not need to be evaluated explicitly. No functional change. Signed-off-by: Zhen Lei <thunder.leizhen(a)huawei.com> --- tools/testing/selftests/mount/unprivileged-remount-test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/mount/unprivileged-remount-test.c b/tools/testing/selftests/mount/unprivileged-remount-test.c index 584dc6bc3b06679..d2917054fe3ae56 100644 --- a/tools/testing/selftests/mount/unprivileged-remount-test.c +++ b/tools/testing/selftests/mount/unprivileged-remount-test.c @@ -204,7 +204,7 @@ bool test_unpriv_remount(const char *fstype, const char *mount_options, if (!WIFEXITED(status)) { die("child did not terminate cleanly\n"); } - return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false; + return WEXITSTATUS(status) == EXIT_SUCCESS; } create_and_enter_userns(); @@ -282,7 +282,7 @@ static bool test_priv_mount_unpriv_remount(void) if (!WIFEXITED(status)) { die("child did not terminate cleanly\n"); } - return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false; + return WEXITSTATUS(status) == EXIT_SUCCESS; } orig_mnt_flags = read_mnt_flags(orig_path); -- 2.26.0.106.g9fadedd

4 years, 1 month

1
0
0 0

[PATCH 1/1] selftests: timers: Remove several unneeded semicolons

by Zhen Lei

The semicolon immediately following '}' is unneeded. Signed-off-by: Zhen Lei <thunder.leizhen(a)huawei.com> --- tools/testing/selftests/timers/alarmtimer-suspend.c | 2 +- tools/testing/selftests/timers/inconsistency-check.c | 2 +- tools/testing/selftests/timers/nanosleep.c | 2 +- tools/testing/selftests/timers/nsleep-lat.c | 2 +- tools/testing/selftests/timers/set-timer-lat.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/timers/alarmtimer-suspend.c b/tools/testing/selftests/timers/alarmtimer-suspend.c index 4da09dbf83ba8e1..54da4b088f4c898 100644 --- a/tools/testing/selftests/timers/alarmtimer-suspend.c +++ b/tools/testing/selftests/timers/alarmtimer-suspend.c @@ -79,7 +79,7 @@ char *clockstring(int clockid) return "CLOCK_BOOTTIME_ALARM"; case CLOCK_TAI: return "CLOCK_TAI"; - }; + } return "UNKNOWN_CLOCKID"; } diff --git a/tools/testing/selftests/timers/inconsistency-check.c b/tools/testing/selftests/timers/inconsistency-check.c index 022d3ffe3fbffbd..e6756d9c60a7e17 100644 --- a/tools/testing/selftests/timers/inconsistency-check.c +++ b/tools/testing/selftests/timers/inconsistency-check.c @@ -72,7 +72,7 @@ char *clockstring(int clockid) return "CLOCK_BOOTTIME_ALARM"; case CLOCK_TAI: return "CLOCK_TAI"; - }; + } return "UNKNOWN_CLOCKID"; } diff --git a/tools/testing/selftests/timers/nanosleep.c b/tools/testing/selftests/timers/nanosleep.c index 71b5441c2fd9f6f..433a09676aeb8cd 100644 --- a/tools/testing/selftests/timers/nanosleep.c +++ b/tools/testing/selftests/timers/nanosleep.c @@ -72,7 +72,7 @@ char *clockstring(int clockid) return "CLOCK_BOOTTIME_ALARM"; case CLOCK_TAI: return "CLOCK_TAI"; - }; + } return "UNKNOWN_CLOCKID"; } diff --git a/tools/testing/selftests/timers/nsleep-lat.c b/tools/testing/selftests/timers/nsleep-lat.c index eb3e79ed7b4a9eb..a7ca9825e106a68 100644 --- a/tools/testing/selftests/timers/nsleep-lat.c +++ b/tools/testing/selftests/timers/nsleep-lat.c @@ -72,7 +72,7 @@ char *clockstring(int 
clockid) return "CLOCK_BOOTTIME_ALARM"; case CLOCK_TAI: return "CLOCK_TAI"; - }; + } return "UNKNOWN_CLOCKID"; } diff --git a/tools/testing/selftests/timers/set-timer-lat.c b/tools/testing/selftests/timers/set-timer-lat.c index 50da45437daab0b..d60bbcad487fd11 100644 --- a/tools/testing/selftests/timers/set-timer-lat.c +++ b/tools/testing/selftests/timers/set-timer-lat.c @@ -80,7 +80,7 @@ char *clockstring(int clockid) return "CLOCK_BOOTTIME_ALARM"; case CLOCK_TAI: return "CLOCK_TAI"; - }; + } return "UNKNOWN_CLOCKID"; } -- 2.26.0.106.g9fadedd

4 years, 1 month

1
0
0 0

[PATCH 1/2] kunit: Do not typecheck binary assertions

by David Gow

The use of typecheck() in KUNIT_EXPECT_EQ() and friends is causing more problems than I think it's worth. Things like enums need to have their values explicitly cast, and literals all need to be very precisely typed for the code to compile. While typechecking does have its uses, the additional overhead of having lots of needless casts -- combined with the awkward error messages which don't mention which types are involved -- makes tests less readable and more difficult to write. By removing the typecheck() call, the two arguments still need to be of compatible types, but don't need to be of exactly the same type, which seems a less confusing and more useful compromise. Signed-off-by: David Gow <davidgow(a)google.com> --- I appreciate that this is probably a bit controversial (and, indeed, I was a bit hesitant about sending it out myself), but after sitting on it for a few days, I still think this is probably an improvement overall. The second patch does fix what I think is an actual bug, though, so even if this isn't determined to be a good idea, it (or some equivalent) should probably go through. Cheers, -- David include/kunit/test.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/kunit/test.h b/include/kunit/test.h index 49601c4b98b8..4c56ffcb7403 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -775,7 +775,6 @@ void kunit_do_assertion(struct kunit *test, do { \ typeof(left) __left = (left); \ typeof(right) __right = (right); \ - ((void)__typecheck(__left, __right)); \ \ KUNIT_ASSERTION(test, \ __left op __right, \ -- 2.31.1.607.g51e8a6a459-goog

4 years, 1 month

3
6
0 0

[PATCH v3 0/2] secretmem: optimize page_is_secretmem()

by Mike Rapoport

From: Mike Rapoport <rppt(a)linux.ibm.com> Hi, This is an updated version of page_is_secretmem() changes. This is based on v5.12-rc7-mmots-2021-04-15-16-28. @Andrew, please let me know if you'd like me to rebase it differently or resend the entire set. v3: * add missing put_compound_head() if we are to return NULL from gup_page_range(), thanks David. * add unlikely() to test for page_is_secretmem. v2: * move the check for secretmem page in gup_pte_range after we get a reference to the page, per Matthew. Mike Rapoport (2): secretmem/gup: don't check if page is secretmem without reference secretmem: optimize page_is_secretmem() include/linux/secretmem.h | 26 +++++++++++++++++++++++++- mm/gup.c | 6 +++--- mm/secretmem.c | 12 +----------- 3 files changed, 29 insertions(+), 15 deletions(-) -- 2.28.0 Mike Rapoport (2): secretmem/gup: don't check if page is secretmem without reference secretmem: optimize page_is_secretmem() include/linux/secretmem.h | 26 +++++++++++++++++++++++++- mm/gup.c | 8 +++++--- mm/secretmem.c | 12 +----------- 3 files changed, 31 insertions(+), 15 deletions(-) -- 2.28.0

4 years, 1 month

3
4
0 0

[PATCH v3 1/2] selftests/sgx: Rename 'eenter' and 'sgx_call_vdso'

by Jarkko Sakkinen

Rename symbols for better clarity: * 'eenter' -> 'vdso_sgx_enter_enclave' * 'sgx_call_vdso' -> 'sgx_enter_enclave' Signed-off-by: Jarkko Sakkinen <jarkko(a)kernel.org> --- v2: Refined thh renames just a bit. tools/testing/selftests/sgx/call.S | 6 +++--- tools/testing/selftests/sgx/main.c | 25 +++++++++++++------------ tools/testing/selftests/sgx/main.h | 4 ++-- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/sgx/call.S b/tools/testing/selftests/sgx/call.S index 4ecadc7490f4..b09a25890f3b 100644 --- a/tools/testing/selftests/sgx/call.S +++ b/tools/testing/selftests/sgx/call.S @@ -5,8 +5,8 @@ .text - .global sgx_call_vdso -sgx_call_vdso: + .global sgx_enter_enclave +sgx_enter_enclave: .cfi_startproc push %r15 .cfi_adjust_cfa_offset 8 @@ -27,7 +27,7 @@ sgx_call_vdso: .cfi_adjust_cfa_offset 8 push 0x38(%rsp) .cfi_adjust_cfa_offset 8 - call *eenter(%rip) + call *vdso_sgx_enter_enclave(%rip) add $0x10, %rsp .cfi_adjust_cfa_offset -0x10 pop %rbx diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c index d304a4044eb9..43da68388e25 100644 --- a/tools/testing/selftests/sgx/main.c +++ b/tools/testing/selftests/sgx/main.c @@ -21,7 +21,7 @@ #include "../kselftest.h" static const uint64_t MAGIC = 0x1122334455667788ULL; -vdso_sgx_enter_enclave_t eenter; +vdso_sgx_enter_enclave_t vdso_sgx_enter_enclave; struct vdso_symtab { Elf64_Sym *elf_symtab; @@ -149,7 +149,7 @@ int main(int argc, char *argv[]) { struct sgx_enclave_run run; struct vdso_symtab symtab; - Elf64_Sym *eenter_sym; + Elf64_Sym *sgx_enter_enclave_sym; uint64_t result = 0; struct encl encl; unsigned int i; @@ -194,29 +194,30 @@ int main(int argc, char *argv[]) if (!vdso_get_symtab(addr, &symtab)) goto err; - eenter_sym = vdso_symtab_get(&symtab, "__vdso_sgx_enter_enclave"); - if (!eenter_sym) + sgx_enter_enclave_sym = vdso_symtab_get(&symtab, "__vdso_sgx_enter_enclave"); + if (!sgx_enter_enclave_sym) goto err; - eenter = addr + 
eenter_sym->st_value; + vdso_sgx_enter_enclave = addr + sgx_enter_enclave_sym->st_value; - ret = sgx_call_vdso((void *)&MAGIC, &result, 0, EENTER, NULL, NULL, &run); - if (!report_results(&run, ret, result, "sgx_call_vdso")) + ret = sgx_enter_enclave((void *)&MAGIC, &result, 0, EENTER, + NULL, NULL, &run); + if (!report_results(&run, ret, result, "sgx_enter_enclave_unclobbered")) goto err; /* Invoke the vDSO directly. */ result = 0; - ret = eenter((unsigned long)&MAGIC, (unsigned long)&result, 0, EENTER, - 0, 0, &run); - if (!report_results(&run, ret, result, "eenter")) + ret = vdso_sgx_enter_enclave((unsigned long)&MAGIC, (unsigned long)&result, + 0, EENTER, 0, 0, &run); + if (!report_results(&run, ret, result, "sgx_enter_enclave")) goto err; /* And with an exit handler. */ run.user_handler = (__u64)user_handler; run.user_data = 0xdeadbeef; - ret = eenter((unsigned long)&MAGIC, (unsigned long)&result, 0, EENTER, - 0, 0, &run); + ret = vdso_sgx_enter_enclave((unsigned long)&MAGIC, (unsigned long)&result, + 0, EENTER, 0, 0, &run); if (!report_results(&run, ret, result, "user_handler")) goto err; diff --git a/tools/testing/selftests/sgx/main.h b/tools/testing/selftests/sgx/main.h index 67211a708f04..68672fd86cf9 100644 --- a/tools/testing/selftests/sgx/main.h +++ b/tools/testing/selftests/sgx/main.h @@ -35,7 +35,7 @@ bool encl_load(const char *path, struct encl *encl); bool encl_measure(struct encl *encl); bool encl_build(struct encl *encl); -int sgx_call_vdso(void *rdi, void *rsi, long rdx, u32 function, void *r8, void *r9, - struct sgx_enclave_run *run); +int sgx_enter_enclave(void *rdi, void *rsi, long rdx, u32 function, void *r8, void *r9, + struct sgx_enclave_run *run); #endif /* MAIN_H */ -- 2.31.1

4 years, 1 month

1
1
0 0

[PATCH v6 00/10] userfaultfd: add minor fault handling for shmem

by Axel Rasmussen

Base ==== This series is based on (and therefore should apply cleanly to) the tag "v5.12-rc8-mmots-2021-04-21-23-08", with the following applied first: 1. Peter's selftest cleanup series: https://lore.kernel.org/patchwork/cover/1412450/ 2. My patch to fix a pre-existing BUG_ON in an edge case: https://lore.kernel.org/patchwork/patch/1419758/ Changelog ========= v5->v6: - Picked up {Reviewed,Acked}-by's. - Rebased onto v5.12-rc8-mmots-2021-04-21-23-08. - Put mistakenly removed delete_from_page_cache() back in the error path in shmem_mfill_atomic_pte(). [Hugh] - Keep shmem_mfill_atomic_pte() naming, instead of shmem_mcopy_... Likewise, rename our new helper to mfill_atomic_install_pte(). [Hugh] - Return directly instead of "goto out" in shmem_mfill_atomic_pte(), saving a couple of lines. [Peter] v4->v5: - Picked up {Reviewed,Acked}-by's. - Fix cleanup in error path in shmem_mcopy_atomic_pte(). [Hugh, Peter] - Mention switching to lru_cache_add() in the commit message of 9/10. [Hugh] - Split + reorder commits, so now we 1) implement the faulting path, 2) implement the CONTINUE ioctl, and 3) advertise the feature. Squash the documentation update into step (3). [Hugh, Peter] - Reorder install_pte() cleanup to come before selftest changes. [Hugh] v3->v4: - Fix handling of the shmem private mcopy case. Previously, I had (incorrectly) assumed that !vma_is_anonymous() was equivalent to "the page will be in the page cache". But, in this case we have an optimization where we allocate a new *anonymous* page. So, use a new "bool page_in_cache" instead, which checks if page->mapping is set. Correct several places with this new check. [Hugh] - Fix calling mm_counter() before page_add_..._rmap(). [Hugh] - When modifying shmem_mcopy_atomic_pte() to use the new install_pte() helper, just use lru_cache_add_inactive_or_unevictable(), no need to branch and maybe use lru_cache_add(). [Hugh] - De-pluralize mcopy_atomic_install_pte(s). 
[Hugh] - Make "writable" a bool, and initialize consistently. [Hugh] v2->v3: - Picked up {Reviewed,Acked}-by's. - Reorder commits: introduce CONTINUE before MINOR registration. [Hugh, Peter] - Don't try to {unlock,put}_page an xarray value in shmem_getpage_gfp. [Hugh] - Move enum mcopy_atomic_mode forward declare out of CONFIG_HUGETLB_PAGE. [Hugh] - Keep mistakenly removed UFFD_USER_MODE_ONLY in selftest. [Peter] - Cleanup context management in self test (make clear implicit, remove unneeded return values now that we have err()). [Peter] - Correct dst_pte argument to dst_pmd in shmem_mcopy_atomic_pte macro. [Hugh] - Mention the new shmem support feature in documentation. [Hugh] v1->v2: - Pick up Reviewed-by's. - Don't swapin page when a minor fault occurs. Notice that it needs to be swapped in, and just immediately fire the minor fault. Let a future CONTINUE deal with swapping in the page. [Peter] - Clarify comment about i_size checks in mm/userfaultfd.c. [Peter] - Only forward declare once (out of #ifdef) in hugetlb.h. [Peter] Changes since [2]: - Squash the fixes ([2]) in with the original series ([1]). This makes reviewing easier, as we no longer have to sift through deltas undoing what we had done before. [Hugh, Peter] - Modify shmem_mcopy_atomic_pte() to use the new mcopy_atomic_install_ptes() helper, reducing code duplication. [Hugh] - Properly trigger handle_userfault() in the shmem_swapin_page() case. [Hugh] - Use shmem_getpage() instead of find_lock_page() to lookup the existing page in for continue. This properly deals with swapped-out pages. [Hugh] - Unconditionally pte_mkdirty() for anon memory (as before). [Peter] - Don't include userfaultfd_k.h in either hugetlb.h or shmem_fs.h. [Hugh] - Add comment for UFFD_FEATURE_MINOR_SHMEM (to match _HUGETLBFS). [Hugh] - Fix some small cleanup issues (parens, reworded conditionals, reduced plumbing of some parameters, simplify labels/gotos, ...). 
[Hugh, Peter] Overview ======== See the series which added minor faults for hugetlbfs [3] for a detailed overview of minor fault handling in general. This series adds the same support for shmem-backed areas. This series is structured as follows: - Commits 1 and 2 are cleanups. - Commits 3 and 4 implement the new feature (minor fault handling for shmem). - Commit 5 advertises that the feature is now available since at this point it's fully implemented. - Commit 6 is a final cleanup, modifying an existing code path to re-use a new helper we've introduced. - Commits 7, 8, 9, 10 update the userfaultfd selftest to exercise the feature. Use Case ======== In some cases it is useful to have VM memory backed by tmpfs instead of hugetlbfs. So, this feature will be used to support the same VM live migration use case described in my original series. Additionally, Android folks (Lokesh Gidra <lokeshgidra(a)google.com>) hope to optimize the Android Runtime garbage collector using this feature: "The plan is to use userfaultfd for concurrently compacting the heap. With this feature, the heap can be shared-mapped at another location where the GC-thread(s) could continue the compaction operation without the need to invoke userfault ioctl(UFFDIO_COPY) each time. OTOH, if and when Java threads get faults on the heap, UFFDIO_CONTINUE can be used to resume execution. Furthermore, this feature enables updating references in the 'non-moving' portion of the heap efficiently. Without this feature, unnecessary page copying (ioctl(UFFDIO_COPY)) would be required." 
[1] https://lore.kernel.org/patchwork/cover/1388144/ [2] https://lore.kernel.org/patchwork/patch/1408161/ [3] https://lore.kernel.org/linux-fsdevel/20210301222728.176417-1-axelrasmussen… Axel Rasmussen (10): userfaultfd/hugetlbfs: avoid including userfaultfd_k.h in hugetlb.h userfaultfd/shmem: combine shmem_{mcopy_atomic,mfill_zeropage}_pte userfaultfd/shmem: support minor fault registration for shmem userfaultfd/shmem: support UFFDIO_CONTINUE for shmem userfaultfd/shmem: advertise shmem minor fault support userfaultfd/shmem: modify shmem_mfill_atomic_pte to use install_pte() userfaultfd/selftests: use memfd_create for shmem test type userfaultfd/selftests: create alias mappings in the shmem test userfaultfd/selftests: reinitialize test context in each test userfaultfd/selftests: exercise minor fault handling shmem support Documentation/admin-guide/mm/userfaultfd.rst | 3 +- fs/userfaultfd.c | 6 +- include/linux/hugetlb.h | 2 +- include/linux/shmem_fs.h | 19 +- include/linux/userfaultfd_k.h | 5 + include/uapi/linux/userfaultfd.h | 7 +- mm/hugetlb.c | 1 + mm/memory.c | 8 +- mm/shmem.c | 120 +++----- mm/userfaultfd.c | 175 ++++++++---- tools/testing/selftests/vm/userfaultfd.c | 274 ++++++++++++------- 11 files changed, 364 insertions(+), 256 deletions(-) -- 2.31.1.527.g47e6f16901-goog

4 years, 1 month

3
12
0 0

[PATCH v3 00/13] Add futex2 syscalls

by André Almeida

Hi, This patch series introduces the futex2 syscalls. * What happened to the current futex()? For some years now, developers have been trying to add new features to futex, but maintainers have been reluctant to accept them, given the multiplexed interface full of legacy features and tricky to do big changes. Some problems that people tried to address with patchsets are: NUMA-awareness[0], smaller sized futexes[1], wait on multiple futexes[2]. NUMA, for instance, just doesn't fit the current API in a reasonable way. Considering that, it's not possible to merge new features into the current futex. ** The NUMA problem At the current implementation, all futex kernel side infrastructure is stored on a single node. Given that, all futex() calls issued by processors that aren't located on that node will have a memory access penalty when doing it. ** The 32bit sized futex problem Embedded systems or anything with memory constraints would benefit of using smaller sizes for the futex userspace integer. Also, a mutex implementation can be done using just three values, so 8 bits is enough for various scenarios. ** The wait on multiple problem The use case lies in the Wine implementation of the Windows NT interface WaitMultipleObjects. This Windows API function allows a thread to sleep waiting on the first of a set of event sources (mutexes, timers, signal, console input, etc) to signal. Considering this is a primitive synchronization operation for Windows applications, being able to quickly signal events on the producer side, and quickly go to sleep on the consumer side is essential for good performance of those running over Wine. 
[0] https://lore.kernel.org/lkml/20160505204230.932454245@linutronix.de/ [1] https://lore.kernel.org/lkml/20191221155659.3159-2-malteskarupke@web.de/ [2] https://lore.kernel.org/lkml/20200213214525.183689-1-andrealmeid@collabora.… * The solution As proposed by Peter Zijlstra and Florian Weimer[3], a new interface is required to solve this, which must be designed with those features in mind. futex2() is that interface. As opposed to the current multiplexed interface, the new one should have one syscall per operation. This will allow the maintainability of the API if it gets extended, and will help users with type checking of arguments. In particular, the new interface is extended to support the ability to wait on any of a list of futexes at a time, which could be seen as a vectored extension of the FUTEX_WAIT semantics. [3] https://lore.kernel.org/lkml/20200303120050.GC2596@hirez.programming.kicks-… * The interface The new interface can be seen in details in the following patches, but this is a high level summary of what the interface can do: - Supports wake/wait semantics, as in futex() - Supports requeue operations, similarly as FUTEX_CMP_REQUEUE, but with individual flags for each address - Supports waiting for a vector of futexes, using a new syscall named futex_waitv() - Supports variable sized futexes (8bits, 16bits, 32bits and 64bits) - Supports NUMA-awareness operations, where the user can specify on which memory node would like to operate * Implementation The internal implementation follows a similar design to the original futex. Given that we want to replicate the same external behavior of current futex, this should be somewhat expected. For some functions, like the init and the code to get a shared key, I literally copied code and comments from kernel/futex.c. I decided to do so instead of exposing the original function as a public function since in that way we can freely modify our implementation if required, without any impact on old futex. 
Also, the comments precisely describe the details and corner cases of the implementation. Each patch contains a brief description of implementation, but patch 6 "docs: locking: futex2: Add documentation" adds a more complete document about it. * The patchset This patchset can be also found at my git tree: https://gitlab.collabora.com/tonyk/linux/-/tree/futex2-dev - Patch 1: Implements wait/wake, and the basic foundations of futex2 - Patches 2-4: Implement the remaining features (shared, waitv, requeue). - Patch 5: Adds the x86_x32 ABI handling. I kept it in a separated patch since I'm not sure if x86_x32 is still a thing, or if it should return -ENOSYS. - Patch 6: Add a documentation file which details the interface and the internal implementation. - Patches 7-13: Selftests for all operations along with perf support for futex2. - Patch 14: While working on porting glibc for futex2, I found out that there's a futex_wake() call at the user thread exit path, if that thread was created with clone(..., CLONE_CHILD_SETTID, ...). In order to make pthreads work with futex2, it was required to add this patch. Note that this is more a proof-of-concept of what we will need to do in future, rather than part of the interface and shouldn't be merged as it is. * Testing: This patchset provides selftests for each operation and their flags. Along with that, the following work was done: ** Stability To stress the interface in "real world scenarios": - glibc[4]: nptl's low level locking was modified to use futex2 API (except for robust and PI things). All relevant nptl/ tests passed. - Wine[5]: Proton/Wine was modified in order to use futex2() for the emulation of Windows NT sync mechanisms based on futex, called "fsync". Triple-A games with huge CPU's loads and tons of parallel jobs worked as expected when compared with the previous FUTEX_WAIT_MULTIPLE implementation at futex(). Some games issue 42k futex2() calls per second. 
- Full GNU/Linux distro: I installed the modified glibc in my host machine, so all pthread's programs would use futex2(). After tweaking systemd[6] to allow futex2() calls at seccomp, everything worked as expected (web browsers do some syscall sandboxing and need some configuration as well). - perf: The perf benchmarks tests can also be used to stress the interface, and they can be found in this patchset. ** Performance - For comparing futex() and futex2() performance, I used the artificial benchmarks implemented at perf (wake, wake-parallel, hash and requeue). The setup was 200 runs for each test and using 8, 80, 800, 8000 for the number of threads, Note that for this test, I'm not using patch 14 ("kernel: Enable waitpid() for futex2") , for reasons explained at "The patchset" section. - For the first three ones, I measured an average of 4% gain in performance. This is not a big step, but it shows that the new interface is at least comparable in performance with the current one. - For requeue, I measured an average of 21% decrease in performance compared to the original futex implementation. This is expected given the new design with individual flags. The performance trade-offs are explained at patch 4 ("futex2: Implement requeue operation"). [4] https://gitlab.collabora.com/tonyk/glibc/-/tree/futex2 [5] https://gitlab.collabora.com/tonyk/wine/-/tree/proton_5.13 [6] https://gitlab.collabora.com/tonyk/systemd * FAQ ** "Where's the code for NUMA and FUTEX_8/16/64?" The current code is already complex enough to take some time for review, so I believe it's better to split that work out to a future iteration of this patchset. Besides that, this RFC is the core part of the infrastructure, and the following features will not pose big design changes to it, the work will be more about wiring up the flags and modifying some functions. ** "Where's the PI/robust stuff?" 
As said by Peter Zijlstra at [3], all those new features are related to the "simple" futex interface, that doesn't use PI or robust. Do we want to have this complexity at futex2() and if so, should it be part of this patchset or can it be future work? Thanks, André * Changelog Changes from v2: - API now supports 64bit futexes, in addition to 8, 16 and 32. - This API change will break the glibc[4] and Proton[5] ports for now. - Refactored futex2_wait and futex2_waitv selftests v2: https://lore.kernel.org/lkml/20210304004219.134051-1-andrealmeid@collabora.… Changes from v1: - Unified futex_set_timer_and_wait and __futex_wait code - Dropped _carefull from linked list function calls - Fixed typos on docs patch - uAPI flags are now added as features are introduced, instead of all flags in patch 1 - Removed struct futex_single_waiter in favor of an anon struct v1: https://lore.kernel.org/lkml/20210215152404.250281-1-andrealmeid@collabora.… André Almeida (13): futex2: Implement wait and wake functions futex2: Add support for shared futexes futex2: Implement vectorized wait futex2: Implement requeue operation futex2: Add compatibility entry point for x86_x32 ABI docs: locking: futex2: Add documentation selftests: futex2: Add wake/wait test selftests: futex2: Add timeout test selftests: futex2: Add wouldblock test selftests: futex2: Add waitv test selftests: futex2: Add requeue test perf bench: Add futex2 benchmark tests kernel: Enable waitpid() for futex2 Documentation/locking/futex2.rst | 198 +++ Documentation/locking/index.rst | 1 + MAINTAINERS | 2 +- arch/arm/tools/syscall.tbl | 4 + arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 8 + arch/x86/entry/syscalls/syscall_32.tbl | 4 + arch/x86/entry/syscalls/syscall_64.tbl | 4 + fs/inode.c | 1 + include/linux/compat.h | 26 + include/linux/fs.h | 1 + include/linux/syscalls.h | 17 + include/uapi/asm-generic/unistd.h | 14 +- include/uapi/linux/futex.h | 31 + init/Kconfig | 7 + kernel/Makefile | 1 + 
kernel/fork.c | 2 + kernel/futex2.c | 1252 +++++++++++++++++ kernel/sys_ni.c | 9 + tools/arch/x86/include/asm/unistd_64.h | 12 + tools/include/uapi/asm-generic/unistd.h | 11 +- .../arch/x86/entry/syscalls/syscall_64.tbl | 4 + tools/perf/bench/bench.h | 4 + tools/perf/bench/futex-hash.c | 24 +- tools/perf/bench/futex-requeue.c | 57 +- tools/perf/bench/futex-wake-parallel.c | 41 +- tools/perf/bench/futex-wake.c | 37 +- tools/perf/bench/futex.h | 47 + tools/perf/builtin-bench.c | 18 +- .../selftests/futex/functional/.gitignore | 3 + .../selftests/futex/functional/Makefile | 6 +- .../futex/functional/futex2_requeue.c | 164 +++ .../selftests/futex/functional/futex2_wait.c | 195 +++ .../selftests/futex/functional/futex2_waitv.c | 154 ++ .../futex/functional/futex_wait_timeout.c | 58 +- .../futex/functional/futex_wait_wouldblock.c | 33 +- .../testing/selftests/futex/functional/run.sh | 6 + .../selftests/futex/include/futex2test.h | 112 ++ 38 files changed, 2518 insertions(+), 52 deletions(-) create mode 100644 Documentation/locking/futex2.rst create mode 100644 kernel/futex2.c create mode 100644 tools/testing/selftests/futex/functional/futex2_requeue.c create mode 100644 tools/testing/selftests/futex/functional/futex2_wait.c create mode 100644 tools/testing/selftests/futex/functional/futex2_waitv.c create mode 100644 tools/testing/selftests/futex/include/futex2test.h -- 2.31.1

4 years, 1 month

3
16
0 0

[PATCH v2 1/2] selftests/sgx: Rename 'eenter' and 'sgx_call_vdso'

by Jarkko Sakkinen

Rename symbols for better clarity: * 'eenter' -> 'vdso_sgx_enter_enclave' * 'sgx_call_vdso' -> 'sgx_enter_enclave' Signed-off-by: Jarkko Sakkinen <jarkko(a)kernel.org> --- v2: Refined the renames just a bit. tools/testing/selftests/sgx/call.S | 6 +++--- tools/testing/selftests/sgx/main.c | 25 +++++++++++++------------ tools/testing/selftests/sgx/main.h | 4 ++-- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/sgx/call.S b/tools/testing/selftests/sgx/call.S index 4ecadc7490f4..b09a25890f3b 100644 --- a/tools/testing/selftests/sgx/call.S +++ b/tools/testing/selftests/sgx/call.S @@ -5,8 +5,8 @@ .text - .global sgx_call_vdso -sgx_call_vdso: + .global sgx_enter_enclave +sgx_enter_enclave: .cfi_startproc push %r15 .cfi_adjust_cfa_offset 8 @@ -27,7 +27,7 @@ sgx_call_vdso: .cfi_adjust_cfa_offset 8 push 0x38(%rsp) .cfi_adjust_cfa_offset 8 - call *eenter(%rip) + call *vdso_sgx_enter_enclave(%rip) add $0x10, %rsp .cfi_adjust_cfa_offset -0x10 pop %rbx diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c index d304a4044eb9..43da68388e25 100644 --- a/tools/testing/selftests/sgx/main.c +++ b/tools/testing/selftests/sgx/main.c @@ -21,7 +21,7 @@ #include "../kselftest.h" static const uint64_t MAGIC = 0x1122334455667788ULL; -vdso_sgx_enter_enclave_t eenter; +vdso_sgx_enter_enclave_t vdso_sgx_enter_enclave; struct vdso_symtab { Elf64_Sym *elf_symtab; @@ -149,7 +149,7 @@ int main(int argc, char *argv[]) { struct sgx_enclave_run run; struct vdso_symtab symtab; - Elf64_Sym *eenter_sym; + Elf64_Sym *sgx_enter_enclave_sym; uint64_t result = 0; struct encl encl; unsigned int i; @@ -194,29 +194,30 @@ int main(int argc, char *argv[]) if (!vdso_get_symtab(addr, &symtab)) goto err; - eenter_sym = vdso_symtab_get(&symtab, "__vdso_sgx_enter_enclave"); - if (!eenter_sym) + sgx_enter_enclave_sym = vdso_symtab_get(&symtab, "__vdso_sgx_enter_enclave"); + if (!sgx_enter_enclave_sym) goto err; - eenter = addr + 
eenter_sym->st_value; + vdso_sgx_enter_enclave = addr + sgx_enter_enclave_sym->st_value; - ret = sgx_call_vdso((void *)&MAGIC, &result, 0, EENTER, NULL, NULL, &run); - if (!report_results(&run, ret, result, "sgx_call_vdso")) + ret = sgx_enter_enclave((void *)&MAGIC, &result, 0, EENTER, + NULL, NULL, &run); + if (!report_results(&run, ret, result, "sgx_enter_enclave_unclobbered")) goto err; /* Invoke the vDSO directly. */ result = 0; - ret = eenter((unsigned long)&MAGIC, (unsigned long)&result, 0, EENTER, - 0, 0, &run); - if (!report_results(&run, ret, result, "eenter")) + ret = vdso_sgx_enter_enclave((unsigned long)&MAGIC, (unsigned long)&result, + 0, EENTER, 0, 0, &run); + if (!report_results(&run, ret, result, "sgx_enter_enclave")) goto err; /* And with an exit handler. */ run.user_handler = (__u64)user_handler; run.user_data = 0xdeadbeef; - ret = eenter((unsigned long)&MAGIC, (unsigned long)&result, 0, EENTER, - 0, 0, &run); + ret = vdso_sgx_enter_enclave((unsigned long)&MAGIC, (unsigned long)&result, + 0, EENTER, 0, 0, &run); if (!report_results(&run, ret, result, "user_handler")) goto err; diff --git a/tools/testing/selftests/sgx/main.h b/tools/testing/selftests/sgx/main.h index 67211a708f04..68672fd86cf9 100644 --- a/tools/testing/selftests/sgx/main.h +++ b/tools/testing/selftests/sgx/main.h @@ -35,7 +35,7 @@ bool encl_load(const char *path, struct encl *encl); bool encl_measure(struct encl *encl); bool encl_build(struct encl *encl); -int sgx_call_vdso(void *rdi, void *rsi, long rdx, u32 function, void *r8, void *r9, - struct sgx_enclave_run *run); +int sgx_enter_enclave(void *rdi, void *rsi, long rdx, u32 function, void *r8, void *r9, + struct sgx_enclave_run *run); #endif /* MAIN_H */ -- 2.31.1

4 years, 1 month

1
1
0 0

[PATCH 1/2] selftests/sgx: Rename 'eenter' and 'sgx_call_vdso'

by Jarkko Sakkinen

Rename 'eenter' as 'sgx_enter_enclave', and 'sgx_call_vdso' as 'sgx_enter_enclave_unclobbered', in order to better self-describe the meaning and purpose of the symbols. Signed-off-by: Jarkko Sakkinen <jarkko(a)kernel.org> --- tools/testing/selftests/sgx/call.S | 6 +++--- tools/testing/selftests/sgx/main.c | 25 +++++++++++++------------ tools/testing/selftests/sgx/main.h | 4 ++-- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/sgx/call.S b/tools/testing/selftests/sgx/call.S index 4ecadc7490f4..71865302119c 100644 --- a/tools/testing/selftests/sgx/call.S +++ b/tools/testing/selftests/sgx/call.S @@ -5,8 +5,8 @@ .text - .global sgx_call_vdso -sgx_call_vdso: + .global sgx_enter_enclave_unclobbered +sgx_enter_enclave_unclobbered: .cfi_startproc push %r15 .cfi_adjust_cfa_offset 8 @@ -27,7 +27,7 @@ sgx_call_vdso: .cfi_adjust_cfa_offset 8 push 0x38(%rsp) .cfi_adjust_cfa_offset 8 - call *eenter(%rip) + call *sgx_enter_enclave(%rip) add $0x10, %rsp .cfi_adjust_cfa_offset -0x10 pop %rbx diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c index d304a4044eb9..d3e1db9ee4bc 100644 --- a/tools/testing/selftests/sgx/main.c +++ b/tools/testing/selftests/sgx/main.c @@ -21,7 +21,7 @@ #include "../kselftest.h" static const uint64_t MAGIC = 0x1122334455667788ULL; -vdso_sgx_enter_enclave_t eenter; +vdso_sgx_enter_enclave_t sgx_enter_enclave; struct vdso_symtab { Elf64_Sym *elf_symtab; @@ -149,7 +149,7 @@ int main(int argc, char *argv[]) { struct sgx_enclave_run run; struct vdso_symtab symtab; - Elf64_Sym *eenter_sym; + Elf64_Sym *sgx_enter_enclave_sym; uint64_t result = 0; struct encl encl; unsigned int i; @@ -194,29 +194,30 @@ int main(int argc, char *argv[]) if (!vdso_get_symtab(addr, &symtab)) goto err; - eenter_sym = vdso_symtab_get(&symtab, "__vdso_sgx_enter_enclave"); - if (!eenter_sym) + sgx_enter_enclave_sym = vdso_symtab_get(&symtab, "__vdso_sgx_enter_enclave"); + if (!sgx_enter_enclave_sym) goto err; - 
eenter = addr + eenter_sym->st_value; + sgx_enter_enclave = addr + sgx_enter_enclave_sym->st_value; - ret = sgx_call_vdso((void *)&MAGIC, &result, 0, EENTER, NULL, NULL, &run); - if (!report_results(&run, ret, result, "sgx_call_vdso")) + ret = sgx_enter_enclave_unclobbered((void *)&MAGIC, &result, 0, EENTER, + NULL, NULL, &run); + if (!report_results(&run, ret, result, "sgx_enter_enclave_unclobbered")) goto err; /* Invoke the vDSO directly. */ result = 0; - ret = eenter((unsigned long)&MAGIC, (unsigned long)&result, 0, EENTER, - 0, 0, &run); - if (!report_results(&run, ret, result, "eenter")) + ret = sgx_enter_enclave((unsigned long)&MAGIC, (unsigned long)&result, + 0, EENTER, 0, 0, &run); + if (!report_results(&run, ret, result, "sgx_enter_enclave")) goto err; /* And with an exit handler. */ run.user_handler = (__u64)user_handler; run.user_data = 0xdeadbeef; - ret = eenter((unsigned long)&MAGIC, (unsigned long)&result, 0, EENTER, - 0, 0, &run); + ret = sgx_enter_enclave((unsigned long)&MAGIC, (unsigned long)&result, + 0, EENTER, 0, 0, &run); if (!report_results(&run, ret, result, "user_handler")) goto err; diff --git a/tools/testing/selftests/sgx/main.h b/tools/testing/selftests/sgx/main.h index 67211a708f04..13479837f6eb 100644 --- a/tools/testing/selftests/sgx/main.h +++ b/tools/testing/selftests/sgx/main.h @@ -35,7 +35,7 @@ bool encl_load(const char *path, struct encl *encl); bool encl_measure(struct encl *encl); bool encl_build(struct encl *encl); -int sgx_call_vdso(void *rdi, void *rsi, long rdx, u32 function, void *r8, void *r9, - struct sgx_enclave_run *run); +int sgx_enter_enclave_unclobbered(void *rdi, void *rsi, long rdx, u32 function, void *r8, void *r9, + struct sgx_enclave_run *run); #endif /* MAIN_H */ -- 2.31.1

4 years, 1 month

1
2
0 0

[PATCH v8] fat: Add KUnit tests for checksums and timestamps

by David Gow

Add some basic sanity-check tests for the fat_checksum() function and the fat_time_unix2fat() and fat_time_fat2unix() functions. These unit tests verify these functions return correct output for a number of test inputs. These tests were inspired by -- and serve a similar purpose to -- the timestamp parsing KUnit tests in ext4[1]. Note that, unlike fat_time_unix2fat, fat_time_fat2unix wasn't previously exported, so this patch exports it as well. This is required for the case where we're building the fat and fat_test as modules. [1]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/fs/… Signed-off-by: David Gow <davidgow(a)google.com> Acked-by: OGAWA Hirofumi <hirofumi(a)mail.parknet.co.jp> --- It's been a while, but this hopefully is a final version of the FAT KUnit patchset. It has a number of changes to keep it up-to-date with current KUnit standards, notably the use of parameterised tests and the addition of a '.kunitconfig' file to allow for easy testing. It also fixes an endianness tagging issue picked up by the kernel test robot under sparse on pa-risc. Cheers, -- David Changes since v7: https://lore.kernel.org/linux-kselftest/20201028064631.3774908-1-davidgow@g… - Make the two timestamp tests parameterised: this means that the KUnit runtime and tooling are aware of the different testcases (and print a nice list of them to the TAP log when the test is run). - Fix some issues sparse picked up with __le32 tagged integers. - Add an fs/fat/.kunitconfig file which contains all the Kconfig entries needed to run the test. The test can now be run with: ./tools/testing/kunit/kunit.py run --kunitconfig fs/fat/.kunitconfig Changes since v6: https://lore.kernel.org/linux-kselftest/20201024060558.2556249-1-davidgow@g… - Make CONFIG_FAT_DEFAULT_CODEPAGE depend on FAT_FS, rather than either VFAT_FS or MSDOS_FS. 
- This means that FAT_KUNIT_TEST can now also just depend on FAT_FS - Fix a few warnings that KUnit tool was eating: - KUnit's type checking needs a specific cast for the fat_checksum() expected results. - The time test cases shouldn't be 'const' - The fake superblock is now static, as otherwise it increased the stack size too much. Changes since v4/5: https://lore.kernel.org/linux-kselftest/20201024052047.2526780-1-davidgow@g… - Fix a typo introduced in the Kconfig. It builds now. Changes since v3: https://lore.kernel.org/linux-kselftest/20201021061713.1545931-1-davidgow@g… - Update the Kconfig entry to use "depends on" rather than "select", as discussed in [2]. - Depend on "MSDOS_FS || VFAT_FS", rather than "FAT_FS", as we need the CONFIG_FAT_DEFAULT_CODEPAGE symbol to be defined. Changes since v2: https://lore.kernel.org/linux-kselftest/20201020055856.1270482-1-davidgow@g… - Comment that the export for fat_time_fat2unix() function is for KUnit tests. Changes since v1: https://lore.kernel.org/linux-kselftest/20201017064107.375174-1-davidgow@go… - Now export fat_time_fat2unix() so that the test can access it when built as a module. 
[2]: https://lore.kernel.org/linux-ext4/52959e99-4105-3de9-730c-c46894b82bdd@inf… fs/fat/.kunitconfig | 5 ++ fs/fat/Kconfig | 14 +++- fs/fat/Makefile | 2 + fs/fat/fat_test.c | 197 ++++++++++++++++++++++++++++++++++++++++++++ fs/fat/misc.c | 2 + 5 files changed, 219 insertions(+), 1 deletion(-) create mode 100644 fs/fat/.kunitconfig create mode 100644 fs/fat/fat_test.c diff --git a/fs/fat/.kunitconfig b/fs/fat/.kunitconfig new file mode 100644 index 000000000000..0a6971dbeccb --- /dev/null +++ b/fs/fat/.kunitconfig @@ -0,0 +1,5 @@ +CONFIG_KUNIT=y +CONFIG_FAT_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_FAT_KUNIT_TEST=y diff --git a/fs/fat/Kconfig b/fs/fat/Kconfig index 66532a71e8fd..238cc55f84c4 100644 --- a/fs/fat/Kconfig +++ b/fs/fat/Kconfig @@ -77,7 +77,7 @@ config VFAT_FS config FAT_DEFAULT_CODEPAGE int "Default codepage for FAT" - depends on MSDOS_FS || VFAT_FS + depends on FAT_FS default 437 help This option should be set to the codepage of your FAT filesystems. @@ -115,3 +115,15 @@ config FAT_DEFAULT_UTF8 Say Y if you use UTF-8 encoding for file names, N otherwise. See <file:Documentation/filesystems/vfat.rst> for more information. 
+ +config FAT_KUNIT_TEST + tristate "Unit Tests for FAT filesystems" if !KUNIT_ALL_TESTS + depends on KUNIT && FAT_FS + default KUNIT_ALL_TESTS + help + This builds the FAT KUnit tests + + For more information on KUnit and unit tests in general, please refer + to the KUnit documentation in Documentation/dev-tools/kunit + + If unsure, say N diff --git a/fs/fat/Makefile b/fs/fat/Makefile index 70645ce2f7fc..2b034112690d 100644 --- a/fs/fat/Makefile +++ b/fs/fat/Makefile @@ -10,3 +10,5 @@ obj-$(CONFIG_MSDOS_FS) += msdos.o fat-y := cache.o dir.o fatent.o file.o inode.o misc.o nfs.o vfat-y := namei_vfat.o msdos-y := namei_msdos.o + +obj-$(CONFIG_FAT_KUNIT_TEST) += fat_test.o diff --git a/fs/fat/fat_test.c b/fs/fat/fat_test.c new file mode 100644 index 000000000000..febd25f57d4b --- /dev/null +++ b/fs/fat/fat_test.c @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KUnit tests for FAT filesystems. + * + * Copyright (C) 2020 Google LLC. + * Author: David Gow <davidgow(a)google.com> + */ + +#include <kunit/test.h> + +#include "fat.h" + +static void fat_checksum_test(struct kunit *test) +{ + /* With no extension. */ + KUNIT_EXPECT_EQ(test, fat_checksum("VMLINUX "), (u8)44); + /* With 3-letter extension. */ + KUNIT_EXPECT_EQ(test, fat_checksum("README TXT"), (u8)115); + /* With short (1-letter) extension. 
*/ + KUNIT_EXPECT_EQ(test, fat_checksum("ABCDEFGHA "), (u8)98); +} + + +struct fat_timestamp_testcase { + const char *name; + struct timespec64 ts; + __le16 time; + __le16 date; + u8 cs; + int time_offset; +}; + +static struct fat_timestamp_testcase time_test_cases[] = { + { + .name = "Earliest possible UTC (1980-01-01 00:00:00)", + .ts = {.tv_sec = 315532800LL, .tv_nsec = 0L}, + .time = cpu_to_le16(0), + .date = cpu_to_le16(33), + .cs = 0, + .time_offset = 0, + }, + { + .name = "Latest possible UTC (2107-12-31 23:59:58)", + .ts = {.tv_sec = 4354819198LL, .tv_nsec = 0L}, + .time = cpu_to_le16(49021), + .date = cpu_to_le16(65439), + .cs = 0, + .time_offset = 0, + }, + { + .name = "Earliest possible (UTC-11) (== 1979-12-31 13:00:00 UTC)", + .ts = {.tv_sec = 315493200LL, .tv_nsec = 0L}, + .time = cpu_to_le16(0), + .date = cpu_to_le16(33), + .cs = 0, + .time_offset = 11 * 60, + }, + { + .name = "Latest possible (UTC+11) (== 2108-01-01 10:59:58 UTC)", + .ts = {.tv_sec = 4354858798LL, .tv_nsec = 0L}, + .time = cpu_to_le16(49021), + .date = cpu_to_le16(65439), + .cs = 0, + .time_offset = -11 * 60, + }, + { + .name = "Leap Day / Year (1996-02-29 00:00:00)", + .ts = {.tv_sec = 825552000LL, .tv_nsec = 0L}, + .time = cpu_to_le16(0), + .date = cpu_to_le16(8285), + .cs = 0, + .time_offset = 0, + }, + { + .name = "Year 2000 is leap year (2000-02-29 00:00:00)", + .ts = {.tv_sec = 951782400LL, .tv_nsec = 0L}, + .time = cpu_to_le16(0), + .date = cpu_to_le16(10333), + .cs = 0, + .time_offset = 0, + }, + { + .name = "Year 2100 not leap year (2100-03-01 00:00:00)", + .ts = {.tv_sec = 4107542400LL, .tv_nsec = 0L}, + .time = cpu_to_le16(0), + .date = cpu_to_le16(61537), + .cs = 0, + .time_offset = 0, + }, + { + .name = "Leap year + timezone UTC+1 (== 2004-02-29 00:30:00 UTC)", + .ts = {.tv_sec = 1078014600LL, .tv_nsec = 0L}, + .time = cpu_to_le16(48064), + .date = cpu_to_le16(12380), + .cs = 0, + .time_offset = -60, + }, + { + .name = "Leap year + timezone UTC-1 (== 2004-02-29 23:30:00 
UTC)", + .ts = {.tv_sec = 1078097400LL, .tv_nsec = 0L}, + .time = cpu_to_le16(960), + .date = cpu_to_le16(12385), + .cs = 0, + .time_offset = 60, + }, + { + .name = "VFAT odd-second resolution (1999-12-31 23:59:59)", + .ts = {.tv_sec = 946684799LL, .tv_nsec = 0L}, + .time = cpu_to_le16(49021), + .date = cpu_to_le16(10143), + .cs = 100, + .time_offset = 0, + }, + { + .name = "VFAT 10ms resolution (1980-01-01 00:00:00:0010)", + .ts = {.tv_sec = 315532800LL, .tv_nsec = 10000000L}, + .time = cpu_to_le16(0), + .date = cpu_to_le16(33), + .cs = 1, + .time_offset = 0, + }, +}; + +static void time_testcase_desc(struct fat_timestamp_testcase *t, + char *desc) +{ + strscpy(desc, t->name, KUNIT_PARAM_DESC_SIZE); +} + +KUNIT_ARRAY_PARAM(fat_time, time_test_cases, time_testcase_desc); + +static void fat_time_fat2unix_test(struct kunit *test) +{ + static struct msdos_sb_info fake_sb; + struct timespec64 ts; + struct fat_timestamp_testcase *testcase = + (struct fat_timestamp_testcase *)test->param_value; + + fake_sb.options.tz_set = 1; + fake_sb.options.time_offset = testcase->time_offset; + + fat_time_fat2unix(&fake_sb, &ts, + testcase->time, + testcase->date, + testcase->cs); + KUNIT_EXPECT_EQ_MSG(test, + testcase->ts.tv_sec, + ts.tv_sec, + "Timestamp mismatch (seconds)\n"); + KUNIT_EXPECT_EQ_MSG(test, + testcase->ts.tv_nsec, + ts.tv_nsec, + "Timestamp mismatch (nanoseconds)\n"); +} + +static void fat_time_unix2fat_test(struct kunit *test) +{ + static struct msdos_sb_info fake_sb; + __le16 date, time; + u8 cs; + struct fat_timestamp_testcase *testcase = + (struct fat_timestamp_testcase *)test->param_value; + + fake_sb.options.tz_set = 1; + fake_sb.options.time_offset = testcase->time_offset; + + fat_time_unix2fat(&fake_sb, &(testcase->ts), + &time, &date, &cs); + KUNIT_EXPECT_EQ_MSG(test, + le16_to_cpu(testcase->time), + le16_to_cpu(time), + "Time mismatch\n"); + KUNIT_EXPECT_EQ_MSG(test, + le16_to_cpu(testcase->date), + le16_to_cpu(date), + "Date mismatch\n"); + 
KUNIT_EXPECT_EQ_MSG(test, + testcase->cs, + cs, + "Centisecond mismatch\n"); +} + +static struct kunit_case fat_test_cases[] = { + KUNIT_CASE(fat_checksum_test), + KUNIT_CASE_PARAM(fat_time_fat2unix_test, fat_time_gen_params), + KUNIT_CASE_PARAM(fat_time_unix2fat_test, fat_time_gen_params), + {}, +}; + +static struct kunit_suite fat_test_suite = { + .name = "fat_test", + .test_cases = fat_test_cases, +}; + +kunit_test_suites(&fat_test_suite); + +MODULE_LICENSE("GPL v2"); diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 18a50a46b57f..9073fa927be3 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -229,6 +229,8 @@ void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec64 *ts, ts->tv_nsec = 0; } } +/* Export fat_time_fat2unix() for the fat_test KUnit tests. */ +EXPORT_SYMBOL_GPL(fat_time_fat2unix); /* Convert linear UNIX date to a FAT time/date pair. */ void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec64 *ts, -- 2.31.1.368.gbe11c130af-goog

4 years, 1 month

3
4
0 0

[PATCH] kbuild: replace LANG=C with LC_ALL=C

by Masahiro Yamada

LANG gives a weak default to each LC_* in case it is not explicitly defined. LC_ALL, if set, overrides all other LC_* variables. LANG < LC_CTYPE, LC_COLLATE, LC_MONETARY, LC_NUMERIC, ... < LC_ALL This is why documentation such as [1] suggests to set LC_ALL in build scripts to get the deterministic result. LANG=C is not strong enough to override LC_* that may be set by end users. [1]: https://reproducible-builds.org/docs/locales/ Signed-off-by: Masahiro Yamada <masahiroy(a)kernel.org> --- arch/powerpc/boot/wrapper | 2 +- scripts/nsdeps | 2 +- scripts/recordmcount.pl | 2 +- scripts/setlocalversion | 2 +- scripts/tags.sh | 2 +- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 2 +- usr/gen_initramfs.sh | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index 41fa0a8715e3..cdb796b76e2e 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -191,7 +191,7 @@ if [ -z "$kernel" ]; then kernel=vmlinux fi -LANG=C elfformat="`${CROSS}objdump -p "$kernel" | grep 'file format' | awk '{print $4}'`" +LC_ALL=C elfformat="`${CROSS}objdump -p "$kernel" | grep 'file format' | awk '{print $4}'`" case "$elfformat" in elf64-powerpcle) format=elf64lppc ;; elf64-powerpc) format=elf32ppc ;; diff --git a/scripts/nsdeps b/scripts/nsdeps index e8ce2a4d704a..04c4b96e95ec 100644 --- a/scripts/nsdeps +++ b/scripts/nsdeps @@ -44,7 +44,7 @@ generate_deps() { for source_file in $mod_source_files; do sed '/MODULE_IMPORT_NS/Q' $source_file > ${source_file}.tmp offset=$(wc -l ${source_file}.tmp | awk '{print $1;}') - cat $source_file | grep MODULE_IMPORT_NS | LANG=C sort -u >> ${source_file}.tmp + cat $source_file | grep MODULE_IMPORT_NS | LC_ALL=C sort -u >> ${source_file}.tmp tail -n +$((offset +1)) ${source_file} | grep -v MODULE_IMPORT_NS >> ${source_file}.tmp if ! 
diff -q ${source_file} ${source_file}.tmp; then mv ${source_file}.tmp ${source_file} diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 867860ea57da..0a7fc9507d6f 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -497,7 +497,7 @@ sub update_funcs # # Step 2: find the sections and mcount call sites # -open(IN, "LANG=C $objdump -hdr $inputfile|") || die "error running $objdump"; +open(IN, "LC_ALL=C $objdump -hdr $inputfile|") || die "error running $objdump"; my $text; diff --git a/scripts/setlocalversion b/scripts/setlocalversion index bb709eda96cd..db941f6d9591 100755 --- a/scripts/setlocalversion +++ b/scripts/setlocalversion @@ -126,7 +126,7 @@ scm_version() fi # Check for svn and a svn repo. - if rev=$(LANG= LC_ALL= LC_MESSAGES=C svn info 2>/dev/null | grep '^Last Changed Rev'); then + if rev=$(LC_ALL=C svn info 2>/dev/null | grep '^Last Changed Rev'); then rev=$(echo $rev | awk '{print $NF}') printf -- '-svn%s' "$rev" diff --git a/scripts/tags.sh b/scripts/tags.sh index fd96734deff1..db8ba411860a 100755 --- a/scripts/tags.sh +++ b/scripts/tags.sh @@ -326,5 +326,5 @@ esac # Remove structure forward declarations. 
if [ -n "$remove_structs" ]; then - LANG=C sed -i -e '/^$[a-zA-Z_][a-zA-Z0-9_]*$\t.*\t\/\^struct \1;.*\$\/;"\tx$/d' $1 + LC_ALL=C sed -i -e '/^$[a-zA-Z_][a-zA-Z0-9_]*$\t.*\t\/\^struct \1;.*\$\/;"\tx$/d' $1 fi diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 10a030b53b23..1d2a6e7b877c 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -273,7 +273,7 @@ check_mptcp_disabled() ip netns exec ${disabled_ns} sysctl -q net.mptcp.enabled=0 local err=0 - LANG=C ip netns exec ${disabled_ns} ./mptcp_connect -t $timeout -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \ + LC_ALL=C ip netns exec ${disabled_ns} ./mptcp_connect -t $timeout -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \ grep -q "^socket: Protocol not available$" && err=1 ip netns delete ${disabled_ns} diff --git a/usr/gen_initramfs.sh b/usr/gen_initramfs.sh index 8ae831657e5d..63476bb70b41 100755 --- a/usr/gen_initramfs.sh +++ b/usr/gen_initramfs.sh @@ -147,7 +147,7 @@ dir_filelist() { header "$1" srcdir=$(echo "$1" | sed -e 's://*:/:g') - dirlist=$(find "${srcdir}" -printf "%p %m %U %G\n" | LANG=C sort) + dirlist=$(find "${srcdir}" -printf "%p %m %U %G\n" | LC_ALL=C sort) # If $dirlist is only one line, then the directory is empty if [ "$(echo "${dirlist}" | wc -l)" -gt 1 ]; then -- 2.27.0

4 years, 1 month

4
4
0 0

[PATCH AUTOSEL 4.4 13/19] selftests: Set CC to clang in lib.mk if LLVM is set

by Sasha Levin

From: Yonghong Song <yhs(a)fb.com> [ Upstream commit 26e6dd1072763cd5696b75994c03982dde952ad9 ] selftests/bpf/Makefile includes lib.mk. With the following command make -j60 LLVM=1 LLVM_IAS=1 <=== compile kernel make -j60 -C tools/testing/selftests/bpf LLVM=1 LLVM_IAS=1 V=1 some files are still compiled with gcc. This patch fixed lib.mk issue which sets CC to gcc in all cases. Signed-off-by: Yonghong Song <yhs(a)fb.com> Signed-off-by: Alexei Starovoitov <ast(a)kernel.org> Acked-by: Andrii Nakryiko <andrii(a)kernel.org> Link: https://lore.kernel.org/bpf/20210413153413.3027426-1-yhs@fb.com Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/lib.mk | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 50a93f5f13d6..d8fa6c72b7ca 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -1,6 +1,10 @@ # This mimics the top-level Makefile. We do it explicitly here so that this # Makefile can operate with or without the kbuild infrastructure. +ifneq ($(LLVM),) +CC := clang +else CC := $(CROSS_COMPILE)gcc +endif define RUN_TESTS @for TEST in $(TEST_PROGS); do \ -- 2.30.2

4 years, 1 month

1
0
0 0

[PATCH AUTOSEL 4.9 13/22] selftests: Set CC to clang in lib.mk if LLVM is set

by Sasha Levin

From: Yonghong Song <yhs(a)fb.com> [ Upstream commit 26e6dd1072763cd5696b75994c03982dde952ad9 ] selftests/bpf/Makefile includes lib.mk. With the following command make -j60 LLVM=1 LLVM_IAS=1 <=== compile kernel make -j60 -C tools/testing/selftests/bpf LLVM=1 LLVM_IAS=1 V=1 some files are still compiled with gcc. This patch fixed lib.mk issue which sets CC to gcc in all cases. Signed-off-by: Yonghong Song <yhs(a)fb.com> Signed-off-by: Alexei Starovoitov <ast(a)kernel.org> Acked-by: Andrii Nakryiko <andrii(a)kernel.org> Link: https://lore.kernel.org/bpf/20210413153413.3027426-1-yhs@fb.com Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/lib.mk | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 50a93f5f13d6..d8fa6c72b7ca 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -1,6 +1,10 @@ # This mimics the top-level Makefile. We do it explicitly here so that this # Makefile can operate with or without the kbuild infrastructure. +ifneq ($(LLVM),) +CC := clang +else CC := $(CROSS_COMPILE)gcc +endif define RUN_TESTS @for TEST in $(TEST_PROGS); do \ -- 2.30.2

4 years, 1 month

1
0
0 0

[PATCH AUTOSEL 4.14 15/25] selftests: Set CC to clang in lib.mk if LLVM is set

by Sasha Levin

From: Yonghong Song <yhs(a)fb.com> [ Upstream commit 26e6dd1072763cd5696b75994c03982dde952ad9 ] selftests/bpf/Makefile includes lib.mk. With the following command make -j60 LLVM=1 LLVM_IAS=1 <=== compile kernel make -j60 -C tools/testing/selftests/bpf LLVM=1 LLVM_IAS=1 V=1 some files are still compiled with gcc. This patch fixed lib.mk issue which sets CC to gcc in all cases. Signed-off-by: Yonghong Song <yhs(a)fb.com> Signed-off-by: Alexei Starovoitov <ast(a)kernel.org> Acked-by: Andrii Nakryiko <andrii(a)kernel.org> Link: https://lore.kernel.org/bpf/20210413153413.3027426-1-yhs@fb.com Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/lib.mk | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index c9be64dc681d..cd3034602ea5 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -1,6 +1,10 @@ # This mimics the top-level Makefile. We do it explicitly here so that this # Makefile can operate with or without the kbuild infrastructure. +ifneq ($(LLVM),) +CC := clang +else CC := $(CROSS_COMPILE)gcc +endif ifeq (0,$(MAKELEVEL)) OUTPUT := $(shell pwd) -- 2.30.2

4 years, 1 month

1
0
0 0

[PATCH AUTOSEL 4.19 20/32] selftests: Set CC to clang in lib.mk if LLVM is set

by Sasha Levin

From: Yonghong Song <yhs(a)fb.com> [ Upstream commit 26e6dd1072763cd5696b75994c03982dde952ad9 ] selftests/bpf/Makefile includes lib.mk. With the following command make -j60 LLVM=1 LLVM_IAS=1 <=== compile kernel make -j60 -C tools/testing/selftests/bpf LLVM=1 LLVM_IAS=1 V=1 some files are still compiled with gcc. This patch fixed lib.mk issue which sets CC to gcc in all cases. Signed-off-by: Yonghong Song <yhs(a)fb.com> Signed-off-by: Alexei Starovoitov <ast(a)kernel.org> Acked-by: Andrii Nakryiko <andrii(a)kernel.org> Link: https://lore.kernel.org/bpf/20210413153413.3027426-1-yhs@fb.com Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/lib.mk | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 0ef203ec59fd..a5d40653a921 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -1,6 +1,10 @@ # This mimics the top-level Makefile. We do it explicitly here so that this # Makefile can operate with or without the kbuild infrastructure. +ifneq ($(LLVM),) +CC := clang +else CC := $(CROSS_COMPILE)gcc +endif ifeq (0,$(MAKELEVEL)) OUTPUT := $(shell pwd) -- 2.30.2

4 years, 1 month

1
0
0 0

[PATCH AUTOSEL 5.4 27/46] selftests: Set CC to clang in lib.mk if LLVM is set

by Sasha Levin

From: Yonghong Song <yhs(a)fb.com> [ Upstream commit 26e6dd1072763cd5696b75994c03982dde952ad9 ] selftests/bpf/Makefile includes lib.mk. With the following command make -j60 LLVM=1 LLVM_IAS=1 <=== compile kernel make -j60 -C tools/testing/selftests/bpf LLVM=1 LLVM_IAS=1 V=1 some files are still compiled with gcc. This patch fixed lib.mk issue which sets CC to gcc in all cases. Signed-off-by: Yonghong Song <yhs(a)fb.com> Signed-off-by: Alexei Starovoitov <ast(a)kernel.org> Acked-by: Andrii Nakryiko <andrii(a)kernel.org> Link: https://lore.kernel.org/bpf/20210413153413.3027426-1-yhs@fb.com Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/lib.mk | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 3ed0134a764d..67386aa3f31d 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -1,6 +1,10 @@ # This mimics the top-level Makefile. We do it explicitly here so that this # Makefile can operate with or without the kbuild infrastructure. +ifneq ($(LLVM),) +CC := clang +else CC := $(CROSS_COMPILE)gcc +endif ifeq (0,$(MAKELEVEL)) ifeq ($(OUTPUT),) -- 2.30.2

4 years, 1 month

1
0
0 0

[PATCH AUTOSEL 5.10 78/85] selftests: mlxsw: Fix mausezahn invocation in ERSPAN scale test

by Sasha Levin

From: Petr Machata <petrm(a)nvidia.com> [ Upstream commit 1233898ab758cbcf5f6fea10b8dd16a0b2c24fab ] The mirror_gre_scale test creates as many ERSPAN sessions as the underlying chip supports, and tests that they all work. In order to determine that it issues a stream of ICMP packets and checks if they are mirrored as expected. However, the mausezahn invocation missed the -6 flag to identify the use of IPv6 protocol, and was sending ICMP messages over IPv6, as opposed to ICMP6. It also didn't pass an explicit source IP address, which apparently worked at some point in the past, but does not anymore. To fix these issues, extend the function mirror_test() in mirror_lib by detecting the IPv6 protocol addresses, and using a different ICMP scheme. Fix __mirror_gre_test() in the selftest itself to pass a source IP address. Signed-off-by: Petr Machata <petrm(a)nvidia.com> Signed-off-by: David S. Miller <davem(a)davemloft.net> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- .../drivers/net/mlxsw/mirror_gre_scale.sh | 3 ++- .../selftests/net/forwarding/mirror_lib.sh | 19 +++++++++++++++++-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh index 6f3a70df63bc..e00435753008 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh @@ -120,12 +120,13 @@ __mirror_gre_test() sleep 5 for ((i = 0; i < count; ++i)); do + local sip=$(mirror_gre_ipv6_addr 1 $i)::1 local dip=$(mirror_gre_ipv6_addr 1 $i)::2 local htun=h3-gt6-$i local message icmp6_capture_install $htun - mirror_test v$h1 "" $dip $htun 100 10 + mirror_test v$h1 $sip $dip $htun 100 10 icmp6_capture_uninstall $htun done } diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh index 13db1cb50e57..6406cd76a19d 100644 --- 
a/tools/testing/selftests/net/forwarding/mirror_lib.sh +++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh @@ -20,6 +20,13 @@ mirror_uninstall() tc filter del dev $swp1 $direction pref 1000 } +is_ipv6() +{ + local addr=$1; shift + + [[ -z ${addr//[0-9a-fA-F:]/} ]] +} + mirror_test() { local vrf_name=$1; shift @@ -29,9 +36,17 @@ mirror_test() local pref=$1; shift local expect=$1; shift + if is_ipv6 $dip; then + local proto=-6 + local type="icmp6 type=128" # Echo request. + else + local proto= + local type="icmp echoreq" + fi + local t0=$(tc_rule_stats_get $dev $pref) - $MZ $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \ - -c 10 -d 100msec -t icmp type=8 + $MZ $proto $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \ + -c 10 -d 100msec -t $type sleep 0.5 local t1=$(tc_rule_stats_get $dev $pref) local delta=$((t1 - t0)) -- 2.30.2

4 years, 1 month

1
0
0 0

[PATCH AUTOSEL 5.10 77/85] selftests: mlxsw: Increase the tolerance of backlog buildup

by Sasha Levin

From: Petr Machata <petrm(a)nvidia.com> [ Upstream commit dda7f4fa55839baeb72ae040aeaf9ccf89d3e416 ] The intention behind this test is to make sure that qdisc limit is correctly projected to the HW. However, first, due to rounding in the qdisc, and then in the driver, the number cannot actually be accurate. And second, the approach to testing this is to oversubscribe the port with traffic generated on the same switch. The actual backlog size therefore fluctuates. In practice, this test proved to be noisier than the rest, and spuriously fails every now and then. Increase the tolerance to 10 % to avoid these issues. Signed-off-by: Petr Machata <petrm(a)nvidia.com> Acked-by: Jiri Pirko <jiri(a)nvidia.com> Signed-off-by: David S. Miller <davem(a)davemloft.net> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh index b0cb1aaffdda..33ddd01689be 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh @@ -507,8 +507,8 @@ do_red_test() check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." local diff=$((limit - backlog)) pct=$((100 * diff / limit)) - ((0 <= pct && pct <= 5)) - check_err $? "backlog $backlog / $limit expected <= 5% distance" + ((0 <= pct && pct <= 10)) + check_err $? "backlog $backlog / $limit expected <= 10% distance" log_test "TC $((vlan - 10)): RED backlog > limit" stop_traffic -- 2.30.2

4 years, 1 month

1
0
0 0

[PATCH AUTOSEL 5.10 53/85] selftests: Set CC to clang in lib.mk if LLVM is set

by Sasha Levin

From: Yonghong Song <yhs(a)fb.com> [ Upstream commit 26e6dd1072763cd5696b75994c03982dde952ad9 ] selftests/bpf/Makefile includes lib.mk. With the following command make -j60 LLVM=1 LLVM_IAS=1 <=== compile kernel make -j60 -C tools/testing/selftests/bpf LLVM=1 LLVM_IAS=1 V=1 some files are still compiled with gcc. This patch fixed lib.mk issue which sets CC to gcc in all cases. Signed-off-by: Yonghong Song <yhs(a)fb.com> Signed-off-by: Alexei Starovoitov <ast(a)kernel.org> Acked-by: Andrii Nakryiko <andrii(a)kernel.org> Link: https://lore.kernel.org/bpf/20210413153413.3027426-1-yhs@fb.com Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/lib.mk | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index a5ce26d548e4..9a41d8bb9ff1 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -1,6 +1,10 @@ # This mimics the top-level Makefile. We do it explicitly here so that this # Makefile can operate with or without the kbuild infrastructure. +ifneq ($(LLVM),) +CC := clang +else CC := $(CROSS_COMPILE)gcc +endif ifeq (0,$(MAKELEVEL)) ifeq ($(OUTPUT),) -- 2.30.2

4 years, 1 month

1
0
0 0

[PATCH AUTOSEL 5.10 24/85] selftests/powerpc: Fix L1D flushing tests for Power10

by Sasha Levin

From: Russell Currey <ruscur(a)russell.cc> [ Upstream commit 3a72c94ebfb1f171eba0715998010678a09ec796 ] The rfi_flush and entry_flush selftests work by using the PM_LD_MISS_L1 perf event to count L1D misses. The value of this event has changed over time: - Power7 uses 0x400f0 - Power8 and Power9 use both 0x400f0 and 0x3e054 - Power10 uses only 0x3e054 Rather than relying on raw values, configure perf to count L1D read misses in the most explicit way available. This fixes the selftests to work on systems without 0x400f0 as PM_LD_MISS_L1, and should change no behaviour for systems that the tests already worked on. The only potential downside is that referring to a specific perf event requires PMU support implemented in the kernel for that platform. Signed-off-by: Russell Currey <ruscur(a)russell.cc> Acked-by: Daniel Axtens <dja(a)axtens.net> Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au> Link: https://lore.kernel.org/r/20210223070227.2916871-1-ruscur@russell.cc Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/powerpc/security/entry_flush.c | 2 +- tools/testing/selftests/powerpc/security/flush_utils.h | 4 ++++ tools/testing/selftests/powerpc/security/rfi_flush.c | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/powerpc/security/entry_flush.c b/tools/testing/selftests/powerpc/security/entry_flush.c index 78cf914fa321..68ce377b205e 100644 --- a/tools/testing/selftests/powerpc/security/entry_flush.c +++ b/tools/testing/selftests/powerpc/security/entry_flush.c @@ -53,7 +53,7 @@ int entry_flush_test(void) entry_flush = entry_flush_orig; - fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1); + fd = perf_event_open_counter(PERF_TYPE_HW_CACHE, PERF_L1D_READ_MISS_CONFIG, -1); FAIL_IF(fd < 0); p = (char *)memalign(zero_size, CACHELINE_SIZE); diff --git a/tools/testing/selftests/powerpc/security/flush_utils.h b/tools/testing/selftests/powerpc/security/flush_utils.h index 
07a5eb301466..7a3d60292916 100644 --- a/tools/testing/selftests/powerpc/security/flush_utils.h +++ b/tools/testing/selftests/powerpc/security/flush_utils.h @@ -9,6 +9,10 @@ #define CACHELINE_SIZE 128 +#define PERF_L1D_READ_MISS_CONFIG ((PERF_COUNT_HW_CACHE_L1D) | \ + (PERF_COUNT_HW_CACHE_OP_READ << 8) | \ + (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)) + void syscall_loop(char *p, unsigned long iterations, unsigned long zero_size); diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c index 7565fd786640..f73484a6470f 100644 --- a/tools/testing/selftests/powerpc/security/rfi_flush.c +++ b/tools/testing/selftests/powerpc/security/rfi_flush.c @@ -54,7 +54,7 @@ int rfi_flush_test(void) rfi_flush = rfi_flush_orig; - fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1); + fd = perf_event_open_counter(PERF_TYPE_HW_CACHE, PERF_L1D_READ_MISS_CONFIG, -1); FAIL_IF(fd < 0); p = (char *)memalign(zero_size, CACHELINE_SIZE); -- 2.30.2

4 years, 1 month

1
0
0 0

[PATCH AUTOSEL 5.11 097/104] selftests: mlxsw: Fix mausezahn invocation in ERSPAN scale test

by Sasha Levin

From: Petr Machata <petrm(a)nvidia.com> [ Upstream commit 1233898ab758cbcf5f6fea10b8dd16a0b2c24fab ] The mirror_gre_scale test creates as many ERSPAN sessions as the underlying chip supports, and tests that they all work. In order to determine that, it issues a stream of ICMP packets and checks if they are mirrored as expected. However, the mausezahn invocation missed the -6 flag to identify the use of IPv6 protocol, and was sending ICMP messages over IPv6, as opposed to ICMP6. It also didn't pass an explicit source IP address, which apparently worked at some point in the past, but does not anymore. To fix these issues, extend the function mirror_test() in mirror_lib by detecting the IPv6 protocol addresses, and using a different ICMP scheme. Fix __mirror_gre_test() in the selftest itself to pass a source IP address. Signed-off-by: Petr Machata <petrm(a)nvidia.com> Signed-off-by: David S. Miller <davem(a)davemloft.net> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- .../drivers/net/mlxsw/mirror_gre_scale.sh | 3 ++- .../selftests/net/forwarding/mirror_lib.sh | 19 +++++++++++++++++-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh index 6f3a70df63bc..e00435753008 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh @@ -120,12 +120,13 @@ __mirror_gre_test() sleep 5 for ((i = 0; i < count; ++i)); do + local sip=$(mirror_gre_ipv6_addr 1 $i)::1 local dip=$(mirror_gre_ipv6_addr 1 $i)::2 local htun=h3-gt6-$i local message icmp6_capture_install $htun - mirror_test v$h1 "" $dip $htun 100 10 + mirror_test v$h1 $sip $dip $htun 100 10 icmp6_capture_uninstall $htun done } diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh index 13db1cb50e57..6406cd76a19d 100644 --- 
a/tools/testing/selftests/net/forwarding/mirror_lib.sh +++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh @@ -20,6 +20,13 @@ mirror_uninstall() tc filter del dev $swp1 $direction pref 1000 } +is_ipv6() +{ + local addr=$1; shift + + [[ -z ${addr//[0-9a-fA-F:]/} ]] +} + mirror_test() { local vrf_name=$1; shift @@ -29,9 +36,17 @@ mirror_test() local pref=$1; shift local expect=$1; shift + if is_ipv6 $dip; then + local proto=-6 + local type="icmp6 type=128" # Echo request. + else + local proto= + local type="icmp echoreq" + fi + local t0=$(tc_rule_stats_get $dev $pref) - $MZ $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \ - -c 10 -d 100msec -t icmp type=8 + $MZ $proto $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \ + -c 10 -d 100msec -t $type sleep 0.5 local t1=$(tc_rule_stats_get $dev $pref) local delta=$((t1 - t0)) -- 2.30.2

4 years, 1 month

1
0
0 0

[PATCH AUTOSEL 5.11 096/104] selftests: mlxsw: Increase the tolerance of backlog buildup

by Sasha Levin

From: Petr Machata <petrm(a)nvidia.com> [ Upstream commit dda7f4fa55839baeb72ae040aeaf9ccf89d3e416 ] The intention behind this test is to make sure that qdisc limit is correctly projected to the HW. However, first, due to rounding in the qdisc, and then in the driver, the number cannot actually be accurate. And second, the approach to testing this is to oversubscribe the port with traffic generated on the same switch. The actual backlog size therefore fluctuates. In practice, this test proved to be noisier than the rest, and spuriously fails every now and then. Increase the tolerance to 10 % to avoid these issues. Signed-off-by: Petr Machata <petrm(a)nvidia.com> Acked-by: Jiri Pirko <jiri(a)nvidia.com> Signed-off-by: David S. Miller <davem(a)davemloft.net> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh index b0cb1aaffdda..33ddd01689be 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh @@ -507,8 +507,8 @@ do_red_test() check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." local diff=$((limit - backlog)) pct=$((100 * diff / limit)) - ((0 <= pct && pct <= 5)) - check_err $? "backlog $backlog / $limit expected <= 5% distance" + ((0 <= pct && pct <= 10)) + check_err $? "backlog $backlog / $limit expected <= 10% distance" log_test "TC $((vlan - 10)): RED backlog > limit" stop_traffic -- 2.30.2

4 years, 1 month

1
0
0 0

[PATCH AUTOSEL 5.11 069/104] selftests: Set CC to clang in lib.mk if LLVM is set

by Sasha Levin

From: Yonghong Song <yhs(a)fb.com> [ Upstream commit 26e6dd1072763cd5696b75994c03982dde952ad9 ] selftests/bpf/Makefile includes lib.mk. With the following command make -j60 LLVM=1 LLVM_IAS=1 <=== compile kernel make -j60 -C tools/testing/selftests/bpf LLVM=1 LLVM_IAS=1 V=1 some files are still compiled with gcc. This patch fixes the lib.mk issue, which sets CC to gcc in all cases. Signed-off-by: Yonghong Song <yhs(a)fb.com> Signed-off-by: Alexei Starovoitov <ast(a)kernel.org> Acked-by: Andrii Nakryiko <andrii(a)kernel.org> Link: https://lore.kernel.org/bpf/20210413153413.3027426-1-yhs@fb.com Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/lib.mk | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index a5ce26d548e4..9a41d8bb9ff1 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -1,6 +1,10 @@ # This mimics the top-level Makefile. We do it explicitly here so that this # Makefile can operate with or without the kbuild infrastructure. +ifneq ($(LLVM),) +CC := clang +else CC := $(CROSS_COMPILE)gcc +endif ifeq (0,$(MAKELEVEL)) ifeq ($(OUTPUT),) -- 2.30.2

4 years, 1 month

1
0
0 0

[PATCH AUTOSEL 5.11 040/104] selftests: mptcp: launch mptcp_connect with timeout

by Sasha Levin

From: Matthieu Baerts <matthieu.baerts(a)tessares.net> [ Upstream commit 5888a61cb4e00695075bbacfd86f3fa73af00413 ] 'mptcp_connect' already has a timeout for poll() but in some cases, it is not enough. With the "timeout" tool, we will force the command to fail if it doesn't finish on time. Thanks to that, the script will continue and display details about the current state before marking the test as failed. Displaying this state is very important to be able to understand the issue. Better to have our CI report the issue than just "the test hung". Note that in mptcp_connect.sh, we were using a long timeout to validate the fact that we cannot create a socket if a sysctl is set. We don't need this timeout. In diag.sh, we want to send signals to mptcp_connect instances that have been started in the netns. But we cannot send this signal to 'timeout', otherwise that will stop the timeout and messages telling us SIGUSR1 has been received will be printed. Instead of trying to find the right PIDs and storing them in an array, we can simply use the output of 'ip netns pids' which is all the PIDs we want to send the signal to. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/160 Signed-off-by: Matthieu Baerts <matthieu.baerts(a)tessares.net> Signed-off-by: Mat Martineau <mathew.j.martineau(a)linux.intel.com> Signed-off-by: David S. 
Miller <davem(a)davemloft.net> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/net/mptcp/diag.sh | 55 ++++++++++++------- .../selftests/net/mptcp/mptcp_connect.sh | 15 +++-- .../testing/selftests/net/mptcp/mptcp_join.sh | 22 ++++++-- .../selftests/net/mptcp/simult_flows.sh | 13 ++++- 4 files changed, 72 insertions(+), 33 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 39edce4f541c..2674ba20d524 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -5,8 +5,9 @@ rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) ns="ns1-$rndh" ksft_skip=4 test_cnt=1 +timeout_poll=100 +timeout_test=$((timeout_poll * 2 + 1)) ret=0 -pids=() flush_pids() { @@ -14,18 +15,14 @@ flush_pids() # give it some time sleep 1.1 - for pid in ${pids[@]}; do - [ -d /proc/$pid ] && kill -SIGUSR1 $pid >/dev/null 2>&1 - done - pids=() + ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGUSR1 &>/dev/null } cleanup() { + ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGKILL &>/dev/null + ip netns del $ns - for pid in ${pids[@]}; do - [ -d /proc/$pid ] && kill -9 $pid >/dev/null 2>&1 - done } ip -Version > /dev/null 2>&1 @@ -79,39 +76,57 @@ trap cleanup EXIT ip netns add $ns ip -n $ns link set dev lo up -echo "a" | ip netns exec $ns ./mptcp_connect -p 10000 -l 0.0.0.0 -t 100 >/dev/null & +echo "a" | \ + timeout ${timeout_test} \ + ip netns exec $ns \ + ./mptcp_connect -p 10000 -l -t ${timeout_poll} \ + 0.0.0.0 >/dev/null & sleep 0.1 -pids[0]=$! chk_msk_nr 0 "no msk on netns creation" -echo "b" | ip netns exec $ns ./mptcp_connect -p 10000 127.0.0.1 -j -t 100 >/dev/null & +echo "b" | \ + timeout ${timeout_test} \ + ip netns exec $ns \ + ./mptcp_connect -p 10000 -j -t ${timeout_poll} \ + 127.0.0.1 >/dev/null & sleep 0.1 -pids[1]=$! 
chk_msk_nr 2 "after MPC handshake " chk_msk_remote_key_nr 2 "....chk remote_key" chk_msk_fallback_nr 0 "....chk no fallback" flush_pids -echo "a" | ip netns exec $ns ./mptcp_connect -p 10001 -s TCP -l 0.0.0.0 -t 100 >/dev/null & -pids[0]=$! +echo "a" | \ + timeout ${timeout_test} \ + ip netns exec $ns \ + ./mptcp_connect -p 10001 -l -s TCP -t ${timeout_poll} \ + 0.0.0.0 >/dev/null & sleep 0.1 -echo "b" | ip netns exec $ns ./mptcp_connect -p 10001 127.0.0.1 -j -t 100 >/dev/null & -pids[1]=$! +echo "b" | \ + timeout ${timeout_test} \ + ip netns exec $ns \ + ./mptcp_connect -p 10001 -j -t ${timeout_poll} \ + 127.0.0.1 >/dev/null & sleep 0.1 chk_msk_fallback_nr 1 "check fallback" flush_pids NR_CLIENTS=100 for I in `seq 1 $NR_CLIENTS`; do - echo "a" | ip netns exec $ns ./mptcp_connect -p $((I+10001)) -l 0.0.0.0 -t 100 -w 10 >/dev/null & - pids[$((I*2))]=$! + echo "a" | \ + timeout ${timeout_test} \ + ip netns exec $ns \ + ./mptcp_connect -p $((I+10001)) -l -w 10 \ + -t ${timeout_poll} 0.0.0.0 >/dev/null & done sleep 0.1 for I in `seq 1 $NR_CLIENTS`; do - echo "b" | ip netns exec $ns ./mptcp_connect -p $((I+10001)) 127.0.0.1 -t 100 -w 10 >/dev/null & - pids[$((I*2 + 1))]=$! 
+ echo "b" | \ + timeout ${timeout_test} \ + ip netns exec $ns \ + ./mptcp_connect -p $((I+10001)) -w 10 \ + -t ${timeout_poll} 127.0.0.1 >/dev/null & done sleep 1.5 diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index e927df83efb9..c37acb790bd6 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -11,7 +11,8 @@ cin="" cout="" ksft_skip=4 capture=false -timeout=30 +timeout_poll=30 +timeout_test=$((timeout_poll * 2 + 1)) ipv6=true ethtool_random_on=true tc_delay="$((RANDOM%50))" @@ -272,7 +273,7 @@ check_mptcp_disabled() ip netns exec ${disabled_ns} sysctl -q net.mptcp.enabled=0 local err=0 - LANG=C ip netns exec ${disabled_ns} ./mptcp_connect -t $timeout -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \ + LANG=C ip netns exec ${disabled_ns} ./mptcp_connect -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \ grep -q "^socket: Protocol not available$" && err=1 ip netns delete ${disabled_ns} @@ -414,14 +415,20 @@ do_transfer() local stat_cookietx_last=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesSent | while read a count c rest ;do echo $count;done) local stat_cookierx_last=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesRecv | while read a count c rest ;do echo $count;done) - ip netns exec ${listener_ns} ./mptcp_connect -t $timeout -l -p $port -s ${srv_proto} $extra_args $local_addr < "$sin" > "$sout" & + timeout ${timeout_test} \ + ip netns exec ${listener_ns} \ + ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ + $extra_args $local_addr < "$sin" > "$sout" & local spid=$! 
wait_local_port_listen "${listener_ns}" "${port}" local start start=$(date +%s%3N) - ip netns exec ${connector_ns} ./mptcp_connect -t $timeout -p $port -s ${cl_proto} $extra_args $connect_addr < "$cin" > "$cout" & + timeout ${timeout_test} \ + ip netns exec ${connector_ns} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $extra_args $connect_addr < "$cin" > "$cout" & local cpid=$! wait $cpid diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 9aa9624cff97..99c5dc0eeb26 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -8,7 +8,8 @@ cin="" cinsent="" cout="" ksft_skip=4 -timeout=30 +timeout_poll=30 +timeout_test=$((timeout_poll * 2 + 1)) mptcp_connect="" capture=0 @@ -249,17 +250,26 @@ do_transfer() local_addr="0.0.0.0" fi - ip netns exec ${listener_ns} $mptcp_connect -t $timeout -l -p $port \ - -s ${srv_proto} ${local_addr} < "$sin" > "$sout" & + timeout ${timeout_test} \ + ip netns exec ${listener_ns} \ + $mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ + ${local_addr} < "$sin" > "$sout" & spid=$! sleep 1 if [ "$test_link_fail" -eq 0 ];then - ip netns exec ${connector_ns} $mptcp_connect -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" & + timeout ${timeout_test} \ + ip netns exec ${connector_ns} \ + $mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $connect_addr < "$cin" > "$cout" & else - ( cat "$cin" ; sleep 2; link_failure $listener_ns ; cat "$cin" ) | tee "$cinsent" | \ - ip netns exec ${connector_ns} $mptcp_connect -t $timeout -p $port -s ${cl_proto} $connect_addr > "$cout" & + ( cat "$cin" ; sleep 2; link_failure $listener_ns ; cat "$cin" ) | \ + tee "$cinsent" | \ + timeout ${timeout_test} \ + ip netns exec ${connector_ns} \ + $mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $connect_addr > "$cout" & fi cpid=$! 
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index f039ee57eb3c..3aeef3bcb101 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -7,7 +7,8 @@ ns2="ns2-$rndh" ns3="ns3-$rndh" capture=false ksft_skip=4 -timeout=30 +timeout_poll=30 +timeout_test=$((timeout_poll * 2 + 1)) test_cnt=1 ret=0 bail=0 @@ -157,14 +158,20 @@ do_transfer() sleep 1 fi - ip netns exec ${ns3} ./mptcp_connect -jt $timeout -l -p $port 0.0.0.0 < "$sin" > "$sout" & + timeout ${timeout_test} \ + ip netns exec ${ns3} \ + ./mptcp_connect -jt ${timeout_poll} -l -p $port \ + 0.0.0.0 < "$sin" > "$sout" & local spid=$! wait_local_port_listen "${ns3}" "${port}" local start start=$(date +%s%3N) - ip netns exec ${ns1} ./mptcp_connect -jt $timeout -p $port 10.0.3.3 < "$cin" > "$cout" & + timeout ${timeout_test} \ + ip netns exec ${ns1} \ + ./mptcp_connect -jt ${timeout_poll} -p $port \ + 10.0.3.3 < "$cin" > "$cout" & local cpid=$! wait $cpid -- 2.30.2

4 years, 1 month

1
0
0 0

[PATCH AUTOSEL 5.11 029/104] selftests/powerpc: Fix L1D flushing tests for Power10

by Sasha Levin

From: Russell Currey <ruscur(a)russell.cc> [ Upstream commit 3a72c94ebfb1f171eba0715998010678a09ec796 ] The rfi_flush and entry_flush selftests work by using the PM_LD_MISS_L1 perf event to count L1D misses. The value of this event has changed over time: - Power7 uses 0x400f0 - Power8 and Power9 use both 0x400f0 and 0x3e054 - Power10 uses only 0x3e054 Rather than relying on raw values, configure perf to count L1D read misses in the most explicit way available. This fixes the selftests to work on systems without 0x400f0 as PM_LD_MISS_L1, and should change no behaviour for systems that the tests already worked on. The only potential downside is that referring to a specific perf event requires PMU support implemented in the kernel for that platform. Signed-off-by: Russell Currey <ruscur(a)russell.cc> Acked-by: Daniel Axtens <dja(a)axtens.net> Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au> Link: https://lore.kernel.org/r/20210223070227.2916871-1-ruscur@russell.cc Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/powerpc/security/entry_flush.c | 2 +- tools/testing/selftests/powerpc/security/flush_utils.h | 4 ++++ tools/testing/selftests/powerpc/security/rfi_flush.c | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/powerpc/security/entry_flush.c b/tools/testing/selftests/powerpc/security/entry_flush.c index 78cf914fa321..68ce377b205e 100644 --- a/tools/testing/selftests/powerpc/security/entry_flush.c +++ b/tools/testing/selftests/powerpc/security/entry_flush.c @@ -53,7 +53,7 @@ int entry_flush_test(void) entry_flush = entry_flush_orig; - fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1); + fd = perf_event_open_counter(PERF_TYPE_HW_CACHE, PERF_L1D_READ_MISS_CONFIG, -1); FAIL_IF(fd < 0); p = (char *)memalign(zero_size, CACHELINE_SIZE); diff --git a/tools/testing/selftests/powerpc/security/flush_utils.h b/tools/testing/selftests/powerpc/security/flush_utils.h index 
07a5eb301466..7a3d60292916 100644 --- a/tools/testing/selftests/powerpc/security/flush_utils.h +++ b/tools/testing/selftests/powerpc/security/flush_utils.h @@ -9,6 +9,10 @@ #define CACHELINE_SIZE 128 +#define PERF_L1D_READ_MISS_CONFIG ((PERF_COUNT_HW_CACHE_L1D) | \ + (PERF_COUNT_HW_CACHE_OP_READ << 8) | \ + (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)) + void syscall_loop(char *p, unsigned long iterations, unsigned long zero_size); diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c index 7565fd786640..f73484a6470f 100644 --- a/tools/testing/selftests/powerpc/security/rfi_flush.c +++ b/tools/testing/selftests/powerpc/security/rfi_flush.c @@ -54,7 +54,7 @@ int rfi_flush_test(void) rfi_flush = rfi_flush_orig; - fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1); + fd = perf_event_open_counter(PERF_TYPE_HW_CACHE, PERF_L1D_READ_MISS_CONFIG, -1); FAIL_IF(fd < 0); p = (char *)memalign(zero_size, CACHELINE_SIZE); -- 2.30.2

4 years, 1 month

1
0
0 0

[PATCH AUTOSEL 5.12 107/116] selftests: mlxsw: Fix mausezahn invocation in ERSPAN scale test

by Sasha Levin

From: Petr Machata <petrm(a)nvidia.com> [ Upstream commit 1233898ab758cbcf5f6fea10b8dd16a0b2c24fab ] The mirror_gre_scale test creates as many ERSPAN sessions as the underlying chip supports, and tests that they all work. In order to determine that, it issues a stream of ICMP packets and checks if they are mirrored as expected. However, the mausezahn invocation missed the -6 flag to identify the use of IPv6 protocol, and was sending ICMP messages over IPv6, as opposed to ICMP6. It also didn't pass an explicit source IP address, which apparently worked at some point in the past, but does not anymore. To fix these issues, extend the function mirror_test() in mirror_lib by detecting the IPv6 protocol addresses, and using a different ICMP scheme. Fix __mirror_gre_test() in the selftest itself to pass a source IP address. Signed-off-by: Petr Machata <petrm(a)nvidia.com> Signed-off-by: David S. Miller <davem(a)davemloft.net> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- .../drivers/net/mlxsw/mirror_gre_scale.sh | 3 ++- .../selftests/net/forwarding/mirror_lib.sh | 19 +++++++++++++++++-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh index 6f3a70df63bc..e00435753008 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh @@ -120,12 +120,13 @@ __mirror_gre_test() sleep 5 for ((i = 0; i < count; ++i)); do + local sip=$(mirror_gre_ipv6_addr 1 $i)::1 local dip=$(mirror_gre_ipv6_addr 1 $i)::2 local htun=h3-gt6-$i local message icmp6_capture_install $htun - mirror_test v$h1 "" $dip $htun 100 10 + mirror_test v$h1 $sip $dip $htun 100 10 icmp6_capture_uninstall $htun done } diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh index 13db1cb50e57..6406cd76a19d 100644 --- 
a/tools/testing/selftests/net/forwarding/mirror_lib.sh +++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh @@ -20,6 +20,13 @@ mirror_uninstall() tc filter del dev $swp1 $direction pref 1000 } +is_ipv6() +{ + local addr=$1; shift + + [[ -z ${addr//[0-9a-fA-F:]/} ]] +} + mirror_test() { local vrf_name=$1; shift @@ -29,9 +36,17 @@ mirror_test() local pref=$1; shift local expect=$1; shift + if is_ipv6 $dip; then + local proto=-6 + local type="icmp6 type=128" # Echo request. + else + local proto= + local type="icmp echoreq" + fi + local t0=$(tc_rule_stats_get $dev $pref) - $MZ $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \ - -c 10 -d 100msec -t icmp type=8 + $MZ $proto $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \ + -c 10 -d 100msec -t $type sleep 0.5 local t1=$(tc_rule_stats_get $dev $pref) local delta=$((t1 - t0)) -- 2.30.2

4 years, 1 month

1
0
0 0

[PATCH AUTOSEL 5.12 106/116] selftests: mlxsw: Increase the tolerance of backlog buildup

by Sasha Levin

From: Petr Machata <petrm(a)nvidia.com> [ Upstream commit dda7f4fa55839baeb72ae040aeaf9ccf89d3e416 ] The intention behind this test is to make sure that qdisc limit is correctly projected to the HW. However, first, due to rounding in the qdisc, and then in the driver, the number cannot actually be accurate. And second, the approach to testing this is to oversubscribe the port with traffic generated on the same switch. The actual backlog size therefore fluctuates. In practice, this test proved to be noisier than the rest, and spuriously fails every now and then. Increase the tolerance to 10 % to avoid these issues. Signed-off-by: Petr Machata <petrm(a)nvidia.com> Acked-by: Jiri Pirko <jiri(a)nvidia.com> Signed-off-by: David S. Miller <davem(a)davemloft.net> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh index b0cb1aaffdda..33ddd01689be 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh @@ -507,8 +507,8 @@ do_red_test() check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." local diff=$((limit - backlog)) pct=$((100 * diff / limit)) - ((0 <= pct && pct <= 5)) - check_err $? "backlog $backlog / $limit expected <= 5% distance" + ((0 <= pct && pct <= 10)) + check_err $? "backlog $backlog / $limit expected <= 10% distance" log_test "TC $((vlan - 10)): RED backlog > limit" stop_traffic -- 2.30.2

4 years, 1 month

1
0
0 0

[PATCH AUTOSEL 5.12 078/116] selftests: Set CC to clang in lib.mk if LLVM is set

by Sasha Levin

From: Yonghong Song <yhs(a)fb.com> [ Upstream commit 26e6dd1072763cd5696b75994c03982dde952ad9 ] selftests/bpf/Makefile includes lib.mk. With the following command make -j60 LLVM=1 LLVM_IAS=1 <=== compile kernel make -j60 -C tools/testing/selftests/bpf LLVM=1 LLVM_IAS=1 V=1 some files are still compiled with gcc. This patch fixes the lib.mk issue, which sets CC to gcc in all cases. Signed-off-by: Yonghong Song <yhs(a)fb.com> Signed-off-by: Alexei Starovoitov <ast(a)kernel.org> Acked-by: Andrii Nakryiko <andrii(a)kernel.org> Link: https://lore.kernel.org/bpf/20210413153413.3027426-1-yhs@fb.com Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/lib.mk | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index a5ce26d548e4..9a41d8bb9ff1 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -1,6 +1,10 @@ # This mimics the top-level Makefile. We do it explicitly here so that this # Makefile can operate with or without the kbuild infrastructure. +ifneq ($(LLVM),) +CC := clang +else CC := $(CROSS_COMPILE)gcc +endif ifeq (0,$(MAKELEVEL)) ifeq ($(OUTPUT),) -- 2.30.2

4 years, 1 month

1
0
0 0

[PATCH AUTOSEL 5.12 043/116] selftests: mptcp: launch mptcp_connect with timeout

by Sasha Levin

From: Matthieu Baerts <matthieu.baerts(a)tessares.net> [ Upstream commit 5888a61cb4e00695075bbacfd86f3fa73af00413 ] 'mptcp_connect' already has a timeout for poll() but in some cases, it is not enough. With the "timeout" tool, we will force the command to fail if it doesn't finish on time. Thanks to that, the script will continue and display details about the current state before marking the test as failed. Displaying this state is very important to be able to understand the issue. Better to have our CI report the issue than just "the test hung". Note that in mptcp_connect.sh, we were using a long timeout to validate the fact that we cannot create a socket if a sysctl is set. We don't need this timeout. In diag.sh, we want to send signals to mptcp_connect instances that have been started in the netns. But we cannot send this signal to 'timeout', otherwise that will stop the timeout and messages telling us SIGUSR1 has been received will be printed. Instead of trying to find the right PIDs and storing them in an array, we can simply use the output of 'ip netns pids' which is all the PIDs we want to send the signal to. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/160 Signed-off-by: Matthieu Baerts <matthieu.baerts(a)tessares.net> Signed-off-by: Mat Martineau <mathew.j.martineau(a)linux.intel.com> Signed-off-by: David S. 
Miller <davem(a)davemloft.net> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/net/mptcp/diag.sh | 55 ++++++++++++------- .../selftests/net/mptcp/mptcp_connect.sh | 15 +++-- .../testing/selftests/net/mptcp/mptcp_join.sh | 22 ++++++-- .../selftests/net/mptcp/simult_flows.sh | 13 ++++- 4 files changed, 72 insertions(+), 33 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 39edce4f541c..2674ba20d524 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -5,8 +5,9 @@ rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) ns="ns1-$rndh" ksft_skip=4 test_cnt=1 +timeout_poll=100 +timeout_test=$((timeout_poll * 2 + 1)) ret=0 -pids=() flush_pids() { @@ -14,18 +15,14 @@ flush_pids() # give it some time sleep 1.1 - for pid in ${pids[@]}; do - [ -d /proc/$pid ] && kill -SIGUSR1 $pid >/dev/null 2>&1 - done - pids=() + ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGUSR1 &>/dev/null } cleanup() { + ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGKILL &>/dev/null + ip netns del $ns - for pid in ${pids[@]}; do - [ -d /proc/$pid ] && kill -9 $pid >/dev/null 2>&1 - done } ip -Version > /dev/null 2>&1 @@ -79,39 +76,57 @@ trap cleanup EXIT ip netns add $ns ip -n $ns link set dev lo up -echo "a" | ip netns exec $ns ./mptcp_connect -p 10000 -l 0.0.0.0 -t 100 >/dev/null & +echo "a" | \ + timeout ${timeout_test} \ + ip netns exec $ns \ + ./mptcp_connect -p 10000 -l -t ${timeout_poll} \ + 0.0.0.0 >/dev/null & sleep 0.1 -pids[0]=$! chk_msk_nr 0 "no msk on netns creation" -echo "b" | ip netns exec $ns ./mptcp_connect -p 10000 127.0.0.1 -j -t 100 >/dev/null & +echo "b" | \ + timeout ${timeout_test} \ + ip netns exec $ns \ + ./mptcp_connect -p 10000 -j -t ${timeout_poll} \ + 127.0.0.1 >/dev/null & sleep 0.1 -pids[1]=$! 
chk_msk_nr 2 "after MPC handshake " chk_msk_remote_key_nr 2 "....chk remote_key" chk_msk_fallback_nr 0 "....chk no fallback" flush_pids -echo "a" | ip netns exec $ns ./mptcp_connect -p 10001 -s TCP -l 0.0.0.0 -t 100 >/dev/null & -pids[0]=$! +echo "a" | \ + timeout ${timeout_test} \ + ip netns exec $ns \ + ./mptcp_connect -p 10001 -l -s TCP -t ${timeout_poll} \ + 0.0.0.0 >/dev/null & sleep 0.1 -echo "b" | ip netns exec $ns ./mptcp_connect -p 10001 127.0.0.1 -j -t 100 >/dev/null & -pids[1]=$! +echo "b" | \ + timeout ${timeout_test} \ + ip netns exec $ns \ + ./mptcp_connect -p 10001 -j -t ${timeout_poll} \ + 127.0.0.1 >/dev/null & sleep 0.1 chk_msk_fallback_nr 1 "check fallback" flush_pids NR_CLIENTS=100 for I in `seq 1 $NR_CLIENTS`; do - echo "a" | ip netns exec $ns ./mptcp_connect -p $((I+10001)) -l 0.0.0.0 -t 100 -w 10 >/dev/null & - pids[$((I*2))]=$! + echo "a" | \ + timeout ${timeout_test} \ + ip netns exec $ns \ + ./mptcp_connect -p $((I+10001)) -l -w 10 \ + -t ${timeout_poll} 0.0.0.0 >/dev/null & done sleep 0.1 for I in `seq 1 $NR_CLIENTS`; do - echo "b" | ip netns exec $ns ./mptcp_connect -p $((I+10001)) 127.0.0.1 -t 100 -w 10 >/dev/null & - pids[$((I*2 + 1))]=$! 
+ echo "b" | \ + timeout ${timeout_test} \ + ip netns exec $ns \ + ./mptcp_connect -p $((I+10001)) -w 10 \ + -t ${timeout_poll} 127.0.0.1 >/dev/null & done sleep 1.5 diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 10a030b53b23..65b3b983efc2 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -11,7 +11,8 @@ cin="" cout="" ksft_skip=4 capture=false -timeout=30 +timeout_poll=30 +timeout_test=$((timeout_poll * 2 + 1)) ipv6=true ethtool_random_on=true tc_delay="$((RANDOM%50))" @@ -273,7 +274,7 @@ check_mptcp_disabled() ip netns exec ${disabled_ns} sysctl -q net.mptcp.enabled=0 local err=0 - LANG=C ip netns exec ${disabled_ns} ./mptcp_connect -t $timeout -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \ + LANG=C ip netns exec ${disabled_ns} ./mptcp_connect -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \ grep -q "^socket: Protocol not available$" && err=1 ip netns delete ${disabled_ns} @@ -430,14 +431,20 @@ do_transfer() local stat_cookietx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent") local stat_cookierx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv") - ip netns exec ${listener_ns} ./mptcp_connect -t $timeout -l -p $port -s ${srv_proto} $extra_args $local_addr < "$sin" > "$sout" & + timeout ${timeout_test} \ + ip netns exec ${listener_ns} \ + ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ + $extra_args $local_addr < "$sin" > "$sout" & local spid=$! wait_local_port_listen "${listener_ns}" "${port}" local start start=$(date +%s%3N) - ip netns exec ${connector_ns} ./mptcp_connect -t $timeout -p $port -s ${cl_proto} $extra_args $connect_addr < "$cin" > "$cout" & + timeout ${timeout_test} \ + ip netns exec ${connector_ns} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $extra_args $connect_addr < "$cin" > "$cout" & local cpid=$! 
wait $cpid diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index ad32240fbfda..43ed99de7734 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -8,7 +8,8 @@ cin="" cinsent="" cout="" ksft_skip=4 -timeout=30 +timeout_poll=30 +timeout_test=$((timeout_poll * 2 + 1)) mptcp_connect="" capture=0 do_all_tests=1 @@ -245,17 +246,26 @@ do_transfer() local_addr="0.0.0.0" fi - ip netns exec ${listener_ns} $mptcp_connect -t $timeout -l -p $port \ - -s ${srv_proto} ${local_addr} < "$sin" > "$sout" & + timeout ${timeout_test} \ + ip netns exec ${listener_ns} \ + $mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ + ${local_addr} < "$sin" > "$sout" & spid=$! sleep 1 if [ "$test_link_fail" -eq 0 ];then - ip netns exec ${connector_ns} $mptcp_connect -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" & + timeout ${timeout_test} \ + ip netns exec ${connector_ns} \ + $mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $connect_addr < "$cin" > "$cout" & else - ( cat "$cin" ; sleep 2; link_failure $listener_ns ; cat "$cin" ) | tee "$cinsent" | \ - ip netns exec ${connector_ns} $mptcp_connect -t $timeout -p $port -s ${cl_proto} $connect_addr > "$cout" & + ( cat "$cin" ; sleep 2; link_failure $listener_ns ; cat "$cin" ) | \ + tee "$cinsent" | \ + timeout ${timeout_test} \ + ip netns exec ${connector_ns} \ + $mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $connect_addr > "$cout" & fi cpid=$! 
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index f039ee57eb3c..3aeef3bcb101 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -7,7 +7,8 @@ ns2="ns2-$rndh" ns3="ns3-$rndh" capture=false ksft_skip=4 -timeout=30 +timeout_poll=30 +timeout_test=$((timeout_poll * 2 + 1)) test_cnt=1 ret=0 bail=0 @@ -157,14 +158,20 @@ do_transfer() sleep 1 fi - ip netns exec ${ns3} ./mptcp_connect -jt $timeout -l -p $port 0.0.0.0 < "$sin" > "$sout" & + timeout ${timeout_test} \ + ip netns exec ${ns3} \ + ./mptcp_connect -jt ${timeout_poll} -l -p $port \ + 0.0.0.0 < "$sin" > "$sout" & local spid=$! wait_local_port_listen "${ns3}" "${port}" local start start=$(date +%s%3N) - ip netns exec ${ns1} ./mptcp_connect -jt $timeout -p $port 10.0.3.3 < "$cin" > "$cout" & + timeout ${timeout_test} \ + ip netns exec ${ns1} \ + ./mptcp_connect -jt ${timeout_poll} -p $port \ + 10.0.3.3 < "$cin" > "$cout" & local cpid=$! wait $cpid -- 2.30.2

4 years, 1 month

1
0
0 0

[PATCH AUTOSEL 5.12 029/116] selftests/powerpc: Fix L1D flushing tests for Power10

by Sasha Levin

From: Russell Currey <ruscur(a)russell.cc> [ Upstream commit 3a72c94ebfb1f171eba0715998010678a09ec796 ] The rfi_flush and entry_flush selftests work by using the PM_LD_MISS_L1 perf event to count L1D misses. The value of this event has changed over time: - Power7 uses 0x400f0 - Power8 and Power9 use both 0x400f0 and 0x3e054 - Power10 uses only 0x3e054 Rather than relying on raw values, configure perf to count L1D read misses in the most explicit way available. This fixes the selftests to work on systems without 0x400f0 as PM_LD_MISS_L1, and should change no behaviour for systems that the tests already worked on. The only potential downside is that referring to a specific perf event requires PMU support implemented in the kernel for that platform. Signed-off-by: Russell Currey <ruscur(a)russell.cc> Acked-by: Daniel Axtens <dja(a)axtens.net> Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au> Link: https://lore.kernel.org/r/20210223070227.2916871-1-ruscur@russell.cc Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/powerpc/security/entry_flush.c | 2 +- tools/testing/selftests/powerpc/security/flush_utils.h | 4 ++++ tools/testing/selftests/powerpc/security/rfi_flush.c | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/powerpc/security/entry_flush.c b/tools/testing/selftests/powerpc/security/entry_flush.c index 78cf914fa321..68ce377b205e 100644 --- a/tools/testing/selftests/powerpc/security/entry_flush.c +++ b/tools/testing/selftests/powerpc/security/entry_flush.c @@ -53,7 +53,7 @@ int entry_flush_test(void) entry_flush = entry_flush_orig; - fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1); + fd = perf_event_open_counter(PERF_TYPE_HW_CACHE, PERF_L1D_READ_MISS_CONFIG, -1); FAIL_IF(fd < 0); p = (char *)memalign(zero_size, CACHELINE_SIZE); diff --git a/tools/testing/selftests/powerpc/security/flush_utils.h b/tools/testing/selftests/powerpc/security/flush_utils.h index 
07a5eb301466..7a3d60292916 100644 --- a/tools/testing/selftests/powerpc/security/flush_utils.h +++ b/tools/testing/selftests/powerpc/security/flush_utils.h @@ -9,6 +9,10 @@ #define CACHELINE_SIZE 128 +#define PERF_L1D_READ_MISS_CONFIG ((PERF_COUNT_HW_CACHE_L1D) | \ + (PERF_COUNT_HW_CACHE_OP_READ << 8) | \ + (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)) + void syscall_loop(char *p, unsigned long iterations, unsigned long zero_size); diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c index 7565fd786640..f73484a6470f 100644 --- a/tools/testing/selftests/powerpc/security/rfi_flush.c +++ b/tools/testing/selftests/powerpc/security/rfi_flush.c @@ -54,7 +54,7 @@ int rfi_flush_test(void) rfi_flush = rfi_flush_orig; - fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1); + fd = perf_event_open_counter(PERF_TYPE_HW_CACHE, PERF_L1D_READ_MISS_CONFIG, -1); FAIL_IF(fd < 0); p = (char *)memalign(zero_size, CACHELINE_SIZE); -- 2.30.2

4 years, 1 month

1
0
0 0

[PATCH v3] netfilter: nf_conntrack: Add conntrack helper for ESP/IPsec

by Cole Dishington

Introduce changes to add ESP connection tracking helper to netfilter conntrack. The connection tracking of ESP is based on IPsec SPIs. The underlying motivation for this patch was to allow multiple VPN ESP clients to be distinguished when using NAT. Added config flag CONFIG_NF_CT_PROTO_ESP to enable the ESP/IPsec conntrack helper. Signed-off-by: Cole Dishington <Cole.Dishington(a)alliedtelesis.co.nz> --- Notes: Thanks for your time reviewing! Q. > +static int esp_tuple_to_nlattr(struct sk_buff *skb, > + const struct nf_conntrack_tuple *t) > +{ > + if (nla_put_be16(skb, CTA_PROTO_SRC_ESP_ID, t->src.u.esp.id) || > + nla_put_be16(skb, CTA_PROTO_DST_ESP_ID, t->dst.u.esp.id)) > + goto nla_put_failure; This exposes the 16 bit kernel-generated IDs, right? Should this dump the real on-wire SPIs instead? Or is there a reason why the internal IDs need exposure? A. I think I need to expose the internal esp ids here due to esp_nlattr_to_tuple(). If esp id was changed to real SPIs here I would be unable to look up the correct tuple (without IP addresses too). 
changes in v3: - Flush all esp entries for a given netns on nf_conntrack_proto_pernet_fini - Replace _esp_table (and its spinlock) shared over netns with per netns linked lists and bitmap (for esp ids) - Init IPv6 any address with IN6ADDR_ANY_INIT rather than ipv6_addr_set() - Change l3num on hash key from u16 to u8 - Add selftests file for testing tracker with ipv4 and ipv6 - Removed credits .../linux/netfilter/nf_conntrack_proto_esp.h | 23 + .../net/netfilter/ipv4/nf_conntrack_ipv4.h | 3 + include/net/netfilter/nf_conntrack.h | 6 + include/net/netfilter/nf_conntrack_l4proto.h | 16 + include/net/netfilter/nf_conntrack_tuple.h | 3 + include/net/netns/conntrack.h | 17 + .../netfilter/nf_conntrack_tuple_common.h | 3 + .../linux/netfilter/nfnetlink_conntrack.h | 2 + net/netfilter/Kconfig | 10 + net/netfilter/Makefile | 1 + net/netfilter/nf_conntrack_core.c | 23 + net/netfilter/nf_conntrack_netlink.c | 4 +- net/netfilter/nf_conntrack_proto.c | 15 + net/netfilter/nf_conntrack_proto_esp.c | 741 ++++++++++++++++++ net/netfilter/nf_conntrack_standalone.c | 8 + net/netfilter/nf_internals.h | 4 +- .../netfilter/conntrack_esp_related.sh | 268 +++++++ 17 files changed, 1145 insertions(+), 2 deletions(-) create mode 100644 include/linux/netfilter/nf_conntrack_proto_esp.h create mode 100644 net/netfilter/nf_conntrack_proto_esp.c create mode 100755 tools/testing/selftests/netfilter/conntrack_esp_related.sh diff --git a/include/linux/netfilter/nf_conntrack_proto_esp.h b/include/linux/netfilter/nf_conntrack_proto_esp.h new file mode 100644 index 000000000000..96888669edd7 --- /dev/null +++ b/include/linux/netfilter/nf_conntrack_proto_esp.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _CONNTRACK_PROTO_ESP_H +#define _CONNTRACK_PROTO_ESP_H +#include <asm/byteorder.h> +#include <net/netfilter/nf_conntrack_tuple.h> + +/* ESP PROTOCOL HEADER */ + +struct esphdr { + __u32 spi; +}; + +struct nf_ct_esp { + __u32 l_spi, r_spi; +}; + +void nf_ct_esp_pernet_flush(struct 
net *net); + +void destroy_esp_conntrack_entry(struct nf_conn *ct); + +bool esp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, + struct net *net, struct nf_conntrack_tuple *tuple); +#endif /* _CONNTRACK_PROTO_ESP_H */ diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index 2c8c2b023848..1aee91592639 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -25,5 +25,8 @@ extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite; #ifdef CONFIG_NF_CT_PROTO_GRE extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre; #endif +#ifdef CONFIG_NF_CT_PROTO_ESP +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_esp; +#endif #endif /*_NF_CONNTRACK_IPV4_H*/ diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 439379ca9ffa..4011be8c5e39 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -21,6 +21,7 @@ #include <linux/netfilter/nf_conntrack_dccp.h> #include <linux/netfilter/nf_conntrack_sctp.h> #include <linux/netfilter/nf_conntrack_proto_gre.h> +#include <linux/netfilter/nf_conntrack_proto_esp.h> #include <net/netfilter/nf_conntrack_tuple.h> @@ -36,6 +37,7 @@ union nf_conntrack_proto { struct ip_ct_tcp tcp; struct nf_ct_udp udp; struct nf_ct_gre gre; + struct nf_ct_esp esp; unsigned int tmpl_padto; }; @@ -47,6 +49,10 @@ struct nf_conntrack_net { unsigned int users4; unsigned int users6; unsigned int users_bridge; + +#ifdef CONFIG_NF_CT_PROTO_ESP + DECLARE_BITMAP(esp_id_map, 1024); +#endif }; #include <linux/types.h> diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 96f9cf81f46b..f700de0b9059 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -75,6 +75,8 @@ bool nf_conntrack_invert_icmp_tuple(struct 
nf_conntrack_tuple *tuple, const struct nf_conntrack_tuple *orig); bool nf_conntrack_invert_icmpv6_tuple(struct nf_conntrack_tuple *tuple, const struct nf_conntrack_tuple *orig); +bool nf_conntrack_invert_esp_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig); int nf_conntrack_inet_error(struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, @@ -132,6 +134,11 @@ int nf_conntrack_gre_packet(struct nf_conn *ct, unsigned int dataoff, enum ip_conntrack_info ctinfo, const struct nf_hook_state *state); +int nf_conntrack_esp_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); void nf_conntrack_generic_init_net(struct net *net); void nf_conntrack_tcp_init_net(struct net *net); @@ -141,6 +148,8 @@ void nf_conntrack_dccp_init_net(struct net *net); void nf_conntrack_sctp_init_net(struct net *net); void nf_conntrack_icmp_init_net(struct net *net); void nf_conntrack_icmpv6_init_net(struct net *net); +int nf_conntrack_esp_init(void); +void nf_conntrack_esp_init_net(struct net *net); /* Existing built-in generic protocol */ extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic; @@ -240,4 +249,11 @@ static inline struct nf_gre_net *nf_gre_pernet(struct net *net) } #endif +#ifdef CONFIG_NF_CT_PROTO_ESP +static inline struct nf_esp_net *nf_esp_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.esp; +} +#endif + #endif /*_NF_CONNTRACK_PROTOCOL_H*/ diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h index 9334371c94e2..60279ffabe36 100644 --- a/include/net/netfilter/nf_conntrack_tuple.h +++ b/include/net/netfilter/nf_conntrack_tuple.h @@ -62,6 +62,9 @@ struct nf_conntrack_tuple { struct { __be16 key; } gre; + struct { + __be16 id; + } esp; } u; /* The protocol. 
*/ diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 806454e767bf..43cd1e78f790 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -69,6 +69,20 @@ struct nf_gre_net { }; #endif +#ifdef CONFIG_NF_CT_PROTO_ESP +enum esp_conntrack { + ESP_CT_UNREPLIED, + ESP_CT_REPLIED, + ESP_CT_MAX +}; + +struct nf_esp_net { + spinlock_t id_list_lock; + struct list_head id_list; + unsigned int esp_timeouts[ESP_CT_MAX]; +}; +#endif + struct nf_ip_net { struct nf_generic_net generic; struct nf_tcp_net tcp; @@ -84,6 +98,9 @@ struct nf_ip_net { #ifdef CONFIG_NF_CT_PROTO_GRE struct nf_gre_net gre; #endif +#ifdef CONFIG_NF_CT_PROTO_ESP + struct nf_esp_net esp; +#endif }; struct ct_pcpu { diff --git a/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h b/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h index 64390fac6f7e..78600cb4bfff 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h @@ -39,6 +39,9 @@ union nf_conntrack_man_proto { struct { __be16 key; /* GRE key is 32bit, PPtP only uses 16bit */ } gre; + struct { + __be16 id; + } esp; }; #define CTINFO2DIR(ctinfo) ((ctinfo) >= IP_CT_IS_REPLY ? IP_CT_DIR_REPLY : IP_CT_DIR_ORIGINAL) diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h index d8484be72fdc..744d8931adeb 100644 --- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h +++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -90,6 +90,8 @@ enum ctattr_l4proto { CTA_PROTO_ICMPV6_ID, CTA_PROTO_ICMPV6_TYPE, CTA_PROTO_ICMPV6_CODE, + CTA_PROTO_SRC_ESP_ID, + CTA_PROTO_DST_ESP_ID, __CTA_PROTO_MAX }; #define CTA_PROTO_MAX (__CTA_PROTO_MAX - 1) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 1a92063c73a4..7269312d322e 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -199,6 +199,16 @@ config NF_CT_PROTO_UDPLITE If unsure, say Y. 
+config NF_CT_PROTO_ESP + bool "ESP protocol support" + depends on NETFILTER_ADVANCED + help + ESP connection tracking helper. Provides connection tracking for IPsec + clients behind this device based on SPI, especially useful for + distinguishing multiple clients when using NAT. + + If unsure, say N. + config NF_CONNTRACK_AMANDA tristate "Amanda backup protocol support" depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 33da7bf1b68e..0942f2c48ddb 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -14,6 +14,7 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o +nf_conntrack-$(CONFIG_NF_CT_PROTO_ESP) += nf_conntrack_proto_esp.o obj-$(CONFIG_NETFILTER) = netfilter.o diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index ff0168736f6e..3bef361d19ce 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -295,6 +295,10 @@ nf_ct_get_tuple(const struct sk_buff *skb, #ifdef CONFIG_NF_CT_PROTO_GRE case IPPROTO_GRE: return gre_pkt_to_tuple(skb, dataoff, net, tuple); +#endif +#ifdef CONFIG_NF_CT_PROTO_ESP + case IPPROTO_ESP: + return esp_pkt_to_tuple(skb, dataoff, net, tuple); #endif case IPPROTO_TCP: case IPPROTO_UDP: /* fallthrough */ @@ -439,6 +443,10 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, #if IS_ENABLED(CONFIG_IPV6) case IPPROTO_ICMPV6: return nf_conntrack_invert_icmpv6_tuple(inverse, orig); +#endif +#ifdef CONFIG_NF_CT_PROTO_ESP + case IPPROTO_ESP: + return nf_conntrack_invert_esp_tuple(inverse, orig); #endif } @@ -593,6 +601,13 @@ static void destroy_gre_conntrack(struct nf_conn *ct) #endif } +static void destroy_esp_conntrack(struct nf_conn *ct) +{ +#ifdef CONFIG_NF_CT_PROTO_ESP + 
destroy_esp_conntrack_entry(ct); +#endif +} + static void destroy_conntrack(struct nf_conntrack *nfct) { @@ -609,6 +624,9 @@ destroy_conntrack(struct nf_conntrack *nfct) if (unlikely(nf_ct_protonum(ct) == IPPROTO_GRE)) destroy_gre_conntrack(ct); + if (unlikely(nf_ct_protonum(ct) == IPPROTO_ESP)) + destroy_esp_conntrack(ct); + local_bh_disable(); /* Expectations will have been removed in clean_from_lists, * except TFTP can create an expectation on the first packet, @@ -1783,6 +1801,11 @@ static int nf_conntrack_handle_packet(struct nf_conn *ct, case IPPROTO_GRE: return nf_conntrack_gre_packet(ct, skb, dataoff, ctinfo, state); +#endif +#ifdef CONFIG_NF_CT_PROTO_ESP + case IPPROTO_ESP: + return nf_conntrack_esp_packet(ct, skb, dataoff, + ctinfo, state); #endif } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 1d519b0e51a5..8df33dbbf5a3 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1382,7 +1382,9 @@ static const struct nla_policy tuple_nla_policy[CTA_TUPLE_MAX+1] = { CTA_FILTER_F_CTA_PROTO_ICMP_ID | \ CTA_FILTER_F_CTA_PROTO_ICMPV6_TYPE | \ CTA_FILTER_F_CTA_PROTO_ICMPV6_CODE | \ - CTA_FILTER_F_CTA_PROTO_ICMPV6_ID) + CTA_FILTER_F_CTA_PROTO_ICMPV6_ID | \ + CTA_FILTER_F_CTA_PROTO_SRC_ESP_ID | \ + CTA_FILTER_F_CTA_PROTO_DST_ESP_ID) static int ctnetlink_parse_tuple_filter(const struct nlattr * const cda[], diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 47e9319d2cf3..e71ddb4e33cc 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -112,6 +112,9 @@ const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto) #ifdef CONFIG_NF_CT_PROTO_GRE case IPPROTO_GRE: return &nf_conntrack_l4proto_gre; #endif +#ifdef CONFIG_NF_CT_PROTO_ESP + case IPPROTO_ESP: return &nf_conntrack_l4proto_esp; +#endif #if IS_ENABLED(CONFIG_IPV6) case IPPROTO_ICMPV6: return &nf_conntrack_l4proto_icmpv6; #endif /* CONFIG_IPV6 
*/ @@ -656,6 +659,12 @@ int nf_conntrack_proto_init(void) goto cleanup_sockopt; #endif +#ifdef CONFIG_NF_CT_PROTO_ESP + ret = nf_conntrack_esp_init(); + if (ret < 0) + goto cleanup_sockopt; +#endif + return ret; #if IS_ENABLED(CONFIG_IPV6) @@ -691,6 +700,9 @@ void nf_conntrack_proto_pernet_init(struct net *net) #ifdef CONFIG_NF_CT_PROTO_GRE nf_conntrack_gre_init_net(net); #endif +#ifdef CONFIG_NF_CT_PROTO_ESP + nf_conntrack_esp_init_net(net); +#endif } void nf_conntrack_proto_pernet_fini(struct net *net) @@ -698,6 +710,9 @@ void nf_conntrack_proto_pernet_fini(struct net *net) #ifdef CONFIG_NF_CT_PROTO_GRE nf_ct_gre_keymap_flush(net); #endif +#ifdef CONFIG_NF_CT_PROTO_ESP + nf_ct_esp_pernet_flush(net); +#endif } module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, diff --git a/net/netfilter/nf_conntrack_proto_esp.c b/net/netfilter/nf_conntrack_proto_esp.c new file mode 100644 index 000000000000..1bc0cb879bfd --- /dev/null +++ b/net/netfilter/nf_conntrack_proto_esp.c @@ -0,0 +1,741 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * <:copyright-gpl + * Copyright 2008 Broadcom Corp. All Rights Reserved. + * Copyright (C) 2021 Allied Telesis Labs NZ + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. 
+ * :> + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/timer.h> +#include <linux/list.h> +#include <linux/seq_file.h> +#include <linux/in.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <net/dst.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_l4proto.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_timeout.h> +#include <linux/netfilter/nf_conntrack_proto_esp.h> +#include <net/netns/hash.h> +#include <linux/rhashtable.h> +#include <net/ipv6.h> + +#include "nf_internals.h" + +/* esp_id of 0 is left for unassigned values */ +#define TEMP_SPI_START 1 +#define TEMP_SPI_MAX (TEMP_SPI_START + 1024 - 1) + +struct _esp_entry { + /* linked list node for per net lookup via esp_id */ + struct list_head net_node; + + /* Hash table nodes for each required lookup + * lnode: net->hash_mix, l_spi, l_ip, r_ip + * rnode: net->hash_mix, r_spi, r_ip + * incmpl_rlist: net->hash_mix, r_ip + */ + struct rhash_head lnode; + struct rhash_head rnode; + struct rhlist_head incmpl_rlist; + + u16 esp_id; + + u16 l3num; + + u32 l_spi; + u32 r_spi; + + union nf_inet_addr l_ip; + union nf_inet_addr r_ip; + + u32 alloc_time_jiffies; + struct net *net; +}; + +struct _esp_hkey { + u8 l3num; + union nf_inet_addr src_ip; + union nf_inet_addr dst_ip; + u32 net_hmix; + u32 spi; +}; + +extern unsigned int nf_conntrack_net_id; + +static struct rhashtable ltable; +static struct rhashtable rtable; +static struct rhltable incmpl_rtable; +static unsigned int esp_timeouts[ESP_CT_MAX] = { + [ESP_CT_UNREPLIED] = 60 * HZ, + [ESP_CT_REPLIED] = 3600 * HZ, +}; + +static void esp_ip_addr_copy(int af, union nf_inet_addr *dst, + const union nf_inet_addr *src) +{ + if (af == AF_INET6) + dst->in6 = src->in6; + else + dst->ip = src->ip; +} + +static int esp_ip_addr_equal(int af, const union nf_inet_addr *a, + const union nf_inet_addr 
*b) +{ + if (af == AF_INET6) + return ipv6_addr_equal(&a->in6, &b->in6); + return a->ip == b->ip; +} + +static inline struct nf_esp_net *esp_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.esp; +} + +static inline void calculate_key(const u32 net_hmix, const u32 spi, + const u8 l3num, + const union nf_inet_addr *src_ip, + const union nf_inet_addr *dst_ip, + struct _esp_hkey *key) +{ + key->net_hmix = net_hmix; + key->spi = spi; + key->l3num = l3num; + esp_ip_addr_copy(l3num, &key->src_ip, src_ip); + esp_ip_addr_copy(l3num, &key->dst_ip, dst_ip); +} + +static inline u32 calculate_hash(const void *data, u32 len, u32 seed) +{ + return jhash(data, len, seed); +} + +static int ltable_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) +{ + struct _esp_hkey obj_key = {}; + const struct _esp_hkey *key = (const struct _esp_hkey *)arg->key; + const struct _esp_entry *eobj = (const struct _esp_entry *)obj; + u32 net_hmix = net_hash_mix(eobj->net); + + calculate_key(net_hmix, eobj->l_spi, eobj->l3num, &eobj->l_ip, + &eobj->r_ip, &obj_key); + return memcmp(key, &obj_key, sizeof(struct _esp_hkey)); +} + +static int rtable_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) +{ + const union nf_inet_addr any = { .in6 = IN6ADDR_ANY_INIT }; + struct _esp_hkey obj_key = {}; + const struct _esp_hkey *key = (const struct _esp_hkey *)arg->key; + const struct _esp_entry *eobj = (const struct _esp_entry *)obj; + u32 net_hmix = net_hash_mix(eobj->net); + + calculate_key(net_hmix, eobj->r_spi, eobj->l3num, &any, &eobj->r_ip, + &obj_key); + return memcmp(key, &obj_key, sizeof(struct _esp_hkey)); +} + +static int incmpl_table_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) +{ + const union nf_inet_addr any = { .in6 = IN6ADDR_ANY_INIT }; + struct _esp_hkey obj_key = {}; + const struct _esp_hkey *key = (const struct _esp_hkey *)arg->key; + const struct _esp_entry *eobj = (const struct _esp_entry *)obj; + u32 net_hmix = net_hash_mix(eobj->net); + + 
calculate_key(net_hmix, 0, eobj->l3num, &any, &eobj->r_ip, &obj_key); + return memcmp(key, &obj_key, sizeof(struct _esp_hkey)); +} + +static u32 ltable_obj_hashfn(const void *data, u32 len, u32 seed) +{ + struct _esp_hkey key = {}; + const struct _esp_entry *eobj = (const struct _esp_entry *)data; + u32 net_hmix = net_hash_mix(eobj->net); + + calculate_key(net_hmix, eobj->l_spi, eobj->l3num, &eobj->l_ip, + &eobj->r_ip, &key); + return calculate_hash(&key, len, seed); +} + +static u32 rtable_obj_hashfn(const void *data, u32 len, u32 seed) +{ + const union nf_inet_addr any = { .in6 = IN6ADDR_ANY_INIT }; + struct _esp_hkey key = {}; + const struct _esp_entry *eobj = (const struct _esp_entry *)data; + u32 net_hmix = net_hash_mix(eobj->net); + + calculate_key(net_hmix, eobj->r_spi, eobj->l3num, &any, &eobj->r_ip, &key); + return calculate_hash(&key, len, seed); +} + +static u32 incmpl_table_obj_hashfn(const void *data, u32 len, u32 seed) +{ + const union nf_inet_addr any = { .in6 = IN6ADDR_ANY_INIT }; + struct _esp_hkey key = {}; + const struct _esp_entry *eobj = (const struct _esp_entry *)data; + u32 net_hmix = net_hash_mix(eobj->net); + + calculate_key(net_hmix, 0, eobj->l3num, &any, &eobj->r_ip, &key); + return calculate_hash(&key, len, seed); +} + +static const struct rhashtable_params ltable_params = { + .key_len = sizeof(struct _esp_hkey), + .head_offset = offsetof(struct _esp_entry, lnode), + .hashfn = calculate_hash, + .obj_hashfn = ltable_obj_hashfn, + .obj_cmpfn = ltable_obj_cmpfn, +}; + +static const struct rhashtable_params rtable_params = { + .key_len = sizeof(struct _esp_hkey), + .head_offset = offsetof(struct _esp_entry, rnode), + .hashfn = calculate_hash, + .obj_hashfn = rtable_obj_hashfn, + .obj_cmpfn = rtable_obj_cmpfn, +}; + +static const struct rhashtable_params incmpl_rtable_params = { + .key_len = sizeof(struct _esp_hkey), + .head_offset = offsetof(struct _esp_entry, incmpl_rlist), + .hashfn = calculate_hash, + .obj_hashfn = 
incmpl_table_obj_hashfn, + .obj_cmpfn = incmpl_table_obj_cmpfn, +}; + +int nf_conntrack_esp_init(void) +{ + int ret; + + ret = rhashtable_init(&ltable, &ltable_params); + if (ret) + return ret; + + ret = rhashtable_init(&rtable, &rtable_params); + if (ret) + goto err_free_ltable; + + ret = rhltable_init(&incmpl_rtable, &incmpl_rtable_params); + if (ret) + goto err_free_rtable; + + return ret; + +err_free_rtable: + rhashtable_destroy(&rtable); +err_free_ltable: + rhashtable_destroy(&ltable); + + return ret; +} + +void nf_conntrack_esp_init_net(struct net *net) +{ + int i; + struct nf_esp_net *net_esp = esp_pernet(net); + + spin_lock_init(&net_esp->id_list_lock); + INIT_LIST_HEAD(&net_esp->id_list); + + for (i = 0; i < ESP_CT_MAX; i++) + net_esp->esp_timeouts[i] = esp_timeouts[i]; +} + +static struct _esp_entry *find_esp_entry_by_id(struct nf_esp_net *esp_net, int esp_id) +{ + struct list_head *pos, *head; + struct _esp_entry *esp_entry; + + head = &esp_net->id_list; + list_for_each(pos, head) { + esp_entry = list_entry(pos, struct _esp_entry, net_node); + if (esp_entry->esp_id == esp_id) + return esp_entry; + } + return NULL; +} + +static void free_esp_entry(struct nf_conntrack_net *cnet, struct _esp_entry *esp_entry) +{ + if (esp_entry) { + /* Remove from all the hash tables */ + pr_debug("Removing entry %x from all tables", esp_entry->esp_id); + list_del(&esp_entry->net_node); + rhashtable_remove_fast(&ltable, &esp_entry->lnode, ltable_params); + rhashtable_remove_fast(&rtable, &esp_entry->rnode, rtable_params); + rhltable_remove(&incmpl_rtable, &esp_entry->incmpl_rlist, incmpl_rtable_params); + clear_bit(esp_entry->esp_id - TEMP_SPI_START, cnet->esp_id_map); + kfree(esp_entry); + } +} + +/* Free an entry referred to by esp_id. + * + * NOTE: + * Per net linked list locking and unlocking is the responsibility of the calling function. + * Range checking is the responsibility of the calling function. 
+ */ +static void free_esp_entry_by_id(struct net *net, int esp_id) +{ + struct nf_esp_net *esp_net = esp_pernet(net); + struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id); + struct _esp_entry *esp_entry = find_esp_entry_by_id(esp_net, esp_id); + + free_esp_entry(cnet, esp_entry); +} + +/* Allocate the first available IPSEC table entry. + * NOTE: This function may block on per net list lock. + */ +struct _esp_entry *alloc_esp_entry(struct net *net) +{ + struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id); + struct nf_esp_net *esp_net = esp_pernet(net); + struct _esp_entry *esp_entry; + int id; + +again: + id = find_first_zero_bit(cnet->esp_id_map, 1024); + if (id >= 1024) + return NULL; + + if (test_and_set_bit(id, cnet->esp_id_map)) + goto again; /* raced */ + + esp_entry = kmalloc(sizeof(*esp_entry), GFP_ATOMIC); + if (!esp_entry) { + clear_bit(id, cnet->esp_id_map); + return NULL; + } + + esp_entry->esp_id = id + TEMP_SPI_START; + esp_entry->alloc_time_jiffies = nfct_time_stamp; + esp_entry->net = net; + + spin_lock(&esp_net->id_list_lock); + list_add(&esp_entry->net_node, &esp_net->id_list); + spin_unlock(&esp_net->id_list_lock); + + return esp_entry; +} + +/* Search for an ESP entry in the initial state based on the IP address of + * the remote peer. 
+ */ +static struct _esp_entry *search_esp_entry_init_remote(struct net *net, + u16 l3num, + const union nf_inet_addr *src_ip) +{ + const union nf_inet_addr any = { .in6 = IN6ADDR_ANY_INIT }; + u32 net_hmix = net_hash_mix(net); + struct _esp_entry *first_esp_entry = NULL; + struct _esp_entry *esp_entry; + struct _esp_hkey key = {}; + struct rhlist_head *pos, *list; + + calculate_key(net_hmix, 0, l3num, &any, src_ip, &key); + list = rhltable_lookup(&incmpl_rtable, (const void *)&key, incmpl_rtable_params); + rhl_for_each_entry_rcu(esp_entry, pos, list, incmpl_rlist) { + if (net_eq(net, esp_entry->net) && + l3num == esp_entry->l3num && + esp_ip_addr_equal(l3num, src_ip, &esp_entry->r_ip)) { + if (!first_esp_entry) { + first_esp_entry = esp_entry; + } else if (first_esp_entry->alloc_time_jiffies - esp_entry->alloc_time_jiffies <= 0) { + /* This entry is older than the last one found so treat this + * as a better match. + */ + first_esp_entry = esp_entry; + } + } + } + + if (first_esp_entry) { + if (first_esp_entry->l3num == AF_INET) { + pr_debug("Matches incmpl_rtable entry %x with l_spi %x r_ip %pI4\n", + first_esp_entry->esp_id, first_esp_entry->l_spi, + &first_esp_entry->r_ip.in); + } else { + pr_debug("Matches incmpl_rtable entry %x with l_spi %x r_ip %pI6\n", + first_esp_entry->esp_id, first_esp_entry->l_spi, + &first_esp_entry->r_ip.in6); + } + } + + return first_esp_entry; +} + +/* Search for an ESP entry by SPI, source and destination IP addresses. + * NOTE: This function may block on per net list lock. 
+ */ +static struct _esp_entry *search_esp_entry_by_spi(struct net *net, const __u32 spi, + u16 l3num, + const union nf_inet_addr *src_ip, + const union nf_inet_addr *dst_ip) +{ + const union nf_inet_addr any = { .in6 = IN6ADDR_ANY_INIT }; + u32 net_hmix = net_hash_mix(net); + struct _esp_entry *esp_entry; + struct _esp_hkey key = {}; + + /* Check for matching established session or repeated initial LAN side */ + /* LAN side first */ + calculate_key(net_hmix, spi, l3num, src_ip, dst_ip, &key); + esp_entry = rhashtable_lookup_fast(&ltable, (const void *)&key, ltable_params); + if (esp_entry) { + /* When r_spi is set this is an established session. When not set it's + * a repeated initial packet from LAN side. But both cases are treated + * the same. + */ + if (esp_entry->l3num == AF_INET) { + pr_debug("Matches ltable entry %x with l_spi %x l_ip %pI4 r_ip %pI4\n", + esp_entry->esp_id, esp_entry->l_spi, + &esp_entry->l_ip.in, &esp_entry->r_ip.in); + } else { + pr_debug("Matches ltable entry %x with l_spi %x l_ip %pI6 r_ip %pI6\n", + esp_entry->esp_id, esp_entry->l_spi, + &esp_entry->l_ip.in6, &esp_entry->r_ip.in6); + } + return esp_entry; + } + + /* Established remote side */ + calculate_key(net_hmix, spi, l3num, &any, src_ip, &key); + esp_entry = rhashtable_lookup_fast(&rtable, (const void *)&key, rtable_params); + if (esp_entry) { + if (esp_entry->l3num == AF_INET) { + pr_debug("Matches rtable entry %x with l_spi %x r_spi %x l_ip %pI4 r_ip %pI4\n", + esp_entry->esp_id, esp_entry->l_spi, esp_entry->r_spi, + &esp_entry->l_ip.in, &esp_entry->r_ip.in); + } else { + pr_debug("Matches rtable entry %x with l_spi %x r_spi %x l_ip %pI6 r_ip %pI6\n", + esp_entry->esp_id, esp_entry->l_spi, esp_entry->r_spi, + &esp_entry->l_ip.in6, &esp_entry->r_ip.in6); + } + return esp_entry; + } + + /* Incomplete remote side, check if packet has a missing r_spi */ + esp_entry = search_esp_entry_init_remote(net, l3num, src_ip); + if (esp_entry) { + int err; + + esp_entry->r_spi = spi; + /* 
Remove entry from incmpl_rtable and add to rtable */ + rhltable_remove(&incmpl_rtable, &esp_entry->incmpl_rlist, incmpl_rtable_params); + /* Error will not be due to duplicate as established remote side lookup + * above would have found it. Delete entry. + */ + err = rhashtable_insert_fast(&rtable, &esp_entry->rnode, rtable_params); + if (err) { + struct nf_esp_net *esp_net = esp_pernet(net); + + spin_lock(&esp_net->id_list_lock); + free_esp_entry_by_id(net, esp_entry->esp_id); + spin_unlock(&esp_net->id_list_lock); + return NULL; + } + return esp_entry; + } + + if (l3num == AF_INET) { + pr_debug("No entry matches for spi %x src_ip %pI4 dst_ip %pI4\n", + spi, &src_ip->in, &dst_ip->in); + } else { + pr_debug("No entry matches for spi %x src_ip %pI6 dst_ip %pI6\n", + spi, &src_ip->in6, &dst_ip->in6); + } + return NULL; +} + +/* invert esp part of tuple */ +bool nf_conntrack_invert_esp_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig) +{ + tuple->dst.u.esp.id = orig->dst.u.esp.id; + tuple->src.u.esp.id = orig->src.u.esp.id; + return true; +} + +/* esp hdr info to tuple */ +bool esp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, + struct net *net, struct nf_conntrack_tuple *tuple) +{ + struct esphdr _esphdr, *esphdr; + struct _esp_entry *esp_entry; + u32 spi; + + esphdr = skb_header_pointer(skb, dataoff, sizeof(_esphdr), &_esphdr); + if (!esphdr) { + /* try to behave like "nf_conntrack_proto_generic" */ + tuple->src.u.all = 0; + tuple->dst.u.all = 0; + return true; + } + spi = ntohl(esphdr->spi); + + /* Check if esphdr already associated with a pre-existing connection: + * if no, create a new connection, missing the r_spi; + * if yes, check if we have seen the source IP: + * if no, fill in r_spi in the pre-existing connection. 
+ */ + esp_entry = search_esp_entry_by_spi(net, spi, tuple->src.l3num, + &tuple->src.u3, &tuple->dst.u3); + if (!esp_entry) { + struct _esp_hkey key = {}; + const union nf_inet_addr any = { .in6 = IN6ADDR_ANY_INIT }; + u32 net_hmix = net_hash_mix(net); + struct nf_esp_net *esp_net = esp_pernet(net); + struct _esp_entry *esp_entry_old; + int err; + + esp_entry = alloc_esp_entry(net); + if (!esp_entry) { + pr_debug("All esp connection slots in use\n"); + return false; + } + esp_entry->l_spi = spi; + esp_entry->l3num = tuple->src.l3num; + esp_ip_addr_copy(esp_entry->l3num, &esp_entry->l_ip, &tuple->src.u3); + esp_ip_addr_copy(esp_entry->l3num, &esp_entry->r_ip, &tuple->dst.u3); + + /* Add entries to the hash tables */ + + calculate_key(net_hmix, esp_entry->l_spi, esp_entry->l3num, &esp_entry->l_ip, + &esp_entry->r_ip, &key); + esp_entry_old = rhashtable_lookup_get_insert_key(&ltable, &key, &esp_entry->lnode, + ltable_params); + if (esp_entry_old) { + spin_lock(&esp_net->id_list_lock); + + if (IS_ERR(esp_entry_old)) { + free_esp_entry_by_id(net, esp_entry->esp_id); + spin_unlock(&esp_net->id_list_lock); + return false; + } + + free_esp_entry_by_id(net, esp_entry->esp_id); + spin_unlock(&esp_net->id_list_lock); + + /* insertion raced, use existing entry */ + esp_entry = esp_entry_old; + } + /* esp_entry_old == NULL -- insertion successful */ + + calculate_key(net_hmix, 0, esp_entry->l3num, &any, &esp_entry->r_ip, &key); + err = rhltable_insert_key(&incmpl_rtable, (const void *)&key, + &esp_entry->incmpl_rlist, incmpl_rtable_params); + if (err) { + spin_lock(&esp_net->id_list_lock); + free_esp_entry_by_id(net, esp_entry->esp_id); + spin_unlock(&esp_net->id_list_lock); + return false; + } + + if (esp_entry->l3num == AF_INET) { + pr_debug("New entry %x with l_spi %x l_ip %pI4 r_ip %pI4\n", + esp_entry->esp_id, esp_entry->l_spi, + &esp_entry->l_ip.in, &esp_entry->r_ip.in); + } else { + pr_debug("New entry %x with l_spi %x l_ip %pI6 r_ip %pI6\n", + esp_entry->esp_id, 
esp_entry->l_spi, + &esp_entry->l_ip.in6, &esp_entry->r_ip.in6); + } + } + + tuple->dst.u.esp.id = esp_entry->esp_id; + tuple->src.u.esp.id = esp_entry->esp_id; + return true; +} + +#ifdef CONFIG_NF_CONNTRACK_PROCFS +/* print private data for conntrack */ +static void esp_print_conntrack(struct seq_file *s, struct nf_conn *ct) +{ + seq_printf(s, "l_spi=%x, r_spi=%x ", ct->proto.esp.l_spi, ct->proto.esp.r_spi); +} +#endif + +/* Returns verdict for packet, and may modify conntrack */ +int nf_conntrack_esp_packet(struct nf_conn *ct, struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state) +{ + int esp_id; + struct nf_conntrack_tuple *tuple; + unsigned int *timeouts = nf_ct_timeout_lookup(ct); + struct nf_esp_net *esp_net = esp_pernet(nf_ct_net(ct)); + + if (!timeouts) + timeouts = esp_net->esp_timeouts; + + /* If we've seen traffic both ways, this is some kind of ESP + * stream. Extend timeout. + */ + if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { + nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[ESP_CT_REPLIED]); + /* Also, more likely to be important, and not a probe */ + if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) { + /* Was originally IPCT_STATUS but this is no longer an option. + * GRE uses assured for same purpose + */ + nf_conntrack_event_cache(IPCT_ASSURED, ct); + + /* Retrieve SPIs of original and reply from esp_entry. + * Both directions should contain the same esp_entry, + * so just check the first one. 
+ */ + tuple = nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL); + + esp_id = tuple->src.u.esp.id; + if (esp_id >= TEMP_SPI_START && esp_id <= TEMP_SPI_MAX) { + struct _esp_entry *esp_entry; + + spin_lock(&esp_net->id_list_lock); + esp_entry = find_esp_entry_by_id(esp_net, esp_id); + spin_unlock(&esp_net->id_list_lock); + + if (esp_entry) { + ct->proto.esp.l_spi = esp_entry->l_spi; + ct->proto.esp.r_spi = esp_entry->r_spi; + } + } + } + } else { + nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[ESP_CT_UNREPLIED]); + } + + return NF_ACCEPT; +} + +void nf_ct_esp_pernet_flush(struct net *net) +{ + struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id); + struct nf_esp_net *esp_net = esp_pernet(net); + struct list_head *pos, *tmp, *head = &esp_net->id_list; + struct _esp_entry *esp_entry; + + spin_lock(&esp_net->id_list_lock); + list_for_each_safe(pos, tmp, head) { + esp_entry = list_entry(pos, struct _esp_entry, net_node); + free_esp_entry(cnet, esp_entry); + } + spin_unlock(&esp_net->id_list_lock); +} + +/* Called when a conntrack entry has already been removed from the hashes + * and is about to be deleted from memory + */ +void destroy_esp_conntrack_entry(struct nf_conn *ct) +{ + struct nf_conntrack_tuple *tuple; + enum ip_conntrack_dir dir; + int esp_id; + struct net *net = nf_ct_net(ct); + struct nf_esp_net *esp_net = esp_pernet(net); + + /* Probably all the ESP entries referenced in this connection are the same, + * but the free function handles repeated frees, so best to do them all. 
+ */ + for (dir = IP_CT_DIR_ORIGINAL; dir < IP_CT_DIR_MAX; dir++) { + tuple = nf_ct_tuple(ct, dir); + + spin_lock(&esp_net->id_list_lock); + + esp_id = tuple->src.u.esp.id; + if (esp_id >= TEMP_SPI_START && esp_id <= TEMP_SPI_MAX) + free_esp_entry_by_id(net, esp_id); + tuple->src.u.esp.id = 0; + + esp_id = tuple->dst.u.esp.id; + if (esp_id >= TEMP_SPI_START && esp_id <= TEMP_SPI_MAX) + free_esp_entry_by_id(net, esp_id); + tuple->dst.u.esp.id = 0; + + spin_unlock(&esp_net->id_list_lock); + } +} + +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) + +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nfnetlink_conntrack.h> + +static int esp_tuple_to_nlattr(struct sk_buff *skb, + const struct nf_conntrack_tuple *t) +{ + if (nla_put_be16(skb, CTA_PROTO_SRC_ESP_ID, t->src.u.esp.id) || + nla_put_be16(skb, CTA_PROTO_DST_ESP_ID, t->dst.u.esp.id)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static const struct nla_policy esp_nla_policy[CTA_PROTO_MAX + 1] = { + [CTA_PROTO_SRC_ESP_ID] = { .type = NLA_U16 }, + [CTA_PROTO_DST_ESP_ID] = { .type = NLA_U16 }, +}; + +static int esp_nlattr_to_tuple(struct nlattr *tb[], + struct nf_conntrack_tuple *t, + u32 flags) +{ + if (flags & CTA_FILTER_FLAG(CTA_PROTO_SRC_ESP_ID)) { + if (!tb[CTA_PROTO_SRC_ESP_ID]) + return -EINVAL; + + t->src.u.esp.id = nla_get_be16(tb[CTA_PROTO_SRC_ESP_ID]); + } + + if (flags & CTA_FILTER_FLAG(CTA_PROTO_DST_ESP_ID)) { + if (!tb[CTA_PROTO_DST_ESP_ID]) + return -EINVAL; + + t->dst.u.esp.id = nla_get_be16(tb[CTA_PROTO_DST_ESP_ID]); + } + + return 0; +} + +static unsigned int esp_nlattr_tuple_size(void) +{ + return nla_policy_len(esp_nla_policy, CTA_PROTO_MAX + 1); +} +#endif + +/* protocol helper struct */ +const struct nf_conntrack_l4proto nf_conntrack_l4proto_esp = { + .l4proto = IPPROTO_ESP, +#ifdef CONFIG_NF_CONNTRACK_PROCFS + .print_conntrack = esp_print_conntrack, +#endif +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) + .tuple_to_nlattr = esp_tuple_to_nlattr, + .nlattr_tuple_size = 
esp_nlattr_tuple_size, + .nlattr_to_tuple = esp_nlattr_to_tuple, + .nla_policy = esp_nla_policy, +#endif +}; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index c6c0cb465664..7922ff6cf5a4 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -88,6 +88,14 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, ntohs(tuple->src.u.gre.key), ntohs(tuple->dst.u.gre.key)); break; + case IPPROTO_ESP: + /* Both src and dest esp.id should be equal but showing both + * will help find errors. + */ + seq_printf(s, "srcid=0x%x dstid=0x%x ", + ntohs(tuple->src.u.esp.id), + ntohs(tuple->dst.u.esp.id)); + break; default: break; } diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index 832ae64179f0..4fd8956aec65 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -19,7 +19,9 @@ #define CTA_FILTER_F_CTA_PROTO_ICMPV6_TYPE (1 << 9) #define CTA_FILTER_F_CTA_PROTO_ICMPV6_CODE (1 << 10) #define CTA_FILTER_F_CTA_PROTO_ICMPV6_ID (1 << 11) -#define CTA_FILTER_F_MAX (1 << 12) +#define CTA_FILTER_F_CTA_PROTO_SRC_ESP_ID (1 << 12) +#define CTA_FILTER_F_CTA_PROTO_DST_ESP_ID (1 << 13) +#define CTA_FILTER_F_MAX (1 << 14) #define CTA_FILTER_F_ALL (CTA_FILTER_F_MAX-1) #define CTA_FILTER_FLAG(ctattr) CTA_FILTER_F_ ## ctattr diff --git a/tools/testing/selftests/netfilter/conntrack_esp_related.sh b/tools/testing/selftests/netfilter/conntrack_esp_related.sh new file mode 100755 index 000000000000..88b0f164664f --- /dev/null +++ b/tools/testing/selftests/netfilter/conntrack_esp_related.sh @@ -0,0 +1,268 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# <:copyright-gpl +# Copyright (C) 2021 Allied Telesis Labs NZ +# +# check that related ESP connections are tracked via spi. 
+# +# Setup is: +# +# nsclient3(veth0) -> (veth2) +# (br0)nsrouter1(veth1) -> (veth1)nsrouter2 -> (veth0)nsclient2 +# nsclient1(veth0) -> (veth0) +# Setup xfrm esp connections for IPv4 and IPv6 and check they are tracked. +# +# In addition, nsrouter1 will perform IP masquerading. If nsrouter1 does not support esp +# connection tracking, it will be unable to tell the difference between packets from nsclient2 to +# either nsclient1 or nsclient3. +# +# ESP connections (for IPv6) need to use tunnel mode, as ICMPv6 computes checksum over encapsulating +# IP header addresses. + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 +ns_all="nsclient1 nsclient3 nsrouter1 nsrouter2 nsclient2" + +conntrack -V > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without conntrack tool" + exit $ksft_skip +fi + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ipv4() { + echo -n 192.168.$1.$2 +} + +ipv6 () { + echo -n dead:$1::$2 +} + +cleanup() { + for n in $ns_all; do ip netns del $n;done +} + +check_counter() +{ + local ns_name=$1 + local name="unknown" + local expect="packets 0 bytes 0" + local lret=0 + + cnt=$(ip netns exec $ns_name nft list counter inet filter "$name" | grep -q "$expect") + if [ $? -ne 0 ]; then + echo "ERROR: counter $name in $ns_name has unexpected value (expected $expect)" 1>&2 + ip netns exec $ns_name nft list counter inet filter "$name" 1>&2 + lret=1 + fi + return $lret +} + +check_unknown() +{ + for n in nsrouter1 nsrouter2; do + check_counter $n + if [ $? 
-ne 0 ] ;then + return 1 + fi + done + return 0 +} + +check_conntrack() +{ + local ret=0 + + for p in ipv4 ipv6; do + cnt=$(ip netns exec nsrouter1 conntrack -f $p -L 2>&1) + # Check tracked connection was esp by port (conntrack shows unknown at the moment) + local num=$(echo -e "$cnt" | grep -cE "[a-zA-Z]+ +50") + if [ $? -ne 0 ] || [ "x$num" != "x2" ]; then + echo -e "ERROR: expect to see two conntrack esp flows for $p:\n $cnt" 1>&2 + ret=1 + fi + done + return $ret +} + +for n in $ns_all; do + ip netns add $n + ip -net $n link set lo up +done + +ip link add veth0 netns nsclient1 type veth peer name veth0 netns nsrouter1 +ip link add veth0 netns nsclient3 type veth peer name veth2 netns nsrouter1 +ip link add br0 netns nsrouter1 type bridge +ip -net nsrouter1 link set veth0 master br0 +ip -net nsrouter1 link set veth2 master br0 +ip link add veth1 netns nsrouter1 type veth peer name veth1 netns nsrouter2 +ip link add veth0 netns nsrouter2 type veth peer name veth0 netns nsclient2 + +for n in $ns_all; do + ip -net $n link set veth0 up +done +ip -net nsrouter1 link set veth1 up +ip -net nsrouter1 link set veth2 up +ip -net nsrouter1 link set br0 up +ip -net nsrouter2 link set veth1 up + +for i in 1 2; do + ip -net nsclient$i addr add $(ipv4 $i 2)/24 dev veth0 + ip -net nsclient$i addr add $(ipv6 $i 2)/64 dev veth0 + ip -net nsclient$i route add default via $(ipv4 $i 1) + ip -net nsclient$i -6 route add default via $(ipv6 $i 1) + + ip -net nsrouter$i addr add $(ipv4 3 $i)/24 dev veth1 + ip -net nsrouter$i addr add $(ipv6 3 $i)/64 dev veth1 +done +ip -net nsrouter1 addr add $(ipv4 1 1)/24 dev br0 +ip -net nsrouter1 addr add $(ipv6 1 1)/64 dev br0 +ip -net nsrouter2 addr add $(ipv4 2 1)/24 dev veth0 +ip -net nsrouter2 addr add $(ipv6 2 1)/64 dev veth0 + +ip -net nsclient3 addr add $(ipv4 1 3)/24 dev veth0 +ip -net nsclient3 addr add $(ipv6 1 3)/64 dev veth0 +ip -net nsclient3 route add default via $(ipv4 1 1) +ip -net nsclient3 -6 route add default via $(ipv6 1 1) + 
+ip -net nsrouter1 route add default via $(ipv4 3 2) +ip -net nsrouter1 -6 route add default via $(ipv6 3 2) +ip -net nsrouter2 route add default via $(ipv4 3 1) +ip -net nsrouter2 -6 route add default via $(ipv6 3 1) + +for i in 1 2; do + ip netns exec nsrouter$i sysctl -q net.ipv4.conf.all.forwarding=1 + ip netns exec nsrouter$i sysctl -q net.ipv6.conf.all.forwarding=1 +done + +for i in 1 2; do + ip netns exec nsrouter$i nft -f - <<-EOF + table inet filter { + counter unknown { } + chain forward { + type filter hook forward priority 0; policy accept; + meta l4proto esp ct state new,established accept + counter name "unknown" accept + } + } + EOF +done + +for i in 1 2; do + ip netns exec nsrouter1 nft -f - <<-EOF + table ip nat { + chain postrouting { + type nat hook postrouting priority 0; policy accept; + oifname "veth1" counter masquerade + } + } + table ip6 nat { + chain postrouting { + type nat hook postrouting priority 0; policy accept; + oifname "veth1" counter masquerade + } + } + EOF +done +sleep 2 + +ip_tunnel() { + ip -net nsclient$2 tunnel add tunnel$1 mode vti${1%4} local $3 remote $4 key 0x$1 + ip -net nsclient$2 link set tunnel$1 up +} + +ip_xfrm() { + ip -net nsclient$2 xfrm state add src $4 dst $5 \ + proto esp spi 0x$1$2$3 mode tunnel mark 0x$1 \ + sel src $6 dst $7 \ + auth-trunc 'hmac(sha256)' \ + 0x0000000000000000000000000000000000000000000000000000000000000$1$2$3 128 \ + enc 'cbc(aes)' \ + 0x0000000000000000000000000000000000000000000000000000000000000$1$2$3 + + ip -net nsclient$2 xfrm state add src $5 dst $4 \ + proto esp spi 0x$1$3$2 mode tunnel mark 0x$1 \ + sel src $7 dst $6 \ + auth-trunc 'hmac(sha256)' \ + 0x0000000000000000000000000000000000000000000000000000000000000$1$3$2 128 \ + enc 'cbc(aes)' \ + 0x0000000000000000000000000000000000000000000000000000000000000$1$3$2 + + ip -net nsclient$2 xfrm policy add src $7 dst $6 dir in mark 0x$1 \ + tmpl src $5 dst $4 proto esp mode tunnel + ip -net nsclient$2 xfrm policy add src $6 dst $7 
dir out mark 0x$1 \ + tmpl src $4 dst $5 proto esp mode tunnel +} + +ip_tunnel 4 1 $(ipv4 1 2) $(ipv4 2 2) +ip -net nsclient1 addr add $(ipv4 250 1)/24 dev tunnel4 +ip_xfrm 4 1 2 $(ipv4 1 2) $(ipv4 2 2) $(ipv4 250 1) $(ipv4 250 2) + +ip_tunnel 4 3 $(ipv4 1 3) $(ipv4 2 2) +ip -net nsclient3 addr add $(ipv4 251 1)/24 dev tunnel4 +ip_xfrm 4 3 2 $(ipv4 1 3) $(ipv4 2 2) $(ipv4 251 1) $(ipv4 251 2) + +ip_tunnel 4 2 $(ipv4 2 2) $(ipv4 3 1) +ip -net nsclient2 addr add $(ipv4 250 2)/24 dev tunnel4 +ip -net nsclient2 addr add $(ipv4 251 2)/24 dev tunnel4 +ip_xfrm 4 2 1 $(ipv4 2 2) $(ipv4 3 1) $(ipv4 250 2) $(ipv4 250 1) +ip_xfrm 4 2 3 $(ipv4 2 2) $(ipv4 3 1) $(ipv4 251 2) $(ipv4 251 1) + + +ip_tunnel 6 1 $(ipv6 1 2) $(ipv6 2 2) +ip -net nsclient1 addr add $(ipv6 250 1)/64 dev tunnel6 +ip_xfrm 6 1 2 $(ipv6 1 2) $(ipv6 2 2) $(ipv6 250 1) $(ipv6 250 2) + +ip_tunnel 6 3 $(ipv6 1 3) $(ipv6 2 2) +ip -net nsclient3 addr add $(ipv6 251 1)/64 dev tunnel6 +ip_xfrm 6 3 2 $(ipv6 1 3) $(ipv6 2 2) $(ipv6 251 1) $(ipv6 251 2) + +ip_tunnel 6 2 $(ipv6 2 2) $(ipv6 3 1) +ip -net nsclient2 addr add $(ipv6 250 2)/64 dev tunnel6 +ip -net nsclient2 addr add $(ipv6 251 2)/64 dev tunnel6 +ip_xfrm 6 2 1 $(ipv6 2 2) $(ipv6 3 1) $(ipv6 250 2) $(ipv6 250 1) +ip_xfrm 6 2 3 $(ipv6 2 2) $(ipv6 3 1) $(ipv6 251 2) $(ipv6 251 1) + +test_ping() { + ip netns exec $1 ping -q -c 1 $2 >/dev/null 2>&1 + if [ $? -ne 0 ]; then + echo "ERROR: netns ip routing/connectivity broken from $1 to $2" 1>&2 + fi +} + +test_ping nsclient1 $(ipv4 250 2) +test_ping nsclient3 $(ipv4 251 2) +test_ping nsclient1 $(ipv6 250 2) +test_ping nsclient3 $(ipv6 251 2) + +check_conntrack +if [ $? -ne 0 ]; then + ret=1 +fi + +check_unknown +if [ $? -ne 0 ]; then + ret=1 +fi + +if [ $ret -eq 0 ];then + echo "PASS: ESP connections were tracked via SPIs" +else + echo "ERROR: ESP connections were not tracked" +fi + +cleanup +exit $ret -- 2.31.1

4 years, 1 month

2
2
0 0

[RFC v2 0/4] kunit: tool: add support for QEMU

by Brendan Higgins

TL;DR: Add support to kunit_tool to dispatch tests via QEMU. Also add support to immediately shut down a kernel after running KUnit tests. Background ---------- KUnit has supported running on all architectures for quite some time; however, kunit_tool - the script commonly used to invoke KUnit tests - has only fully supported KUnit run on UML. Its functionality has been broken up for some time to separate the configure, build, run, and parse phases making it possible to be used in part on other architectures to a small extent. Nevertheless, kunit_tool has not supported running tests on other architectures. What this patchset does ----------------------- This patchset introduces first class support to kunit_tool for KUnit to be run on many popular architectures via QEMU. It does this by adding two new flags: `--arch` and `--cross_compile`. `--arch` allows an architecture to be specified by the name the architecture is given in `arch/`. It uses the specified architecture to select a minimal amount of Kconfigs and QEMU configs needed for the architecture to run in QEMU and provide a console from which KTAP results can be scraped. `--cross_compile` allows a toolchain prefix to be specified to make similar to how `CROSS_COMPILE` is used. Additionally, this patchset revives the previously considered "kunit: tool: add support for QEMU"[1] patches. The motivation for this new kernel command line flag, `kunit_shutdown`, is to better support running KUnit tests inside of QEMU. For most popular architectures, QEMU can be made to terminate when the Linux kernel that is being run is rebooted, halted, or powered off. As Kees pointed out in a previous discussion[2], it is possible to make a kernel initrd that can reboot the kernel immediately; however, doing this for every architecture would likely be infeasible.
Instead, just having an option for the kernel to shut down when it is done with testing seems a lot simpler, especially since it is an option which would only be available in testing configurations of the kernel anyway. What discussion remains for this patchset? ------------------------------------------ The first and most obvious thing is settling the debate about `kunit_shutdown`. If I recall correctly, Kees suggested that it might be better to just add a new initrd; however, as I mentioned above, now to support many new architectures, it may be substantially easier to support this option. So I am hoping with this new use case, the argument for `kunit_shutdown` will be more compelling. The second and likely harder issue is figuring out the best way to configure and provide configs for running KUnit tests via QEMU. I provide a pretty primitive way in this patchset which is not super flexible; for example, for our PPC support we have it set to build big endian, and POWER8 - we currently don't support a way to change that. Nevertheless, having sensible defaults is handy too, so we will probably want to have some support for overriding defaults, while still being able to have defaults. [1] http://patches.linaro.org/patch/208336/ [2] https://lkml.org/lkml/2020/6/26/988 Brendan Higgins (3): Documentation: Add kunit_shutdown to kernel-parameters.txt kunit: tool: add support for QEMU Documentation: kunit: document support for QEMU in kunit_tool David Gow (1): kunit: Add 'kunit_shutdown' option .../admin-guide/kernel-parameters.txt | 8 + Documentation/dev-tools/kunit/usage.rst | 37 +++- lib/kunit/executor.c | 20 ++ tools/testing/kunit/kunit.py | 33 ++- tools/testing/kunit/kunit_config.py | 2 +- tools/testing/kunit/kunit_kernel.py | 209 +++++++++++++++--- tools/testing/kunit/kunit_parser.py | 2 +- tools/testing/kunit/kunit_tool_test.py | 15 +- 8 files changed, 278 insertions(+), 48 deletions(-) base-commit: 7af08140979a6e7e12b78c93b8625c8d25b084e2 -- 2.31.1.498.g6c1eba8ee3d-goog

4 years, 1 month

3
11
0 0

[PATCH] lib/test: convert test_overflow.c to use KUnit

by Daniel Latypov

This largely consists of: * replacing `err |= test_func(...)` calls with `test_func(test, ...)`. * changing pr_warn to KUNIT_*_MSG() macros to mark the failed test while printing a diagnostic message * dropping __init as there are concerns of this causing issues w/ KUnit when running on some other arches. With the exception of now needing to set CONFIG_KUNIT=y, the test can still be run as before, at boot time or as a test module. But now the output of the test will be TAP (Test Anything Protocol), the format used by kselftest instead of just warnings and printing "FAIL!" , see [1]. But now the test can also be run more quickly via kunit.py $ ./tools/testing/kunit/kunit.py run --kunitconfig /dev/stdin <<EOF CONFIG_KUNIT=y CONFIG_TEST_OVERFLOW=y EOF Note: after a mrproper this took > Elapsed time: 38.683s total, 3.598s configuring, 32.141s building, 0.000s running (Normally would be less clunky as you wouldn't use a heredoc). [1] Here's an example failure message after tweaking the test array: TAP version 14 1..1 # Subtest: lib_overflow 1..10 u8 : 18 arithmetic tests # test_u8_overflow: EXPECTATION FAILED at lib/test_overflow.c:250 Expected _got == p->sum, but _got == 1 p->sum == 0 wrong result for 1 + 0 (type u8) ... not ok 1 - test_u8_overflow s8 : 19 arithmetic tests ok 2 - test_s8_overflow ... 
ok: (s32)(0 << 31) == 0 ok: (s64)(0 << 63) == 0 ok 9 - test_overflow_shift Signed-off-by: Daniel Latypov <dlatypov(a)google.com> --- lib/Kconfig.debug | 4 +- lib/test_overflow.c | 406 ++++++++++++++++++++------------------------ 2 files changed, 185 insertions(+), 225 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 678c13967580..8e19ba9ac22c 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2195,7 +2195,9 @@ config TEST_XARRAY tristate "Test the XArray code at runtime" config TEST_OVERFLOW - tristate "Test check_*_overflow() functions at runtime" + tristate "Test check_*_overflow() functions at runtime" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS config TEST_RHASHTABLE tristate "Perform selftest on resizable hash table" diff --git a/lib/test_overflow.c b/lib/test_overflow.c index 7a4b6f6c5473..1719b924f04f 100644 --- a/lib/test_overflow.c +++ b/lib/test_overflow.c @@ -2,10 +2,9 @@ /* * Test cases for arithmetic overflow checks. */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <kunit/test.h> #include <linux/device.h> -#include <linux/init.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/module.h> @@ -19,7 +18,7 @@ t a, b; \ t sum, diff, prod; \ bool s_of, d_of, p_of; \ - } t ## _tests[] __initconst + } t ## _tests[] DEFINE_TEST_ARRAY(u8) = { {0, 0, 0, 0, 0, false, false, false}, @@ -215,48 +214,38 @@ DEFINE_TEST_ARRAY(s64) = { {0, -S64_MAX, -S64_MAX, S64_MAX, 0, false, false, false}, }; -#define check_one_op(t, fmt, op, sym, a, b, r, of) do { \ - t _r; \ - bool _of; \ - \ - _of = check_ ## op ## _overflow(a, b, &_r); \ - if (_of != of) { \ - pr_warn("expected "fmt" "sym" "fmt \ - " to%s overflow (type %s)\n", \ - a, b, of ? 
"" : " not", #t); \ - err = 1; \ - } \ - if (_r != r) { \ - pr_warn("expected "fmt" "sym" "fmt" == " \ - fmt", got "fmt" (type %s)\n", \ - a, b, r, _r, #t); \ - err = 1; \ - } \ +#define check_one_op(t, fmt, op, sym, a, b, want, of) do { \ + t _got; \ + bool _of; \ + \ + _of = check_ ## op ## _overflow(a, b, &_got); \ + KUNIT_EXPECT_EQ_MSG(test, _of, of, \ + "expected " fmt " " sym " " fmt " to%s overflow " \ + "(type %s)", a, b, of ? "" : " not", #t); \ + \ + KUNIT_EXPECT_EQ_MSG(test, _got, want, \ + "wrong result for " fmt " " sym " " fmt " (type %s)", \ + a, b, #t); \ } while (0) #define DEFINE_TEST_FUNC(t, fmt) \ -static int __init do_test_ ## t(const struct test_ ## t *p) \ -{ \ - int err = 0; \ - \ +static void do_test_ ## t(struct kunit *test, const struct test_ ## t *p)\ +{ \ check_one_op(t, fmt, add, "+", p->a, p->b, p->sum, p->s_of); \ check_one_op(t, fmt, add, "+", p->b, p->a, p->sum, p->s_of); \ check_one_op(t, fmt, sub, "-", p->a, p->b, p->diff, p->d_of); \ check_one_op(t, fmt, mul, "*", p->a, p->b, p->prod, p->p_of); \ check_one_op(t, fmt, mul, "*", p->b, p->a, p->prod, p->p_of); \ - \ - return err; \ } \ \ -static int __init test_ ## t ## _overflow(void) { \ - int err = 0; \ +static void test_ ## t ## _overflow(struct kunit *test) \ +{ \ unsigned i; \ \ pr_info("%-3s: %zu arithmetic tests\n", #t, \ ARRAY_SIZE(t ## _tests)); \ for (i = 0; i < ARRAY_SIZE(t ## _tests); ++i) \ - err |= do_test_ ## t(&t ## _tests[i]); \ - return err; \ + do_test_ ## t(test, &t ## _tests[i]); \ } DEFINE_TEST_FUNC(u8, "%d"); @@ -270,30 +259,10 @@ DEFINE_TEST_FUNC(u64, "%llu"); DEFINE_TEST_FUNC(s64, "%lld"); #endif -static int __init test_overflow_calculation(void) +static void test_overflow_shift(struct kunit *test) { - int err = 0; - - err |= test_u8_overflow(); - err |= test_s8_overflow(); - err |= test_u16_overflow(); - err |= test_s16_overflow(); - err |= test_u32_overflow(); - err |= test_s32_overflow(); -#if BITS_PER_LONG == 64 - err |= test_u64_overflow(); - err |= 
test_s64_overflow(); -#endif - - return err; -} - -static int __init test_overflow_shift(void) -{ - int err = 0; - /* Args are: value, shift, type, expected result, overflow expected */ -#define TEST_ONE_SHIFT(a, s, t, expect, of) ({ \ +#define TEST_ONE_SHIFT(a, s, t, expect, of) do { \ int __failed = 0; \ typeof(a) __a = (a); \ typeof(s) __s = (s); \ @@ -316,153 +285,153 @@ static int __init test_overflow_shift(void) if (!__failed) \ pr_info("ok: (%s)(%s << %s) == %s\n", #t, #a, #s, \ of ? "overflow" : #expect); \ - __failed; \ -}) + KUNIT_EXPECT_EQ_MSG(test, __failed, 0, \ + "(%s)(%s << %s) failed", #t, #a, #s); \ +} while (0) /* Sane shifts. */ - err |= TEST_ONE_SHIFT(1, 0, u8, 1 << 0, false); - err |= TEST_ONE_SHIFT(1, 4, u8, 1 << 4, false); - err |= TEST_ONE_SHIFT(1, 7, u8, 1 << 7, false); - err |= TEST_ONE_SHIFT(0xF, 4, u8, 0xF << 4, false); - err |= TEST_ONE_SHIFT(1, 0, u16, 1 << 0, false); - err |= TEST_ONE_SHIFT(1, 10, u16, 1 << 10, false); - err |= TEST_ONE_SHIFT(1, 15, u16, 1 << 15, false); - err |= TEST_ONE_SHIFT(0xFF, 8, u16, 0xFF << 8, false); - err |= TEST_ONE_SHIFT(1, 0, int, 1 << 0, false); - err |= TEST_ONE_SHIFT(1, 16, int, 1 << 16, false); - err |= TEST_ONE_SHIFT(1, 30, int, 1 << 30, false); - err |= TEST_ONE_SHIFT(1, 0, s32, 1 << 0, false); - err |= TEST_ONE_SHIFT(1, 16, s32, 1 << 16, false); - err |= TEST_ONE_SHIFT(1, 30, s32, 1 << 30, false); - err |= TEST_ONE_SHIFT(1, 0, unsigned int, 1U << 0, false); - err |= TEST_ONE_SHIFT(1, 20, unsigned int, 1U << 20, false); - err |= TEST_ONE_SHIFT(1, 31, unsigned int, 1U << 31, false); - err |= TEST_ONE_SHIFT(0xFFFFU, 16, unsigned int, 0xFFFFU << 16, false); - err |= TEST_ONE_SHIFT(1, 0, u32, 1U << 0, false); - err |= TEST_ONE_SHIFT(1, 20, u32, 1U << 20, false); - err |= TEST_ONE_SHIFT(1, 31, u32, 1U << 31, false); - err |= TEST_ONE_SHIFT(0xFFFFU, 16, u32, 0xFFFFU << 16, false); - err |= TEST_ONE_SHIFT(1, 0, u64, 1ULL << 0, false); - err |= TEST_ONE_SHIFT(1, 40, u64, 1ULL << 40, false); - err |= 
TEST_ONE_SHIFT(1, 63, u64, 1ULL << 63, false); - err |= TEST_ONE_SHIFT(0xFFFFFFFFULL, 32, u64, - 0xFFFFFFFFULL << 32, false); + TEST_ONE_SHIFT(1, 0, u8, 1 << 0, false); + TEST_ONE_SHIFT(1, 4, u8, 1 << 4, false); + TEST_ONE_SHIFT(1, 7, u8, 1 << 7, false); + TEST_ONE_SHIFT(0xF, 4, u8, 0xF << 4, false); + TEST_ONE_SHIFT(1, 0, u16, 1 << 0, false); + TEST_ONE_SHIFT(1, 10, u16, 1 << 10, false); + TEST_ONE_SHIFT(1, 15, u16, 1 << 15, false); + TEST_ONE_SHIFT(0xFF, 8, u16, 0xFF << 8, false); + TEST_ONE_SHIFT(1, 0, int, 1 << 0, false); + TEST_ONE_SHIFT(1, 16, int, 1 << 16, false); + TEST_ONE_SHIFT(1, 30, int, 1 << 30, false); + TEST_ONE_SHIFT(1, 0, s32, 1 << 0, false); + TEST_ONE_SHIFT(1, 16, s32, 1 << 16, false); + TEST_ONE_SHIFT(1, 30, s32, 1 << 30, false); + TEST_ONE_SHIFT(1, 0, unsigned int, 1U << 0, false); + TEST_ONE_SHIFT(1, 20, unsigned int, 1U << 20, false); + TEST_ONE_SHIFT(1, 31, unsigned int, 1U << 31, false); + TEST_ONE_SHIFT(0xFFFFU, 16, unsigned int, 0xFFFFU << 16, false); + TEST_ONE_SHIFT(1, 0, u32, 1U << 0, false); + TEST_ONE_SHIFT(1, 20, u32, 1U << 20, false); + TEST_ONE_SHIFT(1, 31, u32, 1U << 31, false); + TEST_ONE_SHIFT(0xFFFFU, 16, u32, 0xFFFFU << 16, false); + TEST_ONE_SHIFT(1, 0, u64, 1ULL << 0, false); + TEST_ONE_SHIFT(1, 40, u64, 1ULL << 40, false); + TEST_ONE_SHIFT(1, 63, u64, 1ULL << 63, false); + TEST_ONE_SHIFT(0xFFFFFFFFULL, 32, u64, 0xFFFFFFFFULL << 32, false); /* Sane shift: start and end with 0, without a too-wide shift. */ - err |= TEST_ONE_SHIFT(0, 7, u8, 0, false); - err |= TEST_ONE_SHIFT(0, 15, u16, 0, false); - err |= TEST_ONE_SHIFT(0, 31, unsigned int, 0, false); - err |= TEST_ONE_SHIFT(0, 31, u32, 0, false); - err |= TEST_ONE_SHIFT(0, 63, u64, 0, false); + TEST_ONE_SHIFT(0, 7, u8, 0, false); + TEST_ONE_SHIFT(0, 15, u16, 0, false); + TEST_ONE_SHIFT(0, 31, unsigned int, 0, false); + TEST_ONE_SHIFT(0, 31, u32, 0, false); + TEST_ONE_SHIFT(0, 63, u64, 0, false); /* Sane shift: start and end with 0, without reaching signed bit. 
*/ - err |= TEST_ONE_SHIFT(0, 6, s8, 0, false); - err |= TEST_ONE_SHIFT(0, 14, s16, 0, false); - err |= TEST_ONE_SHIFT(0, 30, int, 0, false); - err |= TEST_ONE_SHIFT(0, 30, s32, 0, false); - err |= TEST_ONE_SHIFT(0, 62, s64, 0, false); + TEST_ONE_SHIFT(0, 6, s8, 0, false); + TEST_ONE_SHIFT(0, 14, s16, 0, false); + TEST_ONE_SHIFT(0, 30, int, 0, false); + TEST_ONE_SHIFT(0, 30, s32, 0, false); + TEST_ONE_SHIFT(0, 62, s64, 0, false); /* Overflow: shifted the bit off the end. */ - err |= TEST_ONE_SHIFT(1, 8, u8, 0, true); - err |= TEST_ONE_SHIFT(1, 16, u16, 0, true); - err |= TEST_ONE_SHIFT(1, 32, unsigned int, 0, true); - err |= TEST_ONE_SHIFT(1, 32, u32, 0, true); - err |= TEST_ONE_SHIFT(1, 64, u64, 0, true); + TEST_ONE_SHIFT(1, 8, u8, 0, true); + TEST_ONE_SHIFT(1, 16, u16, 0, true); + TEST_ONE_SHIFT(1, 32, unsigned int, 0, true); + TEST_ONE_SHIFT(1, 32, u32, 0, true); + TEST_ONE_SHIFT(1, 64, u64, 0, true); /* Overflow: shifted into the signed bit. */ - err |= TEST_ONE_SHIFT(1, 7, s8, 0, true); - err |= TEST_ONE_SHIFT(1, 15, s16, 0, true); - err |= TEST_ONE_SHIFT(1, 31, int, 0, true); - err |= TEST_ONE_SHIFT(1, 31, s32, 0, true); - err |= TEST_ONE_SHIFT(1, 63, s64, 0, true); + TEST_ONE_SHIFT(1, 7, s8, 0, true); + TEST_ONE_SHIFT(1, 15, s16, 0, true); + TEST_ONE_SHIFT(1, 31, int, 0, true); + TEST_ONE_SHIFT(1, 31, s32, 0, true); + TEST_ONE_SHIFT(1, 63, s64, 0, true); /* Overflow: high bit falls off unsigned types. 
*/ /* 10010110 */ - err |= TEST_ONE_SHIFT(150, 1, u8, 0, true); + TEST_ONE_SHIFT(150, 1, u8, 0, true); /* 1000100010010110 */ - err |= TEST_ONE_SHIFT(34966, 1, u16, 0, true); + TEST_ONE_SHIFT(34966, 1, u16, 0, true); /* 10000100000010001000100010010110 */ - err |= TEST_ONE_SHIFT(2215151766U, 1, u32, 0, true); - err |= TEST_ONE_SHIFT(2215151766U, 1, unsigned int, 0, true); + TEST_ONE_SHIFT(2215151766U, 1, u32, 0, true); + TEST_ONE_SHIFT(2215151766U, 1, unsigned int, 0, true); /* 1000001000010000010000000100000010000100000010001000100010010110 */ - err |= TEST_ONE_SHIFT(9372061470395238550ULL, 1, u64, 0, true); + TEST_ONE_SHIFT(9372061470395238550ULL, 1, u64, 0, true); /* Overflow: bit shifted into signed bit on signed types. */ /* 01001011 */ - err |= TEST_ONE_SHIFT(75, 1, s8, 0, true); + TEST_ONE_SHIFT(75, 1, s8, 0, true); /* 0100010001001011 */ - err |= TEST_ONE_SHIFT(17483, 1, s16, 0, true); + TEST_ONE_SHIFT(17483, 1, s16, 0, true); /* 01000010000001000100010001001011 */ - err |= TEST_ONE_SHIFT(1107575883, 1, s32, 0, true); - err |= TEST_ONE_SHIFT(1107575883, 1, int, 0, true); + TEST_ONE_SHIFT(1107575883, 1, s32, 0, true); + TEST_ONE_SHIFT(1107575883, 1, int, 0, true); /* 0100000100001000001000000010000001000010000001000100010001001011 */ - err |= TEST_ONE_SHIFT(4686030735197619275LL, 1, s64, 0, true); + TEST_ONE_SHIFT(4686030735197619275LL, 1, s64, 0, true); /* Overflow: bit shifted past signed bit on signed types. 
*/ /* 01001011 */ - err |= TEST_ONE_SHIFT(75, 2, s8, 0, true); + TEST_ONE_SHIFT(75, 2, s8, 0, true); /* 0100010001001011 */ - err |= TEST_ONE_SHIFT(17483, 2, s16, 0, true); + TEST_ONE_SHIFT(17483, 2, s16, 0, true); /* 01000010000001000100010001001011 */ - err |= TEST_ONE_SHIFT(1107575883, 2, s32, 0, true); - err |= TEST_ONE_SHIFT(1107575883, 2, int, 0, true); + TEST_ONE_SHIFT(1107575883, 2, s32, 0, true); + TEST_ONE_SHIFT(1107575883, 2, int, 0, true); /* 0100000100001000001000000010000001000010000001000100010001001011 */ - err |= TEST_ONE_SHIFT(4686030735197619275LL, 2, s64, 0, true); + TEST_ONE_SHIFT(4686030735197619275LL, 2, s64, 0, true); /* Overflow: values larger than destination type. */ - err |= TEST_ONE_SHIFT(0x100, 0, u8, 0, true); - err |= TEST_ONE_SHIFT(0xFF, 0, s8, 0, true); - err |= TEST_ONE_SHIFT(0x10000U, 0, u16, 0, true); - err |= TEST_ONE_SHIFT(0xFFFFU, 0, s16, 0, true); - err |= TEST_ONE_SHIFT(0x100000000ULL, 0, u32, 0, true); - err |= TEST_ONE_SHIFT(0x100000000ULL, 0, unsigned int, 0, true); - err |= TEST_ONE_SHIFT(0xFFFFFFFFUL, 0, s32, 0, true); - err |= TEST_ONE_SHIFT(0xFFFFFFFFUL, 0, int, 0, true); - err |= TEST_ONE_SHIFT(0xFFFFFFFFFFFFFFFFULL, 0, s64, 0, true); + TEST_ONE_SHIFT(0x100, 0, u8, 0, true); + TEST_ONE_SHIFT(0xFF, 0, s8, 0, true); + TEST_ONE_SHIFT(0x10000U, 0, u16, 0, true); + TEST_ONE_SHIFT(0xFFFFU, 0, s16, 0, true); + TEST_ONE_SHIFT(0x100000000ULL, 0, u32, 0, true); + TEST_ONE_SHIFT(0x100000000ULL, 0, unsigned int, 0, true); + TEST_ONE_SHIFT(0xFFFFFFFFUL, 0, s32, 0, true); + TEST_ONE_SHIFT(0xFFFFFFFFUL, 0, int, 0, true); + TEST_ONE_SHIFT(0xFFFFFFFFFFFFFFFFULL, 0, s64, 0, true); /* Nonsense: negative initial value. 
*/ - err |= TEST_ONE_SHIFT(-1, 0, s8, 0, true); - err |= TEST_ONE_SHIFT(-1, 0, u8, 0, true); - err |= TEST_ONE_SHIFT(-5, 0, s16, 0, true); - err |= TEST_ONE_SHIFT(-5, 0, u16, 0, true); - err |= TEST_ONE_SHIFT(-10, 0, int, 0, true); - err |= TEST_ONE_SHIFT(-10, 0, unsigned int, 0, true); - err |= TEST_ONE_SHIFT(-100, 0, s32, 0, true); - err |= TEST_ONE_SHIFT(-100, 0, u32, 0, true); - err |= TEST_ONE_SHIFT(-10000, 0, s64, 0, true); - err |= TEST_ONE_SHIFT(-10000, 0, u64, 0, true); + TEST_ONE_SHIFT(-1, 0, s8, 0, true); + TEST_ONE_SHIFT(-1, 0, u8, 0, true); + TEST_ONE_SHIFT(-5, 0, s16, 0, true); + TEST_ONE_SHIFT(-5, 0, u16, 0, true); + TEST_ONE_SHIFT(-10, 0, int, 0, true); + TEST_ONE_SHIFT(-10, 0, unsigned int, 0, true); + TEST_ONE_SHIFT(-100, 0, s32, 0, true); + TEST_ONE_SHIFT(-100, 0, u32, 0, true); + TEST_ONE_SHIFT(-10000, 0, s64, 0, true); + TEST_ONE_SHIFT(-10000, 0, u64, 0, true); /* Nonsense: negative shift values. */ - err |= TEST_ONE_SHIFT(0, -5, s8, 0, true); - err |= TEST_ONE_SHIFT(0, -5, u8, 0, true); - err |= TEST_ONE_SHIFT(0, -10, s16, 0, true); - err |= TEST_ONE_SHIFT(0, -10, u16, 0, true); - err |= TEST_ONE_SHIFT(0, -15, int, 0, true); - err |= TEST_ONE_SHIFT(0, -15, unsigned int, 0, true); - err |= TEST_ONE_SHIFT(0, -20, s32, 0, true); - err |= TEST_ONE_SHIFT(0, -20, u32, 0, true); - err |= TEST_ONE_SHIFT(0, -30, s64, 0, true); - err |= TEST_ONE_SHIFT(0, -30, u64, 0, true); + TEST_ONE_SHIFT(0, -5, s8, 0, true); + TEST_ONE_SHIFT(0, -5, u8, 0, true); + TEST_ONE_SHIFT(0, -10, s16, 0, true); + TEST_ONE_SHIFT(0, -10, u16, 0, true); + TEST_ONE_SHIFT(0, -15, int, 0, true); + TEST_ONE_SHIFT(0, -15, unsigned int, 0, true); + TEST_ONE_SHIFT(0, -20, s32, 0, true); + TEST_ONE_SHIFT(0, -20, u32, 0, true); + TEST_ONE_SHIFT(0, -30, s64, 0, true); + TEST_ONE_SHIFT(0, -30, u64, 0, true); /* Overflow: shifted at or beyond entire type's bit width. 
*/ - err |= TEST_ONE_SHIFT(0, 8, u8, 0, true); - err |= TEST_ONE_SHIFT(0, 9, u8, 0, true); - err |= TEST_ONE_SHIFT(0, 8, s8, 0, true); - err |= TEST_ONE_SHIFT(0, 9, s8, 0, true); - err |= TEST_ONE_SHIFT(0, 16, u16, 0, true); - err |= TEST_ONE_SHIFT(0, 17, u16, 0, true); - err |= TEST_ONE_SHIFT(0, 16, s16, 0, true); - err |= TEST_ONE_SHIFT(0, 17, s16, 0, true); - err |= TEST_ONE_SHIFT(0, 32, u32, 0, true); - err |= TEST_ONE_SHIFT(0, 33, u32, 0, true); - err |= TEST_ONE_SHIFT(0, 32, int, 0, true); - err |= TEST_ONE_SHIFT(0, 33, int, 0, true); - err |= TEST_ONE_SHIFT(0, 32, s32, 0, true); - err |= TEST_ONE_SHIFT(0, 33, s32, 0, true); - err |= TEST_ONE_SHIFT(0, 64, u64, 0, true); - err |= TEST_ONE_SHIFT(0, 65, u64, 0, true); - err |= TEST_ONE_SHIFT(0, 64, s64, 0, true); - err |= TEST_ONE_SHIFT(0, 65, s64, 0, true); + TEST_ONE_SHIFT(0, 8, u8, 0, true); + TEST_ONE_SHIFT(0, 9, u8, 0, true); + TEST_ONE_SHIFT(0, 8, s8, 0, true); + TEST_ONE_SHIFT(0, 9, s8, 0, true); + TEST_ONE_SHIFT(0, 16, u16, 0, true); + TEST_ONE_SHIFT(0, 17, u16, 0, true); + TEST_ONE_SHIFT(0, 16, s16, 0, true); + TEST_ONE_SHIFT(0, 17, s16, 0, true); + TEST_ONE_SHIFT(0, 32, u32, 0, true); + TEST_ONE_SHIFT(0, 33, u32, 0, true); + TEST_ONE_SHIFT(0, 32, int, 0, true); + TEST_ONE_SHIFT(0, 33, int, 0, true); + TEST_ONE_SHIFT(0, 32, s32, 0, true); + TEST_ONE_SHIFT(0, 33, s32, 0, true); + TEST_ONE_SHIFT(0, 64, u64, 0, true); + TEST_ONE_SHIFT(0, 65, u64, 0, true); + TEST_ONE_SHIFT(0, 64, s64, 0, true); + TEST_ONE_SHIFT(0, 65, s64, 0, true); /* * Corner case: for unsigned types, we fail when we've shifted @@ -473,13 +442,11 @@ static int __init test_overflow_shift(void) * signed bit). So, for now, we will test this condition but * mark it as not expected to overflow. 
*/ - err |= TEST_ONE_SHIFT(0, 7, s8, 0, false); - err |= TEST_ONE_SHIFT(0, 15, s16, 0, false); - err |= TEST_ONE_SHIFT(0, 31, int, 0, false); - err |= TEST_ONE_SHIFT(0, 31, s32, 0, false); - err |= TEST_ONE_SHIFT(0, 63, s64, 0, false); - - return err; + TEST_ONE_SHIFT(0, 7, s8, 0, false); + TEST_ONE_SHIFT(0, 15, s16, 0, false); + TEST_ONE_SHIFT(0, 31, int, 0, false); + TEST_ONE_SHIFT(0, 31, s32, 0, false); + TEST_ONE_SHIFT(0, 63, s64, 0, false); } /* @@ -499,7 +466,7 @@ static int __init test_overflow_shift(void) #define TEST_SIZE (5 * 4096) #define DEFINE_TEST_ALLOC(func, free_func, want_arg, want_gfp, want_node)\ -static int __init test_ ## func (void *arg) \ +static void test_ ## func (struct kunit *test, void *arg) \ { \ volatile size_t a = TEST_SIZE; \ volatile size_t b = (SIZE_MAX / TEST_SIZE) + 1; \ @@ -507,31 +474,26 @@ static int __init test_ ## func (void *arg) \ \ /* Tiny allocation test. */ \ ptr = alloc ## want_arg ## want_gfp ## want_node (func, arg, 1);\ - if (!ptr) { \ - pr_warn(#func " failed regular allocation?!\n"); \ - return 1; \ - } \ + KUNIT_ASSERT_NOT_ERR_OR_NULL_MSG(test, ptr, \ + #func " failed regular allocation?!"); \ free ## want_arg (free_func, arg, ptr); \ \ /* Wrapped allocation test. */ \ ptr = alloc ## want_arg ## want_gfp ## want_node (func, arg, \ a * b); \ - if (!ptr) { \ - pr_warn(#func " unexpectedly failed bad wrapping?!\n"); \ - return 1; \ - } \ + KUNIT_ASSERT_NOT_ERR_OR_NULL_MSG(test, ptr, \ + #func " unexpectedly failed bad wrapping?!"); \ free ## want_arg (free_func, arg, ptr); \ \ /* Saturated allocation test. 
*/ \ ptr = alloc ## want_arg ## want_gfp ## want_node (func, arg, \ array_size(a, b)); \ if (ptr) { \ - pr_warn(#func " missed saturation!\n"); \ + KUNIT_FAIL(test, #func " missed saturation!\n"); \ free ## want_arg (free_func, arg, ptr); \ - return 1; \ + return; \ } \ pr_info(#func " detected saturation\n"); \ - return 0; \ } /* @@ -555,60 +517,56 @@ DEFINE_TEST_ALLOC(kvzalloc_node, kvfree, 0, 1, 1); DEFINE_TEST_ALLOC(devm_kmalloc, devm_kfree, 1, 1, 0); DEFINE_TEST_ALLOC(devm_kzalloc, devm_kfree, 1, 1, 0); -static int __init test_overflow_allocation(void) +static void test_overflow_allocation(struct kunit *test) { const char device_name[] = "overflow-test"; struct device *dev; - int err = 0; /* Create dummy device for devm_kmalloc()-family tests. */ dev = root_device_register(device_name); - if (IS_ERR(dev)) { - pr_warn("Cannot register test device\n"); - return 1; - } - - err |= test_kmalloc(NULL); - err |= test_kmalloc_node(NULL); - err |= test_kzalloc(NULL); - err |= test_kzalloc_node(NULL); - err |= test_kvmalloc(NULL); - err |= test_kvmalloc_node(NULL); - err |= test_kvzalloc(NULL); - err |= test_kvzalloc_node(NULL); - err |= test_vmalloc(NULL); - err |= test_vmalloc_node(NULL); - err |= test_vzalloc(NULL); - err |= test_vzalloc_node(NULL); - err |= test_devm_kmalloc(dev); - err |= test_devm_kzalloc(dev); + KUNIT_ASSERT_NOT_ERR_OR_NULL_MSG(test, dev, + "Cannot register test device"); + + test_kmalloc(test, NULL); + test_kmalloc_node(test, NULL); + test_kzalloc(test, NULL); + test_kzalloc_node(test, NULL); + test_kvmalloc(test, NULL); + test_kvmalloc_node(test, NULL); + test_kvzalloc(test, NULL); + test_kvzalloc_node(test, NULL); + test_vmalloc(test, NULL); + test_vmalloc_node(test, NULL); + test_vzalloc(test, NULL); + test_vzalloc_node(test, NULL); + test_devm_kmalloc(test, dev); + test_devm_kzalloc(test, dev); device_unregister(dev); - - return err; } -static int __init test_module_init(void) -{ - int err = 0; - - err |= test_overflow_calculation(); - err 
|= test_overflow_shift(); - err |= test_overflow_allocation(); +static struct kunit_case overflow_test_cases[] = { + KUNIT_CASE(test_u8_overflow), + KUNIT_CASE(test_s8_overflow), + KUNIT_CASE(test_u16_overflow), + KUNIT_CASE(test_s16_overflow), + KUNIT_CASE(test_u32_overflow), + KUNIT_CASE(test_s32_overflow), +#if BITS_PER_LONG == 64 + KUNIT_CASE(test_u64_overflow), + KUNIT_CASE(test_s64_overflow), +#endif - if (err) { - pr_warn("FAIL!\n"); - err = -EINVAL; - } else { - pr_info("all tests passed\n"); - } + KUNIT_CASE(test_overflow_shift), + KUNIT_CASE(test_overflow_allocation), + {} +}; - return err; -} +static struct kunit_suite overflow_test_suite = { + .name = "lib_overflow", + .test_cases = overflow_test_cases, +}; -static void __exit test_module_exit(void) -{ } +kunit_test_suites(&overflow_test_suite); -module_init(test_module_init); -module_exit(test_module_exit); MODULE_LICENSE("Dual MIT/GPL"); base-commit: cda689f8708b6bef0b921c3a17fcdecbe959a079 -- 2.31.1.527.g47e6f16901-goog

4 years, 1 month

2
2
0 0

[PATCH v4] KVM: x86: Fix KVM_GET_CPUID2 ioctl to return cpuid entries count

by Valeriy Vdovin

KVM_GET_CPUID2 kvm ioctl is not very well documented, but the way it is implemented in function kvm_vcpu_ioctl_get_cpuid2 suggests that even on the error path it will try to return the number of entries to the caller. But the kvm vcpu ioctl dispatcher code in kvm_arch_vcpu_ioctl ignores any output from this function if it sees the error return code. It is clear from the code that it was designed to receive some small number of entries and to return E2BIG along with the corrected number. This lost logic in the dispatcher code has been restored by removing the lines that check for function return code and skip if error is found. Without it, the ioctl caller will see both the number of entries and the correct error. In selftests relevant function vcpu_get_cpuid has also been modified to utilize the number of cpuid entries returned along with errno E2BIG. Signed-off-by: Valeriy Vdovin <valeriy.vdovin(a)virtuozzo.com> --- v4: - Added description to documentation of KVM_GET_CPUID2. - Copy back nent only if E2BIG is returned. - Fixed error code sign. - Corrected version message Documentation/virt/kvm/api.rst | 81 ++++++++++++------- arch/x86/kvm/x86.c | 11 ++- .../selftests/kvm/lib/x86_64/processor.c | 20 +++-- 3 files changed, 73 insertions(+), 39 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 245d80581f15..c7cfe4b9614e 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -711,7 +711,34 @@ resulting CPUID configuration through KVM_GET_CPUID2 in case. }; -4.21 KVM_SET_SIGNAL_MASK +4.21 KVM_GET_CPUID2 +------------------ + +:Capability: basic +:Architectures: x86 +:Type: vcpu ioctl +:Parameters: struct kvm_cpuid (in/out) +:Returns: 0 on success, -1 on error + +Returns a full list of cpuid entries that are supported by this vcpu and were +previously set by KVM_SET_CPUID/KVM_SET_CPUID2. 
+ +The userspace must specify the number of cpuid entries it is ready to accept +from the kernel in the 'nent' field of 'struct kmv_cpuid'. + +The kernel will try to return all the cpuid entries it has in the response. +If the userspace nent value is too small for the full response, the kernel will +set the error code to -E2BIG, set the same 'nent' field to the actual number of +cpuid_entries and return without writing back any entries to the userspace. +The userspace can thus implement a two-call sequence, where the first call is +made with nent set to 0 to read the number of entries from the kernel and +use this response to allocate enough memory for a full response for the second +call. + +The call cal also return with error code -EFAULT in case of other errors. + + +4.22 KVM_SET_SIGNAL_MASK ------------------------ :Capability: basic @@ -737,7 +764,7 @@ signal mask. }; -4.22 KVM_GET_FPU +4.23 KVM_GET_FPU ---------------- :Capability: basic @@ -766,7 +793,7 @@ Reads the floating point state from the vcpu. }; -4.23 KVM_SET_FPU +4.24 KVM_SET_FPU ---------------- :Capability: basic @@ -795,7 +822,7 @@ Writes the floating point state to the vcpu. }; -4.24 KVM_CREATE_IRQCHIP +4.25 KVM_CREATE_IRQCHIP ----------------------- :Capability: KVM_CAP_IRQCHIP, KVM_CAP_S390_IRQCHIP (s390) @@ -817,7 +844,7 @@ Note that on s390 the KVM_CAP_S390_IRQCHIP vm capability needs to be enabled before KVM_CREATE_IRQCHIP can be used. -4.25 KVM_IRQ_LINE +4.26 KVM_IRQ_LINE ----------------- :Capability: KVM_CAP_IRQCHIP @@ -886,7 +913,7 @@ be used for a userspace interrupt controller. }; -4.26 KVM_GET_IRQCHIP +4.27 KVM_GET_IRQCHIP -------------------- :Capability: KVM_CAP_IRQCHIP @@ -911,7 +938,7 @@ KVM_CREATE_IRQCHIP into a buffer provided by the caller. }; -4.27 KVM_SET_IRQCHIP +4.28 KVM_SET_IRQCHIP -------------------- :Capability: KVM_CAP_IRQCHIP @@ -936,7 +963,7 @@ KVM_CREATE_IRQCHIP from a buffer provided by the caller. 
}; -4.28 KVM_XEN_HVM_CONFIG +4.29 KVM_XEN_HVM_CONFIG ----------------------- :Capability: KVM_CAP_XEN_HVM @@ -972,7 +999,7 @@ fields must be zero. No other flags are currently valid in the struct kvm_xen_hvm_config. -4.29 KVM_GET_CLOCK +4.30 KVM_GET_CLOCK ------------------ :Capability: KVM_CAP_ADJUST_CLOCK @@ -1005,7 +1032,7 @@ TSC is not stable. }; -4.30 KVM_SET_CLOCK +4.31 KVM_SET_CLOCK ------------------ :Capability: KVM_CAP_ADJUST_CLOCK @@ -1027,7 +1054,7 @@ such as migration. }; -4.31 KVM_GET_VCPU_EVENTS +4.32 KVM_GET_VCPU_EVENTS ------------------------ :Capability: KVM_CAP_VCPU_EVENTS @@ -1146,7 +1173,7 @@ directly to the virtual CPU). __u32 reserved[12]; }; -4.32 KVM_SET_VCPU_EVENTS +4.33 KVM_SET_VCPU_EVENTS ------------------------ :Capability: KVM_CAP_VCPU_EVENTS @@ -1209,7 +1236,7 @@ exceptions by manipulating individual registers using the KVM_SET_ONE_REG API. See KVM_GET_VCPU_EVENTS for the data structure. -4.33 KVM_GET_DEBUGREGS +4.34 KVM_GET_DEBUGREGS ---------------------- :Capability: KVM_CAP_DEBUGREGS @@ -1231,7 +1258,7 @@ Reads debug registers from the vcpu. }; -4.34 KVM_SET_DEBUGREGS +4.35 KVM_SET_DEBUGREGS ---------------------- :Capability: KVM_CAP_DEBUGREGS @@ -1246,7 +1273,7 @@ See KVM_GET_DEBUGREGS for the data structure. The flags field is unused yet and must be cleared on entry. -4.35 KVM_SET_USER_MEMORY_REGION +4.36 KVM_SET_USER_MEMORY_REGION ------------------------------- :Capability: KVM_CAP_USER_MEMORY @@ -1315,7 +1342,7 @@ The KVM_SET_MEMORY_REGION does not allow fine grained control over memory allocation and is deprecated. -4.36 KVM_SET_TSS_ADDR +4.37 KVM_SET_TSS_ADDR --------------------- :Capability: KVM_CAP_SET_TSS_ADDR @@ -1335,7 +1362,7 @@ because of a quirk in the virtualization implementation (see the internals documentation when it pops into existence). -4.37 KVM_ENABLE_CAP +4.38 KVM_ENABLE_CAP ------------------- :Capability: KVM_CAP_ENABLE_CAP @@ -1390,7 +1417,7 @@ function properly, this is the place to put them. 
The vcpu ioctl should be used for vcpu-specific capabilities, the vm ioctl for vm-wide capabilities. -4.38 KVM_GET_MP_STATE +4.39 KVM_GET_MP_STATE --------------------- :Capability: KVM_CAP_MP_STATE @@ -1438,7 +1465,7 @@ For arm/arm64: The only states that are valid are KVM_MP_STATE_STOPPED and KVM_MP_STATE_RUNNABLE which reflect if the vcpu is paused or not. -4.39 KVM_SET_MP_STATE +4.40 KVM_SET_MP_STATE --------------------- :Capability: KVM_CAP_MP_STATE @@ -1460,7 +1487,7 @@ For arm/arm64: The only states that are valid are KVM_MP_STATE_STOPPED and KVM_MP_STATE_RUNNABLE which reflect if the vcpu should be paused or not. -4.40 KVM_SET_IDENTITY_MAP_ADDR +4.41 KVM_SET_IDENTITY_MAP_ADDR ------------------------------ :Capability: KVM_CAP_SET_IDENTITY_MAP_ADDR @@ -1484,7 +1511,7 @@ documentation when it pops into existence). Fails if any VCPU has already been created. -4.41 KVM_SET_BOOT_CPU_ID +4.42 KVM_SET_BOOT_CPU_ID ------------------------ :Capability: KVM_CAP_SET_BOOT_CPU_ID @@ -1499,7 +1526,7 @@ is vcpu 0. This ioctl has to be called before vcpu creation, otherwise it will return EBUSY error. -4.42 KVM_GET_XSAVE +4.43 KVM_GET_XSAVE ------------------ :Capability: KVM_CAP_XSAVE @@ -1518,7 +1545,7 @@ otherwise it will return EBUSY error. This ioctl would copy current vcpu's xsave struct to the userspace. -4.43 KVM_SET_XSAVE +4.44 KVM_SET_XSAVE ------------------ :Capability: KVM_CAP_XSAVE @@ -1537,7 +1564,7 @@ This ioctl would copy current vcpu's xsave struct to the userspace. This ioctl would copy userspace's xsave struct to the kernel. -4.44 KVM_GET_XCRS +4.45 KVM_GET_XCRS ----------------- :Capability: KVM_CAP_XCRS @@ -1564,7 +1591,7 @@ This ioctl would copy userspace's xsave struct to the kernel. This ioctl would copy current vcpu's xcrs to the userspace. -4.45 KVM_SET_XCRS +4.46 KVM_SET_XCRS ----------------- :Capability: KVM_CAP_XCRS @@ -1591,7 +1618,7 @@ This ioctl would copy current vcpu's xcrs to the userspace. 
This ioctl would set vcpu's xcr to the value userspace specified. -4.46 KVM_GET_SUPPORTED_CPUID +4.47 KVM_GET_SUPPORTED_CPUID ---------------------------- :Capability: KVM_CAP_EXT_CPUID @@ -1676,7 +1703,7 @@ if that returns true and you use KVM_CREATE_IRQCHIP, or if you emulate the feature in userspace, then you can enable the feature for KVM_SET_CPUID2. -4.47 KVM_PPC_GET_PVINFO +4.48 KVM_PPC_GET_PVINFO ----------------------- :Capability: KVM_CAP_PPC_GET_PVINFO diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index efc7a82ab140..3f941b1f4e78 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4773,14 +4773,17 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid))) goto out; + r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid, cpuid_arg->entries); - if (r) + + if (r && r != -E2BIG) goto out; - r = -EFAULT; - if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid))) + + if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid))) { + r = -EFAULT; goto out; - r = 0; + } break; } case KVM_GET_MSRS: { diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index a8906e60a108..a412b39ad791 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -727,17 +727,21 @@ struct kvm_cpuid2 *vcpu_get_cpuid(struct kvm_vm *vm, uint32_t vcpuid) cpuid = allocate_kvm_cpuid2(); max_ent = cpuid->nent; + cpuid->nent = 0; - for (cpuid->nent = 1; cpuid->nent <= max_ent; cpuid->nent++) { - rc = ioctl(vcpu->fd, KVM_GET_CPUID2, cpuid); - if (!rc) - break; + rc = ioctl(vcpu->fd, KVM_GET_CPUID2, cpuid); + TEST_ASSERT(rc == -1 && errno == E2BIG, + "KVM_GET_CPUID2 should return E2BIG: %d %d", + rc, errno); - TEST_ASSERT(rc == -1 && errno == E2BIG, - "KVM_GET_CPUID2 should either succeed or give E2BIG: %d %d", - rc, errno); - } + TEST_ASSERT(cpuid->nent, + "KVM_GET_CPUID2 failed to set cpuid->nent with E2BIG"); + + 
TEST_ASSERT(cpuid->nent < max_ent, + "KVM_GET_CPUID2 has %d entries, expected maximum: %d", + cpuid->nent, max_ent); + rc = ioctl(vcpu->fd, KVM_GET_CPUID2, cpuid); TEST_ASSERT(rc == 0, "KVM_GET_CPUID2 failed, rc: %i errno: %i", rc, errno); -- 2.17.1

4 years, 1 month

6
9
0 0

[PATCH] selftests: kvm: remove reassignment of non-absolute variables

by Bill Wendling

Clang's integrated assembler does not allow symbols with non-absolute values to be reassigned. Modify the interrupt entry loop macro to be compatible with IAS by using a label and an offset. Cc: Jian Cai <caij2003(a)gmail.com> Signed-off-by: Bill Wendling <morbo(a)google.com> References: https://lore.kernel.org/lkml/20200714233024.1789985-1-caij2003@gmail.com/ --- tools/testing/selftests/kvm/lib/x86_64/handlers.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/x86_64/handlers.S b/tools/testing/selftests/kvm/lib/x86_64/handlers.S index aaf7bc7d2ce1..3f9181e9a0a7 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/handlers.S +++ b/tools/testing/selftests/kvm/lib/x86_64/handlers.S @@ -54,9 +54,9 @@ idt_handlers: .align 8 /* Fetch current address and append it to idt_handlers. */ - current_handler = . +0 : .pushsection .rodata -.quad current_handler + .quad 0b .popsection .if ! \has_error -- 2.29.2.576.ga3fc446d84-goog

4 years, 1 month

4
9
0 0

Subject: [PATCH 0/1] SGX self test fails

by Tim Gardner

I'm just starting my learning curve on SGX, so I don't know if I've missed some setup for the SGX device entries. After looking at arch/x86/kernel/cpu/sgx/driver.c I see that there is no mode value for either sgx_dev_enclave or sgx_dev_provision. With this patch I can get the SGX self test to complete: sudo ./test_sgx Warning: no execute permissions on device file /dev/sgx_enclave 0x0000000000000000 0x0000000000002000 0x03 0x0000000000002000 0x0000000000001000 0x05 0x0000000000003000 0x0000000000003000 0x03 SUCCESS Is the warning even necessary? Tim

4 years, 1 month

4
7
0 0

[PATCH 2/2] lib/test: convert lib/test_list_sort.c to use KUnit

by Daniel Latypov

Functionally, this just means that the test output will be slightly changed and it'll now depend on CONFIG_KUNIT=y/m. It'll still run at boot time and can still be built as a loadable module. There was a pre-existing patch to convert this test that I found later, here [1]. Compared to [1], this patch doesn't rename files and uses KUnit features more heavily (i.e. does more than converting pr_err() calls to KUNIT_FAIL()). What this conversion gives us: * a shorter test thanks to KUnit's macros * a way to run this a bit more easily via kunit.py (and CONFIG_KUNIT_ALL_TESTS=y) [2] * a structured way of reporting pass/fail * uses kunit-managed allocations to avoid the risk of memory leaks * more descriptive error messages: * i.e. it prints out which fields are invalid, what the expected values are, etc. What this conversion does not do: * change the name of the file (and thus the name of the module) * change the name of the config option Leaving these as-is for now to minimize the impact to people wanting to run this test. IMO, that concern trumps following KUnit's style guide for both names, at least for now. [1] https://lore.kernel.org/linux-kselftest/20201015014616.309000-1-vitor@massa… [2] Can be run via $ ./tools/testing/kunit/kunit.py run --kunitconfig /dev/stdin <<EOF CONFIG_KUNIT=y CONFIG_TEST_LIST_SORT=y EOF [16:55:56] Configuring KUnit Kernel ... [16:55:56] Building KUnit Kernel ... [16:56:29] Starting KUnit Kernel ... [16:56:32] ============================================================ [16:56:32] ======== [PASSED] list_sort ======== [16:56:32] [PASSED] list_sort_test [16:56:32] ============================================================ [16:56:32] Testing complete. 1 tests run. 0 failed. 0 crashed. [16:56:32] Elapsed time: 35.668s total, 0.001s configuring, 32.725s building, 0.000s running Note: the build time is as after a `make mrproper`. 
Signed-off-by: Daniel Latypov <dlatypov(a)google.com> --- lib/Kconfig.debug | 5 +- lib/test_list_sort.c | 128 +++++++++++++++++-------------------------- 2 files changed, 54 insertions(+), 79 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 417c3d3e521b..09a0cc8a55cc 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1999,8 +1999,9 @@ config LKDTM Documentation/fault-injection/provoke-crashes.rst config TEST_LIST_SORT - tristate "Linked list sorting test" - depends on DEBUG_KERNEL || m + tristate "Linked list sorting test" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS help Enable this to turn on 'list_sort()' function test. This test is executed only once during system boot (so affects only boot time), diff --git a/lib/test_list_sort.c b/lib/test_list_sort.c index 1f017d3b610e..ccfd98dbf57c 100644 --- a/lib/test_list_sort.c +++ b/lib/test_list_sort.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only -#define pr_fmt(fmt) "list_sort_test: " fmt +#include <kunit/test.h> #include <linux/kernel.h> #include <linux/list_sort.h> @@ -23,67 +23,52 @@ struct debug_el { struct list_head list; unsigned int poison2; int value; - unsigned serial; + unsigned int serial; }; -/* Array, containing pointers to all elements in the test list */ -static struct debug_el **elts __initdata; - -static int __init check(struct debug_el *ela, struct debug_el *elb) +static void check(struct kunit *test, struct debug_el *ela, struct debug_el *elb) { - if (ela->serial >= TEST_LIST_LEN) { - pr_err("error: incorrect serial %d\n", ela->serial); - return -EINVAL; - } - if (elb->serial >= TEST_LIST_LEN) { - pr_err("error: incorrect serial %d\n", elb->serial); - return -EINVAL; - } - if (elts[ela->serial] != ela || elts[elb->serial] != elb) { - pr_err("error: phantom element\n"); - return -EINVAL; - } - if (ela->poison1 != TEST_POISON1 || ela->poison2 != TEST_POISON2) { - pr_err("error: bad poison: %#x/%#x\n", - ela->poison1, ela->poison2); - return 
-EINVAL; - } - if (elb->poison1 != TEST_POISON1 || elb->poison2 != TEST_POISON2) { - pr_err("error: bad poison: %#x/%#x\n", - elb->poison1, elb->poison2); - return -EINVAL; - } - return 0; + struct debug_el **elts = test->priv; + + KUNIT_EXPECT_LT_MSG(test, ela->serial, (unsigned int)TEST_LIST_LEN, "incorrect serial"); + KUNIT_EXPECT_LT_MSG(test, elb->serial, (unsigned int)TEST_LIST_LEN, "incorrect serial"); + + KUNIT_EXPECT_PTR_EQ_MSG(test, elts[ela->serial], ela, "phantom element"); + KUNIT_EXPECT_PTR_EQ_MSG(test, elts[elb->serial], elb, "phantom element"); + + KUNIT_EXPECT_EQ_MSG(test, ela->poison1, TEST_POISON1, "bad poison"); + KUNIT_EXPECT_EQ_MSG(test, ela->poison2, TEST_POISON2, "bad poison"); + + KUNIT_EXPECT_EQ_MSG(test, elb->poison1, TEST_POISON1, "bad poison"); + KUNIT_EXPECT_EQ_MSG(test, elb->poison2, TEST_POISON2, "bad poison"); } -static int __init cmp(void *priv, struct list_head *a, struct list_head *b) +/* `priv` is the test pointer so check() can fail the test if the list is invalid. 
*/ +static int cmp(void *priv, struct list_head *a, struct list_head *b) { struct debug_el *ela, *elb; ela = container_of(a, struct debug_el, list); elb = container_of(b, struct debug_el, list); - check(ela, elb); + check(priv, ela, elb); return ela->value - elb->value; } -static int __init list_sort_test(void) +static void list_sort_test(struct kunit *test) { - int i, count = 1, err = -ENOMEM; - struct debug_el *el; + int i, count = 1; + struct debug_el *el, **elts; struct list_head *cur; LIST_HEAD(head); - pr_debug("start testing list_sort()\n"); - - elts = kcalloc(TEST_LIST_LEN, sizeof(*elts), GFP_KERNEL); - if (!elts) - return err; + elts = kunit_kcalloc(test, TEST_LIST_LEN, sizeof(*elts), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, elts); + test->priv = elts; for (i = 0; i < TEST_LIST_LEN; i++) { - el = kmalloc(sizeof(*el), GFP_KERNEL); - if (!el) - goto exit; + el = kunit_kmalloc(test, sizeof(*el), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, el); /* force some equivalencies */ el->value = prandom_u32() % (TEST_LIST_LEN / 3); @@ -94,55 +79,44 @@ static int __init list_sort_test(void) list_add_tail(&el->list, &head); } - list_sort(NULL, &head, cmp); + list_sort(test, &head, cmp); - err = -EINVAL; for (cur = head.next; cur->next != &head; cur = cur->next) { struct debug_el *el1; int cmp_result; - if (cur->next->prev != cur) { - pr_err("error: list is corrupted\n"); - goto exit; - } + KUNIT_ASSERT_PTR_EQ_MSG(test, cur->next->prev, cur, + "list is corrupted"); - cmp_result = cmp(NULL, cur, cur->next); - if (cmp_result > 0) { - pr_err("error: list is not sorted\n"); - goto exit; - } + cmp_result = cmp(test, cur, cur->next); + KUNIT_ASSERT_LE_MSG(test, cmp_result, 0, "list is not sorted"); el = container_of(cur, struct debug_el, list); el1 = container_of(cur->next, struct debug_el, list); - if (cmp_result == 0 && el->serial >= el1->serial) { - pr_err("error: order of equivalent elements not " - "preserved\n"); - goto exit; + if (cmp_result == 0) { + 
KUNIT_ASSERT_LE_MSG(test, el->serial, el1->serial, + "order of equivalent elements not preserved"); } - if (check(el, el1)) { - pr_err("error: element check failed\n"); - goto exit; - } + check(test, el, el1); count++; } - if (head.prev != cur) { - pr_err("error: list is corrupted\n"); - goto exit; - } + KUNIT_EXPECT_PTR_EQ_MSG(test, head.prev, cur, "list is corrupted"); + KUNIT_EXPECT_EQ_MSG(test, count, TEST_LIST_LEN, + "list length changed after sorting!"); +} - if (count != TEST_LIST_LEN) { - pr_err("error: bad list length %d", count); - goto exit; - } +static struct kunit_case list_sort_cases[] = { + KUNIT_CASE(list_sort_test), + {} +}; + +static struct kunit_suite list_sort_suite = { + .name = "list_sort", + .test_cases = list_sort_cases, +}; + +kunit_test_suites(&list_sort_suite); - err = 0; -exit: - for (i = 0; i < TEST_LIST_LEN; i++) - kfree(elts[i]); - kfree(elts); - return err; -} -module_init(list_sort_test); MODULE_LICENSE("GPL"); -- 2.31.1.498.g6c1eba8ee3d-goog

4 years, 1 month

4
5
0 0

[PATCH v2 1/2] kunit: introduce kunit_kmalloc_array/kunit_kcalloc() helpers

by Daniel Latypov

Add in: * kunit_kmalloc_array() and wire up kunit_kmalloc() to be a special case of it. * kunit_kcalloc() for symmetry with kunit_kzalloc() This should make using KUnit more natural by making it more similar to the existing *alloc() APIs. And while we shouldn't necessarily be writing unit tests where overflow should be a concern, it can't hurt to be safe. Signed-off-by: Daniel Latypov <dlatypov(a)google.com> Reviewed-by: David Gow <davidgow(a)google.com> Reviewed-by: Brendan Higgins <brendanhiggins(a)google.com> --- v1 -> v2: s/kzalloc/kcalloc in doc comment. --- include/kunit/test.h | 36 ++++++++++++++++++++++++++++++++---- lib/kunit/test.c | 22 ++++++++++++---------- 2 files changed, 44 insertions(+), 14 deletions(-) diff --git a/include/kunit/test.h b/include/kunit/test.h index 49601c4b98b8..e8ecb69dd567 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -577,16 +577,30 @@ static inline int kunit_destroy_named_resource(struct kunit *test, void kunit_remove_resource(struct kunit *test, struct kunit_resource *res); /** - * kunit_kmalloc() - Like kmalloc() except the allocation is *test managed*. + * kunit_kmalloc_array() - Like kmalloc_array() except the allocation is *test managed*. * @test: The test context object. + * @n: number of elements. * @size: The size in bytes of the desired memory. * @gfp: flags passed to underlying kmalloc(). * - * Just like `kmalloc(...)`, except the allocation is managed by the test case + * Just like `kmalloc_array(...)`, except the allocation is managed by the test case * and is automatically cleaned up after the test case concludes. See &struct * kunit_resource for more information. */ -void *kunit_kmalloc(struct kunit *test, size_t size, gfp_t gfp); +void *kunit_kmalloc_array(struct kunit *test, size_t n, size_t size, gfp_t flags); + +/** + * kunit_kmalloc() - Like kmalloc() except the allocation is *test managed*. + * @test: The test context object. + * @size: The size in bytes of the desired memory. 
+ * @gfp: flags passed to underlying kmalloc(). + * + * See kmalloc() and kunit_kmalloc_array() for more information. + */ +static inline void *kunit_kmalloc(struct kunit *test, size_t size, gfp_t gfp) +{ + return kunit_kmalloc_array(test, 1, size, gfp); +} /** * kunit_kfree() - Like kfree except for allocations managed by KUnit. @@ -601,13 +615,27 @@ void kunit_kfree(struct kunit *test, const void *ptr); * @size: The size in bytes of the desired memory. * @gfp: flags passed to underlying kmalloc(). * - * See kzalloc() and kunit_kmalloc() for more information. + * See kzalloc() and kunit_kmalloc_array() for more information. */ static inline void *kunit_kzalloc(struct kunit *test, size_t size, gfp_t gfp) { return kunit_kmalloc(test, size, gfp | __GFP_ZERO); } +/** + * kunit_kcalloc() - Just like kunit_kmalloc_array(), but zeroes the allocation. + * @test: The test context object. + * @n: number of elements. + * @size: The size in bytes of the desired memory. + * @gfp: flags passed to underlying kmalloc(). + * + * See kcalloc() and kunit_kmalloc_array() for more information. 
+ */ +static inline void *kunit_kcalloc(struct kunit *test, size_t n, size_t size, gfp_t flags) +{ + return kunit_kmalloc_array(test, n, size, flags | __GFP_ZERO); +} + void kunit_cleanup(struct kunit *test); void kunit_log_append(char *log, const char *fmt, ...); diff --git a/lib/kunit/test.c b/lib/kunit/test.c index 2f6cc0123232..41fa46b14c3b 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c @@ -573,41 +573,43 @@ int kunit_destroy_resource(struct kunit *test, kunit_resource_match_t match, } EXPORT_SYMBOL_GPL(kunit_destroy_resource); -struct kunit_kmalloc_params { +struct kunit_kmalloc_array_params { + size_t n; size_t size; gfp_t gfp; }; -static int kunit_kmalloc_init(struct kunit_resource *res, void *context) +static int kunit_kmalloc_array_init(struct kunit_resource *res, void *context) { - struct kunit_kmalloc_params *params = context; + struct kunit_kmalloc_array_params *params = context; - res->data = kmalloc(params->size, params->gfp); + res->data = kmalloc_array(params->n, params->size, params->gfp); if (!res->data) return -ENOMEM; return 0; } -static void kunit_kmalloc_free(struct kunit_resource *res) +static void kunit_kmalloc_array_free(struct kunit_resource *res) { kfree(res->data); } -void *kunit_kmalloc(struct kunit *test, size_t size, gfp_t gfp) +void *kunit_kmalloc_array(struct kunit *test, size_t n, size_t size, gfp_t gfp) { - struct kunit_kmalloc_params params = { + struct kunit_kmalloc_array_params params = { .size = size, + .n = n, .gfp = gfp }; return kunit_alloc_resource(test, - kunit_kmalloc_init, - kunit_kmalloc_free, + kunit_kmalloc_array_init, + kunit_kmalloc_array_free, gfp, &params); } -EXPORT_SYMBOL_GPL(kunit_kmalloc); +EXPORT_SYMBOL_GPL(kunit_kmalloc_array); void kunit_kfree(struct kunit *test, const void *ptr) { base-commit: cda689f8708b6bef0b921c3a17fcdecbe959a079 -- 2.31.1.527.g47e6f16901-goog

4 years, 1 month

1
1
0 0

[PATCH v2] kbuild: replace LANG=C with LC_ALL=C

by Masahiro Yamada

LANG gives a weak default to each LC_* in case it is not explicitly defined. LC_ALL, if set, overrides all other LC_* variables. LANG < LC_CTYPE, LC_COLLATE, LC_MONETARY, LC_NUMERIC, ... < LC_ALL This is why documentation such as [1] suggests to set LC_ALL in build scripts to get the deterministic result. LANG=C is not strong enough to override LC_* that may be set by end users. [1]: https://reproducible-builds.org/docs/locales/ Signed-off-by: Masahiro Yamada <masahiroy(a)kernel.org> Acked-by: Michael Ellerman <mpe(a)ellerman.id.au> (powerpc) Reviewed-by: Matthias Maennich <maennich(a)google.com> Acked-by: Matthieu Baerts <matthieu.baerts(a)tessares.net> (mptcp) --- Changes in v2: - rebase arch/powerpc/boot/wrapper | 2 +- scripts/nsdeps | 2 +- scripts/recordmcount.pl | 2 +- scripts/setlocalversion | 2 +- scripts/tags.sh | 2 +- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 2 +- usr/gen_initramfs.sh | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index 41fa0a8715e3..cdb796b76e2e 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -191,7 +191,7 @@ if [ -z "$kernel" ]; then kernel=vmlinux fi -LANG=C elfformat="`${CROSS}objdump -p "$kernel" | grep 'file format' | awk '{print $4}'`" +LC_ALL=C elfformat="`${CROSS}objdump -p "$kernel" | grep 'file format' | awk '{print $4}'`" case "$elfformat" in elf64-powerpcle) format=elf64lppc ;; elf64-powerpc) format=elf32ppc ;; diff --git a/scripts/nsdeps b/scripts/nsdeps index e8ce2a4d704a..04c4b96e95ec 100644 --- a/scripts/nsdeps +++ b/scripts/nsdeps @@ -44,7 +44,7 @@ generate_deps() { for source_file in $mod_source_files; do sed '/MODULE_IMPORT_NS/Q' $source_file > ${source_file}.tmp offset=$(wc -l ${source_file}.tmp | awk '{print $1;}') - cat $source_file | grep MODULE_IMPORT_NS | LANG=C sort -u >> ${source_file}.tmp + cat $source_file | grep MODULE_IMPORT_NS | LC_ALL=C sort -u >> ${source_file}.tmp tail -n +$((offset +1)) 
${source_file} | grep -v MODULE_IMPORT_NS >> ${source_file}.tmp if ! diff -q ${source_file} ${source_file}.tmp; then mv ${source_file}.tmp ${source_file} diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 867860ea57da..0a7fc9507d6f 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -497,7 +497,7 @@ sub update_funcs # # Step 2: find the sections and mcount call sites # -open(IN, "LANG=C $objdump -hdr $inputfile|") || die "error running $objdump"; +open(IN, "LC_ALL=C $objdump -hdr $inputfile|") || die "error running $objdump"; my $text; diff --git a/scripts/setlocalversion b/scripts/setlocalversion index bb709eda96cd..db941f6d9591 100755 --- a/scripts/setlocalversion +++ b/scripts/setlocalversion @@ -126,7 +126,7 @@ scm_version() fi # Check for svn and a svn repo. - if rev=$(LANG= LC_ALL= LC_MESSAGES=C svn info 2>/dev/null | grep '^Last Changed Rev'); then + if rev=$(LC_ALL=C svn info 2>/dev/null | grep '^Last Changed Rev'); then rev=$(echo $rev | awk '{print $NF}') printf -- '-svn%s' "$rev" diff --git a/scripts/tags.sh b/scripts/tags.sh index fd96734deff1..db8ba411860a 100755 --- a/scripts/tags.sh +++ b/scripts/tags.sh @@ -326,5 +326,5 @@ esac # Remove structure forward declarations. 
if [ -n "$remove_structs" ]; then - LANG=C sed -i -e '/^$[a-zA-Z_][a-zA-Z0-9_]*$\t.*\t\/\^struct \1;.*\$\/;"\tx$/d' $1 + LC_ALL=C sed -i -e '/^$[a-zA-Z_][a-zA-Z0-9_]*$\t.*\t\/\^struct \1;.*\$\/;"\tx$/d' $1 fi diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 9236609731b1..3c4cb72ed8a4 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -274,7 +274,7 @@ check_mptcp_disabled() ip netns exec ${disabled_ns} sysctl -q net.mptcp.enabled=0 local err=0 - LANG=C ip netns exec ${disabled_ns} ./mptcp_connect -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \ + LC_ALL=C ip netns exec ${disabled_ns} ./mptcp_connect -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \ grep -q "^socket: Protocol not available$" && err=1 ip netns delete ${disabled_ns} diff --git a/usr/gen_initramfs.sh b/usr/gen_initramfs.sh index 8ae831657e5d..63476bb70b41 100755 --- a/usr/gen_initramfs.sh +++ b/usr/gen_initramfs.sh @@ -147,7 +147,7 @@ dir_filelist() { header "$1" srcdir=$(echo "$1" | sed -e 's://*:/:g') - dirlist=$(find "${srcdir}" -printf "%p %m %U %G\n" | LANG=C sort) + dirlist=$(find "${srcdir}" -printf "%p %m %U %G\n" | LC_ALL=C sort) # If $dirlist is only one line, then the directory is empty if [ "$(echo "${dirlist}" | wc -l)" -gt 1 ]; then -- 2.27.0

4 years, 2 months

2
1
0 0

[PATCH v4] selftests/mincore: get readahead size for check_file_mmap()

by Jeffle Xu

The readahead size used to be 2MB, thus it's reasonable to set the file size as 4MB when checking check_file_mmap(). However since commit c2e4cd57cfa1 ("block: lift setting the readahead size into the block layer"), readahead size could be as large as twice the io_opt, and thus the hardcoded file size no longer works. check_file_mmap() may report "Read-ahead pages reached the end of the file" when the readahead size actually exceeds the file size in this case. To fix this issue, read the exact readahead window size via BLKRAGET ioctl. Since now we have the readahead window size, take a more fine-grained check. It is worth noting that this fine-grained check may be broken as the sync readahead algorithm of kernel changes. It may be acceptable since the algorithm of readahead ranging should be quite stable, and we could tune the test case accordingly if the algorithm indeed changes. Reported-by: James Wang <jnwang(a)linux.alibaba.com> Acked-by: Ricardo Cañuelo <ricardo.canuelo(a)collabora.com> Signed-off-by: Jeffle Xu <jefflexu(a)linux.alibaba.com> --- changes since v3: - make the check more fine-grained since we have the exact readahead window size now, as suggested by Ricardo Cañuelo changes since v2: - add 'Reported-by' changes since v1: - add the test name "mincore" in the subject line - add the error message in commit message - rename @filesize to @file_size to keep a more consistent naming convention --- .../selftests/mincore/mincore_selftest.c | 96 +++++++++++++------ 1 file changed, 68 insertions(+), 28 deletions(-) diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c index 5a1e85ff5d32..369b35af4b4f 100644 --- a/tools/testing/selftests/mincore/mincore_selftest.c +++ b/tools/testing/selftests/mincore/mincore_selftest.c @@ -15,6 +15,11 @@ #include <string.h> #include <fcntl.h> #include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/ioctl.h> +#include <sys/sysmacros.h> 
+#include <sys/mount.h> #include "../kselftest.h" #include "../kselftest_harness.h" @@ -193,12 +198,44 @@ TEST(check_file_mmap) int retval; int page_size; int fd; - int i; + int i, start, end; int ra_pages = 0; + long ra_size, file_size; + struct stat stats; + dev_t devt; + unsigned int major, minor; + char devpath[32]; + + retval = stat(".", &stats); + ASSERT_EQ(0, retval) { + TH_LOG("Can't stat pwd: %s", strerror(errno)); + } + + devt = stats.st_dev; + major = major(devt); + minor = minor(devt); + snprintf(devpath, sizeof(devpath), "/dev/block/%u:%u", major, minor); + + fd = open(devpath, O_RDONLY); + ASSERT_NE(-1, fd) { + TH_LOG("Can't open underlying disk %s", strerror(errno)); + } + + retval = ioctl(fd, BLKRAGET, &ra_size); + ASSERT_EQ(0, retval) { + TH_LOG("Error ioctl with the underlying disk: %s", strerror(errno)); + } + + /* + * BLKRAGET ioctl returns the readahead size in sectors (512 bytes). + * Make file_size large enough to contain the readahead window. + */ + ra_size *= 512; + file_size = ra_size * 2; page_size = sysconf(_SC_PAGESIZE); - vec_size = FILE_SIZE / page_size; - if (FILE_SIZE % page_size) + vec_size = file_size / page_size; + if (file_size % page_size) vec_size++; vec = calloc(vec_size, sizeof(unsigned char)); @@ -213,7 +250,7 @@ TEST(check_file_mmap) strerror(errno)); } errno = 0; - retval = fallocate(fd, 0, 0, FILE_SIZE); + retval = fallocate(fd, 0, 0, file_size); ASSERT_EQ(0, retval) { TH_LOG("Error allocating space for the temporary file: %s", strerror(errno)); @@ -223,12 +260,12 @@ TEST(check_file_mmap) * Map the whole file, the pages shouldn't be fetched yet. 
*/ errno = 0; - addr = mmap(NULL, FILE_SIZE, PROT_READ | PROT_WRITE, + addr = mmap(NULL, file_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); ASSERT_NE(MAP_FAILED, addr) { TH_LOG("mmap error: %s", strerror(errno)); } - retval = mincore(addr, FILE_SIZE, vec); + retval = mincore(addr, file_size, vec); ASSERT_EQ(0, retval); for (i = 0; i < vec_size; i++) { ASSERT_EQ(0, vec[i]) { @@ -240,38 +277,41 @@ TEST(check_file_mmap) * Touch a page in the middle of the mapping. We expect the next * few pages (the readahead window) to be populated too. */ - addr[FILE_SIZE / 2] = 1; - retval = mincore(addr, FILE_SIZE, vec); + addr[file_size / 2] = 1; + retval = mincore(addr, file_size, vec); ASSERT_EQ(0, retval); - ASSERT_EQ(1, vec[FILE_SIZE / 2 / page_size]) { - TH_LOG("Page not found in memory after use"); - } - i = FILE_SIZE / 2 / page_size + 1; - while (i < vec_size && vec[i]) { - ra_pages++; - i++; - } - EXPECT_GT(ra_pages, 0) { - TH_LOG("No read-ahead pages found in memory"); - } + /* + * Readahead window is [start, end). So far the sync readahead + * algorithm takes the page that triggers the page fault as the + * midpoint. + */ + ra_pages = ra_size / page_size; + start = file_size / 2 / page_size - ra_pages / 2; + end = start + ra_pages; - EXPECT_LT(i, vec_size) { - TH_LOG("Read-ahead pages reached the end of the file"); + /* + * Check there's no hole in the readahead window. + */ + for (i = start; i < end; i++) { + ASSERT_EQ(1, vec[i]) { + TH_LOG("Hole found in read-ahead window"); + } } + /* - * End of the readahead window. The rest of the pages shouldn't - * be in memory. + * Check there's no page beyond the readahead window. */ - if (i < vec_size) { - while (i < vec_size && !vec[i]) - i++; - EXPECT_EQ(vec_size, i) { + for (i = 0; i < vec_size; i++) { + if (i == start) + i = end; + + EXPECT_EQ(0, vec[i]) { TH_LOG("Unexpected page in memory beyond readahead window"); } } - munmap(addr, FILE_SIZE); + munmap(addr, file_size); close(fd); free(vec); } -- 2.27.0

4 years, 2 months

2
1
0 0

[PATCH] selftests/powerpc: remove unneeded semicolon

by Yang Li

Eliminate the following coccicheck warning: ./tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c:327:4-5: Unneeded semicolon Reported-by: Abaci Robot <abaci(a)linux.alibaba.com> Signed-off-by: Yang Li <yang.lee(a)linux.alibaba.com> --- tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c index 02dffb6..b099753 100644 --- a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c +++ b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c @@ -324,7 +324,7 @@ int compress_file(int argc, char **argv, void *handle) fprintf(stderr, "error: cannot progress; "); fprintf(stderr, "too many faults\n"); exit(-1); - }; + } } fault_tries = NX_MAX_FAULTS; /* Reset for the next chunk */ -- 1.8.3.1

4 years, 2 months

2
1
0 0

[PATCH v3 0/4] powerpc/selftests: Add Power10 2nd DAWR selftests

by Ravi Bangoria

Power10 introduced 2nd watchpoint (DAWR). ISA 3.1, Book 3S, Ch 9 - 'Debug Facilities' covers the feature in detail. Kernel patches to enable the 2nd DAWR are already in [1], including kvm enablement[2]. These patches add selftests for 2nd DAWR. [1]: https://git.kernel.org/torvalds/c/deb2bd9bcc8428d4b65b6ba640ba8b57c1b20b17 [2]: https://git.kernel.org/torvalds/c/bd1de1a0e6eff4bde5ceae969673b85b8446fd6a v2: https://lore.kernel.org/r/20210407054938.312857-1-ravi.bangoria@linux.ibm.c… v2->v3: - Fixed some checkpatch warnings v1: https://lore.kernel.org/r/20200723102058.312282-1-ravi.bangoria@linux.ibm.c… v1->v2: - Kvm patches are already upstream - Rebased selftests to powerpc/next Ravi Bangoria (4): powerpc/selftests/ptrace-hwbreak: Add testcases for 2nd DAWR powerpc/selftests/perf-hwbreak: Coalesce event creation code powerpc/selftests/perf-hwbreak: Add testcases for 2nd DAWR powerpc/selftests: Add selftest to test concurrent perf/ptrace events .../selftests/powerpc/ptrace/.gitignore | 1 + .../testing/selftests/powerpc/ptrace/Makefile | 2 +- .../selftests/powerpc/ptrace/perf-hwbreak.c | 631 +++++++++++++++-- .../selftests/powerpc/ptrace/ptrace-hwbreak.c | 79 +++ .../powerpc/ptrace/ptrace-perf-hwbreak.c | 659 ++++++++++++++++++ 5 files changed, 1330 insertions(+), 42 deletions(-) create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c -- 2.27.0

4 years, 2 months

2
5
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-kselftest-mirror