August 2024 - Linux-kselftest-mirror

[PATCH] selftests/vDSO: support DT_GNU_HASH

by Fangrui Song

glibc added support for DT_GNU_HASH in 2006 and DT_HASH has been obsoleted for more than one decade in many Linux distributions. Many vDSOs support DT_GNU_HASH. This patch adds selftests support. Signed-off-by: Fangrui Song <maskray(a)google.com> --- tools/testing/selftests/vDSO/parse_vdso.c | 105 ++++++++++++++++------ 1 file changed, 79 insertions(+), 26 deletions(-) diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c index 4ae417372e9e..35cb545da13e 100644 --- a/tools/testing/selftests/vDSO/parse_vdso.c +++ b/tools/testing/selftests/vDSO/parse_vdso.c @@ -47,6 +47,7 @@ static struct vdso_info /* Symbol table */ ELF(Sym) *symtab; const char *symstrings; + ELF(Word) *gnu_hash; ELF(Word) *bucket, *chain; ELF(Word) nbucket, nchain; @@ -75,6 +76,16 @@ static unsigned long elf_hash(const char *name) return h; } +static uint32_t gnu_hash(const char *name) +{ + const unsigned char *s = (void *)name; + uint32_t h = 5381; + + for (; *s; s++) + h += h * 32 + *s; + return h; +} + void vdso_init_from_sysinfo_ehdr(uintptr_t base) { size_t i; @@ -117,6 +128,7 @@ void vdso_init_from_sysinfo_ehdr(uintptr_t base) */ ELF(Word) *hash = 0; vdso_info.symstrings = 0; + vdso_info.gnu_hash = 0; vdso_info.symtab = 0; vdso_info.versym = 0; vdso_info.verdef = 0; @@ -137,6 +149,11 @@ void vdso_init_from_sysinfo_ehdr(uintptr_t base) ((uintptr_t)dyn[i].d_un.d_ptr + vdso_info.load_offset); break; + case DT_GNU_HASH: + vdso_info.gnu_hash = + (ELF(Word) *)((uintptr_t)dyn[i].d_un.d_ptr + + vdso_info.load_offset); + break; case DT_VERSYM: vdso_info.versym = (ELF(Versym) *) ((uintptr_t)dyn[i].d_un.d_ptr @@ -149,17 +166,26 @@ void vdso_init_from_sysinfo_ehdr(uintptr_t base) break; } } - if (!vdso_info.symstrings || !vdso_info.symtab || !hash) + if (!vdso_info.symstrings || !vdso_info.symtab || + (!hash && !vdso_info.gnu_hash)) return; /* Failed */ if (!vdso_info.verdef) vdso_info.versym = 0; /* Parse the hash table header. 
*/ - vdso_info.nbucket = hash[0]; - vdso_info.nchain = hash[1]; - vdso_info.bucket = &hash[2]; - vdso_info.chain = &hash[vdso_info.nbucket + 2]; + if (vdso_info.gnu_hash) { + vdso_info.nbucket = vdso_info.gnu_hash[0]; + /* The bucket array is located after the header (4 uint32) and the bloom + filter (size_t array of gnu_hash[2] elements). */ + vdso_info.bucket = vdso_info.gnu_hash + 4 + + sizeof(size_t) / 4 * vdso_info.gnu_hash[2]; + } else { + vdso_info.nbucket = hash[0]; + vdso_info.nchain = hash[1]; + vdso_info.bucket = &hash[2]; + vdso_info.chain = &hash[vdso_info.nbucket + 2]; + } /* That's all we need. */ vdso_info.valid = true; @@ -203,6 +229,26 @@ static bool vdso_match_version(ELF(Versym) ver, && !strcmp(name, vdso_info.symstrings + aux->vda_name); } +static bool check_sym(ELF(Sym) *sym, ELF(Word) i, const char *name, + const char *version, unsigned long ver_hash) +{ + /* Check for a defined global or weak function w/ right name. */ + if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC) + return false; + if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL && + ELF64_ST_BIND(sym->st_info) != STB_WEAK) + return false; + if (strcmp(name, vdso_info.symstrings + sym->st_name)) + return false; + + /* Check symbol version. */ + if (vdso_info.versym && + !vdso_match_version(vdso_info.versym[i], version, ver_hash)) + return false; + + return true; +} + void *vdso_sym(const char *version, const char *name) { unsigned long ver_hash; @@ -210,29 +256,36 @@ void *vdso_sym(const char *version, const char *name) return 0; ver_hash = elf_hash(version); - ELF(Word) chain = vdso_info.bucket[elf_hash(name) % vdso_info.nbucket]; + ELF(Word) i; - for (; chain != STN_UNDEF; chain = vdso_info.chain[chain]) { - ELF(Sym) *sym = &vdso_info.symtab[chain]; + if (vdso_info.gnu_hash) { + uint32_t h1 = gnu_hash(name), h2, *hashval; - /* Check for a defined global or weak function w/ right name. 
*/ - if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC) - continue; - if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL && - ELF64_ST_BIND(sym->st_info) != STB_WEAK) - continue; - if (sym->st_shndx == SHN_UNDEF) - continue; - if (strcmp(name, vdso_info.symstrings + sym->st_name)) - continue; - - /* Check symbol version. */ - if (vdso_info.versym - && !vdso_match_version(vdso_info.versym[chain], - version, ver_hash)) - continue; - - return (void *)(vdso_info.load_offset + sym->st_value); + i = vdso_info.bucket[h1 % vdso_info.nbucket]; + if (i == 0) + return 0; + h1 |= 1; + hashval = vdso_info.bucket + vdso_info.nbucket + + (i - vdso_info.gnu_hash[1]); + for (;; i++) { + ELF(Sym) *sym = &vdso_info.symtab[i]; + h2 = *hashval++; + if (h1 == (h2 | 1) && + check_sym(sym, i, name, version, ver_hash)) + return (void *)(vdso_info.load_offset + + sym->st_value); + if (h2 & 1) + break; + } + } else { + i = vdso_info.bucket[elf_hash(name) % vdso_info.nbucket]; + for (; i; i = vdso_info.chain[i]) { + ELF(Sym) *sym = &vdso_info.symtab[i]; + if (sym->st_shndx != SHN_UNDEF && + check_sym(sym, i, name, version, ver_hash)) + return (void *)(vdso_info.load_offset + + sym->st_value); + } } return 0; -- 2.46.0.76.ge559c4bf1a-goog

1 year, 1 month

3
5
0 0

[PATCH] selftests: filesystems: fix warn_unused_result build warnings

by Abhinav Jain

Add return value checks for read & write calls in test_listmount_ns function. This patch resolves below compilation warnings: ``` statmount_test_ns.c: In function ‘test_listmount_ns’: statmount_test_ns.c:322:17: warning: ignoring return value of ‘write’ declared with attribute ‘warn_unused_result’ [-Wunused-result] statmount_test_ns.c:323:17: warning: ignoring return value of ‘read’ declared with attribute ‘warn_unused_result’ [-Wunused-result] ``` Signed-off-by: Abhinav Jain <jain.abhinav177(a)gmail.com> --- .../selftests/filesystems/statmount/statmount_test_ns.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c index e044f5fc57fd..70cb0c8b21cf 100644 --- a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c +++ b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c @@ -319,8 +319,11 @@ static void test_listmount_ns(void) * Tell our parent how many mounts we have, and then wait for it * to tell us we're done. */ - write(child_ready_pipe[1], &nr_mounts, sizeof(nr_mounts)); - read(parent_ready_pipe[0], &cval, sizeof(cval)); + if (write(child_ready_pipe[1], &nr_mounts, sizeof(nr_mounts)) != + sizeof(nr_mounts)) + ret = NSID_ERROR; + if (read(parent_ready_pipe[0], &cval, sizeof(cval)) != sizeof(cval)) + ret = NSID_ERROR; exit(NSID_PASS); } -- 2.34.1

1 year, 1 month

2
2
0 0

[PATCH v3 0/3] RISC-V: mm: do not treat hint addr on mmap as the upper bound to search

by Yangyu Chen

The previous patch series [1][2] changed the mmap behavior so that the hint address is treated as the upper bound of the mmap address range. The motivation of the previous patch series is that some user space software may assume a 48-bit address space and use the higher bits to encode some information, which may collide with the large virtual addresses mmap may return. However, to make sv48 the default, we don't need to change the meaning of the hint address on mmap to be the upper bound of the mmap address range. This behavior breaks some user space software like Chromium, which gets an ENOMEM error when the hint address + size is not big enough, as specified in [3]. Other ISAs with larger than 48-bit virtual address space like x86, arm64, and powerpc do not have this special mmap behavior on hint address. They all just use a 48-bit / 47-bit virtual address space by default, and if user space software wants a larger virtual address space, it only needs to specify a hint address larger than 48-bit / 47-bit. Thus, this patch series changes mmap to use sv48 by default but does not treat the hint address as the upper bound of the mmap address range. After this patch, the behavior of mmap will align with existing behavior on other ISAs with larger than 48-bit virtual address space like x86, arm64, and powerpc. User space software will no longer need to be rewritten to fit this special mmap behavior that exists only on RISC-V. Note: Charlie also created another series [4] to completely remove the arch_get_mmap_end and arch_get_mmap_base behavior based on the hint address and size. However, this will cause programs like Go and Java, which need to store information in the higher bits of the pointer, to fail on Sv57 machines.
Changes in v3: - Rebase to newest master - Changes some information in cover letter after patchset [2] - Use patch [5] to patch selftests - Link to v2: https://lore.kernel.org/linux-riscv/tencent_B2D0435BC011135736262764B511994… Changes in v2: - correct arch_get_mmap_end and arch_get_mmap_base - Add description in documentation about mmap behavior on kernel v6.6-6.7. - Improve commit message and cover letter - Rebase to newest riscv/for-next branch - Link to v1: https://lore.kernel.org/linux-riscv/tencent_F3B3B5AB1C9D704763CA423E1A41F8B… [1] https://lore.kernel.org/linux-riscv/20230809232218.849726-1-charlie@rivosin… [2] https://lore.kernel.org/linux-riscv/20240130-use_mmap_hint_address-v3-0-8a6… [3] https://lore.kernel.org/linux-riscv/MEYP282MB2312A08FF95D44014AB78411C68D2@… [4] https://lore.kernel.org/linux-riscv/20240826-riscv_mmap-v1-0-cd8962afe47f@r… [5] https://lore.kernel.org/linux-riscv/20240826-riscv_mmap-v1-2-cd8962afe47f@r… Charlie Jenkins (1): riscv: selftests: Remove mmap hint address checks Yangyu Chen (2): RISC-V: mm: not use hint addr as upper bound Documentation: riscv: correct sv57 kernel behavior Documentation/arch/riscv/vm-layout.rst | 43 ++++++++---- arch/riscv/include/asm/processor.h | 20 ++---- .../selftests/riscv/mm/mmap_bottomup.c | 2 - .../testing/selftests/riscv/mm/mmap_default.c | 2 - tools/testing/selftests/riscv/mm/mmap_test.h | 67 ------------------- 5 files changed, 36 insertions(+), 98 deletions(-) -- 2.45.2

1 year, 1 month

4
9
0 0

[PATCH v3 0/3] selftests: Fix cpuid / vendor checking build issues

by Ilpo Järvinen

This series first generalizes the resctrl selftest's non-contiguous CAT check so that it does not assume a non-AMD vendor implies Intel. Second, it improves the kselftest common parts and the resctrl selftest such that the use of __cpuid_count() does not lead to a build failure (happens at least on ARM). While ARM does not currently support resctrl features, there is ongoing work to enable resctrl support also for it on the kernel side. In any case, a common header such as kselftest.h should have a proper fallback in place for what it provides, thus it seems justified to fix this common level problem on the common level rather than e.g. disabling build for resctrl selftest for archs lacking resctrl support. I've dropped the Reviewed-by and Tested-by tags from the last patch due to major changes to the makefile logic. So it would be helpful if Muhammad could retest with this version. v3: - Remove "empty" wording - Also cast input parameters to void - Initialize ARCH from uname -m if not set (this might allow cleaning up some other makefiles but that is left as future work) v2: - Removed RFC from the last patch & added Fixes and tags - Fixed the error message's line splits - Noted down the reason for void casts in the stub Ilpo Järvinen (3): selftests/resctrl: Generalize non-contiguous CAT check selftests/resctrl: Always initialize ecx to avoid build warnings kselftest: Provide __cpuid_count() stub on non-x86 archs tools/testing/selftests/kselftest.h | 6 +++++ tools/testing/selftests/lib.mk | 6 +++++ tools/testing/selftests/resctrl/cat_test.c | 28 +++++++++++++--------- 3 files changed, 29 insertions(+), 11 deletions(-) -- 2.39.2

1 year, 1 month

1
4
0 0

[PATCH 0/6] Extend pmu_counters_test to AMD CPUs

by Colton Lewis

(I was positive I had sent this already, but I couldn't find it on the mailing list to reply to and ask for reviews.) Extend pmu_counters_test to AMD CPUs. As the AMD PMU is quite different from Intel with different events and feature sets, this series introduces a new code path to test it, specifically focusing on the core counters including the PerfCtrExtCore and PerfMonV2 features. Northbridge counters and cache counters exist, but are not as important and can be deferred to a later series. The first patch is a bug fix that could be submitted separately. The series has been tested on both Intel and AMD machines, but I have not found an AMD machine old enough to lack PerfCtrExtCore. I have made efforts that no part of the code has any dependency on its presence. I am aware of similar work in this direction done by Jinrong Liang [1]. He told me he is not working on it currently and I am not intruding by making my own submission. [1] https://lore.kernel.org/kvm/20231121115457.76269-1-cloudliang@tencent.com/ Colton Lewis (6): KVM: x86: selftests: Fix typos in macro variable use KVM: x86: selftests: Define AMD PMU CPUID leaves KVM: x86: selftests: Set up AMD VM in pmu_counters_test KVM: x86: selftests: Test read/write core counters KVM: x86: selftests: Test core events KVM: x86: selftests: Test PerfMonV2 .../selftests/kvm/include/x86_64/processor.h | 7 + .../selftests/kvm/x86_64/pmu_counters_test.c | 267 ++++++++++++++++-- 2 files changed, 249 insertions(+), 25 deletions(-) -- 2.46.0.76.ge559c4bf1a-goog

1 year, 1 month

3
20
0 0

[PATCH 0/2] Improve migration by backing off earlier

by Dev Jain

It was recently observed at [1] that during the folio unmapping stage of migration, when the PTEs are cleared, a racing thread faulting on that folio may increase the refcount of the folio, sleep on the folio lock (the migration path has the lock), and migration ultimately fails when asserting the actual refcount against the expected one. Migration is a best-effort service; the unmapping and the moving phases are wrapped in retry loops. The refcount of the folio is currently being asserted during the move stage; if it fails, we retry. But, if a racing thread changes the refcount, and ends up sleeping on the folio lock (which is mostly the case), there is no way the refcount would be decremented; as a result, this renders the retrying useless. In the first patch, we also perform the refcount check during the unmap stage; if it fails, we restore the original state of the PTE, drop the folio lock, let the system make progress, and retry unmapping again. This improves the probability of migration winning the race. In the second patch, given that migration is a best-effort service, it is wrong to fail the selftest for just a single failure; hence, the test now fails only after 100 consecutive failures (where 100 is still a subjective choice). [1] https://lore.kernel.org/all/20240801081657.1386743-1-dev.jain@arm.com/ Dev Jain (2): mm: Retry migration earlier upon refcount mismatch selftests/mm: Do not fail test for a single migration failure mm/migrate.c | 9 +++++++++ tools/testing/selftests/mm/migration.c | 17 +++++++++++------ 2 files changed, 20 insertions(+), 6 deletions(-) -- 2.30.2

1 year, 1 month

6
29
0 0

[PATCH] selftests/futex: Create test for robust list

by André Almeida

Create a test for the robust list mechanism. Signed-off-by: André Almeida <andrealmeid(a)igalia.com> --- .../selftests/futex/functional/.gitignore | 1 + .../selftests/futex/functional/Makefile | 3 +- .../selftests/futex/functional/robust_list.c | 450 ++++++++++++++++++ 3 files changed, 453 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/futex/functional/robust_list.c diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore index fbcbdb6963b3..4726e1be7497 100644 --- a/tools/testing/selftests/futex/functional/.gitignore +++ b/tools/testing/selftests/futex/functional/.gitignore @@ -9,3 +9,4 @@ futex_wait_wouldblock futex_wait futex_requeue futex_waitv +robust_list diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile index f79f9bac7918..b8635a1ac7f6 100644 --- a/tools/testing/selftests/futex/functional/Makefile +++ b/tools/testing/selftests/futex/functional/Makefile @@ -17,7 +17,8 @@ TEST_GEN_PROGS := \ futex_wait_private_mapped_file \ futex_wait \ futex_requeue \ - futex_waitv + futex_waitv \ + robust_list TEST_PROGS := run.sh diff --git a/tools/testing/selftests/futex/functional/robust_list.c b/tools/testing/selftests/futex/functional/robust_list.c new file mode 100644 index 000000000000..5cc0edaaf028 --- /dev/null +++ b/tools/testing/selftests/futex/functional/robust_list.c @@ -0,0 +1,450 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright Igalia, 2024 + * + * Robust list test by André Almeida <andrealmeid(a)igalia.com> + * + * The robust list uAPI allows userspace to create "robust" locks, in the sense + * that if the lock holder thread dies, the remaining threads that are waiting + * for the lock won't block forever, waiting for a lock that will never be + * released. + * + * This is achieve by userspace setting a list where a thread can enter all the + * locks (futexes) that it is holding. 
The robust list is a linked list, and + * userspace register the start of the list with the syscall set_robust_list(). + * If such thread eventually dies, the kernel will walk this list, waking up one + * thread waiting for each futex and marking the futex word with the flag + * FUTEX_OWNER_DIED. + * + * See also + * man set_robust_list + * Documententation/locking/robust-futex-ABI.rst + * Documententation/locking/robust-futexes.rst + */ + +#define _GNU_SOURCE + +#include "../../kselftest_harness.h" + +#include "futextest.h" + +#include <pthread.h> +#include <stdatomic.h> +#include <stddef.h> + +#define STACK_SIZE (1024 * 1024) + +#define FUTEX_TIMEOUT 3 + +static pthread_barrier_t barrier, barrier2; + +int set_robust_list(struct robust_list_head *head, size_t len) +{ + return syscall(SYS_set_robust_list, head, len); +} + +int get_robust_list(int pid, struct robust_list_head **head, size_t *len_ptr) +{ + return syscall(SYS_get_robust_list, pid, head, len_ptr); +} + +int futex2_wait(void *futex, int val, struct timespec *timo) +{ + return syscall(SYS_futex_wait, futex, val, ~0U, FUTEX2_SIZE_U32, timo, CLOCK_MONOTONIC); +} + +/* + * Basic lock struct, contains just the futex word and the robust list element + * Real implementations have also a *prev to easily walk in the list + */ +struct lock_struct { + int futex; + struct robust_list list; +}; + +/* + * Helper function to spawn a child thread. 
Returns -1 on error, pid on success + */ +static int create_child(int (*fn)(void *arg), void *arg) +{ + char *stack; + pid_t pid; + + stack = mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); + if (stack == MAP_FAILED) + return -1; + + stack += STACK_SIZE; + + pid = clone(fn, stack, CLONE_VM | SIGCHLD, arg); + + if (pid == -1) + return -1; + + return pid; +} + +/* + * Helper function to prepare and register a robust list + */ +static int set_list(struct robust_list_head *head) +{ + int ret; + + ret = set_robust_list(head, sizeof(struct robust_list_head)); + if (ret) + return ret; + + head->futex_offset = (size_t) offsetof(struct lock_struct, futex) - + (size_t) offsetof(struct lock_struct, list); + head->list.next = &head->list; + head->list_op_pending = NULL; + + return 0; +} + +/* + * A basic (and incomplete) mutex lock function with robustness + */ +static int mutex_lock(struct lock_struct *lock, struct robust_list_head *head, bool error_inject) +{ + int *futex = &lock->futex, zero = 0, ret = -1; + pid_t tid = gettid(); + + /* + * Set list_op_pending before starting the lock, so the kernel can catch + * the case where the thread died during the lock operation + */ + head->list_op_pending = &lock->list; + + if (atomic_compare_exchange_strong(futex, &zero, tid)) { + /* + * We took the lock, insert it in the robust list + */ + struct robust_list *list = &head->list; + + /* Error injection to test list_op_pending */ + if (error_inject) + return 0; + + while (list->next != &head->list) + list = list->next; + + list->next = &lock->list; + lock->list.next = &head->list; + + ret = 0; + } else { + /* + * We didn't take the lock, wait until the owner wakes (or dies) + */ + struct timespec to; + + clock_gettime(CLOCK_MONOTONIC, &to); + to.tv_sec = to.tv_sec + FUTEX_TIMEOUT; + + tid = atomic_load(futex); + /* Kernel ignores futexes without the waiters flag */ + tid |= FUTEX_WAITERS; + atomic_store(futex, tid); + + ret = 
futex2_wait(futex, tid, &to); + + /* + * A real mutex_lock() implementation would loop here to finally + * take the lock. We don't care about that, so we stop here. + */ + } + + head->list_op_pending = NULL; + + return ret; +} + +/* + * This child thread will succeed taking the lock, and then will exit holding it + */ +static int child_fn_lock(void *arg) +{ + struct lock_struct *lock = (struct lock_struct *) arg; + struct robust_list_head head; + int ret; + + ret = set_list(&head); + if (ret) + ksft_test_result_fail("set_robust_list error\n"); + + ret = mutex_lock(lock, &head, false); + if (ret) + ksft_test_result_fail("mutex_lock error\n"); + + pthread_barrier_wait(&barrier); + + /* + * There's a race here: the parent thread needs to be inside + * futex_wait() before the child thread dies, otherwise it will miss the + * wakeup from handle_futex_death() that this child will emit. We wait a + * little bit just to make sure that this happens. + */ + sleep(1); + + return 0; +} + +/* + * Spawns a child thread that will set a robust list, take the lock, register it + * in the robust list and die. The parent thread will wait on this futex, and + * should be waken up when the child exits. 
+ */ +TEST(robustness) +{ + struct lock_struct lock = { .futex = 0 }; + struct robust_list_head head; + int ret, *futex = &lock.futex; + + ret = set_list(&head); + ASSERT_EQ(ret, 0); + + /* + * Lets use a barrier to ensure that the child thread takes the lock + * before the parent + */ + ret = pthread_barrier_init(&barrier, NULL, 2); + ASSERT_EQ(ret, 0); + + ret = create_child(&child_fn_lock, &lock); + ASSERT_NE(ret, -1); + + pthread_barrier_wait(&barrier); + ret = mutex_lock(&lock, &head, false); + + /* + * futex_wait() should return 0 and the futex word should be marked with + * FUTEX_OWNER_DIED + */ + ASSERT_EQ(ret, 0) TH_LOG("futex wait returned %d", errno); + ASSERT_TRUE(*futex | FUTEX_OWNER_DIED); + + pthread_barrier_destroy(&barrier); +} + +/* + * The only valid value for len is sizeof(*head) + */ +TEST(set_robust_list_invalid_size) +{ + struct robust_list_head head; + size_t head_size = sizeof(struct robust_list_head); + int ret; + + ret = set_robust_list(&head, head_size); + ASSERT_EQ(ret, 0); + + ret = set_robust_list(&head, head_size * 2); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EINVAL); + + ret = set_robust_list(&head, head_size - 1); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EINVAL); + + ret = set_robust_list(&head, 0); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EINVAL); +} + +/* + * Test get_robust_list with pid = 0, getting the list of the running thread + */ +TEST(get_robust_list_self) +{ + struct robust_list_head head, head2, *get_head; + size_t head_size = sizeof(struct robust_list_head), len_ptr; + int ret; + + ret = set_robust_list(&head, head_size); + ASSERT_EQ(ret, 0); + + ret = get_robust_list(0, &get_head, &len_ptr); + ASSERT_EQ(ret, 0); + ASSERT_EQ(get_head, &head); + ASSERT_EQ(head_size, len_ptr); + + ret = set_robust_list(&head2, head_size); + ASSERT_EQ(ret, 0); + + ret = get_robust_list(0, &get_head, &len_ptr); + ASSERT_EQ(ret, 0); + ASSERT_EQ(get_head, &head2); + ASSERT_EQ(head_size, len_ptr); +} + +static int child_list(void *arg) +{ + 
struct robust_list_head *head = (struct robust_list_head *) arg; + int ret; + + ret = set_robust_list(head, sizeof(struct robust_list_head)); + if (ret) + ksft_test_result_fail("set_robust_list error\n"); + + pthread_barrier_wait(&barrier); + pthread_barrier_wait(&barrier2); + + return 0; +} + +/* + * Test get_robust_list from another thread. We use two barriers here to ensure + * that: + * 1) the child thread set the list before we try to get it from the + * parent + * 2) the child thread still alive when we try to get the list from it + */ +TEST(get_robust_list_child) +{ + pid_t tid; + int ret; + struct robust_list_head head, *get_head; + size_t len_ptr; + + ret = pthread_barrier_init(&barrier, NULL, 2); + ret = pthread_barrier_init(&barrier2, NULL, 2); + ASSERT_EQ(ret, 0); + + tid = create_child(&child_list, &head); + ASSERT_NE(tid, -1); + + pthread_barrier_wait(&barrier); + + ret = get_robust_list(tid, &get_head, &len_ptr); + ASSERT_EQ(ret, 0); + ASSERT_EQ(&head, get_head); + + pthread_barrier_wait(&barrier2); + + pthread_barrier_destroy(&barrier); + pthread_barrier_destroy(&barrier2); +} + +static int child_fn_lock_with_error(void *arg) +{ + struct lock_struct *lock = (struct lock_struct *) arg; + struct robust_list_head head; + int ret; + + ret = set_list(&head); + if (ret) + ksft_test_result_fail("set_robust_list error\n"); + + ret = mutex_lock(lock, &head, true); + if (ret) + ksft_test_result_fail("mutex_lock error\n"); + + pthread_barrier_wait(&barrier); + + sleep(1); + + return 0; +} + +/* + * Same as robustness test, but inject an error where the mutex_lock() exits + * earlier, just after setting list_op_pending and taking the lock, to test the + * list_op_pending mechanism + */ +TEST(set_list_op_pending) +{ + struct lock_struct lock = { .futex = 0 }; + struct robust_list_head head; + int ret, *futex = &lock.futex; + + ret = set_list(&head); + ASSERT_EQ(ret, 0); + + ret = pthread_barrier_init(&barrier, NULL, 2); + ASSERT_EQ(ret, 0); + + ret = 
create_child(&child_fn_lock_with_error, &lock); + ASSERT_NE(ret, -1); + + pthread_barrier_wait(&barrier); + ret = mutex_lock(&lock, &head, false); + + ASSERT_EQ(ret, 0) TH_LOG("futex wait returned %d", errno); + ASSERT_TRUE(*futex | FUTEX_OWNER_DIED); + + pthread_barrier_destroy(&barrier); +} + +#define CHILD_NR 10 + +static int child_lock_holder(void *arg) +{ + struct lock_struct *locks = (struct lock_struct *) arg; + struct robust_list_head head; + int i; + + set_list(&head); + + for (i = 0; i < CHILD_NR; i++) { + locks[i].futex = 0; + mutex_lock(&locks[i], &head, false); + } + + pthread_barrier_wait(&barrier); + pthread_barrier_wait(&barrier2); + + sleep(1); + return 0; +} + +static int child_wait_lock(void *arg) +{ + struct lock_struct *lock = (struct lock_struct *) arg; + struct robust_list_head head; + int ret; + + pthread_barrier_wait(&barrier2); + ret = mutex_lock(lock, &head, false); + + if (ret) + ksft_test_result_fail("mutex_lock error\n"); + + if (!(lock->futex | FUTEX_OWNER_DIED)) + ksft_test_result_fail("futex not marked with FUTEX_OWNER_DIED\n"); + + return 0; +} + +/* + * Test a robust list of more than one element. All the waiters should wake when + * the holder dies + */ +TEST(robust_list_multiple_elements) +{ + struct lock_struct locks[CHILD_NR]; + int i, ret; + + ret = pthread_barrier_init(&barrier, NULL, 2); + ASSERT_EQ(ret, 0); + ret = pthread_barrier_init(&barrier2, NULL, CHILD_NR + 1); + ASSERT_EQ(ret, 0); + + create_child(&child_lock_holder, &locks); + + /* Wait until the locker thread takes the look */ + pthread_barrier_wait(&barrier); + + for (i = 0; i < CHILD_NR; i++) + create_child(&child_wait_lock, &locks[i]); + + /* Wait for all children to return */ + while (wait(NULL) > 0); + + pthread_barrier_destroy(&barrier); + pthread_barrier_destroy(&barrier2); +} + +TEST_HARNESS_MAIN -- 2.46.0

1 year, 1 month

2
2
0 0

[PATCH v3 0/5] Wire up getrandom() vDSO implementation on powerpc

by Christophe Leroy

This series wires up the getrandom() vDSO implementation on powerpc. Tested on PPC32 on real hardware. Tested on PPC64 (both BE and LE) on QEMU: Performance on powerpc 885: ~# ./vdso_test_getrandom bench-single vdso: 25000000 times in 62.938002291 seconds libc: 25000000 times in 535.581916866 seconds syscall: 25000000 times in 531.525042806 seconds Performance on powerpc 8321: ~# ./vdso_test_getrandom bench-single vdso: 25000000 times in 16.899318858 seconds libc: 25000000 times in 131.050596522 seconds syscall: 25000000 times in 129.794790389 seconds Performance on QEMU pseries: ~ # ./vdso_test_getrandom bench-single vdso: 25000000 times in 4.977777162 seconds libc: 25000000 times in 75.516749981 seconds syscall: 25000000 times in 86.842242014 seconds In order to run selftests, some fixes are needed, see https://lore.kernel.org/linuxppc-dev/6c5da802e72befecfa09046c489aa45d934d61… Those selftest fixes are independent and are not required to apply and use this series. Changes in v3: - Rebased on recent random git tree (0c7e00e22c21) - Fixed build failures reported by robots around VM_DROPPABLE - Fixed crash on PPC64 due to clobbered r13 by not using r13 anymore (saving it was not enough for signals). - Split final patch in two, first for PPC32, second for PPC64 - Moved selftest fixes out of this series Changes in v2: - Define VM_DROPPABLE for powerpc/32 - Fixes generic vDSO getrandom headers to enable CONFIG_COMPAT build. 
- Fixed size of generation counter - Fixed selftests to work on non x86 architectures Christophe Leroy (5): mm: Define VM_DROPPABLE for powerpc/32 powerpc/vdso32: Add crtsavres powerpc/vdso: Refactor CFLAGS for CVDSO build powerpc/vdso: Wire up getrandom() vDSO implementation on PPC32 powerpc/vdso: Wire up getrandom() vDSO implementation on PPC64 arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/asm-compat.h | 8 + arch/powerpc/include/asm/mman.h | 2 +- arch/powerpc/include/asm/vdso/getrandom.h | 54 ++++ arch/powerpc/include/asm/vdso/vsyscall.h | 6 + arch/powerpc/include/asm/vdso_datapage.h | 2 + arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kernel/vdso/Makefile | 57 ++-- arch/powerpc/kernel/vdso/getrandom.S | 58 ++++ arch/powerpc/kernel/vdso/gettimeofday.S | 13 - arch/powerpc/kernel/vdso/vdso32.lds.S | 1 + arch/powerpc/kernel/vdso/vdso64.lds.S | 1 + arch/powerpc/kernel/vdso/vgetrandom-chacha.S | 299 +++++++++++++++++++ arch/powerpc/kernel/vdso/vgetrandom.c | 14 + fs/proc/task_mmu.c | 4 +- include/linux/mm.h | 4 +- include/trace/events/mmflags.h | 4 +- tools/arch/powerpc/vdso | 1 + tools/testing/selftests/vDSO/Makefile | 4 + 19 files changed, 492 insertions(+), 42 deletions(-) create mode 100644 arch/powerpc/include/asm/vdso/getrandom.h create mode 100644 arch/powerpc/kernel/vdso/getrandom.S create mode 100644 arch/powerpc/kernel/vdso/vgetrandom-chacha.S create mode 100644 arch/powerpc/kernel/vdso/vgetrandom.c create mode 120000 tools/arch/powerpc/vdso -- 2.44.0

1 year, 1 month

3
11
0 0

[PATCH net-next] wireguard: allowedips: Add WGALLOWEDIP_F_REMOVE_ME flag

by Jordan Rife

With the current API the only way to remove an allowed IP is to completely rebuild the allowed IPs set for a peer using WGPEER_F_REPLACE_ALLOWEDIPS. In other words, if my current configuration is such that a peer has allowed IPs 192.168.0.2 and 192.168.0.3 and I want to remove 192.168.0.2, the actual transition looks like this: [192.168.0.2, 192.168.0.3] <-- Initial state [] <-- Step 1: Allowed IPs removed for peer [192.168.0.3] <-- Step 2: Allowed IPs added back for peer This is true even if the allowed IP list is small and the update does not need to be batched into multiple WG_CMD_SET_DEVICE requests, as the removal and subsequent addition of IPs is non-atomic within a single request. Consequently, wg_allowedips_lookup_dst and wg_allowedips_lookup_src may return NULL while reconfiguring a peer, even for packets bound for IPs a user did not intend to remove, leading to unintended interruptions in connectivity. This presents in userspace as failed calls to sendto and sendmsg. In my case, I ran netperf while repeatedly reconfiguring the allowed IPs for a peer with wg. /usr/local/bin/netperf -H 10.102.73.72 -l 10m -t UDP_STREAM -- -R 1 -m 1024 send_data: data send error: No route to host (errno 113) netperf: send_omni: send_data failed: No route to host While this may not be of particular concern for environments where peers and allowed IPs are mostly static, Cilium manages peers and allowed IPs in a dynamic environment where peers (i.e. Kubernetes nodes) and allowed IPs (i.e. Pods running on those nodes) can frequently change. Cilium must continually keep its WireGuard device's configuration in sync with its cluster state, leading to unnecessary churn and packet drops. This patch introduces a new flag called WGALLOWEDIP_F_REMOVE_ME which, in the same way that WGPEER_F_REMOVE_ME allows a user to remove a single peer from a WireGuard device's configuration, allows a user to remove an IP from a peer's set of allowed IPs. This has two benefits. 
First, it allows systems such as Cilium to avoid introducing connectivity blips while reconfiguring a WireGuard device. Second, it allows us to more efficiently keep the device's configuration in sync with the cluster state, as we no longer need to do frequent rebuilds of the allowed IPs list for each peer. Instead, the device's configuration can be incrementally updated. This patch also bumps WG_GENL_VERSION which can be used by clients to detect whether or not their system supports the WGALLOWEDIP_F_REMOVE_ME flag. Signed-off-by: Jordan Rife <jrife(a)google.com> Link: https://github.com/cilium/cilium/issues/33159 --- drivers/net/wireguard/allowedips.c | 103 ++++++++++---- drivers/net/wireguard/allowedips.h | 4 + drivers/net/wireguard/netlink.c | 45 +++++-- drivers/net/wireguard/selftest/allowedips.c | 30 +++++ include/uapi/linux/wireguard.h | 11 +- tools/testing/selftests/wireguard/Makefile | 18 +++ tools/testing/selftests/wireguard/netns.sh | 38 ++++++ tools/testing/selftests/wireguard/remove-ip.c | 126 ++++++++++++++++++ 8 files changed, 333 insertions(+), 42 deletions(-) create mode 100644 tools/testing/selftests/wireguard/Makefile create mode 100644 tools/testing/selftests/wireguard/remove-ip.c diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c index 4b8528206cc8a..47a96a1b8f0ea 100644 --- a/drivers/net/wireguard/allowedips.c +++ b/drivers/net/wireguard/allowedips.c @@ -249,6 +249,56 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key, return 0; } +static void _remove(struct allowedips_node __rcu *node, struct mutex *lock) +{ + struct allowedips_node *child, **parent_bit, *parent; + bool free_parent; + + list_del_init(&node->peer_list); + RCU_INIT_POINTER(node->peer, NULL); + if (node->bit[0] && node->bit[1]) + return; + child = rcu_dereference_protected(node->bit[!rcu_access_pointer(node->bit[0])], + lockdep_is_held(lock)); + if (child) + child->parent_bit_packed = node->parent_bit_packed; + 
parent_bit = (struct allowedips_node **)(node->parent_bit_packed & ~3UL); + *parent_bit = child; + parent = (void *)parent_bit - + offsetof(struct allowedips_node, bit[node->parent_bit_packed & 1]); + free_parent = !rcu_access_pointer(node->bit[0]) && + !rcu_access_pointer(node->bit[1]) && + (node->parent_bit_packed & 3) <= 1 && + !rcu_access_pointer(parent->peer); + if (free_parent) + child = rcu_dereference_protected(parent->bit[!(node->parent_bit_packed & 1)], + lockdep_is_held(lock)); + call_rcu(&node->rcu, node_free_rcu); + if (!free_parent) + return; + if (child) + child->parent_bit_packed = parent->parent_bit_packed; + *(struct allowedips_node **)(parent->parent_bit_packed & ~3UL) = child; + call_rcu(&parent->rcu, node_free_rcu); +} + +static int remove(struct allowedips_node __rcu **trie, u8 bits, const u8 *key, + u8 cidr, struct wg_peer *peer, struct mutex *lock) +{ + struct allowedips_node *node; + + if (unlikely(cidr > bits || !peer)) + return -EINVAL; + if (!rcu_access_pointer(*trie) || + !node_placement(*trie, key, cidr, bits, &node, lock) || + peer != node->peer) + return 0; + + _remove(node, lock); + + return 0; +} + void wg_allowedips_init(struct allowedips *table) { table->root4 = table->root6 = NULL; @@ -300,43 +350,38 @@ int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip, return add(&table->root6, 128, key, cidr, peer, lock); } +int wg_allowedips_remove_v4(struct allowedips *table, const struct in_addr *ip, + u8 cidr, struct wg_peer *peer, struct mutex *lock) +{ + /* Aligned so it can be passed to fls */ + u8 key[4] __aligned(__alignof(u32)); + + ++table->seq; + swap_endian(key, (const u8 *)ip, 32); + return remove(&table->root4, 32, key, cidr, peer, lock); +} + +int wg_allowedips_remove_v6(struct allowedips *table, const struct in6_addr *ip, + u8 cidr, struct wg_peer *peer, struct mutex *lock) +{ + /* Aligned so it can be passed to fls64 */ + u8 key[16] __aligned(__alignof(u64)); + + ++table->seq; + swap_endian(key, 
(const u8 *)ip, 128); + return remove(&table->root6, 128, key, cidr, peer, lock); +} + void wg_allowedips_remove_by_peer(struct allowedips *table, struct wg_peer *peer, struct mutex *lock) { - struct allowedips_node *node, *child, **parent_bit, *parent, *tmp; - bool free_parent; + struct allowedips_node *node, *tmp; if (list_empty(&peer->allowedips_list)) return; ++table->seq; list_for_each_entry_safe(node, tmp, &peer->allowedips_list, peer_list) { - list_del_init(&node->peer_list); - RCU_INIT_POINTER(node->peer, NULL); - if (node->bit[0] && node->bit[1]) - continue; - child = rcu_dereference_protected(node->bit[!rcu_access_pointer(node->bit[0])], - lockdep_is_held(lock)); - if (child) - child->parent_bit_packed = node->parent_bit_packed; - parent_bit = (struct allowedips_node **)(node->parent_bit_packed & ~3UL); - *parent_bit = child; - parent = (void *)parent_bit - - offsetof(struct allowedips_node, bit[node->parent_bit_packed & 1]); - free_parent = !rcu_access_pointer(node->bit[0]) && - !rcu_access_pointer(node->bit[1]) && - (node->parent_bit_packed & 3) <= 1 && - !rcu_access_pointer(parent->peer); - if (free_parent) - child = rcu_dereference_protected( - parent->bit[!(node->parent_bit_packed & 1)], - lockdep_is_held(lock)); - call_rcu(&node->rcu, node_free_rcu); - if (!free_parent) - continue; - if (child) - child->parent_bit_packed = parent->parent_bit_packed; - *(struct allowedips_node **)(parent->parent_bit_packed & ~3UL) = child; - call_rcu(&parent->rcu, node_free_rcu); + _remove(node, lock); } } diff --git a/drivers/net/wireguard/allowedips.h b/drivers/net/wireguard/allowedips.h index 2346c797eb4d8..931958cb6e100 100644 --- a/drivers/net/wireguard/allowedips.h +++ b/drivers/net/wireguard/allowedips.h @@ -38,6 +38,10 @@ int wg_allowedips_insert_v4(struct allowedips *table, const struct in_addr *ip, u8 cidr, struct wg_peer *peer, struct mutex *lock); int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip, u8 cidr, struct wg_peer 
*peer, struct mutex *lock); +int wg_allowedips_remove_v4(struct allowedips *table, const struct in_addr *ip, + u8 cidr, struct wg_peer *peer, struct mutex *lock); +int wg_allowedips_remove_v6(struct allowedips *table, const struct in6_addr *ip, + u8 cidr, struct wg_peer *peer, struct mutex *lock); void wg_allowedips_remove_by_peer(struct allowedips *table, struct wg_peer *peer, struct mutex *lock); /* The ip input pointer should be __aligned(__alignof(u64))) */ diff --git a/drivers/net/wireguard/netlink.c b/drivers/net/wireguard/netlink.c index f7055180ba4aa..5f2a8553ab43d 100644 --- a/drivers/net/wireguard/netlink.c +++ b/drivers/net/wireguard/netlink.c @@ -46,7 +46,8 @@ static const struct nla_policy peer_policy[WGPEER_A_MAX + 1] = { static const struct nla_policy allowedip_policy[WGALLOWEDIP_A_MAX + 1] = { [WGALLOWEDIP_A_FAMILY] = { .type = NLA_U16 }, [WGALLOWEDIP_A_IPADDR] = NLA_POLICY_MIN_LEN(sizeof(struct in_addr)), - [WGALLOWEDIP_A_CIDR_MASK] = { .type = NLA_U8 } + [WGALLOWEDIP_A_CIDR_MASK] = { .type = NLA_U8 }, + [WGALLOWEDIP_A_FLAGS] = { .type = NLA_U32 } }; static struct wg_device *lookup_interface(struct nlattr **attrs, @@ -329,6 +330,7 @@ static int set_port(struct wg_device *wg, u16 port) static int set_allowedip(struct wg_peer *peer, struct nlattr **attrs) { int ret = -EINVAL; + u32 flags = 0; u16 family; u8 cidr; @@ -337,19 +339,38 @@ static int set_allowedip(struct wg_peer *peer, struct nlattr **attrs) return ret; family = nla_get_u16(attrs[WGALLOWEDIP_A_FAMILY]); cidr = nla_get_u8(attrs[WGALLOWEDIP_A_CIDR_MASK]); + if (attrs[WGALLOWEDIP_A_FLAGS]) + flags = nla_get_u32(attrs[WGALLOWEDIP_A_FLAGS]); if (family == AF_INET && cidr <= 32 && - nla_len(attrs[WGALLOWEDIP_A_IPADDR]) == sizeof(struct in_addr)) - ret = wg_allowedips_insert_v4( - &peer->device->peer_allowedips, - nla_data(attrs[WGALLOWEDIP_A_IPADDR]), cidr, peer, - &peer->device->device_update_lock); - else if (family == AF_INET6 && cidr <= 128 && - nla_len(attrs[WGALLOWEDIP_A_IPADDR]) == 
sizeof(struct in6_addr)) - ret = wg_allowedips_insert_v6( - &peer->device->peer_allowedips, - nla_data(attrs[WGALLOWEDIP_A_IPADDR]), cidr, peer, - &peer->device->device_update_lock); + nla_len(attrs[WGALLOWEDIP_A_IPADDR]) == sizeof(struct in_addr)) { + if (flags & WGALLOWEDIP_F_REMOVE_ME) + ret = wg_allowedips_remove_v4(&peer->device->peer_allowedips, + nla_data(attrs[WGALLOWEDIP_A_IPADDR]), + cidr, + peer, + &peer->device->device_update_lock); + else + ret = wg_allowedips_insert_v4(&peer->device->peer_allowedips, + nla_data(attrs[WGALLOWEDIP_A_IPADDR]), + cidr, + peer, + &peer->device->device_update_lock); + } else if (family == AF_INET6 && cidr <= 128 && + nla_len(attrs[WGALLOWEDIP_A_IPADDR]) == sizeof(struct in6_addr)) { + if (flags & WGALLOWEDIP_F_REMOVE_ME) + ret = wg_allowedips_remove_v6(&peer->device->peer_allowedips, + nla_data(attrs[WGALLOWEDIP_A_IPADDR]), + cidr, + peer, + &peer->device->device_update_lock); + else + ret = wg_allowedips_insert_v6(&peer->device->peer_allowedips, + nla_data(attrs[WGALLOWEDIP_A_IPADDR]), + cidr, + peer, + &peer->device->device_update_lock); + } return ret; } diff --git a/drivers/net/wireguard/selftest/allowedips.c b/drivers/net/wireguard/selftest/allowedips.c index 3d1f64ff2e122..9f6458a889e96 100644 --- a/drivers/net/wireguard/selftest/allowedips.c +++ b/drivers/net/wireguard/selftest/allowedips.c @@ -461,6 +461,10 @@ static __init struct wg_peer *init_peer(void) wg_allowedips_insert_v##version(&t, ip##version(ipa, ipb, ipc, ipd), \ cidr, mem, &mutex) +#define remove(version, mem, ipa, ipb, ipc, ipd, cidr) \ + wg_allowedips_remove_v##version(&t, ip##version(ipa, ipb, ipc, ipd), \ + cidr, mem, &mutex) + #define maybe_fail() do { \ ++i; \ if (!_s) { \ @@ -586,6 +590,32 @@ bool __init wg_allowedips_selftest(void) test_negative(4, a, 192, 0, 0, 0); test_negative(4, a, 255, 0, 0, 0); + insert(4, a, 1, 0, 0, 0, 32); + insert(4, a, 192, 0, 0, 0, 24); + insert(6, a, 0x24446801, 0x40e40800, 0xdeaebeef, 0xdefbeef, 128); + insert(6, 
a, 0x24446800, 0xf0e40800, 0xeeaebeef, 0, 98); + test(4, a, 1, 0, 0, 0); + test(4, a, 192, 0, 0, 1); + test(6, a, 0x24446801, 0x40e40800, 0xdeaebeef, 0xdefbeef); + test(6, a, 0x24446800, 0xf0e40800, 0xeeaebeef, 0x10101010); + /* Must be an exact match to remove */ + remove(4, a, 192, 0, 0, 0, 32); + test(4, a, 192, 0, 0, 1); + remove(4, a, 192, 0, 0, 0, 24); + test_negative(4, a, 192, 0, 0, 1); + remove(4, a, 1, 0, 0, 0, 32); + test_negative(4, a, 1, 0, 0, 0); + /* Must be an exact match to remove */ + remove(6, a, 0x24446801, 0x40e40800, 0xdeaebeef, 0xdefbeef, 96); + test(6, a, 0x24446801, 0x40e40800, 0xdeaebeef, 0xdefbeef); + remove(6, a, 0x24446801, 0x40e40800, 0xdeaebeef, 0xdefbeef, 128); + test_negative(6, a, 0x24446801, 0x40e40800, 0xdeaebeef, 0xdefbeef); + /* Must match the peer to remove */ + remove(6, b, 0x24446800, 0xf0e40800, 0xeeaebeef, 0, 98); + test(6, a, 0x24446800, 0xf0e40800, 0xeeaebeef, 0x10101010); + remove(6, a, 0x24446800, 0xf0e40800, 0xeeaebeef, 0, 98); + test_negative(6, a, 0x24446800, 0xf0e40800, 0xeeaebeef, 0x10101010); + wg_allowedips_free(&t, &mutex); wg_allowedips_init(&t); insert(4, a, 192, 168, 0, 0, 16); diff --git a/include/uapi/linux/wireguard.h b/include/uapi/linux/wireguard.h index ae88be14c9478..e219194cb9f5a 100644 --- a/include/uapi/linux/wireguard.h +++ b/include/uapi/linux/wireguard.h @@ -101,6 +101,10 @@ * WGALLOWEDIP_A_FAMILY: NLA_U16 * WGALLOWEDIP_A_IPADDR: struct in_addr or struct in6_addr * WGALLOWEDIP_A_CIDR_MASK: NLA_U8 + * WGALLOWEDIP_A_FLAGS: NLA_U32, WGALLOWEDIP_F_REMOVE_ME if + * the specified IP should be removed, + * otherwise this IP will be added if + * it is not already present. * 0: NLA_NESTED * ... 
* 0: NLA_NESTED @@ -132,7 +136,7 @@ #define _WG_UAPI_WIREGUARD_H #define WG_GENL_NAME "wireguard" -#define WG_GENL_VERSION 1 +#define WG_GENL_VERSION 2 #define WG_KEY_LEN 32 @@ -184,11 +188,16 @@ enum wgpeer_attribute { }; #define WGPEER_A_MAX (__WGPEER_A_LAST - 1) +enum wgallowedip_flag { + WGALLOWEDIP_F_REMOVE_ME = 1U << 0, + __WGALLOWEDIP_F_ALL = WGALLOWEDIP_F_REMOVE_ME +}; enum wgallowedip_attribute { WGALLOWEDIP_A_UNSPEC, WGALLOWEDIP_A_FAMILY, WGALLOWEDIP_A_IPADDR, WGALLOWEDIP_A_CIDR_MASK, + WGALLOWEDIP_A_FLAGS, __WGALLOWEDIP_A_LAST }; #define WGALLOWEDIP_A_MAX (__WGALLOWEDIP_A_LAST - 1) diff --git a/tools/testing/selftests/wireguard/Makefile b/tools/testing/selftests/wireguard/Makefile new file mode 100644 index 0000000000000..4f4db54f89cb3 --- /dev/null +++ b/tools/testing/selftests/wireguard/Makefile @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Note: To build this you must install libnl-3 and libnl-genl-3 development +# packages. +remove-ip: + gcc -I/usr/include/libnl3 \ + -I../../../../usr/include \ + remove-ip.c \ + -o remove-ip \ + -lnl-genl-3 \ + -lnl-3 + +.PHONY: all +all: remove-ip + +.PHONY: clean +clean: + rm remove-ip diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index 405ff262ca93d..70058d6ebbe85 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -28,6 +28,7 @@ exec 3>&1 export LANG=C export WG_HIDE_KEYS=never NPROC=( /sys/devices/system/cpu/cpu+([0-9]) ); NPROC=${#NPROC[@]} +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) netns0="wg-test-$$-0" netns1="wg-test-$$-1" netns2="wg-test-$$-2" @@ -610,6 +611,43 @@ n0 wg set wg0 peer "$pub2" allowed-ips "$allowedips" } < <(n0 wg show wg0 allowed-ips) ip0 link del wg0 +# Test IP removal +allowedips=( ) +for i in {1..197}; do + allowedips+=( 192.168.0.$i ) + allowedips+=( abcd::$i ) +done +saved_ifs="$IFS" +IFS=, +allowedips="${allowedips[*]}" 
+IFS="$saved_ifs" +ip0 link add wg0 type wireguard +n0 wg set wg0 peer "$pub1" allowed-ips "$allowedips" +pub1_hex=$(echo "$pub1" | base64 -d | xxd -p -c 50) +n0 $SCRIPT_DIR/remove-ip wg0 "$pub1_hex" 4 192.168.0.1 +n0 $SCRIPT_DIR/remove-ip wg0 "$pub1_hex" 4 192.168.0.20 +n0 $SCRIPT_DIR/remove-ip wg0 "$pub1_hex" 4 192.168.0.100 +n0 $SCRIPT_DIR/remove-ip wg0 "$pub1_hex" 6 abcd::1 +n0 $SCRIPT_DIR/remove-ip wg0 "$pub1_hex" 6 abcd::20 +n0 $SCRIPT_DIR/remove-ip wg0 "$pub1_hex" 6 abcd::100 +n0 wg show wg0 allowed-ips +{ + read -r pub allowedips + [[ $pub == "$pub1" ]] + i=0 + for ip in $allowedips; do + [[ "$ip" != "192.168.0.1" ]] + [[ "$ip" != "192.168.0.20" ]] + [[ "$ip" != "192.168.0.100" ]] + [[ "$ip" != "abcd::1" ]] + [[ "$ip" != "abcd::20" ]] + [[ "$ip" != "abcd::100" ]] + ((++i)) + done + ((i == 388)) +} < <(n0 wg show wg0 allowed-ips) +ip0 link del wg0 + ! n0 wg show doesnotexist || false ip0 link add wg0 type wireguard diff --git a/tools/testing/selftests/wireguard/remove-ip.c b/tools/testing/selftests/wireguard/remove-ip.c new file mode 100644 index 0000000000000..242f922d99b56 --- /dev/null +++ b/tools/testing/selftests/wireguard/remove-ip.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/wireguard.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <netlink/socket.h> +#include <netlink/netlink.h> +#include <netlink/genl/ctrl.h> +#include <netlink/genl/genl.h> +#include <netlink/genl/family.h> + +#define CURVE25519_KEY_SIZE 32 + +const char *usage = "Usage: remove-ip INTERFACE_NAME PEER_PUBLIC_KEY_HEX IP_VERSION IP"; + +char h2b(char c) +{ + if ('0' <= c && c <= '9') + return c - '0'; + else if ('a' <= c && c <= 'f') + return 10 + (c - 'a'); + + return -1; +} + +int parse_key(const char *raw, unsigned char key[CURVE25519_KEY_SIZE]) +{ + int ret = 0; + int i; + + for (i = 0; i < CURVE25519_KEY_SIZE; i++) { + char h, l; + + h = h2b(raw[0]); + if (h < 0) + return -1; + + l = h2b(raw[1]); + if (l < 0) + 
return -1; + + key[i] = (h << 4) | l; + raw += 2; + } + + return 0; +} + +int main(int argc, char **argv) +{ + unsigned char addr[sizeof(struct in6_addr)]; + unsigned char pub_key[CURVE25519_KEY_SIZE]; + struct nl_sock *sock; + struct nl_msg *msg; + int addr_len; + int family; + int cidr; + int af; + + if (argc < 5) { + printf("Not enough arguments.\n\n%s\n", usage); + return -1; + } + + if (parse_key(argv[2], pub_key)) { + printf("Could not parse public key\n"); + return -1; + } + + switch (argv[3][0]) { + case '4': + af = AF_INET; + addr_len = sizeof(struct in_addr); + cidr = 32; + break; + case '6': + af = AF_INET6; + addr_len = sizeof(struct in6_addr); + cidr = 128; + break; + default: + printf("Invalid IP version\n"); + return -1; + } + + if (inet_pton(af, argv[4], &addr) <= 0) { + printf("Could not parse IP address\n"); + return -1; + } + + sock = nl_socket_alloc(); + genl_connect(sock); + family = genl_ctrl_resolve(sock, WG_GENL_NAME); + msg = nlmsg_alloc(); + genlmsg_put(msg, NL_AUTO_PID, NL_AUTO_SEQ, family, 0, NLM_F_ECHO, + WG_CMD_SET_DEVICE, WG_GENL_VERSION); + nla_put_string(msg, WGDEVICE_A_IFNAME, argv[1]); + + struct nlattr *peers = nla_nest_start(msg, WGDEVICE_A_PEERS); + struct nlattr *peer0 = nla_nest_start(msg, 0); + + nla_put(msg, WGPEER_A_PUBLIC_KEY, CURVE25519_KEY_SIZE, pub_key); + + struct nlattr *allowed_ips = nla_nest_start(msg, WGPEER_A_ALLOWEDIPS); + struct nlattr *allowed_ip0 = nla_nest_start(msg, 0); + + nla_put_u16(msg, WGALLOWEDIP_A_FAMILY, af); + nla_put(msg, WGALLOWEDIP_A_IPADDR, addr_len, &addr); + nla_put_u8(msg, WGALLOWEDIP_A_CIDR_MASK, cidr); + nla_put_u32(msg, WGALLOWEDIP_A_FLAGS, WGALLOWEDIP_F_REMOVE_ME); + nla_nest_end(msg, allowed_ip0); + nla_nest_end(msg, allowed_ips); + nla_nest_end(msg, peer0); + nla_nest_end(msg, peers); + + int err = nl_send_sync(sock, msg); + + if (err < 0) { + char message[256]; + + nl_perror(err, message); + printf("An error occurred: %d - %s\n", err, message); + } + + return err; +} -- 
2.46.0.469.g59c65b2a67-goog

1 year, 1 month

2
1
0 0

[PATCH 0/2] Adding SO_PEEK_OFF for TCPv6

by jmaloy＠redhat.com

From: Jon Maloy <jmaloy(a)redhat.com> Adding SO_PEEK_OFF for TCPv6 and selftest for both TCPv4 and TCPv6. Jon Maloy (2): tcp: add SO_PEEK_OFF socket option for TCPv6 selftests: add selftest for tcp SO_PEEK_OFF support net/ipv6/af_inet6.c | 1 + tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/tcp_so_peek_off.c | 181 ++++++++++++++++++ 3 files changed, 183 insertions(+) create mode 100644 tools/testing/selftests/net/tcp_so_peek_off.c -- 2.45.2

1 year, 1 month

4
8
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-kselftest-mirror August 2024