- Linux-kselftest-mirror - lists.linaro.org

[PATCH v3 4/6] KVM: LoongArch: selftests: Add timer interrupt test case

by Bibo Mao

Add timer test case based on common arch_timer code, timer interrupt with one-shot and period mode is tested. Signed-off-by: Bibo Mao <maobibo(a)loongson.cn> --- tools/testing/selftests/kvm/Makefile.kvm | 1 + .../kvm/include/loongarch/arch_timer.h | 84 ++++++++++++ .../kvm/include/loongarch/processor.h | 10 ++ .../selftests/kvm/lib/loongarch/processor.c | 4 +- .../selftests/kvm/loongarch/arch_timer.c | 125 ++++++++++++++++++ 5 files changed, 222 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/kvm/include/loongarch/arch_timer.h create mode 100644 tools/testing/selftests/kvm/loongarch/arch_timer.c diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index 148d427ff24b..9d01f4d0e3f9 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -210,6 +210,7 @@ TEST_GEN_PROGS_riscv += mmu_stress_test TEST_GEN_PROGS_riscv += rseq_test TEST_GEN_PROGS_riscv += steal_time +TEST_GEN_PROGS_loongarch = arch_timer TEST_GEN_PROGS_loongarch += coalesced_io_test TEST_GEN_PROGS_loongarch += demand_paging_test TEST_GEN_PROGS_loongarch += dirty_log_perf_test diff --git a/tools/testing/selftests/kvm/include/loongarch/arch_timer.h b/tools/testing/selftests/kvm/include/loongarch/arch_timer.h new file mode 100644 index 000000000000..b6399e748f72 --- /dev/null +++ b/tools/testing/selftests/kvm/include/loongarch/arch_timer.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * LoongArch Constant Timer specific interface + */ +#ifndef SELFTEST_KVM_ARCH_TIMER_H +#define SELFTEST_KVM_ARCH_TIMER_H + +#include "processor.h" +/* LoongArch timer frequency is constant 100MHZ */ +#define TIMER_FREQ (100UL << 20) +#define msec_to_cycles(msec) (TIMER_FREQ * (unsigned long)(msec) / 1000) +#define usec_to_cycles(usec) (TIMER_FREQ * (unsigned long)(usec) / 1000000) +#define cycles_to_usec(cycles) ((unsigned long)(cycles) * 1000000 / TIMER_FREQ) + +static inline unsigned long 
timer_get_cycles(void) +{ + unsigned long val = 0; + + __asm__ __volatile__( + "rdtime.d %0, $zero\n\t" + : "=r"(val) + : + ); + + return val; +} + +static inline void timer_set_next_cmp_ms(unsigned int msec, bool period) +{ + unsigned long val; + + val = msec_to_cycles(msec) & CSR_TCFG_VAL; + val |= CSR_TCFG_EN; + if (period) + val |= CSR_TCFG_PERIOD; + csr_write(val, LOONGARCH_CSR_TCFG); +} + +static inline void disable_timer(void) +{ + csr_write(0, LOONGARCH_CSR_TCFG); +} + +static inline unsigned long timer_get_val(void) +{ + return csr_read(LOONGARCH_CSR_TVAL); +} + +static inline unsigned long timer_get_cfg(void) +{ + return csr_read(LOONGARCH_CSR_TCFG); +} + +static inline void timer_irq_enable(void) +{ + unsigned long val; + + val = csr_read(LOONGARCH_CSR_ECFG); + val |= ECFGF_TIMER; + csr_write(val, LOONGARCH_CSR_ECFG); +} + +static inline void timer_irq_disable(void) +{ + unsigned long val; + + val = csr_read(LOONGARCH_CSR_ECFG); + val &= ~ECFGF_TIMER; + csr_write(val, LOONGARCH_CSR_ECFG); +} + +static inline void __delay(uint64_t cycles) +{ + uint64_t start = timer_get_cycles(); + + while ((timer_get_cycles() - start) < cycles) + cpu_relax(); +} + +static inline void udelay(unsigned long usec) +{ + __delay(usec_to_cycles(usec)); +} +#endif /* SELFTEST_KVM_ARCH_TIMER_H */ diff --git a/tools/testing/selftests/kvm/include/loongarch/processor.h b/tools/testing/selftests/kvm/include/loongarch/processor.h index b027f8f4dac7..61f6e215046b 100644 --- a/tools/testing/selftests/kvm/include/loongarch/processor.h +++ b/tools/testing/selftests/kvm/include/loongarch/processor.h @@ -83,6 +83,8 @@ #define LOONGARCH_CSR_PRMD 0x1 #define LOONGARCH_CSR_EUEN 0x2 #define LOONGARCH_CSR_ECFG 0x4 +#define ECFGB_TIMER 11 +#define ECFGF_TIMER (BIT_ULL(ECFGB_TIMER)) #define LOONGARCH_CSR_ESTAT 0x5 /* Exception status */ #define CSR_ESTAT_EXC_SHIFT 16 #define CSR_ESTAT_EXC_WIDTH 6 @@ -111,6 +113,14 @@ #define LOONGARCH_CSR_KS1 0x31 #define LOONGARCH_CSR_TMID 0x40 #define 
LOONGARCH_CSR_TCFG 0x41 +#define CSR_TCFG_VAL (BIT_ULL(48) - BIT_ULL(2)) +#define CSR_TCFG_PERIOD_SHIFT 1 +#define CSR_TCFG_PERIOD (0x1UL << CSR_TCFG_PERIOD_SHIFT) +#define CSR_TCFG_EN (0x1UL) +#define LOONGARCH_CSR_TVAL 0x42 +#define LOONGARCH_CSR_TINTCLR 0x44 /* Timer interrupt clear */ +#define CSR_TINTCLR_TI_SHIFT 0 +#define CSR_TINTCLR_TI (1 << CSR_TINTCLR_TI_SHIFT) /* TLB refill exception entry */ #define LOONGARCH_CSR_TLBRENTRY 0x88 #define LOONGARCH_CSR_TLBRSAVE 0x8b diff --git a/tools/testing/selftests/kvm/lib/loongarch/processor.c b/tools/testing/selftests/kvm/lib/loongarch/processor.c index 20ba476ccb72..436990258068 100644 --- a/tools/testing/selftests/kvm/lib/loongarch/processor.c +++ b/tools/testing/selftests/kvm/lib/loongarch/processor.c @@ -271,8 +271,8 @@ static void loongarch_vcpu_setup(struct kvm_vcpu *vcpu) TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); } - /* user mode and page enable mode */ - val = PLV_USER | CSR_CRMD_PG; + /* kernel mode and page enable mode */ + val = PLV_KERN | CSR_CRMD_PG; loongarch_set_csr(vcpu, LOONGARCH_CSR_CRMD, val); loongarch_set_csr(vcpu, LOONGARCH_CSR_PRMD, val); loongarch_set_csr(vcpu, LOONGARCH_CSR_EUEN, 1); diff --git a/tools/testing/selftests/kvm/loongarch/arch_timer.c b/tools/testing/selftests/kvm/loongarch/arch_timer.c new file mode 100644 index 000000000000..a8b7ff05faf6 --- /dev/null +++ b/tools/testing/selftests/kvm/loongarch/arch_timer.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * The test validates one-shot constant timer IRQ using CSR_TCFG and + * CSR_TVAL registers. 
+ */ +#include "arch_timer.h" +#include "kvm_util.h" +#include "processor.h" +#include "timer_test.h" +#include "ucall_common.h" + +static void guest_irq_handler(struct ex_regs *regs) +{ + uint64_t xcnt, val, cfg, xcnt_diff_us; + unsigned int intid; + uint32_t cpu = guest_get_vcpuid(); + struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; + + intid = !!(regs->estat & BIT(INT_TI)); + + /* Make sure we are dealing with the correct timer IRQ */ + GUEST_ASSERT_EQ(intid, 1); + + cfg = timer_get_cfg(); + if (cfg & CSR_TCFG_PERIOD) { + WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter - 1); + if (shared_data->nr_iter == 0) + disable_timer(); + csr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR); + return; + } + + /* + * On physical machine, value of LOONGARCH_CSR_TVAL is BIT_ULL(48) - 1 + * On virtual machine, its value counts down from BIT_ULL(48) - 1 + */ + val = timer_get_val(); + xcnt = timer_get_cycles(); + xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt); + + /* Basic 'timer condition met' check */ + __GUEST_ASSERT(val > cfg, + "val = 0x%lx, cfg = 0x%lx, xcnt_diff_us = 0x%lx", + val, cfg, xcnt_diff_us); + + csr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR); + WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1); +} + +static void guest_test_oneshot_timer(uint32_t cpu) +{ + uint32_t irq_iter, config_iter; + uint64_t us; + struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; + + shared_data->nr_iter = 0; + shared_data->guest_stage = 0; + us = msecs_to_usecs(test_args.timer_period_ms) + test_args.timer_err_margin_us; + for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) { + shared_data->xcnt = timer_get_cycles(); + + /* Setup the next interrupt */ + timer_set_next_cmp_ms(test_args.timer_period_ms, false); + /* Setup a timeout for the interrupt to arrive */ + udelay(us); + + irq_iter = READ_ONCE(shared_data->nr_iter); + __GUEST_ASSERT(config_iter + 1 == irq_iter, + "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n" 
+ " Guest timer interrupt was not triggered within the specified\n" + " interval, try to increase the error margin by [-e] option.\n", + config_iter + 1, irq_iter); + } +} + +static void guest_test_period_timer(uint32_t cpu) +{ + uint32_t irq_iter; + uint64_t us; + struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; + + shared_data->nr_iter = test_args.nr_iter; + shared_data->xcnt = timer_get_cycles(); + us = msecs_to_usecs(test_args.timer_period_ms) + test_args.timer_err_margin_us; + timer_set_next_cmp_ms(test_args.timer_period_ms, true); + /* Setup a timeout for the interrupt to arrive */ + udelay(us * test_args.nr_iter); + irq_iter = READ_ONCE(shared_data->nr_iter); + __GUEST_ASSERT(irq_iter == 0, + "irq_iter = 0x%x.\n" + " Guest period timer interrupt was not triggered within the specified\n" + " interval, try to increase the error margin by [-e] option.\n", + irq_iter); +} + +static void guest_code(void) +{ + uint32_t cpu = guest_get_vcpuid(); + + timer_irq_enable(); + local_irq_enable(); + guest_test_oneshot_timer(cpu); + guest_test_period_timer(cpu); + + GUEST_DONE(); +} + +struct kvm_vm *test_vm_create(void) +{ + struct kvm_vm *vm; + int nr_vcpus = test_args.nr_vcpus; + + vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus); + vm_init_descriptor_tables(vm); + vm_install_exception_handler(vm, EXCCODE_INT, guest_irq_handler); + + /* Make all the test's cmdline args visible to the guest */ + sync_global_to_guest(vm, test_args); + return vm; +} + +void test_vm_cleanup(struct kvm_vm *vm) +{ + kvm_vm_free(vm); +} -- 2.39.3

1 month, 3 weeks

1
0
0 0

[PATCH v3 3/6] KVM: LoongArch: selftests: Add exception handler register interface

by Bibo Mao

Add interrupt and exception handler register interface. When exception happens, execute registered exception handler if exists, else report error. Signed-off-by: Bibo Mao <maobibo(a)loongson.cn> --- .../kvm/include/loongarch/processor.h | 14 +++++++++ .../selftests/kvm/lib/loongarch/processor.c | 29 +++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/tools/testing/selftests/kvm/include/loongarch/processor.h b/tools/testing/selftests/kvm/include/loongarch/processor.h index 0bf120d23092..b027f8f4dac7 100644 --- a/tools/testing/selftests/kvm/include/loongarch/processor.h +++ b/tools/testing/selftests/kvm/include/loongarch/processor.h @@ -84,6 +84,11 @@ #define LOONGARCH_CSR_EUEN 0x2 #define LOONGARCH_CSR_ECFG 0x4 #define LOONGARCH_CSR_ESTAT 0x5 /* Exception status */ +#define CSR_ESTAT_EXC_SHIFT 16 +#define CSR_ESTAT_EXC_WIDTH 6 +#define CSR_ESTAT_EXC (0x3f << CSR_ESTAT_EXC_SHIFT) +#define EXCCODE_INT 0 /* Interrupt */ +#define INT_TI 11 /* Timer interrupt*/ #define LOONGARCH_CSR_ERA 0x6 /* ERA */ #define LOONGARCH_CSR_BADV 0x7 /* Bad virtual address */ #define LOONGARCH_CSR_EENTRY 0xc @@ -155,6 +160,15 @@ struct ex_regs { #define PRMD_OFFSET_EXREGS offsetof(struct ex_regs, prmd) #define EXREGS_SIZE sizeof(struct ex_regs) +#define VECTOR_NUM 64 +typedef void(*handler_fn)(struct ex_regs *); +struct handlers { + handler_fn exception_handlers[VECTOR_NUM]; +}; + +void vm_init_descriptor_tables(struct kvm_vm *vm); +void vm_install_exception_handler(struct kvm_vm *vm, int vector, handler_fn handler); + static inline void local_irq_enable(void) { unsigned int flags = CSR_CRMD_IE; diff --git a/tools/testing/selftests/kvm/lib/loongarch/processor.c b/tools/testing/selftests/kvm/lib/loongarch/processor.c index 5b5f09acf229..20ba476ccb72 100644 --- a/tools/testing/selftests/kvm/lib/loongarch/processor.c +++ b/tools/testing/selftests/kvm/lib/loongarch/processor.c @@ -11,6 +11,7 @@ #define LOONGARCH_GUEST_STACK_VADDR_MIN 0x200000 static vm_paddr_t invalid_pgtable[4]; 
+static vm_vaddr_t exception_handlers; static uint64_t virt_pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level) { @@ -184,6 +185,13 @@ void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) void route_exception(struct ex_regs *regs) { unsigned long pc, estat, badv; + int vector; + struct handlers *handlers; + + handlers = (struct handlers *)exception_handlers; + vector = (regs->estat & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT; + if (handlers && handlers->exception_handlers[vector]) + return handlers->exception_handlers[vector](regs); pc = regs->pc; badv = regs->badv; @@ -192,6 +200,27 @@ void route_exception(struct ex_regs *regs) while (1) ; } +void vm_init_descriptor_tables(struct kvm_vm *vm) +{ + void *addr; + + vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers), + LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA); + + addr = addr_gva2hva(vm, vm->handlers); + memset(addr, 0, vm->page_size); + exception_handlers = vm->handlers; + sync_global_to_guest(vm, exception_handlers); +} + +void vm_install_exception_handler(struct kvm_vm *vm, int vector, handler_fn handler) +{ + struct handlers *handlers = addr_gva2hva(vm, vm->handlers); + + assert(vector < VECTOR_NUM); + handlers->exception_handlers[vector] = handler; +} + void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) { int i; -- 2.39.3

1 month, 3 weeks

1
0
0 0

[PATCH v3 2/6] KVM: LoongArch: selftests: Add basic interfaces

by Bibo Mao

Add some basic function interfaces such as CSR register access, local irq enable or disable APIs. Signed-off-by: Bibo Mao <maobibo(a)loongson.cn> --- .../kvm/include/loongarch/processor.h | 52 +++++++++++++++++++ .../selftests/kvm/lib/loongarch/processor.c | 5 ++ 2 files changed, 57 insertions(+) diff --git a/tools/testing/selftests/kvm/include/loongarch/processor.h b/tools/testing/selftests/kvm/include/loongarch/processor.h index 374caddfb0db..0bf120d23092 100644 --- a/tools/testing/selftests/kvm/include/loongarch/processor.h +++ b/tools/testing/selftests/kvm/include/loongarch/processor.h @@ -113,6 +113,28 @@ #define CSR_TLBREHI_PS_SHIFT 0 #define CSR_TLBREHI_PS (0x3fUL << CSR_TLBREHI_PS_SHIFT) +#define csr_read(csr) \ +({ \ + register unsigned long __v; \ + __asm__ __volatile__( \ + "csrrd %[val], %[reg]\n\t" \ + : [val] "=r" (__v) \ + : [reg] "i" (csr) \ + : "memory"); \ + __v; \ +}) + +#define csr_write(v, csr) \ +({ \ + register unsigned long __v = v; \ + __asm__ __volatile__ ( \ + "csrwr %[val], %[reg]\n\t" \ + : [val] "+r" (__v) \ + : [reg] "i" (csr) \ + : "memory"); \ + __v; \ +}) + #define EXREGS_GPRS (32) #ifndef __ASSEMBLER__ @@ -133,6 +155,36 @@ struct ex_regs { #define PRMD_OFFSET_EXREGS offsetof(struct ex_regs, prmd) #define EXREGS_SIZE sizeof(struct ex_regs) +static inline void local_irq_enable(void) +{ + unsigned int flags = CSR_CRMD_IE; + + register unsigned int mask asm("$t0") = CSR_CRMD_IE; + + __asm__ __volatile__( + "csrxchg %[val], %[mask], %[reg]\n\t" + : [val] "+r" (flags) + : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD) + : "memory"); +} + +static inline void local_irq_disable(void) +{ + unsigned int flags = 0; + + register unsigned int mask asm("$t0") = CSR_CRMD_IE; + + __asm__ __volatile__( + "csrxchg %[val], %[mask], %[reg]\n\t" + : [val] "+r" (flags) + : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD) + : "memory"); +} + +static inline void cpu_relax(void) +{ + asm volatile("nop" ::: "memory"); +} #else #define PC_OFFSET_EXREGS 
((EXREGS_GPRS + 0) * 8) #define ESTAT_OFFSET_EXREGS ((EXREGS_GPRS + 1) * 8) diff --git a/tools/testing/selftests/kvm/lib/loongarch/processor.c b/tools/testing/selftests/kvm/lib/loongarch/processor.c index 0ac1abcb71cb..5b5f09acf229 100644 --- a/tools/testing/selftests/kvm/lib/loongarch/processor.c +++ b/tools/testing/selftests/kvm/lib/loongarch/processor.c @@ -344,3 +344,8 @@ void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) regs.pc = (uint64_t)guest_code; vcpu_regs_set(vcpu, &regs); } + +uint32_t guest_get_vcpuid(void) +{ + return csr_read(LOONGARCH_CSR_CPUID); +} -- 2.39.3

1 month, 3 weeks

1
0
0 0

[PATCH v3 1/6] KVM: LoongArch: selftests: Add system registers save and restore on exception

by Bibo Mao

When the system returns from an exception with the ertn instruction, the PC comes from LOONGARCH_CSR_ERA and CSR_CRMD comes from LOONGARCH_CSR_PRMD. Save the CSR_ERA and CSR_PRMD registers on the stack and restore them from the stack, so that they can be modified by an exception handler in the future. Signed-off-by: Bibo Mao <maobibo(a)loongson.cn> --- tools/testing/selftests/kvm/include/loongarch/processor.h | 5 ++++- tools/testing/selftests/kvm/lib/loongarch/exception.S | 6 ++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/include/loongarch/processor.h b/tools/testing/selftests/kvm/include/loongarch/processor.h index 6427a3275e6a..374caddfb0db 100644 --- a/tools/testing/selftests/kvm/include/loongarch/processor.h +++ b/tools/testing/selftests/kvm/include/loongarch/processor.h @@ -124,18 +124,21 @@ struct ex_regs { unsigned long pc; unsigned long estat; unsigned long badv; + unsigned long prmd; }; #define PC_OFFSET_EXREGS offsetof(struct ex_regs, pc) #define ESTAT_OFFSET_EXREGS offsetof(struct ex_regs, estat) #define BADV_OFFSET_EXREGS offsetof(struct ex_regs, badv) +#define PRMD_OFFSET_EXREGS offsetof(struct ex_regs, prmd) #define EXREGS_SIZE sizeof(struct ex_regs) #else #define PC_OFFSET_EXREGS ((EXREGS_GPRS + 0) * 8) #define ESTAT_OFFSET_EXREGS ((EXREGS_GPRS + 1) * 8) #define BADV_OFFSET_EXREGS ((EXREGS_GPRS + 2) * 8) -#define EXREGS_SIZE ((EXREGS_GPRS + 3) * 8) +#define PRMD_OFFSET_EXREGS ((EXREGS_GPRS + 3) * 8) +#define EXREGS_SIZE ((EXREGS_GPRS + 4) * 8) #endif #endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/lib/loongarch/exception.S b/tools/testing/selftests/kvm/lib/loongarch/exception.S index 88bfa505c6f5..3f1e4b67c5ae 100644 --- a/tools/testing/selftests/kvm/lib/loongarch/exception.S +++ b/tools/testing/selftests/kvm/lib/loongarch/exception.S @@ -51,9 +51,15 @@ handle_exception: st.d t0, sp, ESTAT_OFFSET_EXREGS csrrd t0, LOONGARCH_CSR_BADV st.d t0, sp, BADV_OFFSET_EXREGS + csrrd t0, LOONGARCH_CSR_PRMD + st.d t0, 
sp, PRMD_OFFSET_EXREGS or a0, sp, zero bl route_exception + ld.d t0, sp, PC_OFFSET_EXREGS + csrwr t0, LOONGARCH_CSR_ERA + ld.d t0, sp, PRMD_OFFSET_EXREGS + csrwr t0, LOONGARCH_CSR_PRMD restore_gprs sp csrrd sp, LOONGARCH_CSR_KS0 ertn -- 2.39.3

1 month, 3 weeks

1
0
0 0

[PATCH v2] selftests: cgroup: make test_memcg_sock robust against delayed sock stats

by Guopeng Zhang

test_memcg_sock() currently requires that memory.stat's "sock " counter is exactly zero immediately after the TCP server exits. On a busy system this assumption is too strict: - Socket memory may be freed with a small delay (e.g. RCU callbacks). - memcg statistics are updated asynchronously via the rstat flushing worker, so the "sock " value in memory.stat can stay non-zero for a short period of time even after all socket memory has been uncharged. As a result, test_memcg_sock() can intermittently fail even though socket memory accounting is working correctly. Make the test more robust by polling memory.stat for the "sock " counter and allowing it some time to drop to zero instead of checking it only once. The timeout is set to 3 seconds to cover the periodic rstat flush interval (FLUSH_TIME = 2*HZ by default) plus some scheduling slack. If the counter does not become zero within the timeout, the test still fails as before. On my test system, running test_memcontrol 50 times produced: - Before this patch: 6/50 runs passed. - After this patch: 50/50 runs passed. Suggested-by: Lance Yang <lance.yang(a)linux.dev> Signed-off-by: Guopeng Zhang <zhangguopeng(a)kylinos.cn> --- v2: - Mention the periodic rstat flush interval (FLUSH_TIME = 2*HZ) in the comment and clarify the rationale for the 3s timeout. - Replace the hard-coded retry count and wait interval with macros to avoid magic numbers and make the 3s timeout calculation explicit. 
--- .../selftests/cgroup/test_memcontrol.c | 30 ++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c index 4e1647568c5b..7bea656658a2 100644 --- a/tools/testing/selftests/cgroup/test_memcontrol.c +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -24,6 +24,9 @@ static bool has_localevents; static bool has_recursiveprot; +#define MEMCG_SOCKSTAT_WAIT_RETRIES 30 /* 3s total */ +#define MEMCG_SOCKSTAT_WAIT_INTERVAL_US (100 * 1000) /* 100 ms */ + int get_temp_fd(void) { return open(".", O_TMPFILE | O_RDWR | O_EXCL); @@ -1384,6 +1387,8 @@ static int test_memcg_sock(const char *root) int bind_retries = 5, ret = KSFT_FAIL, pid, err; unsigned short port; char *memcg; + long sock_post = -1; + int i; memcg = cg_name(root, "memcg_test"); if (!memcg) @@ -1432,7 +1437,30 @@ static int test_memcg_sock(const char *root) if (cg_read_long(memcg, "memory.current") < 0) goto cleanup; - if (cg_read_key_long(memcg, "memory.stat", "sock ")) + /* + * memory.stat is updated asynchronously via the memcg rstat + * flushing worker, which runs periodically (every 2 seconds, + * see FLUSH_TIME). On a busy system, the "sock " counter may + * stay non-zero for a short period of time after the TCP + * connection is closed and all socket memory has been + * uncharged. + * + * Poll memory.stat for up to 3 seconds (~FLUSH_TIME plus some + * scheduling slack) and require that the "sock " counter + * eventually drops to zero. + */ + for (i = 0; i < MEMCG_SOCKSTAT_WAIT_RETRIES; i++) { + sock_post = cg_read_key_long(memcg, "memory.stat", "sock "); + if (sock_post < 0) + goto cleanup; + + if (!sock_post) + break; + + usleep(MEMCG_SOCKSTAT_WAIT_INTERVAL_US); + } + + if (sock_post) goto cleanup; ret = KSFT_PASS; -- 2.25.1

1 month, 3 weeks

2
2
0 0

[PATCH] selftests: cgroup: make test_memcg_sock robust against delayed sock stats

by Guopeng Zhang

test_memcg_sock() currently requires that memory.stat's "sock " counter is exactly zero immediately after the TCP server exits. On a busy system this assumption is too strict: - Socket memory may be freed with a small delay (e.g. RCU callbacks). - memcg statistics are updated asynchronously via the rstat flushing worker, so the "sock " value in memory.stat can stay non-zero for a short period of time even after all socket memory has been uncharged. As a result, test_memcg_sock() can intermittently fail even though socket memory accounting is working correctly. Make the test more robust by polling memory.stat for the "sock " counter and allowing it some time to drop to zero instead of checking it only once. If the counter does not become zero within the timeout, the test still fails as before. On my test system, running test_memcontrol 50 times produced: - Before this patch: 6/50 runs passed. - After this patch: 50/50 runs passed. Signed-off-by: Guopeng Zhang <zhangguopeng(a)kylinos.cn> --- .../selftests/cgroup/test_memcontrol.c | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c index 4e1647568c5b..86d9981cddd8 100644 --- a/tools/testing/selftests/cgroup/test_memcontrol.c +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -1384,6 +1384,8 @@ static int test_memcg_sock(const char *root) int bind_retries = 5, ret = KSFT_FAIL, pid, err; unsigned short port; char *memcg; + long sock_post = -1; + int i, retries = 30; memcg = cg_name(root, "memcg_test"); if (!memcg) @@ -1432,7 +1434,27 @@ static int test_memcg_sock(const char *root) if (cg_read_long(memcg, "memory.current") < 0) goto cleanup; - if (cg_read_key_long(memcg, "memory.stat", "sock ")) + /* + * memory.stat is updated asynchronously via the memcg rstat + * flushing worker, so the "sock " counter may stay non-zero + * for a short period of time after the TCP connection is + 
* closed and all socket memory has been uncharged. + * + * Poll memory.stat for up to 3 seconds and require that the + * "sock " counter eventually drops to zero. + */ + for (i = 0; i < retries; i++) { + sock_post = cg_read_key_long(memcg, "memory.stat", "sock "); + if (sock_post < 0) + goto cleanup; + + if (!sock_post) + break; + + usleep(100 * 1000); /* 100ms */ + } + + if (sock_post) goto cleanup; ret = KSFT_PASS; -- 2.25.1

1 month, 3 weeks

3
3
0 0

[PATCH net 00/11] mptcp: misc fixes for v6.18-rc7

by Matthieu Baerts (NGI0)

Here are various unrelated fixes: - Patch 1: Fix window space computation for fallback connections which can affect ACK generation. A fix for v5.11. - Patch 2: Avoid unneeded subflow-level drops due to unsynced received window. A fix for v5.11. - Patch 3: Avoid premature close for fallback connections with PREEMPT kernels. A fix for v5.12. - Patch 4: Reset instead of fallback in case of data in the MPTCP out-of-order queue. A fix for v5.7. - Patches 5-7: Also avoid sending a "plain" TCP reset when closing with an MP_FASTCLOSE. A fix for v6.1. - Patches 8-9: Longer timeout for background connections in MPTCP Join selftests. An additional fix for recent patches for v5.13/v6.1. - Patches 10-11: Fix typo in a check introduced in a recent refactoring. A fix for v6.15. Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org> --- Gang Yan (2): mptcp: fix address removal logic in mptcp_pm_nl_rm_addr selftests: mptcp: add a check for 'add_addr_accepted' Matthieu Baerts (NGI0) (3): selftests: mptcp: join: fastclose: remove flaky marks selftests: mptcp: join: endpoints: longer timeout selftests: mptcp: join: userspace: longer timeout Paolo Abeni (6): mptcp: fix ack generation for fallback msk mptcp: avoid unneeded subflow-level drops mptcp: fix premature close in case of fallback mptcp: do not fallback when OoO is present mptcp: decouple mptcp fastclose from tcp close mptcp: fix duplicate reset on fastclose net/mptcp/options.c | 54 +++++++++++++++++++++- net/mptcp/pm_kernel.c | 2 +- net/mptcp/protocol.c | 59 +++++++++++++++++-------- net/mptcp/protocol.h | 3 +- tools/testing/selftests/net/mptcp/mptcp_join.sh | 27 ++++++----- 5 files changed, 113 insertions(+), 32 deletions(-) --- base-commit: 8e0a754b0836d996802713bbebc87bc1cc17925c change-id: 20251117-net-mptcp-misc-fixes-6-18-rc6-835d94cdc095 Best regards, -- Matthieu Baerts (NGI0) <matttbe(a)kernel.org>

1 month, 3 weeks

3
14
0 0

[PATCH net-next v2 0/4] netconsole: Allow userdata buffer to grow dynamically

by Gustavo Luiz Duarte

The current netconsole implementation allocates a static buffer for extradata (userdata + sysdata) with a fixed size of MAX_EXTRADATA_ENTRY_LEN * MAX_EXTRADATA_ITEMS bytes for every target, regardless of whether userspace actually uses this feature. This forces us to keep MAX_EXTRADATA_ITEMS small (16), which is restrictive for users who need to attach more metadata to their log messages. This patch series enables dynamic allocation of the userdata buffer, allowing it to grow on-demand based on actual usage. The series: 1. Refactors send_fragmented_body() to simplify handling of separated userdata and sysdata (patch 1/4) 2. Splits userdata and sysdata into separate buffers (patch 2/4) 3. Implements dynamic allocation for the userdata buffer (patch 3/4) 4. Increases MAX_USERDATA_ITEMS from 16 to 256 now that we can do so without memory waste (patch 4/4) Benefits: - No memory waste when userdata is not used - Targets that use userdata only consume what they need - Users can attach significantly more metadata without impacting systems that don't use this feature Signed-off-by: Gustavo Luiz Duarte <gustavold(a)gmail.com> --- Changes in v2: - Added null pointer checks for userdata and sysdata buffers - Added MAX_SYSDATA_ITEMS to enum sysdata_feature - Moved code out of ifdef in send_msg_no_fragmentation() - Renamed variables in send_fragmented_body() to make it easier to reason about the code - Link to v1: https://lore.kernel.org/r/20251105-netconsole_dynamic_extradata-v1-0-142890… --- Gustavo Luiz Duarte (4): netconsole: Simplify send_fragmented_body() netconsole: Split userdata and sysdata netconsole: Dynamic allocation of userdata buffer netconsole: Increase MAX_USERDATA_ITEMS drivers/net/netconsole.c | 370 ++++++++++----------- .../selftests/drivers/net/netcons_overflow.sh | 2 +- 2 files changed, 179 insertions(+), 193 deletions(-) --- base-commit: 68fa5b092efab37a4f08a47b22bb8ca98f7f6223 change-id: 20251007-netconsole_dynamic_extradata-21bd9d726568 Best regards, -- 
Gustavo Duarte <gustavold(a)meta.com>

1 month, 3 weeks

2
11
0 0

[PATCH] selftests: tracing: Update fprobe selftest for ftrace based fprobe

by Masami Hiramatsu (Google)

From: Masami Hiramatsu (Google) <mhiramat(a)kernel.org> Since the fprobe implementation is based on both fgraph and ftrace, the selftest needs to be updated. The test no longer counts the exact number of lines in enabled_functions; it only checks that the line count changes. Signed-off-by: Masami Hiramatsu (Google) <mhiramat(a)kernel.org> --- .../ftrace/test.d/dynevent/add_remove_fprobe.tc | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc index 2506f464811b..47067a5e3cb0 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc @@ -28,25 +28,21 @@ test -d events/fprobes/myevent1 test -d events/fprobes/myevent2 echo 1 > events/fprobes/myevent1/enable -# Make sure the event is attached and is the only one +# Make sure the event is attached. grep -q $PLACE enabled_functions cnt=`cat enabled_functions | wc -l` -if [ $cnt -ne $((ocnt + 1)) ]; then +if [ $cnt -eq $ocnt ]; then exit_fail fi echo 1 > events/fprobes/myevent2/enable -# It should till be the only attached function -cnt=`cat enabled_functions | wc -l` -if [ $cnt -ne $((ocnt + 1)) ]; then - exit_fail -fi +cnt2=`cat enabled_functions | wc -l` echo 1 > events/fprobes/myevent3/enable # If the function is different, the attached function should be increased grep -q $PLACE2 enabled_functions cnt=`cat enabled_functions | wc -l` -if [ $cnt -ne $((ocnt + 2)) ]; then +if [ $cnt -eq $cnt2 ]; then exit_fail fi @@ -56,12 +52,6 @@ echo "-:myevent2" >> dynamic_events grep -q myevent1 dynamic_events ! grep -q myevent2 dynamic_events -# should still have 2 left -cnt=`cat enabled_functions | wc -l` -if [ $cnt -ne $((ocnt + 2)) ]; then - exit_fail -fi - echo 0 > events/fprobes/enable echo > dynamic_events

1 month, 3 weeks

3
3
0 0

[PATCH] KVM: selftests: Add SYNC after guest ITS setup in vgic_lpi_stress

by Maximilian Dittgen

vgic_lpi_stress sends MAPTI and MAPC commands during guest GIC setup to map interrupt events to ITT entries and collection IDs to redistributors, respectively. Theoretically, we have no guarantee that the ITS will finish handling these mapping commands before the selftest calls KVM_SIGNAL_MSI to inject LPIs to the guest. If LPIs are injected before ITS mapping completes, the ITS cannot properly pass the interrupt on to the redistributor. In practice, KVM processes ITS commands synchronously, so SYNC calls are functionally unnecessary and ignored in vgic_its_handle_command(). However, selftests should test based on ARM specification and be blind to KVM-specific implementation optimizations. Thus, we must update the test to be architecturally compliant and logically correct. Fix by adding a SYNC command to the selftests ITS library, then calling SYNC after ITS mapping to ensure mapping completes before signal_lpi() writes to GITS_TRANSLATER. This patch depends on commit a24f7afce048 ("KVM: selftests: fix MAPC RDbase target formatting in vgic_lpi_stress"), which is queued in kvmarm/fixes. 
Signed-off-by: Maximilian Dittgen <mdittgen(a)amazon.de> --- Validated by the following debug logging to the GITS_CMD_SYNC handler in vgic_its_handle_command(): kvm_info("ITS SYNC command: %016llx %016llx %016llx %016llx\n", its_cmd[0], its_cmd[1], its_cmd[2], its_cmd[3]); Initialized a selftest guest with 4 vCPUs by: ./vgic_lpi_stress -v 4 Confirmed that an ITS SYNC was successfully called for all 4 vCPUs: kvm [5094]: ITS SYNC command: 0000000000000005 0000000000000000 0000000000000000 0000000000000000 kvm [5094]: ITS SYNC command: 0000000000000005 0000000000000000 0000000000010000 0000000000000000 kvm [5094]: ITS SYNC command: 0000000000000005 0000000000000000 0000000000020000 0000000000000000 kvm [5094]: ITS SYNC command: 0000000000000005 0000000000000000 0000000000030000 0000000000000000 --- tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c | 4 ++++ .../testing/selftests/kvm/include/arm64/gic_v3_its.h | 1 + tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c | 11 +++++++++++ 3 files changed, 16 insertions(+) diff --git a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c index 687d04463983..e857a605f577 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c +++ b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c @@ -118,6 +118,10 @@ static void guest_setup_gic(void) guest_setup_its_mappings(); guest_invalidate_all_rdists(); + + /* SYNC to ensure ITS setup is complete */ + for (cpuid = 0; cpuid < test_data.nr_cpus; cpuid++) + its_send_sync_cmd(test_data.cmdq_base_va, cpuid); } static void guest_code(size_t nr_lpis) diff --git a/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h b/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h index 3722ed9c8f96..58feef3eb386 100644 --- a/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h +++ b/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h @@ -15,5 +15,6 @@ void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool val 
void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id, u32 collection_id, u32 intid); void its_send_invall_cmd(void *cmdq_base, u32 collection_id); +void its_send_sync_cmd(void *cmdq_base, u32 vcpu_id); #endif // __SELFTESTS_GIC_V3_ITS_H__ diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c index 0e2f8ed90f30..d9ee331074ea 100644 --- a/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c +++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c @@ -253,3 +253,14 @@ void its_send_invall_cmd(void *cmdq_base, u32 collection_id) its_send_cmd(cmdq_base, &cmd); } + +void its_send_sync_cmd(void *cmdq_base, u32 vcpu_id) +{ + struct its_cmd_block cmd = {}; + + its_encode_cmd(&cmd, GITS_CMD_SYNC); + its_encode_target(&cmd, procnum_to_rdbase(vcpu_id)); + + its_send_cmd(cmdq_base, &cmd); +} + -- 2.50.1 (Apple Git-155) Amazon Web Services Development Center Germany GmbH Tamara-Danz-Str. 13 10243 Berlin Geschaeftsfuehrung: Christian Schlaeger, Christof Hellmis Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B Sitz: Berlin Ust-ID: DE 365 538 597

1 month, 3 weeks

3
5
0 0

[PATCH] selftests/dma: fix invalid array access in printf

by Zhang Chujun

The printf statement attempts to print the DMA direction string using the syntax 'dir[directions]', which is an invalid array access. The variable 'dir' is an integer, and 'directions' is a char pointer array. This incorrect syntax should be 'directions[dir]', using 'dir' as the index into the 'directions' array. Fix this by correcting the array access from 'dir[directions]' to 'directions[dir]'. Signed-off-by: Zhang Chujun <zhangchujun(a)cmss.chinamobile.com> diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c index b12f1f9babf8..b925756373ce 100644 --- a/tools/testing/selftests/dma/dma_map_benchmark.c +++ b/tools/testing/selftests/dma/dma_map_benchmark.c @@ -118,7 +118,7 @@ int main(int argc, char **argv) } printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s granule: %d\n", - threads, seconds, node, dir[directions], granule); + threads, seconds, node, directions[dir], granule); printf("average map latency(us):%.1f standard deviation:%.1f\n", map.avg_map_100ns/10.0, map.map_stddev/10.0); printf("average unmap latency(us):%.1f standard deviation:%.1f\n", -- 2.50.1.windows.1

1 month, 3 weeks

2
3
0 0

[PATCH RESEND v3] selftests/run_kselftest.sh: exit with error if tests fail

by Brendan Jackman

Parsing KTAP is quite an inconvenience, but most of the time the thing you really want to know is "did anything fail?" Let's give the user this information without them needing to parse anything. Because of the use of subshells and namespaces, this needs to be communicated via a file. Just write arbitrary data into the file and treat non-empty content as a signal that something failed. In case any user depends on the current behaviour, such as running this from a script with `set -e` and parsing the result for failures afterwards, add a flag they can set to get the old behaviour, namely --no-error-on-fail. Signed-off-by: Brendan Jackman <jackmanb(a)google.com> --- Changes in v3: - Fixed quoting - Link to v2: https://lore.kernel.org/r/20251014-b4-ksft-error-on-fail-v2-1-b3e2657237b8@… Changes in v2: - Fixed bug in report_failure() - Made error-on-fail the default - Link to v1: https://lore.kernel.org/r/20251007-b4-ksft-error-on-fail-v1-1-71bf058f5662@… --- tools/testing/selftests/kselftest/runner.sh | 14 ++++++++++---- tools/testing/selftests/run_kselftest.sh | 14 ++++++++++++++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh index 2c3c58e65a419f5ee8d7dc51a37671237a07fa0b..3a62039fa6217f3453423ff011575d0a1eb8c275 100644 --- a/tools/testing/selftests/kselftest/runner.sh +++ b/tools/testing/selftests/kselftest/runner.sh @@ -44,6 +44,12 @@ tap_timeout() fi } +report_failure() +{ + echo "not ok $*" + echo "$*" >> "$kselftest_failures_file" +} + run_one() { DIR="$1" @@ -105,7 +111,7 @@ run_one() echo "# $TEST_HDR_MSG" if [ ! -e "$TEST" ]; then echo "# Warning: file $TEST is missing!" 
- echo "not ok $test_num $TEST_HDR_MSG" + report_failure "$test_num $TEST_HDR_MSG" else if [ -x /usr/bin/stdbuf ]; then stdbuf="/usr/bin/stdbuf --output=L " @@ -123,7 +129,7 @@ run_one() interpreter=$(head -n 1 "$TEST" | cut -c 3-) cmd="$stdbuf $interpreter ./$BASENAME_TEST" else - echo "not ok $test_num $TEST_HDR_MSG" + report_failure "$test_num $TEST_HDR_MSG" return fi fi @@ -137,9 +143,9 @@ run_one() echo "ok $test_num $TEST_HDR_MSG # SKIP" elif [ $rc -eq $timeout_rc ]; then \ echo "#" - echo "not ok $test_num $TEST_HDR_MSG # TIMEOUT $kselftest_timeout seconds" + report_failure "$test_num $TEST_HDR_MSG # TIMEOUT $kselftest_timeout seconds" else - echo "not ok $test_num $TEST_HDR_MSG # exit=$rc" + report_failure "$test_num $TEST_HDR_MSG # exit=$rc" fi) cd - >/dev/null fi diff --git a/tools/testing/selftests/run_kselftest.sh b/tools/testing/selftests/run_kselftest.sh index 0443beacf3621ae36cb12ffd57f696ddef3526b5..d4be97498b32e975c63a1167d3060bdeba674c8c 100755 --- a/tools/testing/selftests/run_kselftest.sh +++ b/tools/testing/selftests/run_kselftest.sh @@ -33,6 +33,7 @@ Usage: $0 [OPTIONS] -c | --collection COLLECTION Run all tests from COLLECTION -l | --list List the available collection:test entries -d | --dry-run Don't actually run any tests + -f | --no-error-on-fail Don't exit with an error just because tests failed -n | --netns Run each test in namespace -h | --help Show this usage info -o | --override-timeout Number of seconds after which we timeout @@ -44,6 +45,7 @@ COLLECTIONS="" TESTS="" dryrun="" kselftest_override_timeout="" +ERROR_ON_FAIL=true while true; do case "$1" in -s | --summary) @@ -65,6 +67,9 @@ while true; do -d | --dry-run) dryrun="echo" shift ;; + -f | --no-error-on-fail) + ERROR_ON_FAIL=false + shift ;; -n | --netns) RUN_IN_NETNS=1 shift ;; @@ -105,9 +110,18 @@ if [ -n "$TESTS" ]; then available="$(echo "$valid" | sed -e 's/ /\n/g')" fi +kselftest_failures_file="$(mktemp --tmpdir kselftest-failures-XXXXXX)" +export kselftest_failures_file 
+ collections=$(echo "$available" | cut -d: -f1 | sort | uniq) for collection in $collections ; do [ -w /dev/kmsg ] && echo "kselftest: Running tests in $collection" >> /dev/kmsg tests=$(echo "$available" | grep "^$collection:" | cut -d: -f2) ($dryrun cd "$collection" && $dryrun run_many $tests) done + +failures="$(cat "$kselftest_failures_file")" +rm "$kselftest_failures_file" +if "$ERROR_ON_FAIL" && [ "$failures" ]; then + exit 1 +fi --- base-commit: 8f5ae30d69d7543eee0d70083daf4de8fe15d585 change-id: 20251007-b4-ksft-error-on-fail-0c2cb3246041 Best regards, -- Brendan Jackman <jackmanb(a)google.com>

1 month, 3 weeks

2
1
0 0

[PATCH v2 00/18] vfio: selftests: Support for multi-device tests

by David Matlack

This series adds support for tests that use multiple devices, and adds one new test, vfio_pci_device_init_perf_test, which measures parallel device initialization time to demonstrate the improvement from commit e908f58b6beb ("vfio/pci: Separate SR-IOV VF dev_set"). This series also breaks apart the monolithic vfio_util.h and vfio_pci_device.c into separate files, to account for all the new code. This required quite a bit of code motion so the diffstat looks large. The final layout is more granular and provides a better separation of the IOMMU code from the device code. Final layout: C files: - tools/testing/selftests/vfio/lib/iommu.c - tools/testing/selftests/vfio/lib/iova_allocator.c - tools/testing/selftests/vfio/lib/libvfio.c - tools/testing/selftests/vfio/lib/vfio_pci_device.c - tools/testing/selftests/vfio/lib/vfio_pci_driver.c H files: - tools/testing/selftests/vfio/lib/include/libvfio.h - tools/testing/selftests/vfio/lib/include/libvfio/assert.h - tools/testing/selftests/vfio/lib/include/libvfio/iommu.h - tools/testing/selftests/vfio/lib/include/libvfio/iova_allocator.h - tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_device.h - tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_driver.h Notably, vfio_util.h is now gone and replaced with libvfio.h. This series is based on vfio/next plus Alex Mastro's series to add the IOVA allocator [1]. It should apply cleanly to vfio/next once Alex's series is merged into 6.18 and then into vfio/next. 
This series can be found on GitHub: https://github.com/dmatlack/linux/tree/vfio/selftests/init_perf_test/v2 [1] https://lore.kernel.org/kvm/20251111-iova-ranges-v3-0-7960244642c5@fb.com/ Cc: Alex Mastro <amastro(a)fb.com> Cc: Jason Gunthorpe <jgg(a)nvidia.com> Cc: Josh Hilke <jrhilke(a)google.com> Cc: Raghavendra Rao Ananta <rananta(a)google.com> Cc: Vipin Sharma <vipinsh(a)google.com> v2: - Require tests to call iommu_init() and manage struct iommu objects rather than implicitly doing it in vfio_pci_device_init(). - Drop all the device wrappers for IOMMU methods and require tests to interact with the iommu_*() helper functions directly. - Add a commit to eliminate INVALID_IOVA. This is a simple cleanup I've been meaning to make. - Upgrade some driver logging to error (Raghavendra) - Remove plurality from helper function that fetches BDF from environment variable (Raghavendra) - Fix cleanup.sh to only delete the device directory when cleaning up all devices (Raghavendra) v1: https://lore.kernel.org/kvm/20251008232531.1152035-1-dmatlack@google.com/ David Matlack (18): vfio: selftests: Move run.sh into scripts directory vfio: selftests: Split run.sh into separate scripts vfio: selftests: Allow passing multiple BDFs on the command line vfio: selftests: Rename struct vfio_iommu_mode to iommu_mode vfio: selftests: Introduce struct iommu vfio: selftests: Support multiple devices in the same container/iommufd vfio: selftests: Eliminate overly chatty logging vfio: selftests: Prefix logs with device BDF where relevant vfio: selftests: Upgrade driver logging to dev_err() vfio: selftests: Rename struct vfio_dma_region to dma_region vfio: selftests: Move IOMMU library code into iommu.c vfio: selftests: Move IOVA allocator into iova_allocator.c vfio: selftests: Stop passing device for IOMMU operations vfio: selftests: Rename vfio_util.h to libvfio.h vfio: selftests: Move vfio_selftests_*() helpers into libvfio.c vfio: selftests: Split libvfio.h into separate header files vfio: 
selftests: Eliminate INVALID_IOVA vfio: selftests: Add vfio_pci_device_init_perf_test tools/testing/selftests/vfio/Makefile | 9 +- .../selftests/vfio/lib/drivers/dsa/dsa.c | 36 +- .../selftests/vfio/lib/drivers/ioat/ioat.c | 18 +- .../selftests/vfio/lib/include/libvfio.h | 26 + .../vfio/lib/include/libvfio/assert.h | 54 ++ .../vfio/lib/include/libvfio/iommu.h | 76 +++ .../vfio/lib/include/libvfio/iova_allocator.h | 23 + .../lib/include/libvfio/vfio_pci_device.h | 125 ++++ .../lib/include/libvfio/vfio_pci_driver.h | 97 +++ .../selftests/vfio/lib/include/vfio_util.h | 331 ----------- tools/testing/selftests/vfio/lib/iommu.c | 465 +++++++++++++++ .../selftests/vfio/lib/iova_allocator.c | 94 +++ tools/testing/selftests/vfio/lib/libvfio.c | 78 +++ tools/testing/selftests/vfio/lib/libvfio.mk | 5 +- .../selftests/vfio/lib/vfio_pci_device.c | 555 +----------------- .../selftests/vfio/lib/vfio_pci_driver.c | 16 +- tools/testing/selftests/vfio/run.sh | 109 ---- .../testing/selftests/vfio/scripts/cleanup.sh | 41 ++ tools/testing/selftests/vfio/scripts/lib.sh | 42 ++ tools/testing/selftests/vfio/scripts/run.sh | 16 + tools/testing/selftests/vfio/scripts/setup.sh | 48 ++ .../selftests/vfio/vfio_dma_mapping_test.c | 46 +- .../selftests/vfio/vfio_iommufd_setup_test.c | 2 +- .../vfio/vfio_pci_device_init_perf_test.c | 167 ++++++ .../selftests/vfio/vfio_pci_device_test.c | 12 +- .../selftests/vfio/vfio_pci_driver_test.c | 51 +- 26 files changed, 1479 insertions(+), 1063 deletions(-) create mode 100644 tools/testing/selftests/vfio/lib/include/libvfio.h create mode 100644 tools/testing/selftests/vfio/lib/include/libvfio/assert.h create mode 100644 tools/testing/selftests/vfio/lib/include/libvfio/iommu.h create mode 100644 tools/testing/selftests/vfio/lib/include/libvfio/iova_allocator.h create mode 100644 tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_device.h create mode 100644 tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_driver.h delete mode 100644 
tools/testing/selftests/vfio/lib/include/vfio_util.h create mode 100644 tools/testing/selftests/vfio/lib/iommu.c create mode 100644 tools/testing/selftests/vfio/lib/iova_allocator.c create mode 100644 tools/testing/selftests/vfio/lib/libvfio.c delete mode 100755 tools/testing/selftests/vfio/run.sh create mode 100755 tools/testing/selftests/vfio/scripts/cleanup.sh create mode 100755 tools/testing/selftests/vfio/scripts/lib.sh create mode 100755 tools/testing/selftests/vfio/scripts/run.sh create mode 100755 tools/testing/selftests/vfio/scripts/setup.sh create mode 100644 tools/testing/selftests/vfio/vfio_pci_device_init_perf_test.c base-commit: 0ed3a30fd996cb0cac872432cf25185fda7e5316 prerequisite-patch-id: dcf23dcc1198960bda3102eefaa21df60b2e4c54 prerequisite-patch-id: e32e56d5bf7b6c7dd40d737aa3521560407e00f5 prerequisite-patch-id: 4f79a41bf10a4c025ba5f433551b46035aa15878 prerequisite-patch-id: f903a45f0c32319138cd93a007646ab89132b18c -- 2.52.0.rc1.455.g30608eb744-goog

1 month, 3 weeks

2
26
0 0

[PATCH net-next v2 00/12] selftests: drv-net: convert GRO and Toeplitz tests to work for drivers in NIPA

by Jakub Kicinski

The main objective of this series is to convert the gro.sh and toeplitz.sh tests to be "NIPA-compatible" - meaning make use of the Python env, which lets us run the tests against either netdevsim or a real device. The tests seem to have been written with a different flow in mind. Namely they source different bash "setup" scripts depending on arguments passed to the test. While I have nothing against the use of bash and the overall architecture - the existing code needs quite a bit of work (don't assume MAC/IP addresses, support remote endpoint over SSH). If I'm the one fixing it, I'd rather convert them to our "simplistic" Python. This series rewrites the tests in Python while addressing their shortcomings. The functionality of running the test over loopback on a real device is retained but with a different method of invocation (see the last patch). Once again we are dealing with a script which runs over a variety of protocols (combination of [ipv4, ipv6, ipip] x [tcp, udp]). The first 4 patches add support for test variants to our scripts. We use the term "variant" in the same sense as the C kselftest_harness.h - variant is just a set of static input arguments. Note that neither GRO nor the Toeplitz test fully passes for me on any HW I have access to. But this is unrelated to the conversion. This series is not making any real functional changes to the tests, it is limited to improving the "test harness" scripts. 
v2: [patch 5] fix accidental modification of gitignore [patch 8] fix typo in "compared" [patch 9] fix typo I -> It [patch 10] fix typoe configure -> configured v1: https://lore.kernel.org/20251117205810.1617533-1-kuba@kernel.org Jakub Kicinski (12): selftests: net: py: coding style improvements selftests: net: py: extract the case generation logic selftests: net: py: add test variants selftests: drv-net: xdp: use variants for qstat tests selftests: net: relocate gro and toeplitz tests to drivers/net selftests: net: py: support ksft ready without wait selftests: net: py: read ip link info about remote dev netdevsim: pass packets thru GRO on Rx selftests: drv-net: add a Python version of the GRO test selftests: drv-net: hw: convert the Toeplitz test to Python netdevsim: add loopback support selftests: net: remove old setup_* scripts tools/testing/selftests/drivers/net/Makefile | 2 + .../testing/selftests/drivers/net/hw/Makefile | 6 +- tools/testing/selftests/net/Makefile | 7 - tools/testing/selftests/net/lib/Makefile | 1 + drivers/net/netdevsim/netdev.c | 26 ++- .../testing/selftests/{ => drivers}/net/gro.c | 5 +- .../{net => drivers/net/hw}/toeplitz.c | 7 +- .../testing/selftests/drivers/net/.gitignore | 1 + tools/testing/selftests/drivers/net/gro.py | 161 ++++++++++++++ .../selftests/drivers/net/hw/.gitignore | 1 + .../drivers/net/hw/lib/py/__init__.py | 4 +- .../selftests/drivers/net/hw/toeplitz.py | 208 ++++++++++++++++++ .../selftests/drivers/net/lib/py/__init__.py | 4 +- .../selftests/drivers/net/lib/py/env.py | 2 + tools/testing/selftests/drivers/net/xdp.py | 42 ++-- tools/testing/selftests/net/.gitignore | 2 - tools/testing/selftests/net/gro.sh | 105 --------- .../selftests/net/lib/ksft_setup_loopback.sh | 111 ++++++++++ .../testing/selftests/net/lib/py/__init__.py | 5 +- tools/testing/selftests/net/lib/py/ksft.py | 93 ++++++-- tools/testing/selftests/net/lib/py/nsim.py | 2 +- tools/testing/selftests/net/lib/py/utils.py | 20 +- 
tools/testing/selftests/net/setup_loopback.sh | 120 ---------- tools/testing/selftests/net/setup_veth.sh | 45 ---- tools/testing/selftests/net/toeplitz.sh | 199 ----------------- .../testing/selftests/net/toeplitz_client.sh | 28 --- 26 files changed, 630 insertions(+), 577 deletions(-) rename tools/testing/selftests/{ => drivers}/net/gro.c (99%) rename tools/testing/selftests/{net => drivers/net/hw}/toeplitz.c (99%) create mode 100755 tools/testing/selftests/drivers/net/gro.py create mode 100755 tools/testing/selftests/drivers/net/hw/toeplitz.py delete mode 100755 tools/testing/selftests/net/gro.sh create mode 100755 tools/testing/selftests/net/lib/ksft_setup_loopback.sh delete mode 100644 tools/testing/selftests/net/setup_loopback.sh delete mode 100644 tools/testing/selftests/net/setup_veth.sh delete mode 100755 tools/testing/selftests/net/toeplitz.sh delete mode 100755 tools/testing/selftests/net/toeplitz_client.sh -- 2.51.1

1 month, 3 weeks

3
29
0 0

[bpf-next v1 1/5] selftests/bpf: use sockaddr_storage instead of addr_port in cls_redirect test

by Hoyeon Lee

The cls_redirect test uses a custom addr_port structure to represent IPv4/IPv6 addresses and ports. This custom wrapper requires extra conversion logic and specific helpers such as fill_addr_port(), which are no longer necessary when using standard socket address structures. This commit replaces addr_port with the standard sockaddr_storage so that test handles address families and ports using the native socket types. This removes the custom helper, eliminates redundant casts, and simplifies tuple handling without functional changes. Signed-off-by: Hoyeon Lee <hoyeon.lee(a)suse.com> --- .../selftests/bpf/prog_tests/cls_redirect.c | 95 ++++++------------- 1 file changed, 30 insertions(+), 65 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c index 34b59f6baca1..9a7d365f9b24 100644 --- a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c @@ -22,79 +22,42 @@ static int duration = 0; -struct addr_port { - in_port_t port; - union { - struct in_addr in_addr; - struct in6_addr in6_addr; - }; -}; - struct tuple { int family; - struct addr_port src; - struct addr_port dst; + struct sockaddr_storage src; + struct sockaddr_storage dst; }; -static bool fill_addr_port(const struct sockaddr *sa, struct addr_port *ap) -{ - const struct sockaddr_in6 *in6; - const struct sockaddr_in *in; - - switch (sa->sa_family) { - case AF_INET: - in = (const struct sockaddr_in *)sa; - ap->in_addr = in->sin_addr; - ap->port = in->sin_port; - return true; - - case AF_INET6: - in6 = (const struct sockaddr_in6 *)sa; - ap->in6_addr = in6->sin6_addr; - ap->port = in6->sin6_port; - return true; - - default: - return false; - } -} -static bool set_up_conn(const struct sockaddr *addr, socklen_t len, int type, +static bool set_up_conn(const struct sockaddr_storage *addr, socklen_t len, int type, int *server, int *conn, struct tuple *tuple) { struct 
sockaddr_storage ss; socklen_t slen = sizeof(ss); - struct sockaddr *sa = (struct sockaddr *)&ss; - *server = start_server_addr(type, (struct sockaddr_storage *)addr, len, NULL); + *server = start_server_addr(type, addr, len, NULL); if (*server < 0) return false; - if (CHECK_FAIL(getsockname(*server, sa, &slen))) + if (CHECK_FAIL(getsockname(*server, (struct sockaddr *)&ss, &slen))) goto close_server; - *conn = connect_to_addr(type, (struct sockaddr_storage *)sa, slen, NULL); + *conn = connect_to_addr(type, &ss, slen, NULL); if (*conn < 0) goto close_server; /* We want to simulate packets arriving at conn, so we have to * swap src and dst. */ - slen = sizeof(ss); - if (CHECK_FAIL(getsockname(*conn, sa, &slen))) + slen = sizeof(tuple->dst); + if (CHECK_FAIL(getsockname(*conn, (struct sockaddr *)&tuple->dst, &slen))) goto close_conn; - if (CHECK_FAIL(!fill_addr_port(sa, &tuple->dst))) + slen = sizeof(tuple->src); + if (CHECK_FAIL(getpeername(*conn, (struct sockaddr *)&tuple->src, &slen))) goto close_conn; - slen = sizeof(ss); - if (CHECK_FAIL(getpeername(*conn, sa, &slen))) - goto close_conn; - - if (CHECK_FAIL(!fill_addr_port(sa, &tuple->src))) - goto close_conn; - - tuple->family = ss.ss_family; + tuple->family = tuple->dst.ss_family; return true; close_conn: @@ -110,17 +73,16 @@ static socklen_t prepare_addr(struct sockaddr_storage *addr, int family) { struct sockaddr_in *addr4; struct sockaddr_in6 *addr6; + memset(addr, 0, sizeof(*addr)); switch (family) { case AF_INET: addr4 = (struct sockaddr_in *)addr; - memset(addr4, 0, sizeof(*addr4)); addr4->sin_family = family; addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); return sizeof(*addr4); case AF_INET6: addr6 = (struct sockaddr_in6 *)addr; - memset(addr6, 0, sizeof(*addr6)); addr6->sin6_family = family; addr6->sin6_addr = in6addr_loopback; return sizeof(*addr6); @@ -244,7 +206,11 @@ static void encap_init(encap_headers_t *encap, uint8_t hop_count, uint8_t proto) static size_t build_input(const struct test_cfg 
*test, void *const buf, const struct tuple *tuple) { - in_port_t sport = tuple->src.port; + struct sockaddr_in6 *src_in6 = (struct sockaddr_in6 *)&tuple->src; + struct sockaddr_in6 *dst_in6 = (struct sockaddr_in6 *)&tuple->dst; + struct sockaddr_in *src_in = (struct sockaddr_in *)&tuple->src; + struct sockaddr_in *dst_in = (struct sockaddr_in *)&tuple->dst; + in_port_t sport, dport; encap_headers_t encap; struct iphdr ip; struct ipv6hdr ipv6; @@ -254,6 +220,9 @@ static size_t build_input(const struct test_cfg *test, void *const buf, uint8_t *p = buf; int proto; + sport = (tuple->family == AF_INET) ? src_in->sin_port : src_in6->sin6_port; + dport = (tuple->family == AF_INET) ? dst_in->sin_port : dst_in6->sin6_port; + proto = IPPROTO_IPIP; if (tuple->family == AF_INET6) proto = IPPROTO_IPV6; @@ -277,8 +246,8 @@ static size_t build_input(const struct test_cfg *test, void *const buf, .version = 4, .ttl = IPDEFTTL, .protocol = proto, - .saddr = tuple->src.in_addr.s_addr, - .daddr = tuple->dst.in_addr.s_addr, + .saddr = src_in->sin_addr.s_addr, + .daddr = dst_in->sin_addr.s_addr, }; p = mempcpy(p, &ip, sizeof(ip)); break; @@ -287,8 +256,8 @@ static size_t build_input(const struct test_cfg *test, void *const buf, .version = 6, .hop_limit = IPDEFTTL, .nexthdr = proto, - .saddr = tuple->src.in6_addr, - .daddr = tuple->dst.in6_addr, + .saddr = src_in6->sin6_addr, + .daddr = dst_in6->sin6_addr, }; p = mempcpy(p, &ipv6, sizeof(ipv6)); break; @@ -303,18 +272,16 @@ static size_t build_input(const struct test_cfg *test, void *const buf, case TCP: tcp = (struct tcphdr){ .source = sport, - .dest = tuple->dst.port, + .dest = dport, + .syn = (test->flags == SYN), + .ack = (test->flags == ACK), }; - if (test->flags == SYN) - tcp.syn = true; - if (test->flags == ACK) - tcp.ack = true; p = mempcpy(p, &tcp, sizeof(tcp)); break; case UDP: udp = (struct udphdr){ .source = sport, - .dest = tuple->dst.port, + .dest = dport, }; p = mempcpy(p, &udp, sizeof(udp)); break; @@ -339,25 +306,23 @@ 
static void test_cls_redirect_common(struct bpf_program *prog) LIBBPF_OPTS(bpf_test_run_opts, tattr); int families[] = { AF_INET, AF_INET6 }; struct sockaddr_storage ss; - struct sockaddr *addr; socklen_t slen; int i, j, err, prog_fd; int servers[__NR_KIND][ARRAY_SIZE(families)] = {}; int conns[__NR_KIND][ARRAY_SIZE(families)] = {}; struct tuple tuples[__NR_KIND][ARRAY_SIZE(families)]; - addr = (struct sockaddr *)&ss; for (i = 0; i < ARRAY_SIZE(families); i++) { slen = prepare_addr(&ss, families[i]); if (CHECK_FAIL(!slen)) goto cleanup; - if (CHECK_FAIL(!set_up_conn(addr, slen, SOCK_DGRAM, + if (CHECK_FAIL(!set_up_conn(&ss, slen, SOCK_DGRAM, &servers[UDP][i], &conns[UDP][i], &tuples[UDP][i]))) goto cleanup; - if (CHECK_FAIL(!set_up_conn(addr, slen, SOCK_STREAM, + if (CHECK_FAIL(!set_up_conn(&ss, slen, SOCK_STREAM, &servers[TCP][i], &conns[TCP][i], &tuples[TCP][i]))) goto cleanup; -- 2.51.1

1 month, 3 weeks

2
3
0 0

[PATCH v4 00/10] riscv: Add Zalasr ISA extension support

by Xu Lu

This patch adds support for the Zalasr ISA extension, which supplies the real load acquire/store release instructions. The specification can be found here: https://github.com/riscv/riscv-zalasr/blob/main/chapter2.adoc This patch series has been tested with ltp on Qemu with Brensan's zalasr support patch[1]. Some false-positive spacing errors occur during patch checking. Thus I CCed maintainers of checkpatch.pl as well. [1] https://lore.kernel.org/all/CAGPSXwJEdtqW=nx71oufZp64nK6tK=0rytVEcz4F-gfvCO… v4: - Apply acquire/release semantics to arch_atomic operations. Thanks to Andrea. v3: - Apply acquire/release semantics to arch_xchg/arch_cmpxchg operations so as to ensure FENCE.TSO ordering between operations which precede the UNLOCK+LOCK sequence and operations which follow the sequence. Thanks to Andrea. - Support hwprobe of Zalasr. - Allow Zalasr extensions for Guest/VM. v2: - Adjust the order of Zalasr and Zalrsc in dt-bindings. Thanks to Conor. Xu Lu (10): riscv: Add ISA extension parsing for Zalasr dt-bindings: riscv: Add Zalasr ISA extension description riscv: hwprobe: Export Zalasr extension riscv: Introduce Zalasr instructions riscv: Apply Zalasr to smp_load_acquire/smp_store_release riscv: Apply acquire/release semantics to arch_xchg/arch_cmpxchg operations riscv: Apply acquire/release semantics to arch_atomic operations riscv: Remove arch specific __atomic_acquire/release_fence RISC-V: KVM: Allow Zalasr extensions for Guest/VM RISC-V: KVM: selftests: Add Zalasr extensions to get-reg-list test Documentation/arch/riscv/hwprobe.rst | 5 +- .../devicetree/bindings/riscv/extensions.yaml | 5 + arch/riscv/include/asm/atomic.h | 70 ++++++++- arch/riscv/include/asm/barrier.h | 91 +++++++++-- arch/riscv/include/asm/cmpxchg.h | 144 +++++++++--------- arch/riscv/include/asm/fence.h | 4 - arch/riscv/include/asm/hwcap.h | 1 + arch/riscv/include/asm/insn-def.h | 79 ++++++++++ arch/riscv/include/uapi/asm/hwprobe.h | 1 + arch/riscv/include/uapi/asm/kvm.h | 1 + 
arch/riscv/kernel/cpufeature.c | 1 + arch/riscv/kernel/sys_hwprobe.c | 1 + arch/riscv/kvm/vcpu_onereg.c | 2 + .../selftests/kvm/riscv/get-reg-list.c | 4 + 14 files changed, 314 insertions(+), 95 deletions(-) -- 2.20.1

1 month, 3 weeks

7
12
0 0

[PATCH v3 0/8] introduce VM_MAYBE_GUARD and make it sticky

by Lorenzo Stoakes

Currently, guard regions are not visible to users except through /proc/$pid/pagemap, with no explicit visibility at the VMA level. This makes the feature less useful, as it isn't entirely apparent which VMAs may have these entries present, especially when performing actions which walk through memory regions such as those performed by CRIU. This series addresses this issue by introducing the VM_MAYBE_GUARD flag which fulfils this role, updating the smaps logic to display an entry for these. The semantics of this flag are that a guard region MAY be present if set (we cannot be sure, as we can't efficiently track whether an MADV_GUARD_REMOVE finally removes all the guard regions in a VMA) - but if not set the VMA definitely does NOT have any guard regions present. It's problematic to establish this flag without further action, because that means that VMAs with guard regions in them become non-mergeable with adjacent VMAs for no especially good reason. To work around this, this series also introduces the concept of 'sticky' VMA flags - that is flags which: a. if set in one VMA and not in another still permit those VMAs to be merged (if otherwise compatible). b. When they are merged, the resultant VMA must have the flag set. The VMA logic is updated to propagate these flags correctly. Additionally, VM_MAYBE_GUARD being an explicit VMA flag allows us to solve an issue with file-backed guard regions - previously these established an anon_vma object for file-backed mappings solely to have vma_needs_copy() correctly propagate guard region mappings to child processes. We introduce a new flag alias VM_COPY_ON_FORK (which currently only specifies VM_MAYBE_GUARD) and update vma_needs_copy() to check explicitly for this flag and to copy page tables if it is present, which resolves this issue. Additionally, we add the ability for allow-listed VMA flags to be atomically writable with only mmap/VMA read locks held. 
The only flag we allow so far is VM_MAYBE_GUARD, which we carefully ensure does not cause any races by being allowed to do so. This allows us to maintain guard region installation as a read-locked operation and not endure the overhead of obtaining a write lock here. Finally we introduce extensive VMA userland tests to assert that the sticky VMA logic behaves correctly as well as guard region self tests to assert that smaps visibility is correctly implemented. v3: * Propagated tags thanks Vlastimil & Pedro! :) * Fixed doc nit as per Pedro. * Added vma_flag_test_atomic() in preparation for fixing retract_page_tables() (see below). We make this not require any locks, as we serialise on the page table lock in retract_page_tables(). * Split the atomic flag enablement and actually setting the flag for guard install into two separate commits so we clearly separate the various VMA flag implementation details and us enabling this feature. * Mentioned setting anon_vma for anonymous mappings in commit message as per Vlastimil. * Fixed an issue with retract_page_tables() whereby madvise(..., MADV_COLLAPSE) relies upon file-backed VMAs not being collapsed due to the UFFD WP VMA flag being set or the VMA having vma->anon_vma set (i.e. being a MAP_PRIVATE file-backed VMA). This was updated to also check for VM_MAYBE_GUARD. * Introduced MADV_COLLAPSE self test to assert that the behaviour is correct. I first reproduced the issue locally and then adapted the test to assert that this no longer occurs. * Mentioned KCSAN permissiveness in commit message as per Pedro. * Mentioned mmap/VMA read lock excluding mmap/VMA write lock and thus avoiding meaningful RMW races in commit message as per Vlastimil. * Mentioned previous unconditional vma->anon_vma installation on guard region installation as per Vlastimil. 
* Avoided having merging compromised by reordering patches such that the sticky VMA functionality is implemented prior to VM_MAYBE_GUARD being utilised upon guard region installation, rendering Vlastimil's request to mention this in a commit message unnecessary. * Separated out sticky and copy on fork patches as per Pedro. * Added VM_PFNMAP, VM_MIXEDMAP, VM_UFFD_WP to VM_COPY_ON_FORK to make things more consistent and clean. * Added mention of why generally VM_STICKY should be VM_COPY_ON_FORK in copy on fork patch. v2: * Separated out userland VMA tests for sticky behaviour as per Suren. * Added the concept of atomic writable VMA flags as per Pedro and Vlastimil. * Made VM_MAYBE_GUARD an atomic writable flag so we don't have to take a VMA write lock in madvise() as per Pedro and Vlastimil. https://lore.kernel.org/all/cover.1762422915.git.lorenzo.stoakes@oracle.com/ v1: https://lore.kernel.org/all/cover.1761756437.git.lorenzo.stoakes@oracle.com/ Lorenzo Stoakes (8): mm: introduce VM_MAYBE_GUARD and make visible in /proc/$pid/smaps mm: add atomic VMA flags and set VM_MAYBE_GUARD as such mm: implement sticky VMA flags mm: introduce copy-on-fork VMAs and make VM_MAYBE_GUARD one mm: set the VM_MAYBE_GUARD flag on guard region install tools/testing/vma: add VMA sticky userland tests tools/testing/selftests/mm: add MADV_COLLAPSE test case tools/testing/selftests/mm: add smaps visibility guard region test Documentation/filesystems/proc.rst | 5 +- fs/proc/task_mmu.c | 1 + include/linux/mm.h | 102 ++++++++++++ include/trace/events/mmflags.h | 1 + mm/khugepaged.c | 72 +++++--- mm/madvise.c | 22 ++- mm/memory.c | 14 +- mm/vma.c | 22 +-- tools/testing/selftests/mm/guard-regions.c | 185 +++++++++++++++++++++ tools/testing/selftests/mm/vm_util.c | 5 + tools/testing/selftests/mm/vm_util.h | 1 + tools/testing/vma/vma.c | 89 ++++++++-- tools/testing/vma/vma_internal.h | 56 +++++++ 13 files changed, 511 insertions(+), 64 deletions(-) -- 2.51.0

1 month, 3 weeks

7
29
0 0

[PATCH v2 0/2] Optimize the allocation of vector regset

by Yong-Xuan Wang

The vector regset uses the maximum possible vlenb 8192 to allocate a 2^18 bytes buffer to copy the vector register. But most platforms don’t support the largest vlenb. The regset has 2 users, ptrace syscall and coredump. When handling the PTRACE_GETREGSET requests from ptrace syscall, Linux will prepare a kernel buffer whose size is min(user buffer size, limit). A malicious user process might overwhelm a memory-constrained system when the buffer limit is very large. The coredump uses regset_get_alloc() to get the context of vector register. But this API allocates buffer before checking whether the target process uses vector extension, this wastes time to prepare a large memory buffer. The buffer limit can be determined after getting platform vlenb in the early boot stage, this can let the regset buffer match real hardware limits. Also add .active callbacks to let the coredump skip vector part when target process doesn't use it. After this patchset, userspace process needs 2 ptrace syscalls to retrieve the vector regset with PTRACE_GETREGSET. The first ptrace call only reads the header to get the vlenb information. Then prepare a suitable buffer to get the register context. The new vector ptrace kselftest demonstrates it. --- v2: - fix issues in vector ptrace kselftest (Andy) Yong-Xuan Wang (2): riscv: ptrace: Optimize the allocation of vector regset selftests: riscv: Add test for the Vector ptrace interface arch/riscv/include/asm/vector.h | 1 + arch/riscv/kernel/ptrace.c | 24 +++- arch/riscv/kernel/vector.c | 2 + tools/testing/selftests/riscv/vector/Makefile | 5 +- .../selftests/riscv/vector/vstate_ptrace.c | 134 ++++++++++++++++++ 5 files changed, 162 insertions(+), 4 deletions(-) create mode 100644 tools/testing/selftests/riscv/vector/vstate_ptrace.c -- 2.43.0

1 month, 3 weeks

3
4
0 0

[PATCH] selftests: seccomp: Handle syscall interruption in notification test

by Wake Liu

The user_notification_wait_killable_after_reply test fails due to an unhandled error when a traced syscall is interrupted by a signal. When a signal arrives after the tracer has received a seccomp notification but before it has replied, the notification can become stale. Any subsequent reply (like with SECCOMP_IOCTL_NOTIF_ADDFD) will fail with -ENOENT. This patch fixes the test by handling the -ENOENT return value from SECCOMP_IOCTL_NOTIF_ADDFD, preventing the test from failing incorrectly. The loop counter is decremented to re-run the iteration for the restarted syscall. Signed-off-by: Wake Liu <wakel(a)google.com> --- tools/testing/selftests/seccomp/seccomp_bpf.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 574fdd102eb5..c3e598c9c4ee 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -5048,8 +5048,12 @@ TEST(user_notification_wait_killable_after_reply) addfd.id = req.id; addfd.flags = SECCOMP_ADDFD_FLAG_SEND; addfd.srcfd = 0; - ASSERT_GE(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), 0) - kill(pid, SIGKILL); + ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); + if (ret < 0 && errno == ENOENT) { + i--; + continue; + } + ASSERT_GE(ret, 0); } /* -- 2.52.0.rc1.455.g30608eb744-goog

1 month, 3 weeks

1
0
0 0

[PATCH nf-next v9 0/3] Add IPIP flowtable SW acceleration

by Lorenzo Bianconi

Introduce SW acceleration for IPIP tunnels in the netfilter flowtable infrastructure. This series introduces basic infrastructure to accelerate other tunnel types (e.g. IP6IP6). --- Changes in v9: - Fixed IPIP tunnel offloading when VLAN encapsulation is enabled. - Add IPIP tunnel over vlan self-test - Remove wrong field from flow_offload_tuple key - Cosmetics - Link to v8: https://lore.kernel.org/r/20251023-nf-flowtable-ipip-v8-0-5d5d8595c730@kern… Changes in v8: - Rebase on top of the following series (not yet applied) https://patchwork.ozlabs.org/project/netfilter-devel/list/?series=477081 - Link to v7: https://lore.kernel.org/r/20251021-nf-flowtable-ipip-v7-0-a45214896106@kern… Changes in v7: - Introduce sw acceleration for tx path of IPIP tunnels - Rely on exact match during flowtable entry lookup - Fix typos - Link to v6: https://lore.kernel.org/r/20250818-nf-flowtable-ipip-v6-0-eda90442739c@kern… Changes in v6: - Rebase on top of nf-next main branch - Link to v5: https://lore.kernel.org/r/20250721-nf-flowtable-ipip-v5-0-0865af9e58c6@kern… Changes in v5: - Rely on __ipv4_addr_hash() to compute the hash used as encap ID - Remove unnecessary pskb_may_pull() in nf_flow_tuple_encap() - Add nf_flow_ip4_ecanp_pop utility routine - Link to v4: https://lore.kernel.org/r/20250718-nf-flowtable-ipip-v4-0-f8bb1c18b986@kern… Changes in v4: - Use the hash value of the saddr, daddr and protocol of outer IP header as encapsulation id. 
- Link to v3: https://lore.kernel.org/r/20250703-nf-flowtable-ipip-v3-0-880afd319b9f@kern… Changes in v3: - Add outer IP header sanity checks - target nf-next tree instead of net-next - Link to v2: https://lore.kernel.org/r/20250627-nf-flowtable-ipip-v2-0-c713003ce75b@kern… Changes in v2: - Introduce IPIP flowtable selftest - Link to v1: https://lore.kernel.org/r/20250623-nf-flowtable-ipip-v1-1-2853596e3941@kern… --- Lorenzo Bianconi (3): net: netfilter: Add IPIP flowtable rx sw acceleration net: netfilter: Add IPIP flowtable tx sw acceleration selftests: netfilter: nft_flowtable.sh: Add IPIP flowtable selftest include/linux/netdevice.h | 13 ++ include/net/netfilter/nf_flow_table.h | 18 +++ net/ipv4/ipip.c | 25 ++++ net/netfilter/nf_flow_table_core.c | 3 + net/netfilter/nf_flow_table_ip.c | 135 +++++++++++++++++++-- net/netfilter/nf_flow_table_path.c | 84 +++++++++++-- .../selftests/net/netfilter/nft_flowtable.sh | 69 +++++++++++ 7 files changed, 328 insertions(+), 19 deletions(-) --- base-commit: 32e4b1bf1bbfe63e52e2fff7ade0aaeb805defe3 change-id: 20250623-nf-flowtable-ipip-1b3d7b08d067 Best regards, -- Lorenzo Bianconi <lorenzo(a)kernel.org>

1 month, 3 weeks

2
11
0 0

Wiadomość z księgowości

by Marek Poradecki

Dzień dobry, pomagamy przedsiębiorcom wprowadzić model wymiany walut, który minimalizuje wahania kosztów przy rozliczeniach międzynarodowych. Kiedy możemy umówić się na 15-minutową rozmowę, aby zaprezentować, jak taki model mógłby działać w Państwa firmie - z gwarancją indywidualnych kursów i pełnym uproszczeniem płatności? Proszę o propozycję dogodnego terminu. Pozdrawiam Marek Poradecki

1 month, 3 weeks

1
0
0 0

[PATCH] selftests/seccomp: improve backwards compatibility for older kernels

by Wake Liu

This commit introduces checks for kernel version and seccomp filter flag support to the seccomp selftests. It also includes conditional header inclusions using __GLIBC_PREREQ. Some tests were gated by kernel version, and adjustments were made for flags introduced after kernel 5.4. This ensures the selftests can run and pass correctly on kernel versions 5.4 and later, preventing failures due to features not present in older kernels. The use of __GLIBC_PREREQ ensures proper compilation and functionality across different glibc versions in a mainline Linux kernel context. While it might appear redundant in specific build environments due to global overrides, it is crucial for upstream correctness and portability. Signed-off-by: Wake Liu <wakel(a)google.com> --- tools/testing/selftests/seccomp/seccomp_bpf.c | 108 ++++++++++++++++-- 1 file changed, 99 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 61acbd45ffaa..9b660cff5a4a 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -13,12 +13,14 @@ * we need to use the kernel's siginfo.h file and trick glibc * into accepting it. */ +#if defined(__GLIBC__) && defined(__GLIBC_PREREQ) #if !__GLIBC_PREREQ(2, 26) # include <asm/siginfo.h> # define __have_siginfo_t 1 # define __have_sigval_t 1 # define __have_sigevent_t 1 #endif +#endif #include <errno.h> #include <linux/filter.h> @@ -300,6 +302,26 @@ int seccomp(unsigned int op, unsigned int flags, void *args) } #endif +int seccomp_flag_supported(int flag) +{ + /* + * Probes if a seccomp filter flag is supported by the kernel. + * + * When an unsupported flag is passed to seccomp(SECCOMP_SET_MODE_FILTER, ...), + * the kernel returns EINVAL. + * + * When a supported flag is passed, the kernel proceeds to validate the + * filter program pointer. 
By passing NULL for the filter program, + * the kernel attempts to dereference a bad address, resulting in EFAULT. + * + * Therefore, checking for EFAULT indicates that the flag itself was + * recognized and supported by the kernel. + */ + if (seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL) == -1 && errno == EFAULT) + return 1; + return 0; +} + #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n])) #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ @@ -2436,13 +2458,12 @@ TEST(detect_seccomp_filter_flags) ASSERT_NE(ENOSYS, errno) { TH_LOG("Kernel does not support seccomp syscall!"); } - EXPECT_EQ(-1, ret); - EXPECT_EQ(EFAULT, errno) { - TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!", - flag); - } - all_flags |= flag; + if (seccomp_flag_supported(flag)) + all_flags |= flag; + else + TH_LOG("Filter flag (0x%X) is not found to be supported!", + flag); } /* @@ -2870,6 +2891,12 @@ TEST_F(TSYNC, two_siblings_with_one_divergence) TEST_F(TSYNC, two_siblings_with_one_divergence_no_tid_in_err) { + /* Depends on 5189149 (seccomp: allow TSYNC and USER_NOTIF together) */ + if (!seccomp_flag_supported(SECCOMP_FILTER_FLAG_TSYNC_ESRCH)) { + SKIP(return, "Kernel does not support SECCOMP_FILTER_FLAG_TSYNC_ESRCH"); + return; + } + long ret, flags; void *status; @@ -3475,6 +3502,11 @@ TEST(user_notification_basic) TEST(user_notification_with_tsync) { + /* Depends on 5189149 (seccomp: allow TSYNC and USER_NOTIF together) */ + if (!seccomp_flag_supported(SECCOMP_FILTER_FLAG_TSYNC_ESRCH)) { + SKIP(return, "Kernel does not support SECCOMP_FILTER_FLAG_TSYNC_ESRCH"); + return; + } int ret; unsigned int flags; @@ -3966,6 +3998,13 @@ TEST(user_notification_filter_empty) TEST(user_ioctl_notification_filter_empty) { + /* Depends on 95036a7 (seccomp: interrupt SECCOMP_IOCTL_NOTIF_RECV + * when all users have exited) */ + if (!ksft_min_kernel_version(6, 11)) { + SKIP(return, "Kernel version < 6.11"); + return; + } + 
pid_t pid; long ret; int status, p[2]; @@ -4119,6 +4158,12 @@ int get_next_fd(int prev_fd) TEST(user_notification_addfd) { + /* Depends on 0ae71c7 (seccomp: Support atomic "addfd + send reply") */ + if (!ksft_min_kernel_version(5, 14)) { + SKIP(return, "Kernel version < 5.14"); + return; + } + pid_t pid; long ret; int status, listener, memfd, fd, nextfd; @@ -4281,6 +4326,12 @@ TEST(user_notification_addfd) TEST(user_notification_addfd_rlimit) { + /* Depends on 7cf97b1 (seccomp: Introduce addfd ioctl to seccomp user notifier) */ + if (!ksft_min_kernel_version(5, 9)) { + SKIP(return, "Kernel version < 5.9"); + return; + } + pid_t pid; long ret; int status, listener, memfd; @@ -4326,9 +4377,12 @@ TEST(user_notification_addfd_rlimit) EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); EXPECT_EQ(errno, EMFILE); - addfd.flags = SECCOMP_ADDFD_FLAG_SEND; - EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); - EXPECT_EQ(errno, EMFILE); + /* Depends on 0ae71c7 (seccomp: Support atomic "addfd + send reply") */ + if (ksft_min_kernel_version(5, 14)) { + addfd.flags = SECCOMP_ADDFD_FLAG_SEND; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); + EXPECT_EQ(errno, EMFILE); + } addfd.newfd = 100; addfd.flags = SECCOMP_ADDFD_FLAG_SETFD; @@ -4356,6 +4410,12 @@ TEST(user_notification_addfd_rlimit) TEST(user_notification_sync) { + /* Depends on 48a1084 (seccomp: add the synchronous mode for seccomp_unotify) */ + if (!ksft_min_kernel_version(6, 6)) { + SKIP(return, "Kernel version < 6.6"); + return; + } + struct seccomp_notif req = {}; struct seccomp_notif_resp resp = {}; int status, listener; @@ -4520,6 +4580,12 @@ static char get_proc_stat(struct __test_metadata *_metadata, pid_t pid) TEST(user_notification_fifo) { + /* Depends on 4cbf6f6 (seccomp: Use FIFO semantics to order notifications) */ + if (!ksft_min_kernel_version(5, 19)) { + SKIP(return, "Kernel version < 5.19"); + return; + } + struct seccomp_notif_resp resp = {}; struct 
seccomp_notif req = {}; int i, status, listener; @@ -4623,6 +4689,12 @@ static long get_proc_syscall(struct __test_metadata *_metadata, int pid) /* Ensure non-fatal signals prior to receive are unmodified */ TEST(user_notification_wait_killable_pre_notification) { + /* Depends on c2aa2df (seccomp: Add wait_killable semantic to seccomp user notifier) */ + if (!ksft_min_kernel_version(5, 19)) { + SKIP(return, "Kernel version < 5.19"); + return; + } + struct sigaction new_action = { .sa_handler = signal_handler, }; @@ -4693,6 +4765,12 @@ TEST(user_notification_wait_killable_pre_notification) /* Ensure non-fatal signals after receive are blocked */ TEST(user_notification_wait_killable) { + /* Depends on c2aa2df (seccomp: Add wait_killable semantic to seccomp user notifier) */ + if (!ksft_min_kernel_version(5, 19)) { + SKIP(return, "Kernel version < 5.19"); + return; + } + struct sigaction new_action = { .sa_handler = signal_handler, }; @@ -4772,6 +4850,12 @@ TEST(user_notification_wait_killable) /* Ensure fatal signals after receive are not blocked */ TEST(user_notification_wait_killable_fatal) { + /* Depends on c2aa2df (seccomp: Add wait_killable semantic to seccomp user notifier) */ + if (!ksft_min_kernel_version(5, 19)) { + SKIP(return, "Kernel version < 5.19"); + return; + } + struct seccomp_notif req = {}; int listener, status; pid_t pid; @@ -4854,6 +4938,12 @@ static void *tsync_vs_dead_thread_leader_sibling(void *_args) */ TEST(tsync_vs_dead_thread_leader) { + /* Depends on bfafe5e (seccomp: release task filters when the task exits) */ + if (!ksft_min_kernel_version(6, 11)) { + SKIP(return, "Kernel version < 6.11"); + return; + } + int status; pid_t pid; long ret; -- 2.50.1.703.g449372360f-goog

1 month, 3 weeks

2
2
0 0

[PATCH net-next 13/13] selftests: drv-net: selftest for ipvlan-macnat mode

by Dmitry Skorodumov

Implemented a self-test for ipvlan in l2macnat mode. The test verifies: 1) It's not possible to configure an ip in l2macnat mode on ipvtap 2) It creates several net namespaces - Default namespace emulates host, - ipvlan-tst-phy emulates some host in remote network - ipvlan-tst-0/1 emulate VMs on host. Test verifies, that MAC addresses are as expected in ARP/NEIGH tables: all MACs in 'tst-phy' points to "host" mac-address all MACs in Default and tst are real ones 3) The l2macnat mode has limited number of addresses remembered on port. Test verifies, that this limit really works. Signed-off-by: Dmitry Skorodumov <skorodumov.dmitry(a)huawei.com> --- tools/testing/selftests/net/Makefile | 3 + .../selftests/net/ipvtap_macnat_bridge.py | 174 +++++++++ .../selftests/net/ipvtap_macnat_test.sh | 332 ++++++++++++++++++ 3 files changed, 509 insertions(+) create mode 100755 tools/testing/selftests/net/ipvtap_macnat_bridge.py create mode 100755 tools/testing/selftests/net/ipvtap_macnat_test.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index b5127e968108..ff28012d34db 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -203,6 +203,9 @@ YNL_GEN_PROGS := netlink-dumps TEST_GEN_FILES += $(YNL_GEN_FILES) TEST_GEN_PROGS += $(YNL_GEN_PROGS) +TEST_PROGS += ipvtap_macnat_test.sh +TEST_FILES += ipvtap_macnat_bridge.py + TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c)) TEST_INCLUDES := forwarding/lib.sh diff --git a/tools/testing/selftests/net/ipvtap_macnat_bridge.py b/tools/testing/selftests/net/ipvtap_macnat_bridge.py new file mode 100755 index 000000000000..6fc4762b03cd --- /dev/null +++ b/tools/testing/selftests/net/ipvtap_macnat_bridge.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Script to bridge ipvtap and tap, +needed to simulate behaviour of virtual machine using ipvtap. + +ipvtap in macnat mode cannot have IP address. 
+Due to limitations of ipvtap, it also cannot be plugged +into bridge. +Use this script to connect ipvtap and tap and assing IP to tap. +""" + +import socket +import os +import select +import sys +import signal +import fcntl +import struct +import subprocess + +# Linux TUN/TAP constants +TUNSETIFF = 0x400454ca +IFF_TUN = 0x0001 +IFF_TAP = 0x0002 +IFF_NO_PI = 0x1000 + +ns_name = "non-initialized" + +class TapBridge: + def __init__(self, tap, ipvtap, buffer_size=65536): + self.tap_name = tap + self.ipvtap_name = ipvtap + self.buffer_size = buffer_size + self.running = False + + def open_tap_file(self, path): + """Open TAP interface as a file""" + try: + return os.open(path, os.O_RDWR) + except Exception as e: + print(f"Error opening {path}: {e}") + return None + + def open_ipvtap_sock(self, tap_name): + """Open a TAP interface using raw socket""" + try: + sock = socket.socket(socket.AF_PACKET, + socket.SOCK_RAW, + socket.ntohs(0x0003)) + sock.bind((tap_name, 0)) + sock.setblocking(False) + print(f"Connected to IPVTAP interface: {tap_name}") + return sock + + except Exception as e: + print(f"Error opening IPVTAP interface {tap_name}: {e}") + return None + + def create_tap_interface(self, tap_name): + """Create and configure a TAP interface using /dev/net/tun""" + try: + # Open the tun device + tun_fd = os.open('/dev/net/tun', os.O_RDWR) + if tun_fd < 0: + raise Exception("Failed to open /dev/net/tun") + + # Prepare the ifr structure + tap_name_bytes = tap_name.encode('utf-8') + ifr = struct.pack('16sH', tap_name_bytes, IFF_TAP | IFF_NO_PI) + + # Set the interface name and flags + result = fcntl.ioctl(tun_fd, TUNSETIFF, ifr) + + # Get the actual interface name that was set + unpacked = struct.unpack('16sH', result) + actual_name = unpacked[0].split(b'\x00')[0].decode() + print(f"Created TAP interface: {actual_name}") + + return tun_fd + + except Exception as e: + print(f"Error creating TAP interface {tap_name}: {e}") + return None + + def forward_data(self, from_fd, 
to_fd, description): + """Forward data from one file descriptor to another""" + try: + data = os.read(from_fd, self.buffer_size) + if data: + os.write(to_fd, data) + return True + return False + + except BlockingIOError: + return True + except Exception as e: + print(f"Error forwarding data {description}: {e}") + return False + + def run(self): + """Main bridge loop""" + # Create TAP interfaces + tap1_fd = self.create_tap_interface(self.tap_name) + + sock = self.open_ipvtap_sock(self.ipvtap_name) + tap2_fd = sock.fileno() + + if tap1_fd is None or tap2_fd is None: + print("Failed to create TAP interfaces") + return + + print("Press Ctrl+C to stop\n") + + self.running = True + stats = {'tap1_to_tap2': 0, 'tap2_to_tap1': 0} + while self.running: + try: + # Use select to monitor both file descriptors + readable, _, _ = select.select([tap1_fd, tap2_fd], [], [], 1.0) + + for fd in readable: + if fd == tap1_fd: + descr = f"from {self.tap_name} to {self.ipvtap_name}" + if self.forward_data(tap1_fd, tap2_fd, descr): + stats['tap1_to_tap2'] += 1 + else: + self.running = False + elif fd == tap2_fd: + descr = f"from {self.ipvtap_name} to {self.tap_name}" + if self.forward_data(tap2_fd, tap1_fd, descr): + stats['tap2_to_tap1'] += 1 + else: + self.running = False + + except KeyboardInterrupt: + print("\nShutting down...") + self.running = False + except Exception as e: + print(f"Error in main loop: {e}") + self.running = False + + # Cleanup + os.close(tap1_fd) + os.close(tap2_fd) + print(f"Bridge stopped in {ns_name}. 
Stats: {stats}") + + +def signal_handler(_sig, _frame): + print(f'\nReceived interrupt signal, shutting down bridge in {ns_name}') + sys.exit(0) + + +if __name__ == "__main__": + ns_name = subprocess.getoutput("ip netns identify") or "default" + + signal.signal(signal.SIGINT, signal_handler) + + # Check if running as root + if os.geteuid() != 0: + print("ERROR: This script must be run as root!") + sys.exit(1) + + if len(sys.argv) != 3: + print("Usage: tap_bridge.py tap_name ipvtap_name") + sys.exit(1) + + TAP = sys.argv[1] + IPVTAP = sys.argv[2] + + print(f"Starting TAP bridge between {TAP} and {IPVTAP} in {ns_name}") + bridge = TapBridge(TAP, IPVTAP) + bridge.run() diff --git a/tools/testing/selftests/net/ipvtap_macnat_test.sh b/tools/testing/selftests/net/ipvtap_macnat_test.sh new file mode 100755 index 000000000000..5f684a6d7603 --- /dev/null +++ b/tools/testing/selftests/net/ipvtap_macnat_test.sh @@ -0,0 +1,332 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Tests for ipvtap in macnat mode + +NS_TST0=ipvlan-tst-0 +NS_TST1=ipvlan-tst-1 +NS_PHY=ipvlan-tst-phy + +IP_HOST=172.25.0.1 +IP_PHY=172.25.0.2 +IP_TST0=172.25.0.10 +IP_TST1=172.25.0.30 + +IP_OK0=("172.25.0.10" "172.25.0.11" "172.25.0.12" "172.25.0.13") +IP6_OK0=("fc00::10" "fc00::11" "fc00::12" "fc00::13" ) + +IP_OVFL0="172.25.0.14" +IP6_OVFL0="fc00::14" + +IP6_HOST=fc00::1 +IP6_PHY=fc00::2 +IP6_TST0=fc00::10 +IP6_TST1=fc00::30 + +MAC_HOST="92:3a:00:00:00:01" +MAC_PHY="92:3a:00:00:00:02" +MAC_TST0="92:3a:00:00:00:10" +MAC_TST1="92:3a:00:00:00:30" + +VETH_HOST=vethtst +VETH_PHY=vethtst.p + +# +# The testing environment looks this way: +# +# |------HOST------| |------PHY-------| +# | veth<----------------->veth | +# |------|--|------| |----------------| +# | | +# | | |-----TST0-------| +# | |------------|----ipvtap | +# | |----------------| +# | +# | |-----TST1-------| +# |---------------|----ivtap | +# |----------------| +# +# The macnat mode is for virtual machines, so ipvtap-interface is supposed 
+# to be used only for traffic monitoring and doesn't have ip-address. +# +# To simulate a virtual machine on ipvtap, we create TAP-interfaces +# in TST environments and assing IP-addresses to them. +# TAP and IPVTAP are connected with simple python script. +# + +ns_run() { + ns=$1 + shift + if [[ "$ns" == "default" ]]; then + "$@" >/dev/null + else + ip netns exec "$ns" "$@" >/dev/null + fi +} + +configure_ns() { + local ns=$1 + local n=$2 + local ip=$3 + local ip6=$4 + local mac=$5 + + ns_run $ns ip link set lo up + + if ! ip link add netns $ns name ipvtap0.$n link $VETH_HOST \ + type ipvtap mode l2macnat bridge; then + exit_error "FAIL: Failed to configure ipvtap link." + fi + ns_run $ns ip link set ipvtap0.$n up + + ns_run $ns ip tuntap add mode tap tap0.$n + ns_run $ns ip link set dev tap0.$n address $mac + # disable dad + ns_run $ns sysctl -w net/ipv6/conf/tap0.$n/accept_dad=0 + ns_run $ns ip link set tap0.$n up + ns_run $ns ip a a $ip/24 dev tap0.$n + ns_run $ns ip a a $ip6/64 dev tap0.$n +} + +start_macnat_bridge() { + local ns=$1 + local n=$2 + ip netns exec $ns python3 ipvtap_macnat_bridge.py tap0.$n ipvtap0.$n & +} + +configure_veth() { + local ns=$1 + local veth=$2 + local ip=$3 + local ip6=$4 + local mac=$5 + + ns_run $ns ip link set lo up + ns_run $ns ethtool -K $veth tx off rx off + ns_run $ns ip link set dev $veth address $mac + ns_run $ns ip link set $veth up + ns_run $ns ip a a $ip/24 dev $veth + ns_run $ns ip a a $ip6/64 dev $veth +} + +setup_env() { + ip netns add $NS_TST0 + ip netns add $NS_TST1 + ip netns add $NS_PHY + + # setup simulated other-host (phy) and host itself + ip link add $VETH_HOST type veth peer name $VETH_PHY \ + netns $NS_PHY >/dev/null + + # host config + configure_veth default $VETH_HOST $IP_HOST $IP6_HOST $MAC_HOST + configure_veth $NS_PHY $VETH_PHY $IP_PHY $IP6_PHY $MAC_PHY + + # TST namespaces config + configure_ns $NS_TST0 0 $IP_TST0 $IP6_TST0 $MAC_TST0 + configure_ns $NS_TST1 1 $IP_TST1 $IP6_TST1 $MAC_TST1 +} + 
+ping_all() { + # This will learn MAC/IP addresses on ipvtap + local ns=$1 + + ns_run $ns ping -c 1 $IP_TST0 + ns_run $ns ping -c 1 $IP6_TST0 + + ns_run $ns ping -c 1 $IP_TST1 + ns_run $ns ping -c 1 $IP6_TST1 + + ns_run $ns ping -c 1 $IP_HOST + ns_run $ns ping -c 1 $IP6_HOST + + ns_run $ns ping -c 1 $IP_PHY + ns_run $ns ping -c 1 $IP6_PHY +} + +check_mac_eq() { + # Ensure IP corresponds to MAC. + local ns=$1 + local ip=$2 + local mac=$3 + local dev=$4 + + if [[ "$ns" == "default" ]]; then + out=$( + ip neigh show $ip dev $dev \ + | grep "$ip" \ + | grep "$mac" + ) + else + out=$( + ip netns exec $ns \ + ip neigh show $ip dev $dev \ + | grep "$ip" \ + | grep "$mac" + ) + fi + + if [[ 'X'$out'X' == "XX" ]]; then + exit_error "FAIL: '$ip' is not '$mac'" + fi +} + +cleanup_env() { + ip link del $VETH_HOST + ip netns del $NS_TST0 + ip netns del $NS_TST1 + ip netns del $NS_PHY +} + +exit_error() { + echo $1 + exit 1 +} + +test_check_mac() { + # All IPs in NS_PHY should have MAC of the host + check_mac_eq $NS_PHY $IP_TST0 $MAC_HOST $VETH_PHY + check_mac_eq $NS_PHY $IP6_TST0 $MAC_HOST $VETH_PHY + check_mac_eq $NS_PHY $IP_TST1 $MAC_HOST $VETH_PHY + check_mac_eq $NS_PHY $IP6_TST1 $MAC_HOST $VETH_PHY + check_mac_eq $NS_PHY $IP_HOST $MAC_HOST $VETH_PHY + check_mac_eq $NS_PHY $IP6_HOST $MAC_HOST $VETH_PHY + + # All IPs in TST0 should have corresponding MAC + check_mac_eq $NS_TST0 $IP_HOST $MAC_HOST tap0.0 + check_mac_eq $NS_TST0 $IP6_HOST $MAC_HOST tap0.0 + check_mac_eq $NS_TST0 $IP_TST1 $MAC_TST1 tap0.0 + check_mac_eq $NS_TST0 $IP6_TST1 $MAC_TST1 tap0.0 + check_mac_eq $NS_TST0 $IP_PHY $MAC_PHY tap0.0 + check_mac_eq $NS_TST0 $IP6_PHY $MAC_PHY tap0.0 + + # All IPs in host should have corresponding MAC + check_mac_eq default $IP_TST0 $MAC_TST0 $VETH_HOST + check_mac_eq default $IP6_TST0 $MAC_TST0 $VETH_HOST + check_mac_eq default $IP_TST1 $MAC_TST1 $VETH_HOST + check_mac_eq default $IP6_TST1 $MAC_TST1 $VETH_HOST + check_mac_eq default $IP_PHY $MAC_PHY $VETH_HOST + check_mac_eq 
default $IP6_PHY $MAC_PHY $VETH_HOST +} + +test_ip_add() { + # adding IPs to ipvtap should be forbidden and should fail + if ns_run $NS_TST0 ip a a 172.26.0.1/24 dev ipvtap0.0; then + exit_error "FAIL: Module allowed to add ip to ipvtap." + fi + + if ns_run $NS_TST0 ip a a fc01::1/64 dev ipvtap0.0; then + exit_error "FAIL: Module allowed to add ip6 to ipvtap." + fi +} + +test_ip_overflow() { + # The ipvtap remembers limited number of addresses on interface. + # Let's overflow it and check that oldest one doesn't work. + + ns_run $NS_TST0 ip addr flush dev tap0.0 + + # Add exactly 4 ip addresses + for ip in "${IP_OK0[@]}"; do + ns_run $NS_TST0 ip a a $ip/24 dev tap0.0 + ns_run $NS_TST0 ping -c 1 $IP_HOST -I $ip + done + + # Initial check that ping works + if ! ping -c 2 $IP_TST0; then + exit_error "FAIL: Failed to ping tst0" + fi + + # Add 1 more ip addresses + ns_run $NS_TST0 ip a a $IP_OVFL0/24 dev tap0.0 + ns_run $NS_TST0 ping -c 1 $IP_HOST -I $IP_OVFL0 + # check that ping to oldest one from host fails. + echo "the next ping should fail:" + if ping -c 2 $IP_TST0; then + exit_error "FAIL: IP-0 still exists on interface" + fi + + # ping host using address-0 and force relearn of IP0. + # Host should be able ping after that + ns_run $NS_TST0 ping -c 1 $IP_HOST -I $IP_TST0 + + if ! ping -c 2 $IP_TST0; then + exit_error "FAIL: Failed to ping tst0 at stage 3" + fi +} + +test_ip6_overflow() { + # The ipvtap stores limited number of addresses on interface. + # Let's overflow it and check that oldest one doesn't work. + + ns_run $NS_TST0 ip addr flush dev tap0.0 + + # Add exactly 4 ip addresses + for ip6 in "${IP6_OK0[@]}"; do + ns_run $NS_TST0 ip a a $ip6/64 dev tap0.0 + ns_run $NS_TST0 ping -c 1 $IP6_HOST -I $ip6 + done + + # Initial check that ping6 works + if ! 
ping -c 2 $IP6_TST0; then + exit_error "FAIL: Failed to ping6 tst0" + fi + + # Add 1 more ip6 addresses + ns_run $NS_TST0 ip a a $IP6_OVFL0/64 dev tap0.0 + ns_run $NS_TST0 ping -c 1 $IP6_HOST -I $IP6_OVFL0 + # check that ping to oldest one from host fails. + echo "the next ping should fail:" + if ping -c 2 $IP6_TST0; then + exit_error "FAIL: IP6-0 still exists on interface" + fi + + # ping host using address-0 and force relearn of IP0. + # Host should be able ping after that + ns_run $NS_TST0 ping -c 1 $IP6_HOST -I $IP6_TST0 + if ! ping -c 2 $IP6_TST0; then + exit_error "FAIL: Failed to ping6 tst0 at stage 3" + fi +} + +exec_test() { + echo "TEST: "$2 + $1 + echo "PASSED: "$2 +} + +trap cleanup_env EXIT + +echo "ipvlan macnat tests" +echo "===================" + +modprobe -q tap +modprobe -q ipvlan +modprobe -q ipvtap + +setup_env + +exec_test test_ip_add "ip add not allowed" + +start_macnat_bridge $NS_TST0 0 +mb_pid1=$! +start_macnat_bridge $NS_TST1 1 +mb_pid2=$! + +echo "<<< Preparation: pinging all...." +ping_all default +ping_all $NS_TST0 +ping_all $NS_TST1 +ping_all $NS_PHY +echo "Finished preparational pinging all. >>>" + +exec_test test_check_mac "mac correctness" +exec_test test_ip_overflow "ip learn capacity overflow" +exec_test test_ip6_overflow "ip6 learn capacity overflow" + +kill -INT $mb_pid1 +kill -INT $mb_pid2 +wait $mb_pid1 +wait $mb_pid2 + +echo "All tests passed" -- 2.25.1

1 month, 3 weeks

2
1
0 0

[PATCH] selftests/futex: Fix storing address of local variable

by Ankit Khushwaha

In `child_circular_list()` address of local variable ‘struct lock_struct a’ is assigned to `head.list.next` raising the following warning. robust_list.c: In function ‘child_circular_list’: robust_list.c:522:24: warning: storing the address of local variable ‘a’ in ‘head.list.next’ [-Wdangling-pointer=] 522 | head.list.next = &a.list; | ~~~~~~~~~~~~~~~^~~~~~~~~ robust_list.c:513:28: note: ‘a’ declared here 513 | struct lock_struct a, b, c; | ^ robust_list.c:512:40: note: ‘head’ declared here 512 | static struct robust_list_head head; | ^~~~ Defining the value with static keyword to fix this. Signed-off-by: Ankit Khushwaha <ankitkhushwaha.linux(a)gmail.com> --- tools/testing/selftests/futex/functional/robust_list.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/futex/functional/robust_list.c b/tools/testing/selftests/futex/functional/robust_list.c index e7d1254e18ca..d1aab1cc5a37 100644 --- a/tools/testing/selftests/futex/functional/robust_list.c +++ b/tools/testing/selftests/futex/functional/robust_list.c @@ -510,7 +510,7 @@ TEST(test_robust_list_multiple_elements) static int child_circular_list(void *arg) { static struct robust_list_head head; - struct lock_struct a, b, c; + static struct lock_struct a, b, c; int ret; ret = set_list(&head); -- 2.51.1

1 month, 3 weeks

2
2
0 0

[syzbot ci] Re: ipvlan: support mac-nat mode

by syzbot ci

syzbot ci has tested the following series [v4] ipvlan: support mac-nat mode https://lore.kernel.org/all/20251118100046.2944392-1-skorodumov.dmitry@huaw… * [PATCH net-next 01/13] ipvlan: Support MACNAT mode * [PATCH net-next 02/13] ipvlan: macnat: Handle rx mcast-ip and unicast eth * [PATCH net-next 03/13] ipvlan: Forget all IP when device goes down * [PATCH net-next 04/13] ipvlan: Support IPv6 in macnat mode. * [PATCH net-next 05/13] ipvlan: Fix compilation warning about __be32 -> u32 * [PATCH net-next 06/13] ipvlan: Make the addrs_lock be per port * [PATCH net-next 07/13] ipvlan: Take addr_lock in ipvlan_open() * [PATCH net-next 08/13] ipvlan: Don't allow children to use IPs of main * [PATCH net-next 09/13] ipvlan: const-specifier for functions that use iaddr * [PATCH net-next 10/13] ipvlan: Common code from v6/v4 validator_event * [PATCH net-next 11/13] ipvlan: common code to handle ipv6/ipv4 address events * [PATCH net-next 12/13] ipvlan: Ignore PACKET_LOOPBACK in handle_mode_l2() * [PATCH net-next 13/13] selftests: drv-net: selftest for ipvlan-macnat mode and found the following issue: WARNING: suspicious RCU usage in ipvlan_addr_event Full report is available here: https://ci.syzbot.org/series/e483b93a-1063-4c8a-b0e2-89530e79768b *** WARNING: suspicious RCU usage in ipvlan_addr_event tree: net-next URL: https://kernel.googlesource.com/pub/scm/linux/kernel/git/netdev/net-next.git base: c99ebb6132595b4b288a413981197eb076547c5a arch: amd64 compiler: Debian clang version 20.1.8 (++20250708063551+0c9f909b7976-1~exp1~20250708183702.136), Debian LLD 20.1.8 config: https://ci.syzbot.org/builds/ac5af6f3-6b14-4e35-9d81-ee1522de3952/config 8021q: adding VLAN 0 to HW filter on device batadv0 ============================= WARNING: suspicious RCU usage syzkaller #0 Not tainted ----------------------------- drivers/net/ipvlan/ipvlan.h:128 suspicious rcu_dereference_check() usage! 
other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 2 locks held by syz-executor/5984: #0: ffffffff8f2cc248 (rtnl_mutex){+.+.}-{4:4}, at: inet_rtm_newaddr+0x3b0/0x18b0 #1: ffffffff8f39d9b0 ((inetaddr_chain).rwsem){++++}-{4:4}, at: blocking_notifier_call_chain+0x54/0x90 stack backtrace: CPU: 1 UID: 0 PID: 5984 Comm: syz-executor Not tainted syzkaller #0 PREEMPT(full) Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 Call Trace: <TASK> dump_stack_lvl+0x189/0x250 lockdep_rcu_suspicious+0x140/0x1d0 ipvlan_addr_event+0x60b/0x950 notifier_call_chain+0x1b6/0x3e0 blocking_notifier_call_chain+0x6a/0x90 __inet_insert_ifa+0xa13/0xbf0 inet_rtm_newaddr+0xf3a/0x18b0 rtnetlink_rcv_msg+0x7cf/0xb70 netlink_rcv_skb+0x208/0x470 netlink_unicast+0x82f/0x9e0 netlink_sendmsg+0x805/0xb30 __sock_sendmsg+0x21c/0x270 __sys_sendto+0x3bd/0x520 __x64_sys_sendto+0xde/0x100 do_syscall_64+0xfa/0xfa0 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f711f191503 Code: 64 89 02 48 c7 c0 ff ff ff ff eb b7 66 2e 0f 1f 84 00 00 00 00 00 90 80 3d 61 70 22 00 00 41 89 ca 74 14 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 75 c3 0f 1f 40 00 55 48 83 ec 30 44 89 4c 24 RSP: 002b:00007ffc44b05f28 EFLAGS: 00000202 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00007f711ff14620 RCX: 00007f711f191503 RDX: 0000000000000028 RSI: 00007f711ff14670 RDI: 0000000000000003 RBP: 0000000000000001 R08: 00007ffc44b05f44 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000000003 R13: 0000000000000000 R14: 00007f711ff14670 R15: 0000000000000000 </TASK> syz-executor (5984) used greatest stack depth: 19864 bytes left *** If these findings have caused you to resend the series or submit a separate fix, please add the following tag to your commit message: Tested-by: syzbot(a)syzkaller.appspotmail.com --- This report is generated by a bot. It may contain errors. 
syzbot ci engineers can be reached at syzkaller(a)googlegroups.com.

1 month, 3 weeks

1
0
0 0

[PATCH net-next 00/12] selftests: drv-net: convert GRO and Toeplitz tests to work for drivers in NIPA

by Jakub Kicinski

Main objective of this series is to convert the gro.sh and toeplitz.sh tests to be "NIPA-compatible" - meaning make use of the Python env, which lets us run the tests against either netdevsim or a real device. The tests seem to have been written with a different flow in mind. Namely they source different bash "setup" scripts depending on arguments passed to the test. While I have nothing against the use of bash and the overall architecture - the existing code needs quite a bit of work (don't assume MAC/IP addresses, support remote endpoint over SSH). If I'm the one fixing it, I'd rather convert them to our "simplistic" Python. This series rewrites the tests in Python while addressing their shortcomings. The functionality of running the test over loopback on a real device is retained but with a different method of invocation (see the last patch). Once again we are dealing with a script which runs over a variety of protocols (combination of [ipv4, ipv6, ipip] x [tcp, udp]). The first 4 patches add support for test variants to our scripts. We use the term "variant" in the same sense as the C kselftest_harness.h - variant is just a set of static input arguments. Note that neither GRO nor the Toeplitz test fully passes for me on any HW I have access to. But this is unrelated to the conversion. This series is not making any real functional changes to the tests, it is limited to improving the "test harness" scripts. 
Jakub Kicinski (12): selftests: net: py: coding style improvements selftests: net: py: extract the case generation logic selftests: net: py: add test variants selftests: drv-net: xdp: use variants for qstat tests selftests: net: relocate gro and toeplitz tests to drivers/net selftests: net: py: support ksft ready without wait selftests: net: py: read ip link info about remote dev netdevsim: pass packets thru GRO on Rx selftests: drv-net: add a Python version of the GRO test selftests: drv-net: hw: convert the Toeplitz test to Python netdevsim: add loopback support selftests: net: remove old setup_* scripts tools/testing/selftests/drivers/net/Makefile | 2 + .../testing/selftests/drivers/net/hw/Makefile | 6 +- tools/testing/selftests/net/Makefile | 7 - tools/testing/selftests/net/lib/Makefile | 1 + drivers/net/netdevsim/netdev.c | 26 ++- .../testing/selftests/{ => drivers}/net/gro.c | 5 +- .../{net => drivers/net/hw}/toeplitz.c | 7 +- .../testing/selftests/drivers/net/.gitignore | 1 + tools/testing/selftests/drivers/net/gro.py | 161 ++++++++++++++ .../selftests/drivers/net/hw/.gitignore | 3 +- .../drivers/net/hw/lib/py/__init__.py | 4 +- .../selftests/drivers/net/hw/toeplitz.py | 208 ++++++++++++++++++ .../selftests/drivers/net/lib/py/__init__.py | 4 +- .../selftests/drivers/net/lib/py/env.py | 2 + tools/testing/selftests/drivers/net/xdp.py | 42 ++-- tools/testing/selftests/net/.gitignore | 2 - tools/testing/selftests/net/gro.sh | 105 --------- .../selftests/net/lib/ksft_setup_loopback.sh | 111 ++++++++++ .../testing/selftests/net/lib/py/__init__.py | 5 +- tools/testing/selftests/net/lib/py/ksft.py | 93 ++++++-- tools/testing/selftests/net/lib/py/nsim.py | 2 +- tools/testing/selftests/net/lib/py/utils.py | 20 +- tools/testing/selftests/net/setup_loopback.sh | 120 ---------- tools/testing/selftests/net/setup_veth.sh | 45 ---- tools/testing/selftests/net/toeplitz.sh | 199 ----------------- .../testing/selftests/net/toeplitz_client.sh | 28 --- 26 files changed, 631 
insertions(+), 578 deletions(-) rename tools/testing/selftests/{ => drivers}/net/gro.c (99%) rename tools/testing/selftests/{net => drivers/net/hw}/toeplitz.c (99%) create mode 100755 tools/testing/selftests/drivers/net/gro.py create mode 100755 tools/testing/selftests/drivers/net/hw/toeplitz.py delete mode 100755 tools/testing/selftests/net/gro.sh create mode 100755 tools/testing/selftests/net/lib/ksft_setup_loopback.sh delete mode 100644 tools/testing/selftests/net/setup_loopback.sh delete mode 100644 tools/testing/selftests/net/setup_veth.sh delete mode 100755 tools/testing/selftests/net/toeplitz.sh delete mode 100755 tools/testing/selftests/net/toeplitz_client.sh -- 2.51.1

1 month, 3 weeks

2
19
0 0

[PATCH bpf-next 2/4] selftests/bpf: Add tests for SHA hash kfuncs

by Daniel Hodges

Add selftests to validate the SHA-256, SHA-384, and SHA-512 hash kfuncs introduced in the BPF crypto subsystem. The tests verify both correct functionality and proper error handling. Test Data: All tests use the well-known NIST test vector input "abc" and validate against the standardized expected outputs for each algorithm. This ensures the BPF kfunc wrappers correctly delegate to the kernel crypto library. Signed-off-by: Daniel Hodges <git(a)danielhodges.dev> --- .../selftests/bpf/prog_tests/crypto_hash.c | 129 ++++++++++++++++++ .../testing/selftests/bpf/progs/crypto_hash.c | 83 +++++++++++ 2 files changed, 212 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/crypto_hash.c create mode 100644 tools/testing/selftests/bpf/progs/crypto_hash.c diff --git a/tools/testing/selftests/bpf/prog_tests/crypto_hash.c b/tools/testing/selftests/bpf/prog_tests/crypto_hash.c new file mode 100644 index 000000000000..4600dad693d4 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/crypto_hash.c @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> +#include "crypto_hash.skel.h" + +/* NIST test vectors for SHA-256("abc") */ +static const unsigned char expected_sha256[32] = { + 0xba, 0x78, 0x16, 0xbf, 0x8f, 0x01, 0xcf, 0xea, + 0x41, 0x41, 0x40, 0xde, 0x5d, 0xae, 0x22, 0x23, + 0xb0, 0x03, 0x61, 0xa3, 0x96, 0x17, 0x7a, 0x9c, + 0xb4, 0x10, 0xff, 0x61, 0xf2, 0x00, 0x15, 0xad +}; + +/* NIST test vectors for SHA-384("abc") */ +static const unsigned char expected_sha384[48] = { + 0xcb, 0x00, 0x75, 0x3f, 0x45, 0xa3, 0x5e, 0x8b, + 0xb5, 0xa0, 0x3d, 0x69, 0x9a, 0xc6, 0x50, 0x07, + 0x27, 0x2c, 0x32, 0xab, 0x0e, 0xde, 0xd1, 0x63, + 0x1a, 0x8b, 0x60, 0x5a, 0x43, 0xff, 0x5b, 0xed, + 0x80, 0x86, 0x07, 0x2b, 0xa1, 0xe7, 0xcc, 0x23, + 0x58, 0xba, 0xec, 0xa1, 0x34, 0xc8, 0x25, 0xa7 +}; + +/* NIST test vectors for SHA-512("abc") */ +static const unsigned char expected_sha512[64] = { + 0xdd, 0xaf, 0x35, 0xa1, 0x93, 0x61, 0x7a, 0xba, + 0xcc, 0x41, 
0x73, 0x49, 0xae, 0x20, 0x41, 0x31, + 0x12, 0xe6, 0xfa, 0x4e, 0x89, 0xa9, 0x7e, 0xa2, + 0x0a, 0x9e, 0xee, 0xe6, 0x4b, 0x55, 0xd3, 0x9a, + 0x21, 0x92, 0x99, 0x2a, 0x27, 0x4f, 0xc1, 0xa8, + 0x36, 0xba, 0x3c, 0x23, 0xa3, 0xfe, 0xeb, 0xbd, + 0x45, 0x4d, 0x44, 0x23, 0x64, 0x3c, 0xe8, 0x0e, + 0x2a, 0x9a, 0xc9, 0x4f, 0xa5, 0x4c, 0xa4, 0x9f +}; + +static void test_sha256_basic(void) +{ + struct crypto_hash *skel; + int err, prog_fd; + + LIBBPF_OPTS(bpf_test_run_opts, topts); + + skel = crypto_hash__open_and_load(); + if (!ASSERT_OK_PTR(skel, "crypto_hash__open_and_load")) + return; + + prog_fd = bpf_program__fd(skel->progs.test_sha256); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_sha256"); + ASSERT_EQ(skel->data->sha256_status, 0, "sha256_status"); + ASSERT_EQ(memcmp(skel->bss->sha256_output, expected_sha256, 32), 0, + "sha256_output_match"); + + crypto_hash__destroy(skel); +} + +static void test_sha384_basic(void) +{ + struct crypto_hash *skel; + int err, prog_fd; + + LIBBPF_OPTS(bpf_test_run_opts, topts); + + skel = crypto_hash__open_and_load(); + if (!ASSERT_OK_PTR(skel, "crypto_hash__open_and_load")) + return; + + /* Run SHA-384 test */ + prog_fd = bpf_program__fd(skel->progs.test_sha384); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_sha384"); + ASSERT_EQ(skel->data->sha384_status, 0, "sha384_status"); + ASSERT_EQ(memcmp(skel->bss->sha384_output, expected_sha384, 48), 0, + "sha384_output_match"); + + crypto_hash__destroy(skel); +} + +static void test_sha512_basic(void) +{ + struct crypto_hash *skel; + int err, prog_fd; + + LIBBPF_OPTS(bpf_test_run_opts, topts); + + skel = crypto_hash__open_and_load(); + if (!ASSERT_OK_PTR(skel, "crypto_hash__open_and_load")) + return; + + prog_fd = bpf_program__fd(skel->progs.test_sha512); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_sha512"); + ASSERT_EQ(skel->data->sha512_status, 0, "sha512_status"); + ASSERT_EQ(memcmp(skel->bss->sha512_output, 
expected_sha512, 64), 0, + "sha512_output_match"); + + crypto_hash__destroy(skel); +} + +static void test_sha256_invalid_params(void) +{ + struct crypto_hash *skel; + int err, prog_fd; + + LIBBPF_OPTS(bpf_test_run_opts, topts); + + skel = crypto_hash__open_and_load(); + if (!ASSERT_OK_PTR(skel, "crypto_hash__open_and_load")) + return; + + prog_fd = bpf_program__fd(skel->progs.test_sha256_zero_len); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_zero_len"); + ASSERT_EQ(skel->data->sha256_status, 0, "zero_len_rejected"); + + crypto_hash__destroy(skel); +} + +void test_crypto_hash(void) +{ + if (test__start_subtest("sha256_basic")) + test_sha256_basic(); + if (test__start_subtest("sha384_basic")) + test_sha384_basic(); + if (test__start_subtest("sha512_basic")) + test_sha512_basic(); + if (test__start_subtest("sha256_invalid_params")) + test_sha256_invalid_params(); +} diff --git a/tools/testing/selftests/bpf/progs/crypto_hash.c b/tools/testing/selftests/bpf/progs/crypto_hash.c new file mode 100644 index 000000000000..d01f23557411 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/crypto_hash.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" +#include "bpf_kfuncs.h" + +unsigned char test_input[3] = "abc"; + +/* Expected SHA-256 hash of "abc" */ +/* ba7816bf 8f01cfea 414140de 5dae2223 b00361a3 96177a9c b410ff61 f20015ad */ +unsigned char expected_sha256[32] = { + 0xba, 0x78, 0x16, 0xbf, 0x8f, 0x01, 0xcf, 0xea, + 0x41, 0x41, 0x40, 0xde, 0x5d, 0xae, 0x22, 0x23, + 0xb0, 0x03, 0x61, 0xa3, 0x96, 0x17, 0x7a, 0x9c, + 0xb4, 0x10, 0xff, 0x61, 0xf2, 0x00, 0x15, 0xad +}; + +/* Output buffers for test results */ +unsigned char sha256_output[32] = {}; +unsigned char sha384_output[48] = {}; +unsigned char sha512_output[64] = {}; + +int sha256_status = -1; +int sha384_status = -1; +int sha512_status = -1; + +/* Declare the SHA hash kfuncs */ +extern int bpf_sha256_hash(const 
struct bpf_dynptr *data, const struct bpf_dynptr *out) __ksym; +extern int bpf_sha384_hash(const struct bpf_dynptr *data, const struct bpf_dynptr *out) __ksym; +extern int bpf_sha512_hash(const struct bpf_dynptr *data, const struct bpf_dynptr *out) __ksym; + +SEC("syscall") +int test_sha256(void *ctx) +{ + struct bpf_dynptr input_ptr, output_ptr; + + bpf_dynptr_from_mem(test_input, sizeof(test_input), 0, &input_ptr); + bpf_dynptr_from_mem(sha256_output, sizeof(sha256_output), 0, &output_ptr); + + sha256_status = bpf_sha256_hash(&input_ptr, &output_ptr); + return 0; +} + +SEC("syscall") +int test_sha384(void *ctx) +{ + struct bpf_dynptr input_ptr, output_ptr; + + bpf_dynptr_from_mem(test_input, sizeof(test_input), 0, &input_ptr); + bpf_dynptr_from_mem(sha384_output, sizeof(sha384_output), 0, &output_ptr); + + sha384_status = bpf_sha384_hash(&input_ptr, &output_ptr); + return 0; +} + +SEC("syscall") +int test_sha512(void *ctx) +{ + struct bpf_dynptr input_ptr, output_ptr; + + bpf_dynptr_from_mem(test_input, sizeof(test_input), 0, &input_ptr); + bpf_dynptr_from_mem(sha512_output, sizeof(sha512_output), 0, &output_ptr); + + sha512_status = bpf_sha512_hash(&input_ptr, &output_ptr); + return 0; +} + +SEC("syscall") +int test_sha256_zero_len(void *ctx) +{ + struct bpf_dynptr input_ptr, output_ptr; + int ret; + + bpf_dynptr_from_mem(test_input, 0, 0, &input_ptr); + bpf_dynptr_from_mem(sha256_output, sizeof(sha256_output), 0, &output_ptr); + + ret = bpf_sha256_hash(&input_ptr, &output_ptr); + sha256_status = (ret == -22) ? 0 : ret; + return 0; +} + +char __license[] SEC("license") = "GPL"; -- 2.51.0

1 month, 3 weeks

2
1
0 0

[PATCH v6] selftests: af_unix: Add tests for ECONNRESET and EOF semantics

by Sunday Adelodun

Add selftests to verify and document Linux’s intended behaviour for UNIX domain sockets (SOCK_STREAM, SOCK_SEQPACKET and SOCK_DGRAM) when a peer closes. The tests verify that: 1. SOCK_STREAM returns EOF when the peer closes normally. 2. SOCK_STREAM returns ECONNRESET if the peer closes with unread data. 3. SOCK_SEQPACKET returns EOF when the peer closes normally. 4. SOCK_SEQPACKET returns ECONNRESET if the peer closes with unread data. 5. SOCK_DGRAM does not return ECONNRESET when the peer closes. This follows up on review feedback suggesting a selftest to clarify Linux’s semantics. Suggested-by: Kuniyuki Iwashima <kuniyu(a)google.com> Signed-off-by: Sunday Adelodun <adelodunolaoluwa(a)yahoo.com> --- changelog: changes from v5 to v6 - Remove the unneeded check for self->child > 0 in the FIXTURE_TEARDOWN changes from v4 to v5: 1. Moved the send() call before the socket type check in Test 2 to ensure the unread data behavior is tested for SOCK_DGRAM as well. 2. Removed the misleading comment about accept() for clarity. 3. Applied indentation fixes for style consistency (alignment with open parenthesis). 4. Minor comment and formatting cleanups for clarity and adherence to kernel coding style. 
tools/testing/selftests/net/.gitignore | 1 + tools/testing/selftests/net/af_unix/Makefile | 1 + .../selftests/net/af_unix/unix_connreset.c | 177 ++++++++++++++++++ 3 files changed, 179 insertions(+) create mode 100644 tools/testing/selftests/net/af_unix/unix_connreset.c diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 439101b518ee..e89a60581a13 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -65,3 +65,4 @@ udpgso udpgso_bench_rx udpgso_bench_tx unix_connect +unix_connreset diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index de805cbbdf69..5826a8372451 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -7,6 +7,7 @@ TEST_GEN_PROGS := \ scm_pidfd \ scm_rights \ unix_connect \ + unix_connreset \ # end of TEST_GEN_PROGS include ../../lib.mk diff --git a/tools/testing/selftests/net/af_unix/unix_connreset.c b/tools/testing/selftests/net/af_unix/unix_connreset.c new file mode 100644 index 000000000000..bffef2b54bfd --- /dev/null +++ b/tools/testing/selftests/net/af_unix/unix_connreset.c @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Selftest for AF_UNIX socket close and ECONNRESET behaviour. + * + * This test verifies: + * 1. SOCK_STREAM returns EOF when the peer closes normally. + * 2. SOCK_STREAM returns ECONNRESET if peer closes with unread data. + * 3. SOCK_SEQPACKET returns EOF when the peer closes normally. + * 4. SOCK_SEQPACKET returns ECONNRESET if the peer closes with unread data. + * 5. SOCK_DGRAM does not return ECONNRESET when the peer closes. + * + * These tests document the intended Linux behaviour. 
+ * + */ + +#define _GNU_SOURCE +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include <unistd.h> +#include <errno.h> +#include <sys/socket.h> +#include <sys/un.h> +#include "../../kselftest_harness.h" + +#define SOCK_PATH "/tmp/af_unix_connreset.sock" + +static void remove_socket_file(void) +{ + unlink(SOCK_PATH); +} + +FIXTURE(unix_sock) +{ + int server; + int client; + int child; +}; + +FIXTURE_VARIANT(unix_sock) +{ + int socket_type; + const char *name; +}; + +FIXTURE_VARIANT_ADD(unix_sock, stream) { + .socket_type = SOCK_STREAM, + .name = "SOCK_STREAM", +}; + +FIXTURE_VARIANT_ADD(unix_sock, dgram) { + .socket_type = SOCK_DGRAM, + .name = "SOCK_DGRAM", +}; + +FIXTURE_VARIANT_ADD(unix_sock, seqpacket) { + .socket_type = SOCK_SEQPACKET, + .name = "SOCK_SEQPACKET", +}; + +FIXTURE_SETUP(unix_sock) +{ + struct sockaddr_un addr = {}; + int err; + + addr.sun_family = AF_UNIX; + strcpy(addr.sun_path, SOCK_PATH); + remove_socket_file(); + + self->server = socket(AF_UNIX, variant->socket_type, 0); + ASSERT_LT(-1, self->server); + + err = bind(self->server, (struct sockaddr *)&addr, sizeof(addr)); + ASSERT_EQ(0, err); + + if (variant->socket_type == SOCK_STREAM || + variant->socket_type == SOCK_SEQPACKET) { + err = listen(self->server, 1); + ASSERT_EQ(0, err); + } + + self->client = socket(AF_UNIX, variant->socket_type | SOCK_NONBLOCK, 0); + ASSERT_LT(-1, self->client); + + err = connect(self->client, (struct sockaddr *)&addr, sizeof(addr)); + ASSERT_EQ(0, err); +} + +FIXTURE_TEARDOWN(unix_sock) +{ + if (variant->socket_type == SOCK_STREAM || + variant->socket_type == SOCK_SEQPACKET) + close(self->child); + + close(self->client); + close(self->server); + remove_socket_file(); +} + +/* Test 1: peer closes normally */ +TEST_F(unix_sock, eof) +{ + char buf[16] = {}; + ssize_t n; + + if (variant->socket_type == SOCK_STREAM || + variant->socket_type == SOCK_SEQPACKET) { + self->child = accept(self->server, NULL, NULL); + ASSERT_LT(-1, self->child); + + 
close(self->child); + } else { + close(self->server); + } + + n = recv(self->client, buf, sizeof(buf), 0); + + if (variant->socket_type == SOCK_STREAM || + variant->socket_type == SOCK_SEQPACKET) { + ASSERT_EQ(0, n); + } else { + ASSERT_EQ(-1, n); + ASSERT_EQ(EAGAIN, errno); + } +} + +/* Test 2: peer closes with unread data */ +TEST_F(unix_sock, reset_unread_behavior) +{ + char buf[16] = {}; + ssize_t n; + + /* Send data that will remain unread */ + send(self->client, "hello", 5, 0); + + if (variant->socket_type == SOCK_DGRAM) { + /* No real connection, just close the server */ + close(self->server); + } else { + self->child = accept(self->server, NULL, NULL); + ASSERT_LT(-1, self->child); + + /* Peer closes before client reads */ + close(self->child); + } + + n = recv(self->client, buf, sizeof(buf), 0); + ASSERT_EQ(-1, n); + + if (variant->socket_type == SOCK_STREAM || + variant->socket_type == SOCK_SEQPACKET) { + ASSERT_EQ(ECONNRESET, errno); + } else { + ASSERT_EQ(EAGAIN, errno); + } +} + +/* Test 3: closing unaccepted (embryo) server socket should reset client. */ +TEST_F(unix_sock, reset_closed_embryo) +{ + char buf[16] = {}; + ssize_t n; + + if (variant->socket_type == SOCK_DGRAM) + SKIP(return, "This test only applies to SOCK_STREAM and SOCK_SEQPACKET"); + + /* Close server without accept()ing */ + close(self->server); + + n = recv(self->client, buf, sizeof(buf), 0); + + ASSERT_EQ(-1, n); + ASSERT_EQ(ECONNRESET, errno); +} + +TEST_HARNESS_MAIN + -- 2.43.0

1 month, 3 weeks

2
1
0 0

[PATCH net-next 0/8] selftests: mptcp: counter cache & stats before timeout

by Matthieu Baerts (NGI0)

Here are a bunch of small improvements to the MPTCP selftests: - Patch 1: move code to mptcp_lib.sh to prepare the new features. - Patch 2: simplify mptcp_lib_pr_err_stats helper use. - Patch 3: remove unused last column from nstat output. - Patch 4: improve stats dump in mptcp_join.sh. - Patch 5: get counters from nstat history and simplify mptcp_connect.sh. - Patch 6: avoid taking the same packet trace twice. - Patch 7: wait for an event instead of a fixed time. - Patch 8: instead of using 'timeout' and print the stats after, another internal timeout is used: if it fires, it will print stats, then stop everything. This avoids confusion around stats in case of timeout. Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org> --- Matthieu Baerts (NGI0) (8): selftests: mptcp: lib: introduce 'nstat_{init,get}' selftests: mptcp: lib: remove stats files args selftests: mptcp: lib: stats: remove nstat rate columns selftests: mptcp: join: dump stats from history selftests: mptcp: lib: get counters from nstat history selftests: mptcp: connect: avoid double packet traces selftests: mptcp: wait for port instead of sleep selftests: mptcp: get stats just before timing out tools/testing/selftests/net/mptcp/mptcp_connect.sh | 140 ++++++++++----------- tools/testing/selftests/net/mptcp/mptcp_join.sh | 65 +++++----- tools/testing/selftests/net/mptcp/mptcp_lib.sh | 58 +++++++-- tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 43 ++++--- tools/testing/selftests/net/mptcp/simult_flows.sh | 44 ++++--- tools/testing/selftests/net/mptcp/userspace_pm.sh | 3 +- 6 files changed, 203 insertions(+), 150 deletions(-) --- base-commit: df58ee7d8faf353ebf5d4703c35fcf3e578e9b1b change-id: 20251114-net-next-mptcp-sft-count-cache-stats-timeout-faa64482db92 Best regards, -- Matthieu Baerts (NGI0) <matttbe(a)kernel.org>

1 month, 3 weeks

2
9
0 0

[PATCH] rust: allow `clippy::disallowed_names` for doctests

by Miguel Ojeda

Examples (i.e. doctests) may want to use names such as `foo`, thus the `clippy::disallowed_names` lint gets in the way. Thus allow it for all doctests. In addition, remove it from the existing `expect`s we have in a few doctests. This does not mean that we should stop trying to find good names for our examples, though. Suggested-by: Alice Ryhl <aliceryhl(a)google.com> Link: https://lore.kernel.org/rust-for-linux/aRHSLChi5HYXW4-9@google.com/ Signed-off-by: Miguel Ojeda <ojeda(a)kernel.org> --- rust/kernel/init.rs | 3 +-- rust/kernel/types.rs | 1 - scripts/rustdoc_test_gen.rs | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs index e476d81c1a27..899b9a962762 100644 --- a/rust/kernel/init.rs +++ b/rust/kernel/init.rs @@ -30,7 +30,7 @@ //! ## General Examples //! //! ```rust -//! # #![expect(clippy::disallowed_names, clippy::undocumented_unsafe_blocks)] +//! # #![expect(clippy::undocumented_unsafe_blocks)] //! use kernel::types::Opaque; //! use pin_init::pin_init_from_closure; //! @@ -67,7 +67,6 @@ //! ``` //! //! ```rust -//! # #![expect(clippy::disallowed_names)] //! use kernel::{prelude::*, types::Opaque}; //! use core::{ptr::addr_of_mut, marker::PhantomPinned, pin::Pin}; //! # mod bindings { diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs index 835824788506..9c5e7dbf1632 100644 --- a/rust/kernel/types.rs +++ b/rust/kernel/types.rs @@ -289,7 +289,6 @@ fn drop(&mut self) { /// # Examples /// /// ``` -/// # #![expect(clippy::disallowed_names)] /// use kernel::types::Opaque; /// # // Emulate a C struct binding which is from C, maybe uninitialized or not, only the C side /// # // knows. diff --git a/scripts/rustdoc_test_gen.rs b/scripts/rustdoc_test_gen.rs index 0e6a0542d1bd..be0561049660 100644 --- a/scripts/rustdoc_test_gen.rs +++ b/scripts/rustdoc_test_gen.rs @@ -208,7 +208,7 @@ macro_rules! 
assert_eq {{ #[allow(unused)] static __DOCTEST_ANCHOR: i32 = ::core::line!() as i32 + {body_offset} + 1; {{ - #![allow(unreachable_pub)] + #![allow(unreachable_pub, clippy::disallowed_names)] {body} main(); }} -- 2.51.2

1 month, 3 weeks

5
9
0 0

[PATCH bpf-next 4/4] selftests/bpf: Add tests for ECDSA signature verification kfuncs

by Daniel Hodges

Add selftests to validate the ECDSA signature verification kfuncs introduced in the BPF crypto subsystem. The tests verify both valid signature acceptance and invalid signature rejection using the context-based ECDSA API. The tests use RFC 6979 test vectors for NIST P-256 (secp256r1) with well-known valid signatures. The algorithm "p1363(ecdsa-nist-p256)" is used to handle standard r||s signature format. Signed-off-by: Daniel Hodges <git(a)danielhodges.dev> --- .../selftests/bpf/prog_tests/ecdsa_verify.c | 96 ++++++++ .../selftests/bpf/progs/ecdsa_verify.c | 228 ++++++++++++++++++ 2 files changed, 324 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/ecdsa_verify.c create mode 100644 tools/testing/selftests/bpf/progs/ecdsa_verify.c diff --git a/tools/testing/selftests/bpf/prog_tests/ecdsa_verify.c b/tools/testing/selftests/bpf/prog_tests/ecdsa_verify.c new file mode 100644 index 000000000000..d1f83ce08ad6 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/ecdsa_verify.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> +#include "ecdsa_verify.skel.h" + +static void test_ecdsa_verify_valid_signature(void) +{ + struct ecdsa_verify *skel; + int err, prog_fd; + + LIBBPF_OPTS(bpf_test_run_opts, topts); + + skel = ecdsa_verify__open_and_load(); + if (!ASSERT_OK_PTR(skel, "ecdsa_verify__open_and_load")) + return; + + prog_fd = bpf_program__fd(skel->progs.test_ecdsa_verify_valid); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_ecdsa_verify_valid"); + ASSERT_EQ(skel->data->ctx_create_status, 0, "ctx_create_status"); + ASSERT_EQ(skel->data->verify_result, 0, "verify_valid_signature"); + + ecdsa_verify__destroy(skel); +} + +static void test_ecdsa_verify_invalid_signature(void) +{ + struct ecdsa_verify *skel; + int err, prog_fd; + + LIBBPF_OPTS(bpf_test_run_opts, topts); + + skel = ecdsa_verify__open_and_load(); + if (!ASSERT_OK_PTR(skel, "ecdsa_verify__open_and_load")) + return; + + prog_fd 
= bpf_program__fd(skel->progs.test_ecdsa_verify_invalid); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_ecdsa_verify_invalid"); + ASSERT_NEQ(skel->data->verify_invalid_result, 0, "verify_invalid_signature_rejected"); + + ecdsa_verify__destroy(skel); +} + +static void test_ecdsa_sign_and_verify(void) +{ + struct ecdsa_verify *skel; + int err, prog_fd; + + LIBBPF_OPTS(bpf_test_run_opts, topts); + + skel = ecdsa_verify__open_and_load(); + if (!ASSERT_OK_PTR(skel, "ecdsa_verify__open_and_load")) + return; + + prog_fd = bpf_program__fd(skel->progs.test_ecdsa_sign_verify); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_ecdsa_sign_verify"); + ASSERT_GT(skel->data->sign_result, 0, "sign_returns_signature_size"); + ASSERT_EQ(skel->data->sign_verify_result, 0, "verify_generated_signature"); + + ecdsa_verify__destroy(skel); +} + +static void test_ecdsa_size_queries(void) +{ + struct ecdsa_verify *skel; + int err, prog_fd; + + LIBBPF_OPTS(bpf_test_run_opts, topts); + + skel = ecdsa_verify__open_and_load(); + if (!ASSERT_OK_PTR(skel, "ecdsa_verify__open_and_load")) + return; + + prog_fd = bpf_program__fd(skel->progs.test_ecdsa_size_queries); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_ecdsa_size_queries"); + ASSERT_EQ(skel->data->keysize_result, 256, "keysize_p256"); + ASSERT_EQ(skel->data->digestsize_result, 64, "digestsize_p256"); + ASSERT_EQ(skel->data->maxsize_result, 64, "maxsize_p256"); + + ecdsa_verify__destroy(skel); +} + +void test_ecdsa_verify(void) +{ + if (test__start_subtest("verify_valid_signature")) + test_ecdsa_verify_valid_signature(); + if (test__start_subtest("verify_invalid_signature")) + test_ecdsa_verify_invalid_signature(); + if (test__start_subtest("sign_and_verify")) + test_ecdsa_sign_and_verify(); + if (test__start_subtest("size_queries")) + test_ecdsa_size_queries(); +} diff --git a/tools/testing/selftests/bpf/progs/ecdsa_verify.c 
b/tools/testing/selftests/bpf/progs/ecdsa_verify.c new file mode 100644 index 000000000000..fb24baf6c437 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/ecdsa_verify.c @@ -0,0 +1,228 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +struct bpf_ecdsa_ctx; +extern struct bpf_ecdsa_ctx * +bpf_ecdsa_ctx_create(const struct bpf_dynptr *algo_name, + const struct bpf_dynptr *public_key, int *err) __ksym; +extern struct bpf_ecdsa_ctx * +bpf_ecdsa_ctx_create_with_privkey(const struct bpf_dynptr *algo_name, + const struct bpf_dynptr *private_key, int *err) __ksym; +extern int bpf_ecdsa_verify(struct bpf_ecdsa_ctx *ctx, + const struct bpf_dynptr *message, + const struct bpf_dynptr *signature) __ksym; +extern int bpf_ecdsa_sign(struct bpf_ecdsa_ctx *ctx, + const struct bpf_dynptr *message, + const struct bpf_dynptr *signature) __ksym; +extern int bpf_ecdsa_keysize(struct bpf_ecdsa_ctx *ctx) __ksym; +extern int bpf_ecdsa_digestsize(struct bpf_ecdsa_ctx *ctx) __ksym; +extern int bpf_ecdsa_maxsize(struct bpf_ecdsa_ctx *ctx) __ksym; +extern void bpf_ecdsa_ctx_release(struct bpf_ecdsa_ctx *ctx) __ksym; + +/* NIST P-256 test vector + * This is a known valid ECDSA signature for testing purposes + */ + +/* Algorithm name for P-256 with p1363 format (standard r||s signature) */ +char algo_p256[] = "p1363(ecdsa-nist-p256)"; + +/* Public key in uncompressed format: 0x04 || x || y (65 bytes) */ +unsigned char pubkey_p256[65] = { + 0x04, /* Uncompressed point indicator */ + /* X coordinate (32 bytes) */ + 0x60, 0xfe, 0xd4, 0xba, 0x25, 0x5a, 0x9d, 0x31, + 0xc9, 0x61, 0xeb, 0x74, 0xc6, 0x35, 0x6d, 0x68, + 0xc0, 0x49, 0xb8, 0x92, 0x3b, 0x61, 0xfa, 0x6c, + 0xe6, 0x69, 0x62, 0x2e, 0x60, 0xf2, 0x9f, 0xb6, + /* Y coordinate (32 bytes) */ + 0x79, 0x03, 0xfe, 0x10, 0x08, 0xb8, 0xbc, 0x99, + 0xa4, 0x1a, 0xe9, 0xe9, 0x56, 0x28, 0xbc, 0x64, + 0xf2, 0xf1, 0xb2, 0x0c, 0x2d, 0x7e, 0x9f, 0x51, + 0x77, 0xa3, 0xc2, 0x94, 0xd4, 0x46, 
0x22, 0x99 +}; + +/* Message hash (32 bytes) - SHA-256 of "sample" */ +unsigned char message_hash[32] = { + 0xaf, 0x2b, 0xdb, 0xe1, 0xaa, 0x9b, 0x6e, 0xc1, + 0xe2, 0xad, 0xe1, 0xd6, 0x94, 0xf4, 0x1f, 0xc7, + 0x1a, 0x83, 0x1d, 0x02, 0x68, 0xe9, 0x89, 0x15, + 0x62, 0x11, 0x3d, 0x8a, 0x62, 0xad, 0xd1, 0xbf +}; + +/* Valid signature r || s (64 bytes) */ +unsigned char valid_signature[64] = { + /* r component (32 bytes) */ + 0xef, 0xd4, 0x8b, 0x2a, 0xac, 0xb6, 0xa8, 0xfd, + 0x11, 0x40, 0xdd, 0x9c, 0xd4, 0x5e, 0x81, 0xd6, + 0x9d, 0x2c, 0x87, 0x7b, 0x56, 0xaa, 0xf9, 0x91, + 0xc3, 0x4d, 0x0e, 0xa8, 0x4e, 0xaf, 0x37, 0x16, + /* s component (32 bytes) */ + 0xf7, 0xcb, 0x1c, 0x94, 0x2d, 0x65, 0x7c, 0x41, + 0xd4, 0x36, 0xc7, 0xa1, 0xb6, 0xe2, 0x9f, 0x65, + 0xf3, 0xe9, 0x00, 0xdb, 0xb9, 0xaf, 0xf4, 0x06, + 0x4d, 0xc4, 0xab, 0x2f, 0x84, 0x3a, 0xcd, 0xa8 +}; + +/* Invalid signature (modified r component) for negative test */ +unsigned char invalid_signature[64] = { + /* r component (32 bytes) - first byte modified */ + 0xff, 0xd4, 0x8b, 0x2a, 0xac, 0xb6, 0xa8, 0xfd, + 0x11, 0x40, 0xdd, 0x9c, 0xd4, 0x5e, 0x81, 0xd6, + 0x9d, 0x2c, 0x87, 0x7b, 0x56, 0xaa, 0xf9, 0x91, + 0xc3, 0x4d, 0x0e, 0xa8, 0x4e, 0xaf, 0x37, 0x16, + /* s component (32 bytes) */ + 0xf7, 0xcb, 0x1c, 0x94, 0x2d, 0x65, 0x7c, 0x41, + 0xd4, 0x36, 0xc7, 0xa1, 0xb6, 0xe2, 0x9f, 0x65, + 0xf3, 0xe9, 0x00, 0xdb, 0xb9, 0xaf, 0xf4, 0x06, + 0x4d, 0xc4, 0xab, 0x2f, 0x84, 0x3a, 0xcd, 0xa8 +}; + +/* Private key for signing (32 bytes) - matches the public key above */ +unsigned char privkey_p256[32] = { + 0xc9, 0xaf, 0xa9, 0xd8, 0x45, 0xba, 0x75, 0x16, + 0x6b, 0x5c, 0x21, 0x57, 0x67, 0xb1, 0xd6, 0x93, + 0x4e, 0x50, 0xc3, 0xdb, 0x36, 0xe8, 0x9b, 0x12, + 0x7b, 0x8a, 0x62, 0x2b, 0x12, 0x0f, 0x67, 0x21 +}; + +/* Test results */ +int verify_result = -1; +int verify_invalid_result = -1; +int ctx_create_status = -1; +int sign_result = -1; +int sign_verify_result = -1; +int keysize_result = -1; +int digestsize_result = -1; +int 
maxsize_result = -1; +unsigned char generated_signature[64] = {0}; + +SEC("syscall") +int test_ecdsa_verify_valid(void *ctx) +{ + struct bpf_ecdsa_ctx *ecdsa_ctx; + struct bpf_dynptr algo_ptr, key_ptr, msg_ptr, sig_ptr; + int err = 0; + + bpf_dynptr_from_mem(algo_p256, sizeof(algo_p256) - 1, 0, &algo_ptr); + bpf_dynptr_from_mem(pubkey_p256, sizeof(pubkey_p256), 0, &key_ptr); + + ecdsa_ctx = bpf_ecdsa_ctx_create(&algo_ptr, &key_ptr, &err); + if (!ecdsa_ctx) { + ctx_create_status = err; + return 0; + } + ctx_create_status = 0; + + bpf_dynptr_from_mem(message_hash, sizeof(message_hash), 0, &msg_ptr); + bpf_dynptr_from_mem(valid_signature, sizeof(valid_signature), 0, &sig_ptr); + + verify_result = bpf_ecdsa_verify(ecdsa_ctx, &msg_ptr, &sig_ptr); + + bpf_ecdsa_ctx_release(ecdsa_ctx); + + return 0; +} + +SEC("syscall") +int test_ecdsa_verify_invalid(void *ctx) +{ + struct bpf_ecdsa_ctx *ecdsa_ctx; + struct bpf_dynptr algo_ptr, key_ptr, msg_ptr, sig_ptr; + int err = 0; + + bpf_dynptr_from_mem(algo_p256, sizeof(algo_p256) - 1, 0, &algo_ptr); + bpf_dynptr_from_mem(pubkey_p256, sizeof(pubkey_p256), 0, &key_ptr); + + ecdsa_ctx = bpf_ecdsa_ctx_create(&algo_ptr, &key_ptr, &err); + if (!ecdsa_ctx) + return 0; + + bpf_dynptr_from_mem(message_hash, sizeof(message_hash), 0, &msg_ptr); + bpf_dynptr_from_mem(invalid_signature, sizeof(invalid_signature), 0, &sig_ptr); + + verify_invalid_result = bpf_ecdsa_verify(ecdsa_ctx, &msg_ptr, &sig_ptr); + + bpf_ecdsa_ctx_release(ecdsa_ctx); + + return 0; +} + +SEC("syscall") +int test_ecdsa_sign_verify(void *ctx) +{ + struct bpf_ecdsa_ctx *sign_ctx, *verify_ctx; + struct bpf_dynptr algo_ptr, privkey_ptr, pubkey_ptr, msg_ptr, sig_ptr; + int err = 0; + + /* Create signing context with private key */ + bpf_dynptr_from_mem(algo_p256, sizeof(algo_p256) - 1, 0, &algo_ptr); + bpf_dynptr_from_mem(privkey_p256, sizeof(privkey_p256), 0, &privkey_ptr); + + sign_ctx = bpf_ecdsa_ctx_create_with_privkey(&algo_ptr, &privkey_ptr, &err); + if (!sign_ctx) { + 
sign_result = err; + return 0; + } + + /* Sign the message */ + bpf_dynptr_from_mem(message_hash, sizeof(message_hash), 0, &msg_ptr); + bpf_dynptr_from_mem(generated_signature, sizeof(generated_signature), 0, &sig_ptr); + + sign_result = bpf_ecdsa_sign(sign_ctx, &msg_ptr, &sig_ptr); + + bpf_ecdsa_ctx_release(sign_ctx); + + /* If signing succeeded, verify the generated signature */ + if (sign_result > 0 && sign_result <= (int)sizeof(generated_signature)) { + unsigned int sig_size; + + /* Explicitly bound the value for the verifier */ + sig_size = sign_result & 0x3F; /* Max 64 bytes */ + + bpf_dynptr_from_mem(algo_p256, sizeof(algo_p256) - 1, 0, &algo_ptr); + bpf_dynptr_from_mem(pubkey_p256, sizeof(pubkey_p256), 0, &pubkey_ptr); + + verify_ctx = bpf_ecdsa_ctx_create(&algo_ptr, &pubkey_ptr, &err); + if (!verify_ctx) { + sign_verify_result = err; + return 0; + } + + bpf_dynptr_from_mem(message_hash, sizeof(message_hash), 0, &msg_ptr); + bpf_dynptr_from_mem(generated_signature, sig_size, 0, &sig_ptr); + + sign_verify_result = bpf_ecdsa_verify(verify_ctx, &msg_ptr, &sig_ptr); + + bpf_ecdsa_ctx_release(verify_ctx); + } + + return 0; +} + +SEC("syscall") +int test_ecdsa_size_queries(void *ctx) +{ + struct bpf_ecdsa_ctx *ecdsa_ctx; + struct bpf_dynptr algo_ptr, key_ptr; + int err = 0; + + bpf_dynptr_from_mem(algo_p256, sizeof(algo_p256) - 1, 0, &algo_ptr); + bpf_dynptr_from_mem(pubkey_p256, sizeof(pubkey_p256), 0, &key_ptr); + + ecdsa_ctx = bpf_ecdsa_ctx_create(&algo_ptr, &key_ptr, &err); + if (!ecdsa_ctx) + return 0; + + keysize_result = bpf_ecdsa_keysize(ecdsa_ctx); + digestsize_result = bpf_ecdsa_digestsize(ecdsa_ctx); + maxsize_result = bpf_ecdsa_maxsize(ecdsa_ctx); + + bpf_ecdsa_ctx_release(ecdsa_ctx); + + return 0; +} + +char __license[] SEC("license") = "GPL"; -- 2.51.0

1 month, 3 weeks

1
0
0 0

[PATCH v2] cpuset: relax the overlap check for cgroup-v2

by Sun Shaojie

In cgroup v2, a mutual overlap check is required when at least one of two cpusets is exclusive. However, this check should be relaxed and limited to cases where both cpusets are exclusive. The table 1 shows the partition states of A1 and B1 after each step before applying this patch. Table 1: Before applying the patch Step | A1's prstate | B1's prstate | #1> mkdir -p A1 | member | | #2> echo "0-1" > A1/cpuset.cpus | member | | #3> echo "root" > A1/cpuset.cpus.partition | root | | #4> mkdir -p B1 | root | member | #5> echo "0-3" > B1/cpuset.cpus | root invalid | member | #6> echo "root" > B1/cpuset.cpus.partition | root invalid | root invalid | After step #5, A1 changes from "root" to "root invalid" because its CPUs (0-1) overlap with those requested by B1 (0-3). However, B1 can actually use CPUs 2-3, so it would be more reasonable for A1 to remain as "root." This patch relaxes the exclusive cpuset check for cgroup v2 while preserving the current cgroup v1 behavior. Signed-off-by: Sun Shaojie <sunshaojie(a)kylinos.cn> --- v1 -> v2: - Keeps the current cgroup v1 behavior unchanged - Link: https://lore.kernel.org/cgroups/c8e234f4-2c27-4753-8f39-8ae83197efd3@redhat… --- kernel/cgroup/cpuset.c | 9 +++++++-- tools/testing/selftests/cgroup/test_cpuset_prs.sh | 10 +++++----- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 52468d2c178a..3240b3ab5998 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -592,8 +592,13 @@ static inline bool cpusets_are_exclusive(struct cpuset *cs1, struct cpuset *cs2) */ static inline bool cpus_excl_conflict(struct cpuset *cs1, struct cpuset *cs2) { - /* If either cpuset is exclusive, check if they are mutually exclusive */ - if (is_cpu_exclusive(cs1) || is_cpu_exclusive(cs2)) + /* If both cpusets are exclusive, check if they are mutually exclusive */ + if (is_cpu_exclusive(cs1) && is_cpu_exclusive(cs2)) + return !cpusets_are_exclusive(cs1, cs2); + + /* In 
cgroup-v1, if either cpuset is exclusive, check if they are mutually exclusive */ + if (!is_in_v2_mode() && + (is_cpu_exclusive(cs1) != is_cpu_exclusive(cs2))) return !cpusets_are_exclusive(cs1, cs2); /* Exclusive_cpus cannot intersect */ diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index a17256d9f88a..903dddfe88d7 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -269,7 +269,7 @@ TEST_MATRIX=( " C0-3:S+ C1-3:S+ C2-3 . X2-3 X3:P2 . . 0 A1:0-2|A2:3|A3:3 A1:P0|A2:P2 3" " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3" " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:C3 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 C2-3 . . . P2 0 A1:0-3|A2:1-3|A3:2-3|B1:2-3 A1:P0|A3:P0|B1:P-2" + " C0-3:S+ C1-3:S+ C2-3 C2-3 . . . P2 0 A1:0-1|A2:1|A3:1|B1:2-3 A1:P0|A3:P0|B1:P2 2-3" " C0-3:S+ C1-3:S+ C2-3 C4-5 . . . P2 0 B1:4-5 B1:P2 4-5" " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4" " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2:C1-3 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4" @@ -318,7 +318,7 @@ TEST_MATRIX=( # Invalid to valid local partition direct transition tests " C1-3:S+:P2 X4:P2 . . . . . . 0 A1:1-3|XA1:1-3|A2:1-3:XA2: A1:P2|A2:P-2 1-3" " C1-3:S+:P2 X4:P2 . . . X3:P2 . . 0 A1:1-2|XA1:1-3|A2:3:XA2:3 A1:P2|A2:P2 1-3" - " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4|B1:4-6 A1:P-2|B1:P0" + " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4|B1:5-6 A1:P2|B1:P0 0-4" " C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3|B1:4-6 A1:P2|B1:P0 0-3" # Local partition invalidation tests @@ -388,10 +388,10 @@ TEST_MATRIX=( " C0-1:S+ C1 . C2-3 . P2 . . 0 A1:0-1|A2:1 A1:P0|A2:P-2" " C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0|A2:1 A1:P1|A2:P2 0-1|1" - # A non-exclusive cpuset.cpus change will invalidate partition and its siblings - " C0-1:P1 . . C2-3 C0-2 . . . 
0 A1:0-2|B1:2-3 A1:P-1|B1:P0" + # A non-exclusive cpuset.cpus change will not invalidate partition and its siblings + " C0-1:P1 . . C2-3 C0-2 . . . 0 A1:0-2|B1:3 A1:P1|B1:P0" " C0-1:P1 . . P1:C2-3 C0-2 . . . 0 A1:0-2|B1:2-3 A1:P-1|B1:P-1" - " C0-1 . . P1:C2-3 C0-2 . . . 0 A1:0-2|B1:2-3 A1:P0|B1:P-1" + " C0-1 . . P1:C2-3 C0-2 . . . 0 A1:0-1|B1:2-3 A1:P0|B1:P1" # cpuset.cpus can overlap with sibling cpuset.cpus.exclusive but not subsumed by it " C0-3 . . C4-5 X5 . . . 0 A1:0-3|B1:4-5" -- 2.25.1

1 month, 3 weeks

4
17
0 0

[PATCH] selftests/mm: fix division-by-zero in uffd-unit-tests

by Carlos Llamas

Commit 4dfd4bba8578 ("selftests/mm/uffd: refactor non-composite global vars into struct") moved some of the operations previously implemented in uffd_setup_environment() earlier in the main test loop. The calculation of nr_pages, which involves a division by page_size, now occurs before checking that default_huge_page_size() returns a non-zero value. This leads to a division-by-zero error on systems with !CONFIG_HUGETLB. Fix this by relocating the non-zero page_size check before the nr_pages calculation, as it was originally implemented. Cc: stable(a)vger.kernel.org Fixes: 4dfd4bba8578 ("selftests/mm/uffd: refactor non-composite global vars into struct") Signed-off-by: Carlos Llamas <cmllamas(a)google.com> --- tools/testing/selftests/mm/uffd-unit-tests.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index 9e3be2ee7f1b..f917b4c4c943 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -1758,10 +1758,15 @@ int main(int argc, char *argv[]) uffd_test_ops = mem_type->mem_ops; uffd_test_case_ops = test->test_case_ops; - if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB)) + if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB)) { gopts.page_size = default_huge_page_size(); - else + if (gopts.page_size == 0) { + uffd_test_skip("huge page size is 0, feature missing?"); + continue; + } + } else { gopts.page_size = psize(); + } /* Ensure we have at least 2 pages */ gopts.nr_pages = MAX(UFFD_TEST_MEM_SIZE, gopts.page_size * 2) @@ -1776,12 +1781,6 @@ int main(int argc, char *argv[]) continue; uffd_test_start("%s on %s", test->name, mem_type->name); - if ((mem_type->mem_flag == MEM_HUGETLB || - mem_type->mem_flag == MEM_HUGETLB_PRIVATE) && - (default_huge_page_size() == 0)) { - uffd_test_skip("huge page size is 0, feature missing?"); - continue; - } if (!uffd_feature_supported(test)) { 
uffd_test_skip("feature missing"); continue; -- 2.51.2.1041.gc1ab5b90ca-goog

1 month, 3 weeks

6
9
0 0

[PATCH v4 2/2] selftests/riscv: Add Zicbop prefetch test

by Yao Zihong

Add selftests to cbo.c to verify Zicbop extension behavior, and split the previous `--sigill` mode into two options so they can be tested independently. The test checks: - That hwprobe correctly reports Zicbop presence and block size. - That prefetch instructions execute without exception on valid and NULL addresses when Zicbop is present. Signed-off-by: Yao Zihong <zihong.plct(a)isrc.iscas.ac.cn> --- tools/testing/selftests/riscv/hwprobe/cbo.c | 168 ++++++++++++++++---- 1 file changed, 139 insertions(+), 29 deletions(-) diff --git a/tools/testing/selftests/riscv/hwprobe/cbo.c b/tools/testing/selftests/riscv/hwprobe/cbo.c index 5e96ef785d0d..281ab440e696 100644 --- a/tools/testing/selftests/riscv/hwprobe/cbo.c +++ b/tools/testing/selftests/riscv/hwprobe/cbo.c @@ -15,24 +15,31 @@ #include <linux/compiler.h> #include <linux/kernel.h> #include <asm/ucontext.h> +#include <getopt.h> #include "hwprobe.h" #include "../../kselftest.h" #define MK_CBO(fn) le32_bswap((uint32_t)(fn) << 20 | 10 << 15 | 2 << 12 | 0 << 7 | 15) +#define MK_PREFETCH(fn) \ + le32_bswap(0 << 25 | (uint32_t)(fn) << 20 | 10 << 15 | 6 << 12 | 0 << 7 | 19) static char mem[4096] __aligned(4096) = { [0 ... 
4095] = 0xa5 }; -static bool illegal_insn; +static bool got_fault; -static void sigill_handler(int sig, siginfo_t *info, void *context) +static void fault_handler(int sig, siginfo_t *info, void *context) { unsigned long *regs = (unsigned long *)&((ucontext_t *)context)->uc_mcontext; uint32_t insn = *(uint32_t *)regs[0]; - assert(insn == MK_CBO(regs[11])); + if (sig == SIGILL) + assert(insn == MK_CBO(regs[11])); - illegal_insn = true; + if (sig == SIGSEGV || sig == SIGBUS) + assert(insn == MK_PREFETCH(regs[11])); + + got_fault = true; regs[0] += 4; } @@ -45,39 +52,51 @@ static void sigill_handler(int sig, siginfo_t *info, void *context) : : "r" (base), "i" (fn), "i" (MK_CBO(fn)) : "a0", "a1", "memory"); \ }) +#define prefetch_insn(base, fn) \ +({ \ + asm volatile( \ + "mv a0, %0\n" \ + "li a1, %1\n" \ + ".4byte %2\n" \ + : : "r" (base), "i" (fn), "i" (MK_PREFETCH(fn)) : "a0", "a1"); \ +}) + static void cbo_inval(char *base) { cbo_insn(base, 0); } static void cbo_clean(char *base) { cbo_insn(base, 1); } static void cbo_flush(char *base) { cbo_insn(base, 2); } static void cbo_zero(char *base) { cbo_insn(base, 4); } +static void prefetch_i(char *base) { prefetch_insn(base, 0); } +static void prefetch_r(char *base) { prefetch_insn(base, 1); } +static void prefetch_w(char *base) { prefetch_insn(base, 3); } static void test_no_cbo_inval(void *arg) { ksft_print_msg("Testing cbo.inval instruction remain privileged\n"); - illegal_insn = false; + got_fault = false; cbo_inval(&mem[0]); - ksft_test_result(illegal_insn, "No cbo.inval\n"); + ksft_test_result(got_fault, "No cbo.inval\n"); } static void test_no_zicbom(void *arg) { ksft_print_msg("Testing Zicbom instructions remain privileged\n"); - illegal_insn = false; + got_fault = false; cbo_clean(&mem[0]); - ksft_test_result(illegal_insn, "No cbo.clean\n"); + ksft_test_result(got_fault, "No cbo.clean\n"); - illegal_insn = false; + got_fault = false; cbo_flush(&mem[0]); - ksft_test_result(illegal_insn, "No cbo.flush\n"); + 
ksft_test_result(got_fault, "No cbo.flush\n"); } static void test_no_zicboz(void *arg) { ksft_print_msg("No Zicboz, testing cbo.zero remains privileged\n"); - illegal_insn = false; + got_fault = false; cbo_zero(&mem[0]); - ksft_test_result(illegal_insn, "No cbo.zero\n"); + ksft_test_result(got_fault, "No cbo.zero\n"); } static bool is_power_of_2(__u64 n) @@ -85,6 +104,54 @@ static bool is_power_of_2(__u64 n) return n != 0 && (n & (n - 1)) == 0; } +static void test_zicbop(void *arg) +{ + struct riscv_hwprobe pair = { + .key = RISCV_HWPROBE_KEY_ZICBOP_BLOCK_SIZE, + }; + + struct sigaction act = { + .sa_sigaction = &fault_handler, + .sa_flags = SA_SIGINFO + }; + + struct sigaction dfl = { + .sa_handler = SIG_DFL + }; + + cpu_set_t *cpus = (cpu_set_t *)arg; + __u64 block_size; + long rc; + + rc = sigaction(SIGSEGV, &act, NULL); + assert(rc == 0); + rc = sigaction(SIGBUS, &act, NULL); + assert(rc == 0); + + rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)cpus, 0); + block_size = pair.value; + ksft_test_result(rc == 0 && pair.key == RISCV_HWPROBE_KEY_ZICBOP_BLOCK_SIZE && + is_power_of_2(block_size), "Zicbop block size\n"); + ksft_print_msg("Zicbop block size: %llu\n", block_size); + + got_fault = false; + prefetch_i(&mem[0]); + prefetch_r(&mem[0]); + prefetch_w(&mem[0]); + ksft_test_result(!got_fault, "Zicbop prefetch.* on valid address\n"); + + got_fault = false; + prefetch_i(NULL); + prefetch_r(NULL); + prefetch_w(NULL); + ksft_test_result(!got_fault, "Zicbop prefetch.* on NULL\n"); + + rc = sigaction(SIGBUS, &dfl, NULL); + assert(rc == 0); + rc = sigaction(SIGSEGV, &dfl, NULL); + assert(rc == 0); +} + static void test_zicbom(void *arg) { struct riscv_hwprobe pair = { @@ -100,13 +167,13 @@ static void test_zicbom(void *arg) is_power_of_2(block_size), "Zicbom block size\n"); ksft_print_msg("Zicbom block size: %llu\n", block_size); - illegal_insn = false; + got_fault = false; cbo_clean(&mem[block_size]); - ksft_test_result(!illegal_insn, "cbo.clean\n"); 
+ ksft_test_result(!got_fault, "cbo.clean\n"); - illegal_insn = false; + got_fault = false; cbo_flush(&mem[block_size]); - ksft_test_result(!illegal_insn, "cbo.flush\n"); + ksft_test_result(!got_fault, "cbo.flush\n"); } static void test_zicboz(void *arg) @@ -125,11 +192,11 @@ static void test_zicboz(void *arg) is_power_of_2(block_size), "Zicboz block size\n"); ksft_print_msg("Zicboz block size: %llu\n", block_size); - illegal_insn = false; + got_fault = false; cbo_zero(&mem[block_size]); - ksft_test_result(!illegal_insn, "cbo.zero\n"); + ksft_test_result(!got_fault, "cbo.zero\n"); - if (illegal_insn || !is_power_of_2(block_size)) { + if (got_fault || !is_power_of_2(block_size)) { ksft_test_result_skip("cbo.zero check\n"); return; } @@ -177,7 +244,19 @@ static void check_no_zicbo_cpus(cpu_set_t *cpus, __u64 cbo) rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)&one_cpu, 0); assert(rc == 0 && pair.key == RISCV_HWPROBE_KEY_IMA_EXT_0); - cbostr = cbo == RISCV_HWPROBE_EXT_ZICBOZ ? 
"Zicboz" : "Zicbom"; + switch (cbo) { + case RISCV_HWPROBE_EXT_ZICBOZ: + cbostr = "Zicboz"; + break; + case RISCV_HWPROBE_EXT_ZICBOM: + cbostr = "Zicbom"; + break; + case RISCV_HWPROBE_EXT_ZICBOP: + cbostr = "Zicbop"; + break; + default: + ksft_exit_fail_msg("Internal error: invalid cbo %llu\n", cbo); + } if (pair.value & cbo) ksft_exit_fail_msg("%s is only present on a subset of harts.\n" @@ -194,6 +273,7 @@ enum { TEST_ZICBOM, TEST_NO_ZICBOM, TEST_NO_CBO_INVAL, + TEST_ZICBOP }; static struct test_info { @@ -206,26 +286,51 @@ static struct test_info { [TEST_ZICBOM] = { .nr_tests = 3, test_zicbom }, [TEST_NO_ZICBOM] = { .nr_tests = 2, test_no_zicbom }, [TEST_NO_CBO_INVAL] = { .nr_tests = 1, test_no_cbo_inval }, + [TEST_ZICBOP] = { .nr_tests = 3, test_zicbop } +}; + +static const struct option long_opts[] = { + {"zicbom-raises-sigill", no_argument, 0, 'm'}, + {"zicboz-raises-sigill", no_argument, 0, 'z'}, + {0, 0, 0, 0} }; int main(int argc, char **argv) { struct sigaction act = { - .sa_sigaction = &sigill_handler, + .sa_sigaction = &fault_handler, .sa_flags = SA_SIGINFO, }; + struct riscv_hwprobe pair; unsigned int plan = 0; cpu_set_t cpus; long rc; - int i; - - if (argc > 1 && !strcmp(argv[1], "--sigill")) { - rc = sigaction(SIGILL, &act, NULL); - assert(rc == 0); - tests[TEST_NO_ZICBOZ].enabled = true; - tests[TEST_NO_ZICBOM].enabled = true; - tests[TEST_NO_CBO_INVAL].enabled = true; + int i, opt, long_index; + + long_index = 0; + + while ((opt = getopt_long(argc, argv, "mz", long_opts, &long_index)) != -1) { + switch (opt) { + case 'm': + tests[TEST_NO_ZICBOM].enabled = true; + tests[TEST_NO_CBO_INVAL].enabled = true; + rc = sigaction(SIGILL, &act, NULL); + assert(rc == 0); + break; + case 'z': + tests[TEST_NO_ZICBOZ].enabled = true; + rc = sigaction(SIGILL, &act, NULL); + assert(rc == 0); + break; + case '?': + fprintf(stderr, + "Usage: %s [--zicbom-raises-sigill|-m] [--zicboz-raises-sigill|-z]\n", + argv[0]); + exit(1); + default: + break; + } } rc = 
sched_getaffinity(0, sizeof(cpu_set_t), &cpus); @@ -253,6 +358,11 @@ int main(int argc, char **argv) check_no_zicbo_cpus(&cpus, RISCV_HWPROBE_EXT_ZICBOM); } + if (pair.value & RISCV_HWPROBE_EXT_ZICBOP) + tests[TEST_ZICBOP].enabled = true; + else + check_no_zicbo_cpus(&cpus, RISCV_HWPROBE_EXT_ZICBOP); + for (i = 0; i < ARRAY_SIZE(tests); ++i) plan += tests[i].enabled ? tests[i].nr_tests : 0; -- 2.47.2

1 month, 3 weeks

2
1
0 0

[PATCH iommufd] iommufd: Make vfio_compat's unmap succeed if the range is already empty

by Jason Gunthorpe

iommufd returns ENOENT when attempting to unmap a range that is already empty, while vfio type1 returns success. Fix vfio_compat to match. Fixes: d624d6652a65 ("iommufd: vfio container FD ioctl compatibility") Reported-by: Alex Mastro <amastro(a)fb.com> Closes: https://lore.kernel.org/r/aP0S5ZF9l3sWkJ1G@devgpu012.nha5.facebook.com Signed-off-by: Jason Gunthorpe <jgg(a)nvidia.com> --- drivers/iommu/iommufd/io_pagetable.c | 12 +++--------- drivers/iommu/iommufd/ioas.c | 4 ++++ tools/testing/selftests/iommu/iommufd.c | 2 ++ 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c index c0360c450880b8..75d60f2ad90082 100644 --- a/drivers/iommu/iommufd/io_pagetable.c +++ b/drivers/iommu/iommufd/io_pagetable.c @@ -707,7 +707,8 @@ static int iopt_unmap_iova_range(struct io_pagetable *iopt, unsigned long start, struct iopt_area *area; unsigned long unmapped_bytes = 0; unsigned int tries = 0; - int rc = -ENOENT; + /* If there are no mapped entries then success */ + int rc = 0; /* * The domains_rwsem must be held in read mode any time any area->pages @@ -777,8 +778,6 @@ static int iopt_unmap_iova_range(struct io_pagetable *iopt, unsigned long start, down_write(&iopt->iova_rwsem); } - if (unmapped_bytes) - rc = 0; out_unlock_iova: up_write(&iopt->iova_rwsem); @@ -815,13 +814,8 @@ int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova, int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped) { - int rc; - - rc = iopt_unmap_iova_range(iopt, 0, ULONG_MAX, unmapped); /* If the IOVAs are empty then unmap all succeeds */ - if (rc == -ENOENT) - return 0; - return rc; + return iopt_unmap_iova_range(iopt, 0, ULONG_MAX, unmapped); } /* The caller must always free all the nodes in the allowed_iova rb_root. 
*/ diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c index 1542c5fd10a85c..459a7c5169154b 100644 --- a/drivers/iommu/iommufd/ioas.c +++ b/drivers/iommu/iommufd/ioas.c @@ -367,6 +367,10 @@ int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd) &unmapped); if (rc) goto out_put; + if (!unmapped) { + rc = -ENOENT; + goto out_put; + } } cmd->length = unmapped; diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 3eebf5e3b974f4..bb4d33dde3c899 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -2638,6 +2638,8 @@ TEST_F(vfio_compat_mock_domain, map) ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd)); ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd)); ASSERT_EQ(BUFFER_SIZE, unmap_cmd.size); + /* Unmap of empty is success */ + ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd)); /* UNMAP_FLAG_ALL requires 0 iova/size */ ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd)); base-commit: b09ed52db1e688eb8205b1939ca1345179ecd515 -- 2.43.0

1 month, 3 weeks

5
5
0 0

[PATCH v6 0/3] platform/chrome: Fix an UAF via replacing fops

by Tzung-Bi Shih

The series is separated from [1] to show its independence and to make potential use cases easier to compare. This use case replaces filp->f_op with revocable-aware wrappers. It relies on the revocable core part [2]. It tries to fix a UAF in the fops of cros_ec_chardev after the underlying protocol device has gone away by using revocable. The wrappers make sure file operations in drivers won't be called if the resource has been revoked. The 1st patch introduces revocable fops replacement. The 2nd patch supports the fops replacement in miscdevice. The 3rd patch uses the support from miscdevice to fix the UAF. [1] https://lore.kernel.org/chrome-platform/20251016054204.1523139-1-tzungbi@ke… [2] https://lore.kernel.org/chrome-platform/20251106152330.11733-1-tzungbi@kern… v6: - New, separated from an existing series. Tzung-Bi Shih (3): revocable: Add fops replacement char: misc: Leverage revocable fops replacement platform/chrome: cros_ec_chardev: Secure cros_ec_device via revocable drivers/char/misc.c | 18 ++- drivers/platform/chrome/cros_ec_chardev.c | 1 + fs/Makefile | 2 +- fs/fs_revocable.c | 156 ++++++++++++++++++++++ include/linux/fs_revocable.h | 14 ++ include/linux/miscdevice.h | 2 + 6 files changed, 190 insertions(+), 3 deletions(-) create mode 100644 fs/fs_revocable.c create mode 100644 include/linux/fs_revocable.h -- 2.48.1

1 month, 3 weeks

3
10
0 0

[PATCH net-next v4 0/5] netconsole: support automatic target recovery

by Andre Carvalho

This patchset introduces target resume capability to netconsole, allowing it to recover targets when the underlying low-level interface comes back online. The patchset starts by refactoring netconsole state representation in order to allow representing deactivated targets (targets that are disabled due to interfaces going down). It then modifies netconsole to handle NETDEV_UP events for such targets and sets up netpoll. Targets are matched with incoming interfaces depending on how they were initially bound in netconsole (by mac or interface name). The patchset includes a selftest that validates netconsole target state transitions and that the target is functional after being resumed. Signed-off-by: Andre Carvalho <asantostc(a)gmail.com> --- Changes in v4: - Simplify selftest cleanup, removing trap setup in loop. - Drop netpoll helper (__setup_netpoll_hold) and manage reference inside netconsole. - Move resume_list processing logic to separate function. - Link to v3: https://lore.kernel.org/r/20251109-netcons-retrigger-v3-0-1654c280bbe6@gmai… Changes in v3: - Resume by mac or interface name depending on how target was created. - Attempt to resume target without holding target list lock, by moving the target to a temporary list. This is required as netpoll may attempt to allocate memory. - Link to v2: https://lore.kernel.org/r/20250921-netcons-retrigger-v2-0-a0e84006237f@gmai… Changes in v2: - Attempt to resume target in the same thread, instead of using a workqueue. - Add wrapper around __netpoll_setup (patch 4). - Renamed resume_target to maybe_resume_target and moved conditionals to inside its implementation, keeping code more clear. - Verify that device addr matches target mac address when target was setup using mac. - Update selftest to cover targets bound by mac and interface name. - Fix typo in selftest comment and sort tests alphabetically in Makefile. 
- Link to v1: https://lore.kernel.org/r/20250909-netcons-retrigger-v1-0-3aea904926cf@gmai… --- Andre Carvalho (3): netconsole: convert 'enabled' flag to enum for clearer state management netconsole: resume previously deactivated target selftests: netconsole: validate target resume Breno Leitao (2): netconsole: add target_state enum netconsole: add STATE_DEACTIVATED to track targets disabled by low level drivers/net/netconsole.c | 145 ++++++++++++++++----- tools/testing/selftests/drivers/net/Makefile | 1 + .../selftests/drivers/net/lib/sh/lib_netcons.sh | 35 ++++- .../selftests/drivers/net/netcons_resume.sh | 97 ++++++++++++++ 4 files changed, 244 insertions(+), 34 deletions(-) --- base-commit: c9dfb92de0738eb7fe6a591ad1642333793e8b6e change-id: 20250816-netcons-retrigger-a4f547bfc867 Best regards, -- Andre Carvalho <asantostc(a)gmail.com>

1 month, 3 weeks

2
9
0 0

[PATCH net-next v9 00/14] vsock: add namespace support to vhost-vsock and loopback

by Bobby Eshleman

This series adds namespace support to vhost-vsock and loopback. It does not add namespaces to any of the other guest transports (virtio-vsock, hyperv, or vmci). The current revision supports two modes: local and global. Local mode is complete isolation of namespaces, while global mode is complete sharing between namespaces of CIDs (the original behavior). The mode is set using /proc/sys/net/vsock/ns_mode. Modes are per-netns and write-once. This allows a system to configure namespaces independently (some may share CIDs, others are completely isolated). This also supports future possible mixed use cases, where there may be namespaces in global mode spinning up VMs while there are mixed mode namespaces that provide services to the VMs, but are not allowed to allocate from the global CID pool (this mode is not implemented in this series). If a socket or VM is created when a namespace is global but the namespace changes to local, the socket or VM will continue working normally. That is, the socket or VM assumes the mode behavior of the namespace at the time the socket/VM was created. The original mode is captured in vsock_create() and so occurs at the time of socket(2) and accept(2) for sockets and open(2) on /dev/vhost-vsock for VMs. This prevents a socket/VM connection from suddenly breaking due to a namespace mode change. Any new sockets/VMs created after the mode change will adopt the new mode's behavior. 
Additionally, added tests for the new namespace features: tools/testing/selftests/vsock/vmtest.sh 1..29 ok 1 vm_server_host_client ok 2 vm_client_host_server ok 3 vm_loopback ok 4 ns_guest_local_mode_rejected ok 5 ns_host_vsock_ns_mode_ok ok 6 ns_host_vsock_ns_mode_write_once_ok ok 7 ns_global_same_cid_fails ok 8 ns_local_same_cid_ok ok 9 ns_global_local_same_cid_ok ok 10 ns_local_global_same_cid_ok ok 11 ns_diff_global_host_connect_to_global_vm_ok ok 12 ns_diff_global_host_connect_to_local_vm_fails ok 13 ns_diff_global_vm_connect_to_global_host_ok ok 14 ns_diff_global_vm_connect_to_local_host_fails ok 15 ns_diff_local_host_connect_to_local_vm_fails ok 16 ns_diff_local_vm_connect_to_local_host_fails ok 17 ns_diff_global_to_local_loopback_local_fails ok 18 ns_diff_local_to_global_loopback_fails ok 19 ns_diff_local_to_local_loopback_fails ok 20 ns_diff_global_to_global_loopback_ok ok 21 ns_same_local_loopback_ok ok 22 ns_same_local_host_connect_to_local_vm_ok ok 23 ns_same_local_vm_connect_to_local_host_ok ok 24 ns_mode_change_connection_continue_vm_ok ok 25 ns_mode_change_connection_continue_host_ok ok 26 ns_mode_change_connection_continue_both_ok ok 27 ns_delete_vm_ok ok 28 ns_delete_host_ok ok 29 ns_delete_both_ok SUMMARY: PASS=29 SKIP=0 FAIL=0 Dependent on series: https://lore.kernel.org/all/20251108-vsock-selftests-fixes-and-improvements… Thanks again for everyone's help and reviews! Suggested-by: Sargun Dhillon <sargun(a)sargun.me> Signed-off-by: Bobby Eshleman <bobbyeshleman(a)gmail.com> To: Stefano Garzarella <sgarzare(a)redhat.com> To: Shuah Khan <shuah(a)kernel.org> To: David S. Miller <davem(a)davemloft.net> To: Eric Dumazet <edumazet(a)google.com> To: Jakub Kicinski <kuba(a)kernel.org> To: Paolo Abeni <pabeni(a)redhat.com> To: Simon Horman <horms(a)kernel.org> To: Stefan Hajnoczi <stefanha(a)redhat.com> To: Michael S. 
Tsirkin <mst(a)redhat.com> To: Jason Wang <jasowang(a)redhat.com> To: Xuan Zhuo <xuanzhuo(a)linux.alibaba.com> To: Eugenio Pérez <eperezma(a)redhat.com> To: K. Y. Srinivasan <kys(a)microsoft.com> To: Haiyang Zhang <haiyangz(a)microsoft.com> To: Wei Liu <wei.liu(a)kernel.org> To: Dexuan Cui <decui(a)microsoft.com> To: Bryan Tan <bryan-bt.tan(a)broadcom.com> To: Vishnu Dasa <vishnu.dasa(a)broadcom.com> To: Broadcom internal kernel review list <bcm-kernel-feedback-list(a)broadcom.com> Cc: virtualization(a)lists.linux.dev Cc: netdev(a)vger.kernel.org Cc: linux-kselftest(a)vger.kernel.org Cc: linux-kernel(a)vger.kernel.org Cc: kvm(a)vger.kernel.org Cc: linux-hyperv(a)vger.kernel.org Cc: berrange(a)redhat.com Cc: Sargun Dhillon <sargun(a)sargun.me> Changes in v9: - reorder loopback patch after patch for virtio transport common code - remove module ordering tests patch because loopback no longer depends on pernet ops - major simplifications in vsock_loopback - added a new patch for blocking local mode for guests, added test case to check - add net ref tracking to vsock_loopback patch - Link to v8: https://lore.kernel.org/r/20251023-vsock-vmtest-v8-0-dea984d02bb0@meta.com Changes in v8: - Break generic cleanup/refactoring patches into standalone series, remove those from this series - Link to dependency: https://lore.kernel.org/all/20251022-vsock-selftests-fixes-and-improvements… - Link to v7: https://lore.kernel.org/r/20251021-vsock-vmtest-v7-0-0661b7b6f081@meta.com Changes in v7: - fix hv_sock build - break out vmtest patches into distinct, more well-scoped patches - change `orig_net_mode` to `net_mode` - many fixes and style changes in per-patch change sets (see individual patches for specific changes) - optimize `virtio_vsock_skb_cb` layout - update commit messages with more useful descriptions - vsock_loopback: use orig_net_mode instead of current net mode - add tests for edge cases (ns deletion, mode changing, loopback module load ordering) - Link to v6: 
https://lore.kernel.org/r/20250916-vsock-vmtest-v6-0-064d2eb0c89d@meta.com Changes in v6: - define behavior when mode changes to local while socket/VM is alive - af_vsock: clarify description of CID behavior - af_vsock: use stronger language around CID rules (don't use "may") - af_vsock: improve naming of buf/buffer - af_vsock: improve string length checking on proc writes - vsock_loopback: add space in struct to clarify lock protection - vsock_loopback: do proper cleanup/unregister on vsock_loopback_exit() - vsock_loopback: use virtio_vsock_skb_net() instead of sock_net() - vsock_loopback: set loopback to NULL after kfree() - vsock_loopback: use pernet_operations and remove callback mechanism - vsock_loopback: add macros for "global" and "local" - vsock_loopback: fix length checking - vmtest.sh: check for namespace support in vmtest.sh - Link to v5: https://lore.kernel.org/r/20250827-vsock-vmtest-v5-0-0ba580bede5b@meta.com Changes in v5: - /proc/net/vsock_ns_mode -> /proc/sys/net/vsock/ns_mode - vsock_global_net -> vsock_global_dummy_net - fix netns lookup in vhost_vsock to respect pid namespaces - add callbacks for vsock_loopback to avoid circular dependency - vmtest.sh loads vsock_loopback module - remove vsock_net_mode_can_set() - change vsock_net_write_mode() to return true/false based on success - make vsock_net_mode enum instead of u8 - Link to v4: https://lore.kernel.org/r/20250805-vsock-vmtest-v4-0-059ec51ab111@meta.com Changes in v4: - removed RFC tag - implemented loopback support - renamed new tests to better reflect behavior - completed suite of tests with permutations of ns modes and vsock_test as guest/host - simplified socat bridging with unix socket instead of tcp + veth - only use vsock_test for success case, socat for failure case (context in commit message) - lots of cleanup Changes in v3: - add notion of "modes" - add procfs /proc/net/vsock_ns_mode - local and global modes only - no /dev/vhost-vsock-netns - vmtest.sh already merged, so new patch 
just adds new tests for NS - Link to v2: https://lore.kernel.org/kvm/20250312-vsock-netns-v2-0-84bffa1aa97a@gmail.com Changes in v2: - only support vhost-vsock namespaces - all g2h namespaces retain old behavior, only common API changes impacted by vhost-vsock changes - add /dev/vhost-vsock-netns for "opt-in" - leave /dev/vhost-vsock to old behavior - removed netns module param - Link to v1: https://lore.kernel.org/r/20200116172428.311437-1-sgarzare@redhat.com Changes in v1: - added 'netns' module param to vsock.ko to enable the network namespace support (disabled by default) - added 'vsock_net_eq()' to check the "net" assigned to a socket only when 'netns' support is enabled - Link to RFC: https://patchwork.ozlabs.org/cover/1202235/ --- Bobby Eshleman (14): vsock: a per-net vsock NS mode state vsock: add netns to vsock core vsock/virtio: add netns support to virtio transport and virtio common vsock/virtio: pack struct virtio_vsock_skb_cb vsock: add netns and netns_tracker to vsock skb cb vsock/loopback: add netns support vhost/vsock: add netns support vsock: reject bad VSOCK_NET_MODE_LOCAL configuration for G2H selftests/vsock: add namespace helpers to vmtest.sh selftests/vsock: prepare vm management helpers for namespaces selftests/vsock: add tests for proc sys vsock ns_mode selftests/vsock: add namespace tests for CID collisions selftests/vsock: add tests for host <-> vm connectivity with namespaces selftests/vsock: add tests for namespace deletion and mode changes MAINTAINERS | 1 + drivers/vhost/vsock.c | 48 +- include/linux/virtio_vsock.h | 43 +- include/net/af_vsock.h | 57 +- include/net/net_namespace.h | 4 + include/net/netns/vsock.h | 17 + net/vmw_vsock/af_vsock.c | 290 +++++++++- net/vmw_vsock/hyperv_transport.c | 1 + net/vmw_vsock/virtio_transport.c | 14 +- net/vmw_vsock/virtio_transport_common.c | 57 +- net/vmw_vsock/vsock_loopback.c | 48 +- tools/testing/selftests/vsock/vmtest.sh | 931 ++++++++++++++++++++++++++++++-- 12 files changed, 1418 
insertions(+), 93 deletions(-) --- base-commit: 962ac5ca99a5c3e7469215bf47572440402dfd59 change-id: 20250325-vsock-vmtest-b3a21d2102c2 prerequisite-message-id: <20251022-vsock-selftests-fixes-and-improvements-v1-0-edeb179d6463(a)meta.com> prerequisite-patch-id: a2eecc3851f2509ed40009a7cab6990c6d7cfff5 prerequisite-patch-id: 501db2100636b9c8fcb3b64b8b1df797ccbede85 prerequisite-patch-id: ba1a2f07398a035bc48ef72edda41888614be449 prerequisite-patch-id: fd5cc5445aca9355ce678e6d2bfa89fab8a57e61 prerequisite-patch-id: 795ab4432ffb0843e22b580374782e7e0d99b909 prerequisite-patch-id: 1499d263dc933e75366c09e045d2125ca39f7ddd prerequisite-patch-id: f92d99bb1d35d99b063f818a19dcda999152d74c prerequisite-patch-id: e3296f38cdba6d903e061cff2bbb3e7615e8e671 prerequisite-patch-id: bc4662b4710d302d4893f58708820fc2a0624325 prerequisite-patch-id: f8991f2e98c2661a706183fde6b35e2b8d9aedcf prerequisite-patch-id: 44bf9ed69353586d284e5ee63d6fffa30439a698 prerequisite-patch-id: d50621bc630eeaf608bbaf260370c8dabf6326df Best regards, -- Bobby Eshleman <bobbyeshleman(a)meta.com>

1 month, 3 weeks

2
33
0 0

[bpf-next v1 5/5] selftests/bpf: propagate LLVM toolchain to runqslower build

by Hoyeon Lee

The selftests/bpf invokes a nested make when building runqslower, but LLVM toolchain version (clang/llvm-strip) is not propagated. As a result, runqslower is built with system default clang, not respecting specified LLVM version. # LLVM=-21 make -C tools/testing/selftests/bpf ... make feature_display=0 -C /bpf/tools/bpf/runqslower \ OUTPUT=/bpf/tools/testing/selftests/bpf/tools/build/runqslower/ \ BPFOBJ_OUTPUT=/bpf/tools/testing/selftests/bpf/tools/build/libbpf/ \ BPFOBJ=/bpf/tools/testing/selftests/bpf/tools/build/libbpf/libbpf.a \ BPF_INCLUDE=/bpf/tools/testing/selftests/bpf/tools/include \ BPFTOOL_OUTPUT=/bpf/tools/testing/selftests/bpf/tools/build/bpftool/ \ VMLINUX_BTF=/sys/kernel/btf/vmlinux BPF_TARGET_ENDIAN=--target=bpfel \ EXTRA_CFLAGS='-g -O0 ' EXTRA_LDFLAGS=' ' && \ cp /bpf/tools/testing/selftests/bpf/tools/build/runqslower/runqslower \ /bpf/tools/testing/selftests/bpf/runqslower clang -g -O2 --target=bpfel -I/bpf/tools/testing/selftests/bpf/tools/build/runqslower/ \ -I/bpf/tools/testing/selftests/bpf/tools/include -I/bpf/tools/include/uapi \ -c runqslower.bpf.c -o /bpf/tools/testing/selftests/bpf/tools/build/runqslower/runqslower.bpf.o && \ llvm-strip -g /bpf/tools/testing/selftests/bpf/tools/build/runqslower//runqslower.bpf.o /bin/sh: 1: clang: not found Explicitly propagate CLANG and LLVM_STRIP to the runqslower sub-make so that the LLVM toolchain selection from lib.mk is preserved. 
Signed-off-by: Hoyeon Lee <hoyeon.lee(a)suse.com> --- tools/testing/selftests/bpf/Makefile | 1 + tools/testing/selftests/lib.mk | 1 + 2 files changed, 2 insertions(+) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 34ea23c63bd5..79ab69920dca 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -306,6 +306,7 @@ endif $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT) $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ + CLANG=$(CLANG) LLVM_STRIP=$(LLVM_STRIP) \ OUTPUT=$(RUNQSLOWER_OUTPUT) VMLINUX_BTF=$(VMLINUX_BTF) \ BPFTOOL_OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf/ \ diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index a448fae57831..f14255b2afbd 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -8,6 +8,7 @@ LLVM_SUFFIX := $(LLVM) endif CLANG := $(LLVM_PREFIX)clang$(LLVM_SUFFIX) +LLVM_STRIP := $(LLVM_PREFIX)llvm-strip$(LLVM_SUFFIX) CLANG_TARGET_FLAGS_arm := arm-linux-gnueabi CLANG_TARGET_FLAGS_arm64 := aarch64-linux-gnu -- 2.51.1

1 month, 3 weeks

2
2
0 0

[PATCH v2 4/7] KVM: LoongArch: selftests: Add timer test case with one-shot mode

by Bibo Mao

Add timer test case based on common arch_timer code, one-shot mode is tested with timer interrupt. Signed-off-by: Bibo Mao <maobibo(a)loongson.cn> --- tools/testing/selftests/kvm/Makefile.kvm | 10 +- .../kvm/include/loongarch/arch_timer.h | 79 +++++++++++++++ .../kvm/include/loongarch/processor.h | 10 ++ .../selftests/kvm/lib/loongarch/processor.c | 4 +- .../selftests/kvm/loongarch/arch_timer.c | 98 +++++++++++++++++++ 5 files changed, 196 insertions(+), 5 deletions(-) create mode 100644 tools/testing/selftests/kvm/include/loongarch/arch_timer.h create mode 100644 tools/testing/selftests/kvm/loongarch/arch_timer.c diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index 148d427ff24b..662adf8f309b 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -183,6 +183,8 @@ TEST_GEN_PROGS_arm64 += memslot_perf_test TEST_GEN_PROGS_arm64 += mmu_stress_test TEST_GEN_PROGS_arm64 += rseq_test TEST_GEN_PROGS_arm64 += steal_time +SPLIT_TESTS_arm64 += arch_timer +SPLIT_TESTS_arm64 += get-reg-list TEST_GEN_PROGS_s390 = $(TEST_GEN_PROGS_COMMON) TEST_GEN_PROGS_s390 += s390/memop @@ -209,6 +211,8 @@ TEST_GEN_PROGS_riscv += memslot_perf_test TEST_GEN_PROGS_riscv += mmu_stress_test TEST_GEN_PROGS_riscv += rseq_test TEST_GEN_PROGS_riscv += steal_time +SPLIT_TESTS_riscv += arch_timer +SPLIT_TESTS_riscv += get-reg-list TEST_GEN_PROGS_loongarch += coalesced_io_test TEST_GEN_PROGS_loongarch += demand_paging_test @@ -222,10 +226,10 @@ TEST_GEN_PROGS_loongarch += kvm_page_table_test TEST_GEN_PROGS_loongarch += memslot_modification_stress_test TEST_GEN_PROGS_loongarch += memslot_perf_test TEST_GEN_PROGS_loongarch += set_memory_region_test +TEST_GEN_PROGS_loongarch += arch_timer +SPLIT_TESTS_loongarch = arch_timer -SPLIT_TESTS += arch_timer -SPLIT_TESTS += get-reg-list - +SPLIT_TESTS += $(SPLIT_TESTS_$(ARCH)) TEST_PROGS += $(TEST_PROGS_$(ARCH)) TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(ARCH)) 
TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(ARCH)) diff --git a/tools/testing/selftests/kvm/include/loongarch/arch_timer.h b/tools/testing/selftests/kvm/include/loongarch/arch_timer.h new file mode 100644 index 000000000000..94b1cba2744d --- /dev/null +++ b/tools/testing/selftests/kvm/include/loongarch/arch_timer.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * LoongArch Constant Timer specific interface + */ +#ifndef SELFTEST_KVM_ARCH_TIMER_H +#define SELFTEST_KVM_ARCH_TIMER_H + +#include "processor.h" +/* LoongArch timer frequency is constant 100MHZ */ +#define TIMER_FREQ (100UL << 20) +#define msec_to_cycles(msec) (TIMER_FREQ * (unsigned long)(msec) / 1000) +#define usec_to_cycles(usec) (TIMER_FREQ * (unsigned long)(usec) / 1000000) +#define cycles_to_usec(cycles) ((unsigned long)(cycles) * 1000000 / TIMER_FREQ) + +static inline unsigned long timer_get_cycles(void) +{ + unsigned long val = 0; + + __asm__ __volatile__( + "rdtime.d %0, $zero\n\t" + : "=r"(val) + : + ); + + return val; +} + +static inline void timer_set_next_cmp_ms(unsigned int msec, bool period) +{ + unsigned long val; + + val = msec_to_cycles(msec) & CSR_TCFG_VAL; + val |= CSR_TCFG_EN; + if (period) + val |= CSR_TCFG_PERIOD; + csr_write(val, LOONGARCH_CSR_TCFG); +} + +static inline unsigned long timer_get_val(void) +{ + return csr_read(LOONGARCH_CSR_TVAL); +} + +static inline unsigned long timer_get_cfg(void) +{ + return csr_read(LOONGARCH_CSR_TCFG); +} + +static inline void timer_irq_enable(void) +{ + unsigned long val; + + val = csr_read(LOONGARCH_CSR_ECFG); + val |= ECFGF_TIMER; + csr_write(val, LOONGARCH_CSR_ECFG); +} + +static inline void timer_irq_disable(void) +{ + unsigned long val; + + val = csr_read(LOONGARCH_CSR_ECFG); + val &= ~ECFGF_TIMER; + csr_write(val, LOONGARCH_CSR_ECFG); +} + +static inline void __delay(uint64_t cycles) +{ + uint64_t start = timer_get_cycles(); + + while ((timer_get_cycles() - start) < cycles) + cpu_relax(); +} + +static inline void 
udelay(unsigned long usec) +{ + __delay(usec_to_cycles(usec)); +} +#endif /* SELFTEST_KVM_ARCH_TIMER_H */ diff --git a/tools/testing/selftests/kvm/include/loongarch/processor.h b/tools/testing/selftests/kvm/include/loongarch/processor.h index b027f8f4dac7..61f6e215046b 100644 --- a/tools/testing/selftests/kvm/include/loongarch/processor.h +++ b/tools/testing/selftests/kvm/include/loongarch/processor.h @@ -83,6 +83,8 @@ #define LOONGARCH_CSR_PRMD 0x1 #define LOONGARCH_CSR_EUEN 0x2 #define LOONGARCH_CSR_ECFG 0x4 +#define ECFGB_TIMER 11 +#define ECFGF_TIMER (BIT_ULL(ECFGB_TIMER)) #define LOONGARCH_CSR_ESTAT 0x5 /* Exception status */ #define CSR_ESTAT_EXC_SHIFT 16 #define CSR_ESTAT_EXC_WIDTH 6 @@ -111,6 +113,14 @@ #define LOONGARCH_CSR_KS1 0x31 #define LOONGARCH_CSR_TMID 0x40 #define LOONGARCH_CSR_TCFG 0x41 +#define CSR_TCFG_VAL (BIT_ULL(48) - BIT_ULL(2)) +#define CSR_TCFG_PERIOD_SHIFT 1 +#define CSR_TCFG_PERIOD (0x1UL << CSR_TCFG_PERIOD_SHIFT) +#define CSR_TCFG_EN (0x1UL) +#define LOONGARCH_CSR_TVAL 0x42 +#define LOONGARCH_CSR_TINTCLR 0x44 /* Timer interrupt clear */ +#define CSR_TINTCLR_TI_SHIFT 0 +#define CSR_TINTCLR_TI (1 << CSR_TINTCLR_TI_SHIFT) /* TLB refill exception entry */ #define LOONGARCH_CSR_TLBRENTRY 0x88 #define LOONGARCH_CSR_TLBRSAVE 0x8b diff --git a/tools/testing/selftests/kvm/lib/loongarch/processor.c b/tools/testing/selftests/kvm/lib/loongarch/processor.c index 20ba476ccb72..436990258068 100644 --- a/tools/testing/selftests/kvm/lib/loongarch/processor.c +++ b/tools/testing/selftests/kvm/lib/loongarch/processor.c @@ -271,8 +271,8 @@ static void loongarch_vcpu_setup(struct kvm_vcpu *vcpu) TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); } - /* user mode and page enable mode */ - val = PLV_USER | CSR_CRMD_PG; + /* kernel mode and page enable mode */ + val = PLV_KERN | CSR_CRMD_PG; loongarch_set_csr(vcpu, LOONGARCH_CSR_CRMD, val); loongarch_set_csr(vcpu, LOONGARCH_CSR_PRMD, val); loongarch_set_csr(vcpu, LOONGARCH_CSR_EUEN, 1); diff --git 
a/tools/testing/selftests/kvm/loongarch/arch_timer.c b/tools/testing/selftests/kvm/loongarch/arch_timer.c new file mode 100644 index 000000000000..2a2cebcf3885 --- /dev/null +++ b/tools/testing/selftests/kvm/loongarch/arch_timer.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * + * The test validates one-shot constant timer IRQ using CSR_TCFG and + * CSR_TVAL registers. + */ +#include "arch_timer.h" +#include "kvm_util.h" +#include "processor.h" +#include "timer_test.h" +#include "ucall_common.h" + +static void guest_irq_handler(struct ex_regs *regs) +{ + uint64_t xcnt, val, cfg, xcnt_diff_us; + unsigned int intid; + uint32_t cpu = guest_get_vcpuid(); + struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; + + intid = !!(regs->estat & BIT(INT_TI)); + + /* Make sure we are dealing with the correct timer IRQ */ + GUEST_ASSERT_EQ(intid, 1); + + cfg = timer_get_cfg(); + + /* + * On physical machine, value of LOONGARCH_CSR_TVAL is BIT_ULL(48) - 1 + * On virtual machine, its value counts down from BIT_ULL(48) - 1 + */ + val = timer_get_val(); + xcnt = timer_get_cycles(); + xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt); + + /* Basic 'timer condition met' check */ + __GUEST_ASSERT(val > cfg, + "val = 0x%lx, cfg = 0x%lx, xcnt_diff_us = 0x%lx", + val, cfg, xcnt_diff_us); + + csr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR); + WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1); +} + +static void guest_test_oneshot_timer(uint32_t cpu) +{ + uint32_t irq_iter, config_iter; + uint64_t us; + struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; + + shared_data->nr_iter = 0; + shared_data->guest_stage = 0; + us = msecs_to_usecs(test_args.timer_period_ms) + test_args.timer_err_margin_us; + for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) { + shared_data->xcnt = timer_get_cycles(); + + /* Setup the next interrupt */ + timer_set_next_cmp_ms(test_args.timer_period_ms, false); + /* Setup a timeout for 
the interrupt to arrive */ + udelay(us); + + irq_iter = READ_ONCE(shared_data->nr_iter); + __GUEST_ASSERT(config_iter + 1 == irq_iter, + "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n" + " Guest timer interrupt was not triggered within the specified\n" + " interval, try to increase the error margin by [-e] option.\n", + config_iter + 1, irq_iter); + } +} + +static void guest_code(void) +{ + uint32_t cpu = guest_get_vcpuid(); + + timer_irq_enable(); + local_irq_enable(); + guest_test_oneshot_timer(cpu); + + GUEST_DONE(); +} + +struct kvm_vm *test_vm_create(void) +{ + struct kvm_vm *vm; + int nr_vcpus = test_args.nr_vcpus; + + vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus); + vm_init_descriptor_tables(vm); + vm_install_exception_handler(vm, EXCCODE_INT, guest_irq_handler); + + /* Make all the test's cmdline args visible to the guest */ + sync_global_to_guest(vm, test_args); + return vm; +} + +void test_vm_cleanup(struct kvm_vm *vm) +{ + kvm_vm_free(vm); +} -- 2.39.3

1 month, 3 weeks

2
2
0 0

[PATCH v3 0/1] cpuset: relax the overlap check for cgroup-v2

by Sun Shaojie

1 month, 3 weeks

1
1
0 0

[bpf-next v1 4/5] selftests/bpf: replace TCP CC string comparisons with bpf_strncmp

by Hoyeon Lee

The connect4_prog and bpf_iter_setsockopt tests duplicate the same open-coded TCP congestion control string comparison logic. Since bpf_strncmp() provides the same functionality, use it instead to avoid repeated open-coded loops. This change applies only to functional BPF tests and does not affect the verifier performance benchmarks (veristat.cfg). No functional changes intended. Signed-off-by: Hoyeon Lee <hoyeon.lee(a)suse.com> --- .../selftests/bpf/progs/bpf_iter_setsockopt.c | 17 ++------------- .../selftests/bpf/progs/connect4_prog.c | 21 +++++++------------ 2 files changed, 10 insertions(+), 28 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c index 774d4dbe8189..a8aa5a71d846 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c @@ -18,23 +18,10 @@ unsigned short reuse_listen_hport = 0; unsigned short listen_hport = 0; -char cubic_cc[TCP_CA_NAME_MAX] = "bpf_cubic"; +const char cubic_cc[] = "bpf_cubic"; char dctcp_cc[TCP_CA_NAME_MAX] = "bpf_dctcp"; bool random_retry = false; -static bool tcp_cc_eq(const char *a, const char *b) -{ - int i; - - for (i = 0; i < TCP_CA_NAME_MAX; i++) { - if (a[i] != b[i]) - return false; - if (!a[i]) - break; - } - - return true; -} SEC("iter/tcp") int change_tcp_cc(struct bpf_iter__tcp *ctx) @@ -58,7 +45,7 @@ int change_tcp_cc(struct bpf_iter__tcp *ctx) cur_cc, sizeof(cur_cc))) return 0; - if (!tcp_cc_eq(cur_cc, cubic_cc)) + if (bpf_strncmp(cur_cc, TCP_CA_NAME_MAX, cubic_cc)) return 0; if (random_retry && bpf_get_prandom_u32() % 4 == 1) diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c index 9e9ebf27b878..9d158cfad981 100644 --- a/tools/testing/selftests/bpf/progs/connect4_prog.c +++ b/tools/testing/selftests/bpf/progs/connect4_prog.c @@ -34,6 +34,9 @@ #define SOL_TCP 6 #endif +const char reno[] = 
"reno"; +const char cubic[] = "cubic"; + __attribute__ ((noinline)) __weak int do_bind(struct bpf_sock_addr *ctx) { @@ -50,35 +53,27 @@ int do_bind(struct bpf_sock_addr *ctx) } static __inline int verify_cc(struct bpf_sock_addr *ctx, - char expected[TCP_CA_NAME_MAX]) + const char expected[]) { char buf[TCP_CA_NAME_MAX]; - int i; if (bpf_getsockopt(ctx, SOL_TCP, TCP_CONGESTION, &buf, sizeof(buf))) return 1; - for (i = 0; i < TCP_CA_NAME_MAX; i++) { - if (buf[i] != expected[i]) - return 1; - if (buf[i] == 0) - break; - } + if (bpf_strncmp(buf, TCP_CA_NAME_MAX, expected)) + return 1; return 0; } static __inline int set_cc(struct bpf_sock_addr *ctx) { - char reno[TCP_CA_NAME_MAX] = "reno"; - char cubic[TCP_CA_NAME_MAX] = "cubic"; - - if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &reno, sizeof(reno))) + if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, (void *)reno, sizeof(reno))) return 1; if (verify_cc(ctx, reno)) return 1; - if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &cubic, sizeof(cubic))) + if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, (void *)cubic, sizeof(cubic))) return 1; if (verify_cc(ctx, cubic)) return 1; -- 2.51.1

1 month, 3 weeks

1
0
0 0

[bpf-next v1 3/5] selftests/bpf: move common TCP helpers into bpf_tracing_net.h

by Hoyeon Lee

Some BPF selftests contain identical copies of the min(), max(), before(), and after() helpers. These repeated snippets are the same across the tests and do not need to be defined separately. Move these helpers into bpf_tracing_net.h so they can be shared by TCP related BPF programs. This removes repeated code and keeps the helpers in a single place. Signed-off-by: Hoyeon Lee <hoyeon.lee(a)suse.com> --- tools/testing/selftests/bpf/progs/bpf_cc_cubic.c | 9 --------- tools/testing/selftests/bpf/progs/bpf_cubic.c | 7 ------- tools/testing/selftests/bpf/progs/bpf_dctcp.c | 6 ------ tools/testing/selftests/bpf/progs/bpf_tracing_net.h | 11 +++++++++++ .../selftests/bpf/progs/tcp_ca_write_sk_pacing.c | 2 -- 5 files changed, 11 insertions(+), 24 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c index 4e51785e7606..9af19dfe4e80 100644 --- a/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c +++ b/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c @@ -22,10 +22,6 @@ #define TCP_PACING_CA_RATIO (120) #define TCP_REORDERING (12) -#define min(a, b) ((a) < (b) ? (a) : (b)) -#define max(a, b) ((a) > (b) ? 
(a) : (b)) -#define after(seq2, seq1) before(seq1, seq2) - extern void cubictcp_init(struct sock *sk) __ksym; extern void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) __ksym; extern __u32 cubictcp_recalc_ssthresh(struct sock *sk) __ksym; @@ -34,11 +30,6 @@ extern __u32 tcp_reno_undo_cwnd(struct sock *sk) __ksym; extern void cubictcp_acked(struct sock *sk, const struct ack_sample *sample) __ksym; extern void cubictcp_cong_avoid(struct sock *sk, __u32 ack, __u32 acked) __ksym; -static bool before(__u32 seq1, __u32 seq2) -{ - return (__s32)(seq1-seq2) < 0; -} - static __u64 div64_u64(__u64 dividend, __u64 divisor) { return dividend / divisor; diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c index f089faa97ae6..46fb2b37d3a7 100644 --- a/tools/testing/selftests/bpf/progs/bpf_cubic.c +++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c @@ -20,13 +20,6 @@ char _license[] SEC("license") = "GPL"; #define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi) -#define min(a, b) ((a) < (b) ? (a) : (b)) -#define max(a, b) ((a) > (b) ? (a) : (b)) -static bool before(__u32 seq1, __u32 seq2) -{ - return (__s32)(seq1-seq2) < 0; -} -#define after(seq2, seq1) before(seq1, seq2) extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym; extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym; diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c index 32c511bcd60b..1cc83140849f 100644 --- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c +++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c @@ -13,16 +13,10 @@ #ifndef EBUSY #define EBUSY 16 #endif -#define min(a, b) ((a) < (b) ? (a) : (b)) -#define max(a, b) ((a) > (b) ? (a) : (b)) #define min_not_zero(x, y) ({ \ typeof(x) __x = (x); \ typeof(y) __y = (y); \ __x == 0 ? __y : ((__y == 0) ? 
__x : min(__x, __y)); }) -static bool before(__u32 seq1, __u32 seq2) -{ - return (__s32)(seq1-seq2) < 0; -} char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h index 17db400f0e0d..39e98e16c113 100644 --- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h +++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h @@ -5,6 +5,17 @@ #include <vmlinux.h> #include <bpf/bpf_core_read.h> +#define min(a, b) ((a) < (b) ? (a) : (b)) +#define max(a, b) ((a) > (b) ? (a) : (b)) + +static inline bool before(__u32 seq1, __u32 seq2) +{ + return (__s32)(seq1 - seq2) < 0; +} + +#define after(seq2, seq1) before(seq1, seq2) + + #define AF_INET 2 #define AF_INET6 10 diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c index a58b5194fc89..022291f21dfb 100644 --- a/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c +++ b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c @@ -8,8 +8,6 @@ char _license[] SEC("license") = "GPL"; #define USEC_PER_SEC 1000000UL -#define min(a, b) ((a) < (b) ? (a) : (b)) - static unsigned int tcp_left_out(const struct tcp_sock *tp) { return tp->sacked_out + tp->lost_out; -- 2.51.1

1 month, 3 weeks

1
0
0 0

[bpf-next v1 2/5] selftests/bpf: use sockaddr_storage instead of sa46 in select_reuseport test

by Hoyeon Lee

The select_reuseport selftest uses a custom sa46 union to represent IPv4 and IPv6 addresses. This custom wrapper requires extra manual handling for address family and field extraction. Replace sa46 with sockaddr_storage and update the helper functions to operate on native socket structures. This simplifies the code and removes unnecessary custom address-handling logic. No functional changes intended. Signed-off-by: Hoyeon Lee <hoyeon.lee(a)suse.com> --- .../bpf/prog_tests/select_reuseport.c | 67 ++++++++++--------- 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c index 036d4760d2c1..3dbcc091f16c 100644 --- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c +++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c @@ -41,11 +41,7 @@ static struct bpf_object *obj; static __u32 index_zero; static int epfd; -static union sa46 { - struct sockaddr_in6 v6; - struct sockaddr_in v4; - sa_family_t family; -} srv_sa; +static struct sockaddr_storage srv_sa; #define RET_IF(condition, tag, format...) 
({ \ if (CHECK_FAIL(condition)) { \ @@ -135,24 +131,24 @@ static int prepare_bpf_obj(void) return 0; } -static void sa46_init_loopback(union sa46 *sa, sa_family_t family) +static void ss_init_loopback(struct sockaddr_storage *sa, sa_family_t family) { memset(sa, 0, sizeof(*sa)); - sa->family = family; - if (sa->family == AF_INET6) - sa->v6.sin6_addr = in6addr_loopback; + sa->ss_family = family; + if (sa->ss_family == AF_INET6) + ((struct sockaddr_in6 *)sa)->sin6_addr = in6addr_loopback; else - sa->v4.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + ((struct sockaddr_in *)sa)->sin_addr.s_addr = htonl(INADDR_LOOPBACK); } -static void sa46_init_inany(union sa46 *sa, sa_family_t family) +static void ss_init_inany(struct sockaddr_storage *sa, sa_family_t family) { memset(sa, 0, sizeof(*sa)); - sa->family = family; - if (sa->family == AF_INET6) - sa->v6.sin6_addr = in6addr_any; + sa->ss_family = family; + if (sa->ss_family == AF_INET6) + ((struct sockaddr_in6 *)sa)->sin6_addr = in6addr_any; else - sa->v4.sin_addr.s_addr = INADDR_ANY; + ((struct sockaddr_in *)sa)->sin_addr.s_addr = INADDR_ANY; } static int read_int_sysctl(const char *sysctl) @@ -228,7 +224,7 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd, int cli_fd) { struct data_check expected = {}, result; - union sa46 cli_sa; + struct sockaddr_storage cli_sa; socklen_t addrlen; int err; @@ -251,26 +247,32 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd, } if (family == AF_INET6) { + struct sockaddr_in6 *srv_v6 = (struct sockaddr_in6 *)&srv_sa; + struct sockaddr_in6 *cli_v6 = (struct sockaddr_in6 *)&cli_sa; + expected.eth_protocol = htons(ETH_P_IPV6); - expected.bind_inany = !srv_sa.v6.sin6_addr.s6_addr32[3] && - !srv_sa.v6.sin6_addr.s6_addr32[2] && - !srv_sa.v6.sin6_addr.s6_addr32[1] && - !srv_sa.v6.sin6_addr.s6_addr32[0]; + expected.bind_inany = !srv_v6->sin6_addr.s6_addr32[3] && + !srv_v6->sin6_addr.s6_addr32[2] && + !srv_v6->sin6_addr.s6_addr32[1] && + 
!srv_v6->sin6_addr.s6_addr32[0]; - memcpy(&expected.skb_addrs[0], cli_sa.v6.sin6_addr.s6_addr32, - sizeof(cli_sa.v6.sin6_addr)); + memcpy(&expected.skb_addrs[0], cli_v6->sin6_addr.s6_addr32, + sizeof(cli_v6->sin6_addr)); memcpy(&expected.skb_addrs[4], &in6addr_loopback, sizeof(in6addr_loopback)); - expected.skb_ports[0] = cli_sa.v6.sin6_port; - expected.skb_ports[1] = srv_sa.v6.sin6_port; + expected.skb_ports[0] = cli_v6->sin6_port; + expected.skb_ports[1] = srv_v6->sin6_port; } else { + struct sockaddr_in *srv_v4 = (struct sockaddr_in *)&srv_sa; + struct sockaddr_in *cli_v4 = (struct sockaddr_in *)&cli_sa; + expected.eth_protocol = htons(ETH_P_IP); - expected.bind_inany = !srv_sa.v4.sin_addr.s_addr; + expected.bind_inany = !srv_v4->sin_addr.s_addr; - expected.skb_addrs[0] = cli_sa.v4.sin_addr.s_addr; + expected.skb_addrs[0] = cli_v4->sin_addr.s_addr; expected.skb_addrs[1] = htonl(INADDR_LOOPBACK); - expected.skb_ports[0] = cli_sa.v4.sin_port; - expected.skb_ports[1] = srv_sa.v4.sin_port; + expected.skb_ports[0] = cli_v4->sin_port; + expected.skb_ports[1] = srv_v4->sin_port; } if (memcmp(&result, &expected, offsetof(struct data_check, @@ -364,16 +366,15 @@ static void check_results(void) static int send_data(int type, sa_family_t family, void *data, size_t len, enum result expected) { - union sa46 cli_sa; + struct sockaddr_storage cli_sa; int fd, err; fd = socket(family, type, 0); RET_ERR(fd == -1, "socket()", "fd:%d errno:%d\n", fd, errno); - sa46_init_loopback(&cli_sa, family); + ss_init_loopback(&cli_sa, family); err = bind(fd, (struct sockaddr *)&cli_sa, sizeof(cli_sa)); RET_ERR(fd == -1, "bind(cli_sa)", "err:%d errno:%d\n", err, errno); - err = sendto(fd, data, len, MSG_FASTOPEN, (struct sockaddr *)&srv_sa, sizeof(srv_sa)); RET_ERR(err != len && expected >= PASS, @@ -589,9 +590,9 @@ static void prepare_sk_fds(int type, sa_family_t family, bool inany) socklen_t addrlen; if (inany) - sa46_init_inany(&srv_sa, family); + ss_init_inany(&srv_sa, family); else - 
sa46_init_loopback(&srv_sa, family); + ss_init_loopback(&srv_sa, family); addrlen = sizeof(srv_sa); /* -- 2.51.1

1 month, 3 weeks

1
0
0 0

[PATCH v2 3/7] KVM: LoongArch: selftests: Add basic interfaces

by Bibo Mao

Add some basic function interfaces such as CSR register access, local irq enable or disable APIs. Signed-off-by: Bibo Mao <maobibo(a)loongson.cn> --- .../kvm/include/loongarch/processor.h | 52 +++++++++++++++++++ .../selftests/kvm/lib/loongarch/processor.c | 5 ++ 2 files changed, 57 insertions(+) diff --git a/tools/testing/selftests/kvm/include/loongarch/processor.h b/tools/testing/selftests/kvm/include/loongarch/processor.h index a18ac7bff303..b027f8f4dac7 100644 --- a/tools/testing/selftests/kvm/include/loongarch/processor.h +++ b/tools/testing/selftests/kvm/include/loongarch/processor.h @@ -118,6 +118,28 @@ #define CSR_TLBREHI_PS_SHIFT 0 #define CSR_TLBREHI_PS (0x3fUL << CSR_TLBREHI_PS_SHIFT) +#define csr_read(csr) \ +({ \ + register unsigned long __v; \ + __asm__ __volatile__( \ + "csrrd %[val], %[reg]\n\t" \ + : [val] "=r" (__v) \ + : [reg] "i" (csr) \ + : "memory"); \ + __v; \ +}) + +#define csr_write(v, csr) \ +({ \ + register unsigned long __v = v; \ + __asm__ __volatile__ ( \ + "csrwr %[val], %[reg]\n\t" \ + : [val] "+r" (__v) \ + : [reg] "i" (csr) \ + : "memory"); \ + __v; \ +}) + #define EXREGS_GPRS (32) #ifndef __ASSEMBLER__ @@ -147,6 +169,36 @@ struct handlers { void vm_init_descriptor_tables(struct kvm_vm *vm); void vm_install_exception_handler(struct kvm_vm *vm, int vector, handler_fn handler); +static inline void local_irq_enable(void) +{ + unsigned int flags = CSR_CRMD_IE; + + register unsigned int mask asm("$t0") = CSR_CRMD_IE; + + __asm__ __volatile__( + "csrxchg %[val], %[mask], %[reg]\n\t" + : [val] "+r" (flags) + : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD) + : "memory"); +} + +static inline void local_irq_disable(void) +{ + unsigned int flags = 0; + + register unsigned int mask asm("$t0") = CSR_CRMD_IE; + + __asm__ __volatile__( + "csrxchg %[val], %[mask], %[reg]\n\t" + : [val] "+r" (flags) + : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD) + : "memory"); +} + +static inline void cpu_relax(void) +{ + asm volatile("nop" ::: 
"memory"); +} #else #define PC_OFFSET_EXREGS ((EXREGS_GPRS + 0) * 8) #define ESTAT_OFFSET_EXREGS ((EXREGS_GPRS + 1) * 8) diff --git a/tools/testing/selftests/kvm/lib/loongarch/processor.c b/tools/testing/selftests/kvm/lib/loongarch/processor.c index be537c5ff74e..20ba476ccb72 100644 --- a/tools/testing/selftests/kvm/lib/loongarch/processor.c +++ b/tools/testing/selftests/kvm/lib/loongarch/processor.c @@ -373,3 +373,8 @@ void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) regs.pc = (uint64_t)guest_code; vcpu_regs_set(vcpu, &regs); } + +uint32_t guest_get_vcpuid(void) +{ + return csr_read(LOONGARCH_CSR_CPUID); +} -- 2.39.3

1 month, 4 weeks

2
1
0 0

Re: [PATCH] selftests/cgroup: conform test to TAP format output

by Michal Koutný

On Fri, Nov 14, 2025 at 11:55:48AM +0800, Guopeng Zhang <zhangguopeng(a)kylinos.cn> wrote: > Actually, selftests are no longer just something for developers to view locally; they are now extensively > run in CI and stable branch regression testing. Using a standardized layout means that general test runners > and CI systems can parse the cgroup test results without any special handling. Nice. I appreciate that you took this up. > This patch is not part of a formal, tree-wide conversion series I am running; it is an incremental step to align the > cgroup C tests with the existing TAP usage. I started here because these tests already use ksft_test_result_*() and > only require minor changes to generate proper TAP output. The tests are in various states of usage, correctness and usefulness, hence... > > > I'm asking to better assess whether also the scripts listed in > > Makefile:TEST_PROGS should be converted too. > > I agree that having them produce TAP output would benefit tooling and CI. I did not want to mix > that into this change, but if you and other maintainers think this direction is reasonable, > I would be happy to follow up and convert the cgroup shell tests to TAP as well. ...I'd suggest focusing next on test_cpuset_prs.sh (as discussed, it may need more changes to adapt its output too). Michal

1 month, 4 weeks

2
1
0 0

[PATCH bpf-next] selftests/bpf: simplify the kernel_count bench trigger

by Menglong Dong

Remove the "trigger_count" in trigger_bench.c and reuse trigger_driver() instead for trigger_kernel_count_setup(). With the call to bpf_get_numa_node_id(), the result for "kernel_count" will become a little more accurate. It will also be easier if we want to test the performance of livepatch: just hook the bpf_get_numa_node_id() and run the "kernel_count" bench trigger. Signed-off-by: Menglong Dong <dongml2(a)chinatelecom.cn> --- .../selftests/bpf/benchs/bench_trigger.c | 5 +---- .../testing/selftests/bpf/progs/trigger_bench.c | 17 +++++------------ 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c index 1e2aff007c2a..34fd8fa3b803 100644 --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c @@ -179,11 +179,8 @@ static void trigger_syscall_count_setup(void) static void trigger_kernel_count_setup(void) { setup_ctx(); - bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false); - bpf_program__set_autoload(ctx.skel->progs.trigger_count, true); + ctx.skel->rodata->kernel_count = 1; load_ctx(); - /* override driver program */ - ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_count); } static void trigger_kprobe_setup(void) diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c index 3d5f30c29ae3..6564d1909c7b 100644 --- a/tools/testing/selftests/bpf/progs/trigger_bench.c +++ b/tools/testing/selftests/bpf/progs/trigger_bench.c @@ -39,26 +39,19 @@ int bench_trigger_uprobe_multi(void *ctx) return 0; } +const volatile int kernel_count = 0; const volatile int batch_iters = 0; -SEC("?raw_tp") -int trigger_count(void *ctx) -{ - int i; - - for (i = 0; i < batch_iters; i++) - inc_counter(); - - return 0; -} - SEC("?raw_tp") int trigger_driver(void *ctx) { int i; - for (i = 0; i < batch_iters; i++) + for (i = 0; i < batch_iters; 
i++) { (void)bpf_get_numa_node_id(); /* attach point for benchmarking */ + if (kernel_count) + inc_counter(); + } return 0; } -- 2.51.2

1 month, 4 weeks

2
2
0 0

[PATCH net-next] selftests: drv-net: xdp: make the XDP qstats tests less flaky

by Jakub Kicinski

The XDP qstats tests send 2k packets over a single socket. It looks like when the netdev CI is busy, running those tests in QEMU occasionally flakes. The target doesn't get to run at all before all 2000 packets are sent. Lower the number of packets to 1000 and reopen the socket every 50 packets, to give RSS a chance to spread the packets to multiple queues. For the netdev CI testing either lowering the count or using multiple sockets is enough, but let's do both for extra resiliency. Signed-off-by: Jakub Kicinski <kuba(a)kernel.org> --- CC: shuah(a)kernel.org CC: ast(a)kernel.org CC: hawk(a)kernel.org CC: john.fastabend(a)gmail.com CC: sdf(a)fomichev.me CC: linux-kselftest(a)vger.kernel.org --- tools/testing/selftests/drivers/net/xdp.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/drivers/net/xdp.py b/tools/testing/selftests/drivers/net/xdp.py index a148004e1c36..834a37ae7d0d 100755 --- a/tools/testing/selftests/drivers/net/xdp.py +++ b/tools/testing/selftests/drivers/net/xdp.py @@ -687,9 +687,12 @@ from lib.py import ip, bpftool, defer "/dev/null" # Listener runs on "remote" in case of XDP_TX rx_host = cfg.remote if act == XDPAction.TX else None - # We want to spew 2000 packets quickly, bash seems to do a good enough job - tx_udp = f"exec 5<>/dev/udp/{cfg.addr}/{port}; " \ - "for i in `seq 2000`; do echo a >&5; done; exec 5>&-" + # We want to spew 1000 packets quickly, bash seems to do a good enough job + # Each reopening of the socket gives us a differenot local port (for RSS) + tx_udp = "for _ in `seq 20`; do " \ + f"exec 5<>/dev/udp/{cfg.addr}/{port}; " \ + "for i in `seq 50`; do echo a >&5; done; " \ + "exec 5>&-; done" cfg.wait_hw_stats_settle() # Qstats have more clearly defined semantics than rtnetlink. 
@@ -704,11 +707,11 @@ from lib.py import ip, bpftool, defer cfg.wait_hw_stats_settle() after = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] - ksft_ge(after['rx-packets'] - before['rx-packets'], 2000) + expected_pkts = 1000 + ksft_ge(after['rx-packets'] - before['rx-packets'], expected_pkts) if act == XDPAction.TX: - ksft_ge(after['tx-packets'] - before['tx-packets'], 2000) + ksft_ge(after['tx-packets'] - before['tx-packets'], expected_pkts) - expected_pkts = 2000 stats = _get_stats(prog_info["maps"]["map_xdp_stats"]) ksft_eq(stats[XDPStats.RX.value], expected_pkts, "XDP RX stats mismatch") if act == XDPAction.TX: -- 2.51.1

1 month, 4 weeks

3
2
0 0

[PATCH net-next] selftests: drv-net: xdp: Fix register spill error with clang 20

by Dimitri Daskalakis

On clang 20.1.8 the XDP program fails to load with a register spill error. Since hdr_len is a __u32, the compiler decided it only needed the lower 32-bits of ctx->data, which later triggers the register spill verifier error. Suggested-by: Martin KaFai Lau <martin.lau(a)kernel.org> Signed-off-by: Dimitri Daskalakis <dimitri.daskalakis1(a)gmail.com> Signed-off-by: Jakub Kicinski <kuba(a)kernel.org> --- libbpf: prog 'xdp_prog': BPF program load failed: Permission denied libbpf: prog 'xdp_prog': -- BEGIN PROG LOAD LOG -- 0: R1=ctx() R10=fp0 ; return xdp_prog_common(ctx); @ xdp_native.bpf.c:670 0: (85) call pc+1 caller: R10=fp0 callee: frame1: R1=ctx() R10=fp0 2: frame1: R1=ctx() R10=fp0 ; static int xdp_prog_common(struct xdp_md *ctx) @ xdp_native.bpf.c:635 2: (bf) r7 = r1 ; frame1: R1=ctx() R7_w=ctx() 3: (b4) w1 = 0 ; frame1: R1_w=0 ; key = XDP_MODE; @ xdp_native.bpf.c:640 4: (63) *(u32 *)(r10 -336) = r1 ; frame1: R1_w=0 R10=fp0 fp-336=????0 5: (bf) r2 = r10 ; frame1: R2_w=fp0 R10=fp0 6: (07) r2 += -336 ; frame1: R2_w=fp-336 ; mode = bpf_map_lookup_elem(&map_xdp_setup, &key); @ xdp_native.bpf.c:641 7: (18) r1 = 0xff110001099ada00 ; frame1: R1_w=map_ptr(map=map_xdp_setup,ks=4,vs=4) 9: (85) call bpf_map_lookup_elem#1 ; frame1: R0=map_value(map=map_xdp_setup,ks=4,vs=4) 10: (bf) r8 = r0 ; frame1: R0=map_value(map=map_xdp_setup,ks=4,vs=4) R8_w=map_value(map=map_xdp_setup,ks=4,vs=4) 11: (b4) w6 = 2 ; frame1: R6_w=2 ; if (!mode) @ xdp_native.bpf.c:642 12: (15) if r8 == 0x0 goto pc+669 ; frame1: R8_w=map_value(map=map_xdp_setup,ks=4,vs=4) 13: (b4) w1 = 1 ; frame1: R1_w=1 ; key = XDP_PORT; @ xdp_native.bpf.c:645 14: (63) *(u32 *)(r10 -336) = r1 ; frame1: R1_w=1 R10=fp0 fp-336=????1 15: (bf) r2 = r10 ; frame1: R2_w=fp0 R10=fp0 16: (07) r2 += -336 ; frame1: R2_w=fp-336 ; port = bpf_map_lookup_elem(&map_xdp_setup, &key); @ xdp_native.bpf.c:646 17: (18) r1 = 0xff110001099ada00 ; frame1: R1_w=map_ptr(map=map_xdp_setup,ks=4,vs=4) 19: (85) call bpf_map_lookup_elem#1 ; frame1: 
R0=map_value(map=map_xdp_setup,ks=4,vs=4) ; if (!port) @ xdp_native.bpf.c:647 20: (15) if r0 == 0x0 goto pc+661 ; frame1: R0=map_value(map=map_xdp_setup,ks=4,vs=4) ; switch (*mode) { @ xdp_native.bpf.c:650 21: (61) r1 = *(u32 *)(r8 +0) ; frame1: R1_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) R8=map_value(map=map_xdp_setup,ks=4,vs=4) 22: (66) if w1 s> 0x1 goto pc+20 43: frame1: R0=map_value(map=map_xdp_setup,ks=4,vs=4) R1=scalar(smin=umin=smin32=umin32=2,smax=umax=umax32=0x7fffffff,var_off=(0x0; 0x7fffffff)) R6=2 R7=ctx() R8=map_value(map=map_xdp_setup,ks=4,vs=4) R10=fp0 fp-336=????1 ; switch (*mode) { @ xdp_native.bpf.c:650 43: (16) if w1 == 0x2 goto pc+26 ; frame1: R1=scalar(smin=umin=smin32=umin32=3,smax=umax=umax32=0x7fffffff,var_off=(0x0; 0x7fffffff)) 44: (16) if w1 == 0x3 goto pc+123 168: frame1: R0=map_value(map=map_xdp_setup,ks=4,vs=4) R1=3 R6=2 R7=ctx() R8=map_value(map=map_xdp_setup,ks=4,vs=4) R10=fp0 fp-336=????1 ; return xdp_adjst_tail(ctx, (__u16)(*port)); @ xdp_native.bpf.c:658 168: (61) r2 = *(u32 *)(r0 +0) ; frame1: R0=map_value(map=map_xdp_setup,ks=4,vs=4) R2_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) ; udph = filter_udphdr(ctx, port); @ xdp_native.bpf.c:430 169: (54) w2 &= 65535 ; frame1: R2_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=0xffff,var_off=(0x0; 0xffff)) 170: (bf) r1 = r7 ; frame1: R1_w=ctx() R7=ctx() 171: (85) call pc+512 caller: frame1: R6=2 R7=ctx() R8=map_value(map=map_xdp_setup,ks=4,vs=4) R10=fp0 fp-336=????1 callee: frame2: R1_w=ctx() R2_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=0xffff,var_off=(0x0; 0xffff)) R10=fp0 684: frame2: R1=ctx() R2=scalar(smin=smin32=0,smax=umax=smax32=umax32=0xffff,var_off=(0x0; 0xffff)) R10=fp0 ; static struct udphdr *filter_udphdr(struct xdp_md *ctx, __u16 port) @ xdp_native.bpf.c:71 684: (bc) w6 = w2 ; frame2: R2=scalar(id=54,smin=smin32=0,smax=umax=smax32=umax32=0xffff,var_off=(0x0; 0xffff)) 
R6_w=scalar(id=54,smin=smin32=0,smax=umax=smax32=umax32=0xffff,var_off=(0x0; 0xffff)) 685: (bf) r7 = r1 ; frame2: R1=ctx() R7_w=ctx() ; err = bpf_xdp_pull_data(ctx, sizeof(*eth)); @ xdp_native.bpf.c:78 686: (b4) w2 = 14 ; frame2: R2_w=14 687: (85) call bpf_xdp_pull_data#85514 ; frame2: R0_w=scalar() 688: (bc) w1 = w0 ; frame2: R0_w=scalar() R1_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) 689: (b7) r0 = 0 ; frame2: R0_w=0 ; if (err) @ xdp_native.bpf.c:79 690: (56) if w1 != 0x0 goto pc+55 ; frame2: R1_w=0 ; data_end = (void *)(long)ctx->data_end; @ xdp_native.bpf.c:82 691: (61) r2 = *(u32 *)(r7 +4) ; frame2: R2_w=pkt_end() R7_w=ctx() ; data = eth = (void *)(long)ctx->data; @ xdp_native.bpf.c:83 692: (61) r1 = *(u32 *)(r7 +0) ; frame2: R1_w=pkt(r=0) R7_w=ctx() ; if (data + sizeof(*eth) > data_end) @ xdp_native.bpf.c:85 693: (bf) r3 = r1 ; frame2: R1_w=pkt(r=0) R3_w=pkt(r=0) 694: (07) r3 += 14 ; frame2: R3=pkt(off=14,r=0) 695: (2d) if r3 > r2 goto pc+50 ; frame2: R2=pkt_end() R3=pkt(off=14,r=14) ; if (eth->h_proto == bpf_htons(ETH_P_IP)) { @ xdp_native.bpf.c:88 696: (71) r2 = *(u8 *)(r1 +12) ; frame2: R1=pkt(r=14) R2_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=255,var_off=(0x0; 0xff)) 697: (71) r1 = *(u8 *)(r1 +13) ; frame2: R1_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=255,var_off=(0x0; 0xff)) 698: (64) w1 <<= 8 ; frame2: R1_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=0xff00,var_off=(0x0; 0xff00)) 699: (4c) w1 |= w2 ; frame2: R1_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=0xffff,var_off=(0x0; 0xffff)) R2_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=255,var_off=(0x0; 0xff)) 700: (16) if w1 == 0xdd86 goto pc+15 ; frame2: R1_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=0xffff,var_off=(0x0; 0xffff)) 701: (56) if w1 != 0x8 goto pc+44 ; frame2: R1_w=8 ; err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*iph) + @ xdp_native.bpf.c:91 702: (bf) r1 = r7 ; frame2: R1_w=ctx() R7=ctx() 703: (b4) w2 = 42 ; frame2: R2_w=42 704: (85) call 
bpf_xdp_pull_data#85514 ; frame2: R0=scalar() 705: (bc) w1 = w0 ; frame2: R0=scalar() R1_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) 706: (b7) r0 = 0 ; frame2: R0_w=0 ; if (err) @ xdp_native.bpf.c:93 707: (56) if w1 != 0x0 goto pc+38 ; frame2: R1_w=0 ; data_end = (void *)(long)ctx->data_end; @ xdp_native.bpf.c:96 708: (61) r1 = *(u32 *)(r7 +4) ; frame2: R1_w=pkt_end() R7=ctx() ; data = (void *)(long)ctx->data; @ xdp_native.bpf.c:97 709: (61) r2 = *(u32 *)(r7 +0) ; frame2: R2_w=pkt(r=0) R7=ctx() ; if (iph + 1 > (struct iphdr *)data_end || @ xdp_native.bpf.c:101 710: (bf) r7 = r2 ; frame2: R2_w=pkt(r=0) R7_w=pkt(r=0) 711: (07) r7 += 34 ; frame2: R7_w=pkt(off=34,r=0) 712: (2d) if r7 > r1 goto pc+33 ; frame2: R1_w=pkt_end() R7_w=pkt(off=34,r=34) ; iph->protocol != IPPROTO_UDP) @ xdp_native.bpf.c:102 713: (71) r2 = *(u8 *)(r2 +23) ; frame2: R2=scalar(smin=smin32=0,smax=umax=smax32=umax32=255,var_off=(0x0; 0xff)) ; if (iph + 1 > (struct iphdr *)data_end || @ xdp_native.bpf.c:101 714: (16) if w2 == 0x11 goto pc+14 729: frame2: R0=0 R1=pkt_end() R2=17 R6=scalar(id=54,smin=smin32=0,smax=umax=smax32=umax32=0xffff,var_off=(0x0; 0xffff)) R7=pkt(off=34,r=34) R10=fp0 ; if (udph + 1 > (struct udphdr *)data_end) @ xdp_native.bpf.c:128 729: (bf) r2 = r7 ; frame2: R2_w=pkt(off=34,r=34) R7=pkt(off=34,r=34) 730: (07) r2 += 8 ; frame2: R2=pkt(off=42,r=34) 731: (2d) if r2 > r1 goto pc+14 ; frame2: R1=pkt_end() R2=pkt(off=42,r=42) ; if (udph->dest != bpf_htons(port)) @ xdp_native.bpf.c:131 732: (dc) r6 = be16 r6 ; frame2: R6_w=scalar() 733: (69) r1 = *(u16 *)(r7 +2) ; frame2: R1_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=0xffff,var_off=(0x0; 0xffff)) R7=pkt(off=34,r=42) 734: (5e) if w1 != w6 goto pc+11 ; frame2: R1_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=0xffff,var_off=(0x0; 0xffff)) R6_w=scalar(smax=0x7fffffff0000ffff,umax=0xffffffff0000ffff,smin32=0,smax32=umax32=0xffff,var_off=(0x0; 0xffffffff0000ffff)) 735: (b4) w1 = 0 ; frame2: R1_w=0 736: (63) *(u32 
*)(r10 -4) = r1 ; frame2: R1_w=0 R10=fp0 fp-8=0000???? 737: (bf) r2 = r10 ; frame2: R2_w=fp0 R10=fp0 738: (07) r2 += -4 ; frame2: R2_w=fp-4 ; count = bpf_map_lookup_elem(&map_xdp_stats, &stat_type); @ xdp_native.bpf.c:65 739: (18) r1 = 0xff110001099ad200 ; frame2: R1_w=map_ptr(map=map_xdp_stats,ks=4,vs=8) 741: (85) call bpf_map_lookup_elem#1 ; frame2: R0=map_value(map=map_xdp_stats,ks=4,vs=8) ; if (count) @ xdp_native.bpf.c:67 742: (15) if r0 == 0x0 goto pc+2 ; frame2: R0=map_value(map=map_xdp_stats,ks=4,vs=8) 743: (b7) r1 = 1 ; frame2: R1_w=1 ; __sync_fetch_and_add(count, 1); @ xdp_native.bpf.c:68 744: (db) r1 = atomic64_fetch_add((u64 *)(r0 +0), r1) ; frame2: R0=map_value(map=map_xdp_stats,ks=4,vs=8) R1_w=scalar() 745: (bf) r0 = r7 ; frame2: R0_w=pkt(off=34,r=42) R7=pkt(off=34,r=42) ; } @ xdp_native.bpf.c:137 746: (95) exit returning from callee: frame2: R0_w=pkt(off=34,r=42) R1_w=scalar() R6=scalar(smax=0x7fffffff0000ffff,umax=0xffffffff0000ffff,smin32=0,smax32=umax32=0xffff,var_off=(0x0; 0xffffffff0000ffff)) R7=pkt(off=34,r=42) R10=fp0 fp-8=0000???? 
to caller at 172: frame1: R0_w=pkt(off=34,r=42) R6=2 R7=ctx() R8=map_value(map=map_xdp_setup,ks=4,vs=4) R10=fp0 fp-336=????1 ; udph = filter_udphdr(ctx, port); @ xdp_native.bpf.c:430 172: (bf) r8 = r0 ; frame1: R0_w=pkt(off=34,r=42) R8_w=pkt(off=34,r=42) ; if (!udph) @ xdp_native.bpf.c:431 173: (15) if r8 == 0x0 goto pc+508 ; frame1: R8_w=pkt(off=34,r=42) ; hdr_len = (void *)udph - (void *)(long)ctx->data + @ xdp_native.bpf.c:434 174: (61) r9 = *(u32 *)(r7 +0) ; frame1: R7=ctx() R9_w=pkt(r=0) ; key = XDP_ADJST_OFFSET; @ xdp_native.bpf.c:436 175: (63) *(u32 *)(r10 -328) = r6 ; frame1: R6=2 R10=fp0 fp-328=????2 176: (bf) r2 = r10 ; frame1: R2_w=fp0 R10=fp0 177: (07) r2 += -328 ; frame1: R2_w=fp-328 ; adjust_offset = bpf_map_lookup_elem(&map_xdp_setup, &key); @ xdp_native.bpf.c:437 178: (18) r1 = 0xff110001099ada00 ; frame1: R1_w=map_ptr(map=map_xdp_setup,ks=4,vs=4) 180: (85) call bpf_map_lookup_elem#1 ; frame1: R0=map_value(map=map_xdp_setup,ks=4,vs=4) ; if (!adjust_offset) @ xdp_native.bpf.c:438 181: (15) if r0 == 0x0 goto pc+500 ; frame1: R0=map_value(map=map_xdp_setup,ks=4,vs=4) 182: (63) *(u32 *)(r10 -344) = r9 invalid size of register spill processed 2736 insns (limit 1000000) max_states_per_insn 5 total_states 190 peak_states 156 mark_read 19 -- END PROG LOAD LOG -- libbpf: prog 'xdp_prog': failed to load: -13 libbpf: failed to load object '/root/ksft-net-drv/net/lib/xdp_native.bpf.o' tools/testing/selftests/net/lib/xdp_native.bpf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/lib/xdp_native.bpf.c b/tools/testing/selftests/net/lib/xdp_native.bpf.c index c368fc045f4b..64f05229ab24 100644 --- a/tools/testing/selftests/net/lib/xdp_native.bpf.c +++ b/tools/testing/selftests/net/lib/xdp_native.bpf.c @@ -332,7 +332,7 @@ static __u16 csum_fold_helper(__u32 csum) } static int xdp_adjst_tail_shrnk_data(struct xdp_md *ctx, __u16 offset, - __u32 hdr_len) + unsigned long hdr_len) { char tmp_buff[MAX_ADJST_OFFSET]; __u32 
buff_pos, udp_csum = 0; @@ -422,8 +422,9 @@ static int xdp_adjst_tail(struct xdp_md *ctx, __u16 port) { struct udphdr *udph = NULL; __s32 *adjust_offset, *val; - __u32 key, hdr_len; + unsigned long hdr_len; void *offset_ptr; + __u32 key; __u8 tag; int ret; -- 2.47.3

1 month, 4 weeks

2
1
0 0

[PATCH v4 0/2] libbpf: fix BTF dedup to support recursive typedef

by Paul Houssel

Pahole fails to encode BTF for some Go projects (e.g. Kubernetes and Podman) due to recursive type definitions that create reference loops not representable in C. These recursive typedefs trigger a failure in the BTF deduplication algorithm. This patch extends btf_dedup_struct_types() to properly handle potential recursion for BTF_KIND_TYPEDEF, similar to how recursion is already handled for BTF_KIND_STRUCT. This allows pahole to successfully generate BTF for Go binaries using recursive types without impacting existing C-based workflows. Changes in v4: fix typo found by Claude-based CI Changes in v3: 1. Patch 1: Adjusted the comment of btf_dedup_ref_type() to refer to typedef as well. 2. Patch 2: Update of the "dedup: recursive typedef" test to include a duplicated version of the types to make sure deduplication still happens in this case. Changes in v2: 1. Patch 1: Refactored code to prevent copying existing logic. Instead of adding a new function we modify the existing btf_dedup_struct_type() function to handle the BTF_KIND_TYPEDEF case. Calls to btf_hash_struct() and btf_shallow_equal_struct() are replaced with calls to functions that select btf_hash_struct() / btf_hash_typedef() based on the type. 2. Patch 2: Added tests v3: https://lore.kernel.org/lkml/cover.1763024337.git.paul.houssel@orange.com/ v2: https://lore.kernel.org/lkml/cover.1762956564.git.paul.houssel@orange.com/ v1: https://lore.kernel.org/lkml/20251107153408.159342-1-paulhoussel2@gmail.com/ Paul Houssel (2): libbpf: fix BTF dedup to support recursive typedef definitions selftests/bpf: add BTF dedup tests for recursive typedef definitions tools/lib/bpf/btf.c | 71 +++++++++++++++----- tools/testing/selftests/bpf/prog_tests/btf.c | 65 ++++++++++++++++++ 2 files changed, 120 insertions(+), 16 deletions(-) -- 2.51.0

1 month, 4 weeks

3
5
0 0

2026

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-kselftest-mirror