May 2021 - Linux-kselftest-mirror

[PATCH 1/2] KVM: Deliver VM fault signals to userspace

by James Houghton

This patch has been written to support page-ins using userfaultfd's SIGBUS feature. When a userfaultfd is created with UFFD_FEATURE_SIGBUS, `handle_userfault` will return VM_FAULT_SIGBUS instead of putting the calling thread to sleep. Normal (non-guest) threads that access memory that has been registered with a UFFD_FEATURE_SIGBUS userfaultfd receive a SIGBUS. When a vCPU gets an EPT page fault in a userfaultfd-registered region, KVM calls into `handle_userfault` to resolve the page fault. With UFFD_FEATURE_SIGBUS, VM_FAULT_SIGBUS is returned, but a SIGBUS is never delivered to the userspace thread. This patch propagates the VM_FAULT_SIGBUS error up to KVM, where we then send the signal. Upon receiving a VM_FAULT_SIGBUS, the KVM_RUN ioctl will exit to userspace. This functionality already exists. This allows a hypervisor to do page-ins with UFFD_FEATURE_SIGBUS: 1. Set up a SIGBUS handler to save the address of the SIGBUS (to a thread-local variable). 2. Enter the guest. 3. Immediately after KVM_RUN returns, check if the address has been set. 4. If an address has been set, we exited due to a page fault that we can now handle. 5. Userspace can do anything it wants to make the memory available, using MODE_NOWAKE for the UFFDIO memory installation ioctls. 6. Re-enter the guest. If the memory still isn't ready, this process will repeat. This style of demand paging is significantly faster than the standard poll/read/wake mechanism userfaultfd uses and completely bypasses the userfaultfd waitq. For a single vCPU, page-in throughput increases by about 3-4x. 
Signed-off-by: James Houghton <jthoughton(a)google.com> Suggested-by: Jue Wang <juew(a)google.com> --- include/linux/hugetlb.h | 2 +- include/linux/mm.h | 3 ++- mm/gup.c | 57 +++++++++++++++++++++++++++-------------- mm/hugetlb.c | 5 +++- virt/kvm/kvm_main.c | 30 +++++++++++++++++++++- 5 files changed, 74 insertions(+), 23 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index b92f25ccef58..a777fb254df0 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -119,7 +119,7 @@ int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_ar long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, unsigned long *, long, unsigned int, - int *); + int *, int *); void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long, struct page *); void __unmap_hugepage_range_final(struct mmu_gather *tlb, diff --git a/include/linux/mm.h b/include/linux/mm.h index 322ec61d0da7..1dcd1ac81992 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1824,7 +1824,8 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages, long pin_user_pages_locked(unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, int *locked); long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, - struct page **pages, unsigned int gup_flags); + struct page **pages, unsigned int gup_flags, + int *fault_error); long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages, struct page **pages, unsigned int gup_flags); diff --git a/mm/gup.c b/mm/gup.c index 0697134b6a12..ab55a67aef78 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -881,7 +881,8 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address, * is, *@locked will be set to 0 and -EBUSY returned. 
*/ static int faultin_page(struct vm_area_struct *vma, - unsigned long address, unsigned int *flags, int *locked) + unsigned long address, unsigned int *flags, int *locked, + int *fault_error) { unsigned int fault_flags = 0; vm_fault_t ret; @@ -906,6 +907,8 @@ static int faultin_page(struct vm_area_struct *vma, } ret = handle_mm_fault(vma, address, fault_flags, NULL); + if (fault_error) + *fault_error = ret; if (ret & VM_FAULT_ERROR) { int err = vm_fault_to_errno(ret, *flags); @@ -996,6 +999,8 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) * @vmas: array of pointers to vmas corresponding to each page. * Or NULL if the caller does not require them. * @locked: whether we're still with the mmap_lock held + * @fault_error: VM fault error from handle_mm_fault. NULL if the caller + * does not require this error. * * Returns either number of pages pinned (which may be less than the * number requested), or an error. Details about the return value: @@ -1040,6 +1045,13 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) * when it's been released. Otherwise, it must be held for either * reading or writing and will not be released. * + * If @fault_error != NULL, __get_user_pages will return the VM fault error + * from handle_mm_fault() in this argument in the event of a VM fault error. + * On success (ret == nr_pages) fault_error is zero. + * On failure (ret != nr_pages) fault_error may still be 0 if the error did + * not originate from handle_mm_fault(). + * + * * In most cases, get_user_pages or get_user_pages_fast should be used * instead of __get_user_pages. __get_user_pages should be used only if * you need some special @gup_flags. 
@@ -1047,7 +1059,8 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) static long __get_user_pages(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, - struct vm_area_struct **vmas, int *locked) + struct vm_area_struct **vmas, int *locked, + int *fault_error) { long ret = 0, i = 0; struct vm_area_struct *vma = NULL; @@ -1097,7 +1110,7 @@ static long __get_user_pages(struct mm_struct *mm, if (is_vm_hugetlb_page(vma)) { i = follow_hugetlb_page(mm, vma, pages, vmas, &start, &nr_pages, i, - gup_flags, locked); + gup_flags, locked, fault_error); if (locked && *locked == 0) { /* * We've got a VM_FAULT_RETRY @@ -1124,7 +1137,8 @@ static long __get_user_pages(struct mm_struct *mm, page = follow_page_mask(vma, start, foll_flags, &ctx); if (!page) { - ret = faultin_page(vma, start, &foll_flags, locked); + ret = faultin_page(vma, start, &foll_flags, locked, + fault_error); switch (ret) { case 0: goto retry; @@ -1280,7 +1294,8 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm, struct page **pages, struct vm_area_struct **vmas, int *locked, - unsigned int flags) + unsigned int flags, + int *fault_error) { long ret, pages_done; bool lock_dropped; @@ -1311,7 +1326,7 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm, lock_dropped = false; for (;;) { ret = __get_user_pages(mm, start, nr_pages, flags, pages, - vmas, locked); + vmas, locked, fault_error); if (!locked) /* VM_FAULT_RETRY couldn't trigger, bypass */ return ret; @@ -1371,7 +1386,7 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm, *locked = 1; ret = __get_user_pages(mm, start, 1, flags | FOLL_TRIED, - pages, NULL, locked); + pages, NULL, locked, fault_error); if (!*locked) { /* Continue to retry until we succeeded */ BUG_ON(ret != 0); @@ -1458,7 +1473,7 @@ long populate_vma_page_range(struct vm_area_struct *vma, * not result in a stack expansion that 
recurses back here. */ return __get_user_pages(mm, start, nr_pages, gup_flags, - NULL, NULL, locked); + NULL, NULL, locked, NULL); } /* @@ -1524,7 +1539,7 @@ int __mm_populate(unsigned long start, unsigned long len, int ignore_errors) static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, struct page **pages, struct vm_area_struct **vmas, int *locked, - unsigned int foll_flags) + unsigned int foll_flags, int *fault_error) { struct vm_area_struct *vma; unsigned long vm_flags; @@ -1590,7 +1605,8 @@ struct page *get_dump_page(unsigned long addr) if (mmap_read_lock_killable(mm)) return NULL; ret = __get_user_pages_locked(mm, addr, 1, &page, NULL, &locked, - FOLL_FORCE | FOLL_DUMP | FOLL_GET); + FOLL_FORCE | FOLL_DUMP | FOLL_GET, + NULL); if (locked) mmap_read_unlock(mm); @@ -1704,11 +1720,11 @@ static long __gup_longterm_locked(struct mm_struct *mm, if (!(gup_flags & FOLL_LONGTERM)) return __get_user_pages_locked(mm, start, nr_pages, pages, vmas, - NULL, gup_flags); + NULL, gup_flags, NULL); flags = memalloc_pin_save(); do { rc = __get_user_pages_locked(mm, start, nr_pages, pages, vmas, - NULL, gup_flags); + NULL, gup_flags, NULL); if (rc <= 0) break; rc = check_and_migrate_movable_pages(rc, pages, gup_flags); @@ -1764,7 +1780,8 @@ static long __get_user_pages_remote(struct mm_struct *mm, return __get_user_pages_locked(mm, start, nr_pages, pages, vmas, locked, - gup_flags | FOLL_TOUCH | FOLL_REMOTE); + gup_flags | FOLL_TOUCH | FOLL_REMOTE, + NULL); } /** @@ -1941,7 +1958,7 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages, return __get_user_pages_locked(current->mm, start, nr_pages, pages, NULL, locked, - gup_flags | FOLL_TOUCH); + gup_flags | FOLL_TOUCH, NULL); } EXPORT_SYMBOL(get_user_pages_locked); @@ -1961,7 +1978,8 @@ EXPORT_SYMBOL(get_user_pages_locked); * (e.g. FOLL_FORCE) are not required. 
*/ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, - struct page **pages, unsigned int gup_flags) + struct page **pages, unsigned int gup_flags, + int *fault_error) { struct mm_struct *mm = current->mm; int locked = 1; @@ -1978,7 +1996,8 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, mmap_read_lock(mm); ret = __get_user_pages_locked(mm, start, nr_pages, pages, NULL, - &locked, gup_flags | FOLL_TOUCH); + &locked, gup_flags | FOLL_TOUCH, + fault_error); if (locked) mmap_read_unlock(mm); return ret; @@ -2550,7 +2569,7 @@ static int __gup_longterm_unlocked(unsigned long start, int nr_pages, mmap_read_unlock(current->mm); } else { ret = get_user_pages_unlocked(start, nr_pages, - pages, gup_flags); + pages, gup_flags, NULL); } return ret; @@ -2880,7 +2899,7 @@ long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages, return -EINVAL; gup_flags |= FOLL_PIN; - return get_user_pages_unlocked(start, nr_pages, pages, gup_flags); + return get_user_pages_unlocked(start, nr_pages, pages, gup_flags, NULL); } EXPORT_SYMBOL(pin_user_pages_unlocked); @@ -2909,6 +2928,6 @@ long pin_user_pages_locked(unsigned long start, unsigned long nr_pages, gup_flags |= FOLL_PIN; return __get_user_pages_locked(current->mm, start, nr_pages, pages, NULL, locked, - gup_flags | FOLL_TOUCH); + gup_flags | FOLL_TOUCH, NULL); } EXPORT_SYMBOL(pin_user_pages_locked); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 3db405dea3dc..889ac33d57d5 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5017,7 +5017,8 @@ static void record_subpages_vmas(struct page *page, struct vm_area_struct *vma, long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, unsigned long *nr_pages, - long i, unsigned int flags, int *locked) + long i, unsigned int flags, int *locked, + int *fault_error) { unsigned long pfn_offset; unsigned long vaddr = *position; @@ -5103,6 
+5104,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, } ret = hugetlb_fault(mm, vma, vaddr, fault_flags); if (ret & VM_FAULT_ERROR) { + if (fault_error) + *fault_error = ret; err = vm_fault_to_errno(ret, flags); remainder = 0; break; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2799c6660cce..0a20d926ae32 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2004,6 +2004,30 @@ static bool hva_to_pfn_fast(unsigned long addr, bool write_fault, return false; } +static void kvm_send_vm_fault_signal(int fault_error, int errno, + unsigned long address, + struct task_struct *tsk) +{ + kernel_siginfo_t info; + + clear_siginfo(&info); + + if (fault_error == VM_FAULT_SIGBUS) + info.si_signo = SIGBUS; + else if (fault_error == VM_FAULT_SIGSEGV) + info.si_signo = SIGSEGV; + else + // Other fault errors should not result in a signal. + return; + + info.si_errno = errno; + info.si_code = BUS_ADRERR; + info.si_addr = (void __user *)address; + info.si_addr_lsb = PAGE_SHIFT; + + send_sig_info(info.si_signo, &info, tsk); +} + /* * The slow path to get the pfn of the specified host virtual address, * 1 indicates success, -errno is returned if error is detected. @@ -2014,6 +2038,7 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault, unsigned int flags = FOLL_HWPOISON; struct page *page; int npages = 0; + int fault_error; might_sleep(); @@ -2025,7 +2050,10 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault, if (async) flags |= FOLL_NOWAIT; - npages = get_user_pages_unlocked(addr, 1, &page, flags); + npages = get_user_pages_unlocked(addr, 1, &page, flags, &fault_error); + if (fault_error & VM_FAULT_ERROR) + kvm_send_vm_fault_signal(fault_error, npages, addr, current); + if (npages != 1) return npages; -- 2.31.1.751.gd2f1c929bd-goog

3 years, 9 months

3
5
0 0

[PATCH 1/2] kselftest/vm: Rename TARGETS to BUILD_TARGETS

by Alistair Popple

The TARGETS variable can be used when building selftests to specify a subset of selftests to build and run like so: make -C tools/testing/selftests TARGETS=ptrace run_tests However specifying TARGETS=vm results in the following build error as the TARGETS variable is used internally in the Makefile: make[1]: *** No rule to make target 'vm.c', needed by 'linux/kselftest/vm/vm_32'. Stop. Fix this by renaming the vm Makefile TARGETS variable to BUILD_TARGETS. Signed-off-by: Alistair Popple <apopple(a)nvidia.com> --- tools/testing/selftests/vm/Makefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 73e1cc96d7c2..110751ce8701 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -49,9 +49,9 @@ CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_progra CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_64bit_program.c) CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_program.c -no-pie) -TARGETS := protection_keys -BINARIES_32 := $(TARGETS:%=%_32) -BINARIES_64 := $(TARGETS:%=%_64) +BUILD_TARGETS := protection_keys +BINARIES_32 := $(BUILD_TARGETS:%=%_32) +BINARIES_64 := $(BUILD_TARGETS:%=%_64) ifeq ($(CAN_BUILD_WITH_NOPIE),1) CFLAGS += -no-pie @@ -104,7 +104,7 @@ $(BINARIES_32): CFLAGS += -m32 $(BINARIES_32): LDLIBS += -lrt -ldl -lm $(BINARIES_32): $(OUTPUT)/%_32: %.c $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@ -$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-32,$(t)))) +$(foreach t,$(BUILD_TARGETS),$(eval $(call gen-target-rule-32,$(t)))) endif ifeq ($(CAN_BUILD_X86_64),1) @@ -112,7 +112,7 @@ $(BINARIES_64): CFLAGS += -m64 $(BINARIES_64): LDLIBS += -lrt -ldl $(BINARIES_64): $(OUTPUT)/%_64: %.c $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@ -$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-64,$(t)))) +$(foreach 
t,$(BUILD_TARGETS),$(eval $(call gen-target-rule-64,$(t)))) endif # x86_64 users should be encouraged to install 32-bit libraries -- 2.20.1

3 years, 9 months

1
1
0 0

[PATCH v2 1/2] KVM: selftests: Keep track of memslots more efficiently

by Maciej S. Szmigiero

From: "Maciej S. Szmigiero" <maciej.szmigiero(a)oracle.com> The KVM selftest framework was using a simple list for keeping track of the memslots currently in use. This resulted in lookups and adding a single memslot being O(n), the latter due to linear scanning of the existing memslot set to check for the presence of any conflicting entries. Before this change, benchmarking a high count of memslots was more or less impossible as pretty much all the benchmark time was spent in the selftest framework code. We can simply use a rbtree for keeping track of both gfn and hva. We don't need an interval tree for hva here as we can't have overlapping memslots because we allocate a completely new memory chunk for each new memslot. Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero(a)oracle.com> Reviewed-by: Andrew Jones <drjones(a)redhat.com> --- Changes from v1: * Add Andrew's Reviewed-by: tag tools/testing/selftests/kvm/Makefile | 2 +- tools/testing/selftests/kvm/lib/kvm_util.c | 141 ++++++++++++++---- .../selftests/kvm/lib/kvm_util_internal.h | 15 +- tools/testing/selftests/kvm/lib/rbtree.c | 1 + 4 files changed, 124 insertions(+), 35 deletions(-) create mode 100644 tools/testing/selftests/kvm/lib/rbtree.c diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index a6d61f451f88..c30a21c1d676 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -33,7 +33,7 @@ ifeq ($(ARCH),s390) UNAME_M := s390x endif -LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c +LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/rbtree.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c 
lib/s390x/diag318_test_handler.c diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index e5fbf16f725b..69ee0a72c7d8 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -197,7 +197,9 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) TEST_ASSERT(vm != NULL, "Insufficient Memory"); INIT_LIST_HEAD(&vm->vcpus); - INIT_LIST_HEAD(&vm->userspace_mem_regions); + vm->regions.gpa_tree = RB_ROOT; + vm->regions.hva_tree = RB_ROOT; + hash_init(vm->regions.slot_hash); vm->mode = mode; vm->type = 0; @@ -349,13 +351,14 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, */ void kvm_vm_restart(struct kvm_vm *vmp, int perm) { + int ctr; struct userspace_mem_region *region; vm_open(vmp, perm); if (vmp->has_irqchip) vm_create_irqchip(vmp); - list_for_each_entry(region, &vmp->userspace_mem_regions, list) { + hash_for_each(vmp->regions.slot_hash, ctr, region, slot_node) { int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region); TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" " rc: %i errno: %i\n" @@ -418,14 +421,21 @@ uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm) static struct userspace_mem_region * userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end) { - struct userspace_mem_region *region; + struct rb_node *node; - list_for_each_entry(region, &vm->userspace_mem_regions, list) { + for (node = vm->regions.gpa_tree.rb_node; node; ) { + struct userspace_mem_region *region = + container_of(node, struct userspace_mem_region, gpa_node); uint64_t existing_start = region->region.guest_phys_addr; uint64_t existing_end = region->region.guest_phys_addr + region->region.memory_size - 1; if (start <= existing_end && end >= existing_start) return region; + + if (start < existing_start) + node = node->rb_left; + else + node = node->rb_right; } return NULL; @@ -540,11 +550,16 @@ 
void kvm_vm_release(struct kvm_vm *vmp) } static void __vm_mem_region_delete(struct kvm_vm *vm, - struct userspace_mem_region *region) + struct userspace_mem_region *region, + bool unlink) { int ret; - list_del(&region->list); + if (unlink) { + rb_erase(&region->gpa_node, &vm->regions.gpa_tree); + rb_erase(&region->hva_node, &vm->regions.hva_tree); + hash_del(&region->slot_node); + } region->region.memory_size = 0; ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region); @@ -563,14 +578,16 @@ static void __vm_mem_region_delete(struct kvm_vm *vm, */ void kvm_vm_free(struct kvm_vm *vmp) { - struct userspace_mem_region *region, *tmp; + int ctr; + struct hlist_node *node; + struct userspace_mem_region *region; if (vmp == NULL) return; /* Free userspace_mem_regions. */ - list_for_each_entry_safe(region, tmp, &vmp->userspace_mem_regions, list) - __vm_mem_region_delete(vmp, region); + hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node) + __vm_mem_region_delete(vmp, region, false); /* Free sparsebit arrays. 
*/ sparsebit_free(&vmp->vpages_valid); @@ -652,6 +669,57 @@ int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len) return 0; } +static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree, + struct userspace_mem_region *region) +{ + struct rb_node **cur, *parent; + + for (cur = &gpa_tree->rb_node, parent = NULL; *cur; ) { + struct userspace_mem_region *cregion; + + cregion = container_of(*cur, typeof(*cregion), gpa_node); + parent = *cur; + if (region->region.guest_phys_addr < + cregion->region.guest_phys_addr) + cur = &(*cur)->rb_left; + else { + TEST_ASSERT(region->region.guest_phys_addr != + cregion->region.guest_phys_addr, + "Duplicate GPA in region tree"); + + cur = &(*cur)->rb_right; + } + } + + rb_link_node(&region->gpa_node, parent, cur); + rb_insert_color(&region->gpa_node, gpa_tree); +} + +static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree, + struct userspace_mem_region *region) +{ + struct rb_node **cur, *parent; + + for (cur = &hva_tree->rb_node, parent = NULL; *cur; ) { + struct userspace_mem_region *cregion; + + cregion = container_of(*cur, typeof(*cregion), hva_node); + parent = *cur; + if (region->host_mem < cregion->host_mem) + cur = &(*cur)->rb_left; + else { + TEST_ASSERT(region->host_mem != + cregion->host_mem, + "Duplicate HVA in region tree"); + + cur = &(*cur)->rb_right; + } + } + + rb_link_node(&region->hva_node, parent, cur); + rb_insert_color(&region->hva_node, hva_tree); +} + /* * VM Userspace Memory Region Add * @@ -716,7 +784,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, (uint64_t) region->region.memory_size); /* Confirm no region with the requested slot already exists. 
*/ - list_for_each_entry(region, &vm->userspace_mem_regions, list) { + hash_for_each_possible(vm->regions.slot_hash, region, slot_node, + slot) { if (region->region.slot != slot) continue; @@ -796,8 +865,10 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, ret, errno, slot, flags, guest_paddr, (uint64_t) region->region.memory_size); - /* Add to linked-list of memory regions. */ - list_add(&region->list, &vm->userspace_mem_regions); + /* Add to quick lookup data structures */ + vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region); + vm_userspace_mem_region_hva_insert(&vm->regions.hva_tree, region); + hash_add(vm->regions.slot_hash, &region->slot_node, slot); } /* @@ -820,10 +891,10 @@ memslot2region(struct kvm_vm *vm, uint32_t memslot) { struct userspace_mem_region *region; - list_for_each_entry(region, &vm->userspace_mem_regions, list) { + hash_for_each_possible(vm->regions.slot_hash, region, slot_node, + memslot) if (region->region.slot == memslot) return region; - } fprintf(stderr, "No mem region with the requested slot found,\n" " requested slot: %u\n", memslot); @@ -908,7 +979,7 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa) */ void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot) { - __vm_mem_region_delete(vm, memslot2region(vm, slot)); + __vm_mem_region_delete(vm, memslot2region(vm, slot), true); } /* @@ -1180,16 +1251,14 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa) { struct userspace_mem_region *region; - list_for_each_entry(region, &vm->userspace_mem_regions, list) { - if ((gpa >= region->region.guest_phys_addr) - && (gpa <= (region->region.guest_phys_addr - + region->region.memory_size - 1))) - return (void *) ((uintptr_t) region->host_mem - + (gpa - region->region.guest_phys_addr)); + region = userspace_mem_region_find(vm, gpa, gpa); + if (!region) { + TEST_FAIL("No vm physical memory at 0x%lx", gpa); + return NULL; } - TEST_FAIL("No vm physical memory at 0x%lx", gpa); - return NULL; + 
return (void *)((uintptr_t)region->host_mem + + (gpa - region->region.guest_phys_addr)); } /* @@ -1211,15 +1280,22 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa) */ vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva) { - struct userspace_mem_region *region; + struct rb_node *node; + + for (node = vm->regions.hva_tree.rb_node; node; ) { + struct userspace_mem_region *region = + container_of(node, struct userspace_mem_region, hva_node); + + if (hva >= region->host_mem) { + if (hva <= (region->host_mem + + region->region.memory_size - 1)) + return (vm_paddr_t)((uintptr_t) + region->region.guest_phys_addr + + (hva - (uintptr_t)region->host_mem)); - list_for_each_entry(region, &vm->userspace_mem_regions, list) { - if ((hva >= region->host_mem) - && (hva <= (region->host_mem - + region->region.memory_size - 1))) - return (vm_paddr_t) ((uintptr_t) - region->region.guest_phys_addr - + (hva - (uintptr_t) region->host_mem)); + node = node->rb_right; + } else + node = node->rb_left; } TEST_FAIL("No mapping to a guest physical address, hva: %p", hva); @@ -1745,6 +1821,7 @@ int _kvm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg) */ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) { + int ctr; struct userspace_mem_region *region; struct vcpu *vcpu; @@ -1752,7 +1829,7 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd); fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size); fprintf(stream, "%*sMem Regions:\n", indent, ""); - list_for_each_entry(region, &vm->userspace_mem_regions, list) { + hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) { fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx " "host_virt: %p\n", indent + 2, "", (uint64_t) region->region.guest_phys_addr, diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h index 34465dc562d8..af310110602b 100644 --- 
a/tools/testing/selftests/kvm/lib/kvm_util_internal.h +++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h @@ -8,6 +8,9 @@ #ifndef SELFTEST_KVM_UTIL_INTERNAL_H #define SELFTEST_KVM_UTIL_INTERNAL_H +#include "linux/hashtable.h" +#include "linux/rbtree.h" + #include "sparsebit.h" #define KVM_DEV_PATH "/dev/kvm" @@ -20,7 +23,9 @@ struct userspace_mem_region { void *host_mem; void *mmap_start; size_t mmap_size; - struct list_head list; + struct rb_node gpa_node; + struct rb_node hva_node; + struct hlist_node slot_node; }; struct vcpu { @@ -33,6 +38,12 @@ struct vcpu { uint32_t dirty_gfns_count; }; +struct userspace_mem_regions { + struct rb_root gpa_tree; + struct rb_root hva_tree; + DECLARE_HASHTABLE(slot_hash, 9); +}; + struct kvm_vm { int mode; unsigned long type; @@ -45,7 +56,7 @@ struct kvm_vm { unsigned int va_bits; uint64_t max_gfn; struct list_head vcpus; - struct list_head userspace_mem_regions; + struct userspace_mem_regions regions; struct sparsebit *vpages_valid; struct sparsebit *vpages_mapped; bool has_irqchip; diff --git a/tools/testing/selftests/kvm/lib/rbtree.c b/tools/testing/selftests/kvm/lib/rbtree.c new file mode 100644 index 000000000000..a703f0194ea3 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/rbtree.c @@ -0,0 +1 @@ +#include "../../../../lib/rbtree.c"

3 years, 9 months

2
2
0 0

[PATCH v5 26/28] selftest/x86/amx: Test case for AMX state copy optimization in signal delivery

by Chang S. Bae

Add a test case to verify that unused states are excluded, by leaving a known pattern on the signal stack and verifying that it is still intact after taking a subsequent signal. Signed-off-by: Chang S. Bae <chang.seok.bae(a)intel.com> Reviewed-by: Len Brown <len.brown(a)intel.com> Cc: x86(a)kernel.org Cc: linux-kernel(a)vger.kernel.org Cc: linux-kselftest(a)vger.kernel.org --- Changes from v4: * Separated out as a new patch. * Improved the test routine by explicitly checking sigframe write. --- tools/testing/selftests/x86/amx.c | 137 ++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) diff --git a/tools/testing/selftests/x86/amx.c b/tools/testing/selftests/x86/amx.c index 7d177c03cdcf..c5a5582e2b6f 100644 --- a/tools/testing/selftests/x86/amx.c +++ b/tools/testing/selftests/x86/amx.c @@ -562,6 +562,142 @@ static void test_ptrace(void) test_tile_write(); } +/* Signal handling test */ + +static bool init_tiledata_state_before_signal; +static bool load_tiledata_at_first; +static bool sigalarmed, sigused; + +#define SIGFRAME_TILEDATA_SIGNATURE 0xFF + +static void handle_sigusr1(int sig, siginfo_t *info, void *ctx_void) +{ + void *xsave = ((ucontext_t *)ctx_void)->uc_mcontext.fpregs; + + memset(xsave + xsave_xtiledata_offset, SIGFRAME_TILEDATA_SIGNATURE, xsave_xtiledata_size); + + sigused = true; +} + +static void handle_sigalrm(int sig, siginfo_t *info, void *ctx_void) +{ + void *xsave = ((ucontext_t *)ctx_void)->uc_mcontext.fpregs; + char d = SIGFRAME_TILEDATA_SIGNATURE; + bool written = false; + int i; + + for (i = 0; i < xsave_xtiledata_size; i++) { + written = memcmp(xsave + xsave_xtiledata_offset + i, &d, 1); + if (written) + break; + } + + if (__xgetbv(1) & XFEATURE_MASK_XTILEDATA) + err(1, "tile data state at signal delivery"); + + if (init_tiledata_state_before_signal && written) { + errs++; + printf("[FAIL]\tTile data was %swritten on sigframe.\n", !written ? 
"not " : ""); + } + + set_xstatebv(xsave_buffer, XFEATURE_MASK_XTILEDATA); + set_tiledata(xsave_buffer + xsave_xtiledata_offset); + xrstor(xsave_buffer, -1, -1); + sigalarmed = true; +} + +static void test_signal_handling(void) +{ + pid_t child; + + sigalarmed = false; + sigused = false; + + child = fork(); + if (child < 0) { + err(1, "fork"); + } else if (child > 0) { + do { + int status; + + wait(&status); + if (WIFSTOPPED(status)) + kill(child, SIGCONT); + else if (WIFEXITED(status) && !WEXITSTATUS(status)) + break; + else + err(1, "signal test child"); + } while (1); + return; + } + + printf("\tBefore signal, load tile data -- %s, re-initialized -- %s:\n", + load_tiledata_at_first ? "yes" : "no", + init_tiledata_state_before_signal ? "yes" : "no"); + + syscall(SYS_arch_prctl, ARCH_GET_XSTATE, XFEATURE_MASK_XTILE); + + raise(SIGUSR1); + if (!sigused) + err(1, "SIGUSR1"); + + if (load_tiledata_at_first) { + set_xstatebv(xsave_buffer, XFEATURE_MASK_XTILEDATA); + set_tiledata(xsave_buffer + xsave_xtiledata_offset); + xrstor(xsave_buffer, -1, -1); + memcpy(tiledata, xsave_buffer + xsave_xtiledata_offset, xsave_xtiledata_size); + } + + if (init_tiledata_state_before_signal) { + set_xstatebv(xsave_buffer, 0); + xrstor(xsave_buffer, -1, -1); + memset(tiledata, 0, xsave_xtiledata_size); + } + + raise(SIGALRM); + if (!sigalarmed) + err(1, "SIGALRM"); + + __xsave(xsave_buffer, XFEATURE_MASK_XTILEDATA, 0); + if (memcmp(tiledata, xsave_buffer + xsave_xtiledata_offset, xsave_xtiledata_size)) { + errs++; + printf("[FAIL]\tTile data was not restored at sigreturn\n"); + } + + if (errs) + nerrs++; + else + printf("[OK]\tTile data was %swritten on sigframe and restored at sigreturn\n", + init_tiledata_state_before_signal ? 
"not " : ""); + _exit(0); +} + +static void test_signal(void) +{ + printf("[RUN]\tCheck tile data state in signal path:\n"); + + sethandler(SIGALRM, handle_sigalrm, 0); + sethandler(SIGUSR1, handle_sigusr1, 0); + + load_tiledata_at_first = false; + init_tiledata_state_before_signal = true; + errs = 0; + test_signal_handling(); + + load_tiledata_at_first = true; + init_tiledata_state_before_signal = false; + errs = 0; + test_signal_handling(); + + load_tiledata_at_first = true; + init_tiledata_state_before_signal = true; + errs = 0; + test_signal_handling(); + + clearhandler(SIGALRM); + clearhandler(SIGUSR1); +} + int main(void) { /* Check hardware availability at first */ @@ -592,6 +728,7 @@ int main(void) test_fork(); test_context_switch(); test_ptrace(); + test_signal(); clearhandler(SIGSEGV); -- 2.17.1

3 years, 9 months

1
0
0 0

[PATCH v5 23/28] selftest/x86/amx: Test cases for the AMX state management

by Chang S. Bae

This selftest verifies that the xstate arch_prctl works for AMX state and that a forked task has the AMX state in the INIT-state. In addition, this test verifies that the kernel correctly context switches unique AMX data, when multiple threads are using AMX. Finally, the test verifies that ptrace() can insert data into existing threads. These test cases do not depend on AMX compiler support, as they employ userspace-XSAVE directly to access AMX state. Signed-off-by: Chang S. Bae <chang.seok.bae(a)intel.com> Reviewed-by: Len Brown <len.brown(a)intel.com> Cc: linux-kernel(a)vger.kernel.org Cc: linux-kselftest(a)vger.kernel.org --- Changes from v4: * Added test for arch_prctl. * Excluded tile config details to focus on testing the kernel's ability to manage dynamic user state. * Removed tile instructions. * Simplified the fork() and ptrace() test routine. * Massaged the changelog. Changes from v2: * Updated the test messages and the changelog as tile data is not inherited to a child anymore. * Removed bytecode for the instructions already supported by binutils. * Changed to check the XSAVE availability in a reliable way. 
Changes from v1: * Removed signal testing code --- tools/testing/selftests/x86/Makefile | 2 +- tools/testing/selftests/x86/amx.c | 601 +++++++++++++++++++++++++++ 2 files changed, 602 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/x86/amx.c diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 333980375bc7..2f7feb03867b 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -17,7 +17,7 @@ TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer -TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering +TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering amx # Some selftests require 32bit support enabled also on 64bit systems TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall diff --git a/tools/testing/selftests/x86/amx.c b/tools/testing/selftests/x86/amx.c new file mode 100644 index 000000000000..7d177c03cdcf --- /dev/null +++ b/tools/testing/selftests/x86/amx.c @@ -0,0 +1,601 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <err.h> +#include <errno.h> +#include <elf.h> +#include <pthread.h> +#include <setjmp.h> +#include <stdio.h> +#include <string.h> +#include <stdbool.h> +#include <unistd.h> +#include <x86intrin.h> + +#include <linux/futex.h> + +#include <sys/ptrace.h> +#include <sys/shm.h> +#include <sys/syscall.h> +#include <sys/wait.h> +#include <sys/uio.h> + +#ifndef __x86_64__ +# error This test is 64-bit only +#endif + +static inline uint64_t __xgetbv(uint32_t index) +{ + uint32_t eax, edx; + + asm volatile("xgetbv;" + : "=a" (eax), "=d" (edx) + : "c" (index)); + return eax + ((uint64_t)edx << 32); +} + +static inline void __cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) +{ + asm volatile("cpuid;" + : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) + 
: "0" (*eax), "2" (*ecx)); +} + +static inline void __xsave(void *buffer, uint32_t lo, uint32_t hi) +{ + asm volatile("xsave (%%rdi)" + : : "D" (buffer), "a" (lo), "d" (hi) + : "memory"); +} + +static inline void __xrstor(void *buffer, uint32_t lo, uint32_t hi) +{ + asm volatile("xrstor (%%rdi)" + : : "D" (buffer), "a" (lo), "d" (hi)); +} + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static void clearhandler(int sig) +{ + struct sigaction sa; + + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = SIG_DFL; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static jmp_buf jmpbuf; + +/* Hardware info check: */ + +static bool xsave_disabled; + +static void handle_sigill(int sig, siginfo_t *si, void *ctx_void) +{ + xsave_disabled = true; + siglongjmp(jmpbuf, 1); +} + +#define XFEATURE_XTILECFG 17 +#define XFEATURE_XTILEDATA 18 +#define XFEATURE_MASK_XTILECFG (1 << XFEATURE_XTILECFG) +#define XFEATURE_MASK_XTILEDATA (1 << XFEATURE_XTILEDATA) +#define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILECFG | XFEATURE_MASK_XTILEDATA) + +static inline bool check_xsave_capability(void) +{ + sethandler(SIGILL, handle_sigill, 0); + + if ((!sigsetjmp(jmpbuf, 1)) && (__xgetbv(0) & XFEATURE_MASK_XTILEDATA)) { + clearhandler(SIGILL); + return true; + } + + clearhandler(SIGILL); + return false; +} + +static uint32_t xsave_size; + +static uint32_t xsave_xtiledata_offset; +static uint32_t xsave_xtiledata_size; + +static uint32_t xsave_xtilecfg_offset; +static uint32_t xsave_xtilecfg_size; + +#define XSTATE_CPUID 0xd +#define XSTATE_USER_STATE_SUBLEAVE 0x0 + +static void check_cpuid(void) +{ + uint32_t eax, ebx, ecx, edx; + + eax = XSTATE_CPUID; + ecx = XSTATE_USER_STATE_SUBLEAVE; + + __cpuid(&eax, &ebx, &ecx, 
&edx); + if (!ebx) + err(1, "xstate cpuid: xsave size"); + + xsave_size = ebx; + + eax = XSTATE_CPUID; + ecx = XFEATURE_XTILECFG; + + __cpuid(&eax, &ebx, &ecx, &edx); + if (!eax || !ebx) + err(1, "xstate cpuid: tile config state"); + + xsave_xtilecfg_size = eax; + xsave_xtilecfg_offset = ebx; + + eax = XSTATE_CPUID; + ecx = XFEATURE_XTILEDATA; + + __cpuid(&eax, &ebx, &ecx, &edx); + if (!eax || !ebx) + err(1, "xstate cpuid: tile data state"); + + xsave_xtiledata_size = eax; + xsave_xtiledata_offset = ebx; +} + +/* The helpers for managing XSAVE buffer and tile states: */ + +#define XSAVE_HDR_OFFSET 512 + +static inline uint64_t get_xstatebv(void *xsave) +{ + return *(uint64_t *)(xsave + XSAVE_HDR_OFFSET); +} + +static inline void set_xstatebv(void *xsave, uint64_t bv) +{ + *(uint64_t *)(xsave + XSAVE_HDR_OFFSET) = bv; +} + +static void set_tiledata(void *tiledata) +{ + int *ptr = tiledata; + int data = rand(); + int i; + + for (i = 0; i < xsave_xtiledata_size / sizeof(int); i++, ptr++) + *ptr = data; +} + +static void *xsave_buffer, *tiledata; +static int nerrs, errs; + +static void handle_sigsegv(int sig, siginfo_t *si, void *ctx_void) +{ + siglongjmp(jmpbuf, 1); +} + +static bool xrstor(void *buffer, uint32_t lo, uint32_t hi) +{ + if (!sigsetjmp(jmpbuf, 1)) { + __xrstor(buffer, lo, hi); + return true; + } else { + return false; + } +} + +/* arch_prctl test */ + +#define ARCH_GET_XSTATE 0x1021 +#define ARCH_PUT_XSTATE 0x1022 + +#define ARCH_PRCTL_REPEAT 10 + +static void test_arch_prctl(void) +{ + bool xfd_armed; + pid_t child; + int rc, i; + + child = fork(); + if (child < 0) { + err(1, "fork"); + } else if (child > 0) { + int status; + + wait(&status); + if (!WIFEXITED(status) || WEXITSTATUS(status)) + err(1, "arch_prctl test child"); + return; + } + + set_xstatebv(xsave_buffer, XFEATURE_MASK_XTILE); + set_tiledata(xsave_buffer + xsave_xtiledata_offset); + + printf("[RUN]\tCheck ARCH_GET_XSTATE/ARCH_SET_XSTATE.\n"); + + printf("\tLoad tile data without GET:\n"); 
+ + if (!xrstor(xsave_buffer, -1, -1)) { + printf("[OK]\tBlocked.\n"); + } else { + nerrs++; + printf("[FAIL]\tSucceeded.\n"); + } + + printf("\tGET with invalid arg:\n"); + + rc = syscall(SYS_arch_prctl, ARCH_GET_XSTATE, -1); + if (rc == -EPERM) { + printf("[OK]\tEPERM was returned.\n"); + } else { + nerrs++; + printf("[FAIL]\tNo EPERM was returned.\n"); + } + + printf("\tGET with AMX state %d-times:\n", ARCH_PRCTL_REPEAT); + + for (i = 0; i < ARCH_PRCTL_REPEAT; i++) { + rc = syscall(SYS_arch_prctl, ARCH_GET_XSTATE, XFEATURE_MASK_XTILE); + if (rc != 0) + break; + + xfd_armed = !xrstor(xsave_buffer, -1, -1); + if (xfd_armed) + break; + } + + if (i == ARCH_PRCTL_REPEAT) { + printf("[OK]\tNo error and correctly disarmed XFD.\n"); + } else { + nerrs++; + i++; + if (rc) + printf("[FAIL]\t%d-th GET returned error (rc=%d).\n", i, rc); + else + printf("[FAIL]\t%d-th GET failed to disarm XFD.\n", i); + } + + printf("\tPUT with AMX state %d-times:\n", ARCH_PRCTL_REPEAT); + + for (i = 0; i < ARCH_PRCTL_REPEAT; i++) { + rc = syscall(SYS_arch_prctl, ARCH_PUT_XSTATE, XFEATURE_MASK_XTILE); + if (rc != 0) + break; + + xfd_armed = !xrstor(xsave_buffer, -1, -1); + if (xfd_armed) + break; + } + + if ((i == (ARCH_PRCTL_REPEAT - 1)) && xfd_armed) { + printf("[OK]\tNo error and re-armed XFD at the end.\n"); + } else { + nerrs++; + i++; + if (rc) + printf("[FAIL]\t%d-th PUT returned error (rc=%d).\n", i, rc); + else if (i == ARCH_PRCTL_REPEAT) + printf("[FAIL]\tthe final PUT failed to arm XFD.\n"); + else + printf("[FAIL]\t%d-th PUT disarm XFD.\n", i); + } + + _exit(0); +} + +/* Testing tile data inheritance */ + +static void test_fork(void) +{ + pid_t child, grandchild; + + child = fork(); + if (child < 0) { + err(1, "fork"); + } else if (child > 0) { + int status; + + wait(&status); + if (!WIFEXITED(status) || WEXITSTATUS(status)) + err(1, "fork test child"); + return; + } + + printf("[RUN]\tCheck tile data inheritance.\n\tBefore fork(), load tile data -- yes:\n"); + + 
set_xstatebv(xsave_buffer, XFEATURE_MASK_XTILE); + set_tiledata(xsave_buffer + xsave_xtiledata_offset); + memset(xsave_buffer + xsave_xtilecfg_offset, 1, xsave_xtilecfg_size); + syscall(SYS_arch_prctl, ARCH_GET_XSTATE, XFEATURE_MASK_XTILE); + xrstor(xsave_buffer, -1, -1); + + grandchild = fork(); + if (grandchild < 0) { + err(1, "fork"); + } else if (grandchild > 0) { + int status; + + wait(&status); + if (!WIFEXITED(status) || WEXITSTATUS(status)) + err(1, "fork test child"); + _exit(0); + } + + if (__xgetbv(1) & XFEATURE_MASK_XTILE) { + nerrs++; + printf("[FAIL]\tIn a child, AMX state is not initialized.\n"); + } else { + printf("[OK]\tIn a child, AMX state is initialized.\n"); + } + _exit(0); +} + +/* Context switching test */ + +#define ITERATIONS 10 +#define NUM_THREADS 5 + +struct futex_info { + int current; + int *futex; + int next; +}; + +static inline void command_wait(struct futex_info *info, int value) +{ + do { + sched_yield(); + } while (syscall(SYS_futex, info->futex, FUTEX_WAIT, value, 0, 0, 0)); +} + +static inline void command_wake(struct futex_info *info, int value) +{ + do { + *info->futex = value; + while (!syscall(SYS_futex, info->futex, FUTEX_WAKE, 1, 0, 0, 0)) + sched_yield(); + } while (0); +} + +static inline int get_iterative_value(int id) +{ + return ((id << 1) & ~0x1); +} + +static inline int get_endpoint_value(int id) +{ + return ((id << 1) | 0x1); +} + +static void *check_tiledata(void *info) +{ + struct futex_info *finfo = (struct futex_info *)info; + void *xsave, *tiledata; + int i; + + xsave = aligned_alloc(64, xsave_size); + if (!xsave) + err(1, "aligned_alloc()"); + + tiledata = malloc(xsave_xtiledata_size); + if (!tiledata) + err(1, "malloc()"); + + set_xstatebv(xsave, XFEATURE_MASK_XTILEDATA); + set_tiledata(xsave + xsave_xtiledata_offset); + syscall(SYS_arch_prctl, ARCH_GET_XSTATE, XFEATURE_MASK_XTILE); + xrstor(xsave, -1, -1); + memcpy(tiledata, xsave + xsave_xtiledata_offset, xsave_xtiledata_size); + + for (i = 0; i < 
ITERATIONS; i++) { + command_wait(finfo, get_iterative_value(finfo->current)); + + __xsave(xsave, XFEATURE_MASK_XTILEDATA, 0); + if (memcmp(tiledata, xsave + xsave_xtiledata_offset, xsave_xtiledata_size)) + errs++; + + set_tiledata(xsave + xsave_xtiledata_offset); + syscall(SYS_arch_prctl, ARCH_GET_XSTATE, XFEATURE_MASK_XTILE); + xrstor(xsave, -1, -1); + memcpy(tiledata, xsave + xsave_xtiledata_offset, xsave_xtiledata_size); + + command_wake(finfo, get_iterative_value(finfo->next)); + } + + command_wait(finfo, get_endpoint_value(finfo->current)); + + free(xsave); + free(tiledata); + return NULL; +} + +static int create_threads(int num, struct futex_info *finfo) +{ + const int shm_id = shmget(IPC_PRIVATE, sizeof(int), IPC_CREAT | 0666); + int *futex = shmat(shm_id, NULL, 0); + pthread_t thread; + int i; + + for (i = 0; i < num; i++) { + finfo[i].futex = futex; + finfo[i].current = i + 1; + finfo[i].next = (i + 2) % (num + 1); + + if (pthread_create(&thread, NULL, check_tiledata, &finfo[i])) + err(1, "pthread_create()"); + } + return 0; +} + +static void test_context_switch(void) +{ + struct futex_info *finfo; + cpu_set_t cpuset; + int i; + + printf("[RUN]\tCheck tile data context switches.\n"); + printf("\t# of context switches -- %u, # of threads -- %d:\n", + ITERATIONS * NUM_THREADS, NUM_THREADS); + + errs = 0; + + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) + err(1, "sched_setaffinity to CPU 0"); + + finfo = malloc(sizeof(*finfo) * NUM_THREADS); + if (!finfo) + err(1, "malloc()"); + + create_threads(NUM_THREADS, finfo); + + for (i = 0; i < ITERATIONS; i++) { + command_wake(finfo, get_iterative_value(1)); + command_wait(finfo, get_iterative_value(0)); + } + + for (i = 1; i <= NUM_THREADS; i++) + command_wake(finfo, get_endpoint_value(i)); + + if (errs) { + nerrs += errs; + printf("[FAIL]\tIncorrect cases were found -- (%d / %u).\n", + errs, ITERATIONS * NUM_THREADS); + free(finfo); + return; + } + + 
free(finfo); + printf("[OK]\tNo incorrect case was found.\n"); +} + +/* Ptrace test */ + +static bool set_tiledata_state; + +static int write_tiledata(pid_t child) +{ + struct iovec iov; + + iov.iov_base = xsave_buffer; + iov.iov_len = xsave_size; + + set_xstatebv(xsave_buffer, set_tiledata_state ? XFEATURE_MASK_XTILEDATA : 0); + set_tiledata(xsave_buffer + xsave_xtiledata_offset); + if (set_tiledata_state) + memcpy(tiledata, xsave_buffer + xsave_xtiledata_offset, xsave_xtiledata_size); + else + memset(tiledata, 0, xsave_xtiledata_size); + + if (ptrace(PTRACE_SETREGSET, child, (uint32_t)NT_X86_XSTATE, &iov)) + err(1, "PTRACE_SETREGSET"); + + if (ptrace(PTRACE_GETREGSET, child, (uint32_t)NT_X86_XSTATE, &iov)) + err(1, "PTRACE_GETREGSET"); + + return memcmp(tiledata, xsave_buffer + xsave_xtiledata_offset, xsave_xtiledata_size); +} + +static void test_tile_write(void) +{ + pid_t child; + int status; + + child = fork(); + if (child < 0) + err(1, "fork"); + + if (!child) { + printf("\tInject tile data -- %s:\n", + set_tiledata_state ? "yes" : "no"); + + if (ptrace(PTRACE_TRACEME, 0, NULL, NULL)) + err(1, "PTRACE_TRACEME"); + + raise(SIGTRAP); + _exit(0); + } + + do { + wait(&status); + } while (WSTOPSIG(status) != SIGTRAP); + + errs = write_tiledata(child); + if (errs) { + nerrs++; + printf("[FAIL]\tTile data was %swritten on ptracee.\n", + set_tiledata_state ? "not " : ""); + } else { + printf("[OK]\tTile data was %swritten on ptracee.\n", + set_tiledata_state ? 
"" : "not "); + } + + ptrace(PTRACE_DETACH, child, NULL, NULL); + wait(&status); + if (!WIFEXITED(status) || WEXITSTATUS(status)) + err(1, "fork test child"); +} + +static void test_ptrace(void) +{ + printf("[RUN]\tCheck ptrace() to inject tile data.\n"); + + set_tiledata_state = true; + test_tile_write(); + + set_tiledata_state = false; + test_tile_write(); +} + +int main(void) +{ + /* Check hardware availability at first */ + + if (!check_xsave_capability()) { + if (xsave_disabled) + printf("XSAVE disabled.\n"); + else + printf("Tile data not available.\n"); + return 0; + } + + check_cpuid(); + + xsave_buffer = aligned_alloc(64, xsave_size); + if (!xsave_buffer) + err(1, "aligned_alloc()"); + + tiledata = malloc(xsave_xtiledata_size); + if (!tiledata) + err(1, "malloc()"); + + nerrs = 0; + + sethandler(SIGSEGV, handle_sigsegv, 0); + + test_arch_prctl(); + test_fork(); + test_context_switch(); + test_ptrace(); + + clearhandler(SIGSEGV); + + free(xsave_buffer); + free(tiledata); + return nerrs ? 1 : 0; +} -- 2.17.1

3 years, 9 months

1
0
0 0

[PATCH v7 0/7] Fork brute force attack mitigation

by John Wood

Attacks against vulnerable userspace applications with the purpose of breaking ASLR or bypassing canaries traditionally use some level of brute force with the help of the fork system call. This is possible since when creating a new process using fork its memory contents are the same as those of the parent process (the process that called the fork system call). So, the attacker can test the memory an unlimited number of times to find the correct memory values or the correct memory addresses without worrying about crashing the application. Based on the above scenario it would be nice to have this detected and mitigated, and this is the goal of this patch series. Specifically the following attacks are expected to be detected: 1.- Launching (fork()/exec()) a setuid/setgid process repeatedly until a desirable memory layout is obtained (e.g. Stack Clash). 2.- Connecting to an exec()ing network daemon (e.g. xinetd) repeatedly until a desirable memory layout is obtained (e.g. what CTFs do for simple network service). 3.- Launching processes without exec() (e.g. Android Zygote) and exposing state to attack a sibling. 4.- Connecting to a fork()ing network daemon (e.g. apache) repeatedly until the previously shared memory layout of all the other children is exposed (e.g. kind of related to HeartBleed). In each case, a privilege boundary has been crossed: Case 1: setuid/setgid process Case 2: network to local Case 3: privilege changes Case 4: network to local So, what will really be detected are fork/exec brute force attacks that cross any of the boundaries listed above. The implementation details and comparison against other existing implementations can be found in the "Documentation" patch. It is important to mention that this version has changed the method used to track the information related to the application crashes. Prior to this version, a pointer per process (in the task_struct structure) held a reference to the shared statistical data. 
Or in other words, these stats were shared by all the fork hierarchy processes. But this has an important drawback: a brute force attack that happens through the execve system call loses the fault info since these statistics are freed when the fork hierarchy disappears. So, the solution adopted in the v6 version was to use an upper fork hierarchy to track the info for this attack type. But, as Valdis Kletnieks pointed out during this discussion [1], this method can be easily bypassed using a double exec (well, this was the method used in the kselftest to avoid the detection ;) ). So, in this version, to track all the statistical data (info related to application crashes), the extended attributes feature for the executable files is used. The xattr is also used to mark the executables as "not allowed" when an attack is detected. Then, the execve system call relies on this flag to block subsequent executions of this file. [1] https://lore.kernel.org/kernelnewbies/20210330173459.GA3163@ubuntu/ Moreover, I think this solves another problem pointed out by Andi Kleen during the v5 review [2] related to the possibility that a supervisor respawns processes killed by the Brute LSM. He suggested adding some way so a supervisor can know that a process has been killed by Brute and then decide to respawn or not. So, now, the supervisor can read the brute xattr of one executable and know if it is blocked by Brute and why (using the statistical data). [2] https://lore.kernel.org/kernel-hardening/878s78dnrm.fsf@linux.intel.com/ Knowing all this information I will explain now the different patches: The 1/7 patch defines a new LSM hook to get the fatal signal of a task. This will be useful during the attack detection phase. The 2/7 patch defines a new LSM and the necessary sysctl attributes to fine-tune the attack detection. The 3/7 patch detects a fork/exec brute force attack and narrows the possible cases taking into account the privilege boundary crossing. 
The 4/7 patch mitigates a brute force attack. The 5/7 patch adds self-tests to validate the Brute LSM expectations. The 6/7 patch adds the documentation to explain this implementation. The 7/7 patch updates the maintainers file. This patch series is a task of the KSPP [3] and can also be accessed from my github tree [4] in the "brute_v7" branch. [3] https://github.com/KSPP/linux/issues/39 [4] https://github.com/johwood/linux/ When I ran the "checkpatch" script I got the following errors, but I think they are false positives as I follow the same coding style as for the other extended attribute suffixes. ---------------------------------------------------------------------------- ../patches/brute_v7/v7-0003-security-brute-Detect-a-brute-force-attack.patch ---------------------------------------------------------------------------- ERROR: Macros with complex values should be enclosed in parentheses 89: FILE: include/uapi/linux/xattr.h:80: +#define XATTR_NAME_BRUTE XATTR_SECURITY_PREFIX XATTR_BRUTE_SUFFIX ----------------------------------------------------------------------------- ../patches/brute_v7/v7-0005-selftests-brute-Add-tests-for-the-Brute-LSM.patch ----------------------------------------------------------------------------- ERROR: Macros with complex values should be enclosed in parentheses 100: FILE: tools/testing/selftests/brute/rmxattr.c:18: +#define XATTR_NAME_BRUTE XATTR_SECURITY_PREFIX XATTR_BRUTE_SUFFIX When I ran the "kernel-doc" script with the following parameters: ./scripts/kernel-doc --none -v security/brute/brute.c I got the following warning: security/brute/brute.c:65: warning: contents before sections But I don't understand why it is complaining. Could it be a false positive? 
The previous versions can be found in: RFC https://lore.kernel.org/kernel-hardening/20200910202107.3799376-1-keescook@… Version 2 https://lore.kernel.org/kernel-hardening/20201025134540.3770-1-john.wood@gm… Version 3 https://lore.kernel.org/lkml/20210221154919.68050-1-john.wood@gmx.com/ Version 4 https://lore.kernel.org/lkml/20210227150956.6022-1-john.wood@gmx.com/ Version 5 https://lore.kernel.org/kernel-hardening/20210227153013.6747-1-john.wood@gm… Version 6 https://lore.kernel.org/kernel-hardening/20210307113031.11671-1-john.wood@g… Changelog RFC -> v2 ------------------- - Rename this feature with a more suitable name (Jann Horn, Kees Cook). - Convert the code to an LSM (Kees Cook). - Add locking to avoid data races (Jann Horn). - Add a new LSM hook to get the fatal signal of a task (Jann Horn, Kees Cook). - Add the last crashes timestamps list to avoid false positives in the attack detection (Jann Horn). - Use "period" instead of "rate" (Jann Horn). - Other minor changes suggested (Jann Horn, Kees Cook). Changelog v2 -> v3 ------------------ - Compute the application crash period on an on-going basis (Kees Cook). - Detect a brute force attack through the execve system call (Kees Cook). - Detect an slow brute force attack (Randy Dunlap). - Fine tuning the detection taken into account privilege boundary crossing (Kees Cook). - Taken into account only fatal signals delivered by the kernel (Kees Cook). - Remove the sysctl attributes to fine tuning the detection (Kees Cook). - Remove the prctls to allow per process enabling/disabling (Kees Cook). - Improve the documentation (Kees Cook). - Fix some typos in the documentation (Randy Dunlap). - Add self-test to validate the expectations (Kees Cook). Changelog v3 -> v4 ------------------ - Fix all the warnings shown by the tool "scripts/kernel-doc" (Randy Dunlap). Changelog v4 -> v5 ------------------ - Fix some typos (Randy Dunlap). Changelog v5 -> v6 ------------------ - Fix a reported deadlock (kernel test robot). 
- Add high level details to the documentation (Andi Kleen). Changelog v6 -> v7 ------------------ - Add the "Reviewed-by:" tag to the first patch. - Rearrange the brute LSM between lockdown and yama (Kees Cook). - Split subdir and obj in security/Makefile (Kees Cook). - Reduce the number of header files included (Kees Cook). - Print the pid when an attack is detected (Kees Cook). - Use the socket_accept LSM hook instead of socket_sock_rcv_skb hook to avoid running a hook on every incoming network packet (Kees Cook). - Update the documentation and fix it to render it properly (Jonathan Corbet). - Manage correctly an exec brute force attack avoiding the bypass (Valdis Kletnieks). - Other minor changes and cleanups. Any constructive comments are welcome. Thanks in advance. John Wood (7): security: Add LSM hook at the point where a task gets a fatal signal security/brute: Define a LSM and add sysctl attributes security/brute: Detect a brute force attack security/brute: Mitigate a brute force attack selftests/brute: Add tests for the Brute LSM Documentation: Add documentation for the Brute LSM MAINTAINERS: Add a new entry for the Brute LSM Documentation/admin-guide/LSM/Brute.rst | 334 +++++++++++ Documentation/admin-guide/LSM/index.rst | 1 + MAINTAINERS | 7 + include/linux/lsm_hook_defs.h | 1 + include/linux/lsm_hooks.h | 4 + include/linux/security.h | 4 + include/uapi/linux/xattr.h | 3 + kernel/signal.c | 1 + security/Kconfig | 11 +- security/Makefile | 2 + security/brute/Kconfig | 15 + security/brute/Makefile | 2 + security/brute/brute.c | 716 +++++++++++++++++++++++ security/security.c | 5 + tools/testing/selftests/Makefile | 1 + tools/testing/selftests/brute/.gitignore | 2 + tools/testing/selftests/brute/Makefile | 5 + tools/testing/selftests/brute/config | 1 + tools/testing/selftests/brute/rmxattr.c | 34 ++ tools/testing/selftests/brute/test.c | 507 ++++++++++++++++ tools/testing/selftests/brute/test.sh | 256 ++++++++ 21 files changed, 1907 insertions(+), 5 
deletions(-) create mode 100644 Documentation/admin-guide/LSM/Brute.rst create mode 100644 security/brute/Kconfig create mode 100644 security/brute/Makefile create mode 100644 security/brute/brute.c create mode 100644 tools/testing/selftests/brute/.gitignore create mode 100644 tools/testing/selftests/brute/Makefile create mode 100644 tools/testing/selftests/brute/config create mode 100644 tools/testing/selftests/brute/rmxattr.c create mode 100644 tools/testing/selftests/brute/test.c create mode 100755 tools/testing/selftests/brute/test.sh -- 2.25.1

3 years, 9 months

2
11
0 0

[PATCH v1 0/4] kunit: tool: add support for QEMU

by Brendan Higgins

TL;DR: Add support to kunit_tool to dispatch tests via QEMU. Also add support to immediately shut down a kernel after running KUnit tests. Background ---------- KUnit has supported running on all architectures for quite some time; however, kunit_tool - the script commonly used to invoke KUnit tests - has only fully supported KUnit run on UML. Its functionality has been broken up for some time to separate the configure, build, run, and parse phases making it possible to be used in part on other architectures to a small extent. Nevertheless, kunit_tool has not supported running tests on other architectures. What this patchset does ----------------------- This patchset introduces first class support to kunit_tool for KUnit to be run on many popular architectures via QEMU. It does this by adding two new flags: `--arch` and `--cross_compile`. `--arch` allows an architecture to be specified by the name the architecture is given in `arch/`. It uses the specified architecture to select a minimal amount of Kconfigs and QEMU configs needed for the architecture to run in QEMU and provide a console from which KTAP results can be scraped. `--cross_compile` allows a toolchain prefix to be specified to make similar to how `CROSS_COMPILE` is used. Additionally, this patchset revives the previously considered "kunit: tool: add support for QEMU"[1] patches. The motivation for this new kernel command line flag, `kunit_shutdown`, is to better support running KUnit tests inside of QEMU. For most popular architectures, QEMU can be made to terminate when the Linux kernel that is being run is rebooted, halted, or powered off. As Kees pointed out in a previous discussion[2], it is possible to make a kernel initrd that can reboot the kernel immediately; however, doing this for every architecture would likely be infeasible. 
Instead, just having an option for the kernel to shut down when it is done with testing seems a lot simpler, especially since it is an option which would only be available in testing configurations of the kernel anyway. Changes since last revision --------------------------- Mostly fixed lots of minor issues suggested/pointed out by David and Daniel. Also reworked how qemu_configs are loaded: Now each config is in its own Python file and is loaded dynamically. Given the number of improvements that address the biggest concerns I had in the last RFC, I decided to promote this to a normal patch set. What discussion remains for this patchset? ------------------------------------------ I am still hoping to see some discussion regarding the kunit_shutdown patch: I want to make sure with the added context of QEMU running under kunit_tool that this is now a reasonable approach. Nevertheless, I am pretty happy with this patchset as is, and I did not get any negative feedback on the previous revision, so I think we can probably just move forward as is. 
Brendan Higgins (3): Documentation: Add kunit_shutdown to kernel-parameters.txt kunit: tool: add support for QEMU Documentation: kunit: document support for QEMU in kunit_tool David Gow (1): kunit: Add 'kunit_shutdown' option .../admin-guide/kernel-parameters.txt | 8 + Documentation/dev-tools/kunit/usage.rst | 37 +++- lib/kunit/executor.c | 20 ++ tools/testing/kunit/kunit.py | 57 +++++- tools/testing/kunit/kunit_config.py | 7 +- tools/testing/kunit/kunit_kernel.py | 172 +++++++++++++++--- tools/testing/kunit/kunit_parser.py | 2 +- tools/testing/kunit/kunit_tool_test.py | 18 +- tools/testing/kunit/qemu_config.py | 17 ++ tools/testing/kunit/qemu_configs/alpha.py | 10 + tools/testing/kunit/qemu_configs/arm.py | 13 ++ tools/testing/kunit/qemu_configs/arm64.py | 12 ++ tools/testing/kunit/qemu_configs/i386.py | 10 + tools/testing/kunit/qemu_configs/powerpc.py | 12 ++ tools/testing/kunit/qemu_configs/riscv.py | 31 ++++ tools/testing/kunit/qemu_configs/s390.py | 14 ++ tools/testing/kunit/qemu_configs/sparc.py | 10 + tools/testing/kunit/qemu_configs/x86_64.py | 10 + 18 files changed, 411 insertions(+), 49 deletions(-) create mode 100644 tools/testing/kunit/qemu_config.py create mode 100644 tools/testing/kunit/qemu_configs/alpha.py create mode 100644 tools/testing/kunit/qemu_configs/arm.py create mode 100644 tools/testing/kunit/qemu_configs/arm64.py create mode 100644 tools/testing/kunit/qemu_configs/i386.py create mode 100644 tools/testing/kunit/qemu_configs/powerpc.py create mode 100644 tools/testing/kunit/qemu_configs/riscv.py create mode 100644 tools/testing/kunit/qemu_configs/s390.py create mode 100644 tools/testing/kunit/qemu_configs/sparc.py create mode 100644 tools/testing/kunit/qemu_configs/x86_64.py base-commit: 38182162b50aa4e970e5997df0a0c4288147a153 -- 2.31.1.607.g51e8a6a459-goog

3 years, 9 months

2
15
0 0

[PATCH v5 0/4] KVM statistics data fd-based binary interface

by Jing Zhang

This patchset provides a file descriptor for every VM and VCPU to read KVM statistics data in binary format. It is meant to provide a lightweight, flexible, scalable and efficient lock-free solution for user space telemetry applications to pull the statistics data periodically for large scale systems. The pulling frequency could be as high as a few times per second. In this patchset, each statistics item is treated as having the following attributes: * architecture dependent or common * VM statistics data or VCPU statistics data * type: cumulative, instantaneous, * unit: none for simple counter, nanosecond, microsecond, millisecond, second, Byte, KiByte, MiByte, GiByte. Clock Cycles Since no lock/synchronization is used, the consistency between all the statistics data is not guaranteed. That means not all statistics data are read out at the exact same time, since the statistics data are still being updated by KVM subsystems while they are read out. --- * v4 -> v5 - Rebase to kvm/queue, commit a4345a7cecfb ("Merge tag 'kvmarm-fixes-5.13-1'") - Change maximum stats name length to 48 - Replace VM_STATS_COMMON/VCPU_STATS_COMMON macros with stats descriptor definition macros. 
- Fixed some errors/warnings reported by checkpatch.pl * v3 -> v4 - Rebase to kvm/queue, commit 9f242010c3b4 ("KVM: avoid "deadlock" between install_new_memslots and MMU notifier") - Use C-style comments in the whole patch - Fix wrong count for x86 VCPU stats descriptors - Fix KVM stats data size counting and validity check in selftest * v2 -> v3 - Rebase to kvm/queue, commit edf408f5257b ("KVM: avoid "deadlock" between install_new_memslots and MMU notifier") - Resolve some nitpicks about format * v1 -> v2 - Use ARRAY_SIZE to count the number of stats descriptors - Fix missing `size` field initialization in macro STATS_DESC [1] https://lore.kernel.org/kvm/20210402224359.2297157-1-jingzhangos@google.com [2] https://lore.kernel.org/kvm/20210415151741.1607806-1-jingzhangos@google.com [3] https://lore.kernel.org/kvm/20210423181727.596466-1-jingzhangos@google.com [4] https://lore.kernel.org/kvm/20210429203740.1935629-1-jingzhangos@google.com --- Jing Zhang (4): KVM: stats: Separate common stats from architecture specific ones KVM: stats: Add fd-based API to read binary stats data KVM: stats: Add documentation for statistics data binary interface KVM: selftests: Add selftest for KVM statistics data binary interface Documentation/virt/kvm/api.rst | 171 ++++++++ arch/arm64/include/asm/kvm_host.h | 9 +- arch/arm64/kvm/guest.c | 38 +- arch/mips/include/asm/kvm_host.h | 9 +- arch/mips/kvm/mips.c | 64 ++- arch/powerpc/include/asm/kvm_host.h | 9 +- arch/powerpc/kvm/book3s.c | 64 ++- arch/powerpc/kvm/book3s_hv.c | 12 +- arch/powerpc/kvm/book3s_pr.c | 2 +- arch/powerpc/kvm/book3s_pr_papr.c | 2 +- arch/powerpc/kvm/booke.c | 59 ++- arch/s390/include/asm/kvm_host.h | 9 +- arch/s390/kvm/kvm-s390.c | 129 +++++- arch/x86/include/asm/kvm_host.h | 9 +- arch/x86/kvm/x86.c | 67 +++- include/linux/kvm_host.h | 136 ++++++- include/linux/kvm_types.h | 12 + include/uapi/linux/kvm.h | 50 +++ tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 3 + 
.../testing/selftests/kvm/include/kvm_util.h | 3 + .../selftests/kvm/kvm_bin_form_stats.c | 379 ++++++++++++++++++ tools/testing/selftests/kvm/lib/kvm_util.c | 12 + virt/kvm/kvm_main.c | 237 ++++++++++- 24 files changed, 1396 insertions(+), 90 deletions(-) create mode 100644 tools/testing/selftests/kvm/kvm_bin_form_stats.c base-commit: a4345a7cecfb91ae78cd43d26b0c6a956420761a -- 2.31.1.751.gd2f1c929bd-goog

3 years, 9 months

4
29
0 0

[PATCH] selftests: net: devlink_port_split.py: skip the test if no devlink device

by Po-Hsu Lin

When there is no devlink device, the following command will return: $ devlink -j dev show {dev:{}} This will cause IndexError when trying to access the first element in dev of this json dataset. Use the kselftest framework skip code to skip this test in this case. Example output with this change: # selftests: net: devlink_port_split.py # no devlink device was found, test skipped ok 7 selftests: net: devlink_port_split.py # SKIP Link: https://bugs.launchpad.net/bugs/1928889 Signed-off-by: Po-Hsu Lin <po-hsu.lin(a)canonical.com> --- tools/testing/selftests/net/devlink_port_split.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/devlink_port_split.py b/tools/testing/selftests/net/devlink_port_split.py index 834066d..2b5d6ff 100755 --- a/tools/testing/selftests/net/devlink_port_split.py +++ b/tools/testing/selftests/net/devlink_port_split.py @@ -18,6 +18,8 @@ import sys # +# Kselftest framework requirement - SKIP code is 4 +KSFT_SKIP=4 Port = collections.namedtuple('Port', 'bus_info name') @@ -239,7 +241,11 @@ def main(cmdline=None): assert stderr == "" devs = json.loads(stdout)['dev'] - dev = list(devs.keys())[0] + if devs: + dev = list(devs.keys())[0] + else: + print("no devlink device was found, test skipped") + sys.exit(KSFT_SKIP) cmd = "devlink dev show %s" % dev stdout, stderr = run_command(cmd) -- 2.7.4

3 years, 9 months

2
1
0 0

[PATCH] selftests: Add .gitignore for nci test suite

by David Matlack

Building the nci test suite produces a binary, nci_dev, that git then tries to track. Add a .gitignore file to tell git to ignore this binary. Signed-off-by: David Matlack <dmatlack(a)google.com> --- tools/testing/selftests/nci/.gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 tools/testing/selftests/nci/.gitignore diff --git a/tools/testing/selftests/nci/.gitignore b/tools/testing/selftests/nci/.gitignore new file mode 100644 index 000000000000..448eeb4590fc --- /dev/null +++ b/tools/testing/selftests/nci/.gitignore @@ -0,0 +1 @@ +/nci_dev -- 2.31.1.751.gd2f1c929bd-goog

3 years, 9 months

2
1
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-kselftest-mirror May 2021