- Linux-kselftest-mirror - lists.linaro.org

[PATCH v2] userfaultfd/shmem: fix MCOPY_ATOMIC_CONTINUE behavior

by Axel Rasmussen

Previously, we shared too much of the code with COPY and ZEROPAGE, so we manipulated things in various invalid ways: - Previously, we unconditionally called shmem_inode_acct_block. In the continue case, we're looking up an existing page which would have been accounted for properly when it was allocated. So doing it twice results in double-counting, and eventually leaking. - Previously, we made the pte writable whenever the VMA was writable. However, for continue, consider this case: 1. A tmpfs file was created 2. The non-UFFD-registered side mmap()-s with MAP_SHARED 3. The UFFD-registered side mmap()-s with MAP_PRIVATE In this case, even though the UFFD-registered VMA may be writable, we still want CoW behavior. So, check for this case and don't make the pte writable. - The offset / max_off checking doesn't necessarily hurt anything, but it's not needed in the CONTINUE case, so skip it. - Previously, we unconditionally called ClearPageDirty() in the error path. In the continue case though, since this is an existing page, it might have already been dirty before we started touching it. So, remember whether or not it was dirty before we set_page_dirty(), and only clear the bit if it wasn't dirty before. - Previously, we unconditionally removed the page from the page cache in the error path. But in the continue case, we didn't add it - it was already there because the page is present in some second (non-UFFD-registered) mapping. So, removing it is invalid. Because the error handling issues are easy to exercise in the selftest, make a small modification there to do so. Finally, refactor shmem_mcopy_atomic_pte a bit. By this point, we've added a lot of "if (!is_continue)"-s everywhere. It's cleaner to just check for that mode first thing, and then "goto" down to where the parts we actually want are. This leaves the code in between cleaner. Changes since v1: - Refactor to skip ahead with goto, instead of adding several more "if (!is_continue)". 
- Fix unconditional ClearPageDirty(). - Don't pte_mkwrite() when is_continue && !VM_SHARED. Fixes: 00da60b9d0a0 ("userfaultfd: support minor fault handling for shmem") Signed-off-by: Axel Rasmussen <axelrasmussen(a)google.com> --- mm/shmem.c | 67 ++++++++++++++---------- tools/testing/selftests/vm/userfaultfd.c | 12 +++++ 2 files changed, 51 insertions(+), 28 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index d2e0e81b7d2e..8ab1f1f29987 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2378,17 +2378,22 @@ int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, pte_t _dst_pte, *dst_pte; int ret; pgoff_t offset, max_off; - - ret = -ENOMEM; - if (!shmem_inode_acct_block(inode, 1)) - goto out; + int writable; + bool was_dirty; if (is_continue) { ret = -EFAULT; page = find_lock_page(mapping, pgoff); if (!page) - goto out_unacct_blocks; - } else if (!*pagep) { + goto out; + goto install_ptes; + } + + ret = -ENOMEM; + if (!shmem_inode_acct_block(inode, 1)) + goto out; + + if (!*pagep) { page = shmem_alloc_page(gfp, info, pgoff); if (!page) goto out_unacct_blocks; @@ -2415,13 +2420,11 @@ int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, *pagep = NULL; } - if (!is_continue) { - VM_BUG_ON(PageSwapBacked(page)); - VM_BUG_ON(PageLocked(page)); - __SetPageLocked(page); - __SetPageSwapBacked(page); - __SetPageUptodate(page); - } + VM_BUG_ON(PageSwapBacked(page)); + VM_BUG_ON(PageLocked(page)); + __SetPageLocked(page); + __SetPageSwapBacked(page); + __SetPageUptodate(page); ret = -EFAULT; offset = linear_page_index(dst_vma, dst_addr); @@ -2429,16 +2432,18 @@ int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, if (unlikely(offset >= max_off)) goto out_release; - /* If page wasn't already in the page cache, add it. 
*/ - if (!is_continue) { - ret = shmem_add_to_page_cache(page, mapping, pgoff, NULL, - gfp & GFP_RECLAIM_MASK, dst_mm); - if (ret) - goto out_release; - } + ret = shmem_add_to_page_cache(page, mapping, pgoff, NULL, + gfp & GFP_RECLAIM_MASK, dst_mm); + if (ret) + goto out_release; +install_ptes: _dst_pte = mk_pte(page, dst_vma->vm_page_prot); - if (dst_vma->vm_flags & VM_WRITE) + /* For CONTINUE on a non-shared VMA, don't pte_mkwrite for CoW. */ + writable = is_continue && !(dst_vma->vm_flags & VM_SHARED) + ? 0 + : dst_vma->vm_flags & VM_WRITE; + if (writable) _dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte)); else { /* @@ -2448,15 +2453,18 @@ int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, * unconditionally before unlock_page(), but doing it * only if VM_WRITE is not set is faster. */ + was_dirty = PageDirty(page); set_page_dirty(page); } dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl); - ret = -EFAULT; - max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); - if (unlikely(offset >= max_off)) - goto out_release_unlock; + if (!is_continue) { + ret = -EFAULT; + max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + if (unlikely(offset >= max_off)) + goto out_release_unlock; + } ret = -EEXIST; if (!pte_none(*dst_pte)) @@ -2485,13 +2493,16 @@ int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, return ret; out_release_unlock: pte_unmap_unlock(dst_pte, ptl); - ClearPageDirty(page); - delete_from_page_cache(page); + if (!was_dirty) + ClearPageDirty(page); + if (!is_continue) + delete_from_page_cache(page); out_release: unlock_page(page); put_page(page); out_unacct_blocks: - shmem_inode_unacct_blocks(inode, 1); + if (!is_continue) + shmem_inode_unacct_blocks(inode, 1); goto out; } #endif /* CONFIG_USERFAULTFD */ diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index f6c86b036d0f..d8541a59dae5 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ 
b/tools/testing/selftests/vm/userfaultfd.c @@ -485,6 +485,7 @@ static void wp_range(int ufd, __u64 start, __u64 len, bool wp) static void continue_range(int ufd, __u64 start, __u64 len) { struct uffdio_continue req; + int ret; req.range.start = start; req.range.len = len; @@ -493,6 +494,17 @@ static void continue_range(int ufd, __u64 start, __u64 len) if (ioctl(ufd, UFFDIO_CONTINUE, &req)) err("UFFDIO_CONTINUE failed for address 0x%" PRIx64, (uint64_t)start); + + /* + * Error handling within the kernel for continue is subtly different + * from copy or zeropage, so it may be a source of bugs. Trigger an + * error (-EEXIST) on purpose, to verify doing so doesn't cause a BUG. + */ + req.mapped = 0; + ret = ioctl(ufd, UFFDIO_CONTINUE, &req); + if (ret >= 0 || req.mapped != -EEXIST) + err("failed to exercise UFFDIO_CONTINUE error handling, ret=%d, mapped=%" PRId64, + ret, req.mapped); } static void *locking_thread(void *arg) -- 2.31.0.291.g576ba9dcdaf-goog

4 years, 9 months

2
1
0 0

[PATCH AUTOSEL 5.10 27/33] selftests/vm: fix out-of-tree build

by Sasha Levin

From: Rong Chen <rong.a.chen(a)intel.com> [ Upstream commit 19ec368cbc7ee1915e78c120b7a49c7f14734192 ] When building out-of-tree, attempting to make target from $(OUTPUT) directory: make[1]: *** No rule to make target '$(OUTPUT)/protection_keys.c', needed by '$(OUTPUT)/protection_keys_32'. Link: https://lkml.kernel.org/r/20210315094700.522753-1-rong.a.chen@intel.com Signed-off-by: Rong Chen <rong.a.chen(a)intel.com> Reported-by: kernel test robot <lkp(a)intel.com> Cc: Shuah Khan <shuah(a)kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/vm/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index e63f31632708..2cf32e6b376e 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -99,7 +99,7 @@ endef ifeq ($(CAN_BUILD_I386),1) $(BINARIES_32): CFLAGS += -m32 $(BINARIES_32): LDLIBS += -lrt -ldl -lm -$(BINARIES_32): %_32: %.c +$(BINARIES_32): $(OUTPUT)/%_32: %.c $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@ $(foreach t,$(TARGETS),$(eval $(call gen-target-rule-32,$(t)))) endif @@ -107,7 +107,7 @@ endif ifeq ($(CAN_BUILD_X86_64),1) $(BINARIES_64): CFLAGS += -m64 $(BINARIES_64): LDLIBS += -lrt -ldl -$(BINARIES_64): %_64: %.c +$(BINARIES_64): $(OUTPUT)/%_64: %.c $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@ $(foreach t,$(TARGETS),$(eval $(call gen-target-rule-64,$(t)))) endif -- 2.30.1

4 years, 9 months

1
0
0 0

[PATCH AUTOSEL 5.10 20/33] kselftest/arm64: sve: Do not use non-canonical FFR register value

by Sasha Levin

From: Andre Przywara <andre.przywara(a)arm.com> [ Upstream commit 7011d72588d16a9e5f5d85acbc8b10019809599c ] The "First Fault Register" (FFR) is an SVE register that mimics a predicate register, but clears bits when a load or store fails to handle an element of a vector. The supposed usage scenario is to initialise this register (using SETFFR), then *read* it later on to learn about elements that failed to load or store. Explicit writes to this register using the WRFFR instruction are only supposed to *restore* values previously read from the register (for context-switching only). As the manual describes, this register holds only certain values, it: "... contains a monotonic predicate value, in which starting from bit 0 there are zero or more 1 bits, followed only by 0 bits in any remaining bit positions." Any other value is UNPREDICTABLE and is not supposed to be "restored" into the register. The SVE test currently tries to write a signature pattern into the register, which is *not* a canonical FFR value. Apparently the existing setups treat UNPREDICTABLE as "read-as-written", but a new implementation actually only stores canonical values. As a consequence, the sve-test fails immediately when comparing the FFR value: ----------- # ./sve-test Vector length: 128 bits PID: 207 Mismatch: PID=207, iteration=0, reg=48 Expected [cf00] Got [0f00] Aborted ----------- Fix this by only populating the FFR with proper canonical values. Effectively the requirement described above limits us to 17 unique values over 16 bits worth of FFR, so we condense our signature down to 4 bits (2 bits from the PID, 2 bits from the generation) and generate the canonical pattern from it. Any bits describing elements above the minimum 128 bit are set to 0. This aligns the FFR usage to the architecture and fixes the test on microarchitectures implementing FFR in a more restricted way. 
Signed-off-by: Andre Przywara <andre.przywara(a)arm.com> Reviwed-by: Mark Brown <broonie(a)kernel.org> Link: https://lore.kernel.org/r/20210319120128.29452-1-andre.przywara@arm.com Signed-off-by: Will Deacon <will(a)kernel.org> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/arm64/fp/sve-test.S | 22 ++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S index f95074c9b48b..07f14e279a90 100644 --- a/tools/testing/selftests/arm64/fp/sve-test.S +++ b/tools/testing/selftests/arm64/fp/sve-test.S @@ -284,16 +284,28 @@ endfunction // Set up test pattern in the FFR // x0: pid // x2: generation +// +// We need to generate a canonical FFR value, which consists of a number of +// low "1" bits, followed by a number of zeros. This gives us 17 unique values +// per 16 bits of FFR, so we create a 4 bit signature out of the PID and +// generation, and use that as the initial number of ones in the pattern. +// We fill the upper lanes of FFR with zeros. // Beware: corrupts P0. function setup_ffr mov x4, x30 - bl pattern + and w0, w0, #0x3 + bfi w0, w2, #2, #2 + mov w1, #1 + lsl w1, w1, w0 + sub w1, w1, #1 + ldr x0, =ffrref - ldr x1, =scratch - rdvl x2, #1 - lsr x2, x2, #3 - bl memcpy + strh w1, [x0], 2 + rdvl x1, #1 + lsr x1, x1, #3 + sub x1, x1, #2 + bl memclr mov x0, #0 ldr x1, =ffrref -- 2.30.1

4 years, 9 months

1
0
0 0

[PATCH AUTOSEL 5.10 07/33] kunit: tool: Fix a python tuple typing error

by Sasha Levin

From: David Gow <davidgow(a)google.com> [ Upstream commit 7421b1a4d10c633ca5f14c8236d3e2c1de07e52b ] The first argument to namedtuple() should match the name of the type, which wasn't the case for KconfigEntryBase. Fixing this is enough to make mypy show no python typing errors again. Fixes 97752c39bd ("kunit: kunit_tool: Allow .kunitconfig to disable config items") Signed-off-by: David Gow <davidgow(a)google.com> Reviewed-by: Daniel Latypov <dlatypov(a)google.com> Acked-by: Brendan Higgins <brendanhiggins(a)google.com> Signed-off-by: Shuah Khan <skhan(a)linuxfoundation.org> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/kunit/kunit_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/kunit/kunit_config.py b/tools/testing/kunit/kunit_config.py index 02ffc3a3e5dc..b30e9d6db6b4 100644 --- a/tools/testing/kunit/kunit_config.py +++ b/tools/testing/kunit/kunit_config.py @@ -12,7 +12,7 @@ import re CONFIG_IS_NOT_SET_PATTERN = r'^# CONFIG_(\w+) is not set$' CONFIG_PATTERN = r'^CONFIG_(\w+)=(\S+|".*")$' -KconfigEntryBase = collections.namedtuple('KconfigEntry', ['name', 'value']) +KconfigEntryBase = collections.namedtuple('KconfigEntryBase', ['name', 'value']) class KconfigEntry(KconfigEntryBase): -- 2.30.1

4 years, 9 months

1
0
0 0

[PATCH AUTOSEL 5.11 32/38] selftests/vm: fix out-of-tree build

by Sasha Levin

From: Rong Chen <rong.a.chen(a)intel.com> [ Upstream commit 19ec368cbc7ee1915e78c120b7a49c7f14734192 ] When building out-of-tree, attempting to make target from $(OUTPUT) directory: make[1]: *** No rule to make target '$(OUTPUT)/protection_keys.c', needed by '$(OUTPUT)/protection_keys_32'. Link: https://lkml.kernel.org/r/20210315094700.522753-1-rong.a.chen@intel.com Signed-off-by: Rong Chen <rong.a.chen(a)intel.com> Reported-by: kernel test robot <lkp(a)intel.com> Cc: Shuah Khan <shuah(a)kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/vm/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index d42115e4284d..8b0cd421ebd3 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -101,7 +101,7 @@ endef ifeq ($(CAN_BUILD_I386),1) $(BINARIES_32): CFLAGS += -m32 $(BINARIES_32): LDLIBS += -lrt -ldl -lm -$(BINARIES_32): %_32: %.c +$(BINARIES_32): $(OUTPUT)/%_32: %.c $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@ $(foreach t,$(TARGETS),$(eval $(call gen-target-rule-32,$(t)))) endif @@ -109,7 +109,7 @@ endif ifeq ($(CAN_BUILD_X86_64),1) $(BINARIES_64): CFLAGS += -m64 $(BINARIES_64): LDLIBS += -lrt -ldl -$(BINARIES_64): %_64: %.c +$(BINARIES_64): $(OUTPUT)/%_64: %.c $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@ $(foreach t,$(TARGETS),$(eval $(call gen-target-rule-64,$(t)))) endif -- 2.30.1

4 years, 9 months

1
0
0 0

[PATCH AUTOSEL 5.11 23/38] kselftest/arm64: sve: Do not use non-canonical FFR register value

by Sasha Levin

From: Andre Przywara <andre.przywara(a)arm.com> [ Upstream commit 7011d72588d16a9e5f5d85acbc8b10019809599c ] The "First Fault Register" (FFR) is an SVE register that mimics a predicate register, but clears bits when a load or store fails to handle an element of a vector. The supposed usage scenario is to initialise this register (using SETFFR), then *read* it later on to learn about elements that failed to load or store. Explicit writes to this register using the WRFFR instruction are only supposed to *restore* values previously read from the register (for context-switching only). As the manual describes, this register holds only certain values, it: "... contains a monotonic predicate value, in which starting from bit 0 there are zero or more 1 bits, followed only by 0 bits in any remaining bit positions." Any other value is UNPREDICTABLE and is not supposed to be "restored" into the register. The SVE test currently tries to write a signature pattern into the register, which is *not* a canonical FFR value. Apparently the existing setups treat UNPREDICTABLE as "read-as-written", but a new implementation actually only stores canonical values. As a consequence, the sve-test fails immediately when comparing the FFR value: ----------- # ./sve-test Vector length: 128 bits PID: 207 Mismatch: PID=207, iteration=0, reg=48 Expected [cf00] Got [0f00] Aborted ----------- Fix this by only populating the FFR with proper canonical values. Effectively the requirement described above limits us to 17 unique values over 16 bits worth of FFR, so we condense our signature down to 4 bits (2 bits from the PID, 2 bits from the generation) and generate the canonical pattern from it. Any bits describing elements above the minimum 128 bit are set to 0. This aligns the FFR usage to the architecture and fixes the test on microarchitectures implementing FFR in a more restricted way. 
Signed-off-by: Andre Przywara <andre.przywara(a)arm.com> Reviwed-by: Mark Brown <broonie(a)kernel.org> Link: https://lore.kernel.org/r/20210319120128.29452-1-andre.przywara@arm.com Signed-off-by: Will Deacon <will(a)kernel.org> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/arm64/fp/sve-test.S | 22 ++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S index 9210691aa998..e3e08d9c7020 100644 --- a/tools/testing/selftests/arm64/fp/sve-test.S +++ b/tools/testing/selftests/arm64/fp/sve-test.S @@ -284,16 +284,28 @@ endfunction // Set up test pattern in the FFR // x0: pid // x2: generation +// +// We need to generate a canonical FFR value, which consists of a number of +// low "1" bits, followed by a number of zeros. This gives us 17 unique values +// per 16 bits of FFR, so we create a 4 bit signature out of the PID and +// generation, and use that as the initial number of ones in the pattern. +// We fill the upper lanes of FFR with zeros. // Beware: corrupts P0. function setup_ffr mov x4, x30 - bl pattern + and w0, w0, #0x3 + bfi w0, w2, #2, #2 + mov w1, #1 + lsl w1, w1, w0 + sub w1, w1, #1 + ldr x0, =ffrref - ldr x1, =scratch - rdvl x2, #1 - lsr x2, x2, #3 - bl memcpy + strh w1, [x0], 2 + rdvl x1, #1 + lsr x1, x1, #3 + sub x1, x1, #2 + bl memclr mov x0, #0 ldr x1, =ffrref -- 2.30.1

4 years, 9 months

1
0
0 0

[PATCH AUTOSEL 5.11 08/38] kunit: tool: Fix a python tuple typing error

by Sasha Levin

From: David Gow <davidgow(a)google.com> [ Upstream commit 7421b1a4d10c633ca5f14c8236d3e2c1de07e52b ] The first argument to namedtuple() should match the name of the type, which wasn't the case for KconfigEntryBase. Fixing this is enough to make mypy show no python typing errors again. Fixes 97752c39bd ("kunit: kunit_tool: Allow .kunitconfig to disable config items") Signed-off-by: David Gow <davidgow(a)google.com> Reviewed-by: Daniel Latypov <dlatypov(a)google.com> Acked-by: Brendan Higgins <brendanhiggins(a)google.com> Signed-off-by: Shuah Khan <skhan(a)linuxfoundation.org> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/kunit/kunit_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/kunit/kunit_config.py b/tools/testing/kunit/kunit_config.py index bdd60230764b..27fe086d2d0d 100644 --- a/tools/testing/kunit/kunit_config.py +++ b/tools/testing/kunit/kunit_config.py @@ -13,7 +13,7 @@ from typing import List, Set CONFIG_IS_NOT_SET_PATTERN = r'^# CONFIG_(\w+) is not set$' CONFIG_PATTERN = r'^CONFIG_(\w+)=(\S+|".*")$' -KconfigEntryBase = collections.namedtuple('KconfigEntry', ['name', 'value']) +KconfigEntryBase = collections.namedtuple('KconfigEntryBase', ['name', 'value']) class KconfigEntry(KconfigEntryBase): -- 2.30.1

4 years, 9 months

1
0
0 0

[PATCH v7 1/4] lib: vsprintf: scanf: Negative number must have field width > 1

by Richard Fitzgerald

If a signed number field starts with a '-' the field width must be > 1, or unlimited, to allow at least one digit after the '-'. This patch adds a check for this. If a signed field starts with '-' and field_width == 1 the scanf will quit. It is ok for a signed number field to have a field width of 1 if it starts with a digit. In that case the single digit can be converted. Signed-off-by: Richard Fitzgerald <rf(a)opensource.cirrus.com> Reviewed-by: Petr Mladek <pmladek(a)suse.com> Acked-by: Andy Shevchenko <andriy.shevchenko(a)linux.intel.com> --- lib/vsprintf.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 41ddc353ebb8..f78651e9b030 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -3466,8 +3466,12 @@ int vsscanf(const char *buf, const char *fmt, va_list args) str = skip_spaces(str); digit = *str; - if (is_sign && digit == '-') + if (is_sign && digit == '-') { + if (field_width == 1) + break; + digit = *(str + 1); + } if (!digit || (base == 16 && !isxdigit(digit)) -- 2.20.1

4 years, 9 months

2
5
0 0

[RFC PATCH v5 00/10] KVM: selftests: some improvement and a new test for kvm page table

by Yanan Wang

Hi, This v5 series can mainly include two parts. Based on kvm queue branch: https://git.kernel.org/pub/scm/virt/kvm/kvm.git/log/?h=queue In the first part, all the known hugetlb backing src types specified with different hugepage sizes are listed, so that we can specify use of hugetlb source of the exact granularity that we want, instead of the system default ones. And as all the known hugetlb page sizes are listed, it's appropriate for all architectures. Besides, a helper that can get granularity of different backing src types(anonymous/thp/hugetlb) is added, so that we can use the accurate backing src granularity for kinds of alignment or guest memory accessing of vcpus. In the second part, a new test is added: This test is added to serve as a performance tester and a bug reproducer for kvm page table code (GPA->HPA mappings), it gives guidance for the people trying to make some improvement for kvm. And the following explains what we can exactly do through this test. The function guest_code() can cover the conditions where a single vcpu or multiple vcpus access guest pages within the same memory region, in three VM stages(before dirty logging, during dirty logging, after dirty logging). Besides, the backing src memory type(ANONYMOUS/THP/HUGETLB) of the tested memory region can be specified by users, which means normal page mappings or block mappings can be chosen by users to be created in the test. If ANONYMOUS memory is specified, kvm will create normal page mappings for the tested memory region before dirty logging, and update attributes of the page mappings from RO to RW during dirty logging. If THP/HUGETLB memory is specified, kvm will create block mappings for the tested memory region before dirty logging, and split the block mappings into normal page mappings during dirty logging, and coalesce the page mappings back into block mappings after dirty logging is stopped. 
So in summary, as a performance tester, this test can present the performance of kvm creating/updating normal page mappings, or the performance of kvm creating/splitting/recovering block mappings, through execution time. When we need to coalesce the page mappings back to block mappings after dirty logging is stopped, we have to firstly invalidate *all* the TLB entries for the page mappings right before installation of the block entry, because a TLB conflict abort error could occur if we can't invalidate the TLB entries fully. We have hit this TLB conflict twice on aarch64 software implementation and fixed it. As this test can simulate the process from dirty logging enabled to dirty logging stopped of a VM with block mappings, it can also reproduce this TLB conflict abort due to inadequate TLB invalidation when coalescing tables. Links about the TLB conflict abort: https://lore.kernel.org/lkml/20201201201034.116760-3-wangyanan55@huawei.com/ --- Change logs: v4->v5: - Use synchronization(sem_wait) for time measurement - Add a new patch about TEST_ASSERT(patch 4) - Address Andrew Jones's comments for v4 series - Add Andrew Jones's R-b tags in some patches - v4: https://lore.kernel.org/lkml/20210302125751.19080-1-wangyanan55@huawei.com/ v3->v4: - Add a helper to get system default hugetlb page size - Add tags of Reviewed-by of Ben in the patches - v3: https://lore.kernel.org/lkml/20210301065916.11484-1-wangyanan55@huawei.com/ v2->v3: - Add tags of Suggested-by, Reviewed-by in the patches - Add a generic macro to get hugetlb page sizes - Some changes for suggestions about v2 series - v2: https://lore.kernel.org/lkml/20210225055940.18748-1-wangyanan55@huawei.com/ v1->v2: - Add a patch to sync header files - Add helpers to get granularity of different backing src types - Some changes for suggestions about v1 series - v1: https://lore.kernel.org/lkml/20210208090841.333724-1-wangyanan55@huawei.com/ --- Yanan Wang (10): tools headers: sync headers of 
asm-generic/hugetlb_encode.h tools headers: Add a macro to get HUGETLB page sizes for mmap KVM: selftests: Use flag CLOCK_MONOTONIC_RAW for timing KVM: selftests: Print the errno besides error-string in TEST_ASSERT KVM: selftests: Make a generic helper to get vm guest mode strings KVM: selftests: Add a helper to get system configured THP page size KVM: selftests: Add a helper to get system default hugetlb page size KVM: selftests: List all hugetlb src types specified with page sizes KVM: selftests: Adapt vm_userspace_mem_region_add to new helpers KVM: selftests: Add a test for kvm page table code include/uapi/linux/mman.h | 2 + tools/include/asm-generic/hugetlb_encode.h | 3 + tools/include/uapi/linux/mman.h | 2 + tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 3 + .../selftests/kvm/demand_paging_test.c | 8 +- .../selftests/kvm/dirty_log_perf_test.c | 14 +- .../testing/selftests/kvm/include/kvm_util.h | 4 +- .../testing/selftests/kvm/include/test_util.h | 21 +- .../selftests/kvm/kvm_page_table_test.c | 512 ++++++++++++++++++ tools/testing/selftests/kvm/lib/assert.c | 4 +- tools/testing/selftests/kvm/lib/kvm_util.c | 59 +- tools/testing/selftests/kvm/lib/test_util.c | 163 +++++- tools/testing/selftests/kvm/steal_time.c | 4 +- 14 files changed, 739 insertions(+), 61 deletions(-) create mode 100644 tools/testing/selftests/kvm/kvm_page_table_test.c -- 2.23.0

4 years, 9 months

4
20
0 0

[PATCH] userfaultfd/shmem: fix MCOPY_ATOMIC_CONTINUE error handling + accounting

by Axel Rasmussen

Previously, in the error path, we unconditionally removed the page from the page cache. But in the continue case, we didn't add it - it was already there because the page is used by a second (non-UFFD-registered) mapping. So, in that case, it's incorrect to remove it as the other mapping may still use it normally. For this error handling failure, trivially exercise it in the userfaultfd selftest, to detect this kind of bug in the future. Also, we previously were unconditionally calling shmem_inode_acct_block. In the continue case, however, this is incorrect, because we would have already accounted for the RAM usage when the page was originally allocated (since at this point it's already in the page cache). So, doing it in the continue case causes us to double-count. Fixes: 00da60b9d0a0 ("userfaultfd: support minor fault handling for shmem") Signed-off-by: Axel Rasmussen <axelrasmussen(a)google.com> --- mm/shmem.c | 15 ++++++++++----- tools/testing/selftests/vm/userfaultfd.c | 12 ++++++++++++ 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index d2e0e81b7d2e..5ac8ea737004 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2379,9 +2379,11 @@ int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, int ret; pgoff_t offset, max_off; - ret = -ENOMEM; - if (!shmem_inode_acct_block(inode, 1)) - goto out; + if (!is_continue) { + ret = -ENOMEM; + if (!shmem_inode_acct_block(inode, 1)) + goto out; + } if (is_continue) { ret = -EFAULT; @@ -2389,6 +2391,7 @@ int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, if (!page) goto out_unacct_blocks; } else if (!*pagep) { + ret = -ENOMEM; page = shmem_alloc_page(gfp, info, pgoff); if (!page) goto out_unacct_blocks; @@ -2486,12 +2489,14 @@ int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, out_release_unlock: pte_unmap_unlock(dst_pte, ptl); ClearPageDirty(page); - delete_from_page_cache(page); + if (!is_continue) + delete_from_page_cache(page); 
out_release: unlock_page(page); put_page(page); out_unacct_blocks: - shmem_inode_unacct_blocks(inode, 1); + if (!is_continue) + shmem_inode_unacct_blocks(inode, 1); goto out; } #endif /* CONFIG_USERFAULTFD */ diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index f6c86b036d0f..d8541a59dae5 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -485,6 +485,7 @@ static void wp_range(int ufd, __u64 start, __u64 len, bool wp) static void continue_range(int ufd, __u64 start, __u64 len) { struct uffdio_continue req; + int ret; req.range.start = start; req.range.len = len; @@ -493,6 +494,17 @@ static void continue_range(int ufd, __u64 start, __u64 len) if (ioctl(ufd, UFFDIO_CONTINUE, &req)) err("UFFDIO_CONTINUE failed for address 0x%" PRIx64, (uint64_t)start); + + /* + * Error handling within the kernel for continue is subtly different + * from copy or zeropage, so it may be a source of bugs. Trigger an + * error (-EEXIST) on purpose, to verify doing so doesn't cause a BUG. + */ + req.mapped = 0; + ret = ioctl(ufd, UFFDIO_CONTINUE, &req); + if (ret >= 0 || req.mapped != -EEXIST) + err("failed to exercise UFFDIO_CONTINUE error handling, ret=%d, mapped=%" PRId64, + ret, req.mapped); } static void *locking_thread(void *arg) -- 2.31.0.291.g576ba9dcdaf-goog

4 years, 9 months

2
1
0 0

[PATCH] selftests/timers: remove unneeded semicolon

by Jiapeng Chong

Fix the following coccicheck warnings: ./tools/testing/selftests/timers/nanosleep.c:75:2-3: Unneeded semicolon Reported-by: Abaci Robot <abaci(a)linux.alibaba.com> Signed-off-by: Jiapeng Chong <jiapeng.chong(a)linux.alibaba.com> --- tools/testing/selftests/timers/nanosleep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/timers/nanosleep.c b/tools/testing/selftests/timers/nanosleep.c index 71b5441..433a096 100644 --- a/tools/testing/selftests/timers/nanosleep.c +++ b/tools/testing/selftests/timers/nanosleep.c @@ -72,7 +72,7 @@ char *clockstring(int clockid) return "CLOCK_BOOTTIME_ALARM"; case CLOCK_TAI: return "CLOCK_TAI"; - }; + } return "UNKNOWN_CLOCKID"; } -- 1.8.3.1

4 years, 9 months

2
1
0 0

[PATCH] selftests/timers: Fix spelling mistake "clocksourc" -> "clocksource"

by Colin King

From: Colin Ian King <colin.king(a)canonical.com> There is a spelling mistake in a comment. Fix it. Signed-off-by: Colin Ian King <colin.king(a)canonical.com> --- tools/testing/selftests/timers/clocksource-switch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/timers/clocksource-switch.c b/tools/testing/selftests/timers/clocksource-switch.c index bfc974b4572d..2d66abd877e6 100644 --- a/tools/testing/selftests/timers/clocksource-switch.c +++ b/tools/testing/selftests/timers/clocksource-switch.c @@ -3,7 +3,7 @@ * (C) Copyright IBM 2012 * Licensed under the GPLv2 * - * NOTE: This is a meta-test which quickly changes the clocksourc and + * NOTE: This is a meta-test which quickly changes the clocksource and * then uses other tests to detect problems. Thus this test requires * that the inconsistency-check and nanosleep tests be present in the * same directory it is run from. -- 2.30.2

4 years, 9 months

3
2
0 0

[PATCH v5] selftests: fix prepending $(OUTPUT) to $(TEST_PROGS)

by Ilya Leoshkevich

Currently the following command produces an error message: linux# make kselftest TARGETS=bpf O=/mnt/linux-build # selftests: bpf: test_libbpf.sh # ./test_libbpf.sh: line 23: ./test_libbpf_open: No such file or directory # test_libbpf: failed at file test_l4lb.o # selftests: test_libbpf [FAILED] The error message might not affect the return code of make, therefore one needs to grep make output in order to detect it. This is not the only instance of the same underlying problem; any test with more than one element in $(TEST_PROGS) fails the same way. Another example: linux# make O=/mnt/linux-build TARGETS=splice kselftest [...] # ./short_splice_read.sh: 15: ./splice_read: not found # FAIL: /sys/module/test_module/sections/.init.text 2 not ok 2 selftests: splice: short_splice_read.sh # exit=1 The current logic prepends $(OUTPUT) only to the first member of $(TEST_PROGS). After that, run_one() does cd `dirname $TEST` For all tests except the first one, `dirname $TEST` is ., which means they cannot access the files generated in $(OUTPUT). Fix by using $(addprefix) to prepend $(OUTPUT)/ to each member of $(TEST_PROGS). Fixes: 1a940687e424 ("selftests: lib.mk: copy test scripts and test files for make O=dir run") Signed-off-by: Ilya Leoshkevich <iii(a)linux.ibm.com> --- v1->v2: - Append / to $(OUTPUT). - Use $(addprefix) instead of $(foreach). v2->v3: - Split the patch in two. - Improve the commit message. v3: https://lore.kernel.org/linux-kselftest/20191024121347.22189-1-iii@linux.ib… v3->v4: - Drop the first patch. - Add a note regarding make return code to the commit message. v4: https://lore.kernel.org/linux-kselftest/20191115150428.61131-1-iii@linux.ib… v4->v5: - Add another reproducer to the commit message. 
tools/testing/selftests/lib.mk | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index a5ce26d548e4..be17462fe146 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -74,7 +74,8 @@ ifdef building_out_of_srctree rsync -aq $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(OUTPUT); \ fi @if [ "X$(TEST_PROGS)" != "X" ]; then \ - $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(OUTPUT)/$(TEST_PROGS)) ; \ + $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) \ + $(addprefix $(OUTPUT)/,$(TEST_PROGS))) ; \ else \ $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS)); \ fi -- 2.29.2

4 years, 9 months

2
1
0 0

[PATCH v6 0/8] Fork brute force attack mitigation

by John Wood

Attacks against vulnerable userspace applications with the purpose to break ASLR or bypass canaries traditionally use some level of brute force with the help of the fork system call. This is possible since when creating a new process using fork its memory contents are the same as those of the parent process (the process that called the fork system call). So, the attacker can test the memory infinite times to find the correct memory values or the correct memory addresses without worrying about crashing the application. Based on the above scenario it would be nice to have this detected and mitigated, and this is the goal of this patch series. Specifically the following attacks are expected to be detected: 1.- Launching (fork()/exec()) a setuid/setgid process repeatedly until a desirable memory layout is obtained (e.g. Stack Clash). 2.- Connecting to an exec()ing network daemon (e.g. xinetd) repeatedly until a desirable memory layout is obtained (e.g. what CTFs do for simple network service). 3.- Launching processes without exec() (e.g. Android Zygote) and exposing state to attack a sibling. 4.- Connecting to a fork()ing network daemon (e.g. apache) repeatedly until the previously shared memory layout of all the other children is exposed (e.g. kind of related to HeartBleed). In each case, a privilege boundary has been crossed: Case 1: setuid/setgid process Case 2: network to local Case 3: privilege changes Case 4: network to local So, what will really be detected are fork/exec brute force attacks that cross any of the aforementioned boundaries. The implementation details and comparison against other existing implementations can be found in the "Documentation" patch. Knowing all this information I will explain now the different patches: The 1/8 patch defines a new LSM hook to get the fatal signal of a task. This will be useful during the attack detection phase. The 2/8 patch defines a new LSM and manages the statistical data shared by all the fork hierarchy processes. 
The 3/8 patch detects a fork/exec brute force attack. The 4/8 patch narrows the detection by taking into account the privilege boundary crossing. The 5/8 patch mitigates a brute force attack. The 6/8 patch adds self-tests to validate the Brute LSM expectations. The 7/8 patch adds the documentation to explain this implementation. The 8/8 patch updates the maintainers file. This patch series is a task of the KSPP [1] and can also be accessed from my github tree [2] in the "brute_v6" branch. [1] https://github.com/KSPP/linux/issues/39 [2] https://github.com/johwood/linux/ The previous versions can be found in: RFC https://lore.kernel.org/kernel-hardening/20200910202107.3799376-1-keescook@… Version 2 https://lore.kernel.org/kernel-hardening/20201025134540.3770-1-john.wood@gm… Version 3 https://lore.kernel.org/lkml/20210221154919.68050-1-john.wood@gmx.com/ Version 4 https://lore.kernel.org/lkml/20210227150956.6022-1-john.wood@gmx.com/ Version 5 https://lore.kernel.org/kernel-hardening/20210227153013.6747-1-john.wood@gm… Changelog RFC -> v2 ------------------- - Rename this feature with a more suitable name (Jann Horn, Kees Cook). - Convert the code to an LSM (Kees Cook). - Add locking to avoid data races (Jann Horn). - Add a new LSM hook to get the fatal signal of a task (Jann Horn, Kees Cook). - Add the last crashes timestamps list to avoid false positives in the attack detection (Jann Horn). - Use "period" instead of "rate" (Jann Horn). - Other minor changes suggested (Jann Horn, Kees Cook). Changelog v2 -> v3 ------------------ - Compute the application crash period on an on-going basis (Kees Cook). - Detect a brute force attack through the execve system call (Kees Cook). - Detect a slow brute force attack (Randy Dunlap). - Fine-tune the detection, taking into account privilege boundary crossing (Kees Cook). - Take into account only fatal signals delivered by the kernel (Kees Cook). - Remove the sysctl attributes to fine-tune the detection (Kees Cook). 
- Remove the prctls to allow per process enabling/disabling (Kees Cook). - Improve the documentation (Kees Cook). - Fix some typos in the documentation (Randy Dunlap). - Add self-test to validate the expectations (Kees Cook). Changelog v3 -> v4 ------------------ - Fix all the warnings shown by the tool "scripts/kernel-doc" (Randy Dunlap). Changelog v4 -> v5 ------------------ - Fix some typos (Randy Dunlap). Changelog v5 -> v6 ------------------ - Fix a reported deadlock (kernel test robot). - Add high level details to the documentation (Andi Kleen). Any constructive comments are welcome. Thanks. John Wood (8): security: Add LSM hook at the point where a task gets a fatal signal security/brute: Define a LSM and manage statistical data securtiy/brute: Detect a brute force attack security/brute: Fine tuning the attack detection security/brute: Mitigate a brute force attack selftests/brute: Add tests for the Brute LSM Documentation: Add documentation for the Brute LSM MAINTAINERS: Add a new entry for the Brute LSM Documentation/admin-guide/LSM/Brute.rst | 278 ++++++ Documentation/admin-guide/LSM/index.rst | 1 + MAINTAINERS | 7 + include/linux/lsm_hook_defs.h | 1 + include/linux/lsm_hooks.h | 4 + include/linux/security.h | 4 + kernel/signal.c | 1 + security/Kconfig | 11 +- security/Makefile | 4 + security/brute/Kconfig | 13 + security/brute/Makefile | 2 + security/brute/brute.c | 1107 ++++++++++++++++++++++ security/security.c | 5 + tools/testing/selftests/Makefile | 1 + tools/testing/selftests/brute/.gitignore | 2 + tools/testing/selftests/brute/Makefile | 5 + tools/testing/selftests/brute/config | 1 + tools/testing/selftests/brute/exec.c | 44 + tools/testing/selftests/brute/test.c | 507 ++++++++++ tools/testing/selftests/brute/test.sh | 226 +++++ 20 files changed, 2219 insertions(+), 5 deletions(-) create mode 100644 Documentation/admin-guide/LSM/Brute.rst create mode 100644 security/brute/Kconfig create mode 100644 security/brute/Makefile create mode 100644 
security/brute/brute.c create mode 100644 tools/testing/selftests/brute/.gitignore create mode 100644 tools/testing/selftests/brute/Makefile create mode 100644 tools/testing/selftests/brute/config create mode 100644 tools/testing/selftests/brute/exec.c create mode 100644 tools/testing/selftests/brute/test.c create mode 100755 tools/testing/selftests/brute/test.sh -- 2.25.1

4 years, 9 months

3
30
0 0

[PATCH -next] treewide: Remove duplicated include from tm-vmx-unavail.c

by Zheng Yongjun

Remove duplicated include. Reported-by: Hulk Robot <hulkci(a)huawei.com> Signed-off-by: Zheng Yongjun <zhengyongjun3(a)huawei.com> --- tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c index e2a0c07e8362..9ef37a9836ac 100644 --- a/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c +++ b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c @@ -17,7 +17,6 @@ #include <pthread.h> #include <sys/mman.h> #include <unistd.h> -#include <pthread.h> #include "tm.h" #include "utils.h"

4 years, 9 months

2
1
0 0

[PATCH] [v2] tools: testing: Remove duplicate includes

by Wan Jiabing

sched.h has been included at line 33, so remove the duplicate one at line 36. inttypes.h has been included at line 19, so remove the duplicate one at line 23. pthread.h has been included at line 17, so remove the duplicate one at line 20. Signed-off-by: Wan Jiabing <wanjiabing(a)vivo.com> --- tools/testing/selftests/powerpc/mm/tlbie_test.c | 1 - tools/testing/selftests/powerpc/tm/tm-poison.c | 1 - tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c | 1 - 3 files changed, 3 deletions(-) diff --git a/tools/testing/selftests/powerpc/mm/tlbie_test.c b/tools/testing/selftests/powerpc/mm/tlbie_test.c index f85a0938ab25..48344a74b212 100644 --- a/tools/testing/selftests/powerpc/mm/tlbie_test.c +++ b/tools/testing/selftests/powerpc/mm/tlbie_test.c @@ -33,7 +33,6 @@ #include <sched.h> #include <time.h> #include <stdarg.h> -#include <sched.h> #include <pthread.h> #include <signal.h> #include <sys/prctl.h> diff --git a/tools/testing/selftests/powerpc/tm/tm-poison.c b/tools/testing/selftests/powerpc/tm/tm-poison.c index 29e5f26af7b9..27c083a03d1f 100644 --- a/tools/testing/selftests/powerpc/tm/tm-poison.c +++ b/tools/testing/selftests/powerpc/tm/tm-poison.c @@ -20,7 +20,6 @@ #include <sched.h> #include <sys/types.h> #include <signal.h> -#include <inttypes.h> #include "tm.h" diff --git a/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c index e2a0c07e8362..9ef37a9836ac 100644 --- a/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c +++ b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c @@ -17,7 +17,6 @@ #include <pthread.h> #include <sys/mman.h> #include <unistd.h> -#include <pthread.h> #include "tm.h" #include "utils.h" -- 2.25.1

4 years, 9 months

2
1
0 0

Re: [PATCH v31 10/12] selftests/landlock: Add user space tests

by Kees Cook

On Wed, Mar 24, 2021 at 08:15:18PM +0100, Mickaël Salaün wrote: > From: Mickaël Salaün <mic(a)linux.microsoft.com> > > Test all Landlock system calls, ptrace hooks semantic and filesystem > access-control with multiple layouts. > > Test coverage for security/landlock/ is 93.6% of lines. The code not > covered only deals with internal kernel errors (e.g. memory allocation) > and race conditions. > > Cc: James Morris <jmorris(a)namei.org> > Cc: Jann Horn <jannh(a)google.com> > Cc: Kees Cook <keescook(a)chromium.org> > Cc: Serge E. Hallyn <serge(a)hallyn.com> > Cc: Shuah Khan <shuah(a)kernel.org> > Signed-off-by: Mickaël Salaün <mic(a)linux.microsoft.com> Reviewed-by: Kees Cook <keescook(a)chromium.org> -- Kees Cook

4 years, 9 months

1
0
0 0

[PATCH v30 00/12] Landlock LSM

by Mickaël Salaün

Hi, This patch series is mainly a rebase on top of v5.12-rc3 and a synchronization with the new mount_setattr(2). A light cleanup of hook_sb_delete() and new tests are also included. The SLOC count is 1329 for security/landlock/ and 2556 for tools/testing/selftest/landlock/ . Test coverage for security/landlock/ is 93.6% of lines. The code not covered only deals with internal kernel errors (e.g. memory allocation) and race conditions. This series is being fuzzed by syzkaller (which may cover internal kernel errors), and patches are on their way: https://github.com/google/syzkaller/pull/2380 The compiled documentation is available here: https://landlock.io/linux-doc/landlock-v30/userspace-api/landlock.html This series can be applied on top of v5.12-rc3 . This can be tested with CONFIG_SECURITY_LANDLOCK, CONFIG_SAMPLE_LANDLOCK and by prepending "landlock," to CONFIG_LSM. This patch series can be found in a Git repository here: https://github.com/landlock-lsm/linux/commits/landlock-v30 This patch series seems ready for upstream and I would really appreciate final reviews. Landlock LSM ------------ The goal of Landlock is to enable to restrict ambient rights (e.g. global filesystem access) for a set of processes. Because Landlock is a stackable LSM [1], it makes possible to create safe security sandboxes as new security layers in addition to the existing system-wide access-controls. This kind of sandbox is expected to help mitigate the security impact of bugs or unexpected/malicious behaviors in user-space applications. Landlock empowers any process, including unprivileged ones, to securely restrict themselves. Landlock is inspired by seccomp-bpf but instead of filtering syscalls and their raw arguments, a Landlock rule can restrict the use of kernel objects like file hierarchies, according to the kernel semantic. Landlock also takes inspiration from other OS sandbox mechanisms: XNU Sandbox, FreeBSD Capsicum or OpenBSD Pledge/Unveil. 
In this current form, Landlock misses some access-control features. This enables to minimize this patch series and ease review. This series still addresses multiple use cases, especially with the combined use of seccomp-bpf: applications with built-in sandboxing, init systems, security sandbox tools and security-oriented APIs [2]. [1] https://lore.kernel.org/lkml/50db058a-7dde-441b-a7f9-f6837fe8b69f@schaufler… [2] https://lore.kernel.org/lkml/f646e1c7-33cf-333f-070c-0a40ad0468cd@digikod.n… Previous versions: v29: https://lore.kernel.org/lkml/20210225190614.2181147-1-mic@digikod.net/ v28: https://lore.kernel.org/lkml/20210202162710.657398-1-mic@digikod.net/ v27: https://lore.kernel.org/lkml/20210121205119.793296-1-mic@digikod.net/ v26: https://lore.kernel.org/lkml/20201209192839.1396820-1-mic@digikod.net/ v25: https://lore.kernel.org/lkml/20201201192322.213239-1-mic@digikod.net/ v24: https://lore.kernel.org/lkml/20201112205141.775752-1-mic@digikod.net/ v23: https://lore.kernel.org/lkml/20201103182109.1014179-1-mic@digikod.net/ v22: https://lore.kernel.org/lkml/20201027200358.557003-1-mic@digikod.net/ v21: https://lore.kernel.org/lkml/20201008153103.1155388-1-mic@digikod.net/ v20: https://lore.kernel.org/lkml/20200802215903.91936-1-mic@digikod.net/ v19: https://lore.kernel.org/lkml/20200707180955.53024-1-mic@digikod.net/ v18: https://lore.kernel.org/lkml/20200526205322.23465-1-mic@digikod.net/ v17: https://lore.kernel.org/lkml/20200511192156.1618284-1-mic@digikod.net/ v16: https://lore.kernel.org/lkml/20200416103955.145757-1-mic@digikod.net/ v15: https://lore.kernel.org/lkml/20200326202731.693608-1-mic@digikod.net/ v14: https://lore.kernel.org/lkml/20200224160215.4136-1-mic@digikod.net/ v13: https://lore.kernel.org/lkml/20191104172146.30797-1-mic@digikod.net/ v12: https://lore.kernel.org/lkml/20191031164445.29426-1-mic@digikod.net/ v11: https://lore.kernel.org/lkml/20191029171505.6650-1-mic@digikod.net/ v10: 
https://lore.kernel.org/lkml/20190721213116.23476-1-mic@digikod.net/ v9: https://lore.kernel.org/lkml/20190625215239.11136-1-mic@digikod.net/ v8: https://lore.kernel.org/lkml/20180227004121.3633-1-mic@digikod.net/ v7: https://lore.kernel.org/lkml/20170821000933.13024-1-mic@digikod.net/ v6: https://lore.kernel.org/lkml/20170328234650.19695-1-mic@digikod.net/ v5: https://lore.kernel.org/lkml/20170222012632.4196-1-mic@digikod.net/ v4: https://lore.kernel.org/lkml/20161026065654.19166-1-mic@digikod.net/ v3: https://lore.kernel.org/lkml/20160914072415.26021-1-mic@digikod.net/ v2: https://lore.kernel.org/lkml/1472121165-29071-1-git-send-email-mic@digikod.… v1: https://lore.kernel.org/kernel-hardening/1458784008-16277-1-git-send-email-… Casey Schaufler (1): LSM: Infrastructure management of the superblock Mickaël Salaün (11): landlock: Add object management landlock: Add ruleset and domain management landlock: Set up the security framework and manage credentials landlock: Add ptrace restrictions fs,security: Add sb_delete hook landlock: Support filesystem access-control landlock: Add syscall implementations arch: Wire up Landlock syscalls selftests/landlock: Add user space tests samples/landlock: Add a sandbox manager example landlock: Add user and kernel documentation Documentation/security/index.rst | 1 + Documentation/security/landlock.rst | 79 + Documentation/userspace-api/index.rst | 1 + Documentation/userspace-api/landlock.rst | 307 ++ MAINTAINERS | 15 + arch/Kconfig | 7 + arch/alpha/kernel/syscalls/syscall.tbl | 3 + arch/arm/tools/syscall.tbl | 3 + arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 6 + arch/ia64/kernel/syscalls/syscall.tbl | 3 + arch/m68k/kernel/syscalls/syscall.tbl | 3 + arch/microblaze/kernel/syscalls/syscall.tbl | 3 + arch/mips/kernel/syscalls/syscall_n32.tbl | 3 + arch/mips/kernel/syscalls/syscall_n64.tbl | 3 + arch/mips/kernel/syscalls/syscall_o32.tbl | 3 + arch/parisc/kernel/syscalls/syscall.tbl | 3 + 
arch/powerpc/kernel/syscalls/syscall.tbl | 3 + arch/s390/kernel/syscalls/syscall.tbl | 3 + arch/sh/kernel/syscalls/syscall.tbl | 3 + arch/sparc/kernel/syscalls/syscall.tbl | 3 + arch/um/Kconfig | 1 + arch/x86/entry/syscalls/syscall_32.tbl | 3 + arch/x86/entry/syscalls/syscall_64.tbl | 3 + arch/xtensa/kernel/syscalls/syscall.tbl | 3 + fs/super.c | 1 + include/linux/lsm_hook_defs.h | 1 + include/linux/lsm_hooks.h | 4 + include/linux/security.h | 4 + include/linux/syscalls.h | 7 + include/uapi/asm-generic/unistd.h | 8 +- include/uapi/linux/landlock.h | 128 + kernel/sys_ni.c | 5 + samples/Kconfig | 7 + samples/Makefile | 1 + samples/landlock/.gitignore | 1 + samples/landlock/Makefile | 13 + samples/landlock/sandboxer.c | 238 ++ security/Kconfig | 11 +- security/Makefile | 2 + security/landlock/Kconfig | 21 + security/landlock/Makefile | 4 + security/landlock/common.h | 20 + security/landlock/cred.c | 46 + security/landlock/cred.h | 58 + security/landlock/fs.c | 687 ++++ security/landlock/fs.h | 56 + security/landlock/limits.h | 21 + security/landlock/object.c | 67 + security/landlock/object.h | 91 + security/landlock/ptrace.c | 120 + security/landlock/ptrace.h | 14 + security/landlock/ruleset.c | 473 +++ security/landlock/ruleset.h | 165 + security/landlock/setup.c | 40 + security/landlock/setup.h | 18 + security/landlock/syscalls.c | 445 +++ security/security.c | 51 +- security/selinux/hooks.c | 58 +- security/selinux/include/objsec.h | 6 + security/selinux/ss/services.c | 3 +- security/smack/smack.h | 6 + security/smack/smack_lsm.c | 35 +- tools/testing/selftests/Makefile | 1 + tools/testing/selftests/landlock/.gitignore | 2 + tools/testing/selftests/landlock/Makefile | 24 + tools/testing/selftests/landlock/base_test.c | 219 ++ tools/testing/selftests/landlock/common.h | 183 ++ tools/testing/selftests/landlock/config | 7 + tools/testing/selftests/landlock/fs_test.c | 2792 +++++++++++++++++ .../testing/selftests/landlock/ptrace_test.c | 337 ++ 
tools/testing/selftests/landlock/true.c | 5 + 72 files changed, 6896 insertions(+), 77 deletions(-) create mode 100644 Documentation/security/landlock.rst create mode 100644 Documentation/userspace-api/landlock.rst create mode 100644 include/uapi/linux/landlock.h create mode 100644 samples/landlock/.gitignore create mode 100644 samples/landlock/Makefile create mode 100644 samples/landlock/sandboxer.c create mode 100644 security/landlock/Kconfig create mode 100644 security/landlock/Makefile create mode 100644 security/landlock/common.h create mode 100644 security/landlock/cred.c create mode 100644 security/landlock/cred.h create mode 100644 security/landlock/fs.c create mode 100644 security/landlock/fs.h create mode 100644 security/landlock/limits.h create mode 100644 security/landlock/object.c create mode 100644 security/landlock/object.h create mode 100644 security/landlock/ptrace.c create mode 100644 security/landlock/ptrace.h create mode 100644 security/landlock/ruleset.c create mode 100644 security/landlock/ruleset.h create mode 100644 security/landlock/setup.c create mode 100644 security/landlock/setup.h create mode 100644 security/landlock/syscalls.c create mode 100644 tools/testing/selftests/landlock/.gitignore create mode 100644 tools/testing/selftests/landlock/Makefile create mode 100644 tools/testing/selftests/landlock/base_test.c create mode 100644 tools/testing/selftests/landlock/common.h create mode 100644 tools/testing/selftests/landlock/config create mode 100644 tools/testing/selftests/landlock/fs_test.c create mode 100644 tools/testing/selftests/landlock/ptrace_test.c create mode 100644 tools/testing/selftests/landlock/true.c base-commit: 1e28eed17697bcf343c6743f0028cc3b5dd88bf0 -- 2.30.2

4 years, 9 months

4
43
0 0

[PATCH] userfaultfd/shmem: fix minor fault page leak

by Axel Rasmussen

This fix is analogous to Peter Xu's fix for hugetlb [0]. If we don't put_page() after getting the page out of the page cache, we leak the reference. The fix can be verified by checking /proc/meminfo and running the userfaultfd selftest in shmem mode. Without the fix, we see MemFree / MemAvailable steadily decreasing with each run of the test. With the fix, memory is correctly freed after the test program exits. Fixes: 00da60b9d0a0 ("userfaultfd: support minor fault handling for shmem") Signed-off-by: Axel Rasmussen <axelrasmussen(a)google.com> --- mm/shmem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/shmem.c b/mm/shmem.c index ef8c9f5e92fc..d2e0e81b7d2e 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1831,6 +1831,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, if (page && vma && userfaultfd_minor(vma)) { unlock_page(page); + put_page(page); *fault_type = handle_userfault(vmf, VM_UFFD_MINOR); return 0; } -- 2.31.0.rc2.261.g7f71774620-goog

4 years, 9 months

3
4
0 0

[PATCH] tools: testing: Remove duplicate include of string.h

by Wan Jiabing

string.h has been included at line 15. So we remove the duplicate one at line 17. Signed-off-by: Wan Jiabing <wanjiabing(a)vivo.com> --- tools/testing/selftests/mincore/mincore_selftest.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c index 5a1e85ff5d32..e54106643337 100644 --- a/tools/testing/selftests/mincore/mincore_selftest.c +++ b/tools/testing/selftests/mincore/mincore_selftest.c @@ -14,7 +14,6 @@ #include <sys/mman.h> #include <string.h> #include <fcntl.h> -#include <string.h> #include "../kselftest.h" #include "../kselftest_harness.h" -- 2.25.1

4 years, 9 months

2
1
0 0

[PATCH v6 1/2] mm: huge_memory: a new debugfs interface for splitting THP tests.

by Zi Yan

From: Zi Yan <ziy(a)nvidia.com> We did not have a direct user interface of splitting the compound page backing a THP and there is no need unless we want to expose the THP implementation details to users. Make <debugfs>/split_huge_pages accept a new command to do that. By writing "<pid>,<vaddr_start>,<vaddr_end>" to <debugfs>/split_huge_pages, THPs within the given virtual address range from the process with the given pid are split. It is used to test split_huge_page function. In addition, a selftest program is added to tools/testing/selftests/vm to utilize the interface by splitting PMD THPs and PTE-mapped THPs. This does not change the old behavior, i.e., writing 1 to the interface to split all THPs in the system. Changelog: From v5: 1. Skipped special VMAs and other fixes. (suggested by Yang Shi) From v4: 1. Fixed the error code return issue, spotted by kernel test robot <lkp(a)intel.com>. From v3: 1. Factored out split huge pages in the given pid code to a separate function. 2. Added the missing put_page for not split pages. 3. pr_debug -> pr_info, make reading results simpler. From v2: 1. Reused existing <debugfs>/split_huge_pages interface. (suggested by Yang Shi) From v1: 1. Removed unnecessary calling to vma_migratable, spotted by kernel test robot <lkp(a)intel.com>. 2. Dropped the use of find_mm_struct and code it directly, since there is no need for the permission check in that function and the function is only available when migration is on. 3. Added some comments in the selftest program to clarify how PTE-mapped THPs are formed. 
Signed-off-by: Zi Yan <ziy(a)nvidia.com> Reviewed-by: Yang Shi <shy828301(a)gmail.com> --- mm/huge_memory.c | 151 ++++++++- tools/testing/selftests/vm/.gitignore | 1 + tools/testing/selftests/vm/Makefile | 1 + .../selftests/vm/split_huge_page_test.c | 318 ++++++++++++++++++ 4 files changed, 464 insertions(+), 7 deletions(-) create mode 100644 tools/testing/selftests/vm/split_huge_page_test.c diff --git a/mm/huge_memory.c b/mm/huge_memory.c index bff92dea5ab3..b653255a548e 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -7,6 +7,7 @@ #include <linux/mm.h> #include <linux/sched.h> +#include <linux/sched/mm.h> #include <linux/sched/coredump.h> #include <linux/sched/numa_balancing.h> #include <linux/highmem.h> @@ -2922,16 +2923,14 @@ static struct shrinker deferred_split_shrinker = { }; #ifdef CONFIG_DEBUG_FS -static int split_huge_pages_set(void *data, u64 val) +static void split_huge_pages_all(void) { struct zone *zone; struct page *page; unsigned long pfn, max_zone_pfn; unsigned long total = 0, split = 0; - if (val != 1) - return -EINVAL; - + pr_info("Split all THPs\n"); for_each_populated_zone(zone) { max_zone_pfn = zone_end_pfn(zone); for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) { @@ -2959,11 +2958,149 @@ static int split_huge_pages_set(void *data, u64 val) } pr_info("%lu of %lu THP split\n", split, total); +} - return 0; +static inline bool vma_not_suitable_for_thp_split(struct vm_area_struct *vma) +{ + return vma_is_special_huge(vma) || (vma->vm_flags & VM_IO) || + is_vm_hugetlb_page(vma); } -DEFINE_DEBUGFS_ATTRIBUTE(split_huge_pages_fops, NULL, split_huge_pages_set, - "%llu\n"); + +static int split_huge_pages_pid(int pid, unsigned long vaddr_start, + unsigned long vaddr_end) +{ + int ret = 0; + struct task_struct *task; + struct mm_struct *mm; + unsigned long total = 0, split = 0; + unsigned long addr; + + vaddr_start &= PAGE_MASK; + vaddr_end &= PAGE_MASK; + + /* Find the task_struct from pid */ + rcu_read_lock(); + task = 
find_task_by_vpid(pid); + if (!task) { + rcu_read_unlock(); + ret = -ESRCH; + goto out; + } + get_task_struct(task); + rcu_read_unlock(); + + /* Find the mm_struct */ + mm = get_task_mm(task); + put_task_struct(task); + + if (!mm) { + ret = -EINVAL; + goto out; + } + + pr_info("Split huge pages in pid: %d, vaddr: [0x%lx - 0x%lx]\n", + pid, vaddr_start, vaddr_end); + + mmap_read_lock(mm); + /* + * always increase addr by PAGE_SIZE, since we could have a PTE page + * table filled with PTE-mapped THPs, each of which is distinct. + */ + for (addr = vaddr_start; addr < vaddr_end; addr += PAGE_SIZE) { + struct vm_area_struct *vma = find_vma(mm, addr); + unsigned int follflags; + struct page *page; + + if (!vma || addr < vma->vm_start) + break; + + /* skip special VMA and hugetlb VMA */ + if (vma_not_suitable_for_thp_split(vma)) { + addr = vma->vm_end; + continue; + } + + /* FOLL_DUMP to ignore special (like zero) pages */ + follflags = FOLL_GET | FOLL_DUMP; + page = follow_page(vma, addr, follflags); + + if (IS_ERR(page)) + continue; + if (!page) + continue; + + if (!is_transparent_hugepage(page)) + goto next; + + total++; + if (!can_split_huge_page(compound_head(page), NULL)) + goto next; + + if (!trylock_page(page)) + goto next; + + if (!split_huge_page(page)) + split++; + + unlock_page(page); +next: + put_page(page); + } + mmap_read_unlock(mm); + mmput(mm); + + pr_info("%lu of %lu THP split\n", split, total); + +out: + return ret; +} + +#define MAX_INPUT_BUF_SZ 255 + +static ssize_t split_huge_pages_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppops) +{ + static DEFINE_MUTEX(split_debug_mutex); + ssize_t ret; + char input_buf[MAX_INPUT_BUF_SZ]; /* hold pid, start_vaddr, end_vaddr */ + int pid; + unsigned long vaddr_start, vaddr_end; + + ret = mutex_lock_interruptible(&split_debug_mutex); + if (ret) + return ret; + + ret = -EFAULT; + + memset(input_buf, 0, MAX_INPUT_BUF_SZ); + if (copy_from_user(input_buf, buf, min_t(size_t, count, 
MAX_INPUT_BUF_SZ))) + goto out; + + input_buf[MAX_INPUT_BUF_SZ - 1] = '\0'; + ret = sscanf(input_buf, "%d,0x%lx,0x%lx", &pid, &vaddr_start, &vaddr_end); + if (ret == 1 && pid == 1) { + split_huge_pages_all(); + ret = strlen(input_buf); + goto out; + } else if (ret != 3) { + ret = -EINVAL; + goto out; + } + + ret = split_huge_pages_pid(pid, vaddr_start, vaddr_end); + if (!ret) + ret = strlen(input_buf); +out: + mutex_unlock(&split_debug_mutex); + return ret; + +} + +static const struct file_operations split_huge_pages_fops = { + .owner = THIS_MODULE, + .write = split_huge_pages_write, + .llseek = no_llseek, +}; static int __init split_huge_pages_debugfs(void) { diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index 9a35c3f6a557..1f651e85ed60 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -22,3 +22,4 @@ map_fixed_noreplace write_to_hugetlbfs hmm-tests local_config.* +split_huge_page_test diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index d42115e4284d..4cbc91d6869f 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -42,6 +42,7 @@ TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += userfaultfd +TEST_GEN_FILES += split_huge_page_test ifeq ($(MACHINE),x86_64) CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32) diff --git a/tools/testing/selftests/vm/split_huge_page_test.c b/tools/testing/selftests/vm/split_huge_page_test.c new file mode 100644 index 000000000000..2c0c18e60c57 --- /dev/null +++ b/tools/testing/selftests/vm/split_huge_page_test.c @@ -0,0 +1,318 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * A test of splitting PMD THPs and PTE-mapped THPs from a specified virtual + * address range in a process via <debugfs>/split_huge_pages interface. 
+ */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <inttypes.h> +#include <string.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <malloc.h> +#include <stdbool.h> + +uint64_t pagesize; +unsigned int pageshift; +uint64_t pmd_pagesize; + +#define PMD_SIZE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size" +#define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages" +#define SMAP_PATH "/proc/self/smaps" +#define INPUT_MAX 80 + +#define PFN_MASK ((1UL<<55)-1) +#define KPF_THP (1UL<<22) + +int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file) +{ + uint64_t paddr; + uint64_t page_flags; + + if (pagemap_file) { + pread(pagemap_file, &paddr, sizeof(paddr), + ((long)vaddr >> pageshift) * sizeof(paddr)); + + if (kpageflags_file) { + pread(kpageflags_file, &page_flags, sizeof(page_flags), + (paddr & PFN_MASK) * sizeof(page_flags)); + + return !!(page_flags & KPF_THP); + } + } + return 0; +} + + +static uint64_t read_pmd_pagesize(void) +{ + int fd; + char buf[20]; + ssize_t num_read; + + fd = open(PMD_SIZE_PATH, O_RDONLY); + if (fd == -1) { + perror("Open hpage_pmd_size failed"); + exit(EXIT_FAILURE); + } + num_read = read(fd, buf, 19); + if (num_read < 1) { + close(fd); + perror("Read hpage_pmd_size failed"); + exit(EXIT_FAILURE); + } + buf[num_read] = '\0'; + close(fd); + + return strtoul(buf, NULL, 10); +} + +static int write_file(const char *path, const char *buf, size_t buflen) +{ + int fd; + ssize_t numwritten; + + fd = open(path, O_WRONLY); + if (fd == -1) + return 0; + + numwritten = write(fd, buf, buflen - 1); + close(fd); + if (numwritten < 1) + return 0; + + return (unsigned int) numwritten; +} + +static void write_debugfs(int pid, uint64_t vaddr_start, uint64_t vaddr_end) +{ + char input[INPUT_MAX]; + int ret; + + ret = snprintf(input, INPUT_MAX, "%d,0x%lx,0x%lx", pid, vaddr_start, + vaddr_end); + if (ret >= INPUT_MAX) { + printf("%s: Debugfs input is too long\n", __func__); + 
exit(EXIT_FAILURE); + } + + if (!write_file(SPLIT_DEBUGFS, input, ret + 1)) { + perror(SPLIT_DEBUGFS); + exit(EXIT_FAILURE); + } +} + +#define MAX_LINE_LENGTH 500 + +static bool check_for_pattern(FILE *fp, const char *pattern, char *buf) +{ + while (fgets(buf, MAX_LINE_LENGTH, fp) != NULL) { + if (!strncmp(buf, pattern, strlen(pattern))) + return true; + } + return false; +} + +static uint64_t check_huge(void *addr) +{ + uint64_t thp = 0; + int ret; + FILE *fp; + char buffer[MAX_LINE_LENGTH]; + char addr_pattern[MAX_LINE_LENGTH]; + + ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-", + (unsigned long) addr); + if (ret >= MAX_LINE_LENGTH) { + printf("%s: Pattern is too long\n", __func__); + exit(EXIT_FAILURE); + } + + + fp = fopen(SMAP_PATH, "r"); + if (!fp) { + printf("%s: Failed to open file %s\n", __func__, SMAP_PATH); + exit(EXIT_FAILURE); + } + if (!check_for_pattern(fp, addr_pattern, buffer)) + goto err_out; + + /* + * Fetch the AnonHugePages: in the same block and check the number of + * hugepages. 
+ */ + if (!check_for_pattern(fp, "AnonHugePages:", buffer)) + goto err_out; + + if (sscanf(buffer, "AnonHugePages:%10ld kB", &thp) != 1) { + printf("Reading smap error\n"); + exit(EXIT_FAILURE); + } + +err_out: + fclose(fp); + return thp; +} + +void split_pmd_thp(void) +{ + char *one_page; + size_t len = 4 * pmd_pagesize; + uint64_t thp_size; + size_t i; + + one_page = memalign(pmd_pagesize, len); + + if (!one_page) { + printf("Fail to allocate memory\n"); + exit(EXIT_FAILURE); + } + + madvise(one_page, len, MADV_HUGEPAGE); + + for (i = 0; i < len; i++) + one_page[i] = (char)i; + + thp_size = check_huge(one_page); + if (!thp_size) { + printf("No THP is allocated\n"); + exit(EXIT_FAILURE); + } + + /* split all THPs */ + write_debugfs(getpid(), (uint64_t)one_page, (uint64_t)one_page + len); + + for (i = 0; i < len; i++) + if (one_page[i] != (char)i) { + printf("%ld byte corrupted\n", i); + exit(EXIT_FAILURE); + } + + + thp_size = check_huge(one_page); + if (thp_size) { + printf("Still %ld kB AnonHugePages not split\n", thp_size); + exit(EXIT_FAILURE); + } + + printf("Split huge pages successful\n"); + free(one_page); +} + +void split_pte_mapped_thp(void) +{ + char *one_page, *pte_mapped, *pte_mapped2; + size_t len = 4 * pmd_pagesize; + uint64_t thp_size; + size_t i; + const char *pagemap_template = "/proc/%d/pagemap"; + const char *kpageflags_proc = "/proc/kpageflags"; + char pagemap_proc[255]; + int pagemap_fd; + int kpageflags_fd; + + if (snprintf(pagemap_proc, 255, pagemap_template, getpid()) < 0) { + perror("get pagemap proc error"); + exit(EXIT_FAILURE); + } + pagemap_fd = open(pagemap_proc, O_RDONLY); + + if (pagemap_fd == -1) { + perror("read pagemap:"); + exit(EXIT_FAILURE); + } + + kpageflags_fd = open(kpageflags_proc, O_RDONLY); + + if (kpageflags_fd == -1) { + perror("read kpageflags:"); + exit(EXIT_FAILURE); + } + + one_page = mmap((void *)(1UL << 30), len, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + + madvise(one_page, len, 
MADV_HUGEPAGE); + + for (i = 0; i < len; i++) + one_page[i] = (char)i; + + thp_size = check_huge(one_page); + if (!thp_size) { + printf("No THP is allocated\n"); + exit(EXIT_FAILURE); + } + + /* remap the first pagesize of first THP */ + pte_mapped = mremap(one_page, pagesize, pagesize, MREMAP_MAYMOVE); + + /* remap the Nth pagesize of Nth THP */ + for (i = 1; i < 4; i++) { + pte_mapped2 = mremap(one_page + pmd_pagesize * i + pagesize * i, + pagesize, pagesize, + MREMAP_MAYMOVE|MREMAP_FIXED, + pte_mapped + pagesize * i); + if (pte_mapped2 == (char *)-1) { + perror("mremap failed"); + exit(EXIT_FAILURE); + } + } + + /* smap does not show THPs after mremap, use kpageflags instead */ + thp_size = 0; + for (i = 0; i < pagesize * 4; i++) + if (i % pagesize == 0 && + is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd)) + thp_size++; + + if (thp_size != 4) { + printf("Some THPs are missing during mremap\n"); + exit(EXIT_FAILURE); + } + + /* split all remapped THPs */ + write_debugfs(getpid(), (uint64_t)pte_mapped, + (uint64_t)pte_mapped + pagesize * 4); + + /* smap does not show THPs after mremap, use kpageflags instead */ + thp_size = 0; + for (i = 0; i < pagesize * 4; i++) { + if (pte_mapped[i] != (char)i) { + printf("%ld byte corrupted\n", i); + exit(EXIT_FAILURE); + } + if (i % pagesize == 0 && + is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd)) + thp_size++; + } + + if (thp_size) { + printf("Still %ld THPs not split\n", thp_size); + exit(EXIT_FAILURE); + } + + printf("Split PTE-mapped huge pages successful\n"); + munmap(one_page, len); + close(pagemap_fd); + close(kpageflags_fd); +} + +int main(int argc, char **argv) +{ + if (geteuid() != 0) { + printf("Please run the benchmark as root\n"); + exit(EXIT_FAILURE); + } + + pagesize = getpagesize(); + pageshift = ffs(pagesize) - 1; + pmd_pagesize = read_pmd_pagesize(); + + split_pmd_thp(); + split_pte_mapped_thp(); + + return 0; +} -- 2.30.2

4 years, 9 months

3
3
0 0

[PATCH v6] selftests/x86: Use getauxval() to simplify the code in sgx

by Tianjia Zhang

Simplify the sgx code implementation by using library function getauxval() instead of a custom function to get the base address of vDSO. Signed-off-by: Tianjia Zhang <tianjia.zhang(a)linux.alibaba.com> Reviewed-by: Jarkko Sakkinen <jarkko(a)kernel.org> Acked-by: Shuah Khan <skhan(a)linuxfoundation.org> --- tools/testing/selftests/sgx/main.c | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c index 724cec700926..5167505fbb46 100644 --- a/tools/testing/selftests/sgx/main.c +++ b/tools/testing/selftests/sgx/main.c @@ -15,6 +15,7 @@ #include <sys/stat.h> #include <sys/time.h> #include <sys/types.h> +#include <sys/auxv.h> #include "defines.h" #include "main.h" #include "../kselftest.h" @@ -28,24 +29,6 @@ struct vdso_symtab { Elf64_Word *elf_hashtab; }; -static void *vdso_get_base_addr(char *envp[]) -{ - Elf64_auxv_t *auxv; - int i; - - for (i = 0; envp[i]; i++) - ; - - auxv = (Elf64_auxv_t *)&envp[i + 1]; - - for (i = 0; auxv[i].a_type != AT_NULL; i++) { - if (auxv[i].a_type == AT_SYSINFO_EHDR) - return (void *)auxv[i].a_un.a_val; - } - - return NULL; -} - static Elf64_Dyn *vdso_get_dyntab(void *addr) { Elf64_Ehdr *ehdr = addr; @@ -162,7 +145,7 @@ static int user_handler(long rdi, long rsi, long rdx, long ursp, long r8, long r return 0; } -int main(int argc, char *argv[], char *envp[]) +int main(int argc, char *argv[]) { struct sgx_enclave_run run; struct vdso_symtab symtab; @@ -203,7 +186,8 @@ int main(int argc, char *argv[], char *envp[]) memset(&run, 0, sizeof(run)); run.tcs = encl.encl_base; - addr = vdso_get_base_addr(envp); + /* Get vDSO base address */ + addr = (void *)getauxval(AT_SYSINFO_EHDR); if (!addr) goto err; -- 2.19.1.3.ge56e4f7

4 years, 9 months

3
5
0 0

[PATCH RFC v2 0/8] Add support for synchronous signals on perf events

by Marco Elver

The perf subsystem today unifies various tracing and monitoring features, from both software and hardware. One benefit of the perf subsystem is automatically inheriting events to child tasks, which enables process-wide event monitoring with low overheads. By default perf events are non-intrusive, not affecting behaviour of the tasks being monitored. For certain use-cases, however, it makes sense to leverage the generality of the perf events subsystem and optionally allow the tasks being monitored to receive signals on events they are interested in. This patch series adds the option to synchronously signal user space on events. To better support process-wide synchronous self-monitoring, without events propagating to children that do not share the current process's shared environment, two pre-requisite patches are added to optionally restrict inheritance to CLONE_THREAD, and remove events on exec (without affecting the parent). Examples of how to use these features can be found in the two kselftests at the end of the series. The kselftests verify and stress test the basic functionality. The discussion at [1] led to the changes proposed in this series. The approach taken in patch "Add support for SIGTRAP on perf events" to use 'event_limit' to trigger the signal was kindly suggested by Peter Zijlstra in [2]. [1] https://lore.kernel.org/lkml/CACT4Y+YPrXGw+AtESxAgPyZ84TYkNZdP0xpocX2jwVAbZ… [2] https://lore.kernel.org/lkml/YBv3rAT566k+6zjg@hirez.programming.kicks-ass.n… Motivation and example uses: 1. Our immediate motivation is low-overhead sampling-based race detection for user space [3]. By using perf_event_open() at process initialization, we can create hardware breakpoint/watchpoint events that are propagated automatically to all threads in a process. As far as we are aware, today no existing kernel facility (such as ptrace) allows us to set up process-wide watchpoints with minimal overheads (that are comparable to mprotect() of whole pages). 
[3] https://llvm.org/devmtg/2020-09/slides/Morehouse-GWP-Tsan.pdf 2. Other low-overhead error detectors that rely on detecting accesses to certain memory locations or code, process-wide and also only in a specific set of subtasks or threads. Other use-case ideas that we found interesting; they are only meant to illustrate the range of potential and further motivate the utility (we're sure there are more): 3. Code hot patching without full stop-the-world. Specifically, by setting a code breakpoint at the entry to the patched routine, then sending signals to threads and checking that they are not in the routine, but without stopping them further. If any of the threads enters the routine, it will receive SIGTRAP and pause. 4. Safepoints without mprotect(). Some Java implementations use "load from a known memory location" as a safepoint. When threads need to be stopped, the page containing the location is mprotect()ed and threads get a signal. This could be replaced with a watchpoint, which requires neither a whole page nor DTLB shootdowns. 5. Threads receiving signals on performance events to throttle/unthrottle themselves. 6. Tracking data flow globally. --- v2: * Patch "Support only inheriting events if cloned with CLONE_THREAD" added to series. * Patch "Add support for event removal on exec" added to series. * Patch "Add kselftest for process-wide sigtrap handling" added to series. * Patch "Add kselftest for remove_on_exec" added to series. * Implicitly restrict inheriting events if sigtrap, but the child was cloned with CLONE_CLEAR_SIGHAND, because it is not generally safe if the child cleared all signal handlers to continue sending SIGTRAP. * Various minor fixes (see details in patches). 
v1: https://lkml.kernel.org/r/20210223143426.2412737-1-elver@google.com Marco Elver (8): perf/core: Apply PERF_EVENT_IOC_MODIFY_ATTRIBUTES to children perf/core: Support only inheriting events if cloned with CLONE_THREAD perf/core: Add support for event removal on exec signal: Introduce TRAP_PERF si_code and si_perf to siginfo perf/core: Add support for SIGTRAP on perf events perf/core: Add breakpoint information to siginfo on SIGTRAP selftests/perf: Add kselftest for process-wide sigtrap handling selftests/perf: Add kselftest for remove_on_exec arch/m68k/kernel/signal.c | 3 + arch/x86/kernel/signal_compat.c | 5 +- fs/signalfd.c | 4 + include/linux/compat.h | 2 + include/linux/perf_event.h | 5 +- include/linux/signal.h | 1 + include/uapi/asm-generic/siginfo.h | 6 +- include/uapi/linux/perf_event.h | 5 +- include/uapi/linux/signalfd.h | 4 +- kernel/events/core.c | 130 ++++++++- kernel/fork.c | 2 +- kernel/signal.c | 11 + .../testing/selftests/perf_events/.gitignore | 3 + tools/testing/selftests/perf_events/Makefile | 6 + tools/testing/selftests/perf_events/config | 1 + .../selftests/perf_events/remove_on_exec.c | 256 ++++++++++++++++++ tools/testing/selftests/perf_events/settings | 1 + .../selftests/perf_events/sigtrap_threads.c | 202 ++++++++++++++ 18 files changed, 632 insertions(+), 15 deletions(-) create mode 100644 tools/testing/selftests/perf_events/.gitignore create mode 100644 tools/testing/selftests/perf_events/Makefile create mode 100644 tools/testing/selftests/perf_events/config create mode 100644 tools/testing/selftests/perf_events/remove_on_exec.c create mode 100644 tools/testing/selftests/perf_events/settings create mode 100644 tools/testing/selftests/perf_events/sigtrap_threads.c -- 2.30.1.766.gb4fecdf3b7-goog

4 years, 9 months

3
23
0 0

[PATCH] kselftest: arm64: Add BTI tests

by Mark Brown

Add some tests that verify that BTI functions correctly for static binaries built with and without BTI support, verifying that SIGILL is generated when expected and is not generated in other situations. Since BTI support is still being rolled out in distributions, these tests are built entirely freestanding: no libc support is used at all, so none of the standard helper functions for kselftest can be used and we open-code everything. This also means we aren't testing the kernel support for the dynamic linker, though the test program can be readily adapted for that once it becomes something that we can reliably build and run. These tests were originally written by Dave Martin; I've adapted them for kselftest, mainly around the build system and the output format. Signed-off-by: Mark Brown <broonie(a)kernel.org> Cc: Dave Martin <Dave.Martin(a)arm.com> --- tools/testing/selftests/arm64/Makefile | 2 +- tools/testing/selftests/arm64/bti/.gitignore | 2 + tools/testing/selftests/arm64/bti/Makefile | 61 +++++ tools/testing/selftests/arm64/bti/assembler.h | 80 ++++++ tools/testing/selftests/arm64/bti/btitest.h | 23 ++ tools/testing/selftests/arm64/bti/compiler.h | 21 ++ .../selftests/arm64/bti/gen/.gitignore | 2 + tools/testing/selftests/arm64/bti/signal.c | 37 +++ tools/testing/selftests/arm64/bti/signal.h | 21 ++ tools/testing/selftests/arm64/bti/start.S | 14 ++ tools/testing/selftests/arm64/bti/syscall.S | 23 ++ tools/testing/selftests/arm64/bti/system.c | 22 ++ tools/testing/selftests/arm64/bti/system.h | 28 +++ tools/testing/selftests/arm64/bti/test.c | 234 ++++++++++++++++++ tools/testing/selftests/arm64/bti/teststubs.S | 39 +++ .../testing/selftests/arm64/bti/trampoline.S | 29 +++ 16 files changed, 637 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/arm64/bti/.gitignore create mode 100644 tools/testing/selftests/arm64/bti/Makefile create mode 100644 tools/testing/selftests/arm64/bti/assembler.h create mode 100644 
tools/testing/selftests/arm64/bti/btitest.h create mode 100644 tools/testing/selftests/arm64/bti/compiler.h create mode 100644 tools/testing/selftests/arm64/bti/gen/.gitignore create mode 100644 tools/testing/selftests/arm64/bti/signal.c create mode 100644 tools/testing/selftests/arm64/bti/signal.h create mode 100644 tools/testing/selftests/arm64/bti/start.S create mode 100644 tools/testing/selftests/arm64/bti/syscall.S create mode 100644 tools/testing/selftests/arm64/bti/system.c create mode 100644 tools/testing/selftests/arm64/bti/system.h create mode 100644 tools/testing/selftests/arm64/bti/test.c create mode 100644 tools/testing/selftests/arm64/bti/teststubs.S create mode 100644 tools/testing/selftests/arm64/bti/trampoline.S diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile index 2c9d012797a7..ced910fb4019 100644 --- a/tools/testing/selftests/arm64/Makefile +++ b/tools/testing/selftests/arm64/Makefile @@ -4,7 +4,7 @@ ARCH ?= $(shell uname -m 2>/dev/null || echo not) ifneq (,$(filter $(ARCH),aarch64 arm64)) -ARM64_SUBTARGETS ?= tags signal pauth fp mte +ARM64_SUBTARGETS ?= tags signal pauth fp mte bti else ARM64_SUBTARGETS := endif diff --git a/tools/testing/selftests/arm64/bti/.gitignore b/tools/testing/selftests/arm64/bti/.gitignore new file mode 100644 index 000000000000..73869fabada4 --- /dev/null +++ b/tools/testing/selftests/arm64/bti/.gitignore @@ -0,0 +1,2 @@ +btitest +nobtitest diff --git a/tools/testing/selftests/arm64/bti/Makefile b/tools/testing/selftests/arm64/bti/Makefile new file mode 100644 index 000000000000..73e013c082a6 --- /dev/null +++ b/tools/testing/selftests/arm64/bti/Makefile @@ -0,0 +1,61 @@ +# SPDX-License-Identifier: GPL-2.0 + +TEST_GEN_PROGS := btitest nobtitest + +PROGS := $(patsubst %,gen/%,$(TEST_GEN_PROGS)) + +# These tests are built as freestanding binaries since otherwise BTI +# support in ld.so is required which is not currently widespread; when +# it is available it will still be 
useful to test this separately as the +# cases for statically linked and dynamically linked binaries are +# slightly different. + +CFLAGS_NOBTI = -DBTI=0 +CFLAGS_BTI = -mbranch-protection=standard -DBTI=1 + +CFLAGS_COMMON = -ffreestanding -Wall -Wextra $(CFLAGS) + +BTI_CC_COMMAND = $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -c -o $@ $< +NOBTI_CC_COMMAND = $(CC) $(CFLAGS_NOBTI) $(CFLAGS_COMMON) -c -o $@ $< + +%-bti.o: %.c + $(BTI_CC_COMMAND) + +%-bti.o: %.S + $(BTI_CC_COMMAND) + +%-nobti.o: %.c + $(NOBTI_CC_COMMAND) + +%-nobti.o: %.S + $(NOBTI_CC_COMMAND) + +BTI_OBJS = \ + test-bti.o \ + signal-bti.o \ + start-bti.o \ + syscall-bti.o \ + system-bti.o \ + teststubs-bti.o \ + trampoline-bti.o +gen/btitest: $(BTI_OBJS) + $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -o $@ $^ + +NOBTI_OBJS = \ + test-nobti.o \ + signal-nobti.o \ + start-nobti.o \ + syscall-nobti.o \ + system-nobti.o \ + teststubs-nobti.o \ + trampoline-nobti.o +gen/nobtitest: $(NOBTI_OBJS) + $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -o $@ $^ + +# Including KSFT lib.mk here will also mangle the TEST_GEN_PROGS list +# to account for any OUTPUT target-dirs optionally provided by +# the toplevel makefile +include ../../lib.mk + +$(TEST_GEN_PROGS): $(PROGS) + cp $(PROGS) $(OUTPUT)/ diff --git a/tools/testing/selftests/arm64/bti/assembler.h b/tools/testing/selftests/arm64/bti/assembler.h new file mode 100644 index 000000000000..04e7b72880ef --- /dev/null +++ b/tools/testing/selftests/arm64/bti/assembler.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin <Dave.Martin(a)arm.com> + */ + +#ifndef ASSEMBLER_H +#define ASSEMBLER_H + +#define NT_GNU_PROPERTY_TYPE_0 5 +#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000 + +/* Bits for GNU_PROPERTY_AARCH64_FEATURE_1_BTI */ +#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0) +#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1U << 1) + + +.macro startfn name:req + .globl \name +\name: + .macro 
endfn + .size \name, . - \name + .type \name, @function + .purgem endfn + .endm +.endm + +.macro emit_aarch64_feature_1_and + .pushsection .note.gnu.property, "a" + .align 3 + .long 2f - 1f + .long 6f - 3f + .long NT_GNU_PROPERTY_TYPE_0 +1: .string "GNU" +2: + .align 3 +3: .long GNU_PROPERTY_AARCH64_FEATURE_1_AND + .long 5f - 4f +4: +#if BTI + .long GNU_PROPERTY_AARCH64_FEATURE_1_PAC | \ + GNU_PROPERTY_AARCH64_FEATURE_1_BTI +#else + .long 0 +#endif +5: + .align 3 +6: + .popsection +.endm + +.macro paciasp + hint 0x19 +.endm + +.macro autiasp + hint 0x1d +.endm + +.macro __bti_ + hint 0x20 +.endm + +.macro __bti_c + hint 0x22 +.endm + +.macro __bti_j + hint 0x24 +.endm + +.macro __bti_jc + hint 0x26 +.endm + +.macro bti what= + __bti_\what +.endm + +#endif /* ! ASSEMBLER_H */ diff --git a/tools/testing/selftests/arm64/bti/btitest.h b/tools/testing/selftests/arm64/bti/btitest.h new file mode 100644 index 000000000000..2aff9b10336e --- /dev/null +++ b/tools/testing/selftests/arm64/bti/btitest.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin <Dave.Martin(a)arm.com> + */ + +#ifndef BTITEST_H +#define BTITEST_H + +/* Trampolines for calling the test stubs: */ +void call_using_br_x0(void (*)(void)); +void call_using_br_x16(void (*)(void)); +void call_using_blr(void (*)(void)); + +/* Test stubs: */ +void nohint_func(void); +void bti_none_func(void); +void bti_c_func(void); +void bti_j_func(void); +void bti_jc_func(void); +void paciasp_func(void); + +#endif /* !BTITEST_H */ diff --git a/tools/testing/selftests/arm64/bti/compiler.h b/tools/testing/selftests/arm64/bti/compiler.h new file mode 100644 index 000000000000..ebb6204f447a --- /dev/null +++ b/tools/testing/selftests/arm64/bti/compiler.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin <Dave.Martin(a)arm.com> + */ + +#ifndef COMPILER_H +#define COMPILER_H + 
+#define __always_unused __attribute__((__unused__)) +#define __noreturn __attribute__((__noreturn__)) +#define __unreachable() __builtin_unreachable() + +/* curse(e) has value e, but the compiler cannot assume so */ +#define curse(e) ({ \ + __typeof__(e) __curse_e = (e); \ + asm ("" : "+r" (__curse_e)); \ + __curse_e; \ +}) + +#endif /* ! COMPILER_H */ diff --git a/tools/testing/selftests/arm64/bti/gen/.gitignore b/tools/testing/selftests/arm64/bti/gen/.gitignore new file mode 100644 index 000000000000..73869fabada4 --- /dev/null +++ b/tools/testing/selftests/arm64/bti/gen/.gitignore @@ -0,0 +1,2 @@ +btitest +nobtitest diff --git a/tools/testing/selftests/arm64/bti/signal.c b/tools/testing/selftests/arm64/bti/signal.c new file mode 100644 index 000000000000..f3fd29b91141 --- /dev/null +++ b/tools/testing/selftests/arm64/bti/signal.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin <Dave.Martin(a)arm.com> + */ + +#include "system.h" +#include "signal.h" + +int sigemptyset(sigset_t *s) +{ + unsigned int i; + + for (i = 0; i < _NSIG_WORDS; ++i) + s->sig[i] = 0; + + return 0; +} + +int sigaddset(sigset_t *s, int n) +{ + if (n < 1 || n > _NSIG) + return -EINVAL; + + s->sig[(n - 1) / _NSIG_BPW] |= 1UL << (n - 1) % _NSIG_BPW; + return 0; +} + +int sigaction(int n, struct sigaction *sa, const struct sigaction *old) +{ + return syscall(__NR_rt_sigaction, n, sa, old, sizeof(sa->sa_mask)); +} + +int sigprocmask(int how, const sigset_t *mask, sigset_t *old) +{ + return syscall(__NR_rt_sigprocmask, how, mask, old, sizeof(*mask)); +} diff --git a/tools/testing/selftests/arm64/bti/signal.h b/tools/testing/selftests/arm64/bti/signal.h new file mode 100644 index 000000000000..103457dc880e --- /dev/null +++ b/tools/testing/selftests/arm64/bti/signal.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin <Dave.Martin(a)arm.com> + */ 
+ +#ifndef SIGNAL_H +#define SIGNAL_H + +#include <linux/signal.h> + +#include "system.h" + +typedef __sighandler_t sighandler_t; + +int sigemptyset(sigset_t *s); +int sigaddset(sigset_t *s, int n); +int sigaction(int n, struct sigaction *sa, const struct sigaction *old); +int sigprocmask(int how, const sigset_t *mask, sigset_t *old); + +#endif /* ! SIGNAL_H */ diff --git a/tools/testing/selftests/arm64/bti/start.S b/tools/testing/selftests/arm64/bti/start.S new file mode 100644 index 000000000000..831f952e0572 --- /dev/null +++ b/tools/testing/selftests/arm64/bti/start.S @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin <Dave.Martin(a)arm.com> + */ + +#include "assembler.h" + +startfn _start + mov x0, sp + b start +endfn + +emit_aarch64_feature_1_and diff --git a/tools/testing/selftests/arm64/bti/syscall.S b/tools/testing/selftests/arm64/bti/syscall.S new file mode 100644 index 000000000000..8dde8b6f3db1 --- /dev/null +++ b/tools/testing/selftests/arm64/bti/syscall.S @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin <Dave.Martin(a)arm.com> + */ + +#include "assembler.h" + +startfn syscall + bti c + mov w8, w0 + mov x0, x1 + mov x1, x2 + mov x2, x3 + mov x3, x4 + mov x4, x5 + mov x5, x6 + mov x6, x7 + svc #0 + ret +endfn + +emit_aarch64_feature_1_and diff --git a/tools/testing/selftests/arm64/bti/system.c b/tools/testing/selftests/arm64/bti/system.c new file mode 100644 index 000000000000..6385d8d4973b --- /dev/null +++ b/tools/testing/selftests/arm64/bti/system.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin <Dave.Martin(a)arm.com> + */ + +#include "system.h" + +#include <asm/unistd.h> + +#include "compiler.h" + +void __noreturn exit(int n) +{ + syscall(__NR_exit, n); + __unreachable(); +} + +ssize_t write(int fd, const void *buf, 
size_t size) +{ + return syscall(__NR_write, fd, buf, size); +} diff --git a/tools/testing/selftests/arm64/bti/system.h b/tools/testing/selftests/arm64/bti/system.h new file mode 100644 index 000000000000..aca118589705 --- /dev/null +++ b/tools/testing/selftests/arm64/bti/system.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin <Dave.Martin(a)arm.com> + */ + +#ifndef SYSTEM_H +#define SYSTEM_H + +#include <linux/types.h> +#include <linux/stddef.h> + +typedef __kernel_size_t size_t; +typedef __kernel_ssize_t ssize_t; + +#include <linux/errno.h> +#include <asm/hwcap.h> +#include <asm/ptrace.h> +#include <asm/unistd.h> + +#include "compiler.h" + +long syscall(int nr, ...); + +void __noreturn exit(int n); +ssize_t write(int fd, const void *buf, size_t size); + +#endif /* ! SYSTEM_H */ diff --git a/tools/testing/selftests/arm64/bti/test.c b/tools/testing/selftests/arm64/bti/test.c new file mode 100644 index 000000000000..656b04976ccc --- /dev/null +++ b/tools/testing/selftests/arm64/bti/test.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019,2021 Arm Limited + * Original author: Dave Martin <Dave.Martin(a)arm.com> + */ + +#include "system.h" + +#include <linux/errno.h> +#include <linux/auxvec.h> +#include <linux/signal.h> +#include <asm/sigcontext.h> +#include <asm/ucontext.h> + +typedef struct ucontext ucontext_t; + +#include "btitest.h" +#include "compiler.h" +#include "signal.h" + +#define EXPECTED_TESTS 18 + +static volatile unsigned int test_num = 1; +static unsigned int test_passed; +static unsigned int test_failed; +static unsigned int test_skipped; + +static void fdputs(int fd, const char *str) +{ + size_t len = 0; + const char *p = str; + + while (*p++) + ++len; + + write(fd, str, len); +} + +static void putstr(const char *str) +{ + fdputs(1, str); +} + +static void putnum(unsigned int num) +{ + char c; + + if (num / 10) + putnum(num / 10); + + c = 
'0' + (num % 10); + write(1, &c, 1); +} + +#define puttestname(test_name, trampoline_name) do { \ + putstr(test_name); \ + putstr("/"); \ + putstr(trampoline_name); \ +} while (0) + +void print_summary(void) +{ + putstr("# Totals: pass:"); + putnum(test_passed); + putstr(" fail:"); + putnum(test_failed); + putstr(" xfail:0 xpass:0 skip:"); + putnum(test_skipped); + putstr(" error:0\n"); +} + +static const char *volatile current_test_name; +static const char *volatile current_trampoline_name; +static volatile int sigill_expected, sigill_received; + +static void handler(int n, siginfo_t *si __always_unused, + void *uc_ __always_unused) +{ + ucontext_t *uc = uc_; + + putstr("# \t[SIGILL in "); + puttestname(current_test_name, current_trampoline_name); + putstr(", BTYPE="); + write(1, &"00011011"[((uc->uc_mcontext.pstate & PSR_BTYPE_MASK) + >> PSR_BTYPE_SHIFT) * 2], 2); + if (!sigill_expected) { + putstr("]\n"); + putstr("not ok "); + putnum(test_num); + putstr(" "); + puttestname(current_test_name, current_trampoline_name); + putstr("(unexpected SIGILL)\n"); + print_summary(); + exit(128 + n); + } + + putstr(" (expected)]\n"); + sigill_received = 1; + /* zap BTYPE so that resuming the faulting code will work */ + uc->uc_mcontext.pstate &= ~PSR_BTYPE_MASK; +} + +static int skip_all; + +static void __do_test(void (*trampoline)(void (*)(void)), + void (*fn)(void), + const char *trampoline_name, + const char *name, + int expect_sigill) +{ + if (skip_all) { + test_skipped++; + putstr("ok "); + putnum(test_num); + putstr(" "); + puttestname(name, trampoline_name); + putstr(" # SKIP\n"); + + return; + } + + /* Branch Target exceptions should only happen in BTI binaries: */ + if (!BTI) + expect_sigill = 0; + + sigill_expected = expect_sigill; + sigill_received = 0; + current_test_name = name; + current_trampoline_name = trampoline_name; + + trampoline(fn); + + if (expect_sigill && !sigill_received) { + putstr("not ok "); + test_failed++; + } else { + putstr("ok "); + 
test_passed++; + } + putnum(test_num++); + putstr(" "); + puttestname(name, trampoline_name); + putstr("\n"); +} + +#define do_test(expect_sigill_br_x0, \ + expect_sigill_br_x16, \ + expect_sigill_blr, \ + name) \ +do { \ + __do_test(call_using_br_x0, name, "call_using_br_x0", #name, \ + expect_sigill_br_x0); \ + __do_test(call_using_br_x16, name, "call_using_br_x16", #name, \ + expect_sigill_br_x16); \ + __do_test(call_using_blr, name, "call_using_blr", #name, \ + expect_sigill_blr); \ +} while (0) + +void start(int *argcp) +{ + struct sigaction sa; + void *const *p; + const struct auxv_entry { + unsigned long type; + unsigned long val; + } *auxv; + unsigned long hwcap = 0, hwcap2 = 0; + + putstr("TAP version 13\n"); + putstr("1.."); + putnum(EXPECTED_TESTS); + putstr("\n"); + + /* Gross hack for finding AT_HWCAP2 from the initial process stack: */ + p = (void *const *)argcp + 1 + *argcp + 1; /* start of environment */ + /* step over environment */ + while (*p++) + ; + for (auxv = (const struct auxv_entry *)p; auxv->type != AT_NULL; ++auxv) { + switch (auxv->type) { + case AT_HWCAP: + hwcap = auxv->val; + break; + case AT_HWCAP2: + hwcap2 = auxv->val; + break; + default: + break; + } + } + + if (hwcap & HWCAP_PACA) + putstr("# HWCAP_PACA present\n"); + else + putstr("# HWCAP_PACA not present\n"); + + if (hwcap2 & HWCAP2_BTI) { + putstr("# HWCAP2_BTI present\n"); + if (!(hwcap & HWCAP_PACA)) + putstr("# Bad hardware? 
Expect problems.\n"); + } else { + putstr("# HWCAP2_BTI not present\n"); + skip_all = 1; + } + + putstr("# Test binary"); + if (!BTI) + putstr(" not"); + putstr(" built for BTI\n"); + + sa.sa_handler = (sighandler_t)(void *)handler; + sa.sa_flags = SA_SIGINFO; + sigemptyset(&sa.sa_mask); + sigaction(SIGILL, &sa, NULL); + sigaddset(&sa.sa_mask, SIGILL); + sigprocmask(SIG_UNBLOCK, &sa.sa_mask, NULL); + + do_test(1, 1, 1, nohint_func); + do_test(1, 1, 1, bti_none_func); + do_test(1, 0, 0, bti_c_func); + do_test(0, 0, 1, bti_j_func); + do_test(0, 0, 0, bti_jc_func); + do_test(1, 0, 0, paciasp_func); + + print_summary(); + + if (test_num - 1 != EXPECTED_TESTS) + putstr("# WARNING - EXPECTED TEST COUNT WRONG\n"); + + if (test_failed) + exit(1); + else + exit(0); +} diff --git a/tools/testing/selftests/arm64/bti/teststubs.S b/tools/testing/selftests/arm64/bti/teststubs.S new file mode 100644 index 000000000000..b62c8c35f67e --- /dev/null +++ b/tools/testing/selftests/arm64/bti/teststubs.S @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin <Dave.Martin(a)arm.com> + */ + +#include "assembler.h" + +startfn bti_none_func + bti + ret +endfn + +startfn bti_c_func + bti c + ret +endfn + +startfn bti_j_func + bti j + ret +endfn + +startfn bti_jc_func + bti jc + ret +endfn + +startfn paciasp_func + paciasp + autiasp + ret +endfn + +startfn nohint_func + ret +endfn + +emit_aarch64_feature_1_and diff --git a/tools/testing/selftests/arm64/bti/trampoline.S b/tools/testing/selftests/arm64/bti/trampoline.S new file mode 100644 index 000000000000..09beb3f361f1 --- /dev/null +++ b/tools/testing/selftests/arm64/bti/trampoline.S @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin <Dave.Martin(a)arm.com> + */ + +#include "assembler.h" + +startfn call_using_br_x0 + bti c + br x0 +endfn + +startfn call_using_br_x16 + bti c + mov x16, x0 + br 
x16 +endfn + +startfn call_using_blr + paciasp + stp x29, x30, [sp, #-16]! + blr x0 + ldp x29, x30, [sp], #16 + autiasp + ret +endfn + +emit_aarch64_feature_1_and -- 2.20.1

4 years, 9 months

2
1
0 0

[PATCH 00/11] kselftest/arm64: mte: Fix feature detection and compilation

by Andre Przywara

When trying to run the arm64 MTE (Memory Tagging Extension) selftests on a model with the new FEAT_MTE3 capability, the MTE feature detection failed, because it was overzealously checking for one exact feature version only (0b0010). Trying to fix that (patch 06/11) led me into the rabbit hole of userland tool compilation, which triggered patches 01-05/11, to let me actually compile the selftests on an arm64 machine running Ubuntu 20.04. Before I actually fixed that, I tried some other compiler and distro; patches 07 and 08 are my witnesses. Then I got brave and tried clang: entering patches 09/11 and 10/11. Eventually I tried to run the whole thing on that model again, and, you guessed it, patch 11/11 concludes this apparent "2 minute job". Eventually I can now compile the mte selftests on Ubuntu 20.04 with both the native gcc and clang without warnings, also with some custom made cross compiler. And they even run now! Please have a look, also you may try to compile it on your setup, if you feel adventurous: $ make -C tools/testing/selftests TARGETS=arm64 ARM64_SUBTARGETS=mte Cheers, Andre Andre Przywara (11): kselftest/arm64: mte: Fix compilation with native compiler kselftest/arm64: mte: Fix pthread linking kselftest/arm64: mte: ksm_options: Fix fscanf warning kselftest/arm64: mte: user_mem: Fix write() warning kselftest/arm64: mte: common: Fix write() warnings kselftest/arm64: mte: Fix MTE feature detection kselftest/arm64: mte: Use cross-compiler if specified kselftest/arm64: mte: Output warning about failing compiler kselftest/arm64: mte: Makefile: Fix clang compilation kselftest/arm64: mte: Fix clang warning kselftest/arm64: mte: Report filename on failing temp file creation tools/testing/selftests/arm64/mte/Makefile | 15 +++++-- .../selftests/arm64/mte/check_ksm_options.c | 5 ++- .../selftests/arm64/mte/check_user_mem.c | 3 +- .../selftests/arm64/mte/mte_common_util.c | 39 +++++++++++-------- 4 files changed, 39 insertions(+), 23 deletions(-) -- 2.17.5

4 years, 9 months

4
15
0 0

[GIT PULL] KUnit fixes update for Linux 5.12-rc5

by Shuah Khan

Hi Linus, Please pull the following KUnit fixes update for Linux 5.12-rc5. This KUnit update for Linux 5.12-rc5 consists of two fixes to kunit tool from David Gow. diff is attached. thanks, -- Shuah ---------------------------------------------------------------- The following changes since commit a38fd8748464831584a19438cbb3082b5a2dab15: Linux 5.12-rc2 (2021-03-05 17:33:41 -0800) are available in the Git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest tags/linux-kselftest-kunit-fixes-5.12-rc5.1 for you to fetch changes up to 7fd53f41f771d250eb08db08650940f017e37c26: kunit: tool: Disable PAGE_POISONING under --alltests (2021-03-11 14:37:37 -0700) ---------------------------------------------------------------- linux-kselftest-kunit-fixes-5.12-rc5.1 This KUnit update for Linux 5.12-rc5 consists of two fixes to kunit tool from David Gow. ---------------------------------------------------------------- David Gow (2): kunit: tool: Fix a python tuple typing error kunit: tool: Disable PAGE_POISONING under --alltests tools/testing/kunit/configs/broken_on_uml.config | 2 ++ tools/testing/kunit/kunit_config.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) ----------------------------------------------------------------

4 years, 9 months

2
1
0 0

[PATCH] tools: testing: Remove duplicate include of sched.h

by Wan Jiabing

sched.h has been included at line 33. So we remove the duplicate one at line 36. Signed-off-by: Wan Jiabing <wanjiabing(a)vivo.com> --- tools/testing/selftests/powerpc/mm/tlbie_test.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/mm/tlbie_test.c b/tools/testing/selftests/powerpc/mm/tlbie_test.c index f85a0938ab25..48344a74b212 100644 --- a/tools/testing/selftests/powerpc/mm/tlbie_test.c +++ b/tools/testing/selftests/powerpc/mm/tlbie_test.c @@ -33,7 +33,6 @@ #include <sched.h> #include <time.h> #include <stdarg.h> -#include <sched.h> #include <pthread.h> #include <signal.h> #include <sys/prctl.h> -- 2.25.1

4 years, 9 months

2
1
0 0

[PATCH] tools: testing: inttypes.h is included twice

by Wan Jiabing

inttypes.h has been included at line 19. So we remove the duplicate one at line 23. Signed-off-by: Wan Jiabing <wanjiabing(a)vivo.com> --- tools/testing/selftests/powerpc/tm/tm-poison.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/tm/tm-poison.c b/tools/testing/selftests/powerpc/tm/tm-poison.c index 29e5f26af7b9..27c083a03d1f 100644 --- a/tools/testing/selftests/powerpc/tm/tm-poison.c +++ b/tools/testing/selftests/powerpc/tm/tm-poison.c @@ -20,7 +20,6 @@ #include <sched.h> #include <sys/types.h> #include <signal.h> -#include <inttypes.h> #include "tm.h" -- 2.25.1

4 years, 9 months

1
0
0 0

[PATCH] tools: testing: pthread.h is included twice

by Wan Jiabing

pthread.h has been included at line 17. So we remove the duplicate one at line 20. Signed-off-by: Wan Jiabing <wanjiabing(a)vivo.com> --- tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c index e2a0c07e8362..9ef37a9836ac 100644 --- a/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c +++ b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c @@ -17,7 +17,6 @@ #include <pthread.h> #include <sys/mman.h> #include <unistd.h> -#include <pthread.h> #include "tm.h" #include "utils.h" -- 2.25.1

4 years, 9 months

1
0
0 0

[PATCH bpf-next v2 0/2] add support for batched ops in LPM trie

by Pedro Tammela

The patch itself is straightforward thanks to the infrastructure that is already in place. The tests follow the other '*_map_batch_ops' tests with minor tweaks. v1 -> v2: Fixes for checkpatch warnings Pedro Tammela (2): bpf: add support for batched operations in LPM trie maps bpf: selftests: add tests for batched ops in LPM trie maps kernel/bpf/lpm_trie.c | 3 + .../map_tests/lpm_trie_map_batch_ops.c (new) | 158 ++++++++++++++++++ 2 files changed, 161 insertions(+) create mode 100644 tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c -- 2.25.1

4 years, 9 months

1
2
0 0

Re: [selftests] e48d82b67a: BUG_TestSlub_RZ_alloc(Not_tainted):Redzone_overwritten

by Vlastimil Babka

On 3/17/21 9:36 AM, kernel test robot wrote: > > > Greeting, > > FYI, we noticed the following commit (built with gcc-9): > > commit: e48d82b67a2b760eedf7b95ca15f41267496386c ("[PATCH 1/2] selftests: add a kselftest for SLUB debugging functionality") > url: https://github.com/0day-ci/linux/commits/glittao-gmail-com/selftests-add-a-… > base: https://git.kernel.org/cgit/linux/kernel/git/shuah/linux-kselftest.git next > > in testcase: trinity > version: trinity-static-i386-x86_64-f93256fb_2019-08-28 > with following parameters: > > group: group-04 > > test-description: Trinity is a linux system call fuzz tester. > test-url: http://codemonkey.org.uk/projects/trinity/ > > > on test machine: qemu-system-i386 -enable-kvm -cpu SandyBridge -smp 2 -m 8G > > caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace): > > > +---------------------------------------------------------------------------------------------------------------+-----------+------------+ > | | v5.12-rc2 | e48d82b67a | > +---------------------------------------------------------------------------------------------------------------+-----------+------------+ > | BUG_TestSlub_RZ_alloc(Not_tainted):Redzone_overwritten | 0 | 69 | > | INFO:0x(ptrval)-0x(ptrval)@offset=#.First_byte#instead_of | 0 | 69 | > | INFO:Allocated_in_resiliency_test_age=#cpu=#pid= | 0 | 69 | > | INFO:Slab0x(ptrval)objects=#used=#fp=0x(ptrval)flags= | 0 | 69 | > | INFO:Object0x(ptrval)@offset=#fp=0x(ptrval) | 0 | 69 | > | BUG_TestSlub_next_ptr_free(Tainted:G_B):Freechain_corrupt | 0 | 69 | > | INFO:Freed_in_resiliency_test_age=#cpu=#pid= | 0 | 69 | > | BUG_TestSlub_next_ptr_free(Tainted:G_B):Wrong_object_count.Counter_is#but_counted_were | 0 | 69 | > | BUG_TestSlub_next_ptr_free(Tainted:G_B):Redzone_overwritten | 0 | 69 | > | BUG_TestSlub_next_ptr_free(Tainted:G_B):Objects_remaining_in_TestSlub_next_ptr_free_on__kmem_cache_shutdown() | 0 | 69 | > | INFO:Object0x(ptrval)@offset= | 0 | 69 | > | 
BUG_TestSlub_1th_word_free(Tainted:G_B):Poison_overwritten | 0 | 69 | > | BUG_TestSlub_50th_word_free(Tainted:G_B):Poison_overwritten | 0 | 69 | > | BUG_TestSlub_RZ_free(Tainted:G_B):Redzone_overwritten | 0 | 69 | > +---------------------------------------------------------------------------------------------------------------+-----------+------------+ > > > If you fix the issue, kindly add following tag > Reported-by: kernel test robot <oliver.sang(a)intel.com> > > > > [ 22.154049] random: get_random_u32 called from __kmem_cache_create+0x23/0x3e0 with crng_init=0 > [ 22.154070] random: get_random_u32 called from cache_random_seq_create+0x7c/0x140 with crng_init=0 > [ 22.154167] random: get_random_u32 called from allocate_slab+0x155/0x5e0 with crng_init=0 > [ 22.154690] test_slub: 1. kmem_cache: Clobber Redzone 0x12->0x(ptrval) > [ 22.164499] ============================================================================= > [ 22.166629] BUG TestSlub_RZ_alloc (Not tainted): Redzone overwritten > [ 22.168179] ----------------------------------------------------------------------------- > [ 22.168179] > [ 22.168372] Disabling lock debugging due to kernel taint > [ 22.168372] INFO: 0x(ptrval)-0x(ptrval) @offset=1064. 
First byte 0x12 instead of 0xcc > [ 22.168372] INFO: Allocated in resiliency_test+0x47/0x1be age=3 cpu=0 pid=1 > [ 22.168372] __slab_alloc+0x57/0x80 > [ 22.168372] kmem_cache_alloc (kbuild/src/consumer/mm/slub.c:2871 kbuild/src/consumer/mm/slub.c:2915 kbuild/src/consumer/mm/slub.c:2920) > [ 22.168372] resiliency_test (kbuild/src/consumer/lib/test_slub.c:34 kbuild/src/consumer/lib/test_slub.c:107) > [ 22.168372] test_slub_init (kbuild/src/consumer/lib/test_slub.c:124) > [ 22.168372] do_one_initcall (kbuild/src/consumer/init/main.c:1226) > [ 22.168372] kernel_init_freeable (kbuild/src/consumer/init/main.c:1298 kbuild/src/consumer/init/main.c:1315 kbuild/src/consumer/init/main.c:1335 kbuild/src/consumer/init/main.c:1537) > [ 22.168372] kernel_init (kbuild/src/consumer/init/main.c:1426) > [ 22.168372] ret_from_fork (kbuild/src/consumer/arch/x86/entry/entry_32.S:856) > [ 22.168372] INFO: Slab 0x(ptrval) objects=16 used=1 fp=0x(ptrval) flags=0x40000201 > [ 22.168372] INFO: Object 0x(ptrval) @offset=1000 fp=0x(ptrval) > [ 22.168372] > [ 22.168372] Redzone (ptrval): cc cc cc cc cc cc cc cc ........ > [ 22.168372] Object (ptrval): 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk > [ 22.168372] Object (ptrval): 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b a5 kkkkkkkkkkkkkkk. > [ 22.168372] Redzone (ptrval): 12 cc cc cc .... 
> [ 22.168372] Padding (ptrval): 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ > [ 22.168372] CPU: 0 PID: 1 Comm: swapper/0 Tainted: G B 5.12.0-rc2-00001-ge48d82b67a2b #1 > [ 22.168372] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 > [ 22.168372] Call Trace: > [ 22.168372] dump_stack (kbuild/src/consumer/lib/dump_stack.c:122) > [ 22.168372] print_trailer (kbuild/src/consumer/mm/slub.c:737) > [ 22.168372] check_bytes_and_report.cold (kbuild/src/consumer/mm/slub.c:807) > [ 22.168372] check_object (kbuild/src/consumer/mm/slub.c:914) > [ 22.168372] validate_slab (kbuild/src/consumer/mm/slub.c:4635) Hm but in this case the output means the tested functionality (slub debugging) is working as intended. So what can we do? Indicate/teach somehow to the bot that this is OK? Does kselftest have some support for this? Or silence the validation output for testing purposes? (I would prefer not to) Thanks, Vlastimil

4 years, 9 months

3
3
0 0

[PATCH v5 1/2] mm: huge_memory: a new debugfs interface for splitting THP tests.

by Zi Yan

From: Zi Yan <ziy(a)nvidia.com> We did not have a direct user interface of splitting the compound page backing a THP and there is no need unless we want to expose the THP implementation details to users. Make <debugfs>/split_huge_pages accept a new command to do that. By writing "<pid>,<vaddr_start>,<vaddr_end>" to <debugfs>/split_huge_pages, THPs within the given virtual address range from the process with the given pid are split. It is used to test split_huge_page function. In addition, a selftest program is added to tools/testing/selftests/vm to utilize the interface by splitting PMD THPs and PTE-mapped THPs. This does not change the old behavior, i.e., writing 1 to the interface to split all THPs in the system. Changelog: >From v5: 1. Skipped special VMAs and other fixes. (suggested by Yang Shi) >From v4: 1. Fixed the error code return issue, spotted by kernel test robot <lkp(a)intel.com>. >From v3: 1. Factored out split huge pages in the given pid code to a separate function. 2. Added the missing put_page for not split pages. 3. pr_debug -> pr_info, make reading results simpler. >From v2: 1. Reused existing <debugfs>/split_huge_pages interface. (suggested by Yang Shi) >From v1: 1. Removed unnecessary calling to vma_migratable, spotted by kernel test robot <lkp(a)intel.com>. 2. Dropped the use of find_mm_struct and code it directly, since there is no need for the permission check in that function and the function is only available when migration is on. 3. Added some comments in the selftest program to clarify how PTE-mapped THPs are formed. 
Signed-off-by: Zi Yan <ziy(a)nvidia.com> --- mm/huge_memory.c | 143 +++++++- tools/testing/selftests/vm/.gitignore | 1 + tools/testing/selftests/vm/Makefile | 1 + .../selftests/vm/split_huge_page_test.c | 318 ++++++++++++++++++ 4 files changed, 456 insertions(+), 7 deletions(-) create mode 100644 tools/testing/selftests/vm/split_huge_page_test.c diff --git a/mm/huge_memory.c b/mm/huge_memory.c index bff92dea5ab3..9bf9bc489228 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -7,6 +7,7 @@ #include <linux/mm.h> #include <linux/sched.h> +#include <linux/sched/mm.h> #include <linux/sched/coredump.h> #include <linux/sched/numa_balancing.h> #include <linux/highmem.h> @@ -2922,16 +2923,14 @@ static struct shrinker deferred_split_shrinker = { }; #ifdef CONFIG_DEBUG_FS -static int split_huge_pages_set(void *data, u64 val) +static void split_huge_pages_all(void) { struct zone *zone; struct page *page; unsigned long pfn, max_zone_pfn; unsigned long total = 0, split = 0; - if (val != 1) - return -EINVAL; - + pr_info("Split all THPs\n"); for_each_populated_zone(zone) { max_zone_pfn = zone_end_pfn(zone); for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) { @@ -2959,11 +2958,141 @@ static int split_huge_pages_set(void *data, u64 val) } pr_info("%lu of %lu THP split\n", split, total); +} - return 0; +static int split_huge_pages_pid(int pid, unsigned long vaddr_start, + unsigned long vaddr_end) +{ + int ret = 0; + struct task_struct *task; + struct mm_struct *mm; + unsigned long total = 0, split = 0; + unsigned long addr; + + vaddr_start &= PAGE_MASK; + vaddr_end &= PAGE_MASK; + + /* Find the task_struct from pid */ + rcu_read_lock(); + task = find_task_by_vpid(pid); + if (!task) { + rcu_read_unlock(); + ret = -ESRCH; + goto out; + } + get_task_struct(task); + rcu_read_unlock(); + + /* Find the mm_struct */ + mm = get_task_mm(task); + put_task_struct(task); + + if (!mm) { + ret = -EINVAL; + goto out; + } + + pr_info("Split huge pages in pid: %d, vaddr: [0x%lx - 
0x%lx]\n", + pid, vaddr_start, vaddr_end); + + mmap_read_lock(mm); + /* + * always increase addr by PAGE_SIZE, since we could have a PTE page + * table filled with PTE-mapped THPs, each of which is distinct. + */ + for (addr = vaddr_start; addr < vaddr_end; addr += PAGE_SIZE) { + struct vm_area_struct *vma = find_vma(mm, addr); + unsigned int follflags; + struct page *page; + + if (!vma || addr < vma->vm_start) + break; + + /* skip special VMA and hugetlb VMA */ + if (vma_is_special_huge(vma) || is_vm_hugetlb_page(vma)) { + addr = vma->vm_end; + continue; + } + + /* FOLL_DUMP to ignore special (like zero) pages */ + follflags = FOLL_GET | FOLL_DUMP; + page = follow_page(vma, addr, follflags); + + if (IS_ERR(page)) + continue; + if (!page) + continue; + + if (!is_transparent_hugepage(page)) + goto next; + + total++; + if (!can_split_huge_page(compound_head(page), NULL)) + goto next; + + if (!trylock_page(page)) + goto next; + + if (!split_huge_page(page)) + split++; + + unlock_page(page); +next: + put_page(page); + } + mmap_read_unlock(mm); + mmput(mm); + + pr_info("%lu of %lu THP split\n", split, total); + +out: + return ret; } -DEFINE_DEBUGFS_ATTRIBUTE(split_huge_pages_fops, NULL, split_huge_pages_set, - "%llu\n"); + +static ssize_t split_huge_pages_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppops) +{ + static DEFINE_MUTEX(split_debug_mutex); + ssize_t ret; + char input_buf[80]; /* hold pid, start_vaddr, end_vaddr */ + int pid; + unsigned long vaddr_start, vaddr_end; + + ret = mutex_lock_interruptible(&split_debug_mutex); + if (ret) + return ret; + + ret = -EFAULT; + + memset(input_buf, 0, 80); + if (copy_from_user(input_buf, buf, min_t(size_t, count, 80))) + goto out; + + input_buf[79] = '\0'; + ret = sscanf(input_buf, "%d,0x%lx,0x%lx", &pid, &vaddr_start, &vaddr_end); + if (ret == 1 && pid == 1) { + split_huge_pages_all(); + ret = strlen(input_buf); + goto out; + } else if (ret != 3) { + ret = -EINVAL; + goto out; + } + + ret = 
split_huge_pages_pid(pid, vaddr_start, vaddr_end); + if (!ret) + ret = strlen(input_buf); +out: + mutex_unlock(&split_debug_mutex); + return ret; + +} + +static const struct file_operations split_huge_pages_fops = { + .owner = THIS_MODULE, + .write = split_huge_pages_write, + .llseek = no_llseek, +}; static int __init split_huge_pages_debugfs(void) { diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index 9a35c3f6a557..1f651e85ed60 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -22,3 +22,4 @@ map_fixed_noreplace write_to_hugetlbfs hmm-tests local_config.* +split_huge_page_test diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index d42115e4284d..4cbc91d6869f 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -42,6 +42,7 @@ TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += userfaultfd +TEST_GEN_FILES += split_huge_page_test ifeq ($(MACHINE),x86_64) CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32) diff --git a/tools/testing/selftests/vm/split_huge_page_test.c b/tools/testing/selftests/vm/split_huge_page_test.c new file mode 100644 index 000000000000..2c0c18e60c57 --- /dev/null +++ b/tools/testing/selftests/vm/split_huge_page_test.c @@ -0,0 +1,318 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * A test of splitting PMD THPs and PTE-mapped THPs from a specified virtual + * address range in a process via <debugfs>/split_huge_pages interface. 
+ */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <inttypes.h> +#include <string.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <malloc.h> +#include <stdbool.h> + +uint64_t pagesize; +unsigned int pageshift; +uint64_t pmd_pagesize; + +#define PMD_SIZE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size" +#define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages" +#define SMAP_PATH "/proc/self/smaps" +#define INPUT_MAX 80 + +#define PFN_MASK ((1UL<<55)-1) +#define KPF_THP (1UL<<22) + +int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file) +{ + uint64_t paddr; + uint64_t page_flags; + + if (pagemap_file) { + pread(pagemap_file, &paddr, sizeof(paddr), + ((long)vaddr >> pageshift) * sizeof(paddr)); + + if (kpageflags_file) { + pread(kpageflags_file, &page_flags, sizeof(page_flags), + (paddr & PFN_MASK) * sizeof(page_flags)); + + return !!(page_flags & KPF_THP); + } + } + return 0; +} + + +static uint64_t read_pmd_pagesize(void) +{ + int fd; + char buf[20]; + ssize_t num_read; + + fd = open(PMD_SIZE_PATH, O_RDONLY); + if (fd == -1) { + perror("Open hpage_pmd_size failed"); + exit(EXIT_FAILURE); + } + num_read = read(fd, buf, 19); + if (num_read < 1) { + close(fd); + perror("Read hpage_pmd_size failed"); + exit(EXIT_FAILURE); + } + buf[num_read] = '\0'; + close(fd); + + return strtoul(buf, NULL, 10); +} + +static int write_file(const char *path, const char *buf, size_t buflen) +{ + int fd; + ssize_t numwritten; + + fd = open(path, O_WRONLY); + if (fd == -1) + return 0; + + numwritten = write(fd, buf, buflen - 1); + close(fd); + if (numwritten < 1) + return 0; + + return (unsigned int) numwritten; +} + +static void write_debugfs(int pid, uint64_t vaddr_start, uint64_t vaddr_end) +{ + char input[INPUT_MAX]; + int ret; + + ret = snprintf(input, INPUT_MAX, "%d,0x%lx,0x%lx", pid, vaddr_start, + vaddr_end); + if (ret >= INPUT_MAX) { + printf("%s: Debugfs input is too long\n", __func__); + 
exit(EXIT_FAILURE); + } + + if (!write_file(SPLIT_DEBUGFS, input, ret + 1)) { + perror(SPLIT_DEBUGFS); + exit(EXIT_FAILURE); + } +} + +#define MAX_LINE_LENGTH 500 + +static bool check_for_pattern(FILE *fp, const char *pattern, char *buf) +{ + while (fgets(buf, MAX_LINE_LENGTH, fp) != NULL) { + if (!strncmp(buf, pattern, strlen(pattern))) + return true; + } + return false; +} + +static uint64_t check_huge(void *addr) +{ + uint64_t thp = 0; + int ret; + FILE *fp; + char buffer[MAX_LINE_LENGTH]; + char addr_pattern[MAX_LINE_LENGTH]; + + ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-", + (unsigned long) addr); + if (ret >= MAX_LINE_LENGTH) { + printf("%s: Pattern is too long\n", __func__); + exit(EXIT_FAILURE); + } + + + fp = fopen(SMAP_PATH, "r"); + if (!fp) { + printf("%s: Failed to open file %s\n", __func__, SMAP_PATH); + exit(EXIT_FAILURE); + } + if (!check_for_pattern(fp, addr_pattern, buffer)) + goto err_out; + + /* + * Fetch the AnonHugePages: in the same block and check the number of + * hugepages. 
+ */ + if (!check_for_pattern(fp, "AnonHugePages:", buffer)) + goto err_out; + + if (sscanf(buffer, "AnonHugePages:%10ld kB", &thp) != 1) { + printf("Reading smap error\n"); + exit(EXIT_FAILURE); + } + +err_out: + fclose(fp); + return thp; +} + +void split_pmd_thp(void) +{ + char *one_page; + size_t len = 4 * pmd_pagesize; + uint64_t thp_size; + size_t i; + + one_page = memalign(pmd_pagesize, len); + + if (!one_page) { + printf("Fail to allocate memory\n"); + exit(EXIT_FAILURE); + } + + madvise(one_page, len, MADV_HUGEPAGE); + + for (i = 0; i < len; i++) + one_page[i] = (char)i; + + thp_size = check_huge(one_page); + if (!thp_size) { + printf("No THP is allocated\n"); + exit(EXIT_FAILURE); + } + + /* split all THPs */ + write_debugfs(getpid(), (uint64_t)one_page, (uint64_t)one_page + len); + + for (i = 0; i < len; i++) + if (one_page[i] != (char)i) { + printf("%ld byte corrupted\n", i); + exit(EXIT_FAILURE); + } + + + thp_size = check_huge(one_page); + if (thp_size) { + printf("Still %ld kB AnonHugePages not split\n", thp_size); + exit(EXIT_FAILURE); + } + + printf("Split huge pages successful\n"); + free(one_page); +} + +void split_pte_mapped_thp(void) +{ + char *one_page, *pte_mapped, *pte_mapped2; + size_t len = 4 * pmd_pagesize; + uint64_t thp_size; + size_t i; + const char *pagemap_template = "/proc/%d/pagemap"; + const char *kpageflags_proc = "/proc/kpageflags"; + char pagemap_proc[255]; + int pagemap_fd; + int kpageflags_fd; + + if (snprintf(pagemap_proc, 255, pagemap_template, getpid()) < 0) { + perror("get pagemap proc error"); + exit(EXIT_FAILURE); + } + pagemap_fd = open(pagemap_proc, O_RDONLY); + + if (pagemap_fd == -1) { + perror("read pagemap:"); + exit(EXIT_FAILURE); + } + + kpageflags_fd = open(kpageflags_proc, O_RDONLY); + + if (kpageflags_fd == -1) { + perror("read kpageflags:"); + exit(EXIT_FAILURE); + } + + one_page = mmap((void *)(1UL << 30), len, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + + madvise(one_page, len, 
MADV_HUGEPAGE); + + for (i = 0; i < len; i++) + one_page[i] = (char)i; + + thp_size = check_huge(one_page); + if (!thp_size) { + printf("No THP is allocated\n"); + exit(EXIT_FAILURE); + } + + /* remap the first pagesize of first THP */ + pte_mapped = mremap(one_page, pagesize, pagesize, MREMAP_MAYMOVE); + + /* remap the Nth pagesize of Nth THP */ + for (i = 1; i < 4; i++) { + pte_mapped2 = mremap(one_page + pmd_pagesize * i + pagesize * i, + pagesize, pagesize, + MREMAP_MAYMOVE|MREMAP_FIXED, + pte_mapped + pagesize * i); + if (pte_mapped2 == (char *)-1) { + perror("mremap failed"); + exit(EXIT_FAILURE); + } + } + + /* smap does not show THPs after mremap, use kpageflags instead */ + thp_size = 0; + for (i = 0; i < pagesize * 4; i++) + if (i % pagesize == 0 && + is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd)) + thp_size++; + + if (thp_size != 4) { + printf("Some THPs are missing during mremap\n"); + exit(EXIT_FAILURE); + } + + /* split all remapped THPs */ + write_debugfs(getpid(), (uint64_t)pte_mapped, + (uint64_t)pte_mapped + pagesize * 4); + + /* smap does not show THPs after mremap, use kpageflags instead */ + thp_size = 0; + for (i = 0; i < pagesize * 4; i++) { + if (pte_mapped[i] != (char)i) { + printf("%ld byte corrupted\n", i); + exit(EXIT_FAILURE); + } + if (i % pagesize == 0 && + is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd)) + thp_size++; + } + + if (thp_size) { + printf("Still %ld THPs not split\n", thp_size); + exit(EXIT_FAILURE); + } + + printf("Split PTE-mapped huge pages successful\n"); + munmap(one_page, len); + close(pagemap_fd); + close(kpageflags_fd); +} + +int main(int argc, char **argv) +{ + if (geteuid() != 0) { + printf("Please run the benchmark as root\n"); + exit(EXIT_FAILURE); + } + + pagesize = getpagesize(); + pageshift = ffs(pagesize) - 1; + pmd_pagesize = read_pmd_pagesize(); + + split_pmd_thp(); + split_pte_mapped_thp(); + + return 0; +} -- 2.30.2

4 years, 9 months

3
5
0 0

[PATCH] kselftest/arm64: sve: Do not use non-canonical FFR register value

by Andre Przywara

The "First Fault Register" (FFR) is an SVE register that mimics a predicate register, but clears bits when a load or store fails to handle an element of a vector. The supposed usage scenario is to initialise this register (using SETFFR), then *read* it later on to learn about elements that failed to load or store. Explicit writes to this register using the WRFFR instruction are only supposed to *restore* values previously read from the register (for context-switching only). As the manual describes, this register holds only certain values, it: "... contains a monotonic predicate value, in which starting from bit 0 there are zero or more 1 bits, followed only by 0 bits in any remaining bit positions." Any other value is UNPREDICTABLE and is not supposed to be "restored" into the register. The SVE test currently tries to write a signature pattern into the register, which is *not* a canonical FFR value. Apparently the existing setups treat UNPREDICTABLE as "read-as-written", but a new implementation actually only stores canonical values. As a consequence, the sve-test fails immediately when comparing the FFR value: ----------- # ./sve-test Vector length: 128 bits PID: 207 Mismatch: PID=207, iteration=0, reg=48 Expected [cf00] Got [0f00] Aborted ----------- Fix this by only populating the FFR with proper canonical values. Effectively the requirement described above limits us to 17 unique values over 16 bits worth of FFR, so we condense our signature down to 4 bits (2 bits from the PID, 2 bits from the generation) and generate the canonical pattern from it. Any bits describing elements above the minimum 128 bit are set to 0. This aligns the FFR usage to the architecture and fixes the test on microarchitectures implementing FFR in a more restricted way. 
Signed-off-by: Andre Przywara <andre.przywara(a)arm.com> --- tools/testing/selftests/arm64/fp/sve-test.S | 22 ++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S index 9210691aa998..e3e08d9c7020 100644 --- a/tools/testing/selftests/arm64/fp/sve-test.S +++ b/tools/testing/selftests/arm64/fp/sve-test.S @@ -284,16 +284,28 @@ endfunction // Set up test pattern in the FFR // x0: pid // x2: generation +// +// We need to generate a canonical FFR value, which consists of a number of +// low "1" bits, followed by a number of zeros. This gives us 17 unique values +// per 16 bits of FFR, so we create a 4 bit signature out of the PID and +// generation, and use that as the initial number of ones in the pattern. +// We fill the upper lanes of FFR with zeros. // Beware: corrupts P0. function setup_ffr mov x4, x30 - bl pattern + and w0, w0, #0x3 + bfi w0, w2, #2, #2 + mov w1, #1 + lsl w1, w1, w0 + sub w1, w1, #1 + ldr x0, =ffrref - ldr x1, =scratch - rdvl x2, #1 - lsr x2, x2, #3 - bl memcpy + strh w1, [x0], 2 + rdvl x1, #1 + lsr x1, x1, #3 + sub x1, x1, #2 + bl memclr mov x0, #0 ldr x1, =ffrref -- 2.25.1

4 years, 9 months

3
2
0 0

[RFC 0/2] CPU-Idle latency selftest framework

by Pratik Rajesh Sampat

A kernel module + userspace driver to estimate the wakeup latency caused by going into stop states. The motivation behind this program is to find significant deviations behind advertised latency and residency values. The patchset measures latencies for two kinds of events. IPIs and Timers As this is a software-only mechanism, there will additional latencies of the kernel-firmware-hardware interactions. To account for that, the program also measures a baseline latency on a 100 percent loaded CPU and the latencies achieved must be in view relative to that. To achieve this, we introduce a kernel module and expose its control knobs through the debugfs interface that the selftests can engage with. The kernel module provides the following interfaces within /sys/kernel/debug/latency_test/ for, IPI test: ipi_cpu_dest = Destination CPU for the IPI ipi_cpu_src = Origin of the IPI ipi_latency_ns = Measured latency time in ns Timeout test: timeout_cpu_src = CPU on which the timer to be queued timeout_expected_ns = Timer duration timeout_diff_ns = Difference of actual duration vs expected timer Sample output on a POWER9 system is as follows: # --IPI Latency Test--- # Baseline Average IPI latency(ns): 3114 # Observed Average IPI latency(ns) - State0: 3265 # Observed Average IPI latency(ns) - State1: 3507 # Observed Average IPI latency(ns) - State2: 3739 # Observed Average IPI latency(ns) - State3: 3807 # Observed Average IPI latency(ns) - State4: 17070 # Observed Average IPI latency(ns) - State5: 1038174 # Observed Average IPI latency(ns) - State6: 1068784 # # --Timeout Latency Test-- # Baseline Average timeout diff(ns): 1420 # Observed Average timeout diff(ns) - State0: 1640 # Observed Average timeout diff(ns) - State1: 1764 # Observed Average timeout diff(ns) - State2: 1715 # Observed Average timeout diff(ns) - State3: 1845 # Observed Average timeout diff(ns) - State4: 16581 # Observed Average timeout diff(ns) - State5: 939977 # Observed Average timeout diff(ns) - State6: 
1073024 Things to keep in mind: 1. This kernel module + bash driver does not guarantee idleness on a core when the IPI and the Timer are armed. It only invokes sleep and hopes that the core is idle once the IPI/Timer is invoked onto it. Hence this program must be run on a completely idle system for best results. 2. Even on a completely idle system, there may be book-keeping tasks or jitter tasks that can run on the core we want idle. This can create outliers in the latency measurement. Thankfully, these outliers should be large enough to easily weed them out. 3. A userspace only selftest variant was also sent out as RFC based on suggestions over the previous patchset to reduce the kernel complexity. However, a userspace only approach had more noise in the latency measurement due to userspace-kernel interactions which led to run to run variance and a less accurate test. Another downside of the nature of a userspace program is that it takes orders of magnitude longer to complete a full system test compared to the kernel framework. RFC patch: https://lkml.org/lkml/2020/9/2/356 4. For Intel Systems, the Timer based latencies don't exactly give out the measure of idle latencies. This is because of a hardware optimization mechanism that pre-arms a CPU when a timer is set to wake up. That doesn't make this metric useless for Intel systems, it just means that it is measuring IPI/Timer responding latency rather than idle wakeup latencies. (Source: https://lkml.org/lkml/2020/9/2/610) As a solution to this problem, a hardware based latency analyzer is devised by Artem Bityutskiy from Intel. 
https://youtu.be/Opk92aQyvt0?t=8266 https://intel.github.io/wult/ Pratik Rajesh Sampat (2): cpuidle: Extract IPI based and timer based wakeup latency from idle states selftest/cpuidle: Add support for cpuidle latency measurement drivers/cpuidle/Makefile | 1 + drivers/cpuidle/test-cpuidle_latency.c | 157 ++++++++++ lib/Kconfig.debug | 10 + tools/testing/selftests/Makefile | 1 + tools/testing/selftests/cpuidle/Makefile | 6 + tools/testing/selftests/cpuidle/cpuidle.sh | 316 +++++++++++++++++++++ tools/testing/selftests/cpuidle/settings | 2 + 7 files changed, 493 insertions(+) create mode 100644 drivers/cpuidle/test-cpuidle_latency.c create mode 100644 tools/testing/selftests/cpuidle/Makefile create mode 100755 tools/testing/selftests/cpuidle/cpuidle.sh create mode 100644 tools/testing/selftests/cpuidle/settings -- 2.17.1

4 years, 9 months

3
6
0 0

[RFC PATCH v4 0/9] KVM: selftests: some improvement and a new test for kvm page table

by Yanan Wang

Hi, This v4 series mainly includes two parts. Based on kvm queue branch: https://git.kernel.org/pub/scm/virt/kvm/kvm.git/log/?h=queue Links of v1: https://lore.kernel.org/lkml/20210208090841.333724-1-wangyanan55@huawei.com/ Links of v2: https://lore.kernel.org/lkml/20210225055940.18748-1-wangyanan55@huawei.com/ Links of v3: https://lore.kernel.org/lkml/20210301065916.11484-1-wangyanan55@huawei.com/ In the first part, all the known hugetlb backing src types specified with different hugepage sizes are listed, so that we can specify use of hugetlb source of the exact granularity that we want, instead of the system default ones. And as all the known hugetlb page sizes are listed, it's appropriate for all architectures. Besides, a helper that can get granularity of different backing src types (anonymous/thp/hugetlb) is added, so that we can use the accurate backing src granularity for kinds of alignment or guest memory accessing of vcpus. In the second part, a new test is added: This test is added to serve as a performance tester and a bug reproducer for kvm page table code (GPA->HPA mappings), it gives guidance for the people trying to make some improvement for kvm. And the following explains what we can exactly do through this test. The function guest_code() can cover the conditions where a single vcpu or multiple vcpus access guest pages within the same memory region, in three VM stages(before dirty logging, during dirty logging, after dirty logging). Besides, the backing src memory type(ANONYMOUS/THP/HUGETLB) of the tested memory region can be specified by users, which means normal page mappings or block mappings can be chosen by users to be created in the test. If ANONYMOUS memory is specified, kvm will create normal page mappings for the tested memory region before dirty logging, and update attributes of the page mappings from RO to RW during dirty logging. 
If THP/HUGETLB memory is specified, kvm will create block mappings for the tested memory region before dirty logging, and split the block mappings into normal page mappings during dirty logging, and coalesce the page mappings back into block mappings after dirty logging is stopped. So in summary, as a performance tester, this test can present the performance of kvm creating/updating normal page mappings, or the performance of kvm creating/splitting/recovering block mappings, through execution time. When we need to coalesce the page mappings back to block mappings after dirty logging is stopped, we have to firstly invalidate *all* the TLB entries for the page mappings right before installation of the block entry, because a TLB conflict abort error could occur if we can't invalidate the TLB entries fully. We have hit this TLB conflict twice on aarch64 software implementation and fixed it. As this test can simulate the process from dirty logging enabled to dirty logging stopped of a VM with block mappings, it can also reproduce this TLB conflict abort due to inadequate TLB invalidation when coalescing tables. 
Links about the TLB conflict abort: https://lore.kernel.org/lkml/20201201201034.116760-3-wangyanan55@huawei.com/ --- Change logs: v3->v4: - Add a helper to get system default hugetlb page size - Add tags of Reviewed-by of Ben in the patches v2->v3: - Add tags of Suggested-by, Reviewed-by in the patches - Add a generic micro to get hugetlb page sizes - Some changes for suggestions about v2 series v1->v2: - Add a patch to sync header files - Add helpers to get granularity of different backing src types - Some changes for suggestions about v1 series --- Yanan Wang (9): tools headers: sync headers of asm-generic/hugetlb_encode.h tools headers: Add a macro to get HUGETLB page sizes for mmap KVM: selftests: Use flag CLOCK_MONOTONIC_RAW for timing KVM: selftests: Make a generic helper to get vm guest mode strings KVM: selftests: Add a helper to get system configured THP page size KVM: selftests: Add a helper to get system default hugetlb page size KVM: selftests: List all hugetlb src types specified with page sizes KVM: selftests: Adapt vm_userspace_mem_region_add to new helpers KVM: selftests: Add a test for kvm page table code include/uapi/linux/mman.h | 2 + tools/include/asm-generic/hugetlb_encode.h | 3 + tools/include/uapi/linux/mman.h | 2 + tools/testing/selftests/kvm/Makefile | 3 + .../selftests/kvm/demand_paging_test.c | 8 +- .../selftests/kvm/dirty_log_perf_test.c | 14 +- .../testing/selftests/kvm/include/kvm_util.h | 4 +- .../testing/selftests/kvm/include/test_util.h | 21 +- .../selftests/kvm/kvm_page_table_test.c | 476 ++++++++++++++++++ tools/testing/selftests/kvm/lib/kvm_util.c | 59 ++- tools/testing/selftests/kvm/lib/test_util.c | 122 ++++- tools/testing/selftests/kvm/steal_time.c | 4 +- 12 files changed, 659 insertions(+), 59 deletions(-) create mode 100644 tools/testing/selftests/kvm/kvm_page_table_test.c -- 2.19.1

4 years, 9 months

3
24
0 0

[PATCH bpf-next 0/2] add support for batched ops in LPM trie

by Pedro Tammela

The patch itself is straightforward thanks to the infrastructure that is already in place. The tests follow the other '*_map_batch_ops' tests with minor tweaks. Pedro Tammela (2): bpf: add support for batched operations in LPM trie maps bpf: selftests: add tests for batched ops in LPM trie maps kernel/bpf/lpm_trie.c | 3 + .../map_tests/lpm_trie_map_batch_ops.c (new) | 158 ++++++++++++++++++ 2 files changed, 161 insertions(+) create mode 100644 tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c -- 2.25.1

4 years, 9 months

1
2
0 0

[PATCH] selftests: net: forwarding: Fix a typo

by Bhaskar Chowdhury

s/verfied/verified/ Signed-off-by: Bhaskar Chowdhury <unixbhaskar(a)gmail.com> --- tools/testing/selftests/net/forwarding/fib_offload_lib.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/forwarding/fib_offload_lib.sh b/tools/testing/selftests/net/forwarding/fib_offload_lib.sh index 66496659bea7..e134a5f529c9 100644 --- a/tools/testing/selftests/net/forwarding/fib_offload_lib.sh +++ b/tools/testing/selftests/net/forwarding/fib_offload_lib.sh @@ -224,7 +224,7 @@ fib_ipv4_plen_test() ip -n $ns link set dev dummy1 up # Add two routes with the same key and different prefix length and - # make sure both are in hardware. It can be verfied that both are + # make sure both are in hardware. It can be verified that both are # sharing the same leaf by checking the /proc/net/fib_trie ip -n $ns route add 192.0.2.0/24 dev dummy1 ip -n $ns route add 192.0.2.0/25 dev dummy1 -- 2.26.2

4 years, 9 months

2
1
0 0

Re: [PATCH v30 10/12] selftests/landlock: Add user space tests

by Kees Cook

On Tue, Mar 16, 2021 at 09:42:50PM +0100, Mickaël Salaün wrote: > From: Mickaël Salaün <mic(a)linux.microsoft.com> > > Test all Landlock system calls, ptrace hooks semantic and filesystem > access-control with multiple layouts. > > Test coverage for security/landlock/ is 93.6% of lines. The code not > covered only deals with internal kernel errors (e.g. memory allocation) > and race conditions. > > Cc: James Morris <jmorris(a)namei.org> > Cc: Jann Horn <jannh(a)google.com> > Cc: Kees Cook <keescook(a)chromium.org> > Cc: Serge E. Hallyn <serge(a)hallyn.com> > Cc: Shuah Khan <shuah(a)kernel.org> > Signed-off-by: Mickaël Salaün <mic(a)linux.microsoft.com> > Reviewed-by: Vincent Dagonneau <vincent.dagonneau(a)ssi.gouv.fr> > Link: https://lore.kernel.org/r/20210316204252.427806-11-mic@digikod.net This is terrific. I love the coverage. How did you measure this, BTW? To increase it into memory allocation failures, have you tried allocation fault injection: https://www.kernel.org/doc/html/latest/fault-injection/fault-injection.html > [...] > +TEST(inconsistent_attr) { > + const long page_size = sysconf(_SC_PAGESIZE); > + char *const buf = malloc(page_size + 1); > + struct landlock_ruleset_attr *const ruleset_attr = (void *)buf; > + > + ASSERT_NE(NULL, buf); > + > + /* Checks copy_from_user(). */ > + ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 0, 0)); > + /* The size if less than sizeof(struct landlock_attr_enforce). */ > + ASSERT_EQ(EINVAL, errno); > + ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 1, 0)); > + ASSERT_EQ(EINVAL, errno); Almost everywhere you're using ASSERT instead of EXPECT. Is this correct (in the sense than as soon as an ASSERT fails the rest of the test is skipped)? I do see you using EXPECT is some places, but I figured I'd ask about the intention here. > +/* > + * TEST_F_FORK() is useful when a test drop privileges but the corresponding > + * FIXTURE_TEARDOWN() requires them (e.g. 
to remove files from a directory > + * where write actions are denied). For convenience, FIXTURE_TEARDOWN() is > + * also called when the test failed, but not when FIXTURE_SETUP() failed. For > + * this to be possible, we must not call abort() but instead exit smoothly > + * (hence the step print). > + */ Hm, interesting. I think this should be extracted into a separate patch and added to the test harness proper. Could this be solved with TEARDOWN being called on SETUP failure? > +#define TEST_F_FORK(fixture_name, test_name) \ > + static void fixture_name##_##test_name##_child( \ > + struct __test_metadata *_metadata, \ > + FIXTURE_DATA(fixture_name) *self, \ > + const FIXTURE_VARIANT(fixture_name) *variant); \ > + TEST_F(fixture_name, test_name) \ > + { \ > + int status; \ > + const pid_t child = fork(); \ > + if (child < 0) \ > + abort(); \ > + if (child == 0) { \ > + _metadata->no_print = 1; \ > + fixture_name##_##test_name##_child(_metadata, self, variant); \ > + if (_metadata->skip) \ > + _exit(255); \ > + if (_metadata->passed) \ > + _exit(0); \ > + _exit(_metadata->step); \ > + } \ > + if (child != waitpid(child, &status, 0)) \ > + abort(); \ > + if (WIFSIGNALED(status) || !WIFEXITED(status)) { \ > + _metadata->passed = 0; \ > + _metadata->step = 1; \ > + return; \ > + } \ > + switch (WEXITSTATUS(status)) { \ > + case 0: \ > + _metadata->passed = 1; \ > + break; \ > + case 255: \ > + _metadata->passed = 1; \ > + _metadata->skip = 1; \ > + break; \ > + default: \ > + _metadata->passed = 0; \ > + _metadata->step = WEXITSTATUS(status); \ > + break; \ > + } \ > + } \ This looks like a subset of __wait_for_test()? Could __TEST_F_IMPL() be updated instead to do this? (Though the fork overhead might not be great for everyone.) -- Kees Cook

4 years, 9 months

2
3
0 0

[PATCH 1/2] selftests: add a kselftest for SLUB debugging functionality

by glittao＠gmail.com

From: Oliver Glitta <glittao(a)gmail.com> SLUB has resiliency_test() function which is hidden behind #ifdef SLUB_RESILIENCY_TEST that is not part of Kconfig, so nobody runs it. Kselftest should proper replacement for it. Try changing byte in redzone after allocation and changing pointer to next free node, first byte, 50th byte and redzone byte. Check if validation finds errors. There are several differences from the original resiliency test: Tests create own caches with known state instead of corrupting shared kmalloc caches. The corruption of freepointer uses correct offset, the original resiliency test got broken with freepointer changes. Scratch changing random byte test, because it does not have meaning in this form where we need deterministic results. Add new option CONFIG_TEST_SLUB in Kconfig. Add parameter to function validate_slab_cache() to return number of errors in cache. Signed-off-by: Oliver Glitta <glittao(a)gmail.com> --- lib/Kconfig.debug | 4 + lib/Makefile | 1 + lib/test_slub.c | 125 +++++++++++++++++++++++++++ mm/slab.h | 1 + mm/slub.c | 34 +++++--- tools/testing/selftests/lib/Makefile | 2 +- tools/testing/selftests/lib/config | 1 + tools/testing/selftests/lib/slub.sh | 3 + 8 files changed, 159 insertions(+), 12 deletions(-) create mode 100644 lib/test_slub.c create mode 100755 tools/testing/selftests/lib/slub.sh diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 2779c29d9981..2d56092abbc4 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2123,6 +2123,10 @@ config TEST_KSTRTOX config TEST_PRINTF tristate "Test printf() family of functions at runtime" +config TEST_SLUB + tristate "Test SLUB cache errors at runtime" + depends on SLUB_DEBUG + config TEST_BITMAP tristate "Test bitmap_*() family of functions at runtime" help diff --git a/lib/Makefile b/lib/Makefile index b5307d3eec1a..b6603803b1c4 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -83,6 +83,7 @@ obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o obj-$(CONFIG_TEST_STATIC_KEYS) 
+= test_static_keys.o obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o obj-$(CONFIG_TEST_PRINTF) += test_printf.o +obj-$(CONFIG_TEST_SLUB) += test_slub.o obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o obj-$(CONFIG_TEST_STRSCPY) += test_strscpy.o obj-$(CONFIG_TEST_UUID) += test_uuid.o diff --git a/lib/test_slub.c b/lib/test_slub.c new file mode 100644 index 000000000000..0075d9b44251 --- /dev/null +++ b/lib/test_slub.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Test cases for slub facility. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include "../mm/slab.h" + +#include "../tools/testing/selftests/kselftest_module.h" + + +KSTM_MODULE_GLOBALS(); + + +static void __init validate_result(struct kmem_cache *s, int expected_errors) +{ + int errors = 0; + + validate_slab_cache(s, &errors); + KSTM_CHECK_ZERO(errors - expected_errors); +} + +static void __init test_clobber_zone(void) +{ + struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_alloc", 64, 0, + SLAB_RED_ZONE, NULL); + u8 *p = kmem_cache_alloc(s, GFP_KERNEL); + + p[64] = 0x12; + pr_err("1. kmem_cache: Clobber Redzone 0x12->0x%p\n", p + 64); + + validate_result(s, 1); + kmem_cache_free(s, p); + kmem_cache_destroy(s); +} + +static void __init test_next_pointer(void) +{ + struct kmem_cache *s = kmem_cache_create("TestSlub_next_ptr_free", 64, 0, + SLAB_RED_ZONE, NULL); + u8 *p = kmem_cache_alloc(s, GFP_KERNEL); + + kmem_cache_free(s, p); + p[s->offset] = 0x12; + pr_err("1. kmem_cache: Clobber next pointer 0x34 -> -0x%p\n", p); + + validate_result(s, 1); + kmem_cache_destroy(s); +} + +static void __init test_first_word(void) +{ + struct kmem_cache *s = kmem_cache_create("TestSlub_1th_word_free", 64, 0, + SLAB_POISON, NULL); + u8 *p = kmem_cache_alloc(s, GFP_KERNEL); + + kmem_cache_free(s, p); + *p = 0x78; + pr_err("2. 
kmem_cache: Clobber first word 0x78->0x%p\n", p); + + validate_result(s, 1); + kmem_cache_destroy(s); +} + +static void __init test_clobber_50th_byte(void) +{ + struct kmem_cache *s = kmem_cache_create("TestSlub_50th_word_free", 64, 0, + SLAB_POISON, NULL); + u8 *p = kmem_cache_alloc(s, GFP_KERNEL); + + kmem_cache_free(s, p); + p[50] = 0x9a; + pr_err("3. kmem_cache: Clobber 50th byte 0x9a->0x%p\n", p); + + validate_result(s, 1); + kmem_cache_destroy(s); +} + +static void __init test_clobber_redzone_free(void) +{ + struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_free", 64, 0, + SLAB_RED_ZONE, NULL); + u8 *p = kmem_cache_alloc(s, GFP_KERNEL); + + kmem_cache_free(s, p); + p[64] = 0xab; + pr_err("4. kmem_cache: Clobber redzone 0xab->0x%p\n", p); + + validate_result(s, 1); + kmem_cache_destroy(s); +} + +static void __init resiliency_test(void) +{ + + BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10); + + pr_err("SLUB resiliency testing\n"); + pr_err("-----------------------\n"); + pr_err("A. Corruption after allocation\n"); + + test_clobber_zone(); + + pr_err("\nB. 
Corruption after free\n"); + + test_next_pointer(); + test_first_word(); + test_clobber_50th_byte(); + test_clobber_redzone_free(); +} + + +static void __init selftest(void) +{ + resiliency_test(); +} + + +KSTM_MODULE_LOADERS(test_slub); +MODULE_LICENSE("GPL"); diff --git a/mm/slab.h b/mm/slab.h index 076582f58f68..5fc18d506b3b 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -215,6 +215,7 @@ DECLARE_STATIC_KEY_TRUE(slub_debug_enabled); DECLARE_STATIC_KEY_FALSE(slub_debug_enabled); #endif extern void print_tracking(struct kmem_cache *s, void *object); +long validate_slab_cache(struct kmem_cache *s, int *errors); #else static inline void print_tracking(struct kmem_cache *s, void *object) { diff --git a/mm/slub.c b/mm/slub.c index e26c274b4657..c00e2b263e03 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4612,7 +4612,8 @@ static int count_total(struct page *page) #endif #ifdef CONFIG_SLUB_DEBUG -static void validate_slab(struct kmem_cache *s, struct page *page) +static void validate_slab(struct kmem_cache *s, struct page *page, + int *errors) { void *p; void *addr = page_address(page); @@ -4620,8 +4621,10 @@ static void validate_slab(struct kmem_cache *s, struct page *page) slab_lock(page); - if (!check_slab(s, page) || !on_freelist(s, page, NULL)) + if (!check_slab(s, page) || !on_freelist(s, page, NULL)) { + *errors += 1; goto unlock; + } /* Now we know that a valid freelist exists */ map = get_map(s, page); @@ -4629,8 +4632,10 @@ static void validate_slab(struct kmem_cache *s, struct page *page) u8 val = test_bit(__obj_to_index(s, addr, p), map) ? 
SLUB_RED_INACTIVE : SLUB_RED_ACTIVE; - if (!check_object(s, page, p, val)) + if (!check_object(s, page, p, val)) { + *errors += 1; break; + } } put_map(map); unlock: @@ -4638,7 +4643,7 @@ static void validate_slab(struct kmem_cache *s, struct page *page) } static int validate_slab_node(struct kmem_cache *s, - struct kmem_cache_node *n) + struct kmem_cache_node *n, int *errors) { unsigned long count = 0; struct page *page; @@ -4647,30 +4652,34 @@ static int validate_slab_node(struct kmem_cache *s, spin_lock_irqsave(&n->list_lock, flags); list_for_each_entry(page, &n->partial, slab_list) { - validate_slab(s, page); + validate_slab(s, page, errors); count++; } - if (count != n->nr_partial) + if (count != n->nr_partial) { pr_err("SLUB %s: %ld partial slabs counted but counter=%ld\n", s->name, count, n->nr_partial); + *errors += 1; + } if (!(s->flags & SLAB_STORE_USER)) goto out; list_for_each_entry(page, &n->full, slab_list) { - validate_slab(s, page); + validate_slab(s, page, errors); count++; } - if (count != atomic_long_read(&n->nr_slabs)) + if (count != atomic_long_read(&n->nr_slabs)) { pr_err("SLUB: %s %ld slabs counted but counter=%ld\n", s->name, count, atomic_long_read(&n->nr_slabs)); + *errors += 1; + } out: spin_unlock_irqrestore(&n->list_lock, flags); return count; } -static long validate_slab_cache(struct kmem_cache *s) +long validate_slab_cache(struct kmem_cache *s, int *errors) { int node; unsigned long count = 0; @@ -4678,10 +4687,12 @@ static long validate_slab_cache(struct kmem_cache *s) flush_all(s); for_each_kmem_cache_node(s, node, n) - count += validate_slab_node(s, n); + count += validate_slab_node(s, n, errors); return count; } +EXPORT_SYMBOL(validate_slab_cache); + /* * Generate lists of code addresses where slabcache objects are allocated * and freed. 
@@ -5336,9 +5347,10 @@ static ssize_t validate_store(struct kmem_cache *s, const char *buf, size_t length) { int ret = -EINVAL; + int errors = 0; if (buf[0] == '1') { - ret = validate_slab_cache(s); + ret = validate_slab_cache(s, &errors); if (ret >= 0) ret = length; } diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile index a105f094676e..f168313b7949 100644 --- a/tools/testing/selftests/lib/Makefile +++ b/tools/testing/selftests/lib/Makefile @@ -4,6 +4,6 @@ # No binaries, but make sure arg-less "make" doesn't trigger "run_tests" all: -TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh strscpy.sh +TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh strscpy.sh slub.sh include ../lib.mk diff --git a/tools/testing/selftests/lib/config b/tools/testing/selftests/lib/config index b80ee3f6e265..4190863032e7 100644 --- a/tools/testing/selftests/lib/config +++ b/tools/testing/selftests/lib/config @@ -3,3 +3,4 @@ CONFIG_TEST_BITMAP=m CONFIG_PRIME_NUMBERS=m CONFIG_TEST_STRSCPY=m CONFIG_TEST_BITOPS=m +CONFIG_TEST_SLUB=m \ No newline at end of file diff --git a/tools/testing/selftests/lib/slub.sh b/tools/testing/selftests/lib/slub.sh new file mode 100755 index 000000000000..8b5757702910 --- /dev/null +++ b/tools/testing/selftests/lib/slub.sh @@ -0,0 +1,3 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0+ +$(dirname $0)/../kselftest/module.sh "slub" test_slub -- 2.17.1

4 years, 9 months

4
8
0 0

[PATCH v2 1/2] kvm/kvm_util: add _vm_ioctl

by Emanuele Giuseppe Esposito

As in kvm_ioctl and _kvm_ioctl, add the respective _vm_ioctl for vm_ioctl. _vm_ioctl invokes an ioctl using the vm fd, leaving the caller to test the result. Signed-off-by: Emanuele Giuseppe Esposito <eesposit(a)redhat.com> --- tools/testing/selftests/kvm/include/kvm_util.h | 1 + tools/testing/selftests/kvm/lib/kvm_util.c | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 2d7eb6989e83..d53a5f7cad61 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -133,6 +133,7 @@ void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl, int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl, void *arg); void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); +int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg); void kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); int _kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index e5fbf16f725b..b8849a1aca79 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1697,11 +1697,16 @@ void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg) { int ret; - ret = ioctl(vm->fd, cmd, arg); + ret = _vm_ioctl(vm, cmd, arg); TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)", cmd, ret, errno, strerror(errno)); } +int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg) +{ + return ioctl(vm->fd, cmd, arg); +} + /* * KVM system ioctl * -- 2.29.2

4 years, 9 months

3
6
0 0

[PATCH] selftests/kvm: add get_msr_index_features

by Emanuele Giuseppe Esposito

Test the KVM_GET_MSR_FEATURE_INDEX_LIST and KVM_GET_MSR_INDEX_LIST ioctls. Signed-off-by: Emanuele Giuseppe Esposito <eesposit(a)redhat.com> --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + .../kvm/x86_64/get_msr_index_features.c | 124 ++++++++++++++++++ 3 files changed, 126 insertions(+) create mode 100644 tools/testing/selftests/kvm/x86_64/get_msr_index_features.c diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 32b87cc77c8e..d99f3969d371 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -5,6 +5,7 @@ /s390x/resets /s390x/sync_regs_test /x86_64/cr4_cpuid_sync_test +/x86_64/get_msr_index_features /x86_64/debug_regs /x86_64/evmcs_test /x86_64/get_cpuid_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index a6d61f451f88..c748b9650e28 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -39,6 +39,7 @@ LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test +TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test TEST_GEN_PROGS_x86_64 += x86_64/get_cpuid_test TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid diff --git a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c new file mode 100644 index 000000000000..ad9972d99dfa --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test that KVM_GET_MSR_INDEX_LIST and + * KVM_GET_MSR_FEATURE_INDEX_LIST work as intended + * + * Copyright (C) 2020, Red Hat, Inc. 
+ */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "../lib/kvm_util_internal.h" + +static int kvm_num_index_msrs(int kvm_fd, int nmsrs) +{ + struct kvm_msr_list *list; + int r; + + list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0])); + list->nmsrs = nmsrs; + r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list); + TEST_ASSERT(r == -1 && errno == E2BIG, + "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i", + r); + + r = list->nmsrs; + free(list); + return r; +} + +static void test_get_msr_index(void) +{ + int old_res, res, kvm_fd; + + kvm_fd = open(KVM_DEV_PATH, O_RDONLY); + if (kvm_fd < 0) + exit(KSFT_SKIP); + + old_res = kvm_num_index_msrs(kvm_fd, 0); + TEST_ASSERT(old_res != 0, "Expecting nmsrs to be > 0"); + + if (old_res != 1) { + res = kvm_num_index_msrs(kvm_fd, 1); + TEST_ASSERT(res > 1, "Expecting nmsrs to be > 1"); + TEST_ASSERT(res == old_res, "Expecting nmsrs to be identical"); + } + + close(kvm_fd); +} + +static int kvm_num_feature_msrs(int kvm_fd, int nmsrs) +{ + struct kvm_msr_list *list; + int r; + + list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0])); + list->nmsrs = nmsrs; + r = ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list); + TEST_ASSERT(r == -1 && errno == E2BIG, + "Unexpected result from KVM_GET_MSR_FEATURE_INDEX_LIST probe, r: %i", + r); + + r = list->nmsrs; + free(list); + return r; +} + +struct kvm_msr_list *kvm_get_msr_feature_list(int kvm_fd, int nmsrs) +{ + struct kvm_msr_list *list; + int r; + + list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0])); + list->nmsrs = nmsrs; + r = ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list); + + TEST_ASSERT(r == 0, + "Unexpected result from KVM_GET_MSR_FEATURE_INDEX_LIST, r: %i", + r); + + return list; +} + +static void test_get_msr_feature(void) +{ + int res, old_res, i, kvm_fd; + struct kvm_msr_list *feature_list; + 
+ kvm_fd = open(KVM_DEV_PATH, O_RDONLY); + if (kvm_fd < 0) + exit(KSFT_SKIP); + + old_res = kvm_num_feature_msrs(kvm_fd, 0); + TEST_ASSERT(old_res != 0, "Expecting nmsrs to be > 0"); + + if (old_res != 1) { + res = kvm_num_feature_msrs(kvm_fd, 1); + TEST_ASSERT(res > 1, "Expecting nmsrs to be > 1"); + TEST_ASSERT(res == old_res, "Expecting nmsrs to be identical"); + } + + feature_list = kvm_get_msr_feature_list(kvm_fd, old_res); + TEST_ASSERT(old_res == feature_list->nmsrs, + "Unmatching number of msr indexes"); + + for (i = 0; i < feature_list->nmsrs; i++) + kvm_get_feature_msr(feature_list->indices[i]); + + free(feature_list); + close(kvm_fd); +} + +int main(int argc, char *argv[]) +{ + if (kvm_check_cap(KVM_CAP_GET_MSR_FEATURES)) + test_get_msr_feature(); + + test_get_msr_index(); +} -- 2.29.2

4 years, 9 months

4
7
0 0

[PATCH] selftests/sgx: improve error detection and messages

by Dave Hansen

From: Dave Hansen <dave.hansen(a)linux.intel.com> The SGX device file (/dev/sgx_enclave) is unusual in that it requires execute permissions. It has to be both "chmod +x" *and* be on a filesystem without 'noexec'. In the future, udev and systemd should get updates to set up systems automatically. But, for now, nobody's systems do this automatically, and everybody gets error messages like this when running ./test_sgx: 0x0000000000000000 0x0000000000002000 0x03 0x0000000000002000 0x0000000000001000 0x05 0x0000000000003000 0x0000000000003000 0x03 mmap() failed, errno=1. That isn't very user friendly, even for forgetful kernel developers. Further, the test case is rather haphazard about its use of fprintf() versus perror(). Improve the error messages. Use perror() where possible. Lastly, do some sanity checks on opening and mmap()ing the device file so that we can get a decent error message out to the user. Now, if your user doesn't have permission, you'll get the following: $ ls -l /dev/sgx_enclave crw------- 1 root root 10, 126 Mar 18 11:29 /dev/sgx_enclave $ ./test_sgx Unable to open /dev/sgx_enclave: Permission denied If you then 'chown dave:dave /dev/sgx_enclave' (or whatever), but you leave execute permissions off, you'll get: $ ls -l /dev/sgx_enclave crw------- 1 dave dave 10, 126 Mar 18 11:29 /dev/sgx_enclave $ ./test_sgx no execute permissions on device file If you fix that with "chmod ug+x /dev/sgx" but you leave /dev as noexec, you'll get this: $ mount | grep "/dev .*noexec" udev on /dev type devtmpfs (rw,nosuid,noexec,...) 
$ ./test_sgx ERROR: mmap for exec: Operation not permitted mmap() succeeded for PROT_READ, but failed for PROT_EXEC check that user has execute permissions on /dev/sgx_enclave and that /dev does not have noexec set: 'mount | grep "/dev .*noexec"' That can be fixed with: mount -o remount,exec /dev Hopefully, the combination of better error messages and the search engines indexing this message will help people fix their systems until we do this properly. Signed-off-by: Dave Hansen <dave.hansen(a)linux.intel.com> Cc: Jarkko Sakkinen <jarkko(a)kernel.org> Cc: Shuah Khan <shuah(a)kernel.org> Cc: Borislav Petkov <bp(a)alien8.de> Cc: x86(a)kernel.org Cc: linux-sgx(a)vger.kernel.org Cc: linux-kselftest(a)vger.kernel.org --- b/tools/testing/selftests/sgx/load.c | 66 +++++++++++++++++++++++++++-------- b/tools/testing/selftests/sgx/main.c | 2 - 2 files changed, 53 insertions(+), 15 deletions(-) diff -puN tools/testing/selftests/sgx/load.c~sgx-selftest-err-rework tools/testing/selftests/sgx/load.c --- a/tools/testing/selftests/sgx/load.c~sgx-selftest-err-rework 2021-03-18 12:18:38.649828215 -0700 +++ b/tools/testing/selftests/sgx/load.c 2021-03-18 12:40:46.388824904 -0700 @@ -45,19 +45,19 @@ static bool encl_map_bin(const char *pat fd = open(path, O_RDONLY); if (fd == -1) { - perror("open()"); + perror("enclave executable open()"); return false; } ret = stat(path, &sb); if (ret) { - perror("stat()"); + perror("enclave executable stat()"); goto err; } bin = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0); if (bin == MAP_FAILED) { - perror("mmap()"); + perror("enclave executable mmap()"); goto err; } @@ -90,8 +90,7 @@ static bool encl_ioc_create(struct encl ioc.src = (unsigned long)secs; rc = ioctl(encl->fd, SGX_IOC_ENCLAVE_CREATE, &ioc); if (rc) { - fprintf(stderr, "SGX_IOC_ENCLAVE_CREATE failed: errno=%d\n", - errno); + perror("SGX_IOC_ENCLAVE_CREATE failed"); munmap((void *)secs->base, encl->encl_size); return false; } @@ -116,31 +115,69 @@ static bool 
encl_ioc_add_pages(struct en rc = ioctl(encl->fd, SGX_IOC_ENCLAVE_ADD_PAGES, &ioc); if (rc < 0) { - fprintf(stderr, "SGX_IOC_ENCLAVE_ADD_PAGES failed: errno=%d.\n", - errno); + perror("SGX_IOC_ENCLAVE_ADD_PAGES failed"); return false; } return true; } + + bool encl_load(const char *path, struct encl *encl) { + const char device_path[] = "/dev/sgx_enclave"; Elf64_Phdr *phdr_tbl; off_t src_offset; Elf64_Ehdr *ehdr; + struct stat sb; + void *ptr; int i, j; int ret; + int fd = -1; memset(encl, 0, sizeof(*encl)); - ret = open("/dev/sgx_enclave", O_RDWR); - if (ret < 0) { - fprintf(stderr, "Unable to open /dev/sgx_enclave\n"); + fd = open(device_path, O_RDWR); + if (fd < 0) { + perror("Unable to open /dev/sgx_enclave"); + goto err; + } + + ret = stat(device_path, &sb); + if (ret) { + perror("device file stat()"); + goto err; + } + + /* + * This just checks if the /dev file has these permission + * bits set. It does not check that the current user is + * the owner or in the owning group. + */ + if (!(sb.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) { + fprintf(stderr, "no execute permissions on device file\n"); + goto err; + } + + ptr = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, fd, 0); + if (ptr == (void *)-1) { + perror("mmap for read"); + goto err; + } + munmap(ptr, PAGE_SIZE); + + ptr = mmap(NULL, PAGE_SIZE, PROT_EXEC, MAP_SHARED, fd, 0); + if (ptr == (void *)-1) { + perror("ERROR: mmap for exec"); + fprintf(stderr, "mmap() succeeded for PROT_READ, but failed for PROT_EXEC\n"); + fprintf(stderr, "check that user has execute permissions on %s and\n", device_path); + fprintf(stderr, "that /dev does not have noexec set: 'mount | grep \"/dev .*noexec\"'\n"); goto err; } + munmap(ptr, PAGE_SIZE); - encl->fd = ret; + encl->fd = fd; if (!encl_map_bin(path, encl)) goto err; @@ -217,6 +254,8 @@ bool encl_load(const char *path, struct return true; err: + if (fd != -1) + close(fd); encl_delete(encl); return false; } @@ -229,7 +268,7 @@ static bool encl_map_area(struct encl *e area 
= mmap(NULL, encl_size * 2, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (area == MAP_FAILED) { - perror("mmap"); + perror("reservation mmap()"); return false; } @@ -268,8 +307,7 @@ bool encl_build(struct encl *encl) ioc.sigstruct = (uint64_t)&encl->sigstruct; ret = ioctl(encl->fd, SGX_IOC_ENCLAVE_INIT, &ioc); if (ret) { - fprintf(stderr, "SGX_IOC_ENCLAVE_INIT failed: errno=%d\n", - errno); + perror("SGX_IOC_ENCLAVE_INIT failed"); return false; } diff -puN tools/testing/selftests/sgx/main.c~sgx-selftest-err-rework tools/testing/selftests/sgx/main.c --- a/tools/testing/selftests/sgx/main.c~sgx-selftest-err-rework 2021-03-18 12:18:38.652828215 -0700 +++ b/tools/testing/selftests/sgx/main.c 2021-03-18 12:18:38.657828215 -0700 @@ -195,7 +195,7 @@ int main(int argc, char *argv[], char *e addr = mmap((void *)encl.encl_base + seg->offset, seg->size, seg->prot, MAP_SHARED | MAP_FIXED, encl.fd, 0); if (addr == MAP_FAILED) { - fprintf(stderr, "mmap() failed, errno=%d.\n", errno); + perror("mmap() segment failed"); exit(KSFT_FAIL); } } _

4 years, 9 months

2
1
0 0

[PATCH v4 1/2] mm: huge_memory: a new debugfs interface for splitting THP tests.

by Zi Yan

From: Zi Yan <ziy(a)nvidia.com> We did not have a direct user interface for splitting the compound page backing a THP, and there is no need for one unless we want to expose the THP implementation details to users. Make <debugfs>/split_huge_pages accept a new command to do that. By writing "<pid>,<vaddr_start>,<vaddr_end>" to <debugfs>/split_huge_pages, THPs within the given virtual address range from the process with the given pid are split. It is used to test the split_huge_page function. In addition, a selftest program is added to tools/testing/selftests/vm to utilize the interface by splitting PMD THPs and PTE-mapped THPs. This does not change the old behavior, i.e., writing 1 to the interface to split all THPs in the system. Changelog: From v3: 1. Factored out the code that splits huge pages in the given pid into a separate function. 2. Added the missing put_page for pages that are not split. 3. pr_debug -> pr_info, to make reading results simpler. From v2: 1. Reused the existing <debugfs>/split_huge_pages interface. (suggested by Yang Shi) From v1: 1. Removed the unnecessary call to vma_migratable, spotted by kernel test robot <lkp(a)intel.com>. 2. Dropped the use of find_mm_struct and coded it directly, since there is no need for the permission check in that function and the function is only available when migration is on. 3. Added some comments in the selftest program to clarify how PTE-mapped THPs are formed. 
Signed-off-by: Zi Yan <ziy(a)nvidia.com> --- mm/huge_memory.c | 136 +++++++- tools/testing/selftests/vm/.gitignore | 1 + tools/testing/selftests/vm/Makefile | 1 + .../selftests/vm/split_huge_page_test.c | 313 ++++++++++++++++++ 4 files changed, 444 insertions(+), 7 deletions(-) create mode 100644 tools/testing/selftests/vm/split_huge_page_test.c diff --git a/mm/huge_memory.c b/mm/huge_memory.c index bff92dea5ab3..3bfee54e2cd0 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -7,6 +7,7 @@ #include <linux/mm.h> #include <linux/sched.h> +#include <linux/sched/mm.h> #include <linux/sched/coredump.h> #include <linux/sched/numa_balancing.h> #include <linux/highmem.h> @@ -2922,16 +2923,14 @@ static struct shrinker deferred_split_shrinker = { }; #ifdef CONFIG_DEBUG_FS -static int split_huge_pages_set(void *data, u64 val) +static void split_huge_pages_all(void) { struct zone *zone; struct page *page; unsigned long pfn, max_zone_pfn; unsigned long total = 0, split = 0; - if (val != 1) - return -EINVAL; - + pr_info("Split all THPs\n"); for_each_populated_zone(zone) { max_zone_pfn = zone_end_pfn(zone); for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) { @@ -2959,11 +2958,134 @@ static int split_huge_pages_set(void *data, u64 val) } pr_info("%lu of %lu THP split\n", split, total); +} - return 0; +static int split_huge_pages_pid(int pid, unsigned long vaddr_start, + unsigned long vaddr_end) +{ + int ret = 0; + struct task_struct *task; + struct mm_struct *mm; + unsigned long total = 0, split = 0; + unsigned long addr; + + vaddr_start &= PAGE_MASK; + vaddr_end &= PAGE_MASK; + + /* Find the task_struct from pid */ + rcu_read_lock(); + task = find_task_by_vpid(pid); + if (!task) { + rcu_read_unlock(); + ret = -ESRCH; + goto out; + } + get_task_struct(task); + rcu_read_unlock(); + + /* Find the mm_struct */ + mm = get_task_mm(task); + put_task_struct(task); + + if (!mm) { + ret = -EINVAL; + goto out; + } + + pr_info("Split huge pages in pid: %d, vaddr: [0x%lx - 
0x%lx]\n", + pid, vaddr_start, vaddr_end); + + mmap_read_lock(mm); + /* + * always increase addr by PAGE_SIZE, since we could have a PTE page + * table filled with PTE-mapped THPs, each of which is distinct. + */ + for (addr = vaddr_start; addr < vaddr_end; addr += PAGE_SIZE) { + struct vm_area_struct *vma = find_vma(mm, addr); + unsigned int follflags; + struct page *page; + + if (!vma || addr < vma->vm_start) + break; + + /* FOLL_DUMP to ignore special (like zero) pages */ + follflags = FOLL_GET | FOLL_DUMP; + page = follow_page(vma, addr, follflags); + + if (IS_ERR(page)) + break; + if (!page) + break; + + if (!is_transparent_hugepage(page)) + goto next; + + total++; + if (!can_split_huge_page(compound_head(page), NULL)) + goto next; + + if (!trylock_page(page)) + goto next; + + if (!split_huge_page(page)) + split++; + + unlock_page(page); +next: + put_page(page); + } + mmap_read_unlock(mm); + mmput(mm); + + pr_info("%lu of %lu THP split\n", split, total); + +out: + return ret; } -DEFINE_DEBUGFS_ATTRIBUTE(split_huge_pages_fops, NULL, split_huge_pages_set, - "%llu\n"); + +static ssize_t split_huge_pages_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppops) +{ + static DEFINE_MUTEX(mutex); + ssize_t ret; + char input_buf[80]; /* hold pid, start_vaddr, end_vaddr */ + int pid; + unsigned long vaddr_start, vaddr_end; + + ret = mutex_lock_interruptible(&mutex); + if (ret) + return ret; + + ret = -EFAULT; + + memset(input_buf, 0, 80); + if (copy_from_user(input_buf, buf, min_t(size_t, count, 80))) + goto out; + + input_buf[79] = '\0'; + ret = sscanf(input_buf, "%d,0x%lx,0x%lx", &pid, &vaddr_start, &vaddr_end); + if (ret == 1 && pid == 1) { + split_huge_pages_all(); + ret = strlen(input_buf); + goto out; + } else if (ret != 3) { + ret = -EINVAL; + goto out; + } + + if (!split_huge_pages_pid(pid, vaddr_start, vaddr_end)) + ret = strlen(input_buf); +out: + mutex_unlock(&mutex); + return ret; + +} + +static const struct file_operations 
split_huge_pages_fops = { + .owner = THIS_MODULE, + .write = split_huge_pages_write, + .llseek = no_llseek, +}; static int __init split_huge_pages_debugfs(void) { diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index 9a35c3f6a557..1f651e85ed60 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -22,3 +22,4 @@ map_fixed_noreplace write_to_hugetlbfs hmm-tests local_config.* +split_huge_page_test diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index d42115e4284d..4cbc91d6869f 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -42,6 +42,7 @@ TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += userfaultfd +TEST_GEN_FILES += split_huge_page_test ifeq ($(MACHINE),x86_64) CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32) diff --git a/tools/testing/selftests/vm/split_huge_page_test.c b/tools/testing/selftests/vm/split_huge_page_test.c new file mode 100644 index 000000000000..9f33ddbb3182 --- /dev/null +++ b/tools/testing/selftests/vm/split_huge_page_test.c @@ -0,0 +1,313 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * A test of splitting PMD THPs and PTE-mapped THPs from a specified virtual + * address range in a process via <debugfs>/split_huge_pages interface. 
+ */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <inttypes.h> +#include <string.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <malloc.h> +#include <stdbool.h> + +uint64_t pagesize; +unsigned int pageshift; +uint64_t pmd_pagesize; + +#define PMD_SIZE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size" +#define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages" +#define SMAP_PATH "/proc/self/smaps" +#define INPUT_MAX 80 + +#define PFN_MASK ((1UL<<55)-1) +#define KPF_THP (1UL<<22) + +int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file) +{ + uint64_t paddr; + uint64_t page_flags; + + if (pagemap_file) { + pread(pagemap_file, &paddr, sizeof(paddr), + ((long)vaddr >> pageshift) * sizeof(paddr)); + + if (kpageflags_file) { + pread(kpageflags_file, &page_flags, sizeof(page_flags), + (paddr & PFN_MASK) * sizeof(page_flags)); + + return !!(page_flags & KPF_THP); + } + } + return 0; +} + + +static uint64_t read_pmd_pagesize(void) +{ + int fd; + char buf[20]; + ssize_t num_read; + + fd = open(PMD_SIZE_PATH, O_RDONLY); + if (fd == -1) { + perror("Open hpage_pmd_size failed"); + exit(EXIT_FAILURE); + } + num_read = read(fd, buf, 19); + if (num_read < 1) { + close(fd); + perror("Read hpage_pmd_size failed"); + exit(EXIT_FAILURE); + } + buf[num_read] = '\0'; + close(fd); + + return strtoul(buf, NULL, 10); +} + +static int write_file(const char *path, const char *buf, size_t buflen) +{ + int fd; + ssize_t numwritten; + + fd = open(path, O_WRONLY); + if (fd == -1) + return 0; + + numwritten = write(fd, buf, buflen - 1); + close(fd); + if (numwritten < 1) + return 0; + + return (unsigned int) numwritten; +} + +static void write_debugfs(int pid, uint64_t vaddr_start, uint64_t vaddr_end) +{ + char input[INPUT_MAX]; + int ret; + + ret = snprintf(input, INPUT_MAX, "%d,0x%lx,0x%lx", pid, vaddr_start, + vaddr_end); + if (ret >= INPUT_MAX) { + printf("%s: Debugfs input is too long\n", __func__); + 
exit(EXIT_FAILURE); + } + + if (!write_file(SPLIT_DEBUGFS, input, ret + 1)) { + perror(SPLIT_DEBUGFS); + exit(EXIT_FAILURE); + } +} + +#define MAX_LINE_LENGTH 500 + +static bool check_for_pattern(FILE *fp, const char *pattern, char *buf) +{ + while (fgets(buf, MAX_LINE_LENGTH, fp) != NULL) { + if (!strncmp(buf, pattern, strlen(pattern))) + return true; + } + return false; +} + +static uint64_t check_huge(void *addr) +{ + uint64_t thp = 0; + int ret; + FILE *fp; + char buffer[MAX_LINE_LENGTH]; + char addr_pattern[MAX_LINE_LENGTH]; + + ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-", + (unsigned long) addr); + if (ret >= MAX_LINE_LENGTH) { + printf("%s: Pattern is too long\n", __func__); + exit(EXIT_FAILURE); + } + + + fp = fopen(SMAP_PATH, "r"); + if (!fp) { + printf("%s: Failed to open file %s\n", __func__, SMAP_PATH); + exit(EXIT_FAILURE); + } + if (!check_for_pattern(fp, addr_pattern, buffer)) + goto err_out; + + /* + * Fetch the AnonHugePages: in the same block and check the number of + * hugepages. 
+ */ + if (!check_for_pattern(fp, "AnonHugePages:", buffer)) + goto err_out; + + if (sscanf(buffer, "AnonHugePages:%10ld kB", &thp) != 1) { + printf("Reading smap error\n"); + exit(EXIT_FAILURE); + } + +err_out: + fclose(fp); + return thp; +} + +void split_pmd_thp(void) +{ + char *one_page; + size_t len = 4 * pmd_pagesize; + uint64_t thp_size; + size_t i; + + one_page = memalign(pmd_pagesize, len); + + madvise(one_page, len, MADV_HUGEPAGE); + + for (i = 0; i < len; i++) + one_page[i] = (char)i; + + thp_size = check_huge(one_page); + if (!thp_size) { + printf("No THP is allocatd"); + exit(EXIT_FAILURE); + } + + /* split all THPs */ + write_debugfs(getpid(), (uint64_t)one_page, (uint64_t)one_page + len); + + for (i = 0; i < len; i++) + if (one_page[i] != (char)i) { + printf("%ld byte corrupted\n", i); + exit(EXIT_FAILURE); + } + + + thp_size = check_huge(one_page); + if (thp_size) { + printf("Still %ld kB AnonHugePages not split\n", thp_size); + exit(EXIT_FAILURE); + } + + printf("Split huge pages successful\n"); + free(one_page); +} + +void split_pte_mapped_thp(void) +{ + char *one_page, *pte_mapped, *pte_mapped2; + size_t len = 4 * pmd_pagesize; + uint64_t thp_size; + size_t i; + const char *pagemap_template = "/proc/%d/pagemap"; + const char *kpageflags_proc = "/proc/kpageflags"; + char pagemap_proc[255]; + int pagemap_fd; + int kpageflags_fd; + + if (snprintf(pagemap_proc, 255, pagemap_template, getpid()) < 0) { + perror("get pagemap proc error"); + exit(EXIT_FAILURE); + } + pagemap_fd = open(pagemap_proc, O_RDONLY); + + if (pagemap_fd == -1) { + perror("read pagemap:"); + exit(EXIT_FAILURE); + } + + kpageflags_fd = open(kpageflags_proc, O_RDONLY); + + if (kpageflags_fd == -1) { + perror("read kpageflags:"); + exit(EXIT_FAILURE); + } + + one_page = mmap((void *)(1UL << 30), len, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + + madvise(one_page, len, MADV_HUGEPAGE); + + for (i = 0; i < len; i++) + one_page[i] = (char)i; + + thp_size = 
check_huge(one_page); + if (!thp_size) { + printf("No THP is allocatd"); + exit(EXIT_FAILURE); + } + + /* remap the first pagesize of first THP */ + pte_mapped = mremap(one_page, pagesize, pagesize, MREMAP_MAYMOVE); + + /* remap the Nth pagesize of Nth THP */ + for (i = 1; i < 4; i++) { + pte_mapped2 = mremap(one_page + pmd_pagesize * i + pagesize * i, + pagesize, pagesize, + MREMAP_MAYMOVE|MREMAP_FIXED, + pte_mapped + pagesize * i); + if (pte_mapped2 == (char *)-1) { + perror("mremap failed"); + exit(EXIT_FAILURE); + } + } + + /* smap does not show THPs after mremap, use kpageflags instead */ + thp_size = 0; + for (i = 0; i < pagesize * 4; i++) + if (i % pagesize == 0 && + is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd)) + thp_size++; + + if (thp_size != 4) { + printf("Some THPs are missing during mremap\n"); + exit(EXIT_FAILURE); + } + + /* split all remapped THPs */ + write_debugfs(getpid(), (uint64_t)pte_mapped, + (uint64_t)pte_mapped + pagesize * 4); + + /* smap does not show THPs after mremap, use kpageflags instead */ + thp_size = 0; + for (i = 0; i < pagesize * 4; i++) { + if (pte_mapped[i] != (char)i) { + printf("%ld byte corrupted\n", i); + exit(EXIT_FAILURE); + } + if (i % pagesize == 0 && + is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd)) + thp_size++; + } + + if (thp_size) { + printf("Still %ld THPs not split\n", thp_size); + exit(EXIT_FAILURE); + } + + printf("Split PTE-mapped huge pages successful\n"); + munmap(one_page, len); + close(pagemap_fd); + close(kpageflags_fd); +} + +int main(int argc, char **argv) +{ + if (geteuid() != 0) { + printf("Please run the benchmark as root\n"); + exit(EXIT_FAILURE); + } + + pagesize = getpagesize(); + pageshift = ffs(pagesize) - 1; + pmd_pagesize = read_pmd_pagesize(); + + split_pmd_thp(); + split_pte_mapped_thp(); + + return 0; +} -- 2.30.1

4 years, 9 months

4
8
0 0

[PATCH] selftests/kvm: add test for KVM_GET_MSR_FEATURE_INDEX_LIST

by Emanuele Giuseppe Esposito

Extend the kvm_get_feature_msr function to also cover KVM_GET_MSR_FEATURE_INDEX_LIST. Signed-off-by: Emanuele Giuseppe Esposito <eesposit(a)redhat.com> --- tools/testing/selftests/kvm/lib/x86_64/processor.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index a8906e60a108..3eaa6b0172a9 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -688,13 +688,20 @@ uint64_t kvm_get_feature_msr(uint64_t msr_index) struct kvm_msr_entry entry; } buffer = {}; int r, kvm_fd; + struct kvm_msr_list features_list; buffer.header.nmsrs = 1; buffer.entry.index = msr_index; + features_list.nmsrs = 1; + kvm_fd = open(KVM_DEV_PATH, O_RDONLY); if (kvm_fd < 0) exit(KSFT_SKIP); + r = ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, &features_list); + TEST_ASSERT(r < 0 && r != -E2BIG, "KVM_GET_MSR_FEATURE_INDEX_LIST IOCTL failed,\n" + " rc: %i errno: %i", r, errno); + r = ioctl(kvm_fd, KVM_GET_MSRS, &buffer.header); TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n" " rc: %i errno: %i", r, errno); -- 2.29.2

4 years, 9 months

3
3
0 0

[PATCH v1 5/5] selftests/vm: add test for MADV_POPULATE_(READ|WRITE)

by David Hildenbrand

Let's add a simple test for MADV_POPULATE_READ and MADV_POPULATE_WRITE, verifying some error handling, that population works, and that softdirty tracking works as expected. For now, limit the test to private anonymous memory. Cc: Andrew Morton <akpm(a)linux-foundation.org> Cc: Arnd Bergmann <arnd(a)arndb.de> Cc: Michal Hocko <mhocko(a)suse.com> Cc: Oscar Salvador <osalvador(a)suse.de> Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org> Cc: Andrea Arcangeli <aarcange(a)redhat.com> Cc: Minchan Kim <minchan(a)kernel.org> Cc: Jann Horn <jannh(a)google.com> Cc: Jason Gunthorpe <jgg(a)ziepe.ca> Cc: Dave Hansen <dave.hansen(a)intel.com> Cc: Hugh Dickins <hughd(a)google.com> Cc: Rik van Riel <riel(a)surriel.com> Cc: Michael S. Tsirkin <mst(a)redhat.com> Cc: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com> Cc: Vlastimil Babka <vbabka(a)suse.cz> Cc: Richard Henderson <rth(a)twiddle.net> Cc: Ivan Kokshaysky <ink(a)jurassic.park.msu.ru> Cc: Matt Turner <mattst88(a)gmail.com> Cc: Thomas Bogendoerfer <tsbogend(a)alpha.franken.de> Cc: "James E.J. 
Bottomley" <James.Bottomley(a)HansenPartnership.com> Cc: Helge Deller <deller(a)gmx.de> Cc: Chris Zankel <chris(a)zankel.net> Cc: Max Filippov <jcmvbkbc(a)gmail.com> Cc: Mike Kravetz <mike.kravetz(a)oracle.com> Cc: Peter Xu <peterx(a)redhat.com> Cc: Rolf Eike Beer <eike-kernel(a)sf-tec.de> Cc: Shuah Khan <shuah(a)kernel.org> Cc: linux-alpha(a)vger.kernel.org Cc: linux-mips(a)vger.kernel.org Cc: linux-parisc(a)vger.kernel.org Cc: linux-xtensa(a)linux-xtensa.org Cc: linux-arch(a)vger.kernel.org Cc: linux-kselftest(a)vger.kernel.org Cc: Linux API <linux-api(a)vger.kernel.org> Signed-off-by: David Hildenbrand <david(a)redhat.com> --- tools/testing/selftests/vm/.gitignore | 1 + tools/testing/selftests/vm/Makefile | 1 + tools/testing/selftests/vm/madv_populate.c | 342 +++++++++++++++++++++ tools/testing/selftests/vm/run_vmtests.sh | 16 + 4 files changed, 360 insertions(+) create mode 100644 tools/testing/selftests/vm/madv_populate.c diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index b4fc0148360e..c9a5dd1adf7d 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -24,3 +24,4 @@ hmm-tests local_config.* protection_keys_32 protection_keys_64 +madv_populate diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index d42115e4284d..4851f3f84575 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -42,6 +42,7 @@ TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += userfaultfd +TEST_GEN_FILES += madv_populate ifeq ($(MACHINE),x86_64) CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32) diff --git a/tools/testing/selftests/vm/madv_populate.c b/tools/testing/selftests/vm/madv_populate.c new file mode 100644 index 000000000000..b959e4ebdad4 --- /dev/null +++ b/tools/testing/selftests/vm/madv_populate.c @@ -0,0 +1,342 @@ +// 
SPDX-License-Identifier: GPL-2.0-only +/* + * MADV_POPULATE_READ and MADV_POPULATE_WRITE tests + * + * Copyright 2021, Red Hat, Inc. + * + * Author(s): David Hildenbrand <david(a)redhat.com> + */ +#define _GNU_SOURCE +#include <stdlib.h> +#include <string.h> +#include <stdbool.h> +#include <stdint.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <sys/mman.h> + +#include "../kselftest.h" + +#if defined(MADV_POPULATE_READ) && defined(MADV_POPULATE_WRITE) + +/* + * For now, we're using 2 MiB of private anonymous memory for all tests. + */ +#define SIZE (2 * 1024 * 1024) + +static size_t pagesize; + +static uint64_t pagemap_get_entry(int fd, char *start) +{ + const unsigned long pfn = (unsigned long)start / pagesize; + uint64_t entry; + int ret; + + ret = pread(fd, &entry, sizeof(entry), pfn * sizeof(entry)); + if (ret != sizeof(entry)) + ksft_exit_fail_msg("reading pagemap failed\n"); + return entry; +} + +static bool pagemap_is_populated(int fd, char *start) +{ + uint64_t entry = pagemap_get_entry(fd, start); + + /* Present or swapped. 
*/ + return entry & 0xc000000000000000ull; +} + +static bool pagemap_is_softdirty(int fd, char *start) +{ + uint64_t entry = pagemap_get_entry(fd, start); + + return entry & 0x0080000000000000ull; +} + +static void sense_support(void) +{ + char *addr; + int ret; + + addr = mmap(0, pagesize, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + if (!addr) + ksft_exit_fail_msg("mmap failed\n"); + + ret = madvise(addr, pagesize, MADV_POPULATE_READ); + if (ret) + ksft_exit_skip("MADV_POPULATE_READ is not available\n"); + + ret = madvise(addr, pagesize, MADV_POPULATE_WRITE); + if (ret) + ksft_exit_skip("MADV_POPULATE_WRITE is not available\n"); + + munmap(addr, pagesize); +} + +static void test_prot_read(void) +{ + char *addr; + int ret; + + ksft_print_msg("[RUN] %s\n", __func__); + + addr = mmap(0, SIZE, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + if (addr == MAP_FAILED) + ksft_exit_fail_msg("mmap failed\n"); + + ret = madvise(addr, SIZE, MADV_POPULATE_READ); + ksft_test_result(!ret, "MADV_POPULATE_READ with PROT_READ\n"); + + ret = madvise(addr, SIZE, MADV_POPULATE_WRITE); + ksft_test_result(ret == -1 && errno == EINVAL, + "MADV_POPULATE_WRITE with PROT_READ\n"); + + munmap(addr, SIZE); +} + +static void test_prot_write(void) +{ + char *addr; + int ret; + + ksft_print_msg("[RUN] %s\n", __func__); + + addr = mmap(0, SIZE, PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + if (addr == MAP_FAILED) + ksft_exit_fail_msg("mmap failed\n"); + + ret = madvise(addr, SIZE, MADV_POPULATE_READ); + ksft_test_result(ret == -1 && errno == EINVAL, + "MADV_POPULATE_READ with PROT_WRITE\n"); + + ret = madvise(addr, SIZE, MADV_POPULATE_WRITE); + ksft_test_result(!ret, "MADV_POPULATE_WRITE with PROT_WRITE\n"); + + munmap(addr, SIZE); +} + +static void test_holes(void) +{ + char *addr; + int ret; + + ksft_print_msg("[RUN] %s\n", __func__); + + addr = mmap(0, SIZE, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + if (addr == MAP_FAILED) + 
ksft_exit_fail_msg("mmap failed\n"); + ret = munmap(addr + pagesize, pagesize); + if (ret) + ksft_exit_fail_msg("munmap failed\n"); + + /* Hole in the middle */ + ret = madvise(addr, SIZE, MADV_POPULATE_READ); + ksft_test_result(ret == -1 && errno == ENOMEM, + "MADV_POPULATE_READ with holes in the middle\n"); + ret = madvise(addr, SIZE, MADV_POPULATE_WRITE); + ksft_test_result(ret == -1 && errno == ENOMEM, + "MADV_POPULATE_WRITE with holes in the middle\n"); + + /* Hole at end */ + ret = madvise(addr, 2 * pagesize, MADV_POPULATE_READ); + ksft_test_result(ret == -1 && errno == ENOMEM, + "MADV_POPULATE_READ with holes at the end\n"); + ret = madvise(addr, 2 * pagesize, MADV_POPULATE_WRITE); + ksft_test_result(ret == -1 && errno == ENOMEM, + "MADV_POPULATE_WRITE with holes at the end\n"); + + /* Hole at beginning */ + ret = madvise(addr + pagesize, pagesize, MADV_POPULATE_READ); + ksft_test_result(ret == -1 && errno == ENOMEM, + "MADV_POPULATE_READ with holes at the beginning\n"); + ret = madvise(addr + pagesize, pagesize, MADV_POPULATE_WRITE); + ksft_test_result(ret == -1 && errno == ENOMEM, + "MADV_POPULATE_WRITE with holes at the beginning\n"); + + munmap(addr, SIZE); +} + +static bool range_is_populated(char *start, ssize_t size) +{ + int fd = open("/proc/self/pagemap", O_RDONLY); + bool ret = true; + + if (fd < 0) + ksft_exit_fail_msg("opening pagemap failed\n"); + for (; size > 0 && ret; size -= pagesize, start += pagesize) + if (!pagemap_is_populated(fd, start)) + ret = false; + close(fd); + return ret; +} + +static bool range_is_not_populated(char *start, ssize_t size) +{ + int fd = open("/proc/self/pagemap", O_RDONLY); + bool ret = true; + + if (fd < 0) + ksft_exit_fail_msg("opening pagemap failed\n"); + for (; size > 0 && ret; size -= pagesize, start += pagesize) + if (pagemap_is_populated(fd, start)) + ret = false; + close(fd); + return ret; +} + +static void test_populate_read(void) +{ + char *addr; + int ret; + + ksft_print_msg("[RUN] %s\n", __func__); + 
+ addr = mmap(0, SIZE, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + if (addr == MAP_FAILED) + ksft_exit_fail_msg("mmap failed\n"); + ksft_test_result(range_is_not_populated(addr, SIZE), + "range initially not populated\n"); + + ret = madvise(addr, SIZE, MADV_POPULATE_READ); + ksft_test_result(!ret, "MADV_POPULATE_READ\n"); + ksft_test_result(range_is_populated(addr, SIZE), + "range is populated\n"); + + munmap(addr, SIZE); +} + +static void test_populate_write(void) +{ + char *addr; + int ret; + + ksft_print_msg("[RUN] %s\n", __func__); + + addr = mmap(0, SIZE, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + if (addr == MAP_FAILED) + ksft_exit_fail_msg("mmap failed\n"); + ksft_test_result(range_is_not_populated(addr, SIZE), + "range initially not populated\n"); + + ret = madvise(addr, SIZE, MADV_POPULATE_WRITE); + ksft_test_result(!ret, "MADV_POPULATE_WRITE\n"); + ksft_test_result(range_is_populated(addr, SIZE), + "range is populated\n"); + + munmap(addr, SIZE); +} + +static bool range_is_softdirty(char *start, ssize_t size) +{ + int fd = open("/proc/self/pagemap", O_RDONLY); + bool ret = true; + + if (fd < 0) + ksft_exit_fail_msg("opening pagemap failed\n"); + for (; size > 0 && ret; size -= pagesize, start += pagesize) + if (!pagemap_is_softdirty(fd, start)) + ret = false; + close(fd); + return ret; +} + +static bool range_is_not_softdirty(char *start, ssize_t size) +{ + int fd = open("/proc/self/pagemap", O_RDONLY); + bool ret = true; + + if (fd < 0) + ksft_exit_fail_msg("opening pagemap failed\n"); + for (; size > 0 && ret; size -= pagesize, start += pagesize) + if (pagemap_is_softdirty(fd, start)) + ret = false; + close(fd); + return ret; +} + +static void clear_softdirty(void) +{ + int fd = open("/proc/self/clear_refs", O_WRONLY); + const char *ctrl = "4"; + int ret; + + if (fd < 0) + ksft_exit_fail_msg("opening clear_refs failed\n"); + ret = write(fd, ctrl, strlen(ctrl)); + if (ret != strlen(ctrl)) + 
ksft_exit_fail_msg("writing clear_refs failed\n"); + close(fd); +} + +static void test_softdirty(void) +{ + char *addr; + int ret; + + ksft_print_msg("[RUN] %s\n", __func__); + + addr = mmap(0, SIZE, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + if (addr == MAP_FAILED) + ksft_exit_fail_msg("mmap failed\n"); + + /* Clear any softdirty bits. */ + clear_softdirty(); + ksft_test_result(range_is_not_softdirty(addr, SIZE), + "range is not softdirty\n"); + + /* Populating READ should set softdirty. */ + ret = madvise(addr, SIZE, MADV_POPULATE_READ); + ksft_test_result(!ret, "MADV_POPULATE_READ\n"); + ksft_test_result(range_is_not_softdirty(addr, SIZE), + "range is not softdirty\n"); + + /* Populating WRITE should set softdirty. */ + ret = madvise(addr, SIZE, MADV_POPULATE_WRITE); + ksft_test_result(!ret, "MADV_POPULATE_WRITE\n"); + ksft_test_result(range_is_softdirty(addr, SIZE), + "range is softdirty\n"); + + munmap(addr, SIZE); +} + +int main(int argc, char **argv) +{ + int err; + + pagesize = getpagesize(); + + ksft_print_header(); + ksft_set_plan(21); + + sense_support(); + test_prot_read(); + test_prot_write(); + test_holes(); + test_populate_read(); + test_populate_write(); + test_softdirty(); + + err = ksft_get_fail_cnt(); + if (err) + ksft_exit_fail_msg("%d out of %d tests failed\n", + err, ksft_test_num()); + return ksft_exit_pass(); +} + +#else /* defined(MADV_POPULATE_READ) && defined(MADV_POPULATE_WRITE) */ + +#warning "missing MADV_POPULATE_READ or MADV_POPULATE_WRITE definition" + +int main(int argc, char **argv) +{ + ksft_print_header(); + ksft_exit_skip("MADV_POPULATE_READ or MADV_POPULATE_WRITE not defined\n"); +} + +#endif /* defined(MADV_POPULATE_READ) && defined(MADV_POPULATE_WRITE) */ diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh index e953f3cd9664..955782d138ab 100755 --- a/tools/testing/selftests/vm/run_vmtests.sh +++ b/tools/testing/selftests/vm/run_vmtests.sh @@ -346,4 +346,20 
@@ else exitcode=1 fi +echo "--------------------------------------------------------" +echo "running MADV_POPULATE_READ and MADV_POPULATE_WRITE tests" +echo "--------------------------------------------------------" +./madv_populate +ret_val=$? + +if [ $ret_val -eq 0 ]; then + echo "[PASS]" +elif [ $ret_val -eq $ksft_skip ]; then + echo "[SKIP]" + exitcode=$ksft_skip +else + echo "[FAIL]" + exitcode=1 +fi + exit $exitcode -- 2.29.2

4 years, 9 months

1
0
0 0

[PATCH v1 4/5] selftests/vm: add protection_keys_32 / protection_keys_64 to gitignore

by David Hildenbrand

We forgot to add two binaries to .gitignore. Cc: Andrew Morton <akpm(a)linux-foundation.org> Cc: Michal Hocko <mhocko(a)suse.com> Cc: Oscar Salvador <osalvador(a)suse.de> Cc: Jason Gunthorpe <jgg(a)ziepe.ca> Cc: Peter Xu <peterx(a)redhat.com> Cc: Ram Pai <linuxram(a)us.ibm.com> Cc: Shuah Khan <shuah(a)kernel.org> Cc: linux-kselftest(a)vger.kernel.org Signed-off-by: David Hildenbrand <david(a)redhat.com> --- tools/testing/selftests/vm/.gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index 9a35c3f6a557..b4fc0148360e 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -22,3 +22,5 @@ map_fixed_noreplace write_to_hugetlbfs hmm-tests local_config.* +protection_keys_32 +protection_keys_64 -- 2.29.2

4 years, 9 months

1
0
0 0

[PATCH v1 3/5] MAINTAINERS: add tools/testing/selftests/vm/ to MEMORY MANAGEMENT

by David Hildenbrand

MEMORY MANAGEMENT seems to be a good fit. Cc: Andrew Morton <akpm(a)linux-foundation.org> Cc: Michal Hocko <mhocko(a)suse.com> Cc: Oscar Salvador <osalvador(a)suse.de> Cc: Jason Gunthorpe <jgg(a)ziepe.ca> Cc: Peter Xu <peterx(a)redhat.com> Cc: Shuah Khan <shuah(a)kernel.org> Cc: linux-kselftest(a)vger.kernel.org Signed-off-by: David Hildenbrand <david(a)redhat.com> --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index aa84121c5611..b00963f4aa09 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11560,6 +11560,7 @@ F: include/linux/mm.h F: include/linux/mmzone.h F: include/linux/vmalloc.h F: mm/ +F: tools/testing/selftests/vm/ MEMORY TECHNOLOGY DEVICES (MTD) M: Miquel Raynal <miquel.raynal(a)bootlin.com> -- 2.29.2

4 years, 9 months

1
0
0 0

[PATCH] selftests/kvm: add set_boot_cpu_id test

by Emanuele Giuseppe Esposito

Test for the KVM_SET_BOOT_CPU_ID ioctl. Check that it correctly allows to change the BSP vcpu. Signed-off-by: Emanuele Giuseppe Esposito <eesposit(a)redhat.com> --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + .../selftests/kvm/x86_64/set_boot_cpu_id.c | 151 ++++++++++++++++++ 3 files changed, 153 insertions(+) create mode 100644 tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 32b87cc77c8e..43b8aa82aefe 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -5,6 +5,7 @@ /s390x/resets /s390x/sync_regs_test /x86_64/cr4_cpuid_sync_test +/x86_64/set_boot_cpu_id /x86_64/debug_regs /x86_64/evmcs_test /x86_64/get_cpuid_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index a6d61f451f88..e7b62666e06e 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -39,6 +39,7 @@ LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test +TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test TEST_GEN_PROGS_x86_64 += x86_64/get_cpuid_test TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c new file mode 100644 index 000000000000..4077be4e4015 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test that KVM_SET_BOOT_CPU_ID works as intended + * + * Copyright (C) 2020, Red Hat, Inc. 
+ */ +#define _GNU_SOURCE /* for program_invocation_name */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +#define N_VCPU 2 +#define VCPU_ID0 0 +#define VCPU_ID1 1 + +#define WRONG_BSP 2 + +static uint32_t get_bsp_flag(void) +{ + return rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_BSP; +} + +static void guest_bsp_vcpu(void *arg) +{ + GUEST_SYNC(1); + + GUEST_ASSERT(get_bsp_flag() != 0); + + GUEST_DONE(); +} + +static void guest_not_bsp_vcpu(void *arg) +{ + GUEST_SYNC(1); + + GUEST_ASSERT(get_bsp_flag() == 0); + + GUEST_DONE(); +} + +static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid, int stage) +{ + struct ucall uc; + + printf("vcpu executing...\n"); + vcpu_run(vm, vcpuid); + printf("vcpu executed\n"); + + switch (get_ucall(vm, vcpuid, &uc)) { + case UCALL_SYNC: + printf("stage %d sync %ld\n", stage, uc.args[1]); + TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && + uc.args[1] == stage + 1, + "Stage %d: Unexpected register values vmexit, got %lx", + stage + 1, (ulong)uc.args[1]); + return; + case UCALL_DONE: + printf("got done\n"); + return; + case UCALL_ABORT: + TEST_ASSERT(false, "%s at %s:%ld\n\tvalues: %#lx, %#lx", (const char *)uc.args[0], + __FILE__, uc.args[1], uc.args[2], uc.args[3]); + default: + TEST_ASSERT(false, "Unexpected exit: %s", + exit_reason_str(vcpu_state(vm, vcpuid)->exit_reason)); + } +} + +static void check_wrong_bsp(void) +{ + struct kvm_vm *vm; + int res; + + vm = vm_create_default(VCPU_ID0, 0, guest_bsp_vcpu); + + res = _kvm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) WRONG_BSP); + TEST_ASSERT(res == -1, "KVM_SET_BOOT_CPU_ID set to a non-existent vcpu %d", WRONG_BSP); + + kvm_vm_free(vm); +} + +static struct kvm_vm *create_vm(void) +{ + struct kvm_vm *vm; + uint64_t vcpu_pages = (DEFAULT_STACK_PGS) * 2; + uint64_t extra_pg_pages = vcpu_pages / PTES_PER_MIN_PAGE * N_VCPU; + uint64_t pages = 
DEFAULT_GUEST_PHY_PAGES + vcpu_pages + extra_pg_pages; + + pages = vm_adjust_num_guest_pages(VM_MODE_DEFAULT, pages); + vm = vm_create(VM_MODE_DEFAULT, pages, O_RDWR); + + kvm_vm_elf_load(vm, program_invocation_name, 0, 0); + vm_create_irqchip(vm); + + return vm; +} + +static void add_x86_vcpu(struct kvm_vm *vm, uint32_t vcpuid, void *code) +{ + vm_vcpu_add_default(vm, vcpuid, code); + vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid()); +} + +static void run_vm_bsp(uint32_t bsp_vcpu) +{ + struct kvm_vm *vm; + int stage; + void *vcpu0_code, *vcpu1_code; + + vm = create_vm(); + + vcpu0_code = guest_bsp_vcpu; + vcpu1_code = guest_not_bsp_vcpu; + + if (bsp_vcpu == VCPU_ID1) { + vcpu0_code = guest_not_bsp_vcpu; + vcpu1_code = guest_bsp_vcpu; + + vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID1); + } + + add_x86_vcpu(vm, VCPU_ID0, vcpu0_code); + add_x86_vcpu(vm, VCPU_ID1, vcpu1_code); + + for (stage = 0; stage < 2; stage++) { + run_vcpu(vm, VCPU_ID0, stage); + run_vcpu(vm, VCPU_ID1, stage); + } + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + if (!kvm_check_cap(KVM_CAP_SET_BOOT_CPU_ID)) { + print_skip("set_boot_cpu_id not available"); + return 0; + } + + run_vm_bsp(VCPU_ID0); + run_vm_bsp(VCPU_ID1); + run_vm_bsp(VCPU_ID0); + + check_wrong_bsp(); +} -- 2.29.2

4 years, 9 months

2
1
0 0

[PATCH AUTOSEL 5.10 37/54] kselftest: arm64: Fix exit code of sve-ptrace

by Sasha Levin

From: Mark Brown <broonie(a)kernel.org> [ Upstream commit 07e644885bf6727a48db109fad053cb43f3c9859 ] We track whether sve-ptrace encountered a failure in a variable, but don't actually use that value when we exit the program; do so. Signed-off-by: Mark Brown <broonie(a)kernel.org> Link: https://lore.kernel.org/r/20210309190304.39169-1-broonie@kernel.org Signed-off-by: Will Deacon <will(a)kernel.org> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/arm64/fp/sve-ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index b2282be6f938..612d3899614a 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -332,5 +332,5 @@ int main(void) ksft_print_cnts(); - return 0; + return ret; } -- 2.30.1

4 years, 9 months

1
0
0 0

[PATCH AUTOSEL 5.11 39/61] kselftest: arm64: Fix exit code of sve-ptrace

by Sasha Levin

From: Mark Brown <broonie(a)kernel.org> [ Upstream commit 07e644885bf6727a48db109fad053cb43f3c9859 ] We track whether sve-ptrace encountered a failure in a variable, but don't actually use that value when we exit the program; do so. Signed-off-by: Mark Brown <broonie(a)kernel.org> Link: https://lore.kernel.org/r/20210309190304.39169-1-broonie@kernel.org Signed-off-by: Will Deacon <will(a)kernel.org> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/arm64/fp/sve-ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index b2282be6f938..612d3899614a 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -332,5 +332,5 @@ int main(void) ksft_print_cnts(); - return 0; + return ret; } -- 2.30.1

4 years, 9 months

1
0
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-kselftest-mirror