Patch #1, #2 and #3 enables p10 2nd DAWR feature for Book3S kvm guest. DAWR
is a hypervisor resource and thus H_SET_MODE hcall is used to set/unset it.
A new case H_SET_MODE_RESOURCE_SET_DAWR1 is introduced in H_SET_MODE hcall
for setting/unsetting 2nd DAWR. Also, new capability KVM_CAP_PPC_DAWR1 has
been added to query 2nd DAWR support via kvm ioctl.
This feature also needs to be enabled in Qemu to really use it. I'll reply
link to qemu patches once I post them in qemu-devel mailing list.
Patch #4, #5, #6 and #7 adds selftests to test 2nd DAWR.
Dependency:
1: p10 kvm base enablement
https://lore.kernel.org/linuxppc-dev/20200602055325.6102-1-alistair@popple.…
2: 2nd DAWR powervm/baremetal enablement
https://lore.kernel.org/linuxppc-dev/20200723090813.303838-1-ravi.bangoria@…
3: ptrace PPC_DEBUG_FEATURE_DATA_BP_DAWR_ARCH_31 flag
https://lore.kernel.org/linuxppc-dev/20200723093330.306341-1-ravi.bangoria@…
Patches in this series applies fine on top of powerpc/next (9a77c4a0a125)
plus above dependency patches.
Ravi Bangoria (7):
powerpc/watchpoint/kvm: Rename current DAWR macros and variables
powerpc/watchpoint/kvm: Add infrastructure to support 2nd DAWR
powerpc/watchpoint/kvm: Introduce new capability for 2nd DAWR
powerpc/selftests/ptrace-hwbreak: Add testcases for 2nd DAWR
powerpc/selftests/perf-hwbreak: Coalesce event creation code
powerpc/selftests/perf-hwbreak: Add testcases for 2nd DAWR
powerpc/selftests: Add selftest to test concurrent perf/ptrace events
Documentation/virt/kvm/api.rst | 6 +-
arch/powerpc/include/asm/hvcall.h | 2 +
arch/powerpc/include/asm/kvm_host.h | 6 +-
arch/powerpc/include/uapi/asm/kvm.h | 8 +-
arch/powerpc/kernel/asm-offsets.c | 6 +-
arch/powerpc/kvm/book3s_hv.c | 73 +-
arch/powerpc/kvm/book3s_hv_nested.c | 15 +-
arch/powerpc/kvm/book3s_hv_rmhandlers.S | 43 +-
arch/powerpc/kvm/powerpc.c | 3 +
include/uapi/linux/kvm.h | 1 +
tools/arch/powerpc/include/uapi/asm/kvm.h | 8 +-
.../selftests/powerpc/ptrace/.gitignore | 1 +
.../testing/selftests/powerpc/ptrace/Makefile | 2 +-
.../selftests/powerpc/ptrace/perf-hwbreak.c | 646 +++++++++++++++--
.../selftests/powerpc/ptrace/ptrace-hwbreak.c | 79 +++
.../powerpc/ptrace/ptrace-perf-hwbreak.c | 659 ++++++++++++++++++
16 files changed, 1476 insertions(+), 82 deletions(-)
create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c
--
2.26.2
From: Leon He <leon.he(a)unisoc.com>
There are two errors in the dmabuf-heap selftest:
1. The 'char name[5]' was not initialized to zero, which will cause
strcmp(name, "vgem") failed in check_vgem().
2. The return value of test_alloc_errors() should be reversed, other-
wise the while loop in main() will be broken.
Signed-off-by: Leon He <leon.he(a)unisoc.com>
---
tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c
index cd5e1f6..836b185 100644
--- a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c
+++ b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c
@@ -22,7 +22,7 @@
static int check_vgem(int fd)
{
drm_version_t version = { 0 };
- char name[5];
+ char name[5] = { 0 };
int ret;
version.name_len = 4;
@@ -357,7 +357,7 @@ static int test_alloc_errors(char *heap_name)
if (heap_fd >= 0)
close(heap_fd);
- return ret;
+ return !ret;
}
int main(void)
--
2.7.4
KUnit test cases run on kthreads, and kthreads don't have an
adddress space (current->mm is NULL), but processes have mm.
The purpose of this patch is to allow to borrow mm to KUnit kthread
after userspace is brought up, because we know that there are processes
running, at least the process that loaded the module to borrow mm.
This allows, for example, tests such as user_copy_kunit, which uses
vm_mmap, which needs current->mm.
Signed-off-by: Vitor Massaru Iha <vitor(a)massaru.org>
---
v2:
* splitted patch in 3:
- Allows to install and load modules in root filesystem;
- Provides an userspace memory context when tests are compiled
as module;
- Convert test_user_copy to KUnit test;
* added documentation;
* added more explanation;
* tested a pointer;
* released mput();
---
Documentation/dev-tools/kunit/usage.rst | 14 ++++++++++++++
include/kunit/test.h | 12 ++++++++++++
lib/kunit/try-catch.c | 15 ++++++++++++++-
3 files changed, 40 insertions(+), 1 deletion(-)
diff --git a/Documentation/dev-tools/kunit/usage.rst b/Documentation/dev-tools/kunit/usage.rst
index 3c3fe8b5fecc..9f909157be34 100644
--- a/Documentation/dev-tools/kunit/usage.rst
+++ b/Documentation/dev-tools/kunit/usage.rst
@@ -448,6 +448,20 @@ We can now use it to test ``struct eeprom_buffer``:
.. _kunit-on-non-uml:
+User-space context
+------------------
+
+I case you need a user-space context, for now this is only possible through
+tests compiled as a module. And it will be necessary to use a root filesystem
+and uml_utilities.
+
+Example:
+
+.. code-block:: bash
+
+ ./tools/testing/kunit/kunit.py run --timeout=60 --uml_rootfs_dir=.uml_rootfs
+
+
KUnit on non-UML architectures
==============================
diff --git a/include/kunit/test.h b/include/kunit/test.h
index 59f3144f009a..ae3337139c65 100644
--- a/include/kunit/test.h
+++ b/include/kunit/test.h
@@ -222,6 +222,18 @@ struct kunit {
* protect it with some type of lock.
*/
struct list_head resources; /* Protected by lock. */
+ /*
+ * KUnit test cases run on kthreads, and kthreads don't have an
+ * adddress space (current->mm is NULL), but processes have mm.
+ *
+ * The purpose of this mm_struct is to allow to borrow mm to KUnit kthread
+ * after userspace is brought up, because we know that there are processes
+ * running, at least the process that loaded the module to borrow mm.
+ *
+ * This allows, for example, tests such as user_copy_kunit, which uses
+ * vm_mmap, which needs current->mm.
+ */
+ struct mm_struct *mm;
};
void kunit_init_test(struct kunit *test, const char *name, char *log);
diff --git a/lib/kunit/try-catch.c b/lib/kunit/try-catch.c
index 0dd434e40487..d03e2093985b 100644
--- a/lib/kunit/try-catch.c
+++ b/lib/kunit/try-catch.c
@@ -11,7 +11,8 @@
#include <linux/completion.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
-
+#include <linux/sched/mm.h>
+#include <linux/sched/task.h>
#include "try-catch-impl.h"
void __noreturn kunit_try_catch_throw(struct kunit_try_catch *try_catch)
@@ -24,8 +25,17 @@ EXPORT_SYMBOL_GPL(kunit_try_catch_throw);
static int kunit_generic_run_threadfn_adapter(void *data)
{
struct kunit_try_catch *try_catch = data;
+ struct kunit *test = try_catch->test;
+
+ if (test != NULL && test->mm != NULL)
+ kthread_use_mm(test->mm);
try_catch->try(try_catch->context);
+ if (test != NULL && test->mm != NULL) {
+ kthread_unuse_mm(test->mm);
+ mmput(test->mm);
+ test->mm = NULL;
+ }
complete_and_exit(try_catch->try_completion, 0);
}
@@ -65,6 +75,9 @@ void kunit_try_catch_run(struct kunit_try_catch *try_catch, void *context)
try_catch->context = context;
try_catch->try_completion = &try_completion;
try_catch->try_result = 0;
+
+ test->mm = get_task_mm(current);
+
task_struct = kthread_run(kunit_generic_run_threadfn_adapter,
try_catch,
"kunit_try_catch_thread");
base-commit: 725aca9585956676687c4cb803e88f770b0df2b2
prerequisite-patch-id: 5e5f9a8a05c5680fda1b04c9ab1b95ce91dc88b2
prerequisite-patch-id: 4d997940f4a9f303424af9bac412de1af861f9d9
prerequisite-patch-id: 582b6d9d28ce4b71628890ec832df6522ca68de0
--
2.26.2
The following 4 tests in timers can take longer than the default 45
seconds that added in commit 852c8cbf34d3 ("selftests/kselftest/runner.sh:
Add 45 second timeout per test") to run:
* nsleep-lat - 2m7.350s
* set-timer-lat - 2m0.66s
* inconsistency-check - 1m45.074s
* raw_skew - 2m0.013s
Thus they will be marked as failed with the current 45s setting:
not ok 3 selftests: timers: nsleep-lat # TIMEOUT
not ok 4 selftests: timers: set-timer-lat # TIMEOUT
not ok 6 selftests: timers: inconsistency-check # TIMEOUT
not ok 7 selftests: timers: raw_skew # TIMEOUT
Disable the timeout setting for timers can make these tests finish
properly:
ok 3 selftests: timers: nsleep-lat
ok 4 selftests: timers: set-timer-lat
ok 6 selftests: timers: inconsistency-check
ok 7 selftests: timers: raw_skew
https://bugs.launchpad.net/bugs/1864626
Fixes: 852c8cbf34d3 ("selftests/kselftest/runner.sh: Add 45 second timeout per test")
Signed-off-by: Po-Hsu Lin <po-hsu.lin(a)canonical.com>
---
tools/testing/selftests/timers/Makefile | 1 +
tools/testing/selftests/timers/settings | 1 +
2 files changed, 2 insertions(+)
create mode 100644 tools/testing/selftests/timers/settings
diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile
index 7656c7c..0e73a16 100644
--- a/tools/testing/selftests/timers/Makefile
+++ b/tools/testing/selftests/timers/Makefile
@@ -13,6 +13,7 @@ DESTRUCTIVE_TESTS = alarmtimer-suspend valid-adjtimex adjtick change_skew \
TEST_GEN_PROGS_EXTENDED = $(DESTRUCTIVE_TESTS)
+TEST_FILES := settings
include ../lib.mk
diff --git a/tools/testing/selftests/timers/settings b/tools/testing/selftests/timers/settings
new file mode 100644
index 0000000..e7b9417
--- /dev/null
+++ b/tools/testing/selftests/timers/settings
@@ -0,0 +1 @@
+timeout=0
--
2.7.4
Hello!
v7:
- break out sock usage counting fixes into more cleanly backportable pieces
- code style cleanups (christian)
- clarify addfd commit log (christian)
- add ..._SIZE_{VER0,LATEST} and BUILD_BUG_ON()s (christian)
- remove undef (christian)
- fix addfd embedded URL reference numbers
v6: https://lore.kernel.org/lkml/20200706201720.3482959-1-keescook@chromium.org/
This continues the thread-merge between [1] and [2]. tl;dr: add a way for
a seccomp user_notif process manager to inject files into the managed
process in order to handle emulation of various fd-returning syscalls
across security boundaries. Containers folks and Chrome are in need
of the feature, and investigating this solution uncovered (and fixed)
implementation issues with existing file sending routines.
I intend to carry this in the for-next/seccomp tree, unless someone
has objections. :) Please review and test!
-Kees
[1] https://lore.kernel.org/lkml/20200603011044.7972-1-sargun@sargun.me/
[2] https://lore.kernel.org/lkml/20200610045214.1175600-1-keescook@chromium.org/
Kees Cook (7):
net/compat: Add missing sock updates for SCM_RIGHTS
pidfd: Add missing sock updates for pidfd_getfd()
net/scm: Regularize compat handling of scm_detach_fds()
fs: Move __scm_install_fd() to __receive_fd()
fs: Add receive_fd() wrapper for __receive_fd()
pidfd: Replace open-coded receive_fd()
fs: Expand __receive_fd() to accept existing fd
Sargun Dhillon (2):
seccomp: Introduce addfd ioctl to seccomp user notifier
selftests/seccomp: Test SECCOMP_IOCTL_NOTIF_ADDFD
fs/file.c | 57 +++++
include/linux/file.h | 19 ++
include/linux/seccomp.h | 4 +
include/net/sock.h | 4 +
include/uapi/linux/seccomp.h | 22 ++
kernel/pid.c | 14 +-
kernel/seccomp.c | 173 ++++++++++++-
net/compat.c | 55 ++---
net/core/scm.c | 50 +---
net/core/sock.c | 21 ++
tools/testing/selftests/seccomp/seccomp_bpf.c | 229 ++++++++++++++++++
11 files changed, 566 insertions(+), 82 deletions(-)
--
2.25.1
When the KVM MMU zaps a page, it will recursively zap the unsynced child
pages, but not the synced ones. This can create problems over time when
running many nested guests because it leaves unlinked pages which will not
be freed until the page quota is hit. With the default page quota of 20
shadow pages per 1000 guest pages, this looks like a memory leak and can
degrade MMU performance.
In a recent benchmark, substantial performance degradation was observed:
An L1 guest was booted with 64G memory.
2G nested Windows guests were booted, 10 at a time for 20
iterations. (200 total boots)
Windows was used in this benchmark because they touch all of their
memory on startup.
By the end of the benchmark, the nested guests were taking ~10% longer
to boot. With this patch there is no degradation in boot time.
Without this patch the benchmark ends with hundreds of thousands of
stale EPT02 pages cluttering up rmaps and the page hash map. As a
result, VM shutdown is also much slower: deleting memslot 0 was
observed to take over a minute. With this patch it takes just a
few miliseconds.
If TDP is enabled, zap child shadow pages when zapping the only parent
shadow page.
Tested by running the kvm-unit-tests suite on an Intel Haswell machine.
No regressions versus
commit c34b26b98cac ("KVM: MIPS: clean up redundant 'kvm_run' parameters"),
or warnings.
Reviewed-by: Peter Shier <pshier(a)google.com>
Signed-off-by: Ben Gardon <bgardon(a)google.com>
---
arch/x86/kvm/mmu/mmu.c | 49 +++++++++++++++++++++++++++++++++++++-----
1 file changed, 44 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index fa506aaaf0194..c550bc3831dcc 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2626,13 +2626,52 @@ static bool mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
return false;
}
-static void kvm_mmu_page_unlink_children(struct kvm *kvm,
- struct kvm_mmu_page *sp)
+static int kvm_mmu_page_unlink_children(struct kvm *kvm,
+ struct kvm_mmu_page *sp,
+ struct list_head *invalid_list)
{
unsigned i;
+ int zapped = 0;
+
+ for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
+ u64 *sptep = sp->spt + i;
+ u64 spte = *sptep;
+ struct kvm_mmu_page *child_sp;
+
+ /*
+ * Zap the page table entry, unlinking any potential child
+ * page
+ */
+ mmu_page_zap_pte(kvm, sp, sptep);
+
+ /* If there is no child page for this spte, continue */
+ if (!is_shadow_present_pte(spte) ||
+ is_last_spte(spte, sp->role.level))
+ continue;
+
+ /*
+ * If TDP is enabled, then any shadow pages are part of either
+ * the EPT01 or an EPT02. In either case, do not expect the
+ * same pattern of page reuse seen in x86 PTs for
+ * copy-on-write and similar techniques. In this case, it is
+ * unlikely that a parentless shadow PT will be used again in
+ * the near future. Zap it to keep the rmaps and page hash
+ * maps from filling up with stale EPT02 pages.
+ */
+ if (!tdp_enabled)
+ continue;
+
+ child_sp = to_shadow_page(spte & PT64_BASE_ADDR_MASK);
+ if (WARN_ON_ONCE(!child_sp))
+ continue;
+
+ /* Zap the page if it has no remaining parent pages */
+ if (!child_sp->parent_ptes.val)
+ zapped += kvm_mmu_prepare_zap_page(kvm, child_sp,
+ invalid_list);
+ }
- for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
- mmu_page_zap_pte(kvm, sp, sp->spt + i);
+ return zapped;
}
static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
@@ -2678,7 +2717,7 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
trace_kvm_mmu_prepare_zap_page(sp);
++kvm->stat.mmu_shadow_zapped;
*nr_zapped = mmu_zap_unsync_children(kvm, sp, invalid_list);
- kvm_mmu_page_unlink_children(kvm, sp);
+ *nr_zapped += kvm_mmu_page_unlink_children(kvm, sp, invalid_list);
kvm_mmu_unlink_parents(kvm, sp);
/* Zapping children means active_mmu_pages has become unstable. */
--
2.28.0.rc0.142.g3c755180ce-goog