This patch has been written to support page-ins using userfaultfd's
SIGBUS feature. When a userfaultfd is created with UFFD_FEATURE_SIGBUS,
`handle_userfault` will return VM_FAULT_SIGBUS instead of putting the
calling thread to sleep. Normal (non-guest) threads that access memory
that has been registered with a UFFD_FEATURE_SIGBUS userfaultfd receive
a SIGBUS.
When a vCPU gets an EPT page fault in a userfaultfd-registered region,
KVM calls into `handle_userfault` to resolve the page fault. With
UFFD_FEATURE_SIGBUS, VM_FAULT_SIGBUS is returned, but a SIGBUS is never
delivered to the userspace thread. This patch propagates the
VM_FAULT_SIGBUS error up to KVM, where we then send the signal.
Upon receiving a VM_FAULT_SIGBUS, the KVM_RUN ioctl will exit to
userspace. This functionality already exists. This allows a hypervisor
to do page-ins with UFFD_FEATURE_SIGBUS:
1. Setup a SIGBUS handler to save the address of the SIGBUS (to a
thread-local variable).
2. Enter the guest.
3. Immediately after KVM_RUN returns, check if the address has been set.
4. If an address has been set, we exited due to a page fault that we can
now handle.
5. Userspace can do anything it wants to make the memory available,
using MODE_NOWAKE for the UFFDIO memory installation ioctls.
6. Re-enter the guest. If the memory still isn't ready, this process
will repeat.
This style of demand paging is significantly faster than the standard
poll/read/wake mechanism userfaultfd uses and completely bypasses the
userfaultfd waitq. For a single vCPU, page-in throughput increases by
about 3-4x.
Signed-off-by: James Houghton <jthoughton(a)google.com>
Suggested-by: Jue Wang <juew(a)google.com>
---
include/linux/hugetlb.h | 2 +-
include/linux/mm.h | 3 ++-
mm/gup.c | 57 +++++++++++++++++++++++++++--------------
mm/hugetlb.c | 5 +++-
virt/kvm/kvm_main.c | 30 +++++++++++++++++++++-
5 files changed, 74 insertions(+), 23 deletions(-)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index b92f25ccef58..a777fb254df0 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -119,7 +119,7 @@ int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_ar
long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
struct page **, struct vm_area_struct **,
unsigned long *, unsigned long *, long, unsigned int,
- int *);
+ int *, int *);
void unmap_hugepage_range(struct vm_area_struct *,
unsigned long, unsigned long, struct page *);
void __unmap_hugepage_range_final(struct mmu_gather *tlb,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 322ec61d0da7..1dcd1ac81992 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1824,7 +1824,8 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
long pin_user_pages_locked(unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages, int *locked);
long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
- struct page **pages, unsigned int gup_flags);
+ struct page **pages, unsigned int gup_flags,
+ int *fault_error);
long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
struct page **pages, unsigned int gup_flags);
diff --git a/mm/gup.c b/mm/gup.c
index 0697134b6a12..ab55a67aef78 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -881,7 +881,8 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
* is, *@locked will be set to 0 and -EBUSY returned.
*/
static int faultin_page(struct vm_area_struct *vma,
- unsigned long address, unsigned int *flags, int *locked)
+ unsigned long address, unsigned int *flags, int *locked,
+ int *fault_error)
{
unsigned int fault_flags = 0;
vm_fault_t ret;
@@ -906,6 +907,8 @@ static int faultin_page(struct vm_area_struct *vma,
}
ret = handle_mm_fault(vma, address, fault_flags, NULL);
+ if (fault_error)
+ *fault_error = ret;
if (ret & VM_FAULT_ERROR) {
int err = vm_fault_to_errno(ret, *flags);
@@ -996,6 +999,8 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
* @vmas: array of pointers to vmas corresponding to each page.
* Or NULL if the caller does not require them.
* @locked: whether we're still with the mmap_lock held
+ * @fault_error: VM fault error from handle_mm_fault. NULL if the caller
+ * does not require this error.
*
* Returns either number of pages pinned (which may be less than the
* number requested), or an error. Details about the return value:
@@ -1040,6 +1045,13 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
* when it's been released. Otherwise, it must be held for either
* reading or writing and will not be released.
*
+ * If @fault_error != NULL, __get_user_pages will return the VM fault error
+ * from handle_mm_fault() in this argument in the event of a VM fault error.
+ * On success (ret == nr_pages) fault_error is zero.
+ * On failure (ret != nr_pages) fault_error may still be 0 if the error did
+ * not originate from handle_mm_fault().
+ *
+ *
* In most cases, get_user_pages or get_user_pages_fast should be used
* instead of __get_user_pages. __get_user_pages should be used only if
* you need some special @gup_flags.
@@ -1047,7 +1059,8 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
static long __get_user_pages(struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
- struct vm_area_struct **vmas, int *locked)
+ struct vm_area_struct **vmas, int *locked,
+ int *fault_error)
{
long ret = 0, i = 0;
struct vm_area_struct *vma = NULL;
@@ -1097,7 +1110,7 @@ static long __get_user_pages(struct mm_struct *mm,
if (is_vm_hugetlb_page(vma)) {
i = follow_hugetlb_page(mm, vma, pages, vmas,
&start, &nr_pages, i,
- gup_flags, locked);
+ gup_flags, locked, fault_error);
if (locked && *locked == 0) {
/*
* We've got a VM_FAULT_RETRY
@@ -1124,7 +1137,8 @@ static long __get_user_pages(struct mm_struct *mm,
page = follow_page_mask(vma, start, foll_flags, &ctx);
if (!page) {
- ret = faultin_page(vma, start, &foll_flags, locked);
+ ret = faultin_page(vma, start, &foll_flags, locked,
+ fault_error);
switch (ret) {
case 0:
goto retry;
@@ -1280,7 +1294,8 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
struct page **pages,
struct vm_area_struct **vmas,
int *locked,
- unsigned int flags)
+ unsigned int flags,
+ int *fault_error)
{
long ret, pages_done;
bool lock_dropped;
@@ -1311,7 +1326,7 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
lock_dropped = false;
for (;;) {
ret = __get_user_pages(mm, start, nr_pages, flags, pages,
- vmas, locked);
+ vmas, locked, fault_error);
if (!locked)
/* VM_FAULT_RETRY couldn't trigger, bypass */
return ret;
@@ -1371,7 +1386,7 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
*locked = 1;
ret = __get_user_pages(mm, start, 1, flags | FOLL_TRIED,
- pages, NULL, locked);
+ pages, NULL, locked, fault_error);
if (!*locked) {
/* Continue to retry until we succeeded */
BUG_ON(ret != 0);
@@ -1458,7 +1473,7 @@ long populate_vma_page_range(struct vm_area_struct *vma,
* not result in a stack expansion that recurses back here.
*/
return __get_user_pages(mm, start, nr_pages, gup_flags,
- NULL, NULL, locked);
+ NULL, NULL, locked, NULL);
}
/*
@@ -1524,7 +1539,7 @@ int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start,
unsigned long nr_pages, struct page **pages,
struct vm_area_struct **vmas, int *locked,
- unsigned int foll_flags)
+ unsigned int foll_flags, int *fault_error)
{
struct vm_area_struct *vma;
unsigned long vm_flags;
@@ -1590,7 +1605,8 @@ struct page *get_dump_page(unsigned long addr)
if (mmap_read_lock_killable(mm))
return NULL;
ret = __get_user_pages_locked(mm, addr, 1, &page, NULL, &locked,
- FOLL_FORCE | FOLL_DUMP | FOLL_GET);
+ FOLL_FORCE | FOLL_DUMP | FOLL_GET,
+ NULL);
if (locked)
mmap_read_unlock(mm);
@@ -1704,11 +1720,11 @@ static long __gup_longterm_locked(struct mm_struct *mm,
if (!(gup_flags & FOLL_LONGTERM))
return __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
- NULL, gup_flags);
+ NULL, gup_flags, NULL);
flags = memalloc_pin_save();
do {
rc = __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
- NULL, gup_flags);
+ NULL, gup_flags, NULL);
if (rc <= 0)
break;
rc = check_and_migrate_movable_pages(rc, pages, gup_flags);
@@ -1764,7 +1780,8 @@ static long __get_user_pages_remote(struct mm_struct *mm,
return __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
locked,
- gup_flags | FOLL_TOUCH | FOLL_REMOTE);
+ gup_flags | FOLL_TOUCH | FOLL_REMOTE,
+ NULL);
}
/**
@@ -1941,7 +1958,7 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
return __get_user_pages_locked(current->mm, start, nr_pages,
pages, NULL, locked,
- gup_flags | FOLL_TOUCH);
+ gup_flags | FOLL_TOUCH, NULL);
}
EXPORT_SYMBOL(get_user_pages_locked);
@@ -1961,7 +1978,8 @@ EXPORT_SYMBOL(get_user_pages_locked);
* (e.g. FOLL_FORCE) are not required.
*/
long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
- struct page **pages, unsigned int gup_flags)
+ struct page **pages, unsigned int gup_flags,
+ int *fault_error)
{
struct mm_struct *mm = current->mm;
int locked = 1;
@@ -1978,7 +1996,8 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
mmap_read_lock(mm);
ret = __get_user_pages_locked(mm, start, nr_pages, pages, NULL,
- &locked, gup_flags | FOLL_TOUCH);
+ &locked, gup_flags | FOLL_TOUCH,
+ fault_error);
if (locked)
mmap_read_unlock(mm);
return ret;
@@ -2550,7 +2569,7 @@ static int __gup_longterm_unlocked(unsigned long start, int nr_pages,
mmap_read_unlock(current->mm);
} else {
ret = get_user_pages_unlocked(start, nr_pages,
- pages, gup_flags);
+ pages, gup_flags, NULL);
}
return ret;
@@ -2880,7 +2899,7 @@ long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
return -EINVAL;
gup_flags |= FOLL_PIN;
- return get_user_pages_unlocked(start, nr_pages, pages, gup_flags);
+ return get_user_pages_unlocked(start, nr_pages, pages, gup_flags, NULL);
}
EXPORT_SYMBOL(pin_user_pages_unlocked);
@@ -2909,6 +2928,6 @@ long pin_user_pages_locked(unsigned long start, unsigned long nr_pages,
gup_flags |= FOLL_PIN;
return __get_user_pages_locked(current->mm, start, nr_pages,
pages, NULL, locked,
- gup_flags | FOLL_TOUCH);
+ gup_flags | FOLL_TOUCH, NULL);
}
EXPORT_SYMBOL(pin_user_pages_locked);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3db405dea3dc..889ac33d57d5 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5017,7 +5017,8 @@ static void record_subpages_vmas(struct page *page, struct vm_area_struct *vma,
long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
struct page **pages, struct vm_area_struct **vmas,
unsigned long *position, unsigned long *nr_pages,
- long i, unsigned int flags, int *locked)
+ long i, unsigned int flags, int *locked,
+ int *fault_error)
{
unsigned long pfn_offset;
unsigned long vaddr = *position;
@@ -5103,6 +5104,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
}
ret = hugetlb_fault(mm, vma, vaddr, fault_flags);
if (ret & VM_FAULT_ERROR) {
+ if (fault_error)
+ *fault_error = ret;
err = vm_fault_to_errno(ret, flags);
remainder = 0;
break;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2799c6660cce..0a20d926ae32 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2004,6 +2004,30 @@ static bool hva_to_pfn_fast(unsigned long addr, bool write_fault,
return false;
}
+static void kvm_send_vm_fault_signal(int fault_error, int errno,
+ unsigned long address,
+ struct task_struct *tsk)
+{
+ kernel_siginfo_t info;
+
+ clear_siginfo(&info);
+
+ if (fault_error == VM_FAULT_SIGBUS)
+ info.si_signo = SIGBUS;
+ else if (fault_error == VM_FAULT_SIGSEGV)
+ info.si_signo = SIGSEGV;
+ else
+ // Other fault errors should not result in a signal.
+ return;
+
+ info.si_errno = errno;
+ info.si_code = BUS_ADRERR;
+ info.si_addr = (void __user *)address;
+ info.si_addr_lsb = PAGE_SHIFT;
+
+ send_sig_info(info.si_signo, &info, tsk);
+}
+
/*
* The slow path to get the pfn of the specified host virtual address,
* 1 indicates success, -errno is returned if error is detected.
@@ -2014,6 +2038,7 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
unsigned int flags = FOLL_HWPOISON;
struct page *page;
int npages = 0;
+ int fault_error;
might_sleep();
@@ -2025,7 +2050,10 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
if (async)
flags |= FOLL_NOWAIT;
- npages = get_user_pages_unlocked(addr, 1, &page, flags);
+ npages = get_user_pages_unlocked(addr, 1, &page, flags, &fault_error);
+ if (fault_error & VM_FAULT_ERROR)
+ kvm_send_vm_fault_signal(fault_error, npages, addr, current);
+
if (npages != 1)
return npages;
--
2.31.1.751.gd2f1c929bd-goog
Attacks against vulnerable userspace applications with the purpose to break
ASLR or bypass canaries traditionally use some level of brute force with
the help of the fork system call. This is possible since when creating a
new process using fork its memory contents are the same as those of the
parent process (the process that called the fork system call). So, the
attacker can test the memory infinite times to find the correct memory
values or the correct memory addresses without worrying about crashing the
application.
Based on the above scenario it would be nice to have this detected and
mitigated, and this is the goal of this patch serie. Specifically the
following attacks are expected to be detected:
1.- Launching (fork()/exec()) a setuid/setgid process repeatedly until a
desirable memory layout is got (e.g. Stack Clash).
2.- Connecting to an exec()ing network daemon (e.g. xinetd) repeatedly
until a desirable memory layout is got (e.g. what CTFs do for simple
network service).
3.- Launching processes without exec() (e.g. Android Zygote) and exposing
state to attack a sibling.
4.- Connecting to a fork()ing network daemon (e.g. apache) repeatedly until
the previously shared memory layout of all the other children is
exposed (e.g. kind of related to HeartBleed).
In each case, a privilege boundary has been crossed:
Case 1: setuid/setgid process
Case 2: network to local
Case 3: privilege changes
Case 4: network to local
So, what will really be detected are fork/exec brute force attacks that
cross any of the commented bounds.
The implementation details and comparison against other existing
implementations can be found in the "Documentation" patch.
It is important to mention that this version has changed the method used to
track the information related to the application crashes. Prior this
version, a pointer per process (in the task_struct structure) held a
reference to the shared statistical data. Or in other words, these stats
were shared by all the fork hierarchy processes. But this has an important
drawback: a brute force attack that happens through the execve system call
losts the faults info since these statistics are freed when the fork
hierarchy disappears. So, the solution adopted in the v6 version was to use
an upper fork hierarchy to track the info for this attack type. But, as
Valdis Kletnieks pointed out during this discussion [1], this method can
be easily bypassed using a double exec (well, this was the method used in
the kselftest to avoid the detection ;) ). So, in this version, to track
all the statistical data (info related with application crashes), the
extended attributes feature for the executable files are used. The xattr is
also used to mark the executables as "not allowed" when an attack is
detected. Then, the execve system call rely on this flag to avoid following
executions of this file.
[1] https://lore.kernel.org/kernelnewbies/20210330173459.GA3163@ubuntu/
Moreover, I think this solves another problem pointed out by Andi Kleen
during the v5 review [2] related to the possibility that a supervisor
respawns processes killed by the Brute LSM. He suggested adding some way so
a supervisor can know that a process has been killed by Brute and then
decide to respawn or not. So, now, the supervisor can read the brute xattr
of one executable and know if it is blocked by Brute and why (using the
statistical data).
[2] https://lore.kernel.org/kernel-hardening/878s78dnrm.fsf@linux.intel.com/
Knowing all this information I will explain now the different patches:
The 1/7 patch defines a new LSM hook to get the fatal signal of a task.
This will be useful during the attack detection phase.
The 2/7 patch defines a new LSM and the necessary sysctl attributes to fine
tuning the attack detection.
The 3/7 patch detects a fork/exec brute force attack and narrows the
possible cases taken into account the privilege boundary crossing.
The 4/7 patch mitigates a brute force attack.
The 5/7 patch adds self-tests to validate the Brute LSM expectations.
The 6/7 patch adds the documentation to explain this implementation.
The 7/7 patch updates the maintainers file.
This patch serie is a task of the KSPP [3] and can also be accessed from my
github tree [4] in the "brute_v7" branch.
[3] https://github.com/KSPP/linux/issues/39
[4] https://github.com/johwood/linux/
When I ran the "checkpatch" script I got the following errors, but I think
they are false positives as I follow the same coding style for the others
extended attributes suffixes.
----------------------------------------------------------------------------
../patches/brute_v7/v7-0003-security-brute-Detect-a-brute-force-attack.patch
----------------------------------------------------------------------------
ERROR: Macros with complex values should be enclosed in parentheses
89: FILE: include/uapi/linux/xattr.h:80:
+#define XATTR_NAME_BRUTE XATTR_SECURITY_PREFIX XATTR_BRUTE_SUFFIX
-----------------------------------------------------------------------------
../patches/brute_v7/v7-0005-selftests-brute-Add-tests-for-the-Brute-LSM.patch
-----------------------------------------------------------------------------
ERROR: Macros with complex values should be enclosed in parentheses
100: FILE: tools/testing/selftests/brute/rmxattr.c:18:
+#define XATTR_NAME_BRUTE XATTR_SECURITY_PREFIX XATTR_BRUTE_SUFFIX
When I ran the "kernel-doc" script with the following parameters:
./scripts/kernel-doc --none -v security/brute/brute.c
I got the following warning:
security/brute/brute.c:65: warning: contents before sections
But I don't understand why it is complaining. Could it be a false positive?
The previous versions can be found in:
RFC
https://lore.kernel.org/kernel-hardening/20200910202107.3799376-1-keescook@…
Version 2
https://lore.kernel.org/kernel-hardening/20201025134540.3770-1-john.wood@gm…
Version 3
https://lore.kernel.org/lkml/20210221154919.68050-1-john.wood@gmx.com/
Version 4
https://lore.kernel.org/lkml/20210227150956.6022-1-john.wood@gmx.com/
Version 5
https://lore.kernel.org/kernel-hardening/20210227153013.6747-1-john.wood@gm…
Version 6
https://lore.kernel.org/kernel-hardening/20210307113031.11671-1-john.wood@g…
Changelog RFC -> v2
-------------------
- Rename this feature with a more suitable name (Jann Horn, Kees Cook).
- Convert the code to an LSM (Kees Cook).
- Add locking to avoid data races (Jann Horn).
- Add a new LSM hook to get the fatal signal of a task (Jann Horn, Kees
Cook).
- Add the last crashes timestamps list to avoid false positives in the
attack detection (Jann Horn).
- Use "period" instead of "rate" (Jann Horn).
- Other minor changes suggested (Jann Horn, Kees Cook).
Changelog v2 -> v3
------------------
- Compute the application crash period on an on-going basis (Kees Cook).
- Detect a brute force attack through the execve system call (Kees Cook).
- Detect an slow brute force attack (Randy Dunlap).
- Fine tuning the detection taken into account privilege boundary crossing
(Kees Cook).
- Taken into account only fatal signals delivered by the kernel (Kees
Cook).
- Remove the sysctl attributes to fine tuning the detection (Kees Cook).
- Remove the prctls to allow per process enabling/disabling (Kees Cook).
- Improve the documentation (Kees Cook).
- Fix some typos in the documentation (Randy Dunlap).
- Add self-test to validate the expectations (Kees Cook).
Changelog v3 -> v4
------------------
- Fix all the warnings shown by the tool "scripts/kernel-doc" (Randy
Dunlap).
Changelog v4 -> v5
------------------
- Fix some typos (Randy Dunlap).
Changelog v5 -> v6
------------------
- Fix a reported deadlock (kernel test robot).
- Add high level details to the documentation (Andi Kleen).
Changelog v6 -> v7
------------------
- Add the "Reviewed-by:" tag to the first patch.
- Rearrange the brute LSM between lockdown and yama (Kees Cook).
- Split subdir and obj in security/Makefile (Kees Cook).
- Reduce the number of header files included (Kees Cook).
- Print the pid when an attack is detected (Kees Cook).
- Use the socket_accept LSM hook instead of socket_sock_rcv_skb hook to
avoid running a hook on every incoming network packet (Kees Cook).
- Update the documentation and fix it to render it properly (Jonathan
Corbet).
- Manage correctly an exec brute force attack avoiding the bypass (Valdis
Kletnieks).
- Other minor changes and cleanups.
Any constructive comments are welcome.
Thanks in advance.
John Wood (7):
security: Add LSM hook at the point where a task gets a fatal signal
security/brute: Define a LSM and add sysctl attributes
security/brute: Detect a brute force attack
security/brute: Mitigate a brute force attack
selftests/brute: Add tests for the Brute LSM
Documentation: Add documentation for the Brute LSM
MAINTAINERS: Add a new entry for the Brute LSM
Documentation/admin-guide/LSM/Brute.rst | 334 +++++++++++
Documentation/admin-guide/LSM/index.rst | 1 +
MAINTAINERS | 7 +
include/linux/lsm_hook_defs.h | 1 +
include/linux/lsm_hooks.h | 4 +
include/linux/security.h | 4 +
include/uapi/linux/xattr.h | 3 +
kernel/signal.c | 1 +
security/Kconfig | 11 +-
security/Makefile | 2 +
security/brute/Kconfig | 15 +
security/brute/Makefile | 2 +
security/brute/brute.c | 716 +++++++++++++++++++++++
security/security.c | 5 +
tools/testing/selftests/Makefile | 1 +
tools/testing/selftests/brute/.gitignore | 2 +
tools/testing/selftests/brute/Makefile | 5 +
tools/testing/selftests/brute/config | 1 +
tools/testing/selftests/brute/rmxattr.c | 34 ++
tools/testing/selftests/brute/test.c | 507 ++++++++++++++++
tools/testing/selftests/brute/test.sh | 256 ++++++++
21 files changed, 1907 insertions(+), 5 deletions(-)
create mode 100644 Documentation/admin-guide/LSM/Brute.rst
create mode 100644 security/brute/Kconfig
create mode 100644 security/brute/Makefile
create mode 100644 security/brute/brute.c
create mode 100644 tools/testing/selftests/brute/.gitignore
create mode 100644 tools/testing/selftests/brute/Makefile
create mode 100644 tools/testing/selftests/brute/config
create mode 100644 tools/testing/selftests/brute/rmxattr.c
create mode 100644 tools/testing/selftests/brute/test.c
create mode 100755 tools/testing/selftests/brute/test.sh
--
2.25.1
TL;DR: Add support to kunit_tool to dispatch tests via QEMU. Also add
support to immediately shutdown a kernel after running KUnit tests.
Background
----------
KUnit has supported running on all architectures for quite some time;
however, kunit_tool - the script commonly used to invoke KUnit tests -
has only fully supported KUnit run on UML. Its functionality has been
broken up for some time to separate the configure, build, run, and parse
phases making it possible to be used in part on other architectures to a
small extent. Nevertheless, kunit_tool has not supported running tests
on other architectures.
What this patchset does
-----------------------
This patchset introduces first class support to kunit_tool for KUnit to
be run on many popular architectures via QEMU. It does this by adding
two new flags: `--arch` and `--cross_compile`.
`--arch` allows an architecture to be specified by the name the
architecture is given in `arch/`. It uses the specified architecture to
select a minimal amount of Kconfigs and QEMU configs needed for the
architecture to run in QEMU and provide a console from which KTAP
results can be scraped.
`--cross_compile` allows a toolchain prefix to be specified to make
similar to how `CROSS_COMPILE` is used.
Additionally, this patchset revives the previously considered "kunit:
tool: add support for QEMU"[1] patchs. The motivation for this new
kernel command line flags, `kunit_shutdown`, is to better support
running KUnit tests inside of QEMU. For most popular architectures, QEMU
can be made to terminate when the Linux kernel that is being run is
reboted, halted, or powered off. As Kees pointed out in a previous
discussion[2], it is possible to make a kernel initrd that can reboot
the kernel immediately, doing this for every architecture would likely
be infeasible. Instead, just having an option for the kernel to shutdown
when it is done with testing seems a lot simpler, especially since it is
an option which would only available in testing configurations of the
kernel anyway.
Changes since last revision
---------------------------
Mostly fixed lots of minor issues suggested/poited out by David and
Daniel. Also reworked how qemu_configs are loaded: Now each config is in
its own Python file and is loaded dynamically. Given the number of
improvements that address the biggest concerns I had in the last RFC, I
decided to promote this to a normal patch set.
What discussion remains for this patchset?
------------------------------------------
I am still hoping to see some discussion regarding the kunit_shutdown
patch: I want to make sure with the added context of QEMU running under
kunit_tool that this is now a reasonable approach. Nevertheless, I am
pretty happy with this patchset as is, and I did not get any negative
feedback on the previous revision, so I think we can probably just move
forward as is.
Brendan Higgins (3):
Documentation: Add kunit_shutdown to kernel-parameters.txt
kunit: tool: add support for QEMU
Documentation: kunit: document support for QEMU in kunit_tool
David Gow (1):
kunit: Add 'kunit_shutdown' option
.../admin-guide/kernel-parameters.txt | 8 +
Documentation/dev-tools/kunit/usage.rst | 37 +++-
lib/kunit/executor.c | 20 ++
tools/testing/kunit/kunit.py | 57 +++++-
tools/testing/kunit/kunit_config.py | 7 +-
tools/testing/kunit/kunit_kernel.py | 172 +++++++++++++++---
tools/testing/kunit/kunit_parser.py | 2 +-
tools/testing/kunit/kunit_tool_test.py | 18 +-
tools/testing/kunit/qemu_config.py | 17 ++
tools/testing/kunit/qemu_configs/alpha.py | 10 +
tools/testing/kunit/qemu_configs/arm.py | 13 ++
tools/testing/kunit/qemu_configs/arm64.py | 12 ++
tools/testing/kunit/qemu_configs/i386.py | 10 +
tools/testing/kunit/qemu_configs/powerpc.py | 12 ++
tools/testing/kunit/qemu_configs/riscv.py | 31 ++++
tools/testing/kunit/qemu_configs/s390.py | 14 ++
tools/testing/kunit/qemu_configs/sparc.py | 10 +
tools/testing/kunit/qemu_configs/x86_64.py | 10 +
18 files changed, 411 insertions(+), 49 deletions(-)
create mode 100644 tools/testing/kunit/qemu_config.py
create mode 100644 tools/testing/kunit/qemu_configs/alpha.py
create mode 100644 tools/testing/kunit/qemu_configs/arm.py
create mode 100644 tools/testing/kunit/qemu_configs/arm64.py
create mode 100644 tools/testing/kunit/qemu_configs/i386.py
create mode 100644 tools/testing/kunit/qemu_configs/powerpc.py
create mode 100644 tools/testing/kunit/qemu_configs/riscv.py
create mode 100644 tools/testing/kunit/qemu_configs/s390.py
create mode 100644 tools/testing/kunit/qemu_configs/sparc.py
create mode 100644 tools/testing/kunit/qemu_configs/x86_64.py
base-commit: 38182162b50aa4e970e5997df0a0c4288147a153
--
2.31.1.607.g51e8a6a459-goog
This patchset provides a file descriptor for every VM and VCPU to read
KVM statistics data in binary format.
It is meant to provide a lightweight, flexible, scalable and efficient
lock-free solution for user space telemetry applications to pull the
statistics data periodically for large scale systems. The pulling
frequency could be as high as a few times per second.
In this patchset, every statistics data are treated to have some
attributes as below:
* architecture dependent or common
* VM statistics data or VCPU statistics data
* type: cumulative, instantaneous,
* unit: none for simple counter, nanosecond, microsecond,
millisecond, second, Byte, KiByte, MiByte, GiByte. Clock Cycles
Since no lock/synchronization is used, the consistency between all
the statistics data is not guaranteed. That means not all statistics
data are read out at the exact same time, since the statistics date
are still being updated by KVM subsystems while they are read out.
---
* v4 -> v5
- Rebase to kvm/queue, commit a4345a7cecfb ("Merge tag
'kvmarm-fixes-5.13-1'")
- Change maximum stats name length to 48
- Replace VM_STATS_COMMON/VCPU_STATS_COMMON macros with stats
descriptor definition macros.
- Fixed some errors/warnings reported by checkpatch.pl
* v3 -> v4
- Rebase to kvm/queue, commit 9f242010c3b4 ("KVM: avoid "deadlock"
between install_new_memslots and MMU notifier")
- Use C-stype comments in the whole patch
- Fix wrong count for x86 VCPU stats descriptors
- Fix KVM stats data size counting and validity check in selftest
* v2 -> v3
- Rebase to kvm/queue, commit edf408f5257b ("KVM: avoid "deadlock"
between install_new_memslots and MMU notifier")
- Resolve some nitpicks about format
* v1 -> v2
- Use ARRAY_SIZE to count the number of stats descriptors
- Fix missing `size` field initialization in macro STATS_DESC
[1] https://lore.kernel.org/kvm/20210402224359.2297157-1-jingzhangos@google.com
[2] https://lore.kernel.org/kvm/20210415151741.1607806-1-jingzhangos@google.com
[3] https://lore.kernel.org/kvm/20210423181727.596466-1-jingzhangos@google.com
[4] https://lore.kernel.org/kvm/20210429203740.1935629-1-jingzhangos@google.com
---
Jing Zhang (4):
KVM: stats: Separate common stats from architecture specific ones
KVM: stats: Add fd-based API to read binary stats data
KVM: stats: Add documentation for statistics data binary interface
KVM: selftests: Add selftest for KVM statistics data binary interface
Documentation/virt/kvm/api.rst | 171 ++++++++
arch/arm64/include/asm/kvm_host.h | 9 +-
arch/arm64/kvm/guest.c | 38 +-
arch/mips/include/asm/kvm_host.h | 9 +-
arch/mips/kvm/mips.c | 64 ++-
arch/powerpc/include/asm/kvm_host.h | 9 +-
arch/powerpc/kvm/book3s.c | 64 ++-
arch/powerpc/kvm/book3s_hv.c | 12 +-
arch/powerpc/kvm/book3s_pr.c | 2 +-
arch/powerpc/kvm/book3s_pr_papr.c | 2 +-
arch/powerpc/kvm/booke.c | 59 ++-
arch/s390/include/asm/kvm_host.h | 9 +-
arch/s390/kvm/kvm-s390.c | 129 +++++-
arch/x86/include/asm/kvm_host.h | 9 +-
arch/x86/kvm/x86.c | 67 +++-
include/linux/kvm_host.h | 136 ++++++-
include/linux/kvm_types.h | 12 +
include/uapi/linux/kvm.h | 50 +++
tools/testing/selftests/kvm/.gitignore | 1 +
tools/testing/selftests/kvm/Makefile | 3 +
.../testing/selftests/kvm/include/kvm_util.h | 3 +
.../selftests/kvm/kvm_bin_form_stats.c | 379 ++++++++++++++++++
tools/testing/selftests/kvm/lib/kvm_util.c | 12 +
virt/kvm/kvm_main.c | 237 ++++++++++-
24 files changed, 1396 insertions(+), 90 deletions(-)
create mode 100644 tools/testing/selftests/kvm/kvm_bin_form_stats.c
base-commit: a4345a7cecfb91ae78cd43d26b0c6a956420761a
--
2.31.1.751.gd2f1c929bd-goog
When there is no devlink device, the following command will return:
$ devlink -j dev show
{dev:{}}
This will cause IndexError when trying to access the first element
in dev of this json dataset. Use the kselftest framework skip code
to skip this test in this case.
Example output with this change:
# selftests: net: devlink_port_split.py
# no devlink device was found, test skipped
ok 7 selftests: net: devlink_port_split.py # SKIP
Link: https://bugs.launchpad.net/bugs/1928889
Signed-off-by: Po-Hsu Lin <po-hsu.lin(a)canonical.com>
---
tools/testing/selftests/net/devlink_port_split.py | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/net/devlink_port_split.py b/tools/testing/selftests/net/devlink_port_split.py
index 834066d..2b5d6ff 100755
--- a/tools/testing/selftests/net/devlink_port_split.py
+++ b/tools/testing/selftests/net/devlink_port_split.py
@@ -18,6 +18,8 @@ import sys
#
+# Kselftest framework requirement - SKIP code is 4
+KSFT_SKIP=4
Port = collections.namedtuple('Port', 'bus_info name')
@@ -239,7 +241,11 @@ def main(cmdline=None):
assert stderr == ""
devs = json.loads(stdout)['dev']
- dev = list(devs.keys())[0]
+ if devs:
+ dev = list(devs.keys())[0]
+ else:
+ print("no devlink device was found, test skipped")
+ sys.exit(KSFT_SKIP)
cmd = "devlink dev show %s" % dev
stdout, stderr = run_command(cmd)
--
2.7.4
Building the nci test suite produces a binary, nci_dev, that git then
tries to track. Add a .gitignore file to tell git to ignore this binary.
Signed-off-by: David Matlack <dmatlack(a)google.com>
---
tools/testing/selftests/nci/.gitignore | 1 +
1 file changed, 1 insertion(+)
create mode 100644 tools/testing/selftests/nci/.gitignore
diff --git a/tools/testing/selftests/nci/.gitignore b/tools/testing/selftests/nci/.gitignore
new file mode 100644
index 000000000000..448eeb4590fc
--- /dev/null
+++ b/tools/testing/selftests/nci/.gitignore
@@ -0,0 +1 @@
+/nci_dev
--
2.31.1.751.gd2f1c929bd-goog