As reported and suggested by Willy, the inline __sysret() helper
introduces three types of conversions and increases the size:
(1) the "unsigned long" argument to __sysret() forces a sign extension
from all sys_* functions that used to return 'int'
(2) the comparison with the error range now has to be performed on a
'unsigned long' instead of an 'int'
(3) the return value from __sysret() is a 'long' (note, a signed long)
which then has to be turned back to an 'int' before being returned by the
caller to satisfy the caller's prototype.
To fix this up, firstly, let's use a macro instead of an inline function to
preserve the input type and avoid these useless conversions (1), (3).
Secondly, the comparison to -MAX_ERRNO is inflicted on all integer returns
where we could previously keep a simple sign comparison; let's use a new
is_signed_type() macro from include/linux/compiler.h to limit the
comparison to -MAX_ERRNO (2) only on demand and preserve a simple sign
comparison for most of the cases as before.
Thirdly, fix up the following warning by an explicit conversion and let
__sysret() be able to accept the (void *) type of argument:
sysroot/powerpc/include/sys.h: In function 'sbrk':
sysroot/powerpc/include/sys.h:104:16: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
104 | return (void *)__sysret(-ENOMEM);
Fourthly, to further work around argument types that carry 'const', we must
use __auto_type with a new enough compiler version or use 'long' as before.
Here reports the size testing result of nolibc-test with gcc 13.2.0:
before:
// ppc64le with powerpc64-linux-gcc
$ size nolibc-test
text data bss dec hex filename
28004 8 80 28092 6dbc nolibc-test
// mips with mips64-linux-gcc (CFLAGS="-mabi=32 -EL")
$ size nolibc-test
text data bss dec hex filename
23164 64 64 23292 5afc nolibc-test
after:
// ppc64le with powerpc64-linux-gcc
$ size nolibc-test
text data bss dec hex filename
27828 8 80 27916 6d0c nolibc-test
// mips with mips64-linux-gcc (CFLAGS="-mabi=32 -EL")
$ size nolibc-test
text data bss dec hex filename
22924 64 64 23052 5a0c nolibc-test
Suggested-by: Willy Tarreau <w(a)1wt.eu>
Link: https://lore.kernel.org/lkml/20230806095846.GB10627@1wt.eu/
Link: https://lore.kernel.org/lkml/20230806134348.GA19145@1wt.eu/
Signed-off-by: Zhangjin Wu <falcon(a)tinylab.org>
---
Hi, Willy
v4 rebases on latest 20230806-for-6.6-1 and fixes up a warning reported
by the new -Wall -Wextra options.
Changes from v3 --> v4:
* fix up a new warning about 'ret < 0' when the input arg type is (void *)
Changes from v2 --> v3:
* define a __GXX_HAS_AUTO_TYPE_WITH_CONST_SUPPORT for gcc >= 11.0 (ABI_VERSION >= 1016)
* split __sysret() to two versions by the macro instead of a mixed unified and unreadable version
* use shorter __ret instead of __sysret_arg
Changes from v1 --> v2:
* fix up argument with 'const' in the type
* support "void *" argument
v2: https://lore.kernel.org/lkml/95fe3e732f455fab653fe1427118d905e4d04257.16913…
v1: https://lore.kernel.org/lkml/20230806131921.52453-1-falcon@tinylab.org/
---
tools/include/nolibc/sys.h | 66 +++++++++++++++++++++++++++++++-------
1 file changed, 55 insertions(+), 11 deletions(-)
diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
index 833d6c5e86dc..565b4a295c11 100644
--- a/tools/include/nolibc/sys.h
+++ b/tools/include/nolibc/sys.h
@@ -35,15 +35,59 @@
* (src/internal/syscall_ret.c) and glibc (sysdeps/unix/sysv/linux/sysdep.h)
*/
-static __inline__ __attribute__((unused, always_inline))
-long __sysret(unsigned long ret)
-{
- if (ret >= (unsigned long)-MAX_ERRNO) {
- SET_ERRNO(-(long)ret);
- return -1;
- }
- return ret;
-}
+/*
+ * Whether 'type' is a signed type or an unsigned type. Supports scalar types,
+ * bool and also pointer types. (from include/linux/compiler.h)
+ */
+#define __is_signed_type(type) (((type)(-1)) < (type)1)
+
+/* __auto_type is used instead of __typeof__ to workaround the build error
+ * 'error: assignment of read-only variable' when the argument has 'const' in
+ * the type, but __auto_type is a new feature from newer gcc version and it
+ * only works with 'const' from gcc 11.0 (__GXX_ABI_VERSION = 1016)
+ * https://gcc.gnu.org/legacy-ml/gcc-patches/2013-11/msg01378.html
+ */
+
+#if __GXX_ABI_VERSION >= 1016
+#define __GXX_HAS_AUTO_TYPE_WITH_CONST_SUPPORT
+#endif
+
+#ifdef __GXX_HAS_AUTO_TYPE_WITH_CONST_SUPPORT
+#define __sysret(arg) \
+({ \
+ __auto_type __ret = (arg); \
+ if (__is_signed_type(__typeof__(arg))) { \
+ if ((long)__ret < 0) { \
+ SET_ERRNO(-(long)__ret); \
+ __ret = (__typeof__(arg))(-1L); \
+ } \
+ } else { \
+ if ((unsigned long)__ret >= (unsigned long)-MAX_ERRNO) { \
+ SET_ERRNO(-(long)__ret); \
+ __ret = (__typeof__(arg))(-1L); \
+ } \
+ } \
+ __ret; \
+})
+
+#else /* ! __GXX_HAS_AUTO_TYPE_WITH_CONST_SUPPORT */
+#define __sysret(arg) \
+({ \
+ long __ret = (long)(arg); \
+ if (__is_signed_type(__typeof__(arg))) { \
+ if (__ret < 0) { \
+ SET_ERRNO(-__ret); \
+ __ret = -1L; \
+ } \
+ } else { \
+ if ((unsigned long)__ret >= (unsigned long)-MAX_ERRNO) { \
+ SET_ERRNO(-__ret); \
+ __ret = -1L; \
+ } \
+ } \
+ (__typeof__(arg))__ret; \
+})
+#endif /* ! __GXX_HAS_AUTO_TYPE_WITH_CONST_SUPPORT */
/* Functions in this file only describe syscalls. They're declared static so
* that the compiler usually decides to inline them while still being allowed
@@ -94,7 +138,7 @@ void *sbrk(intptr_t inc)
if (ret && sys_brk(ret + inc) == ret + inc)
return ret + inc;
- return (void *)__sysret(-ENOMEM);
+ return __sysret((void *)-ENOMEM);
}
@@ -682,7 +726,7 @@ void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd,
static __attribute__((unused))
void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset)
{
- return (void *)__sysret((unsigned long)sys_mmap(addr, length, prot, flags, fd, offset));
+ return __sysret(sys_mmap(addr, length, prot, flags, fd, offset));
}
static __attribute__((unused))
--
2.25.1
As is described in the "How to use MPTCP?" section in MPTCP wiki [1]:
"Your app should create sockets with IPPROTO_MPTCP as the proto:
( socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP); ). Legacy apps can be
forced to create and use MPTCP sockets instead of TCP ones via the
mptcpize command bundled with the mptcpd daemon."
But the mptcpize (LD_PRELOAD technique) command has some limitations
[2]:
- it doesn't work if the application is not using libc (e.g. GoLang
apps)
- in some envs, it might not be easy to set env vars / change the way
apps are launched, e.g. on Android
- mptcpize needs to be launched with all apps that want MPTCP: we could
have more control from BPF to enable MPTCP only for some apps or all the
ones of a netns or a cgroup, etc.
- it is not in BPF, we cannot talk about it at netdev conf.
So this patchset attempts to use BPF to implement functions similar to
mptcpize.
The main idea is to add a hook in sys_socket() to change the protocol id
from IPPROTO_TCP (or 0) to IPPROTO_MPTCP.
[1]
https://github.com/multipath-tcp/mptcp_net-next/wiki
[2]
https://github.com/multipath-tcp/mptcp_net-next/issues/79
v12:
- update diag_* log of update_socket_protocol.
- add 'ip netns show' after 'ip netns del' to check if there is
a test that did not clean up its netns.
- return libbpf_get_error() instead of -EIO for the error from
open_and_load().
- Use getsockopt(SOL_PROTOCOL) to verify mptcp protocol instead of
using 'ss -tOni'.
v11:
- add comments about outputs of 'ss' and 'nstat'.
- use "err = verify_mptcpify()" instead of using =+.
v10:
- drop "#ifdef CONFIG_BPF_JIT".
- include vmlinux.h and bpf_tracing_net.h to avoid defining some
macros.
- drop unneeded checks for mptcp.
v9:
- update comment for 'update_socket_protocol'.
v8:
- drop the additional checks on the 'protocol' value after the
'update_socket_protocol()' call.
v7:
- add __weak and __diag_* for update_socket_protocol.
v6:
- add update_socket_protocol.
v5:
- add bpf_mptcpify helper.
v4:
- use lsm_cgroup/socket_create
v3:
- patch 8: char cmd[128]; -> char cmd[256];
v2:
- Fix build selftests errors reported by CI
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/79
Geliang Tang (5):
bpf: Add update_socket_protocol hook
selftests/bpf: Use random netns name for mptcp
selftests/bpf: Add two mptcp netns helpers
selftests/bpf: Fix error checks of mptcp open_and_load
selftests/bpf: Add mptcpify test
net/mptcp/bpf.c | 15 ++
net/socket.c | 26 +++-
.../testing/selftests/bpf/prog_tests/mptcp.c | 146 +++++++++++++++---
tools/testing/selftests/bpf/progs/mptcpify.c | 20 +++
4 files changed, 186 insertions(+), 21 deletions(-)
create mode 100644 tools/testing/selftests/bpf/progs/mptcpify.c
--
2.35.3
Hi all,
the following patch tries to work around an error on ppc64le, where the
zram01.sh LTP test (there is also kernel selftest
tools/testing/selftests/zram/zram01.sh, but LTP test got further
updates) often has mem_used_total 0 although zram is already filled.
The patch tries to repeatedly read /sys/block/zram*/mm_stat for 1 sec,
waiting for mem_used_total > 0. The question is whether this is expected and
should be worked around, or a bug which should be fixed.
REPRODUCE THE ISSUE
Quickest way to install only zram tests and their dependencies:
make autotools && ./configure && for i in testcases/lib/ testcases/kernel/device-drivers/zram/; do cd $i && make -j$(getconf _NPROCESSORS_ONLN) && make install && cd -; done
Run the test (only on vfat)
PATH="/opt/ltp/testcases/bin:$PATH" LTP_SINGLE_FS_TYPE=vfat zram01.sh
Petr Vorel (1):
zram01.sh: Workaround division by 0 on vfat on ppc64le
.../kernel/device-drivers/zram/zram01.sh | 27 +++++++++++++++++--
1 file changed, 25 insertions(+), 2 deletions(-)
--
2.38.0
*Changes in v26:*
- Code restructuring and API changes in PAGEMAP_IOCTL
*Changes in v25*:
- Do proper filtering on hole as well (hole got missed earlier)
*Changes in v24*:
- Rebase on top of next-20230710
- Place WP markers in case of hole as well
*Changes in v23*:
- Set vec_buf_index in loop only when vec_buf_index is set
- Return -EFAULT instead of -EINVAL if vec is NULL
- Correctly return the walk ending address to the page granularity
*Changes in v22*:
- Interface change:
- Replace [start start + len) with [start, end)
- Return the ending address of the address walk in start
*Changes in v21*:
- Abort walk instead of returning error if WP is to be performed on
partial hugetlb
*Changes in v20*
- Correct PAGE_IS_FILE and add PAGE_IS_PFNZERO
*Changes in v19*
- Minor changes and interface updates
*Changes in v18*
- Rebase on top of next-20230613
- Minor updates
*Changes in v17*
- Rebase on top of next-20230606
- Minor improvements in PAGEMAP_SCAN IOCTL patch
*Changes in v16*
- Fix a corner case
- Add exclusive PM_SCAN_OP_WP back
*Changes in v15*
- Build fix (Add missed build fix in RESEND)
*Changes in v14*
- Fix build error caused by #ifdef added at last minute in some configs
*Changes in v13*
- Rebase on top of next-20230414
- Give-up on using uffd_wp_range() and write new helpers, flush tlb only
once
*Changes in v12*
- Update and other memory types to UFFD_FEATURE_WP_ASYNC
- Rebase on top of next-20230406
- Review updates
*Changes in v11*
- Rebase on top of next-20230307
- Base patches on UFFD_FEATURE_WP_UNPOPULATED
- Do a lot of cosmetic changes and review updates
- Remove ENGAGE_WP + !GET operation as it can be performed with
UFFDIO_WRITEPROTECT
*Changes in v10*
- Add specific condition to return error if hugetlb is used with wp
async
- Move changes in tools/include/uapi/linux/fs.h to separate patch
- Add documentation
*Changes in v9:*
- Correct fault resolution for userfaultfd wp async
- Fix build warnings and errors which were happening on some configs
- Simplify pagemap ioctl's code
*Changes in v8:*
- Update uffd async wp implementation
- Improve PAGEMAP_IOCTL implementation
*Changes in v7:*
- Add uffd wp async
- Update the IOCTL to use uffd under the hood instead of soft-dirty
flags
*Motivation*
The real motivation for adding PAGEMAP_SCAN IOCTL is to emulate Windows
GetWriteWatch() and ResetWriteWatch() syscalls [1]. The GetWriteWatch()
retrieves the addresses of the pages that are written to in a region of
virtual memory.
This syscall is used in Windows applications and games etc. This syscall is
being emulated in pretty slow manner in userspace. Our purpose is to
enhance the kernel such that we translate it efficiently in a better way.
Currently some out of tree hack patches are being used to efficiently
emulate it in some kernels. We intend to replace those with these patches.
So the whole gaming on Linux can effectively get benefit from this. It
means there would be tons of users of this code.
CRIU use case [2] was mentioned by Andrei and Danylo:
> Use cases for migrating sparse VMAs are binaries sanitized with ASAN,
> MSAN or TSAN [3]. All of these sanitizers produce sparse mappings of
> shadow memory [4]. Being able to migrate such binaries allows to highly
> reduce the amount of work needed to identify and fix post-migration
> crashes, which happen constantly.
Andrei defines the following uses of this code:
* it is more granular and allows us to track changed pages more
effectively. The current interface can clear dirty bits for the entire
process only. In addition, reading info about pages is a separate
operation. It means we must freeze the process to read information
about all its pages, reset dirty bits, only then we can start dumping
pages. The information about pages becomes more and more outdated,
while we are processing pages. The new interface solves both these
downsides. First, it allows us to read pte bits and clear the
soft-dirty bit atomically. It means that CRIU will not need to freeze
processes to pre-dump their memory. Second, it clears soft-dirty bits
for a specified region of memory. It means CRIU will have actual info
about pages to the moment of dumping them.
* The new interface has to be much faster because basic page filtering
is happening in the kernel. With the old interface, we have to read
pagemap for each page.
*Implementation Evolution (Short Summary)*
From the definition of GetWriteWatch(), we feel like kernel's soft-dirty
feature can be used under the hood with some additions like:
* reset soft-dirty flag for only a specific region of memory instead of
clearing the flag for the entire process
* get and clear soft-dirty flag for a specific region atomically
So we decided to use ioctl on pagemap file to read or/and reset soft-dirty
flag. But using soft-dirty flag, sometimes we get extra pages which weren't
even written. They had become soft-dirty because of VMA merging and
VM_SOFTDIRTY flag. This breaks the definition of GetWriteWatch(). We were
able to by-pass this short coming by ignoring VM_SOFTDIRTY until David
reported that mprotect etc messes up the soft-dirty flag while ignoring
VM_SOFTDIRTY [5]. This wasn't happening until [6] got introduced. We
discussed if we can revert these patches. But we could not reach to any
conclusion. So at this point, I made couple of tries to solve this whole
VM_SOFTDIRTY issue by correcting the soft-dirty implementation:
* [7] Correct the bug fixed wrongly back in 2014. It had potential to cause
regression. We left it behind.
* [8] Keep a list of soft-dirty part of a VMA across splits and merges. I
got the reply don't increase the size of the VMA by 8 bytes.
At this point, we left soft-dirty considering it is too much delicate and
userfaultfd [9] seemed like the only way forward. From there onward, we
have been basing soft-dirty emulation on userfaultfd wp feature where
kernel resolves the faults itself when WP_ASYNC feature is used. It was
straightforward to add WP_ASYNC feature in userfaultfd. Now we get only
those pages dirty or written-to which are really written in reality. (PS
Another userfaultfd feature, WP_UNPOPULATED, is also required; it is
needed to avoid pre-faulting memory before write-protecting [9].)
All the different masks were added on the request of CRIU devs to create
interface more generic and better.
[1] https://learn.microsoft.com/en-us/windows/win32/api/memoryapi/nf-memoryapi-…
[2] https://lore.kernel.org/all/20221014134802.1361436-1-mdanylo@google.com
[3] https://github.com/google/sanitizers
[4] https://github.com/google/sanitizers/wiki/AddressSanitizerAlgorithm#64-bit
[5] https://lore.kernel.org/all/bfcae708-db21-04b4-0bbe-712badd03071@redhat.com
[6] https://lore.kernel.org/all/20220725142048.30450-1-peterx@redhat.com/
[7] https://lore.kernel.org/all/20221122115007.2787017-1-usama.anjum@collabora.…
[8] https://lore.kernel.org/all/20221220162606.1595355-1-usama.anjum@collabora.…
[9] https://lore.kernel.org/all/20230306213925.617814-1-peterx@redhat.com
[10] https://lore.kernel.org/all/20230125144529.1630917-1-mdanylo@google.com
* Original Cover letter from v8*
Hello,
Note:
Soft-dirty pages and pages which have been written-to are synonyms. As
kernel already has soft-dirty feature inside which we have given up to
use, we are using written-to terminology while using UFFD async WP under
the hood.
It is possible to find and clear soft-dirty pages entirely in userspace.
But it isn't efficient:
- The mprotect and SIGSEGV handler for bookkeeping
- The userfaultfd wp (synchronous) with the handler for bookkeeping
Some benchmarks can be seen here[1]. This series adds features that weren't
present earlier:
- There is no atomic get soft-dirty/Written-to status and clear present in
the kernel.
- The pages which have been written-to can not be found in accurate way.
(Kernel's soft-dirty PTE bit + soft-dirty VMA bit shows more soft-dirty
pages than there actually are.)
Historically, soft-dirty PTE bit tracking has been used in the CRIU
project. The procfs interface is enough for finding the soft-dirty bit
status and clearing the soft-dirty bit of all the pages of a process.
We have the use case where we need to track the soft-dirty PTE bit for
only specific pages on-demand. We need this tracking and clear mechanism
of a region of memory while the process is running to emulate the
getWriteWatch() syscall of Windows.
*(Moved to using UFFD instead of soft-dirty feature to find pages which
have been written-to from v7 patch series)*:
Stop using the soft-dirty flags for finding which pages have been
written to. It is too delicate and wrong as it shows more soft-dirty
pages than the actual soft-dirty pages. There is no interest in
correcting it [2][3] as this is how the feature was written years ago.
It shouldn't be updated to changed behaviour. Peter Xu has suggested
using the async version of the UFFD WP [4] as it is based inherently
on the PTEs.
So in this patch series, I've added a new mode to the UFFD which is
asynchronous version of the write protect. When this variant of the
UFFD WP is used, the page faults are resolved automatically by the
kernel. The pages which have been written-to can be found by reading
pagemap file (!PM_UFFD_WP). This feature can be used successfully to
find which pages have been written to from the time the pages were
write protected. This works just like the soft-dirty flag without
showing any extra pages which aren't soft-dirty in reality.
The information related to pages if the page is file mapped, present and
swapped is required for the CRIU project [5][6]. The addition of the
required mask, any mask, excluded mask and return masks are also required
for the CRIU project [5].
The IOCTL returns the addresses of the pages which match the specific
masks. The page addresses are returned in struct page_region in a compact
form. The max_pages is needed to support a use case where user only wants
to get a specific number of pages. So there is no need to find all the
pages of interest in the range when max_pages is specified. The IOCTL
returns when the maximum number of the pages are found. The max_pages is
optional. If max_pages is specified, it must be equal to or greater than the
vec_size. This restriction is needed to handle the worst case when one
page_region only contains info of one page and it cannot be compacted.
This is needed to emulate the Windows getWriteWatch() syscall.
The patch series include the detailed selftest which can be used as an
example for the uffd async wp test and PAGEMAP_IOCTL. It shows the
interface usages as well.
[1] https://lore.kernel.org/lkml/54d4c322-cd6e-eefd-b161-2af2b56aae24@collabora…
[2] https://lore.kernel.org/all/20221220162606.1595355-1-usama.anjum@collabora.…
[3] https://lore.kernel.org/all/20221122115007.2787017-1-usama.anjum@collabora.…
[4] https://lore.kernel.org/all/Y6Hc2d+7eTKs7AiH@x1n
[5] https://lore.kernel.org/all/YyiDg79flhWoMDZB@gmail.com/
[6] https://lore.kernel.org/all/20221014134802.1361436-1-mdanylo@google.com/
Regards,
Muhammad Usama Anjum
Muhammad Usama Anjum (4):
fs/proc/task_mmu: Implement IOCTL to get and optionally clear info
about PTEs
tools headers UAPI: Update linux/fs.h with the kernel sources
mm/pagemap: add documentation of PAGEMAP_SCAN IOCTL
selftests: mm: add pagemap ioctl tests
Peter Xu (1):
userfaultfd: UFFD_FEATURE_WP_ASYNC
Documentation/admin-guide/mm/pagemap.rst | 64 +
Documentation/admin-guide/mm/userfaultfd.rst | 35 +
fs/proc/task_mmu.c | 653 ++++++++
fs/userfaultfd.c | 26 +-
include/linux/hugetlb.h | 1 +
include/linux/userfaultfd_k.h | 21 +-
include/uapi/linux/fs.h | 58 +
include/uapi/linux/userfaultfd.h | 9 +-
mm/hugetlb.c | 34 +-
mm/memory.c | 27 +-
tools/include/uapi/linux/fs.h | 58 +
tools/testing/selftests/mm/.gitignore | 2 +
tools/testing/selftests/mm/Makefile | 3 +-
tools/testing/selftests/mm/config | 1 +
tools/testing/selftests/mm/pagemap_ioctl.c | 1485 ++++++++++++++++++
tools/testing/selftests/mm/run_vmtests.sh | 4 +
16 files changed, 2457 insertions(+), 24 deletions(-)
create mode 100644 tools/testing/selftests/mm/pagemap_ioctl.c
--
2.39.2
As reported and suggested by Willy, the inline __sysret() helper
introduces three types of conversions and increases the size:
(1) the "unsigned long" argument to __sysret() forces a sign extension
from all sys_* functions that used to return 'int'
(2) the comparison with the error range now has to be performed on a
'unsigned long' instead of an 'int'
(3) the return value from __sysret() is a 'long' (note, a signed long)
which then has to be turned back to an 'int' before being returned by the
caller to satisfy the caller's prototype.
To fix this up, firstly, let's use a macro instead of an inline function to
preserve the input type and avoid these useless conversions (1), (3).
Secondly, the comparison to -MAX_ERRNO is inflicted on all integer returns
where we could previously keep a simple sign comparison; let's use a new
is_signed_type() macro from include/linux/compiler.h to limit the
comparison to -MAX_ERRNO (2) only on demand and preserve a simple sign
comparison for most of the cases as before.
Thirdly, fix up the following warning by an explicit conversion and let
__sysret() be able to accept the (void *) type of argument:
sysroot/powerpc/include/sys.h: In function 'sbrk':
sysroot/powerpc/include/sys.h:104:16: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
104 | return (void *)__sysret(-ENOMEM);
Fourthly, to further work around argument types that carry 'const', we must
use __auto_type with a new enough compiler version or use 'long' as before.
Here reports the size testing result with nolibc-test:
before:
// ppc64le
$ size nolibc-test
text data bss dec hex filename
27916 8 80 28004 6d64 nolibc-test
// mips
$ size nolibc-test
text data bss dec hex filename
23276 64 64 23404 5b6c nolibc-test
after:
// ppc64le
$ size nolibc-test
text data bss dec hex filename
27736 8 80 27824 6cb0 nolibc-test
// mips
$ size nolibc-test
text data bss dec hex filename
23036 64 64 23164 5a7c nolibc-test
Suggested-by: Willy Tarreau <w(a)1wt.eu>
Link: https://lore.kernel.org/lkml/20230806095846.GB10627@1wt.eu/
Link: https://lore.kernel.org/lkml/20230806134348.GA19145@1wt.eu/
Signed-off-by: Zhangjin Wu <falcon(a)tinylab.org>
---
v2 here is further fix up argument with 'const' in the type and also
support "void *" argument, v1 is [1].
Tested on many architectures (i386, x86_64, mips, ppc64) and gcc version
(from gcc 4.8-13.1.0), compiles well without any warning and errors and
also with smaller size.
[1]: https://lore.kernel.org/lkml/20230806131921.52453-1-falcon@tinylab.org/
---
tools/include/nolibc/sys.h | 52 ++++++++++++++++++++++++++++++--------
1 file changed, 41 insertions(+), 11 deletions(-)
diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
index 56f63eb48a1b..9c7448ae19e2 100644
--- a/tools/include/nolibc/sys.h
+++ b/tools/include/nolibc/sys.h
@@ -35,15 +35,45 @@
* (src/internal/syscall_ret.c) and glibc (sysdeps/unix/sysv/linux/sysdep.h)
*/
-static __inline__ __attribute__((unused, always_inline))
-long __sysret(unsigned long ret)
-{
- if (ret >= (unsigned long)-MAX_ERRNO) {
- SET_ERRNO(-(long)ret);
- return -1;
- }
- return ret;
-}
+/*
+ * Whether 'type' is a signed type or an unsigned type. Supports scalar types,
+ * bool and also pointer types. (from include/linux/compiler.h)
+ */
+#define __is_signed_type(type) (((type)(-1)) < (type)1)
+
+/* __auto_type is used instead of __typeof__ to workaround the build error
+ * 'error: assignment of read-only variable' when the argument has 'const' in
+ * the type, but __auto_type is a new feature from newer version and it only
+ * work with 'const' from gcc 11.0 (__GXX_ABI_VERSION = 1016)
+ * https://gcc.gnu.org/legacy-ml/gcc-patches/2013-11/msg01378.html
+ */
+
+#if __GXX_ABI_VERSION < 1016
+#define __typeofdecl(arg) long
+#define __typeofconv1(arg) (long)
+#define __typeofconv2(arg) (long)
+#else
+#define __typeofdecl(arg) __auto_type
+#define __typeofconv1(arg)
+#define __typeofconv2(arg) (__typeof__(arg))
+#endif
+
+#define __sysret(arg) \
+({ \
+ __typeofdecl(arg) __sysret_arg = __typeofconv1(arg)(arg); \
+ if (__is_signed_type(__typeof__(arg))) { \
+ if (__sysret_arg < 0) { \
+ SET_ERRNO(-(long)__sysret_arg); \
+ __sysret_arg = __typeofconv2(arg)(-1L); \
+ } \
+ } else { \
+ if ((unsigned long)__sysret_arg >= (unsigned long)-MAX_ERRNO) { \
+ SET_ERRNO(-(long)__sysret_arg); \
+ __sysret_arg = __typeofconv2(arg)(-1L); \
+ } \
+ } \
+ (__typeof__(arg))__sysret_arg; \
+})
/* Functions in this file only describe syscalls. They're declared static so
* that the compiler usually decides to inline them while still being allowed
@@ -94,7 +124,7 @@ void *sbrk(intptr_t inc)
if (ret && sys_brk(ret + inc) == ret + inc)
return ret + inc;
- return (void *)__sysret(-ENOMEM);
+ return __sysret((void *)-ENOMEM);
}
@@ -682,7 +712,7 @@ void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd,
static __attribute__((unused))
void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset)
{
- return (void *)__sysret((unsigned long)sys_mmap(addr, length, prot, flags, fd, offset));
+ return __sysret(sys_mmap(addr, length, prot, flags, fd, offset));
}
static __attribute__((unused))
--
2.25.1
As reported and suggested by Willy, the inline __sysret() helper
introduces three types of conversions and increases the size:
(1) the "unsigned long" argument to __sysret() forces a sign extension
from all sys_* functions that used to return 'int'
(2) the comparison with the error range now has to be performed on a
'unsigned long' instead of an 'int'
(3) the return value from __sysret() is a 'long' (note, a signed long)
which then has to be turned back to an 'int' before being returned by the
caller to satisfy the caller's prototype.
To fix this up, firstly, let's use a macro instead of an inline function to
preserve the input type and avoid these useless conversions (1), (3).
Secondly, the comparison to -MAX_ERRNO is inflicted on all integer returns
where we could previously keep a simple sign comparison; let's use a new
is_signed_type() macro from include/linux/compiler.h to limit the
comparison to -MAX_ERRNO (2) only on demand and preserve a simple sign
comparison for most of the cases as before.
Thirdly, fix up the following warning by an explicit conversion:
sysroot/powerpc/include/sys.h: In function 'sbrk':
sysroot/powerpc/include/sys.h:104:16: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
104 | return (void *)__sysret(-ENOMEM);
Here reports the size testing result with nolibc-test:
before:
// ppc64le
$ size nolibc-test
text data bss dec hex filename
27916 8 80 28004 6d64 nolibc-test
// mips
$ size nolibc-test
text data bss dec hex filename
23276 64 64 23404 5b6c nolibc-test
after:
// ppc64le
$ size nolibc-test
text data bss dec hex filename
27736 8 80 27824 6cb0 nolibc-test
// mips
$ size nolibc-test
text data bss dec hex filename
23036 64 64 23164 5a7c nolibc-test
Suggested-by: Willy Tarreau <w(a)1wt.eu>
Link: https://lore.kernel.org/lkml/20230806095846.GB10627@1wt.eu/#R
Signed-off-by: Zhangjin Wu <falcon(a)tinylab.org>
---
tools/include/nolibc/compiler.h | 9 +++++++++
tools/include/nolibc/sys.h | 27 +++++++++++++++++----------
2 files changed, 26 insertions(+), 10 deletions(-)
diff --git a/tools/include/nolibc/compiler.h b/tools/include/nolibc/compiler.h
index beddc3665d69..360dfc533814 100644
--- a/tools/include/nolibc/compiler.h
+++ b/tools/include/nolibc/compiler.h
@@ -22,4 +22,13 @@
# define __no_stack_protector __attribute__((__optimize__("-fno-stack-protector")))
#endif /* defined(__has_attribute) */
+/*
+ * from include/linux/compiler.h
+ *
+ * Whether 'type' is a signed type or an unsigned type. Supports scalar types,
+ * bool and also pointer types.
+ */
+#define is_signed_type(type) (((type)(-1)) < (type)1)
+#define is_unsigned_type(type) (!is_signed_type(type))
+
#endif /* _NOLIBC_COMPILER_H */
diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
index 56f63eb48a1b..8271302f79c4 100644
--- a/tools/include/nolibc/sys.h
+++ b/tools/include/nolibc/sys.h
@@ -35,15 +35,22 @@
* (src/internal/syscall_ret.c) and glibc (sysdeps/unix/sysv/linux/sysdep.h)
*/
-static __inline__ __attribute__((unused, always_inline))
-long __sysret(unsigned long ret)
-{
- if (ret >= (unsigned long)-MAX_ERRNO) {
- SET_ERRNO(-(long)ret);
- return -1;
- }
- return ret;
-}
+#define __sysret(arg) \
+({ \
+ __typeof__(arg) __sysret_arg = (arg); \
+ if (is_signed_type(__typeof__(arg))) { \
+ if (__sysret_arg < 0) { \
+ SET_ERRNO(-(int)__sysret_arg); \
+ __sysret_arg = -1L; \
+ } \
+ } else { \
+ if ((unsigned long)__sysret_arg >= (unsigned long)-MAX_ERRNO) { \
+ SET_ERRNO(-(int)__sysret_arg); \
+ __sysret_arg = -1L; \
+ } \
+ } \
+ __sysret_arg; \
+})
/* Functions in this file only describe syscalls. They're declared static so
* that the compiler usually decides to inline them while still being allowed
@@ -94,7 +101,7 @@ void *sbrk(intptr_t inc)
if (ret && sys_brk(ret + inc) == ret + inc)
return ret + inc;
- return (void *)__sysret(-ENOMEM);
+ return (void *)__sysret((unsigned long)-ENOMEM);
}
--
2.25.1
Hi, Willy
Now, the dependent pmac32_defconfig patch has been merged into the
powerpc next-test branch [1] ;-)
v6 here with a clean up of the CFLAGS for ppc variants, removed the
redundant -Wl options and call cc-option to check the -mmultiple option
for llvm as kernel does. v5 is [2].
Tests run with local toolchains and latest toolchains.
$ for arch in ppc ppc64 ppc64le; do \
make run-user XARCH=$arch | grep "status: "; \
done
166 test(s): 158 passed, 8 skipped, 0 failed => status: warning
166 test(s): 158 passed, 8 skipped, 0 failed => status: warning
166 test(s): 158 passed, 8 skipped, 0 failed => status: warning
$ for arch in ppc ppc64 ppc64le; do \
make run-user XARCH=$arch CC=/labs/linux-lab/prebuilt/toolchains/ppc64/gcc-13.1.0-nolibc/powerpc64-linux/bin/powerpc64-linux-gcc | grep "status: "; \
done
166 test(s): 158 passed, 8 skipped, 0 failed => status: warning
166 test(s): 158 passed, 8 skipped, 0 failed => status: warning
166 test(s): 158 passed, 8 skipped, 0 failed => status: warning
Changes from v5 --> v6:
* selftests/nolibc: add test support for ppc
selftests/nolibc: add test support for ppc64le
selftests/nolibc: add test support for ppc64
Removed the -Wl options.
As commented in arch/powerpc/Makefile, use -mmultiple with cc-option since llvm has no such option.
* tools/nolibc: add support for powerpc
tools/nolibc: add support for powerpc64
selftests/nolibc: add XARCH and ARCH mapping support
selftests/nolibc: allow customize CROSS_COMPILE by architecture
selftests/nolibc: customize CROSS_COMPILE for 32/64-bit powerpc
No changes.
BR,
Zhangjin Wu
---
[1]: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git/commit/?h…
[2]: https://lore.kernel.org/lkml/cover.1691062722.git.falcon@tinylab.org/
Zhangjin Wu (8):
tools/nolibc: add support for powerpc
tools/nolibc: add support for powerpc64
selftests/nolibc: add XARCH and ARCH mapping support
selftests/nolibc: add test support for ppc
selftests/nolibc: add test support for ppc64le
selftests/nolibc: add test support for ppc64
selftests/nolibc: allow customize CROSS_COMPILE by architecture
selftests/nolibc: customize CROSS_COMPILE for 32/64-bit powerpc
tools/include/nolibc/arch-powerpc.h | 213 ++++++++++++++++++++++++
tools/include/nolibc/arch.h | 2 +
tools/testing/selftests/nolibc/Makefile | 74 ++++++--
3 files changed, 277 insertions(+), 12 deletions(-)
create mode 100644 tools/include/nolibc/arch-powerpc.h
--
2.25.1
Hi, Willy
Based on the CROSS_COMPILE customize support [1] from the last ppc
patchset, to further make run-user/run targets happy for all of the
nolibc supported architectures, let's customize CROSS_COMPILE for all of
them.
Besides loongarch, all of the other architectures have local toolchains.
Let's use the one from [2] for loongarch, as it has a different prefix.
And also, as suggested by you in our previous discussion, let's add some
notes for the toolchains and firmwares instead of automatically download
them.
Now, the test iteration becomes very simple and pretty:
$ ARCHS="i386 x86_64 arm64 arm mips ppc ppc64 ppc64le riscv s390"
$ for arch in ${ARCHS[@]}; do printf "%9s: " $arch; make run-user XARCH=$arch | grep status; done
i386: 165 test(s): 157 passed, 8 skipped, 0 failed => status: warning
x86_64: 165 test(s): 157 passed, 8 skipped, 0 failed => status: warning
arm64: 165 test(s): 157 passed, 8 skipped, 0 failed => status: warning
arm: 165 test(s): 156 passed, 9 skipped, 0 failed => status: warning
mips: 165 test(s): 156 passed, 9 skipped, 0 failed => status: warning
ppc: 165 test(s): 157 passed, 8 skipped, 0 failed => status: warning
ppc64: 165 test(s): 157 passed, 8 skipped, 0 failed => status: warning
ppc64le: 165 test(s): 157 passed, 8 skipped, 0 failed => status: warning
riscv: 165 test(s): 156 passed, 9 skipped, 0 failed => status: warning
s390: 165 test(s): 156 passed, 9 skipped, 0 failed => status: warning
(I have no qemu-user currently for loongarch, so, no test result above)
Best regards,
Zhangjin
---
[1] https://lore.kernel.org/lkml/cover.1691259983.git.falcon@tinylab.org/
[2] https://mirrors.edge.kernel.org/pub/tools/crosstool/
Zhangjin Wu (4):
selftests/nolibc: allow use x86_64 toolchain for i386
selftests/nolibc: customize CROSS_COMPILE for many architectures
selftests/nolibc: customize CROSS_COMPILE for loongarch
selftests/nolibc: add some notes about qemu tools
tools/testing/selftests/nolibc/Makefile | 32 ++++++++++++++++++++++++-
1 file changed, 31 insertions(+), 1 deletion(-)
--
2.25.1
To help the developers to avoid mistakes and keep the code smaller let's
enable compiler warnings.
I stuck with __attribute__((unused)) over __maybe_unused in
nolibc-test.c for consistency with nolibc proper.
If we want to add a define it needs to be added twice once for nolibc
proper and once for nolibc-test otherwise libc-test wouldn't build
anymore.
Signed-off-by: Thomas Weißschuh <linux(a)weissschuh.net>
---
Changes in v3:
- Make getpagesize() return "int"
- Simplify validation of read() return value
- Don't make functions static that are to be used as breakpoints
- Drop -s from LDFLAGS
- Use proper types for read()/write() return values
- Fix unused parameter warnings in new setvbuf()
- Link to v2: https://lore.kernel.org/r/20230801-nolibc-warnings-v2-0-1ba5ca57bd9b@weisss…
Changes in v2:
- Don't drop unused test helpers, mark them as __attribute__((unused))
- Make some function in nolibc-test static
- Also handle -W and -Wextra
- Link to v1: https://lore.kernel.org/r/20230731-nolibc-warnings-v1-0-74973d2a52d7@weisss…
---
Thomas Weißschuh (14):
tools/nolibc: drop unused variables
tools/nolibc: fix return type of getpagesize()
tools/nolibc: setvbuf: avoid unused parameter warnings
tools/nolibc: sys: avoid implicit sign cast
tools/nolibc: stdint: use int for size_t on 32bit
selftests/nolibc: drop unused variables
selftests/nolibc: mark test helpers as potentially unused
selftests/nolibc: make functions static if possible
selftests/nolibc: avoid unused parameter warnings
selftests/nolibc: avoid sign-compare warnings
selftests/nolibc: use correct return type for read() and write()
selftests/nolibc: prevent out of bounds access in expect_vfprintf
selftests/nolibc: don't strip nolibc-test
selftests/nolibc: enable compiler warnings
tools/include/nolibc/stdint.h | 4 +
tools/include/nolibc/stdio.h | 5 +-
tools/include/nolibc/sys.h | 7 +-
tools/testing/selftests/nolibc/Makefile | 4 +-
tools/testing/selftests/nolibc/nolibc-test.c | 111 ++++++++++++++++-----------
5 files changed, 80 insertions(+), 51 deletions(-)
---
base-commit: bc87f9562af7b2b4cb07dcaceccfafcf05edaff8
change-id: 20230731-nolibc-warnings-c6e47284ac03
Best regards,
--
Thomas Weißschuh <linux(a)weissschuh.net>
Hi,
This is v2 of the fix for the problem of cpu buffers becoming unavailable
after some operations on the files 'tracing_cpumask' and 'snapshot'; it also
adds a testcase. The changes are shown below.
v2:
- Fix compile issue reported by kernel test robot <lkp(a)intel.com> with
suggestion from Steve:
- Link: https://lore.kernel.org/all/202308050731.PQutr3r0-lkp@intel.com/
- Link: https://lore.kernel.org/all/20230804125107.41d6cdb1@gandalf.local.home/
- Add a step to set tracing_on in testcase (see patch 2) and add
descriptions of each step.
v1:
- Link: https://lore.kernel.org/all/20230804124549.2562977-1-zhengyejian1@huawei.co…
Zheng Yejian (2):
tracing: Fix cpu buffers unavailable due to 'record_disabled' messed
selftests/ftrace: Add a basic testcase for snapshot
kernel/trace/trace.c | 6 ++++
.../ftrace/test.d/00basic/snapshot1.tc | 31 +++++++++++++++++++
2 files changed, 37 insertions(+)
create mode 100644 tools/testing/selftests/ftrace/test.d/00basic/snapshot1.tc
--
2.25.1
Hi, steve,
after some operations on file 'tracing_cpumask' and 'snapshot', trace
ring buffer can no longer record anything. This series contain a patch
to fix it and a contrived testcase to reproduce it.
I think the reproduction testcase is useful to help others to check if
their version has this problem and verify the bugfix. However, currently
in "tools/testing/selftests/ftrace/test.d", there seems to be no appropriate
subdirectory to put this kind of reproduction.
So for now I put the testcase in "00basic" because it is basically simple. Or
should there be a new directory to collect simple reproduction testcases?
Zheng Yejian (2):
tracing: Fix cpu buffers unavailable due to 'record_disabled' messed
selftests/ftrace: Add a basic testcase for snapshot
kernel/trace/trace.c | 2 ++
.../ftrace/test.d/00basic/snapshot1.tc | 17 +++++++++++++++++
2 files changed, 19 insertions(+)
create mode 100644 tools/testing/selftests/ftrace/test.d/00basic/snapshot1.tc
--
2.25.1
Here is a new batch of fixes related to MPTCP for v6.5 and older.
Patches 1 and 2 fix issues with MPTCP Join selftest when manually
launched with '-i' parameter to use 'ip mptcp' tool instead of the
dedicated one (pm_nl_ctl). The issues have been there since v5.18.
Thank you Andrea for your first contributions to MPTCP code in the
upstream kernel!
Patch 3 avoids corrupting the data stream when trying to reset
connections that have fallen back to TCP. This can happen from v6.1.
Patch 4 fixes a race when doing a disconnect() and an accept() in
parallel on a listener socket. The issue only happens in rare cases, if
the user is really unlucky, since a fix that landed in v6.3 and was
backported up to v6.1.
Signed-off-by: Matthieu Baerts <matthieu.baerts(a)tessares.net>
---
Andrea Claudi (2):
selftests: mptcp: join: fix 'delete and re-add' test
selftests: mptcp: join: fix 'implicit EP' test
Paolo Abeni (2):
mptcp: avoid bogus reset on fallback close
mptcp: fix disconnect vs accept race
net/mptcp/protocol.c | 2 +-
net/mptcp/protocol.h | 1 -
net/mptcp/subflow.c | 60 ++++++++++++-------------
tools/testing/selftests/net/mptcp/mptcp_join.sh | 6 ++-
4 files changed, 35 insertions(+), 34 deletions(-)
---
base-commit: 0f71c9caf26726efea674646f566984e735cc3b9
change-id: 20230803-upstream-net-20230803-misc-fixes-6-5-6046c6ca74b6
Best regards,
--
Matthieu Baerts <matthieu.baerts(a)tessares.net>
Submit the top-level headers also from the kunit test module notifier
initialization callback, so external tools that are parsing dmesg for
kunit test output are able to tell how many test suites should be expected
and whether to continue parsing after complete output from the first test
suite is collected.
Extend kunit module notifier initialization callback with a processing
path for only listing the tests provided by a module if the kunit action
parameter is set to "list", so external tools can obtain a list of test
cases to be executed in advance and can do a better job of assigning
kernel messages interleaved with kunit output to specific tests.
Use test filtering functions in kunit module notifier callback functions,
so external tools are able to execute individual test cases from kunit
test modules in order to still better isolate their potential impact on
kernel messages that appear interleaved with output from other tests.
v4: Use kunit_exec_run_tests() (Mauro, Rae), but prevent it from
emitting the headers when called on load of non-test modules,
- don't use a different list format, use kunit_exec_list_tests() (Rae),
- refresh on top of newly introduced attributes patches, handle newly
introduced kunit.action=list_attr case (Rae).
v3: Fix CONFIG_GLOB, required by filtering functions, not selected when
building as a module.
v2: Fix new name of a structure moved to kunit namespace not updated
across all uses.
Janusz Krzysztofik (3):
kunit: Report the count of test suites in a module
kunit: Make 'list' action available to kunit test modules
kunit: Allow kunit test modules to use test filtering
include/kunit/test.h | 21 ++++++++
lib/kunit/Kconfig | 2 +-
lib/kunit/executor.c | 115 +++++++++++++++++++++++++------------------
lib/kunit/test.c | 40 ++++++++++++++-
4 files changed, 128 insertions(+), 50 deletions(-)
base-commit: 5a175d369c702ce08c9feb630125c9fc7a9e1370
--
2.41.0
Commit 3bcbc20942db ("selftests/rseq: Play nice with binaries statically
linked against glibc 2.35+") which is now in Linus' tree introduced uses
of __weak but did nothing to ensure that a definition is provided for it
resulting in build failures for the rseq tests:
rseq.c:41:1: error: unknown type name '__weak'
__weak ptrdiff_t __rseq_offset;
^
rseq.c:41:17: error: expected ';' after top level declarator
__weak ptrdiff_t __rseq_offset;
^
;
rseq.c:42:1: error: unknown type name '__weak'
__weak unsigned int __rseq_size;
^
rseq.c:43:1: error: unknown type name '__weak'
__weak unsigned int __rseq_flags;
Fix this by using the definition from tools/include compiler.h.
Fixes: 3bcbc20942db ("selftests/rseq: Play nice with binaries statically linked against glibc 2.35+")
Signed-off-by: Mark Brown <broonie(a)kernel.org>
---
It'd be good if the KVM testing could include builds of the rseq
selftests, the KVM tests pull in code from rseq but not the build system
which has resulted in multiple failures like this.
---
tools/testing/selftests/rseq/Makefile | 4 +++-
tools/testing/selftests/rseq/rseq.c | 2 ++
2 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile
index b357ba24af06..7a957c7d459a 100644
--- a/tools/testing/selftests/rseq/Makefile
+++ b/tools/testing/selftests/rseq/Makefile
@@ -4,8 +4,10 @@ ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),)
CLANG_FLAGS += -no-integrated-as
endif
+top_srcdir = ../../../..
+
CFLAGS += -O2 -Wall -g -I./ $(KHDR_INCLUDES) -L$(OUTPUT) -Wl,-rpath=./ \
- $(CLANG_FLAGS)
+ $(CLANG_FLAGS) -I$(top_srcdir)/tools/include
LDLIBS += -lpthread -ldl
# Own dependencies because we only want to build against 1st prerequisite, but
diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c
index a723da253244..96e812bdf8a4 100644
--- a/tools/testing/selftests/rseq/rseq.c
+++ b/tools/testing/selftests/rseq/rseq.c
@@ -31,6 +31,8 @@
#include <sys/auxv.h>
#include <linux/auxvec.h>
+#include <linux/compiler.h>
+
#include "../kselftest.h"
#include "rseq.h"
---
base-commit: 5d0c230f1de8c7515b6567d9afba1f196fb4e2f4
change-id: 20230804-kselftest-rseq-build-9d537942b1de
Best regards,
--
Mark Brown <broonie(a)kernel.org>
test_kmem_basic creates 100,000 negative dentries, with each one mapping
to a slab object. After memory.high is set, these are reclaimed through
the shrink_slab function call which reclaims all 100,000 entries. The
test passes the majority of the time because when slab1 or current is
calculated, it is often above 0; however, 0 is also an acceptable value.
Signed-off-by: Lucas Karpinski <lkarpins(a)redhat.com>
---
In the previous patch, I missed a change to the variable 'current' even
after some testing as the issue was so sporadic. Current takes the slab
size into account and can also face the same issue where it fails since
the reported value is 0, which is an acceptable value.
Drop: b4abfc19 in mm-unstable
V2: https://lore.kernel.org/all/ix6vzgjqay2x7bskle7pypoint4nj66fwq7odvd5hektatv…
tools/testing/selftests/cgroup/test_kmem.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c
index 1b2cec9d18a4..ed2e50bb1e76 100644
--- a/tools/testing/selftests/cgroup/test_kmem.c
+++ b/tools/testing/selftests/cgroup/test_kmem.c
@@ -75,11 +75,11 @@ static int test_kmem_basic(const char *root)
sleep(1);
slab1 = cg_read_key_long(cg, "memory.stat", "slab ");
- if (slab1 <= 0)
+ if (slab1 < 0)
goto cleanup;
current = cg_read_long(cg, "memory.current");
- if (current <= 0)
+ if (current < 0)
goto cleanup;
if (slab1 < slab0 / 2 && current < slab0 / 2)
--
2.41.0
Hi, Willy
Here are the last 3 patches for v6.6 from me.
They include two generic patches from the tinyconfig part1 series and one
static related patch derived from Thomas' series.
Best regards,
Zhangjin
Zhangjin Wu (3):
selftests/nolibc: allow report with existing test log
selftests/nolibc: fix up O= option support
tools/nolibc: stackprotector.h: make __stack_chk_init static
tools/include/nolibc/crt.h | 2 +-
tools/include/nolibc/stackprotector.h | 5 ++---
tools/testing/selftests/nolibc/Makefile | 11 +++++++++--
3 files changed, 12 insertions(+), 6 deletions(-)
--
2.25.1
Will noticed that with newer toolchains memcpy() ends up being
implemented with SVE instructions, breaking the signals tests when in
streaming mode. We fixed this by using an open coded version of
OPTIMIZER_HIDE_VAR(), but in the process it was noticed that some of the
selftests are using the tools/include headers and it might be nice to
share things there. We also have a custom compiler.h in the BTI tests.
Update the tools/include headers to have what we need, pull them into
the arm64 selftests build and make use of them in the signals and BTI
tests. Since the resulting changes are a bit invasive for a fix we keep
an initial patch using the open coding, updating and replacing that
later.
Signed-off-by: Mark Brown <broonie(a)kernel.org>
---
Changes in v4:
- Roll in a refactoring to include and use the tools/include headers.
- Link to v3: https://lore.kernel.org/r/20230720-arm64-signal-memcpy-fix-v3-1-08aed2385d6…
Changes in v3:
- Open code OPTIMIZER_HIDE_VAR() instead of the memory clobber.
- Link to v2: https://lore.kernel.org/r/20230712-arm64-signal-memcpy-fix-v2-1-494f7025caf…
Changes in v2:
- Rebase onto v6.5-rc1.
- Link to v1: https://lore.kernel.org/r/20230628-arm64-signal-memcpy-fix-v1-1-db3e0300829…
---
Mark Brown (6):
kselftest/arm64: Exit streaming mode after collecting signal context
tools compiler.h: Add OPTIMIZER_HIDE_VAR()
tools include: Add some common function attributes
kselftest/arm64: Make the tools/include headers available
kselftest/arm64: Use shared OPTIMZER_HIDE_VAR() definiton
kselftest/arm64: Use the tools/include compiler.h rather than our own
tools/include/linux/compiler.h | 18 +++++++++++++++
tools/testing/selftests/arm64/Makefile | 2 ++
tools/testing/selftests/arm64/bti/compiler.h | 21 -----------------
tools/testing/selftests/arm64/bti/system.c | 4 +---
tools/testing/selftests/arm64/bti/system.h | 4 ++--
tools/testing/selftests/arm64/bti/test.c | 1 -
.../selftests/arm64/signal/test_signals_utils.h | 27 +++++++++++++++++++++-
7 files changed, 49 insertions(+), 28 deletions(-)
---
base-commit: 6eaae198076080886b9e7d57f4ae06fa782f90ef
change-id: 20230628-arm64-signal-memcpy-fix-7de3b3c8fa10
Best regards,
--
Mark Brown <broonie(a)kernel.org>
[ Upstream commit 4acfe3dfde685a5a9eaec5555351918e2d7266a1 ]
Dan Carpenter spotted a race condition in a couple of situations like
these in the test_firmware driver:
static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
{
u8 val;
int ret;
ret = kstrtou8(buf, 10, &val);
if (ret)
return ret;
mutex_lock(&test_fw_mutex);
*(u8 *)cfg = val;
mutex_unlock(&test_fw_mutex);
/* Always return full write size even if we didn't consume all */
return size;
}
static ssize_t config_num_requests_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
int rc;
mutex_lock(&test_fw_mutex);
if (test_fw_config->reqs) {
pr_err("Must call release_all_firmware prior to changing config\n");
rc = -EINVAL;
mutex_unlock(&test_fw_mutex);
goto out;
}
mutex_unlock(&test_fw_mutex);
// NOTE: HERE is the race!!! Function can be preempted!
// test_fw_config->reqs can change between the release of
// the lock above and the acquire of the lock in the
// test_dev_config_update_u8()
rc = test_dev_config_update_u8(buf, count,
&test_fw_config->num_requests);
out:
return rc;
}
static ssize_t config_read_fw_idx_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
return test_dev_config_update_u8(buf, count,
&test_fw_config->read_fw_idx);
}
The function test_dev_config_update_u8() is called from both the locked
and the unlocked context, function config_num_requests_store() and
config_read_fw_idx_store() which can both be called asynchronously as
they are driver's methods, while test_dev_config_update_u8() and siblings
change their argument pointed to by u8 *cfg or similar pointer.
To avoid deadlock on test_fw_mutex, the lock is dropped before calling
test_dev_config_update_u8() and re-acquired within test_dev_config_update_u8()
itself, but alas this creates a race condition.
Having two locks wouldn't assure a race-proof mutual exclusion.
This situation is best avoided by the introduction of a new, unlocked
function __test_dev_config_update_u8() which can be called from the locked
context and reducing test_dev_config_update_u8() to:
static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
{
int ret;
mutex_lock(&test_fw_mutex);
ret = __test_dev_config_update_u8(buf, size, cfg);
mutex_unlock(&test_fw_mutex);
return ret;
}
doing the locking and calling the unlocked primitive, which enables both
locked and unlocked versions without duplication of code.
Fixes: c92316bf8e948 ("test_firmware: add batched firmware tests")
Cc: Luis R. Rodriguez <mcgrof(a)kernel.org>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Russ Weight <russell.h.weight(a)intel.com>
Cc: Takashi Iwai <tiwai(a)suse.de>
Cc: Tianfei Zhang <tianfei.zhang(a)intel.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: Colin Ian King <colin.i.king(a)gmail.com>
Cc: Randy Dunlap <rdunlap(a)infradead.org>
Cc: linux-kselftest(a)vger.kernel.org
Cc: stable(a)vger.kernel.org # v5.4, 4.19, 4.14
Suggested-by: Dan Carpenter <error27(a)gmail.com>
Link: https://lore.kernel.org/r/20230509084746.48259-1-mirsad.todorovac@alu.unizg…
Signed-off-by: Mirsad Todorovac <mirsad.todorovac(a)alu.unizg.hr>
[ This is the patch to fix the racing condition in locking for the 5.4, ]
[ 4.19 and 4.14 stable branches. Not all the fixes from the upstream ]
[ commit apply, but those which do are verbatim equal to those in the ]
[ upstream commit. ]
---
v3:
minor bug fixes in the commit description. no change to the code.
5.4, 4.19 and 4.14 passed build, 5.4 and 4.19 passed kselftest.
unable to boot 4.14, should work (no changes to lib/test_firmware.c).
v2:
bundled locking and ENOSPC patches together.
tested on 5.4 and 4.19 stable.
lib/test_firmware.c | 37 ++++++++++++++++++++++++++++---------
1 file changed, 28 insertions(+), 9 deletions(-)
diff --git a/lib/test_firmware.c b/lib/test_firmware.c
index 38553944e967..92d7195d5b5b 100644
--- a/lib/test_firmware.c
+++ b/lib/test_firmware.c
@@ -301,16 +301,26 @@ static ssize_t config_test_show_str(char *dst,
return len;
}
-static int test_dev_config_update_bool(const char *buf, size_t size,
- bool *cfg)
+static inline int __test_dev_config_update_bool(const char *buf, size_t size,
+ bool *cfg)
{
int ret;
- mutex_lock(&test_fw_mutex);
if (strtobool(buf, cfg) < 0)
ret = -EINVAL;
else
ret = size;
+
+ return ret;
+}
+
+static int test_dev_config_update_bool(const char *buf, size_t size,
+ bool *cfg)
+{
+ int ret;
+
+ mutex_lock(&test_fw_mutex);
+ ret = __test_dev_config_update_bool(buf, size, cfg);
mutex_unlock(&test_fw_mutex);
return ret;
@@ -340,7 +350,7 @@ static ssize_t test_dev_config_show_int(char *buf, int cfg)
return snprintf(buf, PAGE_SIZE, "%d\n", val);
}
-static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
+static inline int __test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
{
int ret;
long new;
@@ -352,14 +362,23 @@ static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
if (new > U8_MAX)
return -EINVAL;
- mutex_lock(&test_fw_mutex);
*(u8 *)cfg = new;
- mutex_unlock(&test_fw_mutex);
/* Always return full write size even if we didn't consume all */
return size;
}
+static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
+{
+ int ret;
+
+ mutex_lock(&test_fw_mutex);
+ ret = __test_dev_config_update_u8(buf, size, cfg);
+ mutex_unlock(&test_fw_mutex);
+
+ return ret;
+}
+
static ssize_t test_dev_config_show_u8(char *buf, u8 cfg)
{
u8 val;
@@ -392,10 +411,10 @@ static ssize_t config_num_requests_store(struct device *dev,
mutex_unlock(&test_fw_mutex);
goto out;
}
- mutex_unlock(&test_fw_mutex);
- rc = test_dev_config_update_u8(buf, count,
- &test_fw_config->num_requests);
+ rc = __test_dev_config_update_u8(buf, count,
+ &test_fw_config->num_requests);
+ mutex_unlock(&test_fw_mutex);
out:
return rc;
--
2.34.1
[ Upstream commit 4acfe3dfde685a5a9eaec5555351918e2d7266a1 ]
Dan Carpenter spotted a race condition in a couple of situations like
these in the test_firmware driver:
static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
{
u8 val;
int ret;
ret = kstrtou8(buf, 10, &val);
if (ret)
return ret;
mutex_lock(&test_fw_mutex);
*(u8 *)cfg = val;
mutex_unlock(&test_fw_mutex);
/* Always return full write size even if we didn't consume all */
return size;
}
static ssize_t config_num_requests_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
int rc;
mutex_lock(&test_fw_mutex);
if (test_fw_config->reqs) {
pr_err("Must call release_all_firmware prior to changing config\n");
rc = -EINVAL;
mutex_unlock(&test_fw_mutex);
goto out;
}
mutex_unlock(&test_fw_mutex);
// NOTE: HERE is the race!!! Function can be preempted!
// test_fw_config->reqs can change between the release of
// the lock above and the acquire of the lock in the
// test_dev_config_update_u8()
rc = test_dev_config_update_u8(buf, count,
&test_fw_config->num_requests);
out:
return rc;
}
static ssize_t config_read_fw_idx_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
return test_dev_config_update_u8(buf, count,
&test_fw_config->read_fw_idx);
}
The function test_dev_config_update_u8() is called from both the locked
and the unlocked context, function config_num_requests_store() and
config_read_fw_idx_store() which can both be called asynchronously as
they are driver's methods, while test_dev_config_update_u8() and siblings
change their argument pointed to by u8 *cfg or similar pointer.
To avoid deadlock on test_fw_mutex, the lock is dropped before calling
test_dev_config_update_u8() and re-acquired within test_dev_config_update_u8()
itself, but alas this creates a race condition.
Having two locks wouldn't assure a race-proof mutual exclusion.
This situation is best avoided by the introduction of a new, unlocked
function __test_dev_config_update_u8() which can be called from the locked
context and reducing test_dev_config_update_u8() to:
static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
{
int ret;
mutex_lock(&test_fw_mutex);
ret = __test_dev_config_update_u8(buf, size, cfg);
mutex_unlock(&test_fw_mutex);
return ret;
}
doing the locking and calling the unlocked primitive, which enables both
locked and unlocked versions without duplication of code.
Fixes: c92316bf8e948 ("test_firmware: add batched firmware tests")
Cc: Luis R. Rodriguez <mcgrof(a)kernel.org>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Russ Weight <russell.h.weight(a)intel.com>
Cc: Takashi Iwai <tiwai(a)suse.de>
Cc: Tianfei Zhang <tianfei.zhang(a)intel.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: Colin Ian King <colin.i.king(a)gmail.com>
Cc: Randy Dunlap <rdunlap(a)infradead.org>
Cc: linux-kselftest(a)vger.kernel.org
Cc: stable(a)vger.kernel.org # v5.4, 4.19, 4.14
Suggested-by: Dan Carpenter <error27(a)gmail.com>
Link: https://lore.kernel.org/r/20230509084746.48259-1-mirsad.todorovac@alu.unizg…
Signed-off-by: Mirsad Todorovac <mirsad.todorovac(a)alu.unizg.hr>
[ This is the patch to fix the racing condition in locking for the 5.4, ]
[ 4.19 and 4.14 stable branches. Not all the fixes from the upstream ]
[ commit apply, but those which do are verbatim equal to those in the ]
[ upstream commit. ]
---
v4:
minor versioning clarifications for the patchwork. no changes to the commit.
v3:
fixed a minor typo. no change to commit.
v2:
tested on 5.4 stable build.
lib/test_firmware.c | 37 ++++++++++++++++++++++++++++---------
1 file changed, 28 insertions(+), 9 deletions(-)
diff --git a/lib/test_firmware.c b/lib/test_firmware.c
index 38553944e967..92d7195d5b5b 100644
--- a/lib/test_firmware.c
+++ b/lib/test_firmware.c
@@ -301,16 +301,26 @@ static ssize_t config_test_show_str(char *dst,
return len;
}
-static int test_dev_config_update_bool(const char *buf, size_t size,
- bool *cfg)
+static inline int __test_dev_config_update_bool(const char *buf, size_t size,
+ bool *cfg)
{
int ret;
- mutex_lock(&test_fw_mutex);
if (strtobool(buf, cfg) < 0)
ret = -EINVAL;
else
ret = size;
+
+ return ret;
+}
+
+static int test_dev_config_update_bool(const char *buf, size_t size,
+ bool *cfg)
+{
+ int ret;
+
+ mutex_lock(&test_fw_mutex);
+ ret = __test_dev_config_update_bool(buf, size, cfg);
mutex_unlock(&test_fw_mutex);
return ret;
@@ -340,7 +350,7 @@ static ssize_t test_dev_config_show_int(char *buf, int cfg)
return snprintf(buf, PAGE_SIZE, "%d\n", val);
}
-static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
+static inline int __test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
{
int ret;
long new;
@@ -352,14 +362,23 @@ static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
if (new > U8_MAX)
return -EINVAL;
- mutex_lock(&test_fw_mutex);
*(u8 *)cfg = new;
- mutex_unlock(&test_fw_mutex);
/* Always return full write size even if we didn't consume all */
return size;
}
+static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
+{
+ int ret;
+
+ mutex_lock(&test_fw_mutex);
+ ret = __test_dev_config_update_u8(buf, size, cfg);
+ mutex_unlock(&test_fw_mutex);
+
+ return ret;
+}
+
static ssize_t test_dev_config_show_u8(char *buf, u8 cfg)
{
u8 val;
@@ -392,10 +411,10 @@ static ssize_t config_num_requests_store(struct device *dev,
mutex_unlock(&test_fw_mutex);
goto out;
}
- mutex_unlock(&test_fw_mutex);
- rc = test_dev_config_update_u8(buf, count,
- &test_fw_config->num_requests);
+ rc = __test_dev_config_update_u8(buf, count,
+ &test_fw_config->num_requests);
+ mutex_unlock(&test_fw_mutex);
out:
return rc;
--
2.34.1
[ commit be37bed754ed90b2655382f93f9724b3c1aae847 upstream ]
Dan Carpenter spotted that test_fw_config->reqs will be leaked if
trigger_batched_requests_store() is called two or more times.
The same appears with trigger_batched_requests_async_store().
This bug wasn't triggered by the tests, but observed by Dan's visual
inspection of the code.
The recommended workaround was to return -EBUSY if test_fw_config->reqs
is already allocated.
Fixes: c92316bf8e94 ("test_firmware: add batched firmware tests")
Cc: Luis Chamberlain <mcgrof(a)kernel.org>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Russ Weight <russell.h.weight(a)intel.com>
Cc: Tianfei Zhang <tianfei.zhang(a)intel.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: Colin Ian King <colin.i.king(a)gmail.com>
Cc: Randy Dunlap <rdunlap(a)infradead.org>
Cc: linux-kselftest(a)vger.kernel.org
Cc: stable(a)vger.kernel.org # v4.19
Suggested-by: Dan Carpenter <error27(a)gmail.com>
Suggested-by: Takashi Iwai <tiwai(a)suse.de>
Link: https://lore.kernel.org/r/20230509084746.48259-2-mirsad.todorovac@alu.unizg…
Signed-off-by: Mirsad Todorovac <mirsad.todorovac(a)alu.unizg.hr>
[ This is a backport to v4.19 stable branch without a change in code from the 5.4+ patch ]
---
v2:
no changes to commit. minor clarifications with versioning for the patchwork.
v1:
patch submitted verbatim from the 5.4+ branch to 4.19
lib/test_firmware.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/lib/test_firmware.c b/lib/test_firmware.c
index f4cc874021da..e4688821eab8 100644
--- a/lib/test_firmware.c
+++ b/lib/test_firmware.c
@@ -618,6 +618,11 @@ static ssize_t trigger_batched_requests_store(struct device *dev,
mutex_lock(&test_fw_mutex);
+ if (test_fw_config->reqs) {
+ rc = -EBUSY;
+ goto out_bail;
+ }
+
test_fw_config->reqs =
vzalloc(array3_size(sizeof(struct test_batched_req),
test_fw_config->num_requests, 2));
@@ -721,6 +726,11 @@ ssize_t trigger_batched_requests_async_store(struct device *dev,
mutex_lock(&test_fw_mutex);
+ if (test_fw_config->reqs) {
+ rc = -EBUSY;
+ goto out_bail;
+ }
+
test_fw_config->reqs =
vzalloc(array3_size(sizeof(struct test_batched_req),
test_fw_config->num_requests, 2));
--
2.34.1
The openvswitch selftests currently contain a few cases for managing the
datapath, which includes creating datapath instances, adding interfaces,
and doing some basic feature / upcall tests. This is useful to validate
the control path.
Add the ability to program some of the more common flows with actions. This
can be improved over time to include regression testing, etc.
v2->v3:
1. Dropped support for ipv6 in nat() case
2. Fixed a spelling mistake in 2/5 commit message.
v1->v2:
1. Fix issue when parsing ipv6 in the NAT action
2. Fix issue calculating length during ctact parsing
3. Fix error message when invalid bridge is passed
4. Fold in Adrian's patch to support key masks
Aaron Conole (4):
selftests: openvswitch: add an initial flow programming case
selftests: openvswitch: add a test for ipv4 forwarding
selftests: openvswitch: add basic ct test case parsing
selftests: openvswitch: add ct-nat test case with ipv4
Adrian Moreno (1):
selftests: openvswitch: support key masks
.../selftests/net/openvswitch/openvswitch.sh | 223 +++++++
.../selftests/net/openvswitch/ovs-dpctl.py | 588 +++++++++++++++++-
2 files changed, 787 insertions(+), 24 deletions(-)
--
2.40.1
Submit the top-level headers also from the kunit test module notifier
initialization callback, so external tools that are parsing dmesg for
kunit test output are able to tell how many test suites should be expected
and whether to continue parsing after complete output from the first test
suite is collected.
Extend kunit module notifier initialization callback with a processing
path for only listing the tests provided by a module if the kunit action
parameter is set to "list", so external tools can obtain a list of test
cases to be executed in advance and can do a better job of assigning
kernel messages interleaved with kunit output to specific tests.
Use test filtering functions in kunit module notifier callback functions,
so external tools are able to execute individual test cases from kunit
test modules in order to still better isolate their potential impact on
kernel messages that appear interleaved with output from other tests.
v3: Fix CONFIG_GLOB, required by filtering functions, not selected when
building as a module.
v2: Fix new name of a structure moved to kunit namespace not updated
across all uses.
Janusz Krzysztofik (3):
kunit: Report the count of test suites in a module
kunit: Make 'list' action available to kunit test modules
kunit: Allow kunit test modules to use test filtering
include/kunit/test.h | 14 +++++++++++
lib/kunit/Kconfig | 2 +-
lib/kunit/executor.c | 57 +++++++++++++++++++++++++-------------------
lib/kunit/test.c | 57 +++++++++++++++++++++++++++++++++++++++++++-
4 files changed, 104 insertions(+), 26 deletions(-)
--
2.41.0
NOTE: This patch is tested against 5.4 stable
NOTE: This is a patch for the 5.4 stable branch, not for the torvalds tree.
The torvalds tree, and stable tree 5.10, 5.15, 6.1 and 6.4 branches
were fixed in the separate
commit ID 4acfe3dfde68 ("test_firmware: prevent race conditions by a correct implementation of locking")
which was incompatible with 5.4
Dan Carpenter spotted a race condition in a couple of situations like
these in the test_firmware driver:
static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
{
u8 val;
int ret;
ret = kstrtou8(buf, 10, &val);
if (ret)
return ret;
mutex_lock(&test_fw_mutex);
*(u8 *)cfg = val;
mutex_unlock(&test_fw_mutex);
/* Always return full write size even if we didn't consume all */
return size;
}
static ssize_t config_num_requests_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
int rc;
mutex_lock(&test_fw_mutex);
if (test_fw_config->reqs) {
pr_err("Must call release_all_firmware prior to changing config\n");
rc = -EINVAL;
mutex_unlock(&test_fw_mutex);
goto out;
}
mutex_unlock(&test_fw_mutex);
// NOTE: HERE is the race!!! Function can be preempted!
// test_fw_config->reqs can change between the release of
// the lock above and acquire of the lock in the
// test_dev_config_update_u8()
rc = test_dev_config_update_u8(buf, count,
&test_fw_config->num_requests);
out:
return rc;
}
static ssize_t config_read_fw_idx_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
return test_dev_config_update_u8(buf, count,
&test_fw_config->read_fw_idx);
}
The function test_dev_config_update_u8() is called from both the locked
and the unlocked context, function config_num_requests_store() and
config_read_fw_idx_store() which can both be called asynchronously as
they are driver's methods, while test_dev_config_update_u8() and siblings
change their argument pointed to by u8 *cfg or similar pointer.
To avoid deadlock on test_fw_mutex, the lock is dropped before calling
test_dev_config_update_u8() and re-acquired within test_dev_config_update_u8()
itself, but alas this creates a race condition.
Having two locks wouldn't assure a race-proof mutual exclusion.
This situation is best avoided by the introduction of a new, unlocked
function __test_dev_config_update_u8() which can be called from the locked
context and reducing test_dev_config_update_u8() to:
static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
{
int ret;
mutex_lock(&test_fw_mutex);
ret = __test_dev_config_update_u8(buf, size, cfg);
mutex_unlock(&test_fw_mutex);
return ret;
}
doing the locking and calling the unlocked primitive, which enables both
locked and unlocked versions without duplication of code.
Fixes: c92316bf8e948 ("test_firmware: add batched firmware tests")
Cc: Luis R. Rodriguez <mcgrof(a)kernel.org>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Russ Weight <russell.h.weight(a)intel.com>
Cc: Takashi Iwai <tiwai(a)suse.de>
Cc: Tianfei Zhang <tianfei.zhang(a)intel.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: Colin Ian King <colin.i.king(a)gmail.com>
Cc: Randy Dunlap <rdunlap(a)infradead.org>
Cc: linux-kselftest(a)vger.kernel.org
Cc: stable(a)vger.kernel.org # v5.4
Suggested-by: Dan Carpenter <error27(a)gmail.com>
Link: https://lore.kernel.org/r/20230509084746.48259-1-mirsad.todorovac@alu.unizg…
Signed-off-by: Mirsad Todorovac <mirsad.todorovac(a)alu.unizg.hr>
---
lib/test_firmware.c | 37 ++++++++++++++++++++++++++++---------
1 file changed, 28 insertions(+), 9 deletions(-)
diff --git a/lib/test_firmware.c b/lib/test_firmware.c
index 38553944e967..92d7195d5b5b 100644
--- a/lib/test_firmware.c
+++ b/lib/test_firmware.c
@@ -301,16 +301,26 @@ static ssize_t config_test_show_str(char *dst,
return len;
}
-static int test_dev_config_update_bool(const char *buf, size_t size,
- bool *cfg)
+static inline int __test_dev_config_update_bool(const char *buf, size_t size,
+ bool *cfg)
{
int ret;
- mutex_lock(&test_fw_mutex);
if (strtobool(buf, cfg) < 0)
ret = -EINVAL;
else
ret = size;
+
+ return ret;
+}
+
+static int test_dev_config_update_bool(const char *buf, size_t size,
+ bool *cfg)
+{
+ int ret;
+
+ mutex_lock(&test_fw_mutex);
+ ret = __test_dev_config_update_bool(buf, size, cfg);
mutex_unlock(&test_fw_mutex);
return ret;
@@ -340,7 +350,7 @@ static ssize_t test_dev_config_show_int(char *buf, int cfg)
return snprintf(buf, PAGE_SIZE, "%d\n", val);
}
-static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
+static inline int __test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
{
int ret;
long new;
@@ -352,14 +362,23 @@ static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
if (new > U8_MAX)
return -EINVAL;
- mutex_lock(&test_fw_mutex);
*(u8 *)cfg = new;
- mutex_unlock(&test_fw_mutex);
/* Always return full write size even if we didn't consume all */
return size;
}
+static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
+{
+ int ret;
+
+ mutex_lock(&test_fw_mutex);
+ ret = __test_dev_config_update_u8(buf, size, cfg);
+ mutex_unlock(&test_fw_mutex);
+
+ return ret;
+}
+
static ssize_t test_dev_config_show_u8(char *buf, u8 cfg)
{
u8 val;
@@ -392,10 +411,10 @@ static ssize_t config_num_requests_store(struct device *dev,
mutex_unlock(&test_fw_mutex);
goto out;
}
- mutex_unlock(&test_fw_mutex);
- rc = test_dev_config_update_u8(buf, count,
- &test_fw_config->num_requests);
+ rc = __test_dev_config_update_u8(buf, count,
+ &test_fw_config->num_requests);
+ mutex_unlock(&test_fw_mutex);
out:
return rc;
--
2.34.1
test_kmem_basic creates 100,000 negative dentries, with each one mapping
to a slab object. After memory.high is set, these are reclaimed through
the shrink_slab function call which reclaims all 100,000 entries. The
test passes the majority of the time because when slab1 is calculated,
it is often above 0, however, 0 is also an acceptable value.
Signed-off-by: Lucas Karpinski <lkarpins(a)redhat.com>
---
v3: rebased on mm-unstable
tools/testing/selftests/cgroup/test_kmem.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c
index 1b2cec9d18a4..67cc0182058d 100644
--- a/tools/testing/selftests/cgroup/test_kmem.c
+++ b/tools/testing/selftests/cgroup/test_kmem.c
@@ -75,7 +75,7 @@ static int test_kmem_basic(const char *root)
sleep(1);
slab1 = cg_read_key_long(cg, "memory.stat", "slab ");
- if (slab1 <= 0)
+ if (slab1 < 0)
goto cleanup;
current = cg_read_long(cg, "memory.current");
--
2.41.0
As is described in the "How to use MPTCP?" section in MPTCP wiki [1]:
"Your app should create sockets with IPPROTO_MPTCP as the proto:
( socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP); ). Legacy apps can be
forced to create and use MPTCP sockets instead of TCP ones via the
mptcpize command bundled with the mptcpd daemon."
But the mptcpize (LD_PRELOAD technique) command has some limitations
[2]:
- it doesn't work if the application is not using libc (e.g. GoLang
apps)
- in some envs, it might not be easy to set env vars / change the way
apps are launched, e.g. on Android
- mptcpize needs to be launched with all apps that want MPTCP: we could
have more control from BPF to enable MPTCP only for some apps or all the
ones of a netns or a cgroup, etc.
- it is not in BPF, we cannot talk about it at netdev conf.
So this patchset attempts to use BPF to implement functions similar to
mptcpize.
The main idea is to add a hook in sys_socket() to change the protocol id
from IPPROTO_TCP (or 0) to IPPROTO_MPTCP.
[1]
https://github.com/multipath-tcp/mptcp_net-next/wiki
[2]
https://github.com/multipath-tcp/mptcp_net-next/issues/79
v10:
- drop "#ifdef CONFIG_BPF_JIT".
- include vmlinux.h and bpf_tracing_net.h to avoid defining some
macros.
- drop unneeded checks for mptcp.
v9:
- update comment for 'update_socket_protocol'.
v8:
- drop the additional checks on the 'protocol' value after the
'update_socket_protocol()' call.
v7:
- add __weak and __diag_* for update_socket_protocol.
v6:
- add update_socket_protocol.
v5:
- add bpf_mptcpify helper.
v4:
- use lsm_cgroup/socket_create
v3:
- patch 8: char cmd[128]; -> char cmd[256];
v2:
- Fix build selftests errors reported by CI
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/79
Geliang Tang (5):
bpf: Add update_socket_protocol hook
selftests/bpf: Use random netns name for mptcp
selftests/bpf: Add two mptcp netns helpers
selftests/bpf: Drop unneeded checks for mptcp
selftests/bpf: Add mptcpify test
net/mptcp/bpf.c | 15 ++
net/socket.c | 24 ++++
.../testing/selftests/bpf/prog_tests/mptcp.c | 129 +++++++++++++++---
tools/testing/selftests/bpf/progs/mptcpify.c | 20 +++
4 files changed, 169 insertions(+), 19 deletions(-)
create mode 100644 tools/testing/selftests/bpf/progs/mptcpify.c
--
2.35.3
As is described in the "How to use MPTCP?" section in MPTCP wiki [1]:
"Your app should create sockets with IPPROTO_MPTCP as the proto:
( socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP); ). Legacy apps can be
forced to create and use MPTCP sockets instead of TCP ones via the
mptcpize command bundled with the mptcpd daemon."
But the mptcpize (LD_PRELOAD technique) command has some limitations
[2]:
- it doesn't work if the application is not using libc (e.g. GoLang
apps)
- in some envs, it might not be easy to set env vars / change the way
apps are launched, e.g. on Android
- mptcpize needs to be launched with all apps that want MPTCP: we could
have more control from BPF to enable MPTCP only for some apps or all the
ones of a netns or a cgroup, etc.
- it is not in BPF, we cannot talk about it at netdev conf.
So this patchset attempts to use BPF to implement functions similar to
mptcpize.
The main idea is to add a hook in sys_socket() to change the protocol id
from IPPROTO_TCP (or 0) to IPPROTO_MPTCP.
[1]
https://github.com/multipath-tcp/mptcp_net-next/wiki
[2]
https://github.com/multipath-tcp/mptcp_net-next/issues/79
v9:
- update comment for 'update_socket_protocol'.
v8:
- drop the additional checks on the 'protocol' value after the
'update_socket_protocol()' call.
v7:
- add __weak and __diag_* for update_socket_protocol.
v6:
- add update_socket_protocol.
v5:
- add bpf_mptcpify helper.
v4:
- use lsm_cgroup/socket_create
v3:
- patch 8: char cmd[128]; -> char cmd[256];
v2:
- Fix build selftests errors reported by CI
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/79
Geliang Tang (4):
bpf: Add update_socket_protocol hook
selftests/bpf: Use random netns name for mptcp
selftests/bpf: Add two mptcp netns helpers
selftests/bpf: Add mptcpify test
net/mptcp/bpf.c | 17 +++
net/socket.c | 24 ++++
.../testing/selftests/bpf/prog_tests/mptcp.c | 125 ++++++++++++++++--
tools/testing/selftests/bpf/progs/mptcpify.c | 25 ++++
4 files changed, 182 insertions(+), 9 deletions(-)
create mode 100644 tools/testing/selftests/bpf/progs/mptcpify.c
--
2.35.3
This test fails routinely in our prod testing environment, and I can
reproduce it locally as well.
The test allocates dcache inside a cgroup, then drops the memory limit
and checks that usage drops correspondingly. The reason it fails is
because dentries are freed with an RCU delay - a debugging sleep shows
that usage drops as expected shortly after.
Insert a 1s sleep after dropping the limit. This should be good
enough, assuming that machines running those tests are otherwise not
very busy.
Signed-off-by: Johannes Weiner <hannes(a)cmpxchg.org>
---
tools/testing/selftests/cgroup/test_kmem.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c
index 258ddc565deb..1b2cec9d18a4 100644
--- a/tools/testing/selftests/cgroup/test_kmem.c
+++ b/tools/testing/selftests/cgroup/test_kmem.c
@@ -70,6 +70,10 @@ static int test_kmem_basic(const char *root)
goto cleanup;
cg_write(cg, "memory.high", "1M");
+
+ /* wait for RCU freeing */
+ sleep(1);
+
slab1 = cg_read_key_long(cg, "memory.stat", "slab ");
if (slab1 <= 0)
goto cleanup;
--
2.41.0
The riscv selftests (which were modeled after the arm64 selftests) are
improperly declaring the "emit_tests" target to depend upon the "all"
target. This approach, when combined with commit 9fc96c7c19df
("selftests: error out if kernel header files are not yet built"), has
caused build failures [1] on arm64, and is likely to cause similar
failures for riscv.
To fix this, simply remove the unnecessary "all" dependency from the
emit_tests target. The dependency is still effectively honored, because
again, invocation is via "install", which also depends upon "all".
An alternative approach would be to harden the emit_tests target so that
it can depend upon "all", but that's a lot more complicated and hard to
get right, and doesn't seem worth it, especially given that emit_tests
should probably not be overridden at all.
[1] https://lore.kernel.org/20230710-kselftest-fix-arm64-v1-1-48e872844f25@kern…
Fixes: 9fc96c7c19df ("selftests: error out if kernel header files are not yet built")
Signed-off-by: John Hubbard <jhubbard(a)nvidia.com>
---
Andrew,
With this, and with my arm64 fix [2] that you've already put into
mm-unstable, you should be able to safely drop commit 819187ab8741
("selftests: fix arm64 test installation").
[2] https://lore.kernel.org/20230711005629.2547838-1-jhubbard@nvidia.com
thanks,
John Hubbard
tools/testing/selftests/riscv/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/testing/selftests/riscv/Makefile b/tools/testing/selftests/riscv/Makefile
index 9dd629cc86aa..f4b3d5c9af5b 100644
--- a/tools/testing/selftests/riscv/Makefile
+++ b/tools/testing/selftests/riscv/Makefile
@@ -43,7 +43,7 @@ run_tests: all
done
# Avoid any output on non riscv on emit_tests
-emit_tests: all
+emit_tests:
@for DIR in $(RISCV_SUBTARGETS); do \
BUILD_TARGET=$(OUTPUT)/$$DIR; \
$(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@; \
base-commit: 3f01e9fed8454dcd89727016c3e5b2fbb8f8e50c
prerequisite-patch-id: 37c92f7425689ff069fb83996a25cd98e78d7242
--
2.41.0
Nested translation is a hardware feature that is supported by many modern
IOMMU hardware. It uses two stages (stage-1, stage-2) of address translation
to get access to the physical address. stage-1 translation table is owned
by userspace (e.g. by a guest OS), while stage-2 is owned by kernel. Changes
to stage-1 translation table should be followed by an IOTLB invalidation.
Take Intel VT-d as an example, the stage-1 translation table is I/O page
table. As the below diagram shows, guest I/O page table pointer in GPA
(guest physical address) is passed to the host and used to perform the stage-1
address translation. Along with it, modifications to present mappings in the
guest I/O page table should be followed with an IOTLB invalidation.
.-------------. .---------------------------.
| vIOMMU | | Guest I/O page table |
| | '---------------------------'
.----------------/
| PASID Entry |--- PASID cache flush --+
'-------------' |
| | V
| | I/O page table pointer in GPA
'-------------'
Guest
------| Shadow |---------------------------|--------
v v v
Host
.-------------. .------------------------.
| pIOMMU | | FS for GIOVA->GPA |
| | '------------------------'
.----------------/ |
| PASID Entry | V (Nested xlate)
'----------------\.----------------------------------.
| | | SS for GPA->HPA, unmanaged domain|
| | '----------------------------------'
'-------------'
Where:
- FS = First stage page tables
- SS = Second stage page tables
<Intel VT-d Nested translation>
In IOMMUFD, all the translation tables are tracked by hw_pagetable (hwpt)
and each has an iommu_domain allocated from iommu driver. So in this series
hw_pagetable and iommu_domain means the same thing if no special note.
IOMMUFD has already supported allocating hw_pagetable that is linked with
an IOAS. However, nesting requires IOMMUFD to allow allocating hw_pagetable
with driver specific parameters and interface to sync stage-1 IOTLB as user
owns the stage-1 translation table.
This series is based on the iommu hw info reporting series [1]. It first
introduces a new iommu op for allocating domains with user data and an op
for invalidating the stage-1 IOTLB, and then extends the IOMMUFD internal infrastructure
to accept user_data and parent hwpt, then relays the data to the iommu core to
allocate a user iommu_domain. After that, it extends the ioctl IOMMU_HWPT_ALLOC to
accept user data and stage-2 hwpt ID to allocate hwpt. Along with it, ioctl
IOMMU_HWPT_INVALIDATE is added to invalidate stage-1 IOTLB. This is needed
for user-managed hwpts. Selftest is added as well to cover the new ioctls.
Complete code can be found in [2], QEMU code can be found in [3].
Finally, this is teamwork together with Nicolin Chen and Lu Baolu. Thanks
to them for the help. ^_^. Looking forward to your feedback.
[1] https://lore.kernel.org/linux-iommu/20230724105936.107042-1-yi.l.liu@intel.…
[2] https://github.com/yiliu1765/iommufd/tree/iommufd_nesting
[3] https://github.com/yiliu1765/qemu/tree/wip/iommufd_rfcv4_nesting
Change log:
v3:
- Add new uAPI things in alphabetical order
- Pass in "enum iommu_hwpt_type hwpt_type" to op->domain_alloc_user for
sanity, replacing the previous op->domain_alloc_user_data_len solution
- Return ERR_PTR from domain_alloc_user instead of NULL
- Only add IOMMU_RESV_SW_MSI to kernel-managed HWPT in nested translation (Kevin)
- Add IOMMU_RESV_IOVA_RANGES to report resv iova ranges to userspace hence
userspace is able to exclude the ranges in the stage-1 HWPT (e.g. guest I/O
page table). (Kevin)
- Add selftest coverage for the new IOMMU_RESV_IOVA_RANGES ioctl
- Minor changes per Kevin's inputs
v2: https://lore.kernel.org/linux-iommu/20230511143844.22693-1-yi.l.liu@intel.c…
- Add union iommu_domain_user_data to include all user data structures to avoid
passing void * in kernel APIs.
- Add iommu op to return user data length for user domain allocation
- Rename struct iommu_hwpt_alloc::data_type to be hwpt_type
- Store the invalidation data length in iommu_domain_ops::cache_invalidate_user_data_len
- Convert cache_invalidate_user op to be int instead of void
- Remove @data_type in struct iommu_hwpt_invalidate
- Remove out_hwpt_type_bitmap in struct iommu_hw_info hence drop patch 08 of v1
v1: https://lore.kernel.org/linux-iommu/20230309080910.607396-1-yi.l.liu@intel.…
Thanks,
Yi Liu
Lu Baolu (2):
iommu: Add new iommu op to create domains owned by userspace
iommu: Add nested domain support
Nicolin Chen (6):
iommufd/hw_pagetable: Do not populate user-managed hw_pagetables
iommufd: Only enforce IOMMU_RESV_SW_MSI when attaching user-managed
HWPT
iommufd/selftest: Add domain_alloc_user() support in iommu mock
iommufd/selftest: Add coverage for IOMMU_HWPT_ALLOC with user data
iommufd/selftest: Add IOMMU_TEST_OP_MD_CHECK_IOTLB test op
iommufd/selftest: Add coverage for IOMMU_HWPT_INVALIDATE ioctl
Yi Liu (9):
iommufd/hw_pagetable: Use domain_alloc_user op for domain allocation
iommufd: Pass in hwpt_type/parent/user_data to
iommufd_hw_pagetable_alloc()
iommufd: Add IOMMU_RESV_IOVA_RANGES
iommufd: IOMMU_HWPT_ALLOC allocation with user data
iommufd: Add IOMMU_HWPT_INVALIDATE
iommufd/selftest: Add a helper to get test device
iommufd/selftest: Add IOMMU_TEST_OP_DEV_[ADD|DEL]_RESERVED to add/del
reserved regions to selftest device
iommufd/selftest: Add .get_resv_regions() for mock_dev
iommufd/selftest: Add coverage for IOMMU_RESV_IOVA_RANGES
drivers/iommu/iommufd/device.c | 9 +-
drivers/iommu/iommufd/hw_pagetable.c | 181 +++++++++++-
drivers/iommu/iommufd/io_pagetable.c | 5 +-
drivers/iommu/iommufd/iommufd_private.h | 20 +-
drivers/iommu/iommufd/iommufd_test.h | 36 +++
drivers/iommu/iommufd/main.c | 59 +++-
drivers/iommu/iommufd/selftest.c | 266 ++++++++++++++++--
include/linux/iommu.h | 34 +++
include/uapi/linux/iommufd.h | 96 ++++++-
tools/testing/selftests/iommu/iommufd.c | 224 ++++++++++++++-
tools/testing/selftests/iommu/iommufd_utils.h | 70 +++++
11 files changed, 958 insertions(+), 42 deletions(-)
--
2.34.1
test_kmem_basic creates 100,000 negative dentries, with each one mapping
to a slab object. After memory.high is set, these are reclaimed through
the shrink_slab function call which reclaims all 100,000 entries. The
test passes the majority of the time because when slab1 is calculated,
it is often above 0, however, 0 is also an acceptable value.
Signed-off-by: Lucas Karpinski <lkarpins(a)redhat.com>
---
https://lore.kernel.org/all/m6jbt5hzq27ygt3l4xyiaxxb7i5auvb2lahbcj4yaxxigqz…
V2: Corrected title
tools/testing/selftests/cgroup/test_kmem.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c
index 258ddc565deb..ba0a0bfc5a98 100644
--- a/tools/testing/selftests/cgroup/test_kmem.c
+++ b/tools/testing/selftests/cgroup/test_kmem.c
@@ -71,7 +71,7 @@ static int test_kmem_basic(const char *root)
cg_write(cg, "memory.high", "1M");
slab1 = cg_read_key_long(cg, "memory.stat", "slab ");
- if (slab1 <= 0)
+ if (slab1 < 0)
goto cleanup;
current = cg_read_long(cg, "memory.current");
--
2.41.0
The following error happens:
In file included from vstate_exec_nolibc.c:2:
/usr/include/riscv64-linux-gnu/sys/prctl.h:42:12: error: conflicting types for ‘prctl’; have ‘int(int, ...)’
42 | extern int prctl (int __option, ...) __THROW;
| ^~~~~
In file included from ./../../../../include/nolibc/nolibc.h:99,
from <command-line>:
./../../../../include/nolibc/sys.h:892:5: note: previous definition of ‘prctl’ with type ‘int(int, long unsigned int, long unsigned int, long unsigned int, long unsigned int)’
892 | int prctl(int option, unsigned long arg2, unsigned long arg3,
| ^~~~~
Fix this by not including <sys/prctl.h>, which is not needed here since
prctl syscall is directly called using its number.
Fixes: 7cf6198ce22d ("selftests: Test RISC-V Vector prctl interface")
Signed-off-by: Alexandre Ghiti <alexghiti(a)rivosinc.com>
---
tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c | 2 --
1 file changed, 2 deletions(-)
diff --git a/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c
index 5cbc392944a6..2c0d2b1126c1 100644
--- a/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c
+++ b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c
@@ -1,6 +1,4 @@
// SPDX-License-Identifier: GPL-2.0-only
-#include <sys/prctl.h>
-
#define THIS_PROGRAM "./vstate_exec_nolibc"
int main(int argc, char **argv)
--
2.39.2
[ Upstream commit 4acfe3dfde685a5a9eaec5555351918e2d7266a1 ]
Dan Carpenter spotted a race condition in a couple of situations like
these in the test_firmware driver:
static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
{
u8 val;
int ret;
ret = kstrtou8(buf, 10, &val);
if (ret)
return ret;
mutex_lock(&test_fw_mutex);
*(u8 *)cfg = val;
mutex_unlock(&test_fw_mutex);
/* Always return full write size even if we didn't consume all */
return size;
}
static ssize_t config_num_requests_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
int rc;
mutex_lock(&test_fw_mutex);
if (test_fw_config->reqs) {
pr_err("Must call release_all_firmware prior to changing config\n");
rc = -EINVAL;
mutex_unlock(&test_fw_mutex);
goto out;
}
mutex_unlock(&test_fw_mutex);
// NOTE: HERE is the race!!! Function can be preempted!
// test_fw_config->reqs can change between the release of
// the lock above and acquire of the lock in the
// test_dev_config_update_u8()
rc = test_dev_config_update_u8(buf, count,
&test_fw_config->num_requests);
out:
return rc;
}
static ssize_t config_read_fw_idx_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
return test_dev_config_update_u8(buf, count,
&test_fw_config->read_fw_idx);
}
The function test_dev_config_update_u8() is called from both the locked
and the unlocked context, function config_num_requests_store() and
config_read_fw_idx_store() which can both be called asynchronously as
they are driver's methods, while test_dev_config_update_u8() and siblings
change their argument pointed to by u8 *cfg or similar pointer.
To avoid deadlock on test_fw_mutex, the lock is dropped before calling
test_dev_config_update_u8() and re-acquired within test_dev_config_update_u8()
itself, but alas this creates a race condition.
Having two locks wouldn't assure a race-proof mutual exclusion.
This situation is best avoided by the introduction of a new, unlocked
function __test_dev_config_update_u8() which can be called from the locked
context and reducing test_dev_config_update_u8() to:
static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
{
int ret;
mutex_lock(&test_fw_mutex);
ret = __test_dev_config_update_u8(buf, size, cfg);
mutex_unlock(&test_fw_mutex);
return ret;
}
doing the locking and calling the unlocked primitive, which enables both
locked and unlocked versions without duplication of code.
Fixes: c92316bf8e948 ("test_firmware: add batched firmware tests")
Cc: Luis R. Rodriguez <mcgrof(a)kernel.org>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Russ Weight <russell.h.weight(a)intel.com>
Cc: Takashi Iwai <tiwai(a)suse.de>
Cc: Tianfei Zhang <tianfei.zhang(a)intel.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: Colin Ian King <colin.i.king(a)gmail.com>
Cc: Randy Dunlap <rdunlap(a)infradead.org>
Cc: linux-kselftest(a)vger.kernel.org
Cc: stable(a)vger.kernel.org # v5.4, 4.19
Suggested-by: Dan Carpenter <error27(a)gmail.com>
Link: https://lore.kernel.org/r/20230509084746.48259-1-mirsad.todorovac@alu.unizg…
Signed-off-by: Mirsad Todorovac <mirsad.todorovac(a)alu.unizg.hr>
[ This is the patch to fix the racing condition in locking for the 5.4, ]
[ 4.19 and 4.4 stable branches. Not all the fixes from the upstream ]
[ commit apply, but those which do are verbatim equal to those in the ]
[ upstream commit. ]
---
v2:
bundled locking and ENOSPC patches together.
tested on 5.4 and 4.19 stable.
lib/test_firmware.c | 37 ++++++++++++++++++++++++++++---------
1 file changed, 28 insertions(+), 9 deletions(-)
diff --git a/lib/test_firmware.c b/lib/test_firmware.c
index 38553944e967..92d7195d5b5b 100644
--- a/lib/test_firmware.c
+++ b/lib/test_firmware.c
@@ -301,16 +301,26 @@ static ssize_t config_test_show_str(char *dst,
return len;
}
-static int test_dev_config_update_bool(const char *buf, size_t size,
- bool *cfg)
+static inline int __test_dev_config_update_bool(const char *buf, size_t size,
+ bool *cfg)
{
int ret;
- mutex_lock(&test_fw_mutex);
if (strtobool(buf, cfg) < 0)
ret = -EINVAL;
else
ret = size;
+
+ return ret;
+}
+
+static int test_dev_config_update_bool(const char *buf, size_t size,
+ bool *cfg)
+{
+ int ret;
+
+ mutex_lock(&test_fw_mutex);
+ ret = __test_dev_config_update_bool(buf, size, cfg);
mutex_unlock(&test_fw_mutex);
return ret;
@@ -340,7 +350,7 @@ static ssize_t test_dev_config_show_int(char *buf, int cfg)
return snprintf(buf, PAGE_SIZE, "%d\n", val);
}
-static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
+static inline int __test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
{
int ret;
long new;
@@ -352,14 +362,23 @@ static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
if (new > U8_MAX)
return -EINVAL;
- mutex_lock(&test_fw_mutex);
*(u8 *)cfg = new;
- mutex_unlock(&test_fw_mutex);
/* Always return full write size even if we didn't consume all */
return size;
}
+static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
+{
+ int ret;
+
+ mutex_lock(&test_fw_mutex);
+ ret = __test_dev_config_update_u8(buf, size, cfg);
+ mutex_unlock(&test_fw_mutex);
+
+ return ret;
+}
+
static ssize_t test_dev_config_show_u8(char *buf, u8 cfg)
{
u8 val;
@@ -392,10 +411,10 @@ static ssize_t config_num_requests_store(struct device *dev,
mutex_unlock(&test_fw_mutex);
goto out;
}
- mutex_unlock(&test_fw_mutex);
- rc = test_dev_config_update_u8(buf, count,
- &test_fw_config->num_requests);
+ rc = __test_dev_config_update_u8(buf, count,
+ &test_fw_config->num_requests);
+ mutex_unlock(&test_fw_mutex);
out:
return rc;
--
2.39.3
It seems that the most critical issue with vm.memfd_noexec=2 (the fact
that passing MFD_EXEC would bypass it entirely[1]) has been fixed in
Andrew's tree[2], but there are still some outstanding issues that need
to be addressed:
* The dmesg warnings are pr_warn_once, which on most systems means that
they will be used up by systemd or some other boot process and
userspace developers will never see it. The original patch posted to
the ML used pr_warn_ratelimited but the merged patch had it changed
(with a comment about it being "per review"), but given that the
current warnings are useless, pr_warn_ratelimited makes far more
sense.
* vm.memfd_noexec=2 shouldn't reject old-style memfd_create(2) syscalls
because it will make it far too difficult to ever migrate. Instead it
should imply MFD_EXEC.
* The ratcheting mechanism for vm.memfd_noexec doesn't make sense as a
security mechanism because a CAP_SYS_ADMIN capable user can create
executable binaries in a hidden tmpfs very easily, not to mention the
many other things they can do.
* The memfd selftests would not exit with a non-zero error code when
certain tests that ran in a forked process (specifically the ones
related to MFD_EXEC and MFD_NOEXEC_SEAL) failed.
(This patchset is based on top of Jeff Xu's patches[2] fixing the
MFD_EXEC bug in vm.memfd_noexec=2.)
[1]: https://lore.kernel.org/all/ZJwcsU0vI-nzgOB_@codewreck.org/
[2]: https://lore.kernel.org/all/20230705063315.3680666-1-jeffxu@google.com/
Aleksa Sarai (3):
memfd: cleanups for vm.memfd_noexec handling
memfd: remove racheting feature from vm.memfd_noexec
selftests: memfd: error out test process when child test fails
include/linux/pid_namespace.h | 16 +++------
kernel/pid_sysctl.h | 7 ----
mm/memfd.c | 32 +++++++----------
tools/testing/selftests/memfd/memfd_test.c | 41 ++++++++++++++++++----
4 files changed, 51 insertions(+), 45 deletions(-)
--
2.41.0
As is described in the "How to use MPTCP?" section in MPTCP wiki [1]:
"Your app should create sockets with IPPROTO_MPTCP as the proto:
( socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP); ). Legacy apps can be
forced to create and use MPTCP sockets instead of TCP ones via the
mptcpize command bundled with the mptcpd daemon."
But the mptcpize (LD_PRELOAD technique) command has some limitations
[2]:
- it doesn't work if the application is not using libc (e.g. GoLang
apps)
- in some envs, it might not be easy to set env vars / change the way
apps are launched, e.g. on Android
- mptcpize needs to be launched with all apps that want MPTCP: we could
have more control from BPF to enable MPTCP only for some apps or all the
ones of a netns or a cgroup, etc.
- it is not in BPF, we cannot talk about it at netdev conf.
So this patchset attempts to use BPF to implement functions similar to
mptcpize.
The main idea is to add a hook in sys_socket() to change the protocol id
from IPPROTO_TCP (or 0) to IPPROTO_MPTCP.
[1]
https://github.com/multipath-tcp/mptcp_net-next/wiki
[2]
https://github.com/multipath-tcp/mptcp_net-next/issues/79
v8:
- drop the additional checks on the 'protocol' value after the
'update_socket_protocol()' call.
v7:
- add __weak and __diag_* for update_socket_protocol.
v6:
- add update_socket_protocol.
v5:
- add bpf_mptcpify helper.
v4:
- use lsm_cgroup/socket_create
v3:
- patch 8: char cmd[128]; -> char cmd[256];
v2:
- Fix build selftests errors reported by CI
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/79
Geliang Tang (4):
bpf: Add update_socket_protocol hook
selftests/bpf: Use random netns name for mptcp
selftests/bpf: Add two mptcp netns helpers
selftests/bpf: Add mptcpify test
net/mptcp/bpf.c | 17 +++
net/socket.c | 25 ++++
.../testing/selftests/bpf/prog_tests/mptcp.c | 125 ++++++++++++++++--
tools/testing/selftests/bpf/progs/mptcpify.c | 25 ++++
4 files changed, 183 insertions(+), 9 deletions(-)
create mode 100644 tools/testing/selftests/bpf/progs/mptcpify.c
--
2.35.3
Hi, Willy
Here is v5, which purely includes the ppc parts, with two critical fixups
for the latest gcc 13.1.0 toolchain; now both run and run-user pass.
Here is the run-user test report:
// with local toolchains
$ for arch in ppc ppc64 ppc64le; do make run-user XARCH=$arch | grep "status: "; done
165 test(s): 157 passed, 8 skipped, 0 failed => status: warning
165 test(s): 157 passed, 8 skipped, 0 failed => status: warning
165 test(s): 157 passed, 8 skipped, 0 failed => status: warning
// with latest toolchains
$ for arch in ppc ppc64 ppc64le; do make run-user XARCH=$arch CC=/path/to/gcc-13.1.0-nolibc/powerpc64-linux/bin/powerpc64-linux-gcc | grep status; file nolibc-test; done
165 test(s): 157 passed, 8 skipped, 0 failed => status: warning
nolibc-test: ELF 32-bit MSB executable, PowerPC or cisco 4500, version 1 (SYSV), statically linked, stripped
165 test(s): 157 passed, 8 skipped, 0 failed => status: warning
nolibc-test: ELF 64-bit MSB executable, 64-bit PowerPC or cisco 7500, version 1 (SYSV), statically linked, stripped
165 test(s): 157 passed, 8 skipped, 0 failed => status: warning
nolibc-test: ELF 64-bit LSB executable, 64-bit PowerPC or cisco 7500, version 1 (SYSV), statically linked, stripped
Since the missing serial console enabling patch [1] for ppc32 has
already gotten a Reviewed-by line from the ppc maintainer, the ppc
defconfig now aligns with the others, so the nolibc-test-config related
stuff can simply be moved to the next tinyconfig series.
Based on v4 [2], beside removing several nolibc-test-config related
patches, two bugs with the latest gcc 13.1.0 have been fixed.
Changes from v4 --> v5:
* tools/nolibc: add support for powerpc64
selftests/nolibc: add XARCH and ARCH mapping support
selftests/nolibc: add test support for ppc64
selftests/nolibc: allow customize CROSS_COMPILE by architecture
selftests/nolibc: customize CROSS_COMPILE for 32/64-bit powerpc
Almost the same as v4.
* tools/nolibc: add support for powerpc
For 32-bit PowerPC, with newer gcc compilers (e.g. gcc 13.1.0),
"omit-frame-pointer" fails with __attribute__((no_stack_protector)) but
works with __attribute__((__optimize__("-fno-stack-protector"))).
Use the latter for ppc32 to work around the issue.
* selftests/nolibc: add test support for ppc
Add default CFLAGS for ppc to allow build with the
latest powerpc64-linux-gcc toolchain from
https://mirrors.edge.kernel.org/pub/tools/crosstool/
* selftests/nolibc: add test support for ppc64le
Align with the kernel: prefer the elfv2 ABI to the elfv1 ABI when the toolchain
supports it; otherwise, an ABI-mismatched binary will not run.
Best regards,
Zhangjin Wu
---
[1]: https://lore.kernel.org/lkml/bb7b5f9958b3e3a20f6573ff7ce7c5dc566e7e32.16909…
[2]: https://lore.kernel.org/lkml/cover.1690916314.git.falcon@tinylab.org/
Zhangjin Wu (8):
tools/nolibc: add support for powerpc
tools/nolibc: add support for powerpc64
selftests/nolibc: add XARCH and ARCH mapping support
selftests/nolibc: add test support for ppc
selftests/nolibc: add test support for ppc64le
selftests/nolibc: add test support for ppc64
selftests/nolibc: allow customize CROSS_COMPILE by architecture
selftests/nolibc: customize CROSS_COMPILE for 32/64-bit powerpc
tools/include/nolibc/arch-powerpc.h | 213 ++++++++++++++++++++++++
tools/include/nolibc/arch.h | 2 +
tools/testing/selftests/nolibc/Makefile | 74 ++++++--
3 files changed, 277 insertions(+), 12 deletions(-)
create mode 100644 tools/include/nolibc/arch-powerpc.h
--
2.25.1
Hi, Willy, Hi Thomas
v4 here is mainly with a new nolibc-test-config target from your
suggestions and with the reordering of some patches to make
nolibc-test-config be fast forward.
run-user tests for all of the powerpc variants:
$ for arch in ppc ppc64 ppc64le; do make run-user XARCH=$arch | grep status; done
165 test(s): 157 passed, 8 skipped, 0 failed => status: warning
165 test(s): 157 passed, 8 skipped, 0 failed => status: warning
165 test(s): 157 passed, 8 skipped, 0 failed => status: warning
and defconfig + run for ppc:
$ make nolibc-test-config XARCH=ppc
$ make run XARCH=ppc
165 test(s): 159 passed, 6 skipped, 0 failed => status: warning
* tools/nolibc: add support for powerpc
tools/nolibc: add support for powerpc64
No change.
* selftests/nolibc: fix up O= option support
selftests/nolibc: add macros to reduce duplicated changes
From tinyconfig-part1 patchset, required by our nolibc-test-config target
Let nolibc-test-config be able to use objtree and the kernel related
macros directly.
* selftests/nolibc: add XARCH and ARCH mapping support
Moved before nolibc-test-config, for the NOLIBC_TEST_CONFIG macro used by
nolibc-test-config target
Willy talked about this twice, let nolibc-test-config be able to use
nolibc-test-$(XARCH).config listed in NOLIBC_TEST_CONFIG directly.
* selftests/nolibc: add nolibc-test-config target
selftests/nolibc: add help for nolibc-test-config target
A new generic nolibc-test-config target is added, allows to enable
additional options for a top-level config target.
defconfig is reserved as an alias of nolibc-test-config.
As suggested by Thomas and Willy.
* selftests/nolibc: add test support for ppc
selftests/nolibc: add test support for ppc64le
selftests/nolibc: add test support for ppc64
Renamed from $(XARCH).config to nolibc-test-$(XARCH).config
As suggested by Willy.
* selftests/nolibc: allow customize CROSS_COMPILE by architecture
selftests/nolibc: customize CROSS_COMPILE for 32/64-bit powerpc
Moved here as suggested by Willy.
Best regards,
Zhangjin
---
[1]: https://lore.kernel.org/lkml/cover.1690468707.git.falcon@tinylab.org/
Zhangjin Wu (12):
tools/nolibc: add support for powerpc
tools/nolibc: add support for powerpc64
selftests/nolibc: fix up O= option support
selftests/nolibc: add macros to reduce duplicated changes
selftests/nolibc: add XARCH and ARCH mapping support
selftests/nolibc: add nolibc-test-config target
selftests/nolibc: add help for nolibc-test-config target
selftests/nolibc: add test support for ppc
selftests/nolibc: add test support for ppc64le
selftests/nolibc: add test support for ppc64
selftests/nolibc: allow customize CROSS_COMPILE by architecture
selftests/nolibc: customize CROSS_COMPILE for 32/64-bit powerpc
tools/include/nolibc/arch-powerpc.h | 202 ++++++++++++++++++
tools/include/nolibc/arch.h | 2 +
tools/testing/selftests/nolibc/Makefile | 157 ++++++++++----
.../nolibc/configs/nolibc-test-ppc.config | 3 +
4 files changed, 327 insertions(+), 37 deletions(-)
create mode 100644 tools/include/nolibc/arch-powerpc.h
create mode 100644 tools/testing/selftests/nolibc/configs/nolibc-test-ppc.config
--
2.25.1
As is described in the "How to use MPTCP?" section in MPTCP wiki [1]:
"Your app should create sockets with IPPROTO_MPTCP as the proto:
( socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP); ). Legacy apps can be
forced to create and use MPTCP sockets instead of TCP ones via the
mptcpize command bundled with the mptcpd daemon."
But the mptcpize (LD_PRELOAD technique) command has some limitations
[2]:
- it doesn't work if the application is not using libc (e.g. GoLang
apps)
- in some envs, it might not be easy to set env vars / change the way
apps are launched, e.g. on Android
- mptcpize needs to be launched with all apps that want MPTCP: we could
have more control from BPF to enable MPTCP only for some apps or all the
ones of a netns or a cgroup, etc.
- it is not in BPF, we cannot talk about it at netdev conf.
So this patchset attempts to use BPF to implement functions similar to
mptcpize.
The main idea is to add a hook in sys_socket() to change the protocol id
from IPPROTO_TCP (or 0) to IPPROTO_MPTCP.
[1]
https://github.com/multipath-tcp/mptcp_net-next/wiki
[2]
https://github.com/multipath-tcp/mptcp_net-next/issues/79
v7:
- add __weak and __diag_* for update_socket_protocol.
v6:
- add update_socket_protocol.
v5:
- add bpf_mptcpify helper.
v4:
- use lsm_cgroup/socket_create
v3:
- patch 8: char cmd[128]; -> char cmd[256];
v2:
- Fix build selftests errors reported by CI
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/79
Geliang Tang (6):
net: socket: add update_socket_protocol hook
bpf: Register mptcp modret set
selftests/bpf: Add mptcpify program
selftests/bpf: use random netns name for mptcp
selftests/bpf: add two mptcp netns helpers
selftests/bpf: Add mptcpify selftest
net/mptcp/bpf.c | 17 +++
net/socket.c | 26 ++++
.../testing/selftests/bpf/prog_tests/mptcp.c | 125 ++++++++++++++++--
tools/testing/selftests/bpf/progs/mptcpify.c | 25 ++++
4 files changed, 184 insertions(+), 9 deletions(-)
create mode 100644 tools/testing/selftests/bpf/progs/mptcpify.c
--
2.35.3
Here's a follow-up from my RFC series last year:
https://lore.kernel.org/lkml/20221004093131.40392-1-thuth@redhat.com/T/
Basic idea of this series is now to use the kselftest_harness.h
framework to get TAP output in the tests, so that it is easier
for the user to see what is going on, and e.g. to be able to
detect whether a certain test is part of the test binary or not
(which is useful when tests get extended in the course of time).
Thomas Huth (4):
KVM: selftests: Rename the ASSERT_EQ macro
KVM: selftests: x86: Use TAP interface in the sync_regs test
KVM: selftests: x86: Use TAP interface in the fix_hypercall test
KVM: selftests: x86: Use TAP interface in the userspace_msr_exit test
.../selftests/kvm/aarch64/aarch32_id_regs.c | 8 +-
.../selftests/kvm/aarch64/page_fault_test.c | 10 +-
.../testing/selftests/kvm/include/test_util.h | 4 +-
tools/testing/selftests/kvm/lib/kvm_util.c | 2 +-
.../selftests/kvm/max_guest_memory_test.c | 2 +-
tools/testing/selftests/kvm/s390x/cmma_test.c | 62 +++++-----
tools/testing/selftests/kvm/s390x/memop.c | 6 +-
tools/testing/selftests/kvm/s390x/tprot.c | 4 +-
.../x86_64/dirty_log_page_splitting_test.c | 18 +--
.../x86_64/exit_on_emulation_failure_test.c | 2 +-
.../selftests/kvm/x86_64/fix_hypercall_test.c | 16 ++-
.../kvm/x86_64/nested_exceptions_test.c | 12 +-
.../kvm/x86_64/recalc_apic_map_test.c | 6 +-
.../selftests/kvm/x86_64/sync_regs_test.c | 113 +++++++++++++++---
.../selftests/kvm/x86_64/tsc_msrs_test.c | 32 ++---
.../kvm/x86_64/userspace_msr_exit_test.c | 19 +--
.../vmx_exception_with_invalid_guest_state.c | 2 +-
.../selftests/kvm/x86_64/vmx_pmu_caps_test.c | 3 +-
.../selftests/kvm/x86_64/xapic_state_test.c | 8 +-
.../selftests/kvm/x86_64/xen_vmcall_test.c | 20 ++--
20 files changed, 218 insertions(+), 131 deletions(-)
--
2.39.3
To help the developers to avoid mistakes and keep the code smaller let's
enable compiler warnings.
I stuck with __attribute__((unused)) over __maybe_unused in
nolibc-test.c for consistency with nolibc proper.
If we want to add a define, it needs to be added twice, once for nolibc
proper and once for nolibc-test; otherwise libc-test wouldn't build
anymore.
Signed-off-by: Thomas Weißschuh <linux(a)weissschuh.net>
---
Changes in v2:
- Don't drop unused test helpers, mark them as __attribute__((unused))
- Make some functions in nolibc-test static
- Also handle -W and -Wextra
- Link to v1: https://lore.kernel.org/r/20230731-nolibc-warnings-v1-0-74973d2a52d7@weisss…
---
Thomas Weißschuh (10):
tools/nolibc: drop unused variables
tools/nolibc: sys: avoid implicit sign cast
tools/nolibc: stdint: use int for size_t on 32bit
selftests/nolibc: drop unused variables
selftests/nolibc: mark test helpers as potentially unused
selftests/nolibc: make functions static if possible
selftests/nolibc: avoid unused arguments warnings
selftests/nolibc: avoid sign-compare warnings
selftests/nolibc: test return value of read() in test_vfprintf
selftests/nolibc: enable compiler warnings
tools/include/nolibc/stdint.h | 4 +
tools/include/nolibc/sys.h | 3 +-
tools/testing/selftests/nolibc/Makefile | 2 +-
tools/testing/selftests/nolibc/nolibc-test.c | 108 +++++++++++++++++----------
4 files changed, 74 insertions(+), 43 deletions(-)
---
base-commit: dfef4fc45d5713eb23d87f0863aff9c33bd4bfaf
change-id: 20230731-nolibc-warnings-c6e47284ac03
Best regards,
--
Thomas Weißschuh <linux(a)weissschuh.net>
Changes from RFC[1]
- Rebase on latest mm-unstable
- Add base-commit
----
There are use cases that need to apply DAMOS schemes to specific address
ranges or DAMON monitoring targets. NUMA nodes in the physical address
space, special memory objects in the virtual address space, and
monitoring target specific efficient monitoring results snapshot
retrieval could be examples of such use cases. This patchset extends
DAMOS filters feature for such cases, by implementing two more filter
types, namely address ranges and DAMON monitoring targets.
Patches sequence
----------------
The first seven patches are for the address ranges based DAMOS filter.
The first patch implements the filter feature and exposes it via the DAMON
kernel API. The second patch further exposes the feature to users via the
DAMON sysfs interface. The third and fourth patches implement unit
tests and selftests for the feature. Three patches (fifth to seventh)
updating the documents follow.
The following six patches are for the DAMON monitoring target based
DAMOS filter. The eighth patch implements the feature in the core layer
and exposes it via DAMON's kernel API. The ninth patch further exposes it
to users via the DAMON sysfs interface. The tenth patch adds a selftest, and
three patches (eleventh to thirteenth) update documents.
[1] https://lore.kernel.org/damon/20230728203444.70703-1-sj@kernel.org/
SeongJae Park (13):
mm/damon/core: introduce address range type damos filter
mm/damon/sysfs-schemes: support address range type DAMOS filter
mm/damon/core-test: add a unit test for __damos_filter_out()
selftests/damon/sysfs: test address range damos filter
Docs/mm/damon/design: update for address range filters
Docs/ABI/damon: update for address range DAMOS filter
Docs/admin-guide/mm/damon/usage: update for address range type DAMOS
filter
mm/damon/core: implement target type damos filter
mm/damon/sysfs-schemes: support target damos filter
selftests/damon/sysfs: test damon_target filter
Docs/mm/damon/design: update for DAMON monitoring target type DAMOS
filter
Docs/ABI/damon: update for DAMON monitoring target type DAMOS filter
Docs/admin-guide/mm/damon/usage: update for DAMON monitoring target
type DAMOS filter
.../ABI/testing/sysfs-kernel-mm-damon | 27 +++++-
Documentation/admin-guide/mm/damon/usage.rst | 34 +++++---
Documentation/mm/damon/design.rst | 24 ++++--
include/linux/damon.h | 28 +++++--
mm/damon/core-test.h | 61 ++++++++++++++
mm/damon/core.c | 62 ++++++++++++++
mm/damon/sysfs-schemes.c | 83 +++++++++++++++++++
tools/testing/selftests/damon/sysfs.sh | 5 ++
8 files changed, 299 insertions(+), 25 deletions(-)
base-commit: 32f9db36a0031f99629b5910d795b3f13f284472
--
2.25.1
Changes from RFC[1]
- Rebase on latest mm-unstable
- Add base-commit
----
The tried_regions directory of DAMON sysfs interface is useful for
retrieving a monitoring results snapshot or for DAMOS debugging. However, for
common use cases that need to monitor only the total size of the scheme
tried regions (e.g., monitoring working set size), the kernel overhead
for directory construction and user overhead for reading the content
could be high if the number of monitoring regions is not small. This
patchset implements DAMON sysfs files for efficient support of the use
case.
The first patch implements the sysfs file to reduce the user space
overhead, and the second patch implements a command for reducing the
kernel space overhead.
The third patch adds a selftest for the new file, and following two
patches update documents.
[1] https://lore.kernel.org/damon/20230728201817.70602-1-sj@kernel.org/
SeongJae Park (5):
mm/damon/sysfs-schemes: implement DAMOS tried total bytes file
mm/damon/sysfs: implement a command for updating only schemes tried
total bytes
selftests/damon/sysfs: test tried_regions/total_bytes file
Docs/ABI/damon: update for tried_regions/total_bytes
Docs/admin-guide/mm/damon/usage: update for tried_regions/total_bytes
.../ABI/testing/sysfs-kernel-mm-damon | 13 +++++-
Documentation/admin-guide/mm/damon/usage.rst | 42 ++++++++++++-------
mm/damon/sysfs-common.h | 2 +-
mm/damon/sysfs-schemes.c | 24 ++++++++++-
mm/damon/sysfs.c | 26 +++++++++---
tools/testing/selftests/damon/sysfs.sh | 1 +
6 files changed, 83 insertions(+), 25 deletions(-)
base-commit: a57d8094e1946e9dbdba0dddf0e10f9f4dceae0d
--
2.25.1
In the kvm_page_table_test test case, the start time is acquired with
clock type CLOCK_MONOTONIC_RAW, whereas the end time in function
timespec_elapsed() is acquired with clock type CLOCK_MONOTONIC. This causes
an inaccurate elapsed time calculation on some platforms such as LoongArch.
This patch modifies kvm_page_table_test to consistently use
CLOCK_MONOTONIC for the start time.
Signed-off-by: Bibo Mao <maobibo(a)loongson.cn>
---
tools/testing/selftests/kvm/kvm_page_table_test.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c
index b3b00be1ef82..69f26d80c821 100644
--- a/tools/testing/selftests/kvm/kvm_page_table_test.c
+++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
@@ -200,7 +200,7 @@ static void *vcpu_worker(void *data)
if (READ_ONCE(host_quit))
return NULL;
- clock_gettime(CLOCK_MONOTONIC_RAW, &start);
+ clock_gettime(CLOCK_MONOTONIC, &start);
ret = _vcpu_run(vcpu);
ts_diff = timespec_elapsed(start);
@@ -367,7 +367,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
/* Test the stage of KVM creating mappings */
*current_stage = KVM_CREATE_MAPPINGS;
- clock_gettime(CLOCK_MONOTONIC_RAW, &start);
+ clock_gettime(CLOCK_MONOTONIC, &start);
vcpus_complete_new_stage(*current_stage);
ts_diff = timespec_elapsed(start);
@@ -380,7 +380,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
*current_stage = KVM_UPDATE_MAPPINGS;
- clock_gettime(CLOCK_MONOTONIC_RAW, &start);
+ clock_gettime(CLOCK_MONOTONIC, &start);
vcpus_complete_new_stage(*current_stage);
ts_diff = timespec_elapsed(start);
@@ -392,7 +392,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
*current_stage = KVM_ADJUST_MAPPINGS;
- clock_gettime(CLOCK_MONOTONIC_RAW, &start);
+ clock_gettime(CLOCK_MONOTONIC, &start);
vcpus_complete_new_stage(*current_stage);
ts_diff = timespec_elapsed(start);
--
2.27.0