The basic idea here is to "simulate" memory poisoning for VMs. A VM
running on some host might encounter a memory error, after which some
page(s) are poisoned (i.e., future accesses SIGBUS). They expect that
once poisoned, pages can never become "un-poisoned". So, when we live
migrate the VM, we need to preserve the poisoned status of these pages.
When live migrating, we try to get the guest running on its new host as
quickly as possible. So, we start it running before all memory has been
copied, and before we're certain which pages should be poisoned or not.
So the basic way to use this new feature is:
- On the new host, the guest's memory is registered with userfaultfd, in
either MISSING or MINOR mode (doesn't really matter for this purpose).
- On any first access, we get a userfaultfd event. At this point we can
communicate with the old host to find out if the page was poisoned.
- If so, we can respond with a UFFDIO_POISON - this places a swap marker
so any future accesses will SIGBUS. Because the pte is now "present",
future accesses won't generate more userfaultfd events, they'll just
SIGBUS directly.
UFFDIO_POISON does not handle unmapping previously-present PTEs. This
isn't needed, because during live migration we want to intercept
all accesses with userfaultfd (not just writes, so WP mode isn't useful
for this). So whether minor or missing mode is being used (or both), the
PTE won't be present in any case, so handling that case isn't needed.
Why return VM_FAULT_HWPOISON instead of VM_FAULT_SIGBUS when one of
these markers is encountered? For "normal" userspace programs there
isn't a big difference, both yield a SIGBUS. The difference for KVM is
key though: VM_FAULT_HWPOISON will result in an MCE being injected into
the guest (which is the behavior we want). With VM_FAULT_SIGBUS, the
hypervisor would need to catch the SIGBUS and deal with the MCE
injection itself.
Signed-off-by: Axel Rasmussen <axelrasmussen(a)google.com>
---
fs/userfaultfd.c | 63 ++++++++++++++++++++++++++++++++
include/linux/swapops.h | 3 +-
include/linux/userfaultfd_k.h | 4 ++
include/uapi/linux/userfaultfd.h | 25 +++++++++++--
mm/memory.c | 4 ++
mm/userfaultfd.c | 62 ++++++++++++++++++++++++++++++-
6 files changed, 156 insertions(+), 5 deletions(-)
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 7cecd49e078b..c26a883399c9 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1965,6 +1965,66 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg)
return ret;
}
+static inline int userfaultfd_poison(struct userfaultfd_ctx *ctx, unsigned long arg)
+{
+ __s64 ret;
+ struct uffdio_poison uffdio_poison;
+ struct uffdio_poison __user *user_uffdio_poison;
+ struct userfaultfd_wake_range range;
+
+ user_uffdio_poison = (struct uffdio_poison __user *)arg;
+
+ ret = -EAGAIN;
+ if (atomic_read(&ctx->mmap_changing))
+ goto out;
+
+ ret = -EFAULT;
+ if (copy_from_user(&uffdio_poison, user_uffdio_poison,
+ /* don't copy the output fields */
+ sizeof(uffdio_poison) - (sizeof(__s64))))
+ goto out;
+
+ ret = validate_range(ctx->mm, uffdio_poison.range.start,
+ uffdio_poison.range.len);
+ if (ret)
+ goto out;
+
+ ret = -EINVAL;
+ /* double check for wraparound just in case. */
+ if (uffdio_poison.range.start + uffdio_poison.range.len <=
+ uffdio_poison.range.start) {
+ goto out;
+ }
+ if (uffdio_poison.mode & ~UFFDIO_POISON_MODE_DONTWAKE)
+ goto out;
+
+ if (mmget_not_zero(ctx->mm)) {
+ ret = mfill_atomic_poison(ctx->mm, uffdio_poison.range.start,
+ uffdio_poison.range.len,
+ &ctx->mmap_changing, 0);
+ mmput(ctx->mm);
+ } else {
+ return -ESRCH;
+ }
+
+ if (unlikely(put_user(ret, &user_uffdio_poison->updated)))
+ return -EFAULT;
+ if (ret < 0)
+ goto out;
+
+ /* len == 0 would wake all */
+ BUG_ON(!ret);
+ range.len = ret;
+ if (!(uffdio_poison.mode & UFFDIO_POISON_MODE_DONTWAKE)) {
+ range.start = uffdio_poison.range.start;
+ wake_userfault(ctx, &range);
+ }
+ ret = range.len == uffdio_poison.range.len ? 0 : -EAGAIN;
+
+out:
+ return ret;
+}
+
static inline unsigned int uffd_ctx_features(__u64 user_features)
{
/*
@@ -2066,6 +2126,9 @@ static long userfaultfd_ioctl(struct file *file, unsigned cmd,
case UFFDIO_CONTINUE:
ret = userfaultfd_continue(ctx, arg);
break;
+ case UFFDIO_POISON:
+ ret = userfaultfd_poison(ctx, arg);
+ break;
}
return ret;
}
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 4c932cb45e0b..8259fee32421 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -394,7 +394,8 @@ typedef unsigned long pte_marker;
#define PTE_MARKER_UFFD_WP BIT(0)
#define PTE_MARKER_SWAPIN_ERROR BIT(1)
-#define PTE_MARKER_MASK (BIT(2) - 1)
+#define PTE_MARKER_UFFD_POISON BIT(2)
+#define PTE_MARKER_MASK (BIT(3) - 1)
static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
{
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index ac7b0c96d351..ac8c6854097c 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -46,6 +46,7 @@ enum mfill_atomic_mode {
MFILL_ATOMIC_COPY,
MFILL_ATOMIC_ZEROPAGE,
MFILL_ATOMIC_CONTINUE,
+ MFILL_ATOMIC_POISON,
NR_MFILL_ATOMIC_MODES,
};
@@ -83,6 +84,9 @@ extern ssize_t mfill_atomic_zeropage(struct mm_struct *dst_mm,
extern ssize_t mfill_atomic_continue(struct mm_struct *dst_mm, unsigned long dst_start,
unsigned long len, atomic_t *mmap_changing,
uffd_flags_t flags);
+extern ssize_t mfill_atomic_poison(struct mm_struct *dst_mm, unsigned long start,
+ unsigned long len, atomic_t *mmap_changing,
+ uffd_flags_t flags);
extern int mwriteprotect_range(struct mm_struct *dst_mm,
unsigned long start, unsigned long len,
bool enable_wp, atomic_t *mmap_changing);
diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index 66dd4cd277bd..62151706c5a3 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -39,7 +39,8 @@
UFFD_FEATURE_MINOR_SHMEM | \
UFFD_FEATURE_EXACT_ADDRESS | \
UFFD_FEATURE_WP_HUGETLBFS_SHMEM | \
- UFFD_FEATURE_WP_UNPOPULATED)
+ UFFD_FEATURE_WP_UNPOPULATED | \
+ UFFD_FEATURE_POISON)
#define UFFD_API_IOCTLS \
((__u64)1 << _UFFDIO_REGISTER | \
(__u64)1 << _UFFDIO_UNREGISTER | \
@@ -49,12 +50,14 @@
(__u64)1 << _UFFDIO_COPY | \
(__u64)1 << _UFFDIO_ZEROPAGE | \
(__u64)1 << _UFFDIO_WRITEPROTECT | \
- (__u64)1 << _UFFDIO_CONTINUE)
+ (__u64)1 << _UFFDIO_CONTINUE | \
+ (__u64)1 << _UFFDIO_POISON)
#define UFFD_API_RANGE_IOCTLS_BASIC \
((__u64)1 << _UFFDIO_WAKE | \
(__u64)1 << _UFFDIO_COPY | \
+ (__u64)1 << _UFFDIO_WRITEPROTECT | \
(__u64)1 << _UFFDIO_CONTINUE | \
- (__u64)1 << _UFFDIO_WRITEPROTECT)
+ (__u64)1 << _UFFDIO_POISON)
/*
* Valid ioctl command number range with this API is from 0x00 to
@@ -71,6 +74,7 @@
#define _UFFDIO_ZEROPAGE (0x04)
#define _UFFDIO_WRITEPROTECT (0x06)
#define _UFFDIO_CONTINUE (0x07)
+#define _UFFDIO_POISON (0x08)
#define _UFFDIO_API (0x3F)
/* userfaultfd ioctl ids */
@@ -91,6 +95,8 @@
struct uffdio_writeprotect)
#define UFFDIO_CONTINUE _IOWR(UFFDIO, _UFFDIO_CONTINUE, \
struct uffdio_continue)
+#define UFFDIO_POISON _IOWR(UFFDIO, _UFFDIO_POISON, \
+ struct uffdio_poison)
/* read() structure */
struct uffd_msg {
@@ -225,6 +231,7 @@ struct uffdio_api {
#define UFFD_FEATURE_EXACT_ADDRESS (1<<11)
#define UFFD_FEATURE_WP_HUGETLBFS_SHMEM (1<<12)
#define UFFD_FEATURE_WP_UNPOPULATED (1<<13)
+#define UFFD_FEATURE_POISON (1<<14)
__u64 features;
__u64 ioctls;
@@ -321,6 +328,18 @@ struct uffdio_continue {
__s64 mapped;
};
+struct uffdio_poison {
+ struct uffdio_range range;
+#define UFFDIO_POISON_MODE_DONTWAKE ((__u64)1<<0)
+ __u64 mode;
+
+ /*
+ * Fields below here are written by the ioctl and must be at the end:
+ * the copy_from_user will not read past here.
+ */
+ __s64 updated;
+};
+
/*
* Flags for the userfaultfd(2) system call itself.
*/
diff --git a/mm/memory.c b/mm/memory.c
index d8a9a770b1f1..7fbda39e060d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3692,6 +3692,10 @@ static vm_fault_t handle_pte_marker(struct vm_fault *vmf)
if (WARN_ON_ONCE(!marker))
return VM_FAULT_SIGBUS;
+ /* Poison emulation explicitly requested for this PTE. */
+ if (marker & PTE_MARKER_UFFD_POISON)
+ return VM_FAULT_HWPOISON;
+
/* Higher priority than uffd-wp when data corrupted */
if (marker & PTE_MARKER_SWAPIN_ERROR)
return VM_FAULT_SIGBUS;
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index a2bf37ee276d..87b62ca1e09e 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -286,6 +286,51 @@ static int mfill_atomic_pte_continue(pmd_t *dst_pmd,
goto out;
}
+/* Handles UFFDIO_POISON for all non-hugetlb VMAs. */
+static int mfill_atomic_pte_poison(pmd_t *dst_pmd,
+ struct vm_area_struct *dst_vma,
+ unsigned long dst_addr,
+ uffd_flags_t flags)
+{
+ int ret;
+ struct mm_struct *dst_mm = dst_vma->vm_mm;
+ pte_t _dst_pte, *dst_pte;
+ spinlock_t *ptl;
+
+ _dst_pte = make_pte_marker(PTE_MARKER_UFFD_POISON);
+ dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
+
+ if (vma_is_shmem(dst_vma)) {
+ struct inode *inode;
+ pgoff_t offset, max_off;
+
+ /* serialize against truncate with the page table lock */
+ inode = dst_vma->vm_file->f_inode;
+ offset = linear_page_index(dst_vma, dst_addr);
+ max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
+ ret = -EFAULT;
+ if (unlikely(offset >= max_off))
+ goto out_unlock;
+ }
+
+ ret = -EEXIST;
+ /*
+ * For now, we don't handle unmapping pages, so only support filling in
+ * none PTEs, or replacing PTE markers.
+ */
+ if (!pte_none_mostly(*dst_pte))
+ goto out_unlock;
+
+ set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
+
+ /* No need to invalidate - it was non-present before */
+ update_mmu_cache(dst_vma, dst_addr, dst_pte);
+ ret = 0;
+out_unlock:
+ pte_unmap_unlock(dst_pte, ptl);
+ return ret;
+}
+
static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address)
{
pgd_t *pgd;
@@ -336,8 +381,12 @@ static __always_inline ssize_t mfill_atomic_hugetlb(
* supported by hugetlb. A PMD_SIZE huge pages may exist as used
* by THP. Since we can not reliably insert a zero page, this
* feature is not supported.
+ *
+ * PTE marker handling for hugetlb is a bit special, so for now
+ * UFFDIO_POISON is not supported.
*/
- if (uffd_flags_mode_is(flags, MFILL_ATOMIC_ZEROPAGE)) {
+ if (uffd_flags_mode_is(flags, MFILL_ATOMIC_ZEROPAGE) ||
+ uffd_flags_mode_is(flags, MFILL_ATOMIC_POISON)) {
mmap_read_unlock(dst_mm);
return -EINVAL;
}
@@ -481,6 +530,9 @@ static __always_inline ssize_t mfill_atomic_pte(pmd_t *dst_pmd,
if (uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE)) {
return mfill_atomic_pte_continue(dst_pmd, dst_vma,
dst_addr, flags);
+ } else if (uffd_flags_mode_is(flags, MFILL_ATOMIC_POISON)) {
+ return mfill_atomic_pte_poison(dst_pmd, dst_vma,
+ dst_addr, flags);
}
/*
@@ -702,6 +754,14 @@ ssize_t mfill_atomic_continue(struct mm_struct *dst_mm, unsigned long start,
uffd_flags_set_mode(flags, MFILL_ATOMIC_CONTINUE));
}
+ssize_t mfill_atomic_poison(struct mm_struct *dst_mm, unsigned long start,
+ unsigned long len, atomic_t *mmap_changing,
+ uffd_flags_t flags)
+{
+ return mfill_atomic(dst_mm, start, 0, len, mmap_changing,
+ uffd_flags_set_mode(flags, MFILL_ATOMIC_POISON));
+}
+
long uffd_wp_range(struct vm_area_struct *dst_vma,
unsigned long start, unsigned long len, bool enable_wp)
{
--
2.41.0.255.g8b1d071c50-goog
Writing `subprocess.Popen[str]` requires python 3.9+.
kunit.py has an assertion that the python version is 3.7+, so we should
try to stay backwards compatible.
This conflicts a bit with commit 1da2e6220e11 ("kunit: tool: fix
pre-existing `mypy --strict` errors and update run_checks.py"), since
mypy complains like so
> kunit_kernel.py:95: error: Missing type parameters for generic type "Popen" [type-arg]
Note: `mypy --strict --python-version 3.7` does not work.
We could annotate each file with comments like
`# mypy: disable-error-code="type-arg"
but then we might still get nudged to break back-compat in other files.
This patch adds a `mypy.ini` file since it seems like the only way to
disable specific error codes for all our files.
Note: run_checks.py doesn't need to specify `--config_file mypy.ini`,
but I think being explicit is better, particularly since most kernel
devs won't be familiar with how mypy works.
Fixes: 695e26030858 ("kunit: tool: add subscripts for type annotations where appropriate")
Reported-by: SeongJae Park <sj(a)kernel.org>
Link: https://lore.kernel.org/linux-kselftest/20230501171520.138753-1-sj@kernel.o…
Signed-off-by: Daniel Latypov <dlatypov(a)google.com>
---
tools/testing/kunit/kunit_kernel.py | 6 +++---
tools/testing/kunit/mypy.ini | 6 ++++++
tools/testing/kunit/run_checks.py | 2 +-
3 files changed, 10 insertions(+), 4 deletions(-)
create mode 100644 tools/testing/kunit/mypy.ini
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index f01f94106129..7f648802caf6 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -92,7 +92,7 @@ class LinuxSourceTreeOperations:
if stderr: # likely only due to build warnings
print(stderr.decode())
- def start(self, params: List[str], build_dir: str) -> subprocess.Popen[str]:
+ def start(self, params: List[str], build_dir: str) -> subprocess.Popen:
raise RuntimeError('not implemented!')
@@ -113,7 +113,7 @@ class LinuxSourceTreeOperationsQemu(LinuxSourceTreeOperations):
kconfig.merge_in_entries(base_kunitconfig)
return kconfig
- def start(self, params: List[str], build_dir: str) -> subprocess.Popen[str]:
+ def start(self, params: List[str], build_dir: str) -> subprocess.Popen:
kernel_path = os.path.join(build_dir, self._kernel_path)
qemu_command = ['qemu-system-' + self._qemu_arch,
'-nodefaults',
@@ -142,7 +142,7 @@ class LinuxSourceTreeOperationsUml(LinuxSourceTreeOperations):
kconfig.merge_in_entries(base_kunitconfig)
return kconfig
- def start(self, params: List[str], build_dir: str) -> subprocess.Popen[str]:
+ def start(self, params: List[str], build_dir: str) -> subprocess.Popen:
"""Runs the Linux UML binary. Must be named 'linux'."""
linux_bin = os.path.join(build_dir, 'linux')
params.extend(['mem=1G', 'console=tty', 'kunit_shutdown=halt'])
diff --git a/tools/testing/kunit/mypy.ini b/tools/testing/kunit/mypy.ini
new file mode 100644
index 000000000000..ddd288309efa
--- /dev/null
+++ b/tools/testing/kunit/mypy.ini
@@ -0,0 +1,6 @@
+[mypy]
+strict = True
+
+# E.g. we can't write subprocess.Popen[str] until Python 3.9+.
+# But kunit.py tries to support Python 3.7+, so let's disable it.
+disable_error_code = type-arg
diff --git a/tools/testing/kunit/run_checks.py b/tools/testing/kunit/run_checks.py
index 8208c3b3135e..c6d494ea3373 100755
--- a/tools/testing/kunit/run_checks.py
+++ b/tools/testing/kunit/run_checks.py
@@ -23,7 +23,7 @@ commands: Dict[str, Sequence[str]] = {
'kunit_tool_test.py': ['./kunit_tool_test.py'],
'kunit smoke test': ['./kunit.py', 'run', '--kunitconfig=lib/kunit', '--build_dir=kunit_run_checks'],
'pytype': ['/bin/sh', '-c', 'pytype *.py'],
- 'mypy': ['mypy', '--strict', '--exclude', '_test.py$', '--exclude', 'qemu_configs/', '.'],
+ 'mypy': ['mypy', '--config-file', 'mypy.ini', '--exclude', '_test.py$', '--exclude', 'qemu_configs/', '.'],
}
# The user might not have mypy or pytype installed, skip them if so.
base-commit: a42077b787680cbc365a96446b30f32399fa3f6f
--
2.40.1.495.gc816e09b53d-goog
Events Tracing infrastructure contains lot of files, directories
(internally in terms of inodes, dentries). And ends up by consuming
memory in MBs. We can have multiple events of Events Tracing, which
further requires more memory.
Instead of creating inodes/dentries, eventfs could keep meta-data and
skip the creation of inodes/dentries. As and when require, eventfs will
create the inodes/dentries only for required files/directories.
Also eventfs would delete the inodes/dentries once no more requires
but preserve the meta data.
Tracing events took ~9MB, with this approach it took ~4.5MB
for ~10K files/dir.
v2:
Patch 01: new patch:'Require all trace events to have a TRACE_SYSTEM'
Patch 02: moved from v1 1/9
Patch 03: moved from v1 2/9
As suggested by Zheng Yejian, introduced eventfs_prepare_ef()
helper function to add files or directories to eventfs
fix WARNING reported by kernel test robot in v1 8/9
Patch 04: moved from v1 3/9
used eventfs_prepare_ef() to add files
fix WARNING reported by kernel test robot in v1 8/9
Patch 05: moved from v1 4/9
fix compiling warning reported by kernel test robot in v1 4/9
Patch 06: moved from v1 5/9
Patch 07: moved from v1 6/9
Patch 08: moved from v1 7/9
Patch 09: moved from v1 8/9
rebased because of v3 01/10
Patch 10: moved from v1 9/9
v1:
Patch 1: add header file
Patch 2: resolved kernel test robot issues
protecting eventfs lists using nested eventfs_rwsem
Patch 3: protecting eventfs lists using nested eventfs_rwsem
Patch 4: improve events cleanup code to fix crashes
Patch 5: resolved kernel test robot issues
removed d_instantiate_anon() calls
Patch 6: resolved kernel test robot issues
fix kprobe test in eventfs_root_lookup()
protecting eventfs lists using nested eventfs_rwsem
Patch 7: remove header file
Patch 8: pass eventfs_rwsem as argument to eventfs functions
called eventfs_remove_events_dir() instead of tracefs_remove()
from event_trace_del_tracer()
Patch 9: new patch to fix kprobe test case
fs/tracefs/Makefile | 1 +
fs/tracefs/event_inode.c | 757 ++++++++++++++++++
fs/tracefs/inode.c | 124 ++-
fs/tracefs/internal.h | 25 +
include/linux/trace_events.h | 1 +
include/linux/tracefs.h | 49 ++
kernel/trace/trace.h | 3 +-
kernel/trace/trace_events.c | 78 +-
.../ftrace/test.d/kprobe/kprobe_args_char.tc | 4 +-
.../test.d/kprobe/kprobe_args_string.tc | 4 +-
10 files changed, 994 insertions(+), 52 deletions(-)
create mode 100644 fs/tracefs/event_inode.c
create mode 100644 fs/tracefs/internal.h
--
2.40.0
Hi, Willy
Here is the v2 of our old patchset about test report [1].
The trailing '\r' fixup has been merged, so, here only resend the left
parts with an additional patch to restore the failed tests print.
This patchset is rebased on the dev.2023.06.14a branch of linux-rcu [2].
Tests have passed for 'x86 run':
138 test(s) passed, 0 skipped, 0 failed.
See all results in /labs/linux-lab/src/linux-stable/tools/testing/selftests/nolibc/run.out
Also did 'run-user' for x86, mips and arm64.
Changes from v1 -> v2:
1. selftests/nolibc: add a standalone test report macro
As Willy pointed out, the old method with additional test-report
target not work in 'make -j'.
A new macro is added to share the same report logic among the
run-user, run and rerun targets, the path to test log file is
2. selftests/nolibc: always print the path to test log file
Always print the path to test log file, but move it to a new line to
avoid annoying people when the test pass without any failures.
3. selftests/nolibc: restore the failed tests print
Restore printing of the failed tests to avoid manually opening
the test log file when there are really failues.
Best regards,
Zhangjin
---
[1]: https://lore.kernel.org/lkml/cover.1685936428.git.falcon@tinylab.org/
[2]: https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git/
Zhangjin Wu (3):
selftests/nolibc: add a standalone test report macro
selftests/nolibc: always print the path to test log file
selftests/nolibc: restore the failed tests print
tools/testing/selftests/nolibc/Makefile | 19 +++++++------------
1 file changed, 7 insertions(+), 12 deletions(-)
--
2.25.1
Hi, Willy
This is the revision of the v4 part2 of support for rv32 [1], this
further split the generic KARCH code out of the old rv32 compile patch
and also add kernel specific KARCH and nolibc specific NARCH for
tools/include/nolibc/Makefile too.
This is rebased on the dev.2023.06.14a branch of linux-rcu repo [2] with
basic run-user and run tests.
Changes from v4 -> v5:
* selftests/nolibc: allow customize kernel specific ARCH variable
The KARCH customize support part splitted out of the old rv32 compile
patch and removed the one passed to tools/include/nolibc/Makefile.
* tools/nolibc: add kernel and nolibc specific ARCH variables
Pass original ARCH to tools/include/nolibc/Makefile, add KARCH and
NARCH for kernel and nolibc respectively.
* selftests/nolibc: riscv: customize makefile for rv32
Now, it is rv32 specific, no generic code.
Best regards,
Zhangjin
---
[1]: https://lore.kernel.org/linux-riscv/cover.1686128703.git.falcon@tinylab.org/
[2]: https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git/
Zhangjin Wu (5):
tools/nolibc: fix up #error compile failures with -ENOSYS
tools/nolibc: fix up undeclared syscall macros with #ifdef and -ENOSYS
selftests/nolibc: allow customize kernel specific ARCH variable
tools/nolibc: add kernel and nolibc specific ARCH variables
selftests/nolibc: riscv: customize makefile for rv32
tools/include/nolibc/Makefile | 18 +++++++++---
tools/include/nolibc/sys.h | 38 ++++++++++++++++---------
tools/testing/selftests/nolibc/Makefile | 18 ++++++++++--
3 files changed, 55 insertions(+), 19 deletions(-)
--
2.25.1
Hi,
This patchset further improves porting of nolibc to new architectures,
it is based on our previous v5 sysret helper series [1].
It mainly shrinks the assembly _start by moving most of its operations
to a C version of _start_c() function. and also, it removes the old
sys_stat() support by using the sys_statx() instead and therefore,
removes all of the arch specific sys_stat_struct.
Tested 'run' on all of the supported architectures:
arch/board | result
------------|------------
arm/vexpress-a9 | 141 test(s) passed, 1 skipped, 0 failed. See all results in /labs/linux-lab/logging/nolibc/arm-vexpress-a9-nolibc-test.log
arm/virt | 141 test(s) passed, 1 skipped, 0 failed. See all results in /labs/linux-lab/logging/nolibc/arm-virt-nolibc-test.log
aarch64/virt | 141 test(s) passed, 1 skipped, 0 failed. See all results in /labs/linux-lab/logging/nolibc/aarch64-virt-nolibc-test.log
ppc/g3beige | not supported
ppc/ppce500 | not supported
i386/pc | 141 test(s) passed, 1 skipped, 0 failed. See all results in /labs/linux-lab/logging/nolibc/i386-pc-nolibc-test.log
x86_64/pc | 141 test(s) passed, 1 skipped, 0 failed. See all results in /labs/linux-lab/logging/nolibc/x86_64-pc-nolibc-test.log
mipsel/malta | 141 test(s) passed, 1 skipped, 0 failed. See all results in /labs/linux-lab/logging/nolibc/mipsel-malta-nolibc-test.log
loongarch64/virt | 141 test(s) passed, 1 skipped, 0 failed. See all results in /labs/linux-lab/logging/nolibc/loongarch64-virt-nolibc-test.log
riscv64/virt | 141 test(s) passed, 1 skipped, 0 failed. See all results in /labs/linux-lab/logging/nolibc/riscv64-virt-nolibc-test.log
riscv32/virt | 119 test(s) passed, 1 skipped, 22 failed. See all results in /labs/linux-lab/logging/nolibc/riscv32-virt-nolibc-test.log
s390x/s390-ccw-virtio | 141 test(s) passed, 1 skipped, 0 failed. See all results in /labs/linux-lab/logging/nolibc/s390x-s390-ccw-virtio-nolibc-test.log
Notes:
- ppc support are ready locally, will be sent out later.
- full riscv32/virt support are ready locally, will be sent out later.
Changes:
* tools/nolibc: remove old arch specific stat support
Just like the __NR_statx we used in nolibc-test.c, Let's only
reserve sys_statx() and use it to implement the stat() function.
Remove the old sys_stat() and sys_stat_struct completely.
* tools/nolibc: add new crt.h with _start_c
A new C version of _start_c() is added to only require a 'sp' argument
and find the others (argc, argv, envp/environ, auxv) for us in C.
* tools/nolibc: include crt.h before arch.h
Include crt.h before arch.h to let _start() be able to call the new
added _start_c() in arch-<ARCH>.h.
* tools/nolibc: arm: shrink _start with _start_c
tools/nolibc: aarch64: shrink _start with _start_c
tools/nolibc: i386: shrink _start with _start_c
tools/nolibc: x86_64: shrink _start with _start_c
tools/nolibc: mips: shrink _start with _start_c
tools/nolibc: loongarch: shrink _start with _start_c
tools/nolibc: riscv: shrink _start with _start_c
tools/nolibc: s390: shrink _start with _start_c
Move most of the operations from the assembly _start() to the C
_start_c(), only require to do minimal operations in assembly _start
now.
With this patchset, porting nolibc to a new architecture become easier,
the powerpc porting will be added later.
Best regards,
Zhangjin
---
[1]: https://lore.kernel.org/lkml/cover.1687957589.git.falcon@tinylab.org/
Zhangjin Wu (11):
tools/nolibc: remove old arch specific stat support
tools/nolibc: add new crt.h with _start_c
tools/nolibc: include crt.h before arch.h
tools/nolibc: arm: shrink _start with _start_c
tools/nolibc: aarch64: shrink _start with _start_c
tools/nolibc: i386: shrink _start with _start_c
tools/nolibc: x86_64: shrink _start with _start_c
tools/nolibc: mips: shrink _start with _start_c
tools/nolibc: loongarch: shrink _start with _start_c
tools/nolibc: riscv: shrink _start with _start_c
tools/nolibc: s390: shrink _start with _start_c
tools/include/nolibc/Makefile | 1 +
tools/include/nolibc/arch-aarch64.h | 53 ++----------------
tools/include/nolibc/arch-arm.h | 79 ++-------------------------
tools/include/nolibc/arch-i386.h | 58 +++-----------------
tools/include/nolibc/arch-loongarch.h | 42 ++------------
tools/include/nolibc/arch-mips.h | 73 +++----------------------
tools/include/nolibc/arch-riscv.h | 65 ++--------------------
tools/include/nolibc/arch-s390.h | 60 ++------------------
tools/include/nolibc/arch-x86_64.h | 54 ++----------------
tools/include/nolibc/crt.h | 57 +++++++++++++++++++
tools/include/nolibc/nolibc.h | 1 +
tools/include/nolibc/signal.h | 1 +
tools/include/nolibc/stdio.h | 1 +
tools/include/nolibc/stdlib.h | 1 +
tools/include/nolibc/sys.h | 64 ++++------------------
tools/include/nolibc/time.h | 1 +
tools/include/nolibc/types.h | 4 +-
tools/include/nolibc/unistd.h | 1 +
18 files changed, 122 insertions(+), 494 deletions(-)
create mode 100644 tools/include/nolibc/crt.h
--
2.25.1
The kernel cmdline option panic_on_warn expects an integer, it is not a
plain option as documented. A number of uses in the tree figured this
already, and use panic_on_warn=1 for their purpose.
Adjust a comment which otherwise may mislead people in the future.
Fixes: 9e3961a097 ("kernel: add panic_on_warn")
Signed-off-by: Olaf Hering <olaf(a)aepfle.de>
---
Documentation/admin-guide/kernel-parameters.txt | 2 +-
tools/testing/selftests/rcutorture/bin/kvm.sh | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 9e5bab29685f..15196f84df49 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4049,7 +4049,7 @@
extra details on the taint flags that users can pick
to compose the bitmask to assign to panic_on_taint.
- panic_on_warn panic() instead of WARN(). Useful to cause kdump
+ panic_on_warn=1 panic() instead of WARN(). Useful to cause kdump
on a WARN().
parkbd.port= [HW] Parallel port number the keyboard adapter is
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 62f3b0f56e4d..d3cdc2d33d4b 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -655,4 +655,4 @@ fi
# Control buffer size: --bootargs trace_buf_size=3k
# Get trace-buffer dumps on all oopses: --bootargs ftrace_dump_on_oops
# Ditto, but dump only the oopsing CPU: --bootargs ftrace_dump_on_oops=orig_cpu
-# Heavy-handed way to also dump on warnings: --bootargs panic_on_warn
+# Heavy-handed way to also dump on warnings: --bootargs panic_on_warn=1
Hi, Thomas, David, Willy
Thanks very much for your kindly review.
This is the revision of v3 "tools/nolibc: add a new syscall helper" [1],
this mainly applies the suggestion from David in this reply [2] and
rebased everything on the dev.2023.06.14a branch of linux-rcu [3].
The old __sysret() doesn't support the syscalls with pointer return
value, this revision now supports such syscalls. The left mmap() syscall
is converted to use this new __sysret() with additional test cases.
Changes from v3 -> v4:
* tools/nolibc: sys.h: add a syscall return helper
tools/nolibc: unistd.h: apply __sysret() helper
tools/nolibc: sys.h: apply __sysret() helper
The original v3 series, no code change, except the Reviewed-by lines
from Thomas.
* tools/nolibc: unistd.h: reorder the syscall macros
reorder the syscall macros in using order and align most of them.
* tools/nolibc: add missing my_syscall6() for mips
required by mmap() syscall, this is the last missing my_syscall6().
* tools/nolibc: __sysret: support syscalls who return a pointer
Apply suggestion from David.
Let __sysret() also supports syscalls with pointer return value, so, the
return value is converted to unsigned long and the comparing of < 0 is
converted to the comparing of [(unsigned long)-MAX_ERRNO, (unsigned long)-1].
This also allows return a huge value (not pointer) with highest bit as 1.
It is able to merge this one to the first one if necessary.
* tools/nolibc: clean up mmap() support
Apply new __sysret(), clean up #ifdef and some macros.
* selftests/nolibc: add EXPECT_PTREQ, EXPECT_PTRNE and EXPECT_PTRER
selftests/nolibc: add sbrk_0 to test current brk getting
selftests/nolibc: add mmap and munmap test cases
Add some mmap & munmap test cases and the corresponding helpers, to
verify one of the new helpers, a sbrk_0 test case is also added.
Best regards,
Zhangjin
---
[1]: https://lore.kernel.org/linux-riscv/87e7a391-b97b-4001-b12a-76d20790563e@t-…
[2]: https://lore.kernel.org/linux-riscv/94dd5170929f454fbc0a10a2eb3b108d@AcuMS.…
[3]: https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git/
Zhangjin Wu (10):
tools/nolibc: sys.h: add a syscall return helper
tools/nolibc: unistd.h: apply __sysret() helper
tools/nolibc: sys.h: apply __sysret() helper
tools/nolibc: unistd.h: reorder the syscall macros
tools/nolibc: add missing my_syscall6() for mips
tools/nolibc: __sysret: support syscalls who return a pointer
tools/nolibc: clean up mmap() support
selftests/nolibc: add EXPECT_PTREQ, EXPECT_PTRNE and EXPECT_PTRER
selftests/nolibc: add sbrk_0 to test current brk getting
selftests/nolibc: add mmap and munmap test cases
tools/include/nolibc/arch-mips.h | 26 ++
tools/include/nolibc/nolibc.h | 9 +-
tools/include/nolibc/sys.h | 391 +++----------------
tools/include/nolibc/types.h | 11 +
tools/include/nolibc/unistd.h | 13 +-
tools/testing/selftests/nolibc/nolibc-test.c | 90 +++++
6 files changed, 191 insertions(+), 349 deletions(-)
--
2.25.1
From: Björn Töpel <bjorn(a)rivosinc.com>
When you're cross-building kselftest, in this case RISC-V:
| make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- O=/tmp/kselftest \
| HOSTCC=gcc FORMAT= SKIP_TARGETS="arm64 ia64 powerpc sparc64 x86 \
| sgx" -C tools/testing/selftests gen_tar
the components (paths) that fail to build are skipped. In this case,
openat2 failed due to missing library support, and proc due to an
x86-64 only test.
This tiny series addresses the problems above.
Björn
Björn Töpel (2):
selftests/openat2: Run-time check for -fsanitize=undefined
selftests/proc: Do not build x86-64 tests on non-x86-64 builds
tools/testing/selftests/openat2/Makefile | 9 ++++++++-
tools/testing/selftests/proc/Makefile | 4 ++++
2 files changed, 12 insertions(+), 1 deletion(-)
base-commit: 3a8a670eeeaa40d87bd38a587438952741980c18
--
2.39.2
Hi, all
Thanks very much for your review suggestions of the v1 series [1], we
just sent out the generic part1 [2], and here is the part2 of the whole
v2 revision.
Changes from v1 -> v2:
* Don't emulate the return values in the new syscalls path, fix up or
support the new syscalls in the side of the related test cases (1-3)
selftests/nolibc: remove gettimeofday_bad1/2 completely
selftests/nolibc: support two errnos with EXPECT_SYSER2()
selftests/nolibc: waitpid_min: add waitid syscall support
(Review suggestions from Willy and Thomas)
* Fix up new failure of the state_timestamps test case (4, new)
tools/nolibc: add missing nanoseconds support for __NR_statx
(Fixes for the commit a89c937d781a ("tools/nolibc: support nanoseconds in stat()")
* Add new waitstatus macros as a standalone patch for the waitid support (5)
tools/nolibc: add more wait status related types
(Split and Cleanup for the waitid syscall based sys_wait4)
* Pure 64bit lseek and time64 select/poll/gettimeofday support (6-11)
tools/nolibc: add pure 64bit off_t, time_t and blkcnt_t
tools/nolibc: sys_lseek: add pure 64bit lseek
tools/nolibc: add pure 64bit time structs
tools/nolibc: sys_select: add pure 64bit select
tools/nolibc: sys_poll: add pure 64bit poll
tools/nolibc: sys_gettimeofday: add pure 64bit gettimeofday
(Review suggestions from Arnd, Thomas and Willy, time32 variants have
been removed completely and some fixups)
* waitid syscall support cleanup (12)
tools/nolibc: sys_wait4: add waitid syscall support
(Sync with the waitstatus macros update and Removal of emulated code)
* rv32 nolibc-test support, commit message update (13)
selftests/nolibc: riscv: customize makefile for rv32
(Review suggestions from Thomas, explain more about the change logic in commit message)
Best regards,
Zhangjin
---
[1]: https://lore.kernel.org/linux-riscv/20230529113143.GB2762@1wt.eu/T/#t
[2]: https://lore.kernel.org/linux-riscv/cover.1685362482.git.falcon@tinylab.org/
Zhangjin Wu (13):
selftests/nolibc: remove gettimeofday_bad1/2 completely
selftests/nolibc: support two errnos with EXPECT_SYSER2()
selftests/nolibc: waitpid_min: add waitid syscall support
tools/nolibc: add missing nanoseconds support for __NR_statx
tools/nolibc: add more wait status related types
tools/nolibc: add pure 64bit off_t, time_t and blkcnt_t
tools/nolibc: sys_lseek: add pure 64bit lseek
tools/nolibc: add pure 64bit time structs
tools/nolibc: sys_select: add pure 64bit select
tools/nolibc: sys_poll: add pure 64bit poll
tools/nolibc: sys_gettimeofday: add pure 64bit gettimeofday
tools/nolibc: sys_wait4: add waitid syscall support
selftests/nolibc: riscv: customize makefile for rv32
tools/include/nolibc/arch-aarch64.h | 3 -
tools/include/nolibc/arch-loongarch.h | 3 -
tools/include/nolibc/arch-riscv.h | 3 -
tools/include/nolibc/std.h | 28 ++--
tools/include/nolibc/sys.h | 134 +++++++++++++++----
tools/include/nolibc/types.h | 58 +++++++-
tools/testing/selftests/nolibc/Makefile | 11 +-
tools/testing/selftests/nolibc/nolibc-test.c | 20 +--
8 files changed, 202 insertions(+), 58 deletions(-)
--
2.25.1