Allow function redirection using ftrace. This is basically
equivalent to the static_stub support in the previous patch,
but does not require the function being replaced to be modified (save
for the addition of KUNIT_STUBBABLE/noinline).
This is hidden behind the CONFIG_KUNIT_FTRACE_STUBS option, and has a
number of dependencies, including ftrace and CONFIG_KALLSYMS_ALL.
As a result, it only works on architectures where these are available.
You can run the KUnit example tests with the following:
$ ./tools/testing/kunit/kunit.py run --kunitconfig lib/kunit/stubs_example.kunitconfig --arch=x86_64
To the end user, replacing a function is very simple, e.g.
KUNIT_STUBBABLE void real_func(int n);
void replacement_func(int n);
/* in tests */
kunit_activate_ftrace_stub(test, real_func, replacement_func);
The implementation is inspired by Steven's snippet here [1].
Some more details:
* stubbing is automatically undone at the end of tests
* it can also be manually undone with kunit_deactivate_ftrace_stub()
* stubbing only applies when current->kunit_test == test
* note: currently can't have more than one test running at a time
* KUNIT_STUBBABLE marks functions as noinline when CONFIG_KUNIT_FTRACE_STUBS is set
* this ensures we can actually stub all calls
* KUNIT_STUBBABLE_TRAMPOLINE is a version that evaluates to
__always_inline when stubbing is not enabled
* This may need to be used with a wrapper function, as in the sketch below.
* See the doc comment for more details.
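For example, a minimal trampoline sketch (mirroring the copy_to_user example
in the header's doc comment; my_copy_to_user() and fake_copy_to_user() are
illustrative names, not part of this patch):
static KUNIT_STUBBABLE_TRAMPOLINE unsigned long
my_copy_to_user(void __user *to, const void *from, unsigned long n)
{
        return copy_to_user(to, from, n);
}
/* in tests: stub the trampoline, since copy_to_user() itself is
 * __always_inline and cannot be hooked by ftrace
 */
kunit_activate_ftrace_stub(test, my_copy_to_user, fake_copy_to_user);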
Sharp-edges:
* kernel livepatch only works on some arches (not UML)
* if you don't use noinline/KUNIT_STUBBABLE, functions might be inlined
and thus none of this works:
* if it's always inlined, at least the attempt to stub will fail
* if it's sometimes inlined, then the stub silently won't work
[1]
https://lore.kernel.org/lkml/20220224091550.2b7e8784@gandalf.local.home
Co-developed-by: Daniel Latypov <dlatypov@google.com>
Signed-off-by: Eddie Phillips <eddiephillips@google.com>
---
Link to original: https://lore.kernel.org/all/20220910212804.670622-3-davidgow@google.com/
include/kunit/ftrace_stub.h | 84 ++++++++++++++++
lib/kunit/Kconfig | 11 +++
lib/kunit/Makefile | 4 +
lib/kunit/ftrace_stub.c | 146 ++++++++++++++++++++++++++++
lib/kunit/kunit-example-test.c | 29 +++++-
lib/kunit/stubs_example.kunitconfig | 10 ++
6 files changed, 282 insertions(+), 2 deletions(-)
create mode 100644 include/kunit/ftrace_stub.h
create mode 100644 lib/kunit/ftrace_stub.c
create mode 100644 lib/kunit/stubs_example.kunitconfig
diff --git a/include/kunit/ftrace_stub.h b/include/kunit/ftrace_stub.h
new file mode 100644
index 000000000000..bfd57ea6289c
--- /dev/null
+++ b/include/kunit/ftrace_stub.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _KUNIT_FTRACE_STUB_H
+#define _KUNIT_FTRACE_STUB_H
+
+/**
+ * KUNIT_STUBBABLE - mark a function as stubbable when stubbing support is enabled
+ *
+ * Stubbing uses ftrace internally, so we can only stub out functions when they
+ * are not inlined. This macro evaluates to noinline when stubbing support is
+ * enabled, so that stubbed functions are never inlined.
+ *
+ * If you cannot add this annotation to the function, you can instead use
+ * KUNIT_STUBBABLE_TRAMPOLINE, which is the same, but evaluates to
+ * __always_inline when stubbing is not enabled.
+ *
+ * Consider copy_to_user, which is marked as __always_inline:
+ *
+ * .. code-block:: c
+ * static KUNIT_STUBBABLE_TRAMPOLINE unsigned long
+ * copy_to_user_trampoline(void __user *to, const void *from, unsigned long n)
+ * {
+ * return copy_to_user(to, from, n);
+ * }
+ *
+ * Then we simply need to update our code to go through this function instead
+ * (in the places where we want to stub it out).
+ */
+#if IS_ENABLED(CONFIG_KUNIT_FTRACE_STUBS)
+#define KUNIT_STUBBABLE noinline
+#define KUNIT_STUBBABLE_TRAMPOLINE noinline
+#else
+#define KUNIT_STUBBABLE
+#define KUNIT_STUBBABLE_TRAMPOLINE __always_inline
+#endif
+
+struct kunit;
+
+/**
+ * kunit_activate_ftrace_stub() - makes all calls to @func go to @replacement during @test.
+ * @test: The test context object.
+ * @func: The function to stub out, must be annotated with KUNIT_STUBBABLE.
+ * @replacement: The function to replace @func with.
+ *
+ * All calls to @func will instead call @replacement for the duration of the
+ * current test. If called from outside the test's thread, the function will
+ * not be redirected.
+ *
+ * The redirection can be disabled again with kunit_deactivate_ftrace_stub().
+ *
+ * Example:
+ *
+ * .. code-block:: c
+ * KUNIT_STUBBABLE int real_func(int n)
+ * {
+ * pr_info("real_func() called with %d", n);
+ * return 0;
+ * }
+ *
+ * int replacement_func(int n)
+ * {
+ * pr_info("replacement_func() called with %d", n);
+ * return 42;
+ * }
+ *
+ * void example_test(struct kunit *test)
+ * {
+ * kunit_activate_ftrace_stub(test, real_func, replacement_func);
+ * KUNIT_EXPECT_EQ(test, real_func(1), 42);
+ * }
+ *
+ */
+#define kunit_activate_ftrace_stub(test, real_fn_addr, replacement_addr) do { \
+ typecheck_fn(typeof(&replacement_addr), real_fn_addr); \
+ __kunit_activate_ftrace_stub(test, #real_fn_addr, real_fn_addr, replacement_addr); \
+} while (0)
+
+void __kunit_activate_ftrace_stub(struct kunit *test,
+ const char *name,
+ void *real_fn_addr,
+ void *replacement_addr);
+
+
+void kunit_deactivate_ftrace_stub(struct kunit *test, void *real_fn_addr);
+#endif /* _KUNIT_FTRACE_STUB_H */
diff --git a/lib/kunit/Kconfig b/lib/kunit/Kconfig
index 7a6af361d2fc..8a629017b917 100644
--- a/lib/kunit/Kconfig
+++ b/lib/kunit/Kconfig
@@ -70,6 +70,17 @@ config KUNIT_ALL_TESTS
If unsure, say N.
+config KUNIT_FTRACE_STUBS
+ bool "Support for stubbing out functions in KUnit tests with ftrace and kernel livepatch"
+ depends on FTRACE=y && FUNCTION_TRACER=y && MODULES=y && DEBUG_KERNEL=y && KALLSYMS_ALL=y
+ help
+ Builds support for stubbing out functions for the duration of KUnit
+ test cases or suites using ftrace.
+ See KUNIT_EXAMPLE_TEST for an example.
+
+ NOTE: this does not work on all architectures (like UML) and
+ relies on a lot of magic (see the dependencies list).
+
config KUNIT_DEFAULT_ENABLED
bool "Default value of kunit.enable"
default y
diff --git a/lib/kunit/Makefile b/lib/kunit/Makefile
index 656f1fa35abc..f04f6ea4d6a8 100644
--- a/lib/kunit/Makefile
+++ b/lib/kunit/Makefile
@@ -29,3 +29,7 @@ obj-$(CONFIG_KUNIT_TEST) += assert_test.o
endif
obj-$(CONFIG_KUNIT_EXAMPLE_TEST) += kunit-example-test.o
+
+ifeq ($(CONFIG_KUNIT_FTRACE_STUBS),y)
+kunit-objs += ftrace_stub.o
+endif
\ No newline at end of file
diff --git a/lib/kunit/ftrace_stub.c b/lib/kunit/ftrace_stub.c
new file mode 100644
index 000000000000..b19eaa35f5ed
--- /dev/null
+++ b/lib/kunit/ftrace_stub.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <kunit/ftrace_stub.h>
+#include <kunit/test.h>
+
+#include <linux/typecheck.h>
+
+#include <linux/ftrace.h>
+#include <linux/livepatch.h>
+#include <linux/sched.h>
+
+
+struct kunit_ftrace_stub_ctx {
+ struct kunit *test;
+ unsigned long real_fn_addr; /* used as a key to lookup the stub */
+ unsigned long replacement_addr;
+ struct ftrace_ops ops; /* a copy of kunit_stub_base_ops with .private set */
+};
+
+static void kunit_stub_trampoline(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *ops,
+ struct ftrace_regs *fregs)
+{
+ struct kunit_ftrace_stub_ctx *ctx = ops->private;
+ int lock_bit;
+
+ if (current->kunit_test != ctx->test)
+ return;
+
+ lock_bit = ftrace_test_recursion_trylock(ip, parent_ip);
+ KUNIT_ASSERT_GE(ctx->test, lock_bit, 0);
+
+ ftrace_regs_set_instruction_pointer(fregs, ctx->replacement_addr);
+
+ ftrace_test_recursion_unlock(lock_bit);
+}
+
+static struct ftrace_ops kunit_stub_base_ops = {
+ .func = &kunit_stub_trampoline,
+ .flags = FTRACE_OPS_FL_IPMODIFY |
+#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
+ FTRACE_OPS_FL_SAVE_REGS |
+#endif
+ FTRACE_OPS_FL_DYNAMIC
+};
+
+static void __kunit_ftrace_stub_resource_free(struct kunit_resource *res)
+{
+ struct kunit_ftrace_stub_ctx *ctx = res->data;
+
+ unregister_ftrace_function(&ctx->ops);
+ kfree(ctx);
+}
+
+/* Matching function for kunit_find_resource(). match_data is real_fn_addr. */
+static bool __kunit_ftrace_stub_resource_match(struct kunit *test,
+ struct kunit_resource *res,
+ void *match_real_fn_addr)
+{
+ /* This pointer is only valid if res is an ftrace stub resource. */
+ struct kunit_ftrace_stub_ctx *ctx = res->data;
+
+ /* Make sure the resource is an ftrace stub resource. */
+ if (res->free != &__kunit_ftrace_stub_resource_free)
+ return false;
+
+ return ctx->real_fn_addr == (unsigned long)match_real_fn_addr;
+}
+
+void kunit_deactivate_ftrace_stub(struct kunit *test, void *real_fn_addr)
+{
+ struct kunit_resource *res;
+
+ KUNIT_ASSERT_PTR_NE_MSG(test, real_fn_addr, NULL,
+ "Tried to deactivate a NULL stub.");
+
+ /* Look up the existing stub for this function. */
+ res = kunit_find_resource(test,
+ __kunit_ftrace_stub_resource_match,
+ real_fn_addr);
+
+ /* Error out if the stub doesn't exist. */
+ KUNIT_ASSERT_PTR_NE_MSG(test, res, NULL,
+ "Tried to deactivate a nonexistent stub.");
+
+ /* Remove the resource and drop the extra reference we got from
+ * kunit_find_resource(). Once the last reference is gone, the free
+ * function unregisters the ftrace ops.
+ */
+ kunit_remove_resource(test, res);
+ kunit_put_resource(res);
+}
+EXPORT_SYMBOL_GPL(kunit_deactivate_ftrace_stub);
+
+void __kunit_activate_ftrace_stub(struct kunit *test,
+ const char *name,
+ void *real_fn_addr,
+ void *replacement_addr)
+{
+ unsigned long ftrace_ip;
+ struct kunit_ftrace_stub_ctx *ctx;
+ int ret;
+
+ ftrace_ip = ftrace_location((unsigned long)real_fn_addr);
+ if (!ftrace_ip)
+ KUNIT_FAIL_ASSERTION(test, KUNIT_ASSERTION,
+ "%s ip is invalid: not a function, or is marked notrace or inline", name);
+
+ /* Allocate the stub context, which contains pointers to the replacement
+ * function and the test object. It's also registered as a KUnit
+ * resource which can be looked up by address (to deactivate manually)
+ * and is destroyed automatically on test exit.
+ */
+ ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+ KUNIT_ASSERT_PTR_NE_MSG(test, ctx, NULL, "failed to allocate kunit stub for %s", name);
+
+ ctx->test = test;
+ ctx->ops = kunit_stub_base_ops;
+ ctx->ops.private = ctx;
+ ctx->real_fn_addr = (unsigned long)real_fn_addr;
+ ctx->replacement_addr = (unsigned long)replacement_addr;
+
+ ret = ftrace_set_filter_ip(&ctx->ops, ftrace_ip, 0, 0);
+ if (ret) {
+ kfree(ctx);
+ KUNIT_FAIL_ASSERTION(test, KUNIT_ASSERTION,
+ "failed to set filter ip for %s: %d", name, ret);
+ }
+
+ ret = register_ftrace_function(&ctx->ops);
+ if (ret) {
+ kfree(ctx);
+ if (ret == -EBUSY)
+ KUNIT_FAIL_ASSERTION(
+ test, KUNIT_ASSERTION,
+ "failed to register stub (-EBUSY) for %s, likely due to already stubbing it?",
+ name);
+ KUNIT_FAIL_ASSERTION(test, KUNIT_ASSERTION,
+ "failed to register stub for %s: %d", name,
+ ret);
+ }
+
+ kunit_alloc_resource(test, NULL,
+ __kunit_ftrace_stub_resource_free,
+ GFP_KERNEL, ctx);
+}
+EXPORT_SYMBOL_GPL(__kunit_activate_ftrace_stub);
diff --git a/lib/kunit/kunit-example-test.c b/lib/kunit/kunit-example-test.c
index 9452b163956f..676ad552ae7b 100644
--- a/lib/kunit/kunit-example-test.c
+++ b/lib/kunit/kunit-example-test.c
@@ -6,8 +6,9 @@
* Author: Brendan Higgins <brendanhiggins(a)google.com>
*/
-#include <kunit/test.h>
+#include <kunit/ftrace_stub.h>
#include <kunit/static_stub.h>
+#include <kunit/test.h>
/*
* This is the most fundamental element of KUnit, the test case. A test case
@@ -152,7 +153,7 @@ static void example_all_expect_macros_test(struct kunit *test)
}
/* This is a function we'll replace with static stubs. */
-static int add_one(int i)
+static KUNIT_STUBBABLE int add_one(int i)
{
/* This will trigger the stub if active. */
KUNIT_STATIC_STUB_REDIRECT(add_one, i);
@@ -221,6 +222,29 @@ static void example_static_stub_using_fn_ptr_test(struct kunit *test)
KUNIT_EXPECT_EQ(test, add_one(1), 2);
}
+/*
+ * This test shows the use of ftrace-based stubs.
+ */
+static void example_ftrace_stub_test(struct kunit *test)
+{
+#if !IS_ENABLED(CONFIG_KUNIT_FTRACE_STUBS)
+ kunit_skip(test, "KUNIT_FTRACE_STUBS not enabled");
+#else
+ /* By default, function is not stubbed. */
+ KUNIT_EXPECT_EQ(test, add_one(1), 2);
+
+ /* Replace add_one() with subtract_one(). */
+ kunit_activate_ftrace_stub(test, add_one, subtract_one);
+
+ /* add_one() is now replaced. */
+ KUNIT_EXPECT_EQ(test, add_one(1), 0);
+
+ /* Return add_one() to normal. */
+ kunit_deactivate_ftrace_stub(test, add_one);
+ KUNIT_EXPECT_EQ(test, add_one(1), 2);
+#endif
+}
+
static const struct example_param {
int value;
} example_params_array[] = {
@@ -506,6 +530,7 @@ static struct kunit_case example_test_cases[] = {
KUNIT_CASE(example_all_expect_macros_test),
KUNIT_CASE(example_static_stub_test),
KUNIT_CASE(example_static_stub_using_fn_ptr_test),
+ KUNIT_CASE(example_ftrace_stub_test),
KUNIT_CASE(example_priv_test),
KUNIT_CASE_PARAM(example_params_test, example_gen_params),
KUNIT_CASE_PARAM_WITH_INIT(example_params_test_with_init, kunit_array_gen_params,
diff --git a/lib/kunit/stubs_example.kunitconfig b/lib/kunit/stubs_example.kunitconfig
new file mode 100644
index 000000000000..20af4da9bc75
--- /dev/null
+++ b/lib/kunit/stubs_example.kunitconfig
@@ -0,0 +1,10 @@
+CONFIG_KUNIT=y
+CONFIG_KUNIT_FTRACE_STUBS=y
+CONFIG_KUNIT_EXAMPLE_TEST=y
+
+# Dependencies
+CONFIG_FTRACE=y
+CONFIG_FUNCTION_TRACER=y
+CONFIG_MODULES=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_KALLSYMS_ALL=y
--
2.51.1.851.g4ebd6896fd-goog
This small patchset avoids a verifier bug warning on conditional jumps
whose two operands are the same register while that register holds a
scalar with a known range.
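For context, the pattern that triggers the warning looks roughly like this
(a sketch in the style of the verifier_bounds.c selftests, assuming the
usual bpf_misc.h macros; it is not the exact test added by this series):
SEC("socket")
__success
__naked void jump_on_same_reg(void)
{
        /* r0 becomes a scalar with range [0, 255], then is compared
         * against itself; the verifier used to warn when adjusting
         * bounds for such a branch.
         */
        asm volatile ("                                 \
        call %[bpf_get_prandom_u32];                    \
        r0 &= 0xff;                                     \
        if r0 > r0 goto l0_%=;                          \
        r0 = 1;                                         \
l0_%=:  r0 = 0;                                         \
        exit;                                           \
"       :
        : __imm(bpf_get_prandom_u32)
        : __clobber_all);
}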
v4:
- make code better. (Alexei)
v3:
https://lore.kernel.org/bpf/20251031154107.403054-1-kafai.wan@linux.dev/
- Enhance is_scalar_branch_taken() to handle scalar case. (Eduard)
- Update the selftest to cover all conditional jump opcodes. (Eduard)
v2:
https://lore.kernel.org/bpf/20251025053017.2308823-1-kafai.wan@linux.dev/
- Enhance is_branch_taken() and is_scalar_branch_taken() to handle
branch direction computation for same register. (Eduard and Alexei)
- Update the selftest.
v1:
https://lore.kernel.org/bpf/20251022164457.1203756-1-kafai.wan@linux.dev/
---
KaFai Wan (2):
bpf: Skip bounds adjustment for conditional jumps on same scalar
register
selftests/bpf: Add test for conditional jumps on same scalar register
kernel/bpf/verifier.c | 31 ++++
.../selftests/bpf/progs/verifier_bounds.c | 154 ++++++++++++++++++
2 files changed, 185 insertions(+)
--
2.43.0
Hi all,
This series refactors the VMA count limit code to improve clarity,
test coverage, and observability.
The VMA count limit, controlled by sysctl_max_map_count, is a safeguard
that prevents a single process from consuming excessive kernel memory
by creating too many memory mappings.
A major change since v3 is the first patch in the series: instead of
attempting to fix overshooting the limit, it now documents that this is the
intended behavior. As Hugh pointed out, the lenient check (>) in do_mmap()
and do_brk_flags() is intentional, allowing for potential VMA merges or
expansions when the process is at the sysctl_max_map_count limit.
The consensus is that this historical behavior is correct but non-obvious.
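Illustratively, the lenient check being documented boils down to the
following (simplified sketch, not the exact mm/ code):
/* Lenient on purpose: a request arriving while map_count is already at
 * the sysctl limit may still merge with or expand an existing VMA, so
 * only reject once the count has strictly exceeded the limit.
 */
if (mm->map_count > sysctl_max_map_count)
        return -ENOMEM;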
This series now focuses on making that behavior clear and the surrounding
code more robust. Based on feedback from Lorenzo and David, this series
retains the helper function and the rename of map_count.
The refined v4 series is now structured as follows:
1. Documents the lenient VMA count checks with comments to clarify
their purpose.
2. Adds a comprehensive selftest to codify the expected behavior at the
limit, including the lenient mmap case.
3. Introduces max_vma_count() to abstract the max map count sysctl,
making the sysctl static and converting all callers to use the new
helper.
4. Renames mm_struct->map_count to the more explicit vma_count for
better code clarity.
5. Adds a tracepoint for observability when a process fails to
allocate a VMA due to the count limit.
Tested on x86_64 and arm64:
1. Build test:
allyesconfig for rename
2. Selftests:
cd tools/testing/selftests/mm && \
make && \
./run_vmtests.sh -t max_vma_count
3. vma tests:
cd tools/testing/vma && \
make && \
./vma
Link to v3:
https://lore.kernel.org/r/20251013235259.589015-1-kaleshsingh@google.com/
Thanks to everyone for the valuable discussion on previous revisions.
-- Kalesh
Kalesh Singh (5):
mm: Document lenient map_count checks
mm/selftests: add max_vma_count tests
mm: Introduce max_vma_count() to abstract the max map count sysctl
mm: rename mm_struct::map_count to vma_count
mm/tracing: introduce trace_mm_insufficient_vma_slots event
MAINTAINERS | 2 +
fs/binfmt_elf.c | 2 +-
fs/coredump.c | 2 +-
include/linux/mm.h | 2 -
include/linux/mm_types.h | 2 +-
include/trace/events/vma.h | 32 +
kernel/fork.c | 2 +-
mm/debug.c | 2 +-
mm/internal.h | 3 +
mm/mmap.c | 25 +-
mm/mremap.c | 13 +-
mm/nommu.c | 8 +-
mm/util.c | 1 -
mm/vma.c | 42 +-
mm/vma_internal.h | 2 +
tools/testing/selftests/mm/.gitignore | 1 +
tools/testing/selftests/mm/Makefile | 1 +
.../selftests/mm/max_vma_count_tests.c | 716 ++++++++++++++++++
tools/testing/selftests/mm/run_vmtests.sh | 5 +
tools/testing/vma/vma.c | 32 +-
tools/testing/vma/vma_internal.h | 13 +-
21 files changed, 856 insertions(+), 52 deletions(-)
create mode 100644 include/trace/events/vma.h
create mode 100644 tools/testing/selftests/mm/max_vma_count_tests.c
base-commit: b227c04932039bccc21a0a89cd6df50fa57e4716
--
2.51.1.851.g4ebd6896fd-goog
Problem
=======
When host APEI is unable to claim a synchronous external abort (SEA)
during a guest abort, today KVM directly injects an asynchronous SError
into the VCPU and then resumes it. The injected SError usually results in
an unpleasant guest kernel panic.
One of the major sources of guest SEAs is a VCPU consuming a recoverable
uncorrected memory error (UER), which is not uncommon at all in modern
datacenter servers with large amounts of physical memory. Although an
SError and a guest panic are sufficient to stop the propagation of corrupted
memory, there is room to recover from a UER in a more graceful manner.
Proposed Solution
=================
The idea is that we can replay the SEA to the faulting VCPU. If the memory
error consumption or the fault that caused the SEA did not come from the
guest kernel, the blast radius can be limited to the poison-consuming guest
process, while the VM can keep running.
In addition, instead of handling this under the hood without involving
userspace, there are benefits to redirecting the SEA to the VMM:
- VM customers care about the disruptions caused by memory errors, and
VMM usually has the responsibility to start the process of notifying
the customers of memory error events in their VMs. For example some
cloud provider emits a critical log in their observability UI [1], and
provides a playbook for customers on how to mitigate disruptions to
their workloads.
- VMM can protect against future memory error consumption by unmapping the
poisoned pages from the stage-2 page table with KVM userfault [2], or by
splitting the memslot that contains the poisoned pages.
- VMM can keep track of SEA events in the VM. When VMM thinks the status
on the host or the VM is bad enough, e.g. number of distinct SEAs
exceeds a threshold, it can restart the VM on another healthy host.
- Behavior parity with the x86 architecture. When a machine check exception
(MCE) is caused by a VCPU, the kernel or KVM signals userspace with SIGBUS to
let the VMM either recover from the MCE or terminate itself along with the VM.
The prior RFC proposed implementing SIGBUS on arm64 as well, but
Marc preferred a KVM exit over a signal [3]. Implementation aside,
returning the SEA to the VMM is on par with returning an MCE to the VMM.
Once SEA is redirected to VMM, among other actions, VMM is encouraged
to inject external aborts into the faulting VCPU.
New UAPIs
=========
This patchset introduces the following userspace-visible changes to empower
the VMM to control what happens for a SEA on guest memory (a rough VMM-side
sketch follows the list):
- KVM_CAP_ARM_SEA_TO_USER. While taking a SEA, if userspace has enabled
this new capability at VM creation, and the SEA was not taken on
kernel-allocated memory, return KVM_EXIT_ARM_SEA to userspace instead of
injecting an SError.
- KVM_EXIT_ARM_SEA. This is the VM exit reason the VMM gets. The details
about the SEA are provided in arm_sea as far as possible, including the
sanitized ESR value at EL2 and the faulting guest virtual and physical
addresses if available.
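As a rough illustration (not part of this series), a VMM could consume the
new UAPI along these lines; the arm_sea field names and the
handle_guest_sea() helper are assumptions based on this cover letter:
struct kvm_enable_cap cap = { .cap = KVM_CAP_ARM_SEA_TO_USER };
/* opt in at VM creation time */
ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
/* ... later, in the vCPU run loop ... */
ioctl(vcpu_fd, KVM_RUN, NULL);
switch (run->exit_reason) {
case KVM_EXIT_ARM_SEA:
        /* record the event, optionally unmap the poisoned page, then
         * inject an external abort back into the faulting vCPU
         */
        handle_guest_sea(run->arm_sea.esr, run->arm_sea.gva, run->arm_sea.gpa);
        break;
}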
* From v3 [4]
- Rebased on commit 3a8660878839 ("Linux 6.18-rc1").
- In selftest, print a message if GVA or GPA expects to be valid.
* From v2 [5]:
- Rebased on "[PATCH] KVM: arm64: nv: Handle SEAs due to VNCR redirection" [6]
and kvmarm/next commit 7b8346bd9fce6 ("KVM: arm64: Don't attempt vLPI
mappings when vPE allocation is disabled")
- Took the host_owns_sea implementation from Oliver [7, 8].
- Excluded the guest SEA injection patches.
- Updated selftest.
* From v1 [9]:
- Rebased on commit 4d62121ce9b5 ("KVM: arm64: vgic-debug: Avoid
dereferencing NULL ITE pointer").
- Sanitize ESR_EL2 before reporting it to userspace.
- Do not do KVM_EXIT_ARM_SEA when SEA is caused by memory allocated to
stage-2 translation table.
[1] https://cloud.google.com/solutions/sap/docs/manage-host-errors
[2] https://lore.kernel.org/kvm/20250109204929.1106563-1-jthoughton@google.com
[3] https://lore.kernel.org/kvm/86pljbqqh0.wl-maz@kernel.org
[4] https://lore.kernel.org/kvmarm/20250731205844.1346839-1-jiaqiyan@google.com
[5] https://lore.kernel.org/kvm/20250604050902.3944054-1-jiaqiyan@google.com
[6] https://lore.kernel.org/kvmarm/20250729182342.3281742-1-oliver.upton@linux.…
[7] https://lore.kernel.org/kvm/aHFohmTb9qR_JG1E@linux.dev
[8] https://lore.kernel.org/kvm/aHK-DPufhLy5Dtuk@linux.dev
[9] https://lore.kernel.org/kvm/20250505161412.1926643-1-jiaqiyan@google.com
Jiaqi Yan (3):
KVM: arm64: VM exit to userspace to handle SEA
KVM: selftests: Test for KVM_EXIT_ARM_SEA
Documentation: kvm: new UAPI for handling SEA
Documentation/virt/kvm/api.rst | 61 ++++
arch/arm64/include/asm/kvm_host.h | 2 +
arch/arm64/kvm/arm.c | 5 +
arch/arm64/kvm/mmu.c | 68 +++-
include/uapi/linux/kvm.h | 10 +
tools/arch/arm64/include/asm/esr.h | 2 +
tools/testing/selftests/kvm/Makefile.kvm | 1 +
.../testing/selftests/kvm/arm64/sea_to_user.c | 331 ++++++++++++++++++
tools/testing/selftests/kvm/lib/kvm_util.c | 1 +
9 files changed, 480 insertions(+), 1 deletion(-)
create mode 100644 tools/testing/selftests/kvm/arm64/sea_to_user.c
--
2.51.0.760.g7b8bcc2412-goog
Hello,
this series is a small follow-up to the recent test_tc_tunnel
integration, to address some small missing details raised during the
final review ([1]). This is mostly about adding some missing checks
around net namespace management.
[1] https://lore.kernel.org/bpf/1ac9d14e-4250-480c-b863-410be78ac6c6@linux.dev/
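For illustration, the kind of check being added looks roughly like this
(using open_netns()/close_netns() from the selftest network helpers; the
namespace name is illustrative):
struct nstoken *nstoken;
nstoken = open_netns("at_ns0");
if (!ASSERT_OK_PTR(nstoken, "enter client netns"))
        goto fail; /* abort the subtest instead of running in the wrong netns */
/* ... configure the tunnel and run traffic ... */
close_netns(nstoken);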
Signed-off-by: Alexis Lothoré (eBPF Foundation) <alexis.lothore@bootlin.com>
---
Alexis Lothoré (eBPF Foundation) (3):
selftests/bpf: skip tc_tunnel subtest if its setup fails
selftests/bpf: add checks in tc_tunnel when entering net namespaces
selftests/bpf: use start_server_str rather than start_reuseport_server in tc_tunnel
.../selftests/bpf/prog_tests/test_tc_tunnel.c | 162 ++++++++++++++-------
1 file changed, 107 insertions(+), 55 deletions(-)
---
base-commit: 1e2d874b04ba46a3b9fe6697097aa437641f4339
change-id: 20251030-tc_tunnel_improv-6b9d1c22c6f6
Best regards,
--
Alexis Lothoré, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com
Overall, we encountered a warning [1] that can be triggered by running the
selftest I provided.
MPTCP creates subflows for data transmission between two endpoints.
However, BPF can use sockops to perform additional operations when TCP
completes the three-way handshake. The issue arose because we used sockmap
in sockops, which replaces sk->sk_prot and some handlers. Since subflows
also have their own specialized handlers, this creates a conflict and leads
to traffic failure. Therefore, we need to reject operations targeting
subflows.
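For reference, the sockops pattern that exposed the problem looks roughly
like this (a sketch; the map and program names are illustrative): once the
handshake completes, the socket, which with MPTCP may be a subflow, is
added to a sockmap, replacing sk->sk_prot.
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
struct {
        __uint(type, BPF_MAP_TYPE_SOCKMAP);
        __uint(max_entries, 1);
        __type(key, int);
        __type(value, int);
} sock_map SEC(".maps");
SEC("sockops")
int add_established_to_sockmap(struct bpf_sock_ops *skops)
{
        int key = 0;
        if (skops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB ||
            skops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB)
                /* with MPTCP this may target a subflow socket, which this
                 * series now rejects
                 */
                bpf_sock_map_update(skops, &sock_map, &key, BPF_NOEXIST);
        return 1;
}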
This patchset simply prevents the combination of subflows and sockmap
without changing any functionality.
A complete integration of MPTCP and sockmap would require more effort; for
example, we would need to retrieve the parent socket from subflows in
sockmap and implement handlers like read_skb.
If maintainers don't object, we can further improve this in subsequent
work.
[1] truncated warning:
[ 18.234652] ------------[ cut here ]------------
[ 18.234664] WARNING: CPU: 1 PID: 388 at net/mptcp/protocol.c:68 mptcp_stream_accept+0x34c/0x380
[ 18.234726] Modules linked in:
[ 18.234755] RIP: 0010:mptcp_stream_accept+0x34c/0x380
[ 18.234762] RSP: 0018:ffffc90000cf3cf8 EFLAGS: 00010202
[ 18.234800] PKRU: 55555554
[ 18.234806] Call Trace:
[ 18.234810] <TASK>
[ 18.234837] do_accept+0xeb/0x190
[ 18.234861] ? __x64_sys_pselect6+0x61/0x80
[ 18.234898] ? _raw_spin_unlock+0x12/0x30
[ 18.234915] ? alloc_fd+0x11e/0x190
[ 18.234925] __sys_accept4+0x8c/0x100
[ 18.234930] __x64_sys_accept+0x1f/0x30
[ 18.234933] x64_sys_call+0x202f/0x20f0
[ 18.234966] do_syscall_64+0x72/0x9a0
[ 18.234979] ? switch_fpu_return+0x60/0xf0
[ 18.234993] ? irqentry_exit_to_user_mode+0xdb/0x1e0
[ 18.235002] ? irqentry_exit+0x3f/0x50
[ 18.235005] ? clear_bhb_loop+0x50/0xa0
[ 18.235022] ? clear_bhb_loop+0x50/0xa0
[ 18.235025] ? clear_bhb_loop+0x50/0xa0
[ 18.235028] entry_SYSCALL_64_after_hwframe+0x76/0x7e
[ 18.235066] </TASK>
[ 18.235109] ---[ end trace 0000000000000000 ]---
---
v2: https://lore.kernel.org/bpf/20251020060503.325369-1-jiayuan.chen@linux.dev/…
Some advice suggested by Jakub Sitnicki
v1: https://lore.kernel.org/mptcp/a0a2b87119a06c5ffaa51427a0964a05534fe6f1@linu…
Some advice from Matthieu Baerts.
Jiayuan Chen (3):
net,mptcp: fix proto fallback detection with BPF sockmap
bpf,sockmap: disallow MPTCP sockets from sockmap
selftests/bpf: Add mptcp test with sockmap
net/core/sock_map.c | 27 ++++
net/mptcp/protocol.c | 9 +-
.../testing/selftests/bpf/prog_tests/mptcp.c | 150 ++++++++++++++++++
.../selftests/bpf/progs/mptcp_sockmap.c | 43 +++++
4 files changed, 227 insertions(+), 2 deletions(-)
create mode 100644 tools/testing/selftests/bpf/progs/mptcp_sockmap.c
--
2.43.0
Hello,
this is yet another conversion series, this time tackling
test_tc_edt.sh. This one was at the bottom of our list because it is
based on a bandwidth measurement (and so increases the risk of being
flaky in CI), but here is an attempt anyway, as it also
showcases a nice example of BPF-based rate shaping.
The converted test roughly follows the original script logic, with two
veths in two namespaces, a TCP connection between a client and a server,
and the client pushing as much data as possible during a specific
period. We then compute the effective data rate, shaped by the eBPF
program, by reading the RX interface stats, and compare it to the target
rate. The test passes if the measured rate is within a defined error
margin.
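Concretely, the rate check boils down to something like the following
(a sketch with hypothetical helper and constant names, not the actual
prog_tests code):
__u64 rx_before, rx_after, rate, diff, err_pct;
rx_before = read_rx_bytes(dst_ifname);     /* hypothetical RX stats helper */
run_client_for(TEST_DURATION_SEC);         /* push as much data as possible */
rx_after = read_rx_bytes(dst_ifname);
rate = (rx_after - rx_before) / TEST_DURATION_SEC;  /* bytes per second */
diff = rate > TARGET_RATE ? rate - TARGET_RATE : TARGET_RATE - rate;
err_pct = 100 * diff / TARGET_RATE;
ASSERT_LE(err_pct, ERROR_MARGIN_PCT, "measured rate within tolerated margin");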
There are two knobs driving the robustness of the test in CI:
- the test duration (the higher, the more precise is the effective rate)
- the tolerated error margin
The original test was configured with a 20s duration and a 1% error
margin. The new test is configured with a 2s duration and a 2% error
margin, to:
- make the duration tolerable in CI
- while keeping enough margin for rate measure fluctuations depending on
the CI machines load
This has been run multiple times locally to ensure that those values are
sane, and once in CI before sending the series, but I suggest letting it
live a few days in CI to see how it really behaves.
Signed-off-by: Alexis Lothoré (eBPF Foundation) <alexis.lothore@bootlin.com>
---
Alexis Lothoré (eBPF Foundation) (4):
selftests/bpf: rename test_tc_edt.bpf.c section to expose program type
selftests/bpf: integrate test_tc_edt into test_progs
selftests/bpf: remove test_tc_edt.sh
selftests/bpf: do not hardcode target rate in test_tc_edt BPF program
tools/testing/selftests/bpf/Makefile | 2 -
.../testing/selftests/bpf/prog_tests/test_tc_edt.c | 274 +++++++++++++++++++++
tools/testing/selftests/bpf/progs/test_tc_edt.c | 9 +-
tools/testing/selftests/bpf/test_tc_edt.sh | 100 --------
4 files changed, 279 insertions(+), 106 deletions(-)
---
base-commit: 1e2d874b04ba46a3b9fe6697097aa437641f4339
change-id: 20251030-tc_edt-3ea8e8d3d14e
Best regards,
--
Alexis Lothoré, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com
This small patchset avoids a verifier bug warning on conditional jumps
whose two operands are the same register while that register holds a
scalar with a known range.
v3:
- Enhance is_scalar_branch_taken() to handle scalar case. (Eduard)
- Update the selftest to cover all conditional jump opcodes. (Eduard)
v2:
https://lore.kernel.org/bpf/20251025053017.2308823-1-kafai.wan@linux.dev/
- Enhance is_branch_taken() and is_scalar_branch_taken() to handle
branch direction computation for same register. (Eduard and Alexei)
- Update the selftest.
v1:
https://lore.kernel.org/bpf/20251022164457.1203756-1-kafai.wan@linux.dev/
---
KaFai Wan (2):
bpf: Skip bounds adjustment for conditional jumps on same scalar
register
selftests/bpf: Add test for conditional jumps on same scalar register
kernel/bpf/verifier.c | 33 ++++
.../selftests/bpf/progs/verifier_bounds.c | 154 ++++++++++++++++++
2 files changed, 187 insertions(+)
--
2.43.0
nolibc currently uses 32-bit types for various APIs. These are
problematic as their reduced value range can lead to truncated values.
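As an illustration of the problem (not taken from the series): with a
32-bit off_t, an lseek() beyond 2 GiB cannot be represented and either
fails or silently truncates, while the 64-bit types introduced here handle
it fine.
#include <fcntl.h>
#include <unistd.h>
int main(void)
{
        int fd = open("sparse.bin", O_RDWR | O_CREAT, 0600);
        /* a 3 GiB offset fits in a 64-bit off_t but overflows a 32-bit one */
        off_t pos = lseek(fd, 3LL * 1024 * 1024 * 1024, SEEK_SET);
        return pos < 0;
}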
Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
---
Thomas Weißschuh (12):
tools/nolibc: use 64-bit ino_t
tools/nolibc: handle 64-bit off_t for llseek
tools/nolibc: prefer the llseek syscall
tools/nolibc: use 64-bit off_t
tools/nolibc: remove now superfluous overflow check in llseek
tools/nolibc: remove more __nolibc_enosys() fallbacks
tools/nolibc: prefer explicit 64-bit time-related system calls
tools/nolibc: gettimeofday(): avoid libgcc 64-bit divisions
tools/nolibc: use a custom struct timespec
tools/nolibc: always use 64-bit time types
selftests/nolibc: test compatibility of timespec and __kernel_timespec
tools/nolibc: remove time conversions
tools/include/nolibc/arch-s390.h | 3 +
tools/include/nolibc/poll.h | 12 ++--
tools/include/nolibc/std.h | 6 +-
tools/include/nolibc/sys.h | 21 +++---
tools/include/nolibc/sys/time.h | 2 +-
tools/include/nolibc/sys/timerfd.h | 20 +-----
tools/include/nolibc/time.h | 96 ++++++----------------------
tools/include/nolibc/types.h | 9 ++-
tools/testing/selftests/nolibc/nolibc-test.c | 18 ++++++
9 files changed, 68 insertions(+), 119 deletions(-)
---
base-commit: 90ee85c0e1e4b5804ceebbd731653e10ef3849a6
change-id: 20251001-nolibc-uapi-types-1c072d10fcc7
Best regards,
--
Thomas Weißschuh <linux@weissschuh.net>