As described in the help string, the user might want to disable these
tests if they don't like to see stacktraces/BUG etc in their kernel log.
However, if they enable PANIC_ON_OOPS, these tests also crash the
machine, which it's safe to assume _almost_ nobody wants.
One might argue that _absolutely_ nobody ever wants their kernel to
crash so this should just be a hard dependency instead of a default.
However, since this is rather special code that's anyway concerned with
deliberately doing "bad" things, the normal rules don't seem to apply,
hence prefer flexibility and allow users to set up a crashing Kconfig if
they so choose.
Signed-off-by: Brendan Jackman <jackmanb(a)google.com>
---
lib/kunit/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/kunit/Kconfig b/lib/kunit/Kconfig
index 50ecf55d2b9c8a82f2aff7a0b4156bd6179b0a2f..498cc51e493dc9a819e012b8082fb765f25512b9 100644
--- a/lib/kunit/Kconfig
+++ b/lib/kunit/Kconfig
@@ -28,7 +28,7 @@ config KUNIT_FAULT_TEST
bool "Enable KUnit tests which print BUG stacktraces"
depends on KUNIT_TEST
depends on !UML
- default y
+ default !PANIC_ON_OOPS
help
Enables fault handling tests for the KUnit framework. These tests may
trigger a kernel BUG(), and the associated stack trace, even when they
---
base-commit: 7bc16e72ddb993d706f698c2f6cee694e485f557
change-id: 20251207-kunit-fault-no-panic-e9bdce848031
Best regards,
--
Brendan Jackman <jackmanb(a)google.com>
This series improves error propagation in cpumap and adds selftests that
cover the failure cases.
Currently, failures returned from __cpu_map_entry_alloc() are ignored
and always converted to -ENOMEM by cpu_map_update_elem(). This series
ensures the correct error propagation and adds selftests.
Changes:
v2:
- send to bpf-next, not to bpf
- drop Fixes: tag
v1: https://lore.kernel.org/bpf/20251128160504.57844-1-enjuk@amazon.com/
Kohei Enju (2):
bpf: cpumap: propagate underlying error in cpu_map_update_elem()
selftests/bpf: add tests for attaching invalid fd
kernel/bpf/cpumap.c | 21 ++++++++++++-------
.../bpf/prog_tests/xdp_cpumap_attach.c | 19 +++++++++++++++--
2 files changed, 30 insertions(+), 10 deletions(-)
--
2.51.0
If there is a large number (hundreds) of dmabufs allocated, the text
output generated from dmabuf_iter_seq_show can exceed common user buffer
sizes (e.g. PAGE_SIZE) necessitating multiple start/stop cycles to
iterate through all dmabufs. However the dmabuf iterator currently
returns NULL in dmabuf_iter_seq_start for all non-zero pos values, which
results in the truncation of the output before all dmabufs are handled.
After dma_buf_iter_begin / dma_buf_iter_next, the refcount of the buffer
is elevated so that the BPF iterator program can run without holding any
locks. When a stop occurs, instead of immediately dropping the reference
on the buffer, stash a pointer to the buffer in seq->priv until
either start is called or the iterator is released. This also enables
the resumption of iteration without first walking through the list of
dmabufs based on the pos value.
Fixes: 76ea95534995 ("bpf: Add dmabuf iterator")
Signed-off-by: T.J. Mercier <tjmercier(a)google.com>
---
kernel/bpf/dmabuf_iter.c | 56 +++++++++++++++++++++++++++++++++++-----
1 file changed, 49 insertions(+), 7 deletions(-)
diff --git a/kernel/bpf/dmabuf_iter.c b/kernel/bpf/dmabuf_iter.c
index 4dd7ef7c145c..cd500248abd9 100644
--- a/kernel/bpf/dmabuf_iter.c
+++ b/kernel/bpf/dmabuf_iter.c
@@ -6,10 +6,33 @@
#include <linux/kernel.h>
#include <linux/seq_file.h>
+struct dmabuf_iter_priv {
+ /*
+ * If this pointer is non-NULL, the buffer's refcount is elevated to
+ * prevent destruction between stop/start. If reading is not resumed and
+ * start is never called again, then dmabuf_iter_seq_fini drops the
+ * reference when the iterator is released.
+ */
+ struct dma_buf *dmabuf;
+};
+
static void *dmabuf_iter_seq_start(struct seq_file *seq, loff_t *pos)
{
- if (*pos)
- return NULL;
+ struct dmabuf_iter_priv *p = seq->private;
+
+ if (*pos) {
+ struct dma_buf *dmabuf = p->dmabuf;
+
+ if (!dmabuf)
+ return NULL;
+
+ /*
+ * Always resume from where we stopped, regardless of the value
+ * of pos.
+ */
+ p->dmabuf = NULL;
+ return dmabuf;
+ }
return dma_buf_iter_begin();
}
@@ -54,8 +77,11 @@ static void dmabuf_iter_seq_stop(struct seq_file *seq, void *v)
{
struct dma_buf *dmabuf = v;
- if (dmabuf)
- dma_buf_put(dmabuf);
+ if (dmabuf) {
+ struct dmabuf_iter_priv *p = seq->private;
+
+ p->dmabuf = dmabuf;
+ }
}
static const struct seq_operations dmabuf_iter_seq_ops = {
@@ -71,11 +97,27 @@ static void bpf_iter_dmabuf_show_fdinfo(const struct bpf_iter_aux_info *aux,
seq_puts(seq, "dmabuf iter\n");
}
+static int dmabuf_iter_seq_init(void *priv, struct bpf_iter_aux_info *aux)
+{
+ struct dmabuf_iter_priv *p = (struct dmabuf_iter_priv *)priv;
+
+ p->dmabuf = NULL;
+ return 0;
+}
+
+static void dmabuf_iter_seq_fini(void *priv)
+{
+ struct dmabuf_iter_priv *p = (struct dmabuf_iter_priv *)priv;
+
+ if (p->dmabuf)
+ dma_buf_put(p->dmabuf);
+}
+
static const struct bpf_iter_seq_info dmabuf_iter_seq_info = {
.seq_ops = &dmabuf_iter_seq_ops,
- .init_seq_private = NULL,
- .fini_seq_private = NULL,
- .seq_priv_size = 0,
+ .init_seq_private = dmabuf_iter_seq_init,
+ .fini_seq_private = dmabuf_iter_seq_fini,
+ .seq_priv_size = sizeof(struct dmabuf_iter_priv),
};
static struct bpf_iter_reg bpf_dmabuf_reg_info = {
base-commit: 30f09200cc4aefbd8385b01e41bde2e4565a6f0e
--
2.52.0.177.g9f829587af-goog
The resctrl selftest currently exhibits several failures on Hygon CPUs
due to missing vendor detection and edge-case handling specific to
Hygon's architecture.
This patch series addresses three distinct issues:
1. Missing CPU vendor detection, causing the test to fail with
"# Can not get vendor info..." on Hygon CPUs.
2. A division-by-zero crash in SNC detection on Hygon CPUs.
3. Incorrect handling of non-contiguous CBM support on Hygon CPUs.
These changes enable resctrl selftest to run successfully on
Hygon CPUs that support Platform QoS features.
Changelog:
v2:
- Patch 1: switch all of the vendor id bitmasks to use BIT() (Reinette)
- Patch 2: add Reviewed-by: Reinette Chatre <reinette.chatre(a)intel.com>
- Patch 3: add Reviewed-by: Reinette Chatre <reinette.chatre(a)intel.com>
add a maintainer note to highlight it is not a candidate for
backport (Reinette)
Xiaochen Shen (3):
selftests/resctrl: Add CPU vendor detection for Hygon
selftests/resctrl: Fix a division by zero error on Hygon
selftests/resctrl: Fix non-contiguous CBM check for Hygon
tools/testing/selftests/resctrl/cat_test.c | 4 ++--
tools/testing/selftests/resctrl/resctrl.h | 6 ++++--
tools/testing/selftests/resctrl/resctrl_tests.c | 2 ++
tools/testing/selftests/resctrl/resctrlfs.c | 10 ++++++++++
4 files changed, 18 insertions(+), 4 deletions(-)
--
2.47.3
This patch series introduces a new configfs attribute that enables sending
messages directly through netconsole without going through the kernel's logging
infrastructure.
This feature allows users to send custom messages, alerts, or status updates
directly to netconsole receivers by writing to
/sys/kernel/config/netconsole/<target>/send_msg, without polluting kernel
buffers or sending messages to the serial console, which could be slow.
At Meta this is currently used in two cases (through printk for
now):
a) When a new workload enters or leaves the machine.
b) From time to time, as a "ping" to make sure the netconsole/machine
is alive.
The implementation reuses the existing message transmission functions
(send_msg_udp() and send_ext_msg_udp()) to handle both basic and extended
message formats.
Regarding code organization, this version uses forward declarations for
send_msg_udp() and send_ext_msg_udp() functions rather than relocating them
within the file. While forward declarations do add a small amount of
redundancy, they avoid the larger churn that would result from moving entire
function definitions.
---
Breno Leitao (4):
netconsole: extract message fragmentation into send_msg_udp()
netconsole: Add configfs attribute for direct message sending
selftests/netconsole: Switch to configfs send_msg interface
Documentation: netconsole: Document send_msg configfs attribute
Documentation/networking/netconsole.rst | 40 +++++++++++++++
drivers/net/netconsole.c | 59 ++++++++++++++++++----
.../selftests/drivers/net/netcons_sysdata.sh | 2 +-
3 files changed, 91 insertions(+), 10 deletions(-)
---
base-commit: ab084f0b8d6d2ee4b1c6a28f39a2a7430bdfa7f0
change-id: 20251127-netconsole_send_msg-89813956dc23
Best regards,
--
Breno Leitao <leitao(a)debian.org>
This series improves the CPU cost of RX token management by adding an
attribute to NETDEV_CMD_BIND_RX that configures sockets using the
binding to avoid the xarray allocator and instead use a per-binding niov
array and a uref field in niov.
Improvement is ~13% cpu util per RX user thread.
Using kperf, the following results were observed:
Before:
Average RX worker idle %: 13.13, flows 4, test runs 11
After:
Average RX worker idle %: 26.32, flows 4, test runs 11
Two other approaches were tested, but with no improvement. Namely, 1)
using a hashmap for tokens and 2) keeping an xarray of atomic counters
but using RCU so that the hotpath could be mostly lockless. Neither of
these approaches proved better than the simple array in terms of CPU.
The attribute NETDEV_A_DMABUF_AUTORELEASE is added to toggle the
optimization. It is an optional attribute and defaults to 0 (i.e.,
optimization on).
To: David S. Miller <davem(a)davemloft.net>
To: Eric Dumazet <edumazet(a)google.com>
To: Jakub Kicinski <kuba(a)kernel.org>
To: Paolo Abeni <pabeni(a)redhat.com>
To: Simon Horman <horms(a)kernel.org>
To: Kuniyuki Iwashima <kuniyu(a)google.com>
To: Willem de Bruijn <willemb(a)google.com>
To: Neal Cardwell <ncardwell(a)google.com>
To: David Ahern <dsahern(a)kernel.org>
To: Mina Almasry <almasrymina(a)google.com>
To: Arnd Bergmann <arnd(a)arndb.de>
To: Jonathan Corbet <corbet(a)lwn.net>
To: Andrew Lunn <andrew+netdev(a)lunn.ch>
To: Shuah Khan <shuah(a)kernel.org>
Cc: Stanislav Fomichev <sdf(a)fomichev.me>
Cc: netdev(a)vger.kernel.org
Cc: linux-kernel(a)vger.kernel.org
Cc: linux-arch(a)vger.kernel.org
Cc: linux-doc(a)vger.kernel.org
Cc: linux-kselftest(a)vger.kernel.org
Signed-off-by: Bobby Eshleman <bobbyeshleman(a)meta.com>
Changes in v7:
- use netlink instead of sockopt (Stan)
- restrict system to only one mode, dmabuf bindings can not co-exist
with different modes (Stan)
- use static branching to enforce single system-wide mode (Stan)
- Link to v6: https://lore.kernel.org/r/20251104-scratch-bobbyeshleman-devmem-tcp-token-u…
Changes in v6:
- renamed 'net: devmem: use niov array for token management' to refer to
optionality of new config
- added documentation and tests
- make autorelease flag per-socket sockopt instead of binding
field / sysctl
- many per-patch changes (see Changes sections per-patch)
- Link to v5: https://lore.kernel.org/r/20251023-scratch-bobbyeshleman-devmem-tcp-token-u…
Changes in v5:
- add sysctl to opt-out of performance benefit, back to old token release
- Link to v4: https://lore.kernel.org/all/20250926-scratch-bobbyeshleman-devmem-tcp-token…
Changes in v4:
- rebase to net-next
- Link to v3: https://lore.kernel.org/r/20250926-scratch-bobbyeshleman-devmem-tcp-token-u…
Changes in v3:
- make urefs per-binding instead of per-socket, reducing memory
footprint
- fallback to cleaning up references in dmabuf unbind if socket
leaked tokens
- drop ethtool patch
- Link to v2: https://lore.kernel.org/r/20250911-scratch-bobbyeshleman-devmem-tcp-token-u…
Changes in v2:
- net: ethtool: prevent user from breaking devmem single-binding rule
(Mina)
- pre-assign niovs in binding->vec for RX case (Mina)
- remove WARNs on invalid user input (Mina)
- remove extraneous binding ref get (Mina)
- remove WARN for changed binding (Mina)
- always use GFP_ZERO for binding->vec (Mina)
- fix length of alloc for urefs
- use atomic_set(, 0) to initialize sk_user_frags.urefs
- Link to v1: https://lore.kernel.org/r/20250902-scratch-bobbyeshleman-devmem-tcp-token-u…
---
Bobby Eshleman (5):
net: devmem: rename tx_vec to vec in dmabuf binding
net: devmem: refactor sock_devmem_dontneed for autorelease split
net: devmem: implement autorelease token management
net: devmem: document NETDEV_A_DMABUF_AUTORELEASE netlink attribute
selftests: drv-net: devmem: add autorelease tests
Documentation/netlink/specs/netdev.yaml | 12 +++
Documentation/networking/devmem.rst | 70 +++++++++++++
include/net/netmem.h | 1 +
include/net/sock.h | 7 +-
include/uapi/linux/netdev.h | 1 +
net/core/devmem.c | 121 ++++++++++++++++++----
net/core/devmem.h | 13 ++-
net/core/netdev-genl-gen.c | 5 +-
net/core/netdev-genl.c | 13 ++-
net/core/sock.c | 103 ++++++++++++++----
net/ipv4/tcp.c | 78 +++++++++++---
net/ipv4/tcp_ipv4.c | 13 ++-
net/ipv4/tcp_minisocks.c | 3 +-
tools/include/uapi/linux/netdev.h | 1 +
tools/testing/selftests/drivers/net/hw/devmem.py | 22 +++-
tools/testing/selftests/drivers/net/hw/ncdevmem.c | 19 ++--
16 files changed, 401 insertions(+), 81 deletions(-)
---
base-commit: 4c52142904b33b41c3ff7ee58670b4e3b3bf1120
change-id: 20250829-scratch-bobbyeshleman-devmem-tcp-token-upstream-292be174d503
Best regards,
--
Bobby Eshleman <bobbyeshleman(a)meta.com>
This series introduces NUMA-aware memory placement support for KVM guests
with guest_memfd memory backends. It builds upon Fuad Tabba's work (V17)
that enabled host-mapping for guest_memfd memory [1] and can be applied
directly on the KVM tree [2] (branch kvm-next, base commit: a6ad5413,
Merge branch 'guest-memfd-mmap' into HEAD)
== Background ==
KVM's guest-memfd memory backend currently lacks support for NUMA policy
enforcement, causing guest memory allocations to be distributed across host
nodes according to kernel's default behavior, irrespective of any policy
specified by the VMM. This limitation arises because conventional userspace
NUMA control mechanisms like mbind(2) don't work since the memory isn't
directly mapped to userspace when allocations occur.
Fuad's work [1] provides the necessary mmap capability, and this series
leverages it to enable mbind(2).
== Implementation ==
This series implements proper NUMA policy support for guest-memfd by:
1. Adding mempolicy-aware allocation APIs to the filemap layer.
2. Introducing custom inodes (via a dedicated slab-allocated inode cache,
kvm_gmem_inode_info) to store NUMA policy and metadata for guest memory.
3. Implementing get/set_policy vm_ops in guest_memfd to support NUMA
policy.
With these changes, VMMs can now control guest memory placement by mapping
guest_memfd file descriptor and using mbind(2) to specify:
- Policy modes: default, bind, interleave, or preferred
- Host NUMA nodes: List of target nodes for memory allocation
These policies affect only future allocations and do not migrate existing
memory. This matches mbind(2)'s default behavior which affects only new
allocations unless overridden with MPOL_MF_MOVE/MPOL_MF_MOVE_ALL flags (Not
supported for guest_memfd as it is unmovable by design).
== Upstream Plan ==
Phased approach as per David's guest_memfd extension overview [3] and
community calls [4]:
Phase 1 (this series):
1. Focuses on shared guest_memfd support (non-CoCo VMs).
2. Builds on Fuad's host-mapping work [1].
Phase2 (future work):
1. NUMA support for private guest_memfd (CoCo VMs).
2. Depends on SNP in-place conversion support [5].
This series provides a clean integration path for NUMA-aware memory
management for guest_memfd and lays the groundwork for future confidential
computing NUMA capabilities.
Thanks,
Shivank
== Changelog ==
- v1,v2: Extended the KVM_CREATE_GUEST_MEMFD IOCTL to pass mempolicy.
- v3: Introduced fbind() syscall for VMM memory-placement configuration.
- v4-v6: Current approach using shared_policy support and vm_ops (based on
suggestions from David [6] and guest_memfd bi-weekly upstream
call discussion [7]).
- v7: Use inodes to store NUMA policy instead of file [8].
- v8: Rebase on top of Fuad's V12: Host mmapping for guest_memfd memory.
- v9: Rebase on top of Fuad's V13 and incorporate review comments
- V10: Rebase on top of Fuad's V17. Use latest guest_memfd inode patch
from Ackerley (with David's review comments). Use newer kmem_cache_create()
API variant with arg parameter (Vlastimil)
- V11: Rebase on kvm-next, remove RFC tag, use Ackerley's latest patch
and fix a rcu race bug during kvm module unload.
[1] https://lore.kernel.org/all/20250729225455.670324-1-seanjc@google.com
[2] https://git.kernel.org/pub/scm/virt/kvm/kvm.git/log/?h=next
[3] https://lore.kernel.org/all/c1c9591d-218a-495c-957b-ba356c8f8e09@redhat.com
[4] https://docs.google.com/document/d/1M6766BzdY1Lhk7LiR5IqVR8B8mG3cr-cxTxOrAo…
[5] https://lore.kernel.org/all/20250613005400.3694904-1-michael.roth@amd.com
[6] https://lore.kernel.org/all/6fbef654-36e2-4be5-906e-2a648a845278@redhat.com
[7] https://lore.kernel.org/all/2b77e055-98ac-43a1-a7ad-9f9065d7f38f@amd.com
[8] https://lore.kernel.org/all/diqzbjumm167.fsf@ackerleytng-ctop.c.googlers.com
Ackerley Tng (1):
KVM: guest_memfd: Use guest mem inodes instead of anonymous inodes
Matthew Wilcox (Oracle) (2):
mm/filemap: Add NUMA mempolicy support to filemap_alloc_folio()
mm/filemap: Extend __filemap_get_folio() to support NUMA memory
policies
Shivank Garg (4):
mm/mempolicy: Export memory policy symbols
KVM: guest_memfd: Add slab-allocated inode cache
KVM: guest_memfd: Enforce NUMA mempolicy using shared policy
KVM: guest_memfd: selftests: Add tests for mmap and NUMA policy
support
fs/bcachefs/fs-io-buffered.c | 2 +-
fs/btrfs/compression.c | 4 +-
fs/btrfs/verity.c | 2 +-
fs/erofs/zdata.c | 2 +-
fs/f2fs/compress.c | 2 +-
include/linux/pagemap.h | 18 +-
include/uapi/linux/magic.h | 1 +
mm/filemap.c | 23 +-
mm/mempolicy.c | 6 +
mm/readahead.c | 2 +-
tools/testing/selftests/kvm/Makefile.kvm | 1 +
.../testing/selftests/kvm/guest_memfd_test.c | 121 ++++++++
virt/kvm/guest_memfd.c | 262 ++++++++++++++++--
virt/kvm/kvm_main.c | 7 +-
virt/kvm/kvm_mm.h | 9 +-
15 files changed, 412 insertions(+), 50 deletions(-)
--
2.43.0
---
== Earlier Postings ==
v10: https://lore.kernel.org/all/20250811090605.16057-2-shivankg@amd.com
v9: https://lore.kernel.org/all/20250713174339.13981-2-shivankg@amd.com
v8: https://lore.kernel.org/all/20250618112935.7629-1-shivankg@amd.com
v7: https://lore.kernel.org/all/20250408112402.181574-1-shivankg@amd.com
v6: https://lore.kernel.org/all/20250226082549.6034-1-shivankg@amd.com
v5: https://lore.kernel.org/all/20250219101559.414878-1-shivankg@amd.com
v4: https://lore.kernel.org/all/20250210063227.41125-1-shivankg@amd.com
v3: https://lore.kernel.org/all/20241105164549.154700-1-shivankg@amd.com
v2: https://lore.kernel.org/all/20240919094438.10987-1-shivankg@amd.com
v1: https://lore.kernel.org/all/20240916165743.201087-1-shivankg@amd.com
Patch series "Fix va_high_addr_switch.sh test failure - again", v1.
Two issues exist for the va_high_addr_switch test. One issue is that
the test return value is ignored in va_high_addr_switch.sh. The second is
that va_high_addr_switch requires 6 hugepages, but the script sets up only 5.
Besides that, the nr_hugepages setup in run_vmtests.sh for arm64 can be
done in va_high_addr_switch.sh too.
This patch: (of 3)
The script's return value should be the return value of va_high_addr_switch;
otherwise a test failure would be silently ignored.
Fixes: d9d957bd7b61 ("selftests/mm: alloc hugepages in va_high_addr_switch test")
CC: Luiz Capitulino <luizcap(a)redhat.com>
Signed-off-by: Chunyu Hu <chuhu(a)redhat.com>
---
tools/testing/selftests/mm/va_high_addr_switch.sh | 2 ++
1 file changed, 2 insertions(+)
diff --git a/tools/testing/selftests/mm/va_high_addr_switch.sh b/tools/testing/selftests/mm/va_high_addr_switch.sh
index a7d4b02b21dd..f89fe078a8e6 100755
--- a/tools/testing/selftests/mm/va_high_addr_switch.sh
+++ b/tools/testing/selftests/mm/va_high_addr_switch.sh
@@ -114,4 +114,6 @@ save_nr_hugepages
# 4 keep_mapped pages, and one for tmp usage
setup_nr_hugepages 5
./va_high_addr_switch --run-hugetlb
+retcode=$?
restore_nr_hugepages
+exit $retcode
--
2.49.0