We see quite a few flakes during the TSO test against virtualized
devices in NIPA. There are often 10-30 retransmissions during the
test, sometimes as many as 100. Set the retransmission threshold
at 1/4th of the wire frame target.
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
---
CC: shuah(a)kernel.org
CC: willemb(a)google.com
CC: daniel.zahka(a)gmail.com
CC: linux-kselftest(a)vger.kernel.org
---
tools/testing/selftests/drivers/net/hw/tso.py | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py
index c13dd5efa27a..0998e68ebaf0 100755
--- a/tools/testing/selftests/drivers/net/hw/tso.py
+++ b/tools/testing/selftests/drivers/net/hw/tso.py
@@ -60,16 +60,17 @@ from lib.py import bkg, cmd, defer, ethtool, ip, rand_port, wait_port_listen
sock_wait_drain(sock)
qstat_new = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
- # No math behind the 10 here, but try to catch cases where
- # TCP falls back to non-LSO.
- ksft_lt(tcp_sock_get_retrans(sock), 10)
- sock.close()
-
# Check that at least 90% of the data was sent as LSO packets.
# System noise may cause false negatives. Also header overheads
# will add up to 5% of extra packes... The check is best effort.
total_lso_wire = len(buf) * 0.90 // cfg.dev["mtu"]
total_lso_super = len(buf) * 0.90 // cfg.dev["tso_max_size"]
+
+ # Make sure we have order of magnitude more LSO packets than
+ # retransmits, in case TCP retransmitted all the LSO packets.
+ ksft_lt(tcp_sock_get_retrans(sock), total_lso_wire / 4)
+ sock.close()
+
if should_lso:
if cfg.have_stat_super_count:
ksft_ge(qstat_new['tx-hw-gso-packets'] -
--
2.50.1
The CI has hit a couple of cases of:
RUN global.data_steal ...
tls.c:2762:data_steal:Expected recv(cfd, buf2, sizeof(buf2), MSG_DONTWAIT) (20000) == -1 (-1)
data_steal: Test terminated by timeout
FAIL global.data_steal
It looks like the 2 msec sleep is not long enough. Make the sleep longer,
and then, instead of the second sleep, wait for the thieving process to exit.
That way we can be sure it called recv() before us.
While at it, also avoid trying to steal more than a record; this seems
to be causing issues in manual testing as well.
Fixes: d7e82594a45c ("selftests: tls: test TCP stealing data from under the TLS socket")
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
---
tools/testing/selftests/net/tls.c | 12 +++++-------
1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index dd82e198d21f..8df86ca630e0 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -2875,17 +2875,18 @@ TEST(data_steal) {
pid = fork();
ASSERT_GE(pid, 0);
if (!pid) {
- EXPECT_EQ(recv(cfd, buf, sizeof(buf), MSG_WAITALL),
- sizeof(buf));
+ EXPECT_EQ(recv(cfd, buf, sizeof(buf) / 2, MSG_WAITALL),
+ sizeof(buf) / 2);
exit(!__test_passed(_metadata));
}
- usleep(2000);
+ usleep(10000);
ASSERT_EQ(setsockopt(fd, SOL_TLS, TLS_TX, &tls, tls.len), 0);
ASSERT_EQ(setsockopt(cfd, SOL_TLS, TLS_RX, &tls, tls.len), 0);
EXPECT_EQ(send(fd, buf, sizeof(buf), 0), sizeof(buf));
- usleep(2000);
+ EXPECT_EQ(wait(&status), pid);
+ EXPECT_EQ(status, 0);
EXPECT_EQ(recv(cfd, buf2, sizeof(buf2), MSG_DONTWAIT), -1);
/* Don't check errno, the error will be different depending
* on what random bytes TLS interpreted as the record length.
@@ -2893,9 +2894,6 @@ TEST(data_steal) {
close(fd);
close(cfd);
-
- EXPECT_EQ(wait(&status), pid);
- EXPECT_EQ(status, 0);
}
static void __attribute__((constructor)) fips_check(void) {
--
2.50.1
Test that threaded state (in the persistent NAPI config) gets updated
even when NAPI with given ID is not allocated at the time.
This test is validating commit ccba9f6baa90 ("net: update NAPI threaded
config even for disabled NAPIs").
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
---
Somehow I missed sending this out with the fix series.
CC: joe(a)dama.to
CC: shuah(a)kernel.org
CC: linux-kselftest(a)vger.kernel.org
---
.../selftests/drivers/net/napi_threaded.py | 31 ++++++++++++++++++-
1 file changed, 30 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/drivers/net/napi_threaded.py b/tools/testing/selftests/drivers/net/napi_threaded.py
index 9699a100a87d..ed66efa481b0 100755
--- a/tools/testing/selftests/drivers/net/napi_threaded.py
+++ b/tools/testing/selftests/drivers/net/napi_threaded.py
@@ -38,6 +38,34 @@ from lib.py import cmd, defer, ethtool
return combined
+def napi_init(cfg, nl) -> None:
+ """
+ Test that threaded state (in the persistent NAPI config) gets updated
+ even when NAPI with given ID is not allocated at the time.
+ """
+
+ qcnt = _setup_deferred_cleanup(cfg)
+
+ _set_threaded_state(cfg, 1)
+ cmd(f"ethtool -L {cfg.ifname} combined 1")
+ _set_threaded_state(cfg, 0)
+ cmd(f"ethtool -L {cfg.ifname} combined {qcnt}")
+
+ napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True)
+ for napi in napis:
+ ksft_eq(napi['threaded'], 'disabled')
+ ksft_eq(napi.get('pid'), None)
+
+ cmd(f"ethtool -L {cfg.ifname} combined 1")
+ _set_threaded_state(cfg, 1)
+ cmd(f"ethtool -L {cfg.ifname} combined {qcnt}")
+
+ napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True)
+ for napi in napis:
+ ksft_eq(napi['threaded'], 'enabled')
+ ksft_ne(napi.get('pid'), None)
+
+
def enable_dev_threaded_disable_napi_threaded(cfg, nl) -> None:
"""
Test that when napi threaded is enabled at device level and
@@ -103,7 +131,8 @@ from lib.py import cmd, defer, ethtool
""" Ksft boiler plate main """
with NetDrvEnv(__file__, queue_count=2) as cfg:
- ksft_run([change_num_queues,
+ ksft_run([napi_init,
+ change_num_queues,
enable_dev_threaded_disable_napi_threaded],
args=(cfg, NetdevFamily()))
ksft_exit()
--
2.50.1
ncdevmem tests that the kernel correctly rejects attempts
to deactivate queues with memory providers (MPs) bound.
Make the configure_channels() test support combined channels.
Currently it tries to set the queue counts to rx N tx N-1,
which only makes sense for devices which have IRQs per ring
type. Most modern devices use combined IRQs/channels with
both Rx and Tx queues. Since the math is total Rx == combined+Rx,
setting Rx when combined is non-zero will be increasing the total
queue count, not decreasing as the test intends.
Note that the test would previously also try to set the Tx
ring count to Rx - 1, for some reason. Which would be 0
if the device has only 2 queues configured.
With this change (device with 2 queues):
setting channel count rx:1 tx:1
YNL set channels: Kernel error: 'requested channel counts are too low for existing memory provider setting (2)'
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
---
CC: shuah(a)kernel.org
CC: almasrymina(a)google.com
CC: sdf(a)fomichev.me
CC: joe(a)dama.to
CC: linux-kselftest(a)vger.kernel.org
---
.../selftests/drivers/net/hw/ncdevmem.c | 78 ++++++++++++++++++-
1 file changed, 76 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
index be937542b4c0..71961a7688e6 100644
--- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c
+++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
@@ -356,7 +356,81 @@ static int configure_rss(void)
static int configure_channels(unsigned int rx, unsigned int tx)
{
- return run_command("ethtool -L %s rx %u tx %u", ifname, rx, tx);
+ struct ethtool_channels_get_req *gchan;
+ struct ethtool_channels_set_req *schan;
+ struct ethtool_channels_get_rsp *chan;
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+ int ret;
+
+ fprintf(stderr, "setting channel count rx:%u tx:%u\n", rx, tx);
+
+ ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
+ if (!ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return -1;
+ }
+
+ gchan = ethtool_channels_get_req_alloc();
+ if (!gchan) {
+ ret = -1;
+ goto exit_close_sock;
+ }
+
+ ethtool_channels_get_req_set_header_dev_index(gchan, ifindex);
+ chan = ethtool_channels_get(ys, gchan);
+ ethtool_channels_get_req_free(gchan);
+ if (!chan) {
+ fprintf(stderr, "YNL get channels: %s\n", ys->err.msg);
+ ret = -1;
+ goto exit_close_sock;
+ }
+
+ schan = ethtool_channels_set_req_alloc();
+ if (!schan) {
+ ret = -1;
+ goto exit_free_chan;
+ }
+
+ ethtool_channels_set_req_set_header_dev_index(schan, ifindex);
+
+ if (chan->_present.combined_count) {
+ if (chan->_present.rx_count || chan->_present.tx_count) {
+ ethtool_channels_set_req_set_rx_count(schan, 0);
+ ethtool_channels_set_req_set_tx_count(schan, 0);
+ }
+
+ if (rx == tx) {
+ ethtool_channels_set_req_set_combined_count(schan, rx);
+ } else if (rx > tx) {
+ ethtool_channels_set_req_set_combined_count(schan, tx);
+ ethtool_channels_set_req_set_rx_count(schan, rx - tx);
+ } else {
+ ethtool_channels_set_req_set_combined_count(schan, rx);
+ ethtool_channels_set_req_set_tx_count(schan, tx - rx);
+ }
+
+ ret = ethtool_channels_set(ys, schan);
+ if (ret)
+ fprintf(stderr, "YNL set channels: %s\n", ys->err.msg);
+ } else if (chan->_present.rx_count) {
+ ethtool_channels_set_req_set_rx_count(schan, rx);
+ ethtool_channels_set_req_set_tx_count(schan, tx);
+
+ ret = ethtool_channels_set(ys, schan);
+ if (ret)
+ fprintf(stderr, "YNL set channels: %s\n", ys->err.msg);
+ } else {
+ fprintf(stderr, "Error: device has neither combined nor rx channels\n");
+ ret = -1;
+ }
+ ethtool_channels_set_req_free(schan);
+exit_free_chan:
+ ethtool_channels_get_rsp_free(chan);
+exit_close_sock:
+ ynl_sock_destroy(ys);
+
+ return ret;
}
static int configure_flow_steering(struct sockaddr_in6 *server_sin)
@@ -752,7 +826,7 @@ void run_devmem_tests(void)
error(1, 0, "Failed to bind\n");
/* Deactivating a bound queue should not be legal */
- if (!configure_channels(num_queues, num_queues - 1))
+ if (!configure_channels(num_queues, num_queues))
error(1, 0, "Deactivating a bound queue should be illegal.\n");
/* Closing the netlink socket does an implicit unbind */
--
2.50.1
The kernel has recently added support for shadow stacks, currently
x86 only using their CET feature but both arm64 and RISC-V have
equivalent features (GCS and Zicfiss respectively), I am actively
working on GCS[1]. With shadow stacks the hardware maintains an
additional stack containing only the return addresses for branch
instructions which is not generally writeable by userspace and ensures
that any returns are to the recorded addresses. This provides some
protection against ROP attacks and makes it easier to collect call
stacks. These shadow stacks are allocated in the address space of the
userspace process.
Our API for shadow stacks does not currently offer userspace any
flexibility for managing the allocation of shadow stacks for newly
created threads, instead the kernel allocates a new shadow stack with
the same size as the normal stack whenever a thread is created with the
feature enabled. The stacks allocated in this way are freed by the
kernel when the thread exits or shadow stacks are disabled for the
thread. This lack of flexibility and control isn't ideal, in the vast
majority of cases the shadow stack will be over allocated and the
implicit allocation and deallocation is not consistent with other
interfaces. As far as I can tell the interface is done in this manner
mainly because the shadow stack patches were in development since before
clone3() was implemented.
Since clone3() is readily extensible let's add support for specifying a
shadow stack when creating a new thread or process, keeping the current
implicit allocation behaviour if one is not specified either with
clone3() or through the use of clone(). The user must provide a shadow
stack pointer, this must point to memory mapped for use as a shadow
stack by map_shadow_stack() with an architecture specified shadow stack
token at the top of the stack.
Yuri Khrustalev has raised questions from the libc side regarding
discoverability of extended clone3() structure sizes[2], this seems like
a general issue with clone3(). There was a suggestion to add a hwcap on
arm64 which isn't ideal but is doable there, though architecture
specific mechanisms would also be needed for x86 (and RISC-V if its
support gets merged before this does). The idea has, however, had
strong pushback from the architecture maintainers and it is possible to
detect support for this in clone3() by attempting a call with a
misaligned shadow stack pointer specified so no hwcap has been added.
[1] https://lore.kernel.org/linux-arm-kernel/20241001-arm64-gcs-v13-0-222b78d87…
[2] https://lore.kernel.org/r/aCs65ccRQtJBnZ_5@arm.com
Signed-off-by: Mark Brown <broonie(a)kernel.org>
---
Changes in v18:
- Rebase onto v6.16-rc3.
- Thanks to pointers from Yuri Khrustalev this version has been tested
on x86 so I have removed the RFT tag.
- Clarify clone3_shadow_stack_valid() comment about the Kconfig check.
- Remove redundant GCSB DSYNCs in arm64 code.
- Fix token validation on x86.
- Link to v17: https://lore.kernel.org/r/20250609-clone3-shadow-stack-v17-0-8840ed97ff6f@k…
Changes in v17:
- Rebase onto v6.16-rc1.
- Link to v16: https://lore.kernel.org/r/20250416-clone3-shadow-stack-v16-0-2ffc9ca3917b@k…
Changes in v16:
- Rebase onto v6.15-rc2.
- Roll in fixes from x86 testing from Rick Edgecombe.
- Rework so that the argument is shadow_stack_token.
- Link to v15: https://lore.kernel.org/r/20250408-clone3-shadow-stack-v15-0-3fa245c6e3be@k…
Changes in v15:
- Rebase onto v6.15-rc1.
- Link to v14: https://lore.kernel.org/r/20250206-clone3-shadow-stack-v14-0-805b53af73b9@k…
Changes in v14:
- Rebase onto v6.14-rc1.
- Link to v13: https://lore.kernel.org/r/20241203-clone3-shadow-stack-v13-0-93b89a81a5ed@k…
Changes in v13:
- Rebase onto v6.13-rc1.
- Link to v12: https://lore.kernel.org/r/20241031-clone3-shadow-stack-v12-0-7183eb8bee17@k…
Changes in v12:
- Add the regular prctl() to the userspace API document since arm64
support is queued in -next.
- Link to v11: https://lore.kernel.org/r/20241005-clone3-shadow-stack-v11-0-2a6a2bd6d651@k…
Changes in v11:
- Rebase onto arm64 for-next/gcs, which is based on v6.12-rc1, and
integrate arm64 support.
- Rework the interface to specify a shadow stack pointer rather than a
base and size like we do for the regular stack.
- Link to v10: https://lore.kernel.org/r/20240821-clone3-shadow-stack-v10-0-06e8797b9445@k…
Changes in v10:
- Integrate fixes & improvements for the x86 implementation from Rick
Edgecombe.
- Require that the shadow stack be VM_WRITE.
- Require that the shadow stack base and size be sizeof(void *) aligned.
- Clean up trailing newline.
- Link to v9: https://lore.kernel.org/r/20240819-clone3-shadow-stack-v9-0-962d74f99464@ke…
Changes in v9:
- Pull token validation earlier and report problems with an error return
to parent rather than signal delivery to the child.
- Verify that the top of the supplied shadow stack is VM_SHADOW_STACK.
- Rework token validation to only do the page mapping once.
- Drop no longer needed support for testing for signals in selftest.
- Fix typo in comments.
- Link to v8: https://lore.kernel.org/r/20240808-clone3-shadow-stack-v8-0-0acf37caf14c@ke…
Changes in v8:
- Fix token verification with user specified shadow stack.
- Don't track user managed shadow stacks for child processes.
- Link to v7: https://lore.kernel.org/r/20240731-clone3-shadow-stack-v7-0-a9532eebfb1d@ke…
Changes in v7:
- Rebase onto v6.11-rc1.
- Typo fixes.
- Link to v6: https://lore.kernel.org/r/20240623-clone3-shadow-stack-v6-0-9ee7783b1fb9@ke…
Changes in v6:
- Rebase onto v6.10-rc3.
- Ensure we don't try to free the parent shadow stack in error paths of
x86 arch code.
- Spelling fixes in userspace API document.
- Additional cleanups and improvements to the clone3() tests to support
the shadow stack tests.
- Link to v5: https://lore.kernel.org/r/20240203-clone3-shadow-stack-v5-0-322c69598e4b@ke…
Changes in v5:
- Rebase onto v6.8-rc2.
- Rework ABI to have the user allocate the shadow stack memory with
map_shadow_stack() and a token.
- Force inlining of the x86 shadow stack enablement.
- Move shadow stack enablement out into a shared header for reuse by
other tests.
- Link to v4: https://lore.kernel.org/r/20231128-clone3-shadow-stack-v4-0-8b28ffe4f676@ke…
Changes in v4:
- Formatting changes.
- Use a define for minimum shadow stack size and move some basic
validation to fork.c.
- Link to v3: https://lore.kernel.org/r/20231120-clone3-shadow-stack-v3-0-a7b8ed3e2acc@ke…
Changes in v3:
- Rebase onto v6.7-rc2.
- Remove stale shadow_stack in internal kargs.
- If a shadow stack is specified unconditionally use it regardless of
CLONE_ parameters.
- Force enable shadow stacks in the selftest.
- Update changelogs for RISC-V feature rename.
- Link to v2: https://lore.kernel.org/r/20231114-clone3-shadow-stack-v2-0-b613f8681155@ke…
Changes in v2:
- Rebase onto v6.7-rc1.
- Remove ability to provide preallocated shadow stack, just specify the
desired size.
- Link to v1: https://lore.kernel.org/r/20231023-clone3-shadow-stack-v1-0-d867d0b5d4d0@ke…
---
Mark Brown (8):
arm64/gcs: Return a success value from gcs_alloc_thread_stack()
Documentation: userspace-api: Add shadow stack API documentation
selftests: Provide helper header for shadow stack testing
fork: Add shadow stack support to clone3()
selftests/clone3: Remove redundant flushes of output streams
selftests/clone3: Factor more of main loop into test_clone3()
selftests/clone3: Allow tests to flag if -E2BIG is a valid error code
selftests/clone3: Test shadow stack support
Documentation/userspace-api/index.rst | 1 +
Documentation/userspace-api/shadow_stack.rst | 44 +++++
arch/arm64/include/asm/gcs.h | 8 +-
arch/arm64/kernel/process.c | 8 +-
arch/arm64/mm/gcs.c | 55 +++++-
arch/x86/include/asm/shstk.h | 11 +-
arch/x86/kernel/process.c | 2 +-
arch/x86/kernel/shstk.c | 53 ++++-
include/asm-generic/cacheflush.h | 11 ++
include/linux/sched/task.h | 17 ++
include/uapi/linux/sched.h | 9 +-
kernel/fork.c | 93 +++++++--
tools/testing/selftests/clone3/clone3.c | 226 ++++++++++++++++++----
tools/testing/selftests/clone3/clone3_selftests.h | 65 ++++++-
tools/testing/selftests/ksft_shstk.h | 98 ++++++++++
15 files changed, 620 insertions(+), 81 deletions(-)
---
base-commit: 86731a2a651e58953fc949573895f2fa6d456841
change-id: 20231019-clone3-shadow-stack-15d40d2bf536
Best regards,
--
Mark Brown <broonie(a)kernel.org>
This series adds the ONE_REG interface for the SBI FWFT extension
implemented by KVM RISC-V. This was missed in the accepted SBI FWFT
patches for KVM RISC-V.
These patches can also be found in the riscv_kvm_fwft_one_reg_v1 branch
at: https://github.com/avpatel/linux.git
Anup Patel (6):
RISC-V: KVM: Set initial value of hedeleg in kvm_arch_vcpu_create()
RISC-V: KVM: Introduce feature specific reset for SBI FWFT
RISC-V: KVM: Introduce optional ONE_REG callbacks for SBI extensions
RISC-V: KVM: Move copy_sbi_ext_reg_indices() to SBI implementation
RISC-V: KVM: Implement ONE_REG interface for SBI FWFT state
KVM: riscv: selftests: Add SBI FWFT to get-reg-list test
arch/riscv/include/asm/kvm_vcpu_sbi.h | 23 +-
arch/riscv/include/uapi/asm/kvm.h | 14 ++
arch/riscv/kvm/vcpu.c | 3 +-
arch/riscv/kvm/vcpu_onereg.c | 60 +-----
arch/riscv/kvm/vcpu_sbi.c | 172 ++++++++++++---
arch/riscv/kvm/vcpu_sbi_fwft.c | 199 ++++++++++++++++--
arch/riscv/kvm/vcpu_sbi_sta.c | 64 ++++--
.../selftests/kvm/riscv/get-reg-list.c | 28 +++
8 files changed, 436 insertions(+), 127 deletions(-)
--
2.43.0
Add a README file for RISC-V specific kernel selftests under
tools/testing/selftests/riscv/. This mirrors the existing README
for arm64, providing clear guidance on how the tests are architecture
specific and skipped on non-riscv systems. It also includes
standard make commands for building, running and installing the
tests, along with a reference to general kselftest documentation.
Signed-off-by: Bala-Vignesh-Reddy <reddybalavignesh9979(a)gmail.com>
---
tools/testing/selftests/riscv/README | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
create mode 100644 tools/testing/selftests/riscv/README
diff --git a/tools/testing/selftests/riscv/README b/tools/testing/selftests/riscv/README
new file mode 100644
index 000000000000..443da395da68
--- /dev/null
+++ b/tools/testing/selftests/riscv/README
@@ -0,0 +1,24 @@
+KSelfTest RISC-V
+================
+
+- These tests are riscv specific and so not built or run but just skipped
+ completely when env-variable ARCH is found to be different than 'riscv'.
+
+- Holding true the above, RISC-V KSFT tests can be run within the
+ KSelfTest framework using standard Linux top-level-makefile targets:
+
+ $ make TARGETS=riscv kselftest-clean
+ $ make TARGETS=riscv kselftest
+
+ or
+
+ $ make -C tools/testing/selftests TARGETS=riscv \
+ INSTALL_PATH=<your-installation-path> install
+
+ or, alternatively, only specific riscv/ subtargets can be picked:
+
+ $ make -C tools/testing/selftests TARGETS=riscv RISCV_SUBTARGETS="mm vector" \
+ INSTALL_PATH=<your-installation-path> install
+
+ Further details on building and running KSFT can be found in:
+ Documentation/dev-tools/kselftest.rst
--
2.43.0