We recently notice that the step_after_suspend_test would
fail on our plenty devices. The test believesit failed to
enter suspend state with
$ sudo ./step_after_suspend_test
TAP version 13
Bail out! Failed to enter Suspend state
However, in the kernel message, I indeed see the system get
suspended and then wake up later.
[611172.033108] PM: suspend entry (s2idle)
[611172.044940] Filesystems sync: 0.006 seconds
[611172.052254] Freezing user space processes
[611172.059319] Freezing user space processes completed (elapsed 0.001 seconds)
[611172.067920] OOM killer disabled.
[611172.072465] Freezing remaining freezable tasks
[611172.080332] Freezing remaining freezable tasks completed (elapsed 0.001 seconds)
[611172.089724] printk: Suspending console(s) (use no_console_suspend to debug)
[611172.117126] serial 00:03: disabled
--- some other hardware get reconnected ---
[611203.136277] OOM killer enabled.
[611203.140637] Restarting tasks ...
[611203.141135] usb 1-8.1: USB disconnect, device number 7
[611203.141755] done.
[611203.155268] random: crng reseeded on system resumption
[611203.162059] PM: suspend exit
After investigation, I notice that for the code block
if (write(power_state_fd, "mem", strlen("mem")) != strlen("mem"))
ksft_exit_fail_msg("Failed to enter Suspend state\n");
The write will return -1 and errno is set to 16 (device busy).
It should be caused by the write function is not successfully returned
before the system suspend and the return value get messed when waking up.
As a result, It may be better to check the time passed of those few instructions
to determine whether the suspend is executed correctly for it is pretty hard to
execute those few lines for 4 seconds, or even more if it is not long enough.
Fixes: bfd092b8c2728 ("selftests: breakpoint: add step_after_suspend_test")
Reported-by: Sinadin Shan <sinadin.shan(a)oracle.com>
Signed-off-by: Yifei Liu <yifei.l.liu(a)oracle.com>
---
.../selftests/breakpoints/step_after_suspend_test.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/breakpoints/step_after_suspend_test.c b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
index dfec31fb9b30d..d615f091e5bae 100644
--- a/tools/testing/selftests/breakpoints/step_after_suspend_test.c
+++ b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
@@ -18,6 +18,7 @@
#include <sys/timerfd.h>
#include <sys/types.h>
#include <sys/wait.h>
+#include <time.h>
#include "../kselftest.h"
@@ -133,6 +134,7 @@ void suspend(void)
int timerfd;
int err;
struct itimerspec spec = {};
+ clock_t t;
if (getuid() != 0)
ksft_exit_skip("Please run the test as root - Exiting.\n");
@@ -152,8 +154,11 @@ void suspend(void)
if (err < 0)
ksft_exit_fail_msg("timerfd_settime() failed\n");
- if (write(power_state_fd, "mem", strlen("mem")) != strlen("mem"))
- ksft_exit_fail_msg("Failed to enter Suspend state\n");
+ t = clock();
+ write(power_state_fd, "mem", strlen("mem"));
+ t = clock()-t;
+ if ((int)(t) < 4)
+ ksft_exit_fail_msg("Failed to enter Suspend state %d\n",errno);
close(timerfd);
close(power_state_fd);
--
2.45.2
grep -rnIF "#define __NR_userfaultfd"
tools/include/uapi/asm-generic/unistd.h:681:#define __NR_userfaultfd 282
arch/x86/include/generated/uapi/asm/unistd_32.h:374:#define
__NR_userfaultfd 374
arch/x86/include/generated/uapi/asm/unistd_64.h:327:#define
__NR_userfaultfd 323
arch/x86/include/generated/uapi/asm/unistd_x32.h:282:#define
__NR_userfaultfd (__X32_SYSCALL_BIT + 323)
arch/arm/include/generated/uapi/asm/unistd-eabi.h:347:#define
__NR_userfaultfd (__NR_SYSCALL_BASE + 388)
arch/arm/include/generated/uapi/asm/unistd-oabi.h:359:#define
__NR_userfaultfd (__NR_SYSCALL_BASE + 388)
include/uapi/asm-generic/unistd.h:681:#define __NR_userfaultfd 282
The number is dependent on the architecture. The above data shows that:
x86 374
x86_64 323
The value of __NR_userfaultfd was changed to 282 when
asm-generic/unistd.h was included. It makes the test to fail every time
as the correct number of this syscall on x86_64 is 323. Fix the header
to asm/unistd.h.
Fixes: a5c6bc590094 ("selftests/mm: remove local __NR_* definitions")
Signed-off-by: Muhammad Usama Anjum <usama.anjum(a)collabora.com>
---
tools/testing/selftests/mm/pagemap_ioctl.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c
index fc90af2a97b80..bcc73b4e805c6 100644
--- a/tools/testing/selftests/mm/pagemap_ioctl.c
+++ b/tools/testing/selftests/mm/pagemap_ioctl.c
@@ -15,7 +15,7 @@
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <math.h>
-#include <asm-generic/unistd.h>
+#include <asm/unistd.h>
#include <pthread.h>
#include <sys/resource.h>
#include <assert.h>
--
2.39.2
With `long` mapped to `isize`, `size_t`/`__kernel_size_t` mapped to
usize and `char` mapped to `u8`, many of the existing casts are no
longer necessary.
Signed-off-by: Gary Guo <gary(a)garyguo.net>
---
rust/kernel/kunit.rs | 10 ++--------
rust/kernel/print.rs | 4 ++--
rust/kernel/str.rs | 6 +++---
rust/kernel/uaccess.rs | 27 +++++++--------------------
4 files changed, 14 insertions(+), 33 deletions(-)
diff --git a/rust/kernel/kunit.rs b/rust/kernel/kunit.rs
index 0ba77276ae7ef..766aeb1c6aea8 100644
--- a/rust/kernel/kunit.rs
+++ b/rust/kernel/kunit.rs
@@ -17,10 +17,7 @@ pub fn err(args: fmt::Arguments<'_>) {
// are passing.
#[cfg(CONFIG_PRINTK)]
unsafe {
- bindings::_printk(
- b"\x013%pA\0".as_ptr() as _,
- &args as *const _ as *const c_void,
- );
+ bindings::_printk(b"\x013%pA\0".as_ptr(), &args as *const _ as *const c_void);
}
}
@@ -33,10 +30,7 @@ pub fn info(args: fmt::Arguments<'_>) {
// are passing.
#[cfg(CONFIG_PRINTK)]
unsafe {
- bindings::_printk(
- b"\x016%pA\0".as_ptr() as _,
- &args as *const _ as *const c_void,
- );
+ bindings::_printk(b"\x016%pA\0".as_ptr(), &args as *const _ as *const c_void);
}
}
diff --git a/rust/kernel/print.rs b/rust/kernel/print.rs
index 508b0221256c9..90ae4f2568910 100644
--- a/rust/kernel/print.rs
+++ b/rust/kernel/print.rs
@@ -104,7 +104,7 @@ pub unsafe fn call_printk(
#[cfg(CONFIG_PRINTK)]
unsafe {
bindings::_printk(
- format_string.as_ptr() as _,
+ format_string.as_ptr(),
module_name.as_ptr(),
&args as *const _ as *const c_void,
);
@@ -125,7 +125,7 @@ pub fn call_printk_cont(args: fmt::Arguments<'_>) {
#[cfg(CONFIG_PRINTK)]
unsafe {
bindings::_printk(
- format_strings::CONT.as_ptr() as _,
+ format_strings::CONT.as_ptr(),
&args as *const _ as *const c_void,
);
}
diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs
index 3980d37bd4b29..2d30bca079e37 100644
--- a/rust/kernel/str.rs
+++ b/rust/kernel/str.rs
@@ -190,7 +190,7 @@ pub unsafe fn from_char_ptr<'a>(ptr: *const crate::ffi::c_char) -> &'a Self {
// to a `NUL`-terminated C string.
let len = unsafe { bindings::strlen(ptr) } + 1;
// SAFETY: Lifetime guaranteed by the safety precondition.
- let bytes = unsafe { core::slice::from_raw_parts(ptr as _, len as _) };
+ let bytes = unsafe { core::slice::from_raw_parts(ptr as _, len) };
// SAFETY: As `len` is returned by `strlen`, `bytes` does not contain interior `NUL`.
// As we have added 1 to `len`, the last byte is known to be `NUL`.
unsafe { Self::from_bytes_with_nul_unchecked(bytes) }
@@ -249,7 +249,7 @@ pub unsafe fn from_bytes_with_nul_unchecked_mut(bytes: &mut [u8]) -> &mut CStr {
/// Returns a C pointer to the string.
#[inline]
pub const fn as_char_ptr(&self) -> *const crate::ffi::c_char {
- self.0.as_ptr() as _
+ self.0.as_ptr()
}
/// Convert the string to a byte slice without the trailing `NUL` byte.
@@ -817,7 +817,7 @@ pub fn try_from_fmt(args: fmt::Arguments<'_>) -> Result<Self, Error> {
// SAFETY: The buffer is valid for read because `f.bytes_written()` is bounded by `size`
// (which the minimum buffer size) and is non-zero (we wrote at least the `NUL` terminator)
// so `f.bytes_written() - 1` doesn't underflow.
- let ptr = unsafe { bindings::memchr(buf.as_ptr().cast(), 0, (f.bytes_written() - 1) as _) };
+ let ptr = unsafe { bindings::memchr(buf.as_ptr().cast(), 0, f.bytes_written() - 1) };
if !ptr.is_null() {
return Err(EINVAL);
}
diff --git a/rust/kernel/uaccess.rs b/rust/kernel/uaccess.rs
index c746a1f1bb5ad..eb72fbcf152a1 100644
--- a/rust/kernel/uaccess.rs
+++ b/rust/kernel/uaccess.rs
@@ -8,7 +8,7 @@
alloc::Flags,
bindings,
error::Result,
- ffi::{c_ulong, c_void},
+ ffi::c_void,
prelude::*,
types::{AsBytes, FromBytes},
};
@@ -227,13 +227,9 @@ pub fn read_raw(&mut self, out: &mut [MaybeUninit<u8>]) -> Result {
if len > self.length {
return Err(EFAULT);
}
- let Ok(len_ulong) = c_ulong::try_from(len) else {
- return Err(EFAULT);
- };
- // SAFETY: `out_ptr` points into a mutable slice of length `len_ulong`, so we may write
+ // SAFETY: `out_ptr` points into a mutable slice of length `len`, so we may write
// that many bytes to it.
- let res =
- unsafe { bindings::copy_from_user(out_ptr, self.ptr as *const c_void, len_ulong) };
+ let res = unsafe { bindings::copy_from_user(out_ptr, self.ptr as *const c_void, len) };
if res != 0 {
return Err(EFAULT);
}
@@ -262,9 +258,6 @@ pub fn read<T: FromBytes>(&mut self) -> Result<T> {
if len > self.length {
return Err(EFAULT);
}
- let Ok(len_ulong) = c_ulong::try_from(len) else {
- return Err(EFAULT);
- };
let mut out: MaybeUninit<T> = MaybeUninit::uninit();
// SAFETY: The local variable `out` is valid for writing `size_of::<T>()` bytes.
//
@@ -275,7 +268,7 @@ pub fn read<T: FromBytes>(&mut self) -> Result<T> {
bindings::_copy_from_user(
out.as_mut_ptr().cast::<c_void>(),
self.ptr as *const c_void,
- len_ulong,
+ len,
)
};
if res != 0 {
@@ -338,12 +331,9 @@ pub fn write_slice(&mut self, data: &[u8]) -> Result {
if len > self.length {
return Err(EFAULT);
}
- let Ok(len_ulong) = c_ulong::try_from(len) else {
- return Err(EFAULT);
- };
- // SAFETY: `data_ptr` points into an immutable slice of length `len_ulong`, so we may read
+ // SAFETY: `data_ptr` points into an immutable slice of length `len`, so we may read
// that many bytes from it.
- let res = unsafe { bindings::copy_to_user(self.ptr as *mut c_void, data_ptr, len_ulong) };
+ let res = unsafe { bindings::copy_to_user(self.ptr as *mut c_void, data_ptr, len) };
if res != 0 {
return Err(EFAULT);
}
@@ -362,9 +352,6 @@ pub fn write<T: AsBytes>(&mut self, value: &T) -> Result {
if len > self.length {
return Err(EFAULT);
}
- let Ok(len_ulong) = c_ulong::try_from(len) else {
- return Err(EFAULT);
- };
// SAFETY: The reference points to a value of type `T`, so it is valid for reading
// `size_of::<T>()` bytes.
//
@@ -375,7 +362,7 @@ pub fn write<T: AsBytes>(&mut self, value: &T) -> Result {
bindings::_copy_to_user(
self.ptr as *mut c_void,
(value as *const T).cast::<c_void>(),
- len_ulong,
+ len,
)
};
if res != 0 {
--
2.44.1
This patch allows progs to elide a null check on statically known map
lookup keys. In other words, if the verifier can statically prove that
the lookup will be in-bounds, allow the prog to drop the null check.
This is useful for two reasons:
1. Large numbers of nullness checks (especially when they cannot fail)
unnecessarily pushes prog towards BPF_COMPLEXITY_LIMIT_JMP_SEQ.
2. It forms a tighter contract between programmer and verifier.
For (1), bpftrace is starting to make heavier use of percpu scratch
maps. As a result, for user scripts with large number of unrolled loops,
we are starting to hit jump complexity verification errors. These
percpu lookups cannot fail anyways, as we only use static key values.
Eliding nullness probably results in less work for verifier as well.
For (2), percpu scratch maps are often used as a larger stack, as the
currrent stack is limited to 512 bytes. In these situations, it is
desirable for the programmer to express: "this lookup should never fail,
and if it does, it means I messed up the code". By omitting the null
check, the programmer can "ask" the verifier to double check the logic.
Changes from v1:
* Added a check for when R2 is not a ptr to stack
* Added a check for when stack is uninitialized (no stack slot yet)
* Fix spinlock reg id bumping
* Updated existing tests to account for null elision
* Added test case for when R2 can be both const and non-const
Daniel Xu (2):
bpf: verifier: Support eliding map lookup nullness
bpf: selftests: verifier: Add nullness elision tests
kernel/bpf/verifier.c | 64 ++++++-
tools/testing/selftests/bpf/progs/iters.c | 14 +-
.../selftests/bpf/progs/map_kptr_fail.c | 2 +-
.../bpf/progs/verifier_array_access.c | 166 ++++++++++++++++++
.../selftests/bpf/progs/verifier_map_in_map.c | 2 +-
.../testing/selftests/bpf/verifier/map_kptr.c | 2 +-
6 files changed, 239 insertions(+), 11 deletions(-)
--
2.46.0
Recently we committed a fix to allow processes to receive notifications for
non-zero exits via the process connector module. Commit is a4c9a56e6a2c.
However, for threads, when it does a pthread_exit(&exit_status) call, the
kernel is not aware of the exit status with which pthread_exit is called.
It is sent by child thread to the parent process, if it is waiting in
pthread_join(). Hence, for a thread exiting abnormally, kernel cannot
send notifications to any listening processes.
The exception to this is if the thread is sent a signal which it has not
handled, and dies along with it's process as a result; for eg. SIGSEGV or
SIGKILL. In this case, kernel is aware of the non-zero exit and sends a
notification for it.
For our use case, we cannot have parent wait in pthread_join, one of the
main reasons for this being that we do not want to track normal
pthread_exit(), which could be a very large number. We only want to be
notified of any abnormal exits. Hence, threads are created with
pthread_attr_t set to PTHREAD_CREATE_DETACHED.
To fix this problem, we add a new type PROC_CN_MCAST_NOTIFY to proc connector
API, which allows a thread to send it's exit status to kernel either when
it needs to call pthread_exit() with non-zero value to indicate some
error or from signal handler before pthread_exit().
Anjali Kulkarni (2):
connector/cn_proc: Handle threads for proc connector
connector/cn_proc: Selftest for threads case
drivers/connector/cn_proc.c | 11 ++-
include/linux/cn_proc.h | 5 +-
include/uapi/linux/cn_proc.h | 4 +-
kernel/exit.c | 5 +-
tools/testing/selftests/connector/Makefile | 23 ++++-
.../testing/selftests/connector/proc_filter.c | 5 +
tools/testing/selftests/connector/thread.c | 87 +++++++++++++++++
.../selftests/connector/thread_filter.c | 93 +++++++++++++++++++
8 files changed, 226 insertions(+), 7 deletions(-)
create mode 100644 tools/testing/selftests/connector/thread.c
create mode 100644 tools/testing/selftests/connector/thread_filter.c
--
2.45.2
Hi, all,
I was testing Linux torvalds tree vanilla kernel, and I've noticed for a number of releases this
./nci_dev stops testing until it's terminated (15).
Now, I tried to examine what went wrong, I hoped it will go away by itself. it didn't, so I am posting
a bug report.
The ./nci_dev seems to be stuck in several processes waiting on each other. I was able to produce
stacktraces. I am unable to tell if it is testsuite bug or a problem in underlying syscalls.
user@host:~/linux/kernel/linux_torvalds$ sudo gdb --pid 14132
GNU gdb (Ubuntu 15.0.50.20240403-0ubuntu1) 15.0.50.20240403-git
Copyright (C) 2024 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.
Type "show copying" and "show warranty" for details.
This GDB was configured as "x86_64-linux-gnu".
Type "show configuration" for configuration details.
For bug reporting instructions, please see:
<https://www.gnu.org/software/gdb/bugs/>.
Find the GDB manual and other documentation resources online at:
<http://www.gnu.org/software/gdb/documentation/>.
For help, type "help".
Type "apropos word" to search for commands related to "word".
Attaching to process 14132
Reading symbols from /home/marvin/linux/kernel/linux_torvalds/tools/testing/selftests/nci/nci_dev...
Reading symbols from /lib/x86_64-linux-gnu/libc.so.6...
Reading symbols from /usr/lib/debug/.build-id/6d/64b17fbac799e68da7ebd9985ddf9b5cb375e6.debug...
Reading symbols from /lib64/ld-linux-x86-64.so.2...
Reading symbols from /usr/lib/debug/.build-id/35/3e1b6cb0eebc08cf3ff812eae8a51b4efd684e.debug...
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
0x00007be7cf3107a7 in __GI___wait4 (pid=pid@entry=14133, stat_loc=stat_loc@entry=0x7ffef60482dc, options=options@entry=0, usage=usage@entry=0x0) at ../sysdeps/unix/sysv/linux/wait4.c:30
warning: 30 ../sysdeps/unix/sysv/linux/wait4.c: No such file or directory
(gdb) where
#0 0x00007be7cf3107a7 in __GI___wait4 (pid=pid@entry=14133, stat_loc=stat_loc@entry=0x7ffef60482dc, options=options@entry=0, usage=usage@entry=0x0) at ../sysdeps/unix/sysv/linux/wait4.c:30
#1 0x00007be7cf3108eb in __GI___waitpid (pid=pid@entry=14133, stat_loc=stat_loc@entry=0x7ffef60482dc, options=options@entry=0) at ./posix/waitpid.c:38
#2 0x00005d550d59299b in wrapper_NCI_start_poll (_metadata=0x7be7cf486000, variant=0x5d550d597020 <_NCI_NCI2_0_object>) at nci_dev.c:625
#3 0x00005d550d591a94 in __run_test (f=f@entry=0x5d550d5970a0 <_NCI_fixture_object>, variant=variant@entry=0x5d550d597020 <_NCI_NCI2_0_object>, t=t@entry=0x7be7cf486000) at ../kselftest_harness.h:1249
#4 0x00005d550d58fd47 in test_harness_run (argv=0x7ffef60488f8, argc=1) at ../kselftest_harness.h:1319
#5 main (argc=1, argv=0x7ffef60488f8) at nci_dev.c:904
(gdb)
user@host:~$ sudo gdb --pid 14133
GNU gdb (Ubuntu 15.0.50.20240403-0ubuntu1) 15.0.50.20240403-git
Copyright (C) 2024 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.
Type "show copying" and "show warranty" for details.
This GDB was configured as "x86_64-linux-gnu".
Type "show configuration" for configuration details.
For bug reporting instructions, please see:
<https://www.gnu.org/software/gdb/bugs/>.
Find the GDB manual and other documentation resources online at:
<http://www.gnu.org/software/gdb/documentation/>.
For help, type "help".
Type "apropos word" to search for commands related to "word".
Attaching to process 14133
[New LWP 14137]
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
0x00007be7cf298d61 in __futex_abstimed_wait_common64 (private=128, cancel=true, abstime=0x0, op=265, expected=14137, futex_word=0x7be7cf000990) at ./nptl/futex-internal.c:57
warning: 57 ./nptl/futex-internal.c: No such file or directory
(gdb) where
#0 0x00007be7cf298d61 in __futex_abstimed_wait_common64 (private=128, cancel=true, abstime=0x0, op=265, expected=14137, futex_word=0x7be7cf000990) at ./nptl/futex-internal.c:57
#1 __futex_abstimed_wait_common (cancel=true, private=128, abstime=0x0, clockid=0, expected=14137, futex_word=0x7be7cf000990) at ./nptl/futex-internal.c:87
#2 __GI___futex_abstimed_wait_cancelable64 (futex_word=futex_word@entry=0x7be7cf000990, expected=14137, clockid=clockid@entry=0, abstime=abstime@entry=0x0, private=private@entry=128)
at ./nptl/futex-internal.c:139
#3 0x00007be7cf29e793 in __pthread_clockjoin_ex (threadid=136235540547264, thread_return=thread_return@entry=0x7ffef6047dd0, clockid=clockid@entry=0, abstime=abstime@entry=0x0,
block=block@entry=true) at ./nptl/pthread_join_common.c:102
#4 0x00007be7cf29e633 in ___pthread_join (threadid=<optimized out>, thread_return=thread_return@entry=0x7ffef6047dd0) at ./nptl/pthread_join.c:24
#5 0x00005d550d591e48 in NCI_setup (_metadata=_metadata@entry=0x7be7cf486000, self=self@entry=0x7ffef60482e0, variant=<optimized out>) at nci_dev.c:447
#6 0x00005d550d5929f3 in wrapper_NCI_start_poll (_metadata=0x7be7cf486000, variant=0x5d550d597020 <_NCI_NCI2_0_object>) at nci_dev.c:625
#7 0x00005d550d591a94 in __run_test (f=f@entry=0x5d550d5970a0 <_NCI_fixture_object>, variant=variant@entry=0x5d550d597020 <_NCI_NCI2_0_object>, t=t@entry=0x7be7cf486000)
at ../kselftest_harness.h:1249
#8 0x00005d550d58fd47 in test_harness_run (argv=0x7ffef60488f8, argc=1) at ../kselftest_harness.h:1319
#9 main (argc=1, argv=0x7ffef60488f8) at nci_dev.c:904
(gdb)
I hope this can help you see what went wrong. The testing suite gets stuck on each run.
Best regards,
Mirsad Todorovac
Running this test on a small system produces different failures every
test checking deletions, and some flushes. From different test runs:
TEST: Common host entries configuration tests (L2) [FAIL]
Failed to delete L2 host entry
TEST: Common port group entries configuration tests (IPv4 (S, G)) [FAIL]
IPv4 (S, G) entry with VLAN 10 not deleted when VLAN was not specified
TEST: Common port group entries configuration tests (IPv6 (*, G)) [FAIL]
IPv6 (*, G) entry with VLAN 10 not deleted when VLAN was not specified
TEST: Flush tests [FAIL]
Entry not flushed by specified VLAN ID
TEST: Flush tests [FAIL]
IPv6 host entry not flushed by "nopermanent" state
Add a short sleep after deletion and flush to resolve this.
Create a delay variable just for this test to allow short sleep, the
lib.sh WAIT_TIME of 5 seconds makes the test far longer than necessary.
Tested on several weak systems with 0.1s delay:
- Ivy Bridge Celeron netbook (2014 x86_64)
- Raspberry Pi 3B (2016 aarch64)
- Small KVM VM on Intel 10th gen (2020 x86_64)
All these systems ran 25 test runs in a row with 100% pass OK.
Fixes: b6d00da08610 ("selftests: forwarding: Add bridge MDB test")
Signed-off-by: Jamie Bainbridge <jamie.bainbridge(a)gmail.com>
---
v2: Avoid false check failures as seen by Jakub Kicinski.
---
.../selftests/net/forwarding/bridge_mdb.sh | 28 +++++++++++++++++++
1 file changed, 28 insertions(+)
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
index d9d587454d207931a539f59be15cbc63d471888f..49136279973d05d0e6b14237228ab58455554bb0 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
@@ -30,6 +30,9 @@ ALL_TESTS="
ctrl_test
"
+# time to wait for delete and flush to complete
+: "${SETTLE_DELAY:=0.1}"
+
NUM_NETIFS=4
source lib.sh
source tc_common.sh
@@ -152,6 +155,7 @@ cfg_test_host_common()
check_fail $? "Managed to replace $name host entry"
bridge mdb del dev br0 port br0 grp $grp $state vid 10
+ sleep "$SETTLE_DELAY"
bridge mdb get dev br0 grp $grp vid 10 &> /dev/null
check_fail $? "Failed to delete $name host entry"
@@ -208,6 +212,7 @@ cfg_test_port_common()
check_err $? "Failed to replace $name entry"
bridge mdb del dev br0 port $swp1 $grp_key permanent vid 10
+ sleep "$SETTLE_DELAY"
bridge mdb get dev br0 $grp_key vid 10 &> /dev/null
check_fail $? "Failed to delete $name entry"
@@ -230,6 +235,7 @@ cfg_test_port_common()
check_err $? "$name entry with VLAN 20 not added when VLAN was not specified"
bridge mdb del dev br0 port $swp1 $grp_key permanent
+ sleep "$SETTLE_DELAY"
bridge mdb get dev br0 $grp_key vid 10 &> /dev/null
check_fail $? "$name entry with VLAN 10 not deleted when VLAN was not specified"
bridge mdb get dev br0 $grp_key vid 20 &> /dev/null
@@ -310,6 +316,7 @@ __cfg_test_port_ip_star_g()
bridge -d mdb get dev br0 grp $grp src $src1 vid 10 &> /dev/null
check_err $? "(S, G) entry not created"
bridge mdb del dev br0 port $swp1 grp $grp vid 10
+ sleep "$SETTLE_DELAY"
bridge -d mdb get dev br0 grp $grp vid 10 &> /dev/null
check_fail $? "(*, G) entry not deleted"
bridge -d mdb get dev br0 grp $grp src $src1 vid 10 &> /dev/null
@@ -828,6 +835,7 @@ cfg_test_flush()
bridge mdb add dev br0 port $swp1 grp 239.1.1.8 vid 10 temp
bridge mdb flush dev br0
+ sleep "$SETTLE_DELAY"
num_entries=$(bridge mdb show dev br0 | wc -l)
[[ $num_entries -eq 0 ]]
check_err $? 0 "Not all entries flushed after flush all"
@@ -840,6 +848,7 @@ cfg_test_flush()
bridge mdb add dev br0 port br0 grp 239.1.1.1 vid 10
bridge mdb flush dev br0 port $swp1
+ sleep "$SETTLE_DELAY"
bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1"
check_fail $? "Entry not flushed by specified port"
@@ -849,11 +858,13 @@ cfg_test_flush()
check_err $? "Host entry flushed by wrong port"
bridge mdb flush dev br0 port br0
+ sleep "$SETTLE_DELAY"
bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port br0"
check_fail $? "Host entry not flushed by specified port"
bridge mdb flush dev br0
+ sleep "$SETTLE_DELAY"
# Check that when flushing by VLAN ID only entries programmed with the
# specified VLAN ID are flushed and the rest are not.
@@ -864,6 +875,7 @@ cfg_test_flush()
bridge mdb add dev br0 port $swp2 grp 239.1.1.1 vid 20
bridge mdb flush dev br0 vid 10
+ sleep "$SETTLE_DELAY"
bridge mdb get dev br0 grp 239.1.1.1 vid 10 &> /dev/null
check_fail $? "Entry not flushed by specified VLAN ID"
@@ -871,6 +883,7 @@ cfg_test_flush()
check_err $? "Entry flushed by wrong VLAN ID"
bridge mdb flush dev br0
+ sleep "$SETTLE_DELAY"
# Check that all permanent entries are flushed when "permanent" is
# specified and that temporary entries are not.
@@ -879,6 +892,7 @@ cfg_test_flush()
bridge mdb add dev br0 port $swp2 grp 239.1.1.1 temp vid 10
bridge mdb flush dev br0 permanent
+ sleep "$SETTLE_DELAY"
bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1"
check_fail $? "Entry not flushed by \"permanent\" state"
@@ -886,6 +900,7 @@ cfg_test_flush()
check_err $? "Entry flushed by wrong state (\"permanent\")"
bridge mdb flush dev br0
+ sleep "$SETTLE_DELAY"
# Check that all temporary entries are flushed when "nopermanent" is
# specified and that permanent entries are not.
@@ -894,6 +909,7 @@ cfg_test_flush()
bridge mdb add dev br0 port $swp2 grp 239.1.1.1 temp vid 10
bridge mdb flush dev br0 nopermanent
+ sleep "$SETTLE_DELAY"
bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1"
check_err $? "Entry flushed by wrong state (\"nopermanent\")"
@@ -901,6 +917,7 @@ cfg_test_flush()
check_fail $? "Entry not flushed by \"nopermanent\" state"
bridge mdb flush dev br0
+ sleep "$SETTLE_DELAY"
# Check that L2 host entries are not flushed when "nopermanent" is
# specified, but flushed when "permanent" is specified.
@@ -908,16 +925,19 @@ cfg_test_flush()
bridge mdb add dev br0 port br0 grp 01:02:03:04:05:06 permanent vid 10
bridge mdb flush dev br0 nopermanent
+ sleep "$SETTLE_DELAY"
bridge mdb get dev br0 grp 01:02:03:04:05:06 vid 10 &> /dev/null
check_err $? "L2 host entry flushed by wrong state (\"nopermanent\")"
bridge mdb flush dev br0 permanent
+ sleep "$SETTLE_DELAY"
bridge mdb get dev br0 grp 01:02:03:04:05:06 vid 10 &> /dev/null
check_fail $? "L2 host entry not flushed by \"permanent\" state"
bridge mdb flush dev br0
+ sleep "$SETTLE_DELAY"
# Check that IPv4 host entries are not flushed when "permanent" is
# specified, but flushed when "nopermanent" is specified.
@@ -925,16 +945,19 @@ cfg_test_flush()
bridge mdb add dev br0 port br0 grp 239.1.1.1 temp vid 10
bridge mdb flush dev br0 permanent
+ sleep "$SETTLE_DELAY"
bridge mdb get dev br0 grp 239.1.1.1 vid 10 &> /dev/null
check_err $? "IPv4 host entry flushed by wrong state (\"permanent\")"
bridge mdb flush dev br0 nopermanent
+ sleep "$SETTLE_DELAY"
bridge mdb get dev br0 grp 239.1.1.1 vid 10 &> /dev/null
check_fail $? "IPv4 host entry not flushed by \"nopermanent\" state"
bridge mdb flush dev br0
+ sleep "$SETTLE_DELAY"
# Check that IPv6 host entries are not flushed when "permanent" is
# specified, but flushed when "nopermanent" is specified.
@@ -942,16 +965,19 @@ cfg_test_flush()
bridge mdb add dev br0 port br0 grp ff0e::1 temp vid 10
bridge mdb flush dev br0 permanent
+ sleep "$SETTLE_DELAY"
bridge mdb get dev br0 grp ff0e::1 vid 10 &> /dev/null
check_err $? "IPv6 host entry flushed by wrong state (\"permanent\")"
bridge mdb flush dev br0 nopermanent
+ sleep "$SETTLE_DELAY"
bridge mdb get dev br0 grp ff0e::1 vid 10 &> /dev/null
check_fail $? "IPv6 host entry not flushed by \"nopermanent\" state"
bridge mdb flush dev br0
+ sleep "$SETTLE_DELAY"
# Check that when flushing by routing protocol only entries programmed
# with the specified routing protocol are flushed and the rest are not.
@@ -961,6 +987,7 @@ cfg_test_flush()
bridge mdb add dev br0 port br0 grp 239.1.1.1 vid 10
bridge mdb flush dev br0 proto bgp
+ sleep "$SETTLE_DELAY"
bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1"
check_fail $? "Entry not flushed by specified routing protocol"
@@ -970,6 +997,7 @@ cfg_test_flush()
check_err $? "Host entry flushed by wrong routing protocol"
bridge mdb flush dev br0
+ sleep "$SETTLE_DELAY"
# Test that an error is returned when trying to flush using unsupported
# parameters.
--
2.39.2