We have some many cases that will create child process as well, such as pidfd_wait. Previously, we will signal/kill the parent process when it is time out, but this signal will not be sent to its child process. In such case, if child process doesn't terminate itself, ksefltest framework will hang forever.
below ps tree show the situation when ksefltest is blocking: root 1172 0.0 0.0 5996 2500 ? S 07:03 0:00 _ /bin/bash /lkp/lkp/src/tests/kernel-selftests root 1216 0.0 0.0 4392 1976 ? S 07:03 0:00 _ make run_tests -C pidfd root 1218 0.0 0.0 2396 1652 ? S 07:03 0:00 _ /bin/sh -c BASE_DIR="/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests"; . /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/runner.sh; if [ "X" != "X" ]; then per_test_logging=1; fi; run_many /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_fdinfo_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_open_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_poll_test /usr/src/perf_selftests-x86_64-rhel- 8. root 12491 0.0 0.0 2396 132 ? S 07:03 0:00 _ /bin/sh -c BASE_DIR="/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests"; . /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/runner.sh; if [ "X" != "X" ]; then per_test_logging=1; fi; run_many /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_fdinfo_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_open_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_poll_test /usr/src/perf_selftests-x86_64-r he root 12492 0.0 0.0 2396 132 ? S 07:03 0:00 _ /bin/sh -c BASE_DIR="/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests"; . /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/runner.sh; if [ "X" != "X" ]; then per_test_logging=1; fi; run_many /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_fdinfo_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_open_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_poll_test /usr/src/perf_selftests-x86_ 64 root 12493 0.0 0.0 2396 132 ? S 07:03 0:00 _ /bin/sh -c BASE_DIR="/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests"; . /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/runner.sh; if [ "X" != "X" ]; then per_test_logging=1; fi; run_many /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_fdinfo_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_open_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_poll_test /usr/src/perf_selftests- x8 root 12496 0.0 0.0 2396 132 ? S 07:03 0:00 _ /bin/sh -c BASE_DIR="/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests"; . /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/runner.sh; if [ "X" != "X" ]; then per_test_logging=1; fi; run_many /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_fdinfo_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_open_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_poll_test /usr/src/perf_selfte st root 12498 0.0 0.0 10564 6116 ? S 07:03 0:00 _ perl /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/prefix.pl root 12503 0.0 0.0 2452 112 ? T 07:03 0:00 ./pidfd_wait root 12621 0.0 0.0 2372 1600 ? SLs 07:04 0:00 /usr/sbin/watchdog root 19438 0.0 0.0 992 60 ? Ss 07:39 0:00 /lkp/lkp/src/bin/event/wakeup activate-monitor
Here we group all its child processes so that kill() can signal all of them in timeout.
CC: Kees Cook keescook@chromium.org CC: Andy Lutomirski luto@amacapital.net CC: Will Drewry wad@chromium.org CC: Shuah Khan shuah@kernel.org CC: Christian Brauner christian@brauner.io CC: Philip Li philip.li@intel.com Suggested-by: yang xu xuyang2018.jy@cn.fujitsu.com Signed-off-by: Li Zhijian lizhijian@cn.fujitsu.com --- tools/testing/selftests/kselftest_harness.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index ae0f0f33b2a6..c7251396e7ee 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -875,7 +875,8 @@ static void __timeout_handler(int sig, siginfo_t *info, void *ucontext) }
t->timed_out = true; - kill(t->pid, SIGKILL); + // signal process group + kill(-(t->pid), SIGKILL); }
void __wait_for_test(struct __test_metadata *t) @@ -985,6 +986,7 @@ void __run_test(struct __fixture_metadata *f, ksft_print_msg("ERROR SPAWNING TEST CHILD\n"); t->passed = 0; } else if (t->pid == 0) { + setpgrp(); t->fn(t, variant); if (t->skip) _exit(255);
0Day/LKP observed that the kselftest blocks foever since one of the pidfd_wait doesn't terminate in 1 of 30 runs. After digging into the source, we found that it blocks at: ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL), 0);
we can reproduce it by: $ while true; do make run_tests -C pidfd; done
a delay to ensure that the parent can see child process WCONTINUED.
CC: Christian Brauner christian@brauner.io CC: Shuah Khan shuah@kernel.org CC: Philip Li philip.li@intel.com Reported-by: kernel test robot lkp@intel.com Signed-off-by: Li Zhijian lizhijian@cn.fujitsu.com --- tools/testing/selftests/pidfd/pidfd_wait.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c index be2943f072f6..5abd26da4caa 100644 --- a/tools/testing/selftests/pidfd/pidfd_wait.c +++ b/tools/testing/selftests/pidfd/pidfd_wait.c @@ -107,7 +107,9 @@ TEST(wait_states)
if (pid == 0) { kill(getpid(), SIGSTOP); + usleep(1000); kill(getpid(), SIGSTOP); + usleep(1000); exit(EXIT_SUCCESS); }
On Fri, Oct 29, 2021 at 10:45:27AM +0800, Li Zhijian wrote:
We have some many cases that will create child process as well, such as pidfd_wait. Previously, we will signal/kill the parent process when it is time out, but this signal will not be sent to its child process. In such case, if child process doesn't terminate itself, ksefltest framework will hang forever.
below ps tree show the situation when ksefltest is blocking: root 1172 0.0 0.0 5996 2500 ? S 07:03 0:00 _ /bin/bash /lkp/lkp/src/tests/kernel-selftests root 1216 0.0 0.0 4392 1976 ? S 07:03 0:00 _ make run_tests -C pidfd root 1218 0.0 0.0 2396 1652 ? S 07:03 0:00 _ /bin/sh -c BASE_DIR="/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests"; . /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/runner.sh; if [ "X" != "X" ]; then per_test_logging=1; fi; run_many /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_fdinfo_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_open_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_poll_test /usr/src/perf_selftests-x86_64-rhe
l-8.
root 12491 0.0 0.0 2396 132 ? S 07:03 0:00 _ /bin/sh -c BASE_DIR="/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests"; . /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/runner.sh; if [ "X" != "X" ]; then per_test_logging=1; fi; run_many /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_fdinfo_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_open_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_poll_test /usr/src/perf_selftests-x86_64
-rhe
root 12492 0.0 0.0 2396 132 ? S 07:03 0:00 _ /bin/sh -c BASE_DIR="/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests"; . /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/runner.sh; if [ "X" != "X" ]; then per_test_logging=1; fi; run_many /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_fdinfo_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_open_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_poll_test /usr/src/perf_selftests-x8
6_64
root 12493 0.0 0.0 2396 132 ? S 07:03 0:00 _ /bin/sh -c BASE_DIR="/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests"; . /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/runner.sh; if [ "X" != "X" ]; then per_test_logging=1; fi; run_many /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_fdinfo_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_open_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_poll_test /usr/src/perf_selftest
s-x8
root 12496 0.0 0.0 2396 132 ? S 07:03 0:00 _ /bin/sh -c BASE_DIR="/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests"; . /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/runner.sh; if [ "X" != "X" ]; then per_test_logging=1; fi; run_many /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_fdinfo_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_open_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_poll_test /usr/src/perf_self
test
root 12498 0.0 0.0 10564 6116 ? S 07:03 0:00 _ perl /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/prefix.pl root 12503 0.0 0.0 2452 112 ? T 07:03 0:00 ./pidfd_wait root 12621 0.0 0.0 2372 1600 ? SLs 07:04 0:00 /usr/sbin/watchdog root 19438 0.0 0.0 992 60 ? Ss 07:39 0:00 /lkp/lkp/src/bin/event/wakeup activate-monitor
Here we group all its child processes so that kill() can signal all of them in timeout.
CC: Kees Cook keescook@chromium.org CC: Andy Lutomirski luto@amacapital.net CC: Will Drewry wad@chromium.org CC: Shuah Khan shuah@kernel.org CC: Christian Brauner christian@brauner.io CC: Philip Li philip.li@intel.com Suggested-by: yang xu xuyang2018.jy@cn.fujitsu.com Signed-off-by: Li Zhijian lizhijian@cn.fujitsu.com
Seems sensible. Is it guaranteed that t->pid is neither 0 nor 1? If not then maybe sanity check t->pid at least for not being 1 as negating that would mean "signal everything that you have permission to signal". :)
Otherwise, Acked-by: Christian Brauner christian.brauner@ubuntu.com
tools/testing/selftests/kselftest_harness.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index ae0f0f33b2a6..c7251396e7ee 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -875,7 +875,8 @@ static void __timeout_handler(int sig, siginfo_t *info, void *ucontext) } t->timed_out = true;
- kill(t->pid, SIGKILL);
- // signal process group
- kill(-(t->pid), SIGKILL);
} void __wait_for_test(struct __test_metadata *t) @@ -985,6 +986,7 @@ void __run_test(struct __fixture_metadata *f, ksft_print_msg("ERROR SPAWNING TEST CHILD\n"); t->passed = 0; } else if (t->pid == 0) {
t->fn(t, variant); if (t->skip) _exit(255);setpgrp();
-- 2.33.0
On Fri, Oct 29, 2021 at 10:45:28AM +0800, Li Zhijian wrote:
0Day/LKP observed that the kselftest blocks foever since one of the pidfd_wait doesn't terminate in 1 of 30 runs. After digging into the source, we found that it blocks at: ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL), 0);
we can reproduce it by: $ while true; do make run_tests -C pidfd; done
a delay to ensure that the parent can see child process WCONTINUED.
CC: Christian Brauner christian@brauner.io CC: Shuah Khan shuah@kernel.org CC: Philip Li philip.li@intel.com Reported-by: kernel test robot lkp@intel.com Signed-off-by: Li Zhijian lizhijian@cn.fujitsu.com
Not a fan of the usleep() solution but if it fixes it it's fine for a test, I think. Acked-by: Christian Brauner christian.brauner@ubuntu.com
On 29/10/2021 16:31, Christian Brauner wrote:
On Fri, Oct 29, 2021 at 10:45:27AM +0800, Li Zhijian wrote:
We have some many cases that will create child process as well, such as pidfd_wait. Previously, we will signal/kill the parent process when it is time out, but this signal will not be sent to its child process. In such case, if child process doesn't terminate itself, ksefltest framework will hang forever.
below ps tree show the situation when ksefltest is blocking: root 1172 0.0 0.0 5996 2500 ? S 07:03 0:00 _ /bin/bash /lkp/lkp/src/tests/kernel-selftests root 1216 0.0 0.0 4392 1976 ? S 07:03 0:00 _ make run_tests -C pidfd root 1218 0.0 0.0 2396 1652 ? S 07:03 0:00 _ /bin/sh -c BASE_DIR="/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests"; . /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/runner.sh; if [ "X" != "X" ]; then per_test_logging=1; fi; run_many /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_fdinfo_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_open_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_poll_test /usr/src/perf_selftests-x86_64-rhel-8. root 12491 0.0 0.0 2396 132 ? S 07:03 0:00 _ /bin/sh -c BASE_DIR="/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests"; . /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/runner.sh; if [ "X" != "X" ]; then per_test_logging=1; fi; run_many /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_fdinfo_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_open_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_poll_test /usr/src/perf_selftests-x86_64-rhe root 12492 0.0 0.0 2396 132 ? S 07:03 0:00 _ /bin/sh -c BASE_DIR="/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests"; . /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/runner.sh; if [ "X" != "X" ]; then per_test_logging=1; fi; run_many /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_fdinfo_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_open_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_poll_test /usr/src/perf_selftests-x86_64 root 12493 0.0 0.0 2396 132 ? S 07:03 0:00 _ /bin/sh -c BASE_DIR="/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests"; . /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/runner.sh; if [ "X" != "X" ]; then per_test_logging=1; fi; run_many /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_fdinfo_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_open_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_poll_test /usr/src/perf_selftests-x8 root 12496 0.0 0.0 2396 132 ? S 07:03 0:00 _ /bin/sh -c BASE_DIR="/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests"; . /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/runner.sh; if [ "X" != "X" ]; then per_test_logging=1; fi; run_many /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_fdinfo_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_open_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_poll_test /usr/src/perf_selftest root 12498 0.0 0.0 10564 6116 ? S 07:03 0:00 _ perl /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/prefix.pl root 12503 0.0 0.0 2452 112 ? T 07:03 0:00 ./pidfd_wait root 12621 0.0 0.0 2372 1600 ? SLs 07:04 0:00 /usr/sbin/watchdog root 19438 0.0 0.0 992 60 ? Ss 07:39 0:00 /lkp/lkp/src/bin/event/wakeup activate-monitor
Here we group all its child processes so that kill() can signal all of them in timeout.
CC: Kees Cook keescook@chromium.org CC: Andy Lutomirski luto@amacapital.net CC: Will Drewry wad@chromium.org CC: Shuah Khan shuah@kernel.org CC: Christian Brauner christian@brauner.io CC: Philip Li philip.li@intel.com Suggested-by: yang xu xuyang2018.jy@cn.fujitsu.com Signed-off-by: Li Zhijian lizhijian@cn.fujitsu.com
Seems sensible. Is it guaranteed that t->pid is neither 0 nor 1?
that's true, we fire the timer(reach here) only if fork() succeeds.
Thanks Zhijian
If not then maybe sanity check t->pid at least for not being 1 as negating that would mean "signal everything that you have permission to signal". :)
Otherwise, Acked-by: Christian Brauner christian.brauner@ubuntu.com
tools/testing/selftests/kselftest_harness.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index ae0f0f33b2a6..c7251396e7ee 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -875,7 +875,8 @@ static void __timeout_handler(int sig, siginfo_t *info, void *ucontext) } t->timed_out = true;
- kill(t->pid, SIGKILL);
- // signal process group
- kill(-(t->pid), SIGKILL); }
void __wait_for_test(struct __test_metadata *t) @@ -985,6 +986,7 @@ void __run_test(struct __fixture_metadata *f, ksft_print_msg("ERROR SPAWNING TEST CHILD\n"); t->passed = 0; } else if (t->pid == 0) {
t->fn(t, variant); if (t->skip) _exit(255);setpgrp();
-- 2.33.0
kindly ping
On 29/10/2021 16:31, Christian Brauner wrote:
On Fri, Oct 29, 2021 at 10:45:27AM +0800, Li Zhijian wrote:
We have some many cases that will create child process as well, such as pidfd_wait. Previously, we will signal/kill the parent process when it is time out, but this signal will not be sent to its child process. In such case, if child process doesn't terminate itself, ksefltest framework will hang forever.
below ps tree show the situation when ksefltest is blocking: root 1172 0.0 0.0 5996 2500 ? S 07:03 0:00 _ /bin/bash /lkp/lkp/src/tests/kernel-selftests root 1216 0.0 0.0 4392 1976 ? S 07:03 0:00 _ make run_tests -C pidfd root 1218 0.0 0.0 2396 1652 ? S 07:03 0:00 _ /bin/sh -c BASE_DIR="/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests"; . /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/runner.sh; if [ "X" != "X" ]; then per_test_logging=1; fi; run_many /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_fdinfo_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_open_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_poll_test /usr/src/perf_selftests-x86_64-rhel-8. root 12491 0.0 0.0 2396 132 ? S 07:03 0:00 _ /bin/sh -c BASE_DIR="/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests"; . /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/runner.sh; if [ "X" != "X" ]; then per_test_logging=1; fi; run_many /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_fdinfo_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_open_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_poll_test /usr/src/perf_selftests-x86_64-rhe root 12492 0.0 0.0 2396 132 ? S 07:03 0:00 _ /bin/sh -c BASE_DIR="/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests"; . /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/runner.sh; if [ "X" != "X" ]; then per_test_logging=1; fi; run_many /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_fdinfo_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_open_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_poll_test /usr/src/perf_selftests-x86_64 root 12493 0.0 0.0 2396 132 ? S 07:03 0:00 _ /bin/sh -c BASE_DIR="/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests"; . /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/runner.sh; if [ "X" != "X" ]; then per_test_logging=1; fi; run_many /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_fdinfo_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_open_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_poll_test /usr/src/perf_selftests-x8 root 12496 0.0 0.0 2396 132 ? S 07:03 0:00 _ /bin/sh -c BASE_DIR="/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests"; . /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/runner.sh; if [ "X" != "X" ]; then per_test_logging=1; fi; run_many /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_fdinfo_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_open_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_poll_test /usr/src/perf_selftest root 12498 0.0 0.0 10564 6116 ? S 07:03 0:00 _ perl /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/prefix.pl root 12503 0.0 0.0 2452 112 ? T 07:03 0:00 ./pidfd_wait root 12621 0.0 0.0 2372 1600 ? SLs 07:04 0:00 /usr/sbin/watchdog root 19438 0.0 0.0 992 60 ? Ss 07:39 0:00 /lkp/lkp/src/bin/event/wakeup activate-monitor
Here we group all its child processes so that kill() can signal all of them in timeout.
CC: Kees Cook keescook@chromium.org CC: Andy Lutomirski luto@amacapital.net CC: Will Drewry wad@chromium.org CC: Shuah Khan shuah@kernel.org CC: Christian Brauner christian@brauner.io CC: Philip Li philip.li@intel.com Suggested-by: yang xu xuyang2018.jy@cn.fujitsu.com Signed-off-by: Li Zhijian lizhijian@cn.fujitsu.com
Seems sensible. Is it guaranteed that t->pid is neither 0 nor 1? If not then maybe sanity check t->pid at least for not being 1 as negating that would mean "signal everything that you have permission to signal". :)
Otherwise, Acked-by: Christian Brauner christian.brauner@ubuntu.com
tools/testing/selftests/kselftest_harness.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index ae0f0f33b2a6..c7251396e7ee 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -875,7 +875,8 @@ static void __timeout_handler(int sig, siginfo_t *info, void *ucontext) } t->timed_out = true;
- kill(t->pid, SIGKILL);
- // signal process group
- kill(-(t->pid), SIGKILL); }
void __wait_for_test(struct __test_metadata *t) @@ -985,6 +986,7 @@ void __run_test(struct __fixture_metadata *f, ksft_print_msg("ERROR SPAWNING TEST CHILD\n"); t->passed = 0; } else if (t->pid == 0) {
t->fn(t, variant); if (t->skip) _exit(255);setpgrp();
-- 2.33.0
On 10/29/21 2:32 AM, Christian Brauner wrote:
On Fri, Oct 29, 2021 at 10:45:28AM +0800, Li Zhijian wrote:
0Day/LKP observed that the kselftest blocks foever since one of the pidfd_wait doesn't terminate in 1 of 30 runs. After digging into the source, we found that it blocks at: ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL), 0);
we can reproduce it by: $ while true; do make run_tests -C pidfd; done
a delay to ensure that the parent can see child process WCONTINUED.
CC: Christian Brauner christian@brauner.io CC: Shuah Khan shuah@kernel.org CC: Philip Li philip.li@intel.com Reported-by: kernel test robot lkp@intel.com Signed-off-by: Li Zhijian lizhijian@cn.fujitsu.com
Not a fan of the usleep() solution but if it fixes it it's fine for a test, I think. Acked-by: Christian Brauner christian.brauner@ubuntu.com
I don't like introducing usleep() which will increase the kselftest run-time. Every little bit adds up if we allow usleep() in tests.
thanks, -- Shuah
On 12/2/21 8:05 PM, lizhijian@fujitsu.com wrote:
kindly ping
On 29/10/2021 16:31, Christian Brauner wrote:
On Fri, Oct 29, 2021 at 10:45:27AM +0800, Li Zhijian wrote:
We have some many cases that will create child process as well, such as pidfd_wait. Previously, we will signal/kill the parent process when it is time out, but this signal will not be sent to its child process. In such case, if child process doesn't terminate itself, ksefltest framework will hang forever.
below ps tree show the situation when ksefltest is blocking: root 1172 0.0 0.0 5996 2500 ? S 07:03 0:00 _ /bin/bash /lkp/lkp/src/tests/kernel-selftests root 1216 0.0 0.0 4392 1976 ? S 07:03 0:00 _ make run_tests -C pidfd root 1218 0.0 0.0 2396 1652 ? S 07:03 0:00 _ /bin/sh -c BASE_DIR="/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests"; . /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/runner.sh; if [ "X" != "X" ]; then per_test_logging=1; fi; run_many /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_fdinfo_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_open_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_poll_test /usr/src/perf_selftests-x86_64-rhel-8. root 12491 0.0 0.0 2396 132 ? S 07:03 0:00 _ /bin/sh -c BASE_DIR="/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests"; . /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/runner.sh; if [ "X" != "X" ]; then per_test_logging=1; fi; run_many /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_fdinfo_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_open_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_poll_test /usr/src/perf_selftests-x86_64-rhe root 12492 0.0 0.0 2396 132 ? S 07:03 0:00 _ /bin/sh -c BASE_DIR="/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests"; . /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/runner.sh; if [ "X" != "X" ]; then per_test_logging=1; fi; run_many /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_fdinfo_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_open_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_poll_test /usr/src/perf_selftests-x86_64 root 12493 0.0 0.0 2396 132 ? S 07:03 0:00 _ /bin/sh -c BASE_DIR="/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests"; . /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/runner.sh; if [ "X" != "X" ]; then per_test_logging=1; fi; run_many /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_fdinfo_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_open_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_poll_test /usr/src/perf_selftests-x8 root 12496 0.0 0.0 2396 132 ? S 07:03 0:00 _ /bin/sh -c BASE_DIR="/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests"; . /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/runner.sh; if [ "X" != "X" ]; then per_test_logging=1; fi; run_many /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_fdinfo_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_open_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_poll_test /usr/src/perf_selftest root 12498 0.0 0.0 10564 6116 ? S 07:03 0:00 _ perl /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/prefix.pl root 12503 0.0 0.0 2452 112 ? T 07:03 0:00 ./pidfd_wait root 12621 0.0 0.0 2372 1600 ? SLs 07:04 0:00 /usr/sbin/watchdog root 19438 0.0 0.0 992 60 ? Ss 07:39 0:00 /lkp/lkp/src/bin/event/wakeup activate-monitor
Here we group all its child processes so that kill() can signal all of them in timeout.
CC: Kees Cook keescook@chromium.org CC: Andy Lutomirski luto@amacapital.net CC: Will Drewry wad@chromium.org CC: Shuah Khan shuah@kernel.org CC: Christian Brauner christian@brauner.io CC: Philip Li philip.li@intel.com Suggested-by: yang xu xuyang2018.jy@cn.fujitsu.com Signed-off-by: Li Zhijian lizhijian@cn.fujitsu.com
Seems sensible. Is it guaranteed that t->pid is neither 0 nor 1? If not then maybe sanity check t->pid at least for not being 1 as negating that would mean "signal everything that you have permission to signal". :)
Otherwise, Acked-by: Christian Brauner christian.brauner@ubuntu.com
tools/testing/selftests/kselftest_harness.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index ae0f0f33b2a6..c7251396e7ee 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -875,7 +875,8 @@ static void __timeout_handler(int sig, siginfo_t *info, void *ucontext) } t->timed_out = true;
- kill(t->pid, SIGKILL);
- // signal process group
- kill(-(t->pid), SIGKILL); } void __wait_for_test(struct __test_metadata *t)
@@ -985,6 +986,7 @@ void __run_test(struct __fixture_metadata *f, ksft_print_msg("ERROR SPAWNING TEST CHILD\n"); t->passed = 0; } else if (t->pid == 0) {
setpgrp(); t->fn(t, variant); if (t->skip) _exit(255);
-- 2.33.0
Kees,
Will you be able to take a look at this fix? This is in the kselftest_harness
thanks, -- Shuah
On 04/12/2021 01:03, Shuah Khan wrote:
On 10/29/21 2:32 AM, Christian Brauner wrote:
On Fri, Oct 29, 2021 at 10:45:28AM +0800, Li Zhijian wrote:
0Day/LKP observed that the kselftest blocks foever since one of the pidfd_wait doesn't terminate in 1 of 30 runs. After digging into the source, we found that it blocks at: ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL), 0);
we can reproduce it by: $ while true; do make run_tests -C pidfd; done
a delay to ensure that the parent can see child process WCONTINUED.
CC: Christian Brauner christian@brauner.io CC: Shuah Khan shuah@kernel.org CC: Philip Li philip.li@intel.com Reported-by: kernel test robot lkp@intel.com Signed-off-by: Li Zhijian lizhijian@cn.fujitsu.com
Not a fan of the usleep() solution but if it fixes it it's fine for a test, I think. Acked-by: Christian Brauner christian.brauner@ubuntu.com
I don't like introducing usleep() which will increase the kselftest run-time. Every little bit adds up if we allow usleep() in tests.
Thanks for your comments.
how about introduce a pipe to communicate between child and parent.
From d68c4629dd60a1e22cb83b771d38e899352ff9a9 Mon Sep 17 00:00:00 2001 From: Li Zhijian lizhijian@cn.fujitsu.com Date: Tue, 26 Oct 2021 16:39:56 +0800 Subject: [PATCH] ksefltest: pidfd: Fix wait_states: Test terminated by timeout
0Day/LKP observed that the kselftest blocks foever since one of the pidfd_wait doesn't terminate in 1 of 30 runs. After digging into the source, we found that it blocks at: ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL), 0);
we can reproduce it by: $ while true; do make run_tests -C pidfd; done
Introduce a blocking read in child process to make sure the parent can check its WCONTINUED.
CC: Philip Li philip.li@intel.com Reported-by: kernel test robot lkp@intel.com Signed-off-by: Li Zhijian lizhijian@cn.fujitsu.com --- tools/testing/selftests/pidfd/pidfd_wait.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c index be2943f072f6..d5c0ffa26c32 100644 --- a/tools/testing/selftests/pidfd/pidfd_wait.c +++ b/tools/testing/selftests/pidfd/pidfd_wait.c @@ -96,21 +96,26 @@ TEST(wait_states) .flags = CLONE_PIDFD | CLONE_PARENT_SETTID, .exit_signal = SIGCHLD, }; - int ret; + int ret, pfd[2]; pid_t pid; siginfo_t info = { .si_signo = 0, }; - + ASSERT_EQ(pipe(pfd), 0); pid = sys_clone3(&args); ASSERT_GE(pid, 0);
if (pid == 0) { + char buf[2]; + close(pfd[1]); kill(getpid(), SIGSTOP); + ASSERT_EQ(read(pfd[0], buf, 1), 1); + close(pfd[0]); kill(getpid(), SIGSTOP); exit(EXIT_SUCCESS); }
+ close(pfd[0]); ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0); ASSERT_EQ(info.si_signo, SIGCHLD); ASSERT_EQ(info.si_code, CLD_STOPPED); @@ -119,6 +124,8 @@ TEST(wait_states) ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0);
ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL), 0); + ASSERT_EQ(write(pfd[1], "C", 1), 1); + close(pfd[1]); ASSERT_EQ(info.si_signo, SIGCHLD); ASSERT_EQ(info.si_code, CLD_CONTINUED); ASSERT_EQ(info.si_pid, parent_tid);
I just post V2 which split these 2 patches standalone for reviewing easily. And rewrite 2/2 with pipe()
PTAL
Thanks Zhijian
On 04/12/2021 01:03, Shuah Khan wrote:
On 12/2/21 8:05 PM, lizhijian@fujitsu.com wrote:
kindly ping
On 29/10/2021 16:31, Christian Brauner wrote:
On Fri, Oct 29, 2021 at 10:45:27AM +0800, Li Zhijian wrote:
We have some many cases that will create child process as well, such as pidfd_wait. Previously, we will signal/kill the parent process when it is time out, but this signal will not be sent to its child process. In such case, if child process doesn't terminate itself, ksefltest framework will hang forever.
below ps tree show the situation when ksefltest is blocking: root 1172 0.0 0.0 5996 2500 ? S 07:03 0:00 _ /bin/bash /lkp/lkp/src/tests/kernel-selftests root 1216 0.0 0.0 4392 1976 ? S 07:03 0:00 _ make run_tests -C pidfd root 1218 0.0 0.0 2396 1652 ? S 07:03 0:00 _ /bin/sh -c BASE_DIR="/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests"; . /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/runner.sh; if [ "X" != "X" ]; then per_test_logging=1; fi; run_many /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_fdinfo_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_open_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_poll_test /usr/src/perf_selftests-x86_64-rhel-8. root 12491 0.0 0.0 2396 132 ? S 07:03 0:00 _ /bin/sh -c BASE_DIR="/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests"; . /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/runner.sh; if [ "X" != "X" ]; then per_test_logging=1; fi; run_many /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_fdinfo_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_open_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_poll_test /usr/src/perf_selftests-x86_64-rhe root 12492 0.0 0.0 2396 132 ? S 07:03 0:00 _ /bin/sh -c BASE_DIR="/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests"; . /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/runner.sh; if [ "X" != "X" ]; then per_test_logging=1; fi; run_many /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_fdinfo_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_open_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_poll_test /usr/src/perf_selftests-x86_64 root 12493 0.0 0.0 2396 132 ? S 07:03 0:00 _ /bin/sh -c BASE_DIR="/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests"; . /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/runner.sh; if [ "X" != "X" ]; then per_test_logging=1; fi; run_many /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_fdinfo_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_open_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_poll_test /usr/src/perf_selftests-x8 root 12496 0.0 0.0 2396 132 ? S 07:03 0:00 _ /bin/sh -c BASE_DIR="/usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests"; . /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/runner.sh; if [ "X" != "X" ]; then per_test_logging=1; fi; run_many /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_fdinfo_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_open_test /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/pidfd/pidfd_poll_test /usr/src/perf_selftest root 12498 0.0 0.0 10564 6116 ? S 07:03 0:00 _ perl /usr/src/perf_selftests-x86_64-rhel-8.3-kselftests-519d81956ee277b4419c723adfb154603c2565ba/tools/testing/selftests/kselftest/prefix.pl root 12503 0.0 0.0 2452 112 ? T 07:03 0:00 ./pidfd_wait root 12621 0.0 0.0 2372 1600 ? SLs 07:04 0:00 /usr/sbin/watchdog root 19438 0.0 0.0 992 60 ? Ss 07:39 0:00 /lkp/lkp/src/bin/event/wakeup activate-monitor
Here we group all its child processes so that kill() can signal all of them in timeout.
CC: Kees Cook keescook@chromium.org CC: Andy Lutomirski luto@amacapital.net CC: Will Drewry wad@chromium.org CC: Shuah Khan shuah@kernel.org CC: Christian Brauner christian@brauner.io CC: Philip Li philip.li@intel.com Suggested-by: yang xu xuyang2018.jy@cn.fujitsu.com Signed-off-by: Li Zhijian lizhijian@cn.fujitsu.com
Seems sensible. Is it guaranteed that t->pid is neither 0 nor 1? If not then maybe sanity check t->pid at least for not being 1 as negating that would mean "signal everything that you have permission to signal". :)
Otherwise, Acked-by: Christian Brauner christian.brauner@ubuntu.com
tools/testing/selftests/kselftest_harness.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index ae0f0f33b2a6..c7251396e7ee 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -875,7 +875,8 @@ static void __timeout_handler(int sig, siginfo_t *info, void *ucontext) } t->timed_out = true; - kill(t->pid, SIGKILL); + // signal process group + kill(-(t->pid), SIGKILL); } void __wait_for_test(struct __test_metadata *t) @@ -985,6 +986,7 @@ void __run_test(struct __fixture_metadata *f, ksft_print_msg("ERROR SPAWNING TEST CHILD\n"); t->passed = 0; } else if (t->pid == 0) { + setpgrp(); t->fn(t, variant); if (t->skip) _exit(255); -- 2.33.0
Kees,
Will you be able to take a look at this fix? This is in the kselftest_harness
thanks, -- Shuah
linux-kselftest-mirror@lists.linaro.org