Hi,
Passing a non-blocking pidfd to waitid() currently has no effect, i.e. is not supported. There are users which would like to use waitid() on pidfds that are O_NONBLOCK and mix it with pidfds that are blocking and both pass them to waitid(). The expected behavior is to have waitid() return -EAGAIN for non-blocking pidfds and to block for blocking pidfds without needing to perform any additional checks for flags set on the pidfd before passing it to waitid(). Non-blocking pidfds will return EAGAIN from waitid() when no child process is ready yet. Returning -EAGAIN for non-blocking pidfds makes it easier for event loops that handle EAGAIN specially.
It also makes the API more consistent and uniform. In essence, waitid() is treated like a read on a non-blocking pidfd or a recvmsg() on a non-blocking socket. With the addition of support for non-blocking pidfds we support the same functionality that sockets do. For sockets, recvmsg() supports MSG_DONTWAIT; for pidfds, waitid() supports WNOHANG. Both flags are per-call options. In contrast, non-blocking pidfds and non-blocking sockets are a setting on an open file description affecting all threads in the calling process as well as other processes that hold file descriptors referring to the same open file description. Both behaviors, per call and per open file description, have genuine use-cases.
A concrete use-case that was brought on-list (see [1]) was Josh's async pidfd library. Ever since the introduction of pidfds and more advanced async io various programming languages such as Rust have grown support for async event libraries. These libraries are created to help build epoll-based event loops around file descriptors. A common pattern is to automatically set O_NONBLOCK on all file descriptors they manage.
For such libraries the EAGAIN error code is treated specially. When a function is called that returns EAGAIN the function isn't called again until the event loop indicates that the file descriptor is ready. Supporting EAGAIN when waiting on pidfds makes such libraries just work with little effort.
Thanks! Christian
[1]: https://lore.kernel.org/lkml/20200811181236.GA18763@localhost/
Christian Brauner (4): pidfd: support PIDFD_NONBLOCK in pidfd_open() exit: support non-blocking pidfds tests: port pidfd_wait to kselftest harness tests: add waitid() tests for non-blocking pidfds
include/uapi/linux/pidfd.h | 12 + kernel/exit.c | 19 +- kernel/pid.c | 12 +- tools/testing/selftests/pidfd/pidfd.h | 4 + tools/testing/selftests/pidfd/pidfd_wait.c | 298 +++++++++------------ 5 files changed, 161 insertions(+), 184 deletions(-) create mode 100644 include/uapi/linux/pidfd.h
base-commit: d012a7190fc1fd72ed48911e77ca97ba4521bccd
Introduce PIDFD_NONBLOCK to support non-blocking pidfd file descriptors.
Ever since the introduction of pidfds and more advanced async io various programming languages such as Rust have grown support for async event libraries. These libraries are created to help build epoll-based event loops around file descriptors. A common pattern is to automatically set O_NONBLOCK on all file descriptors they manage.
For such libraries the EAGAIN error code is treated specially. When a function is called that returns EAGAIN the function isn't called again until the event loop indicates that the file descriptor is ready. Supporting EAGAIN when waiting on pidfds makes such libraries just work with little effort. In the following patch we will extend waitid() internally to support non-blocking pidfds.
Link: https://lore.kernel.org/lkml/20200811181236.GA18763@localhost/ Link: https://github.com/joshtriplett/async-pidfd Cc: Kees Cook keescook@chromium.org Cc: Sargun Dhillon sargun@sargun.me Cc: Oleg Nesterov oleg@redhat.com Suggested-by: Josh Triplett josh@joshtriplett.org Signed-off-by: Christian Brauner christian.brauner@ubuntu.com --- include/uapi/linux/pidfd.h | 12 ++++++++++++ kernel/pid.c | 12 +++++++----- 2 files changed, 19 insertions(+), 5 deletions(-) create mode 100644 include/uapi/linux/pidfd.h
diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h new file mode 100644 index 000000000000..5406fbc13074 --- /dev/null +++ b/include/uapi/linux/pidfd.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#ifndef _UAPI_LINUX_PIDFD_H +#define _UAPI_LINUX_PIDFD_H + +#include <linux/types.h> +#include <linux/fcntl.h> + +/* Flags for pidfd_open(). */ +#define PIDFD_NONBLOCK O_NONBLOCK + +#endif /* _UAPI_LINUX_PIDFD_H */ diff --git a/kernel/pid.c b/kernel/pid.c index b2562a7ce525..74ddbff1a6ba 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -43,6 +43,7 @@ #include <linux/sched/task.h> #include <linux/idr.h> #include <net/sock.h> +#include <uapi/linux/pidfd.h>
struct pid init_struct_pid = { .count = REFCOUNT_INIT(1), @@ -522,7 +523,8 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns) /** * pidfd_create() - Create a new pid file descriptor. * - * @pid: struct pid that the pidfd will reference + * @pid: struct pid that the pidfd will reference + * @flags: flags to pass * * This creates a new pid file descriptor with the O_CLOEXEC flag set. * @@ -532,12 +534,12 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns) * Return: On success, a cloexec pidfd is returned. * On error, a negative errno number will be returned. */ -static int pidfd_create(struct pid *pid) +static int pidfd_create(struct pid *pid, unsigned int flags) { int fd;
fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid), - O_RDWR | O_CLOEXEC); + flags | O_RDWR | O_CLOEXEC); if (fd < 0) put_pid(pid);
@@ -565,7 +567,7 @@ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags) int fd; struct pid *p;
- if (flags) + if (flags & ~PIDFD_NONBLOCK) return -EINVAL;
if (pid <= 0) @@ -576,7 +578,7 @@ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags) return -ESRCH;
if (pid_has_task(p, PIDTYPE_TGID)) - fd = pidfd_create(p); + fd = pidfd_create(p, flags); else fd = -EINVAL;
On 08/31, Christian Brauner wrote:
--- /dev/null +++ b/include/uapi/linux/pidfd.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_PIDFD_H +#define _UAPI_LINUX_PIDFD_H
+#include <linux/types.h> +#include <linux/fcntl.h>
+/* Flags for pidfd_open(). */ +#define PIDFD_NONBLOCK O_NONBLOCK
+#endif /* _UAPI_LINUX_PIDFD_H */
Why? Can't we simply use O_NONBLOCK ?
Oleg.
On Tue, Sep 01, 2020 at 06:23:10PM +0200, Oleg Nesterov wrote:
On 08/31, Christian Brauner wrote:
--- /dev/null +++ b/include/uapi/linux/pidfd.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_PIDFD_H +#define _UAPI_LINUX_PIDFD_H
+#include <linux/types.h> +#include <linux/fcntl.h>
+/* Flags for pidfd_open(). */ +#define PIDFD_NONBLOCK O_NONBLOCK
+#endif /* _UAPI_LINUX_PIDFD_H */
Why? Can't we simply use O_NONBLOCK ?
It's the same thing we seem to do for any other (anon inode) fds:
include/linux/eventfd.h:#define EFD_NONBLOCK O_NONBLOCK include/uapi/linux/inotify.h:#define IN_NONBLOCK O_NONBLOCK include/uapi/linux/signalfd.h:#define SFD_NONBLOCK O_NONBLOCK include/uapi/linux/timerfd.h:#define TFD_NONBLOCK O_NONBLOCK
also for O_CLOEXEC:
include/linux/eventfd.h:#define EFD_CLOEXEC O_CLOEXEC include/linux/userfaultfd_k.h:#define UFFD_CLOEXEC O_CLOEXEC include/uapi/linux/eventpoll.h:#define EPOLL_CLOEXEC O_CLOEXEC include/uapi/linux/mount.h:#define OPEN_TREE_CLOEXEC O_CLOEXEC include/uapi/linux/perf_event.h:#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ include/uapi/linux/signalfd.h:#define SFD_CLOEXEC O_CLOEXEC include/uapi/linux/timerfd.h:#define TFD_CLOEXEC O_CLOEXEC
So I think we should just do the same. A clean flag namespace seems nicer to me too tbh.
Christian
On 09/01, Christian Brauner wrote:
On Tue, Sep 01, 2020 at 06:23:10PM +0200, Oleg Nesterov wrote:
On 08/31, Christian Brauner wrote:
--- /dev/null +++ b/include/uapi/linux/pidfd.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_PIDFD_H +#define _UAPI_LINUX_PIDFD_H
+#include <linux/types.h> +#include <linux/fcntl.h>
+/* Flags for pidfd_open(). */ +#define PIDFD_NONBLOCK O_NONBLOCK
+#endif /* _UAPI_LINUX_PIDFD_H */
Why? Can't we simply use O_NONBLOCK ?
It's the same thing we seem to do for any other (anon inode) fds:
include/linux/eventfd.h:#define EFD_NONBLOCK O_NONBLOCK include/uapi/linux/inotify.h:#define IN_NONBLOCK O_NONBLOCK include/uapi/linux/signalfd.h:#define SFD_NONBLOCK O_NONBLOCK include/uapi/linux/timerfd.h:#define TFD_NONBLOCK O_NONBLOCK
also for O_CLOEXEC:
include/linux/eventfd.h:#define EFD_CLOEXEC O_CLOEXEC include/linux/userfaultfd_k.h:#define UFFD_CLOEXEC O_CLOEXEC include/uapi/linux/eventpoll.h:#define EPOLL_CLOEXEC O_CLOEXEC include/uapi/linux/mount.h:#define OPEN_TREE_CLOEXEC O_CLOEXEC include/uapi/linux/perf_event.h:#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ include/uapi/linux/signalfd.h:#define SFD_CLOEXEC O_CLOEXEC include/uapi/linux/timerfd.h:#define TFD_CLOEXEC O_CLOEXEC
So I think we should just do the same.
Hmm, OK, then I have to agree.
A clean flag namespace seems nicer to me too tbh.
Disagree but this doesn't matter ;)
Oleg.
Passing a non-blocking pidfd to waitid() currently has no effect, i.e. is not supported. There are users which would like to use waitid() on pidfds that are O_NONBLOCK and mix it with pidfds that are blocking and both pass them to waitid(). The expected behavior is to have waitid() return -EAGAIN for non-blocking pidfds and to block for blocking pidfds without needing to perform any additional checks for flags set on the pidfd before passing it to waitid(). Non-blocking pidfds will return EAGAIN from waitid() when no child process is ready yet. Returning -EAGAIN for non-blocking pidfds makes it easier for event loops that handle EAGAIN specially.
It also makes the API more consistent and uniform. In essence, waitid() is treated like a read on a non-blocking pidfd or a recvmsg() on a non-blocking socket. With the addition of support for non-blocking pidfds we support the same functionality that sockets do. For sockets, recvmsg() supports MSG_DONTWAIT; for pidfds, waitid() supports WNOHANG. Both flags are per-call options. In contrast, non-blocking pidfds and non-blocking sockets are a setting on an open file description affecting all threads in the calling process as well as other processes that hold file descriptors referring to the same open file description. Both behaviors, per call and per open file description, have genuine use-cases.
The implementation should be straightforward: we simply raise the WNOHANG flag when a non-blocking pidfd is passed and introduce an eagain_error member in struct wait_opts, similar to the notask_error member. The new eagain_error member is set to -EAGAIN for non-blocking pidfds and to zero for all other cases. If no child process exists, non-blocking pidfd users will continue to see ECHILD, but if child processes exist and have not yet exited, users will see EAGAIN.
A concrete use-case that was brought on-list was Josh's async pidfd library. Ever since the introduction of pidfds and more advanced async io various programming languages such as Rust have grown support for async event libraries. These libraries are created to help build epoll-based event loops around file descriptors. A common pattern is to automatically set O_NONBLOCK on all file descriptors they manage.
For such libraries the EAGAIN error code is treated specially. When a function is called that returns EAGAIN the function isn't called again until the event loop indicates that the file descriptor is ready. Supporting EAGAIN when waiting on pidfds makes such libraries just work with little effort.
Link: https://lore.kernel.org/lkml/20200811181236.GA18763@localhost/ Link: https://github.com/joshtriplett/async-pidfd Cc: Kees Cook keescook@chromium.org Cc: Sargun Dhillon sargun@sargun.me Cc: Jann Horn jannh@google.com Cc: Thomas Gleixner tglx@linutronix.de Cc: Ingo Molnar mingo@kernel.org Cc: Oleg Nesterov oleg@redhat.com Cc: "Peter Zijlstra (Intel)" peterz@infradead.org Suggested-by: Josh Triplett josh@joshtriplett.org Signed-off-by: Christian Brauner christian.brauner@ubuntu.com --- kernel/exit.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-)
diff --git a/kernel/exit.c b/kernel/exit.c index 733e80f334e7..598f2fefd721 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -934,6 +934,7 @@ struct wait_opts {
wait_queue_entry_t child_wait; int notask_error; + int eagain_error; };
static int eligible_pid(struct wait_opts *wo, struct task_struct *p) @@ -1461,6 +1462,8 @@ static long do_wait(struct wait_opts *wo)
notask: retval = wo->notask_error; + if (!retval) + retval = wo->eagain_error; if (!retval && !(wo->wo_flags & WNOHANG)) { retval = -ERESTARTSYS; if (!signal_pending(current)) { @@ -1474,7 +1477,7 @@ static long do_wait(struct wait_opts *wo) return retval; }
-static struct pid *pidfd_get_pid(unsigned int fd) +static struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags) { struct fd f; struct pid *pid; @@ -1484,8 +1487,10 @@ static struct pid *pidfd_get_pid(unsigned int fd) return ERR_PTR(-EBADF);
pid = pidfd_pid(f.file); - if (!IS_ERR(pid)) + if (!IS_ERR(pid)) { get_pid(pid); + *flags = f.file->f_flags; + }
fdput(f); return pid; @@ -1498,6 +1503,7 @@ static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop, struct pid *pid = NULL; enum pid_type type; long ret; + unsigned int f_flags = 0;
if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED| __WNOTHREAD|__WCLONE|__WALL)) @@ -1531,9 +1537,10 @@ static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop, if (upid < 0) return -EINVAL;
- pid = pidfd_get_pid(upid); + pid = pidfd_get_pid(upid, &f_flags); if (IS_ERR(pid)) return PTR_ERR(pid); + break; default: return -EINVAL; @@ -1544,6 +1551,11 @@ static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop, wo.wo_flags = options; wo.wo_info = infop; wo.wo_rusage = ru; + wo.eagain_error = 0; + if (f_flags & O_NONBLOCK) { + wo.wo_flags |= WNOHANG; + wo.eagain_error = -EAGAIN; + } ret = do_wait(&wo);
put_pid(pid); @@ -1618,6 +1630,7 @@ long kernel_wait4(pid_t upid, int __user *stat_addr, int options, wo.wo_info = NULL; wo.wo_stat = 0; wo.wo_rusage = ru; + wo.eagain_error = 0; ret = do_wait(&wo); put_pid(pid); if (ret > 0 && stat_addr && put_user(wo.wo_stat, stat_addr))
On 08/31, Christian Brauner wrote:
--- a/kernel/exit.c +++ b/kernel/exit.c @@ -934,6 +934,7 @@ struct wait_opts { wait_queue_entry_t child_wait; int notask_error;
+ int eagain_error;
}; static int eligible_pid(struct wait_opts *wo, struct task_struct *p) @@ -1461,6 +1462,8 @@ static long do_wait(struct wait_opts *wo) notask: retval = wo->notask_error;
+ if (!retval)
+ retval = wo->eagain_error;
if (!retval && !(wo->wo_flags & WNOHANG)) { retval = -ERESTARTSYS;
I must have missed something but I don't understand why do we need the new ->eagain_error and the change in do_wait().
@@ -1544,6 +1551,11 @@ static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop, wo.wo_flags = options; wo.wo_info = infop; wo.wo_rusage = ru;
+ wo.eagain_error = 0;
+ if (f_flags & O_NONBLOCK) {
+ wo.wo_flags |= WNOHANG;
+ wo.eagain_error = -EAGAIN;
+ }
ret = do_wait(&wo);
Can't kernel_waitid() simply do
if (f_flags & O_NONBLOCK) wo.wo_flags |= WNOHANG; ret = do_wait(); if (!ret && (f_flags & O_NONBLOCK)) ret = -EAGAIN;
?
Oleg.
On Tue, Sep 01, 2020 at 06:11:54PM +0200, Oleg Nesterov wrote:
On 08/31, Christian Brauner wrote:
--- a/kernel/exit.c +++ b/kernel/exit.c @@ -934,6 +934,7 @@ struct wait_opts { wait_queue_entry_t child_wait; int notask_error;
+ int eagain_error;
}; static int eligible_pid(struct wait_opts *wo, struct task_struct *p) @@ -1461,6 +1462,8 @@ static long do_wait(struct wait_opts *wo) notask: retval = wo->notask_error;
+ if (!retval)
+ retval = wo->eagain_error;
if (!retval && !(wo->wo_flags & WNOHANG)) { retval = -ERESTARTSYS;
I must have missed something but I don't understand why do we need the new ->eagain_error and the change in do_wait().
@@ -1544,6 +1551,11 @@ static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop, wo.wo_flags = options; wo.wo_info = infop; wo.wo_rusage = ru;
+ wo.eagain_error = 0;
+ if (f_flags & O_NONBLOCK) {
+ wo.wo_flags |= WNOHANG;
+ wo.eagain_error = -EAGAIN;
+ }
ret = do_wait(&wo);
Can't kernel_waitid() simply do
if (f_flags & O_NONBLOCK) wo.wo_flags |= WNOHANG; ret = do_wait(); if (!ret && (f_flags & O_NONBLOCK)) ret = -EAGAIN;
?
Heh, indeed, that's even a smaller patch. Will change to that!
Thanks for the review, Oleg! Christian
All of the new pidfd selftests already use the new kselftest harness infrastructure. It makes for clearer output, makes the code easier to understand, and makes adding new tests way simpler.
Cc: Shuah Khan shuah@kernel.org Cc: linux-kselftest@vger.kernel.org Signed-off-by: Christian Brauner christian.brauner@ubuntu.com --- tools/testing/selftests/pidfd/pidfd_wait.c | 213 ++++----------------- 1 file changed, 39 insertions(+), 174 deletions(-)
diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c index 7079f8eef792..075c716f6fb8 100644 --- a/tools/testing/selftests/pidfd/pidfd_wait.c +++ b/tools/testing/selftests/pidfd/pidfd_wait.c @@ -17,7 +17,7 @@ #include <unistd.h>
#include "pidfd.h" -#include "../kselftest.h" +#include "../kselftest_harness.h"
#define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr)))
@@ -32,9 +32,8 @@ static int sys_waitid(int which, pid_t pid, siginfo_t *info, int options, return syscall(__NR_waitid, which, pid, info, options, ru); }
-static int test_pidfd_wait_simple(void) +TEST(wait_simple) { - const char *test_name = "pidfd wait simple"; int pidfd = -1, status = 0; pid_t parent_tid = -1; struct clone_args args = { @@ -50,76 +49,40 @@ static int test_pidfd_wait_simple(void) };
pidfd = open("/proc/self", O_DIRECTORY | O_RDONLY | O_CLOEXEC); - if (pidfd < 0) - ksft_exit_fail_msg("%s test: failed to open /proc/self %s\n", - test_name, strerror(errno)); + ASSERT_GE(pidfd, 0);
pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); - if (pid == 0) - ksft_exit_fail_msg( - "%s test: succeeded to wait on invalid pidfd %s\n", - test_name, strerror(errno)); - close(pidfd); + ASSERT_NE(pid, 0); + EXPECT_EQ(close(pidfd), 0); pidfd = -1;
pidfd = open("/dev/null", O_RDONLY | O_CLOEXEC); - if (pidfd == 0) - ksft_exit_fail_msg("%s test: failed to open /dev/null %s\n", - test_name, strerror(errno)); + ASSERT_GE(pidfd, 0);
pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); - if (pid == 0) - ksft_exit_fail_msg( - "%s test: succeeded to wait on invalid pidfd %s\n", - test_name, strerror(errno)); - close(pidfd); + ASSERT_NE(pid, 0); + EXPECT_EQ(close(pidfd), 0); pidfd = -1;
pid = sys_clone3(&args); - if (pid < 0) - ksft_exit_fail_msg("%s test: failed to create new process %s\n", - test_name, strerror(errno)); + ASSERT_GE(pid, 1);
if (pid == 0) exit(EXIT_SUCCESS);
pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); - if (pid < 0) - ksft_exit_fail_msg( - "%s test: failed to wait on process with pid %d and pidfd %d: %s\n", - test_name, parent_tid, pidfd, strerror(errno)); - - if (!WIFEXITED(info.si_status) || WEXITSTATUS(info.si_status)) - ksft_exit_fail_msg( - "%s test: unexpected status received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, parent_tid, pidfd, strerror(errno)); - close(pidfd); - - if (info.si_signo != SIGCHLD) - ksft_exit_fail_msg( - "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_signo, parent_tid, pidfd, - strerror(errno)); - - if (info.si_code != CLD_EXITED) - ksft_exit_fail_msg( - "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_code, parent_tid, pidfd, - strerror(errno)); - - if (info.si_pid != parent_tid) - ksft_exit_fail_msg( - "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_pid, parent_tid, pidfd, - strerror(errno)); - - ksft_test_result_pass("%s test: Passed\n", test_name); - return 0; + ASSERT_GE(pid, 0); + ASSERT_EQ(WIFEXITED(info.si_status), true); + ASSERT_EQ(WEXITSTATUS(info.si_status), 0); + EXPECT_EQ(close(pidfd), 0); + + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_EXITED); + ASSERT_EQ(info.si_pid, parent_tid); }
-static int test_pidfd_wait_states(void) +TEST(wait_states) { - const char *test_name = "pidfd wait states"; int pidfd = -1, status = 0; pid_t parent_tid = -1; struct clone_args args = { @@ -135,9 +98,7 @@ static int test_pidfd_wait_states(void) };
pid = sys_clone3(&args); - if (pid < 0) - ksft_exit_fail_msg("%s test: failed to create new process %s\n", - test_name, strerror(errno)); + ASSERT_GE(pid, 0);
if (pid == 0) { kill(getpid(), SIGSTOP); @@ -145,127 +106,31 @@ static int test_pidfd_wait_states(void) exit(EXIT_SUCCESS); }
- ret = sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL); - if (ret < 0) - ksft_exit_fail_msg( - "%s test: failed to wait on WSTOPPED process with pid %d and pidfd %d: %s\n", - test_name, parent_tid, pidfd, strerror(errno)); - - if (info.si_signo != SIGCHLD) - ksft_exit_fail_msg( - "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_signo, parent_tid, pidfd, - strerror(errno)); - - if (info.si_code != CLD_STOPPED) - ksft_exit_fail_msg( - "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_code, parent_tid, pidfd, - strerror(errno)); - - if (info.si_pid != parent_tid) - ksft_exit_fail_msg( - "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_pid, parent_tid, pidfd, - strerror(errno)); - - ret = sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0); - if (ret < 0) - ksft_exit_fail_msg( - "%s test: failed to send signal to process with pid %d and pidfd %d: %s\n", - test_name, parent_tid, pidfd, strerror(errno)); - - ret = sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL); - if (ret < 0) - ksft_exit_fail_msg( - "%s test: failed to wait WCONTINUED on process with pid %d and pidfd %d: %s\n", - test_name, parent_tid, pidfd, strerror(errno)); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0); + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_STOPPED); + ASSERT_EQ(info.si_pid, parent_tid);
- if (info.si_signo != SIGCHLD) - ksft_exit_fail_msg( - "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_signo, parent_tid, pidfd, - strerror(errno)); + ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0);
- if (info.si_code != CLD_CONTINUED) - ksft_exit_fail_msg( - "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_code, parent_tid, pidfd, - strerror(errno)); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL), 0); + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_CONTINUED); + ASSERT_EQ(info.si_pid, parent_tid);
- if (info.si_pid != parent_tid) - ksft_exit_fail_msg( - "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_pid, parent_tid, pidfd, - strerror(errno)); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED, NULL), 0); + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_STOPPED); + ASSERT_EQ(info.si_pid, parent_tid);
- ret = sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED, NULL); - if (ret < 0) - ksft_exit_fail_msg( - "%s test: failed to wait on WUNTRACED process with pid %d and pidfd %d: %s\n", - test_name, parent_tid, pidfd, strerror(errno)); + ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0), 0);
- if (info.si_signo != SIGCHLD) - ksft_exit_fail_msg( - "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_signo, parent_tid, pidfd, - strerror(errno)); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0); + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_KILLED); + ASSERT_EQ(info.si_pid, parent_tid);
- if (info.si_code != CLD_STOPPED) - ksft_exit_fail_msg( - "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_code, parent_tid, pidfd, - strerror(errno)); - - if (info.si_pid != parent_tid) - ksft_exit_fail_msg( - "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_pid, parent_tid, pidfd, - strerror(errno)); - - ret = sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0); - if (ret < 0) - ksft_exit_fail_msg( - "%s test: failed to send SIGKILL to process with pid %d and pidfd %d: %s\n", - test_name, parent_tid, pidfd, strerror(errno)); - - ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); - if (ret < 0) - ksft_exit_fail_msg( - "%s test: failed to wait on WEXITED process with pid %d and pidfd %d: %s\n", - test_name, parent_tid, pidfd, strerror(errno)); - - if (info.si_signo != SIGCHLD) - ksft_exit_fail_msg( - "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_signo, parent_tid, pidfd, - strerror(errno)); - - if (info.si_code != CLD_KILLED) - ksft_exit_fail_msg( - "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_code, parent_tid, pidfd, - strerror(errno)); - - if (info.si_pid != parent_tid) - ksft_exit_fail_msg( - "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_pid, parent_tid, pidfd, - strerror(errno)); - - close(pidfd); - - ksft_test_result_pass("%s test: Passed\n", test_name); - return 0; + EXPECT_EQ(close(pidfd), 0); }
-int main(int argc, char **argv) -{ - ksft_print_header(); - ksft_set_plan(2); - - test_pidfd_wait_simple(); - test_pidfd_wait_states(); - - return ksft_exit_pass(); -} +TEST_HARNESS_MAIN
Verify that the PIDFD_NONBLOCK flag works with pidfd_open() and that waitid() with a non-blocking pidfd returns EAGAIN:
TAP version 13 1..3 # Starting 3 tests from 1 test cases. # RUN global.wait_simple ... # OK global.wait_simple ok 1 global.wait_simple # RUN global.wait_states ... # OK global.wait_states ok 2 global.wait_states # RUN global.wait_nonblock ... # OK global.wait_nonblock ok 3 global.wait_nonblock # PASSED: 3 / 3 tests passed. # Totals: pass:3 fail:0 xfail:0 xpass:0 skip:0 error:0
Cc: Shuah Khan shuah@kernel.org Cc: linux-kselftest@vger.kernel.org Signed-off-by: Christian Brauner christian.brauner@ubuntu.com --- tools/testing/selftests/pidfd/pidfd.h | 4 ++ tools/testing/selftests/pidfd/pidfd_wait.c | 83 +++++++++++++++++++++- 2 files changed, 86 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h index a2c80914e3dc..01f8d3c0cf2c 100644 --- a/tools/testing/selftests/pidfd/pidfd.h +++ b/tools/testing/selftests/pidfd/pidfd.h @@ -46,6 +46,10 @@ #define __NR_pidfd_getfd -1 #endif
+#ifndef PIDFD_NONBLOCK +#define PIDFD_NONBLOCK O_NONBLOCK +#endif + /* * The kernel reserves 300 pids via RESERVED_PIDS in kernel/pid.c * That means, when it wraps around any pid < 300 will be skipped. diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c index 075c716f6fb8..cefce4d3d2f6 100644 --- a/tools/testing/selftests/pidfd/pidfd_wait.c +++ b/tools/testing/selftests/pidfd/pidfd_wait.c @@ -21,6 +21,11 @@
#define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr)))
+/* Attempt to de-conflict with the selftests tree. */ +#ifndef SKIP +#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__) +#endif + static pid_t sys_clone3(struct clone_args *args) { return syscall(__NR_clone3, args, sizeof(struct clone_args)); @@ -65,7 +70,7 @@ TEST(wait_simple) pidfd = -1;
pid = sys_clone3(&args); - ASSERT_GE(pid, 1); + ASSERT_GE(pid, 0);
if (pid == 0) exit(EXIT_SUCCESS); @@ -133,4 +138,80 @@ TEST(wait_states) EXPECT_EQ(close(pidfd), 0); }
+TEST(wait_nonblock) +{ + int pidfd, status = 0; + unsigned int flags = 0; + pid_t parent_tid = -1; + struct clone_args args = { + .parent_tid = ptr_to_u64(&parent_tid), + .flags = CLONE_PARENT_SETTID, + .exit_signal = SIGCHLD, + }; + int ret; + pid_t pid; + siginfo_t info = { + .si_signo = 0, + }; + + /* + * Callers need to see ECHILD with non-blocking pidfds when no child + * processes exists. + */ + pidfd = sys_pidfd_open(getpid(), PIDFD_NONBLOCK); + EXPECT_GE(pidfd, 0) { + /* pidfd_open() doesn't support PIDFD_NONBLOCK. */ + ASSERT_EQ(errno, EINVAL); + SKIP(return, "Skipping PIDFD_NONBLOCK test"); + } + + pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + ASSERT_LT(pid, 0); + ASSERT_EQ(errno, ECHILD); + EXPECT_EQ(close(pidfd), 0); + + pid = sys_clone3(&args); + ASSERT_GE(pid, 0); + + if (pid == 0) { + kill(getpid(), SIGSTOP); + exit(EXIT_SUCCESS); + } + + pidfd = sys_pidfd_open(pid, PIDFD_NONBLOCK); + EXPECT_GE(pidfd, 0) { + /* pidfd_open() doesn't support PIDFD_NONBLOCK. */ + ASSERT_EQ(errno, EINVAL); + SKIP(return, "Skipping PIDFD_NONBLOCK test"); + } + + flags = fcntl(pidfd, F_GETFL, 0); + ASSERT_GT(flags, 0); + ASSERT_GT((flags & O_NONBLOCK), 0); + + /* + * Callers need to see EAGAIN/EWOULDBLOCK with non-blocking pidfd when + * child processes exist but none have exited. + */ + pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + ASSERT_LT(pid, 0); + ASSERT_EQ(errno, EAGAIN); + + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0); + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_STOPPED); + ASSERT_EQ(info.si_pid, parent_tid); + + ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0); + + ASSERT_EQ(fcntl(pidfd, F_SETFL, (flags & ~O_NONBLOCK)), 0); + + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0); + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_EXITED); + ASSERT_EQ(info.si_pid, parent_tid); + + EXPECT_EQ(close(pidfd), 0); +} + TEST_HARNESS_MAIN
linux-kselftest-mirror@lists.linaro.org