The means by which a pid is determined from a pidfd is duplicated, some callers holding a reference to the (pid)fd, and others explicitly pinning the pid.
Introduce __pidfd_get_pid() which abstracts both approaches and provides optional output parameters for file->f_flags and the fd (the latter of which, if provided, prevents the function from decrementing the fd's reference count).
We add a wrapper function pidfd_get_pid() which performs the same functionality as the original, only deferring to __pidfd_get_pid() for the heavy lifting.
Additionally, abstract the ability to open a pidfd by opening a /proc/<pid> directory (used by the pidfd_send_signal() system call), providing a pidfd_to_pid_proc() wrapper function to do so.
Doing this allows us to eliminate open-coded pidfd pid lookup and to consistently handle this in one place.
This lays the groundwork for a subsequent patch which adds a new sentinel pidfd to explicitly reference the current process (i.e. thread group leader) without the need for a pidfd.
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> --- include/linux/pid.h | 42 +++++++++++++++++++++++++++++++++- kernel/pid.c | 55 +++++++++++++++++++++++++++++++-------------- kernel/signal.c | 26 +++++---------------- 3 files changed, 84 insertions(+), 39 deletions(-)
diff --git a/include/linux/pid.h b/include/linux/pid.h index a3aad9b4074c..68b02eab7509 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -2,6 +2,7 @@ #ifndef _LINUX_PID_H #define _LINUX_PID_H
+#include <linux/file.h> #include <linux/pid_types.h> #include <linux/rculist.h> #include <linux/rcupdate.h> @@ -72,8 +73,47 @@ extern struct pid init_struct_pid;
struct file;
+ +/** + * __pidfd_get_pid() - Retrieve a pid associated with the specified pidfd. + * + * @pidfd: The pidfd whose pid we want, or the fd of a /proc/<pid> file if + * @allow_proc is also set. + * @pin_pid: If set, then the reference counter of the returned pid is + * incremented. If not set, then @fd should be provided to pin the + * pidfd. + * @allow_proc: If set, then an fd of a /proc/<pid> file can be passed instead + * of a pidfd, and this will be used to determine the pid. + * @flags: Output variable, if non-NULL, then the file->f_flags of the + * pidfd will be set here. + * @fd: Output variable, if non-NULL, then the pidfd reference will + * remain elevated and the caller will need to decrement it + * themselves. + * + * Returns: If successful, the pid associated with the pidfd, otherwise an + * error. + */ +struct pid *__pidfd_get_pid(unsigned int pidfd, bool pin_pid, + bool allow_proc, unsigned int *flags, + struct fd *fd); + +static inline struct pid *pidfd_get_pid(unsigned int pidfd, unsigned int *flags) +{ + return __pidfd_get_pid(pidfd, /* pin_pid = */ true, + /* allow_proc = */ false, + flags, /* fd = */ NULL); +} + +static inline struct pid *pidfd_to_pid_proc(unsigned int pidfd, + unsigned int *flags, + struct fd *fd) +{ + return __pidfd_get_pid(pidfd, /* pin_pid = */ false, + /* allow_proc = */ true, + flags, fd); +} + struct pid *pidfd_pid(const struct file *file); -struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags); struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags); int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret); void do_notify_pidfd(struct task_struct *task); diff --git a/kernel/pid.c b/kernel/pid.c index 2715afb77eab..26e2581210c4 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -36,6 +36,7 @@ #include <linux/pid_namespace.h> #include <linux/init_task.h> #include <linux/syscalls.h> +#include <linux/proc_fs.h> #include <linux/proc_ns.h> #include <linux/refcount.h> #include 
<linux/anon_inodes.h> @@ -534,22 +535,47 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns) } EXPORT_SYMBOL_GPL(find_ge_pid);
-struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags) +struct pid *__pidfd_get_pid(unsigned int pidfd, bool pin_pid, + bool allow_proc, unsigned int *flags, + struct fd *fd) { struct fd f; struct pid *pid; + struct file *file;
f = fdget(fd); - if (!fd_file(f)) + file = fd_file(f); + if (!file) return ERR_PTR(-EBADF);
- pid = pidfd_pid(fd_file(f)); - if (!IS_ERR(pid)) { - get_pid(pid); - *flags = fd_file(f)->f_flags; + pid = pidfd_pid(file); + /* If we allow opening a pidfd via /proc/<pid>, do so. */ + if (IS_ERR(pid) && allow_proc) + pid = tgid_pidfd_to_pid(file); + + if (IS_ERR(pid)) { + fdput(f); + return pid; }
- fdput(f); + if (pin_pid) + get_pid(pid); + else + WARN_ON_ONCE(!fd); /* Nothing to keep pid/pidfd around? */ + + if (flags) + *flags = file->f_flags; + + /* + * If the user provides an fd output then it will handle decrementing + * its reference counter. + */ + if (fd) + *fd = f; + else + /* Otherwise we release it. */ + fdput(f); + return pid; }
@@ -747,23 +773,18 @@ SYSCALL_DEFINE3(pidfd_getfd, int, pidfd, int, fd, unsigned int, flags) { struct pid *pid; - struct fd f; int ret;
/* flags is currently unused - make sure it's unset */ if (flags) return -EINVAL;
- f = fdget(pidfd); - if (!fd_file(f)) - return -EBADF; - - pid = pidfd_pid(fd_file(f)); + pid = pidfd_get_pid(pidfd, NULL); if (IS_ERR(pid)) - ret = PTR_ERR(pid); - else - ret = pidfd_getfd(pid, fd); + return PTR_ERR(pid);
- fdput(f); + ret = pidfd_getfd(pid, fd); + + put_pid(pid); return ret; } diff --git a/kernel/signal.c b/kernel/signal.c index 6e57036f947f..abbb1931deba 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3894,17 +3894,6 @@ static int copy_siginfo_from_user_any(kernel_siginfo_t *kinfo, return copy_siginfo_from_user(kinfo, info); }
-static struct pid *pidfd_to_pid(const struct file *file) -{ - struct pid *pid; - - pid = pidfd_pid(file); - if (!IS_ERR(pid)) - return pid; - - return tgid_pidfd_to_pid(file); -} - #define PIDFD_SEND_SIGNAL_FLAGS \ (PIDFD_SIGNAL_THREAD | PIDFD_SIGNAL_THREAD_GROUP | \ PIDFD_SIGNAL_PROCESS_GROUP) @@ -3931,6 +3920,7 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig, struct pid *pid; kernel_siginfo_t kinfo; enum pid_type type; + unsigned int f_flags;
/* Enforce flags be set to 0 until we add an extension. */ if (flags & ~PIDFD_SEND_SIGNAL_FLAGS) @@ -3940,16 +3930,10 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig, if (hweight32(flags & PIDFD_SEND_SIGNAL_FLAGS) > 1) return -EINVAL;
- f = fdget(pidfd); - if (!fd_file(f)) - return -EBADF; - /* Is this a pidfd? */ - pid = pidfd_to_pid(fd_file(f)); - if (IS_ERR(pid)) { - ret = PTR_ERR(pid); - goto err; - } + pid = pidfd_to_pid_proc(pidfd, &f_flags, &f); + if (IS_ERR(pid)) + return PTR_ERR(pid);
ret = -EINVAL; if (!access_pidfd_pidns(pid)) @@ -3958,7 +3942,7 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig, switch (flags) { case 0: /* Infer scope from the type of pidfd. */ - if (fd_file(f)->f_flags & PIDFD_THREAD) + if (f_flags & PIDFD_THREAD) type = PIDTYPE_PID; else type = PIDTYPE_TGID;