Currently, when a task is dead we still print the pid it used to use in the fdinfo files of its pidfds. This doesn't make much sense since the pid may have already been reused. So verify that the task is still alive. If the task is not alive anymore, we will print -1. This allows us to differentiate between a task not being present in a given pid namespace - in which case we already print 0 - and a task having been reaped.
Note that this uses PIDTYPE_PID for the check. Technically, we could've checked PIDTYPE_TGID since pidfds currently only refer to thread-group leaders but if they won't anymore in the future then this check becomes problematic without it being immediately obvious to non-experts imho. If a thread is created via clone(CLONE_THREAD) then struct pid has a single non-empty list pid->tasks[PIDTYPE_PID] and this pid can't be used as a PIDTYPE_TGID meaning pid->tasks[PIDTYPE_TGID] will return NULL even though the thread-group leader might still be very much alive. We could be more complicated and do something like:
bool alive = false; rcu_read_lock(); struct task_struct *tsk = pid_task(pid, PIDTYPE_PID); if (tsk && task_tgid(tsk)) alive = true; rcu_read_unlock();
but it's really not worth it. We already have created a pidfd and we thus know it refers to a thread-group leader. Checking PIDTYPE_PID is fine and is easier to maintain should we ever allow pidfds to refer to threads.
Cc: Jann Horn jannh@google.com Cc: Christian Kellner christian@kellner.me Cc: Oleg Nesterov oleg@redhat.com Cc: linux-api@vger.kernel.org Signed-off-by: Christian Brauner christian.brauner@ubuntu.com --- kernel/fork.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c index 782986962d47..a67944a5e542 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1695,6 +1695,18 @@ static int pidfd_release(struct inode *inode, struct file *file) }
#ifdef CONFIG_PROC_FS +static inline bool task_alive(struct pid *pid) +{ + bool alive = true; + + rcu_read_lock(); + if (!pid_task(pid, PIDTYPE_PID)) + alive = false; + rcu_read_unlock(); + + return alive; +} + /** * pidfd_show_fdinfo - print information about a pidfd * @m: proc fdinfo file @@ -1732,15 +1744,20 @@ static int pidfd_release(struct inode *inode, struct file *file) */ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f) { - struct pid_namespace *ns = proc_pid_ns(file_inode(m->file)); struct pid *pid = f->private_data; - pid_t nr = pid_nr_ns(pid, ns); + struct pid_namespace *ns; + pid_t nr = -1; + + if (likely(task_alive(pid))) { + ns = proc_pid_ns(file_inode(m->file)); + nr = pid_nr_ns(pid, ns); + }
- seq_put_decimal_ull(m, "Pid:\t", nr); + seq_put_decimal_ll(m, "Pid:\t", nr);
#ifdef CONFIG_PID_NS - seq_put_decimal_ull(m, "\nNSpid:\t", nr); - if (nr) { + seq_put_decimal_ll(m, "\nNSpid:\t", nr); + if (nr > 0) { int i;
/* If nr is non-zero it means that 'pid' is valid and that @@ -1749,7 +1766,7 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f) * Start at one below the already printed level. */ for (i = ns->level + 1; i <= pid->level; i++) - seq_put_decimal_ull(m, "\t", pid->numbers[i].nr); + seq_put_decimal_ll(m, "\t", pid->numbers[i].nr); } #endif seq_putc(m, '\n');
Test that the fdinfo field of a pidfd referring to a dead process correctly shows Pid: -1 and NSpid: -1.
Cc: Christian Kellner christian@kellner.me Signed-off-by: Christian Brauner christian.brauner@ubuntu.com --- .../selftests/pidfd/pidfd_fdinfo_test.c | 59 ++++++++++++++----- 1 file changed, 45 insertions(+), 14 deletions(-)
diff --git a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c index 3721be994abd..22558524f71c 100644 --- a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c +++ b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c @@ -113,11 +113,15 @@ static struct child clone_newns(int (*fn)(void *), void *args, return ret; }
+static inline void child_close(struct child *child) +{ + close(child->fd); +} + static inline int child_join(struct child *child, struct error *err) { int r;
- (void)close(child->fd); r = wait_for_pid(child->pid); if (r < 0) error_set(err, PIDFD_ERROR, "waitpid failed (ret %d, errno %d)", @@ -128,6 +132,12 @@ static inline int child_join(struct child *child, struct error *err) return r; }
+static inline int child_join_close(struct child *child, struct error *err) +{ + child_close(child); + return child_join(child, err); +} + static inline void trim_newline(char *str) { char *pos = strrchr(str, '\n'); @@ -136,8 +146,8 @@ static inline void trim_newline(char *str) *pos = '\0'; }
-static int verify_fdinfo_nspid(int pidfd, struct error *err, - const char *expect, ...) +static int verify_fdinfo(int pidfd, struct error *err, const char *prefix, + size_t prefix_len, const char *expect, ...) { char buffer[512] = {0, }; char path[512] = {0, }; @@ -160,17 +170,20 @@ static int verify_fdinfo_nspid(int pidfd, struct error *err, pidfd);
while (getline(&line, &n, f) != -1) { - if (strncmp(line, "NSpid:", 6)) + char *val; + + if (strncmp(line, prefix, prefix_len)) continue;
found = 1;
- r = strcmp(line + 6, buffer); + val = line + prefix_len; + r = strcmp(val, buffer); if (r != 0) { trim_newline(line); trim_newline(buffer); - error_set(err, PIDFD_FAIL, "NSpid: '%s' != '%s'", - line + 6, buffer); + error_set(err, PIDFD_FAIL, "%s '%s' != '%s'", + prefix, val, buffer); } break; } @@ -179,8 +192,8 @@ static int verify_fdinfo_nspid(int pidfd, struct error *err, fclose(f);
if (found == 0) - return error_set(err, PIDFD_FAIL, "NSpid not found for fd %d", - pidfd); + return error_set(err, PIDFD_FAIL, "%s not found for fd %d", + prefix, pidfd);
return PIDFD_PASS; } @@ -213,7 +226,7 @@ static int child_fdinfo_nspid_test(void *args) }
pidfd = *(int *)args; - r = verify_fdinfo_nspid(pidfd, &err, "\t0\n"); + r = verify_fdinfo(pidfd, &err, "NSpid:", 6, "\t0\n");
if (r != PIDFD_PASS) ksft_print_msg("NSpid fdinfo check failed: %s\n", err.msg); @@ -242,24 +255,42 @@ static void test_pidfd_fdinfo_nspid(void) /* The children will have pid 1 in the new pid namespace, * so the line must be 'NSPid:\t<pid>\t1'. */ - verify_fdinfo_nspid(a.fd, &err, "\t%d\t%d\n", a.pid, 1); - verify_fdinfo_nspid(b.fd, &err, "\t%d\t%d\n", b.pid, 1); + verify_fdinfo(a.fd, &err, "NSpid:", 6, "\t%d\t%d\n", a.pid, 1); + verify_fdinfo(b.fd, &err, "NSpid:", 6, "\t%d\t%d\n", b.pid, 1);
/* wait for the process, check the exit status and set * 'err' accordingly, if it is not already set. */ + child_join_close(&a, &err); + child_join_close(&b, &err); + + error_report(&err, test_name); +} + +static void test_pidfd_dead_fdinfo(void) +{ + struct child a; + struct error err = {0, }; + const char *test_name = "pidfd check fdinfo for dead process"; + + /* Create a new child in a new pid and mount namespace */ + a = clone_newns(child_fdinfo_nspid_test, NULL, &err); + error_check(&err, test_name); child_join(&a, &err); - child_join(&b, &err);
+ verify_fdinfo(a.fd, &err, "Pid:", 4, "\t-1\n"); + verify_fdinfo(a.fd, &err, "NSpid:", 6, "\t-1\n"); + child_close(&a); error_report(&err, test_name); }
int main(int argc, char **argv) { ksft_print_header(); - ksft_set_plan(1); + ksft_set_plan(2);
test_pidfd_fdinfo_nspid(); + test_pidfd_dead_fdinfo();
return ksft_exit_pass(); }
On Tue, 2019-10-15 at 16:13 +0200, Christian Brauner wrote:
Test that the fdinfo field of a pidfd referring to a dead process correctly shows Pid: -1 and NSpid: -1.
Cc: Christian Kellner christian@kellner.me Signed-off-by: Christian Brauner christian.brauner@ubuntu.com
Looks good to me.
Reviewed-by: Christian Kellner christian@kellner.me
On 10/15, Christian Brauner wrote:
+static inline bool task_alive(struct pid *pid) +{
- bool alive = true;
- rcu_read_lock();
- if (!pid_task(pid, PIDTYPE_PID))
alive = false;
- rcu_read_unlock();
- return alive;
+}
Well, the usage of rcu_read_lock/unlock looks confusing to me...
I mean, this helper does not need rcu lock at all. Except rcu_dereference_check() will complain.
static inline bool task_alive(struct pid *pid) { bool alive;
/* shut up rcu_dereference_check() */ rcu_lock_acquire(&rcu_lock_map); alive = !!pid_task(pid, PIDTYPE_PID); rcu_lock_release(&rcu_lock_map);
return alive; }
looks more clear imo.
But in fact I'd suggest to simply use !hlist_empty(&pid->tasks[PIDTYPE_PID]) in pidfd_show_fdinfo() and do not add a new helper.
Oleg.
On Tue, Oct 15, 2019 at 04:43:57PM +0200, Oleg Nesterov wrote:
On 10/15, Christian Brauner wrote:
+static inline bool task_alive(struct pid *pid) +{
- bool alive = true;
- rcu_read_lock();
- if (!pid_task(pid, PIDTYPE_PID))
alive = false;
- rcu_read_unlock();
- return alive;
+}
Well, the usage of rcu_read_lock/unlock looks confusing to me...
I mean, this helper does not need rcu lock at all. Except rcu_dereference_check() will complain.
Yep, I think we have another code path where the rcu locks might be purely cosmetic so I thought it's not a big deal (see below).
static inline bool task_alive(struct pid *pid) { bool alive;
/* shut up rcu_dereference_check() */ rcu_lock_acquire(&rcu_lock_map); alive = !!pid_task(pid, PIDTYPE_PID); rcu_lock_release(&rcu_lock_map); return alive;
}
looks more clear imo.
But in fact I'd suggest to simply use !hlist_empty(&pid->tasks[PIDTYPE_PID]) in pidfd_show_fdinfo() and do not add a new helper.
Sounds good to me. But can't we then just do something similar just with !hlist_empty(&pid->tasks[PIDTYPE_TGID])
in v5.4-rc3:kernel/pid.c:pidfd_open():514-517 ?
or would this be problematic because of de_thread()?
Thanks! Christian
On 10/15, Christian Brauner wrote:
On Tue, Oct 15, 2019 at 04:43:57PM +0200, Oleg Nesterov wrote:
But in fact I'd suggest to simply use !hlist_empty(&pid->tasks[PIDTYPE_PID]) in pidfd_show_fdinfo() and do not add a new helper.
Sounds good to me. But can't we then just do something similar just with !hlist_empty(&pid->tasks[PIDTYPE_TGID])
in v5.4-rc3:kernel/pid.c:pidfd_open():514-517 ?
Agreed. Actually, it seems to me I suggested to use rcu_lock_acquire() rather than rcu_read_lock() in pidfd_open() too.
But hlist_empty(pid->tasks[type]) looks even better.
If you decide to add a new helper, you can also change do_wait() which checks hlist_empty(&wo->wo_pid->tasks[wo->wo_type]). May be even __change_pid().
Oleg.
Currently, when a task is dead we still print the pid it used to use in the fdinfo files of its pidfds. This doesn't make much sense since the pid may have already been reused. So verify that the task is still alive by introducing the task_alive() helper which will be used by other callers in follow-up patches. If the task is not alive anymore, we will print -1. This allows us to differentiate between a task not being present in a given pid namespace - in which case we already print 0 - and a task having been reaped.
Note that this uses PIDTYPE_PID for the check. Technically, we could've checked PIDTYPE_TGID since pidfds currently only refer to thread-group leaders but if they won't anymore in the future then this check becomes problematic without it being immediately obvious to non-experts imho. If a thread is created via clone(CLONE_THREAD) then struct pid has a single non-empty list pid->tasks[PIDTYPE_PID] and this pid can't be used as a PIDTYPE_TGID meaning pid->tasks[PIDTYPE_TGID] will return NULL even though the thread-group leader might still be very much alive. So checking PIDTYPE_PID is fine and is easier to maintain should we ever allow pidfds to refer to threads.
Cc: Jann Horn jannh@google.com Cc: Christian Kellner christian@kellner.me Cc: Oleg Nesterov oleg@redhat.com Cc: linux-api@vger.kernel.org Signed-off-by: Christian Brauner christian.brauner@ubuntu.com --- /* v1 */ Link: https://lore.kernel.org/r/20191015141332.4055-1-christian.brauner@ubuntu.com
/* v2 */ - Oleg Nesterov oleg@redhat.com: - simplify check whether task is still alive to hlist_empty() - optionally introduce generic helper to replace open coded hlist_empty() checks whether or not a task is alive - Christian Brauner christian.brauner@ubuntu.com: - introduce task_alive() helper and use in pidfd_show_fdinfo() --- include/linux/pid.h | 4 ++++ kernel/fork.c | 17 +++++++++++------ 2 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/include/linux/pid.h b/include/linux/pid.h index 9645b1194c98..5f1c8ce10b71 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -85,6 +85,10 @@ static inline struct pid *get_pid(struct pid *pid)
extern void put_pid(struct pid *pid); extern struct task_struct *pid_task(struct pid *pid, enum pid_type); +static inline bool task_alive(struct pid *pid, enum pid_type type) +{ + return !hlist_empty(&pid->tasks[type]); +} extern struct task_struct *get_pid_task(struct pid *pid, enum pid_type);
extern struct pid *get_task_pid(struct task_struct *task, enum pid_type type); diff --git a/kernel/fork.c b/kernel/fork.c index 782986962d47..ef9a9d661079 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1732,15 +1732,20 @@ static int pidfd_release(struct inode *inode, struct file *file) */ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f) { - struct pid_namespace *ns = proc_pid_ns(file_inode(m->file)); struct pid *pid = f->private_data; - pid_t nr = pid_nr_ns(pid, ns); + struct pid_namespace *ns; + pid_t nr = -1;
- seq_put_decimal_ull(m, "Pid:\t", nr); + if (likely(task_alive(pid, PIDTYPE_PID))) { + ns = proc_pid_ns(file_inode(m->file)); + nr = pid_nr_ns(pid, ns); + } + + seq_put_decimal_ll(m, "Pid:\t", nr);
#ifdef CONFIG_PID_NS - seq_put_decimal_ull(m, "\nNSpid:\t", nr); - if (nr) { + seq_put_decimal_ll(m, "\nNSpid:\t", nr); + if (nr > 0) { int i;
/* If nr is non-zero it means that 'pid' is valid and that @@ -1749,7 +1754,7 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f) * Start at one below the already printed level. */ for (i = ns->level + 1; i <= pid->level; i++) - seq_put_decimal_ull(m, "\t", pid->numbers[i].nr); + seq_put_decimal_ll(m, "\t", pid->numbers[i].nr); } #endif seq_putc(m, '\n');
Test that the fdinfo field of a pidfd referring to a dead process correctly shows Pid: -1 and NSpid: -1.
Cc: Christian Kellner christian@kellner.me Reviewed-by: Christian Kellner christian@kellner.me Signed-off-by: Christian Brauner christian.brauner@ubuntu.com --- /* v1 */ Link: https://lore.kernel.org/r/20191015141332.4055-2-christian.brauner@ubuntu.com
/* v2 */ unchanged --- .../selftests/pidfd/pidfd_fdinfo_test.c | 59 ++++++++++++++----- 1 file changed, 45 insertions(+), 14 deletions(-)
diff --git a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c index 3721be994abd..22558524f71c 100644 --- a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c +++ b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c @@ -113,11 +113,15 @@ static struct child clone_newns(int (*fn)(void *), void *args, return ret; }
+static inline void child_close(struct child *child) +{ + close(child->fd); +} + static inline int child_join(struct child *child, struct error *err) { int r;
- (void)close(child->fd); r = wait_for_pid(child->pid); if (r < 0) error_set(err, PIDFD_ERROR, "waitpid failed (ret %d, errno %d)", @@ -128,6 +132,12 @@ static inline int child_join(struct child *child, struct error *err) return r; }
+static inline int child_join_close(struct child *child, struct error *err) +{ + child_close(child); + return child_join(child, err); +} + static inline void trim_newline(char *str) { char *pos = strrchr(str, '\n'); @@ -136,8 +146,8 @@ static inline void trim_newline(char *str) *pos = '\0'; }
-static int verify_fdinfo_nspid(int pidfd, struct error *err, - const char *expect, ...) +static int verify_fdinfo(int pidfd, struct error *err, const char *prefix, + size_t prefix_len, const char *expect, ...) { char buffer[512] = {0, }; char path[512] = {0, }; @@ -160,17 +170,20 @@ static int verify_fdinfo_nspid(int pidfd, struct error *err, pidfd);
while (getline(&line, &n, f) != -1) { - if (strncmp(line, "NSpid:", 6)) + char *val; + + if (strncmp(line, prefix, prefix_len)) continue;
found = 1;
- r = strcmp(line + 6, buffer); + val = line + prefix_len; + r = strcmp(val, buffer); if (r != 0) { trim_newline(line); trim_newline(buffer); - error_set(err, PIDFD_FAIL, "NSpid: '%s' != '%s'", - line + 6, buffer); + error_set(err, PIDFD_FAIL, "%s '%s' != '%s'", + prefix, val, buffer); } break; } @@ -179,8 +192,8 @@ static int verify_fdinfo_nspid(int pidfd, struct error *err, fclose(f);
if (found == 0) - return error_set(err, PIDFD_FAIL, "NSpid not found for fd %d", - pidfd); + return error_set(err, PIDFD_FAIL, "%s not found for fd %d", + prefix, pidfd);
return PIDFD_PASS; } @@ -213,7 +226,7 @@ static int child_fdinfo_nspid_test(void *args) }
pidfd = *(int *)args; - r = verify_fdinfo_nspid(pidfd, &err, "\t0\n"); + r = verify_fdinfo(pidfd, &err, "NSpid:", 6, "\t0\n");
if (r != PIDFD_PASS) ksft_print_msg("NSpid fdinfo check failed: %s\n", err.msg); @@ -242,24 +255,42 @@ static void test_pidfd_fdinfo_nspid(void) /* The children will have pid 1 in the new pid namespace, * so the line must be 'NSPid:\t<pid>\t1'. */ - verify_fdinfo_nspid(a.fd, &err, "\t%d\t%d\n", a.pid, 1); - verify_fdinfo_nspid(b.fd, &err, "\t%d\t%d\n", b.pid, 1); + verify_fdinfo(a.fd, &err, "NSpid:", 6, "\t%d\t%d\n", a.pid, 1); + verify_fdinfo(b.fd, &err, "NSpid:", 6, "\t%d\t%d\n", b.pid, 1);
/* wait for the process, check the exit status and set * 'err' accordingly, if it is not already set. */ + child_join_close(&a, &err); + child_join_close(&b, &err); + + error_report(&err, test_name); +} + +static void test_pidfd_dead_fdinfo(void) +{ + struct child a; + struct error err = {0, }; + const char *test_name = "pidfd check fdinfo for dead process"; + + /* Create a new child in a new pid and mount namespace */ + a = clone_newns(child_fdinfo_nspid_test, NULL, &err); + error_check(&err, test_name); child_join(&a, &err); - child_join(&b, &err);
+ verify_fdinfo(a.fd, &err, "Pid:", 4, "\t-1\n"); + verify_fdinfo(a.fd, &err, "NSpid:", 6, "\t-1\n"); + child_close(&a); error_report(&err, test_name); }
int main(int argc, char **argv) { ksft_print_header(); - ksft_set_plan(1); + ksft_set_plan(2);
test_pidfd_fdinfo_nspid(); + test_pidfd_dead_fdinfo();
return ksft_exit_pass(); }
Replace hlist_empty() with the new task_alive() helper which is more idiomatic, easier to grep for, and unifies how callers perform this check.
Cc: Oleg Nesterov oleg@redhat.com Signed-off-by: Christian Brauner christian.brauner@ubuntu.com --- /* v1 */ patch not present
/* v2 */ patch introduced --- kernel/pid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/pid.c b/kernel/pid.c index 0a9f2e437217..70ad4a9f728c 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -299,7 +299,7 @@ static void __change_pid(struct task_struct *task, enum pid_type type, *pid_ptr = new;
for (tmp = PIDTYPE_MAX; --tmp >= 0; ) - if (!hlist_empty(&pid->tasks[tmp])) + if (task_alive(pid, tmp)) return;
free_pid(pid);
Replace hlist_empty() with the new task_alive() helper which is more idiomatic, easier to grep for, and unifies how callers perform this check.
Cc: Oleg Nesterov oleg@redhat.com Signed-off-by: Christian Brauner christian.brauner@ubuntu.com --- /* v1 */ patch not present
/* v2 */ patch introduced --- kernel/exit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/exit.c b/kernel/exit.c index a46a50d67002..2bb0cbe94bda 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1457,7 +1457,7 @@ static long do_wait(struct wait_opts *wo) */ wo->notask_error = -ECHILD; if ((wo->wo_type < PIDTYPE_MAX) && - (!wo->wo_pid || hlist_empty(&wo->wo_pid->tasks[wo->wo_type]))) + (!wo->wo_pid || !task_alive(wo->wo_pid, wo->wo_type))) goto notask;
set_current_state(TASK_INTERRUPTIBLE);
Use the new task_alive() helper in pidfd_open(). This simplifies the code and avoids taking rcu_read_{lock,unlock}() and leads to overall nicer code.
Cc: Oleg Nesterov oleg@redhat.com Signed-off-by: Christian Brauner christian.brauner@ubuntu.com --- /* v1 */ patch not present
/* v2 */ patch introduced --- kernel/pid.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/kernel/pid.c b/kernel/pid.c index 70ad4a9f728c..1f425b6c4c47 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -497,7 +497,7 @@ static int pidfd_create(struct pid *pid) */ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags) { - int fd, ret; + int fd; struct pid *p;
if (flags) @@ -510,13 +510,11 @@ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags) if (!p) return -ESRCH;
- ret = 0; - rcu_read_lock(); - if (!pid_task(p, PIDTYPE_TGID)) - ret = -EINVAL; - rcu_read_unlock(); + if (task_alive(p, PIDTYPE_TGID)) + fd = pidfd_create(p); + else + fd = -EINVAL;
- fd = ret ?: pidfd_create(p); put_pid(p); return fd; }
On 10/16, Christian Brauner wrote:
+static inline bool task_alive(struct pid *pid, enum pid_type type) +{
- return !hlist_empty(&pid->tasks[type]);
+}
So you decided to add a helper ;) OK, but note that its name is very confusing and misleading. Even more than pid_alive() we already have.
What does "alive" actually mean? Say, task_alive(pid, PIDTYPE_SID) == F after fork(). Then it becomes T if this task does setsid().
And why task_ if it accepts pid+pid_type? May be pid_has_task() or something like this...
OK, since I can't suggest a better name I won't really argue. Feel free to add my reviewed-by to this series.
Oleg.
On Wed, Oct 16, 2019 at 06:24:09PM +0200, Oleg Nesterov wrote:
On 10/16, Christian Brauner wrote:
+static inline bool task_alive(struct pid *pid, enum pid_type type) +{
- return !hlist_empty(&pid->tasks[type]);
+}
So you decided to add a helper ;) OK, but note that its name is very confusing and misleading. Even more than pid_alive() we already have.
That's why I chose that name. This is the second time I get bitten by taking inspiration from prior naming examples. :)
What does "alive" actually mean? Say, task_alive(pid, PIDTYPE_SID) == F after fork(). Then it becomes T if this task does setsid().
Yes, that annoyed me too. If you think about pidfd_open() you have a similar problem. The question we are asking in pidfd_open() is not task_alive() but rather was-this-pid-used-as.
And why task_ if it accepts pid+pid_type? May be pid_has_task() or something like this...
Given what I said above that might be a decent name.
OK, since I can't suggest a better name I won't really argue. Feel free to add my reviewed-by to this series.
No, naming is important. Thanks for being picky about that too and I'll happily resend. :)
Thanks! Christian
On 10/16, Christian Brauner wrote:
On Wed, Oct 16, 2019 at 06:24:09PM +0200, Oleg Nesterov wrote:
And why task_ if it accepts pid+pid_type? May be pid_has_task() or something like this...
Given what I said above that might be a decent name.
OK, since I can't suggest a better name I won't really argue. Feel free to add my reviewed-by to this series.
No, naming is important. Thanks for being picky about that too and I'll happily resend. :)
Thanks ;) May be pid_in_use() ? Up to you, anything which starts with pid_ looks better to me than task_alive().
Oleg.
Currently, when a task is dead we still print the pid it used to use in the fdinfo files of its pidfds. This doesn't make much sense since the pid may have already been reused. So verify that the task is still alive by introducing the pid_has_task() helper which will be used by other callers in follow-up patches. If the task is not alive anymore, we will print -1. This allows us to differentiate between a task not being present in a given pid namespace - in which case we already print 0 - and a task having been reaped.
Note that this uses PIDTYPE_PID for the check. Technically, we could've checked PIDTYPE_TGID since pidfds currently only refer to thread-group leaders but if they won't anymore in the future then this check becomes problematic without it being immediately obvious to non-experts imho. If a thread is created via clone(CLONE_THREAD) then struct pid has a single non-empty list pid->tasks[PIDTYPE_PID] and this pid can't be used as a PIDTYPE_TGID meaning pid->tasks[PIDTYPE_TGID] will return NULL even though the thread-group leader might still be very much alive. So checking PIDTYPE_PID is fine and is easier to maintain should we ever allow pidfds to refer to threads.
Cc: Jann Horn jannh@google.com Cc: Christian Kellner christian@kellner.me Cc: linux-api@vger.kernel.org Signed-off-by: Christian Brauner christian.brauner@ubuntu.com Reviewed-by: Oleg Nesterov oleg@redhat.com --- /* pidfd selftests */ passed
/* v1 */ Link: https://lore.kernel.org/r/20191015141332.4055-1-christian.brauner@ubuntu.com
/* v2 */ Link: https://lore.kernel.org/r/20191016153606.2326-1-christian.brauner@ubuntu.com - Oleg Nesterov oleg@redhat.com: - simplify check whether task is still alive to hlist_empty() - optionally introduce generic helper to replace open coded hlist_empty() checks whether or not a task is alive - Christian Brauner christian.brauner@ubuntu.com: - introduce task_alive() helper and use in pidfd_show_fdinfo()
/* v3 */ - Oleg Nesterov oleg@redhat.com: - s/task_alive/pid_has_task/g - Christian Brauner christian.brauner@ubuntu.com: - reword commit message to better reflect naming switch from task_alive() to pid_has_task() --- include/linux/pid.h | 4 ++++ kernel/fork.c | 17 +++++++++++------ 2 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/include/linux/pid.h b/include/linux/pid.h index 9645b1194c98..034e3cd60dc0 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -85,6 +85,10 @@ static inline struct pid *get_pid(struct pid *pid)
extern void put_pid(struct pid *pid); extern struct task_struct *pid_task(struct pid *pid, enum pid_type); +static inline bool pid_has_task(struct pid *pid, enum pid_type type) +{ + return !hlist_empty(&pid->tasks[type]); +} extern struct task_struct *get_pid_task(struct pid *pid, enum pid_type);
extern struct pid *get_task_pid(struct task_struct *task, enum pid_type type); diff --git a/kernel/fork.c b/kernel/fork.c index 782986962d47..ffa314838b43 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1732,15 +1732,20 @@ static int pidfd_release(struct inode *inode, struct file *file) */ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f) { - struct pid_namespace *ns = proc_pid_ns(file_inode(m->file)); struct pid *pid = f->private_data; - pid_t nr = pid_nr_ns(pid, ns); + struct pid_namespace *ns; + pid_t nr = -1;
- seq_put_decimal_ull(m, "Pid:\t", nr); + if (likely(pid_has_task(pid, PIDTYPE_PID))) { + ns = proc_pid_ns(file_inode(m->file)); + nr = pid_nr_ns(pid, ns); + } + + seq_put_decimal_ll(m, "Pid:\t", nr);
#ifdef CONFIG_PID_NS - seq_put_decimal_ull(m, "\nNSpid:\t", nr); - if (nr) { + seq_put_decimal_ll(m, "\nNSpid:\t", nr); + if (nr > 0) { int i;
/* If nr is non-zero it means that 'pid' is valid and that @@ -1749,7 +1754,7 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f) * Start at one below the already printed level. */ for (i = ns->level + 1; i <= pid->level; i++) - seq_put_decimal_ull(m, "\t", pid->numbers[i].nr); + seq_put_decimal_ll(m, "\t", pid->numbers[i].nr); } #endif seq_putc(m, '\n');
Test that the fdinfo field of a pidfd referring to a dead process correctly shows Pid: -1 and NSpid: -1.
Cc: Christian Kellner christian@kellner.me Cc: linux-kselftest@vger.kernel.org Reviewed-by: Christian Kellner christian@kellner.me Signed-off-by: Christian Brauner christian.brauner@ubuntu.com --- /* pidfd selftests */ passed
/* v1 */ Link: https://lore.kernel.org/r/20191015141332.4055-2-christian.brauner@ubuntu.com
/* v2 */ Link: https://lore.kernel.org/r/20191016153606.2326-2-christian.brauner@ubuntu.com unchanged
/* v3 */ unchanged --- .../selftests/pidfd/pidfd_fdinfo_test.c | 59 ++++++++++++++----- 1 file changed, 45 insertions(+), 14 deletions(-)
diff --git a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c index 3721be994abd..22558524f71c 100644 --- a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c +++ b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c @@ -113,11 +113,15 @@ static struct child clone_newns(int (*fn)(void *), void *args, return ret; }
+static inline void child_close(struct child *child) +{ + close(child->fd); +} + static inline int child_join(struct child *child, struct error *err) { int r;
- (void)close(child->fd); r = wait_for_pid(child->pid); if (r < 0) error_set(err, PIDFD_ERROR, "waitpid failed (ret %d, errno %d)", @@ -128,6 +132,12 @@ static inline int child_join(struct child *child, struct error *err) return r; }
+static inline int child_join_close(struct child *child, struct error *err) +{ + child_close(child); + return child_join(child, err); +} + static inline void trim_newline(char *str) { char *pos = strrchr(str, '\n'); @@ -136,8 +146,8 @@ static inline void trim_newline(char *str) *pos = '\0'; }
-static int verify_fdinfo_nspid(int pidfd, struct error *err, - const char *expect, ...) +static int verify_fdinfo(int pidfd, struct error *err, const char *prefix, + size_t prefix_len, const char *expect, ...) { char buffer[512] = {0, }; char path[512] = {0, }; @@ -160,17 +170,20 @@ static int verify_fdinfo_nspid(int pidfd, struct error *err, pidfd);
while (getline(&line, &n, f) != -1) { - if (strncmp(line, "NSpid:", 6)) + char *val; + + if (strncmp(line, prefix, prefix_len)) continue;
found = 1;
- r = strcmp(line + 6, buffer); + val = line + prefix_len; + r = strcmp(val, buffer); if (r != 0) { trim_newline(line); trim_newline(buffer); - error_set(err, PIDFD_FAIL, "NSpid: '%s' != '%s'", - line + 6, buffer); + error_set(err, PIDFD_FAIL, "%s '%s' != '%s'", + prefix, val, buffer); } break; } @@ -179,8 +192,8 @@ static int verify_fdinfo_nspid(int pidfd, struct error *err, fclose(f);
if (found == 0) - return error_set(err, PIDFD_FAIL, "NSpid not found for fd %d", - pidfd); + return error_set(err, PIDFD_FAIL, "%s not found for fd %d", + prefix, pidfd);
return PIDFD_PASS; } @@ -213,7 +226,7 @@ static int child_fdinfo_nspid_test(void *args) }
pidfd = *(int *)args; - r = verify_fdinfo_nspid(pidfd, &err, "\t0\n"); + r = verify_fdinfo(pidfd, &err, "NSpid:", 6, "\t0\n");
if (r != PIDFD_PASS) ksft_print_msg("NSpid fdinfo check failed: %s\n", err.msg); @@ -242,24 +255,42 @@ static void test_pidfd_fdinfo_nspid(void) /* The children will have pid 1 in the new pid namespace, * so the line must be 'NSPid:\t<pid>\t1'. */ - verify_fdinfo_nspid(a.fd, &err, "\t%d\t%d\n", a.pid, 1); - verify_fdinfo_nspid(b.fd, &err, "\t%d\t%d\n", b.pid, 1); + verify_fdinfo(a.fd, &err, "NSpid:", 6, "\t%d\t%d\n", a.pid, 1); + verify_fdinfo(b.fd, &err, "NSpid:", 6, "\t%d\t%d\n", b.pid, 1);
/* wait for the process, check the exit status and set * 'err' accordingly, if it is not already set. */ + child_join_close(&a, &err); + child_join_close(&b, &err); + + error_report(&err, test_name); +} + +static void test_pidfd_dead_fdinfo(void) +{ + struct child a; + struct error err = {0, }; + const char *test_name = "pidfd check fdinfo for dead process"; + + /* Create a new child in a new pid and mount namespace */ + a = clone_newns(child_fdinfo_nspid_test, NULL, &err); + error_check(&err, test_name); child_join(&a, &err); - child_join(&b, &err);
+ verify_fdinfo(a.fd, &err, "Pid:", 4, "\t-1\n"); + verify_fdinfo(a.fd, &err, "NSpid:", 6, "\t-1\n"); + child_close(&a); error_report(&err, test_name); }
int main(int argc, char **argv) { ksft_print_header(); - ksft_set_plan(1); + ksft_set_plan(2);
test_pidfd_fdinfo_nspid(); + test_pidfd_dead_fdinfo();
return ksft_exit_pass(); }
Replace hlist_empty() with the new pid_has_task() helper which is more idiomatic, easier to grep for, and unifies how callers perform this check.
Signed-off-by: Christian Brauner christian.brauner@ubuntu.com Reviewed-by: Oleg Nesterov oleg@redhat.com --- /* pidfd selftests */ passed
/* v1 */ patch not present
/* v2 */ Link: https://lore.kernel.org/r/20191016153606.2326-3-christian.brauner@ubuntu.com patch introduced
/* v3 */ - Oleg Nesterov oleg@redhat.com: - s/task_alive/pid_has_task/g --- kernel/pid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/pid.c b/kernel/pid.c index 0a9f2e437217..124d40b239b1 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -299,7 +299,7 @@ static void __change_pid(struct task_struct *task, enum pid_type type, *pid_ptr = new;
for (tmp = PIDTYPE_MAX; --tmp >= 0; ) - if (!hlist_empty(&pid->tasks[tmp])) + if (pid_has_task(pid, tmp)) return;
free_pid(pid);
Replace hlist_empty() with the new pid_has_task() helper which is more idiomatic, easier to grep for, and unifies how callers perform this check.
Signed-off-by: Christian Brauner christian.brauner@ubuntu.com Reviewed-by: Oleg Nesterov oleg@redhat.com --- /* pidfd selftests */ passed
/* v1 */ patch not present
/* v2 */ Link: https://lore.kernel.org/r/20191016153606.2326-4-christian.brauner@ubuntu.com patch introduced
/* v3 */ - Oleg Nesterov oleg@redhat.com: - s/task_alive/pid_has_task/ --- kernel/exit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/exit.c b/kernel/exit.c index a46a50d67002..f2d20ab74422 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1457,7 +1457,7 @@ static long do_wait(struct wait_opts *wo) */ wo->notask_error = -ECHILD; if ((wo->wo_type < PIDTYPE_MAX) && - (!wo->wo_pid || hlist_empty(&wo->wo_pid->tasks[wo->wo_type]))) + (!wo->wo_pid || !pid_has_task(wo->wo_pid, wo->wo_type))) goto notask;
set_current_state(TASK_INTERRUPTIBLE);
Use the new pid_has_task() helper in pidfd_open(). This avoids taking rcu_read_{lock,unlock}() and simplifies the code.
Signed-off-by: Christian Brauner christian.brauner@ubuntu.com Reviewed-by: Oleg Nesterov oleg@redhat.com --- /* pidfd selftests */ passed
/* v1 */ patch not present
/* v2 */ Link: https://lore.kernel.org/r/20191016153606.2326-5-christian.brauner@ubuntu.com patch introduced
/* v3 */ - Oleg Nesterov oleg@redhat.com: - s/task_alive/pid_has_task/ --- kernel/pid.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/kernel/pid.c b/kernel/pid.c index 124d40b239b1..7b5f6c963d72 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -497,7 +497,7 @@ static int pidfd_create(struct pid *pid) */ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags) { - int fd, ret; + int fd; struct pid *p;
if (flags) @@ -510,13 +510,11 @@ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags) if (!p) return -ESRCH;
- ret = 0; - rcu_read_lock(); - if (!pid_task(p, PIDTYPE_TGID)) - ret = -EINVAL; - rcu_read_unlock(); + if (pid_has_task(p, PIDTYPE_TGID)) + fd = pidfd_create(p); + else + fd = -EINVAL;
- fd = ret ?: pidfd_create(p); put_pid(p); return fd; }
On Thu, Oct 17, 2019 at 12:18:28PM +0200, Christian Brauner wrote:
Currently, when a task is dead we still print the pid it used to use in the fdinfo files of its pidfds. This doesn't make much sense since the pid may have already been reused. So verify that the task is still alive by introducing the pid_has_task() helper which will be used by other callers in follow-up patches. If the task is not alive anymore, we will print -1. This allows us to differentiate between a task not being present in a given pid namespace
- in which case we already print 0 - and a task having been reaped.
Note that this uses PIDTYPE_PID for the check. Technically, we could've checked PIDTYPE_TGID since pidfds currently only refer to thread-group leaders, but if they won't anymore in the future then this check becomes problematic without it being immediately obvious to non-experts imho. If a thread is created via clone(CLONE_THREAD) then struct pid has a single non-empty list pid->tasks[PIDTYPE_PID] and this pid can't be used as a PIDTYPE_TGID, meaning a pid_task(pid, PIDTYPE_TGID) lookup will return NULL even though the thread-group leader might still be very much alive. So checking PIDTYPE_PID is fine and is easier to maintain should we ever allow pidfds to refer to threads.
Cc: Jann Horn jannh@google.com Cc: Christian Kellner christian@kellner.me Cc: linux-api@vger.kernel.org Signed-off-by: Christian Brauner christian.brauner@ubuntu.com Reviewed-by: Oleg Nesterov oleg@redhat.com
Applied this series to: https://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux.git/log/?h=pid...
Thanks! Christian
linux-kselftest-mirror@lists.linaro.org