Michal Koutný mkoutny@suse.com wrote:
The check is currently against the current->cred but since those are going to change and we want to check RLIMIT_NPROC condition after the switch, supply the capability check with the new cred. But since we're checking new_user being INIT_USER any new cred's capability-based allowance may be redundant when the check fails and the alternative solution would be revert of the commit 2863643fb8b9 ("set_user: add capability check when rlimit(RLIMIT_NPROC) exceeds")
As of commit 2863643fb8b9 ("set_user: add capability check when rlimit(RLIMIT_NPROC) exceeds") setting the flag to see if execve should check RLIMIT_NPROC is buggy, as it tests the capabilites from before the credential change and not aftwards.
As of commit 21d1c5e386bc ("Reimplement RLIMIT_NPROC on top of ucounts") examining the rlimit is buggy as cred->ucounts has not yet been properly set in the new credential.
Make the code correct and more robust moving the test to see if execve() needs to test RLIMIT_NPROC into commit_creds, and defer all of the rest of the logic into execve() itself.
As the flag only indicateds that RLIMIT_NPROC should be checked in execve rename it from PF_NPROC_EXCEEDED to PF_NPROC_CHECK.
Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20220207121800.5079-2-mkoutny@suse.com Link: https://lkml.kernel.org/r/20220207121800.5079-3-mkoutny@suse.com Reported-by: Michal Koutný mkoutny@suse.com Fixes: 2863643fb8b9 ("set_user: add capability check when rlimit(RLIMIT_NPROC) exceeds") Fixes: 21d1c5e386bc ("Reimplement RLIMIT_NPROC on top of ucounts") Signed-off-by: "Eric W. Biederman" ebiederm@xmission.com --- fs/exec.c | 15 ++++++++------- include/linux/sched.h | 2 +- kernel/cred.c | 13 +++++++++---- kernel/fork.c | 2 +- kernel/sys.c | 14 -------------- 5 files changed, 19 insertions(+), 27 deletions(-)
diff --git a/fs/exec.c b/fs/exec.c index 79f2c9483302..1e7f757cbc2c 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1875,20 +1875,21 @@ static int do_execveat_common(int fd, struct filename *filename, return PTR_ERR(filename);
/* - * We move the actual failure in case of RLIMIT_NPROC excess from - * set*uid() to execve() because too many poorly written programs - * don't check setuid() return code. Here we additionally recheck - * whether NPROC limit is still exceeded. + * After calling set*uid() is RLIMT_NPROC exceeded? + * This can not be checked in set*uid() because too many programs don't + * check the setuid() return code. */ - if ((current->flags & PF_NPROC_EXCEEDED) && - is_ucounts_overlimit(current_ucounts(), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) { + if ((current->flags & PF_NPROC_CHECK) && + is_ucounts_overlimit(current_ucounts(), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC)) && + (current_user() != INIT_USER) && + !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) { retval = -EAGAIN; goto out_ret; }
/* We're below the limit (still or again), so we don't want to make * further execve() calls fail. */ - current->flags &= ~PF_NPROC_EXCEEDED; + current->flags &= ~PF_NPROC_CHECK;
bprm = alloc_bprm(fd, filename); if (IS_ERR(bprm)) { diff --git a/include/linux/sched.h b/include/linux/sched.h index 75ba8aa60248..6605a262a6be 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1678,7 +1678,7 @@ extern struct pid *cad_pid; #define PF_DUMPCORE 0x00000200 /* Dumped core */ #define PF_SIGNALED 0x00000400 /* Killed by a signal */ #define PF_MEMALLOC 0x00000800 /* Allocating memory */ -#define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */ +#define PF_NPROC_CHECK 0x00001000 /* Check in execve if RLIMIT_NPROC was exceeded */ #define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ #define PF_FROZEN 0x00010000 /* Frozen for system suspend */ diff --git a/kernel/cred.c b/kernel/cred.c index 933155c96922..229cff081167 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -490,13 +490,18 @@ int commit_creds(struct cred *new) if (!gid_eq(new->fsgid, old->fsgid)) key_fsgid_changed(new);
- /* do it - * RLIMIT_NPROC limits on user->processes have already been checked - * in set_user(). + /* + * Remember if the NPROC limit may be exceeded. The set*uid() functions + * can not fail if the NPROC limit is exceeded as too many programs + * don't check the return code. Instead enforce the NPROC limit for + * programs doing set*uid()+execve by harmlessly defering the failure + * to the execve() stage. */ alter_cred_subscribers(new, 2); - if (new->user != old->user || new->user_ns != old->user_ns) + if (new->user != old->user || new->user_ns != old->user_ns) { inc_rlimit_ucounts(new->ucounts, UCOUNT_RLIMIT_NPROC, 1); + task->flags |= PF_NPROC_CHECK; + } rcu_assign_pointer(task->real_cred, new); rcu_assign_pointer(task->cred, new); if (new->user != old->user || new->user_ns != old->user_ns) diff --git a/kernel/fork.c b/kernel/fork.c index 17d8a8c85e3b..2b6a28a86325 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2031,7 +2031,7 @@ static __latent_entropy struct task_struct *copy_process( !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) goto bad_fork_cleanup_count; } - current->flags &= ~PF_NPROC_EXCEEDED; + current->flags &= ~PF_NPROC_CHECK;
/* * If multiple threads are within copy_process(), then this check diff --git a/kernel/sys.c b/kernel/sys.c index ecc4cf019242..b1ed21d79f3b 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -472,20 +472,6 @@ static int set_user(struct cred *new) if (!new_user) return -EAGAIN;
- /* - * We don't fail in case of NPROC limit excess here because too many - * poorly written programs don't check set*uid() return code, assuming - * it never fails if called by root. We may still enforce NPROC limit - * for programs doing set*uid()+execve() by harmlessly deferring the - * failure to the execve() stage. - */ - if (is_ucounts_overlimit(new->ucounts, UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC)) && - new_user != INIT_USER && - !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) - current->flags |= PF_NPROC_EXCEEDED; - else - current->flags &= ~PF_NPROC_EXCEEDED; - free_uid(new->user); new->user = new_user; return 0;