 
            Hello!
v5: - merge ioctl fixes into Sargun's patches directly - adjust new API to avoid "ufd_required" argument - drop general clean up patches now present in for-next/seccomp v4: https://lore.kernel.org/lkml/20200616032524.460144-1-keescook@chromium.org/
This continues the thread-merge between [1] and [2]. tl;dr: add a way for a seccomp user_notif process manager to inject files into the managed process in order to handle emulation of various fd-returning syscalls across security boundaries. Containers folks and Chrome are in need of the feature, and investigating this solution uncovered (and fixed) implementation issues with existing file sending routines.
I intend to carry this in the seccomp tree, unless someone has objections. :) Please review and test!
-Kees
[1] https://lore.kernel.org/lkml/20200603011044.7972-1-sargun@sargun.me/ [2] https://lore.kernel.org/lkml/20200610045214.1175600-1-keescook@chromium.org/
Kees Cook (5): net/scm: Regularize compat handling of scm_detach_fds() fs: Move __scm_install_fd() to __fd_install_received() fs: Add fd_install_received() wrapper for __fd_install_received() pidfd: Replace open-coded partial fd_install_received() fs: Expand __fd_install_received() to accept fd
Sargun Dhillon (2): seccomp: Introduce addfd ioctl to seccomp user notifier selftests/seccomp: Test SECCOMP_IOCTL_NOTIF_ADDFD
fs/file.c | 63 +++++ include/linux/file.h | 19 ++ include/uapi/linux/seccomp.h | 22 ++ kernel/pid.c | 11 +- kernel/seccomp.c | 172 ++++++++++++- net/compat.c | 55 ++--- net/core/scm.c | 50 +--- tools/testing/selftests/seccomp/seccomp_bpf.c | 229 ++++++++++++++++++ 8 files changed, 540 insertions(+), 81 deletions(-)
 
            Duplicate the cleanups from commit 2618d530dd8b ("net/scm: cleanup scm_detach_fds") into the compat code.
Move the check added in commit 1f466e1f15cf ("net: cleanly handle kernel vs user buffers for ->msg_control") to before the compat call, even though it should be impossible for an in-kernel call to also be compat.
Correct the int "flags" argument to unsigned int to match fd_install() and similar APIs.
Regularize any remaining differences, including a whitespace issue, a checkpatch warning, and add the check from commit 6900317f5eff ("net, scm: fix PaX detected msg_controllen overflow in scm_detach_fds") which fixed an overflow unique to 64-bit. To avoid confusion when comparing the compat handler to the native handler, just include the same check in the compat handler.
Fixes: 48a87cc26c13 ("net: netprio: fd passed in SCM_RIGHTS datagram not set correctly") Fixes: d84295067fc7 ("net: net_cls: fd passed in SCM_RIGHTS datagram not set correctly") Signed-off-by: Kees Cook keescook@chromium.org --- include/net/scm.h | 1 + net/compat.c | 55 +++++++++++++++++++++-------------------------- net/core/scm.c | 18 ++++++++-------- 3 files changed, 35 insertions(+), 39 deletions(-)
diff --git a/include/net/scm.h b/include/net/scm.h index 1ce365f4c256..581a94d6c613 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -37,6 +37,7 @@ struct scm_cookie { #endif };
+int __scm_install_fd(struct file *file, int __user *ufd, unsigned int o_flags); void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm); void scm_detach_fds_compat(struct msghdr *msg, struct scm_cookie *scm); int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm); diff --git a/net/compat.c b/net/compat.c index 5e3041a2c37d..27d477fdcaa0 100644 --- a/net/compat.c +++ b/net/compat.c @@ -281,39 +281,31 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat return 0; }
-void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm) +static int scm_max_fds_compat(struct msghdr *msg) { - struct compat_cmsghdr __user *cm = (struct compat_cmsghdr __user *) kmsg->msg_control; - int fdmax = (kmsg->msg_controllen - sizeof(struct compat_cmsghdr)) / sizeof(int); - int fdnum = scm->fp->count; - struct file **fp = scm->fp->fp; - int __user *cmfptr; - int err = 0, i; + if (msg->msg_controllen <= sizeof(struct compat_cmsghdr)) + return 0; + return (msg->msg_controllen - sizeof(struct compat_cmsghdr)) / sizeof(int); +}
- if (fdnum < fdmax) - fdmax = fdnum; +void scm_detach_fds_compat(struct msghdr *msg, struct scm_cookie *scm) +{ + struct compat_cmsghdr __user *cm = + (struct compat_cmsghdr __user *)msg->msg_control; + unsigned int o_flags = (msg->msg_flags & MSG_CMSG_CLOEXEC) ? O_CLOEXEC : 0; + int fdmax = min_t(int, scm_max_fds_compat(msg), scm->fp->count); + int __user *cmsg_data = CMSG_USER_DATA(cm); + int err = 0, i;
- for (i = 0, cmfptr = (int __user *) CMSG_COMPAT_DATA(cm); i < fdmax; i++, cmfptr++) { - int new_fd; - err = security_file_receive(fp[i]); + for (i = 0; i < fdmax; i++) { + err = __scm_install_fd(scm->fp->fp[i], cmsg_data + i, o_flags); if (err) break; - err = get_unused_fd_flags(MSG_CMSG_CLOEXEC & kmsg->msg_flags - ? O_CLOEXEC : 0); - if (err < 0) - break; - new_fd = err; - err = put_user(new_fd, cmfptr); - if (err) { - put_unused_fd(new_fd); - break; - } - /* Bump the usage count and install the file. */ - fd_install(new_fd, get_file(fp[i])); }
if (i > 0) { int cmlen = CMSG_COMPAT_LEN(i * sizeof(int)); + err = put_user(SOL_SOCKET, &cm->cmsg_level); if (!err) err = put_user(SCM_RIGHTS, &cm->cmsg_type); @@ -321,16 +313,19 @@ void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm) err = put_user(cmlen, &cm->cmsg_len); if (!err) { cmlen = CMSG_COMPAT_SPACE(i * sizeof(int)); - kmsg->msg_control += cmlen; - kmsg->msg_controllen -= cmlen; + if (msg->msg_controllen < cmlen) + cmlen = msg->msg_controllen; + msg->msg_control += cmlen; + msg->msg_controllen -= cmlen; } } - if (i < fdnum) - kmsg->msg_flags |= MSG_CTRUNC; + + if (i < scm->fp->count || (scm->fp->count && fdmax <= 0)) + msg->msg_flags |= MSG_CTRUNC;
/* - * All of the files that fit in the message have had their - * usage counts incremented, so we just free the list. + * All of the files that fit in the message have had their usage counts + * incremented, so we just free the list. */ __scm_destroy(scm); } diff --git a/net/core/scm.c b/net/core/scm.c index 875df1c2989d..6151678c73ed 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -280,7 +280,7 @@ void put_cmsg_scm_timestamping(struct msghdr *msg, struct scm_timestamping_inter } EXPORT_SYMBOL(put_cmsg_scm_timestamping);
-static int __scm_install_fd(struct file *file, int __user *ufd, int o_flags) +int __scm_install_fd(struct file *file, int __user *ufd, unsigned int o_flags) { struct socket *sock; int new_fd; @@ -319,29 +319,29 @@ static int scm_max_fds(struct msghdr *msg)
void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) { - struct cmsghdr __user *cm - = (__force struct cmsghdr __user*)msg->msg_control; - int o_flags = (msg->msg_flags & MSG_CMSG_CLOEXEC) ? O_CLOEXEC : 0; + struct cmsghdr __user *cm = + (__force struct cmsghdr __user *)msg->msg_control; + unsigned int o_flags = (msg->msg_flags & MSG_CMSG_CLOEXEC) ? O_CLOEXEC : 0; int fdmax = min_t(int, scm_max_fds(msg), scm->fp->count); int __user *cmsg_data = CMSG_USER_DATA(cm); int err = 0, i;
+ /* no use for FD passing from kernel space callers */ + if (WARN_ON_ONCE(!msg->msg_control_is_user)) + return; + if (msg->msg_flags & MSG_CMSG_COMPAT) { scm_detach_fds_compat(msg, scm); return; }
- /* no use for FD passing from kernel space callers */ - if (WARN_ON_ONCE(!msg->msg_control_is_user)) - return; - for (i = 0; i < fdmax; i++) { err = __scm_install_fd(scm->fp->fp[i], cmsg_data + i, o_flags); if (err) break; }
- if (i > 0) { + if (i > 0) { int cmlen = CMSG_LEN(i * sizeof(int));
err = put_user(SOL_SOCKET, &cm->cmsg_level);
 
            In preparation for users of the "install a received file" logic outside of net/ (pidfd and seccomp), relocate and rename __scm_install_fd() from net/core/scm.c to __fd_install_received() in fs/file.c, and provide a wrapper named fd_install_received_user(), as future patches will change the interface to __fd_install_received().
Signed-off-by: Kees Cook keescook@chromium.org --- fs/file.c | 45 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/file.h | 8 ++++++++ include/net/scm.h | 1 - net/compat.c | 2 +- net/core/scm.c | 32 +------------------------------ 5 files changed, 55 insertions(+), 33 deletions(-)
diff --git a/fs/file.c b/fs/file.c index abb8b7081d7a..f2167d6feec6 100644 --- a/fs/file.c +++ b/fs/file.c @@ -11,6 +11,7 @@ #include <linux/export.h> #include <linux/fs.h> #include <linux/mm.h> +#include <linux/net.h> #include <linux/sched/signal.h> #include <linux/slab.h> #include <linux/file.h> @@ -18,6 +19,8 @@ #include <linux/bitops.h> #include <linux/spinlock.h> #include <linux/rcupdate.h> +#include <net/cls_cgroup.h> +#include <net/netprio_cgroup.h>
unsigned int sysctl_nr_open __read_mostly = 1024*1024; unsigned int sysctl_nr_open_min = BITS_PER_LONG; @@ -931,6 +934,48 @@ int replace_fd(unsigned fd, struct file *file, unsigned flags) return err; }
+/** + * __fd_install_received() - Install received file into file descriptor table + * + * @file: struct file that was received from another process + * @ufd: __user pointer to write new fd number to + * @o_flags: the O_* flags to apply to the new fd entry + * + * Installs a received file into the file descriptor table, with appropriate + * checks and count updates. Writes the fd number to userspace. + * + * Returns -ve on error. + */ +int __fd_install_received(struct file *file, int __user *ufd, unsigned int o_flags) +{ + struct socket *sock; + int new_fd; + int error; + + error = security_file_receive(file); + if (error) + return error; + + new_fd = get_unused_fd_flags(o_flags); + if (new_fd < 0) + return new_fd; + + error = put_user(new_fd, ufd); + if (error) { + put_unused_fd(new_fd); + return error; + } + + /* Bump the usage count and install the file. */ + sock = sock_from_file(file, &error); + if (sock) { + sock_update_netprioidx(&sock->sk->sk_cgrp_data); + sock_update_classid(&sock->sk->sk_cgrp_data); + } + fd_install(new_fd, get_file(file)); + return 0; +} + static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags) { int err = -EBADF; diff --git a/include/linux/file.h b/include/linux/file.h index 122f80084a3e..fe18a1a0d555 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -91,6 +91,14 @@ extern void put_unused_fd(unsigned int fd);
extern void fd_install(unsigned int fd, struct file *file);
+extern int __fd_install_received(struct file *file, int __user *ufd, + unsigned int o_flags); +static inline int fd_install_received_user(struct file *file, int __user *ufd, + unsigned int o_flags) +{ + return __fd_install_received(file, ufd, o_flags); +} + extern void flush_delayed_fput(void); extern void __fput_sync(struct file *);
diff --git a/include/net/scm.h b/include/net/scm.h index 581a94d6c613..1ce365f4c256 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -37,7 +37,6 @@ struct scm_cookie { #endif };
-int __scm_install_fd(struct file *file, int __user *ufd, unsigned int o_flags); void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm); void scm_detach_fds_compat(struct msghdr *msg, struct scm_cookie *scm); int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm); diff --git a/net/compat.c b/net/compat.c index 27d477fdcaa0..94f288e8dac5 100644 --- a/net/compat.c +++ b/net/compat.c @@ -298,7 +298,7 @@ void scm_detach_fds_compat(struct msghdr *msg, struct scm_cookie *scm) int err = 0, i;
for (i = 0; i < fdmax; i++) { - err = __scm_install_fd(scm->fp->fp[i], cmsg_data + i, o_flags); + err = fd_install_received_user(scm->fp->fp[i], cmsg_data + i, o_flags); if (err) break; } diff --git a/net/core/scm.c b/net/core/scm.c index 6151678c73ed..df190f1fdd28 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -280,36 +280,6 @@ void put_cmsg_scm_timestamping(struct msghdr *msg, struct scm_timestamping_inter } EXPORT_SYMBOL(put_cmsg_scm_timestamping);
-int __scm_install_fd(struct file *file, int __user *ufd, unsigned int o_flags) -{ - struct socket *sock; - int new_fd; - int error; - - error = security_file_receive(file); - if (error) - return error; - - new_fd = get_unused_fd_flags(o_flags); - if (new_fd < 0) - return new_fd; - - error = put_user(new_fd, ufd); - if (error) { - put_unused_fd(new_fd); - return error; - } - - /* Bump the usage count and install the file. */ - sock = sock_from_file(file, &error); - if (sock) { - sock_update_netprioidx(&sock->sk->sk_cgrp_data); - sock_update_classid(&sock->sk->sk_cgrp_data); - } - fd_install(new_fd, get_file(file)); - return 0; -} - static int scm_max_fds(struct msghdr *msg) { if (msg->msg_controllen <= sizeof(struct cmsghdr)) @@ -336,7 +306,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) }
for (i = 0; i < fdmax; i++) { - err = __scm_install_fd(scm->fp->fp[i], cmsg_data + i, o_flags); + err = fd_install_received_user(scm->fp->fp[i], cmsg_data + i, o_flags); if (err) break; }
 
            For both pidfd and seccomp, the __user pointer is not used. Update __fd_install_received() to make writing to ufd optional via a NULL check. However, for the fd_install_received_user() wrapper, ufd is NULL checked so an -EFAULT can be returned to avoid changing the SCM_RIGHTS interface behavior. Add new wrapper fd_install_received() for pidfd and seccomp that does not use the ufd argument. For the new helper, the new fd needs to be returned on success. Update the existing callers to handle it.
Signed-off-by: Kees Cook keescook@chromium.org --- fs/file.c | 22 ++++++++++++++-------- include/linux/file.h | 7 +++++++ net/compat.c | 2 +- net/core/scm.c | 2 +- 4 files changed, 23 insertions(+), 10 deletions(-)
diff --git a/fs/file.c b/fs/file.c index f2167d6feec6..de85a42defe2 100644 --- a/fs/file.c +++ b/fs/file.c @@ -942,9 +942,10 @@ int replace_fd(unsigned fd, struct file *file, unsigned flags) * @o_flags: the O_* flags to apply to the new fd entry * * Installs a received file into the file descriptor table, with appropriate - * checks and count updates. Writes the fd number to userspace. + * checks and count updates. Optionally writes the fd number to userspace, if + * @ufd is non-NULL. * - * Returns -ve on error. + * Returns newly install fd or -ve on error. */ int __fd_install_received(struct file *file, int __user *ufd, unsigned int o_flags) { @@ -960,20 +961,25 @@ int __fd_install_received(struct file *file, int __user *ufd, unsigned int o_fla if (new_fd < 0) return new_fd;
- error = put_user(new_fd, ufd); - if (error) { - put_unused_fd(new_fd); - return error; + if (ufd) { + error = put_user(new_fd, ufd); + if (error) { + put_unused_fd(new_fd); + return error; + } }
- /* Bump the usage count and install the file. */ + /* Bump the usage count and install the file. The resulting value of + * "error" is ignored here since we only need to take action when + * the file is a socket and testing "sock" for NULL is sufficient. + */ sock = sock_from_file(file, &error); if (sock) { sock_update_netprioidx(&sock->sk->sk_cgrp_data); sock_update_classid(&sock->sk->sk_cgrp_data); } fd_install(new_fd, get_file(file)); - return 0; + return new_fd; }
static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags) diff --git a/include/linux/file.h b/include/linux/file.h index fe18a1a0d555..e19974ed9322 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -9,6 +9,7 @@ #include <linux/compiler.h> #include <linux/types.h> #include <linux/posix_types.h> +#include <linux/errno.h>
struct file;
@@ -96,8 +97,14 @@ extern int __fd_install_received(struct file *file, int __user *ufd, static inline int fd_install_received_user(struct file *file, int __user *ufd, unsigned int o_flags) { + if (ufd == NULL) + return -EFAULT; return __fd_install_received(file, ufd, o_flags); } +static inline int fd_install_received(struct file *file, unsigned int o_flags) +{ + return __fd_install_received(file, NULL, o_flags); +}
extern void flush_delayed_fput(void); extern void __fput_sync(struct file *); diff --git a/net/compat.c b/net/compat.c index 94f288e8dac5..71494337cca7 100644 --- a/net/compat.c +++ b/net/compat.c @@ -299,7 +299,7 @@ void scm_detach_fds_compat(struct msghdr *msg, struct scm_cookie *scm)
for (i = 0; i < fdmax; i++) { err = fd_install_received_user(scm->fp->fp[i], cmsg_data + i, o_flags); - if (err) + if (err < 0) break; }
diff --git a/net/core/scm.c b/net/core/scm.c index df190f1fdd28..b9a0442ebd26 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -307,7 +307,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
for (i = 0; i < fdmax; i++) { err = fd_install_received_user(scm->fp->fp[i], cmsg_data + i, o_flags); - if (err) + if (err < 0) break; }
 
            On Wed, Jun 17, 2020 at 03:03:23PM -0700, Kees Cook wrote:
For both pidfd and seccomp, the __user pointer is not used. Update __fd_install_received() to make writing to ufd optional via a NULL check. However, for the fd_install_received_user() wrapper, ufd is NULL checked so an -EFAULT can be returned to avoid changing the SCM_RIGHTS interface behavior. Add new wrapper fd_install_received() for pidfd and seccomp that does not use the ufd argument. For the new helper, the new fd needs to be returned on success. Update the existing callers to handle it.
Signed-off-by: Kees Cook keescook@chromium.org
fs/file.c | 22 ++++++++++++++-------- include/linux/file.h | 7 +++++++ net/compat.c | 2 +- net/core/scm.c | 2 +- 4 files changed, 23 insertions(+), 10 deletions(-)
diff --git a/fs/file.c b/fs/file.c index f2167d6feec6..de85a42defe2 100644 --- a/fs/file.c +++ b/fs/file.c @@ -942,9 +942,10 @@ int replace_fd(unsigned fd, struct file *file, unsigned flags)
- @o_flags: the O_* flags to apply to the new fd entry
- Installs a received file into the file descriptor table, with appropriate
- checks and count updates. Writes the fd number to userspace.
- checks and count updates. Optionally writes the fd number to userspace, if
- @ufd is non-NULL.
- Returns -ve on error.
*/
- Returns newly install fd or -ve on error.
int __fd_install_received(struct file *file, int __user *ufd, unsigned int o_flags) { @@ -960,20 +961,25 @@ int __fd_install_received(struct file *file, int __user *ufd, unsigned int o_fla if (new_fd < 0) return new_fd;
- error = put_user(new_fd, ufd);
- if (error) {
put_unused_fd(new_fd);
return error;
- if (ufd) {
error = put_user(new_fd, ufd);
if (error) {
put_unused_fd(new_fd);
return error;}
}
- /* Bump the usage count and install the file. */
- /* Bump the usage count and install the file. The resulting value of
* "error" is ignored here since we only need to take action when
* the file is a socket and testing "sock" for NULL is sufficient.sock = sock_from_file(file, &error); if (sock) { sock_update_netprioidx(&sock->sk->sk_cgrp_data); sock_update_classid(&sock->sk->sk_cgrp_data); } fd_install(new_fd, get_file(file));
*/
- return 0;
- return new_fd;
} static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags) diff --git a/include/linux/file.h b/include/linux/file.h index fe18a1a0d555..e19974ed9322 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -9,6 +9,7 @@ #include <linux/compiler.h> #include <linux/types.h> #include <linux/posix_types.h> +#include <linux/errno.h> struct file; @@ -96,8 +97,14 @@ extern int __fd_install_received(struct file *file, int __user *ufd, static inline int fd_install_received_user(struct file *file, int __user *ufd, unsigned int o_flags) {
- if (ufd == NULL)
return -EFAULT;
Isn't this *technically* a behaviour change? Nonetheless, I think this is a much better approach than forcing everyone to do null checking, and avoids at least one error case where the kernel installs FDs for SCM_RIGHTS, and they're not actually usable.
return __fd_install_received(file, ufd, o_flags); } +static inline int fd_install_received(struct file *file, unsigned int o_flags) +{
- return __fd_install_received(file, NULL, o_flags);
+} extern void flush_delayed_fput(void); extern void __fput_sync(struct file *); diff --git a/net/compat.c b/net/compat.c index 94f288e8dac5..71494337cca7 100644 --- a/net/compat.c +++ b/net/compat.c @@ -299,7 +299,7 @@ void scm_detach_fds_compat(struct msghdr *msg, struct scm_cookie *scm) for (i = 0; i < fdmax; i++) { err = fd_install_received_user(scm->fp->fp[i], cmsg_data + i, o_flags);
if (err)
}
if (err < 0) break;diff --git a/net/core/scm.c b/net/core/scm.c index df190f1fdd28..b9a0442ebd26 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -307,7 +307,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) for (i = 0; i < fdmax; i++) { err = fd_install_received_user(scm->fp->fp[i], cmsg_data + i, o_flags);
if (err)
}
if (err < 0) break;2.25.1
Reviewed-by: Sargun Dhillon sargun@sargun.me
 
            On Thu, Jun 18, 2020 at 05:49:19AM +0000, Sargun Dhillon wrote:
On Wed, Jun 17, 2020 at 03:03:23PM -0700, Kees Cook wrote:
[...] static inline int fd_install_received_user(struct file *file, int __user *ufd, unsigned int o_flags) {
- if (ufd == NULL)
return -EFAULT;
Isn't this *technically* a behaviour change? Nonetheless, I think this is a much better approach than forcing everyone to do null checking, and avoids at least one error case where the kernel installs FDs for SCM_RIGHTS, and they're not actually usable.
So, the only behavior change I see is that the order of sanity checks is changed.
The loop in scm_detach_fds() is:
for (i = 0; i < fdmax; i++) { err = __scm_install_fd(scm->fp->fp[i], cmsg_data + i, o_flags); if (err < 0) break; }
Before, __scm_install_fd() does:
error = security_file_receive(file); if (error) return error;
new_fd = get_unused_fd_flags(o_flags); if (new_fd < 0) return new_fd;
error = put_user(new_fd, ufd); if (error) { put_unused_fd(new_fd); return error; } ...
After, fd_install_received_user() and __fd_install_received() does:
if (ufd == NULL) return -EFAULT; ... error = security_file_receive(file); if (error) return error; ... new_fd = get_unused_fd_flags(o_flags); if (new_fd < 0) return new_fd; ... error = put_user(new_fd, ufd); if (error) { put_unused_fd(new_fd); return error; }
i.e. if a caller attempts a receive that is rejected by LSM *and* includes a NULL userpointer destination, they will get an EFAULT now instead of an EPERM.
I struggle to imagine a situation where this could possibly matter (both fail, neither installs files). It is only the error code that is different. I am comfortable making this change and seeing if anyone screams. If they do, I can restore the v4 "ufd_required" way of doing it.
Reviewed-by: Sargun Dhillon sargun@sargun.me
Thanks!
 
            From: Kees Cook
Sent: 18 June 2020 21:13 On Thu, Jun 18, 2020 at 05:49:19AM +0000, Sargun Dhillon wrote:
On Wed, Jun 17, 2020 at 03:03:23PM -0700, Kees Cook wrote:
[...] static inline int fd_install_received_user(struct file *file, int __user *ufd, unsigned int o_flags) {
- if (ufd == NULL)
return -EFAULT;
Isn't this *technically* a behaviour change? Nonetheless, I think this is a much better approach than forcing everyone to do null checking, and avoids at least one error case where the kernel installs FDs for SCM_RIGHTS, and they're not actually usable.
So, the only behavior change I see is that the order of sanity checks is changed.
The loop in scm_detach_fds() is:
for (i = 0; i < fdmax; i++) { err = __scm_install_fd(scm->fp->fp[i], cmsg_data + i, o_flags); if (err < 0) break; }Before, __scm_install_fd() does:
error = security_file_receive(file); if (error) return error; new_fd = get_unused_fd_flags(o_flags); if (new_fd < 0) return new_fd; error = put_user(new_fd, ufd); if (error) { put_unused_fd(new_fd); return error; }...
After, fd_install_received_user() and __fd_install_received() does:
if (ufd == NULL) return -EFAULT;... error = security_file_receive(file); if (error) return error; ... new_fd = get_unused_fd_flags(o_flags); if (new_fd < 0) return new_fd; ... error = put_user(new_fd, ufd); if (error) { put_unused_fd(new_fd); return error; }
i.e. if a caller attempts a receive that is rejected by LSM *and* includes a NULL userpointer destination, they will get an EFAULT now instead of an EPERM.
The 'user' pointer the fd is written to is in the middle of the 'cmsg' buffer. So to hit 'ufd == NULL' the program would have to pass a small negative integer!
The error paths are strange if there are multiple fd in the message. A quick look at the old code seems to imply that if the user doesn't supply a big enough buffer then the extra 'file *' just get closed. OTOH if there is an error processing one of the files the request fails with the earlier file allocated fd numbers.
In addition most of the userspace buffer is written after the loop - any errors there return -EFAULT (SIGSEGV) without even trying to tidy up the allocated fd.
ISTM that the put_user(new_fd, ufd) could be done in __scm_install_fd() after __fd_install_received() returns.
scm_detach_fds() could do the put_user(SOL_SOCKET,...) before actually processing the first file - so that the state can be left unchanged when a naff buffer is passed.
David
- Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK Registration No: 1397386 (Wales)
 
            The sock counting (sock_update_netprioidx() and sock_update_classid()) was missing from pidfd's implementation of received fd installation. Replace the open-coded version with a call to the new fd_install_received() helper.
Fixes: 8649c322f75c ("pid: Implement pidfd_getfd syscall") Signed-off-by: Kees Cook keescook@chromium.org --- kernel/pid.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-)
diff --git a/kernel/pid.c b/kernel/pid.c index f1496b757162..24924ec5df0e 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -635,18 +635,9 @@ static int pidfd_getfd(struct pid *pid, int fd) if (IS_ERR(file)) return PTR_ERR(file);
- ret = security_file_receive(file); - if (ret) { - fput(file); - return ret; - } - - ret = get_unused_fd_flags(O_CLOEXEC); + ret = fd_install_received(file, O_CLOEXEC); if (ret < 0) fput(file); - else - fd_install(ret, file); - return ret; }
 
            On Wed, Jun 17, 2020 at 03:03:24PM -0700, Kees Cook wrote:
The sock counting (sock_update_netprioidx() and sock_update_classid()) was missing from pidfd's implementation of received fd installation. Replace the open-coded version with a call to the new fd_install_received() helper.
Fixes: 8649c322f75c ("pid: Implement pidfd_getfd syscall") Signed-off-by: Kees Cook keescook@chromium.org
kernel/pid.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-)
diff --git a/kernel/pid.c b/kernel/pid.c index f1496b757162..24924ec5df0e 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -635,18 +635,9 @@ static int pidfd_getfd(struct pid *pid, int fd) if (IS_ERR(file)) return PTR_ERR(file);
- ret = security_file_receive(file);
- if (ret) {
fput(file);
return ret;- }
- ret = get_unused_fd_flags(O_CLOEXEC);
- ret = fd_install_received(file, O_CLOEXEC); if (ret < 0) fput(file);
- else
fd_install(ret, file);
So someone just sent a fix for pidfd_getfd() that was based on the changes done here.
I've been on vacation so didn't have a chance to review this series and I see it's already in linux-next. This introduces a memory leak and actually proves a point I tried to stress when adding this helper: fd_install_received() in contrast to fd_install() does _not_ consume a reference because it takes one before it calls into fd_install(). That means, you need an unconditional fput() here both in the success and error path. I strongly suggest though that we simply align the behavior between fd_install() and fd_install_received() and have the latter simply consume a reference when it succeeds! Imho, this bug proves that I was right to insist on this before. ;)
Thanks! Christian
 
            On Mon, Jul 06, 2020 at 03:07:13PM +0200, Christian Brauner wrote:
On Wed, Jun 17, 2020 at 03:03:24PM -0700, Kees Cook wrote:
The sock counting (sock_update_netprioidx() and sock_update_classid()) was missing from pidfd's implementation of received fd installation. Replace the open-coded version with a call to the new fd_install_received() helper.
Fixes: 8649c322f75c ("pid: Implement pidfd_getfd syscall") Signed-off-by: Kees Cook keescook@chromium.org
kernel/pid.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-)
diff --git a/kernel/pid.c b/kernel/pid.c index f1496b757162..24924ec5df0e 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -635,18 +635,9 @@ static int pidfd_getfd(struct pid *pid, int fd) if (IS_ERR(file)) return PTR_ERR(file);
- ret = security_file_receive(file);
- if (ret) {
fput(file);
return ret;- }
- ret = get_unused_fd_flags(O_CLOEXEC);
- ret = fd_install_received(file, O_CLOEXEC); if (ret < 0) fput(file);
- else
fd_install(ret, file);So someone just sent a fix for pidfd_getfd() that was based on the changes done here.
Hi! Ah yes, that didn't get CCed to me. I'll go reply.
I've been on vacation so didn't have a chance to review this series and I see it's already in linux-next. This introduces a memory leak and actually proves a point I tried to stress when adding this helper: fd_install_received() in contrast to fd_install() does _not_ consume a reference because it takes one before it calls into fd_install(). That means, you need an unconditional fput() here both in the success and error path.
Yup, this was a mistake in my refactoring of the pidfs changes.
I strongly suggest though that we simply align the behavior between fd_install() and fd_install_received() and have the latter simply consume a reference when it succeeds! Imho, this bug proves that I was right to insist on this before. ;)
I still don't agree: it radically complicates the SCM_RIGHTS and seccomp cases. The primary difference is that fd_install() cannot fail, and it was optimized for this situation. The other file-related helpers that can fail do not consume the reference, so this is in keeping with those as well.
 
            On Mon, Jul 06, 2020 at 08:34:06AM -0700, Kees Cook wrote:
On Mon, Jul 06, 2020 at 03:07:13PM +0200, Christian Brauner wrote:
On Wed, Jun 17, 2020 at 03:03:24PM -0700, Kees Cook wrote:
The sock counting (sock_update_netprioidx() and sock_update_classid()) was missing from pidfd's implementation of received fd installation. Replace the open-coded version with a call to the new fd_install_received() helper.
Fixes: 8649c322f75c ("pid: Implement pidfd_getfd syscall") Signed-off-by: Kees Cook keescook@chromium.org
kernel/pid.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-)
diff --git a/kernel/pid.c b/kernel/pid.c index f1496b757162..24924ec5df0e 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -635,18 +635,9 @@ static int pidfd_getfd(struct pid *pid, int fd) if (IS_ERR(file)) return PTR_ERR(file);
- ret = security_file_receive(file);
- if (ret) {
fput(file);
return ret;- }
- ret = get_unused_fd_flags(O_CLOEXEC);
- ret = fd_install_received(file, O_CLOEXEC); if (ret < 0) fput(file);
- else
fd_install(ret, file);So someone just sent a fix for pidfd_getfd() that was based on the changes done here.
Hi! Ah yes, that didn't get CCed to me. I'll go reply.
I've been on vacation so didn't have a chance to review this series and I see it's already in linux-next. This introduces a memory leak and actually proves a point I tried to stress when adding this helper: fd_install_received() in contrast to fd_install() does _not_ consume a reference because it takes one before it calls into fd_install(). That means, you need an unconditional fput() here both in the success and error path.
Yup, this was a mistake in my refactoring of the pidfs changes.
I already did.
I strongly suggest though that we simply align the behavior between fd_install() and fd_install_received() and have the latter simply consume a reference when it succeeds! Imho, this bug proves that I was right to insist on this before. ;)
I still don't agree: it radically complicates the SCM_RIGHTS and seccomp
I'm sorry, I don't buy it yet, though I might've missed something in the discussions: :) After applying the patches in your series this literally is just (which is hardly radical ;):
diff --git a/fs/file.c b/fs/file.c index 9568bcfd1f44..26930b2ea39d 100644 --- a/fs/file.c +++ b/fs/file.c @@ -974,7 +974,7 @@ int __fd_install_received(int fd, struct file *file, int __user *ufd, }
if (fd < 0) - fd_install(new_fd, get_file(file)); + fd_install(new_fd, file); else { new_fd = fd; error = replace_fd(new_fd, file, o_flags); diff --git a/net/compat.c b/net/compat.c index 71494337cca7..605a5a67200c 100644 --- a/net/compat.c +++ b/net/compat.c @@ -298,9 +298,11 @@ void scm_detach_fds_compat(struct msghdr *msg, struct scm_cookie *scm) int err = 0, i;
for (i = 0; i < fdmax; i++) { - err = fd_install_received_user(scm->fp->fp[i], cmsg_data + i, o_flags); - if (err < 0) + err = fd_install_received_user(get_file(scm->fp->fp[i]), cmsg_data + i, o_flags); + if (err < 0) { + fput(scm->fp->fp[i]); break; + } }
if (i > 0) { diff --git a/net/core/scm.c b/net/core/scm.c index b9a0442ebd26..0d06446ae598 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -306,9 +306,11 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) }
for (i = 0; i < fdmax; i++) { - err = fd_install_received_user(scm->fp->fp[i], cmsg_data + i, o_flags); - if (err < 0) + err = fd_install_received_user(get_file(scm->fp->fp[i]), cmsg_data + i, o_flags); + if (err < 0) { + fput(scm->fp->fp[i]); break; + } }
if (i > 0) {
cases. The primary difference is that fd_install() cannot fail, and it was optimized for this situation. The other file-related helpers that can fail do not consume the reference, so this is in keeping with those as well.
That's not a real problem. Any function that can fail and which consumes a reference on success is assumed to not mutate the reference if it fails anywhere. So I don't see that as an issue.
The problem here is that the current patch invites bugs and has already produced one because fd_install() and fd_install_*() have the same naming scheme but different behavior when dealing with references. That's just not a good idea.
Christian
 
            On Mon, Jul 06, 2020 at 06:12:47PM +0200, Christian Brauner wrote:
On Mon, Jul 06, 2020 at 08:34:06AM -0700, Kees Cook wrote:
On Mon, Jul 06, 2020 at 03:07:13PM +0200, Christian Brauner wrote:
On Wed, Jun 17, 2020 at 03:03:24PM -0700, Kees Cook wrote:
The sock counting (sock_update_netprioidx() and sock_update_classid()) was missing from pidfd's implementation of received fd installation. Replace the open-coded version with a call to the new fd_install_received() helper.
Fixes: 8649c322f75c ("pid: Implement pidfd_getfd syscall") Signed-off-by: Kees Cook keescook@chromium.org
kernel/pid.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-)
diff --git a/kernel/pid.c b/kernel/pid.c index f1496b757162..24924ec5df0e 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -635,18 +635,9 @@ static int pidfd_getfd(struct pid *pid, int fd) if (IS_ERR(file)) return PTR_ERR(file);
- ret = security_file_receive(file);
- if (ret) {
- fput(file);
- return ret;
- }
- ret = get_unused_fd_flags(O_CLOEXEC);
+ ret = fd_install_received(file, O_CLOEXEC);
if (ret < 0)
fput(file);
- else
- fd_install(ret, file);
So someone just sent a fix for pidfd_getfd() that was based on the changes done here.
Hi! Ah yes, that didn't get CCed to me. I'll go reply.
I've been on vacation so didn't have a chance to review this series and I see it's already in linux-next. This introduces a memory leak and actually proves a point I tried to stress when adding this helper: fd_install_received() in contrast to fd_install() does _not_ consume a reference because it takes one before it calls into fd_install(). That means, you need an unconditional fput() here both in the success and error path.
Yup, this was a mistake in my refactoring of the pidfs changes.
I already did.
I strongly suggest though that we simply align the behavior between fd_install() and fd_install_received() and have the latter simply consume a reference when it succeeds! Imho, this bug proves that I was right to insist on this before. ;)
I still don't agree: it radically complicates the SCM_RIGHTS and seccomp
I'm sorry, I don't buy it yet, though I might've missed something in the discussions: :) After applying the patches in your series this literally is just (which is hardly radical ;):
diff --git a/fs/file.c b/fs/file.c index 9568bcfd1f44..26930b2ea39d 100644 --- a/fs/file.c +++ b/fs/file.c @@ -974,7 +974,7 @@ int __fd_install_received(int fd, struct file *file, int __user *ufd, }
if (fd < 0)
- fd_install(new_fd, get_file(file));
+ fd_install(new_fd, file);
else { new_fd = fd; error = replace_fd(new_fd, file, o_flags);
diff --git a/net/compat.c b/net/compat.c index 71494337cca7..605a5a67200c 100644 --- a/net/compat.c +++ b/net/compat.c @@ -298,9 +298,11 @@ void scm_detach_fds_compat(struct msghdr *msg, struct scm_cookie *scm) int err = 0, i;
for (i = 0; i < fdmax; i++) {
- err = fd_install_received_user(scm->fp->fp[i], cmsg_data + i, o_flags);
- if (err < 0)
+ err = fd_install_received_user(get_file(scm->fp->fp[i]), cmsg_data + i, o_flags);
+ if (err < 0) {
+ fput(scm->fp->fp[i]);
break;
+ }
}
if (i > 0) {
diff --git a/net/core/scm.c b/net/core/scm.c index b9a0442ebd26..0d06446ae598 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -306,9 +306,11 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) }
for (i = 0; i < fdmax; i++) {
- err = fd_install_received_user(scm->fp->fp[i], cmsg_data + i, o_flags);
- if (err < 0)
+ err = fd_install_received_user(get_file(scm->fp->fp[i]), cmsg_data + i, o_flags);
+ if (err < 0) {
+ fput(scm->fp->fp[i]);
break;
+ }
}
if (i > 0) {
cases. The primary difference is that fd_install() cannot fail, and it was optimized for this situation. The other file-related helpers that can fail do not consume the reference, so this is in keeping with those as well.
That's not a real problem. Any function that can fail and which consumes a reference on success is assumed to not mutate the reference if it fails anywhere. So I don't see that as an issue.
The problem here is that the current patch invites bugs and has already produced one because fd_install() and fd_install_*() have the same naming scheme but different behavior when dealing with references. That's just not a good idea.
That being said, if you and others feel that this worry is nonsense then sure, let's fix the bug that this introduces in this series and move on. If you do, are you going to resend?
Christian
 
            On Mon, Jul 06, 2020 at 06:12:45PM +0200, Christian Brauner wrote:
On Mon, Jul 06, 2020 at 08:34:06AM -0700, Kees Cook wrote:
Yup, this was a mistake in my refactoring of the pidfs changes.
I already did.
Er, what? (I had a typo in my quote: s/pidfs/pidfd/.) I was trying to say that this was just a mistake in my refactoring of the pidfd usage of the new helper.
I still don't agree: it radically complicates the SCM_RIGHTS and seccomp
I'm sorry, I don't buy it yet, though I might've missed something in the discussions: :) After applying the patches in your series this literally is just (which is hardly radical ;):
Agreed, "radical" was too strong.
diff --git a/fs/file.c b/fs/file.c index 9568bcfd1f44..26930b2ea39d 100644 --- a/fs/file.c +++ b/fs/file.c @@ -974,7 +974,7 @@ int __fd_install_received(int fd, struct file *file, int __user *ufd, }
if (fd < 0)
- fd_install(new_fd, get_file(file));
+ fd_install(new_fd, file);
else { new_fd = fd; error = replace_fd(new_fd, file, o_flags);
diff --git a/net/compat.c b/net/compat.c index 71494337cca7..605a5a67200c 100644 --- a/net/compat.c +++ b/net/compat.c @@ -298,9 +298,11 @@ void scm_detach_fds_compat(struct msghdr *msg, struct scm_cookie *scm) int err = 0, i;
for (i = 0; i < fdmax; i++) {
- err = fd_install_received_user(scm->fp->fp[i], cmsg_data + i, o_flags);
- if (err < 0)
+ err = fd_install_received_user(get_file(scm->fp->fp[i]), cmsg_data + i, o_flags);
+ if (err < 0) {
+ fput(scm->fp->fp[i]);
break;
+ }
}
if (i > 0) {
diff --git a/net/core/scm.c b/net/core/scm.c index b9a0442ebd26..0d06446ae598 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -306,9 +306,11 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) }
for (i = 0; i < fdmax; i++) {
- err = fd_install_received_user(scm->fp->fp[i], cmsg_data + i, o_flags);
- if (err < 0)
+ err = fd_install_received_user(get_file(scm->fp->fp[i]), cmsg_data + i, o_flags);
+ if (err < 0) {
+ fput(scm->fp->fp[i]);
break;
+ }
}
if (i > 0) {
But my point stands: I really dislike this; suddenly the caller needs to manage this when it should be an entirely internal detail to the function. It was only pidfd doing it wrong, and that was entirely my fault in the conversion.
The problem here is that the current patch invites bugs and has already produced one because fd_install() and fd_install_*() have the same naming scheme but different behavior when dealing with references. That's just not a good idea.
I will rename the helper and add explicit documentation, but I really don't think callers should have to deal with managing the helper's split ref lifetime.
 
            Expand __fd_install_received() with support for replace_fd() for the coming seccomp "addfd" ioctl(). Add new wrapper fd_replace_received() for the new mode and update existing wrappers to retain old mode.
Signed-off-by: Kees Cook keescook@chromium.org --- fs/file.c | 22 +++++++++++++++++----- include/linux/file.h | 10 +++++++--- 2 files changed, 24 insertions(+), 8 deletions(-)
diff --git a/fs/file.c b/fs/file.c index de85a42defe2..9568bcfd1f44 100644 --- a/fs/file.c +++ b/fs/file.c @@ -937,6 +937,7 @@ int replace_fd(unsigned fd, struct file *file, unsigned flags) /** * __fd_install_received() - Install received file into file descriptor table * + * @fd: fd to install into (if negative, a new fd will be allocated) * @file: struct file that was received from another process * @ufd: __user pointer to write new fd number to * @o_flags: the O_* flags to apply to the new fd entry @@ -947,7 +948,8 @@ int replace_fd(unsigned fd, struct file *file, unsigned flags) * * Returns newly install fd or -ve on error. */ -int __fd_install_received(struct file *file, int __user *ufd, unsigned int o_flags) +int __fd_install_received(int fd, struct file *file, int __user *ufd, + unsigned int o_flags) { struct socket *sock; int new_fd; @@ -957,9 +959,11 @@ int __fd_install_received(struct file *file, int __user *ufd, unsigned int o_fla if (error) return error;
- new_fd = get_unused_fd_flags(o_flags); - if (new_fd < 0) - return new_fd; + if (fd < 0) { + new_fd = get_unused_fd_flags(o_flags); + if (new_fd < 0) + return new_fd; + }
if (ufd) { error = put_user(new_fd, ufd); @@ -969,6 +973,15 @@ int __fd_install_received(struct file *file, int __user *ufd, unsigned int o_fla } }
+ if (fd < 0) + fd_install(new_fd, get_file(file)); + else { + new_fd = fd; + error = replace_fd(new_fd, file, o_flags); + if (error) + return error; + } + /* Bump the usage count and install the file. The resulting value of * "error" is ignored here since we only need to take action when * the file is a socket and testing "sock" for NULL is sufficient. @@ -978,7 +991,6 @@ int __fd_install_received(struct file *file, int __user *ufd, unsigned int o_fla sock_update_netprioidx(&sock->sk->sk_cgrp_data); sock_update_classid(&sock->sk->sk_cgrp_data); } - fd_install(new_fd, get_file(file)); return new_fd; }
diff --git a/include/linux/file.h b/include/linux/file.h index e19974ed9322..04389b0da11b 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -92,18 +92,22 @@ extern void put_unused_fd(unsigned int fd);
extern void fd_install(unsigned int fd, struct file *file);
-extern int __fd_install_received(struct file *file, int __user *ufd, +extern int __fd_install_received(int fd, struct file *file, int __user *ufd, unsigned int o_flags); static inline int fd_install_received_user(struct file *file, int __user *ufd, unsigned int o_flags) { if (ufd == NULL) return -EFAULT; - return __fd_install_received(file, ufd, o_flags); + return __fd_install_received(-1, file, ufd, o_flags); } static inline int fd_install_received(struct file *file, unsigned int o_flags) { - return __fd_install_received(file, NULL, o_flags); + return __fd_install_received(-1, file, NULL, o_flags); +} +static inline int fd_replace_received(int fd, struct file *file, unsigned int o_flags) +{ + return __fd_install_received(fd, file, NULL, o_flags); }
extern void flush_delayed_fput(void);
 
            From: Sargun Dhillon sargun@sargun.me
This adds a seccomp notifier ioctl which allows the listener to "add" file descriptors to a process which originated a seccomp user notification. Calls like mount and mknod can already be "implemented" by the listener, because their return values and arguments are plain data in memory. Likewise, calls like connect can be "implemented" using pidfd_getfd.
Unfortunately, there are calls which return file descriptors, like open, which are vulnerable to ToCToU attacks, and require that the more privileged supervisor can inspect the argument, and perform the syscall on behalf of the process generating the notification. This allows the file descriptor generated from that open call to be returned to the calling process.
In addition, there is functionality to allow for replacement of specific file descriptors, following dup2-like semantics.
The ioctl handling is based on the discussions[3] of how Extensible Arguments should interact with ioctls. Instead of building size into the addfd structure, make it a function of the ioctl command (which is how sizes are normally passed to ioctls). To support forward and backward compatibility, just mask out the direction and size, and match everything. The size (and any future direction) checks are done along with copy_struct_from_user() logic.
As a note, the seccomp_notif_addfd structure is laid out based on 8-byte alignment without requiring packing as there have been packing issues with uapi highlighted before[1][2]. Although we could overload the newfd field and use -1 to indicate that it is not to be used, doing so requires changing the size of the fd field, and introduces struct packing complexity.
[1]: https://lore.kernel.org/lkml/87o8w9bcaf.fsf@mid.deneb.enyo.de/ [2]: https://lore.kernel.org/lkml/a328b91d-fd8f-4f27-b3c2-91a9c45f18c0@rasmusvill... [3]: https://lore.kernel.org/lkml/20200612104629.GA15814@ircssh-2.c.rugged-nimbus...
Suggested-by: Matt Denton mpdenton@google.com Link: https://lore.kernel.org/r/20200603011044.7972-4-sargun@sargun.me Signed-off-by: Sargun Dhillon sargun@sargun.me Co-developed-by: Kees Cook keescook@chromium.org Signed-off-by: Kees Cook keescook@chromium.org --- include/uapi/linux/seccomp.h | 22 +++++ kernel/seccomp.c | 172 ++++++++++++++++++++++++++++++++++- 2 files changed, 193 insertions(+), 1 deletion(-)
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index 965290f7dcc2..6ba18b82a02e 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -113,6 +113,25 @@ struct seccomp_notif_resp { __u32 flags; };
+/* valid flags for seccomp_notif_addfd */ +#define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */ + +/** + * struct seccomp_notif_addfd + * @id: The ID of the seccomp notification + * @flags: SECCOMP_ADDFD_FLAG_* + * @srcfd: The local fd number + * @newfd: Optional remote FD number if SETFD option is set, otherwise 0. + * @newfd_flags: The O_* flags the remote FD should have applied + */ +struct seccomp_notif_addfd { + __u64 id; + __u32 flags; + __u32 srcfd; + __u32 newfd; + __u32 newfd_flags; +}; + #define SECCOMP_IOC_MAGIC '!' #define SECCOMP_IO(nr) _IO(SECCOMP_IOC_MAGIC, nr) #define SECCOMP_IOR(nr, type) _IOR(SECCOMP_IOC_MAGIC, nr, type) @@ -124,5 +143,8 @@ struct seccomp_notif_resp { #define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \ struct seccomp_notif_resp) #define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOW(2, __u64) +/* On success, the return value is the remote process's added fd number */ +#define SECCOMP_IOCTL_NOTIF_ADDFD SECCOMP_IOW(3, \ + struct seccomp_notif_addfd)
#endif /* _UAPI_LINUX_SECCOMP_H */ diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 0ed57e8c49d0..a7c20c408b83 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -87,10 +87,42 @@ struct seccomp_knotif { long val; u32 flags;
- /* Signals when this has entered SECCOMP_NOTIFY_REPLIED */ + /* + * Signals when this has changed states, such as the listener + * dying, a new seccomp addfd message, or changing to REPLIED + */ struct completion ready;
struct list_head list; + + /* outstanding addfd requests */ + struct list_head addfd; +}; + +/** + * struct seccomp_kaddfd - container for seccomp_addfd ioctl messages + * + * @file: A reference to the file to install in the other task + * @fd: The fd number to install it at. If the fd number is -1, it means the + * installing process should allocate the fd as normal. + * @flags: The flags for the new file descriptor. At the moment, only O_CLOEXEC + * is allowed. + * @ret: The return value of the installing process. It is set to the fd num + * upon success (>= 0). + * @completion: Indicates that the installing process has completed fd + * installation, or gone away (either due to successful + * reply, or signal) + * + */ +struct seccomp_kaddfd { + struct file *file; + int fd; + unsigned int flags; + + /* To only be set on reply */ + int ret; + struct completion completion; + struct list_head list; };
/** @@ -793,6 +825,17 @@ static u64 seccomp_next_notify_id(struct seccomp_filter *filter) return filter->notif->next_id++; }
+static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd) +{ + /* + * Remove the notification, and reset the list pointers, indicating + * that it has been handled. + */ + list_del_init(&addfd->list); + addfd->ret = fd_replace_received(addfd->fd, addfd->file, addfd->flags); + complete(&addfd->completion); +} + static int seccomp_do_user_notification(int this_syscall, struct seccomp_filter *match, const struct seccomp_data *sd) @@ -801,6 +844,7 @@ static int seccomp_do_user_notification(int this_syscall, u32 flags = 0; long ret = 0; struct seccomp_knotif n = {}; + struct seccomp_kaddfd *addfd, *tmp;
mutex_lock(&match->notify_lock); err = -ENOSYS; @@ -813,6 +857,7 @@ static int seccomp_do_user_notification(int this_syscall, n.id = seccomp_next_notify_id(match); init_completion(&n.ready); list_add(&n.list, &match->notif->notifications); + INIT_LIST_HEAD(&n.addfd);
up(&match->notif->request); wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM); @@ -821,14 +866,31 @@ static int seccomp_do_user_notification(int this_syscall, /* * This is where we wait for a reply from userspace. */ +wait: err = wait_for_completion_interruptible(&n.ready); mutex_lock(&match->notify_lock); if (err == 0) { + /* Check if we were woken up by a addfd message */ + addfd = list_first_entry_or_null(&n.addfd, + struct seccomp_kaddfd, list); + if (addfd && n.state != SECCOMP_NOTIFY_REPLIED) { + seccomp_handle_addfd(addfd); + mutex_unlock(&match->notify_lock); + goto wait; + } ret = n.val; err = n.error; flags = n.flags; }
+ /* If there were any pending addfd calls, clear them out */ + list_for_each_entry_safe(addfd, tmp, &n.addfd, list) { + /* The process went away before we got a chance to handle it */ + addfd->ret = -ESRCH; + list_del_init(&addfd->list); + complete(&addfd->completion); + } + /* * Note that it's possible the listener died in between the time when * we were notified of a respons (or a signal) and when we were able to @@ -1069,6 +1131,11 @@ static int seccomp_notify_release(struct inode *inode, struct file *file) knotif->error = -ENOSYS; knotif->val = 0;
+ /* + * We do not need to wake up any pending addfd messages, as + * the notifier will do that for us, as this just looks + * like a standard reply. + */ complete(&knotif->ready); }
@@ -1233,12 +1300,107 @@ static long seccomp_notify_id_valid(struct seccomp_filter *filter, return ret; }
+static long seccomp_notify_addfd(struct seccomp_filter *filter, + struct seccomp_notif_addfd __user *uaddfd, + unsigned int size) +{ + struct seccomp_notif_addfd addfd; + struct seccomp_knotif *knotif; + struct seccomp_kaddfd kaddfd; + int ret; + + /* 24 is original sizeof(struct seccomp_notif_addfd) */ + if (size < 24 || size >= PAGE_SIZE) + return -EINVAL; + + ret = copy_struct_from_user(&addfd, sizeof(addfd), uaddfd, size); + if (ret) + return ret; + + if (addfd.newfd_flags & ~O_CLOEXEC) + return -EINVAL; + + if (addfd.flags & ~SECCOMP_ADDFD_FLAG_SETFD) + return -EINVAL; + + if (addfd.newfd && !(addfd.flags & SECCOMP_ADDFD_FLAG_SETFD)) + return -EINVAL; + + kaddfd.file = fget(addfd.srcfd); + if (!kaddfd.file) + return -EBADF; + + kaddfd.flags = addfd.newfd_flags; + kaddfd.fd = (addfd.flags & SECCOMP_ADDFD_FLAG_SETFD) ? + addfd.newfd : -1; + init_completion(&kaddfd.completion); + + ret = mutex_lock_interruptible(&filter->notify_lock); + if (ret < 0) + goto out; + + knotif = find_notification(filter, addfd.id); + if (!knotif) { + ret = -ENOENT; + goto out_unlock; + } + + /* + * We do not want to allow for FD injection to occur before the + * notification has been picked up by a userspace handler, or after + * the notification has been replied to. + */ + if (knotif->state != SECCOMP_NOTIFY_SENT) { + ret = -EINPROGRESS; + goto out_unlock; + } + + list_add(&kaddfd.list, &knotif->addfd); + complete(&knotif->ready); + mutex_unlock(&filter->notify_lock); + + /* Now we wait for it to be processed or be interrupted */ + ret = wait_for_completion_interruptible(&kaddfd.completion); + if (ret == 0) { + /* + * We had a successful completion. The other side has already + * removed us from the addfd queue, and + * wait_for_completion_interruptible has a memory barrier upon + * success that lets us read this value directly without + * locking. 
+ */ + ret = kaddfd.ret; + goto out; + } + + mutex_lock(&filter->notify_lock); + /* + * Even though we were woken up by a signal and not a successful + * completion, a completion may have happened in the mean time. + * + * We need to check again if the addfd request has been handled, + * and if not, we will remove it from the queue. + */ + if (list_empty(&kaddfd.list)) + ret = kaddfd.ret; + else + list_del(&kaddfd.list); + +out_unlock: + mutex_unlock(&filter->notify_lock); +out: + fput(kaddfd.file); + + return ret; +} + static long seccomp_notify_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct seccomp_filter *filter = file->private_data; void __user *buf = (void __user *)arg;
+ /* Fixed-size ioctls */ switch (cmd) { case SECCOMP_IOCTL_NOTIF_RECV: return seccomp_notify_recv(filter, buf); @@ -1247,9 +1409,17 @@ static long seccomp_notify_ioctl(struct file *file, unsigned int cmd, case SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR: case SECCOMP_IOCTL_NOTIF_ID_VALID: return seccomp_notify_id_valid(filter, buf); + } + + /* Extensible Argument ioctls */ +#define EA_IOCTL(cmd) ((cmd) & ~(IOC_INOUT | IOCSIZE_MASK)) + switch (EA_IOCTL(cmd)) { + case EA_IOCTL(SECCOMP_IOCTL_NOTIF_ADDFD): + return seccomp_notify_addfd(filter, buf, _IOC_SIZE(cmd)); default: return -EINVAL; } +#undef EA_IOCTL }
static __poll_t seccomp_notify_poll(struct file *file,
 
            From: Sargun Dhillon sargun@sargun.me
Test whether we can add file descriptors in response to notifications. This injects the file descriptors via notifications, and then uses kcmp to determine whether or not it has been successful.
It also includes some basic sanity checking for arguments.
Signed-off-by: Sargun Dhillon sargun@sargun.me Link: https://lore.kernel.org/r/20200603011044.7972-5-sargun@sargun.me Co-developed-by: Kees Cook keescook@chromium.org Signed-off-by: Kees Cook keescook@chromium.org --- tools/testing/selftests/seccomp/seccomp_bpf.c | 229 ++++++++++++++++++ 1 file changed, 229 insertions(+)
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 4e1891f8a0cd..143eafdc4fdc 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -45,6 +45,7 @@ #include <sys/socket.h> #include <sys/ioctl.h> #include <linux/kcmp.h> +#include <sys/resource.h>
#include <unistd.h> #include <sys/syscall.h> @@ -168,7 +169,9 @@ struct seccomp_metadata {
#ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER #define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3) +#endif
+#ifndef SECCOMP_RET_USER_NOTIF #define SECCOMP_RET_USER_NOTIF 0x7fc00000U
#define SECCOMP_IOC_MAGIC '!' @@ -204,6 +207,39 @@ struct seccomp_notif_sizes { }; #endif
+#ifndef SECCOMP_IOCTL_NOTIF_ADDFD +/* On success, the return value is the remote process's added fd number */ +#define SECCOMP_IOCTL_NOTIF_ADDFD SECCOMP_IOW(3, \ + struct seccomp_notif_addfd) + +/* valid flags for seccomp_notif_addfd */ +#define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */ + +struct seccomp_notif_addfd { + __u64 id; + __u32 flags; + __u32 srcfd; + __u32 newfd; + __u32 newfd_flags; +}; +#endif + +struct seccomp_notif_addfd_small { + __u64 id; + char weird[4]; +}; +#define SECCOMP_IOCTL_NOTIF_ADDFD_SMALL \ + SECCOMP_IOW(3, struct seccomp_notif_addfd_small) + +struct seccomp_notif_addfd_big { + union { + struct seccomp_notif_addfd addfd; + char buf[sizeof(struct seccomp_notif_addfd) + 8]; + }; +}; +#define SECCOMP_IOCTL_NOTIF_ADDFD_BIG \ + SECCOMP_IOWR(3, struct seccomp_notif_addfd_big) + #ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY #define PTRACE_EVENTMSG_SYSCALL_ENTRY 1 #define PTRACE_EVENTMSG_SYSCALL_EXIT 2 @@ -3833,6 +3869,199 @@ TEST(user_notification_filter_empty_threaded) EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0); }
+TEST(user_notification_addfd) +{ + pid_t pid; + long ret; + int status, listener, memfd, fd; + struct seccomp_notif_addfd addfd = {}; + struct seccomp_notif_addfd_small small = {}; + struct seccomp_notif_addfd_big big = {}; + struct seccomp_notif req = {}; + struct seccomp_notif_resp resp = {}; + /* 100 ms */ + struct timespec delay = { .tv_nsec = 100000000 }; + + memfd = memfd_create("test", 0); + ASSERT_GE(memfd, 0); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + /* Check that the basic notification machinery works */ + listener = user_notif_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); + ASSERT_GE(listener, 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + if (syscall(__NR_getppid) != USER_NOTIF_MAGIC) + exit(1); + exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); + } + + ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + + addfd.srcfd = memfd; + addfd.newfd = 0; + addfd.id = req.id; + addfd.flags = 0x0; + + /* Verify bad newfd_flags cannot be set */ + addfd.newfd_flags = ~O_CLOEXEC; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); + EXPECT_EQ(errno, EINVAL); + addfd.newfd_flags = O_CLOEXEC; + + /* Verify bad flags cannot be set */ + addfd.flags = 0xff; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); + EXPECT_EQ(errno, EINVAL); + addfd.flags = 0; + + /* Verify that remote_fd cannot be set without setting flags */ + addfd.newfd = 1; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); + EXPECT_EQ(errno, EINVAL); + addfd.newfd = 0; + + /* Verify small size cannot be set */ + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_SMALL, &small), -1); + EXPECT_EQ(errno, EINVAL); + + /* Verify we can't send bits filled in unknown buffer area */ + memset(&big, 0xAA, sizeof(big)); + big.addfd = addfd; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big), -1); + 
EXPECT_EQ(errno, E2BIG); + + + /* Verify we can set an arbitrary remote fd */ + fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); + /* + * The child has fds 0(stdin), 1(stdout), 2(stderr), 3(memfd), + * 4(listener), so the newly allocated fd should be 5. + */ + EXPECT_EQ(fd, 5); + EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0); + + /* Verify we can set an arbitrary remote fd with large size */ + memset(&big, 0x0, sizeof(big)); + big.addfd = addfd; + fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big); + EXPECT_EQ(fd, 6); + + /* Verify we can set a specific remote fd */ + addfd.newfd = 42; + addfd.flags = SECCOMP_ADDFD_FLAG_SETFD; + fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); + EXPECT_EQ(fd, 42); + EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0); + + /* Resume syscall */ + resp.id = req.id; + resp.error = 0; + resp.val = USER_NOTIF_MAGIC; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); + + /* + * This sets the ID of the ADD FD to the last request plus 1. The + * notification ID increments 1 per notification. + */ + addfd.id = req.id + 1; + + /* This spins until the underlying notification is generated */ + while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 && + errno != -EINPROGRESS) + nanosleep(&delay, NULL); + + memset(&req, 0, sizeof(req)); + ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + ASSERT_EQ(addfd.id, req.id); + + resp.id = req.id; + resp.error = 0; + resp.val = USER_NOTIF_MAGIC; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); + + /* Wait for child to finish. 
*/ + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + + close(memfd); +} + +TEST(user_notification_addfd_rlimit) +{ + pid_t pid; + long ret; + int status, listener, memfd; + struct seccomp_notif_addfd addfd = {}; + struct seccomp_notif req = {}; + struct seccomp_notif_resp resp = {}; + const struct rlimit lim = { + .rlim_cur = 0, + .rlim_max = 0, + }; + + memfd = memfd_create("test", 0); + ASSERT_GE(memfd, 0); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + /* Check that the basic notification machinery works */ + listener = user_notif_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); + ASSERT_GE(listener, 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) + exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); + + + ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + + ASSERT_EQ(prlimit(pid, RLIMIT_NOFILE, &lim, NULL), 0); + + addfd.srcfd = memfd; + addfd.newfd_flags = O_CLOEXEC; + addfd.newfd = 0; + addfd.id = req.id; + addfd.flags = 0; + + /* Should probably spot check /proc/sys/fs/file-nr */ + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); + EXPECT_EQ(errno, EMFILE); + + addfd.newfd = 100; + addfd.flags = SECCOMP_ADDFD_FLAG_SETFD; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); + EXPECT_EQ(errno, EBADF); + + resp.id = req.id; + resp.error = 0; + resp.val = USER_NOTIF_MAGIC; + + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); + + /* Wait for child to finish. */ + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + + close(memfd); +} + /* * TODO: * - expand NNP testing
linux-kselftest-mirror@lists.linaro.org



