As with openat2(2), this allows userspace to easily figure out what flags and fields are supported by clone3(2). For fields which are not flag-based, we simply set every bit in the field so that a naive bitwise-and would show that any value of the field is valid.
For args->exit_signal, since we have an explicit bitmask for the field defined already (CSIGNAL) we can indicate that only those bits are supported by current kernels. If we add some extra bits to exit_signal in the future, being able to detect them as new features would be quite useful.
The intended way of using this interface to get feature information looks something like the following:
static bool clone3_clear_sighand_supported; static bool clone3_cgroup_supported;
int check_clone3_support(void) { int err; struct clone_args args = {};
err = clone3(&args, CHECK_FIELDS | sizeof(args)); assert(err < 0); switch (errno) { case EFAULT: case E2BIG: /* Old kernel... */ check_support_the_old_way(); break; case EEXTSYS_NOOP: clone3_clear_sighand_supported = (how.flags & CLONE_CLEAR_SIGHAND); clone3_cgroup_supported = (how.flags & CLONE_INTO_CGROUP) && (how.cgroup != 0); break; } }
Signed-off-by: Aleksa Sarai cyphar@cyphar.com --- kernel/fork.c | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c index cc760491f201..bded93f4f5f4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2925,11 +2925,15 @@ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, } #endif
+ +#define CLONE3_VALID_FLAGS (CLONE_LEGACY_FLAGS | CLONE_CLEAR_SIGHAND | CLONE_INTO_CGROUP) + noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs, struct clone_args __user *uargs, size_t usize) { int err; + bool check_fields; struct clone_args args; pid_t *kset_tid = kargs->set_tid;
@@ -2941,11 +2945,34 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs, CLONE_ARGS_SIZE_VER2); BUILD_BUG_ON(sizeof(struct clone_args) != CLONE_ARGS_SIZE_VER2);
+ check_fields = usize & CHECK_FIELDS; + usize &= ~CHECK_FIELDS; + if (unlikely(usize > PAGE_SIZE)) return -E2BIG; if (unlikely(usize < CLONE_ARGS_SIZE_VER0)) return -EINVAL;
+ if (unlikely(check_fields)) { + memset(&args, 0, sizeof(args)); + args = (struct clone_args) { + .flags = CLONE3_VALID_FLAGS, + .pidfd = 0xFFFFFFFFFFFFFFFF, + .child_tid = 0xFFFFFFFFFFFFFFFF, + .parent_tid = 0xFFFFFFFFFFFFFFFF, + .exit_signal = (u64) CSIGNAL, + .stack = 0xFFFFFFFFFFFFFFFF, + .stack_size = 0xFFFFFFFFFFFFFFFF, + .tls = 0xFFFFFFFFFFFFFFFF, + .set_tid = 0xFFFFFFFFFFFFFFFF, + .set_tid_size = 0xFFFFFFFFFFFFFFFF, + .cgroup = 0xFFFFFFFFFFFFFFFF, + }; + + err = copy_struct_to_user(uargs, usize, &args, sizeof(args), NULL); + return err ?: -EEXTSYS_NOOP; + } + err = copy_struct_from_user(&args, sizeof(args), uargs, usize); if (err) return err; @@ -3025,8 +3052,7 @@ static inline bool clone3_stack_valid(struct kernel_clone_args *kargs) static bool clone3_args_valid(struct kernel_clone_args *kargs) { /* Verify that no unknown flags are passed along. */ - if (kargs->flags & - ~(CLONE_LEGACY_FLAGS | CLONE_CLEAR_SIGHAND | CLONE_INTO_CGROUP)) + if (kargs->flags & ~CLONE3_VALID_FLAGS) return false;
/*