Create a new robust list syscall, set_robust_list2(). The existing set_robust_list() syscall can't be extended to cover the use cases described below, so a new one is needed. The new syscall allows users to set multiple robust lists per task and to have either 32-bit or 64-bit pointers in each list.
* Interface
This is the proposed interface:
long set_robust_list2(void *head, int index, unsigned int flags)
`head` is the head of the userspace struct robust_list_head, just as in the old set_robust_list(). It needs to be a void pointer since it can point to either a native robust_list_head or a 32-bit (compat) robust_list_head32.
`flags` can be used for defining the list type:
enum robust_list2_type { ROBUST_LIST_32BIT, ROBUST_LIST_64BIT, };
`index` is the index in the kernel's internal linked list of robust lists (the naming starts to get confusing, I reckon). If `index == -1`, the user wants to set a new robust list: the kernel appends it to the end of the linked list, assigns it a new index and returns this index to the user. If `index >= 0`, the user wants to re-set `*head` of an already existing list (similarly to what happens when you call set_robust_list() twice with different `*head`).
If `index` is out of range, or it points to a non-existing robust_list, or if the internal list is full, an error is returned.
Unaligned `head` addresses are refused by the kernel with -EINVAL.
User cannot remove lists.
* Implementation
The old syscalls, set_robust_list() and get_robust_list(), are converted to use the linked list as well. When using only the old syscalls, users shouldn't notice any difference, as the internal code handles the linked list insertion transparently. When mixing the old and new interfaces, users should be aware that one of the elements of the list was created by the other syscall, and they should take special care when handling that element's index.
On exit, the linked list is walked and all robust lists are handled, regardless of which interface was used to create them.
Signed-off-by: André Almeida <andrealmeid@igalia.com> --- include/linux/futex.h | 5 +- include/linux/sched.h | 5 +- include/uapi/asm-generic/unistd.h | 2 + include/uapi/linux/futex.h | 24 +++++++++ kernel/futex/core.c | 111 ++++++++++++++++++++++++++++++-------- kernel/futex/futex.h | 5 ++ kernel/futex/syscalls.c | 81 ++++++++++++++++++++++++++-- 7 files changed, 204 insertions(+), 29 deletions(-)
diff --git a/include/linux/futex.h b/include/linux/futex.h index cd7c5d12c846566c56f3f3ea74b95e437a6e8193..7721629926535c775bd7b05b5283a3d0b51262d6 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -75,10 +75,11 @@ enum {
static inline void futex_init_task(struct task_struct *tsk) { - tsk->robust_list = NULL; + tsk->robust_list_index = -1; #ifdef CONFIG_COMPAT - tsk->compat_robust_list = NULL; + tsk->compat_robust_list_index = -1; #endif + INIT_LIST_HEAD(&tsk->robust_list2); INIT_LIST_HEAD(&tsk->pi_state_list); tsk->pi_state_cache = NULL; tsk->futex_state = FUTEX_STATE_OK; diff --git a/include/linux/sched.h b/include/linux/sched.h index 51e5d05a9fcd407dcd53b7b7cb8c59783660a826..a37c55cf0a4d942ec1fbedb8bcd4be5a3ebb20bb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1322,10 +1322,11 @@ struct task_struct { u32 rmid; #endif #ifdef CONFIG_FUTEX - struct robust_list_head __user *robust_list; + int robust_list_index; #ifdef CONFIG_COMPAT - struct robust_list_head32 __user *compat_robust_list; + int compat_robust_list_index; #endif + struct list_head robust_list2; struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; struct mutex futex_exit_mutex; diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 2892a45023af6d3eb941623d4fed04841ab07e02..ebe68c2c88eb5390dda184ce9268a8d3a606c9e5 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -852,6 +852,8 @@ __SYSCALL(__NR_removexattrat, sys_removexattrat) #define __NR_open_tree_attr 467 __SYSCALL(__NR_open_tree_attr, sys_open_tree_attr)
+#define __NR_set_robust_list2 467 + #undef __NR_syscalls #define __NR_syscalls 468
diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h index 7e2744ec89336a260e89883e95222eda199eeb7f..cbd321eca03afb6bdcf47e9534761d82f9de7e43 100644 --- a/include/uapi/linux/futex.h +++ b/include/uapi/linux/futex.h @@ -153,6 +153,30 @@ struct robust_list_head { struct robust_list __user *list_op_pending; };
+#define ROBUST_LISTS_PER_TASK 10 + +enum robust_list2_type { + ROBUST_LIST_32BIT, + ROBUST_LIST_64BIT, +}; + +#define ROBUST_LIST_TYPE_MASK (ROBUST_LIST_32BIT | ROBUST_LIST_64BIT) + +/* + * This is an entry of a linked list of robust lists. + * + * @head: can point to a 64bit list or a 32bit list + * @list_type: determine the size of the futex pointers in the list + * @index: the index of this entry in the list + * @list: linked list element + */ +struct robust_list2_entry { + void __user *head; + enum robust_list2_type list_type; + unsigned int index; + struct list_head list; +}; + /* * Are there any waiters for this robust futex: */ diff --git a/kernel/futex/core.c b/kernel/futex/core.c index 8640770aadc611b7341a3abb41bdb740e6394479..49b3bc592948a811f995017027f33ad8f285531f 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -1151,9 +1151,9 @@ static inline int fetch_robust_entry(struct robust_list __user **entry, * * We silently return on any sign of list-walking problem. */ -static void exit_robust_list64(struct task_struct *curr) +static void exit_robust_list64(struct task_struct *curr, + struct robust_list_head __user *head) { - struct robust_list_head __user *head = curr->robust_list; struct robust_list __user *entry, *next_entry, *pending; unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; unsigned int next_pi; @@ -1213,7 +1213,8 @@ static void exit_robust_list64(struct task_struct *curr) } } #else -static void exit_robust_list64(struct task_struct *curr) +static void exit_robust_list64(struct task_struct *curr, + struct robust_list_head __user *head) { pr_warn("32bit kernel should not allow ROBUST_LIST_64BIT"); } @@ -1250,9 +1251,9 @@ fetch_robust_entry32(u32 *uentry, struct robust_list __user **entry, * * We silently return on any sign of list-walking problem. 
*/ -static void exit_robust_list32(struct task_struct *curr) +static void exit_robust_list32(struct task_struct *curr, + struct robust_list_head32 __user *head) { - struct robust_list_head32 __user *head = curr->compat_robust_list; struct robust_list __user *entry, *next_entry, *pending; unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; unsigned int next_pi; @@ -1318,6 +1319,70 @@ static void exit_robust_list32(struct task_struct *curr) } }
+long do_set_robust_list2(struct robust_list_head __user *head, + int index, unsigned int type) +{ + struct list_head *list2 = &current->robust_list2; + struct robust_list2_entry *prev, *new = NULL; + + if (index == -1) { + if (list_empty(list2)) { + index = 0; + } else { + prev = list_last_entry(list2, struct robust_list2_entry, list); + index = prev->index + 1; + } + + if (index >= ROBUST_LISTS_PER_TASK) + return -EINVAL; + + new = kmalloc(sizeof(struct robust_list2_entry), GFP_KERNEL); + if (!new) + return -ENOMEM; + + list_add_tail(&new->list, list2); + new->index = index; + + } else if (index >= 0) { + struct robust_list2_entry *curr; + + if (list_empty(list2)) + return -ENOENT; + + list_for_each_entry(curr, list2, list) { + if (index == curr->index) { + new = curr; + break; + } + } + + if (!new) + return -ENOENT; + } + + BUG_ON(!new); + new->head = head; + new->list_type = type; + + return index; +} + +struct robust_list_head __user *get_robust_list2(int index, struct task_struct *task) +{ + struct list_head *list2 = &task->robust_list2; + struct robust_list2_entry *curr; + + if (list_empty(list2) || index == -1) + return NULL; + + list_for_each_entry(curr, list2, list) { + if (index == curr->index) + return curr->head; + } + + return NULL; +} + #ifdef CONFIG_FUTEX_PI
/* @@ -1411,24 +1476,28 @@ static inline void exit_pi_state_list(struct task_struct *curr) { }
static void futex_cleanup(struct task_struct *tsk) { -#ifdef CONFIG_64BIT - if (unlikely(tsk->robust_list)) { - exit_robust_list64(tsk); - tsk->robust_list = NULL; - } -#else - if (unlikely(tsk->robust_list)) { - exit_robust_list32(tsk); - tsk->robust_list = NULL; - } -#endif + struct robust_list2_entry *curr, *n; + struct list_head *list2 = &tsk->robust_list2;
-#ifdef CONFIG_COMPAT - if (unlikely(tsk->compat_robust_list)) { - exit_robust_list32(tsk); - tsk->compat_robust_list = NULL; + /* + * Walk through the linked list, parsing robust lists and freeing the + * allocated lists + */ + if (unlikely(!list_empty(list2))) { + list_for_each_entry_safe(curr, n, list2, list) { + if (curr->head != NULL) { + if (curr->list_type == ROBUST_LIST_64BIT) + exit_robust_list64(tsk, curr->head); + else if (curr->list_type == ROBUST_LIST_32BIT) + exit_robust_list32(tsk, curr->head); + curr->head = NULL; + } + list_del_init(&curr->list); + kfree(curr); + } } -#endif + + tsk->robust_list_index = -1;
if (unlikely(!list_empty(&tsk->pi_state_list))) exit_pi_state_list(tsk); diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h index fcd1617212eed0e3c2367d2b463a0e019eda6d13..67201e51fa1798a21ff68f60b1e35977b9bd267b 100644 --- a/kernel/futex/futex.h +++ b/kernel/futex/futex.h @@ -467,6 +467,11 @@ extern int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, extern int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset);
+extern long do_set_robust_list2(struct robust_list_head __user *head, + int index, unsigned int type); + +extern struct robust_list_head __user *get_robust_list2(int index, struct task_struct *task); + /** * struct futex_vector - Auxiliary struct for futex_waitv() * @w: Userspace provided data diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index dba193dfd216cc929c8f4d979aa2bcd99237e2d8..56ee1123cbd8ea26c8d22aa74e5faed2974ec577 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -20,6 +20,18 @@ * the list. There can only be one such pending lock. */
+#ifdef CONFIG_64BIT +static inline int robust_list_native_type(void) +{ + return ROBUST_LIST_64BIT; +} +#else +static inline int robust_list_native_type(void) +{ + return ROBUST_LIST_32BIT; +} +#endif + /** * sys_set_robust_list() - Set the robust-futex list head of a task * @head: pointer to the list-head @@ -28,17 +40,63 @@ SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head, size_t, len) { + unsigned int type = robust_list_native_type(); + int ret; + /* * The kernel knows only one size for now: */ if (unlikely(len != sizeof(*head))) return -EINVAL;
- current->robust_list = head; + ret = do_set_robust_list2(head, current->robust_list_index, type); + if (ret < 0) + return ret; + + current->robust_list_index = ret;
return 0; }
+#define ROBUST_LIST_FLAGS ROBUST_LIST_TYPE_MASK + +/* + * sys_set_robust_list2() + * + * When index == -1, create a new list for user. When index >= 0, try to find + * the corresponding list and re-set the head there. + * + * Return values: + * >= 0: success, index of the robust list + * -EINVAL: invalid flags, invalid index + * -ENOENT: requested index no where to be found + * -ENOMEM: error allocating new list + * -ESRCH: too many allocated lists + */ +SYSCALL_DEFINE3(set_robust_list2, struct robust_list_head __user *, head, + int, index, unsigned int, flags) +{ + unsigned int type; + + type = flags & ROBUST_LIST_TYPE_MASK; + + if (index < -1 || index >= ROBUST_LISTS_PER_TASK) + return -EINVAL; + + if ((flags & ~ROBUST_LIST_FLAGS) != 0) + return -EINVAL; + + if (((uintptr_t) head % sizeof(u32)) != 0) + return -EINVAL; + +#ifndef CONFIG_64BIT + if (type == ROBUST_LIST_64BIT) + return -EINVAL; +#endif + + return do_set_robust_list2(head, index, type); +} + /** * sys_get_robust_list() - Get the robust-futex list head of a task * @pid: pid of the process [zero for current task] @@ -52,6 +110,7 @@ SYSCALL_DEFINE3(get_robust_list, int, pid, struct robust_list_head __user *head; unsigned long ret; struct task_struct *p; + int index;
rcu_read_lock();
@@ -68,9 +127,11 @@ SYSCALL_DEFINE3(get_robust_list, int, pid, if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) goto err_unlock;
- head = p->robust_list; + index = p->robust_list_index; rcu_read_unlock();
+ head = get_robust_list2(index, p); + if (put_user(sizeof(*head), len_ptr)) return -EFAULT; return put_user(head, head_ptr); @@ -443,10 +504,19 @@ COMPAT_SYSCALL_DEFINE2(set_robust_list, struct robust_list_head32 __user *, head, compat_size_t, len) { + unsigned int type = ROBUST_LIST_32BIT; + int ret; + if (unlikely(len != sizeof(*head))) return -EINVAL;
- current->compat_robust_list = head; + ret = do_set_robust_list2((struct robust_list_head __user *) head, + current->robust_list_index, type); + if (ret < 0) + return ret; + + current->robust_list_index = ret; +
return 0; } @@ -458,6 +528,7 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, struct robust_list_head32 __user *head; unsigned long ret; struct task_struct *p; + int index;
rcu_read_lock();
@@ -474,9 +545,11 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) goto err_unlock;
- head = p->compat_robust_list; + index = p->compat_robust_list_index; rcu_read_unlock();
+ head = (struct robust_list_head32 __user *) get_robust_list2(index, p); + if (put_user(sizeof(*head), len_ptr)) return -EFAULT; return put_user(ptr_to_compat(head), head_ptr);