Hello,
This version is a complete rewrite of the syscall (thanks Thomas for the suggestions!).
* Use case
The use-case for the new syscalls is detailed in the last patch version:
https://lore.kernel.org/lkml/20250626-tonyk-robust_futex-v5-0-179194dbde8f@i...
* The syscall interface
Documented at patches 3/9 "futex: Create set_robust_list2() syscall" and 4/9 "futex: Create get_robust_list2() syscall".
* Testing
I expanded the current robust list selftest to use the new interface, and also ported the original syscall to use the new syscall internals, and everything survived the tests.
* Changelog
Changes from v5: - Complete interface rewrite, there are so many changes but the main ones are the following points - Array of robust lists now has a static size, allocated once during the first usage of the list - Now that the list of robust lists has a fixed size, I removed the logic of having a command for creating a new index on the list. To simplify things for everyone, userspace just needs to call set_robust_list2(head, 32-bit/64-bit type, index). - Created get_robust_list2() - The new code can be better integrated with the original interface - v5: https://lore.kernel.org/r/20250626-tonyk-robust_futex-v5-0-179194dbde8f@igal...
Feedback is very welcome!
--- André Almeida (9): futex: Use explicit sizes for compat_robust_list structs futex: Make exit_robust_list32() unconditionally available for 64-bit kernels futex: Create set_robust_list2() syscall futex: Create get_robust_list2() syscall futex: Wire up set_robust_list2 syscall futex: Wire up get_robust_list2 syscall selftests/futex: Expand for set_robust_list2() selftests/futex: Expand for get_robust_list2() futex: Use new robust list API internally
arch/alpha/kernel/syscalls/syscall.tbl | 2 + arch/arm/tools/syscall.tbl | 2 + arch/m68k/kernel/syscalls/syscall.tbl | 2 + arch/microblaze/kernel/syscalls/syscall.tbl | 2 + arch/mips/kernel/syscalls/syscall_n32.tbl | 2 + arch/mips/kernel/syscalls/syscall_n64.tbl | 2 + arch/mips/kernel/syscalls/syscall_o32.tbl | 2 + arch/parisc/kernel/syscalls/syscall.tbl | 2 + arch/powerpc/kernel/syscalls/syscall.tbl | 2 + arch/s390/kernel/syscalls/syscall.tbl | 2 + arch/sh/kernel/syscalls/syscall.tbl | 2 + arch/sparc/kernel/syscalls/syscall.tbl | 2 + arch/x86/entry/syscalls/syscall_32.tbl | 2 + arch/x86/entry/syscalls/syscall_64.tbl | 2 + arch/xtensa/kernel/syscalls/syscall.tbl | 2 + include/linux/compat.h | 13 +- include/linux/futex.h | 30 +- include/linux/sched.h | 6 +- include/uapi/asm-generic/unistd.h | 7 +- include/uapi/linux/futex.h | 26 ++ kernel/futex/core.c | 140 ++++-- kernel/futex/syscalls.c | 134 +++++- kernel/sys_ni.c | 2 + scripts/syscall.tbl | 1 + .../selftests/futex/functional/robust_list.c | 504 +++++++++++++++++++-- 25 files changed, 788 insertions(+), 105 deletions(-) --- base-commit: c42ba5a87bdccbca11403b7ca8bad1a57b833732 change-id: 20250225-tonyk-robust_futex-60adeedac695
Best regards,
There are two functions for handling robust lists during a task exit: exit_robust_list() and compat_exit_robust_list(). The first one handles either 64-bit or 32-bit lists, depending on the kernel bitness. compat_exit_robust_list() exists only in 64-bit kernels that support 32-bit syscall entry points (also known as compat entry points).
The new syscall set_robust_list2() needs to handle both 64-bit and 32-bit robust lists, regardless of compat entry being enabled, so it needs to have both functions always available.
In preparation for this, use explicit size for struct members of compat_robust_list and compat_robust_list_head. Rename the structs and compat_exit_robust_list() to make clear which bitness it handles.
Keep exit_robust_list() as it is: used to handle the native bit size of the kernel.
Signed-off-by: André Almeida andrealmeid@igalia.com --- include/linux/compat.h | 13 +++---------- include/linux/futex.h | 2 +- include/linux/sched.h | 2 +- include/uapi/linux/futex.h | 10 ++++++++++ kernel/futex/core.c | 20 ++++++++++---------- kernel/futex/syscalls.c | 8 ++++---- 6 files changed, 29 insertions(+), 26 deletions(-)
diff --git a/include/linux/compat.h b/include/linux/compat.h index 56cebaff0c91..2c5a7f980182 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -385,15 +385,8 @@ struct compat_ifconf { compat_caddr_t ifcbuf; };
-struct compat_robust_list { - compat_uptr_t next; -}; - -struct compat_robust_list_head { - struct compat_robust_list list; - compat_long_t futex_offset; - compat_uptr_t list_op_pending; -}; +struct robust_list32; +struct robust_list_head32;
#ifdef CONFIG_COMPAT_OLD_SIGACTION struct compat_old_sigaction { @@ -672,7 +665,7 @@ asmlinkage long compat_sys_waitid(int, compat_pid_t, struct compat_siginfo __user *, int, struct compat_rusage __user *); asmlinkage long -compat_sys_set_robust_list(struct compat_robust_list_head __user *head, +compat_sys_set_robust_list(struct robust_list_head32 __user *head, compat_size_t len); asmlinkage long compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, diff --git a/include/linux/futex.h b/include/linux/futex.h index 9e9750f04980..322851e4a703 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -66,7 +66,7 @@ static inline void futex_init_task(struct task_struct *tsk) { tsk->robust_list = NULL; #ifdef CONFIG_COMPAT - tsk->compat_robust_list = NULL; + tsk->robust_list32 = NULL; #endif INIT_LIST_HEAD(&tsk->pi_state_list); tsk->pi_state_cache = NULL; diff --git a/include/linux/sched.h b/include/linux/sched.h index cbb7340c5866..76cabfab5b73 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1332,7 +1332,7 @@ struct task_struct { #ifdef CONFIG_FUTEX struct robust_list_head __user *robust_list; #ifdef CONFIG_COMPAT - struct compat_robust_list_head __user *compat_robust_list; + struct robust_list_head32 __user *robust_list32; #endif struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h index 7e2744ec8933..86efb089893d 100644 --- a/include/uapi/linux/futex.h +++ b/include/uapi/linux/futex.h @@ -153,6 +153,16 @@ struct robust_list_head { struct robust_list __user *list_op_pending; };
+struct robust_list32 { + __u32 next; +}; + +struct robust_list_head32 { + struct robust_list32 list; + __s32 futex_offset; + __u32 list_op_pending; +}; + /* * Are there any waiters for this robust futex: */ diff --git a/kernel/futex/core.c b/kernel/futex/core.c index 125804fbb5cb..c99d7baab24e 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -1227,7 +1227,7 @@ static void __user *futex_uaddr(struct robust_list __user *entry, * Fetch a robust-list pointer. Bit 0 signals PI futexes: */ static inline int -compat_fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry, +fetch_robust_entry32(compat_uptr_t *uentry, struct robust_list __user **entry, compat_uptr_t __user *head, unsigned int *pi) { if (get_user(*uentry, head)) @@ -1245,9 +1245,9 @@ compat_fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **ent * * We silently return on any sign of list-walking problem. */ -static void compat_exit_robust_list(struct task_struct *curr) +static void exit_robust_list32(struct task_struct *curr) { - struct compat_robust_list_head __user *head = curr->compat_robust_list; + struct robust_list_head32 __user *head = curr->robust_list32; struct robust_list __user *entry, *next_entry, *pending; unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; unsigned int next_pi; @@ -1259,7 +1259,7 @@ static void compat_exit_robust_list(struct task_struct *curr) * Fetch the list head (which was registered earlier, via * sys_set_robust_list()): */ - if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &pi)) + if (fetch_robust_entry32(&uentry, &entry, &head->list.next, &pi)) return; /* * Fetch the relative futex offset: @@ -1270,7 +1270,7 @@ static void compat_exit_robust_list(struct task_struct *curr) * Fetch any possibly pending lock-add first, and handle it * if it exists: */ - if (compat_fetch_robust_entry(&upending, &pending, + if (fetch_robust_entry32(&upending, &pending, &head->list_op_pending, &pip)) return;
@@ -1280,8 +1280,8 @@ static void compat_exit_robust_list(struct task_struct *curr) * Fetch the next entry in the list before calling * handle_futex_death: */ - rc = compat_fetch_robust_entry(&next_uentry, &next_entry, - (compat_uptr_t __user *)&entry->next, &next_pi); + rc = fetch_robust_entry32(&next_uentry, &next_entry, + (u32 __user *)&entry->next, &next_pi); /* * A pending lock might already be on the list, so * dont process it twice: @@ -1413,9 +1413,9 @@ static void futex_cleanup(struct task_struct *tsk) }
#ifdef CONFIG_COMPAT - if (unlikely(tsk->compat_robust_list)) { - compat_exit_robust_list(tsk); - tsk->compat_robust_list = NULL; + if (unlikely(tsk->robust_list32)) { + exit_robust_list32(tsk); + tsk->robust_list32 = NULL; } #endif
diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index 880c9bf2f315..1de8ff230d54 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -43,7 +43,7 @@ static inline void __user *futex_task_robust_list(struct task_struct *p, bool co { #ifdef CONFIG_COMPAT if (compat) - return p->compat_robust_list; + return p->robust_list32; #endif return p->robust_list; } @@ -468,13 +468,13 @@ SYSCALL_DEFINE4(futex_requeue,
#ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE2(set_robust_list, - struct compat_robust_list_head __user *, head, + struct robust_list_head32 __user *, head, compat_size_t, len) { if (unlikely(len != sizeof(*head))) return -EINVAL;
- current->compat_robust_list = head; + current->robust_list32 = head;
return 0; } @@ -483,7 +483,7 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, compat_uptr_t __user *, head_ptr, compat_size_t __user *, len_ptr) { - struct compat_robust_list_head __user *head = futex_get_robust_list_common(pid, true); + struct robust_list_head32 __user *head = futex_get_robust_list_common(pid, true);
if (IS_ERR(head)) return PTR_ERR(head);
The new syscall set_robust_list2() needs to handle both 64-bit and 32-bit robust lists, but not every 64-bit platform has compat entry points. Make exit_robust_list32() unconditionally available for 64-bit kernels regardless of having a compat configuration.
Signed-off-by: André Almeida andrealmeid@igalia.com --- kernel/futex/core.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-)
diff --git a/kernel/futex/core.c b/kernel/futex/core.c index c99d7baab24e..136639897ff9 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -31,7 +31,6 @@ * "The futexes are also cursed." * "But they come in a choice of three flavours!" */ -#include <linux/compat.h> #include <linux/jhash.h> #include <linux/pagemap.h> #include <linux/debugfs.h> @@ -1213,12 +1212,12 @@ static void exit_robust_list(struct task_struct *curr) } }
-#ifdef CONFIG_COMPAT +#ifdef CONFIG_64BIT static void __user *futex_uaddr(struct robust_list __user *entry, compat_long_t futex_offset) { - compat_uptr_t base = ptr_to_compat(entry); - void __user *uaddr = compat_ptr(base + futex_offset); + u32 base = (u32)(unsigned long)(entry); + void __user *uaddr = (void __user *)(unsigned long)(base + futex_offset);
return uaddr; } @@ -1227,13 +1226,13 @@ static void __user *futex_uaddr(struct robust_list __user *entry, * Fetch a robust-list pointer. Bit 0 signals PI futexes: */ static inline int -fetch_robust_entry32(compat_uptr_t *uentry, struct robust_list __user **entry, - compat_uptr_t __user *head, unsigned int *pi) +fetch_robust_entry32(u32 *uentry, struct robust_list __user **entry, + u32 __user *head, unsigned int *pi) { if (get_user(*uentry, head)) return -EFAULT;
- *entry = compat_ptr((*uentry) & ~1); + *entry = (void __user *)(unsigned long)((*uentry) & ~1); *pi = (unsigned int)(*uentry) & 1;
return 0; @@ -1251,8 +1250,8 @@ static void exit_robust_list32(struct task_struct *curr) struct robust_list __user *entry, *next_entry, *pending; unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; unsigned int next_pi; - compat_uptr_t uentry, next_uentry, upending; - compat_long_t futex_offset; + u32 uentry, next_uentry, upending; + s32 futex_offset; int rc;
/* @@ -1412,7 +1411,7 @@ static void futex_cleanup(struct task_struct *tsk) tsk->robust_list = NULL; }
-#ifdef CONFIG_COMPAT +#ifdef CONFIG_64BIT if (unlikely(tsk->robust_list32)) { exit_robust_list32(tsk); tsk->robust_list32 = NULL;
Emulators (like FEX-Emu, to run x86 apps on top of AArch64) have two special needs regarding robust lists: to be able to register more than one robust list, one list for the app being emulated and one for the emulator itself; and to be able to walk 32-bit robust lists on a 64-bit platform without compat entry points.
The current syscall allows for one robust list per task (on x86-64, it can have two if compat is enabled) and on AArch64 there's no way to parse a 32-bit robust list. The current syscall cannot be expanded to solve both needs, so create a new syscall, set_robust_list2() with the following signature:
sys_set_robust_list2(struct robust_list_head *head, unsigned int index, unsigned int cmd, unsigned int flags)
The new syscall allows to set multiple lists per task, of 64-bit or 32-bit types.
- `*head` is the same structure used in the current syscall. - `index` is the index of the list to be set with `head`. - `cmd` defines the operation to perform: - `FUTEX_ROBUST_LIST_CMD_SET_64` set a 64-bit robust list at `index` - `FUTEX_ROBUST_LIST_CMD_SET_32` set a 32-bit robust list at `index` - `FUTEX_ROBUST_LIST_CMD_LIST_LIMIT` get the limit of lists per task - `flags` is unused now but can be used to expand the interface
Setting an index twice overwrites the last instance.
The array of lists is dynamically allocated on first use, but has a fixed size determined by the kernel. 8 slots are more than enough to cover the target use case and allow for more use cases. The command for getting the list limit allows userspace to check if the kernel ever expands this list. The first two slots are reserved for the kernel, to store the original syscall robust_list_head's.
The array of lists is destroyed only during task exit.
The `FUTEX_ROBUST_LIST_CMD_SET_64` operation is only available for 64-bit kernels. In such kernels, lists created with `FUTEX_ROBUST_LIST_CMD_SET_32` are marked with `FUTEX_ROBUST_LIST_ENTRY_32BIT` and the kernel handles it with a special function exit_robust_list32() to be able to walk a list of 32-bit pointers.
For 32-bit kernels, there's no special function available as all user lists and list-handling functions have the same bitness.
Signed-off-by: André Almeida andrealmeid@igalia.com --- include/linux/futex.h | 26 +++++++++++ include/linux/sched.h | 1 + include/uapi/linux/futex.h | 16 +++++++ kernel/futex/core.c | 111 ++++++++++++++++++++++++++++++++++++++++++--- kernel/futex/syscalls.c | 41 +++++++++++++++++ 5 files changed, 188 insertions(+), 7 deletions(-)
diff --git a/include/linux/futex.h b/include/linux/futex.h index 322851e4a703..3dba249bcd32 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -5,6 +5,7 @@ #include <linux/sched.h> #include <linux/ktime.h> #include <linux/mm_types.h> +#include <linux/compat.h>
#include <uapi/linux/futex.h>
@@ -62,12 +63,35 @@ enum { FUTEX_STATE_DEAD, };
+#define FUTEX_ROBUST_LIST_NATIVE_IDX 0 +#define FUTEX_ROBUST_LIST_COMPAT_IDX 1 +#define FUTEX_ROBUST_LIST2_IDX 2 +#define FUTEX_ROBUST_LISTS_PER_USER 8 +#define FUTEX_ROBUST_LIST2_MAX_IDX (FUTEX_ROBUST_LIST2_IDX + FUTEX_ROBUST_LISTS_PER_USER) + +/* + * List entries without _32BIT flag are using the native machine size + */ +#define FUTEX_ROBUST_LIST_ENTRY_INUSE 0x1UL +#define FUTEX_ROBUST_LIST_ENTRY_32BIT 0x2UL +#define FUTEX_ROBUST_LIST_ENTRY_MASK (~0x3UL) + +static inline bool futex_in_32bit_syscall(void) +{ +#ifdef CONFIG_X86 + return !IS_ENABLED(CONFIG_64BIT) || in_32bit_syscall(); +#else + return !IS_ENABLED(CONFIG_64BIT); +#endif +} + static inline void futex_init_task(struct task_struct *tsk) { tsk->robust_list = NULL; #ifdef CONFIG_COMPAT tsk->robust_list32 = NULL; #endif + tsk->futex_robust_lists = NULL; INIT_LIST_HEAD(&tsk->pi_state_list); tsk->pi_state_cache = NULL; tsk->futex_state = FUTEX_STATE_OK; @@ -82,6 +106,8 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, u32 __user *uaddr2, u32 val2, u32 val3); int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4);
+int futex_robust_list_set(uintptr_t head, enum robust_list2_cmd cmd, unsigned int index); + #ifdef CONFIG_FUTEX_PRIVATE_HASH int futex_hash_allocate_default(void); void futex_hash_free(struct mm_struct *mm); diff --git a/include/linux/sched.h b/include/linux/sched.h index 76cabfab5b73..de2f3cbe4953 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1331,6 +1331,7 @@ struct task_struct { #endif #ifdef CONFIG_FUTEX struct robust_list_head __user *robust_list; + uintptr_t *futex_robust_lists; #ifdef CONFIG_COMPAT struct robust_list_head32 __user *robust_list32; #endif diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h index 86efb089893d..2ba5c0c3bb59 100644 --- a/include/uapi/linux/futex.h +++ b/include/uapi/linux/futex.h @@ -163,6 +163,22 @@ struct robust_list_head32 { __u32 list_op_pending; };
+/* + * Commands for set_robust_list2 syscall + */ +enum robust_list2_cmd { + FUTEX_ROBUST_LIST_CMD_SET_64, + FUTEX_ROBUST_LIST_CMD_SET_32, + FUTEX_ROBUST_LIST_CMD_LIST_LIMIT, + FUTEX_ROBUST_LIST_CMD_USER_MAX, + + /* + * Kernel internal, rejected for user space + */ + FUTEX_ROBUST_LIST_SET_NATIVE = 128, + FUTEX_ROBUST_LIST_SET_COMPAT, +}; + /* * Are there any waiters for this robust futex: */ diff --git a/kernel/futex/core.c b/kernel/futex/core.c index 136639897ff9..14d8a7176367 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -71,6 +71,57 @@ struct futex_private_hash { struct futex_hash_bucket queues[]; };
+int futex_robust_list_set(uintptr_t head, enum robust_list2_cmd cmd, + unsigned int index) +{ + uintptr_t entry = FUTEX_ROBUST_LIST_ENTRY_INUSE; + uintptr_t *rl = current->futex_robust_lists; + + if (!rl) { + rl = kcalloc(FUTEX_ROBUST_LIST2_MAX_IDX, sizeof(*rl), GFP_KERNEL); + if (!rl) + return -ENOMEM; + + scoped_guard(mutex, ¤t->futex_exit_mutex) { + /* check if another thread set the list before us */ + if (current->futex_robust_lists) { + kfree(rl); + rl = current->futex_robust_lists; + } else { + current->futex_robust_lists = rl; + } + } + + } + + switch (cmd) { + case FUTEX_ROBUST_LIST_CMD_SET_64: + if (futex_in_32bit_syscall()) + return -EINVAL; + break; + case FUTEX_ROBUST_LIST_CMD_SET_32: + entry |= FUTEX_ROBUST_LIST_ENTRY_32BIT; + break; + case FUTEX_ROBUST_LIST_SET_NATIVE: + index = FUTEX_ROBUST_LIST_NATIVE_IDX; + break; + case FUTEX_ROBUST_LIST_SET_COMPAT: + if (!IS_ENABLED(CONFIG_64BIT)) + return -EINVAL; + index = FUTEX_ROBUST_LIST_COMPAT_IDX; + entry |= FUTEX_ROBUST_LIST_ENTRY_32BIT; + break; + default: + return -EINVAL; + } + + entry |= head; + scoped_guard(mutex, ¤t->futex_exit_mutex) + rl[index] = entry; + + return 0; +} + /* * Fault injections for futexes. */ @@ -1150,9 +1201,8 @@ static inline int fetch_robust_entry(struct robust_list __user **entry, * * We silently return on any sign of list-walking problem. */ -static void exit_robust_list(struct task_struct *curr) +static void exit_robust_list(struct task_struct *curr, struct robust_list_head __user *head) { - struct robust_list_head __user *head = curr->robust_list; struct robust_list __user *entry, *next_entry, *pending; unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; unsigned int next_pi; @@ -1244,9 +1294,8 @@ fetch_robust_entry32(u32 *uentry, struct robust_list __user **entry, * * We silently return on any sign of list-walking problem. 
*/ -static void exit_robust_list32(struct task_struct *curr) +static void exit_robust_list32(struct task_struct *curr, struct robust_list_head32 __user *head) { - struct robust_list_head32 __user *head = curr->robust_list32; struct robust_list __user *entry, *next_entry, *pending; unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; unsigned int next_pi; @@ -1311,7 +1360,15 @@ static void exit_robust_list32(struct task_struct *curr) handle_futex_death(uaddr, curr, pip, HANDLE_DEATH_PENDING); } } -#endif + +#else + +static void exit_robust_list32(struct task_struct *curr, struct robust_list_head32 __user *head) +{ + pr_crit("32-bit kernel should never call %s", __func__); +} + +#endif /* CONFIG_64BIT */
#ifdef CONFIG_FUTEX_PI
@@ -1404,20 +1461,60 @@ static void exit_pi_state_list(struct task_struct *curr) static inline void exit_pi_state_list(struct task_struct *curr) { } #endif
+static void exit_robust_lists(struct task_struct *tsk) +{ + uintptr_t *rl = tsk->futex_robust_lists; + + tsk->futex_robust_lists = NULL; + + for (unsigned int idx = 0; idx < FUTEX_ROBUST_LIST2_MAX_IDX; idx++) { + uintptr_t entry = rl[idx]; + + if (!(entry & FUTEX_ROBUST_LIST_ENTRY_MASK)) + continue; + + /* + * If the list type is the same as the kernel bitness, always + * calls exit_robust_list(). exit_robust_list32() is only for + * 32-bit lists in a 64-bit kernel. + */ + if (IS_ENABLED(CONFIG_64BIT) && (entry & FUTEX_ROBUST_LIST_ENTRY_32BIT)) { + struct robust_list_head32 __user *head; + + entry &= FUTEX_ROBUST_LIST_ENTRY_MASK; + + head = (__force struct robust_list_head32 __user *)entry; + exit_robust_list32(tsk, head); + } else { + struct robust_list_head __user *head; + + entry &= FUTEX_ROBUST_LIST_ENTRY_MASK; + + head = (__force struct robust_list_head __user *)entry; + exit_robust_list(tsk, head); + } + } + + kfree(rl); +} + static void futex_cleanup(struct task_struct *tsk) { if (unlikely(tsk->robust_list)) { - exit_robust_list(tsk); + exit_robust_list(tsk, tsk->robust_list); tsk->robust_list = NULL; }
#ifdef CONFIG_64BIT if (unlikely(tsk->robust_list32)) { - exit_robust_list32(tsk); + exit_robust_list32(tsk, tsk->robust_list32); tsk->robust_list32 = NULL; } #endif
+ if (unlikely(tsk->futex_robust_lists)) + exit_robust_lists(tsk); + if (unlikely(!list_empty(&tsk->pi_state_list))) exit_pi_state_list(tsk); } diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index 1de8ff230d54..0b7fa88aa34c 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -109,6 +109,47 @@ SYSCALL_DEFINE3(get_robust_list, int, pid, return put_user(head, head_ptr); }
+SYSCALL_DEFINE4(set_robust_list2, struct robust_list_head *, head, unsigned int, + index, unsigned int, cmd, unsigned int, flags) +{ + uintptr_t entry = (__force uintptr_t)head; + size_t align = sizeof(u32); + + if (flags) + return -EINVAL; + + if (cmd >= FUTEX_ROBUST_LIST_CMD_USER_MAX) + return -EINVAL; + + if (index >= FUTEX_ROBUST_LISTS_PER_USER) + return -EINVAL; + + /* + * The first two indexes are reserved for the kernel to be used with the + * legacy syscall, so we hide them from userspace. + * + * We map [0, FUTEX_ROBUST_LISTS_PER_USER) to + * [FUTEX_ROBUST_LIST2_IDX, FUTEX_ROBUST_LIST2_MAX_IDX) + */ + index += FUTEX_ROBUST_LIST2_IDX; + + switch (cmd) { + case FUTEX_ROBUST_LIST_CMD_SET_64: + if (futex_in_32bit_syscall()) + return -EOPNOTSUPP; + align = sizeof(u64); + fallthrough; + case FUTEX_ROBUST_LIST_CMD_SET_32: + if (entry % align) + return -EINVAL; + return futex_robust_list_set(entry, cmd, index); + case FUTEX_ROBUST_LIST_CMD_LIST_LIMIT: + return FUTEX_ROBUST_LISTS_PER_USER; + } + + return -EINVAL; +} + long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, u32 __user *uaddr2, u32 val2, u32 val3) {
As in the original robust list interface, to pair with set_robust_list2(), create a get_robust_list2() syscall with the following signature:
get_robust_list2(int pid, void __user **head_ptr, unsigned int index, unsigned int flags)
- `pid` selects which task's list should be returned. If it is 0, it gets the list of the calling task. - `index` is the index of the list to get - `flags` is unused but can be used for expanding the interface
Signed-off-by: André Almeida andrealmeid@igalia.com --- For some reason I wasn't able to use put_user() for 32-bit lists.. it kept corrupting the value due to wrong write size I believe. copy_to_user() worked fine nonetheless. --- kernel/futex/syscalls.c | 61 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 57 insertions(+), 4 deletions(-)
diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index 0b7fa88aa34c..f730d16632fc 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -48,7 +48,7 @@ static inline void __user *futex_task_robust_list(struct task_struct *p, bool co return p->robust_list; }
-static void __user *futex_get_robust_list_common(int pid, bool compat) +static void __user *futex_get_robust_list_common(int pid, bool compat, int index) { struct task_struct *p = current; void __user *head; @@ -75,7 +75,15 @@ static void __user *futex_get_robust_list_common(int pid, bool compat) if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) goto err_unlock;
- head = futex_task_robust_list(p, compat); + if (index >= 0) { + scoped_guard(mutex, &p->futex_exit_mutex) { + uintptr_t *rl = p->futex_robust_lists; + + head = rl ? (void __user *) rl[index] : NULL; + } + } else { + head = futex_task_robust_list(p, compat); + }
up_read(&p->signal->exec_update_lock); put_task_struct(p); @@ -99,7 +107,7 @@ SYSCALL_DEFINE3(get_robust_list, int, pid, struct robust_list_head __user * __user *, head_ptr, size_t __user *, len_ptr) { - struct robust_list_head __user *head = futex_get_robust_list_common(pid, false); + struct robust_list_head __user *head = futex_get_robust_list_common(pid, false, -1);
if (IS_ERR(head)) return PTR_ERR(head); @@ -150,6 +158,51 @@ SYSCALL_DEFINE4(set_robust_list2, struct robust_list_head *, head, unsigned int, return -EINVAL; }
+SYSCALL_DEFINE4(get_robust_list2, int, pid, + void __user * __user *, head_ptr, + unsigned int, index, unsigned int, flags) +{ + void __user *entry_ptr; + uintptr_t entry; + + if (index >= FUTEX_ROBUST_LISTS_PER_USER) + return -EINVAL; + + if (flags) + return -EINVAL; + + /* + * The first two indexes are reserved for the kernel to be used with the + * legacy syscall, so we hide them from userspace. + * + * We map [0, FUTEX_ROBUST_LISTS_PER_USER) to + * [FUTEX_ROBUST_LIST2_IDX, FUTEX_ROBUST_LIST2_MAX_IDX) + */ + index += FUTEX_ROBUST_LIST2_IDX; + + entry_ptr = futex_get_robust_list_common(pid, false, index); + if (IS_ERR(entry_ptr)) + return PTR_ERR(entry_ptr); + + entry = (uintptr_t) entry_ptr; + + if (entry & FUTEX_ROBUST_LIST_ENTRY_32BIT) { + entry &= FUTEX_ROBUST_LIST_ENTRY_MASK; + + if (copy_to_user(head_ptr, &entry, sizeof(u32))) + return -EFAULT; + + return 0; + } else { + struct robust_list_head *head; + + entry &= FUTEX_ROBUST_LIST_ENTRY_MASK; + head = (__force struct robust_list_head __user *)entry; + + return put_user(head, head_ptr); + } +} + long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, u32 __user *uaddr2, u32 val2, u32 val3) { @@ -524,7 +577,7 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, compat_uptr_t __user *, head_ptr, compat_size_t __user *, len_ptr) { - struct robust_list_head32 __user *head = futex_get_robust_list_common(pid, true); + struct robust_list_head32 __user *head = futex_get_robust_list_common(pid, true, -1);
if (IS_ERR(head)) return PTR_ERR(head);
Wire up the new set_robust_list2 syscall in all available architectures.
Signed-off-by: André Almeida andrealmeid@igalia.com --- arch/alpha/kernel/syscalls/syscall.tbl | 1 + arch/arm/tools/syscall.tbl | 1 + arch/m68k/kernel/syscalls/syscall.tbl | 1 + arch/microblaze/kernel/syscalls/syscall.tbl | 1 + arch/mips/kernel/syscalls/syscall_n32.tbl | 1 + arch/mips/kernel/syscalls/syscall_n64.tbl | 1 + arch/mips/kernel/syscalls/syscall_o32.tbl | 1 + arch/parisc/kernel/syscalls/syscall.tbl | 1 + arch/powerpc/kernel/syscalls/syscall.tbl | 1 + arch/s390/kernel/syscalls/syscall.tbl | 1 + arch/sh/kernel/syscalls/syscall.tbl | 1 + arch/sparc/kernel/syscalls/syscall.tbl | 1 + arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + arch/xtensa/kernel/syscalls/syscall.tbl | 1 + include/uapi/asm-generic/unistd.h | 5 ++++- kernel/sys_ni.c | 1 + scripts/syscall.tbl | 1 + 18 files changed, 21 insertions(+), 1 deletion(-)
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 16dca28ebf17..d0cb7b902cc6 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -509,3 +509,4 @@ 577 common open_tree_attr sys_open_tree_attr 578 common file_getattr sys_file_getattr 579 common file_setattr sys_file_setattr +580 common set_robust_list2 sys_set_robust_list2 diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index b07e699aaa3c..910e6e14ccf0 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -484,3 +484,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common set_robust_list2 sys_set_robust_list2 diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index f41d38dfbf13..eee3f320483d 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -469,3 +469,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common set_robust_list2 sys_set_robust_list2 diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index 580af574fe73..6c69d8ebbc38 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -475,3 +475,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common set_robust_list2 sys_set_robust_list2 diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index d824ffe9a014..f70db3741b0e 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -408,3 +408,4 @@ 467 n32 open_tree_attr sys_open_tree_attr 468 n32 file_getattr sys_file_getattr 469 n32 
file_setattr sys_file_setattr +470 n32 set_robust_list2 sys_set_robust_list2 diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 7a7049c2c307..9480488f9495 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -384,3 +384,4 @@ 467 n64 open_tree_attr sys_open_tree_attr 468 n64 file_getattr sys_file_getattr 469 n64 file_setattr sys_file_setattr +470 n64 set_robust_list2 sys_set_robust_list2 diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index d330274f0601..2761c9cd8946 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -457,3 +457,4 @@ 467 o32 open_tree_attr sys_open_tree_attr 468 o32 file_getattr sys_file_getattr 469 o32 file_setattr sys_file_setattr +470 o32 set_robust_list2 sys_set_robust_list2 diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 88a788a7b18d..eb37fda5c48f 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -468,3 +468,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common set_robust_list2 sys_set_robust_list2 diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index b453e80dfc00..472bebec449d 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -560,3 +560,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common set_robust_list2 sys_set_robust_list2 diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 8a6744d658db..ba7fac304941 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -472,3 
+472,4 @@ 467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr sys_file_setattr +470 common set_robust_list2 sys_set_robust_list2 sys_set_robust_list2 diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index 5e9c9eff5539..c05c94a742be 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -473,3 +473,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common set_robust_list2 sys_set_robust_list2 diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index ebb7d06d1044..3a59f3008325 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -515,3 +515,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common set_robust_list2 sys_set_robust_list2 diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 4877e16da69a..e9d6e1a1d777 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -475,3 +475,4 @@ 467 i386 open_tree_attr sys_open_tree_attr 468 i386 file_getattr sys_file_getattr 469 i386 file_setattr sys_file_setattr +470 i386 set_robust_list2 sys_set_robust_list2 diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index ced2a1deecd7..8fdcf090300d 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -394,6 +394,7 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common set_robust_list2 sys_set_robust_list2
# # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 374e4cb788d8..d7bb6b9104dd 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -440,3 +440,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common set_robust_list2 sys_set_robust_list2 diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 04e0077fb4c9..44fc87287983 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -858,8 +858,11 @@ __SYSCALL(__NR_file_getattr, sys_file_getattr) #define __NR_file_setattr 469 __SYSCALL(__NR_file_setattr, sys_file_setattr)
+#define __NR_set_robust_list2 470 +__SYSCALL(__NR_set_robust_list2, sys_set_robust_list2) + #undef __NR_syscalls -#define __NR_syscalls 470 +#define __NR_syscalls 471
/* * 32 bit systems traditionally used different diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index bf5d05c635ff..0ca2cfe69b11 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -172,6 +172,7 @@ COND_SYSCALL_COMPAT(fadvise64_64); COND_SYSCALL(lsm_get_self_attr); COND_SYSCALL(lsm_set_self_attr); COND_SYSCALL(lsm_list_modules); +COND_SYSCALL(set_robust_list2);
/* CONFIG_MMU only */ COND_SYSCALL(swapon); diff --git a/scripts/syscall.tbl b/scripts/syscall.tbl index d1ae5e92c615..58c334aa8922 100644 --- a/scripts/syscall.tbl +++ b/scripts/syscall.tbl @@ -410,3 +410,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common set_robust_list2 sys_set_robust_list2
Wire up the new get_robust_list2 syscall in all available architectures.
Signed-off-by: André Almeida andrealmeid@igalia.com --- arch/alpha/kernel/syscalls/syscall.tbl | 1 + arch/arm/tools/syscall.tbl | 1 + arch/m68k/kernel/syscalls/syscall.tbl | 1 + arch/microblaze/kernel/syscalls/syscall.tbl | 1 + arch/mips/kernel/syscalls/syscall_n32.tbl | 1 + arch/mips/kernel/syscalls/syscall_n64.tbl | 1 + arch/mips/kernel/syscalls/syscall_o32.tbl | 1 + arch/parisc/kernel/syscalls/syscall.tbl | 1 + arch/powerpc/kernel/syscalls/syscall.tbl | 1 + arch/s390/kernel/syscalls/syscall.tbl | 1 + arch/sh/kernel/syscalls/syscall.tbl | 1 + arch/sparc/kernel/syscalls/syscall.tbl | 1 + arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + arch/xtensa/kernel/syscalls/syscall.tbl | 1 + include/uapi/asm-generic/unistd.h | 4 +++- kernel/sys_ni.c | 1 + 17 files changed, 19 insertions(+), 1 deletion(-)
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index d0cb7b902cc6..b4a42beda6db 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -510,3 +510,4 @@ 578 common file_getattr sys_file_getattr 579 common file_setattr sys_file_setattr 580 common set_robust_list2 sys_set_robust_list2 +581 common get_robust_list2 sys_get_robust_list2 diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 910e6e14ccf0..d4a4d8446cb0 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -485,3 +485,4 @@ 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr 470 common set_robust_list2 sys_set_robust_list2 +471 common get_robust_list2 sys_get_robust_list2 diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index eee3f320483d..c2f1c5a3313c 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -470,3 +470,4 @@ 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr 470 common set_robust_list2 sys_set_robust_list2 +471 common get_robust_list2 sys_get_robust_list2 diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index 6c69d8ebbc38..1389dd194eec 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -476,3 +476,4 @@ 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr 470 common set_robust_list2 sys_set_robust_list2 +471 common get_robust_list2 sys_get_robust_list2 diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index f70db3741b0e..e149d2ddbc2f 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -409,3 +409,4 @@ 468 n32 file_getattr sys_file_getattr 469 n32 file_setattr sys_file_setattr 470 
n32 set_robust_list2 sys_set_robust_list2 +471 n32 get_robust_list2 sys_get_robust_list2 diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 9480488f9495..7ddddc89a751 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -385,3 +385,4 @@ 468 n64 file_getattr sys_file_getattr 469 n64 file_setattr sys_file_setattr 470 n64 set_robust_list2 sys_set_robust_list2 +471 n64 get_robust_list2 sys_get_robust_list2 diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 2761c9cd8946..c0a5ebafed1a 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -458,3 +458,4 @@ 468 o32 file_getattr sys_file_getattr 469 o32 file_setattr sys_file_setattr 470 o32 set_robust_list2 sys_set_robust_list2 +471 o32 get_robust_list2 sys_get_robust_list2 diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index eb37fda5c48f..4c6cb64ec113 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -469,3 +469,4 @@ 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr 470 common set_robust_list2 sys_set_robust_list2 +471 common get_robust_list2 sys_get_robust_list2 diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 472bebec449d..1475fa6b3ee3 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -561,3 +561,4 @@ 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr 470 common set_robust_list2 sys_set_robust_list2 +471 common get_robust_list2 sys_get_robust_list2 diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index ba7fac304941..b8161ee922ef 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++
b/arch/s390/kernel/syscalls/syscall.tbl @@ -473,3 +473,4 @@ 468 common file_getattr sys_file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr sys_file_setattr 470 common set_robust_list2 sys_set_robust_list2 sys_set_robust_list2 +471 common get_robust_list2 sys_get_robust_list2 sys_get_robust_list2 diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index c05c94a742be..566baa152634 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -474,3 +474,4 @@ 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr 470 common set_robust_list2 sys_set_robust_list2 +471 common get_robust_list2 sys_get_robust_list2 diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 3a59f3008325..fb3844c17711 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -516,3 +516,4 @@ 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr 470 common set_robust_list2 sys_set_robust_list2 +471 common get_robust_list2 sys_get_robust_list2 diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index e9d6e1a1d777..0df93458ef37 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -476,3 +476,4 @@ 468 i386 file_getattr sys_file_getattr 469 i386 file_setattr sys_file_setattr 470 i386 set_robust_list2 sys_set_robust_list2 +471 i386 get_robust_list2 sys_get_robust_list2 diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 8fdcf090300d..e7fdcc3d6e52 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -395,6 +395,7 @@ 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr 470 common set_robust_list2 sys_set_robust_list2 +471 common get_robust_list2 sys_get_robust_list2
# # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index d7bb6b9104dd..bd63dbc78c0e 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -441,3 +441,4 @@ 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr 470 common set_robust_list2 sys_set_robust_list2 +471 common get_robust_list2 sys_get_robust_list2 diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 44fc87287983..9539e893c9ac 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -860,9 +860,11 @@ __SYSCALL(__NR_file_setattr, sys_file_setattr)
#define __NR_set_robust_list2 470 __SYSCALL(__NR_set_robust_list2, sys_set_robust_list2) +#define __NR_get_robust_list2 471 +__SYSCALL(__NR_get_robust_list2, sys_get_robust_list2)
#undef __NR_syscalls -#define __NR_syscalls 471 +#define __NR_syscalls 472
/* * 32 bit systems traditionally used different diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 0ca2cfe69b11..0a7f7634446c 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -173,6 +173,7 @@ COND_SYSCALL(lsm_get_self_attr); COND_SYSCALL(lsm_set_self_attr); COND_SYSCALL(lsm_list_modules); COND_SYSCALL(set_robust_list2); +COND_SYSCALL(get_robust_list2);
/* CONFIG_MMU only */ COND_SYSCALL(swapon); diff --git a/scripts/syscall.tbl b/scripts/syscall.tbl --- a/scripts/syscall.tbl +++ b/scripts/syscall.tbl @@ -411,3 +411,4 @@ 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr 470 common set_robust_list2 sys_set_robust_list2 +471 common get_robust_list2 sys_get_robust_list2
Hi André,
kernel test robot noticed the following build warnings:
[auto build test WARNING on c42ba5a87bdccbca11403b7ca8bad1a57b833732]
url: https://github.com/intel-lab-lkp/linux/commits/Andr-Almeida/futex-Use-explic... base: c42ba5a87bdccbca11403b7ca8bad1a57b833732 patch link: https://lore.kernel.org/r/20251122-tonyk-robust_futex-v6-6-05fea005a0fd%40ig... patch subject: [PATCH v6 6/9] futex: Wire up get_robust_list2 syscall config: arc-allnoconfig (https://download.01.org/0day-ci/archive/20251122/202511221454.rsysOoSt-lkp@i...) compiler: arc-linux-gcc (GCC) 15.1.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251122/202511221454.rsysOoSt-lkp@i...)
If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot lkp@intel.com | Closes: https://lore.kernel.org/oe-kbuild-all/202511221454.rsysOoSt-lkp@intel.com/
All warnings (new ones prefixed by >>):
<stdin>:1627:2: warning: #warning syscall get_robust_list2 not implemented [-Wcpp]
--
<stdin>:1627:2: warning: #warning syscall get_robust_list2 not implemented [-Wcpp]
Hi André,
kernel test robot noticed the following build errors:
[auto build test ERROR on c42ba5a87bdccbca11403b7ca8bad1a57b833732]
url: https://github.com/intel-lab-lkp/linux/commits/Andr-Almeida/futex-Use-explic... base: c42ba5a87bdccbca11403b7ca8bad1a57b833732 patch link: https://lore.kernel.org/r/20251122-tonyk-robust_futex-v6-6-05fea005a0fd%40ig... patch subject: [PATCH v6 6/9] futex: Wire up get_robust_list2 syscall config: powerpc-allnoconfig (https://download.01.org/0day-ci/archive/20251122/202511221516.vYMzvSVO-lkp@i...) compiler: powerpc-linux-gcc (GCC) 15.1.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251122/202511221516.vYMzvSVO-lkp@i...)
If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot lkp@intel.com | Closes: https://lore.kernel.org/oe-kbuild-all/202511221516.vYMzvSVO-lkp@intel.com/
All errors (new ones prefixed by >>):
error: arch/powerpc/kernel/syscalls/syscall.tbl: syscall table is not sorted or duplicates the same syscall number
make[3]: *** [arch/powerpc/kernel/syscalls/Makefile:31: arch/powerpc/include/generated/asm/syscall_table_64.h] Error 1 make[3]: *** Deleting file 'arch/powerpc/include/generated/asm/syscall_table_64.h'
error: arch/powerpc/kernel/syscalls/syscall.tbl: syscall table is not sorted or duplicates the same syscall number
make[3]: *** [arch/powerpc/kernel/syscalls/Makefile:35: arch/powerpc/include/generated/asm/syscall_table_spu.h] Error 1 make[3]: *** Deleting file 'arch/powerpc/include/generated/asm/syscall_table_spu.h'
error: arch/powerpc/kernel/syscalls/syscall.tbl: syscall table is not sorted or duplicates the same syscall number
make[3]: *** [arch/powerpc/kernel/syscalls/Makefile:27: arch/powerpc/include/generated/asm/syscall_table_32.h] Error 1 make[3]: *** Deleting file 'arch/powerpc/include/generated/asm/syscall_table_32.h' make[3]: Target 'all' not remade because of errors. make[2]: *** [arch/powerpc/Makefile:397: archheaders] Error 2 make[2]: Target 'prepare' not remade because of errors. make[1]: *** [Makefile:248: __sub-make] Error 2 make[1]: Target 'prepare' not remade because of errors. make: *** [Makefile:248: __sub-make] Error 2 make: Target 'prepare' not remade because of errors.
Hi André,
kernel test robot noticed the following build warnings:
[auto build test WARNING on c42ba5a87bdccbca11403b7ca8bad1a57b833732]
url: https://github.com/intel-lab-lkp/linux/commits/Andr-Almeida/futex-Use-explic... base: c42ba5a87bdccbca11403b7ca8bad1a57b833732 patch link: https://lore.kernel.org/r/20251122-tonyk-robust_futex-v6-6-05fea005a0fd%40ig... patch subject: [PATCH v6 6/9] futex: Wire up get_robust_list2 syscall config: hexagon-allnoconfig (https://download.01.org/0day-ci/archive/20251122/202511221522.uRpEUT5O-lkp@i...) compiler: clang version 22.0.0git (https://github.com/llvm/llvm-project 9e9fe08b16ea2c4d9867fb4974edf2a3776d6ece) reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251122/202511221522.uRpEUT5O-lkp@i...)
If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot lkp@intel.com | Closes: https://lore.kernel.org/oe-kbuild-all/202511221522.uRpEUT5O-lkp@intel.com/
All warnings (new ones prefixed by >>):
<stdin>:1627:2: warning: syscall get_robust_list2 not implemented [-W#warnings]
1627 | #warning syscall get_robust_list2 not implemented | ^ 1 warning generated. --
<stdin>:1627:2: warning: syscall get_robust_list2 not implemented [-W#warnings]
1627 | #warning syscall get_robust_list2 not implemented | ^ 1 warning generated.
Reuse the same selftest for the original set_robust_list() syscall for the new set_robust_list2() syscall. Use kselftest variants feature to run the relevant tests for both interfaces. Create new test cases for checking invalid parameters, the ability to correctly set multiple lists for the same task, and to use 32-bit lists in a 64-bit task.
Signed-off-by: André Almeida andrealmeid@igalia.com --- .../selftests/futex/functional/robust_list.c | 409 +++++++++++++++++++-- 1 file changed, 387 insertions(+), 22 deletions(-)
diff --git a/tools/testing/selftests/futex/functional/robust_list.c b/tools/testing/selftests/futex/functional/robust_list.c index e7d1254e18ca..bf47e9ab2951 100644 --- a/tools/testing/selftests/futex/functional/robust_list.c +++ b/tools/testing/selftests/futex/functional/robust_list.c @@ -42,6 +42,27 @@
#define SLEEP_US 100
+#ifndef SYS_set_robust_list2 +# define SYS_set_robust_list2 470 + +enum robust_list_cmd { + FUTEX_ROBUST_LIST_CMD_SET_64, + FUTEX_ROBUST_LIST_CMD_SET_32, + FUTEX_ROBUST_LIST_CMD_LIST_LIMIT, + FUTEX_ROBUST_LIST_CMD_USER_MAX, +}; + +struct robust_list32 { + uint32_t next; +}; + +struct robust_list_head32 { + struct robust_list32 list; + int32_t futex_offset; + uint32_t list_op_pending; +}; +#endif + static pthread_barrier_t barrier, barrier2;
static int set_robust_list(struct robust_list_head *head, size_t len) @@ -54,6 +75,58 @@ static int get_robust_list(int pid, struct robust_list_head **head, size_t *len_ return syscall(SYS_get_robust_list, pid, head, len_ptr); }
+static int set_robust_list2(struct robust_list_head *head, int index, + enum robust_list_cmd cmd, unsigned int flags) +{ + return syscall(SYS_set_robust_list2, head, index, cmd, flags); +} + +static bool robust_list2_support(void) +{ + int ret = set_robust_list2(0, 0, FUTEX_ROBUST_LIST_CMD_LIST_LIMIT, 0); + + if (ret == -1 && errno == ENOSYS) + return false; + + return true; +} + +/* + * Return the set command according to the app bitness + */ +static int get_cmd_set(void) +{ + return sizeof(uintptr_t) == 8 ? FUTEX_ROBUST_LIST_CMD_SET_64 : + FUTEX_ROBUST_LIST_CMD_SET_32; +} + +FIXTURE(robust_api) {}; + +FIXTURE_VARIANT(robust_api) +{ + bool robust2; +}; + +FIXTURE_SETUP(robust_api) +{ + if (!variant->robust2) + return; + + ASSERT_NE(robust_list2_support(), false); +} + +FIXTURE_TEARDOWN(robust_api) {} + +FIXTURE_VARIANT_ADD(robust_api, robust1) +{ + .robust2 = false, +}; + +FIXTURE_VARIANT_ADD(robust_api, robust2) +{ + .robust2 = true, +}; + /* * Basic lock struct, contains just the futex word and the robust list element * Real implementations have also a *prev to easily walk in the list @@ -61,6 +134,12 @@ static int get_robust_list(int pid, struct robust_list_head **head, size_t *len_ struct lock_struct { _Atomic(unsigned int) futex; struct robust_list list; + bool robust2; +}; + +struct lock_struct32 { + _Atomic(uint32_t) futex; + struct robust_list32 list; };
/* @@ -89,20 +168,17 @@ static int create_child(int (*fn)(void *arg), void *arg) /* * Helper function to prepare and register a robust list */ -static int set_list(struct robust_list_head *head) +static int set_list(struct robust_list_head *head, bool robust2, int index) { - int ret; - - ret = set_robust_list(head, sizeof(*head)); - if (ret) - return ret; - head->futex_offset = (size_t) offsetof(struct lock_struct, futex) - (size_t) offsetof(struct lock_struct, list); head->list.next = &head->list; head->list_op_pending = NULL;
- return 0; + if (!robust2) + return set_robust_list(head, sizeof(*head)); + + return set_robust_list2(head, index, get_cmd_set(), 0); }
/* @@ -174,7 +250,7 @@ static int child_fn_lock(void *arg) struct robust_list_head head; int ret;
- ret = set_list(&head); + ret = set_list(&head, lock->robust2, 0); if (ret) { ksft_test_result_fail("set_robust_list error\n"); return ret; @@ -204,14 +280,16 @@ static int child_fn_lock(void *arg) * in the robust list and die. The parent thread will wait on this futex, and * should be waken up when the child exits. */ -TEST(test_robustness) +TEST_F(robust_api, test_robustness) { struct lock_struct lock = { .futex = 0 }; _Atomic(unsigned int) *futex = &lock.futex; - struct robust_list_head head; int ret, pid, wstatus; + struct robust_list_head head;
- ret = set_list(&head); + lock.robust2 = variant->robust2; + + ret = set_list(&head, lock.robust2, 0); ASSERT_EQ(ret, 0);
/* @@ -270,6 +348,46 @@ TEST(test_set_robust_list_invalid_size) ksft_test_result_pass("%s\n", __func__); }
+/* + * Test invalid parameters + */ +TEST(test_set_robust_list2_inval) +{ + struct robust_list_head head; + int ret, list_limit; + + if (!robust_list2_support()) { + ksft_test_result_skip("robust_list2 not supported\n"); + return; + } + + /* Bad flag */ + ret = set_robust_list2(&head, 0, get_cmd_set(), 999); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EINVAL); + + /* Bad index */ + list_limit = set_robust_list2(NULL, 0, FUTEX_ROBUST_LIST_CMD_LIST_LIMIT, 0); + ASSERT_GT(list_limit, 0); + + ret = set_robust_list2(&head, -1, get_cmd_set(), 0); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EINVAL); + + ret = set_robust_list2(&head, list_limit + 1, get_cmd_set(), 0); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EINVAL); + + /* Bad command */ + ret = set_robust_list2(&head, 0, FUTEX_ROBUST_LIST_CMD_USER_MAX, 0); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EINVAL); + + ret = set_robust_list2(&head, 0, -1, 0); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EINVAL); +} + /* * Test get_robust_list with pid = 0, getting the list of the running thread */ @@ -363,7 +481,7 @@ static int child_fn_lock_with_error(void *arg) struct robust_list_head head; int ret;
- ret = set_list(&head); + ret = set_list(&head, false, 0); if (ret) { ksft_test_result_fail("set_robust_list error\n"); return -1; @@ -388,14 +506,16 @@ static int child_fn_lock_with_error(void *arg) * earlier, just after setting list_op_pending and taking the lock, to test the * list_op_pending mechanism */ -TEST(test_set_list_op_pending) +TEST_F(robust_api, test_set_list_op_pending) { struct lock_struct lock = { .futex = 0 }; _Atomic(unsigned int) *futex = &lock.futex; - struct robust_list_head head; int ret, wstatus; + struct robust_list_head head; + + lock.robust2 = variant->robust2;
- ret = set_list(&head); + ret = set_list(&head, lock.robust2, 0); ASSERT_EQ(ret, 0);
ret = pthread_barrier_init(&barrier, NULL, 2); @@ -429,7 +549,7 @@ static int child_lock_holder(void *arg) struct robust_list_head head; int i;
- set_list(&head); + set_list(&head, locks[0].robust2, 0);
for (i = 0; i < CHILD_NR; i++) { locks[i].futex = 0; @@ -471,12 +591,19 @@ static int child_wait_lock(void *arg) * Test a robust list of more than one element. All the waiters should wake when * the holder dies */ -TEST(test_robust_list_multiple_elements) +TEST_F(robust_api, test_robust_list_multiple_elements) { struct lock_struct locks[CHILD_NR]; pid_t pids[CHILD_NR + 1]; int i, ret, wstatus;
+ if (!robust_list2_support()) { + ksft_test_result_skip("robust_list2 not supported\n"); + return; + } + + locks[0].robust2 = variant->robust2; + ret = pthread_barrier_init(&barrier, NULL, 2); ASSERT_EQ(ret, 0); ret = pthread_barrier_init(&barrier2, NULL, CHILD_NR + 1); @@ -507,13 +634,98 @@ TEST(test_robust_list_multiple_elements) ksft_test_result_pass("%s\n", __func__); }
+static int child_lock_holder_multiple_lists(void *arg) +{ + struct lock_struct *locks = arg; + struct robust_list_head *heads; + int i, list_limit; + + list_limit = set_robust_list2(NULL, 0, FUTEX_ROBUST_LIST_CMD_LIST_LIMIT, 0); + + heads = malloc(list_limit * sizeof(*heads)); + if (!heads) + return -1; + + for (i = 0; i < list_limit; i++) { + set_list(&heads[i], true, i); + locks[i].futex = 0; + mutex_lock(&locks[i], &heads[i], false); + } + + pthread_barrier_wait(&barrier); + pthread_barrier_wait(&barrier2); + + /* See comment at child_fn_lock() */ + usleep(SLEEP_US); + + return 0; +} + +/* + * Similar to test_robust_list_multiple_elements, but instead of one list with + * several elements, create several lists with one element. + */ +TEST(test_robust_list_multiple_lists) +{ + int i, ret, wstatus, list_limit; + struct lock_struct *locks; + pid_t *pids; + + if (!robust_list2_support()) { + ksft_test_result_skip("robust_list2 not supported\n"); + return; + } + + list_limit = set_robust_list2(NULL, 0, FUTEX_ROBUST_LIST_CMD_LIST_LIMIT, 0); + ASSERT_GT(list_limit, 1); + + locks = malloc(list_limit * sizeof(*locks)); + ASSERT_NE(locks, NULL); + + pids = malloc(list_limit * sizeof(*pids)); + ASSERT_NE(pids, NULL); + + ret = pthread_barrier_init(&barrier, NULL, 2); + ASSERT_EQ(ret, 0); + ret = pthread_barrier_init(&barrier2, NULL, list_limit + 1); + ASSERT_EQ(ret, 0); + + pids[0] = create_child(&child_lock_holder_multiple_lists, locks); + + /* Wait until the locker thread takes the look */ + pthread_barrier_wait(&barrier); + + for (i = 0; i < list_limit; i++) + pids[i+1] = create_child(&child_wait_lock, &locks[i]); + + /* Wait for all children to return */ + ret = 0; + + for (i = 0; i < list_limit; i++) { + waitpid(pids[i], &wstatus, 0); + if (WEXITSTATUS(wstatus)) + ret = -1; + } + + pthread_barrier_destroy(&barrier); + pthread_barrier_destroy(&barrier2); + + /* Pass only if the child hasn't return error */ + if (!ret) + ksft_test_result_pass("%s\n", __func__); + + 
free(locks); + free(pids); +} + static int child_circular_list(void *arg) { - static struct robust_list_head head; + struct robust_list_head head; struct lock_struct a, b, c; + bool robust2 = *(bool *) arg; int ret;
- ret = set_list(&head); + ret = set_list(&head, robust2, 0); if (ret) { ksft_test_result_fail("set_list error\n"); return -1; @@ -536,11 +748,12 @@ static int child_circular_list(void *arg) * while processing it so it won't be trapped in an infinite loop while handling * a process exit */ -TEST(test_circular_list) +TEST_F(robust_api, test_circular_list) { int wstatus; + bool robust2 = variant->robust2;
- create_child(child_circular_list, NULL); + create_child(child_circular_list, &robust2);
wait(&wstatus);
@@ -549,4 +762,156 @@ TEST(test_circular_list) ksft_test_result_pass("%s\n", __func__); }
+/* + * 32-bit version of child_lock_holder. + */ +static int child_lock_holder32(void *arg) +{ + struct lock_struct32 *locks = arg; + struct robust_list_head32 *head; + pid_t tid = gettid(); + int i, ret; + + head = mmap((void *)0x10000, sizeof(*head), PROT_READ | PROT_WRITE, + MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + if (!head || ((uint32_t)(uintptr_t) head) > 0x7FFFFFFF) { + ksft_test_result_fail("child_lock_holder32 error\n"); + return -1; + } + + head->futex_offset = (uint32_t) ((size_t) offsetof(struct lock_struct32, futex) - + (size_t) offsetof(struct lock_struct32, list)); + head->list.next = (uint32_t)(uintptr_t) &head->list; + head->list_op_pending = (uint32_t)(uintptr_t) NULL; + + ret = set_robust_list2((struct robust_list_head *) head, 0, + FUTEX_ROBUST_LIST_CMD_SET_32, 0); + if (ret) { + ksft_test_result_fail("set_robust_list2 error\n"); + return -1; + } + + /* + * Take all the locks and insert them in the list + */ + for (i = 0; i < CHILD_NR; i++) { + struct robust_list32 *list = &head->list; + + locks[i].futex = tid; + + while (list->next != (uint32_t)(uintptr_t) &head->list) + list = (struct robust_list32 *)(uintptr_t) list->next; + + list->next = (uint32_t)(uintptr_t) &locks[i].list; + locks[i].list.next = (uint32_t)(uintptr_t) &head->list; + } + + pthread_barrier_wait(&barrier); + pthread_barrier_wait(&barrier2); + + /* See comment at child_fn_lock() */ + usleep(SLEEP_US); + + /* Exit holding all the locks */ + return 0; +} + +static int child_wait_lock32(void *arg) +{ + struct lock_struct32 *lock = arg; + _Atomic(unsigned int) *futex; + struct timespec to; + pid_t tid; + int ret; + + futex = &lock->futex; + + pthread_barrier_wait(&barrier2); + + to.tv_sec = FUTEX_TIMEOUT; + to.tv_nsec = 0; + + tid = atomic_load(futex); + + /* Kernel ignores futexes without the waiters flag */ + tid |= FUTEX_WAITERS; + atomic_store(futex, tid); + + ret = futex_wait((futex_t *) futex, tid, &to, 0); + + if (ret) { + 
ksft_test_result_fail("futex_wait error\n"); + return -1; + } + + if (!(lock->futex & FUTEX_OWNER_DIED)) { + ksft_test_result_fail("futex not marked with FUTEX_OWNER_DIED\n"); + return -1; + } + + return 0; +} + +/* + * Test to create a 32-bit robust list in a 64-bit kernel. Replicate + * test_robust_list_multiple_elements, but it's simplified: don't do all the + * mutex lock dance, just insert futexes in the list and check if the kernel + * correctly walks the list and wake the threads + */ +TEST(test_32bit_lists) +{ + struct lock_struct32 *locks; + pid_t pids[CHILD_NR + 1]; + int i, ret, wstatus; + + if (sizeof(uintptr_t) != 8) { + ksft_test_result_skip("Test only for 64-bit\n"); + return; + } + + if (!robust_list2_support()) { + ksft_test_result_skip("robust_list2 not supported\n"); + return; + } + + locks = mmap((void *)0x20000, sizeof(*locks) * CHILD_NR, + PROT_READ | PROT_WRITE, MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_ANONYMOUS, + -1, 0); + + ASSERT_NE(locks, NULL); + ASSERT_LT((uintptr_t) locks, 0x7FFFFFFF); + + ret = pthread_barrier_init(&barrier, NULL, 2); + ASSERT_EQ(ret, 0); + ret = pthread_barrier_init(&barrier2, NULL, CHILD_NR + 1); + ASSERT_EQ(ret, 0); + + pids[0] = create_child(&child_lock_holder32, locks); + + /* Wait until the locker thread takes the look */ + pthread_barrier_wait(&barrier); + + for (i = 0; i < CHILD_NR; i++) + pids[i+1] = create_child(&child_wait_lock32, &locks[i]); + + /* Wait for all children to return */ + ret = 0; + + for (i = 0; i < CHILD_NR; i++) { + waitpid(pids[i], &wstatus, 0); + if (WEXITSTATUS(wstatus)) + ret = -1; + } + + pthread_barrier_destroy(&barrier); + pthread_barrier_destroy(&barrier2); + + /* Pass only if the child hasn't return error */ + if (!ret) + ksft_test_result_pass("%s\n", __func__); + + munmap(locks, sizeof(*locks) * CHILD_NR); +} + TEST_HARNESS_MAIN
Reuse the same selftest for the original set_robust_list() syscall for the new set_robust_list2() syscall. Use kselftest variants feature to run the relevant tests for both interfaces. Create a new test case to get different lists from the same task.
Signed-off-by: André Almeida andrealmeid@igalia.com --- .../selftests/futex/functional/robust_list.c | 95 ++++++++++++++++++---- 1 file changed, 81 insertions(+), 14 deletions(-)
diff --git a/tools/testing/selftests/futex/functional/robust_list.c b/tools/testing/selftests/futex/functional/robust_list.c index bf47e9ab2951..e6b26d7b9502 100644 --- a/tools/testing/selftests/futex/functional/robust_list.c +++ b/tools/testing/selftests/futex/functional/robust_list.c @@ -44,6 +44,7 @@
#ifndef SYS_set_robust_list2 # define SYS_set_robust_list2 470 +# define SYS_get_robust_list2 471
enum robust_list_cmd { FUTEX_ROBUST_LIST_CMD_SET_64, @@ -81,6 +82,12 @@ static int set_robust_list2(struct robust_list_head *head, int index, return syscall(SYS_set_robust_list2, head, index, cmd, flags); }
+static int get_robust_list2(int pid, struct robust_list_head **head, + unsigned int index, unsigned int flags) +{ + return syscall(SYS_get_robust_list2, pid, head, index, flags); +} + static bool robust_list2_support(void) { int ret = set_robust_list2(0, 0, FUTEX_ROBUST_LIST_CMD_LIST_LIMIT, 0); @@ -181,6 +188,23 @@ static int set_list(struct robust_list_head *head, bool robust2, int index) return set_robust_list2(head, index, get_cmd_set(), 0); }
+static int get_list(pid_t pid, struct robust_list_head **head, bool robust2, int index) +{ + int ret; + + if (!robust2) { + size_t len_ptr; + + ret = get_robust_list(pid, head, &len_ptr); + if (sizeof(**head) != len_ptr) + return -EINVAL; + + return ret; + } + + return get_robust_list2(pid, head, index, 0); +} + /* * A basic (and incomplete) mutex lock function with robustness */ @@ -391,37 +415,44 @@ TEST(test_set_robust_list2_inval) /* * Test get_robust_list with pid = 0, getting the list of the running thread */ -TEST(test_get_robust_list_self) +TEST_F(robust_api, test_get_robust_list_self) { struct robust_list_head head, head2, *get_head; - size_t head_size = sizeof(head), len_ptr; + bool robust2 = variant->robust2; int ret;
- ret = set_robust_list(&head, head_size); + ret = set_list(&head, robust2, 0); ASSERT_EQ(ret, 0);
- ret = get_robust_list(0, &get_head, &len_ptr); + ret = get_list(0, &get_head, robust2, 0); ASSERT_EQ(ret, 0); ASSERT_EQ(get_head, &head); - ASSERT_EQ(head_size, len_ptr);
- ret = set_robust_list(&head2, head_size); + ret = set_list(&head2, robust2, 0); ASSERT_EQ(ret, 0);
- ret = get_robust_list(0, &get_head, &len_ptr); + ret = get_list(0, &get_head, robust2, 0); ASSERT_EQ(ret, 0); ASSERT_EQ(get_head, &head2); - ASSERT_EQ(head_size, len_ptr);
ksft_test_result_pass("%s\n", __func__); }
+struct child_arg_struct { + struct robust_list_head *head; + bool robust2; +}; + static int child_list(void *arg) { - struct robust_list_head *head = arg; + struct child_arg_struct *child = arg; + struct robust_list_head *head; + bool robust2 = child->robust2; int ret;
- ret = set_robust_list(head, sizeof(*head)); + head = child->head; + + ret = set_list(head, robust2, 0); if (ret) { ksft_test_result_fail("set_robust_list error\n"); return -1; @@ -444,23 +475,26 @@ static int child_list(void *arg) * parent * 2) the child thread still alive when we try to get the list from it */ -TEST(test_get_robust_list_child) +TEST_F(robust_api, test_get_robust_list_child) { struct robust_list_head head, *get_head; + bool robust2 = variant->robust2; + struct child_arg_struct child = + {.robust2 = robust2, .head = &head}; int ret, wstatus; - size_t len_ptr; pid_t tid;
+ ret = pthread_barrier_init(&barrier, NULL, 2); ret = pthread_barrier_init(&barrier2, NULL, 2); ASSERT_EQ(ret, 0);
- tid = create_child(&child_list, &head); + tid = create_child(&child_list, &child); ASSERT_NE(tid, -1);
pthread_barrier_wait(&barrier);
- ret = get_robust_list(tid, &get_head, &len_ptr); + ret = get_list(tid, &get_head, robust2, 0); ASSERT_EQ(ret, 0); ASSERT_EQ(&head, get_head);
@@ -914,4 +948,37 @@ TEST(test_32bit_lists) munmap(locks, sizeof(*locks) * CHILD_NR); }
+/* + * Test setting and getting multiple head lists + */ +TEST(set_and_get_robust2) +{ + struct robust_list_head *head = NULL, *heads; + int i, list_limit, ret; + + if (!robust_list2_support()) { + ksft_test_result_skip("robust_list2 not supported\n"); + return; + } + + list_limit = set_robust_list2(NULL, 0, FUTEX_ROBUST_LIST_CMD_LIST_LIMIT, 0); + + heads = malloc(list_limit * sizeof(*heads)); + ASSERT_NE(heads, NULL); + + for (i = 0; i < list_limit; i++) { + ret = set_list(&heads[i], true, i); + ASSERT_EQ(ret, 0); + } + + for (i = 0; i < list_limit; i++) { + ret = get_list(0, &head, true, i); + ASSERT_EQ(ret, 0); + ASSERT_EQ(head, &heads[i]); + } + + free(heads); + ksft_test_result_pass("%s\n", __func__); +} + TEST_HARNESS_MAIN
The new robust list API internals can handle any kind of robust list, so to simplify the code, reuse the same mechanisms for the original API and when calling the original set syscall, set the head in the array of lists. The first two indexes of the array of robust lists are reserved for the original API lists, the native robust list and the compat robust list.
Signed-off-by: André Almeida andrealmeid@igalia.com --- include/linux/futex.h | 4 ---- include/linux/sched.h | 5 ----- kernel/futex/core.c | 12 ------------ kernel/futex/syscalls.c | 52 ++++++++++++++++++++++++------------------------- 4 files changed, 25 insertions(+), 48 deletions(-)
diff --git a/include/linux/futex.h b/include/linux/futex.h index 3dba249bcd32..ce27f6307c60 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -87,10 +87,6 @@ static inline bool futex_in_32bit_syscall(void)
static inline void futex_init_task(struct task_struct *tsk) { - tsk->robust_list = NULL; -#ifdef CONFIG_COMPAT - tsk->robust_list32 = NULL; -#endif tsk->futex_robust_lists = NULL; INIT_LIST_HEAD(&tsk->pi_state_list); tsk->pi_state_cache = NULL; diff --git a/include/linux/sched.h b/include/linux/sched.h index de2f3cbe4953..e0f28e7f0a2d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -75,7 +75,6 @@ struct pid_namespace; struct pipe_inode_info; struct rcu_node; struct reclaim_state; -struct robust_list_head; struct root_domain; struct rq; struct sched_attr; @@ -1330,11 +1329,7 @@ struct task_struct { u32 rmid; #endif #ifdef CONFIG_FUTEX - struct robust_list_head __user *robust_list; uintptr_t *futex_robust_lists; -#ifdef CONFIG_COMPAT - struct robust_list_head32 __user *robust_list32; -#endif struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; struct mutex futex_exit_mutex; diff --git a/kernel/futex/core.c b/kernel/futex/core.c index 14d8a7176367..f91df175033d 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -1500,18 +1500,6 @@ static void exit_robust_lists(struct task_struct *tsk)
static void futex_cleanup(struct task_struct *tsk) { - if (unlikely(tsk->robust_list)) { - exit_robust_list(tsk, tsk->robust_list); - tsk->robust_list = NULL; - } - -#ifdef CONFIG_64BIT - if (unlikely(tsk->robust_list32)) { - exit_robust_list32(tsk, tsk->robust_list32); - tsk->robust_list32 = NULL; - } -#endif - if (unlikely(tsk->futex_robust_lists)) exit_robust_lists(tsk);
diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index f730d16632fc..2a44791db37a 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -28,32 +28,29 @@ SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head, size_t, len) { + enum robust_list2_cmd cmd; /* * The kernel knows only one size for now: */ if (unlikely(len != sizeof(*head))) return -EINVAL;
- current->robust_list = head; + cmd = IS_ENABLED(CONFIG_64BIT) ? FUTEX_ROBUST_LIST_CMD_SET_64 : + FUTEX_ROBUST_LIST_CMD_SET_32;
- return 0; + return futex_robust_list_set((uintptr_t) head, cmd, + FUTEX_ROBUST_LIST_NATIVE_IDX); }
-static inline void __user *futex_task_robust_list(struct task_struct *p, bool compat) -{ -#ifdef CONFIG_COMPAT - if (compat) - return p->robust_list32; -#endif - return p->robust_list; -} - -static void __user *futex_get_robust_list_common(int pid, bool compat, int index) +static void __user *futex_get_robust_list_common(int pid, unsigned int index) { struct task_struct *p = current; void __user *head; int ret;
+ if (index >= FUTEX_ROBUST_LIST2_MAX_IDX) + return (void __user *)ERR_PTR(-EINVAL); + scoped_guard(rcu) { if (pid) { p = find_task_by_vpid(pid); @@ -75,14 +72,10 @@ static void __user *futex_get_robust_list_common(int pid, bool compat, int index if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) goto err_unlock;
- if (index >= 0) { - scoped_guard(mutex, &p->futex_exit_mutex) { - uintptr_t *rl = p->futex_robust_lists; + scoped_guard(mutex, &p->futex_exit_mutex) { + uintptr_t *rl = p->futex_robust_lists;
- head = rl ? (void __user *) rl[index] : NULL; - } - } else { - head = futex_task_robust_list(p, compat); + head = rl ? (void __user *) rl[index] : NULL; }
up_read(&p->signal->exec_update_lock); @@ -107,7 +100,11 @@ SYSCALL_DEFINE3(get_robust_list, int, pid, struct robust_list_head __user * __user *, head_ptr, size_t __user *, len_ptr) { - struct robust_list_head __user *head = futex_get_robust_list_common(pid, false, -1); + struct robust_list_head __user *head = + futex_get_robust_list_common(pid, FUTEX_ROBUST_LIST_NATIVE_IDX); + + head = (struct robust_list_head __user *) + ((uintptr_t) head & FUTEX_ROBUST_LIST_ENTRY_MASK);
if (IS_ERR(head)) return PTR_ERR(head); @@ -180,7 +177,7 @@ SYSCALL_DEFINE4(get_robust_list2, int, pid, */ index += FUTEX_ROBUST_LIST2_IDX;
- entry_ptr = futex_get_robust_list_common(pid, false, index); + entry_ptr = futex_get_robust_list_common(pid, index); if (IS_ERR(entry_ptr)) return PTR_ERR(entry_ptr);
@@ -568,22 +565,23 @@ COMPAT_SYSCALL_DEFINE2(set_robust_list, if (unlikely(len != sizeof(*head))) return -EINVAL;
- current->robust_list32 = head; - - return 0; + return futex_robust_list_set((uintptr_t) head, FUTEX_ROBUST_LIST_CMD_SET_32, + FUTEX_ROBUST_LIST_COMPAT_IDX); }
COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, compat_uptr_t __user *, head_ptr, compat_size_t __user *, len_ptr) { - struct robust_list_head32 __user *head = futex_get_robust_list_common(pid, true, -1); + struct robust_list_head32 __user *head = + futex_get_robust_list_common(pid, FUTEX_ROBUST_LIST_COMPAT_IDX);
- if (IS_ERR(head)) - return PTR_ERR(head); + head = (struct robust_list_head32 __user *) + ((uintptr_t) head & FUTEX_ROBUST_LIST_ENTRY_MASK);
if (put_user(sizeof(*head), len_ptr)) return -EFAULT; + return put_user(ptr_to_compat(head), head_ptr); } #endif /* CONFIG_COMPAT */
Em 22/11/2025 02:50, André Almeida escreveu:
Hello,
This version is a complete rewrite of the syscall (thanks Thomas for the suggestions!).
- Use case
The use-case for the new syscalls is detailed in the last patch version:
https://lore.kernel.org/lkml/20250626-tonyk-robust_futex-v5-0-179194dbde8f@i...
- The syscall interface
Documented at patches 3/9 "futex: Create set_robust_list2() syscall" and 4/9 "futex: Create get_robust_list2() syscall".
- Testing
I expanded the current robust list selftest to use the new interface, and also ported the original syscall to use the new syscall internals, and everything survived the tests.
- Changelog
Changes from v5:
- Complete interface rewrite, there are so many changes but the main ones are the following points
- Array of robust lists now has a static size, allocated once during the first usage of the list
- Now that the list of robust lists has a fixed size, I removed the logic of having a command for creating a new index on the list. To simplify things for everyone, userspace just needs to call set_robust_list2(head, 32-bit/64-bit type, index).
Actually, this won't work well. The application that calls set_robust_list2() may be using glibc, which will also want to call set_robust_list2(), and there's no way to know which lists are being used by each library. I will re-add the create/modify logic for the next version.
Hi André,
kernel test robot noticed the following build warnings:
[auto build test WARNING on c42ba5a87bdccbca11403b7ca8bad1a57b833732]
url: https://github.com/intel-lab-lkp/linux/commits/Andr-Almeida/futex-Use-explic... base: c42ba5a87bdccbca11403b7ca8bad1a57b833732 patch link: https://lore.kernel.org/r/20251122-tonyk-robust_futex-v6-4-05fea005a0fd%40ig... patch subject: [PATCH v6 4/9] futex: Create get_robust_list2() syscall config: i386-randconfig-063-20251123 (https://download.01.org/0day-ci/archive/20251123/202511231423.O6KdcL8r-lkp@i...) compiler: gcc-14 (Debian 14.2.0-19) 14.2.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251123/202511231423.O6KdcL8r-lkp@i...)
If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot lkp@intel.com | Closes: https://lore.kernel.org/oe-kbuild-all/202511231423.O6KdcL8r-lkp@intel.com/
sparse warnings: (new ones prefixed by >>)
kernel/futex/syscalls.c:200:22: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct robust_list_head *head @@ got struct robust_list_head [noderef] __user * @@
kernel/futex/syscalls.c:200:22: sparse: expected struct robust_list_head *head kernel/futex/syscalls.c:200:22: sparse: got struct robust_list_head [noderef] __user *
kernel/futex/syscalls.c:202:24: sparse: sparse: incorrect type in initializer (different address spaces) @@ expected void [noderef] __user *__x @@ got struct robust_list_head *head @@
kernel/futex/syscalls.c:202:24: sparse: expected void [noderef] __user *__x kernel/futex/syscalls.c:202:24: sparse: got struct robust_list_head *head
vim +200 kernel/futex/syscalls.c
160 161 SYSCALL_DEFINE4(get_robust_list2, int, pid, 162 void __user * __user *, head_ptr, 163 unsigned int, index, unsigned int, flags) 164 { 165 void __user *entry_ptr; 166 uintptr_t entry; 167 168 if (index >= FUTEX_ROBUST_LISTS_PER_USER) 169 return -EINVAL; 170 171 if (flags) 172 return -EINVAL; 173 174 /* 175 * The first two indexes are reserved for the kernel to be used with the 176 * legacy syscall, so we hide them from userspace. 177 * 178 * We map [0, FUTEX_ROBUST_LISTS_PER_USER) to 179 * [FUTEX_ROBUST_LIST2_IDX, FUTEX_ROBUST_LIST2_MAX_IDX) 180 */ 181 index += FUTEX_ROBUST_LIST2_IDX; 182 183 entry_ptr = futex_get_robust_list_common(pid, false, index); 184 if (IS_ERR(entry_ptr)) 185 return PTR_ERR(entry_ptr); 186 187 entry = (uintptr_t) entry_ptr; 188 189 if (entry & FUTEX_ROBUST_LIST_ENTRY_32BIT) { 190 entry &= FUTEX_ROBUST_LIST_ENTRY_MASK; 191 192 if (copy_to_user(head_ptr, &entry, sizeof(u32))) 193 return -EFAULT; 194 195 return 0; 196 } else { 197 struct robust_list_head *head; 198 199 entry &= FUTEX_ROBUST_LIST_ENTRY_MASK;
200 head = (__force struct robust_list_head __user *)entry;
201
202 return put_user(head, head_ptr);
203 } 204 } 205
Hi André,
kernel test robot noticed the following build warnings:
[auto build test WARNING on c42ba5a87bdccbca11403b7ca8bad1a57b833732]
url: https://github.com/intel-lab-lkp/linux/commits/Andr-Almeida/futex-Use-explic... base: c42ba5a87bdccbca11403b7ca8bad1a57b833732 patch link: https://lore.kernel.org/r/20251122-tonyk-robust_futex-v6-4-05fea005a0fd%40ig... patch subject: [PATCH v6 4/9] futex: Create get_robust_list2() syscall config: loongarch-randconfig-r122-20251123 (https://download.01.org/0day-ci/archive/20251123/202511231703.YmF7ihi0-lkp@i...) compiler: loongarch64-linux-gcc (GCC) 15.1.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251123/202511231703.YmF7ihi0-lkp@i...)
If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot lkp@intel.com | Closes: https://lore.kernel.org/oe-kbuild-all/202511231703.YmF7ihi0-lkp@intel.com/
sparse warnings: (new ones prefixed by >>) kernel/futex/syscalls.c:200:22: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct robust_list_head *head @@ got struct robust_list_head [noderef] __user * @@ kernel/futex/syscalls.c:200:22: sparse: expected struct robust_list_head *head kernel/futex/syscalls.c:200:22: sparse: got struct robust_list_head [noderef] __user *
kernel/futex/syscalls.c:202:24: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected void [noderef] __user *__pu_val @@ got struct robust_list_head *head @@
kernel/futex/syscalls.c:202:24: sparse: expected void [noderef] __user *__pu_val kernel/futex/syscalls.c:202:24: sparse: got struct robust_list_head *head
vim +202 kernel/futex/syscalls.c
160 161 SYSCALL_DEFINE4(get_robust_list2, int, pid, 162 void __user * __user *, head_ptr, 163 unsigned int, index, unsigned int, flags) 164 { 165 void __user *entry_ptr; 166 uintptr_t entry; 167 168 if (index >= FUTEX_ROBUST_LISTS_PER_USER) 169 return -EINVAL; 170 171 if (flags) 172 return -EINVAL; 173 174 /* 175 * The first two indexes are reserved for the kernel to be used with the 176 * legacy syscall, so we hide them from userspace. 177 * 178 * We map [0, FUTEX_ROBUST_LISTS_PER_USER) to 179 * [FUTEX_ROBUST_LIST2_IDX, FUTEX_ROBUST_LIST2_MAX_IDX) 180 */ 181 index += FUTEX_ROBUST_LIST2_IDX; 182 183 entry_ptr = futex_get_robust_list_common(pid, false, index); 184 if (IS_ERR(entry_ptr)) 185 return PTR_ERR(entry_ptr); 186 187 entry = (uintptr_t) entry_ptr; 188 189 if (entry & FUTEX_ROBUST_LIST_ENTRY_32BIT) { 190 entry &= FUTEX_ROBUST_LIST_ENTRY_MASK; 191 192 if (copy_to_user(head_ptr, &entry, sizeof(u32))) 193 return -EFAULT; 194 195 return 0; 196 } else { 197 struct robust_list_head *head; 198 199 entry &= FUTEX_ROBUST_LIST_ENTRY_MASK;
200 head = (__force struct robust_list_head __user *)entry;
201
202 return put_user(head, head_ptr);
203 } 204 } 205
linux-kselftest-mirror@lists.linaro.org