 
            From: Linus Torvalds torvalds@linux-foundation.org
commit 86e6b1547b3d013bc392adf775b89318441403c2 upstream.
It turns out that AMD has a "Meltdown Lite(tm)" issue with non-canonical accesses in kernel space. And so using just the high bit to decide whether an access is in user space or kernel space ends up with the good old "leak speculative data" if you have the right gadget using the result:
CVE-2020-12965 “Transient Execution of Non-Canonical Accesses“
Now, the kernel surrounds the access with a STAC/CLAC pair, and those instructions end up serializing execution on older Zen architectures, which closes the speculation window.
But that was true only up until Zen 5, which renames the AC bit [1]. That improves performance of STAC/CLAC a lot, but also means that the speculation window is now open.
Note that this affects not just the new address masking, but also the regular valid_user_address() check used by access_ok(), and the asm version of the sign bit check in the get_user() helpers.
It does not affect put_user() or clear_user() variants, since there's no speculative result to be used in a gadget for those operations.
Link: https://lore.kernel.org/all/80d94591-1297-4afb-b510-c665efd37f10@citrix.com/ Link: https://lore.kernel.org/all/20241023094448.GAZxjFkEOOF_DM83TQ@fat_crate.loca... [1] Link: https://www.amd.com/en/resources/product-security/bulletin/amd-sb-1010.html Link: https://arxiv.org/pdf/2108.10771 Cc: stable@vger.kernel.org # 6.10.x: e60cc61: vfs: dcache: move hashlen_hash() from callers into d_hash() Cc: stable@vger.kernel.org # 6.10.x: e782985: runtime constants: add default dummy infrastructure Cc: stable@vger.kernel.org # 6.10.x: e3c92e8: runtime constants: add x86 architecture support Fixes: 2865baf54077 ("x86: support user address masking instead of non-speculative conditional") Fixes: 6014bc27561f ("x86-64: make access_ok() independent of LAM") Fixes: b19b74bc99b1 ("x86/mm: Rework address range check in get_user() and put_user()") Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Jimmy Tran jtoantran@google.com --- arch/x86/include/asm/uaccess_64.h | 45 ++++++++++++++++++++----------- arch/x86/kernel/cpu/common.c | 10 +++++++ arch/x86/kernel/vmlinux.lds.S | 1 + arch/x86/lib/getuser.S | 9 +++++-- 4 files changed, 47 insertions(+), 18 deletions(-)
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index f2c02e4469ccc..e68eded5ee490 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -11,6 +11,13 @@ #include <asm/alternative.h> #include <asm/cpufeatures.h> #include <asm/page.h> +#include <asm/runtime-const.h> + +/* + * Virtual variable: there's no actual backing store for this, + * it can purely be used as 'runtime_const_ptr(USER_PTR_MAX)' + */ +extern unsigned long USER_PTR_MAX;
#ifdef CONFIG_ADDRESS_MASKING /* @@ -49,35 +56,41 @@ static inline unsigned long __untagged_addr_remote(struct mm_struct *mm,
#endif
+#define valid_user_address(x) \ + ((__force unsigned long)(x) <= runtime_const_ptr(USER_PTR_MAX)) + /* - * The virtual address space space is logically divided into a kernel - * half and a user half. When cast to a signed type, user pointers - * are positive and kernel pointers are negative. + * Masking the user address is an alternative to a conditional + * user_access_begin that can avoid the fencing. This only works + * for dense accesses starting at the address. */ -#define valid_user_address(x) ((long)(x) >= 0) +static inline void __user *mask_user_address(const void __user *ptr) +{ + unsigned long mask; + + asm("cmp %1,%0\n\t" + "sbb %0,%0" + : "=r" (mask) + : "r" (ptr), + "0" (runtime_const_ptr(USER_PTR_MAX))); + return (__force void __user *)(mask | (__force unsigned long)ptr); +}
/* * User pointers can have tag bits on x86-64. This scheme tolerates * arbitrary values in those bits rather then masking them off. * * Enforce two rules: - * 1. 'ptr' must be in the user half of the address space + * 1. 'ptr' must be in the user part of the address space * 2. 'ptr+size' must not overflow into kernel addresses * - * Note that addresses around the sign change are not valid addresses, - * and will GP-fault even with LAM enabled if the sign bit is set (see - * "CR3.LAM_SUP" that can narrow the canonicality check if we ever - * enable it, but not remove it entirely). - * - * So the "overflow into kernel addresses" does not imply some sudden - * exact boundary at the sign bit, and we can allow a lot of slop on the - * size check. + * Note that we always have at least one guard page between the + * max user address and the non-canonical gap, allowing us to + * ignore small sizes entirely. * * In fact, we could probably remove the size check entirely, since * any kernel accesses will be in increasing address order starting - * at 'ptr', and even if the end might be in kernel space, we'll - * hit the GP faults for non-canonical accesses before we ever get - * there. + * at 'ptr'. * * That's a separate optimization, for now just handle the small * constant case. diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f66c71bffa6d9..2369e85055c0e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -65,6 +65,7 @@ #include <asm/set_memory.h> #include <asm/traps.h> #include <asm/sev.h> +#include <asm/runtime-const.h>
#include "cpu.h"
@@ -2490,6 +2491,15 @@ void __init arch_cpu_finalize_init(void) alternative_instructions();
if (IS_ENABLED(CONFIG_X86_64)) { + unsigned long USER_PTR_MAX = TASK_SIZE_MAX-1; + + /* + * Enable this when LAM is gated on LASS support + if (cpu_feature_enabled(X86_FEATURE_LAM)) + USER_PTR_MAX = (1ul << 63) - PAGE_SIZE - 1; + */ + runtime_const_init(ptr, USER_PTR_MAX); + /* * Make sure the first 2MB area is not mapped by huge pages * There are typically fixed size MTRRs in there and overlapping diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index cb5b41480a848..a698819fd5d5f 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -373,6 +373,7 @@ SECTIONS
RUNTIME_CONST(shift, d_hash_shift) RUNTIME_CONST(ptr, dentry_hashtable) + RUNTIME_CONST(ptr, USER_PTR_MAX)
. = ALIGN(PAGE_SIZE);
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index 6913fbce6544f..ffa3fff259578 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -39,8 +39,13 @@
.macro check_range size:req .if IS_ENABLED(CONFIG_X86_64) - mov %rax, %rdx - sar $63, %rdx + movq $0x0123456789abcdef,%rdx + 1: + .pushsection runtime_ptr_USER_PTR_MAX,"a" + .long 1b - 8 - . + .popsection + cmp %rax, %rdx + sbb %rdx, %rdx or %rdx, %rax .else cmp $TASK_SIZE_MAX-\size+1, %eax