This patch series backports a critical security fix, identified as CVE-2020-12965 ("Transient Execution of Non-Canonical Accesses"), to the 6.6.y stable kernel tree.
commit 573f45a9f9a47fed4c7957609689b772121b33d7 upstream.
David Laight (1):
  x86: fix off-by-one in access_ok()

Linus Torvalds (5):
  vfs: dcache: move hashlen_hash() from callers into d_hash()
  runtime constants: add default dummy infrastructure
  runtime constants: add x86 architecture support
  arm64: add 'runtime constant' support
  x86: fix user address masking non-canonical speculation issue

 arch/arm64/include/asm/runtime-const.h | 92 ++++++++++++++++++++++++++
 arch/arm64/kernel/vmlinux.lds.S        |  3 +
 arch/x86/include/asm/runtime-const.h   | 61 +++++++++++++++++
 arch/x86/include/asm/uaccess_64.h      | 45 ++++++++-----
 arch/x86/kernel/cpu/common.c           | 10 +++
 arch/x86/kernel/vmlinux.lds.S          |  4 ++
 arch/x86/lib/getuser.S                 |  9 ++-
 fs/dcache.c                            | 17 +++--
 include/asm-generic/Kbuild             |  1 +
 include/asm-generic/runtime-const.h    | 15 +++++
 include/asm-generic/vmlinux.lds.h      |  8 +++
 11 files changed, 243 insertions(+), 22 deletions(-)
 create mode 100644 arch/arm64/include/asm/runtime-const.h
 create mode 100644 arch/x86/include/asm/runtime-const.h
 create mode 100644 include/asm-generic/runtime-const.h
--
2.50.0.727.gbf7dc18ff4-goog
From: Linus Torvalds <torvalds@linux-foundation.org>
commit e60cc61153e61e4e38bd983492df9959e82ae4dc upstream.
Both __d_lookup_rcu() and __d_lookup_rcu_op_compare() have the full 'name_hash' value of the qstr that they want to look up, and mask it off to just the low 32-bit hash before calling down to d_hash().
Other callers just load the 32-bit hash and pass it as the argument.
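For reference, the masking those two callers do today is nothing more than truncating the 64-bit hash_len value to its low 32 bits; a minimal sketch of the helpers involved (my reading of include/linux/stringhash.h, shown here purely for illustration):

	/* hash_len packs the 32-bit hash in the low word and the length above it */
	#define hashlen_hash(hashlen)	((u32)(hashlen))
	#define hashlen_len(hashlen)	((u32)((hashlen) >> 32))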
If we move the masking into d_hash() itself, it simplifies the two callers that currently do the masking, and is a no-op for the other cases. It doesn't actually change the generated code since the compiler will inline d_hash() and see that the end result is the same.
[ Technically, since the parse tree changes, the code generation may not be 100% the same, and for me on x86-64, this does result in gcc switching the operands around for one 'cmpl' instruction. So not necessarily the exact same code generation, but equivalent ]
However, this does encapsulate the 'd_hash()' operation more, and makes the shift operation in particular be a "shift 32 bits right, return full word". Which matches the instruction semantics on both x86-64 and arm64 better, since a 32-bit shift will clear the upper bits.
That makes the next step of introducing a "shift by runtime constant" more obvious and generates the shift with no extraneous type masking.
Cc: stable@vger.kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jimmy Tran <jtoantran@google.com>
---
 fs/dcache.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/fs/dcache.c b/fs/dcache.c
index 4030c010a7682..82adee104f82c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -100,9 +100,9 @@ static unsigned int d_hash_shift __read_mostly;
 
 static struct hlist_bl_head *dentry_hashtable __read_mostly;
 
-static inline struct hlist_bl_head *d_hash(unsigned int hash)
+static inline struct hlist_bl_head *d_hash(unsigned long hashlen)
 {
-	return dentry_hashtable + (hash >> d_hash_shift);
+	return dentry_hashtable + ((u32)hashlen >> d_hash_shift);
 }
 
 #define IN_LOOKUP_SHIFT 10
@@ -2286,7 +2286,7 @@ static noinline struct dentry *__d_lookup_rcu_op_compare(
 	unsigned *seqp)
 {
 	u64 hashlen = name->hash_len;
-	struct hlist_bl_head *b = d_hash(hashlen_hash(hashlen));
+	struct hlist_bl_head *b = d_hash(hashlen);
 	struct hlist_bl_node *node;
 	struct dentry *dentry;
 
@@ -2353,7 +2353,7 @@ struct dentry *__d_lookup_rcu(const struct dentry *parent,
 {
 	u64 hashlen = name->hash_len;
 	const unsigned char *str = name->name;
-	struct hlist_bl_head *b = d_hash(hashlen_hash(hashlen));
+	struct hlist_bl_head *b = d_hash(hashlen);
 	struct hlist_bl_node *node;
 	struct dentry *dentry;
 
From: Linus Torvalds <torvalds@linux-foundation.org>
commit e78298556ee5d881f6679effb2a6743969ea6e2d upstream.
This adds the initial dummy support for 'runtime constants' for when an architecture doesn't actually support an implementation of fixing up said runtime constants.
This ends up being the fallback to just using the variables as regular __ro_after_init variables, and changes the dcache d_hash() function to use this model.
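With this dummy fallback the macros are plain pass-throughs, so d_hash() should expand to exactly what it was before this change; a rough sketch of the post-preprocessing result (the function name here is made up for illustration):

	/* runtime_const_ptr(sym) -> (sym)
	 * runtime_const_shift_right_32(val, sym) -> (u32)(val) >> (sym) */
	static inline struct hlist_bl_head *d_hash_fallback(unsigned long hashlen)
	{
		return dentry_hashtable + ((u32)hashlen >> d_hash_shift);
	}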
Cc: stable@vger.kernel.org # 6.10.x: e60cc61: vfs: dcache: move hashlen_hash
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jimmy Tran <jtoantran@google.com>
---
 fs/dcache.c                         | 11 ++++++++++-
 include/asm-generic/Kbuild          |  1 +
 include/asm-generic/runtime-const.h | 15 +++++++++++++++
 include/asm-generic/vmlinux.lds.h   |  8 ++++++++
 4 files changed, 34 insertions(+), 1 deletion(-)
 create mode 100644 include/asm-generic/runtime-const.h
diff --git a/fs/dcache.c b/fs/dcache.c index 82adee104f82c..9e5c92b4b4aaa 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -35,6 +35,8 @@ #include "internal.h" #include "mount.h"
+#include <asm/runtime-const.h> + /* * Usage: * dcache->d_inode->i_lock protects: @@ -102,7 +104,8 @@ static struct hlist_bl_head *dentry_hashtable __read_mostly;
static inline struct hlist_bl_head *d_hash(unsigned long hashlen) { - return dentry_hashtable + ((u32)hashlen >> d_hash_shift); + return runtime_const_ptr(dentry_hashtable) + + runtime_const_shift_right_32(hashlen, d_hash_shift); }
#define IN_LOOKUP_SHIFT 10 @@ -3297,6 +3300,9 @@ static void __init dcache_init_early(void) 0, 0); d_hash_shift = 32 - d_hash_shift; + + runtime_const_init(shift, d_hash_shift); + runtime_const_init(ptr, dentry_hashtable); }
static void __init dcache_init(void) @@ -3325,6 +3331,9 @@ static void __init dcache_init(void) 0, 0); d_hash_shift = 32 - d_hash_shift; + + runtime_const_init(shift, d_hash_shift); + runtime_const_init(ptr, dentry_hashtable); }
/* SLAB cache for __getname() consumers */ diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index 941be574bbe00..22673ec5defbb 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -46,6 +46,7 @@ mandatory-y += pci.h mandatory-y += percpu.h mandatory-y += pgalloc.h mandatory-y += preempt.h +mandatory-y += runtime-const.h mandatory-y += rwonce.h mandatory-y += sections.h mandatory-y += serial.h diff --git a/include/asm-generic/runtime-const.h b/include/asm-generic/runtime-const.h new file mode 100644 index 0000000000000..3e68a17fbf287 --- /dev/null +++ b/include/asm-generic/runtime-const.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RUNTIME_CONST_H +#define _ASM_RUNTIME_CONST_H + +/* + * This is the fallback for when the architecture doesn't + * support the runtime const operations. + * + * We just use the actual symbols as-is. + */ +#define runtime_const_ptr(sym) (sym) +#define runtime_const_shift_right_32(val, sym) ((u32)(val)>>(sym)) +#define runtime_const_init(type, sym) do { } while (0) + +#endif diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index cf3f8b9bf43f0..66bfd3dc91a33 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -907,6 +907,14 @@ #define CON_INITCALL \ BOUNDED_SECTION_POST_LABEL(.con_initcall.init, __con_initcall, _start, _end)
+#define RUNTIME_NAME(t, x) runtime_##t##_##x + +#define RUNTIME_CONST(t, x) \ + . = ALIGN(8); \ + RUNTIME_NAME(t, x) : AT(ADDR(RUNTIME_NAME(t, x)) - LOAD_OFFSET) { \ + *(RUNTIME_NAME(t, x)); \ + } + /* Alignment must be consistent with (kunit_suite *) in include/kunit/test.h */ #define KUNIT_TABLE() \ . = ALIGN(8); \
From: Linus Torvalds <torvalds@linux-foundation.org>
commit e3c92e81711d14b46c3121d36bc8e152cb843923 upstream.
This implements the runtime constant infrastructure for x86, allowing the dcache d_hash() function to be generated using the hash table address as a constant, followed by a shift of the hash index by a constant.
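Loosely speaking, the win is that d_hash() no longer loads dentry_hashtable or d_hash_shift from memory at all: both become immediates that runtime_const_init() patches into the instruction stream during boot. A hedged sketch of what the patched code is equivalent to, with entirely made-up placeholder values:

	/* Illustration only: at init time the real table address is patched over
	 * the 0x0123456789abcdef immediate and the real shift over the 'shrl $12'. */
	static inline struct hlist_bl_head *d_hash_patched(unsigned long hashlen)
	{
		struct hlist_bl_head *table = (void *)0xffff888100000000UL;	/* patched mov imm64 */
		return table + ((u32)hashlen >> 14);				/* patched shrl imm8 */
	}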
Cc: stable@vger.kernel.org # 6.10.x: e60cc61: vfs: dcache: move hashlen_hash
Cc: stable@vger.kernel.org # 6.10.x: e782985: runtime constants: add default
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jimmy Tran <jtoantran@google.com>
---
 arch/x86/include/asm/runtime-const.h | 61 ++++++++++++++++++++++++++++
 arch/x86/kernel/vmlinux.lds.S        |  3 ++
 2 files changed, 64 insertions(+)
 create mode 100644 arch/x86/include/asm/runtime-const.h
diff --git a/arch/x86/include/asm/runtime-const.h b/arch/x86/include/asm/runtime-const.h new file mode 100644 index 0000000000000..76fdeaa0faa3f --- /dev/null +++ b/arch/x86/include/asm/runtime-const.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RUNTIME_CONST_H +#define _ASM_RUNTIME_CONST_H + +#define runtime_const_ptr(sym) ({ \ + typeof(sym) __ret; \ + asm_inline("mov %1,%0\n1:\n" \ + ".pushsection runtime_ptr_" #sym ","a"\n\t" \ + ".long 1b - %c2 - .\n\t" \ + ".popsection" \ + : "=r" (__ret) \ + : "i" (0x0123456789abcdefULL), \ + "i" (sizeof(long))); \ + __ret; }) + +// The 'typeof' will create at _least_ a 32-bit type, but +// will happily also take a bigger type and the 'shrl' will +// clear the upper bits +#define runtime_const_shift_right_32(val, sym) ({ \ + typeof(0u+(val)) __ret = (val); \ + asm_inline("shrl $12,%k0\n1:\n" \ + ".pushsection runtime_shift_" #sym ","a"\n\t" \ + ".long 1b - 1 - .\n\t" \ + ".popsection" \ + : "+r" (__ret)); \ + __ret; }) + +#define runtime_const_init(type, sym) do { \ + extern s32 __start_runtime_##type##_##sym[]; \ + extern s32 __stop_runtime_##type##_##sym[]; \ + runtime_const_fixup(__runtime_fixup_##type, \ + (unsigned long)(sym), \ + __start_runtime_##type##_##sym, \ + __stop_runtime_##type##_##sym); \ +} while (0) + +/* + * The text patching is trivial - you can only do this at init time, + * when the text section hasn't been marked RO, and before the text + * has ever been executed. + */ +static inline void __runtime_fixup_ptr(void *where, unsigned long val) +{ + *(unsigned long *)where = val; +} + +static inline void __runtime_fixup_shift(void *where, unsigned long val) +{ + *(unsigned char *)where = val; +} + +static inline void runtime_const_fixup(void (*fn)(void *, unsigned long), + unsigned long val, s32 *start, s32 *end) +{ + while (start < end) { + fn(*start + (void *)start, val); + start++; + } +} + +#endif diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index c57d5df1abc60..cb5b41480a848 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -371,6 +371,9 @@ SECTIONS PERCPU_SECTION(INTERNODE_CACHE_BYTES) #endif
+ RUNTIME_CONST(shift, d_hash_shift) + RUNTIME_CONST(ptr, dentry_hashtable) + . = ALIGN(PAGE_SIZE);
/* freed after init ends here */
From: Linus Torvalds <torvalds@linux-foundation.org>
commit 94a2bc0f611cd9fa4d26e4679bf7ea4b01b12d56 upstream.
This implements the runtime constant infrastructure for arm64, allowing the dcache d_hash() function to be generated using the hash table address as a constant, followed by a shift of the hash index by a constant.
[ Fixed up to deal with the big-endian case as per Mark Rutland ]
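As a quick sanity check of the fixup helpers below: the 64-bit pointer is rebuilt from four 16-bit immediates, one per movz/movk, and __runtime_fixup_16() splices each chunk into bits 5..20 of its instruction. With a made-up address the split works out as:

	unsigned long val = 0xffff800012345678UL;	/* placeholder kernel address */
	unsigned int c0 =  val        & 0xffff;		/* 0x5678 -> movz #imm          */
	unsigned int c1 = (val >> 16) & 0xffff;		/* 0x1234 -> movk #imm, lsl #16 */
	unsigned int c2 = (val >> 32) & 0xffff;		/* 0x8000 -> movk #imm, lsl #32 */
	unsigned int c3 = (val >> 48) & 0xffff;		/* 0xffff -> movk #imm, lsl #48 */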
Cc: stable@vger.kernel.org # 6.10.x: e60cc61: vfs: dcache: move hashlen_hash() from callers into d_hash()
Cc: stable@vger.kernel.org # 6.10.x: e782985: runtime constants: add default dummy infrastructure
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jimmy Tran <jtoantran@google.com>
---
 arch/arm64/include/asm/runtime-const.h | 92 ++++++++++++++++++++++++++
 arch/arm64/kernel/vmlinux.lds.S        |  3 +
 2 files changed, 95 insertions(+)
 create mode 100644 arch/arm64/include/asm/runtime-const.h
diff --git a/arch/arm64/include/asm/runtime-const.h b/arch/arm64/include/asm/runtime-const.h new file mode 100644 index 0000000000000..81faccb54e95d --- /dev/null +++ b/arch/arm64/include/asm/runtime-const.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RUNTIME_CONST_H +#define _ASM_RUNTIME_CONST_H + +#include <asm/cacheflush.h> + +/* Sigh. You can still run arm64 in BE mode */ +#include <asm/byteorder.h> + +#define runtime_const_ptr(sym) ({ \ + typeof(sym) __ret; \ + asm_inline("1:\t" \ + "movz %0, #0xcdef\n\t" \ + "movk %0, #0x89ab, lsl #16\n\t" \ + "movk %0, #0x4567, lsl #32\n\t" \ + "movk %0, #0x0123, lsl #48\n\t" \ + ".pushsection runtime_ptr_" #sym ","a"\n\t" \ + ".long 1b - .\n\t" \ + ".popsection" \ + : "=r" (__ret)); \ + __ret; }) + +#define runtime_const_shift_right_32(val, sym) ({ \ + unsigned long __ret; \ + asm_inline("1:\t" \ + "lsr %w0,%w1,#12\n\t" \ + ".pushsection runtime_shift_" #sym ","a"\n\t" \ + ".long 1b - .\n\t" \ + ".popsection" \ + : "=r" (__ret) \ + : "r" (0u+(val))); \ + __ret; }) + +#define runtime_const_init(type, sym) do { \ + extern s32 __start_runtime_##type##_##sym[]; \ + extern s32 __stop_runtime_##type##_##sym[]; \ + runtime_const_fixup(__runtime_fixup_##type, \ + (unsigned long)(sym), \ + __start_runtime_##type##_##sym, \ + __stop_runtime_##type##_##sym); \ +} while (0) + +/* 16-bit immediate for wide move (movz and movk) in bits 5..20 */ +static inline void __runtime_fixup_16(__le32 *p, unsigned int val) +{ + u32 insn = le32_to_cpu(*p); + + insn &= 0xffe0001f; + insn |= (val & 0xffff) << 5; + *p = cpu_to_le32(insn); +} + +static inline void __runtime_fixup_caches(void *where, unsigned int insns) +{ + unsigned long va = (unsigned long)where; + + caches_clean_inval_pou(va, va + 4*insns); +} + +static inline void __runtime_fixup_ptr(void *where, unsigned long val) +{ + __le32 *p = lm_alias(where); + + __runtime_fixup_16(p, val); + __runtime_fixup_16(p+1, val >> 16); + __runtime_fixup_16(p+2, val >> 32); + __runtime_fixup_16(p+3, val >> 48); + __runtime_fixup_caches(where, 4); +} + +/* Immediate value is 6 bits starting at bit #16 */ +static inline void __runtime_fixup_shift(void *where, unsigned long val) +{ + __le32 *p = lm_alias(where); + u32 insn = le32_to_cpu(*p); + + insn &= 0xffc0ffff; + insn |= (val & 63) << 16; + *p = cpu_to_le32(insn); + __runtime_fixup_caches(where, 1); +} + +static inline void runtime_const_fixup(void (*fn)(void *, unsigned long), + unsigned long val, s32 *start, s32 *end) +{ + while (start < end) { + fn(*start + (void *)start, val); + start++; + } +} + +#endif diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index d4353741f331e..f1719116592da 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -265,6 +265,9 @@ SECTIONS EXIT_DATA }
+ RUNTIME_CONST(shift, d_hash_shift) + RUNTIME_CONST(ptr, dentry_hashtable) + PERCPU_SECTION(L1_CACHE_BYTES) HYPERVISOR_PERCPU_SECTION
From: Linus Torvalds <torvalds@linux-foundation.org>
commit 86e6b1547b3d013bc392adf775b89318441403c2 upstream.
It turns out that AMD has a "Meltdown Lite(tm)" issue with non-canonical accesses in kernel space. And so using just the high bit to decide whether an access is in user space or kernel space ends up with the good old "leak speculative data" if you have the right gadget using the result:
CVE-2020-12965 "Transient Execution of Non-Canonical Accesses"
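As a rough illustration of the gadget shape meant here (entirely hypothetical, not code from the kernel): a speculative read whose value feeds a second, attacker-observable load is enough to leave a measurable cache footprint:

	/* hypothetical gadget pattern */
	unsigned char secret = *(unsigned char *)kernel_ptr;	/* speculative read, faults later       */
	unsigned char probe  = side_table[secret * 512];	/* leaves a per-value cache footprint   */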
Now, the kernel surrounds the access with a STAC/CLAC pair, and those instructions end up serializing execution on older Zen architectures, which closes the speculation window.
But that was true only up until Zen 5, which renames the AC bit [1]. That improves performance of STAC/CLAC a lot, but also means that the speculation window is now open.
Note that this affects not just the new address masking, but also the regular valid_user_address() check used by access_ok(), and the asm version of the sign bit check in the get_user() helpers.
It does not affect put_user() or clear_user() variants, since there's no speculative result to be used in a gadget for those operations.
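The cmp/sbb pair in the new mask_user_address() below builds an all-ones mask whenever the pointer is above USER_PTR_MAX, so any out-of-range (including non-canonical) pointer becomes an all-ones address before it can feed a dependent load. In plain C the logic is roughly (sketch only; the real code keeps this in inline asm so no predictable branch is involved):

	static inline unsigned long mask_user_address_sketch(unsigned long ptr,
							     unsigned long user_ptr_max)
	{
		unsigned long mask = (ptr > user_ptr_max) ? ~0UL : 0;	/* what cmp + sbb compute */
		return ptr | mask;					/* bad pointers become all ones */
	}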
Link: https://lore.kernel.org/all/80d94591-1297-4afb-b510-c665efd37f10@citrix.com/
Link: https://lore.kernel.org/all/20241023094448.GAZxjFkEOOF_DM83TQ@fat_crate.loca... [1]
Link: https://www.amd.com/en/resources/product-security/bulletin/amd-sb-1010.html
Link: https://arxiv.org/pdf/2108.10771
Cc: stable@vger.kernel.org # 6.10.x: e60cc61: vfs: dcache: move hashlen_hash() from callers into d_hash()
Cc: stable@vger.kernel.org # 6.10.x: e782985: runtime constants: add default dummy infrastructure
Cc: stable@vger.kernel.org # 6.10.x: e3c92e8: runtime constants: add x86 architecture support
Fixes: 2865baf54077 ("x86: support user address masking instead of non-speculative conditional")
Fixes: 6014bc27561f ("x86-64: make access_ok() independent of LAM")
Fixes: b19b74bc99b1 ("x86/mm: Rework address range check in get_user() and put_user()")
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jimmy Tran <jtoantran@google.com>
---
 arch/x86/include/asm/uaccess_64.h | 45 ++++++++++++++++++++-----------
 arch/x86/kernel/cpu/common.c      | 10 +++++++
 arch/x86/kernel/vmlinux.lds.S     |  1 +
 arch/x86/lib/getuser.S            |  9 +++++--
 4 files changed, 47 insertions(+), 18 deletions(-)
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index f2c02e4469ccc..e68eded5ee490 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -11,6 +11,13 @@ #include <asm/alternative.h> #include <asm/cpufeatures.h> #include <asm/page.h> +#include <asm/runtime-const.h> + +/* + * Virtual variable: there's no actual backing store for this, + * it can purely be used as 'runtime_const_ptr(USER_PTR_MAX)' + */ +extern unsigned long USER_PTR_MAX;
#ifdef CONFIG_ADDRESS_MASKING /* @@ -49,35 +56,41 @@ static inline unsigned long __untagged_addr_remote(struct mm_struct *mm,
#endif
+#define valid_user_address(x) \ + ((__force unsigned long)(x) <= runtime_const_ptr(USER_PTR_MAX)) + /* - * The virtual address space space is logically divided into a kernel - * half and a user half. When cast to a signed type, user pointers - * are positive and kernel pointers are negative. + * Masking the user address is an alternative to a conditional + * user_access_begin that can avoid the fencing. This only works + * for dense accesses starting at the address. */ -#define valid_user_address(x) ((long)(x) >= 0) +static inline void __user *mask_user_address(const void __user *ptr) +{ + unsigned long mask; + + asm("cmp %1,%0\n\t" + "sbb %0,%0" + : "=r" (mask) + : "r" (ptr), + "0" (runtime_const_ptr(USER_PTR_MAX))); + return (__force void __user *)(mask | (__force unsigned long)ptr); +}
/* * User pointers can have tag bits on x86-64. This scheme tolerates * arbitrary values in those bits rather then masking them off. * * Enforce two rules: - * 1. 'ptr' must be in the user half of the address space + * 1. 'ptr' must be in the user part of the address space * 2. 'ptr+size' must not overflow into kernel addresses * - * Note that addresses around the sign change are not valid addresses, - * and will GP-fault even with LAM enabled if the sign bit is set (see - * "CR3.LAM_SUP" that can narrow the canonicality check if we ever - * enable it, but not remove it entirely). - * - * So the "overflow into kernel addresses" does not imply some sudden - * exact boundary at the sign bit, and we can allow a lot of slop on the - * size check. + * Note that we always have at least one guard page between the + * max user address and the non-canonical gap, allowing us to + * ignore small sizes entirely. * * In fact, we could probably remove the size check entirely, since * any kernel accesses will be in increasing address order starting - * at 'ptr', and even if the end might be in kernel space, we'll - * hit the GP faults for non-canonical accesses before we ever get - * there. + * at 'ptr'. * * That's a separate optimization, for now just handle the small * constant case. diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f66c71bffa6d9..2369e85055c0e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -65,6 +65,7 @@ #include <asm/set_memory.h> #include <asm/traps.h> #include <asm/sev.h> +#include <asm/runtime-const.h>
#include "cpu.h"
@@ -2490,6 +2491,15 @@ void __init arch_cpu_finalize_init(void) alternative_instructions();
if (IS_ENABLED(CONFIG_X86_64)) { + unsigned long USER_PTR_MAX = TASK_SIZE_MAX-1; + + /* + * Enable this when LAM is gated on LASS support + if (cpu_feature_enabled(X86_FEATURE_LAM)) + USER_PTR_MAX = (1ul << 63) - PAGE_SIZE - 1; + */ + runtime_const_init(ptr, USER_PTR_MAX); + /* * Make sure the first 2MB area is not mapped by huge pages * There are typically fixed size MTRRs in there and overlapping diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index cb5b41480a848..a698819fd5d5f 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -373,6 +373,7 @@ SECTIONS
RUNTIME_CONST(shift, d_hash_shift) RUNTIME_CONST(ptr, dentry_hashtable) + RUNTIME_CONST(ptr, USER_PTR_MAX)
. = ALIGN(PAGE_SIZE);
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index 6913fbce6544f..ffa3fff259578 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -39,8 +39,13 @@
.macro check_range size:req .if IS_ENABLED(CONFIG_X86_64) - mov %rax, %rdx - sar $63, %rdx + movq $0x0123456789abcdef,%rdx + 1: + .pushsection runtime_ptr_USER_PTR_MAX,"a" + .long 1b - 8 - . + .popsection + cmp %rax, %rdx + sbb %rdx, %rdx or %rdx, %rax .else cmp $TASK_SIZE_MAX-\size+1, %eax
From: David Laight <David.Laight@ACULAB.COM>
commit 573f45a9f9a47fed4c7957609689b772121b33d7 upstream.
When the size isn't a small constant, __access_ok() will call valid_user_address() with the address after the last byte of the user buffer.
It is valid for a buffer to end with the last valid user address so valid_user_address() must allow accesses to the base of the guard page.
[ This introduces an off-by-one in the other direction for the plain non-sized accesses, but since we have that guard region that is a whole page, those checks "allowing" accesses to that guard region don't really matter. The access will fault anyway, whether to the guard page or if the address has been masked to all ones - Linus ]
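To make the boundary concrete (the helper and names below are only a sketch of the check described above): for a non-constant size the check is effectively valid_user_address(ptr + size), i.e. the first byte past the buffer, so a buffer whose last byte is the last valid user address must still be accepted:

	/* Before: USER_PTR_MAX == TASK_SIZE_MAX - 1, so ptr + size == TASK_SIZE_MAX is
	 * rejected even though every byte of the buffer is a valid user address.
	 * After:  USER_PTR_MAX == TASK_SIZE_MAX (guard page base), so it is accepted. */
	static inline int access_ok_sketch(unsigned long ptr, unsigned long size,
					   unsigned long user_ptr_max)
	{
		return ptr + size <= user_ptr_max;	/* valid_user_address(ptr + size) */
	}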
Cc: stable@vger.kernel.org # 6.12.x: 86e6b15: x86: fix user address masking non-canonical speculation issue
Cc: stable@vger.kernel.org # 6.10.x: e60cc61: vfs: dcache: move hashlen_hash() from callers into d_hash()
Cc: stable@vger.kernel.org # 6.10.x: e782985: runtime constants: add default dummy infrastructure
Cc: stable@vger.kernel.org # 6.10.x: e3c92e8: runtime constants: add x86 architecture support
Fixes: 86e6b1547b3d0 ("x86: fix user address masking non-canonical speculation issue")
Signed-off-by: David Laight <david.laight@aculab.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jimmy Tran <jtoantran@google.com>
---
 arch/x86/kernel/cpu/common.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 2369e85055c0e..6c69dea644ffc 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -2491,12 +2491,12 @@ void __init arch_cpu_finalize_init(void)
 	alternative_instructions();
 
 	if (IS_ENABLED(CONFIG_X86_64)) {
-		unsigned long USER_PTR_MAX = TASK_SIZE_MAX-1;
+		unsigned long USER_PTR_MAX = TASK_SIZE_MAX;
 
 		/*
 		 * Enable this when LAM is gated on LASS support
 		 if (cpu_feature_enabled(X86_FEATURE_LAM))
-			USER_PTR_MAX = (1ul << 63) - PAGE_SIZE - 1;
+			USER_PTR_MAX = (1ul << 63) - PAGE_SIZE;
 		 */
 		runtime_const_init(ptr, USER_PTR_MAX);
 
On Wed, 23 Jul 2025 at 09:32, Jimmy Tran <jtoantran@google.com> wrote:
This patch series backports a critical security fix, identified as CVE-2020-12965 ("Transient Execution of Non-Canonical Accesses"), to the 6.6.y stable kernel tree.
I suspect it's fine just backporting the whole thing, but the actual fix is just the workaround for the AMD speculation issue with non-canonical addresses:
x86: fix user address masking non-canonical speculation issue
and the rest is purely "set up the infrastructure so that that can be back-ported".
A different alternative would be to just take the logic of that user address masking, which exists in two places:
arch/x86/include/asm/uaccess_64.h:
#define mask_user_address(x) ((typeof(x))((long)(x)|((long)(x)>>63)))
arch/x86/lib/getuser.S:
	mov %rax, %rdx
	sar $63, %rdx
	or %rdx, %rax
and make that generate worse code - by *instead* of using the runtime-const infrastructure, only introduce a single new variable for that USER_PTR_MAX value, and use an actual memory load instead of the runtime constant.
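A hedged sketch of what that smaller variant could look like, purely hypothetical and not taken from any tree (one ordinary variable set during boot, read with a plain memory load instead of a patched immediate):

	extern unsigned long user_ptr_max;	/* hypothetical, set once at boot */

	static inline void __user *mask_user_address(const void __user *ptr)
	{
		unsigned long mask;

		asm("cmp %1,%0\n\t"
		    "sbb %0,%0"
		    : "=r" (mask)
		    : "r" (ptr),
		      "0" (user_ptr_max));	/* memory load, not a runtime const */
		return (__force void __user *)(mask | (__force unsigned long)ptr);
	}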
I dunno. That would be a noticeably smaller and more targeted patch, but it would be different from what any mainline kernel has done, so somebody would have to test it a lot.
So I guess back-porting this all is the simpler thing (and does generate better code).
But I did want to point out that the backport results in 250+ lines of patches, and I suspect you *could* do it in a quarter of the size or less. At the cost of having to have somebody who really cares.
Linus
On Wed, 23 Jul 2025 16:32:03 +0000 Jimmy Tran <jtoantran@google.com> wrote:
This patch series backports a critical security fix, identified as CVE-2020-12965 ("Transient Execution of Non-Canonical Accesses"), to the 6.6.y stable kernel tree.
You probably want to pick up the 'cmov' variant of:
+static inline void __user *mask_user_address(const void __user *ptr)
+{
+	unsigned long mask;
+
+	asm("cmp %1,%0\n\t"
+	    "sbb %0,%0"
+	    : "=r" (mask)
+	    : "r" (ptr),
+	      "0" (runtime_const_ptr(USER_PTR_MAX)));
+	return (__force void __user *)(mask | (__force unsigned long)ptr);
+}
Converting kernel addresses to USER_PTR_MAX instead of ~0 means that it isn't critical that the base address is accessed first. (I'm not sure that x86 completely disables mapping to user address zero.)
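In other words, the cmov form clamps rather than poisons; as C, approximately (sketch of the behaviour only):

	/* 'cmp %1,%0 ; cmova %1,%0' with %0 = ptr and %1 = USER_PTR_MAX: */
	ret = (ptr > user_ptr_max) ? user_ptr_max : ptr;	/* bad pointers land at the guard page */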
That is more problematic for 32-bit (address masking wasn't enabled last time I looked) because not all supported cpus support cmov.
David
Hi everyone,
This is v2 of my series to backport the critical security fix, identified as CVE-2020-12965 ("Transient Execution of Non-Canonical Accesses"), to the 6.6.y stable kernel tree.
Linus Torvalds's second proposed solution offers a more targeted and smaller backport for CVE-2020-12965 compared to backporting the entire patch series.
This alternative would focus solely on the user address masking logic that addresses the AMD speculation issue with non-canonical addresses.
Instead of introducing the extensive "runtime-constant" infrastructure seen in the larger patch series, this solution would:
- Introduce a single new variable for the USER_PTR_MAX value.
- Use an actual memory load to access this USER_PTR_MAX value, rather than
  leveraging the runtime_const mechanism.
While this approach would result in a noticeably smaller and more localized patch, it would differ from what's currently in the mainline kernel. This divergence would necessitate significant additional testing to ensure its stability.
I am ready to implement the second proposed solution if the maintainers wish to move forward in that direction, understanding the testing implications. Please let me know your preference.
Changes in v2:
==============
- Incorporated commit 91309a708 ("x86: use cmov for user address masking")
  as suggested by David Laight. This commit is now included as the first
  patch in the series.
This series addresses the CVE-2020-12965 vulnerability by introducing the necessary x86 infrastructure and the specific fix for user address masking non-canonical speculation issues.
v1:
==============
This patch series backports a critical security fix, identified as
CVE-2020-12965 ("Transient Execution of Non-Canonical Accesses"), to the
6.6.y stable kernel tree.
David Laight (1):
  x86: fix off-by-one in access_ok()

Linus Torvalds (6):
  vfs: dcache: move hashlen_hash() from callers into d_hash()
  runtime constants: add default dummy infrastructure
  runtime constants: add x86 architecture support
  arm64: add 'runtime constant' support
  x86: fix user address masking non-canonical speculation issue
  x86: use cmov for user address masking

 arch/arm64/include/asm/runtime-const.h | 92 ++++++++++++++++++++++++++
 arch/arm64/kernel/vmlinux.lds.S        |  3 +
 arch/x86/include/asm/runtime-const.h   | 61 +++++++++++++++++
 arch/x86/include/asm/uaccess_64.h      | 44 +++++++-----
 arch/x86/kernel/cpu/common.c           | 10 +++
 arch/x86/kernel/vmlinux.lds.S          |  4 ++
 arch/x86/lib/getuser.S                 | 10 ++-
 fs/dcache.c                            | 17 +++--
 include/asm-generic/Kbuild             |  1 +
 include/asm-generic/runtime-const.h    | 15 +++++
 include/asm-generic/vmlinux.lds.h      |  8 +++
 11 files changed, 242 insertions(+), 23 deletions(-)
 create mode 100644 arch/arm64/include/asm/runtime-const.h
 create mode 100644 arch/x86/include/asm/runtime-const.h
 create mode 100644 include/asm-generic/runtime-const.h
From: Linus Torvalds <torvalds@linux-foundation.org>
commit e60cc61153e61e4e38bd983492df9959e82ae4dc upstream.
Both __d_lookup_rcu() and __d_lookup_rcu_op_compare() have the full 'name_hash' value of the qstr that they want to look up, and mask it off to just the low 32-bit hash before calling down to d_hash().
Other callers just load the 32-bit hash and pass it as the argument.
If we move the masking into d_hash() itself, it simplifies the two callers that currently do the masking, and is a no-op for the other cases. It doesn't actually change the generated code since the compiler will inline d_hash() and see that the end result is the same.
[ Technically, since the parse tree changes, the code generation may not be 100% the same, and for me on x86-64, this does result in gcc switching the operands around for one 'cmpl' instruction. So not necessarily the exact same code generation, but equivalent ]
However, this does encapsulate the 'd_hash()' operation more, and makes the shift operation in particular be a "shift 32 bits right, return full word". Which matches the instruction semantics on both x86-64 and arm64 better, since a 32-bit shift will clear the upper bits.
That makes the next step of introducing a "shift by runtime constant" more obvious and generates the shift with no extraneous type masking.
Cc: stable@vger.kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jimmy Tran <jtoantran@google.com>
---
 fs/dcache.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/fs/dcache.c b/fs/dcache.c
index 4030c010a7682..82adee104f82c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -100,9 +100,9 @@ static unsigned int d_hash_shift __read_mostly;
 
 static struct hlist_bl_head *dentry_hashtable __read_mostly;
 
-static inline struct hlist_bl_head *d_hash(unsigned int hash)
+static inline struct hlist_bl_head *d_hash(unsigned long hashlen)
 {
-	return dentry_hashtable + (hash >> d_hash_shift);
+	return dentry_hashtable + ((u32)hashlen >> d_hash_shift);
 }
 
 #define IN_LOOKUP_SHIFT 10
@@ -2286,7 +2286,7 @@ static noinline struct dentry *__d_lookup_rcu_op_compare(
 	unsigned *seqp)
 {
 	u64 hashlen = name->hash_len;
-	struct hlist_bl_head *b = d_hash(hashlen_hash(hashlen));
+	struct hlist_bl_head *b = d_hash(hashlen);
 	struct hlist_bl_node *node;
 	struct dentry *dentry;
 
@@ -2353,7 +2353,7 @@ struct dentry *__d_lookup_rcu(const struct dentry *parent,
 {
 	u64 hashlen = name->hash_len;
 	const unsigned char *str = name->name;
-	struct hlist_bl_head *b = d_hash(hashlen_hash(hashlen));
+	struct hlist_bl_head *b = d_hash(hashlen);
 	struct hlist_bl_node *node;
 	struct dentry *dentry;
 
From: Linus Torvalds <torvalds@linux-foundation.org>
commit e78298556ee5d881f6679effb2a6743969ea6e2d upstream.
This adds the initial dummy support for 'runtime constants' for when an architecture doesn't actually support an implementation of fixing up said runtime constants.
This ends up being the fallback to just using the variables as regular __ro_after_init variables, and changes the dcache d_hash() function to use this model.
Cc: stable@vger.kernel.org # 6.10.x: e60cc61: vfs: dcache: move hashlen_hash
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jimmy Tran <jtoantran@google.com>
---
 fs/dcache.c                         | 11 ++++++++++-
 include/asm-generic/Kbuild          |  1 +
 include/asm-generic/runtime-const.h | 15 +++++++++++++++
 include/asm-generic/vmlinux.lds.h   |  8 ++++++++
 4 files changed, 34 insertions(+), 1 deletion(-)
 create mode 100644 include/asm-generic/runtime-const.h
diff --git a/fs/dcache.c b/fs/dcache.c index 82adee104f82c..9e5c92b4b4aaa 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -35,6 +35,8 @@ #include "internal.h" #include "mount.h"
+#include <asm/runtime-const.h> + /* * Usage: * dcache->d_inode->i_lock protects: @@ -102,7 +104,8 @@ static struct hlist_bl_head *dentry_hashtable __read_mostly;
static inline struct hlist_bl_head *d_hash(unsigned long hashlen) { - return dentry_hashtable + ((u32)hashlen >> d_hash_shift); + return runtime_const_ptr(dentry_hashtable) + + runtime_const_shift_right_32(hashlen, d_hash_shift); }
#define IN_LOOKUP_SHIFT 10 @@ -3297,6 +3300,9 @@ static void __init dcache_init_early(void) 0, 0); d_hash_shift = 32 - d_hash_shift; + + runtime_const_init(shift, d_hash_shift); + runtime_const_init(ptr, dentry_hashtable); }
static void __init dcache_init(void) @@ -3325,6 +3331,9 @@ static void __init dcache_init(void) 0, 0); d_hash_shift = 32 - d_hash_shift; + + runtime_const_init(shift, d_hash_shift); + runtime_const_init(ptr, dentry_hashtable); }
/* SLAB cache for __getname() consumers */ diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index 941be574bbe00..22673ec5defbb 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -46,6 +46,7 @@ mandatory-y += pci.h mandatory-y += percpu.h mandatory-y += pgalloc.h mandatory-y += preempt.h +mandatory-y += runtime-const.h mandatory-y += rwonce.h mandatory-y += sections.h mandatory-y += serial.h diff --git a/include/asm-generic/runtime-const.h b/include/asm-generic/runtime-const.h new file mode 100644 index 0000000000000..3e68a17fbf287 --- /dev/null +++ b/include/asm-generic/runtime-const.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RUNTIME_CONST_H +#define _ASM_RUNTIME_CONST_H + +/* + * This is the fallback for when the architecture doesn't + * support the runtime const operations. + * + * We just use the actual symbols as-is. + */ +#define runtime_const_ptr(sym) (sym) +#define runtime_const_shift_right_32(val, sym) ((u32)(val)>>(sym)) +#define runtime_const_init(type, sym) do { } while (0) + +#endif diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index cf3f8b9bf43f0..66bfd3dc91a33 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -907,6 +907,14 @@ #define CON_INITCALL \ BOUNDED_SECTION_POST_LABEL(.con_initcall.init, __con_initcall, _start, _end)
+#define RUNTIME_NAME(t, x) runtime_##t##_##x + +#define RUNTIME_CONST(t, x) \ + . = ALIGN(8); \ + RUNTIME_NAME(t, x) : AT(ADDR(RUNTIME_NAME(t, x)) - LOAD_OFFSET) { \ + *(RUNTIME_NAME(t, x)); \ + } + /* Alignment must be consistent with (kunit_suite *) in include/kunit/test.h */ #define KUNIT_TABLE() \ . = ALIGN(8); \
From: Linus Torvalds <torvalds@linux-foundation.org>
commit e3c92e81711d14b46c3121d36bc8e152cb843923 upstream.
This implements the runtime constant infrastructure for x86, allowing the dcache d_hash() function to be generated using the hash table address as a constant, followed by a shift of the hash index by a constant.
Cc: stable@vger.kernel.org # 6.10.x: e60cc61: vfs: dcache: move hashlen_hash
Cc: stable@vger.kernel.org # 6.10.x: e782985: runtime constants: add default
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jimmy Tran <jtoantran@google.com>
---
 arch/x86/include/asm/runtime-const.h | 61 ++++++++++++++++++++++++++++
 arch/x86/kernel/vmlinux.lds.S        |  3 ++
 2 files changed, 64 insertions(+)
 create mode 100644 arch/x86/include/asm/runtime-const.h
diff --git a/arch/x86/include/asm/runtime-const.h b/arch/x86/include/asm/runtime-const.h new file mode 100644 index 0000000000000..76fdeaa0faa3f --- /dev/null +++ b/arch/x86/include/asm/runtime-const.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RUNTIME_CONST_H +#define _ASM_RUNTIME_CONST_H + +#define runtime_const_ptr(sym) ({ \ + typeof(sym) __ret; \ + asm_inline("mov %1,%0\n1:\n" \ + ".pushsection runtime_ptr_" #sym ","a"\n\t" \ + ".long 1b - %c2 - .\n\t" \ + ".popsection" \ + : "=r" (__ret) \ + : "i" (0x0123456789abcdefULL), \ + "i" (sizeof(long))); \ + __ret; }) + +// The 'typeof' will create at _least_ a 32-bit type, but +// will happily also take a bigger type and the 'shrl' will +// clear the upper bits +#define runtime_const_shift_right_32(val, sym) ({ \ + typeof(0u+(val)) __ret = (val); \ + asm_inline("shrl $12,%k0\n1:\n" \ + ".pushsection runtime_shift_" #sym ","a"\n\t" \ + ".long 1b - 1 - .\n\t" \ + ".popsection" \ + : "+r" (__ret)); \ + __ret; }) + +#define runtime_const_init(type, sym) do { \ + extern s32 __start_runtime_##type##_##sym[]; \ + extern s32 __stop_runtime_##type##_##sym[]; \ + runtime_const_fixup(__runtime_fixup_##type, \ + (unsigned long)(sym), \ + __start_runtime_##type##_##sym, \ + __stop_runtime_##type##_##sym); \ +} while (0) + +/* + * The text patching is trivial - you can only do this at init time, + * when the text section hasn't been marked RO, and before the text + * has ever been executed. + */ +static inline void __runtime_fixup_ptr(void *where, unsigned long val) +{ + *(unsigned long *)where = val; +} + +static inline void __runtime_fixup_shift(void *where, unsigned long val) +{ + *(unsigned char *)where = val; +} + +static inline void runtime_const_fixup(void (*fn)(void *, unsigned long), + unsigned long val, s32 *start, s32 *end) +{ + while (start < end) { + fn(*start + (void *)start, val); + start++; + } +} + +#endif diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index c57d5df1abc60..cb5b41480a848 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -371,6 +371,9 @@ SECTIONS PERCPU_SECTION(INTERNODE_CACHE_BYTES) #endif
+ RUNTIME_CONST(shift, d_hash_shift) + RUNTIME_CONST(ptr, dentry_hashtable) + . = ALIGN(PAGE_SIZE);
/* freed after init ends here */
From: Linus Torvalds <torvalds@linux-foundation.org>
commit 94a2bc0f611cd9fa4d26e4679bf7ea4b01b12d56 upstream.
This implements the runtime constant infrastructure for arm64, allowing the dcache d_hash() function to be generated using the hash table address as a constant, followed by a shift of the hash index by a constant.
[ Fixed up to deal with the big-endian case as per Mark Rutland ]
Cc: stable@vger.kernel.org # 6.10.x: e60cc61: vfs: dcache: move hashlen_hash() from callers into d_hash()
Cc: stable@vger.kernel.org # 6.10.x: e782985: runtime constants: add default dummy infrastructure
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jimmy Tran <jtoantran@google.com>
---
 arch/arm64/include/asm/runtime-const.h | 92 ++++++++++++++++++++++++++
 arch/arm64/kernel/vmlinux.lds.S        |  3 +
 2 files changed, 95 insertions(+)
 create mode 100644 arch/arm64/include/asm/runtime-const.h
diff --git a/arch/arm64/include/asm/runtime-const.h b/arch/arm64/include/asm/runtime-const.h new file mode 100644 index 0000000000000..81faccb54e95d --- /dev/null +++ b/arch/arm64/include/asm/runtime-const.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RUNTIME_CONST_H +#define _ASM_RUNTIME_CONST_H + +#include <asm/cacheflush.h> + +/* Sigh. You can still run arm64 in BE mode */ +#include <asm/byteorder.h> + +#define runtime_const_ptr(sym) ({ \ + typeof(sym) __ret; \ + asm_inline("1:\t" \ + "movz %0, #0xcdef\n\t" \ + "movk %0, #0x89ab, lsl #16\n\t" \ + "movk %0, #0x4567, lsl #32\n\t" \ + "movk %0, #0x0123, lsl #48\n\t" \ + ".pushsection runtime_ptr_" #sym ","a"\n\t" \ + ".long 1b - .\n\t" \ + ".popsection" \ + : "=r" (__ret)); \ + __ret; }) + +#define runtime_const_shift_right_32(val, sym) ({ \ + unsigned long __ret; \ + asm_inline("1:\t" \ + "lsr %w0,%w1,#12\n\t" \ + ".pushsection runtime_shift_" #sym ","a"\n\t" \ + ".long 1b - .\n\t" \ + ".popsection" \ + : "=r" (__ret) \ + : "r" (0u+(val))); \ + __ret; }) + +#define runtime_const_init(type, sym) do { \ + extern s32 __start_runtime_##type##_##sym[]; \ + extern s32 __stop_runtime_##type##_##sym[]; \ + runtime_const_fixup(__runtime_fixup_##type, \ + (unsigned long)(sym), \ + __start_runtime_##type##_##sym, \ + __stop_runtime_##type##_##sym); \ +} while (0) + +/* 16-bit immediate for wide move (movz and movk) in bits 5..20 */ +static inline void __runtime_fixup_16(__le32 *p, unsigned int val) +{ + u32 insn = le32_to_cpu(*p); + + insn &= 0xffe0001f; + insn |= (val & 0xffff) << 5; + *p = cpu_to_le32(insn); +} + +static inline void __runtime_fixup_caches(void *where, unsigned int insns) +{ + unsigned long va = (unsigned long)where; + + caches_clean_inval_pou(va, va + 4*insns); +} + +static inline void __runtime_fixup_ptr(void *where, unsigned long val) +{ + __le32 *p = lm_alias(where); + + __runtime_fixup_16(p, val); + __runtime_fixup_16(p+1, val >> 16); + __runtime_fixup_16(p+2, val >> 32); + __runtime_fixup_16(p+3, val >> 48); + __runtime_fixup_caches(where, 4); +} + +/* Immediate value is 6 bits starting at bit #16 */ +static inline void __runtime_fixup_shift(void *where, unsigned long val) +{ + __le32 *p = lm_alias(where); + u32 insn = le32_to_cpu(*p); + + insn &= 0xffc0ffff; + insn |= (val & 63) << 16; + *p = cpu_to_le32(insn); + __runtime_fixup_caches(where, 1); +} + +static inline void runtime_const_fixup(void (*fn)(void *, unsigned long), + unsigned long val, s32 *start, s32 *end) +{ + while (start < end) { + fn(*start + (void *)start, val); + start++; + } +} + +#endif diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index d4353741f331e..f1719116592da 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -265,6 +265,9 @@ SECTIONS EXIT_DATA }
+ RUNTIME_CONST(shift, d_hash_shift) + RUNTIME_CONST(ptr, dentry_hashtable) + PERCPU_SECTION(L1_CACHE_BYTES) HYPERVISOR_PERCPU_SECTION
From: Linus Torvalds <torvalds@linux-foundation.org>
commit 86e6b1547b3d013bc392adf775b89318441403c2 upstream.
It turns out that AMD has a "Meltdown Lite(tm)" issue with non-canonical accesses in kernel space. And so using just the high bit to decide whether an access is in user space or kernel space ends up with the good old "leak speculative data" if you have the right gadget using the result:
CVE-2020-12965 "Transient Execution of Non-Canonical Accesses"
Now, the kernel surrounds the access with a STAC/CLAC pair, and those instructions end up serializing execution on older Zen architectures, which closes the speculation window.
But that was true only up until Zen 5, which renames the AC bit [1]. That improves performance of STAC/CLAC a lot, but also means that the speculation window is now open.
Note that this affects not just the new address masking, but also the regular valid_user_address() check used by access_ok(), and the asm version of the sign bit check in the get_user() helpers.
It does not affect put_user() or clear_user() variants, since there's no speculative result to be used in a gadget for those operations.
Link: https://lore.kernel.org/all/80d94591-1297-4afb-b510-c665efd37f10@citrix.com/
Link: https://lore.kernel.org/all/20241023094448.GAZxjFkEOOF_DM83TQ@fat_crate.loca... [1]
Link: https://www.amd.com/en/resources/product-security/bulletin/amd-sb-1010.html
Link: https://arxiv.org/pdf/2108.10771
Cc: stable@vger.kernel.org # 6.10.x: e60cc61: vfs: dcache: move hashlen_hash() from callers into d_hash()
Cc: stable@vger.kernel.org # 6.10.x: e782985: runtime constants: add default dummy infrastructure
Cc: stable@vger.kernel.org # 6.10.x: e3c92e8: runtime constants: add x86 architecture support
Fixes: 2865baf54077 ("x86: support user address masking instead of non-speculative conditional")
Fixes: 6014bc27561f ("x86-64: make access_ok() independent of LAM")
Fixes: b19b74bc99b1 ("x86/mm: Rework address range check in get_user() and put_user()")
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jimmy Tran <jtoantran@google.com>
---
 arch/x86/include/asm/uaccess_64.h | 45 ++++++++++++++++++++-----------
 arch/x86/kernel/cpu/common.c      | 10 +++++++
 arch/x86/kernel/vmlinux.lds.S     |  1 +
 arch/x86/lib/getuser.S            |  9 +++++--
 4 files changed, 47 insertions(+), 18 deletions(-)
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index f2c02e4469ccc..e68eded5ee490 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -11,6 +11,13 @@ #include <asm/alternative.h> #include <asm/cpufeatures.h> #include <asm/page.h> +#include <asm/runtime-const.h> + +/* + * Virtual variable: there's no actual backing store for this, + * it can purely be used as 'runtime_const_ptr(USER_PTR_MAX)' + */ +extern unsigned long USER_PTR_MAX;
#ifdef CONFIG_ADDRESS_MASKING /* @@ -49,35 +56,41 @@ static inline unsigned long __untagged_addr_remote(struct mm_struct *mm,
#endif
+#define valid_user_address(x) \ + ((__force unsigned long)(x) <= runtime_const_ptr(USER_PTR_MAX)) + /* - * The virtual address space space is logically divided into a kernel - * half and a user half. When cast to a signed type, user pointers - * are positive and kernel pointers are negative. + * Masking the user address is an alternative to a conditional + * user_access_begin that can avoid the fencing. This only works + * for dense accesses starting at the address. */ -#define valid_user_address(x) ((long)(x) >= 0) +static inline void __user *mask_user_address(const void __user *ptr) +{ + unsigned long mask; + + asm("cmp %1,%0\n\t" + "sbb %0,%0" + : "=r" (mask) + : "r" (ptr), + "0" (runtime_const_ptr(USER_PTR_MAX))); + return (__force void __user *)(mask | (__force unsigned long)ptr); +}
/* * User pointers can have tag bits on x86-64. This scheme tolerates * arbitrary values in those bits rather then masking them off. * * Enforce two rules: - * 1. 'ptr' must be in the user half of the address space + * 1. 'ptr' must be in the user part of the address space * 2. 'ptr+size' must not overflow into kernel addresses * - * Note that addresses around the sign change are not valid addresses, - * and will GP-fault even with LAM enabled if the sign bit is set (see - * "CR3.LAM_SUP" that can narrow the canonicality check if we ever - * enable it, but not remove it entirely). - * - * So the "overflow into kernel addresses" does not imply some sudden - * exact boundary at the sign bit, and we can allow a lot of slop on the - * size check. + * Note that we always have at least one guard page between the + * max user address and the non-canonical gap, allowing us to + * ignore small sizes entirely. * * In fact, we could probably remove the size check entirely, since * any kernel accesses will be in increasing address order starting - * at 'ptr', and even if the end might be in kernel space, we'll - * hit the GP faults for non-canonical accesses before we ever get - * there. + * at 'ptr'. * * That's a separate optimization, for now just handle the small * constant case. diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f66c71bffa6d9..2369e85055c0e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -65,6 +65,7 @@ #include <asm/set_memory.h> #include <asm/traps.h> #include <asm/sev.h> +#include <asm/runtime-const.h>
#include "cpu.h"
@@ -2490,6 +2491,15 @@ void __init arch_cpu_finalize_init(void) alternative_instructions();
if (IS_ENABLED(CONFIG_X86_64)) { + unsigned long USER_PTR_MAX = TASK_SIZE_MAX-1; + + /* + * Enable this when LAM is gated on LASS support + if (cpu_feature_enabled(X86_FEATURE_LAM)) + USER_PTR_MAX = (1ul << 63) - PAGE_SIZE - 1; + */ + runtime_const_init(ptr, USER_PTR_MAX); + /* * Make sure the first 2MB area is not mapped by huge pages * There are typically fixed size MTRRs in there and overlapping diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index cb5b41480a848..a698819fd5d5f 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -373,6 +373,7 @@ SECTIONS
RUNTIME_CONST(shift, d_hash_shift) RUNTIME_CONST(ptr, dentry_hashtable) + RUNTIME_CONST(ptr, USER_PTR_MAX)
. = ALIGN(PAGE_SIZE);
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index 6913fbce6544f..ffa3fff259578 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -39,8 +39,13 @@
.macro check_range size:req .if IS_ENABLED(CONFIG_X86_64) - mov %rax, %rdx - sar $63, %rdx + movq $0x0123456789abcdef,%rdx + 1: + .pushsection runtime_ptr_USER_PTR_MAX,"a" + .long 1b - 8 - . + .popsection + cmp %rax, %rdx + sbb %rdx, %rdx or %rdx, %rax .else cmp $TASK_SIZE_MAX-\size+1, %eax
From: David Laight <David.Laight@ACULAB.COM>
commit 573f45a9f9a47fed4c7957609689b772121b33d7 upstream.
When the size isn't a small constant, __access_ok() will call valid_user_address() with the address after the last byte of the user buffer.
It is valid for a buffer to end with the last valid user address so valid_user_address() must allow accesses to the base of the guard page.
[ This introduces an off-by-one in the other direction for the plain non-sized accesses, but since we have that guard region that is a whole page, those checks "allowing" accesses to that guard region don't really matter. The access will fault anyway, whether to the guard page or if the address has been masked to all ones - Linus ]
Cc: stable@vger.kernel.org # 6.12.x: 86e6b15: x86: fix user address masking non-canonical speculation issue
Cc: stable@vger.kernel.org # 6.10.x: e60cc61: vfs: dcache: move hashlen_hash() from callers into d_hash()
Cc: stable@vger.kernel.org # 6.10.x: e782985: runtime constants: add default dummy infrastructure
Cc: stable@vger.kernel.org # 6.10.x: e3c92e8: runtime constants: add x86 architecture support
Fixes: 86e6b1547b3d0 ("x86: fix user address masking non-canonical speculation issue")
Signed-off-by: David Laight <david.laight@aculab.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jimmy Tran <jtoantran@google.com>
---
 arch/x86/kernel/cpu/common.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 2369e85055c0e..6c69dea644ffc 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -2491,12 +2491,12 @@ void __init arch_cpu_finalize_init(void)
 	alternative_instructions();
 
 	if (IS_ENABLED(CONFIG_X86_64)) {
-		unsigned long USER_PTR_MAX = TASK_SIZE_MAX-1;
+		unsigned long USER_PTR_MAX = TASK_SIZE_MAX;
 
 		/*
 		 * Enable this when LAM is gated on LASS support
 		 if (cpu_feature_enabled(X86_FEATURE_LAM))
-			USER_PTR_MAX = (1ul << 63) - PAGE_SIZE - 1;
+			USER_PTR_MAX = (1ul << 63) - PAGE_SIZE;
 		 */
 		runtime_const_init(ptr, USER_PTR_MAX);
 
From: Linus Torvalds <torvalds@linux-foundation.org>
commit 91309a70829d94c735c8bb1cc383e78c96127a16 upstream.
This was a suggestion by David Laight, and while I was slightly worried that some micro-architecture would predict cmov like a conditional branch, there is little reason to actually believe any core would be that broken.
Intel documents that their existing cores treat CMOVcc as a data dependency that will constrain speculation in their "Speculative Execution Side Channel Mitigations" whitepaper:
"Other instructions such as CMOVcc, AND, ADC, SBB and SETcc can also be used to prevent bounds check bypass by constraining speculative execution on current family 6 processors (Intel® Core™, Intel® Atom™, Intel® Xeon® and Intel® Xeon Phi™ processors)"
and while that leaves the future uarch issues open, that's certainly true of our traditional SBB usage too.
Any core that predicts CMOV will be unusable for various crypto algorithms that need data-independent timing stability, so let's just treat CMOV as the safe choice that simplifies the address masking by avoiding an extra instruction and doesn't need a temporary register.
Cc: stable@vger.kernel.org # 6.12.x: 573f45a: x86: fix off-by-one in access_ok()
Cc: stable@vger.kernel.org # 6.12.x: 86e6b15: x86: fix user address masking non-canonical speculation issue
Cc: stable@vger.kernel.org # 6.10.x: e60cc61: vfs: dcache: move hashlen_hash() from callers into d_hash()
Cc: stable@vger.kernel.org # 6.10.x: e782985: runtime constants: add default dummy infrastructure
Cc: stable@vger.kernel.org # 6.10.x: e3c92e8: runtime constants: add x86 architecture support
Suggested-by: David Laight <David.Laight@aculab.com>
Link: https://www.intel.com/content/dam/develop/external/us/en/documents/336996-sp...
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jimmy Tran <jtoantran@google.com>
---
 arch/x86/include/asm/uaccess_64.h | 13 ++++++-------
 arch/x86/lib/getuser.S            |  5 ++---
 2 files changed, 8 insertions(+), 10 deletions(-)
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index e68eded5ee490..123d36c89722f 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -66,14 +66,13 @@ static inline unsigned long __untagged_addr_remote(struct mm_struct *mm,
  */
 static inline void __user *mask_user_address(const void __user *ptr)
 {
-	unsigned long mask;
-
+	void __user *ret;
 	asm("cmp %1,%0\n\t"
-	    "sbb %0,%0"
-	    : "=r" (mask)
-	    : "r" (ptr),
-	      "0" (runtime_const_ptr(USER_PTR_MAX)));
-	return (__force void __user *)(mask | (__force unsigned long)ptr);
+	    "cmova %1,%0"
+	    :"=r" (ret)
+	    :"r" (runtime_const_ptr(USER_PTR_MAX)),
+	    "0" (ptr));
+	return ret;
 }
 
 /*
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index ffa3fff259578..0f7f58f20b068 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -44,9 +44,8 @@
 	.pushsection runtime_ptr_USER_PTR_MAX,"a"
 	.long 1b - 8 - .
 	.popsection
-	cmp %rax, %rdx
-	sbb %rdx, %rdx
-	or %rdx, %rax
+	cmp %rdx, %rax
+	cmova %rdx, %rax
 .else
 	cmp $TASK_SIZE_MAX-\size+1, %eax
 .if \size != 8