Hi Greg,
What follows is backports of the upstream commits
6a877d2450ac x86/fpu: Take task_struct* in copy_sigframe_from_user_to_xstate() 1c813ce03055 x86/fpu: Add a pkru argument to copy_uabi_from_kernel_to_xstate(). 2c87767c35ee x86/fpu: Add a pkru argument to copy_uabi_to_xstate() 4a804c4f8356 x86/fpu: Allow PKRU to be (once again) written by ptrace. d7e5aceace51 x86/fpu: Emulate XRSTOR's behavior if the xfeatures PKRU bit is not set 6ea25770b043 selftests/vm/pkeys: Add a regression test for setting PKRU through ptrace
Those commits fix a regression introduced in 5.14 (by e84ba47e313d) related to handling of PKRU through ptrace(2).
The only substantive difference between this patch series and the upstream patch series is that, because on upstream kernels KVM also calls into copy_uabi_to_xstate(), while on 5.15 KVM has its own separate code path, this patch series copies (five lines of) previously KVM-specific code into copy_uabi_to_xstate() whereas the upstream patch series moves previously KVM-specific code into copy_uabi_to_xstate(). All other changes are adjustments for context that varies or refactorings that are newer than 5.15.
From: Kyle Huey me@kylehuey.com
commit 6a877d2450ac upstream
This will allow copy_sigframe_from_user_to_xstate() to grab the address of thread_struct's pkru value in a later patch.
Signed-off-by: Kyle Huey me@kylehuey.com Signed-off-by: Dave Hansen dave.hansen@linux.intel.com Link: https://lore.kernel.org/all/20221115230932.7126-2-khuey%40kylehuey.com --- arch/x86/include/asm/fpu/xstate.h | 2 +- arch/x86/kernel/fpu/signal.c | 2 +- arch/x86/kernel/fpu/xstate.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 109dfcc75299..b98b302c2751 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -137,7 +137,7 @@ extern void __init update_regset_xstate_info(unsigned int size, void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); int xfeature_size(int xfeature_nr); int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf); -int copy_sigframe_from_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf); +int copy_sigframe_from_user_to_xstate(struct task_struct *tsk, const void __user *ubuf);
void xsaves(struct xregs_state *xsave, u64 mask); void xrstors(struct xregs_state *xsave, u64 mask); diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 7f71bd4dcd0d..7f76cb099e66 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -370,7 +370,7 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx, fpregs_unlock();
if (use_xsave() && !fx_only) { - ret = copy_sigframe_from_user_to_xstate(&fpu->state.xsave, buf_fx); + ret = copy_sigframe_from_user_to_xstate(tsk, buf_fx); if (ret) return ret; } else { diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index c8def1b7f8fb..e1b717427955 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1169,10 +1169,10 @@ int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) * XSAVE[S] format and copy to the target thread. This is called from the * sigreturn() and rt_sigreturn() system calls. */ -int copy_sigframe_from_user_to_xstate(struct xregs_state *xsave, +int copy_sigframe_from_user_to_xstate(struct task_struct *tsk, const void __user *ubuf) { - return copy_uabi_to_xstate(xsave, NULL, ubuf); + return copy_uabi_to_xstate(&tsk->thread.fpu.state.xsave, NULL, ubuf); }
static bool validate_xsaves_xrstors(u64 mask)
From: Kyle Huey me@kylehuey.com
commit 1c813ce03055 upstream
ptrace (through PTRACE_SETREGSET with NT_X86_XSTATE) ultimately calls copy_uabi_from_kernel_to_xstate(). In preparation for eventually handling PKRU in copy_uabi_to_xstate, pass in a pointer to the PKRU location.
Signed-off-by: Kyle Huey me@kylehuey.com Signed-off-by: Dave Hansen dave.hansen@linux.intel.com Link: https://lore.kernel.org/all/20221115230932.7126-3-khuey%40kylehuey.com --- arch/x86/include/asm/fpu/xstate.h | 2 +- arch/x86/kernel/fpu/regset.c | 2 +- arch/x86/kernel/fpu/xstate.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index b98b302c2751..d91df71f60fb 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -136,7 +136,7 @@ extern void __init update_regset_xstate_info(unsigned int size,
void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); int xfeature_size(int xfeature_nr); -int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf); +int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf, u32 *pkru); int copy_sigframe_from_user_to_xstate(struct task_struct *tsk, const void __user *ubuf);
void xsaves(struct xregs_state *xsave, u64 mask); diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 125cbbe10fef..bd243ae57680 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -163,7 +163,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, }
fpu_force_restore(fpu); - ret = copy_uabi_from_kernel_to_xstate(&fpu->state.xsave, kbuf ?: tmpbuf); + ret = copy_uabi_from_kernel_to_xstate(&fpu->state.xsave, kbuf ?: tmpbuf, &target->thread.pkru);
out: vfree(tmpbuf); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index e1b717427955..555b9870e9ba 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1159,7 +1159,7 @@ static int copy_uabi_to_xstate(struct xregs_state *xsave, const void *kbuf, * format and copy to the target thread. This is called from * xstateregs_set(). */ -int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) +int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf, u32 *pkru) { return copy_uabi_to_xstate(xsave, kbuf, NULL); }
From: Kyle Huey me@kylehuey.com
commit 2c87767c35ee upstream
In preparation for adding PKRU handling code into copy_uabi_to_xstate(), add an argument that copy_uabi_from_kernel_to_xstate() can use to pass the canonical location of the PKRU value. For copy_sigframe_from_user_to_xstate() the kernel will actually restore the PKRU value from the fpstate, but pass in the thread_struct's pkru location anyways for consistency.
Signed-off-by: Kyle Huey me@kylehuey.com Signed-off-by: Dave Hansen dave.hansen@linux.intel.com Link: https://lore.kernel.org/all/20221115230932.7126-4-khuey%40kylehuey.com --- arch/x86/kernel/fpu/xstate.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 555b9870e9ba..8abce8015b72 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1092,7 +1092,7 @@ static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size,
static int copy_uabi_to_xstate(struct xregs_state *xsave, const void *kbuf, - const void __user *ubuf) + const void __user *ubuf, u32 *pkru) { unsigned int offset, size; struct xstate_header hdr; @@ -1161,7 +1161,7 @@ static int copy_uabi_to_xstate(struct xregs_state *xsave, const void *kbuf, */ int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf, u32 *pkru) { - return copy_uabi_to_xstate(xsave, kbuf, NULL); + return copy_uabi_to_xstate(xsave, kbuf, NULL, pkru); }
/* @@ -1172,7 +1172,7 @@ int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf, int copy_sigframe_from_user_to_xstate(struct task_struct *tsk, const void __user *ubuf) { - return copy_uabi_to_xstate(&tsk->thread.fpu.state.xsave, NULL, ubuf); + return copy_uabi_to_xstate(&tsk->thread.fpu.state.xsave, NULL, ubuf, &tsk->thread.pkru); }
static bool validate_xsaves_xrstors(u64 mask)
From: Kyle Huey me@kylehuey.com
commit 4a804c4f8356 upstream
Handle PKRU in copy_uabi_to_xstate() for the benefit of APIs that write the XSTATE such as PTRACE_SETREGSET with NT_X86_XSTATE.
This restores the pre-5.14 behavior of ptrace. The regression can be seen by running gdb and executing `p $pkru`, `set $pkru = 42`, and `p $pkru`. On affected kernels (5.14+) the write to the PKRU register (which gdb performs through ptrace) is ignored.
Fixes: e84ba47e313d ("x86/fpu: Hook up PKRU into ptrace()") Signed-off-by: Kyle Huey me@kylehuey.com Signed-off-by: Dave Hansen dave.hansen@linux.intel.com Link: https://lore.kernel.org/all/20221115230932.7126-5-khuey%40kylehuey.com --- arch/x86/kernel/fpu/xstate.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+)
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 8abce8015b72..fe9050c60adc 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1091,6 +1091,29 @@ static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size, }
+/** + * copy_uabi_to_xstate - Copy a UABI format buffer to the kernel xstate + * @fpstate: The fpstate buffer to copy to + * @kbuf: The UABI format buffer, if it comes from the kernel + * @ubuf: The UABI format buffer, if it comes from userspace + * @pkru: The location to write the PKRU value to + * + * Converts from the UABI format into the kernel internal hardware + * dependent format. + * + * This function ultimately has two different callers with distinct PKRU + * behavior. + * 1. When called from sigreturn the PKRU register will be restored from + * @fpstate via an XRSTOR. Correctly copying the UABI format buffer to + * @fpstate is sufficient to cover this case, but the caller will also + * pass a pointer to the thread_struct's pkru field in @pkru and updating + * it is harmless. + * 2. When called from ptrace the PKRU register will be restored from the + * thread_struct's pkru field. A pointer to that is passed in @pkru. + * The kernel will restore it manually, so the XRSTOR behavior that resets + * the PKRU register to the hardware init value (0) if the corresponding + * xfeatures bit is not set is emulated here. + */ static int copy_uabi_to_xstate(struct xregs_state *xsave, const void *kbuf, const void __user *ubuf, u32 *pkru) { @@ -1140,6 +1163,13 @@ static int copy_uabi_to_xstate(struct xregs_state *xsave, const void *kbuf, } }
+ if (hdr.xfeatures & XFEATURE_MASK_PKRU) { + struct pkru_state *xpkru; + + xpkru = __raw_xsave_addr(xsave, XFEATURE_PKRU); + *pkru = xpkru->pkru; + } + /* * The state that came in from userspace was user-state only. * Mask all the user states out of 'xfeatures':
From: Kyle Huey me@kylehuey.com
commit d7e5aceace51 upstream
The hardware XRSTOR instruction resets the PKRU register to its hardware init value (namely 0) if the PKRU bit is not set in the xfeatures mask. Emulating that here restores the pre-5.14 behavior for PTRACE_SET_REGSET with NT_X86_XSTATE, and makes sigreturn (which still uses XRSTOR) and behave identically.
Fixes: e84ba47e313d ("x86/fpu: Hook up PKRU into ptrace()") Signed-off-by: Kyle Huey me@kylehuey.com Signed-off-by: Dave Hansen dave.hansen@linux.intel.com Link: https://lore.kernel.org/all/20221115230932.7126-6-khuey%40kylehuey.com --- arch/x86/kernel/fpu/xstate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index fe9050c60adc..8bbf37c0bebe 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1168,7 +1168,8 @@ static int copy_uabi_to_xstate(struct xregs_state *xsave, const void *kbuf,
xpkru = __raw_xsave_addr(xsave, XFEATURE_PKRU); *pkru = xpkru->pkru; - } + } else + *pkru = 0;
/* * The state that came in from userspace was user-state only.
From: Kyle Huey me@kylehuey.com
commit 6ea25770b043 upstream
This tests PTRACE_SETREGSET with NT_X86_XSTATE modifying PKRU directly and removing the PKRU bit from XSTATE_BV.
Signed-off-by: Kyle Huey me@kylehuey.com Signed-off-by: Dave Hansen dave.hansen@linux.intel.com Link: https://lore.kernel.org/all/20221115230932.7126-7-khuey%40kylehuey.com --- tools/testing/selftests/vm/pkey-x86.h | 12 ++ tools/testing/selftests/vm/protection_keys.c | 131 ++++++++++++++++++- 2 files changed, 141 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h index e4a4ce2b826d..ea8c8afbcdbb 100644 --- a/tools/testing/selftests/vm/pkey-x86.h +++ b/tools/testing/selftests/vm/pkey-x86.h @@ -119,6 +119,18 @@ static inline int cpu_has_pkeys(void) return 1; }
+static inline int cpu_max_xsave_size(void) +{ + unsigned long XSTATE_CPUID = 0xd; + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + + __cpuid_count(XSTATE_CPUID, 0, eax, ebx, ecx, edx); + return ecx; +} + static inline u32 pkey_bit_position(int pkey) { return pkey * PKEY_BITS_PER_PKEY; diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 2d0ae88665db..2d48272b2463 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -18,12 +18,13 @@ * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks * * Compile like this: - * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm - * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm + * gcc -mxsave -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm + * gcc -mxsave -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm */ #define _GNU_SOURCE #define __SANE_USERSPACE_TYPES__ #include <errno.h> +#include <linux/elf.h> #include <linux/futex.h> #include <time.h> #include <sys/time.h> @@ -1550,6 +1551,129 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) do_not_expect_pkey_fault("plain read on recently PROT_EXEC area"); }
+#if defined(__i386__) || defined(__x86_64__) +void test_ptrace_modifies_pkru(int *ptr, u16 pkey) +{ + u32 new_pkru; + pid_t child; + int status, ret; + int pkey_offset = pkey_reg_xstate_offset(); + size_t xsave_size = cpu_max_xsave_size(); + void *xsave; + u32 *pkey_register; + u64 *xstate_bv; + struct iovec iov; + + new_pkru = ~read_pkey_reg(); + /* Don't make PROT_EXEC mappings inaccessible */ + new_pkru &= ~3; + + child = fork(); + pkey_assert(child >= 0); + dprintf3("[%d] fork() ret: %d\n", getpid(), child); + if (!child) { + ptrace(PTRACE_TRACEME, 0, 0, 0); + /* Stop and allow the tracer to modify PKRU directly */ + raise(SIGSTOP); + + /* + * need __read_pkey_reg() version so we do not do shadow_pkey_reg + * checking + */ + if (__read_pkey_reg() != new_pkru) + exit(1); + + /* Stop and allow the tracer to clear XSTATE_BV for PKRU */ + raise(SIGSTOP); + + if (__read_pkey_reg() != 0) + exit(1); + + /* Stop and allow the tracer to examine PKRU */ + raise(SIGSTOP); + + exit(0); + } + + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); + + xsave = (void *)malloc(xsave_size); + pkey_assert(xsave > 0); + + /* Modify the PKRU register directly */ + iov.iov_base = xsave; + iov.iov_len = xsave_size; + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + + pkey_register = (u32 *)(xsave + pkey_offset); + pkey_assert(*pkey_register == read_pkey_reg()); + + *pkey_register = new_pkru; + + ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + + /* Test that the modification is visible in ptrace before any execution */ + memset(xsave, 0xCC, xsave_size); + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + pkey_assert(*pkey_register == new_pkru); + + /* Execute the tracee */ + ret = ptrace(PTRACE_CONT, child, 0, 0); + pkey_assert(ret == 0); + + /* Test that the tracee saw the PKRU value change */ + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); + + /* Test that the modification is visible in ptrace after execution */ + memset(xsave, 0xCC, xsave_size); + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + pkey_assert(*pkey_register == new_pkru); + + /* Clear the PKRU bit from XSTATE_BV */ + xstate_bv = (u64 *)(xsave + 512); + *xstate_bv &= ~(1 << 9); + + ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + + /* Test that the modification is visible in ptrace before any execution */ + memset(xsave, 0xCC, xsave_size); + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + pkey_assert(*pkey_register == 0); + + ret = ptrace(PTRACE_CONT, child, 0, 0); + pkey_assert(ret == 0); + + /* Test that the tracee saw the PKRU value go to 0 */ + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); + + /* Test that the modification is visible in ptrace after execution */ + memset(xsave, 0xCC, xsave_size); + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + pkey_assert(*pkey_register == 0); + + ret = ptrace(PTRACE_CONT, child, 0, 0); + pkey_assert(ret == 0); + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFEXITED(status)); + pkey_assert(WEXITSTATUS(status) == 0); + free(xsave); +} +#endif + void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) { int size = PAGE_SIZE; @@ -1585,6 +1709,9 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = { test_pkey_syscalls_bad_args, test_pkey_alloc_exhaust, test_pkey_alloc_free_attach_pkey0, +#if defined(__i386__) || defined(__x86_64__) + test_ptrace_modifies_pkru, +#endif };
void run_tests_once(void)
linux-stable-mirror@lists.linaro.org