On 2/22/2023 10:40 AM, Mingwei Zhang wrote:
We have this [1]:
if (fpu_state_size_dynamic()) mask &= (header.xfeatures | xinit->header.xcomp_bv);
If header.xfeatures[18] = 0 then mask[18] = 0 because xinit->header.xcomp_bv[18] = 0. Then, it won't hit that code. So, I'm confused about the problem that you described here.
Read the suggested changelog I wrote in my reply to Mingwei.
TLDR:
xsave.header.xfeatures[18] = 1 xinit.header.xfeatures[18] = 0 -> mask[18] = 1 -> __raw_xsave_addr(xsave, 18) <- Success -> __raw_xsave_addr(xinit, 18) <- WARN
Oh, sigh.. This should be caught last time.
Hmm, then since we store init state for legacy ones [1], unless it is too aggressive, perhaps the loop can be simplified like this:
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 714166cc25f2..2dac6f5f3ade 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1118,21 +1118,13 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, zerofrom = offsetof(struct xregs_state, extended_state_area);
/* - * The ptrace buffer is in non-compacted XSAVE format. In - * non-compacted format disabled features still occupy state space, - * but there is no state to copy from in the compacted - * init_fpstate. The gap tracking will zero these states. + * Indicate which states to copy from fpstate. When not present in + * fpstate, those extended states are either initialized or + * disabled. They are also known to have an all zeros init state. + * Thus, remove them from 'mask' to zero those features in the user + * buffer instead of retrieving them from init_fpstate. */ - mask = fpstate->user_xfeatures; - - /* - * Dynamic features are not present in init_fpstate. When they are - * in an all zeros init state, remove those from 'mask' to zero - * those features in the user buffer instead of retrieving them - * from init_fpstate. - */ - if (fpu_state_size_dynamic()) - mask &= (header.xfeatures | xinit->header.xcomp_bv); + mask = header.xfeatures;
for_each_extended_xfeature(i, mask) { /* @@ -1151,9 +1143,8 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, pkru.pkru = pkru_val; membuf_write(&to, &pkru, sizeof(pkru)); } else { - copy_feature(header.xfeatures & BIT_ULL(i), &to, + membuf_write(&to, __raw_xsave_addr(xsave, i), - __raw_xsave_addr(xinit, i), xstate_sizes[i]); } /*
Chang: to reproduce this issue, you can simply run the amx_test in the kvm selftest directory.
Yeah, I was able to reproduce it with this ptrace test:
diff --git a/tools/testing/selftests/x86/amx.c b/tools/testing/selftests/x86/amx.c index 625e42901237..ae02bc81846d 100644 --- a/tools/testing/selftests/x86/amx.c +++ b/tools/testing/selftests/x86/amx.c @@ -14,8 +14,10 @@ #include <sys/auxv.h> #include <sys/mman.h> #include <sys/shm.h> +#include <sys/ptrace.h> #include <sys/syscall.h> #include <sys/wait.h> +#include <sys/uio.h>
#include "../kselftest.h" /* For __cpuid_count() */
@@ -826,6 +828,76 @@ static void test_context_switch(void) free(finfo); }
+/* Ptrace test */ + +static bool inject_tiledata(pid_t target) +{ + struct xsave_buffer *xbuf; + struct iovec iov; + + xbuf = alloc_xbuf(); + if (!xbuf) + fatal_error("unable to allocate XSAVE buffer"); + + load_rand_tiledata(xbuf); + + memcpy(&stashed_xsave->bytes[xtiledata.xbuf_offset], + &xbuf->bytes[xtiledata.xbuf_offset], + xtiledata.size); + + iov.iov_base = xbuf; + iov.iov_len = xbuf_size; + + if (ptrace(PTRACE_SETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov)) + fatal_error("PTRACE_SETREGSET"); + + if (ptrace(PTRACE_GETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov)) + err(1, "PTRACE_GETREGSET"); + + if (!memcmp(&stashed_xsave->bytes[xtiledata.xbuf_offset], + &xbuf->bytes[xtiledata.xbuf_offset], + xtiledata.size)) + return true; + else + return false; +} + +static void test_ptrace(void) +{ + pid_t child; + int status; + + child = fork(); + if (child < 0) { + err(1, "fork"); + } else if (!child) { + if (ptrace(PTRACE_TRACEME, 0, NULL, NULL)) + err(1, "PTRACE_TRACEME"); + + /* Use the state to expand the kernel buffer */ + load_rand_tiledata(stashed_xsave); + + raise(SIGTRAP); + _exit(0); + } + + do { + wait(&status); + } while (WSTOPSIG(status) != SIGTRAP); + + printf("\tInject tile data via ptrace()\n"); + + if (inject_tiledata(child)) + printf("[OK]\tTile data was written on ptracee.\n"); + else + printf("[FAIL]\tTile data was not written on ptracee.\n"); + + ptrace(PTRACE_DETACH, child, NULL, NULL); + wait(&status); + if (!WIFEXITED(status) || WEXITSTATUS(status)) + err(1, "ptrace test"); +} + int main(void) { /* Check hardware availability at first */ @@ -846,6 +918,8 @@ int main(void) ctxtswtest_config.num_threads = 5; test_context_switch();
+ test_ptrace(); + clearhandler(SIGILL); free_stashed_xsave();
Thanks, Chang
[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch...