From: Ammar Faizi ammarfaizi2@gnuweeb.org
On Mon, 23 Jan 2023 15:58:12 -0800, "H. Peter Anvin" wrote:
On 1/23/23 15:43, Ammar Faizi wrote:
Align them to spot differences:
0x200893 = 0b1000000000100010010011 0x200a93 = 0b1000000000101010010011 ^
Or just xor them to find the differences:
(gdb) p/x 0x200893 ^ 0x200a93 $3 = 0x200
** Checks my Intel SDM cheat sheets. **
Then, I was like "Oh, that's (1 << 9) a.k.a. IF. Of course we can't change rflags[IF] from userspace!!!".
In short, we can't use 0x200893 as the rflags_sentinel value because it clears the interrupt flag.
Right, my mistake.
I changed it to 0x200a93. The test passed on my machine. But I don't have a FRED system to test the special case.
Didn't manage to apply the feedback from Andrew about the way to handle redzone properly, though.
Something like this...
----------
This is just an RFC patchset.
Xin Li reported sysret_rip test fails at:
assert(ctx->uc_mcontext.gregs[REG_EFL] == ctx->uc_mcontext.gregs[REG_R11]);
in a FRED system. Handle the FRED system scenario too. There are two patches in this series. Comments welcome...
Note: Only tested for 'syscall' sets %rcx=%rip and %r11=%rflags case. I don't have a FRED system to test it.
How to test this:
$ make -C tools/testing/selftests/x86 $ tools/testing/selftests/x86/sysret_rip_64
Link: https://lore.kernel.org/lkml/5d4ad3e3-034f-c7da-d141-9c001c2343af@intel.com Signed-off-by: Ammar Faizi ammarfaizi2@gnuweeb.org ---
Ammar Faizi (2): selftests/x86: sysret_rip: Handle syscall in a FRED system selftests/x86: sysret_rip: Add more syscall tests with respect to `%rcx` and `%r11`
tools/testing/selftests/x86/sysret_rip.c | 105 ++++++++++++++++++++++- 1 file changed, 104 insertions(+), 1 deletion(-)
base-commit: e12ad468c22065a2826b2fc4c11d2113a7975301
From: Ammar Faizi ammarfaizi2@gnuweeb.org
The current selftest asserts %r11 == %rflags after the 'syscall' returns to user. Such an assertion doesn't apply to a FRED system because in a FRED system the 'syscall' instruction does not set %r11=%rflags and %rcx=%rip.
Handle the FRED case. Now, test that:
- "syscall" in a FRED system doesn't clobber %rcx and %r11. - "syscall" in a non-FRED system sets %rcx=%rip and %r11=%rflags.
The 'raise()' function from libc can't be used to control those registers. Therefore, create a syscall wrapper in inline Assembly to fully control them.
Fixes: 660602140103 ("selftests/x86: Add a selftest for SYSRET to noncanonical addresses") Link: https://lore.kernel.org/lkml/25b96960-a07e-a952-5c23-786b55054126@zytor.com Reported-by: Xin Li xin3.li@intel.com Co-developed-by: "H. Peter Anvin" hpa@zytor.com Signed-off-by: Ammar Faizi ammarfaizi2@gnuweeb.org ---
Missing a Signed-off-by tag from HPA.
@hpa send your sign off if you like this patch.
tools/testing/selftests/x86/sysret_rip.c | 96 +++++++++++++++++++++++- 1 file changed, 95 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/x86/sysret_rip.c b/tools/testing/selftests/x86/sysret_rip.c index 84d74be1d90207ab..9056f2e2674d2bc5 100644 --- a/tools/testing/selftests/x86/sysret_rip.c +++ b/tools/testing/selftests/x86/sysret_rip.c @@ -39,6 +39,95 @@ asm ( extern const char test_page[]; static void const *current_test_page_addr = test_page;
+/* Arbitrary values */ +static const unsigned long r11_sentinel = 0xfeedfacedeadbeef; +static const unsigned long rcx_sentinel = 0x5ca1ab1e0b57ac1e; + +/* An arbitrary *valid* RFLAGS value */ +static const unsigned long rflags_sentinel = 0x200a93; + +enum regs_ok { + REGS_ERROR = -1, /* Invalid register contents */ + REGS_SAVED = 0, /* Registers properly preserved */ + REGS_SYSRET = 1 /* Registers match syscall/sysret */ +}; + +/* + * Returns: + * 0 = %rcx and %r11 preserved. + * 1 = %rcx and %r11 set to %rflags and %rip. + * -1 = %rcx and/or %r11 set to any other values. + * + * Note that check_regs_syscall() sets %rbx to the syscall return %rip. + */ +static enum regs_ok check_regs_result(unsigned long r11, unsigned long rcx, + unsigned long rbx) +{ + if (r11 == r11_sentinel && rcx == rcx_sentinel) { + return REGS_SAVED; + } else if (r11 == rflags_sentinel && rcx == rbx) { + return REGS_SYSRET; + } else { + printf("[FAIL] check_regs_result\n"); + printf(" r11_sentinel = %#lx; %%r11 = %#lx;\n", r11_sentinel, r11); + printf(" rcx_sentinel = %#lx; %%rcx = %#lx;\n", rcx_sentinel, rcx); + printf(" rflags_sentinel = %#lx\n", rflags_sentinel); + return REGS_ERROR; + } +} + +static long do_syscall(long nr_syscall, unsigned long arg1, unsigned long arg2, + unsigned long arg3, unsigned long arg4, + unsigned long arg5, unsigned long arg6) +{ + register unsigned long r11 asm("%r11"); + register unsigned long r10 asm("%r10"); + register unsigned long r8 asm("%r8"); + register unsigned long r9 asm("%r9"); + unsigned long rcx, rbx; + + r11 = r11_sentinel; + rcx = rcx_sentinel; + r10 = arg4; + r8 = arg5; + r9 = arg6; + + asm volatile ( + "movq -8(%%rsp), %%r12\n\t" /* Don't clobber redzone. */ + "pushq %[rflags_sentinel]\n\t" + "popf\n\t" + "movq %%r12, -8(%%rsp)\n\t" + "leaq 1f(%%rip), %[rbx]\n\t" + "syscall\n" + "1:" + + : "+a" (nr_syscall), + "+r" (r11), + "+c" (rcx), + [rbx] "=b" (rbx) + + : [rflags_sentinel] "g" (rflags_sentinel), + "D" (arg1), /* %rdi */ + "S" (arg2), /* %rsi */ + "d" (arg3), /* %rdx */ + "r" (r10), + "r" (r8), + "r" (r9) + + : "r12", "memory" + ); + + /* + * Test that: + * + * - "syscall" in a FRED system doesn't clobber %rcx and %r11. + * - "syscall" in a non-FRED system sets %rcx=%rip and %r11=%rflags. + * + */ + assert(check_regs_result(r11, rcx, rbx) != REGS_ERROR); + return nr_syscall; +} + static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), int flags) { @@ -101,11 +190,16 @@ static void sigusr1(int sig, siginfo_t *info, void *ctx_void) return; }
+static void __raise(int sig) +{ + do_syscall(__NR_kill, getpid(), sig, 0, 0, 0, 0); +} + static void test_sigreturn_to(unsigned long ip) { rip = ip; printf("[RUN]\tsigreturn to 0x%lx\n", ip); - raise(SIGUSR1); + __raise(SIGUSR1); }
static jmp_buf jmpbuf;
On 1/23/23 16:26, Ammar Faizi wrote:
+static long do_syscall(long nr_syscall, unsigned long arg1, unsigned long arg2,
unsigned long arg3, unsigned long arg4,
unsigned long arg5, unsigned long arg6)
+{
- register unsigned long r11 asm("%r11");
- register unsigned long r10 asm("%r10");
- register unsigned long r8 asm("%r8");
- register unsigned long r9 asm("%r9");
- unsigned long rcx, rbx;
- r11 = r11_sentinel;
- rcx = rcx_sentinel;
- r10 = arg4;
- r8 = arg5;
- r9 = arg6;
- asm volatile (
"movq -8(%%rsp), %%r12\n\t" /* Don't clobber redzone. */
"pushq %[rflags_sentinel]\n\t"
"popf\n\t"
"movq %%r12, -8(%%rsp)\n\t"
"leaq 1f(%%rip), %[rbx]\n\t"
"syscall\n"
"1:"
: "+a" (nr_syscall),
"+r" (r11),
"+c" (rcx),
[rbx] "=b" (rbx)
: [rflags_sentinel] "g" (rflags_sentinel),
"D" (arg1), /* %rdi */
"S" (arg2), /* %rsi */
"d" (arg3), /* %rdx */
"r" (r10),
"r" (r8),
"r" (r9)
: "r12", "memory"
- );
- /*
* Test that:
*
* - "syscall" in a FRED system doesn't clobber %rcx and %r11.
* - "syscall" in a non-FRED system sets %rcx=%rip and %r11=%rflags.
*
*/
- assert(check_regs_result(r11, rcx, rbx) != REGS_ERROR);
- return nr_syscall;
+}
So as per Andrew's comment, add:
register void * rsp asm("%rsp");
...
"+r" (rsp) /* clobber the redzone */
... as the right way to avoid redzone problems.
-hpa
On Mon, Jan 23, 2023 at 05:40:23PM -0800, H. Peter Anvin wrote:
On 1/23/23 16:26, Ammar Faizi wrote:
+static long do_syscall(long nr_syscall, unsigned long arg1, unsigned long arg2,
unsigned long arg3, unsigned long arg4,
unsigned long arg5, unsigned long arg6)
+{
- register unsigned long r11 asm("%r11");
- register unsigned long r10 asm("%r10");
- register unsigned long r8 asm("%r8");
- register unsigned long r9 asm("%r9");
- unsigned long rcx, rbx;
- r11 = r11_sentinel;
- rcx = rcx_sentinel;
- r10 = arg4;
- r8 = arg5;
- r9 = arg6;
- asm volatile (
"movq -8(%%rsp), %%r12\n\t" /* Don't clobber redzone. */
"pushq %[rflags_sentinel]\n\t"
"popf\n\t"
"movq %%r12, -8(%%rsp)\n\t"
"leaq 1f(%%rip), %[rbx]\n\t"
"syscall\n"
"1:"
: "+a" (nr_syscall),
"+r" (r11),
"+c" (rcx),
[rbx] "=b" (rbx)
: [rflags_sentinel] "g" (rflags_sentinel),
"D" (arg1), /* %rdi */
"S" (arg2), /* %rsi */
"d" (arg3), /* %rdx */
"r" (r10),
"r" (r8),
"r" (r9)
: "r12", "memory"
- );
- /*
* Test that:
*
* - "syscall" in a FRED system doesn't clobber %rcx and %r11.
* - "syscall" in a non-FRED system sets %rcx=%rip and %r11=%rflags.
*
*/
- assert(check_regs_result(r11, rcx, rbx) != REGS_ERROR);
- return nr_syscall;
+}
So as per Andrew's comment, add:
register void * rsp asm("%rsp");
...
"+r" (rsp) /* clobber the redzone */
... as the right way to avoid redzone problems.
Fixed in v2.
On Mon, Jan 23, 2023 at 05:40:23PM -0800, H. Peter Anvin wrote:
So as per Andrew's comment, add:
register void * rsp asm("%rsp");
...
"+r" (rsp) /* clobber the redzone */
... as the right way to avoid redzone problems.
I played with this more. I found something wrong with this. This doesn't work for me. The compiler still uses red zone despite I use "+r" (rsp).
What did I do wrong?
-----
ammarfaizi2@integral2:/tmp$ gcc -fno-stack-protector -O2 -Wall -Wextra test.c -o test ammarfaizi2@integral2:/tmp$ objdump --no-show-raw-insn -d test | grep "a_leaf_func_with_red_zone>:" -A8 0000000000001180 <a_leaf_func_with_red_zone>: 1180: endbr64 1184: mov $0x1,%eax 1189: mov %rax,-0x8(%rsp) ## BUG!!! 118e: pushf 118f: pop %rax 1190: mov -0x8(%rsp),%rax ## BUG!!! 1195: ret
ammarfaizi2@integral2:/tmp$ clang -O2 -Wall -Wextra test.c -o test ammarfaizi2@integral2:/tmp$ objdump --no-show-raw-insn -d test | grep "a_leaf_func_with_red_zone>:" -A6 0000000000001140 <a_leaf_func_with_red_zone>: 1140: mov $0x1,%eax 1145: mov %rax,-0x8(%rsp) ## BUG!!! 114a: pushf 114b: pop %rax 114c: mov -0x8(%rsp),%rax ## BUG!!! 1151: ret
----- ammarfaizi2@integral2:~$ gcc --version gcc (Ubuntu 11.3.0-1ubuntu1~22.04) 11.3.0 Copyright (C) 2021 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
ammarfaizi2@integral2:~$ clang --version Ubuntu clang version 16.0.0 (++20230124031324+d63e492562f2-1~exp1~20230124151444.705) Target: x86_64-pc-linux-gnu Thread model: posix InstalledDir: /usr/bin
----- test.c:
#include <stdio.h> static inline void clobber_redzone(void) { register void *rsp __asm__("%rsp"); unsigned long rflags;
__asm__ volatile ("pushf; popq %1" : "+r" (rsp), "=r" (rflags)); }
static inline void set_red_zone(long *mem, long val) { __asm__ volatile ("movq %[val], %[mem]" : [mem] "=m" (*mem) : [val] "r" (val)); }
static inline long get_red_zone(long *mem) { long ret;
__asm__ volatile ("movq %[in], %[out]" : [out] "=r" (ret) : [in] "m" (*mem)); return ret; }
__attribute__((__noinline__)) long a_leaf_func_with_red_zone(void) { long x;
set_red_zone(&x, 1); clobber_redzone(); /* The correct retval is 1 */ return get_red_zone(&x); }
int main(void) { printf("ret = %ld\n", a_leaf_func_with_red_zone()); return 0; }
On 26/01/2023 8:08 pm, Ammar Faizi wrote:
On Mon, Jan 23, 2023 at 05:40:23PM -0800, H. Peter Anvin wrote:
So as per Andrew's comment, add:
register void * rsp asm("%rsp");
...
"+r" (rsp) /* clobber the redzone */
... as the right way to avoid redzone problems.
I played with this more. I found something wrong with this. This doesn't work for me. The compiler still uses red zone despite I use "+r" (rsp).
What did I do wrong?
Well this is a fine mess...
https://godbolt.org/z/MaPM7s8qr does the right thing, but is now contrary to the prior discussion regarding calls in asm, which concluded that the "+r"(rsp) was the way to go.
Furthermore GCC regressed in 9.0 and emits:
warning: listing the stack pointer register 'rsp' in a clobber list is deprecated [-Wdeprecated]
which might be the intention of the developers, but is wrong seeing as this is the only way to say "I modify the redzone" to the compiler...
~Andrew
On 15/02/2023 9:17 am, Andrew Cooper wrote:
On 26/01/2023 8:08 pm, Ammar Faizi wrote:
On Mon, Jan 23, 2023 at 05:40:23PM -0800, H. Peter Anvin wrote:
So as per Andrew's comment, add:
register void * rsp asm("%rsp");
...
"+r" (rsp) /* clobber the redzone */
... as the right way to avoid redzone problems.
I played with this more. I found something wrong with this. This doesn't work for me. The compiler still uses red zone despite I use "+r" (rsp).
What did I do wrong?
Well this is a fine mess...
https://godbolt.org/z/MaPM7s8qr does the right thing, but is now contrary to the prior discussion regarding calls in asm, which concluded that the "+r"(rsp) was the way to go.
Furthermore GCC regressed in 9.0 and emits:
warning: listing the stack pointer register 'rsp' in a clobber list is deprecated [-Wdeprecated]
which might be the intention of the developers, but is wrong seeing as this is the only way to say "I modify the redzone" to the compiler...
I've opened https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108799
~Andrew
On Wed, Feb 15, 2023 at 10:29:37AM +0000, Andrew Cooper wrote:
I've opened https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108799
Added myself to the CC list. Thanks for opening.
On Wed, Feb 15, 2023 at 09:17:23AM +0000, Andrew Cooper wrote:
On 26/01/2023 8:08 pm, Ammar Faizi wrote:
What did I do wrong?
Well this is a fine mess...
https://godbolt.org/z/MaPM7s8qr does the right thing, but is now contrary to the prior discussion regarding calls in asm, which concluded that the "+r"(rsp) was the way to go.
Does that also mean the ASM_CALL_CONSTRAINT macro in arch/x86/include/asm/asm.h macro is wrong?
That macro adds a "+r"(rsp) constraint, and we assume it's safe to execute the "call" instruction with that constraint in an inline Assembly.
I am not sure what "+r" (rsp) actually does. And if we are now complaining, "+r" (rsp) doesn't work. Since when it works? Or at least, where is that rule written? I couldn't find any GCC or Clang version that does it right with the "+r" (rsp) constraint (from a quick playing with that godbolt link).
Furthermore GCC regressed in 9.0 and emits:
warning: listing the stack pointer register 'rsp' in a clobber list is deprecated [-Wdeprecated]
which might be the intention of the developers, but is wrong seeing as this is the only way to say "I modify the redzone" to the compiler...
Yeah, adding "rsp" to the clobber list works. But sadly, it's deprecated in GCC. Not sure what the reason is.
I think the most straightforward and safest way, for now, is: "Don't clobber the red zone from the inline asm.".
I will use the previous approach to avoid red-zone clobbering in the next revision. That's by adding "r12" to the clobber list and preserving the red zone content in "%r12".
[ Resending, missed Xin Li from the Cc list... ]
On Mon, Jan 23, 2023 at 05:40:23PM -0800, H. Peter Anvin wrote:
So as per Andrew's comment, add:
register void * rsp asm("%rsp");
...
"+r" (rsp) /* clobber the redzone */
... as the right way to avoid redzone problems.
I played with this more. I found something wrong with this. This doesn't work for me. The compiler still uses red zone despite I use "+r" (rsp).
What did I do wrong?
-----
ammarfaizi2@integral2:/tmp$ gcc -fno-stack-protector -O2 -Wall -Wextra test.c -o test ammarfaizi2@integral2:/tmp$ objdump --no-show-raw-insn -d test | grep "a_leaf_func_with_red_zone>:" -A8 0000000000001180 <a_leaf_func_with_red_zone>: 1180: endbr64 1184: mov $0x1,%eax 1189: mov %rax,-0x8(%rsp) ## BUG!!! 118e: pushf 118f: pop %rax 1190: mov -0x8(%rsp),%rax ## BUG!!! 1195: ret
ammarfaizi2@integral2:/tmp$ clang -O2 -Wall -Wextra test.c -o test ammarfaizi2@integral2:/tmp$ objdump --no-show-raw-insn -d test | grep "a_leaf_func_with_red_zone>:" -A6 0000000000001140 <a_leaf_func_with_red_zone>: 1140: mov $0x1,%eax 1145: mov %rax,-0x8(%rsp) ## BUG!!! 114a: pushf 114b: pop %rax 114c: mov -0x8(%rsp),%rax ## BUG!!! 1151: ret
----- ammarfaizi2@integral2:~$ gcc --version gcc (Ubuntu 11.3.0-1ubuntu1~22.04) 11.3.0 Copyright (C) 2021 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
ammarfaizi2@integral2:~$ clang --version Ubuntu clang version 16.0.0 (++20230124031324+d63e492562f2-1~exp1~20230124151444.705) Target: x86_64-pc-linux-gnu Thread model: posix InstalledDir: /usr/bin
----- test.c:
#include <stdio.h> static inline void clobber_redzone(void) { register void *rsp __asm__("%rsp"); unsigned long rflags;
__asm__ volatile ("pushf; popq %1" : "+r" (rsp), "=r" (rflags)); }
static inline void set_red_zone(long *mem, long val) { __asm__ volatile ("movq %[val], %[mem]" : [mem] "=m" (*mem) : [val] "r" (val)); }
static inline long get_red_zone(long *mem) { long ret;
__asm__ volatile ("movq %[in], %[out]" : [out] "=r" (ret) : [in] "m" (*mem)); return ret; }
__attribute__((__noinline__)) long a_leaf_func_with_red_zone(void) { long x;
set_red_zone(&x, 1); clobber_redzone(); /* The correct retval is 1 */ return get_red_zone(&x); }
int main(void) { printf("ret = %ld\n", a_leaf_func_with_red_zone()); return 0; }
From: Ammar Faizi ammarfaizi2@gnuweeb.org
Test that:
- "syscall" in a FRED system doesn't clobber %rcx and %r11. - "syscall" in a non-FRED system sets %rcx=%rip and %r11=%rflags.
Test them out with a trivial system call like __NR_getppid and friends which are extremely likely to return with SYSRET on an IDT system; check that it returns a nonnegative value and then save the result.
Link: https://lore.kernel.org/lkml/25b96960-a07e-a952-5c23-786b55054126@zytor.com Co-developed-by: "H. Peter Anvin" hpa@zytor.com Signed-off-by: Ammar Faizi ammarfaizi2@gnuweeb.org ---
Missing a Signed-off-by tag from HPA.
@hpa send your sign off if you like this patch.
tools/testing/selftests/x86/sysret_rip.c | 9 +++++++++ 1 file changed, 9 insertions(+)
diff --git a/tools/testing/selftests/x86/sysret_rip.c b/tools/testing/selftests/x86/sysret_rip.c index 9056f2e2674d2bc5..c55f6d04f0ae1f2d 100644 --- a/tools/testing/selftests/x86/sysret_rip.c +++ b/tools/testing/selftests/x86/sysret_rip.c @@ -252,8 +252,17 @@ static void test_syscall_fallthrough_to(unsigned long ip) printf("[OK]\tWe survived\n"); }
+static void test_syscall_rcx_r11(void) +{ + do_syscall(__NR_getpid, 0, 0, 0, 0, 0, 0); + do_syscall(__NR_gettid, 0, 0, 0, 0, 0, 0); + do_syscall(__NR_getppid, 0, 0, 0, 0, 0, 0); +} + int main() { + test_syscall_rcx_r11(); + /* * When the kernel returns from a slow-path syscall, it will * detect whether SYSRET is appropriate. If it incorrectly
linux-kselftest-mirror@lists.linaro.org