This is a note to let you know that I've just added the patch titled
x86/entry/64: Move SWAPGS into the common IRET-to-usermode path
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-entry-64-move-swapgs-into-the-common-iret-to-usermode-path.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 8a055d7f411d41755ce30db5bb65b154777c4b78 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto(a)kernel.org>
Date: Thu, 2 Nov 2017 00:59:00 -0700
Subject: x86/entry/64: Move SWAPGS into the common IRET-to-usermode path
From: Andy Lutomirski <luto(a)kernel.org>
commit 8a055d7f411d41755ce30db5bb65b154777c4b78 upstream.
All of the code paths that ended up doing IRET to usermode did
SWAPGS immediately beforehand. Move the SWAPGS into the common
code.
Signed-off-by: Andy Lutomirski <luto(a)kernel.org>
Cc: Borislav Petkov <bpetkov(a)suse.de>
Cc: Brian Gerst <brgerst(a)gmail.com>
Cc: Dave Hansen <dave.hansen(a)intel.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Link: http://lkml.kernel.org/r/27fd6f45b7cd640de38fb9066fd0349bcd11f8e1.150960930…
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/entry/entry_64.S | 32 ++++++++++++++------------------
arch/x86/entry/entry_64_compat.S | 3 +--
2 files changed, 15 insertions(+), 20 deletions(-)
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -250,12 +250,14 @@ return_from_SYSCALL_64:
/*
* Try to use SYSRET instead of IRET if we're returning to
- * a completely clean 64-bit userspace context.
+ * a completely clean 64-bit userspace context. If we're not,
+ * go to the slow exit path.
*/
movq RCX(%rsp), %rcx
movq RIP(%rsp), %r11
- cmpq %rcx, %r11 /* RCX == RIP */
- jne opportunistic_sysret_failed
+
+ cmpq %rcx, %r11 /* SYSRET requires RCX == RIP */
+ jne swapgs_restore_regs_and_return_to_usermode
/*
* On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
@@ -273,14 +275,14 @@ return_from_SYSCALL_64:
/* If this changed %rcx, it was not canonical */
cmpq %rcx, %r11
- jne opportunistic_sysret_failed
+ jne swapgs_restore_regs_and_return_to_usermode
cmpq $__USER_CS, CS(%rsp) /* CS must match SYSRET */
- jne opportunistic_sysret_failed
+ jne swapgs_restore_regs_and_return_to_usermode
movq R11(%rsp), %r11
cmpq %r11, EFLAGS(%rsp) /* R11 == RFLAGS */
- jne opportunistic_sysret_failed
+ jne swapgs_restore_regs_and_return_to_usermode
/*
* SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
@@ -301,12 +303,12 @@ return_from_SYSCALL_64:
* would never get past 'stuck_here'.
*/
testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
- jnz opportunistic_sysret_failed
+ jnz swapgs_restore_regs_and_return_to_usermode
/* nothing to check for RSP */
cmpq $__USER_DS, SS(%rsp) /* SS must match SYSRET */
- jne opportunistic_sysret_failed
+ jne swapgs_restore_regs_and_return_to_usermode
/*
* We win! This label is here just for ease of understanding
@@ -319,10 +321,6 @@ syscall_return_via_sysret:
movq RSP(%rsp), %rsp
UNWIND_HINT_EMPTY
USERGS_SYSRET64
-
-opportunistic_sysret_failed:
- SWAPGS
- jmp restore_regs_and_return_to_usermode
END(entry_SYSCALL_64)
ENTRY(stub_ptregs_64)
@@ -423,8 +421,7 @@ ENTRY(ret_from_fork)
movq %rsp, %rdi
call syscall_return_slowpath /* returns with IRQs disabled */
TRACE_IRQS_ON /* user mode is traced as IRQS on */
- SWAPGS
- jmp restore_regs_and_return_to_usermode
+ jmp swapgs_restore_regs_and_return_to_usermode
1:
/* kernel thread */
@@ -612,9 +609,8 @@ GLOBAL(retint_user)
mov %rsp,%rdi
call prepare_exit_to_usermode
TRACE_IRQS_IRETQ
- SWAPGS
-GLOBAL(restore_regs_and_return_to_usermode)
+GLOBAL(swapgs_restore_regs_and_return_to_usermode)
#ifdef CONFIG_DEBUG_ENTRY
/* Assert that pt_regs indicates user mode. */
testl $3, CS(%rsp)
@@ -622,6 +618,7 @@ GLOBAL(restore_regs_and_return_to_usermo
ud2
1:
#endif
+ SWAPGS
RESTORE_EXTRA_REGS
RESTORE_C_REGS
REMOVE_PT_GPREGS_FROM_STACK 8
@@ -1343,8 +1340,7 @@ ENTRY(nmi)
* Return back to user mode. We must *not* do the normal exit
* work, because we don't want to enable interrupts.
*/
- SWAPGS
- jmp restore_regs_and_return_to_usermode
+ jmp swapgs_restore_regs_and_return_to_usermode
.Lnmi_from_kernel:
/*
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -337,8 +337,7 @@ ENTRY(entry_INT80_compat)
/* Go back to user mode. */
TRACE_IRQS_ON
- SWAPGS
- jmp restore_regs_and_return_to_usermode
+ jmp swapgs_restore_regs_and_return_to_usermode
END(entry_INT80_compat)
ENTRY(stub32_clone)
Patches currently in stable-queue which might be from luto(a)kernel.org are
queue-4.14/x86-entry-64-simplify-reg-restore-code-in-the-standard-iret-paths.patch
queue-4.14/x86-cpufeatures-fix-various-details-in-the-feature-definitions.patch
queue-4.14/x86-entry-64-move-the-ist-stacks-into-struct-cpu_entry_area.patch
queue-4.14/x86-dumpstack-add-get_stack_info-support-for-the-sysenter-stack.patch
queue-4.14/x86-asm-don-t-use-the-confusing-.ifeq-directive.patch
queue-4.14/selftests-x86-ldt_gdt-add-infrastructure-to-test-set_thread_area.patch
queue-4.14/x86-entry-remap-the-tss-into-the-cpu-entry-area.patch
queue-4.14/x86-entry-64-paravirt-use-paravirt-safe-macro-to-access-eflags.patch
queue-4.14/x86-mm-fixmap-generalize-the-gdt-fixmap-mechanism-introduce-struct-cpu_entry_area.patch
queue-4.14/x86-paravirt-dont-patch-flush_tlb_single.patch
queue-4.14/x86-dumpstack-handle-stack-overflow-on-all-stacks.patch
queue-4.14/x86-entry-64-use-pop-instead-of-movq-in-syscall_return_via_sysret.patch
queue-4.14/x86-entry-64-return-to-userspace-from-the-trampoline-stack.patch
queue-4.14/x86-paravirt-provide-a-way-to-check-for-hypervisors.patch
queue-4.14/xen-x86-entry-64-add-xen-nmi-trap-entry.patch
queue-4.14/x86-entry-64-remove-the-restore_c_regs_and_iret-label.patch
queue-4.14/x86-entry-64-create-a-per-cpu-syscall-entry-trampoline.patch
queue-4.14/x86-entry-64-remove-the-restore_..._regs-infrastructure.patch
queue-4.14/x86-xen-64-x86-entry-64-clean-up-sp-code-in-cpu_initialize_context.patch
queue-4.14/x86-entry-add-task_top_of_stack-to-find-the-top-of-a-task-s-stack.patch
queue-4.14/x86-entry-64-remove-all-remaining-direct-thread_struct-sp0-reads.patch
queue-4.14/x86-boot-relocate-definition-of-the-initial-state-of-cr0.patch
queue-4.14/x86-entry-64-stop-initializing-tss.sp0-at-boot.patch
queue-4.14/x86-entry-64-de-xen-ify-our-nmi-code.patch
queue-4.14/objtool-don-t-report-end-of-section-error-after-an-empty-unwind-hint.patch
queue-4.14/x86-entry-64-pass-sp0-directly-to-load_sp0.patch
queue-4.14/x86-entry-64-use-a-per-cpu-trampoline-stack-for-idt-entries.patch
queue-4.14/x86-entry-64-move-swapgs-into-the-common-iret-to-usermode-path.patch
queue-4.14/x86-entry-64-shorten-test-instructions.patch
queue-4.14/x86-cpufeature-add-user-mode-instruction-prevention-definitions.patch
queue-4.14/x86-cpufeatures-make-cpu-bugs-sticky.patch
queue-4.14/x86-espfix-64-stop-assuming-that-pt_regs-is-on-the-entry-stack.patch
queue-4.14/x86-traps-use-a-new-on_thread_stack-helper-to-clean-up-an-assertion.patch
queue-4.14/x86-xen-fix-xen-head-elf-annotations.patch
queue-4.14/x86-entry-64-remove-thread_struct-sp0.patch
queue-4.14/x86-entry-move-sysenter_stack-to-the-beginning-of-struct-tss_struct.patch
queue-4.14/x86-entry-64-allocate-and-enable-the-sysenter-stack.patch
queue-4.14/x86-unwinder-orc-dont-bail-on-stack-overflow.patch
queue-4.14/x86-head-fix-head-elf-function-annotations.patch
queue-4.14/selftests-x86-ldt_gdt-run-most-existing-ldt-test-cases-against-the-gdt-as-well.patch
queue-4.14/x86-entry-32-pull-the-msr_ia32_sysenter_cs-update-code-out-of-native_load_sp0.patch
queue-4.14/x86-entry-64-split-the-iret-to-user-and-iret-to-kernel-paths.patch
queue-4.14/x86-entry-64-shrink-paranoid_exit_restore-and-make-labels-local.patch
queue-4.14/x86-head-add-unwind-hint-annotations.patch
queue-4.14/x86-head-remove-unused-bad_address-code.patch
queue-4.14/x86-xen-add-unwind-hint-annotations.patch
queue-4.14/x86-kasan-64-teach-kasan-about-the-cpu_entry_area.patch
queue-4.14/x86-head-remove-confusing-comment.patch
queue-4.14/x86-entry-64-remove-the-sysenter-stack-canary.patch
queue-4.14/x86-mm-kasan-don-t-use-vmemmap_populate-to-initialize-shadow.patch
queue-4.14/x86-entry-gdt-put-per-cpu-gdt-remaps-in-ascending-order.patch
queue-4.14/x86-entry-fix-assumptions-that-the-hw-tss-is-at-the-beginning-of-cpu_tss.patch
queue-4.14/x86-cpufeatures-re-tabulate-the-x86_feature-definitions.patch
queue-4.14/x86-entry-32-fix-cpu_current_top_of_stack-initialization-at-boot.patch
queue-4.14/ptrace-x86-make-user_64bit_mode-available-to-32-bit-builds.patch
queue-4.14/x86-entry-64-make-cpu_entry_area.tss-read-only.patch
queue-4.14/x86-mm-relocate-page-fault-error-codes-to-traps.h.patch
queue-4.14/x86-unwinder-handle-stack-overflows-more-gracefully.patch
queue-4.14/x86-entry-64-use-pop-instead-of-mov-to-restore-regs-on-nmi-return.patch
queue-4.14/x86-irq-64-print-the-offending-ip-in-the-stack-overflow-warning.patch
queue-4.14/x86-entry-clean-up-the-sysenter_stack-code.patch
queue-4.14/x86-entry-64-merge-the-fast-and-slow-sysret-paths.patch
queue-4.14/x86-entry-64-separate-cpu_current_top_of_stack-from-tss.sp0.patch
queue-4.14/x86-boot-annotate-verify_cpu-as-a-callable-function.patch
queue-4.14/x86-irq-remove-an-old-outdated-comment-about-context-tracking-races.patch
This is a note to let you know that I've just added the patch titled
x86/entry/64: Merge the fast and slow SYSRET paths
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-entry-64-merge-the-fast-and-slow-sysret-paths.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From a512210643da8082cb44181dba8b18e752bd68f0 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto(a)kernel.org>
Date: Thu, 2 Nov 2017 00:59:04 -0700
Subject: x86/entry/64: Merge the fast and slow SYSRET paths
From: Andy Lutomirski <luto(a)kernel.org>
commit a512210643da8082cb44181dba8b18e752bd68f0 upstream.
They did almost the same thing. Remove a bunch of pointless
instructions (mostly hidden in macros) and reduce cognitive load by
merging them.
Signed-off-by: Andy Lutomirski <luto(a)kernel.org>
Cc: Borislav Petkov <bpetkov(a)suse.de>
Cc: Brian Gerst <brgerst(a)gmail.com>
Cc: Dave Hansen <dave.hansen(a)intel.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Link: http://lkml.kernel.org/r/1204e20233fcab9130a1ba80b3b1879b5db3fc1f.150960930…
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/entry/entry_64.S | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -221,10 +221,9 @@ entry_SYSCALL_64_fastpath:
TRACE_IRQS_ON /* user mode is traced as IRQs on */
movq RIP(%rsp), %rcx
movq EFLAGS(%rsp), %r11
- RESTORE_C_REGS_EXCEPT_RCX_R11
- movq RSP(%rsp), %rsp
+ addq $6*8, %rsp /* skip extra regs -- they were preserved */
UNWIND_HINT_EMPTY
- USERGS_SYSRET64
+ jmp .Lpop_c_regs_except_rcx_r11_and_sysret
1:
/*
@@ -318,6 +317,7 @@ syscall_return_via_sysret:
/* rcx and r11 are already restored (see code above) */
UNWIND_HINT_EMPTY
POP_EXTRA_REGS
+.Lpop_c_regs_except_rcx_r11_and_sysret:
popq %rsi /* skip r11 */
popq %r10
popq %r9
Patches currently in stable-queue which might be from luto(a)kernel.org are
queue-4.14/x86-entry-64-simplify-reg-restore-code-in-the-standard-iret-paths.patch
queue-4.14/x86-cpufeatures-fix-various-details-in-the-feature-definitions.patch
queue-4.14/x86-entry-64-move-the-ist-stacks-into-struct-cpu_entry_area.patch
queue-4.14/x86-dumpstack-add-get_stack_info-support-for-the-sysenter-stack.patch
queue-4.14/x86-asm-don-t-use-the-confusing-.ifeq-directive.patch
queue-4.14/selftests-x86-ldt_gdt-add-infrastructure-to-test-set_thread_area.patch
queue-4.14/x86-entry-remap-the-tss-into-the-cpu-entry-area.patch
queue-4.14/x86-entry-64-paravirt-use-paravirt-safe-macro-to-access-eflags.patch
queue-4.14/x86-mm-fixmap-generalize-the-gdt-fixmap-mechanism-introduce-struct-cpu_entry_area.patch
queue-4.14/x86-paravirt-dont-patch-flush_tlb_single.patch
queue-4.14/x86-dumpstack-handle-stack-overflow-on-all-stacks.patch
queue-4.14/x86-entry-64-use-pop-instead-of-movq-in-syscall_return_via_sysret.patch
queue-4.14/x86-entry-64-return-to-userspace-from-the-trampoline-stack.patch
queue-4.14/x86-paravirt-provide-a-way-to-check-for-hypervisors.patch
queue-4.14/xen-x86-entry-64-add-xen-nmi-trap-entry.patch
queue-4.14/x86-entry-64-remove-the-restore_c_regs_and_iret-label.patch
queue-4.14/x86-entry-64-create-a-per-cpu-syscall-entry-trampoline.patch
queue-4.14/x86-entry-64-remove-the-restore_..._regs-infrastructure.patch
queue-4.14/x86-xen-64-x86-entry-64-clean-up-sp-code-in-cpu_initialize_context.patch
queue-4.14/x86-entry-add-task_top_of_stack-to-find-the-top-of-a-task-s-stack.patch
queue-4.14/x86-entry-64-remove-all-remaining-direct-thread_struct-sp0-reads.patch
queue-4.14/x86-boot-relocate-definition-of-the-initial-state-of-cr0.patch
queue-4.14/x86-entry-64-stop-initializing-tss.sp0-at-boot.patch
queue-4.14/x86-entry-64-de-xen-ify-our-nmi-code.patch
queue-4.14/objtool-don-t-report-end-of-section-error-after-an-empty-unwind-hint.patch
queue-4.14/x86-entry-64-pass-sp0-directly-to-load_sp0.patch
queue-4.14/x86-entry-64-use-a-per-cpu-trampoline-stack-for-idt-entries.patch
queue-4.14/x86-entry-64-move-swapgs-into-the-common-iret-to-usermode-path.patch
queue-4.14/x86-entry-64-shorten-test-instructions.patch
queue-4.14/x86-cpufeature-add-user-mode-instruction-prevention-definitions.patch
queue-4.14/x86-cpufeatures-make-cpu-bugs-sticky.patch
queue-4.14/x86-espfix-64-stop-assuming-that-pt_regs-is-on-the-entry-stack.patch
queue-4.14/x86-traps-use-a-new-on_thread_stack-helper-to-clean-up-an-assertion.patch
queue-4.14/x86-xen-fix-xen-head-elf-annotations.patch
queue-4.14/x86-entry-64-remove-thread_struct-sp0.patch
queue-4.14/x86-entry-move-sysenter_stack-to-the-beginning-of-struct-tss_struct.patch
queue-4.14/x86-entry-64-allocate-and-enable-the-sysenter-stack.patch
queue-4.14/x86-unwinder-orc-dont-bail-on-stack-overflow.patch
queue-4.14/x86-head-fix-head-elf-function-annotations.patch
queue-4.14/selftests-x86-ldt_gdt-run-most-existing-ldt-test-cases-against-the-gdt-as-well.patch
queue-4.14/x86-entry-32-pull-the-msr_ia32_sysenter_cs-update-code-out-of-native_load_sp0.patch
queue-4.14/x86-entry-64-split-the-iret-to-user-and-iret-to-kernel-paths.patch
queue-4.14/x86-entry-64-shrink-paranoid_exit_restore-and-make-labels-local.patch
queue-4.14/x86-head-add-unwind-hint-annotations.patch
queue-4.14/x86-head-remove-unused-bad_address-code.patch
queue-4.14/x86-xen-add-unwind-hint-annotations.patch
queue-4.14/x86-kasan-64-teach-kasan-about-the-cpu_entry_area.patch
queue-4.14/x86-head-remove-confusing-comment.patch
queue-4.14/x86-entry-64-remove-the-sysenter-stack-canary.patch
queue-4.14/x86-mm-kasan-don-t-use-vmemmap_populate-to-initialize-shadow.patch
queue-4.14/x86-entry-gdt-put-per-cpu-gdt-remaps-in-ascending-order.patch
queue-4.14/x86-entry-fix-assumptions-that-the-hw-tss-is-at-the-beginning-of-cpu_tss.patch
queue-4.14/x86-cpufeatures-re-tabulate-the-x86_feature-definitions.patch
queue-4.14/x86-entry-32-fix-cpu_current_top_of_stack-initialization-at-boot.patch
queue-4.14/ptrace-x86-make-user_64bit_mode-available-to-32-bit-builds.patch
queue-4.14/x86-entry-64-make-cpu_entry_area.tss-read-only.patch
queue-4.14/x86-mm-relocate-page-fault-error-codes-to-traps.h.patch
queue-4.14/x86-unwinder-handle-stack-overflows-more-gracefully.patch
queue-4.14/x86-entry-64-use-pop-instead-of-mov-to-restore-regs-on-nmi-return.patch
queue-4.14/x86-irq-64-print-the-offending-ip-in-the-stack-overflow-warning.patch
queue-4.14/x86-entry-clean-up-the-sysenter_stack-code.patch
queue-4.14/x86-entry-64-merge-the-fast-and-slow-sysret-paths.patch
queue-4.14/x86-entry-64-separate-cpu_current_top_of_stack-from-tss.sp0.patch
queue-4.14/x86-boot-annotate-verify_cpu-as-a-callable-function.patch
queue-4.14/x86-irq-remove-an-old-outdated-comment-about-context-tracking-races.patch
This is a note to let you know that I've just added the patch titled
x86/entry/64: De-Xen-ify our NMI code
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-entry-64-de-xen-ify-our-nmi-code.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 929bacec21478a72c78e4f29f98fb799bd00105a Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto(a)kernel.org>
Date: Thu, 2 Nov 2017 00:59:08 -0700
Subject: x86/entry/64: De-Xen-ify our NMI code
From: Andy Lutomirski <luto(a)kernel.org>
commit 929bacec21478a72c78e4f29f98fb799bd00105a upstream.
Xen PV is fundamentally incompatible with our fancy NMI code: it
doesn't use IST at all, and Xen entries clobber two stack slots
below the hardware frame.
Drop Xen PV support from our NMI code entirely.
Signed-off-by: Andy Lutomirski <luto(a)kernel.org>
Reviewed-by: Borislav Petkov <bp(a)suse.de>
Acked-by: Juergen Gross <jgross(a)suse.com>
Cc: Boris Ostrovsky <boris.ostrovsky(a)oracle.com>
Cc: Borislav Petkov <bpetkov(a)suse.de>
Cc: Brian Gerst <brgerst(a)gmail.com>
Cc: Dave Hansen <dave.hansen(a)intel.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Link: http://lkml.kernel.org/r/bfbe711b5ae03f672f8848999a8eb2711efc7f98.150960930…
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/entry/entry_64.S | 30 ++++++++++++++++++------------
1 file changed, 18 insertions(+), 12 deletions(-)
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1241,9 +1241,13 @@ ENTRY(error_exit)
jmp retint_user
END(error_exit)
-/* Runs on exception stack */
+/*
+ * Runs on exception stack. Xen PV does not go through this path at all,
+ * so we can use real assembly here.
+ */
ENTRY(nmi)
UNWIND_HINT_IRET_REGS
+
/*
* We allow breakpoints in NMIs. If a breakpoint occurs, then
* the iretq it performs will take us out of NMI context.
@@ -1301,7 +1305,7 @@ ENTRY(nmi)
* stacks lest we corrupt the "NMI executing" variable.
*/
- SWAPGS_UNSAFE_STACK
+ swapgs
cld
movq %rsp, %rdx
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
@@ -1466,7 +1470,7 @@ nested_nmi_out:
popq %rdx
/* We are returning to kernel mode, so this cannot result in a fault. */
- INTERRUPT_RETURN
+ iretq
first_nmi:
/* Restore rdx. */
@@ -1497,7 +1501,7 @@ first_nmi:
pushfq /* RFLAGS */
pushq $__KERNEL_CS /* CS */
pushq $1f /* RIP */
- INTERRUPT_RETURN /* continues at repeat_nmi below */
+ iretq /* continues at repeat_nmi below */
UNWIND_HINT_IRET_REGS
1:
#endif
@@ -1572,20 +1576,22 @@ nmi_restore:
/*
* Clear "NMI executing". Set DF first so that we can easily
* distinguish the remaining code between here and IRET from
- * the SYSCALL entry and exit paths. On a native kernel, we
- * could just inspect RIP, but, on paravirt kernels,
- * INTERRUPT_RETURN can translate into a jump into a
- * hypercall page.
+ * the SYSCALL entry and exit paths.
+ *
+ * We arguably should just inspect RIP instead, but I (Andy) wrote
+ * this code when I had the misapprehension that Xen PV supported
+ * NMIs, and Xen PV would break that approach.
*/
std
movq $0, 5*8(%rsp) /* clear "NMI executing" */
/*
- * INTERRUPT_RETURN reads the "iret" frame and exits the NMI
- * stack in a single instruction. We are returning to kernel
- * mode, so this cannot result in a fault.
+ * iretq reads the "iret" frame and exits the NMI stack in a
+ * single instruction. We are returning to kernel mode, so this
+ * cannot result in a fault. Similarly, we don't need to worry
+ * about espfix64 on the way back to kernel mode.
*/
- INTERRUPT_RETURN
+ iretq
END(nmi)
ENTRY(ignore_sysret)
Patches currently in stable-queue which might be from luto(a)kernel.org are
queue-4.14/x86-entry-64-simplify-reg-restore-code-in-the-standard-iret-paths.patch
queue-4.14/x86-cpufeatures-fix-various-details-in-the-feature-definitions.patch
queue-4.14/x86-entry-64-move-the-ist-stacks-into-struct-cpu_entry_area.patch
queue-4.14/x86-dumpstack-add-get_stack_info-support-for-the-sysenter-stack.patch
queue-4.14/x86-asm-don-t-use-the-confusing-.ifeq-directive.patch
queue-4.14/selftests-x86-ldt_gdt-add-infrastructure-to-test-set_thread_area.patch
queue-4.14/x86-entry-remap-the-tss-into-the-cpu-entry-area.patch
queue-4.14/x86-entry-64-paravirt-use-paravirt-safe-macro-to-access-eflags.patch
queue-4.14/x86-mm-fixmap-generalize-the-gdt-fixmap-mechanism-introduce-struct-cpu_entry_area.patch
queue-4.14/x86-paravirt-dont-patch-flush_tlb_single.patch
queue-4.14/x86-dumpstack-handle-stack-overflow-on-all-stacks.patch
queue-4.14/x86-entry-64-use-pop-instead-of-movq-in-syscall_return_via_sysret.patch
queue-4.14/x86-entry-64-return-to-userspace-from-the-trampoline-stack.patch
queue-4.14/x86-paravirt-provide-a-way-to-check-for-hypervisors.patch
queue-4.14/xen-x86-entry-64-add-xen-nmi-trap-entry.patch
queue-4.14/x86-entry-64-remove-the-restore_c_regs_and_iret-label.patch
queue-4.14/x86-entry-64-create-a-per-cpu-syscall-entry-trampoline.patch
queue-4.14/x86-entry-64-remove-the-restore_..._regs-infrastructure.patch
queue-4.14/x86-xen-64-x86-entry-64-clean-up-sp-code-in-cpu_initialize_context.patch
queue-4.14/x86-entry-add-task_top_of_stack-to-find-the-top-of-a-task-s-stack.patch
queue-4.14/x86-entry-64-remove-all-remaining-direct-thread_struct-sp0-reads.patch
queue-4.14/x86-boot-relocate-definition-of-the-initial-state-of-cr0.patch
queue-4.14/x86-entry-64-stop-initializing-tss.sp0-at-boot.patch
queue-4.14/x86-entry-64-de-xen-ify-our-nmi-code.patch
queue-4.14/objtool-don-t-report-end-of-section-error-after-an-empty-unwind-hint.patch
queue-4.14/x86-entry-64-pass-sp0-directly-to-load_sp0.patch
queue-4.14/x86-entry-64-use-a-per-cpu-trampoline-stack-for-idt-entries.patch
queue-4.14/x86-entry-64-move-swapgs-into-the-common-iret-to-usermode-path.patch
queue-4.14/x86-entry-64-shorten-test-instructions.patch
queue-4.14/x86-cpufeature-add-user-mode-instruction-prevention-definitions.patch
queue-4.14/x86-cpufeatures-make-cpu-bugs-sticky.patch
queue-4.14/x86-espfix-64-stop-assuming-that-pt_regs-is-on-the-entry-stack.patch
queue-4.14/x86-traps-use-a-new-on_thread_stack-helper-to-clean-up-an-assertion.patch
queue-4.14/x86-xen-fix-xen-head-elf-annotations.patch
queue-4.14/x86-entry-64-remove-thread_struct-sp0.patch
queue-4.14/x86-entry-move-sysenter_stack-to-the-beginning-of-struct-tss_struct.patch
queue-4.14/x86-entry-64-allocate-and-enable-the-sysenter-stack.patch
queue-4.14/x86-unwinder-orc-dont-bail-on-stack-overflow.patch
queue-4.14/x86-head-fix-head-elf-function-annotations.patch
queue-4.14/selftests-x86-ldt_gdt-run-most-existing-ldt-test-cases-against-the-gdt-as-well.patch
queue-4.14/x86-entry-32-pull-the-msr_ia32_sysenter_cs-update-code-out-of-native_load_sp0.patch
queue-4.14/x86-entry-64-split-the-iret-to-user-and-iret-to-kernel-paths.patch
queue-4.14/x86-entry-64-shrink-paranoid_exit_restore-and-make-labels-local.patch
queue-4.14/x86-head-add-unwind-hint-annotations.patch
queue-4.14/x86-head-remove-unused-bad_address-code.patch
queue-4.14/x86-xen-add-unwind-hint-annotations.patch
queue-4.14/x86-kasan-64-teach-kasan-about-the-cpu_entry_area.patch
queue-4.14/x86-head-remove-confusing-comment.patch
queue-4.14/x86-entry-64-remove-the-sysenter-stack-canary.patch
queue-4.14/x86-mm-kasan-don-t-use-vmemmap_populate-to-initialize-shadow.patch
queue-4.14/x86-entry-gdt-put-per-cpu-gdt-remaps-in-ascending-order.patch
queue-4.14/x86-entry-fix-assumptions-that-the-hw-tss-is-at-the-beginning-of-cpu_tss.patch
queue-4.14/x86-cpufeatures-re-tabulate-the-x86_feature-definitions.patch
queue-4.14/x86-entry-32-fix-cpu_current_top_of_stack-initialization-at-boot.patch
queue-4.14/ptrace-x86-make-user_64bit_mode-available-to-32-bit-builds.patch
queue-4.14/x86-entry-64-make-cpu_entry_area.tss-read-only.patch
queue-4.14/x86-mm-relocate-page-fault-error-codes-to-traps.h.patch
queue-4.14/x86-unwinder-handle-stack-overflows-more-gracefully.patch
queue-4.14/x86-entry-64-use-pop-instead-of-mov-to-restore-regs-on-nmi-return.patch
queue-4.14/x86-irq-64-print-the-offending-ip-in-the-stack-overflow-warning.patch
queue-4.14/x86-entry-clean-up-the-sysenter_stack-code.patch
queue-4.14/x86-entry-64-merge-the-fast-and-slow-sysret-paths.patch
queue-4.14/x86-entry-64-separate-cpu_current_top_of_stack-from-tss.sp0.patch
queue-4.14/x86-boot-annotate-verify_cpu-as-a-callable-function.patch
queue-4.14/x86-irq-remove-an-old-outdated-comment-about-context-tracking-races.patch
This is a note to let you know that I've just added the patch titled
x86/entry/64: Create a per-CPU SYSCALL entry trampoline
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-entry-64-create-a-per-cpu-syscall-entry-trampoline.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 3386bc8aed825e9f1f65ce38df4b109b2019b71a Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto(a)kernel.org>
Date: Mon, 4 Dec 2017 15:07:25 +0100
Subject: x86/entry/64: Create a per-CPU SYSCALL entry trampoline
From: Andy Lutomirski <luto(a)kernel.org>
commit 3386bc8aed825e9f1f65ce38df4b109b2019b71a upstream.
Handling SYSCALL is tricky: the SYSCALL handler is entered with every
single register (except FLAGS), including RSP, live. It somehow needs
to set RSP to point to a valid stack, which means it needs to save the
user RSP somewhere and find its own stack pointer. The canonical way
to do this is with SWAPGS, which lets us access percpu data using the
%gs prefix.
With PAGE_TABLE_ISOLATION-like pagetable switching, this is
problematic. Without a scratch register, switching CR3 is impossible, so
%gs-based percpu memory would need to be mapped in the user pagetables.
Doing that without information leaks is difficult or impossible.
Instead, use a different sneaky trick. Map a copy of the first part
of the SYSCALL asm at a different address for each CPU. Now RIP
varies depending on the CPU, so we can use RIP-relative memory access
to access percpu memory. By putting the relevant information (one
scratch slot and the stack address) at a constant offset relative to
RIP, we can make SYSCALL work without relying on %gs.
A nice thing about this approach is that we can easily switch it on
and off if we want pagetable switching to be configurable.
The compat variant of SYSCALL doesn't have this problem in the first
place -- there are plenty of scratch registers, since we don't care
about preserving r8-r15. This patch therefore doesn't touch SYSCALL32
at all.
This patch actually seems to be a small speedup. With this patch,
SYSCALL touches an extra cache line and an extra virtual page, but
the pipeline no longer stalls waiting for SWAPGS. It seems that, at
least in a tight loop, the latter outweights the former.
Thanks to David Laight for an optimization tip.
Signed-off-by: Andy Lutomirski <luto(a)kernel.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Borislav Petkov <bpetkov(a)suse.de>
Cc: Boris Ostrovsky <boris.ostrovsky(a)oracle.com>
Cc: Borislav Petkov <bp(a)alien8.de>
Cc: Brian Gerst <brgerst(a)gmail.com>
Cc: Dave Hansen <dave.hansen(a)intel.com>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: David Laight <David.Laight(a)aculab.com>
Cc: Denys Vlasenko <dvlasenk(a)redhat.com>
Cc: Eduardo Valentin <eduval(a)amazon.com>
Cc: Greg KH <gregkh(a)linuxfoundation.org>
Cc: H. Peter Anvin <hpa(a)zytor.com>
Cc: Josh Poimboeuf <jpoimboe(a)redhat.com>
Cc: Juergen Gross <jgross(a)suse.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Rik van Riel <riel(a)redhat.com>
Cc: Will Deacon <will.deacon(a)arm.com>
Cc: aliguori(a)amazon.com
Cc: daniel.gruss(a)iaik.tugraz.at
Cc: hughd(a)google.com
Cc: keescook(a)google.com
Link: https://lkml.kernel.org/r/20171204150606.403607157@linutronix.de
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/entry/entry_64.S | 58 ++++++++++++++++++++++++++++++++++++++++++
arch/x86/include/asm/fixmap.h | 2 +
arch/x86/kernel/asm-offsets.c | 1
arch/x86/kernel/cpu/common.c | 15 ++++++++++
arch/x86/kernel/vmlinux.lds.S | 9 ++++++
5 files changed, 84 insertions(+), 1 deletion(-)
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -136,6 +136,64 @@ END(native_usergs_sysret64)
* with them due to bugs in both AMD and Intel CPUs.
*/
+ .pushsection .entry_trampoline, "ax"
+
+/*
+ * The code in here gets remapped into cpu_entry_area's trampoline. This means
+ * that the assembler and linker have the wrong idea as to where this code
+ * lives (and, in fact, it's mapped more than once, so it's not even at a
+ * fixed address). So we can't reference any symbols outside the entry
+ * trampoline and expect it to work.
+ *
+ * Instead, we carefully abuse %rip-relative addressing.
+ * _entry_trampoline(%rip) refers to the start of the remapped) entry
+ * trampoline. We can thus find cpu_entry_area with this macro:
+ */
+
+#define CPU_ENTRY_AREA \
+ _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
+
+/* The top word of the SYSENTER stack is hot and is usable as scratch space. */
+#define RSP_SCRATCH CPU_ENTRY_AREA_tss + CPU_TSS_SYSENTER_stack + \
+ SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA
+
+ENTRY(entry_SYSCALL_64_trampoline)
+ UNWIND_HINT_EMPTY
+ swapgs
+
+ /* Stash the user RSP. */
+ movq %rsp, RSP_SCRATCH
+
+ /* Load the top of the task stack into RSP */
+ movq CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
+
+ /* Start building the simulated IRET frame. */
+ pushq $__USER_DS /* pt_regs->ss */
+ pushq RSP_SCRATCH /* pt_regs->sp */
+ pushq %r11 /* pt_regs->flags */
+ pushq $__USER_CS /* pt_regs->cs */
+ pushq %rcx /* pt_regs->ip */
+
+ /*
+ * x86 lacks a near absolute jump, and we can't jump to the real
+ * entry text with a relative jump. We could push the target
+ * address and then use retq, but this destroys the pipeline on
+ * many CPUs (wasting over 20 cycles on Sandy Bridge). Instead,
+ * spill RDI and restore it in a second-stage trampoline.
+ */
+ pushq %rdi
+ movq $entry_SYSCALL_64_stage2, %rdi
+ jmp *%rdi
+END(entry_SYSCALL_64_trampoline)
+
+ .popsection
+
+ENTRY(entry_SYSCALL_64_stage2)
+ UNWIND_HINT_EMPTY
+ popq %rdi
+ jmp entry_SYSCALL_64_after_hwframe
+END(entry_SYSCALL_64_stage2)
+
ENTRY(entry_SYSCALL_64)
UNWIND_HINT_EMPTY
/*
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -61,6 +61,8 @@ struct cpu_entry_area {
* of the TSS region.
*/
struct tss_struct tss;
+
+ char entry_trampoline[PAGE_SIZE];
};
#define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE)
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -101,4 +101,5 @@ void common(void) {
/* Layout info for cpu_entry_area */
OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
+ OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
}
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -486,6 +486,8 @@ DEFINE_PER_CPU(struct cpu_entry_area *,
static inline void setup_cpu_entry_area(int cpu)
{
#ifdef CONFIG_X86_64
+ extern char _entry_trampoline[];
+
/* On 64-bit systems, we use a read-only fixmap GDT. */
pgprot_t gdt_prot = PAGE_KERNEL_RO;
#else
@@ -532,6 +534,11 @@ static inline void setup_cpu_entry_area(
#ifdef CONFIG_X86_32
this_cpu_write(cpu_entry_area, get_cpu_entry_area(cpu));
#endif
+
+#ifdef CONFIG_X86_64
+ __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
+ __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
+#endif
}
/* Load the original GDT from the per-cpu structure */
@@ -1395,10 +1402,16 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char,
/* May not be marked __init: used by software suspend */
void syscall_init(void)
{
+ extern char _entry_trampoline[];
+ extern char entry_SYSCALL_64_trampoline[];
+
int cpu = smp_processor_id();
+ unsigned long SYSCALL64_entry_trampoline =
+ (unsigned long)get_cpu_entry_area(cpu)->entry_trampoline +
+ (entry_SYSCALL_64_trampoline - _entry_trampoline);
wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
- wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
+ wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
#ifdef CONFIG_IA32_EMULATION
wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -107,6 +107,15 @@ SECTIONS
SOFTIRQENTRY_TEXT
*(.fixup)
*(.gnu.warning)
+
+#ifdef CONFIG_X86_64
+ . = ALIGN(PAGE_SIZE);
+ _entry_trampoline = .;
+ *(.entry_trampoline)
+ . = ALIGN(PAGE_SIZE);
+ ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
+#endif
+
/* End of text section */
_etext = .;
} :text = 0x9090
Patches currently in stable-queue which might be from luto(a)kernel.org are
queue-4.14/x86-entry-64-simplify-reg-restore-code-in-the-standard-iret-paths.patch
queue-4.14/x86-cpufeatures-fix-various-details-in-the-feature-definitions.patch
queue-4.14/x86-entry-64-move-the-ist-stacks-into-struct-cpu_entry_area.patch
queue-4.14/x86-dumpstack-add-get_stack_info-support-for-the-sysenter-stack.patch
queue-4.14/x86-asm-don-t-use-the-confusing-.ifeq-directive.patch
queue-4.14/selftests-x86-ldt_gdt-add-infrastructure-to-test-set_thread_area.patch
queue-4.14/x86-entry-remap-the-tss-into-the-cpu-entry-area.patch
queue-4.14/x86-entry-64-paravirt-use-paravirt-safe-macro-to-access-eflags.patch
queue-4.14/x86-mm-fixmap-generalize-the-gdt-fixmap-mechanism-introduce-struct-cpu_entry_area.patch
queue-4.14/x86-paravirt-dont-patch-flush_tlb_single.patch
queue-4.14/x86-dumpstack-handle-stack-overflow-on-all-stacks.patch
queue-4.14/x86-entry-64-use-pop-instead-of-movq-in-syscall_return_via_sysret.patch
queue-4.14/x86-entry-64-return-to-userspace-from-the-trampoline-stack.patch
queue-4.14/x86-paravirt-provide-a-way-to-check-for-hypervisors.patch
queue-4.14/xen-x86-entry-64-add-xen-nmi-trap-entry.patch
queue-4.14/x86-entry-64-remove-the-restore_c_regs_and_iret-label.patch
queue-4.14/x86-entry-64-create-a-per-cpu-syscall-entry-trampoline.patch
queue-4.14/x86-entry-64-remove-the-restore_..._regs-infrastructure.patch
queue-4.14/x86-xen-64-x86-entry-64-clean-up-sp-code-in-cpu_initialize_context.patch
queue-4.14/x86-entry-add-task_top_of_stack-to-find-the-top-of-a-task-s-stack.patch
queue-4.14/x86-entry-64-remove-all-remaining-direct-thread_struct-sp0-reads.patch
queue-4.14/x86-boot-relocate-definition-of-the-initial-state-of-cr0.patch
queue-4.14/x86-entry-64-stop-initializing-tss.sp0-at-boot.patch
queue-4.14/x86-entry-64-de-xen-ify-our-nmi-code.patch
queue-4.14/objtool-don-t-report-end-of-section-error-after-an-empty-unwind-hint.patch
queue-4.14/x86-entry-64-pass-sp0-directly-to-load_sp0.patch
queue-4.14/x86-entry-64-use-a-per-cpu-trampoline-stack-for-idt-entries.patch
queue-4.14/x86-entry-64-move-swapgs-into-the-common-iret-to-usermode-path.patch
queue-4.14/x86-entry-64-shorten-test-instructions.patch
queue-4.14/x86-cpufeature-add-user-mode-instruction-prevention-definitions.patch
queue-4.14/x86-cpufeatures-make-cpu-bugs-sticky.patch
queue-4.14/x86-espfix-64-stop-assuming-that-pt_regs-is-on-the-entry-stack.patch
queue-4.14/x86-traps-use-a-new-on_thread_stack-helper-to-clean-up-an-assertion.patch
queue-4.14/x86-xen-fix-xen-head-elf-annotations.patch
queue-4.14/x86-entry-64-remove-thread_struct-sp0.patch
queue-4.14/x86-entry-move-sysenter_stack-to-the-beginning-of-struct-tss_struct.patch
queue-4.14/x86-entry-64-allocate-and-enable-the-sysenter-stack.patch
queue-4.14/x86-unwinder-orc-dont-bail-on-stack-overflow.patch
queue-4.14/x86-head-fix-head-elf-function-annotations.patch
queue-4.14/selftests-x86-ldt_gdt-run-most-existing-ldt-test-cases-against-the-gdt-as-well.patch
queue-4.14/x86-entry-32-pull-the-msr_ia32_sysenter_cs-update-code-out-of-native_load_sp0.patch
queue-4.14/x86-entry-64-split-the-iret-to-user-and-iret-to-kernel-paths.patch
queue-4.14/x86-entry-64-shrink-paranoid_exit_restore-and-make-labels-local.patch
queue-4.14/x86-head-add-unwind-hint-annotations.patch
queue-4.14/x86-head-remove-unused-bad_address-code.patch
queue-4.14/x86-xen-add-unwind-hint-annotations.patch
queue-4.14/x86-kasan-64-teach-kasan-about-the-cpu_entry_area.patch
queue-4.14/x86-head-remove-confusing-comment.patch
queue-4.14/x86-entry-64-remove-the-sysenter-stack-canary.patch
queue-4.14/x86-mm-kasan-don-t-use-vmemmap_populate-to-initialize-shadow.patch
queue-4.14/x86-entry-gdt-put-per-cpu-gdt-remaps-in-ascending-order.patch
queue-4.14/x86-entry-fix-assumptions-that-the-hw-tss-is-at-the-beginning-of-cpu_tss.patch
queue-4.14/x86-cpufeatures-re-tabulate-the-x86_feature-definitions.patch
queue-4.14/x86-entry-32-fix-cpu_current_top_of_stack-initialization-at-boot.patch
queue-4.14/ptrace-x86-make-user_64bit_mode-available-to-32-bit-builds.patch
queue-4.14/x86-entry-64-make-cpu_entry_area.tss-read-only.patch
queue-4.14/x86-mm-relocate-page-fault-error-codes-to-traps.h.patch
queue-4.14/x86-unwinder-handle-stack-overflows-more-gracefully.patch
queue-4.14/x86-entry-64-use-pop-instead-of-mov-to-restore-regs-on-nmi-return.patch
queue-4.14/x86-irq-64-print-the-offending-ip-in-the-stack-overflow-warning.patch
queue-4.14/x86-entry-clean-up-the-sysenter_stack-code.patch
queue-4.14/x86-entry-64-merge-the-fast-and-slow-sysret-paths.patch
queue-4.14/x86-entry-64-separate-cpu_current_top_of_stack-from-tss.sp0.patch
queue-4.14/x86-boot-annotate-verify_cpu-as-a-callable-function.patch
queue-4.14/x86-irq-remove-an-old-outdated-comment-about-context-tracking-races.patch
This is a note to let you know that I've just added the patch titled
x86/entry/64: Allocate and enable the SYSENTER stack
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-entry-64-allocate-and-enable-the-sysenter-stack.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 1a79797b58cddfa948420a7553241c79c013e3ca Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto(a)kernel.org>
Date: Mon, 4 Dec 2017 15:07:12 +0100
Subject: x86/entry/64: Allocate and enable the SYSENTER stack
From: Andy Lutomirski <luto(a)kernel.org>
commit 1a79797b58cddfa948420a7553241c79c013e3ca upstream.
This will simplify future changes that want scratch variables early in
the SYSENTER handler -- they'll be able to spill registers to the
stack. It also lets us get rid of a SWAPGS_UNSAFE_STACK user.
This does not depend on CONFIG_IA32_EMULATION=y because we'll want the
stack space even without IA32 emulation.
As far as I can tell, the reason that this wasn't done from day 1 is
that we use IST for #DB and #BP, which is IMO rather nasty and causes
a lot more problems than it solves. But, since #DB uses IST, we don't
actually need a real stack for SYSENTER (because SYSENTER with TF set
will invoke #DB on the IST stack rather than the SYSENTER stack).
I want to remove IST usage from these vectors some day, and this patch
is a prerequisite for that as well.
Signed-off-by: Andy Lutomirski <luto(a)kernel.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Borislav Petkov <bp(a)suse.de>
Cc: Boris Ostrovsky <boris.ostrovsky(a)oracle.com>
Cc: Borislav Petkov <bp(a)alien8.de>
Cc: Borislav Petkov <bpetkov(a)suse.de>
Cc: Brian Gerst <brgerst(a)gmail.com>
Cc: Dave Hansen <dave.hansen(a)intel.com>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: David Laight <David.Laight(a)aculab.com>
Cc: Denys Vlasenko <dvlasenk(a)redhat.com>
Cc: Eduardo Valentin <eduval(a)amazon.com>
Cc: Greg KH <gregkh(a)linuxfoundation.org>
Cc: H. Peter Anvin <hpa(a)zytor.com>
Cc: Josh Poimboeuf <jpoimboe(a)redhat.com>
Cc: Juergen Gross <jgross(a)suse.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Rik van Riel <riel(a)redhat.com>
Cc: Will Deacon <will.deacon(a)arm.com>
Cc: aliguori(a)amazon.com
Cc: daniel.gruss(a)iaik.tugraz.at
Cc: hughd(a)google.com
Cc: keescook(a)google.com
Link: https://lkml.kernel.org/r/20171204150605.312726423@linutronix.de
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/entry/entry_64_compat.S | 2 +-
arch/x86/include/asm/processor.h | 3 ---
arch/x86/kernel/asm-offsets.c | 5 +++++
arch/x86/kernel/asm-offsets_32.c | 5 -----
arch/x86/kernel/cpu/common.c | 4 +++-
arch/x86/kernel/process.c | 2 --
arch/x86/kernel/traps.c | 3 +--
7 files changed, 10 insertions(+), 14 deletions(-)
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -48,7 +48,7 @@
*/
ENTRY(entry_SYSENTER_compat)
/* Interrupts are off on entry. */
- SWAPGS_UNSAFE_STACK
+ SWAPGS
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
/*
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -339,14 +339,11 @@ struct tss_struct {
*/
unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
-#ifdef CONFIG_X86_32
/*
* Space for the temporary SYSENTER stack.
*/
unsigned long SYSENTER_stack_canary;
unsigned long SYSENTER_stack[64];
-#endif
-
} ____cacheline_aligned;
DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -93,4 +93,9 @@ void common(void) {
BLANK();
DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
+
+ /* Offset from cpu_tss to SYSENTER_stack */
+ OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
+ /* Size of SYSENTER_stack */
+ DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
}
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -50,11 +50,6 @@ void foo(void)
DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
offsetofend(struct tss_struct, SYSENTER_stack));
- /* Offset from cpu_tss to SYSENTER_stack */
- OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
- /* Size of SYSENTER_stack */
- DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
-
#ifdef CONFIG_CC_STACKPROTECTOR
BLANK();
OFFSET(stack_canary_offset, stack_canary, canary);
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1361,7 +1361,9 @@ void syscall_init(void)
* AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
*/
wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
- wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+ wrmsrl_safe(MSR_IA32_SYSENTER_ESP,
+ (unsigned long)this_cpu_ptr(&cpu_tss) +
+ offsetofend(struct tss_struct, SYSENTER_stack));
wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
#else
wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -71,9 +71,7 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(
*/
.io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 },
#endif
-#ifdef CONFIG_X86_32
.SYSENTER_stack_canary = STACK_END_MAGIC,
-#endif
};
EXPORT_PER_CPU_SYMBOL(cpu_tss);
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -794,14 +794,13 @@ dotraplinkage void do_debug(struct pt_re
debug_stack_usage_dec();
exit:
-#if defined(CONFIG_X86_32)
/*
* This is the most likely code path that involves non-trivial use
* of the SYSENTER stack. Check that we haven't overrun it.
*/
WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC,
"Overran or corrupted SYSENTER stack\n");
-#endif
+
ist_exit(regs);
}
NOKPROBE_SYMBOL(do_debug);
Patches currently in stable-queue which might be from luto(a)kernel.org are
queue-4.14/x86-entry-64-simplify-reg-restore-code-in-the-standard-iret-paths.patch
queue-4.14/x86-cpufeatures-fix-various-details-in-the-feature-definitions.patch
queue-4.14/x86-entry-64-move-the-ist-stacks-into-struct-cpu_entry_area.patch
queue-4.14/x86-dumpstack-add-get_stack_info-support-for-the-sysenter-stack.patch
queue-4.14/x86-asm-don-t-use-the-confusing-.ifeq-directive.patch
queue-4.14/selftests-x86-ldt_gdt-add-infrastructure-to-test-set_thread_area.patch
queue-4.14/x86-entry-remap-the-tss-into-the-cpu-entry-area.patch
queue-4.14/x86-entry-64-paravirt-use-paravirt-safe-macro-to-access-eflags.patch
queue-4.14/x86-mm-fixmap-generalize-the-gdt-fixmap-mechanism-introduce-struct-cpu_entry_area.patch
queue-4.14/x86-paravirt-dont-patch-flush_tlb_single.patch
queue-4.14/x86-dumpstack-handle-stack-overflow-on-all-stacks.patch
queue-4.14/x86-entry-64-use-pop-instead-of-movq-in-syscall_return_via_sysret.patch
queue-4.14/x86-entry-64-return-to-userspace-from-the-trampoline-stack.patch
queue-4.14/x86-paravirt-provide-a-way-to-check-for-hypervisors.patch
queue-4.14/xen-x86-entry-64-add-xen-nmi-trap-entry.patch
queue-4.14/x86-entry-64-remove-the-restore_c_regs_and_iret-label.patch
queue-4.14/x86-entry-64-create-a-per-cpu-syscall-entry-trampoline.patch
queue-4.14/x86-entry-64-remove-the-restore_..._regs-infrastructure.patch
queue-4.14/x86-xen-64-x86-entry-64-clean-up-sp-code-in-cpu_initialize_context.patch
queue-4.14/x86-entry-add-task_top_of_stack-to-find-the-top-of-a-task-s-stack.patch
queue-4.14/x86-entry-64-remove-all-remaining-direct-thread_struct-sp0-reads.patch
queue-4.14/x86-boot-relocate-definition-of-the-initial-state-of-cr0.patch
queue-4.14/x86-entry-64-stop-initializing-tss.sp0-at-boot.patch
queue-4.14/x86-entry-64-de-xen-ify-our-nmi-code.patch
queue-4.14/objtool-don-t-report-end-of-section-error-after-an-empty-unwind-hint.patch
queue-4.14/x86-entry-64-pass-sp0-directly-to-load_sp0.patch
queue-4.14/x86-entry-64-use-a-per-cpu-trampoline-stack-for-idt-entries.patch
queue-4.14/x86-entry-64-move-swapgs-into-the-common-iret-to-usermode-path.patch
queue-4.14/x86-entry-64-shorten-test-instructions.patch
queue-4.14/x86-cpufeature-add-user-mode-instruction-prevention-definitions.patch
queue-4.14/x86-cpufeatures-make-cpu-bugs-sticky.patch
queue-4.14/x86-espfix-64-stop-assuming-that-pt_regs-is-on-the-entry-stack.patch
queue-4.14/x86-traps-use-a-new-on_thread_stack-helper-to-clean-up-an-assertion.patch
queue-4.14/x86-xen-fix-xen-head-elf-annotations.patch
queue-4.14/x86-entry-64-remove-thread_struct-sp0.patch
queue-4.14/x86-entry-move-sysenter_stack-to-the-beginning-of-struct-tss_struct.patch
queue-4.14/x86-entry-64-allocate-and-enable-the-sysenter-stack.patch
queue-4.14/x86-unwinder-orc-dont-bail-on-stack-overflow.patch
queue-4.14/x86-head-fix-head-elf-function-annotations.patch
queue-4.14/selftests-x86-ldt_gdt-run-most-existing-ldt-test-cases-against-the-gdt-as-well.patch
queue-4.14/x86-entry-32-pull-the-msr_ia32_sysenter_cs-update-code-out-of-native_load_sp0.patch
queue-4.14/x86-entry-64-split-the-iret-to-user-and-iret-to-kernel-paths.patch
queue-4.14/x86-entry-64-shrink-paranoid_exit_restore-and-make-labels-local.patch
queue-4.14/x86-head-add-unwind-hint-annotations.patch
queue-4.14/x86-head-remove-unused-bad_address-code.patch
queue-4.14/x86-xen-add-unwind-hint-annotations.patch
queue-4.14/x86-kasan-64-teach-kasan-about-the-cpu_entry_area.patch
queue-4.14/x86-head-remove-confusing-comment.patch
queue-4.14/x86-entry-64-remove-the-sysenter-stack-canary.patch
queue-4.14/x86-mm-kasan-don-t-use-vmemmap_populate-to-initialize-shadow.patch
queue-4.14/x86-entry-gdt-put-per-cpu-gdt-remaps-in-ascending-order.patch
queue-4.14/x86-entry-fix-assumptions-that-the-hw-tss-is-at-the-beginning-of-cpu_tss.patch
queue-4.14/x86-cpufeatures-re-tabulate-the-x86_feature-definitions.patch
queue-4.14/x86-entry-32-fix-cpu_current_top_of_stack-initialization-at-boot.patch
queue-4.14/ptrace-x86-make-user_64bit_mode-available-to-32-bit-builds.patch
queue-4.14/x86-entry-64-make-cpu_entry_area.tss-read-only.patch
queue-4.14/x86-mm-relocate-page-fault-error-codes-to-traps.h.patch
queue-4.14/x86-unwinder-handle-stack-overflows-more-gracefully.patch
queue-4.14/x86-entry-64-use-pop-instead-of-mov-to-restore-regs-on-nmi-return.patch
queue-4.14/x86-irq-64-print-the-offending-ip-in-the-stack-overflow-warning.patch
queue-4.14/x86-entry-clean-up-the-sysenter_stack-code.patch
queue-4.14/x86-entry-64-merge-the-fast-and-slow-sysret-paths.patch
queue-4.14/x86-entry-64-separate-cpu_current_top_of_stack-from-tss.sp0.patch
queue-4.14/x86-boot-annotate-verify_cpu-as-a-callable-function.patch
queue-4.14/x86-irq-remove-an-old-outdated-comment-about-context-tracking-races.patch
This is a note to let you know that I've just added the patch titled
x86/entry/32: Fix cpu_current_top_of_stack initialization at boot
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-entry-32-fix-cpu_current_top_of_stack-initialization-at-boot.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From cd493a6deb8b78eca280d05f7fa73fd69403ae29 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto(a)kernel.org>
Date: Thu, 2 Nov 2017 00:59:15 -0700
Subject: x86/entry/32: Fix cpu_current_top_of_stack initialization at boot
From: Andy Lutomirski <luto(a)kernel.org>
commit cd493a6deb8b78eca280d05f7fa73fd69403ae29 upstream.
cpu_current_top_of_stack's initialization forgot about
TOP_OF_KERNEL_STACK_PADDING. This bug didn't matter because the
idle threads never enter user mode.
Signed-off-by: Andy Lutomirski <luto(a)kernel.org>
Reviewed-by: Borislav Petkov <bp(a)suse.de>
Cc: Borislav Petkov <bpetkov(a)suse.de>
Cc: Brian Gerst <brgerst(a)gmail.com>
Cc: Dave Hansen <dave.hansen(a)intel.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Link: http://lkml.kernel.org/r/e5e370a7e6e4fddd1c4e4cf619765d96bb874b21.150960930…
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/kernel/smpboot.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -962,8 +962,7 @@ void common_cpu_up(unsigned int cpu, str
#ifdef CONFIG_X86_32
/* Stack for startup_32 can be just as for start_secondary onwards */
irq_ctx_init(cpu);
- per_cpu(cpu_current_top_of_stack, cpu) =
- (unsigned long)task_stack_page(idle) + THREAD_SIZE;
+ per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle);
#else
initial_gs = per_cpu_offset(cpu);
#endif
Patches currently in stable-queue which might be from luto(a)kernel.org are
queue-4.14/x86-entry-64-simplify-reg-restore-code-in-the-standard-iret-paths.patch
queue-4.14/x86-cpufeatures-fix-various-details-in-the-feature-definitions.patch
queue-4.14/x86-entry-64-move-the-ist-stacks-into-struct-cpu_entry_area.patch
queue-4.14/x86-dumpstack-add-get_stack_info-support-for-the-sysenter-stack.patch
queue-4.14/x86-asm-don-t-use-the-confusing-.ifeq-directive.patch
queue-4.14/selftests-x86-ldt_gdt-add-infrastructure-to-test-set_thread_area.patch
queue-4.14/x86-entry-remap-the-tss-into-the-cpu-entry-area.patch
queue-4.14/x86-entry-64-paravirt-use-paravirt-safe-macro-to-access-eflags.patch
queue-4.14/x86-mm-fixmap-generalize-the-gdt-fixmap-mechanism-introduce-struct-cpu_entry_area.patch
queue-4.14/x86-paravirt-dont-patch-flush_tlb_single.patch
queue-4.14/x86-dumpstack-handle-stack-overflow-on-all-stacks.patch
queue-4.14/x86-entry-64-use-pop-instead-of-movq-in-syscall_return_via_sysret.patch
queue-4.14/x86-entry-64-return-to-userspace-from-the-trampoline-stack.patch
queue-4.14/x86-paravirt-provide-a-way-to-check-for-hypervisors.patch
queue-4.14/xen-x86-entry-64-add-xen-nmi-trap-entry.patch
queue-4.14/x86-entry-64-remove-the-restore_c_regs_and_iret-label.patch
queue-4.14/x86-entry-64-create-a-per-cpu-syscall-entry-trampoline.patch
queue-4.14/x86-entry-64-remove-the-restore_..._regs-infrastructure.patch
queue-4.14/x86-xen-64-x86-entry-64-clean-up-sp-code-in-cpu_initialize_context.patch
queue-4.14/x86-entry-add-task_top_of_stack-to-find-the-top-of-a-task-s-stack.patch
queue-4.14/x86-entry-64-remove-all-remaining-direct-thread_struct-sp0-reads.patch
queue-4.14/x86-boot-relocate-definition-of-the-initial-state-of-cr0.patch
queue-4.14/x86-entry-64-stop-initializing-tss.sp0-at-boot.patch
queue-4.14/x86-entry-64-de-xen-ify-our-nmi-code.patch
queue-4.14/objtool-don-t-report-end-of-section-error-after-an-empty-unwind-hint.patch
queue-4.14/x86-entry-64-pass-sp0-directly-to-load_sp0.patch
queue-4.14/x86-entry-64-use-a-per-cpu-trampoline-stack-for-idt-entries.patch
queue-4.14/x86-entry-64-move-swapgs-into-the-common-iret-to-usermode-path.patch
queue-4.14/x86-entry-64-shorten-test-instructions.patch
queue-4.14/x86-cpufeature-add-user-mode-instruction-prevention-definitions.patch
queue-4.14/x86-cpufeatures-make-cpu-bugs-sticky.patch
queue-4.14/x86-espfix-64-stop-assuming-that-pt_regs-is-on-the-entry-stack.patch
queue-4.14/x86-traps-use-a-new-on_thread_stack-helper-to-clean-up-an-assertion.patch
queue-4.14/x86-xen-fix-xen-head-elf-annotations.patch
queue-4.14/x86-entry-64-remove-thread_struct-sp0.patch
queue-4.14/x86-entry-move-sysenter_stack-to-the-beginning-of-struct-tss_struct.patch
queue-4.14/x86-entry-64-allocate-and-enable-the-sysenter-stack.patch
queue-4.14/x86-unwinder-orc-dont-bail-on-stack-overflow.patch
queue-4.14/x86-head-fix-head-elf-function-annotations.patch
queue-4.14/selftests-x86-ldt_gdt-run-most-existing-ldt-test-cases-against-the-gdt-as-well.patch
queue-4.14/x86-entry-32-pull-the-msr_ia32_sysenter_cs-update-code-out-of-native_load_sp0.patch
queue-4.14/x86-entry-64-split-the-iret-to-user-and-iret-to-kernel-paths.patch
queue-4.14/x86-entry-64-shrink-paranoid_exit_restore-and-make-labels-local.patch
queue-4.14/x86-head-add-unwind-hint-annotations.patch
queue-4.14/x86-head-remove-unused-bad_address-code.patch
queue-4.14/x86-xen-add-unwind-hint-annotations.patch
queue-4.14/x86-kasan-64-teach-kasan-about-the-cpu_entry_area.patch
queue-4.14/x86-head-remove-confusing-comment.patch
queue-4.14/x86-entry-64-remove-the-sysenter-stack-canary.patch
queue-4.14/x86-mm-kasan-don-t-use-vmemmap_populate-to-initialize-shadow.patch
queue-4.14/x86-entry-gdt-put-per-cpu-gdt-remaps-in-ascending-order.patch
queue-4.14/x86-entry-fix-assumptions-that-the-hw-tss-is-at-the-beginning-of-cpu_tss.patch
queue-4.14/x86-cpufeatures-re-tabulate-the-x86_feature-definitions.patch
queue-4.14/x86-entry-32-fix-cpu_current_top_of_stack-initialization-at-boot.patch
queue-4.14/ptrace-x86-make-user_64bit_mode-available-to-32-bit-builds.patch
queue-4.14/x86-entry-64-make-cpu_entry_area.tss-read-only.patch
queue-4.14/x86-mm-relocate-page-fault-error-codes-to-traps.h.patch
queue-4.14/x86-unwinder-handle-stack-overflows-more-gracefully.patch
queue-4.14/x86-entry-64-use-pop-instead-of-mov-to-restore-regs-on-nmi-return.patch
queue-4.14/x86-irq-64-print-the-offending-ip-in-the-stack-overflow-warning.patch
queue-4.14/x86-entry-clean-up-the-sysenter_stack-code.patch
queue-4.14/x86-entry-64-merge-the-fast-and-slow-sysret-paths.patch
queue-4.14/x86-entry-64-separate-cpu_current_top_of_stack-from-tss.sp0.patch
queue-4.14/x86-boot-annotate-verify_cpu-as-a-callable-function.patch
queue-4.14/x86-irq-remove-an-old-outdated-comment-about-context-tracking-races.patch
This is a note to let you know that I've just added the patch titled
x86/dumpstack: Handle stack overflow on all stacks
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-dumpstack-handle-stack-overflow-on-all-stacks.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 6e60e583426c2f8751c22c2dfe5c207083b4483a Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto(a)kernel.org>
Date: Mon, 4 Dec 2017 15:07:18 +0100
Subject: x86/dumpstack: Handle stack overflow on all stacks
From: Andy Lutomirski <luto(a)kernel.org>
commit 6e60e583426c2f8751c22c2dfe5c207083b4483a upstream.
We currently special-case stack overflow on the task stack. We're
going to start putting special stacks in the fixmap with a custom
layout, so they'll have guard pages, too. Teach the unwinder to be
able to unwind an overflow of any of the stacks.
Signed-off-by: Andy Lutomirski <luto(a)kernel.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Borislav Petkov <bp(a)suse.de>
Cc: Boris Ostrovsky <boris.ostrovsky(a)oracle.com>
Cc: Borislav Petkov <bp(a)alien8.de>
Cc: Borislav Petkov <bpetkov(a)suse.de>
Cc: Brian Gerst <brgerst(a)gmail.com>
Cc: Dave Hansen <dave.hansen(a)intel.com>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: David Laight <David.Laight(a)aculab.com>
Cc: Denys Vlasenko <dvlasenk(a)redhat.com>
Cc: Eduardo Valentin <eduval(a)amazon.com>
Cc: Greg KH <gregkh(a)linuxfoundation.org>
Cc: H. Peter Anvin <hpa(a)zytor.com>
Cc: Josh Poimboeuf <jpoimboe(a)redhat.com>
Cc: Juergen Gross <jgross(a)suse.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Rik van Riel <riel(a)redhat.com>
Cc: Will Deacon <will.deacon(a)arm.com>
Cc: aliguori(a)amazon.com
Cc: daniel.gruss(a)iaik.tugraz.at
Cc: hughd(a)google.com
Cc: keescook(a)google.com
Link: https://lkml.kernel.org/r/20171204150605.802057305@linutronix.de
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/kernel/dumpstack.c | 24 ++++++++++++++----------
1 file changed, 14 insertions(+), 10 deletions(-)
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -112,24 +112,28 @@ void show_trace_log_lvl(struct task_stru
* - task stack
* - interrupt stack
* - HW exception stacks (double fault, nmi, debug, mce)
+ * - SYSENTER stack
*
- * x86-32 can have up to three stacks:
+ * x86-32 can have up to four stacks:
* - task stack
* - softirq stack
* - hardirq stack
+ * - SYSENTER stack
*/
for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
const char *stack_name;
- /*
- * If we overflowed the task stack into a guard page, jump back
- * to the bottom of the usable stack.
- */
- if (task_stack_page(task) - (void *)stack < PAGE_SIZE)
- stack = task_stack_page(task);
-
- if (get_stack_info(stack, task, &stack_info, &visit_mask))
- break;
+ if (get_stack_info(stack, task, &stack_info, &visit_mask)) {
+ /*
+ * We weren't on a valid stack. It's possible that
+ * we overflowed a valid stack into a guard page.
+ * See if the next page up is valid so that we can
+ * generate some kind of backtrace if this happens.
+ */
+ stack = (unsigned long *)PAGE_ALIGN((unsigned long)stack);
+ if (get_stack_info(stack, task, &stack_info, &visit_mask))
+ break;
+ }
stack_name = stack_type_name(stack_info.type);
if (stack_name)
Patches currently in stable-queue which might be from luto(a)kernel.org are
queue-4.14/x86-entry-64-simplify-reg-restore-code-in-the-standard-iret-paths.patch
queue-4.14/x86-cpufeatures-fix-various-details-in-the-feature-definitions.patch
queue-4.14/x86-entry-64-move-the-ist-stacks-into-struct-cpu_entry_area.patch
queue-4.14/x86-dumpstack-add-get_stack_info-support-for-the-sysenter-stack.patch
queue-4.14/x86-asm-don-t-use-the-confusing-.ifeq-directive.patch
queue-4.14/selftests-x86-ldt_gdt-add-infrastructure-to-test-set_thread_area.patch
queue-4.14/x86-entry-remap-the-tss-into-the-cpu-entry-area.patch
queue-4.14/x86-entry-64-paravirt-use-paravirt-safe-macro-to-access-eflags.patch
queue-4.14/x86-mm-fixmap-generalize-the-gdt-fixmap-mechanism-introduce-struct-cpu_entry_area.patch
queue-4.14/x86-paravirt-dont-patch-flush_tlb_single.patch
queue-4.14/x86-dumpstack-handle-stack-overflow-on-all-stacks.patch
queue-4.14/x86-entry-64-use-pop-instead-of-movq-in-syscall_return_via_sysret.patch
queue-4.14/x86-entry-64-return-to-userspace-from-the-trampoline-stack.patch
queue-4.14/x86-paravirt-provide-a-way-to-check-for-hypervisors.patch
queue-4.14/xen-x86-entry-64-add-xen-nmi-trap-entry.patch
queue-4.14/x86-entry-64-remove-the-restore_c_regs_and_iret-label.patch
queue-4.14/x86-entry-64-create-a-per-cpu-syscall-entry-trampoline.patch
queue-4.14/x86-entry-64-remove-the-restore_..._regs-infrastructure.patch
queue-4.14/x86-xen-64-x86-entry-64-clean-up-sp-code-in-cpu_initialize_context.patch
queue-4.14/x86-entry-add-task_top_of_stack-to-find-the-top-of-a-task-s-stack.patch
queue-4.14/x86-entry-64-remove-all-remaining-direct-thread_struct-sp0-reads.patch
queue-4.14/x86-boot-relocate-definition-of-the-initial-state-of-cr0.patch
queue-4.14/x86-entry-64-stop-initializing-tss.sp0-at-boot.patch
queue-4.14/x86-entry-64-de-xen-ify-our-nmi-code.patch
queue-4.14/objtool-don-t-report-end-of-section-error-after-an-empty-unwind-hint.patch
queue-4.14/x86-entry-64-pass-sp0-directly-to-load_sp0.patch
queue-4.14/x86-entry-64-use-a-per-cpu-trampoline-stack-for-idt-entries.patch
queue-4.14/x86-entry-64-move-swapgs-into-the-common-iret-to-usermode-path.patch
queue-4.14/x86-entry-64-shorten-test-instructions.patch
queue-4.14/x86-cpufeature-add-user-mode-instruction-prevention-definitions.patch
queue-4.14/x86-cpufeatures-make-cpu-bugs-sticky.patch
queue-4.14/x86-espfix-64-stop-assuming-that-pt_regs-is-on-the-entry-stack.patch
queue-4.14/x86-traps-use-a-new-on_thread_stack-helper-to-clean-up-an-assertion.patch
queue-4.14/x86-xen-fix-xen-head-elf-annotations.patch
queue-4.14/x86-entry-64-remove-thread_struct-sp0.patch
queue-4.14/x86-entry-move-sysenter_stack-to-the-beginning-of-struct-tss_struct.patch
queue-4.14/x86-entry-64-allocate-and-enable-the-sysenter-stack.patch
queue-4.14/x86-unwinder-orc-dont-bail-on-stack-overflow.patch
queue-4.14/x86-head-fix-head-elf-function-annotations.patch
queue-4.14/selftests-x86-ldt_gdt-run-most-existing-ldt-test-cases-against-the-gdt-as-well.patch
queue-4.14/x86-entry-32-pull-the-msr_ia32_sysenter_cs-update-code-out-of-native_load_sp0.patch
queue-4.14/x86-entry-64-split-the-iret-to-user-and-iret-to-kernel-paths.patch
queue-4.14/x86-entry-64-shrink-paranoid_exit_restore-and-make-labels-local.patch
queue-4.14/x86-head-add-unwind-hint-annotations.patch
queue-4.14/x86-head-remove-unused-bad_address-code.patch
queue-4.14/x86-xen-add-unwind-hint-annotations.patch
queue-4.14/x86-kasan-64-teach-kasan-about-the-cpu_entry_area.patch
queue-4.14/x86-head-remove-confusing-comment.patch
queue-4.14/x86-entry-64-remove-the-sysenter-stack-canary.patch
queue-4.14/x86-mm-kasan-don-t-use-vmemmap_populate-to-initialize-shadow.patch
queue-4.14/x86-entry-gdt-put-per-cpu-gdt-remaps-in-ascending-order.patch
queue-4.14/x86-entry-fix-assumptions-that-the-hw-tss-is-at-the-beginning-of-cpu_tss.patch
queue-4.14/x86-cpufeatures-re-tabulate-the-x86_feature-definitions.patch
queue-4.14/x86-entry-32-fix-cpu_current_top_of_stack-initialization-at-boot.patch
queue-4.14/ptrace-x86-make-user_64bit_mode-available-to-32-bit-builds.patch
queue-4.14/x86-entry-64-make-cpu_entry_area.tss-read-only.patch
queue-4.14/x86-mm-relocate-page-fault-error-codes-to-traps.h.patch
queue-4.14/x86-unwinder-handle-stack-overflows-more-gracefully.patch
queue-4.14/x86-entry-64-use-pop-instead-of-mov-to-restore-regs-on-nmi-return.patch
queue-4.14/x86-irq-64-print-the-offending-ip-in-the-stack-overflow-warning.patch
queue-4.14/x86-entry-clean-up-the-sysenter_stack-code.patch
queue-4.14/x86-entry-64-merge-the-fast-and-slow-sysret-paths.patch
queue-4.14/x86-entry-64-separate-cpu_current_top_of_stack-from-tss.sp0.patch
queue-4.14/x86-boot-annotate-verify_cpu-as-a-callable-function.patch
queue-4.14/x86-irq-remove-an-old-outdated-comment-about-context-tracking-races.patch