This is a note to let you know that I've just added the patch titled
x86/pti/efi: broken conversion from efi to kernel page table
to the 4.4-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git%3Ba=su...
The filename of the patch is: x86-pti-efi-broken-conversion-from-efi-to-kernel-page-table.patch and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree, please let stable@vger.kernel.org know about it.
From pasha.tatashin@oracle.com Sat Jan 13 14:14:57 2018
From: Pavel Tatashin pasha.tatashin@oracle.com Date: Thu, 11 Jan 2018 14:07:46 -0500 Subject: x86/pti/efi: broken conversion from efi to kernel page table To: steven.sistare@oracle.com, linux-kernel@vger.kernel.org, tglx@linutronix.de, mingo@redhat.com, hpa@zytor.com, x86@kernel.org, gregkh@linuxfoundation.org, jkosina@suse.cz, hughd@google.com, dave.hansen@linux.intel.com, luto@kernel.org, torvalds@linux-foundation.org Message-ID: 20180111190746.15426-2-pasha.tatashin@oracle.com
From: Pavel Tatashin pasha.tatashin@oracle.com
In entry_64.S we have code like this:
/* Unconditionally use kernel CR3 for do_nmi() */ /* %rax is saved above, so OK to clobber here */ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER /* If PCID enabled, NOFLUSH now and NOFLUSH on return */ ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID pushq %rax /* mask off "user" bit of pgd address and 12 PCID bits: */ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax movq %rax, %cr3 2:
/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ call do_nmi
With this instruction: andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
We unconditionally switch from whatever our CR3 was to kernel page table. But, in arch/x86/platform/efi/efi_64.c We temporarily set a different page table, that does not have the kernel page table with 0x1000 offset from it.
Look in efi_thunk() and efi_thunk_set_virtual_address_map().
So, while CR3 points to the other page table, we get an NMI interrupt, and clear 0x1000 from CR3, resulting in a bogus CR3 if the 0x1000 bit was set.
The efi page table comes from realmode/rm/trampoline_64.S:
arch/x86/realmode/rm/trampoline_64.S
141 .bss 142 .balign PAGE_SIZE 143 GLOBAL(trampoline_pgd) .space PAGE_SIZE
Notice: alignment is PAGE_SIZE, so after applying KAISER_SHADOW_PGD_OFFSET which equal to PAGE_SIZE, we can get a different page table.
But, even if we fix alignment, here the trampoline binary is later copied into dynamically allocated memory in reserve_real_mode(), so we need to fix that place as well.
Fixes: 8a43ddfb93a0 ("KAISER: Kernel Address Isolation")
Signed-off-by: Pavel Tatashin pasha.tatashin@oracle.com Reviewed-by: Steven Sistare steven.sistare@oracle.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org --- arch/x86/include/asm/kaiser.h | 8 ++++++++ arch/x86/realmode/init.c | 4 +++- arch/x86/realmode/rm/trampoline_64.S | 3 ++- 3 files changed, 13 insertions(+), 2 deletions(-)
--- a/arch/x86/include/asm/kaiser.h +++ b/arch/x86/include/asm/kaiser.h @@ -19,6 +19,12 @@
#define KAISER_SHADOW_PGD_OFFSET 0x1000
+/* + * A page table address must have this alignment to stay the same when + * KAISER_SHADOW_PGD_OFFSET mask is applied + */ +#define KAISER_KERNEL_PGD_ALIGNMENT (KAISER_SHADOW_PGD_OFFSET << 1) + #ifdef __ASSEMBLY__ #ifdef CONFIG_PAGE_TABLE_ISOLATION
@@ -71,6 +77,8 @@ movq PER_CPU_VAR(unsafe_stack_register_b
#else /* CONFIG_PAGE_TABLE_ISOLATION */
+#define KAISER_KERNEL_PGD_ALIGNMENT PAGE_SIZE + .macro SWITCH_KERNEL_CR3 .endm .macro SWITCH_USER_CR3 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -1,5 +1,6 @@ #include <linux/io.h> #include <linux/memblock.h> +#include <linux/kaiser.h>
#include <asm/cacheflush.h> #include <asm/pgtable.h> @@ -15,7 +16,8 @@ void __init reserve_real_mode(void) size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob);
/* Has to be under 1M so we can execute real-mode AP code. */ - mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); + mem = memblock_find_in_range(0, 1 << 20, size, + KAISER_KERNEL_PGD_ALIGNMENT); if (!mem) panic("Cannot allocate trampoline\n");
--- a/arch/x86/realmode/rm/trampoline_64.S +++ b/arch/x86/realmode/rm/trampoline_64.S @@ -30,6 +30,7 @@ #include <asm/msr.h> #include <asm/segment.h> #include <asm/processor-flags.h> +#include <asm/kaiser.h> #include "realmode.h"
.text @@ -139,7 +140,7 @@ tr_gdt: tr_gdt_end:
.bss - .balign PAGE_SIZE + .balign KAISER_KERNEL_PGD_ALIGNMENT GLOBAL(trampoline_pgd) .space PAGE_SIZE
.balign 8
Patches currently in stable-queue which might be from pasha.tatashin@oracle.com are
queue-4.4/x86-pti-efi-broken-conversion-from-efi-to-kernel-page-table.patch