This is a note to let you know that I've just added the patch titled
x86/mm/pti: Add functions to clone kernel PMDs
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-mm-pti-add-functions-to-clone-kernel-pmds.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 03f4424f348e8be95eb1bbeba09461cd7b867828 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto(a)kernel.org>
Date: Mon, 4 Dec 2017 15:07:42 +0100
Subject: x86/mm/pti: Add functions to clone kernel PMDs
From: Andy Lutomirski <luto(a)kernel.org>
commit 03f4424f348e8be95eb1bbeba09461cd7b867828 upstream.
Provide infrastructure to:
- find a kernel PMD for a mapping which must be visible to user space for
the entry/exit code to work.
- walk an address range and share the kernel PMD with it.
This reuses a small part of the original KAISER patches to populate the
user space page table.
[ tglx: Made it universally usable so it can be used for any kind of shared
mapping. Add a mechanism to clear specific bits in the user space
visible PMD entry. Folded Andys simplifactions ]
Originally-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Signed-off-by: Andy Lutomirski <luto(a)kernel.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Boris Ostrovsky <boris.ostrovsky(a)oracle.com>
Cc: Borislav Petkov <bp(a)alien8.de>
Cc: Borislav Petkov <bpetkov(a)suse.de>
Cc: Brian Gerst <brgerst(a)gmail.com>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: David Laight <David.Laight(a)aculab.com>
Cc: Denys Vlasenko <dvlasenk(a)redhat.com>
Cc: Eduardo Valentin <eduval(a)amazon.com>
Cc: Greg KH <gregkh(a)linuxfoundation.org>
Cc: H. Peter Anvin <hpa(a)zytor.com>
Cc: Josh Poimboeuf <jpoimboe(a)redhat.com>
Cc: Juergen Gross <jgross(a)suse.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Will Deacon <will.deacon(a)arm.com>
Cc: aliguori(a)amazon.com
Cc: daniel.gruss(a)iaik.tugraz.at
Cc: hughd(a)google.com
Cc: keescook(a)google.com
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/mm/pti.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 127 insertions(+)
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -48,6 +48,11 @@
#undef pr_fmt
#define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt
+/* Backporting helper */
+#ifndef __GFP_NOTRACK
+#define __GFP_NOTRACK 0
+#endif
+
static void __init pti_print_if_insecure(const char *reason)
{
if (boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
@@ -138,6 +143,128 @@ pgd_t __pti_set_user_pgd(pgd_t *pgdp, pg
}
/*
+ * Walk the user copy of the page tables (optionally) trying to allocate
+ * page table pages on the way down.
+ *
+ * Returns a pointer to a P4D on success, or NULL on failure.
+ */
+static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
+{
+ pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
+ gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+
+ if (address < PAGE_OFFSET) {
+ WARN_ONCE(1, "attempt to walk user address\n");
+ return NULL;
+ }
+
+ if (pgd_none(*pgd)) {
+ unsigned long new_p4d_page = __get_free_page(gfp);
+ if (!new_p4d_page)
+ return NULL;
+
+ if (pgd_none(*pgd)) {
+ set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
+ new_p4d_page = 0;
+ }
+ if (new_p4d_page)
+ free_page(new_p4d_page);
+ }
+ BUILD_BUG_ON(pgd_large(*pgd) != 0);
+
+ return p4d_offset(pgd, address);
+}
+
+/*
+ * Walk the user copy of the page tables (optionally) trying to allocate
+ * page table pages on the way down.
+ *
+ * Returns a pointer to a PMD on success, or NULL on failure.
+ */
+static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
+{
+ gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+ p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
+ pud_t *pud;
+
+ BUILD_BUG_ON(p4d_large(*p4d) != 0);
+ if (p4d_none(*p4d)) {
+ unsigned long new_pud_page = __get_free_page(gfp);
+ if (!new_pud_page)
+ return NULL;
+
+ if (p4d_none(*p4d)) {
+ set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
+ new_pud_page = 0;
+ }
+ if (new_pud_page)
+ free_page(new_pud_page);
+ }
+
+ pud = pud_offset(p4d, address);
+ /* The user page tables do not use large mappings: */
+ if (pud_large(*pud)) {
+ WARN_ON(1);
+ return NULL;
+ }
+ if (pud_none(*pud)) {
+ unsigned long new_pmd_page = __get_free_page(gfp);
+ if (!new_pmd_page)
+ return NULL;
+
+ if (pud_none(*pud)) {
+ set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
+ new_pmd_page = 0;
+ }
+ if (new_pmd_page)
+ free_page(new_pmd_page);
+ }
+
+ return pmd_offset(pud, address);
+}
+
+static void __init
+pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
+{
+ unsigned long addr;
+
+ /*
+ * Clone the populated PMDs which cover start to end. These PMD areas
+ * can have holes.
+ */
+ for (addr = start; addr < end; addr += PMD_SIZE) {
+ pmd_t *pmd, *target_pmd;
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+
+ pgd = pgd_offset_k(addr);
+ if (WARN_ON(pgd_none(*pgd)))
+ return;
+ p4d = p4d_offset(pgd, addr);
+ if (WARN_ON(p4d_none(*p4d)))
+ return;
+ pud = pud_offset(p4d, addr);
+ if (pud_none(*pud))
+ continue;
+ pmd = pmd_offset(pud, addr);
+ if (pmd_none(*pmd))
+ continue;
+
+ target_pmd = pti_user_pagetable_walk_pmd(addr);
+ if (WARN_ON(!target_pmd))
+ return;
+
+ /*
+ * Copy the PMD. That is, the kernelmode and usermode
+ * tables will share the last-level page tables of this
+ * address range
+ */
+ *target_pmd = pmd_clear_flags(*pmd, clear);
+ }
+}
+
+/*
* Initialize kernel page table isolation
*/
void __init pti_init(void)
Patches currently in stable-queue which might be from luto(a)kernel.org are
queue-4.14/x86-mm-pti-allow-nx-poison-to-be-set-in-p4d-pgd.patch
queue-4.14/x86-dumpstack-indicate-in-oops-whether-pti-is-configured-and-enabled.patch
queue-4.14/x86-mm-clarify-the-whole-asid-kernel-pcid-user-pcid-naming.patch
queue-4.14/x86-mm-abstract-switching-cr3.patch
queue-4.14/x86-mm-optimize-restore_cr3.patch
queue-4.14/x86-mm-pti-force-entry-through-trampoline-when-pti-active.patch
queue-4.14/x86-entry-align-entry-text-section-to-pmd-boundary.patch
queue-4.14/x86-mm-64-make-a-full-pgd-entry-size-hole-in-the-memory-map.patch
queue-4.14/x86-cpu_entry_area-add-debugstore-entries-to-cpu_entry_area.patch
queue-4.14/x86-mm-pti-share-entry-text-pmd.patch
queue-4.14/x86-ldt-make-the-ldt-mapping-ro.patch
queue-4.14/x86-mm-dump_pagetables-check-user-space-page-table-for-wx-pages.patch
queue-4.14/x86-pti-map-the-vsyscall-page-if-needed.patch
queue-4.14/x86-mm-pti-disable-global-pages-if-page_table_isolation-y.patch
queue-4.14/x86-mm-dump_pagetables-allow-dumping-current-pagetables.patch
queue-4.14/x86-mm-pti-add-infrastructure-for-page-table-isolation.patch
queue-4.14/x86-cpufeatures-add-x86_bug_cpu_insecure.patch
queue-4.14/x86-mm-use-fix-pcid-to-optimize-user-kernel-switches.patch
queue-4.14/x86-mm-use-invpcid-for-__native_flush_tlb_single.patch
queue-4.14/x86-mm-pti-prepare-the-x86-entry-assembly-code-for-entry-exit-cr3-switching.patch
queue-4.14/x86-mm-allow-flushing-for-future-asid-switches.patch
queue-4.14/x86-mm-pti-add-kconfig.patch
queue-4.14/x86-mm-pti-add-mapping-helper-functions.patch
queue-4.14/x86-mm-pti-map-espfix-into-user-space.patch
queue-4.14/x86-mm-pti-share-cpu_entry_area-with-user-space-page-tables.patch
queue-4.14/x86-mm-pti-add-functions-to-clone-kernel-pmds.patch
queue-4.14/x86-pti-add-the-pti-cmdline-option-and-documentation.patch
queue-4.14/x86-events-intel-ds-map-debug-buffers-in-cpu_entry_area.patch
queue-4.14/x86-mm-pti-allocate-a-separate-user-pgd.patch
queue-4.14/x86-pti-put-the-ldt-in-its-own-pgd-if-pti-is-on.patch
queue-4.14/x86-mm-pti-populate-user-pgd.patch
queue-4.14/x86-mm-dump_pagetables-add-page-table-directory-to-the-debugfs-vfs-hierarchy.patch
This is a note to let you know that I've just added the patch titled
x86/mm: Optimize RESTORE_CR3
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-mm-optimize-restore_cr3.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 21e94459110252d41b45c0c8ba50fd72a664d50c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz(a)infradead.org>
Date: Mon, 4 Dec 2017 15:08:00 +0100
Subject: x86/mm: Optimize RESTORE_CR3
From: Peter Zijlstra <peterz(a)infradead.org>
commit 21e94459110252d41b45c0c8ba50fd72a664d50c upstream.
Most NMI/paranoid exceptions will not in fact change pagetables and would
thus not require TLB flushing, however RESTORE_CR3 uses flushing CR3
writes.
Restores to kernel PCIDs can be NOFLUSH, because we explicitly flush the
kernel mappings and now that we track which user PCIDs need flushing we can
avoid those too when possible.
This does mean RESTORE_CR3 needs an additional scratch_reg, luckily both
sites have plenty available.
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Andy Lutomirski <luto(a)kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky(a)oracle.com>
Cc: Borislav Petkov <bp(a)alien8.de>
Cc: Brian Gerst <brgerst(a)gmail.com>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: David Laight <David.Laight(a)aculab.com>
Cc: Denys Vlasenko <dvlasenk(a)redhat.com>
Cc: Eduardo Valentin <eduval(a)amazon.com>
Cc: Greg KH <gregkh(a)linuxfoundation.org>
Cc: H. Peter Anvin <hpa(a)zytor.com>
Cc: Josh Poimboeuf <jpoimboe(a)redhat.com>
Cc: Juergen Gross <jgross(a)suse.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Will Deacon <will.deacon(a)arm.com>
Cc: aliguori(a)amazon.com
Cc: daniel.gruss(a)iaik.tugraz.at
Cc: hughd(a)google.com
Cc: keescook(a)google.com
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/entry/calling.h | 30 ++++++++++++++++++++++++++++--
arch/x86/entry/entry_64.S | 4 ++--
2 files changed, 30 insertions(+), 4 deletions(-)
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -281,8 +281,34 @@ For 32-bit we have the following convent
.Ldone_\@:
.endm
-.macro RESTORE_CR3 save_reg:req
+.macro RESTORE_CR3 scratch_reg:req save_reg:req
ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+
+ ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
+
+ /*
+ * KERNEL pages can always resume with NOFLUSH as we do
+ * explicit flushes.
+ */
+ bt $X86_CR3_PTI_SWITCH_BIT, \save_reg
+ jnc .Lnoflush_\@
+
+ /*
+ * Check if there's a pending flush for the user ASID we're
+ * about to set.
+ */
+ movq \save_reg, \scratch_reg
+ andq $(0x7FF), \scratch_reg
+ bt \scratch_reg, THIS_CPU_user_pcid_flush_mask
+ jnc .Lnoflush_\@
+
+ btr \scratch_reg, THIS_CPU_user_pcid_flush_mask
+ jmp .Lwrcr3_\@
+
+.Lnoflush_\@:
+ SET_NOFLUSH_BIT \save_reg
+
+.Lwrcr3_\@:
/*
* The CR3 write could be avoided when not changing its value,
* but would require a CR3 read *and* a scratch register.
@@ -301,7 +327,7 @@ For 32-bit we have the following convent
.endm
.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
.endm
-.macro RESTORE_CR3 save_reg:req
+.macro RESTORE_CR3 scratch_reg:req save_reg:req
.endm
#endif
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1288,7 +1288,7 @@ ENTRY(paranoid_exit)
testl %ebx, %ebx /* swapgs needed? */
jnz .Lparanoid_exit_no_swapgs
TRACE_IRQS_IRETQ
- RESTORE_CR3 save_reg=%r14
+ RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
SWAPGS_UNSAFE_STACK
jmp .Lparanoid_exit_restore
.Lparanoid_exit_no_swapgs:
@@ -1730,7 +1730,7 @@ end_repeat_nmi:
movq $-1, %rsi
call do_nmi
- RESTORE_CR3 save_reg=%r14
+ RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
testl %ebx, %ebx /* swapgs needed? */
jnz nmi_restore
Patches currently in stable-queue which might be from peterz(a)infradead.org are
queue-4.14/x86-mm-pti-allow-nx-poison-to-be-set-in-p4d-pgd.patch
queue-4.14/x86-dumpstack-indicate-in-oops-whether-pti-is-configured-and-enabled.patch
queue-4.14/x86-mm-clarify-the-whole-asid-kernel-pcid-user-pcid-naming.patch
queue-4.14/x86-mm-abstract-switching-cr3.patch
queue-4.14/x86-mm-optimize-restore_cr3.patch
queue-4.14/x86-mm-pti-force-entry-through-trampoline-when-pti-active.patch
queue-4.14/x86-entry-align-entry-text-section-to-pmd-boundary.patch
queue-4.14/x86-mm-64-make-a-full-pgd-entry-size-hole-in-the-memory-map.patch
queue-4.14/x86-cpu_entry_area-add-debugstore-entries-to-cpu_entry_area.patch
queue-4.14/x86-mm-pti-share-entry-text-pmd.patch
queue-4.14/x86-ldt-make-the-ldt-mapping-ro.patch
queue-4.14/x86-mm-dump_pagetables-check-user-space-page-table-for-wx-pages.patch
queue-4.14/x86-pti-map-the-vsyscall-page-if-needed.patch
queue-4.14/x86-mm-pti-disable-global-pages-if-page_table_isolation-y.patch
queue-4.14/x86-mm-dump_pagetables-allow-dumping-current-pagetables.patch
queue-4.14/x86-mm-pti-add-infrastructure-for-page-table-isolation.patch
queue-4.14/x86-cpufeatures-add-x86_bug_cpu_insecure.patch
queue-4.14/x86-mm-use-fix-pcid-to-optimize-user-kernel-switches.patch
queue-4.14/x86-mm-use-invpcid-for-__native_flush_tlb_single.patch
queue-4.14/x86-mm-pti-prepare-the-x86-entry-assembly-code-for-entry-exit-cr3-switching.patch
queue-4.14/x86-mm-allow-flushing-for-future-asid-switches.patch
queue-4.14/x86-mm-pti-add-kconfig.patch
queue-4.14/x86-mm-pti-add-mapping-helper-functions.patch
queue-4.14/x86-mm-pti-map-espfix-into-user-space.patch
queue-4.14/x86-mm-pti-share-cpu_entry_area-with-user-space-page-tables.patch
queue-4.14/x86-mm-pti-add-functions-to-clone-kernel-pmds.patch
queue-4.14/x86-pti-add-the-pti-cmdline-option-and-documentation.patch
queue-4.14/x86-events-intel-ds-map-debug-buffers-in-cpu_entry_area.patch
queue-4.14/x86-mm-pti-allocate-a-separate-user-pgd.patch
queue-4.14/x86-pti-put-the-ldt-in-its-own-pgd-if-pti-is-on.patch
queue-4.14/x86-mm-pti-populate-user-pgd.patch
queue-4.14/x86-mm-dump_pagetables-add-page-table-directory-to-the-debugfs-vfs-hierarchy.patch
This is a note to let you know that I've just added the patch titled
x86/mm/dump_pagetables: Add page table directory to the debugfs VFS hierarchy
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-mm-dump_pagetables-add-page-table-directory-to-the-debugfs-vfs-hierarchy.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 75298aa179d56cd64f54e58a19fffc8ab922b4c0 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp(a)suse.de>
Date: Mon, 4 Dec 2017 15:08:04 +0100
Subject: x86/mm/dump_pagetables: Add page table directory to the debugfs VFS hierarchy
From: Borislav Petkov <bp(a)suse.de>
commit 75298aa179d56cd64f54e58a19fffc8ab922b4c0 upstream.
The upcoming support for dumping the kernel and the user space page tables
of the current process would create more random files in the top level
debugfs directory.
Add a page table directory and move the existing file to it.
Signed-off-by: Borislav Petkov <bp(a)suse.de>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Andy Lutomirski <luto(a)kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky(a)oracle.com>
Cc: Borislav Petkov <bp(a)alien8.de>
Cc: Brian Gerst <brgerst(a)gmail.com>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: David Laight <David.Laight(a)aculab.com>
Cc: Denys Vlasenko <dvlasenk(a)redhat.com>
Cc: Eduardo Valentin <eduval(a)amazon.com>
Cc: Greg KH <gregkh(a)linuxfoundation.org>
Cc: H. Peter Anvin <hpa(a)zytor.com>
Cc: Josh Poimboeuf <jpoimboe(a)redhat.com>
Cc: Juergen Gross <jgross(a)suse.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Will Deacon <will.deacon(a)arm.com>
Cc: aliguori(a)amazon.com
Cc: daniel.gruss(a)iaik.tugraz.at
Cc: hughd(a)google.com
Cc: keescook(a)google.com
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/mm/debug_pagetables.c | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
--- a/arch/x86/mm/debug_pagetables.c
+++ b/arch/x86/mm/debug_pagetables.c
@@ -22,21 +22,26 @@ static const struct file_operations ptdu
.release = single_release,
};
-static struct dentry *pe;
+static struct dentry *dir, *pe;
static int __init pt_dump_debug_init(void)
{
- pe = debugfs_create_file("kernel_page_tables", S_IRUSR, NULL, NULL,
- &ptdump_fops);
- if (!pe)
+ dir = debugfs_create_dir("page_tables", NULL);
+ if (!dir)
return -ENOMEM;
+ pe = debugfs_create_file("kernel", 0400, dir, NULL, &ptdump_fops);
+ if (!pe)
+ goto err;
return 0;
+err:
+ debugfs_remove_recursive(dir);
+ return -ENOMEM;
}
static void __exit pt_dump_debug_exit(void)
{
- debugfs_remove_recursive(pe);
+ debugfs_remove_recursive(dir);
}
module_init(pt_dump_debug_init);
Patches currently in stable-queue which might be from bp(a)suse.de are
queue-4.14/x86-mm-pti-allow-nx-poison-to-be-set-in-p4d-pgd.patch
queue-4.14/x86-mm-pti-force-entry-through-trampoline-when-pti-active.patch
queue-4.14/x86-mm-pti-disable-global-pages-if-page_table_isolation-y.patch
queue-4.14/x86-mm-pti-add-infrastructure-for-page-table-isolation.patch
queue-4.14/x86-mm-pti-prepare-the-x86-entry-assembly-code-for-entry-exit-cr3-switching.patch
queue-4.14/x86-pti-add-the-pti-cmdline-option-and-documentation.patch
queue-4.14/x86-mm-pti-allocate-a-separate-user-pgd.patch
queue-4.14/x86-mm-pti-populate-user-pgd.patch
queue-4.14/x86-mm-dump_pagetables-add-page-table-directory-to-the-debugfs-vfs-hierarchy.patch
This is a note to let you know that I've just added the patch titled
x86/mm: Clarify the whole ASID/kernel PCID/user PCID naming
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-mm-clarify-the-whole-asid-kernel-pcid-user-pcid-naming.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 0a126abd576ebc6403f063dbe20cf7416c9d9393 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz(a)infradead.org>
Date: Tue, 5 Dec 2017 13:34:53 +0100
Subject: x86/mm: Clarify the whole ASID/kernel PCID/user PCID naming
From: Peter Zijlstra <peterz(a)infradead.org>
commit 0a126abd576ebc6403f063dbe20cf7416c9d9393 upstream.
Ideally we'd also use sparse to enforce this separation so it becomes much
more difficult to mess up.
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Andy Lutomirski <luto(a)kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky(a)oracle.com>
Cc: Borislav Petkov <bp(a)alien8.de>
Cc: Brian Gerst <brgerst(a)gmail.com>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: David Laight <David.Laight(a)aculab.com>
Cc: Denys Vlasenko <dvlasenk(a)redhat.com>
Cc: Eduardo Valentin <eduval(a)amazon.com>
Cc: Greg KH <gregkh(a)linuxfoundation.org>
Cc: H. Peter Anvin <hpa(a)zytor.com>
Cc: Josh Poimboeuf <jpoimboe(a)redhat.com>
Cc: Juergen Gross <jgross(a)suse.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Will Deacon <will.deacon(a)arm.com>
Cc: aliguori(a)amazon.com
Cc: daniel.gruss(a)iaik.tugraz.at
Cc: hughd(a)google.com
Cc: keescook(a)google.com
Cc: linux-mm(a)kvack.org
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/include/asm/tlbflush.h | 55 +++++++++++++++++++++++++++++++---------
1 file changed, 43 insertions(+), 12 deletions(-)
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -13,16 +13,33 @@
#include <asm/pti.h>
#include <asm/processor-flags.h>
-static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
-{
- /*
- * Bump the generation count. This also serves as a full barrier
- * that synchronizes with switch_mm(): callers are required to order
- * their read of mm_cpumask after their writes to the paging
- * structures.
- */
- return atomic64_inc_return(&mm->context.tlb_gen);
-}
+/*
+ * The x86 feature is called PCID (Process Context IDentifier). It is similar
+ * to what is traditionally called ASID on the RISC processors.
+ *
+ * We don't use the traditional ASID implementation, where each process/mm gets
+ * its own ASID and flush/restart when we run out of ASID space.
+ *
+ * Instead we have a small per-cpu array of ASIDs and cache the last few mm's
+ * that came by on this CPU, allowing cheaper switch_mm between processes on
+ * this CPU.
+ *
+ * We end up with different spaces for different things. To avoid confusion we
+ * use different names for each of them:
+ *
+ * ASID - [0, TLB_NR_DYN_ASIDS-1]
+ * the canonical identifier for an mm
+ *
+ * kPCID - [1, TLB_NR_DYN_ASIDS]
+ * the value we write into the PCID part of CR3; corresponds to the
+ * ASID+1, because PCID 0 is special.
+ *
+ * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS]
+ * for KPTI each mm has two address spaces and thus needs two
+ * PCID values, but we can still do with a single ASID denomination
+ * for each mm. Corresponds to kPCID + 2048.
+ *
+ */
/* There are 12 bits of space for ASIDS in CR3 */
#define CR3_HW_ASID_BITS 12
@@ -41,7 +58,7 @@ static inline u64 inc_mm_tlb_gen(struct
/*
* ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid. -1 below to account
- * for them being zero-based. Another -1 is because ASID 0 is reserved for
+ * for them being zero-based. Another -1 is because PCID 0 is reserved for
* use by non-PCID-aware users.
*/
#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2)
@@ -52,6 +69,9 @@ static inline u64 inc_mm_tlb_gen(struct
*/
#define TLB_NR_DYN_ASIDS 6
+/*
+ * Given @asid, compute kPCID
+ */
static inline u16 kern_pcid(u16 asid)
{
VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
@@ -86,7 +106,7 @@ static inline u16 kern_pcid(u16 asid)
}
/*
- * The user PCID is just the kernel one, plus the "switch bit".
+ * Given @asid, compute uPCID
*/
static inline u16 user_pcid(u16 asid)
{
@@ -484,6 +504,17 @@ static inline void flush_tlb_page(struct
void native_flush_tlb_others(const struct cpumask *cpumask,
const struct flush_tlb_info *info);
+static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+{
+ /*
+ * Bump the generation count. This also serves as a full barrier
+ * that synchronizes with switch_mm(): callers are required to order
+ * their read of mm_cpumask after their writes to the paging
+ * structures.
+ */
+ return atomic64_inc_return(&mm->context.tlb_gen);
+}
+
static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
struct mm_struct *mm)
{
Patches currently in stable-queue which might be from peterz(a)infradead.org are
queue-4.14/x86-mm-pti-allow-nx-poison-to-be-set-in-p4d-pgd.patch
queue-4.14/x86-dumpstack-indicate-in-oops-whether-pti-is-configured-and-enabled.patch
queue-4.14/x86-mm-clarify-the-whole-asid-kernel-pcid-user-pcid-naming.patch
queue-4.14/x86-mm-abstract-switching-cr3.patch
queue-4.14/x86-mm-optimize-restore_cr3.patch
queue-4.14/x86-mm-pti-force-entry-through-trampoline-when-pti-active.patch
queue-4.14/x86-entry-align-entry-text-section-to-pmd-boundary.patch
queue-4.14/x86-mm-64-make-a-full-pgd-entry-size-hole-in-the-memory-map.patch
queue-4.14/x86-cpu_entry_area-add-debugstore-entries-to-cpu_entry_area.patch
queue-4.14/x86-mm-pti-share-entry-text-pmd.patch
queue-4.14/x86-ldt-make-the-ldt-mapping-ro.patch
queue-4.14/x86-mm-dump_pagetables-check-user-space-page-table-for-wx-pages.patch
queue-4.14/x86-pti-map-the-vsyscall-page-if-needed.patch
queue-4.14/x86-mm-pti-disable-global-pages-if-page_table_isolation-y.patch
queue-4.14/x86-mm-dump_pagetables-allow-dumping-current-pagetables.patch
queue-4.14/x86-mm-pti-add-infrastructure-for-page-table-isolation.patch
queue-4.14/x86-cpufeatures-add-x86_bug_cpu_insecure.patch
queue-4.14/x86-mm-use-fix-pcid-to-optimize-user-kernel-switches.patch
queue-4.14/x86-mm-use-invpcid-for-__native_flush_tlb_single.patch
queue-4.14/x86-mm-pti-prepare-the-x86-entry-assembly-code-for-entry-exit-cr3-switching.patch
queue-4.14/x86-mm-allow-flushing-for-future-asid-switches.patch
queue-4.14/x86-mm-pti-add-kconfig.patch
queue-4.14/x86-mm-pti-add-mapping-helper-functions.patch
queue-4.14/x86-mm-pti-map-espfix-into-user-space.patch
queue-4.14/x86-mm-pti-share-cpu_entry_area-with-user-space-page-tables.patch
queue-4.14/x86-mm-pti-add-functions-to-clone-kernel-pmds.patch
queue-4.14/x86-pti-add-the-pti-cmdline-option-and-documentation.patch
queue-4.14/x86-events-intel-ds-map-debug-buffers-in-cpu_entry_area.patch
queue-4.14/x86-mm-pti-allocate-a-separate-user-pgd.patch
queue-4.14/x86-pti-put-the-ldt-in-its-own-pgd-if-pti-is-on.patch
queue-4.14/x86-mm-pti-populate-user-pgd.patch
queue-4.14/x86-mm-dump_pagetables-add-page-table-directory-to-the-debugfs-vfs-hierarchy.patch
This is a note to let you know that I've just added the patch titled
x86/mm: Allow flushing for future ASID switches
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-mm-allow-flushing-for-future-asid-switches.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 2ea907c4fe7b78e5840c1dc07800eae93248cad1 Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave.hansen(a)linux.intel.com>
Date: Mon, 4 Dec 2017 15:07:57 +0100
Subject: x86/mm: Allow flushing for future ASID switches
From: Dave Hansen <dave.hansen(a)linux.intel.com>
commit 2ea907c4fe7b78e5840c1dc07800eae93248cad1 upstream.
If changing the page tables in such a way that an invalidation of all
contexts (aka. PCIDs / ASIDs) is required, they can be actively invalidated
by:
1. INVPCID for each PCID (works for single pages too).
2. Load CR3 with each PCID without the NOFLUSH bit set
3. Load CR3 with the NOFLUSH bit set for each and do INVLPG for each address.
But, none of these are really feasible since there are ~6 ASIDs (12 with
PAGE_TABLE_ISOLATION) at the time that invalidation is required.
Instead of actively invalidating them, invalidate the *current* context and
also mark the cpu_tlbstate _quickly_ to indicate future invalidation to be
required.
At the next context-switch, look for this indicator
('invalidate_other' being set) invalidate all of the
cpu_tlbstate.ctxs[] entries.
This ensures that any future context switches will do a full flush
of the TLB, picking up the previous changes.
[ tglx: Folded more fixups from Peter ]
Signed-off-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Andy Lutomirski <luto(a)kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky(a)oracle.com>
Cc: Borislav Petkov <bp(a)alien8.de>
Cc: Brian Gerst <brgerst(a)gmail.com>
Cc: David Laight <David.Laight(a)aculab.com>
Cc: Denys Vlasenko <dvlasenk(a)redhat.com>
Cc: Eduardo Valentin <eduval(a)amazon.com>
Cc: Greg KH <gregkh(a)linuxfoundation.org>
Cc: H. Peter Anvin <hpa(a)zytor.com>
Cc: Josh Poimboeuf <jpoimboe(a)redhat.com>
Cc: Juergen Gross <jgross(a)suse.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Will Deacon <will.deacon(a)arm.com>
Cc: aliguori(a)amazon.com
Cc: daniel.gruss(a)iaik.tugraz.at
Cc: hughd(a)google.com
Cc: keescook(a)google.com
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/include/asm/tlbflush.h | 37 +++++++++++++++++++++++++++++--------
arch/x86/mm/tlb.c | 35 +++++++++++++++++++++++++++++++++++
2 files changed, 64 insertions(+), 8 deletions(-)
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -135,6 +135,17 @@ struct tlb_state {
bool is_lazy;
/*
+ * If set we changed the page tables in such a way that we
+ * needed an invalidation of all contexts (aka. PCIDs / ASIDs).
+ * This tells us to go invalidate all the non-loaded ctxs[]
+ * on the next context switch.
+ *
+ * The current ctx was kept up-to-date as it ran and does not
+ * need to be invalidated.
+ */
+ bool invalidate_other;
+
+ /*
* Access to this CR4 shadow and to H/W CR4 is protected by
* disabling interrupts when modifying either one.
*/
@@ -212,6 +223,14 @@ static inline unsigned long cr4_read_sha
}
/*
+ * Mark all other ASIDs as invalid, preserves the current.
+ */
+static inline void invalidate_other_asid(void)
+{
+ this_cpu_write(cpu_tlbstate.invalidate_other, true);
+}
+
+/*
* Save some of cr4 feature set we're using (e.g. Pentium 4MB
* enable and PPro Global page enable), so that any CPU's that boot
* up after us can get the correct flags. This should only be used
@@ -298,14 +317,6 @@ static inline void __flush_tlb_all(void)
*/
__flush_tlb();
}
-
- /*
- * Note: if we somehow had PCID but not PGE, then this wouldn't work --
- * we'd end up flushing kernel translations for the current ASID but
- * we might fail to flush kernel translations for other cached ASIDs.
- *
- * To avoid this issue, we force PCID off if PGE is off.
- */
}
/*
@@ -315,6 +326,16 @@ static inline void __flush_tlb_one(unsig
{
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
__flush_tlb_single(addr);
+
+ if (!static_cpu_has(X86_FEATURE_PTI))
+ return;
+
+ /*
+ * __flush_tlb_single() will have cleared the TLB entry for this ASID,
+ * but since kernel space is replicated across all, we must also
+ * invalidate all others.
+ */
+ invalidate_other_asid();
}
#define TLB_FLUSH_ALL -1UL
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -28,6 +28,38 @@
* Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
*/
+/*
+ * We get here when we do something requiring a TLB invalidation
+ * but could not go invalidate all of the contexts. We do the
+ * necessary invalidation by clearing out the 'ctx_id' which
+ * forces a TLB flush when the context is loaded.
+ */
+void clear_asid_other(void)
+{
+ u16 asid;
+
+ /*
+ * This is only expected to be set if we have disabled
+ * kernel _PAGE_GLOBAL pages.
+ */
+ if (!static_cpu_has(X86_FEATURE_PTI)) {
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
+ /* Do not need to flush the current asid */
+ if (asid == this_cpu_read(cpu_tlbstate.loaded_mm_asid))
+ continue;
+ /*
+ * Make sure the next time we go to switch to
+ * this asid, we do a flush:
+ */
+ this_cpu_write(cpu_tlbstate.ctxs[asid].ctx_id, 0);
+ }
+ this_cpu_write(cpu_tlbstate.invalidate_other, false);
+}
+
atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
@@ -42,6 +74,9 @@ static void choose_new_asid(struct mm_st
return;
}
+ if (this_cpu_read(cpu_tlbstate.invalidate_other))
+ clear_asid_other();
+
for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) !=
next->context.ctx_id)
Patches currently in stable-queue which might be from dave.hansen(a)linux.intel.com are
queue-4.14/x86-mm-pti-allow-nx-poison-to-be-set-in-p4d-pgd.patch
queue-4.14/x86-dumpstack-indicate-in-oops-whether-pti-is-configured-and-enabled.patch
queue-4.14/x86-mm-clarify-the-whole-asid-kernel-pcid-user-pcid-naming.patch
queue-4.14/x86-mm-abstract-switching-cr3.patch
queue-4.14/x86-mm-optimize-restore_cr3.patch
queue-4.14/x86-mm-pti-force-entry-through-trampoline-when-pti-active.patch
queue-4.14/x86-entry-align-entry-text-section-to-pmd-boundary.patch
queue-4.14/x86-mm-64-make-a-full-pgd-entry-size-hole-in-the-memory-map.patch
queue-4.14/x86-cpu_entry_area-add-debugstore-entries-to-cpu_entry_area.patch
queue-4.14/x86-mm-pti-share-entry-text-pmd.patch
queue-4.14/x86-ldt-make-the-ldt-mapping-ro.patch
queue-4.14/x86-mm-dump_pagetables-check-user-space-page-table-for-wx-pages.patch
queue-4.14/x86-pti-map-the-vsyscall-page-if-needed.patch
queue-4.14/x86-mm-pti-disable-global-pages-if-page_table_isolation-y.patch
queue-4.14/x86-mm-dump_pagetables-allow-dumping-current-pagetables.patch
queue-4.14/x86-mm-pti-add-infrastructure-for-page-table-isolation.patch
queue-4.14/x86-cpufeatures-add-x86_bug_cpu_insecure.patch
queue-4.14/x86-mm-use-fix-pcid-to-optimize-user-kernel-switches.patch
queue-4.14/x86-mm-use-invpcid-for-__native_flush_tlb_single.patch
queue-4.14/x86-mm-pti-prepare-the-x86-entry-assembly-code-for-entry-exit-cr3-switching.patch
queue-4.14/x86-mm-allow-flushing-for-future-asid-switches.patch
queue-4.14/x86-mm-pti-add-kconfig.patch
queue-4.14/x86-mm-pti-add-mapping-helper-functions.patch
queue-4.14/x86-mm-pti-map-espfix-into-user-space.patch
queue-4.14/x86-mm-pti-share-cpu_entry_area-with-user-space-page-tables.patch
queue-4.14/x86-mm-pti-add-functions-to-clone-kernel-pmds.patch
queue-4.14/x86-pti-add-the-pti-cmdline-option-and-documentation.patch
queue-4.14/x86-events-intel-ds-map-debug-buffers-in-cpu_entry_area.patch
queue-4.14/x86-mm-pti-allocate-a-separate-user-pgd.patch
queue-4.14/x86-pti-put-the-ldt-in-its-own-pgd-if-pti-is-on.patch
queue-4.14/x86-mm-pti-populate-user-pgd.patch
queue-4.14/x86-mm-dump_pagetables-add-page-table-directory-to-the-debugfs-vfs-hierarchy.patch
This is a note to let you know that I've just added the patch titled
x86/mm: Abstract switching CR3
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-mm-abstract-switching-cr3.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 48e111982cda033fec832c6b0592c2acedd85d04 Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave.hansen(a)linux.intel.com>
Date: Mon, 4 Dec 2017 15:07:58 +0100
Subject: x86/mm: Abstract switching CR3
From: Dave Hansen <dave.hansen(a)linux.intel.com>
commit 48e111982cda033fec832c6b0592c2acedd85d04 upstream.
In preparation to adding additional PCID flushing, abstract the
loading of a new ASID into CR3.
[ PeterZ: Split out from big combo patch ]
Signed-off-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Andy Lutomirski <luto(a)kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky(a)oracle.com>
Cc: Borislav Petkov <bp(a)alien8.de>
Cc: Brian Gerst <brgerst(a)gmail.com>
Cc: David Laight <David.Laight(a)aculab.com>
Cc: Denys Vlasenko <dvlasenk(a)redhat.com>
Cc: Eduardo Valentin <eduval(a)amazon.com>
Cc: Greg KH <gregkh(a)linuxfoundation.org>
Cc: H. Peter Anvin <hpa(a)zytor.com>
Cc: Josh Poimboeuf <jpoimboe(a)redhat.com>
Cc: Juergen Gross <jgross(a)suse.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Will Deacon <will.deacon(a)arm.com>
Cc: aliguori(a)amazon.com
Cc: daniel.gruss(a)iaik.tugraz.at
Cc: hughd(a)google.com
Cc: keescook(a)google.com
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/mm/tlb.c | 22 ++++++++++++++++++++--
1 file changed, 20 insertions(+), 2 deletions(-)
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -100,6 +100,24 @@ static void choose_new_asid(struct mm_st
*need_flush = true;
}
+static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
+{
+ unsigned long new_mm_cr3;
+
+ if (need_flush) {
+ new_mm_cr3 = build_cr3(pgdir, new_asid);
+ } else {
+ new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
+ }
+
+ /*
+ * Caution: many callers of this function expect
+ * that load_cr3() is serializing and orders TLB
+ * fills with respect to the mm_cpumask writes.
+ */
+ write_cr3(new_mm_cr3);
+}
+
void leave_mm(int cpu)
{
struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
@@ -230,7 +248,7 @@ void switch_mm_irqs_off(struct mm_struct
if (need_flush) {
this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
- write_cr3(build_cr3(next->pgd, new_asid));
+ load_new_mm_cr3(next->pgd, new_asid, true);
/*
* NB: This gets called via leave_mm() in the idle path
@@ -243,7 +261,7 @@ void switch_mm_irqs_off(struct mm_struct
trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
} else {
/* The new ASID is already up to date. */
- write_cr3(build_cr3_noflush(next->pgd, new_asid));
+ load_new_mm_cr3(next->pgd, new_asid, false);
/* See above wrt _rcuidle. */
trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
Patches currently in stable-queue which might be from dave.hansen(a)linux.intel.com are
queue-4.14/x86-mm-pti-allow-nx-poison-to-be-set-in-p4d-pgd.patch
queue-4.14/x86-dumpstack-indicate-in-oops-whether-pti-is-configured-and-enabled.patch
queue-4.14/x86-mm-clarify-the-whole-asid-kernel-pcid-user-pcid-naming.patch
queue-4.14/x86-mm-abstract-switching-cr3.patch
queue-4.14/x86-mm-optimize-restore_cr3.patch
queue-4.14/x86-mm-pti-force-entry-through-trampoline-when-pti-active.patch
queue-4.14/x86-entry-align-entry-text-section-to-pmd-boundary.patch
queue-4.14/x86-mm-64-make-a-full-pgd-entry-size-hole-in-the-memory-map.patch
queue-4.14/x86-cpu_entry_area-add-debugstore-entries-to-cpu_entry_area.patch
queue-4.14/x86-mm-pti-share-entry-text-pmd.patch
queue-4.14/x86-ldt-make-the-ldt-mapping-ro.patch
queue-4.14/x86-mm-dump_pagetables-check-user-space-page-table-for-wx-pages.patch
queue-4.14/x86-pti-map-the-vsyscall-page-if-needed.patch
queue-4.14/x86-mm-pti-disable-global-pages-if-page_table_isolation-y.patch
queue-4.14/x86-mm-dump_pagetables-allow-dumping-current-pagetables.patch
queue-4.14/x86-mm-pti-add-infrastructure-for-page-table-isolation.patch
queue-4.14/x86-cpufeatures-add-x86_bug_cpu_insecure.patch
queue-4.14/x86-mm-use-fix-pcid-to-optimize-user-kernel-switches.patch
queue-4.14/x86-mm-use-invpcid-for-__native_flush_tlb_single.patch
queue-4.14/x86-mm-pti-prepare-the-x86-entry-assembly-code-for-entry-exit-cr3-switching.patch
queue-4.14/x86-mm-allow-flushing-for-future-asid-switches.patch
queue-4.14/x86-mm-pti-add-kconfig.patch
queue-4.14/x86-mm-pti-add-mapping-helper-functions.patch
queue-4.14/x86-mm-pti-map-espfix-into-user-space.patch
queue-4.14/x86-mm-pti-share-cpu_entry_area-with-user-space-page-tables.patch
queue-4.14/x86-mm-pti-add-functions-to-clone-kernel-pmds.patch
queue-4.14/x86-pti-add-the-pti-cmdline-option-and-documentation.patch
queue-4.14/x86-events-intel-ds-map-debug-buffers-in-cpu_entry_area.patch
queue-4.14/x86-mm-pti-allocate-a-separate-user-pgd.patch
queue-4.14/x86-pti-put-the-ldt-in-its-own-pgd-if-pti-is-on.patch
queue-4.14/x86-mm-pti-populate-user-pgd.patch
queue-4.14/x86-mm-dump_pagetables-add-page-table-directory-to-the-debugfs-vfs-hierarchy.patch
This is a note to let you know that I've just added the patch titled
x86/mm/64: Make a full PGD-entry size hole in the memory map
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-mm-64-make-a-full-pgd-entry-size-hole-in-the-memory-map.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 9f449772a3106bcdd4eb8fdeb281147b0e99fb30 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto(a)kernel.org>
Date: Tue, 12 Dec 2017 07:56:44 -0800
Subject: x86/mm/64: Make a full PGD-entry size hole in the memory map
From: Andy Lutomirski <luto(a)kernel.org>
commit 9f449772a3106bcdd4eb8fdeb281147b0e99fb30 upstream.
Shrink vmalloc space from 16384TiB to 12800TiB to enlarge the hole starting
at 0xff90000000000000 to be a full PGD entry.
A subsequent patch will use this hole for the pagetable isolation LDT
alias.
Signed-off-by: Andy Lutomirski <luto(a)kernel.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Borislav Petkov <bp(a)alien8.de>
Cc: Brian Gerst <brgerst(a)gmail.com>
Cc: Dave Hansen <dave.hansen(a)intel.com>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: David Laight <David.Laight(a)aculab.com>
Cc: H. Peter Anvin <hpa(a)zytor.com>
Cc: Josh Poimboeuf <jpoimboe(a)redhat.com>
Cc: Juergen Gross <jgross(a)suse.com>
Cc: Kees Cook <keescook(a)chromium.org>
Cc: Kirill A. Shutemov <kirill(a)shutemov.name>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
Documentation/x86/x86_64/mm.txt | 4 ++--
arch/x86/include/asm/pgtable_64_types.h | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -29,8 +29,8 @@ Virtual memory map with 5 level page tab
hole caused by [56:63] sign extension
ff00000000000000 - ff0fffffffffffff (=52 bits) guard hole, reserved for hypervisor
ff10000000000000 - ff8fffffffffffff (=55 bits) direct mapping of all phys. memory
-ff90000000000000 - ff91ffffffffffff (=49 bits) hole
-ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space
+ff90000000000000 - ff9fffffffffffff (=52 bits) hole
+ffa0000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space (12800 TB)
ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole
ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
... unused hole ...
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -79,8 +79,8 @@ typedef struct { pteval_t pte; } pte_t;
#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
#ifdef CONFIG_X86_5LEVEL
-# define VMALLOC_SIZE_TB _AC(16384, UL)
-# define __VMALLOC_BASE _AC(0xff92000000000000, UL)
+# define VMALLOC_SIZE_TB _AC(12800, UL)
+# define __VMALLOC_BASE _AC(0xffa0000000000000, UL)
# define __VMEMMAP_BASE _AC(0xffd4000000000000, UL)
#else
# define VMALLOC_SIZE_TB _AC(32, UL)
Patches currently in stable-queue which might be from luto(a)kernel.org are
queue-4.14/x86-mm-pti-allow-nx-poison-to-be-set-in-p4d-pgd.patch
queue-4.14/x86-dumpstack-indicate-in-oops-whether-pti-is-configured-and-enabled.patch
queue-4.14/x86-mm-clarify-the-whole-asid-kernel-pcid-user-pcid-naming.patch
queue-4.14/x86-mm-abstract-switching-cr3.patch
queue-4.14/x86-mm-optimize-restore_cr3.patch
queue-4.14/x86-mm-pti-force-entry-through-trampoline-when-pti-active.patch
queue-4.14/x86-entry-align-entry-text-section-to-pmd-boundary.patch
queue-4.14/x86-mm-64-make-a-full-pgd-entry-size-hole-in-the-memory-map.patch
queue-4.14/x86-cpu_entry_area-add-debugstore-entries-to-cpu_entry_area.patch
queue-4.14/x86-mm-pti-share-entry-text-pmd.patch
queue-4.14/x86-ldt-make-the-ldt-mapping-ro.patch
queue-4.14/x86-mm-dump_pagetables-check-user-space-page-table-for-wx-pages.patch
queue-4.14/x86-pti-map-the-vsyscall-page-if-needed.patch
queue-4.14/x86-mm-pti-disable-global-pages-if-page_table_isolation-y.patch
queue-4.14/x86-mm-dump_pagetables-allow-dumping-current-pagetables.patch
queue-4.14/x86-mm-pti-add-infrastructure-for-page-table-isolation.patch
queue-4.14/x86-cpufeatures-add-x86_bug_cpu_insecure.patch
queue-4.14/x86-mm-use-fix-pcid-to-optimize-user-kernel-switches.patch
queue-4.14/x86-mm-use-invpcid-for-__native_flush_tlb_single.patch
queue-4.14/x86-mm-pti-prepare-the-x86-entry-assembly-code-for-entry-exit-cr3-switching.patch
queue-4.14/x86-mm-allow-flushing-for-future-asid-switches.patch
queue-4.14/x86-mm-pti-add-kconfig.patch
queue-4.14/x86-mm-pti-add-mapping-helper-functions.patch
queue-4.14/x86-mm-pti-map-espfix-into-user-space.patch
queue-4.14/x86-mm-pti-share-cpu_entry_area-with-user-space-page-tables.patch
queue-4.14/x86-mm-pti-add-functions-to-clone-kernel-pmds.patch
queue-4.14/x86-pti-add-the-pti-cmdline-option-and-documentation.patch
queue-4.14/x86-events-intel-ds-map-debug-buffers-in-cpu_entry_area.patch
queue-4.14/x86-mm-pti-allocate-a-separate-user-pgd.patch
queue-4.14/x86-pti-put-the-ldt-in-its-own-pgd-if-pti-is-on.patch
queue-4.14/x86-mm-pti-populate-user-pgd.patch
queue-4.14/x86-mm-dump_pagetables-add-page-table-directory-to-the-debugfs-vfs-hierarchy.patch
This is a note to let you know that I've just added the patch titled
x86/ldt: Make the LDT mapping RO
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-ldt-make-the-ldt-mapping-ro.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 9f5cb6b32d9e0a3a7453222baaf15664d92adbf2 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx(a)linutronix.de>
Date: Fri, 15 Dec 2017 20:35:11 +0100
Subject: x86/ldt: Make the LDT mapping RO
From: Thomas Gleixner <tglx(a)linutronix.de>
commit 9f5cb6b32d9e0a3a7453222baaf15664d92adbf2 upstream.
Now that the LDT mapping is in a known area when PAGE_TABLE_ISOLATION is
enabled its a primary target for attacks, if a user space interface fails
to validate a write address correctly. That can never happen, right?
The SDM states:
If the segment descriptors in the GDT or an LDT are placed in ROM, the
processor can enter an indefinite loop if software or the processor
attempts to update (write to) the ROM-based segment descriptors. To
prevent this problem, set the accessed bits for all segment descriptors
placed in a ROM. Also, remove operating-system or executive code that
attempts to modify segment descriptors located in ROM.
So its a valid approach to set the ACCESS bit when setting up the LDT entry
and to map the table RO. Fixup the selftest so it can handle that new mode.
Remove the manual ACCESS bit setter in set_tls_desc() as this is now
pointless. Folded the patch from Peter Ziljstra.
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Andy Lutomirski <luto(a)kernel.org>
Cc: Borislav Petkov <bp(a)alien8.de>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: H. Peter Anvin <hpa(a)zytor.com>
Cc: Josh Poimboeuf <jpoimboe(a)redhat.com>
Cc: Juergen Gross <jgross(a)suse.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/include/asm/desc.h | 2 ++
arch/x86/kernel/ldt.c | 7 ++++++-
arch/x86/kernel/tls.c | 11 ++---------
tools/testing/selftests/x86/ldt_gdt.c | 3 +--
4 files changed, 11 insertions(+), 12 deletions(-)
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -21,6 +21,8 @@ static inline void fill_ldt(struct desc_
desc->type = (info->read_exec_only ^ 1) << 1;
desc->type |= info->contents << 2;
+ /* Set the ACCESS bit so it can be mapped RO */
+ desc->type |= 1;
desc->s = 1;
desc->dpl = 0x3;
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -158,7 +158,12 @@ map_ldt_struct(struct mm_struct *mm, str
ptep = get_locked_pte(mm, va, &ptl);
if (!ptep)
return -ENOMEM;
- pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL & ~_PAGE_GLOBAL));
+ /*
+ * Map it RO so the easy to find address is not a primary
+ * target via some kernel interface which misses a
+ * permission check.
+ */
+ pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL));
set_pte_at(mm, va, ptep, pte);
pte_unmap_unlock(ptep, ptl);
}
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -93,17 +93,10 @@ static void set_tls_desc(struct task_str
cpu = get_cpu();
while (n-- > 0) {
- if (LDT_empty(info) || LDT_zero(info)) {
+ if (LDT_empty(info) || LDT_zero(info))
memset(desc, 0, sizeof(*desc));
- } else {
+ else
fill_ldt(desc, info);
-
- /*
- * Always set the accessed bit so that the CPU
- * doesn't try to write to the (read-only) GDT.
- */
- desc->type |= 1;
- }
++info;
++desc;
}
--- a/tools/testing/selftests/x86/ldt_gdt.c
+++ b/tools/testing/selftests/x86/ldt_gdt.c
@@ -122,8 +122,7 @@ static void check_valid_segment(uint16_t
* NB: Different Linux versions do different things with the
* accessed bit in set_thread_area().
*/
- if (ar != expected_ar &&
- (ldt || ar != (expected_ar | AR_ACCESSED))) {
+ if (ar != expected_ar && ar != (expected_ar | AR_ACCESSED)) {
printf("[FAIL]\t%s entry %hu has AR 0x%08X but expected 0x%08X\n",
(ldt ? "LDT" : "GDT"), index, ar, expected_ar);
nerrs++;
Patches currently in stable-queue which might be from tglx(a)linutronix.de are
queue-4.14/x86-mm-pti-allow-nx-poison-to-be-set-in-p4d-pgd.patch
queue-4.14/x86-dumpstack-indicate-in-oops-whether-pti-is-configured-and-enabled.patch
queue-4.14/x86-mm-clarify-the-whole-asid-kernel-pcid-user-pcid-naming.patch
queue-4.14/x86-mm-abstract-switching-cr3.patch
queue-4.14/x86-mm-optimize-restore_cr3.patch
queue-4.14/x86-mm-pti-force-entry-through-trampoline-when-pti-active.patch
queue-4.14/x86-entry-align-entry-text-section-to-pmd-boundary.patch
queue-4.14/x86-mm-64-make-a-full-pgd-entry-size-hole-in-the-memory-map.patch
queue-4.14/x86-cpu_entry_area-add-debugstore-entries-to-cpu_entry_area.patch
queue-4.14/x86-mm-pti-share-entry-text-pmd.patch
queue-4.14/x86-ldt-make-the-ldt-mapping-ro.patch
queue-4.14/x86-mm-dump_pagetables-check-user-space-page-table-for-wx-pages.patch
queue-4.14/x86-pti-map-the-vsyscall-page-if-needed.patch
queue-4.14/x86-mm-pti-disable-global-pages-if-page_table_isolation-y.patch
queue-4.14/x86-mm-dump_pagetables-allow-dumping-current-pagetables.patch
queue-4.14/x86-mm-pti-add-infrastructure-for-page-table-isolation.patch
queue-4.14/x86-cpufeatures-add-x86_bug_cpu_insecure.patch
queue-4.14/x86-mm-use-fix-pcid-to-optimize-user-kernel-switches.patch
queue-4.14/x86-mm-use-invpcid-for-__native_flush_tlb_single.patch
queue-4.14/x86-mm-pti-prepare-the-x86-entry-assembly-code-for-entry-exit-cr3-switching.patch
queue-4.14/x86-mm-allow-flushing-for-future-asid-switches.patch
queue-4.14/x86-mm-pti-add-kconfig.patch
queue-4.14/x86-mm-pti-add-mapping-helper-functions.patch
queue-4.14/x86-mm-pti-map-espfix-into-user-space.patch
queue-4.14/x86-mm-pti-share-cpu_entry_area-with-user-space-page-tables.patch
queue-4.14/x86-mm-pti-add-functions-to-clone-kernel-pmds.patch
queue-4.14/x86-pti-add-the-pti-cmdline-option-and-documentation.patch
queue-4.14/x86-events-intel-ds-map-debug-buffers-in-cpu_entry_area.patch
queue-4.14/x86-mm-pti-allocate-a-separate-user-pgd.patch
queue-4.14/x86-pti-put-the-ldt-in-its-own-pgd-if-pti-is-on.patch
queue-4.14/x86-mm-pti-populate-user-pgd.patch
queue-4.14/x86-mm-dump_pagetables-add-page-table-directory-to-the-debugfs-vfs-hierarchy.patch
This is a note to let you know that I've just added the patch titled
x86/entry: Align entry text section to PMD boundary
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-entry-align-entry-text-section-to-pmd-boundary.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 2f7412ba9c6af5ab16bdbb4a3fdb1dcd2b4fd3c2 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx(a)linutronix.de>
Date: Mon, 4 Dec 2017 15:07:46 +0100
Subject: x86/entry: Align entry text section to PMD boundary
From: Thomas Gleixner <tglx(a)linutronix.de>
commit 2f7412ba9c6af5ab16bdbb4a3fdb1dcd2b4fd3c2 upstream.
The (irq)entry text must be visible in the user space page tables. To allow
simple PMD based sharing, make the entry text PMD aligned.
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Andy Lutomirski <luto(a)kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky(a)oracle.com>
Cc: Borislav Petkov <bp(a)alien8.de>
Cc: Brian Gerst <brgerst(a)gmail.com>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: David Laight <David.Laight(a)aculab.com>
Cc: Denys Vlasenko <dvlasenk(a)redhat.com>
Cc: Eduardo Valentin <eduval(a)amazon.com>
Cc: Greg KH <gregkh(a)linuxfoundation.org>
Cc: H. Peter Anvin <hpa(a)zytor.com>
Cc: Josh Poimboeuf <jpoimboe(a)redhat.com>
Cc: Juergen Gross <jgross(a)suse.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Will Deacon <will.deacon(a)arm.com>
Cc: aliguori(a)amazon.com
Cc: daniel.gruss(a)iaik.tugraz.at
Cc: hughd(a)google.com
Cc: keescook(a)google.com
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/kernel/vmlinux.lds.S | 8 ++++++++
1 file changed, 8 insertions(+)
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -61,11 +61,17 @@ jiffies_64 = jiffies;
. = ALIGN(HPAGE_SIZE); \
__end_rodata_hpage_align = .;
+#define ALIGN_ENTRY_TEXT_BEGIN . = ALIGN(PMD_SIZE);
+#define ALIGN_ENTRY_TEXT_END . = ALIGN(PMD_SIZE);
+
#else
#define X64_ALIGN_RODATA_BEGIN
#define X64_ALIGN_RODATA_END
+#define ALIGN_ENTRY_TEXT_BEGIN
+#define ALIGN_ENTRY_TEXT_END
+
#endif
PHDRS {
@@ -102,8 +108,10 @@ SECTIONS
CPUIDLE_TEXT
LOCK_TEXT
KPROBES_TEXT
+ ALIGN_ENTRY_TEXT_BEGIN
ENTRY_TEXT
IRQENTRY_TEXT
+ ALIGN_ENTRY_TEXT_END
SOFTIRQENTRY_TEXT
*(.fixup)
*(.gnu.warning)
Patches currently in stable-queue which might be from tglx(a)linutronix.de are
queue-4.14/x86-mm-pti-allow-nx-poison-to-be-set-in-p4d-pgd.patch
queue-4.14/x86-dumpstack-indicate-in-oops-whether-pti-is-configured-and-enabled.patch
queue-4.14/x86-mm-clarify-the-whole-asid-kernel-pcid-user-pcid-naming.patch
queue-4.14/x86-mm-abstract-switching-cr3.patch
queue-4.14/x86-mm-optimize-restore_cr3.patch
queue-4.14/x86-mm-pti-force-entry-through-trampoline-when-pti-active.patch
queue-4.14/x86-entry-align-entry-text-section-to-pmd-boundary.patch
queue-4.14/x86-mm-64-make-a-full-pgd-entry-size-hole-in-the-memory-map.patch
queue-4.14/x86-cpu_entry_area-add-debugstore-entries-to-cpu_entry_area.patch
queue-4.14/x86-mm-pti-share-entry-text-pmd.patch
queue-4.14/x86-ldt-make-the-ldt-mapping-ro.patch
queue-4.14/x86-mm-dump_pagetables-check-user-space-page-table-for-wx-pages.patch
queue-4.14/x86-pti-map-the-vsyscall-page-if-needed.patch
queue-4.14/x86-mm-pti-disable-global-pages-if-page_table_isolation-y.patch
queue-4.14/x86-mm-dump_pagetables-allow-dumping-current-pagetables.patch
queue-4.14/x86-mm-pti-add-infrastructure-for-page-table-isolation.patch
queue-4.14/x86-cpufeatures-add-x86_bug_cpu_insecure.patch
queue-4.14/x86-mm-use-fix-pcid-to-optimize-user-kernel-switches.patch
queue-4.14/x86-mm-use-invpcid-for-__native_flush_tlb_single.patch
queue-4.14/x86-mm-pti-prepare-the-x86-entry-assembly-code-for-entry-exit-cr3-switching.patch
queue-4.14/x86-mm-allow-flushing-for-future-asid-switches.patch
queue-4.14/x86-mm-pti-add-kconfig.patch
queue-4.14/x86-mm-pti-add-mapping-helper-functions.patch
queue-4.14/x86-mm-pti-map-espfix-into-user-space.patch
queue-4.14/x86-mm-pti-share-cpu_entry_area-with-user-space-page-tables.patch
queue-4.14/x86-mm-pti-add-functions-to-clone-kernel-pmds.patch
queue-4.14/x86-pti-add-the-pti-cmdline-option-and-documentation.patch
queue-4.14/x86-events-intel-ds-map-debug-buffers-in-cpu_entry_area.patch
queue-4.14/x86-mm-pti-allocate-a-separate-user-pgd.patch
queue-4.14/x86-pti-put-the-ldt-in-its-own-pgd-if-pti-is-on.patch
queue-4.14/x86-mm-pti-populate-user-pgd.patch
queue-4.14/x86-mm-dump_pagetables-add-page-table-directory-to-the-debugfs-vfs-hierarchy.patch
This is a note to let you know that I've just added the patch titled
x86/dumpstack: Indicate in Oops whether PTI is configured and enabled
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-dumpstack-indicate-in-oops-whether-pti-is-configured-and-enabled.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 5f26d76c3fd67c48806415ef8b1116c97beff8ba Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka(a)suse.cz>
Date: Tue, 19 Dec 2017 22:33:46 +0100
Subject: x86/dumpstack: Indicate in Oops whether PTI is configured and enabled
From: Vlastimil Babka <vbabka(a)suse.cz>
commit 5f26d76c3fd67c48806415ef8b1116c97beff8ba upstream.
CONFIG_PAGE_TABLE_ISOLATION is relatively new and intrusive feature that may
still have some corner cases which could take some time to manifest and be
fixed. It would be useful to have Oops messages indicate whether it was
enabled for building the kernel, and whether it was disabled during boot.
Example of fully enabled:
Oops: 0001 [#1] SMP PTI
Example of enabled during build, but disabled during boot:
Oops: 0001 [#1] SMP NOPTI
We can decide to remove this after the feature has been tested in the field
long enough.
[ tglx: Made it use boot_cpu_has() as requested by Borislav ]
Signed-off-by: Vlastimil Babka <vbabka(a)suse.cz>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Eduardo Valentin <eduval(a)amazon.com>
Acked-by: Dave Hansen <dave.hansen(a)intel.com>
Cc: Andy Lutomirski <luto(a)kernel.org>
Cc: Andy Lutomirsky <luto(a)kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky(a)oracle.com>
Cc: Borislav Petkov <bp(a)alien8.de>
Cc: Brian Gerst <brgerst(a)gmail.com>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: David Laight <David.Laight(a)aculab.com>
Cc: Denys Vlasenko <dvlasenk(a)redhat.com>
Cc: Greg KH <gregkh(a)linuxfoundation.org>
Cc: H. Peter Anvin <hpa(a)zytor.com>
Cc: Josh Poimboeuf <jpoimboe(a)redhat.com>
Cc: Juergen Gross <jgross(a)suse.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Will Deacon <will.deacon(a)arm.com>
Cc: aliguori(a)amazon.com
Cc: bpetkov(a)suse.de
Cc: daniel.gruss(a)iaik.tugraz.at
Cc: hughd(a)google.com
Cc: jkosina(a)suse.cz
Cc: keescook(a)google.com
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/kernel/dumpstack.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -297,11 +297,13 @@ int __die(const char *str, struct pt_reg
unsigned long sp;
#endif
printk(KERN_DEFAULT
- "%s: %04lx [#%d]%s%s%s%s\n", str, err & 0xffff, ++die_counter,
+ "%s: %04lx [#%d]%s%s%s%s%s\n", str, err & 0xffff, ++die_counter,
IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
- IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "");
+ IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "",
+ IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ?
+ (boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : "");
if (notify_die(DIE_OOPS, str, regs, err,
current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
Patches currently in stable-queue which might be from vbabka(a)suse.cz are
queue-4.14/x86-dumpstack-indicate-in-oops-whether-pti-is-configured-and-enabled.patch
This is a note to let you know that I've just added the patch titled
x86/cpu_entry_area: Add debugstore entries to cpu_entry_area
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-cpu_entry_area-add-debugstore-entries-to-cpu_entry_area.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 10043e02db7f8a4161f76434931051e7d797a5f6 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx(a)linutronix.de>
Date: Mon, 4 Dec 2017 15:07:49 +0100
Subject: x86/cpu_entry_area: Add debugstore entries to cpu_entry_area
From: Thomas Gleixner <tglx(a)linutronix.de>
commit 10043e02db7f8a4161f76434931051e7d797a5f6 upstream.
The Intel PEBS/BTS debug store is a design trainwreck as it expects virtual
addresses which must be visible in any execution context.
So it is required to make these mappings visible to user space when kernel
page table isolation is active.
Provide enough room for the buffer mappings in the cpu_entry_area so the
buffers are available in the user space visible page tables.
At the point where the kernel side entry area is populated there is no
buffer available yet, but the kernel PMD must be populated. To achieve this
set the entries for these buffers to non present.
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Andy Lutomirski <luto(a)kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky(a)oracle.com>
Cc: Borislav Petkov <bp(a)alien8.de>
Cc: Brian Gerst <brgerst(a)gmail.com>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: David Laight <David.Laight(a)aculab.com>
Cc: Denys Vlasenko <dvlasenk(a)redhat.com>
Cc: Eduardo Valentin <eduval(a)amazon.com>
Cc: Greg KH <gregkh(a)linuxfoundation.org>
Cc: H. Peter Anvin <hpa(a)zytor.com>
Cc: Josh Poimboeuf <jpoimboe(a)redhat.com>
Cc: Juergen Gross <jgross(a)suse.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Will Deacon <will.deacon(a)arm.com>
Cc: aliguori(a)amazon.com
Cc: daniel.gruss(a)iaik.tugraz.at
Cc: hughd(a)google.com
Cc: keescook(a)google.com
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/events/intel/ds.c | 5 ++--
arch/x86/events/perf_event.h | 21 +------------------
arch/x86/include/asm/cpu_entry_area.h | 13 ++++++++++++
arch/x86/include/asm/intel_ds.h | 36 ++++++++++++++++++++++++++++++++++
arch/x86/mm/cpu_entry_area.c | 27 +++++++++++++++++++++++++
5 files changed, 81 insertions(+), 21 deletions(-)
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -8,11 +8,12 @@
#include "../perf_event.h"
+/* Waste a full page so it can be mapped into the cpu_entry_area */
+DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
+
/* The size of a BTS record in bytes: */
#define BTS_RECORD_SIZE 24
-#define BTS_BUFFER_SIZE (PAGE_SIZE << 4)
-#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4)
#define PEBS_FIXUP_SIZE PAGE_SIZE
/*
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -14,6 +14,8 @@
#include <linux/perf_event.h>
+#include <asm/intel_ds.h>
+
/* To enable MSR tracing please use the generic trace points. */
/*
@@ -77,8 +79,6 @@ struct amd_nb {
struct event_constraint event_constraints[X86_PMC_IDX_MAX];
};
-/* The maximal number of PEBS events: */
-#define MAX_PEBS_EVENTS 8
#define PEBS_COUNTER_MASK ((1ULL << MAX_PEBS_EVENTS) - 1)
/*
@@ -95,23 +95,6 @@ struct amd_nb {
PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \
PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)
-/*
- * A debug store configuration.
- *
- * We only support architectures that use 64bit fields.
- */
-struct debug_store {
- u64 bts_buffer_base;
- u64 bts_index;
- u64 bts_absolute_maximum;
- u64 bts_interrupt_threshold;
- u64 pebs_buffer_base;
- u64 pebs_index;
- u64 pebs_absolute_maximum;
- u64 pebs_interrupt_threshold;
- u64 pebs_event_reset[MAX_PEBS_EVENTS];
-};
-
#define PEBS_REGS \
(PERF_REG_X86_AX | \
PERF_REG_X86_BX | \
--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -5,6 +5,7 @@
#include <linux/percpu-defs.h>
#include <asm/processor.h>
+#include <asm/intel_ds.h>
/*
* cpu_entry_area is a percpu region that contains things needed by the CPU
@@ -40,6 +41,18 @@ struct cpu_entry_area {
*/
char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
#endif
+#ifdef CONFIG_CPU_SUP_INTEL
+ /*
+ * Per CPU debug store for Intel performance monitoring. Wastes a
+ * full page at the moment.
+ */
+ struct debug_store cpu_debug_store;
+ /*
+ * The actual PEBS/BTS buffers must be mapped to user space
+ * Reserve enough fixmap PTEs.
+ */
+ struct debug_store_buffers cpu_debug_buffers;
+#endif
};
#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area))
--- /dev/null
+++ b/arch/x86/include/asm/intel_ds.h
@@ -0,0 +1,36 @@
+#ifndef _ASM_INTEL_DS_H
+#define _ASM_INTEL_DS_H
+
+#include <linux/percpu-defs.h>
+
+#define BTS_BUFFER_SIZE (PAGE_SIZE << 4)
+#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4)
+
+/* The maximal number of PEBS events: */
+#define MAX_PEBS_EVENTS 8
+
+/*
+ * A debug store configuration.
+ *
+ * We only support architectures that use 64bit fields.
+ */
+struct debug_store {
+ u64 bts_buffer_base;
+ u64 bts_index;
+ u64 bts_absolute_maximum;
+ u64 bts_interrupt_threshold;
+ u64 pebs_buffer_base;
+ u64 pebs_index;
+ u64 pebs_absolute_maximum;
+ u64 pebs_interrupt_threshold;
+ u64 pebs_event_reset[MAX_PEBS_EVENTS];
+} __aligned(PAGE_SIZE);
+
+DECLARE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
+
+struct debug_store_buffers {
+ char bts_buffer[BTS_BUFFER_SIZE];
+ char pebs_buffer[PEBS_BUFFER_SIZE];
+};
+
+#endif
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -38,6 +38,32 @@ cea_map_percpu_pages(void *cea_vaddr, vo
cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
}
+static void percpu_setup_debug_store(int cpu)
+{
+#ifdef CONFIG_CPU_SUP_INTEL
+ int npages;
+ void *cea;
+
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+ return;
+
+ cea = &get_cpu_entry_area(cpu)->cpu_debug_store;
+ npages = sizeof(struct debug_store) / PAGE_SIZE;
+ BUILD_BUG_ON(sizeof(struct debug_store) % PAGE_SIZE != 0);
+ cea_map_percpu_pages(cea, &per_cpu(cpu_debug_store, cpu), npages,
+ PAGE_KERNEL);
+
+ cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers;
+ /*
+ * Force the population of PMDs for not yet allocated per cpu
+ * memory like debug store buffers.
+ */
+ npages = sizeof(struct debug_store_buffers) / PAGE_SIZE;
+ for (; npages; npages--, cea += PAGE_SIZE)
+ cea_set_pte(cea, 0, PAGE_NONE);
+#endif
+}
+
/* Setup the fixmap mappings only once per-processor */
static void __init setup_cpu_entry_area(int cpu)
{
@@ -109,6 +135,7 @@ static void __init setup_cpu_entry_area(
cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline,
__pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
#endif
+ percpu_setup_debug_store(cpu);
}
static __init void setup_cpu_entry_area_ptes(void)
Patches currently in stable-queue which might be from tglx(a)linutronix.de are
queue-4.14/x86-mm-pti-allow-nx-poison-to-be-set-in-p4d-pgd.patch
queue-4.14/x86-dumpstack-indicate-in-oops-whether-pti-is-configured-and-enabled.patch
queue-4.14/x86-mm-clarify-the-whole-asid-kernel-pcid-user-pcid-naming.patch
queue-4.14/x86-mm-abstract-switching-cr3.patch
queue-4.14/x86-mm-optimize-restore_cr3.patch
queue-4.14/x86-mm-pti-force-entry-through-trampoline-when-pti-active.patch
queue-4.14/x86-entry-align-entry-text-section-to-pmd-boundary.patch
queue-4.14/x86-mm-64-make-a-full-pgd-entry-size-hole-in-the-memory-map.patch
queue-4.14/x86-cpu_entry_area-add-debugstore-entries-to-cpu_entry_area.patch
queue-4.14/x86-mm-pti-share-entry-text-pmd.patch
queue-4.14/x86-ldt-make-the-ldt-mapping-ro.patch
queue-4.14/x86-mm-dump_pagetables-check-user-space-page-table-for-wx-pages.patch
queue-4.14/x86-pti-map-the-vsyscall-page-if-needed.patch
queue-4.14/x86-mm-pti-disable-global-pages-if-page_table_isolation-y.patch
queue-4.14/x86-mm-dump_pagetables-allow-dumping-current-pagetables.patch
queue-4.14/x86-mm-pti-add-infrastructure-for-page-table-isolation.patch
queue-4.14/x86-cpufeatures-add-x86_bug_cpu_insecure.patch
queue-4.14/x86-mm-use-fix-pcid-to-optimize-user-kernel-switches.patch
queue-4.14/x86-mm-use-invpcid-for-__native_flush_tlb_single.patch
queue-4.14/x86-mm-pti-prepare-the-x86-entry-assembly-code-for-entry-exit-cr3-switching.patch
queue-4.14/x86-mm-allow-flushing-for-future-asid-switches.patch
queue-4.14/x86-mm-pti-add-kconfig.patch
queue-4.14/x86-mm-pti-add-mapping-helper-functions.patch
queue-4.14/x86-mm-pti-map-espfix-into-user-space.patch
queue-4.14/x86-mm-pti-share-cpu_entry_area-with-user-space-page-tables.patch
queue-4.14/x86-mm-pti-add-functions-to-clone-kernel-pmds.patch
queue-4.14/x86-pti-add-the-pti-cmdline-option-and-documentation.patch
queue-4.14/x86-events-intel-ds-map-debug-buffers-in-cpu_entry_area.patch
queue-4.14/x86-mm-pti-allocate-a-separate-user-pgd.patch
queue-4.14/x86-pti-put-the-ldt-in-its-own-pgd-if-pti-is-on.patch
queue-4.14/x86-mm-pti-populate-user-pgd.patch
queue-4.14/x86-mm-dump_pagetables-add-page-table-directory-to-the-debugfs-vfs-hierarchy.patch
This is a note to let you know that I've just added the patch titled
x86/cpufeatures: Add X86_BUG_CPU_INSECURE
to the 4.14-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-cpufeatures-add-x86_bug_cpu_insecure.patch
and it can be found in the queue-4.14 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From a89f040fa34ec9cd682aed98b8f04e3c47d998bd Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx(a)linutronix.de>
Date: Mon, 4 Dec 2017 15:07:33 +0100
Subject: x86/cpufeatures: Add X86_BUG_CPU_INSECURE
From: Thomas Gleixner <tglx(a)linutronix.de>
commit a89f040fa34ec9cd682aed98b8f04e3c47d998bd upstream.
Many x86 CPUs leak information to user space due to missing isolation of
user space and kernel space page tables. There are many well documented
ways to exploit that.
The upcoming software migitation of isolating the user and kernel space
page tables needs a misfeature flag so code can be made runtime
conditional.
Add the BUG bits which indicates that the CPU is affected and add a feature
bit which indicates that the software migitation is enabled.
Assume for now that _ALL_ x86 CPUs are affected by this. Exceptions can be
made later.
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Andy Lutomirski <luto(a)kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky(a)oracle.com>
Cc: Borislav Petkov <bp(a)alien8.de>
Cc: Brian Gerst <brgerst(a)gmail.com>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: David Laight <David.Laight(a)aculab.com>
Cc: Denys Vlasenko <dvlasenk(a)redhat.com>
Cc: Eduardo Valentin <eduval(a)amazon.com>
Cc: Greg KH <gregkh(a)linuxfoundation.org>
Cc: H. Peter Anvin <hpa(a)zytor.com>
Cc: Josh Poimboeuf <jpoimboe(a)redhat.com>
Cc: Juergen Gross <jgross(a)suse.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Will Deacon <will.deacon(a)arm.com>
Cc: aliguori(a)amazon.com
Cc: daniel.gruss(a)iaik.tugraz.at
Cc: hughd(a)google.com
Cc: keescook(a)google.com
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/include/asm/cpufeatures.h | 3 ++-
arch/x86/include/asm/disabled-features.h | 8 +++++++-
arch/x86/kernel/cpu/common.c | 4 ++++
3 files changed, 13 insertions(+), 2 deletions(-)
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -201,7 +201,7 @@
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
-
+#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */
@@ -340,5 +340,6 @@
#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
+#define X86_BUG_CPU_INSECURE X86_BUG(14) /* CPU is insecure and needs kernel page table isolation */
#endif /* _ASM_X86_CPUFEATURES_H */
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -44,6 +44,12 @@
# define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31))
#endif
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+# define DISABLE_PTI 0
+#else
+# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
+#endif
+
/*
* Make sure to add features to the correct mask
*/
@@ -54,7 +60,7 @@
#define DISABLED_MASK4 (DISABLE_PCID)
#define DISABLED_MASK5 0
#define DISABLED_MASK6 0
-#define DISABLED_MASK7 0
+#define DISABLED_MASK7 (DISABLE_PTI)
#define DISABLED_MASK8 0
#define DISABLED_MASK9 (DISABLE_MPX)
#define DISABLED_MASK10 0
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -898,6 +898,10 @@ static void __init early_identify_cpu(st
}
setup_force_cpu_cap(X86_FEATURE_ALWAYS);
+
+ /* Assume for now that ALL x86 CPUs are insecure */
+ setup_force_cpu_bug(X86_BUG_CPU_INSECURE);
+
fpu__init_system(c);
#ifdef CONFIG_X86_32
Patches currently in stable-queue which might be from tglx(a)linutronix.de are
queue-4.14/x86-cpufeatures-add-x86_bug_cpu_insecure.patch
Hello,
4.14.9 fails to boot if CONFIG_MCORE2 is enabled and when compiled with
gcc 6+. More details in the following bug reports:
https://bugzilla.kernel.org/show_bug.cgi?id=198263https://bugs.gentoo.org/642268
I bisected it to the commit below:
$ git bisect good
2bc9fa0beaf10206a778f02e9e5cb62f50345b1a is the first bad commit
commit 2bc9fa0beaf10206a778f02e9e5cb62f50345b1a
Author: Andy Lutomirski <luto(a)kernel.org>
Date: Mon Dec 4 15:07:23 2017 +0100
x86/entry/64: Use a per-CPU trampoline stack for IDT entries
commit 7f2590a110b837af5679d08fc25c6227c5a8c497 upstream.
Historically, IDT entries from usermode have always gone directly
to the running task's kernel stack. Rearrange it so that we enter
on
a per-CPU trampoline stack and then manually switch to the task's
stack.
This touches a couple of extra cachelines, but it gives us a chance
to run some code before we touch the kernel stack.
The asm isn't exactly beautiful, but I think that fully refactoring
it can wait.
Signed-off-by: Andy Lutomirski <luto(a)kernel.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Borislav Petkov <bp(a)suse.de>
Reviewed-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Boris Ostrovsky <boris.ostrovsky(a)oracle.com>
Cc: Borislav Petkov <bp(a)alien8.de>
Cc: Borislav Petkov <bpetkov(a)suse.de>
Cc: Brian Gerst <brgerst(a)gmail.com>
Cc: Dave Hansen <dave.hansen(a)intel.com>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: David Laight <David.Laight(a)aculab.com>
Cc: Denys Vlasenko <dvlasenk(a)redhat.com>
Cc: Eduardo Valentin <eduval(a)amazon.com>
Cc: Greg KH <gregkh(a)linuxfoundation.org>
Cc: H. Peter Anvin <hpa(a)zytor.com>
Cc: Josh Poimboeuf <jpoimboe(a)redhat.com>
Cc: Juergen Gross <jgross(a)suse.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Rik van Riel <riel(a)redhat.com>
Cc: Will Deacon <will.deacon(a)arm.com>
Cc: aliguori(a)amazon.com
Cc: daniel.gruss(a)iaik.tugraz.at
Cc: hughd(a)google.com
Cc: keescook(a)google.com
Link: https://lkml.kernel.org/r/20171204150606.225330557@linutronix
.de
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
:040000 040000 275d4746936a9e521a2b5041856f7dc1d1820dc6
8f8e869fd59c3dd781dceffa76e53e41d733a0cf M arch
$ git bisect log
git bisect start
# bad: [dad5c1402c570cd07a80113784bc20a7f930c8ae] Linux 4.14.9
git bisect bad dad5c1402c570cd07a80113784bc20a7f930c8ae
# good: [7b3775017f4e6b87dfd2c7f63d1eaf057948f31d] Linux 4.14.8
git bisect good 7b3775017f4e6b87dfd2c7f63d1eaf057948f31d
# good: [d120cd749ef9770ee98b708a83b49547dcf1c0e1] x86/entry/64:
Separate cpu_current_top_of_stack from TSS.sp0
git bisect good d120cd749ef9770ee98b708a83b49547dcf1c0e1
# bad: [97f41b41c432e5a80c91445d92c2f4b729984d36] powerpc/xmon: Avoid
tripping SMP hardlockup watchdog
git bisect bad 97f41b41c432e5a80c91445d92c2f4b729984d36
# bad: [bfd66a406fe7e590055c1d6714adc697f18664c8] PCI: Avoid bus reset
if bridge itself is broken
git bisect bad bfd66a406fe7e590055c1d6714adc697f18664c8
# bad: [8388d287e361a2fd0a39bece30a736d692d5c3d8] x86/cpufeatures: Make
CPU bugs sticky
git bisect bad 8388d287e361a2fd0a39bece30a736d692d5c3d8
# bad: [bb568391775d4a840992e2d2493f39d6e86401e3] x86/entry/64: Move
the IST stacks into struct cpu_entry_area
git bisect bad bb568391775d4a840992e2d2493f39d6e86401e3
# bad: [2bc9fa0beaf10206a778f02e9e5cb62f50345b1a] x86/entry/64: Use a
per-CPU trampoline stack for IDT entries
git bisect bad 2bc9fa0beaf10206a778f02e9e5cb62f50345b1a
# good: [c3dbef1bd0f7eb09daf49409ea533aa1b0eeb82e] x86/espfix/64: Stop
assuming that pt_regs is on the entry stack
git bisect good c3dbef1bd0f7eb09daf49409ea533aa1b0eeb82e
# first bad commit: [2bc9fa0beaf10206a778f02e9e5cb62f50345b1a]
x86/entry/64: Use a per-CPU trampoline stack for IDT entries
This is a note to let you know that I've just added the patch titled
tracing: Fix possible double free on failure of allocating trace buffer
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
tracing-fix-possible-double-free-on-failure-of-allocating-trace-buffer.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 4397f04575c44e1440ec2e49b6302785c95fd2f8 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt(a)goodmis.org>
Date: Tue, 26 Dec 2017 20:07:34 -0500
Subject: tracing: Fix possible double free on failure of allocating trace buffer
From: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
commit 4397f04575c44e1440ec2e49b6302785c95fd2f8 upstream.
Jing Xia and Chunyan Zhang reported that on failing to allocate part of the
tracing buffer, memory is freed, but the pointers that point to them are not
initialized back to NULL, and later paths may try to free the freed memory
again. Jing and Chunyan fixed one of the locations that does this, but
missed a spot.
Link: http://lkml.kernel.org/r/20171226071253.8968-1-chunyan.zhang@spreadtrum.com
Fixes: 737223fbca3b1 ("tracing: Consolidate buffer allocation code")
Reported-by: Jing Xia <jing.xia(a)spreadtrum.com>
Reported-by: Chunyan Zhang <chunyan.zhang(a)spreadtrum.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
kernel/trace/trace.c | 1 +
1 file changed, 1 insertion(+)
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6955,6 +6955,7 @@ allocate_trace_buffer(struct trace_array
buf->data = alloc_percpu(struct trace_array_cpu);
if (!buf->data) {
ring_buffer_free(buf->buffer);
+ buf->buffer = NULL;
return -ENOMEM;
}
Patches currently in stable-queue which might be from rostedt(a)goodmis.org are
queue-4.9/tracing-fix-crash-when-it-fails-to-alloc-ring-buffer.patch
queue-4.9/tracing-remove-extra-zeroing-out-of-the-ring-buffer-page.patch
queue-4.9/tracing-fix-possible-double-free-on-failure-of-allocating-trace-buffer.patch
This is a note to let you know that I've just added the patch titled
tracing: Remove extra zeroing out of the ring buffer page
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
tracing-remove-extra-zeroing-out-of-the-ring-buffer-page.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 6b7e633fe9c24682df550e5311f47fb524701586 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt(a)goodmis.org>
Date: Fri, 22 Dec 2017 20:38:57 -0500
Subject: tracing: Remove extra zeroing out of the ring buffer page
From: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
commit 6b7e633fe9c24682df550e5311f47fb524701586 upstream.
The ring_buffer_read_page() takes care of zeroing out any extra data in the
page that it returns. There's no need to zero it out again from the
consumer. It was removed from one consumer of this function, but
read_buffers_splice_read() did not remove it, and worse, it contained a
nasty bug because of it.
Fixes: 2711ca237a084 ("ring-buffer: Move zeroing out excess in page to ring buffer code")
Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
kernel/trace/trace.c | 10 +---------
1 file changed, 1 insertion(+), 9 deletions(-)
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6181,7 +6181,7 @@ tracing_buffers_splice_read(struct file
.spd_release = buffer_spd_release,
};
struct buffer_ref *ref;
- int entries, size, i;
+ int entries, i;
ssize_t ret = 0;
#ifdef CONFIG_TRACER_MAX_TRACE
@@ -6232,14 +6232,6 @@ tracing_buffers_splice_read(struct file
break;
}
- /*
- * zero out any left over data, this is going to
- * user land.
- */
- size = ring_buffer_page_len(ref->page);
- if (size < PAGE_SIZE)
- memset(ref->page + size, 0, PAGE_SIZE - size);
-
page = virt_to_page(ref->page);
spd.pages[i] = page;
Patches currently in stable-queue which might be from rostedt(a)goodmis.org are
queue-4.9/tracing-fix-crash-when-it-fails-to-alloc-ring-buffer.patch
queue-4.9/tracing-remove-extra-zeroing-out-of-the-ring-buffer-page.patch
queue-4.9/tracing-fix-possible-double-free-on-failure-of-allocating-trace-buffer.patch
This is a note to let you know that I've just added the patch titled
tracing: Fix crash when it fails to alloc ring buffer
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
tracing-fix-crash-when-it-fails-to-alloc-ring-buffer.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 24f2aaf952ee0b59f31c3a18b8b36c9e3d3c2cf5 Mon Sep 17 00:00:00 2001
From: Jing Xia <jing.xia(a)spreadtrum.com>
Date: Tue, 26 Dec 2017 15:12:53 +0800
Subject: tracing: Fix crash when it fails to alloc ring buffer
From: Jing Xia <jing.xia(a)spreadtrum.com>
commit 24f2aaf952ee0b59f31c3a18b8b36c9e3d3c2cf5 upstream.
Double free of the ring buffer happens when it fails to alloc new
ring buffer instance for max_buffer if TRACER_MAX_TRACE is configured.
The root cause is that the pointer is not set to NULL after the buffer
is freed in allocate_trace_buffers(), and the freeing of the ring
buffer is invoked again later if the pointer is not equal to Null,
as:
instance_mkdir()
|-allocate_trace_buffers()
|-allocate_trace_buffer(tr, &tr->trace_buffer...)
|-allocate_trace_buffer(tr, &tr->max_buffer...)
// allocate fail(-ENOMEM),first free
// and the buffer pointer is not set to null
|-ring_buffer_free(tr->trace_buffer.buffer)
// out_free_tr
|-free_trace_buffers()
|-free_trace_buffer(&tr->trace_buffer);
//if trace_buffer is not null, free again
|-ring_buffer_free(buf->buffer)
|-rb_free_cpu_buffer(buffer->buffers[cpu])
// ring_buffer_per_cpu is null, and
// crash in ring_buffer_per_cpu->pages
Link: http://lkml.kernel.org/r/20171226071253.8968-1-chunyan.zhang@spreadtrum.com
Fixes: 737223fbca3b1 ("tracing: Consolidate buffer allocation code")
Signed-off-by: Jing Xia <jing.xia(a)spreadtrum.com>
Signed-off-by: Chunyan Zhang <chunyan.zhang(a)spreadtrum.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
kernel/trace/trace.c | 2 ++
1 file changed, 2 insertions(+)
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6979,7 +6979,9 @@ static int allocate_trace_buffers(struct
allocate_snapshot ? size : 1);
if (WARN_ON(ret)) {
ring_buffer_free(tr->trace_buffer.buffer);
+ tr->trace_buffer.buffer = NULL;
free_percpu(tr->trace_buffer.data);
+ tr->trace_buffer.data = NULL;
return -ENOMEM;
}
tr->allocated_snapshot = allocate_snapshot;
Patches currently in stable-queue which might be from jing.xia(a)spreadtrum.com are
queue-4.9/tracing-fix-crash-when-it-fails-to-alloc-ring-buffer.patch
queue-4.9/tracing-fix-possible-double-free-on-failure-of-allocating-trace-buffer.patch
This is a note to let you know that I've just added the patch titled
tracing: Fix possible double free on failure of allocating trace buffer
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
tracing-fix-possible-double-free-on-failure-of-allocating-trace-buffer.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 4397f04575c44e1440ec2e49b6302785c95fd2f8 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt(a)goodmis.org>
Date: Tue, 26 Dec 2017 20:07:34 -0500
Subject: tracing: Fix possible double free on failure of allocating trace buffer
From: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
commit 4397f04575c44e1440ec2e49b6302785c95fd2f8 upstream.
Jing Xia and Chunyan Zhang reported that on failing to allocate part of the
tracing buffer, memory is freed, but the pointers that point to them are not
initialized back to NULL, and later paths may try to free the freed memory
again. Jing and Chunyan fixed one of the locations that does this, but
missed a spot.
Link: http://lkml.kernel.org/r/20171226071253.8968-1-chunyan.zhang@spreadtrum.com
Fixes: 737223fbca3b1 ("tracing: Consolidate buffer allocation code")
Reported-by: Jing Xia <jing.xia(a)spreadtrum.com>
Reported-by: Chunyan Zhang <chunyan.zhang(a)spreadtrum.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
kernel/trace/trace.c | 1 +
1 file changed, 1 insertion(+)
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6531,6 +6531,7 @@ allocate_trace_buffer(struct trace_array
buf->data = alloc_percpu(struct trace_array_cpu);
if (!buf->data) {
ring_buffer_free(buf->buffer);
+ buf->buffer = NULL;
return -ENOMEM;
}
Patches currently in stable-queue which might be from rostedt(a)goodmis.org are
queue-4.4/tracing-fix-crash-when-it-fails-to-alloc-ring-buffer.patch
queue-4.4/tracing-remove-extra-zeroing-out-of-the-ring-buffer-page.patch
queue-4.4/tracing-fix-possible-double-free-on-failure-of-allocating-trace-buffer.patch
This is a note to let you know that I've just added the patch titled
tracing: Remove extra zeroing out of the ring buffer page
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
tracing-remove-extra-zeroing-out-of-the-ring-buffer-page.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 6b7e633fe9c24682df550e5311f47fb524701586 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt(a)goodmis.org>
Date: Fri, 22 Dec 2017 20:38:57 -0500
Subject: tracing: Remove extra zeroing out of the ring buffer page
From: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
commit 6b7e633fe9c24682df550e5311f47fb524701586 upstream.
The ring_buffer_read_page() takes care of zeroing out any extra data in the
page that it returns. There's no need to zero it out again from the
consumer. It was removed from one consumer of this function, but
read_buffers_splice_read() did not remove it, and worse, it contained a
nasty bug because of it.
Fixes: 2711ca237a084 ("ring-buffer: Move zeroing out excess in page to ring buffer code")
Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
kernel/trace/trace.c | 10 +---------
1 file changed, 1 insertion(+), 9 deletions(-)
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5754,7 +5754,7 @@ tracing_buffers_splice_read(struct file
.spd_release = buffer_spd_release,
};
struct buffer_ref *ref;
- int entries, size, i;
+ int entries, i;
ssize_t ret = 0;
#ifdef CONFIG_TRACER_MAX_TRACE
@@ -5805,14 +5805,6 @@ tracing_buffers_splice_read(struct file
break;
}
- /*
- * zero out any left over data, this is going to
- * user land.
- */
- size = ring_buffer_page_len(ref->page);
- if (size < PAGE_SIZE)
- memset(ref->page + size, 0, PAGE_SIZE - size);
-
page = virt_to_page(ref->page);
spd.pages[i] = page;
Patches currently in stable-queue which might be from rostedt(a)goodmis.org are
queue-4.4/tracing-fix-crash-when-it-fails-to-alloc-ring-buffer.patch
queue-4.4/tracing-remove-extra-zeroing-out-of-the-ring-buffer-page.patch
queue-4.4/tracing-fix-possible-double-free-on-failure-of-allocating-trace-buffer.patch
This is a note to let you know that I've just added the patch titled
tracing: Fix crash when it fails to alloc ring buffer
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
tracing-fix-crash-when-it-fails-to-alloc-ring-buffer.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 24f2aaf952ee0b59f31c3a18b8b36c9e3d3c2cf5 Mon Sep 17 00:00:00 2001
From: Jing Xia <jing.xia(a)spreadtrum.com>
Date: Tue, 26 Dec 2017 15:12:53 +0800
Subject: tracing: Fix crash when it fails to alloc ring buffer
From: Jing Xia <jing.xia(a)spreadtrum.com>
commit 24f2aaf952ee0b59f31c3a18b8b36c9e3d3c2cf5 upstream.
Double free of the ring buffer happens when it fails to alloc new
ring buffer instance for max_buffer if TRACER_MAX_TRACE is configured.
The root cause is that the pointer is not set to NULL after the buffer
is freed in allocate_trace_buffers(), and the freeing of the ring
buffer is invoked again later if the pointer is not equal to Null,
as:
instance_mkdir()
|-allocate_trace_buffers()
|-allocate_trace_buffer(tr, &tr->trace_buffer...)
|-allocate_trace_buffer(tr, &tr->max_buffer...)
// allocate fail(-ENOMEM),first free
// and the buffer pointer is not set to null
|-ring_buffer_free(tr->trace_buffer.buffer)
// out_free_tr
|-free_trace_buffers()
|-free_trace_buffer(&tr->trace_buffer);
//if trace_buffer is not null, free again
|-ring_buffer_free(buf->buffer)
|-rb_free_cpu_buffer(buffer->buffers[cpu])
// ring_buffer_per_cpu is null, and
// crash in ring_buffer_per_cpu->pages
Link: http://lkml.kernel.org/r/20171226071253.8968-1-chunyan.zhang@spreadtrum.com
Fixes: 737223fbca3b1 ("tracing: Consolidate buffer allocation code")
Signed-off-by: Jing Xia <jing.xia(a)spreadtrum.com>
Signed-off-by: Chunyan Zhang <chunyan.zhang(a)spreadtrum.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
kernel/trace/trace.c | 2 ++
1 file changed, 2 insertions(+)
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6555,7 +6555,9 @@ static int allocate_trace_buffers(struct
allocate_snapshot ? size : 1);
if (WARN_ON(ret)) {
ring_buffer_free(tr->trace_buffer.buffer);
+ tr->trace_buffer.buffer = NULL;
free_percpu(tr->trace_buffer.data);
+ tr->trace_buffer.data = NULL;
return -ENOMEM;
}
tr->allocated_snapshot = allocate_snapshot;
Patches currently in stable-queue which might be from jing.xia(a)spreadtrum.com are
queue-4.4/tracing-fix-crash-when-it-fails-to-alloc-ring-buffer.patch
queue-4.4/tracing-fix-possible-double-free-on-failure-of-allocating-trace-buffer.patch