This is a note to let you know that I've just added the patch titled
kaiser: name that 0x1000 KAISER_SHADOW_PGD_OFFSET
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
kaiser-name-that-0x1000-kaiser_shadow_pgd_offset.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.
From foo@baz Wed Jan 3 20:37:21 CET 2018
From: Hugh Dickins <hughd@google.com>
Date: Sat, 9 Sep 2017 17:31:18 -0700
Subject: kaiser: name that 0x1000 KAISER_SHADOW_PGD_OFFSET
From: Hugh Dickins <hughd@google.com>
There's a 0x1000 in various places, which looks better with a name.
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
arch/x86/entry/entry_64.S | 4 ++--
arch/x86/include/asm/kaiser.h | 7 +++++--
2 files changed, 7 insertions(+), 4 deletions(-)
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1318,7 +1318,7 @@ ENTRY(nmi)
movq %cr3, %rax
pushq %rax
#ifdef CONFIG_KAISER_REAL_SWITCH
- andq $(~0x1000), %rax
+ andq $(~KAISER_SHADOW_PGD_OFFSET), %rax
#endif
movq %rax, %cr3
#endif
@@ -1561,7 +1561,7 @@ end_repeat_nmi:
movq %cr3, %rax
pushq %rax
#ifdef CONFIG_KAISER_REAL_SWITCH
- andq $(~0x1000), %rax
+ andq $(~KAISER_SHADOW_PGD_OFFSET), %rax
#endif
movq %rax, %cr3
#endif
--- a/arch/x86/include/asm/kaiser.h
+++ b/arch/x86/include/asm/kaiser.h
@@ -13,13 +13,16 @@
* A minimalistic kernel mapping holds the parts needed to be mapped in user
* mode, such as the entry/exit functions of the user space, or the stacks.
*/
+
+#define KAISER_SHADOW_PGD_OFFSET 0x1000
+
#ifdef __ASSEMBLY__
#ifdef CONFIG_KAISER
.macro _SWITCH_TO_KERNEL_CR3 reg
movq %cr3, \reg
#ifdef CONFIG_KAISER_REAL_SWITCH
-andq $(~0x1000), \reg
+andq $(~KAISER_SHADOW_PGD_OFFSET), \reg
#endif
movq \reg, %cr3
.endm
@@ -27,7 +30,7 @@ movq \reg, %cr3
.macro _SWITCH_TO_USER_CR3 reg
movq %cr3, \reg
#ifdef CONFIG_KAISER_REAL_SWITCH
-orq $(0x1000), \reg
+orq $(KAISER_SHADOW_PGD_OFFSET), \reg
#endif
movq \reg, %cr3
.endm
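For context (not part of the patch): in this KAISER implementation the shadow (user) pgd sits in the page immediately after the kernel pgd, so switching page-table roots amounts to setting or clearing bit 12, i.e. 0x1000, of the CR3 value. A minimal standalone C sketch of that bit manipulation, using a made-up pgd address:

#include <stdint.h>
#include <stdio.h>

#define KAISER_SHADOW_PGD_OFFSET 0x1000ULL

int main(void)
{
	/* Pretend physical address of the kernel pgd (page-aligned, bit 12 clear). */
	uint64_t kernel_cr3 = 0x1a2b2000ULL;

	/* _SWITCH_TO_USER_CR3: the shadow pgd lives one page up, so OR the bit in. */
	uint64_t user_cr3 = kernel_cr3 | KAISER_SHADOW_PGD_OFFSET;

	/* _SWITCH_TO_KERNEL_CR3: clear that same bit to get back to the kernel pgd. */
	uint64_t back = user_cr3 & ~KAISER_SHADOW_PGD_OFFSET;

	printf("kernel %#llx  user %#llx  kernel again %#llx\n",
	       (unsigned long long)kernel_cr3,
	       (unsigned long long)user_cr3,
	       (unsigned long long)back);
	return 0;
}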
Patches currently in stable-queue which might be from hughd@google.com are
queue-4.9/kaiser-vmstat-show-nr_kaisertable-as-nr_overhead.patch
queue-4.9/kaiser-add-nokaiser-boot-option-using-alternative.patch
queue-4.9/kaiser-fix-unlikely-error-in-alloc_ldt_struct.patch
queue-4.9/kaiser-kaiser_flush_tlb_on_return_to_user-check-pcid.patch
queue-4.9/x86-paravirt-dont-patch-flush_tlb_single.patch
queue-4.9/kaiser-merged-update.patch
queue-4.9/kaiser-delete-kaiser_real_switch-option.patch
queue-4.9/kaiser-kaiser_remove_mapping-move-along-the-pgd.patch
queue-4.9/kaiser-fix-perf-crashes.patch
queue-4.9/kaiser-drop-is_atomic-arg-to-kaiser_pagetable_walk.patch
queue-4.9/kaiser-load_new_mm_cr3-let-switch_user_cr3-flush-user.patch
queue-4.9/kaiser-enhanced-by-kernel-and-user-pcids.patch
queue-4.9/kaiser-x86_cr3_pcid_noflush-and-x86_cr3_pcid_user.patch
queue-4.9/kaiser-align-addition-to-x86-mm-makefile.patch
queue-4.9/kaiser-use-alternative-instead-of-x86_cr3_pcid_noflush.patch
queue-4.9/kaiser-stack-map-page_size-at-thread_size-page_size.patch
queue-4.9/kaiser-name-that-0x1000-kaiser_shadow_pgd_offset.patch
queue-4.9/kaiser-fix-regs-to-do_nmi-ifndef-config_kaiser.patch
queue-4.9/kaiser-do-not-set-_page_nx-on-pgd_none.patch
queue-4.9/kaiser-tidied-up-asm-kaiser.h-somewhat.patch
queue-4.9/kaiser-cleanups-while-trying-for-gold-link.patch
queue-4.9/kaiser-tidied-up-kaiser_add-remove_mapping-slightly.patch
queue-4.9/kaiser-fix-build-and-fixme-in-alloc_ldt_struct.patch
queue-4.9/kaiser-kernel-address-isolation.patch
queue-4.9/kaiser-enomem-if-kaiser_pagetable_walk-null.patch
queue-4.9/kaiser-asm-tlbflush.h-handle-nopge-at-lower-level.patch
queue-4.9/kaiser-paranoid_entry-pass-cr3-need-to-paranoid_exit.patch
queue-4.9/kaiser-kaiser-depends-on-smp.patch
queue-4.9/kaiser-pcid-0-for-kernel-and-128-for-user.patch
This is a note to let you know that I've just added the patch titled
kaiser: load_new_mm_cr3() let SWITCH_USER_CR3 flush user
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
kaiser-load_new_mm_cr3-let-switch_user_cr3-flush-user.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.
From foo@baz Wed Jan 3 20:37:21 CET 2018
From: Hugh Dickins <hughd@google.com>
Date: Thu, 17 Aug 2017 15:00:37 -0700
Subject: kaiser: load_new_mm_cr3() let SWITCH_USER_CR3 flush user
From: Hugh Dickins <hughd@google.com>
We have many machines (Westmere, Sandybridge, Ivybridge) supporting
PCID but not INVPCID: on these load_new_mm_cr3() simply crashed.
Flushing user context inside load_new_mm_cr3() without the use of
invpcid is difficult: momentarily switch from kernel to user context
and back to do so? I'm not sure whether that can be safely done at
all, and would risk polluting user context with kernel internals,
and kernel context with stale user externals.
Instead, follow the hint in the comment that was there: change
X86_CR3_PCID_USER_VAR to be a per-cpu variable, then load_new_mm_cr3()
can leave a note in it, for SWITCH_USER_CR3 on return to userspace to
flush user context TLB, instead of default X86_CR3_PCID_USER_NOFLUSH.
Which works well enough that there's no need to do it this way only
when invpcid is unsupported: it's a good alternative to invpcid here.
But there are a couple of inlines in asm/tlbflush.h that need to do the
same trick, so it's best to localize all this per-cpu business in
mm/kaiser.c: moving that part of the initialization from setup_pcid()
to kaiser_setup_pcid(); with kaiser_flush_tlb_on_return_to_user() the
function for noting an X86_CR3_PCID_USER_FLUSH. And let's keep a
KAISER_SHADOW_PGD_OFFSET in there, to avoid the extra OR on exit.
I did try to make the feature tests in asm/tlbflush.h more consistent
with each other: there seem to be far too many ways of performing such
tests, and I don't have a good grasp of their differences. At first
I converted them all to be static_cpu_has(): but that proved to be a
mistake, as the comment in __native_flush_tlb_single() hints; so then
I reversed and made them all this_cpu_has(). Probably all gratuitous
change, but that's the way it's working at present.
I am slightly bothered by the way non-per-cpu X86_CR3_PCID_KERN_VAR
gets re-initialized by each cpu (before and after these changes):
no problem when (as usual) all cpus on a machine have the same
features, but in principle incorrect. However, my experiment
to per-cpu-ify that one did not end well...
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
arch/x86/include/asm/kaiser.h | 18 +++++++-----
arch/x86/include/asm/tlbflush.h | 56 +++++++++++++++++++++++++++-------------
arch/x86/kernel/cpu/common.c | 22 ---------------
arch/x86/mm/kaiser.c | 50 +++++++++++++++++++++++++++++++----
arch/x86/mm/tlb.c | 46 ++++++++++++--------------------
5 files changed, 113 insertions(+), 79 deletions(-)
--- a/arch/x86/include/asm/kaiser.h
+++ b/arch/x86/include/asm/kaiser.h
@@ -32,13 +32,12 @@ movq \reg, %cr3
.macro _SWITCH_TO_USER_CR3 reg
movq %cr3, \reg
andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
-/*
- * This can obviously be one instruction by putting the
- * KAISER_SHADOW_PGD_OFFSET bit in the X86_CR3_PCID_USER_VAR.
- * But, just leave it now for simplicity.
- */
-orq X86_CR3_PCID_USER_VAR, \reg
-orq $(KAISER_SHADOW_PGD_OFFSET), \reg
+orq PER_CPU_VAR(X86_CR3_PCID_USER_VAR), \reg
+js 9f
+// FLUSH this time, reset to NOFLUSH for next time
+// But if nopcid? Consider using 0x80 for user pcid?
+movb $(0x80), PER_CPU_VAR(X86_CR3_PCID_USER_VAR+7)
+9:
movq \reg, %cr3
.endm
@@ -90,6 +89,11 @@ movq PER_CPU_VAR(unsafe_stack_register_b
*/
DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+extern unsigned long X86_CR3_PCID_KERN_VAR;
+DECLARE_PER_CPU(unsigned long, X86_CR3_PCID_USER_VAR);
+
+extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
+
/**
* kaiser_add_mapping - map a virtual memory part to the shadow (user) mapping
* @addr: the start address of the range
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -13,6 +13,7 @@ static inline void __invpcid(unsigned lo
unsigned long type)
{
struct { u64 d[2]; } desc = { { pcid, addr } };
+
/*
* The memory clobber is because the whole point is to invalidate
* stale TLB entries and, especially if we're flushing global
@@ -131,27 +132,42 @@ static inline void cr4_set_bits_and_upda
cr4_set_bits(mask);
}
+/*
+ * Declare a couple of kaiser interfaces here for convenience,
+ * to avoid the need for asm/kaiser.h in unexpected places.
+ */
+#ifdef CONFIG_KAISER
+extern void kaiser_setup_pcid(void);
+extern void kaiser_flush_tlb_on_return_to_user(void);
+#else
+static inline void kaiser_setup_pcid(void)
+{
+}
+static inline void kaiser_flush_tlb_on_return_to_user(void)
+{
+}
+#endif
+
static inline void __native_flush_tlb(void)
{
- if (!cpu_feature_enabled(X86_FEATURE_INVPCID)) {
+ if (this_cpu_has(X86_FEATURE_INVPCID)) {
/*
- * If current->mm == NULL then we borrow a mm which may change during a
- * task switch and therefore we must not be preempted while we write CR3
- * back:
+ * Note, this works with CR4.PCIDE=0 or 1.
*/
- preempt_disable();
- native_write_cr3(native_read_cr3());
- preempt_enable();
+ invpcid_flush_all_nonglobals();
return;
}
+
/*
- * We are no longer using globals with KAISER, so a
- * "nonglobals" flush would work too. But, this is more
- * conservative.
- *
- * Note, this works with CR4.PCIDE=0 or 1.
+ * If current->mm == NULL then we borrow a mm which may change during a
+ * task switch and therefore we must not be preempted while we write CR3
+ * back:
*/
- invpcid_flush_all();
+ preempt_disable();
+ if (this_cpu_has(X86_FEATURE_PCID))
+ kaiser_flush_tlb_on_return_to_user();
+ native_write_cr3(native_read_cr3());
+ preempt_enable();
}
static inline void __native_flush_tlb_global_irq_disabled(void)
@@ -167,9 +183,13 @@ static inline void __native_flush_tlb_gl
static inline void __native_flush_tlb_global(void)
{
+#ifdef CONFIG_KAISER
+ /* Globals are not used at all */
+ __native_flush_tlb();
+#else
unsigned long flags;
- if (static_cpu_has(X86_FEATURE_INVPCID)) {
+ if (this_cpu_has(X86_FEATURE_INVPCID)) {
/*
* Using INVPCID is considerably faster than a pair of writes
* to CR4 sandwiched inside an IRQ flag save/restore.
@@ -186,10 +206,9 @@ static inline void __native_flush_tlb_gl
* be called from deep inside debugging code.)
*/
raw_local_irq_save(flags);
-
__native_flush_tlb_global_irq_disabled();
-
raw_local_irq_restore(flags);
+#endif
}
static inline void __native_flush_tlb_single(unsigned long addr)
@@ -200,9 +219,12 @@ static inline void __native_flush_tlb_si
*
* The ASIDs used below are hard-coded. But, we must not
* call invpcid(type=1/2) before CR4.PCIDE=1. Just call
- * invpcid in the case we are called early.
+ * invlpg in the case we are called early.
*/
+
if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) {
+ if (this_cpu_has(X86_FEATURE_PCID))
+ kaiser_flush_tlb_on_return_to_user();
asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
return;
}
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -324,33 +324,12 @@ static __always_inline void setup_smap(s
}
}
-/*
- * These can have bit 63 set, so we can not just use a plain "or"
- * instruction to get their value or'd into CR3. It would take
- * another register. So, we use a memory reference to these
- * instead.
- *
- * This is also handy because systems that do not support
- * PCIDs just end up or'ing a 0 into their CR3, which does
- * no harm.
- */
-__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR = 0;
-__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_USER_VAR = 0;
-
static void setup_pcid(struct cpuinfo_x86 *c)
{
if (cpu_has(c, X86_FEATURE_PCID)) {
if (cpu_has(c, X86_FEATURE_PGE)) {
cr4_set_bits(X86_CR4_PCIDE);
/*
- * These variables are used by the entry/exit
- * code to change PCIDs.
- */
-#ifdef CONFIG_KAISER
- X86_CR3_PCID_KERN_VAR = X86_CR3_PCID_KERN_NOFLUSH;
- X86_CR3_PCID_USER_VAR = X86_CR3_PCID_USER_NOFLUSH;
-#endif
- /*
* INVPCID has two "groups" of types:
* 1/2: Invalidate an individual address
* 3/4: Invalidate all contexts
@@ -375,6 +354,7 @@ static void setup_pcid(struct cpuinfo_x8
clear_cpu_cap(c, X86_FEATURE_PCID);
}
}
+ kaiser_setup_pcid();
}
/*
--- a/arch/x86/mm/kaiser.c
+++ b/arch/x86/mm/kaiser.c
@@ -11,12 +11,26 @@
#include <linux/uaccess.h>
#include <asm/kaiser.h>
+#include <asm/tlbflush.h> /* to verify its kaiser declarations */
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/desc.h>
+
#ifdef CONFIG_KAISER
+__visible
+DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+
+/*
+ * These can have bit 63 set, so we can not just use a plain "or"
+ * instruction to get their value or'd into CR3. It would take
+ * another register. So, we use a memory reference to these instead.
+ *
+ * This is also handy because systems that do not support PCIDs
+ * just end up or'ing a 0 into their CR3, which does no harm.
+ */
+__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR;
+DEFINE_PER_CPU(unsigned long, X86_CR3_PCID_USER_VAR);
-__visible DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
/*
* At runtime, the only things we map are some things for CPU
* hotplug, and stacks for new processes. No two CPUs will ever
@@ -238,9 +252,6 @@ static void __init kaiser_init_all_pgds(
WARN_ON(__ret); \
} while (0)
-extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
-extern unsigned long X86_CR3_PCID_KERN_VAR;
-extern unsigned long X86_CR3_PCID_USER_VAR;
/*
* If anything in here fails, we will likely die on one of the
* first kernel->user transitions and init will die. But, we
@@ -294,8 +305,6 @@ void __init kaiser_init(void)
kaiser_add_user_map_early(&X86_CR3_PCID_KERN_VAR, PAGE_SIZE,
__PAGE_KERNEL);
- kaiser_add_user_map_early(&X86_CR3_PCID_USER_VAR, PAGE_SIZE,
- __PAGE_KERNEL);
}
/* Add a mapping to the shadow mapping, and synchronize the mappings */
@@ -358,4 +367,33 @@ pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp,
}
return pgd;
}
+
+void kaiser_setup_pcid(void)
+{
+ unsigned long kern_cr3 = 0;
+ unsigned long user_cr3 = KAISER_SHADOW_PGD_OFFSET;
+
+ if (this_cpu_has(X86_FEATURE_PCID)) {
+ kern_cr3 |= X86_CR3_PCID_KERN_NOFLUSH;
+ user_cr3 |= X86_CR3_PCID_USER_NOFLUSH;
+ }
+ /*
+ * These variables are used by the entry/exit
+ * code to change PCID and pgd and TLB flushing.
+ */
+ X86_CR3_PCID_KERN_VAR = kern_cr3;
+ this_cpu_write(X86_CR3_PCID_USER_VAR, user_cr3);
+}
+
+/*
+ * Make a note that this cpu will need to flush USER tlb on return to user.
+ * Caller checks whether this_cpu_has(X86_FEATURE_PCID) before calling:
+ * if cpu does not, then the NOFLUSH bit will never have been set.
+ */
+void kaiser_flush_tlb_on_return_to_user(void)
+{
+ this_cpu_write(X86_CR3_PCID_USER_VAR,
+ X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET);
+}
+EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user);
#endif /* CONFIG_KAISER */
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -6,13 +6,14 @@
#include <linux/interrupt.h>
#include <linux/export.h>
#include <linux/cpu.h>
+#include <linux/debugfs.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/cache.h>
#include <asm/apic.h>
#include <asm/uv/uv.h>
-#include <linux/debugfs.h>
+#include <asm/kaiser.h>
/*
* TLB flushing, formerly SMP-only
@@ -38,34 +39,23 @@ static void load_new_mm_cr3(pgd_t *pgdir
{
unsigned long new_mm_cr3 = __pa(pgdir);
- /*
- * KAISER, plus PCIDs needs some extra work here. But,
- * if either of features is not present, we need no
- * PCIDs here and just do a normal, full TLB flush with
- * the write_cr3()
- */
- if (!IS_ENABLED(CONFIG_KAISER) ||
- !cpu_feature_enabled(X86_FEATURE_PCID))
- goto out_set_cr3;
- /*
- * We reuse the same PCID for different tasks, so we must
- * flush all the entires for the PCID out when we change
- * tasks.
- */
- new_mm_cr3 = X86_CR3_PCID_KERN_FLUSH | __pa(pgdir);
-
- /*
- * The flush from load_cr3() may leave old TLB entries
- * for userspace in place. We must flush that context
- * separately. We can theoretically delay doing this
- * until we actually load up the userspace CR3, but
- * that's a bit tricky. We have to have the "need to
- * flush userspace PCID" bit per-cpu and check it in the
- * exit-to-userspace paths.
- */
- invpcid_flush_single_context(X86_CR3_PCID_ASID_USER);
+#ifdef CONFIG_KAISER
+ if (this_cpu_has(X86_FEATURE_PCID)) {
+ /*
+ * We reuse the same PCID for different tasks, so we must
+ * flush all the entries for the PCID out when we change tasks.
+ * Flush KERN below, flush USER when returning to userspace in
+ * kaiser's SWITCH_USER_CR3 (_SWITCH_TO_USER_CR3) macro.
+ *
+ * invpcid_flush_single_context(X86_CR3_PCID_ASID_USER) could
+ * do it here, but can only be used if X86_FEATURE_INVPCID is
+ * available - and many machines support pcid without invpcid.
+ */
+ new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH;
+ kaiser_flush_tlb_on_return_to_user();
+ }
+#endif /* CONFIG_KAISER */
-out_set_cr3:
/*
* Caution: many callers of this function expect
* that load_cr3() is serializing and orders TLB
This is a note to let you know that I've just added the patch titled
kaiser: kaiser_flush_tlb_on_return_to_user() check PCID
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
kaiser-kaiser_flush_tlb_on_return_to_user-check-pcid.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.
From foo@baz Wed Jan 3 20:37:21 CET 2018
From: Hugh Dickins <hughd@google.com>
Date: Sat, 4 Nov 2017 18:43:06 -0700
Subject: kaiser: kaiser_flush_tlb_on_return_to_user() check PCID
From: Hugh Dickins <hughd@google.com>
Let kaiser_flush_tlb_on_return_to_user() do the X86_FEATURE_PCID
check, instead of each caller doing it inline first: nobody needs
to optimize for the noPCID case, it's clearer this way, and better
suits later changes. Replace those no-op X86_CR3_PCID_KERN_FLUSH lines
by a BUILD_BUG_ON() in load_new_mm_cr3(), in case something changes.
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
arch/x86/include/asm/tlbflush.h | 4 ++--
arch/x86/mm/kaiser.c | 6 +++---
arch/x86/mm/tlb.c | 8 ++++----
3 files changed, 9 insertions(+), 9 deletions(-)
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -158,7 +158,7 @@ static inline void __native_flush_tlb(vo
* back:
*/
preempt_disable();
- if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID))
+ if (kaiser_enabled)
kaiser_flush_tlb_on_return_to_user();
native_write_cr3(native_read_cr3());
preempt_enable();
@@ -217,7 +217,7 @@ static inline void __native_flush_tlb_si
*/
if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) {
- if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID))
+ if (kaiser_enabled)
kaiser_flush_tlb_on_return_to_user();
asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
return;
--- a/arch/x86/mm/kaiser.c
+++ b/arch/x86/mm/kaiser.c
@@ -435,12 +435,12 @@ void kaiser_setup_pcid(void)
/*
* Make a note that this cpu will need to flush USER tlb on return to user.
- * Caller checks whether this_cpu_has(X86_FEATURE_PCID) before calling:
- * if cpu does not, then the NOFLUSH bit will never have been set.
+ * If cpu does not have PCID, then the NOFLUSH bit will never have been set.
*/
void kaiser_flush_tlb_on_return_to_user(void)
{
- this_cpu_write(x86_cr3_pcid_user,
+ if (this_cpu_has(X86_FEATURE_PCID))
+ this_cpu_write(x86_cr3_pcid_user,
X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET);
}
EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user);
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -39,7 +39,7 @@ static void load_new_mm_cr3(pgd_t *pgdir
{
unsigned long new_mm_cr3 = __pa(pgdir);
- if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) {
+ if (kaiser_enabled) {
/*
* We reuse the same PCID for different tasks, so we must
* flush all the entries for the PCID out when we change tasks.
@@ -50,10 +50,10 @@ static void load_new_mm_cr3(pgd_t *pgdir
* do it here, but can only be used if X86_FEATURE_INVPCID is
* available - and many machines support pcid without invpcid.
*
- * The line below is a no-op: X86_CR3_PCID_KERN_FLUSH is now 0;
- * but keep that line in there in case something changes.
+ * If X86_CR3_PCID_KERN_FLUSH actually added something, then it
+ * would be needed in the write_cr3() below - if PCIDs enabled.
*/
- new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH;
+ BUILD_BUG_ON(X86_CR3_PCID_KERN_FLUSH);
kaiser_flush_tlb_on_return_to_user();
}
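The BUILD_BUG_ON() used here turns "X86_CR3_PCID_KERN_FLUSH is zero" from a silent assumption into a compile-time check. Outside the kernel, C11's _Static_assert gives the same effect; the following is only a toy illustration, with a made-up macro and constant name:

#include <stdio.h>

/* Mirrors the kernel constant this patch relies on being zero. */
#define EXAMPLE_KERN_FLUSH 0

/* Userspace stand-in for the kernel's BUILD_BUG_ON(). */
#define BUILD_BUG_ON_DEMO(cond) \
	_Static_assert(!(cond), "build-time assumption broken: " #cond)

int main(void)
{
	/* Compilation fails here if someone ever makes the constant non-zero. */
	BUILD_BUG_ON_DEMO(EXAMPLE_KERN_FLUSH);
	printf("EXAMPLE_KERN_FLUSH is still zero, nothing to OR into CR3\n");
	return 0;
}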
This is a note to let you know that I've just added the patch titled
kaiser: kaiser_remove_mapping() move along the pgd
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
kaiser-kaiser_remove_mapping-move-along-the-pgd.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.
From foo@baz Wed Jan 3 20:37:21 CET 2018
From: Hugh Dickins <hughd@google.com>
Date: Mon, 2 Oct 2017 10:57:24 -0700
Subject: kaiser: kaiser_remove_mapping() move along the pgd
From: Hugh Dickins <hughd@google.com>
When removing the bogus comment from kaiser_remove_mapping(),
I really ought to have checked the extent of its bogosity: as
Neel points out, there is nothing to stop unmap_pud_range_nofree()
from continuing beyond the end of a pud (and starting in the wrong
position on the next).
Fix kaiser_remove_mapping() to constrain the extent and advance pgd
pointer correctly: use pgd_addr_end() macro as used throughout base
mm (but don't assume page-rounded start and size in this case).
But this bug was very unlikely to trigger in this backport: since
any buddy allocation is contained within a single pud extent, and
we are not using vmapped stacks (and are only mapping one page of
stack anyway), the only way to hit this bug here would be when
freeing a large modified ldt.
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
arch/x86/mm/kaiser.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
--- a/arch/x86/mm/kaiser.c
+++ b/arch/x86/mm/kaiser.c
@@ -319,11 +319,13 @@ void kaiser_remove_mapping(unsigned long
extern void unmap_pud_range_nofree(pgd_t *pgd,
unsigned long start, unsigned long end);
unsigned long end = start + size;
- unsigned long addr;
+ unsigned long addr, next;
+ pgd_t *pgd;
- for (addr = start; addr < end; addr += PGDIR_SIZE) {
- pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(addr));
- unmap_pud_range_nofree(pgd, addr, end);
+ pgd = native_get_shadow_pgd(pgd_offset_k(start));
+ for (addr = start; addr < end; pgd++, addr = next) {
+ next = pgd_addr_end(addr, end);
+ unmap_pud_range_nofree(pgd, addr, next);
}
}
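The fix adopts the standard page-table walker shape: clamp each step at the end of the current top-level entry with pgd_addr_end(), and advance the address and the pgd pointer together. Below is a standalone C sketch of that pattern, shrunk to a toy 1 MiB entry size; it is an illustration, not the kernel code:

#include <stdio.h>

/* Toy "pgd" covering 1 MiB per entry instead of 512 GiB. */
#define TOY_PGDIR_SIZE (1UL << 20)
#define TOY_PGDIR_MASK (~(TOY_PGDIR_SIZE - 1))

/* Same contract as pgd_addr_end(): end of this entry, capped at 'end'. */
static unsigned long toy_pgd_addr_end(unsigned long addr, unsigned long end)
{
	unsigned long boundary = (addr + TOY_PGDIR_SIZE) & TOY_PGDIR_MASK;

	return boundary - 1 < end - 1 ? boundary : end;
}

int main(void)
{
	unsigned long start = 0x0013f000UL;	/* deliberately not entry-aligned */
	unsigned long end   = 0x00350000UL;
	unsigned long addr, next;
	int pgd_index = start / TOY_PGDIR_SIZE;	/* stands in for the pgd pointer */

	for (addr = start; addr < end; pgd_index++, addr = next) {
		next = toy_pgd_addr_end(addr, end);
		printf("entry %d: unmap [%#lx, %#lx)\n", pgd_index, addr, next);
	}
	return 0;
}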
This is a note to let you know that I've just added the patch titled
kaiser: fix unlikely error in alloc_ldt_struct()
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
kaiser-fix-unlikely-error-in-alloc_ldt_struct.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.
From foo@baz Wed Jan 3 20:37:21 CET 2018
From: Hugh Dickins <hughd@google.com>
Date: Mon, 4 Dec 2017 20:13:35 -0800
Subject: kaiser: fix unlikely error in alloc_ldt_struct()
From: Hugh Dickins <hughd@google.com>
An error from kaiser_add_mapping() here is not at all likely, but
Eric Biggers rightly points out that __free_ldt_struct() relies on
new_ldt->size being initialized: move that up.
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
arch/x86/kernel/ldt.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -79,11 +79,11 @@ static struct ldt_struct *alloc_ldt_stru
ret = kaiser_add_mapping((unsigned long)new_ldt->entries, alloc_size,
__PAGE_KERNEL);
+ new_ldt->size = size;
if (ret) {
__free_ldt_struct(new_ldt);
return NULL;
}
- new_ldt->size = size;
return new_ldt;
}
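The bug class is generic: when a cleanup helper reads a field of the object it tears down, that field must be valid before any error path that can invoke the helper. A toy illustration with made-up types, mirroring the corrected ordering (this is not the LDT code):

#include <stdlib.h>
#include <stdio.h>

struct toy_ldt {
	int size;		/* the cleanup path reads this */
	int *entries;
};

static void toy_free(struct toy_ldt *ldt)
{
	/* Relies on ldt->size being initialized, like __free_ldt_struct(). */
	printf("freeing %d entries\n", ldt->size);
	free(ldt->entries);
	free(ldt);
}

static struct toy_ldt *toy_alloc(int size, int fail_mapping)
{
	struct toy_ldt *ldt = malloc(sizeof(*ldt));

	if (!ldt)
		return NULL;
	ldt->entries = calloc(size, sizeof(int));
	if (!ldt->entries) {
		free(ldt);
		return NULL;
	}
	ldt->size = size;	/* set before the error path below can run */
	if (fail_mapping) {	/* stands in for kaiser_add_mapping() failing */
		toy_free(ldt);
		return NULL;
	}
	return ldt;
}

int main(void)
{
	struct toy_ldt *ldt = toy_alloc(16, 1);

	printf("alloc %s\n", ldt ? "succeeded" : "failed as expected");
	return 0;
}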
This is a note to let you know that I've just added the patch titled
kaiser: fix regs to do_nmi() ifndef CONFIG_KAISER
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
kaiser-fix-regs-to-do_nmi-ifndef-config_kaiser.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.
From foo@baz Wed Jan 3 20:37:21 CET 2018
From: Hugh Dickins <hughd@google.com>
Date: Thu, 21 Sep 2017 20:39:56 -0700
Subject: kaiser: fix regs to do_nmi() ifndef CONFIG_KAISER
From: Hugh Dickins <hughd@google.com>
pjt has observed that nmi's second (nmi_from_kernel) call to do_nmi()
adjusted the %rdi regs arg, rightly when CONFIG_KAISER, but wrongly
when not CONFIG_KAISER.
Although the minimal change is to add an #ifdef CONFIG_KAISER around
the addq line, that looks cluttered, and I prefer how the first call
to do_nmi() handled it: prepare args in %rdi and %rsi before getting
into the CONFIG_KAISER block, since it does not touch them at all.
And while we're here, place the "#ifdef CONFIG_KAISER" that follows
each, to enclose the "Unconditionally restore CR3" comment: matching
how the "Unconditionally use kernel CR3" comment above is enclosed.
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
arch/x86/entry/entry_64.S | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1323,12 +1323,13 @@ ENTRY(nmi)
movq %rax, %cr3
#endif
call do_nmi
+
+#ifdef CONFIG_KAISER
/*
* Unconditionally restore CR3. I know we return to
* kernel code that needs user CR3, but do we ever return
* to "user mode" where we need the kernel CR3?
*/
-#ifdef CONFIG_KAISER
popq %rax
mov %rax, %cr3
#endif
@@ -1552,6 +1553,8 @@ end_repeat_nmi:
SWAPGS
xorl %ebx, %ebx
1:
+ movq %rsp, %rdi
+ movq $-1, %rsi
#ifdef CONFIG_KAISER
/* Unconditionally use kernel CR3 for do_nmi() */
/* %rax is saved above, so OK to clobber here */
@@ -1564,16 +1567,14 @@ end_repeat_nmi:
#endif
/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
- movq %rsp, %rdi
- addq $8, %rdi /* point %rdi at ptregs, fixed up for CR3 */
- movq $-1, %rsi
call do_nmi
+
+#ifdef CONFIG_KAISER
/*
* Unconditionally restore CR3. We might be returning to
* kernel code that needs user CR3, like just just before
* a sysret.
*/
-#ifdef CONFIG_KAISER
popq %rax
mov %rax, %cr3
#endif
This is a note to let you know that I've just added the patch titled
kaiser: KAISER depends on SMP
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
kaiser-kaiser-depends-on-smp.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.
From foo@baz Wed Jan 3 20:37:21 CET 2018
From: Hugh Dickins <hughd@google.com>
Date: Wed, 13 Sep 2017 14:03:10 -0700
Subject: kaiser: KAISER depends on SMP
From: Hugh Dickins <hughd@google.com>
It is absurd that KAISER should depend on SMP, but apparently nobody
has tried a UP build before: it breaks on an implicit declaration of
function 'per_cpu_offset' in arch/x86/mm/kaiser.c.
Now, you would expect that to be trivially fixed up; but looking at
the System.map when that block is #ifdef'ed out of kaiser_init(),
I see that in a UP build __per_cpu_user_mapped_end is precisely at
__per_cpu_user_mapped_start, and the items carefully gathered into
that section for user-mapping on SMP are dispersed elsewhere on UP.
So, some other kind of section assignment will be needed on UP,
but implementing that is not a priority: just make KAISER depend
on SMP for now.
Also inserted a blank line before the option, tidied up the
brief Kconfig help message, and added an "If unsure, Y".
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
security/Kconfig | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -30,14 +30,16 @@ config SECURITY
model will be used.
If you are unsure how to answer this question, answer N.
+
config KAISER
bool "Remove the kernel mapping in user mode"
default y
- depends on X86_64
- depends on !PARAVIRT
+ depends on X86_64 && SMP && !PARAVIRT
help
- This enforces a strict kernel and user space isolation in order to close
- hardware side channels on kernel address information.
+ This enforces a strict kernel and user space isolation, in order
+ to close hardware side channels on kernel address information.
+
+ If you are unsure how to answer this question, answer Y.
config KAISER_REAL_SWITCH
bool "KAISER: actually switch page tables"
This is a note to let you know that I've just added the patch titled
kaiser: ENOMEM if kaiser_pagetable_walk() NULL
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
kaiser-enomem-if-kaiser_pagetable_walk-null.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.
From foo@baz Wed Jan 3 20:37:21 CET 2018
From: Hugh Dickins <hughd@google.com>
Date: Sun, 3 Sep 2017 18:48:02 -0700
Subject: kaiser: ENOMEM if kaiser_pagetable_walk() NULL
From: Hugh Dickins <hughd@google.com>
kaiser_add_user_map() took no notice when kaiser_pagetable_walk() failed:
now report -ENOMEM in that case. And avoid its might_sleep() when atomic
(though the atomic case is at present unused).
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
arch/x86/mm/kaiser.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
--- a/arch/x86/mm/kaiser.c
+++ b/arch/x86/mm/kaiser.c
@@ -98,11 +98,11 @@ static pte_t *kaiser_pagetable_walk(unsi
pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(address));
gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
- might_sleep();
if (is_atomic) {
gfp &= ~GFP_KERNEL;
gfp |= __GFP_HIGH | __GFP_ATOMIC;
- }
+ } else
+ might_sleep();
if (pgd_none(*pgd)) {
WARN_ONCE(1, "All shadow pgds should have been populated");
@@ -159,13 +159,17 @@ int kaiser_add_user_map(const void *__st
unsigned long end_addr = PAGE_ALIGN(start_addr + size);
unsigned long target_address;
- for (;address < end_addr; address += PAGE_SIZE) {
+ for (; address < end_addr; address += PAGE_SIZE) {
target_address = get_pa_from_mapping(address);
if (target_address == -1) {
ret = -EIO;
break;
}
pte = kaiser_pagetable_walk(address, false);
+ if (!pte) {
+ ret = -ENOMEM;
+ break;
+ }
if (pte_none(*pte)) {
set_pte(pte, __pte(flags | target_address));
} else {
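With this change a failed kaiser_pagetable_walk() propagates as -ENOMEM instead of being silently ignored, and might_sleep() is only asserted when the walk is actually allowed to sleep. The error-propagation half can be shown in a standalone sketch; everything below is a simplified stand-in, not the kernel code:

#include <stdio.h>

#define TOY_ENOMEM 12

/* Stand-in for kaiser_pagetable_walk(): returns NULL when a page-table
 * page cannot be allocated (the case this patch now reports as -ENOMEM). */
static int *toy_walk(int *pool, int pool_size, int *used)
{
	if (*used >= pool_size)
		return NULL;
	return &pool[(*used)++];
}

/* Stand-in for kaiser_add_user_map(): stop and report the failure
 * instead of dereferencing a NULL pte. */
static int toy_add_map(unsigned long npages)
{
	int pool[4], used = 0;
	unsigned long i;

	for (i = 0; i < npages; i++) {
		int *pte = toy_walk(pool, 4, &used);

		if (!pte)
			return -TOY_ENOMEM;
		*pte = 1;
	}
	return 0;
}

int main(void)
{
	printf("3 pages -> %d\n", toy_add_map(3));
	printf("8 pages -> %d\n", toy_add_map(8));
	return 0;
}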
This is a note to let you know that I've just added the patch titled
kaiser: enhanced by kernel and user PCIDs
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
kaiser-enhanced-by-kernel-and-user-pcids.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.
From foo@baz Wed Jan 3 20:37:21 CET 2018
From: Hugh Dickins <hughd@google.com>
Date: Wed, 30 Aug 2017 16:23:00 -0700
Subject: kaiser: enhanced by kernel and user PCIDs
From: Hugh Dickins <hughd@google.com>
Merged performance improvements to Kaiser, using distinct kernel
and user Process Context Identifiers to minimize the TLB flushing.
[This work actually all from Dave Hansen 2017-08-30:
still omitting trackswitch mods, and KAISER_REAL_SWITCH deleted.]
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
arch/x86/entry/entry_64.S | 10 ++++-
arch/x86/entry/entry_64_compat.S | 1
arch/x86/include/asm/cpufeatures.h | 1
arch/x86/include/asm/kaiser.h | 15 ++++++-
arch/x86/include/asm/pgtable_types.h | 26 +++++++++++++
arch/x86/include/asm/tlbflush.h | 54 +++++++++++++++++++++++-----
arch/x86/include/uapi/asm/processor-flags.h | 3 +
arch/x86/kernel/cpu/common.c | 34 +++++++++++++++++
arch/x86/kvm/x86.c | 3 +
arch/x86/mm/kaiser.c | 7 +++
arch/x86/mm/tlb.c | 46 ++++++++++++++++++++++-
11 files changed, 182 insertions(+), 18 deletions(-)
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1317,7 +1317,10 @@ ENTRY(nmi)
/* %rax is saved above, so OK to clobber here */
movq %cr3, %rax
pushq %rax
- andq $(~KAISER_SHADOW_PGD_OFFSET), %rax
+ /* mask off "user" bit of pgd address and 12 PCID bits: */
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+ /* Add back kernel PCID and "no flush" bit */
+ orq X86_CR3_PCID_KERN_VAR, %rax
movq %rax, %cr3
#endif
call do_nmi
@@ -1558,7 +1561,10 @@ end_repeat_nmi:
/* %rax is saved above, so OK to clobber here */
movq %cr3, %rax
pushq %rax
- andq $(~KAISER_SHADOW_PGD_OFFSET), %rax
+ /* mask off "user" bit of pgd address and 12 PCID bits: */
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+ /* Add back kernel PCID and "no flush" bit */
+ orq X86_CR3_PCID_KERN_VAR, %rax
movq %rax, %cr3
#endif
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -13,6 +13,7 @@
#include <asm/irqflags.h>
#include <asm/asm.h>
#include <asm/smap.h>
+#include <asm/pgtable_types.h>
#include <asm/kaiser.h>
#include <linux/linkage.h>
#include <linux/err.h>
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -189,6 +189,7 @@
#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
+#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 4) /* Effectively INVPCID && CR4.PCIDE=1 */
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
--- a/arch/x86/include/asm/kaiser.h
+++ b/arch/x86/include/asm/kaiser.h
@@ -1,5 +1,8 @@
#ifndef _ASM_X86_KAISER_H
#define _ASM_X86_KAISER_H
+
+#include <uapi/asm/processor-flags.h> /* For PCID constants */
+
/*
* This file includes the definitions for the KAISER feature.
* KAISER is a counter measure against x86_64 side channel attacks on
@@ -21,13 +24,21 @@
.macro _SWITCH_TO_KERNEL_CR3 reg
movq %cr3, \reg
-andq $(~KAISER_SHADOW_PGD_OFFSET), \reg
+andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
+orq X86_CR3_PCID_KERN_VAR, \reg
movq \reg, %cr3
.endm
.macro _SWITCH_TO_USER_CR3 reg
movq %cr3, \reg
-orq $(KAISER_SHADOW_PGD_OFFSET), \reg
+andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
+/*
+ * This can obviously be one instruction by putting the
+ * KAISER_SHADOW_PGD_OFFSET bit in the X86_CR3_PCID_USER_VAR.
+ * But, just leave it now for simplicity.
+ */
+orq X86_CR3_PCID_USER_VAR, \reg
+orq $(KAISER_SHADOW_PGD_OFFSET), \reg
movq \reg, %cr3
.endm
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -141,6 +141,32 @@
_PAGE_SOFT_DIRTY)
#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
+/* The ASID is the lower 12 bits of CR3 */
+#define X86_CR3_PCID_ASID_MASK (_AC((1<<12)-1,UL))
+
+/* Mask for all the PCID-related bits in CR3: */
+#define X86_CR3_PCID_MASK (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_MASK)
+#if defined(CONFIG_KAISER) && defined(CONFIG_X86_64)
+#define X86_CR3_PCID_ASID_KERN (_AC(0x4,UL))
+#define X86_CR3_PCID_ASID_USER (_AC(0x6,UL))
+
+#define X86_CR3_PCID_KERN_FLUSH (X86_CR3_PCID_ASID_KERN)
+#define X86_CR3_PCID_USER_FLUSH (X86_CR3_PCID_ASID_USER)
+#define X86_CR3_PCID_KERN_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_KERN)
+#define X86_CR3_PCID_USER_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_USER)
+#else
+#define X86_CR3_PCID_ASID_KERN (_AC(0x0,UL))
+#define X86_CR3_PCID_ASID_USER (_AC(0x0,UL))
+/*
+ * PCIDs are unsupported on 32-bit and none of these bits can be
+ * set in CR3:
+ */
+#define X86_CR3_PCID_KERN_FLUSH (0)
+#define X86_CR3_PCID_USER_FLUSH (0)
+#define X86_CR3_PCID_KERN_NOFLUSH (0)
+#define X86_CR3_PCID_USER_NOFLUSH (0)
+#endif
+
/*
* The cache modes defined here are used to translate between pure SW usage
* and the HW defined cache mode bits and/or PAT entries.
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -13,7 +13,6 @@ static inline void __invpcid(unsigned lo
unsigned long type)
{
struct { u64 d[2]; } desc = { { pcid, addr } };
-
/*
* The memory clobber is because the whole point is to invalidate
* stale TLB entries and, especially if we're flushing global
@@ -134,14 +133,25 @@ static inline void cr4_set_bits_and_upda
static inline void __native_flush_tlb(void)
{
+ if (!cpu_feature_enabled(X86_FEATURE_INVPCID)) {
+ /*
+ * If current->mm == NULL then we borrow a mm which may change during a
+ * task switch and therefore we must not be preempted while we write CR3
+ * back:
+ */
+ preempt_disable();
+ native_write_cr3(native_read_cr3());
+ preempt_enable();
+ return;
+ }
/*
- * If current->mm == NULL then we borrow a mm which may change during a
- * task switch and therefore we must not be preempted while we write CR3
- * back:
- */
- preempt_disable();
- native_write_cr3(native_read_cr3());
- preempt_enable();
+ * We are no longer using globals with KAISER, so a
+ * "nonglobals" flush would work too. But, this is more
+ * conservative.
+ *
+ * Note, this works with CR4.PCIDE=0 or 1.
+ */
+ invpcid_flush_all();
}
static inline void __native_flush_tlb_global_irq_disabled(void)
@@ -163,6 +173,8 @@ static inline void __native_flush_tlb_gl
/*
* Using INVPCID is considerably faster than a pair of writes
* to CR4 sandwiched inside an IRQ flag save/restore.
+ *
+ * Note, this works with CR4.PCIDE=0 or 1.
*/
invpcid_flush_all();
return;
@@ -182,7 +194,31 @@ static inline void __native_flush_tlb_gl
static inline void __native_flush_tlb_single(unsigned long addr)
{
- asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
+ /*
+ * SIMICS #GP's if you run INVPCID with type 2/3
+ * and X86_CR4_PCIDE clear. Shame!
+ *
+ * The ASIDs used below are hard-coded. But, we must not
+ * call invpcid(type=1/2) before CR4.PCIDE=1. Just call
+ * invpcid in the case we are called early.
+ */
+ if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) {
+ asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
+ return;
+ }
+ /* Flush the address out of both PCIDs. */
+ /*
+ * An optimization here might be to determine addresses
+ * that are only kernel-mapped and only flush the kernel
+ * ASID. But, userspace flushes are probably much more
+ * important performance-wise.
+ *
+ * Make sure to do only a single invpcid when KAISER is
+ * disabled and we have only a single ASID.
+ */
+ if (X86_CR3_PCID_ASID_KERN != X86_CR3_PCID_ASID_USER)
+ invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr);
+ invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr);
}
static inline void __flush_tlb_all(void)
--- a/arch/x86/include/uapi/asm/processor-flags.h
+++ b/arch/x86/include/uapi/asm/processor-flags.h
@@ -77,7 +77,8 @@
#define X86_CR3_PWT _BITUL(X86_CR3_PWT_BIT)
#define X86_CR3_PCD_BIT 4 /* Page Cache Disable */
#define X86_CR3_PCD _BITUL(X86_CR3_PCD_BIT)
-#define X86_CR3_PCID_MASK _AC(0x00000fff,UL) /* PCID Mask */
+#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */
+#define X86_CR3_PCID_NOFLUSH _BITULL(X86_CR3_PCID_NOFLUSH_BIT)
/*
* Intel CPU features in CR4
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -324,11 +324,45 @@ static __always_inline void setup_smap(s
}
}
+/*
+ * These can have bit 63 set, so we can not just use a plain "or"
+ * instruction to get their value or'd into CR3. It would take
+ * another register. So, we use a memory reference to these
+ * instead.
+ *
+ * This is also handy because systems that do not support
+ * PCIDs just end up or'ing a 0 into their CR3, which does
+ * no harm.
+ */
+__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_KERN_VAR = 0;
+__aligned(PAGE_SIZE) unsigned long X86_CR3_PCID_USER_VAR = 0;
+
static void setup_pcid(struct cpuinfo_x86 *c)
{
if (cpu_has(c, X86_FEATURE_PCID)) {
if (cpu_has(c, X86_FEATURE_PGE)) {
cr4_set_bits(X86_CR4_PCIDE);
+ /*
+ * These variables are used by the entry/exit
+ * code to change PCIDs.
+ */
+#ifdef CONFIG_KAISER
+ X86_CR3_PCID_KERN_VAR = X86_CR3_PCID_KERN_NOFLUSH;
+ X86_CR3_PCID_USER_VAR = X86_CR3_PCID_USER_NOFLUSH;
+#endif
+ /*
+ * INVPCID has two "groups" of types:
+ * 1/2: Invalidate an individual address
+ * 3/4: Invalidate all contexts
+ *
+ * 1/2 take a PCID, but 3/4 do not. So, 3/4
+ * ignore the PCID argument in the descriptor.
+ * But, we have to be careful not to call 1/2
+ * with an actual non-zero PCID in them before
+ * we do the above cr4_set_bits().
+ */
+ if (cpu_has(c, X86_FEATURE_INVPCID))
+ set_cpu_cap(c, X86_FEATURE_INVPCID_SINGLE);
} else {
/*
* flush_tlb_all(), as currently implemented, won't
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -773,7 +773,8 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, u
return 1;
/* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */
- if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
+ if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_ASID_MASK) ||
+ !is_long_mode(vcpu))
return 1;
}
--- a/arch/x86/mm/kaiser.c
+++ b/arch/x86/mm/kaiser.c
@@ -239,6 +239,8 @@ static void __init kaiser_init_all_pgds(
} while (0)
extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
+extern unsigned long X86_CR3_PCID_KERN_VAR;
+extern unsigned long X86_CR3_PCID_USER_VAR;
/*
* If anything in here fails, we will likely die on one of the
* first kernel->user transitions and init will die. But, we
@@ -289,6 +291,11 @@ void __init kaiser_init(void)
kaiser_add_user_map_early(&debug_idt_table,
sizeof(gate_desc) * NR_VECTORS,
__PAGE_KERNEL);
+
+ kaiser_add_user_map_early(&X86_CR3_PCID_KERN_VAR, PAGE_SIZE,
+ __PAGE_KERNEL);
+ kaiser_add_user_map_early(&X86_CR3_PCID_USER_VAR, PAGE_SIZE,
+ __PAGE_KERNEL);
}
/* Add a mapping to the shadow mapping, and synchronize the mappings */
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -34,6 +34,46 @@ struct flush_tlb_info {
unsigned long flush_end;
};
+static void load_new_mm_cr3(pgd_t *pgdir)
+{
+ unsigned long new_mm_cr3 = __pa(pgdir);
+
+ /*
+ * KAISER, plus PCIDs needs some extra work here. But,
+ * if either of features is not present, we need no
+ * PCIDs here and just do a normal, full TLB flush with
+ * the write_cr3()
+ */
+ if (!IS_ENABLED(CONFIG_KAISER) ||
+ !cpu_feature_enabled(X86_FEATURE_PCID))
+ goto out_set_cr3;
+ /*
+ * We reuse the same PCID for different tasks, so we must
+ * flush all the entires for the PCID out when we change
+ * tasks.
+ */
+ new_mm_cr3 = X86_CR3_PCID_KERN_FLUSH | __pa(pgdir);
+
+ /*
+ * The flush from load_cr3() may leave old TLB entries
+ * for userspace in place. We must flush that context
+ * separately. We can theoretically delay doing this
+ * until we actually load up the userspace CR3, but
+ * that's a bit tricky. We have to have the "need to
+ * flush userspace PCID" bit per-cpu and check it in the
+ * exit-to-userspace paths.
+ */
+ invpcid_flush_single_context(X86_CR3_PCID_ASID_USER);
+
+out_set_cr3:
+ /*
+ * Caution: many callers of this function expect
+ * that load_cr3() is serializing and orders TLB
+ * fills with respect to the mm_cpumask writes.
+ */
+ write_cr3(new_mm_cr3);
+}
+
/*
* We cannot call mmdrop() because we are in interrupt context,
* instead update mm->cpu_vm_mask.
@@ -45,7 +85,7 @@ void leave_mm(int cpu)
BUG();
if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) {
cpumask_clear_cpu(cpu, mm_cpumask(active_mm));
- load_cr3(swapper_pg_dir);
+ load_new_mm_cr3(swapper_pg_dir);
/*
* This gets called in the idle path where RCU
* functions differently. Tracing normally
@@ -120,7 +160,7 @@ void switch_mm_irqs_off(struct mm_struct
* ordering guarantee we need.
*
*/
- load_cr3(next->pgd);
+ load_new_mm_cr3(next->pgd);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
@@ -167,7 +207,7 @@ void switch_mm_irqs_off(struct mm_struct
* As above, load_cr3() is serializing and orders TLB
* fills with respect to the mm_cpumask write.
*/
- load_cr3(next->pgd);
+ load_new_mm_cr3(next->pgd);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
load_mm_cr4(next);
load_mm_ldt(next);
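For orientation (not part of the patch text): the split page tables get two fixed PCIDs, ASID 4 for kernel and ASID 6 for user, and CR3 writes normally keep bit 63 (the NOFLUSH bit) set so each context retains its TLB entries across the switch. A standalone C sketch of how such CR3 values are composed, using the constants from the patch but otherwise simplified:

#include <stdint.h>
#include <stdio.h>

#define SHADOW_PGD_OFFSET	0x1000ULL
#define CR3_PCID_ASID_MASK	0xfffULL
#define CR3_PCID_NOFLUSH	(1ULL << 63)
#define CR3_PCID_ASID_KERN	0x4ULL
#define CR3_PCID_ASID_USER	0x6ULL

/* Kernel CR3: kernel pgd, kernel PCID, keep cached translations. */
static uint64_t kernel_cr3(uint64_t pgd_pa)
{
	return (pgd_pa & ~(CR3_PCID_ASID_MASK | SHADOW_PGD_OFFSET))
	       | CR3_PCID_NOFLUSH | CR3_PCID_ASID_KERN;
}

/* User CR3: shadow pgd one page up, user PCID, keep cached translations. */
static uint64_t user_cr3(uint64_t pgd_pa)
{
	return (pgd_pa & ~CR3_PCID_ASID_MASK)
	       | SHADOW_PGD_OFFSET
	       | CR3_PCID_NOFLUSH | CR3_PCID_ASID_USER;
}

int main(void)
{
	uint64_t pgd_pa = 0x1a2b2000ULL;	/* pretend kernel pgd address */

	printf("kernel CR3: %#llx\n", (unsigned long long)kernel_cr3(pgd_pa));
	printf("user   CR3: %#llx\n", (unsigned long long)user_cr3(pgd_pa));
	return 0;
}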