From: Thomas Gleixner <tglx(a)linutronix.de>
commit 356e4bfff2c5489e016fdb925adbf12a1e3950ee upstream
For certain use cases it is desired to enforce mitigations so they cannot
be undone afterwards. That's important for loader stubs which want to
prevent a child from disabling the mitigation again. Will also be used for
seccomp(). The extra state preserving of the prctl state for SSB is a
preparatory step for EBPF dynamic speculation control.
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Signed-off-by: David Woodhouse <dwmw(a)amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa(a)csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley(a)gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov(a)vmware.com>
Reviewed-by: Bo Gan <ganb(a)vmware.com>
---
Documentation/spec_ctrl.txt | 34 +++++++++++++++++++++-------------
arch/x86/kernel/cpu/bugs.c | 35 +++++++++++++++++++++++++----------
fs/proc/array.c | 3 +++
include/linux/sched.h | 9 +++++++++
include/uapi/linux/prctl.h | 1 +
5 files changed, 59 insertions(+), 23 deletions(-)
diff --git a/Documentation/spec_ctrl.txt b/Documentation/spec_ctrl.txt
index ddbebcd..1b3690d 100644
--- a/Documentation/spec_ctrl.txt
+++ b/Documentation/spec_ctrl.txt
@@ -25,19 +25,21 @@ PR_GET_SPECULATION_CTRL
-----------------------
PR_GET_SPECULATION_CTRL returns the state of the speculation misfeature
-which is selected with arg2 of prctl(2). The return value uses bits 0-2 with
+which is selected with arg2 of prctl(2). The return value uses bits 0-3 with
the following meaning:
-==== ================ ===================================================
-Bit Define Description
-==== ================ ===================================================
-0 PR_SPEC_PRCTL Mitigation can be controlled per task by
- PR_SET_SPECULATION_CTRL
-1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is
- disabled
-2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is
- enabled
-==== ================ ===================================================
+==== ===================== ===================================================
+Bit Define Description
+==== ===================== ===================================================
+0 PR_SPEC_PRCTL Mitigation can be controlled per task by
+ PR_SET_SPECULATION_CTRL
+1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is
+ disabled
+2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is
+ enabled
+3 PR_SPEC_FORCE_DISABLE Same as PR_SPEC_DISABLE, but cannot be undone. A
+ subsequent prctl(..., PR_SPEC_ENABLE) will fail.
+==== ===================== ===================================================
If all bits are 0 the CPU is not affected by the speculation misfeature.
@@ -47,9 +49,11 @@ misfeature will fail.
PR_SET_SPECULATION_CTRL
-----------------------
+
PR_SET_SPECULATION_CTRL allows to control the speculation misfeature, which
is selected by arg2 of :manpage:`prctl(2)` per task. arg3 is used to hand
-in the control value, i.e. either PR_SPEC_ENABLE or PR_SPEC_DISABLE.
+in the control value, i.e. either PR_SPEC_ENABLE or PR_SPEC_DISABLE or
+PR_SPEC_FORCE_DISABLE.
Common error codes
------------------
@@ -70,10 +74,13 @@ Value Meaning
0 Success
ERANGE arg3 is incorrect, i.e. it's neither PR_SPEC_ENABLE nor
- PR_SPEC_DISABLE
+ PR_SPEC_DISABLE nor PR_SPEC_FORCE_DISABLE
ENXIO Control of the selected speculation misfeature is not possible.
See PR_GET_SPECULATION_CTRL.
+
+EPERM Speculation was disabled with PR_SPEC_FORCE_DISABLE and caller
+ tried to enable it again.
======= =================================================================
Speculation misfeature controls
@@ -84,3 +91,4 @@ Speculation misfeature controls
* prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0);
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0);
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0);
+ * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 64b54a4..d6897ca 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -531,21 +531,37 @@ static void ssb_select_mitigation()
static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
{
- bool rds = !!test_tsk_thread_flag(task, TIF_RDS);
+ bool update;
if (ssb_mode != SPEC_STORE_BYPASS_PRCTL)
return -ENXIO;
- if (ctrl == PR_SPEC_ENABLE)
- clear_tsk_thread_flag(task, TIF_RDS);
- else
- set_tsk_thread_flag(task, TIF_RDS);
+ switch (ctrl) {
+ case PR_SPEC_ENABLE:
+ /* If speculation is force disabled, enable is not allowed */
+ if (task_spec_ssb_force_disable(task))
+ return -EPERM;
+ task_clear_spec_ssb_disable(task);
+ update = test_and_clear_tsk_thread_flag(task, TIF_RDS);
+ break;
+ case PR_SPEC_DISABLE:
+ task_set_spec_ssb_disable(task);
+ update = !test_and_set_tsk_thread_flag(task, TIF_RDS);
+ break;
+ case PR_SPEC_FORCE_DISABLE:
+ task_set_spec_ssb_disable(task);
+ task_set_spec_ssb_force_disable(task);
+ update = !test_and_set_tsk_thread_flag(task, TIF_RDS);
+ break;
+ default:
+ return -ERANGE;
+ }
/*
* If being set on non-current task, delay setting the CPU
* mitigation until it is next scheduled.
*/
- if (task == current && rds != !!test_tsk_thread_flag(task, TIF_RDS))
+ if (task == current && update)
speculative_store_bypass_update();
return 0;
@@ -557,7 +573,9 @@ static int ssb_prctl_get(struct task_struct *task)
case SPEC_STORE_BYPASS_DISABLE:
return PR_SPEC_DISABLE;
case SPEC_STORE_BYPASS_PRCTL:
- if (test_tsk_thread_flag(task, TIF_RDS))
+ if (task_spec_ssb_force_disable(task))
+ return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
+ if (task_spec_ssb_disable(task))
return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
default:
@@ -570,9 +588,6 @@ static int ssb_prctl_get(struct task_struct *task)
int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
unsigned long ctrl)
{
- if (ctrl != PR_SPEC_ENABLE && ctrl != PR_SPEC_DISABLE)
- return -ERANGE;
-
switch (which) {
case PR_SPEC_STORE_BYPASS:
return ssb_prctl_set(task, ctrl);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index bb48358..3141478 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -341,6 +341,9 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
case PR_SPEC_NOT_AFFECTED:
seq_printf(m, "not vulnerable");
break;
+ case PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE:
+ seq_printf(m, "thread force mitigated");
+ break;
case PR_SPEC_PRCTL | PR_SPEC_DISABLE:
seq_printf(m, "thread mitigated");
break;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 90bea39..725498c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2167,6 +2167,8 @@ static inline void memalloc_noio_restore(unsigned int flags)
#define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */
#define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */
#define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */
+#define PFA_SPEC_SSB_DISABLE 4 /* Speculative Store Bypass disabled */
+#define PFA_SPEC_SSB_FORCE_DISABLE 5 /* Speculative Store Bypass force disabled*/
#define TASK_PFA_TEST(name, func) \
@@ -2190,6 +2192,13 @@ TASK_PFA_TEST(SPREAD_SLAB, spread_slab)
TASK_PFA_SET(SPREAD_SLAB, spread_slab)
TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
+TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable)
+TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable)
+TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable)
+
+TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
+TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
+
/*
* task->jobctl flags
*/
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 3b316be..64776b7 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -207,5 +207,6 @@ struct prctl_mm_map {
# define PR_SPEC_PRCTL (1UL << 0)
# define PR_SPEC_ENABLE (1UL << 1)
# define PR_SPEC_DISABLE (1UL << 2)
+# define PR_SPEC_FORCE_DISABLE (1UL << 3)
#endif /* _LINUX_PRCTL_H */
From: Thomas Gleixner <tglx(a)linutronix.de>
commit a73ec77ee17ec556fe7f165d00314cb7c047b1ac upstream
Add prctl based control for Speculative Store Bypass mitigation and make it
the default mitigation for Intel and AMD.
Andi Kleen provided the following rationale (slightly redacted):
There are multiple levels of impact of Speculative Store Bypass:
1) JITed sandbox.
It cannot invoke system calls, but can do PRIME+PROBE and may have call
interfaces to other code
2) Native code process.
No protection inside the process at this level.
3) Kernel.
4) Between processes.
The prctl tries to protect against case (1) doing attacks.
If the untrusted code can do random system calls then control is already
lost in a much worse way. So there needs to be system call protection in
some way (using a JIT not allowing them or seccomp). Or rather if the
process can subvert its environment somehow to do the prctl it can already
execute arbitrary code, which is much worse than SSB.
To put it differently, the point of the prctl is to not allow JITed code
to read data it shouldn't read from its JITed sandbox. If it already has
escaped its sandbox then it can already read everything it wants in its
address space, and do much worse.
The ability to control Speculative Store Bypass allows the protection to be
enabled selectively without affecting overall system performance.
Based on an initial patch from Tim Chen. Completely rewritten.
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk(a)oracle.com>
Signed-off-by: David Woodhouse <dwmw(a)amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa(a)csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley(a)gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov(a)vmware.com>
Reviewed-by: Bo Gan <ganb(a)vmware.com>
---
Documentation/kernel-parameters.txt | 6 ++
arch/x86/include/asm/nospec-branch.h | 1
arch/x86/kernel/cpu/bugs.c | 83 ++++++++++++++++++++++++++++++----
3 files changed, 79 insertions(+), 11 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index dc138b8..80202de 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -3651,7 +3651,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
off - Unconditionally enable Speculative Store Bypass
auto - Kernel detects whether the CPU model contains an
implementation of Speculative Store Bypass and
- picks the most appropriate mitigation
+ picks the most appropriate mitigation.
+ prctl - Control Speculative Store Bypass per thread
+ via prctl. Speculative Store Bypass is enabled
+ for a process by default. The state of the control
+ is inherited on fork.
Not specifying this option is equivalent to
spec_store_bypass_disable=auto.
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 47c454c..155d955 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -187,6 +187,7 @@ extern u64 x86_spec_ctrl_get_default(void);
enum ssb_mitigation {
SPEC_STORE_BYPASS_NONE,
SPEC_STORE_BYPASS_DISABLE,
+ SPEC_STORE_BYPASS_PRCTL,
};
extern char __indirect_thunk_start[];
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 0f8303e..bcfccd3 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -11,6 +11,8 @@
#include <linux/utsname.h>
#include <linux/cpu.h>
#include <linux/module.h>
+#include <linux/nospec.h>
+#include <linux/prctl.h>
#include <asm/spec-ctrl.h>
#include <asm/cmdline.h>
@@ -411,20 +413,23 @@ enum ssb_mitigation_cmd {
SPEC_STORE_BYPASS_CMD_NONE,
SPEC_STORE_BYPASS_CMD_AUTO,
SPEC_STORE_BYPASS_CMD_ON,
+ SPEC_STORE_BYPASS_CMD_PRCTL,
};
static const char *ssb_strings[] = {
[SPEC_STORE_BYPASS_NONE] = "Vulnerable",
- [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled"
+ [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled",
+ [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl"
};
static const struct {
const char *option;
enum ssb_mitigation_cmd cmd;
} ssb_mitigation_options[] = {
- { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */
- { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */
- { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */
+ { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */
+ { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */
+ { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */
+ { "prctl", SPEC_STORE_BYPASS_CMD_PRCTL }, /* Disable Speculative Store Bypass via prctl */
};
static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void)
@@ -474,14 +479,15 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void)
switch (cmd) {
case SPEC_STORE_BYPASS_CMD_AUTO:
- /*
- * AMD platforms by default don't need SSB mitigation.
- */
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
- break;
+ /* Choose prctl as the default mode */
+ mode = SPEC_STORE_BYPASS_PRCTL;
+ break;
case SPEC_STORE_BYPASS_CMD_ON:
mode = SPEC_STORE_BYPASS_DISABLE;
break;
+ case SPEC_STORE_BYPASS_CMD_PRCTL:
+ mode = SPEC_STORE_BYPASS_PRCTL;
+ break;
case SPEC_STORE_BYPASS_CMD_NONE:
break;
}
@@ -492,7 +498,7 @@ static enum ssb_mitigation_cmd __init __ssb_select_mitigation(void)
* - X86_FEATURE_RDS - CPU is able to turn off speculative store bypass
* - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation
*/
- if (mode != SPEC_STORE_BYPASS_NONE) {
+ if (mode == SPEC_STORE_BYPASS_DISABLE) {
setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE);
/*
* Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses
@@ -523,6 +529,63 @@ static void ssb_select_mitigation()
#undef pr_fmt
+static int ssb_prctl_set(unsigned long ctrl)
+{
+ bool rds = !!test_tsk_thread_flag(current, TIF_RDS);
+
+ if (ssb_mode != SPEC_STORE_BYPASS_PRCTL)
+ return -ENXIO;
+
+ if (ctrl == PR_SPEC_ENABLE)
+ clear_tsk_thread_flag(current, TIF_RDS);
+ else
+ set_tsk_thread_flag(current, TIF_RDS);
+
+ if (rds != !!test_tsk_thread_flag(current, TIF_RDS))
+ speculative_store_bypass_update();
+
+ return 0;
+}
+
+static int ssb_prctl_get(void)
+{
+ switch (ssb_mode) {
+ case SPEC_STORE_BYPASS_DISABLE:
+ return PR_SPEC_DISABLE;
+ case SPEC_STORE_BYPASS_PRCTL:
+ if (test_tsk_thread_flag(current, TIF_RDS))
+ return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
+ return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
+ default:
+ if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
+ return PR_SPEC_ENABLE;
+ return PR_SPEC_NOT_AFFECTED;
+ }
+}
+
+int arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl)
+{
+ if (ctrl != PR_SPEC_ENABLE && ctrl != PR_SPEC_DISABLE)
+ return -ERANGE;
+
+ switch (which) {
+ case PR_SPEC_STORE_BYPASS:
+ return ssb_prctl_set(ctrl);
+ default:
+ return -ENODEV;
+ }
+}
+
+int arch_prctl_spec_ctrl_get(unsigned long which)
+{
+ switch (which) {
+ case PR_SPEC_STORE_BYPASS:
+ return ssb_prctl_get();
+ default:
+ return -ENODEV;
+ }
+}
+
void x86_spec_ctrl_setup_ap(void)
{
if (boot_cpu_has(X86_FEATURE_IBRS))
From: Kyle Huey <me(a)kylehuey.com>
commit b9894a2f5bd18b1691cb6872c9afe32b148d0132 upstream
The debug control MSR is "highly magical" as the blockstep bit can be
cleared by hardware under not well documented circumstances.
So a task switch relying on the bit set by the previous task (according to
the previous task's thread flags) can trip over this and not update the flag
for the next task.
To fix this it's required to handle DEBUGCTLMSR_BTF when either the previous
or the next or both tasks have the TIF_BLOCKSTEP flag set.
While at it avoid branching within the TIF_BLOCKSTEP case and evaluating
boot_cpu_data twice in kernels without CONFIG_X86_DEBUGCTLMSR.
x86_64: arch/x86/kernel/process.o
text data bss dec hex
3024 8577 16 11617 2d61 Before
3008 8577 16 11601 2d51 After
i386: No change
[ tglx: Made the shift value explicit, use a local variable to make the
code readable and massaged changelog]
Originally-by: Thomas Gleixner <tglx(a)linutronix.de>
Signed-off-by: Kyle Huey <khuey(a)kylehuey.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Andy Lutomirski <luto(a)kernel.org>
Link: http://lkml.kernel.org/r/20170214081104.9244-3-khuey@kylehuey.com
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Signed-off-by: David Woodhouse <dwmw(a)amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa(a)csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley(a)gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov(a)vmware.com>
Reviewed-by: Bo Gan <ganb(a)vmware.com>
---
arch/x86/include/asm/msr-index.h | 1 +
arch/x86/kernel/process.c | 12 +++++++-----
2 files changed, 8 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index a29edb7..71a2c84 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -150,6 +150,7 @@
/* DEBUGCTLMSR bits (others vary by model): */
#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */
+#define DEBUGCTLMSR_BTF_SHIFT 1
#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */
#define DEBUGCTLMSR_TR (1UL << 6)
#define DEBUGCTLMSR_BTS (1UL << 7)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index cc0f288..166aef3 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -223,13 +223,15 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
propagate_user_return_notify(prev_p, next_p);
- if ((tifp ^ tifn) & _TIF_BLOCKSTEP) {
- unsigned long debugctl = get_debugctlmsr();
+ if ((tifp & _TIF_BLOCKSTEP || tifn & _TIF_BLOCKSTEP) &&
+ arch_has_block_step()) {
+ unsigned long debugctl, msk;
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
debugctl &= ~DEBUGCTLMSR_BTF;
- if (tifn & _TIF_BLOCKSTEP)
- debugctl |= DEBUGCTLMSR_BTF;
- update_debugctlmsr(debugctl);
+ msk = tifn & _TIF_BLOCKSTEP;
+ debugctl |= (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT;
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}
if ((tifp ^ tifn) & _TIF_NOTSC) {