Older Xen versions (4.5 and before) might have problems migrating pv guests with MSR_IA32_SPEC_CTRL having a non-zero value. So before suspending zero that MSR and restore it after being resumed.
Cc: stable@vger.kernel.org Signed-off-by: Juergen Gross jgross@suse.com --- arch/x86/xen/suspend.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+)
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index d9f96cc5d743..1d83152c761b 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -1,12 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/types.h> #include <linux/tick.h> +#include <linux/percpu-defs.h>
#include <xen/xen.h> #include <xen/interface/xen.h> #include <xen/grant_table.h> #include <xen/events.h>
+#include <asm/cpufeatures.h> +#include <asm/msr-index.h> #include <asm/xen/hypercall.h> #include <asm/xen/page.h> #include <asm/fixmap.h> @@ -15,6 +18,8 @@ #include "mmu.h" #include "pmu.h"
+static DEFINE_PER_CPU(u64, spec_ctrl); + void xen_arch_pre_suspend(void) { xen_save_time_memory_area(); @@ -35,6 +40,9 @@ void xen_arch_post_suspend(int cancelled)
static void xen_vcpu_notify_restore(void *data) { + if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL)) + wrmsrl(MSR_IA32_SPEC_CTRL, this_cpu_read(spec_ctrl)); + /* Boot processor notified via generic timekeeping_resume() */ if (smp_processor_id() == 0) return; @@ -44,7 +52,15 @@ static void xen_vcpu_notify_restore(void *data)
static void xen_vcpu_notify_suspend(void *data) { + u64 tmp; + tick_suspend_local(); + + if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL)) { + rdmsrl(MSR_IA32_SPEC_CTRL, tmp); + this_cpu_write(spec_ctrl, tmp); + wrmsrl(MSR_IA32_SPEC_CTRL, 0); + } }
void xen_arch_resume(void)
On 26.02.18 at 15:08, jgross@suse.com wrote:
Older Xen versions (4.5 and before) might have problems migrating pv guests with MSR_IA32_SPEC_CTRL having a non-zero value. So before suspending zero that MSR and restore it after being resumed.
Cc: stable@vger.kernel.org Signed-off-by: Juergen Gross jgross@suse.com
Reviewed-by: Jan Beulich jbeulich@suse.com
On 26.02.18 at 15:08, jgross@suse.com wrote:
@@ -35,6 +40,9 @@ void xen_arch_post_suspend(int cancelled) static void xen_vcpu_notify_restore(void *data) {
- if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL))
wrmsrl(MSR_IA32_SPEC_CTRL, this_cpu_read(spec_ctrl));
- /* Boot processor notified via generic timekeeping_resume() */ if (smp_processor_id() == 0) return;
@@ -44,7 +52,15 @@ static void xen_vcpu_notify_restore(void *data) static void xen_vcpu_notify_suspend(void *data) {
- u64 tmp;
- tick_suspend_local();
- if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL)) {
rdmsrl(MSR_IA32_SPEC_CTRL, tmp);
this_cpu_write(spec_ctrl, tmp);
wrmsrl(MSR_IA32_SPEC_CTRL, 0);
- }
}
While investigating ways how to do something similar on our old, non-pvops kernels I've started wondering if this solution is actually correct in all cases. Of course discussing this is complicated by the fact that the change there might be a conflict with hasn't landed in Linus'es tree yet (see e.g. https://patchwork.kernel.org/patch/10153843/ for an upstream submission; I haven't been able to find any discussion on that patch or why it isn't upstream yet), but we have it in our various branches. The potential problem I'm seeing is with the clearing and re-setting of SPEC_CTRL around CPUs going idle. While the active CPU could have preemption disabled (if that isn't the case already), the passive CPUs are - afaict - neither under full control of drivers/xen/manage.c:do_suspend() nor excluded yet from any further scheduling activity. Hence with code like this (taken from one of our branches)
static void mwait_idle(void) { if (!current_set_polling_and_test()) { trace_cpu_idle_rcuidle(1, smp_processor_id()); if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) { smp_mb(); /* quirk */ clflush((void *)¤t_thread_info()->flags); smp_mb(); /* quirk */ }
x86_disable_ibrs();
__monitor((void *)¤t_thread_info()->flags, 0, 0); if (!need_resched()) __sti_mwait(0, 0); else local_irq_enable();
x86_enable_ibrs(); ...
the MSR might get set to non-zero again after having been cleared by the code your patch adds. I therefore think that the only race free solution would be to do the clearing from stop-machine context. But maybe I'm overlooking something.
Jan
linux-stable-mirror@lists.linaro.org