The patch below does not apply to the 6.1-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to stable@vger.kernel.org.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x a0ee1d5faff135e28810f29e0f06328c66f89852
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to 'stable@vger.kernel.org' --in-reply-to '2025062034-chastise-wrecking-9a12@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a0ee1d5faff135e28810f29e0f06328c66f89852 Mon Sep 17 00:00:00 2001
From: Chao Gao <chao.gao@intel.com>
Date: Mon, 24 Mar 2025 22:08:48 +0800
Subject: [PATCH] KVM: VMX: Flush shadow VMCS on emergency reboot
Ensure the shadow VMCS cache is evicted during an emergency reboot to prevent potential memory corruption if the cache is evicted after reboot.
This issue was identified through code inspection, as __loaded_vmcs_clear() flushes both the normal VMCS and the shadow VMCS.
Avoid checking the "launched" state during an emergency reboot, unlike the behavior in __loaded_vmcs_clear(). This is important because reboot NMIs can interfere with operations like copy_shadow_to_vmcs12(), where shadow VMCSes are loaded directly using VMPTRLD. In such cases, if NMIs occur right after the VMCS load, the shadow VMCSes will be active but the "launched" state may not be set.
Fixes: 16f5b9034b69 ("KVM: nVMX: Copy processor-specific shadow-vmcs to VMCS12")
Cc: stable@vger.kernel.org
Signed-off-by: Chao Gao <chao.gao@intel.com>
Reviewed-by: Kai Huang <kai.huang@intel.com>
Link: https://lore.kernel.org/r/20250324140849.2099723-1-chao.gao@intel.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index ef2d7208dd20..848c4963bdb8 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -770,8 +770,11 @@ void vmx_emergency_disable_virtualization_cpu(void) return;
list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu), - loaded_vmcss_on_cpu_link) + loaded_vmcss_on_cpu_link) { vmcs_clear(v->vmcs); + if (v->shadow_vmcs) + vmcs_clear(v->shadow_vmcs); + }
kvm_cpu_vmxoff(); }
From: Sean Christopherson <seanjc@google.com>
[ Upstream commit 5e408396c60cd0f0b53a43713016b6d6af8d69e0 ]
Provide dedicated helpers to (un)register virt hooks used during an emergency crash/reboot, and WARN if there is an attempt to overwrite the registered callback, or an attempt to do an unpaired unregister.
Opportunistically use rcu_assign_pointer() instead of RCU_INIT_POINTER(), mainly so that the set/unset paths are more symmetrical, but also because any performance gains from using RCU_INIT_POINTER() are meaningless for this code.
Reviewed-by: Kai Huang kai.huang@intel.com Link: https://lore.kernel.org/r/20230721201859.2307736-3-seanjc@google.com Signed-off-by: Sean Christopherson seanjc@google.com Stable-dep-of: a0ee1d5faff1 ("KVM: VMX: Flush shadow VMCS on emergency reboot") Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/include/asm/reboot.h | 5 +++-- arch/x86/kernel/reboot.c | 30 ++++++++++++++++++++++++------ arch/x86/kvm/vmx/vmx.c | 6 ++---- 3 files changed, 29 insertions(+), 12 deletions(-)
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h index 2551baec927d2..d9a38d379d182 100644 --- a/arch/x86/include/asm/reboot.h +++ b/arch/x86/include/asm/reboot.h @@ -25,8 +25,9 @@ void __noreturn machine_real_restart(unsigned int type); #define MRR_BIOS 0 #define MRR_APM 1
-typedef void crash_vmclear_fn(void); -extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss; +typedef void (cpu_emergency_virt_cb)(void); +void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback); +void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback); void cpu_emergency_disable_virtualization(void);
typedef void (*nmi_shootdown_cb)(int, struct pt_regs*); diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index d9dbcd1cf75f8..635995e7a704a 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -794,17 +794,35 @@ void machine_crash_shutdown(struct pt_regs *regs) * * protected by rcu. */ -crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss; -EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss); +static cpu_emergency_virt_cb __rcu *cpu_emergency_virt_callback; + +void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback) +{ + if (WARN_ON_ONCE(rcu_access_pointer(cpu_emergency_virt_callback))) + return; + + rcu_assign_pointer(cpu_emergency_virt_callback, callback); +} +EXPORT_SYMBOL_GPL(cpu_emergency_register_virt_callback); + +void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback) +{ + if (WARN_ON_ONCE(rcu_access_pointer(cpu_emergency_virt_callback) != callback)) + return; + + rcu_assign_pointer(cpu_emergency_virt_callback, NULL); + synchronize_rcu(); +} +EXPORT_SYMBOL_GPL(cpu_emergency_unregister_virt_callback);
static inline void cpu_crash_vmclear_loaded_vmcss(void) { - crash_vmclear_fn *do_vmclear_operation = NULL; + cpu_emergency_virt_cb *callback;
rcu_read_lock(); - do_vmclear_operation = rcu_dereference(crash_vmclear_loaded_vmcss); - if (do_vmclear_operation) - do_vmclear_operation(); + callback = rcu_dereference(cpu_emergency_virt_callback); + if (callback) + callback(); rcu_read_unlock(); }
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index fbe26b88f7312..aef2f09718b57 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -8554,8 +8554,7 @@ static void __vmx_exit(void) { allow_smaller_maxphyaddr = false;
- RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL); - synchronize_rcu(); + cpu_emergency_unregister_virt_callback(crash_vmclear_local_loaded_vmcss);
vmx_cleanup_l1d_flush(); } @@ -8629,8 +8628,7 @@ static int __init vmx_init(void) pi_init_cpu(cpu); }
- rcu_assign_pointer(crash_vmclear_loaded_vmcss, - crash_vmclear_local_loaded_vmcss); + cpu_emergency_register_virt_callback(crash_vmclear_local_loaded_vmcss);
vmx_check_vmcs12_offsets();
From: Sean Christopherson <seanjc@google.com>
[ Upstream commit 119b5cb4ffd0166f3e98e9ee042f5046f7744f28 ]
Use KVM VMX's reboot/crash callback to do VMXOFF in an emergency instead of manually and blindly doing VMXOFF. There's no need to attempt VMXOFF if a hypervisor, i.e. KVM, isn't loaded/active, i.e. if the CPU can't possibly be post-VMXON.
Reviewed-by: Kai Huang kai.huang@intel.com Link: https://lore.kernel.org/r/20230721201859.2307736-4-seanjc@google.com Signed-off-by: Sean Christopherson seanjc@google.com Stable-dep-of: a0ee1d5faff1 ("KVM: VMX: Flush shadow VMCS on emergency reboot") Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/include/asm/virtext.h | 10 ---------- arch/x86/kernel/reboot.c | 29 +++++++++-------------------- arch/x86/kvm/vmx/vmx.c | 8 +++++--- 3 files changed, 14 insertions(+), 33 deletions(-)
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h index 724ce44809ed2..1b683bf71d14f 100644 --- a/arch/x86/include/asm/virtext.h +++ b/arch/x86/include/asm/virtext.h @@ -70,16 +70,6 @@ static inline void __cpu_emergency_vmxoff(void) cpu_vmxoff(); }
-/** Disable VMX if it is supported and enabled on the current CPU - */ -static inline void cpu_emergency_vmxoff(void) -{ - if (cpu_has_vmx()) - __cpu_emergency_vmxoff(); -} - - -
/* * SVM functions: diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 635995e7a704a..79e1ac3d0625d 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -787,13 +787,7 @@ void machine_crash_shutdown(struct pt_regs *regs) } #endif
-/* - * This is used to VMCLEAR all VMCSs loaded on the - * processor. And when loading kvm_intel module, the - * callback function pointer will be assigned. - * - * protected by rcu. - */ +/* RCU-protected callback to disable virtualization prior to reboot. */ static cpu_emergency_virt_cb __rcu *cpu_emergency_virt_callback;
void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback) @@ -815,17 +809,6 @@ void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback) } EXPORT_SYMBOL_GPL(cpu_emergency_unregister_virt_callback);
-static inline void cpu_crash_vmclear_loaded_vmcss(void) -{ - cpu_emergency_virt_cb *callback; - - rcu_read_lock(); - callback = rcu_dereference(cpu_emergency_virt_callback); - if (callback) - callback(); - rcu_read_unlock(); -} - /* This is the CPU performing the emergency shutdown work. */ int crashing_cpu = -1;
@@ -836,9 +819,15 @@ int crashing_cpu = -1; */ void cpu_emergency_disable_virtualization(void) { - cpu_crash_vmclear_loaded_vmcss(); + cpu_emergency_virt_cb *callback; + + rcu_read_lock(); + callback = rcu_dereference(cpu_emergency_virt_callback); + if (callback) + callback(); + rcu_read_unlock();
- cpu_emergency_vmxoff(); + /* KVM_AMD doesn't yet utilize the common callback. */ cpu_emergency_svm_disable(); }
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index aef2f09718b57..ef9cb8445dc48 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -705,7 +705,7 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx, return ret; }
-static void crash_vmclear_local_loaded_vmcss(void) +static void vmx_emergency_disable(void) { int cpu = raw_smp_processor_id(); struct loaded_vmcs *v; @@ -713,6 +713,8 @@ static void crash_vmclear_local_loaded_vmcss(void) list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu), loaded_vmcss_on_cpu_link) vmcs_clear(v->vmcs); + + __cpu_emergency_vmxoff(); }
static void __loaded_vmcs_clear(void *arg) @@ -8554,7 +8556,7 @@ static void __vmx_exit(void) { allow_smaller_maxphyaddr = false;
- cpu_emergency_unregister_virt_callback(crash_vmclear_local_loaded_vmcss); + cpu_emergency_unregister_virt_callback(vmx_emergency_disable);
vmx_cleanup_l1d_flush(); } @@ -8628,7 +8630,7 @@ static int __init vmx_init(void) pi_init_cpu(cpu); }
- cpu_emergency_register_virt_callback(crash_vmclear_local_loaded_vmcss); + cpu_emergency_register_virt_callback(vmx_emergency_disable);
vmx_check_vmcs12_offsets();
From: Chao Gao <chao.gao@intel.com>
[ Upstream commit a0ee1d5faff135e28810f29e0f06328c66f89852 ]
Ensure the shadow VMCS cache is evicted during an emergency reboot to prevent potential memory corruption if the cache is evicted after reboot.
This issue was identified through code inspection, as __loaded_vmcs_clear() flushes both the normal VMCS and the shadow VMCS.
Avoid checking the "launched" state during an emergency reboot, unlike the behavior in __loaded_vmcs_clear(). This is important because reboot NMIs can interfere with operations like copy_shadow_to_vmcs12(), where shadow VMCSes are loaded directly using VMPTRLD. In such cases, if NMIs occur right after the VMCS load, the shadow VMCSes will be active but the "launched" state may not be set.
Fixes: 16f5b9034b69 ("KVM: nVMX: Copy processor-specific shadow-vmcs to VMCS12") Cc: stable@vger.kernel.org Signed-off-by: Chao Gao chao.gao@intel.com Reviewed-by: Kai Huang kai.huang@intel.com Link: https://lore.kernel.org/r/20250324140849.2099723-1-chao.gao@intel.com Signed-off-by: Sean Christopherson seanjc@google.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/kvm/vmx/vmx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index ef9cb8445dc48..12f9b220b6bd1 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -711,8 +711,11 @@ static void vmx_emergency_disable(void) struct loaded_vmcs *v;
list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu), - loaded_vmcss_on_cpu_link) + loaded_vmcss_on_cpu_link) { vmcs_clear(v->vmcs); + if (v->shadow_vmcs) + vmcs_clear(v->shadow_vmcs); + }
__cpu_emergency_vmxoff(); }
linux-stable-mirror@lists.linaro.org