FILL_RETURN_BUFFER can access percpu data, therefore the vmload of the host save area must be executed before it. As a first step, move the VMCB vmsave/vmload to assembly.
The idea on how to number the exception tables is stolen from a prototype patch by Peter Zijlstra.
Cc: stable@vger.kernel.org Fixes: f14eec0a3203 ("KVM: SVM: move more vmentry code to assembly") Link: https://lore.kernel.org/all/f571e404-e625-bae1-10e9-449b2eb4cbd8@citrix.com/ Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- arch/x86/kernel/asm-offsets.c | 2 ++ arch/x86/kvm/svm/svm.c | 9 ------- arch/x86/kvm/svm/vmenter.S | 50 +++++++++++++++++++++++++++-------- 3 files changed, 41 insertions(+), 20 deletions(-)
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 85de7e4fe59a..f01293a1e594 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -113,6 +113,8 @@ static void __used common(void) if (IS_ENABLED(CONFIG_KVM_AMD)) { BLANK(); OFFSET(SVM_vcpu_arch_regs, vcpu_svm, vcpu.arch.regs); + OFFSET(SVM_vmcb01, vcpu_svm, vmcb01); + OFFSET(KVM_VMCB_pa, kvm_vmcb_info, pa); }
if (IS_ENABLED(CONFIG_KVM_INTEL)) { diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 4cfa62e66a0e..ae65cdcab660 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3924,16 +3924,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu) } else { struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
- /* - * Use a single vmcb (vmcb01 because it's always valid) for - * context switching guest state via VMLOAD/VMSAVE, that way - * the state doesn't need to be copied between vmcb01 and - * vmcb02 when switching vmcbs for nested virtualization. - */ - vmload(svm->vmcb01.pa); __svm_vcpu_run(vmcb_pa, svm); - vmsave(svm->vmcb01.pa); - vmload(__sme_page_pa(sd->save_area)); }
diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index dc558d0a589e..4709bc8868d7 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/linkage.h> #include <asm/asm.h> +#include <asm/asm-offsets.h> #include <asm/bitsperlong.h> #include <asm/kvm_vcpu_regs.h> #include <asm/nospec-branch.h> @@ -27,6 +28,8 @@ #define VCPU_R15 (SVM_vcpu_arch_regs + __VCPU_REGS_R15 * WORD_SIZE) #endif
+#define SVM_vmcb01_pa (SVM_vmcb01 + KVM_VMCB_pa) + .section .noinstr.text, "ax"
/** @@ -56,6 +59,16 @@ SYM_FUNC_START(__svm_vcpu_run) /* Move @svm to RDI. */ mov %_ASM_ARG2, %_ASM_DI
+ /* + * Use a single vmcb (vmcb01 because it's always valid) for + * context switching guest state via VMLOAD/VMSAVE, that way + * the state doesn't need to be copied between vmcb01 and + * vmcb02 when switching vmcbs for nested virtualization. + */ + mov SVM_vmcb01_pa(%_ASM_DI), %_ASM_AX +1: vmload %_ASM_AX +2: + /* "POP" @vmcb to RAX. */ pop %_ASM_AX
@@ -80,16 +93,11 @@ SYM_FUNC_START(__svm_vcpu_run) /* Enter guest mode */ sti
-1: vmrun %_ASM_AX - -2: cli - -#ifdef CONFIG_RETPOLINE - /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ - FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE -#endif +3: vmrun %_ASM_AX +4: + cli
- /* "POP" @svm to RAX. */ + /* Pop @svm to RAX while it's the only available register. */ pop %_ASM_AX
/* Save all guest registers. */ @@ -110,6 +118,18 @@ SYM_FUNC_START(__svm_vcpu_run) mov %r15, VCPU_R15(%_ASM_AX) #endif
+ /* @svm can stay in RDI from now on. */ + mov %_ASM_AX, %_ASM_DI + + mov SVM_vmcb01_pa(%_ASM_DI), %_ASM_AX +5: vmsave %_ASM_AX +6: + +#ifdef CONFIG_RETPOLINE + /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ + FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE +#endif + /* * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be * untrained as soon as we exit the VM and are back to the @@ -159,11 +179,19 @@ SYM_FUNC_START(__svm_vcpu_run) pop %_ASM_BP RET
-3: cmpb $0, kvm_rebooting +10: cmpb $0, kvm_rebooting jne 2b ud2 +30: cmpb $0, kvm_rebooting + jne 4b + ud2 +50: cmpb $0, kvm_rebooting + jne 6b + ud2
- _ASM_EXTABLE(1b, 3b) + _ASM_EXTABLE(1b, 10b) + _ASM_EXTABLE(3b, 30b) + _ASM_EXTABLE(5b, 50b)
SYM_FUNC_END(__svm_vcpu_run)
On Mon, Nov 07, 2022 at 09:54:32AM -0500, Paolo Bonzini wrote:
@@ -56,6 +59,16 @@ SYM_FUNC_START(__svm_vcpu_run) /* Move @svm to RDI. */ mov %_ASM_ARG2, %_ASM_DI
- /*
* Use a single vmcb (vmcb01 because it's always valid) for
* context switching guest state via VMLOAD/VMSAVE, that way
* the state doesn't need to be copied between vmcb01 and
* vmcb02 when switching vmcbs for nested virtualization.
*/
- mov SVM_vmcb01_pa(%_ASM_DI), %_ASM_AX
+1: vmload %_ASM_AX +2:
- /* "POP" @vmcb to RAX. */ pop %_ASM_AX
@@ -80,16 +93,11 @@ SYM_FUNC_START(__svm_vcpu_run) /* Enter guest mode */ sti +3: vmrun %_ASM_AX +4:
- cli
- /* Pop @svm to RAX while it's the only available register. */ pop %_ASM_AX
/* Save all guest registers. */
So Andrew noted that once the vmload has executed, any exception taken (say at 3) will crash and burn because %gs is scribbled.
Might be good to make a record of this in the code so it can be cleaned up some day.
@@ -159,11 +179,19 @@ SYM_FUNC_START(__svm_vcpu_run) pop %_ASM_BP RET +10: cmpb $0, kvm_rebooting jne 2b ud2 +30: cmpb $0, kvm_rebooting
- jne 4b
- ud2
+50: cmpb $0, kvm_rebooting
- jne 6b
- ud2
- _ASM_EXTABLE(1b, 10b)
- _ASM_EXTABLE(3b, 30b)
- _ASM_EXTABLE(5b, 50b)
On 11/7/22 16:23, Peter Zijlstra wrote:
+3: vmrun %_ASM_AX +4:
- cli
- /* Pop @svm to RAX while it's the only available register. */ pop %_ASM_AX
/* Save all guest registers. */
So Andrew noted that once the vmload has executed, any exception taken (say at 3) will crash and burn because %gs is scribbled.
Might be good to make a record of this in the code so it can be cleaned up some day.
Yeah, it won't happen because clgi/stgi blocks setting kvm_rebooting, so I thought of killing the three exception fixups after the first. In the end I kept them for simplicity and to keep the normal/SEV-ES versions as similar as possible.
Paolo
On 07/11/2022 14:54, Paolo Bonzini wrote:
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 4cfa62e66a0e..ae65cdcab660 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3924,16 +3924,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu) } else { struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
/*
* Use a single vmcb (vmcb01 because it's always valid) for
* context switching guest state via VMLOAD/VMSAVE, that way
* the state doesn't need to be copied between vmcb01 and
* vmcb02 when switching vmcbs for nested virtualization.
*/
- vmload(svm->vmcb01.pa);
__svm_vcpu_run(vmcb_pa, svm);
- vmsave(svm->vmcb01.pa);
vmload(__sme_page_pa(sd->save_area));
%gs is still the guest's until this vmload has completed. It needs to move down into asm too.
~Andrew
On 11/7/22 16:32, Andrew Cooper wrote:
- vmload(svm->vmcb01.pa);
__svm_vcpu_run(vmcb_pa, svm);
- vmsave(svm->vmcb01.pa);
vmload(__sme_page_pa(sd->save_area));
%gs is still the guest's until this vmload has completed. It needs to move down into asm too.
Sure, that's patch 6 in the series. See also cover letter: "this means moving guest vmload/vmsave and host vmload to assembly".
Paolo
On 07/11/2022 15:37, Paolo Bonzini wrote:
On 11/7/22 16:32, Andrew Cooper wrote:
- vmload(svm->vmcb01.pa); __svm_vcpu_run(vmcb_pa, svm); - vmsave(svm->vmcb01.pa);
vmload(__sme_page_pa(sd->save_area));
%gs is still the guest's until this vmload has completed. It needs to move down into asm too.
Sure, that's patch 6 in the series. See also cover letter: "this means moving guest vmload/vmsave and host vmload to assembly".
Oh, ok. I missed that it was split across two patches.
Sorry for the noise. The end result looks ok.
~Andrew
linux-stable-mirror@lists.linaro.org