Normally guests will set up CR3 themselves, but some guests, such as kselftests, and potentially CONFIG_PVH guests, rely on being booted with paging enabled and CR3 initialized to a pre-allocated page table.
Currently CR3 updates via KVM_SET_SREGS* are not loaded into the guest VMCB until just prior to entering the guest. For SEV-ES/SEV-SNP, this is too late, since it will have switched over to using the VMSA page prior to that point, with the VMSA CR3 copied from the VMCB initial CR3 value: 0.
Address this by sync'ing the CR3 value into the VMCB save area immediately when KVM_SET_SREGS* is issued so it will find it's way into the initial VMSA.
Suggested-by: Tom Lendacky thomas.lendacky@amd.com Signed-off-by: Michael Roth michael.roth@amd.com --- arch/x86/include/asm/kvm-x86-ops.h | 1 + arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm/svm.c | 22 ++++++++++++++++++++++ arch/x86/kvm/vmx/vmx.c | 8 ++++++++ arch/x86/kvm/x86.c | 3 +-- 5 files changed, 33 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index 3d25a86840db..653659e20614 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -35,6 +35,7 @@ KVM_X86_OP(get_cpl) KVM_X86_OP(set_segment) KVM_X86_OP_NULL(get_cs_db_l_bits) KVM_X86_OP(set_cr0) +KVM_X86_OP(set_cr3) KVM_X86_OP(is_valid_cr4) KVM_X86_OP(set_cr4) KVM_X86_OP(set_efer) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 09ec1ff5bd83..232e997acae6 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1324,6 +1324,7 @@ struct kvm_x86_ops { struct kvm_segment *var, int seg); void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); + void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr0); void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 7e19f5f6d0d8..2c3bc7a667c8 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1716,6 +1716,27 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) vmcb_mark_dirty(svm->vmcb, VMCB_DT); }
+static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + vcpu->arch.cr3 = cr3; + kvm_register_mark_available(vcpu, VCPU_EXREG_CR3); + + /* + * For guests that don't set guest_state_protected, the cr3 update is + * handled via kvm_mmu_load() while entering the guest. For guests + * that do (SEV-ES/SEV-SNP), the cr3 update needs to be written to + * VMCB save area now, since the save area will become the initial + * contents of the VMSA, and future VMCB save area updates won't be + * seen. + */ + if (sev_es_guest(vcpu->kvm)) { + svm->vmcb->save.cr3 = cr3; + vmcb_mark_dirty(svm->vmcb, VMCB_CR); + } +} + void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { struct vcpu_svm *svm = to_svm(vcpu); @@ -4564,6 +4585,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .get_cpl = svm_get_cpl, .get_cs_db_l_bits = kvm_get_cs_db_l_bits, .set_cr0 = svm_set_cr0, + .set_cr3 = svm_set_cr3, .is_valid_cr4 = svm_is_valid_cr4, .set_cr4 = svm_set_cr4, .set_efer = svm_set_efer, diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index fada1055f325..4f233d0b05bf 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3130,6 +3130,13 @@ static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, vmcs_writel(GUEST_CR3, guest_cr3); }
+ +void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) +{ + vcpu->arch.cr3 = cr3; + kvm_register_mark_available(vcpu, VCPU_EXREG_CR3); +} + static bool vmx_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { /* @@ -7578,6 +7585,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .get_cpl = vmx_get_cpl, .get_cs_db_l_bits = vmx_get_cs_db_l_bits, .set_cr0 = vmx_set_cr0, + .set_cr3 = vmx_set_cr3, .is_valid_cr4 = vmx_is_valid_cr4, .set_cr4 = vmx_set_cr4, .set_efer = vmx_set_efer, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c1a2dd0024b2..d724fa185bef 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10400,8 +10400,7 @@ static int __set_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs,
vcpu->arch.cr2 = sregs->cr2; *mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3; - vcpu->arch.cr3 = sregs->cr3; - kvm_register_mark_available(vcpu, VCPU_EXREG_CR3); + static_call(kvm_x86_set_cr3)(vcpu, sregs->cr3);
kvm_set_cr8(vcpu, sregs->cr8);