From: Fred Griffoul <fgriffo@amazon.co.uk>
Replace the eVMCS kvm_host_map with a gfn_to_pfn_cache to properly handle memslot changes and unify with other pfncaches in nVMX.
The change introduces proper locking/unlocking semantics for eVMCS access through the nested_lock_evmcs() and nested_unlock_evmcs() helpers.
Signed-off-by: Fred Griffoul <fgriffo@amazon.co.uk>
---
 arch/x86/kvm/vmx/hyperv.h |  21 ++++----
 arch/x86/kvm/vmx/nested.c | 109 ++++++++++++++++++++++++++------------
 arch/x86/kvm/vmx/vmx.h    |   3 +-
 3 files changed, 88 insertions(+), 45 deletions(-)
diff --git a/arch/x86/kvm/vmx/hyperv.h b/arch/x86/kvm/vmx/hyperv.h index 3c7fea501ca5..3b6fcf8dff64 100644 --- a/arch/x86/kvm/vmx/hyperv.h +++ b/arch/x86/kvm/vmx/hyperv.h @@ -37,11 +37,6 @@ static inline bool nested_vmx_is_evmptr12_set(struct vcpu_vmx *vmx) return evmptr_is_set(vmx->nested.hv_evmcs_vmptr); }
-static inline struct hv_enlightened_vmcs *nested_vmx_evmcs(struct vcpu_vmx *vmx) -{ - return vmx->nested.hv_evmcs; -} - static inline bool guest_cpu_cap_has_evmcs(struct kvm_vcpu *vcpu) { /* @@ -70,6 +65,8 @@ void nested_evmcs_filter_control_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 * int nested_evmcs_check_controls(struct vmcs12 *vmcs12); bool nested_evmcs_l2_tlb_flush_enabled(struct kvm_vcpu *vcpu); void vmx_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu); +struct hv_enlightened_vmcs *nested_lock_evmcs(struct vcpu_vmx *vmx); +void nested_unlock_evmcs(struct vcpu_vmx *vmx); #else static inline bool evmptr_is_valid(u64 evmptr) { @@ -91,11 +88,6 @@ static inline bool nested_vmx_is_evmptr12_set(struct vcpu_vmx *vmx) return false; }
-static inline struct hv_enlightened_vmcs *nested_vmx_evmcs(struct vcpu_vmx *vmx) -{ - return NULL; -} - static inline u32 nested_evmcs_clean_fields(struct vcpu_vmx *vmx) { return 0; @@ -105,6 +97,15 @@ static inline bool nested_evmcs_msr_bitmap(struct vcpu_vmx *vmx) { return false; } + +static inline struct hv_enlightened_vmcs *nested_lock_evmcs(struct vcpu_vmx *vmx) +{ + return NULL; +} + +static inline void nested_unlock_evmcs(struct vcpu_vmx *vmx) +{ +} #endif
#endif /* __KVM_X86_VMX_HYPERV_H */ diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index aec150612818..d910508e3c22 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -232,8 +232,6 @@ static inline void nested_release_evmcs(struct kvm_vcpu *vcpu) struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu);
- kvm_vcpu_unmap(vcpu, &vmx->nested.hv_evmcs_map); - vmx->nested.hv_evmcs = NULL; vmx->nested.hv_evmcs_vmptr = EVMPTR_INVALID; vmx->nested.hv_clean_fields = 0; vmx->nested.hv_msr_bitmap = false; @@ -265,7 +263,7 @@ static bool nested_evmcs_handle_vmclear(struct kvm_vcpu *vcpu, gpa_t vmptr) !evmptr_is_valid(nested_get_evmptr(vcpu))) return false;
- if (nested_vmx_evmcs(vmx) && vmptr == vmx->nested.hv_evmcs_vmptr) + if (vmptr == vmx->nested.hv_evmcs_vmptr) nested_release_evmcs(vcpu);
return true; @@ -393,6 +391,9 @@ static void free_nested(struct kvm_vcpu *vcpu) kvm_gpc_deactivate(&vmx->nested.virtual_apic_cache); kvm_gpc_deactivate(&vmx->nested.apic_access_page_cache); kvm_gpc_deactivate(&vmx->nested.msr_bitmap_cache); +#ifdef CONFIG_KVM_HYPERV + kvm_gpc_deactivate(&vmx->nested.hv_evmcs_cache); +#endif
free_vpid(vmx->nested.vpid02); vmx->nested.posted_intr_nv = -1; @@ -1735,11 +1736,12 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) vmcs_load(vmx->loaded_vmcs->vmcs); }
-static void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx, u32 hv_clean_fields) +static void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx, + struct hv_enlightened_vmcs *evmcs, + u32 hv_clean_fields) { #ifdef CONFIG_KVM_HYPERV struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12; - struct hv_enlightened_vmcs *evmcs = nested_vmx_evmcs(vmx); struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(&vmx->vcpu);
/* HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE */ @@ -1987,7 +1989,7 @@ static void copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx) { #ifdef CONFIG_KVM_HYPERV struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12; - struct hv_enlightened_vmcs *evmcs = nested_vmx_evmcs(vmx); + struct hv_enlightened_vmcs *evmcs = nested_lock_evmcs(vmx);
/* * Should not be changed by KVM: @@ -2155,6 +2157,7 @@ static void copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx)
evmcs->guest_bndcfgs = vmcs12->guest_bndcfgs;
+ nested_unlock_evmcs(vmx); return; #else /* CONFIG_KVM_HYPERV */ KVM_BUG_ON(1, vmx->vcpu.kvm); @@ -2171,6 +2174,8 @@ static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld( #ifdef CONFIG_KVM_HYPERV struct vcpu_vmx *vmx = to_vmx(vcpu); struct hv_enlightened_vmcs *evmcs; + struct gfn_to_pfn_cache *gpc; + enum nested_evmptrld_status status = EVMPTRLD_SUCCEEDED; bool evmcs_gpa_changed = false; u64 evmcs_gpa;
@@ -2183,17 +2188,19 @@ static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld( return EVMPTRLD_DISABLED; }
+ gpc = &vmx->nested.hv_evmcs_cache; + if (nested_gpc_lock(gpc, evmcs_gpa)) { + nested_release_evmcs(vcpu); + return EVMPTRLD_ERROR; + } + + evmcs = gpc->khva; + if (unlikely(evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) { vmx->nested.current_vmptr = INVALID_GPA;
nested_release_evmcs(vcpu);
- if (kvm_vcpu_map(vcpu, gpa_to_gfn(evmcs_gpa), - &vmx->nested.hv_evmcs_map)) - return EVMPTRLD_ERROR; - - vmx->nested.hv_evmcs = vmx->nested.hv_evmcs_map.hva; - /* * Currently, KVM only supports eVMCS version 1 * (== KVM_EVMCS_VERSION) and thus we expect guest to set this @@ -2216,10 +2223,11 @@ static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld( * eVMCS version or VMCS12 revision_id as valid values for first * u32 field of eVMCS. */ - if ((vmx->nested.hv_evmcs->revision_id != KVM_EVMCS_VERSION) && - (vmx->nested.hv_evmcs->revision_id != VMCS12_REVISION)) { + if ((evmcs->revision_id != KVM_EVMCS_VERSION) && + (evmcs->revision_id != VMCS12_REVISION)) { nested_release_evmcs(vcpu); - return EVMPTRLD_VMFAIL; + status = EVMPTRLD_VMFAIL; + goto unlock; }
vmx->nested.hv_evmcs_vmptr = evmcs_gpa; @@ -2244,14 +2252,11 @@ static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld( * between different L2 guests as KVM keeps a single VMCS12 per L1. */ if (from_launch || evmcs_gpa_changed) { - vmx->nested.hv_evmcs->hv_clean_fields &= - ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; - + evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; vmx->nested.force_msr_bitmap_recalc = true; }
/* Cache evmcs fields to avoid reading evmcs after copy to vmcs12 */ - evmcs = vmx->nested.hv_evmcs; vmx->nested.hv_clean_fields = evmcs->hv_clean_fields; vmx->nested.hv_flush_hypercall = evmcs->hv_enlightenments_control.nested_flush_hypercall; vmx->nested.hv_msr_bitmap = evmcs->hv_enlightenments_control.msr_bitmap; @@ -2260,13 +2265,15 @@ static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld( struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
if (likely(!vmcs12->hdr.shadow_vmcs)) { - copy_enlightened_to_vmcs12(vmx, vmx->nested.hv_clean_fields); + copy_enlightened_to_vmcs12(vmx, evmcs, vmx->nested.hv_clean_fields); /* Enlightened VMCS doesn't have launch state */ vmcs12->launch_state = !from_launch; } }
- return EVMPTRLD_SUCCEEDED; +unlock: + nested_gpc_unlock(gpc); + return status; #else return EVMPTRLD_DISABLED; #endif @@ -2771,7 +2778,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, enum vm_entry_failure_code *entry_failure_code) { struct vcpu_vmx *vmx = to_vmx(vcpu); - struct hv_enlightened_vmcs *evmcs; bool load_guest_pdptrs_vmcs12 = false;
if (vmx->nested.dirty_vmcs12 || nested_vmx_is_evmptr12_valid(vmx)) { @@ -2909,9 +2915,13 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, * bits when it changes a field in eVMCS. Mark all fields as clean * here. */ - evmcs = nested_vmx_evmcs(vmx); - if (evmcs) + if (nested_vmx_is_evmptr12_valid(vmx)) { + struct hv_enlightened_vmcs *evmcs; + + evmcs = nested_lock_evmcs(vmx); evmcs->hv_clean_fields |= HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; + nested_unlock_evmcs(vmx); + }
return 0; } @@ -4147,6 +4157,18 @@ static void *nested_gpc_lock_if_active(struct gfn_to_pfn_cache *gpc) return gpc->khva; }
+#ifdef CONFIG_KVM_HYPERV +struct hv_enlightened_vmcs *nested_lock_evmcs(struct vcpu_vmx *vmx) +{ + return nested_gpc_lock_if_active(&vmx->nested.hv_evmcs_cache); +} + +void nested_unlock_evmcs(struct vcpu_vmx *vmx) +{ + nested_gpc_unlock(&vmx->nested.hv_evmcs_cache); +} +#endif + static struct pi_desc *nested_lock_pi_desc(struct vcpu_vmx *vmx) { u8 *pi_desc_page; @@ -5636,6 +5658,9 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) kvm_gpc_init_for_vcpu(&vmx->nested.virtual_apic_cache, vcpu); kvm_gpc_init_for_vcpu(&vmx->nested.pi_desc_cache, vcpu);
+#ifdef CONFIG_KVM_HYPERV + kvm_gpc_init(&vmx->nested.hv_evmcs_cache, vcpu->kvm); +#endif vmx->nested.vmcs02_initialized = false; vmx->nested.vmxon = true;
@@ -5887,6 +5912,8 @@ static int handle_vmread(struct kvm_vcpu *vcpu) /* Read the field, zero-extended to a u64 value */ value = vmcs12_read_any(vmcs12, field, offset); } else { + struct hv_enlightened_vmcs *evmcs; + /* * Hyper-V TLFS (as of 6.0b) explicitly states, that while an * enlightened VMCS is active VMREAD/VMWRITE instructions are @@ -5905,7 +5932,9 @@ static int handle_vmread(struct kvm_vcpu *vcpu) return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
/* Read the field, zero-extended to a u64 value */ - value = evmcs_read_any(nested_vmx_evmcs(vmx), field, offset); + evmcs = nested_lock_evmcs(vmx); + value = evmcs_read_any(evmcs, field, offset); + nested_unlock_evmcs(vmx); }
/* @@ -6935,6 +6964,27 @@ bool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu) return true; }
+static void vmx_get_enlightened_to_vmcs12(struct vcpu_vmx *vmx) +{ +#ifdef CONFIG_KVM_HYPERV + struct hv_enlightened_vmcs *evmcs; + struct kvm_vcpu *vcpu = &vmx->vcpu; + + kvm_vcpu_srcu_read_lock(vcpu); + evmcs = nested_lock_evmcs(vmx); + /* + * L1 hypervisor is not obliged to keep eVMCS + * clean fields data always up-to-date while + * not in guest mode, 'hv_clean_fields' is only + * supposed to be actual upon vmentry so we need + * to ignore it here and do full copy. + */ + copy_enlightened_to_vmcs12(vmx, evmcs, 0); + nested_unlock_evmcs(vmx); + kvm_vcpu_srcu_read_unlock(vcpu); +#endif /* CONFIG_KVM_HYPERV */ +} + static int vmx_get_nested_state(struct kvm_vcpu *vcpu, struct kvm_nested_state __user *user_kvm_nested_state, u32 user_data_size) @@ -7025,14 +7075,7 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu)); if (!vmx->nested.need_vmcs12_to_shadow_sync) { if (nested_vmx_is_evmptr12_valid(vmx)) - /* - * L1 hypervisor is not obliged to keep eVMCS - * clean fields data always up-to-date while - * not in guest mode, 'hv_clean_fields' is only - * supposed to be actual upon vmentry so we need - * to ignore it here and do full copy. - */ - copy_enlightened_to_vmcs12(vmx, 0); + vmx_get_enlightened_to_vmcs12(vmx); else if (enable_shadow_vmcs) copy_shadow_to_vmcs12(vmx); } diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 87708af502f3..4da5a42b0c60 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -208,8 +208,7 @@ struct nested_vmx { u32 hv_clean_fields; bool hv_msr_bitmap; bool hv_flush_hypercall; - struct hv_enlightened_vmcs *hv_evmcs; - struct kvm_host_map hv_evmcs_map; + struct gfn_to_pfn_cache hv_evmcs_cache; #endif };