On Thu, Feb 08, 2024 at 01:26:33AM +0800, Xin Li wrote:
Set VMX nested exception bit in the VM-entry interruption information VMCS field when injecting a nested exception using FRED event delivery to ensure:
- The nested exception is injected on a correct stack level.
- The nested bit defined in FRED stack frame is set.
The event stack level used by FRED event delivery depends on whether the event was a nested exception encountered during delivery of another event, because a nested exception is "regarded" as happening on ring 0. E.g., when #PF is configured to use stack level 1 in IA32_FRED_STKLVLS MSR:
- nested #PF will be delivered on stack level 1 when encountered in ring 3.
- normal #PF will be delivered on stack level 0 when encountered in ring 3.
The VMX nested-exception support ensures the correct event stack level is chosen when a VM entry injects a nested exception.
Signed-off-by: Xin Li xin3.li@intel.com Tested-by: Shan Kang shan.kang@intel.com
Changes since v1:
- Set the nested flag when there is an original interrupt (Chao Gao).
arch/x86/include/asm/kvm_host.h | 6 +++-- arch/x86/include/asm/vmx.h | 5 ++-- arch/x86/kvm/svm/svm.c | 4 +-- arch/x86/kvm/vmx/vmx.c | 8 ++++-- arch/x86/kvm/x86.c | 46 ++++++++++++++++++++++++++------- arch/x86/kvm/x86.h | 1 + 6 files changed, 53 insertions(+), 17 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0d88873eba63..ef278ee0b6ca 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -736,6 +736,7 @@ struct kvm_queued_exception { u32 error_code; unsigned long payload; bool has_payload;
- bool nested;
"nested" may be lost after migration.
};
struct kvm_vcpu_arch { @@ -2060,8 +2061,9 @@ int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu); void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long payload); -void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr); -void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); +void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr, bool nested); +void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr,
u32 error_code, bool nested);
void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 6b796c5c9c2b..68af74e48788 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -134,7 +134,7 @@ #define VMX_BASIC_DUAL_MONITOR_TREATMENT BIT_ULL(49) #define VMX_BASIC_INOUT BIT_ULL(54) #define VMX_BASIC_TRUE_CTLS BIT_ULL(55)
+#define VMX_BASIC_NESTED_EXCEPTION BIT_ULL(58)
This definition is not used in this patch.
/* VMX_MISC bits and bitmasks */ #define VMX_MISC_INTEL_PT BIT_ULL(14) @@ -407,8 +407,9 @@ enum vmcs_field { #define INTR_INFO_INTR_TYPE_MASK 0x700 /* 10:8 */ #define INTR_INFO_DELIVER_CODE_MASK 0x800 /* 11 */ #define INTR_INFO_UNBLOCK_NMI 0x1000 /* 12 */ +#define INTR_INFO_NESTED_EXCEPTION_MASK 0x2000 /* 13 */ #define INTR_INFO_VALID_MASK 0x80000000 /* 31 */ -#define INTR_INFO_RESVD_BITS_MASK 0x7ffff000 +#define INTR_INFO_RESVD_BITS_MASK 0x7fffd000
#define VECTORING_INFO_VECTOR_MASK INTR_INFO_VECTOR_MASK #define VECTORING_INFO_TYPE_MASK INTR_INFO_INTR_TYPE_MASK diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index e90b429c84f1..c220b690a37c 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4057,10 +4057,10 @@ static void svm_complete_interrupts(struct kvm_vcpu *vcpu)
if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) { u32 err = svm->vmcb->control.exit_int_info_err;
kvm_requeue_exception_e(vcpu, vector, err);
kvm_requeue_exception_e(vcpu, vector, err, false);
} else
kvm_requeue_exception(vcpu, vector);
break; case SVM_EXITINTINFO_TYPE_INTR: kvm_queue_interrupt(vcpu, vector, false);kvm_requeue_exception(vcpu, vector, false);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index f622fb90a098..1f265d526daf 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1891,6 +1891,8 @@ static void vmx_inject_exception(struct kvm_vcpu *vcpu) event_data = to_vmx(vcpu)->fred_xfd_event_data;
vmcs_write64(INJECTED_EVENT_DATA, event_data);
} }intr_info |= ex->nested ? INTR_INFO_NESTED_EXCEPTION_MASK : 0;
@@ -7281,9 +7283,11 @@ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu, bool vectoring) }
if (event_id & INTR_INFO_DELIVER_CODE_MASK)
kvm_requeue_exception_e(vcpu, vector, vmcs_read32(error_code_field));
kvm_requeue_exception_e(vcpu, vector, vmcs_read32(error_code_field),
elseevent_id & INTR_INFO_NESTED_EXCEPTION_MASK);
kvm_requeue_exception(vcpu, vector);
kvm_requeue_exception(vcpu, vector,
break; case INTR_TYPE_SOFT_INTR: vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);event_id & INTR_INFO_NESTED_EXCEPTION_MASK);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 00c0062726ae..725819262085 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -645,7 +645,8 @@ static void kvm_leave_nested(struct kvm_vcpu *vcpu)
static void kvm_multiple_exception(struct kvm_vcpu *vcpu, unsigned nr, bool has_error, u32 error_code,
bool has_payload, unsigned long payload, bool reinject)
bool has_payload, unsigned long payload,
bool reinject, bool nested)
{ u32 prev_nr; int class1, class2; @@ -696,6 +697,13 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, vcpu->arch.exception.pending = true; vcpu->arch.exception.injected = false; }
vcpu->arch.exception.nested = vcpu->arch.exception.nested ||
(kvm_is_fred_enabled(vcpu) &&
((reinject && nested) ||
vcpu->arch.nmi_injected ||
vcpu->arch.interrupt.injected));
You can set the nested flag regardless of FRED because the sole place using such information (vmx_inject_exception()) is guarded by kvm_is_fred_enabled() already.
I would also drop the check on @reinject to make @reinject and @nested orthogonal (i.e., avoid the artificial rule that nested exceptions can be queued only via "reinject").
So, how about:

	if (vcpu->arch.nmi_injected || vcpu->arch.interrupt.injected || nested)
		vcpu->arch.exception.nested = true;
- vcpu->arch.exception.has_error_code = has_error; vcpu->arch.exception.vector = nr; vcpu->arch.exception.error_code = error_code;
@@ -725,8 +733,28 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, vcpu->arch.exception.injected = false; vcpu->arch.exception.pending = false;
/*
* A #DF is NOT a nested event per its definition, however per
* FRED spec 5.0 Appendix B, its delivery determines the new
* stack level as is done for events occurring when CPL = 0.
*/
vcpu->arch.exception.nested = false;
- kvm_queue_exception_e(vcpu, DF_VECTOR, 0); } else {
/*
* FRED spec 5.0 Appendix B: delivery of a nested exception
* determines the new stack level as is done for events
* occurring when CPL = 0.
*
* IOW, FRED event delivery of an event encountered in ring 3
* normally uses stack level 0 unconditionally. However, if
* the event is an exception nested on any earlier event,
* delivery of the nested exception will consult the FRED MSR
* IA32_FRED_STKLVLS to determine which stack level to use.
*/
vcpu->arch.exception.nested = kvm_is_fred_enabled(vcpu);
As said above, the nested flag can be set regardless of FRED.