Re: [PATCH v2 13/25] KVM: VMX: Handle VMX nested exception for FRED

30 Apr 2024

On Thu, Feb 08, 2024 at 01:26:33AM +0800, Xin Li wrote:
...
Set VMX nested exception bit in the VM-entry interruption information
VMCS field when injecting a nested exception using FRED event delivery
to ensure:

1) The nested exception is injected on a correct stack level.
2) The nested bit defined in FRED stack frame is set.

The event stack level used by FRED event delivery depends on whether the
event was a nested exception encountered during delivery of another event,
because a nested exception is "regarded" as happening on ring 0.  E.g.,
when #PF is configured to use stack level 1 in IA32_FRED_STKLVLS MSR:

1) nested #PF will be delivered on stack level 1 when encountered in
   ring 3.
2) normal #PF will be delivered on stack level 0 when encountered in
   ring 3.

The VMX nested-exception support ensures the correct event stack level is
chosen when a VM entry injects a nested exception.
Signed-off-by: Xin Li <xin3.li@intel.com>
Tested-by: Shan Kang <shan.kang@intel.com>

Changes since v1:

Set the nested flag when there is an original interrupt (Chao Gao).


arch/x86/include/asm/kvm_host.h |  6 +++--
arch/x86/include/asm/vmx.h      |  5 ++--
arch/x86/kvm/svm/svm.c          |  4 +--
arch/x86/kvm/vmx/vmx.c          |  8 ++++--
arch/x86/kvm/x86.c              | 46 ++++++++++++++++++++++++++-------
arch/x86/kvm/x86.h              |  1 +
6 files changed, 53 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0d88873eba63..ef278ee0b6ca 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -736,6 +736,7 @@ struct kvm_queued_exception {
   u32 error_code;
   unsigned long payload;
   bool has_payload;

bool nested;

"nested" may be lost after migration.
...
};
struct kvm_vcpu_arch {
@@ -2060,8 +2061,9 @@ int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu);
void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long payload);
-void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr);
-void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
+void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr, bool nested);
+void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr,

	     u32 error_code, bool nested);



void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault);
void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
   			    struct x86_exception *fault);
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 6b796c5c9c2b..68af74e48788 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -134,7 +134,7 @@
#define VMX_BASIC_DUAL_MONITOR_TREATMENT	BIT_ULL(49)
#define VMX_BASIC_INOUT				BIT_ULL(54)
#define VMX_BASIC_TRUE_CTLS			BIT_ULL(55)



+#define VMX_BASIC_NESTED_EXCEPTION		BIT_ULL(58)
This definition is not used in this patch.
...
/* VMX_MISC bits and bitmasks */
#define VMX_MISC_INTEL_PT			BIT_ULL(14)
@@ -407,8 +407,9 @@ enum vmcs_field {
#define INTR_INFO_INTR_TYPE_MASK        0x700           /* 10:8 */
#define INTR_INFO_DELIVER_CODE_MASK     0x800           /* 11 */
#define INTR_INFO_UNBLOCK_NMI		0x1000		/* 12 */
+#define INTR_INFO_NESTED_EXCEPTION_MASK	0x2000		/* 13 */
#define INTR_INFO_VALID_MASK            0x80000000      /* 31 */
-#define INTR_INFO_RESVD_BITS_MASK       0x7ffff000
+#define INTR_INFO_RESVD_BITS_MASK       0x7fffd000
#define VECTORING_INFO_VECTOR_MASK           	INTR_INFO_VECTOR_MASK
#define VECTORING_INFO_TYPE_MASK        	INTR_INFO_INTR_TYPE_MASK
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index e90b429c84f1..c220b690a37c 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4057,10 +4057,10 @@ static void svm_complete_interrupts(struct kvm_vcpu *vcpu)
if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
	u32 err = svm->vmcb->control.exit_int_info_err;


	kvm_requeue_exception_e(vcpu, vector, err);




	kvm_requeue_exception_e(vcpu, vector, err, false);


} else



	kvm_requeue_exception(vcpu, vector);




	kvm_requeue_exception(vcpu, vector, false);

break;
case SVM_EXITINTINFO_TYPE_INTR:
kvm_queue_interrupt(vcpu, vector, false);

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index f622fb90a098..1f265d526daf 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1891,6 +1891,8 @@ static void vmx_inject_exception(struct kvm_vcpu *vcpu)
   			event_data = to_vmx(vcpu)->fred_xfd_event_data;
	vmcs_write64(INJECTED_EVENT_DATA, event_data);



	intr_info |= ex->nested ? INTR_INFO_NESTED_EXCEPTION_MASK : 0;

}
}

@@ -7281,9 +7283,11 @@ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu, bool vectoring)
   	}
if (event_id & INTR_INFO_DELIVER_CODE_MASK)


	kvm_requeue_exception_e(vcpu, vector, vmcs_read32(error_code_field));




	kvm_requeue_exception_e(vcpu, vector, vmcs_read32(error_code_field),


				event_id & INTR_INFO_NESTED_EXCEPTION_MASK);

else


	kvm_requeue_exception(vcpu, vector);




	kvm_requeue_exception(vcpu, vector,


			      event_id & INTR_INFO_NESTED_EXCEPTION_MASK);

break;
case INTR_TYPE_SOFT_INTR:
vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 00c0062726ae..725819262085 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -645,7 +645,8 @@ static void kvm_leave_nested(struct kvm_vcpu *vcpu)
static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
   	unsigned nr, bool has_error, u32 error_code,

       bool has_payload, unsigned long payload, bool reinject)




       bool has_payload, unsigned long payload,


bool reinject, bool nested)



{
   u32 prev_nr;
   int class1, class2;
@@ -696,6 +697,13 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
   		vcpu->arch.exception.pending = true;
   		vcpu->arch.exception.injected = false;
   	}


vcpu->arch.exception.nested = vcpu->arch.exception.nested ||


			      (kvm_is_fred_enabled(vcpu) &&


			       ((reinject && nested) ||


			        vcpu->arch.nmi_injected ||


			        vcpu->arch.interrupt.injected));



You can set the nested flag regardless of FRED because the sole place using
this information (vmx_inject_exception()) is already guarded by
kvm_is_fred_enabled().
I would also drop the check on @reinject to make @reinject and @nested
orthogonal (i.e., avoid the artificial rule that nested interrupts should be
queued by "reinject" only).
So, how about:
    	if (vcpu->arch.nmi_injected || vcpu->arch.interrupt.injected ||
    	    nested)
    		vcpu->arch.exception.nested = true;
...

vcpu->arch.exception.has_error_code = has_error;
  vcpu->arch.exception.vector = nr;
  vcpu->arch.exception.error_code = error_code;

@@ -725,8 +733,28 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
   	vcpu->arch.exception.injected = false;
   	vcpu->arch.exception.pending = false;

/*


 * A #DF is NOT a nested event per its definition, however per


 * FRED spec 5.0 Appendix B, its delivery determines the new


 * stack level as is done for events occurring when CPL = 0.


 */


vcpu->arch.exception.nested = false;


kvm_queue_exception_e(vcpu, DF_VECTOR, 0);
 } else {
/*


 * FRED spec 5.0 Appendix B: delivery of a nested exception


 * determines the new stack level as is done for events


 * occurring when CPL = 0.


 *


 * IOW, FRED event delivery of an event encountered in ring 3


 * normally uses stack level 0 unconditionally.  However, if


 * the event is an exception nested on any earlier event,


 * delivery of the nested exception will consult the FRED MSR


 * IA32_FRED_STKLVLS to determine which stack level to use.


 */


vcpu->arch.exception.nested = kvm_is_fred_enabled(vcpu);



As said above, the nested flag can be set regardless of FRED.

    

2025

2024

2023

2022

2021

2020

2019

2018

2017

Re: [PATCH v2 13/25] KVM: VMX: Handle VMX nested exception for FRED