From: Fred Griffoul <fgriffo@amazon.co.uk>
Add a selftest to validate nested VMX context switching between multiple L2 vCPUs running on the same L1 vCPU. The test exercises both the direct VMX interface (using vmptrld/vmclear operations) and the enlightened VMCS (eVMCS) interface used in Hyper-V nested scenarios.
The test creates multiple VMCS structures and switches between them, then verifies that the nested_context KVM counters (reuse and recycle) match the values expected for the given number of L2 vCPUs and number of switches.
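By default the test sweeps 2 to 16 L2 vCPUs with 4 switches each, covering the VMX vmlaunch (migrate) and vmresume (sched) variants plus eVMCS when KVM_CAP_HYPERV_ENLIGHTENED_VMCS is available. A single configuration can also be selected on the command line; an illustrative invocation (run from the selftests kvm output directory) could look like:

  # 8 L2 vCPUs, 10 switches, vmclear/migrate variant, VPID disabled
  ./x86/vmx_l2_switch_test -c 8 -s 10 -r -v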
Signed-off-by: Fred Griffoul <fgriffo@amazon.co.uk>
---
 tools/testing/selftests/kvm/Makefile.kvm        |   1 +
 .../selftests/kvm/x86/vmx_l2_switch_test.c      | 416 ++++++++++++++++++
 2 files changed, 417 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/x86/vmx_l2_switch_test.c
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index 3431568d837e..5d47afa5789b 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -138,6 +138,7 @@ TEST_GEN_PROGS_x86 += x86/triple_fault_event_test
 TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test
 TEST_GEN_PROGS_x86 += x86/aperfmperf_test
 TEST_GEN_PROGS_x86 += x86/vmx_apic_update_test
+TEST_GEN_PROGS_x86 += x86/vmx_l2_switch_test
 TEST_GEN_PROGS_x86 += access_tracking_perf_test
 TEST_GEN_PROGS_x86 += coalesced_io_test
 TEST_GEN_PROGS_x86 += dirty_log_perf_test
diff --git a/tools/testing/selftests/kvm/x86/vmx_l2_switch_test.c b/tools/testing/selftests/kvm/x86/vmx_l2_switch_test.c
new file mode 100644
index 000000000000..5ec0da2f8386
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/vmx_l2_switch_test.c
@@ -0,0 +1,416 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test nested VMX context switching between multiple VMCS structures
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#define L2_GUEST_STACK_SIZE 64
+#define L2_VCPU_MAX 16
+
+struct l2_vcpu_config {
+	vm_vaddr_t hv_pages_gva;	/* Guest VA for eVMCS */
+	vm_vaddr_t vmx_pages_gva;	/* Guest VA for VMX pages */
+	unsigned long stack[L2_GUEST_STACK_SIZE];
+	uint16_t vpid;
+};
+
+struct l1_test_config {
+	struct l2_vcpu_config l2_vcpus[L2_VCPU_MAX];
+	uint64_t hypercall_gpa;
+	uint32_t nr_l2_vcpus;
+	uint32_t nr_switches;
+	bool enable_vpid;
+	bool use_evmcs;
+	bool sched_only;
+};
+
+static void l2_guest(void)
+{
+	while (1)
+		vmcall();
+}
+
+static void run_l2_guest_evmcs(struct hyperv_test_pages *hv_pages,
+			       struct vmx_pages *vmx,
+			       void *guest_rip,
+			       void *guest_rsp,
+			       uint16_t vpid)
+{
+	GUEST_ASSERT(load_evmcs(hv_pages));
+	prepare_vmcs(vmx, guest_rip, guest_rsp);
+	current_evmcs->hv_enlightenments_control.msr_bitmap = 1;
+	vmwrite(VIRTUAL_PROCESSOR_ID, vpid);
+
+	GUEST_ASSERT(!vmlaunch());
+	GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_VMCALL);
+	current_evmcs->guest_rip += 3; /* vmcall */
+
+	GUEST_ASSERT(!vmresume());
+	GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_VMCALL);
+}
+
+static void run_l2_guest_vmx_migrate(struct vmx_pages *vmx,
+				     void *guest_rip,
+				     void *guest_rsp,
+				     uint16_t vpid,
+				     bool start)
+{
+	uint32_t control;
+
+	/*
+	 * Emulate L2 vCPU migration: vmptrld/vmlaunch/vmclear
+	 */
+
+	if (start)
+		GUEST_ASSERT(load_vmcs(vmx));
+	else
+		GUEST_ASSERT(!vmptrld(vmx->vmcs_gpa));
+
+	prepare_vmcs(vmx, guest_rip, guest_rsp);
+
+	control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+	control |= CPU_BASED_USE_MSR_BITMAPS;
+	vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+	vmwrite(VIRTUAL_PROCESSOR_ID, vpid);
+
+	GUEST_ASSERT(!vmlaunch());
+	GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_VMCALL);
+
+	GUEST_ASSERT(vmptrstz() == vmx->vmcs_gpa);
+	GUEST_ASSERT(!vmclear(vmx->vmcs_gpa));
+}
+
+static void run_l2_guest_vmx_sched(struct vmx_pages *vmx,
+				   void *guest_rip,
+				   void *guest_rsp,
+				   uint16_t vpid,
+				   bool start)
+{
+	/*
+	 * Emulate L2 vCPU multiplexing: vmptrld/vmresume
+	 */
+
+	if (start) {
+		uint32_t control;
+
+		GUEST_ASSERT(load_vmcs(vmx));
+		prepare_vmcs(vmx, guest_rip, guest_rsp);
+
+		control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+		control |= CPU_BASED_USE_MSR_BITMAPS;
+		vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+		vmwrite(VIRTUAL_PROCESSOR_ID, vpid);
+
+		GUEST_ASSERT(!vmlaunch());
+	} else {
+		GUEST_ASSERT(!vmptrld(vmx->vmcs_gpa));
+		GUEST_ASSERT(!vmresume());
+	}
+
+	GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_VMCALL);
+
+	vmwrite(GUEST_RIP,
+		vmreadz(GUEST_RIP) + vmreadz(VM_EXIT_INSTRUCTION_LEN));
+}
+
+static void l1_guest_evmcs(struct l1_test_config *config)
+{
+	struct hyperv_test_pages *hv_pages;
+	struct vmx_pages *vmx_pages;
+	uint32_t i, j;
+
+	/* Initialize Hyper-V MSRs */
+	wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+	wrmsr(HV_X64_MSR_HYPERCALL, config->hypercall_gpa);
+
+	/* Enable VP assist page */
+	hv_pages = (struct hyperv_test_pages *)config->l2_vcpus[0].hv_pages_gva;
+	enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist);
+
+	/* Enable eVMCS */
+	evmcs_enable();
+
+	vmx_pages = (struct vmx_pages *)config->l2_vcpus[0].vmx_pages_gva;
+	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+
+	for (i = 0; i < config->nr_switches; i++) {
+		for (j = 0; j < config->nr_l2_vcpus; j++) {
+			struct l2_vcpu_config *l2 = &config->l2_vcpus[j];
+
+			hv_pages = (struct hyperv_test_pages *)l2->hv_pages_gva;
+			vmx_pages = (struct vmx_pages *)l2->vmx_pages_gva;
+
+			run_l2_guest_evmcs(hv_pages, vmx_pages, l2_guest,
+					   &l2->stack[L2_GUEST_STACK_SIZE],
+					   l2->vpid);
+		}
+	}
+
+	GUEST_DONE();
+}
+
+static void l1_guest_vmx(struct l1_test_config *config)
+{
+	struct vmx_pages *vmx_pages;
+	uint32_t i, j;
+
+	vmx_pages = (struct vmx_pages *)config->l2_vcpus[0].vmx_pages_gva;
+	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+
+	for (i = 0; i < config->nr_switches; i++) {
+		for (j = 0; j < config->nr_l2_vcpus; j++) {
+			struct l2_vcpu_config *l2 = &config->l2_vcpus[j];
+
+			vmx_pages = (struct vmx_pages *)l2->vmx_pages_gva;
+
+			if (config->sched_only)
+				run_l2_guest_vmx_sched(vmx_pages, l2_guest,
+						       &l2->stack[L2_GUEST_STACK_SIZE],
+						       l2->vpid, i == 0);
+			else
+				run_l2_guest_vmx_migrate(vmx_pages, l2_guest,
+							 &l2->stack[L2_GUEST_STACK_SIZE],
+							 l2->vpid, i == 0);
+		}
+	}
+
+	if (config->sched_only) {
+		for (j = 0; j < config->nr_l2_vcpus; j++) {
+			struct l2_vcpu_config *l2 = &config->l2_vcpus[j];
+
+			vmx_pages = (struct vmx_pages *)l2->vmx_pages_gva;
+			vmclear(vmx_pages->vmcs_gpa);
+		}
+	}
+
+	GUEST_DONE();
+}
+
+static void vcpu_clone_hyperv_test_pages(struct kvm_vm *vm,
+					 vm_vaddr_t src_gva,
+					 vm_vaddr_t *dst_gva)
+{
+	struct hyperv_test_pages *src, *dst;
+	vm_vaddr_t evmcs_gva;
+
+	*dst_gva = vm_vaddr_alloc_page(vm);
+
+	src = addr_gva2hva(vm, src_gva);
+	dst = addr_gva2hva(vm, *dst_gva);
+	memcpy(dst, src, sizeof(*dst));
+
+	/* Allocate a new eVMCS page */
+	evmcs_gva = vm_vaddr_alloc_page(vm);
+	dst->enlightened_vmcs = (void *)evmcs_gva;
+	dst->enlightened_vmcs_hva = addr_gva2hva(vm, evmcs_gva);
+	dst->enlightened_vmcs_gpa = addr_gva2gpa(vm, evmcs_gva);
+}
+
+static void prepare_vcpu(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
+			 uint32_t nr_l2_vcpus, uint32_t nr_switches,
+			 bool enable_vpid, bool use_evmcs,
+			 bool sched_only)
+{
+	vm_vaddr_t config_gva;
+	struct l1_test_config *config;
+	vm_vaddr_t hypercall_page_gva = 0;
+	uint32_t i;
+
+	TEST_ASSERT(nr_l2_vcpus <= L2_VCPU_MAX,
+		    "Too many L2 vCPUs: %u (max %u)", nr_l2_vcpus, L2_VCPU_MAX);
+
+	/* Allocate the config structure in guest memory */
+	config_gva = vm_vaddr_alloc(vm, sizeof(*config), 0x1000);
+	config = addr_gva2hva(vm, config_gva);
+	memset(config, 0, sizeof(*config));
+
+	if (use_evmcs) {
+		/* Allocate the hypercall page */
+		hypercall_page_gva = vm_vaddr_alloc_page(vm);
+		memset(addr_gva2hva(vm, hypercall_page_gva), 0, getpagesize());
+		config->hypercall_gpa = addr_gva2gpa(vm, hypercall_page_gva);
+
+		/* Enable Hyper-V enlightenments */
+		vcpu_set_hv_cpuid(vcpu);
+		vcpu_enable_evmcs(vcpu);
+	}
+
+	/* Allocate resources for each L2 vCPU */
+	for (i = 0; i < nr_l2_vcpus; i++) {
+		vm_vaddr_t vmx_pages_gva;
+
+		/* Allocate VMX pages (needed for both VMX and eVMCS) */
+		vcpu_alloc_vmx(vm, &vmx_pages_gva);
+		config->l2_vcpus[i].vmx_pages_gva = vmx_pages_gva;
+
+		if (use_evmcs) {
+			vm_vaddr_t hv_pages_gva;
+
+			/* Allocate or clone hyperv_test_pages */
+			if (i == 0) {
+				vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva);
+			} else {
+				vm_vaddr_t first_hv_gva =
+					config->l2_vcpus[0].hv_pages_gva;
+				vcpu_clone_hyperv_test_pages(vm, first_hv_gva,
+							     &hv_pages_gva);
+			}
+			config->l2_vcpus[i].hv_pages_gva = hv_pages_gva;
+		}
+
+		/* Set VPID */
+		config->l2_vcpus[i].vpid = enable_vpid ? (i + 3) : 0;
+	}
+
+	config->nr_l2_vcpus = nr_l2_vcpus;
+	config->nr_switches = nr_switches;
+	config->enable_vpid = enable_vpid;
+	config->use_evmcs = use_evmcs;
+	config->sched_only = use_evmcs ? false : sched_only;
+
+	/* Pass a single pointer to the config structure */
+	vcpu_args_set(vcpu, 1, config_gva);
+
+	if (use_evmcs)
+		vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id);
+}
+
+static bool opt_enable_vpid = true;
+static const char *progname;
+
+static void check_stats(struct kvm_vm *vm,
+			uint32_t nr_l2_vcpus,
+			uint32_t nr_switches,
+			bool use_evmcs,
+			bool sched_only)
+{
+	uint64_t reuse = 0;
+	uint64_t recycle = 0;
+
+	reuse = vm_get_stat(vm, nested_context_reuse);
+	recycle = vm_get_stat(vm, nested_context_recycle);
+
+	if (nr_l2_vcpus <= KVM_NESTED_OVERSUB_RATIO) {
+		TEST_ASSERT_EQ(reuse, nr_l2_vcpus * (nr_switches - 1));
+		TEST_ASSERT_EQ(recycle, 0);
+	} else {
+		if (sched_only) {
+			/*
+			 * In scheduling-only mode no L2 VMCS is cleared, so
+			 * we reuse up to the maximum number of contexts but
+			 * cannot recycle any of them.
+			 */
+			TEST_ASSERT_EQ(reuse,
+				       KVM_NESTED_OVERSUB_RATIO *
+				       (nr_switches - 1));
+			TEST_ASSERT_EQ(recycle, 0);
+		} else {
+			/*
+			 * In migration mode we cycle in LRU order, so no
+			 * context can be reused; they are all recycled.
+			 */
+			TEST_ASSERT_EQ(reuse, 0);
+			TEST_ASSERT_EQ(recycle,
+				       (nr_l2_vcpus * nr_switches) -
+				       KVM_NESTED_OVERSUB_RATIO);
+		}
+	}
+
+	printf("%s %u switches with %u L2 vCPUs (%s) reuse %" PRIu64
+	       " recycle %" PRIu64 "\n", progname, nr_switches, nr_l2_vcpus,
+	       use_evmcs ? "evmcs" : (sched_only ? "vmx sched" : "vmx migrate"),
+	       reuse, recycle);
+}
+
+static void run_test(uint32_t nr_l2_vcpus, uint32_t nr_switches,
+		     bool use_evmcs, bool sched_only)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	struct ucall uc;
+
+	vm = vm_create_with_one_vcpu(&vcpu, use_evmcs
+				     ? l1_guest_evmcs : l1_guest_vmx);
+
+	prepare_vcpu(vm, vcpu, nr_l2_vcpus, nr_switches,
+		     opt_enable_vpid, use_evmcs, sched_only);
+
+	for (;;) {
+		vcpu_run(vcpu);
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+		switch (get_ucall(vcpu, &uc)) {
+		case UCALL_DONE:
+			goto done;
+		case UCALL_ABORT:
+			REPORT_GUEST_ASSERT(uc);
+			/* NOT REACHED */
+		default:
+			TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+		}
+	}
+
+done:
+	check_stats(vm, nr_l2_vcpus, nr_switches, use_evmcs, sched_only);
+	kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+	uint32_t opt_nr_l2_vcpus = 0;
+	uint32_t opt_nr_switches = 0;
+	bool opt_sched_only = true;
+	int opt;
+	int i;
+
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+	progname = argv[0];
+
+	while ((opt = getopt(argc, argv, "c:rs:v")) != -1) {
+		switch (opt) {
+		case 'c':
+			opt_nr_l2_vcpus = atoi_paranoid(optarg);
+			break;
+		case 'r':
+			opt_sched_only = false;
+			break;
+		case 's':
+			opt_nr_switches = atoi_paranoid(optarg);
+			break;
+		case 'v':
+			opt_enable_vpid = false;
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (opt_nr_l2_vcpus && opt_nr_switches) {
+		run_test(opt_nr_l2_vcpus, opt_nr_switches, false,
+			 opt_sched_only);
+
+		if (kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS))
+			run_test(opt_nr_l2_vcpus, opt_nr_switches,
+				 true, false);
+	} else {
+		/* VMX vmlaunch */
+		for (i = 2; i <= 16; i++)
+			run_test(i, 4, false, false);
+
+		/* VMX vmresume */
+		for (i = 2; i <= 16; i++)
+			run_test(i, 4, false, true);
+
+		/* eVMCS */
+		if (kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
+			for (i = 2; i <= 16; i++)
+				run_test(i, 4, true, false);
+		}
+	}
+
+	return 0;
+}