Prior to commit 9245fd6b8531 ("KVM: x86: model canonical checks more precisely"), KVM_SET_NESTED_STATE would fail if the state was captured with L2 active, L1 had CR4.LA57 set, L2 did not, and the VMCS12.HOST_GSBASE (or other host-state field checked for canonicality) had an address greater than 48 bits wide.
Add a regression test that reproduces the KVM_SET_NESTED_STATE failure conditions. To do so, the first three patches add support for 5-level paging in the selftest L1 VM.
Jim Mattson (4): KVM: selftests: Use a loop to create guest page tables KVM: selftests: Use a loop to walk guest page tables KVM: selftests: Add VM_MODE_PXXV57_4K VM mode KVM: selftests: Add a VMX test for LA57 nested state
tools/testing/selftests/kvm/Makefile.kvm | 1 + .../testing/selftests/kvm/include/kvm_util.h | 1 + tools/testing/selftests/kvm/lib/kvm_util.c | 21 +++ .../testing/selftests/kvm/lib/x86/processor.c | 66 ++++----- tools/testing/selftests/kvm/lib/x86/vmx.c | 7 +- .../kvm/x86/vmx_la57_nested_state_test.c | 137 ++++++++++++++++++ 6 files changed, 195 insertions(+), 38 deletions(-) create mode 100644 tools/testing/selftests/kvm/x86/vmx_la57_nested_state_test.c
Walk the guest page tables via a loop when creating new mappings, instead of using unique variables for each level of the page tables.
This simplifies the code and makes it easier to support 5-level paging in the future.
Signed-off-by: Jim Mattson jmattson@google.com --- .../testing/selftests/kvm/lib/x86/processor.c | 22 +++++++------------ 1 file changed, 8 insertions(+), 14 deletions(-)
diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index d4c19ac885a9..0238e674709d 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -184,8 +184,8 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) { const uint64_t pg_size = PG_LEVEL_SIZE(level); - uint64_t *pml4e, *pdpe, *pde; - uint64_t *pte; + uint64_t *pte = &vm->pgd; + int current_level;
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Unknown or unsupported guest mode, mode: 0x%x", vm->mode); @@ -209,20 +209,14 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) * Allocate upper level page tables, if not already present. Return * early if a hugepage was created. */ - pml4e = virt_create_upper_pte(vm, &vm->pgd, vaddr, paddr, PG_LEVEL_512G, level); - if (*pml4e & PTE_LARGE_MASK) - return; - - pdpe = virt_create_upper_pte(vm, pml4e, vaddr, paddr, PG_LEVEL_1G, level); - if (*pdpe & PTE_LARGE_MASK) - return; - - pde = virt_create_upper_pte(vm, pdpe, vaddr, paddr, PG_LEVEL_2M, level); - if (*pde & PTE_LARGE_MASK) - return; + for (current_level = vm->pgtable_levels; current_level > 0; current_level--) { + pte = virt_create_upper_pte(vm, pte, vaddr, paddr, current_level, level); + if (*pte & PTE_LARGE_MASK) + return; + }
/* Fill in page table entry. */ - pte = virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K); + pte = virt_get_pte(vm, pte, vaddr, PG_LEVEL_4K); TEST_ASSERT(!(*pte & PTE_PRESENT_MASK), "PTE already present for 4k page at vaddr: 0x%lx", vaddr); *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
Walk the guest page tables via a loop when searching for a PTE, instead of using unique variables for each level of the page tables.
This simplifies the code and makes it easier to support 5-level paging in the future.
Signed-off-by: Jim Mattson jmattson@google.com --- .../testing/selftests/kvm/lib/x86/processor.c | 21 +++++++------------ 1 file changed, 8 insertions(+), 13 deletions(-)
diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index 0238e674709d..433365c8196d 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -270,7 +270,8 @@ static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level) uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, int *level) { - uint64_t *pml4e, *pdpe, *pde; + uint64_t *pte = &vm->pgd; + int current_level;
TEST_ASSERT(!vm->arch.is_pt_protected, "Walking page tables of protected guests is impossible"); @@ -291,19 +292,13 @@ uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16), "Canonical check failed. The virtual address is invalid.");
- pml4e = virt_get_pte(vm, &vm->pgd, vaddr, PG_LEVEL_512G); - if (vm_is_target_pte(pml4e, level, PG_LEVEL_512G)) - return pml4e; - - pdpe = virt_get_pte(vm, pml4e, vaddr, PG_LEVEL_1G); - if (vm_is_target_pte(pdpe, level, PG_LEVEL_1G)) - return pdpe; - - pde = virt_get_pte(vm, pdpe, vaddr, PG_LEVEL_2M); - if (vm_is_target_pte(pde, level, PG_LEVEL_2M)) - return pde; + for (current_level = vm->pgtable_levels; current_level > 0; current_level--) { + pte = virt_get_pte(vm, pte, vaddr, current_level); + if (vm_is_target_pte(pte, level, current_level)) + return pte; + }
- return virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K); + return pte; }
uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr)
Add a new VM mode, VM_MODE_PXXV57_4K, to support tests that require 5-level paging on x86. This mode sets up a 57-bit virtual address space and sets CR4.LA57 in the guest.
Signed-off-by: Jim Mattson jmattson@google.com --- .../testing/selftests/kvm/include/kvm_util.h | 1 + tools/testing/selftests/kvm/lib/kvm_util.c | 21 +++++++++++++++++ .../testing/selftests/kvm/lib/x86/processor.c | 23 ++++++++++++------- tools/testing/selftests/kvm/lib/x86/vmx.c | 7 +++--- 4 files changed, 41 insertions(+), 11 deletions(-)
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 23a506d7eca3..b6ea5d966715 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -175,6 +175,7 @@ enum vm_guest_mode { VM_MODE_P40V48_16K, VM_MODE_P40V48_64K, VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */ + VM_MODE_PXXV57_4K, /* For 48bits VA but ANY bits PA */ VM_MODE_P47V64_4K, VM_MODE_P44V64_4K, VM_MODE_P36V48_4K, diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index c3f5142b0a54..6b0e499c6e91 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -232,6 +232,7 @@ const char *vm_guest_mode_string(uint32_t i) [VM_MODE_P40V48_16K] = "PA-bits:40, VA-bits:48, 16K pages", [VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages", [VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages", + [VM_MODE_PXXV57_4K] = "PA-bits:ANY, VA-bits:57, 4K pages", [VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages", [VM_MODE_P44V64_4K] = "PA-bits:44, VA-bits:64, 4K pages", [VM_MODE_P36V48_4K] = "PA-bits:36, VA-bits:48, 4K pages", @@ -259,6 +260,7 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = { [VM_MODE_P40V48_16K] = { 40, 48, 0x4000, 14 }, [VM_MODE_P40V48_64K] = { 40, 48, 0x10000, 16 }, [VM_MODE_PXXV48_4K] = { 0, 0, 0x1000, 12 }, + [VM_MODE_PXXV57_4K] = { 0, 0, 0x1000, 12 }, [VM_MODE_P47V64_4K] = { 47, 64, 0x1000, 12 }, [VM_MODE_P44V64_4K] = { 44, 64, 0x1000, 12 }, [VM_MODE_P36V48_4K] = { 36, 48, 0x1000, 12 }, @@ -358,6 +360,25 @@ struct kvm_vm *____vm_create(struct vm_shape shape) vm->va_bits = 48; #else TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms"); +#endif + break; + case VM_MODE_PXXV57_4K: +#ifdef __x86_64__ + kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits); + kvm_init_vm_address_properties(vm); + /* + * For 5-level paging, KVM requires LA57 to be enabled, which + * requires a 57-bit virtual address space. + */ + TEST_ASSERT(vm->va_bits == 57, + "Linear address width (%d bits) not supported for VM_MODE_PXXV57_4K", + vm->va_bits); + pr_debug("Guest physical address width detected: %d\n", + vm->pa_bits); + vm->pgtable_levels = 5; + vm->va_bits = 57; +#else + TEST_FAIL("VM_MODE_PXXV57_4K not supported on non-x86 platforms"); #endif break; case VM_MODE_P47V64_4K: diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index 433365c8196d..d566190ea488 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -124,10 +124,11 @@ bool kvm_is_tdp_enabled(void)
void virt_arch_pgd_alloc(struct kvm_vm *vm) { - TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " - "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K || + vm->mode == VM_MODE_PXXV57_4K, + "Unknown or unsupported guest mode: 0x%x", vm->mode);
- /* If needed, create page map l4 table. */ + /* If needed, create the top-level page table. */ if (!vm->pgd_created) { vm->pgd = vm_alloc_page_table(vm); vm->pgd_created = true; @@ -187,8 +188,9 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) uint64_t *pte = &vm->pgd; int current_level;
- TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, - "Unknown or unsupported guest mode, mode: 0x%x", vm->mode); + TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K || + vm->mode == VM_MODE_PXXV57_4K, + "Unknown or unsupported guest mode: 0x%x", vm->mode);
TEST_ASSERT((vaddr % pg_size) == 0, "Virtual address not aligned,\n" @@ -279,8 +281,9 @@ uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, TEST_ASSERT(*level >= PG_LEVEL_NONE && *level < PG_LEVEL_NUM, "Invalid PG_LEVEL_* '%d'", *level);
- TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " - "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K || + vm->mode == VM_MODE_PXXV57_4K, + "Unknown or unsupported guest mode: 0x%x", vm->mode); TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (vaddr >> vm->page_shift)), "Invalid virtual address, vaddr: 0x%lx", @@ -481,7 +484,9 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) { struct kvm_sregs sregs;
- TEST_ASSERT_EQ(vm->mode, VM_MODE_PXXV48_4K); + TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K || + vm->mode == VM_MODE_PXXV57_4K, + "Unknown or unsupported guest mode: 0x%x", vm->mode);
/* Set mode specific system register values. */ vcpu_sregs_get(vcpu, &sregs); @@ -495,6 +500,8 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR; if (kvm_cpu_has(X86_FEATURE_XSAVE)) sregs.cr4 |= X86_CR4_OSXSAVE; + if (vm->pgtable_levels == 5) + sregs.cr4 |= X86_CR4_LA57; sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
kvm_seg_set_unusable(&sregs.ldt); diff --git a/tools/testing/selftests/kvm/lib/x86/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c index d4d1208dd023..1b6d4a007798 100644 --- a/tools/testing/selftests/kvm/lib/x86/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86/vmx.c @@ -401,11 +401,12 @@ void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, struct eptPageTableEntry *pt = vmx->eptp_hva, *pte; uint16_t index;
- TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " - "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K || + vm->mode == VM_MODE_PXXV57_4K, + "Unknown or unsupported guest mode: 0x%x", vm->mode);
TEST_ASSERT((nested_paddr >> 48) == 0, - "Nested physical address 0x%lx requires 5-level paging", + "Nested physical address 0x%lx is > 48-bits and requires 5-level EPT", nested_paddr); TEST_ASSERT((nested_paddr % page_size) == 0, "Nested physical address not on page boundary,\n"
Add a selftest that verifies KVM's ability to save and restore nested state when the L1 guest is using 5-level paging and the L2 guest is using 4-level paging. Specifically, canonicality tests of the VMCS12 host-state fields should accept 57-bit virtual addresses.
Signed-off-by: Jim Mattson jmattson@google.com --- tools/testing/selftests/kvm/Makefile.kvm | 1 + .../kvm/x86/vmx_la57_nested_state_test.c | 137 ++++++++++++++++++ 2 files changed, 138 insertions(+) create mode 100644 tools/testing/selftests/kvm/x86/vmx_la57_nested_state_test.c
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index 41b40c676d7f..f1958b88ec59 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -116,6 +116,7 @@ TEST_GEN_PROGS_x86 += x86/vmx_exception_with_invalid_guest_state TEST_GEN_PROGS_x86 += x86/vmx_msrs_test TEST_GEN_PROGS_x86 += x86/vmx_invalid_nested_guest_state TEST_GEN_PROGS_x86 += x86/vmx_set_nested_state_test +TEST_GEN_PROGS_x86 += x86/vmx_la57_nested_state_test TEST_GEN_PROGS_x86 += x86/vmx_tsc_adjust_test TEST_GEN_PROGS_x86 += x86/vmx_nested_tsc_scaling_test TEST_GEN_PROGS_x86 += x86/apic_bus_clock_test diff --git a/tools/testing/selftests/kvm/x86/vmx_la57_nested_state_test.c b/tools/testing/selftests/kvm/x86/vmx_la57_nested_state_test.c new file mode 100644 index 000000000000..7c3c4c1c17f6 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/vmx_la57_nested_state_test.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * vmx_la57_nested_state_test + * + * Copyright (C) 2025, Google LLC. + * + * Test KVM's ability to save and restore nested state when the L1 guest + * is using 5-level paging and the L2 guest is using 4-level paging. + * + * This test would have failed prior to commit 9245fd6b8531 ("KVM: x86: + * model canonical checks more precisely"). + */ +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +#define LA57_GS_BASE 0xff2bc0311fb00000ull + +static void l2_guest_code(void) +{ + /* + * Sync with L0 to trigger save/restore. After + * resuming, execute VMCALL to exit back to L1. + */ + GUEST_SYNC(1); + vmcall(); +} + +static void l1_guest_code(struct vmx_pages *vmx_pages) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + u64 guest_cr4; + vm_paddr_t pml5_pa, pml4_pa; + u64 *pml5; + u64 exit_reason; + + /* Set GS_BASE to a value that is only canonical with LA57. */ + wrmsr(MSR_GS_BASE, LA57_GS_BASE); + GUEST_ASSERT(rdmsr(MSR_GS_BASE) == LA57_GS_BASE); + + GUEST_ASSERT(vmx_pages->vmcs_gpa); + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); + + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* + * Set up L2 with a 4-level page table by pointing its CR3 to L1's + * PML4 table and clearing CR4.LA57. This creates the CR4.LA57 + * mismatch that exercises the bug. + */ + pml5_pa = get_cr3() & PHYSICAL_PAGE_MASK; + pml5 = (u64 *)pml5_pa; + pml4_pa = pml5[0] & PHYSICAL_PAGE_MASK; + vmwrite(GUEST_CR3, pml4_pa); + + guest_cr4 = vmreadz(GUEST_CR4); + guest_cr4 &= ~X86_CR4_LA57; + vmwrite(GUEST_CR4, guest_cr4); + + GUEST_ASSERT(!vmlaunch()); + + exit_reason = vmreadz(VM_EXIT_REASON); + GUEST_ASSERT(exit_reason == EXIT_REASON_VMCALL); +} + +void guest_code(struct vmx_pages *vmx_pages) +{ + if (vmx_pages) + l1_guest_code(vmx_pages); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t vmx_pages_gva = 0; + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + struct kvm_x86_state *state; + struct ucall uc; + int stage; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_LA57)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE)); + + vm = vm_create_shape_with_one_vcpu(VM_SHAPE(VM_MODE_PXXV57_4K), &vcpu, + guest_code); + + /* + * L1 needs to read its own PML5 table to set up L2. Identity map + * the PML5 table to facilitate this. + */ + virt_map(vm, vm->pgd, vm->pgd, 1); + + vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vcpu, 1, vmx_pages_gva); + + for (stage = 1;; stage++) { + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + case UCALL_SYNC: + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + + TEST_ASSERT(uc.args[1] == stage, + "Expected stage %d, got stage %lu", stage, (ulong)uc.args[1]); + if (stage == 1) { + pr_info("L2 is active; performing save/restore.\n"); + state = vcpu_save_state(vcpu); + + kvm_vm_release(vm); + + /* Restore state in a new VM. */ + vcpu = vm_recreate_with_one_vcpu(vm); + vcpu_load_state(vcpu, state); + kvm_x86_state_cleanup(state); + } + } + +done: + kvm_vm_free(vm); + return 0; +}
linux-kselftest-mirror@lists.linaro.org