Add support for mapping guest pagetable pages to a contiguous guest virtual address range and sharing the physical to virtual mappings with the guest in a pre-defined format.
This functionality will allow the guests to modify their page table entries. One such usecase for CC VMs is to toggle encryption bit in their ptes to switch from encrypted to shared memory and vice a versa.
Signed-off-by: Vishal Annapurve vannapurve@google.com --- .../selftests/kvm/include/kvm_util_base.h | 105 ++++++++++++++++++ tools/testing/selftests/kvm/lib/kvm_util.c | 78 ++++++++++++- .../selftests/kvm/lib/x86_64/processor.c | 32 ++++++ 3 files changed, 214 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index dfe454f228e7..f57ced56da1b 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -74,6 +74,11 @@ struct vm_memcrypt { int8_t enc_bit; };
+struct pgt_page { + vm_paddr_t paddr; + struct list_head list; +}; + struct kvm_vm { int mode; unsigned long type; @@ -98,6 +103,10 @@ struct kvm_vm { vm_vaddr_t handlers; uint32_t dirty_ring_size; struct vm_memcrypt memcrypt; + struct list_head pgt_pages; + bool track_pgt_pages; + uint32_t num_pgt_pages; + vm_vaddr_t pgt_vaddr_start;
/* Cache of information for binary stats interface */ int stats_fd; @@ -184,6 +193,23 @@ struct vm_guest_mode_params { unsigned int page_size; unsigned int page_shift; }; + +/* + * Structure shared with the guest containing information about: + * - Starting virtual address for num_pgt_pages physical pagetable + * page addresses tracked via paddrs array + * - page size of the guest + * + * Guest can walk through its pagetables using this information to + * read/modify pagetable attributes. + */ +struct guest_pgt_info { + uint64_t num_pgt_pages; + uint64_t pgt_vaddr_start; + uint64_t page_size; + uint64_t paddrs[]; +}; + extern const struct vm_guest_mode_params vm_guest_mode_params[];
int open_path_or_exit(const char *path, int flags); @@ -394,6 +420,49 @@ void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id); vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min); +void vm_map_page_table(struct kvm_vm *vm, vm_vaddr_t vaddr_min); + +/* + * function called by guest code to translate physical address of a pagetable + * page to guest virtual address. + * + * input args: + * gpgt_info - pointer to the guest_pgt_info structure containing info + * about guest virtual address mappings for guest physical + * addresses of page table pages. + * pgt_pa - physical address of guest page table page to be translated + * to a virtual address. + * + * output args: none + * + * return: + * pointer to the pagetable page, null in case physical address is not + * tracked via given guest_pgt_info structure. + */ +void *guest_code_get_pgt_vaddr(struct guest_pgt_info *gpgt_info, uint64_t pgt_pa); + +/* + * Allocate and setup a page to be shared with guest containing guest_pgt_info + * structure. + * + * Note: + * 1) vm_set_pgt_alloc_tracking function should be used to start tracking + * of physical page table page allocation. + * 2) This function should be invoked after needed pagetable pages are + * mapped to the VM using virt_pg_map. + * + * input args: + * vm - virtual machine + * vaddr_min - Minimum guest virtual address to start mapping the + * guest_pgt_info structure page(s). + * + * output args: none + * + * return: + * virtual address mapping guest_pgt_info structure. + */ +vm_vaddr_t vm_setup_pgt_info_buf(struct kvm_vm *vm, vm_vaddr_t vaddr_min); + vm_vaddr_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min); vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages); vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm); @@ -647,10 +716,46 @@ void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing);
const char *exit_reason_str(unsigned int exit_reason);
+#ifdef __x86_64__ +/* + * Guest called function to get a pointer to pte corresponding to a given + * guest virtual address and pointer to the guest_pgt_info structure. + * + * input args: + * gpgt_info - pointer to guest_pgt_info structure containing information + * about guest virtual addresses mapped to pagetable physical + * addresses. + * vaddr - guest virtual address + * + * output args: none + * + * return: + * pointer to the pte corresponding to guest virtual address, + * Null if pte is not found + */ +uint64_t *guest_code_get_pte(struct guest_pgt_info *gpgt_info, uint64_t vaddr); +#endif + vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, uint32_t memslot); vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, vm_paddr_t paddr_min, uint32_t memslot); + +/* + * Enable tracking of physical guest pagetable pages for the given vm. + * This function should be called right after vm creation before any pages are + * mapped into the VM using vm_alloc_* / vm_vaddr_alloc* functions. + * + * input args: + * vm - virtual machine + * + * output args: none + * + * return: + * None + */ +void vm_set_pgt_alloc_tracking(struct kvm_vm *vm); + vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm);
/* diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index f153c71d6988..243d04a3d4b6 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -155,6 +155,7 @@ struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages) TEST_ASSERT(vm != NULL, "Insufficient Memory");
INIT_LIST_HEAD(&vm->vcpus); + INIT_LIST_HEAD(&vm->pgt_pages); vm->regions.gpa_tree = RB_ROOT; vm->regions.hva_tree = RB_ROOT; hash_init(vm->regions.slot_hash); @@ -573,6 +574,7 @@ void kvm_vm_free(struct kvm_vm *vmp) { int ctr; struct hlist_node *node; + struct pgt_page *entry, *nentry; struct userspace_mem_region *region;
if (vmp == NULL) @@ -588,6 +590,9 @@ void kvm_vm_free(struct kvm_vm *vmp) hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node) __vm_mem_region_delete(vmp, region, false);
+ list_for_each_entry_safe(entry, nentry, &vmp->pgt_pages, list) + free(entry); + /* Free sparsebit arrays. */ sparsebit_free(&vmp->vpages_valid); sparsebit_free(&vmp->vpages_mapped); @@ -1195,9 +1200,24 @@ vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, /* Arbitrary minimum physical address used for virtual translation tables. */ #define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
+void vm_set_pgt_alloc_tracking(struct kvm_vm *vm) +{ + vm->track_pgt_pages = true; +} + vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm) { - return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); + struct pgt_page *pgt; + vm_paddr_t paddr = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); + + if (vm->track_pgt_pages) { + pgt = calloc(1, sizeof(*pgt)); + TEST_ASSERT(pgt != NULL, "Insufficient memory"); + pgt->paddr = addr_gpa2raw(vm, paddr); + list_add(&pgt->list, &vm->pgt_pages); + vm->num_pgt_pages++; + } + return paddr; }
/* @@ -1286,6 +1306,27 @@ static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, return pgidx_start * vm->page_size; }
+void vm_map_page_table(struct kvm_vm *vm, vm_vaddr_t vaddr_min) +{ + struct pgt_page *pgt_page_entry; + vm_vaddr_t vaddr; + + /* Stop tracking further pgt pages, mapping pagetable may itself need + * new pages. + */ + vm->track_pgt_pages = false; + vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, + vm->num_pgt_pages * vm->page_size, vaddr_min); + vaddr = vaddr_start; + list_for_each_entry(pgt_page_entry, &vm->pgt_pages, list) { + /* Map the virtual page. */ + virt_pg_map(vm, vaddr, addr_raw2gpa(vm, pgt_page_entry->paddr)); + sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift); + vaddr += vm->page_size; + } + vm->pgt_vaddr_start = vaddr_start; +} + /* * VM Virtual Address Allocate Shared/Encrypted * @@ -1345,6 +1386,41 @@ vm_vaddr_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_ return _vm_vaddr_alloc(vm, sz, vaddr_min, false); }
+void *guest_code_get_pgt_vaddr(struct guest_pgt_info *gpgt_info, + uint64_t pgt_pa) +{ + uint64_t num_pgt_pages = gpgt_info->num_pgt_pages; + uint64_t pgt_vaddr_start = gpgt_info->pgt_vaddr_start; + uint64_t page_size = gpgt_info->page_size; + + for (uint32_t i = 0; i < num_pgt_pages; i++) { + if (gpgt_info->paddrs[i] == pgt_pa) + return (void *)(pgt_vaddr_start + i * page_size); + } + return NULL; +} + +vm_vaddr_t vm_setup_pgt_info_buf(struct kvm_vm *vm, vm_vaddr_t vaddr_min) +{ + struct pgt_page *pgt_page_entry; + struct guest_pgt_info *gpgt_info; + uint64_t info_size = sizeof(*gpgt_info) + (sizeof(uint64_t) * vm->num_pgt_pages); + uint64_t num_pages = align_up(info_size, vm->page_size); + vm_vaddr_t buf_start = vm_vaddr_alloc(vm, num_pages, vaddr_min); + uint32_t i = 0; + + gpgt_info = (struct guest_pgt_info *)addr_gva2hva(vm, buf_start); + gpgt_info->num_pgt_pages = vm->num_pgt_pages; + gpgt_info->pgt_vaddr_start = vm->pgt_vaddr_start; + gpgt_info->page_size = vm->page_size; + list_for_each_entry(pgt_page_entry, &vm->pgt_pages, list) { + gpgt_info->paddrs[i] = pgt_page_entry->paddr; + i++; + } + TEST_ASSERT((i == vm->num_pgt_pages), "pgt entries mismatch with the counter"); + return buf_start; +} + /* * VM Virtual Address Allocate Pages * diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 09d757a0b148..02252cabf9ec 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -217,6 +217,38 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) __virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K); }
+uint64_t *guest_code_get_pte(struct guest_pgt_info *gpgt_info, uint64_t vaddr) +{ + uint16_t index[4]; + uint64_t *pml4e, *pdpe, *pde, *pte; + uint64_t pgt_paddr = get_cr3(); + uint64_t page_size = gpgt_info->page_size; + + index[0] = (vaddr >> 12) & 0x1ffu; + index[1] = (vaddr >> 21) & 0x1ffu; + index[2] = (vaddr >> 30) & 0x1ffu; + index[3] = (vaddr >> 39) & 0x1ffu; + + pml4e = guest_code_get_pgt_vaddr(gpgt_info, pgt_paddr); + GUEST_ASSERT(pml4e && (pml4e[index[3]] & PTE_PRESENT_MASK)); + + pgt_paddr = (PTE_GET_PFN(pml4e[index[3]]) * page_size); + pdpe = guest_code_get_pgt_vaddr(gpgt_info, pgt_paddr); + GUEST_ASSERT(pdpe && (pdpe[index[2]] & PTE_PRESENT_MASK) && + !(pdpe[index[2]] & PTE_LARGE_MASK)); + + pgt_paddr = (PTE_GET_PFN(pdpe[index[2]]) * page_size); + pde = guest_code_get_pgt_vaddr(gpgt_info, pgt_paddr); + GUEST_ASSERT(pde && (pde[index[1]] & PTE_PRESENT_MASK) && + !(pde[index[1]] & PTE_LARGE_MASK)); + + pgt_paddr = (PTE_GET_PFN(pde[index[1]]) * page_size); + pte = guest_code_get_pgt_vaddr(gpgt_info, pgt_paddr); + GUEST_ASSERT(pte && (pte[index[0]] & PTE_PRESENT_MASK)); + + return (uint64_t *)&pte[index[0]]; +} + static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu, uint64_t vaddr)