Adding GHCB support for selftests. Very similar code to the ucall functionality, I didn't refactor anything common out since I was unsure with just two instances that is required. If pulling out common code between those two is preferred please let me know. The series only adds a single usage of the GHCB which is a special outsb GHCB exit to allow for passing the 64-bit ucall pointer. In future series we can test more GHCB functionality of KVM. I'd like to base some SNP smoke tests off of this and the current SEV selftest work.
base-commit: 40e09b3ccfacc640d58e1e3d6b8f29b2db0a9848
Cc: Vishal Annapurve vannapurve@google.com Cc: Ackerley Tng ackerleytng@google.com Cc: Paolo Bonzini pbonzini@redhat.com Cc: Claudio Imbrenda imbrenda@linux.ibm.com Cc: Sean Christopherson seanjc@google.com Cc: Carlos Bilbao carlos.bilbao@amd.com Cc: Tom Lendacky thomas.lendacky@amd.com Cc: Michael Roth michael.roth@amd.com Cc: kvm@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Signed-off-by: Peter Gonda pgonda@google.com
Peter Gonda (6): Add GHCB with setters and getters Add arch specific additional guest pages Add vm_vaddr_alloc_pages_shared() Add GHCB allocations and helpers Add is_sev_enabled() helpers Add ability for SEV-ES guests to use ucalls via GHCB
tools/testing/selftests/kvm/Makefile | 2 +- .../selftests/kvm/include/kvm_util_base.h | 4 + .../selftests/kvm/include/x86_64/sev.h | 7 + .../selftests/kvm/include/x86_64/svm.h | 106 +++++++++++++ tools/testing/selftests/kvm/lib/kvm_util.c | 22 ++- .../selftests/kvm/lib/x86_64/processor.c | 8 + tools/testing/selftests/kvm/lib/x86_64/sev.c | 149 ++++++++++++++++++ .../testing/selftests/kvm/lib/x86_64/ucall.c | 17 ++ .../selftests/kvm/x86_64/sev_smoke_test.c | 22 +-- 9 files changed, 313 insertions(+), 24 deletions(-)
Move the GHCB definitions from svm.h to the tools/ copy. This allows the SEV-ES selftest to use GHCBs which are required for non-trival VMs to paravirtualize NonAutomaticExits (NAEs) when SEV-ES is enabled. GHCB getters/setters have a warning with address-of-packed-member, so removed this using the CFLAGS.
Cc: Vishal Annapurve vannapurve@google.com Cc: Ackerley Tng ackerleytng@google.com Cc: Paolo Bonzini pbonzini@redhat.com Cc: Claudio Imbrenda imbrenda@linux.ibm.com Cc: Sean Christopherson seanjc@google.com Cc: Carlos Bilbao carlos.bilbao@amd.com Cc: Tom Lendacky thomas.lendacky@amd.com Cc: Michael Roth michael.roth@amd.com Cc: kvm@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Signed-off-by: Peter Gonda pgonda@google.com --- tools/testing/selftests/kvm/Makefile | 2 +- .../selftests/kvm/include/x86_64/svm.h | 106 ++++++++++++++++++ 2 files changed, 107 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index c75251d5c97c..95fa0cead256 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -221,7 +221,7 @@ endif CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ -Wno-gnu-variable-sized-type-not-at-end -MD -MP \ -fno-builtin-memcmp -fno-builtin-memcpy -fno-builtin-memset \ - -fno-builtin-strnlen \ + -fno-builtin-strnlen -Wno-address-of-packed-member \ -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \ -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \ -I$(<D) -Iinclude/$(ARCH_DIR) -I ../rseq -I.. $(EXTRA_CFLAGS) \ diff --git a/tools/testing/selftests/kvm/include/x86_64/svm.h b/tools/testing/selftests/kvm/include/x86_64/svm.h index 4803e1056055..fbd8d29c15a8 100644 --- a/tools/testing/selftests/kvm/include/x86_64/svm.h +++ b/tools/testing/selftests/kvm/include/x86_64/svm.h @@ -323,4 +323,110 @@ struct __attribute__ ((__packed__)) vmcb {
#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP)
+struct ghcb_save_area { + u8 reserved_0x0[203]; + u8 cpl; + u8 reserved_0xcc[116]; + u64 xss; + u8 reserved_0x148[24]; + u64 dr7; + u8 reserved_0x168[16]; + u64 rip; + u8 reserved_0x180[88]; + u64 rsp; + u8 reserved_0x1e0[24]; + u64 rax; + u8 reserved_0x200[264]; + u64 rcx; + u64 rdx; + u64 rbx; + u8 reserved_0x320[8]; + u64 rbp; + u64 rsi; + u64 rdi; + u64 r8; + u64 r9; + u64 r10; + u64 r11; + u64 r12; + u64 r13; + u64 r14; + u64 r15; + u8 reserved_0x380[16]; + u64 sw_exit_code; + u64 sw_exit_info_1; + u64 sw_exit_info_2; + u64 sw_scratch; + u8 reserved_0x3b0[56]; + u64 xcr0; + u8 valid_bitmap[16]; + u64 x87_state_gpa; +} __packed; + +#define GHCB_SHARED_BUF_SIZE 2032 + +struct ghcb { + struct ghcb_save_area save; + u8 reserved_save[2048 - sizeof(struct ghcb_save_area)]; + + u8 shared_buffer[GHCB_SHARED_BUF_SIZE]; + + u8 reserved_0xff0[10]; + u16 protocol_version; /* negotiated SEV-ES/GHCB protocol version */ + u32 ghcb_usage; +} __packed; + +/* GHCB Accessor functions */ + +#define GHCB_BITMAP_IDX(field) \ + (offsetof(struct ghcb_save_area, field) / sizeof(u64)) + +#define DEFINE_GHCB_ACCESSORS(field) \ + static __always_inline bool ghcb_##field##_is_valid(const struct ghcb *ghcb) \ + { \ + return test_bit(GHCB_BITMAP_IDX(field), \ + (unsigned long *)&ghcb->save.valid_bitmap); \ + } \ + \ + static __always_inline u64 ghcb_get_##field(struct ghcb *ghcb) \ + { \ + return ghcb->save.field; \ + } \ + \ + static __always_inline u64 ghcb_get_##field##_if_valid(struct ghcb *ghcb) \ + { \ + return ghcb_##field##_is_valid(ghcb) ? ghcb->save.field : 0; \ + } \ + \ + static __always_inline void ghcb_set_##field(struct ghcb *ghcb, u64 value) \ + { \ + __set_bit(GHCB_BITMAP_IDX(field), \ + (unsigned long *)&ghcb->save.valid_bitmap); \ + ghcb->save.field = value; \ + } + +DEFINE_GHCB_ACCESSORS(cpl) +DEFINE_GHCB_ACCESSORS(rip) +DEFINE_GHCB_ACCESSORS(rsp) +DEFINE_GHCB_ACCESSORS(rax) +DEFINE_GHCB_ACCESSORS(rcx) +DEFINE_GHCB_ACCESSORS(rdx) +DEFINE_GHCB_ACCESSORS(rbx) +DEFINE_GHCB_ACCESSORS(rbp) +DEFINE_GHCB_ACCESSORS(rsi) +DEFINE_GHCB_ACCESSORS(rdi) +DEFINE_GHCB_ACCESSORS(r8) +DEFINE_GHCB_ACCESSORS(r9) +DEFINE_GHCB_ACCESSORS(r10) +DEFINE_GHCB_ACCESSORS(r11) +DEFINE_GHCB_ACCESSORS(r12) +DEFINE_GHCB_ACCESSORS(r13) +DEFINE_GHCB_ACCESSORS(r14) +DEFINE_GHCB_ACCESSORS(r15) +DEFINE_GHCB_ACCESSORS(sw_exit_code) +DEFINE_GHCB_ACCESSORS(sw_exit_info_1) +DEFINE_GHCB_ACCESSORS(sw_exit_info_2) +DEFINE_GHCB_ACCESSORS(sw_scratch) +DEFINE_GHCB_ACCESSORS(xcr0) + #endif /* SELFTEST_KVM_SVM_H */
On Tue, Apr 09, 2024, Peter Gonda wrote:
Move the GHCB definitions from svm.h to the tools/ copy. This allows the SEV-ES selftest to use GHCBs which are required for non-trival VMs to paravirtualize NonAutomaticExits (NAEs) when SEV-ES is enabled. GHCB getters/setters have a warning with address-of-packed-member, so removed this using the CFLAGS.
Just for paranoia, I would put the -Wno-address-of-packed-member in a separate patch. And to make life easier for us paranoid folks, call out that the kernel builds with -Wno-address-of-packed-member by default for *all* architectures, thanks to this line in scripts/Makefile.extrawarn:
KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
And for good reason, that's a darn stupid warning for the kernel.
SEV-ES guests need additional pages allocated for their GHCBs. Add arch specific function definition with __weak to allow for overriding for X86 specific SEV-ES functionality.
Cc: Vishal Annapurve vannapurve@google.com Cc: Ackerley Tng ackerleytng@google.com Cc: Paolo Bonzini pbonzini@redhat.com Cc: Claudio Imbrenda imbrenda@linux.ibm.com Cc: Sean Christopherson seanjc@google.com Cc: Carlos Bilbao carlos.bilbao@amd.com Cc: Tom Lendacky thomas.lendacky@amd.com Cc: Michael Roth michael.roth@amd.com Cc: kvm@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Signed-off-by: Peter Gonda pgonda@google.com --- .../selftests/kvm/include/kvm_util_base.h | 3 +++ tools/testing/selftests/kvm/lib/kvm_util.c | 16 ++++++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-)
diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index 4a40b332115d..9a26afd2e82a 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -1126,4 +1126,7 @@ void kvm_arch_vm_post_create(struct kvm_vm *vm);
bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr);
+int kvm_arch_vm_additional_pages_required(struct vm_shape shape, + uint64_t page_size); + #endif /* SELFTEST_KVM_UTIL_BASE_H */ diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index adc51b0712ca..2a7b2709eb8d 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -314,11 +314,11 @@ struct kvm_vm *____vm_create(struct vm_shape shape) return vm; }
-static uint64_t vm_nr_pages_required(enum vm_guest_mode mode, +static uint64_t vm_nr_pages_required(struct vm_shape shape, uint32_t nr_runnable_vcpus, uint64_t extra_mem_pages) { - uint64_t page_size = vm_guest_mode_params[mode].page_size; + uint64_t page_size = vm_guest_mode_params[shape.mode].page_size; uint64_t nr_pages;
TEST_ASSERT(nr_runnable_vcpus, @@ -350,13 +350,15 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode, /* Account for the number of pages needed by ucall. */ nr_pages += ucall_nr_pages_required(page_size);
- return vm_adjust_num_guest_pages(mode, nr_pages); + nr_pages += kvm_arch_vm_additional_pages_required(shape, page_size); + + return vm_adjust_num_guest_pages(shape.mode, nr_pages); }
struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, uint64_t nr_extra_pages) { - uint64_t nr_pages = vm_nr_pages_required(shape.mode, nr_runnable_vcpus, + uint64_t nr_pages = vm_nr_pages_required(shape, nr_runnable_vcpus, nr_extra_pages); struct userspace_mem_region *slot0; struct kvm_vm *vm; @@ -2246,6 +2248,12 @@ __weak void kvm_arch_vm_post_create(struct kvm_vm *vm) { }
+__weak int kvm_arch_vm_additional_pages_required(struct vm_shape shape, + uint64_t page_size) +{ + return 0; +} + __weak void kvm_selftest_arch_init(void) { }
Add a shared page allocation. To be used for SEV-ES GHCBs.
Cc: Vishal Annapurve vannapurve@google.com Cc: Ackerley Tng ackerleytng@google.com Cc: Paolo Bonzini pbonzini@redhat.com Cc: Claudio Imbrenda imbrenda@linux.ibm.com Cc: Sean Christopherson seanjc@google.com Cc: Carlos Bilbao carlos.bilbao@amd.com Cc: Tom Lendacky thomas.lendacky@amd.com Cc: Michael Roth michael.roth@amd.com Cc: kvm@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Signed-off-by: Peter Gonda pgonda@google.com --- tools/testing/selftests/kvm/include/kvm_util_base.h | 1 + tools/testing/selftests/kvm/lib/kvm_util.c | 6 ++++++ 2 files changed, 7 insertions(+)
diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index 9a26afd2e82a..8fa6e55e0039 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -595,6 +595,7 @@ vm_vaddr_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, enum kvm_mem_region_type type); vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages); +vm_vaddr_t vm_vaddr_alloc_pages_shared(struct kvm_vm *vm, int nr_pages); vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type); vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 2a7b2709eb8d..bce60ff749ea 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1470,6 +1470,12 @@ vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages) return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR); }
+vm_vaddr_t vm_vaddr_alloc_pages_shared(struct kvm_vm *vm, int nr_pages) +{ + return vm_vaddr_alloc_shared(vm, nr_pages * getpagesize(), + KVM_UTIL_MIN_VADDR, MEM_REGION_TEST_DATA); +} + vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type) { return __vm_vaddr_alloc(vm, getpagesize(), KVM_UTIL_MIN_VADDR, type);
Add GHCB management functionality similar to the ucall management. Allows for selftest vCPUs to acquire GHCBs for their usage.
Cc: Vishal Annapurve vannapurve@google.com Cc: Ackerley Tng ackerleytng@google.com Cc: Paolo Bonzini pbonzini@redhat.com Cc: Claudio Imbrenda imbrenda@linux.ibm.com Cc: Sean Christopherson seanjc@google.com Cc: Carlos Bilbao carlos.bilbao@amd.com Cc: Tom Lendacky thomas.lendacky@amd.com Cc: Michael Roth michael.roth@amd.com Cc: kvm@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Signed-off-by: Peter Gonda pgonda@google.com --- .../selftests/kvm/include/x86_64/sev.h | 2 + .../selftests/kvm/lib/x86_64/processor.c | 8 ++ tools/testing/selftests/kvm/lib/x86_64/sev.c | 77 +++++++++++++++++++ 3 files changed, 87 insertions(+)
diff --git a/tools/testing/selftests/kvm/include/x86_64/sev.h b/tools/testing/selftests/kvm/include/x86_64/sev.h index 8a1bf88474c9..bfd481707f67 100644 --- a/tools/testing/selftests/kvm/include/x86_64/sev.h +++ b/tools/testing/selftests/kvm/include/x86_64/sev.h @@ -27,6 +27,8 @@ enum sev_guest_state {
#define GHCB_MSR_TERM_REQ 0x100
+int ghcb_nr_pages_required(uint64_t page_size); + void sev_vm_launch(struct kvm_vm *vm, uint32_t policy); void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement); void sev_vm_launch_finish(struct kvm_vm *vm); diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 49288fe10cd3..fd94a1bd82c9 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -584,6 +584,14 @@ void kvm_arch_vm_post_create(struct kvm_vm *vm) sev_es_vm_init(vm); }
+int kvm_arch_vm_additional_pages_required(struct vm_shape shape, uint64_t page_size) +{ + if (shape.subtype == VM_SUBTYPE_SEV_ES) + return ghcb_nr_pages_required(page_size); + + return 0; +} + void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) { struct kvm_regs regs; diff --git a/tools/testing/selftests/kvm/lib/x86_64/sev.c b/tools/testing/selftests/kvm/lib/x86_64/sev.c index e248d3364b9c..27ae1d3b1355 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/sev.c +++ b/tools/testing/selftests/kvm/lib/x86_64/sev.c @@ -4,6 +4,80 @@ #include <stdbool.h>
#include "sev.h" +#include "linux/bitmap.h" +#include "svm.h" +#include "svm_util.h" + +struct ghcb_entry { + struct ghcb ghcb; + + /* Guest physical address of this GHCB. */ + void *gpa; + + /* Host virtual address of this struct. */ + struct ghcb_entry *hva; +}; + +struct ghcb_header { + struct ghcb_entry ghcbs[KVM_MAX_VCPUS]; + DECLARE_BITMAP(in_use, KVM_MAX_VCPUS); +}; + +static struct ghcb_header *ghcb_pool; + +int ghcb_nr_pages_required(uint64_t page_size) +{ + return align_up(sizeof(struct ghcb_header), page_size) / page_size; +} + +void ghcb_init(struct kvm_vm *vm) +{ + struct ghcb_header *hdr; + struct ghcb_entry *entry; + vm_vaddr_t vaddr; + int i; + + vaddr = vm_vaddr_alloc_shared(vm, sizeof(*hdr), KVM_UTIL_MIN_VADDR, + MEM_REGION_DATA); + hdr = (struct ghcb_header *)addr_gva2hva(vm, vaddr); + memset(hdr, 0, sizeof(*hdr)); + + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + entry = &hdr->ghcbs[i]; + entry->hva = entry; + entry->gpa = addr_hva2gpa(vm, &entry->ghcb); + } + + write_guest_global(vm, ghcb_pool, (struct ghcb_header *)vaddr); +} + +static struct ghcb_entry *ghcb_alloc(void) +{ + return &ghcb_pool->ghcbs[0]; + struct ghcb_entry *entry; + int i; + + if (!ghcb_pool) + goto ucall_failed; + + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + if (!test_and_set_bit(i, ghcb_pool->in_use)) { + entry = &ghcb_pool->ghcbs[i]; + memset(&entry->ghcb, 0, sizeof(entry->ghcb)); + return entry; + } + } + +ucall_failed: + return NULL; +} + +static void ghcb_free(struct ghcb_entry *entry) +{ + /* Beware, here be pointer arithmetic. */ + clear_bit(entry - ghcb_pool->ghcbs, ghcb_pool->in_use); +} +
/* * sparsebit_next_clear() can return 0 if [x, 2**64-1] are all set, and the @@ -44,6 +118,9 @@ void sev_vm_launch(struct kvm_vm *vm, uint32_t policy) struct kvm_sev_guest_status status; int ctr;
+ if (policy & SEV_POLICY_ES) + ghcb_init(vm); + vm_sev_ioctl(vm, KVM_SEV_LAUNCH_START, &launch_start); vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
On Tue, Apr 09, 2024, Peter Gonda wrote:
Add GHCB management functionality similar to the ucall management. Allows for selftest vCPUs to acquire GHCBs for their usage.
Do we actually need a dedicated pool of GHCBs? The conundrum with ucalls is that we have no place in the guest to store the pointer on all architectures. Or rather, we were too lazy to find one. :-)
But for SEV-ES, we have MSR_AMD64_SEV_ES_GHCB, and any test that clobbers that obviously can't use ucalls anyways. Argh, but we can't get a value in that MSR from the host.
Hmm, that seems like a KVM flaw. KVM should advertise its supported GHCB protocol to *userspace*, but userspace should control the actual information exposed to the guest.
Oof, and doesn't SNP support effectively *require* version 2? I.e. shouldn't the KVM patch[*] that adds support for the AP reset MSR protocol bump the version? The GHCB spec very cleary states that that's v2+.
And what if userspace wants to advertise v2 to an SEV-ES guest? KVM shouldn't switch *all* SEV-ES guests to v2, without a way back. And the GHCB spec clearly states that some of the features are in scope for SEV-ES, e.g.
In addition to hypervisor feature advertisement, version 2 provides:
SEV-ES enhancements: o GHCB Format Version 2: ▪ The addition of the XSS MSR value (if supported) when CPUID 0xD is requested. ▪ The shared area specified in the GHCB SW_SCRATCH field must reside in the GHCB SharedBuffer area of the GHCB. o MSR protocol support for AP reset hold.
So I'm pretty sure KVM needs to let userspace set the initial value for MSR_AMD64_SEV_ES_GHCB. I suppose we could do that indirectly, e.g. through a capability. Hrm, but even if userspace can set the initial value, KVM would need to parse the min/max version so that KVM knows what *it* should support, which means that throwing in a GPA for selftests would screw things up.
Blech. Why do CPU folks insist on using ascending version numbers to bundle features?
Anyways, back to selftests. Since we apparently can't stuff MSR_AMD64_SEV_ES_GHCB from host userspace, what if we instead use a trampoline? Instead having vcpu_arch_set_entry_point() point directly at guest_code, point it at a trampoline for SEV-ES guests, and then have the trampoline set MSR_AMD64_SEV_ES_GHCB to the vCPU-specific GHCB before invoking guest_code().
Then we just need a register to stuff the GHCB into. Ah, and the actual guest entry point. GPRs are already part of selftest's "ABI", since they're set by vcpu_args_set(). And this is all 64-bit only, so we can use r10+.
Ugh, the complication is that the trampoline would need to save/restore RAX, RCX, and RDX in order to preserve the values from vcpu_args_set(), but that's just annoying, not hard. And I think it'd be less painful overall than having to create a GHCB pool?
In rough pseudo-asm, something like this?
static void vcpu_sev_es_guest_trampoline(void) { asm volatile(<save rax, rcx, rdx> "mov %%r15d, %%eax\n\t" "shr %%r15, $32\n\t" "mov %%r15d, %%eax\n\t" "mov $MSR_AMD64_SEV_ES_GHCB, %%ecx\n\t" <restore rax, rcx, rdx> "jmp %%r14") }
[*] https://lore.kernel.org/all/20240421180122.1650812-3-michael.roth@amd.com
On Tue, Apr 23, 2024, Sean Christopherson wrote:
On Tue, Apr 09, 2024, Peter Gonda wrote:
Add GHCB management functionality similar to the ucall management. Allows for selftest vCPUs to acquire GHCBs for their usage.
Do we actually need a dedicated pool of GHCBs? The conundrum with ucalls is that we have no place in the guest to store the pointer on all architectures. Or rather, we were too lazy to find one. :-)
But for SEV-ES, we have MSR_AMD64_SEV_ES_GHCB, and any test that clobbers that obviously can't use ucalls anyways. Argh, but we can't get a value in that MSR from the host.
...
Anyways, back to selftests. Since we apparently can't stuff MSR_AMD64_SEV_ES_GHCB from host userspace, what if we instead use a trampoline? Instead having vcpu_arch_set_entry_point() point directly at guest_code, point it at a trampoline for SEV-ES guests, and then have the trampoline set MSR_AMD64_SEV_ES_GHCB to the vCPU-specific GHCB before invoking guest_code().
Then we just need a register to stuff the GHCB into. Ah, and the actual guest entry point. GPRs are already part of selftest's "ABI", since they're set by vcpu_args_set(). And this is all 64-bit only, so we can use r10+.
Ugh, the complication is that the trampoline would need to save/restore RAX, RCX, and RDX in order to preserve the values from vcpu_args_set(), but that's just annoying, not hard. And I think it'd be less painful overall than having to create a GHCB pool?
In rough pseudo-asm, something like this?
static void vcpu_sev_es_guest_trampoline(void) { asm volatile(<save rax, rcx, rdx> "mov %%r15d, %%eax\n\t" "shr %%r15, $32\n\t" "mov %%r15d, %%eax\n\t" "mov $MSR_AMD64_SEV_ES_GHCB, %%ecx\n\t" <restore rax, rcx, rdx> "jmp %%r14") }
Scratch using inline asm, it needs to be a proper asm subroutine, as it's possible the compiler could clobber GPRs before emitting the asm. But writing actual assembly code is probably a good thing.
And we need assembly for TDX selftests, which forces vCPUs to start at the RESET vector[*]. Rather than add a TDX specific td_boot.S, we can add a common-ish entry.S to hold all of the CoCo entry points that need to be in assembly.
Then I think we'll eventually end up with something like:
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index fd94a1bd82c9..03818d3c4669 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -597,7 +597,12 @@ void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) struct kvm_regs regs;
vcpu_regs_get(vcpu, ®s); - regs.rip = (unsigned long) guest_code; + if (<is sev-es guest>) + regs.r14 = guest_code; + else if (<is tdx guest>) + <guest_code gets shoved somewhere else> + else + regs.rip = (unsigned long) guest_code; vcpu_regs_set(vcpu, ®s); }
@@ -635,6 +640,10 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) vcpu_regs_get(vcpu, ®s); regs.rflags = regs.rflags | 0x2; regs.rsp = stack_vaddr; + if (<is sev-es guest>) { + regs.rip = vcpu_sev_es_guest_trampoline; + regs.r15 = <allocate and map ghcb>(); + } vcpu_regs_set(vcpu, ®s);
/* Setup the MP state */ ---
[*] https://lore.kernel.org/all/20231212204647.2170650-6-sagis@google.com
On Tue, Apr 23, 2024, Sean Christopherson wrote:
Oof, and doesn't SNP support effectively *require* version 2? I.e. shouldn't the KVM patch[*] that adds support for the AP reset MSR protocol bump the version? The GHCB spec very cleary states that that's v2+.
Ah, the SNP series does bump the max version one patch later, presumably after all mandatory features for v2 are implemented.
Modifies ucall handling for SEV-ES VMs. Instead of using an out instruction and storing the ucall pointer in RDI, SEV-ES guests use a outsb VMGEXIT to move the ucall pointer as the data. Allows for SEV-ES to use ucalls instead of relying the SEV-ES MSR based termination protocol.
Cc: Vishal Annapurve vannapurve@google.com Cc: Ackerley Tng ackerleytng@google.com Cc: Paolo Bonzini pbonzini@redhat.com Cc: Claudio Imbrenda imbrenda@linux.ibm.com Cc: Sean Christopherson seanjc@google.com Cc: Carlos Bilbao carlos.bilbao@amd.com Cc: Tom Lendacky thomas.lendacky@amd.com Cc: Michael Roth michael.roth@amd.com Cc: kvm@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Signed-off-by: Peter Gonda pgonda@google.com --- .../selftests/kvm/include/x86_64/sev.h | 2 + tools/testing/selftests/kvm/lib/x86_64/sev.c | 67 ++++++++++++++++++- .../testing/selftests/kvm/lib/x86_64/ucall.c | 17 +++++ .../selftests/kvm/x86_64/sev_smoke_test.c | 17 +---- 4 files changed, 84 insertions(+), 19 deletions(-)
diff --git a/tools/testing/selftests/kvm/include/x86_64/sev.h b/tools/testing/selftests/kvm/include/x86_64/sev.h index 691dc005e2a1..26447caccd40 100644 --- a/tools/testing/selftests/kvm/include/x86_64/sev.h +++ b/tools/testing/selftests/kvm/include/x86_64/sev.h @@ -109,4 +109,6 @@ static inline void sev_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa, bool is_sev_enabled(void); bool is_sev_es_enabled(void);
+void sev_es_ucall_port_write(uint32_t port, uint64_t data); + #endif /* SELFTEST_KVM_SEV_H */ diff --git a/tools/testing/selftests/kvm/lib/x86_64/sev.c b/tools/testing/selftests/kvm/lib/x86_64/sev.c index 5b3f0a8a931a..276477f2c2cf 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/sev.c +++ b/tools/testing/selftests/kvm/lib/x86_64/sev.c @@ -8,11 +8,18 @@ #include "svm.h" #include "svm_util.h"
+#define IOIO_TYPE_STR (1 << 2) +#define IOIO_SEG_DS (1 << 11 | 1 << 10) +#define IOIO_DATA_8 (1 << 4) +#define IOIO_REP (1 << 3) + +#define SW_EXIT_CODE_IOIO 0x7b + struct ghcb_entry { struct ghcb ghcb;
/* Guest physical address of this GHCB. */ - void *gpa; + uint64_t gpa;
/* Host virtual address of this struct. */ struct ghcb_entry *hva; @@ -45,16 +52,22 @@ void ghcb_init(struct kvm_vm *vm) for (i = 0; i < KVM_MAX_VCPUS; ++i) { entry = &hdr->ghcbs[i]; entry->hva = entry; - entry->gpa = addr_hva2gpa(vm, &entry->ghcb); + entry->gpa = (uint64_t)addr_hva2gpa(vm, &entry->ghcb); }
write_guest_global(vm, ghcb_pool, (struct ghcb_header *)vaddr); }
+static void sev_es_terminate(void) +{ + wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ); +} + static struct ghcb_entry *ghcb_alloc(void) { return &ghcb_pool->ghcbs[0]; struct ghcb_entry *entry; + struct ghcb *ghcb; int i;
if (!ghcb_pool) @@ -63,12 +76,18 @@ static struct ghcb_entry *ghcb_alloc(void) for (i = 0; i < KVM_MAX_VCPUS; ++i) { if (!test_and_set_bit(i, ghcb_pool->in_use)) { entry = &ghcb_pool->ghcbs[i]; - memset(&entry->ghcb, 0, sizeof(entry->ghcb)); + ghcb = &entry->ghcb; + + memset(&ghcb, 0, sizeof(*ghcb)); + ghcb->ghcb_usage = 0; + ghcb->protocol_version = 1; + return entry; } }
ucall_failed: + sev_es_terminate(); return NULL; }
@@ -200,3 +219,45 @@ bool is_sev_es_enabled(void) return is_sev_enabled() && rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ES_ENABLED; } + +static uint64_t setup_exitinfo1_portio(uint32_t port) +{ + uint64_t exitinfo1 = 0; + + exitinfo1 |= IOIO_TYPE_STR; + exitinfo1 |= ((port & 0xffff) << 16); + exitinfo1 |= IOIO_SEG_DS; + exitinfo1 |= IOIO_DATA_8; + exitinfo1 |= IOIO_REP; + + return exitinfo1; +} + +static void do_vmg_exit(uint64_t ghcb_gpa) +{ + wrmsr(MSR_AMD64_SEV_ES_GHCB, ghcb_gpa); + __asm__ __volatile__("rep; vmmcall"); +} + +void sev_es_ucall_port_write(uint32_t port, uint64_t data) +{ + struct ghcb_entry *entry; + struct ghcb *ghcb; + const uint64_t exitinfo1 = setup_exitinfo1_portio(port); + + entry = ghcb_alloc(); + ghcb = &entry->ghcb; + + ghcb_set_sw_exit_code(ghcb, SW_EXIT_CODE_IOIO); + ghcb_set_sw_exit_info_1(ghcb, exitinfo1); + ghcb_set_sw_exit_info_2(ghcb, sizeof(data)); + + // Setup the SW Stratch buffer pointer. + ghcb_set_sw_scratch(ghcb, + entry->gpa + offsetof(struct ghcb, shared_buffer)); + memcpy(&ghcb->shared_buffer, &data, sizeof(data)); + + do_vmg_exit(entry->gpa); + + ghcb_free(entry); +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c index 1265cecc7dd1..24da2f4316d8 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c +++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c @@ -5,6 +5,8 @@ * Copyright (C) 2018, Red Hat, Inc. */ #include "kvm_util.h" +#include "processor.h" +#include "sev.h"
#define UCALL_PIO_PORT ((uint16_t)0x1000)
@@ -21,6 +23,10 @@ void ucall_arch_do_ucall(vm_vaddr_t uc) #define HORRIFIC_L2_UCALL_CLOBBER_HACK \ "rcx", "rsi", "r8", "r9", "r10", "r11"
+ if (is_sev_es_enabled()) { + sev_es_ucall_port_write(UCALL_PIO_PORT, uc); + } + asm volatile("push %%rbp\n\t" "push %%r15\n\t" "push %%r14\n\t" @@ -48,8 +54,19 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) { struct kvm_regs regs; + uint64_t addr; + + if (vcpu->vm->subtype == VM_SUBTYPE_SEV_ES) { + TEST_ASSERT( + run->io.count == 8 && run->io.size == 1, + "SEV-ES ucall exit requires 8 byte string out\n"); + + addr = *(uint64_t *)((uint8_t *)(run) + run->io.data_offset); + return (void *)addr; + }
vcpu_regs_get(vcpu, ®s); + return (void *)regs.rdi; } return NULL; diff --git a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c index 1d84e78e7ae2..2448533a9a41 100644 --- a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c +++ b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c @@ -18,12 +18,7 @@ static void guest_sev_es_code(void) /* TODO: Check CPUID after GHCB-based hypercall support is added. */ GUEST_ASSERT(is_sev_es_enabled());
- /* - * TODO: Add GHCB and ucall support for SEV-ES guests. For now, simply - * force "termination" to signal "done" via the GHCB MSR protocol. - */ - wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ); - __asm__ __volatile__("rep; vmmcall"); + GUEST_DONE(); }
static void guest_sev_code(void) @@ -45,16 +40,6 @@ static void test_sev(void *guest_code, uint64_t policy) for (;;) { vcpu_run(vcpu);
- if (policy & SEV_POLICY_ES) { - TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT, - "Wanted SYSTEM_EVENT, got %s", - exit_reason_str(vcpu->run->exit_reason)); - TEST_ASSERT_EQ(vcpu->run->system_event.type, KVM_SYSTEM_EVENT_SEV_TERM); - TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 1); - TEST_ASSERT_EQ(vcpu->run->system_event.data[0], GHCB_MSR_TERM_REQ); - break; - } - switch (get_ucall(vcpu, &uc)) { case UCALL_SYNC: continue;
On Tue, Apr 09, 2024, Peter Gonda wrote:
Modifies ucall handling for SEV-ES VMs.
Please follow the preferred changelog style as described in Documentation/process/maintainer-kvm-x86.rst
Instead of using an out instruction and storing the ucall pointer in RDI, SEV-ES guests use a outsb VMGEXIT to move the ucall pointer as the data.
Explain _why_. After poking around, I think I agree that string I/O is the least awful choice, but string I/O is generally unpleasant. E.g. my initial reaction to this addr = *(uint64_t *)((uint8_t *)(run) + run->io.data_offset);
was quite literally, "LOL, what?".
We could use MMIO, because there is no *real* instruction in the guest, it's all make believe, i.e. there doesn't actually need to be MMIO anywhere. But then we need to define an address; it could simply be the ucall address, but then SEV-ES ends up with a completely different flow then the regular magic I/O port.
The changelog should capture explain why string I/O was chosen over the "obvious" alternatives so that readers and reviewers aren't left wondering why on earth we *chose* to use string I/O.
Allows for SEV-ES to use ucalls instead of relying the SEV-ES MSR based termination protocol.
Cc: Vishal Annapurve vannapurve@google.com Cc: Ackerley Tng ackerleytng@google.com Cc: Paolo Bonzini pbonzini@redhat.com Cc: Claudio Imbrenda imbrenda@linux.ibm.com Cc: Sean Christopherson seanjc@google.com Cc: Carlos Bilbao carlos.bilbao@amd.com Cc: Tom Lendacky thomas.lendacky@amd.com Cc: Michael Roth michael.roth@amd.com Cc: kvm@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Signed-off-by: Peter Gonda pgonda@google.com
.../selftests/kvm/include/x86_64/sev.h | 2 + tools/testing/selftests/kvm/lib/x86_64/sev.c | 67 ++++++++++++++++++- .../testing/selftests/kvm/lib/x86_64/ucall.c | 17 +++++ .../selftests/kvm/x86_64/sev_smoke_test.c | 17 +---- 4 files changed, 84 insertions(+), 19 deletions(-)
diff --git a/tools/testing/selftests/kvm/include/x86_64/sev.h b/tools/testing/selftests/kvm/include/x86_64/sev.h index 691dc005e2a1..26447caccd40 100644 --- a/tools/testing/selftests/kvm/include/x86_64/sev.h +++ b/tools/testing/selftests/kvm/include/x86_64/sev.h @@ -109,4 +109,6 @@ static inline void sev_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa, bool is_sev_enabled(void); bool is_sev_es_enabled(void); +void sev_es_ucall_port_write(uint32_t port, uint64_t data);
#endif /* SELFTEST_KVM_SEV_H */ diff --git a/tools/testing/selftests/kvm/lib/x86_64/sev.c b/tools/testing/selftests/kvm/lib/x86_64/sev.c index 5b3f0a8a931a..276477f2c2cf 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/sev.c +++ b/tools/testing/selftests/kvm/lib/x86_64/sev.c @@ -8,11 +8,18 @@ #include "svm.h" #include "svm_util.h" +#define IOIO_TYPE_STR (1 << 2) +#define IOIO_SEG_DS (1 << 11 | 1 << 10) +#define IOIO_DATA_8 (1 << 4) +#define IOIO_REP (1 << 3)
+#define SW_EXIT_CODE_IOIO 0x7b
struct ghcb_entry { struct ghcb ghcb; /* Guest physical address of this GHCB. */
- void *gpa;
- uint64_t gpa;
/* Host virtual address of this struct. */ struct ghcb_entry *hva; @@ -45,16 +52,22 @@ void ghcb_init(struct kvm_vm *vm) for (i = 0; i < KVM_MAX_VCPUS; ++i) { entry = &hdr->ghcbs[i]; entry->hva = entry;
entry->gpa = addr_hva2gpa(vm, &entry->ghcb);
}entry->gpa = (uint64_t)addr_hva2gpa(vm, &entry->ghcb);
write_guest_global(vm, ghcb_pool, (struct ghcb_header *)vaddr); } +static void sev_es_terminate(void) +{
- wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ);
+}
static struct ghcb_entry *ghcb_alloc(void) { return &ghcb_pool->ghcbs[0]; struct ghcb_entry *entry;
- struct ghcb *ghcb; int i;
if (!ghcb_pool) @@ -63,12 +76,18 @@ static struct ghcb_entry *ghcb_alloc(void) for (i = 0; i < KVM_MAX_VCPUS; ++i) { if (!test_and_set_bit(i, ghcb_pool->in_use)) { entry = &ghcb_pool->ghcbs[i];
memset(&entry->ghcb, 0, sizeof(entry->ghcb));
ghcb = &entry->ghcb;
memset(&ghcb, 0, sizeof(*ghcb));
ghcb->ghcb_usage = 0;
ghcb->protocol_version = 1;
} }return entry;
ucall_failed:
- sev_es_terminate(); return NULL;
} @@ -200,3 +219,45 @@ bool is_sev_es_enabled(void) return is_sev_enabled() && rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ES_ENABLED; }
+static uint64_t setup_exitinfo1_portio(uint32_t port) +{
- uint64_t exitinfo1 = 0;
- exitinfo1 |= IOIO_TYPE_STR;
- exitinfo1 |= ((port & 0xffff) << 16);
- exitinfo1 |= IOIO_SEG_DS;
- exitinfo1 |= IOIO_DATA_8;
- exitinfo1 |= IOIO_REP;
- return exitinfo1;
+}
+static void do_vmg_exit(uint64_t ghcb_gpa) +{
- wrmsr(MSR_AMD64_SEV_ES_GHCB, ghcb_gpa);
- __asm__ __volatile__("rep; vmmcall");
+}
+void sev_es_ucall_port_write(uint32_t port, uint64_t data) +{
- struct ghcb_entry *entry;
- struct ghcb *ghcb;
- const uint64_t exitinfo1 = setup_exitinfo1_portio(port);
- entry = ghcb_alloc();
- ghcb = &entry->ghcb;
- ghcb_set_sw_exit_code(ghcb, SW_EXIT_CODE_IOIO);
- ghcb_set_sw_exit_info_1(ghcb, exitinfo1);
- ghcb_set_sw_exit_info_2(ghcb, sizeof(data));
- // Setup the SW Stratch buffer pointer.
- ghcb_set_sw_scratch(ghcb,
entry->gpa + offsetof(struct ghcb, shared_buffer));
- memcpy(&ghcb->shared_buffer, &data, sizeof(data));
- do_vmg_exit(entry->gpa);
- ghcb_free(entry);
+} diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c index 1265cecc7dd1..24da2f4316d8 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c +++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c @@ -5,6 +5,8 @@
- Copyright (C) 2018, Red Hat, Inc.
*/ #include "kvm_util.h" +#include "processor.h" +#include "sev.h" #define UCALL_PIO_PORT ((uint16_t)0x1000) @@ -21,6 +23,10 @@ void ucall_arch_do_ucall(vm_vaddr_t uc) #define HORRIFIC_L2_UCALL_CLOBBER_HACK \ "rcx", "rsi", "r8", "r9", "r10", "r11"
- if (is_sev_es_enabled()) {
No curly braces needed.
sev_es_ucall_port_write(UCALL_PIO_PORT, uc);
- }
This will clearly fall through to the standard IN, which I suspect is wrong and only "works" because the only usage is a single GUEST_DONE(), i.e. no test actually resumes to this point.
- asm volatile("push %%rbp\n\t" "push %%r15\n\t" "push %%r14\n\t"
@@ -48,8 +54,19 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) { struct kvm_regs regs;
uint64_t addr;
if (vcpu->vm->subtype == VM_SUBTYPE_SEV_ES) {
TEST_ASSERT(
No Google3 style please. I'm going to start charging folks for these violations. I don't know _how_, but darn it, I'll find a way :-)
run->io.count == 8 && run->io.size == 1,
"SEV-ES ucall exit requires 8 byte string out\n");
addr = *(uint64_t *)((uint8_t *)(run) + run->io.data_offset);
Rather than this amazing bit of casting, I'm tempted to say we should add kvm_vcpu_arch{} and then map the PIO page in vm_arch_vcpu_add(). Then this is more sanely:
return *(uint64_t *)vcpu->arch.pio);
where vcpu->arch.pio is a "void *". At least, I think that would work?
return (void *)addr;
}
vcpu_regs_get(vcpu, ®s);
Spurious whitespace.
linux-kselftest-mirror@lists.linaro.org