From: Sagi Shahar <sagis@google.com>
Add the core logic for transferring state between the source and destination TDs during intra-host migration: per-VM state (HKID, attributes, XFAM, TSC state, and the TDR/TDCS control pages) and per-vCPU state (lapic and posted-interrupt state, GPRs, TDVPR/TDCX pages, and the mirror EPT tables). Once the transfer has started, the source VM is marked dead and its migrated structures are cleared so they are not freed at source VM shutdown.
Signed-off-by: Sagi Shahar <sagis@google.com>
Co-developed-by: Ryan Afranji <afranji@google.com>
Signed-off-by: Ryan Afranji <afranji@google.com>
---
 arch/x86/kvm/vmx/tdx.c | 192 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 191 insertions(+), 1 deletion(-)
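For reference, a minimal sketch of the userspace side, assuming the same KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM flow already used for SEV intra-host migration (the helper name below is illustrative, not part of this patch):

/*
 * Sketch only: ask KVM to move the encryption context (the TD) from
 * src_vm_fd to dst_vm_fd. Assumes the KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM
 * flow used by SEV intra-host migration; move_td() is a made-up name.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int move_td(int dst_vm_fd, int src_vm_fd)
{
	struct kvm_enable_cap cap;

	memset(&cap, 0, sizeof(cap));
	cap.cap = KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM;
	cap.args[0] = src_vm_fd;	/* fd of the source VM */

	/* On success the destination owns the TD state; the source VM is dead. */
	return ioctl(dst_vm_fd, KVM_ENABLE_CAP, &cap);
}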
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 4582f94175b7..268aca28d878 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -3534,9 +3534,199 @@ static __always_inline bool tdx_finalized(struct kvm *kvm)
 	return tdx_kvm->state == TD_STATE_RUNNABLE;
 }
 
+#define MAX_APIC_VECTOR 256
+
+static int tdx_migrate_vcpus(struct kvm *dst, struct kvm *src)
+{
+	struct kvm_vcpu *src_vcpu;
+	struct kvm_tdx *dst_tdx;
+	unsigned long i;
+
+	dst_tdx = to_kvm_tdx(dst);
+
+	kvm_for_each_vcpu(i, src_vcpu, src)
+		tdx_flush_vp_on_cpu(src_vcpu);
+
+	/* Copy per-vCPU state. */
+	kvm_for_each_vcpu(i, src_vcpu, src) {
+		struct vcpu_tdx *dst_tdx_vcpu, *src_tdx_vcpu;
+		struct kvm_lapic_state src_lapic_state;
+		struct kvm_vcpu *dst_vcpu;
+		u64 apic_base;
+		u32 vector;
+		int ret;
+
+		src_tdx_vcpu = to_tdx(src_vcpu);
+		dst_vcpu = kvm_get_vcpu(dst, i);
+		dst_tdx_vcpu = to_tdx(dst_vcpu);
+
+		dst_vcpu->cpu = -1;
+
+		/* Destination vCPU initialization was skipped, so do it here. */
+		apic_base = APIC_DEFAULT_PHYS_BASE | LAPIC_MODE_X2APIC |
+			    (kvm_vcpu_is_reset_bsp(dst_vcpu) ?
+			     MSR_IA32_APICBASE_BSP : 0);
+		if (kvm_apic_set_base(dst_vcpu, apic_base, true))
+			return -EINVAL;
+
+		/* Copy lapic state. */
+		ret = kvm_apic_get_state(src_vcpu, &src_lapic_state);
+		if (ret)
+			return -EINVAL;
+
+		ret = kvm_apic_set_state(dst_vcpu, &src_lapic_state);
+		if (ret)
+			return -EINVAL;
+
+		/*
+		 * pi_desc stores the posted-interrupt state that the pCPU
+		 * processes at VM entry and at runtime. For non-confidential
+		 * VMs, this storage is synchronized into vCPU state via
+		 * set_lapic_state (sync_pir_to_irr).
+		 *
+		 * For TDX VMs, KVM has no access to the virtual lapic page,
+		 * so to preserve the interrupt state, copy the pi_desc
+		 * contents to the destination VM during copyless migration.
+		 */
+		dst_tdx_vcpu->vt = src_tdx_vcpu->vt;
+		for (vector = 0; vector < MAX_APIC_VECTOR; vector++) {
+			if (pi_test_pir(vector, &src_tdx_vcpu->vt.pi_desc)) {
+				__vmx_deliver_posted_interrupt(
+						dst_vcpu,
+						&dst_tdx_vcpu->vt.pi_desc,
+						vector);
+			}
+		}
+
+		/* Copy non-TDX vCPU state. */
+		memcpy(dst_vcpu->arch.regs, src_vcpu->arch.regs,
+		       NR_VCPU_REGS * sizeof(src_vcpu->arch.regs[0]));
+
+		dst_vcpu->arch.regs_avail = src_vcpu->arch.regs_avail;
+		dst_vcpu->arch.regs_dirty = src_vcpu->arch.regs_dirty;
+		dst_vcpu->arch.tsc_offset = dst_tdx->tsc_offset;
+		dst_vcpu->arch.guest_state_protected =
+			src_vcpu->arch.guest_state_protected;
+		dst_vcpu->arch.xfd_no_write_intercept =
+			src_vcpu->arch.xfd_no_write_intercept;
+
+		/* Copy TD structures. */
+		dst_tdx_vcpu->vp.tdvpr_page = src_tdx_vcpu->vp.tdvpr_page;
+		dst_tdx_vcpu->vp.tdcx_pages = src_tdx_vcpu->vp.tdcx_pages;
+
+		td_vmcs_write64(dst_tdx_vcpu, POSTED_INTR_DESC_ADDR,
+				__pa(&dst_tdx_vcpu->vt.pi_desc));
+
+		/* Copy current vCPU status. */
+		dst_tdx_vcpu->ext_exit_qualification =
+			src_tdx_vcpu->ext_exit_qualification;
+		dst_tdx_vcpu->exit_gpa = src_tdx_vcpu->exit_gpa;
+		dst_tdx_vcpu->vp_enter_args = src_tdx_vcpu->vp_enter_args;
+		dst_tdx_vcpu->vp_enter_ret = src_tdx_vcpu->vp_enter_ret;
+		dst_tdx_vcpu->guest_entered = src_tdx_vcpu->guest_entered;
+		dst_tdx_vcpu->map_gpa_next = src_tdx_vcpu->map_gpa_next;
+		dst_tdx_vcpu->map_gpa_end = src_tdx_vcpu->map_gpa_end;
+
+		/* Copy mirror EPT tables. */
+		vcpu_load(dst_vcpu);
+		if (kvm_mmu_move_mirror_pages_from(dst_vcpu, src_vcpu)) {
+			vcpu_put(dst_vcpu);
+			return -EINVAL;
+		}
+		vcpu_put(dst_vcpu);
+
+		dst_vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+		dst_tdx_vcpu->state = VCPU_TD_STATE_INITIALIZED;
+
+		/*
+		 * Clear the migrated structures in the source vCPU so they
+		 * are not freed during source VM shutdown.
+		 */
+		src_tdx_vcpu->vp.tdvpr_page = NULL;
+		src_tdx_vcpu->vp.tdcx_pages = NULL;
+	}
+
+	return 0;
+}
+
 static int tdx_migrate_from(struct kvm *dst, struct kvm *src)
 {
-	return -EINVAL;
+	struct kvm_tdx *src_tdx, *dst_tdx;
+	bool charged = false;
+	int ret;
+
+	src_tdx = to_kvm_tdx(src);
+	dst_tdx = to_kvm_tdx(dst);
+
+	ret = -EINVAL;
+
+	if (src_tdx->state != TD_STATE_RUNNABLE) {
+		pr_warn("Cannot migrate from a non-finalized VM\n");
+		goto abort;
+	}
+
+	/* Transfer miscellaneous cgroup. */
+	dst_tdx->misc_cg = get_current_misc_cg();
+	if (dst_tdx->misc_cg != src_tdx->misc_cg) {
+		ret = misc_cg_try_charge(MISC_CG_RES_TDX, dst_tdx->misc_cg, 1);
+		if (ret)
+			goto abort_dst_cgroup;
+		charged = true;
+	}
+
+	dst_tdx->hkid = src_tdx->hkid;
+
+	/* Copy VM data. */
+	dst_tdx->attributes = src_tdx->attributes;
+	dst_tdx->xfam = src_tdx->xfam;
+	dst_tdx->tsc_offset = src_tdx->tsc_offset;
+	dst_tdx->tsc_multiplier = src_tdx->tsc_multiplier;
+	dst_tdx->nr_premapped = src_tdx->nr_premapped;
+	dst_tdx->wait_for_sept_zap = src_tdx->wait_for_sept_zap;
+	dst_tdx->kvm.arch.gfn_direct_bits = src_tdx->kvm.arch.gfn_direct_bits;
+
+	/* Copy TD structures. */
+	dst_tdx->td.tdcs_nr_pages = src_tdx->td.tdcs_nr_pages;
+	dst_tdx->td.tdcx_nr_pages = src_tdx->td.tdcx_nr_pages;
+	dst_tdx->td.tdr_page = src_tdx->td.tdr_page;
+	dst_tdx->td.tdcs_pages = src_tdx->td.tdcs_pages;
+
+	/* Copy per-vCPU state. */
+	ret = tdx_migrate_vcpus(dst, src);
+	if (ret)
+		goto late_abort;
+
+	dst->mem_attr_array.xa_head = src->mem_attr_array.xa_head;
+	src->mem_attr_array.xa_head = NULL;
+
+	dst_tdx->state = TD_STATE_RUNNABLE;
+
+	/*
+	 * Clear the migrated structures in the source VM so they are not
+	 * freed during source VM shutdown.
+	 */
+	src_tdx->hkid = -1;
+	src_tdx->td.tdr_page = NULL;
+	src_tdx->td.tdcs_pages = NULL;
+
+	return 0;
+
+late_abort:
+	/*
+	 * If we aborted after the state transfer already started, the src VM
+	 * is no longer valid.
+	 */
+	kvm_vm_dead(src);
+
+abort_dst_cgroup:
+	if (charged)
+		misc_cg_uncharge(MISC_CG_RES_TDX, dst_tdx->misc_cg, 1);
+	put_misc_cg(dst_tdx->misc_cg);
+	dst_tdx->misc_cg = NULL;
+abort:
+	dst_tdx->hkid = -1;
+	dst_tdx->td.tdr_page = NULL;
+	return ret;
 }
 
 int tdx_vm_move_enc_context_from(struct kvm *kvm, struct kvm *src_kvm)