Hi Oliver,
Thanks for reviewing!
On 11/09/2025 7:42 pm, Oliver Upton wrote:
On Thu, Sep 11, 2025 at 02:46:45PM +0100, Jack Thomson wrote:
@@ -1607,7 +1611,7 @@ static int __user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                             struct kvm_s2_trans *nested,
                             struct kvm_memory_slot *memslot, long *page_size,
-                            unsigned long hva, bool fault_is_perm)
+                            unsigned long hva, bool fault_is_perm, bool pre_fault)
 {
         int ret = 0;
         bool topup_memcache;
@@ -1631,10 +1635,13 @@ static int __user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
         vm_flags_t vm_flags;
         enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_MEMABORT_FLAGS;

+        if (pre_fault)
+                flags |= KVM_PGTABLE_WALK_PRE_FAULT;
+
         if (fault_is_perm)
                 fault_granule = kvm_vcpu_trap_get_perm_fault_granule(vcpu);
-        write_fault = kvm_is_write_fault(vcpu);
-        exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
+        write_fault = !pre_fault && kvm_is_write_fault(vcpu);
+        exec_fault = !pre_fault && kvm_vcpu_trap_is_exec_fault(vcpu);
I'm not a fan of this. While user_mem_abort() is already a sloppy mess, one thing we could reliably assume is the presence of a valid fault context. Now we need to remember to special-case our interpretation of a fault on whether or not we're getting invoked for a pre-fault.
I'd rather see the pre-fault infrastructure compose a synthetic fault context (HPFAR_EL2, ESR_EL2, etc.). It places the complexity where it belongs and the rest of the abort handling code should 'just work'.
Agreed, it looks much better with the synthetic abort. Is this the approach you had in mind?
+long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
+                                    struct kvm_pre_fault_memory *range)
+{
+        int ret, idx;
+        hva_t hva;
+        phys_addr_t end;
+        u64 esr, hpfar;
+        struct kvm_memory_slot *memslot;
+        struct kvm_vcpu_fault_info *fault_info;
+
+        long page_size = PAGE_SIZE;
+        phys_addr_t ipa = range->gpa;
+        gfn_t gfn = gpa_to_gfn(range->gpa);
+
+        idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+        if (ipa >= kvm_phys_size(vcpu->arch.hw_mmu)) {
+                ret = -ENOENT;
+                goto out_unlock;
+        }
+
+        memslot = gfn_to_memslot(vcpu->kvm, gfn);
+        if (!memslot) {
+                ret = -ENOENT;
+                goto out_unlock;
+        }
+
+        fault_info = &vcpu->arch.fault;
+
+        esr = fault_info->esr_el2;
+        hpfar = fault_info->hpfar_el2;
+
+        fault_info->esr_el2 = ESR_ELx_FSC_ACCESS_L(KVM_PGTABLE_LAST_LEVEL);
+        fault_info->hpfar_el2 = HPFAR_EL2_NS |
+                ((ipa >> (12 - HPFAR_EL2_FIPA_SHIFT)) & HPFAR_EL2_FIPA_MASK);
+
+        if (kvm_slot_has_gmem(memslot)) {
+                ret = gmem_abort(vcpu, ipa, NULL, memslot, false);
+        } else {
+                hva = gfn_to_hva_memslot_prot(memslot, gfn, NULL);
+                if (kvm_is_error_hva(hva)) {
+                        ret = -EFAULT;
+                        goto out;
+                }
+                ret = user_mem_abort(vcpu, ipa, NULL, memslot, &page_size, hva,
+                                     false);
+        }
+
+        if (ret < 0)
+                goto out;
+
+        end = (range->gpa & ~(page_size - 1)) + page_size;
+        ret = min(range->size, end - range->gpa);
+
+out:
+        fault_info->esr_el2 = esr;
+        fault_info->hpfar_el2 = hpfar;
+out_unlock:
+        srcu_read_unlock(&vcpu->kvm->srcu, idx);
+        return ret;
+}
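As an aside, the bytes-mapped value returned above (min(range->size, end - range->gpa)) is what the arch-generic KVM_PRE_FAULT_MEMORY path consumes to advance through the range. From userspace the ioctl ends up being driven roughly like the sketch below; vcpu_fd and pre_fault_range() are made up for illustration and not part of this series:

/*
 * Userspace sketch (illustrative only, not from this series): drive
 * KVM_PRE_FAULT_MEMORY over a guest physical range on one vCPU fd.
 * On partial progress the kernel advances range.gpa/range.size, so
 * retrying on EINTR/EAGAIN resumes where the previous call stopped.
 */
#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int pre_fault_range(int vcpu_fd, uint64_t gpa, uint64_t size)
{
        struct kvm_pre_fault_memory range;

        memset(&range, 0, sizeof(range));
        range.gpa = gpa;        /* page-aligned guest physical address */
        range.size = size;      /* page-aligned length in bytes */

        for (;;) {
                if (ioctl(vcpu_fd, KVM_PRE_FAULT_MEMORY, &range) == 0)
                        return 0;       /* whole range populated */
                if (errno != EINTR && errno != EAGAIN)
                        return -errno;  /* e.g. -ENOENT: no memslot backs gpa */
                /* Interrupted after partial progress; retry the remainder. */
        }
}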
+long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
+                                    struct kvm_pre_fault_memory *range)
+{
+        int r;
+        hva_t hva;
+        phys_addr_t end;
+        long page_size;
+        struct kvm_memory_slot *memslot;
+        phys_addr_t ipa = range->gpa;
+        gfn_t gfn = gpa_to_gfn(range->gpa);
+
+        while (true) {
+                page_size = PAGE_SIZE;
+                memslot = gfn_to_memslot(vcpu->kvm, gfn);
+                if (!memslot)
+                        return -ENOENT;
+
+                if (kvm_slot_has_gmem(memslot)) {
+                        r = __gmem_abort(vcpu, ipa, NULL, memslot, false, true);
+                } else {
+                        hva = gfn_to_hva_memslot_prot(memslot, gfn, NULL);
+                        if (kvm_is_error_hva(hva))
+                                return -EFAULT;
+
+                        r = __user_mem_abort(vcpu, ipa, NULL, memslot, &page_size, hva, false,
+                                             true);
+                }
+
+                if (r != -EAGAIN)
+                        break;
+
+                if (signal_pending(current))
+                        return -EINTR;
+
+                if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu))
+                        return -EIO;
+
+                cond_resched();
+        };
Why do we need another retry loop? Looks like we've already got one in the arch-generic code.
Good point, thanks. I've removed that now.
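For reference, the generic kvm_vcpu_pre_fault_memory() caller in virt/kvm/kvm_main.c already chunks the range, checks for pending signals and reschedules between chunks, so the arch hook only has to map a single chunk and report how many bytes it covered. Roughly paraphrased (not a verbatim quote of the generic code):

        /* Paraphrased shape of the caller's loop in virt/kvm/kvm_main.c. */
        do {
                if (signal_pending(current)) {
                        r = -EINTR;
                        break;
                }

                /* Arch hook maps one chunk, returns bytes covered or -errno. */
                r = kvm_arch_vcpu_pre_fault_memory(vcpu, range);
                if (r < 0)
                        break;

                range->size -= r;
                range->gpa += r;
                cond_resched();
        } while (range->size);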
Thanks, Oliver
Thanks, Jack