Implement kvm_vgic_enable_vcpu_vlpi, which handles the KVM_ENABLE_VCPU_VLPI ioctl to enable direct vLPI injection on a specific vCPU. The function has two components: a call to vgic_v4_vcpu_init and a call to upgrade_existing_lpis_to_vlpis:
- vgic_v4_vcpu_init() is the per-vCPU corollary to vgic_cpu_init, and initializes all of the GIC structures a vCPU needs to handle LPI interrupts via direct injection. While IRQ domains are usually allocated on a per-VM basis, vgic_v4_vcpu_init() creates a per-vPE IRQ domain and fwnode to decouple vLPI doorbell allocation across separate vCPUs. The domain allocation routine in its_vpe_irq_domain_alloc() also allocates a vPE table entry and virtual pending table for the vCPU.
- upgrade_existing_lpis_to_vlpis() iterates through all of the LPIs targeting the vCPU and initializes hardware forwarding to process them as direct vLPIs. This includes updating each LPI's ITE to hold a vPE's vPEID instead of a Collection table's collection ID. It also sets each interrupt's irq->hw flag to true to notify the ITS to handle the interrupt via direct injection.
Signed-off-by: Maximilian Dittgen mdittgen@amazon.com --- arch/arm64/kvm/arm.c | 13 ++- arch/arm64/kvm/vgic/vgic-its.c | 4 +- arch/arm64/kvm/vgic/vgic-v4.c | 157 ++++++++++++++++++++++++++++- arch/arm64/kvm/vgic/vgic.h | 4 + drivers/irqchip/irq-gic-v3-its.c | 48 ++++++++- drivers/irqchip/irq-gic-v4.c | 56 ++++++++-- include/linux/irqchip/arm-gic-v3.h | 4 + include/linux/irqchip/arm-gic-v4.h | 8 +- 8 files changed, 277 insertions(+), 17 deletions(-)
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index afb04162e0cf..169860649bdd 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1951,8 +1951,17 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) return kvm_vm_ioctl_get_reg_writable_masks(kvm, &range); } case KVM_ENABLE_VCPU_VLPI: { - /* TODO: create ioctl handler function */ - return -ENOSYS; + int vcpu_id; + struct kvm_vcpu *vcpu; + + if (copy_from_user(&vcpu_id, argp, sizeof(vcpu_id))) + return -EFAULT; + + vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id); + if (!vcpu) + return -EINVAL; + + return kvm_vgic_enable_vcpu_vlpi(vcpu); } case KVM_DISABLE_VCPU_VLPI: { /* TODO: create ioctl handler function */ diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index ce3e3ed3f29f..5f3bbf24cc2f 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -23,7 +23,7 @@ #include "vgic.h" #include "vgic-mmio.h"
-static struct kvm_device_ops kvm_arm_vgic_its_ops; +struct kvm_device_ops kvm_arm_vgic_its_ops;
static int vgic_its_save_tables_v0(struct vgic_its *its); static int vgic_its_restore_tables_v0(struct vgic_its *its); @@ -2801,7 +2801,7 @@ static int vgic_its_get_attr(struct kvm_device *dev, return 0; }
-static struct kvm_device_ops kvm_arm_vgic_its_ops = { +struct kvm_device_ops kvm_arm_vgic_its_ops = { .name = "kvm-arm-vgic-its", .create = vgic_its_create, .destroy = vgic_its_destroy, diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c index cebcb9175572..efb9ac9188e3 100644 --- a/arch/arm64/kvm/vgic/vgic-v4.c +++ b/arch/arm64/kvm/vgic/vgic-v4.c @@ -316,9 +316,15 @@ int vgic_v4_init(struct kvm *kvm) } #else /* - * TODO: Initialize the shared VM properties that remain necessary - * in per-vCPU mode + * Initialize the shared VM properties that remain necessary in per-vCPU mode */ + + /* vPE properties table */ + if (!dist->its_vm.vprop_page) { + dist->its_vm.vprop_page = its_allocate_prop_table(GFP_KERNEL); + if (!dist->its_vm.vprop_page) + ret = -ENOMEM; + } #endif if (ret) vgic_v4_teardown(kvm); @@ -326,6 +332,51 @@ int vgic_v4_init(struct kvm *kvm) return ret; }
+/** + * vgic_v4_vcpu_init - When per-vCPU vLPI injection is enabled, + * initialize the GICv4 data structures for a specific vCPU + * @vcpu: Pointer to the vcpu being initialized + * + * Called every time the KVM_ENABLE_VCPU_VLPI ioctl is called. + */ +int vgic_v4_vcpu_init(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + struct vgic_dist *dist = &kvm->arch.vgic; + int i, ret, irq; + unsigned long irq_flags = DB_IRQ_FLAGS; + + /* Validate vgic_v4_init() has been called to allocate the vpe array */ + if (!dist->its_vm.vpes) + return -ENODEV; + + /* Link KVM distributor to the newly-allocated vPE */ + i = kvm_idx_from_vcpu(kvm, vcpu); + if (i == UINT_MAX) + return -EINVAL; + dist->its_vm.vpes[i] = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; + + ret = its_alloc_vcpu_irq(vcpu); + if (ret) + return ret; + + /* Same routine as the kvm_for_each_vcpu of vgic_v4_init */ + irq = dist->its_vm.vpes[i]->irq; + + if (kvm_vgic_global_state.has_gicv4_1) + irq_flags &= ~IRQ_NOAUTOEN; + irq_set_status_flags(irq, irq_flags); + + ret = vgic_v4_request_vpe_irq(vcpu, irq); + if (ret) + kvm_err("failed to allocate vcpu IRQ%d\n", irq); + + if (ret) + vgic_v4_teardown(kvm); + + return ret; +} + /** * vgic_v4_teardown - Free the GICv4 data structures * @kvm: Pointer to the VM being destroyed @@ -357,6 +408,9 @@ void vgic_v4_teardown(struct kvm *kvm) * in per-vCPU mode. Create separate teardown function * that operates on a per-vCPU basis. */ + + /* vPE properties table */ + its_free_prop_table(its_vm->vprop_page); #else its_free_vcpu_irqs(its_vm); #endif @@ -618,6 +672,105 @@ void kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int host_irq) vgic_put_irq(kvm, irq); }
+static int upgrade_existing_lpis_to_vlpis(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_device *dev; + struct vgic_its *its, *its_from_entry; + struct its_device *device; + struct its_ite *ite; + struct kvm_kernel_irq_routing_entry entry; + int ret = 0; + int host_irq; + + list_for_each_entry(dev, &kvm->devices, vm_node) { + /* Ensure we only look at ITS devices */ + if (dev->ops != &kvm_arm_vgic_its_ops) + continue; + + its = dev->private; + mutex_lock(&its->its_lock); + + list_for_each_entry(device, &its->device_list, dev_list) { + list_for_each_entry(ite, &device->itt_head, ite_list) { + /* ite->irq->hw means entry already upgraded to vLPI */ + if (ite->collection && + ite->collection->target_addr == vcpu->vcpu_id && + ite->irq && !ite->irq->hw) { + + /* + * An existing IRQ would only have a null host_irq if it is + * completely defined in software, in which case it cannot + * be direct injected anyways. Thus, we skip interrupt + * upgrade for IRQs with null host_irqs. 
+ */ + if (ite->irq->host_irq > 0) + host_irq = ite->irq->host_irq; + else + continue; + + /* Create routing entry */ + memset(&entry, 0, sizeof(entry)); + entry.gsi = host_irq; + entry.type = KVM_IRQ_ROUTING_MSI; + /* MSI address is system defined for ARM GICv3 */ + entry.msi.address_lo = + (u32)(its->vgic_its_base + GITS_TRANSLATER); + entry.msi.address_hi = + (u32)((its->vgic_its_base + GITS_TRANSLATER) >> 32); + entry.msi.data = ite->event_id; + entry.msi.devid = device->device_id; + entry.msi.flags = KVM_MSI_VALID_DEVID; + + /* Verify ITS consistency */ + its_from_entry = vgic_get_its(kvm, &entry); + if (IS_ERR(its_from_entry) || its_from_entry != its) + continue; + + /* Upgrade to vLPI */ + ret = kvm_vgic_v4_set_forwarding_locked(kvm, host_irq, + &entry, its); + if (ret) + kvm_info("Failed to upgrade LPI %d: %d\n", + host_irq, ret); + } + } + } + + mutex_unlock(&its->its_lock); + } + + return 0; +} + +/* Enable vLPI direct injection on a specific vCPU */ +int kvm_vgic_enable_vcpu_vlpi(struct kvm_vcpu *vcpu) +{ + int ret; + int vcpu_vlpi_status = kvm_vgic_query_vcpu_vlpi(vcpu); + + /* vGIC not initialized for vCPU */ + if (vcpu_vlpi_status < 0) + return vcpu_vlpi_status; + /* vLPI already enabled */ + if (vcpu_vlpi_status > 0) + return 0; + + /* Allocate the vPE struct and vPE table for the vCPU */ + ret = vgic_v4_vcpu_init(vcpu); + if (ret) + return ret; + + /* + * Upgrade existing LPIs to vLPIs. We + * do not need to error check since + * a failure in upgrading an LPI is non-breaking; + * those LPIs may continue to be processed by + * software. + */ + return upgrade_existing_lpis_to_vlpis(vcpu); +} + /* query whether vLPI direct injection is enabled on a specific vCPU. 
* return 0 if disabled, 1 if enabled, -EINVAL if vCPU non-existant or GIC * uninitialized diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 295088913c26..60ae0d1f044d 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -251,6 +251,8 @@ struct ap_list_summary { #define irqs_active_outside_lrs(s) \ ((s)->nr_act && irqs_outside_lrs(s))
+extern struct kvm_device_ops kvm_arm_vgic_its_ops; + int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, struct vgic_reg_attr *reg_attr); int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, @@ -434,6 +436,7 @@ static inline bool vgic_supports_direct_irqs(struct kvm *kvm) }
int vgic_v4_init(struct kvm *kvm); +int vgic_v4_vcpu_init(struct kvm_vcpu *vcpu); void vgic_v4_teardown(struct kvm *kvm); void vgic_v4_configure_vsgis(struct kvm *kvm); void vgic_v4_get_vlpi_state(struct vgic_irq *irq, bool *val); @@ -468,6 +471,7 @@ int vgic_its_debug_init(struct kvm_device *dev); void vgic_its_debug_destroy(struct kvm_device *dev);
bool kvm_per_vcpu_vlpi_supported(void); +int kvm_vgic_enable_vcpu_vlpi(struct kvm_vcpu *vcpu); int kvm_vgic_query_vcpu_vlpi(struct kvm_vcpu *vcpu);
#endif diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 467cb78435a9..67749578f973 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -2261,7 +2261,7 @@ static void gic_reset_prop_table(void *va) gic_flush_dcache_to_poc(va, LPI_PROPBASE_SZ); }
-static struct page *its_allocate_prop_table(gfp_t gfp_flags) +struct page *its_allocate_prop_table(gfp_t gfp_flags) { struct page *prop_page;
@@ -2275,7 +2275,7 @@ static struct page *its_allocate_prop_table(gfp_t gfp_flags) return prop_page; }
-static void its_free_prop_table(struct page *prop_page) +void its_free_prop_table(struct page *prop_page) { its_free_pages(page_address(prop_page), get_order(LPI_PROPBASE_SZ)); } @@ -4612,25 +4612,65 @@ static void its_vpe_irq_domain_free(struct irq_domain *domain,
BUG_ON(vm != vpe->its_vm);
+#ifdef CONFIG_ARM_GIC_V3_PER_VCPU_VLPI + free_lpi_range(vpe->vpe_db_lpi, 1); +#else clear_bit(data->hwirq, vm->db_bitmap); +#endif its_vpe_teardown(vpe); irq_domain_reset_irq_data(data); }
+#ifndef CONFIG_ARM_GIC_V3_PER_VCPU_VLPI if (bitmap_empty(vm->db_bitmap, vm->nr_db_lpis)) { its_lpi_free(vm->db_bitmap, vm->db_lpi_base, vm->nr_db_lpis); its_free_prop_table(vm->vprop_page); } +#endif }
static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs, void *args) { struct irq_chip *irqchip = &its_vpe_irq_chip; + int base, err; +#ifdef CONFIG_ARM_GIC_V3_PER_VCPU_VLPI + struct its_vpe *vpe = args; + + /* Per-vCPU mode: allocate domain on vPE––rather than VM––level */ + WARN_ON(nr_irqs != 1); + + /* Use VM's shared properties table */ + if (!vpe->its_vm || !vpe->its_vm->vprop_page) + return -EINVAL; + + if (gic_rdists->has_rvpeid) + irqchip = &its_vpe_4_1_irq_chip; + + err = alloc_lpi_range(1, &base); + if (err) + return err; + vpe->vpe_db_lpi = base; + err = its_vpe_init(vpe); + if (err) + return err; + + err = its_irq_gic_domain_alloc(domain, virq, vpe->vpe_db_lpi); + if (err) + goto err_teardown_vpe; + + irq_domain_set_hwirq_and_chip(domain, virq, 0, irqchip, vpe); + irqd_set_resend_when_in_progress(irq_get_irq_data(virq)); + + return 0; + +err_teardown_vpe: + its_vpe_teardown(vpe); +#else struct its_vm *vm = args; unsigned long *bitmap; struct page *vprop_page; - int base, nr_ids, i, err = 0; + int nr_ids, i;
bitmap = its_lpi_alloc(roundup_pow_of_two(nr_irqs), &base, &nr_ids); if (!bitmap) @@ -4673,7 +4713,7 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq
if (err) its_vpe_irq_domain_free(domain, virq, i); - +#endif return err; }
diff --git a/drivers/irqchip/irq-gic-v4.c b/drivers/irqchip/irq-gic-v4.c
index 8455b4a5fbb0..c8e324cd8911 100644
--- a/drivers/irqchip/irq-gic-v4.c
+++ b/drivers/irqchip/irq-gic-v4.c
@@ -7,6 +7,7 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/irqdomain.h>
+#include <linux/kvm_host.h>
 #include <linux/msi.h>
 #include <linux/pid.h>
 #include <linux/sched.h>
@@ -128,14 +129,14 @@ static int its_alloc_vcpu_sgis(struct its_vpe *vpe, int idx)
 	if (!name)
 		goto err;

-	vpe->fwnode = irq_domain_alloc_named_id_fwnode(name, idx);
-	if (!vpe->fwnode)
+	vpe->sgi_fwnode = irq_domain_alloc_named_id_fwnode(name, idx);
+	if (!vpe->sgi_fwnode)
 		goto err;

 	kfree(name);
 	name = NULL;

-	vpe->sgi_domain = irq_domain_create_linear(vpe->fwnode, 16,
+	vpe->sgi_domain = irq_domain_create_linear(vpe->sgi_fwnode, 16,
 						   sgi_domain_ops, vpe);
 	if (!vpe->sgi_domain)
 		goto err;
@@ -149,8 +150,8 @@ static int its_alloc_vcpu_sgis(struct its_vpe *vpe, int idx)
 err:
 	if (vpe->sgi_domain)
 		irq_domain_remove(vpe->sgi_domain);
-	if (vpe->fwnode)
-		irq_domain_free_fwnode(vpe->fwnode);
+	if (vpe->sgi_fwnode)
+		irq_domain_free_fwnode(vpe->sgi_fwnode);
 	kfree(name);
 	return -ENOMEM;
 }
@@ -199,6 +200,49 @@ int its_alloc_vcpu_irqs(struct its_vm *vm)
 	return -ENOMEM;
 }

+/* Set up @vcpu's own doorbell fwnode, IRQ domain and IRQ (plus vSGIs on v4.1) */
+int its_alloc_vcpu_irq(struct kvm_vcpu *vcpu)
+{
+	struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
+	int ret = -ENOMEM;
+
+	/* point all vPEs on a VM to the same shared dist its_vm */
+	vpe->its_vm = &vcpu->kvm->arch.vgic.its_vm;
+	if (!has_v4_1_sgi()) /* idai bool shares memory with sgi_domain pointer */
+		vpe->idai = true;
+
+	/* per-vPE fwnode; vcpu_id in the id differentiates vcpus in same VM */
+	if (!vpe->lpi_fwnode)
+		vpe->lpi_fwnode = irq_domain_alloc_named_id_fwnode("GICv4-vpe-lpi",
+				task_pid_nr(current) * 1000 + vcpu->vcpu_id);
+	if (!vpe->lpi_fwnode)
+		return ret;
+
+	/* create the per-vPE domain hierarchy and allocate its doorbell IRQ */
+	vpe->lpi_domain = irq_domain_create_hierarchy(gic_domain, 0, 1,
+				vpe->lpi_fwnode, vpe_domain_ops, vpe);
+	if (!vpe->lpi_domain)
+		goto err_free_fwnode;
+	vpe->irq = irq_domain_alloc_irqs(vpe->lpi_domain, 1, NUMA_NO_NODE, vpe);
+	if (vpe->irq <= 0)
+		goto err_remove_domain;
+
+	ret = its_alloc_vcpu_sgis(vpe, vcpu->vcpu_id);
+	if (!ret)
+		return 0;
+
+	/* unwind in reverse order, clearing stale handles so a retry is safe */
+	irq_domain_free_irqs(vpe->irq, 1);
+err_remove_domain:
+	vpe->irq = 0;
+	irq_domain_remove(vpe->lpi_domain);
+	vpe->lpi_domain = NULL;
+err_free_fwnode:
+	irq_domain_free_fwnode(vpe->lpi_fwnode);
+	vpe->lpi_fwnode = NULL;
+	return ret;
+}
+
 static void its_free_sgi_irqs(struct its_vm *vm)
 {
 	int i;
@@ -214,7 +258,7 @@ static void its_free_sgi_irqs(struct its_vm *vm)

 		irq_domain_free_irqs(irq, 16);
 		irq_domain_remove(vm->vpes[i]->sgi_domain);
-		irq_domain_free_fwnode(vm->vpes[i]->fwnode);
+		irq_domain_free_fwnode(vm->vpes[i]->sgi_fwnode);
 	}
 }

diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 70c0948f978e..5031a4c25543 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -641,6 +641,10 @@ int its_init(struct fwnode_handle *handle, struct rdists *rdists, struct irq_domain *domain, u8 irq_prio); int mbi_init(struct fwnode_handle *fwnode, struct irq_domain *parent);
+/* Enable prop table alloc/free on vGIC init/destroy when per-vCPU vLPI is enabled */ +struct page *its_allocate_prop_table(gfp_t gfp_flags); +void its_free_prop_table(struct page *prop_page); + static inline bool gic_enable_sre(void) { u32 val; diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index 0b0887099fd7..bc493fed75ab 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -8,6 +8,7 @@ #define __LINUX_IRQCHIP_ARM_GIC_V4_H
struct its_vpe; +struct kvm_vcpu;
/* * Maximum number of ITTs when GITS_TYPER.VMOVP == 0, using the @@ -42,6 +43,10 @@ struct its_vpe { struct its_vm *its_vm; /* per-vPE VLPI tracking */ atomic_t vlpi_count; + /* per-vPE domain for per-vCPU VLPI enablement */ + struct irq_domain *lpi_domain; + /* enables per-vPE vLPI IRQ Domains during per-vCPU VLPI enablement */ + struct fwnode_handle *lpi_fwnode; /* Doorbell interrupt */ int irq; irq_hw_number_t vpe_db_lpi; @@ -59,7 +64,7 @@ struct its_vpe { }; /* GICv4.1 implementations */ struct { - struct fwnode_handle *fwnode; + struct fwnode_handle *sgi_fwnode; struct irq_domain *sgi_domain; struct { u8 priority; @@ -139,6 +144,7 @@ struct its_cmd_info { };
int its_alloc_vcpu_irqs(struct its_vm *vm); +int its_alloc_vcpu_irq(struct kvm_vcpu *vcpu); void its_free_vcpu_irqs(struct its_vm *vm); int its_make_vpe_resident(struct its_vpe *vpe, bool g0en, bool g1en); int its_make_vpe_non_resident(struct its_vpe *vpe, bool db);