On Wed, 2025-12-03 at 14:36 +0100, Paolo Bonzini wrote:
So yes, it's a guest-visible change, but only if the VMM explicitly *asks* for the broadcast suppression feature to work, in which case it's *necessary* anyway.
I see what you mean and I guess you're right... "Setting X will cause the in-kernel IOAPIC to report version 0x20" is as obscure as it gets, but then so is "Setting X will break guests unless you tell in-kernel IOAPIC to report version 0x20".
So this is good, but the docs need to say clearly that this should only be set if either full in-kernel irqchip is in use or, for split irqchip, if the userspace IOAPIC implements directed EOI correctly.
Updated patch below, dropping the struct change and just directly using the helper which I *believe* is going to be called kvm_lapic_ignore_suppress_eoi_broadcast() and have the opposite polarity to the one I proposed upthread. Doesn't build here because of that helper, obvs.
Still untested.
For the documentation then, how about...
Setting KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST causes KVM to advertise and correctly implement the Directed EOI feature in the local APIC, suppressing broadcast EOI when the feature is enabled by the guest. Setting this flag will also cause the in-kernel I/O APIC to advertise version 0x20 with support for the EOI register; a userspace implementation of I/O APIC should also support the same, as some guest operating systems do not check for that feature in the I/O APIC before disabling the broadcast in the local APIC.
Setting KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST causes KVM not to advertise the Directed EOI feature in the local APIC.
Userspace should explicitly either enable or disable suppression of the EOI broadcast using one of the two flags above. For historical compatibility reasons, if neither flag is set then KVM will advertise the feature but will not actually suppress the EOI broadcast, leading to potential IRQ storms in some guest configurations.
From: David Woodhouse <dwmw@amazon.co.uk> Subject: [PATCH] KVM: x86/ioapic: Implement support for I/O APIC version 0x20 with EOIR
As the weirdness with EOI broadcast suppression is being fixed in KVM, also update the in-kernel I/O APIC to handle the directed EOI which guests will need to use instead, when broadcast EOI suppression is fully enabled.
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> --- arch/x86/kvm/ioapic.c | 30 ++++++++++++++++++++++++++++-- arch/x86/kvm/ioapic.h | 20 ++++++++++++-------- 2 files changed, 40 insertions(+), 10 deletions(-)
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 2c2783296aed..0ed84b02c521 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -48,8 +48,11 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic)
switch (ioapic->ioregsel) { case IOAPIC_REG_VERSION: - result = ((((IOAPIC_NUM_PINS - 1) & 0xff) << 16) - | (IOAPIC_VERSION_ID & 0xff)); + if (kvm_lapic_ignore_suppress_eoi_broadcast(ioapic->kvm)) + result = IOAPIC_VERSION_ID; + else + result = IOAPIC_VERSION_ID_EOIR; + result |= ((IOAPIC_NUM_PINS - 1) & 0xff) << 16; break;
case IOAPIC_REG_APIC_ID: @@ -57,6 +60,10 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic) result = ((ioapic->id & 0xf) << 24); break;
+ case IOAPIC_REG_BOOT_CONFIG: + result = 0x01; /* Processor bus */ + break; + default: { u32 redir_index = (ioapic->ioregsel - 0x10) >> 1; @@ -695,6 +702,25 @@ static int ioapic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, ioapic_write_indirect(ioapic, data); break;
+ case IOAPIC_REG_EOIR: + /* + * The EOIR register is supported (and version 0x20 advertised) + * when userspace explicitly enables broadcast EOI suppression. + */ + if (!kvm_lapic_ignore_suppress_eoi_broadcast(vcpu->kvm)) { + u8 vector = data & 0xff; + int i; + + rtc_irq_eoi(ioapic, vcpu, vector); + for (i = 0; i < IOAPIC_NUM_PINS; i++) { + union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i]; + + if (ent->fields.vector != vector) + continue; + kvm_ioapic_update_eoi_one(vcpu, ioapic, ent->fields.trig_mode, i); + } + } + break; default: break; } diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index bf28dbc11ff6..59d877f5f27b 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -11,7 +11,8 @@ struct kvm_vcpu;
#define IOAPIC_NUM_PINS KVM_IOAPIC_NUM_PINS #define MAX_NR_RESERVED_IOAPIC_PINS KVM_MAX_IRQ_ROUTES -#define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */ +#define IOAPIC_VERSION_ID 0x11 /* Default IOAPIC version */ +#define IOAPIC_VERSION_ID_EOIR 0x20 /* IOAPIC version with EOIR support */ #define IOAPIC_EDGE_TRIG 0 #define IOAPIC_LEVEL_TRIG 1
@@ -19,13 +20,16 @@ struct kvm_vcpu; #define IOAPIC_MEM_LENGTH 0x100
/* Direct registers. */ -#define IOAPIC_REG_SELECT 0x00 -#define IOAPIC_REG_WINDOW 0x10 - -/* Indirect registers. */ -#define IOAPIC_REG_APIC_ID 0x00 /* x86 IOAPIC only */ -#define IOAPIC_REG_VERSION 0x01 -#define IOAPIC_REG_ARB_ID 0x02 /* x86 IOAPIC only */ +#define IOAPIC_REG_SELECT 0x00 +#define IOAPIC_REG_WINDOW 0x10 +#define IOAPIC_REG_IRQPA 0x20 +#define IOAPIC_REG_EOIR 0x40 /* version 0x20+ only */ + +/* INDIRECT registers. */ +#define IOAPIC_REG_APIC_ID 0x00 /* x86 IOAPIC only */ +#define IOAPIC_REG_VERSION 0x01 +#define IOAPIC_REG_ARB_ID 0x02 /* x86 IOAPIC only */ +#define IOAPIC_REG_BOOT_CONFIG 0x03 /* x86 IOAPIC only */
/*ioapic delivery mode*/ #define IOAPIC_FIXED 0x0