From: Wanpeng Li wanpengli@tencent.com
[ Upstream commit 0361bdfddca20c8855ea3bdbbbc9c999912b10ff ]
MSR_KVM_POLL_CONTROL is cleared on reset, thus reverting guests to host-side polling after suspend/resume. Non-bootstrap CPUs are restored correctly by the haltpoll driver because they are hot-unplugged during suspend and hot-plugged during resume; however, the BSP is not hotpluggable and remains in host-sde polling mode after the guest resume. The makes the guest pay for the cost of vmexits every time the guest enters idle.
Fix it by recording BSP's haltpoll state and resuming it during guest resume.
Cc: Marcelo Tosatti mtosatti@redhat.com Signed-off-by: Wanpeng Li wanpengli@tencent.com Message-Id: 1650267752-46796-1-git-send-email-wanpengli@tencent.com Signed-off-by: Paolo Bonzini pbonzini@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/kernel/kvm.c | 13 +++++++++++++ 1 file changed, 13 insertions(+)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 408b51aba293..f582dda8dd34 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -59,6 +59,7 @@ static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __align DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64) __visible; static int has_steal_clock = 0;
+static int has_guest_poll = 0; /* * No need for any "IO delay" on KVM */ @@ -584,14 +585,26 @@ static int kvm_cpu_down_prepare(unsigned int cpu)
static int kvm_suspend(void) { + u64 val = 0; + kvm_guest_cpu_offline(false);
+#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL + if (kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) + rdmsrl(MSR_KVM_POLL_CONTROL, val); + has_guest_poll = !(val & 1); +#endif return 0; }
static void kvm_resume(void) { kvm_cpu_online(raw_smp_processor_id()); + +#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL + if (kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL) && has_guest_poll) + wrmsrl(MSR_KVM_POLL_CONTROL, 0); +#endif }
static struct syscore_ops kvm_syscore_ops = {
From: Wanpeng Li wanpengli@tencent.com
[ Upstream commit 1714a4eb6fb0cb79f182873cd011a8ed60ac65e8 ]
As commit 0c5f81dad46 ("KVM: LAPIC: Inject timer interrupt via posted interrupt") mentioned that the host admin should well tune the guest setup, so that vCPUs are placed on isolated pCPUs, and with several pCPUs surplus for *busy* housekeeping. In this setup, it is preferrable to disable mwait/hlt/pause vmexits to keep the vCPUs in non-root mode.
However, if only some guests isolated and others not, they would not have any benefit from posted timer interrupts, and at the same time lose VMX preemption timer fast paths because kvm_can_post_timer_interrupt() returns true and therefore forces kvm_can_use_hv_timer() to false.
By guaranteeing that posted-interrupt timer is only used if MWAIT or HLT are done without vmexit, KVM can make a better choice and use the VMX preemption timer and the corresponding fast paths.
Reported-by: Aili Yao yaoaili@kingsoft.com Reviewed-by: Sean Christopherson seanjc@google.com Cc: Aili Yao yaoaili@kingsoft.com Cc: Sean Christopherson seanjc@google.com Signed-off-by: Wanpeng Li wanpengli@tencent.com Message-Id: 1643112538-36743-1-git-send-email-wanpengli@tencent.com Signed-off-by: Paolo Bonzini pbonzini@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/kvm/lapic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index afe3b8e61514..3696b4de9d99 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -118,7 +118,8 @@ static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu) { - return pi_inject_timer && kvm_vcpu_apicv_active(vcpu); + return pi_inject_timer && kvm_vcpu_apicv_active(vcpu) && + (kvm_mwait_in_guest(vcpu->kvm) || kvm_hlt_in_guest(vcpu->kvm)); } EXPORT_SYMBOL_GPL(kvm_can_post_timer_interrupt);
On 4/27/22 17:54, Sasha Levin wrote:
From: Wanpeng Li wanpengli@tencent.com
[ Upstream commit 1714a4eb6fb0cb79f182873cd011a8ed60ac65e8 ]
As commit 0c5f81dad46 ("KVM: LAPIC: Inject timer interrupt via posted interrupt") mentioned that the host admin should well tune the guest setup, so that vCPUs are placed on isolated pCPUs, and with several pCPUs surplus for *busy* housekeeping. In this setup, it is preferrable to disable mwait/hlt/pause vmexits to keep the vCPUs in non-root mode.
However, if only some guests isolated and others not, they would not have any benefit from posted timer interrupts, and at the same time lose VMX preemption timer fast paths because kvm_can_post_timer_interrupt() returns true and therefore forces kvm_can_use_hv_timer() to false.
By guaranteeing that posted-interrupt timer is only used if MWAIT or HLT are done without vmexit, KVM can make a better choice and use the VMX preemption timer and the corresponding fast paths.
Reported-by: Aili Yao yaoaili@kingsoft.com Reviewed-by: Sean Christopherson seanjc@google.com Cc: Aili Yao yaoaili@kingsoft.com Cc: Sean Christopherson seanjc@google.com Signed-off-by: Wanpeng Li wanpengli@tencent.com Message-Id: 1643112538-36743-1-git-send-email-wanpengli@tencent.com Signed-off-by: Paolo Bonzini pbonzini@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org
arch/x86/kvm/lapic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index afe3b8e61514..3696b4de9d99 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -118,7 +118,8 @@ static inline u32 kvm_x2apic_id(struct kvm_lapic *apic) bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu) {
- return pi_inject_timer && kvm_vcpu_apicv_active(vcpu);
- return pi_inject_timer && kvm_vcpu_apicv_active(vcpu) &&
} EXPORT_SYMBOL_GPL(kvm_can_post_timer_interrupt);(kvm_mwait_in_guest(vcpu->kvm) || kvm_hlt_in_guest(vcpu->kvm));
Acked-by: Paolo Bonzini pbonzini@redhat.com
On 4/27/22 17:54, Sasha Levin wrote:
From: Wanpeng Li wanpengli@tencent.com
[ Upstream commit 0361bdfddca20c8855ea3bdbbbc9c999912b10ff ]
MSR_KVM_POLL_CONTROL is cleared on reset, thus reverting guests to host-side polling after suspend/resume. Non-bootstrap CPUs are restored correctly by the haltpoll driver because they are hot-unplugged during suspend and hot-plugged during resume; however, the BSP is not hotpluggable and remains in host-sde polling mode after the guest resume. The makes the guest pay for the cost of vmexits every time the guest enters idle.
Fix it by recording BSP's haltpoll state and resuming it during guest resume.
Cc: Marcelo Tosatti mtosatti@redhat.com Signed-off-by: Wanpeng Li wanpengli@tencent.com Message-Id: 1650267752-46796-1-git-send-email-wanpengli@tencent.com Signed-off-by: Paolo Bonzini pbonzini@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org
arch/x86/kernel/kvm.c | 13 +++++++++++++ 1 file changed, 13 insertions(+)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 408b51aba293..f582dda8dd34 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -59,6 +59,7 @@ static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __align DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64) __visible; static int has_steal_clock = 0; +static int has_guest_poll = 0; /*
- No need for any "IO delay" on KVM
*/ @@ -584,14 +585,26 @@ static int kvm_cpu_down_prepare(unsigned int cpu) static int kvm_suspend(void) {
- u64 val = 0;
- kvm_guest_cpu_offline(false);
+#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL
- if (kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL))
rdmsrl(MSR_KVM_POLL_CONTROL, val);
- has_guest_poll = !(val & 1);
+#endif return 0; } static void kvm_resume(void) { kvm_cpu_online(raw_smp_processor_id());
+#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL
- if (kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL) && has_guest_poll)
wrmsrl(MSR_KVM_POLL_CONTROL, 0);
+#endif } static struct syscore_ops kvm_syscore_ops = {
Acked-by: Paolo Bonzini pbonzini@redhat.com
linux-stable-mirror@lists.linaro.org