On Sun, Nov 03, 2024, Manali Shukla wrote:
On 10/15/2024 11:19 PM, Sean Christopherson wrote:
On Fri, Oct 04, 2024, Manali Shukla wrote:
...
+static int bus_lock_exit(struct kvm_vcpu *vcpu) +{
- struct vcpu_svm *svm = to_svm(vcpu);
- vcpu->run->exit_reason = KVM_EXIT_X86_BUS_LOCK;
- vcpu->run->flags |= KVM_RUN_X86_BUS_LOCK;
- /*
* Reload the counter with value greater than '0'.
The value quite obviously must be exactly '1', not simply greater than '0. I also think this is the wrong place to set the counter. Rather than set the counter at the time of exit, KVM should implement a vcpu->arch.complete_userspace_io callback and set the counter to '1' if and only if RIP (or LIP, but I have no objection to keeping things simple) is unchanged. It's a bit of extra complexity, but it will make it super obvious why KVM is setting the counter to '1'. And, if userspace wants to stuff state and move past the instruction, e.g. by emulating the guilty instruction, then KVM won't unnecessarily allow a bus lock in the guest.
And then the comment can be:
/* * If userspace has NOT change RIP, then KVM's ABI is to let the guest * execute the bus-locking instruction. Set the bus lock counter to '1' * to effectively step past the bus lock. */
The bus lock threshold intercept feature is available for SEV-ES and SEV-SNP guests too. The rip where the bus lock exit occurred, is not available in bus_lock_exit handler for SEV-ES and SEV-SNP guests, so the above-mentioned solution won't work with SEV-ES and SEV-SNP guests.
I would propose to add the above-mentioned solution only for normal and SEV guests and unconditionally reloading of bus_lock_counter to 1 in complete_userspace_io for SEV-ES and SEV-SNP guests.
Yeah, that works. Though I would condition the check on guest_state_protected. Actually, and this is going to seem really stupid, but everything will Just Work if you use kvm_get_linear_rip() and kvm_is_linear_rip(), because kvm_get_linear_rip() returns '0' for vCPUs with protected state. I.e. KVM will do a rather superfluous cui() callback, but otherwise it's fine. Silly, but in many ways preferable to special casing ES and SNP guests.
On a related topic, can you add a refacotring prep patch to move linear_rip out of kvm_pio_request and place it next to complete_userspace_io? There's nothing port I/O specific about that field, it just so happens to that port I/O is the only case where KVM's ABI is to let userspace stuff state (to emulate RESET) without first completing the I/O instruction.
I.e.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8e8ca6dab2b2..8617b15096a6 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -406,7 +406,6 @@ struct kvm_rmap_head { };
struct kvm_pio_request { - unsigned long linear_rip; unsigned long count; int in; int port; @@ -884,6 +883,7 @@ struct kvm_vcpu_arch { bool emulate_regs_need_sync_to_vcpu; bool emulate_regs_need_sync_from_vcpu; int (*complete_userspace_io)(struct kvm_vcpu *vcpu); + unsigned long cui_linear_rip;
gpa_t time; struct pvclock_vcpu_time_info hv_clock; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 425a301911a6..7704d3901481 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9308,7 +9308,7 @@ static int complete_fast_pio_out(struct kvm_vcpu *vcpu) { vcpu->arch.pio.count = 0;
- if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.pio.linear_rip))) + if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.cui_linear_rip))) return 1;
return kvm_skip_emulated_instruction(vcpu); @@ -9333,7 +9333,7 @@ static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, complete_fast_pio_out_port_0x7e; kvm_skip_emulated_instruction(vcpu); } else { - vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu); + vcpu->arch.cui_linear_rip = kvm_get_linear_rip(vcpu); vcpu->arch.complete_userspace_io = complete_fast_pio_out; } return 0; @@ -9346,7 +9346,7 @@ static int complete_fast_pio_in(struct kvm_vcpu *vcpu) /* We should only ever be called with arch.pio.count equal to 1 */ BUG_ON(vcpu->arch.pio.count != 1);
- if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.pio.linear_rip))) { + if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.cui_linear_rip))) { vcpu->arch.pio.count = 0; return 1; } @@ -9375,7 +9375,7 @@ static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, return ret; }
- vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu); + vcpu->arch.cui_linear_rip = kvm_get_linear_rip(vcpu); vcpu->arch.complete_userspace_io = complete_fast_pio_in;
return 0;