In some architectural corner cases, AT instructions can generate an exception, which KVM is not really ready to handle properly. Teach the code to handle this situation gracefully.
This is a backport of the respective upstream patches to v4.9(.235). James prepared and tested these already, but we were lacking the upstream commit IDs so far. I am sending this on his behalf, since he is off this week.
The original patches contained stable tags, but with a prerequisite patch in v5.3. Patch 2/4 is a backport of this one (removing ARMv8.2 RAS barriers, which are not supported in v4.9), patches 1/4 and 3/4 needed some massaging to apply and work on 4.9.
Cheers, Andre.
James Morse (4): KVM: arm64: Add kvm_extable for vaxorcism code KVM: arm64: Defer guest entry when an asynchronous exception is pending KVM: arm64: Survive synchronous exceptions caused by AT instructions KVM: arm64: Set HCR_EL2.PTW to prevent AT taking synchronous exception
arch/arm64/include/asm/kvm_arm.h | 3 +- arch/arm64/include/asm/kvm_asm.h | 43 ++++++++++++++++++++++ arch/arm64/kernel/vmlinux.lds.S | 8 ++++ arch/arm64/kvm/hyp/entry.S | 26 ++++++++++--- arch/arm64/kvm/hyp/hyp-entry.S | 63 +++++++++++++++++++++----------- arch/arm64/kvm/hyp/switch.c | 39 ++++++++++++++++++-- 6 files changed, 150 insertions(+), 32 deletions(-)
From: James Morse james.morse@arm.com
commit e9ee186bb735bfc17fa81dbc9aebf268aee5b41e upstream.
KVM has a one instruction window where it will allow an SError exception to be consumed by the hypervisor without treating it as a hypervisor bug. This is used to consume asynchronous external abort that were caused by the guest.
As we are about to add another location that survives unexpected exceptions, generalise this code to make it behave like the host's extable.
KVM's version has to be mapped to EL2 to be accessible on nVHE systems.
The SError vaxorcism code is a one instruction window, so has two entries in the extable. Because the KVM code is copied for VHE and nVHE, we end up with four entries, half of which correspond with code that isn't mapped.
Cc: stable@vger.kernel.org # v4.9 Signed-off-by: James Morse james.morse@arm.com Reviewed-by: Marc Zyngier maz@kernel.org Signed-off-by: Catalin Marinas catalin.marinas@arm.com Signed-off-by: Andre Przywara andre.przywara@arm.com --- arch/arm64/include/asm/kvm_asm.h | 15 ++++++++++ arch/arm64/kernel/vmlinux.lds.S | 8 +++++ arch/arm64/kvm/hyp/entry.S | 16 ++++++---- arch/arm64/kvm/hyp/hyp-entry.S | 51 ++++++++++++++++++++------------ arch/arm64/kvm/hyp/switch.c | 31 +++++++++++++++++++ 5 files changed, 96 insertions(+), 25 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 8f5cf83b2339..f6a22f07fa84 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -106,6 +106,21 @@ extern u32 __init_stage2_translation(void); kern_hyp_va \vcpu .endm
+/* + * KVM extable for unexpected exceptions. + * In the same format _asm_extable, but output to a different section so that + * it can be mapped to EL2. The KVM version is not sorted. The caller must + * ensure: + * x18 has the hypervisor value to allow any Shadow-Call-Stack instrumented + * code to write to it, and that SPSR_EL2 and ELR_EL2 are restored by the fixup. + */ +.macro _kvm_extable, from, to + .pushsection __kvm_ex_table, "a" + .align 3 + .long (\from - .), (\to - .) + .popsection +.endm + #endif
#endif /* __ARM_KVM_ASM_H__ */ diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 6a584558b29d..fa3ffad50a61 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -23,6 +23,13 @@ ENTRY(_text)
jiffies = jiffies_64;
+ +#define HYPERVISOR_EXTABLE \ + . = ALIGN(SZ_8); \ + VMLINUX_SYMBOL(__start___kvm_ex_table) = .; \ + *(__kvm_ex_table) \ + VMLINUX_SYMBOL(__stop___kvm_ex_table) = .; + #define HYPERVISOR_TEXT \ /* \ * Align to 4 KB so that \ @@ -38,6 +45,7 @@ jiffies = jiffies_64; VMLINUX_SYMBOL(__hyp_idmap_text_end) = .; \ VMLINUX_SYMBOL(__hyp_text_start) = .; \ *(.hyp.text) \ + HYPERVISOR_EXTABLE \ VMLINUX_SYMBOL(__hyp_text_end) = .;
#define IDMAP_TEXT \ diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index a360ac6e89e9..76545f95f704 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -135,18 +135,22 @@ ENTRY(__guest_exit) // This is our single instruction exception window. A pending // SError is guaranteed to occur at the earliest when we unmask // it, and at the latest just after the ISB. - .global abort_guest_exit_start abort_guest_exit_start:
isb
- .global abort_guest_exit_end abort_guest_exit_end: + msr daifset, #4 // Mask aborts + ret
- // If the exception took place, restore the EL1 exception - // context so that we can report some information. - // Merge the exception code with the SError pending bit. - tbz x0, #ARM_EXIT_WITH_SERROR_BIT, 1f + _kvm_extable abort_guest_exit_start, 9997f + _kvm_extable abort_guest_exit_end, 9997f +9997: + msr daifset, #4 // Mask aborts + mov x0, #(1 << ARM_EXIT_WITH_SERROR_BIT) + + // restore the EL1 exception context so that we can report some + // information. Merge the exception code with the SError pending bit. msr elr_el2, x2 msr esr_el2, x3 msr spsr_el2, x4 diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index bf4988f9dae8..ee2a1b4b4e39 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -25,6 +25,30 @@ #include <asm/kvm_asm.h> #include <asm/kvm_mmu.h>
+.macro save_caller_saved_regs_vect + stp x0, x1, [sp, #-16]! + stp x2, x3, [sp, #-16]! + stp x4, x5, [sp, #-16]! + stp x6, x7, [sp, #-16]! + stp x8, x9, [sp, #-16]! + stp x10, x11, [sp, #-16]! + stp x12, x13, [sp, #-16]! + stp x14, x15, [sp, #-16]! + stp x16, x17, [sp, #-16]! +.endm + +.macro restore_caller_saved_regs_vect + ldp x16, x17, [sp], #16 + ldp x14, x15, [sp], #16 + ldp x12, x13, [sp], #16 + ldp x10, x11, [sp], #16 + ldp x8, x9, [sp], #16 + ldp x6, x7, [sp], #16 + ldp x4, x5, [sp], #16 + ldp x2, x3, [sp], #16 + ldp x0, x1, [sp], #16 +.endm + .text .pushsection .hyp.text, "ax"
@@ -178,25 +202,14 @@ el1_error: b __guest_exit
el2_error: - /* - * Only two possibilities: - * 1) Either we come from the exit path, having just unmasked - * PSTATE.A: change the return code to an EL2 fault, and - * carry on, as we're already in a sane state to handle it. - * 2) Or we come from anywhere else, and that's a bug: we panic. - * - * For (1), x0 contains the original return code and x1 doesn't - * contain anything meaningful at that stage. We can reuse them - * as temp registers. - * For (2), who cares? - */ - mrs x0, elr_el2 - adr x1, abort_guest_exit_start - cmp x0, x1 - adr x1, abort_guest_exit_end - ccmp x0, x1, #4, ne - b.ne __hyp_panic - mov x0, #(1 << ARM_EXIT_WITH_SERROR_BIT) + save_caller_saved_regs_vect + stp x29, x30, [sp, #-16]! + + bl kvm_unexpected_el2_exception + + ldp x29, x30, [sp], #16 + restore_caller_saved_regs_vect + eret
ENTRY(__hyp_do_panic) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index ed7e3a288b4e..0d3d66cc3841 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -25,6 +25,10 @@ #include <asm/kvm_asm.h> #include <asm/kvm_emulate.h> #include <asm/kvm_hyp.h> +#include <asm/uaccess.h> + +extern struct exception_table_entry __start___kvm_ex_table; +extern struct exception_table_entry __stop___kvm_ex_table;
static bool __hyp_text __fpsimd_enabled_nvhe(void) { @@ -454,3 +458,30 @@ void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
unreachable(); } + +asmlinkage void __hyp_text kvm_unexpected_el2_exception(void) +{ + unsigned long addr, fixup; + struct kvm_cpu_context *host_ctxt; + struct exception_table_entry *entry, *end; + unsigned long elr_el2 = read_sysreg(elr_el2); + + entry = hyp_symbol_addr(__start___kvm_ex_table); + end = hyp_symbol_addr(__stop___kvm_ex_table); + host_ctxt = __hyp_this_cpu_ptr(kvm_host_cpu_state); + + while (entry < end) { + addr = (unsigned long)&entry->insn + entry->insn; + fixup = (unsigned long)&entry->fixup + entry->fixup; + + if (addr != elr_el2) { + entry++; + continue; + } + + write_sysreg(fixup, elr_el2); + return; + } + + hyp_panic(host_ctxt); +}
From: James Morse james.morse@arm.com
commit 5dcd0fdbb492d49dac6bf21c436dfcb5ded0a895 upstream.
SError that occur during world-switch's entry to the guest will be accounted to the guest, as the exception is masked until we enter the guest... but we want to attribute the SError as precisely as possible.
Reading DISR_EL1 before guest entry requires free registers, and using ESB+DISR_EL1 to consume and read back the ESR would leave KVM holding a host SError... We would rather leave the SError pending and let the host take it once we exit world-switch. To do this, we need to defer guest-entry if an SError is pending.
Read the ISR to see if SError (or an IRQ) is pending. If so fake an exit. Place this check between __guest_enter()'s save of the host registers, and restore of the guest's. SError that occur between here and the eret into the guest must have affected the guest's registers, which we can naturally attribute to the guest.
The dsb is needed to ensure any previous writes have been done before we read ISR_EL1. On systems without the v8.2 RAS extensions this doesn't give us anything as we can't contain errors, and the ESR bits to describe the severity are all implementation-defined. Replace this with a nop for these systems.
v4.9-backport: as this kernel version doesn't have the RAS support at all, remove the RAS alternative.
Cc: stable@vger.kernel.org # v4.9 Signed-off-by: James Morse james.morse@arm.com Signed-off-by: Marc Zyngier marc.zyngier@arm.com [ James: Removed v8.2 RAS related barriers ] Signed-off-by: James Morse james.morse@arm.com Signed-off-by: Andre Przywara andre.przywara@arm.com --- arch/arm64/kvm/hyp/entry.S | 10 ++++++++++ 1 file changed, 10 insertions(+)
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index 76545f95f704..4e0eac361f87 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -17,6 +17,7 @@
#include <linux/linkage.h>
+#include <asm/alternative.h> #include <asm/asm-offsets.h> #include <asm/assembler.h> #include <asm/fpsimdmacros.h> @@ -62,6 +63,15 @@ ENTRY(__guest_enter) // Store the host regs save_callee_saved_regs x1
+ // Now the host state is stored if we have a pending RAS SError it must + // affect the host. If any asynchronous exception is pending we defer + // the guest entry. + mrs x1, isr_el1 + cbz x1, 1f + mov x0, #ARM_EXCEPTION_IRQ + ret + +1: add x18, x0, #VCPU_CONTEXT
// Restore guest regs x0-x17
From: James Morse james.morse@arm.com
commit 88a84ccccb3966bcc3f309cdb76092a9892c0260 upstream.
KVM doesn't expect any synchronous exceptions when executing, any such exception leads to a panic(). AT instructions access the guest page tables, and can cause a synchronous external abort to be taken.
The arm-arm is unclear on what should happen if the guest has configured the hardware update of the access-flag, and a memory type in TCR_EL1 that does not support atomic operations. B2.2.6 "Possible implementation restrictions on using atomic instructions" from DDI0487F.a lists synchronous external abort as a possible behaviour of atomic instructions that target memory that isn't writeback cacheable, but the page table walker may behave differently.
Make KVM robust to synchronous exceptions caused by AT instructions. Add a get_user() style helper for AT instructions that returns -EFAULT if an exception was generated.
While KVM's version of the exception table mixes synchronous and asynchronous exceptions, only one of these can occur at each location.
Re-enter the guest when the AT instructions take an exception on the assumption the guest will take the same exception. This isn't guaranteed to make forward progress, as the AT instructions may always walk the page tables, but guest execution may use the translation cached in the TLB.
This isn't a problem, as since commit 5dcd0fdbb492 ("KVM: arm64: Defer guest entry when an asynchronous exception is pending"), KVM will return to the host to process IRQs allowing the rest of the system to keep running.
Cc: stable@vger.kernel.org # v4.9 Signed-off-by: James Morse james.morse@arm.com Reviewed-by: Marc Zyngier maz@kernel.org Signed-off-by: Catalin Marinas catalin.marinas@arm.com Signed-off-by: Andre Przywara andre.przywara@arm.com --- arch/arm64/include/asm/kvm_asm.h | 28 ++++++++++++++++++++++++++++ arch/arm64/kvm/hyp/hyp-entry.S | 12 ++++++++++-- arch/arm64/kvm/hyp/switch.c | 8 ++++---- 3 files changed, 42 insertions(+), 6 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index f6a22f07fa84..3d2fddac25b9 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -82,6 +82,34 @@ extern u32 __init_stage2_translation(void); *__hyp_this_cpu_ptr(sym); \ })
+#define __KVM_EXTABLE(from, to) \ + " .pushsection __kvm_ex_table, "a"\n" \ + " .align 3\n" \ + " .long (" #from " - .), (" #to " - .)\n" \ + " .popsection\n" + + +#define __kvm_at(at_op, addr) \ +( { \ + int __kvm_at_err = 0; \ + u64 spsr, elr; \ + asm volatile( \ + " mrs %1, spsr_el2\n" \ + " mrs %2, elr_el2\n" \ + "1: at "at_op", %3\n" \ + " isb\n" \ + " b 9f\n" \ + "2: msr spsr_el2, %1\n" \ + " msr elr_el2, %2\n" \ + " mov %w0, %4\n" \ + "9:\n" \ + __KVM_EXTABLE(1b, 2b) \ + : "+r" (__kvm_at_err), "=&r" (spsr), "=&r" (elr) \ + : "r" (addr), "i" (-EFAULT)); \ + __kvm_at_err; \ +} ) + + #else /* __ASSEMBLY__ */
.macro hyp_adr_this_cpu reg, sym, tmp diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index ee2a1b4b4e39..7ced1fb93d07 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -201,6 +201,15 @@ el1_error: mov x0, #ARM_EXCEPTION_EL1_SERROR b __guest_exit
+el2_sync: + save_caller_saved_regs_vect + stp x29, x30, [sp, #-16]! + bl kvm_unexpected_el2_exception + ldp x29, x30, [sp], #16 + restore_caller_saved_regs_vect + + eret + el2_error: save_caller_saved_regs_vect stp x29, x30, [sp, #-16]! @@ -238,7 +247,6 @@ ENDPROC(\label) invalid_vector el2t_irq_invalid invalid_vector el2t_fiq_invalid invalid_vector el2t_error_invalid - invalid_vector el2h_sync_invalid invalid_vector el2h_irq_invalid invalid_vector el2h_fiq_invalid invalid_vector el1_sync_invalid @@ -255,7 +263,7 @@ ENTRY(__kvm_hyp_vector) ventry el2t_fiq_invalid // FIQ EL2t ventry el2t_error_invalid // Error EL2t
- ventry el2h_sync_invalid // Synchronous EL2h + ventry el2_sync // Synchronous EL2h ventry el2h_irq_invalid // IRQ EL2h ventry el2h_fiq_invalid // FIQ EL2h ventry el2_error // Error EL2h diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 0d3d66cc3841..0a2f37bceab0 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -206,10 +206,10 @@ static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar) * saved the guest context yet, and we may return early... */ par = read_sysreg(par_el1); - asm volatile("at s1e1r, %0" : : "r" (far)); - isb(); - - tmp = read_sysreg(par_el1); + if (!__kvm_at("s1e1r", far)) + tmp = read_sysreg(par_el1); + else + tmp = 1; /* back to the guest */ write_sysreg(par, par_el1);
if (unlikely(tmp & 1))
From: James Morse james.morse@arm.com
commit 71a7f8cb1ca4ca7214a700b1243626759b6c11d4 upstream.
AT instructions do a translation table walk and return the result, or the fault in PAR_EL1. KVM uses these to find the IPA when the value is not provided by the CPU in HPFAR_EL1.
If a translation table walk causes an external abort it is taken as an exception, even if it was due to an AT instruction. (DDI0487F.a's D5.2.11 "Synchronous faults generated by address translation instructions")
While we previously made KVM resilient to exceptions taken due to AT instructions, the device access causes mismatched attributes, and may occur speculatively. Prevent this, by forbidding a walk through memory described as device at stage2. Now such AT instructions will report a stage2 fault.
Such a fault will cause KVM to restart the guest. If the AT instructions always walk the page tables, but guest execution uses the translation cached in the TLB, the guest can't make forward progress until the TLB entry is evicted. This isn't a problem, as since commit 5dcd0fdbb492 ("KVM: arm64: Defer guest entry when an asynchronous exception is pending"), KVM will return to the host to process IRQs allowing the rest of the system to keep running.
Cc: stable@vger.kernel.org # v4.9 Signed-off-by: James Morse james.morse@arm.com Reviewed-by: Marc Zyngier maz@kernel.org Signed-off-by: Catalin Marinas catalin.marinas@arm.com Signed-off-by: Andre Przywara andre.przywara@arm.com --- arch/arm64/include/asm/kvm_arm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index a11c8c2915c9..e8cb69b0cf4f 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -78,10 +78,11 @@ * IMO: Override CPSR.I and enable signaling with VI * FMO: Override CPSR.F and enable signaling with VF * SWIO: Turn set/way invalidates into set/way clean+invalidate + * PTW: Take a stage2 fault if a stage1 walk steps in device memory */ #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \ HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \ - HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW) + HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_PTW) #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF) #define HCR_INT_OVERRIDE (HCR_FMO | HCR_IMO) #define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK)
On Fri, Sep 04, 2020 at 12:28:56PM +0100, Andre Przywara wrote:
In some architectural corner cases, AT instructions can generate an exception, which KVM is not really ready to handle properly. Teach the code to handle this situation gracefully.
This is a backport of the respective upstream patches to v4.9(.235). James prepared and tested these already, but we were lacking the upstream commit IDs so far. I am sending this on his behalf, since he is off this week.
The original patches contained stable tags, but with a prerequisite patch in v5.3. Patch 2/4 is a backport of this one (removing ARMv8.2 RAS barriers, which are not supported in v4.9), patches 1/4 and 3/4 needed some massaging to apply and work on 4.9.
This, and the 4.14 series, now queued up, thanks.
greg k-h
linux-stable-mirror@lists.linaro.org