This series backports some recent fixes for SVE/KVM interactions from Mark Rutland to v5.15.
Signed-off-by: Mark Brown broonie@kernel.org
---
Changes in v3:
- Explicitly include "KVM: arm64: Always start with clearing SVE flag on
  load", it was included previously as part of a conflict resolution.
- Link to v2: https://lore.kernel.org/r/20250403-stable-sve-5-15-v2-0-30a36a78a20a@kernel....
Changes in v2:
- Resend with Greg and the stable list added.
- Link to v1: https://lore.kernel.org/r/20250402-stable-sve-5-15-v1-0-84d0e5ff1102@kernel....
---
Fuad Tabba (1):
      KVM: arm64: Calculate cptr_el2 traps on activating traps

Marc Zyngier (2):
      KVM: arm64: Get rid of host SVE tracking/saving
      KVM: arm64: Always start with clearing SVE flag on load

Mark Brown (4):
      KVM: arm64: Discard any SVE state when entering KVM guests
      arm64/fpsimd: Track the saved FPSIMD state type separately to TIF_SVE
      arm64/fpsimd: Have KVM explicitly say which FP registers to save
      arm64/fpsimd: Stop using TIF_SVE to manage register saving in KVM

Mark Rutland (4):
      KVM: arm64: Unconditionally save+flush host FPSIMD/SVE/SME state
      KVM: arm64: Remove host FPSIMD saving for non-protected KVM
      KVM: arm64: Remove VHE host restore of CPACR_EL1.ZEN
      KVM: arm64: Eagerly switch ZCR_EL{1,2}
 arch/arm64/include/asm/fpsimd.h         |   4 +-
 arch/arm64/include/asm/kvm_host.h       |  17 +++--
 arch/arm64/include/asm/kvm_hyp.h        |   7 ++
 arch/arm64/include/asm/processor.h      |   7 ++
 arch/arm64/kernel/fpsimd.c              | 117 +++++++++++++++++++++++---------
 arch/arm64/kernel/process.c             |   3 +
 arch/arm64/kernel/ptrace.c              |   3 +
 arch/arm64/kernel/signal.c              |   3 +
 arch/arm64/kvm/arm.c                    |   1 -
 arch/arm64/kvm/fpsimd.c                 |  72 +++++++++----------
 arch/arm64/kvm/hyp/entry.S              |   5 ++
 arch/arm64/kvm/hyp/include/hyp/switch.h |  86 +++++++++++++++--------
 arch/arm64/kvm/hyp/nvhe/hyp-main.c      |   9 ++-
 arch/arm64/kvm/hyp/nvhe/switch.c        |  52 +++++++++-----
 arch/arm64/kvm/hyp/vhe/switch.c         |   4 ++
 arch/arm64/kvm/reset.c                  |   3 +
 16 files changed, 266 insertions(+), 127 deletions(-)
---
base-commit: 0c935c049b5c196b83b968c72d348ae6fff83ea2
change-id: 20250326-stable-sve-5-15-bfd75482dcfa
Best regards,
From: Marc Zyngier maz@kernel.org
[ Upstream commit 8383741ab2e773a992f1f0f8acdca5e7a4687c49 ]
The SVE host tracking in KVM is pretty involved. It relies on a set of flags tracking the ownership of the SVE register, as well as that of the EL0 access.
It is also pretty scary: __hyp_sve_save_host() computes a thread_struct pointer and obtains a sve_state which gets directly accessed without further ado, even on nVHE. How can this even work?
The answer to that is that it doesn't, and that this is mostly dead code. Closer examination shows that on executing a syscall, userspace loses its SVE state entirely. This is part of the ABI. Another thing to notice is that although the kernel provides helpers such as kernel_neon_begin()/end(), they only deal with the FP/NEON state, and not SVE.
Given that you can only execute a guest as the result of a syscall, and that the kernel cannot use SVE by itself, it becomes pretty obvious that there is never any host SVE state to save, and that this code is only there to increase confusion.
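For reference, the syscall-entry discard that this reasoning relies on is sve_user_discard(), called for every syscall in v5.15's arch/arm64/kernel/syscall.c (sketched here with abbreviated comments):

| static inline void sve_user_discard(void)
| {
| 	if (!system_supports_sve())
| 		return;
|
| 	clear_thread_flag(TIF_SVE);
|
| 	/*
| 	 * Only the shared FPSIMD subset of the register state
| 	 * survives a syscall; the EL0 SVE enable is dropped along
| 	 * with the SVE view of the registers.
| 	 */
| 	sve_user_disable();
| }

Since the only way to run a guest is the KVM_RUN ioctl, TIF_SVE is guaranteed clear by the time the vcpu's FP state is loaded.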
Get rid of the TIF_SVE tracking and host save infrastructure altogether.
Reviewed-by: Mark Brown broonie@kernel.org
Signed-off-by: Marc Zyngier maz@kernel.org
Signed-off-by: Mark Brown broonie@kernel.org
---
 arch/arm64/include/asm/kvm_host.h       |  1 -
 arch/arm64/kvm/fpsimd.c                 | 20 +++++---------------
 arch/arm64/kvm/hyp/include/hyp/switch.h | 27 +++------------------------
 3 files changed, 8 insertions(+), 40 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 91038fa2e5e0..4e84d75ae91c 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -410,7 +410,6 @@ struct kvm_vcpu_arch {
 #define KVM_ARM64_DEBUG_DIRTY		(1 << 0)
 #define KVM_ARM64_FP_ENABLED		(1 << 1) /* guest FP regs loaded */
 #define KVM_ARM64_FP_HOST		(1 << 2) /* host FP regs loaded */
-#define KVM_ARM64_HOST_SVE_IN_USE	(1 << 3) /* backup for host TIF_SVE */
 #define KVM_ARM64_HOST_SVE_ENABLED	(1 << 4) /* SVE enabled for EL0 */
 #define KVM_ARM64_GUEST_HAS_SVE	(1 << 5) /* SVE exposed to guest */
 #define KVM_ARM64_VCPU_SVE_FINALIZED	(1 << 6) /* SVE config completed */
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 5621020b28de..2d15e1d6e214 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -66,22 +66,15 @@ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu)
  *
  * Here, we just set the correct metadata to indicate that the FPSIMD
  * state in the cpu regs (if any) belongs to current on the host.
- *
- * TIF_SVE is backed up here, since it may get clobbered with guest state.
- * This flag is restored by kvm_arch_vcpu_put_fp(vcpu).
  */
 void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
 {
 	BUG_ON(!current->mm);
+	BUG_ON(test_thread_flag(TIF_SVE));

-	vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
-			      KVM_ARM64_HOST_SVE_IN_USE |
-			      KVM_ARM64_HOST_SVE_ENABLED);
+	vcpu->arch.flags &= ~KVM_ARM64_FP_ENABLED;
 	vcpu->arch.flags |= KVM_ARM64_FP_HOST;

-	if (test_thread_flag(TIF_SVE))
-		vcpu->arch.flags |= KVM_ARM64_HOST_SVE_IN_USE;
-
 	if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
 		vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED;
 }
@@ -115,13 +108,11 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
 void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
 {
 	unsigned long flags;
-	bool host_has_sve = system_supports_sve();
-	bool guest_has_sve = vcpu_has_sve(vcpu);

 	local_irq_save(flags);

 	if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
-		if (guest_has_sve) {
+		if (vcpu_has_sve(vcpu)) {
 			__vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);

 			/* Restore the VL that was saved when bound to the CPU */
@@ -131,7 +122,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
 		}

 		fpsimd_save_and_flush_cpu_state();
-	} else if (has_vhe() && host_has_sve) {
+	} else if (has_vhe() && system_supports_sve()) {
 		/*
 		 * The FPSIMD/SVE state in the CPU has not been touched, and we
 		 * have SVE (and VHE): CPACR_EL1 (alias CPTR_EL2) has been
@@ -145,8 +136,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
 		sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0);
 	}

-	update_thread_flag(TIF_SVE,
-			   vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE);
+	update_thread_flag(TIF_SVE, 0);

 	local_irq_restore(flags);
 }
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index ecd41844eda0..269ec3587210 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -207,16 +207,6 @@ static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
 	return __get_fault_info(esr, &vcpu->arch.fault);
 }

-static inline void __hyp_sve_save_host(struct kvm_vcpu *vcpu)
-{
-	struct thread_struct *thread;
-
-	thread = container_of(vcpu->arch.host_fpsimd_state, struct thread_struct,
-			      uw.fpsimd_state);
-
-	__sve_save_state(sve_pffr(thread), &vcpu->arch.host_fpsimd_state->fpsr);
-}
-
 static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
 {
 	sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
@@ -228,21 +218,14 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
 /* Check for an FPSIMD/SVE trap and handle as appropriate */
 static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
 {
-	bool sve_guest, sve_host;
+	bool sve_guest;
 	u8 esr_ec;
 	u64 reg;

 	if (!system_supports_fpsimd())
 		return false;

-	if (system_supports_sve()) {
-		sve_guest = vcpu_has_sve(vcpu);
-		sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE;
-	} else {
-		sve_guest = false;
-		sve_host = false;
-	}
-
+	sve_guest = vcpu_has_sve(vcpu);
 	esr_ec = kvm_vcpu_trap_get_class(vcpu);
 	if (esr_ec != ESR_ELx_EC_FP_ASIMD &&
 	    esr_ec != ESR_ELx_EC_SVE)
@@ -269,11 +252,7 @@ static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
 	isb();

 	if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
-		if (sve_host)
-			__hyp_sve_save_host(vcpu);
-		else
-			__fpsimd_save_state(vcpu->arch.host_fpsimd_state);
-
+		__fpsimd_save_state(vcpu->arch.host_fpsimd_state);
 		vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
 	}
[ Sasha's backport helper bot ]
Hi,
Summary of potential issues:
⚠️ Found follow-up fixes in mainline
The upstream commit SHA1 provided is correct: 8383741ab2e773a992f1f0f8acdca5e7a4687c49
WARNING: Author mismatch between patch and upstream commit:
Backport author: Mark Brown broonie@kernel.org
Commit author: Marc Zyngier maz@kernel.org
Status in newer kernel trees:
6.14.y | Present (exact SHA1)
6.13.y | Present (exact SHA1)
6.12.y | Present (exact SHA1)
6.6.y  | Present (exact SHA1)
6.1.y  | Present (exact SHA1)
Found fixes commits:
d52d165d67c5 KVM: arm64: Always start with clearing SVE flag on load
Note: The patch differs from the upstream commit:
---
1:  8383741ab2e77 ! 1:  e6a9fd27335d3 KVM: arm64: Get rid of host SVE tracking/saving
    @@ Metadata
     ## Commit message ##
        KVM: arm64: Get rid of host SVE tracking/saving

    +    [ Upstream commit 8383741ab2e773a992f1f0f8acdca5e7a4687c49 ]
    +
        The SVE host tracking in KVM is pretty involved. It relies on a set
        of flags tracking the ownership of the SVE register, as well as that
        of the EL0 access.

    @@ Commit message
        Reviewed-by: Mark Brown broonie@kernel.org
        Signed-off-by: Marc Zyngier maz@kernel.org
    +    Signed-off-by: Mark Brown broonie@kernel.org

     ## arch/arm64/include/asm/kvm_host.h ##
    @@ arch/arm64/include/asm/kvm_host.h: struct kvm_vcpu_arch {

    @@ arch/arm64/kvm/fpsimd.c: void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)

     ## arch/arm64/kvm/hyp/include/hyp/switch.h ##
    @@ arch/arm64/kvm/hyp/include/hyp/switch.h: static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
    -	return __get_fault_info(vcpu->arch.fault.esr_el2, &vcpu->arch.fault);
    +	return __get_fault_info(esr, &vcpu->arch.fault);
     }

    -static inline void __hyp_sve_save_host(struct kvm_vcpu *vcpu)
    @@ arch/arm64/kvm/hyp/include/hyp/switch.h: static inline bool __populate_fault_inf
     {
     	sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
    @@ arch/arm64/kvm/hyp/include/hyp/switch.h: static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
    - */
    - static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
    + /* Check for an FPSIMD/SVE trap and handle as appropriate */
    + static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
      {
    -	bool sve_guest, sve_host;
    +	bool sve_guest;
    @@ arch/arm64/kvm/hyp/include/hyp/switch.h: static inline void __hyp_sve_restore_gu
    -
    +	sve_guest = vcpu_has_sve(vcpu);
     	esr_ec = kvm_vcpu_trap_get_class(vcpu);
    -
    -	/* Don't handle SVE traps for non-SVE vcpus here: */
    -@@ arch/arm64/kvm/hyp/include/hyp/switch.h: static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
    +	if (esr_ec != ESR_ELx_EC_FP_ASIMD &&
    +	    esr_ec != ESR_ELx_EC_SVE)
    +@@ arch/arm64/kvm/hyp/include/hyp/switch.h: static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
     	isb();

     	if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
---
Results of testing on various branches:
| Branch                    | Patch Apply | Build Test |
|---------------------------|-------------|------------|
| stable/linux-5.15.y       | Success     | Success    |
From: Marc Zyngier maz@kernel.org
[ Upstream commit d52d165d67c5aa26c8c89909003c94a66492d23d ]
On each vcpu load, we set the KVM_ARM64_HOST_SVE_ENABLED flag if SVE is enabled for EL0 on the host. This is used to restore the correct state on vcpu put.
However, it appears that nothing ever clears this flag. Once set, it will stick until the vcpu is destroyed, which has the potential to spuriously enable SVE for userspace.
We probably never saw the issue because no VMM uses SVE, but that's still pretty bad. Unconditionally clearing the flag on vcpu load addresses the issue.
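To make the lifetime problem concrete, the pre-fix flow is effectively the following (a simplified sketch assembled from the load/put paths, not literal kernel code):

| /* kvm_arch_vcpu_load_fp(): the flag is only ever set, never cleared */
| if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
| 	vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED;
|
| /* kvm_arch_vcpu_put_fp(): a stale flag re-enables EL0 SVE */
| if (vcpu->arch.flags & KVM_ARM64_HOST_SVE_ENABLED)
| 	sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_ZEN_EL0EN);
| else
| 	sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0);

Once set on any load, the flag survives every later load, so a subsequent put can re-enable EL0 SVE even though it was disabled when the vcpu was most recently loaded.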
Fixes: 8383741ab2e7 ("KVM: arm64: Get rid of host SVE tracking/saving")
Signed-off-by: Marc Zyngier maz@kernel.org
Cc: stable@vger.kernel.org
Reviewed-by: Mark Brown broonie@kernel.org
Link: https://lore.kernel.org/r/20220528113829.1043361-2-maz@kernel.org
Signed-off-by: Mark Brown broonie@kernel.org
---
 arch/arm64/kvm/fpsimd.c | 1 +
 1 file changed, 1 insertion(+)
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 2d15e1d6e214..24734bfcfaa0 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -75,6 +75,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
 	vcpu->arch.flags &= ~KVM_ARM64_FP_ENABLED;
 	vcpu->arch.flags |= KVM_ARM64_FP_HOST;

+	vcpu->arch.flags &= ~KVM_ARM64_HOST_SVE_ENABLED;
 	if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
 		vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED;
 }
[ Sasha's backport helper bot ]
Hi,
Summary of potential issues:
ℹ️ This is part 02/11 of a series
❌ Build failures detected
The upstream commit SHA1 provided is correct: d52d165d67c5aa26c8c89909003c94a66492d23d
WARNING: Author mismatch between patch and upstream commit:
Backport author: Mark Brown broonie@kernel.org
Commit author: Marc Zyngier maz@kernel.org
Status in newer kernel trees:
6.14.y | Present (exact SHA1)
6.13.y | Present (exact SHA1)
6.12.y | Present (exact SHA1)
6.6.y  | Present (exact SHA1)
6.1.y  | Present (exact SHA1)
Note: The patch differs from the upstream commit:
---
1:  d52d165d67c5a ! 1:  ab39b2accf324 KVM: arm64: Always start with clearing SVE flag on load
    @@ Metadata
     ## Commit message ##
        KVM: arm64: Always start with clearing SVE flag on load

    +    [ Upstream commit d52d165d67c5aa26c8c89909003c94a66492d23d ]
    +
        On each vcpu load, we set the KVM_ARM64_HOST_SVE_ENABLED flag if SVE
        is enabled for EL0 on the host. This is used to restore the correct
        state on vcpu put.

    @@ Commit message
        Cc: stable@vger.kernel.org
        Reviewed-by: Mark Brown broonie@kernel.org
        Link: https://lore.kernel.org/r/20220528113829.1043361-2-maz@kernel.org
    +    Signed-off-by: Mark Brown broonie@kernel.org

     ## arch/arm64/kvm/fpsimd.c ##
    @@ arch/arm64/kvm/fpsimd.c: void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)

    @@ arch/arm64/kvm/fpsimd.c: void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
    +	vcpu->arch.flags &= ~KVM_ARM64_HOST_SVE_ENABLED;
     	if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
     		vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED;
    -
    +
     }
---
NOTE: These results are for this patch alone. Full series testing will be performed when all parts are received.
Results of testing on various branches:
| Branch                    | Patch Apply | Build Test |
|---------------------------|-------------|------------|
| stable/linux-6.1.y        | Success     | Failed     |
Build Errors:
Build error: Segmentation fault
make: *** [Makefile:1231: vmlinux] Error 139
make: Target '__all' not remade because of errors.
On Thu, Apr 10, 2025 at 11:53:32AM -0400, Sasha Levin wrote:
> Summary of potential issues:
> ℹ️ This is part 02/11 of a series
> ❌ Build failures detected

> Build Errors:
> Build error: Segmentation fault
> make: *** [Makefile:1231: vmlinux] Error 139
> make: Target '__all' not remade because of errors.
This looks like some sort of infrastructure issue with the linker crashing? What configuration was this trying to build and with what toolchain? My own per-patch build tests were successful.
On Thu, Apr 10, 2025 at 05:11:06PM +0100, Mark Brown wrote:
> On Thu, Apr 10, 2025 at 11:53:32AM -0400, Sasha Levin wrote:

> > Summary of potential issues:
> > ℹ️ This is part 02/11 of a series
> > ❌ Build failures detected

> > Build Errors:
> > Build error: Segmentation fault
> > make: *** [Makefile:1231: vmlinux] Error 139
> > make: Target '__all' not remade because of errors.

> This looks like some sort of infrastructure issue with the linker
> crashing?  What configuration was this trying to build and with what
> toolchain?  My own per-patch build tests were successful.
You're right - sorry about that.
The cooling setup on my build server started to fail, which caused sporadic failures during heavy load.
On Thu, Apr 10, 2025 at 12:43:28PM -0400, Sasha Levin wrote:
> On Thu, Apr 10, 2025 at 05:11:06PM +0100, Mark Brown wrote:

> > This looks like some sort of infrastructure issue with the linker
> > crashing?  What configuration was this trying to build and with what
> > toolchain?  My own per-patch build tests were successful.

> You're right - sorry about that.

> The cooling setup on my build server started to fail, which caused
> sporadic failures during heavy load.
Stable kernels, they're the hot new thing! :P
No worries.
[ Upstream commit 93ae6b01bafee8fa385aa25ee7ebdb40057f6abe ]
Since 8383741ab2e773a99 (KVM: arm64: Get rid of host SVE tracking/saving) KVM has not tracked the host SVE state, relying on the fact that we currently disable SVE whenever we perform a syscall. This may not be true in future since performance optimisation may result in us keeping SVE enabled in order to avoid needing to take access traps to reenable it.

Handle this by clearing TIF_SVE and converting the stored task state to FPSIMD format when preparing to run the guest. This is done with a new call fpsimd_kvm_prepare() to keep the direct state manipulation functions internal to fpsimd.c.
Signed-off-by: Mark Brown broonie@kernel.org
Reviewed-by: Catalin Marinas catalin.marinas@arm.com
Reviewed-by: Marc Zyngier maz@kernel.org
Link: https://lore.kernel.org/r/20221115094640.112848-2-broonie@kernel.org
Signed-off-by: Will Deacon will@kernel.org
[ Mark: trivial backport to v6.1 ]
Signed-off-by: Mark Rutland mark.rutland@arm.com
Signed-off-by: Mark Brown broonie@kernel.org
---
 arch/arm64/include/asm/fpsimd.h |  1 +
 arch/arm64/kernel/fpsimd.c      | 23 +++++++++++++++++++++++
 arch/arm64/kvm/fpsimd.c         |  4 +++-
 3 files changed, 27 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 9a62884183e5..f7faf0f4507c 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -44,6 +44,7 @@ extern void fpsimd_signal_preserve_current_state(void);
 extern void fpsimd_preserve_current_state(void);
 extern void fpsimd_restore_current_state(void);
 extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
+extern void fpsimd_kvm_prepare(void);

 extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state,
				     void *sve_state, unsigned int sve_vl);
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index e22571e57ae1..57e89361edcb 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1108,6 +1108,29 @@ void fpsimd_signal_preserve_current_state(void)
		sve_to_fpsimd(current);
 }

+/*
+ * Called by KVM when entering the guest.
+ */
+void fpsimd_kvm_prepare(void)
+{
+	if (!system_supports_sve())
+		return;
+
+	/*
+	 * KVM does not save host SVE state since we can only enter
+	 * the guest from a syscall so the ABI means that only the
+	 * non-saved SVE state needs to be saved. If we have left
+	 * SVE enabled for performance reasons then update the task
+	 * state to be FPSIMD only.
+	 */
+	get_cpu_fpsimd_context();
+
+	if (test_and_clear_thread_flag(TIF_SVE))
+		sve_to_fpsimd(current);
+
+	put_cpu_fpsimd_context();
+}
+
 /*
  * Associate current's FPSIMD context with this cpu
  * The caller must have ownership of the cpu FPSIMD context before calling
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 24734bfcfaa0..16e29f03dcbf 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -70,12 +70,14 @@ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu)
 void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
 {
	BUG_ON(!current->mm);
-	BUG_ON(test_thread_flag(TIF_SVE));

	vcpu->arch.flags &= ~KVM_ARM64_FP_ENABLED;
	vcpu->arch.flags |= KVM_ARM64_FP_HOST;

+	fpsimd_kvm_prepare();
+
	vcpu->arch.flags &= ~KVM_ARM64_HOST_SVE_ENABLED;
+
	if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
		vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED;
 }
[ Upstream commit baa8515281b30861cff3da7db70662d2a25c6440 ]
When we save the state for the floating point registers this can be done in the form visible through either the FPSIMD V registers or the SVE Z and P registers. At present we track which format is currently used based on TIF_SVE and the SME streaming mode state but particularly in the SVE case this limits our options for optimising things, especially around syscalls. Introduce a new enum which we place together with saved floating point state in both thread_struct and the KVM guest state which explicitly states which format is active and keep it up to date when we change it.
At present we do not use this state except to verify that it has the expected value when loading the state; future patches will introduce functional changes.
Signed-off-by: Mark Brown broonie@kernel.org
Reviewed-by: Catalin Marinas catalin.marinas@arm.com
Reviewed-by: Marc Zyngier maz@kernel.org
Link: https://lore.kernel.org/r/20221115094640.112848-3-broonie@kernel.org
Signed-off-by: Will Deacon will@kernel.org
[ Mark: fix conflicts due to earlier backports ]
Signed-off-by: Mark Rutland mark.rutland@arm.com
Signed-off-by: Mark Brown broonie@kernel.org
---
 arch/arm64/include/asm/fpsimd.h    |  3 +-
 arch/arm64/include/asm/kvm_host.h  | 12 ++++++++
 arch/arm64/include/asm/processor.h |  6 ++++
 arch/arm64/kernel/fpsimd.c         | 58 ++++++++++++++++++++++++++++----------
 arch/arm64/kernel/process.c        |  3 ++
 arch/arm64/kernel/ptrace.c         |  3 ++
 arch/arm64/kernel/signal.c         |  3 ++
 arch/arm64/kvm/fpsimd.c            |  3 +-
 8 files changed, 74 insertions(+), 17 deletions(-)
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index f7faf0f4507c..9912bfd020be 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -47,7 +47,8 @@ extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
 extern void fpsimd_kvm_prepare(void);

 extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state,
-				     void *sve_state, unsigned int sve_vl);
+				     void *sve_state, unsigned int sve_vl,
+				     enum fp_type *type);

 extern void fpsimd_flush_task_state(struct task_struct *target);
 extern void fpsimd_save_and_flush_cpu_state(void);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 4e84d75ae91c..ecae31b0dab3 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -280,7 +280,19 @@ struct vcpu_reset_state {

 struct kvm_vcpu_arch {
	struct kvm_cpu_context ctxt;
+
+	/*
+	 * Guest floating point state
+	 *
+	 * The architecture has two main floating point extensions,
+	 * the original FPSIMD and SVE.  These have overlapping
+	 * register views, with the FPSIMD V registers occupying the
+	 * low 128 bits of the SVE Z registers.  When the core
+	 * floating point code saves the register state of a task it
+	 * records which view it saved in fp_type.
+	 */
	void *sve_state;
+	enum fp_type fp_type;
	unsigned int sve_max_vl;

	/* Stage 2 paging state used by the hardware on next switch */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 7364530de0a7..d5c11a994291 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -115,6 +115,11 @@ struct debug_info {
 #endif
 };

+enum fp_type {
+	FP_STATE_FPSIMD,
+	FP_STATE_SVE,
+};
+
 struct cpu_context {
	unsigned long x19;
	unsigned long x20;
@@ -145,6 +150,7 @@ struct thread_struct {
		struct user_fpsimd_state fpsimd_state;
	} uw;

+	enum fp_type		fp_type;	/* registers FPSIMD or SVE? */
	unsigned int		fpsimd_cpu;
	void			*sve_state;	/* SVE registers, if any */
	unsigned int		sve_vl;		/* SVE vector length */
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 57e89361edcb..4e702ff0d196 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -117,6 +117,7 @@ struct fpsimd_last_state_struct {
	struct user_fpsimd_state *st;
	void *sve_state;
	unsigned int sve_vl;
+	enum fp_type *fp_type;
 };

 static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
@@ -243,14 +244,6 @@ static void sve_free(struct task_struct *task)
  * The task can execute SVE instructions while in userspace without
  * trapping to the kernel.
  *
- * When stored, Z0-Z31 (incorporating Vn in bits[127:0] or the
- * corresponding Zn), P0-P15 and FFR are encoded in in
- * task->thread.sve_state, formatted appropriately for vector
- * length task->thread.sve_vl.
- *
- * task->thread.sve_state must point to a valid buffer at least
- * sve_state_size(task) bytes in size.
- *
  * During any syscall, the kernel may optionally clear TIF_SVE and
  * discard the vector state except for the FPSIMD subset.
  *
@@ -260,7 +253,15 @@ static void sve_free(struct task_struct *task)
  * do_sve_acc() to be called, which does some preparation and then
  * sets TIF_SVE.
  *
- * When stored, FPSIMD registers V0-V31 are encoded in
+ * During any syscall, the kernel may optionally clear TIF_SVE and
+ * discard the vector state except for the FPSIMD subset.
+ *
+ * The data will be stored in one of two formats:
+ *
+ * * FPSIMD only - FP_STATE_FPSIMD:
+ *
+ *    When the FPSIMD only state stored task->thread.fp_type is set to
+ *    FP_STATE_FPSIMD, the FPSIMD registers V0-V31 are encoded in
  * task->thread.uw.fpsimd_state; bits [max : 128] for each of Z0-Z31 are
  * logically zero but not stored anywhere; P0-P15 and FFR are not
  * stored and have unspecified values from userspace's point of
@@ -270,6 +271,19 @@ static void sve_free(struct task_struct *task)
  * task->thread.sve_state does not need to be non-NULL, valid or any
  * particular size: it must not be dereferenced.
  *
+ * * SVE state - FP_STATE_SVE:
+ *
+ *    When the full SVE state is stored task->thread.fp_type is set to
+ *    FP_STATE_SVE and Z0-Z31 (incorporating Vn in bits[127:0] or the
+ *    corresponding Zn), P0-P15 and FFR are encoded in in
+ *    task->thread.sve_state, formatted appropriately for vector
+ *    length task->thread.sve_vl or, if SVCR.SM is set,
+ *    task->thread.sme_vl. The storage for the vector registers in
+ *    task->thread.uw.fpsimd_state should be ignored.
+ *
+ *    task->thread.sve_state must point to a valid buffer at least
+ *    sve_state_size(task) bytes in size.
+ *
  * * FPSR and FPCR are always stored in task->thread.uw.fpsimd_state
  *   irrespective of whether TIF_SVE is clear or set, since these are
  *   not vector length dependent.
@@ -287,12 +301,15 @@ static void task_fpsimd_load(void)
	WARN_ON(!system_supports_fpsimd());
	WARN_ON(!have_cpu_fpsimd_context());

-	if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE))
+	if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) {
+		WARN_ON_ONCE(current->thread.fp_type != FP_STATE_SVE);
		sve_load_state(sve_pffr(&current->thread),
			       &current->thread.uw.fpsimd_state.fpsr,
			       sve_vq_from_vl(current->thread.sve_vl) - 1);
-	else
+	} else {
+		WARN_ON_ONCE(current->thread.fp_type != FP_STATE_FPSIMD);
		fpsimd_load_state(&current->thread.uw.fpsimd_state);
+	}
 }

 /*
@@ -324,8 +341,11 @@ static void fpsimd_save(void)
			sve_save_state((char *)last->sve_state +
					sve_ffr_offset(last->sve_vl),
				       &last->st->fpsr);
-		} else
+			*last->fp_type = FP_STATE_SVE;
+		} else {
			fpsimd_save_state(last->st);
+			*last->fp_type = FP_STATE_FPSIMD;
+		}
	}
 }

@@ -624,8 +644,10 @@ int sve_set_vector_length(struct task_struct *task,
	}

	fpsimd_flush_task_state(task);
-	if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
+	if (test_and_clear_tsk_thread_flag(task, TIF_SVE)) {
		sve_to_fpsimd(task);
+		task->thread.fp_type = FP_STATE_FPSIMD;
+	}

	if (task == current)
		put_cpu_fpsimd_context();
@@ -1079,6 +1101,8 @@ void fpsimd_flush_thread(void)
		current->thread.sve_vl_onexec = 0;
	}

+	current->thread.fp_type = FP_STATE_FPSIMD;
+
	put_cpu_fpsimd_context();
 }

@@ -1125,8 +1149,10 @@ void fpsimd_kvm_prepare(void)
	 */
	get_cpu_fpsimd_context();

-	if (test_and_clear_thread_flag(TIF_SVE))
+	if (test_and_clear_thread_flag(TIF_SVE)) {
		sve_to_fpsimd(current);
+		current->thread.fp_type = FP_STATE_FPSIMD;
+	}

	put_cpu_fpsimd_context();
 }
@@ -1145,6 +1171,7 @@ static void fpsimd_bind_task_to_cpu(void)
	last->st = &current->thread.uw.fpsimd_state;
	last->sve_state = current->thread.sve_state;
	last->sve_vl = current->thread.sve_vl;
+	last->fp_type = &current->thread.fp_type;
	current->thread.fpsimd_cpu = smp_processor_id();

	if (system_supports_sve()) {
@@ -1159,7 +1186,7 @@ static void fpsimd_bind_task_to_cpu(void)
 }

 void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
-			      unsigned int sve_vl)
+			      unsigned int sve_vl, enum fp_type *type)
 {
	struct fpsimd_last_state_struct *last =
		this_cpu_ptr(&fpsimd_last_state);
@@ -1170,6 +1197,7 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
	last->st = st;
	last->sve_state = sve_state;
	last->sve_vl = sve_vl;
+	last->fp_type = type;
 }

 /*
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 839edc4ed7a4..a01f2288ee9a 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -307,6 +307,9 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
	dst->thread.sve_state = NULL;
	clear_tsk_thread_flag(dst, TIF_SVE);

+
+	dst->thread.fp_type = FP_STATE_FPSIMD;
+
	/* clear any pending asynchronous tag fault raised by the parent */
	clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT);

diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 92de9212b7b5..4ef9e688508c 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -829,6 +829,7 @@ static int sve_set(struct task_struct *target,
		ret = __fpr_set(target, regset, pos, count, kbuf, ubuf,
				SVE_PT_FPSIMD_OFFSET);
		clear_tsk_thread_flag(target, TIF_SVE);
+		target->thread.fp_type = FP_STATE_FPSIMD;
		goto out;
	}

@@ -848,6 +849,7 @@ static int sve_set(struct task_struct *target,
	if (!target->thread.sve_state) {
		ret = -ENOMEM;
		clear_tsk_thread_flag(target, TIF_SVE);
+		target->thread.fp_type = FP_STATE_FPSIMD;
		goto out;
	}

@@ -858,6 +860,7 @@ static int sve_set(struct task_struct *target,
	 */
	fpsimd_sync_to_sve(target);
	set_tsk_thread_flag(target, TIF_SVE);
+	target->thread.fp_type = FP_STATE_SVE;

	BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
	start = SVE_PT_SVE_OFFSET;
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index b3e1beccf458..768de05b616b 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -207,6 +207,7 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
	__get_user_error(fpsimd.fpcr, &ctx->fpcr, err);

	clear_thread_flag(TIF_SVE);
+	current->thread.fp_type = FP_STATE_FPSIMD;

	/* load the hardware registers from the fpsimd_state structure */
	if (!err)
@@ -271,6 +272,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)

	if (sve.head.size <= sizeof(*user->sve)) {
		clear_thread_flag(TIF_SVE);
+		current->thread.fp_type = FP_STATE_FPSIMD;
		goto fpsimd_only;
	}

@@ -303,6 +305,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
		return -EFAULT;

	set_thread_flag(TIF_SVE);
+	current->thread.fp_type = FP_STATE_SVE;

 fpsimd_only:
	/* copy the FP and status/control registers */
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 16e29f03dcbf..f1aaad9e14bc 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -95,7 +95,8 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
	if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
		fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.fp_regs,
					 vcpu->arch.sve_state,
-					 vcpu->arch.sve_max_vl);
+					 vcpu->arch.sve_max_vl,
+					 &vcpu->arch.fp_type);

		clear_thread_flag(TIF_FOREIGN_FPSTATE);
		update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu));
[ Upstream commit deeb8f9a80fdae5a62525656d65c7070c28bd3a4 ]
In order to avoid needlessly saving and restoring the guest registers KVM relies on the host FPSIMD code to save the guest registers when we context switch away from the guest. This is done by binding the KVM guest state to the CPU on top of the task state that was originally there, then carefully managing the TIF_SVE flag for the task to cause the host to save the full SVE state when needed regardless of the needs of the host task. This works well enough but isn't terribly direct about what is going on and makes it much more complicated to try to optimise what we're doing with the SVE register state.
Let's instead have KVM pass in the register state it wants saving when it binds to the CPU. We introduce a new FP_STATE_CURRENT for use during normal task binding to indicate that we should base our decisions on the current task. This should not be used when actually saving. Ideally we might want to use a separate enum for the type to save but this enum and the enum values would then need to be named which has problems with clarity and ambiguity.
In order to ease any future debugging that might be required this patch does not actually update any of the decision making about what to save, it merely starts tracking the new information and warns if the requested state is not what we would otherwise have decided to save.
Signed-off-by: Mark Brown broonie@kernel.org
Reviewed-by: Catalin Marinas catalin.marinas@arm.com
Reviewed-by: Marc Zyngier maz@kernel.org
Link: https://lore.kernel.org/r/20221115094640.112848-4-broonie@kernel.org
Signed-off-by: Will Deacon will@kernel.org
[ Mark: trivial backport ]
Signed-off-by: Mark Rutland mark.rutland@arm.com
Signed-off-by: Mark Brown broonie@kernel.org
---
 arch/arm64/include/asm/fpsimd.h    |  2 +-
 arch/arm64/include/asm/processor.h |  1 +
 arch/arm64/kernel/fpsimd.c         | 79 +++++++++++++++++++++++++++-----------
 arch/arm64/kvm/fpsimd.c            | 13 ++++++-
 4 files changed, 70 insertions(+), 25 deletions(-)
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 9912bfd020be..7a407c3767b6 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -48,7 +48,7 @@ extern void fpsimd_kvm_prepare(void);

 extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state,
				     void *sve_state, unsigned int sve_vl,
-				     enum fp_type *type);
+				     enum fp_type *type, enum fp_type to_save);

 extern void fpsimd_flush_task_state(struct task_struct *target);
 extern void fpsimd_save_and_flush_cpu_state(void);
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index d5c11a994291..1da032444dac 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -116,6 +116,7 @@ struct debug_info {
 };

 enum fp_type {
+	FP_STATE_CURRENT,	/* Save based on current task state. */
	FP_STATE_FPSIMD,
	FP_STATE_SVE,
 };
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 4e702ff0d196..105b8aa0c038 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -118,6 +118,7 @@ struct fpsimd_last_state_struct {
	void *sve_state;
	unsigned int sve_vl;
	enum fp_type *fp_type;
+	enum fp_type to_save;
 };

 static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
@@ -269,7 +270,8 @@ static void sve_free(struct task_struct *task)
  *    but userspace is discouraged from relying on this.
  *
  *    task->thread.sve_state does not need to be non-NULL, valid or any
- *    particular size: it must not be dereferenced.
+ *    particular size: it must not be dereferenced and any data stored
+ *    there should be considered stale and not referenced.
  *
  * * SVE state - FP_STATE_SVE:
  *
@@ -282,7 +284,9 @@ static void sve_free(struct task_struct *task)
  *    task->thread.uw.fpsimd_state should be ignored.
  *
  *    task->thread.sve_state must point to a valid buffer at least
- *    sve_state_size(task) bytes in size.
+ *    sve_state_size(task) bytes in size. The data stored in
+ *    task->thread.uw.fpsimd_state.vregs should be considered stale
+ *    and not referenced.
  *
  * * FPSR and FPCR are always stored in task->thread.uw.fpsimd_state
  *   irrespective of whether TIF_SVE is clear or set, since these are
@@ -321,32 +325,57 @@ static void fpsimd_save(void)
	struct fpsimd_last_state_struct const *last =
		this_cpu_ptr(&fpsimd_last_state);
	/* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */
+	bool save_sve_regs = false;
+	unsigned long vl;

	WARN_ON(!system_supports_fpsimd());
	WARN_ON(!have_cpu_fpsimd_context());

-	if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
-		if (IS_ENABLED(CONFIG_ARM64_SVE) &&
-		    test_thread_flag(TIF_SVE)) {
-			if (WARN_ON(sve_get_vl() != last->sve_vl)) {
-				/*
-				 * Can't save the user regs, so current would
-				 * re-enter user with corrupt state.
-				 * There's no way to recover, so kill it:
-				 */
-				force_signal_inject(SIGKILL, SI_KERNEL, 0, 0);
-				return;
-			}
-
-			sve_save_state((char *)last->sve_state +
-						sve_ffr_offset(last->sve_vl),
-				       &last->st->fpsr);
-			*last->fp_type = FP_STATE_SVE;
-		} else {
-			fpsimd_save_state(last->st);
-			*last->fp_type = FP_STATE_FPSIMD;
+	if (test_thread_flag(TIF_FOREIGN_FPSTATE))
+		return;
+
+	if (IS_ENABLED(CONFIG_ARM64_SVE) &&
+	    test_thread_flag(TIF_SVE)) {
+		if (WARN_ON(sve_get_vl() != last->sve_vl)) {
+			/*
+			 * Can't save the user regs, so current would
+			 * re-enter user with corrupt state.
+			 * There's no way to recover, so kill it:
+			 */
+			force_signal_inject(SIGKILL, SI_KERNEL, 0, 0);
+			return;
		}
	}
+
+	if (test_thread_flag(TIF_SVE)) {
+		save_sve_regs = true;
+		vl = last->sve_vl;
+	}
+
+	/*
+	 * Validate that an explicitly specified state to save is
+	 * consistent with the task state.
+	 */
+	switch (last->to_save) {
+	case FP_STATE_CURRENT:
+		break;
+	case FP_STATE_FPSIMD:
+		WARN_ON_ONCE(save_sve_regs);
+		break;
+	case FP_STATE_SVE:
+		WARN_ON_ONCE(!save_sve_regs);
+		break;
+	}
+
+	if (IS_ENABLED(CONFIG_ARM64_SVE) && save_sve_regs) {
+		sve_save_state((char *)last->sve_state +
+					sve_ffr_offset(last->sve_vl),
+			       &last->st->fpsr);
+		*last->fp_type = FP_STATE_SVE;
+	} else {
+		fpsimd_save_state(last->st);
+		*last->fp_type = FP_STATE_FPSIMD;
+	}
 }

 /*
@@ -987,6 +1016,7 @@ void do_sve_acc(unsigned long esr, struct pt_regs *regs)
	} else {
		fpsimd_to_sve(current);
		fpsimd_flush_task_state(current);
+		current->thread.fp_type = FP_STATE_SVE;
	}

	put_cpu_fpsimd_context();
@@ -1172,6 +1202,7 @@ static void fpsimd_bind_task_to_cpu(void)
	last->sve_state = current->thread.sve_state;
	last->sve_vl = current->thread.sve_vl;
	last->fp_type = &current->thread.fp_type;
+	last->to_save = FP_STATE_CURRENT;
	current->thread.fpsimd_cpu = smp_processor_id();

	if (system_supports_sve()) {
@@ -1186,7 +1217,8 @@ static void fpsimd_bind_task_to_cpu(void)
 }

 void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
-			      unsigned int sve_vl, enum fp_type *type)
+			      unsigned int sve_vl, enum fp_type *type,
+			      enum fp_type to_save)
 {
	struct fpsimd_last_state_struct *last =
		this_cpu_ptr(&fpsimd_last_state);
@@ -1198,6 +1230,7 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
	last->sve_state = sve_state;
	last->sve_vl = sve_vl;
	last->fp_type = type;
+	last->to_save = to_save;
 }

 /*
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index f1aaad9e14bc..54a31c97eb7a 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -90,13 +90,24 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
  */
 void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
 {
+	enum fp_type fp_type;
+
	WARN_ON_ONCE(!irqs_disabled());

	if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
+		if (vcpu_has_sve(vcpu))
+			fp_type = FP_STATE_SVE;
+		else
+			fp_type = FP_STATE_FPSIMD;
+
+		/*
+		 * Currently we do not support SME guests so SVCR is
+		 * always 0 and we just need a variable to point to.
+		 */
		fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.fp_regs,
					 vcpu->arch.sve_state,
					 vcpu->arch.sve_max_vl,
-					 &vcpu->arch.fp_type);
+					 &vcpu->arch.fp_type, fp_type);

		clear_thread_flag(TIF_FOREIGN_FPSTATE);
		update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu));
[ Upstream commit 62021cc36add7b2c015b837f7893f2fb4b8c2586 ]
Now that we are explicitly telling the host FP code which register state it needs to save we can remove the manipulation of TIF_SVE from the KVM code, simplifying it and allowing us to optimise our handling of normal tasks. Remove the manipulation of TIF_SVE from KVM and instead rely on to_save to ensure we save the correct data for it.
There should be no functional or performance impact from this change.
Signed-off-by: Mark Brown broonie@kernel.org
Reviewed-by: Catalin Marinas catalin.marinas@arm.com
Reviewed-by: Marc Zyngier maz@kernel.org
Link: https://lore.kernel.org/r/20221115094640.112848-5-broonie@kernel.org
Signed-off-by: Will Deacon will@kernel.org
[ Mark: trivial backport ]
Signed-off-by: Mark Rutland mark.rutland@arm.com
Signed-off-by: Mark Brown broonie@kernel.org
---
 arch/arm64/kernel/fpsimd.c | 40 ++++++++++++++++------------------------
 arch/arm64/kvm/fpsimd.c    |  3 ---
 2 files changed, 16 insertions(+), 27 deletions(-)
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 105b8aa0c038..e8f10daaa0d7 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -318,7 +318,13 @@ static void task_fpsimd_load(void)

 /*
  * Ensure FPSIMD/SVE storage in memory for the loaded context is up to
- * date with respect to the CPU registers.
+ * date with respect to the CPU registers. Note carefully that the
+ * current context is the context last bound to the CPU stored in
+ * last, if KVM is involved this may be the guest VM context rather
+ * than the host thread for the VM pointed to by current. This means
+ * that we must always reference the state storage via last rather
+ * than via current, if we are saving KVM state then it will have
+ * ensured that the type of registers to save is set in last->to_save.
  */
 static void fpsimd_save(void)
 {
@@ -334,9 +340,15 @@ static void fpsimd_save(void)
	if (test_thread_flag(TIF_FOREIGN_FPSTATE))
		return;

-	if (IS_ENABLED(CONFIG_ARM64_SVE) &&
-	    test_thread_flag(TIF_SVE)) {
-		if (WARN_ON(sve_get_vl() != last->sve_vl)) {
+	if ((last->to_save == FP_STATE_CURRENT && test_thread_flag(TIF_SVE)) ||
+	    last->to_save == FP_STATE_SVE) {
+		save_sve_regs = true;
+		vl = last->sve_vl;
+	}
+
+	if (IS_ENABLED(CONFIG_ARM64_SVE) && save_sve_regs) {
+		/* Get the configured VL from RDVL, will account for SM */
+		if (WARN_ON(sve_get_vl() != vl)) {
			/*
			 * Can't save the user regs, so current would
			 * re-enter user with corrupt state.
@@ -347,26 +359,6 @@ static void fpsimd_save(void)
		}
	}

-	if (test_thread_flag(TIF_SVE)) {
-		save_sve_regs = true;
-		vl = last->sve_vl;
-	}
-
-	/*
-	 * Validate that an explicitly specified state to save is
-	 * consistent with the task state.
-	 */
-	switch (last->to_save) {
-	case FP_STATE_CURRENT:
-		break;
-	case FP_STATE_FPSIMD:
-		WARN_ON_ONCE(save_sve_regs);
-		break;
-	case FP_STATE_SVE:
-		WARN_ON_ONCE(!save_sve_regs);
-		break;
-	}
-
	if (IS_ENABLED(CONFIG_ARM64_SVE) && save_sve_regs) {
		sve_save_state((char *)last->sve_state +
					sve_ffr_offset(last->sve_vl),
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 54a31c97eb7a..2e0f44f4c470 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -110,7 +110,6 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
					 &vcpu->arch.fp_type, fp_type);

		clear_thread_flag(TIF_FOREIGN_FPSTATE);
-		update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu));
	}
 }

@@ -151,7 +150,5 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
		sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0);
	}

-	update_thread_flag(TIF_SVE, 0);
-
	local_irq_restore(flags);
 }
From: Mark Rutland mark.rutland@arm.com
[ Upstream commit fbc7e61195e23f744814e78524b73b59faa54ab4 ]
There are several problems with the way hyp code lazily saves the host's FPSIMD/SVE state, including:
* Host SVE being discarded unexpectedly due to inconsistent configuration of TIF_SVE and CPACR_ELx.ZEN. This has been seen to result in QEMU crashes where SVE is used by memmove(), as reported by Eric Auger:
https://issues.redhat.com/browse/RHEL-68997
* Host SVE state is discarded *after* modification by ptrace, which was an unintentional ptrace ABI change introduced with lazy discarding of SVE state.
* The host FPMR value can be discarded when running a non-protected VM, where FPMR support is not exposed to a VM, and that VM uses FPSIMD/SVE. In these cases the hyp code does not save the host's FPMR before unbinding the host's FPSIMD/SVE/SME state, leaving a stale value in memory.
Avoid these by eagerly saving and "flushing" the host's FPSIMD/SVE/SME state when loading a vCPU such that KVM does not need to save any of the host's FPSIMD/SVE/SME state. For clarity, fpsimd_kvm_prepare() is removed and the necessary call to fpsimd_save_and_flush_cpu_state() is placed in kvm_arch_vcpu_load_fp(). As 'fpsimd_state' and 'fpmr_ptr' should not be used, they are set to NULL; all uses of these will be removed in subsequent patches.
Historical problems go back at least as far as v5.17, e.g. erroneous assumptions about TIF_SVE being clear in commit:
8383741ab2e773a9 ("KVM: arm64: Get rid of host SVE tracking/saving")
... and so this eager save+flush probably needs to be backported to ALL stable trees.
Fixes: 93ae6b01bafee8fa ("KVM: arm64: Discard any SVE state when entering KVM guests")
Fixes: 8c845e2731041f0f ("arm64/sve: Leave SVE enabled on syscall if we don't context switch")
Fixes: ef3be86021c3bdf3 ("KVM: arm64: Add save/restore support for FPMR")
Reported-by: Eric Auger eauger@redhat.com
Reported-by: Wilco Dijkstra wilco.dijkstra@arm.com
Reviewed-by: Mark Brown broonie@kernel.org
Tested-by: Mark Brown broonie@kernel.org
Tested-by: Eric Auger eric.auger@redhat.com
Acked-by: Will Deacon will@kernel.org
Cc: Catalin Marinas catalin.marinas@arm.com
Cc: Florian Weimer fweimer@redhat.com
Cc: Fuad Tabba tabba@google.com
Cc: Jeremy Linton jeremy.linton@arm.com
Cc: Marc Zyngier maz@kernel.org
Cc: Oliver Upton oliver.upton@linux.dev
Cc: Paolo Bonzini pbonzini@redhat.com
Signed-off-by: Mark Rutland mark.rutland@arm.com
Reviewed-by: Oliver Upton oliver.upton@linux.dev
Link: https://lore.kernel.org/r/20250210195226.1215254-2-mark.rutland@arm.com
Signed-off-by: Marc Zyngier maz@kernel.org
[ Mark: Handle vcpu/host flag conflict, remove host_data_ptr() ]
Signed-off-by: Mark Rutland mark.rutland@arm.com
Signed-off-by: Mark Brown broonie@kernel.org
---
 arch/arm64/kernel/fpsimd.c | 25 -------------------------
 arch/arm64/kvm/fpsimd.c    | 13 ++++++++++---
 2 files changed, 10 insertions(+), 28 deletions(-)
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index e8f10daaa0d7..4be9d9fd4fb7 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1154,31 +1154,6 @@ void fpsimd_signal_preserve_current_state(void)
		sve_to_fpsimd(current);
 }

-/*
- * Called by KVM when entering the guest.
- */
-void fpsimd_kvm_prepare(void)
-{
-	if (!system_supports_sve())
-		return;
-
-	/*
-	 * KVM does not save host SVE state since we can only enter
-	 * the guest from a syscall so the ABI means that only the
-	 * non-saved SVE state needs to be saved. If we have left
-	 * SVE enabled for performance reasons then update the task
-	 * state to be FPSIMD only.
-	 */
-	get_cpu_fpsimd_context();
-
-	if (test_and_clear_thread_flag(TIF_SVE)) {
-		sve_to_fpsimd(current);
-		current->thread.fp_type = FP_STATE_FPSIMD;
-	}
-
-	put_cpu_fpsimd_context();
-}
-
 /*
  * Associate current's FPSIMD context with this cpu
  * The caller must have ownership of the cpu FPSIMD context before calling
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 2e0f44f4c470..2afa2521bce1 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -74,9 +74,16 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
	vcpu->arch.flags &= ~KVM_ARM64_FP_ENABLED;
	vcpu->arch.flags |= KVM_ARM64_FP_HOST;

-	fpsimd_kvm_prepare();
-
-	vcpu->arch.flags &= ~KVM_ARM64_HOST_SVE_ENABLED;
+	/*
+	 * Ensure that any host FPSIMD/SVE/SME state is saved and unbound such
+	 * that the host kernel is responsible for restoring this state upon
+	 * return to userspace, and the hyp code doesn't need to save anything.
+	 *
+	 * When the host may use SME, fpsimd_save_and_flush_cpu_state() ensures
+	 * that PSTATE.{SM,ZA} == {0,0}.
+	 */
+	fpsimd_save_and_flush_cpu_state();
+	vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;

	if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
		vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED;
From: Mark Rutland mark.rutland@arm.com
[ Upstream commit 8eca7f6d5100b6997df4f532090bc3f7e0203bef ]
Now that the host eagerly saves its own FPSIMD/SVE/SME state, non-protected KVM never needs to save the host FPSIMD/SVE/SME state, and the code to do this is never used. Protected KVM still needs to save/restore the host FPSIMD/SVE state to avoid leaking guest state to the host (and to avoid revealing to the host whether the guest used FPSIMD/SVE/SME), and that code needs to be retained.
Remove the unused code and data structures.
To avoid the need for a stub copy of kvm_hyp_save_fpsimd_host() in the VHE hyp code, the nVHE/hVHE version is moved into the shared switch header, where it is only invoked when KVM is in protected mode.
Signed-off-by: Mark Rutland mark.rutland@arm.com
Reviewed-by: Mark Brown broonie@kernel.org
Tested-by: Mark Brown broonie@kernel.org
Acked-by: Will Deacon will@kernel.org
Cc: Catalin Marinas catalin.marinas@arm.com
Cc: Fuad Tabba tabba@google.com
Cc: Marc Zyngier maz@kernel.org
Cc: Oliver Upton oliver.upton@linux.dev
Reviewed-by: Oliver Upton oliver.upton@linux.dev
Link: https://lore.kernel.org/r/20250210195226.1215254-3-mark.rutland@arm.com
Signed-off-by: Marc Zyngier maz@kernel.org
Signed-off-by: Mark Brown broonie@kernel.org
---
 arch/arm64/include/asm/kvm_host.h       | 1 -
 arch/arm64/kvm/fpsimd.c                 | 1 -
 arch/arm64/kvm/hyp/include/hyp/switch.h | 6 +-----
 3 files changed, 1 insertion(+), 7 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index ecae31b0dab3..06e8d4645ecd 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -333,7 +333,6 @@ struct kvm_vcpu_arch {
	struct kvm_guest_debug_arch external_debug_state;

	struct thread_info *host_thread_info;	/* hyp VA */
-	struct user_fpsimd_state *host_fpsimd_state;	/* hyp VA */

	struct {
		/* {Break,watch}point registers */
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 2afa2521bce1..1ef9d6cb91ee 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -55,7 +55,6 @@ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu)
	}

	vcpu->arch.host_thread_info = kern_hyp_va(ti);
-	vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd);
 error:
	return ret;
 }
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 269ec3587210..cc102e46b0e2 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -251,11 +251,7 @@ static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
	}
	isb();

-	if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
-		__fpsimd_save_state(vcpu->arch.host_fpsimd_state);
-		vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
-	}
-
+	/* Restore the guest state */
	if (sve_guest)
		__hyp_sve_restore_guest(vcpu);
	else
From: Mark Rutland mark.rutland@arm.com
[ Upstream commit 459f059be702056d91537b99a129994aa6ccdd35 ]
When KVM is in VHE mode, the host kernel tries to save and restore the configuration of CPACR_EL1.ZEN (i.e. CPTR_EL2.ZEN when HCR_EL2.E2H=1) across kvm_arch_vcpu_load_fp() and kvm_arch_vcpu_put_fp(), since the configuration may be clobbered by hyp when running a vCPU. This logic is currently redundant.
The VHE hyp code unconditionally configures CPTR_EL2.ZEN to 0b01 when returning to the host, permitting host kernel usage of SVE.
Now that the host eagerly saves and unbinds its own FPSIMD/SVE/SME state, there's no need to save/restore the state of the EL0 SVE trap. The kernel can safely save/restore state without trapping, as described above, and will restore userspace state (including trap controls) before returning to userspace.
Remove the redundant logic.
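For context, the v5.15 VHE hyp exit path already resets the trap configuration unconditionally, roughly (a sketch of the relevant line of __deactivate_traps() in arch/arm64/kvm/hyp/vhe/switch.c; CPACR_EL1_DEFAULT sets ZEN to 0b01):

| /*
|  * Permit EL1 (kernel) SVE/FPSIMD use; EL0 stays trapped until the
|  * host restores userspace state on the way back to userspace.
|  */
| write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1);

With the host state eagerly saved and unbound on vcpu load, this is all that is needed, and the EL0 enable bit no longer has to be saved and restored around it.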
Signed-off-by: Mark Rutland mark.rutland@arm.com
Reviewed-by: Mark Brown broonie@kernel.org
Tested-by: Mark Brown broonie@kernel.org
Acked-by: Will Deacon will@kernel.org
Cc: Catalin Marinas catalin.marinas@arm.com
Cc: Fuad Tabba tabba@google.com
Cc: Marc Zyngier maz@kernel.org
Cc: Oliver Upton oliver.upton@linux.dev
Reviewed-by: Oliver Upton oliver.upton@linux.dev
Link: https://lore.kernel.org/r/20250210195226.1215254-4-mark.rutland@arm.com
Signed-off-by: Marc Zyngier maz@kernel.org
[Rework for refactoring of where the flags are stored -- broonie]
Signed-off-by: Mark Brown broonie@kernel.org
---
 arch/arm64/include/asm/kvm_host.h |  1 -
 arch/arm64/kvm/fpsimd.c           | 15 ---------------
 2 files changed, 16 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 06e8d4645ecd..3d4e2396a2d7 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -421,7 +421,6 @@ struct kvm_vcpu_arch {
 #define KVM_ARM64_DEBUG_DIRTY		(1 << 0)
 #define KVM_ARM64_FP_ENABLED		(1 << 1) /* guest FP regs loaded */
 #define KVM_ARM64_FP_HOST		(1 << 2) /* host FP regs loaded */
-#define KVM_ARM64_HOST_SVE_ENABLED	(1 << 4) /* SVE enabled for EL0 */
 #define KVM_ARM64_GUEST_HAS_SVE	(1 << 5) /* SVE exposed to guest */
 #define KVM_ARM64_VCPU_SVE_FINALIZED	(1 << 6) /* SVE config completed */
 #define KVM_ARM64_GUEST_HAS_PTRAUTH	(1 << 7) /* PTRAUTH exposed to guest */
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 1ef9d6cb91ee..1360ddd4137b 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -83,9 +83,6 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
	 */
	fpsimd_save_and_flush_cpu_state();
	vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
-
-	if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
-		vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED;
 }

 /*
@@ -142,18 +139,6 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
	}

		fpsimd_save_and_flush_cpu_state();
-	} else if (has_vhe() && system_supports_sve()) {
-		/*
-		 * The FPSIMD/SVE state in the CPU has not been touched, and we
-		 * have SVE (and VHE): CPACR_EL1 (alias CPTR_EL2) has been
-		 * reset to CPACR_EL1_DEFAULT by the Hyp code, disabling SVE
-		 * for EL0. To avoid spurious traps, restore the trap state
-		 * seen by kvm_arch_vcpu_load_fp():
-		 */
-		if (vcpu->arch.flags & KVM_ARM64_HOST_SVE_ENABLED)
-			sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_ZEN_EL0EN);
-		else
-			sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0);
	}

	local_irq_restore(flags);
From: Fuad Tabba tabba@google.com
[ Upstream commit 2fd5b4b0e7b440602455b79977bfa64dea101e6c ]
Similar to VHE, calculate the value of cptr_el2 from scratch on activate traps. This removes the need to store cptr_el2 in every vcpu structure. Moreover, some traps, such as whether the guest owns the fp registers, need to be set on every vcpu run.
Reported-by: James Clark james.clark@linaro.org
Fixes: 5294afdbf45a ("KVM: arm64: Exclude FP ownership from kvm_vcpu_arch")
Signed-off-by: Fuad Tabba tabba@google.com
Link: https://lore.kernel.org/r/20241216105057.579031-13-tabba@google.com
Signed-off-by: Marc Zyngier maz@kernel.org
Signed-off-by: Mark Brown broonie@kernel.org
---
 arch/arm64/include/asm/kvm_host.h |  1 -
 arch/arm64/kvm/arm.c              |  1 -
 arch/arm64/kvm/hyp/nvhe/switch.c  | 35 ++++++++++++++++++++++++++---------
 3 files changed, 26 insertions(+), 11 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 3d4e2396a2d7..2e0952134e2e 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -301,7 +301,6 @@ struct kvm_vcpu_arch {
	/* Values of trap registers for the guest. */
	u64 hcr_el2;
	u64 mdcr_el2;
-	u64 cptr_el2;

	/* Values of trap registers for the host before guest entry. */
	u64 mdcr_el2_host;
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 9ded5443de48..5ca8782edb96 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1138,7 +1138,6 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
	}

	vcpu_reset_hcr(vcpu);
-	vcpu->arch.cptr_el2 = CPTR_EL2_DEFAULT;

	/*
	 * Handle the "start in power-off" case.
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 4db5409f40c4..c0885197f2a5 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -34,21 +34,38 @@ DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data);
 DEFINE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
 DEFINE_PER_CPU(unsigned long, kvm_hyp_vector);

-static void __activate_traps(struct kvm_vcpu *vcpu)
+static bool guest_owns_fp_regs(struct kvm_vcpu *vcpu)
 {
-	u64 val;
+	return vcpu->arch.flags & KVM_ARM64_FP_ENABLED;
+}

-	___activate_traps(vcpu);
-	__activate_traps_common(vcpu);
+static void __activate_cptr_traps(struct kvm_vcpu *vcpu)
+{
+	u64 val = CPTR_EL2_TAM;	/* Same bit irrespective of E2H */

-	val = vcpu->arch.cptr_el2;
-	val |= CPTR_EL2_TTA | CPTR_EL2_TAM;
-	if (!update_fp_enabled(vcpu)) {
-		val |= CPTR_EL2_TFP | CPTR_EL2_TZ;
-		__activate_traps_fpsimd32(vcpu);
+	/* !hVHE case upstream */
+	if (1) {
+		val |= CPTR_EL2_TTA | CPTR_NVHE_EL2_RES1;
+
+		if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs(vcpu))
+			val |= CPTR_EL2_TZ;
+
+		if (!guest_owns_fp_regs(vcpu))
+			val |= CPTR_EL2_TFP;
	}

+	if (!guest_owns_fp_regs(vcpu))
+		__activate_traps_fpsimd32(vcpu);
+
	write_sysreg(val, cptr_el2);
+}
+
+static void __activate_traps(struct kvm_vcpu *vcpu)
+{
+	___activate_traps(vcpu);
+	__activate_traps_common(vcpu);
+	__activate_cptr_traps(vcpu);
+
	write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el2);

	if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
From: Mark Rutland mark.rutland@arm.com
[ Upstream commit 59419f10045bc955d2229819c7cf7a8b0b9c5b59 ]
In non-protected KVM modes, while the guest FPSIMD/SVE/SME state is live on the CPU, the host's active SVE VL may differ from the guest's maximum SVE VL:
* For VHE hosts, when a VM uses NV, ZCR_EL2 contains a value constrained by the guest hypervisor, which may be less than or equal to that guest's maximum VL.
Note: in this case the value of ZCR_EL1 is immaterial due to E2H.
* For nVHE/hVHE hosts, ZCR_EL1 contains a value written by the guest, which may be less than or greater than the guest's maximum VL.
Note: in this case hyp code traps host SVE usage and lazily restores ZCR_EL2 to the host's maximum VL, which may be greater than the guest's maximum VL.
This can be the case between exiting a guest and kvm_arch_vcpu_put_fp(). If a softirq is taken during this period and the softirq handler tries to use kernel-mode NEON, then the kernel will fail to save the guest's FPSIMD/SVE state, and will pend a SIGKILL for the current thread.
This happens because kvm_arch_vcpu_ctxsync_fp() binds the guest's live FPSIMD/SVE state with the guest's maximum SVE VL, and fpsimd_save_user_state() verifies that the live SVE VL is as expected before attempting to save the register state:
|	if (WARN_ON(sve_get_vl() != vl)) {
|		force_signal_inject(SIGKILL, SI_KERNEL, 0, 0);
|		return;
|	}
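As a concrete illustration (an editor's example with made-up vector lengths, not taken from the commit): suppose the guest's maximum VL is 64 bytes, so the guest's state is bound with vl == 64. The guest writes ZCR_EL1 to select a 32-byte VL and then exits. If a softirq handler uses kernel-mode NEON before kvm_arch_vcpu_put_fp() has run, the save path observes sve_get_vl() == 32, the WARN_ON above fires, and a SIGKILL is pended for the current thread.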
Fix this and make this a bit easier to reason about by always eagerly switching ZCR_EL{1,2} at hyp during guest<->host transitions. With this happening, there's no need to trap host SVE usage, and the nVHE/hVHE __deactivate_cptr_traps() logic can be simplified to enable host access to all present FPSIMD/SVE/SME features.
In protected nVHE/hVHE modes, the host's state is always saved/restored by hyp, and the guest's state is saved prior to exit to the host, so from the host's PoV the guest never has live FPSIMD/SVE/SME state, and the host's ZCR_EL1 is never clobbered by hyp.
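Sketched end to end, the nVHE run path after this patch orders the switches as follows. This is an editor's summary of the handle___kvm_vcpu_run() hunk below, not code taken verbatim from the commit:

| fpsimd_lazy_switch_to_guest(vcpu);  /* load guest ZCR_EL{1,2} */
| __kvm_vcpu_run(vcpu);               /* __guest_enter() supplies the
|                                        context synchronization event */
| fpsimd_lazy_switch_to_host(vcpu);   /* save guest ZCR_EL1; restore the
|                                        host's max VL in ZCR_EL2 */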
Fixes: 8c8010d69c132273 ("KVM: arm64: Save/restore SVE state for nVHE")
Fixes: 2e3cf82063a00ea0 ("KVM: arm64: nv: Ensure correct VL is loaded before saving SVE state")
Signed-off-by: Mark Rutland mark.rutland@arm.com
Reviewed-by: Mark Brown broonie@kernel.org
Tested-by: Mark Brown broonie@kernel.org
Cc: Catalin Marinas catalin.marinas@arm.com
Cc: Fuad Tabba tabba@google.com
Cc: Marc Zyngier maz@kernel.org
Cc: Oliver Upton oliver.upton@linux.dev
Cc: Will Deacon will@kernel.org
Reviewed-by: Oliver Upton oliver.upton@linux.dev
Link: https://lore.kernel.org/r/20250210195226.1215254-9-mark.rutland@arm.com
Signed-off-by: Marc Zyngier maz@kernel.org
[ v6.6 lacks pKVM saving of host SVE state, pull in discovery of maximum
  host VL separately -- broonie ]
Signed-off-by: Mark Brown broonie@kernel.org
---
 arch/arm64/include/asm/kvm_host.h       |  1 +
 arch/arm64/include/asm/kvm_hyp.h        |  7 +++++
 arch/arm64/kvm/fpsimd.c                 | 19 ++++++------
 arch/arm64/kvm/hyp/entry.S              |  5 +++
 arch/arm64/kvm/hyp/include/hyp/switch.h | 55 +++++++++++++++++++++++++++++++++
 arch/arm64/kvm/hyp/nvhe/hyp-main.c      |  9 +++---
 arch/arm64/kvm/hyp/nvhe/switch.c        | 31 ++++++++++---------
 arch/arm64/kvm/hyp/vhe/switch.c         |  4 +++
 arch/arm64/kvm/reset.c                  |  3 ++
 9 files changed, 106 insertions(+), 28 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 2e0952134e2e..6d7b6b5d076d 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -64,6 +64,7 @@ enum kvm_mode kvm_get_mode(void);
 DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);

 extern unsigned int kvm_sve_max_vl;
+extern unsigned int kvm_host_sve_max_vl;
 int kvm_arm_init_sve(void);

 u32 __attribute_const__ kvm_target_cpu(void);
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 657d0c94cf82..308df86f9a4b 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -117,5 +117,12 @@ void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt);

 extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val);
 extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val);
+extern unsigned int kvm_nvhe_sym(kvm_host_sve_max_vl);
+
+static inline bool guest_owns_fp_regs(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.flags & KVM_ARM64_FP_ENABLED;
+}
+
 #endif /* __ARM64_KVM_HYP_H__ */
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 1360ddd4137b..cfda503c8b3f 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -129,15 +129,16 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
 	local_irq_save(flags);

 	if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
-		if (vcpu_has_sve(vcpu)) {
-			__vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
-
-			/* Restore the VL that was saved when bound to the CPU */
-			if (!has_vhe())
-				sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1,
-						       SYS_ZCR_EL1);
-		}
-
+		/*
+		 * Flush (save and invalidate) the fpsimd/sve state so that if
+		 * the host tries to use fpsimd/sve, it's not using stale data
+		 * from the guest.
+		 *
+		 * Flushing the state sets the TIF_FOREIGN_FPSTATE bit for the
+		 * context unconditionally, in both nVHE and VHE. This allows
+		 * the kernel to restore the fpsimd/sve state, including ZCR_EL1
+		 * when needed.
+		 */
 		fpsimd_save_and_flush_cpu_state();
 	}
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index 435346ea1504..d8c94c45cb2f 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -44,6 +44,11 @@ alternative_if ARM64_HAS_RAS_EXTN
 alternative_else_nop_endif
 	mrs	x1, isr_el1
 	cbz	x1,  1f
+
+	// Ensure that __guest_enter() always provides a context
+	// synchronization event so that callers don't need ISBs for anything
+	// that would usually be synchronized by the ERET.
+	isb
 	mov	x0, #ARM_EXCEPTION_IRQ
 	ret
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index cc102e46b0e2..797544662a95 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -215,6 +215,61 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
 	write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR);
 }

+static inline void fpsimd_lazy_switch_to_guest(struct kvm_vcpu *vcpu)
+{
+	u64 zcr_el1, zcr_el2;
+
+	if (!guest_owns_fp_regs(vcpu))
+		return;
+
+	if (vcpu_has_sve(vcpu)) {
+		zcr_el2 = vcpu_sve_max_vq(vcpu) - 1;
+
+		write_sysreg_el2(zcr_el2, SYS_ZCR);
+
+		zcr_el1 = __vcpu_sys_reg(vcpu, ZCR_EL1);
+		write_sysreg_el1(zcr_el1, SYS_ZCR);
+	}
+}
+
+static inline void fpsimd_lazy_switch_to_host(struct kvm_vcpu *vcpu)
+{
+	u64 zcr_el1, zcr_el2;
+
+	if (!guest_owns_fp_regs(vcpu))
+		return;
+
+	/*
+	 * When the guest owns the FP regs, we know that guest+hyp traps for
+	 * any FPSIMD/SVE/SME features exposed to the guest have been disabled
+	 * by either fpsimd_lazy_switch_to_guest() or kvm_hyp_handle_fpsimd()
+	 * prior to __guest_entry(). As __guest_entry() guarantees a context
+	 * synchronization event, we don't need an ISB here to avoid taking
+	 * traps for anything that was exposed to the guest.
+	 */
+	if (vcpu_has_sve(vcpu)) {
+		zcr_el1 = read_sysreg_el1(SYS_ZCR);
+		__vcpu_sys_reg(vcpu, ZCR_EL1) = zcr_el1;
+
+		/*
+		 * The guest's state is always saved using the guest's max VL.
+		 * Ensure that the host has the guest's max VL active such that
+		 * the host can save the guest's state lazily, but don't
+		 * artificially restrict the host to the guest's max VL.
+		 */
+		if (has_vhe()) {
+			zcr_el2 = vcpu_sve_max_vq(vcpu) - 1;
+			write_sysreg_el2(zcr_el2, SYS_ZCR);
+		} else {
+			zcr_el2 = sve_vq_from_vl(kvm_host_sve_max_vl) - 1;
+			write_sysreg_el2(zcr_el2, SYS_ZCR);
+
+			zcr_el1 = vcpu_sve_max_vq(vcpu) - 1;
+			write_sysreg_el1(zcr_el1, SYS_ZCR);
+		}
+	}
+}
+
 /* Check for an FPSIMD/SVE trap and handle as appropriate */
 static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
 {
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 2da6aa8da868..a446883d5b9a 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -19,13 +19,17 @@

 DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);

+unsigned int kvm_host_sve_max_vl;
+
 void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt);

 static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)
 {
 	DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1);

+	fpsimd_lazy_switch_to_guest(kern_hyp_va(vcpu));
 	cpu_reg(host_ctxt, 1) = __kvm_vcpu_run(kern_hyp_va(vcpu));
+	fpsimd_lazy_switch_to_host(kern_hyp_va(vcpu));
 }

 static void handle___kvm_adjust_pc(struct kvm_cpu_context *host_ctxt)
@@ -237,11 +241,6 @@ void handle_trap(struct kvm_cpu_context *host_ctxt)
 	case ESR_ELx_EC_SMC64:
 		handle_host_smc(host_ctxt);
 		break;
-	case ESR_ELx_EC_SVE:
-		sysreg_clear_set(cptr_el2, CPTR_EL2_TZ, 0);
-		isb();
-		sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
-		break;
 	case ESR_ELx_EC_IABT_LOW:
 	case ESR_ELx_EC_DABT_LOW:
 		handle_host_mem_abort(host_ctxt);
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index c0885197f2a5..fff7491d8351 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -34,15 +34,13 @@ DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data);
 DEFINE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
 DEFINE_PER_CPU(unsigned long, kvm_hyp_vector);

-static bool guest_owns_fp_regs(struct kvm_vcpu *vcpu)
-{
-	return vcpu->arch.flags & KVM_ARM64_FP_ENABLED;
-}
-
 static void __activate_cptr_traps(struct kvm_vcpu *vcpu)
 {
 	u64 val = CPTR_EL2_TAM;	/* Same bit irrespective of E2H */

+	if (!guest_owns_fp_regs(vcpu))
+		__activate_traps_fpsimd32(vcpu);
+
 	/* !hVHE case upstream */
 	if (1) {
 		val |= CPTR_EL2_TTA | CPTR_NVHE_EL2_RES1;
@@ -52,12 +50,22 @@ static void __activate_cptr_traps(struct kvm_vcpu *vcpu)

 		if (!guest_owns_fp_regs(vcpu))
 			val |= CPTR_EL2_TFP;
+
+		write_sysreg(val, cptr_el2);
 	}
+}

-	if (!guest_owns_fp_regs(vcpu))
-		__activate_traps_fpsimd32(vcpu);
+static void __deactivate_cptr_traps(struct kvm_vcpu *vcpu)
+{
+	/* !hVHE case upstream */
+	if (1) {
+		u64 val = CPTR_NVHE_EL2_RES1;

-	write_sysreg(val, cptr_el2);
+		if (!cpus_have_final_cap(ARM64_SVE))
+			val |= CPTR_EL2_TZ;
+
+		write_sysreg(val, cptr_el2);
+	}
 }

 static void __activate_traps(struct kvm_vcpu *vcpu)
@@ -86,7 +94,6 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
 static void __deactivate_traps(struct kvm_vcpu *vcpu)
 {
 	extern char __kvm_hyp_host_vector[];
-	u64 cptr;
 	___deactivate_traps(vcpu);

@@ -111,11 +118,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)

 	write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2);

-	cptr = CPTR_EL2_DEFAULT;
-	if (vcpu_has_sve(vcpu) && (vcpu->arch.flags & KVM_ARM64_FP_ENABLED))
-		cptr |= CPTR_EL2_TZ;
-
-	write_sysreg(cptr, cptr_el2);
+	__deactivate_cptr_traps(vcpu);
 	write_sysreg(__kvm_hyp_host_vector, vbar_el2);
 }
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 813e6e2178c1..d8a8628a9d70 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -114,6 +114,8 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)

 	sysreg_save_host_state_vhe(host_ctxt);

+	fpsimd_lazy_switch_to_guest(vcpu);
+
 	/*
 	 * ARM erratum 1165522 requires us to configure both stage 1 and
 	 * stage 2 translation for the guest context before we clear
@@ -144,6 +146,8 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
 	__deactivate_traps(vcpu);

+	fpsimd_lazy_switch_to_host(vcpu);
+
 	sysreg_restore_host_state_vhe(host_ctxt);

 	if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 5ce36b0a3343..deb205638279 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -42,11 +42,14 @@ static u32 kvm_ipa_limit;
 				 PSR_AA32_I_BIT | PSR_AA32_F_BIT)

 unsigned int kvm_sve_max_vl;
+unsigned int kvm_host_sve_max_vl;

 int kvm_arm_init_sve(void)
 {
 	if (system_supports_sve()) {
 		kvm_sve_max_vl = sve_max_virtualisable_vl;
+		kvm_host_sve_max_vl = sve_max_vl;
+		kvm_nvhe_sym(kvm_host_sve_max_vl) = kvm_host_sve_max_vl;

 		/*
 		 * The get_sve_reg()/set_sve_reg() ioctl interface will need