This series enables support for the data processing extensions in the newly released 2023 architecture, this is mainly support for 8 bit floating point formats. Most of the extensions only introduce new instructions and therefore only require hwcaps but there is a new EL0 visible control register FPMR used to control the 8 bit floating point formats, we need to manage traps for this and context switch it.
The sharing of floating point save code between the host and guest kernels slightly complicates the introduction of KVM support, we first introduce host support with some placeholders for KVM then replace those with the actual KVM support.
I've not added test coverage for ptrace, I've got a not quite finished test program which exercises all the FP ptrace interfaces and their interactions together, my plan is to cover it there rather than add another tiny test program that duplicates the boilerplace for tracing a target and doesn't actually run the traced program.
Signed-off-by: Mark Brown broonie@kernel.org --- Mark Brown (21): arm64/sysreg: Add definition for ID_AA64PFR2_EL1 arm64/sysreg: Update ID_AA64ISAR2_EL1 defintion for DDI0601 2023-09 arm64/sysreg: Add definition for ID_AA64ISAR3_EL1 arm64/sysreg: Add definition for ID_AA64FPFR0_EL1 arm64/sysreg: Update ID_AA64SMFR0_EL1 definition for DDI0601 2023-09 arm64/sysreg: Update SCTLR_EL1 for DDI0601 2023-09 arm64/sysreg: Update HCRX_EL2 definition for DDI0601 2023-09 arm64/sysreg: Add definition for FPMR arm64/cpufeature: Hook new identification registers up to cpufeature arm64/fpsimd: Enable host kernel access to FPMR arm64/fpsimd: Support FEAT_FPMR arm64/signal: Add FPMR signal handling arm64/ptrace: Expose FPMR via ptrace KVM: arm64: Add newly allocated ID registers to register descriptions KVM: arm64: Support FEAT_FPMR for guests arm64/hwcap: Define hwcaps for 2023 DPISA features kselftest/arm64: Handle FPMR context in generic signal frame parser kselftest/arm64: Add basic FPMR test kselftest/arm64: Add 2023 DPISA hwcap test coverage KVM: arm64: selftests: Document feature registers added in 2023 extensions KVM: arm64: selftests: Teach get-reg-list about FPMR
Documentation/arch/arm64/elf_hwcaps.rst | 49 +++++ arch/arm64/include/asm/cpu.h | 3 + arch/arm64/include/asm/cpufeature.h | 5 + arch/arm64/include/asm/fpsimd.h | 2 + arch/arm64/include/asm/hwcap.h | 15 ++ arch/arm64/include/asm/kvm_arm.h | 4 +- arch/arm64/include/asm/kvm_host.h | 3 + arch/arm64/include/asm/processor.h | 2 + arch/arm64/include/uapi/asm/hwcap.h | 15 ++ arch/arm64/include/uapi/asm/sigcontext.h | 8 + arch/arm64/kernel/cpufeature.c | 72 +++++++ arch/arm64/kernel/cpuinfo.c | 18 ++ arch/arm64/kernel/fpsimd.c | 13 ++ arch/arm64/kernel/ptrace.c | 42 ++++ arch/arm64/kernel/signal.c | 59 ++++++ arch/arm64/kvm/fpsimd.c | 19 +- arch/arm64/kvm/hyp/include/hyp/switch.h | 7 +- arch/arm64/kvm/sys_regs.c | 17 +- arch/arm64/tools/cpucaps | 1 + arch/arm64/tools/sysreg | 153 ++++++++++++++- include/uapi/linux/elf.h | 1 + tools/testing/selftests/arm64/abi/hwcap.c | 217 +++++++++++++++++++++ tools/testing/selftests/arm64/signal/.gitignore | 1 + .../arm64/signal/testcases/fpmr_siginfo.c | 82 ++++++++ .../selftests/arm64/signal/testcases/testcases.c | 8 + .../selftests/arm64/signal/testcases/testcases.h | 1 + tools/testing/selftests/kvm/aarch64/get-reg-list.c | 11 +- 27 files changed, 810 insertions(+), 18 deletions(-) --- base-commit: 05d3ef8bba77c1b5f98d941d8b2d4aeab8118ef1 change-id: 20231003-arm64-2023-dpisa-2f3d25746474
Best regards,
DDI0601 2023-09 defines a new system register ID_AA64PFR2_EL1 which enumerates FPMR and some new MTE features. Add a definition of this register.
Signed-off-by: Mark Brown broonie@kernel.org --- arch/arm64/tools/sysreg | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+)
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 76ce150e7347..0ea93d166f48 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -1002,6 +1002,27 @@ UnsignedEnum 3:0 BT EndEnum EndSysreg
+Sysreg ID_AA64PFR2_EL1 3 0 0 4 2 +Res0 63:36 +UnsignedEnum 35:32 FPMR + 0b0000 NI + 0b0001 IMP +EndEnum +Res0 31:12 +UnsignedEnum 11:8 MTEFAR + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 7:4 MTESTOREONLY + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 3:0 MTEPERM + 0b0000 NI + 0b0001 IMP +EndEnum +EndSysreg + Sysreg ID_AA64ZFR0_EL1 3 0 0 4 4 Res0 63:60 UnsignedEnum 59:56 F64MM
DDI0601 2023-09 defines some new fields in previously RES0 space in ID_AA64ISAR2_EL1, together with one new enum value. Update the system register definition to reflect this.
Signed-off-by: Mark Brown broonie@kernel.org --- arch/arm64/tools/sysreg | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 0ea93d166f48..fcca3a3714b0 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -1359,7 +1359,14 @@ EndEnum EndSysreg
Sysreg ID_AA64ISAR2_EL1 3 0 0 6 2 -Res0 63:56 +UnsignedEnum 63:60 ATS1A + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 59:56 LUT + 0b0000 NI + 0b0001 IMP +EndEnum UnsignedEnum 55:52 CSSC 0b0000 NI 0b0001 IMP @@ -1368,7 +1375,19 @@ UnsignedEnum 51:48 RPRFM 0b0000 NI 0b0001 IMP EndEnum -Res0 47:32 +Res0 47:44 +UnsignedEnum 43:40 PRFMSLC + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 39:36 SYSINSTR_128 + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 35:32 SYSREG_128 + 0b0000 NI + 0b0001 IMP +EndEnum UnsignedEnum 31:28 CLRBHB 0b0000 NI 0b0001 IMP @@ -1392,6 +1411,7 @@ UnsignedEnum 15:12 APA3 0b0011 PAuth2 0b0100 FPAC 0b0101 FPACCOMBINE + 0b0110 PAUTH_LR EndEnum UnsignedEnum 11:8 GPA3 0b0000 NI
DDI0601 2023-09 adds a new system register ID_AA64ISAR3_EL1 enumerating new floating point and TLB invalidation features. Add a defintion for it.
Signed-off-by: Mark Brown broonie@kernel.org --- arch/arm64/tools/sysreg | 17 +++++++++++++++++ 1 file changed, 17 insertions(+)
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index fcca3a3714b0..0515006a7292 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -1427,6 +1427,23 @@ UnsignedEnum 3:0 WFxT EndEnum EndSysreg
+Sysreg ID_AA64ISAR3_EL1 3 0 0 6 3 +Res0 63:12 +UnsignedEnum 11:8 TLBIW + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 7:4 FAMINMAX + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 3:0 CPA + 0b0000 NI + 0b0001 IMP + 0b0010 CPA2 +EndEnum +EndSysreg + Sysreg ID_AA64MMFR0_EL1 3 0 0 7 0 UnsignedEnum 63:60 ECV 0b0000 NI
DDI0601 2023-09 defines a new feature register ID_AA64FPFR0_EL1 which enumerates a number of FP8 related features. Add a definition for it.
Signed-off-by: Mark Brown broonie@kernel.org --- arch/arm64/tools/sysreg | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+)
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 0515006a7292..057cd85d8c2c 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -1127,6 +1127,35 @@ EndEnum Res0 31:0 EndSysreg
+Sysreg ID_AA64FPFR0_EL1 3 0 0 4 7 +Res0 63:32 +UnsignedEnum 31 F8CVT + 0b0 NI + 0b1 IMP +EndEnum +UnsignedEnum 30 F8FMA + 0b0 NI + 0b1 IMP +EndEnum +UnsignedEnum 29 F8DP4 + 0b0 NI + 0b1 IMP +EndEnum +UnsignedEnum 28 F8DP2 + 0b0 NI + 0b1 IMP +EndEnum +Res0 27:2 +UnsignedEnum 1 F8E4M3 + 0b0 NI + 0b1 IMP +EndEnum +UnsignedEnum 0 F8E5M2 + 0b0 NI + 0b1 IMP +EndEnum +EndSysreg + Sysreg ID_AA64DFR0_EL1 3 0 0 5 0 Enum 63:60 HPMN0 0b0000 UNPREDICTABLE
The 2023-09 release of DDI0601 defines a number of new feature enumeration fields in ID_AA64SMFR0_EL1. Add these fields.
Signed-off-by: Mark Brown broonie@kernel.org --- arch/arm64/tools/sysreg | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 057cd85d8c2c..91dd564ee4d3 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -1075,7 +1075,11 @@ UnsignedEnum 63 FA64 0b0 NI 0b1 IMP EndEnum -Res0 62:60 +Res0 62:61 +UnsignedEnum 60 LUTv2 + 0b0 NI + 0b1 IMP +EndEnum UnsignedEnum 59:56 SMEver 0b0000 SME 0b0001 SME2 @@ -1103,7 +1107,14 @@ UnsignedEnum 42 F16F16 0b0 NI 0b1 IMP EndEnum -Res0 41:40 +UnsignedEnum 41 F8F16 + 0b0 NI + 0b1 IMP +EndEnum +UnsignedEnum 40 F8F32 + 0b0 NI + 0b1 IMP +EndEnum UnsignedEnum 39:36 I8I32 0b0000 NI 0b1111 IMP @@ -1124,7 +1135,20 @@ UnsignedEnum 32 F32F32 0b0 NI 0b1 IMP EndEnum -Res0 31:0 +Res0 31 +UnsignedEnum 30 SF8FMA + 0b0 NI + 0b1 IMP +EndEnum +UnsignedEnum 29 SF8DP4 + 0b0 NI + 0b1 IMP +EndEnum +UnsignedEnum 28 SF8DP2 + 0b0 NI + 0b1 IMP +EndEnum +Res0 27:0 EndSysreg
Sysreg ID_AA64FPFR0_EL1 3 0 0 4 7
DDI0601 2023-09 defines some new fields in SCTLR_EL1 controlling new MTE and floating point features. Update our sysreg definition to reflect these.
Signed-off-by: Mark Brown broonie@kernel.org --- arch/arm64/tools/sysreg | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 91dd564ee4d3..97d0da472328 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -1785,7 +1785,8 @@ Field 63 TIDCP Field 62 SPINTMASK Field 61 NMI Field 60 EnTP2 -Res0 59:58 +Field 59 TCSO +Field 58 TCSO0 Field 57 EPAN Field 56 EnALS Field 55 EnAS0 @@ -1814,7 +1815,7 @@ EndEnum Field 37 ITFSB Field 36 BT1 Field 35 BT0 -Res0 34 +Field 34 EnFPM Field 33 MSCEn Field 32 CMOW Field 31 EnIA
DDI0601 2023-09 defines new fields in HCRX_EL2 controlling access to new system registers, update our definition of HCRX_EL2 to reflect this.
Signed-off-by: Mark Brown broonie@kernel.org --- arch/arm64/tools/sysreg | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 97d0da472328..e603a6153527 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -2406,7 +2406,9 @@ Fields ZCR_ELx EndSysreg
Sysreg HCRX_EL2 3 4 1 2 2 -Res0 63:23 +Res0 63:25 +Field 24 PACMEn +Field 23 EnFPM Field 22 GCSEn Field 21 EnIDCP128 Field 20 EnSDERR
DDI0601 2023-09 defines a new sysrem register FPMR (Floating Point Mode Register) which configures the new FP8 features. Add a definition of this register.
Signed-off-by: Mark Brown broonie@kernel.org --- arch/arm64/tools/sysreg | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+)
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index e603a6153527..3e4cb8a141a3 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -2132,6 +2132,29 @@ Field 1 ZA Field 0 SM EndSysreg
+Sysreg FPMR 3 3 4 4 2 +Res0 63:38 +Field 37:32 LSCALE2 +Field 31:24 NSCALE +Res0 23 +Field 22:16 LSCALE +Field 15 OSC +Field 14 OSM +Res0 13:9 +UnsignedEnum 8:6 F8D + 0b000 E5M2 + 0b001 E4M3 +EndEnum +UnsignedEnum 5:3 F8S2 + 0b000 E5M2 + 0b001 E4M3 +EndEnum +UnsignedEnum 2:0 F8S1 + 0b000 E5M2 + 0b001 E4M3 +EndEnum +EndSysreg + SysregFields HFGxTR_EL2 Field 63 nAMAIR2_EL1 Field 62 nMAIR2_EL1
The 2023 architecture extensions have defined several new ID registers, hook them up to the cpufeature code so we can add feature checks and hwcaps based on their contents.
Signed-off-by: Mark Brown broonie@kernel.org --- arch/arm64/include/asm/cpu.h | 3 +++ arch/arm64/kernel/cpufeature.c | 28 ++++++++++++++++++++++++++++ arch/arm64/kernel/cpuinfo.c | 3 +++ 3 files changed, 34 insertions(+)
diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index e749838b9c5d..60db584ded55 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -53,14 +53,17 @@ struct cpuinfo_arm64 { u64 reg_id_aa64isar0; u64 reg_id_aa64isar1; u64 reg_id_aa64isar2; + u64 reg_id_aa64isar3; u64 reg_id_aa64mmfr0; u64 reg_id_aa64mmfr1; u64 reg_id_aa64mmfr2; u64 reg_id_aa64mmfr3; u64 reg_id_aa64pfr0; u64 reg_id_aa64pfr1; + u64 reg_id_aa64pfr2; u64 reg_id_aa64zfr0; u64 reg_id_aa64smfr0; + u64 reg_id_aa64fpfr0;
struct cpuinfo_32bit aarch32;
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 444a73c2e638..c1658d893e0e 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -234,6 +234,10 @@ static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { ARM64_FTR_END, };
+static const struct arm64_ftr_bits ftr_id_aa64isar3[] = { + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_CSV3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_CSV2_SHIFT, 4, 0), @@ -267,6 +271,10 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { ARM64_FTR_END, };
+static const struct arm64_ftr_bits ftr_id_aa64pfr2[] = { + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = { ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_F64MM_SHIFT, 4, 0), @@ -317,6 +325,10 @@ static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = { ARM64_FTR_END, };
+static const struct arm64_ftr_bits ftr_id_aa64fpfr0[] = { + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_ECV_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_FGT_SHIFT, 4, 0), @@ -712,10 +724,12 @@ static const struct __ftr_reg_entry { &id_aa64pfr0_override), ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1, &id_aa64pfr1_override), + ARM64_FTR_REG(SYS_ID_AA64PFR2_EL1, ftr_id_aa64pfr2), ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0, &id_aa64zfr0_override), ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64SMFR0_EL1, ftr_id_aa64smfr0, &id_aa64smfr0_override), + ARM64_FTR_REG(SYS_ID_AA64FPFR0_EL1, ftr_id_aa64fpfr0),
/* Op1 = 0, CRn = 0, CRm = 5 */ ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0), @@ -727,6 +741,7 @@ static const struct __ftr_reg_entry { &id_aa64isar1_override), ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2, &id_aa64isar2_override), + ARM64_FTR_REG(SYS_ID_AA64ISAR3_EL1, ftr_id_aa64isar3),
/* Op1 = 0, CRn = 0, CRm = 7 */ ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0), @@ -1026,14 +1041,17 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_ID_AA64ISAR0_EL1, info->reg_id_aa64isar0); init_cpu_ftr_reg(SYS_ID_AA64ISAR1_EL1, info->reg_id_aa64isar1); init_cpu_ftr_reg(SYS_ID_AA64ISAR2_EL1, info->reg_id_aa64isar2); + init_cpu_ftr_reg(SYS_ID_AA64ISAR3_EL1, info->reg_id_aa64isar3); init_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0); init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1); init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2); init_cpu_ftr_reg(SYS_ID_AA64MMFR3_EL1, info->reg_id_aa64mmfr3); init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0); init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1); + init_cpu_ftr_reg(SYS_ID_AA64PFR2_EL1, info->reg_id_aa64pfr2); init_cpu_ftr_reg(SYS_ID_AA64ZFR0_EL1, info->reg_id_aa64zfr0); init_cpu_ftr_reg(SYS_ID_AA64SMFR0_EL1, info->reg_id_aa64smfr0); + init_cpu_ftr_reg(SYS_ID_AA64FPFR0_EL1, info->reg_id_aa64fpfr0);
if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) init_32bit_cpu_features(&info->aarch32); @@ -1263,6 +1281,8 @@ void update_cpu_features(int cpu, info->reg_id_aa64isar1, boot->reg_id_aa64isar1); taint |= check_update_ftr_reg(SYS_ID_AA64ISAR2_EL1, cpu, info->reg_id_aa64isar2, boot->reg_id_aa64isar2); + taint |= check_update_ftr_reg(SYS_ID_AA64ISAR3_EL1, cpu, + info->reg_id_aa64isar3, boot->reg_id_aa64isar3);
/* * Differing PARange support is fine as long as all peripherals and @@ -1282,6 +1302,8 @@ void update_cpu_features(int cpu, info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0); taint |= check_update_ftr_reg(SYS_ID_AA64PFR1_EL1, cpu, info->reg_id_aa64pfr1, boot->reg_id_aa64pfr1); + taint |= check_update_ftr_reg(SYS_ID_AA64PFR2_EL1, cpu, + info->reg_id_aa64pfr2, boot->reg_id_aa64pfr2);
taint |= check_update_ftr_reg(SYS_ID_AA64ZFR0_EL1, cpu, info->reg_id_aa64zfr0, boot->reg_id_aa64zfr0); @@ -1289,6 +1311,9 @@ void update_cpu_features(int cpu, taint |= check_update_ftr_reg(SYS_ID_AA64SMFR0_EL1, cpu, info->reg_id_aa64smfr0, boot->reg_id_aa64smfr0);
+ taint |= check_update_ftr_reg(SYS_ID_AA64FPFR0_EL1, cpu, + info->reg_id_aa64fpfr0, boot->reg_id_aa64fpfr0); + if (IS_ENABLED(CONFIG_ARM64_SVE) && id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) { info->reg_zcr = read_zcr_features(); @@ -1399,8 +1424,10 @@ u64 __read_sysreg_by_encoding(u32 sys_id)
read_sysreg_case(SYS_ID_AA64PFR0_EL1); read_sysreg_case(SYS_ID_AA64PFR1_EL1); + read_sysreg_case(SYS_ID_AA64PFR2_EL1); read_sysreg_case(SYS_ID_AA64ZFR0_EL1); read_sysreg_case(SYS_ID_AA64SMFR0_EL1); + read_sysreg_case(SYS_ID_AA64FPFR0_EL1); read_sysreg_case(SYS_ID_AA64DFR0_EL1); read_sysreg_case(SYS_ID_AA64DFR1_EL1); read_sysreg_case(SYS_ID_AA64MMFR0_EL1); @@ -1410,6 +1437,7 @@ u64 __read_sysreg_by_encoding(u32 sys_id) read_sysreg_case(SYS_ID_AA64ISAR0_EL1); read_sysreg_case(SYS_ID_AA64ISAR1_EL1); read_sysreg_case(SYS_ID_AA64ISAR2_EL1); + read_sysreg_case(SYS_ID_AA64ISAR3_EL1);
read_sysreg_case(SYS_CNTFRQ_EL0); read_sysreg_case(SYS_CTR_EL0); diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 98fda8500535..e153c6d2b3fd 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -445,14 +445,17 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_id_aa64isar0 = read_cpuid(ID_AA64ISAR0_EL1); info->reg_id_aa64isar1 = read_cpuid(ID_AA64ISAR1_EL1); info->reg_id_aa64isar2 = read_cpuid(ID_AA64ISAR2_EL1); + info->reg_id_aa64isar3 = read_cpuid(ID_AA64ISAR3_EL1); info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1); info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); info->reg_id_aa64mmfr2 = read_cpuid(ID_AA64MMFR2_EL1); info->reg_id_aa64mmfr3 = read_cpuid(ID_AA64MMFR3_EL1); info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1); info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1); + info->reg_id_aa64pfr2 = read_cpuid(ID_AA64PFR2_EL1); info->reg_id_aa64zfr0 = read_cpuid(ID_AA64ZFR0_EL1); info->reg_id_aa64smfr0 = read_cpuid(ID_AA64SMFR0_EL1); + info->reg_id_aa64fpfr0 = read_cpuid(ID_AA64FPFR0_EL1);
if (id_aa64pfr1_mte(info->reg_id_aa64pfr1)) info->reg_gmid = read_cpuid(GMID_EL1);
FEAT_FPMR provides a new generally accessible architectural register FPMR. This is only accessible to EL0 and EL1 when HCRX_EL2.EnFPM is set to 1, do this when the host is running. The guest part will be done along with context switching the new register and exposing it via guest management.
Signed-off-by: Mark Brown broonie@kernel.org --- arch/arm64/include/asm/kvm_arm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 1095c6647e96..ef033c6c745c 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -103,7 +103,7 @@ #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
#define HCRX_GUEST_FLAGS (HCRX_EL2_SMPME | HCRX_EL2_TCR2En) -#define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En) +#define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM)
/* TCR_EL2 Registers bits */ #define TCR_EL2_RES1 ((1U << 31) | (1 << 23))
FEAT_FPMR defines a new EL0 accessible register FPMR use to configure the FP8 related features added to the architecture at the same time. Detect support for this register and context switch it for EL0 when present.
Due to the sharing of responsibility for saving floating point state between the host kernel and KVM FP8 support is not yet implemented in KVM and a stub similar to that used for SVCR is provided for FPMR in order to avoid bisection issues. To make it easier to share host state with the hypervisor we store FPMR immediately after the base floating point state, existing usage means that it is not practical to extend that directly.
Signed-off-by: Mark Brown broonie@kernel.org --- arch/arm64/include/asm/cpufeature.h | 5 +++++ arch/arm64/include/asm/fpsimd.h | 2 ++ arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/include/asm/processor.h | 2 ++ arch/arm64/kernel/cpufeature.c | 9 +++++++++ arch/arm64/kernel/fpsimd.c | 13 +++++++++++++ arch/arm64/kvm/fpsimd.c | 1 + arch/arm64/tools/cpucaps | 1 + 8 files changed, 34 insertions(+)
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 5bba39376055..9b93ba4a5a3d 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -779,6 +779,11 @@ static __always_inline bool system_supports_tpidr2(void) return system_supports_sme(); }
+static __always_inline bool system_supports_fpmr(void) +{ + return cpus_have_const_cap(ARM64_HAS_FPMR); +} + static __always_inline bool system_supports_cnp(void) { return IS_ENABLED(CONFIG_ARM64_CNP) && diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 8df46f186c64..5f921ebb7ca4 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -63,6 +63,7 @@ struct cpu_fp_state { void *sve_state; void *sme_state; u64 *svcr; + u64 *fpmr; unsigned int sve_vl; unsigned int sme_vl; enum fp_type *fp_type; @@ -127,6 +128,7 @@ extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused); extern void sme_kernel_enable(const struct arm64_cpu_capabilities *__unused); extern void sme2_kernel_enable(const struct arm64_cpu_capabilities *__unused); extern void fa64_kernel_enable(const struct arm64_cpu_capabilities *__unused); +extern void fpmr_kernel_enable(const struct arm64_cpu_capabilities *__unused);
extern u64 read_zcr_features(void); extern u64 read_smcr_features(void); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index af06ccb7ee34..2151c1cbc53d 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -481,6 +481,7 @@ struct kvm_vcpu_arch { enum fp_type fp_type; unsigned int sve_max_vl; u64 svcr; + u64 fpmr;
/* Stage 2 paging state used by the hardware on next switch */ struct kvm_s2_mmu *hw_mmu; diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index e5bc54522e71..dd3a5b29f76e 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -158,6 +158,8 @@ struct thread_struct { struct user_fpsimd_state fpsimd_state; } uw;
+ u64 fpmr; /* Adjacent to fpsimd_state for KVM */ + enum fp_type fp_type; /* registers FPSIMD or SVE? */ unsigned int fpsimd_cpu; void *sve_state; /* SVE registers, if any */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index c1658d893e0e..8e8cd411d1a2 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -272,6 +272,7 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { };
static const struct arm64_ftr_bits ftr_id_aa64pfr2[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR2_EL1_FPMR_SHIFT, 4, 0), ARM64_FTR_END, };
@@ -2747,6 +2748,14 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, EVT, IMP) }, + { + .desc = "FPMR", + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .capability = ARM64_HAS_FPMR, + .matches = has_cpuid_feature, + .cpu_enable = fpmr_kernel_enable, + ARM64_CPUID_FIELDS(ID_AA64PFR2_EL1, FPMR, IMP) + }, {}, };
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 91e44ac7150f..88a541b14d39 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -385,6 +385,9 @@ static void task_fpsimd_load(void) WARN_ON(!system_supports_fpsimd()); WARN_ON(!have_cpu_fpsimd_context());
+ if (system_supports_fpmr()) + write_sysreg_s(current->thread.fpmr, SYS_FPMR); + if (system_supports_sve() || system_supports_sme()) { switch (current->thread.fp_type) { case FP_STATE_FPSIMD: @@ -472,6 +475,9 @@ static void fpsimd_save(void) if (test_thread_flag(TIF_FOREIGN_FPSTATE)) return;
+ if (system_supports_fpmr()) + *(last->fpmr) = read_sysreg_s(SYS_FPMR); + /* * If a task is in a syscall the ABI allows us to only * preserve the state shared with FPSIMD so don't bother @@ -716,6 +722,12 @@ static void sve_to_fpsimd(struct task_struct *task) } }
+void fpmr_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) +{ + write_sysreg_s(read_sysreg_s(SYS_SCTLR_EL1) | SCTLR_EL1_EnFPM_MASK, + SYS_SCTLR_EL1); +} + #ifdef CONFIG_ARM64_SVE /* * Call __sve_free() directly only if you know task can't be scheduled @@ -1729,6 +1741,7 @@ static void fpsimd_bind_task_to_cpu(void) last->sve_vl = task_get_sve_vl(current); last->sme_vl = task_get_sme_vl(current); last->svcr = ¤t->thread.svcr; + last->fpmr = ¤t->thread.fpmr; last->fp_type = ¤t->thread.fp_type; last->to_save = FP_STATE_CURRENT; current->thread.fpsimd_cpu = smp_processor_id(); diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 8c1d0d4853df..e3e611e30e91 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -153,6 +153,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) fp_state.sve_vl = vcpu->arch.sve_max_vl; fp_state.sme_state = NULL; fp_state.svcr = &vcpu->arch.svcr; + fp_state.fpmr = &vcpu->arch.fpmr; fp_state.fp_type = &vcpu->arch.fp_type;
if (vcpu_has_sve(vcpu)) diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index dea3dc89234b..11b0e3360a2a 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -26,6 +26,7 @@ HAS_ECV HAS_ECV_CNTPOFF HAS_EPAN HAS_EVT +HAS_FPMR HAS_FGT HAS_GENERIC_AUTH HAS_GENERIC_AUTH_ARCH_QARMA3
Expose FPMR in the signal context on systems where it is supported. The kernel validates the exact size of the FPSIMD registers so we can't readily add it to fpsimd_context without disruption.
Signed-off-by: Mark Brown broonie@kernel.org --- arch/arm64/include/uapi/asm/sigcontext.h | 8 +++++ arch/arm64/kernel/signal.c | 59 ++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+)
diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h index f23c1dc3f002..8a45b7a411e0 100644 --- a/arch/arm64/include/uapi/asm/sigcontext.h +++ b/arch/arm64/include/uapi/asm/sigcontext.h @@ -152,6 +152,14 @@ struct tpidr2_context { __u64 tpidr2; };
+/* FPMR context */ +#define FPMR_MAGIC 0x46504d52 + +struct fpmr_context { + struct _aarch64_ctx head; + __u64 fpmr; +}; + #define ZA_MAGIC 0x54366345
struct za_context { diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 0e8beb3349ea..e8c808afcc8a 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -60,6 +60,7 @@ struct rt_sigframe_user_layout { unsigned long tpidr2_offset; unsigned long za_offset; unsigned long zt_offset; + unsigned long fpmr_offset; unsigned long extra_offset; unsigned long end_offset; }; @@ -182,6 +183,8 @@ struct user_ctxs { u32 za_size; struct zt_context __user *zt; u32 zt_size; + struct fpmr_context __user *fpmr; + u32 fpmr_size; };
static int preserve_fpsimd_context(struct fpsimd_context __user *ctx) @@ -227,6 +230,33 @@ static int restore_fpsimd_context(struct user_ctxs *user) return err ? -EFAULT : 0; }
+static int preserve_fpmr_context(struct fpmr_context __user *ctx) +{ + int err = 0; + + current->thread.fpmr = read_sysreg_s(SYS_FPMR); + + __put_user_error(FPMR_MAGIC, &ctx->head.magic, err); + __put_user_error(sizeof(*ctx), &ctx->head.size, err); + __put_user_error(current->thread.fpmr, &ctx->fpmr, err); + + return err; +} + +static int restore_fpmr_context(struct user_ctxs *user) +{ + u64 fpmr; + int err = 0; + + if (user->fpmr_size != sizeof(*user->fpmr)) + return -EINVAL; + + __get_user_error(fpmr, &user->fpmr->fpmr, err); + if (!err) + write_sysreg_s(fpmr, SYS_FPMR); + + return err; +}
#ifdef CONFIG_ARM64_SVE
@@ -590,6 +620,7 @@ static int parse_user_sigframe(struct user_ctxs *user, user->tpidr2 = NULL; user->za = NULL; user->zt = NULL; + user->fpmr = NULL;
if (!IS_ALIGNED((unsigned long)base, 16)) goto invalid; @@ -684,6 +715,17 @@ static int parse_user_sigframe(struct user_ctxs *user, user->zt_size = size; break;
+ case FPMR_MAGIC: + if (!system_supports_fpmr()) + goto invalid; + + if (user->fpmr) + goto invalid; + + user->fpmr = (struct fpmr_context __user *)head; + user->fpmr_size = size; + break; + case EXTRA_MAGIC: if (have_extra_context) goto invalid; @@ -806,6 +848,9 @@ static int restore_sigframe(struct pt_regs *regs, if (err == 0 && system_supports_tpidr2() && user.tpidr2) err = restore_tpidr2_context(&user);
+ if (err == 0 && system_supports_fpmr() && user.fpmr) + err = restore_fpmr_context(&user); + if (err == 0 && system_supports_sme() && user.za) err = restore_za_context(&user);
@@ -928,6 +973,13 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, } }
+ if (system_supports_fpmr()) { + err = sigframe_alloc(user, &user->fpmr_offset, + sizeof(struct fpmr_context)); + if (err) + return err; + } + return sigframe_alloc_end(user); }
@@ -983,6 +1035,13 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user, err |= preserve_tpidr2_context(tpidr2_ctx); }
+ /* FPMR if supported */ + if (system_supports_fpmr() && err == 0) { + struct fpmr_context __user *fpmr_ctx = + apply_user_offset(user, user->fpmr_offset); + err |= preserve_fpmr_context(fpmr_ctx); + } + /* ZA state if present */ if (system_supports_sme() && err == 0 && user->za_offset) { struct za_context __user *za_ctx =
Add a new regset to expose FPMR via ptrace. It is not added to the FPSIMD registers since that structure is exposed elsewhere without any allowance for extension we don't add there.
Signed-off-by: Mark Brown broonie@kernel.org --- arch/arm64/kernel/ptrace.c | 42 ++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/elf.h | 1 + 2 files changed, 43 insertions(+)
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 20d7ef82de90..cfb8a4d213be 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -697,6 +697,39 @@ static int tls_set(struct task_struct *target, const struct user_regset *regset, return ret; }
+static int fpmr_get(struct task_struct *target, const struct user_regset *regset, + struct membuf to) +{ + if (!system_supports_fpmr()) + return -EINVAL; + + if (target == current) + fpsimd_preserve_current_state(); + + return membuf_store(&to, target->thread.fpmr); +} + +static int fpmr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + unsigned long fpmr; + + if (!system_supports_fpmr()) + return -EINVAL; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &fpmr, 0, count); + if (ret) + return ret; + + target->thread.fpmr = fpmr; + + fpsimd_flush_task_state(target); + + return 0; +} + static int system_call_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) @@ -1417,6 +1450,7 @@ enum aarch64_regset { REGSET_HW_BREAK, REGSET_HW_WATCH, #endif + REGSET_FPMR, REGSET_SYSTEM_CALL, #ifdef CONFIG_ARM64_SVE REGSET_SVE, @@ -1495,6 +1529,14 @@ static const struct user_regset aarch64_regsets[] = { .regset_get = system_call_get, .set = system_call_set, }, + [REGSET_FPMR] = { + .core_note_type = NT_ARM_FPMR, + .n = 1, + .size = sizeof(u64), + .align = sizeof(u64), + .regset_get = fpmr_get, + .set = fpmr_set, + }, #ifdef CONFIG_ARM64_SVE [REGSET_SVE] = { /* Scalable Vector Extension */ .core_note_type = NT_ARM_SVE, diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 9b731976ce2f..4c8b4641a306 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -440,6 +440,7 @@ typedef struct elf64_shdr { #define NT_ARM_SSVE 0x40b /* ARM Streaming SVE registers */ #define NT_ARM_ZA 0x40c /* ARM SME ZA registers */ #define NT_ARM_ZT 0x40d /* ARM SME ZT registers */ +#define NT_ARM_FPMR 0x40e /* ARM floating point mode register */ #define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */ #define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */ #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */
The 2023 architecture extensions have allocated some new ID registers, add them to the KVM system register descriptions so that they are visible to guests.
Signed-off-by: Mark Brown broonie@kernel.org --- arch/arm64/kvm/sys_regs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 0afd6136e275..99cdaa594b06 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2016,12 +2016,12 @@ static const struct sys_reg_desc sys_reg_descs[] = { .reset = read_sanitised_id_aa64pfr0_el1, .val = ID_AA64PFR0_EL1_CSV2_MASK | ID_AA64PFR0_EL1_CSV3_MASK, }, ID_SANITISED(ID_AA64PFR1_EL1), - ID_UNALLOCATED(4,2), + ID_SANITISED(ID_AA64PFR2_EL1), ID_UNALLOCATED(4,3), ID_SANITISED(ID_AA64ZFR0_EL1), ID_HIDDEN(ID_AA64SMFR0_EL1), ID_UNALLOCATED(4,6), - ID_UNALLOCATED(4,7), + ID_SANITISED(ID_AA64FPFR0_EL1),
/* CRm=5 */ { SYS_DESC(SYS_ID_AA64DFR0_EL1), @@ -2042,7 +2042,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_SANITISED(ID_AA64ISAR0_EL1), ID_SANITISED(ID_AA64ISAR1_EL1), ID_SANITISED(ID_AA64ISAR2_EL1), - ID_UNALLOCATED(6,3), + ID_SANITISED(ID_AA64ISAR3_EL1), ID_UNALLOCATED(6,4), ID_UNALLOCATED(6,5), ID_UNALLOCATED(6,6),
FEAT_FPMR introduces a new system register FPMR which allows configuration of floating point behaviour, currently for FP8 specific features. Allow use of this in guests, disabling the trap while guests are running and saving and restoring the value along with the rest of the floating point state. Since FPMR is stored immediately after the main floating point state we share it with the hypervisor by adjusting the size of the shared region.
Access to FPMR is covered by both a register specific trap HCRX_EL2.EnFPM and the overall floating point access trap so we just unconditionally enable the FPMR specific trap and rely on the floating point access trap to detect guest floating point usage.
Signed-off-by: Mark Brown broonie@kernel.org --- arch/arm64/include/asm/kvm_arm.h | 2 +- arch/arm64/include/asm/kvm_host.h | 4 +++- arch/arm64/kvm/fpsimd.c | 20 +++++++++++++++++--- arch/arm64/kvm/hyp/include/hyp/switch.h | 7 ++++++- arch/arm64/kvm/sys_regs.c | 11 +++++++++++ 5 files changed, 38 insertions(+), 6 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index ef033c6c745c..2efc7107c9a1 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -102,7 +102,7 @@ #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
-#define HCRX_GUEST_FLAGS (HCRX_EL2_SMPME | HCRX_EL2_TCR2En) +#define HCRX_GUEST_FLAGS (HCRX_EL2_SMPME | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM) #define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM)
/* TCR_EL2 Registers bits */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 2151c1cbc53d..ddd31b41bec7 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -355,6 +355,8 @@ enum vcpu_sysreg { CNTP_CVAL_EL0, CNTP_CTL_EL0,
+ FPMR, + /* Memory Tagging Extension registers */ RGSR_EL1, /* Random Allocation Tag Seed Register */ GCR_EL1, /* Tag Control Register */ @@ -481,7 +483,6 @@ struct kvm_vcpu_arch { enum fp_type fp_type; unsigned int sve_max_vl; u64 svcr; - u64 fpmr;
/* Stage 2 paging state used by the hardware on next switch */ struct kvm_s2_mmu *hw_mmu; @@ -540,6 +541,7 @@ struct kvm_vcpu_arch { struct kvm_guest_debug_arch external_debug_state;
struct user_fpsimd_state *host_fpsimd_state; /* hyp VA */ + u64 *host_fpmr; /* hyp VA */ struct task_struct *parent_task;
struct { diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index e3e611e30e91..dee078625d0d 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -14,6 +14,16 @@ #include <asm/kvm_mmu.h> #include <asm/sysreg.h>
+static void *fpsimd_share_end(struct user_fpsimd_state *fpsimd) +{ + void *share_end = fpsimd + 1; + + if (cpus_have_final_cap(ARM64_HAS_FPMR)) + share_end += sizeof(u64); + + return share_end; +} + void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu) { struct task_struct *p = vcpu->arch.parent_task; @@ -23,7 +33,7 @@ void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu) return;
fpsimd = &p->thread.uw.fpsimd_state; - kvm_unshare_hyp(fpsimd, fpsimd + 1); + kvm_unshare_hyp(fpsimd, fpsimd_share_end(fpsimd)); put_task_struct(p); }
@@ -45,11 +55,15 @@ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu) kvm_vcpu_unshare_task_fp(vcpu);
/* Make sure the host task fpsimd state is visible to hyp: */ - ret = kvm_share_hyp(fpsimd, fpsimd + 1); + ret = kvm_share_hyp(fpsimd, fpsimd_share_end(fpsimd)); if (ret) return ret;
vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd); + if (cpus_have_final_cap(ARM64_HAS_FPMR)) { + WARN_ON_ONCE(¤t->thread.fpmr + 1 != fpsimd_share_end(fpsimd)); + vcpu->arch.host_fpmr = kern_hyp_va(¤t->thread.fpmr); + }
/* * We need to keep current's task_struct pinned until its data has been @@ -153,7 +167,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) fp_state.sve_vl = vcpu->arch.sve_max_vl; fp_state.sme_state = NULL; fp_state.svcr = &vcpu->arch.svcr; - fp_state.fpmr = &vcpu->arch.fpmr; + fp_state.fpmr = &__vcpu_sys_reg(vcpu, FPMR); fp_state.fp_type = &vcpu->arch.fp_type;
if (vcpu_has_sve(vcpu)) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 9cfe6bd1dbe4..4fad3c7d2d89 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -322,10 +322,15 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) isb();
/* Write out the host state if it's in the registers */ - if (vcpu->arch.fp_state == FP_STATE_HOST_OWNED) + if (vcpu->arch.fp_state == FP_STATE_HOST_OWNED) { __fpsimd_save_state(vcpu->arch.host_fpsimd_state); + if (cpus_have_final_cap(ARM64_HAS_FPMR)) + *vcpu->arch.host_fpmr = read_sysreg_s(SYS_FPMR); + }
/* Restore the guest state */ + if (cpus_have_final_cap(ARM64_HAS_FPMR)) + write_sysreg_s(__vcpu_sys_reg(vcpu, FPMR), SYS_FPMR); if (sve_guest) __hyp_sve_restore_guest(vcpu); else diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 99cdaa594b06..ad6c39178edc 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1806,6 +1806,15 @@ static unsigned int elx2_visibility(const struct kvm_vcpu *vcpu, .visibility = elx2_visibility, \ }
+static unsigned int fpmr_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + if (cpus_have_final_cap(ARM64_HAS_FPMR)) + return 0; + + return REG_HIDDEN; +} + /* * Since reset() callback and field val are not used for idregs, they will be * used for specific purposes for idregs. @@ -2165,6 +2174,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 }, { SYS_DESC(SYS_CTR_EL0), access_ctr }, { SYS_DESC(SYS_SVCR), undef_access }, + { SYS_DESC(SYS_FPMR), access_rw, reset_unknown, FPMR, + .visibility = fpmr_visibility },
{ PMU_SYS_REG(PMCR_EL0), .access = access_pmcr, .reset = reset_pmcr, .reg = PMCR_EL0 },
The 2023 architecture extensions include a large number of floating point features, most of which simply add new instructions. Add hwcaps so that userspace can enumerate these features.
Signed-off-by: Mark Brown broonie@kernel.org --- Documentation/arch/arm64/elf_hwcaps.rst | 49 +++++++++++++++++++++++++++++++++ arch/arm64/include/asm/hwcap.h | 15 ++++++++++ arch/arm64/include/uapi/asm/hwcap.h | 15 ++++++++++ arch/arm64/kernel/cpufeature.c | 35 +++++++++++++++++++++++ arch/arm64/kernel/cpuinfo.c | 15 ++++++++++ 5 files changed, 129 insertions(+)
diff --git a/Documentation/arch/arm64/elf_hwcaps.rst b/Documentation/arch/arm64/elf_hwcaps.rst index 76ff9d7398fd..777d8b868f98 100644 --- a/Documentation/arch/arm64/elf_hwcaps.rst +++ b/Documentation/arch/arm64/elf_hwcaps.rst @@ -308,6 +308,55 @@ HWCAP2_MOPS HWCAP2_HBC Functionality implied by ID_AA64ISAR2_EL1.BC == 0b0001.
+HWCAP2_FPMR + Functionality implied by ID_AA64PFR2_EL1.FMR == 0b0001. + +HWCAP2_LUT + Functionality implied by ID_AA64ISAR2_EL1.LUT == 0b0001. + +HWCAP2_FAMINMAX + Functionality implied by ID_AA64ISAR3_EL1.FAMINMAX == 0b0001. + +HWCAP2_F8CVT + Functionality implied by ID_AA64FPFR0_EL1.F8CVT == 0b1. + +HWCAP2_F8FMA + Functionality implied by ID_AA64FPFR0_EL1.F8FMA == 0b1. + +HWCAP2_F8DP4 + Functionality implied by ID_AA64FPFR0_EL1.F8DP4 == 0b1. + +HWCAP2_F8DP2 + Functionality implied by ID_AA64FPFR0_EL1.F8DP2 == 0b1. + +HWCAP2_F8E4M3 + Functionality implied by ID_AA64FPFR0_EL1.F8E4M3 == 0b1. + +HWCAP2_F8E5M2 + Functionality implied by ID_AA64FPFR0_EL1.F8E5M2 == 0b1. + +HWCAP2_SME_LUTV2 + Functionality implied by ID_AA64SMFR0_EL1.LUTv2 == 0b1. + +HWCAP2_SME_F8F16 + Functionality implied by ID_AA64SMFR0_EL1.F8F16 == 0b1. + +HWCAP2_SME_F8F32 + Functionality implied by ID_AA64SMFR0_EL1.F8F32 == 0b1. + +HWCAP2_SME_SF8FMA + Functionality implied by ID_AA64SMFR0_EL1.SF8FMA == 0b1. + +HWCAP2_SME_SF8DP4 + Functionality implied by ID_AA64SMFR0_EL1.SF8DP4 == 0b1. + +HWCAP2_SME_SF8DP2 + Functionality implied by ID_AA64SMFR0_EL1.SF8DP2 == 0b1. + +HWCAP2_SME_SF8DP4 + Functionality implied by ID_AA64SMFR0_EL1.SF8DP4 == 0b1. + + 4. Unused AT_HWCAP bits -----------------------
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 521267478d18..046978936d25 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -139,6 +139,21 @@ #define KERNEL_HWCAP_SME_F16F16 __khwcap2_feature(SME_F16F16) #define KERNEL_HWCAP_MOPS __khwcap2_feature(MOPS) #define KERNEL_HWCAP_HBC __khwcap2_feature(HBC) +#define KERNEL_HWCAP_FPMR __khwcap2_feature(FPMR) +#define KERNEL_HWCAP_LUT __khwcap2_feature(LUT) +#define KERNEL_HWCAP_FAMINMAX __khwcap2_feature(FAMINMAX) +#define KERNEL_HWCAP_F8CVT __khwcap2_feature(F8CVT) +#define KERNEL_HWCAP_F8FMA __khwcap2_feature(F8FMA) +#define KERNEL_HWCAP_F8DP4 __khwcap2_feature(F8DP4) +#define KERNEL_HWCAP_F8DP2 __khwcap2_feature(F8DP2) +#define KERNEL_HWCAP_F8E4M3 __khwcap2_feature(F8E4M3) +#define KERNEL_HWCAP_F8E5M2 __khwcap2_feature(F8E5M2) +#define KERNEL_HWCAP_SME_LUTV2 __khwcap2_feature(SME_LUTV2) +#define KERNEL_HWCAP_SME_F8F16 __khwcap2_feature(SME_F8F16) +#define KERNEL_HWCAP_SME_F8F32 __khwcap2_feature(SME_F8F32) +#define KERNEL_HWCAP_SME_SF8FMA __khwcap2_feature(SME_SF8FMA) +#define KERNEL_HWCAP_SME_SF8DP4 __khwcap2_feature(SME_SF8DP4) +#define KERNEL_HWCAP_SME_SF8DP2 __khwcap2_feature(SME_SF8DP2)
/* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 53026f45a509..0f0aa9006cef 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -104,5 +104,20 @@ #define HWCAP2_SME_F16F16 (1UL << 42) #define HWCAP2_MOPS (1UL << 43) #define HWCAP2_HBC (1UL << 44) +#define HWCAP2_FPMR (1UL << 45) +#define HWCAP2_LUT (1UL << 46) +#define HWCAP2_FAMINMAX (1UL << 47) +#define HWCAP2_F8CVT (1UL << 48) +#define HWCAP2_F8FMA (1UL << 49) +#define HWCAP2_F8DP4 (1UL << 50) +#define HWCAP2_F8DP2 (1UL << 51) +#define HWCAP2_F8E4M3 (1UL << 52) +#define HWCAP2_F8E5M2 (1UL << 53) +#define HWCAP2_SME_LUTV2 (1UL << 54) +#define HWCAP2_SME_F8F16 (1UL << 55) +#define HWCAP2_SME_F8F32 (1UL << 56) +#define HWCAP2_SME_SF8FMA (1UL << 57) +#define HWCAP2_SME_SF8DP4 (1UL << 58) +#define HWCAP2_SME_SF8DP2 (1UL << 59)
#endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 8e8cd411d1a2..2c85bc9e644c 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -220,6 +220,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { };
static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_LUT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_CSSC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_RPRFM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_CLRBHB_SHIFT, 4, 0), @@ -235,6 +236,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { };
static const struct arm64_ftr_bits ftr_id_aa64isar3[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR3_EL1_FAMINMAX_SHIFT, 4, 0), ARM64_FTR_END, };
@@ -301,6 +303,8 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = { static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = { ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_LUTv2_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SMEver_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), @@ -313,6 +317,10 @@ static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = { FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_B16B16_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F16F16_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F8F16_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F8F32_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I8I32_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), @@ -323,10 +331,22 @@ static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = { FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_BI32I32_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F32F32_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SF8FMA_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SF8DP4_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SF8DP2_SHIFT, 1, 0), ARM64_FTR_END, };
static const struct arm64_ftr_bits ftr_id_aa64fpfr0[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8CVT_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8FMA_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8DP4_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8DP2_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8E4M3_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8E5M2_SHIFT, 1, 0), ARM64_FTR_END, };
@@ -2838,6 +2858,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64PFR0_EL1, AdvSIMD, IMP, CAP_HWCAP, KERNEL_HWCAP_ASIMD), HWCAP_CAP(ID_AA64PFR0_EL1, AdvSIMD, FP16, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP), HWCAP_CAP(ID_AA64PFR0_EL1, DIT, IMP, CAP_HWCAP, KERNEL_HWCAP_DIT), + HWCAP_CAP(ID_AA64PFR2_EL1, FPMR, IMP, CAP_HWCAP, KERNEL_HWCAP_FPMR), HWCAP_CAP(ID_AA64ISAR1_EL1, DPB, IMP, CAP_HWCAP, KERNEL_HWCAP_DCPOP), HWCAP_CAP(ID_AA64ISAR1_EL1, DPB, DPB2, CAP_HWCAP, KERNEL_HWCAP_DCPODP), HWCAP_CAP(ID_AA64ISAR1_EL1, JSCVT, IMP, CAP_HWCAP, KERNEL_HWCAP_JSCVT), @@ -2850,6 +2871,8 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64ISAR1_EL1, BF16, EBF16, CAP_HWCAP, KERNEL_HWCAP_EBF16), HWCAP_CAP(ID_AA64ISAR1_EL1, DGH, IMP, CAP_HWCAP, KERNEL_HWCAP_DGH), HWCAP_CAP(ID_AA64ISAR1_EL1, I8MM, IMP, CAP_HWCAP, KERNEL_HWCAP_I8MM), + HWCAP_CAP(ID_AA64ISAR2_EL1, LUT, IMP, CAP_HWCAP, KERNEL_HWCAP_LUT), + HWCAP_CAP(ID_AA64ISAR3_EL1, FAMINMAX, IMP, CAP_HWCAP, KERNEL_HWCAP_FAMINMAX), HWCAP_CAP(ID_AA64MMFR2_EL1, AT, IMP, CAP_HWCAP, KERNEL_HWCAP_USCAT), #ifdef CONFIG_ARM64_SVE HWCAP_CAP(ID_AA64PFR0_EL1, SVE, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE), @@ -2889,6 +2912,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { #ifdef CONFIG_ARM64_SME HWCAP_CAP(ID_AA64PFR1_EL1, SME, IMP, CAP_HWCAP, KERNEL_HWCAP_SME), HWCAP_CAP(ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), + HWCAP_CAP(ID_AA64SMFR0_EL1, LUTv2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_LUTV2), HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1), HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2, CAP_HWCAP, KERNEL_HWCAP_SME2), HWCAP_CAP(ID_AA64SMFR0_EL1, I16I64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64), @@ -2896,12 +2920,23 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64SMFR0_EL1, I16I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I32), HWCAP_CAP(ID_AA64SMFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16), HWCAP_CAP(ID_AA64SMFR0_EL1, F16F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16), + HWCAP_CAP(ID_AA64SMFR0_EL1, F8F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F16), + HWCAP_CAP(ID_AA64SMFR0_EL1, F8F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F32), HWCAP_CAP(ID_AA64SMFR0_EL1, I8I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32), HWCAP_CAP(ID_AA64SMFR0_EL1, F16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32), HWCAP_CAP(ID_AA64SMFR0_EL1, B16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32), HWCAP_CAP(ID_AA64SMFR0_EL1, BI32I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32), HWCAP_CAP(ID_AA64SMFR0_EL1, F32F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32), + HWCAP_CAP(ID_AA64SMFR0_EL1, SF8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8FMA), + HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP4), + HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP2), #endif /* CONFIG_ARM64_SME */ + HWCAP_CAP(ID_AA64FPFR0_EL1, F8CVT, IMP, CAP_HWCAP, KERNEL_HWCAP_F8CVT), + HWCAP_CAP(ID_AA64FPFR0_EL1, F8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_F8FMA), + HWCAP_CAP(ID_AA64FPFR0_EL1, F8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_F8DP4), + HWCAP_CAP(ID_AA64FPFR0_EL1, F8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_F8DP2), + HWCAP_CAP(ID_AA64FPFR0_EL1, F8E4M3, IMP, CAP_HWCAP, KERNEL_HWCAP_F8E4M3), + HWCAP_CAP(ID_AA64FPFR0_EL1, F8E5M2, IMP, CAP_HWCAP, KERNEL_HWCAP_F8E5M2), {}, };
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index e153c6d2b3fd..9ff497ca70b4 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -127,6 +127,21 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_SME_F16F16] = "smef16f16", [KERNEL_HWCAP_MOPS] = "mops", [KERNEL_HWCAP_HBC] = "hbc", + [KERNEL_HWCAP_FPMR] = "fpmr", + [KERNEL_HWCAP_LUT] = "lut", + [KERNEL_HWCAP_FAMINMAX] = "faminmax", + [KERNEL_HWCAP_F8CVT] = "f8cvt", + [KERNEL_HWCAP_F8FMA] = "f8fma", + [KERNEL_HWCAP_F8DP4] = "f8dp4", + [KERNEL_HWCAP_F8DP2] = "f8dp2", + [KERNEL_HWCAP_F8E4M3] = "f8e4m3", + [KERNEL_HWCAP_F8E5M2] = "f8e5m2", + [KERNEL_HWCAP_SME_LUTV2] = "smelutv2", + [KERNEL_HWCAP_SME_F8F16] = "smef8f16", + [KERNEL_HWCAP_SME_F8F32] = "smef8f32", + [KERNEL_HWCAP_SME_SF8FMA] = "smesf8fma", + [KERNEL_HWCAP_SME_SF8DP4] = "smesf8dp4", + [KERNEL_HWCAP_SME_SF8DP2] = "smesf8dp2", };
#ifdef CONFIG_COMPAT
Teach the generic signal frame parsing code about the newly added FPMR frame, avoiding warnings every time one is generated.
Signed-off-by: Mark Brown broonie@kernel.org --- tools/testing/selftests/arm64/signal/testcases/testcases.c | 8 ++++++++ tools/testing/selftests/arm64/signal/testcases/testcases.h | 1 + 2 files changed, 9 insertions(+)
diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c index 9f580b55b388..674b88cc8c39 100644 --- a/tools/testing/selftests/arm64/signal/testcases/testcases.c +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c @@ -209,6 +209,14 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) zt = (struct zt_context *)head; new_flags |= ZT_CTX; break; + case FPMR_MAGIC: + if (flags & FPMR_CTX) + *err = "Multiple FPMR_MAGIC"; + else if (head->size != + sizeof(struct fpmr_context)) + *err = "Bad size for fpmr_context"; + new_flags |= FPMR_CTX; + break; case EXTRA_MAGIC: if (flags & EXTRA_CTX) *err = "Multiple EXTRA_MAGIC"; diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.h b/tools/testing/selftests/arm64/signal/testcases/testcases.h index a08ab0d6207a..7727126347e0 100644 --- a/tools/testing/selftests/arm64/signal/testcases/testcases.h +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.h @@ -19,6 +19,7 @@ #define ZA_CTX (1 << 2) #define EXTRA_CTX (1 << 3) #define ZT_CTX (1 << 4) +#define FPMR_CTX (1 << 5)
#define KSFT_BAD_MAGIC 0xdeadbeef
Verify that a FPMR frame is generated on systems that support FPMR and not generated otherwise.
Signed-off-by: Mark Brown broonie@kernel.org --- tools/testing/selftests/arm64/signal/.gitignore | 1 + .../arm64/signal/testcases/fpmr_siginfo.c | 82 ++++++++++++++++++++++ 2 files changed, 83 insertions(+)
diff --git a/tools/testing/selftests/arm64/signal/.gitignore b/tools/testing/selftests/arm64/signal/.gitignore index 839e3a252629..1ce5b5eac386 100644 --- a/tools/testing/selftests/arm64/signal/.gitignore +++ b/tools/testing/selftests/arm64/signal/.gitignore @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only mangle_* fake_sigreturn_* +fpmr_* sme_* ssve_* sve_* diff --git a/tools/testing/selftests/arm64/signal/testcases/fpmr_siginfo.c b/tools/testing/selftests/arm64/signal/testcases/fpmr_siginfo.c new file mode 100644 index 000000000000..e9d24685e741 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/fpmr_siginfo.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 ARM Limited + * + * Verify that the FPMR register context in signal frames is set up as + * expected. + */ + +#include <signal.h> +#include <ucontext.h> +#include <sys/auxv.h> +#include <sys/prctl.h> +#include <unistd.h> +#include <asm/sigcontext.h> + +#include "test_signals_utils.h" +#include "testcases.h" + +static union { + ucontext_t uc; + char buf[1024 * 128]; +} context; + +#define SYS_FPMR "S3_3_C4_C4_2" + +static uint64_t get_fpmr(void) +{ + uint64_t val; + + asm volatile ( + "mrs %0, " SYS_FPMR "\n" + : "=r"(val) + : + : "cc"); + + return val; +} + +int fpmr_present(struct tdescr *td, siginfo_t *si, ucontext_t *uc) +{ + struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context); + struct fpmr_context *fpmr_ctx; + size_t offset; + bool in_sigframe; + bool have_fpmr; + __u64 orig_fpmr; + + have_fpmr = getauxval(AT_HWCAP2) & HWCAP2_FPMR; + if (have_fpmr) + orig_fpmr = get_fpmr(); + + if (!get_current_context(td, &context.uc, sizeof(context))) + return 1; + + fpmr_ctx = (struct fpmr_context *) + get_header(head, FPMR_MAGIC, td->live_sz, &offset); + + in_sigframe = fpmr_ctx != NULL; + + fprintf(stderr, "FPMR sigframe %s on system %s FPMR\n", + in_sigframe ? "present" : "absent", + have_fpmr ? "with" : "without"); + + td->pass = (in_sigframe == have_fpmr); + + if (have_fpmr && fpmr_ctx) { + if (fpmr_ctx->fpmr != orig_fpmr) { + fprintf(stderr, "FPMR in frame is %llx, was %llx\n", + fpmr_ctx->fpmr, orig_fpmr); + td->pass = false; + } + } + + return 0; +} + +struct tdescr tde = { + .name = "FPMR", + .descr = "Validate that FPMR is present as expected", + .timeout = 3, + .run = fpmr_present, +};
Add the hwcaps added for the 2023 DPISA extensions to the hwcaps test program.
Signed-off-by: Mark Brown broonie@kernel.org --- tools/testing/selftests/arm64/abi/hwcap.c | 217 ++++++++++++++++++++++++++++++ 1 file changed, 217 insertions(+)
diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index e3d262831d91..ffc0ad1a9b16 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -58,11 +58,46 @@ static void cssc_sigill(void) asm volatile(".inst 0xdac01c00" : : : "x0"); }
+static void f8cvt_sigill(void) +{ + /* FSCALE V0.4H, V0.4H, V0.4H */ + asm volatile(".inst 0x2ec03c00"); +} + +static void f8dp2_sigill(void) +{ + /* FDOT V0.4H, V0.4H, V0.5H */ + asm volatile(".inst 0xe40fc00"); +} + +static void f8dp4_sigill(void) +{ + /* FDOT V0.2S, V0.2S, V0.2S */ + asm volatile(".inst 0xe00fc00"); +} + +static void f8fma_sigill(void) +{ + /* FMLALB V0.8H, V0.16B, V0.16B */ + asm volatile(".inst 0xec0fc00"); +} + +static void faminmax_sigill(void) +{ + /* FAMIN V0.4H, V0.4H, V0.4H */ + asm volatile(".inst 0x2ec01c00"); +} + static void fp_sigill(void) { asm volatile("fmov s0, #1"); }
+static void fpmr_sigill(void) +{ + asm volatile("mrs x0, S3_3_C4_C4_2" : : : "x0"); +} + static void ilrcpc_sigill(void) { /* LDAPUR W0, [SP, #8] */ @@ -81,6 +116,12 @@ static void lrcpc_sigill(void) asm volatile(".inst 0xb8bfc3e0" : : : ); }
+static void lut_sigill(void) +{ + /* LUTI2 V0.16B, { V0.16B }, V[0] */ + asm volatile(".inst 0x4e801000"); +} + static void mops_sigill(void) { char dst[1], src[1]; @@ -202,6 +243,78 @@ static void smef16f16_sigill(void) asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); }
+static void smef8f16_sigill(void) +{ + /* SMSTART */ + asm volatile("msr S0_3_C4_C7_3, xzr" : : : ); + + /* FDOT ZA.H[W0, 0], Z0.B-Z1.B, Z0.B-Z1.B */ + asm volatile(".inst 0xc1a01020" : : : ); + + /* SMSTOP */ + asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); +} + +static void smef8f32_sigill(void) +{ + /* SMSTART */ + asm volatile("msr S0_3_C4_C7_3, xzr" : : : ); + + /* FDOT ZA.S[W0, 0], { Z0.B-Z1.B }, Z0.B[0] */ + asm volatile(".inst 0xc1500038" : : : ); + + /* SMSTOP */ + asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); +} + +static void smelutv2_sigill(void) +{ + /* SMSTART */ + asm volatile("msr S0_3_C4_C7_3, xzr" : : : ); + + /* LUTI4 { Z0.B-Z3.B }, ZT0, { Z0-Z1 } */ + asm volatile(".inst 0xc08b0000" : : : ); + + /* SMSTOP */ + asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); +} + +static void smesf8dp2_sigill(void) +{ + /* SMSTART */ + asm volatile("msr S0_3_C4_C7_3, xzr" : : : ); + + /* FDOT Z0.H, Z0.B, Z0.B[0] */ + asm volatile(".inst 0x64204400" : : : ); + + /* SMSTOP */ + asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); +} + +static void smesf8dp4_sigill(void) +{ + /* SMSTART */ + asm volatile("msr S0_3_C4_C7_3, xzr" : : : ); + + /* FDOT Z0.S, Z0.B, Z0.B[0] */ + asm volatile(".inst 0xc1a41C00" : : : ); + + /* SMSTOP */ + asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); +} + +static void smesf8fma_sigill(void) +{ + /* SMSTART */ + asm volatile("msr S0_3_C4_C7_3, xzr" : : : ); + + /* FMLALB V0.8H, V0.16B, V0.16B */ + asm volatile(".inst 0xec0fc00"); + + /* SMSTOP */ + asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); +} + static void sve_sigill(void) { /* RDVL x0, #0 */ @@ -320,6 +433,53 @@ static const struct hwcap_data { .cpuinfo = "cssc", .sigill_fn = cssc_sigill, }, + { + .name = "F8CVT", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_F8CVT, + .cpuinfo = "f8cvt", + .sigill_fn = f8cvt_sigill, + }, + { + .name = "F8DP4", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_F8DP4, + .cpuinfo = "f8dp4", + .sigill_fn = f8dp4_sigill, + }, + { + .name = "F8DP2", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_F8DP2, + .cpuinfo = "f8dp4", + .sigill_fn = f8dp2_sigill, + }, + { + .name = "F8E5M2", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_F8E5M2, + .cpuinfo = "f8e5m2", + }, + { + .name = "F8E4M3", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_F8E4M3, + .cpuinfo = "f8e4m3", + }, + { + .name = "F8FMA", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_F8FMA, + .cpuinfo = "f8fma", + .sigill_fn = f8fma_sigill, + }, + { + .name = "FAMINMAX", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_FAMINMAX, + .cpuinfo = "faminmax", + .sigill_fn = faminmax_sigill, + }, { .name = "FP", .at_hwcap = AT_HWCAP, @@ -327,6 +487,14 @@ static const struct hwcap_data { .cpuinfo = "fp", .sigill_fn = fp_sigill, }, + { + .name = "FPMR", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_FPMR, + .cpuinfo = "fpmr", + .sigill_fn = fpmr_sigill, + .sigill_reliable = true, + }, { .name = "JSCVT", .at_hwcap = AT_HWCAP, @@ -364,6 +532,13 @@ static const struct hwcap_data { .sigbus_fn = uscat_sigbus, .sigbus_reliable = true, }, + { + .name = "LUT", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_LUT, + .cpuinfo = "lut", + .sigill_fn = lut_sigill, + }, { .name = "MOPS", .at_hwcap = AT_HWCAP2, @@ -464,6 +639,48 @@ static const struct hwcap_data { .cpuinfo = "smef16f16", .sigill_fn = smef16f16_sigill, }, + { + .name = "SME F8F16", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_SME_F8F16, + .cpuinfo = "smef8f16", + .sigill_fn = smef8f16_sigill, + }, + { + .name = "SME F8F32", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_SME_F8F32, + .cpuinfo = "smef8f32", + .sigill_fn = smef8f32_sigill, + }, + { + .name = "SME LUTV2", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_SME_LUTV2, + .cpuinfo = "smelutv2", + .sigill_fn = smelutv2_sigill, + }, + { + .name = "SME SF8FMA", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_SME_SF8FMA, + .cpuinfo = "smesf8fma", + .sigill_fn = smesf8fma_sigill, + }, + { + .name = "SME SF8DP2", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_SME_SF8DP2, + .cpuinfo = "smesf8dp2", + .sigill_fn = smesf8dp2_sigill, + }, + { + .name = "SME SF8DP4", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_SME_SF8DP4, + .cpuinfo = "smesf8dp4", + .sigill_fn = smesf8dp4_sigill, + }, { .name = "SVE", .at_hwcap = AT_HWCAP,
The 2023 architecture extensions allocated some previously usused feature registers, add comments mapping the names in get-reg-list as we do for the other allocated registers.
Signed-off-by: Mark Brown broonie@kernel.org --- tools/testing/selftests/kvm/aarch64/get-reg-list.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index 709d7d721760..71ea6ecec7ce 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -428,7 +428,7 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 0, 0, 4, 4), /* ID_AA64ZFR0_EL1 */ ARM64_SYS_REG(3, 0, 0, 4, 5), /* ID_AA64SMFR0_EL1 */ ARM64_SYS_REG(3, 0, 0, 4, 6), - ARM64_SYS_REG(3, 0, 0, 4, 7), + ARM64_SYS_REG(3, 0, 0, 4, 7), /* ID_AA64FPFR_EL1 */ ARM64_SYS_REG(3, 0, 0, 5, 0), /* ID_AA64DFR0_EL1 */ ARM64_SYS_REG(3, 0, 0, 5, 1), /* ID_AA64DFR1_EL1 */ ARM64_SYS_REG(3, 0, 0, 5, 2), @@ -440,7 +440,7 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 0, 0, 6, 0), /* ID_AA64ISAR0_EL1 */ ARM64_SYS_REG(3, 0, 0, 6, 1), /* ID_AA64ISAR1_EL1 */ ARM64_SYS_REG(3, 0, 0, 6, 2), /* ID_AA64ISAR2_EL1 */ - ARM64_SYS_REG(3, 0, 0, 6, 3), + ARM64_SYS_REG(3, 0, 0, 6, 3), /* ID_AA64ISAR3_EL1 */ ARM64_SYS_REG(3, 0, 0, 6, 4), ARM64_SYS_REG(3, 0, 0, 6, 5), ARM64_SYS_REG(3, 0, 0, 6, 6),
FEAT_FPMR defines a new register FMPR which is available at all ELs and is discovered via ID_AA64PFR2_EL1.FPMR, add this to the set of registers that get-reg-list knows to check for with the required identification register depdendency.
Signed-off-by: Mark Brown broonie@kernel.org --- tools/testing/selftests/kvm/aarch64/get-reg-list.c | 7 +++++++ 1 file changed, 7 insertions(+)
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index 71ea6ecec7ce..1e43511d1440 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -40,6 +40,12 @@ static struct feature_id_reg feat_id_regs[] = { ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ 4, 1 + }, + { + ARM64_SYS_REG(3, 3, 4, 4, 2), /* FPMR */ + ARM64_SYS_REG(3, 0, 0, 4, 2), /* ID_AA64PFR2_EL1 */ + 32, + 1 } };
@@ -481,6 +487,7 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 3, 14, 2, 1), /* CNTP_CTL_EL0 */ ARM64_SYS_REG(3, 3, 14, 2, 2), /* CNTP_CVAL_EL0 */ ARM64_SYS_REG(3, 4, 3, 0, 0), /* DACR32_EL2 */ + ARM64_SYS_REG(3, 3, 4, 4, 2), /* FPMR */ ARM64_SYS_REG(3, 4, 5, 0, 1), /* IFSR32_EL2 */ ARM64_SYS_REG(3, 4, 5, 3, 0), /* FPEXC32_EL2 */ };
linux-kselftest-mirror@lists.linaro.org