The first patch here is a fix which should ideally be sent as such; currently the program will hang on architecturally valid systems which implement SME but not the 128 bit vector length. The remaining patches are general enhancements, including coverage for the SME ABI on SME only systems.
To: Catalin Marinas <catalin.marinas@arm.com>
To: Will Deacon <will@kernel.org>
To: Shuah Khan <shuah@kernel.org>
Cc: Shuah Khan <skhan@linuxfoundation.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kselftest@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
Mark Brown (4):
      kselftest/arm64: Fix syscall-abi for systems without 128 bit SME
      kselftest/arm64: Only enumerate VLs once in syscall-abi
      kselftest/arm64: Verify SME only ABI in syscall-abi
      kselftest/arm64: Only enumerate power of two VLs in syscall-abi
 .../testing/selftests/arm64/abi/syscall-abi-asm.S |  14 ++-
 tools/testing/selftests/arm64/abi/syscall-abi.c   | 133 +++++++++++++--------
 2 files changed, 89 insertions(+), 58 deletions(-)
---
base-commit: 1b929c02afd37871d5afb9d498426f83432e71c2
change-id: 20221223-arm64-syscall-abi-sme-only-c3bb2c0f81e5
Best regards,
SME does not mandate any specific VL, so we may not have 128 bit SME, but the algorithm used for enumerating VLs assumes that we will. Add the required check to ensure that the algorithm terminates.
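To make the hang concrete, here is a minimal standalone sketch of the enumeration loop with the termination check this patch adds. The prctl number and mask are the upstream uapi values; the fallback defines, the VQ_MAX constant, the vl / 16 stand-in for sve_vq_from_vl() and the printf() reporting are scaffolding for the example only and are not taken from the test itself.

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_SME_SET_VL
#define PR_SME_SET_VL           63
#define PR_SME_VL_LEN_MASK      0xffff
#endif

#define VQ_MAX  512     /* SVE_VQ_MAX: VLs are multiples of 16 byte quadwords */

int main(void)
{
        unsigned int vq;
        int vl;

        for (vq = VQ_MAX; vq > 0; --vq) {
                vl = prctl(PR_SME_SET_VL, vq * 16);
                if (vl == -1)
                        return 1;       /* no SME or prctl() failure */

                vl &= PR_SME_VL_LEN_MASK;

                /*
                 * If the kernel rounded the request *up* there is no
                 * smaller supported VL.  Without this break the
                 * assignment below pushes vq back up and, on a system
                 * with no 128 bit SME VL, the loop never reaches 0.
                 */
                if ((unsigned int)(vl / 16) > vq)
                        break;

                if (vq != (unsigned int)(vl / 16))
                        vq = vl / 16;   /* skip to the next supported VL */

                printf("supported SME VL: %d bytes\n", vl);
        }

        return 0;
}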
Fixes: 43e3f85523e4 ("kselftest/arm64: Add SME support to syscall ABI test")
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 tools/testing/selftests/arm64/abi/syscall-abi.c | 8 ++++++++
 1 file changed, 8 insertions(+)
diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c
index dd7ebe536d05..ffe719b50c21 100644
--- a/tools/testing/selftests/arm64/abi/syscall-abi.c
+++ b/tools/testing/selftests/arm64/abi/syscall-abi.c
@@ -390,6 +390,10 @@ static void test_one_syscall(struct syscall_cfg *cfg)
sme_vl &= PR_SME_VL_LEN_MASK;
+                        /* Found lowest VL */
+                        if (sve_vq_from_vl(sme_vl) > sme_vq)
+                                break;
+
                         if (sme_vq != sve_vq_from_vl(sme_vl))
                                 sme_vq = sve_vq_from_vl(sme_vl);
@@ -461,6 +465,10 @@ int sme_count_vls(void)
vl &= PR_SME_VL_LEN_MASK;
+                /* Found lowest VL */
+                if (sve_vq_from_vl(vl) > vq)
+                        break;
+
                 if (vq != sve_vq_from_vl(vl))
                         vq = sve_vq_from_vl(vl);
Currently syscall-abi not only enumerates the SVE VLs twice while working out how many tests to plan, it also repeats the enumeration while running the actual tests. Record the VLs as we enumerate them and use that list when performing the tests, removing the duplicated logic.
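The shape of the refactor in miniature: record the supported lengths in one pass, then let both the test plan and the test loop consume the recorded array. In the sketch below discover_vls() is a stand-in for the real prctl() based enumeration and its values are invented; only the array-plus-count pattern reflects the patch.

#include <stdio.h>

#define VQ_MAX  512                     /* SVE_VQ_MAX */

static unsigned int sve_vls[VQ_MAX];
static int sve_vl_count;

static void discover_vls(void)
{
        /* Stand-in data: the real code fills this via PR_SVE_SET_VL */
        unsigned int example[] = { 64, 32, 16 };
        size_t i;

        for (i = 0; i < sizeof(example) / sizeof(example[0]); i++)
                sve_vls[sve_vl_count++] = example[i];
}

int main(void)
{
        int i;

        discover_vls();                         /* single enumeration pass */

        printf("planning %d SVE tests\n", sve_vl_count);  /* planning reads the count */

        for (i = 0; i < sve_vl_count; i++)      /* the tests read the array */
                printf("testing SVE VL %u\n", sve_vls[i]);

        return 0;
}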
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 tools/testing/selftests/arm64/abi/syscall-abi.c | 95 +++++++++++--------------
 1 file changed, 41 insertions(+), 54 deletions(-)
diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c
index ffe719b50c21..45fdcbe3e909 100644
--- a/tools/testing/selftests/arm64/abi/syscall-abi.c
+++ b/tools/testing/selftests/arm64/abi/syscall-abi.c
@@ -24,6 +24,11 @@
static int default_sme_vl;
+static int sve_vl_count;
+static unsigned int sve_vls[SVE_VQ_MAX];
+static int sme_vl_count;
+static unsigned int sme_vls[SVE_VQ_MAX];
+
 extern void do_syscall(int sve_vl, int sme_vl);
 static void fill_random(void *buf, size_t size)
@@ -355,72 +360,55 @@ static bool do_test(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
 static void test_one_syscall(struct syscall_cfg *cfg)
 {
-        int sve_vq, sve_vl;
-        int sme_vq, sme_vl;
+        int sve, sme;
+        int ret;
         /* FPSIMD only case */
         ksft_test_result(do_test(cfg, 0, default_sme_vl, 0),
                          "%s FPSIMD\n", cfg->name);
-        if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
-                return;
-
-        for (sve_vq = SVE_VQ_MAX; sve_vq > 0; --sve_vq) {
-                sve_vl = prctl(PR_SVE_SET_VL, sve_vq * 16);
-                if (sve_vl == -1)
+        for (sve = 0; sve < sve_vl_count; sve++) {
+                ret = prctl(PR_SVE_SET_VL, sve_vls[sve]);
+                if (ret == -1)
                         ksft_exit_fail_msg("PR_SVE_SET_VL failed: %s (%d)\n",
                                            strerror(errno), errno);
-                sve_vl &= PR_SVE_VL_LEN_MASK;
-
-                if (sve_vq != sve_vq_from_vl(sve_vl))
-                        sve_vq = sve_vq_from_vl(sve_vl);
+                ksft_test_result(do_test(cfg, sve_vls[sve], default_sme_vl, 0),
+                                 "%s SVE VL %d\n", cfg->name, sve_vls[sve]);
-                ksft_test_result(do_test(cfg, sve_vl, default_sme_vl, 0),
-                                 "%s SVE VL %d\n", cfg->name, sve_vl);
-
-                if (!(getauxval(AT_HWCAP2) & HWCAP2_SME))
-                        continue;
-
-                for (sme_vq = SVE_VQ_MAX; sme_vq > 0; --sme_vq) {
-                        sme_vl = prctl(PR_SME_SET_VL, sme_vq * 16);
-                        if (sme_vl == -1)
+                for (sme = 0; sme < sme_vl_count; sme++) {
+                        ret = prctl(PR_SME_SET_VL, sme_vls[sme]);
+                        if (ret == -1)
                                 ksft_exit_fail_msg("PR_SME_SET_VL failed: %s (%d)\n",
                                                    strerror(errno), errno);
-                        sme_vl &= PR_SME_VL_LEN_MASK;
-
-                        /* Found lowest VL */
-                        if (sve_vq_from_vl(sme_vl) > sme_vq)
-                                break;
-
-                        if (sme_vq != sve_vq_from_vl(sme_vl))
-                                sme_vq = sve_vq_from_vl(sme_vl);
-
-                        ksft_test_result(do_test(cfg, sve_vl, sme_vl,
+                        ksft_test_result(do_test(cfg, sve_vls[sve],
+                                                 sme_vls[sme],
                                                  SVCR_ZA_MASK | SVCR_SM_MASK),
                                          "%s SVE VL %d/SME VL %d SM+ZA\n",
-                                         cfg->name, sve_vl, sme_vl);
-                        ksft_test_result(do_test(cfg, sve_vl, sme_vl,
-                                         SVCR_SM_MASK),
+                                         cfg->name, sve_vls[sve],
+                                         sme_vls[sme]);
+                        ksft_test_result(do_test(cfg, sve_vls[sve],
+                                                 sme_vls[sme], SVCR_SM_MASK),
                                          "%s SVE VL %d/SME VL %d SM\n",
-                                         cfg->name, sve_vl, sme_vl);
-                        ksft_test_result(do_test(cfg, sve_vl, sme_vl,
-                                         SVCR_ZA_MASK),
+                                         cfg->name, sve_vls[sve],
+                                         sme_vls[sme]);
+                        ksft_test_result(do_test(cfg, sve_vls[sve],
+                                                 sme_vls[sme], SVCR_ZA_MASK),
                                          "%s SVE VL %d/SME VL %d ZA\n",
-                                         cfg->name, sve_vl, sme_vl);
+                                         cfg->name, sve_vls[sve],
+                                         sme_vls[sme]);
                 }
         }
 }
-int sve_count_vls(void)
+void sve_count_vls(void)
 {
         unsigned int vq;
-        int vl_count = 0;
         int vl;
         if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
-                return 0;
+                return;
         /*
          * Enumerate up to SVE_VQ_MAX vector lengths
@@ -436,23 +424,17 @@ int sve_count_vls(void)
                 if (vq != sve_vq_from_vl(vl))
                         vq = sve_vq_from_vl(vl);
-                vl_count++;
+                sve_vls[sve_vl_count++] = vl;
         }
-
-        return vl_count;
 }
-int sme_count_vls(void)
+void sme_count_vls(void)
 {
         unsigned int vq;
-        int vl_count = 0;
         int vl;
         if (!(getauxval(AT_HWCAP2) & HWCAP2_SME))
-                return 0;
-
-        /* Ensure we configure a SME VL, used to flag if SVCR is set */
-        default_sme_vl = 16;
+                return;
         /*
          * Enumerate up to SVE_VQ_MAX vector lengths
@@ -472,10 +454,11 @@ int sme_count_vls(void)
                 if (vq != sve_vq_from_vl(vl))
                         vq = sve_vq_from_vl(vl);
-                vl_count++;
+                sme_vls[sme_vl_count++] = vl;
         }
-        return vl_count;
+        /* Ensure we configure a SME VL, used to flag if SVCR is set */
+        default_sme_vl = sme_vls[0];
 }
 int main(void)
@@ -486,8 +469,12 @@ int main(void)
         srandom(getpid());
         ksft_print_header();
-        tests += sve_count_vls();
-        tests += (sve_count_vls() * sme_count_vls()) * 3;
+
+        sve_count_vls();
+        sme_count_vls();
+
+        tests += sve_vl_count;
+        tests += (sve_vl_count * sme_vl_count) * 3;
         ksft_set_plan(ARRAY_SIZE(syscalls) * tests);
if (getauxval(AT_HWCAP2) & HWCAP2_SME_FA64)
Currently syscall-abi only covers SME in the case where the system also supports SVE; however, it is architecturally valid to support SME without SVE. Update the program to cover this case. This requires checking whether SVCR.SM is set when deciding if we're handling the FPSIMD or SVE registers, and adding new test cases for the SME only configuration.
Note that in the SME only case we should not save the SVE registers after a syscall: even if we were in streaming mode and therefore set them, the syscall should have exited streaming mode. We check that it has done so by looking at SVCR.
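A rough standalone sketch of the SVCR.SM aware check described above: when the syscall is entered in streaming mode, the exit from streaming mode must have zeroed the FP/SIMD state, so the output registers are compared against an all-zero buffer rather than the saved inputs. The buffer names and the SM mask (SVCR.SM is bit 0) mirror the patch; the main() driver and the bare error counters are simplifications for the example.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define SVCR_SM_MASK    (1UL << 0)      /* SVCR.SM is bit 0 */
#define NUM_FPR         32

static uint64_t fpr_in[NUM_FPR * 2];    /* values loaded before the syscall */
static uint64_t fpr_out[NUM_FPR * 2];   /* values saved after the syscall */
static uint64_t fpr_zero[NUM_FPR * 2];  /* all-zero reference */

static int check_fpr(int sve_vl, uint64_t svcr)
{
        int errors = 0, i;

        /* Plain FPSIMD: the registers must survive the syscall unchanged */
        if (!sve_vl && !(svcr & SVCR_SM_MASK)) {
                for (i = 0; i < NUM_FPR * 2; i++)
                        if (fpr_in[i] != fpr_out[i])
                                errors++;
        }

        /* Streaming mode: leaving SM on syscall entry must zero the state */
        if (svcr & SVCR_SM_MASK)
                if (memcmp(fpr_zero, fpr_out, sizeof(fpr_out)) != 0)
                        errors++;

        return errors;
}

int main(void)
{
        printf("FPSIMD-only errors: %d\n", check_fpr(0, 0));
        printf("streaming-mode errors: %d\n", check_fpr(0, SVCR_SM_MASK));
        return 0;
}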
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../testing/selftests/arm64/abi/syscall-abi-asm.S | 14 +++++----
 tools/testing/selftests/arm64/abi/syscall-abi.c   | 34 +++++++++++++++++++++-
 2 files changed, 42 insertions(+), 6 deletions(-)
diff --git a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S
index acd5e9f3bc0b..cdfafc939a9e 100644
--- a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S
+++ b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S
@@ -92,8 +92,11 @@ do_syscall:
         str     x29, [x2], #8           // FP
         str     x30, [x2], #8           // LR
-        // Load FPRs if we're not doing SVE
+        // Load FPRs if we're doing neither SVE nor streaming SVE
         cbnz    x0, 1f
+        ldr     x2, =svcr_in
+        tbnz    x2, #SVCR_SM_SHIFT, 1f
+
         ldr     x2, =fpr_in
         ldp     q0, q1, [x2]
         ldp     q2, q3, [x2, #16 * 2]
@@ -111,10 +114,11 @@ do_syscall:
         ldp     q26, q27, [x2, #16 * 26]
         ldp     q28, q29, [x2, #16 * 28]
         ldp     q30, q31, [x2, #16 * 30]
+
+        b       2f
 1:
         // Load the SVE registers if we're doing SVE/SME
-        cbz     x0, 1f
         ldr     x2, =z_in
         ldr     z0, [x2, #0, MUL VL]
@@ -155,9 +159,9 @@ do_syscall:
         ldr     x2, =ffr_in
         ldr     p0, [x2]
         ldr     x2, [x2, #0]
-        cbz     x2, 2f
+        cbz     x2, 1f
         wrffr   p0.b
-2:
+1:
         ldr     x2, =p_in
         ldr     p0, [x2, #0, MUL VL]
@@ -176,7 +180,7 @@ do_syscall:
         ldr     p13, [x2, #13, MUL VL]
         ldr     p14, [x2, #14, MUL VL]
         ldr     p15, [x2, #15, MUL VL]
-1:
+2:
         // Do the syscall
         svc     #0

diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c
index 45fdcbe3e909..7c9b6e947040 100644
--- a/tools/testing/selftests/arm64/abi/syscall-abi.c
+++ b/tools/testing/selftests/arm64/abi/syscall-abi.c
@@ -88,6 +88,7 @@ static int check_gpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, uint64_t s
 #define NUM_FPR 32
 uint64_t fpr_in[NUM_FPR * 2];
 uint64_t fpr_out[NUM_FPR * 2];
+uint64_t fpr_zero[NUM_FPR * 2];
 static void setup_fpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
                       uint64_t svcr)
@@ -102,7 +103,7 @@ static int check_fpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
         int errors = 0;
         int i;
-        if (!sve_vl) {
+        if (!sve_vl && !(svcr & SVCR_SM_MASK)) {
                 for (i = 0; i < ARRAY_SIZE(fpr_in); i++) {
                         if (fpr_in[i] != fpr_out[i]) {
                                 ksft_print_msg("%s Q%d/%d mismatch %llx != %llx\n",
@@ -114,6 +115,18 @@ static int check_fpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
                 }
         }
+        /*
+         * In streaming mode the whole register set should be cleared
+         * by the transition out of streaming mode.
+         */
+        if (svcr & SVCR_SM_MASK) {
+                if (memcmp(fpr_zero, fpr_out, sizeof(fpr_out)) != 0) {
+                        ksft_print_msg("%s FPSIMD registers non-zero exiting SM\n",
+                                       cfg->name);
+                        errors++;
+                }
+        }
+
         return errors;
 }
@@ -400,6 +413,24 @@ static void test_one_syscall(struct syscall_cfg *cfg)
                                          sme_vls[sme]);
                 }
         }
+
+        for (sme = 0; sme < sme_vl_count; sme++) {
+                ret = prctl(PR_SME_SET_VL, sme_vls[sme]);
+                if (ret == -1)
+                        ksft_exit_fail_msg("PR_SME_SET_VL failed: %s (%d)\n",
+                                           strerror(errno), errno);
+
+                ksft_test_result(do_test(cfg, 0, sme_vls[sme],
+                                         SVCR_ZA_MASK | SVCR_SM_MASK),
+                                 "%s SME VL %d SM+ZA\n",
+                                 cfg->name, sme_vls[sme]);
+                ksft_test_result(do_test(cfg, 0, sme_vls[sme], SVCR_SM_MASK),
+                                 "%s SME VL %d SM\n",
+                                 cfg->name, sme_vls[sme]);
+                ksft_test_result(do_test(cfg, 0, sme_vls[sme], SVCR_ZA_MASK),
+                                 "%s SME VL %d ZA\n",
+                                 cfg->name, sme_vls[sme]);
+        }
 }
 void sve_count_vls(void)
@@ -474,6 +505,7 @@ int main(void)
         sme_count_vls();
         tests += sve_vl_count;
+        tests += sme_vl_count * 3;
         tests += (sve_vl_count * sme_vl_count) * 3;
         ksft_set_plan(ARRAY_SIZE(syscalls) * tests);
As documented in issue C215 in the known issues list for DDI0487I.a [1], Arm will be making a retroactive change to SVE to remove the possibility of selecting non power of two vector lengths. This has no impact on existing physical implementations, but most virtual implementations have implemented the full range of previously permissible vector lengths.
Since virtual implementations are noticeably slow in general, and the larger vector lengths amplify the issue, there is a useful improvement in runtime from only covering the vector lengths that will exist in practical systems; adjust our enumeration accordingly. We have other tests that aim to cover the enumeration interfaces.
For symmetry we apply the same change to the enumeration of SME vector lengths, though the power of two restriction was already present for SME so there is no impact on the set of vector lengths tested.
[1] https://developer.arm.com/documentation/102105/ia-00/
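As a quick illustration of what the loop change buys, halving vq each iteration only visits the power of two vector quadword counts, so at most ten probes per vector type instead of up to 512. This toy program only counts and prints the lengths that would be requested; it makes no prctl() calls and is not part of the patch.

#include <stdio.h>

#define VQ_MAX  512     /* SVE_VQ_MAX */

int main(void)
{
        unsigned int vq, before = 0, after = 0;

        for (vq = VQ_MAX; vq > 0; --vq)         /* previous enumeration */
                before++;

        for (vq = VQ_MAX; vq > 0; vq /= 2) {    /* power of two only */
                printf("would request a VL of %u bytes\n", vq * 16);
                after++;
        }

        printf("%u probes before, %u after\n", before, after);
        return 0;
}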
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 tools/testing/selftests/arm64/abi/syscall-abi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c
index 7c9b6e947040..8afcbf6861fd 100644
--- a/tools/testing/selftests/arm64/abi/syscall-abi.c
+++ b/tools/testing/selftests/arm64/abi/syscall-abi.c
@@ -444,7 +444,7 @@ void sve_count_vls(void)
         /*
          * Enumerate up to SVE_VQ_MAX vector lengths
          */
-        for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+        for (vq = SVE_VQ_MAX; vq > 0; vq /= 2) {
                 vl = prctl(PR_SVE_SET_VL, vq * 16);
                 if (vl == -1)
                         ksft_exit_fail_msg("PR_SVE_SET_VL failed: %s (%d)\n",
@@ -470,7 +470,7 @@ void sme_count_vls(void)
         /*
          * Enumerate up to SVE_VQ_MAX vector lengths
          */
-        for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+        for (vq = SVE_VQ_MAX; vq > 0; vq /= 2) {
                 vl = prctl(PR_SME_SET_VL, vq * 16);
                 if (vl == -1)
                         ksft_exit_fail_msg("PR_SME_SET_VL failed: %s (%d)\n",
On Tue, 27 Dec 2022 13:06:35 +0000, Mark Brown wrote:
> The first patch here is a fix which should ideally be sent as such; currently the program will hang on architecturally valid systems which implement SME but not the 128 bit vector length. The remaining patches are general enhancements, including coverage for the SME ABI on SME only systems.
>
> [...]
Applied to arm64 (for-next/kselftest), thanks!
[1/4] kselftest/arm64: Fix syscall-abi for systems without 128 bit SME
      https://git.kernel.org/arm64/c/97ec597b26df
[2/4] kselftest/arm64: Only enumerate VLs once in syscall-abi
      https://git.kernel.org/arm64/c/fae491e52cc2
[3/4] kselftest/arm64: Verify SME only ABI in syscall-abi
      https://git.kernel.org/arm64/c/024e4a155874
[4/4] kselftest/arm64: Only enumerate power of two VLs in syscall-abi
      https://git.kernel.org/arm64/c/10f326fbb458