Skip instead of failing when prerequisite conditions aren't fulfilled, such as invalid xstate values etc. This patch would make the tests show as skip when run by: make -C tools/testing/selftest/ TARGETS=x86 run_tests
... # timeout set to 45 # selftests: x86: amx_64 # # xstate cpuid: invalid tile data size/offset: 0/0 ok 42 selftests: x86: amx_64 # SKIP # timeout set to 45 # selftests: x86: lam_64 # # Unsupported LAM feature! ok 43 selftests: x86: lam_64 # SKIP ...
Signed-off-by: Muhammad Usama Anjum usama.anjum@collabora.com --- I'm not sure if xstate values should be correct on all the x86 processors. If the xstate is invalid on a CPU, the test should be skipped instead of failing. --- tools/testing/selftests/x86/amx.c | 33 ++++++++++++++++++++----------- tools/testing/selftests/x86/lam.c | 2 +- 2 files changed, 23 insertions(+), 12 deletions(-)
diff --git a/tools/testing/selftests/x86/amx.c b/tools/testing/selftests/x86/amx.c index d884fd69dd510..5d1ca0bbaaae7 100644 --- a/tools/testing/selftests/x86/amx.c +++ b/tools/testing/selftests/x86/amx.c @@ -103,9 +103,10 @@ static void clearhandler(int sig)
#define CPUID_LEAF1_ECX_XSAVE_MASK (1 << 26) #define CPUID_LEAF1_ECX_OSXSAVE_MASK (1 << 27) -static inline void check_cpuid_xsave(void) +static inline int check_cpuid_xsave(void) { uint32_t eax, ebx, ecx, edx; + int ret = 0;
/* * CPUID.1:ECX.XSAVE[bit 26] enumerates general @@ -113,10 +114,16 @@ static inline void check_cpuid_xsave(void) * XGETBV. */ __cpuid_count(1, 0, eax, ebx, ecx, edx); - if (!(ecx & CPUID_LEAF1_ECX_XSAVE_MASK)) - fatal_error("cpuid: no CPU xsave support"); - if (!(ecx & CPUID_LEAF1_ECX_OSXSAVE_MASK)) - fatal_error("cpuid: no OS xsave support"); + if (!(ecx & CPUID_LEAF1_ECX_XSAVE_MASK)) { + ksft_print_msg("cpuid: no CPU xsave support\n"); + ret = -1; + } + if (!(ecx & CPUID_LEAF1_ECX_OSXSAVE_MASK)) { + ksft_print_msg("cpuid: no OS xsave support\n"); + ret = -1; + } + + return ret; }
static uint32_t xbuf_size; @@ -131,7 +138,7 @@ static struct { #define TILE_CPUID 0x1d #define TILE_PALETTE_ID 0x1
-static void check_cpuid_xtiledata(void) +static int check_cpuid_xtiledata(void) { uint32_t eax, ebx, ecx, edx;
@@ -153,12 +160,16 @@ static void check_cpuid_xtiledata(void) * eax: XTILEDATA state component size * ebx: XTILEDATA state component offset in user buffer */ - if (!eax || !ebx) - fatal_error("xstate cpuid: invalid tile data size/offset: %d/%d", - eax, ebx); + if (!eax || !ebx) { + ksft_print_msg("xstate cpuid: invalid tile data size/offset: %d/%d\n", + eax, ebx); + return -1; + }
xtiledata.size = eax; xtiledata.xbuf_offset = ebx; + + return 0; }
/* The helpers for managing XSAVE buffer and tile states: */ @@ -929,8 +940,8 @@ static void test_ptrace(void) int main(void) { /* Check hardware availability at first */ - check_cpuid_xsave(); - check_cpuid_xtiledata(); + if (check_cpuid_xsave() || check_cpuid_xtiledata()) + return KSFT_SKIP;
init_stashed_xsave(); sethandler(SIGILL, handle_noperm, 0); diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c index 215b8150b7cca..c0f016f45ee17 100644 --- a/tools/testing/selftests/x86/lam.c +++ b/tools/testing/selftests/x86/lam.c @@ -1183,7 +1183,7 @@ int main(int argc, char **argv)
if (!cpu_has_lam()) { ksft_print_msg("Unsupported LAM feature!\n"); - return -1; + return KSFT_SKIP; }
while ((c = getopt(argc, argv, "ht:")) != -1) {
On 3/7/2024 10:37 AM, Muhammad Usama Anjum wrote:
diff --git a/tools/testing/selftests/x86/amx.c b/tools/testing/selftests/x86/amx.c index d884fd69dd510..5d1ca0bbaaae7 100644 --- a/tools/testing/selftests/x86/amx.c +++ b/tools/testing/selftests/x86/amx.c @@ -103,9 +103,10 @@ static void clearhandler(int sig) #define CPUID_LEAF1_ECX_XSAVE_MASK (1 << 26) #define CPUID_LEAF1_ECX_OSXSAVE_MASK (1 << 27) -static inline void check_cpuid_xsave(void) +static inline int check_cpuid_xsave(void) { uint32_t eax, ebx, ecx, edx;
- int ret = 0;
/* * CPUID.1:ECX.XSAVE[bit 26] enumerates general @@ -113,10 +114,16 @@ static inline void check_cpuid_xsave(void) * XGETBV. */ __cpuid_count(1, 0, eax, ebx, ecx, edx);
- if (!(ecx & CPUID_LEAF1_ECX_XSAVE_MASK))
fatal_error("cpuid: no CPU xsave support");
- if (!(ecx & CPUID_LEAF1_ECX_OSXSAVE_MASK))
fatal_error("cpuid: no OS xsave support");
- if (!(ecx & CPUID_LEAF1_ECX_XSAVE_MASK)) {
ksft_print_msg("cpuid: no CPU xsave support\n");
ret = -1;
- }
- if (!(ecx & CPUID_LEAF1_ECX_OSXSAVE_MASK)) {
ksft_print_msg("cpuid: no OS xsave support\n");
ret = -1;
- }
- return ret; }
I thought check_cpuid_xsave() can go away [1] by simplifying the availability check through arch_prctl():
+#define ARCH_GET_XCOMP_SUPP 0x1021 #define ARCH_GET_XCOMP_PERM 0x1022 #define ARCH_REQ_XCOMP_PERM 0x1023
@@ -928,8 +911,15 @@ static void test_ptrace(void)
int main(void) { - /* Check hardware availability at first */ - check_cpuid_xsave(); + unsigned long features; + long rc; + + rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_SUPP, &features); + if (rc || (features & XFEATURE_MASK_XTILE) != XFEATURE_MASK_XTILE) { + ksft_print_msg("no AMX support\n"); + return KSFT_SKIP; + }
-static void check_cpuid_xtiledata(void) +static int check_cpuid_xtiledata(void) { uint32_t eax, ebx, ecx, edx; @@ -153,12 +160,16 @@ static void check_cpuid_xtiledata(void) * eax: XTILEDATA state component size * ebx: XTILEDATA state component offset in user buffer */
- if (!eax || !ebx)
fatal_error("xstate cpuid: invalid tile data size/offset: %d/%d",
eax, ebx);
- if (!eax || !ebx) {
ksft_print_msg("xstate cpuid: invalid tile data size/offset: %d/%d\n",
eax, ebx);
return -1;
- }
xtiledata.size = eax; xtiledata.xbuf_offset = ebx;
- return 0; }
I don't think it is okay to silently skip the test here. If the feature is available, the tile data size and offset should not be zero.
Thanks, Chang
[1] https://lore.kernel.org/lkml/327cde12-daea-84ba-4b24-64fe12e89dea@intel.com/
On 3/9/24 6:06 AM, Chang S. Bae wrote:
On 3/7/2024 10:37 AM, Muhammad Usama Anjum wrote:
diff --git a/tools/testing/selftests/x86/amx.c b/tools/testing/selftests/x86/amx.c index d884fd69dd510..5d1ca0bbaaae7 100644 --- a/tools/testing/selftests/x86/amx.c +++ b/tools/testing/selftests/x86/amx.c @@ -103,9 +103,10 @@ static void clearhandler(int sig) #define CPUID_LEAF1_ECX_XSAVE_MASK (1 << 26) #define CPUID_LEAF1_ECX_OSXSAVE_MASK (1 << 27) -static inline void check_cpuid_xsave(void) +static inline int check_cpuid_xsave(void) { uint32_t eax, ebx, ecx, edx; + int ret = 0; /* * CPUID.1:ECX.XSAVE[bit 26] enumerates general @@ -113,10 +114,16 @@ static inline void check_cpuid_xsave(void) * XGETBV. */ __cpuid_count(1, 0, eax, ebx, ecx, edx); - if (!(ecx & CPUID_LEAF1_ECX_XSAVE_MASK)) - fatal_error("cpuid: no CPU xsave support"); - if (!(ecx & CPUID_LEAF1_ECX_OSXSAVE_MASK)) - fatal_error("cpuid: no OS xsave support"); + if (!(ecx & CPUID_LEAF1_ECX_XSAVE_MASK)) { + ksft_print_msg("cpuid: no CPU xsave support\n"); + ret = -1; + } + if (!(ecx & CPUID_LEAF1_ECX_OSXSAVE_MASK)) { + ksft_print_msg("cpuid: no OS xsave support\n"); + ret = -1; + }
+ return ret; }
I thought check_cpuid_xsave() can go away [1] by simplifying the availability check through arch_prctl():
In this patch, I'm just focusing on skip login on existing code. I'll make this change when I'll transform the entire test to TAP.
+#define ARCH_GET_XCOMP_SUPP 0x1021 #define ARCH_GET_XCOMP_PERM 0x1022 #define ARCH_REQ_XCOMP_PERM 0x1023
@@ -928,8 +911,15 @@ static void test_ptrace(void)
int main(void) { - /* Check hardware availability at first */ - check_cpuid_xsave(); + unsigned long features; + long rc;
+ rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_SUPP, &features); + if (rc || (features & XFEATURE_MASK_XTILE) != XFEATURE_MASK_XTILE) { + ksft_print_msg("no AMX support\n"); + return KSFT_SKIP; + }
-static void check_cpuid_xtiledata(void) +static int check_cpuid_xtiledata(void) { uint32_t eax, ebx, ecx, edx; @@ -153,12 +160,16 @@ static void check_cpuid_xtiledata(void) * eax: XTILEDATA state component size * ebx: XTILEDATA state component offset in user buffer */ - if (!eax || !ebx) - fatal_error("xstate cpuid: invalid tile data size/offset: %d/%d", - eax, ebx); + if (!eax || !ebx) { + ksft_print_msg("xstate cpuid: invalid tile data size/offset: %d/%d\n", + eax, ebx); + return -1; + } xtiledata.size = eax; xtiledata.xbuf_offset = ebx;
+ return 0; }
I don't think it is okay to silently skip the test here. If the feature is available, the tile data size and offset should not be zero.
We are logging that data size/offset are invalid if either eax or ebx are invalid and then we are skipping. Not sure what you are asking me to change.
Thanks, Chang
[1] https://lore.kernel.org/lkml/327cde12-daea-84ba-4b24-64fe12e89dea@intel.com/
On 3/11/2024 10:02 AM, Muhammad Usama Anjum wrote:
On 3/9/24 6:06 AM, Chang S. Bae wrote:
On 3/7/2024 10:37 AM, Muhammad Usama Anjum wrote:
-static void check_cpuid_xtiledata(void) +static int check_cpuid_xtiledata(void) { uint32_t eax, ebx, ecx, edx; @@ -153,12 +160,16 @@ static void check_cpuid_xtiledata(void) * eax: XTILEDATA state component size * ebx: XTILEDATA state component offset in user buffer */ - if (!eax || !ebx) - fatal_error("xstate cpuid: invalid tile data size/offset: %d/%d", - eax, ebx); + if (!eax || !ebx) { + ksft_print_msg("xstate cpuid: invalid tile data size/offset: %d/%d\n", + eax, ebx); + return -1; + } xtiledata.size = eax; xtiledata.xbuf_offset = ebx;
+ return 0; }
I don't think it is okay to silently skip the test here. If the feature is available, the tile data size and offset should not be zero.
We are logging that data size/offset are invalid if either eax or ebx are invalid and then we are skipping. Not sure what you are asking me to change.
You intention seems to skip the test when AMX is not available. But this function should only be invoked when AMX is actually available, not as part of the feature availability check. Therefore, I think this change is not relevant. Also, if we encounter invalid TILEDATA CPUID, it should be a reason to *fail* the test, rather than calling out a skip, right?
Thanks, Chang
On 3/11/24 10:39 PM, Chang S. Bae wrote:
On 3/11/2024 10:02 AM, Muhammad Usama Anjum wrote:
On 3/9/24 6:06 AM, Chang S. Bae wrote:
On 3/7/2024 10:37 AM, Muhammad Usama Anjum wrote:
-static void check_cpuid_xtiledata(void) +static int check_cpuid_xtiledata(void) { uint32_t eax, ebx, ecx, edx; @@ -153,12 +160,16 @@ static void check_cpuid_xtiledata(void) * eax: XTILEDATA state component size * ebx: XTILEDATA state component offset in user buffer */ - if (!eax || !ebx) - fatal_error("xstate cpuid: invalid tile data size/offset: %d/%d", - eax, ebx); + if (!eax || !ebx) { + ksft_print_msg("xstate cpuid: invalid tile data size/offset: %d/%d\n", + eax, ebx); + return -1; + } xtiledata.size = eax; xtiledata.xbuf_offset = ebx;
+ return 0; }
I don't think it is okay to silently skip the test here. If the feature is available, the tile data size and offset should not be zero.
We are logging that data size/offset are invalid if either eax or ebx are invalid and then we are skipping. Not sure what you are asking me to change.
You intention seems to skip the test when AMX is not available.
Yes,
But this function should only be invoked when AMX is actually available, not as part of the feature availability check.
How can we check if AMX is available or not?
Therefore, I think this change is not relevant. Also, if we encounter invalid TILEDATA CPUID, it should be a reason to *fail* the test, rather than calling out a skip, right?
I see. But once we check if AMX is available.
Thanks, Chang
On 3/12/2024 2:26 AM, Muhammad Usama Anjum wrote:
How can we check if AMX is available or not?
After a successful check_cpuid_xsave(), examining CPUID(eax=0xd, ecx=0) EDX: EAX, which reports the support bits of XCR0, can give assurance of AMX availability. Perhaps, this change is considerable on top of your patch:
static int check_cpuid_xtiledata(void) { uint32_t eax, ebx, ecx, edx; + uint64_t xfeatures;
__cpuid_count(CPUID_LEAF_XSTATE, CPUID_SUBLEAF_XSTATE_USER, eax, ebx, ecx, edx);
+ xfeatures = eax + ((uint64_t)edx << 32); + if ((xfeatures & XFEATURE_MASK_XTILE) != XFEATURE_MASK_XTILE) { + ksft_print_msg("no AMX support\n"); + return -1; + }
Nevertheless, I still believe that using arch_prctl(ARCH_GET_XCOMP_SUPP, ...) remains a simple and legitimate approach for directly checking dynamic feature support from the kernel: https://docs.kernel.org/arch/x86/xstate.html
Thanks, Chang
Hi Chang,
On 3/12/24 9:07 PM, Chang S. Bae wrote:
On 3/12/2024 2:26 AM, Muhammad Usama Anjum wrote:
How can we check if AMX is available or not?
After a successful check_cpuid_xsave(), examining CPUID(eax=0xd, ecx=0) EDX: EAX, which reports the support bits of XCR0, can give assurance of AMX availability. Perhaps, this change is considerable on top of your patch:
static int check_cpuid_xtiledata(void) { uint32_t eax, ebx, ecx, edx; + uint64_t xfeatures;
__cpuid_count(CPUID_LEAF_XSTATE, CPUID_SUBLEAF_XSTATE_USER, eax, ebx, ecx, edx);
+ xfeatures = eax + ((uint64_t)edx << 32); + if ((xfeatures & XFEATURE_MASK_XTILE) != XFEATURE_MASK_XTILE) { + ksft_print_msg("no AMX support\n"); + return -1; + }
Now I understand. I'll use this snippet to skip the test and fail the test if eax and ebx are zero. Sorry, I've lesser knowledge on the x86's extensions side. I'll build up the knowledge.
Nevertheless, I still believe that using arch_prctl(ARCH_GET_XCOMP_SUPP, ..) remains a simple and legitimate approach for directly checking dynamic feature support from the kernel: https://docs.kernel.org/arch/x86/xstate.html
I'll use arch_prtcl in another patch on top of the current patch in v2.
Thanks, Chang
On Thu, Mar 07, 2024 at 11:37:22PM +0500, Muhammad Usama Anjum wrote:
diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c index 215b8150b7cca..c0f016f45ee17 100644 --- a/tools/testing/selftests/x86/lam.c +++ b/tools/testing/selftests/x86/lam.c @@ -1183,7 +1183,7 @@ int main(int argc, char **argv) if (!cpu_has_lam()) { ksft_print_msg("Unsupported LAM feature!\n");
return -1;
}return KSFT_SKIP;
while ((c = getopt(argc, argv, "ht:")) != -1) {
Looks good to me:
Acked-by: Kirill A. Shutemov kirill.shutemov@linux.intel.com
On 3/8/2024 2:37 AM, Muhammad Usama Anjum wrote:
diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c index 215b8150b7cca..c0f016f45ee17 100644 --- a/tools/testing/selftests/x86/lam.c +++ b/tools/testing/selftests/x86/lam.c @@ -1183,7 +1183,7 @@ int main(int argc, char **argv) if (!cpu_has_lam()) { ksft_print_msg("Unsupported LAM feature!\n");
return -1;
}return KSFT_SKIP;
while ((c = getopt(argc, argv, "ht:")) != -1) {
Looks good to me. Reviewed-by: Binbin Wu binbin.wu@linux.intel.com
linux-kselftest-mirror@lists.linaro.org