Due to several issues which are unlikely to be fixed in the near future, the access_tracking_perf_test sanity check for how many pages are still clean after an iteration is not reliable when NUMA balancing is active.
This patch series refactors this test to skip this check by default automatically.
V2: adopted Sean's suggestions.
Best regards, Maxim Levitsky
Maxim Levitsky (1): KVM: selftests: access_tracking_perf_test: add option to skip the sanity check
Sean Christopherson (1): KVM: selftests: Extract guts of THP accessor to standalone sysfs helpers
.../selftests/kvm/access_tracking_perf_test.c | 33 +++++++++++++-- .../testing/selftests/kvm/include/test_util.h | 1 + tools/testing/selftests/kvm/lib/test_util.c | 42 ++++++++++++++----- 3 files changed, 61 insertions(+), 15 deletions(-)
From: Sean Christopherson seanjc@google.com
Extract the guts of thp_configured() and get_trans_hugepagesz() to standalone helpers so that the core logic can be reused for other sysfs files, e.g. to query numa_balancing.
Opportunistically assert that the initial fscanf() read at least one byte, and add a comment explaining the second call to fscanf().
Signed-off-by: Sean Christopherson seanjc@google.com Signed-off-by: Maxim Levitsky mlevitsk@redhat.com --- tools/testing/selftests/kvm/lib/test_util.c | 35 ++++++++++++++------- 1 file changed, 24 insertions(+), 11 deletions(-)
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 8ed0b74ae837..3dc8538f5d69 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -132,37 +132,50 @@ void print_skip(const char *fmt, ...) puts(", skipping test"); }
-bool thp_configured(void) +static bool test_sysfs_path(const char *path) { - int ret; struct stat statbuf; + int ret;
- ret = stat("/sys/kernel/mm/transparent_hugepage", &statbuf); + ret = stat(path, &statbuf); TEST_ASSERT(ret == 0 || (ret == -1 && errno == ENOENT), - "Error in stating /sys/kernel/mm/transparent_hugepage"); + "Error in stat()ing '%s'", path);
return ret == 0; }
-size_t get_trans_hugepagesz(void) +bool thp_configured(void) +{ + return test_sysfs_path("/sys/kernel/mm/transparent_hugepage"); +} + +static size_t get_sysfs_val(const char *path) { size_t size; FILE *f; int ret;
- TEST_ASSERT(thp_configured(), "THP is not configured in host kernel"); - - f = fopen("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", "r"); - TEST_ASSERT(f != NULL, "Error in opening transparent_hugepage/hpage_pmd_size"); + f = fopen(path, "r"); + TEST_ASSERT(f, "Error opening '%s'", path);
ret = fscanf(f, "%ld", &size); + TEST_ASSERT(ret > 0, "Error reading '%s'", path); + + /* Re-scan the input stream to verify the entire file was read. */ ret = fscanf(f, "%ld", &size); - TEST_ASSERT(ret < 1, "Error reading transparent_hugepage/hpage_pmd_size"); - fclose(f); + TEST_ASSERT(ret < 1, "Error reading '%s'", path);
+ fclose(f); return size; }
+size_t get_trans_hugepagesz(void) +{ + TEST_ASSERT(thp_configured(), "THP is not configured in host kernel"); + + return get_sysfs_val("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"); +} + size_t get_def_hugetlb_pagesz(void) { char buf[64];
Add an option to skip sanity check of number of still idle pages, and set it by default to skip, in case hypervisor or NUMA balancing is detected.
Signed-off-by: Maxim Levitsky mlevitsk@redhat.com --- .../selftests/kvm/access_tracking_perf_test.c | 33 ++++++++++++++++--- .../testing/selftests/kvm/include/test_util.h | 1 + tools/testing/selftests/kvm/lib/test_util.c | 7 ++++ 3 files changed, 37 insertions(+), 4 deletions(-)
diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index 3c7defd34f56..6d50c829f00c 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -65,6 +65,8 @@ static int vcpu_last_completed_iteration[KVM_MAX_VCPUS]; /* Whether to overlap the regions of memory vCPUs access. */ static bool overlap_memory_access;
+static int warn_on_too_many_idle_pages = -1; + struct test_params { /* The backing source for the region of memory. */ enum vm_mem_backing_src_type backing_src; @@ -184,11 +186,10 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm, * are cached and the guest won't see the "idle" bit cleared. */ if (still_idle >= pages / 10) { -#ifdef __x86_64__ - TEST_ASSERT(this_cpu_has(X86_FEATURE_HYPERVISOR), + TEST_ASSERT(warn_on_too_many_idle_pages, "vCPU%d: Too many pages still idle (%lu out of %lu)", vcpu_idx, still_idle, pages); -#endif + printf("WARNING: vCPU%d: Too many pages still idle (%lu out of %lu), " "this will affect performance results.\n", vcpu_idx, still_idle, pages); @@ -342,6 +343,8 @@ static void help(char *name) printf(" -v: specify the number of vCPUs to run.\n"); printf(" -o: Overlap guest memory accesses instead of partitioning\n" " them into a separate region of memory for each vCPU.\n"); + printf(" -w: Skip or force enable the check that after dirtying the guest memory, most (90%%) of \n" + "it is reported as dirty again (0/1)"); backing_src_help("-s"); puts(""); exit(0); @@ -359,7 +362,7 @@ int main(int argc, char *argv[])
guest_modes_append_default();
- while ((opt = getopt(argc, argv, "hm:b:v:os:")) != -1) { + while ((opt = getopt(argc, argv, "hm:b:v:os:w:")) != -1) { switch (opt) { case 'm': guest_modes_cmdline(optarg); @@ -376,6 +379,11 @@ int main(int argc, char *argv[]) case 's': params.backing_src = parse_backing_src_type(optarg); break; + case 'w': + warn_on_too_many_idle_pages = + atoi_non_negative("1 - enable warning, 0 - disable", + optarg); + break; case 'h': default: help(argv[0]); @@ -386,6 +394,23 @@ int main(int argc, char *argv[]) page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR); __TEST_REQUIRE(page_idle_fd >= 0, "CONFIG_IDLE_PAGE_TRACKING is not enabled"); + if (warn_on_too_many_idle_pages == -1) { +#ifdef __x86_64__ + if (this_cpu_has(X86_FEATURE_HYPERVISOR)) { + printf("Skipping idle page count sanity check, because the test is run nested\n"); + warn_on_too_many_idle_pages = 0; + } else +#endif + if (is_numa_balancing_enabled()) { + printf("Skipping idle page count sanity check, because NUMA balance is enabled\n"); + warn_on_too_many_idle_pages = 0; + } else { + warn_on_too_many_idle_pages = 1; + } + } else if (!warn_on_too_many_idle_pages) { + printf("Skipping idle page count sanity check, because this was requested by the user\n"); + } + close(page_idle_fd);
for_each_guest_mode(run_test, ¶ms); diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 3e473058849f..1bc9b0a92427 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -153,6 +153,7 @@ bool is_backing_src_hugetlb(uint32_t i); void backing_src_help(const char *flag); enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name); long get_run_delay(void); +bool is_numa_balancing_enabled(void);
/* * Whether or not the given source type is shared memory (as opposed to diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 3dc8538f5d69..03eb99af9b8d 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -176,6 +176,13 @@ size_t get_trans_hugepagesz(void) return get_sysfs_val("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"); }
+bool is_numa_balancing_enabled(void) +{ + if (!test_sysfs_path("/proc/sys/kernel/numa_balancing")) + return false; + return get_sysfs_val("/proc/sys/kernel/numa_balancing") == 1; +} + size_t get_def_hugetlb_pagesz(void) { char buf[64];
On Mon, Mar 24, 2025 at 6:57 PM Maxim Levitsky mlevitsk@redhat.com wrote:
Add an option to skip sanity check of number of still idle pages, and set it by default to skip, in case hypervisor or NUMA balancing is detected.
Signed-off-by: Maxim Levitsky mlevitsk@redhat.com
Thanks Maxim! I'm still working on a respin of this test with MGLRU integration, like [1]. Sorry it's taking me so long. I'll apply my changes on top of yours.
[1]: https://lore.kernel.org/kvm/20241105184333.2305744-12-jthoughton@google.com/
.../selftests/kvm/access_tracking_perf_test.c | 33 ++++++++++++++++--- .../testing/selftests/kvm/include/test_util.h | 1 + tools/testing/selftests/kvm/lib/test_util.c | 7 ++++ 3 files changed, 37 insertions(+), 4 deletions(-)
diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index 3c7defd34f56..6d50c829f00c 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -65,6 +65,8 @@ static int vcpu_last_completed_iteration[KVM_MAX_VCPUS]; /* Whether to overlap the regions of memory vCPUs access. */ static bool overlap_memory_access;
+static int warn_on_too_many_idle_pages = -1;
struct test_params { /* The backing source for the region of memory. */ enum vm_mem_backing_src_type backing_src; @@ -184,11 +186,10 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm, * are cached and the guest won't see the "idle" bit cleared. */ if (still_idle >= pages / 10) { -#ifdef __x86_64__
TEST_ASSERT(this_cpu_has(X86_FEATURE_HYPERVISOR),
TEST_ASSERT(warn_on_too_many_idle_pages,
I think this assertion is flipped (or how warn_on_too_many_idle_pages is being set is flipped, see below).
"vCPU%d: Too many pages still idle (%lu out of %lu)", vcpu_idx, still_idle, pages);
-#endif
printf("WARNING: vCPU%d: Too many pages still idle (%lu out of %lu), " "this will affect performance results.\n", vcpu_idx, still_idle, pages);
@@ -342,6 +343,8 @@ static void help(char *name) printf(" -v: specify the number of vCPUs to run.\n"); printf(" -o: Overlap guest memory accesses instead of partitioning\n" " them into a separate region of memory for each vCPU.\n");
printf(" -w: Skip or force enable the check that after dirtying the guest memory, most (90%%) of \n"
"it is reported as dirty again (0/1)"); backing_src_help("-s"); puts(""); exit(0);
@@ -359,7 +362,7 @@ int main(int argc, char *argv[])
guest_modes_append_default();
while ((opt = getopt(argc, argv, "hm:b:v:os:")) != -1) {
while ((opt = getopt(argc, argv, "hm:b:v:os:w:")) != -1) { switch (opt) { case 'm': guest_modes_cmdline(optarg);
@@ -376,6 +379,11 @@ int main(int argc, char *argv[]) case 's': params.backing_src = parse_backing_src_type(optarg); break;
case 'w':
warn_on_too_many_idle_pages =
atoi_non_negative("1 - enable warning, 0 - disable",
optarg);
We still get a "warning" either way, right? Maybe this should be called "fail_on_too_many_idle_pages" (in which case the above assertion is indeed flipped). Or "warn_on_too_many_idle_pages" should mean *only* warn, i.e., *don't* fail, in which case, below we need to flip how we set it below.
break; case 'h': default: help(argv[0]);
@@ -386,6 +394,23 @@ int main(int argc, char *argv[]) page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR); __TEST_REQUIRE(page_idle_fd >= 0, "CONFIG_IDLE_PAGE_TRACKING is not enabled");
if (warn_on_too_many_idle_pages == -1) {
+#ifdef __x86_64__
if (this_cpu_has(X86_FEATURE_HYPERVISOR)) {
printf("Skipping idle page count sanity check, because the test is run nested\n");
warn_on_too_many_idle_pages = 0;
} else
+#endif
if (is_numa_balancing_enabled()) {
printf("Skipping idle page count sanity check, because NUMA balance is enabled\n");
warn_on_too_many_idle_pages = 0;
} else {
warn_on_too_many_idle_pages = 1;
}
} else if (!warn_on_too_many_idle_pages) {
printf("Skipping idle page count sanity check, because this was requested by the user\n");
}
close(page_idle_fd); for_each_guest_mode(run_test, ¶ms);
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 3e473058849f..1bc9b0a92427 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -153,6 +153,7 @@ bool is_backing_src_hugetlb(uint32_t i); void backing_src_help(const char *flag); enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name); long get_run_delay(void); +bool is_numa_balancing_enabled(void);
/*
- Whether or not the given source type is shared memory (as opposed to
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 3dc8538f5d69..03eb99af9b8d 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -176,6 +176,13 @@ size_t get_trans_hugepagesz(void) return get_sysfs_val("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"); }
+bool is_numa_balancing_enabled(void) +{
if (!test_sysfs_path("/proc/sys/kernel/numa_balancing"))
return false;
return get_sysfs_val("/proc/sys/kernel/numa_balancing") == 1;
+}
size_t get_def_hugetlb_pagesz(void) { char buf[64]; -- 2.26.3
On Tue, Mar 25, 2025, James Houghton wrote:
On Mon, Mar 24, 2025 at 6:57 PM Maxim Levitsky mlevitsk@redhat.com wrote:
Add an option to skip sanity check of number of still idle pages, and set it by default to skip, in case hypervisor or NUMA balancing is detected.
Signed-off-by: Maxim Levitsky mlevitsk@redhat.com
Thanks Maxim! I'm still working on a respin of this test with MGLRU integration, like [1]. Sorry it's taking me so long. I'll apply my changes on top of yours.
.../selftests/kvm/access_tracking_perf_test.c | 33 ++++++++++++++++--- .../testing/selftests/kvm/include/test_util.h | 1 + tools/testing/selftests/kvm/lib/test_util.c | 7 ++++ 3 files changed, 37 insertions(+), 4 deletions(-)
diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index 3c7defd34f56..6d50c829f00c 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -65,6 +65,8 @@ static int vcpu_last_completed_iteration[KVM_MAX_VCPUS]; /* Whether to overlap the regions of memory vCPUs access. */ static bool overlap_memory_access;
+static int warn_on_too_many_idle_pages = -1;
struct test_params { /* The backing source for the region of memory. */ enum vm_mem_backing_src_type backing_src; @@ -184,11 +186,10 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm, * are cached and the guest won't see the "idle" bit cleared. */ if (still_idle >= pages / 10) { -#ifdef __x86_64__
TEST_ASSERT(this_cpu_has(X86_FEATURE_HYPERVISOR),
TEST_ASSERT(warn_on_too_many_idle_pages,
I think this assertion is flipped (or how warn_on_too_many_idle_pages is being set is flipped, see below).
"vCPU%d: Too many pages still idle (%lu out of %lu)", vcpu_idx, still_idle, pages);
-#endif
printf("WARNING: vCPU%d: Too many pages still idle (%lu out of %lu), " "this will affect performance results.\n", vcpu_idx, still_idle, pages);
@@ -342,6 +343,8 @@ static void help(char *name) printf(" -v: specify the number of vCPUs to run.\n"); printf(" -o: Overlap guest memory accesses instead of partitioning\n" " them into a separate region of memory for each vCPU.\n");
printf(" -w: Skip or force enable the check that after dirtying the guest memory, most (90%%) of \n"
"it is reported as dirty again (0/1)"); backing_src_help("-s"); puts(""); exit(0);
@@ -359,7 +362,7 @@ int main(int argc, char *argv[])
guest_modes_append_default();
while ((opt = getopt(argc, argv, "hm:b:v:os:")) != -1) {
while ((opt = getopt(argc, argv, "hm:b:v:os:w:")) != -1) { switch (opt) { case 'm': guest_modes_cmdline(optarg);
@@ -376,6 +379,11 @@ int main(int argc, char *argv[]) case 's': params.backing_src = parse_backing_src_type(optarg); break;
case 'w':
warn_on_too_many_idle_pages =
atoi_non_negative("1 - enable warning, 0 - disable",
optarg);
We still get a "warning" either way, right? Maybe this should be called "fail_on_too_many_idle_pages" (in which case the above assertion is indeed flipped). Or "warn_on_too_many_idle_pages" should mean *only* warn, i.e., *don't* fail, in which case, below we need to flip how we set it below.
Agreed. I like the "warn" terminology, Maybe this?
printf(" -w: Control whether the test warns or fails if more than 10%\n" " of pages are still seen as idle/old after accessing guest\n" " memory. >0 == warn only, 0 == fail, <0 == auto. For auto\n" " mode, the test fails by default, but switches to warn only\n" " if NUMA balancing is enabled or the test detects it's running\n" " in a VM.");
And let the user explicitly select auto:
case 'w': warn_only = atoi_paranoid(optarg); break;
Then the auto resolving works as below, and as James pointed out, the assert becomes
TEST_ASSERT(!warn_only, ....);
break; case 'h': default: help(argv[0]);
@@ -386,6 +394,23 @@ int main(int argc, char *argv[]) page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR); __TEST_REQUIRE(page_idle_fd >= 0, "CONFIG_IDLE_PAGE_TRACKING is not enabled");
if (warn_on_too_many_idle_pages == -1) {
+#ifdef __x86_64__
if (this_cpu_has(X86_FEATURE_HYPERVISOR)) {
printf("Skipping idle page count sanity check, because the test is run nested\n");
warn_on_too_many_idle_pages = 0;
} else
+#endif
if (is_numa_balancing_enabled()) {
printf("Skipping idle page count sanity check, because NUMA balance is enabled\n");
warn_on_too_many_idle_pages = 0;
} else {
warn_on_too_many_idle_pages = 1;
}
} else if (!warn_on_too_many_idle_pages) {
printf("Skipping idle page count sanity check, because this was requested by the user\n");
Eh, I vote to omit this. The sanity check is still there, it's just degraded to a warn. I'm not totally against it, just seems superfluous and potentially confusing.
On Wed, Mar 26, 2025 at 11:41 AM Sean Christopherson seanjc@google.com wrote:
On Tue, Mar 25, 2025, James Houghton wrote:
On Mon, Mar 24, 2025 at 6:57 PM Maxim Levitsky mlevitsk@redhat.com wrote:
Add an option to skip sanity check of number of still idle pages, and set it by default to skip, in case hypervisor or NUMA balancing is detected.
Signed-off-by: Maxim Levitsky mlevitsk@redhat.com
Thanks Maxim! I'm still working on a respin of this test with MGLRU integration, like [1]. Sorry it's taking me so long. I'll apply my changes on top of yours.
.../selftests/kvm/access_tracking_perf_test.c | 33 ++++++++++++++++--- .../testing/selftests/kvm/include/test_util.h | 1 + tools/testing/selftests/kvm/lib/test_util.c | 7 ++++ 3 files changed, 37 insertions(+), 4 deletions(-)
diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index 3c7defd34f56..6d50c829f00c 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -65,6 +65,8 @@ static int vcpu_last_completed_iteration[KVM_MAX_VCPUS]; /* Whether to overlap the regions of memory vCPUs access. */ static bool overlap_memory_access;
+static int warn_on_too_many_idle_pages = -1;
struct test_params { /* The backing source for the region of memory. */ enum vm_mem_backing_src_type backing_src; @@ -184,11 +186,10 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm, * are cached and the guest won't see the "idle" bit cleared. */ if (still_idle >= pages / 10) { -#ifdef __x86_64__
TEST_ASSERT(this_cpu_has(X86_FEATURE_HYPERVISOR),
TEST_ASSERT(warn_on_too_many_idle_pages,
I think this assertion is flipped (or how warn_on_too_many_idle_pages is being set is flipped, see below).
"vCPU%d: Too many pages still idle (%lu out of %lu)", vcpu_idx, still_idle, pages);
-#endif
printf("WARNING: vCPU%d: Too many pages still idle (%lu out of %lu), " "this will affect performance results.\n", vcpu_idx, still_idle, pages);
@@ -342,6 +343,8 @@ static void help(char *name) printf(" -v: specify the number of vCPUs to run.\n"); printf(" -o: Overlap guest memory accesses instead of partitioning\n" " them into a separate region of memory for each vCPU.\n");
printf(" -w: Skip or force enable the check that after dirtying the guest memory, most (90%%) of \n"
"it is reported as dirty again (0/1)"); backing_src_help("-s"); puts(""); exit(0);
@@ -359,7 +362,7 @@ int main(int argc, char *argv[])
guest_modes_append_default();
while ((opt = getopt(argc, argv, "hm:b:v:os:")) != -1) {
while ((opt = getopt(argc, argv, "hm:b:v:os:w:")) != -1) { switch (opt) { case 'm': guest_modes_cmdline(optarg);
@@ -376,6 +379,11 @@ int main(int argc, char *argv[]) case 's': params.backing_src = parse_backing_src_type(optarg); break;
case 'w':
warn_on_too_many_idle_pages =
atoi_non_negative("1 - enable warning, 0 - disable",
optarg);
We still get a "warning" either way, right? Maybe this should be called "fail_on_too_many_idle_pages" (in which case the above assertion is indeed flipped). Or "warn_on_too_many_idle_pages" should mean *only* warn, i.e., *don't* fail, in which case, below we need to flip how we set it below.
Agreed. I like the "warn" terminology, Maybe this?
printf(" -w: Control whether the test warns or fails if more than 10%\n" " of pages are still seen as idle/old after accessing guest\n" " memory. >0 == warn only, 0 == fail, <0 == auto. For auto\n" " mode, the test fails by default, but switches to warn only\n" " if NUMA balancing is enabled or the test detects it's running\n" " in a VM.");
LGTM.
And let the user explicitly select auto:
case 'w': warn_only = atoi_paranoid(optarg); break;
Then the auto resolving works as below, and as James pointed out, the assert becomes
TEST_ASSERT(!warn_only, ....);
I think the auto-resolving below needs to be flipped, and the TEST_ASSERT should be for `warn_only`, not `!warn_only`.
If warn_only == 1, the assert should pass.
break; case 'h': default: help(argv[0]);
@@ -386,6 +394,23 @@ int main(int argc, char *argv[]) page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR); __TEST_REQUIRE(page_idle_fd >= 0, "CONFIG_IDLE_PAGE_TRACKING is not enabled");
if (warn_on_too_many_idle_pages == -1) {
+#ifdef __x86_64__
if (this_cpu_has(X86_FEATURE_HYPERVISOR)) {
printf("Skipping idle page count sanity check, because the test is run nested\n");
warn_on_too_many_idle_pages = 0;
} else
+#endif
if (is_numa_balancing_enabled()) {
printf("Skipping idle page count sanity check, because NUMA balance is enabled\n");
warn_on_too_many_idle_pages = 0;
} else {
warn_on_too_many_idle_pages = 1;
}
} else if (!warn_on_too_many_idle_pages) {
printf("Skipping idle page count sanity check, because this was requested by the user\n");
Eh, I vote to omit this. The sanity check is still there, it's just degraded to a warn. I'm not totally against it, just seems superfluous and potentially confusing.
I agree, it's not adding much.
Separately: I've finished the MGLRU version of this test. It uses MGLRU if it is available, and marking pages as idle is much faster when using it. If MGLRU is enabled but otherwise not usable, the test fails, as the idle page bitmap is no longer usable for this test.
I'm happy to post a new version of Maxim's patch with the MGLRU patches too, Maxim, if you're okay with that.
On Wed, Mar 26, 2025, James Houghton wrote:
On Wed, Mar 26, 2025 at 11:41 AM Sean Christopherson seanjc@google.com wrote:
Then the auto resolving works as below, and as James pointed out, the assert becomes
TEST_ASSERT(!warn_only, ....);
I think the auto-resolving below needs to be flipped, and the TEST_ASSERT should be for `warn_only`, not `!warn_only`.
If warn_only == 1, the assert should pass.
/facepalm, yep
break; case 'h': default: help(argv[0]);
@@ -386,6 +394,23 @@ int main(int argc, char *argv[]) page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR); __TEST_REQUIRE(page_idle_fd >= 0, "CONFIG_IDLE_PAGE_TRACKING is not enabled");
if (warn_on_too_many_idle_pages == -1) {
+#ifdef __x86_64__
if (this_cpu_has(X86_FEATURE_HYPERVISOR)) {
printf("Skipping idle page count sanity check, because the test is run nested\n");
warn_on_too_many_idle_pages = 0;
} else
+#endif
if (is_numa_balancing_enabled()) {
printf("Skipping idle page count sanity check, because NUMA balance is enabled\n");
warn_on_too_many_idle_pages = 0;
} else {
warn_on_too_many_idle_pages = 1;
}
} else if (!warn_on_too_many_idle_pages) {
printf("Skipping idle page count sanity check, because this was requested by the user\n");
Eh, I vote to omit this. The sanity check is still there, it's just degraded to a warn. I'm not totally against it, just seems superfluous and potentially confusing.
I agree, it's not adding much.
Separately: I've finished the MGLRU version of this test. It uses MGLRU if it is available, and marking pages as idle is much faster when using it. If MGLRU is enabled but otherwise not usable, the test fails, as the idle page bitmap is no longer usable for this test.
I'm happy to post a new version of Maxim's patch with the MGLRU patches too, Maxim, if you're okay with that.
On Tue, 2025-03-25 at 11:01 -0700, James Houghton wrote:
On Mon, Mar 24, 2025 at 6:57 PM Maxim Levitsky mlevitsk@redhat.com wrote:
Add an option to skip sanity check of number of still idle pages, and set it by default to skip, in case hypervisor or NUMA balancing is detected.
Signed-off-by: Maxim Levitsky mlevitsk@redhat.com
Thanks Maxim! I'm still working on a respin of this test with MGLRU integration, like [1]. Sorry it's taking me so long. I'll apply my changes on top of yours.
.../selftests/kvm/access_tracking_perf_test.c | 33 ++++++++++++++++--- .../testing/selftests/kvm/include/test_util.h | 1 + tools/testing/selftests/kvm/lib/test_util.c | 7 ++++ 3 files changed, 37 insertions(+), 4 deletions(-)
diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index 3c7defd34f56..6d50c829f00c 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -65,6 +65,8 @@ static int vcpu_last_completed_iteration[KVM_MAX_VCPUS]; /* Whether to overlap the regions of memory vCPUs access. */ static bool overlap_memory_access;
+static int warn_on_too_many_idle_pages = -1;
struct test_params { /* The backing source for the region of memory. */ enum vm_mem_backing_src_type backing_src; @@ -184,11 +186,10 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm, * are cached and the guest won't see the "idle" bit cleared. */ if (still_idle >= pages / 10) { -#ifdef __x86_64__
TEST_ASSERT(this_cpu_has(X86_FEATURE_HYPERVISOR),
TEST_ASSERT(warn_on_too_many_idle_pages,
I think this assertion is flipped (or how warn_on_too_many_idle_pages is being set is flipped, see below).
Yes it is no doubt about.
I didn't notice this when I flipped the meaning of the variable as Sean suggested. Thanks!
Best regards, Maxim Levitsky
"vCPU%d: Too many pages still idle (%lu out of %lu)", vcpu_idx, still_idle, pages);
-#endif
printf("WARNING: vCPU%d: Too many pages still idle (%lu out of %lu), " "this will affect performance results.\n", vcpu_idx, still_idle, pages);
@@ -342,6 +343,8 @@ static void help(char *name) printf(" -v: specify the number of vCPUs to run.\n"); printf(" -o: Overlap guest memory accesses instead of partitioning\n" " them into a separate region of memory for each vCPU.\n");
printf(" -w: Skip or force enable the check that after dirtying the guest memory, most (90%%) of \n"
"it is reported as dirty again (0/1)"); backing_src_help("-s"); puts(""); exit(0);
@@ -359,7 +362,7 @@ int main(int argc, char *argv[])
guest_modes_append_default();
while ((opt = getopt(argc, argv, "hm:b:v:os:")) != -1) {
while ((opt = getopt(argc, argv, "hm:b:v:os:w:")) != -1) { switch (opt) { case 'm': guest_modes_cmdline(optarg);
@@ -376,6 +379,11 @@ int main(int argc, char *argv[]) case 's': params.backing_src = parse_backing_src_type(optarg); break;
case 'w':
warn_on_too_many_idle_pages =
atoi_non_negative("1 - enable warning, 0 - disable",
optarg);
We still get a "warning" either way, right? Maybe this should be called "fail_on_too_many_idle_pages" (in which case the above assertion is indeed flipped). Or "warn_on_too_many_idle_pages" should mean *only* warn, i.e., *don't* fail, in which case, below we need to flip how we set it below.
break; case 'h': default: help(argv[0]);
@@ -386,6 +394,23 @@ int main(int argc, char *argv[]) page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR); __TEST_REQUIRE(page_idle_fd >= 0, "CONFIG_IDLE_PAGE_TRACKING is not enabled");
if (warn_on_too_many_idle_pages == -1) {
+#ifdef __x86_64__
if (this_cpu_has(X86_FEATURE_HYPERVISOR)) {
printf("Skipping idle page count sanity check, because the test is run nested\n");
warn_on_too_many_idle_pages = 0;
} else
+#endif
if (is_numa_balancing_enabled()) {
printf("Skipping idle page count sanity check, because NUMA balance is enabled\n");
warn_on_too_many_idle_pages = 0;
} else {
warn_on_too_many_idle_pages = 1;
}
} else if (!warn_on_too_many_idle_pages) {
printf("Skipping idle page count sanity check, because this was requested by the user\n");
}
close(page_idle_fd); for_each_guest_mode(run_test, ¶ms);
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 3e473058849f..1bc9b0a92427 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -153,6 +153,7 @@ bool is_backing_src_hugetlb(uint32_t i); void backing_src_help(const char *flag); enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name); long get_run_delay(void); +bool is_numa_balancing_enabled(void);
/*
- Whether or not the given source type is shared memory (as opposed to
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 3dc8538f5d69..03eb99af9b8d 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -176,6 +176,13 @@ size_t get_trans_hugepagesz(void) return get_sysfs_val("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"); }
+bool is_numa_balancing_enabled(void) +{
if (!test_sysfs_path("/proc/sys/kernel/numa_balancing"))
return false;
return get_sysfs_val("/proc/sys/kernel/numa_balancing") == 1;
+}
size_t get_def_hugetlb_pagesz(void) { char buf[64]; -- 2.26.3
linux-kselftest-mirror@lists.linaro.org