Hi Barry,
Thanks for the wonderful test program.
I have also used other swap test programs. A lot of those tests are harder to set up and run.
This test is very quick and simple to run. It can test some hard to hit corner cases for me.
I am able to reproduce the warning and the kernel oops with this test program. So for me, I am using it as a functional test to check that my allocator does not produce a crash. In that regard, it definitely provides value as a functional test.
Having a fallback percentage output is fine, as long as we don't fail the test based on a performance number.
I am also fine with moving the test under tools/mm, etc. I see good value in including the test in the tree one way or the other.
On Wed, Jun 19, 2024 at 5:27 PM Barry Song 21cnbao@gmail.com wrote:
From: Barry Song v-songbaohua@oppo.com
Both Ryan and Chris have been utilizing the small test program to aid in debugging and identifying issues with swap entry allocation. While a real or intricate workload might be more suitable for assessing the correctness and effectiveness of the swap allocation policy, a small test program presents a simpler means of understanding the problem and initially verifying the improvements being made.
Let's endeavor to integrate it into the self-test suite. Although it presently only accommodates 64KB and 4KB, I'm optimistic that we can expand its capabilities to support multiple sizes and simulate more complex systems in the future as required.
Signed-off-by: Barry Song v-songbaohua@oppo.com
tools/testing/selftests/mm/Makefile | 1 + .../selftests/mm/thp_swap_allocator_test.c | 192 ++++++++++++++++++ 2 files changed, 193 insertions(+)
Assuming we want to keep it as a selftest: you did not add your test to run_vmtests.sh.
You might need something like this:
--- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -418,6 +418,14 @@ CATEGORY="thp" run_test ./khugepaged -s 2
CATEGORY="thp" run_test ./transhuge-stress -d 20
+# config and swapon zram here. + +CATEGORY="thp" run_test ./thp_swap_allocator_test + +CATEGORY="thp" run_test ./thp_swap_allocator_test -s + +# swapoff zram here. + # Try to create XFS if not provided if [ -z "${SPLIT_HUGE_PAGE_TEST_XFS_PATH}" ]; then if test_selected "thp"; then
You can use the following XFS test as an example of how to set up the zram swap. XFS uses a file system mount; you would use swapon.
Also you need to update the usage string in run_vmtests.sh.
BTW, here is how I invoke the test runs:
kselftest_override_timeout=500 make -C tools/testing/selftests TARGETS=mm run_tests
The timeout is not for this test; it is for some other test that runs before thp_swap and causes run_vmtests.sh to exit before reaching thp_swap. I am running in a VM, so it is slower than a native machine.
create mode 100644 tools/testing/selftests/mm/thp_swap_allocator_test.c
diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index e1aa09ddaa3d..64164ad66835 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -65,6 +65,7 @@ TEST_GEN_FILES += mseal_test TEST_GEN_FILES += seal_elf TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += pagemap_ioctl +TEST_GEN_FILES += thp_swap_allocator_test TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += uffd-stress diff --git a/tools/testing/selftests/mm/thp_swap_allocator_test.c b/tools/testing/selftests/mm/thp_swap_allocator_test.c new file mode 100644 index 000000000000..4443a906d0f8 --- /dev/null +++ b/tools/testing/selftests/mm/thp_swap_allocator_test.c @@ -0,0 +1,192 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/*
- thp_swap_allocator_test
- The purpose of this test program is to help check whether THP swpout
- can correctly get swap slots to swap out as a whole instead of
- being split. It randomly releases swap entries through madvise
- DONTNEED and does swapout on two memory areas: a memory area for
- 64KB THP and the other area for small folios. The second memory
- area can be enabled by "-s".
- Before running the program, we need to setup a zRAM or similar
- swap device by:
- echo lzo > /sys/block/zram0/comp_algorithm
- echo 64M > /sys/block/zram0/disksize
- echo never > /sys/kernel/mm/transparent_hugepage/hugepages-2048kB/enabled
- echo always > /sys/kernel/mm/transparent_hugepage/hugepages-64kB/enabled
- mkswap /dev/zram0
- swapon /dev/zram0
This setup needs to go into run_vmtests.sh as well.
Also tear it down after the test.
Chris
- The expected result should be 0% anon swpout fallback ratio w/ or
- w/o "-s".
- Author(s): Barry Song v-songbaohua@oppo.com
- */
+#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <sys/mman.h> +#include <errno.h> +#include <time.h>
/* Sizes of the two test regions: the 64KB-THP area and the small-folio area. */
#define MEMSIZE_MTHP			(60 * 1024 * 1024)
#define MEMSIZE_SMALLFOLIO		(1 * 1024 * 1024)

/* Granularity at which each region is randomly discarded and rewritten. */
#define ALIGNMENT_MTHP			(64 * 1024)
#define ALIGNMENT_SMALLFOLIO		(4 * 1024)

/* Total number of bytes discarded from each region per iteration. */
#define TOTAL_DONTNEED_MTHP		(16 * 1024 * 1024)
#define TOTAL_DONTNEED_SMALLFOLIO	(768 * 1024)

/* Folio size of the mTHP order under test. */
#define MTHP_FOLIO_SIZE			(64 * 1024)

/* sysfs counters for 64KB THP swap-out and swap-out fallback. */
#define SWPOUT_PATH \
	"/sys/kernel/mm/transparent_hugepage/hugepages-64kB/stats/swpout"
#define SWPOUT_FALLBACK_PATH \
	"/sys/kernel/mm/transparent_hugepage/hugepages-64kB/stats/swpout_fallback"
/*
 * Allocate @size bytes aligned to @alignment using posix_memalign().
 *
 * Returns the buffer (release it with free()), or NULL after printing a
 * diagnostic to stderr when the allocation fails.
 */
static void *aligned_alloc_mem(size_t size, size_t alignment)
{
	void *mem = NULL;
	int err;

	/*
	 * posix_memalign() reports failure through its return value and does
	 * not set errno, so perror() would print a stale or unrelated message.
	 */
	err = posix_memalign(&mem, alignment, size);
	if (err != 0) {
		fprintf(stderr, "posix_memalign: %s\n", strerror(err));
		return NULL;
	}

	return mem;
}
/*
 * Discard and then rewrite randomly chosen chunks of a memory region.
 *
 * @mem:                 start of the region
 * @mem_size:            size of the region in bytes
 * @align_size:          size of each chunk; chunk offsets are multiples of it
 * @total_dontneed_size: total bytes to discard; total_dontneed_size /
 *                       align_size chunks are picked (the same chunk may be
 *                       picked more than once)
 *
 * Each picked chunk is dropped with madvise(MADV_DONTNEED) and then filled
 * with 0x11 so it holds data again. A failing madvise() is reported but does
 * not stop the loop. Nothing is done if @align_size is 0 or the region is
 * smaller than one chunk.
 */
static void random_madvise_dontneed(void *mem, size_t mem_size,
		size_t align_size, size_t total_dontneed_size)
{
	size_t num_slots;
	size_t num_pages;
	size_t i;

	/* Avoid dividing (or taking a remainder) by zero below. */
	if (align_size == 0 || mem_size < align_size)
		return;

	num_slots = mem_size / align_size;
	num_pages = total_dontneed_size / align_size;

	for (i = 0; i < num_pages; ++i) {
		size_t offset = ((size_t)rand() % num_slots) * align_size;
		void *addr = (char *)mem + offset;

		if (madvise(addr, align_size, MADV_DONTNEED) != 0)
			perror("madvise dontneed");

		memset(addr, 0x11, align_size);
	}
}
/*
 * Read one unsigned decimal counter from the file at @path.
 *
 * Returns the parsed value. Returns 0 after printing a diagnostic naming
 * @path if the file cannot be opened or does not start with a number.
 */
static unsigned long read_stat(const char *path)
{
	FILE *file;
	unsigned long value = 0;

	file = fopen(path, "r");
	if (!file) {
		fprintf(stderr, "fopen %s: %s\n", path, strerror(errno));
		return 0;
	}

	if (fscanf(file, "%lu", &value) != 1) {
		/*
		 * A matching failure does not set errno; only report errno
		 * when the stream itself hit a read error.
		 */
		if (ferror(file))
			fprintf(stderr, "fscanf %s: %s\n", path, strerror(errno));
		else
			fprintf(stderr, "fscanf %s: no numeric value found\n", path);
		value = 0;
	}

	fclose(file);
	return value;
}
+int main(int argc, char *argv[]) +{
int use_small_folio = 0;
int i;
void *mem1 = aligned_alloc_mem(MEMSIZE_MTHP, ALIGNMENT_MTHP);
void *mem2 = NULL;
if (mem1 == NULL) {
fprintf(stderr, "Failed to allocate 60MB memory\n");
return EXIT_FAILURE;
}
if (madvise(mem1, MEMSIZE_MTHP, MADV_HUGEPAGE) != 0) {
perror("madvise hugepage for mem1");
free(mem1);
return EXIT_FAILURE;
}
for (i = 1; i < argc; ++i) {
if (strcmp(argv[i], "-s") == 0)
use_small_folio = 1;
}
if (use_small_folio) {
mem2 = aligned_alloc_mem(MEMSIZE_SMALLFOLIO, ALIGNMENT_MTHP);
if (mem2 == NULL) {
fprintf(stderr, "Failed to allocate 1MB memory\n");
free(mem1);
return EXIT_FAILURE;
}
if (madvise(mem2, MEMSIZE_SMALLFOLIO, MADV_NOHUGEPAGE) != 0) {
perror("madvise nohugepage for mem2");
free(mem1);
free(mem2);
return EXIT_FAILURE;
}
}
for (i = 0; i < 100; ++i) {
unsigned long initial_swpout;
unsigned long initial_swpout_fallback;
unsigned long final_swpout;
unsigned long final_swpout_fallback;
unsigned long swpout_inc;
unsigned long swpout_fallback_inc;
double fallback_percentage;
initial_swpout = read_stat(SWPOUT_PATH);
initial_swpout_fallback = read_stat(SWPOUT_FALLBACK_PATH);
random_madvise_dontneed(mem1, MEMSIZE_MTHP, ALIGNMENT_MTHP,
TOTAL_DONTNEED_MTHP);
if (use_small_folio) {
random_madvise_dontneed(mem2, MEMSIZE_SMALLFOLIO,
ALIGNMENT_SMALLFOLIO,
TOTAL_DONTNEED_SMALLFOLIO);
}
if (madvise(mem1, MEMSIZE_MTHP, MADV_PAGEOUT) != 0) {
perror("madvise pageout for mem1");
free(mem1);
if (mem2 != NULL)
free(mem2);
return EXIT_FAILURE;
}
if (use_small_folio) {
if (madvise(mem2, MEMSIZE_SMALLFOLIO, MADV_PAGEOUT) != 0) {
perror("madvise pageout for mem2");
free(mem1);
free(mem2);
return EXIT_FAILURE;
}
}
final_swpout = read_stat(SWPOUT_PATH);
final_swpout_fallback = read_stat(SWPOUT_FALLBACK_PATH);
swpout_inc = final_swpout - initial_swpout;
swpout_fallback_inc = final_swpout_fallback - initial_swpout_fallback;
fallback_percentage = (double)swpout_fallback_inc /
(swpout_fallback_inc + swpout_inc) * 100;
printf("Iteration %d: swpout inc: %lu, swpout fallback inc: %lu, Fallback percentage: %.2f%%\n",
i + 1, swpout_inc, swpout_fallback_inc, fallback_percentage);
Chris
}
free(mem1);
if (mem2 != NULL)
free(mem2);
return EXIT_SUCCESS;
+}
2.34.1