From: Zi Yan <ziy@nvidia.com>
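Extend the split_huge_pages_in_range_pid debugfs interface with an optional fourth input field, the target order, so that it can be used to test split_huge_page_to_list_to_order() for pagecache THPs. The accepted input is now "pid,vaddr_start,vaddr_end[,order]"; the order defaults to 0 when omitted, and order 1 is rejected with -EINVAL. Also add selftest cases that exercise split_huge_page_to_list_to_order() both via debugfs and by truncating a file.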
Signed-off-by: Zi Yan <ziy@nvidia.com> --- mm/huge_memory.c | 13 +-- .../selftests/vm/split_huge_page_test.c | 102 +++++++++++++++++- 2 files changed, 105 insertions(+), 10 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 88f50da40c9b..b7470607a08b 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2974,7 +2974,7 @@ static ssize_t split_huge_pages_in_range_pid_write(struct file *file, static DEFINE_MUTEX(mutex); ssize_t ret; char input_buf[80]; /* hold pid, start_vaddr, end_vaddr */ - int pid; + int pid, to_order = 0; unsigned long vaddr_start, vaddr_end, addr; nodemask_t task_nodes; struct mm_struct *mm; @@ -2990,8 +2990,9 @@ static ssize_t split_huge_pages_in_range_pid_write(struct file *file, goto out;
input_buf[80] = '\0'; - ret = sscanf(input_buf, "%d,%lx,%lx", &pid, &vaddr_start, &vaddr_end); - if (ret != 3) { + ret = sscanf(input_buf, "%d,%lx,%lx,%d", &pid, &vaddr_start, &vaddr_end, &to_order); + /* cannot split to order-1 THP, which is not possible */ + if ((ret != 3 && ret != 4) || to_order == 1) { ret = -EINVAL; goto out; } @@ -2999,8 +3000,8 @@ static ssize_t split_huge_pages_in_range_pid_write(struct file *file, vaddr_end &= PAGE_MASK;
ret = strlen(input_buf); - pr_debug("split huge pages in pid: %d, vaddr: [%lx - %lx]\n", - pid, vaddr_start, vaddr_end); + pr_debug("split huge pages in pid: %d, vaddr: [%lx - %lx], to order: %d\n", + pid, vaddr_start, vaddr_end, to_order);
mm = find_mm_struct(pid, &task_nodes); if (IS_ERR(mm)) { @@ -3038,7 +3039,7 @@ static ssize_t split_huge_pages_in_range_pid_write(struct file *file, addr += page_size(page) - PAGE_SIZE;
/* reset addr if split fails */ - if (split_huge_page(page)) + if (split_huge_page_to_list_to_order(page, NULL, to_order)) addr -= (page_size(page) - PAGE_SIZE);
unlock_page(page); diff --git a/tools/testing/selftests/vm/split_huge_page_test.c b/tools/testing/selftests/vm/split_huge_page_test.c index c8a32ae9e13a..bcbc5a9d327c 100644 --- a/tools/testing/selftests/vm/split_huge_page_test.c +++ b/tools/testing/selftests/vm/split_huge_page_test.c @@ -16,6 +16,7 @@ #include <sys/wait.h> #include <malloc.h> #include <stdbool.h> +#include <time.h>
#define PAGE_4KB (4096UL) #define PAGE_2MB (512UL*PAGE_4KB) @@ -31,6 +32,7 @@
#define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages_in_range_pid" #define SMAP_PATH "/proc/self/smaps" +#define THP_FS_PATH "/mnt/thp_fs" #define INPUT_MAX 80
static int write_file(const char *path, const char *buf, size_t buflen) @@ -50,13 +52,13 @@ static int write_file(const char *path, const char *buf, size_t buflen) return (unsigned int) numwritten; }
-static void write_debugfs(int pid, uint64_t vaddr_start, uint64_t vaddr_end) +static void write_debugfs(int pid, uint64_t vaddr_start, uint64_t vaddr_end, int order) { char input[INPUT_MAX]; int ret;
- ret = snprintf(input, INPUT_MAX, "%d,%lx,%lx", pid, vaddr_start, - vaddr_end); + ret = snprintf(input, INPUT_MAX, "%d,%lx,%lx,%d", pid, vaddr_start, + vaddr_end, order); if (ret >= INPUT_MAX) { printf("%s: Debugfs input is too long\n", __func__); exit(EXIT_FAILURE); @@ -139,7 +141,7 @@ void split_pmd_thp(void) }
/* split all possible huge pages */ - write_debugfs(getpid(), (uint64_t)one_page, (uint64_t)one_page + len); + write_debugfs(getpid(), (uint64_t)one_page, (uint64_t)one_page + len, 0);
*one_page = 0;
@@ -153,9 +155,101 @@ void split_pmd_thp(void) free(one_page); }
+void create_pagecache_thp_and_fd(size_t fd_size, int *fd, char **addr) +{ + const char testfile[] = THP_FS_PATH "/test"; + size_t i; + int dummy; + + srand(time(NULL)); + + *fd = open(testfile, O_CREAT | O_RDWR, 0664); + + for (i = 0; i < fd_size; i++) { + unsigned char byte = rand(); + + write(*fd, &byte, sizeof(byte)); + } + close(*fd); + sync(); + *fd = open("/proc/sys/vm/drop_caches", O_WRONLY); + if (*fd == -1) { + perror("open drop_caches"); + exit(EXIT_FAILURE); + } + if (write(*fd, "3", 1) != 1) { + perror("write to drop_caches"); + exit(EXIT_FAILURE); + } + close(*fd); + + *fd = open(testfile, O_RDWR); + + *addr = mmap(NULL, fd_size, PROT_READ|PROT_WRITE, MAP_SHARED, *fd, 0); + if (*addr == (char *)-1) { + perror("cannot mmap"); + exit(1); + } + madvise(*addr, fd_size, MADV_HUGEPAGE); + + for (size_t i = 0; i < fd_size; i++) + dummy += *(*addr + i); +} + +void split_thp_in_pagecache_to_order(int order) +{ + int fd; + char *addr; + size_t fd_size = 2 * PAGE_2MB, i; + + create_pagecache_thp_and_fd(fd_size, &fd, &addr); + + printf("split %ld kB pagecache page to order %d ... ", fd_size >> 10, order); + write_debugfs(getpid(), (uint64_t)addr, (uint64_t)addr + fd_size, order); + + for (i = 0; i < fd_size; i++) + *(addr + i) = (char)i; + + close(fd); + printf("done\n"); +} + +void truncate_thp_in_pagecache_to_order(int order) +{ + int fd; + char *addr; + size_t fd_size = 2 * PAGE_2MB, i; + + create_pagecache_thp_and_fd(fd_size, &fd, &addr); + + printf("truncate %ld kB pagecache page to size %lu kB ... ", fd_size >> 10, 4UL << order); + ftruncate(fd, PAGE_4KB << order); + + for (i = 0; i < (PAGE_4KB << order); i++) + *(addr + i) = (char)i; + + close(fd); + printf("done\n"); +} + int main(int argc, char **argv) { + int i; + + if (geteuid() != 0) { + printf("Please run the benchmark as root\n"); + exit(EXIT_FAILURE); + } + split_pmd_thp();
+ for (i = 8; i >= 0; i--) + if (i != 1) + split_thp_in_pagecache_to_order(i); + + for (i = 8; i >= 0; i--) + if (i != 1) + truncate_thp_in_pagecache_to_order(i); + return 0; }