On 16 Mar 2021, at 19:18, Yang Shi wrote:
On Mon, Mar 15, 2021 at 1:34 PM Zi Yan zi.yan@sent.com wrote:
From: Zi Yan ziy@nvidia.com
Further extend <debugfs>/split_huge_pages to accept "<path>,<off_start>,<off_end>" for file-backed THP split tests, since tmpfs may have files backed by THPs that are not mapped anywhere.
Update selftest program to test file-backed THP split too.
Suggested-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Zi Yan <ziy@nvidia.com>
mm/huge_memory.c | 95 ++++++++++++++++++- .../selftests/vm/split_huge_page_test.c | 79 ++++++++++++++- 2 files changed, 166 insertions(+), 8 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 3bfee54e2cd0..da91ee97d944 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3043,12 +3043,72 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start, return ret; }
+static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start,
pgoff_t off_end)+{
struct filename *file;struct file *candidate;struct address_space *mapping;int ret = -EINVAL;pgoff_t off_cur;unsigned long total = 0, split = 0;file = getname_kernel(file_path);if (IS_ERR(file))return ret;candidate = file_open_name(file, O_RDONLY, 0);if (IS_ERR(candidate))goto out;pr_info("split file-backed THPs in file: %s, offset: [0x%lx - 0x%lx]\n",file_path, off_start, off_end);mapping = candidate->f_mapping;for (off_cur = off_start; off_cur < off_end;) {struct page *fpage = pagecache_get_page(mapping, off_cur,FGP_ENTRY | FGP_HEAD, 0);if (xa_is_value(fpage) || !fpage) {

Why do you have FGP_ENTRY? It seems it would return the page instead of NULL if the page is a value entry. So I think you could remove FGP_ENTRY and the xa_is_value() check as well.
The comment on FGP_ENTRY says “If there is a shadow/swap/DAX entry, return it instead of allocating a new page to replace it”. I do not think we want to allocate new pages here. I mostly followed the way shmem_getpage_gfp() uses pagecache_get_page(), but without swapping in or allocating new pages.
off_cur += PAGE_SIZE;continue;}if (!is_transparent_hugepage(fpage)) {off_cur += PAGE_SIZE;goto next;}total++;off_cur = fpage->index + thp_size(fpage);if (!trylock_page(fpage))goto next;if (!split_huge_page(fpage))split++;unlock_page(fpage);+next:
put_page(fpage);}filp_close(candidate, NULL);ret = 0;pr_info("%lu of %lu file-backed THP split\n", split, total);+out:
putname(file);return ret;+}
static ssize_t split_huge_pages_write(struct file *file, const char __user *buf, size_t count, loff_t *ppops) { static DEFINE_MUTEX(mutex); ssize_t ret;
char input_buf[80]; /* hold pid, start_vaddr, end_vaddr */
/* hold pid, start_vaddr, end_vaddr or file_path, off_start, off_end */char input_buf[MAX_INPUT];

I didn't find where MAX_INPUT is defined in your patch. I only saw that include/uapi/linux/limits.h defines it. Is that the one you are actually referring to?
Yeah, I wanted to use 255 as the max input size and found MAX_INPUT. Given your comment, I think it is better to define a macro here explicitly.
int pid; unsigned long vaddr_start, vaddr_end;@@ -3058,11 +3118,40 @@ static ssize_t split_huge_pages_write(struct file *file, const char __user *buf,
ret = -EFAULT;
memset(input_buf, 0, 80);
memset(input_buf, 0, MAX_INPUT); if (copy_from_user(input_buf, buf, min_t(size_t, count, 80))) goto out;
input_buf[79] = '\0';
input_buf[MAX_INPUT - 1] = '\0';if (input_buf[0] == '/') {char *tok;char *buf = input_buf;char file_path[MAX_INPUT];pgoff_t off_start = 0, off_end = 0;size_t input_len = strlen(input_buf);tok = strsep(&buf, ",");if (tok) {strncpy(file_path, tok, MAX_INPUT);} else {ret = -EINVAL;goto out;}ret = sscanf(buf, "0x%lx,0x%lx", &off_start, &off_end);if (ret != 2) {pr_info("ret: %ld\n", ret);ret = -EINVAL;goto out;}ret = split_huge_pages_in_file(file_path, off_start, off_end);if (!ret)ret = input_len;goto out;}ret = sscanf(input_buf, "%d,0x%lx,0x%lx", &pid, &vaddr_start, &vaddr_end); if (ret == 1 && pid == 1) { split_huge_pages_all();diff --git a/tools/testing/selftests/vm/split_huge_page_test.c b/tools/testing/selftests/vm/split_huge_page_test.c index 9f33ddbb3182..0202702f7eda 100644 --- a/tools/testing/selftests/vm/split_huge_page_test.c +++ b/tools/testing/selftests/vm/split_huge_page_test.c @@ -7,11 +7,13 @@ #define _GNU_SOURCE #include <stdio.h> #include <stdlib.h> +#include <stdarg.h> #include <unistd.h> #include <inttypes.h> #include <string.h> #include <fcntl.h> #include <sys/mman.h> +#include <sys/mount.h> #include <malloc.h> #include <stdbool.h>
@@ -24,6 +26,9 @@ uint64_t pmd_pagesize; #define SMAP_PATH "/proc/self/smaps" #define INPUT_MAX 80
+#define PID_FMT "%d,0x%lx,0x%lx" +#define PATH_FMT "%s,0x%lx,0x%lx"
#define PFN_MASK ((1UL<<55)-1) #define KPF_THP (1UL<<22)
@@ -87,13 +92,16 @@ static int write_file(const char *path, const char *buf, size_t buflen) return (unsigned int) numwritten; }
-static void write_debugfs(int pid, uint64_t vaddr_start, uint64_t vaddr_end) +static void write_debugfs(const char *fmt, ...) { char input[INPUT_MAX]; int ret;
va_list argp;va_start(argp, fmt);ret = vsnprintf(input, INPUT_MAX, fmt, argp);va_end(argp);
ret = snprintf(input, INPUT_MAX, "%d,0x%lx,0x%lx", pid, vaddr_start,vaddr_end); if (ret >= INPUT_MAX) { printf("%s: Debugfs input is too long\n", __func__); exit(EXIT_FAILURE);@@ -178,7 +186,8 @@ void split_pmd_thp(void) }
/* split all THPs */
write_debugfs(getpid(), (uint64_t)one_page, (uint64_t)one_page + len);
write_debugfs(PID_FMT, getpid(), (uint64_t)one_page,(uint64_t)one_page + len); for (i = 0; i < len; i++) if (one_page[i] != (char)i) {@@ -269,7 +278,7 @@ void split_pte_mapped_thp(void) }
/* split all remapped THPs */
write_debugfs(getpid(), (uint64_t)pte_mapped,
write_debugfs(PID_FMT, getpid(), (uint64_t)pte_mapped, (uint64_t)pte_mapped + pagesize * 4); /* smap does not show THPs after mremap, use kpageflags instead */@@ -295,6 +304,65 @@ void split_pte_mapped_thp(void) close(kpageflags_fd); }
+void split_file_backed_thp(void) +{
int status;int fd;ssize_t num_written;char tmpfs_template[] = "/tmp/thp_split_XXXXXX";const char *tmpfs_loc = mkdtemp(tmpfs_template);char testfile[INPUT_MAX];status = mount("tmpfs", tmpfs_loc, "tmpfs", 0, "huge=always,size=4m");if (status) {printf("Unable to create a tmpfs for testing\n");exit(EXIT_FAILURE);}status = snprintf(testfile, INPUT_MAX, "%s/thp_file", tmpfs_loc);if (status >= INPUT_MAX) {printf("Fail to create file-backed THP split testing file\n");goto cleanup;}fd = open(testfile, O_CREAT|O_WRONLY);if (fd == -1) {perror("Cannot open testing file\n");goto cleanup;}/* write something to the file, so a file-backed THP can be allocated */num_written = write(fd, tmpfs_loc, sizeof(tmpfs_loc));close(fd);if (num_written < 1) {printf("Fail to write data to testing file\n");goto cleanup;}/* split the file-backed THP */write_debugfs(PATH_FMT, testfile, 0, 1024);status = unlink(testfile);if (status)perror("Cannot remove testing file\n");+cleanup:
status = umount(tmpfs_loc);if (status) {printf("Unable to umount %s\n", tmpfs_loc);exit(EXIT_FAILURE);}status = rmdir(tmpfs_loc);if (status) {perror("cannot remove tmp dir");exit(EXIT_FAILURE);}printf("file-backed THP split test done, please check dmesg for more information\n");+}
int main(int argc, char **argv) { if (geteuid() != 0) { @@ -308,6 +376,7 @@ int main(int argc, char **argv)
split_pmd_thp(); split_pte_mapped_thp();
split_file_backed_thp(); return 0;}
2.30.1
Thanks for the comments. :)
— Best Regards, Yan Zi