This is a follow-up to [1]: [PATCH v9 0/3] mm: process/cgroup ksm support
which is now in mm-stable. Ideally we'd get at least patch #1 into the same kernel release as [1], so the semantics of setting PR_SET_MEMORY_MERGE=0 are unchanged between kernel versions.
This series (1) makes PR_SET_MEMORY_MERGE=0 unmerge pages like setting MADV_UNMERGEABLE does, (2) adds a selftest for it, and (3) factors out the disabling of KSM from the s390/gmap code.
v1 -> v2:
- "mm/ksm: unmerge and clear VM_MERGEABLE when setting PR_SET_MEMORY_MERGE=0"
  -> Cleanup one if/else
  -> Add doc for ksm_disable_merge_any()
- Added ACKs
[1] https://lkml.kernel.org/r/20230418051342.1919757-1-shr@devkernel.io
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Stefan Roesch <shr@devkernel.io>
Cc: Rik van Riel <riel@surriel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Janosch Frank <frankja@linux.ibm.com>
Cc: Claudio Imbrenda <imbrenda@linux.ibm.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Shuah Khan <shuah@kernel.org>
David Hildenbrand (3): mm/ksm: unmerge and clear VM_MERGEABLE when setting PR_SET_MEMORY_MERGE=0 selftests/ksm: ksm_functional_tests: add prctl unmerge test mm/ksm: move disabling KSM from s390/gmap code to KSM code
arch/s390/mm/gmap.c | 20 +----- include/linux/ksm.h | 7 ++ kernel/sys.c | 12 +--- mm/ksm.c | 70 +++++++++++++++++++ .../selftests/mm/ksm_functional_tests.c | 46 ++++++++++-- 5 files changed, 121 insertions(+), 34 deletions(-)
Let's unmerge any KSM pages when setting PR_SET_MEMORY_MERGE=0, and clear the VM_MERGEABLE flag from all VMAs -- just like KSM would. Of course, only do that if we previously set PR_SET_MEMORY_MERGE=1.
Acked-by: Stefan Roesch shr@devkernel.io Signed-off-by: David Hildenbrand david@redhat.com --- include/linux/ksm.h | 1 + kernel/sys.c | 12 +++------ mm/ksm.c | 59 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 9 deletions(-)
diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 7a9b76fb6c3f..429efa6ff4ae 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -21,6 +21,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
void ksm_add_vma(struct vm_area_struct *vma); int ksm_enable_merge_any(struct mm_struct *mm); +int ksm_disable_merge_any(struct mm_struct *mm);
int __ksm_enter(struct mm_struct *mm); void __ksm_exit(struct mm_struct *mm); diff --git a/kernel/sys.c b/kernel/sys.c index 72cdb16e2636..339fee3eff6a 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2695,16 +2695,10 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, if (mmap_write_lock_killable(me->mm)) return -EINTR;
- if (arg2) { + if (arg2) error = ksm_enable_merge_any(me->mm); - } else { - /* - * TODO: we might want disable KSM on all VMAs and - * trigger unsharing to completely disable KSM. - */ - clear_bit(MMF_VM_MERGE_ANY, &me->mm->flags); - error = 0; - } + else + error = ksm_disable_merge_any(me->mm); mmap_write_unlock(me->mm); break; case PR_GET_MEMORY_MERGE: diff --git a/mm/ksm.c b/mm/ksm.c index 9e48258985d2..823bb3475a68 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -2520,6 +2520,22 @@ static void __ksm_add_vma(struct vm_area_struct *vma) vm_flags_set(vma, VM_MERGEABLE); }
+static int __ksm_del_vma(struct vm_area_struct *vma) +{ + int err; + + if (!(vma->vm_flags & VM_MERGEABLE)) + return 0; + + if (vma->anon_vma) { + err = unmerge_ksm_pages(vma, vma->vm_start, vma->vm_end); + if (err) + return err; + } + + vm_flags_clear(vma, VM_MERGEABLE); + return 0; +} /** * ksm_add_vma - Mark vma as mergeable if compatible * @@ -2542,6 +2558,20 @@ static void ksm_add_vmas(struct mm_struct *mm) __ksm_add_vma(vma); }
+static int ksm_del_vmas(struct mm_struct *mm) +{ + struct vm_area_struct *vma; + int err; + + VMA_ITERATOR(vmi, mm, 0); + for_each_vma(vmi, vma) { + err = __ksm_del_vma(vma); + if (err) + return err; + } + return 0; +} + /** * ksm_enable_merge_any - Add mm to mm ksm list and enable merging on all * compatible VMA's @@ -2569,6 +2599,35 @@ int ksm_enable_merge_any(struct mm_struct *mm) return 0; }
+/** + * ksm_disable_merge_any - Disable merging on all compatible VMA's of the mm, + * previously enabled via ksm_enable_merge_any(). + * + * Disabling merging implies unmerging any merged pages, like setting + * MADV_UNMERGEABLE would. If unmerging fails, the whole operation fails and + * merging on all compatible VMA's remains enabled. + * + * @mm: Pointer to mm + * + * Returns 0 on success, otherwise error code + */ +int ksm_disable_merge_any(struct mm_struct *mm) +{ + int err; + + if (!test_bit(MMF_VM_MERGE_ANY, &mm->flags)) + return 0; + + err = ksm_del_vmas(mm); + if (err) { + ksm_add_vmas(mm); + return err; + } + + clear_bit(MMF_VM_MERGE_ANY, &mm->flags); + return 0; +} + int ksm_madvise(struct vm_area_struct *vma, unsigned long start, unsigned long end, int advice, unsigned long *vm_flags) {
Let's test whether setting PR_SET_MEMORY_MERGE to 0 after setting it to 1 will unmerge pages, similar to how setting MADV_UNMERGEABLE after setting MADV_MERGEABLE would.
Acked-by: Stefan Roesch shr@devkernel.io Signed-off-by: David Hildenbrand david@redhat.com --- .../selftests/mm/ksm_functional_tests.c | 46 ++++++++++++++++--- 1 file changed, 40 insertions(+), 6 deletions(-)
diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c index 7bc9fc17c9f0..26853badae70 100644 --- a/tools/testing/selftests/mm/ksm_functional_tests.c +++ b/tools/testing/selftests/mm/ksm_functional_tests.c @@ -91,9 +91,10 @@ static int ksm_merge(void) return 0; }
-static char *mmap_and_merge_range(char val, unsigned long size) +static char *mmap_and_merge_range(char val, unsigned long size, bool use_prctl) { char *map; + int ret;
map = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0); @@ -110,7 +111,17 @@ static char *mmap_and_merge_range(char val, unsigned long size)
/* Make sure each page contains the same values to merge them. */ memset(map, val, size); - if (madvise(map, size, MADV_MERGEABLE)) { + + if (use_prctl) { + ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0); + if (ret < 0 && errno == EINVAL) { + ksft_test_result_skip("PR_SET_MEMORY_MERGE not supported\n"); + goto unmap; + } else if (ret) { + ksft_test_result_fail("PR_SET_MEMORY_MERGE=1 failed\n"); + goto unmap; + } + } else if (madvise(map, size, MADV_MERGEABLE)) { ksft_test_result_fail("MADV_MERGEABLE failed\n"); goto unmap; } @@ -133,7 +144,7 @@ static void test_unmerge(void)
ksft_print_msg("[RUN] %s\n", __func__);
- map = mmap_and_merge_range(0xcf, size); + map = mmap_and_merge_range(0xcf, size, false); if (map == MAP_FAILED) return;
@@ -155,7 +166,7 @@ static void test_unmerge_discarded(void)
ksft_print_msg("[RUN] %s\n", __func__);
- map = mmap_and_merge_range(0xcf, size); + map = mmap_and_merge_range(0xcf, size, false); if (map == MAP_FAILED) return;
@@ -187,7 +198,7 @@ static void test_unmerge_uffd_wp(void)
ksft_print_msg("[RUN] %s\n", __func__);
- map = mmap_and_merge_range(0xcf, size); + map = mmap_and_merge_range(0xcf, size, false); if (map == MAP_FAILED) return;
@@ -323,9 +334,31 @@ static void test_prctl_fork(void) ksft_test_result_pass("PR_SET_MEMORY_MERGE value is inherited\n"); }
+static void test_prctl_unmerge(void) +{ + const unsigned int size = 2 * MiB; + char *map; + + ksft_print_msg("[RUN] %s\n", __func__); + + map = mmap_and_merge_range(0xcf, size, true); + if (map == MAP_FAILED) + return; + + if (prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0)) { + ksft_test_result_fail("PR_SET_MEMORY_MERGE=0 failed\n"); + goto unmap; + } + + ksft_test_result(!range_maps_duplicates(map, size), + "Pages were unmerged\n"); +unmap: + munmap(map, size); +} + int main(int argc, char **argv) { - unsigned int tests = 4; + unsigned int tests = 5; int err;
#ifdef __NR_userfaultfd @@ -355,6 +388,7 @@ int main(int argc, char **argv)
test_prctl(); test_prctl_fork(); + test_prctl_unmerge();
err = ksft_get_fail_cnt(); if (err)
Let's factor out actual disabling of KSM. The existing "mm->def_flags &= ~VM_MERGEABLE;" was essentially a NOP and can be dropped, because def_flags should never include VM_MERGEABLE. Note that we don't currently prevent re-enabling KSM.
This should now be faster in case KSM was never enabled, because we only conditionally iterate all VMAs. Further, it certainly looks cleaner.
Acked-by: Janosch Frank frankja@linux.ibm.com Acked-by: Stefan Roesch shr@devkernel.io Signed-off-by: David Hildenbrand david@redhat.com --- arch/s390/mm/gmap.c | 20 +------------------- include/linux/ksm.h | 6 ++++++ mm/ksm.c | 11 +++++++++++ 3 files changed, 18 insertions(+), 19 deletions(-)
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 0949811761e6..dfe905c7bd8e 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2585,30 +2585,12 @@ EXPORT_SYMBOL_GPL(s390_enable_sie);
int gmap_mark_unmergeable(void) { - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - unsigned long vm_flags; - int ret; - VMA_ITERATOR(vmi, mm, 0); - /* * Make sure to disable KSM (if enabled for the whole process or * individual VMAs). Note that nothing currently hinders user space * from re-enabling it. */ - clear_bit(MMF_VM_MERGE_ANY, &mm->flags); - - for_each_vma(vmi, vma) { - /* Copy vm_flags to avoid partial modifications in ksm_madvise */ - vm_flags = vma->vm_flags; - ret = ksm_madvise(vma, vma->vm_start, vma->vm_end, - MADV_UNMERGEABLE, &vm_flags); - if (ret) - return ret; - vm_flags_reset(vma, vm_flags); - } - mm->def_flags &= ~VM_MERGEABLE; - return 0; + return ksm_disable(current->mm); } EXPORT_SYMBOL_GPL(gmap_mark_unmergeable);
diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 429efa6ff4ae..899a314bc487 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -22,6 +22,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start, void ksm_add_vma(struct vm_area_struct *vma); int ksm_enable_merge_any(struct mm_struct *mm); int ksm_disable_merge_any(struct mm_struct *mm); +int ksm_disable(struct mm_struct *mm);
int __ksm_enter(struct mm_struct *mm); void __ksm_exit(struct mm_struct *mm); @@ -80,6 +81,11 @@ static inline void ksm_add_vma(struct vm_area_struct *vma) { }
+static inline int ksm_disable(struct mm_struct *mm) +{ + return 0; +} + static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) { return 0; diff --git a/mm/ksm.c b/mm/ksm.c index 823bb3475a68..0156bded3a66 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -2628,6 +2628,17 @@ int ksm_disable_merge_any(struct mm_struct *mm) return 0; }
+int ksm_disable(struct mm_struct *mm) +{ + mmap_assert_write_locked(mm); + + if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) + return 0; + if (test_bit(MMF_VM_MERGE_ANY, &mm->flags)) + return ksm_disable_merge_any(mm); + return ksm_del_vmas(mm); +} + int ksm_madvise(struct vm_area_struct *vma, unsigned long start, unsigned long end, int advice, unsigned long *vm_flags) {
On Sat, 22 Apr 2023 22:54:17 +0200 David Hildenbrand david@redhat.com wrote:
> This is a follow-up to [1]: [PATCH v9 0/3] mm: process/cgroup ksm support
> which is now in mm-stable. Ideally we'd get at least patch #1 into the same kernel release as [1], so the semantics of setting PR_SET_MEMORY_MERGE=0 are unchanged between kernel versions.
Ack. I'll prepare a second follow-on MM batch for Linus late-ish in the merge window for material such as this.
linux-kselftest-mirror@lists.linaro.org