This is a follow-up to [1]: [PATCH v9 0/3] mm: process/cgroup ksm support
which is not in mm-unstable yet (but soon? :) ). I'll be on vacation for ~2 weeks, so sending it out now as reply to [1].
(1) Make PR_SET_MEMORY_MERGE=0 unmerge pages like setting MADV_UNMERGEABLE does, (2) add a selftest for it and (3) factor out disabling of KSM from s390/gmap code.
Cc: Andrew Morton akpm@linux-foundation.org Cc: Stefan Roesch shr@devkernel.io Cc: Rik van Riel riel@surriel.com Cc: Johannes Weiner hannes@cmpxchg.org Cc: Michal Hocko mhocko@suse.com Cc: Christian Borntraeger borntraeger@linux.ibm.com Cc: Janosch Frank frankja@linux.ibm.com Cc: Claudio Imbrenda imbrenda@linux.ibm.com Cc: Heiko Carstens hca@linux.ibm.com Cc: Vasily Gorbik gor@linux.ibm.com Cc: Sven Schnelle svens@linux.ibm.com Cc: Andrew Morton akpm@linux-foundation.org Cc: Shuah Khan shuah@kernel.org
[1] https://lkml.kernel.org/r/20230418051342.1919757-1-shr@devkernel.io
David Hildenbrand (3): mm/ksm: unmerge and clear VM_MERGEABLE when setting PR_SET_MEMORY_MERGE=0 selftests/ksm: ksm_functional_tests: add prctl unmerge test mm/ksm: move disabling KSM from s390/gmap code to KSM code
arch/s390/mm/gmap.c | 20 +------ include/linux/ksm.h | 7 +++ kernel/sys.c | 7 +-- mm/ksm.c | 58 +++++++++++++++++++ .../selftests/mm/ksm_functional_tests.c | 46 +++++++++++++-- 5 files changed, 107 insertions(+), 31 deletions(-)
Let's unmerge any KSM pages when setting PR_SET_MEMORY_MERGE=0, and clear the VM_MERGEABLE flag from all VMAs -- just like KSM would. Of course, only do that if we previously set PR_SET_MEMORY_MERGE=1.
Signed-off-by: David Hildenbrand david@redhat.com --- include/linux/ksm.h | 1 + kernel/sys.c | 7 +------ mm/ksm.c | 47 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 6 deletions(-)
diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 590934bdddcf..7108bc65dc2a 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -21,6 +21,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
void ksm_add_vma(struct vm_area_struct *vma); int ksm_enable_merge_any(struct mm_struct *mm); +int ksm_disable_merge_any(struct mm_struct *mm);
int __ksm_enter(struct mm_struct *mm); void __ksm_exit(struct mm_struct *mm); diff --git a/kernel/sys.c b/kernel/sys.c index 72cdb16e2636..3436376667d7 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2698,12 +2698,7 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, if (arg2) { error = ksm_enable_merge_any(me->mm); } else { - /* - * TODO: we might want disable KSM on all VMAs and - * trigger unsharing to completely disable KSM. - */ - clear_bit(MMF_VM_MERGE_ANY, &me->mm->flags); - error = 0; + error = ksm_disable_merge_any(me->mm); } mmap_write_unlock(me->mm); break; diff --git a/mm/ksm.c b/mm/ksm.c index a959e8925413..813f7fbc1832 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -2520,6 +2520,22 @@ static void __ksm_add_vma(struct vm_area_struct *vma) vm_flags_set(vma, VM_MERGEABLE); }
+static int __ksm_del_vma(struct vm_area_struct *vma) +{ + int err; + + if (!(vma->vm_flags & VM_MERGEABLE)) + return 0; + + if (vma->anon_vma) { + err = unmerge_ksm_pages(vma, vma->vm_start, vma->vm_end); + if (err) + return err; + } + + vm_flags_clear(vma, VM_MERGEABLE); + return 0; +} /** * ksm_add_vma - Mark vma as mergeable if compatible * @@ -2542,6 +2558,20 @@ static void ksm_add_vmas(struct mm_struct *mm) __ksm_add_vma(vma); }
+static int ksm_del_vmas(struct mm_struct *mm) +{ + struct vm_area_struct *vma; + int err; + + VMA_ITERATOR(vmi, mm, 0); + for_each_vma(vmi, vma) { + err = __ksm_del_vma(vma); + if (err) + return err; + } + return 0; +} + /** * ksm_enable_merge_any - Add mm to mm ksm list and enable merging on all * compatible VMA's @@ -2569,6 +2599,23 @@ int ksm_enable_merge_any(struct mm_struct *mm) return 0; }
+int ksm_disable_merge_any(struct mm_struct *mm) +{ + int err; + + if (!test_bit(MMF_VM_MERGE_ANY, &mm->flags)) + return 0; + + err = ksm_del_vmas(mm); + if (err) { + ksm_add_vmas(mm); + return err; + } + + clear_bit(MMF_VM_MERGE_ANY, &mm->flags); + return 0; +} + int ksm_madvise(struct vm_area_struct *vma, unsigned long start, unsigned long end, int advice, unsigned long *vm_flags) {
David Hildenbrand david@redhat.com writes:
Let's unmerge any KSM pages when setting PR_SET_MEMORY_MERGE=0, and clear the VM_MERGEABLE flag from all VMAs -- just like KSM would. Of course, only do that if we previously set PR_SET_MEMORY_MERGE=1.
Signed-off-by: David Hildenbrand david@redhat.com
include/linux/ksm.h | 1 + kernel/sys.c | 7 +------ mm/ksm.c | 47 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 6 deletions(-)
diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 590934bdddcf..7108bc65dc2a 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -21,6 +21,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
void ksm_add_vma(struct vm_area_struct *vma); int ksm_enable_merge_any(struct mm_struct *mm); +int ksm_disable_merge_any(struct mm_struct *mm);
int __ksm_enter(struct mm_struct *mm); void __ksm_exit(struct mm_struct *mm); diff --git a/kernel/sys.c b/kernel/sys.c index 72cdb16e2636..3436376667d7 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2698,12 +2698,7 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, if (arg2) { error = ksm_enable_merge_any(me->mm); } else {
- /*
-  * TODO: we might want disable KSM on all VMAs and
-  * trigger unsharing to completely disable KSM.
-  */
- clear_bit(MMF_VM_MERGE_ANY, &me->mm->flags);
- error = 0;
+ error = ksm_disable_merge_any(me->mm);
  }
nit: can we do:
if (arg2) error = ksm_enable_merge_any(me->mm); else error = ksm_disable_merge_any(me->mm); mmap_write_unlock(me->mm); break;
diff --git a/mm/ksm.c b/mm/ksm.c index a959e8925413..813f7fbc1832 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -2520,6 +2520,22 @@ static void __ksm_add_vma(struct vm_area_struct *vma) vm_flags_set(vma, VM_MERGEABLE); }
+static int __ksm_del_vma(struct vm_area_struct *vma) +{
- int err;
- if (!(vma->vm_flags & VM_MERGEABLE))
return 0;
- if (vma->anon_vma) {
err = unmerge_ksm_pages(vma, vma->vm_start, vma->vm_end);
if (err)
return err;
- }
- vm_flags_clear(vma, VM_MERGEABLE);
- return 0;
+} /**
- ksm_add_vma - Mark vma as mergeable if compatible
@@ -2542,6 +2558,20 @@ static void ksm_add_vmas(struct mm_struct *mm) __ksm_add_vma(vma); }
+static int ksm_del_vmas(struct mm_struct *mm) +{
- struct vm_area_struct *vma;
- int err;
- VMA_ITERATOR(vmi, mm, 0);
- for_each_vma(vmi, vma) {
err = __ksm_del_vma(vma);
if (err)
return err;
- }
- return 0;
+}
/**
- ksm_enable_merge_any - Add mm to mm ksm list and enable merging on all
compatible VMA's
@@ -2569,6 +2599,23 @@ int ksm_enable_merge_any(struct mm_struct *mm) return 0; }
+int ksm_disable_merge_any(struct mm_struct *mm)
I understand we want to keep the name "symmetric" with ksm_enable_merge_any, but it also unmerges the ksm pages. Do we want to reflect that in the function name?
Can we add a comment for the function?
+{
- int err;
- if (!test_bit(MMF_VM_MERGE_ANY, &mm->flags))
return 0;
- err = ksm_del_vmas(mm);
- if (err) {
ksm_add_vmas(mm);
return err;
- }
- clear_bit(MMF_VM_MERGE_ANY, &mm->flags);
We only clear the MMF_VM_MERGE_ANY flag if there are no errors. Is this what we want? This means that if the process creates new memory regions they would still be marked as mergeable.
- return 0;
+}
int ksm_madvise(struct vm_area_struct *vma, unsigned long start, unsigned long end, int advice, unsigned long *vm_flags) {
[...]
nit: can we do:
if (arg2) error = ksm_enable_merge_any(me->mm);
else error = ksm_disable_merge_any(me->mm); mmap_write_unlock(me->mm); break;
Indeed, thanks.
diff --git a/mm/ksm.c b/mm/ksm.c index a959e8925413..813f7fbc1832 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -2520,6 +2520,22 @@ static void __ksm_add_vma(struct vm_area_struct *vma) vm_flags_set(vma, VM_MERGEABLE); }
+static int __ksm_del_vma(struct vm_area_struct *vma) +{
- int err;
- if (!(vma->vm_flags & VM_MERGEABLE))
return 0;
- if (vma->anon_vma) {
err = unmerge_ksm_pages(vma, vma->vm_start, vma->vm_end);
if (err)
return err;
- }
- vm_flags_clear(vma, VM_MERGEABLE);
- return 0;
+} /**
- ksm_add_vma - Mark vma as mergeable if compatible
@@ -2542,6 +2558,20 @@ static void ksm_add_vmas(struct mm_struct *mm) __ksm_add_vma(vma); }
+static int ksm_del_vmas(struct mm_struct *mm) +{
- struct vm_area_struct *vma;
- int err;
- VMA_ITERATOR(vmi, mm, 0);
- for_each_vma(vmi, vma) {
err = __ksm_del_vma(vma);
if (err)
return err;
- }
- return 0;
+}
- /**
- ksm_enable_merge_any - Add mm to mm ksm list and enable merging on all
compatible VMA's
@@ -2569,6 +2599,23 @@ int ksm_enable_merge_any(struct mm_struct *mm) return 0; }
+int ksm_disable_merge_any(struct mm_struct *mm)
I understand we want to keep the name "symmetric" with ksm_enable_merge_any, but it also unmerges the ksm pages. Do we want to reflect that in the function name?
ksm_disable_merge_any_unmerge() is suboptimal.
As ksm_disable_merge_any() now reverts what ksm_enable_merge_any() ended up doing, I think it's just fine.
(it would be a different story if we'd be using "set" / "clear" terminology instead of "enable" / "disable").
We can describe that in the comment.
Can we add a comment for the function?
Can do for symmetry with ksm_enable_merge_any().
But note that I don't think documentation for functions is of any help when it takes longer to read the documentation than to read+understand the actual code.
+{
- int err;
- if (!test_bit(MMF_VM_MERGE_ANY, &mm->flags))
return 0;
- err = ksm_del_vmas(mm);
- if (err) {
ksm_add_vmas(mm);
return err;
- }
- clear_bit(MMF_VM_MERGE_ANY, &mm->flags);
We only clear the MMF_VM_MERGE_ANY flag if there are no errors. Is this
I think this is the behavior we want. We tried to disable KSM for the process (previously enabled via the prctl), but cannot disable KSM. So we rollback our changes and return an error.
This is similar to trying to set MADV_UNMERGEABLE but failing. We leave the bit set.
what we want? This means that if the process creates new memory regions they would still be marked as mergeable.
Yes, we failed the operation so we keep everything unchanged.
I understand we want to keep the name "symmetric" with ksm_enable_merge_any, but it also unmerges the ksm pages. Do we want to reflect that in the function name?
ksm_disable_merge_any_unmerge() is suboptimal.
As ksm_disable_merge_any() now reverts what ksm_enable_merge_any() ended up doing, I think it's just fine.
(it would be a different story if we'd be using "set" / "clear" terminology instead of "enable" / "disable").
We can describe that in the comment.
Can we add a comment for the function?
Can do for symmetry with ksm_enable_merge_any().
+/** + * ksm_disable_merge_any - Disable merging on all compatible VMA's of the mm, + * previously enabled via ksm_enable_merge_any(). + * + * Disabling merging implies unmerging any merged pages, like setting + * MADV_UNMERGEABLE would. If unmerging fails, the whole operation fails and + * merging on all compatible VMA's remains enabled. + * + * @mm: Pointer to mm + * + * Returns 0 on success, otherwise error code + */
David Hildenbrand david@redhat.com writes:
I understand we want to keep the name "symmetric" with ksm_enable_merge_any, but it also unmerges the ksm pages. Do we want to reflect that in the function name?
ksm_disable_merge_any_unmerge() is suboptimal. As ksm_disable_merge_any() now reverts what ksm_enable_merge_any() ended up doing, I think it's just fine. (it would be a different story if we'd be using "set" / "clear" terminology instead of "enable" / "disable"). We can describe that in the comment.
Can we add a comment for the function?
Can do for symmetry with ksm_enable_merge_any().
+/**
- ksm_disable_merge_any - Disable merging on all compatible VMA's of the mm,
previously enabled via ksm_enable_merge_any().
- Disabling merging implies unmerging any merged pages, like setting
- MADV_UNMERGEABLE would. If unmerging fails, the whole operation fails and
- merging on all compatible VMA's remains enabled.
- @mm: Pointer to mm
- Returns 0 on success, otherwise error code
- */
LGTM
Acked-by: Stefan Roesch shr@devkernel.io
Let's test whether setting PR_SET_MEMORY_MERGE to 0 after setting it to 1 will unmerge pages, similar to how setting MADV_UNMERGEABLE after setting MADV_MERGEABLE would.
Signed-off-by: David Hildenbrand david@redhat.com --- .../selftests/mm/ksm_functional_tests.c | 46 ++++++++++++++++--- 1 file changed, 40 insertions(+), 6 deletions(-)
diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c index 7bc9fc17c9f0..26853badae70 100644 --- a/tools/testing/selftests/mm/ksm_functional_tests.c +++ b/tools/testing/selftests/mm/ksm_functional_tests.c @@ -91,9 +91,10 @@ static int ksm_merge(void) return 0; }
-static char *mmap_and_merge_range(char val, unsigned long size) +static char *mmap_and_merge_range(char val, unsigned long size, bool use_prctl) { char *map; + int ret;
map = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0); @@ -110,7 +111,17 @@ static char *mmap_and_merge_range(char val, unsigned long size)
/* Make sure each page contains the same values to merge them. */ memset(map, val, size); - if (madvise(map, size, MADV_MERGEABLE)) { + + if (use_prctl) { + ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0); + if (ret < 0 && errno == EINVAL) { + ksft_test_result_skip("PR_SET_MEMORY_MERGE not supported\n"); + goto unmap; + } else if (ret) { + ksft_test_result_fail("PR_SET_MEMORY_MERGE=1 failed\n"); + goto unmap; + } + } else if (madvise(map, size, MADV_MERGEABLE)) { ksft_test_result_fail("MADV_MERGEABLE failed\n"); goto unmap; } @@ -133,7 +144,7 @@ static void test_unmerge(void)
ksft_print_msg("[RUN] %s\n", __func__);
- map = mmap_and_merge_range(0xcf, size); + map = mmap_and_merge_range(0xcf, size, false); if (map == MAP_FAILED) return;
@@ -155,7 +166,7 @@ static void test_unmerge_discarded(void)
ksft_print_msg("[RUN] %s\n", __func__);
- map = mmap_and_merge_range(0xcf, size); + map = mmap_and_merge_range(0xcf, size, false); if (map == MAP_FAILED) return;
@@ -187,7 +198,7 @@ static void test_unmerge_uffd_wp(void)
ksft_print_msg("[RUN] %s\n", __func__);
- map = mmap_and_merge_range(0xcf, size); + map = mmap_and_merge_range(0xcf, size, false); if (map == MAP_FAILED) return;
@@ -323,9 +334,31 @@ static void test_prctl_fork(void) ksft_test_result_pass("PR_SET_MEMORY_MERGE value is inherited\n"); }
+static void test_prctl_unmerge(void) +{ + const unsigned int size = 2 * MiB; + char *map; + + ksft_print_msg("[RUN] %s\n", __func__); + + map = mmap_and_merge_range(0xcf, size, true); + if (map == MAP_FAILED) + return; + + if (prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0)) { + ksft_test_result_fail("PR_SET_MEMORY_MERGE=0 failed\n"); + goto unmap; + } + + ksft_test_result(!range_maps_duplicates(map, size), + "Pages were unmerged\n"); +unmap: + munmap(map, size); +} + int main(int argc, char **argv) { - unsigned int tests = 4; + unsigned int tests = 5; int err;
#ifdef __NR_userfaultfd @@ -355,6 +388,7 @@ int main(int argc, char **argv)
test_prctl(); test_prctl_fork(); + test_prctl_unmerge();
err = ksft_get_fail_cnt(); if (err)
David Hildenbrand david@redhat.com writes:
Let's test whether setting PR_SET_MEMORY_MERGE to 0 after setting it to 1 will unmerge pages, similar to how setting MADV_UNMERGEABLE after setting MADV_MERGEABLE would.
Signed-off-by: David Hildenbrand david@redhat.com
.../selftests/mm/ksm_functional_tests.c | 46 ++++++++++++++++--- 1 file changed, 40 insertions(+), 6 deletions(-)
diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c index 7bc9fc17c9f0..26853badae70 100644 --- a/tools/testing/selftests/mm/ksm_functional_tests.c +++ b/tools/testing/selftests/mm/ksm_functional_tests.c @@ -91,9 +91,10 @@ static int ksm_merge(void) return 0; }
-static char *mmap_and_merge_range(char val, unsigned long size) +static char *mmap_and_merge_range(char val, unsigned long size, bool use_prctl) { char *map;
int ret;
map = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
@@ -110,7 +111,17 @@ static char *mmap_and_merge_range(char val, unsigned long size)
/* Make sure each page contains the same values to merge them. */ memset(map, val, size);
- if (madvise(map, size, MADV_MERGEABLE)) {
- if (use_prctl) {
ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0);
if (ret < 0 && errno == EINVAL) {
ksft_test_result_skip("PR_SET_MEMORY_MERGE not supported\n");
goto unmap;
} else if (ret) {
ksft_test_result_fail("PR_SET_MEMORY_MERGE=1 failed\n");
goto unmap;
}
- } else if (madvise(map, size, MADV_MERGEABLE)) { ksft_test_result_fail("MADV_MERGEABLE failed\n"); goto unmap; }
@@ -133,7 +144,7 @@ static void test_unmerge(void)
ksft_print_msg("[RUN] %s\n", __func__);
- map = mmap_and_merge_range(0xcf, size);
- map = mmap_and_merge_range(0xcf, size, false); if (map == MAP_FAILED) return;
@@ -155,7 +166,7 @@ static void test_unmerge_discarded(void)
ksft_print_msg("[RUN] %s\n", __func__);
- map = mmap_and_merge_range(0xcf, size);
- map = mmap_and_merge_range(0xcf, size, false); if (map == MAP_FAILED) return;
@@ -187,7 +198,7 @@ static void test_unmerge_uffd_wp(void)
ksft_print_msg("[RUN] %s\n", __func__);
- map = mmap_and_merge_range(0xcf, size);
- map = mmap_and_merge_range(0xcf, size, false); if (map == MAP_FAILED) return;
@@ -323,9 +334,31 @@ static void test_prctl_fork(void) ksft_test_result_pass("PR_SET_MEMORY_MERGE value is inherited\n"); }
+static void test_prctl_unmerge(void) +{
- const unsigned int size = 2 * MiB;
- char *map;
- ksft_print_msg("[RUN] %s\n", __func__);
- map = mmap_and_merge_range(0xcf, size, true);
- if (map == MAP_FAILED)
return;
- if (prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0)) {
ksft_test_result_fail("PR_SET_MEMORY_MERGE=0 failed\n");
goto unmap;
- }
- ksft_test_result(!range_maps_duplicates(map, size),
"Pages were unmerged\n");
+unmap:
- munmap(map, size);
+}
int main(int argc, char **argv) {
- unsigned int tests = 4;
- unsigned int tests = 5; int err;
#ifdef __NR_userfaultfd @@ -355,6 +388,7 @@ int main(int argc, char **argv)
test_prctl(); test_prctl_fork();
test_prctl_unmerge();
err = ksft_get_fail_cnt(); if (err)
Acked-by: Stefan Roesch shr@devkernel.io
Let's factor out actual disabling of KSM. The existing "mm->def_flags &= ~VM_MERGEABLE;" was essentially a NOP and can be dropped, because def_flags should never include VM_MERGEABLE. Note that we don't currently prevent re-enabling KSM.
This should now be faster in case KSM was never enabled, because we only conditionally iterate all VMAs. Further, it certainly looks cleaner.
Signed-off-by: David Hildenbrand david@redhat.com --- arch/s390/mm/gmap.c | 20 +------------------- include/linux/ksm.h | 6 ++++++ mm/ksm.c | 11 +++++++++++ 3 files changed, 18 insertions(+), 19 deletions(-)
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 0949811761e6..dfe905c7bd8e 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2585,30 +2585,12 @@ EXPORT_SYMBOL_GPL(s390_enable_sie);
int gmap_mark_unmergeable(void) { - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - unsigned long vm_flags; - int ret; - VMA_ITERATOR(vmi, mm, 0); - /* * Make sure to disable KSM (if enabled for the whole process or * individual VMAs). Note that nothing currently hinders user space * from re-enabling it. */ - clear_bit(MMF_VM_MERGE_ANY, &mm->flags); - - for_each_vma(vmi, vma) { - /* Copy vm_flags to avoid partial modifications in ksm_madvise */ - vm_flags = vma->vm_flags; - ret = ksm_madvise(vma, vma->vm_start, vma->vm_end, - MADV_UNMERGEABLE, &vm_flags); - if (ret) - return ret; - vm_flags_reset(vma, vm_flags); - } - mm->def_flags &= ~VM_MERGEABLE; - return 0; + return ksm_disable(current->mm); } EXPORT_SYMBOL_GPL(gmap_mark_unmergeable);
diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 7108bc65dc2a..b3d8b7849e18 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -22,6 +22,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start, void ksm_add_vma(struct vm_area_struct *vma); int ksm_enable_merge_any(struct mm_struct *mm); int ksm_disable_merge_any(struct mm_struct *mm); +int ksm_disable(struct mm_struct *mm);
int __ksm_enter(struct mm_struct *mm); void __ksm_exit(struct mm_struct *mm); @@ -75,6 +76,11 @@ static inline void ksm_add_vma(struct vm_area_struct *vma) { }
+static inline int ksm_disable(struct mm_struct *mm) +{ + return 0; +} + static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) { return 0; diff --git a/mm/ksm.c b/mm/ksm.c index 813f7fbc1832..208311cbb019 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -2616,6 +2616,17 @@ int ksm_disable_merge_any(struct mm_struct *mm) return 0; }
+int ksm_disable(struct mm_struct *mm) +{ + mmap_assert_write_locked(mm); + + if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) + return 0; + if (test_bit(MMF_VM_MERGE_ANY, &mm->flags)) + return ksm_disable_merge_any(mm); + return ksm_del_vmas(mm); +} + int ksm_madvise(struct vm_area_struct *vma, unsigned long start, unsigned long end, int advice, unsigned long *vm_flags) {
On 4/18/23 17:28, David Hildenbrand wrote:
Let's factor out actual disabling of KSM. The existing "mm->def_flags &= ~VM_MERGEABLE;" was essentially a NOP and can be dropped, because def_flags should never include VM_MERGEABLE. Note that we don't currently prevent re-enabling KSM.
This should now be faster in case KSM was never enabled, because we only conditionally iterate all VMAs. Further, it certainly looks cleaner.
Signed-off-by: David Hildenbrand david@redhat.com
arch/s390/mm/gmap.c | 20 +------------------- include/linux/ksm.h | 6 ++++++ mm/ksm.c | 11 +++++++++++ 3 files changed, 18 insertions(+), 19 deletions(-)
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 0949811761e6..dfe905c7bd8e 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2585,30 +2585,12 @@ EXPORT_SYMBOL_GPL(s390_enable_sie); int gmap_mark_unmergeable(void) {
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
- unsigned long vm_flags;
- int ret;
- VMA_ITERATOR(vmi, mm, 0);
/*
 * Make sure to disable KSM (if enabled for the whole process or
 * individual VMAs). Note that nothing currently hinders user space
 * from re-enabling it.
 */
Is that still true?
My KSM knowledge is nearly zero but from what I can see the patch looks ok to me: Acked-by: Janosch Frank frankja@linux.ibm.net
On 19.04.23 13:39, Janosch Frank wrote:
On 4/18/23 17:28, David Hildenbrand wrote:
Let's factor out actual disabling of KSM. The existing "mm->def_flags &= ~VM_MERGEABLE;" was essentially a NOP and can be dropped, because def_flags should never include VM_MERGEABLE. Note that we don't currently prevent re-enabling KSM.
This should now be faster in case KSM was never enabled, because we only conditionally iterate all VMAs. Further, it certainly looks cleaner.
Signed-off-by: David Hildenbrand david@redhat.com
arch/s390/mm/gmap.c | 20 +------------------- include/linux/ksm.h | 6 ++++++ mm/ksm.c | 11 +++++++++++ 3 files changed, 18 insertions(+), 19 deletions(-)
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 0949811761e6..dfe905c7bd8e 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2585,30 +2585,12 @@ EXPORT_SYMBOL_GPL(s390_enable_sie); int gmap_mark_unmergeable(void) {
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
- unsigned long vm_flags;
- int ret;
- VMA_ITERATOR(vmi, mm, 0);
/*
 * Make sure to disable KSM (if enabled for the whole process or
 * individual VMAs). Note that nothing currently hinders user space
 * from re-enabling it.
 */
Is that still true?
Yes. We'd need another per-MM bit to stop it from getting re-enabled.
My KSM knowledge is nearly zero but from what I can see the patch looks ok to me: Acked-by: Janosch Frank frankja@linux.ibm.net
Thanks!
David Hildenbrand david@redhat.com writes:
Let's factor out actual disabling of KSM. The existing "mm->def_flags &= ~VM_MERGEABLE;" was essentially a NOP and can be dropped, because def_flags should never include VM_MERGEABLE. Note that we don't currently prevent re-enabling KSM.
This should now be faster in case KSM was never enabled, because we only conditionally iterate all VMAs. Further, it certainly looks cleaner.
Signed-off-by: David Hildenbrand david@redhat.com
arch/s390/mm/gmap.c | 20 +------------------- include/linux/ksm.h | 6 ++++++ mm/ksm.c | 11 +++++++++++ 3 files changed, 18 insertions(+), 19 deletions(-)
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 0949811761e6..dfe905c7bd8e 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2585,30 +2585,12 @@ EXPORT_SYMBOL_GPL(s390_enable_sie);
int gmap_mark_unmergeable(void) {
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
- unsigned long vm_flags;
- int ret;
- VMA_ITERATOR(vmi, mm, 0);
/*
 * Make sure to disable KSM (if enabled for the whole process or
 * individual VMAs). Note that nothing currently hinders user space
 * from re-enabling it.
 */
- clear_bit(MMF_VM_MERGE_ANY, &mm->flags);
- for_each_vma(vmi, vma) {
/* Copy vm_flags to avoid partial modifications in ksm_madvise */
vm_flags = vma->vm_flags;
ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
MADV_UNMERGEABLE, &vm_flags);
if (ret)
return ret;
vm_flags_reset(vma, vm_flags);
- }
- mm->def_flags &= ~VM_MERGEABLE;
This clears the def_flags struct member, however, in ksm_disable() we clear the __flags struct member. Is this a problem?
- return 0;
- return ksm_disable(current->mm);
} EXPORT_SYMBOL_GPL(gmap_mark_unmergeable);
diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 7108bc65dc2a..b3d8b7849e18 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -22,6 +22,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start, void ksm_add_vma(struct vm_area_struct *vma); int ksm_enable_merge_any(struct mm_struct *mm); int ksm_disable_merge_any(struct mm_struct *mm); +int ksm_disable(struct mm_struct *mm);
int __ksm_enter(struct mm_struct *mm); void __ksm_exit(struct mm_struct *mm); @@ -75,6 +76,11 @@ static inline void ksm_add_vma(struct vm_area_struct *vma) { }
+static inline int ksm_disable(struct mm_struct *mm) +{
- return 0;
+}
static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) { return 0; diff --git a/mm/ksm.c b/mm/ksm.c index 813f7fbc1832..208311cbb019 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -2616,6 +2616,17 @@ int ksm_disable_merge_any(struct mm_struct *mm) return 0; }
+int ksm_disable(struct mm_struct *mm) +{
- mmap_assert_write_locked(mm);
- if (!test_bit(MMF_VM_MERGEABLE, &mm->flags))
return 0;
- if (test_bit(MMF_VM_MERGE_ANY, &mm->flags))
return ksm_disable_merge_any(mm);
- return ksm_del_vmas(mm);
+}
int ksm_madvise(struct vm_area_struct *vma, unsigned long start, unsigned long end, int advice, unsigned long *vm_flags) {
[...]
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 0949811761e6..dfe905c7bd8e 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2585,30 +2585,12 @@ EXPORT_SYMBOL_GPL(s390_enable_sie);
int gmap_mark_unmergeable(void) {
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
- unsigned long vm_flags;
- int ret;
- VMA_ITERATOR(vmi, mm, 0);
/*
 * Make sure to disable KSM (if enabled for the whole process or
 * individual VMAs). Note that nothing currently hinders user space
 * from re-enabling it.
 */
- clear_bit(MMF_VM_MERGE_ANY, &mm->flags);
- for_each_vma(vmi, vma) {
/* Copy vm_flags to avoid partial modifications in ksm_madvise */
vm_flags = vma->vm_flags;
ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
MADV_UNMERGEABLE, &vm_flags);
if (ret)
return ret;
vm_flags_reset(vma, vm_flags);
- }
- mm->def_flags &= ~VM_MERGEABLE;
Hi Stefan,
This clears the def_flags struct member, however, in ksm_disable() we clear the __flags struct member. Is this a problem?
The patch description contains a comment regarding def_flags: "The existing "mm->def_flags &= ~VM_MERGEABLE;" was essentially a NOP and can be dropped, because def_flags should never include VM_MERGEABLE."
We keep clearing the VM_MERGEABLE flag from vma->vm_flags. In the old code, ksm_madvise() would have cleared it from local vm_flags and vm_flags_reset() would have modified vma->vm_flags. Now we clear it directly via vm_flags_clear(vma, VM_MERGEABLE);
Long story short, the mm->def_flags code was wrong and most probably copied from thp_split_mm() where we do: mm->def_flags |= VM_NOHUGEPAGE; Which makes more sense.
Thanks!
David Hildenbrand david@redhat.com writes:
[...]
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 0949811761e6..dfe905c7bd8e 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2585,30 +2585,12 @@ EXPORT_SYMBOL_GPL(s390_enable_sie);
int gmap_mark_unmergeable(void) {
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
- unsigned long vm_flags;
- int ret;
- VMA_ITERATOR(vmi, mm, 0);
/*
 * Make sure to disable KSM (if enabled for the whole process or
 * individual VMAs). Note that nothing currently hinders user space
 * from re-enabling it.
 */
- clear_bit(MMF_VM_MERGE_ANY, &mm->flags);
- for_each_vma(vmi, vma) {
/* Copy vm_flags to avoid partial modifications in ksm_madvise */
vm_flags = vma->vm_flags;
ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
MADV_UNMERGEABLE, &vm_flags);
if (ret)
return ret;
vm_flags_reset(vma, vm_flags);
- }
- mm->def_flags &= ~VM_MERGEABLE;
Hi Stefan,
This clears the def_flags struct member, however, in ksm_disable() we clear the __flags struct member. Is this a problem?
The patch description contains a comment regarding def_flags: "The existing "mm->def_flags &= ~VM_MERGEABLE;" was essentially a NOP and can be dropped, because def_flags should never include VM_MERGEABLE."
We keep clearing the VM_MERGEABLE flag from vma->vm_flags. In the old code, ksm_madvise() would have cleared it from local vm_flags and vm_flags_reset() would have modified vma->vm_flags. Now we clear it directly via vm_flags_clear(vma, VM_MERGEABLE);
Long story short, the mm->def_flags code was wrong and most probably copied from thp_split_mm() where we do: mm->def_flags |= VM_NOHUGEPAGE; Which makes more sense.
Thanks!
Thanks for the explanation.
Acked-by: Stefan Roesch shr@devkernel.io
linux-kselftest-mirror@lists.linaro.org