On 06.04.23 18:53, Stefan Roesch wrote:
This adds the general_profit KSM sysfs knob and the process profit metric and process merge type knobs to ksm_stat.
expose general_profit metric
The documentation mentions a general profit metric, however this metric is not calculated. In addition the formula depends on the size of internal structures, which makes it more difficult for an administrator to make the calculation. Adding the metric for a better user experience.
document general_profit sysfs knob
calculate ksm process profit metric
The ksm documentation mentions the process profit metric and how to calculate it. This adds the calculation of the metric.
add ksm_merge_type() function
This adds the ksm_merge_type function. The function returns the merge type for the process. For madvise it returns "madvise", for prctl it returns "process" and otherwise it returns "none".
I'm curious, why exactly is this change required in this context? It might be sufficient to observe if the prctl is set for a process. If not, the ksm stats can reveal whether KSM is still active for that process -> madvise.
For your use case, I'd assume it's pretty unnecessary to expose that.
If there is no compelling reason, I'd suggest to drop this and limit this patch to exposing the general/per-mm profit, which I can understand why it's desirable when fine-tuning a workload.
[...]
Signed-off-by: Stefan Roesch shr@devkernel.io Reviewed-by: Bagas Sanjaya bagasdotme@gmail.com Cc: David Hildenbrand david@redhat.com Cc: Johannes Weiner hannes@cmpxchg.org Cc: Michal Hocko mhocko@suse.com Cc: Rik van Riel riel@surriel.com Signed-off-by: Andrew Morton akpm@linux-foundation.org
Documentation/ABI/testing/sysfs-kernel-mm-ksm | 8 +++++ Documentation/admin-guide/mm/ksm.rst | 8 ++++- fs/proc/base.c | 5 +++ include/linux/ksm.h | 5 +++ mm/ksm.c | 32 +++++++++++++++++++ 5 files changed, 57 insertions(+), 1 deletion(-)
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-ksm b/Documentation/ABI/testing/sysfs-kernel-mm-ksm index d244674a9480..7768e90f7a8f 100644 --- a/Documentation/ABI/testing/sysfs-kernel-mm-ksm +++ b/Documentation/ABI/testing/sysfs-kernel-mm-ksm @@ -51,3 +51,11 @@ Description: Control merging pages across different NUMA nodes. When it is set to 0 only pages from the same node are merged, otherwise pages from all nodes can be merged together (default).
+What: /sys/kernel/mm/ksm/general_profit +Date: January 2023
^ No
+KernelVersion: 6.1
^ Outdated
(kind of weird having to come up with the right numbers before getting it merged)
[...]
diff --git a/fs/proc/base.c b/fs/proc/base.c index 07463ad4a70a..c74450318e05 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -96,6 +96,7 @@ #include <linux/time_namespace.h> #include <linux/resctrl.h> #include <linux/cn_proc.h> +#include <linux/ksm.h> #include <trace/events/oom.h> #include "internal.h" #include "fd.h" @@ -3199,6 +3200,7 @@ static int proc_pid_ksm_merging_pages(struct seq_file *m, struct pid_namespace * return 0; }
^ unrelated change
static int proc_pid_ksm_stat(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { @@ -3208,6 +3210,9 @@ static int proc_pid_ksm_stat(struct seq_file *m, struct pid_namespace *ns, if (mm) { seq_printf(m, "ksm_rmap_items %lu\n", mm->ksm_rmap_items); seq_printf(m, "zero_pages_sharing %lu\n", mm->ksm_zero_pages_sharing);
seq_printf(m, "ksm_merging_pages %lu\n", mm->ksm_merging_pages);
seq_printf(m, "ksm_merge_type %s\n", ksm_merge_type(mm));
mmput(mm); }seq_printf(m, "ksm_process_profit %ld\n", ksm_process_profit(mm));
diff --git a/include/linux/ksm.h b/include/linux/ksm.h index c65455bf124c..4c32f9bca723 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -60,6 +60,11 @@ struct page *ksm_might_need_to_copy(struct page *page, void rmap_walk_ksm(struct folio *folio, struct rmap_walk_control *rwc); void folio_migrate_ksm(struct folio *newfolio, struct folio *folio); +#ifdef CONFIG_PROC_FS +long ksm_process_profit(struct mm_struct *); +const char *ksm_merge_type(struct mm_struct *mm); +#endif /* CONFIG_PROC_FS */
- #else /* !CONFIG_KSM */
static inline int ksm_add_mm(struct mm_struct *mm) diff --git a/mm/ksm.c b/mm/ksm.c index ab95ae0f9def..76b10ff840ac 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -3042,6 +3042,25 @@ static void wait_while_offlining(void) } #endif /* CONFIG_MEMORY_HOTREMOVE */ +#ifdef CONFIG_PROC_FS +long ksm_process_profit(struct mm_struct *mm) +{
- return (long)mm->ksm_merging_pages * PAGE_SIZE -
Do we really need the cast to long? mm->ksm_merging_pages is defined as "unsigned long". Just like "ksm_pages_sharing" below.
mm->ksm_rmap_items * sizeof(struct ksm_rmap_item);
+}
+/* Return merge type name as string. */ +const char *ksm_merge_type(struct mm_struct *mm) +{
- if (test_bit(MMF_VM_MERGE_ANY, &mm->flags))
return "process";
- else if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
return "madvise";
- else
return "none";
+} +#endif /* CONFIG_PROC_FS */
Apart from these nits, LGTM (again, I don't see why the merge type should belong into this patch, and why there is a real need to expose it like that).
Acked-by: David Hildenbrand david@redhat.com
David Hildenbrand david@redhat.com writes:
On 06.04.23 18:53, Stefan Roesch wrote:
This adds the general_profit KSM sysfs knob and the process profit metric and process merge type knobs to ksm_stat.
- expose general_profit metric The documentation mentions a general profit metric, however this metric is not calculated. In addition the formula depends on the size of internal structures, which makes it more difficult for an administrator to make the calculation. Adding the metric for a better user experience.
- document general_profit sysfs knob
- calculate ksm process profit metric The ksm documentation mentions the process profit metric and how to calculate it. This adds the calculation of the metric.
- add ksm_merge_type() function This adds the ksm_merge_type function. The function returns the merge type for the process. For madvise it returns "madvise", for prctl it returns "process" and otherwise it returns "none".
I'm curious, why exactly is this change required in this context? It might be sufficient to observe if the prctl is set for a process. If not, the ksm stats can reveal whether KSM is still active for that process -> madvise.
For your use case, I'd assume it's pretty unnecessary to expose that.
If there is no compelling reason, I'd suggest to drop this and limit this patch to exposing the general/per-mm profit, which I can understand why it's desirable when fine-tuning a workload.
[...]
In the next version, the ksm_merge_type function() is removed.
Signed-off-by: Stefan Roesch shr@devkernel.io Reviewed-by: Bagas Sanjaya bagasdotme@gmail.com Cc: David Hildenbrand david@redhat.com Cc: Johannes Weiner hannes@cmpxchg.org Cc: Michal Hocko mhocko@suse.com Cc: Rik van Riel riel@surriel.com Signed-off-by: Andrew Morton akpm@linux-foundation.org
Documentation/ABI/testing/sysfs-kernel-mm-ksm | 8 +++++ Documentation/admin-guide/mm/ksm.rst | 8 ++++- fs/proc/base.c | 5 +++ include/linux/ksm.h | 5 +++ mm/ksm.c | 32 +++++++++++++++++++ 5 files changed, 57 insertions(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-ksm b/Documentation/ABI/testing/sysfs-kernel-mm-ksm index d244674a9480..7768e90f7a8f 100644 --- a/Documentation/ABI/testing/sysfs-kernel-mm-ksm +++ b/Documentation/ABI/testing/sysfs-kernel-mm-ksm @@ -51,3 +51,11 @@ Description: Control merging pages across different NUMA nodes. When it is set to 0 only pages from the same node are merged, otherwise pages from all nodes can be merged together (default).
+What: /sys/kernel/mm/ksm/general_profit +Date: January 2023
^ N
Updated in the next version.
+KernelVersion: 6.1
^ Outdated
Updated in the next version.
(kind of weird having to come up with the right numbers before getting it merged)
[...]
diff --git a/fs/proc/base.c b/fs/proc/base.c index 07463ad4a70a..c74450318e05 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -96,6 +96,7 @@ #include <linux/time_namespace.h> #include <linux/resctrl.h> #include <linux/cn_proc.h> +#include <linux/ksm.h> #include <trace/events/oom.h> #include "internal.h" #include "fd.h" @@ -3199,6 +3200,7 @@ static int proc_pid_ksm_merging_pages(struct seq_file *m, struct pid_namespace * return 0; }
^ unrelated change
Fixed in the next version.
static int proc_pid_ksm_stat(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { @@ -3208,6 +3210,9 @@ static int proc_pid_ksm_stat(struct seq_file *m, struct pid_namespace *ns, if (mm) { seq_printf(m, "ksm_rmap_items %lu\n", mm->ksm_rmap_items); seq_printf(m, "zero_pages_sharing %lu\n", mm->ksm_zero_pages_sharing);
seq_printf(m, "ksm_merging_pages %lu\n", mm->ksm_merging_pages);
seq_printf(m, "ksm_merge_type %s\n", ksm_merge_type(mm));
mmput(mm); } diff --git a/include/linux/ksm.h b/include/linux/ksm.hseq_printf(m, "ksm_process_profit %ld\n", ksm_process_profit(mm));
index c65455bf124c..4c32f9bca723 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -60,6 +60,11 @@ struct page *ksm_might_need_to_copy(struct page *page, void rmap_walk_ksm(struct folio *folio, struct rmap_walk_control *rwc); void folio_migrate_ksm(struct folio *newfolio, struct folio *folio); +#ifdef CONFIG_PROC_FS +long ksm_process_profit(struct mm_struct *); +const char *ksm_merge_type(struct mm_struct *mm); +#endif /* CONFIG_PROC_FS */
- #else /* !CONFIG_KSM */ static inline int ksm_add_mm(struct mm_struct *mm)
diff --git a/mm/ksm.c b/mm/ksm.c index ab95ae0f9def..76b10ff840ac 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -3042,6 +3042,25 @@ static void wait_while_offlining(void) } #endif /* CONFIG_MEMORY_HOTREMOVE */ +#ifdef CONFIG_PROC_FS +long ksm_process_profit(struct mm_struct *mm) +{
- return (long)mm->ksm_merging_pages * PAGE_SIZE -
Do we really need the cast to long? mm->ksm_merging_pages is defined as "unsigned long". Just like "ksm_pages_sharing" below.
Removed the cast in the next version.
mm->ksm_rmap_items * sizeof(struct ksm_rmap_item);
+}
+/* Return merge type name as string. */ +const char *ksm_merge_type(struct mm_struct *mm) +{
- if (test_bit(MMF_VM_MERGE_ANY, &mm->flags))
return "process";
- else if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
return "madvise";
- else
return "none";
+} +#endif /* CONFIG_PROC_FS */
Apart from these nits, LGTM (again, I don't see why the merge type should belong into this patch, and why there is a real need to expose it like that).
Acked-by: David Hildenbrand david@redhat.com
linux-kselftest-mirror@lists.linaro.org