Building on the vma_count helpers, add a VM_WARN_ON_ONCE() to detect cases where the VMA count exceeds the sysctl_max_map_count limit.
This check will help catch future bugs or regressions where VMAs are allocated in excess of the limit.
The warning is placed in the main vma_count_*() helpers, while the internal *_nocheck variants bypass it. The _nocheck helpers are used to ensure that the assertion does not trigger a false positive in the legitimate case where a VMA split in munmap() temporarily raises the VMA count past the limit.
Cc: Andrew Morton akpm@linux-foundation.org Cc: David Hildenbrand david@redhat.com Cc: "Liam R. Howlett" Liam.Howlett@oracle.com Cc: Lorenzo Stoakes lorenzo.stoakes@oracle.com Cc: Mike Rapoport rppt@kernel.org Cc: Minchan Kim minchan@kernel.org Cc: Pedro Falcato pfalcato@suse.de Signed-off-by: Kalesh Singh kaleshsingh@google.com ---
Changes in v2: - Add assertions if exceeding max_vma_count limit, per Pedro
include/linux/mm.h | 12 ++++++-- mm/internal.h | 1 - mm/vma.c | 49 +++++++++++++++++++++++++------- tools/testing/vma/vma_internal.h | 7 ++++- 4 files changed, 55 insertions(+), 14 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h index 8bad1454984c..3a3749d7015c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4219,19 +4219,27 @@ static inline bool snapshot_page_is_faithful(const struct page_snapshot *ps)
void snapshot_page(struct page_snapshot *ps, const struct page *page);
+int vma_count_remaining(const struct mm_struct *mm); + static inline void vma_count_init(struct mm_struct *mm) { ACCESS_PRIVATE(mm, __vma_count) = 0; }
-static inline void vma_count_add(struct mm_struct *mm, int nr_vmas) +static inline void __vma_count_add_nocheck(struct mm_struct *mm, int nr_vmas) { ACCESS_PRIVATE(mm, __vma_count) += nr_vmas; }
+static inline void vma_count_add(struct mm_struct *mm, int nr_vmas) +{ + VM_WARN_ON_ONCE(!vma_count_remaining(mm)); + __vma_count_add_nocheck(mm, nr_vmas); +} + static inline void vma_count_sub(struct mm_struct *mm, int nr_vmas) { - vma_count_add(mm, -nr_vmas); + __vma_count_add_nocheck(mm, -nr_vmas); }
static inline void vma_count_inc(struct mm_struct *mm) diff --git a/mm/internal.h b/mm/internal.h index 39f1c9535ae5..e0567a3b64fa 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1661,6 +1661,5 @@ static inline bool reclaim_pt_is_enabled(unsigned long start, unsigned long end, void dup_mm_exe_file(struct mm_struct *mm, struct mm_struct *oldmm); int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm);
-int vma_count_remaining(const struct mm_struct *mm);
#endif /* __MM_INTERNAL_H */ diff --git a/mm/vma.c b/mm/vma.c index 0cd3cb472220..0e4fcaebe209 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -323,15 +323,17 @@ static void vma_prepare(struct vma_prepare *vp) }
/* - * vma_complete- Helper function for handling the unlocking after altering VMAs, - * or for inserting a VMA. + * This is the internal, unsafe version of vma_complete(). Unlike its + * wrapper, this function bypasses runtime checks for VMA count limits by + * using the _nocheck vma_count* helpers. * - * @vp: The vma_prepare struct - * @vmi: The vma iterator - * @mm: The mm_struct + * Its use is restricted to __split_vma() where the VMA count can be + * temporarily higher than the sysctl_max_map_count limit. + * + * All other callers must use vma_complete(). */ -static void vma_complete(struct vma_prepare *vp, struct vma_iterator *vmi, - struct mm_struct *mm) +static void __vma_complete(struct vma_prepare *vp, struct vma_iterator *vmi, + struct mm_struct *mm) { if (vp->file) { if (vp->adj_next) @@ -352,7 +354,11 @@ static void vma_complete(struct vma_prepare *vp, struct vma_iterator *vmi, * (it may either follow vma or precede it). */ vma_iter_store_new(vmi, vp->insert); - vma_count_inc(mm); + /* + * Explicitly allow vma_count to exceed the threshold to prevent + * blocking munmap() from freeing resources. + */ + __vma_count_add_nocheck(mm, 1); }
if (vp->anon_vma) { @@ -403,6 +409,26 @@ static void vma_complete(struct vma_prepare *vp, struct vma_iterator *vmi, uprobe_mmap(vp->insert); }
+/* + * vma_complete- Helper function for handling the unlocking after altering VMAs, + * or for inserting a VMA. + * + * @vp: The vma_prepare struct + * @vmi: The vma iterator + * @mm: The mm_struct + */ +static void vma_complete(struct vma_prepare *vp, struct vma_iterator *vmi, + struct mm_struct *mm) +{ + /* + * __vma_complete() explicitly foregoes checking the new + * vma_count against the sysctl_max_map_count limit, so + * do it here. + */ + VM_WARN_ON_ONCE(!vma_count_remaining(mm)); + __vma_complete(vp, vmi, mm); +} + /* * init_vma_prep() - Initializer wrapper for vma_prepare struct * @vp: The vma_prepare struct @@ -564,8 +590,11 @@ __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, vma->vm_end = addr; }
- /* vma_complete stores the new vma */ - vma_complete(&vp, vmi, vma->vm_mm); + /* + * __vma_complete stores the new vma without checking against the + * sysctl_max_map_count (vma_count) limit. + */ + __vma_complete(&vp, vmi, vma->vm_mm); validate_mm(vma->vm_mm);
/* Success. */ diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index 6e724ba1adf4..d084b1eb2a5c 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -1534,11 +1534,16 @@ static inline void vma_count_init(struct mm_struct *mm) mm->__vma_count = 0; }
-static inline void vma_count_add(struct mm_struct *mm, int nr_vmas) +static inline void __vma_count_add_nocheck(struct mm_struct *mm, int nr_vmas) { mm->__vma_count += nr_vmas; }
+static inline void vma_count_add(struct mm_struct *mm, int nr_vmas) +{ + __vma_count_add_nocheck(mm, nr_vmas); +} + static inline void vma_count_sub(struct mm_struct *mm, int nr_vmas) { vma_count_add(mm, -nr_vmas);