Hi David,
On Thu, Aug 21, 2025 at 10:06:47PM +0200, David Hildenbrand wrote:
> Let's disallow handing out PFN ranges with non-contiguous pages, so we
> can remove the nth-page usage in __cma_alloc(), and so any callers don't
> have to worry about that either when wanting to blindly iterate pages.
>
> This is really only a problem in configs with SPARSEMEM but without
> SPARSEMEM_VMEMMAP, and only when we would cross memory sections in some
> cases.
>
> Will this cause harm? Probably not, because it's mostly 32bit that does
> not support SPARSEMEM_VMEMMAP. If this ever becomes a problem we could
> look into allocating the memmap for the memory sections spanned by a
> single CMA region in one go from memblock.
> Signed-off-by: David Hildenbrand <david@redhat.com>
> ---
>  include/linux/mm.h |  6 ++++++
>  mm/cma.c           | 36 +++++++++++++++++++++++-------------
>  mm/util.c          | 33 +++++++++++++++++++++++++++++++++
>  3 files changed, 62 insertions(+), 13 deletions(-)
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index ef360b72cb05c..f59ad1f9fc792 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -209,9 +209,15 @@ extern unsigned long sysctl_user_reserve_kbytes;
>  extern unsigned long sysctl_admin_reserve_kbytes;
>
>  #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
> +bool page_range_contiguous(const struct page *page, unsigned long nr_pages);
>  #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
>  #else
>  #define nth_page(page,n) ((page) + (n))
> +static inline bool page_range_contiguous(const struct page *page,
> +		unsigned long nr_pages)
> +{
> +	return true;
> +}
>  #endif
>
>  /* to align the pointer to the (next) page boundary */
> diff --git a/mm/cma.c b/mm/cma.c
> index 2ffa4befb99ab..1119fa2830008 100644
> --- a/mm/cma.c
> +++ b/mm/cma.c
> @@ -780,10 +780,8 @@ static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr,
>  		unsigned long count, unsigned int align,
>  		struct page **pagep, gfp_t gfp)
>  {
> -	unsigned long mask, offset;
> -	unsigned long pfn = -1;
> -	unsigned long start = 0;
>  	unsigned long bitmap_maxno, bitmap_no, bitmap_count;
> +	unsigned long start, pfn, mask, offset;
>  	int ret = -EBUSY;
>  	struct page *page = NULL;
>
> @@ -795,7 +793,7 @@ static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr,
>  	if (bitmap_count > bitmap_maxno)
>  		goto out;
>
> -	for (;;) {
> +	for (start = 0; ; start = bitmap_no + mask + 1) {
>  		spin_lock_irq(&cma->lock);
>  		/*
>  		 * If the request is larger than the available number
> @@ -812,6 +810,22 @@ static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr,
>  			spin_unlock_irq(&cma->lock);
>  			break;
>  		}
> +
> +		pfn = cmr->base_pfn + (bitmap_no << cma->order_per_bit);
> +		page = pfn_to_page(pfn);
> +
> +		/*
> +		 * Do not hand out page ranges that are not contiguous, so
> +		 * callers can just iterate the pages without having to worry
> +		 * about these corner cases.
> +		 */
> +		if (!page_range_contiguous(page, count)) {
> +			spin_unlock_irq(&cma->lock);
> +			pr_warn_ratelimited("%s: %s: skipping incompatible area [0x%lx-0x%lx]",
> +					    __func__, cma->name, pfn, pfn + count - 1);
> +			continue;
> +		}
> +
>  		bitmap_set(cmr->bitmap, bitmap_no, bitmap_count);
>  		cma->available_count -= count;
>  		/*
> @@ -821,29 +835,25 @@ static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr,
>  		 */
>  		spin_unlock_irq(&cma->lock);
>
> -		pfn = cmr->base_pfn + (bitmap_no << cma->order_per_bit);
>  		mutex_lock(&cma->alloc_mutex);
>  		ret = alloc_contig_range(pfn, pfn + count, ACR_FLAGS_CMA, gfp);
>  		mutex_unlock(&cma->alloc_mutex);
> -		if (ret == 0) {
> -			page = pfn_to_page(pfn);
> +		if (!ret)
>  			break;
> -		}
>
>  		cma_clear_bitmap(cma, cmr, pfn, count);
>  		if (ret != -EBUSY)
>  			break;
>
>  		pr_debug("%s(): memory range at pfn 0x%lx %p is busy, retrying\n",
> -			 __func__, pfn, pfn_to_page(pfn));
> +			 __func__, pfn, page);
>
>  		trace_cma_alloc_busy_retry(cma->name, pfn, pfn_to_page(pfn),
>  					   count, align);

Nitpick: I think you already have the page here.
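That is, I think the trace call could simply reuse it (untested suggestion on my side):

	trace_cma_alloc_busy_retry(cma->name, pfn, page,
				   count, align);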
> -		/* try again with a bit different memory target */
> -		start = bitmap_no + mask + 1;
>  	}
>  out:
> -	*pagep = page;
> +	if (!ret)
> +		*pagep = page;
>  	return ret;
>  }
>
> @@ -882,7 +892,7 @@ static struct page *__cma_alloc(struct cma *cma, unsigned long count,
>  	 */
>  	if (page) {
>  		for (i = 0; i < count; i++)
> -			page_kasan_tag_reset(nth_page(page, i));
> +			page_kasan_tag_reset(page + i);
I had a look at it; I'm not very familiar with CMA, but the changes look equivalent to what was there before. Not sure that's worth a Reviewed-by tag, but here it is in case you want to add it:

Reviewed-by: Alexandru Elisei <alexandru.elisei@arm.com>

Just so I can better understand the problem being fixed: I guess you can have two consecutive PFNs whose associated struct pages are not consecutive if the two PFNs fall into two adjacent memory sections, even though the sections cover a contiguous physical memory region, is that correct?
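To make my mental model concrete, this is roughly how I picture the check (my own sketch, not from the patch; the helper name is made up):

	/*
	 * With SPARSEMEM && !SPARSEMEM_VMEMMAP the memmap is allocated per
	 * memory section, so the struct pages of two adjacent sections need
	 * not be virtually contiguous even though the PFNs are.
	 */
	static bool my_pfn_range_memmap_contiguous(unsigned long start_pfn,
						   unsigned long nr_pages)
	{
		const struct page *start = pfn_to_page(start_pfn);
		unsigned long i;

		for (i = 1; i < nr_pages; i++)
			if (start + i != pfn_to_page(start_pfn + i))
				return false;
		return true;
	}

i.e. the naive per-PFN version of what page_range_contiguous() does by only checking once per PAGES_PER_SECTION boundary.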
Thanks, Alex
>  	}
>
>  	if (ret && !(gfp & __GFP_NOWARN)) {
> diff --git a/mm/util.c b/mm/util.c
> index d235b74f7aff7..0bf349b19b652 100644
> --- a/mm/util.c
> +++ b/mm/util.c
> @@ -1280,4 +1280,37 @@ unsigned int folio_pte_batch(struct folio *folio, pte_t *ptep, pte_t pte,
>  {
>  	return folio_pte_batch_flags(folio, NULL, ptep, &pte, max_nr, 0);
>  }
> +
> +#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
> +/**
> + * page_range_contiguous - test whether the page range is contiguous
> + * @page: the start of the page range.
> + * @nr_pages: the number of pages in the range.
> + *
> + * Test whether the page range is contiguous, such that they can be iterated
> + * naively, corresponding to iterating a contiguous PFN range.
> + *
> + * This function should primarily only be used for debug checks, or when
> + * working with page ranges that are not naturally contiguous (e.g., pages
> + * within a folio are).
> + *
> + * Returns true if contiguous, otherwise false.
> + */
> +bool page_range_contiguous(const struct page *page, unsigned long nr_pages)
> +{
> +	const unsigned long start_pfn = page_to_pfn(page);
> +	const unsigned long end_pfn = start_pfn + nr_pages;
> +	unsigned long pfn;
> +
> +	/*
> +	 * The memmap is allocated per memory section. We need to check
> +	 * each involved memory section once.
> +	 */
> +	for (pfn = ALIGN(start_pfn, PAGES_PER_SECTION);
> +	     pfn < end_pfn; pfn += PAGES_PER_SECTION)
> +		if (unlikely(page + (pfn - start_pfn) != pfn_to_page(pfn)))
> +			return false;
> +	return true;
> +}
> +#endif
>  #endif /* CONFIG_MMU */
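On the kerneldoc above: as I read it, a typical user outside of CMA would be a debug-style sanity check before naively iterating over a range, something along these lines (my illustration only, do_something() is just a stand-in):

	VM_WARN_ON_ONCE(!page_range_contiguous(page, nr_pages));
	for (i = 0; i < nr_pages; i++)
		do_something(page + i);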
> --
> 2.50.1