On Sat, Sep 06, 2025 at 08:56:48AM +0200, David Hildenbrand wrote:
On 06.09.25 03:05, John Hubbard wrote:
Probably a similar sentiment as Lorenzo here...the above diffs make the code *worse* to read. In fact, I recall adding record_subpages() here long ago, specifically to help clarify what was going on.
Well, there is too much I dislike about record_subpages() to go back there. Starting with "as Willy keeps explaining, the concept of subpages does not exist" and ending with "why do we fill out the array even on failure".
Yes
:)
Now it's been returned to its original, cryptic form.
The code in the caller was so uncryptic that both me and Lorenzo missed that magical addition. :P
:'(
Just my take on it, for whatever that's worth. :)
As always, appreciated.
I could of course keep the simple loop in some "record_folio_pages" function and clean up what I dislike about record_subpages().
But I would much rather have the call chain cleaned up instead, if possible.
Roughly, what I am thinking about (limiting it to the pte+pmd case) is the following:
I cannot get the below to apply even with the original patch here applied + fix.
It looks like (in mm-new :) commit e73f43a66d5f ("mm/gup: remove dead pgmap refcounting code") by Alastair has conflicted here, but even then I can't make it apply, with/without your fix...!
From d6d6d21dbf435d8030782a627175e36e6c7b2dfb Mon Sep 17 00:00:00 2001 From: David Hildenbrand david@redhat.com Date: Sat, 6 Sep 2025 08:33:42 +0200 Subject: [PATCH] tmp
Signed-off-by: David Hildenbrand david@redhat.com
mm/gup.c | 79 ++++++++++++++++++++++++++------------------------------ 1 file changed, 36 insertions(+), 43 deletions(-)
diff --git a/mm/gup.c b/mm/gup.c index 22420f2069ee1..98907ead749c0 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2845,12 +2845,11 @@ static void __maybe_unused gup_fast_undo_dev_pagemap(int *nr, int nr_start,
- also check pmd here to make sure pmd doesn't change (corresponds to
- pmdp_collapse_flush() in the THP collapse code path).
*/ -static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
unsigned long end, unsigned int flags, struct page **pages,
int *nr)
+static unsigned long gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
unsigned long end, unsigned int flags, struct page **pages)
{ struct dev_pagemap *pgmap = NULL;
- int ret = 0;
- unsigned long nr_pages = 0; pte_t *ptep, *ptem; ptem = ptep = pte_offset_map(&pmd, addr);
@@ -2908,24 +2907,20 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr, * details. */ if (flags & FOLL_PIN) {
ret = arch_make_folio_accessible(folio);
if (ret) {
} folio_set_referenced(folio);if (arch_make_folio_accessible(folio)) { gup_put_folio(folio, 1, flags); goto pte_unmap; }
pages[*nr] = page;
(*nr)++;
} while (ptep++, addr += PAGE_SIZE, addr != end);pages[nr_pages++] = page;
- ret = 1;
pte_unmap: if (pgmap) put_dev_pagemap(pgmap); pte_unmap(ptem);
- return ret;
- return nr_pages;
} #else @@ -2938,21 +2933,24 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
- get_user_pages_fast_only implementation that can pin pages. Thus it's still
- useful to have gup_fast_pmd_leaf even if we can't operate on ptes.
*/ -static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
unsigned long end, unsigned int flags, struct page **pages,
int *nr)
+static unsigned long gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
unsigned long end, unsigned int flags, struct page **pages)
{ return 0; } #endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */ -static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr,
unsigned long end, unsigned int flags, struct page **pages,
int *nr)
+static unsigned long gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr,
unsigned long end, unsigned int flags, struct page **pages)
{
- const unsigned long nr_pages = (end - addr) >> PAGE_SHIFT; struct page *page; struct folio *folio;
- int refs;
- unsigned long i;
- /* See gup_fast_pte_range() */
- if (pmd_protnone(orig))
if (!pmd_access_permitted(orig, flags & FOLL_WRITE)) return 0;return 0;
@@ -2960,33 +2958,30 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, if (pmd_special(orig)) return 0;
- refs = (end - addr) >> PAGE_SHIFT; page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
- folio = try_grab_folio_fast(page, refs, flags);
- folio = try_grab_folio_fast(page, nr_pages, flags); if (!folio) return 0; if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
gup_put_folio(folio, refs, flags);
return 0; } if (!gup_fast_folio_allowed(folio, flags)) {gup_put_folio(folio, nr_pages, flags);
gup_put_folio(folio, refs, flags);
return 0; } if (!pmd_write(orig) && gup_must_unshare(NULL, flags, &folio->page)) {gup_put_folio(folio, nr_pages, flags);
gup_put_folio(folio, refs, flags);
return 0; }gup_put_folio(folio, nr_pages, flags);
- pages += *nr;
- *nr += refs;
- for (; refs; refs--)
- for (i = 0; i < nr_pages; i++) *(pages++) = page++; folio_set_referenced(folio);
- return 1;
- return nr_pages;
} static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, @@ -3033,11 +3028,11 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, return 1; } -static int gup_fast_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
unsigned long end, unsigned int flags, struct page **pages,
int *nr)
+static unsigned long gup_fast_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
unsigned long end, unsigned int flags, struct page **pages)
{
- unsigned long next;
- unsigned long cur_nr_pages, next;
- unsigned long nr_pages = 0; pmd_t *pmdp; pmdp = pmd_offset_lockless(pudp, pud, addr);
@@ -3046,23 +3041,21 @@ static int gup_fast_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, next = pmd_addr_end(addr, end); if (!pmd_present(pmd))
return 0;
break;
if (unlikely(pmd_leaf(pmd))) {
/* See gup_fast_pte_range() */
if (pmd_protnone(pmd))
return 0;
if (unlikely(pmd_leaf(pmd)))
cur_nr_pages = gup_fast_pmd_leaf(pmd, pmdp, addr, next, flags, pages);
else
cur_nr_pages = gup_fast_pte_range(pmd, pmdp, addr, next, flags, pages);
if (!gup_fast_pmd_leaf(pmd, pmdp, addr, next, flags,
pages, nr))
return 0;
nr_pages += cur_nr_pages;
pages += cur_nr_pages;
} else if (!gup_fast_pte_range(pmd, pmdp, addr, next, flags,
pages, nr))
return 0;
if (nr_pages != (next - addr) >> PAGE_SIZE)
} while (pmdp++, addr = next, addr != end);break;
- return 1;
- return nr_pages;
} static int gup_fast_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr,
OK I guess you intentionally left the rest as a TODO :)
So I'll wait for you to post it before reviewing in-depth.
This generally LGTM as an approach; getting rid of *nr is important, as that's really horrible.
-- 2.50.1
Oh, I might even have found a bug moving away from that questionable "ret==1 means success" handling in gup_fast_pte_range()? Will have to double-check, but likely the following is the right thing to do.
From 8f48b25ef93e7ef98611fd58ec89384ad5171782 Mon Sep 17 00:00:00 2001 From: David Hildenbrand david@redhat.com Date: Sat, 6 Sep 2025 08:46:45 +0200 Subject: [PATCH] mm/gup: fix handling of errors from arch_make_folio_accessible() in gup_fast_pte_range()
In case we call arch_make_folio_accessible() and it fails, we would incorrectly return a value that is "!= 0" to the caller, indicating that we pinned all requested pages and that the caller can keep going.
gup_fast_pte_range() is not supposed to return error values, but instead 0 on failure and 1 on success.
That is of course wrong, because the caller will just keep going pinning more pages. If we happen to pin a page afterwards, we're in trouble, because we essentially skipped some pages.
Fixes: f28d43636d6f ("mm/gup/writeback: add callbacks for inaccessible pages") Signed-off-by: David Hildenbrand david@redhat.com
mm/gup.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/mm/gup.c b/mm/gup.c index 22420f2069ee1..cff226ec0ee7d 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2908,8 +2908,7 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr, * details. */ if (flags & FOLL_PIN) {
ret = arch_make_folio_accessible(folio);
if (ret) {
if (arch_make_folio_accessible(folio)) {
Oh Lord above. Lol. Yikes.
Yeah I think your fix is valid...
gup_put_folio(folio, 1, flags); goto pte_unmap; }
-- 2.50.1
-- Cheers
David / dhildenb