On 25/11/2025 18:38, Mike Rapoport wrote:
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
When a VMA is registered with userfaultfd in minor mode, its ->fault() method should check whether a folio exists in the page cache and, if it does, ->fault() should call handle_userfault(VM_UFFD_MINOR).
Instead of calling handle_userfault() directly from a specific ->fault() implementation, introduce a new fault reason, VM_FAULT_UFFD_MINOR, that notifies the core page fault handler that it should call handle_userfault(VM_UFFD_MINOR) to complete the page fault.
Replace the call to handle_userfault(VM_UFFD_MINOR) in shmem and use the new VM_FAULT_UFFD_MINOR there instead.
For configurations that don't enable CONFIG_USERFAULTFD, VM_FAULT_UFFD_MINOR is set to 0.
Suggested-by: David Hildenbrand (Red Hat) <david@kernel.org>
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
 include/linux/mm_types.h | 10 +++++++++-
 mm/memory.c              |  2 ++
 mm/shmem.c               |  2 +-
 3 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 90e5790c318f..df71b057111b 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1523,6 +1523,8 @@ typedef __bitwise unsigned int vm_fault_t;
  *				fsync() to complete (for synchronous page faults
  *				in DAX)
  * @VM_FAULT_COMPLETED:		->fault completed, meanwhile mmap lock released
+ * @VM_FAULT_UFFD_MINOR:	->fault did not modify page tables and needs
+ *				handle_userfault(VM_UFFD_MINOR) to complete
  * @VM_FAULT_HINDEX_MASK:	mask HINDEX value
  *
  */
@@ -1540,6 +1542,11 @@ enum vm_fault_reason {
 	VM_FAULT_DONE_COW       = (__force vm_fault_t)0x001000,
 	VM_FAULT_NEEDDSYNC      = (__force vm_fault_t)0x002000,
 	VM_FAULT_COMPLETED      = (__force vm_fault_t)0x004000,
+#ifdef CONFIG_USERFAULTFD
+	VM_FAULT_UFFD_MINOR     = (__force vm_fault_t)0x008000,
+#else
+	VM_FAULT_UFFD_MINOR     = (__force vm_fault_t)0x000000,
+#endif
 	VM_FAULT_HINDEX_MASK    = (__force vm_fault_t)0x0f0000,
 };
@@ -1564,7 +1571,8 @@ enum vm_fault_reason {
 	{ VM_FAULT_FALLBACK,            "FALLBACK" },	\
 	{ VM_FAULT_DONE_COW,            "DONE_COW" },	\
 	{ VM_FAULT_NEEDDSYNC,           "NEEDDSYNC" },	\
-	{ VM_FAULT_COMPLETED,           "COMPLETED" }
+	{ VM_FAULT_COMPLETED,           "COMPLETED" },	\
+	{ VM_FAULT_UFFD_MINOR,          "UFFD_MINOR" },	\
It looks like we have to keep the last element comma-less, otherwise I'm seeing compile errors somewhere in fs/dax.c.
 struct vm_special_mapping {
 	const char *name;	/* The name, e.g. "[vdso]". */
diff --git a/mm/memory.c b/mm/memory.c
index b59ae7ce42eb..94acbac8cefb 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5279,6 +5279,8 @@ static vm_fault_t __do_fault(struct vm_fault *vmf)
 	}

 	ret = vma->vm_ops->fault(vmf);
+	if (unlikely(ret & VM_FAULT_UFFD_MINOR))
+		return handle_userfault(vmf, VM_UFFD_MINOR);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY |
 			    VM_FAULT_DONE_COW)))
 		return ret;
diff --git a/mm/shmem.c b/mm/shmem.c
index e16c7c8c3e1e..a9a31c0b5979 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2461,7 +2461,7 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
 	if (folio && vma && userfaultfd_minor(vma)) {
 		if (!xa_is_value(folio))
 			folio_put(folio);
-		*fault_type = handle_userfault(vmf, VM_UFFD_MINOR);
+		*fault_type = VM_FAULT_UFFD_MINOR;
 		return 0;
 	}
-- 
2.50.1