From: "Mike Rapoport (Microsoft)" rppt@kernel.org
When a VMA is registered with userfaultfd in minor mode, its ->fault() method should check whether a folio exists in the page cache and, if it does, ->fault() should call handle_userfault(VM_UFFD_MINOR).
Instead of calling handle_userfault() directly from a specific ->fault() implementation, introduce a new fault reason, VM_FAULT_UFFD_MINOR, that notifies the core page fault handler that it should call handle_userfault(VM_UFFD_MINOR) to complete the page fault.
Replace the call to handle_userfault(VM_UFFD_MINOR) in shmem and return the new VM_FAULT_UFFD_MINOR fault reason there instead.
For configurations that don't enable CONFIG_USERFAULTFD, VM_FAULT_UFFD_MINOR is set to 0.
Suggested-by: David Hildenbrand (Red Hat) david@kernel.org Signed-off-by: Mike Rapoport (Microsoft) rppt@kernel.org --- include/linux/mm_types.h | 10 +++++++++- mm/memory.c | 2 ++ mm/shmem.c | 2 +- 3 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 90e5790c318f..df71b057111b 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1523,6 +1523,8 @@ typedef __bitwise unsigned int vm_fault_t; * fsync() to complete (for synchronous page faults * in DAX) * @VM_FAULT_COMPLETED: ->fault completed, meanwhile mmap lock released + * @VM_FAULT_UFFD_MINOR: ->fault did not modify page tables and needs + * handle_userfault(VM_UFFD_MINOR) to complete * @VM_FAULT_HINDEX_MASK: mask HINDEX value * */ @@ -1540,6 +1542,11 @@ enum vm_fault_reason { VM_FAULT_DONE_COW = (__force vm_fault_t)0x001000, VM_FAULT_NEEDDSYNC = (__force vm_fault_t)0x002000, VM_FAULT_COMPLETED = (__force vm_fault_t)0x004000, +#ifdef CONFIG_USERFAULTFD + VM_FAULT_UFFD_MINOR = (__force vm_fault_t)0x008000, +#else + VM_FAULT_UFFD_MINOR = (__force vm_fault_t)0x000000, +#endif VM_FAULT_HINDEX_MASK = (__force vm_fault_t)0x0f0000, };
@@ -1564,7 +1571,8 @@ enum vm_fault_reason { { VM_FAULT_FALLBACK, "FALLBACK" }, \ { VM_FAULT_DONE_COW, "DONE_COW" }, \ { VM_FAULT_NEEDDSYNC, "NEEDDSYNC" }, \ - { VM_FAULT_COMPLETED, "COMPLETED" } + { VM_FAULT_COMPLETED, "COMPLETED" }, \ + { VM_FAULT_UFFD_MINOR, "UFFD_MINOR" }, \
struct vm_special_mapping { const char *name; /* The name, e.g. "[vdso]". */ diff --git a/mm/memory.c b/mm/memory.c index b59ae7ce42eb..94acbac8cefb 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5279,6 +5279,8 @@ static vm_fault_t __do_fault(struct vm_fault *vmf) }
ret = vma->vm_ops->fault(vmf); + if (unlikely(ret & VM_FAULT_UFFD_MINOR)) + return handle_userfault(vmf, VM_UFFD_MINOR); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY | VM_FAULT_DONE_COW))) return ret; diff --git a/mm/shmem.c b/mm/shmem.c index e16c7c8c3e1e..a9a31c0b5979 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2461,7 +2461,7 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index, if (folio && vma && userfaultfd_minor(vma)) { if (!xa_is_value(folio)) folio_put(folio); - *fault_type = handle_userfault(vmf, VM_UFFD_MINOR); + *fault_type = VM_FAULT_UFFD_MINOR; return 0; }