On Tue, Nov 18, 2025 at 05:41:13PM +0100, David Hildenbrand (Red Hat) wrote:
On 17.11.25 12:46, Mike Rapoport wrote:
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index fbca8c0972da..5e3c63307fdf 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -4,6 +4,7 @@
 #include <linux/kvm_host.h>
 #include <linux/pagemap.h>
 #include <linux/anon_inodes.h>
+#include <linux/userfaultfd_k.h>
 #include "kvm_mm.h"
@@ -369,6 +370,12 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
 		return vmf_error(err);
 	}
+	if (userfaultfd_minor(vmf->vma)) {
+		folio_unlock(folio);
+		folio_put(folio);
+		return handle_userfault(vmf, VM_UFFD_MINOR);
+	}
Staring at things like VM_FAULT_NEEDDSYNC, I'm wondering whether we could have a new return value from ->fault that would indicate that handle_userfault(vmf, VM_UFFD_MINOR) should be called.
Maybe some VM_FAULT_UFFD_MINOR or simply VM_FAULT_USERFAULTFD and we can just derive that it is VM_UFFD_MINOR.
_UFFD_MINOR sounds better, maybe we'll want something for missing later on.
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 4f66a3206a63c..2cf17da880f0e 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1601,6 +1601,8 @@ typedef __bitwise unsigned int vm_fault_t;
  *				fsync() to complete (for synchronous page faults
  *				in DAX)
  * @VM_FAULT_COMPLETED:		->fault completed, meanwhile mmap lock released
+ * @VM_FAULT_USERFAULTFD:	->fault did not modify page tables and needs
+ *				handle_userfault() to complete
  * @VM_FAULT_HINDEX_MASK:	mask HINDEX value
  *
  */
@@ -1618,6 +1620,7 @@ enum vm_fault_reason {
 	VM_FAULT_DONE_COW       = (__force vm_fault_t)0x001000,
 	VM_FAULT_NEEDDSYNC      = (__force vm_fault_t)0x002000,
 	VM_FAULT_COMPLETED      = (__force vm_fault_t)0x004000,
+	VM_FAULT_USERFAULTFD    = (__force vm_fault_t)0x008000,
 	VM_FAULT_HINDEX_MASK    = (__force vm_fault_t)0x0f0000,
 };
@@ -1642,6 +1645,7 @@ enum vm_fault_reason {
 	{ VM_FAULT_FALLBACK,            "FALLBACK" },	\
 	{ VM_FAULT_DONE_COW,            "DONE_COW" },	\
 	{ VM_FAULT_NEEDDSYNC,           "NEEDDSYNC" },	\
+	{ VM_FAULT_USERFAULTFD,         "USERFAULTFD" },	\
 	{ VM_FAULT_COMPLETED,           "COMPLETED" }
 struct vm_special_mapping {
IIUC, we have exactly two invocations of ->fault(vmf) in memory.c where we would have to handle it. And the return value would never leave the core.
I've found only one :/ But nevertheless, I like the idea to return VM_FAULT_UFFD_MINOR from ->fault() and then call handle_userfault() from __do_fault().
That way, we wouldn't have to export handle_userfault().
Just a thought ...
-- Cheers
David