On Tue, Mar 9, 2021 at 11:52 AM Zi Yan ziy@nvidia.com wrote:
On 1 Mar 2021, at 19:01, Axel Rasmussen wrote:
Modify the userfaultfd register API to allow registering shmem VMAs in minor mode. Modify the shmem mcopy implementation to support UFFDIO_CONTINUE in order to resolve such faults.
Combine the shmem mcopy handler functions into a single shmem_mcopy_atomic_pte, which takes a mode parameter. This matches how the hugetlbfs implementation is structured, and lets us remove a good chunk of boilerplate.
Signed-off-by: Axel Rasmussen axelrasmussen@google.com
fs/userfaultfd.c | 6 +-- include/linux/shmem_fs.h | 26 ++++----- include/uapi/linux/userfaultfd.h | 4 +- mm/memory.c | 8 +-- mm/shmem.c | 92 +++++++++++++++----------------- mm/userfaultfd.c | 27 +++++----- 6 files changed, 79 insertions(+), 84 deletions(-)
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 14f92285d04f..9f3b8684cf3c 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1267,8 +1267,7 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma, }
if (vm_flags & VM_UFFD_MINOR) {
/* FIXME: Add minor fault interception for shmem. */
if (!is_vm_hugetlb_page(vma))
if (!(is_vm_hugetlb_page(vma) || vma_is_shmem(vma))) return false; }
@@ -1941,7 +1940,8 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx, /* report all available features and ioctls to userland */ uffdio_api.features = UFFD_API_FEATURES; #ifndef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
uffdio_api.features &= ~UFFD_FEATURE_MINOR_HUGETLBFS;
uffdio_api.features &=
~(UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM);
#endif uffdio_api.ioctls = UFFD_API_IOCTLS; ret = -EFAULT; diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index d82b6f396588..f0919c3722e7 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -9,6 +9,7 @@ #include <linux/percpu_counter.h> #include <linux/xattr.h> #include <linux/fs_parser.h> +#include <linux/userfaultfd_k.h>
/* inode in-kernel data */
@@ -122,21 +123,16 @@ static inline bool shmem_file(struct file *file) extern bool shmem_charge(struct inode *inode, long pages); extern void shmem_uncharge(struct inode *inode, long pages);
+#ifdef CONFIG_USERFAULTFD #ifdef CONFIG_SHMEM -extern int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
struct vm_area_struct *dst_vma,
unsigned long dst_addr,
unsigned long src_addr,
struct page **pagep);
-extern int shmem_mfill_zeropage_pte(struct mm_struct *dst_mm,
pmd_t *dst_pmd,
struct vm_area_struct *dst_vma,
unsigned long dst_addr);
-#else -#define shmem_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \
src_addr, pagep) ({ BUG(); 0; })
-#define shmem_mfill_zeropage_pte(dst_mm, dst_pmd, dst_vma, \
dst_addr) ({ BUG(); 0; })
-#endif +int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
struct vm_area_struct *dst_vma,
unsigned long dst_addr, unsigned long src_addr,
enum mcopy_atomic_mode mode, struct page **pagep);
+#else /* !CONFIG_SHMEM */ +#define shmem_mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr, \
src_addr, mode, pagep) ({ BUG(); 0; })
+#endif /* CONFIG_SHMEM */ +#endif /* CONFIG_USERFAULTFD */
#endif diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index bafbeb1a2624..47d9790d863d 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -31,7 +31,8 @@ UFFD_FEATURE_MISSING_SHMEM | \ UFFD_FEATURE_SIGBUS | \ UFFD_FEATURE_THREAD_ID | \
UFFD_FEATURE_MINOR_HUGETLBFS)
UFFD_FEATURE_MINOR_HUGETLBFS | \
UFFD_FEATURE_MINOR_SHMEM)
#define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -196,6 +197,7 @@ struct uffdio_api { #define UFFD_FEATURE_SIGBUS (1<<7) #define UFFD_FEATURE_THREAD_ID (1<<8) #define UFFD_FEATURE_MINOR_HUGETLBFS (1<<9) +#define UFFD_FEATURE_MINOR_SHMEM (1<<10) __u64 features;
__u64 ioctls;
diff --git a/mm/memory.c b/mm/memory.c index c8e357627318..a1e5ff55027e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3929,9 +3929,11 @@ static vm_fault_t do_read_fault(struct vm_fault *vmf) * something). */ if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) {
ret = do_fault_around(vmf);
if (ret)
return ret;
if (likely(!userfaultfd_minor(vmf->vma))) {
ret = do_fault_around(vmf);
if (ret)
return ret;
} } ret = __do_fault(vmf);
diff --git a/mm/shmem.c b/mm/shmem.c index b2db4ed0fbc7..6f81259fabb3 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -77,7 +77,6 @@ static struct vfsmount *shm_mnt; #include <linux/syscalls.h> #include <linux/fcntl.h> #include <uapi/linux/memfd.h> -#include <linux/userfaultfd_k.h> #include <linux/rmap.h> #include <linux/uuid.h>
@@ -1785,8 +1784,8 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
- vm. If we swap it in we mark it dirty since we also free the swap
- entry since a page cannot live in both the swap and page cache.
- vmf and fault_type are only supplied by shmem_fault:
- otherwise they are NULL.
- vma, vmf, and fault_type are only supplied by shmem_fault: otherwise they
*/
- are NULL.
static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, struct page **pagep, enum sgp_type sgp, gfp_t gfp, @@ -1830,6 +1829,12 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, return error; }
if (page && vma && userfaultfd_minor(vma)) {
unlock_page(page);
*fault_type = handle_userfault(vmf, VM_UFFD_MINOR);
return 0;
}
if (page) hindex = page->index; if (page && sgp == SGP_WRITE)
@@ -2354,14 +2359,12 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode return inode; }
-static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
pmd_t *dst_pmd,
struct vm_area_struct *dst_vma,
unsigned long dst_addr,
unsigned long src_addr,
bool zeropage,
struct page **pagep)
+int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
struct vm_area_struct *dst_vma,
unsigned long dst_addr, unsigned long src_addr,
enum mcopy_atomic_mode mode, struct page **pagep)
{
bool is_continue = (mode == MCOPY_ATOMIC_CONTINUE); struct inode *inode = file_inode(dst_vma->vm_file); struct shmem_inode_info *info = SHMEM_I(inode); struct address_space *mapping = inode->i_mapping;
@@ -2378,12 +2381,17 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, if (!shmem_inode_acct_block(inode, 1)) goto out;
if (!*pagep) {
if (is_continue) {
ret = -EFAULT;
page = find_lock_page(mapping, pgoff);
if (!page)
goto out_unacct_blocks;
} else if (!*pagep) { page = shmem_alloc_page(gfp, info, pgoff); if (!page) goto out_unacct_blocks;
if (!zeropage) { /* mcopy_atomic */
if (mode == MCOPY_ATOMIC_NORMAL) { /* mcopy_atomic */ page_kaddr = kmap_atomic(page); ret = copy_from_user(page_kaddr, (const void __user *)src_addr,
Hi Axel,
shmem_mcopy_atomic_pte is not guarded by CONFIG_USERFAULTFD, thus it is causing compilation errors due to the use of enum mcopy_atomic_mode mode, when CONFIG_USERFAULTFD is not set.
Ah, my apologies, I guarded it in the header but forgot to do so in shmem.c. I'll send an updated patch today.
— Best Regards, Yan Zi