For a vIOMMU that passes HW resources through to user space (VMs), allowing a VM to control the passed-through HW directly by accessing hardware registers, add an mmap infrastructure to map the physical MMIO pages to user space.
Maintain a maple tree per ictx as a translation table for mmappable regions, mapping an allocated for-user mmap offset to an iommufd_mmap struct, which stores the real PFN range for a remap_pfn_range() call.
To allow an IOMMU driver to add mmappable regions to the maple tree and delete them from it, add the iommufd_viommu_alloc_mmap() and iommufd_viommu_destroy_mmap() helpers.
Signed-off-by: Nicolin Chen nicolinc@nvidia.com --- drivers/iommu/iommufd/iommufd_private.h | 11 +++++ include/linux/iommufd.h | 39 ++++++++++++++++ drivers/iommu/iommufd/driver.c | 52 +++++++++++++++++++++ drivers/iommu/iommufd/main.c | 61 +++++++++++++++++++++++++ 4 files changed, 163 insertions(+)
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 36a4e060982f..c87326d7ecfc 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -7,6 +7,7 @@ #include <linux/iommu.h> #include <linux/iommufd.h> #include <linux/iova_bitmap.h> +#include <linux/maple_tree.h> #include <linux/rwsem.h> #include <linux/uaccess.h> #include <linux/xarray.h> @@ -44,6 +45,7 @@ struct iommufd_ctx { struct xarray groups; wait_queue_head_t destroy_wait; struct rw_semaphore ioas_creation_lock; + struct maple_tree mt_mmap;
struct mutex sw_msi_lock; struct list_head sw_msi_list; @@ -55,6 +57,15 @@ struct iommufd_ctx { struct iommufd_ioas *vfio_ioas; };
+/* Entry for iommufd_ctx::mt_mmap */ +struct iommufd_mmap { + struct iommufd_object *owner; + + /* Physical range for remap_pfn_range() */ + unsigned long base_pfn; + unsigned long num_pfns; +}; + /* * The IOVA to PFN map. The map automatically copies the PFNs into multiple * domains and permits sharing of PFNs between io_pagetable instances. This diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index ddca0d2835df..47af130f4212 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -245,6 +245,10 @@ int _iommufd_object_depend(struct iommufd_object *obj_dependent, struct iommufd_object *obj_depended); void _iommufd_object_undepend(struct iommufd_object *obj_dependent, struct iommufd_object *obj_depended); +int _iommufd_alloc_mmap(struct iommufd_ctx *ictx, struct iommufd_object *owner, + phys_addr_t base, size_t length, unsigned long *offset); +void _iommufd_destroy_mmap(struct iommufd_ctx *ictx, + struct iommufd_object *owner, unsigned long offset); struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id); int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, @@ -277,6 +281,20 @@ _iommufd_object_undepend(struct iommufd_object *obj_dependent, { }
+static inline int _iommufd_alloc_mmap(struct iommufd_ctx *ictx, + struct iommufd_object *owner, + phys_addr_t base, size_t length, + unsigned long *offset) +{ + return -EOPNOTSUPP; +} + +static inline void _iommufd_destroy_mmap(struct iommufd_ctx *ictx, + struct iommufd_object *owner, + unsigned long offset) +{ +} + static inline struct device * iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id) { @@ -390,4 +408,25 @@ static inline int iommufd_viommu_report_event(struct iommufd_viommu *viommu, _iommufd_object_undepend(&dependent->member.obj, \ &depended->member.obj); \ }) + +/* + * Helpers for IOMMU driver to alloc/destroy an mmapable area for a structure. + * Driver should report the @out_offset and @length to user space for an mmap() + */ +#define iommufd_viommu_alloc_mmap(viommu, member, base, length, out_offset) \ + ({ \ + static_assert(__same_type(struct iommufd_viommu, \ + viommu->member)); \ + static_assert(offsetof(typeof(*viommu), member.obj) == 0); \ + _iommufd_alloc_mmap(viommu->member.ictx, &viommu->member.obj, \ + base, length, out_offset); \ + }) +#define iommufd_viommu_destroy_mmap(viommu, member, offset) \ + ({ \ + static_assert(__same_type(struct iommufd_viommu, \ + viommu->member)); \ + static_assert(offsetof(typeof(*viommu), member.obj) == 0); \ + _iommufd_destroy_mmap(viommu->member.ictx, \ + &viommu->member.obj, offset); \ + }) #endif diff --git a/drivers/iommu/iommufd/driver.c b/drivers/iommu/iommufd/driver.c index 0bcf0438d255..2d2695e2562d 100644 --- a/drivers/iommu/iommufd/driver.c +++ b/drivers/iommu/iommufd/driver.c @@ -78,6 +78,58 @@ void _iommufd_object_undepend(struct iommufd_object *obj_dependent, } EXPORT_SYMBOL_NS_GPL(_iommufd_object_undepend, "IOMMUFD");
+/* + * Allocate an @offset to return to user space to use for an mmap() syscall + * + * Driver should use a per-structure helper in include/linux/iommufd.h + */ +int _iommufd_alloc_mmap(struct iommufd_ctx *ictx, struct iommufd_object *owner, + phys_addr_t base, size_t length, unsigned long *offset) +{ + struct iommufd_mmap *immap; + unsigned long startp; + int num_pfns, rc; + + if (WARN_ON_ONCE(!offset)) + return -EINVAL; + if (!PAGE_ALIGNED(base)) + return -EINVAL; + if (!length || !PAGE_ALIGNED(length)) + return -EINVAL; + num_pfns = length >> PAGE_SHIFT; + + immap = kzalloc(sizeof(*immap), GFP_KERNEL); + if (!immap) + return -ENOMEM; + immap->owner = owner; + immap->base_pfn = base >> PAGE_SHIFT; + immap->num_pfns = length >> PAGE_SHIFT; + + rc = mtree_alloc_range(&ictx->mt_mmap, &startp, immap, immap->num_pfns, + 0, U32_MAX >> PAGE_SHIFT, GFP_KERNEL); + if (rc < 0) { + kfree(immap); + return rc; + } + + /* mmap() syscall will right-shift the offset in vma->vm_pgoff */ + *offset = startp << PAGE_SHIFT; + return 0; +} +EXPORT_SYMBOL_NS_GPL(_iommufd_alloc_mmap, "IOMMUFD"); + +/* Driver should use a per-structure helper in include/linux/iommufd.h */ +void _iommufd_destroy_mmap(struct iommufd_ctx *ictx, + struct iommufd_object *owner, unsigned long offset) +{ + struct iommufd_mmap *immap; + + immap = mtree_erase(&ictx->mt_mmap, offset >> PAGE_SHIFT); + WARN_ON_ONCE(!immap || immap->owner != owner); + kfree(immap); +} +EXPORT_SYMBOL_NS_GPL(_iommufd_destroy_mmap, "IOMMUFD"); + /* Caller should xa_lock(&viommu->vdevs) to protect the return value */ struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id) diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 10410e2f710a..1d7f3584aea0 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -213,6 +213,7 @@ static int iommufd_fops_open(struct inode *inode, struct file *filp) xa_init_flags(&ictx->objects, XA_FLAGS_ALLOC1 | 
XA_FLAGS_ACCOUNT); xa_init(&ictx->groups); ictx->file = filp; + mt_init_flags(&ictx->mt_mmap, MT_FLAGS_ALLOC_RANGE); init_waitqueue_head(&ictx->destroy_wait); mutex_init(&ictx->sw_msi_lock); INIT_LIST_HEAD(&ictx->sw_msi_list); @@ -410,11 +411,71 @@ static long iommufd_fops_ioctl(struct file *filp, unsigned int cmd, return ret; }
+static void iommufd_fops_vma_open(struct vm_area_struct *vma) +{ + struct iommufd_mmap *immap = vma->vm_private_data; + + refcount_inc(&immap->owner->users); +} + +static void iommufd_fops_vma_close(struct vm_area_struct *vma) +{ + struct iommufd_mmap *immap = vma->vm_private_data; + + refcount_dec(&immap->owner->users); +} + +static const struct vm_operations_struct iommufd_vma_ops = { + .open = iommufd_fops_vma_open, + .close = iommufd_fops_vma_close, +}; + +/* + * Kernel driver must first use the for-driver helpers to register an mmappable + * MMIO region to the iommufd core to allocate an offset. Then, it should report + * to user space this offset and the length of the MMIO region for mmap syscall, + * via a prior IOMMU_VIOMMU_ALLOC ioctl. + */ +static int iommufd_fops_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct iommufd_ctx *ictx = filp->private_data; + size_t length = vma->vm_end - vma->vm_start; + struct iommufd_mmap *immap; + int rc; + + if (!PAGE_ALIGNED(length)) + return -EINVAL; + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + if (vma->vm_flags & VM_EXEC) + return -EPERM; + + /* vma->vm_pgoff carries an index to an mtree entry (immap) */ + immap = mtree_load(&ictx->mt_mmap, vma->vm_pgoff); + if (!immap) + return -ENXIO; + if (length >> PAGE_SHIFT != immap->num_pfns) + return -ENXIO; + + vma->vm_pgoff = 0; + vma->vm_private_data = immap; + vma->vm_ops = &iommufd_vma_ops; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_IO); + + rc = remap_pfn_range(vma, vma->vm_start, immap->base_pfn, length, + vma->vm_page_prot); + if (!rc) /* vm_ops.open won't be called for mmap itself. */ + refcount_inc(&immap->owner->users); + return rc; +} + static const struct file_operations iommufd_fops = { .owner = THIS_MODULE, .open = iommufd_fops_open, .release = iommufd_fops_release, .unlocked_ioctl = iommufd_fops_ioctl, + .mmap = iommufd_fops_mmap, };
/**