On Thu, Oct 26, 2023 at 10:49:28AM +0800, Lu Baolu wrote:
Add the file interface that provides a simple and efficient way for userspace to handle page faults. The file interface allows userspace to read fault messages sequentially, and to respond to the handling result by writing to the same file.
Userspace applications are recommended to use io_uring to speed up read and write efficiency.
With this done, allow userspace applications to allocate a hw page table with the IOMMU_HWPT_ALLOC_IOPF_CAPABLE flag set.
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
drivers/iommu/iommufd/iommufd_private.h | 2 + drivers/iommu/iommufd/hw_pagetable.c | 204 +++++++++++++++++++++++- 2 files changed, 205 insertions(+), 1 deletion(-)
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 0dbaa2dc5b22..ff063bc48150 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -237,6 +237,8 @@ struct hw_pgtable_fault { struct mutex mutex; struct list_head deliver; struct list_head response;
- struct file *fault_file;
- int fault_fd;
}; /* diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index 9f94c824cf86..f0aac1bb2d2d 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -3,6 +3,8 @@
- Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
*/ #include <linux/iommu.h> +#include <linux/file.h> +#include <linux/anon_inodes.h> #include <uapi/linux/iommufd.h> #include "../iommu-priv.h" @@ -38,9 +40,198 @@ static void iommufd_kernel_managed_hwpt_destroy(struct iommufd_object *obj) refcount_dec(&hwpt->ioas->obj.users); } +static int iommufd_compose_fault_message(struct iommu_fault *fault,
struct iommu_hwpt_pgfault *hwpt_fault,
struct device *dev)
+{
- struct iommufd_device *idev = iopf_pasid_cookie_get(dev, IOMMU_NO_PASID);
- if (!idev)
return -ENODEV;
- if (IS_ERR(idev))
return PTR_ERR(idev);
- hwpt_fault->size = sizeof(*hwpt_fault);
- hwpt_fault->flags = fault->prm.flags;
- hwpt_fault->dev_id = idev->obj.id;
- hwpt_fault->pasid = fault->prm.pasid;
- hwpt_fault->grpid = fault->prm.grpid;
- hwpt_fault->perm = fault->prm.perm;
- hwpt_fault->addr = fault->prm.addr;
- hwpt_fault->private_data[0] = fault->prm.private_data[0];
- hwpt_fault->private_data[1] = fault->prm.private_data[1];
- return 0;
+}
+static ssize_t hwpt_fault_fops_read(struct file *filep, char __user *buf,
size_t count, loff_t *ppos)
+{
- size_t fault_size = sizeof(struct iommu_hwpt_pgfault);
- struct hw_pgtable_fault *fault = filep->private_data;
- struct iommu_hwpt_pgfault data;
- struct iopf_group *group;
- struct iopf_fault *iopf;
- size_t done = 0;
- int rc;
- if (*ppos || count % fault_size)
return -ESPIPE;
- mutex_lock(&fault->mutex);
- while (!list_empty(&fault->deliver) && count > done) {
group = list_first_entry(&fault->deliver,
struct iopf_group, node);
if (list_count_nodes(&group->faults) * fault_size > count - done)
break;
list_for_each_entry(iopf, &group->faults, list) {
rc = iommufd_compose_fault_message(&iopf->fault,
&data, group->dev);
if (rc)
goto err_unlock;
rc = copy_to_user(buf + done, &data, fault_size);
if (rc)
goto err_unlock;
done += fault_size;
}
list_move_tail(&group->node, &fault->response);
- }
- mutex_unlock(&fault->mutex);
- return done;
+err_unlock:
- mutex_unlock(&fault->mutex);
- return rc;
+}
+static ssize_t hwpt_fault_fops_write(struct file *filep,
const char __user *buf,
size_t count, loff_t *ppos)
+{
- size_t response_size = sizeof(struct iommu_hwpt_page_response);
- struct hw_pgtable_fault *fault = filep->private_data;
- struct iommu_hwpt_page_response response;
- struct iommufd_hw_pagetable *hwpt;
- struct iopf_group *iter, *group;
- struct iommufd_device *idev;
- size_t done = 0;
- int rc = 0;
- if (*ppos || count % response_size)
return -ESPIPE;
- mutex_lock(&fault->mutex);
- while (!list_empty(&fault->response) && count > done) {
rc = copy_from_user(&response, buf + done, response_size);
if (rc)
break;
/* Get the device that this response targets at. */
idev = container_of(iommufd_get_object(fault->ictx,
response.dev_id,
IOMMUFD_OBJ_DEVICE),
struct iommufd_device, obj);
if (IS_ERR(idev)) {
rc = PTR_ERR(idev);
break;
}
/*
* Get the hw page table that this response was generated for.
* It must match the one stored in the fault data.
*/
hwpt = container_of(iommufd_get_object(fault->ictx,
response.hwpt_id,
IOMMUFD_OBJ_HW_PAGETABLE),
struct iommufd_hw_pagetable, obj);
if (IS_ERR(hwpt)) {
iommufd_put_object(&idev->obj);
rc = PTR_ERR(hwpt);
break;
}
if (hwpt != fault->hwpt) {
rc = -EINVAL;
goto put_obj;
}
group = NULL;
list_for_each_entry(iter, &fault->response, node) {
if (response.grpid != iter->last_fault.fault.prm.grpid)
continue;
if (idev->dev != iter->dev)
continue;
if ((iter->last_fault.fault.prm.flags &
IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) &&
response.pasid != iter->last_fault.fault.prm.pasid)
continue;
group = iter;
break;
}
if (!group) {
rc = -ENODEV;
goto put_obj;
}
rc = iopf_group_response(group, response.code);
if (rc)
goto put_obj;
list_del(&group->node);
iopf_free_group(group);
done += response_size;
+put_obj:
iommufd_put_object(&hwpt->obj);
iommufd_put_object(&idev->obj);
if (rc)
break;
- }
- mutex_unlock(&fault->mutex);
- return (rc < 0) ? rc : done;
+}
+static const struct file_operations hwpt_fault_fops = {
- .owner = THIS_MODULE,
- .read = hwpt_fault_fops_read,
- .write = hwpt_fault_fops_write,
+};
+static int hw_pagetable_get_fault_fd(struct hw_pgtable_fault *fault) +{
- struct file *filep;
- int fdno;
- fdno = get_unused_fd_flags(O_CLOEXEC);
- if (fdno < 0)
return fdno;
- filep = anon_inode_getfile("[iommufd-pgfault]", &hwpt_fault_fops,
fault, O_RDWR);
- if (IS_ERR(filep)) {
put_unused_fd(fdno);
return PTR_ERR(filep);
- }
- fd_install(fdno, filep);
- fault->fault_file = filep;
- fault->fault_fd = fdno;
- return 0;
+}
static struct hw_pgtable_fault *hw_pagetable_fault_alloc(void) { struct hw_pgtable_fault *fault;
- int rc;
fault = kzalloc(sizeof(*fault), GFP_KERNEL); if (!fault) @@ -50,6 +241,12 @@ static struct hw_pgtable_fault *hw_pagetable_fault_alloc(void) INIT_LIST_HEAD(&fault->response); mutex_init(&fault->mutex);
- rc = hw_pagetable_get_fault_fd(fault);
- if (rc) {
kfree(fault);
return ERR_PTR(rc);
- }
- return fault;
} @@ -58,6 +255,8 @@ static void hw_pagetable_fault_free(struct hw_pgtable_fault *fault) WARN_ON(!list_empty(&fault->deliver)); WARN_ON(!list_empty(&fault->response));
- fput(fault->fault_file);
- put_unused_fd(fault->fault_fd);
I have been running your code and have hit an invalid memory access on this line. When `put_unused_fd` is called, the files of the current task are accessed through `current->files`. In my case this pointer is 0x0.
The reason for it being 0x0 is that `do_exit` calls `exit_files`, where the task's files pointer is set to NULL; this call is made in `do_exit` before `exit_task_work` is executed.
`exit_task_work` is the call that eventually arrives here, in `hw_pagetable_fault_free`.
The way I arrived at this state is the following: 1. Version of the Linux kernel that I'm using: commit 357b5abcba0477f7f1391dd0fa3a919a6f06bdf0 (HEAD, lubaolu/iommufd-io-pgfault-delivery-v2) 2. Version of qemu that I'm using: commit 577ef478780597d3f449feb01e853b93fa5c5530 (HEAD, yiliu/zhenzhong/wip/iommufd_nesting_rfcv1) 3. This error happens when my user space app is exiting (hence the call to `do_exit`). 4. I call the IOMMU_HWPT_ALLOC ioctl with .flags = IOMMU_HWPT_ALLOC_IOPF_CAPABLE and .hwpt_type = IOMMU_HWPT_TYPE_DEFAULT .pt_id = the default ioas id.
I have resolved this in a naive way by just not calling the put_unused_fd function.
Have you run into this? Is this a path that you were expecting? Also, please get back to me if you need more information about how I got to this place. I have provided what I think is enough info, but I might be missing something obvious.
Best
kfree(fault); } @@ -347,7 +546,9 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) struct mutex *mutex; int rc;
- if (cmd->flags & ~IOMMU_HWPT_ALLOC_NEST_PARENT || cmd->__reserved)
+ if ((cmd->flags & ~(IOMMU_HWPT_ALLOC_NEST_PARENT |
+     IOMMU_HWPT_ALLOC_IOPF_CAPABLE)) ||
+     cmd->__reserved)
 return -EOPNOTSUPP; if (!cmd->data_len && cmd->hwpt_type != IOMMU_HWPT_TYPE_DEFAULT) return -EINVAL;
@@ -416,6 +617,7 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) hwpt->fault->hwpt = hwpt; hwpt->domain->iopf_handler = iommufd_hw_pagetable_iopf_handler; hwpt->domain->fault_data = hwpt;
+ cmd->out_fault_fd = hwpt->fault->fault_fd;
 }
cmd->out_hwpt_id = hwpt->obj.id; -- 2.34.1