On 28/8/24 02:59, Nicolin Chen wrote:
Introduce a pair of new ioctls to set/unset a per-viommu virtual device id that should be linked to a physical device id via an idev pointer.
Continue to support IOMMU_VIOMMU_TYPE_DEFAULT for a core-managed viommu. Provide a lookup function for drivers to load a device pointer by a virtual device id.
Add a rw_semaphore protection around the vdev_id list. Any future ioctl handlers that potentially access the list must grab the lock too.
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
drivers/iommu/iommufd/device.c | 12 +++ drivers/iommu/iommufd/iommufd_private.h | 21 ++++ drivers/iommu/iommufd/main.c | 6 ++ drivers/iommu/iommufd/viommu.c | 121 ++++++++++++++++++++++++ include/uapi/linux/iommufd.h | 40 ++++++++ 5 files changed, 200 insertions(+)
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 5fd3dd420290..3ad759971b32 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -136,6 +136,18 @@ void iommufd_device_destroy(struct iommufd_object *obj) struct iommufd_device *idev = container_of(obj, struct iommufd_device, obj);
- /* Unlocked since there should be no race in a destroy() */
- if (idev->vdev_id) {
struct iommufd_vdev_id *vdev_id = idev->vdev_id;
struct iommufd_viommu *viommu = vdev_id->viommu;
struct iommufd_vdev_id *old;
old = xa_cmpxchg(&viommu->vdev_ids, vdev_id->id, vdev_id, NULL,
GFP_KERNEL);
WARN_ON(old != vdev_id);
kfree(vdev_id);
idev->vdev_id = NULL;
- } iommu_device_release_dma_owner(idev->dev); iommufd_put_group(idev->igroup); if (!iommufd_selftest_is_mock_dev(idev->dev))
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 1f2a1c133b9a..2c6e168c5300 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -416,6 +416,7 @@ struct iommufd_device { struct iommufd_object obj; struct iommufd_ctx *ictx; struct iommufd_group *igroup;
- struct iommufd_vdev_id *vdev_id; struct list_head group_item; /* always the physical device */ struct device *dev;
@@ -533,11 +534,31 @@ struct iommufd_viommu { struct iommufd_ctx *ictx; struct iommufd_hwpt_paging *hwpt;
- /* The locking order is vdev_ids_rwsem -> igroup::lock */
- struct rw_semaphore vdev_ids_rwsem;
- struct xarray vdev_ids;
- unsigned int type; };
+struct iommufd_vdev_id {
- struct iommufd_viommu *viommu;
- struct iommufd_device *idev;
- u64 id;
+};
+static inline struct iommufd_viommu * +iommufd_get_viommu(struct iommufd_ucmd *ucmd, u32 id) +{
- return container_of(iommufd_get_object(ucmd->ictx, id,
IOMMUFD_OBJ_VIOMMU),
struct iommufd_viommu, obj);
+}
- int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd); void iommufd_viommu_destroy(struct iommufd_object *obj);
+int iommufd_viommu_set_vdev_id(struct iommufd_ucmd *ucmd); +int iommufd_viommu_unset_vdev_id(struct iommufd_ucmd *ucmd); #ifdef CONFIG_IOMMUFD_TEST int iommufd_test(struct iommufd_ucmd *ucmd); diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 288ee51b6829..199ad90fa36b 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -334,6 +334,8 @@ union ucmd_buffer { struct iommu_option option; struct iommu_vfio_ioas vfio_ioas; struct iommu_viommu_alloc viommu;
- struct iommu_viommu_set_vdev_id set_vdev_id;
- struct iommu_viommu_unset_vdev_id unset_vdev_id; #ifdef CONFIG_IOMMUFD_TEST struct iommu_test_cmd test; #endif
@@ -387,6 +389,10 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { __reserved), IOCTL_OP(IOMMU_VIOMMU_ALLOC, iommufd_viommu_alloc_ioctl, struct iommu_viommu_alloc, out_viommu_id),
- IOCTL_OP(IOMMU_VIOMMU_SET_VDEV_ID, iommufd_viommu_set_vdev_id,
struct iommu_viommu_set_vdev_id, vdev_id),
- IOCTL_OP(IOMMU_VIOMMU_UNSET_VDEV_ID, iommufd_viommu_unset_vdev_id,
struct iommu_viommu_unset_vdev_id, vdev_id),
#ifdef CONFIG_IOMMUFD_TEST IOCTL_OP(IOMMU_TEST_CMD, iommufd_test, struct iommu_test_cmd, last), #endif
diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c index 200653a4bf57..8ffcd72b16b8 100644 --- a/drivers/iommu/iommufd/viommu.c +++ b/drivers/iommu/iommufd/viommu.c @@ -8,6 +8,15 @@ void iommufd_viommu_destroy(struct iommufd_object *obj) { struct iommufd_viommu *viommu = container_of(obj, struct iommufd_viommu, obj);
- struct iommufd_vdev_id *vdev_id;
- unsigned long index;
- xa_for_each(&viommu->vdev_ids, index, vdev_id) {
/* Unlocked since there should be no race in a destroy() */
vdev_id->idev->vdev_id = NULL;
kfree(vdev_id);
- }
- xa_destroy(&viommu->vdev_ids);
refcount_dec(&viommu->hwpt->common.obj.users); } @@ -53,6 +62,9 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd) viommu->ictx = ucmd->ictx; viommu->hwpt = hwpt_paging;
- xa_init(&viommu->vdev_ids);
- init_rwsem(&viommu->vdev_ids_rwsem);
- refcount_inc(&viommu->hwpt->common.obj.users);
cmd->out_viommu_id = viommu->obj.id; @@ -70,3 +82,112 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd) iommufd_put_object(ucmd->ictx, &idev->obj); return rc; }
+int iommufd_viommu_set_vdev_id(struct iommufd_ucmd *ucmd) +{
- struct iommu_viommu_set_vdev_id *cmd = ucmd->cmd;
- struct iommufd_vdev_id *vdev_id, *curr;
- struct iommufd_viommu *viommu;
- struct iommufd_device *idev;
- int rc = 0;
- if (cmd->vdev_id > ULONG_MAX)
return -EINVAL;
- viommu = iommufd_get_viommu(ucmd, cmd->viommu_id);
- if (IS_ERR(viommu))
return PTR_ERR(viommu);
- idev = iommufd_get_device(ucmd, cmd->dev_id);
- if (IS_ERR(idev)) {
rc = PTR_ERR(idev);
goto out_put_viommu;
- }
- down_write(&viommu->vdev_ids_rwsem);
- mutex_lock(&idev->igroup->lock);
- if (idev->vdev_id) {
rc = -EEXIST;
goto out_unlock_igroup;
- }
- vdev_id = kzalloc(sizeof(*vdev_id), GFP_KERNEL);
- if (!vdev_id) {
rc = -ENOMEM;
goto out_unlock_igroup;
- }
- vdev_id->idev = idev;
- vdev_id->viommu = viommu;
- vdev_id->id = cmd->vdev_id;
- curr = xa_cmpxchg(&viommu->vdev_ids, cmd->vdev_id, NULL, vdev_id,
GFP_KERNEL);
- if (curr) {
rc = xa_err(curr) ? : -EBUSY;
goto out_free;
- }
- idev->vdev_id = vdev_id;
- goto out_unlock_igroup;
+out_free:
- kfree(vdev_id);
+out_unlock_igroup:
- mutex_unlock(&idev->igroup->lock);
- up_write(&viommu->vdev_ids_rwsem);
- iommufd_put_object(ucmd->ictx, &idev->obj);
+out_put_viommu:
- iommufd_put_object(ucmd->ictx, &viommu->obj);
- return rc;
+}
+int iommufd_viommu_unset_vdev_id(struct iommufd_ucmd *ucmd) +{
- struct iommu_viommu_unset_vdev_id *cmd = ucmd->cmd;
- struct iommufd_viommu *viommu;
- struct iommufd_vdev_id *old;
- struct iommufd_device *idev;
- int rc = 0;
- if (cmd->vdev_id > ULONG_MAX)
return -EINVAL;
- viommu = iommufd_get_viommu(ucmd, cmd->viommu_id);
- if (IS_ERR(viommu))
return PTR_ERR(viommu);
- idev = iommufd_get_device(ucmd, cmd->dev_id);
- if (IS_ERR(idev)) {
rc = PTR_ERR(idev);
goto out_put_viommu;
- }
- down_write(&viommu->vdev_ids_rwsem);
- mutex_lock(&idev->igroup->lock);
- if (!idev->vdev_id) {
rc = -ENOENT;
goto out_unlock_igroup;
- }
- if (idev->vdev_id->id != cmd->vdev_id) {
rc = -EINVAL;
goto out_unlock_igroup;
- }
- old = xa_cmpxchg(&viommu->vdev_ids, idev->vdev_id->id,
idev->vdev_id, NULL, GFP_KERNEL);
- if (xa_is_err(old)) {
rc = xa_err(old);
goto out_unlock_igroup;
- }
- kfree(old);
- idev->vdev_id = NULL;
+out_unlock_igroup:
- mutex_unlock(&idev->igroup->lock);
- up_write(&viommu->vdev_ids_rwsem);
- iommufd_put_object(ucmd->ictx, &idev->obj);
+out_put_viommu:
- iommufd_put_object(ucmd->ictx, &viommu->obj);
- return rc;
+} diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 51ce6a019c34..1816e89c922d 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -52,6 +52,8 @@ enum { IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d, IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e, IOMMUFD_CMD_VIOMMU_ALLOC = 0x8f,
- IOMMUFD_CMD_VIOMMU_SET_VDEV_ID = 0x90,
- IOMMUFD_CMD_VIOMMU_UNSET_VDEV_ID = 0x91, };
/** @@ -882,4 +884,42 @@ struct iommu_viommu_alloc { __u32 out_viommu_id; }; #define IOMMU_VIOMMU_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_ALLOC)
+/**
- struct iommu_viommu_set_vdev_id - ioctl(IOMMU_VIOMMU_SET_VDEV_ID)
- @size: sizeof(struct iommu_viommu_set_vdev_id)
- @viommu_id: viommu ID to associate with the device to store its virtual ID
- @dev_id: device ID to set its virtual ID
- @__reserved: Must be 0
- @vdev_id: Virtual device ID
- Set a viommu-specific virtual ID of a device
- */
+struct iommu_viommu_set_vdev_id {
- __u32 size;
- __u32 viommu_id;
- __u32 dev_id;
Is this ID from vfio_device_bind_iommufd.out_devid?
- __u32 __reserved;
- __aligned_u64 vdev_id;
What is the nature of this id? It is not the guest's BDFn, is it? The code suggests it is ARM's "SID" == "stream ID" and "a device might be able to generate multiple StreamIDs" (how, why?) 🤯 And these streams seem to have nothing to do with PCIe IDE streams, right?
For my SEV-TIO exercise ("trusted IO"), I am looking for a kernel interface to pass the guest's BDFs for a specific host device (which is passed through), and nothing in the kernel has any knowledge of it atm. Is this the right place, or is another ioctl() needed here?
Sorry, I am too ignorant about ARM :)
+}; +#define IOMMU_VIOMMU_SET_VDEV_ID _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_SET_VDEV_ID)
+/**
- struct iommu_viommu_unset_vdev_id - ioctl(IOMMU_VIOMMU_UNSET_VDEV_ID)
- @size: sizeof(struct iommu_viommu_unset_vdev_id)
- @viommu_id: viommu ID associated with the device to delete its virtual ID
- @dev_id: device ID to unset its virtual ID
- @__reserved: Must be 0
- @vdev_id: Virtual device ID (for verification)
- Unset a viommu-specific virtual ID of a device
- */
+struct iommu_viommu_unset_vdev_id {
- __u32 size;
- __u32 viommu_id;
- __u32 dev_id;
- __u32 __reserved;
- __aligned_u64 vdev_id;
+}; +#define IOMMU_VIOMMU_UNSET_VDEV_ID _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_UNSET_VDEV_ID) #endif
Nit: "git format-patch -O orderfile" makes patches nicer by putting the documentation first (.h before .c, in this case) with the "orderfile" looking like this:
=== *.txt configure *Makefile* *.json *.h *.c ===
Thanks,