From: Leon Romanovsky <leonro@nvidia.com>
dma-buf invalidation is handled asynchronously by the hardware, so VFIO must wait until all affected objects have been fully invalidated.
In addition, the dma-buf exporter expects all importers to unmap any buffers they previously mapped.
Fixes: 5d74781ebc86 ("vfio/pci: Add dma-buf export support for MMIO regions")
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/vfio/pci/vfio_pci_dmabuf.c | 71 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 68 insertions(+), 3 deletions(-)
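For reviewers, the importer side of this contract is the dynamic-attachment
notification callback: it runs with the reservation lock held and must tear
down any mapping it created, which calls back into the exporter's unmap op
(vfio_pci_dma_buf_unmap() below) and drops the mapping kref this patch adds.
A rough sketch of such a callback, using the upstream move_notify hook; the
my_importer type and its fields are illustrative only, not part of this
patch:

static void my_importer_move_notify(struct dma_buf_attachment *attach)
{
	/* Hypothetical importer-private state, for illustration. */
	struct my_importer *imp = attach->importer_priv;

	dma_resv_assert_held(attach->dmabuf->resv);

	if (!imp->sgt)
		return;

	/*
	 * Unmapping invokes the exporter's unmap op, which drops the
	 * kref taken in vfio_pci_dma_buf_map() and eventually lets
	 * the exporter's completion fire.
	 */
	dma_buf_unmap_attachment(attach, imp->sgt, imp->dir);
	imp->sgt = NULL;
}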
diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c
index d8ceafabef48..485515629fe4 100644
--- a/drivers/vfio/pci/vfio_pci_dmabuf.c
+++ b/drivers/vfio/pci/vfio_pci_dmabuf.c
@@ -17,6 +17,8 @@ struct vfio_pci_dma_buf {
 	struct dma_buf_phys_vec *phys_vec;
 	struct p2pdma_provider *provider;
 	u32 nr_ranges;
+	struct kref kref;
+	struct completion comp;
 	u8 revoked : 1;
 };
@@ -44,27 +46,46 @@ static int vfio_pci_dma_buf_attach(struct dma_buf *dmabuf,
 	return 0;
 }
 
+static void vfio_pci_dma_buf_done(struct kref *kref)
+{
+	struct vfio_pci_dma_buf *priv =
+		container_of(kref, struct vfio_pci_dma_buf, kref);
+
+	complete(&priv->comp);
+}
+
 static struct sg_table *
 vfio_pci_dma_buf_map(struct dma_buf_attachment *attachment,
 		     enum dma_data_direction dir)
 {
 	struct vfio_pci_dma_buf *priv = attachment->dmabuf->priv;
+	struct sg_table *ret;
 
 	dma_resv_assert_held(priv->dmabuf->resv);
 
 	if (priv->revoked)
 		return ERR_PTR(-ENODEV);
 
-	return dma_buf_phys_vec_to_sgt(attachment, priv->provider,
-				       priv->phys_vec, priv->nr_ranges,
-				       priv->size, dir);
+	ret = dma_buf_phys_vec_to_sgt(attachment, priv->provider,
+				      priv->phys_vec, priv->nr_ranges,
+				      priv->size, dir);
+	if (IS_ERR(ret))
+		return ret;
+
+	kref_get(&priv->kref);
+	return ret;
 }
 
 static void vfio_pci_dma_buf_unmap(struct dma_buf_attachment *attachment,
 				   struct sg_table *sgt,
 				   enum dma_data_direction dir)
 {
+	struct vfio_pci_dma_buf *priv = attachment->dmabuf->priv;
+
+	dma_resv_assert_held(priv->dmabuf->resv);
+
 	dma_buf_free_sgt(attachment, sgt, dir);
+	kref_put(&priv->kref, vfio_pci_dma_buf_done);
 }
 
 static void vfio_pci_dma_buf_release(struct dma_buf *dmabuf)
@@ -287,6 +308,9 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
 		goto err_dev_put;
 	}
 
+	kref_init(&priv->kref);
+	init_completion(&priv->comp);
+
 	/* dma_buf_put() now frees priv */
 	INIT_LIST_HEAD(&priv->dmabufs_elm);
 	down_write(&vdev->memory_lock);
@@ -326,6 +350,8 @@ void vfio_pci_dma_buf_move(struct vfio_pci_core_device *vdev, bool revoked)
 
 	lockdep_assert_held_write(&vdev->memory_lock);
 
 	list_for_each_entry_safe(priv, tmp, &vdev->dmabufs, dmabufs_elm) {
+		unsigned long wait;
+
 		if (!get_file_active(&priv->dmabuf->file))
 			continue;
@@ -333,7 +359,37 @@ void vfio_pci_dma_buf_move(struct vfio_pci_core_device *vdev, bool revoked)
 			dma_resv_lock(priv->dmabuf->resv, NULL);
 			priv->revoked = revoked;
 			dma_buf_invalidate_mappings(priv->dmabuf);
+			dma_resv_wait_timeout(priv->dmabuf->resv,
+					      DMA_RESV_USAGE_BOOKKEEP, false,
+					      MAX_SCHEDULE_TIMEOUT);
 			dma_resv_unlock(priv->dmabuf->resv);
+			if (revoked) {
+				kref_put(&priv->kref, vfio_pci_dma_buf_done);
+				/* Wait until all DMA unmaps have completed. */
+				wait = wait_for_completion_timeout(
+					&priv->comp, secs_to_jiffies(1));
+				/*
+				 * If this WARN fires, the importer didn't
+				 * unmap in response to
+				 * dma_buf_invalidate_mappings(), which is
+				 * not allowed.
+				 */
+				WARN(!wait,
+				     "Timed out waiting for DMABUF unmap, importer has a broken invalidate_mapping()");
+			} else {
+				/*
+				 * Reinitialize the kref: the previous
+				 * revoke dropped the reference count to
+				 * zero to trigger the completion.
+				 */
+				kref_init(&priv->kref);
+				/*
+				 * No need to wait here: no mappings could
+				 * have been created while priv->revoked
+				 * was true.
+				 */
+				reinit_completion(&priv->comp);
+			}
 		}
 		fput(priv->dmabuf->file);
 	}
@@ -346,6 +402,8 @@ void vfio_pci_dma_buf_cleanup(struct vfio_pci_core_device *vdev)
 
 	down_write(&vdev->memory_lock);
 	list_for_each_entry_safe(priv, tmp, &vdev->dmabufs, dmabufs_elm) {
+		unsigned long wait;
+
 		if (!get_file_active(&priv->dmabuf->file))
 			continue;
 
@@ -354,7 +412,14 @@ void vfio_pci_dma_buf_cleanup(struct vfio_pci_core_device *vdev)
 		priv->vdev = NULL;
 		priv->revoked = true;
 		dma_buf_invalidate_mappings(priv->dmabuf);
+		dma_resv_wait_timeout(priv->dmabuf->resv,
+				      DMA_RESV_USAGE_BOOKKEEP, false,
+				      MAX_SCHEDULE_TIMEOUT);
 		dma_resv_unlock(priv->dmabuf->resv);
+		kref_put(&priv->kref, vfio_pci_dma_buf_done);
+		wait = wait_for_completion_timeout(&priv->comp,
+						   secs_to_jiffies(1));
+		WARN_ON(!wait);
 		vfio_device_put_registration(&vdev->vdev);
 		fput(priv->dmabuf->file);
 	}
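
The kref plus completion pairing above is the usual kernel idiom for
waiting until the last outstanding user is gone; a stripped-down sketch of
the same shape (mapping_users and its helpers are illustrative names, not
taken from the patch):

#include <linux/kref.h>
#include <linux/completion.h>

struct mapping_users {
	struct kref kref;	/* initialized to 1 for the owner */
	struct completion comp;	/* fires when the count hits zero */
};

static void mapping_users_done(struct kref *kref)
{
	struct mapping_users *u =
		container_of(kref, struct mapping_users, kref);

	complete(&u->comp);
}

/* Each map does kref_get(); each unmap does kref_put(..., mapping_users_done). */

/* Owner: drop the initial reference, then wait out the remaining users. */
static void mapping_users_wait(struct mapping_users *u)
{
	kref_put(&u->kref, mapping_users_done);
	wait_for_completion(&u->comp);
}

This also shows why the !revoked path in vfio_pci_dma_buf_move() has to
call kref_init() again: the owner's initial reference was consumed by the
previous revoke, so un-revoking must restore it before new mappings can be
counted.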