On Wed, 27 May 2026 03:23:12 -0700 Matt Evans <mattev@meta.com> wrote:
A new VFIO feature, VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR, is added to set (and get) CPU-facing memory type attributes for a DMABUF exported from vfio-pci. These are used for subsequent mmap()s of the buffer.
There are two attributes supported:
- The default, VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR_NC, which results in non-cached PTEs.
- VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR_WC, which results in WC PTEs for the DMABUF's BAR region.
Signed-off-by: Matt Evans <mattev@meta.com>
 drivers/vfio/pci/vfio_pci_core.c   |  2 +
 drivers/vfio/pci/vfio_pci_dmabuf.c | 70 +++++++++++++++++++++++++++++-
 drivers/vfio/pci/vfio_pci_priv.h   | 12 +++++
 include/uapi/linux/vfio.h          | 27 ++++++++++++
 4 files changed, 110 insertions(+), 1 deletion(-)
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 5184b3cac160..e256a925e7ce 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -1590,6 +1590,8 @@ int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32 flags, return vfio_pci_core_feature_token(vdev, flags, arg, argsz); case VFIO_DEVICE_FEATURE_DMA_BUF: return vfio_pci_core_feature_dma_buf(vdev, flags, arg, argsz);
- case VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR:
default: return -ENOTTY; }return vfio_pci_core_feature_dma_buf_memattr(vdev, flags, arg, argsz);diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c index 3fa14760898f..db8b95ddbe18 100644 --- a/drivers/vfio/pci/vfio_pci_dmabuf.c +++ b/drivers/vfio/pci/vfio_pci_dmabuf.c @@ -42,7 +42,10 @@ static int vfio_pci_dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct * * contained within the DMABUF size before calling this. */
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- if (READ_ONCE(priv->memattr) == VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR_WC)
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);- else
vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);/* See comments in vfio_pci_core_mmap() re VM_ALLOW_ANY_UNCACHED. */ @@ -464,6 +467,7 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags, priv->vdev = vdev; priv->nr_ranges = get_dma_buf.nr_ranges; priv->size = length;
- priv->memattr = VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR_NC; ret = vdev->pci_ops->get_dmabuf_phys(vdev, &priv->provider, get_dma_buf.region_index, priv->phys_vec, dma_ranges,
@@ -731,4 +735,68 @@ int vfio_pci_dma_buf_revoke(struct vfio_pci_core_device *vdev, int dmabuf_fd) return ret; }
+int vfio_pci_core_feature_dma_buf_memattr(
- struct vfio_pci_core_device *vdev, u32 flags,
- struct vfio_device_feature_dma_buf_memattr __user *arg,
- size_t argsz)
+{
- struct vfio_device_feature_dma_buf_memattr db_attr;
- struct vfio_pci_dma_buf *priv;
- struct dma_buf *dmabuf;
- int ret;
- if (!vdev->pci_ops || !vdev->pci_ops->get_dmabuf_phys)
return -EOPNOTSUPP;- ret = vfio_check_feature(flags, argsz,
VFIO_DEVICE_FEATURE_GET |VFIO_DEVICE_FEATURE_SET,sizeof(db_attr));
I don't see why this needs to support GET. Are we solving a userspace problem that doesn't exist?
- if (ret != 1)
return ret;- if (copy_from_user(&db_attr, arg, sizeof(db_attr)))
return -EFAULT;- dmabuf = dma_buf_get(db_attr.dmabuf_fd);
- if (IS_ERR(dmabuf))
return PTR_ERR(dmabuf);- /* Verify DMABUF: see comments in vfio_pci_dma_buf_revoke() */
- priv = dmabuf->priv;
- if (dmabuf->ops != &vfio_pci_dmabuf_ops || priv->vdev != vdev) {
ret = -ENODEV;goto out_put_buf;- }
- ret = 0;
- scoped_guard(rwsem_write, &vdev->memory_lock) {
Why take memory_lock here? This doesn't serialize against mmap. Just use a WRITE_ONCE() to match the READ_ONCE() in mmap?
uint32_t old_attr = priv->memattr;if (flags & VFIO_DEVICE_FEATURE_SET) {switch(db_attr.memattr) {case VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR_NC:case VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR_WC:priv->memattr = db_attr.memattr;break;default:ret = -ENOTSUPP;
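This should be -EINVAL rather than -ENOTSUPP. ENOTSUPP is a kernel-internal error code with no userspace errno definition, so it should not be returned from an ioctl.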
}}db_attr.memattr = old_attr;- }
- if (!ret && (flags & VFIO_DEVICE_FEATURE_GET)) {
if (copy_to_user(arg, &db_attr, sizeof(db_attr)))ret = -EFAULT;- }
- out_put_buf:
- dma_buf_put(dmabuf);
- return ret;
+} #endif /* CONFIG_VFIO_PCI_DMABUF */ diff --git a/drivers/vfio/pci/vfio_pci_priv.h b/drivers/vfio/pci/vfio_pci_priv.h index a1e0f4fcb1dc..8067be45beb0 100644 --- a/drivers/vfio/pci/vfio_pci_priv.h +++ b/drivers/vfio/pci/vfio_pci_priv.h @@ -41,6 +41,7 @@ struct vfio_pci_dma_buf { struct kref kref; struct completion comp; unsigned long vma_pgoff_adjust;
- u32 memattr; enum vfio_pci_dma_buf_status status;
}; @@ -154,6 +155,10 @@ void vfio_pci_dma_buf_move(struct vfio_pci_core_device *vdev, bool revoked); int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags, struct vfio_device_feature_dma_buf __user *arg, size_t argsz); +int vfio_pci_core_feature_dma_buf_memattr(
- struct vfio_pci_core_device *vdev, u32 flags,
- struct vfio_device_feature_dma_buf_memattr __user *arg,
- size_t argsz);
int vfio_pci_dma_buf_revoke(struct vfio_pci_core_device *vdev, int dmabuf_fd); #else static inline int @@ -163,6 +168,13 @@ vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags, { return -ENOTTY; } +static inline int vfio_pci_core_feature_dma_buf_memattr(
- struct vfio_pci_core_device *vdev, u32 flags,
- struct vfio_device_feature_dma_buf_memattr __user *arg,
- size_t argsz)
+{
- return -ENODEV;
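This should be -ENOTTY rather than -ENODEV, matching the vfio_pci_core_feature_dma_buf() stub above and the default case in vfio_pci_core_ioctl_feature().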
Thanks, Alex
+} static inline int vfio_pci_dma_buf_revoke(struct vfio_pci_core_device *vdev, int dmabuf_fd) { diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 02366e9f8e16..9b0b68f8a1ef 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -1564,6 +1564,33 @@ struct vfio_device_feature_dma_buf { */ #define VFIO_DEVICE_FEATURE_MIG_PRECOPY_INFOv2 12 +/**
- Given a dma_buf fd previously created by
- VFIO_DEVICE_FEATURE_DMA_BUF, GET or SET the memory attribute that
- will be used by future mmap()s of that fd. SETting a new attribute
- does not affect existing VMAs.
- The default, if no previous SET has been performed, is NC.
- Return: 0 on success, -1 and errno is set on failure:
- ENOTSUPP: The given memattr is not supported.
- EBADF, EINVAL: dmabuf_fd is not a DMABUF fd.
- ENODEV: The dmabuf_fd does not match this VFIO device.
- */
+#define VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR 13
+/* Valid memory attributes for the memattr field */ +enum vfio_device_dma_buf_memattr {
- VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR_NC = 0, /* pgprot_noncached */
- VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR_WC = 1, /* pgprot_writecombine */
+};
+struct vfio_device_feature_dma_buf_memattr {
- __s32 dmabuf_fd;
- __u32 memattr;
+};
/* -------- API for Type1 VFIO IOMMU -------- */ /**