From: Rob Clark robdclark@chromium.org
See 1/3 for motivation.
Rob Clark (3):
  dma-buf: Add ioctl to query mmap info
  drm/prime: Wire up mmap_info support
  drm/msm/prime: Add mmap_info support

 drivers/dma-buf/dma-buf.c           | 26 ++++++++++++++++++++++++++
 drivers/gpu/drm/drm_prime.c         | 12 ++++++++++++
 drivers/gpu/drm/msm/msm_drv.c       |  1 +
 drivers/gpu/drm/msm/msm_drv.h       |  1 +
 drivers/gpu/drm/msm/msm_gem_prime.c | 11 +++++++++++
 include/drm/drm_drv.h               |  7 +++++++
 include/linux/dma-buf.h             |  7 +++++++
 include/uapi/linux/dma-buf.h        | 28 ++++++++++++++++++++++++++++
 8 files changed, 93 insertions(+)
From: Rob Clark robdclark@chromium.org
This is a fairly narrowly focused interface, providing a way for a VMM in userspace to tell the guest kernel what pgprot settings to use when mapping a buffer to guest userspace.
For buffers that get mapped into guest userspace, virglrenderer returns a dma-buf fd to the VMM (crosvm or qemu). In addition to mapping the pages into the guest VM, it needs to report to drm/virtio in the guest the cache settings to use for guest userspace. In particular, on some architectures, creating aliased mappings with different cache attributes is frowned upon, so it is important that the guest mappings have the same cache attributes as any potential host mappings.
Signed-off-by: Rob Clark robdclark@chromium.org --- drivers/dma-buf/dma-buf.c | 26 ++++++++++++++++++++++++++ include/linux/dma-buf.h | 7 +++++++ include/uapi/linux/dma-buf.h | 28 ++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 32f55640890c..d02d6c2a3b49 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -326,6 +326,29 @@ static long dma_buf_set_name(struct dma_buf *dmabuf, const char __user *buf) return 0; }
+static long dma_buf_info(struct dma_buf *dmabuf, const void __user *uarg) +{ + struct dma_buf_info arg; + + if (copy_from_user(&arg, uarg, sizeof(arg))) + return -EFAULT; + + switch (arg.param) { + case DMA_BUF_INFO_VM_PROT: + if (!dmabuf->ops->mmap_info) + return -ENOSYS; + arg.value = dmabuf->ops->mmap_info(dmabuf); + break; + default: + return -EINVAL; + } + + if (copy_to_user(uarg, &arg, sizeof(arg))) + return -EFAULT; + + return 0; +} + static long dma_buf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -369,6 +392,9 @@ static long dma_buf_ioctl(struct file *file, case DMA_BUF_SET_NAME_B: return dma_buf_set_name(dmabuf, (const char __user *)arg);
+ case DMA_BUF_IOCTL_INFO: + return dma_buf_info(dmabuf, (const void __user *)arg); + default: return -ENOTTY; } diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 71731796c8c3..6f4de64a5937 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -283,6 +283,13 @@ struct dma_buf_ops { */ int (*mmap)(struct dma_buf *, struct vm_area_struct *vma);
+ /** + * @mmap_info: + * + * Return mmapping info for the buffer. See DMA_BUF_INFO_VM_PROT. + */ + int (*mmap_info)(struct dma_buf *); + int (*vmap)(struct dma_buf *dmabuf, struct iosys_map *map); void (*vunmap)(struct dma_buf *dmabuf, struct iosys_map *map); }; diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h index b1523cb8ab30..a41adac0f46a 100644 --- a/include/uapi/linux/dma-buf.h +++ b/include/uapi/linux/dma-buf.h @@ -85,6 +85,32 @@ struct dma_buf_sync {
#define DMA_BUF_NAME_LEN 32
+ +/** + * struct dma_buf_info - Query info about the buffer. + */ +struct dma_buf_info { + +#define DMA_BUF_INFO_VM_PROT 1 +# define DMA_BUF_VM_PROT_WC 0 +# define DMA_BUF_VM_PROT_CACHED 1 + + /** + * @param: Which param to query + * + * DMA_BUF_INFO_VM_PROT: + * Query the access permissions of userspace mmap's of this buffer. + * Returns one of DMA_BUF_VM_PROT_x + */ + __u32 param; + __u32 pad; + + /** + * @value: Return value of the query. + */ + __u64 value; +}; + #define DMA_BUF_BASE 'b' #define DMA_BUF_IOCTL_SYNC _IOW(DMA_BUF_BASE, 0, struct dma_buf_sync)
@@ -95,4 +121,6 @@ struct dma_buf_sync { #define DMA_BUF_SET_NAME_A _IOW(DMA_BUF_BASE, 1, __u32) #define DMA_BUF_SET_NAME_B _IOW(DMA_BUF_BASE, 1, __u64)
+#define DMA_BUF_IOCTL_INFO _IOWR(DMA_BUF_BASE, 2, struct dma_buf_info) + #endif
Am 29.07.22 um 19:07 schrieb Rob Clark:
From: Rob Clark robdclark@chromium.org
This is a fairly narrowly focused interface, providing a way for a VMM in userspace to tell the guest kernel what pgprot settings to use when mapping a buffer to guest userspace.
For buffers that get mapped into guest userspace, virglrenderer returns a dma-buf fd to the VMM (crosvm or qemu).
Wow, wait a second. Who is giving whom the DMA-buf fd here?
My last status was that this design was illegal and couldn't be implemented because it requires internal knowledge only the exporting driver can have.
@Daniel, has anything changed on that, or is my status still valid?
Regards, Christian.
In addition to mapping the pages into the guest VM, it needs to report to drm/virtio in the guest the cache settings to use for guest userspace. In particular, on some architectures, creating aliased mappings with different cache attributes is frowned upon, so it is important that the guest mappings have the same cache attributes as any potential host mappings.
Signed-off-by: Rob Clark robdclark@chromium.org
drivers/dma-buf/dma-buf.c | 26 ++++++++++++++++++++++++++ include/linux/dma-buf.h | 7 +++++++ include/uapi/linux/dma-buf.h | 28 ++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 32f55640890c..d02d6c2a3b49 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -326,6 +326,29 @@ static long dma_buf_set_name(struct dma_buf *dmabuf, const char __user *buf) return 0; } +static long dma_buf_info(struct dma_buf *dmabuf, const void __user *uarg) +{
- struct dma_buf_info arg;
- if (copy_from_user(&arg, uarg, sizeof(arg)))
return -EFAULT;
- switch (arg.param) {
- case DMA_BUF_INFO_VM_PROT:
if (!dmabuf->ops->mmap_info)
return -ENOSYS;
arg.value = dmabuf->ops->mmap_info(dmabuf);
break;
- default:
return -EINVAL;
- }
- if (copy_to_user(uarg, &arg, sizeof(arg)))
return -EFAULT;
- return 0;
+}
- static long dma_buf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) {
@@ -369,6 +392,9 @@ static long dma_buf_ioctl(struct file *file, case DMA_BUF_SET_NAME_B: return dma_buf_set_name(dmabuf, (const char __user *)arg);
- case DMA_BUF_IOCTL_INFO:
return dma_buf_info(dmabuf, (const void __user *)arg);
- default: return -ENOTTY; }
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 71731796c8c3..6f4de64a5937 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -283,6 +283,13 @@ struct dma_buf_ops { */ int (*mmap)(struct dma_buf *, struct vm_area_struct *vma);
- /**
* @mmap_info:
*
* Return mmapping info for the buffer. See DMA_BUF_INFO_VM_PROT.
*/
- int (*mmap_info)(struct dma_buf *);
- int (*vmap)(struct dma_buf *dmabuf, struct iosys_map *map); void (*vunmap)(struct dma_buf *dmabuf, struct iosys_map *map); };
diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h index b1523cb8ab30..a41adac0f46a 100644 --- a/include/uapi/linux/dma-buf.h +++ b/include/uapi/linux/dma-buf.h @@ -85,6 +85,32 @@ struct dma_buf_sync { #define DMA_BUF_NAME_LEN 32
+/**
- struct dma_buf_info - Query info about the buffer.
- */
+struct dma_buf_info {
+#define DMA_BUF_INFO_VM_PROT 1 +# define DMA_BUF_VM_PROT_WC 0 +# define DMA_BUF_VM_PROT_CACHED 1
- /**
* @param: Which param to query
*
* DMA_BUF_INFO_VM_PROT:
* Query the access permissions of userspace mmap's of this buffer.
* Returns one of DMA_BUF_VM_PROT_x
*/
- __u32 param;
- __u32 pad;
- /**
* @value: Return value of the query.
*/
- __u64 value;
+};
- #define DMA_BUF_BASE 'b' #define DMA_BUF_IOCTL_SYNC _IOW(DMA_BUF_BASE, 0, struct dma_buf_sync)
@@ -95,4 +121,6 @@ struct dma_buf_sync { #define DMA_BUF_SET_NAME_A _IOW(DMA_BUF_BASE, 1, __u32) #define DMA_BUF_SET_NAME_B _IOW(DMA_BUF_BASE, 1, __u64) +#define DMA_BUF_IOCTL_INFO _IOWR(DMA_BUF_BASE, 2, struct dma_buf_info)
- #endif
On Sun, Aug 7, 2022 at 9:09 AM Christian König ckoenig.leichtzumerken@gmail.com wrote:
Am 29.07.22 um 19:07 schrieb Rob Clark:
From: Rob Clark robdclark@chromium.org
This is a fairly narrowly focused interface, providing a way for a VMM in userspace to tell the guest kernel what pgprot settings to use when mapping a buffer to guest userspace.
For buffers that get mapped into guest userspace, virglrenderer returns a dma-buf fd to the VMM (crosvm or qemu).
Wow, wait a second. Who is giving whom the DMA-buf fd here?
Not sure I understand the question.. the dma-buf fd could come from EGL_MESA_image_dma_buf_export, gbm, or similar.
My last status was that this design was illegal and couldn't be implemented because it requires internal knowledge only the exporting driver can have.
This ioctl provides that information from the exporting driver so that a VMM doesn't have to make assumptions ;-)
Currently crosvm assumes if (drivername == "i915") then it is a cached mapping, otherwise it is wc. I'm trying to find a way to fix this. Suggestions welcome, but because of how mapping to a guest VM works, a VMM is a somewhat special case where this information is needed in userspace.
BR, -R
@Daniel, has anything changed on that, or is my status still valid?
Regards, Christian.
In addition to mapping the pages into the guest VM, it needs to report to drm/virtio in the guest the cache settings to use for guest userspace. In particular, on some architectures, creating aliased mappings with different cache attributes is frowned upon, so it is important that the guest mappings have the same cache attributes as any potential host mappings.
Signed-off-by: Rob Clark robdclark@chromium.org
drivers/dma-buf/dma-buf.c | 26 ++++++++++++++++++++++++++ include/linux/dma-buf.h | 7 +++++++ include/uapi/linux/dma-buf.h | 28 ++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 32f55640890c..d02d6c2a3b49 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -326,6 +326,29 @@ static long dma_buf_set_name(struct dma_buf *dmabuf, const char __user *buf) return 0; }
+static long dma_buf_info(struct dma_buf *dmabuf, const void __user *uarg) +{
struct dma_buf_info arg;
if (copy_from_user(&arg, uarg, sizeof(arg)))
return -EFAULT;
switch (arg.param) {
case DMA_BUF_INFO_VM_PROT:
if (!dmabuf->ops->mmap_info)
return -ENOSYS;
arg.value = dmabuf->ops->mmap_info(dmabuf);
break;
default:
return -EINVAL;
}
if (copy_to_user(uarg, &arg, sizeof(arg)))
return -EFAULT;
return 0;
+}
- static long dma_buf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) {
@@ -369,6 +392,9 @@ static long dma_buf_ioctl(struct file *file, case DMA_BUF_SET_NAME_B: return dma_buf_set_name(dmabuf, (const char __user *)arg);
case DMA_BUF_IOCTL_INFO:
return dma_buf_info(dmabuf, (const void __user *)arg);
default: return -ENOTTY; }
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 71731796c8c3..6f4de64a5937 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -283,6 +283,13 @@ struct dma_buf_ops { */ int (*mmap)(struct dma_buf *, struct vm_area_struct *vma);
/**
* @mmap_info:
*
* Return mmapping info for the buffer. See DMA_BUF_INFO_VM_PROT.
*/
int (*mmap_info)(struct dma_buf *);
int (*vmap)(struct dma_buf *dmabuf, struct iosys_map *map); void (*vunmap)(struct dma_buf *dmabuf, struct iosys_map *map); };
diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h index b1523cb8ab30..a41adac0f46a 100644 --- a/include/uapi/linux/dma-buf.h +++ b/include/uapi/linux/dma-buf.h @@ -85,6 +85,32 @@ struct dma_buf_sync {
#define DMA_BUF_NAME_LEN 32
+/**
- struct dma_buf_info - Query info about the buffer.
- */
+struct dma_buf_info {
+#define DMA_BUF_INFO_VM_PROT 1 +# define DMA_BUF_VM_PROT_WC 0 +# define DMA_BUF_VM_PROT_CACHED 1
/**
* @param: Which param to query
*
* DMA_BUF_INFO_VM_PROT:
* Query the access permissions of userspace mmap's of this buffer.
* Returns one of DMA_BUF_VM_PROT_x
*/
__u32 param;
__u32 pad;
/**
* @value: Return value of the query.
*/
__u64 value;
+};
- #define DMA_BUF_BASE 'b' #define DMA_BUF_IOCTL_SYNC _IOW(DMA_BUF_BASE, 0, struct dma_buf_sync)
@@ -95,4 +121,6 @@ struct dma_buf_sync { #define DMA_BUF_SET_NAME_A _IOW(DMA_BUF_BASE, 1, __u32) #define DMA_BUF_SET_NAME_B _IOW(DMA_BUF_BASE, 1, __u64)
+#define DMA_BUF_IOCTL_INFO _IOWR(DMA_BUF_BASE, 2, struct dma_buf_info)
- #endif
Am 07.08.22 um 19:02 schrieb Rob Clark:
On Sun, Aug 7, 2022 at 9:09 AM Christian König ckoenig.leichtzumerken@gmail.com wrote:
Am 29.07.22 um 19:07 schrieb Rob Clark:
From: Rob Clark robdclark@chromium.org
This is a fairly narrowly focused interface, providing a way for a VMM in userspace to tell the guest kernel what pgprot settings to use when mapping a buffer to guest userspace.
For buffers that get mapped into guest userspace, virglrenderer returns a dma-buf fd to the VMM (crosvm or qemu).
Wow, wait a second. Who is giving whom the DMA-buf fd here?
Not sure I understand the question.. the dma-buf fd could come from EGL_MESA_image_dma_buf_export, gbm, or similar.
My last status was that this design was illegal and couldn't be implemented because it requires internal knowledge only the exporting driver can have.
This ioctl provides that information from the exporting driver so that a VMM doesn't have to make assumptions ;-)
And exactly that was NAKed the last time it came up. Only the exporting driver is allowed to mmap() the DMA-buf into the guest.
This way you also don't need to transport any caching information anywhere.
Currently crosvm assumes if (drivername == "i915") then it is a cached mapping, otherwise it is wc. I'm trying to find a way to fix this. Suggestions welcome, but because of how mapping to a guest VM works, a VMM is a somewhat special case where this information is needed in userspace.
Ok that leaves me completely puzzled. How does that work in the first place?
In other words how does the mapping into the guest page tables happen?
Regards, Christian.
BR, -R
@Daniel, has anything changed on that, or is my status still valid?
Regards, Christian.
In addition to mapping the pages into the guest VM, it needs to report to drm/virtio in the guest the cache settings to use for guest userspace. In particular, on some architectures, creating aliased mappings with different cache attributes is frowned upon, so it is important that the guest mappings have the same cache attributes as any potential host mappings.
Signed-off-by: Rob Clark robdclark@chromium.org
drivers/dma-buf/dma-buf.c | 26 ++++++++++++++++++++++++++ include/linux/dma-buf.h | 7 +++++++ include/uapi/linux/dma-buf.h | 28 ++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 32f55640890c..d02d6c2a3b49 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -326,6 +326,29 @@ static long dma_buf_set_name(struct dma_buf *dmabuf, const char __user *buf) return 0; }
+static long dma_buf_info(struct dma_buf *dmabuf, const void __user *uarg) +{
struct dma_buf_info arg;
if (copy_from_user(&arg, uarg, sizeof(arg)))
return -EFAULT;
switch (arg.param) {
case DMA_BUF_INFO_VM_PROT:
if (!dmabuf->ops->mmap_info)
return -ENOSYS;
arg.value = dmabuf->ops->mmap_info(dmabuf);
break;
default:
return -EINVAL;
}
if (copy_to_user(uarg, &arg, sizeof(arg)))
return -EFAULT;
return 0;
+}
- static long dma_buf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) {
@@ -369,6 +392,9 @@ static long dma_buf_ioctl(struct file *file, case DMA_BUF_SET_NAME_B: return dma_buf_set_name(dmabuf, (const char __user *)arg);
case DMA_BUF_IOCTL_INFO:
return dma_buf_info(dmabuf, (const void __user *)arg);
default: return -ENOTTY; }
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 71731796c8c3..6f4de64a5937 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -283,6 +283,13 @@ struct dma_buf_ops { */ int (*mmap)(struct dma_buf *, struct vm_area_struct *vma);
/**
* @mmap_info:
*
* Return mmapping info for the buffer. See DMA_BUF_INFO_VM_PROT.
*/
int (*mmap_info)(struct dma_buf *);
int (*vmap)(struct dma_buf *dmabuf, struct iosys_map *map); void (*vunmap)(struct dma_buf *dmabuf, struct iosys_map *map); };
diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h index b1523cb8ab30..a41adac0f46a 100644 --- a/include/uapi/linux/dma-buf.h +++ b/include/uapi/linux/dma-buf.h @@ -85,6 +85,32 @@ struct dma_buf_sync {
#define DMA_BUF_NAME_LEN 32
+/**
- struct dma_buf_info - Query info about the buffer.
- */
+struct dma_buf_info {
+#define DMA_BUF_INFO_VM_PROT 1 +# define DMA_BUF_VM_PROT_WC 0 +# define DMA_BUF_VM_PROT_CACHED 1
/**
* @param: Which param to query
*
* DMA_BUF_INFO_VM_PROT:
* Query the access permissions of userspace mmap's of this buffer.
* Returns one of DMA_BUF_VM_PROT_x
*/
__u32 param;
__u32 pad;
/**
* @value: Return value of the query.
*/
__u64 value;
+};
- #define DMA_BUF_BASE 'b' #define DMA_BUF_IOCTL_SYNC _IOW(DMA_BUF_BASE, 0, struct dma_buf_sync)
@@ -95,4 +121,6 @@ struct dma_buf_sync { #define DMA_BUF_SET_NAME_A _IOW(DMA_BUF_BASE, 1, __u32) #define DMA_BUF_SET_NAME_B _IOW(DMA_BUF_BASE, 1, __u64)
+#define DMA_BUF_IOCTL_INFO _IOWR(DMA_BUF_BASE, 2, struct dma_buf_info)
- #endif
On Sun, Aug 7, 2022 at 10:14 AM Christian König christian.koenig@amd.com wrote:
Am 07.08.22 um 19:02 schrieb Rob Clark:
On Sun, Aug 7, 2022 at 9:09 AM Christian König ckoenig.leichtzumerken@gmail.com wrote:
Am 29.07.22 um 19:07 schrieb Rob Clark:
From: Rob Clark robdclark@chromium.org
This is a fairly narrowly focused interface, providing a way for a VMM in userspace to tell the guest kernel what pgprot settings to use when mapping a buffer to guest userspace.
For buffers that get mapped into guest userspace, virglrenderer returns a dma-buf fd to the VMM (crosvm or qemu).
Wow, wait a second. Who is giving whom the DMA-buf fd here?
Not sure I understand the question.. the dma-buf fd could come from EGL_MESA_image_dma_buf_export, gbm, or similar.
My last status was that this design was illegal and couldn't be implemented because it requires internal knowledge only the exporting driver can have.
This ioctl provides that information from the exporting driver so that a VMM doesn't have to make assumptions ;-)
And exactly that was NAKed the last time it came up. Only the exporting driver is allowed to mmap() the DMA-buf into the guest.
except the exporting driver is in the host ;-)
This way you also don't need to transport any caching information anywhere.
Currently crosvm assumes if (drivername == "i915") then it is a cached mapping, otherwise it is wc. I'm trying to find a way to fix this. Suggestions welcome, but because of how mapping to a guest VM works, a VMM is a somewhat special case where this information is needed in userspace.
Ok that leaves me completely puzzled. How does that work in the first place?
In other words how does the mapping into the guest page tables happen?
There are multiple levels to this, but in short, mapping to guest userspace happens via drm/virtio (aka "virtio_gpu" or "virtgpu"); the cache attributes are set via the "map_info" attribute returned from the host VMM (host userspace, hence the need for this ioctl).
In the host, the host kernel driver mmaps to host userspace (VMM). Here the exporting driver is performing the mmap with correct cache attributes. The VMM uses KVM to map these pages into the guest so they appear as physical pages to the guest kernel. The guest kernel (virtgpu) in turn maps them to guest userspace.
BR, -R
Regards, Christian.
BR, -R
@Daniel, has anything changed on that, or is my status still valid?
Regards, Christian.
In addition to mapping the pages into the guest VM, it needs to report to drm/virtio in the guest the cache settings to use for guest userspace. In particular, on some architectures, creating aliased mappings with different cache attributes is frowned upon, so it is important that the guest mappings have the same cache attributes as any potential host mappings.
Signed-off-by: Rob Clark robdclark@chromium.org
drivers/dma-buf/dma-buf.c | 26 ++++++++++++++++++++++++++ include/linux/dma-buf.h | 7 +++++++ include/uapi/linux/dma-buf.h | 28 ++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 32f55640890c..d02d6c2a3b49 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -326,6 +326,29 @@ static long dma_buf_set_name(struct dma_buf *dmabuf, const char __user *buf) return 0; }
+static long dma_buf_info(struct dma_buf *dmabuf, const void __user *uarg) +{
struct dma_buf_info arg;
if (copy_from_user(&arg, uarg, sizeof(arg)))
return -EFAULT;
switch (arg.param) {
case DMA_BUF_INFO_VM_PROT:
if (!dmabuf->ops->mmap_info)
return -ENOSYS;
arg.value = dmabuf->ops->mmap_info(dmabuf);
break;
default:
return -EINVAL;
}
if (copy_to_user(uarg, &arg, sizeof(arg)))
return -EFAULT;
return 0;
+}
- static long dma_buf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) {
@@ -369,6 +392,9 @@ static long dma_buf_ioctl(struct file *file, case DMA_BUF_SET_NAME_B: return dma_buf_set_name(dmabuf, (const char __user *)arg);
case DMA_BUF_IOCTL_INFO:
return dma_buf_info(dmabuf, (const void __user *)arg);
default: return -ENOTTY; }
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 71731796c8c3..6f4de64a5937 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -283,6 +283,13 @@ struct dma_buf_ops { */ int (*mmap)(struct dma_buf *, struct vm_area_struct *vma);
/**
* @mmap_info:
*
* Return mmapping info for the buffer. See DMA_BUF_INFO_VM_PROT.
*/
int (*mmap_info)(struct dma_buf *);
int (*vmap)(struct dma_buf *dmabuf, struct iosys_map *map); void (*vunmap)(struct dma_buf *dmabuf, struct iosys_map *map); };
diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h index b1523cb8ab30..a41adac0f46a 100644 --- a/include/uapi/linux/dma-buf.h +++ b/include/uapi/linux/dma-buf.h @@ -85,6 +85,32 @@ struct dma_buf_sync {
#define DMA_BUF_NAME_LEN 32
+/**
- struct dma_buf_info - Query info about the buffer.
- */
+struct dma_buf_info {
+#define DMA_BUF_INFO_VM_PROT 1 +# define DMA_BUF_VM_PROT_WC 0 +# define DMA_BUF_VM_PROT_CACHED 1
/**
* @param: Which param to query
*
* DMA_BUF_INFO_VM_PROT:
* Query the access permissions of userspace mmap's of this buffer.
* Returns one of DMA_BUF_VM_PROT_x
*/
__u32 param;
__u32 pad;
/**
* @value: Return value of the query.
*/
__u64 value;
+};
- #define DMA_BUF_BASE 'b' #define DMA_BUF_IOCTL_SYNC _IOW(DMA_BUF_BASE, 0, struct dma_buf_sync)
@@ -95,4 +121,6 @@ struct dma_buf_sync { #define DMA_BUF_SET_NAME_A _IOW(DMA_BUF_BASE, 1, __u32) #define DMA_BUF_SET_NAME_B _IOW(DMA_BUF_BASE, 1, __u64)
+#define DMA_BUF_IOCTL_INFO _IOWR(DMA_BUF_BASE, 2, struct dma_buf_info)
- #endif
Am 07.08.22 um 19:35 schrieb Rob Clark:
On Sun, Aug 7, 2022 at 10:14 AM Christian König christian.koenig@amd.com wrote:
Am 07.08.22 um 19:02 schrieb Rob Clark:
On Sun, Aug 7, 2022 at 9:09 AM Christian König ckoenig.leichtzumerken@gmail.com wrote:
Am 29.07.22 um 19:07 schrieb Rob Clark:
From: Rob Clark robdclark@chromium.org
This is a fairly narrowly focused interface, providing a way for a VMM in userspace to tell the guest kernel what pgprot settings to use when mapping a buffer to guest userspace.
For buffers that get mapped into guest userspace, virglrenderer returns a dma-buf fd to the VMM (crosvm or qemu).
Wow, wait a second. Who is giving whom the DMA-buf fd here?
Not sure I understand the question.. the dma-buf fd could come from EGL_MESA_image_dma_buf_export, gbm, or similar.
My last status was that this design was illegal and couldn't be implemented because it requires internal knowledge only the exporting driver can have.
This ioctl provides that information from the exporting driver so that a VMM doesn't have to make assumptions ;-)
And exactly that was NAKed the last time it came up. Only the exporting driver is allowed to mmap() the DMA-buf into the guest.
except the exporting driver is in the host ;-)
This way you also don't need to transport any caching information anywhere.
Currently crosvm assumes if (drivername == "i915") then it is a cached mapping, otherwise it is wc. I'm trying to find a way to fix this. Suggestions welcome, but because of how mapping to a guest VM works, a VMM is a somewhat special case where this information is needed in userspace.
Ok that leaves me completely puzzled. How does that work in the first place?
In other words how does the mapping into the guest page tables happen?
There are multiple levels to this, but in short mapping to guest userspace happens via drm/virtio (aka "virtio_gpu" or "virtgpu"), the cache attributes are set via "map_info" attribute returned from the host VMM (host userspace, hence the need for this ioctl).
In the host, the host kernel driver mmaps to host userspace (VMM). Here the exporting driver is performing the mmap with correct cache attributes.
The VMM uses KVM to map these pages into the guest so
And exactly that was declared completely illegal the last time it came up on the mailing list.
Daniel implemented a whole bunch of patches into the DMA-buf layer to make it impossible for KVM to do this.
I have absolutely no idea why that is now a topic again and why anybody is still using this approach.
@Daniel, can you clarify?
Thanks, Christian.
they appear as physical pages to the guest kernel. The guest kernel (virtgpu) in turn maps them to guest userspace.
BR, -R
Regards, Christian.
BR, -R
@Daniel, has anything changed on that, or is my status still valid?
Regards, Christian.
In addition to mapping the pages into the guest VM, it needs to report to drm/virtio in the guest the cache settings to use for guest userspace. In particular, on some architectures, creating aliased mappings with different cache attributes is frowned upon, so it is important that the guest mappings have the same cache attributes as any potential host mappings.
Signed-off-by: Rob Clark robdclark@chromium.org
drivers/dma-buf/dma-buf.c | 26 ++++++++++++++++++++++++++ include/linux/dma-buf.h | 7 +++++++ include/uapi/linux/dma-buf.h | 28 ++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 32f55640890c..d02d6c2a3b49 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -326,6 +326,29 @@ static long dma_buf_set_name(struct dma_buf *dmabuf, const char __user *buf) return 0; }
+static long dma_buf_info(struct dma_buf *dmabuf, const void __user *uarg) +{
struct dma_buf_info arg;
if (copy_from_user(&arg, uarg, sizeof(arg)))
return -EFAULT;
switch (arg.param) {
case DMA_BUF_INFO_VM_PROT:
if (!dmabuf->ops->mmap_info)
return -ENOSYS;
arg.value = dmabuf->ops->mmap_info(dmabuf);
break;
default:
return -EINVAL;
}
if (copy_to_user(uarg, &arg, sizeof(arg)))
return -EFAULT;
return 0;
+}
- static long dma_buf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) {
@@ -369,6 +392,9 @@ static long dma_buf_ioctl(struct file *file, case DMA_BUF_SET_NAME_B: return dma_buf_set_name(dmabuf, (const char __user *)arg);
case DMA_BUF_IOCTL_INFO:
return dma_buf_info(dmabuf, (const void __user *)arg);
default: return -ENOTTY; }
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 71731796c8c3..6f4de64a5937 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -283,6 +283,13 @@ struct dma_buf_ops { */ int (*mmap)(struct dma_buf *, struct vm_area_struct *vma);
/**
* @mmap_info:
*
* Return mmapping info for the buffer. See DMA_BUF_INFO_VM_PROT.
*/
int (*mmap_info)(struct dma_buf *);
int (*vmap)(struct dma_buf *dmabuf, struct iosys_map *map); void (*vunmap)(struct dma_buf *dmabuf, struct iosys_map *map); };
diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h index b1523cb8ab30..a41adac0f46a 100644 --- a/include/uapi/linux/dma-buf.h +++ b/include/uapi/linux/dma-buf.h @@ -85,6 +85,32 @@ struct dma_buf_sync {
#define DMA_BUF_NAME_LEN 32
+/**
- struct dma_buf_info - Query info about the buffer.
- */
+struct dma_buf_info {
+#define DMA_BUF_INFO_VM_PROT 1 +# define DMA_BUF_VM_PROT_WC 0 +# define DMA_BUF_VM_PROT_CACHED 1
/**
* @param: Which param to query
*
* DMA_BUF_INFO_VM_PROT:
* Query the access permissions of userspace mmap's of this buffer.
* Returns one of DMA_BUF_VM_PROT_x
*/
__u32 param;
__u32 pad;
/**
* @value: Return value of the query.
*/
__u64 value;
+};
- #define DMA_BUF_BASE 'b' #define DMA_BUF_IOCTL_SYNC _IOW(DMA_BUF_BASE, 0, struct dma_buf_sync)
@@ -95,4 +121,6 @@ struct dma_buf_sync { #define DMA_BUF_SET_NAME_A _IOW(DMA_BUF_BASE, 1, __u32) #define DMA_BUF_SET_NAME_B _IOW(DMA_BUF_BASE, 1, __u64)
+#define DMA_BUF_IOCTL_INFO _IOWR(DMA_BUF_BASE, 2, struct dma_buf_info)
- #endif
On Sun, Aug 7, 2022 at 10:38 AM Christian König ckoenig.leichtzumerken@gmail.com wrote:
Am 07.08.22 um 19:35 schrieb Rob Clark:
On Sun, Aug 7, 2022 at 10:14 AM Christian König christian.koenig@amd.com wrote:
Am 07.08.22 um 19:02 schrieb Rob Clark:
On Sun, Aug 7, 2022 at 9:09 AM Christian König ckoenig.leichtzumerken@gmail.com wrote:
Am 29.07.22 um 19:07 schrieb Rob Clark:
From: Rob Clark robdclark@chromium.org
This is a fairly narrowly focused interface, providing a way for a VMM in userspace to tell the guest kernel what pgprot settings to use when mapping a buffer to guest userspace.
For buffers that get mapped into guest userspace, virglrenderer returns a dma-buf fd to the VMM (crosvm or qemu).
Wow, wait a second. Who is giving whom the DMA-buf fd here?
Not sure I understand the question.. the dma-buf fd could come from EGL_MESA_image_dma_buf_export, gbm, or similar.
My last status was that this design was illegal and couldn't be implemented because it requires internal knowledge only the exporting driver can have.
This ioctl provides that information from the exporting driver so that a VMM doesn't have to make assumptions ;-)
And exactly that was NAKed the last time it came up. Only the exporting driver is allowed to mmap() the DMA-buf into the guest.
except the exporting driver is in the host ;-)
This way you also don't need to transport any caching information anywhere.
Currently crosvm assumes if (drivername == "i915") then it is a cached mapping, otherwise it is wc. I'm trying to find a way to fix this. Suggestions welcome, but because of how mapping to a guest VM works, a VMM is a somewhat special case where this information is needed in userspace.
Ok that leaves me completely puzzled. How does that work in the first place?
In other words how does the mapping into the guest page tables happen?
There are multiple levels to this, but in short mapping to guest userspace happens via drm/virtio (aka "virtio_gpu" or "virtgpu"), the cache attributes are set via "map_info" attribute returned from the host VMM (host userspace, hence the need for this ioctl).
In the host, the host kernel driver mmaps to host userspace (VMM). Here the exporting driver is performing the mmap with correct cache attributes.
The VMM uses KVM to map these pages into the guest so
And exactly that was declared completely illegal the last time it came up on the mailing list.
Daniel implemented a whole bunch of patches into the DMA-buf layer to make it impossible for KVM to do this.
This issue isn't really with KVM; it is not making any CPU mappings itself. KVM is just making the pages available to the guest. Like I said, the CPU mapping to guest userspace is set up by virtgpu, but based on information that the host VMM provides. This patch simply provides a way for the host VMM to provide the correct information.
I have absolutely no idea why that is now a topic again and why anybody is still using this approach.
Because this is how VMMs work. And it is how the virtgpu device spec[1] is designed.
[1] https://github.com/oasis-tcs/virtio-spec/blob/master/virtio-gpu.tex#L767
@Daniel can you clarify.
Like I've said, I'd be happy to hear alternative suggestions. But the root problem is that it is not possible for the host kernel to directly map into guest userspace. So I don't really see an alternative. Even if it were possible for host kernel to directly map to guest userspace, that ship has already sailed as far as the virtio device specification is concerned.
BR, -R
Thanks, Christian.
they appear as physical pages to the guest kernel. The guest kernel (virtgpu) in turn maps them to guest userspace.
BR, -R
Regards, Christian.
BR, -R
@Daniel has anything changed on that, or is my status still valid?
Regards, Christian.
In addition to mapping the
pages into the guest VM, it needs to report to drm/virtio in the guest the cache settings to use for guest userspace. In particular, on some architectures, creating aliased mappings with different cache attributes is frowned upon, so it is important that the guest mappings have the same cache attributes as any potential host mappings.
Signed-off-by: Rob Clark robdclark@chromium.org
drivers/dma-buf/dma-buf.c | 26 ++++++++++++++++++++++++++ include/linux/dma-buf.h | 7 +++++++ include/uapi/linux/dma-buf.h | 28 ++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 32f55640890c..d02d6c2a3b49 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -326,6 +326,29 @@ static long dma_buf_set_name(struct dma_buf *dmabuf, const char __user *buf) return 0; }
+static long dma_buf_info(struct dma_buf *dmabuf, const void __user *uarg) +{
struct dma_buf_info arg;
if (copy_from_user(&arg, uarg, sizeof(arg)))
return -EFAULT;
switch (arg.param) {
case DMA_BUF_INFO_VM_PROT:
if (!dmabuf->ops->mmap_info)
return -ENOSYS;
arg.value = dmabuf->ops->mmap_info(dmabuf);
break;
default:
return -EINVAL;
}
if (copy_to_user(uarg, &arg, sizeof(arg)))
return -EFAULT;
return 0;
+}
- static long dma_buf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) {
@@ -369,6 +392,9 @@ static long dma_buf_ioctl(struct file *file, case DMA_BUF_SET_NAME_B: return dma_buf_set_name(dmabuf, (const char __user *)arg);
case DMA_BUF_IOCTL_INFO:
return dma_buf_info(dmabuf, (const void __user *)arg);
default: return -ENOTTY; }
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 71731796c8c3..6f4de64a5937 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -283,6 +283,13 @@ struct dma_buf_ops { */ int (*mmap)(struct dma_buf *, struct vm_area_struct *vma);
/**
* @mmap_info:
*
* Return mmapping info for the buffer. See DMA_BUF_INFO_VM_PROT.
*/
int (*mmap_info)(struct dma_buf *);
};int (*vmap)(struct dma_buf *dmabuf, struct iosys_map *map); void (*vunmap)(struct dma_buf *dmabuf, struct iosys_map *map);
diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h index b1523cb8ab30..a41adac0f46a 100644 --- a/include/uapi/linux/dma-buf.h +++ b/include/uapi/linux/dma-buf.h @@ -85,6 +85,32 @@ struct dma_buf_sync {
#define DMA_BUF_NAME_LEN 32
+/**
- struct dma_buf_info - Query info about the buffer.
- */
+struct dma_buf_info {
+#define DMA_BUF_INFO_VM_PROT 1 +# define DMA_BUF_VM_PROT_WC 0 +# define DMA_BUF_VM_PROT_CACHED 1
/**
* @param: Which param to query
*
* DMA_BUF_INFO_BM_PROT:
* Query the access permissions of userspace mmap's of this buffer.
* Returns one of DMA_BUF_VM_PROT_x
*/
__u32 param;
__u32 pad;
/**
* @value: Return value of the query.
*/
__u64 value;
+};
- #define DMA_BUF_BASE 'b' #define DMA_BUF_IOCTL_SYNC _IOW(DMA_BUF_BASE, 0, struct dma_buf_sync)
@@ -95,4 +121,6 @@ struct dma_buf_sync { #define DMA_BUF_SET_NAME_A _IOW(DMA_BUF_BASE, 1, __u32) #define DMA_BUF_SET_NAME_B _IOW(DMA_BUF_BASE, 1, __u64)
+#define DMA_BUF_IOCTL_INFO _IOWR(DMA_BUF_BASE, 2, struct dma_buf_info)
- #endif
Am 07.08.22 um 19:56 schrieb Rob Clark:
On Sun, Aug 7, 2022 at 10:38 AM Christian König ckoenig.leichtzumerken@gmail.com wrote:
[SNIP] And exactly that was declared completely illegal the last time it came up on the mailing list.
Daniel implemented a whole bunch of patches into the DMA-buf layer to make it impossible for KVM to do this.
This issue isn't really with KVM, it is not making any CPU mappings itself. KVM is just making the pages available to the guest.
Well I can only repeat myself: This is strictly illegal.
Please try this approach with CONFIG_DMABUF_DEBUG set. I'm pretty sure you will immediately run into a crash.
See this here as well https://elixir.bootlin.com/linux/v5.19/source/drivers/dma-buf/dma-buf.c#L653
Daniel intentionally added code to mangle the page pointers to make it impossible for KVM to do this.
If the virtio/virtgpu UAPI was built around the idea that this is possible then it is most likely fundamentally broken.
Regards, Christian.
On Sun, Aug 7, 2022 at 11:05 AM Christian König ckoenig.leichtzumerken@gmail.com wrote:
Am 07.08.22 um 19:56 schrieb Rob Clark:
On Sun, Aug 7, 2022 at 10:38 AM Christian König ckoenig.leichtzumerken@gmail.com wrote:
[SNIP] And exactly that was declared completely illegal the last time it came up on the mailing list.
Daniel implemented a whole bunch of patches into the DMA-buf layer to make it impossible for KVM to do this.
This issue isn't really with KVM, it is not making any CPU mappings itself. KVM is just making the pages available to the guest.
Well I can only repeat myself: This is strictly illegal.
Please try this approach with CONFIG_DMABUF_DEBUG set. I'm pretty sure you will immediately run into a crash.
See this here as well https://elixir.bootlin.com/linux/v5.19/source/drivers/dma-buf/dma-buf.c#L653
Daniel intentionally added code to mangle the page pointers to make it impossible for KVM to do this.
I don't believe KVM is using the sg table, so this isn't going to stop anything ;-)
If the virtio/virtgpu UAPI was built around the idea that this is possible then it is most likely fundamentally broken.
How else can you envision mmap'ing to guest userspace working? The guest kernel is the one that controls the guest userspace pagetables, not the host kernel. I guess your complaint is about VMs in general, but unfortunately I don't think you'll convince the rest of the industry to abandon VMs ;-)
But more seriously, let's take a step back here.. what scenarios are you seeing this being problematic for? Then we can see how to come up with solutions. The current situation of host userspace VMM just guessing isn't great. And sticking our heads in the sand and pretending VMs don't exist isn't great. So what can we do? I can instead add a msm ioctl to return this info and solve the problem even more narrowly for a single platform. But then the problem still remains on other platforms.
Slightly implicit in this is that mapping dma-bufs to the guest won't work for anything that requires DMA_BUF_IOCTL_SYNC for coherency.. we could add a possible return value for DMA_BUF_INFO_VM_PROT indicating that the buffer does not support mapping to guest or CPU access without DMA_BUF_IOCTL_SYNC. Then at least the VMM can fail gracefully instead of subtly.
BR, -R
Am 07.08.22 um 21:10 schrieb Rob Clark:
On Sun, Aug 7, 2022 at 11:05 AM Christian König ckoenig.leichtzumerken@gmail.com wrote:
Am 07.08.22 um 19:56 schrieb Rob Clark:
On Sun, Aug 7, 2022 at 10:38 AM Christian König ckoenig.leichtzumerken@gmail.com wrote:
[SNIP] And exactly that was declared completely illegal the last time it came up on the mailing list.
Daniel implemented a whole bunch of patches into the DMA-buf layer to make it impossible for KVM to do this.
This issue isn't really with KVM, it is not making any CPU mappings itself. KVM is just making the pages available to the guest.
Well I can only repeat myself: This is strictly illegal.
Please try this approach with CONFIG_DMABUF_DEBUG set. I'm pretty sure you will immediately run into a crash.
See this here as well https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Felixir.boo...
Daniel intentionally added code to mangle the page pointers to make it impossible for KVM to do this.
I don't believe KVM is using the sg table, so this isn't going to stop anything ;-)
Then I have no idea how KVM actually works. Can you please briefly describe that?
If the virtio/virtgpu UAPI was built around the idea that this is possible then it is most likely fundamentally broken.
How else can you envision mmap'ing to guest userspace working?
Well long story short: You can't.
See userspace mappings are not persistent, but rather faulted in on demand. The exporter is responsible for setting those up to be able to add reverse tracking and so can invalidate those mappings when the backing store changes.
The guest kernel is the one that controls the guest userspace pagetables, not the host kernel. I guess your complaint is about VMs in general, but unfortunately I don't think you'll convince the rest of the industry to abandon VMs ;-)
I'm not arguing against the usefulness of VM, it's just that what you describe here technically is just utterly nonsense as far as I can tell.
I have to confess that I'm totally lacking an understanding of how this KVM mapping works, but when the struct page pointers from the sg_table are not used I see two possibilities for what was implemented here:
1. KVM is somehow walking the page tables to figure out what to map into the guest VM.
This would be *HIGHLY* illegal and not just with DMA-buf, but with pretty much a whole bunch of other drivers/subsystems as well. In other words it would be trivial for the guest to take over the host with that because it doesn't take into account that the underlying backing store of DMA-buf and other mmaped() areas can change at any time.
2. The guest VM triggers the fault handler for the mappings to fill in their page tables on demand.
That would actually work with DMA-buf, but then the guest needs to somehow use the caching attributes from the host side and not use its own. Because otherwise you can't accommodate that the exporter is changing those caching attributes.
But more seriously, let's take a step back here.. what scenarios are you seeing this being problematic for? Then we can see how to come up with solutions. The current situation of host userspace VMM just guessing isn't great.
Well "isn't great" is a complete understatement. When KVM/virtio/virtgpu is doing what I guess they are doing here then that is a really major security hole.
And sticking our heads in the sand and pretending VMs don't exist isn't great. So what can we do? I can instead add a msm ioctl to return this info and solve the problem even more narrowly for a single platform. But then the problem still remains on other platforms.
Well once more: This is *not* MSM specific, you just absolutely *can't do that* for any driver!
I'm just really wondering what the heck is going on here, because all of this was discussed at length before on the mailing list and very bluntly rejected.
Either I'm missing something (that's certainly possible) or we have a strong case of somebody implementing something without thinking about all the consequences.
Regards, Christian.
Slightly implicit in this is that mapping dma-bufs to the guest won't work for anything that requires DMA_BUF_IOCTL_SYNC for coherency.. we could add a possible return value for DMA_BUF_INFO_VM_PROT indicating that the buffer does not support mapping to guest or CPU access without DMA_BUF_IOCTL_SYNC. Then at least the VMM can fail gracefully instead of subtly.
BR, -R
On Mon, Aug 8, 2022 at 4:22 AM Christian König christian.koenig@amd.com wrote:
Am 07.08.22 um 21:10 schrieb Rob Clark:
On Sun, Aug 7, 2022 at 11:05 AM Christian König ckoenig.leichtzumerken@gmail.com wrote:
Am 07.08.22 um 19:56 schrieb Rob Clark:
On Sun, Aug 7, 2022 at 10:38 AM Christian König ckoenig.leichtzumerken@gmail.com wrote:
[SNIP] And exactly that was declared completely illegal the last time it came up on the mailing list.
Daniel implemented a whole bunch of patches into the DMA-buf layer to make it impossible for KVM to do this.
This issue isn't really with KVM, it is not making any CPU mappings itself. KVM is just making the pages available to the guest.
Well I can only repeat myself: This is strictly illegal.
Please try this approach with CONFIG_DMABUF_DEBUG set. I'm pretty sure you will immediately run into a crash.
See this here as well https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Felixir.boo...
Daniel intentionally added code to mangle the page pointers to make it impossible for KVM to do this.
I don't believe KVM is using the sg table, so this isn't going to stop anything ;-)
Then I have no idea how KVM actually works. Can you please briefly describe that?
If the virtio/virtgpu UAPI was built around the idea that this is possible then it is most likely fundamentally broken.
How else can you envision mmap'ing to guest userspace working?
Well long story short: You can't.
See userspace mappings are not persistent, but rather faulted in on demand. The exporter is responsible for setting those up to be able to add reverse tracking and so can invalidate those mappings when the backing store changes.
I think that is not actually a problem. At least for how it works on arm64 but I'm almost positive x86 is similar.. I'm not sure how else you could virtualize mmu/iommu/etc in a way that didn't have horrible performance.
There are two levels of pagetable translation, the first controlled by the host kernel, the second by the guest. From the PoV of host kernel, it is just memory mapped to userspace, getting faulted in on demand, just as normal. First the guest controlled translation triggers a fault in the guest which sets up guest mapping. And then the second level of translation to translate from what guest sees as PA (but host sees as VA) to actual PA triggers a fault in the host.
The guest kernel is the one that controls the guest userspace pagetables, not the host kernel. I guess your complaint is about VMs in general, but unfortunately I don't think you'll convince the rest of the industry to abandon VMs ;-)
I'm not arguing against the usefulness of VM, it's just that what you describe here technically is just utterly nonsense as far as I can tell.
I have to confess that I'm totally lacking an understanding of how this KVM mapping works, but when the struct page pointers from the sg_table are not used I see two possibilities for what was implemented here:
- KVM is somehow walking the page tables to figure out what to map into
the guest VM.
it is just mapping host VA to the guest.. the guest kernel sees this as the PA and uses the level of pgtable translation that it controls to map to guest userspace. *All* that is needed (which this patch provides) is the correct cache attributes.
This would be *HIGHLY* illegal and not just with DMA-buf, but with
pretty much a whole bunch of other drivers/subsystems as well. In other words it would be trivial for the guest to take over the host with that because it doesn't take into account that the underlying backing store of DMA-buf and other mmaped() areas can change at any time.
- The guest VM triggers the fault handler for the mappings to fill in
their page tables on demand.
That would actually work with DMA-buf, but then the guest needs to
somehow use the caching attributes from the host side and not use its own.
This is basically what happens, although via the two levels of pgtable translation. This patch provides the missing piece, the caching attributes.
Because otherwise you can't accommodate that the exporter is
changing those caching attributes.
Changing the attributes dynamically isn't going to work.. or at least not easily. If you had some sort of synchronous notification to host userspace, it could trigger an irq to the guest, I suppose. But it would mean host kernel has to block waiting for host userspace to interrupt the guest, then wait for guest vgpu process to be scheduled and handle the irq.
At least in the case of msm, the cache attributes are static for the life of the buffer, so this scenario isn't a problem. AFAICT this should work fine for at least all UMA hw.. I'm a bit less sure when it comes to TTM, but shouldn't you at least be able to use worst-case cache attributes for buffers that are allowed to be mapped to guest?
BR, -R
But more seriously, let's take a step back here.. what scenarios are you seeing this being problematic for? Then we can see how to come up with solutions. The current situation of host userspace VMM just guessing isn't great.
Well "isn't great" is a complete understatement. When KVM/virtio/virtgpu is doing what I guess they are doing here then that is a really major security hole.
And sticking our heads in the sand and pretending VMs don't exist isn't great. So what can we do? I can instead add a msm ioctl to return this info and solve the problem even more narrowly for a single platform. But then the problem still remains on other platforms.
Well once more: This is *not* MSM specific, you just absolutely *can't do that* for any driver!
I'm just really wondering what the heck is going on here, because all of this was discussed at length before on the mailing list and very bluntly rejected.
Either I'm missing something (that's certainly possible) or we have a strong case of somebody implementing something without thinking about all the consequences.
Regards, Christian.
Slightly implicit in this is that mapping dma-bufs to the guest won't work for anything that requires DMA_BUF_IOCTL_SYNC for coherency.. we could add a possible return value for DMA_BUF_INFO_VM_PROT indicating that the buffer does not support mapping to guest or CPU access without DMA_BUF_IOCTL_SYNC. Then at least the VMM can fail gracefully instead of subtly.
BR, -R
Am 08.08.22 um 15:26 schrieb Rob Clark:
On Mon, Aug 8, 2022 at 4:22 AM Christian König christian.koenig@amd.com wrote:
[SNIP]
If the virtio/virtgpu UAPI was built around the idea that this is possible then it is most likely fundamentally broken.
How else can you envision mmap'ing to guest userspace working?
Well long story short: You can't.
See userspace mappings are not persistent, but rather faulted in on demand. The exporter is responsible for setting those up to be able to add reverse tracking and so can invalidate those mappings when the backing store changes.
I think that is not actually a problem. At least for how it works on arm64 but I'm almost positive x86 is similar.. I'm not sure how else you could virtualize mmu/iommu/etc in a way that didn't have horrible performance.
There are two levels of pagetable translation, the first controlled by the host kernel, the second by the guest. From the PoV of host kernel, it is just memory mapped to userspace, getting faulted in on demand, just as normal. First the guest controlled translation triggers a fault in the guest which sets up guest mapping. And then the second level of translation to translate from what guest sees as PA (but host sees as VA) to actual PA triggers a fault in the host.
Ok, that's calming.
At least that's not the approach talked about the last time this came up and it doesn't rip a massive security hole somewhere.
The question is why is the guest then not using the caching attributes set up by the host page tables when the translation is forwarded anyway?
[SNIP] This is basically what happens, although via the two levels of pgtable translation. This patch provides the missing piece, the caching attributes.
Yeah, but that won't work like this. See the backing store migrates all the time and when it is backed by PCIe/VRAM/local memory you need to use write combine while system memory is usually cached.
Because otherwise you can't accommodate that the exporter is
changing those caching attributes.
Changing the attributes dynamically isn't going to work.. or at least not easily. If you had some sort of synchronous notification to host userspace, it could trigger an irq to the guest, I suppose. But it would mean host kernel has to block waiting for host userspace to interrupt the guest, then wait for guest vgpu process to be scheduled and handle the irq.
We basically change that on every page flip on APUs and that doesn't sound like something fast.
Thanks for the explanation of how this works, Christian.
At least in the case of msm, the cache attributes are static for the life of the buffer, so this scenario isn't a problem. AFAICT this should work fine for at least all UMA hw.. I'm a bit less sure when it comes to TTM, but shouldn't you at least be able to use worst-case cache attributes for buffers that are allowed to be mapped to guest?
BR, -R
But more seriously, let's take a step back here.. what scenarios are you seeing this being problematic for? Then we can see how to come up with solutions. The current situation of host userspace VMM just guessing isn't great.
Well "isn't great" is a complete understatement. When KVM/virtio/virtgpu is doing what I guess they are doing here then that is a really major security hole.
And sticking our heads in the sand and
pretending VMs don't exist isn't great. So what can we do? I can instead add a msm ioctl to return this info and solve the problem even more narrowly for a single platform. But then the problem still remains on other platforms.
Well once more: This is *not* MSM specific, you just absolutely *can't do that* for any driver!
I'm just really wondering what the heck is going on here, because all of this was discussed at length before on the mailing list and very bluntly rejected.
Either I'm missing something (that's certainly possible) or we have a strong case of somebody implementing something without thinking about all the consequences.
Regards, Christian.
Slightly implicit in this is that mapping dma-bufs to the guest won't work for anything that requires DMA_BUF_IOCTL_SYNC for coherency.. we could add a possible return value for DMA_BUF_INFO_VM_PROT indicating that the buffer does not support mapping to guest or CPU access without DMA_BUF_IOCTL_SYNC. Then at least the VMM can fail gracefully instead of subtly.
BR, -R
On Mon, Aug 8, 2022 at 7:56 AM Christian König christian.koenig@amd.com wrote:
Am 08.08.22 um 15:26 schrieb Rob Clark:
On Mon, Aug 8, 2022 at 4:22 AM Christian König christian.koenig@amd.com wrote:
[SNIP]
If the virtio/virtgpu UAPI was built around the idea that this is possible then it is most likely fundamentally broken.
How else can you envision mmap'ing to guest userspace working?
Well long story short: You can't.
See userspace mappings are not persistent, but rather faulted in on demand. The exporter is responsible for setting those up to be able to add reverse tracking and so can invalidate those mappings when the backing store changes.
I think that is not actually a problem. At least for how it works on arm64 but I'm almost positive x86 is similar.. I'm not sure how else you could virtualize mmu/iommu/etc in a way that didn't have horrible performance.
There are two levels of pagetable translation, the first controlled by the host kernel, the second by the guest. From the PoV of host kernel, it is just memory mapped to userspace, getting faulted in on demand, just as normal. First the guest controlled translation triggers a fault in the guest which sets up guest mapping. And then the second level of translation to translate from what guest sees as PA (but host sees as VA) to actual PA triggers a fault in the host.
Ok, that's calming.
At least that's not the approach talked about the last time this came up and it doesn't rip a massive security hole somewhere.
Hmm, tbh I'm not sure which thread/discussion this was.. it could have been before I was paying much attention to the vm use-case
The question is why is the guest then not using the caching attributes set up by the host page tables when the translation is forwarded anyway?
The guest kernel itself doesn't know. AFAICT, at least on arm, the hw will combine the attributes of the mapping in S1 and S2 pagetables and use the most restrictive. So if S1 (host) is cached but S2 (guest) is WC, you'll end up w/ WC.
That said, at least on aarch64, it seems like we could always tell the guest it is cached, and if mapped WC in S1 you'll end up with WC access. But this seems to depend on an optional feature, FWB, which allows S2 to override S1 attributes, not being enabled. And not entirely sure how it works on x86.
BR, -R
[SNIP] This is basically what happens, although via the two levels of pgtable translation. This patch provides the missing piece, the caching attributes.
Yeah, but that won't work like this. See the backing store migrates all the time and when it is backed by PCIe/VRAM/local memory you need to use write combine while system memory is usually cached.
Because otherwise you can't accommodate that the exporter is
changing those caching attributes.
Changing the attributes dynamically isn't going to work.. or at least not easily. If you had some sort of synchronous notification to host userspace, it could trigger an irq to the guest, I suppose. But it would mean host kernel has to block waiting for host userspace to interrupt the guest, then wait for guest vgpu process to be scheduled and handle the irq.
We basically change that on every page flip on APUs and that doesn't sound like something fast.
Thanks for the explanation of how this works, Christian.
At least in the case of msm, the cache attributes are static for the life of the buffer, so this scenario isn't a problem. AFAICT this should work fine for at least all UMA hw.. I'm a bit less sure when it comes to TTM, but shouldn't you at least be able to use worst-case cache attributes for buffers that are allowed to be mapped to guest?
BR, -R
But more seriously, let's take a step back here.. what scenarios are you seeing this being problematic for? Then we can see how to come up with solutions. The current situation of host userspace VMM just guessing isn't great.
Well "isn't great" is a complete understatement. When KVM/virtio/virtgpu is doing what I guess they are doing here then that is a really major security hole.
And sticking our heads in the sand and
pretending VMs don't exist isn't great. So what can we do? I can instead add a msm ioctl to return this info and solve the problem even more narrowly for a single platform. But then the problem still remains on other platforms.
Well once more: This is *not* MSM specific, you just absolutely *can't do that* for any driver!
I'm just really wondering what the heck is going on here, because all of this was discussed at length before on the mailing list and very bluntly rejected.
Either I'm missing something (that's certainly possible) or we have a strong case of somebody implementing something without thinking about all the consequences.
Regards, Christian.
Slightly implicit in this is that mapping dma-bufs to the guest won't work for anything that requires DMA_BUF_IOCTL_SYNC for coherency.. we could add a possible return value for DMA_BUF_INFO_VM_PROT indicating that the buffer does not support mapping to guest or CPU access without DMA_BUF_IOCTL_SYNC. Then at least the VMM can fail gracefully instead of subtly.
BR, -R
On 7/29/2022 10:37 PM, Rob Clark wrote:
From: Rob Clark robdclark@chromium.org
This is a fairly narrowly focused interface, providing a way for a VMM in userspace to tell the guest kernel what pgprot settings to use when mapping a buffer to guest userspace.
For buffers that get mapped into guest userspace, virglrenderer returns a dma-buf fd to the VMM (crosvm or qemu). In addition to mapping the pages into the guest VM, it needs to report to drm/virtio in the guest the cache settings to use for guest userspace. In particular, on some architectures, creating aliased mappings with different cache attributes is frowned upon, so it is important that the guest mappings have the same cache attributes as any potential host mappings.
Signed-off-by: Rob Clark robdclark@chromium.org
drivers/dma-buf/dma-buf.c | 26 ++++++++++++++++++++++++++ include/linux/dma-buf.h | 7 +++++++ include/uapi/linux/dma-buf.h | 28 ++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 32f55640890c..d02d6c2a3b49 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -326,6 +326,29 @@ static long dma_buf_set_name(struct dma_buf *dmabuf, const char __user *buf) return 0; } +static long dma_buf_info(struct dma_buf *dmabuf, const void __user *uarg) +{
- struct dma_buf_info arg;
- if (copy_from_user(&arg, uarg, sizeof(arg)))
return -EFAULT;
- switch (arg.param) {
- case DMA_BUF_INFO_VM_PROT:
if (!dmabuf->ops->mmap_info)
return -ENOSYS;
arg.value = dmabuf->ops->mmap_info(dmabuf);
break;
- default:
return -EINVAL;
- }
- if (copy_to_user(uarg, &arg, sizeof(arg)))
return -EFAULT;
- return 0;
+}
- static long dma_buf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) {
@@ -369,6 +392,9 @@ static long dma_buf_ioctl(struct file *file, case DMA_BUF_SET_NAME_B: return dma_buf_set_name(dmabuf, (const char __user *)arg);
- case DMA_BUF_IOCTL_INFO:
return dma_buf_info(dmabuf, (const void __user *)arg);
- default: return -ENOTTY; }
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 71731796c8c3..6f4de64a5937 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -283,6 +283,13 @@ struct dma_buf_ops { */ int (*mmap)(struct dma_buf *, struct vm_area_struct *vma);
- /**
* @mmap_info:
*
* Return mmapping info for the buffer. See DMA_BUF_INFO_VM_PROT.
*/
- int (*mmap_info)(struct dma_buf *);
- int (*vmap)(struct dma_buf *dmabuf, struct iosys_map *map); void (*vunmap)(struct dma_buf *dmabuf, struct iosys_map *map); };
diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h index b1523cb8ab30..a41adac0f46a 100644 --- a/include/uapi/linux/dma-buf.h +++ b/include/uapi/linux/dma-buf.h @@ -85,6 +85,32 @@ struct dma_buf_sync { #define DMA_BUF_NAME_LEN 32
+/**
- struct dma_buf_info - Query info about the buffer.
- */
+struct dma_buf_info {
+#define DMA_BUF_INFO_VM_PROT 1 +# define DMA_BUF_VM_PROT_WC 0 +# define DMA_BUF_VM_PROT_CACHED 1
- /**
* @param: Which param to query
*
* DMA_BUF_INFO_BM_PROT:
Is there a typo here? BM -> VM ?
-Akhil.
* Query the access permissions of userspace mmap's of this buffer.
* Returns one of DMA_BUF_VM_PROT_x
*/
- __u32 param;
- __u32 pad;
- /**
* @value: Return value of the query.
*/
- __u64 value;
+};
- #define DMA_BUF_BASE 'b' #define DMA_BUF_IOCTL_SYNC _IOW(DMA_BUF_BASE, 0, struct dma_buf_sync)
@@ -95,4 +121,6 @@ struct dma_buf_sync { #define DMA_BUF_SET_NAME_A _IOW(DMA_BUF_BASE, 1, __u32) #define DMA_BUF_SET_NAME_B _IOW(DMA_BUF_BASE, 1, __u64) +#define DMA_BUF_IOCTL_INFO _IOWR(DMA_BUF_BASE, 2, struct dma_buf_info)
- #endif
On Sun, Aug 7, 2022 at 1:25 PM Akhil P Oommen quic_akhilpo@quicinc.com wrote:
On 7/29/2022 10:37 PM, Rob Clark wrote:
From: Rob Clark robdclark@chromium.org
This is a fairly narrowly focused interface, providing a way for a VMM in userspace to tell the guest kernel what pgprot settings to use when mapping a buffer to guest userspace.
For buffers that get mapped into guest userspace, virglrenderer returns a dma-buf fd to the VMM (crosvm or qemu). In addition to mapping the pages into the guest VM, it needs to report to drm/virtio in the guest the cache settings to use for guest userspace. In particular, on some architectures, creating aliased mappings with different cache attributes is frowned upon, so it is important that the guest mappings have the same cache attributes as any potential host mappings.
Signed-off-by: Rob Clark robdclark@chromium.org
drivers/dma-buf/dma-buf.c | 26 ++++++++++++++++++++++++++ include/linux/dma-buf.h | 7 +++++++ include/uapi/linux/dma-buf.h | 28 ++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 32f55640890c..d02d6c2a3b49 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -326,6 +326,29 @@ static long dma_buf_set_name(struct dma_buf *dmabuf, const char __user *buf) return 0; }
+static long dma_buf_info(struct dma_buf *dmabuf, const void __user *uarg) +{
struct dma_buf_info arg;
if (copy_from_user(&arg, uarg, sizeof(arg)))
return -EFAULT;
switch (arg.param) {
case DMA_BUF_INFO_VM_PROT:
if (!dmabuf->ops->mmap_info)
return -ENOSYS;
arg.value = dmabuf->ops->mmap_info(dmabuf);
break;
default:
return -EINVAL;
}
if (copy_to_user(uarg, &arg, sizeof(arg)))
return -EFAULT;
return 0;
+}
- static long dma_buf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) {
@@ -369,6 +392,9 @@ static long dma_buf_ioctl(struct file *file, case DMA_BUF_SET_NAME_B: return dma_buf_set_name(dmabuf, (const char __user *)arg);
case DMA_BUF_IOCTL_INFO:
return dma_buf_info(dmabuf, (const void __user *)arg);
default: return -ENOTTY; }
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 71731796c8c3..6f4de64a5937 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -283,6 +283,13 @@ struct dma_buf_ops { */ int (*mmap)(struct dma_buf *, struct vm_area_struct *vma);
/**
* @mmap_info:
*
* Return mmapping info for the buffer. See DMA_BUF_INFO_VM_PROT.
*/
int (*mmap_info)(struct dma_buf *);
int (*vmap)(struct dma_buf *dmabuf, struct iosys_map *map); void (*vunmap)(struct dma_buf *dmabuf, struct iosys_map *map); };
diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h index b1523cb8ab30..a41adac0f46a 100644 --- a/include/uapi/linux/dma-buf.h +++ b/include/uapi/linux/dma-buf.h @@ -85,6 +85,32 @@ struct dma_buf_sync {
#define DMA_BUF_NAME_LEN 32
+/**
- struct dma_buf_info - Query info about the buffer.
- */
+struct dma_buf_info {
+#define DMA_BUF_INFO_VM_PROT 1 +# define DMA_BUF_VM_PROT_WC 0 +# define DMA_BUF_VM_PROT_CACHED 1
/**
* @param: Which param to query
*
* DMA_BUF_INFO_BM_PROT:
Is there a typo here? BM -> VM ?
yes, fixed locally
-Akhil.
* Query the access permissions of userspace mmap's of this buffer.
* Returns one of DMA_BUF_VM_PROT_x
*/
__u32 param;
__u32 pad;
/**
* @value: Return value of the query.
*/
__u64 value;
+};
- #define DMA_BUF_BASE 'b' #define DMA_BUF_IOCTL_SYNC _IOW(DMA_BUF_BASE, 0, struct dma_buf_sync)
@@ -95,4 +121,6 @@ struct dma_buf_sync { #define DMA_BUF_SET_NAME_A _IOW(DMA_BUF_BASE, 1, __u32) #define DMA_BUF_SET_NAME_B _IOW(DMA_BUF_BASE, 1, __u64)
+#define DMA_BUF_IOCTL_INFO _IOWR(DMA_BUF_BASE, 2, struct dma_buf_info)
- #endif
From: Rob Clark robdclark@chromium.org
Just plumbing the thing thru an extra layer.
Signed-off-by: Rob Clark robdclark@chromium.org --- drivers/gpu/drm/drm_prime.c | 12 ++++++++++++ include/drm/drm_drv.h | 7 +++++++ 2 files changed, 19 insertions(+)
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index e3f09f18110c..f58586e131c5 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -784,6 +784,17 @@ int drm_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma) } EXPORT_SYMBOL(drm_gem_dmabuf_mmap);
+static int drm_gem_dmabuf_mmap_info(struct dma_buf *dma_buf) +{ + struct drm_gem_object *obj = dma_buf->priv; + struct drm_device *dev = obj->dev; + + if (!dev->driver->gem_prime_mmap_info) + return -ENOSYS; + + return dev->driver->gem_prime_mmap_info(obj); +} + static const struct dma_buf_ops drm_gem_prime_dmabuf_ops = { .cache_sgt_mapping = true, .attach = drm_gem_map_attach, @@ -792,6 +803,7 @@ static const struct dma_buf_ops drm_gem_prime_dmabuf_ops = { .unmap_dma_buf = drm_gem_unmap_dma_buf, .release = drm_gem_dmabuf_release, .mmap = drm_gem_dmabuf_mmap, + .mmap_info = drm_gem_dmabuf_mmap_info, .vmap = drm_gem_dmabuf_vmap, .vunmap = drm_gem_dmabuf_vunmap, }; diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h index f6159acb8856..797c0f8c2dd0 100644 --- a/include/drm/drm_drv.h +++ b/include/drm/drm_drv.h @@ -355,6 +355,13 @@ struct drm_driver { */ int (*gem_prime_mmap)(struct drm_gem_object *obj, struct vm_area_struct *vma);
+ /** + * @gem_prime_mmap_info: + * + * Get info about mmap setup by gem_prime_mmap. See dma_buf_ops:mmap_info. + */ + int (*gem_prime_mmap_info)(struct drm_gem_object *obj); + /** * @dumb_create: *
linaro-mm-sig@lists.linaro.org