The inode can be different in a container, for example, a docker and host both open the same uacce parent device, which uses the same uacce struct but different inode, so uacce->inode is not enough.
What's worse, when docker stops, the inode will be destroyed as well, causing use-after-free in uacce_remove.
So use q->filep->f_mapping to replace uacce->inode->i_mapping.
Signed-off-by: Weili Qian qianweili@huawei.com Signed-off-by: Zhangfei Gao zhangfei.gao@linaro.org --- drivers/misc/uacce/uacce.c | 16 +++++++++------- include/linux/uacce.h | 4 ++-- 2 files changed, 11 insertions(+), 9 deletions(-)
diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c index 346bd7cf2e94..740ace422baa 100644 --- a/drivers/misc/uacce/uacce.c +++ b/drivers/misc/uacce/uacce.c @@ -166,8 +166,8 @@ static int uacce_fops_open(struct inode *inode, struct file *filep)
init_waitqueue_head(&q->wait); filep->private_data = q; - uacce->inode = inode; q->state = UACCE_Q_INIT; + q->private_data = filep; mutex_init(&q->mutex); list_add(&q->list, &uacce->queues); mutex_unlock(&uacce->mutex); @@ -574,12 +574,6 @@ void uacce_remove(struct uacce_device *uacce)
if (!uacce) return; - /* - * unmap remaining mapping from user space, preventing user still - * access the mmaped area while parent device is already removed - */ - if (uacce->inode) - unmap_mapping_range(uacce->inode->i_mapping, 0, 0, 1);
/* * uacce_fops_open() may be running concurrently, even after we remove @@ -589,6 +583,8 @@ void uacce_remove(struct uacce_device *uacce) mutex_lock(&uacce->mutex); /* ensure no open queue remains */ list_for_each_entry_safe(q, next_q, &uacce->queues, list) { + struct file *filep = q->private_data; + /* * Taking q->mutex ensures that fops do not use the defunct * uacce->ops after the queue is disabled. @@ -597,6 +593,12 @@ void uacce_remove(struct uacce_device *uacce) uacce_put_queue(q); mutex_unlock(&q->mutex); uacce_unbind_queue(q); + + /* + * unmap remaining mapping from user space, preventing user still + * access the mmaped area while parent device is already removed + */ + unmap_mapping_range(filep->f_mapping, 0, 0, 1); }
/* disable sva now since no opened queues */ diff --git a/include/linux/uacce.h b/include/linux/uacce.h index 0a81c3dfd26c..64b800b74436 100644 --- a/include/linux/uacce.h +++ b/include/linux/uacce.h @@ -86,6 +86,7 @@ enum uacce_q_state { * @state: queue state machine * @pasid: pasid associated to the mm * @handle: iommu_sva handle returned by iommu_sva_bind_device() + * @private_data: private data for saving filep */ struct uacce_queue { struct uacce_device *uacce; @@ -97,6 +98,7 @@ struct uacce_queue { enum uacce_q_state state; u32 pasid; struct iommu_sva *handle; + void *private_data; };
/** @@ -114,7 +116,6 @@ struct uacce_queue { * @mutex: protects uacce operation * @priv: private pointer of the uacce * @queues: list of queues - * @inode: core vfs */ struct uacce_device { const char *algs; @@ -130,7 +131,6 @@ struct uacce_device { struct mutex mutex; void *priv; struct list_head queues; - struct inode *inode; };
#if IS_ENABLED(CONFIG_UACCE)
On Thu, May 11, 2023 at 10:15:53AM +0800, Zhangfei Gao wrote:
The inode can be different in a container, for example, a docker and host both open the same uacce parent device, which uses the same uacce struct but different inode, so uacce->inode is not enough.
What's worse, when docker stops, the inode will be destroyed as well, causing use-after-free in uacce_remove.
So use q->filep->f_mapping to replace uacce->inode->i_mapping.
Signed-off-by: Weili Qian qianweili@huawei.com Signed-off-by: Zhangfei Gao zhangfei.gao@linaro.org
drivers/misc/uacce/uacce.c | 16 +++++++++------- include/linux/uacce.h | 4 ++-- 2 files changed, 11 insertions(+), 9 deletions(-)
diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c index 346bd7cf2e94..740ace422baa 100644 --- a/drivers/misc/uacce/uacce.c +++ b/drivers/misc/uacce/uacce.c @@ -166,8 +166,8 @@ static int uacce_fops_open(struct inode *inode, struct file *filep) init_waitqueue_head(&q->wait); filep->private_data = q;
- uacce->inode = inode;
 q->state = UACCE_Q_INIT;
+ q->private_data = filep;
 mutex_init(&q->mutex); list_add(&q->list, &uacce->queues); mutex_unlock(&uacce->mutex);
@@ -574,12 +574,6 @@ void uacce_remove(struct uacce_device *uacce) if (!uacce) return;
- /*
- * unmap remaining mapping from user space, preventing user still
- * access the mmaped area while parent device is already removed
- */
- if (uacce->inode)
- unmap_mapping_range(uacce->inode->i_mapping, 0, 0, 1);
/* * uacce_fops_open() may be running concurrently, even after we remove @@ -589,6 +583,8 @@ void uacce_remove(struct uacce_device *uacce) mutex_lock(&uacce->mutex); /* ensure no open queue remains */ list_for_each_entry_safe(q, next_q, &uacce->queues, list) {
+ struct file *filep = q->private_data;
+
 /*
 * Taking q->mutex ensures that fops do not use the defunct
 * uacce->ops after the queue is disabled.
@@ -597,6 +593,12 @@ void uacce_remove(struct uacce_device *uacce) uacce_put_queue(q); mutex_unlock(&q->mutex); uacce_unbind_queue(q);
+ /*
+ * unmap remaining mapping from user space, preventing user still
+ * access the mmaped area while parent device is already removed
+ */
+ unmap_mapping_range(filep->f_mapping, 0, 0, 1);
 }
/* disable sva now since no opened queues */ diff --git a/include/linux/uacce.h b/include/linux/uacce.h index 0a81c3dfd26c..64b800b74436 100644 --- a/include/linux/uacce.h +++ b/include/linux/uacce.h @@ -86,6 +86,7 @@ enum uacce_q_state {
 * @state: queue state machine
 * @pasid: pasid associated to the mm
 * @handle: iommu_sva handle returned by iommu_sva_bind_device()
+ * @private_data: private data for saving filep
 */
struct uacce_queue { struct uacce_device *uacce; @@ -97,6 +98,7 @@ struct uacce_queue { enum uacce_q_state state; u32 pasid; struct iommu_sva *handle;
+ void *private_data;
Make this a real pointer to the inode, no need to make this "void *", right?
thanks,
greg k-h
On Thu, May 11, 2023 at 10:15:53AM +0800, Zhangfei Gao wrote:
The inode can be different in a container, for example, a docker and host both open the same uacce parent device, which uses the same uacce struct but different inode, so uacce->inode is not enough.
What's worse, when docker stops, the inode will be destroyed as well, causing use-after-free in uacce_remove.
So use q->filep->f_mapping to replace uacce->inode->i_mapping.
@@ -574,12 +574,6 @@ void uacce_remove(struct uacce_device *uacce) if (!uacce) return;
- /*
- * unmap remaining mapping from user space, preventing user still
- * access the mmaped area while parent device is already removed
- */
- if (uacce->inode)
- unmap_mapping_range(uacce->inode->i_mapping, 0, 0, 1);
/* * uacce_fops_open() may be running concurrently, even after we remove @@ -589,6 +583,8 @@ void uacce_remove(struct uacce_device *uacce) mutex_lock(&uacce->mutex); /* ensure no open queue remains */ list_for_each_entry_safe(q, next_q, &uacce->queues, list) {
+ struct file *filep = q->private_data;
+
 /*
 * Taking q->mutex ensures that fops do not use the defunct
 * uacce->ops after the queue is disabled.
@@ -597,6 +593,12 @@ void uacce_remove(struct uacce_device *uacce) uacce_put_queue(q); mutex_unlock(&q->mutex); uacce_unbind_queue(q);
+ /*
+ * unmap remaining mapping from user space, preventing user still
+ * access the mmaped area while parent device is already removed
+ */
+ unmap_mapping_range(filep->f_mapping, 0, 0, 1);
IDGI. Going through uacce_queue instead of uacce_device is fine, but why bother with file *or* inode? Just store a reference to struct address_space in your uacce_queue and be done with that...
Another problem in that driver is uacce_vma_close(); this

	if (vma->vm_pgoff < UACCE_MAX_REGION)
		qfr = q->qfrs[vma->vm_pgoff];

	kfree(qfr);

can't be right - you have q->qfrs left pointing to freed object. If nothing else, subsequent mmap() will fail with -EEXIST, won't it?
On Thu, 11 May 2023 at 12:05, Al Viro viro@zeniv.linux.org.uk wrote:
On Thu, May 11, 2023 at 10:15:53AM +0800, Zhangfei Gao wrote:
The inode can be different in a container, for example, a docker and host both open the same uacce parent device, which uses the same uacce struct but different inode, so uacce->inode is not enough.
What's worse, when docker stops, the inode will be destroyed as well, causing use-after-free in uacce_remove.
So use q->filep->f_mapping to replace uacce->inode->i_mapping.
@@ -574,12 +574,6 @@ void uacce_remove(struct uacce_device *uacce)
if (!uacce) return;
/*
* unmap remaining mapping from user space, preventing user still
* access the mmaped area while parent device is already removed
*/
if (uacce->inode)
unmap_mapping_range(uacce->inode->i_mapping, 0, 0, 1); /* * uacce_fops_open() may be running concurrently, even after we remove
@@ -589,6 +583,8 @@ void uacce_remove(struct uacce_device *uacce) mutex_lock(&uacce->mutex); /* ensure no open queue remains */ list_for_each_entry_safe(q, next_q, &uacce->queues, list) {
struct file *filep = q->private_data;
/* * Taking q->mutex ensures that fops do not use the defunct * uacce->ops after the queue is disabled.
@@ -597,6 +593,12 @@ void uacce_remove(struct uacce_device *uacce) uacce_put_queue(q); mutex_unlock(&q->mutex); uacce_unbind_queue(q);
/*
* unmap remaining mapping from user space, preventing user still
* access the mmaped area while parent device is already removed
*/
unmap_mapping_range(filep->f_mapping, 0, 0, 1);
IDGI. Going through uacce_queue instead of uacce_device is fine, but why bother with file *or* inode? Just store a reference to struct address_space in your uacce_queue and be done with that...
Yes, a struct address_space is enough.
Another problem in that driver is uacce_vma_close(); this

	if (vma->vm_pgoff < UACCE_MAX_REGION)
		qfr = q->qfrs[vma->vm_pgoff];

	kfree(qfr);

can't be right - you have q->qfrs left pointing to freed object. If nothing else, subsequent mmap() will fail with -EEXIST, won't it?
Good catch, will fix it.
Thanks
v2:
- use q->mapping instead of inode or file
- vma_close clears q->qfrs as well
Zhangfei Gao (2):
  uacce: use q->mapping to replace inode->i_mapping
  uacce: vma_close clears q->qfrs when freeing qfrs
drivers/misc/uacce/uacce.c | 25 ++++++++++++++----------- include/linux/uacce.h | 4 ++-- 2 files changed, 16 insertions(+), 13 deletions(-)
The inode can be different in a container, for example, a docker and host both open the same uacce parent device, which uses the same uacce struct but different inode, so uacce->inode is not enough.
What's worse, when docker stops, the inode will be destroyed as well, causing use-after-free in uacce_remove.
So use q->mapping to replace uacce->inode->i_mapping.
Signed-off-by: Weili Qian qianweili@huawei.com Signed-off-by: Zhangfei Gao zhangfei.gao@linaro.org --- drivers/misc/uacce/uacce.c | 14 +++++++------- include/linux/uacce.h | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c index 346bd7cf2e94..338b59ef5493 100644 --- a/drivers/misc/uacce/uacce.c +++ b/drivers/misc/uacce/uacce.c @@ -166,8 +166,8 @@ static int uacce_fops_open(struct inode *inode, struct file *filep)
init_waitqueue_head(&q->wait); filep->private_data = q; - uacce->inode = inode; q->state = UACCE_Q_INIT; + q->mapping = filep->f_mapping; mutex_init(&q->mutex); list_add(&q->list, &uacce->queues); mutex_unlock(&uacce->mutex); @@ -574,12 +574,6 @@ void uacce_remove(struct uacce_device *uacce)
if (!uacce) return; - /* - * unmap remaining mapping from user space, preventing user still - * access the mmaped area while parent device is already removed - */ - if (uacce->inode) - unmap_mapping_range(uacce->inode->i_mapping, 0, 0, 1);
/* * uacce_fops_open() may be running concurrently, even after we remove @@ -597,6 +591,12 @@ void uacce_remove(struct uacce_device *uacce) uacce_put_queue(q); mutex_unlock(&q->mutex); uacce_unbind_queue(q); + + /* + * unmap remaining mapping from user space, preventing user still + * access the mmaped area while parent device is already removed + */ + unmap_mapping_range(q->mapping, 0, 0, 1); }
/* disable sva now since no opened queues */ diff --git a/include/linux/uacce.h b/include/linux/uacce.h index 0a81c3dfd26c..e290c0269944 100644 --- a/include/linux/uacce.h +++ b/include/linux/uacce.h @@ -86,6 +86,7 @@ enum uacce_q_state { * @state: queue state machine * @pasid: pasid associated to the mm * @handle: iommu_sva handle returned by iommu_sva_bind_device() + * @mapping: user space mapping of the queue */ struct uacce_queue { struct uacce_device *uacce; @@ -97,6 +98,7 @@ struct uacce_queue { enum uacce_q_state state; u32 pasid; struct iommu_sva *handle; + struct address_space *mapping; };
/** @@ -114,7 +116,6 @@ struct uacce_queue { * @mutex: protects uacce operation * @priv: private pointer of the uacce * @queues: list of queues - * @inode: core vfs */ struct uacce_device { const char *algs; @@ -130,7 +131,6 @@ struct uacce_device { struct mutex mutex; void *priv; struct list_head queues; - struct inode *inode; };
#if IS_ENABLED(CONFIG_UACCE)
vma_close frees qfrs but does not clear q->qfrs, which still points to the freed object, causing subsequent mmap calls to fail. So make vma_close clear q->qfrs as well.
Suggested-by: Al Viro viro@zeniv.linux.org.uk Signed-off-by: Zhangfei Gao zhangfei.gao@linaro.org --- drivers/misc/uacce/uacce.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c index 338b59ef5493..930c252753a0 100644 --- a/drivers/misc/uacce/uacce.c +++ b/drivers/misc/uacce/uacce.c @@ -200,12 +200,15 @@ static int uacce_fops_release(struct inode *inode, struct file *filep) static void uacce_vma_close(struct vm_area_struct *vma) { struct uacce_queue *q = vma->vm_private_data; - struct uacce_qfile_region *qfr = NULL;
- if (vma->vm_pgoff < UACCE_MAX_REGION) - qfr = q->qfrs[vma->vm_pgoff]; + if (vma->vm_pgoff < UACCE_MAX_REGION) { + struct uacce_qfile_region *qfr = q->qfrs[vma->vm_pgoff];
- kfree(qfr); + mutex_lock(&q->mutex); + q->qfrs[vma->vm_pgoff] = NULL; + mutex_unlock(&q->mutex); + kfree(qfr); + } }
static const struct vm_operations_struct uacce_vm_ops = {