The way cookie_init_hw_msi_region() allocates the iommu_dma_msi_page structures doesn't match the way iommu_put_dma_cookie() frees them.
The former performs a single allocation of all the required structures, while the latter tries to free them one at a time. It doesn't quite work for the main use case (the GICv3 ITS, where the range is 64kB) when the base granule size is 4kB.
This leads to a nice slab corruption on teardown, which is easily observable by simply creating a VF on an SR-IOV-capable device and tearing it down immediately (no need to even make use of it).
Fix it by allocating iommu_dma_msi_page structures one at a time.
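To make the mismatch concrete, here is a rough sketch (simplified, not the exact kernel code) of what the two paths do:

	/* Init path: ONE slab object backing all num_pages entries */
	msi_page = kcalloc(num_pages, sizeof(*msi_page), GFP_KERNEL);
	for (i = 0; i < num_pages; i++)
		list_add(&msi_page[i].list, &cookie->msi_page_list);

	/*
	 * Teardown path (iommu_put_dma_cookie): one kfree() per entry.
	 * Only &msi_page[0] is a pointer the allocator ever returned;
	 * with a 64kB ITS region and a 4kB granule, num_pages is 16,
	 * so the remaining 15 kfree() calls hit bogus slab pointers.
	 */
	list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list)
		kfree(msi);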
Fixes: 7c1b058c8b5a3 ("iommu/dma: Handle IOMMU API reserved regions")
Signed-off-by: Marc Zyngier <maz@kernel.org>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Joerg Roedel <jroedel@suse.de>
Cc: Eric Auger <eric.auger@redhat.com>
Cc: Will Deacon <will@kernel.org>
Cc: stable@vger.kernel.org
---
 drivers/iommu/dma-iommu.c | 36 ++++++++++++++++++++++++------------
 1 file changed, 24 insertions(+), 12 deletions(-)
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index a2e96a5fd9a7..01fa64856c12 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -171,25 +171,37 @@ static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie,
 		phys_addr_t start, phys_addr_t end)
 {
 	struct iova_domain *iovad = &cookie->iovad;
-	struct iommu_dma_msi_page *msi_page;
-	int i, num_pages;
+	struct iommu_dma_msi_page *msi_page, *tmp;
+	int i, num_pages, ret = 0;
+	phys_addr_t base;
 
-	start -= iova_offset(iovad, start);
+	base = start -= iova_offset(iovad, start);
 	num_pages = iova_align(iovad, end - start) >> iova_shift(iovad);
 
-	msi_page = kcalloc(num_pages, sizeof(*msi_page), GFP_KERNEL);
-	if (!msi_page)
-		return -ENOMEM;
-
 	for (i = 0; i < num_pages; i++) {
-		msi_page[i].phys = start;
-		msi_page[i].iova = start;
-		INIT_LIST_HEAD(&msi_page[i].list);
-		list_add(&msi_page[i].list, &cookie->msi_page_list);
+		msi_page = kmalloc(sizeof(*msi_page), GFP_KERNEL);
+		if (!msi_page) {
+			ret = -ENOMEM;
+			break;
+		}
+		msi_page->phys = start;
+		msi_page->iova = start;
+		INIT_LIST_HEAD(&msi_page->list);
+		list_add(&msi_page->list, &cookie->msi_page_list);
 		start += iovad->granule;
 	}
 
-	return 0;
+	if (ret) {
+		list_for_each_entry_safe(msi_page, tmp,
+					 &cookie->msi_page_list, list) {
+			if (msi_page->phys >= base && msi_page->phys < start) {
+				list_del(&msi_page->list);
+				kfree(msi_page);
+			}
+		}
+	}
+
+	return ret;
 }
 
 static int iova_reserve_pci_windows(struct pci_dev *dev,
Hi Marc,

On 3/3/20 12:51 PM, Marc Zyngier wrote:
> The way cookie_init_hw_msi_region() allocates the iommu_dma_msi_page structures doesn't match the way iommu_put_dma_cookie() frees them.
>
> The former performs a single allocation of all the required structures, while the latter tries to free them one at a time. It doesn't quite work for the main use case (the GICv3 ITS, where the range is 64kB) when the base granule size is 4kB.
>
> This leads to a nice slab corruption on teardown, which is easily observable by simply creating a VF on an SR-IOV-capable device and tearing it down immediately (no need to even make use of it).
>
> Fix it by allocating iommu_dma_msi_page structures one at a time.
>
> Fixes: 7c1b058c8b5a3 ("iommu/dma: Handle IOMMU API reserved regions")
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Thanks
Eric
On 03/03/2020 11:51 am, Marc Zyngier wrote:
> The way cookie_init_hw_msi_region() allocates the iommu_dma_msi_page structures doesn't match the way iommu_put_dma_cookie() frees them.
>
> The former performs a single allocation of all the required structures, while the latter tries to free them one at a time. It doesn't quite work for the main use case (the GICv3 ITS, where the range is 64kB) when the base granule size is 4kB.
>
> This leads to a nice slab corruption on teardown, which is easily observable by simply creating a VF on an SR-IOV-capable device and tearing it down immediately (no need to even make use of it).
>
> Fix it by allocating iommu_dma_msi_page structures one at a time.
Bleh, you know you're supposed to be using 64K pages on those things, right? :P
[...]
> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> index a2e96a5fd9a7..01fa64856c12 100644
> --- a/drivers/iommu/dma-iommu.c
> +++ b/drivers/iommu/dma-iommu.c
> @@ -171,25 +171,37 @@ static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie,
>  		phys_addr_t start, phys_addr_t end)
>  {
>  	struct iova_domain *iovad = &cookie->iovad;
> -	struct iommu_dma_msi_page *msi_page;
> -	int i, num_pages;
> +	struct iommu_dma_msi_page *msi_page, *tmp;
> +	int i, num_pages, ret = 0;
> +	phys_addr_t base;
>
> -	start -= iova_offset(iovad, start);
> +	base = start -= iova_offset(iovad, start);
>  	num_pages = iova_align(iovad, end - start) >> iova_shift(iovad);
>
> -	msi_page = kcalloc(num_pages, sizeof(*msi_page), GFP_KERNEL);
> -	if (!msi_page)
> -		return -ENOMEM;
> -
>  	for (i = 0; i < num_pages; i++) {
> -		msi_page[i].phys = start;
> -		msi_page[i].iova = start;
> -		INIT_LIST_HEAD(&msi_page[i].list);
> -		list_add(&msi_page[i].list, &cookie->msi_page_list);
> +		msi_page = kmalloc(sizeof(*msi_page), GFP_KERNEL);
> +		if (!msi_page) {
> +			ret = -ENOMEM;
> +			break;
> +		}
I think we can just return here and skip the cleanup below - by the time we get here the cookie itself has already been allocated and initialised, so even if iommu_dma_init_domain() fails someone else has already accepted the responsibility of calling iommu_put_dma_cookie() at some point later, which will clean up properly.
Cheers, Robin.
> +		msi_page->phys = start;
> +		msi_page->iova = start;
> +		INIT_LIST_HEAD(&msi_page->list);
> +		list_add(&msi_page->list, &cookie->msi_page_list);
>  		start += iovad->granule;
>  	}
>
> -	return 0;
> +	if (ret) {
> +		list_for_each_entry_safe(msi_page, tmp,
> +					 &cookie->msi_page_list, list) {
> +			if (msi_page->phys >= base && msi_page->phys < start) {
> +				list_del(&msi_page->list);
> +				kfree(msi_page);
> +			}
> +		}
> +	}
> +
> +	return ret;
>  }
>
>  static int iova_reserve_pci_windows(struct pci_dev *dev,
On 2020-03-03 17:23, Robin Murphy wrote:
On 03/03/2020 11:51 am, Marc Zyngier wrote:
>> The way cookie_init_hw_msi_region() allocates the iommu_dma_msi_page structures doesn't match the way iommu_put_dma_cookie() frees them.
>>
>> The former performs a single allocation of all the required structures, while the latter tries to free them one at a time. It doesn't quite work for the main use case (the GICv3 ITS, where the range is 64kB) when the base granule size is 4kB.
>>
>> This leads to a nice slab corruption on teardown, which is easily observable by simply creating a VF on an SR-IOV-capable device and tearing it down immediately (no need to even make use of it).
>>
>> Fix it by allocating iommu_dma_msi_page structures one at a time.
>
> Bleh, you know you're supposed to be using 64K pages on those things, right? :P
lalalala... ;-)
[...]
>> +		if (!msi_page) {
>> +			ret = -ENOMEM;
>
> I think we can just return here and skip the cleanup below - by the time we get here the cookie itself has already been allocated and initialised, so even if iommu_dma_init_domain() fails someone else has already accepted the responsibility of calling iommu_put_dma_cookie() at some point later, which will clean up properly.
Ah, that's a very good point. I'll refresh the patch with simplified error handling.
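Something like this, I imagine (untested sketch - on allocation failure, just bail out and let iommu_put_dma_cookie() free whatever already made it onto the list):

	for (i = 0; i < num_pages; i++) {
		msi_page = kmalloc(sizeof(*msi_page), GFP_KERNEL);
		if (!msi_page)
			return -ENOMEM;

		msi_page->phys = start;
		msi_page->iova = start;
		INIT_LIST_HEAD(&msi_page->list);
		list_add(&msi_page->list, &cookie->msi_page_list);
		start += iovad->granule;
	}

	return 0;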
Thanks,
M.