The Documentation/DMA-API-HOWTO.txt states that the dma_map_sg() function returns the number of the created entries in the DMA address space. However the subsequent calls to the dma_sync_sg_for_{device,cpu}() and dma_unmap_sg must be called with the original number of the entries passed to the dma_map_sg().
struct sg_table is a common structure used for describing a non-contiguous memory buffer, used commonly in the DRM and graphics subsystems. It consists of a scatterlist with memory pages and DMA addresses (sgl entry), as well as the number of scatterlist entries: CPU pages (orig_nents entry) and DMA mapped pages (nents entry).
It turned out that it was a common mistake to misuse nents and orig_nents entries, calling DMA-mapping functions with a wrong number of entries or ignoring the number of mapped entries returned by the dma_map_sg() function.
To avoid such issues, lets use a common dma-mapping wrappers operating directly on the struct sg_table objects and use scatterlist page iterators where possible. This, almost always, hides references to the nents and orig_nents entries, making the code robust, easier to follow and copy/paste safe.
Signed-off-by: Marek Szyprowski m.szyprowski@samsung.com Reviewed-by: Christian König christian.koenig@amd.com --- For more information, see '[PATCH v5 00/38] DRM: fix struct sg_table nents vs. orig_nents misuse' thread: https://lore.kernel.org/linux-iommu/20200513132114.6046-1-m.szyprowski@samsu... --- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 9 +++------ drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 8 ++++---- 3 files changed, 10 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 43d8ed7..519ce44 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -307,8 +307,8 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach, if (IS_ERR(sgt)) return sgt;
- if (!dma_map_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir, - DMA_ATTR_SKIP_CPU_SYNC)) + if (dma_map_sgtable(attach->dev, sgt, dir, + DMA_ATTR_SKIP_CPU_SYNC)) goto error_free; break;
@@ -349,7 +349,7 @@ static void amdgpu_dma_buf_unmap(struct dma_buf_attachment *attach, struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
if (sgt->sgl->page_link) { - dma_unmap_sg(attach->dev, sgt->sgl, sgt->nents, dir); + dma_unmap_sgtable(attach->dev, sgt, dir, 0); sg_free_table(sgt); kfree(sgt); } else { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 9cbecd5..57a5d56 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1024,7 +1024,6 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) { struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); struct amdgpu_ttm_tt *gtt = (void *)ttm; - unsigned nents; int r;
int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); @@ -1039,9 +1038,8 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) goto release_sg;
/* Map SG to device */ - r = -ENOMEM; - nents = dma_map_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); - if (nents == 0) + r = dma_map_sgtable(adev->dev, ttm->sg, direction, 0); + if (r) goto release_sg;
/* convert SG to linear array of pages and dma addresses */ @@ -1072,8 +1070,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) return;
/* unmap the pages mapped to the device */ - dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); - + dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0); sg_free_table(ttm->sg);
#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index d399e58..75495a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -477,11 +477,11 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev, if (r) goto error_free;
- for_each_sg((*sgt)->sgl, sg, num_entries, i) + for_each_sgtable_sg(*sgt, sg, i) sg->length = 0;
node = mem->mm_node; - for_each_sg((*sgt)->sgl, sg, num_entries, i) { + for_each_sgtable_sg(*sgt, sg, i) { phys_addr_t phys = (node->start << PAGE_SHIFT) + adev->gmc.aper_base; size_t size = node->size << PAGE_SHIFT; @@ -501,7 +501,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev, return 0;
error_unmap: - for_each_sg((*sgt)->sgl, sg, num_entries, i) { + for_each_sgtable_sg(*sgt, sg, i) { if (!sg->length) continue;
@@ -532,7 +532,7 @@ void amdgpu_vram_mgr_free_sgt(struct amdgpu_device *adev, struct scatterlist *sg; int i;
- for_each_sg(sgt->sgl, sg, sgt->nents, i) + for_each_sgtable_sg(sgt, sg, i) dma_unmap_resource(dev, sg->dma_address, sg->length, dir, DMA_ATTR_SKIP_CPU_SYNC);