On 2025-11-13 11:05, Pierre-Eric Pelloux-Prayer wrote:
> This way the caller can select the one it wants to use.
>
> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
I agree with Christian's comment to eliminate the ring parameter where it's implied by the entity. Other than that, the patch is
Acked-by: Felix Kuehling <felix.kuehling@amd.com>
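For illustration only (not taken from this patch): if each TTM entity stays bound to a single SDMA scheduler, the helpers could recover the ring from the entity and the explicit ring argument becomes redundant, e.g. something along these lines:

        /* Sketch, assuming the entity is bound to exactly one scheduler:
         * derive the ring from the entity so callers cannot pass a
         * mismatched ring/entity pair.
         */
        struct amdgpu_ring *ring = to_amdgpu_ring(entity->rq->sched);
        struct amdgpu_device *adev = ring->adev;

The exact plumbing is of course up to Pierre-Eric and Christian.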
>  drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c |  3 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    |  4 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 75 +++++++++++--------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       | 16 ++--
>  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c      |  3 +-
>  5 files changed, 60 insertions(+), 41 deletions(-)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
> index 02c2479a8840..b59040a8771f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
> @@ -38,7 +38,8 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
>      stime = ktime_get();
>      for (i = 0; i < n; i++) {
>          struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> -        r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence,
> -                               false, 0);
> +        r = amdgpu_copy_buffer(ring, &adev->mman.default_entity.base,
> +                               saddr, daddr, size, NULL, &fence,
> +                               false, 0);
>          if (r)
>              goto exit_do_move;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index e08f58de4b17..c06c132a753c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -1321,8 +1321,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
>      if (r)
>          goto out;
>
> -    r = amdgpu_fill_buffer(abo, 0, &bo->base._resv, &fence, true,
> -                           AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
> +    r = amdgpu_fill_buffer(&adev->mman.clear_entity, abo, 0, &bo->base._resv,
> +                           &fence, AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
>      if (WARN_ON(r))
>          goto out;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 42d448cd6a6d..c8d59ca2b3bd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -164,6 +164,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
>
>  /**
>   * amdgpu_ttm_map_buffer - Map memory into the GART windows
> + * @entity: entity to run the window setup job
>   * @bo: buffer object to map
>   * @mem: memory object to map
>   * @mm_cur: range to map
> @@ -176,7 +177,8 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
>   * Setup one of the GART windows to access a specific piece of memory or return
>   * the physical address for local memory.
>   */
> -static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
> +static int amdgpu_ttm_map_buffer(struct drm_sched_entity *entity,
> +                                 struct ttm_buffer_object *bo,
>                                   struct ttm_resource *mem,
>                                   struct amdgpu_res_cursor *mm_cur,
>                                   unsigned int window, struct amdgpu_ring *ring,
> @@ -224,7 +226,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
>      num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
>      num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
>
> -    r = amdgpu_job_alloc_with_ib(adev, &adev->mman.default_entity.base,
> +    r = amdgpu_job_alloc_with_ib(adev, entity,
>                                   AMDGPU_FENCE_OWNER_UNDEFINED,
>                                   num_dw * 4 + num_bytes,
>                                   AMDGPU_IB_POOL_DELAYED, &job,
> @@ -274,6 +276,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
>  /**
>   * amdgpu_ttm_copy_mem_to_mem - Helper function for copy
>   * @adev: amdgpu device
> + * @entity: entity to run the jobs
>   * @src: buffer/address where to read from
>   * @dst: buffer/address where to write to
>   * @size: number of bytes to copy
> @@ -288,6 +291,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
>   */
>  __attribute__((nonnull))
>  static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
> +                                      struct drm_sched_entity *entity,
>                                        const struct amdgpu_copy_mem *src,
>                                        const struct amdgpu_copy_mem *dst,
>                                        uint64_t size, bool tmz,
> @@ -320,12 +324,14 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
>          cur_size = min3(src_mm.size, dst_mm.size, 256ULL << 20);
>
>          /* Map src to window 0 and dst to window 1. */
> -        r = amdgpu_ttm_map_buffer(src->bo, src->mem, &src_mm,
> +        r = amdgpu_ttm_map_buffer(entity,
> +                                  src->bo, src->mem, &src_mm,
>                                    0, ring, tmz, &cur_size, &from);
>          if (r)
>              goto error;
>
> -        r = amdgpu_ttm_map_buffer(dst->bo, dst->mem, &dst_mm,
> +        r = amdgpu_ttm_map_buffer(entity,
> +                                  dst->bo, dst->mem, &dst_mm,
>                                    1, ring, tmz, &cur_size, &to);
>          if (r)
>              goto error;
> @@ -353,7 +359,7 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
>                                              write_compress_disable));
>          }
>
> -        r = amdgpu_copy_buffer(ring, from, to, cur_size, resv,
> +        r = amdgpu_copy_buffer(ring, entity, from, to, cur_size, resv,
>                                 &next, true, copy_flags);
>          if (r)
>              goto error;
> @@ -394,7 +400,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
>      src.offset = 0;
>      dst.offset = 0;
>
> -    r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
> +    r = amdgpu_ttm_copy_mem_to_mem(adev,
> +                                   &adev->mman.move_entity.base,
> +                                   &src, &dst,
>                                     new_mem->size,
>                                     amdgpu_bo_encrypted(abo),
>                                     bo->base.resv, &fence);
> @@ -406,8 +414,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
>          (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
>          struct dma_fence *wipe_fence = NULL;
>
> -        r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence, false,
> -                               AMDGPU_KERNEL_JOB_ID_MOVE_BLIT);
> +        r = amdgpu_fill_buffer(&adev->mman.move_entity,
> +                               abo, 0, NULL, &wipe_fence,
> +                               AMDGPU_KERNEL_JOB_ID_MOVE_BLIT);
>          if (r) {
>              goto error;
>          } else if (wipe_fence) {
> @@ -2223,16 +2232,15 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
>  }
>
>  static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
> +                                  struct drm_sched_entity *entity,
>                                    unsigned int num_dw,
>                                    struct dma_resv *resv,
>                                    bool vm_needs_flush,
>                                    struct amdgpu_job **job,
> -                                  bool delayed, u64 k_job_id)
> +                                  u64 k_job_id)
>  {
>      enum amdgpu_ib_pool_type pool = AMDGPU_IB_POOL_DELAYED;
> -    struct drm_sched_entity *entity = delayed ? &adev->mman.clear_entity.base :
> -                                                &adev->mman.move_entity.base;
>      int r;
>
>      r = amdgpu_job_alloc_with_ib(adev, entity,
>                                   AMDGPU_FENCE_OWNER_UNDEFINED,
>                                   num_dw * 4, pool, job, k_job_id);
> @@ -2252,7 +2260,9 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
>                                      DMA_RESV_USAGE_BOOKKEEP);
>  }
>
> -int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
> +int amdgpu_copy_buffer(struct amdgpu_ring *ring,
> +                       struct drm_sched_entity *entity,
> +                       uint64_t src_offset,
>                         uint64_t dst_offset, uint32_t byte_count,
>                         struct dma_resv *resv,
>                         struct dma_fence **fence,
> @@ -2274,8 +2284,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
>      max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
>      num_loops = DIV_ROUND_UP(byte_count, max_bytes);
>      num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
> -    r = amdgpu_ttm_prepare_job(adev, num_dw,
> -                               resv, vm_needs_flush, &job, false,
> +    r = amdgpu_ttm_prepare_job(adev, entity, num_dw,
> +                               resv, vm_needs_flush, &job,
>                                 AMDGPU_KERNEL_JOB_ID_TTM_COPY_BUFFER);
>      if (r)
>          return r;
> @@ -2304,11 +2314,13 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
>      return r;
>  }
>
> -static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
> +static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring,
> +                               struct drm_sched_entity *entity,
> +                               uint32_t src_data,
>                                 uint64_t dst_addr, uint32_t byte_count,
>                                 struct dma_resv *resv,
>                                 struct dma_fence **fence,
> -                               bool vm_needs_flush, bool delayed,
> +                               bool vm_needs_flush,
>                                 u64 k_job_id)
>  {
>      struct amdgpu_device *adev = ring->adev;
> @@ -2321,8 +2333,8 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
>      max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
>      num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes);
>      num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8);
> -    r = amdgpu_ttm_prepare_job(adev, num_dw, resv, vm_needs_flush,
> -                               &job, delayed, k_job_id);
> +    r = amdgpu_ttm_prepare_job(adev, entity, num_dw, resv,
> +                               vm_needs_flush, &job, k_job_id);
>      if (r)
>          return r;
> @@ -2386,13 +2398,14 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
>          /* Never clear more than 256MiB at once to avoid timeouts */
>          size = min(cursor.size, 256ULL << 20);
>
> -        r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
> +        r = amdgpu_ttm_map_buffer(&adev->mman.clear_entity.base,
> +                                  &bo->tbo, bo->tbo.resource, &cursor,
>                                    1, ring, false, &size, &addr);
>          if (r)
>              goto err;
>
> -        r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
> -                                &next, true, true,
> +        r = amdgpu_ttm_fill_mem(ring, &adev->mman.clear_entity.base,
> +                                0, addr, size, resv, &next, true,
>                                  AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER);
>          if (r)
>              goto err;
> @@ -2408,12 +2421,12 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
>      return r;
>  }
>
> -int amdgpu_fill_buffer(struct amdgpu_bo *bo,
> -                       uint32_t src_data,
> -                       struct dma_resv *resv,
> -                       struct dma_fence **f,
> -                       bool delayed,
> -                       u64 k_job_id)
> +int amdgpu_fill_buffer(struct amdgpu_ttm_entity *entity,
> +                       struct amdgpu_bo *bo,
> +                       uint32_t src_data,
> +                       struct dma_resv *resv,
> +                       struct dma_fence **f,
> +                       u64 k_job_id)
>  {
>      struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>      struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> @@ -2437,13 +2450,15 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>          /* Never fill more than 256MiB at once to avoid timeouts */
>          cur_size = min(dst.size, 256ULL << 20);
>
> -        r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &dst,
> +        r = amdgpu_ttm_map_buffer(&entity->base,
> +                                  &bo->tbo, bo->tbo.resource, &dst,
>                                    1, ring, false, &cur_size, &to);
>          if (r)
>              goto error;
>
> -        r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv,
> -                                &next, true, delayed, k_job_id);
> +        r = amdgpu_ttm_fill_mem(ring, &entity->base,
> +                                src_data, to, cur_size, resv,
> +                                &next, true, k_job_id);
>          if (r)
>              goto error;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index d2295d6c2b67..e1655f86a016 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -167,7 +167,9 @@ int amdgpu_ttm_init(struct amdgpu_device *adev);
>  void amdgpu_ttm_fini(struct amdgpu_device *adev);
>  void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
>                                          bool enable);
> -int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
> +int amdgpu_copy_buffer(struct amdgpu_ring *ring,
> +                       struct drm_sched_entity *entity,
> +                       uint64_t src_offset,
>                         uint64_t dst_offset, uint32_t byte_count,
>                         struct dma_resv *resv,
>                         struct dma_fence **fence,
> @@ -175,12 +177,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
>  int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
>                              struct dma_resv *resv,
>                              struct dma_fence **fence);
> -int amdgpu_fill_buffer(struct amdgpu_bo *bo,
> -                       uint32_t src_data,
> -                       struct dma_resv *resv,
> -                       struct dma_fence **fence,
> -                       bool delayed,
> -                       u64 k_job_id);
> +int amdgpu_fill_buffer(struct amdgpu_ttm_entity *entity,
> +                       struct amdgpu_bo *bo,
> +                       uint32_t src_data,
> +                       struct dma_resv *resv,
> +                       struct dma_fence **f,
> +                       u64 k_job_id);
>  int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
>  void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> index d74ff6e90590..09756132fa1b 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> @@ -157,7 +157,8 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
>              goto out_unlock;
>          }
>
> -        r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE,
> +        r = amdgpu_copy_buffer(ring, &entity->base,
> +                               gart_s, gart_d, size * PAGE_SIZE,
>                                 NULL, &next, true, 0);
>          if (r) {
>              dev_err(adev->dev, "fail %d to copy memory\n", r);