On 11/13/25 17:05, Pierre-Eric Pelloux-Prayer wrote:
This way the caller can select the one it wants to use.
Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
 drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c |  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 75 +++++++++++--------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       | 16 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c      |  3 +-
 5 files changed, 60 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index 02c2479a8840..b59040a8771f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -38,7 +38,8 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
     stime = ktime_get();
     for (i = 0; i < n; i++) {
         struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
-        r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence,
-                               false, 0);
+        r = amdgpu_copy_buffer(ring, &adev->mman.default_entity.base,
+                               saddr, daddr, size, NULL, &fence,
+                               false, 0);
         if (r)
             goto exit_do_move;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index e08f58de4b17..c06c132a753c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1321,8 +1321,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
     if (r)
         goto out;

-    r = amdgpu_fill_buffer(abo, 0, &bo->base._resv, &fence, true,
-                           AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
+    r = amdgpu_fill_buffer(&adev->mman.clear_entity, abo, 0, &bo->base._resv,
+                           &fence, AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
     if (WARN_ON(r))
         goto out;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 42d448cd6a6d..c8d59ca2b3bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -164,6 +164,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 /**
  * amdgpu_ttm_map_buffer - Map memory into the GART windows
+ * @entity: entity to run the window setup job
  * @bo: buffer object to map
  * @mem: memory object to map
  * @mm_cur: range to map
@@ -176,7 +177,8 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
  * Setup one of the GART windows to access a specific piece of memory or return
  * the physical address for local memory.
  */
-static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
+static int amdgpu_ttm_map_buffer(struct drm_sched_entity *entity,
+                                 struct ttm_buffer_object *bo,
Probably better to split this patch into multiple patches.
One which changes amdgpu_ttm_map_buffer() and then another one or two for the higher-level copy_buffer and fill_buffer functions.
                                 struct ttm_resource *mem,
                                 struct amdgpu_res_cursor *mm_cur,
                                 unsigned int window, struct amdgpu_ring *ring,
@@ -224,7 +226,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
     num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
     num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;

-    r = amdgpu_job_alloc_with_ib(adev, &adev->mman.default_entity.base,
+    r = amdgpu_job_alloc_with_ib(adev, entity,
                                  AMDGPU_FENCE_OWNER_UNDEFINED,
                                  num_dw * 4 + num_bytes,
                                  AMDGPU_IB_POOL_DELAYED, &job,
@@ -274,6 +276,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
 /**
  * amdgpu_ttm_copy_mem_to_mem - Helper function for copy
  * @adev: amdgpu device
+ * @entity: entity to run the jobs
  * @src: buffer/address where to read from
  * @dst: buffer/address where to write to
  * @size: number of bytes to copy
@@ -288,6 +291,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
  */
 __attribute__((nonnull))
 static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
+                                      struct drm_sched_entity *entity,
                                       const struct amdgpu_copy_mem *src,
                                       const struct amdgpu_copy_mem *dst,
                                       uint64_t size, bool tmz,
@@ -320,12 +324,14 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
         cur_size = min3(src_mm.size, dst_mm.size, 256ULL << 20);

         /* Map src to window 0 and dst to window 1. */
-        r = amdgpu_ttm_map_buffer(src->bo, src->mem, &src_mm,
+        r = amdgpu_ttm_map_buffer(entity,
+                                  src->bo, src->mem, &src_mm,
                                   0, ring, tmz, &cur_size, &from);
         if (r)
             goto error;

-        r = amdgpu_ttm_map_buffer(dst->bo, dst->mem, &dst_mm,
+        r = amdgpu_ttm_map_buffer(entity,
+                                  dst->bo, dst->mem, &dst_mm,
                                   1, ring, tmz, &cur_size, &to);
         if (r)
             goto error;
@@ -353,7 +359,7 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
                                        write_compress_disable));
         }

-        r = amdgpu_copy_buffer(ring, from, to, cur_size, resv,
+        r = amdgpu_copy_buffer(ring, entity, from, to, cur_size, resv,
                                &next, true, copy_flags);
         if (r)
             goto error;
@@ -394,7 +400,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
     src.offset = 0;
     dst.offset = 0;

-    r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
+    r = amdgpu_ttm_copy_mem_to_mem(adev,
+                                   &adev->mman.move_entity.base,
+                                   &src, &dst,
                                    new_mem->size,
                                    amdgpu_bo_encrypted(abo),
                                    bo->base.resv, &fence);
@@ -406,8 +414,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
         (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
         struct dma_fence *wipe_fence = NULL;

-        r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence, false,
-                               AMDGPU_KERNEL_JOB_ID_MOVE_BLIT);
+        r = amdgpu_fill_buffer(&adev->mman.move_entity, abo, 0, NULL,
+                               &wipe_fence,
+                               AMDGPU_KERNEL_JOB_ID_MOVE_BLIT);
         if (r) {
             goto error;
         } else if (wipe_fence) {
@@ -2223,16 +2232,15 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
 }

 static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
+                                  struct drm_sched_entity *entity,
                                   unsigned int num_dw,
                                   struct dma_resv *resv,
                                   bool vm_needs_flush,
                                   struct amdgpu_job **job,
-                                  bool delayed, u64 k_job_id)
+                                  u64 k_job_id)
 {
     enum amdgpu_ib_pool_type pool = AMDGPU_IB_POOL_DELAYED;
     int r;
-    struct drm_sched_entity *entity = delayed ? &adev->mman.clear_entity.base :
-                                                &adev->mman.move_entity.base;

     r = amdgpu_job_alloc_with_ib(adev, entity,
                                  AMDGPU_FENCE_OWNER_UNDEFINED,
                                  num_dw * 4, pool, job, k_job_id);
@@ -2252,7 +2260,9 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
                           DMA_RESV_USAGE_BOOKKEEP);
 }

-int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
+int amdgpu_copy_buffer(struct amdgpu_ring *ring,
+                       struct drm_sched_entity *entity,
+                       uint64_t src_offset,
                        uint64_t dst_offset, uint32_t byte_count,
                        struct dma_resv *resv,
                        struct dma_fence **fence,
@@ -2274,8 +2284,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
     max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
     num_loops = DIV_ROUND_UP(byte_count, max_bytes);
     num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
-    r = amdgpu_ttm_prepare_job(adev, num_dw,
-                               resv, vm_needs_flush, &job, false,
+    r = amdgpu_ttm_prepare_job(adev, entity, num_dw,
+                               resv, vm_needs_flush, &job,
                                AMDGPU_KERNEL_JOB_ID_TTM_COPY_BUFFER);
     if (r)
         return r;
@@ -2304,11 +2314,13 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
     return r;
 }

-static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
+static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring,
+                               struct drm_sched_entity *entity,
+                               uint32_t src_data,
                                uint64_t dst_addr, uint32_t byte_count,
                                struct dma_resv *resv,
                                struct dma_fence **fence,
-                               bool vm_needs_flush, bool delayed,
+                               bool vm_needs_flush,
                                u64 k_job_id)
 {
     struct amdgpu_device *adev = ring->adev;
@@ -2321,8 +2333,8 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
     max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
     num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes);
     num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8);
-    r = amdgpu_ttm_prepare_job(adev, num_dw, resv, vm_needs_flush,
-                               &job, delayed, k_job_id);
+    r = amdgpu_ttm_prepare_job(adev, entity, num_dw, resv,
+                               vm_needs_flush, &job, k_job_id);
     if (r)
         return r;
@@ -2386,13 +2398,14 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
         /* Never clear more than 256MiB at once to avoid timeouts */
         size = min(cursor.size, 256ULL << 20);

-        r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
+        r = amdgpu_ttm_map_buffer(&adev->mman.clear_entity.base,
+                                  &bo->tbo, bo->tbo.resource, &cursor,
                                   1, ring, false, &size, &addr);
         if (r)
             goto err;

-        r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
-                                &next, true, true,
+        r = amdgpu_ttm_fill_mem(ring, &adev->mman.clear_entity.base,
+                                0, addr, size, resv, &next, true,
                                 AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER);
         if (r)
             goto err;
@@ -2408,12 +2421,12 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
     return r;
 }

-int amdgpu_fill_buffer(struct amdgpu_bo *bo,
+int amdgpu_fill_buffer(struct amdgpu_ttm_entity *entity,
+                       struct amdgpu_bo *bo,
                        uint32_t src_data,
                        struct dma_resv *resv,
                        struct dma_fence **f,
-                       bool delayed,
                        u64 k_job_id)
 {
     struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
     struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
@@ -2437,13 +2450,15 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
         /* Never fill more than 256MiB at once to avoid timeouts */
         cur_size = min(dst.size, 256ULL << 20);

-        r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &dst,
+        r = amdgpu_ttm_map_buffer(&entity->base,
+                                  &bo->tbo, bo->tbo.resource, &dst,
                                   1, ring, false, &cur_size, &to);
         if (r)
             goto error;

-        r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv,
-                                &next, true, delayed, k_job_id);
+        r = amdgpu_ttm_fill_mem(ring, &entity->base,
+                                src_data, to, cur_size, resv,
+                                &next, true, k_job_id);
         if (r)
             goto error;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index d2295d6c2b67..e1655f86a016 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -167,7 +167,9 @@ int amdgpu_ttm_init(struct amdgpu_device *adev);
 void amdgpu_ttm_fini(struct amdgpu_device *adev);
 void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
                                         bool enable);
-int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
+int amdgpu_copy_buffer(struct amdgpu_ring *ring,
+                       struct drm_sched_entity *entity,
If I'm not completely mistaken, you should be able to drop the ring argument since that can be determined from the entity.
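Roughly something like this (untested sketch, and it assumes the entity is only ever initialized on the buffer_funcs ring's scheduler so entity->rq is already set; the helper name is made up):

static struct amdgpu_ring *
amdgpu_ttm_entity_to_ring(struct drm_sched_entity *entity)
{
	/* Hypothetical helper: recover the SDMA ring from the scheduler the
	 * entity is attached to. to_amdgpu_ring() is just container_of() on
	 * the drm_gpu_scheduler embedded in struct amdgpu_ring.
	 */
	return to_amdgpu_ring(entity->rq->sched);
}

Then amdgpu_copy_buffer(), amdgpu_ttm_fill_mem() etc. could do
	struct amdgpu_ring *ring = amdgpu_ttm_entity_to_ring(entity);
internally and the callers wouldn't need to pass adev->mman.buffer_funcs_ring around any more.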
Apart from that, this looks rather good to me.
Regards, Christian.
+                       uint64_t src_offset,
                        uint64_t dst_offset, uint32_t byte_count,
                        struct dma_resv *resv,
                        struct dma_fence **fence,
@@ -175,12 +177,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
 int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
                             struct dma_resv *resv,
                             struct dma_fence **fence);
-int amdgpu_fill_buffer(struct amdgpu_bo *bo,
-                       uint32_t src_data,
-                       struct dma_resv *resv,
-                       struct dma_fence **fence,
-                       bool delayed,
-                       u64 k_job_id);
+int amdgpu_fill_buffer(struct amdgpu_ttm_entity *entity,
+                       struct amdgpu_bo *bo,
+                       uint32_t src_data,
+                       struct dma_resv *resv,
+                       struct dma_fence **f,
+                       u64 k_job_id);
 int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
 void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index d74ff6e90590..09756132fa1b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -157,7 +157,8 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
             goto out_unlock;
         }

-        r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE,
-                               NULL, &next, true, 0);
+        r = amdgpu_copy_buffer(ring, &entity->base,
+                               gart_s, gart_d, size * PAGE_SIZE,
+                               NULL, &next, true, 0);
         if (r) {
             dev_err(adev->dev, "fail %d to copy memory\n", r);