On 11/13/25 17:05, Pierre-Eric Pelloux-Prayer wrote:
Until now TTM stored a single pipelined eviction fence, which meant drivers had to use a single entity for these evictions.
To lift this requirement, this commit allows up to 8 entities to be used.
Ideally a dma_resv object would have been used as a container for the eviction fences, but the locking rules make this complex: all dma_resv objects share the same ww_class, so attempting to lock any more mutexes after ww_acquire_done() is an error.
One alternative considered was to introduce a second ww_class for a specific resv holding a single "transient" lock (i.e. the resv lock would only be held for a short period, without taking any other locks).
The other option is to statically reserve a fence array and extend the existing code to deal with N fences instead of 1.
The driver is still responsible for reserving the correct number of fence slots.
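To illustrate the locking issue (sketch only, not part of the patch; "eviction_resv" is a hypothetical per-manager container resv):

	struct ww_acquire_ctx ctx;

	ww_acquire_init(&ctx, &reservation_ww_class);
	dma_resv_lock(bo->base.resv, &ctx);	/* lock all the BOs of the job */
	ww_acquire_done(&ctx);			/* acquire phase is over */

	/* All dma_resv locks share reservation_ww_class, so locking the
	 * hypothetical container here triggers the "Attempting to lock
	 * more mutexes after ww_acquire_done." warning:
	 */
	dma_resv_lock(eviction_resv, &ctx);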
v2:
- simplified code
- dropped n_fences
- name changes
Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       |  8 ++--
 .../gpu/drm/ttm/tests/ttm_bo_validate_test.c  | 11 +++--
 drivers/gpu/drm/ttm/tests/ttm_resource_test.c |  5 +-
 drivers/gpu/drm/ttm/ttm_bo.c                  | 47 ++++++++++---------
 drivers/gpu/drm/ttm/ttm_bo_util.c             | 38 ++++++++++++---
 drivers/gpu/drm/ttm/ttm_resource.c            | 31 +++++++-----
 include/drm/ttm/ttm_resource.h                | 29 ++++++++----
 7 files changed, 109 insertions(+), 60 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 326476089db3..3b46a24a8c48 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -2156,7 +2156,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
 {
 	struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
 	uint64_t size;
-	int r;
+	int r, i;
 
 	if (!adev->mman.initialized || amdgpu_in_reset(adev) ||
 	    adev->mman.buffer_funcs_enabled == enable || adev->gmc.is_app_apu)
@@ -2190,8 +2190,10 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
 	} else {
 		drm_sched_entity_destroy(&adev->mman.high_pr);
 		drm_sched_entity_destroy(&adev->mman.low_pr);
-		dma_fence_put(man->move);
-		man->move = NULL;
+		for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
+			dma_fence_put(man->eviction_fences[i]);
+			man->eviction_fences[i] = NULL;
+		}
That code should have been a TTM function in the first place.
I suggest just calling ttm_resource_manager_cleanup() here instead and adding this as a comment:
/* Drop all the old fences since re-creating the scheduler entities will allocate new fence contexts */
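I.e. the else branch above would become something like this (untested sketch):

	} else {
		drm_sched_entity_destroy(&adev->mman.high_pr);
		drm_sched_entity_destroy(&adev->mman.low_pr);
		/* Drop all the old fences since re-creating the scheduler
		 * entities will allocate new fence contexts.
		 */
		ttm_resource_manager_cleanup(man);
	}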
Apart from that, looks good to me.
Regards,
Christian.
 	}
 
 	/* this just adjusts TTM size idea, which sets lpfn to the correct value */
diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
index 3148f5d3dbd6..8f71906c4238 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
+++ b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
@@ -651,7 +651,7 @@ static void ttm_bo_validate_move_fence_signaled(struct kunit *test)
 	int err;
 
 	man = ttm_manager_type(priv->ttm_dev, mem_type);
-	man->move = dma_fence_get_stub();
+	man->eviction_fences[0] = dma_fence_get_stub();
 
 	bo = ttm_bo_kunit_init(test, test->priv, size, NULL);
 	bo->type = bo_type;
@@ -668,7 +668,7 @@ static void ttm_bo_validate_move_fence_signaled(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, ctx.bytes_moved, size);
 
 	ttm_bo_put(bo);
-	dma_fence_put(man->move);
+	dma_fence_put(man->eviction_fences[0]);
 }
 
 static const struct ttm_bo_validate_test_case ttm_bo_validate_wait_cases[] = {
@@ -732,9 +732,9 @@ static void ttm_bo_validate_move_fence_not_signaled(struct kunit *test)
 	spin_lock_init(&fence_lock);
 
 	man = ttm_manager_type(priv->ttm_dev, fst_mem);
-	man->move = alloc_mock_fence(test);
+	man->eviction_fences[0] = alloc_mock_fence(test);
 
-	task = kthread_create(threaded_fence_signal, man->move, "move-fence-signal");
+	task = kthread_create(threaded_fence_signal, man->eviction_fences[0], "move-fence-signal");
 	if (IS_ERR(task))
 		KUNIT_FAIL(test, "Couldn't create move fence signal task\n");
 
@@ -742,7 +742,8 @@ static void ttm_bo_validate_move_fence_not_signaled(struct kunit *test)
 	err = ttm_bo_validate(bo, placement_val, &ctx_val);
 	dma_resv_unlock(bo->base.resv);
 
-	dma_fence_wait_timeout(man->move, false, MAX_SCHEDULE_TIMEOUT);
+	dma_fence_wait_timeout(man->eviction_fences[0], false, MAX_SCHEDULE_TIMEOUT);
+	man->eviction_fences[0] = NULL;
 
 	KUNIT_EXPECT_EQ(test, err, 0);
 	KUNIT_EXPECT_EQ(test, ctx_val.bytes_moved, size);
diff --git a/drivers/gpu/drm/ttm/tests/ttm_resource_test.c b/drivers/gpu/drm/ttm/tests/ttm_resource_test.c
index e6ea2bd01f07..c0e4e35e0442 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_resource_test.c
+++ b/drivers/gpu/drm/ttm/tests/ttm_resource_test.c
@@ -207,6 +207,7 @@ static void ttm_resource_manager_init_basic(struct kunit *test)
 	struct ttm_resource_test_priv *priv = test->priv;
 	struct ttm_resource_manager *man;
 	size_t size = SZ_16K;
+	int i;
 
 	man = kunit_kzalloc(test, sizeof(*man), GFP_KERNEL);
 	KUNIT_ASSERT_NOT_NULL(test, man);
@@ -216,8 +217,8 @@ static void ttm_resource_manager_init_basic(struct kunit *test)
 	KUNIT_ASSERT_PTR_EQ(test, man->bdev, priv->devs->ttm_dev);
 	KUNIT_ASSERT_EQ(test, man->size, size);
 	KUNIT_ASSERT_EQ(test, man->usage, 0);
-	KUNIT_ASSERT_NULL(test, man->move);
-	KUNIT_ASSERT_NOT_NULL(test, &man->move_lock);
+	for (i = 0; i < TTM_NUM_MOVE_FENCES; i++)
+		KUNIT_ASSERT_NULL(test, man->eviction_fences[i]);
 
 	for (int i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
 		KUNIT_ASSERT_TRUE(test, list_empty(&man->lru[i]));
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index f4d9e68b21e7..0b3732ed6f6c 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -658,34 +658,35 @@ void ttm_bo_unpin(struct ttm_buffer_object *bo)
 EXPORT_SYMBOL(ttm_bo_unpin);
 
 /*
- * Add the last move fence to the BO as kernel dependency and reserve a new
- * fence slot.
+ * Add the pipelined eviction fences to the BO as kernel dependency and
+ * reserve new fence slots.
  */
-static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
-				 struct ttm_resource_manager *man,
-				 bool no_wait_gpu)
+static int ttm_bo_add_pipelined_eviction_fences(struct ttm_buffer_object *bo,
+						struct ttm_resource_manager *man,
+						bool no_wait_gpu)
 {
 	struct dma_fence *fence;
-	int ret;
+	int i;
 
-	spin_lock(&man->move_lock);
-	fence = dma_fence_get(man->move);
-	spin_unlock(&man->move_lock);
-
-	if (!fence)
-		return 0;
-
-	if (no_wait_gpu) {
-		ret = dma_fence_is_signaled(fence) ? 0 : -EBUSY;
-		dma_fence_put(fence);
-		return ret;
+	spin_lock(&man->eviction_lock);
+	for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
+		fence = man->eviction_fences[i];
+		if (!fence)
+			continue;
+
+		if (no_wait_gpu) {
+			if (!dma_fence_is_signaled(fence)) {
+				spin_unlock(&man->eviction_lock);
+				return -EBUSY;
+			}
+		} else {
+			dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_KERNEL);
+		}
 	}
+	spin_unlock(&man->eviction_lock);
 
-	dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_KERNEL);
-
-	ret = dma_resv_reserve_fences(bo->base.resv, 1);
-	dma_fence_put(fence);
-	return ret;
+	/* TODO: this call should be removed. */
+	return dma_resv_reserve_fences(bo->base.resv, 1);
 }
 
 /**
@@ -718,7 +719,7 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo,
 	int i, ret;
 
 	ticket = dma_resv_locking_ctx(bo->base.resv);
-	ret = dma_resv_reserve_fences(bo->base.resv, 1);
+	ret = dma_resv_reserve_fences(bo->base.resv, TTM_NUM_MOVE_FENCES);
 	if (unlikely(ret))
 		return ret;
 
@@ -757,7 +758,7 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo,
 			return ret;
 		}
 
-		ret = ttm_bo_add_move_fence(bo, man, ctx->no_wait_gpu);
+		ret = ttm_bo_add_pipelined_eviction_fences(bo, man, ctx->no_wait_gpu);
 		if (unlikely(ret)) {
 			ttm_resource_free(bo, res);
 			if (ret == -EBUSY)
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index acbbca9d5c92..2ff35d55e462 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -258,7 +258,7 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
 	ret = dma_resv_trylock(&fbo->base.base._resv);
 	WARN_ON(!ret);
-	ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1);
+	ret = dma_resv_reserve_fences(&fbo->base.base._resv, TTM_NUM_MOVE_FENCES);
 	if (ret) {
 		dma_resv_unlock(&fbo->base.base._resv);
 		kfree(fbo);
@@ -646,20 +646,44 @@ static void ttm_bo_move_pipeline_evict(struct ttm_buffer_object *bo,
 {
 	struct ttm_device *bdev = bo->bdev;
 	struct ttm_resource_manager *from;
+	struct dma_fence *tmp;
+	int i;
 
 	from = ttm_manager_type(bdev, bo->resource->mem_type);
 
 	/**
 	 * BO doesn't have a TTM we need to bind/unbind. Just remember
-	 * this eviction and free up the allocation
+	 * this eviction and free up the allocation.
+	 *
+	 * The fence will be saved in the first free slot or in the slot
+	 * already used to store a fence from the same context. Since
+	 * drivers can't use more than TTM_NUM_MOVE_FENCES contexts for
+	 * evictions we should always find a slot to use.
 	 */
-	spin_lock(&from->move_lock);
-	if (!from->move || dma_fence_is_later(fence, from->move)) {
-		dma_fence_put(from->move);
-		from->move = dma_fence_get(fence);
+	spin_lock(&from->eviction_lock);
+	for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
+		tmp = from->eviction_fences[i];
+		if (!tmp)
+			break;
+		if (fence->context != tmp->context)
+			continue;
+		if (dma_fence_is_later(fence, tmp)) {
+			dma_fence_put(tmp);
+			break;
+		}
+		goto unlock;
 	}
-	spin_unlock(&from->move_lock);
+	if (i < TTM_NUM_MOVE_FENCES) {
+		from->eviction_fences[i] = dma_fence_get(fence);
+	} else {
+		WARN(1, "not enough fence slots for all fence contexts");
+		spin_unlock(&from->eviction_lock);
+		dma_fence_wait(fence, false);
+		goto end;
+	}
+unlock:
+	spin_unlock(&from->eviction_lock);
+end:
 	ttm_resource_free(bo, &bo->resource);
 }
 
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index e2c82ad07eb4..62c34cafa387 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -523,14 +523,15 @@ void ttm_resource_manager_init(struct ttm_resource_manager *man,
 {
 	unsigned i;
 
-	spin_lock_init(&man->move_lock);
 	man->bdev = bdev;
 	man->size = size;
 	man->usage = 0;
 
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
 		INIT_LIST_HEAD(&man->lru[i]);
 
-	man->move = NULL;
+	spin_lock_init(&man->eviction_lock);
+	for (i = 0; i < TTM_NUM_MOVE_FENCES; i++)
+		man->eviction_fences[i] = NULL;
 }
 EXPORT_SYMBOL(ttm_resource_manager_init);
 
@@ -551,7 +552,7 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
 		.no_wait_gpu = false,
 	};
 	struct dma_fence *fence;
-	int ret;
+	int ret, i;
 
 	do {
 		ret = ttm_bo_evict_first(bdev, man, &ctx);
@@ -561,18 +562,24 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
 	if (ret && ret != -ENOENT)
 		return ret;
 
-	spin_lock(&man->move_lock);
-	fence = dma_fence_get(man->move);
-	spin_unlock(&man->move_lock);
-
-	if (fence) {
-		ret = dma_fence_wait(fence, false);
-		dma_fence_put(fence);
-		if (ret)
-			return ret;
+	ret = 0;
+	spin_lock(&man->eviction_lock);
+	for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
+		fence = man->eviction_fences[i];
+		if (fence && !dma_fence_is_signaled(fence)) {
+			dma_fence_get(fence);
+			spin_unlock(&man->eviction_lock);
+			ret = dma_fence_wait(fence, false);
+			dma_fence_put(fence);
+			if (ret)
+				return ret;
+			spin_lock(&man->eviction_lock);
+		}
 	}
+	spin_unlock(&man->eviction_lock);
 
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL(ttm_resource_manager_evict_all);
 
diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
index f49daa504c36..50e6added509 100644
--- a/include/drm/ttm/ttm_resource.h
+++ b/include/drm/ttm/ttm_resource.h
@@ -50,6 +50,15 @@ struct io_mapping;
 struct sg_table;
 struct scatterlist;
 
+/**
+ * define TTM_NUM_MOVE_FENCES - How many entities can be used for evictions
+ *
+ * Pipelined evictions can be spread over multiple entities. This
+ * is the max number of entities that can be used by the driver
+ * for that purpose.
+ */
+#define TTM_NUM_MOVE_FENCES 8
+
 /**
  * enum ttm_lru_item_type - enumerate ttm_lru_item subclasses
  */
@@ -180,8 +189,8 @@ struct ttm_resource_manager_func {
  * @size: Size of the managed region.
  * @bdev: ttm device this manager belongs to
  * @func: structure pointer implementing the range manager. See above
- * @move_lock: lock for move fence
- * @move: The fence of the last pipelined move operation.
+ * @eviction_lock: lock for eviction fences
+ * @eviction_fences: The fences of the last pipelined eviction operations.
  * @lru: The lru list for this memory type.
  *
  * This structure is used to identify and manage memory types for a device.
@@ -195,12 +204,12 @@ struct ttm_resource_manager {
 	struct ttm_device *bdev;
 	uint64_t size;
 	const struct ttm_resource_manager_func *func;
-	spinlock_t move_lock;
-
-	/*
-	 * Protected by @move_lock.
-	 */
-	struct dma_fence *move;
+	spinlock_t eviction_lock;
+
+	/* This is very similar to a dma_resv object, but locking rules make
+	 * it difficult to use one in this context.
+	 */
+	struct dma_fence *eviction_fences[TTM_NUM_MOVE_FENCES];
 
 	/*
 	 * Protected by the bdev->lru_lock.
@@ -421,8 +430,12 @@ static inline bool ttm_resource_manager_used(struct ttm_resource_manager *man)
 static inline void
 ttm_resource_manager_cleanup(struct ttm_resource_manager *man)
 {
-	dma_fence_put(man->move);
-	man->move = NULL;
+	int i;
+
+	for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
+		dma_fence_put(man->eviction_fences[i]);
+		man->eviction_fences[i] = NULL;
+	}
 }
 
 void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk);