On 12/11/25 16:08, Philipp Stanner wrote:
On Thu, 2025-12-11 at 13:16 +0100, Christian König wrote:
This allows amdkfd_fences to outlive the amdgpu module.
v2: implement Felix's suggestion to lock the fence while signaling it. v3: fix typos. v4: fix return code in signal_eviction_fence.
Signed-off-by: Christian König <christian.koenig@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 7 +++ .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 44 +++++++++---------- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 4 +- 4 files changed, 31 insertions(+), 26 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 8bdfcde2029b..2f2b277cfaed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -196,6 +196,7 @@ int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data); #endif #if IS_ENABLED(CONFIG_HSA_AMD) bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); +bool amdkfd_fence_signal(struct dma_fence *f); struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo); int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni, @@ -210,6 +211,12 @@ bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) return false; } +static inline +bool amdkfd_fence_signal(struct dma_fence *f) +{
- return false;
+}
Huh? What's that?
That function seems to be just a NOP. Its return code is used nowhere, is it?
It's the dummy which is used when CONFIG_HSA_AMD isn't enabled.
Not sure if it's actually used or not, but we have dummies for all functions declared in this file.
static inline struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c index 09c919f72b6c..9cd413e325f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c @@ -127,29 +127,9 @@ static bool amdkfd_fence_enable_signaling(struct dma_fence *f) if (!svm_range_schedule_evict_svm_bo(fence)) return true; }
- return false;
-}
-/**
- amdkfd_fence_release - callback that fence can be freed
- @f: dma_fence
- This function is called when the reference count becomes zero.
- Drops the mm_struct reference and RCU schedules freeing up the fence.
- */
-static void amdkfd_fence_release(struct dma_fence *f) -{
- struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
- /* Unconditionally signal the fence. The process is getting
* terminated.*/- if (WARN_ON(!fence))
return; /* Not an amdgpu_amdkfd_fence */mmdrop(fence->mm);
- kfree_rcu(f, rcu);
- fence->mm = NULL;
- return false;
} /** @@ -174,9 +154,27 @@ bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) return false; } +bool amdkfd_fence_signal(struct dma_fence *f) +{
- struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
- unsigned long flags;
- bool was_signaled;
- dma_fence_lock_irqsave(f, flags);
- if (fence->mm) {
mmdrop(fence->mm);fence->mm = NULL;- }
- was_signaled = dma_fence_is_signaled_locked(f);
- if (!was_signaled)
dma_fence_signal_locked(f);- dma_fence_unlock_irqrestore(f, flags);
- return was_signaled;
+}
static const struct dma_fence_ops amdkfd_fence_ops = { .get_driver_name = amdkfd_fence_get_driver_name, .get_timeline_name = amdkfd_fence_get_timeline_name, .enable_signaling = amdkfd_fence_enable_signaling,
- .release = amdkfd_fence_release,
}; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index bb252ec43733..2cf39e3d3fae 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1173,7 +1173,7 @@ static void kfd_process_wq_release(struct work_struct *work) synchronize_rcu(); ef = rcu_access_pointer(p->ef); if (ef)
dma_fence_signal(ef);
amdkfd_fence_signal(ef);kfd_process_remove_sysfs(p); kfd_debugfs_remove_process(p); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 97c2270f278f..0e94f3a976b1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -428,7 +428,7 @@ static void svm_range_bo_release(struct kref *kref) if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) /* We're not in the eviction worker. Signal the fence. */
dma_fence_signal(&svm_bo->eviction_fence->base);
amdkfd_fence_signal(&svm_bo->eviction_fence->base);dma_fence_put(&svm_bo->eviction_fence->base); amdgpu_bo_unref(&svm_bo->bo); kfree(svm_bo); @@ -3628,7 +3628,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work) mmap_read_unlock(mm); mmput(mm);
- dma_fence_signal(&svm_bo->eviction_fence->base);
- amdkfd_fence_signal(&svm_bo->eviction_fence->base);
And why do you do those changes and why doesn't the commit message explain it?
You stop signalling those fences, after all.
Huh? I don't stop signaling the fences. I just delegate signaling into a separate helper function which does some extra cleanup before signaling the fence.
Regards, Christian.
P.
/* This is the last reference to svm_bo, after svm_range_vram_node_free * has been called in svm_migrate_vram_to_ram
linaro-mm-sig@lists.linaro.org