Hi everyone,
dma_fences have so far lived under the tyranny of the module lifetime of their issuer, leading to crashes if anybody was still holding a reference to a dma_fence when the issuer's module was unloaded.
The basic problem is that when buffers are shared between drivers, dma_fence objects can leak into external drivers and stay there even after they are signaled. The dma_resv object, for example, only lazily releases dma_fences.
So what happens is that when the module which originally created the dma_fence unloads, the dma_fence_ops function table becomes unavailable as well, and any attempt to release the fence crashes the system.
Previously various approaches have been discussed, including changing the locking semantics of the dma_fence callbacks (by me) as well as using the drm scheduler as an intermediate layer (by Sima) to disconnect dma_fences from their actual users, but none of them actually solved all of the problems.
Tvrtko did some really nice prerequisite work by protecting the strings returned by the dma_fence_ops with RCU. This way dma_fence creators only need to wait for an RCU grace period after fence signaling before it is safe to free those data structures.
Now this patch set goes a step further and protects the whole dma_fence_ops structure by RCU, so that after the fence signals the pointer to the dma_fence_ops is set to NULL when neither a wait nor a release callback is given. All functionality which uses the dma_fence_ops reference is put inside an RCU critical section, except for the deprecated issuer specific wait and of course the optional release callback.
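To illustrate the resulting access pattern, this is roughly what e.g. dma_fence_set_deadline() looks like after the series (sketch of the pattern used throughout the patches below):

    void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline)
    {
        const struct dma_fence_ops *ops;

        rcu_read_lock();
        ops = rcu_dereference(fence->ops);
        if (ops && ops->set_deadline && !dma_fence_is_signaled(fence))
            ops->set_deadline(fence, deadline);
        rcu_read_unlock();
    }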
In addition to the RCU changes, the lock protecting the dma_fence state previously had to be allocated externally. This set now makes that external lock optional and allows dma_fences to use an inline lock and be self-contained.
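With both pieces in place a driver can create a completely self-contained fence. A minimal sketch (my_fence, my_fence_create and my_fence_ops are made-up names); as long as the ops provide neither a wait nor a release callback the fence detaches from the module once it signals:

    struct my_fence {
        struct dma_fence base;
    };

    static struct dma_fence *my_fence_create(u64 context, u64 seqno)
    {
        struct my_fence *f = kzalloc(sizeof(*f), GFP_KERNEL);

        if (!f)
            return NULL;

        /* NULL instead of an external spinlock selects the inline lock */
        dma_fence_init(&f->base, &my_fence_ops, NULL, context, seqno);
        return &f->base;
    }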
This patch set addresses all previous code review comments, is based on drm-tip and includes my changes for amdgpu as well as Mathew's patches for XE.
Going to push the core DMA-buf changes to drm-misc-next as soon as I get the appropriate rb. The driver specific changes can go upstream through the driver channels as necessary.
Please review and comment, Christian.
The driver and timeline names are meaningless for signaled fences.
Drop them and also print the context number.
v2: avoid the calls when the fence is already signaled. v3: use the same format as the trace points for context and seqno.
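With this the debugfs output changes to something like the following (context/seqno values and names are made up; for a signaled fence the name fields are simply left empty):

    3:10 gfx_0.0.0 amdgpu unsignalled
    3:8  signalled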
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-fence.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-)
diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index c5d1478b28dd..b4f5c8635276 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -997,19 +997,21 @@ EXPORT_SYMBOL(dma_fence_set_deadline); */ void dma_fence_describe(struct dma_fence *fence, struct seq_file *seq) { - const char __rcu *timeline; - const char __rcu *driver; + const char __rcu *timeline = ""; + const char __rcu *driver = ""; + const char *signaled = "";
rcu_read_lock();
- timeline = dma_fence_timeline_name(fence); - driver = dma_fence_driver_name(fence); + if (!dma_fence_is_signaled(fence)) { + timeline = dma_fence_timeline_name(fence); + driver = dma_fence_driver_name(fence); + signaled = "un"; + }
- seq_printf(seq, "%s %s seq %llu %ssignalled\n", - rcu_dereference(driver), - rcu_dereference(timeline), - fence->seqno, - dma_fence_is_signaled(fence) ? "" : "un"); + seq_printf(seq, "%llu:%llu %s %s %ssignalled\n", + fence->context, fence->seqno, timeline, driver, + signaled);
rcu_read_unlock(); }
At first glance it is counterintuitive to protect a constant function pointer table with RCU, but this allows modules providing the function table to unload after waiting for an RCU grace period.
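As an illustration of what this buys us, a fence issuer could then tear down roughly like this on module unload (sketch only, my_driver_exit is a made-up example and not part of this patch):

    static void __exit my_driver_exit(void)
    {
        /*
         * All fences issued by this module have signaled at this point.
         * After a grace period no RCU reader can still dereference our
         * dma_fence_ops, so it is safe for the module to go away.
         */
        synchronize_rcu();
    }
    module_exit(my_driver_exit);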
v2: make one of the now duplicated lockdep warnings a comment instead. v3: add more documentation to the ->wait and ->release callbacks.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-fence.c | 69 +++++++++++++++++++++++++------------ include/linux/dma-fence.h | 29 ++++++++++++++-- 2 files changed, 73 insertions(+), 25 deletions(-)
diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index b4f5c8635276..ec21be9b089a 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -498,6 +498,7 @@ EXPORT_SYMBOL(dma_fence_signal); signed long dma_fence_wait_timeout(struct dma_fence *fence, bool intr, signed long timeout) { + const struct dma_fence_ops *ops; signed long ret;
if (WARN_ON(timeout < 0)) @@ -509,15 +510,21 @@ dma_fence_wait_timeout(struct dma_fence *fence, bool intr, signed long timeout)
dma_fence_enable_sw_signaling(fence);
- if (trace_dma_fence_wait_start_enabled()) { - rcu_read_lock(); - trace_dma_fence_wait_start(fence); + rcu_read_lock(); + ops = rcu_dereference(fence->ops); + trace_dma_fence_wait_start(fence); + if (ops->wait) { + /* + * Implementing the wait ops is deprecated and not supported for + * issuer independent fences, so it is ok to use the ops outside + * the RCU protected section. + */ + rcu_read_unlock(); + ret = ops->wait(fence, intr, timeout); + } else { rcu_read_unlock(); - } - if (fence->ops->wait) - ret = fence->ops->wait(fence, intr, timeout); - else ret = dma_fence_default_wait(fence, intr, timeout); + } if (trace_dma_fence_wait_end_enabled()) { rcu_read_lock(); trace_dma_fence_wait_end(fence); @@ -538,6 +545,7 @@ void dma_fence_release(struct kref *kref) { struct dma_fence *fence = container_of(kref, struct dma_fence, refcount); + const struct dma_fence_ops *ops;
rcu_read_lock(); trace_dma_fence_destroy(fence); @@ -569,12 +577,12 @@ void dma_fence_release(struct kref *kref) spin_unlock_irqrestore(fence->lock, flags); }
- rcu_read_unlock(); - - if (fence->ops->release) - fence->ops->release(fence); + ops = rcu_dereference(fence->ops); + if (ops->release) + ops->release(fence); else dma_fence_free(fence); + rcu_read_unlock(); } EXPORT_SYMBOL(dma_fence_release);
@@ -593,6 +601,7 @@ EXPORT_SYMBOL(dma_fence_free);
static bool __dma_fence_enable_signaling(struct dma_fence *fence) { + const struct dma_fence_ops *ops; bool was_set;
lockdep_assert_held(fence->lock); @@ -603,14 +612,18 @@ static bool __dma_fence_enable_signaling(struct dma_fence *fence) if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) return false;
- if (!was_set && fence->ops->enable_signaling) { + rcu_read_lock(); + ops = rcu_dereference(fence->ops); + if (!was_set && ops->enable_signaling) { trace_dma_fence_enable_signal(fence);
- if (!fence->ops->enable_signaling(fence)) { + if (!ops->enable_signaling(fence)) { + rcu_read_unlock(); dma_fence_signal_locked(fence); return false; } } + rcu_read_unlock();
return true; } @@ -983,8 +996,13 @@ EXPORT_SYMBOL(dma_fence_wait_any_timeout); */ void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline) { - if (fence->ops->set_deadline && !dma_fence_is_signaled(fence)) - fence->ops->set_deadline(fence, deadline); + const struct dma_fence_ops *ops; + + rcu_read_lock(); + ops = rcu_dereference(fence->ops); + if (ops->set_deadline && !dma_fence_is_signaled(fence)) + ops->set_deadline(fence, deadline); + rcu_read_unlock(); } EXPORT_SYMBOL(dma_fence_set_deadline);
@@ -1025,7 +1043,12 @@ __dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops, BUG_ON(!ops || !ops->get_driver_name || !ops->get_timeline_name);
kref_init(&fence->refcount); - fence->ops = ops; + /* + * At first glance it is counter intuitive to protect a constant + * function pointer table by RCU, but this allows modules providing the + * function table to unload by waiting for an RCU grace period. + */ + RCU_INIT_POINTER(fence->ops, ops); INIT_LIST_HEAD(&fence->cb_list); fence->lock = lock; fence->context = context; @@ -1105,11 +1128,12 @@ EXPORT_SYMBOL(dma_fence_init64); */ const char __rcu *dma_fence_driver_name(struct dma_fence *fence) { - RCU_LOCKDEP_WARN(!rcu_read_lock_held(), - "RCU protection is required for safe access to returned string"); + const struct dma_fence_ops *ops;
+ /* RCU protection is required for safe access to returned string */ + ops = rcu_dereference(fence->ops); if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) - return fence->ops->get_driver_name(fence); + return ops->get_driver_name(fence); else return "detached-driver"; } @@ -1137,11 +1161,12 @@ EXPORT_SYMBOL(dma_fence_driver_name); */ const char __rcu *dma_fence_timeline_name(struct dma_fence *fence) { - RCU_LOCKDEP_WARN(!rcu_read_lock_held(), - "RCU protection is required for safe access to returned string"); + const struct dma_fence_ops *ops;
+ /* RCU protection is required for safe access to returned string */ + ops = rcu_dereference(fence->ops); if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) - return fence->ops->get_timeline_name(fence); + return ops->get_timeline_name(fence); else return "signaled-timeline"; } diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 64639e104110..77f07735f556 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -66,7 +66,7 @@ struct seq_file; */ struct dma_fence { spinlock_t *lock; - const struct dma_fence_ops *ops; + const struct dma_fence_ops __rcu *ops; /* * We clear the callback list on kref_put so that by the time we * release the fence it is unused. No one should be adding to the @@ -218,6 +218,10 @@ struct dma_fence_ops { * timed out. Can also return other error values on custom implementations, * which should be treated as if the fence is signaled. For example a hardware * lockup could be reported like that. + * + * Implementing this callback prevents the BO from detaching after + * signaling and so it is mandatory for the module providing the + * dma_fence_ops to stay loaded as long as the dma_fence exists. */ signed long (*wait)(struct dma_fence *fence, bool intr, signed long timeout); @@ -229,6 +233,13 @@ struct dma_fence_ops { * Can be called from irq context. This callback is optional. If it is * NULL, then dma_fence_free() is instead called as the default * implementation. + * + * Implementing this callback prevents the BO from detaching after + * signaling and so it is mandatory for the module providing the + * dma_fence_ops to stay loaded as long as the dma_fence exists. + * + * If the callback is implemented the memory backing the dma_fence + * object must be freed RCU safe. */ void (*release)(struct dma_fence *fence);
@@ -418,13 +429,19 @@ const char __rcu *dma_fence_timeline_name(struct dma_fence *fence); static inline bool dma_fence_is_signaled_locked(struct dma_fence *fence) { + const struct dma_fence_ops *ops; + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) return true;
- if (fence->ops->signaled && fence->ops->signaled(fence)) { + rcu_read_lock(); + ops = rcu_dereference(fence->ops); + if (ops->signaled && ops->signaled(fence)) { + rcu_read_unlock(); dma_fence_signal_locked(fence); return true; } + rcu_read_unlock();
return false; } @@ -448,13 +465,19 @@ dma_fence_is_signaled_locked(struct dma_fence *fence) static inline bool dma_fence_is_signaled(struct dma_fence *fence) { + const struct dma_fence_ops *ops; + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) return true;
- if (fence->ops->signaled && fence->ops->signaled(fence)) { + rcu_read_lock(); + ops = rcu_dereference(fence->ops); + if (ops->signaled && ops->signaled(fence)) { + rcu_read_unlock(); dma_fence_signal(fence); return true; } + rcu_read_unlock();
return false; }
When neither a release nor a wait callback is specified it is possible to let the dma_fence live on independently of the module that issued it.
This makes it possible to unload a driver by just waiting for all of its fences to signal.
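For example, an importer which still holds such a fence after the issuer is gone can keep using it (sketch of what the core then does):

    /* fence issued by a driver that has since been unloaded */
    if (dma_fence_is_signaled(fence))   /* ops is NULL, only the flag is tested */
        dma_fence_put(fence);           /* release falls back to dma_fence_free() */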
v2: fix typo in comment
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-fence.c | 16 ++++++++++++---- include/linux/dma-fence.h | 4 ++-- 2 files changed, 14 insertions(+), 6 deletions(-)
diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index ec21be9b089a..7074347f506d 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -374,6 +374,14 @@ int dma_fence_signal_timestamp_locked(struct dma_fence *fence, &fence->flags))) return -EINVAL;
+ /* + * When neither a release nor a wait operation is specified set the ops + * pointer to NULL to allow the fence structure to become independent + * from who originally issued it. + */ + if (!fence->ops->release && !fence->ops->wait) + RCU_INIT_POINTER(fence->ops, NULL); + /* Stash the cb_list before replacing it with the timestamp */ list_replace(&fence->cb_list, &cb_list);
@@ -513,7 +521,7 @@ dma_fence_wait_timeout(struct dma_fence *fence, bool intr, signed long timeout) rcu_read_lock(); ops = rcu_dereference(fence->ops); trace_dma_fence_wait_start(fence); - if (ops->wait) { + if (ops && ops->wait) { /* * Implementing the wait ops is deprecated and not supported for * issuer independent fences, so it is ok to use the ops outside @@ -578,7 +586,7 @@ void dma_fence_release(struct kref *kref) }
ops = rcu_dereference(fence->ops); - if (ops->release) + if (ops && ops->release) ops->release(fence); else dma_fence_free(fence); @@ -614,7 +622,7 @@ static bool __dma_fence_enable_signaling(struct dma_fence *fence)
rcu_read_lock(); ops = rcu_dereference(fence->ops); - if (!was_set && ops->enable_signaling) { + if (!was_set && ops && ops->enable_signaling) { trace_dma_fence_enable_signal(fence);
if (!ops->enable_signaling(fence)) { @@ -1000,7 +1008,7 @@ void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline)
rcu_read_lock(); ops = rcu_dereference(fence->ops); - if (ops->set_deadline && !dma_fence_is_signaled(fence)) + if (ops && ops->set_deadline && !dma_fence_is_signaled(fence)) ops->set_deadline(fence, deadline); rcu_read_unlock(); } diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 77f07735f556..eb57bcc8712f 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -436,7 +436,7 @@ dma_fence_is_signaled_locked(struct dma_fence *fence)
rcu_read_lock(); ops = rcu_dereference(fence->ops); - if (ops->signaled && ops->signaled(fence)) { + if (ops && ops->signaled && ops->signaled(fence)) { rcu_read_unlock(); dma_fence_signal_locked(fence); return true; @@ -472,7 +472,7 @@ dma_fence_is_signaled(struct dma_fence *fence)
rcu_read_lock(); ops = rcu_dereference(fence->ops); - if (ops->signaled && ops->signaled(fence)) { + if (ops && ops->signaled && ops->signaled(fence)) { rcu_read_unlock(); dma_fence_signal(fence); return true;
Implement per-fence spinlocks, allowing implementations to not provide an external spinlock to protect the fence internal state. Instead a spinlock embedded into the fence structure itself is used in this case.
Shared spinlocks have the problem that implementations need to guarantee that the lock lives at least as long as all the fences referencing it.
Using a per-fence spinlock allows completely decoupling the spinlock producer and consumer lifetimes, simplifying the handling in most use cases.
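A minimal sketch of the resulting usage: pass NULL as lock to dma_fence_init() and use the new helpers whenever the implementation needs the lock itself (the error code is just an example):

    unsigned long flags;

    /* works for both the inline and an external spinlock */
    dma_fence_lock_irqsave(fence, flags);
    if (!dma_fence_is_signaled_locked(fence))
        dma_fence_set_error(fence, -ENODATA);
    dma_fence_unlock_irqrestore(fence, flags);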
v2: improve naming, coverage and function documentation
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-fence.c | 66 +++++++++++++----------- drivers/dma-buf/sw_sync.c | 14 ++--- drivers/dma-buf/sync_debug.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 12 ++--- drivers/gpu/drm/drm_crtc.c | 2 +- drivers/gpu/drm/drm_writeback.c | 2 +- drivers/gpu/drm/nouveau/nouveau_drm.c | 5 +- drivers/gpu/drm/nouveau/nouveau_fence.c | 3 +- drivers/gpu/drm/qxl/qxl_release.c | 3 +- drivers/gpu/drm/scheduler/sched_fence.c | 4 +- drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 3 +- drivers/gpu/drm/xe/xe_hw_fence.c | 3 +- drivers/gpu/drm/xe/xe_sched_job.c | 4 +- include/linux/dma-fence.h | 42 ++++++++++++++- 16 files changed, 111 insertions(+), 62 deletions(-)
diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index 7074347f506d..9a5aa9e44e13 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -343,7 +343,6 @@ void __dma_fence_might_wait(void) } #endif
- /** * dma_fence_signal_timestamp_locked - signal completion of a fence * @fence: the fence to signal @@ -368,7 +367,7 @@ int dma_fence_signal_timestamp_locked(struct dma_fence *fence, struct dma_fence_cb *cur, *tmp; struct list_head cb_list;
- lockdep_assert_held(fence->lock); + lockdep_assert_held(dma_fence_spinlock(fence));
if (unlikely(test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))) @@ -421,9 +420,9 @@ int dma_fence_signal_timestamp(struct dma_fence *fence, ktime_t timestamp) if (WARN_ON(!fence)) return -EINVAL;
- spin_lock_irqsave(fence->lock, flags); + dma_fence_lock_irqsave(fence, flags); ret = dma_fence_signal_timestamp_locked(fence, timestamp); - spin_unlock_irqrestore(fence->lock, flags); + dma_fence_unlock_irqrestore(fence, flags);
return ret; } @@ -475,9 +474,9 @@ int dma_fence_signal(struct dma_fence *fence)
tmp = dma_fence_begin_signalling();
- spin_lock_irqsave(fence->lock, flags); + dma_fence_lock_irqsave(fence, flags); ret = dma_fence_signal_timestamp_locked(fence, ktime_get()); - spin_unlock_irqrestore(fence->lock, flags); + dma_fence_unlock_irqrestore(fence, flags);
dma_fence_end_signalling(tmp);
@@ -579,10 +578,10 @@ void dma_fence_release(struct kref *kref) * don't leave chains dangling. We set the error flag first * so that the callbacks know this signal is due to an error. */ - spin_lock_irqsave(fence->lock, flags); + dma_fence_lock_irqsave(fence, flags); fence->error = -EDEADLK; dma_fence_signal_locked(fence); - spin_unlock_irqrestore(fence->lock, flags); + dma_fence_unlock_irqrestore(fence, flags); }
ops = rcu_dereference(fence->ops); @@ -612,7 +611,7 @@ static bool __dma_fence_enable_signaling(struct dma_fence *fence) const struct dma_fence_ops *ops; bool was_set;
- lockdep_assert_held(fence->lock); + lockdep_assert_held(dma_fence_spinlock(fence));
was_set = test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags); @@ -648,9 +647,9 @@ void dma_fence_enable_sw_signaling(struct dma_fence *fence) { unsigned long flags;
- spin_lock_irqsave(fence->lock, flags); + dma_fence_lock_irqsave(fence, flags); __dma_fence_enable_signaling(fence); - spin_unlock_irqrestore(fence->lock, flags); + dma_fence_unlock_irqrestore(fence, flags); } EXPORT_SYMBOL(dma_fence_enable_sw_signaling);
@@ -690,8 +689,7 @@ int dma_fence_add_callback(struct dma_fence *fence, struct dma_fence_cb *cb, return -ENOENT; }
- spin_lock_irqsave(fence->lock, flags); - + dma_fence_lock_irqsave(fence, flags); if (__dma_fence_enable_signaling(fence)) { cb->func = func; list_add_tail(&cb->node, &fence->cb_list); @@ -699,8 +697,7 @@ int dma_fence_add_callback(struct dma_fence *fence, struct dma_fence_cb *cb, INIT_LIST_HEAD(&cb->node); ret = -ENOENT; } - - spin_unlock_irqrestore(fence->lock, flags); + dma_fence_unlock_irqrestore(fence, flags);
return ret; } @@ -723,9 +720,9 @@ int dma_fence_get_status(struct dma_fence *fence) unsigned long flags; int status;
- spin_lock_irqsave(fence->lock, flags); + dma_fence_lock_irqsave(fence, flags); status = dma_fence_get_status_locked(fence); - spin_unlock_irqrestore(fence->lock, flags); + dma_fence_unlock_irqrestore(fence, flags);
return status; } @@ -755,13 +752,11 @@ dma_fence_remove_callback(struct dma_fence *fence, struct dma_fence_cb *cb) unsigned long flags; bool ret;
- spin_lock_irqsave(fence->lock, flags); - + dma_fence_lock_irqsave(fence, flags); ret = !list_empty(&cb->node); if (ret) list_del_init(&cb->node); - - spin_unlock_irqrestore(fence->lock, flags); + dma_fence_unlock_irqrestore(fence, flags);
return ret; } @@ -800,8 +795,7 @@ dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout) unsigned long flags; signed long ret = timeout ? timeout : 1;
- spin_lock_irqsave(fence->lock, flags); - + dma_fence_lock_irqsave(fence, flags); if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) goto out;
@@ -824,11 +818,11 @@ dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout) __set_current_state(TASK_INTERRUPTIBLE); else __set_current_state(TASK_UNINTERRUPTIBLE); - spin_unlock_irqrestore(fence->lock, flags); + dma_fence_unlock_irqrestore(fence, flags);
ret = schedule_timeout(ret);
- spin_lock_irqsave(fence->lock, flags); + dma_fence_lock_irqsave(fence, flags); if (ret > 0 && intr && signal_pending(current)) ret = -ERESTARTSYS; } @@ -838,7 +832,7 @@ dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout) __set_current_state(TASK_RUNNING);
out: - spin_unlock_irqrestore(fence->lock, flags); + dma_fence_unlock_irqrestore(fence, flags); return ret; } EXPORT_SYMBOL(dma_fence_default_wait); @@ -1047,7 +1041,6 @@ static void __dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops, spinlock_t *lock, u64 context, u64 seqno, unsigned long flags) { - BUG_ON(!lock); BUG_ON(!ops || !ops->get_driver_name || !ops->get_timeline_name);
kref_init(&fence->refcount); @@ -1058,10 +1051,15 @@ __dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops, */ RCU_INIT_POINTER(fence->ops, ops); INIT_LIST_HEAD(&fence->cb_list); - fence->lock = lock; fence->context = context; fence->seqno = seqno; fence->flags = flags; + if (lock) { + fence->extern_lock = lock; + } else { + spin_lock_init(&fence->inline_lock); + fence->flags |= BIT(DMA_FENCE_FLAG_INLINE_LOCK_BIT); + } fence->error = 0;
trace_dma_fence_init(fence); @@ -1071,7 +1069,7 @@ __dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops, * dma_fence_init - Initialize a custom fence. * @fence: the fence to initialize * @ops: the dma_fence_ops for operations on this fence - * @lock: the irqsafe spinlock to use for locking this fence + * @lock: optional irqsafe spinlock to use for locking this fence * @context: the execution context this fence is run on * @seqno: a linear increasing sequence number for this context * @@ -1081,6 +1079,10 @@ __dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops, * * context and seqno are used for easy comparison between fences, allowing * to check which fence is later by simply using dma_fence_later(). + * + * It is strongly discouraged to provide an external lock. This is only allowed + * for legacy use cases when multiple fences need to be prevented from + * signaling out of order. */ void dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops, @@ -1094,7 +1096,7 @@ EXPORT_SYMBOL(dma_fence_init); * dma_fence_init64 - Initialize a custom fence with 64-bit seqno support. * @fence: the fence to initialize * @ops: the dma_fence_ops for operations on this fence - * @lock: the irqsafe spinlock to use for locking this fence + * @lock: optional irqsafe spinlock to use for locking this fence * @context: the execution context this fence is run on * @seqno: a linear increasing sequence number for this context * @@ -1104,6 +1106,10 @@ EXPORT_SYMBOL(dma_fence_init); * * Context and seqno are used for easy comparison between fences, allowing * to check which fence is later by simply using dma_fence_later(). + * + * It is strongly discouraged to provide an external lock. This is only allowed + * for legacy use cases when multiple fences need to be prevented from + * signaling out of order. */ void dma_fence_init64(struct dma_fence *fence, const struct dma_fence_ops *ops, diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index 3c20f1d31cf5..956a3ad7a075 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -155,12 +155,12 @@ static void timeline_fence_release(struct dma_fence *fence) struct sync_timeline *parent = dma_fence_parent(fence); unsigned long flags;
- spin_lock_irqsave(fence->lock, flags); + dma_fence_lock_irqsave(fence, flags); if (!list_empty(&pt->link)) { list_del(&pt->link); rb_erase(&pt->node, &parent->pt_tree); } - spin_unlock_irqrestore(fence->lock, flags); + dma_fence_unlock_irqrestore(fence, flags);
sync_timeline_put(parent); dma_fence_free(fence); @@ -178,7 +178,7 @@ static void timeline_fence_set_deadline(struct dma_fence *fence, ktime_t deadlin struct sync_pt *pt = dma_fence_to_sync_pt(fence); unsigned long flags;
- spin_lock_irqsave(fence->lock, flags); + dma_fence_lock_irqsave(fence, flags); if (test_bit(SW_SYNC_HAS_DEADLINE_BIT, &fence->flags)) { if (ktime_before(deadline, pt->deadline)) pt->deadline = deadline; @@ -186,7 +186,7 @@ static void timeline_fence_set_deadline(struct dma_fence *fence, ktime_t deadlin pt->deadline = deadline; __set_bit(SW_SYNC_HAS_DEADLINE_BIT, &fence->flags); } - spin_unlock_irqrestore(fence->lock, flags); + dma_fence_unlock_irqrestore(fence, flags); }
static const struct dma_fence_ops timeline_fence_ops = { @@ -427,13 +427,13 @@ static int sw_sync_ioctl_get_deadline(struct sync_timeline *obj, unsigned long a goto put_fence; }
- spin_lock_irqsave(fence->lock, flags); + dma_fence_lock_irqsave(fence, flags); if (!test_bit(SW_SYNC_HAS_DEADLINE_BIT, &fence->flags)) { ret = -ENOENT; goto unlock; } data.deadline_ns = ktime_to_ns(pt->deadline); - spin_unlock_irqrestore(fence->lock, flags); + dma_fence_unlock_irqrestore(fence, flags);
dma_fence_put(fence);
@@ -446,7 +446,7 @@ static int sw_sync_ioctl_get_deadline(struct sync_timeline *obj, unsigned long a return 0;
unlock: - spin_unlock_irqrestore(fence->lock, flags); + dma_fence_unlock_irqrestore(fence, flags); put_fence: dma_fence_put(fence);
diff --git a/drivers/dma-buf/sync_debug.h b/drivers/dma-buf/sync_debug.h index 02af347293d0..c49324505b20 100644 --- a/drivers/dma-buf/sync_debug.h +++ b/drivers/dma-buf/sync_debug.h @@ -47,7 +47,7 @@ struct sync_timeline {
static inline struct sync_timeline *dma_fence_parent(struct dma_fence *fence) { - return container_of(fence->lock, struct sync_timeline, lock); + return container_of(fence->extern_lock, struct sync_timeline, lock); }
/** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index cd8873c6931a..ab41488b0df9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -474,10 +474,10 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid, if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence) return false;
- spin_lock_irqsave(fence->lock, flags); + dma_fence_lock_irqsave(fence, flags); if (!dma_fence_is_signaled_locked(fence)) dma_fence_set_error(fence, -ENODATA); - spin_unlock_irqrestore(fence->lock, flags); + dma_fence_unlock_irqrestore(fence, flags);
while (!dma_fence_is_signaled(fence) && ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 700b4a776532..4b5360bc4813 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2782,8 +2782,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) dma_fence_put(vm->last_unlocked); dma_fence_wait(vm->last_tlb_flush, false); /* Make sure that all fence callbacks have completed */ - spin_lock_irqsave(vm->last_tlb_flush->lock, flags); - spin_unlock_irqrestore(vm->last_tlb_flush->lock, flags); + dma_fence_lock_irqsave(vm->last_tlb_flush, flags); + dma_fence_unlock_irqrestore(vm->last_tlb_flush, flags); dma_fence_put(vm->last_tlb_flush);
list_for_each_entry_safe(mapping, tmp, &vm->freed, list) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index cf0ec94e8a07..82492a5759a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -631,20 +631,20 @@ bool amdgpu_vm_is_bo_always_valid(struct amdgpu_vm *vm, struct amdgpu_bo *bo); */ static inline uint64_t amdgpu_vm_tlb_seq(struct amdgpu_vm *vm) { + struct dma_fence *fence; unsigned long flags; - spinlock_t *lock;
/* * Workaround to stop racing between the fence signaling and handling - * the cb. The lock is static after initially setting it up, just make - * sure that the dma_fence structure isn't freed up. + * the cb. */ rcu_read_lock(); - lock = vm->last_tlb_flush->lock; + fence = dma_fence_get_rcu(vm->last_tlb_flush); rcu_read_unlock();
- spin_lock_irqsave(lock, flags); - spin_unlock_irqrestore(lock, flags); + dma_fence_lock_irqsave(fence, flags); + dma_fence_unlock_irqrestore(fence, flags); + dma_fence_put(fence);
return atomic64_read(&vm->tlb_seq); } diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index a7797d260f1e..17472915842f 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -159,7 +159,7 @@ static const struct dma_fence_ops drm_crtc_fence_ops; static struct drm_crtc *fence_to_crtc(struct dma_fence *fence) { BUG_ON(fence->ops != &drm_crtc_fence_ops); - return container_of(fence->lock, struct drm_crtc, fence_lock); + return container_of(fence->extern_lock, struct drm_crtc, fence_lock); }
static const char *drm_crtc_fence_get_driver_name(struct dma_fence *fence) diff --git a/drivers/gpu/drm/drm_writeback.c b/drivers/gpu/drm/drm_writeback.c index 95b8a2e4bda6..624a4e8b6c99 100644 --- a/drivers/gpu/drm/drm_writeback.c +++ b/drivers/gpu/drm/drm_writeback.c @@ -81,7 +81,7 @@ * From userspace, this property will always read as zero. */
-#define fence_to_wb_connector(x) container_of(x->lock, \ +#define fence_to_wb_connector(x) container_of(x->extern_lock, \ struct drm_writeback_connector, \ fence_lock)
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 1527b801f013..ec4dfa3ea725 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -156,12 +156,13 @@ nouveau_name(struct drm_device *dev) static inline bool nouveau_cli_work_ready(struct dma_fence *fence) { + unsigned long flags; bool ret = true;
- spin_lock_irq(fence->lock); + dma_fence_lock_irqsave(fence, flags); if (!dma_fence_is_signaled_locked(fence)) ret = false; - spin_unlock_irq(fence->lock); + dma_fence_unlock_irqrestore(fence, flags);
if (ret == true) dma_fence_put(fence); diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 869d4335c0f4..a7512c22c85f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -41,7 +41,8 @@ static const struct dma_fence_ops nouveau_fence_ops_legacy; static inline struct nouveau_fence_chan * nouveau_fctx(struct nouveau_fence *fence) { - return container_of(fence->base.lock, struct nouveau_fence_chan, lock); + return container_of(fence->base.extern_lock, struct nouveau_fence_chan, + lock); }
static bool diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index 7b3c9a6016db..de05e392df88 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -62,7 +62,8 @@ static long qxl_fence_wait(struct dma_fence *fence, bool intr, struct qxl_device *qdev; unsigned long cur, end = jiffies + timeout;
- qdev = container_of(fence->lock, struct qxl_device, release_lock); + qdev = container_of(fence->extern_lock, struct qxl_device, + release_lock);
if (!wait_event_timeout(qdev->release_event, (dma_fence_is_signaled(fence) || diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c index 9391d6f0dc01..08ccbde8b2f5 100644 --- a/drivers/gpu/drm/scheduler/sched_fence.c +++ b/drivers/gpu/drm/scheduler/sched_fence.c @@ -156,7 +156,7 @@ static void drm_sched_fence_set_deadline_finished(struct dma_fence *f, struct dma_fence *parent; unsigned long flags;
- spin_lock_irqsave(&fence->lock, flags); + dma_fence_lock_irqsave(f, flags);
/* If we already have an earlier deadline, keep it: */ if (test_bit(DRM_SCHED_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags) && @@ -168,7 +168,7 @@ static void drm_sched_fence_set_deadline_finished(struct dma_fence *f, fence->deadline = deadline; set_bit(DRM_SCHED_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags);
- spin_unlock_irqrestore(&fence->lock, flags); + dma_fence_unlock_irqrestore(f, flags);
/* * smp_load_aquire() to ensure that if we are racing another diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index 00be92da5509..621aa0aa8406 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -47,7 +47,8 @@ struct vmw_event_fence_action { static struct vmw_fence_manager * fman_from_fence(struct vmw_fence_obj *fence) { - return container_of(fence->base.lock, struct vmw_fence_manager, lock); + return container_of(fence->base.extern_lock, struct vmw_fence_manager, + lock); }
static void vmw_fence_obj_destroy(struct dma_fence *f) diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c index b2a0c46dfcd4..3456bec93c70 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence.c +++ b/drivers/gpu/drm/xe/xe_hw_fence.c @@ -144,7 +144,8 @@ static struct xe_hw_fence *to_xe_hw_fence(struct dma_fence *fence);
static struct xe_hw_fence_irq *xe_hw_fence_irq(struct xe_hw_fence *fence) { - return container_of(fence->dma.lock, struct xe_hw_fence_irq, lock); + return container_of(fence->dma.extern_lock, struct xe_hw_fence_irq, + lock); }
static const char *xe_hw_fence_get_driver_name(struct dma_fence *dma_fence) diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index cb674a322113..9a43ed689e65 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -189,11 +189,11 @@ static bool xe_fence_set_error(struct dma_fence *fence, int error) unsigned long irq_flags; bool signaled;
- spin_lock_irqsave(fence->lock, irq_flags); + dma_fence_lock_irqsave(fence, irq_flags); signaled = test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags); if (!signaled) dma_fence_set_error(fence, error); - spin_unlock_irqrestore(fence->lock, irq_flags); + dma_fence_unlock_irqrestore(fence, irq_flags);
return signaled; } diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index eb57bcc8712f..d493d15ad52a 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -34,7 +34,8 @@ struct seq_file; * @ops: dma_fence_ops associated with this fence * @rcu: used for releasing fence with kfree_rcu * @cb_list: list of all callbacks to call - * @lock: spin_lock_irqsave used for locking + * @extern_lock: external spin_lock_irqsave used for locking + * @inline_lock: alternative internal spin_lock_irqsave used for locking * @context: execution context this fence belongs to, returned by * dma_fence_context_alloc() * @seqno: the sequence number of this fence inside the execution context, @@ -48,6 +49,7 @@ struct seq_file; * atomic ops (bit_*), so taking the spinlock will not be needed most * of the time. * + * DMA_FENCE_FLAG_INLINE_LOCK_BIT - use inline spinlock instead of external one * DMA_FENCE_FLAG_SIGNALED_BIT - fence is already signaled * DMA_FENCE_FLAG_TIMESTAMP_BIT - timestamp recorded for fence signaling * DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT - enable_signaling might have been called @@ -65,7 +67,10 @@ struct seq_file; * been completed, or never called at all. */ struct dma_fence { - spinlock_t *lock; + union { + spinlock_t *extern_lock; + spinlock_t inline_lock; + }; const struct dma_fence_ops __rcu *ops; /* * We clear the callback list on kref_put so that by the time we @@ -98,6 +103,7 @@ struct dma_fence { };
enum dma_fence_flag_bits { + DMA_FENCE_FLAG_INLINE_LOCK_BIT, DMA_FENCE_FLAG_SEQNO64_BIT, DMA_FENCE_FLAG_SIGNALED_BIT, DMA_FENCE_FLAG_TIMESTAMP_BIT, @@ -362,6 +368,38 @@ dma_fence_get_rcu_safe(struct dma_fence __rcu **fencep) } while (1); }
+/** + * dma_fence_spinlock - return pointer to the spinlock protecting the fence + * @fence: the fence to get the lock from + * + * Return either the pointer to the embedded or the external spin lock. + */ +static inline spinlock_t *dma_fence_spinlock(struct dma_fence *fence) +{ + return test_bit(DMA_FENCE_FLAG_INLINE_LOCK_BIT, &fence->flags) ? + &fence->inline_lock : fence->extern_lock; +} + +/** + * dma_fence_lock_irqsave - irqsave lock the fence + * @fence: the fence to lock + * @flags: where to store the CPU flags. + * + * Lock the fence, preventing it from changing to the signaled state. + */ +#define dma_fence_lock_irqsave(fence, flags) \ + spin_lock_irqsave(dma_fence_spinlock(fence), flags) + +/** + * dma_fence_unlock_irqrestore - unlock the fence and irqrestore + * @fence: the fence to unlock + * @flags: the CPU flags to restore + * + * Unlock the fence, allowing it to change its state to signaled again. + */ +#define dma_fence_unlock_irqrestore(fence, flags) \ + spin_unlock_irqrestore(dma_fence_spinlock(fence), flags) + #ifdef CONFIG_LOCKDEP bool dma_fence_begin_signalling(void); void dma_fence_end_signalling(bool cookie);
Hi Christian,
kernel test robot noticed the following build errors:
[auto build test ERROR on next-20251113] [also build test ERROR on v6.18-rc5] [cannot apply to drm-xe/drm-xe-next linus/master v6.18-rc5 v6.18-rc4 v6.18-rc3] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Christian-K-nig/dma-buf-prote... base: next-20251113 patch link: https://lore.kernel.org/r/20251113145332.16805-5-christian.koenig%40amd.com patch subject: [PATCH 04/18] dma-buf: inline spinlock for fence protection v2 config: openrisc-randconfig-r072-20251114 (https://download.01.org/0day-ci/archive/20251114/202511140459.HpT5i7v9-lkp@i...) compiler: or1k-linux-gcc (GCC) 14.3.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251114/202511140459.HpT5i7v9-lkp@i...)
If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot lkp@intel.com | Closes: https://lore.kernel.org/oe-kbuild-all/202511140459.HpT5i7v9-lkp@intel.com/
All errors (new ones prefixed by >>):
drivers/dma-buf/st-dma-fence.c: In function '__wait_for_callbacks':
drivers/dma-buf/st-dma-fence.c:454:24: error: 'struct dma_fence' has no member named 'lock'
454 | spin_lock_irq(f->lock); | ^~ drivers/dma-buf/st-dma-fence.c:455:26: error: 'struct dma_fence' has no member named 'lock' 455 | spin_unlock_irq(f->lock); | ^~
vim +454 drivers/dma-buf/st-dma-fence.c
2989f6451084ae Chris Wilson 2019-08-19 451 2989f6451084ae Chris Wilson 2019-08-19 452 static void __wait_for_callbacks(struct dma_fence *f) 2989f6451084ae Chris Wilson 2019-08-19 453 { 2989f6451084ae Chris Wilson 2019-08-19 @454 spin_lock_irq(f->lock); 2989f6451084ae Chris Wilson 2019-08-19 455 spin_unlock_irq(f->lock); 2989f6451084ae Chris Wilson 2019-08-19 456 } 2989f6451084ae Chris Wilson 2019-08-19 457
Hi Christian,
kernel test robot noticed the following build errors:
[auto build test ERROR on next-20251113] [also build test ERROR on v6.18-rc5] [cannot apply to drm-xe/drm-xe-next linus/master v6.18-rc5 v6.18-rc4 v6.18-rc3] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Christian-K-nig/dma-buf-prote... base: next-20251113 patch link: https://lore.kernel.org/r/20251113145332.16805-5-christian.koenig%40amd.com patch subject: [PATCH 04/18] dma-buf: inline spinlock for fence protection v2 config: arm-randconfig-004-20251114 (https://download.01.org/0day-ci/archive/20251114/202511141529.mZKt8xhV-lkp@i...) compiler: clang version 22.0.0git (https://github.com/llvm/llvm-project 0bba1e76581bad04e7d7f09f5115ae5e2989e0d9) reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251114/202511141529.mZKt8xhV-lkp@i...)
If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot lkp@intel.com | Closes: https://lore.kernel.org/oe-kbuild-all/202511141529.mZKt8xhV-lkp@intel.com/
All errors (new ones prefixed by >>):
drivers/dma-buf/st-dma-fence.c:454:19: error: no member named 'lock' in 'struct dma_fence'
454 | spin_lock_irq(f->lock); | ~ ^ drivers/dma-buf/st-dma-fence.c:455:21: error: no member named 'lock' in 'struct dma_fence' 455 | spin_unlock_irq(f->lock); | ~ ^ 2 errors generated.
vim +454 drivers/dma-buf/st-dma-fence.c
2989f6451084ae Chris Wilson 2019-08-19 451 2989f6451084ae Chris Wilson 2019-08-19 452 static void __wait_for_callbacks(struct dma_fence *f) 2989f6451084ae Chris Wilson 2019-08-19 453 { 2989f6451084ae Chris Wilson 2019-08-19 @454 spin_lock_irq(f->lock); 2989f6451084ae Chris Wilson 2019-08-19 455 spin_unlock_irq(f->lock); 2989f6451084ae Chris Wilson 2019-08-19 456 } 2989f6451084ae Chris Wilson 2019-08-19 457
Using the inline lock is now the recommended way for dma_fence implementations.
So use this approach for the framework internal fences as well.
Also saves about 4 bytes for the external spinlock.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Tvrtko Ursulin tvrtko.ursulin@igalia.com --- drivers/dma-buf/dma-fence.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-)
diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index 9a5aa9e44e13..e6d26c2e0391 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -24,7 +24,6 @@ EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit); EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal); EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled);
-static DEFINE_SPINLOCK(dma_fence_stub_lock); static struct dma_fence dma_fence_stub;
/* @@ -123,12 +122,8 @@ static const struct dma_fence_ops dma_fence_stub_ops = {
static int __init dma_fence_init_stub(void) { - dma_fence_init(&dma_fence_stub, &dma_fence_stub_ops, - &dma_fence_stub_lock, 0, 0); - - set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, - &dma_fence_stub.flags); - + dma_fence_init(&dma_fence_stub, &dma_fence_stub_ops, NULL, 0, 0); + set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &dma_fence_stub.flags); dma_fence_signal(&dma_fence_stub); return 0; } @@ -160,16 +155,9 @@ struct dma_fence *dma_fence_allocate_private_stub(ktime_t timestamp) if (fence == NULL) return NULL;
- dma_fence_init(fence, - &dma_fence_stub_ops, - &dma_fence_stub_lock, - 0, 0); - - set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, - &fence->flags); - + dma_fence_init(fence, &dma_fence_stub_ops, NULL, 0, 0); + set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags); dma_fence_signal_timestamp(fence, timestamp); - return fence; } EXPORT_SYMBOL(dma_fence_allocate_private_stub);
Using the inline lock is now the recommended way for dma_fence implementations.
So use this approach for the framework internal fences as well.
Also saves about 4 bytes for the external spinlock.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Tvrtko Ursulin tvrtko.ursulin@igalia.com --- drivers/dma-buf/dma-fence-array.c | 5 ++--- include/linux/dma-fence-array.h | 1 - 2 files changed, 2 insertions(+), 4 deletions(-)
diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c index 6657d4b30af9..c2119a8049fe 100644 --- a/drivers/dma-buf/dma-fence-array.c +++ b/drivers/dma-buf/dma-fence-array.c @@ -204,9 +204,8 @@ void dma_fence_array_init(struct dma_fence_array *array,
array->num_fences = num_fences;
- spin_lock_init(&array->lock); - dma_fence_init(&array->base, &dma_fence_array_ops, &array->lock, - context, seqno); + dma_fence_init(&array->base, &dma_fence_array_ops, NULL, context, + seqno); init_irq_work(&array->work, irq_dma_fence_array_work);
atomic_set(&array->num_pending, signal_on_any ? 1 : num_fences); diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index 079b3dec0a16..370b3d2bba37 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -38,7 +38,6 @@ struct dma_fence_array_cb { struct dma_fence_array { struct dma_fence base;
- spinlock_t lock; unsigned num_fences; atomic_t num_pending; struct dma_fence **fences;
Using the inline lock is now the recommended way for dma_fence implementations.
So use this approach for the framework internal fences as well.
Also saves about 4 bytes for the external spinlock.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Tvrtko Ursulin tvrtko.ursulin@igalia.com --- drivers/dma-buf/dma-fence-chain.c | 3 +-- include/linux/dma-fence-chain.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-)
diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c index a8a90acf4f34..a707792b6025 100644 --- a/drivers/dma-buf/dma-fence-chain.c +++ b/drivers/dma-buf/dma-fence-chain.c @@ -245,7 +245,6 @@ void dma_fence_chain_init(struct dma_fence_chain *chain, struct dma_fence_chain *prev_chain = to_dma_fence_chain(prev); uint64_t context;
- spin_lock_init(&chain->lock); rcu_assign_pointer(chain->prev, prev); chain->fence = fence; chain->prev_seqno = 0; @@ -261,7 +260,7 @@ void dma_fence_chain_init(struct dma_fence_chain *chain, seqno = max(prev->seqno, seqno); }
- dma_fence_init64(&chain->base, &dma_fence_chain_ops, &chain->lock, + dma_fence_init64(&chain->base, &dma_fence_chain_ops, NULL, context, seqno);
/* diff --git a/include/linux/dma-fence-chain.h b/include/linux/dma-fence-chain.h index 68c3c1e41014..d39ce7a2e599 100644 --- a/include/linux/dma-fence-chain.h +++ b/include/linux/dma-fence-chain.h @@ -46,7 +46,6 @@ struct dma_fence_chain { */ struct irq_work work; }; - spinlock_t lock; };
Using the inline lock is now the recommended way for dma_fence implementations.
So use this approach for the scheduler fences as well, in case anybody uses them as a blueprint for their own implementation.
Also saves about 4 bytes for the external spinlock.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/scheduler/sched_fence.c | 7 +++---- include/drm/gpu_scheduler.h | 4 ---- 2 files changed, 3 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c index 08ccbde8b2f5..47471b9e43f9 100644 --- a/drivers/gpu/drm/scheduler/sched_fence.c +++ b/drivers/gpu/drm/scheduler/sched_fence.c @@ -161,7 +161,7 @@ static void drm_sched_fence_set_deadline_finished(struct dma_fence *f, /* If we already have an earlier deadline, keep it: */ if (test_bit(DRM_SCHED_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags) && ktime_before(fence->deadline, deadline)) { - spin_unlock_irqrestore(&fence->lock, flags); + dma_fence_unlock_irqrestore(f, flags); return; }
@@ -217,7 +217,6 @@ struct drm_sched_fence *drm_sched_fence_alloc(struct drm_sched_entity *entity,
fence->owner = owner; fence->drm_client_id = drm_client_id; - spin_lock_init(&fence->lock);
return fence; } @@ -230,9 +229,9 @@ void drm_sched_fence_init(struct drm_sched_fence *fence, fence->sched = entity->rq->sched; seq = atomic_inc_return(&entity->fence_seq); dma_fence_init(&fence->scheduled, &drm_sched_fence_ops_scheduled, - &fence->lock, entity->fence_context, seq); + NULL, entity->fence_context, seq); dma_fence_init(&fence->finished, &drm_sched_fence_ops_finished, - &fence->lock, entity->fence_context + 1, seq); + NULL, entity->fence_context + 1, seq); }
module_init(drm_sched_fence_slab_init); diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index fb88301b3c45..b77f24a783e3 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -297,10 +297,6 @@ struct drm_sched_fence { * belongs to. */ struct drm_gpu_scheduler *sched; - /** - * @lock: the lock used by the scheduled and the finished fences. - */ - spinlock_t lock; /** * @owner: job owner for debugging */
Calling dma_fence_is_signaled() here is illegal! The enable_signaling callback is invoked with the fence lock already held, and dma_fence_is_signaled() can end up calling dma_fence_signal(), which takes the same lock.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 6 ------ 1 file changed, 6 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c index 1ef758ac5076..09c919f72b6c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c @@ -120,12 +120,6 @@ static bool amdkfd_fence_enable_signaling(struct dma_fence *f) { struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
- if (!fence) - return false; - - if (dma_fence_is_signaled(f)) - return true; - if (!fence->svm_bo) { if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f)) return true;
This should allow amdgpu_fences to outlive the amdgpu module.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 63 +++++++---------------- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 - 2 files changed, 20 insertions(+), 44 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index c7843e336310..990fcbbe90a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -112,8 +112,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af, af->ring = ring;
seq = ++ring->fence_drv.sync_seq; - dma_fence_init(fence, &amdgpu_fence_ops, - &ring->fence_drv.lock, + dma_fence_init(fence, &amdgpu_fence_ops, NULL, adev->fence_context + ring->idx, seq);
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, @@ -467,7 +466,6 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring) timer_setup(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback, 0);
ring->fence_drv.num_fences_mask = ring->num_hw_submission * 2 - 1; - spin_lock_init(&ring->fence_drv.lock); ring->fence_drv.fences = kcalloc(ring->num_hw_submission * 2, sizeof(void *), GFP_KERNEL);
@@ -654,16 +652,20 @@ void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error) struct amdgpu_fence_driver *drv = &ring->fence_drv; unsigned long flags;
- spin_lock_irqsave(&drv->lock, flags); + rcu_read_lock(); for (unsigned int i = 0; i <= drv->num_fences_mask; ++i) { struct dma_fence *fence;
- fence = rcu_dereference_protected(drv->fences[i], - lockdep_is_held(&drv->lock)); - if (fence && !dma_fence_is_signaled_locked(fence)) + fence = dma_fence_get_rcu(drv->fences[i]); + if (!fence) + continue; + + dma_fence_lock_irqsave(fence, flags); + if (!dma_fence_is_signaled_locked(fence)) dma_fence_set_error(fence, error); + dma_fence_unlock_irqrestore(fence, flags); } - spin_unlock_irqrestore(&drv->lock, flags); + rcu_read_unlock(); }
/** @@ -714,16 +716,19 @@ void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af) seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask;
/* mark all fences from the guilty context with an error */ - spin_lock_irqsave(&ring->fence_drv.lock, flags); + rcu_read_lock(); do { last_seq++; last_seq &= ring->fence_drv.num_fences_mask;
ptr = &ring->fence_drv.fences[last_seq]; - rcu_read_lock(); - unprocessed = rcu_dereference(*ptr); + unprocessed = dma_fence_get_rcu(*ptr); + + if (!unprocessed) + continue;
- if (unprocessed && !dma_fence_is_signaled_locked(unprocessed)) { + dma_fence_lock_irqsave(unprocessed, flags); + if (!dma_fence_is_signaled_locked(unprocessed)) { fence = container_of(unprocessed, struct amdgpu_fence, base);
if (fence == af) @@ -731,9 +736,10 @@ void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af) else if (fence->context == af->context) dma_fence_set_error(&fence->base, -ECANCELED); } - rcu_read_unlock(); + dma_fence_unlock_irqrestore(unprocessed, flags); + dma_fence_put(unprocessed); } while (last_seq != seq); - spin_unlock_irqrestore(&ring->fence_drv.lock, flags); + rcu_read_unlock(); /* signal the guilty fence */ amdgpu_fence_write(ring, (u32)af->base.seqno); amdgpu_fence_process(ring); @@ -823,39 +829,10 @@ static bool amdgpu_fence_enable_signaling(struct dma_fence *f) return true; }
-/** - * amdgpu_fence_free - free up the fence memory - * - * @rcu: RCU callback head - * - * Free up the fence memory after the RCU grace period. - */ -static void amdgpu_fence_free(struct rcu_head *rcu) -{ - struct dma_fence *f = container_of(rcu, struct dma_fence, rcu); - - /* free fence_slab if it's separated fence*/ - kfree(to_amdgpu_fence(f)); -} - -/** - * amdgpu_fence_release - callback that fence can be freed - * - * @f: fence - * - * This function is called when the reference count becomes zero. - * It just RCU schedules freeing up the fence. - */ -static void amdgpu_fence_release(struct dma_fence *f) -{ - call_rcu(&f->rcu, amdgpu_fence_free); -} - static const struct dma_fence_ops amdgpu_fence_ops = { .get_driver_name = amdgpu_fence_get_driver_name, .get_timeline_name = amdgpu_fence_get_timeline_name, .enable_signaling = amdgpu_fence_enable_signaling, - .release = amdgpu_fence_release, };
/* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 7a27c6c4bb44..9cbf63454004 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -125,7 +125,6 @@ struct amdgpu_fence_driver { unsigned irq_type; struct timer_list fallback_timer; unsigned num_fences_mask; - spinlock_t lock; struct dma_fence **fences; };
This should allow amdgpu_fences to outlive the amdgpu module.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Tvrtko Ursulin tvrtko.ursulin@igalia.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c | 3 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c index 23d7d0b0d625..95ee22c43ceb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c @@ -167,9 +167,8 @@ amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr)
ev_fence->evf_mgr = evf_mgr; get_task_comm(ev_fence->timeline_name, current); - spin_lock_init(&ev_fence->lock); dma_fence_init64(&ev_fence->base, &amdgpu_eviction_fence_ops, - &ev_fence->lock, evf_mgr->ev_fence_ctx, + NULL, evf_mgr->ev_fence_ctx, atomic_inc_return(&evf_mgr->ev_fence_seq)); return ev_fence; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h index fcd867b7147d..fb70efb54338 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h @@ -27,7 +27,6 @@
struct amdgpu_eviction_fence { struct dma_fence base; - spinlock_t lock; char timeline_name[TASK_COMM_LEN]; struct amdgpu_eviction_fence_mgr *evf_mgr; };
This should allow amdgpu_vm_tlb_fences to outlive the amdgpu module.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Tvrtko Ursulin tvrtko.ursulin@igalia.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c index 5d26797356a3..27bf1f569830 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c @@ -33,7 +33,6 @@ struct amdgpu_tlb_fence { struct amdgpu_device *adev; struct dma_fence *dependency; struct work_struct work; - spinlock_t lock; uint16_t pasid;
}; @@ -98,9 +97,8 @@ void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev, struct amdgpu_vm *vm f->dependency = *fence; f->pasid = vm->pasid; INIT_WORK(&f->work, amdgpu_tlb_fence_work); - spin_lock_init(&f->lock);
- dma_fence_init64(&f->base, &amdgpu_tlb_fence_ops, &f->lock, + dma_fence_init64(&f->base, &amdgpu_tlb_fence_ops, NULL, vm->tlb_fence_context, atomic64_read(&vm->tlb_seq));
/* TODO: We probably need a separate wq here */
This should allow amdkfd_fences to outlive the amdgpu module.
v2: implement Felix's suggestion to lock the fence while signaling it.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 6 +++ .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 39 ++++++++----------- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 7 ++-- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 4 +- 4 files changed, 27 insertions(+), 29 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 8bdfcde2029b..6254cef04213 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -196,6 +196,7 @@ int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data); #endif #if IS_ENABLED(CONFIG_HSA_AMD) bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); +void amdkfd_fence_signal(struct dma_fence *f); struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo); int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni, @@ -210,6 +211,11 @@ bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) return false; }
+static inline +void amdkfd_fence_signal(struct dma_fence *f) +{ +} + static inline struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c index 09c919f72b6c..f76c3c52a2a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c @@ -127,29 +127,9 @@ static bool amdkfd_fence_enable_signaling(struct dma_fence *f) if (!svm_range_schedule_evict_svm_bo(fence)) return true; } - return false; -} - -/** - * amdkfd_fence_release - callback that fence can be freed - * - * @f: dma_fence - * - * This function is called when the reference count becomes zero. - * Drops the mm_struct reference and RCU schedules freeing up the fence. - */ -static void amdkfd_fence_release(struct dma_fence *f) -{ - struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); - - /* Unconditionally signal the fence. The process is getting - * terminated. - */ - if (WARN_ON(!fence)) - return; /* Not an amdgpu_amdkfd_fence */ - mmdrop(fence->mm); - kfree_rcu(f, rcu); + fence->mm = NULL; + return false; }
/** @@ -174,9 +154,22 @@ bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) return false; }
+void amdkfd_fence_signal(struct dma_fence *f) +{ + struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); + long flags; + + dma_fence_lock_irqsafe(f, flags) + if (fence->mm) { + mmdrop(fence->mm); + fence->mm = NULL; + } + dma_fence_signal_locked(f); + dma_fence_unlock_irqrestore(f, flags) +} + static const struct dma_fence_ops amdkfd_fence_ops = { .get_driver_name = amdkfd_fence_get_driver_name, .get_timeline_name = amdkfd_fence_get_timeline_name, .enable_signaling = amdkfd_fence_enable_signaling, - .release = amdkfd_fence_release, }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index a085faac9fe1..8fac70b839ed 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1173,7 +1173,7 @@ static void kfd_process_wq_release(struct work_struct *work) synchronize_rcu(); ef = rcu_access_pointer(p->ef); if (ef) - dma_fence_signal(ef); + amdkfd_fence_signal(ef);
kfd_process_remove_sysfs(p); kfd_debugfs_remove_process(p); @@ -1990,7 +1990,6 @@ kfd_process_gpuid_from_node(struct kfd_process *p, struct kfd_node *node, static int signal_eviction_fence(struct kfd_process *p) { struct dma_fence *ef; - int ret;
rcu_read_lock(); ef = dma_fence_get_rcu_safe(&p->ef); @@ -1998,10 +1997,10 @@ static int signal_eviction_fence(struct kfd_process *p) if (!ef) return -EINVAL;
- ret = dma_fence_signal(ef); + amdkfd_fence_signal(ef); dma_fence_put(ef);
- return ret; + return 0; }
static void evict_process_worker(struct work_struct *work) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index c30dfb8ec236..566950702b7d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -428,7 +428,7 @@ static void svm_range_bo_release(struct kref *kref)
if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) /* We're not in the eviction worker. Signal the fence. */ - dma_fence_signal(&svm_bo->eviction_fence->base); + amdkfd_fence_signal(&svm_bo->eviction_fence->base); dma_fence_put(&svm_bo->eviction_fence->base); amdgpu_bo_unref(&svm_bo->bo); kfree(svm_bo); @@ -3628,7 +3628,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work) mmap_read_unlock(mm); mmput(mm);
- dma_fence_signal(&svm_bo->eviction_fence->base); + amdkfd_fence_signal(&svm_bo->eviction_fence->base);
/* This is the last reference to svm_bo, after svm_range_vram_node_free * has been called in svm_migrate_vram_to_ram
Hi Christian,
kernel test robot noticed the following build errors:
[auto build test ERROR on next-20251113] [cannot apply to drm-xe/drm-xe-next linus/master v6.18-rc5 v6.18-rc4 v6.18-rc3 v6.18-rc5] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Christian-K-nig/dma-buf-prote... base: next-20251113 patch link: https://lore.kernel.org/r/20251113145332.16805-14-christian.koenig%40amd.com patch subject: [PATCH 13/18] drm/amdgpu: independence for the amdkfd_fence! v2 config: riscv-randconfig-001-20251114 (https://download.01.org/0day-ci/archive/20251114/202511141921.NxnbQ4Zt-lkp@i...) compiler: riscv64-linux-gcc (GCC) 13.4.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251114/202511141921.NxnbQ4Zt-lkp@i...)
If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot lkp@intel.com | Closes: https://lore.kernel.org/oe-kbuild-all/202511141921.NxnbQ4Zt-lkp@intel.com/
All error/warnings (new ones prefixed by >>):
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c: In function 'amdkfd_fence_signal':
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c:162:9: error: implicit declaration of function 'dma_fence_lock_irqsafe'; did you mean 'dma_fence_lock_irqsave'? [-Werror=implicit-function-declaration]
162 | dma_fence_lock_irqsafe(f, flags) | ^~~~~~~~~~~~~~~~~~~~~~ | dma_fence_lock_irqsave
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c:162:41: error: expected ';' before 'if'
162 | dma_fence_lock_irqsafe(f, flags) | ^ | ; 163 | if (fence->mm) { | ~~
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c:169:1: error: expected ';' before '}' token
169 | } | ^
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c:159:37: warning: unused variable 'fence' [-Wunused-variable]
159 | struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); | ^~~~~ cc1: some warnings being treated as errors
vim +162 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
156 157 void amdkfd_fence_signal(struct dma_fence *f) 158 {
159 struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
160 long flags; 161
162 dma_fence_lock_irqsafe(f, flags)
163 if (fence->mm) { 164 mmdrop(fence->mm); 165 fence->mm = NULL; 166 } 167 dma_fence_signal_locked(f); 168 dma_fence_unlock_irqrestore(f, flags)
169 }
170
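Judging by the robot output the fix is mechanical: the helper is spelled dma_fence_lock_irqsave(), both the lock and unlock invocations are missing their terminating semicolons, and flags wants to be unsigned long. A corrected sketch, assuming the lock helpers introduced by the core patches in this series behave like spin_lock_irqsave()/spin_unlock_irqrestore() on the fence's lock:

void amdkfd_fence_signal(struct dma_fence *f)
{
	struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
	unsigned long flags;

	dma_fence_lock_irqsave(f, flags);
	/* Drop the mm reference under the fence lock, then signal */
	if (fence->mm) {
		mmdrop(fence->mm);
		fence->mm = NULL;
	}
	dma_fence_signal_locked(f);
	dma_fence_unlock_irqrestore(f, flags);
}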
This should allow amdgpu_userq_fences to outlive the amdgpu module.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 13 +---- .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 54 ++++--------------- .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h | 8 --- 3 files changed, 11 insertions(+), 64 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index cb522d6272d6..6431fb4a24c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -3129,11 +3129,7 @@ static int __init amdgpu_init(void)
r = amdgpu_sync_init(); if (r) - goto error_sync; - - r = amdgpu_userq_fence_slab_init(); - if (r) - goto error_fence; + return r;
DRM_INFO("amdgpu kernel modesetting enabled.\n"); amdgpu_register_atpx_handler(); @@ -3150,12 +3146,6 @@ static int __init amdgpu_init(void)
/* let modprobe override vga console setting */ return pci_register_driver(&amdgpu_kms_pci_driver); - -error_fence: - amdgpu_sync_fini(); - -error_sync: - return r; }
static void __exit amdgpu_exit(void) @@ -3165,7 +3155,6 @@ static void __exit amdgpu_exit(void) amdgpu_unregister_atpx_handler(); amdgpu_acpi_release(); amdgpu_sync_fini(); - amdgpu_userq_fence_slab_fini(); mmu_notifier_synchronize(); amdgpu_xcp_drv_release(); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 99ae1d19b751..08c3da86b36d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -33,26 +33,6 @@ #include "amdgpu_userq_fence.h"
static const struct dma_fence_ops amdgpu_userq_fence_ops; -static struct kmem_cache *amdgpu_userq_fence_slab; - -int amdgpu_userq_fence_slab_init(void) -{ - amdgpu_userq_fence_slab = kmem_cache_create("amdgpu_userq_fence", - sizeof(struct amdgpu_userq_fence), - 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!amdgpu_userq_fence_slab) - return -ENOMEM; - - return 0; -} - -void amdgpu_userq_fence_slab_fini(void) -{ - rcu_barrier(); - kmem_cache_destroy(amdgpu_userq_fence_slab); -}
static inline struct amdgpu_userq_fence *to_amdgpu_userq_fence(struct dma_fence *f) { @@ -226,7 +206,7 @@ void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv)
static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence **userq_fence) { - *userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC); + *userq_fence = kmalloc(sizeof(**userq_fence), GFP_ATOMIC); return *userq_fence ? 0 : -ENOMEM; }
@@ -242,12 +222,11 @@ static int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq, if (!fence_drv) return -EINVAL;
- spin_lock_init(&userq_fence->lock); INIT_LIST_HEAD(&userq_fence->link); fence = &userq_fence->base; userq_fence->fence_drv = fence_drv;
- dma_fence_init64(fence, &amdgpu_userq_fence_ops, &userq_fence->lock, + dma_fence_init64(fence, &amdgpu_userq_fence_ops, NULL, fence_drv->context, seq);
amdgpu_userq_fence_driver_get(fence_drv); @@ -317,35 +296,22 @@ static bool amdgpu_userq_fence_signaled(struct dma_fence *f) rptr = amdgpu_userq_fence_read(fence_drv); wptr = fence->base.seqno;
- if (rptr >= wptr) + if (rptr >= wptr) { + amdgpu_userq_fence_driver_put(fence->fence_drv); + fence->fence_drv = NULL; + + kvfree(fence->fence_drv_array); + fence->fence_drv_array = NULL; return true; + }
return false; }
-static void amdgpu_userq_fence_free(struct rcu_head *rcu) -{ - struct dma_fence *fence = container_of(rcu, struct dma_fence, rcu); - struct amdgpu_userq_fence *userq_fence = to_amdgpu_userq_fence(fence); - struct amdgpu_userq_fence_driver *fence_drv = userq_fence->fence_drv; - - /* Release the fence driver reference */ - amdgpu_userq_fence_driver_put(fence_drv); - - kvfree(userq_fence->fence_drv_array); - kmem_cache_free(amdgpu_userq_fence_slab, userq_fence); -} - -static void amdgpu_userq_fence_release(struct dma_fence *f) -{ - call_rcu(&f->rcu, amdgpu_userq_fence_free); -} - static const struct dma_fence_ops amdgpu_userq_fence_ops = { .get_driver_name = amdgpu_userq_fence_get_driver_name, .get_timeline_name = amdgpu_userq_fence_get_timeline_name, .signaled = amdgpu_userq_fence_signaled, - .release = amdgpu_userq_fence_release, };
/** @@ -559,7 +525,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence); if (r) { mutex_unlock(&userq_mgr->userq_mutex); - kmem_cache_free(amdgpu_userq_fence_slab, userq_fence); + kfree(userq_fence); goto put_gobj_write; }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h index d76add2afc77..6f04782f3ea9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h @@ -31,11 +31,6 @@
struct amdgpu_userq_fence { struct dma_fence base; - /* - * This lock is necessary to synchronize the - * userqueue dma fence operations. - */ - spinlock_t lock; struct list_head link; unsigned long fence_drv_array_count; struct amdgpu_userq_fence_driver *fence_drv; @@ -58,9 +53,6 @@ struct amdgpu_userq_fence_driver { char timeline_name[TASK_COMM_LEN]; };
-int amdgpu_userq_fence_slab_init(void); -void amdgpu_userq_fence_slab_fini(void); - void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv); void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv); int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
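Worth spelling out why the slab can go away here: with the .release callback removed, the dma-fence core frees the object itself, and in mainline that default path is simply kfree_rcu() on the fence, which only works for plain kmalloc() allocations. It also means the amdgpu module no longer has to keep a kmem_cache plus an rcu_barrier() at module exit alive just in case a fence outlives it. The core fallback looks roughly like this (paraphrased from drivers/dma-buf/dma-fence.c):

/* dma_fence_release() falls back to this when ops->release is not set */
void dma_fence_free(struct dma_fence *fence)
{
	kfree_rcu(fence, rcu);
}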
From: Matthew Brost matthew.brost@intel.com
Preempt, TLB invalidation, and OA fences now use the embedded fence lock.
Signed-off-by: Matthew Brost matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_oa.c | 5 +---- drivers/gpu/drm/xe/xe_preempt_fence.c | 3 +-- drivers/gpu/drm/xe/xe_preempt_fence_types.h | 2 -- drivers/gpu/drm/xe/xe_tlb_inval.c | 5 +---- drivers/gpu/drm/xe/xe_tlb_inval_types.h | 2 -- 5 files changed, 3 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 7a13a7bd99a6..41d081781060 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -112,8 +112,6 @@ struct xe_oa_config_bo { struct xe_oa_fence { /* @base: dma fence base */ struct dma_fence base; - /* @lock: lock for the fence */ - spinlock_t lock; /* @work: work to signal @base */ struct delayed_work work; /* @cb: callback to schedule @work */ @@ -1017,8 +1015,7 @@ static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config }
/* Point of no return: initialize and set fence to signal */ - spin_lock_init(&ofence->lock); - dma_fence_init(&ofence->base, &xe_oa_fence_ops, &ofence->lock, 0, 0); + dma_fence_init(&ofence->base, &xe_oa_fence_ops, NULL, 0, 0);
for (i = 0; i < stream->num_syncs; i++) { if (stream->syncs[i].flags & DRM_XE_SYNC_FLAG_SIGNAL) diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c index 7f587ca3947d..75f433aee747 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence.c +++ b/drivers/gpu/drm/xe/xe_preempt_fence.c @@ -145,9 +145,8 @@ xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_exec_queue *q, { list_del_init(&pfence->link); pfence->q = xe_exec_queue_get(q); - spin_lock_init(&pfence->lock); dma_fence_init(&pfence->base, &preempt_fence_ops, - &pfence->lock, context, seqno); + NULL, context, seqno);
return &pfence->base; } diff --git a/drivers/gpu/drm/xe/xe_preempt_fence_types.h b/drivers/gpu/drm/xe/xe_preempt_fence_types.h index ac125c697a41..a98de8d1c723 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence_types.h +++ b/drivers/gpu/drm/xe/xe_preempt_fence_types.h @@ -25,8 +25,6 @@ struct xe_preempt_fence { struct xe_exec_queue *q; /** @preempt_work: work struct which issues preemption */ struct work_struct preempt_work; - /** @lock: dma-fence fence lock */ - spinlock_t lock; /** @error: preempt fence is in error state */ int error; }; diff --git a/drivers/gpu/drm/xe/xe_tlb_inval.c b/drivers/gpu/drm/xe/xe_tlb_inval.c index 918a59e686ea..5c23e76b0241 100644 --- a/drivers/gpu/drm/xe/xe_tlb_inval.c +++ b/drivers/gpu/drm/xe/xe_tlb_inval.c @@ -133,7 +133,6 @@ int xe_gt_tlb_inval_init_early(struct xe_gt *gt) tlb_inval->seqno = 1; INIT_LIST_HEAD(&tlb_inval->pending_fences); spin_lock_init(&tlb_inval->pending_lock); - spin_lock_init(&tlb_inval->lock); INIT_DELAYED_WORK(&tlb_inval->fence_tdr, xe_tlb_inval_fence_timeout);
err = drmm_mutex_init(&xe->drm, &tlb_inval->seqno_lock); @@ -420,10 +419,8 @@ void xe_tlb_inval_fence_init(struct xe_tlb_inval *tlb_inval, { xe_pm_runtime_get_noresume(tlb_inval->xe);
- spin_lock_irq(&tlb_inval->lock); - dma_fence_init(&fence->base, &inval_fence_ops, &tlb_inval->lock, + dma_fence_init(&fence->base, &inval_fence_ops, NULL, dma_fence_context_alloc(1), 1); - spin_unlock_irq(&tlb_inval->lock); INIT_LIST_HEAD(&fence->link); if (stack) set_bit(FENCE_STACK_BIT, &fence->base.flags); diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_types.h b/drivers/gpu/drm/xe/xe_tlb_inval_types.h index 8f8b060e9005..80e893950099 100644 --- a/drivers/gpu/drm/xe/xe_tlb_inval_types.h +++ b/drivers/gpu/drm/xe/xe_tlb_inval_types.h @@ -104,8 +104,6 @@ struct xe_tlb_inval { struct delayed_work fence_tdr; /** @job_wq: schedules TLB invalidation jobs */ struct workqueue_struct *job_wq; - /** @tlb_inval.lock: protects TLB invalidation fences */ - spinlock_t lock; };
/**
From: Matthew Brost matthew.brost@intel.com
Helps with disconnecting fences from the Xe module.
Signed-off-by: Matthew Brost matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_hw_fence.c | 25 ++----------------------- drivers/gpu/drm/xe/xe_hw_fence.h | 3 --- drivers/gpu/drm/xe/xe_module.c | 5 ----- 3 files changed, 2 insertions(+), 31 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c index 3456bec93c70..5edcf057aceb 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence.c +++ b/drivers/gpu/drm/xe/xe_hw_fence.c @@ -6,7 +6,6 @@ #include "xe_hw_fence.h"
#include <linux/device.h> -#include <linux/slab.h>
#include "xe_bo.h" #include "xe_device.h" @@ -16,28 +15,9 @@ #include "xe_map.h" #include "xe_trace.h"
-static struct kmem_cache *xe_hw_fence_slab; - -int __init xe_hw_fence_module_init(void) -{ - xe_hw_fence_slab = kmem_cache_create("xe_hw_fence", - sizeof(struct xe_hw_fence), 0, - SLAB_HWCACHE_ALIGN, NULL); - if (!xe_hw_fence_slab) - return -ENOMEM; - - return 0; -} - -void xe_hw_fence_module_exit(void) -{ - rcu_barrier(); - kmem_cache_destroy(xe_hw_fence_slab); -} - static struct xe_hw_fence *fence_alloc(void) { - return kmem_cache_zalloc(xe_hw_fence_slab, GFP_KERNEL); + return kzalloc(sizeof(struct xe_hw_fence), GFP_KERNEL); }
static void fence_free(struct rcu_head *rcu) @@ -45,8 +25,7 @@ static void fence_free(struct rcu_head *rcu) struct xe_hw_fence *fence = container_of(rcu, struct xe_hw_fence, dma.rcu);
- if (!WARN_ON_ONCE(!fence)) - kmem_cache_free(xe_hw_fence_slab, fence); + kfree(fence); }
static void hw_fence_irq_run_cb(struct irq_work *work) diff --git a/drivers/gpu/drm/xe/xe_hw_fence.h b/drivers/gpu/drm/xe/xe_hw_fence.h index f13a1c4982c7..96f34332fd8d 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence.h +++ b/drivers/gpu/drm/xe/xe_hw_fence.h @@ -11,9 +11,6 @@ /* Cause an early wrap to catch wrapping errors */ #define XE_FENCE_INITIAL_SEQNO (-127)
-int xe_hw_fence_module_init(void); -void xe_hw_fence_module_exit(void); - void xe_hw_fence_irq_init(struct xe_hw_fence_irq *irq); void xe_hw_fence_irq_finish(struct xe_hw_fence_irq *irq); void xe_hw_fence_irq_run(struct xe_hw_fence_irq *irq); diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index d08338fc3bc1..32517bcd533c 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -12,7 +12,6 @@
#include "xe_drv.h" #include "xe_configfs.h" -#include "xe_hw_fence.h" #include "xe_pci.h" #include "xe_pm.h" #include "xe_observation.h" @@ -114,10 +113,6 @@ static const struct init_funcs init_funcs[] = { .init = xe_configfs_init, .exit = xe_configfs_exit, }, - { - .init = xe_hw_fence_module_init, - .exit = xe_hw_fence_module_exit, - }, { .init = xe_sched_job_module_init, .exit = xe_sched_job_module_exit,
From: Matthew Brost matthew.brost@intel.com
Help disconnect fences from the Xe module.
Signed-off-by: Matthew Brost matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_exec_queue.c | 2 +- drivers/gpu/drm/xe/xe_gt.c | 7 ++++-- drivers/gpu/drm/xe/xe_gt_types.h | 2 +- drivers/gpu/drm/xe/xe_hw_engine.c | 2 +- drivers/gpu/drm/xe/xe_hw_fence.c | 35 ++++++++++++++++++++++++-- drivers/gpu/drm/xe/xe_hw_fence.h | 2 +- drivers/gpu/drm/xe/xe_hw_fence_types.h | 4 +++ 7 files changed, 46 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 8724f8de67e2..68ec3ba4b995 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -140,7 +140,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, q->width = width; q->msix_vec = XE_IRQ_DEFAULT_MSIX; q->logical_mask = logical_mask; - q->fence_irq = >->fence_irq[hwe->class]; + q->fence_irq = gt->fence_irq[hwe->class]; q->ring_ops = gt->ring_ops[hwe->class]; q->ops = gt->exec_queue_ops; INIT_LIST_HEAD(&q->lr.link); diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 6d479948bf21..e0461a36771d 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -615,7 +615,8 @@ static void xe_gt_fini(void *arg) xe_pm_runtime_put(gt_to_xe(gt));
for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) - xe_hw_fence_irq_finish(>->fence_irq[i]); + if (gt->fence_irq[i]) + xe_hw_fence_irq_finish(gt->fence_irq[i]);
xe_gt_disable_host_l2_vram(gt); } @@ -629,7 +630,9 @@ int xe_gt_init(struct xe_gt *gt)
for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) { gt->ring_ops[i] = xe_ring_ops_get(gt, i); - xe_hw_fence_irq_init(>->fence_irq[i]); + gt->fence_irq[i] = xe_hw_fence_irq_init(); + if (!gt->fence_irq[i]) + return -ENOMEM; }
err = devm_add_action_or_reset(gt_to_xe(gt)->drm.dev, xe_gt_fini, gt); diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 0a728180b6fe..88a05edab51b 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -240,7 +240,7 @@ struct xe_gt { const struct xe_ring_ops *ring_ops[XE_ENGINE_CLASS_MAX];
/** @fence_irq: fence IRQs (1 per engine class) */ - struct xe_hw_fence_irq fence_irq[XE_ENGINE_CLASS_MAX]; + struct xe_hw_fence_irq *fence_irq[XE_ENGINE_CLASS_MAX];
/** @default_lrc: default LRC state */ void *default_lrc[XE_ENGINE_CLASS_MAX]; diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 6a9e2a4272dd..480972c3da84 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -524,7 +524,7 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe, info->irq_offset; hwe->domain = info->domain; hwe->name = info->name; - hwe->fence_irq = >->fence_irq[info->class]; + hwe->fence_irq = gt->fence_irq[info->class]; hwe->engine_id = id;
hwe->eclass = >->eclass[hwe->class]; diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c index 5edcf057aceb..f5fad4426729 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence.c +++ b/drivers/gpu/drm/xe/xe_hw_fence.c @@ -15,6 +15,25 @@ #include "xe_map.h" #include "xe_trace.h"
+static void xe_hw_fence_irq_destroy(struct kref *ref) +{ + struct xe_hw_fence_irq *irq = container_of(ref, typeof(*irq), refcount); + + kfree(irq); +} + +static void xe_hw_fence_irq_put(struct xe_hw_fence_irq *irq) +{ + if (irq) + kref_put(&irq->refcount, xe_hw_fence_irq_destroy); +} + +static struct xe_hw_fence_irq *xe_hw_fence_irq_get(struct xe_hw_fence_irq *irq) +{ + kref_get(&irq->refcount); + return irq; +} + static struct xe_hw_fence *fence_alloc(void) { return kzalloc(sizeof(struct xe_hw_fence), GFP_KERNEL); @@ -25,6 +44,7 @@ static void fence_free(struct rcu_head *rcu) struct xe_hw_fence *fence = container_of(rcu, struct xe_hw_fence, dma.rcu);
+ xe_hw_fence_irq_put(fence->irq); kfree(fence); }
@@ -52,12 +72,20 @@ static void hw_fence_irq_run_cb(struct irq_work *work) dma_fence_end_signalling(tmp); }
-void xe_hw_fence_irq_init(struct xe_hw_fence_irq *irq) +struct xe_hw_fence_irq *xe_hw_fence_irq_init(void) { + struct xe_hw_fence_irq *irq = kzalloc(sizeof(*irq), GFP_KERNEL); + + if (!irq) + return NULL; + + kref_init(&irq->refcount); spin_lock_init(&irq->lock); init_irq_work(&irq->work, hw_fence_irq_run_cb); INIT_LIST_HEAD(&irq->pending); irq->enabled = true; + + return irq; }
void xe_hw_fence_irq_finish(struct xe_hw_fence_irq *irq) @@ -82,6 +110,8 @@ void xe_hw_fence_irq_finish(struct xe_hw_fence_irq *irq)
/* Safe release of the irq->lock used in dma_fence_init. */ synchronize_rcu(); + + xe_hw_fence_irq_put(irq); }
void xe_hw_fence_irq_run(struct xe_hw_fence_irq *irq) @@ -233,13 +263,14 @@ void xe_hw_fence_free(struct dma_fence *fence) void xe_hw_fence_init(struct dma_fence *fence, struct xe_hw_fence_ctx *ctx, struct iosys_map seqno_map) { - struct xe_hw_fence *hw_fence = + struct xe_hw_fence *hw_fence = container_of(fence, typeof(*hw_fence), dma);
hw_fence->xe = gt_to_xe(ctx->gt); snprintf(hw_fence->name, sizeof(hw_fence->name), "%s", ctx->name); hw_fence->seqno_map = seqno_map; INIT_LIST_HEAD(&hw_fence->irq_link); + hw_fence->irq = xe_hw_fence_irq_get(ctx->irq);
dma_fence_init(fence, &xe_hw_fence_ops, &ctx->irq->lock, ctx->dma_fence_ctx, ctx->next_seqno++); diff --git a/drivers/gpu/drm/xe/xe_hw_fence.h b/drivers/gpu/drm/xe/xe_hw_fence.h index 96f34332fd8d..fa1620203b90 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence.h +++ b/drivers/gpu/drm/xe/xe_hw_fence.h @@ -11,7 +11,7 @@ /* Cause an early wrap to catch wrapping errors */ #define XE_FENCE_INITIAL_SEQNO (-127)
-void xe_hw_fence_irq_init(struct xe_hw_fence_irq *irq); +struct xe_hw_fence_irq *xe_hw_fence_irq_init(void); void xe_hw_fence_irq_finish(struct xe_hw_fence_irq *irq); void xe_hw_fence_irq_run(struct xe_hw_fence_irq *irq); void xe_hw_fence_irq_stop(struct xe_hw_fence_irq *irq); diff --git a/drivers/gpu/drm/xe/xe_hw_fence_types.h b/drivers/gpu/drm/xe/xe_hw_fence_types.h index 58a8d09afe5c..0682c12520e9 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence_types.h +++ b/drivers/gpu/drm/xe/xe_hw_fence_types.h @@ -28,6 +28,8 @@ struct xe_hw_fence_irq { struct irq_work work; /** @pending: list of pending xe_hw_fences */ struct list_head pending; + /** @refcount: ref count of this exec queue */ + struct kref refcount; /** @enabled: fence signaling enabled */ bool enabled; }; @@ -62,6 +64,8 @@ struct xe_hw_fence_ctx { struct xe_hw_fence { /** @dma: base dma fence for hardware fence context */ struct dma_fence dma; + /** @irq: fence irq handler */ + struct xe_hw_fence_irq *irq; /** @xe: Xe device for hw fence driver name */ struct xe_device *xe; /** @name: name of hardware fence context */
From: Matthew Brost matthew.brost@intel.com
Be safe when dereferencing fence->xe: return a static driver name instead of reading it through fence->xe, and check the fence error in the signaled() callback before touching the seqno map.
Signed-off-by: Matthew Brost matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_hw_fence.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c index f5fad4426729..8181dfc628e4 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence.c +++ b/drivers/gpu/drm/xe/xe_hw_fence.c @@ -159,9 +159,7 @@ static struct xe_hw_fence_irq *xe_hw_fence_irq(struct xe_hw_fence *fence)
static const char *xe_hw_fence_get_driver_name(struct dma_fence *dma_fence) { - struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); - - return dev_name(fence->xe->drm.dev); + return "xe"; }
static const char *xe_hw_fence_get_timeline_name(struct dma_fence *dma_fence) @@ -175,10 +173,13 @@ static bool xe_hw_fence_signaled(struct dma_fence *dma_fence) { struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); struct xe_device *xe = fence->xe; - u32 seqno = xe_map_rd(xe, &fence->seqno_map, 0, u32); + u32 seqno; + + if (dma_fence->error) + return true;
- return dma_fence->error || - !__dma_fence_is_later(dma_fence, dma_fence->seqno, seqno); + seqno = xe_map_rd(xe, &fence->seqno_map, 0, u32); + return !__dma_fence_is_later(dma_fence, dma_fence->seqno, seqno); }
static bool xe_hw_fence_enable_signaling(struct dma_fence *dma_fence)