We clear the callback list on kref_put so that by the time we
release the fence it is unused. No one should be adding to the cb_list
of a fence they don't hold a reference to.
This small change actually makes the structure 16% smaller.
v2: add the comment to the code as well.
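As a sanity check of the size claim, here is a minimal userspace sketch (not
kernel code; rcu_head and list_head are both two pointers, modelled here by a
stand-in type) showing the 16 byte saving from overlapping the two members in
a C11 anonymous union, as the patch below does:

    #include <stdio.h>

    struct two_ptrs { void *a, *b; }; /* stand-in for rcu_head/list_head */

    struct fence_before {             /* members side by side */
        struct two_ptrs rcu;
        struct two_ptrs cb_list;
        void *lock;
        unsigned long long context, seqno;
    };

    struct fence_after {              /* members overlapped */
        union {
            struct two_ptrs rcu;
            struct two_ptrs cb_list;
        };
        void *lock;
        unsigned long long context, seqno;
    };

    int main(void)
    {
        printf("before: %zu bytes\n", sizeof(struct fence_before)); /* 56 */
        printf("after:  %zu bytes\n", sizeof(struct fence_after));  /* 40 */
        return 0;
    }

The real struct dma_fence has more members than this sketch, which is how a
fixed 16 byte saving works out to roughly 16% of the whole structure.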
Signed-off-by: Christian König <christian.koenig(a)amd.com>
Reviewed-by: Chris Wilson <chris(a)chris-wilson.co.uk>
---
include/linux/dma-fence.h | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 05d29dbc7e62..bea1d05cf51e 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -65,8 +65,14 @@ struct dma_fence_cb;
struct dma_fence {
struct kref refcount;
const struct dma_fence_ops *ops;
- struct rcu_head rcu;
- struct list_head cb_list;
+ /* We clear the callback list on kref_put so that by the time we
+ * release the fence it is unused. No one should be adding to the
+ * cb_list of a fence they don't hold a reference to.
+ */
+ union {
+ struct rcu_head rcu;
+ struct list_head cb_list;
+ };
spinlock_t *lock;
u64 context;
u64 seqno;
--
2.17.1
When reservation_object_add_shared_fence() is replacing an old fence with
a new one, we should not drop the old one before the new one is in place.
Otherwise other cores can busy-wait for the new one to appear.
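Sketched as a before/after (a simplified excerpt using the same names as the
diff below, not the complete function):

    /* before: the reference is dropped while the slot is still being
     * rewritten, so readers under the seqcount retry until we finish */
    dma_fence_put(old_fence);
    RCU_INIT_POINTER(fobj->shared[i], fence);
    write_seqcount_end(&obj->seq);

    /* after: publish the new fence first, release the old one last;
     * dma_fence_put(NULL) is a no-op, so the append path (nothing
     * replaced) simply leaves old == NULL */
    RCU_INIT_POINTER(fobj->shared[i], fence);
    write_seqcount_end(&obj->seq);
    dma_fence_put(old);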
Signed-off-by: Christian König <christian.koenig(a)amd.com>
---
drivers/dma-buf/reservation.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c
index c71b85c8c159..d59207ca72d2 100644
--- a/drivers/dma-buf/reservation.c
+++ b/drivers/dma-buf/reservation.c
@@ -196,6 +196,7 @@ void reservation_object_add_shared_fence(struct reservation_object *obj,
struct dma_fence *fence)
{
struct reservation_object_list *fobj;
+ struct dma_fence *old;
unsigned int i, count;
dma_fence_get(fence);
@@ -209,18 +210,16 @@ void reservation_object_add_shared_fence(struct reservation_object *obj,
write_seqcount_begin(&obj->seq);
for (i = 0; i < count; ++i) {
- struct dma_fence *old_fence;
- old_fence = rcu_dereference_protected(fobj->shared[i],
- reservation_object_held(obj));
- if (old_fence->context == fence->context ||
- dma_fence_is_signaled(old_fence)) {
- dma_fence_put(old_fence);
+ old = rcu_dereference_protected(fobj->shared[i],
+ reservation_object_held(obj));
+ if (old->context == fence->context ||
+ dma_fence_is_signaled(old))
goto replace;
- }
}
BUG_ON(fobj->shared_count >= fobj->shared_max);
+ old = NULL;
count++;
replace:
@@ -230,6 +229,7 @@ void reservation_object_add_shared_fence(struct reservation_object *obj,
write_seqcount_end(&obj->seq);
preempt_enable();
+ dma_fence_put(old);
}
EXPORT_SYMBOL(reservation_object_add_shared_fence);
--
2.17.1
The reservation object should be capable of handling its internal memory
management itself. Since we search for a free slot from the beginning when
adding a fence anyway, the pruning is actually a waste of time and only
minimally helpful.
Drop it to allow removal of the seqno handling in the reservation object.
This essentially reverts commit "drm/i915: Remove completed fences after a wait".
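For illustration, a minimal userspace sketch (hypothetical types and helpers,
not the kernel code) of why the separate prune pass buys little: the
shared-fence add path already scans from slot 0 and overwrites any signaled
entry, as the reservation.c patch above shows.

    #include <assert.h>
    #include <stdbool.h>
    #include <stddef.h>

    struct fence { int context; bool signaled; };

    struct fence_list {
        struct fence *slots[8];
        size_t count;
    };

    /* Add @f, reusing the first slot whose fence is signaled or shares
     * @f's context -- mirroring reservation_object_add_shared_fence(). */
    void fence_list_add(struct fence_list *l, struct fence *f)
    {
        size_t i;

        for (i = 0; i < l->count; i++) {
            struct fence *old = l->slots[i];

            if (old->context == f->context || old->signaled) {
                l->slots[i] = f;    /* signaled entries pruned on add */
                return;
            }
        }
        assert(l->count < 8);       /* mirrors the BUG_ON in the kernel */
        l->slots[l->count++] = f;   /* no reusable slot: append */
    }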
Signed-off-by: Christian König <christian.koenig(a)amd.com>
---
drivers/gpu/drm/i915/gem/i915_gem_wait.c | 27 ------------------------
1 file changed, 27 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
index 26ec6579b7cd..bb64ec6bef8e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
@@ -35,9 +35,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
unsigned int flags,
long timeout)
{
- unsigned int seq = __read_seqcount_begin(&resv->seq);
struct dma_fence *excl;
- bool prune_fences = false;
if (flags & I915_WAIT_ALL) {
struct dma_fence **shared;
@@ -61,17 +59,6 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
for (; i < count; i++)
dma_fence_put(shared[i]);
kfree(shared);
-
- /*
- * If both shared fences and an exclusive fence exist,
- * then by construction the shared fences must be later
- * than the exclusive fence. If we successfully wait for
- * all the shared fences, we know that the exclusive fence
- * must all be signaled. If all the shared fences are
- * signaled, we can prune the array and recover the
- * floating references on the fences/requests.
- */
- prune_fences = count && timeout >= 0;
} else {
excl = reservation_object_get_excl_rcu(resv);
}
@@ -80,20 +67,6 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
timeout = i915_gem_object_wait_fence(excl, flags, timeout);
dma_fence_put(excl);
-
- /*
- * Opportunistically prune the fences iff we know they have *all* been
- * signaled and that the reservation object has not been changed (i.e.
- * no new fences have been added).
- */
- if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) {
- if (reservation_object_trylock(resv)) {
- if (!__read_seqcount_retry(&resv->seq, seq))
- reservation_object_add_excl_fence(resv, NULL);
- reservation_object_unlock(resv);
- }
- }
-
return timeout;
}
--
2.17.1