This is the new dma_fence_array based container for shared fences in the dma_resv object.
Advantage of this approach is that you can grab a reference to the current set of shared fences at any time, which allows us to drop the sequence number increment and makes the whole RCU handling much more easier.
Disadvantage is that RCU users now have to grab a reference instead of using the sequence counter. As far as I can see i915 was actually the only driver doing this.
So we optimize for adding more fences instead of reading them now.
Another behavior change worth noting is that the shared fences are now only visible after unlocking the dma_resv object or calling dma_resv_fences_commit() manually.
Please review and/or comment, Christian.
The function is supposed to give a hint even if signaling is not enabled.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-fence-array.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c index d3fbd950be94..52068ee5eb35 100644 --- a/drivers/dma-buf/dma-fence-array.c +++ b/drivers/dma-buf/dma-fence-array.c @@ -103,8 +103,18 @@ static bool dma_fence_array_enable_signaling(struct dma_fence *fence) static bool dma_fence_array_signaled(struct dma_fence *fence) { struct dma_fence_array *array = to_dma_fence_array(fence); + int i, num_pending;
- return atomic_read(&array->num_pending) <= 0; + num_pending = atomic_read(&array->num_pending); + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags)) + return num_pending <= 0; + + for (i = 0; i < array->num_fences; ++i) + if (dma_fence_is_signaled(array->fences[i]) && + --num_pending == 0) + return true; + + return false; }
static void dma_fence_array_release(struct dma_fence *fence)
Quoting Christian König (2019-08-26 15:57:23)
The function is supposed to give a hint even if signaling is not enabled.
Signed-off-by: Christian König christian.koenig@amd.com
drivers/dma-buf/dma-fence-array.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c index d3fbd950be94..52068ee5eb35 100644 --- a/drivers/dma-buf/dma-fence-array.c +++ b/drivers/dma-buf/dma-fence-array.c @@ -103,8 +103,18 @@ static bool dma_fence_array_enable_signaling(struct dma_fence *fence) static bool dma_fence_array_signaled(struct dma_fence *fence) { struct dma_fence_array *array = to_dma_fence_array(fence);
int i, num_pending;
return atomic_read(&array->num_pending) <= 0;
num_pending = atomic_read(&array->num_pending);
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags))
return num_pending <= 0;
for (i = 0; i < array->num_fences; ++i)
if (dma_fence_is_signaled(array->fences[i]) &&
--num_pending == 0)
return true;
num_fences may be 0 (it's not rejected in construction and works currently as a simple always-signaled stub).
if (!test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags)) { for (i = 0; i < array->num_fences; ++i) if (dma_fence_is_signaled(array->fences[i]) && --num_pending == 0) break; }
return num_pending <= 0;
A bit surprising that this wasn't already the case.
Signed-off-by: Christian König christian.koenig@amd.com --- include/linux/dma-fence-array.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index 303dd712220f..f99cd7eb24e0 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -68,7 +68,7 @@ static inline bool dma_fence_is_array(struct dma_fence *fence) static inline struct dma_fence_array * to_dma_fence_array(struct dma_fence *fence) { - if (fence->ops != &dma_fence_array_ops) + if (!fence || fence->ops != &dma_fence_array_ops) return NULL;
return container_of(fence, struct dma_fence_array, base);
This seperates allocation and initialization of the dma_fence_array object and allows allocating the fence array together with the dma_fence_array.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-fence-array.c | 101 ++++++++++++++++++++---------- include/linux/dma-fence-array.h | 50 +++++++++++++-- 2 files changed, 115 insertions(+), 36 deletions(-)
diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c index 52068ee5eb35..4664607f0abc 100644 --- a/drivers/dma-buf/dma-fence-array.c +++ b/drivers/dma-buf/dma-fence-array.c @@ -119,14 +119,7 @@ static bool dma_fence_array_signaled(struct dma_fence *fence)
static void dma_fence_array_release(struct dma_fence *fence) { - struct dma_fence_array *array = to_dma_fence_array(fence); - unsigned i; - - for (i = 0; i < array->num_fences; ++i) - dma_fence_put(array->fences[i]); - - kfree(array->fences); - dma_fence_free(fence); + dma_fence_array_free(container_of(fence, struct dma_fence_array, base)); }
const struct dma_fence_ops dma_fence_array_ops = { @@ -139,52 +132,96 @@ const struct dma_fence_ops dma_fence_array_ops = { EXPORT_SYMBOL(dma_fence_array_ops);
/** - * dma_fence_array_create - Create a custom fence array + * dma_fence_array_alloc - Allocate a custom fence array * @num_fences: [in] number of fences to add in the array - * @fences: [in] array containing the fences - * @context: [in] fence context to use - * @seqno: [in] sequence number to use - * @signal_on_any: [in] signal on any fence in the array + * @fences: [in] optional array containing the fences * - * Allocate a dma_fence_array object and initialize the base fence with - * dma_fence_init(). - * In case of error it returns NULL. + * Allocate a dma_fence_array object, in case of error it returns NULL. * - * The caller should allocate the fences array with num_fences size - * and fill it with the fences it wants to add to the object. Ownership of this - * array is taken and dma_fence_put() is used on each fence on release. - * - * If @signal_on_any is true the fence array signals if any fence in the array - * signals, otherwise it signals when all fences in the array signal. + * The fences array is optional and if NULL allocated together with the + * dma_fence_array object. */ -struct dma_fence_array *dma_fence_array_create(int num_fences, - struct dma_fence **fences, - u64 context, unsigned seqno, - bool signal_on_any) +struct dma_fence_array *dma_fence_array_alloc(int num_fences, + struct dma_fence **fences) { struct dma_fence_array *array; size_t size = sizeof(*array);
/* Allocate the callback structures behind the array. */ size += num_fences * sizeof(struct dma_fence_array_cb); + + /* Allocate the fences structures behind the callbacks */ + if (!fences) + size += num_fences * sizeof(struct dma_fence *); + array = kzalloc(size, GFP_KERNEL); if (!array) return NULL;
+ if (!fences) { + struct dma_fence_array_cb *cb = (void *)(&array[1]); + + num_fences = dma_fence_array_max_fences(array); + fences = (void *)(&cb[num_fences]); + } + array->fences = fences; + return array; +} +EXPORT_SYMBOL(dma_fence_array_alloc); + +/** + * dma_fence_array_init - init a custom fence array + * @array: [in] the pre-allocated array to init + * @context: [in] fence context to use + * @seqno: [in] sequence number to use + * @signal_on_any: [in] signal on any fence in the array + * + * Initialize the base fence with dma_fence_init(). + * + * The caller should allocate the fences array with num_fences size + * and fill it with the fences it wants to add to the object. Ownership of this + * array is taken and dma_fence_put() is used on each fence on release. + * + * If @signal_on_any is true the fence array signals if any fence in the array + * signals, otherwise it signals when all fences in the array signal. + */ +void dma_fence_array_init(struct dma_fence_array *array, u64 context, + unsigned int seqno, bool signal_on_any) +{ spin_lock_init(&array->lock); dma_fence_init(&array->base, &dma_fence_array_ops, &array->lock, context, seqno); init_irq_work(&array->work, irq_dma_fence_array_work);
- array->num_fences = num_fences; - atomic_set(&array->num_pending, signal_on_any ? 1 : num_fences); - array->fences = fences; - + atomic_set(&array->num_pending, signal_on_any ? 1 : array->num_fences); array->base.error = PENDING_ERROR; +} +EXPORT_SYMBOL(dma_fence_array_init);
- return array; +/** + * dma_fence_array_free - free a dma_fence_array object + * @array: the object to free + * + * The a dma_fence_array and drop all references to the fences it contains. + */ +void dma_fence_array_free(struct dma_fence_array *array) +{ + unsigned i; + + if (!array) + return; + + for (i = 0; i < array->num_fences; ++i) + dma_fence_put(array->fences[i]); + + /* Check if fences are part of the array */ + if ((u8 *)array->fences < (u8 *)array || + (u8 *)array->fences > ((u8 *)array) + ksize(array)) + kfree(array->fences); + + dma_fence_free(&array->base); } -EXPORT_SYMBOL(dma_fence_array_create); +EXPORT_SYMBOL(dma_fence_array_free);
/** * dma_fence_match_context - Check if all fences are from the given context diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index f99cd7eb24e0..be85c06b524d 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -14,6 +14,7 @@
#include <linux/dma-fence.h> #include <linux/irq_work.h> +#include <linux/slab.h>
/** * struct dma_fence_array_cb - callback helper for fence array @@ -74,10 +75,51 @@ to_dma_fence_array(struct dma_fence *fence) return container_of(fence, struct dma_fence_array, base); }
-struct dma_fence_array *dma_fence_array_create(int num_fences, - struct dma_fence **fences, - u64 context, unsigned seqno, - bool signal_on_any); +/** + * dma_fence_array_max_fences - calculate maximum number of fences + * @array: [in] the dma_fence_array to use + * + * Returns the maximum number of fences a dma_fence_array can store. + */ +static inline unsigned int +dma_fence_array_max_fences(struct dma_fence_array *array) +{ + return (ksize(array) - sizeof(*array)) / + (sizeof(struct dma_fence_array_cb) + + sizeof(struct dma_fence *)); +} + +struct dma_fence_array *dma_fence_array_alloc(int num_fences, + struct dma_fence **fences); +void dma_fence_array_init(struct dma_fence_array *array, u64 context, + unsigned int seqno, bool signal_on_any); +void dma_fence_array_free(struct dma_fence_array *array); + +/** + * dma_fence_array_create - Create a custom fence array + * @num_fences: [in] number of fences to add in the array + * @fences: [in] array containing the fences + * @context: [in] fence context to use + * @seqno: [in] sequence number to use + * @signal_on_any: [in] signal on any fence in the array + * + * Wrapper around dma_fence_array_alloc and dma_fence_array_init. Returns NULL + * on allocation failure. + */ +static inline struct dma_fence_array * +dma_fence_array_create(int num_fences, struct dma_fence **fences, u64 context, + unsigned seqno, bool signal_on_any) +{ + struct dma_fence_array *array; + + array = dma_fence_array_alloc(num_fences, fences); + if (!array) + return NULL; + + array->num_fences = num_fences; + dma_fence_array_init(array, context, seqno, signal_on_any); + return array; +}
bool dma_fence_match_context(struct dma_fence *fence, u64 context);
Try to recycle an dma_fence_array object by dropping the last reference to it without freeing it.
v2: fix the WARN_ON_ONCE recycle test after rebase
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-fence-array.c | 28 ++++++++++++++++++++++++++++ include/linux/dma-fence-array.h | 1 + 2 files changed, 29 insertions(+)
diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c index 4664607f0abc..ea7713b40514 100644 --- a/drivers/dma-buf/dma-fence-array.c +++ b/drivers/dma-buf/dma-fence-array.c @@ -198,6 +198,34 @@ void dma_fence_array_init(struct dma_fence_array *array, u64 context, } EXPORT_SYMBOL(dma_fence_array_init);
+/** + * dma_fence_array_reuse - dummy for dma_fence_array_recycle + */ +static void dma_fence_array_reuse(struct kref *kref) +{ + struct dma_fence_array *array = container_of(kref, typeof(*array), + base.refcount); + + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &array->base.flags)) + WARN_ON_ONCE(!list_empty(&array->base.cb_list)); +} + +/** + * dma_fence_array_try_reuse - try to reuse a dma_fence_array object + * @array: array which we should try to reuse + * + * Try to drop the last reference to an dma_fence_array and so allow it to be + * reused. + * + * Returns true if this was the last reference then caller can reuse the array. + * In this case the array is reset into a state where it can be used with + * dma_fence_array_init again. + */ +bool dma_fence_array_recycle(struct dma_fence_array *array) +{ + return kref_put(&array->base.refcount, dma_fence_array_reuse); +} + /** * dma_fence_array_free - free a dma_fence_array object * @array: the object to free diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index be85c06b524d..35d1d1e7c93b 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -93,6 +93,7 @@ struct dma_fence_array *dma_fence_array_alloc(int num_fences, struct dma_fence **fences); void dma_fence_array_init(struct dma_fence_array *array, u64 context, unsigned int seqno, bool signal_on_any); +bool dma_fence_array_recycle(struct dma_fence_array *array); void dma_fence_array_free(struct dma_fence_array *array);
/**
Makes it easier to extract the fences in a dma_fence_array.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-fence-array.c | 42 +++++++++++++++++++++++++++++++ include/linux/dma-fence-array.h | 24 ++++++++++++++++++ 2 files changed, 66 insertions(+)
diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c index ea7713b40514..3bc2663a3f30 100644 --- a/drivers/dma-buf/dma-fence-array.c +++ b/drivers/dma-buf/dma-fence-array.c @@ -276,3 +276,45 @@ bool dma_fence_match_context(struct dma_fence *fence, u64 context) return true; } EXPORT_SYMBOL(dma_fence_match_context); + +/** + * dma_fence_array_first - return the first fence in an array + * @cursor: cursor for the current position + * @array: array with the fences + * + * Returns the first fence in the array or NULL if the array is empty. + * If the array parameter isn't a dma_fence_array return it unmodified. + */ +struct dma_fence *dma_fence_array_first(struct dma_fence_array_cursor *cursor, + struct dma_fence *array) +{ + cursor->array = to_dma_fence_array(array); + if (!cursor->array) + return array; + + if (!cursor->array->num_fences) + return NULL; + + cursor->index = 0; + return cursor->array->fences[cursor->index++]; + +} +EXPORT_SYMBOL(dma_fence_array_first); + +/** + * dma_fence_array_next - return the next fence in the array + * @cursor: cursor for the current position + * + * Returns the next fence from the array or NULL if we don't have any more + */ +struct dma_fence *dma_fence_array_next(struct dma_fence_array_cursor *cursor) +{ + if (!cursor->array) + return NULL; + + if (cursor->index >= cursor->array->num_fences) + return NULL; + + return cursor->array->fences[cursor->index++]; +} +EXPORT_SYMBOL(dma_fence_array_next); diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index 35d1d1e7c93b..2a71fd003dfa 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -124,4 +124,28 @@ dma_fence_array_create(int num_fences, struct dma_fence **fences, u64 context,
bool dma_fence_match_context(struct dma_fence *fence, u64 context);
+/** + * struct dma_fence_array_cursor - cursor for the fences in an array + */ +struct dma_fence_array_cursor { + struct dma_fence_array *array; + unsigned int index; +}; + +struct dma_fence *dma_fence_array_first(struct dma_fence_array_cursor *cursor, + struct dma_fence *array); +struct dma_fence *dma_fence_array_next(struct dma_fence_array_cursor *cursor); + +/** + * dma_fence_array_for_each - walk over all fences in a fence array + * @fence: the current fence + * @cursor: cursor for the walk + * @array: potentitally fence array + * + * Walk over all the fences in the array. + */ +#define dma_fence_array_for_each(fence, cursor, array) \ + for (fence = dma_fence_array_first(&(cursor), array); \ + fence; fence = dma_fence_array_next(&(cursor))) + #endif /* __LINUX_DMA_FENCE_ARRAY_H */
Add a new dma_resv_prune_fences() function to improve memory management.
v2: fix potential NULL deref
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-resv.c | 37 ++++++++++++++++++++++ drivers/gpu/drm/i915/gem/i915_gem_wait.c | 3 +- drivers/gpu/drm/i915/i915_gem_batch_pool.c | 2 +- drivers/gpu/drm/i915/i915_vma.c | 3 +- drivers/gpu/drm/ttm/ttm_bo.c | 2 +- include/linux/dma-resv.h | 1 + 6 files changed, 42 insertions(+), 6 deletions(-)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 42a8f3f11681..59fbcd9f4b01 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -301,6 +301,43 @@ void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence) } EXPORT_SYMBOL(dma_resv_add_excl_fence);
+/** + * dma_resv_prune_fences - prune signaled fences from the resv object + * @obj: the reservation object to prune + * + * Prune all signaled fences from the reservation object. + */ +void dma_resv_prune_fences(struct dma_resv *obj) +{ + struct dma_resv_list *list; + struct dma_fence *fence; + unsigned int i; + + dma_resv_assert_held(obj); + + fence = dma_resv_get_excl(obj); + if (fence && dma_fence_is_signaled(fence)) { + RCU_INIT_POINTER(obj->fence_excl, NULL); + dma_fence_put(fence); + } + + list = dma_resv_get_list(obj); + if (!list) + return; + + for (i = 0; i < list->shared_count; ++i) { + fence = rcu_dereference_protected(list->shared[i], + dma_resv_held(obj)); + + if (!dma_fence_is_signaled(fence)) + continue; + + RCU_INIT_POINTER(list->shared[i], dma_fence_get_stub()); + dma_fence_put(fence); + } +} +EXPORT_SYMBOL(dma_resv_prune_fences); + /** * dma_resv_copy_fences - Copy all fences from src to dst. * @dst: the destination reservation object diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index 8af55cd3e690..a76d36f8fb77 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -85,8 +85,7 @@ i915_gem_object_wait_reservation(struct dma_resv *resv, * signaled. */ if (prune_fences && dma_resv_trylock(resv)) { - if (dma_resv_test_signaled_rcu(resv, true)) - dma_resv_add_excl_fence(resv, NULL); + dma_resv_prune_fences(resv); dma_resv_unlock(resv); }
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c index 5f82a763e64c..274cf5b19fc9 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c @@ -114,7 +114,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, */ if (rcu_access_pointer(resv->fence)) { dma_resv_lock(resv, NULL); - dma_resv_add_excl_fence(resv, NULL); + dma_resv_prune_fences(resv); dma_resv_unlock(resv); } } diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index ebfd03d117cd..fcbe433a968c 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -100,8 +100,7 @@ static void __i915_vma_retire(struct i915_active *ref)
/* Prune the shared fence arrays iff completely idle (inc. external) */ if (dma_resv_trylock(obj->base.resv)) { - if (dma_resv_test_signaled_rcu(obj->base.resv, true)) - dma_resv_add_excl_fence(obj->base.resv, NULL); + dma_resv_prune_fences(obj->base.resv); dma_resv_unlock(obj->base.resv); }
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 58d1f2b28132..f78f52cc2e6d 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1818,7 +1818,7 @@ int ttm_bo_wait(struct ttm_buffer_object *bo, if (timeout == 0) return -EBUSY;
- dma_resv_add_excl_fence(bo->base.resv, NULL); + dma_resv_prune_fences(bo->base.resv); return 0; } EXPORT_SYMBOL(ttm_bo_wait); diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index ee50d10f052b..03b0f95682b0 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -279,6 +279,7 @@ int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences); void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence);
void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence); +void dma_resv_prune_fences(struct dma_resv *obj);
int dma_resv_get_fences_rcu(struct dma_resv *obj, struct dma_fence **pfence_excl,
Add a new container for fences which internally uses dma_fence_array's to store the fences.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-resv.c | 181 +++++++++++++++++++++++++++++++++++++ include/linux/dma-resv.h | 49 ++++++++++ 2 files changed, 230 insertions(+)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 59fbcd9f4b01..d67eaa3fa650 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -33,6 +33,7 @@ */
#include <linux/dma-resv.h> +#include <linux/dma-fence-array.h> #include <linux/export.h>
/** @@ -55,6 +56,186 @@ EXPORT_SYMBOL(reservation_seqcount_class); const char reservation_seqcount_string[] = "reservation_seqcount"; EXPORT_SYMBOL(reservation_seqcount_string);
+static void dma_resv_fences_init(struct dma_resv_fences *fences) +{ + RCU_INIT_POINTER(fences->fence, NULL); + fences->staged = NULL; +} + +static void dma_resv_fences_fini(struct dma_resv_fences *fences) +{ + /* + * This object should be dead and all references must have + * been released to it, so no need to be protected with rcu. + */ + dma_fence_put(rcu_dereference_protected(fences->fence, true)); + dma_fence_array_free(fences->staged); +} + +/** + * dma_resv_fences_reserve - allocate fence slots + * @fences: fences object where we need slots + * @num_fences: number of fence slots we need + * + * Make sure that we have at least @num_fences + all the existing ones free + * slots in the staged dma_fence_array. + * + * Returns -ENOMEM on allocation failure, 0 otherwise. + */ +int dma_resv_fences_reserve(struct dma_resv *obj, + struct dma_resv_fences *fences, + unsigned int num_fences) +{ + struct dma_fence *fence = dma_resv_fences_deref(obj, fences); + struct dma_fence_array *staged, *array; + unsigned int i; + + array = fences->staged; + if (!array) + array = to_dma_fence_array(fence); + + if (array) + num_fences += array->num_fences; + else if (fence) + num_fences += 1; + + staged = fences->staged; + if (staged && dma_fence_array_max_fences(staged) >= num_fences) + return 0; + + staged = dma_fence_array_alloc(num_fences, NULL); + if (!staged) + return -ENOMEM; + + /* Copy over all fences from the old object */ + if (array) { + for (i = 0; i < array->num_fences; ++i) { + struct dma_fence *f = array->fences[i]; + + staged->fences[i] = dma_fence_get(f); + } + staged->num_fences = array->num_fences; + + } else if (fence) { + staged->fences[0] = dma_fence_get(fence); + staged->num_fences = 1; + + } else { + staged->num_fences = 0; + } + + dma_fence_array_free(fences->staged); + fences->staged = staged; + + return 0; +} +EXPORT_SYMBOL(dma_resv_fences_reserve); + +/** + * dma_resv_fences_set - set the singleton fence + * @fences: fences object where to set the fence + * @fence: singleton fence for the object + * + * Grabs a reference to the new fence and replaces the current singleton fence + * with a new one and drop any staged fences. + */ +void dma_resv_fences_set(struct dma_resv *obj, + struct dma_resv_fences *fences, + struct dma_fence *fence) +{ + struct dma_fence *old = dma_resv_fences_deref(obj, fences); + + rcu_assign_pointer(fences->fence, dma_fence_get(fence)); + dma_fence_array_free(fences->staged); + fences->staged = NULL; + dma_fence_put(old); +} +EXPORT_SYMBOL(dma_resv_fences_set); + +/** + * dma_resv_fences_add - add a fence to the staged fence_array + * @fences: fences object where to add the fence to + * @fence: fence to add + * + * Add a new fence to the staged fence_array. + */ +void dma_resv_fences_add(struct dma_resv_fences *fences, + struct dma_fence *fence) +{ + struct dma_fence_array *staged = fences->staged; + struct dma_fence *old; + unsigned int i; + + for (i = 0; i < staged->num_fences; ++i) { + old = staged->fences[i]; + + if (old->context == fence->context +#ifndef CONFIG_DEBUG_MUTEXES + || dma_fence_is_signaled(old) +#endif + ) { + dma_fence_put(old); + goto replace; + } + } + + BUG_ON(staged->num_fences >= dma_fence_array_max_fences(staged)); + i = staged->num_fences++; + +replace: + staged->fences[i] = dma_fence_get(fence); +} +EXPORT_SYMBOL(dma_resv_fences_add); + +/** + * dma_resv_fences_commit - commit the staged dma_fence_array + * @fences: fences object where the commit should happen + * + * Commit the fences staged in the dma_fence_array and make them visible to + * other threads. + */ +void dma_resv_fences_commit(struct dma_resv *obj, + struct dma_resv_fences *fences) +{ + struct dma_fence *old = dma_resv_fences_deref(obj, fences); + struct dma_fence_array *array = fences->staged, *staged; + unsigned int i; + + if (!array || !array->num_fences) + return; + + fences->staged = NULL; + dma_fence_array_init(array, dma_fence_context_alloc(1), 1, false); + rcu_assign_pointer(fences->fence, &array->base); + + /* Try to recycle the old fence array */ + staged = to_dma_fence_array(old); + if (!staged || dma_fence_array_max_fences(staged) < array->num_fences) { + dma_fence_put(old); + return; + } + + /* Try to drop the last reference */ + if (!dma_fence_array_recycle(staged)) + return; + + /* Make sure the staged array has the latest fences */ + for (i = 0; i < array->num_fences; ++i) { + struct dma_fence *f = array->fences[i]; + + if (f == staged->fences[i]) + continue; + + dma_fence_put(staged->fences[i]); + staged->fences[i] = dma_fence_get(f); + } + for (;i < staged->num_fences; ++i) + dma_fence_put(staged->fences[i]); + staged->num_fences = array->num_fences; + fences->staged = staged; +} +EXPORT_SYMBOL(dma_resv_fences_commit); + /** * dma_resv_list_alloc - allocate fence list * @shared_max: number of fences we need space for diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 03b0f95682b0..c70f13fa6789 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -45,10 +45,33 @@ #include <linux/seqlock.h> #include <linux/rcupdate.h>
+struct dma_resv; + extern struct ww_class reservation_ww_class; extern struct lock_class_key reservation_seqcount_class; extern const char reservation_seqcount_string[];
+/** + * struct dma_resv_fences - fences inside a reservation object + * @fence: the current RCU protected singleton fence + * @staged: optional staged dma_fence_array to replace @fence + */ +struct dma_resv_fences { + struct dma_fence __rcu *fence; + struct dma_fence_array *staged; +}; + +int dma_resv_fences_reserve(struct dma_resv *obj, + struct dma_resv_fences *fences, + unsigned int num_fences); +void dma_resv_fences_set(struct dma_resv *obj, + struct dma_resv_fences *fences, + struct dma_fence *fence); +void dma_resv_fences_add(struct dma_resv_fences *fences, + struct dma_fence *fence); +void dma_resv_fences_commit(struct dma_resv *obj, + struct dma_resv_fences *fences); + /** * struct dma_resv_list - a list of shared fences * @rcu: for internal use @@ -80,6 +103,32 @@ struct dma_resv { #define dma_resv_held(obj) lockdep_is_held(&(obj)->lock.base) #define dma_resv_assert_held(obj) lockdep_assert_held(&(obj)->lock.base)
+/** + * dma_resv_fences_deref - get singleton fence + * @obj: the reservation object + * @fences: the fences object + * + * Returns the singleton fence from a resv_fences object. + */ +static inline struct dma_fence * +dma_resv_fences_deref(struct dma_resv *obj, struct dma_resv_fences *fences) +{ + return rcu_dereference_protected(fences->fence, + dma_resv_held(obj)); +} + +/** + * dma_resv_fences_get_rcu - RCU get single fence + * @fences: fences structure where we need to get a reference for + * + * Get a reference to the single fence representing the synchronization. + */ +static inline struct dma_fence * +dma_resv_fences_get_rcu(struct dma_resv_fences *fences) +{ + return dma_fence_get_rcu_safe(&fences->fence); +} + /** * dma_resv_get_list - get the reservation object's * shared fence list, with update-side lock held
We can now grab a reference to all the fences in question, no need for the sequence counter any more.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-resv.c | 22 ++-------------------- drivers/gpu/drm/i915/gem/i915_gem_busy.c | 13 ++++--------- include/linux/dma-resv.h | 3 --- 3 files changed, 6 insertions(+), 32 deletions(-)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 7fa0e86b4e75..51067edff930 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -50,12 +50,6 @@ DEFINE_WD_CLASS(reservation_ww_class); EXPORT_SYMBOL(reservation_ww_class);
-struct lock_class_key reservation_seqcount_class; -EXPORT_SYMBOL(reservation_seqcount_class); - -const char reservation_seqcount_string[] = "reservation_seqcount"; -EXPORT_SYMBOL(reservation_seqcount_string); - static void dma_resv_fences_init(struct dma_resv_fences *fences) { RCU_INIT_POINTER(fences->fence, NULL); @@ -244,8 +238,6 @@ void dma_resv_init(struct dma_resv *obj) { ww_mutex_init(&obj->lock, &reservation_ww_class);
- __seqcount_init(&obj->seq, reservation_seqcount_string, - &reservation_seqcount_class); RCU_INIT_POINTER(obj->fence_excl, NULL); dma_resv_fences_init(&obj->shared); } @@ -321,13 +313,8 @@ void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence)
dma_fence_get(fence);
- preempt_disable(); - write_seqcount_begin(&obj->seq); - /* write_seqcount_begin provides the necessary memory barrier */ - RCU_INIT_POINTER(obj->fence_excl, fence); + rcu_assign_pointer(obj->fence_excl, fence); dma_resv_fences_set(obj, &obj->shared, NULL); - write_seqcount_end(&obj->seq); - preempt_enable();
dma_fence_put(old_fence); } @@ -377,13 +364,8 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src)
old = dma_resv_get_excl(dst);
- preempt_disable(); - write_seqcount_begin(&dst->seq); - /* write_seqcount_begin provides the necessary memory barrier */ - RCU_INIT_POINTER(dst->fence_excl, excl); + rcu_assign_pointer(dst->fence_excl, excl); dma_resv_fences_set(dst, &dst->shared, shared); - write_seqcount_end(&dst->seq); - preempt_enable();
dma_fence_put(old);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c index 0ef338a8cd9f..34256fff1f90 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c @@ -86,7 +86,6 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, struct dma_fence_array_cursor cursor; struct dma_fence *fence, *shared; struct drm_i915_gem_object *obj; - unsigned int seq; int err;
err = -ENOENT; @@ -112,22 +111,18 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, * to report the overall busyness. This is what the wait-ioctl does. * */ -retry: - seq = raw_read_seqcount(&obj->base.resv->seq); + shared = dma_resv_fences_get_rcu(&obj->base.resv->shared); + fence = dma_fence_get_rcu_safe(&obj->base.resv->fence_excl);
/* Translate the exclusive fence to the READ *and* WRITE engine */ - args->busy = - busy_check_writer(rcu_dereference(obj->base.resv->fence_excl)); + args->busy = busy_check_writer(fence); + dma_fence_put(fence);
/* Translate shared fences to READ set of engines */ - shared = dma_resv_fences_get_rcu(&obj->base.resv->shared); dma_fence_array_for_each(fence, cursor, shared) args->busy |= busy_check_reader(fence); dma_fence_put(shared);
- if (args->busy && read_seqcount_retry(&obj->base.resv->seq, seq)) - goto retry; - err = 0; out: rcu_read_unlock(); diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 040e3f04a8ad..44f975d772e8 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -48,8 +48,6 @@ struct dma_resv;
extern struct ww_class reservation_ww_class; -extern struct lock_class_key reservation_seqcount_class; -extern const char reservation_seqcount_string[];
/** * struct dma_resv_fences - fences inside a reservation object @@ -81,7 +79,6 @@ void dma_resv_fences_commit(struct dma_resv *obj, */ struct dma_resv { struct ww_mutex lock; - seqcount_t seq;
struct dma_fence __rcu *fence_excl; struct dma_resv_fences shared;
On Mon, Aug 26, 2019 at 04:57:22PM +0200, Christian König wrote:
This is the new dma_fence_array based container for shared fences in the dma_resv object.
Advantage of this approach is that you can grab a reference to the current set of shared fences at any time, which allows us to drop the sequence number increment and makes the whole RCU handling much more easier.
Disadvantage is that RCU users now have to grab a reference instead of using the sequence counter. As far as I can see i915 was actually the only driver doing this.
So we optimize for adding more fences instead of reading them now.
Another behavior change worth noting is that the shared fences are now only visible after unlocking the dma_resv object or calling dma_resv_fences_commit() manually.
I think more specific point for publishing fences makes a lot of sense, so this sounds like a solid improvement on the dma_resv api. I'm working on some dma_fence instrumentation where that at least might be useful.
/me back to burried state
Cheers, Daniel
linaro-mm-sig@lists.linaro.org