Hi guys,
A few more bug fixes; it seems the more selftests I add, the more oddities I find.
Assuming the CI tests now pass, I will start pushing the patches I've already got an rb for to drm-misc-next.
Please review and/or comment, Christian.
Abstract the complexity of iterating over all the fences in a dma_resv object.
The new loop handles the whole RCU and retry dance and returns only fences where we can be sure we grabbed the right one.
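For illustration, a minimal usage sketch (the driver code around it is hypothetical; the iterator API is the one added below):

	struct dma_resv_iter cursor;
	struct dma_fence *fence;

	dma_resv_iter_begin(&cursor, obj, true);
	dma_resv_for_each_fence_unlocked(&cursor, fence) {
		/* A reference to fence is held here and the RCU read
		 * lock is dropped, so the body may sleep or take locks.
		 */
	}
	dma_resv_iter_end(&cursor);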
v2: fix accessing the shared fences while they might be freed, improve kerneldoc, rename _cursor to _iter, add dma_resv_iter_is_exclusive, add dma_resv_iter_begin/end
v3: restructure the code, move rcu_read_lock()/unlock() into the iterator, add dma_resv_iter_is_restarted()
v4: fix NULL deref when no explicit fence exists, drop superfluous rcu_read_lock()/unlock() calls.
v5: fix typos in the documentation
v6: fix coding error when excl fence is NULL
v7: one more logic fix
v8: fix index check in dma_resv_iter_is_exclusive()
Signed-off-by: Christian König christian.koenig@amd.com
Reviewed-by: Tvrtko Ursulin tvrtko.ursulin@intel.com (v7)
---
 drivers/dma-buf/dma-resv.c | 100 +++++++++++++++++++++++++++++++++++++
 include/linux/dma-resv.h   |  95 +++++++++++++++++++++++++++++++++++
 2 files changed, 195 insertions(+)

diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 84fbe60629e3..3cbcf66a137e 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -323,6 +323,106 @@ void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence)
 }
 EXPORT_SYMBOL(dma_resv_add_excl_fence);
 
+/**
+ * dma_resv_iter_restart_unlocked - restart the unlocked iterator
+ * @cursor: The dma_resv_iter object to restart
+ *
+ * Restart the unlocked iteration by initializing the cursor object.
+ */
+static void dma_resv_iter_restart_unlocked(struct dma_resv_iter *cursor)
+{
+	cursor->seq = read_seqcount_begin(&cursor->obj->seq);
+	cursor->index = -1;
+	if (cursor->all_fences)
+		cursor->fences = dma_resv_shared_list(cursor->obj);
+	else
+		cursor->fences = NULL;
+	cursor->is_restarted = true;
+}
+
+/**
+ * dma_resv_iter_walk_unlocked - walk over fences in a dma_resv obj
+ * @cursor: cursor to record the current position
+ *
+ * Return all the fences in the dma_resv object which are not yet signaled.
+ * The returned fence has an extra local reference so will stay alive.
+ * If a concurrent modify is detected the whole iteration is started over again.
+ */
+static void dma_resv_iter_walk_unlocked(struct dma_resv_iter *cursor)
+{
+	struct dma_resv *obj = cursor->obj;
+
+	do {
+		/* Drop the reference from the previous round */
+		dma_fence_put(cursor->fence);
+
+		if (cursor->index == -1) {
+			cursor->fence = dma_resv_excl_fence(obj);
+			cursor->index++;
+			if (!cursor->fence)
+				continue;
+
+		} else if (!cursor->fences ||
+			   cursor->index >= cursor->fences->shared_count) {
+			cursor->fence = NULL;
+			break;
+
+		} else {
+			struct dma_resv_list *fences = cursor->fences;
+			unsigned int idx = cursor->index++;
+
+			cursor->fence = rcu_dereference(fences->shared[idx]);
+		}
+		cursor->fence = dma_fence_get_rcu(cursor->fence);
+		if (!cursor->fence || !dma_fence_is_signaled(cursor->fence))
+			break;
+	} while (true);
+}
+
+/**
+ * dma_resv_iter_first_unlocked - first fence in an unlocked dma_resv obj.
+ * @cursor: the cursor with the current position
+ *
+ * Returns the first fence from an unlocked dma_resv obj.
+ */
+struct dma_fence *dma_resv_iter_first_unlocked(struct dma_resv_iter *cursor)
+{
+	rcu_read_lock();
+	do {
+		dma_resv_iter_restart_unlocked(cursor);
+		dma_resv_iter_walk_unlocked(cursor);
+	} while (read_seqcount_retry(&cursor->obj->seq, cursor->seq));
+	rcu_read_unlock();
+
+	return cursor->fence;
+}
+EXPORT_SYMBOL(dma_resv_iter_first_unlocked);
+
+/**
+ * dma_resv_iter_next_unlocked - next fence in an unlocked dma_resv obj.
+ * @cursor: the cursor with the current position
+ *
+ * Returns the next fence from an unlocked dma_resv obj.
+ */
+struct dma_fence *dma_resv_iter_next_unlocked(struct dma_resv_iter *cursor)
+{
+	bool restart;
+
+	rcu_read_lock();
+	cursor->is_restarted = false;
+	restart = read_seqcount_retry(&cursor->obj->seq, cursor->seq);
+	do {
+		if (restart)
+			dma_resv_iter_restart_unlocked(cursor);
+		dma_resv_iter_walk_unlocked(cursor);
+		restart = true;
+	} while (read_seqcount_retry(&cursor->obj->seq, cursor->seq));
+	rcu_read_unlock();
+
+	return cursor->fence;
+}
+EXPORT_SYMBOL(dma_resv_iter_next_unlocked);
+
 /**
  * dma_resv_copy_fences - Copy all fences from src to dst.
  * @dst: the destination reservation object
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index 9100dd3dc21f..764138ad8583 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -149,6 +149,101 @@ struct dma_resv {
 	struct dma_resv_list __rcu *fence;
 };
 
+/**
+ * struct dma_resv_iter - current position into the dma_resv fences
+ *
+ * Don't touch this directly in the driver, use the accessor function instead.
+ */
+struct dma_resv_iter {
+	/** @obj: The dma_resv object we iterate over */
+	struct dma_resv *obj;
+
+	/** @all_fences: If all fences should be returned */
+	bool all_fences;
+
+	/** @fence: the currently handled fence */
+	struct dma_fence *fence;
+
+	/** @seq: sequence number to check for modifications */
+	unsigned int seq;
+
+	/** @index: index into the shared fences */
+	unsigned int index;
+
+	/** @fences: the shared fences */
+	struct dma_resv_list *fences;
+
+	/** @is_restarted: true if this is the first returned fence */
+	bool is_restarted;
+};
+
+struct dma_fence *dma_resv_iter_first_unlocked(struct dma_resv_iter *cursor);
+struct dma_fence *dma_resv_iter_next_unlocked(struct dma_resv_iter *cursor);
+
+/**
+ * dma_resv_iter_begin - initialize a dma_resv_iter object
+ * @cursor: The dma_resv_iter object to initialize
+ * @obj: The dma_resv object which we want to iterate over
+ * @all_fences: If all fences should be returned or just the exclusive one
+ */
+static inline void dma_resv_iter_begin(struct dma_resv_iter *cursor,
+				       struct dma_resv *obj,
+				       bool all_fences)
+{
+	cursor->obj = obj;
+	cursor->all_fences = all_fences;
+	cursor->fence = NULL;
+}
+
+/**
+ * dma_resv_iter_end - cleanup a dma_resv_iter object
+ * @cursor: the dma_resv_iter object which should be cleaned up
+ *
+ * Make sure that the reference to the fence in the cursor is properly
+ * dropped.
+ */
+static inline void dma_resv_iter_end(struct dma_resv_iter *cursor)
+{
+	dma_fence_put(cursor->fence);
+}
+
+/**
+ * dma_resv_iter_is_exclusive - test if the current fence is the exclusive one
+ * @cursor: the cursor of the current position
+ *
+ * Returns true if the currently returned fence is the exclusive one.
+ */
+static inline bool dma_resv_iter_is_exclusive(struct dma_resv_iter *cursor)
+{
+	return cursor->index == 0;
+}
+
+/**
+ * dma_resv_iter_is_restarted - test if this is the first fence after a restart
+ * @cursor: the cursor with the current position
+ *
+ * Return true if this is the first fence in an iteration after a restart.
+ */
+static inline bool dma_resv_iter_is_restarted(struct dma_resv_iter *cursor)
+{
+	return cursor->is_restarted;
+}
+
+/**
+ * dma_resv_for_each_fence_unlocked - unlocked fence iterator
+ * @cursor: a struct dma_resv_iter pointer
+ * @fence: the current fence
+ *
+ * Iterate over the fences in a struct dma_resv object without holding the
+ * &dma_resv.lock and using RCU instead. The cursor needs to be initialized
+ * with dma_resv_iter_begin() and cleaned up with dma_resv_iter_end(). Inside
+ * the iterator a reference to the dma_fence is held and the RCU lock dropped.
+ * When the dma_resv is modified the iteration starts over again.
+ */
+#define dma_resv_for_each_fence_unlocked(cursor, fence)			\
+	for (fence = dma_resv_iter_first_unlocked(cursor);		\
+	     fence; fence = dma_resv_iter_next_unlocked(cursor))
+
 #define dma_resv_held(obj) lockdep_is_held(&(obj)->lock.base)
 #define dma_resv_assert_held(obj) lockdep_assert_held(&(obj)->lock.base)
A simpler version of the iterator to be used when the dma_resv object is locked.
v2: fix index check here as well
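As a usage sketch (the caller is hypothetical; the names are the ones introduced below), the locked variant folds the cursor setup into the macro and only requires that the reservation lock is held:

	struct dma_resv_iter cursor;
	struct dma_fence *fence;

	dma_resv_lock(obj, NULL);
	dma_resv_for_each_fence(&cursor, obj, true, fence) {
		/* fence stays valid as long as obj is locked;
		 * no extra reference is taken here.
		 */
	}
	dma_resv_unlock(obj);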
Signed-off-by: Christian König christian.koenig@amd.com
---
 drivers/dma-buf/dma-resv.c | 49 ++++++++++++++++++++++++++++++++++++++
 include/linux/dma-resv.h   | 19 +++++++++++++++
 2 files changed, 68 insertions(+)

diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 3cbcf66a137e..231bae173ef1 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -423,6 +423,55 @@ struct dma_fence *dma_resv_iter_next_unlocked(struct dma_resv_iter *cursor)
 }
 EXPORT_SYMBOL(dma_resv_iter_next_unlocked);
 
+/**
+ * dma_resv_iter_first - first fence from a locked dma_resv object
+ * @cursor: cursor to record the current position
+ *
+ * Return all the fences in the dma_resv object while holding the
+ * &dma_resv.lock.
+ */
+struct dma_fence *dma_resv_iter_first(struct dma_resv_iter *cursor)
+{
+	struct dma_fence *fence;
+
+	dma_resv_assert_held(cursor->obj);
+
+	cursor->index = 0;
+	cursor->fences = dma_resv_shared_list(cursor->obj);
+
+	fence = dma_resv_excl_fence(cursor->obj);
+	if (!fence)
+		fence = dma_resv_iter_next(cursor);
+
+	cursor->is_restarted = true;
+	return fence;
+}
+EXPORT_SYMBOL_GPL(dma_resv_iter_first);
+
+/**
+ * dma_resv_iter_next - next fence from a locked dma_resv object
+ * @cursor: cursor to record the current position
+ *
+ * Return all the fences in the dma_resv object while holding the
+ * &dma_resv.lock.
+ */
+struct dma_fence *dma_resv_iter_next(struct dma_resv_iter *cursor)
+{
+	unsigned int idx;
+
+	dma_resv_assert_held(cursor->obj);
+
+	cursor->is_restarted = false;
+	if (!cursor->all_fences || !cursor->fences ||
+	    cursor->index >= cursor->fences->shared_count)
+		return NULL;
+
+	idx = cursor->index++;
+	return rcu_dereference_protected(cursor->fences->shared[idx],
+					 dma_resv_held(cursor->obj));
+}
+EXPORT_SYMBOL_GPL(dma_resv_iter_next);
+
 /**
  * dma_resv_copy_fences - Copy all fences from src to dst.
  * @dst: the destination reservation object
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index 764138ad8583..3df7ef23712d 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -179,6 +179,8 @@ struct dma_resv_iter {
 
 struct dma_fence *dma_resv_iter_first_unlocked(struct dma_resv_iter *cursor);
 struct dma_fence *dma_resv_iter_next_unlocked(struct dma_resv_iter *cursor);
+struct dma_fence *dma_resv_iter_first(struct dma_resv_iter *cursor);
+struct dma_fence *dma_resv_iter_next(struct dma_resv_iter *cursor);
 
 /**
  * dma_resv_iter_begin - initialize a dma_resv_iter object
@@ -244,6 +246,23 @@ static inline bool dma_resv_iter_is_restarted(struct dma_resv_iter *cursor)
 	for (fence = dma_resv_iter_first_unlocked(cursor);		\
 	     fence; fence = dma_resv_iter_next_unlocked(cursor))
 
+/**
+ * dma_resv_for_each_fence - fence iterator
+ * @cursor: a struct dma_resv_iter pointer
+ * @obj: a dma_resv object pointer
+ * @all_fences: true if all fences should be returned
+ * @fence: the current fence
+ *
+ * Iterate over the fences in a struct dma_resv object while holding the
+ * &dma_resv.lock. @all_fences controls if the shared fences are returned as
+ * well. The cursor initialisation is part of the iterator and the fence stays
+ * valid as long as the lock is held.
+ */
+#define dma_resv_for_each_fence(cursor, obj, all_fences, fence)	\
+	for (dma_resv_iter_begin(cursor, obj, all_fences),		\
+	     fence = dma_resv_iter_first(cursor); fence;		\
+	     fence = dma_resv_iter_next(cursor))
+
 #define dma_resv_held(obj) lockdep_is_held(&(obj)->lock.base)
 #define dma_resv_assert_held(obj) lockdep_assert_held(&(obj)->lock.base)
Good morning, could I get an rb for that patch here as well?
I've pushed the first few with Daniel's and your rbs, but this one here is blocking all the rest.
Thanks, Christian.
Am 05.10.21 um 13:37 schrieb Christian König:
A simpler version of the iterator to be used when the dma_resv object is locked.
v2: fix index check here as well
Signed-off-by: Christian König christian.koenig@amd.com
On 05/10/2021 12:37, Christian König wrote:
A simpler version of the iterator to be used when the dma_resv object is locked.
v2: fix index check here as well
Signed-off-by: Christian König christian.koenig@amd.com
drivers/dma-buf/dma-resv.c | 49 ++++++++++++++++++++++++++++++++++++++
include/linux/dma-resv.h   | 19 +++++++++++++++
2 files changed, 68 insertions(+)

diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 3cbcf66a137e..231bae173ef1 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -423,6 +423,55 @@ struct dma_fence *dma_resv_iter_next_unlocked(struct dma_resv_iter *cursor)
 }
 EXPORT_SYMBOL(dma_resv_iter_next_unlocked);
 
+/**
+ * dma_resv_iter_first - first fence from a locked dma_resv object
+ * @cursor: cursor to record the current position
+ *
+ * Return all the fences in the dma_resv object while holding the
+ * &dma_resv.lock.
+ */
+struct dma_fence *dma_resv_iter_first(struct dma_resv_iter *cursor)
+{
+	struct dma_fence *fence;
+
+	dma_resv_assert_held(cursor->obj);
+
+	cursor->index = 0;
+	cursor->fences = dma_resv_shared_list(cursor->obj);
+
+	fence = dma_resv_excl_fence(cursor->obj);
+	if (!fence)
+		fence = dma_resv_iter_next(cursor);
"Is restarted" probably does not matter hugely for the locked iterator but I think if it hits this path (no exclusive fence, returns first shared) then it will show it as false. Which is not consistent with the unlocked iterator.
Bonus points if you make a debug build assert that makes querying "is restarted" warn when used with the locked iterator.
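Something like this rough sketch, maybe (the locked_iter flag is hypothetical, nothing like it exists in the patch):

	/* set in dma_resv_iter_first(), cleared in
	 * dma_resv_iter_first_unlocked() */
	static inline bool dma_resv_iter_is_restarted(struct dma_resv_iter *cursor)
	{
	#ifdef CONFIG_DEBUG_MUTEXES
		WARN_ON_ONCE(cursor->locked_iter);
	#endif
		return cursor->is_restarted;
	}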
+	cursor->is_restarted = true;
+	return fence;
+}
+EXPORT_SYMBOL_GPL(dma_resv_iter_first);
+
+/**
+ * dma_resv_iter_next - next fence from a locked dma_resv object
+ * @cursor: cursor to record the current position
+ *
+ * Return all the fences in the dma_resv object while holding the
+ * &dma_resv.lock.
You probably want to replace "all the fences" with first and next, respectively, in here and in dma_resv_iter_first kerneldoc.
+ */
+struct dma_fence *dma_resv_iter_next(struct dma_resv_iter *cursor)
+{
+	unsigned int idx;
+
+	dma_resv_assert_held(cursor->obj);
+
+	cursor->is_restarted = false;
+	if (!cursor->all_fences || !cursor->fences ||
+	    cursor->index >= cursor->fences->shared_count)
+		return NULL;
Theoretically you could store the shared count in the cursor and so could have a single condition here (assuming initialized to zero when !all_fences and !cursor->fences). For some value of optimisation. :) Probably not worth it.
But you could only assign cursor->fences if all_fences, in dma_resv_iter_first, so wouldn't have to duplicate the all_fences check here.
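Roughly this, I mean (untested sketch; shared_count would be a new member of the cursor, it is not in the patch):

	/* in dma_resv_iter_first() */
	cursor->fences = cursor->all_fences ?
		dma_resv_shared_list(cursor->obj) : NULL;
	cursor->shared_count = cursor->fences ?
		cursor->fences->shared_count : 0;

	/* in dma_resv_iter_next() the check then collapses to */
	if (cursor->index >= cursor->shared_count)
		return NULL;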
+	idx = cursor->index++;
+	return rcu_dereference_protected(cursor->fences->shared[idx],
+					 dma_resv_held(cursor->obj));
+}
+EXPORT_SYMBOL_GPL(dma_resv_iter_next);
+
 /**
  * dma_resv_copy_fences - Copy all fences from src to dst.
  * @dst: the destination reservation object
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index 764138ad8583..3df7ef23712d 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -179,6 +179,8 @@ struct dma_resv_iter {
 
 struct dma_fence *dma_resv_iter_first_unlocked(struct dma_resv_iter *cursor);
 struct dma_fence *dma_resv_iter_next_unlocked(struct dma_resv_iter *cursor);
+struct dma_fence *dma_resv_iter_first(struct dma_resv_iter *cursor);
+struct dma_fence *dma_resv_iter_next(struct dma_resv_iter *cursor);
 
 /**
  * dma_resv_iter_begin - initialize a dma_resv_iter object
@@ -244,6 +246,23 @@ static inline bool dma_resv_iter_is_restarted(struct dma_resv_iter *cursor)
 	for (fence = dma_resv_iter_first_unlocked(cursor);		\
 	     fence; fence = dma_resv_iter_next_unlocked(cursor))
 
+/**
+ * dma_resv_for_each_fence - fence iterator
+ * @cursor: a struct dma_resv_iter pointer
+ * @obj: a dma_resv object pointer
+ * @all_fences: true if all fences should be returned
+ * @fence: the current fence
+ *
+ * Iterate over the fences in a struct dma_resv object while holding the
+ * &dma_resv.lock. @all_fences controls if the shared fences are returned as
+ * well. The cursor initialisation is part of the iterator and the fence stays
+ * valid as long as the lock is held.
I'd be super cautious and explicitly spell out that reference is not held in contrast to the unlocked iterator.
+ */
+#define dma_resv_for_each_fence(cursor, obj, all_fences, fence)	\
+	for (dma_resv_iter_begin(cursor, obj, all_fences),		\
+	     fence = dma_resv_iter_first(cursor); fence;		\
+	     fence = dma_resv_iter_next(cursor))
+
 #define dma_resv_held(obj) lockdep_is_held(&(obj)->lock.base)
 #define dma_resv_assert_held(obj) lockdep_assert_held(&(obj)->lock.base)
Regards,
Tvrtko
On 06/10/2021 09:40, Tvrtko Ursulin wrote:
On 05/10/2021 12:37, Christian König wrote:
+struct dma_fence *dma_resv_iter_first(struct dma_resv_iter *cursor)
+{
+	struct dma_fence *fence;
+
+	dma_resv_assert_held(cursor->obj);
+
+	cursor->index = 0;
+	cursor->fences = dma_resv_shared_list(cursor->obj);
+
+	fence = dma_resv_excl_fence(cursor->obj);
+	if (!fence)
+		fence = dma_resv_iter_next(cursor);
"Is restarted" probably does not matter hugely for the locked iterator but I think if it hits this path (no exclusive fence, returns first shared) then it will show it as false. Which is not consistent with the unlocked iterator.
Sorry, I was blind, or I don't know which version of which patch I was looking at... It is set to true a few lines below. :)
Regards,
Tvrtko
Just exercising a very minor subset of the functionality, but already proven useful.
v2: add missing locking
v3: some more cleanup and consolidation, add unlocked test as well

Signed-off-by: Christian König christian.koenig@amd.com
---
 drivers/dma-buf/Makefile      |   3 +-
 drivers/dma-buf/selftests.h   |   1 +
 drivers/dma-buf/st-dma-resv.c | 282 ++++++++++++++++++++++++++++++++++
 3 files changed, 285 insertions(+), 1 deletion(-)
 create mode 100644 drivers/dma-buf/st-dma-resv.c

diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
index 1ef021273a06..511805dbeb75 100644
--- a/drivers/dma-buf/Makefile
+++ b/drivers/dma-buf/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_DMABUF_SYSFS_STATS) += dma-buf-sysfs-stats.o
 dmabuf_selftests-y := \
 	selftest.o \
 	st-dma-fence.o \
-	st-dma-fence-chain.o
+	st-dma-fence-chain.o \
+	st-dma-resv.o
 
 obj-$(CONFIG_DMABUF_SELFTESTS) += dmabuf_selftests.o
diff --git a/drivers/dma-buf/selftests.h b/drivers/dma-buf/selftests.h
index bc8cea67bf1e..97d73aaa31da 100644
--- a/drivers/dma-buf/selftests.h
+++ b/drivers/dma-buf/selftests.h
@@ -12,3 +12,4 @@ selftest(sanitycheck, __sanitycheck__) /* keep first (igt selfcheck) */
 selftest(dma_fence, dma_fence)
 selftest(dma_fence_chain, dma_fence_chain)
+selftest(dma_resv, dma_resv)
diff --git a/drivers/dma-buf/st-dma-resv.c b/drivers/dma-buf/st-dma-resv.c
new file mode 100644
index 000000000000..50d3791ccb8c
--- /dev/null
+++ b/drivers/dma-buf/st-dma-resv.c
@@ -0,0 +1,282 @@
+/* SPDX-License-Identifier: MIT */
+
+/*
+ * Copyright © 2019 Intel Corporation
+ * Copyright © 2021 Advanced Micro Devices, Inc.
+ */
+
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/dma-resv.h>
+
+#include "selftest.h"
+
+static struct spinlock fence_lock;
+
+static const char *fence_name(struct dma_fence *f)
+{
+	return "selftest";
+}
+
+static const struct dma_fence_ops fence_ops = {
+	.get_driver_name = fence_name,
+	.get_timeline_name = fence_name,
+};
+
+static struct dma_fence *alloc_fence(void)
+{
+	struct dma_fence *f;
+
+	f = kmalloc(sizeof(*f), GFP_KERNEL);
+	if (!f)
+		return NULL;
+
+	dma_fence_init(f, &fence_ops, &fence_lock, 0, 0);
+	return f;
+}
+
+static int sanitycheck(void *arg)
+{
+	struct dma_resv resv;
+	struct dma_fence *f;
+	int r;
+
+	f = alloc_fence();
+	if (!f)
+		return -ENOMEM;
+
+	dma_fence_signal(f);
+	dma_fence_put(f);
+
+	dma_resv_init(&resv);
+	r = dma_resv_lock(&resv, NULL);
+	if (r)
+		pr_err("Resv locking failed\n");
+	else
+		dma_resv_unlock(&resv);
+	dma_resv_fini(&resv);
+	return r;
+}
+
+static int test_signaling(void *arg, bool shared)
+{
+	struct dma_resv resv;
+	struct dma_fence *f;
+	int r;
+
+	f = alloc_fence();
+	if (!f)
+		return -ENOMEM;
+
+	dma_resv_init(&resv);
+	r = dma_resv_lock(&resv, NULL);
+	if (r) {
+		pr_err("Resv locking failed\n");
+		goto err_free;
+	}
+
+	if (shared) {
+		r = dma_resv_reserve_shared(&resv, 1);
+		if (r) {
+			pr_err("Resv shared slot allocation failed\n");
+			goto err_unlock;
+		}
+
+		dma_resv_add_shared_fence(&resv, f);
+	} else {
+		dma_resv_add_excl_fence(&resv, f);
+	}
+
+	if (dma_resv_test_signaled(&resv, shared)) {
+		pr_err("Resv unexpectedly signaled\n");
+		r = -EINVAL;
+		goto err_unlock;
+	}
+	dma_fence_signal(f);
+	if (!dma_resv_test_signaled(&resv, shared)) {
+		pr_err("Resv not reporting signaled\n");
+		r = -EINVAL;
+		goto err_unlock;
+	}
+err_unlock:
+	dma_resv_unlock(&resv);
+err_free:
+	dma_resv_fini(&resv);
+	dma_fence_put(f);
+	return r;
+}
+
+static int test_excl_signaling(void *arg)
+{
+	return test_signaling(arg, false);
+}
+
+static int test_shared_signaling(void *arg)
+{
+	return test_signaling(arg, true);
+}
+
+static int test_for_each(void *arg, bool shared)
+{
+	struct dma_resv_iter cursor;
+	struct dma_fence *f, *fence;
+	struct dma_resv resv;
+	int r;
+
+	f = alloc_fence();
+	if (!f)
+		return -ENOMEM;
+
+	dma_resv_init(&resv);
+	r = dma_resv_lock(&resv, NULL);
+	if (r) {
+		pr_err("Resv locking failed\n");
+		goto err_free;
+	}
+
+	if (shared) {
+		r = dma_resv_reserve_shared(&resv, 1);
+		if (r) {
+			pr_err("Resv shared slot allocation failed\n");
+			goto err_unlock;
+		}
+
+		dma_resv_add_shared_fence(&resv, f);
+	} else {
+		dma_resv_add_excl_fence(&resv, f);
+	}
+
+	r = -ENOENT;
+	dma_resv_for_each_fence(&cursor, &resv, shared, fence) {
+		if (!r) {
+			pr_err("More than one fence found\n");
+			r = -EINVAL;
+			goto err_unlock;
+		}
+		if (f != fence) {
+			pr_err("Unexpected fence\n");
+			r = -EINVAL;
+			goto err_unlock;
+		}
+		if (dma_resv_iter_is_exclusive(&cursor) != !shared) {
+			pr_err("Unexpected fence usage\n");
+			r = -EINVAL;
+			goto err_unlock;
+		}
+		r = 0;
+	}
+	if (r) {
+		pr_err("No fence found\n");
+		goto err_unlock;
+	}
+	dma_fence_signal(f);
+err_unlock:
+	dma_resv_unlock(&resv);
+err_free:
+	dma_resv_fini(&resv);
+	dma_fence_put(f);
+	return r;
+}
+
+static int test_excl_for_each(void *arg)
+{
+	return test_for_each(arg, false);
+}
+
+static int test_shared_for_each(void *arg)
+{
+	return test_for_each(arg, true);
+}
+
+static int test_for_each_unlocked(void *arg, bool shared)
+{
+	struct dma_resv_iter cursor;
+	struct dma_fence *f, *fence;
+	struct dma_resv resv;
+	int r;
+
+	f = alloc_fence();
+	if (!f)
+		return -ENOMEM;
+
+	dma_resv_init(&resv);
+	r = dma_resv_lock(&resv, NULL);
+	if (r) {
+		pr_err("Resv locking failed\n");
+		goto err_free;
+	}
+
+	if (shared) {
+		r = dma_resv_reserve_shared(&resv, 1);
+		if (r) {
+			pr_err("Resv shared slot allocation failed\n");
+			dma_resv_unlock(&resv);
+			goto err_free;
+		}
+
+		dma_resv_add_shared_fence(&resv, f);
+	} else {
+		dma_resv_add_excl_fence(&resv, f);
+	}
+	dma_resv_unlock(&resv);
+
+	r = -ENOENT;
+	dma_resv_iter_begin(&cursor, &resv, shared);
+	dma_resv_for_each_fence_unlocked(&cursor, fence) {
+		if (!r) {
+			dma_resv_iter_end(&cursor);
+			pr_err("More than one fence found\n");
+			r = -EINVAL;
+			goto err_free;
+		}
+		if (f != fence) {
+			dma_resv_iter_end(&cursor);
+			pr_err("Unexpected fence\n");
+			r = -EINVAL;
+			goto err_free;
+		}
+		if (dma_resv_iter_is_exclusive(&cursor) != !shared) {
+			dma_resv_iter_end(&cursor);
+			pr_err("Unexpected fence usage\n");
+			r = -EINVAL;
+			goto err_free;
+		}
+		r = 0;
+	}
+	dma_resv_iter_end(&cursor);
+	if (r) {
+		pr_err("No fence found\n");
+		goto err_free;
+	}
+	dma_fence_signal(f);
+err_free:
+	dma_resv_fini(&resv);
+	dma_fence_put(f);
+	return r;
+}
+
+static int test_excl_for_each_unlocked(void *arg)
+{
+	return test_for_each_unlocked(arg, false);
+}
+
+static int test_shared_for_each_unlocked(void *arg)
+{
+	return test_for_each_unlocked(arg, true);
+}
+
+int dma_resv(void)
+{
+	static const struct subtest tests[] = {
+		SUBTEST(sanitycheck),
+		SUBTEST(test_excl_signaling),
+		SUBTEST(test_shared_signaling),
+		SUBTEST(test_excl_for_each),
+		SUBTEST(test_shared_for_each),
+		SUBTEST(test_excl_for_each_unlocked),
+		SUBTEST(test_shared_for_each_unlocked),
+	};
+
+	spin_lock_init(&fence_lock);
+	return subtests(tests, NULL);
+}
On Tue, Oct 05, 2021 at 01:37:17PM +0200, Christian König wrote:
Just exercising a very minor subset of the functionality, but already proven useful.
v2: add missing locking v3: some more cleanup and consolidation, add unlocked test as well
Signed-off-by: Christian König christian.koenig@amd.com
Yeah, this is great: if we get some specific bug later on, it will hopefully be very easy to add a unit test for the precise bug.
I scrolled through, looks correct.
Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch
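E.g. a later regression could then be pinned down with just one more subtest along these lines (entirely hypothetical, not part of this patch):

	static int test_some_regression(void *arg)
	{
		/* reproduce the reported bug here and return -EINVAL
		 * when it is hit again, 0 otherwise */
		return 0;
	}

	/* plus one more line in the subtest table */
	SUBTEST(test_some_regression),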
This makes the function much simpler since the complex retry logic is now handled elsewhere.
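The consumer-side pattern this relies on looks roughly like this (a sketch distilled from the diff below, with the allocation and error handling trimmed):

	dma_resv_iter_begin(&cursor, src, true);
	dma_resv_for_each_fence_unlocked(&cursor, f) {
		if (dma_resv_iter_is_restarted(&cursor)) {
			/* src changed while we walked it: throw away
			 * everything collected so far and start over */
		}
		/* collect f into the new list */
	}
	dma_resv_iter_end(&cursor);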
Signed-off-by: Christian König christian.koenig@amd.com
Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch
---
 drivers/dma-buf/dma-resv.c | 84 +++++++++++++++-----------------------
 1 file changed, 32 insertions(+), 52 deletions(-)

diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 231bae173ef1..e5ea42df0c6b 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -481,74 +481,54 @@ EXPORT_SYMBOL_GPL(dma_resv_iter_next);
  */
 int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src)
 {
-	struct dma_resv_list *src_list, *dst_list;
-	struct dma_fence *old, *new;
-	unsigned int i;
+	struct dma_resv_iter cursor;
+	struct dma_resv_list *list;
+	struct dma_fence *f, *excl;
 
 	dma_resv_assert_held(dst);
 
-	rcu_read_lock();
-	src_list = dma_resv_shared_list(src);
+	list = NULL;
+	excl = NULL;
 
-retry:
-	if (src_list) {
-		unsigned int shared_count = src_list->shared_count;
+	dma_resv_iter_begin(&cursor, src, true);
+	dma_resv_for_each_fence_unlocked(&cursor, f) {
 
-		rcu_read_unlock();
+		if (dma_resv_iter_is_restarted(&cursor)) {
+			dma_resv_list_free(list);
+			dma_fence_put(excl);
 
-		dst_list = dma_resv_list_alloc(shared_count);
-		if (!dst_list)
-			return -ENOMEM;
+			if (cursor.fences) {
+				unsigned int cnt = cursor.fences->shared_count;
 
-		rcu_read_lock();
-		src_list = dma_resv_shared_list(src);
-		if (!src_list || src_list->shared_count > shared_count) {
-			kfree(dst_list);
-			goto retry;
-		}
-
-		dst_list->shared_count = 0;
-		for (i = 0; i < src_list->shared_count; ++i) {
-			struct dma_fence __rcu **dst;
-			struct dma_fence *fence;
+				list = dma_resv_list_alloc(cnt);
+				if (!list) {
+					dma_resv_iter_end(&cursor);
+					return -ENOMEM;
+				}
 
-			fence = rcu_dereference(src_list->shared[i]);
-			if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
-				     &fence->flags))
-				continue;
+				list->shared_count = 0;
 
-			if (!dma_fence_get_rcu(fence)) {
-				dma_resv_list_free(dst_list);
-				src_list = dma_resv_shared_list(src);
-				goto retry;
+			} else {
+				list = NULL;
 			}
-
-			if (dma_fence_is_signaled(fence)) {
-				dma_fence_put(fence);
-				continue;
-			}
-
-			dst = &dst_list->shared[dst_list->shared_count++];
-			rcu_assign_pointer(*dst, fence);
+			excl = NULL;
 		}
-	} else {
-		dst_list = NULL;
-	}
 
-	new = dma_fence_get_rcu_safe(&src->fence_excl);
-	rcu_read_unlock();
-
-	src_list = dma_resv_shared_list(dst);
-	old = dma_resv_excl_fence(dst);
+		dma_fence_get(f);
+		if (dma_resv_iter_is_exclusive(&cursor))
+			excl = f;
+		else
+			RCU_INIT_POINTER(list->shared[list->shared_count++], f);
+	}
+	dma_resv_iter_end(&cursor);
 
 	write_seqcount_begin(&dst->seq);
-	/* write_seqcount_begin provides the necessary memory barrier */
-	RCU_INIT_POINTER(dst->fence_excl, new);
-	RCU_INIT_POINTER(dst->fence, dst_list);
+	excl = rcu_replace_pointer(dst->fence_excl, excl, dma_resv_held(dst));
+	list = rcu_replace_pointer(dst->fence, list, dma_resv_held(dst));
 	write_seqcount_end(&dst->seq);
 
-	dma_resv_list_free(src_list);
-	dma_fence_put(old);
+	dma_resv_list_free(list);
+	dma_fence_put(excl);
 
 	return 0;
 }
This makes the function much simpler since the complex retry logic is now handled elsewhere.
v2: use sizeof(void*) instead
v3: fix rebase bug

Signed-off-by: Christian König christian.koenig@amd.com
Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch
---
 drivers/dma-buf/dma-resv.c | 108 ++++++++++++------------------------
 1 file changed, 35 insertions(+), 73 deletions(-)

diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index e5ea42df0c6b..1a43bef03af3 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -538,99 +538,61 @@ EXPORT_SYMBOL(dma_resv_copy_fences);
  * dma_resv_get_fences - Get an object's shared and exclusive
  * fences without update side lock held
  * @obj: the reservation object
- * @pfence_excl: the returned exclusive fence (or NULL)
- * @pshared_count: the number of shared fences returned
- * @pshared: the array of shared fence ptrs returned (array is krealloc'd to
+ * @fence_excl: the returned exclusive fence (or NULL)
+ * @shared_count: the number of shared fences returned
+ * @shared: the array of shared fence ptrs returned (array is krealloc'd to
  *	the required size, and must be freed by caller)
  *
  * Retrieve all fences from the reservation object. If the pointer for the
  * exclusive fence is not specified the fence is put into the array of the
  * shared fences as well. Returns either zero or -ENOMEM.
  */
-int dma_resv_get_fences(struct dma_resv *obj, struct dma_fence **pfence_excl,
-			unsigned int *pshared_count,
-			struct dma_fence ***pshared)
+int dma_resv_get_fences(struct dma_resv *obj, struct dma_fence **fence_excl,
+			unsigned int *shared_count, struct dma_fence ***shared)
 {
-	struct dma_fence **shared = NULL;
-	struct dma_fence *fence_excl;
-	unsigned int shared_count;
-	int ret = 1;
-
-	do {
-		struct dma_resv_list *fobj;
-		unsigned int i, seq;
-		size_t sz = 0;
-
-		shared_count = i = 0;
-
-		rcu_read_lock();
-		seq = read_seqcount_begin(&obj->seq);
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
 
-		fence_excl = dma_resv_excl_fence(obj);
-		if (fence_excl && !dma_fence_get_rcu(fence_excl))
-			goto unlock;
+	*shared_count = 0;
+	*shared = NULL;
 
-		fobj = dma_resv_shared_list(obj);
-		if (fobj)
-			sz += sizeof(*shared) * fobj->shared_max;
+	if (fence_excl)
+		*fence_excl = NULL;
 
-		if (!pfence_excl && fence_excl)
-			sz += sizeof(*shared);
+	dma_resv_iter_begin(&cursor, obj, true);
+	dma_resv_for_each_fence_unlocked(&cursor, fence) {
 
-		if (sz) {
-			struct dma_fence **nshared;
+		if (dma_resv_iter_is_restarted(&cursor)) {
+			unsigned int count;
 
-			nshared = krealloc(shared, sz,
-					   GFP_NOWAIT | __GFP_NOWARN);
-			if (!nshared) {
-				rcu_read_unlock();
+			while (*shared_count)
+				dma_fence_put((*shared)[--(*shared_count)]);
 
-				dma_fence_put(fence_excl);
-				fence_excl = NULL;
+			if (fence_excl)
+				dma_fence_put(*fence_excl);
 
-				nshared = krealloc(shared, sz, GFP_KERNEL);
-				if (nshared) {
-					shared = nshared;
-					continue;
-				}
+			count = cursor.fences ? cursor.fences->shared_count : 0;
+			count += fence_excl ? 0 : 1;
 
-				ret = -ENOMEM;
-				break;
+			/* Eventually re-allocate the array */
+			*shared = krealloc_array(*shared, count,
+						 sizeof(void *),
+						 GFP_KERNEL);
+			if (count && !*shared) {
+				dma_resv_iter_end(&cursor);
+				return -ENOMEM;
 			}
-			shared = nshared;
-			shared_count = fobj ? fobj->shared_count : 0;
-			for (i = 0; i < shared_count; ++i) {
-				shared[i] = rcu_dereference(fobj->shared[i]);
-				if (!dma_fence_get_rcu(shared[i]))
-					break;
-			}
-		}
-
-		if (i != shared_count || read_seqcount_retry(&obj->seq, seq)) {
-			while (i--)
-				dma_fence_put(shared[i]);
-			dma_fence_put(fence_excl);
-			goto unlock;
 		}
 
-		ret = 0;
-unlock:
-		rcu_read_unlock();
-	} while (ret);
-
-	if (pfence_excl)
-		*pfence_excl = fence_excl;
-	else if (fence_excl)
-		shared[shared_count++] = fence_excl;
-
-	if (!shared_count) {
-		kfree(shared);
-		shared = NULL;
+		dma_fence_get(fence);
+		if (dma_resv_iter_is_exclusive(&cursor) && fence_excl)
+			*fence_excl = fence;
+		else
+			(*shared)[(*shared_count)++] = fence;
 	}
+	dma_resv_iter_end(&cursor);
 
-	*pshared_count = shared_count;
-	*pshared = shared;
-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(dma_resv_get_fences);
This makes the function much simpler since the complex retry logic is now handled elsewhere.
Signed-off-by: Christian König christian.koenig@amd.com
Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch
---
 drivers/dma-buf/dma-resv.c | 69 +++++---------------------------------
 1 file changed, 8 insertions(+), 61 deletions(-)

diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 1a43bef03af3..220c40dc5c11 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -614,74 +614,21 @@ long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr,
 			   unsigned long timeout)
 {
 	long ret = timeout ? timeout : 1;
-	unsigned int seq, shared_count;
+	struct dma_resv_iter cursor;
 	struct dma_fence *fence;
-	int i;
-
-retry:
-	shared_count = 0;
-	seq = read_seqcount_begin(&obj->seq);
-	rcu_read_lock();
-	i = -1;
-
-	fence = dma_resv_excl_fence(obj);
-	if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
-		if (!dma_fence_get_rcu(fence))
-			goto unlock_retry;
-
-		if (dma_fence_is_signaled(fence)) {
-			dma_fence_put(fence);
-			fence = NULL;
-		}
-
-	} else {
-		fence = NULL;
-	}
-
-	if (wait_all) {
-		struct dma_resv_list *fobj = dma_resv_shared_list(obj);
-
-		if (fobj)
-			shared_count = fobj->shared_count;
-
-		for (i = 0; !fence && i < shared_count; ++i) {
-			struct dma_fence *lfence;
-
-			lfence = rcu_dereference(fobj->shared[i]);
-			if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
-				     &lfence->flags))
-				continue;
-
-			if (!dma_fence_get_rcu(lfence))
-				goto unlock_retry;
-
-			if (dma_fence_is_signaled(lfence)) {
-				dma_fence_put(lfence);
-				continue;
-			}
+	dma_resv_iter_begin(&cursor, obj, wait_all);
+	dma_resv_for_each_fence_unlocked(&cursor, fence) {
 
-			fence = lfence;
-			break;
+		ret = dma_fence_wait_timeout(fence, intr, ret);
+		if (ret <= 0) {
+			dma_resv_iter_end(&cursor);
+			return ret;
 		}
 	}
+	dma_resv_iter_end(&cursor);
 
-	rcu_read_unlock();
-	if (fence) {
-		if (read_seqcount_retry(&obj->seq, seq)) {
-			dma_fence_put(fence);
-			goto retry;
-		}
-
-		ret = dma_fence_wait_timeout(fence, intr, ret);
-		dma_fence_put(fence);
-		if (ret > 0 && wait_all && (i + 1 < shared_count))
-			goto retry;
-	}
 	return ret;
-
-unlock_retry:
-	rcu_read_unlock();
-	goto retry;
 }
 EXPORT_SYMBOL_GPL(dma_resv_wait_timeout);
This makes the function much simpler since the complex retry logic is now handled elsewhere.
Signed-off-by: Christian König christian.koenig@amd.com
Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch
---
 drivers/dma-buf/dma-resv.c | 57 +++++---------------------------------
 1 file changed, 7 insertions(+), 50 deletions(-)

diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 220c40dc5c11..41c2e951213c 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -633,22 +633,6 @@ long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr,
 EXPORT_SYMBOL_GPL(dma_resv_wait_timeout);
 
-static inline int dma_resv_test_signaled_single(struct dma_fence *passed_fence)
-{
-	struct dma_fence *fence, *lfence = passed_fence;
-	int ret = 1;
-
-	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &lfence->flags)) {
-		fence = dma_fence_get_rcu(lfence);
-		if (!fence)
-			return -1;
-
-		ret = !!dma_fence_is_signaled(fence);
-		dma_fence_put(fence);
-	}
-	return ret;
-}
-
 /**
  * dma_resv_test_signaled - Test if a reservation object's fences have been
  * signaled.
@@ -665,43 +649,16 @@ static inline int dma_resv_test_signaled_single(struct dma_fence *passed_fence)
  */
 bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all)
 {
+	struct dma_resv_iter cursor;
 	struct dma_fence *fence;
-	unsigned int seq;
-	int ret;
-
-	rcu_read_lock();
-retry:
-	ret = true;
-	seq = read_seqcount_begin(&obj->seq);
-
-	if (test_all) {
-		struct dma_resv_list *fobj = dma_resv_shared_list(obj);
-		unsigned int i, shared_count;
-
-		shared_count = fobj ? fobj->shared_count : 0;
-		for (i = 0; i < shared_count; ++i) {
-			fence = rcu_dereference(fobj->shared[i]);
-			ret = dma_resv_test_signaled_single(fence);
-			if (ret < 0)
-				goto retry;
-			else if (!ret)
-				break;
-		}
-	}
-
-	fence = dma_resv_excl_fence(obj);
-	if (ret && fence) {
-		ret = dma_resv_test_signaled_single(fence);
-		if (ret < 0)
-			goto retry;
 
+	dma_resv_iter_begin(&cursor, obj, test_all);
+	dma_resv_for_each_fence_unlocked(&cursor, fence) {
+		dma_resv_iter_end(&cursor);
+		return false;
 	}
-
-	if (read_seqcount_retry(&obj->seq, seq))
-		goto retry;
-
-	rcu_read_unlock();
-	return ret;
+	dma_resv_iter_end(&cursor);
+	return true;
 }
 EXPORT_SYMBOL_GPL(dma_resv_test_signaled);
Simplifying the code a bit.
Signed-off-by: Christian König christian.koenig@amd.com
Reviewed-by: Tvrtko Ursulin tvrtko.ursulin@intel.com
---
 drivers/dma-buf/dma-buf.c | 24 ++++++------------------
 1 file changed, 6 insertions(+), 18 deletions(-)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 61e20ae7b08b..8242b5d9baeb 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -1356,10 +1356,9 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
 {
 	struct dma_buf *buf_obj;
 	struct dma_buf_attachment *attach_obj;
-	struct dma_resv *robj;
-	struct dma_resv_list *fobj;
+	struct dma_resv_iter cursor;
 	struct dma_fence *fence;
-	int count = 0, attach_count, shared_count, i;
+	int count = 0, attach_count;
 	size_t size = 0;
 	int ret;
 
@@ -1386,21 +1385,10 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
 			   file_inode(buf_obj->file)->i_ino,
 			   buf_obj->name ?: "");
 
-		robj = buf_obj->resv;
-		fence = dma_resv_excl_fence(robj);
-		if (fence)
-			seq_printf(s, "\tExclusive fence: %s %s %ssignalled\n",
-				   fence->ops->get_driver_name(fence),
-				   fence->ops->get_timeline_name(fence),
-				   dma_fence_is_signaled(fence) ? "" : "un");
-
-		fobj = rcu_dereference_protected(robj->fence,
-						 dma_resv_held(robj));
-		shared_count = fobj ? fobj->shared_count : 0;
-		for (i = 0; i < shared_count; i++) {
-			fence = rcu_dereference_protected(fobj->shared[i],
-							  dma_resv_held(robj));
-			seq_printf(s, "\tShared fence: %s %s %ssignalled\n",
+		dma_resv_for_each_fence(&cursor, buf_obj->resv, true, fence) {
+			seq_printf(s, "\t%s fence: %s %s %ssignalled\n",
+				   dma_resv_iter_is_exclusive(&cursor) ?
+					"Exclusive" : "Shared",
 				   fence->ops->get_driver_name(fence),
 				   fence->ops->get_timeline_name(fence),
 				   dma_fence_is_signaled(fence) ? "" : "un");
Simplify the code a bit.
Signed-off-by: Christian König christian.koenig@amd.com
Reviewed-by: Tvrtko Ursulin tvrtko.ursulin@intel.com
---
 drivers/dma-buf/dma-buf.c | 36 ++++++------------------------------
 1 file changed, 6 insertions(+), 30 deletions(-)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 8242b5d9baeb..beb504a92d60 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -209,19 +209,14 @@ static void dma_buf_poll_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
 	dma_fence_put(fence);
 }
 
-static bool dma_buf_poll_shared(struct dma_resv *resv,
+static bool dma_buf_poll_add_cb(struct dma_resv *resv, bool write,
 				struct dma_buf_poll_cb_t *dcb)
 {
-	struct dma_resv_list *fobj = dma_resv_shared_list(resv);
+	struct dma_resv_iter cursor;
 	struct dma_fence *fence;
-	int i, r;
-
-	if (!fobj)
-		return false;
+	int r;
 
-	for (i = 0; i < fobj->shared_count; ++i) {
-		fence = rcu_dereference_protected(fobj->shared[i],
-						  dma_resv_held(resv));
+	dma_resv_for_each_fence(&cursor, resv, write, fence) {
 		dma_fence_get(fence);
 		r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb);
 		if (!r)
@@ -232,24 +227,6 @@ static bool dma_buf_poll_shared(struct dma_resv *resv,
 	return false;
 }
 
-static bool dma_buf_poll_excl(struct dma_resv *resv,
-			      struct dma_buf_poll_cb_t *dcb)
-{
-	struct dma_fence *fence = dma_resv_excl_fence(resv);
-	int r;
-
-	if (!fence)
-		return false;
-
-	dma_fence_get(fence);
-	r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb);
-	if (!r)
-		return true;
-	dma_fence_put(fence);
-
-	return false;
-}
-
 static __poll_t dma_buf_poll(struct file *file, poll_table *poll)
 {
 	struct dma_buf *dmabuf;
@@ -282,8 +259,7 @@ static __poll_t dma_buf_poll(struct file *file, poll_table *poll)
 		spin_unlock_irq(&dmabuf->poll.lock);
 
 		if (events & EPOLLOUT) {
-			if (!dma_buf_poll_shared(resv, dcb) &&
-			    !dma_buf_poll_excl(resv, dcb))
+			if (!dma_buf_poll_add_cb(resv, true, dcb))
 				/* No callback queued, wake up any other waiters */
 				dma_buf_poll_cb(NULL, &dcb->cb);
 			else
@@ -303,7 +279,7 @@ static __poll_t dma_buf_poll(struct file *file, poll_table *poll)
 		spin_unlock_irq(&dmabuf->poll.lock);
 
 		if (events & EPOLLIN) {
-			if (!dma_buf_poll_excl(resv, dcb))
+			if (!dma_buf_poll_add_cb(resv, false, dcb))
 				/* No callback queued, wake up any other waiters */
 				dma_buf_poll_cb(NULL, &dcb->cb);
 			else
This is probably a fix, since previously we didn't even grab a reference to the fences.
Signed-off-by: Christian König christian.koenig@amd.com
Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch
---
 drivers/gpu/drm/ttm/ttm_bo.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index d62b2013c367..3934ee225c78 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -269,23 +269,15 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
 static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo)
 {
 	struct dma_resv *resv = &bo->base._resv;
-	struct dma_resv_list *fobj;
+	struct dma_resv_iter cursor;
 	struct dma_fence *fence;
-	int i;
-
-	rcu_read_lock();
-	fobj = dma_resv_shared_list(resv);
-	fence = dma_resv_excl_fence(resv);
-	if (fence && !fence->ops->signaled)
-		dma_fence_enable_sw_signaling(fence);
-
-	for (i = 0; fobj && i < fobj->shared_count; ++i) {
-		fence = rcu_dereference(fobj->shared[i]);
 
+	dma_resv_iter_begin(&cursor, resv, true);
+	dma_resv_for_each_fence_unlocked(&cursor, fence) {
 		if (!fence->ops->signaled)
 			dma_fence_enable_sw_signaling(fence);
 	}
-	rcu_read_unlock();
+	dma_resv_iter_end(&cursor);
 }
 
 /**
Simplifying the code a bit.
Signed-off-by: Christian König christian.koenig@amd.com
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 44 ++++++++----------------
 1 file changed, 14 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 862eb3c1c4c5..f7d8487799b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -252,41 +252,25 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
 		     struct dma_resv *resv, enum amdgpu_sync_mode mode,
 		     void *owner)
 {
-	struct dma_resv_list *flist;
+	struct dma_resv_iter cursor;
 	struct dma_fence *f;
-	unsigned i;
-	int r = 0;
+	int r;
 
 	if (resv == NULL)
 		return -EINVAL;
 
-	/* always sync to the exclusive fence */
-	f = dma_resv_excl_fence(resv);
-	dma_fence_chain_for_each(f, f) {
-		struct dma_fence_chain *chain = to_dma_fence_chain(f);
-
-		if (amdgpu_sync_test_fence(adev, mode, owner, chain ?
-					   chain->fence : f)) {
-			r = amdgpu_sync_fence(sync, f);
-			dma_fence_put(f);
-			if (r)
-				return r;
-			break;
-		}
-	}
-
-	flist = dma_resv_shared_list(resv);
-	if (!flist)
-		return 0;
-
-	for (i = 0; i < flist->shared_count; ++i) {
-		f = rcu_dereference_protected(flist->shared[i],
-					      dma_resv_held(resv));
-
-		if (amdgpu_sync_test_fence(adev, mode, owner, f)) {
-			r = amdgpu_sync_fence(sync, f);
-			if (r)
-				return r;
+	dma_resv_for_each_fence(&cursor, resv, true, f) {
+		dma_fence_chain_for_each(f, f) {
+			struct dma_fence_chain *chain = to_dma_fence_chain(f);
+
+			if (amdgpu_sync_test_fence(adev, mode, owner, chain ?
+						   chain->fence : f)) {
+				r = amdgpu_sync_fence(sync, f);
+				dma_fence_put(f);
+				if (r)
+					return r;
+				break;
+			}
 		}
 	}
 	return 0;
On Tue, Oct 05, 2021 at 01:37:25PM +0200, Christian König wrote:
Simplifying the code a bit.
Signed-off-by: Christian König christian.koenig@amd.com
Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch
Yeah these iterators rock :-) -Daniel
Simplifying the code a bit.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index e8d70b6e6737..722e3c9e8882 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1345,10 +1345,9 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, const struct ttm_place *place) { unsigned long num_pages = bo->resource->num_pages; + struct dma_resv_iter resv_cursor; struct amdgpu_res_cursor cursor; - struct dma_resv_list *flist; struct dma_fence *f; - int i;
/* Swapout? */ if (bo->resource->mem_type == TTM_PL_SYSTEM) @@ -1362,14 +1361,9 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, * If true, then return false as any KFD process needs all its BOs to * be resident to run successfully */ - flist = dma_resv_shared_list(bo->base.resv); - if (flist) { - for (i = 0; i < flist->shared_count; ++i) { - f = rcu_dereference_protected(flist->shared[i], - dma_resv_held(bo->base.resv)); - if (amdkfd_fence_check_mm(f, current->mm)) - return false; - } + dma_resv_for_each_fence(&resv_cursor, bo->base.resv, true, f) { + if (amdkfd_fence_check_mm(f, current->mm)) + return false; }
switch (bo->resource->mem_type) {
On Tue, Oct 05, 2021 at 01:37:26PM +0200, Christian König wrote:
Simplifying the code a bit.
Signed-off-by: Christian König christian.koenig@amd.com
+ dma_resv_for_each_fence(&resv_cursor, bo->base.resv, true, f) {
^false?
At least I'm not seeing the code look at the exclusive fence here. -Daniel
On 13.10.21 at 16:07, Daniel Vetter wrote:
On Tue, Oct 05, 2021 at 01:37:26PM +0200, Christian König wrote:
Simplifying the code a bit.
Signed-off-by: Christian König christian.koenig@amd.com
+ dma_resv_for_each_fence(&resv_cursor, bo->base.resv, true, f) {
^false?
At least I'm not seeing the code look at the exclusive fence here.
Yes, but that's correct. We need to look at all potential fences.
It's a design problem in KFD if you ask me, but that is a completely different topic.
Christian.
On 2021-10-19 at 7:36 a.m., Christian König wrote:
On 13.10.21 at 16:07, Daniel Vetter wrote:
On Tue, Oct 05, 2021 at 01:37:26PM +0200, Christian König wrote:
Simplifying the code a bit.
Signed-off-by: Christian König christian.koenig@amd.com
+ dma_resv_for_each_fence(&resv_cursor, bo->base.resv, true, f) {
^false?
At least I'm not seeing the code look at the exclusive fence here.
Yes, but that's correct. We need to look at all potential fences.
amdkfd_fence_check_mm is only meaningful for KFD eviction fences, and they are always added as shared fences. I think setting all_fences = false would return only the exclusive fence.
Regards, Felix
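To make the flag semantics concrete, a minimal sketch along the lines of what Felix describes; handle_fence() is a hypothetical stand-in for the per-fence work:

struct dma_resv_iter cursor;
struct dma_fence *f;

/* all_fences == false: only the exclusive fence (if any) is returned */
dma_resv_for_each_fence(&cursor, resv, false, f)
        handle_fence(f);

/* all_fences == true: the exclusive fence plus every shared fence,
 * which is the only way to see the KFD eviction fences */
dma_resv_for_each_fence(&cursor, resv, true, f)
        handle_fence(f);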
On Tue, Oct 19, 2021 at 12:30:40PM -0400, Felix Kuehling wrote:
On 2021-10-19 at 7:36 a.m., Christian König wrote:
On 13.10.21 at 16:07, Daniel Vetter wrote:
On Tue, Oct 05, 2021 at 01:37:26PM +0200, Christian König wrote:
Simplifying the code a bit.
Signed-off-by: Christian König christian.koenig@amd.com
+ dma_resv_for_each_fence(&resv_cursor, bo->base.resv, true, f) {
^false?
At least I'm not seeing the code look at the exclusive fence here.
Yes, but that's correct. We need to look at all potential fences.
amdkfd_fence_check_mm is only meaningful for KFD eviction fences, and they are always added as shared fences. I think setting all_fences = false would return only the exclusive fence.
Hm yeah I got that wrong, which puts my entire review a bit in question :-)
Anyway on the patch: Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch
-Daniel
No need to actually allocate an array of fences here.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 26 +++++--------------------- 1 file changed, 5 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 6b15cad78de9..e42dd79ed6f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2090,30 +2090,14 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev, static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) { struct dma_resv *resv = vm->root.bo->tbo.base.resv; - struct dma_fence *excl, **shared; - unsigned i, shared_count; - int r; + struct dma_resv_iter cursor; + struct dma_fence *fence;
- r = dma_resv_get_fences(resv, &excl, &shared_count, &shared); - if (r) { - /* Not enough memory to grab the fence list, as last resort - * block for all the fences to complete. - */ - dma_resv_wait_timeout(resv, true, false, - MAX_SCHEDULE_TIMEOUT); - return; - } - - /* Add a callback for each fence in the reservation object */ - amdgpu_vm_prt_get(adev); - amdgpu_vm_add_prt_cb(adev, excl); - - for (i = 0; i < shared_count; ++i) { + dma_resv_for_each_fence(&cursor, resv, true, fence) { + /* Add a callback for each fence in the reservation object */ amdgpu_vm_prt_get(adev); - amdgpu_vm_add_prt_cb(adev, shared[i]); + amdgpu_vm_add_prt_cb(adev, fence); } - - kfree(shared); }
/**
On Tue, Oct 05, 2021 at 01:37:27PM +0200, Christian König wrote:
No need to actually allocate an array of fences here.
Signed-off-by: Christian König christian.koenig@amd.com
- /* Add a callback for each fence in the reservation object */
- amdgpu_vm_prt_get(adev);
I was confused for a bit why the old code wouldn't leak a refcount for !excl case, but it's all handled.
Not sure amdgpu_vm_add_prt_cb() still needs to handle the !fence case; it's a bit of a gotcha, but I guess it can happen?
Either way, looks correct.
Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch
Simplifying the code a bit. Also drop the RCU read side lock since the object is locked anyway.
Untested since I can't get the driver to compile on !ARM.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/msm/msm_gem.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 40a9863f5951..5bd511f07c07 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -880,7 +880,7 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m, { struct msm_gem_object *msm_obj = to_msm_bo(obj); struct dma_resv *robj = obj->resv; - struct dma_resv_list *fobj; + struct dma_resv_iter cursor; struct dma_fence *fence; struct msm_gem_vma *vma; uint64_t off = drm_vma_node_start(&obj->vma_node); @@ -955,22 +955,13 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m, seq_puts(m, "\n"); }
- rcu_read_lock(); - fobj = dma_resv_shared_list(robj); - if (fobj) { - unsigned int i, shared_count = fobj->shared_count; - - for (i = 0; i < shared_count; i++) { - fence = rcu_dereference(fobj->shared[i]); + dma_resv_for_each_fence(&cursor, robj, true, fence) { + if (dma_resv_iter_is_exclusive(&cursor)) + describe_fence(fence, "Exclusive", m); + else describe_fence(fence, "Shared", m); - } }
- fence = dma_resv_excl_fence(robj); - if (fence) - describe_fence(fence, "Exclusive", m); - rcu_read_unlock(); - msm_gem_unlock(obj); }
On Tue, Oct 05, 2021 at 01:37:28PM +0200, Christian König wrote:
Simplifying the code a bit. Also drop the RCU read side lock since the object is locked anyway.
Untested since I can't get the driver to compile on !ARM.
Cross-compiler install is pretty easy and you should have that for pushing drm changes to drm-misc :-)
Signed-off-by: Christian König christian.koenig@amd.com
Assuming this compiles, it looks correct.
Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch
On 13.10.21 at 16:14, Daniel Vetter wrote:
On Tue, Oct 05, 2021 at 01:37:28PM +0200, Christian König wrote:
Simplifying the code a bit. Also drop the RCU read side lock since the object is locked anyway.
Untested since I can't get the driver to compile on !ARM.
Cross-compiler install is pretty easy and you should have that for pushing drm changes to drm-misc :-)
I do have cross compile setups for some architectures, but I seriously can't do that for every single driver.
With only a bit of work MSM can now be compile-tested on other architectures as well. That even yielded a couple of missing includes and dependencies in MSM which just don't matter on ARM.
Signed-off-by: Christian König christian.koenig@amd.com
Assuming this compiles, it looks correct.
Yes it does.
Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch
Thanks, Christian.
On Tue, Oct 19, 2021 at 01:49:08PM +0200, Christian König wrote:
On 13.10.21 at 16:14, Daniel Vetter wrote:
On Tue, Oct 05, 2021 at 01:37:28PM +0200, Christian König wrote:
Simplifying the code a bit. Also drop the RCU read side lock since the object is locked anyway.
Untested since I can't get the driver to compile on !ARM.
Cross-compiler install is pretty easy and you should have that for pushing drm changes to drm-misc :-)
I do have cross compile setups for some architectures, but I seriously can't do that for every single driver.
With only a bit of work we allowed MSM to be compile tested on other architectures as well now. That even yielded a couple of missing includes and dependencies in MSM which just don't matter on ARM.
The only ones you need are arm32 and arm64. -Daniel
Simplifying the code a bit.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/radeon_sync.c | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon_sync.c b/drivers/gpu/drm/radeon/radeon_sync.c index 9257b60144c4..b991ba1bcd51 100644 --- a/drivers/gpu/drm/radeon/radeon_sync.c +++ b/drivers/gpu/drm/radeon/radeon_sync.c @@ -91,33 +91,17 @@ int radeon_sync_resv(struct radeon_device *rdev, struct dma_resv *resv, bool shared) { - struct dma_resv_list *flist; - struct dma_fence *f; + struct dma_resv_iter cursor; struct radeon_fence *fence; - unsigned i; + struct dma_fence *f; int r = 0;
- /* always sync to the exclusive fence */ - f = dma_resv_excl_fence(resv); - fence = f ? to_radeon_fence(f) : NULL; - if (fence && fence->rdev == rdev) - radeon_sync_fence(sync, fence); - else if (f) - r = dma_fence_wait(f, true); - - flist = dma_resv_shared_list(resv); - if (shared || !flist || r) - return r; - - for (i = 0; i < flist->shared_count; ++i) { - f = rcu_dereference_protected(flist->shared[i], - dma_resv_held(resv)); + dma_resv_for_each_fence(&cursor, resv, shared, f) { fence = to_radeon_fence(f); if (fence && fence->rdev == rdev) radeon_sync_fence(sync, fence); else r = dma_fence_wait(f, true); - if (r) break; }
On Tue, Oct 05, 2021 at 01:37:29PM +0200, Christian König wrote:
Simplifying the code a bit.
Signed-off-by: Christian König christian.koenig@amd.com
Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch
Simplifying the code a bit.
v2: use dma_resv_for_each_fence
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch --- drivers/gpu/drm/scheduler/sched_main.c | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 042c16b5d54a..5bc5f775abe1 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -699,30 +699,16 @@ int drm_sched_job_add_implicit_dependencies(struct drm_sched_job *job, struct drm_gem_object *obj, bool write) { + struct dma_resv_iter cursor; + struct dma_fence *fence; int ret; - struct dma_fence **fences; - unsigned int i, fence_count; - - if (!write) { - struct dma_fence *fence = dma_resv_get_excl_unlocked(obj->resv); - - return drm_sched_job_add_dependency(job, fence); - } - - ret = dma_resv_get_fences(obj->resv, NULL, &fence_count, &fences); - if (ret || !fence_count) - return ret;
- for (i = 0; i < fence_count; i++) { - ret = drm_sched_job_add_dependency(job, fences[i]); + dma_resv_for_each_fence(&cursor, obj->resv, write, fence) { + ret = drm_sched_job_add_dependency(job, fence); if (ret) - break; + return ret; } - - for (; i < fence_count; i++) - dma_fence_put(fences[i]); - kfree(fences); - return ret; + return 0; } EXPORT_SYMBOL(drm_sched_job_add_implicit_dependencies);
This makes the function much simpler since the complex retry logic is now handled elsewhere.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Tvrtko Ursulin tvrtko.ursulin@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_busy.c | 35 ++++++++++-------------- 1 file changed, 14 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c index 6234e17259c1..dc72b36dae54 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c @@ -82,8 +82,8 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_busy *args = data; struct drm_i915_gem_object *obj; - struct dma_resv_list *list; - unsigned int seq; + struct dma_resv_iter cursor; + struct dma_fence *fence; int err;
err = -ENOENT; @@ -109,27 +109,20 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, * to report the overall busyness. This is what the wait-ioctl does. * */ -retry: - seq = raw_read_seqcount(&obj->base.resv->seq); - - /* Translate the exclusive fence to the READ *and* WRITE engine */ - args->busy = busy_check_writer(dma_resv_excl_fence(obj->base.resv)); - - /* Translate shared fences to READ set of engines */ - list = dma_resv_shared_list(obj->base.resv); - if (list) { - unsigned int shared_count = list->shared_count, i; - - for (i = 0; i < shared_count; ++i) { - struct dma_fence *fence = - rcu_dereference(list->shared[i]); - + args->busy = 0; + dma_resv_iter_begin(&cursor, obj->base.resv, true); + dma_resv_for_each_fence_unlocked(&cursor, fence) { + if (dma_resv_iter_is_restarted(&cursor)) + args->busy = 0; + + if (dma_resv_iter_is_exclusive(&cursor)) + /* Translate the exclusive fence to the READ *and* WRITE engine */ + args->busy |= busy_check_writer(fence); + else + /* Translate shared fences to READ set of engines */ args->busy |= busy_check_reader(fence); - } } - - if (args->busy && read_seqcount_retry(&obj->base.resv->seq, seq)) - goto retry; + dma_resv_iter_end(&cursor);
err = 0; out:
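The key subtlety in this conversion: the unlocked iterator restarts the walk from scratch whenever it detects a concurrent modification, so any state accumulated across fences has to be thrown away on restart, which is what the dma_resv_iter_is_restarted() check above does. As a generic sketch, with check() as a hypothetical per-fence test:

unsigned int result = 0;

dma_resv_iter_begin(&cursor, resv, true);
dma_resv_for_each_fence_unlocked(&cursor, fence) {
        if (dma_resv_iter_is_restarted(&cursor))
                result = 0; /* drop everything from the aborted pass */

        result |= check(fence);
}
dma_resv_iter_end(&cursor);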
On 05/10/2021 12:37, Christian König wrote:
This makes the function much simpler since the complex retry logic is now handled elsewhere.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Tvrtko Ursulin tvrtko.ursulin@intel.com
Reminder - r-b was retracted until at least more text is added to the commit message about pros and cons. But really some discussion needs to be had inside the i915 team on the topic.
Regards,
Tvrtko
On 05.10.21 at 14:40, Tvrtko Ursulin wrote:
On 05/10/2021 12:37, Christian König wrote:
This makes the function much simpler since the complex retry logic is now handled elsewhere.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Tvrtko Ursulin tvrtko.ursulin@intel.com
Reminder - r-b was retracted until at least more text is added to the commit message about pros and cons. But really some discussion needs to be had inside the i915 team on the topic.
Sure, going to move those to a different branch.
But I really only see the following options:
1. Grab the lock.
2. Use the _unlocked variant with get/put.
3. Add another _rcu iterator just for this case.
I'm fine with either, but Daniel pretty much already rejected #3, and #2/#1 have more overhead than the original one.
Regards, Christian.
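For illustration, option 1 could look roughly like this for the busy ioctl; just a sketch, with the trade-off that the ioctl would now block on the reservation lock, which the seqcount based version never did:

struct dma_resv_iter cursor;
struct dma_fence *fence;
int ret;

ret = dma_resv_lock_interruptible(obj->base.resv, NULL);
if (ret)
        return ret;

args->busy = 0;
dma_resv_for_each_fence(&cursor, obj->base.resv, true, fence) {
        if (dma_resv_iter_is_exclusive(&cursor))
                args->busy |= busy_check_writer(fence);
        else
                args->busy |= busy_check_reader(fence);
}
dma_resv_unlock(obj->base.resv);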
On Tue, Oct 05, 2021 at 02:44:50PM +0200, Christian König wrote:
On 05.10.21 at 14:40, Tvrtko Ursulin wrote:
On 05/10/2021 12:37, Christian König wrote:
This makes the function much simpler since the complex retry logic is now handled elsewhere.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Tvrtko Ursulin tvrtko.ursulin@intel.com
Reminder - r-b was retracted until at least more text is added to the commit message about pros and cons. But really some discussion needs to be had inside the i915 team on the topic.
Sure, going to move those to a different branch.
But I really only see the following options:
1. Grab the lock.
2. Use the _unlocked variant with get/put.
3. Add another _rcu iterator just for this case.
I'm fine with either, but Daniel pretty much already rejected #3, and #2/#1 have more overhead than the original one.
Anything that removes open-coded rcu/lockless magic from i915 gets my ack, there's way too much of this everywhere. So on this:
Acked-by: Daniel Vetter daniel.vetter@ffwll.ch
I've asked Maarten to review the i915 ones for you, please pester him if it's not happening :-) -Daniel
Simplifying the code a bit.
v2: use dma_resv_for_each_fence instead, according to Tvrtko the lock is held here anyway. v3: back to using dma_resv_for_each_fence_unlocked.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/i915/i915_sw_fence.c | 53 ++++++---------------------- 1 file changed, 11 insertions(+), 42 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index c589a681da77..7ea0dbf81530 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -572,56 +572,25 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence, unsigned long timeout, gfp_t gfp) { - struct dma_fence *excl; + struct dma_resv_iter cursor; + struct dma_fence *f; int ret = 0, pending;
debug_fence_assert(fence); might_sleep_if(gfpflags_allow_blocking(gfp));
- if (write) { - struct dma_fence **shared; - unsigned int count, i; - - ret = dma_resv_get_fences(resv, &excl, &count, &shared); - if (ret) - return ret; - - for (i = 0; i < count; i++) { - if (shared[i]->ops == exclude) - continue; - - pending = i915_sw_fence_await_dma_fence(fence, - shared[i], - timeout, - gfp); - if (pending < 0) { - ret = pending; - break; - } - - ret |= pending; - } - - for (i = 0; i < count; i++) - dma_fence_put(shared[i]); - kfree(shared); - } else { - excl = dma_resv_get_excl_unlocked(resv); - } - - if (ret >= 0 && excl && excl->ops != exclude) { - pending = i915_sw_fence_await_dma_fence(fence, - excl, - timeout, + dma_resv_iter_begin(&cursor, resv, write); + dma_resv_for_each_fence_unlocked(&cursor, f) { + pending = i915_sw_fence_await_dma_fence(fence, f, timeout, gfp); - if (pending < 0) + if (pending < 0) { ret = pending; - else - ret |= pending; - } - - dma_fence_put(excl); + break; + }
+ ret |= pending; + } + dma_resv_iter_end(&cursor); return ret; }
Simplifying the code a bit.
v2: add missing rcu_read_lock()/rcu_read_unlock() v3: use dma_resv_for_each_fence instead
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Tvrtko Ursulin tvrtko.ursulin@intel.com --- drivers/gpu/drm/i915/i915_request.c | 34 +++++------------------------ 1 file changed, 5 insertions(+), 29 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index ce446716d092..3839712ebd23 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1509,38 +1509,14 @@ i915_request_await_object(struct i915_request *to, struct drm_i915_gem_object *obj, bool write) { - struct dma_fence *excl; + struct dma_resv_iter cursor; + struct dma_fence *fence; int ret = 0;
- if (write) { - struct dma_fence **shared; - unsigned int count, i; - - ret = dma_resv_get_fences(obj->base.resv, &excl, &count, - &shared); + dma_resv_for_each_fence(&cursor, obj->base.resv, write, fence) { + ret = i915_request_await_dma_fence(to, fence); if (ret) - return ret; - - for (i = 0; i < count; i++) { - ret = i915_request_await_dma_fence(to, shared[i]); - if (ret) - break; - - dma_fence_put(shared[i]); - } - - for (; i < count; i++) - dma_fence_put(shared[i]); - kfree(shared); - } else { - excl = dma_resv_get_excl_unlocked(obj->base.resv); - } - - if (excl) { - if (ret == 0) - ret = i915_request_await_dma_fence(to, excl); - - dma_fence_put(excl); + break; }
return ret;
Simplifying the code a bit.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/i915/gem/i915_gem_wait.c | 51 +++++------------------- 1 file changed, 9 insertions(+), 42 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index f909aaa09d9c..a13193db1dba 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -37,55 +37,22 @@ i915_gem_object_wait_reservation(struct dma_resv *resv, unsigned int flags, long timeout) { - struct dma_fence *excl; - bool prune_fences = false; - - if (flags & I915_WAIT_ALL) { - struct dma_fence **shared; - unsigned int count, i; - int ret; + struct dma_resv_iter cursor; + struct dma_fence *fence;
- ret = dma_resv_get_fences(resv, &excl, &count, &shared); - if (ret) - return ret; - - for (i = 0; i < count; i++) { - timeout = i915_gem_object_wait_fence(shared[i], - flags, timeout); - if (timeout < 0) - break; - - dma_fence_put(shared[i]); - } - - for (; i < count; i++) - dma_fence_put(shared[i]); - kfree(shared); - - /* - * If both shared fences and an exclusive fence exist, - * then by construction the shared fences must be later - * than the exclusive fence. If we successfully wait for - * all the shared fences, we know that the exclusive fence - * must all be signaled. If all the shared fences are - * signaled, we can prune the array and recover the - * floating references on the fences/requests. - */ - prune_fences = count && timeout >= 0; - } else { - excl = dma_resv_get_excl_unlocked(resv); + dma_resv_iter_begin(&cursor, resv, flags & I915_WAIT_ALL); + dma_resv_for_each_fence_unlocked(&cursor, fence) { + timeout = i915_gem_object_wait_fence(fence, flags, timeout); + if (timeout < 0) + break; } - - if (excl && timeout >= 0) - timeout = i915_gem_object_wait_fence(excl, flags, timeout); - - dma_fence_put(excl); + dma_resv_iter_end(&cursor);
/* * Opportunistically prune the fences iff we know they have *all* been * signaled. */ - if (prune_fences) + if (timeout > 0) dma_resv_prune(resv);
return timeout;
On 05-10-2021 at 13:37, Christian König wrote:
Simplifying the code a bit.
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/i915/gem/i915_gem_wait.c | 51 +++++------------------- 1 file changed, 9 insertions(+), 42 deletions(-)
+ dma_resv_iter_begin(&cursor, resv, flags & I915_WAIT_ALL);
+ dma_resv_for_each_fence_unlocked(&cursor, fence) {
+ timeout = i915_gem_object_wait_fence(fence, flags, timeout);
+ if (timeout < 0)
+ break;
+ }
+ dma_resv_iter_end(&cursor);
When replying to Tvrtko about the correctness of the conversion, I just now noticed a logic bug here; the same logic bug also affects dma_resv_wait_timeout().
long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr,
                           unsigned long timeout)
{
        long ret = timeout ? timeout : 1;
        struct dma_resv_iter cursor;
        struct dma_fence *fence;

        dma_resv_iter_begin(&cursor, obj, wait_all);
        dma_resv_for_each_fence_unlocked(&cursor, fence) {
                ret = dma_fence_wait_timeout(fence, intr, ret);
                if (ret <= 0) {
                        dma_resv_iter_end(&cursor);
                        return ret;
                }
        }
        dma_resv_iter_end(&cursor);

        return ret;
}
It fails to handle the timeout = 0 case correctly; I think the original code probably did. dma_fence_wait_timeout() should be called with timeout = 0 explicitly.
Fixed code for inner loop:

        ret = dma_fence_wait_timeout(fence, intr, timeout);
        if (ret <= 0)
                break;

        if (timeout)
                timeout = ret;
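Putting that together, the corrected function would look roughly like this; a sketch assembled from the fragments above, not a tested patch:

long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr,
                           unsigned long timeout)
{
        long ret = timeout ? timeout : 1;
        struct dma_resv_iter cursor;
        struct dma_fence *fence;

        dma_resv_iter_begin(&cursor, obj, wait_all);
        dma_resv_for_each_fence_unlocked(&cursor, fence) {
                /* pass timeout == 0 through so a pure poll stays a poll */
                ret = dma_fence_wait_timeout(fence, intr, timeout);
                if (ret <= 0)
                        break;

                if (timeout)
                        timeout = ret;
        }
        dma_resv_iter_end(&cursor);

        return ret;
}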
This bug also affects i915_gem_object_wait_reservation, so the whole series might need to be respinned, or at least checked, if more wait conversions are affected.
~Maarten
Simplifying the code a bit.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/i915/gem/i915_gem_wait.c | 31 +++++------------------- 1 file changed, 6 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index a13193db1dba..569658c7859c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -118,32 +118,13 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, unsigned int flags, const struct i915_sched_attr *attr) { - struct dma_fence *excl; - - if (flags & I915_WAIT_ALL) { - struct dma_fence **shared; - unsigned int count, i; - int ret; - - ret = dma_resv_get_fences(obj->base.resv, &excl, &count, - &shared); - if (ret) - return ret; - - for (i = 0; i < count; i++) { - i915_gem_fence_wait_priority(shared[i], attr); - dma_fence_put(shared[i]); - } - - kfree(shared); - } else { - excl = dma_resv_get_excl_unlocked(obj->base.resv); - } + struct dma_resv_iter cursor; + struct dma_fence *fence;
- if (excl) { - i915_gem_fence_wait_priority(excl, attr); - dma_fence_put(excl); - } + dma_resv_iter_begin(&cursor, obj->base.resv, flags & I915_WAIT_ALL); + dma_resv_for_each_fence_unlocked(&cursor, fence) + i915_gem_fence_wait_priority(fence, attr); + dma_resv_iter_end(&cursor); return 0; }
Simplifying the code a bit.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/i915/display/intel_display.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 134a6acbd8fb..d32137a84694 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -11290,6 +11290,7 @@ intel_prepare_plane_fb(struct drm_plane *_plane, i915_gem_object_flush_frontbuffer(obj, ORIGIN_DIRTYFB);
if (!new_plane_state->uapi.fence) { /* implicit fencing */ + struct dma_resv_iter cursor; struct dma_fence *fence;
ret = i915_sw_fence_await_reservation(&state->commit_ready, @@ -11300,12 +11301,12 @@ intel_prepare_plane_fb(struct drm_plane *_plane, if (ret < 0) goto unpin_fb;
- fence = dma_resv_get_excl_unlocked(obj->base.resv); - if (fence) { + dma_resv_iter_begin(&cursor, obj->base.resv, false); + dma_resv_for_each_fence_unlocked(&cursor, fence) { add_rps_boost_after_vblank(new_plane_state->hw.crtc, fence); - dma_fence_put(fence); } + dma_resv_iter_end(&cursor); } else { add_rps_boost_after_vblank(new_plane_state->hw.crtc, new_plane_state->uapi.fence);
Simplifying the code a bit.
v2: add missing rcu_read_lock()/unlock() v3: switch to locked version
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Tvrtko Ursulin tvrtko.ursulin@intel.com --- drivers/gpu/drm/drm_gem.c | 26 +++++--------------------- 1 file changed, 5 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 09c820045859..4dcdec6487bb 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -1340,31 +1340,15 @@ int drm_gem_fence_array_add_implicit(struct xarray *fence_array, struct drm_gem_object *obj, bool write) { - int ret; - struct dma_fence **fences; - unsigned int i, fence_count; - - if (!write) { - struct dma_fence *fence = - dma_resv_get_excl_unlocked(obj->resv); - - return drm_gem_fence_array_add(fence_array, fence); - } + struct dma_resv_iter cursor; + struct dma_fence *fence; + int ret = 0;
- ret = dma_resv_get_fences(obj->resv, NULL, - &fence_count, &fences); - if (ret || !fence_count) - return ret; - - for (i = 0; i < fence_count; i++) { - ret = drm_gem_fence_array_add(fence_array, fences[i]); + dma_resv_for_each_fence(&cursor, obj->resv, write, fence) { + ret = drm_gem_fence_array_add(fence_array, fence); if (ret) break; } - - for (; i < fence_count; i++) - dma_fence_put(fences[i]); - kfree(fences); return ret; } EXPORT_SYMBOL(drm_gem_fence_array_add_implicit);
On Tue, Oct 05, 2021 at 01:37:37PM +0200, Christian König wrote:
Simplifying the code a bit.
v2: add missing rcu_read_lock()/unlock() v3: switch to locked version
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Tvrtko Ursulin tvrtko.ursulin@intel.com
Please make sure you also apply this to the new copy of this code in drm/sched. This one here is up for deletion, once I get all the driver conversions I have landed ... -Daniel
On 13.10.21 at 16:21, Daniel Vetter wrote:
On Tue, Oct 05, 2021 at 01:37:37PM +0200, Christian König wrote:
Simplifying the code a bit.
v2: add missing rcu_read_lock()/unlock() v3: switch to locked version
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Tvrtko Ursulin tvrtko.ursulin@intel.com
Please make sure you also apply this to the new copy of this code in drm/sched. This one here is up for deletion, once I get all the driver conversions I have landed ...
Yeah, I do have that. Only added this patch here for completeness so that I could at least consider dropping the old access functions.
But I will hold it back, just ping me when the code in question is removed.
Christian.
On Tue, Oct 19, 2021 at 02:54:04PM +0200, Christian König wrote:
On 13.10.21 at 16:21, Daniel Vetter wrote:
On Tue, Oct 05, 2021 at 01:37:37PM +0200, Christian König wrote:
Simplifying the code a bit.
v2: add missing rcu_read_lock()/unlock() v3: switch to locked version
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Tvrtko Ursulin tvrtko.ursulin@intel.com
Please make sure you also apply this to the new copy of this code in drm/sched. This one here is up for deletion, once I get all the driver conversions I have landed ...
Yeah, I do have that. Only added this patch here for completeness so that I could at least consider dropping the old access functions.
But I will hold it back, just ping me when the code in question is removed.
Imo go ahead and push this, the rebasing is trivial since I just delete code. That way I don't have to remember to do anything :-) -Daniel
Makes the handling a bit more complex, but avoids the use of dma_resv_get_excl_unlocked().
v2: improve coding and documentation
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/drm_gem_atomic_helper.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/drm_gem_atomic_helper.c b/drivers/gpu/drm/drm_gem_atomic_helper.c index e570398abd78..8534f78d4d6d 100644 --- a/drivers/gpu/drm/drm_gem_atomic_helper.c +++ b/drivers/gpu/drm/drm_gem_atomic_helper.c @@ -143,6 +143,7 @@ */ int drm_gem_plane_helper_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state) { + struct dma_resv_iter cursor; struct drm_gem_object *obj; struct dma_fence *fence;
@@ -150,9 +151,17 @@ int drm_gem_plane_helper_prepare_fb(struct drm_plane *plane, struct drm_plane_st return 0;
obj = drm_gem_fb_get_obj(state->fb, 0); - fence = dma_resv_get_excl_unlocked(obj->resv); - drm_atomic_set_fence_for_plane(state, fence); + dma_resv_iter_begin(&cursor, obj->resv, false); + dma_resv_for_each_fence_unlocked(&cursor, fence) { + /* TODO: We only use the first write fence here and need to fix + * the drm_atomic_set_fence_for_plane() API to accept more than + * one. */ + dma_fence_get(fence); + break; + } + dma_resv_iter_end(&cursor);
+ drm_atomic_set_fence_for_plane(state, fence); return 0; } EXPORT_SYMBOL_GPL(drm_gem_plane_helper_prepare_fb);
On Tue, Oct 05, 2021 at 01:37:38PM +0200, Christian König wrote:
Makes the handling a bit more complex, but avoids the use of dma_resv_get_excl_unlocked().
v2: improve coding and documentation
Signed-off-by: Christian König christian.koenig@amd.com
+ dma_resv_iter_begin(&cursor, obj->resv, false);
+ dma_resv_for_each_fence_unlocked(&cursor, fence) {
+ /* TODO: We only use the first write fence here and need to fix
+ * the drm_atomic_set_fence_for_plane() API to accept more than
+ * one. */
I'm confused, right now there is only one write fence. So no need to iterate, and also no need to add a TODO. If/when we add more write fences then I think this needs to be revisited, and ofc then we do need to update the set_fence helpers to carry an entire array of fences. -Daniel
On 13.10.21 at 16:23, Daniel Vetter wrote:
On Tue, Oct 05, 2021 at 01:37:38PM +0200, Christian König wrote:
Makes the handling a bit more complex, but avoids the use of dma_resv_get_excl_unlocked().
v2: improve coding and documentation
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/drm_gem_atomic_helper.c | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem_atomic_helper.c b/drivers/gpu/drm/drm_gem_atomic_helper.c
index e570398abd78..8534f78d4d6d 100644
--- a/drivers/gpu/drm/drm_gem_atomic_helper.c
+++ b/drivers/gpu/drm/drm_gem_atomic_helper.c
@@ -143,6 +143,7 @@
  */
 int drm_gem_plane_helper_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state)
 {
+	struct dma_resv_iter cursor;
 	struct drm_gem_object *obj;
 	struct dma_fence *fence;

@@ -150,9 +151,17 @@ int drm_gem_plane_helper_prepare_fb(struct drm_plane *plane, struct drm_plane_st
 		return 0;

 	obj = drm_gem_fb_get_obj(state->fb, 0);
-	fence = dma_resv_get_excl_unlocked(obj->resv);
-	drm_atomic_set_fence_for_plane(state, fence);
+	dma_resv_iter_begin(&cursor, obj->resv, false);
+	dma_resv_for_each_fence_unlocked(&cursor, fence) {
+		/* TODO: We only use the first write fence here and need to fix
+		 * the drm_atomic_set_fence_for_plane() API to accept more than
+		 * one. */
I'm confused, right now there is only one write fence. So no need to iterate, and also no need to add a TODO. If/when we add more write fences then I think this needs to be revisited, and ofc then we do need to update the set_fence helpers to carry an entire array of fences.
Well could be that I misunderstood you, but in your last explanation it sounded like the drm_atomic_set_fence_for_plane() function needs fixing anyway because a plane could have multiple BOs.
So in my understanding what we need is a drm_atomic_add_dependency_for_plane() function which records that a certain fence needs to be signaled before a flip.
Support for more than one write fence then comes totally naturally.
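In sketch form, such a helper might look like this; purely illustrative, since neither the function nor the deps/num_deps fields exist in drm_plane_state today:

	/*
	 * Hypothetical helper: record one more fence the flip has to
	 * wait for. Sketch only, not part of this series.
	 */
	static int drm_atomic_add_dependency_for_plane(struct drm_plane_state *state,
						       struct dma_fence *fence)
	{
		struct dma_fence **deps;

		if (!fence)
			return 0;

		/* Grow the per-plane dependency array by one entry. */
		deps = krealloc_array(state->deps, state->num_deps + 1,
				      sizeof(*deps), GFP_KERNEL);
		if (!deps)
			return -ENOMEM;

		deps[state->num_deps++] = dma_fence_get(fence);
		state->deps = deps;
		return 0;
	}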
Christian.
-Daniel
+		dma_fence_get(fence);
+		break;
+	}
+	dma_resv_iter_end(&cursor);

+	drm_atomic_set_fence_for_plane(state, fence);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(drm_gem_plane_helper_prepare_fb);
--
2.25.1
On Tue, Oct 19, 2021 at 03:02:26PM +0200, Christian König wrote:
On 13.10.21 at 16:23, Daniel Vetter wrote:
On Tue, Oct 05, 2021 at 01:37:38PM +0200, Christian König wrote:
Makes the handling a bit more complex, but avoids the use of dma_resv_get_excl_unlocked().
v2: improve coding and documentation
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/drm_gem_atomic_helper.c | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem_atomic_helper.c b/drivers/gpu/drm/drm_gem_atomic_helper.c
index e570398abd78..8534f78d4d6d 100644
--- a/drivers/gpu/drm/drm_gem_atomic_helper.c
+++ b/drivers/gpu/drm/drm_gem_atomic_helper.c
@@ -143,6 +143,7 @@
  */
 int drm_gem_plane_helper_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state)
 {
+	struct dma_resv_iter cursor;
 	struct drm_gem_object *obj;
 	struct dma_fence *fence;

@@ -150,9 +151,17 @@ int drm_gem_plane_helper_prepare_fb(struct drm_plane *plane, struct drm_plane_st
 		return 0;

 	obj = drm_gem_fb_get_obj(state->fb, 0);
-	fence = dma_resv_get_excl_unlocked(obj->resv);
-	drm_atomic_set_fence_for_plane(state, fence);
+	dma_resv_iter_begin(&cursor, obj->resv, false);
+	dma_resv_for_each_fence_unlocked(&cursor, fence) {
+		/* TODO: We only use the first write fence here and need to fix
+		 * the drm_atomic_set_fence_for_plane() API to accept more than
+		 * one. */
I'm confused, right now there is only one write fence. So no need to iterate, and also no need to add a TODO. If/when we add more write fences then I think this needs to be revisited, and ofc then we do need to update the set_fence helpers to carry an entire array of fences.
Well could be that I misunderstood you, but in your last explanation it sounded like the drm_atomic_set_fence_for_plane() function needs fixing anyway because a plane could have multiple BOs.
So in my understanding what we need is a drm_atomic_add_dependency_for_plane() function which records that a certain fence needs to be signaled before a flip.
Yeah that's another issue, but in practice there's no libva which decodes into planar yuv with different fences between the planes. So not a bug in practice.
But this is entirely orthogonal to you picking up the wrong fence here if there's no exclusive fence set:
- old code: either pick the exclusive fence, or no fence if the exclusive one is not set.
- new code: pick the exclusive fence or the first shared fence.
New behaviour is busted, because scanning out and reading from a buffer at the same time (for the next frame, e.g. to copy over damaged areas or some other tricks) is very much a supported thing. Atomic _only_ wants to look at the exclusive fence slot, which means "there is an implicitly synced write to this buffer". Implicitly synced reads _must_ be ignored.
Now amdgpu doesn't have this distinction in its uapi, but many drivers do. -Daniel
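For reference, what the boolean actually selects in this series, in sketch form (see Christian's clarification further down the thread):

	/* all_fences == false: only the exclusive (write) fence, if any --
	 * exactly what atomic wants, since implicit reads are ignored. */
	dma_resv_iter_begin(&cursor, obj->resv, false);

	/* all_fences == true: exclusive fence plus every shared (read)
	 * fence, e.g. for debug output or full synchronization. */
	dma_resv_iter_begin(&cursor, obj->resv, true);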
Support for more than one write fence then comes totally naturally.
Christian.
-Daniel
+		dma_fence_get(fence);
+		break;
+	}
+	dma_resv_iter_end(&cursor);

+	drm_atomic_set_fence_for_plane(state, fence);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(drm_gem_plane_helper_prepare_fb);
--
2.25.1
On 19.10.21 at 16:30, Daniel Vetter wrote:
On Tue, Oct 19, 2021 at 03:02:26PM +0200, Christian König wrote:
On 13.10.21 at 16:23, Daniel Vetter wrote:
On Tue, Oct 05, 2021 at 01:37:38PM +0200, Christian König wrote:
Makes the handling a bit more complex, but avoids the use of dma_resv_get_excl_unlocked().
v2: improve coding and documentation
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/drm_gem_atomic_helper.c | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem_atomic_helper.c b/drivers/gpu/drm/drm_gem_atomic_helper.c
index e570398abd78..8534f78d4d6d 100644
--- a/drivers/gpu/drm/drm_gem_atomic_helper.c
+++ b/drivers/gpu/drm/drm_gem_atomic_helper.c
@@ -143,6 +143,7 @@
  */
 int drm_gem_plane_helper_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state)
 {
+	struct dma_resv_iter cursor;
 	struct drm_gem_object *obj;
 	struct dma_fence *fence;

@@ -150,9 +151,17 @@ int drm_gem_plane_helper_prepare_fb(struct drm_plane *plane, struct drm_plane_st
 		return 0;

 	obj = drm_gem_fb_get_obj(state->fb, 0);
-	fence = dma_resv_get_excl_unlocked(obj->resv);
-	drm_atomic_set_fence_for_plane(state, fence);
+	dma_resv_iter_begin(&cursor, obj->resv, false);
+	dma_resv_for_each_fence_unlocked(&cursor, fence) {
+		/* TODO: We only use the first write fence here and need to fix
+		 * the drm_atomic_set_fence_for_plane() API to accept more than
+		 * one. */
I'm confused, right now there is only one write fence. So no need to iterate, and also no need to add a TODO. If/when we add more write fences then I think this needs to be revisited, and ofc then we do need to update the set_fence helpers to carry an entire array of fences.
Well could be that I misunderstood you, but in your last explanation it sounded like the drm_atomic_set_fence_for_plane() function needs fixing anyway because a plane could have multiple BOs.
So in my understanding what we need is a drm_atomic_add_dependency_for_plane() function which records that a certain fence needs to be signaled before a flip.
Yeah that's another issue, but in practice there's no libva which decodes into planar yuv with different fences between the planes. So not a bug in practice.
But this is entirely orthogonal to you picking up the wrong fence here if there's no exclusive fence set:
old code: either pick the exclusive fence, or no fence if the exclusive one is not set.
new code: pick the exclusive fence or the first shared fence.
Huh, what?
We use "dma_resv_iter_begin(&cursor, obj->resv, *false*);" here which means that only the exclusive fence is returned and no shared fences whatsoever.
My next step is to replace the boolean with a set of enums describing the use cases. I hope that will make it much clearer what's going on here.
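A sketch of what such use-case enums might look like; the names here are illustrative only, though a similar set later landed upstream as enum dma_resv_usage:

	/* Illustrative only: describe what a caller wants to sync against
	 * instead of passing a bare all_fences boolean. */
	enum dma_resv_usage {
		DMA_RESV_USAGE_KERNEL,	 /* kernel memory management work */
		DMA_RESV_USAGE_WRITE,	 /* implicitly synced writes */
		DMA_RESV_USAGE_READ,	 /* implicitly synced reads */
		DMA_RESV_USAGE_BOOKKEEP, /* not implicitly synced at all */
	};

	/* A hypothetical iterator signature then becomes self-documenting: */
	dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_WRITE, fence) {
		/* sync only against writers */
	}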
Christian.
New behaviour is busted, because scanning out and reading from a buffer at the same time (for the next frame, e.g. to copy over damaged areas or some other tricks) is very much a supported thing. Atomic _only_ wants to look at the exclusive fence slot, which means "there is an implicitly synced write to this buffer". Implicitly synced reads _must_ be ignored.
Now amdgpu doesn't have this distinction in its uapi, but many drivers do. -Daniel
Support for more than one write fence then comes totally naturally.
Christian.
-Daniel
+		dma_fence_get(fence);
+		break;
+	}
+	dma_resv_iter_end(&cursor);

+	drm_atomic_set_fence_for_plane(state, fence);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(drm_gem_plane_helper_prepare_fb);
--
2.25.1
On Tue, Oct 19, 2021 at 05:51:38PM +0200, Christian König wrote:
On 19.10.21 at 16:30, Daniel Vetter wrote:
On Tue, Oct 19, 2021 at 03:02:26PM +0200, Christian König wrote:
On 13.10.21 at 16:23, Daniel Vetter wrote:
On Tue, Oct 05, 2021 at 01:37:38PM +0200, Christian König wrote:
Makes the handling a bit more complex, but avoids the use of dma_resv_get_excl_unlocked().
v2: improve coding and documentation
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/drm_gem_atomic_helper.c | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem_atomic_helper.c b/drivers/gpu/drm/drm_gem_atomic_helper.c
index e570398abd78..8534f78d4d6d 100644
--- a/drivers/gpu/drm/drm_gem_atomic_helper.c
+++ b/drivers/gpu/drm/drm_gem_atomic_helper.c
@@ -143,6 +143,7 @@
  */
 int drm_gem_plane_helper_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state)
 {
+	struct dma_resv_iter cursor;
 	struct drm_gem_object *obj;
 	struct dma_fence *fence;

@@ -150,9 +151,17 @@ int drm_gem_plane_helper_prepare_fb(struct drm_plane *plane, struct drm_plane_st
 		return 0;

 	obj = drm_gem_fb_get_obj(state->fb, 0);
-	fence = dma_resv_get_excl_unlocked(obj->resv);
-	drm_atomic_set_fence_for_plane(state, fence);
+	dma_resv_iter_begin(&cursor, obj->resv, false);
+	dma_resv_for_each_fence_unlocked(&cursor, fence) {
+		/* TODO: We only use the first write fence here and need to fix
+		 * the drm_atomic_set_fence_for_plane() API to accept more than
+		 * one. */
I'm confused, right now there is only one write fence. So no need to iterate, and also no need to add a TODO. If/when we add more write fences then I think this needs to be revisited, and ofc then we do need to update the set_fence helpers to carry an entire array of fences.
Well could be that I misunderstood you, but in your last explanation it sounded like the drm_atomic_set_fence_for_plane() function needs fixing anyway because a plane could have multiple BOs.
So in my understanding what we need is a drm_atomic_add_dependency_for_plane() function which records that a certain fence needs to be signaled before a flip.
Yeah that's another issue, but in practice there's no libva which decodes into planar yuv with different fences between the planes. So not a bug in practice.
But this is entirely orthogonal to you picking up the wrong fence here if there's no exclusive fence set:
old code: either pick the exclusive fence, or no fence if the exclusive one is not set.
new code: pick the exclusive fence or the first shared fence.
Hui what?
We use "dma_resv_iter_begin(&cursor, obj->resv, *false*);" here which means that only the exclusive fence is returned and no shared fences whatsoever.
My next step is to replace the boolean with a bunch of use case describing enums. I hope that will make it much clearer what's going on here.
Yeah I got that entirely wrong, which is kinda bad since that's about the only thing worth checking in these conversions :-/
I'll go recheck them again and slap some more r-b on stuff. -Daniel
Christian.
New behaviour is busted, because scanning out and reading from a buffer at the same time (for the next frame, e.g. to copy over damaged areas or some other tricks) is very much a supported thing. Atomic _only_ wants to look at the exclusive fence slot, which means "there is an implicitly synced write to this buffer". Implicitly synced reads _must_ be ignored.
Now amdgpu doesn't have this distinction in its uapi, but many drivers do. -Daniel
Support for more than one write fence then comes totally naturally.
Christian.
-Daniel
+		dma_fence_get(fence);
+		break;
+	}
+	dma_resv_iter_end(&cursor);

+	drm_atomic_set_fence_for_plane(state, fence);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(drm_gem_plane_helper_prepare_fb);
--
2.25.1
On Tue, Oct 05, 2021 at 01:37:38PM +0200, Christian König wrote:
Makes the handling a bit more complex, but avoids the use of dma_resv_get_excl_unlocked().
v2: improve coding and documentation
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/drm_gem_atomic_helper.c | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem_atomic_helper.c b/drivers/gpu/drm/drm_gem_atomic_helper.c
index e570398abd78..8534f78d4d6d 100644
--- a/drivers/gpu/drm/drm_gem_atomic_helper.c
+++ b/drivers/gpu/drm/drm_gem_atomic_helper.c
@@ -143,6 +143,7 @@
  */
 int drm_gem_plane_helper_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state)
 {
+	struct dma_resv_iter cursor;
 	struct drm_gem_object *obj;
 	struct dma_fence *fence;

@@ -150,9 +151,17 @@ int drm_gem_plane_helper_prepare_fb(struct drm_plane *plane, struct drm_plane_st
 		return 0;

 	obj = drm_gem_fb_get_obj(state->fb, 0);
-	fence = dma_resv_get_excl_unlocked(obj->resv);
-	drm_atomic_set_fence_for_plane(state, fence);
+	dma_resv_iter_begin(&cursor, obj->resv, false);
+	dma_resv_for_each_fence_unlocked(&cursor, fence) {
+		/* TODO: We only use the first write fence here and need to fix
Maybe reword the todo that currently there's only one write fence, and if that changes we have work to do. Or something like that. The current comment sounds like multiple write fences are possible, which is not the case.
With that:
Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch
+		 * the drm_atomic_set_fence_for_plane() API to accept more than
+		 * one. */
+		dma_fence_get(fence);
+		break;
+	}
+	dma_resv_iter_end(&cursor);

+	drm_atomic_set_fence_for_plane(state, fence);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(drm_gem_plane_helper_prepare_fb);
--
2.25.1
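A rewording along the lines Daniel asks for might read as follows; an illustration only, not wording from the thread:

	/* There is currently exactly one write fence per dma_resv object,
	 * so taking the first fence we see is correct. If the kernel ever
	 * allows multiple write fences, drm_atomic_set_fence_for_plane()
	 * must be extended to accept all of them. */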
Simplifying the code a bit.
Signed-off-by: Christian König christian.koenig@amd.com
---
 drivers/gpu/drm/nouveau/nouveau_fence.c | 48 +++++++------------------
 1 file changed, 12 insertions(+), 36 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index 05d0b3eb3690..26f9299df881 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -339,14 +339,15 @@ nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr)
 }

 int
-nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool exclusive, bool intr)
+nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan,
+		   bool exclusive, bool intr)
 {
 	struct nouveau_fence_chan *fctx = chan->fence;
-	struct dma_fence *fence;
 	struct dma_resv *resv = nvbo->bo.base.resv;
-	struct dma_resv_list *fobj;
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
 	struct nouveau_fence *f;
-	int ret = 0, i;
+	int ret;

 	if (!exclusive) {
 		ret = dma_resv_reserve_shared(resv, 1);
@@ -355,10 +356,7 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e
 			return ret;
 	}

-	fobj = dma_resv_shared_list(resv);
-	fence = dma_resv_excl_fence(resv);
-
-	if (fence) {
+	dma_resv_for_each_fence(&cursor, resv, exclusive, fence) {
 		struct nouveau_channel *prev = NULL;
 		bool must_wait = true;

@@ -366,41 +364,19 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e
 		if (f) {
 			rcu_read_lock();
 			prev = rcu_dereference(f->channel);
-			if (prev && (prev == chan || fctx->sync(f, prev, chan) == 0))
+			if (prev && (prev == chan ||
+				     fctx->sync(f, prev, chan) == 0))
 				must_wait = false;
 			rcu_read_unlock();
 		}

-		if (must_wait)
+		if (must_wait) {
 			ret = dma_fence_wait(fence, intr);
-
-		return ret;
-	}
-
-	if (!exclusive || !fobj)
-		return ret;
-
-	for (i = 0; i < fobj->shared_count && !ret; ++i) {
-		struct nouveau_channel *prev = NULL;
-		bool must_wait = true;
-
-		fence = rcu_dereference_protected(fobj->shared[i],
-						  dma_resv_held(resv));
-
-		f = nouveau_local_fence(fence, chan->drm);
-		if (f) {
-			rcu_read_lock();
-			prev = rcu_dereference(f->channel);
-			if (prev && (prev == chan || fctx->sync(f, prev, chan) == 0))
-				must_wait = false;
-			rcu_read_unlock();
+			if (ret)
+				return ret;
 		}
-
-		if (must_wait)
-			ret = dma_fence_wait(fence, intr);
 	}
-
-	return ret;
+	return 0;
 }

 void
On Tue, Oct 05, 2021 at 01:37:39PM +0200, Christian König wrote:
Simplifying the code a bit.
Signed-off-by: Christian König christian.koenig@amd.com
A bit of a tricky conversion since the previous code was clever with the ret handling in the loop, but it looks correct.
Please mention in the commit message that this code now also waits for all shared fences in all cases. Previously if we found an exclusive fence, we bailed out. That needs to be recorded in the commit message, together with an explainer that de facto too many other drivers have broken this rule already, and so you have to always iterate all fences.
With that added:
Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch
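The behavioral change Daniel wants documented boils down to the following sketch in pseudo-C, where sync_to() is a hypothetical stand-in for the local-fence/wait logic in the loop:

	/* Old flow: an exclusive fence short-circuits the function, so
	 * the shared fences were never examined when one was set. */
	fence = dma_resv_excl_fence(resv);
	if (fence)
		return sync_to(fence);
	/* ... walk the shared list only when no exclusive fence exists ... */

	/* New flow: every fence selected by the iterator is synced to,
	 * shared ones included, with no early bail-out on success. */
	dma_resv_for_each_fence(&cursor, resv, exclusive, fence) {
		ret = sync_to(fence);
		if (ret)
			return ret;
	}
	return 0;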
drivers/gpu/drm/nouveau/nouveau_fence.c | 48 +++++++------------------
1 file changed, 12 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index 05d0b3eb3690..26f9299df881 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -339,14 +339,15 @@ nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr)
 }

 int
-nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool exclusive, bool intr)
+nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan,
+		   bool exclusive, bool intr)
 {
 	struct nouveau_fence_chan *fctx = chan->fence;
-	struct dma_fence *fence;
 	struct dma_resv *resv = nvbo->bo.base.resv;
-	struct dma_resv_list *fobj;
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
 	struct nouveau_fence *f;
-	int ret = 0, i;
+	int ret;

 	if (!exclusive) {
 		ret = dma_resv_reserve_shared(resv, 1);
@@ -355,10 +356,7 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e
 			return ret;
 	}

-	fobj = dma_resv_shared_list(resv);
-	fence = dma_resv_excl_fence(resv);
-
-	if (fence) {
+	dma_resv_for_each_fence(&cursor, resv, exclusive, fence) {
 		struct nouveau_channel *prev = NULL;
 		bool must_wait = true;

@@ -366,41 +364,19 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e
 		if (f) {
 			rcu_read_lock();
 			prev = rcu_dereference(f->channel);
-			if (prev && (prev == chan || fctx->sync(f, prev, chan) == 0))
+			if (prev && (prev == chan ||
+				     fctx->sync(f, prev, chan) == 0))
 				must_wait = false;
 			rcu_read_unlock();
 		}

-		if (must_wait)
+		if (must_wait) {
 			ret = dma_fence_wait(fence, intr);
-
-		return ret;
-	}
-
-	if (!exclusive || !fobj)
-		return ret;
-
-	for (i = 0; i < fobj->shared_count && !ret; ++i) {
-		struct nouveau_channel *prev = NULL;
-		bool must_wait = true;
-
-		fence = rcu_dereference_protected(fobj->shared[i],
-						  dma_resv_held(resv));
-
-		f = nouveau_local_fence(fence, chan->drm);
-		if (f) {
-			rcu_read_lock();
-			prev = rcu_dereference(f->channel);
-			if (prev && (prev == chan || fctx->sync(f, prev, chan) == 0))
-				must_wait = false;
-			rcu_read_unlock();
+			if (ret)
+				return ret;
 		}
-
-		if (must_wait)
-			ret = dma_fence_wait(fence, intr);
 	}
-
-	return ret;
+	return 0;
 }

 void
--
2.25.1
Makes the handling a bit more complex, but avoids the use of dma_resv_get_excl_unlocked().
Signed-off-by: Christian König christian.koenig@amd.com
---
 drivers/gpu/drm/nouveau/dispnv50/wndw.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
index 8d048bacd6f0..30712a681e2a 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
@@ -539,6 +539,8 @@ nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state)
 	struct nouveau_bo *nvbo;
 	struct nv50_head_atom *asyh;
 	struct nv50_wndw_ctxdma *ctxdma;
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
 	int ret;

 	NV_ATOMIC(drm, "%s prepare: %p\n", plane->name, fb);
@@ -561,7 +563,13 @@ nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state)
 		asyw->image.handle[0] = ctxdma->object.handle;
 	}

-	asyw->state.fence = dma_resv_get_excl_unlocked(nvbo->bo.base.resv);
+	dma_resv_iter_begin(&cursor, nvbo->bo.base.resv, false);
+	dma_resv_for_each_fence_unlocked(&cursor, fence) {
+		/* TODO: We only use the first writer here */
+		asyw->state.fence = dma_fence_get(fence);
+		break;
+	}
+	dma_resv_iter_end(&cursor);
 	asyw->image.offset[0] = nvbo->offset;

 	if (wndw->func->prepare) {
On Tue, Oct 05, 2021 at 01:37:40PM +0200, Christian König wrote:
Makes the handling a bit more complex, but avoids the use of dma_resv_get_excl_unlocked().
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/nouveau/dispnv50/wndw.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
index 8d048bacd6f0..30712a681e2a 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
@@ -539,6 +539,8 @@ nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state)
 	struct nouveau_bo *nvbo;
 	struct nv50_head_atom *asyh;
 	struct nv50_wndw_ctxdma *ctxdma;
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
 	int ret;

 	NV_ATOMIC(drm, "%s prepare: %p\n", plane->name, fb);
@@ -561,7 +563,13 @@ nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state)
 		asyw->image.handle[0] = ctxdma->object.handle;
 	}

-	asyw->state.fence = dma_resv_get_excl_unlocked(nvbo->bo.base.resv);
+	dma_resv_iter_begin(&cursor, nvbo->bo.base.resv, false);
+	dma_resv_for_each_fence_unlocked(&cursor, fence) {
+		/* TODO: We only use the first writer here */
Same thing as with the atomic core helper. This is actually broken, because for atomic we really do _not_ want to wait for any shared fences. Which this will do, if there's no exclusive fence attached.
So upgrading my general concern on this and the atomic helper patch to a reject, since I think it's broken. -Daniel
+		asyw->state.fence = dma_fence_get(fence);
+		break;
+	}
+	dma_resv_iter_end(&cursor);
 	asyw->image.offset[0] = nvbo->offset;

 	if (wndw->func->prepare) {
--
2.25.1
On 13.10.21 at 16:29, Daniel Vetter wrote:
On Tue, Oct 05, 2021 at 01:37:40PM +0200, Christian König wrote:
Makes the handling a bit more complex, but avoids the use of dma_resv_get_excl_unlocked().
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/nouveau/dispnv50/wndw.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
index 8d048bacd6f0..30712a681e2a 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
@@ -539,6 +539,8 @@ nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state)
 	struct nouveau_bo *nvbo;
 	struct nv50_head_atom *asyh;
 	struct nv50_wndw_ctxdma *ctxdma;
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
 	int ret;

 	NV_ATOMIC(drm, "%s prepare: %p\n", plane->name, fb);
@@ -561,7 +563,13 @@ nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state)
 		asyw->image.handle[0] = ctxdma->object.handle;
 	}

-	asyw->state.fence = dma_resv_get_excl_unlocked(nvbo->bo.base.resv);
+	dma_resv_iter_begin(&cursor, nvbo->bo.base.resv, false);
+	dma_resv_for_each_fence_unlocked(&cursor, fence) {
+		/* TODO: We only use the first writer here */
Same thing as with the atomic core helper. This is actually broken, because for atomic we really do _not_ want to wait for any shared fences. Which this will do, if there's no exclusive fence attached.
So upgrading my general concern on this and the atomic helper patch to a reject, since I think it's broken.
Since that was simply a misunderstanding, could I get an rb for this one now?
Thanks, Christian.
-Daniel
+		asyw->state.fence = dma_fence_get(fence);
+		break;
+	}
+	dma_resv_iter_end(&cursor);
 	asyw->image.offset[0] = nvbo->offset;

 	if (wndw->func->prepare) {
--
2.25.1
On Fri, Oct 22, 2021 at 03:17:17PM +0200, Christian König wrote:
On 13.10.21 at 16:29, Daniel Vetter wrote:
On Tue, Oct 05, 2021 at 01:37:40PM +0200, Christian König wrote:
Makes the handling a bit more complex, but avoids the use of dma_resv_get_excl_unlocked().
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/nouveau/dispnv50/wndw.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
index 8d048bacd6f0..30712a681e2a 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
@@ -539,6 +539,8 @@ nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state)
 	struct nouveau_bo *nvbo;
 	struct nv50_head_atom *asyh;
 	struct nv50_wndw_ctxdma *ctxdma;
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
 	int ret;

 	NV_ATOMIC(drm, "%s prepare: %p\n", plane->name, fb);
@@ -561,7 +563,13 @@ nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state)
 		asyw->image.handle[0] = ctxdma->object.handle;
 	}

-	asyw->state.fence = dma_resv_get_excl_unlocked(nvbo->bo.base.resv);
+	dma_resv_iter_begin(&cursor, nvbo->bo.base.resv, false);
+	dma_resv_for_each_fence_unlocked(&cursor, fence) {
+		/* TODO: We only use the first writer here */
Same thing as with the atomic core helper. This is actually broken, because for atomic we really do _not_ want to wait for any shared fences. Which this will do, if there's no exclusive fence attached.
So upgrading my general concern on this and the atomic helper patch to a reject, since I think it's broken.
Since we simply had a misunderstanding with that could I get an rb for that now?
Oh sorry, I thought I'd supplied that. As much as you still trust my r-b at least :-)
Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch
Thanks, Christian.
-Daniel
+		asyw->state.fence = dma_fence_get(fence);
+		break;
+	}
+	dma_resv_iter_end(&cursor);
 	asyw->image.offset[0] = nvbo->offset;

 	if (wndw->func->prepare) {
--
2.25.1
Instead of hand-rolling the logic.
Signed-off-by: Christian König christian.koenig@amd.com
---
 drivers/gpu/drm/etnaviv/etnaviv_gem.c | 31 ++++++++++-----------------
 1 file changed, 11 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index 8f1b5af47dd6..0eeb33de2ff4 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -428,19 +428,17 @@ int etnaviv_gem_wait_bo(struct etnaviv_gpu *gpu, struct drm_gem_object *obj,
 static void etnaviv_gem_describe_fence(struct dma_fence *fence,
 	const char *type, struct seq_file *m)
 {
-	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
-		seq_printf(m, "\t%9s: %s %s seq %llu\n",
-			   type,
-			   fence->ops->get_driver_name(fence),
-			   fence->ops->get_timeline_name(fence),
-			   fence->seqno);
+	seq_printf(m, "\t%9s: %s %s seq %llu\n", type,
+		   fence->ops->get_driver_name(fence),
+		   fence->ops->get_timeline_name(fence),
+		   fence->seqno);
 }

 static void etnaviv_gem_describe(struct drm_gem_object *obj, struct seq_file *m)
 {
 	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
 	struct dma_resv *robj = obj->resv;
-	struct dma_resv_list *fobj;
+	struct dma_resv_iter cursor;
 	struct dma_fence *fence;
 	unsigned long off = drm_vma_node_start(&obj->vma_node);

@@ -449,21 +447,14 @@ static void etnaviv_gem_describe(struct drm_gem_object *obj, struct seq_file *m)
 			obj->name, kref_read(&obj->refcount),
 			off, etnaviv_obj->vaddr, obj->size);

-	rcu_read_lock();
-	fobj = dma_resv_shared_list(robj);
-	if (fobj) {
-		unsigned int i, shared_count = fobj->shared_count;
-
-		for (i = 0; i < shared_count; i++) {
-			fence = rcu_dereference(fobj->shared[i]);
+	dma_resv_iter_begin(&cursor, robj, true);
+	dma_resv_for_each_fence_unlocked(&cursor, fence) {
+		if (dma_resv_iter_is_exclusive(&cursor))
+			etnaviv_gem_describe_fence(fence, "Exclusive", m);
+		else
 			etnaviv_gem_describe_fence(fence, "Shared", m);
-		}
 	}
-
-	fence = dma_resv_excl_fence(robj);
-	if (fence)
-		etnaviv_gem_describe_fence(fence, "Exclusive", m);
-	rcu_read_unlock();
+	dma_resv_iter_end(&cursor);
 }

 void etnaviv_gem_describe_objects(struct etnaviv_drm_private *priv,
On Tue, Oct 05, 2021 at 01:37:41PM +0200, Christian König wrote:
Instead of hand rolling the logic.
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/etnaviv/etnaviv_gem.c | 31 ++++++++++-----------------
1 file changed, 11 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index 8f1b5af47dd6..0eeb33de2ff4 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -428,19 +428,17 @@ int etnaviv_gem_wait_bo(struct etnaviv_gpu *gpu, struct drm_gem_object *obj,
 static void etnaviv_gem_describe_fence(struct dma_fence *fence,
 	const char *type, struct seq_file *m)
 {
-	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
Yay for removing open-coded tests like this. Drivers really should have no business digging around in fence->flags (i915 is terrible in this regard unfortunately).
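For reference, the non-open-coded way to make such a check is the helper the fence API already provides; a sketch of what the old condition would look like with it (this also gives ->signaled() callbacks a chance to run rather than just peeking at the flag bit):

	if (!dma_fence_is_signaled(fence))
		seq_printf(m, "\t%9s: %s %s seq %llu\n", type,
			   fence->ops->get_driver_name(fence),
			   fence->ops->get_timeline_name(fence),
			   fence->seqno);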
-		seq_printf(m, "\t%9s: %s %s seq %llu\n",
-			   type,
-			   fence->ops->get_driver_name(fence),
-			   fence->ops->get_timeline_name(fence),
-			   fence->seqno);
+	seq_printf(m, "\t%9s: %s %s seq %llu\n", type,
+		   fence->ops->get_driver_name(fence),
+		   fence->ops->get_timeline_name(fence),
+		   fence->seqno);
 }

 static void etnaviv_gem_describe(struct drm_gem_object *obj, struct seq_file *m)
 {
 	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
 	struct dma_resv *robj = obj->resv;
-	struct dma_resv_list *fobj;
+	struct dma_resv_iter cursor;
 	struct dma_fence *fence;
 	unsigned long off = drm_vma_node_start(&obj->vma_node);

@@ -449,21 +447,14 @@ static void etnaviv_gem_describe(struct drm_gem_object *obj, struct seq_file *m)
 			obj->name, kref_read(&obj->refcount),
 			off, etnaviv_obj->vaddr, obj->size);

-	rcu_read_lock();
-	fobj = dma_resv_shared_list(robj);
-	if (fobj) {
-		unsigned int i, shared_count = fobj->shared_count;
-
-		for (i = 0; i < shared_count; i++) {
-			fence = rcu_dereference(fobj->shared[i]);
+	dma_resv_iter_begin(&cursor, robj, true);
+	dma_resv_for_each_fence_unlocked(&cursor, fence) {
+		if (dma_resv_iter_is_exclusive(&cursor))
+			etnaviv_gem_describe_fence(fence, "Exclusive", m);
+		else
 			etnaviv_gem_describe_fence(fence, "Shared", m);
-		}
 	}
-
-	fence = dma_resv_excl_fence(robj);
-	if (fence)
-		etnaviv_gem_describe_fence(fence, "Exclusive", m);
-	rcu_read_unlock();
+	dma_resv_iter_end(&cursor);
Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch
Please make sure it compiles on arm before pushing :-)
 }

 void etnaviv_gem_describe_objects(struct etnaviv_drm_private *priv,
--
2.25.1
We certainly hold the reservation lock here, no need for the RCU dance.
Signed-off-by: Christian König christian.koenig@amd.com
---
 drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index 4dd7d9d541c0..7e17bc2b5df1 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -195,7 +195,7 @@ static int submit_fence_sync(struct etnaviv_gem_submit *submit)
 			if (ret)
 				return ret;
 		} else {
-			bo->excl = dma_resv_get_excl_unlocked(robj);
+			bo->excl = dma_fence_get(dma_resv_excl_fence(robj));
 		}
 	}
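The commit message's point in code form, as a minimal sketch: with the reservation lock held the exclusive fence cannot change under us, so the plain accessor plus a reference is enough, and lockdep can document the assumption:

	dma_resv_assert_held(robj);	/* locked submit path */
	bo->excl = dma_fence_get(dma_resv_excl_fence(robj));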
On Tue, Oct 05, 2021 at 01:37:42PM +0200, Christian König wrote:
We certainly hold the reservation lock here, no need for the RCU dance.
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index 4dd7d9d541c0..7e17bc2b5df1 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -195,7 +195,7 @@ static int submit_fence_sync(struct etnaviv_gem_submit *submit)
 			if (ret)
 				return ret;
 		} else {
-			bo->excl = dma_resv_get_excl_unlocked(robj);
Maybe have that in the series to sunset dma_resv_get_excl_unlocked()? Just so it makes a bit more sense from a motivation pov. Or explain that in the commit message.
Anyway looks correct.
Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch
+			bo->excl = dma_fence_get(dma_resv_excl_fence(robj));
 		}
 	}
--
2.25.1