Hi guys,
The version I sent out yesterday had a rather obvious coding error, and I honestly forgot the cover letter.
This one here is better tested and will now hopefully not be torn apart by the CI system immediately.
I tried to address all review and documentation comments as best as I could, so I'm hoping that we can now consider pushing this.
Cheers, Christian.
Abstract the complexity of iterating over all the fences in a dma_resv object.
The new loop handles the whole RCU and retry dance and returns only fences where we can be sure we grabbed the right one.
v2: fix accessing the shared fences while they might be freed, improve kerneldoc, rename _cursor to _iter, add dma_resv_iter_is_exclusive, add dma_resv_iter_begin/end
v3: restructure the code, move rcu_read_lock()/unlock() into the iterator, add dma_resv_iter_is_restarted()
v4: fix NULL deref when no explicit fence exists, drop superfluous rcu_read_lock()/unlock() calls.
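Usage then boils down to something like this (sketch for illustration only, error handling omitted; "resv" is an arbitrary dma_resv object):

	struct dma_resv_iter cursor;
	struct dma_fence *fence;

	dma_resv_iter_begin(&cursor, resv, true);
	dma_resv_for_each_fence_unlocked(&cursor, fence) {
		if (dma_resv_iter_is_restarted(&cursor)) {
			/* The object was concurrently modified, throw
			 * away all state collected so far and start over.
			 */
		}
		/* fence is unsignaled and we hold a reference to it */
	}
	dma_resv_iter_end(&cursor);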
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/dma-resv.c | 95 ++++++++++++++++++++++++++++++++++++++
 include/linux/dma-resv.h   | 95 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 190 insertions(+)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 84fbe60629e3..7768140ab36d 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -323,6 +323,101 @@ void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence)
 }
 EXPORT_SYMBOL(dma_resv_add_excl_fence);
 
+/**
+ * dma_resv_iter_restart_unlocked - restart the unlocked iterator
+ * @cursor: The dma_resv_iter object to restart
+ *
+ * Restart the unlocked iteration by initializing the cursor object.
+ */
+static void dma_resv_iter_restart_unlocked(struct dma_resv_iter *cursor)
+{
+	cursor->seq = read_seqcount_begin(&cursor->obj->seq);
+	cursor->index = -1;
+	if (cursor->all_fences)
+		cursor->fences = dma_resv_shared_list(cursor->obj);
+	else
+		cursor->fences = NULL;
+	cursor->is_restarted = true;
+}
+
+/**
+ * dma_resv_iter_walk_unlocked - walk over fences in a dma_resv obj
+ * @cursor: cursor to record the current position
+ *
+ * Return all the fences in the dma_resv object which are not yet signaled.
+ * The returned fence has an extra local reference so will stay alive.
+ * If a concurrent modify is detected the whole iterration is started over again.
+ */
+static void dma_resv_iter_walk_unlocked(struct dma_resv_iter *cursor)
+{
+	struct dma_resv *obj = cursor->obj;
+
+	do {
+		/* Drop the reference from the previous round */
+		dma_fence_put(cursor->fence);
+
+		if (cursor->index++ == -1) {
+			cursor->fence = dma_resv_excl_fence(obj);
+
+		} else if (!cursor->fences ||
+			   cursor->index >= cursor->fences->shared_count) {
+			cursor->fence = NULL;
+
+		} else {
+			struct dma_resv_list *fences = cursor->fences;
+			unsigned int idx = cursor->index;
+
+			cursor->fence = rcu_dereference(fences->shared[idx]);
+		}
+		if (cursor->fence)
+			cursor->fence = dma_fence_get_rcu(cursor->fence);
+	} while (cursor->fence && dma_fence_is_signaled(cursor->fence));
+}
+
+/**
+ * dma_resv_iter_first_unlocked - first fence in an unlocked dma_resv obj.
+ * @cursor: the cursor with the current position
+ *
+ * Returns the first fence from an unlocked dma_resv obj.
+ */
+struct dma_fence *dma_resv_iter_first_unlocked(struct dma_resv_iter *cursor)
+{
+	rcu_read_lock();
+	do {
+		dma_resv_iter_restart_unlocked(cursor);
+		dma_resv_iter_walk_unlocked(cursor);
+	} while (read_seqcount_retry(&cursor->obj->seq, cursor->seq));
+	rcu_read_unlock();
+
+	return cursor->fence;
+}
+EXPORT_SYMBOL(dma_resv_iter_first_unlocked);
+
+/**
+ * dma_resv_iter_next_unlocked - next fence in an unlocked dma_resv obj.
+ * @cursor: the cursor with the current position
+ *
+ * Returns the next fence from an unlocked dma_resv obj.
+ */
+struct dma_fence *dma_resv_iter_next_unlocked(struct dma_resv_iter *cursor)
+{
+	bool restart;
+
+	rcu_read_lock();
+	cursor->is_restarted = false;
+	restart = read_seqcount_retry(&cursor->obj->seq, cursor->seq);
+	do {
+		if (restart)
+			dma_resv_iter_restart_unlocked(cursor);
+		dma_resv_iter_walk_unlocked(cursor);
+		restart = true;
+	} while (read_seqcount_retry(&cursor->obj->seq, cursor->seq));
+	rcu_read_unlock();
+
+	return cursor->fence;
+}
+EXPORT_SYMBOL(dma_resv_iter_next_unlocked);
+
 /**
  * dma_resv_copy_fences - Copy all fences from src to dst.
  * @dst: the destination reservation object
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index 9100dd3dc21f..baf77a542392 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -149,6 +149,101 @@ struct dma_resv {
 	struct dma_resv_list __rcu *fence;
 };
 
+/**
+ * struct dma_resv_iter - current position into the dma_resv fences
+ *
+ * Don't touch this directly in the driver, use the accessor function instead.
+ */
+struct dma_resv_iter {
+	/** @obj: The dma_resv object we iterate over */
+	struct dma_resv *obj;
+
+	/** @all_fences: If all fences should be returned */
+	bool all_fences;
+
+	/** @fence: the currently handled fence */
+	struct dma_fence *fence;
+
+	/** @seq: sequence number to check for modifications */
+	unsigned int seq;
+
+	/** @index: index into the shared fences */
+	unsigned int index;
+
+	/** @fences: the shared fences */
+	struct dma_resv_list *fences;
+
+	/** @is_restarted: true if this is the first returned fence */
+	bool is_restarted;
+};
+
+struct dma_fence *dma_resv_iter_first_unlocked(struct dma_resv_iter *cursor);
+struct dma_fence *dma_resv_iter_next_unlocked(struct dma_resv_iter *cursor);
+
+/**
+ * dma_resv_iter_begin - initialize a dma_resv_iter object
+ * @cursor: The dma_resv_iter object to initialize
+ * @obj: The dma_resv object which we want to iterator over
+ * @all_fences: If all fences should be returned or just the exclusive one
+ */
+static inline void dma_resv_iter_begin(struct dma_resv_iter *cursor,
+				       struct dma_resv *obj,
+				       bool all_fences)
+{
+	cursor->obj = obj;
+	cursor->all_fences = all_fences;
+	cursor->fence = NULL;
+}
+
+/**
+ * dma_resv_iter_end - cleanup a dma_resv_iter object
+ * @cursor: the dma_resv_iter object which should be cleaned up
+ *
+ * Make sure that the reference to the fence in the cursor is properly
+ * dropped.
+ */
+static inline void dma_resv_iter_end(struct dma_resv_iter *cursor)
+{
+	dma_fence_put(cursor->fence);
+}
+
+/**
+ * dma_resv_iter_is_exclusive - test if the current fence is the exclusive one
+ * @cursor: the cursor of the current position
+ *
+ * Returns true if the currently returned fence is the exclusive one.
+ */
+static inline bool dma_resv_iter_is_exclusive(struct dma_resv_iter *cursor)
+{
+	return cursor->index == -1;
+}
+
+/**
+ * dma_resv_iter_is_restarted - test if this is the first fence after a restart
+ * @cursor: the cursor with the current position
+ *
+ * Return true if this is the first fence in an interation after a restart.
+ */
+static inline bool dma_resv_iter_is_restarted(struct dma_resv_iter *cursor)
+{
+	return cursor->is_restarted;
+}
+
+/**
+ * dma_resv_for_each_fence_unlocked - unlocked fence iterator
+ * @cursor: a struct dma_resv_iter pointer
+ * @fence: the current fence
+ *
+ * Iterate over the fences in a struct dma_resv object without holding the
+ * &dma_resv.lock and using RCU instead. The cursor needs to be initialized
+ * with dma_resv_iter_begin() and cleaned up with dma_resv_iter_end(). Inside
+ * the iterator a reference to the dma_fence is hold and the RCU lock dropped.
+ * When the dma_resv is modified the iteration starts over again.
+ */
+#define dma_resv_for_each_fence_unlocked(cursor, fence)	\
+	for (fence = dma_resv_iter_first_unlocked(cursor);	\
+	     fence; fence = dma_resv_iter_next_unlocked(cursor))
+
 #define dma_resv_held(obj) lockdep_is_held(&(obj)->lock.base)
 #define dma_resv_assert_held(obj) lockdep_assert_held(&(obj)->lock.base)
On 22/09/2021 10:10, Christian König wrote:
Abstract the complexity of iterating over all the fences in a dma_resv object.
The new loop handles the whole RCU and retry dance and returns only fences where we can be sure we grabbed the right one.
v2: fix accessing the shared fences while they might be freed, improve kerneldoc, rename _cursor to _iter, add dma_resv_iter_is_exclusive, add dma_resv_iter_begin/end
v3: restructure the code, move rcu_read_lock()/unlock() into the iterator, add dma_resv_iter_is_restarted()
v4: fix NULL deref when no explicit fence exists, drop superfluous rcu_read_lock()/unlock() calls.
Signed-off-by: Christian König <christian.koenig@amd.com>
 drivers/dma-buf/dma-resv.c | 95 ++++++++++++++++++++++++++++++++++++++
 include/linux/dma-resv.h   | 95 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 190 insertions(+)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 84fbe60629e3..7768140ab36d 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -323,6 +323,101 @@ void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence)
 }
 EXPORT_SYMBOL(dma_resv_add_excl_fence);
 
+/**
+ * dma_resv_iter_restart_unlocked - restart the unlocked iterator
+ * @cursor: The dma_resv_iter object to restart
+ *
+ * Restart the unlocked iteration by initializing the cursor object.
+ */
+static void dma_resv_iter_restart_unlocked(struct dma_resv_iter *cursor)
+{
+	cursor->seq = read_seqcount_begin(&cursor->obj->seq);
+	cursor->index = -1;
+	if (cursor->all_fences)
+		cursor->fences = dma_resv_shared_list(cursor->obj);
+	else
+		cursor->fences = NULL;
+	cursor->is_restarted = true;
+}
+
+/**
+ * dma_resv_iter_walk_unlocked - walk over fences in a dma_resv obj
+ * @cursor: cursor to record the current position
+ *
+ * Return all the fences in the dma_resv object which are not yet signaled.
+ * The returned fence has an extra local reference so will stay alive.
+ * If a concurrent modify is detected the whole iterration is started over again.
iteration
+ */
+static void dma_resv_iter_walk_unlocked(struct dma_resv_iter *cursor)
+{
+	struct dma_resv *obj = cursor->obj;
+
+	do {
+		/* Drop the reference from the previous round */
+		dma_fence_put(cursor->fence);
+
+		if (cursor->index++ == -1) {
+			cursor->fence = dma_resv_excl_fence(obj);
+
+		} else if (!cursor->fences ||
+			   cursor->index >= cursor->fences->shared_count) {
+			cursor->fence = NULL;
+
+		} else {
+			struct dma_resv_list *fences = cursor->fences;
+			unsigned int idx = cursor->index;
+
+			cursor->fence = rcu_dereference(fences->shared[idx]);
+		}
+		if (cursor->fence)
+			cursor->fence = dma_fence_get_rcu(cursor->fence);
+	} while (cursor->fence && dma_fence_is_signaled(cursor->fence));
+}
+
+/**
+ * dma_resv_iter_first_unlocked - first fence in an unlocked dma_resv obj.
+ * @cursor: the cursor with the current position
+ *
+ * Returns the first fence from an unlocked dma_resv obj.
+ */
+struct dma_fence *dma_resv_iter_first_unlocked(struct dma_resv_iter *cursor)
+{
+	rcu_read_lock();
+	do {
+		dma_resv_iter_restart_unlocked(cursor);
+		dma_resv_iter_walk_unlocked(cursor);
+	} while (read_seqcount_retry(&cursor->obj->seq, cursor->seq));
+	rcu_read_unlock();
+
+	return cursor->fence;
+}
+EXPORT_SYMBOL(dma_resv_iter_first_unlocked);
Why is this one split from dma_resv_iter_begin and even exported? I couldn't find any users in the series.
+
+/**
+ * dma_resv_iter_next_unlocked - next fence in an unlocked dma_resv obj.
+ * @cursor: the cursor with the current position
+ *
+ * Returns the next fence from an unlocked dma_resv obj.
+ */
+struct dma_fence *dma_resv_iter_next_unlocked(struct dma_resv_iter *cursor)
+{
+	bool restart;
+
+	rcu_read_lock();
+	cursor->is_restarted = false;
+	restart = read_seqcount_retry(&cursor->obj->seq, cursor->seq);
+	do {
+		if (restart)
+			dma_resv_iter_restart_unlocked(cursor);
+		dma_resv_iter_walk_unlocked(cursor);
+		restart = true;
+	} while (read_seqcount_retry(&cursor->obj->seq, cursor->seq));
+	rcu_read_unlock();
+
+	return cursor->fence;
+}
+EXPORT_SYMBOL(dma_resv_iter_next_unlocked);
Couldn't dma_resv_iter_first_unlocked and dma_resv_iter_next_unlocked share the same implementation? Especially if you are able to replace cursor->is_restarted with cursor->index == -1.
+
 /**
  * dma_resv_copy_fences - Copy all fences from src to dst.
  * @dst: the destination reservation object
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index 9100dd3dc21f..baf77a542392 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -149,6 +149,101 @@ struct dma_resv {
 	struct dma_resv_list __rcu *fence;
 };
 
+/**
+ * struct dma_resv_iter - current position into the dma_resv fences
+ *
+ * Don't touch this directly in the driver, use the accessor function instead.
+ */
+struct dma_resv_iter {
+	/** @obj: The dma_resv object we iterate over */
+	struct dma_resv *obj;
+
+	/** @all_fences: If all fences should be returned */
+	bool all_fences;
+
+	/** @fence: the currently handled fence */
+	struct dma_fence *fence;
+
+	/** @seq: sequence number to check for modifications */
+	unsigned int seq;
+
+	/** @index: index into the shared fences */
+	unsigned int index;
+
+	/** @fences: the shared fences */
+	struct dma_resv_list *fences;
+
+	/** @is_restarted: true if this is the first returned fence */
+	bool is_restarted;
+};
+
+struct dma_fence *dma_resv_iter_first_unlocked(struct dma_resv_iter *cursor);
+struct dma_fence *dma_resv_iter_next_unlocked(struct dma_resv_iter *cursor);
+
+/**
+ * dma_resv_iter_begin - initialize a dma_resv_iter object
+ * @cursor: The dma_resv_iter object to initialize
+ * @obj: The dma_resv object which we want to iterator over
iterate
+ * @all_fences: If all fences should be returned or just the exclusive one
+ */
+static inline void dma_resv_iter_begin(struct dma_resv_iter *cursor,
+				       struct dma_resv *obj,
+				       bool all_fences)
+{
+	cursor->obj = obj;
+	cursor->all_fences = all_fences;
+	cursor->fence = NULL;
+}
+
+/**
+ * dma_resv_iter_end - cleanup a dma_resv_iter object
+ * @cursor: the dma_resv_iter object which should be cleaned up
+ *
+ * Make sure that the reference to the fence in the cursor is properly
+ * dropped.
+ */
+static inline void dma_resv_iter_end(struct dma_resv_iter *cursor)
+{
+	dma_fence_put(cursor->fence);
+}
+
+/**
+ * dma_resv_iter_is_exclusive - test if the current fence is the exclusive one
+ * @cursor: the cursor of the current position
+ *
+ * Returns true if the currently returned fence is the exclusive one.
+ */
+static inline bool dma_resv_iter_is_exclusive(struct dma_resv_iter *cursor)
+{
+	return cursor->index == -1;
+}
+
+/**
+ * dma_resv_iter_is_restarted - test if this is the first fence after a restart
+ * @cursor: the cursor with the current position
+ *
+ * Return true if this is the first fence in an interation after a restart.
iteration
+ */
+static inline bool dma_resv_iter_is_restarted(struct dma_resv_iter *cursor)
+{
+	return cursor->is_restarted;
+}
+
+/**
+ * dma_resv_for_each_fence_unlocked - unlocked fence iterator
+ * @cursor: a struct dma_resv_iter pointer
+ * @fence: the current fence
+ *
+ * Iterate over the fences in a struct dma_resv object without holding the
+ * &dma_resv.lock and using RCU instead. The cursor needs to be initialized
+ * with dma_resv_iter_begin() and cleaned up with dma_resv_iter_end(). Inside
+ * the iterator a reference to the dma_fence is hold and the RCU lock dropped.
held
+ * When the dma_resv is modified the iteration starts over again.
+ */
+#define dma_resv_for_each_fence_unlocked(cursor, fence)	\
+	for (fence = dma_resv_iter_first_unlocked(cursor);	\
+	     fence; fence = dma_resv_iter_next_unlocked(cursor))
+
 #define dma_resv_held(obj) lockdep_is_held(&(obj)->lock.base)
 #define dma_resv_assert_held(obj) lockdep_assert_held(&(obj)->lock.base)
Regards,
Tvrtko
On 22.09.21 at 16:36, Tvrtko Ursulin wrote:
+/**
+ * dma_resv_iter_first_unlocked - first fence in an unlocked dma_resv obj.
+ * @cursor: the cursor with the current position
+ *
+ * Returns the first fence from an unlocked dma_resv obj.
+ */
+struct dma_fence *dma_resv_iter_first_unlocked(struct dma_resv_iter *cursor)
+{
+	rcu_read_lock();
+	do {
+		dma_resv_iter_restart_unlocked(cursor);
+		dma_resv_iter_walk_unlocked(cursor);
+	} while (read_seqcount_retry(&cursor->obj->seq, cursor->seq));
+	rcu_read_unlock();
+
+	return cursor->fence;
+}
+EXPORT_SYMBOL(dma_resv_iter_first_unlocked);
Why is this one split from dma_resv_iter_begin and even exported?
I've split it to be able to use dma_resv_iter_begin in both the unlocked and locked iterator.
I couldn't find any users in the series.
This is used in the dma_resv_for_each_fence_unlocked() macro to return the first fence.
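For reference, that macro from the header in this patch expands to:

	#define dma_resv_for_each_fence_unlocked(cursor, fence)	\
		for (fence = dma_resv_iter_first_unlocked(cursor);	\
		     fence; fence = dma_resv_iter_next_unlocked(cursor))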
+/**
+ * dma_resv_iter_next_unlocked - next fence in an unlocked dma_resv obj.
+ * @cursor: the cursor with the current position
+ *
+ * Returns the next fence from an unlocked dma_resv obj.
+ */
+struct dma_fence *dma_resv_iter_next_unlocked(struct dma_resv_iter *cursor)
+{
+	bool restart;
+
+	rcu_read_lock();
+	cursor->is_restarted = false;
+	restart = read_seqcount_retry(&cursor->obj->seq, cursor->seq);
+	do {
+		if (restart)
+			dma_resv_iter_restart_unlocked(cursor);
+		dma_resv_iter_walk_unlocked(cursor);
+		restart = true;
+	} while (read_seqcount_retry(&cursor->obj->seq, cursor->seq));
+	rcu_read_unlock();
+
+	return cursor->fence;
+}
+EXPORT_SYMBOL(dma_resv_iter_next_unlocked);
Couldn't dma_resv_iter_first_unlocked and dma_resv_iter_next_unlocked share the same implementation? Especially if you are able to replace cursor->is_restarted with cursor->index == -1.
That's what I had initially, but Daniel disliked it for some reason. You then need a centralized walk function instead of first/next.
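Roughly, the consolidated variant would look something like this (illustrative sketch only, not the exact code from that earlier revision):

	/* Hypothetical central walk used by both first() and next() */
	static struct dma_fence *
	dma_resv_iter_walk(struct dma_resv_iter *cursor, bool restart)
	{
		rcu_read_lock();
		cursor->is_restarted = false;
		if (!restart)
			restart = read_seqcount_retry(&cursor->obj->seq,
						      cursor->seq);
		do {
			if (restart)
				dma_resv_iter_restart_unlocked(cursor);
			dma_resv_iter_walk_unlocked(cursor);
			restart = true;
		} while (read_seqcount_retry(&cursor->obj->seq, cursor->seq));
		rcu_read_unlock();

		return cursor->fence;
	}

with first_unlocked() then being dma_resv_iter_walk(cursor, true) and next_unlocked() being dma_resv_iter_walk(cursor, false).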
Thanks, Christian.
Regards,
Tvrtko
On 22/09/2021 15:50, Christian König wrote:
On 22.09.21 at 16:36, Tvrtko Ursulin wrote:
+/**
+ * dma_resv_iter_first_unlocked - first fence in an unlocked dma_resv obj.
+ * @cursor: the cursor with the current position
+ *
+ * Returns the first fence from an unlocked dma_resv obj.
+ */
+struct dma_fence *dma_resv_iter_first_unlocked(struct dma_resv_iter *cursor)
+{
+	rcu_read_lock();
+	do {
+		dma_resv_iter_restart_unlocked(cursor);
+		dma_resv_iter_walk_unlocked(cursor);
+	} while (read_seqcount_retry(&cursor->obj->seq, cursor->seq));
+	rcu_read_unlock();
+
+	return cursor->fence;
+}
+EXPORT_SYMBOL(dma_resv_iter_first_unlocked);
Why is this one split from dma_resv_iter_begin and even exported?
I've split it to be able to use dma_resv_iter_begin in both the unlocked and locked iterator.
Ok.
I couldn't find any users in the series.
This is used in the dma_resv_for_each_fence_unlocked() macro to return the first fence.
Doh!
+/**
+ * dma_resv_iter_next_unlocked - next fence in an unlocked dma_resv obj.
+ * @cursor: the cursor with the current position
+ *
+ * Returns the next fence from an unlocked dma_resv obj.
+ */
+struct dma_fence *dma_resv_iter_next_unlocked(struct dma_resv_iter *cursor)
+{
+	bool restart;
+
+	rcu_read_lock();
+	cursor->is_restarted = false;
+	restart = read_seqcount_retry(&cursor->obj->seq, cursor->seq);
+	do {
+		if (restart)
+			dma_resv_iter_restart_unlocked(cursor);
+		dma_resv_iter_walk_unlocked(cursor);
+		restart = true;
+	} while (read_seqcount_retry(&cursor->obj->seq, cursor->seq));
+	rcu_read_unlock();
+
+	return cursor->fence;
+}
+EXPORT_SYMBOL(dma_resv_iter_next_unlocked);
Couldn't dma_resv_iter_first_unlocked and dma_resv_iter_next_unlocked share the same implementation? Especially if you are able to replace cursor->is_restarted with cursor->index == -1.
That's what I had initially, but Daniel disliked it for some reason. You then need a centralized walk function instead of first/next.
I had some ideas to only consolidate "first" and "next" helpers but never mind, yours is fine as well.
Regards,
Tvrtko
Thanks, Christian.
Regards,
Tvrtko
A simpler version of the iterator to be used when the dma_resv object is locked.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/dma-resv.c | 46 ++++++++++++++++++++++++++++++++++++++
 include/linux/dma-resv.h   | 19 ++++++++++++++++
 2 files changed, 65 insertions(+)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 7768140ab36d..c6f1f6837b55 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -418,6 +418,52 @@ struct dma_fence *dma_resv_iter_next_unlocked(struct dma_resv_iter *cursor)
 }
 EXPORT_SYMBOL(dma_resv_iter_next_unlocked);
 
+/**
+ * dma_resv_iter_first - first fence from a locked dma_resv object
+ * @cursor: cursor to record the current position
+ *
+ * Return all the fences in the dma_resv object while holding the
+ * &dma_resv.lock.
+ */
+struct dma_fence *dma_resv_iter_first(struct dma_resv_iter *cursor)
+{
+	struct dma_fence *fence;
+
+	dma_resv_assert_held(cursor->obj);
+
+	cursor->index = -1;
+	cursor->fences = dma_resv_shared_list(cursor->obj);
+
+	fence = dma_resv_excl_fence(cursor->obj);
+	if (!fence)
+		fence = dma_resv_iter_next(cursor);
+
+	cursor->is_restarted = true;
+	return fence;
+}
+EXPORT_SYMBOL_GPL(dma_resv_iter_first);
+
+/**
+ * dma_resv_iter_next - next fence from a locked dma_resv object
+ * @cursor: cursor to record the current position
+ *
+ * Return all the fences in the dma_resv object while holding the
+ * &dma_resv.lock.
+ */
+struct dma_fence *dma_resv_iter_next(struct dma_resv_iter *cursor)
+{
+	dma_resv_assert_held(cursor->obj);
+
+	cursor->is_restarted = false;
+	if (!cursor->all_fences || !cursor->fences ||
+	    ++cursor->index >= cursor->fences->shared_count)
+		return NULL;
+
+	return rcu_dereference_protected(cursor->fences->shared[cursor->index],
+					 dma_resv_held(cursor->obj));
+}
+EXPORT_SYMBOL_GPL(dma_resv_iter_next);
+
 /**
  * dma_resv_copy_fences - Copy all fences from src to dst.
  * @dst: the destination reservation object
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index baf77a542392..72e7ebaa675f 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -179,6 +179,8 @@ struct dma_resv_iter {
 struct dma_fence *dma_resv_iter_first_unlocked(struct dma_resv_iter *cursor);
 struct dma_fence *dma_resv_iter_next_unlocked(struct dma_resv_iter *cursor);
+struct dma_fence *dma_resv_iter_first(struct dma_resv_iter *cursor);
+struct dma_fence *dma_resv_iter_next(struct dma_resv_iter *cursor);
 
 /**
  * dma_resv_iter_begin - initialize a dma_resv_iter object
@@ -244,6 +246,23 @@ static inline bool dma_resv_iter_is_restarted(struct dma_resv_iter *cursor)
 	for (fence = dma_resv_iter_first_unlocked(cursor);	\
 	     fence; fence = dma_resv_iter_next_unlocked(cursor))
 
+/**
+ * dma_resv_for_each_fence - fence iterator
+ * @cursor: a struct dma_resv_iter pointer
+ * @obj: a dma_resv object pointer
+ * @all_fences: true if all fences should be returned
+ * @fence: the current fence
+ *
+ * Iterate over the fences in a struct dma_resv object while holding the
+ * &dma_resv.lock. @all_fences controls if the shared fences are returned as
+ * well. The cursor initialisation is part of the iterator and the fence stays
+ * valid as long as the lock is held.
+ */
+#define dma_resv_for_each_fence(cursor, obj, all_fences, fence)	\
+	for (dma_resv_iter_begin(cursor, obj, all_fences),		\
+	     fence = dma_resv_iter_first(cursor); fence;		\
+	     fence = dma_resv_iter_next(cursor))
+
 #define dma_resv_held(obj) lockdep_is_held(&(obj)->lock.base)
 #define dma_resv_assert_held(obj) lockdep_assert_held(&(obj)->lock.base)
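With that, a locked user boils down to something like this (sketch; "resv" is a locked reservation object and handle_fence() a hypothetical per-fence callback):

	struct dma_resv_iter cursor;
	struct dma_fence *fence;

	dma_resv_assert_held(resv);
	dma_resv_for_each_fence(&cursor, resv, true, fence) {
		/* No extra reference is taken, the fence stays valid as
		 * long as the object is locked, and no dma_resv_iter_end()
		 * is needed afterwards.
		 */
		handle_fence(fence);
	}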
This makes the function much simpler since the complex retry logic is now handled elsewhere.
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/dma-buf/dma-resv.c | 84 +++++++++++++++-----------------------
 1 file changed, 32 insertions(+), 52 deletions(-)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index c6f1f6837b55..556d5afafe3f 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -473,74 +473,54 @@ EXPORT_SYMBOL_GPL(dma_resv_iter_next); */ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src) { - struct dma_resv_list *src_list, *dst_list; - struct dma_fence *old, *new; - unsigned int i; + struct dma_resv_iter cursor; + struct dma_resv_list *list; + struct dma_fence *f, *excl;
dma_resv_assert_held(dst);
- rcu_read_lock(); - src_list = dma_resv_shared_list(src); + list = NULL; + excl = NULL;
-retry: - if (src_list) { - unsigned int shared_count = src_list->shared_count; + dma_resv_iter_begin(&cursor, src, true); + dma_resv_for_each_fence_unlocked(&cursor, f) {
- rcu_read_unlock(); + if (dma_resv_iter_is_restarted(&cursor)) { + dma_resv_list_free(list); + dma_fence_put(excl);
- dst_list = dma_resv_list_alloc(shared_count); - if (!dst_list) - return -ENOMEM; + if (cursor.fences) { + unsigned int cnt = cursor.fences->shared_count;
- rcu_read_lock(); - src_list = dma_resv_shared_list(src); - if (!src_list || src_list->shared_count > shared_count) { - kfree(dst_list); - goto retry; - } - - dst_list->shared_count = 0; - for (i = 0; i < src_list->shared_count; ++i) { - struct dma_fence __rcu **dst; - struct dma_fence *fence; + list = dma_resv_list_alloc(cnt); + if (!list) { + dma_resv_iter_end(&cursor); + return -ENOMEM; + }
- fence = rcu_dereference(src_list->shared[i]); - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &fence->flags)) - continue; + list->shared_count = 0;
- if (!dma_fence_get_rcu(fence)) { - dma_resv_list_free(dst_list); - src_list = dma_resv_shared_list(src); - goto retry; + } else { + list = NULL; } - - if (dma_fence_is_signaled(fence)) { - dma_fence_put(fence); - continue; - } - - dst = &dst_list->shared[dst_list->shared_count++]; - rcu_assign_pointer(*dst, fence); + excl = NULL; } - } else { - dst_list = NULL; - }
- new = dma_fence_get_rcu_safe(&src->fence_excl); - rcu_read_unlock(); - - src_list = dma_resv_shared_list(dst); - old = dma_resv_excl_fence(dst); + dma_fence_get(f); + if (dma_resv_iter_is_exclusive(&cursor)) + excl = f; + else + RCU_INIT_POINTER(list->shared[list->shared_count++], f); + } + dma_resv_iter_end(&cursor);
write_seqcount_begin(&dst->seq); - /* write_seqcount_begin provides the necessary memory barrier */ - RCU_INIT_POINTER(dst->fence_excl, new); - RCU_INIT_POINTER(dst->fence, dst_list); + excl = rcu_replace_pointer(dst->fence_excl, excl, dma_resv_held(dst)); + list = rcu_replace_pointer(dst->fence, list, dma_resv_held(dst)); write_seqcount_end(&dst->seq);
- dma_resv_list_free(src_list); - dma_fence_put(old); + dma_resv_list_free(list); + dma_fence_put(excl);
return 0; }
This makes the function much simpler since the complex retry logic is now handled elsewhere.
v2: use sizeof(void*) instead
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/dma-buf/dma-resv.c | 108 +++++++++++++------------------------
 1 file changed, 36 insertions(+), 72 deletions(-)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 556d5afafe3f..cde97e4e547f 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -530,99 +530,63 @@ EXPORT_SYMBOL(dma_resv_copy_fences); * dma_resv_get_fences - Get an object's shared and exclusive * fences without update side lock held * @obj: the reservation object - * @pfence_excl: the returned exclusive fence (or NULL) - * @pshared_count: the number of shared fences returned - * @pshared: the array of shared fence ptrs returned (array is krealloc'd to + * @fence_excl: the returned exclusive fence (or NULL) + * @shared_count: the number of shared fences returned + * @shared: the array of shared fence ptrs returned (array is krealloc'd to * the required size, and must be freed by caller) * * Retrieve all fences from the reservation object. If the pointer for the * exclusive fence is not specified the fence is put into the array of the * shared fences as well. Returns either zero or -ENOMEM. */ -int dma_resv_get_fences(struct dma_resv *obj, struct dma_fence **pfence_excl, - unsigned int *pshared_count, - struct dma_fence ***pshared) +int dma_resv_get_fences(struct dma_resv *obj, struct dma_fence **fence_excl, + unsigned int *shared_count, struct dma_fence ***shared) { - struct dma_fence **shared = NULL; - struct dma_fence *fence_excl; - unsigned int shared_count; - int ret = 1; - - do { - struct dma_resv_list *fobj; - unsigned int i, seq; - size_t sz = 0; - - shared_count = i = 0; - - rcu_read_lock(); - seq = read_seqcount_begin(&obj->seq); + struct dma_resv_iter cursor; + struct dma_fence *fence;
- fence_excl = dma_resv_excl_fence(obj); - if (fence_excl && !dma_fence_get_rcu(fence_excl)) - goto unlock; + *shared_count = 0; + *shared = NULL;
- fobj = dma_resv_shared_list(obj); - if (fobj) - sz += sizeof(*shared) * fobj->shared_max; + if (fence_excl) + *fence_excl = NULL;
- if (!pfence_excl && fence_excl) - sz += sizeof(*shared); + dma_resv_iter_begin(&cursor, obj, true); + dma_resv_for_each_fence_unlocked(&cursor, fence) {
- if (sz) { - struct dma_fence **nshared; + if (dma_resv_iter_is_restarted(&cursor)) { + unsigned int count;
- nshared = krealloc(shared, sz, - GFP_NOWAIT | __GFP_NOWARN); - if (!nshared) { - rcu_read_unlock(); + while (*shared_count) + dma_fence_put((*shared)[--(*shared_count)]);
- dma_fence_put(fence_excl); - fence_excl = NULL; + if (fence_excl) + dma_fence_put(*fence_excl);
- nshared = krealloc(shared, sz, GFP_KERNEL); - if (nshared) { - shared = nshared; - continue; - } + count = cursor.fences ? cursor.fences->shared_count : 0; + count += fence_excl ? 0 : 1;
- ret = -ENOMEM; - break; - } - shared = nshared; - shared_count = fobj ? fobj->shared_count : 0; - for (i = 0; i < shared_count; ++i) { - shared[i] = rcu_dereference(fobj->shared[i]); - if (!dma_fence_get_rcu(shared[i])) - break; + /* Eventually re-allocate the array */ + *shared = krealloc_array(*shared, count, + sizeof(void *), + GFP_KERNEL); + if (count && !*shared) { + dma_resv_iter_end(&cursor); + return -ENOMEM; } }
- if (i != shared_count || read_seqcount_retry(&obj->seq, seq)) { - while (i--) - dma_fence_put(shared[i]); - dma_fence_put(fence_excl); - goto unlock; - } - - ret = 0; -unlock: - rcu_read_unlock(); - } while (ret); - - if (pfence_excl) - *pfence_excl = fence_excl; - else if (fence_excl) - shared[shared_count++] = fence_excl; + if (dma_resv_iter_is_exclusive(&cursor) && fence_excl) + *fence_excl = fence; + else + (*shared)[(*shared_count)++] = fence;
- if (!shared_count) { - kfree(shared); - shared = NULL; + /* Don't drop the reference */ + fence = NULL; } + dma_resv_iter_end(&cursor);
- *pshared_count = shared_count; - *pshared = shared; - return ret; + return 0; } EXPORT_SYMBOL_GPL(dma_resv_get_fences);
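For reference, a caller then looks roughly like this (illustrative sketch; "resv" stands in for an arbitrary dma_resv object and error handling is trimmed):

	struct dma_fence **fences;
	unsigned int count;
	int r;

	/* Passing NULL for fence_excl folds the exclusive fence into
	 * the returned array, as documented above.
	 */
	r = dma_resv_get_fences(resv, NULL, &count, &fences);
	if (r)
		return r;

	while (count--)
		dma_fence_put(fences[count]);
	kfree(fences);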
This makes the function much simpler since the complex retry logic is now handled elsewhere.
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/dma-buf/dma-resv.c | 69 +++++---------------------------------
 1 file changed, 8 insertions(+), 61 deletions(-)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index cde97e4e547f..d0e26cd13ecd 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -608,74 +608,21 @@ long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr, unsigned long timeout) { long ret = timeout ? timeout : 1; - unsigned int seq, shared_count; + struct dma_resv_iter cursor; struct dma_fence *fence; - int i; - -retry: - shared_count = 0; - seq = read_seqcount_begin(&obj->seq); - rcu_read_lock(); - i = -1; - - fence = dma_resv_excl_fence(obj); - if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { - if (!dma_fence_get_rcu(fence)) - goto unlock_retry; - - if (dma_fence_is_signaled(fence)) { - dma_fence_put(fence); - fence = NULL; - } - - } else { - fence = NULL; - } - - if (wait_all) { - struct dma_resv_list *fobj = dma_resv_shared_list(obj); - - if (fobj) - shared_count = fobj->shared_count; - - for (i = 0; !fence && i < shared_count; ++i) { - struct dma_fence *lfence;
- lfence = rcu_dereference(fobj->shared[i]); - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &lfence->flags)) - continue; - - if (!dma_fence_get_rcu(lfence)) - goto unlock_retry; - - if (dma_fence_is_signaled(lfence)) { - dma_fence_put(lfence); - continue; - } + dma_resv_iter_begin(&cursor, obj, wait_all); + dma_resv_for_each_fence_unlocked(&cursor, fence) {
- fence = lfence; - break; + ret = dma_fence_wait_timeout(fence, intr, ret); + if (ret <= 0) { + dma_resv_iter_end(&cursor); + return ret; } } + dma_resv_iter_end(&cursor);
- rcu_read_unlock(); - if (fence) { - if (read_seqcount_retry(&obj->seq, seq)) { - dma_fence_put(fence); - goto retry; - } - - ret = dma_fence_wait_timeout(fence, intr, ret); - dma_fence_put(fence); - if (ret > 0 && wait_all && (i + 1 < shared_count)) - goto retry; - } return ret; - -unlock_retry: - rcu_read_unlock(); - goto retry; } EXPORT_SYMBOL_GPL(dma_resv_wait_timeout);
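Callers are unaffected by this; e.g. an interruptible wait on all fences still looks like (illustrative only):

	long ret;

	ret = dma_resv_wait_timeout(resv, true, true, MAX_SCHEDULE_TIMEOUT);
	if (ret <= 0)
		return ret ? ret : -ETIME;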
This makes the function much simpler since the complex retry logic is now handled elsewhere. Since the iterator only returns unsignaled fences, seeing any fence at all already means the object isn't idle.
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/dma-buf/dma-resv.c | 57 +++++---------------------------------
 1 file changed, 7 insertions(+), 50 deletions(-)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index d0e26cd13ecd..fe9b84b308a3 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -627,22 +627,6 @@ long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr, EXPORT_SYMBOL_GPL(dma_resv_wait_timeout);
-static inline int dma_resv_test_signaled_single(struct dma_fence *passed_fence) -{ - struct dma_fence *fence, *lfence = passed_fence; - int ret = 1; - - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &lfence->flags)) { - fence = dma_fence_get_rcu(lfence); - if (!fence) - return -1; - - ret = !!dma_fence_is_signaled(fence); - dma_fence_put(fence); - } - return ret; -} - /** * dma_resv_test_signaled - Test if a reservation object's fences have been * signaled. @@ -659,43 +643,16 @@ static inline int dma_resv_test_signaled_single(struct dma_fence *passed_fence) */ bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all) { + struct dma_resv_iter cursor; struct dma_fence *fence; - unsigned int seq; - int ret; - - rcu_read_lock(); -retry: - ret = true; - seq = read_seqcount_begin(&obj->seq); - - if (test_all) { - struct dma_resv_list *fobj = dma_resv_shared_list(obj); - unsigned int i, shared_count; - - shared_count = fobj ? fobj->shared_count : 0; - for (i = 0; i < shared_count; ++i) { - fence = rcu_dereference(fobj->shared[i]); - ret = dma_resv_test_signaled_single(fence); - if (ret < 0) - goto retry; - else if (!ret) - break; - } - } - - fence = dma_resv_excl_fence(obj); - if (ret && fence) { - ret = dma_resv_test_signaled_single(fence); - if (ret < 0) - goto retry;
+ dma_resv_iter_begin(&cursor, obj, test_all); + dma_resv_for_each_fence_unlocked(&cursor, fence) { + dma_resv_iter_end(&cursor); + return false; } - - if (read_seqcount_retry(&obj->seq, seq)) - goto retry; - - rcu_read_unlock(); - return ret; + dma_resv_iter_end(&cursor); + return true; } EXPORT_SYMBOL_GPL(dma_resv_test_signaled);
This is probably a fix since previously we didn't even grab a reference to the fences.
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 3b22c0013dbf..301b0b4b082e 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -269,23 +269,15 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
 static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo)
 {
 	struct dma_resv *resv = &bo->base._resv;
-	struct dma_resv_list *fobj;
+	struct dma_resv_iter cursor;
 	struct dma_fence *fence;
-	int i;
-
-	rcu_read_lock();
-	fobj = dma_resv_shared_list(resv);
-	fence = dma_resv_excl_fence(resv);
-	if (fence && !fence->ops->signaled)
-		dma_fence_enable_sw_signaling(fence);
-
-	for (i = 0; fobj && i < fobj->shared_count; ++i) {
-		fence = rcu_dereference(fobj->shared[i]);
 
+	dma_resv_iter_begin(&cursor, resv, true);
+	dma_resv_for_each_fence_unlocked(&cursor, fence) {
 		if (!fence->ops->signaled)
 			dma_fence_enable_sw_signaling(fence);
 	}
-	rcu_read_unlock();
+	dma_resv_iter_end(&cursor);
 }
/**
Simplifying the code a bit.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 44 ++++++++----------------
 1 file changed, 14 insertions(+), 30 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 862eb3c1c4c5..f7d8487799b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -252,41 +252,25 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
 		     struct dma_resv *resv, enum amdgpu_sync_mode mode,
 		     void *owner)
 {
-	struct dma_resv_list *flist;
+	struct dma_resv_iter cursor;
 	struct dma_fence *f;
-	unsigned i;
-	int r = 0;
+	int r;
 
 	if (resv == NULL)
 		return -EINVAL;
 
-	/* always sync to the exclusive fence */
-	f = dma_resv_excl_fence(resv);
-	dma_fence_chain_for_each(f, f) {
-		struct dma_fence_chain *chain = to_dma_fence_chain(f);
-
-		if (amdgpu_sync_test_fence(adev, mode, owner, chain ?
-					   chain->fence : f)) {
-			r = amdgpu_sync_fence(sync, f);
-			dma_fence_put(f);
-			if (r)
-				return r;
-			break;
-		}
-	}
-
-	flist = dma_resv_shared_list(resv);
-	if (!flist)
-		return 0;
-
-	for (i = 0; i < flist->shared_count; ++i) {
-		f = rcu_dereference_protected(flist->shared[i],
-					      dma_resv_held(resv));
-
-		if (amdgpu_sync_test_fence(adev, mode, owner, f)) {
-			r = amdgpu_sync_fence(sync, f);
-			if (r)
-				return r;
+	dma_resv_for_each_fence(&cursor, resv, true, f) {
+		dma_fence_chain_for_each(f, f) {
+			struct dma_fence_chain *chain = to_dma_fence_chain(f);
+
+			if (amdgpu_sync_test_fence(adev, mode, owner, chain ?
+						   chain->fence : f)) {
+				r = amdgpu_sync_fence(sync, f);
+				dma_fence_put(f);
+				if (r)
+					return r;
+				break;
+			}
 		}
 	}
 	return 0;
Simplifying the code a bit.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 1129e17e9f09..4511cd15c3a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1332,10 +1332,9 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 					    const struct ttm_place *place)
 {
 	unsigned long num_pages = bo->resource->num_pages;
+	struct dma_resv_iter resv_cursor;
 	struct amdgpu_res_cursor cursor;
-	struct dma_resv_list *flist;
 	struct dma_fence *f;
-	int i;
 
 	/* Swapout? */
 	if (bo->resource->mem_type == TTM_PL_SYSTEM)
@@ -1349,14 +1348,9 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 	 * If true, then return false as any KFD process needs all its BOs to
 	 * be resident to run successfully
 	 */
-	flist = dma_resv_shared_list(bo->base.resv);
-	if (flist) {
-		for (i = 0; i < flist->shared_count; ++i) {
-			f = rcu_dereference_protected(flist->shared[i],
-						      dma_resv_held(bo->base.resv));
-			if (amdkfd_fence_check_mm(f, current->mm))
-				return false;
-		}
+	dma_resv_for_each_fence(&resv_cursor, bo->base.resv, true, f) {
+		if (amdkfd_fence_check_mm(f, current->mm))
+			return false;
 	}
 
 	switch (bo->resource->mem_type) {
Simplifying the code a bit. Also drop the RCU read side lock since the object is locked anyway.
Untested since I can't get the driver to compile on !ARM.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/msm/msm_gem.c | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 22308a1b66fc..14907622769f 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -880,7 +880,7 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m,
 {
 	struct msm_gem_object *msm_obj = to_msm_bo(obj);
 	struct dma_resv *robj = obj->resv;
-	struct dma_resv_list *fobj;
+	struct dma_resv_iter cursor;
 	struct dma_fence *fence;
 	struct msm_gem_vma *vma;
 	uint64_t off = drm_vma_node_start(&obj->vma_node);
@@ -955,22 +955,13 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m,
 			seq_puts(m, "\n");
 	}
 
-	rcu_read_lock();
-	fobj = dma_resv_shared_list(robj);
-	if (fobj) {
-		unsigned int i, shared_count = fobj->shared_count;
-
-		for (i = 0; i < shared_count; i++) {
-			fence = rcu_dereference(fobj->shared[i]);
+	dma_resv_for_each_fence(&cursor, robj, true, fence) {
+		if (dma_resv_iter_is_exclusive(&cursor))
+			describe_fence(fence, "Exclusive", m);
+		else
 			describe_fence(fence, "Shared", m);
-		}
 	}
 
-	fence = dma_resv_excl_fence(robj);
-	if (fence)
-		describe_fence(fence, "Exclusive", m);
-	rcu_read_unlock();
-
 	msm_gem_unlock(obj);
 }
Simplifying the code a bit.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/radeon/radeon_sync.c | 22 +++------------------
 1 file changed, 3 insertions(+), 19 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon_sync.c b/drivers/gpu/drm/radeon/radeon_sync.c
index 9257b60144c4..b991ba1bcd51 100644
--- a/drivers/gpu/drm/radeon/radeon_sync.c
+++ b/drivers/gpu/drm/radeon/radeon_sync.c
@@ -91,33 +91,17 @@ int radeon_sync_resv(struct radeon_device *rdev,
 		     struct dma_resv *resv,
 		     bool shared)
 {
-	struct dma_resv_list *flist;
-	struct dma_fence *f;
+	struct dma_resv_iter cursor;
 	struct radeon_fence *fence;
-	unsigned i;
+	struct dma_fence *f;
 	int r = 0;
 
-	/* always sync to the exclusive fence */
-	f = dma_resv_excl_fence(resv);
-	fence = f ? to_radeon_fence(f) : NULL;
-	if (fence && fence->rdev == rdev)
-		radeon_sync_fence(sync, fence);
-	else if (f)
-		r = dma_fence_wait(f, true);
-
-	flist = dma_resv_shared_list(resv);
-	if (shared || !flist || r)
-		return r;
-
-	for (i = 0; i < flist->shared_count; ++i) {
-		f = rcu_dereference_protected(flist->shared[i],
-					      dma_resv_held(resv));
+	dma_resv_for_each_fence(&cursor, resv, shared, f) {
 		fence = to_radeon_fence(f);
 		if (fence && fence->rdev == rdev)
 			radeon_sync_fence(sync, fence);
 		else
 			r = dma_fence_wait(f, true);
-
 		if (r)
 			break;
 	}
Simplifying the code a bit.
v2: use dma_resv_for_each_fence
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/scheduler/sched_main.c | 26 ++++++--------------------
 1 file changed, 6 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 042c16b5d54a..5bc5f775abe1 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -699,30 +699,16 @@ int drm_sched_job_add_implicit_dependencies(struct drm_sched_job *job,
 					    struct drm_gem_object *obj,
 					    bool write)
 {
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
 	int ret;
-	struct dma_fence **fences;
-	unsigned int i, fence_count;
-
-	if (!write) {
-		struct dma_fence *fence = dma_resv_get_excl_unlocked(obj->resv);
-
-		return drm_sched_job_add_dependency(job, fence);
-	}
-
-	ret = dma_resv_get_fences(obj->resv, NULL, &fence_count, &fences);
-	if (ret || !fence_count)
-		return ret;
 
-	for (i = 0; i < fence_count; i++) {
-		ret = drm_sched_job_add_dependency(job, fences[i]);
+	dma_resv_for_each_fence(&cursor, obj->resv, write, fence) {
+		ret = drm_sched_job_add_dependency(job, fence);
 		if (ret)
-			break;
+			return ret;
 	}
-
-	for (; i < fence_count; i++)
-		dma_fence_put(fences[i]);
-	kfree(fences);
-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL(drm_sched_job_add_implicit_dependencies);
This makes the function much simpler since the complex retry logic is now handled elsewhere.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_busy.c | 35 ++++++++++--------------
 1 file changed, 14 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
index 6234e17259c1..313afb4a11c7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
@@ -82,8 +82,8 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_gem_busy *args = data;
 	struct drm_i915_gem_object *obj;
-	struct dma_resv_list *list;
-	unsigned int seq;
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
 	int err;
 
 	err = -ENOENT;
@@ -109,27 +109,20 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 	 * to report the overall busyness. This is what the wait-ioctl does.
 	 *
 	 */
-retry:
-	seq = raw_read_seqcount(&obj->base.resv->seq);
-
-	/* Translate the exclusive fence to the READ *and* WRITE engine */
-	args->busy = busy_check_writer(dma_resv_excl_fence(obj->base.resv));
-
-	/* Translate shared fences to READ set of engines */
-	list = dma_resv_shared_list(obj->base.resv);
-	if (list) {
-		unsigned int shared_count = list->shared_count, i;
-
-		for (i = 0; i < shared_count; ++i) {
-			struct dma_fence *fence =
-				rcu_dereference(list->shared[i]);
-
+	args->busy = false;
+	dma_resv_iter_begin(&cursor, obj->base.resv, true);
+	dma_resv_for_each_fence_unlocked(&cursor, fence) {
+		if (dma_resv_iter_is_restarted(&cursor))
+			args->busy = 0;
+
+		if (dma_resv_iter_is_exclusive(&cursor))
+			/* Translate the exclusive fence to the READ *and* WRITE engine */
+			args->busy |= busy_check_writer(fence);
+		else
+			/* Translate shared fences to READ set of engines */
 			args->busy |= busy_check_reader(fence);
-		}
 	}
-
-	if (args->busy && read_seqcount_retry(&obj->base.resv->seq, seq))
-		goto retry;
+	dma_resv_iter_end(&cursor);
 
 	err = 0;
 out:
On 22/09/2021 10:10, Christian König wrote:
This makes the function much simpler since the complex retry logic is now handled elsewhere.
Signed-off-by: Christian König <christian.koenig@amd.com>
 drivers/gpu/drm/i915/gem/i915_gem_busy.c | 35 ++++++++++--------------
 1 file changed, 14 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
index 6234e17259c1..313afb4a11c7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
@@ -82,8 +82,8 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_gem_busy *args = data;
 	struct drm_i915_gem_object *obj;
-	struct dma_resv_list *list;
-	unsigned int seq;
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
 	int err;
 
 	err = -ENOENT;
@@ -109,27 +109,20 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 	 * to report the overall busyness. This is what the wait-ioctl does.
 	 *
 	 */
-retry:
-	seq = raw_read_seqcount(&obj->base.resv->seq);
-
-	/* Translate the exclusive fence to the READ *and* WRITE engine */
-	args->busy = busy_check_writer(dma_resv_excl_fence(obj->base.resv));
-
-	/* Translate shared fences to READ set of engines */
-	list = dma_resv_shared_list(obj->base.resv);
-	if (list) {
-		unsigned int shared_count = list->shared_count, i;
-
-		for (i = 0; i < shared_count; ++i) {
-			struct dma_fence *fence =
-				rcu_dereference(list->shared[i]);
-
+	args->busy = false;
You can drop this line, especially since it is not a boolean. With that:
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Regards,
Tvrtko
+	dma_resv_iter_begin(&cursor, obj->base.resv, true);
+	dma_resv_for_each_fence_unlocked(&cursor, fence) {
+		if (dma_resv_iter_is_restarted(&cursor))
+			args->busy = 0;
+
+		if (dma_resv_iter_is_exclusive(&cursor))
+			/* Translate the exclusive fence to the READ *and* WRITE engine */
+			args->busy |= busy_check_writer(fence);
+		else
+			/* Translate shared fences to READ set of engines */
			args->busy |= busy_check_reader(fence);
-		}
 	}
-
-	if (args->busy && read_seqcount_retry(&obj->base.resv->seq, seq))
-		goto retry;
+	dma_resv_iter_end(&cursor);
 
 	err = 0;
 out:
On 22/09/2021 11:21, Tvrtko Ursulin wrote:
On 22/09/2021 10:10, Christian König wrote:
This makes the function much simpler since the complex retry logic is now handled elsewhere.
Signed-off-by: Christian König <christian.koenig@amd.com>
drivers/gpu/drm/i915/gem/i915_gem_busy.c | 35 ++++++++++-------------- 1 file changed, 14 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c index 6234e17259c1..313afb4a11c7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c @@ -82,8 +82,8 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_busy *args = data; struct drm_i915_gem_object *obj; - struct dma_resv_list *list; - unsigned int seq; + struct dma_resv_iter cursor; + struct dma_fence *fence; int err; err = -ENOENT; @@ -109,27 +109,20 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, * to report the overall busyness. This is what the wait-ioctl does. * */ -retry: - seq = raw_read_seqcount(&obj->base.resv->seq);
- /* Translate the exclusive fence to the READ *and* WRITE engine */ - args->busy = busy_check_writer(dma_resv_excl_fence(obj->base.resv));
- /* Translate shared fences to READ set of engines */ - list = dma_resv_shared_list(obj->base.resv); - if (list) { - unsigned int shared_count = list->shared_count, i;
- for (i = 0; i < shared_count; ++i) { - struct dma_fence *fence = - rcu_dereference(list->shared[i]);
+ args->busy = false;
You can drop this line, especially since it is not a boolean. With that:
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Having said this, one thing to add in the commit message is some commentary that although simpler in code, the new implementation has a lot more atomic instructions due to all the extra fence get/put.
Saying this because I remembered busy ioctl is quite an over-popular one. Thinking about traces from some real userspaces I looked at in the past.
So I think ack from maintainers will be required here. Because I just don't know if any performance impact will be visible or not. So view my r-b as "code looks fine" but I am on the fence if it should actually be merged. Probably leaning towards no actually - given how the code is localised here and I dislike burdening old platforms with more CPU time it could be cheaply left as is.
Regards,
Tvrtko
Regards,
Tvrtko
+ dma_resv_iter_begin(&cursor, obj->base.resv, true); + dma_resv_for_each_fence_unlocked(&cursor, fence) { + if (dma_resv_iter_is_restarted(&cursor)) + args->busy = 0;
+ if (dma_resv_iter_is_exclusive(&cursor)) + /* Translate the exclusive fence to the READ *and* WRITE engine */ + args->busy |= busy_check_writer(fence); + else + /* Translate shared fences to READ set of engines */ args->busy |= busy_check_reader(fence); - } }
- if (args->busy && read_seqcount_retry(&obj->base.resv->seq, seq)) - goto retry; + dma_resv_iter_end(&cursor); err = 0; out:
On 22.09.21 at 13:46, Tvrtko Ursulin wrote:
On 22/09/2021 11:21, Tvrtko Ursulin wrote:
On 22/09/2021 10:10, Christian König wrote:
This makes the function much simpler since the complex retry logic is now handled elsewhere.
Signed-off-by: Christian König <christian.koenig@amd.com>
drivers/gpu/drm/i915/gem/i915_gem_busy.c | 35 ++++++++++-------------- 1 file changed, 14 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c index 6234e17259c1..313afb4a11c7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c @@ -82,8 +82,8 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_busy *args = data; struct drm_i915_gem_object *obj; - struct dma_resv_list *list; - unsigned int seq; + struct dma_resv_iter cursor; + struct dma_fence *fence; int err; err = -ENOENT; @@ -109,27 +109,20 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, * to report the overall busyness. This is what the wait-ioctl does. * */ -retry: - seq = raw_read_seqcount(&obj->base.resv->seq);
- /* Translate the exclusive fence to the READ *and* WRITE engine */ - args->busy = busy_check_writer(dma_resv_excl_fence(obj->base.resv));
- /* Translate shared fences to READ set of engines */ - list = dma_resv_shared_list(obj->base.resv); - if (list) { - unsigned int shared_count = list->shared_count, i;
- for (i = 0; i < shared_count; ++i) { - struct dma_fence *fence = - rcu_dereference(list->shared[i]);
+ args->busy = false;
You can drop this line, especially since it is not a boolean. With that:
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Having said this, one thing to add in the commit message is some commentary that although simpler in code, the new implementation has a lot more atomic instructions due to all the extra fence get/put.
Saying this because I remembered busy ioctl is quite an over-popular one. Thinking about traces from some real userspaces I looked at in the past.
So I think ack from maintainers will be required here. Because I just don't know if any performance impact will be visible or not. So view my r-b as "code looks fine" but I am on the fence if it should actually be merged. Probably leaning towards no actually - given how the code is localised here and I dislike burdening old platforms with more CPU time it could be cheaply left as is.
Well previously we would have allocated memory, which as far as I know has more overhead than a few extra atomic operations.
On the other hand I could as well stick with dma_resv_get_fences() here.
Regards, Christian.
Regards,
Tvrtko
Regards,
Tvrtko
+ dma_resv_iter_begin(&cursor, obj->base.resv, true); + dma_resv_for_each_fence_unlocked(&cursor, fence) { + if (dma_resv_iter_is_restarted(&cursor)) + args->busy = 0;
+ if (dma_resv_iter_is_exclusive(&cursor)) + /* Translate the exclusive fence to the READ *and* WRITE engine */ + args->busy |= busy_check_writer(fence); + else + /* Translate shared fences to READ set of engines */ args->busy |= busy_check_reader(fence); - } }
- if (args->busy && read_seqcount_retry(&obj->base.resv->seq, seq)) - goto retry; + dma_resv_iter_end(&cursor); err = 0; out:
On 22/09/2021 13:15, Christian König wrote:
Am 22.09.21 um 13:46 schrieb Tvrtko Ursulin:
On 22/09/2021 11:21, Tvrtko Ursulin wrote:
On 22/09/2021 10:10, Christian König wrote:
This makes the function much simpler since the complex retry logic is now handled elsewhere.
Signed-off-by: Christian König <christian.koenig@amd.com>
drivers/gpu/drm/i915/gem/i915_gem_busy.c | 35 ++++++++++-------------- 1 file changed, 14 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c index 6234e17259c1..313afb4a11c7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c @@ -82,8 +82,8 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_busy *args = data; struct drm_i915_gem_object *obj; - struct dma_resv_list *list; - unsigned int seq; + struct dma_resv_iter cursor; + struct dma_fence *fence; int err; err = -ENOENT; @@ -109,27 +109,20 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, * to report the overall busyness. This is what the wait-ioctl does. * */ -retry: - seq = raw_read_seqcount(&obj->base.resv->seq);
- /* Translate the exclusive fence to the READ *and* WRITE engine */ - args->busy = busy_check_writer(dma_resv_excl_fence(obj->base.resv));
- /* Translate shared fences to READ set of engines */ - list = dma_resv_shared_list(obj->base.resv); - if (list) { - unsigned int shared_count = list->shared_count, i;
- for (i = 0; i < shared_count; ++i) { - struct dma_fence *fence = - rcu_dereference(list->shared[i]);
+ args->busy = false;
You can drop this line, especially since it is not a boolean. With that:
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Having said this, one thing to add in the commit message is some commentary that although simpler in code, the new implementation has a lot more atomic instructions due to all the extra fence get/put.
Saying this because I remembered busy ioctl is quite an over-popular one. Thinking about traces from some real userspaces I looked at in the past.
So I think ack from maintainers will be required here. Because I just don't know if any performance impact will be visible or not. So view my r-b as "code looks fine" but I am on the fence if it should actually be merged. Probably leaning towards no actually - given how the code is localised here and I dislike burdening old platforms with more CPU time it could be cheaply left as is.
Well previously we would have allocated memory, which as far as I know has more overhead than a few extra atomic operations.
It doesn't, it only uses dma_resv_excl_fence and dma_resv_shared_list.
Regards,
Tvrtko
On the other hand I could as well stick with dma_resv_get_fences() here.
Regards, Christian.
Regards,
Tvrtko
Regards,
Tvrtko
+ dma_resv_iter_begin(&cursor, obj->base.resv, true); + dma_resv_for_each_fence_unlocked(&cursor, fence) { + if (dma_resv_iter_is_restarted(&cursor)) + args->busy = 0;
+ if (dma_resv_iter_is_exclusive(&cursor)) + /* Translate the exclusive fence to the READ *and* WRITE engine */ + args->busy |= busy_check_writer(fence); + else + /* Translate shared fences to READ set of engines */ args->busy |= busy_check_reader(fence); - } }
- if (args->busy && read_seqcount_retry(&obj->base.resv->seq, seq)) - goto retry; + dma_resv_iter_end(&cursor); err = 0; out:
On 22.09.21 at 14:20, Tvrtko Ursulin wrote:
On 22/09/2021 13:15, Christian König wrote:
On 22.09.21 at 13:46, Tvrtko Ursulin wrote:
On 22/09/2021 11:21, Tvrtko Ursulin wrote:
On 22/09/2021 10:10, Christian König wrote:
This makes the function much simpler since the complex retry logic is now handled elsewhere.
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/i915/gem/i915_gem_busy.c | 35 ++++++++++-------------- 1 file changed, 14 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c index 6234e17259c1..313afb4a11c7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c @@ -82,8 +82,8 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_busy *args = data; struct drm_i915_gem_object *obj; - struct dma_resv_list *list; - unsigned int seq; + struct dma_resv_iter cursor; + struct dma_fence *fence; int err; err = -ENOENT; @@ -109,27 +109,20 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, * to report the overall busyness. This is what the wait-ioctl does. * */ -retry: - seq = raw_read_seqcount(&obj->base.resv->seq);
- /* Translate the exclusive fence to the READ *and* WRITE engine */ - args->busy = busy_check_writer(dma_resv_excl_fence(obj->base.resv));
- /* Translate shared fences to READ set of engines */ - list = dma_resv_shared_list(obj->base.resv); - if (list) { - unsigned int shared_count = list->shared_count, i;
- for (i = 0; i < shared_count; ++i) { - struct dma_fence *fence = - rcu_dereference(list->shared[i]);
+ args->busy = false;
You can drop this line, especially since it is not a boolean. With that:
Reviewed-by: Tvrtko Ursulin tvrtko.ursulin@intel.com
Having said this, one thing to add in the commit message is some commentary that, although simpler in code, the new implementation executes a lot more atomic instructions due to all the extra fence get/put operations.
I'm saying this because I remembered the busy ioctl is quite a heavily used one, thinking of traces from some real userspace workloads I looked at in the past.
So I think an ack from the maintainers will be required here, because I just don't know whether any performance impact will be visible. View my r-b as "the code looks fine", but I am on the fence about whether it should actually be merged. Probably leaning towards no, actually: given how localised the code is here, and since I dislike burdening old platforms with more CPU time, it could cheaply be left as is.
Well previously we would have allocated memory, which as far as I know has more overhead than a few extra atomic operations.
It doesn't; it only uses dma_resv_excl_fence and dma_resv_shared_list.
Yeah, ok then that's not really an option any more.
I think Daniel and I are totally on the same page that we won't allow this RCU dance in the drivers any more.
Regards, Christian.
Regards,
Tvrtko
On the other hand I could as well stick with dma_resv_get_fences() here.
Regards, Christian.
Regards,
Tvrtko
Regards,
Tvrtko
+ dma_resv_iter_begin(&cursor, obj->base.resv, true);
+ dma_resv_for_each_fence_unlocked(&cursor, fence) { + if (dma_resv_iter_is_restarted(&cursor)) + args->busy = 0;
+ if (dma_resv_iter_is_exclusive(&cursor)) + /* Translate the exclusive fence to the READ *and* WRITE engine */ + args->busy |= busy_check_writer(fence); + else + /* Translate shared fences to READ set of engines */ args->busy |= busy_check_reader(fence); - } }
- if (args->busy && read_seqcount_retry(&obj->base.resv->seq, seq)) - goto retry; + dma_resv_iter_end(&cursor); err = 0; out:
On 22.09.21 at 12:21, Tvrtko Ursulin wrote:
On 22/09/2021 10:10, Christian König wrote:
This makes the function much simpler since the complex retry logic is now handled elsewhere.
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/i915/gem/i915_gem_busy.c | 35 ++++++++++-------------- 1 file changed, 14 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c index 6234e17259c1..313afb4a11c7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c @@ -82,8 +82,8 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_busy *args = data; struct drm_i915_gem_object *obj; - struct dma_resv_list *list; - unsigned int seq; + struct dma_resv_iter cursor; + struct dma_fence *fence; int err; err = -ENOENT; @@ -109,27 +109,20 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, * to report the overall busyness. This is what the wait-ioctl does. * */ -retry: - seq = raw_read_seqcount(&obj->base.resv->seq);
- /* Translate the exclusive fence to the READ *and* WRITE engine */ - args->busy = busy_check_writer(dma_resv_excl_fence(obj->base.resv));
- /* Translate shared fences to READ set of engines */ - list = dma_resv_shared_list(obj->base.resv); - if (list) { - unsigned int shared_count = list->shared_count, i;
- for (i = 0; i < shared_count; ++i) { - struct dma_fence *fence = - rcu_dereference(list->shared[i]);
+ args->busy = false;
You can drop this line, especially since it is not a boolean. With that:
I just realized that this won't work. We still need to initialize the return value when there is no fence at all in the resv object.
Reviewed-by: Tvrtko Ursulin tvrtko.ursulin@intel.com
Does that still count if I set args->busy to zero?
Thanks, Christian.
Regards,
Tvrtko
+ dma_resv_iter_begin(&cursor, obj->base.resv, true); + dma_resv_for_each_fence_unlocked(&cursor, fence) { + if (dma_resv_iter_is_restarted(&cursor)) + args->busy = 0;
+ if (dma_resv_iter_is_exclusive(&cursor)) + /* Translate the exclusive fence to the READ *and* WRITE engine */ + args->busy |= busy_check_writer(fence); + else + /* Translate shared fences to READ set of engines */ args->busy |= busy_check_reader(fence); - } }
- if (args->busy && read_seqcount_retry(&obj->base.resv->seq, seq)) - goto retry; + dma_resv_iter_end(&cursor); err = 0; out:
On 22/09/2021 15:31, Christian König wrote:
On 22.09.21 at 12:21, Tvrtko Ursulin wrote:
On 22/09/2021 10:10, Christian König wrote:
This makes the function much simpler since the complex retry logic is now handled elsewhere.
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/i915/gem/i915_gem_busy.c | 35 ++++++++++-------------- 1 file changed, 14 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c index 6234e17259c1..313afb4a11c7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c @@ -82,8 +82,8 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_busy *args = data; struct drm_i915_gem_object *obj; - struct dma_resv_list *list; - unsigned int seq; + struct dma_resv_iter cursor; + struct dma_fence *fence; int err; err = -ENOENT; @@ -109,27 +109,20 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, * to report the overall busyness. This is what the wait-ioctl does. * */ -retry: - seq = raw_read_seqcount(&obj->base.resv->seq);
- /* Translate the exclusive fence to the READ *and* WRITE engine */ - args->busy = busy_check_writer(dma_resv_excl_fence(obj->base.resv));
- /* Translate shared fences to READ set of engines */ - list = dma_resv_shared_list(obj->base.resv); - if (list) { - unsigned int shared_count = list->shared_count, i;
- for (i = 0; i < shared_count; ++i) { - struct dma_fence *fence = - rcu_dereference(list->shared[i]);
+ args->busy = false;
You can drop this line, especially since it is not a boolean. With that:
I just realized that this won't work. We still need to initialize the return value when there is no fence at all in the resv object.
Reviewed-by: Tvrtko Ursulin tvrtko.ursulin@intel.com
Does that still count if I set args->busy to zero?
Ah yes, my bad, apologies. You can keep the r-b.
Regards,
Tvrtko
Thanks, Christian.
Regards,
Tvrtko
+ dma_resv_iter_begin(&cursor, obj->base.resv, true); + dma_resv_for_each_fence_unlocked(&cursor, fence) { + if (dma_resv_iter_is_restarted(&cursor)) + args->busy = 0;
+ if (dma_resv_iter_is_exclusive(&cursor)) + /* Translate the exclusive fence to the READ *and* WRITE engine */ + args->busy |= busy_check_writer(fence); + else + /* Translate shared fences to READ set of engines */ args->busy |= busy_check_reader(fence); - } }
- if (args->busy && read_seqcount_retry(&obj->base.resv->seq, seq)) - goto retry; + dma_resv_iter_end(&cursor); err = 0; out:
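For reference, the accumulate-and-restart pattern this conversion relies on can be summarized as the following sketch. This is not the actual i915 code; flags_for_fence() is a hypothetical stand-in for the busy_check_writer()/busy_check_reader() helpers:

#include <linux/dma-resv.h>
#include <linux/dma-fence.h>

/* hypothetical stand-in for busy_check_writer()/busy_check_reader() */
static u32 flags_for_fence(struct dma_fence *fence, bool write)
{
	return write ? 0x3 : 0x1; /* placeholder READ|WRITE vs READ flags */
}

static u32 collect_busy_flags(struct dma_resv *resv)
{
	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	u32 busy = 0;

	dma_resv_iter_begin(&cursor, resv, true);
	dma_resv_for_each_fence_unlocked(&cursor, fence) {
		/*
		 * A concurrent modification restarts the walk; everything
		 * accumulated so far is stale and must be thrown away.
		 */
		if (dma_resv_iter_is_restarted(&cursor))
			busy = 0;

		if (dma_resv_iter_is_exclusive(&cursor))
			busy |= flags_for_fence(fence, true);
		else
			busy |= flags_for_fence(fence, false);
	}
	dma_resv_iter_end(&cursor);

	return busy;
}

The important property is that the reset on restart makes the accumulated result valid for the last complete, uninterrupted pass over the fences.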
Simplifying the code a bit.
v2: use dma_resv_for_each_fence instead; according to Tvrtko the lock is held here anyway. v3: back to using dma_resv_for_each_fence_unlocked.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/i915/i915_sw_fence.c | 53 ++++++---------------------- 1 file changed, 11 insertions(+), 42 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index c589a681da77..7ea0dbf81530 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -572,56 +572,25 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence, unsigned long timeout, gfp_t gfp) { - struct dma_fence *excl; + struct dma_resv_iter cursor; + struct dma_fence *f; int ret = 0, pending;
debug_fence_assert(fence); might_sleep_if(gfpflags_allow_blocking(gfp));
- if (write) { - struct dma_fence **shared; - unsigned int count, i; - - ret = dma_resv_get_fences(resv, &excl, &count, &shared); - if (ret) - return ret; - - for (i = 0; i < count; i++) { - if (shared[i]->ops == exclude) - continue; - - pending = i915_sw_fence_await_dma_fence(fence, - shared[i], - timeout, - gfp); - if (pending < 0) { - ret = pending; - break; - } - - ret |= pending; - } - - for (i = 0; i < count; i++) - dma_fence_put(shared[i]); - kfree(shared); - } else { - excl = dma_resv_get_excl_unlocked(resv); - } - - if (ret >= 0 && excl && excl->ops != exclude) { - pending = i915_sw_fence_await_dma_fence(fence, - excl, - timeout, + dma_resv_iter_begin(&cursor, resv, write); + dma_resv_for_each_fence_unlocked(&cursor, f) { + pending = i915_sw_fence_await_dma_fence(fence, f, timeout, gfp); - if (pending < 0) + if (pending < 0) { ret = pending; - else - ret |= pending; - } - - dma_fence_put(excl); + break; + }
+ ret |= pending; + } + dma_resv_iter_end(&cursor); return ret; }
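One detail worth spelling out in this conversion: when the loop is left early via break, the cursor still holds a reference on the current fence, and the dma_resv_iter_end() after the loop is what drops it. A minimal sketch of that shape, with await_one() as a hypothetical per-fence operation standing in for i915_sw_fence_await_dma_fence():

#include <linux/errno.h>
#include <linux/dma-resv.h>
#include <linux/dma-fence.h>

/* hypothetical per-fence operation that may fail */
static int await_one(struct dma_fence *fence)
{
	return dma_fence_is_signaled(fence) ? 0 : -EBUSY;
}

static int await_reservation(struct dma_resv *resv, bool write)
{
	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	int ret = 0;

	dma_resv_iter_begin(&cursor, resv, write);
	dma_resv_for_each_fence_unlocked(&cursor, fence) {
		ret = await_one(fence);
		if (ret)
			break; /* the cursor still references 'fence' */
	}
	dma_resv_iter_end(&cursor); /* drops that reference */

	return ret;
}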
Simplifying the code a bit.
v2: add missing rcu_read_lock()/rcu_read_unlock() v3: use dma_resv_for_each_fence instead
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/i915/i915_request.c | 34 +++++------------------------ 1 file changed, 5 insertions(+), 29 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index ce446716d092..3839712ebd23 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1509,38 +1509,14 @@ i915_request_await_object(struct i915_request *to, struct drm_i915_gem_object *obj, bool write) { - struct dma_fence *excl; + struct dma_resv_iter cursor; + struct dma_fence *fence; int ret = 0;
- if (write) { - struct dma_fence **shared; - unsigned int count, i; - - ret = dma_resv_get_fences(obj->base.resv, &excl, &count, - &shared); + dma_resv_for_each_fence(&cursor, obj->base.resv, write, fence) { + ret = i915_request_await_dma_fence(to, fence); if (ret) - return ret; - - for (i = 0; i < count; i++) { - ret = i915_request_await_dma_fence(to, shared[i]); - if (ret) - break; - - dma_fence_put(shared[i]); - } - - for (; i < count; i++) - dma_fence_put(shared[i]); - kfree(shared); - } else { - excl = dma_resv_get_excl_unlocked(obj->base.resv); - } - - if (excl) { - if (ret == 0) - ret = i915_request_await_dma_fence(to, excl); - - dma_fence_put(excl); + break; }
return ret;
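For contrast with the unlocked conversions, dma_resv_for_each_fence() as used here needs no begin/end bracketing and no restart handling, but it is only valid while the reservation lock is held. A sketch under that assumption:

#include <linux/dma-resv.h>
#include <linux/dma-fence.h>

/* count the not-yet-signaled fences; caller must hold the resv lock */
static unsigned int count_pending_fences(struct dma_resv *resv, bool all)
{
	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	unsigned int count = 0;

	dma_resv_for_each_fence(&cursor, resv, all, fence) {
		if (!dma_fence_is_signaled(fence))
			count++;
	}

	return count;
}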
On 22/09/2021 10:10, Christian König wrote:
Simplifying the code a bit.
v2: add missing rcu_read_lock()/rcu_read_unlock() v3: use dma_resv_for_each_fence instead
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/i915/i915_request.c | 34 +++++------------------------ 1 file changed, 5 insertions(+), 29 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index ce446716d092..3839712ebd23 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1509,38 +1509,14 @@ i915_request_await_object(struct i915_request *to, struct drm_i915_gem_object *obj, bool write) { - struct dma_fence *excl; + struct dma_resv_iter cursor; + struct dma_fence *fence; int ret = 0;
- if (write) { - struct dma_fence **shared; - unsigned int count, i; - - ret = dma_resv_get_fences(obj->base.resv, &excl, &count, - &shared); + dma_resv_for_each_fence(&cursor, obj->base.resv, write, fence) { + ret = i915_request_await_dma_fence(to, fence); if (ret) - return ret; - - for (i = 0; i < count; i++) { - ret = i915_request_await_dma_fence(to, shared[i]); - if (ret) - break; - - dma_fence_put(shared[i]); - } - - for (; i < count; i++) - dma_fence_put(shared[i]); - kfree(shared); - } else { - excl = dma_resv_get_excl_unlocked(obj->base.resv); - } - - if (excl) { - if (ret == 0) - ret = i915_request_await_dma_fence(to, excl); - - dma_fence_put(excl); + break; }
return ret;
Short and sweet. I hope CI confirms all callers have it locked, and in the meantime I will risk it:
Reviewed-by: Tvrtko Ursulin tvrtko.ursulin@intel.com
Regards,
Tvrtko
Simplifying the code a bit.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/i915/gem/i915_gem_wait.c | 51 +++++------------------- 1 file changed, 9 insertions(+), 42 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index f909aaa09d9c..a13193db1dba 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -37,55 +37,22 @@ i915_gem_object_wait_reservation(struct dma_resv *resv, unsigned int flags, long timeout) { - struct dma_fence *excl; - bool prune_fences = false; - - if (flags & I915_WAIT_ALL) { - struct dma_fence **shared; - unsigned int count, i; - int ret; + struct dma_resv_iter cursor; + struct dma_fence *fence;
- ret = dma_resv_get_fences(resv, &excl, &count, &shared); - if (ret) - return ret; - - for (i = 0; i < count; i++) { - timeout = i915_gem_object_wait_fence(shared[i], - flags, timeout); - if (timeout < 0) - break; - - dma_fence_put(shared[i]); - } - - for (; i < count; i++) - dma_fence_put(shared[i]); - kfree(shared); - - /* - * If both shared fences and an exclusive fence exist, - * then by construction the shared fences must be later - * than the exclusive fence. If we successfully wait for - * all the shared fences, we know that the exclusive fence - * must all be signaled. If all the shared fences are - * signaled, we can prune the array and recover the - * floating references on the fences/requests. - */ - prune_fences = count && timeout >= 0; - } else { - excl = dma_resv_get_excl_unlocked(resv); + dma_resv_iter_begin(&cursor, resv, flags & I915_WAIT_ALL); + dma_resv_for_each_fence_unlocked(&cursor, fence) { + timeout = i915_gem_object_wait_fence(fence, flags, timeout); + if (timeout < 0) + break; } - - if (excl && timeout >= 0) - timeout = i915_gem_object_wait_fence(excl, flags, timeout); - - dma_fence_put(excl); + dma_resv_iter_end(&cursor);
/* * Opportunistically prune the fences iff we know they have *all* been * signaled. */ - if (prune_fences) + if (timeout > 0) dma_resv_prune(resv);
return timeout;
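The wait loop above threads the remaining timeout through each individual wait, so a positive timeout after the loop implies that every fence signaled with time to spare, which is exactly what gates the pruning. The same idea in isolation, sketched with plain dma_fence_wait_timeout() instead of the i915 wrapper:

#include <linux/dma-resv.h>
#include <linux/dma-fence.h>

/* returns remaining jiffies (> 0), 0 on timeout or a negative error */
static long wait_all_fences(struct dma_resv *resv, bool all, long timeout)
{
	struct dma_resv_iter cursor;
	struct dma_fence *fence;

	dma_resv_iter_begin(&cursor, resv, all);
	dma_resv_for_each_fence_unlocked(&cursor, fence) {
		/* hand the remaining budget to the next wait */
		timeout = dma_fence_wait_timeout(fence, true, timeout);
		if (timeout <= 0)
			break;
	}
	dma_resv_iter_end(&cursor);

	return timeout;
}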
Simplifying the code a bit.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/i915/gem/i915_gem_wait.c | 31 +++++------------------- 1 file changed, 6 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index a13193db1dba..569658c7859c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -118,32 +118,13 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, unsigned int flags, const struct i915_sched_attr *attr) { - struct dma_fence *excl; - - if (flags & I915_WAIT_ALL) { - struct dma_fence **shared; - unsigned int count, i; - int ret; - - ret = dma_resv_get_fences(obj->base.resv, &excl, &count, - &shared); - if (ret) - return ret; - - for (i = 0; i < count; i++) { - i915_gem_fence_wait_priority(shared[i], attr); - dma_fence_put(shared[i]); - } - - kfree(shared); - } else { - excl = dma_resv_get_excl_unlocked(obj->base.resv); - } + struct dma_resv_iter cursor; + struct dma_fence *fence;
- if (excl) { - i915_gem_fence_wait_priority(excl, attr); - dma_fence_put(excl); - } + dma_resv_iter_begin(&cursor, obj->base.resv, flags & I915_WAIT_ALL); + dma_resv_for_each_fence_unlocked(&cursor, fence) + i915_gem_fence_wait_priority(fence, attr); + dma_resv_iter_end(&cursor); return 0; }
This is maybe even a fix since the RCU usage here looks incorrect.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/i915/gem/i915_gem_object.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 48112b9d76df..e20efffce3a9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -507,16 +507,16 @@ static inline struct intel_engine_cs * i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj) { struct intel_engine_cs *engine = NULL; + struct dma_resv_iter cursor; struct dma_fence *fence;
- rcu_read_lock(); - fence = dma_resv_get_excl_unlocked(obj->base.resv); - rcu_read_unlock(); - - if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence)) - engine = to_request(fence)->engine; - dma_fence_put(fence); - + dma_resv_iter_begin(&cursor, obj->base.resv, false); + dma_resv_for_each_fence_unlocked(&cursor, fence) { + if (fence && dma_fence_is_i915(fence) && + !dma_fence_is_signaled(fence)) + engine = to_request(fence)->engine; + } + dma_resv_iter_end(&cursor); return engine; }
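Worth noting for readers of this conversion: passing false as the all_fences argument restricts the walk to the exclusive fence, so the loop body runs at most once per (re)start. That also makes the iterator usable for simple has-a-writer style checks, sketched here:

#include <linux/dma-resv.h>
#include <linux/dma-fence.h>

static bool has_active_writer(struct dma_resv *resv)
{
	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	bool busy = false;

	/* all_fences == false: only the exclusive fence is visited */
	dma_resv_iter_begin(&cursor, resv, false);
	dma_resv_for_each_fence_unlocked(&cursor, fence)
		busy = true; /* the iterator only returns unsignaled fences */
	dma_resv_iter_end(&cursor);

	return busy;
}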
On 22/09/2021 10:10, Christian König wrote:
This is maybe even a fix since the RCU usage here looks incorrect.
I'm afraid I gazumped you here by removing this function shortly before you posted the respin.
Regards,
Tvrtko
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/i915/gem/i915_gem_object.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 48112b9d76df..e20efffce3a9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -507,16 +507,16 @@ static inline struct intel_engine_cs * i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj) { struct intel_engine_cs *engine = NULL; + struct dma_resv_iter cursor; struct dma_fence *fence;
- rcu_read_lock(); - fence = dma_resv_get_excl_unlocked(obj->base.resv); - rcu_read_unlock(); - - if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence)) - engine = to_request(fence)->engine; - dma_fence_put(fence); - + dma_resv_iter_begin(&cursor, obj->base.resv, false); + dma_resv_for_each_fence_unlocked(&cursor, fence) { + if (fence && dma_fence_is_i915(fence) && + !dma_fence_is_signaled(fence)) + engine = to_request(fence)->engine; + } + dma_resv_iter_end(&cursor); return engine; }
On 22.09.21 at 12:27, Tvrtko Ursulin wrote:
On 22/09/2021 10:10, Christian König wrote:
This is maybe even a fix since the RCU usage here looks incorrect.
I'm afraid I gazumped you here by removing this function shortly before you posted the respin.
Has that already landed in drm-misc-next? If not, just give me an Acked-by and it will be fixed up when the trees are merged together again, simply by dropping this change.
Alternatively, if it is not in drm-next, I will ping the drm-misc-next maintainer for a merge.
Regards, Christian.
Regards,
Tvrtko
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/i915/gem/i915_gem_object.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 48112b9d76df..e20efffce3a9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -507,16 +507,16 @@ static inline struct intel_engine_cs * i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj) { struct intel_engine_cs *engine = NULL; + struct dma_resv_iter cursor; struct dma_fence *fence; - rcu_read_lock(); - fence = dma_resv_get_excl_unlocked(obj->base.resv); - rcu_read_unlock();
- if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence)) - engine = to_request(fence)->engine; - dma_fence_put(fence);
+ dma_resv_iter_begin(&cursor, obj->base.resv, false); + dma_resv_for_each_fence_unlocked(&cursor, fence) { + if (fence && dma_fence_is_i915(fence) && + !dma_fence_is_signaled(fence)) + engine = to_request(fence)->engine; + } + dma_resv_iter_end(&cursor); return engine; }
On 22/09/2021 12:00, Christian König wrote:
On 22.09.21 at 12:27, Tvrtko Ursulin wrote:
On 22/09/2021 10:10, Christian König wrote:
This is maybe even a fix since the RCU usage here looks incorrect.
I'm afraid I gazumped you here by removing this function shortly before you posted the respin.
Has that already landed in drm-misc-next? If not, just give me an Acked-by and it will be fixed up when the trees are merged together again, simply by dropping this change.
Alternatively, if it is not in drm-next, I will ping the drm-misc-next maintainer for a merge.
Problem is you will never pass our CI with a series which does not apply to drm-tip. ;)
Regards,
Tvrtko
Regards, Christian.
Regards,
Tvrtko
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/i915/gem/i915_gem_object.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 48112b9d76df..e20efffce3a9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -507,16 +507,16 @@ static inline struct intel_engine_cs * i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj) { struct intel_engine_cs *engine = NULL; + struct dma_resv_iter cursor; struct dma_fence *fence; - rcu_read_lock(); - fence = dma_resv_get_excl_unlocked(obj->base.resv); - rcu_read_unlock();
- if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence)) - engine = to_request(fence)->engine; - dma_fence_put(fence);
+ dma_resv_iter_begin(&cursor, obj->base.resv, false); + dma_resv_for_each_fence_unlocked(&cursor, fence) { + if (fence && dma_fence_is_i915(fence) && + !dma_fence_is_signaled(fence)) + engine = to_request(fence)->engine; + } + dma_resv_iter_end(&cursor); return engine; }
Simplifying the code a bit.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/i915/display/intel_display.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 134a6acbd8fb..d32137a84694 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -11290,6 +11290,7 @@ intel_prepare_plane_fb(struct drm_plane *_plane, i915_gem_object_flush_frontbuffer(obj, ORIGIN_DIRTYFB);
if (!new_plane_state->uapi.fence) { /* implicit fencing */ + struct dma_resv_iter cursor; struct dma_fence *fence;
ret = i915_sw_fence_await_reservation(&state->commit_ready, @@ -11300,12 +11301,12 @@ intel_prepare_plane_fb(struct drm_plane *_plane, if (ret < 0) goto unpin_fb;
- fence = dma_resv_get_excl_unlocked(obj->base.resv); - if (fence) { + dma_resv_iter_begin(&cursor, obj->base.resv, false); + dma_resv_for_each_fence_unlocked(&cursor, fence) { add_rps_boost_after_vblank(new_plane_state->hw.crtc, fence); - dma_fence_put(fence); } + dma_resv_iter_end(&cursor); } else { add_rps_boost_after_vblank(new_plane_state->hw.crtc, new_plane_state->uapi.fence);
Simplifying the code a bit.
v2: add missing rcu_read_lock()/unlock() v3: switch to locked version
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/drm_gem.c | 26 +++++--------------------- 1 file changed, 5 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 09c820045859..4dcdec6487bb 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -1340,31 +1340,15 @@ int drm_gem_fence_array_add_implicit(struct xarray *fence_array, struct drm_gem_object *obj, bool write) { - int ret; - struct dma_fence **fences; - unsigned int i, fence_count; - - if (!write) { - struct dma_fence *fence = - dma_resv_get_excl_unlocked(obj->resv); - - return drm_gem_fence_array_add(fence_array, fence); - } + struct dma_resv_iter cursor; + struct dma_fence *fence; + int ret = 0;
- ret = dma_resv_get_fences(obj->resv, NULL, - &fence_count, &fences); - if (ret || !fence_count) - return ret; - - for (i = 0; i < fence_count; i++) { - ret = drm_gem_fence_array_add(fence_array, fences[i]); + dma_resv_for_each_fence(&cursor, obj->resv, write, fence) { + ret = drm_gem_fence_array_add(fence_array, fence); if (ret) break; } - - for (; i < fence_count; i++) - dma_fence_put(fences[i]); - kfree(fences); return ret; } EXPORT_SYMBOL(drm_gem_fence_array_add_implicit);
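Since dma_resv_for_each_fence() requires the reservation lock, callers that do not already hold it need the usual lock/unlock bracket around the walk. A sketch of that, with add_one() as a hypothetical consumer standing in for drm_gem_fence_array_add():

#include <linux/dma-resv.h>
#include <linux/dma-fence.h>

/* hypothetical consumer, e.g. queueing the fence somewhere */
static int add_one(struct dma_fence *fence)
{
	return 0;
}

static int add_implicit_fences(struct dma_resv *resv, bool write)
{
	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	int ret;

	ret = dma_resv_lock(resv, NULL);
	if (ret)
		return ret;

	dma_resv_for_each_fence(&cursor, resv, write, fence) {
		ret = add_one(fence);
		if (ret)
			break;
	}
	dma_resv_unlock(resv);

	return ret;
}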
Makes the handling a bit more complex, but avoids the use of dma_resv_get_excl_unlocked().
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/drm_gem_atomic_helper.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/drm_gem_atomic_helper.c b/drivers/gpu/drm/drm_gem_atomic_helper.c index e570398abd78..21ed930042b8 100644 --- a/drivers/gpu/drm/drm_gem_atomic_helper.c +++ b/drivers/gpu/drm/drm_gem_atomic_helper.c @@ -143,6 +143,7 @@ */ int drm_gem_plane_helper_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state) { + struct dma_resv_iter cursor; struct drm_gem_object *obj; struct dma_fence *fence;
@@ -150,9 +151,17 @@ int drm_gem_plane_helper_prepare_fb(struct drm_plane *plane, struct drm_plane_st return 0;
obj = drm_gem_fb_get_obj(state->fb, 0); - fence = dma_resv_get_excl_unlocked(obj->resv); - drm_atomic_set_fence_for_plane(state, fence); + dma_resv_iter_begin(&cursor, obj->resv, false); + dma_resv_for_each_fence_unlocked(&cursor, fence) { + dma_fence_get(fence); + dma_resv_iter_end(&cursor); + /* TODO: We only use the first write fence here */ + drm_atomic_set_fence_for_plane(state, fence); + return 0; + } + dma_resv_iter_end(&cursor);
+ drm_atomic_set_fence_for_plane(state, NULL); return 0; } EXPORT_SYMBOL_GPL(drm_gem_plane_helper_prepare_fb);
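The early dma_resv_iter_end() before leaving the loop is the key trick here: grab a reference of our own first, then let the cursor drop its one. Extracted into a hypothetical helper, the pattern looks like this:

#include <linux/dma-resv.h>
#include <linux/dma-fence.h>

/* reference to the first unsignaled write fence, or NULL if none */
static struct dma_fence *get_first_write_fence(struct dma_resv *resv)
{
	struct dma_resv_iter cursor;
	struct dma_fence *fence;

	dma_resv_iter_begin(&cursor, resv, false);
	dma_resv_for_each_fence_unlocked(&cursor, fence) {
		/* take our own reference before the cursor drops its one */
		dma_fence_get(fence);
		dma_resv_iter_end(&cursor);
		return fence;
	}
	dma_resv_iter_end(&cursor);

	return NULL;
}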
Simplifying the code a bit.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/nouveau/nouveau_fence.c | 48 +++++++------------------ 1 file changed, 12 insertions(+), 36 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 05d0b3eb3690..26f9299df881 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -339,14 +339,15 @@ nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr) }
int -nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool exclusive, bool intr) +nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, + bool exclusive, bool intr) { struct nouveau_fence_chan *fctx = chan->fence; - struct dma_fence *fence; struct dma_resv *resv = nvbo->bo.base.resv; - struct dma_resv_list *fobj; + struct dma_resv_iter cursor; + struct dma_fence *fence; struct nouveau_fence *f; - int ret = 0, i; + int ret;
if (!exclusive) { ret = dma_resv_reserve_shared(resv, 1); @@ -355,10 +356,7 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e return ret; }
- fobj = dma_resv_shared_list(resv); - fence = dma_resv_excl_fence(resv); - - if (fence) { + dma_resv_for_each_fence(&cursor, resv, exclusive, fence) { struct nouveau_channel *prev = NULL; bool must_wait = true;
@@ -366,41 +364,19 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e if (f) { rcu_read_lock(); prev = rcu_dereference(f->channel); - if (prev && (prev == chan || fctx->sync(f, prev, chan) == 0)) + if (prev && (prev == chan || + fctx->sync(f, prev, chan) == 0)) must_wait = false; rcu_read_unlock(); }
- if (must_wait) + if (must_wait) { ret = dma_fence_wait(fence, intr); - - return ret; - } - - if (!exclusive || !fobj) - return ret; - - for (i = 0; i < fobj->shared_count && !ret; ++i) { - struct nouveau_channel *prev = NULL; - bool must_wait = true; - - fence = rcu_dereference_protected(fobj->shared[i], - dma_resv_held(resv)); - - f = nouveau_local_fence(fence, chan->drm); - if (f) { - rcu_read_lock(); - prev = rcu_dereference(f->channel); - if (prev && (prev == chan || fctx->sync(f, prev, chan) == 0)) - must_wait = false; - rcu_read_unlock(); + if (ret) + return ret; } - - if (must_wait) - ret = dma_fence_wait(fence, intr); } - - return ret; + return 0; }
void
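Note how the conversion keeps a short rcu_read_lock() section of its own inside the loop body: the iterator protects the fence it hands out, but driver-private pointers such as f->channel still need their own RCU protection. The shape of that, with struct my_fence and struct my_channel as hypothetical driver types:

#include <linux/rcupdate.h>
#include <linux/dma-fence.h>

struct my_channel;

/* hypothetical driver fence with an RCU-protected back-pointer */
struct my_fence {
	struct dma_fence base;
	struct my_channel __rcu *channel;
};

static bool fence_is_on_channel(struct my_fence *f, struct my_channel *chan)
{
	struct my_channel *prev;
	bool match;

	rcu_read_lock();
	prev = rcu_dereference(f->channel);
	match = (prev == chan);
	rcu_read_unlock();

	return match;
}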
Makes the handling a bit more complex, but avoids the use of dma_resv_get_excl_unlocked().
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/nouveau/dispnv50/wndw.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c index 8d048bacd6f0..30712a681e2a 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c @@ -539,6 +539,8 @@ nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state) struct nouveau_bo *nvbo; struct nv50_head_atom *asyh; struct nv50_wndw_ctxdma *ctxdma; + struct dma_resv_iter cursor; + struct dma_fence *fence; int ret;
NV_ATOMIC(drm, "%s prepare: %p\n", plane->name, fb); @@ -561,7 +563,13 @@ nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state) asyw->image.handle[0] = ctxdma->object.handle; }
- asyw->state.fence = dma_resv_get_excl_unlocked(nvbo->bo.base.resv); + dma_resv_iter_begin(&cursor, nvbo->bo.base.resv, false); + dma_resv_for_each_fence_unlocked(&cursor, fence) { + /* TODO: We only use the first writer here */ + asyw->state.fence = dma_fence_get(fence); + break; + } + dma_resv_iter_end(&cursor); asyw->image.offset[0] = nvbo->offset;
if (wndw->func->prepare) {
Instead of hand-rolling the logic.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/etnaviv/etnaviv_gem.c | 31 ++++++++++----------------- 1 file changed, 11 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index 8f1b5af47dd6..0eeb33de2ff4 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -428,19 +428,17 @@ int etnaviv_gem_wait_bo(struct etnaviv_gpu *gpu, struct drm_gem_object *obj, static void etnaviv_gem_describe_fence(struct dma_fence *fence, const char *type, struct seq_file *m) { - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) - seq_printf(m, "\t%9s: %s %s seq %llu\n", - type, - fence->ops->get_driver_name(fence), - fence->ops->get_timeline_name(fence), - fence->seqno); + seq_printf(m, "\t%9s: %s %s seq %llu\n", type, + fence->ops->get_driver_name(fence), + fence->ops->get_timeline_name(fence), + fence->seqno); }
static void etnaviv_gem_describe(struct drm_gem_object *obj, struct seq_file *m) { struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj); struct dma_resv *robj = obj->resv; - struct dma_resv_list *fobj; + struct dma_resv_iter cursor; struct dma_fence *fence; unsigned long off = drm_vma_node_start(&obj->vma_node);
@@ -449,21 +447,14 @@ static void etnaviv_gem_describe(struct drm_gem_object *obj, struct seq_file *m) obj->name, kref_read(&obj->refcount), off, etnaviv_obj->vaddr, obj->size);
- rcu_read_lock(); - fobj = dma_resv_shared_list(robj); - if (fobj) { - unsigned int i, shared_count = fobj->shared_count; - - for (i = 0; i < shared_count; i++) { - fence = rcu_dereference(fobj->shared[i]); + dma_resv_iter_begin(&cursor, robj, true); + dma_resv_for_each_fence_unlocked(&cursor, fence) { + if (dma_resv_iter_is_exclusive(&cursor)) + etnaviv_gem_describe_fence(fence, "Exclusive", m); + else etnaviv_gem_describe_fence(fence, "Shared", m); - } } - - fence = dma_resv_excl_fence(robj); - if (fence) - etnaviv_gem_describe_fence(fence, "Exclusive", m); - rcu_read_unlock(); + dma_resv_iter_end(&cursor); }
void etnaviv_gem_describe_objects(struct etnaviv_drm_private *priv,
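Reduced to its core, the classify-as-you-go shape used here relies on dma_resv_iter_is_exclusive() reporting which slot the current fence came from. A sketch:

#include <linux/seq_file.h>
#include <linux/dma-resv.h>
#include <linux/dma-fence.h>

static void describe_fences(struct dma_resv *resv, struct seq_file *m)
{
	struct dma_resv_iter cursor;
	struct dma_fence *fence;

	dma_resv_iter_begin(&cursor, resv, true);
	dma_resv_for_each_fence_unlocked(&cursor, fence)
		seq_printf(m, "\t%9s: %s seq %llu\n",
			   dma_resv_iter_is_exclusive(&cursor) ?
			   "Exclusive" : "Shared",
			   fence->ops->get_driver_name(fence),
			   fence->seqno);
	dma_resv_iter_end(&cursor);
}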
We certainly hold the reservation lock here, no need for the RCU dance.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c index 4dd7d9d541c0..7e17bc2b5df1 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c @@ -195,7 +195,7 @@ static int submit_fence_sync(struct etnaviv_gem_submit *submit) if (ret) return ret; } else { - bo->excl = dma_resv_get_excl_unlocked(robj); + bo->excl = dma_fence_get(dma_resv_excl_fence(robj)); }
}
Eureka, that's finally not used any more.
Signed-off-by: Christian König christian.koenig@amd.com --- include/linux/dma-resv.h | 26 -------------------------- 1 file changed, 26 deletions(-)
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 72e7ebaa675f..42ea6f667120 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -436,32 +436,6 @@ dma_resv_excl_fence(struct dma_resv *obj) return rcu_dereference_check(obj->fence_excl, dma_resv_held(obj)); }
-/** - * dma_resv_get_excl_unlocked - get the reservation object's - * exclusive fence, without lock held. - * @obj: the reservation object - * - * If there is an exclusive fence, this atomically increments it's - * reference count and returns it. - * - * RETURNS - * The exclusive fence or NULL if none - */ -static inline struct dma_fence * -dma_resv_get_excl_unlocked(struct dma_resv *obj) -{ - struct dma_fence *fence; - - if (!rcu_access_pointer(obj->fence_excl)) - return NULL; - - rcu_read_lock(); - fence = dma_fence_get_rcu_safe(&obj->fence_excl); - rcu_read_unlock(); - - return fence; -} - /** * dma_resv_shared_list - get the reservation object's shared fence list * @obj: the reservation object
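With the unlocked helper gone, the remaining ways to get at the exclusive fence are the iterator (as in the conversions above) or, when the reservation lock is already held, direct locked access as in the etnaviv submit patch. The locked form boils down to this sketch:

#include <linux/dma-resv.h>
#include <linux/dma-fence.h>

/* caller holds the reservation lock; may return NULL */
static struct dma_fence *get_excl_fence_locked(struct dma_resv *resv)
{
	dma_resv_assert_held(resv);

	/* dma_fence_get() tolerates a NULL fence */
	return dma_fence_get(dma_resv_excl_fence(resv));
}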