On Fri, Sep 03, 2021 at 11:47:55AM -0700, Rob Clark wrote:
From: Rob Clark robdclark@chromium.org
As the finished fence is the one that is exposed to userspace, and therefore the one that other operations, like atomic update, would block on, we need to propagate the deadline from the finished fence to the actual hw fence.
v2: Split into drm_sched_fence_set_parent() (ckoenig)
Signed-off-by: Rob Clark robdclark@chromium.org
drivers/gpu/drm/scheduler/sched_fence.c | 34 +++++++++++++++++++++++++ drivers/gpu/drm/scheduler/sched_main.c | 2 +- include/drm/gpu_scheduler.h | 8 ++++++ 3 files changed, 43 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c index bcea035cf4c6..4fc41a71d1c7 100644 --- a/drivers/gpu/drm/scheduler/sched_fence.c +++ b/drivers/gpu/drm/scheduler/sched_fence.c @@ -128,6 +128,30 @@ static void drm_sched_fence_release_finished(struct dma_fence *f) dma_fence_put(&fence->scheduled); } +static void drm_sched_fence_set_deadline_finished(struct dma_fence *f,
ktime_t deadline)
+{
- struct drm_sched_fence *fence = to_drm_sched_fence(f);
- unsigned long flags;
- spin_lock_irqsave(&fence->lock, flags);
- /* If we already have an earlier deadline, keep it: */
- if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags) &&
ktime_before(fence->deadline, deadline)) {
spin_unlock_irqrestore(&fence->lock, flags);
return;
- }
- fence->deadline = deadline;
- set_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags);
- spin_unlock_irqrestore(&fence->lock, flags);
- if (fence->parent)
dma_fence_set_deadline(fence->parent, deadline);
+}
static const struct dma_fence_ops drm_sched_fence_ops_scheduled = { .get_driver_name = drm_sched_fence_get_driver_name, .get_timeline_name = drm_sched_fence_get_timeline_name, @@ -138,6 +162,7 @@ static const struct dma_fence_ops drm_sched_fence_ops_finished = { .get_driver_name = drm_sched_fence_get_driver_name, .get_timeline_name = drm_sched_fence_get_timeline_name, .release = drm_sched_fence_release_finished,
- .set_deadline = drm_sched_fence_set_deadline_finished,
}; struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f) @@ -152,6 +177,15 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f) } EXPORT_SYMBOL(to_drm_sched_fence); +void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
struct dma_fence *fence)
+{
- s_fence->parent = dma_fence_get(fence);
- if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
&s_fence->finished.flags))
Don't you need the spinlock here too, to avoid races? test_bit is very unordered, so it guarantees nothing. The spinlock would need to cover both the ->parent assignment and the test_bit.
Entirely aside, but there are discussions going on to preallocate the hw fence somehow. If we do that, we could make the deadline forwarding lockless here. Having a spinlock just to set the parent is a bit annoying ...
The alternative is that you do this locklessly with barriers and a _lot_ of comments. It would be good to benchmark first whether the overhead matters, though. -Daniel
dma_fence_set_deadline(fence, s_fence->deadline);
+}
struct drm_sched_fence *drm_sched_fence_alloc(struct drm_sched_entity *entity, void *owner) { diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 595e47ff7d06..27bf0ac0625f 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -978,7 +978,7 @@ static int drm_sched_main(void *param) drm_sched_fence_scheduled(s_fence); if (!IS_ERR_OR_NULL(fence)) {
s_fence->parent = dma_fence_get(fence);
drm_sched_fence_set_parent(s_fence, fence); r = dma_fence_add_callback(fence, &sched_job->cb, drm_sched_job_done_cb); if (r == -ENOENT)
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 7f77a455722c..158ddd662469 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -238,6 +238,12 @@ struct drm_sched_fence { */ struct dma_fence finished;
- /**
* @deadline: deadline set on &drm_sched_fence.finished which
* potentially needs to be propagated to &drm_sched_fence.parent
*/
- ktime_t deadline;
/** * @parent: the fence returned by &drm_sched_backend_ops.run_job * when scheduling the job on hardware. We signal the
@@ -505,6 +511,8 @@ void drm_sched_entity_set_priority(struct drm_sched_entity *entity, enum drm_sched_priority priority); bool drm_sched_entity_is_ready(struct drm_sched_entity *entity); +void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
struct dma_fence *fence);
struct drm_sched_fence *drm_sched_fence_alloc( struct drm_sched_entity *s_entity, void *owner); void drm_sched_fence_init(struct drm_sched_fence *fence, -- 2.31.1