We're seeing a GPU HANG issue on a CHV platform, which was caused by bac24f59f454 ("drm/i915/execlists: Enable coarse preemption boundaries for gen8").
Gen8 platforms have only timeslicing and don't support a preemption mechanism, as the engines do not have a preemption timer and don't send an irq if the preemption timeout expires. So, add a fix to not consider preemption during dequeuing for gen8 platforms.
Also move can_preempt() above the need_preempt() function to resolve the "implicit declaration of function ‘can_preempt’" error, and make the can_preempt() function parameter const to resolve "error: passing argument 1 of ‘can_preempt’ discards ‘const’ qualifier from the pointer target type".
v2: Simplify can_preempt() function (Tvrtko Ursulin)
Fixes: bac24f59f454 ("drm/i915/execlists: Enable coarse preemption boundaries for gen8") Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/11396 Suggested-by: Andi Shyti andi.shyti@intel.com Signed-off-by: Nitin Gote nitin.r.gote@intel.com Cc: Chris Wilson chris.p.wilson@linux.intel.com CC: stable@vger.kernel.org # v5.2+ --- .../drm/i915/gt/intel_execlists_submission.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 21829439e686..59885d7721e4 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -294,11 +294,19 @@ static int virtual_prio(const struct intel_engine_execlists *el) return rb ? rb_entry(rb, struct ve_node, rb)->prio : INT_MIN; }
+static bool can_preempt(const struct intel_engine_cs *engine) +{ + return GRAPHICS_VER(engine->i915) > 8; +} + static bool need_preempt(const struct intel_engine_cs *engine, const struct i915_request *rq) { int last_prio;
+ if (!can_preempt(engine)) + return false; + if (!intel_engine_has_semaphores(engine)) return false;
@@ -3313,15 +3321,6 @@ static void remove_from_engine(struct i915_request *rq) i915_request_notify_execute_cb_imm(rq); }
-static bool can_preempt(struct intel_engine_cs *engine) -{ - if (GRAPHICS_VER(engine->i915) > 8) - return true; - - /* GPGPU on bdw requires extra w/a; not implemented */ - return engine->class != RENDER_CLASS; -} - static void kick_execlists(const struct i915_request *rq, int prio) { struct intel_engine_cs *engine = rq->engine;
On 11/07/2024 06:12, Nitin Gote wrote:
We're seeing a GPU HANG issue on a CHV platform, which was caused by bac24f59f454 ("drm/i915/execlists: Enable coarse preemption boundaries for gen8").
Gen8 platforms have only timeslicing and don't support a preemption mechanism, as the engines do not have a preemption timer and don't send an irq if the preemption timeout expires. So, add a fix to not consider preemption during dequeuing for gen8 platforms.
Also move can_preempt() above the need_preempt() function to resolve the "implicit declaration of function ‘can_preempt’" error, and make the can_preempt() function parameter const to resolve "error: passing argument 1 of ‘can_preempt’ discards ‘const’ qualifier from the pointer target type".
v2: Simplify can_preempt() function (Tvrtko Ursulin)
Yeah, sorry about that. Yesterday, when I thought the gen8 emit bb was dead code, I somehow thought there was a gen9 emit_bb flavour. Looks like I confused it with something else.
Fixes: bac24f59f454 ("drm/i915/execlists: Enable coarse preemption boundaries for gen8") Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/11396 Suggested-by: Andi Shyti andi.shyti@intel.com Signed-off-by: Nitin Gote nitin.r.gote@intel.com Cc: Chris Wilson chris.p.wilson@linux.intel.com CC: stable@vger.kernel.org # v5.2+
.../drm/i915/gt/intel_execlists_submission.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 21829439e686..59885d7721e4 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -294,11 +294,19 @@ static int virtual_prio(const struct intel_engine_execlists *el) return rb ? rb_entry(rb, struct ve_node, rb)->prio : INT_MIN; } +static bool can_preempt(const struct intel_engine_cs *engine) +{
- return GRAPHICS_VER(engine->i915) > 8;
+}
- static bool need_preempt(const struct intel_engine_cs *engine, const struct i915_request *rq) { int last_prio;
- if (!can_preempt(engine))
return false;
- if (!intel_engine_has_semaphores(engine))
Patch looks clean now. Hmmm, one new observation: is the "has semaphores" check now redundant? It looks like preemption depends on semaphore support in logical_ring_default_vfuncs().
Regards,
Tvrtko
return false;
@@ -3313,15 +3321,6 @@ static void remove_from_engine(struct i915_request *rq) i915_request_notify_execute_cb_imm(rq); } -static bool can_preempt(struct intel_engine_cs *engine) -{
- if (GRAPHICS_VER(engine->i915) > 8)
return true;
- /* GPGPU on bdw requires extra w/a; not implemented */
- return engine->class != RENDER_CLASS;
-}
- static void kick_execlists(const struct i915_request *rq, int prio) { struct intel_engine_cs *engine = rq->engine;
linux-stable-mirror@lists.linaro.org