i915 already does memory quiesce before signaling breadcrumb so remove extra memory quiescing for aux invalidation which can cause unnecessary side effects.
Fixes: 78a6ccd65fa3 ("drm/i915/gt: Ensure memory quiesced before invalidation") Cc: Jonathan Cavitt jonathan.cavitt@intel.com Cc: Andi Shyti andi.shyti@linux.intel.com Cc: stable@vger.kernel.org # v5.8+ Cc: Andrzej Hajda andrzej.hajda@intel.com Cc: Tvrtko Ursulin tvrtko.ursulin@intel.com Cc: Matt Roper matthew.d.roper@intel.com Cc: Tejas Upadhyay tejas.upadhyay@intel.com Cc: Lucas De Marchi lucas.demarchi@intel.com Cc: Prathap Kumar Valsan prathap.kumar.valsan@intel.com Cc: Tapani Pälli tapani.palli@intel.com Cc: Mark Janes mark.janes@intel.com Cc: Rodrigo Vivi rodrigo.vivi@intel.com Signed-off-by: Nirmoy Das nirmoy.das@intel.com --- drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 50 ++++++++++++------------ 1 file changed, 26 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 0143445dba83..5001670046a0 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -248,11 +248,7 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) { struct intel_engine_cs *engine = rq->engine;
- /* - * On Aux CCS platforms the invalidation of the Aux - * table requires quiescing memory traffic beforehand - */ - if (mode & EMIT_FLUSH || gen12_needs_ccs_aux_inv(engine)) { + if (mode & EMIT_FLUSH) { u32 bit_group_0 = 0; u32 bit_group_1 = 0; int err; @@ -264,13 +260,6 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
bit_group_0 |= PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
- /* - * When required, in MTL and beyond platforms we - * need to set the CCS_FLUSH bit in the pipe control - */ - if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70)) - bit_group_0 |= PIPE_CONTROL_CCS_FLUSH; - bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH; bit_group_1 |= PIPE_CONTROL_FLUSH_L3; bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; @@ -800,14 +789,15 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) { struct drm_i915_private *i915 = rq->i915; struct intel_gt *gt = rq->engine->gt; - u32 flags = (PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_TLB_INVALIDATE | - PIPE_CONTROL_TILE_CACHE_FLUSH | - PIPE_CONTROL_FLUSH_L3 | - PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_DC_FLUSH_ENABLE | - PIPE_CONTROL_FLUSH_ENABLE); + u32 bit_group_0 = PIPE_CONTROL0_HDC_PIPELINE_FLUSH; + u32 bit_group_1 = (PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_TLB_INVALIDATE | + PIPE_CONTROL_TILE_CACHE_FLUSH | + PIPE_CONTROL_FLUSH_L3 | + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DC_FLUSH_ENABLE | + PIPE_CONTROL_FLUSH_ENABLE);
/* Wa_14016712196 */ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || IS_DG2(i915)) @@ -817,14 +807,26 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) /* Wa_1409600907 */ - flags |= PIPE_CONTROL_DEPTH_STALL; + bit_group_1 |= PIPE_CONTROL_DEPTH_STALL;
if (!HAS_3D_PIPELINE(rq->i915)) - flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS; + bit_group_1 &= ~PIPE_CONTROL_3D_ARCH_FLAGS; else if (rq->engine->class == COMPUTE_CLASS) - flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS; + bit_group_1 &= ~PIPE_CONTROL_3D_ENGINE_FLAGS; + + /* + * On Aux CCS platforms the invalidation of the Aux + * table requires quiescing memory traffic beforehand. + * gen12_emit_fini_breadcrumb_rcs() does this for us as + * all memory traffic has to be completed before we signal + * the breadcrumb. In MTL and beyond platforms, we need to also + * add CCS_FLUSH bit in the pipe control for that purpose. + */ + + if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70)) + bit_group_0 |= PIPE_CONTROL_CCS_FLUSH;
- cs = gen12_emit_pipe_control(cs, PIPE_CONTROL0_HDC_PIPELINE_FLUSH, flags, 0); + cs = gen12_emit_pipe_control(cs, bit_group_0, bit_group_1, 0);
/*XXX: Look at gen8_emit_fini_breadcrumb_rcs */ cs = gen12_emit_ggtt_write_rcs(cs,
I tested this first against tests that were failing for Mesa and it fixes all of the regressed cases (TGL LP). Also did a full run of all KHR-GLES31* and KHR-GLES32* test groups in the Khronos CTS suite, no regressions observed.
Tested-by: Tapani Pälli tapani.palli@intel.com
On 20.9.2023 14.11, Nirmoy Das wrote:
i915 already does memory quiesce before signaling breadcrumb so remove extra memory quiescing for aux invalidation which can cause unnecessary side effects.
Fixes: 78a6ccd65fa3 ("drm/i915/gt: Ensure memory quiesced before invalidation") Cc: Jonathan Cavitt jonathan.cavitt@intel.com Cc: Andi Shyti andi.shyti@linux.intel.com Cc: stable@vger.kernel.org # v5.8+ Cc: Andrzej Hajda andrzej.hajda@intel.com Cc: Tvrtko Ursulin tvrtko.ursulin@intel.com Cc: Matt Roper matthew.d.roper@intel.com Cc: Tejas Upadhyay tejas.upadhyay@intel.com Cc: Lucas De Marchi lucas.demarchi@intel.com Cc: Prathap Kumar Valsan prathap.kumar.valsan@intel.com Cc: Tapani Pälli tapani.palli@intel.com Cc: Mark Janes mark.janes@intel.com Cc: Rodrigo Vivi rodrigo.vivi@intel.com Signed-off-by: Nirmoy Das nirmoy.das@intel.com
drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 50 ++++++++++++------------ 1 file changed, 26 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 0143445dba83..5001670046a0 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -248,11 +248,7 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) { struct intel_engine_cs *engine = rq->engine;
- /*
* On Aux CCS platforms the invalidation of the Aux
* table requires quiescing memory traffic beforehand
*/
- if (mode & EMIT_FLUSH || gen12_needs_ccs_aux_inv(engine)) {
- if (mode & EMIT_FLUSH) { u32 bit_group_0 = 0; u32 bit_group_1 = 0; int err;
@@ -264,13 +260,6 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) bit_group_0 |= PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
/*
* When required, in MTL and beyond platforms we
* need to set the CCS_FLUSH bit in the pipe control
*/
if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70))
bit_group_0 |= PIPE_CONTROL_CCS_FLUSH;
- bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH; bit_group_1 |= PIPE_CONTROL_FLUSH_L3; bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
@@ -800,14 +789,15 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) { struct drm_i915_private *i915 = rq->i915; struct intel_gt *gt = rq->engine->gt;
- u32 flags = (PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_TLB_INVALIDATE |
PIPE_CONTROL_TILE_CACHE_FLUSH |
PIPE_CONTROL_FLUSH_L3 |
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_DC_FLUSH_ENABLE |
PIPE_CONTROL_FLUSH_ENABLE);
- u32 bit_group_0 = PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
- u32 bit_group_1 = (PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_TLB_INVALIDATE |
PIPE_CONTROL_TILE_CACHE_FLUSH |
PIPE_CONTROL_FLUSH_L3 |
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_DC_FLUSH_ENABLE |
PIPE_CONTROL_FLUSH_ENABLE);
/* Wa_14016712196 */ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || IS_DG2(i915)) @@ -817,14 +807,26 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) /* Wa_1409600907 */
flags |= PIPE_CONTROL_DEPTH_STALL;
bit_group_1 |= PIPE_CONTROL_DEPTH_STALL;
if (!HAS_3D_PIPELINE(rq->i915))
flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
else if (rq->engine->class == COMPUTE_CLASS)bit_group_1 &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
bit_group_1 &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
- /*
* On Aux CCS platforms the invalidation of the Aux
* table requires quiescing memory traffic beforehand.
* gen12_emit_fini_breadcrumb_rcs() does this for us as
* all memory traffic has to be completed before we signal
* the breadcrumb. In MTL and beyond platforms, we need to also
* add CCS_FLUSH bit in the pipe control for that purpose.
*/
- if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70))
bit_group_0 |= PIPE_CONTROL_CCS_FLUSH;
- cs = gen12_emit_pipe_control(cs, PIPE_CONTROL0_HDC_PIPELINE_FLUSH, flags, 0);
- cs = gen12_emit_pipe_control(cs, bit_group_0, bit_group_1, 0);
/*XXX: Look at gen8_emit_fini_breadcrumb_rcs */ cs = gen12_emit_ggtt_write_rcs(cs,
On Wed, Sep 20, 2023 at 01:11:31PM +0200, Nirmoy Das wrote:
i915 already does memory quiesce before signaling breadcrumb so remove extra memory quiescing for aux invalidation which can cause unnecessary side effects.
This explanation seems confusing to me. If we've already performed the necessary flushing and quiesced all cache<->memory traffic, then performing another flush should just be a noop, right? If we're seeing side effects, then doesn't that imply that there was still something in the cache that hadn't made its way to memory yet?
Is the breadcrumb code flushing all the necessary bits? What about PIPE_CONTROL_CCS_FLUSH?
Matt
Fixes: 78a6ccd65fa3 ("drm/i915/gt: Ensure memory quiesced before invalidation") Cc: Jonathan Cavitt jonathan.cavitt@intel.com Cc: Andi Shyti andi.shyti@linux.intel.com Cc: stable@vger.kernel.org # v5.8+ Cc: Andrzej Hajda andrzej.hajda@intel.com Cc: Tvrtko Ursulin tvrtko.ursulin@intel.com Cc: Matt Roper matthew.d.roper@intel.com Cc: Tejas Upadhyay tejas.upadhyay@intel.com Cc: Lucas De Marchi lucas.demarchi@intel.com Cc: Prathap Kumar Valsan prathap.kumar.valsan@intel.com Cc: Tapani Pälli tapani.palli@intel.com Cc: Mark Janes mark.janes@intel.com Cc: Rodrigo Vivi rodrigo.vivi@intel.com Signed-off-by: Nirmoy Das nirmoy.das@intel.com
drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 50 ++++++++++++------------ 1 file changed, 26 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 0143445dba83..5001670046a0 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -248,11 +248,7 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) { struct intel_engine_cs *engine = rq->engine;
- /*
* On Aux CCS platforms the invalidation of the Aux
* table requires quiescing memory traffic beforehand
*/
- if (mode & EMIT_FLUSH || gen12_needs_ccs_aux_inv(engine)) {
- if (mode & EMIT_FLUSH) { u32 bit_group_0 = 0; u32 bit_group_1 = 0; int err;
@@ -264,13 +260,6 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) bit_group_0 |= PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
/*
* When required, in MTL and beyond platforms we
* need to set the CCS_FLUSH bit in the pipe control
*/
if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70))
bit_group_0 |= PIPE_CONTROL_CCS_FLUSH;
- bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH; bit_group_1 |= PIPE_CONTROL_FLUSH_L3; bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
@@ -800,14 +789,15 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) { struct drm_i915_private *i915 = rq->i915; struct intel_gt *gt = rq->engine->gt;
- u32 flags = (PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_TLB_INVALIDATE |
PIPE_CONTROL_TILE_CACHE_FLUSH |
PIPE_CONTROL_FLUSH_L3 |
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_DC_FLUSH_ENABLE |
PIPE_CONTROL_FLUSH_ENABLE);
- u32 bit_group_0 = PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
- u32 bit_group_1 = (PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_TLB_INVALIDATE |
PIPE_CONTROL_TILE_CACHE_FLUSH |
PIPE_CONTROL_FLUSH_L3 |
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_DC_FLUSH_ENABLE |
PIPE_CONTROL_FLUSH_ENABLE);
/* Wa_14016712196 */ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || IS_DG2(i915)) @@ -817,14 +807,26 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) /* Wa_1409600907 */
flags |= PIPE_CONTROL_DEPTH_STALL;
bit_group_1 |= PIPE_CONTROL_DEPTH_STALL;
if (!HAS_3D_PIPELINE(rq->i915))
flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
else if (rq->engine->class == COMPUTE_CLASS)bit_group_1 &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
bit_group_1 &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
- /*
* On Aux CCS platforms the invalidation of the Aux
* table requires quiescing memory traffic beforehand.
* gen12_emit_fini_breadcrumb_rcs() does this for us as
* all memory traffic has to be completed before we signal
* the breadcrumb. In MTL and beyond platforms, we need to also
* add CCS_FLUSH bit in the pipe control for that purpose.
*/
- if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70))
bit_group_0 |= PIPE_CONTROL_CCS_FLUSH;
- cs = gen12_emit_pipe_control(cs, PIPE_CONTROL0_HDC_PIPELINE_FLUSH, flags, 0);
- cs = gen12_emit_pipe_control(cs, bit_group_0, bit_group_1, 0);
/*XXX: Look at gen8_emit_fini_breadcrumb_rcs */ cs = gen12_emit_ggtt_write_rcs(cs, -- 2.41.0
linux-stable-mirror@lists.linaro.org