6.12-stable review patch. If anyone has any objections, please let me know.
------------------
From: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com
commit a39d082c3553d35b4fe5585e1e2fb221c130cae8 upstream.
When the GuC fails to load we declare the device wedged. However, the very first GuC load attempt on GT0 (from xe_gt_init_hwconfig) is done before the GT1 GuC objects are initialized, so things go bad when the wedge code attempts to cleanup GT1. To fix this, check the initialization status in the functions called during wedge.
Fixes: 7dbe8af13c18 ("drm/xe: Wedge the entire device") Signed-off-by: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com Cc: Rodrigo Vivi rodrigo.vivi@intel.com Cc: Matthew Brost matthew.brost@intel.com Cc: Jonathan Cavitt jonathan.cavitt@intel.com Cc: Lucas De Marchi lucas.demarchi@intel.com Cc: Zhanjun Dong zhanjun.dong@intel.com Cc: stable@vger.kernel.org # v6.12+: 1e1981b16bb1: drm/xe: Fix taking invalid lock on wedge Cc: stable@vger.kernel.org # v6.12+ Reviewed-by: Jonathan Cavitt jonathan.cavitt@intel.com Reviewed-by: Lucas De Marchi lucas.demarchi@intel.com Link: https://lore.kernel.org/r/20250611214453.1159846-2-daniele.ceraolospurio@int... Signed-off-by: Lucas De Marchi lucas.demarchi@intel.com (cherry picked from commit 0b93b7dcd9eb888a6ac7546560877705d4ad61bf) Signed-off-by: Thomas Hellström thomas.hellstrom@linux.intel.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 8 ++++++++ drivers/gpu/drm/xe/xe_guc_ct.c | 7 +++++-- drivers/gpu/drm/xe/xe_guc_ct.h | 5 +++++ drivers/gpu/drm/xe/xe_guc_submit.c | 3 +++ 4 files changed, 21 insertions(+), 2 deletions(-)
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -138,6 +138,14 @@ void xe_gt_tlb_invalidation_reset(struct int pending_seqno;
/* + * we can get here before the CTs are even initialized if we're wedging + * very early, in which case there are not going to be any pending + * fences so we can bail immediately. + */ + if (!xe_guc_ct_initialized(>->uc.guc.ct)) + return; + + /* * CT channel is already disabled at this point. No new TLB requests can * appear. */ --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -454,6 +454,9 @@ void xe_guc_ct_disable(struct xe_guc_ct */ void xe_guc_ct_stop(struct xe_guc_ct *ct) { + if (!xe_guc_ct_initialized(ct)) + return; + xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_STOPPED); stop_g2h_handler(ct); } @@ -638,7 +641,7 @@ static int __guc_ct_send_locked(struct x u16 seqno; int ret;
- xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED); + xe_gt_assert(gt, xe_guc_ct_initialized(ct)); xe_gt_assert(gt, !g2h_len || !g2h_fence); xe_gt_assert(gt, !num_g2h || !g2h_fence); xe_gt_assert(gt, !g2h_len || num_g2h); @@ -1209,7 +1212,7 @@ static int g2h_read(struct xe_guc_ct *ct u32 action; u32 *hxg;
- xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED); + xe_gt_assert(gt, xe_guc_ct_initialized(ct)); lockdep_assert_held(&ct->fast_lock);
if (ct->state == XE_GUC_CT_STATE_DISABLED) --- a/drivers/gpu/drm/xe/xe_guc_ct.h +++ b/drivers/gpu/drm/xe/xe_guc_ct.h @@ -23,6 +23,11 @@ void xe_guc_ct_snapshot_print(struct xe_ void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot); void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool atomic);
+static inline bool xe_guc_ct_initialized(struct xe_guc_ct *ct) +{ + return ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED; +} + static inline bool xe_guc_ct_enabled(struct xe_guc_ct *ct) { return ct->state == XE_GUC_CT_STATE_ENABLED; --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1722,6 +1722,9 @@ int xe_guc_submit_reset_prepare(struct x { int ret;
+ if (!guc->submission_state.initialized) + return 0; + /* * Using an atomic here rather than submission_state.lock as this * function can be called while holding the CT lock (engine reset