 
            These patchset adds support for VPU_JSM_STATUS_MVNCI_CONTEXT_VIOLATION_HW message added in recent VPU firmware. Without it the driver will not be able to process any jobs after this message is received and would need to be reloaded.
Most patches are as-is from upstream besides these two: - Fix locking order in ivpu_job_submit - Abort all jobs after command queue unregister
Both these patches need to be rebased because of missing new CMDQ UAPI changes that should not be backported to stable.
Changes since v1: - Documented deviations from the original upstream patches in commit messages
Andrew Kreimer (1): accel/ivpu: Fix a typo
Andrzej Kacprowski (1): accel/ivpu: Update VPU FW API headers
Karol Wachowski (4): accel/ivpu: Use xa_alloc_cyclic() instead of custom function accel/ivpu: Abort all jobs after command queue unregister accel/ivpu: Fix locking order in ivpu_job_submit accel/ivpu: Add handling of VPU_JSM_STATUS_MVNCI_CONTEXT_VIOLATION_HW
Tomasz Rusinowicz (1): accel/ivpu: Make DB_ID and JOB_ID allocations incremental
drivers/accel/ivpu/ivpu_drv.c | 38 ++-- drivers/accel/ivpu/ivpu_drv.h | 9 + drivers/accel/ivpu/ivpu_job.c | 125 +++++++++--- drivers/accel/ivpu/ivpu_job.h | 1 + drivers/accel/ivpu/ivpu_jsm_msg.c | 3 +- drivers/accel/ivpu/ivpu_mmu.c | 3 +- drivers/accel/ivpu/ivpu_sysfs.c | 5 +- drivers/accel/ivpu/vpu_boot_api.h | 45 +++-- drivers/accel/ivpu/vpu_jsm_api.h | 303 +++++++++++++++++++++++++----- 9 files changed, 412 insertions(+), 120 deletions(-)
 
            From: Tomasz Rusinowicz tomasz.rusinowicz@intel.com
commit c3b0ec0fe0c7ebc4eb42ba60f7340ecdb7aae1a2 upstream.
Save last used ID and use it to limit the possible values for the ID. This should decrease the rate at which the IDs are reused, which will make debugging easier.
Cc: stable@vger.kernel.org # v6.12 Signed-off-by: Tomasz Rusinowicz tomasz.rusinowicz@intel.com Reviewed-by: Jacek Lawrynowicz jacek.lawrynowicz@linux.intel.com Reviewed-by: Jeffrey Hugo quic_jhugo@quicinc.com Link: https://patchwork.freedesktop.org/patch/msgid/20240930195322.461209-19-jacek... Signed-off-by: Jacek Lawrynowicz jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_drv.c | 9 +++++++++ drivers/accel/ivpu/ivpu_drv.h | 7 +++++++ drivers/accel/ivpu/ivpu_job.c | 37 +++++++++++++++++++++++++---------- 3 files changed, 43 insertions(+), 10 deletions(-)
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 88df2cdc46b62..009b1226ee2ec 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -260,6 +260,11 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file) if (ret) goto err_xa_erase;
+ file_priv->default_job_limit.min = FIELD_PREP(IVPU_JOB_ID_CONTEXT_MASK, + (file_priv->ctx.id - 1)); + file_priv->default_job_limit.max = file_priv->default_job_limit.min | IVPU_JOB_ID_JOB_MASK; + file_priv->job_limit = file_priv->default_job_limit; + mutex_unlock(&vdev->context_list_lock); drm_dev_exit(idx);
@@ -607,6 +612,10 @@ static int ivpu_dev_init(struct ivpu_device *vdev) lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key); INIT_LIST_HEAD(&vdev->bo_list);
+ vdev->default_db_limit.min = IVPU_MIN_DB; + vdev->default_db_limit.max = IVPU_MAX_DB; + vdev->db_limit = vdev->default_db_limit; + ret = drmm_mutex_init(&vdev->drm, &vdev->context_list_lock); if (ret) goto err_xa_destroy; diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index 9430a24994c32..92f5df7fdc83d 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -46,6 +46,9 @@ #define IVPU_MIN_DB 1 #define IVPU_MAX_DB 255
+#define IVPU_JOB_ID_JOB_MASK GENMASK(7, 0) +#define IVPU_JOB_ID_CONTEXT_MASK GENMASK(31, 8) + #define IVPU_NUM_ENGINES 2 #define IVPU_NUM_PRIORITIES 4 #define IVPU_NUM_CMDQS_PER_CTX (IVPU_NUM_ENGINES * IVPU_NUM_PRIORITIES) @@ -136,6 +139,8 @@ struct ivpu_device { struct xa_limit context_xa_limit;
struct xarray db_xa; + struct xa_limit db_limit; + struct xa_limit default_db_limit;
struct mutex bo_list_lock; /* Protects bo_list */ struct list_head bo_list; @@ -171,6 +176,8 @@ struct ivpu_file_priv { struct mutex ms_lock; /* Protects ms_instance_list, ms_info_bo */ struct list_head ms_instance_list; struct ivpu_bo *ms_info_bo; + struct xa_limit job_limit; + struct xa_limit default_job_limit; bool has_mmu_faults; bool bound; bool aborted; diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index 91f7f6f3ca675..c0e52126779bc 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -21,8 +21,6 @@ #include "vpu_boot_api.h"
#define CMD_BUF_IDX 0 -#define JOB_ID_JOB_MASK GENMASK(7, 0) -#define JOB_ID_CONTEXT_MASK GENMASK(31, 8) #define JOB_MAX_BUFFER_COUNT 65535
static void ivpu_cmdq_ring_db(struct ivpu_device *vdev, struct ivpu_cmdq *cmdq) @@ -77,9 +75,28 @@ static void ivpu_preemption_buffers_free(struct ivpu_device *vdev, ivpu_bo_free(cmdq->secondary_preempt_buf); }
+static int ivpu_id_alloc(struct xarray *xa, u32 *id, void *entry, struct xa_limit *limit, + const struct xa_limit default_limit) +{ + int ret; + + ret = __xa_alloc(xa, id, entry, *limit, GFP_KERNEL); + if (ret) { + limit->min = default_limit.min; + ret = __xa_alloc(xa, id, entry, *limit, GFP_KERNEL); + if (ret) + return ret; + } + + limit->min = *id + 1; + if (limit->min > limit->max) + limit->min = default_limit.min; + + return ret; +} + static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv) { - struct xa_limit db_xa_limit = {.max = IVPU_MAX_DB, .min = IVPU_MIN_DB}; struct ivpu_device *vdev = file_priv->vdev; struct ivpu_cmdq *cmdq; int ret; @@ -88,7 +105,10 @@ static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv) if (!cmdq) return NULL;
- ret = xa_alloc(&vdev->db_xa, &cmdq->db_id, NULL, db_xa_limit, GFP_KERNEL); + xa_lock(&vdev->db_xa); /* lock here to protect db_limit */ + ret = ivpu_id_alloc(&vdev->db_xa, &cmdq->db_id, NULL, &vdev->db_limit, + vdev->default_db_limit); + xa_unlock(&vdev->db_xa); if (ret) { ivpu_err(vdev, "Failed to allocate doorbell id: %d\n", ret); goto err_free_cmdq; @@ -519,7 +539,6 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority) { struct ivpu_file_priv *file_priv = job->file_priv; struct ivpu_device *vdev = job->vdev; - struct xa_limit job_id_range; struct ivpu_cmdq *cmdq; bool is_first_job; int ret; @@ -530,7 +549,7 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority)
mutex_lock(&file_priv->lock);
- cmdq = ivpu_cmdq_acquire(job->file_priv, job->engine_idx, priority); + cmdq = ivpu_cmdq_acquire(file_priv, job->engine_idx, priority); if (!cmdq) { ivpu_warn_ratelimited(vdev, "Failed to get job queue, ctx %d engine %d prio %d\n", file_priv->ctx.id, job->engine_idx, priority); @@ -538,12 +557,10 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority) goto err_unlock_file_priv; }
- job_id_range.min = FIELD_PREP(JOB_ID_CONTEXT_MASK, (file_priv->ctx.id - 1)); - job_id_range.max = job_id_range.min | JOB_ID_JOB_MASK; - xa_lock(&vdev->submitted_jobs_xa); is_first_job = xa_empty(&vdev->submitted_jobs_xa); - ret = __xa_alloc(&vdev->submitted_jobs_xa, &job->job_id, job, job_id_range, GFP_KERNEL); + ret = ivpu_id_alloc(&vdev->submitted_jobs_xa, &job->job_id, job, &file_priv->job_limit, + file_priv->default_job_limit); if (ret) { ivpu_dbg(vdev, JOB, "Too many active jobs in ctx %d\n", file_priv->ctx.id);
 
            [ Sasha's backport helper bot ]
Hi,
✅ All tests passed successfully. No issues detected. No action required from the submitter.
The upstream commit SHA1 provided is correct: c3b0ec0fe0c7ebc4eb42ba60f7340ecdb7aae1a2
WARNING: Author mismatch between patch and upstream commit: Backport author: Jacek Lawrynowiczjacek.lawrynowicz@linux.intel.com Commit author: Tomasz Rusinowicztomasz.rusinowicz@intel.com
Note: The patch differs from the upstream commit: --- 1: c3b0ec0fe0c7e < -: ------------- accel/ivpu: Make DB_ID and JOB_ID allocations incremental -: ------------- > 1: aeaee199900ee Linux 6.14.5 ---
Results of testing on various branches:
| Branch | Patch Apply | Build Test | |---------------------------|-------------|------------| | stable/linux-5.4.y | Success | Success |
 
            From: Karol Wachowski karol.wachowski@intel.com
commit ae7af7d8dc2a13a427aa90d003fe4fb2c168342a upstream.
Remove custom ivpu_id_alloc() wrapper used for ID allocations and replace it with standard xa_alloc_cyclic() API.
The idea behind ivpu_id_alloc() was to have monotonic IDs, so the driver is easier to debug because same IDs are not reused all over. The same can be achieved just by using appropriate Linux API.
Cc: stable@vger.kernel.org # v6.12 Signed-off-by: Karol Wachowski karol.wachowski@intel.com Reviewed-by: Jacek Lawrynowicz jacek.lawrynowicz@linux.intel.com Reviewed-by: Jeffrey Hugo quic_jhugo@quicinc.com Link: https://patchwork.freedesktop.org/patch/msgid/20241017145817.121590-7-jacek.... Signed-off-by: Jacek Lawrynowicz jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_drv.c | 11 ++++------- drivers/accel/ivpu/ivpu_drv.h | 4 ++-- drivers/accel/ivpu/ivpu_job.c | 34 ++++++---------------------------- 3 files changed, 12 insertions(+), 37 deletions(-)
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 009b1226ee2ec..2a229903654e0 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -260,10 +260,8 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file) if (ret) goto err_xa_erase;
- file_priv->default_job_limit.min = FIELD_PREP(IVPU_JOB_ID_CONTEXT_MASK, - (file_priv->ctx.id - 1)); - file_priv->default_job_limit.max = file_priv->default_job_limit.min | IVPU_JOB_ID_JOB_MASK; - file_priv->job_limit = file_priv->default_job_limit; + file_priv->job_limit.min = FIELD_PREP(IVPU_JOB_ID_CONTEXT_MASK, (file_priv->ctx.id - 1)); + file_priv->job_limit.max = file_priv->job_limit.min | IVPU_JOB_ID_JOB_MASK;
mutex_unlock(&vdev->context_list_lock); drm_dev_exit(idx); @@ -612,9 +610,8 @@ static int ivpu_dev_init(struct ivpu_device *vdev) lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key); INIT_LIST_HEAD(&vdev->bo_list);
- vdev->default_db_limit.min = IVPU_MIN_DB; - vdev->default_db_limit.max = IVPU_MAX_DB; - vdev->db_limit = vdev->default_db_limit; + vdev->db_limit.min = IVPU_MIN_DB; + vdev->db_limit.max = IVPU_MAX_DB;
ret = drmm_mutex_init(&vdev->drm, &vdev->context_list_lock); if (ret) diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index 92f5df7fdc83d..56509c5a3875b 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -140,7 +140,7 @@ struct ivpu_device {
struct xarray db_xa; struct xa_limit db_limit; - struct xa_limit default_db_limit; + u32 db_next;
struct mutex bo_list_lock; /* Protects bo_list */ struct list_head bo_list; @@ -177,7 +177,7 @@ struct ivpu_file_priv { struct list_head ms_instance_list; struct ivpu_bo *ms_info_bo; struct xa_limit job_limit; - struct xa_limit default_job_limit; + u32 job_id_next; bool has_mmu_faults; bool bound; bool aborted; diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index c0e52126779bc..9767cde4ccd4b 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -75,26 +75,6 @@ static void ivpu_preemption_buffers_free(struct ivpu_device *vdev, ivpu_bo_free(cmdq->secondary_preempt_buf); }
-static int ivpu_id_alloc(struct xarray *xa, u32 *id, void *entry, struct xa_limit *limit, - const struct xa_limit default_limit) -{ - int ret; - - ret = __xa_alloc(xa, id, entry, *limit, GFP_KERNEL); - if (ret) { - limit->min = default_limit.min; - ret = __xa_alloc(xa, id, entry, *limit, GFP_KERNEL); - if (ret) - return ret; - } - - limit->min = *id + 1; - if (limit->min > limit->max) - limit->min = default_limit.min; - - return ret; -} - static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv) { struct ivpu_device *vdev = file_priv->vdev; @@ -105,11 +85,9 @@ static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv) if (!cmdq) return NULL;
- xa_lock(&vdev->db_xa); /* lock here to protect db_limit */ - ret = ivpu_id_alloc(&vdev->db_xa, &cmdq->db_id, NULL, &vdev->db_limit, - vdev->default_db_limit); - xa_unlock(&vdev->db_xa); - if (ret) { + ret = xa_alloc_cyclic(&vdev->db_xa, &cmdq->db_id, NULL, vdev->db_limit, &vdev->db_next, + GFP_KERNEL); + if (ret < 0) { ivpu_err(vdev, "Failed to allocate doorbell id: %d\n", ret); goto err_free_cmdq; } @@ -559,9 +537,9 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority)
xa_lock(&vdev->submitted_jobs_xa); is_first_job = xa_empty(&vdev->submitted_jobs_xa); - ret = ivpu_id_alloc(&vdev->submitted_jobs_xa, &job->job_id, job, &file_priv->job_limit, - file_priv->default_job_limit); - if (ret) { + ret = __xa_alloc_cyclic(&vdev->submitted_jobs_xa, &job->job_id, job, file_priv->job_limit, + &file_priv->job_id_next, GFP_KERNEL); + if (ret < 0) { ivpu_dbg(vdev, JOB, "Too many active jobs in ctx %d\n", file_priv->ctx.id); ret = -EBUSY;
 
            [ Sasha's backport helper bot ]
Hi,
✅ All tests passed successfully. No issues detected. No action required from the submitter.
The upstream commit SHA1 provided is correct: ae7af7d8dc2a13a427aa90d003fe4fb2c168342a
WARNING: Author mismatch between patch and upstream commit: Backport author: Jacek Lawrynowiczjacek.lawrynowicz@linux.intel.com Commit author: Karol Wachowskikarol.wachowski@intel.com
Note: The patch differs from the upstream commit: --- 1: ae7af7d8dc2a1 < -: ------------- accel/ivpu: Use xa_alloc_cyclic() instead of custom function -: ------------- > 1: aeaee199900ee Linux 6.14.5 ---
Results of testing on various branches:
| Branch | Patch Apply | Build Test | |---------------------------|-------------|------------| | stable/linux-5.4.y | Success | Success |
 
            From: Andrew Kreimer algonell@gmail.com
commit 284a8908f5ec25355a831e3e2d87975d748e98dc upstream.
Fix a typo in comments.
Cc: stable@vger.kernel.org # v6.12 Reported-by: Matthew Wilcox willy@infradead.org Signed-off-by: Andrew Kreimer algonell@gmail.com Signed-off-by: Simona Vetter simona.vetter@ffwll.ch Link: https://patchwork.freedesktop.org/patch/msgid/20240909135655.45938-1-algonel... --- drivers/accel/ivpu/vpu_boot_api.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/accel/ivpu/vpu_boot_api.h b/drivers/accel/ivpu/vpu_boot_api.h index 82954b91b7481..d474bc7b15c01 100644 --- a/drivers/accel/ivpu/vpu_boot_api.h +++ b/drivers/accel/ivpu/vpu_boot_api.h @@ -8,7 +8,7 @@
/* * =========== FW API version information beginning ================ - * The bellow values will be used to construct the version info this way: + * The below values will be used to construct the version info this way: * fw_bin_header->api_version[VPU_BOOT_API_VER_ID] = (VPU_BOOT_API_VER_MAJOR << 16) | * VPU_BOOT_API_VER_MINOR; * VPU_BOOT_API_VER_PATCH will be ignored. KMD and compatibility is not affected if this changes
 
            [ Sasha's backport helper bot ]
Hi,
✅ All tests passed successfully. No issues detected. No action required from the submitter.
The upstream commit SHA1 provided is correct: 284a8908f5ec25355a831e3e2d87975d748e98dc
WARNING: Author mismatch between patch and upstream commit: Backport author: Jacek Lawrynowiczjacek.lawrynowicz@linux.intel.com Commit author: Andrew Kreimeralgonell@gmail.com
Note: The patch differs from the upstream commit: --- 1: 284a8908f5ec2 < -: ------------- accel/ivpu: Fix a typo -: ------------- > 1: aeaee199900ee Linux 6.14.5 ---
Results of testing on various branches:
| Branch | Patch Apply | Build Test | |---------------------------|-------------|------------| | stable/linux-5.4.y | Success | Success |
 
            From: Andrzej Kacprowski Andrzej.Kacprowski@intel.com
commit a4293cc75348409f998c991c48cbe5532c438114 upstream.
This commit bumps: - Boot API from 3.24.0 to 3.26.3 - JSM API from 3.16.0 to 3.25.0
Cc: stable@vger.kernel.org # v6.12 Signed-off-by: Andrzej Kacprowski Andrzej.Kacprowski@intel.com Co-developed-by: Tomasz Rusinowicz tomasz.rusinowicz@intel.com Signed-off-by: Tomasz Rusinowicz tomasz.rusinowicz@intel.com Reviewed-by: Jacek Lawrynowicz jacek.lawrynowicz@linux.intel.com Link: https://patchwork.freedesktop.org/patch/msgid/20240930195322.461209-2-jacek.... Signed-off-by: Jacek Lawrynowicz jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_job.c | 2 +- drivers/accel/ivpu/ivpu_jsm_msg.c | 3 +- drivers/accel/ivpu/vpu_boot_api.h | 43 +++-- drivers/accel/ivpu/vpu_jsm_api.h | 303 +++++++++++++++++++++++++----- 4 files changed, 292 insertions(+), 59 deletions(-)
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index 9767cde4ccd4b..c2108346c4c9d 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -352,7 +352,7 @@ static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job) return -EBUSY; }
- entry = &cmdq->jobq->job[tail]; + entry = &cmdq->jobq->slot[tail].job; entry->batch_buf_addr = job->cmd_buf_vpu_addr; entry->job_id = job->job_id; entry->flags = 0; diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.c b/drivers/accel/ivpu/ivpu_jsm_msg.c index f7618b605f021..ae91ad24d10d8 100644 --- a/drivers/accel/ivpu/ivpu_jsm_msg.c +++ b/drivers/accel/ivpu/ivpu_jsm_msg.c @@ -48,9 +48,10 @@ const char *ivpu_jsm_msg_type_to_str(enum vpu_ipc_msg_type type) IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE); IVPU_CASE_TO_STR(VPU_JSM_MSG_STATE_DUMP); IVPU_CASE_TO_STR(VPU_JSM_MSG_STATE_DUMP_RSP); - IVPU_CASE_TO_STR(VPU_JSM_MSG_BLOB_DEINIT); + IVPU_CASE_TO_STR(VPU_JSM_MSG_BLOB_DEINIT_DEPRECATED); IVPU_CASE_TO_STR(VPU_JSM_MSG_DYNDBG_CONTROL); IVPU_CASE_TO_STR(VPU_JSM_MSG_JOB_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_NATIVE_FENCE_SIGNALLED); IVPU_CASE_TO_STR(VPU_JSM_MSG_ENGINE_RESET_DONE); IVPU_CASE_TO_STR(VPU_JSM_MSG_ENGINE_PREEMPT_DONE); IVPU_CASE_TO_STR(VPU_JSM_MSG_REGISTER_DB_DONE); diff --git a/drivers/accel/ivpu/vpu_boot_api.h b/drivers/accel/ivpu/vpu_boot_api.h index d474bc7b15c01..908e68ea1c39c 100644 --- a/drivers/accel/ivpu/vpu_boot_api.h +++ b/drivers/accel/ivpu/vpu_boot_api.h @@ -1,13 +1,12 @@ /* SPDX-License-Identifier: MIT */ /* - * Copyright (c) 2020-2023, Intel Corporation. + * Copyright (c) 2020-2024, Intel Corporation. */
#ifndef VPU_BOOT_API_H #define VPU_BOOT_API_H
/* - * =========== FW API version information beginning ================ * The below values will be used to construct the version info this way: * fw_bin_header->api_version[VPU_BOOT_API_VER_ID] = (VPU_BOOT_API_VER_MAJOR << 16) | * VPU_BOOT_API_VER_MINOR; @@ -27,19 +26,18 @@ * Minor version changes when API backward compatibility is preserved. * Resets to 0 if Major version is incremented. */ -#define VPU_BOOT_API_VER_MINOR 24 +#define VPU_BOOT_API_VER_MINOR 26
/* * API header changed (field names, documentation, formatting) but API itself has not been changed */ -#define VPU_BOOT_API_VER_PATCH 0 +#define VPU_BOOT_API_VER_PATCH 3
/* * Index in the API version table * Must be unique for each API */ #define VPU_BOOT_API_VER_INDEX 0 -/* ------------ FW API version information end ---------------------*/
#pragma pack(push, 4)
@@ -164,8 +162,6 @@ enum vpu_trace_destination { /* VPU 30xx HW component IDs are sequential, so define first and last IDs. */ #define VPU_TRACE_PROC_BIT_30XX_FIRST VPU_TRACE_PROC_BIT_LRT #define VPU_TRACE_PROC_BIT_30XX_LAST VPU_TRACE_PROC_BIT_SHV_15 -#define VPU_TRACE_PROC_BIT_KMB_FIRST VPU_TRACE_PROC_BIT_30XX_FIRST -#define VPU_TRACE_PROC_BIT_KMB_LAST VPU_TRACE_PROC_BIT_30XX_LAST
struct vpu_boot_l2_cache_config { u8 use; @@ -199,6 +195,17 @@ struct vpu_warm_boot_section { */ #define POWER_PROFILE_SURVIVABILITY 0x1
+/** + * Enum for dvfs_mode boot param. + */ +enum vpu_governor { + VPU_GOV_DEFAULT = 0, /* Default Governor for the system */ + VPU_GOV_MAX_PERFORMANCE = 1, /* Maximum performance governor */ + VPU_GOV_ON_DEMAND = 2, /* On Demand frequency control governor */ + VPU_GOV_POWER_SAVE = 3, /* Power save governor */ + VPU_GOV_ON_DEMAND_PRIORITY_AWARE = 4 /* On Demand priority based governor */ +}; + struct vpu_boot_params { u32 magic; u32 vpu_id; @@ -301,7 +308,14 @@ struct vpu_boot_params { u32 temp_sensor_period_ms; /** PLL ratio for efficient clock frequency */ u32 pn_freq_pll_ratio; - /** DVFS Mode: Default: 0, Max Performance: 1, On Demand: 2, Power Save: 3 */ + /** + * DVFS Mode: + * 0 - Default, DVFS mode selected by the firmware + * 1 - Max Performance + * 2 - On Demand + * 3 - Power Save + * 4 - On Demand Priority Aware + */ u32 dvfs_mode; /** * Depending on DVFS Mode: @@ -332,8 +346,8 @@ struct vpu_boot_params { u64 d0i3_entry_vpu_ts; /* * The system time of the host operating system in microseconds. - * E.g the number of microseconds since 1st of January 1970, or whatever date the - * host operating system uses to maintain system time. + * E.g the number of microseconds since 1st of January 1970, or whatever + * date the host operating system uses to maintain system time. * This value will be used to track system time on the VPU. * The KMD is required to update this value on every VPU reset. */ @@ -382,10 +396,7 @@ struct vpu_boot_params { u32 pad6[734]; };
-/* - * Magic numbers set between host and vpu to detect corruptio of tracing init - */ - +/* Magic numbers set between host and vpu to detect corruption of tracing init */ #define VPU_TRACING_BUFFER_CANARY (0xCAFECAFE)
/* Tracing buffer message format definitions */ @@ -405,7 +416,9 @@ struct vpu_tracing_buffer_header { u32 host_canary_start; /* offset from start of buffer for trace entries */ u32 read_index; - u32 pad_to_cache_line_size_0[14]; + /* keeps track of wrapping on the reader side */ + u32 read_wrap_count; + u32 pad_to_cache_line_size_0[13]; /* End of first cache line */
/** diff --git a/drivers/accel/ivpu/vpu_jsm_api.h b/drivers/accel/ivpu/vpu_jsm_api.h index 33f462b1a25d8..7215c144158cb 100644 --- a/drivers/accel/ivpu/vpu_jsm_api.h +++ b/drivers/accel/ivpu/vpu_jsm_api.h @@ -22,7 +22,7 @@ /* * Minor version changes when API backward compatibility is preserved. */ -#define VPU_JSM_API_VER_MINOR 16 +#define VPU_JSM_API_VER_MINOR 25
/* * API header changed (field names, documentation, formatting) but API itself has not been changed @@ -36,7 +36,7 @@
/* * Number of Priority Bands for Hardware Scheduling - * Bands: RealTime, Focus, Normal, Idle + * Bands: Idle(0), Normal(1), Focus(2), RealTime(3) */ #define VPU_HWS_NUM_PRIORITY_BANDS 4
@@ -74,6 +74,7 @@ #define VPU_JSM_STATUS_MVNCI_INTERNAL_ERROR 0xCU /* Job status returned when the job was preempted mid-inference */ #define VPU_JSM_STATUS_PREEMPTED_MID_INFERENCE 0xDU +#define VPU_JSM_STATUS_MVNCI_CONTEXT_VIOLATION_HW 0xEU
/* * Host <-> VPU IPC channels. @@ -86,18 +87,58 @@ /* * Job flags bit masks. */ -#define VPU_JOB_FLAGS_NULL_SUBMISSION_MASK 0x00000001 -#define VPU_JOB_FLAGS_PRIVATE_DATA_MASK 0xFF000000 +enum { + /* + * Null submission mask. + * When set, batch buffer's commands are not processed but returned as + * successful immediately, except fences and timestamps. + * When cleared, batch buffer's commands are processed normally. + * Used for testing and profiling purposes. + */ + VPU_JOB_FLAGS_NULL_SUBMISSION_MASK = (1 << 0U), + /* + * Inline command mask. + * When set, the object in job queue is an inline command (see struct vpu_inline_cmd below). + * When cleared, the object in job queue is a job (see struct vpu_job_queue_entry below). + */ + VPU_JOB_FLAGS_INLINE_CMD_MASK = (1 << 1U), + /* + * VPU private data mask. + * Reserved for the VPU to store private data about the job (or inline command) + * while being processed. + */ + VPU_JOB_FLAGS_PRIVATE_DATA_MASK = 0xFFFF0000U +};
/* - * Sizes of the reserved areas in jobs, in bytes. + * Job queue flags bit masks. */ -#define VPU_JOB_RESERVED_BYTES 8 +enum { + /* + * No job done notification mask. + * When set, indicates that no job done notification should be sent for any + * job from this queue. When cleared, indicates that job done notification + * should be sent for every job completed from this queue. + */ + VPU_JOB_QUEUE_FLAGS_NO_JOB_DONE_MASK = (1 << 0U), + /* + * Native fence usage mask. + * When set, indicates that job queue uses native fences (as inline commands + * in job queue). Such queues may also use legacy fences (as commands in batch buffers). + * When cleared, indicates the job queue only uses legacy fences. + * NOTE: For queues using native fences, VPU expects that all jobs in the queue + * are immediately followed by an inline command object. This object is expected + * to be a fence signal command in most cases, but can also be a NOP in case the host + * does not need per-job fence signalling. Other inline commands objects can be + * inserted between "job and inline command" pairs. + */ + VPU_JOB_QUEUE_FLAGS_USE_NATIVE_FENCE_MASK = (1 << 1U),
-/* - * Sizes of the reserved areas in job queues, in bytes. - */ -#define VPU_JOB_QUEUE_RESERVED_BYTES 52 + /* + * Enable turbo mode for testing NPU performance; not recommended for regular usage. + */ + VPU_JOB_QUEUE_FLAGS_TURBO_MODE = (1 << 2U) +};
/* * Max length (including trailing NULL char) of trace entity name (e.g., the @@ -140,24 +181,113 @@ */ #define VPU_HWS_INVALID_CMDQ_HANDLE 0ULL
+/* + * Inline commands types. + */ +/* + * NOP. + * VPU does nothing other than consuming the inline command object. + */ +#define VPU_INLINE_CMD_TYPE_NOP 0x0 +/* + * Fence wait. + * VPU waits for the fence current value to reach monitored value. + * Fence wait operations are executed upon job dispatching. While waiting for + * the fence to be satisfied, VPU blocks fetching of the next objects in the queue. + * Jobs present in the queue prior to the fence wait object may be processed + * concurrently. + */ +#define VPU_INLINE_CMD_TYPE_FENCE_WAIT 0x1 +/* + * Fence signal. + * VPU sets the fence current value to the provided value. If new current value + * is equal to or higher than monitored value, VPU sends fence signalled notification + * to the host. Fence signal operations are executed upon completion of all the jobs + * present in the queue prior to them, and in-order relative to each other in the queue. + * But jobs in-between them may be processed concurrently and may complete out-of-order. + */ +#define VPU_INLINE_CMD_TYPE_FENCE_SIGNAL 0x2 + +/* + * Job scheduling priority bands for both hardware scheduling and OS scheduling. + */ +enum vpu_job_scheduling_priority_band { + VPU_JOB_SCHEDULING_PRIORITY_BAND_IDLE = 0, + VPU_JOB_SCHEDULING_PRIORITY_BAND_NORMAL = 1, + VPU_JOB_SCHEDULING_PRIORITY_BAND_FOCUS = 2, + VPU_JOB_SCHEDULING_PRIORITY_BAND_REALTIME = 3, + VPU_JOB_SCHEDULING_PRIORITY_BAND_COUNT = 4, +}; + /* * Job format. + * Jobs defines the actual workloads to be executed by a given engine. */ struct vpu_job_queue_entry { - u64 batch_buf_addr; /**< Address of VPU commands batch buffer */ - u32 job_id; /**< Job ID */ - u32 flags; /**< Flags bit field, see VPU_JOB_FLAGS_* above */ - u64 root_page_table_addr; /**< Address of root page table to use for this job */ - u64 root_page_table_update_counter; /**< Page tables update events counter */ - u64 primary_preempt_buf_addr; + /**< Address of VPU commands batch buffer */ + u64 batch_buf_addr; + /**< Job ID */ + u32 job_id; + /**< Flags bit field, see VPU_JOB_FLAGS_* above */ + u32 flags; + /** + * Doorbell ring timestamp taken by KMD from SoC's global system clock, in + * microseconds. NPU can convert this value to its own fixed clock's timebase, + * to match other profiling timestamps. + */ + u64 doorbell_timestamp; + /**< Extra id for job tracking, used only in the firmware perf traces */ + u64 host_tracking_id; /**< Address of the primary preemption buffer to use for this job */ - u32 primary_preempt_buf_size; + u64 primary_preempt_buf_addr; /**< Size of the primary preemption buffer to use for this job */ - u32 secondary_preempt_buf_size; + u32 primary_preempt_buf_size; /**< Size of secondary preemption buffer to use for this job */ - u64 secondary_preempt_buf_addr; + u32 secondary_preempt_buf_size; /**< Address of secondary preemption buffer to use for this job */ - u8 reserved_0[VPU_JOB_RESERVED_BYTES]; + u64 secondary_preempt_buf_addr; + u64 reserved_0; +}; + +/* + * Inline command format. + * Inline commands are the commands executed at scheduler level (typically, + * synchronization directives). Inline command and job objects must be of + * the same size and have flags field at same offset. + */ +struct vpu_inline_cmd { + u64 reserved_0; + /* Inline command type, see VPU_INLINE_CMD_TYPE_* defines. */ + u32 type; + /* Flags bit field, see VPU_JOB_FLAGS_* above. */ + u32 flags; + /* Inline command payload. Depends on inline command type. */ + union { + /* Fence (wait and signal) commands' payload. */ + struct { + /* Fence object handle. */ + u64 fence_handle; + /* User VA of the current fence value. */ + u64 current_value_va; + /* User VA of the monitored fence value (read-only). */ + u64 monitored_value_va; + /* Value to wait for or write in fence location. */ + u64 value; + /* User VA of the log buffer in which to add log entry on completion. */ + u64 log_buffer_va; + } fence; + /* Other commands do not have a payload. */ + /* Payload definition for future inline commands can be inserted here. */ + u64 reserved_1[6]; + } payload; +}; + +/* + * Job queue slots can be populated either with job objects or inline command objects. + */ +union vpu_jobq_slot { + struct vpu_job_queue_entry job; + struct vpu_inline_cmd inline_cmd; };
/* @@ -167,7 +297,21 @@ struct vpu_job_queue_header { u32 engine_idx; u32 head; u32 tail; - u8 reserved_0[VPU_JOB_QUEUE_RESERVED_BYTES]; + u32 flags; + /* Set to 1 to indicate priority_band field is valid */ + u32 priority_band_valid; + /* + * Priority for the work of this job queue, valid only if the HWS is NOT used + * and the `priority_band_valid` is set to 1. It is applied only during + * the VPU_JSM_MSG_REGISTER_DB message processing. + * The device firmware might use the `priority_band` to optimize the power + * management logic, but it will not affect the order of jobs. + * Available priority bands: @see enum vpu_job_scheduling_priority_band + */ + u32 priority_band; + /* Inside realtime band assigns a further priority, limited to 0..31 range */ + u32 realtime_priority_level; + u32 reserved_0[9]; };
/* @@ -175,7 +319,7 @@ struct vpu_job_queue_header { */ struct vpu_job_queue { struct vpu_job_queue_header header; - struct vpu_job_queue_entry job[]; + union vpu_jobq_slot slot[]; };
/** @@ -197,9 +341,7 @@ enum vpu_trace_entity_type { struct vpu_hws_log_buffer_header { /* Written by VPU after adding a log entry. Initialised by host to 0. */ u32 first_free_entry_index; - /* Incremented by VPU every time the VPU overwrites the 0th entry; - * initialised by host to 0. - */ + /* Incremented by VPU every time the VPU writes the 0th entry; initialised by host to 0. */ u32 wraparound_count; /* * This is the number of buffers that can be stored in the log buffer provided by the host. @@ -230,14 +372,80 @@ struct vpu_hws_log_buffer_entry { u64 operation_data[2]; };
+/* Native fence log buffer types. */ +enum vpu_hws_native_fence_log_type { + VPU_HWS_NATIVE_FENCE_LOG_TYPE_WAITS = 1, + VPU_HWS_NATIVE_FENCE_LOG_TYPE_SIGNALS = 2 +}; + +/* HWS native fence log buffer header. */ +struct vpu_hws_native_fence_log_header { + union { + struct { + /* Index of the first free entry in buffer. */ + u32 first_free_entry_idx; + /* Incremented each time NPU wraps around the buffer to write next entry. */ + u32 wraparound_count; + }; + /* Field allowing atomic update of both fields above. */ + u64 atomic_wraparound_and_entry_idx; + }; + /* Log buffer type, see enum vpu_hws_native_fence_log_type. */ + u64 type; + /* Allocated number of entries in the log buffer. */ + u64 entry_nb; + u64 reserved[2]; +}; + +/* Native fence log operation types. */ +enum vpu_hws_native_fence_log_op { + VPU_HWS_NATIVE_FENCE_LOG_OP_SIGNAL_EXECUTED = 0, + VPU_HWS_NATIVE_FENCE_LOG_OP_WAIT_UNBLOCKED = 1 +}; + +/* HWS native fence log entry. */ +struct vpu_hws_native_fence_log_entry { + /* Newly signaled/unblocked fence value. */ + u64 fence_value; + /* Native fence object handle to which this operation belongs. */ + u64 fence_handle; + /* Operation type, see enum vpu_hws_native_fence_log_op. */ + u64 op_type; + u64 reserved_0; + /* + * VPU_HWS_NATIVE_FENCE_LOG_OP_WAIT_UNBLOCKED only: Timestamp at which fence + * wait was started (in NPU SysTime). + */ + u64 fence_wait_start_ts; + u64 reserved_1; + /* Timestamp at which fence operation was completed (in NPU SysTime). */ + u64 fence_end_ts; +}; + +/* Native fence log buffer. */ +struct vpu_hws_native_fence_log_buffer { + struct vpu_hws_native_fence_log_header header; + struct vpu_hws_native_fence_log_entry entry[]; +}; + /* * Host <-> VPU IPC messages types. */ enum vpu_ipc_msg_type { VPU_JSM_MSG_UNKNOWN = 0xFFFFFFFF, + /* IPC Host -> Device, Async commands */ VPU_JSM_MSG_ASYNC_CMD = 0x1100, VPU_JSM_MSG_ENGINE_RESET = VPU_JSM_MSG_ASYNC_CMD, + /** + * Preempt engine. The NPU stops (preempts) all the jobs currently + * executing on the target engine making the engine become idle and ready to + * execute new jobs. + * NOTE: The NPU does not remove unstarted jobs (if any) from job queues of + * the target engine, but it stops processing them (until the queue doorbell + * is rung again); the host is responsible to reset the job queue, either + * after preemption or when resubmitting jobs to the queue. + */ VPU_JSM_MSG_ENGINE_PREEMPT = 0x1101, VPU_JSM_MSG_REGISTER_DB = 0x1102, VPU_JSM_MSG_UNREGISTER_DB = 0x1103, @@ -323,9 +531,10 @@ enum vpu_ipc_msg_type { * NOTE: Please introduce new ASYNC commands before this one. * */ VPU_JSM_MSG_STATE_DUMP = 0x11FF, + /* IPC Host -> Device, General commands */ VPU_JSM_MSG_GENERAL_CMD = 0x1200, - VPU_JSM_MSG_BLOB_DEINIT = VPU_JSM_MSG_GENERAL_CMD, + VPU_JSM_MSG_BLOB_DEINIT_DEPRECATED = VPU_JSM_MSG_GENERAL_CMD, /** * Control dyndbg behavior by executing a dyndbg command; equivalent to * Linux command: `echo '<dyndbg_cmd>' > <debugfs>/dynamic_debug/control`. @@ -335,8 +544,12 @@ enum vpu_ipc_msg_type { * Perform the save procedure for the D0i3 entry */ VPU_JSM_MSG_PWR_D0I3_ENTER = 0x1202, + /* IPC Device -> Host, Job completion */ VPU_JSM_MSG_JOB_DONE = 0x2100, + /* IPC Device -> Host, Fence signalled */ + VPU_JSM_MSG_NATIVE_FENCE_SIGNALLED = 0x2101, + /* IPC Device -> Host, Async command completion */ VPU_JSM_MSG_ASYNC_CMD_DONE = 0x2200, VPU_JSM_MSG_ENGINE_RESET_DONE = VPU_JSM_MSG_ASYNC_CMD_DONE, @@ -422,6 +635,7 @@ enum vpu_ipc_msg_type { * NOTE: Please introduce new ASYNC responses before this one. * */ VPU_JSM_MSG_STATE_DUMP_RSP = 0x22FF, + /* IPC Device -> Host, General command completion */ VPU_JSM_MSG_GENERAL_CMD_DONE = 0x2300, VPU_JSM_MSG_BLOB_DEINIT_DONE = VPU_JSM_MSG_GENERAL_CMD_DONE, @@ -600,11 +814,6 @@ struct vpu_jsm_metric_streamer_update { u64 next_buffer_size; };
-struct vpu_ipc_msg_payload_blob_deinit { - /* 64-bit unique ID for the blob to be de-initialized. */ - u64 blob_id; -}; - struct vpu_ipc_msg_payload_job_done { /* Engine to which the job was submitted. */ u32 engine_idx; @@ -622,6 +831,21 @@ struct vpu_ipc_msg_payload_job_done { u64 cmdq_id; };
+/* + * Notification message upon native fence signalling. + * @see VPU_JSM_MSG_NATIVE_FENCE_SIGNALLED + */ +struct vpu_ipc_msg_payload_native_fence_signalled { + /* Engine ID. */ + u32 engine_idx; + /* Host SSID. */ + u32 host_ssid; + /* CMDQ ID */ + u64 cmdq_id; + /* Fence object handle. */ + u64 fence_handle; +}; + struct vpu_jsm_engine_reset_context { /* Host SSID */ u32 host_ssid; @@ -700,11 +924,6 @@ struct vpu_ipc_msg_payload_get_power_level_count_done { u8 power_limit[16]; };
-struct vpu_ipc_msg_payload_blob_deinit_done { - /* 64-bit unique ID for the blob de-initialized. */ - u64 blob_id; -}; - /* HWS priority band setup request / response */ struct vpu_ipc_msg_payload_hws_priority_band_setup { /* @@ -794,7 +1013,10 @@ struct vpu_ipc_msg_payload_hws_set_context_sched_properties { u32 reserved_0; /* Command queue id */ u64 cmdq_id; - /* Priority band to assign to work of this context */ + /* + * Priority band to assign to work of this context. + * Available priority bands: @see enum vpu_job_scheduling_priority_band + */ u32 priority_band; /* Inside realtime band assigns a further priority */ u32 realtime_priority_level; @@ -869,9 +1091,7 @@ struct vpu_ipc_msg_payload_hws_set_scheduling_log { */ u64 notify_index; /* - * Enable extra events to be output to log for debug of scheduling algorithm. - * Interpreted by VPU as a boolean to enable or disable, expected values are - * 0 and 1. + * Field is now deprecated, will be removed when KMD is updated to support removal */ u32 enable_extra_events; /* Zero Padding */ @@ -1243,10 +1463,10 @@ union vpu_ipc_msg_payload { struct vpu_jsm_metric_streamer_start metric_streamer_start; struct vpu_jsm_metric_streamer_stop metric_streamer_stop; struct vpu_jsm_metric_streamer_update metric_streamer_update; - struct vpu_ipc_msg_payload_blob_deinit blob_deinit; struct vpu_ipc_msg_payload_ssid_release ssid_release; struct vpu_jsm_hws_register_db hws_register_db; struct vpu_ipc_msg_payload_job_done job_done; + struct vpu_ipc_msg_payload_native_fence_signalled native_fence_signalled; struct vpu_ipc_msg_payload_engine_reset_done engine_reset_done; struct vpu_ipc_msg_payload_engine_preempt_done engine_preempt_done; struct vpu_ipc_msg_payload_register_db_done register_db_done; @@ -1254,7 +1474,6 @@ union vpu_ipc_msg_payload { struct vpu_ipc_msg_payload_query_engine_hb_done query_engine_hb_done; struct vpu_ipc_msg_payload_get_power_level_count_done get_power_level_count_done; struct vpu_jsm_metric_streamer_done metric_streamer_done; - struct vpu_ipc_msg_payload_blob_deinit_done blob_deinit_done; struct vpu_ipc_msg_payload_trace_config trace_config; struct vpu_ipc_msg_payload_trace_capability_rsp trace_capability; struct vpu_ipc_msg_payload_trace_get_name trace_get_name;
 
            [ Sasha's backport helper bot ]
Hi,
✅ All tests passed successfully. No issues detected. No action required from the submitter.
The upstream commit SHA1 provided is correct: a4293cc75348409f998c991c48cbe5532c438114
WARNING: Author mismatch between patch and upstream commit: Backport author: Jacek Lawrynowiczjacek.lawrynowicz@linux.intel.com Commit author: Andrzej KacprowskiAndrzej.Kacprowski@intel.com
Note: The patch differs from the upstream commit: --- 1: a4293cc753484 < -: ------------- accel/ivpu: Update VPU FW API headers -: ------------- > 1: aeaee199900ee Linux 6.14.5 ---
Results of testing on various branches:
| Branch | Patch Apply | Build Test | |---------------------------|-------------|------------| | stable/linux-5.4.y | Success | Success |
 
            From: Karol Wachowski karol.wachowski@intel.com
commit 5bbccadaf33eea2b879d8326ad59ae0663be47d1 upstream.
With hardware scheduler it is not expected to receive JOB_DONE notifications from NPU FW for the jobs aborted due to command queue destroy JSM command.
Remove jobs submitted to unregistered command queue from submitted_jobs_xa to avoid triggering a TDR in such case.
Add explicit submitted_jobs_lock that protects access to list of submitted jobs which is now used to find jobs to abort.
Move context abort procedure to separate work queue not to slow down handling of IPCs or DCT requests in case where job abort takes longer, especially when destruction of the last job of a specific context results in context release.
This backport removes all the lines from upstream commit related to the command queue UAPI, as it is not present in the 6.12 kernel and should not be backported.
Cc: stable@vger.kernel.org # v6.12 Signed-off-by: Karol Wachowski karol.wachowski@intel.com Signed-off-by: Maciej Falkowski maciej.falkowski@linux.intel.com Reviewed-by: Jacek Lawrynowicz jacek.lawrynowicz@linux.intel.com Link: https://patchwork.freedesktop.org/patch/msgid/20250107173238.381120-4-maciej... Signed-off-by: Jacek Lawrynowicz jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_drv.c | 32 +++---------- drivers/accel/ivpu/ivpu_drv.h | 2 + drivers/accel/ivpu/ivpu_job.c | 82 +++++++++++++++++++++++++-------- drivers/accel/ivpu/ivpu_job.h | 1 + drivers/accel/ivpu/ivpu_mmu.c | 3 +- drivers/accel/ivpu/ivpu_sysfs.c | 5 +- 6 files changed, 77 insertions(+), 48 deletions(-)
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 2a229903654e0..168d03d5aa1d0 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -36,8 +36,6 @@ __stringify(DRM_IVPU_DRIVER_MINOR) "." #endif
-static struct lock_class_key submitted_jobs_xa_lock_class_key; - int ivpu_dbg_mask; module_param_named(dbg_mask, ivpu_dbg_mask, int, 0644); MODULE_PARM_DESC(dbg_mask, "Driver debug mask. See IVPU_DBG_* macros."); @@ -455,26 +453,6 @@ static const struct drm_driver driver = { .minor = DRM_IVPU_DRIVER_MINOR, };
-static void ivpu_context_abort_invalid(struct ivpu_device *vdev) -{ - struct ivpu_file_priv *file_priv; - unsigned long ctx_id; - - mutex_lock(&vdev->context_list_lock); - - xa_for_each(&vdev->context_xa, ctx_id, file_priv) { - if (!file_priv->has_mmu_faults || file_priv->aborted) - continue; - - mutex_lock(&file_priv->lock); - ivpu_context_abort_locked(file_priv); - file_priv->aborted = true; - mutex_unlock(&file_priv->lock); - } - - mutex_unlock(&vdev->context_list_lock); -} - static irqreturn_t ivpu_irq_thread_handler(int irq, void *arg) { struct ivpu_device *vdev = arg; @@ -488,9 +466,6 @@ static irqreturn_t ivpu_irq_thread_handler(int irq, void *arg) case IVPU_HW_IRQ_SRC_IPC: ivpu_ipc_irq_thread_handler(vdev); break; - case IVPU_HW_IRQ_SRC_MMU_EVTQ: - ivpu_context_abort_invalid(vdev); - break; case IVPU_HW_IRQ_SRC_DCT: ivpu_pm_dct_irq_thread_handler(vdev); break; @@ -607,16 +582,21 @@ static int ivpu_dev_init(struct ivpu_device *vdev) xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ); xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1); xa_init_flags(&vdev->db_xa, XA_FLAGS_ALLOC1); - lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key); INIT_LIST_HEAD(&vdev->bo_list);
vdev->db_limit.min = IVPU_MIN_DB; vdev->db_limit.max = IVPU_MAX_DB;
+ INIT_WORK(&vdev->context_abort_work, ivpu_context_abort_thread_handler); + ret = drmm_mutex_init(&vdev->drm, &vdev->context_list_lock); if (ret) goto err_xa_destroy;
+ ret = drmm_mutex_init(&vdev->drm, &vdev->submitted_jobs_lock); + if (ret) + goto err_xa_destroy; + ret = drmm_mutex_init(&vdev->drm, &vdev->bo_list_lock); if (ret) goto err_xa_destroy; diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index 56509c5a3875b..a5707a85e7255 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -137,6 +137,7 @@ struct ivpu_device { struct mutex context_list_lock; /* Protects user context addition/removal */ struct xarray context_xa; struct xa_limit context_xa_limit; + struct work_struct context_abort_work;
struct xarray db_xa; struct xa_limit db_limit; @@ -145,6 +146,7 @@ struct ivpu_device { struct mutex bo_list_lock; /* Protects bo_list */ struct list_head bo_list;
+ struct mutex submitted_jobs_lock; /* Protects submitted_jobs */ struct xarray submitted_jobs_xa; struct ivpu_ipc_consumer job_done_consumer;
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index c2108346c4c9d..8207d1218e207 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -335,6 +335,8 @@ void ivpu_context_abort_locked(struct ivpu_file_priv *file_priv)
if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_OS) ivpu_jsm_context_release(vdev, file_priv->ctx.id); + + file_priv->aborted = true; }
static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job) @@ -467,16 +469,14 @@ static struct ivpu_job *ivpu_job_remove_from_submitted_jobs(struct ivpu_device * { struct ivpu_job *job;
- xa_lock(&vdev->submitted_jobs_xa); - job = __xa_erase(&vdev->submitted_jobs_xa, job_id); + lockdep_assert_held(&vdev->submitted_jobs_lock);
+ job = xa_erase(&vdev->submitted_jobs_xa, job_id); if (xa_empty(&vdev->submitted_jobs_xa) && job) { vdev->busy_time = ktime_add(ktime_sub(ktime_get(), vdev->busy_start_ts), vdev->busy_time); }
- xa_unlock(&vdev->submitted_jobs_xa); - return job; }
@@ -484,6 +484,8 @@ static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 { struct ivpu_job *job;
+ lockdep_assert_held(&vdev->submitted_jobs_lock); + job = ivpu_job_remove_from_submitted_jobs(vdev, job_id); if (!job) return -ENOENT; @@ -501,6 +503,10 @@ static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 ivpu_stop_job_timeout_detection(vdev);
ivpu_rpm_put(vdev); + + if (!xa_empty(&vdev->submitted_jobs_xa)) + ivpu_start_job_timeout_detection(vdev); + return 0; }
@@ -509,8 +515,12 @@ void ivpu_jobs_abort_all(struct ivpu_device *vdev) struct ivpu_job *job; unsigned long id;
+ mutex_lock(&vdev->submitted_jobs_lock); + xa_for_each(&vdev->submitted_jobs_xa, id, job) ivpu_job_signal_and_destroy(vdev, id, DRM_IVPU_JOB_STATUS_ABORTED); + + mutex_unlock(&vdev->submitted_jobs_lock); }
static int ivpu_job_submit(struct ivpu_job *job, u8 priority) @@ -535,15 +545,16 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority) goto err_unlock_file_priv; }
- xa_lock(&vdev->submitted_jobs_xa); + mutex_lock(&vdev->submitted_jobs_lock); + is_first_job = xa_empty(&vdev->submitted_jobs_xa); - ret = __xa_alloc_cyclic(&vdev->submitted_jobs_xa, &job->job_id, job, file_priv->job_limit, - &file_priv->job_id_next, GFP_KERNEL); + ret = xa_alloc_cyclic(&vdev->submitted_jobs_xa, &job->job_id, job, file_priv->job_limit, + &file_priv->job_id_next, GFP_KERNEL); if (ret < 0) { ivpu_dbg(vdev, JOB, "Too many active jobs in ctx %d\n", file_priv->ctx.id); ret = -EBUSY; - goto err_unlock_submitted_jobs_xa; + goto err_unlock_submitted_jobs; }
ret = ivpu_cmdq_push_job(cmdq, job); @@ -565,19 +576,21 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority) job->job_id, file_priv->ctx.id, job->engine_idx, priority, job->cmd_buf_vpu_addr, cmdq->jobq->header.tail);
- xa_unlock(&vdev->submitted_jobs_xa); - + mutex_unlock(&vdev->submitted_jobs_lock); mutex_unlock(&file_priv->lock);
- if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW)) + if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW)) { + mutex_lock(&vdev->submitted_jobs_lock); ivpu_job_signal_and_destroy(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS); + mutex_unlock(&vdev->submitted_jobs_lock); + }
return 0;
err_erase_xa: - __xa_erase(&vdev->submitted_jobs_xa, job->job_id); -err_unlock_submitted_jobs_xa: - xa_unlock(&vdev->submitted_jobs_xa); + xa_erase(&vdev->submitted_jobs_xa, job->job_id); +err_unlock_submitted_jobs: + mutex_unlock(&vdev->submitted_jobs_lock); err_unlock_file_priv: mutex_unlock(&file_priv->lock); ivpu_rpm_put(vdev); @@ -748,7 +761,6 @@ ivpu_job_done_callback(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr, struct vpu_jsm_msg *jsm_msg) { struct vpu_ipc_msg_payload_job_done *payload; - int ret;
if (!jsm_msg) { ivpu_err(vdev, "IPC message has no JSM payload\n"); @@ -761,9 +773,10 @@ ivpu_job_done_callback(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr, }
payload = (struct vpu_ipc_msg_payload_job_done *)&jsm_msg->payload; - ret = ivpu_job_signal_and_destroy(vdev, payload->job_id, payload->job_status); - if (!ret && !xa_empty(&vdev->submitted_jobs_xa)) - ivpu_start_job_timeout_detection(vdev); + + mutex_lock(&vdev->submitted_jobs_lock); + ivpu_job_signal_and_destroy(vdev, payload->job_id, payload->job_status); + mutex_unlock(&vdev->submitted_jobs_lock); }
void ivpu_job_done_consumer_init(struct ivpu_device *vdev) @@ -776,3 +789,36 @@ void ivpu_job_done_consumer_fini(struct ivpu_device *vdev) { ivpu_ipc_consumer_del(vdev, &vdev->job_done_consumer); } + +void ivpu_context_abort_thread_handler(struct work_struct *work) +{ + struct ivpu_device *vdev = container_of(work, struct ivpu_device, context_abort_work); + struct ivpu_file_priv *file_priv; + unsigned long ctx_id; + struct ivpu_job *job; + unsigned long id; + + mutex_lock(&vdev->context_list_lock); + xa_for_each(&vdev->context_xa, ctx_id, file_priv) { + if (!file_priv->has_mmu_faults || file_priv->aborted) + continue; + + mutex_lock(&file_priv->lock); + ivpu_context_abort_locked(file_priv); + mutex_unlock(&file_priv->lock); + } + mutex_unlock(&vdev->context_list_lock); + + if (vdev->fw->sched_mode != VPU_SCHEDULING_MODE_HW) + return; + /* + * In hardware scheduling mode NPU already has stopped processing jobs + * and won't send us any further notifications, thus we have to free job related resources + * and notify userspace + */ + mutex_lock(&vdev->submitted_jobs_lock); + xa_for_each(&vdev->submitted_jobs_xa, id, job) + if (job->file_priv->aborted) + ivpu_job_signal_and_destroy(vdev, job->job_id, DRM_IVPU_JOB_STATUS_ABORTED); + mutex_unlock(&vdev->submitted_jobs_lock); +} diff --git a/drivers/accel/ivpu/ivpu_job.h b/drivers/accel/ivpu/ivpu_job.h index 6accb94028c7a..0ae77f0638fad 100644 --- a/drivers/accel/ivpu/ivpu_job.h +++ b/drivers/accel/ivpu/ivpu_job.h @@ -64,6 +64,7 @@ void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev);
void ivpu_job_done_consumer_init(struct ivpu_device *vdev); void ivpu_job_done_consumer_fini(struct ivpu_device *vdev); +void ivpu_context_abort_thread_handler(struct work_struct *work);
void ivpu_jobs_abort_all(struct ivpu_device *vdev);
diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c index c078e214b2212..fb15eb75b5ba9 100644 --- a/drivers/accel/ivpu/ivpu_mmu.c +++ b/drivers/accel/ivpu/ivpu_mmu.c @@ -917,8 +917,7 @@ void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev) REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, vdev->mmu->evtq.cons); }
- if (!kfifo_put(&vdev->hw->irq.fifo, IVPU_HW_IRQ_SRC_MMU_EVTQ)) - ivpu_err_ratelimited(vdev, "IRQ FIFO full\n"); + queue_work(system_wq, &vdev->context_abort_work); }
void ivpu_mmu_evtq_dump(struct ivpu_device *vdev) diff --git a/drivers/accel/ivpu/ivpu_sysfs.c b/drivers/accel/ivpu/ivpu_sysfs.c index 616477fc17fa0..8a616791c32f5 100644 --- a/drivers/accel/ivpu/ivpu_sysfs.c +++ b/drivers/accel/ivpu/ivpu_sysfs.c @@ -30,11 +30,12 @@ npu_busy_time_us_show(struct device *dev, struct device_attribute *attr, char *b struct ivpu_device *vdev = to_ivpu_device(drm); ktime_t total, now = 0;
- xa_lock(&vdev->submitted_jobs_xa); + mutex_lock(&vdev->submitted_jobs_lock); + total = vdev->busy_time; if (!xa_empty(&vdev->submitted_jobs_xa)) now = ktime_sub(ktime_get(), vdev->busy_start_ts); - xa_unlock(&vdev->submitted_jobs_xa); + mutex_unlock(&vdev->submitted_jobs_lock);
return sysfs_emit(buf, "%lld\n", ktime_to_us(ktime_add(total, now))); }
 
            [ Sasha's backport helper bot ]
Hi,
✅ All tests passed successfully. No issues detected. No action required from the submitter.
The upstream commit SHA1 provided is correct: 5bbccadaf33eea2b879d8326ad59ae0663be47d1
WARNING: Author mismatch between patch and upstream commit: Backport author: Jacek Lawrynowiczjacek.lawrynowicz@linux.intel.com Commit author: Karol Wachowskikarol.wachowski@intel.com
Note: The patch differs from the upstream commit: --- 1: 5bbccadaf33ee < -: ------------- accel/ivpu: Abort all jobs after command queue unregister -: ------------- > 1: aeaee199900ee Linux 6.14.5 ---
Results of testing on various branches:
| Branch | Patch Apply | Build Test | |---------------------------|-------------|------------| | stable/linux-5.4.y | Success | Success |
 
            From: Karol Wachowski karol.wachowski@intel.com
commit ab680dc6c78aa035e944ecc8c48a1caab9f39924 upstream.
Fix deadlock in job submission and abort handling. When a thread aborts currently executing jobs due to a fault, it first locks the global lock protecting submitted_jobs (#1).
After the last job is destroyed, it proceeds to release the related context and locks file_priv (#2). Meanwhile, in the job submission thread, the file_priv lock (#2) is taken first, and then the submitted_jobs lock (#1) is obtained when a job is added to the submitted jobs list.
CPU0 CPU1 ---- ---- (for example due to a fault) (jobs submissions keep coming)
lock(&vdev->submitted_jobs_lock) #1 ivpu_jobs_abort_all() job_destroy() lock(&file_priv->lock) #2 lock(&vdev->submitted_jobs_lock) #1 file_priv_release() lock(&vdev->context_list_lock) lock(&file_priv->lock) #2
This order of locking causes a deadlock. To resolve this issue, change the order of locking in ivpu_job_submit().
This backport required small adjustments to ivpu_job_submit(), which lacks support for explicit command queue creation added in 6.15.
Cc: stable@vger.kernel.org # v6.12 Signed-off-by: Karol Wachowski karol.wachowski@intel.com Signed-off-by: Maciej Falkowski maciej.falkowski@linux.intel.com Reviewed-by: Jacek Lawrynowicz jacek.lawrynowicz@linux.intel.com Link: https://patchwork.freedesktop.org/patch/msgid/20250107173238.381120-12-macie... Signed-off-by: Jacek Lawrynowicz jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_job.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-)
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index 8207d1218e207..a8e3eca14989c 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -535,6 +535,7 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority) if (ret < 0) return ret;
+ mutex_lock(&vdev->submitted_jobs_lock); mutex_lock(&file_priv->lock);
cmdq = ivpu_cmdq_acquire(file_priv, job->engine_idx, priority); @@ -542,11 +543,9 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority) ivpu_warn_ratelimited(vdev, "Failed to get job queue, ctx %d engine %d prio %d\n", file_priv->ctx.id, job->engine_idx, priority); ret = -EINVAL; - goto err_unlock_file_priv; + goto err_unlock; }
- mutex_lock(&vdev->submitted_jobs_lock); - is_first_job = xa_empty(&vdev->submitted_jobs_xa); ret = xa_alloc_cyclic(&vdev->submitted_jobs_xa, &job->job_id, job, file_priv->job_limit, &file_priv->job_id_next, GFP_KERNEL); @@ -554,7 +553,7 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority) ivpu_dbg(vdev, JOB, "Too many active jobs in ctx %d\n", file_priv->ctx.id); ret = -EBUSY; - goto err_unlock_submitted_jobs; + goto err_unlock; }
ret = ivpu_cmdq_push_job(cmdq, job); @@ -576,22 +575,20 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority) job->job_id, file_priv->ctx.id, job->engine_idx, priority, job->cmd_buf_vpu_addr, cmdq->jobq->header.tail);
- mutex_unlock(&vdev->submitted_jobs_lock); mutex_unlock(&file_priv->lock);
if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW)) { - mutex_lock(&vdev->submitted_jobs_lock); ivpu_job_signal_and_destroy(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS); - mutex_unlock(&vdev->submitted_jobs_lock); }
+ mutex_unlock(&vdev->submitted_jobs_lock); + return 0;
err_erase_xa: xa_erase(&vdev->submitted_jobs_xa, job->job_id); -err_unlock_submitted_jobs: +err_unlock: mutex_unlock(&vdev->submitted_jobs_lock); -err_unlock_file_priv: mutex_unlock(&file_priv->lock); ivpu_rpm_put(vdev); return ret;
 
            [ Sasha's backport helper bot ]
Hi,
✅ All tests passed successfully. No issues detected. No action required from the submitter.
The upstream commit SHA1 provided is correct: ab680dc6c78aa035e944ecc8c48a1caab9f39924
WARNING: Author mismatch between patch and upstream commit: Backport author: Jacek Lawrynowiczjacek.lawrynowicz@linux.intel.com Commit author: Karol Wachowskikarol.wachowski@intel.com
Note: The patch differs from the upstream commit: --- 1: ab680dc6c78aa < -: ------------- accel/ivpu: Fix locking order in ivpu_job_submit -: ------------- > 1: aeaee199900ee Linux 6.14.5 ---
Results of testing on various branches:
| Branch | Patch Apply | Build Test | |---------------------------|-------------|------------| | stable/linux-5.4.y | Success | Success |
 
            From: Karol Wachowski karol.wachowski@intel.com
commit dad945c27a42dfadddff1049cf5ae417209a8996 upstream.
Mark as invalid context of a job that returned HW context violation error and queue work that aborts jobs from faulty context. Add engine reset to the context abort thread handler to not only abort currently executing jobs but also to ensure NPU invalid state recovery.
Cc: stable@vger.kernel.org # v6.12 Signed-off-by: Karol Wachowski karol.wachowski@intel.com Signed-off-by: Maciej Falkowski maciej.falkowski@linux.intel.com Reviewed-by: Jacek Lawrynowicz jacek.lawrynowicz@linux.intel.com Link: https://patchwork.freedesktop.org/patch/msgid/20250107173238.381120-13-macie... Signed-off-by: Jacek Lawrynowicz jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_job.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+)
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index a8e3eca14989c..27121c66e48f8 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -486,6 +486,26 @@ static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32
lockdep_assert_held(&vdev->submitted_jobs_lock);
+ job = xa_load(&vdev->submitted_jobs_xa, job_id); + if (!job) + return -ENOENT; + + if (job_status == VPU_JSM_STATUS_MVNCI_CONTEXT_VIOLATION_HW) { + guard(mutex)(&job->file_priv->lock); + + if (job->file_priv->has_mmu_faults) + return 0; + + /* + * Mark context as faulty and defer destruction of the job to jobs abort thread + * handler to synchronize between both faults and jobs returning context violation + * status and ensure both are handled in the same way + */ + job->file_priv->has_mmu_faults = true; + queue_work(system_wq, &vdev->context_abort_work); + return 0; + } + job = ivpu_job_remove_from_submitted_jobs(vdev, job_id); if (!job) return -ENOENT; @@ -795,6 +815,9 @@ void ivpu_context_abort_thread_handler(struct work_struct *work) struct ivpu_job *job; unsigned long id;
+ if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) + ivpu_jsm_reset_engine(vdev, 0); + mutex_lock(&vdev->context_list_lock); xa_for_each(&vdev->context_xa, ctx_id, file_priv) { if (!file_priv->has_mmu_faults || file_priv->aborted) @@ -808,6 +831,8 @@ void ivpu_context_abort_thread_handler(struct work_struct *work)
if (vdev->fw->sched_mode != VPU_SCHEDULING_MODE_HW) return; + + ivpu_jsm_hws_resume_engine(vdev, 0); /* * In hardware scheduling mode NPU already has stopped processing jobs * and won't send us any further notifications, thus we have to free job related resources
 
            [ Sasha's backport helper bot ]
Hi,
Summary of potential issues: ℹ️ This is part 7/7 of a series ❌ Build failures detected
The upstream commit SHA1 provided is correct: dad945c27a42dfadddff1049cf5ae417209a8996
WARNING: Author mismatch between patch and upstream commit: Backport author: Jacek Lawrynowiczjacek.lawrynowicz@linux.intel.com Commit author: Karol Wachowskikarol.wachowski@intel.com
Note: The patch differs from the upstream commit: --- 1: dad945c27a42d < -: ------------- accel/ivpu: Add handling of VPU_JSM_STATUS_MVNCI_CONTEXT_VIOLATION_HW -: ------------- > 1: aeaee199900ee Linux 6.14.5 ---
NOTE: These results are for this patch alone. Full series testing will be performed when all parts are received.
Results of testing on various branches:
| Branch | Patch Apply | Build Test | |---------------------------|-------------|------------| | stable/linux-5.4.y | Success | Failed |
Build Errors: Build error: ssh: connect to host 192.168.1.58 port 22: Connection refused
linux-stable-mirror@lists.linaro.org

