On 6/12/2024 7:53 AM, Tomeu Vizoso wrote:
Using the DRM GPU scheduler infrastructure, with a scheduler for each core.
Userspace can decide for a series of tasks to be executed sequentially in the same core, so SRAM locality can be taken advantage of.
The job submission code was intially based on Panfrost.
intially -> initially
Signed-off-by: Tomeu Vizoso <tomeu@tomeuvizoso.net>
 drivers/accel/rocket/Makefile        |   3 +-
 drivers/accel/rocket/rocket_core.c   |   6 +
 drivers/accel/rocket/rocket_core.h   |  16 +
 drivers/accel/rocket/rocket_device.c |   2 +
 drivers/accel/rocket/rocket_device.h |   2 +
 drivers/accel/rocket/rocket_drv.c    |  15 +
 drivers/accel/rocket/rocket_drv.h    |   3 +
 drivers/accel/rocket/rocket_job.c    | 708 +++++++++++++++++++++++++++++++++++
 drivers/accel/rocket/rocket_job.h    |  49 +++
 include/uapi/drm/rocket_accel.h      |  55 +++
 10 files changed, 858 insertions(+), 1 deletion(-)
diff --git a/drivers/accel/rocket/Makefile b/drivers/accel/rocket/Makefile
index 875cac2243d9..4d59036af8d9 100644
--- a/drivers/accel/rocket/Makefile
+++ b/drivers/accel/rocket/Makefile
@@ -6,4 +6,5 @@ rocket-y := \
         rocket_core.o \
         rocket_device.o \
         rocket_drv.o \
-        rocket_gem.o
+        rocket_gem.o \
+        rocket_job.o
diff --git a/drivers/accel/rocket/rocket_core.c b/drivers/accel/rocket/rocket_core.c
index d6680b00fb2f..2b2d8be38f0a 100644
--- a/drivers/accel/rocket/rocket_core.c
+++ b/drivers/accel/rocket/rocket_core.c
@@ -11,6 +11,7 @@
 #include "rocket_core.h"
 #include "rocket_device.h"
+#include "rocket_job.h"
 #include "rocket_registers.h"
 
 static int rocket_clk_init(struct rocket_core *core)
@@ -122,6 +123,10 @@ int rocket_core_init(struct rocket_core *core)
                 goto out_pm_domain;
         }
 
+        err = rocket_job_init(core);
+        if (err)
+                goto out_pm_domain;
+
         version = rocket_read(core, REG_PC_VERSION) +
                   (rocket_read(core, REG_PC_VERSION_NUM) & 0xffff);
         dev_info(rdev->dev, "Rockchip NPU core %d version: %d\n", core->index, version);
@@ -134,6 +139,7 @@ int rocket_core_init(struct rocket_core *core)
 
 void rocket_core_fini(struct rocket_core *core)
 {
+        rocket_job_fini(core);
         rocket_pmdomain_fini(core);
 }
diff --git a/drivers/accel/rocket/rocket_core.h b/drivers/accel/rocket/rocket_core.h
index e5d4c848c9f4..e6401960a9b2 100644
--- a/drivers/accel/rocket/rocket_core.h
+++ b/drivers/accel/rocket/rocket_core.h
@@ -8,6 +8,8 @@
 #include <asm/io.h>
 #include <asm-generic/io.h>
 
+#include <drm/gpu_scheduler.h>
What about includes for workqueue or atomic?
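Judging from the core->reset.pending and core->job_lock accesses further down in rocket_job.c, struct rocket_core presumably gains atomic_t and work-related members here, so (a guess, since the struct changes themselves aren't quoted) the header should pull those in explicitly rather than relying on indirect includes, e.g.:

+#include <linux/atomic.h>
+#include <linux/workqueue.h>
+
+#include <drm/gpu_scheduler.h>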
+static struct dma_fence *rocket_job_run(struct drm_sched_job *sched_job)
+{
+        struct rocket_job *job = to_rocket_job(sched_job);
+        struct rocket_device *rdev = job->rdev;
+        struct rocket_core *core = sched_to_core(rdev, sched_job->sched);
+        struct dma_fence *fence = NULL;
+        int ret;
+
+        if (unlikely(job->base.s_fence->finished.error))
+                return NULL;
+
+        /* Nothing to execute: can happen if the job has finished while
+         * we were resetting the GPU.
+         */
Not the correct comment style
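For multi-line comments outside of net/, that should be:

        /*
         * Nothing to execute: can happen if the job has finished while
         * we were resetting the GPU.
         */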
+        if (job->next_task_idx == job->task_count)
+                return NULL;
+
+        fence = rocket_fence_create(core);
+        if (IS_ERR(fence))
+                return fence;
+
+        if (job->done_fence)
+                dma_fence_put(job->done_fence);
+        job->done_fence = dma_fence_get(fence);
+
+        ret = pm_runtime_get_sync(rdev->dev);
+        if (ret < 0)
+                return fence;
+
+        spin_lock(&core->job_lock);
+
+        core->in_flight_job = job;
+        rocket_job_hw_submit(core, job);
+
+        spin_unlock(&core->job_lock);
+
+        return fence;
+}
+static void rocket_job_handle_done(struct rocket_core *core,
+                                   struct rocket_job *job)
+{
+        if (job->next_task_idx < job->task_count) {
+                rocket_job_hw_submit(core, job);
+                return;
+        }
+
+        core->in_flight_job = NULL;
+        dma_fence_signal_locked(job->done_fence);
+        pm_runtime_put_autosuspend(core->dev->dev);
+}
+
+static void rocket_job_handle_irq(struct rocket_core *core)
+{
+        uint32_t status, raw_status;
+
+        pm_runtime_mark_last_busy(core->dev->dev);
+
+        status = rocket_read(core, REG_PC_INTERRUPT_STATUS);
+        raw_status = rocket_read(core, REG_PC_INTERRUPT_RAW_STATUS);
+
+        rocket_write(core, REG_PC_OPERATION_ENABLE, 0x0);
+        rocket_write(core, REG_PC_INTERRUPT_CLEAR, 0x1ffff);
+
+        spin_lock(&core->job_lock);
+
+        if (core->in_flight_job)
+                rocket_job_handle_done(core, core->in_flight_job);
+
+        spin_unlock(&core->job_lock);
+}
+static void
+rocket_reset(struct rocket_core *core, struct drm_sched_job *bad)
+{
+        struct rocket_device *rdev = core->dev;
+        bool cookie;
+
+        if (!atomic_read(&core->reset.pending))
+                return;
+
+        /* Stop the scheduler.
Not the correct comment style
+         *
+         * FIXME: We temporarily get out of the dma_fence_signalling section
+         * because the cleanup path generate lockdep splats when taking locks
+         * to release job resources. We should rework the code to follow this
+         * pattern:
+         *
+         *      try_lock
+         *      if (locked)
+         *              release
+         *      else
+         *              schedule_work_to_release_later
+         */
+        drm_sched_stop(&core->sched, bad);
+
+        cookie = dma_fence_begin_signalling();
+
+        if (bad)
+                drm_sched_increase_karma(bad);
+
+        /* Mask job interrupts and synchronize to make sure we won't be
+         * interrupted during our reset.
+         */
Not the correct comment style, again. This is the last time I'm going to mention it.
diff --git a/drivers/accel/rocket/rocket_job.h b/drivers/accel/rocket/rocket_job.h
new file mode 100644
index 000000000000..0c3c90e47d39
--- /dev/null
+++ b/drivers/accel/rocket/rocket_job.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright 2024 Tomeu Vizoso <tomeu@tomeuvizoso.net> */
+
+#ifndef __ROCKET_JOB_H__
+#define __ROCKET_JOB_H__
+
+#include <drm/gpu_scheduler.h>
+#include <drm/drm_drv.h>
Alphabetical order
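i.e.:

+#include <drm/drm_drv.h>
+#include <drm/gpu_scheduler.h>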
diff --git a/include/uapi/drm/rocket_accel.h b/include/uapi/drm/rocket_accel.h
index 8338726a83c3..888c9413e4cd 100644
--- a/include/uapi/drm/rocket_accel.h
+++ b/include/uapi/drm/rocket_accel.h
@@ -12,8 +12,10 @@ extern "C" {
 #endif
 
 #define DRM_ROCKET_CREATE_BO            0x00
+#define DRM_ROCKET_SUBMIT               0x01
 
 #define DRM_IOCTL_ROCKET_CREATE_BO      DRM_IOWR(DRM_COMMAND_BASE + DRM_ROCKET_CREATE_BO, struct drm_rocket_create_bo)
+#define DRM_IOCTL_ROCKET_SUBMIT         DRM_IOW(DRM_COMMAND_BASE + DRM_ROCKET_SUBMIT, struct drm_rocket_submit)
 
 /**
  * struct drm_rocket_create_bo - ioctl argument for creating Rocket BOs.
@@ -36,6 +38,59 @@ struct drm_rocket_create_bo {
         __u64 offset;
 };
 
+/**
+ * struct drm_rocket_task - A task to be run on the NPU
+ *
+ * A task is the smallest unit of work that can be run on the NPU.
+ */
+struct drm_rocket_task {
+        /** DMA address to NPU mapping of register command buffer */
+        __u64 regcmd;
+
+        /** Number of commands in the register command buffer */
+        __u32 regcmd_count;
+};
+
+/**
+ * struct drm_rocket_job - A job to be run on the NPU
+ *
+ * The kernel will schedule the execution of this job taking into account its
+ * dependencies with other jobs. All tasks in the same job will be executed
+ * sequentially on the same core, to benefit from memory residency in SRAM.
+ */
+struct drm_rocket_job {
+        /** Pointer to an array of struct drm_rocket_task. */
+        __u64 tasks;
+
+        /** Number of tasks passed in. */
+        __u32 task_count;
+
+        /** Pointer to a u32 array of the BOs that are read by the job. */
+        __u64 in_bo_handles;
+
+        /** Number of input BO handles passed in (size is that times 4). */
+        __u32 in_bo_handle_count;
+
+        /** Pointer to a u32 array of the BOs that are written to by the job. */
+        __u64 out_bo_handles;
+
+        /** Number of output BO handles passed in (size is that times 4). */
+        __u32 out_bo_handle_count;
+};
It feels like the mixing of 32-bit and 64-bit fields violates the guidelines on defining ioctls, due to implicit padding that might or might not be present.
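One way around that (just a sketch, the exact grouping is up to you and the trailing pad field is my addition) would be to put the __u64 members first and pad the struct out to a multiple of 64 bits:

struct drm_rocket_job {
        /** Pointer to an array of struct drm_rocket_task. */
        __u64 tasks;

        /** Pointer to a u32 array of the BOs that are read by the job. */
        __u64 in_bo_handles;

        /** Pointer to a u32 array of the BOs that are written to by the job. */
        __u64 out_bo_handles;

        /** Number of tasks passed in. */
        __u32 task_count;

        /** Number of input BO handles passed in (size is that times 4). */
        __u32 in_bo_handle_count;

        /** Number of output BO handles passed in (size is that times 4). */
        __u32 out_bo_handle_count;

        /** Reserved, must be zero (keeps the struct a multiple of 64 bits). */
        __u32 pad;
};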
+/**
+ * struct drm_rocket_submit - ioctl argument for submitting commands to the NPU.
+ *
+ * The kernel will schedule the execution of these jobs in dependency order.
+ */
+struct drm_rocket_submit {
+        /** Pointer to an array of struct drm_rocket_job. */
+        __u64 jobs;
+
+        /** Number of jobs passed in. */
+        __u32 job_count;
+};
+
 #if defined(__cplusplus)
 }
 #endif
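For reference, this is how I read the submission flow from userspace with the uapi as posted (hypothetical sketch; the fd, the regcmd DMA address and the BO handles would come from the CREATE_BO/mapping path that isn't quoted here):

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/rocket_accel.h>   /* the installed copy of this uapi header */

static int rocket_submit_one(int fd, uint64_t regcmd_dma, uint32_t regcmd_count,
                             uint32_t in_bo, uint32_t out_bo)
{
        struct drm_rocket_task task = {
                .regcmd = regcmd_dma,
                .regcmd_count = regcmd_count,
        };
        struct drm_rocket_job job = {
                .tasks = (uint64_t)(uintptr_t)&task,
                .task_count = 1,
                .in_bo_handles = (uint64_t)(uintptr_t)&in_bo,
                .in_bo_handle_count = 1,
                .out_bo_handles = (uint64_t)(uintptr_t)&out_bo,
                .out_bo_handle_count = 1,
        };
        struct drm_rocket_submit submit = {
                .jobs = (uint64_t)(uintptr_t)&job,
                .job_count = 1,
        };

        /* One job with one task: the kernel picks a core and runs it there. */
        return ioctl(fd, DRM_IOCTL_ROCKET_SUBMIT, &submit);
}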