We were using the same force-poweron bit in the two codepaths, so they could race to have one of them lose GPU power early.
Signed-off-by: Eric Anholt eric@anholt.net Cc: stable@vger.kernel.org # v5.9 --- drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 25 ++++++++++++++++++++++--- drivers/gpu/drm/msm/adreno/a6xx_gmu.h | 8 ++++++++ drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 4 ++-- 3 files changed, 32 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index 78836b4fb98e..378dc7f190c3 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -264,6 +264,16 @@ int _a6xx_gmu_set_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state, char } name = "GPU_SET"; break; + case GMU_OOB_PERFCOUNTER_SET: + if (gmu->legacy) { + request = GMU_OOB_PERFCOUNTER_REQUEST; + ack = GMU_OOB_PERFCOUNTER_ACK; + } else { + request = GMU_OOB_PERFCOUNTER_REQUEST_NEW; + ack = GMU_OOB_PERFCOUNTER_ACK_NEW; + } + name = "PERFCOUNTER"; + break; case GMU_OOB_BOOT_SLUMBER: request = GMU_OOB_BOOT_SLUMBER_REQUEST; ack = GMU_OOB_BOOT_SLUMBER_ACK; @@ -302,9 +312,14 @@ int _a6xx_gmu_set_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state, char void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state) { if (!gmu->legacy) { - WARN_ON(state != GMU_OOB_GPU_SET); - gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET, - 1 << GMU_OOB_GPU_SET_CLEAR_NEW); + if (state == GMU_OOB_GPU_SET) { + gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET, + 1 << GMU_OOB_GPU_SET_CLEAR_NEW); + } else { + WARN_ON(state != GMU_OOB_PERFCOUNTER_SET); + gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET, + 1 << GMU_OOB_PERFCOUNTER_CLEAR_NEW); + } return; }
@@ -313,6 +328,10 @@ void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state) gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET, 1 << GMU_OOB_GPU_SET_CLEAR); break; + case GMU_OOB_PERFCOUNTER_SET: + gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET, + 1 << GMU_OOB_PERFCOUNTER_CLEAR); + break; case GMU_OOB_BOOT_SLUMBER: gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET, 1 << GMU_OOB_BOOT_SLUMBER_CLEAR); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h index c6d2bced8e5d..9fa278de2106 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h @@ -156,6 +156,7 @@ enum a6xx_gmu_oob_state { GMU_OOB_BOOT_SLUMBER = 0, GMU_OOB_GPU_SET, GMU_OOB_DCVS_SET, + GMU_OOB_PERFCOUNTER_SET, };
/* These are the interrupt / ack bits for each OOB request that are set @@ -190,6 +191,13 @@ enum a6xx_gmu_oob_state { #define GMU_OOB_GPU_SET_ACK_NEW 31 #define GMU_OOB_GPU_SET_CLEAR_NEW 31
+#define GMU_OOB_PERFCOUNTER_REQUEST 17 +#define GMU_OOB_PERFCOUNTER_ACK 25 +#define GMU_OOB_PERFCOUNTER_CLEAR 25 + +#define GMU_OOB_PERFCOUNTER_REQUEST_NEW 28 +#define GMU_OOB_PERFCOUNTER_ACK_NEW 30 +#define GMU_OOB_PERFCOUNTER_CLEAR_NEW 30
void a6xx_hfi_init(struct a6xx_gmu *gmu); int a6xx_hfi_start(struct a6xx_gmu *gmu, int boot_state); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index c8a9010c1a1d..7424a70b9d35 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1177,12 +1177,12 @@ static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value) struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
/* Force the GPU power on so we can read this register */ - a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); + a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
*value = gpu_read64(gpu, REG_A6XX_RBBM_PERFCTR_CP_0_LO, REG_A6XX_RBBM_PERFCTR_CP_0_HI);
- a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); + a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); return 0; }
Now that we're not racing with GPU setup, also fix races of timestamps against other timestamps. In CI, we were seeing this path trigger timeouts on setting the GMU bit, especially on the first set of tests right after boot (it's probably easier to lose the race than one might think, given that we start many tests in parallel, and waiting for NFS to page in code probably means that lots of tests hit the same point of screen init at the same time).
Signed-off-by: Eric Anholt eric@anholt.net Cc: stable@vger.kernel.org # v5.9 --- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 7424a70b9d35..e8f0b5325a7f 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1175,6 +1175,9 @@ static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + static DEFINE_MUTEX(perfcounter_oob); + + mutex_lock(&perfcounter_oob);
/* Force the GPU power on so we can read this register */ a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); @@ -1183,6 +1186,7 @@ static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value) REG_A6XX_RBBM_PERFCTR_CP_0_HI);
a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); + mutex_unlock(&perfcounter_oob); return 0; }
On Wed, Jan 27, 2021 at 3:39 PM Eric Anholt eric@anholt.net wrote:
Now that we're not racing with GPU setup, also fix races of timestamps against other timestamps. In CI, we were seeing this path trigger timeouts on setting the GMU bit, especially on the first set of tests right after boot (it's probably easier to lose the race than one might think, given that we start many tests in parallel, and waiting for NFS to page in code probably means that lots of tests hit the same point of screen init at the same time).
Could you add the error msg to the commit msg, to make it more easily searchable?
BR, -R
Signed-off-by: Eric Anholt eric@anholt.net Cc: stable@vger.kernel.org # v5.9
drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 7424a70b9d35..e8f0b5325a7f 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1175,6 +1175,9 @@ static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
static DEFINE_MUTEX(perfcounter_oob);
mutex_lock(&perfcounter_oob); /* Force the GPU power on so we can read this register */ a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
@@ -1183,6 +1186,7 @@ static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value) REG_A6XX_RBBM_PERFCTR_CP_0_HI);
a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
mutex_unlock(&perfcounter_oob); return 0;
}
-- 2.30.0
linux-stable-mirror@lists.linaro.org