The patch below does not apply to the 6.0-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
20293269d817 ("drm/amdgpu: Remove ATC L2 access for MMHUB 2.1.x")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 20293269d81779a0d0c0865f5877b240c3335c97 Mon Sep 17 00:00:00 2001
From: Lijo Lazar <lijo.lazar(a)amd.com>
Date: Fri, 30 Sep 2022 10:43:08 +0530
Subject: [PATCH] drm/amdgpu: Remove ATC L2 access for MMHUB 2.1.x
MMHUB 2.1.x versions don't have ATCL2. Remove accesses to ATCL2 registers.
Since they are non-existing registers, read access will cause a
'Completer Abort' and gets reported when AER is enabled with the below patch.
Tagging with the patch so that this is backported along with it.
v2: squash in uninitialized warning fix (Nathan Chancellor)
Fixes: 8795e182b02d ("PCI/portdrv: Don't disable AER reporting in get_port_device_capability()")
Signed-off-by: Lijo Lazar <lijo.lazar(a)amd.com>
Reviewed-by: Guchun Chen <guchun.chen(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
index 4d304f22889e..998b5d17b271 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
@@ -32,8 +32,6 @@
#include "gc/gc_10_1_0_offset.h"
#include "soc15_common.h"
-#define mmMM_ATC_L2_MISC_CG_Sienna_Cichlid 0x064d
-#define mmMM_ATC_L2_MISC_CG_Sienna_Cichlid_BASE_IDX 0
#define mmDAGB0_CNTL_MISC2_Sienna_Cichlid 0x0070
#define mmDAGB0_CNTL_MISC2_Sienna_Cichlid_BASE_IDX 0
@@ -574,7 +572,6 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad
case IP_VERSION(2, 1, 0):
case IP_VERSION(2, 1, 1):
case IP_VERSION(2, 1, 2):
- def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid);
def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid);
break;
default:
@@ -608,8 +605,6 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad
case IP_VERSION(2, 1, 0):
case IP_VERSION(2, 1, 1):
case IP_VERSION(2, 1, 2):
- if (def != data)
- WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid, data);
if (def1 != data1)
WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid, data1);
break;
@@ -634,8 +629,8 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade
case IP_VERSION(2, 1, 0):
case IP_VERSION(2, 1, 1):
case IP_VERSION(2, 1, 2):
- def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid);
- break;
+ /* There is no ATCL2 in MMHUB for 2.1.x */
+ return;
default:
def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG);
break;
@@ -646,18 +641,8 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade
else
data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
- if (def != data) {
- switch (adev->ip_versions[MMHUB_HWIP][0]) {
- case IP_VERSION(2, 1, 0):
- case IP_VERSION(2, 1, 1):
- case IP_VERSION(2, 1, 2):
- WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid, data);
- break;
- default:
- WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data);
- break;
- }
- }
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data);
}
static int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev,
@@ -695,7 +680,10 @@ static void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
case IP_VERSION(2, 1, 0):
case IP_VERSION(2, 1, 1):
case IP_VERSION(2, 1, 2):
- data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid);
+ /* There is no ATCL2 in MMHUB for 2.1.x. Keep the status
+ * based on DAGB
+ */
+ data = MM_ATC_L2_MISC_CG__ENABLE_MASK;
data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid);
break;
default:
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
20293269d817 ("drm/amdgpu: Remove ATC L2 access for MMHUB 2.1.x")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 20293269d81779a0d0c0865f5877b240c3335c97 Mon Sep 17 00:00:00 2001
From: Lijo Lazar <lijo.lazar(a)amd.com>
Date: Fri, 30 Sep 2022 10:43:08 +0530
Subject: [PATCH] drm/amdgpu: Remove ATC L2 access for MMHUB 2.1.x
MMHUB 2.1.x versions don't have ATCL2. Remove accesses to ATCL2 registers.
Since they are non-existing registers, read access will cause a
'Completer Abort' and gets reported when AER is enabled with the below patch.
Tagging with the patch so that this is backported along with it.
v2: squash in uninitialized warning fix (Nathan Chancellor)
Fixes: 8795e182b02d ("PCI/portdrv: Don't disable AER reporting in get_port_device_capability()")
Signed-off-by: Lijo Lazar <lijo.lazar(a)amd.com>
Reviewed-by: Guchun Chen <guchun.chen(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
index 4d304f22889e..998b5d17b271 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
@@ -32,8 +32,6 @@
#include "gc/gc_10_1_0_offset.h"
#include "soc15_common.h"
-#define mmMM_ATC_L2_MISC_CG_Sienna_Cichlid 0x064d
-#define mmMM_ATC_L2_MISC_CG_Sienna_Cichlid_BASE_IDX 0
#define mmDAGB0_CNTL_MISC2_Sienna_Cichlid 0x0070
#define mmDAGB0_CNTL_MISC2_Sienna_Cichlid_BASE_IDX 0
@@ -574,7 +572,6 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad
case IP_VERSION(2, 1, 0):
case IP_VERSION(2, 1, 1):
case IP_VERSION(2, 1, 2):
- def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid);
def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid);
break;
default:
@@ -608,8 +605,6 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad
case IP_VERSION(2, 1, 0):
case IP_VERSION(2, 1, 1):
case IP_VERSION(2, 1, 2):
- if (def != data)
- WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid, data);
if (def1 != data1)
WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid, data1);
break;
@@ -634,8 +629,8 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade
case IP_VERSION(2, 1, 0):
case IP_VERSION(2, 1, 1):
case IP_VERSION(2, 1, 2):
- def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid);
- break;
+ /* There is no ATCL2 in MMHUB for 2.1.x */
+ return;
default:
def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG);
break;
@@ -646,18 +641,8 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade
else
data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
- if (def != data) {
- switch (adev->ip_versions[MMHUB_HWIP][0]) {
- case IP_VERSION(2, 1, 0):
- case IP_VERSION(2, 1, 1):
- case IP_VERSION(2, 1, 2):
- WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid, data);
- break;
- default:
- WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data);
- break;
- }
- }
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data);
}
static int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev,
@@ -695,7 +680,10 @@ static void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
case IP_VERSION(2, 1, 0):
case IP_VERSION(2, 1, 1):
case IP_VERSION(2, 1, 2):
- data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid);
+ /* There is no ATCL2 in MMHUB for 2.1.x. Keep the status
+ * based on DAGB
+ */
+ data = MM_ATC_L2_MISC_CG__ENABLE_MASK;
data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid);
break;
default:
The patch below does not apply to the 6.0-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
027bf0cee89a ("drm/amd/pm: add SMU IP v13.0.4 IF version define to V7")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 027bf0cee89a27325a9a4f2240c21dd5fb81e4fa Mon Sep 17 00:00:00 2001
From: Tim Huang <tim.huang(a)amd.com>
Date: Thu, 29 Sep 2022 15:06:47 +0800
Subject: [PATCH] drm/amd/pm: add SMU IP v13.0.4 IF version define to V7
The pmfw has changed the driver interface version, so keep same with the
fw.
Signed-off-by: Tim Huang <tim.huang(a)amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org # 6.0.x
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index 9d62ea2af132..8f72202aea8e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -28,7 +28,7 @@
#define SMU13_DRIVER_IF_VERSION_INV 0xFFFFFFFF
#define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04
#define SMU13_DRIVER_IF_VERSION_ALDE 0x08
-#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x05
+#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x07
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x30
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x2C
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
027bf0cee89a ("drm/amd/pm: add SMU IP v13.0.4 IF version define to V7")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 027bf0cee89a27325a9a4f2240c21dd5fb81e4fa Mon Sep 17 00:00:00 2001
From: Tim Huang <tim.huang(a)amd.com>
Date: Thu, 29 Sep 2022 15:06:47 +0800
Subject: [PATCH] drm/amd/pm: add SMU IP v13.0.4 IF version define to V7
The pmfw has changed the driver interface version, so keep same with the
fw.
Signed-off-by: Tim Huang <tim.huang(a)amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org # 6.0.x
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index 9d62ea2af132..8f72202aea8e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -28,7 +28,7 @@
#define SMU13_DRIVER_IF_VERSION_INV 0xFFFFFFFF
#define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04
#define SMU13_DRIVER_IF_VERSION_ALDE 0x08
-#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x05
+#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x07
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x30
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x2C
The patch below does not apply to the 6.0-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
d1bb3afc0527 ("drm/amd/pm: update SMU IP v13.0.4 driver interface version")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d1bb3afc0527ab55d118852b398fd0f1d2fe802d Mon Sep 17 00:00:00 2001
From: Tim Huang <tim.huang(a)amd.com>
Date: Thu, 29 Sep 2022 14:39:21 +0800
Subject: [PATCH] drm/amd/pm: update SMU IP v13.0.4 driver interface version
Update the SMU driver interface version to V7.
Signed-off-by: Tim Huang <tim.huang(a)amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org # 6.0.x
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h
index ae2d337158f3..f77401709d83 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h
@@ -27,7 +27,7 @@
// *** IMPORTANT ***
// SMU TEAM: Always increment the interface version if
// any structure is changed in this file
-#define PMFW_DRIVER_IF_VERSION 5
+#define PMFW_DRIVER_IF_VERSION 7
typedef struct {
int32_t value;
@@ -163,8 +163,8 @@ typedef struct {
uint16_t DclkFrequency; //[MHz]
uint16_t MemclkFrequency; //[MHz]
uint16_t spare; //[centi]
- uint16_t UvdActivity; //[centi]
uint16_t GfxActivity; //[centi]
+ uint16_t UvdActivity; //[centi]
uint16_t Voltage[2]; //[mV] indices: VDDCR_VDD, VDDCR_SOC
uint16_t Current[2]; //[mA] indices: VDDCR_VDD, VDDCR_SOC
@@ -199,6 +199,19 @@ typedef struct {
uint16_t DeviceState;
uint16_t CurTemp; //[centi-Celsius]
uint16_t spare2;
+
+ uint16_t AverageGfxclkFrequency;
+ uint16_t AverageFclkFrequency;
+ uint16_t AverageGfxActivity;
+ uint16_t AverageSocclkFrequency;
+ uint16_t AverageVclkFrequency;
+ uint16_t AverageVcnActivity;
+ uint16_t AverageDRAMReads; //Filtered DF Bandwidth::DRAM Reads
+ uint16_t AverageDRAMWrites; //Filtered DF Bandwidth::DRAM Writes
+ uint16_t AverageSocketPower; //Filtered value of CurrentSocketPower
+ uint16_t AverageCorePower; //Filtered of [sum of CorePower[8]])
+ uint16_t AverageCoreC0Residency[8]; //Filtered of [average C0 residency % per core]
+ uint32_t MetricsCounter; //Counts the # of metrics table parameter reads per update to the metrics table, i.e. if the metrics table update happens every 1 second, this value could be up to 1000 if the smu collected metrics data every cycle, or as low as 0 if the smu was asleep the whole time. Reset to 0 after writing.
} SmuMetrics_t;
typedef struct {
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
d1bb3afc0527 ("drm/amd/pm: update SMU IP v13.0.4 driver interface version")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d1bb3afc0527ab55d118852b398fd0f1d2fe802d Mon Sep 17 00:00:00 2001
From: Tim Huang <tim.huang(a)amd.com>
Date: Thu, 29 Sep 2022 14:39:21 +0800
Subject: [PATCH] drm/amd/pm: update SMU IP v13.0.4 driver interface version
Update the SMU driver interface version to V7.
Signed-off-by: Tim Huang <tim.huang(a)amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org # 6.0.x
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h
index ae2d337158f3..f77401709d83 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h
@@ -27,7 +27,7 @@
// *** IMPORTANT ***
// SMU TEAM: Always increment the interface version if
// any structure is changed in this file
-#define PMFW_DRIVER_IF_VERSION 5
+#define PMFW_DRIVER_IF_VERSION 7
typedef struct {
int32_t value;
@@ -163,8 +163,8 @@ typedef struct {
uint16_t DclkFrequency; //[MHz]
uint16_t MemclkFrequency; //[MHz]
uint16_t spare; //[centi]
- uint16_t UvdActivity; //[centi]
uint16_t GfxActivity; //[centi]
+ uint16_t UvdActivity; //[centi]
uint16_t Voltage[2]; //[mV] indices: VDDCR_VDD, VDDCR_SOC
uint16_t Current[2]; //[mA] indices: VDDCR_VDD, VDDCR_SOC
@@ -199,6 +199,19 @@ typedef struct {
uint16_t DeviceState;
uint16_t CurTemp; //[centi-Celsius]
uint16_t spare2;
+
+ uint16_t AverageGfxclkFrequency;
+ uint16_t AverageFclkFrequency;
+ uint16_t AverageGfxActivity;
+ uint16_t AverageSocclkFrequency;
+ uint16_t AverageVclkFrequency;
+ uint16_t AverageVcnActivity;
+ uint16_t AverageDRAMReads; //Filtered DF Bandwidth::DRAM Reads
+ uint16_t AverageDRAMWrites; //Filtered DF Bandwidth::DRAM Writes
+ uint16_t AverageSocketPower; //Filtered value of CurrentSocketPower
+ uint16_t AverageCorePower; //Filtered of [sum of CorePower[8]])
+ uint16_t AverageCoreC0Residency[8]; //Filtered of [average C0 residency % per core]
+ uint32_t MetricsCounter; //Counts the # of metrics table parameter reads per update to the metrics table, i.e. if the metrics table update happens every 1 second, this value could be up to 1000 if the smu collected metrics data every cycle, or as low as 0 if the smu was asleep the whole time. Reset to 0 after writing.
} SmuMetrics_t;
typedef struct {
The patch below does not apply to the 6.0-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
1c43a48b44a5 ("drm/amd/display: Fix access timeout to DPIA AUX at boot time")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1c43a48b44a5449ed996215d1488284d5bdb2be0 Mon Sep 17 00:00:00 2001
From: Stylon Wang <stylon.wang(a)amd.com>
Date: Mon, 24 Oct 2022 15:36:16 +0800
Subject: [PATCH] drm/amd/display: Fix access timeout to DPIA AUX at boot time
[Why]
Since introduction of patch "Query DPIA HPD status.", link detection at
boot could be accessing DPIA AUX, which will not succeed until
DMUB outbox messaging is enabled and results in below dmesg logs:
[ 160.840227] [drm:amdgpu_dm_process_dmub_aux_transfer_sync [amdgpu]] *ERROR* wait_for_completion_timeout timeout!
[How]
Enable DMUB outbox messaging before link detection at boot time.
Reviewed-by: Wayne Lin <Wayne.Lin(a)amd.com>
Acked-by: Tom Chung <chiahsuan.chung(a)amd.com>
Signed-off-by: Stylon Wang <stylon.wang(a)amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org # 6.0.x
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 123c235b60bc..2dc75b1b005b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1636,12 +1636,6 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
}
}
- if (amdgpu_dm_initialize_drm_device(adev)) {
- DRM_ERROR(
- "amdgpu: failed to initialize sw for display support.\n");
- goto error;
- }
-
/* Enable outbox notification only after IRQ handlers are registered and DMUB is alive.
* It is expected that DMUB will resend any pending notifications at this point, for
* example HPD from DPIA.
@@ -1649,6 +1643,12 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
if (dc_is_dmub_outbox_supported(adev->dm.dc))
dc_enable_dmub_outbox(adev->dm.dc);
+ if (amdgpu_dm_initialize_drm_device(adev)) {
+ DRM_ERROR(
+ "amdgpu: failed to initialize sw for display support.\n");
+ goto error;
+ }
+
/* create fake encoders for MST */
dm_dp_create_fake_mst_encoders(adev);
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
1c43a48b44a5 ("drm/amd/display: Fix access timeout to DPIA AUX at boot time")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1c43a48b44a5449ed996215d1488284d5bdb2be0 Mon Sep 17 00:00:00 2001
From: Stylon Wang <stylon.wang(a)amd.com>
Date: Mon, 24 Oct 2022 15:36:16 +0800
Subject: [PATCH] drm/amd/display: Fix access timeout to DPIA AUX at boot time
[Why]
Since introduction of patch "Query DPIA HPD status.", link detection at
boot could be accessing DPIA AUX, which will not succeed until
DMUB outbox messaging is enabled and results in below dmesg logs:
[ 160.840227] [drm:amdgpu_dm_process_dmub_aux_transfer_sync [amdgpu]] *ERROR* wait_for_completion_timeout timeout!
[How]
Enable DMUB outbox messaging before link detection at boot time.
Reviewed-by: Wayne Lin <Wayne.Lin(a)amd.com>
Acked-by: Tom Chung <chiahsuan.chung(a)amd.com>
Signed-off-by: Stylon Wang <stylon.wang(a)amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org # 6.0.x
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 123c235b60bc..2dc75b1b005b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1636,12 +1636,6 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
}
}
- if (amdgpu_dm_initialize_drm_device(adev)) {
- DRM_ERROR(
- "amdgpu: failed to initialize sw for display support.\n");
- goto error;
- }
-
/* Enable outbox notification only after IRQ handlers are registered and DMUB is alive.
* It is expected that DMUB will resend any pending notifications at this point, for
* example HPD from DPIA.
@@ -1649,6 +1643,12 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
if (dc_is_dmub_outbox_supported(adev->dm.dc))
dc_enable_dmub_outbox(adev->dm.dc);
+ if (amdgpu_dm_initialize_drm_device(adev)) {
+ DRM_ERROR(
+ "amdgpu: failed to initialize sw for display support.\n");
+ goto error;
+ }
+
/* create fake encoders for MST */
dm_dp_create_fake_mst_encoders(adev);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
107ba1a2c705 ("drm/i915/gt: Restrict forced preemption to the active context")
3e28d37146db ("drm/i915: Move priolist to new i915_sched_engine object")
2913fa4d7d42 ("drm/i915/gt: use new tasklet API for execution list")
eb5c10cbbc2f ("drm/i915: Remove I915_USER_PRIORITY_SHIFT")
2867ff6ceb25 ("drm/i915: Strip out internal priorities")
a829f033e966 ("drm/i915: Wedge the GPU if command parser setup fails")
f99e67f1b929 ("drm/i915/display: Apply interactive priority to explicit flip fences")
007c45787650 ("drm/i915/guc: stop calling execlists_set_default_submission")
43aaadc67e6f ("drm/i915/guc: init engine directly in GuC submission mode")
7e5299cebe91 ("drm/i915/guc: Delete GuC code unused in future patches")
baa7c2cd99c6 ("drm/i915: Refactor marking a request as EIO")
751f82b353a6 ("drm/i915/gt: Only disable preemption on gen8 render engines")
4386b8e5ad71 ("drm/i915/gt: Remove timeslice suppression")
fe7bcfaeb2b7 ("drm/i915/gt: Refactor heartbeat request construction and submission")
f81475bb5bb4 ("drm/i915/gt: Resubmit the virtual engine on schedule-out")
bab0557c8dca ("drm/i915/gt: Remove virtual breadcrumb before transfer")
6f0726b4807c ("drm/i915/gt: Defer schedule_out until after the next dequeue")
2efa2c522ab0 ("drm/i915/gt: Decouple inflight virtual engines")
64b7a3fa7e3e ("drm/i915/gt: Use virtual_engine during execlists_dequeue")
16f2941ad307 ("drm/i915/gt: Replace direct submit with direct call to tasklet")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 107ba1a2c705f4358f2602ec2f2fd821bb651f42 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris(a)chris-wilson.co.uk>
Date: Wed, 21 Sep 2022 15:52:58 +0200
Subject: [PATCH] drm/i915/gt: Restrict forced preemption to the active context
When we submit a new pair of contexts to ELSP for execution, we start a
timer by which point we expect the HW to have switched execution to the
pending contexts. If the promotion to the new pair of contexts has not
occurred, we declare the executing context to have hung and force the
preemption to take place by resetting the engine and resubmitting the
new contexts.
This can lead to an unfair situation where almost all of the preemption
timeout is consumed by the first context which just switches into the
second context immediately prior to the timer firing and triggering the
preemption reset (assuming that the timer interrupts before we process
the CS events for the context switch). The second context hasn't yet had
a chance to yield to the incoming ELSP (and send the ACk for the
promotion) and so ends up being blamed for the reset.
If we see that a context switch has occurred since setting the
preemption timeout, but have not yet received the ACK for the ELSP
promotion, rearm the preemption timer and check again. This is
especially significant if the first context was not schedulable and so
we used the shortest timer possible, greatly increasing the chance of
accidentally blaming the second innocent context.
Fixes: 3a7a92aba8fb ("drm/i915/execlists: Force preemption")
Fixes: d12acee84ffb ("drm/i915/execlists: Cancel banned contexts on schedule-out")
Reported-by: Tvrtko Ursulin <tvrtko.ursulin(a)intel.com>
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin(a)intel.com>
Cc: Andi Shyti <andi.shyti(a)linux.intel.com>
Reviewed-by: Andrzej Hajda <andrzej.hajda(a)intel.com>
Tested-by: Andrzej Hajda <andrzej.hajda(a)intel.com>
Cc: <stable(a)vger.kernel.org> # v5.5+
Signed-off-by: Andi Shyti <andi.shyti(a)linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220921135258.1714873-1-andr…
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 633a7e5dba3b..6b5d4ea22b67 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -165,6 +165,21 @@ struct intel_engine_execlists {
*/
struct timer_list preempt;
+ /**
+ * @preempt_target: active request at the time of the preemption request
+ *
+ * We force a preemption to occur if the pending contexts have not
+ * been promoted to active upon receipt of the CS ack event within
+ * the timeout. This timeout maybe chosen based on the target,
+ * using a very short timeout if the context is no longer schedulable.
+ * That short timeout may not be applicable to other contexts, so
+ * if a context switch should happen within before the preemption
+ * timeout, we may shoot early at an innocent context. To prevent this,
+ * we record which context was active at the time of the preemption
+ * request and only reset that context upon the timeout.
+ */
+ const struct i915_request *preempt_target;
+
/**
* @ccid: identifier for contexts submitted to this engine
*/
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index aab240ea1d25..428da98c5356 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -1241,6 +1241,9 @@ static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
if (!rq)
return 0;
+ /* Only allow ourselves to force reset the currently active context */
+ engine->execlists.preempt_target = rq;
+
/* Force a fast reset for terminated contexts (ignoring sysfs!) */
if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq)))
return INTEL_CONTEXT_BANNED_PREEMPT_TIMEOUT_MS;
@@ -2427,8 +2430,24 @@ static void execlists_submission_tasklet(struct tasklet_struct *t)
GEM_BUG_ON(inactive - post > ARRAY_SIZE(post));
if (unlikely(preempt_timeout(engine))) {
+ const struct i915_request *rq = *engine->execlists.active;
+
+ /*
+ * If after the preempt-timeout expired, we are still on the
+ * same active request/context as before we initiated the
+ * preemption, reset the engine.
+ *
+ * However, if we have processed a CS event to switch contexts,
+ * but not yet processed the CS event for the pending
+ * preemption, reset the timer allowing the new context to
+ * gracefully exit.
+ */
cancel_timer(&engine->execlists.preempt);
- engine->execlists.error_interrupt |= ERROR_PREEMPT;
+ if (rq == engine->execlists.preempt_target)
+ engine->execlists.error_interrupt |= ERROR_PREEMPT;
+ else
+ set_timer_ms(&engine->execlists.preempt,
+ active_preempt_timeout(engine, rq));
}
if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) {
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
107ba1a2c705 ("drm/i915/gt: Restrict forced preemption to the active context")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 107ba1a2c705f4358f2602ec2f2fd821bb651f42 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris(a)chris-wilson.co.uk>
Date: Wed, 21 Sep 2022 15:52:58 +0200
Subject: [PATCH] drm/i915/gt: Restrict forced preemption to the active context
When we submit a new pair of contexts to ELSP for execution, we start a
timer by which point we expect the HW to have switched execution to the
pending contexts. If the promotion to the new pair of contexts has not
occurred, we declare the executing context to have hung and force the
preemption to take place by resetting the engine and resubmitting the
new contexts.
This can lead to an unfair situation where almost all of the preemption
timeout is consumed by the first context which just switches into the
second context immediately prior to the timer firing and triggering the
preemption reset (assuming that the timer interrupts before we process
the CS events for the context switch). The second context hasn't yet had
a chance to yield to the incoming ELSP (and send the ACk for the
promotion) and so ends up being blamed for the reset.
If we see that a context switch has occurred since setting the
preemption timeout, but have not yet received the ACK for the ELSP
promotion, rearm the preemption timer and check again. This is
especially significant if the first context was not schedulable and so
we used the shortest timer possible, greatly increasing the chance of
accidentally blaming the second innocent context.
Fixes: 3a7a92aba8fb ("drm/i915/execlists: Force preemption")
Fixes: d12acee84ffb ("drm/i915/execlists: Cancel banned contexts on schedule-out")
Reported-by: Tvrtko Ursulin <tvrtko.ursulin(a)intel.com>
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin(a)intel.com>
Cc: Andi Shyti <andi.shyti(a)linux.intel.com>
Reviewed-by: Andrzej Hajda <andrzej.hajda(a)intel.com>
Tested-by: Andrzej Hajda <andrzej.hajda(a)intel.com>
Cc: <stable(a)vger.kernel.org> # v5.5+
Signed-off-by: Andi Shyti <andi.shyti(a)linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220921135258.1714873-1-andr…
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 633a7e5dba3b..6b5d4ea22b67 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -165,6 +165,21 @@ struct intel_engine_execlists {
*/
struct timer_list preempt;
+ /**
+ * @preempt_target: active request at the time of the preemption request
+ *
+ * We force a preemption to occur if the pending contexts have not
+ * been promoted to active upon receipt of the CS ack event within
+ * the timeout. This timeout maybe chosen based on the target,
+ * using a very short timeout if the context is no longer schedulable.
+ * That short timeout may not be applicable to other contexts, so
+ * if a context switch should happen within before the preemption
+ * timeout, we may shoot early at an innocent context. To prevent this,
+ * we record which context was active at the time of the preemption
+ * request and only reset that context upon the timeout.
+ */
+ const struct i915_request *preempt_target;
+
/**
* @ccid: identifier for contexts submitted to this engine
*/
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index aab240ea1d25..428da98c5356 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -1241,6 +1241,9 @@ static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
if (!rq)
return 0;
+ /* Only allow ourselves to force reset the currently active context */
+ engine->execlists.preempt_target = rq;
+
/* Force a fast reset for terminated contexts (ignoring sysfs!) */
if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq)))
return INTEL_CONTEXT_BANNED_PREEMPT_TIMEOUT_MS;
@@ -2427,8 +2430,24 @@ static void execlists_submission_tasklet(struct tasklet_struct *t)
GEM_BUG_ON(inactive - post > ARRAY_SIZE(post));
if (unlikely(preempt_timeout(engine))) {
+ const struct i915_request *rq = *engine->execlists.active;
+
+ /*
+ * If after the preempt-timeout expired, we are still on the
+ * same active request/context as before we initiated the
+ * preemption, reset the engine.
+ *
+ * However, if we have processed a CS event to switch contexts,
+ * but not yet processed the CS event for the pending
+ * preemption, reset the timer allowing the new context to
+ * gracefully exit.
+ */
cancel_timer(&engine->execlists.preempt);
- engine->execlists.error_interrupt |= ERROR_PREEMPT;
+ if (rq == engine->execlists.preempt_target)
+ engine->execlists.error_interrupt |= ERROR_PREEMPT;
+ else
+ set_timer_ms(&engine->execlists.preempt,
+ active_preempt_timeout(engine, rq));
}
if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) {