The patch below does not apply to the 6.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.7.y
git checkout FETCH_HEAD
git cherry-pick -x 8e57c06bf4b0f51a4d6958e15e1a99c9520d00fa
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012739-laziness-vacate-a43d@gregkh' --subject-prefix 'PATCH 6.7.y' HEAD^..
Possible dependencies:
8e57c06bf4b0 ("drm/amd/display: Refactor DMCUB enter/exit idle interface")
0f657938e434 ("drm/amd/display: do not send commands to DMUB if DMUB is inactive from S3")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8e57c06bf4b0f51a4d6958e15e1a99c9520d00fa Mon Sep 17 00:00:00 2001
From: Nicholas Kazlauskas <nicholas.kazlauskas(a)amd.com>
Date: Mon, 4 Dec 2023 14:10:05 -0500
Subject: [PATCH] drm/amd/display: Refactor DMCUB enter/exit idle interface
[Why]
We can hang in place trying to send commands when the DMCUB isn't
powered on.
[How]
We need to exit out of the idle state prior to sending a command,
but the process that performs the exit also invokes a command itself.
Fixing this issue involves the following:
1. Using a software state to track whether or not we need to start
the process to exit idle or notify idle.
It's possible for the hardware to have exited an idle state without
driver knowledge, but entering one is always restricted to a driver
allow - which makes the SW state vs HW state mismatch issue purely one
of optimization, which should seldom be hit, if at all.
2. Refactor any instances of exit/notify idle to use a single wrapper
that maintains this SW state.
This works similarly to dc_allow_idle_optimizations, but works at the
DMCUB level and makes sure the state is marked prior to any notify/exit
idle so we don't enter an infinite loop.
3. Make sure we exit out of idle prior to sending any commands or
waiting for DMCUB idle.
This patch takes care of 1/2. A future patch will take care of wrapping
DMCUB command submission with calls to this new interface.
Cc: Mario Limonciello <mario.limonciello(a)amd.com>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: Hansen Dsouza <hansen.dsouza(a)amd.com>
Acked-by: Wayne Lin <wayne.lin(a)amd.com>
Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas(a)amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 54861136dafd..97776ba1c70a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2856,7 +2856,7 @@ static int dm_resume(void *handle)
bool need_hotplug = false;
if (dm->dc->caps.ips_support) {
- dc_dmub_srv_exit_low_power_state(dm->dc);
+ dc_dmub_srv_apply_idle_power_optimizations(dm->dc, false);
}
if (amdgpu_in_reset(adev)) {
@@ -9001,7 +9001,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
if (new_con_state->crtc &&
new_con_state->crtc->state->active &&
drm_atomic_crtc_needs_modeset(new_con_state->crtc->state)) {
- dc_dmub_srv_exit_low_power_state(dm->dc);
+ dc_dmub_srv_apply_idle_power_optimizations(dm->dc, false);
break;
}
}
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index eb6f5640f19a..ccfe2b6046fd 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -1162,6 +1162,9 @@ bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait)
struct dc_context *dc_ctx = dc_dmub_srv->ctx;
enum dmub_status status;
+ if (!dc_dmub_srv || !dc_dmub_srv->dmub)
+ return true;
+
if (dc_dmub_srv->ctx->dc->debug.dmcub_emulation)
return true;
@@ -1183,7 +1186,7 @@ bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait)
return true;
}
-void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle)
+static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle)
{
union dmub_rb_cmd cmd = {0};
@@ -1207,7 +1210,7 @@ void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle)
dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
-void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
+static void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
{
const uint32_t max_num_polls = 10000;
uint32_t allow_state = 0;
@@ -1220,6 +1223,9 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
if (!dc->idle_optimizations_allowed)
return;
+ if (!dc->ctx->dmub_srv || !dc->ctx->dmub_srv->dmub)
+ return;
+
if (dc->hwss.get_idle_state &&
dc->hwss.set_idle_state &&
dc->clk_mgr->funcs->exit_low_power_state) {
@@ -1296,3 +1302,30 @@ void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum dc_acpi_c
else
dmub_srv_set_power_state(dmub, DMUB_POWER_STATE_D3);
}
+
+void dc_dmub_srv_apply_idle_power_optimizations(const struct dc *dc, bool allow_idle)
+{
+ struct dc_dmub_srv *dc_dmub_srv = dc->ctx->dmub_srv;
+
+ if (!dc_dmub_srv || !dc_dmub_srv->dmub)
+ return;
+
+ if (dc_dmub_srv->idle_allowed == allow_idle)
+ return;
+
+ /*
+ * Entering a low power state requires a driver notification.
+ * Powering up the hardware requires notifying PMFW and DMCUB.
+ * Clearing the driver idle allow requires a DMCUB command.
+ * DMCUB commands requires the DMCUB to be powered up and restored.
+ *
+ * Exit out early to prevent an infinite loop of DMCUB commands
+ * triggering exit low power - use software state to track this.
+ */
+ dc_dmub_srv->idle_allowed = allow_idle;
+
+ if (!allow_idle)
+ dc_dmub_srv_exit_low_power_state(dc);
+ else
+ dc_dmub_srv_notify_idle(dc, allow_idle);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
index c25ce7546f71..b63cba6235fc 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
@@ -50,6 +50,8 @@ struct dc_dmub_srv {
struct dc_context *ctx;
void *dm;
+
+ bool idle_allowed;
};
void dc_dmub_srv_wait_idle(struct dc_dmub_srv *dc_dmub_srv);
@@ -100,8 +102,8 @@ void dc_dmub_srv_enable_dpia_trace(const struct dc *dc);
void dc_dmub_srv_subvp_save_surf_addr(const struct dc_dmub_srv *dc_dmub_srv, const struct dc_plane_address *addr, uint8_t subvp_index);
bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait);
-void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle);
-void dc_dmub_srv_exit_low_power_state(const struct dc *dc);
+
+void dc_dmub_srv_apply_idle_power_optimizations(const struct dc *dc, bool allow_idle);
void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum dc_acpi_cm_power_state powerState);
#endif /* _DMUB_DC_SRV_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
index 9262d3336182..f48001317fab 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
@@ -687,11 +687,7 @@ bool dcn35_apply_idle_power_optimizations(struct dc *dc, bool enable)
}
// TODO: review other cases when idle optimization is allowed
-
- if (!enable)
- dc_dmub_srv_exit_low_power_state(dc);
- else
- dc_dmub_srv_notify_idle(dc, enable);
+ dc_dmub_srv_apply_idle_power_optimizations(dc, enable);
return true;
}
@@ -701,7 +697,7 @@ void dcn35_z10_restore(const struct dc *dc)
if (dc->debug.disable_z10)
return;
- dc_dmub_srv_exit_low_power_state(dc);
+ dc_dmub_srv_apply_idle_power_optimizations(dc, false);
dcn31_z10_restore(dc);
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 94b1e028e15c94362420f9f3f711fafbf9d52996
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012757-mardi-send-000a@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
94b1e028e15c ("drm/amdgpu/sdma5.2: add begin/end_use ring callbacks")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 94b1e028e15c94362420f9f3f711fafbf9d52996 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher(a)amd.com>
Date: Thu, 7 Dec 2023 10:14:41 -0500
Subject: [PATCH] drm/amdgpu/sdma5.2: add begin/end_use ring callbacks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Add begin/end_use ring callbacks to disallow GFXOFF when
SDMA work is submitted and allow it again afterward.
This should avoid corner cases where GFXOFF is erroneously
entered when SDMA is still active. For now just allow/disallow
GFXOFF in the begin and end helpers until we root cause the
issue. This should not impact power as SDMA usage is pretty
minimal and GFXOFF should not be active when SDMA is active
anyway, this just makes it explicit.
v2: move everything into sdma5.2 code. No reason for this
to be generic at this point.
v3: Add comments in new code
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2220
Reviewed-by: Mario Limonciello <mario.limonciello(a)amd.com> (v1)
Tested-by: Mario Limonciello <mario.limonciello(a)amd.com> (v1)
Reviewed-by: Christian König <christian.koenig(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org # 5.15+
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index 83c240f741b5..0058f3f7cf6e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -1643,6 +1643,32 @@ static void sdma_v5_2_get_clockgating_state(void *handle, u64 *flags)
*flags |= AMD_CG_SUPPORT_SDMA_LS;
}
+static void sdma_v5_2_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* SDMA 5.2.3 (RMB) FW doesn't seem to properly
+ * disallow GFXOFF in some cases leading to
+ * hangs in SDMA. Disallow GFXOFF while SDMA is active.
+ * We can probably just limit this to 5.2.3,
+ * but it shouldn't hurt for other parts since
+ * this GFXOFF will be disallowed anyway when SDMA is
+ * active, this just makes it explicit.
+ */
+ amdgpu_gfx_off_ctrl(adev, false);
+}
+
+static void sdma_v5_2_ring_end_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* SDMA 5.2.3 (RMB) FW doesn't seem to properly
+ * disallow GFXOFF in some cases leading to
+ * hangs in SDMA. Allow GFXOFF when SDMA is complete.
+ */
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
const struct amd_ip_funcs sdma_v5_2_ip_funcs = {
.name = "sdma_v5_2",
.early_init = sdma_v5_2_early_init,
@@ -1690,6 +1716,8 @@ static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = {
.test_ib = sdma_v5_2_ring_test_ib,
.insert_nop = sdma_v5_2_ring_insert_nop,
.pad_ib = sdma_v5_2_ring_pad_ib,
+ .begin_use = sdma_v5_2_ring_begin_use,
+ .end_use = sdma_v5_2_ring_end_use,
.emit_wreg = sdma_v5_2_ring_emit_wreg,
.emit_reg_wait = sdma_v5_2_ring_emit_reg_wait,
.emit_reg_write_reg_wait = sdma_v5_2_ring_emit_reg_write_reg_wait,
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 94b1e028e15c94362420f9f3f711fafbf9d52996
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012756-effort-smelting-a27a@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
94b1e028e15c ("drm/amdgpu/sdma5.2: add begin/end_use ring callbacks")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 94b1e028e15c94362420f9f3f711fafbf9d52996 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher(a)amd.com>
Date: Thu, 7 Dec 2023 10:14:41 -0500
Subject: [PATCH] drm/amdgpu/sdma5.2: add begin/end_use ring callbacks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Add begin/end_use ring callbacks to disallow GFXOFF when
SDMA work is submitted and allow it again afterward.
This should avoid corner cases where GFXOFF is erroneously
entered when SDMA is still active. For now just allow/disallow
GFXOFF in the begin and end helpers until we root cause the
issue. This should not impact power as SDMA usage is pretty
minimal and GFXOFF should not be active when SDMA is active
anyway, this just makes it explicit.
v2: move everything into sdma5.2 code. No reason for this
to be generic at this point.
v3: Add comments in new code
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2220
Reviewed-by: Mario Limonciello <mario.limonciello(a)amd.com> (v1)
Tested-by: Mario Limonciello <mario.limonciello(a)amd.com> (v1)
Reviewed-by: Christian König <christian.koenig(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org # 5.15+
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index 83c240f741b5..0058f3f7cf6e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -1643,6 +1643,32 @@ static void sdma_v5_2_get_clockgating_state(void *handle, u64 *flags)
*flags |= AMD_CG_SUPPORT_SDMA_LS;
}
+static void sdma_v5_2_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* SDMA 5.2.3 (RMB) FW doesn't seem to properly
+ * disallow GFXOFF in some cases leading to
+ * hangs in SDMA. Disallow GFXOFF while SDMA is active.
+ * We can probably just limit this to 5.2.3,
+ * but it shouldn't hurt for other parts since
+ * this GFXOFF will be disallowed anyway when SDMA is
+ * active, this just makes it explicit.
+ */
+ amdgpu_gfx_off_ctrl(adev, false);
+}
+
+static void sdma_v5_2_ring_end_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* SDMA 5.2.3 (RMB) FW doesn't seem to properly
+ * disallow GFXOFF in some cases leading to
+ * hangs in SDMA. Allow GFXOFF when SDMA is complete.
+ */
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
const struct amd_ip_funcs sdma_v5_2_ip_funcs = {
.name = "sdma_v5_2",
.early_init = sdma_v5_2_early_init,
@@ -1690,6 +1716,8 @@ static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = {
.test_ib = sdma_v5_2_ring_test_ib,
.insert_nop = sdma_v5_2_ring_insert_nop,
.pad_ib = sdma_v5_2_ring_pad_ib,
+ .begin_use = sdma_v5_2_ring_begin_use,
+ .end_use = sdma_v5_2_ring_end_use,
.emit_wreg = sdma_v5_2_ring_emit_wreg,
.emit_reg_wait = sdma_v5_2_ring_emit_reg_wait,
.emit_reg_write_reg_wait = sdma_v5_2_ring_emit_reg_write_reg_wait,
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 94b1e028e15c94362420f9f3f711fafbf9d52996
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012754-roping-hug-8954@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
94b1e028e15c ("drm/amdgpu/sdma5.2: add begin/end_use ring callbacks")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 94b1e028e15c94362420f9f3f711fafbf9d52996 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher(a)amd.com>
Date: Thu, 7 Dec 2023 10:14:41 -0500
Subject: [PATCH] drm/amdgpu/sdma5.2: add begin/end_use ring callbacks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Add begin/end_use ring callbacks to disallow GFXOFF when
SDMA work is submitted and allow it again afterward.
This should avoid corner cases where GFXOFF is erroneously
entered when SDMA is still active. For now just allow/disallow
GFXOFF in the begin and end helpers until we root cause the
issue. This should not impact power as SDMA usage is pretty
minimal and GFXOFF should not be active when SDMA is active
anyway, this just makes it explicit.
v2: move everything into sdma5.2 code. No reason for this
to be generic at this point.
v3: Add comments in new code
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2220
Reviewed-by: Mario Limonciello <mario.limonciello(a)amd.com> (v1)
Tested-by: Mario Limonciello <mario.limonciello(a)amd.com> (v1)
Reviewed-by: Christian König <christian.koenig(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org # 5.15+
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index 83c240f741b5..0058f3f7cf6e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -1643,6 +1643,32 @@ static void sdma_v5_2_get_clockgating_state(void *handle, u64 *flags)
*flags |= AMD_CG_SUPPORT_SDMA_LS;
}
+static void sdma_v5_2_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* SDMA 5.2.3 (RMB) FW doesn't seem to properly
+ * disallow GFXOFF in some cases leading to
+ * hangs in SDMA. Disallow GFXOFF while SDMA is active.
+ * We can probably just limit this to 5.2.3,
+ * but it shouldn't hurt for other parts since
+ * this GFXOFF will be disallowed anyway when SDMA is
+ * active, this just makes it explicit.
+ */
+ amdgpu_gfx_off_ctrl(adev, false);
+}
+
+static void sdma_v5_2_ring_end_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* SDMA 5.2.3 (RMB) FW doesn't seem to properly
+ * disallow GFXOFF in some cases leading to
+ * hangs in SDMA. Allow GFXOFF when SDMA is complete.
+ */
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
const struct amd_ip_funcs sdma_v5_2_ip_funcs = {
.name = "sdma_v5_2",
.early_init = sdma_v5_2_early_init,
@@ -1690,6 +1716,8 @@ static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = {
.test_ib = sdma_v5_2_ring_test_ib,
.insert_nop = sdma_v5_2_ring_insert_nop,
.pad_ib = sdma_v5_2_ring_pad_ib,
+ .begin_use = sdma_v5_2_ring_begin_use,
+ .end_use = sdma_v5_2_ring_end_use,
.emit_wreg = sdma_v5_2_ring_emit_wreg,
.emit_reg_wait = sdma_v5_2_ring_emit_reg_wait,
.emit_reg_write_reg_wait = sdma_v5_2_ring_emit_reg_write_reg_wait,
The patch below does not apply to the 6.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.7.y
git checkout FETCH_HEAD
git cherry-pick -x 94b1e028e15c94362420f9f3f711fafbf9d52996
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012753-epilepsy-keep-947a@gregkh' --subject-prefix 'PATCH 6.7.y' HEAD^..
Possible dependencies:
94b1e028e15c ("drm/amdgpu/sdma5.2: add begin/end_use ring callbacks")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 94b1e028e15c94362420f9f3f711fafbf9d52996 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher(a)amd.com>
Date: Thu, 7 Dec 2023 10:14:41 -0500
Subject: [PATCH] drm/amdgpu/sdma5.2: add begin/end_use ring callbacks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Add begin/end_use ring callbacks to disallow GFXOFF when
SDMA work is submitted and allow it again afterward.
This should avoid corner cases where GFXOFF is erroneously
entered when SDMA is still active. For now just allow/disallow
GFXOFF in the begin and end helpers until we root cause the
issue. This should not impact power as SDMA usage is pretty
minimal and GFXOFF should not be active when SDMA is active
anyway, this just makes it explicit.
v2: move everything into sdma5.2 code. No reason for this
to be generic at this point.
v3: Add comments in new code
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2220
Reviewed-by: Mario Limonciello <mario.limonciello(a)amd.com> (v1)
Tested-by: Mario Limonciello <mario.limonciello(a)amd.com> (v1)
Reviewed-by: Christian König <christian.koenig(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org # 5.15+
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index 83c240f741b5..0058f3f7cf6e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -1643,6 +1643,32 @@ static void sdma_v5_2_get_clockgating_state(void *handle, u64 *flags)
*flags |= AMD_CG_SUPPORT_SDMA_LS;
}
+static void sdma_v5_2_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* SDMA 5.2.3 (RMB) FW doesn't seem to properly
+ * disallow GFXOFF in some cases leading to
+ * hangs in SDMA. Disallow GFXOFF while SDMA is active.
+ * We can probably just limit this to 5.2.3,
+ * but it shouldn't hurt for other parts since
+ * this GFXOFF will be disallowed anyway when SDMA is
+ * active, this just makes it explicit.
+ */
+ amdgpu_gfx_off_ctrl(adev, false);
+}
+
+static void sdma_v5_2_ring_end_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* SDMA 5.2.3 (RMB) FW doesn't seem to properly
+ * disallow GFXOFF in some cases leading to
+ * hangs in SDMA. Allow GFXOFF when SDMA is complete.
+ */
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
const struct amd_ip_funcs sdma_v5_2_ip_funcs = {
.name = "sdma_v5_2",
.early_init = sdma_v5_2_early_init,
@@ -1690,6 +1716,8 @@ static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = {
.test_ib = sdma_v5_2_ring_test_ib,
.insert_nop = sdma_v5_2_ring_insert_nop,
.pad_ib = sdma_v5_2_ring_pad_ib,
+ .begin_use = sdma_v5_2_ring_begin_use,
+ .end_use = sdma_v5_2_ring_end_use,
.emit_wreg = sdma_v5_2_ring_emit_wreg,
.emit_reg_wait = sdma_v5_2_ring_emit_reg_wait,
.emit_reg_write_reg_wait = sdma_v5_2_ring_emit_reg_write_reg_wait,
After mounting a remote cifs resource, it becomes unavailable:
df: /mnt/sambashare: Resource temporarily unavailable
It was tested on the following Linux kernels:
Linux altlinux 5.10.208-std-def-alt1
Linux fedora 5.10.208-200.el8.x86_64
The error appeared starting from kernel 5.10.206 after adding
the commit [1] "smb: client: fix OOB in SMB2_query_info_init()",
in which the buffer length increases by 1 as a result of changes:
...
- iov[0].iov_len = total_len - 1 + input_len;
+ iov[0].iov_len = len;
...
[1] https://patchwork.kernel.org/project/cifs-client/patch/20231213152557.6634-…
The error is fixed by the backported commits in the next two patches, adapted for the 5.10 kernel:
[PATCH 1/2] stddef: Introduce DECLARE_FLEX_ARRAY() helper
[PATCH 2/2] smb3: Replace smb2pdu 1-element arrays with flex-arrays