When a CL/CSD job times out, we check if the GPU has made any progress
since the last timeout. If so, instead of resetting the hardware, we skip
the reset and let the timer get rearmed. This gives long-running jobs a
chance to complete.
However, when `timedout_job()` is called, the job in question is removed
from the pending list, which means it won't be automatically freed through
`free_job()`. Consequently, when we skip the reset and keep the job
running, the job won't be freed when it finally completes.
This situation leads to a memory leak, as exposed in [1] and [2].
Similarly to commit 704d3d60fec4 ("drm/etnaviv: don't block scheduler when
GPU is still active"), this patch ensures the job is put back on the
pending list when extending the timeout.
Cc: stable(a)vger.kernel.org # 6.0
Reported-by: Daivik Bhatia <dtgs1208(a)gmail.com>
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12227 [1]
Closes: https://github.com/raspberrypi/linux/issues/6817 [2]
Signed-off-by: Maíra Canal <mcanal(a)igalia.com>
Reviewed-by: Iago Toral Quiroga <itoral(a)igalia.com>
---
Hi,
While we typically strive to avoid exposing the scheduler's internals
within the drivers, I'm proposing this fix as an interim solution. I'm aware
that a comprehensive fix will need some adjustments on the DRM sched side,
and I plan to address that soon.
However, it would be hard to justify the backport of such patches to the
stable branches and this issue is affecting users in the moment.
Therefore, I'd like to push this patch to drm-misc-fixes in order to
address this leak as soon as possible, while working in a more generic
solution.
Best Regards,
- Maíra
v1 -> v2: https://lore.kernel.org/dri-devel/20250427202907.94415-2-mcanal@igalia.com/
- Protect the pending list by using its spinlock.
- Add the URL of another downstream issue related to this patch.
drivers/gpu/drm/v3d/v3d_sched.c | 28 +++++++++++++++++++++-------
1 file changed, 21 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index b3be08b0ca91..c612363181df 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -734,11 +734,16 @@ v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job)
return DRM_GPU_SCHED_STAT_NOMINAL;
}
-/* If the current address or return address have changed, then the GPU
- * has probably made progress and we should delay the reset. This
- * could fail if the GPU got in an infinite loop in the CL, but that
- * is pretty unlikely outside of an i-g-t testcase.
- */
+static void
+v3d_sched_skip_reset(struct drm_sched_job *sched_job)
+{
+ struct drm_gpu_scheduler *sched = sched_job->sched;
+
+ spin_lock(&sched->job_list_lock);
+ list_add(&sched_job->list, &sched->pending_list);
+ spin_unlock(&sched->job_list_lock);
+}
+
static enum drm_gpu_sched_stat
v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q,
u32 *timedout_ctca, u32 *timedout_ctra)
@@ -748,9 +753,16 @@ v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q,
u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(q));
u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(q));
+ /* If the current address or return address have changed, then the GPU
+ * has probably made progress and we should delay the reset. This
+ * could fail if the GPU got in an infinite loop in the CL, but that
+ * is pretty unlikely outside of an i-g-t testcase.
+ */
if (*timedout_ctca != ctca || *timedout_ctra != ctra) {
*timedout_ctca = ctca;
*timedout_ctra = ctra;
+
+ v3d_sched_skip_reset(sched_job);
return DRM_GPU_SCHED_STAT_NOMINAL;
}
@@ -790,11 +802,13 @@ v3d_csd_job_timedout(struct drm_sched_job *sched_job)
struct v3d_dev *v3d = job->base.v3d;
u32 batches = V3D_CORE_READ(0, V3D_CSD_CURRENT_CFG4(v3d->ver));
- /* If we've made progress, skip reset and let the timer get
- * rearmed.
+ /* If we've made progress, skip reset, add the job to the pending
+ * list, and let the timer get rearmed.
*/
if (job->timedout_batches != batches) {
job->timedout_batches = batches;
+
+ v3d_sched_skip_reset(sched_job);
return DRM_GPU_SCHED_STAT_NOMINAL;
}
--
2.49.0
From: Ashish Kalra <ashish.kalra(a)amd.com>
When the shared pages are being made private during kdump preparation
there are additional checks to handle shared GHCB pages.
These additional checks include handling the case of GHCB page being
contained within a huge page.
While handling the case of GHCB page contained within a huge page
any shared page just below the GHCB page gets skipped from being
transitioned back to private during kdump preparation.
This subsequently causes a 0x404 #VC exception when this skipped
shared page is accessed later while dumping guest memory during
vmcore generation via kdump.
Split the initial check for skipping the GHCB page into the page
being skipped fully containing the GHCB and GHCB being contained
within a huge page. Also ensure that the skipped huge page
containing the GHCB page is transitioned back to private later
when changing GHCBs to private at end of kdump preparation.
Reviewed-by: Tom Lendacky <thomas.lendacky(a)amd.com>
Cc: stable(a)vger.kernel.org
Fixes: 3074152e56c9 ("x86/sev: Convert shared memory back to private on kexec")
Signed-off-by: Ashish Kalra <ashish.kalra(a)amd.com>
---
arch/x86/coco/sev/core.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c
index d35fec7b164a..1f53383bd1fa 100644
--- a/arch/x86/coco/sev/core.c
+++ b/arch/x86/coco/sev/core.c
@@ -1019,7 +1019,13 @@ static void unshare_all_memory(void)
data = per_cpu(runtime_data, cpu);
ghcb = (unsigned long)&data->ghcb_page;
- if (addr <= ghcb && ghcb <= addr + size) {
+ /* Handle the case of a huge page containing the GHCB page */
+ if (level == PG_LEVEL_4K && addr == ghcb) {
+ skipped_addr = true;
+ break;
+ }
+ if (level > PG_LEVEL_4K && addr <= ghcb &&
+ ghcb < addr + size) {
skipped_addr = true;
break;
}
@@ -1131,8 +1137,8 @@ static void shutdown_all_aps(void)
void snp_kexec_finish(void)
{
struct sev_es_runtime_data *data;
+ unsigned long size, mask;
unsigned int level, cpu;
- unsigned long size;
struct ghcb *ghcb;
pte_t *pte;
@@ -1160,6 +1166,10 @@ void snp_kexec_finish(void)
ghcb = &data->ghcb_page;
pte = lookup_address((unsigned long)ghcb, &level);
size = page_level_size(level);
+ mask = page_level_mask(level);
+ /* Handle the case of a huge page containing the GHCB page */
+ if (level > PG_LEVEL_4K)
+ ghcb = (struct ghcb *)((unsigned long)ghcb & mask);
set_pte_enc(pte, level, (void *)ghcb);
snp_set_memory_private((unsigned long)ghcb, (size / PAGE_SIZE));
}
--
2.34.1
The following commit has been merged into the irq/urgent branch of tip:
Commit-ID: 38a05c0b87833f5b188ae43b428b1f792df2b384
Gitweb: https://git.kernel.org/tip/38a05c0b87833f5b188ae43b428b1f792df2b384
Author: Stephan Gerhold <stephan.gerhold(a)linaro.org>
AuthorDate: Fri, 02 May 2025 13:22:28 +02:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Fri, 02 May 2025 21:07:02 +02:00
irqchip/qcom-mpm: Prevent crash when trying to handle non-wake GPIOs
On Qualcomm chipsets not all GPIOs are wakeup capable. Those GPIOs do not
have a corresponding MPM pin and should not be handled inside the MPM
driver. The IRQ domain hierarchy is always applied, so it's required to
explicitly disconnect the hierarchy for those. The pinctrl-msm driver marks
these with GPIO_NO_WAKE_IRQ. qcom-pdc has a check for this, but
irq-qcom-mpm is currently missing the check. This is causing crashes when
setting up interrupts for non-wake GPIOs:
root@rb1:~# gpiomon -c gpiochip1 10
irq: IRQ159: trimming hierarchy from :soc@0:interrupt-controller@f200000-1
Unable to handle kernel paging request at virtual address ffff8000a1dc3820
Hardware name: Qualcomm Technologies, Inc. Robotics RB1 (DT)
pc : mpm_set_type+0x80/0xcc
lr : mpm_set_type+0x5c/0xcc
Call trace:
mpm_set_type+0x80/0xcc (P)
qcom_mpm_set_type+0x64/0x158
irq_chip_set_type_parent+0x20/0x38
msm_gpio_irq_set_type+0x50/0x530
__irq_set_trigger+0x60/0x184
__setup_irq+0x304/0x6bc
request_threaded_irq+0xc8/0x19c
edge_detector_setup+0x260/0x364
linereq_create+0x420/0x5a8
gpio_ioctl+0x2d4/0x6c0
Fix this by copying the check for GPIO_NO_WAKE_IRQ from qcom-pdc.c, so that
MPM is removed entirely from the hierarchy for non-wake GPIOs.
Fixes: a6199bb514d8 ("irqchip: Add Qualcomm MPM controller driver")
Reported-by: Alexey Klimov <alexey.klimov(a)linaro.org>
Signed-off-by: Stephan Gerhold <stephan.gerhold(a)linaro.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Tested-by: Alexey Klimov <alexey.klimov(a)linaro.org>
Reviewed-by: Bartosz Golaszewski <bartosz.golaszewski(a)linaro.org>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/20250502-irq-qcom-mpm-fix-no-wake-v1-1-8a1eafcd…
---
drivers/irqchip/irq-qcom-mpm.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/irqchip/irq-qcom-mpm.c b/drivers/irqchip/irq-qcom-mpm.c
index 7942d8e..f772deb 100644
--- a/drivers/irqchip/irq-qcom-mpm.c
+++ b/drivers/irqchip/irq-qcom-mpm.c
@@ -227,6 +227,9 @@ static int qcom_mpm_alloc(struct irq_domain *domain, unsigned int virq,
if (ret)
return ret;
+ if (pin == GPIO_NO_WAKE_IRQ)
+ return irq_domain_disconnect_hierarchy(domain, virq);
+
ret = irq_domain_set_hwirq_and_chip(domain, virq, pin,
&qcom_mpm_chip, priv);
if (ret)
Hi there,
Are you looking for a high-quality, global opt-in user list for Branch and
other top mobile analytics platforms?
We have fresh, permission-based data available for the following platforms:
* Adjust
* Mixpanel
* AppsFlyer
* CleverTap
* Amplitude Analytics
* Braze
* Singular
* Tenjin & many more...!
Would you be interested in learning more or receiving a sample?
Best regards,
Brielle Hayes | Marketing Executive
Please response with "skip" if not required.
From: Steven Rostedt <rostedt(a)goodmis.org>
On some paths in print_event_fields() it takes the trace_event_sem for
read, even though it should always be held when the function is called.
Remove the taking of that mutex and add a lockdep_assert_held_read() to
make sure the trace_event_sem is held when print_event_fields() is called.
Cc: stable(a)vger.kernel.org
Cc: Masami Hiramatsu <mhiramat(a)kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Link: https://lore.kernel.org/20250501224128.0b1f0571@batman.local.home
Fixes: 80a76994b2d88 ("tracing: Add "fields" option to show raw trace event fields")
Reported-by: syzbot+441582c1592938fccf09(a)syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/6813ff5e.050a0220.14dd7d.001b.GAE@google.com/
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/trace_output.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index fee40ffbd490..b9ab06c99543 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1042,11 +1042,12 @@ enum print_line_t print_event_fields(struct trace_iterator *iter,
struct trace_event_call *call;
struct list_head *head;
+ lockdep_assert_held_read(&trace_event_sem);
+
/* ftrace defined events have separate call structures */
if (event->type <= __TRACE_LAST_TYPE) {
bool found = false;
- down_read(&trace_event_sem);
list_for_each_entry(call, &ftrace_events, list) {
if (call->event.type == event->type) {
found = true;
@@ -1056,7 +1057,6 @@ enum print_line_t print_event_fields(struct trace_iterator *iter,
if (call->event.type > __TRACE_LAST_TYPE)
break;
}
- up_read(&trace_event_sem);
if (!found) {
trace_seq_printf(&iter->seq, "UNKNOWN TYPE %d\n", event->type);
goto out;
--
2.47.2
Starting with Rust 1.88.0 (expected 2025-06-26) [1], Clippy may start
warning about paths that do not resolve in the `disallowed_macros`
configuration:
warning: `kernel::dbg` does not refer to an existing macro
--> .clippy.toml:10:5
|
10 | { path = "kernel::dbg", reason = "the `dbg!` macro is intended as a debugging tool" },
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
This is a lint we requested at [2], due to the trouble debugging
the lint due to false negatives (e.g. [3]), which we use to emulate
`clippy::dbg_macro` [4]. See commit 8577c9dca799 ("rust: replace
`clippy::dbg_macro` with `disallowed_macros`") for more details.
Given the false negatives are not resolved yet, it is expected that
Clippy complains about not finding this macro.
Thus, until the false negatives are fixed (and, even then, probably we
will need to wait for the MSRV to raise enough), use the escape hatch
to allow an invalid path.
Cc: stable(a)vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs).
Link: https://github.com/rust-lang/rust-clippy/pull/14397 [1]
Link: https://github.com/rust-lang/rust-clippy/issues/11432 [2]
Link: https://github.com/rust-lang/rust-clippy/issues/11431 [3]
Link: https://github.com/rust-lang/rust-clippy/issues/11303 [4]
Signed-off-by: Miguel Ojeda <ojeda(a)kernel.org>
---
.clippy.toml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.clippy.toml b/.clippy.toml
index 815c94732ed7..137f41d203de 100644
--- a/.clippy.toml
+++ b/.clippy.toml
@@ -7,5 +7,5 @@ check-private-items = true
disallowed-macros = [
# The `clippy::dbg_macro` lint only works with `std::dbg!`, thus we simulate
# it here, see: https://github.com/rust-lang/rust-clippy/issues/11303.
- { path = "kernel::dbg", reason = "the `dbg!` macro is intended as a debugging tool" },
+ { path = "kernel::dbg", reason = "the `dbg!` macro is intended as a debugging tool", allow-invalid = true },
]
--
2.49.0
Starting with Rust 1.88.0 (expected 2025-06-26) [1][2], `rustc` may
introduce a new lint that catches unnecessary transmutes, e.g.:
error: unnecessary transmute
--> rust/uapi/uapi_generated.rs:23242:18
|
23242 | unsafe { ::core::mem::transmute(self._bitfield_1.get(0usize, 1u8) as u8) }
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: replace this with: `(self._bitfield_1.get(0usize, 1u8) as u8 == 1)`
|
= note: `-D unnecessary-transmutes` implied by `-D warnings`
= help: to override `-D warnings` add `#[allow(unnecessary_transmutes)]`
There are a lot of them (at least 300), but luckily they are all in
`bindgen`-generated code.
Thus clean all up by allowing it there.
Since unknown lints trigger a lint itself in older compilers, do it
conditionally so that we can keep the `unknown_lints` lint enabled.
Cc: stable(a)vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs).
Link: https://github.com/rust-lang/rust/pull/136083 [1]
Link: https://github.com/rust-lang/rust/issues/136067 [2]
Signed-off-by: Miguel Ojeda <ojeda(a)kernel.org>
---
init/Kconfig | 3 +++
rust/bindings/lib.rs | 1 +
rust/uapi/lib.rs | 1 +
3 files changed, 5 insertions(+)
diff --git a/init/Kconfig b/init/Kconfig
index 63f5974b9fa6..4cdd1049283c 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -140,6 +140,9 @@ config LD_CAN_USE_KEEP_IN_OVERLAY
config RUSTC_HAS_COERCE_POINTEE
def_bool RUSTC_VERSION >= 108400
+config RUSTC_HAS_UNNECESSARY_TRANSMUTES
+ def_bool RUSTC_VERSION >= 108800
+
config PAHOLE_VERSION
int
default $(shell,$(srctree)/scripts/pahole-version.sh $(PAHOLE))
diff --git a/rust/bindings/lib.rs b/rust/bindings/lib.rs
index 014af0d1fc70..a08eb5518cac 100644
--- a/rust/bindings/lib.rs
+++ b/rust/bindings/lib.rs
@@ -26,6 +26,7 @@
#[allow(dead_code)]
#[allow(clippy::undocumented_unsafe_blocks)]
+#[cfg_attr(CONFIG_RUSTC_HAS_UNNECESSARY_TRANSMUTES, allow(unnecessary_transmutes))]
mod bindings_raw {
// Manual definition for blocklisted types.
type __kernel_size_t = usize;
diff --git a/rust/uapi/lib.rs b/rust/uapi/lib.rs
index 13495910271f..c98d7a8cde77 100644
--- a/rust/uapi/lib.rs
+++ b/rust/uapi/lib.rs
@@ -24,6 +24,7 @@
unreachable_pub,
unsafe_op_in_unsafe_fn
)]
+#![cfg_attr(CONFIG_RUSTC_HAS_UNNECESSARY_TRANSMUTES, allow(unnecessary_transmutes))]
// Manual definition of blocklisted types.
type __kernel_size_t = usize;
--
2.49.0