The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 964209202ebe1569c858337441e87ef0f9d71416
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025070818-buddhism-wikipedia-516a@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 964209202ebe1569c858337441e87ef0f9d71416 Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang(a)intel.com>
Date: Thu, 19 Jun 2025 15:13:40 +0800
Subject: [PATCH] powercap: intel_rapl: Do not change CLAMPING bit if ENABLE
bit cannot be changed
PL1 cannot be disabled on some platforms. The ENABLE bit is still set
after software clears it. This behavior leads to a scenario where, upon
user request to disable the Power Limit through the powercap sysfs, the
ENABLE bit remains set while the CLAMPING bit is inadvertently cleared.
According to the Intel Software Developer's Manual, the CLAMPING bit,
"When set, allows the processor to go below the OS requested P states in
order to maintain the power below specified Platform Power Limit value."
Thus this means the system may operate at higher power levels than
intended on such platforms.
Enhance the code to check ENABLE bit after writing to it, and stop
further processing if ENABLE bit cannot be changed.
Reported-by: Srinivas Pandruvada <srinivas.pandruvada(a)linux.intel.com>
Fixes: 2d281d8196e3 ("PowerCap: Introduce Intel RAPL power capping driver")
Cc: All applicable <stable(a)vger.kernel.org>
Signed-off-by: Zhang Rui <rui.zhang(a)intel.com>
Link: https://patch.msgid.link/20250619071340.384782-1-rui.zhang@intel.com
[ rjw: Use str_enabled_disabled() instead of open-coded equivalent ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index e3be40adc0d7..faa0b6bc5b53 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -341,12 +341,28 @@ static int set_domain_enable(struct powercap_zone *power_zone, bool mode)
{
struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
struct rapl_defaults *defaults = get_defaults(rd->rp);
+ u64 val;
int ret;
cpus_read_lock();
ret = rapl_write_pl_data(rd, POWER_LIMIT1, PL_ENABLE, mode);
- if (!ret && defaults->set_floor_freq)
+ if (ret)
+ goto end;
+
+ ret = rapl_read_pl_data(rd, POWER_LIMIT1, PL_ENABLE, false, &val);
+ if (ret)
+ goto end;
+
+ if (mode != val) {
+ pr_debug("%s cannot be %s\n", power_zone->name,
+ str_enabled_disabled(mode));
+ goto end;
+ }
+
+ if (defaults->set_floor_freq)
defaults->set_floor_freq(rd, mode);
+
+end:
cpus_read_unlock();
return ret;
Make sure to drop the references to the accdet OF node and platform
device taken by of_parse_phandle() and of_find_device_by_node() after
looking up the sound component during probe.
Fixes: cf536e2622e2 ("ASoC: mediatek: common: Handle mediatek,accdet property")
Cc: stable(a)vger.kernel.org # 6.15
Cc: Nícolas F. R. A. Prado <nfraprado(a)collabora.com>
Signed-off-by: Johan Hovold <johan(a)kernel.org>
---
sound/soc/mediatek/common/mtk-soundcard-driver.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/sound/soc/mediatek/common/mtk-soundcard-driver.c b/sound/soc/mediatek/common/mtk-soundcard-driver.c
index 713a368f79cf..95a083939f3e 100644
--- a/sound/soc/mediatek/common/mtk-soundcard-driver.c
+++ b/sound/soc/mediatek/common/mtk-soundcard-driver.c
@@ -262,9 +262,13 @@ int mtk_soundcard_common_probe(struct platform_device *pdev)
soc_card_data->accdet = accdet_comp;
else
dev_err(&pdev->dev, "No sound component found from mediatek,accdet property\n");
+
+ put_device(&accdet_pdev->dev);
} else {
dev_err(&pdev->dev, "No device found from mediatek,accdet property\n");
}
+
+ of_node_put(accdet_node);
}
platform_node = of_parse_phandle(pdev->dev.of_node, "mediatek,platform", 0);
--
2.49.1
The original code causes a circular locking dependency found by lockdep.
======================================================
WARNING: possible circular locking dependency detected
6.16.0-rc6-lgci-xe-xe-pw-151626v3+ #1 Tainted: G S U
------------------------------------------------------
xe_fault_inject/5091 is trying to acquire lock:
ffff888156815688 ((work_completion)(&(&devcd->del_wk)->work)){+.+.}-{0:0}, at: __flush_work+0x25d/0x660
but task is already holding lock:
ffff888156815620 (&devcd->mutex){+.+.}-{3:3}, at: dev_coredump_put+0x3f/0xa0
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #2 (&devcd->mutex){+.+.}-{3:3}:
mutex_lock_nested+0x4e/0xc0
devcd_data_write+0x27/0x90
sysfs_kf_bin_write+0x80/0xf0
kernfs_fop_write_iter+0x169/0x220
vfs_write+0x293/0x560
ksys_write+0x72/0xf0
__x64_sys_write+0x19/0x30
x64_sys_call+0x2bf/0x2660
do_syscall_64+0x93/0xb60
entry_SYSCALL_64_after_hwframe+0x76/0x7e
-> #1 (kn->active#236){++++}-{0:0}:
kernfs_drain+0x1e2/0x200
__kernfs_remove+0xae/0x400
kernfs_remove_by_name_ns+0x5d/0xc0
remove_files+0x54/0x70
sysfs_remove_group+0x3d/0xa0
sysfs_remove_groups+0x2e/0x60
device_remove_attrs+0xc7/0x100
device_del+0x15d/0x3b0
devcd_del+0x19/0x30
process_one_work+0x22b/0x6f0
worker_thread+0x1e8/0x3d0
kthread+0x11c/0x250
ret_from_fork+0x26c/0x2e0
ret_from_fork_asm+0x1a/0x30
-> #0 ((work_completion)(&(&devcd->del_wk)->work)){+.+.}-{0:0}:
__lock_acquire+0x1661/0x2860
lock_acquire+0xc4/0x2f0
__flush_work+0x27a/0x660
flush_delayed_work+0x5d/0xa0
dev_coredump_put+0x63/0xa0
xe_driver_devcoredump_fini+0x12/0x20 [xe]
devm_action_release+0x12/0x30
release_nodes+0x3a/0x120
devres_release_all+0x8a/0xd0
device_unbind_cleanup+0x12/0x80
device_release_driver_internal+0x23a/0x280
device_driver_detach+0x14/0x20
unbind_store+0xaf/0xc0
drv_attr_store+0x21/0x50
sysfs_kf_write+0x4a/0x80
kernfs_fop_write_iter+0x169/0x220
vfs_write+0x293/0x560
ksys_write+0x72/0xf0
__x64_sys_write+0x19/0x30
x64_sys_call+0x2bf/0x2660
do_syscall_64+0x93/0xb60
entry_SYSCALL_64_after_hwframe+0x76/0x7e
other info that might help us debug this:
Chain exists of: (work_completion)(&(&devcd->del_wk)->work) --> kn->active#236 --> &devcd->mutex
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(&devcd->mutex);
lock(kn->active#236);
lock(&devcd->mutex);
lock((work_completion)(&(&devcd->del_wk)->work));
*** DEADLOCK ***
5 locks held by xe_fault_inject/5091:
#0: ffff8881129f9488 (sb_writers#5){.+.+}-{0:0}, at: ksys_write+0x72/0xf0
#1: ffff88810c755078 (&of->mutex#2){+.+.}-{3:3}, at: kernfs_fop_write_iter+0x123/0x220
#2: ffff8881054811a0 (&dev->mutex){....}-{3:3}, at: device_release_driver_internal+0x55/0x280
#3: ffff888156815620 (&devcd->mutex){+.+.}-{3:3}, at: dev_coredump_put+0x3f/0xa0
#4: ffffffff8359e020 (rcu_read_lock){....}-{1:2}, at: __flush_work+0x72/0x660
stack backtrace:
CPU: 14 UID: 0 PID: 5091 Comm: xe_fault_inject Tainted: G S U 6.16.0-rc6-lgci-xe-xe-pw-151626v3+ #1 PREEMPT_{RT,(lazy)}
Tainted: [S]=CPU_OUT_OF_SPEC, [U]=USER
Hardware name: Micro-Star International Co., Ltd. MS-7D25/PRO Z690-A DDR4(MS-7D25), BIOS 1.10 12/13/2021
Call Trace:
<TASK>
dump_stack_lvl+0x91/0xf0
dump_stack+0x10/0x20
print_circular_bug+0x285/0x360
check_noncircular+0x135/0x150
? register_lock_class+0x48/0x4a0
__lock_acquire+0x1661/0x2860
lock_acquire+0xc4/0x2f0
? __flush_work+0x25d/0x660
? mark_held_locks+0x46/0x90
? __flush_work+0x25d/0x660
__flush_work+0x27a/0x660
? __flush_work+0x25d/0x660
? trace_hardirqs_on+0x1e/0xd0
? __pfx_wq_barrier_func+0x10/0x10
flush_delayed_work+0x5d/0xa0
dev_coredump_put+0x63/0xa0
xe_driver_devcoredump_fini+0x12/0x20 [xe]
devm_action_release+0x12/0x30
release_nodes+0x3a/0x120
devres_release_all+0x8a/0xd0
device_unbind_cleanup+0x12/0x80
device_release_driver_internal+0x23a/0x280
? bus_find_device+0xa8/0xe0
device_driver_detach+0x14/0x20
unbind_store+0xaf/0xc0
drv_attr_store+0x21/0x50
sysfs_kf_write+0x4a/0x80
kernfs_fop_write_iter+0x169/0x220
vfs_write+0x293/0x560
ksys_write+0x72/0xf0
__x64_sys_write+0x19/0x30
x64_sys_call+0x2bf/0x2660
do_syscall_64+0x93/0xb60
? __f_unlock_pos+0x15/0x20
? __x64_sys_getdents64+0x9b/0x130
? __pfx_filldir64+0x10/0x10
? do_syscall_64+0x1a2/0xb60
? clear_bhb_loop+0x30/0x80
? clear_bhb_loop+0x30/0x80
entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x76e292edd574
Code: c7 00 16 00 00 00 b8 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 80 3d d5 ea 0e 00 00 74 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 55 48 89 e5 48 83 ec 20 48 89
RSP: 002b:00007fffe247a828 EFLAGS: 00000202 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 000076e292edd574
RDX: 000000000000000c RSI: 00006267f6306063 RDI: 000000000000000b
RBP: 000000000000000c R08: 000076e292fc4b20 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000202 R12: 00006267f6306063
R13: 000000000000000b R14: 00006267e6859c00 R15: 000076e29322a000
</TASK>
xe 0000:03:00.0: [drm] Xe device coredump has been deleted.
Fixes: 01daccf74832 ("devcoredump : Serialize devcd_del work")
Cc: Mukesh Ojha <quic_mojha(a)quicinc.com>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Johannes Berg <johannes(a)sipsolutions.net>
Cc: "Rafael J. Wysocki" <rafael(a)kernel.org>
Cc: Danilo Krummrich <dakr(a)kernel.org>
Cc: linux-kernel(a)vger.kernel.org
Cc: <stable(a)vger.kernel.org> # v6.1+
Signed-off-by: Maarten Lankhorst <dev(a)lankhorst.se>
Cc: Matthew Brost <matthew.brost(a)intel.com>
---
drivers/base/devcoredump.c | 136 ++++++++++++++++++++++---------------
1 file changed, 83 insertions(+), 53 deletions(-)
diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c
index 03a39c417dc41..ad4bddde12ccb 100644
--- a/drivers/base/devcoredump.c
+++ b/drivers/base/devcoredump.c
@@ -23,50 +23,46 @@ struct devcd_entry {
void *data;
size_t datalen;
/*
- * Here, mutex is required to serialize the calls to del_wk work between
- * user/kernel space which happens when devcd is added with device_add()
- * and that sends uevent to user space. User space reads the uevents,
- * and calls to devcd_data_write() which try to modify the work which is
- * not even initialized/queued from devcoredump.
+ * There are 2 races for which mutex is required.
*
+ * The first race is between device creation and userspace writing to
+ * schedule immediately destruction.
*
+ * This race is handled by arming the timer before device creation, but
+ * when device creation fails the timer still exists.
*
- * cpu0(X) cpu1(Y)
+ * To solve this, hold the mutex during device_add(), and set
+ * init_completed on success before releasing the mutex.
*
- * dev_coredump() uevent sent to user space
- * device_add() ======================> user space process Y reads the
- * uevents writes to devcd fd
- * which results into writes to
+ * That way the timer will never fire until device_add() is called,
+ * it will do nothing if init_completed is not set. The timer is also
+ * cancelled in that case.
*
- * devcd_data_write()
- * mod_delayed_work()
- * try_to_grab_pending()
- * timer_delete()
- * debug_assert_init()
- * INIT_DELAYED_WORK()
- * schedule_delayed_work()
- *
- *
- * Also, mutex alone would not be enough to avoid scheduling of
- * del_wk work after it get flush from a call to devcd_free()
- * mentioned as below.
- *
- * disabled_store()
- * devcd_free()
- * mutex_lock() devcd_data_write()
- * flush_delayed_work()
- * mutex_unlock()
- * mutex_lock()
- * mod_delayed_work()
- * mutex_unlock()
- * So, delete_work flag is required.
+ * The second race involves multiple parallel invocations of devcd_free(),
+ * add a deleted flag so only 1 can call the destructor.
*/
struct mutex mutex;
- bool delete_work;
+ bool init_completed, deleted;
struct module *owner;
ssize_t (*read)(char *buffer, loff_t offset, size_t count,
void *data, size_t datalen);
void (*free)(void *data);
+ /*
+ * If nothing interferes and device_add() was returns success,
+ * del_wk will destroy the device after the timer fires.
+ *
+ * Multiple userspace processes can interfere in the working of the timer:
+ * - Writing to the coredump will reschedule the timer to run immediately,
+ * if still armed.
+ *
+ * This is handled by using "if (cancel_delayed_work()) {
+ * schedule_delayed_work() }", to prevent re-arming after having
+ * been previously fired.
+ * - Writing to /sys/class/devcoredump/disabled will destroy the
+ * coredump synchronously.
+ * This is handled by using disable_delayed_work_sync(), and then
+ * checking if deleted flag is set with &devcd->mutex held.
+ */
struct delayed_work del_wk;
struct device *failing_dev;
};
@@ -95,14 +91,27 @@ static void devcd_dev_release(struct device *dev)
kfree(devcd);
}
+static void __devcd_del(struct devcd_entry *devcd)
+{
+ devcd->deleted = true;
+ device_del(&devcd->devcd_dev);
+ put_device(&devcd->devcd_dev);
+}
+
static void devcd_del(struct work_struct *wk)
{
struct devcd_entry *devcd;
+ bool init_completed;
devcd = container_of(wk, struct devcd_entry, del_wk.work);
- device_del(&devcd->devcd_dev);
- put_device(&devcd->devcd_dev);
+ /* devcd->mutex serializes against dev_coredumpm_timeout */
+ mutex_lock(&devcd->mutex);
+ init_completed = devcd->init_completed;
+ mutex_unlock(&devcd->mutex);
+
+ if (init_completed)
+ __devcd_del(devcd);
}
static ssize_t devcd_data_read(struct file *filp, struct kobject *kobj,
@@ -122,12 +131,12 @@ static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj,
struct device *dev = kobj_to_dev(kobj);
struct devcd_entry *devcd = dev_to_devcd(dev);
- mutex_lock(&devcd->mutex);
- if (!devcd->delete_work) {
- devcd->delete_work = true;
- mod_delayed_work(system_wq, &devcd->del_wk, 0);
- }
- mutex_unlock(&devcd->mutex);
+ /*
+ * Although it's tempting to use mod_delayed work here,
+ * that will cause a reschedule if the timer already fired.
+ */
+ if (cancel_delayed_work(&devcd->del_wk))
+ schedule_delayed_work(&devcd->del_wk, 0);
return count;
}
@@ -151,11 +160,21 @@ static int devcd_free(struct device *dev, void *data)
{
struct devcd_entry *devcd = dev_to_devcd(dev);
+ /*
+ * To prevent a race with devcd_data_write(), disable work and
+ * complete manually instead.
+ *
+ * We cannot rely on the return value of
+ * disable_delayed_work_sync() here, because it might be in the
+ * middle of a cancel_delayed_work + schedule_delayed_work pair.
+ *
+ * devcd->mutex here guards against multiple parallel invocations
+ * of devcd_free().
+ */
+ disable_delayed_work_sync(&devcd->del_wk);
mutex_lock(&devcd->mutex);
- if (!devcd->delete_work)
- devcd->delete_work = true;
-
- flush_delayed_work(&devcd->del_wk);
+ if (!devcd->deleted)
+ __devcd_del(devcd);
mutex_unlock(&devcd->mutex);
return 0;
}
@@ -179,12 +198,10 @@ static ssize_t disabled_show(const struct class *class, const struct class_attri
* put_device() <- last reference
* error = fn(dev, data) devcd_dev_release()
* devcd_free(dev, data) kfree(devcd)
- * mutex_lock(&devcd->mutex);
*
*
* In the above diagram, it looks like disabled_store() would be racing with parallelly
- * running devcd_del() and result in memory abort while acquiring devcd->mutex which
- * is called after kfree of devcd memory after dropping its last reference with
+ * running devcd_del() and result in memory abort after dropping its last reference with
* put_device(). However, this will not happens as fn(dev, data) runs
* with its own reference to device via klist_node so it is not its last reference.
* so, above situation would not occur.
@@ -374,7 +391,7 @@ void dev_coredumpm_timeout(struct device *dev, struct module *owner,
devcd->read = read;
devcd->free = free;
devcd->failing_dev = get_device(dev);
- devcd->delete_work = false;
+ devcd->deleted = false;
mutex_init(&devcd->mutex);
device_initialize(&devcd->devcd_dev);
@@ -383,8 +400,14 @@ void dev_coredumpm_timeout(struct device *dev, struct module *owner,
atomic_inc_return(&devcd_count));
devcd->devcd_dev.class = &devcd_class;
- mutex_lock(&devcd->mutex);
dev_set_uevent_suppress(&devcd->devcd_dev, true);
+
+ /* devcd->mutex prevents devcd_del() completing until init finishes */
+ mutex_lock(&devcd->mutex);
+ devcd->init_completed = false;
+ INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
+ schedule_delayed_work(&devcd->del_wk, timeout);
+
if (device_add(&devcd->devcd_dev))
goto put_device;
@@ -401,13 +424,20 @@ void dev_coredumpm_timeout(struct device *dev, struct module *owner,
dev_set_uevent_suppress(&devcd->devcd_dev, false);
kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD);
- INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
- schedule_delayed_work(&devcd->del_wk, timeout);
+
+ /*
+ * Safe to run devcd_del() now that we are done with devcd_dev.
+ * Alternatively we could have taken a ref on devcd_dev before
+ * dropping the lock.
+ */
+ devcd->init_completed = true;
mutex_unlock(&devcd->mutex);
return;
put_device:
- put_device(&devcd->devcd_dev);
mutex_unlock(&devcd->mutex);
+ cancel_delayed_work_sync(&devcd->del_wk);
+ put_device(&devcd->devcd_dev);
+
put_module:
module_put(owner);
free:
--
2.45.2