The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x f5d4e04634c9cf68bdf23de08ada0bb92e8befe7
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024052603-probing-percolate-6265@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f5d4e04634c9cf68bdf23de08ada0bb92e8befe7 Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Date: Mon, 20 May 2024 22:26:19 +0900
Subject: [PATCH] nilfs2: fix use-after-free of timer for log writer thread
Patch series "nilfs2: fix log writer related issues".
This bug fix series covers three nilfs2 log writer-related issues: a timer
use-after-free issue, a potential deadlock issue on unmount, and a potential
freeze issue in event synchronization found during their analysis. Details
are described in each commit log.
This patch (of 3):
A use-after-free issue has been reported regarding the timer sc_timer on
the nilfs_sc_info structure.
The problem is that even though it is used to wake up a sleeping log
writer thread, sc_timer is not shut down until the nilfs_sc_info structure
is about to be freed, and is used regardless of the thread's lifetime.
Fix this issue by limiting the use of sc_timer to the period while the log
writer thread is alive.
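For reference, a minimal sketch of the resulting timer lifecycle (condensed
from the diff below; the main loop, locking, and error paths are omitted):
        static int nilfs_segctor_thread(void *arg)
        {
                struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg;
                /* The timer now exists only while this thread runs. */
                timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0);
                sci->sc_task = current;
                /*
                 * ... main loop: add_timer()/del_timer_sync() callers first
                 * check sci->sc_task under sc_state_lock ...
                 */
                sci->sc_task = NULL;
                /* No timer callback can run or be re-armed past this point. */
                timer_shutdown_sync(&sci->sc_timer);
                return 0;
        }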
Link: https://lkml.kernel.org/r/20240520132621.4054-1-konishi.ryusuke@gmail.com
Link: https://lkml.kernel.org/r/20240520132621.4054-2-konishi.ryusuke@gmail.com
Fixes: fdce895ea5dd ("nilfs2: change sc_timer from a pointer to an embedded one in struct nilfs_sc_info")
Signed-off-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Reported-by: "Bai, Shuangpeng" <sjb7183(a)psu.edu>
Closes: https://groups.google.com/g/syzkaller/c/MK_LYqtt8ko/m/8rgdWeseAwAJ
Tested-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 6be7dd423fbd..7cb34e1c9206 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2118,8 +2118,10 @@ static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci)
{
spin_lock(&sci->sc_state_lock);
if (!(sci->sc_state & NILFS_SEGCTOR_COMMIT)) {
- sci->sc_timer.expires = jiffies + sci->sc_interval;
- add_timer(&sci->sc_timer);
+ if (sci->sc_task) {
+ sci->sc_timer.expires = jiffies + sci->sc_interval;
+ add_timer(&sci->sc_timer);
+ }
sci->sc_state |= NILFS_SEGCTOR_COMMIT;
}
spin_unlock(&sci->sc_state_lock);
@@ -2320,10 +2322,21 @@ int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode,
*/
static void nilfs_segctor_accept(struct nilfs_sc_info *sci)
{
+ bool thread_is_alive;
+
spin_lock(&sci->sc_state_lock);
sci->sc_seq_accepted = sci->sc_seq_request;
+ thread_is_alive = (bool)sci->sc_task;
spin_unlock(&sci->sc_state_lock);
- del_timer_sync(&sci->sc_timer);
+
+ /*
+ * This function does not race with the log writer thread's
+ * termination. Therefore, deleting sc_timer, which should not be
+ * done after the log writer thread exits, can be done safely outside
+ * the area protected by sc_state_lock.
+ */
+ if (thread_is_alive)
+ del_timer_sync(&sci->sc_timer);
}
/**
@@ -2349,7 +2362,7 @@ static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err)
sci->sc_flush_request &= ~FLUSH_DAT_BIT;
/* re-enable timer if checkpoint creation was not done */
- if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) &&
+ if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && sci->sc_task &&
time_before(jiffies, sci->sc_timer.expires))
add_timer(&sci->sc_timer);
}
@@ -2539,6 +2552,7 @@ static int nilfs_segctor_thread(void *arg)
int timeout = 0;
sci->sc_timer_task = current;
+ timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0);
/* start sync. */
sci->sc_task = current;
@@ -2606,6 +2620,7 @@ static int nilfs_segctor_thread(void *arg)
end_thread:
/* end sync. */
sci->sc_task = NULL;
+ timer_shutdown_sync(&sci->sc_timer);
wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */
spin_unlock(&sci->sc_state_lock);
return 0;
@@ -2669,7 +2684,6 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb,
INIT_LIST_HEAD(&sci->sc_gc_inodes);
INIT_LIST_HEAD(&sci->sc_iput_queue);
INIT_WORK(&sci->sc_iput_work, nilfs_iput_work_func);
- timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0);
sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT;
sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ;
@@ -2748,7 +2762,6 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
down_write(&nilfs->ns_segctor_sem);
- timer_shutdown_sync(&sci->sc_timer);
kfree(sci);
}
commit cd94d1b182d2 ("dm/amd/pm: Fix problems with reboot/shutdown for
some SMU 13.0.4/13.0.11 users") attempted to fix shutdown issues
that were reported since commit 31729e8c21ec ("drm/amd/pm: fixes a
random hang in S4 for SMU v13.0.4/11") but caused issues for some
people.
Adjust the workaround flow so that the GFX reset workaround only applies in
the S4 case (see the sketch after this list):
-> For shutdown go through SMU_MSG_PrepareMp1ForUnload
-> For S4 go through SMU_MSG_GfxDeviceDriverReset and
SMU_MSG_PrepareMp1ForUnload
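Condensed into a sketch (same helpers and flags as in the diff below; error
handling abbreviated), the teardown path in
smu_v13_0_4_system_features_control() becomes:
        if (!en && !adev->in_s0ix) {
                if (adev->in_s4) {
                        /*
                         * S4 only: issue a GFX reset before the MP1 unload so
                         * GC/RLC/PMFW do not end up in an invalid state.
                         */
                        ret = smu_cmn_send_smc_msg_with_param(smu,
                                        SMU_MSG_GfxDeviceDriverReset,
                                        SMU_RESET_MODE_2, NULL);
                        if (ret)
                                return ret;
                }
                /* Both shutdown and S4 then prepare MP1 for unload. */
                ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PrepareMp1ForUnload, NULL);
        }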
Reported-and-tested-by: lectrode <electrodexsnet(a)gmail.com>
Closes: https://github.com/void-linux/void-packages/issues/50417
Cc: stable(a)vger.kernel.org
Fixes: cd94d1b182d2 ("dm/amd/pm: Fix problems with reboot/shutdown for some SMU 13.0.4/13.0.11 users")
Signed-off-by: Mario Limonciello <mario.limonciello(a)amd.com>
---
Cc: regressions(a)lists.linux.dev
---
.../drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c | 20 ++++++++++---------
1 file changed, 11 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
index 4abfcd32747d..c7ab0d7027d9 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
@@ -226,15 +226,17 @@ static int smu_v13_0_4_system_features_control(struct smu_context *smu, bool en)
struct amdgpu_device *adev = smu->adev;
int ret = 0;
- if (!en && adev->in_s4) {
- /* Adds a GFX reset as workaround just before sending the
- * MP1_UNLOAD message to prevent GC/RLC/PMFW from entering
- * an invalid state.
- */
- ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset,
- SMU_RESET_MODE_2, NULL);
- if (ret)
- return ret;
+ if (!en && !adev->in_s0ix) {
+ if (adev->in_s4) {
+ /* Adds a GFX reset as workaround just before sending the
+ * MP1_UNLOAD message to prevent GC/RLC/PMFW from entering
+ * an invalid state.
+ */
+ ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset,
+ SMU_RESET_MODE_2, NULL);
+ if (ret)
+ return ret;
+ }
ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PrepareMp1ForUnload, NULL);
}
--
2.43.0
FWIW, this is the output from faddr2line for Linux 6.8.11:
$ scripts/faddr2line --list vmlinux blk_try_enter_queue+0xc/0x75
blk_try_enter_queue+0xc/0x75:
__ref_is_percpu at include/linux/percpu-refcount.h:174 (discriminator 2)
169 * READ_ONCE() is required when fetching it.
170 *
171 * The dependency ordering from the READ_ONCE() pairs
172 * with smp_store_release() in __percpu_ref_switch_to_percpu().
173 */
>174< percpu_ptr = READ_ONCE(ref->percpu_count_ptr);
175
176 /*
177 * Theoretically, the following could test just ATOMIC; however,
178 * then we'd have to mask off DEAD separately as DEAD may be
179 * visible without ATOMIC if we race with percpu_ref_kill(). DEAD
(inlined by) percpu_ref_tryget_live_rcu at include/linux/percpu-refcount.h:282 (discriminator 2)
277 unsigned long __percpu *percpu_count;
278 bool ret = false;
279
280 WARN_ON_ONCE(!rcu_read_lock_held());
281
>282< if (likely(__ref_is_percpu(ref, &percpu_count))) {
283 this_cpu_inc(*percpu_count);
284 ret = true;
285 } else if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD)) {
286 ret = atomic_long_inc_not_zero(&ref->data->count);
287 }
(inlined by) blk_try_enter_queue at block/blk.h:43 (discriminator 2)
38 void submit_bio_noacct_nocheck(struct bio *bio);
39
40 static inline bool blk_try_enter_queue(struct request_queue *q, bool pm)
41 {
42 rcu_read_lock();
>43< if (!percpu_ref_tryget_live_rcu(&q->q_usage_counter))
44 goto fail;
45
46 /*
47 * The code that increments the pm_only counter must ensure that the
48 * counter is globally visible before the queue is unfrozen.
From: Kemeng Shi <shikemeng(a)huaweicloud.com>
[ Upstream commit d92109891f21cf367caa2cc6dff11a4411d917f4 ]
In the case where there are no more inodes for I/O left in the io list from
the last wb_writeback, we may bail out early even though there are inodes in
the dirty list that should be written back. Only bail out once we have queued
inodes for I/O at least once, to avoid missing dirtied inodes.
This was found by code reading...
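As a rough sketch of the resulting loop in wb_writeback() (simplified from
the diff below; locking and the other exit conditions are omitted):
        bool queued = false;    /* set once inodes have been queued for I/O */
        for (;;) {
                /* ... */
                if (list_empty(&wb->b_io)) {
                        queue_io(wb, work, dirtied_before);
                        queued = true;
                }
                progress = writeback_sb_inodes(work->sb, wb, work);
                /* ... */
                /*
                 * Keep looping while progress is being made, or while the
                 * dirty list has not been queued even once; bail out only
                 * after a round that queued inodes made no progress.
                 */
                if (progress || !queued)
                        continue;
                /* ... */
        }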
Signed-off-by: Kemeng Shi <shikemeng(a)huaweicloud.com>
Link: https://lore.kernel.org/r/20240228091958.288260-3-shikemeng@huaweicloud.com
Reviewed-by: Jan Kara <jack(a)suse.cz>
[brauner(a)kernel.org: fold in memory corruption fix from Jan in [1]]
Link: https://lore.kernel.org/r/20240405132346.bid7gibby3lxxhez@quack3 [1]
Signed-off-by: Christian Brauner <brauner(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
fs/fs-writeback.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 1767493dffda7..0a498bc60f557 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -2044,6 +2044,7 @@ static long wb_writeback(struct bdi_writeback *wb,
struct inode *inode;
long progress;
struct blk_plug plug;
+ bool queued = false;
blk_start_plug(&plug);
for (;;) {
@@ -2086,8 +2087,10 @@ static long wb_writeback(struct bdi_writeback *wb,
dirtied_before = jiffies;
trace_writeback_start(wb, work);
- if (list_empty(&wb->b_io))
+ if (list_empty(&wb->b_io)) {
queue_io(wb, work, dirtied_before);
+ queued = true;
+ }
if (work->sb)
progress = writeback_sb_inodes(work->sb, wb, work);
else
@@ -2102,7 +2105,7 @@ static long wb_writeback(struct bdi_writeback *wb,
* mean the overall work is done. So we keep looping as long
* as made some progress on cleaning pages or inodes.
*/
- if (progress) {
+ if (progress || !queued) {
spin_unlock(&wb->list_lock);
continue;
}
--
2.43.0