There is a lock inversion and rwsem read-lock recursion in the devfreq
target callback which can lead to deadlocks.
Specifically, ufshcd_devfreq_scale() already holds a clk_scaling_lock
read lock when toggling the write booster, which involves taking the
dev_cmd mutex before taking another clk_scaling_lock read lock.
This can lead to a deadlock if another thread:

  1) tries to acquire the dev_cmd and clk_scaling locks in the correct
     order, or

  2) takes a clk_scaling write lock before the attempt to take the
     clk_scaling read lock a second time.
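Expressed as a simplified call chain (cf. the lockdep chains quoted
below), the recursive path is roughly:

  ufshcd_devfreq_scale()
    down_read(&hba->clk_scaling_lock)
    ufshcd_wb_toggle()
      ufshcd_query_flag_retry()
        ufshcd_query_flag()
          mutex_lock(&hba->dev_cmd.lock)
          ufshcd_exec_dev_cmd()
            down_read(&hba->clk_scaling_lock)	/* read-lock recursion */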
Fix this by dropping the clk_scaling_lock before toggling the write
booster as was done before commit 0e9d4ca43ba8 ("scsi: ufs: Protect some
contexts from unexpected clock scaling").
While the devfreq callbacks are already serialised, add a second
serialising mutex to handle the unlikely case where a callback triggered
through the devfreq sysfs interface is racing with a request to disable
clock scaling through the UFS controller 'clkscale_enable' sysfs
attribute. This could otherwise lead to the write booster being left
disabled after having disabled clock scaling.
Also take the new mutex in ufshcd_clk_scaling_allow() to make sure that
any pending write booster update has completed on return.
Note that this currently only affects Qualcomm platforms since commit
87bd05016a64 ("scsi: ufs: core: Allow host driver to disable wb toggling
during clock scaling").
The lock inversion (i.e. 1 above) was reported by lockdep as:
======================================================
WARNING: possible circular locking dependency detected
6.1.0-next-20221216 #211 Not tainted
------------------------------------------------------
kworker/u16:2/71 is trying to acquire lock:
ffff076280ba98a0 (&hba->dev_cmd.lock){+.+.}-{3:3}, at: ufshcd_query_flag+0x50/0x1c0
but task is already holding lock:
ffff076280ba9cf0 (&hba->clk_scaling_lock){++++}-{3:3}, at: ufshcd_devfreq_scale+0x2b8/0x380
which lock already depends on the new lock.
[ +0.011606]
the existing dependency chain (in reverse order) is:
-> #1 (&hba->clk_scaling_lock){++++}-{3:3}:
lock_acquire+0x68/0x90
down_read+0x58/0x80
ufshcd_exec_dev_cmd+0x70/0x2c0
ufshcd_verify_dev_init+0x68/0x170
ufshcd_probe_hba+0x398/0x1180
ufshcd_async_scan+0x30/0x320
async_run_entry_fn+0x34/0x150
process_one_work+0x288/0x6c0
worker_thread+0x74/0x450
kthread+0x118/0x120
ret_from_fork+0x10/0x20
-> #0 (&hba->dev_cmd.lock){+.+.}-{3:3}:
__lock_acquire+0x12a0/0x2240
lock_acquire.part.0+0xcc/0x220
lock_acquire+0x68/0x90
__mutex_lock+0x98/0x430
mutex_lock_nested+0x2c/0x40
ufshcd_query_flag+0x50/0x1c0
ufshcd_query_flag_retry+0x64/0x100
ufshcd_wb_toggle+0x5c/0x120
ufshcd_devfreq_scale+0x2c4/0x380
ufshcd_devfreq_target+0xf4/0x230
devfreq_set_target+0x84/0x2f0
devfreq_update_target+0xc4/0xf0
devfreq_monitor+0x38/0x1f0
process_one_work+0x288/0x6c0
worker_thread+0x74/0x450
kthread+0x118/0x120
ret_from_fork+0x10/0x20
other info that might help us debug this:
Possible unsafe locking scenario:
       CPU0                    CPU1
       ----                    ----
  lock(&hba->clk_scaling_lock);
                               lock(&hba->dev_cmd.lock);
                               lock(&hba->clk_scaling_lock);
  lock(&hba->dev_cmd.lock);
*** DEADLOCK ***
Fixes: 0e9d4ca43ba8 ("scsi: ufs: Protect some contexts from unexpected clock scaling")
Cc: stable@vger.kernel.org # 5.12
Cc: Can Guo <quic_cang@quicinc.com>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
---
This issue has apparently been known for over a year [1] without anyone
bothering to fix it. Aside from the potential deadlocks, it also means
that developers using Qualcomm platforms cannot use lockdep to prevent
further issues like this from being introduced in other places.
Johan
[1] https://lore.kernel.org/lkml/1631843521-2863-1-git-send-email-cang@codeauro…
drivers/ufs/core/ufshcd.c | 29 +++++++++++++++--------------
include/ufs/ufshcd.h | 2 ++
2 files changed, 17 insertions(+), 14 deletions(-)
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index bda61be5f035..5c3821b2fcf8 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -1234,12 +1234,14 @@ static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba)
* clock scaling is in progress
*/
ufshcd_scsi_block_requests(hba);
+ mutex_lock(&hba->wb_mutex);
down_write(&hba->clk_scaling_lock);
if (!hba->clk_scaling.is_allowed ||
ufshcd_wait_for_doorbell_clr(hba, DOORBELL_CLR_TOUT_US)) {
ret = -EBUSY;
up_write(&hba->clk_scaling_lock);
+ mutex_unlock(&hba->wb_mutex);
ufshcd_scsi_unblock_requests(hba);
goto out;
}
@@ -1251,12 +1253,16 @@ static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba)
return ret;
}
-static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, bool writelock)
+static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, bool scale_up)
{
- if (writelock)
- up_write(&hba->clk_scaling_lock);
- else
- up_read(&hba->clk_scaling_lock);
+ up_write(&hba->clk_scaling_lock);
+
+ /* Enable Write Booster if we have scaled up else disable it */
+ if (ufshcd_enable_wb_if_scaling_up(hba))
+ ufshcd_wb_toggle(hba, scale_up);
+
+ mutex_unlock(&hba->wb_mutex);
+
ufshcd_scsi_unblock_requests(hba);
ufshcd_release(hba);
}
@@ -1273,7 +1279,6 @@ static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, bool writelock)
static int ufshcd_devfreq_scale(struct ufs_hba *hba, bool scale_up)
{
int ret = 0;
- bool is_writelock = true;
ret = ufshcd_clock_scaling_prepare(hba);
if (ret)
@@ -1302,15 +1307,8 @@ static int ufshcd_devfreq_scale(struct ufs_hba *hba, bool scale_up)
}
}
- /* Enable Write Booster if we have scaled up else disable it */
- if (ufshcd_enable_wb_if_scaling_up(hba)) {
- downgrade_write(&hba->clk_scaling_lock);
- is_writelock = false;
- ufshcd_wb_toggle(hba, scale_up);
- }
-
out_unprepare:
- ufshcd_clock_scaling_unprepare(hba, is_writelock);
+ ufshcd_clock_scaling_unprepare(hba, scale_up);
return ret;
}
@@ -6066,9 +6064,11 @@ static void ufshcd_force_error_recovery(struct ufs_hba *hba)
static void ufshcd_clk_scaling_allow(struct ufs_hba *hba, bool allow)
{
+ mutex_lock(&hba->wb_mutex);
down_write(&hba->clk_scaling_lock);
hba->clk_scaling.is_allowed = allow;
up_write(&hba->clk_scaling_lock);
+ mutex_unlock(&hba->wb_mutex);
}
static void ufshcd_clk_scaling_suspend(struct ufs_hba *hba, bool suspend)
@@ -9793,6 +9793,7 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
/* Initialize mutex for exception event control */
mutex_init(&hba->ee_ctrl_mutex);
+ mutex_init(&hba->wb_mutex);
init_rwsem(&hba->clk_scaling_lock);
ufshcd_init_clk_gating(hba);
diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index 5cf81dff60aa..727084cd79be 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -808,6 +808,7 @@ struct ufs_hba_monitor {
* @urgent_bkops_lvl: keeps track of urgent bkops level for device
* @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level for
* device is known or not.
+ * @wb_mutex: used to serialize devfreq and sysfs write booster toggling
* @clk_scaling_lock: used to serialize device commands and clock scaling
* @desc_size: descriptor sizes reported by device
* @scsi_block_reqs_cnt: reference counting for scsi block requests
@@ -951,6 +952,7 @@ struct ufs_hba {
enum bkops_status urgent_bkops_lvl;
bool is_urgent_bkops_lvl_checked;
+ struct mutex wb_mutex;
struct rw_semaphore clk_scaling_lock;
unsigned char desc_size[QUERY_DESC_IDN_MAX];
atomic_t scsi_block_reqs_cnt;
--
2.37.4
Hi,
A patch was introduced in 6.2 that allows outputting which GPIO is
active at a given time for pinctrl-amd with dynamic debugging. It is
particularly useful for debugging the wakeup source when it is reported
as the GPIO controller.
I've asked at least four people to apply it on 6.1.y kernels and turn
on those statements in /sys/kernel/debug/dynamic_debug/control to debug
spurious-wakeup issues, which has me realizing it would make sense for
it to just be in 6.1.y generally.
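For reference, enabling those statements is the usual dynamic-debug
dance; a minimal example, assuming CONFIG_DYNAMIC_DEBUG is enabled and
debugfs is mounted at /sys/kernel/debug:

  # enable all pr_debug() statements in drivers/pinctrl/pinctrl-amd.c
  echo 'file pinctrl-amd.c +p' > /sys/kernel/debug/dynamic_debug/control
  # and to turn them back off again
  echo 'file pinctrl-amd.c -p' > /sys/kernel/debug/dynamic_debug/control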
Would you please cherry-pick this commit?
1d66e379731f ("pinctrl: amd: Add dynamic debugging for active GPIOs")
Thanks,
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
Possible dependencies:
544d163d659d ("io_uring: lock overflowing for IOPOLL")
a8cf95f93610 ("io_uring: fix overflow handling regression")
fa18fa2272c7 ("io_uring: inline __io_req_complete_put()")
f9d567c75ec2 ("io_uring: inline __io_req_complete_post()")
52120f0fadcb ("io_uring: add allow_overflow to io_post_aux_cqe")
e6130eba8a84 ("io_uring: add support for passing fixed file descriptors")
253993210bd8 ("io_uring: introduce locking helpers for CQE posting")
305bef988708 ("io_uring: hide eventfd assumptions in eventfd paths")
d9dee4302a7c ("io_uring: remove ->flush_cqes optimisation")
a830ffd28780 ("io_uring: move io_eventfd_signal()")
9046c6415be6 ("io_uring: reshuffle io_uring/io_uring.h")
d142c3ec8d16 ("io_uring: remove extra io_commit_cqring()")
68494a65d0e2 ("io_uring: introduce io_req_cqe_overflow()")
faf88dde060f ("io_uring: don't inline __io_get_cqe()")
d245bca6375b ("io_uring: don't expose io_fill_cqe_aux()")
aa1e90f64ee5 ("io_uring: move small helpers to headers")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 544d163d659d45a206d8929370d5a2984e546cb7 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 12 Jan 2023 13:08:56 +0000
Subject: [PATCH] io_uring: lock overflowing for IOPOLL
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
syzbot reports an issue with overflow filling for IOPOLL:
WARNING: CPU: 0 PID: 28 at io_uring/io_uring.c:734 io_cqring_event_overflow+0x1c0/0x230 io_uring/io_uring.c:734
CPU: 0 PID: 28 Comm: kworker/u4:1 Not tainted 6.2.0-rc3-syzkaller-16369-g358a161a6a9e #0
Workqueue: events_unbound io_ring_exit_work
Call trace:
io_cqring_event_overflow+0x1c0/0x230 io_uring/io_uring.c:734
io_req_cqe_overflow+0x5c/0x70 io_uring/io_uring.c:773
io_fill_cqe_req io_uring/io_uring.h:168 [inline]
io_do_iopoll+0x474/0x62c io_uring/rw.c:1065
io_iopoll_try_reap_events+0x6c/0x108 io_uring/io_uring.c:1513
io_uring_try_cancel_requests+0x13c/0x258 io_uring/io_uring.c:3056
io_ring_exit_work+0xec/0x390 io_uring/io_uring.c:2869
process_one_work+0x2d8/0x504 kernel/workqueue.c:2289
worker_thread+0x340/0x610 kernel/workqueue.c:2436
kthread+0x12c/0x158 kernel/kthread.c:376
ret_from_fork+0x10/0x20 arch/arm64/kernel/entry.S:863
There is no real problem for normal IOPOLL as flush is also called with
uring_lock taken, but it's getting more complicated for IOPOLL|SQPOLL,
for which __io_cqring_overflow_flush() happens from the CQ waiting path.
Reported-and-tested-by: syzbot+6805087452d72929404e@syzkaller.appspotmail.com
Cc: stable@vger.kernel.org # 5.10+
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 8227af2e1c0f..9c3ddd46a1ad 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -1062,7 +1062,11 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
continue;
req->cqe.flags = io_put_kbuf(req, 0);
- io_fill_cqe_req(req->ctx, req);
+ if (unlikely(!__io_fill_cqe_req(ctx, req))) {
+ spin_lock(&ctx->completion_lock);
+ io_req_cqe_overflow(req);
+ spin_unlock(&ctx->completion_lock);
+ }
}
if (unlikely(!nr_events))
On Mon, Jan 16, 2023 at 06:12:12AM +0530, mkv22@cantab.net wrote:
> Apologies that I am unable to attach more detailed information from dmesg
> or logs as the update was in a production laptop and had to be rolled back
> to 6.1.5 immediately and lost the dmesg output.
Any chance to run 'git bisect' to find the offending change?
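A typical run would look something like this, assuming v6.1.5 was the
last known-good kernel and with <bad-tag> standing in for whichever
release first showed the problem:

  git bisect start
  git bisect bad <bad-tag>
  git bisect good v6.1.5
  # build, boot and test the kernel that bisect checks out, then mark it
  git bisect good   # or: git bisect bad
  # repeat until git reports the first bad commit, then clean up
  git bisect reset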
thanks,
greg k-h