[PATCH] md/raid10: fix deadlock when handle read error and running data-check at same time

7 Apr 2023

When running a data-check and encountering a normal I/O error, raid10d
handles the error while one resync I/O has been added to conf->retry_list,
waiting for raid10d to handle it. The barrier therefore never drops to
zero, and the normal I/O (the read error retry) gets stuck in wait_barrier
in raid10_read_request. After this, the resync thread gets stuck in
raise_barrier and other processes get stuck in wait_barrier. Ignore the
barrier for read error retries in raid10_read_request to avoid the
deadlock. This patch is for kernel linux-4.19.y.
processA      md0_raid10          md0_resync               processB
-------------------------------------------------------------------------
        |         |                     |                      |
read io error     |                     |                      |
        |   handle_read_error     raise_barrier                |
        |         |               (nr_pending=1,barrier=1)     |
                  |                     |                 wait_barrier
                  |                     |       (nr_waiting=1,barrier=1)
           allow_barrier                |                      |
          (nr_pending=0)                |                      |
                  |                     |                      
                  |                conf->retry_list
                  |                     |
                  |                     |
            wait_barrier
          (nr_waiting=2,barrier=1)
[ 1452.065519] INFO: task md0_raid10:381 blocked for more than 120 seconds.
[ 1452.065852]       Tainted: G           OE K   4.19.280 #2
[ 1452.066018] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 1452.066189] md0_raid10      D    0   381      2 0x80000000
[ 1452.066191] Call Trace:
[ 1452.066197]  __schedule+0x3f8/0x8b0
[ 1452.066199]  schedule+0x36/0x80
[ 1452.066201]  wait_barrier+0x150/0x1b0
[ 1452.066203]  ? wait_woken+0x80/0x80
[ 1452.066205]  raid10_read_request+0xa8/0x510
[ 1452.066206]  handle_read_error+0xa9/0x220
[ 1452.066207]  ? pick_next_task_fair+0x15d/0x610
[ 1452.066208]  raid10d+0xa01/0x1510
[ 1452.066210]  ? schedule+0x36/0x80
[ 1452.066211]  md_thread+0x133/0x180
[ 1452.066212]  ? md_thread+0x133/0x180
[ 1452.066213]  ? wait_woken+0x80/0x80
[ 1452.066214]  kthread+0x105/0x140
Signed-off-by: linminggui <linminggui1@bigo.sg>
---
 drivers/md/raid10.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 9f9cd2f..9f00400 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1137,6 +1137,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
    int slot = r10_bio->read_slot;
    struct md_rdev *err_rdev = NULL;
    gfp_t gfp = GFP_NOIO;
+	bool error_retry = false;
if (slot >= 0 && r10_bio->devs[slot].rdev) {
    	/*
@@ -1153,6 +1154,9 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
    	 */
    	gfp = GFP_NOIO | __GFP_HIGH;
+		error_retry = true;
+		atomic_inc(&conf->nr_pending);
+
    	rcu_read_lock();
    	disk = r10_bio->devs[slot].devnum;
    	err_rdev = rcu_dereference(conf->mirrors[disk].rdev);
@@ -1169,8 +1173,10 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
     * Register the new request and wait if the reconstruction
     * thread has put up a bar for new requests.
     * Continue immediately if no resync is active currently.
+	 * Ignore barrier if this is an error retry.
     */
-	wait_barrier(conf);
+	if (!error_retry)
+		wait_barrier(conf);
sectors = r10_bio->sectors;
    while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
@@ -1181,12 +1187,14 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
    	 * pass
    	 */
    	raid10_log(conf->mddev, "wait reshape");
-		allow_barrier(conf);
+		if (!error_retry)
+			allow_barrier(conf);
    	wait_event(conf->wait_barrier,
    		   conf->reshape_progress <= bio->bi_iter.bi_sector ||
    		   conf->reshape_progress >= bio->bi_iter.bi_sector +
    		   sectors);
-		wait_barrier(conf);
+		if (!error_retry)
+			wait_barrier(conf);
    }
rdev = read_balance(conf, r10_bio, &max_sectors);
@@ -1208,9 +1216,11 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
    	struct bio *split = bio_split(bio, max_sectors,
    				      gfp, &conf->bio_split);
    	bio_chain(split, bio);
-		allow_barrier(conf);
+		if (!error_retry)
+			allow_barrier(conf);
    	generic_make_request(bio);
-		wait_barrier(conf);
+		if (!error_retry)
+			wait_barrier(conf);
    	bio = split;
    	r10_bio->master_bio = bio;
    	r10_bio->sectors = max_sectors;
-- 
2.7.4


    

2025

2024

2023

2022

2021

2020

2019

2018

2017

[PATCH] md/raid10: fix deadlock when handle read error and running data-check at same time