The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x ff84772fd45d486e4fc78c82e2f70ce5333543e6
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023080753-shortage-helium-c261@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
ff84772fd45d ("exfat: release s_lock before calling dir_emit()")
703e3e9a9cb1 ("exfat_iterate(): don't open-code file_inode(file)")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ff84772fd45d486e4fc78c82e2f70ce5333543e6 Mon Sep 17 00:00:00 2001
From: Sungjong Seo <sj1557.seo(a)samsung.com>
Date: Fri, 14 Jul 2023 17:43:54 +0900
Subject: [PATCH] exfat: release s_lock before calling dir_emit()
There is a potential deadlock reported by syzbot as below:
======================================================
WARNING: possible circular locking dependency detected
6.4.0-next-20230707-syzkaller #0 Not tainted
------------------------------------------------------
syz-executor330/5073 is trying to acquire lock:
ffff8880218527a0 (&mm->mmap_lock){++++}-{3:3}, at: mmap_read_lock_killable include/linux/mmap_lock.h:151 [inline]
ffff8880218527a0 (&mm->mmap_lock){++++}-{3:3}, at: get_mmap_lock_carefully mm/memory.c:5293 [inline]
ffff8880218527a0 (&mm->mmap_lock){++++}-{3:3}, at: lock_mm_and_find_vma+0x369/0x510 mm/memory.c:5344
but task is already holding lock:
ffff888019f760e0 (&sbi->s_lock){+.+.}-{3:3}, at: exfat_iterate+0x117/0xb50 fs/exfat/dir.c:232
which lock already depends on the new lock.
Chain exists of:
&mm->mmap_lock --> mapping.invalidate_lock#3 --> &sbi->s_lock
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(&sbi->s_lock);
lock(mapping.invalidate_lock#3);
lock(&sbi->s_lock);
rlock(&mm->mmap_lock);
Let's try to avoid above potential deadlock condition by moving dir_emit*()
out of sbi->s_lock coverage.
Fixes: ca06197382bd ("exfat: add directory operations")
Cc: stable(a)vger.kernel.org #v5.7+
Reported-by: syzbot+1741a5d9b79989c10bdc(a)syzkaller.appspotmail.com
Link: https://lore.kernel.org/lkml/00000000000078ee7e060066270b@google.com/T/#u
Tested-by: syzbot+1741a5d9b79989c10bdc(a)syzkaller.appspotmail.com
Signed-off-by: Sungjong Seo <sj1557.seo(a)samsung.com>
Signed-off-by: Namjae Jeon <linkinjeon(a)kernel.org>
diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c
index bc48f3329921..598081d0d059 100644
--- a/fs/exfat/dir.c
+++ b/fs/exfat/dir.c
@@ -218,7 +218,10 @@ static void exfat_free_namebuf(struct exfat_dentry_namebuf *nb)
exfat_init_namebuf(nb);
}
-/* skip iterating emit_dots when dir is empty */
+/*
+ * Before calling dir_emit*(), sbi->s_lock should be released
+ * because page fault can occur in dir_emit*().
+ */
#define ITER_POS_FILLED_DOTS (2)
static int exfat_iterate(struct file *file, struct dir_context *ctx)
{
@@ -233,11 +236,10 @@ static int exfat_iterate(struct file *file, struct dir_context *ctx)
int err = 0, fake_offset = 0;
exfat_init_namebuf(nb);
- mutex_lock(&EXFAT_SB(sb)->s_lock);
cpos = ctx->pos;
if (!dir_emit_dots(file, ctx))
- goto unlock;
+ goto out;
if (ctx->pos == ITER_POS_FILLED_DOTS) {
cpos = 0;
@@ -249,16 +251,18 @@ static int exfat_iterate(struct file *file, struct dir_context *ctx)
/* name buffer should be allocated before use */
err = exfat_alloc_namebuf(nb);
if (err)
- goto unlock;
+ goto out;
get_new:
+ mutex_lock(&EXFAT_SB(sb)->s_lock);
+
if (ei->flags == ALLOC_NO_FAT_CHAIN && cpos >= i_size_read(inode))
goto end_of_dir;
err = exfat_readdir(inode, &cpos, &de);
if (err) {
/*
- * At least we tried to read a sector. Move cpos to next sector
- * position (should be aligned).
+ * At least we tried to read a sector.
+ * Move cpos to next sector position (should be aligned).
*/
if (err == -EIO) {
cpos += 1 << (sb->s_blocksize_bits);
@@ -281,16 +285,10 @@ static int exfat_iterate(struct file *file, struct dir_context *ctx)
inum = iunique(sb, EXFAT_ROOT_INO);
}
- /*
- * Before calling dir_emit(), sb_lock should be released.
- * Because page fault can occur in dir_emit() when the size
- * of buffer given from user is larger than one page size.
- */
mutex_unlock(&EXFAT_SB(sb)->s_lock);
if (!dir_emit(ctx, nb->lfn, strlen(nb->lfn), inum,
(de.attr & ATTR_SUBDIR) ? DT_DIR : DT_REG))
- goto out_unlocked;
- mutex_lock(&EXFAT_SB(sb)->s_lock);
+ goto out;
ctx->pos = cpos;
goto get_new;
@@ -298,9 +296,8 @@ static int exfat_iterate(struct file *file, struct dir_context *ctx)
if (!cpos && fake_offset)
cpos = ITER_POS_FILLED_DOTS;
ctx->pos = cpos;
-unlock:
mutex_unlock(&EXFAT_SB(sb)->s_lock);
-out_unlocked:
+out:
/*
* To improve performance, free namebuf after unlock sb_lock.
* If namebuf is not allocated, this function do nothing
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x d42334578eba1390859012ebb91e1e556d51db49
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023080736-valley-grub-bcec@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
d42334578eba ("exfat: check if filename entries exceeds max filename length")
20914ff6dd56 ("exfat: move exfat_entry_set_cache from heap to stack")
a3ff29a95fde ("exfat: support dynamic allocate bh for exfat_entry_set_cache")
c6e2f52e3051 ("exfat: speed up iterate/lookup by fixing start point of traversing cluster chain")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d42334578eba1390859012ebb91e1e556d51db49 Mon Sep 17 00:00:00 2001
From: Namjae Jeon <linkinjeon(a)kernel.org>
Date: Thu, 13 Jul 2023 21:59:37 +0900
Subject: [PATCH] exfat: check if filename entries exceeds max filename length
exfat_extract_uni_name copies characters from a given file name entry into
the 'uniname' variable. This variable is actually defined on the stack of
the exfat_readdir() function. According to the definition of
the 'exfat_uni_name' type, the file name should be limited 255 characters
(+ null teminator space), but the exfat_get_uniname_from_ext_entry()
function can write more characters because there is no check if filename
entries exceeds max filename length. This patch add the check not to copy
filename characters when exceeding max filename length.
Cc: stable(a)vger.kernel.org
Cc: Yuezhang Mo <Yuezhang.Mo(a)sony.com>
Reported-by: Maxim Suhanov <dfirblog(a)gmail.com>
Reviewed-by: Sungjong Seo <sj1557.seo(a)samsung.com>
Signed-off-by: Namjae Jeon <linkinjeon(a)kernel.org>
diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c
index 957574180a5e..bc48f3329921 100644
--- a/fs/exfat/dir.c
+++ b/fs/exfat/dir.c
@@ -34,6 +34,7 @@ static int exfat_get_uniname_from_ext_entry(struct super_block *sb,
{
int i, err;
struct exfat_entry_set_cache es;
+ unsigned int uni_len = 0, len;
err = exfat_get_dentry_set(&es, sb, p_dir, entry, ES_ALL_ENTRIES);
if (err)
@@ -52,7 +53,10 @@ static int exfat_get_uniname_from_ext_entry(struct super_block *sb,
if (exfat_get_entry_type(ep) != TYPE_EXTEND)
break;
- exfat_extract_uni_name(ep, uniname);
+ len = exfat_extract_uni_name(ep, uniname);
+ uni_len += len;
+ if (len != EXFAT_FILE_NAME_LEN || uni_len >= MAX_NAME_LENGTH)
+ break;
uniname += EXFAT_FILE_NAME_LEN;
}
@@ -1079,7 +1083,8 @@ int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei,
if (entry_type == TYPE_EXTEND) {
unsigned short entry_uniname[16], unichar;
- if (step != DIRENT_STEP_NAME) {
+ if (step != DIRENT_STEP_NAME ||
+ name_len >= MAX_NAME_LENGTH) {
step = DIRENT_STEP_FILE;
continue;
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x d42334578eba1390859012ebb91e1e556d51db49
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023080735-headsman-grandpa-be68@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
d42334578eba ("exfat: check if filename entries exceeds max filename length")
20914ff6dd56 ("exfat: move exfat_entry_set_cache from heap to stack")
a3ff29a95fde ("exfat: support dynamic allocate bh for exfat_entry_set_cache")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d42334578eba1390859012ebb91e1e556d51db49 Mon Sep 17 00:00:00 2001
From: Namjae Jeon <linkinjeon(a)kernel.org>
Date: Thu, 13 Jul 2023 21:59:37 +0900
Subject: [PATCH] exfat: check if filename entries exceeds max filename length
exfat_extract_uni_name copies characters from a given file name entry into
the 'uniname' variable. This variable is actually defined on the stack of
the exfat_readdir() function. According to the definition of
the 'exfat_uni_name' type, the file name should be limited 255 characters
(+ null teminator space), but the exfat_get_uniname_from_ext_entry()
function can write more characters because there is no check if filename
entries exceeds max filename length. This patch add the check not to copy
filename characters when exceeding max filename length.
Cc: stable(a)vger.kernel.org
Cc: Yuezhang Mo <Yuezhang.Mo(a)sony.com>
Reported-by: Maxim Suhanov <dfirblog(a)gmail.com>
Reviewed-by: Sungjong Seo <sj1557.seo(a)samsung.com>
Signed-off-by: Namjae Jeon <linkinjeon(a)kernel.org>
diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c
index 957574180a5e..bc48f3329921 100644
--- a/fs/exfat/dir.c
+++ b/fs/exfat/dir.c
@@ -34,6 +34,7 @@ static int exfat_get_uniname_from_ext_entry(struct super_block *sb,
{
int i, err;
struct exfat_entry_set_cache es;
+ unsigned int uni_len = 0, len;
err = exfat_get_dentry_set(&es, sb, p_dir, entry, ES_ALL_ENTRIES);
if (err)
@@ -52,7 +53,10 @@ static int exfat_get_uniname_from_ext_entry(struct super_block *sb,
if (exfat_get_entry_type(ep) != TYPE_EXTEND)
break;
- exfat_extract_uni_name(ep, uniname);
+ len = exfat_extract_uni_name(ep, uniname);
+ uni_len += len;
+ if (len != EXFAT_FILE_NAME_LEN || uni_len >= MAX_NAME_LENGTH)
+ break;
uniname += EXFAT_FILE_NAME_LEN;
}
@@ -1079,7 +1083,8 @@ int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei,
if (entry_type == TYPE_EXTEND) {
unsigned short entry_uniname[16], unichar;
- if (step != DIRENT_STEP_NAME) {
+ if (step != DIRENT_STEP_NAME ||
+ name_len >= MAX_NAME_LENGTH) {
step = DIRENT_STEP_FILE;
continue;
}
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x d42334578eba1390859012ebb91e1e556d51db49
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023080734-remarry-tamer-aabe@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
d42334578eba ("exfat: check if filename entries exceeds max filename length")
20914ff6dd56 ("exfat: move exfat_entry_set_cache from heap to stack")
a3ff29a95fde ("exfat: support dynamic allocate bh for exfat_entry_set_cache")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d42334578eba1390859012ebb91e1e556d51db49 Mon Sep 17 00:00:00 2001
From: Namjae Jeon <linkinjeon(a)kernel.org>
Date: Thu, 13 Jul 2023 21:59:37 +0900
Subject: [PATCH] exfat: check if filename entries exceeds max filename length
exfat_extract_uni_name copies characters from a given file name entry into
the 'uniname' variable. This variable is actually defined on the stack of
the exfat_readdir() function. According to the definition of
the 'exfat_uni_name' type, the file name should be limited 255 characters
(+ null teminator space), but the exfat_get_uniname_from_ext_entry()
function can write more characters because there is no check if filename
entries exceeds max filename length. This patch add the check not to copy
filename characters when exceeding max filename length.
Cc: stable(a)vger.kernel.org
Cc: Yuezhang Mo <Yuezhang.Mo(a)sony.com>
Reported-by: Maxim Suhanov <dfirblog(a)gmail.com>
Reviewed-by: Sungjong Seo <sj1557.seo(a)samsung.com>
Signed-off-by: Namjae Jeon <linkinjeon(a)kernel.org>
diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c
index 957574180a5e..bc48f3329921 100644
--- a/fs/exfat/dir.c
+++ b/fs/exfat/dir.c
@@ -34,6 +34,7 @@ static int exfat_get_uniname_from_ext_entry(struct super_block *sb,
{
int i, err;
struct exfat_entry_set_cache es;
+ unsigned int uni_len = 0, len;
err = exfat_get_dentry_set(&es, sb, p_dir, entry, ES_ALL_ENTRIES);
if (err)
@@ -52,7 +53,10 @@ static int exfat_get_uniname_from_ext_entry(struct super_block *sb,
if (exfat_get_entry_type(ep) != TYPE_EXTEND)
break;
- exfat_extract_uni_name(ep, uniname);
+ len = exfat_extract_uni_name(ep, uniname);
+ uni_len += len;
+ if (len != EXFAT_FILE_NAME_LEN || uni_len >= MAX_NAME_LENGTH)
+ break;
uniname += EXFAT_FILE_NAME_LEN;
}
@@ -1079,7 +1083,8 @@ int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei,
if (entry_type == TYPE_EXTEND) {
unsigned short entry_uniname[16], unichar;
- if (step != DIRENT_STEP_NAME) {
+ if (step != DIRENT_STEP_NAME ||
+ name_len >= MAX_NAME_LENGTH) {
step = DIRENT_STEP_FILE;
continue;
}
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x e7e607bd00481745550389a29ecabe33e13d67cf
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023080735-sprang-moonlit-32f2@gregkh' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
e7e607bd0048 ("ceph: defer stopping mdsc delayed_work")
470a5c77eac0 ("ceph: use kill_anon_super helper")
fa9967734227 ("ceph: fix potential mdsc use-after-free crash")
3a3430affce5 ("ceph: show tasks waiting on caps in debugfs caps file")
7b2f936fc828 ("ceph: fix error handling in ceph_get_caps()")
1199d7da2d29 ("ceph: simplify arguments and return semantics of try_get_cap_refs")
ff4a80bf2d3f ("ceph: dump granular cap info in "caps" debugfs file")
0c44a8e0fc55 ("ceph: quota: fix quota subdir mounts")
2ee9dd958d47 ("ceph: add non-blocking parameter to ceph_try_get_caps()")
a57d9064e4ee ("ceph: flush pending works before shutdown super")
9122eed5281e ("ceph: quota: report root dir quota usage in statfs")
d557c48db730 ("ceph: quota: add counter for snaprealms with quota")
e3161f17d926 ("ceph: quota: cache inode pointer in ceph_snap_realm")
0eb6bbe4d9cf ("ceph: fix root quota realm check")
2596366907f8 ("ceph: don't check quota for snap inode")
1ab302a0cb14 ("ceph: quota: update MDS when max_bytes is approaching")
2b83845f8bd7 ("ceph: quota: support for ceph.quota.max_bytes")
cafe21a4fb30 ("ceph: quota: don't allow cross-quota renames")
b7a2921765cf ("ceph: quota: support for ceph.quota.max_files")
fb18a57568c2 ("ceph: quota: add initial infrastructure to support cephfs quotas")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e7e607bd00481745550389a29ecabe33e13d67cf Mon Sep 17 00:00:00 2001
From: Xiubo Li <xiubli(a)redhat.com>
Date: Tue, 25 Jul 2023 12:03:59 +0800
Subject: [PATCH] ceph: defer stopping mdsc delayed_work
Flushing the dirty buffer may take a long time if the cluster is
overloaded or if there is network issue. So we should ping the
MDSs periodically to keep alive, else the MDS will blocklist
the kclient.
Cc: stable(a)vger.kernel.org
Link: https://tracker.ceph.com/issues/61843
Signed-off-by: Xiubo Li <xiubli(a)redhat.com>
Reviewed-by: Milind Changire <mchangir(a)redhat.com>
Signed-off-by: Ilya Dryomov <idryomov(a)gmail.com>
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 66048a86c480..5fb367b1d4b0 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4764,7 +4764,7 @@ static void delayed_work(struct work_struct *work)
dout("mdsc delayed_work\n");
- if (mdsc->stopping)
+ if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHED)
return;
mutex_lock(&mdsc->mutex);
@@ -4943,7 +4943,7 @@ void send_flush_mdlog(struct ceph_mds_session *s)
void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
{
dout("pre_umount\n");
- mdsc->stopping = 1;
+ mdsc->stopping = CEPH_MDSC_STOPPING_BEGIN;
ceph_mdsc_iterate_sessions(mdsc, send_flush_mdlog, true);
ceph_mdsc_iterate_sessions(mdsc, lock_unlock_session, false);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 724307ff89cd..86d2965e68a1 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -380,6 +380,11 @@ struct cap_wait {
int want;
};
+enum {
+ CEPH_MDSC_STOPPING_BEGIN = 1,
+ CEPH_MDSC_STOPPING_FLUSHED = 2,
+};
+
/*
* mds client state
*/
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 3fc48b43cab0..a5f52013314d 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -1374,6 +1374,16 @@ static void ceph_kill_sb(struct super_block *s)
ceph_mdsc_pre_umount(fsc->mdsc);
flush_fs_workqueues(fsc);
+ /*
+ * Though the kill_anon_super() will finally trigger the
+ * sync_filesystem() anyway, we still need to do it here
+ * and then bump the stage of shutdown to stop the work
+ * queue as earlier as possible.
+ */
+ sync_filesystem(s);
+
+ fsc->mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
+
kill_anon_super(s);
fsc->client->extra_mon_dispatch = NULL;
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x e7e607bd00481745550389a29ecabe33e13d67cf
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023080734-unenvied-relocate-bccd@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
e7e607bd0048 ("ceph: defer stopping mdsc delayed_work")
470a5c77eac0 ("ceph: use kill_anon_super helper")
fa9967734227 ("ceph: fix potential mdsc use-after-free crash")
3a3430affce5 ("ceph: show tasks waiting on caps in debugfs caps file")
7b2f936fc828 ("ceph: fix error handling in ceph_get_caps()")
1199d7da2d29 ("ceph: simplify arguments and return semantics of try_get_cap_refs")
ff4a80bf2d3f ("ceph: dump granular cap info in "caps" debugfs file")
0c44a8e0fc55 ("ceph: quota: fix quota subdir mounts")
2ee9dd958d47 ("ceph: add non-blocking parameter to ceph_try_get_caps()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e7e607bd00481745550389a29ecabe33e13d67cf Mon Sep 17 00:00:00 2001
From: Xiubo Li <xiubli(a)redhat.com>
Date: Tue, 25 Jul 2023 12:03:59 +0800
Subject: [PATCH] ceph: defer stopping mdsc delayed_work
Flushing the dirty buffer may take a long time if the cluster is
overloaded or if there is network issue. So we should ping the
MDSs periodically to keep alive, else the MDS will blocklist
the kclient.
Cc: stable(a)vger.kernel.org
Link: https://tracker.ceph.com/issues/61843
Signed-off-by: Xiubo Li <xiubli(a)redhat.com>
Reviewed-by: Milind Changire <mchangir(a)redhat.com>
Signed-off-by: Ilya Dryomov <idryomov(a)gmail.com>
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 66048a86c480..5fb367b1d4b0 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4764,7 +4764,7 @@ static void delayed_work(struct work_struct *work)
dout("mdsc delayed_work\n");
- if (mdsc->stopping)
+ if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHED)
return;
mutex_lock(&mdsc->mutex);
@@ -4943,7 +4943,7 @@ void send_flush_mdlog(struct ceph_mds_session *s)
void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
{
dout("pre_umount\n");
- mdsc->stopping = 1;
+ mdsc->stopping = CEPH_MDSC_STOPPING_BEGIN;
ceph_mdsc_iterate_sessions(mdsc, send_flush_mdlog, true);
ceph_mdsc_iterate_sessions(mdsc, lock_unlock_session, false);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 724307ff89cd..86d2965e68a1 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -380,6 +380,11 @@ struct cap_wait {
int want;
};
+enum {
+ CEPH_MDSC_STOPPING_BEGIN = 1,
+ CEPH_MDSC_STOPPING_FLUSHED = 2,
+};
+
/*
* mds client state
*/
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 3fc48b43cab0..a5f52013314d 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -1374,6 +1374,16 @@ static void ceph_kill_sb(struct super_block *s)
ceph_mdsc_pre_umount(fsc->mdsc);
flush_fs_workqueues(fsc);
+ /*
+ * Though the kill_anon_super() will finally trigger the
+ * sync_filesystem() anyway, we still need to do it here
+ * and then bump the stage of shutdown to stop the work
+ * queue as earlier as possible.
+ */
+ sync_filesystem(s);
+
+ fsc->mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
+
kill_anon_super(s);
fsc->client->extra_mon_dispatch = NULL;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x e7e607bd00481745550389a29ecabe33e13d67cf
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023080733-snowbound-wagon-56bb@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
e7e607bd0048 ("ceph: defer stopping mdsc delayed_work")
470a5c77eac0 ("ceph: use kill_anon_super helper")
fa9967734227 ("ceph: fix potential mdsc use-after-free crash")
3a3430affce5 ("ceph: show tasks waiting on caps in debugfs caps file")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e7e607bd00481745550389a29ecabe33e13d67cf Mon Sep 17 00:00:00 2001
From: Xiubo Li <xiubli(a)redhat.com>
Date: Tue, 25 Jul 2023 12:03:59 +0800
Subject: [PATCH] ceph: defer stopping mdsc delayed_work
Flushing the dirty buffer may take a long time if the cluster is
overloaded or if there is network issue. So we should ping the
MDSs periodically to keep alive, else the MDS will blocklist
the kclient.
Cc: stable(a)vger.kernel.org
Link: https://tracker.ceph.com/issues/61843
Signed-off-by: Xiubo Li <xiubli(a)redhat.com>
Reviewed-by: Milind Changire <mchangir(a)redhat.com>
Signed-off-by: Ilya Dryomov <idryomov(a)gmail.com>
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 66048a86c480..5fb367b1d4b0 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4764,7 +4764,7 @@ static void delayed_work(struct work_struct *work)
dout("mdsc delayed_work\n");
- if (mdsc->stopping)
+ if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHED)
return;
mutex_lock(&mdsc->mutex);
@@ -4943,7 +4943,7 @@ void send_flush_mdlog(struct ceph_mds_session *s)
void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
{
dout("pre_umount\n");
- mdsc->stopping = 1;
+ mdsc->stopping = CEPH_MDSC_STOPPING_BEGIN;
ceph_mdsc_iterate_sessions(mdsc, send_flush_mdlog, true);
ceph_mdsc_iterate_sessions(mdsc, lock_unlock_session, false);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 724307ff89cd..86d2965e68a1 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -380,6 +380,11 @@ struct cap_wait {
int want;
};
+enum {
+ CEPH_MDSC_STOPPING_BEGIN = 1,
+ CEPH_MDSC_STOPPING_FLUSHED = 2,
+};
+
/*
* mds client state
*/
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 3fc48b43cab0..a5f52013314d 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -1374,6 +1374,16 @@ static void ceph_kill_sb(struct super_block *s)
ceph_mdsc_pre_umount(fsc->mdsc);
flush_fs_workqueues(fsc);
+ /*
+ * Though the kill_anon_super() will finally trigger the
+ * sync_filesystem() anyway, we still need to do it here
+ * and then bump the stage of shutdown to stop the work
+ * queue as earlier as possible.
+ */
+ sync_filesystem(s);
+
+ fsc->mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
+
kill_anon_super(s);
fsc->client->extra_mon_dispatch = NULL;
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x e65851989001c0c9ba9177564b13b38201c0854c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023080717-repair-pessimism-cb11@gregkh' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
e65851989001 ("scsi: zfcp: Defer fc_rport blocking until after ADISC response")
8c9db6679be4 ("scsi: zfcp: Fix failed recovery on gone remote port with non-NPIV FCP devices")
5c750d58e9d7 ("scsi: zfcp: workqueue: set description for port work items with their WWPN as context")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e65851989001c0c9ba9177564b13b38201c0854c Mon Sep 17 00:00:00 2001
From: Steffen Maier <maier(a)linux.ibm.com>
Date: Mon, 24 Jul 2023 16:51:56 +0200
Subject: [PATCH] scsi: zfcp: Defer fc_rport blocking until after ADISC
response
Storage devices are free to send RSCNs, e.g. for internal state changes. If
this happens on all connected paths, zfcp risks temporarily losing all
paths at the same time. This has strong requirements on multipath
configuration such as "no_path_retry queue".
Avoid such situations by deferring fc_rport blocking until after the ADISC
response, when any actual state change of the remote port became clear.
The already existing port recovery triggers explicitly block the fc_rport.
The triggers are: on ADISC reject or timeout (typical cable pull case), and
on ADISC indicating that the remote port has changed its WWPN or
the port is meanwhile no longer open.
As a side effect, this also removes a confusing direct function call to
another work item function zfcp_scsi_rport_work() instead of scheduling
that other work item. It was probably done that way to have the rport block
side effect immediate and synchronous to the caller.
Fixes: a2fa0aede07c ("[SCSI] zfcp: Block FC transport rports early on errors")
Cc: stable(a)vger.kernel.org #v2.6.30+
Reviewed-by: Benjamin Block <bblock(a)linux.ibm.com>
Reviewed-by: Fedor Loshakov <loshakov(a)linux.ibm.com>
Signed-off-by: Steffen Maier <maier(a)linux.ibm.com>
Link: https://lore.kernel.org/r/20230724145156.3920244-1-maier@linux.ibm.com
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c
index f21307537829..4f0d0e55f0d4 100644
--- a/drivers/s390/scsi/zfcp_fc.c
+++ b/drivers/s390/scsi/zfcp_fc.c
@@ -534,8 +534,7 @@ static void zfcp_fc_adisc_handler(void *data)
/* re-init to undo drop from zfcp_fc_adisc() */
port->d_id = ntoh24(adisc_resp->adisc_port_id);
- /* port is good, unblock rport without going through erp */
- zfcp_scsi_schedule_rport_register(port);
+ /* port is still good, nothing to do */
out:
atomic_andnot(ZFCP_STATUS_PORT_LINK_TEST, &port->status);
put_device(&port->dev);
@@ -595,9 +594,6 @@ void zfcp_fc_link_test_work(struct work_struct *work)
int retval;
set_worker_desc("zadisc%16llx", port->wwpn); /* < WORKER_DESC_LEN=24 */
- get_device(&port->dev);
- port->rport_task = RPORT_DEL;
- zfcp_scsi_rport_work(&port->rport_work);
/* only issue one test command at one time per port */
if (atomic_read(&port->status) & ZFCP_STATUS_PORT_LINK_TEST)