From: Xiubo Li xiubli@redhat.com
If a client sends out a cap update dropping caps with the prior 'seq' just before an incoming cap revoke request, then the client may drop the revoke because it believes it's already released the requested capabilities.
This causes the MDS to wait indefinitely for the client to respond to the revoke. It's therefore always a good idea to ack the cap revoke request with the bumped up 'seq'.
Currently if the cap->issued equals to the newcaps the check_caps() will do nothing, we should force flush the caps.
Cc: stable@vger.kernel.org Link: https://tracker.ceph.com/issues/61782 Signed-off-by: Xiubo Li xiubli@redhat.com --- fs/ceph/caps.c | 16 ++++++++++++---- fs/ceph/super.h | 7 ++++--- 2 files changed, 16 insertions(+), 7 deletions(-)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 24c31f795938..ba5809cf8f02 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2024,6 +2024,8 @@ bool __ceph_should_report_size(struct ceph_inode_info *ci) * CHECK_CAPS_AUTHONLY - we should only check the auth cap * CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without * further delay. + * CHECK_CAPS_FLUSH_FORCE - we should flush any caps immediately, without + * further delay. */ void ceph_check_caps(struct ceph_inode_info *ci, int flags) { @@ -2105,7 +2107,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) }
doutc(cl, "%p %llx.%llx file_want %s used %s dirty %s " - "flushing %s issued %s revoking %s retain %s %s%s%s\n", + "flushing %s issued %s revoking %s retain %s %s%s%s%s\n", inode, ceph_vinop(inode), ceph_cap_string(file_wanted), ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps), ceph_cap_string(ci->i_flushing_caps), @@ -2113,7 +2115,8 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) ceph_cap_string(retain), (flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "", (flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "", - (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : ""); + (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "", + (flags & CHECK_CAPS_FLUSH_FORCE) ? " FLUSH_FORCE" : "");
/* * If we no longer need to hold onto old our caps, and we may @@ -2223,6 +2226,9 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) goto ack; }
+ if (flags & CHECK_CAPS_FLUSH_FORCE) + goto ack; + /* things we might delay */ if ((cap->issued & ~retain) == 0) continue; /* nope, all good */ @@ -3518,6 +3524,7 @@ static void handle_cap_grant(struct inode *inode, bool queue_invalidate = false; bool deleted_inode = false; bool fill_inline = false; + int flags = 0;
/* * If there is at least one crypto block then we'll trust @@ -3751,6 +3758,7 @@ static void handle_cap_grant(struct inode *inode, /* don't let check_caps skip sending a response to MDS for revoke msgs */ if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) { cap->mds_wanted = 0; + flags |= CHECK_CAPS_FLUSH_FORCE; if (cap == ci->i_auth_cap) check_caps = 1; /* check auth cap only */ else @@ -3806,9 +3814,9 @@ static void handle_cap_grant(struct inode *inode,
mutex_unlock(&session->s_mutex); if (check_caps == 1) - ceph_check_caps(ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL); + ceph_check_caps(ci, flags | CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL); else if (check_caps == 2) - ceph_check_caps(ci, CHECK_CAPS_NOINVAL); + ceph_check_caps(ci, flags | CHECK_CAPS_NOINVAL); }
/* diff --git a/fs/ceph/super.h b/fs/ceph/super.h index b0b368ed3018..831e8ec4d5da 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -200,9 +200,10 @@ struct ceph_cap { struct list_head caps_item; };
-#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */ -#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */ -#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */ +#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */ +#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */ +#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */ +#define CHECK_CAPS_FLUSH_FORCE 8 /* force flush any caps */
struct ceph_cap_flush { u64 tid;
Updated this patch and sent out the V2 to improve it.
On 7/11/24 18:40, xiubli@redhat.com wrote:
From: Xiubo Li xiubli@redhat.com
If a client sends out a cap update dropping caps with the prior 'seq' just before an incoming cap revoke request, then the client may drop the revoke because it believes it's already released the requested capabilities.
This causes the MDS to wait indefinitely for the client to respond to the revoke. It's therefore always a good idea to ack the cap revoke request with the bumped up 'seq'.
Currently if the cap->issued equals to the newcaps the check_caps() will do nothing, we should force flush the caps.
Cc: stable@vger.kernel.org Link: https://tracker.ceph.com/issues/61782 Signed-off-by: Xiubo Li xiubli@redhat.com
fs/ceph/caps.c | 16 ++++++++++++---- fs/ceph/super.h | 7 ++++--- 2 files changed, 16 insertions(+), 7 deletions(-)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 24c31f795938..ba5809cf8f02 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2024,6 +2024,8 @@ bool __ceph_should_report_size(struct ceph_inode_info *ci)
- CHECK_CAPS_AUTHONLY - we should only check the auth cap
- CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without
- further delay.
- CHECK_CAPS_FLUSH_FORCE - we should flush any caps immediately, without
*/ void ceph_check_caps(struct ceph_inode_info *ci, int flags) {
- further delay.
@@ -2105,7 +2107,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) } doutc(cl, "%p %llx.%llx file_want %s used %s dirty %s "
"flushing %s issued %s revoking %s retain %s %s%s%s\n",
"flushing %s issued %s revoking %s retain %s %s%s%s%s\n", inode, ceph_vinop(inode), ceph_cap_string(file_wanted), ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps), ceph_cap_string(ci->i_flushing_caps),
@@ -2113,7 +2115,8 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) ceph_cap_string(retain), (flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "", (flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "",
(flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "");
(flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "",
(flags & CHECK_CAPS_FLUSH_FORCE) ? " FLUSH_FORCE" : "");
/* * If we no longer need to hold onto old our caps, and we may @@ -2223,6 +2226,9 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) goto ack; }
if (flags & CHECK_CAPS_FLUSH_FORCE)
goto ack;
- /* things we might delay */ if ((cap->issued & ~retain) == 0) continue; /* nope, all good */
@@ -3518,6 +3524,7 @@ static void handle_cap_grant(struct inode *inode, bool queue_invalidate = false; bool deleted_inode = false; bool fill_inline = false;
- int flags = 0;
/* * If there is at least one crypto block then we'll trust @@ -3751,6 +3758,7 @@ static void handle_cap_grant(struct inode *inode, /* don't let check_caps skip sending a response to MDS for revoke msgs */ if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) { cap->mds_wanted = 0;
if (cap == ci->i_auth_cap) check_caps = 1; /* check auth cap only */ elseflags |= CHECK_CAPS_FLUSH_FORCE;
@@ -3806,9 +3814,9 @@ static void handle_cap_grant(struct inode *inode, mutex_unlock(&session->s_mutex); if (check_caps == 1)
ceph_check_caps(ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL);
else if (check_caps == 2)ceph_check_caps(ci, flags | CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL);
ceph_check_caps(ci, CHECK_CAPS_NOINVAL);
}ceph_check_caps(ci, flags | CHECK_CAPS_NOINVAL);
/* diff --git a/fs/ceph/super.h b/fs/ceph/super.h index b0b368ed3018..831e8ec4d5da 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -200,9 +200,10 @@ struct ceph_cap { struct list_head caps_item; }; -#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */ -#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */ -#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */ +#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */ +#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */ +#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */ +#define CHECK_CAPS_FLUSH_FORCE 8 /* force flush any caps */ struct ceph_cap_flush { u64 tid;
Hi Xiubo,
On Thu, Jul 11, 2024 at 4:10 PM xiubli@redhat.com wrote:
From: Xiubo Li xiubli@redhat.com
If a client sends out a cap update dropping caps with the prior 'seq' just before an incoming cap revoke request, then the client may drop the revoke because it believes it's already released the requested capabilities.
This causes the MDS to wait indefinitely for the client to respond to the revoke. It's therefore always a good idea to ack the cap revoke request with the bumped up 'seq'.
Currently if the cap->issued equals to the newcaps the check_caps() will do nothing, we should force flush the caps.
Cc: stable@vger.kernel.org Link: https://tracker.ceph.com/issues/61782 Signed-off-by: Xiubo Li xiubli@redhat.com
fs/ceph/caps.c | 16 ++++++++++++---- fs/ceph/super.h | 7 ++++--- 2 files changed, 16 insertions(+), 7 deletions(-)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 24c31f795938..ba5809cf8f02 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2024,6 +2024,8 @@ bool __ceph_should_report_size(struct ceph_inode_info *ci)
- CHECK_CAPS_AUTHONLY - we should only check the auth cap
- CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without
- further delay.
- CHECK_CAPS_FLUSH_FORCE - we should flush any caps immediately, without
*/
- further delay.
void ceph_check_caps(struct ceph_inode_info *ci, int flags) { @@ -2105,7 +2107,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) }
doutc(cl, "%p %llx.%llx file_want %s used %s dirty %s "
"flushing %s issued %s revoking %s retain %s %s%s%s\n",
"flushing %s issued %s revoking %s retain %s %s%s%s%s\n", inode, ceph_vinop(inode), ceph_cap_string(file_wanted), ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps), ceph_cap_string(ci->i_flushing_caps),
@@ -2113,7 +2115,8 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) ceph_cap_string(retain), (flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "", (flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "",
(flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "");
(flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "",
(flags & CHECK_CAPS_FLUSH_FORCE) ? " FLUSH_FORCE" : ""); /* * If we no longer need to hold onto old our caps, and we may
@@ -2223,6 +2226,9 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) goto ack; }
if (flags & CHECK_CAPS_FLUSH_FORCE)
goto ack;
Maybe check this early on inside the
for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
}
loop?
/* things we might delay */ if ((cap->issued & ~retain) == 0) continue; /* nope, all good */
@@ -3518,6 +3524,7 @@ static void handle_cap_grant(struct inode *inode, bool queue_invalidate = false; bool deleted_inode = false; bool fill_inline = false;
int flags = 0; /* * If there is at least one crypto block then we'll trust
@@ -3751,6 +3758,7 @@ static void handle_cap_grant(struct inode *inode, /* don't let check_caps skip sending a response to MDS for revoke msgs */ if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) { cap->mds_wanted = 0;
flags |= CHECK_CAPS_FLUSH_FORCE; if (cap == ci->i_auth_cap) check_caps = 1; /* check auth cap only */ else
@@ -3806,9 +3814,9 @@ static void handle_cap_grant(struct inode *inode,
mutex_unlock(&session->s_mutex); if (check_caps == 1)
ceph_check_caps(ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL);
ceph_check_caps(ci, flags | CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL); else if (check_caps == 2)
ceph_check_caps(ci, CHECK_CAPS_NOINVAL);
ceph_check_caps(ci, flags | CHECK_CAPS_NOINVAL);
}
/* diff --git a/fs/ceph/super.h b/fs/ceph/super.h index b0b368ed3018..831e8ec4d5da 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -200,9 +200,10 @@ struct ceph_cap { struct list_head caps_item; };
-#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */ -#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */ -#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */ +#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */ +#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */ +#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */ +#define CHECK_CAPS_FLUSH_FORCE 8 /* force flush any caps */
struct ceph_cap_flush { u64 tid; -- 2.45.1
On 7/16/24 16:00, Venky Shankar wrote:
Hi Xiubo,
On Thu, Jul 11, 2024 at 4:10 PM xiubli@redhat.com wrote:
From: Xiubo Li xiubli@redhat.com
If a client sends out a cap update dropping caps with the prior 'seq' just before an incoming cap revoke request, then the client may drop the revoke because it believes it's already released the requested capabilities.
This causes the MDS to wait indefinitely for the client to respond to the revoke. It's therefore always a good idea to ack the cap revoke request with the bumped up 'seq'.
Currently if the cap->issued equals to the newcaps the check_caps() will do nothing, we should force flush the caps.
Cc: stable@vger.kernel.org Link: https://tracker.ceph.com/issues/61782 Signed-off-by: Xiubo Li xiubli@redhat.com
fs/ceph/caps.c | 16 ++++++++++++---- fs/ceph/super.h | 7 ++++--- 2 files changed, 16 insertions(+), 7 deletions(-)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 24c31f795938..ba5809cf8f02 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2024,6 +2024,8 @@ bool __ceph_should_report_size(struct ceph_inode_info *ci)
- CHECK_CAPS_AUTHONLY - we should only check the auth cap
- CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without
- further delay.
- CHECK_CAPS_FLUSH_FORCE - we should flush any caps immediately, without
*/ void ceph_check_caps(struct ceph_inode_info *ci, int flags) {
- further delay.
@@ -2105,7 +2107,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) }
doutc(cl, "%p %llx.%llx file_want %s used %s dirty %s "
"flushing %s issued %s revoking %s retain %s %s%s%s\n",
"flushing %s issued %s revoking %s retain %s %s%s%s%s\n", inode, ceph_vinop(inode), ceph_cap_string(file_wanted), ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps), ceph_cap_string(ci->i_flushing_caps),
@@ -2113,7 +2115,8 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) ceph_cap_string(retain), (flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "", (flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "",
(flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "");
(flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "",
(flags & CHECK_CAPS_FLUSH_FORCE) ? " FLUSH_FORCE" : ""); /* * If we no longer need to hold onto old our caps, and we may
@@ -2223,6 +2226,9 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) goto ack; }
if (flags & CHECK_CAPS_FLUSH_FORCE)
goto ack;
Maybe check this early on inside the
for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { }
loop?
Yeah, we can do this ealier, let me improved it.
Thanks
- Xiubo
/* things we might delay */ if ((cap->issued & ~retain) == 0) continue; /* nope, all good */
@@ -3518,6 +3524,7 @@ static void handle_cap_grant(struct inode *inode, bool queue_invalidate = false; bool deleted_inode = false; bool fill_inline = false;
int flags = 0; /* * If there is at least one crypto block then we'll trust
@@ -3751,6 +3758,7 @@ static void handle_cap_grant(struct inode *inode, /* don't let check_caps skip sending a response to MDS for revoke msgs */ if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) { cap->mds_wanted = 0;
flags |= CHECK_CAPS_FLUSH_FORCE; if (cap == ci->i_auth_cap) check_caps = 1; /* check auth cap only */ else
@@ -3806,9 +3814,9 @@ static void handle_cap_grant(struct inode *inode,
mutex_unlock(&session->s_mutex); if (check_caps == 1)
ceph_check_caps(ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL);
ceph_check_caps(ci, flags | CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL); else if (check_caps == 2)
ceph_check_caps(ci, CHECK_CAPS_NOINVAL);
ceph_check_caps(ci, flags | CHECK_CAPS_NOINVAL);
}
/*
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index b0b368ed3018..831e8ec4d5da 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -200,9 +200,10 @@ struct ceph_cap { struct list_head caps_item; };
-#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */ -#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */ -#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */ +#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */ +#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */ +#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */ +#define CHECK_CAPS_FLUSH_FORCE 8 /* force flush any caps */
struct ceph_cap_flush { u64 tid; -- 2.45.1
linux-stable-mirror@lists.linaro.org