The patch below does not apply to the 6.6-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to stable@vger.kernel.org.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y git checkout FETCH_HEAD git cherry-pick -x 76d2e3890fb169168c73f2e4f8375c7cc24a765e # <resolve conflicts, build, test, etc.> git commit -s git send-email --to 'stable@vger.kernel.org' --in-reply-to '2025082224-submersed-commend-be4c@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 76d2e3890fb169168c73f2e4f8375c7cc24a765e Mon Sep 17 00:00:00 2001 From: Trond Myklebust trond.myklebust@hammerspace.com Date: Sat, 16 Aug 2025 07:25:20 -0700 Subject: [PATCH] NFS: Fix a race when updating an existing write
After nfs_lock_and_join_requests() tests for whether the request is still attached to the mapping, nothing prevents a call to nfs_inode_remove_request() from succeeding until we actually lock the page group. The reason is that whoever called nfs_inode_remove_request() doesn't necessarily have a lock on the page group head.
So in order to avoid races, let's take the page group lock earlier in nfs_lock_and_join_requests(), and hold it across the removal of the request in nfs_inode_remove_request().
Reported-by: Jeff Layton jlayton@kernel.org Tested-by: Joe Quanaim jdq@meta.com Tested-by: Andrew Steffen aksteffen@meta.com Reviewed-by: Jeff Layton jlayton@kernel.org Fixes: bd37d6fce184 ("NFSv4: Convert nfs_lock_and_join_requests() to use nfs_page_find_head_request()") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust trond.myklebust@hammerspace.com
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 11968dcb7243..6e69ce43a13f 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -253,13 +253,14 @@ nfs_page_group_unlock(struct nfs_page *req) nfs_page_clear_headlock(req); }
-/* - * nfs_page_group_sync_on_bit_locked +/** + * nfs_page_group_sync_on_bit_locked - Test if all requests have @bit set + * @req: request in page group + * @bit: PG_* bit that is used to sync page group * * must be called with page group lock held */ -static bool -nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit) +bool nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit) { struct nfs_page *head = req->wb_head; struct nfs_page *tmp; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index fa5c41d0989a..8b7c04737967 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -153,20 +153,10 @@ nfs_page_set_inode_ref(struct nfs_page *req, struct inode *inode) } }
-static int -nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode) +static void nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode) { - int ret; - - if (!test_bit(PG_REMOVE, &req->wb_flags)) - return 0; - ret = nfs_page_group_lock(req); - if (ret) - return ret; if (test_and_clear_bit(PG_REMOVE, &req->wb_flags)) nfs_page_set_inode_ref(req, inode); - nfs_page_group_unlock(req); - return 0; }
/** @@ -585,19 +575,18 @@ retry: } }
+ ret = nfs_page_group_lock(head); + if (ret < 0) + goto out_unlock; + /* Ensure that nobody removed the request before we locked it */ if (head != folio->private) { + nfs_page_group_unlock(head); nfs_unlock_and_release_request(head); goto retry; }
- ret = nfs_cancel_remove_inode(head, inode); - if (ret < 0) - goto out_unlock; - - ret = nfs_page_group_lock(head); - if (ret < 0) - goto out_unlock; + nfs_cancel_remove_inode(head, inode);
/* lock each request in the page group */ for (subreq = head->wb_this_page; @@ -786,7 +775,8 @@ static void nfs_inode_remove_request(struct nfs_page *req) { struct nfs_inode *nfsi = NFS_I(nfs_page_to_inode(req));
- if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) { + nfs_page_group_lock(req); + if (nfs_page_group_sync_on_bit_locked(req, PG_REMOVE)) { struct folio *folio = nfs_page_to_folio(req->wb_head); struct address_space *mapping = folio->mapping;
@@ -798,6 +788,7 @@ static void nfs_inode_remove_request(struct nfs_page *req) } spin_unlock(&mapping->i_private_lock); } + nfs_page_group_unlock(req);
if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) { atomic_long_dec(&nfsi->nrequests); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 169b4ae30ff4..9aed39abc94b 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -160,6 +160,7 @@ extern void nfs_join_page_group(struct nfs_page *head, extern int nfs_page_group_lock(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); +extern bool nfs_page_group_sync_on_bit_locked(struct nfs_page *, unsigned int); extern int nfs_page_set_headlock(struct nfs_page *req); extern void nfs_page_clear_headlock(struct nfs_page *req); extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *);
From: Trond Myklebust trond.myklebust@hammerspace.com
The patch in question has a dependency on a previous patch by Christoph. Both needed backporting to Linux 6.6.y
Christoph Hellwig (1): nfs: fold nfs_page_group_lock_subrequests into nfs_lock_and_join_requests
Trond Myklebust (1): NFS: Fix a race when updating an existing write
fs/nfs/pagelist.c | 86 ++--------------------- fs/nfs/write.c | 146 +++++++++++++++++++++++++-------------- include/linux/nfs_page.h | 2 +- 3 files changed, 100 insertions(+), 134 deletions(-)
From: Christoph Hellwig hch@lst.de
Fold nfs_page_group_lock_subrequests into nfs_lock_and_join_requests to prepare for future changes to this code, and move the helpers to write.c as well.
Signed-off-by: Christoph Hellwig hch@lst.de Reviewed-by: Sagi Grimberg sagi@grimberg.me Signed-off-by: Anna Schumaker Anna.Schumaker@Netapp.com Signed-off-by: Trond Myklebust trond.myklebust@hammerspace.com (cherry picked from commit 25edbcac6e32eab345e470d56ca9974a577b878b) --- fs/nfs/pagelist.c | 77 ---------------------------------------- fs/nfs/write.c | 75 ++++++++++++++++++++++++++++++++++---- include/linux/nfs_page.h | 1 - 3 files changed, 69 insertions(+), 84 deletions(-)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 040b6b79c75e..30e2488eb84c 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -206,83 +206,6 @@ nfs_page_group_lock_head(struct nfs_page *req) return head; }
-/* - * nfs_unroll_locks - unlock all newly locked reqs and wait on @req - * @head: head request of page group, must be holding head lock - * @req: request that couldn't lock and needs to wait on the req bit lock - * - * This is a helper function for nfs_lock_and_join_requests - * returns 0 on success, < 0 on error. - */ -static void -nfs_unroll_locks(struct nfs_page *head, struct nfs_page *req) -{ - struct nfs_page *tmp; - - /* relinquish all the locks successfully grabbed this run */ - for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) { - if (!kref_read(&tmp->wb_kref)) - continue; - nfs_unlock_and_release_request(tmp); - } -} - -/* - * nfs_page_group_lock_subreq - try to lock a subrequest - * @head: head request of page group - * @subreq: request to lock - * - * This is a helper function for nfs_lock_and_join_requests which - * must be called with the head request and page group both locked. - * On error, it returns with the page group unlocked. - */ -static int -nfs_page_group_lock_subreq(struct nfs_page *head, struct nfs_page *subreq) -{ - int ret; - - if (!kref_get_unless_zero(&subreq->wb_kref)) - return 0; - while (!nfs_lock_request(subreq)) { - nfs_page_group_unlock(head); - ret = nfs_wait_on_request(subreq); - if (!ret) - ret = nfs_page_group_lock(head); - if (ret < 0) { - nfs_unroll_locks(head, subreq); - nfs_release_request(subreq); - return ret; - } - } - return 0; -} - -/* - * nfs_page_group_lock_subrequests - try to lock the subrequests - * @head: head request of page group - * - * This is a helper function for nfs_lock_and_join_requests which - * must be called with the head request locked. - */ -int nfs_page_group_lock_subrequests(struct nfs_page *head) -{ - struct nfs_page *subreq; - int ret; - - ret = nfs_page_group_lock(head); - if (ret < 0) - return ret; - /* lock each request in the page group */ - for (subreq = head->wb_this_page; subreq != head; - subreq = subreq->wb_this_page) { - ret = nfs_page_group_lock_subreq(head, subreq); - if (ret < 0) - return ret; - } - nfs_page_group_unlock(head); - return 0; -} - /* * nfs_page_set_headlock - set the request PG_HEADLOCK * @req: request that is to be locked diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 7d03811f44a4..289ff8e9f78a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -548,6 +548,57 @@ void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo, nfs_destroy_unlinked_subrequests(destroy_list, head, inode); }
+/* + * nfs_unroll_locks - unlock all newly locked reqs and wait on @req + * @head: head request of page group, must be holding head lock + * @req: request that couldn't lock and needs to wait on the req bit lock + * + * This is a helper function for nfs_lock_and_join_requests + * returns 0 on success, < 0 on error. + */ +static void +nfs_unroll_locks(struct nfs_page *head, struct nfs_page *req) +{ + struct nfs_page *tmp; + + /* relinquish all the locks successfully grabbed this run */ + for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) { + if (!kref_read(&tmp->wb_kref)) + continue; + nfs_unlock_and_release_request(tmp); + } +} + +/* + * nfs_page_group_lock_subreq - try to lock a subrequest + * @head: head request of page group + * @subreq: request to lock + * + * This is a helper function for nfs_lock_and_join_requests which + * must be called with the head request and page group both locked. + * On error, it returns with the page group unlocked. + */ +static int +nfs_page_group_lock_subreq(struct nfs_page *head, struct nfs_page *subreq) +{ + int ret; + + if (!kref_get_unless_zero(&subreq->wb_kref)) + return 0; + while (!nfs_lock_request(subreq)) { + nfs_page_group_unlock(head); + ret = nfs_wait_on_request(subreq); + if (!ret) + ret = nfs_page_group_lock(head); + if (ret < 0) { + nfs_unroll_locks(head, subreq); + nfs_release_request(subreq); + return ret; + } + } + return 0; +} + /* * nfs_lock_and_join_requests - join all subreqs to the head req * @folio: the folio used to lookup the "page group" of nfs_page structures @@ -566,7 +617,7 @@ void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo, static struct nfs_page *nfs_lock_and_join_requests(struct folio *folio) { struct inode *inode = folio_file_mapping(folio)->host; - struct nfs_page *head; + struct nfs_page *head, *subreq; struct nfs_commit_info cinfo; int ret;
@@ -580,16 +631,28 @@ static struct nfs_page *nfs_lock_and_join_requests(struct folio *folio) if (IS_ERR_OR_NULL(head)) return head;
+ ret = nfs_page_group_lock(head); + if (ret < 0) + goto out_unlock; + /* lock each request in the page group */ - ret = nfs_page_group_lock_subrequests(head); - if (ret < 0) { - nfs_unlock_and_release_request(head); - return ERR_PTR(ret); + for (subreq = head->wb_this_page; + subreq != head; + subreq = subreq->wb_this_page) { + ret = nfs_page_group_lock_subreq(head, subreq); + if (ret < 0) + goto out_unlock; }
- nfs_join_page_group(head, &cinfo, inode); + nfs_page_group_unlock(head);
+ nfs_init_cinfo_from_inode(&cinfo, inode); + nfs_join_page_group(head, &cinfo, inode); return head; + +out_unlock: + nfs_unlock_and_release_request(head); + return ERR_PTR(ret); }
static void nfs_write_error(struct nfs_page *req, int error) diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 1c315f854ea8..3a0f7ebe5388 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -156,7 +156,6 @@ extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_and_release_request(struct nfs_page *); extern struct nfs_page *nfs_page_group_lock_head(struct nfs_page *req); -extern int nfs_page_group_lock_subrequests(struct nfs_page *head); extern void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo, struct inode *inode);
From: Trond Myklebust trond.myklebust@hammerspace.com
After nfs_lock_and_join_requests() tests for whether the request is still attached to the mapping, nothing prevents a call to nfs_inode_remove_request() from succeeding until we actually lock the page group. The reason is that whoever called nfs_inode_remove_request() doesn't necessarily have a lock on the page group head.
So in order to avoid races, let's take the page group lock earlier in nfs_lock_and_join_requests(), and hold it across the removal of the request in nfs_inode_remove_request().
Reported-by: Jeff Layton jlayton@kernel.org Tested-by: Joe Quanaim jdq@meta.com Tested-by: Andrew Steffen aksteffen@meta.com Reviewed-by: Jeff Layton jlayton@kernel.org Fixes: bd37d6fce184 ("NFSv4: Convert nfs_lock_and_join_requests() to use nfs_page_find_head_request()") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust trond.myklebust@hammerspace.com (cherry picked from commit 76d2e3890fb169168c73f2e4f8375c7cc24a765e) --- fs/nfs/pagelist.c | 9 ++--- fs/nfs/write.c | 71 ++++++++++++++-------------------------- include/linux/nfs_page.h | 1 + 3 files changed, 31 insertions(+), 50 deletions(-)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 30e2488eb84c..0ea3916ed1dc 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -272,13 +272,14 @@ nfs_page_group_unlock(struct nfs_page *req) nfs_page_clear_headlock(req); }
-/* - * nfs_page_group_sync_on_bit_locked +/** + * nfs_page_group_sync_on_bit_locked - Test if all requests have @bit set + * @req: request in page group + * @bit: PG_* bit that is used to sync page group * * must be called with page group lock held */ -static bool -nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit) +bool nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit) { struct nfs_page *head = req->wb_head; struct nfs_page *tmp; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 289ff8e9f78a..cb1e9996fcc8 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -156,20 +156,10 @@ nfs_page_set_inode_ref(struct nfs_page *req, struct inode *inode) } }
-static int -nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode) +static void nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode) { - int ret; - - if (!test_bit(PG_REMOVE, &req->wb_flags)) - return 0; - ret = nfs_page_group_lock(req); - if (ret) - return ret; if (test_and_clear_bit(PG_REMOVE, &req->wb_flags)) nfs_page_set_inode_ref(req, inode); - nfs_page_group_unlock(req); - return 0; }
static struct nfs_page *nfs_folio_private_request(struct folio *folio) @@ -238,36 +228,6 @@ static struct nfs_page *nfs_folio_find_head_request(struct folio *folio) return req; }
-static struct nfs_page *nfs_folio_find_and_lock_request(struct folio *folio) -{ - struct inode *inode = folio_file_mapping(folio)->host; - struct nfs_page *req, *head; - int ret; - - for (;;) { - req = nfs_folio_find_head_request(folio); - if (!req) - return req; - head = nfs_page_group_lock_head(req); - if (head != req) - nfs_release_request(req); - if (IS_ERR(head)) - return head; - ret = nfs_cancel_remove_inode(head, inode); - if (ret < 0) { - nfs_unlock_and_release_request(head); - return ERR_PTR(ret); - } - /* Ensure that nobody removed the request before we locked it */ - if (head == nfs_folio_private_request(folio)) - break; - if (folio_test_swapcache(folio)) - break; - nfs_unlock_and_release_request(head); - } - return head; -} - /* Adjust the file length if we're writing beyond the end */ static void nfs_grow_file(struct folio *folio, unsigned int offset, unsigned int count) @@ -621,20 +581,37 @@ static struct nfs_page *nfs_lock_and_join_requests(struct folio *folio) struct nfs_commit_info cinfo; int ret;
- nfs_init_cinfo_from_inode(&cinfo, inode); /* * A reference is taken only on the head request which acts as a * reference to the whole page group - the group will not be destroyed * until the head reference is released. */ - head = nfs_folio_find_and_lock_request(folio); - if (IS_ERR_OR_NULL(head)) - return head; +retry: + head = nfs_folio_find_head_request(folio); + if (!head) + return NULL; + + while (!nfs_lock_request(head)) { + ret = nfs_wait_on_request(head); + if (ret < 0) { + nfs_release_request(head); + return ERR_PTR(ret); + } + }
ret = nfs_page_group_lock(head); if (ret < 0) goto out_unlock;
+ /* Ensure that nobody removed the request before we locked it */ + if (head != folio->private && !folio_test_swapcache(folio)) { + nfs_page_group_unlock(head); + nfs_unlock_and_release_request(head); + goto retry; + } + + nfs_cancel_remove_inode(head, inode); + /* lock each request in the page group */ for (subreq = head->wb_this_page; subreq != head; @@ -855,7 +832,8 @@ static void nfs_inode_remove_request(struct nfs_page *req) { struct nfs_inode *nfsi = NFS_I(nfs_page_to_inode(req));
- if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) { + nfs_page_group_lock(req); + if (nfs_page_group_sync_on_bit_locked(req, PG_REMOVE)) { struct folio *folio = nfs_page_to_folio(req->wb_head); struct address_space *mapping = folio_file_mapping(folio);
@@ -867,6 +845,7 @@ static void nfs_inode_remove_request(struct nfs_page *req) } spin_unlock(&mapping->private_lock); } + nfs_page_group_unlock(req);
if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) { atomic_long_dec(&nfsi->nrequests); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 3a0f7ebe5388..6a46069c5a36 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -162,6 +162,7 @@ extern void nfs_join_page_group(struct nfs_page *head, extern int nfs_page_group_lock(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); +extern bool nfs_page_group_sync_on_bit_locked(struct nfs_page *, unsigned int); extern int nfs_page_set_headlock(struct nfs_page *req); extern void nfs_page_clear_headlock(struct nfs_page *req); extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *);
linux-stable-mirror@lists.linaro.org