5.10-stable review patch. If anyone has any objections, please let me know.
------------------
From: Trond Myklebust <trond.myklebust@hammerspace.com>
commit 76d2e3890fb169168c73f2e4f8375c7cc24a765e upstream.
After nfs_lock_and_join_requests() tests for whether the request is still attached to the mapping, nothing prevents a call to nfs_inode_remove_request() from succeeding until we actually lock the page group. The reason is that whoever called nfs_inode_remove_request() doesn't necessarily have a lock on the page group head.
So in order to avoid races, let's take the page group lock earlier in nfs_lock_and_join_requests(), and hold it across the removal of the request in nfs_inode_remove_request().
Reported-by: Jeff Layton <jlayton@kernel.org>
Tested-by: Joe Quanaim <jdq@meta.com>
Tested-by: Andrew Steffen <aksteffen@meta.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Fixes: bd37d6fce184 ("NFSv4: Convert nfs_lock_and_join_requests() to use nfs_page_find_head_request()")
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/pagelist.c        |    9 +++---
 fs/nfs/write.c           |   66 ++++++++++++++++-------------------------------
 include/linux/nfs_page.h |    1 
 3 files changed, 29 insertions(+), 47 deletions(-)

--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -234,13 +234,14 @@ nfs_page_group_unlock(struct nfs_page *r
 	nfs_page_clear_headlock(req);
 }
 
-/*
- * nfs_page_group_sync_on_bit_locked
+/**
+ * nfs_page_group_sync_on_bit_locked - Test if all requests have @bit set
+ * @req: request in page group
+ * @bit: PG_* bit that is used to sync page group
  *
  * must be called with page group lock held
  */
-static bool
-nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit)
+bool nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit)
 {
 	struct nfs_page *head = req->wb_head;
 	struct nfs_page *tmp;
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -155,20 +155,10 @@ nfs_page_set_inode_ref(struct nfs_page *
 	}
 }
 
-static int
-nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode)
+static void nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode)
 {
-	int ret;
-
-	if (!test_bit(PG_REMOVE, &req->wb_flags))
-		return 0;
-	ret = nfs_page_group_lock(req);
-	if (ret)
-		return ret;
 	if (test_and_clear_bit(PG_REMOVE, &req->wb_flags))
 		nfs_page_set_inode_ref(req, inode);
-	nfs_page_group_unlock(req);
-	return 0;
 }
 
 static struct nfs_page *
@@ -240,36 +230,6 @@ static struct nfs_page *nfs_page_find_he
 	return req;
 }
 
-static struct nfs_page *nfs_find_and_lock_page_request(struct page *page)
-{
-	struct inode *inode = page_file_mapping(page)->host;
-	struct nfs_page *req, *head;
-	int ret;
-
-	for (;;) {
-		req = nfs_page_find_head_request(page);
-		if (!req)
-			return req;
-		head = nfs_page_group_lock_head(req);
-		if (head != req)
-			nfs_release_request(req);
-		if (IS_ERR(head))
-			return head;
-		ret = nfs_cancel_remove_inode(head, inode);
-		if (ret < 0) {
-			nfs_unlock_and_release_request(head);
-			return ERR_PTR(ret);
-		}
-		/* Ensure that nobody removed the request before we locked it */
-		if (head == nfs_page_private_request(page))
-			break;
-		if (PageSwapCache(page))
-			break;
-		nfs_unlock_and_release_request(head);
-	}
-	return head;
-}
-
 /* Adjust the file length if we're writing beyond the end */
 static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
 {
@@ -626,14 +586,32 @@ nfs_lock_and_join_requests(struct page *
 	 * reference to the whole page group - the group will not be destroyed
 	 * until the head reference is released.
 	 */
-	head = nfs_find_and_lock_page_request(page);
+retry:
+	head = nfs_page_find_head_request(page);
 	if (IS_ERR_OR_NULL(head))
 		return head;
 
+	while (!nfs_lock_request(head)) {
+		ret = nfs_wait_on_request(head);
+		if (ret < 0) {
+			nfs_release_request(head);
+			return ERR_PTR(ret);
+		}
+	}
+
 	ret = nfs_page_group_lock(head);
 	if (ret < 0)
 		goto out_unlock;
 
+	/* Ensure that nobody removed the request before we locked it */
+	if (head != nfs_page_private_request(page) && !PageSwapCache(page)) {
+		nfs_page_group_unlock(head);
+		nfs_unlock_and_release_request(head);
+		goto retry;
+	}
+
+	nfs_cancel_remove_inode(head, inode);
+
 	/* lock each request in the page group */
 	for (subreq = head->wb_this_page;
 	     subreq != head;
@@ -842,7 +820,8 @@ static void nfs_inode_remove_request(str
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_page *head;
 
-	if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {
+	nfs_page_group_lock(req);
+	if (nfs_page_group_sync_on_bit_locked(req, PG_REMOVE)) {
 		head = req->wb_head;
 
 		spin_lock(&mapping->private_lock);
@@ -853,6 +832,7 @@ static void nfs_inode_remove_request(str
 		}
 		spin_unlock(&mapping->private_lock);
 	}
+	nfs_page_group_unlock(req);
 
 	if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) {
 		nfs_release_request(req);
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -150,6 +150,7 @@ extern void nfs_join_page_group(struct n
 extern int nfs_page_group_lock(struct nfs_page *);
 extern void nfs_page_group_unlock(struct nfs_page *);
 extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int);
+extern bool nfs_page_group_sync_on_bit_locked(struct nfs_page *, unsigned int);
 extern int nfs_page_set_headlock(struct nfs_page *req);
 extern void nfs_page_clear_headlock(struct nfs_page *req);
 extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *);