From: Christoph Hellwig hch@lst.de
[ Upstream commit 6773da870ab89123d1b513da63ed59e32a29cb77 ]
xfs_bmapi_write can return 0 without actually returning a mapping in mval in two different cases:
1) when there is absolutely no space available to do an allocation 2) when converting delalloc space, and the allocation is so small that it only covers parts of the delalloc extent before the range requested by the caller
Callers at best can handle one of these cases, but in many cases can't cope with either one. Switch xfs_bmapi_write to always return a mapping or return an error code instead. For case 1) above ENOSPC is the obvious choice which is very much what the callers expect anyway. For case 2) there is no really good error code, so pick a funky one from the SysV streams portfolio.
This fixes the reproducer here:
https://lore.kernel.org/linux-xfs/CAEJPjCvT3Uag-pMTYuigEjWZHn1sGMZ0GCjVVCv29...
which uses reserved blocks to create file systems that are gravely out of space and thus cause at least xfs_file_alloc_space to hang and trigger the lack of ENOSPC handling in xfs_dquot_disk_alloc.
Note that this patch does not actually make any caller but xfs_alloc_file_space deal intelligently with case 2) above.
Signed-off-by: Christoph Hellwig hch@lst.de Reported-by: 刘通 lyutoon@gmail.com Reviewed-by: "Darrick J. Wong" djwong@kernel.org Signed-off-by: Chandan Babu R chandanbabu@kernel.org Signed-off-by: Leah Rumancik leah.rumancik@gmail.com Acked-by: "Darrick J. Wong" djwong@kernel.org --- fs/xfs/libxfs/xfs_attr_remote.c | 1 - fs/xfs/libxfs/xfs_bmap.c | 46 ++++++++++++++++++++++++++------- fs/xfs/libxfs/xfs_da_btree.c | 20 ++++---------- fs/xfs/xfs_bmap_util.c | 31 +++++++++++----------- fs/xfs/xfs_dquot.c | 1 - fs/xfs/xfs_iomap.c | 8 ------ fs/xfs/xfs_reflink.c | 14 ---------- fs/xfs/xfs_rtalloc.c | 2 -- 8 files changed, 57 insertions(+), 66 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index d440393b40eb..54de405cbab5 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -617,11 +617,10 @@ xfs_attr_rmtval_set_blk( attr->xattri_blkcnt, XFS_BMAPI_ATTRFORK, args->total, map, &nmap); if (error) return error;
- ASSERT(nmap == 1); ASSERT((map->br_startblock != DELAYSTARTBLOCK) && (map->br_startblock != HOLESTARTBLOCK));
/* roll attribute extent map forwards */ attr->xattri_lblkno += map->br_blockcount; diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 2a3b78032cb0..0235c1dd3d7e 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4111,12 +4111,14 @@ xfs_bmapi_allocate( else error = xfs_bmap_btalloc(bma); } else { error = xfs_bmap_alloc_userdata(bma); } - if (error || bma->blkno == NULLFSBLOCK) + if (error) return error; + if (bma->blkno == NULLFSBLOCK) + return -ENOSPC;
if (bma->flags & XFS_BMAPI_ZERO) { error = xfs_zero_extent(bma->ip, bma->blkno, bma->length); if (error) return error; @@ -4292,10 +4294,19 @@ xfs_bmapi_finish( /* * Map file blocks to filesystem blocks, and allocate blocks or convert the * extent state if necessary. Details behaviour is controlled by the flags * parameter. Only allocates blocks from a single allocation group, to avoid * locking problems. + * + * Returns 0 on success and places the extent mappings in mval. nmaps is used + * as an input/output parameter where the caller specifies the maximum number + * of mappings that may be returned and xfs_bmapi_write passes back the number + * of mappings (including existing mappings) it found. + * + * Returns a negative error code on failure, including -ENOSPC when it could not + * allocate any blocks and -ENOSR when it did allocate blocks to convert a + * delalloc range, but those blocks were before the passed in range. */ int xfs_bmapi_write( struct xfs_trans *tp, /* transaction pointer */ struct xfs_inode *ip, /* incore inode */ @@ -4419,14 +4430,20 @@ xfs_bmapi_write( bma.length = len;
ASSERT(len > 0); ASSERT(bma.length > 0); error = xfs_bmapi_allocate(&bma); - if (error) + if (error) { + /* + * If we already allocated space in a previous + * iteration return what we go so far when + * running out of space. + */ + if (error == -ENOSPC && bma.nallocs) + break; goto error0; - if (bma.blkno == NULLFSBLOCK) - break; + }
/* * If this is a CoW allocation, record the data in * the refcount btree for orphan recovery. */ @@ -4460,22 +4477,36 @@ xfs_bmapi_write( /* Else go on to the next record. */ bma.prev = bma.got; if (!xfs_iext_next_extent(ifp, &bma.icur, &bma.got)) eof = true; } - *nmap = n;
error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags, whichfork); if (error) goto error0;
ASSERT(ifp->if_format != XFS_DINODE_FMT_BTREE || ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork)); xfs_bmapi_finish(&bma, whichfork, 0); xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval, - orig_nmap, *nmap); + orig_nmap, n); + + /* + * When converting delayed allocations, xfs_bmapi_allocate ignores + * the passed in bno and always converts from the start of the found + * delalloc extent. + * + * To avoid a successful return with *nmap set to 0, return the magic + * -ENOSR error code for this particular case so that the caller can + * handle it. + */ + if (!n) { + ASSERT(bma.nallocs >= *nmap); + return -ENOSR; + } + *nmap = n; return 0; error0: xfs_bmapi_finish(&bma, whichfork, error); return error; } @@ -4578,13 +4609,10 @@ xfs_bmapi_convert_delalloc(
error = xfs_bmapi_allocate(&bma); if (error) goto out_finish;
- error = -ENOSPC; - if (WARN_ON_ONCE(bma.blkno == NULLFSBLOCK)) - goto out_finish; error = -EFSCORRUPTED; if (WARN_ON_ONCE(!xfs_valid_startblock(ip, bma.got.br_startblock))) goto out_finish;
XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, bma.length)); diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 282c7cf032f4..12e3cca804b7 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -2156,61 +2156,51 @@ xfs_da_grow_inode_int( { struct xfs_trans *tp = args->trans; struct xfs_inode *dp = args->dp; int w = args->whichfork; xfs_rfsblock_t nblks = dp->i_nblocks; - struct xfs_bmbt_irec map, *mapp; - int nmap, error, got, i, mapi; + struct xfs_bmbt_irec map, *mapp = ↦ + int nmap, error, got, i, mapi = 1;
/* * Find a spot in the file space to put the new block. */ error = xfs_bmap_first_unused(tp, dp, count, bno, w); if (error) return error;
/* * Try mapping it in one filesystem block. */ nmap = 1; error = xfs_bmapi_write(tp, dp, *bno, count, xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG, args->total, &map, &nmap); - if (error) - return error; - - ASSERT(nmap <= 1); - if (nmap == 1) { - mapp = ↦ - mapi = 1; - } else if (nmap == 0 && count > 1) { + if (error == -ENOSPC && count > 1) { xfs_fileoff_t b; int c;
/* * If we didn't get it and the block might work if fragmented, * try without the CONTIG flag. Loop until we get it all. */ mapp = kmem_alloc(sizeof(*mapp) * count, 0); for (b = *bno, mapi = 0; b < *bno + count; ) { c = (int)(*bno + count - b); nmap = min(XFS_BMAP_MAX_NMAP, c); error = xfs_bmapi_write(tp, dp, b, c, xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA, args->total, &mapp[mapi], &nmap); if (error) goto out_free_map; - if (nmap < 1) - break; mapi += nmap; b = mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount; } - } else { - mapi = 0; - mapp = NULL; } + if (error) + goto out_free_map;
/* * Count the blocks we got, make sure it matches the total. */ for (i = 0, got = 0; i < mapi; i++) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 468bb61a5e46..399451aab26a 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -866,37 +866,36 @@ xfs_alloc_file_space( error = xfs_iext_count_upgrade(tp, ip, XFS_IEXT_ADD_NOSPLIT_CNT); if (error) goto error;
- error = xfs_bmapi_write(tp, ip, startoffset_fsb, - allocatesize_fsb, XFS_BMAPI_PREALLOC, 0, imapp, - &nimaps); - if (error) - goto error; - - ip->i_diflags |= XFS_DIFLAG_PREALLOC; - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - - error = xfs_trans_commit(tp); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - if (error) - break; - /* * If the allocator cannot find a single free extent large * enough to cover the start block of the requested range, - * xfs_bmapi_write will return 0 but leave *nimaps set to 0. + * xfs_bmapi_write will return -ENOSR. * * In that case we simply need to keep looping with the same * startoffset_fsb so that one of the following allocations * will eventually reach the requested range. */ - if (nimaps) { + error = xfs_bmapi_write(tp, ip, startoffset_fsb, + allocatesize_fsb, XFS_BMAPI_PREALLOC, 0, imapp, + &nimaps); + if (error) { + if (error != -ENOSR) + goto error; + error = 0; + } else { startoffset_fsb += imapp->br_blockcount; allocatesize_fsb -= imapp->br_blockcount; } + + ip->i_diflags |= XFS_DIFLAG_PREALLOC; + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + + error = xfs_trans_commit(tp); + xfs_iunlock(ip, XFS_ILOCK_EXCL); }
return error;
error: diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index a8b2f3b278ea..6186b69be50a 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -331,11 +331,10 @@ xfs_dquot_disk_alloc( &nmaps); if (error) goto err_cancel;
ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB); - ASSERT(nmaps == 1); ASSERT((map.br_startblock != DELAYSTARTBLOCK) && (map.br_startblock != HOLESTARTBLOCK));
/* * Keep track of the blkno to save a lookup later diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index ab5512c0bcf7..60d9638cec6d 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -307,18 +307,10 @@ xfs_iomap_write_direct( */ error = xfs_trans_commit(tp); if (error) goto out_unlock;
- /* - * Copy any maps to caller's array and return any error. - */ - if (nimaps == 0) { - error = -ENOSPC; - goto out_unlock; - } - if (unlikely(!xfs_valid_startblock(ip, imap->br_startblock))) error = xfs_alert_fsblock_zero(ip, imap);
out_unlock: *seq = xfs_iomap_inode_sequence(ip, 0); diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 1bac6a8af970..0405226fb74c 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -429,17 +429,10 @@ xfs_reflink_fill_cow_hole( xfs_inode_set_cowblocks_tag(ip); error = xfs_trans_commit(tp); if (error) return error;
- /* - * Allocation succeeded but the requested range was not even partially - * satisfied? Bail out! - */ - if (nimaps == 0) - return -ENOSPC; - convert: return xfs_reflink_convert_unwritten(ip, imap, cmap, convert_now);
out_trans_cancel: xfs_trans_cancel(tp); @@ -498,17 +491,10 @@ xfs_reflink_fill_delalloc(
xfs_inode_set_cowblocks_tag(ip); error = xfs_trans_commit(tp); if (error) return error; - - /* - * Allocation succeeded but the requested range was not even - * partially satisfied? Bail out! - */ - if (nimaps == 0) - return -ENOSPC; } while (cmap->br_startoff + cmap->br_blockcount <= imap->br_startoff);
return xfs_reflink_convert_unwritten(ip, imap, cmap, convert_now);
out_trans_cancel: diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 7c5134899634..5cf1e91f4c20 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -838,12 +838,10 @@ xfs_growfs_rt_alloc( * Allocate blocks to the bitmap file. */ nmap = 1; error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks, XFS_BMAPI_METADATA, 0, &map, &nmap); - if (!error && nmap < 1) - error = -ENOSPC; if (error) goto out_trans_cancel; /* * Free any blocks freed up in the transaction, then commit. */