Hi all,
This is the latest revision of a patchset that adds kernel support to XFS for reverse mapping on the realtime device. This time around I've fixed some of the bitrot that I've noticed over the past few months, and most notably have converted the rtrmapbt to use the metadata inode directory feature instead of burning more space in the superblock.
At the beginning of the set are patches to implement storing B+tree leaves in an inode root, since the realtime rmapbt is rooted in an inode, unlike the regular rmapbt, which is rooted in an AG block. Prior to this, the only btree that could be rooted in the inode fork was the block mapping btree; if all the extent records fit in the inode, the format would be switched from 'btree' to 'extents'.
The next few patches enhance the reverse mapping routines to handle the parts that are specific to rtgroups -- adding the new btree type, adding a new log intent item type, and wiring up the metadata directory tree entries.
Finally, the last patches implement GETFSMAP with the rtrmapbt, scrub functionality for the rtrmapbt and rtbitmap, and online fsck functionality.
If you're going to start using this code, I strongly recommend pulling from my git trees, which are linked below.
This has been running on the djcloud for months with no problems. Enjoy! Comments and questions are, as always, welcome.
--D
kernel git tree: https://git.kernel.org/cgit/linux/kernel/git/djwong/xfs-linux.git/log/?h=rea...
xfsprogs git tree: https://git.kernel.org/cgit/linux/kernel/git/djwong/xfsprogs-dev.git/log/?h=...
fstests git tree: https://git.kernel.org/cgit/linux/kernel/git/djwong/xfstests-dev.git/log/?h=...
xfsdocs git tree: https://git.kernel.org/cgit/linux/kernel/git/djwong/xfs-documentation.git/lo... --- Commits in this patchset: * xfs: add some rtgroup inode helpers * xfs: prepare rmap btree cursor tracepoints for realtime * xfs: simplify the xfs_rmap_{alloc,free}_extent calling conventions * xfs: introduce realtime rmap btree ondisk definitions * xfs: realtime rmap btree transaction reservations * xfs: add realtime rmap btree operations * xfs: prepare rmap functions to deal with rtrmapbt * xfs: add a realtime flag to the rmap update log redo items * xfs: support recovering rmap intent items targetting realtime extents * xfs: pretty print metadata file types in error messages * xfs: support file data forks containing metadata btrees * xfs: add realtime reverse map inode to metadata directory * xfs: add metadata reservations for realtime rmap btrees * xfs: wire up a new metafile type for the realtime rmap * xfs: wire up rmap map and unmap to the realtime rmapbt * xfs: create routine to allocate and initialize a realtime rmap btree inode * xfs: wire up getfsmap to the realtime reverse mapping btree * xfs: check that the rtrmapbt maxlevels doesn't increase when growing fs * xfs: report realtime rmap btree corruption errors to the health system * xfs: allow queued realtime intents to drain before scrubbing * xfs: scrub the realtime rmapbt * xfs: cross-reference realtime bitmap to realtime rmapbt scrubber * xfs: cross-reference the realtime rmapbt * xfs: scan rt rmap when we're doing an intense rmap check of bmbt mappings * xfs: scrub the metadir path of rt rmap btree files * xfs: walk the rt reverse mapping tree when rebuilding rmap * xfs: online repair of realtime file bmaps * xfs: repair inodes that have realtime extents * xfs: repair rmap btree inodes * xfs: online repair of realtime bitmaps for a realtime group * xfs: support repairing metadata btrees rooted in metadir inodes * xfs: online repair of the realtime rmap btree * xfs: create a shadow rmap btree during 
realtime rmap repair * xfs: hook live realtime rmap operations during a repair operation * xfs: don't shut down the filesystem for media failures beyond end of log * xfs: react to fsdax failure notifications on the rt device * xfs: enable realtime rmap btree --- fs/xfs/Makefile | 3 fs/xfs/libxfs/xfs_btree.c | 73 +++ fs/xfs/libxfs/xfs_btree.h | 8 fs/xfs/libxfs/xfs_btree_mem.c | 1 fs/xfs/libxfs/xfs_btree_staging.c | 1 fs/xfs/libxfs/xfs_defer.h | 1 fs/xfs/libxfs/xfs_exchmaps.c | 4 fs/xfs/libxfs/xfs_format.h | 28 + fs/xfs/libxfs/xfs_fs.h | 7 fs/xfs/libxfs/xfs_health.h | 4 fs/xfs/libxfs/xfs_inode_buf.c | 32 + fs/xfs/libxfs/xfs_inode_fork.c | 25 + fs/xfs/libxfs/xfs_log_format.h | 6 fs/xfs/libxfs/xfs_log_recover.h | 2 fs/xfs/libxfs/xfs_metafile.c | 18 + fs/xfs/libxfs/xfs_metafile.h | 2 fs/xfs/libxfs/xfs_ondisk.h | 2 fs/xfs/libxfs/xfs_refcount.c | 6 fs/xfs/libxfs/xfs_rmap.c | 171 +++++- fs/xfs/libxfs/xfs_rmap.h | 12 fs/xfs/libxfs/xfs_rtbitmap.c | 2 fs/xfs/libxfs/xfs_rtbitmap.h | 9 fs/xfs/libxfs/xfs_rtgroup.c | 53 +- fs/xfs/libxfs/xfs_rtgroup.h | 49 ++ fs/xfs/libxfs/xfs_rtrmap_btree.c | 1011 +++++++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_rtrmap_btree.h | 210 ++++++++ fs/xfs/libxfs/xfs_sb.c | 6 fs/xfs/libxfs/xfs_shared.h | 14 + fs/xfs/libxfs/xfs_trans_resv.c | 12 fs/xfs/libxfs/xfs_trans_space.h | 13 fs/xfs/scrub/alloc_repair.c | 5 fs/xfs/scrub/bmap.c | 108 +++- fs/xfs/scrub/bmap_repair.c | 129 +++++ fs/xfs/scrub/common.c | 160 ++++++ fs/xfs/scrub/common.h | 23 + fs/xfs/scrub/health.c | 1 fs/xfs/scrub/inode.c | 10 fs/xfs/scrub/inode_repair.c | 136 +++++ fs/xfs/scrub/metapath.c | 3 fs/xfs/scrub/newbt.c | 42 ++ fs/xfs/scrub/newbt.h | 1 fs/xfs/scrub/reap.c | 41 ++ fs/xfs/scrub/reap.h | 2 fs/xfs/scrub/repair.c | 191 +++++++ fs/xfs/scrub/repair.h | 17 + fs/xfs/scrub/rgsuper.c | 6 fs/xfs/scrub/rmap_repair.c | 84 +++ fs/xfs/scrub/rtbitmap.c | 75 ++- fs/xfs/scrub/rtbitmap.h | 55 ++ fs/xfs/scrub/rtbitmap_repair.c | 429 +++++++++++++++- fs/xfs/scrub/rtrmap.c | 271 ++++++++++ 
fs/xfs/scrub/rtrmap_repair.c | 903 +++++++++++++++++++++++++++++++++ fs/xfs/scrub/rtsummary.c | 17 - fs/xfs/scrub/rtsummary_repair.c | 3 fs/xfs/scrub/scrub.c | 11 fs/xfs/scrub/scrub.h | 14 + fs/xfs/scrub/stats.c | 1 fs/xfs/scrub/tempexch.h | 2 fs/xfs/scrub/tempfile.c | 20 - fs/xfs/scrub/trace.c | 1 fs/xfs/scrub/trace.h | 228 ++++++++ fs/xfs/xfs_buf.c | 1 fs/xfs/xfs_buf_item_recover.c | 4 fs/xfs/xfs_drain.c | 20 - fs/xfs/xfs_drain.h | 7 fs/xfs/xfs_fsmap.c | 174 ++++++ fs/xfs/xfs_fsops.c | 11 fs/xfs/xfs_health.c | 1 fs/xfs/xfs_inode.c | 19 + fs/xfs/xfs_inode_item.c | 2 fs/xfs/xfs_inode_item_recover.c | 44 +- fs/xfs/xfs_log_recover.c | 2 fs/xfs/xfs_mount.c | 5 fs/xfs/xfs_mount.h | 9 fs/xfs/xfs_notify_failure.c | 230 +++++--- fs/xfs/xfs_notify_failure.h | 11 fs/xfs/xfs_qm.c | 8 fs/xfs/xfs_rmap_item.c | 216 +++++++- fs/xfs/xfs_rtalloc.c | 82 ++- fs/xfs/xfs_rtalloc.h | 10 fs/xfs/xfs_stats.c | 4 fs/xfs/xfs_stats.h | 2 fs/xfs/xfs_super.c | 6 fs/xfs/xfs_super.h | 1 fs/xfs/xfs_trace.h | 104 ++-- 85 files changed, 5381 insertions(+), 366 deletions(-) create mode 100644 fs/xfs/libxfs/xfs_rtrmap_btree.c create mode 100644 fs/xfs/libxfs/xfs_rtrmap_btree.h create mode 100644 fs/xfs/scrub/rtrmap.c create mode 100644 fs/xfs/scrub/rtrmap_repair.c create mode 100644 fs/xfs/xfs_notify_failure.h
From: Darrick J. Wong <djwong@kernel.org>
If the filesystem has an external log device on pmem and the pmem reports a media error beyond the end of the log area, don't shut down the filesystem because we don't use that space.
Cc: <stable@vger.kernel.org> # v6.0
Fixes: 6f643c57d57c56 ("xfs: implement ->notify_failure() for XFS")
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_notify_failure.c | 121 +++++++++++++++++++++++++++++--------------
 1 file changed, 82 insertions(+), 39 deletions(-)
diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c index fa50e5308292d3..0b0b0f31aca274 100644 --- a/fs/xfs/xfs_notify_failure.c +++ b/fs/xfs/xfs_notify_failure.c @@ -153,6 +153,79 @@ xfs_dax_notify_failure_thaw( thaw_super(sb, FREEZE_HOLDER_USERSPACE); }
+static int +xfs_dax_translate_range( + struct xfs_buftarg *btp, + u64 offset, + u64 len, + xfs_daddr_t *daddr, + uint64_t *bblen) +{ + u64 dev_start = btp->bt_dax_part_off; + u64 dev_len = bdev_nr_bytes(btp->bt_bdev); + u64 dev_end = dev_start + dev_len - 1; + + /* Notify failure on the whole device. */ + if (offset == 0 && len == U64_MAX) { + offset = dev_start; + len = dev_len; + } + + /* Ignore the range out of filesystem area */ + if (offset + len - 1 < dev_start) + return -ENXIO; + if (offset > dev_end) + return -ENXIO; + + /* Calculate the real range when it touches the boundary */ + if (offset > dev_start) + offset -= dev_start; + else { + len -= dev_start - offset; + offset = 0; + } + if (offset + len - 1 > dev_end) + len = dev_end - offset + 1; + + *daddr = BTOBB(offset); + *bblen = BTOBB(len); + return 0; +} + +static int +xfs_dax_notify_logdev_failure( + struct xfs_mount *mp, + u64 offset, + u64 len, + int mf_flags) +{ + xfs_daddr_t daddr; + uint64_t bblen; + int error; + + /* + * Return ENXIO instead of shutting down the filesystem if the failed + * region is beyond the end of the log. + */ + error = xfs_dax_translate_range(mp->m_logdev_targp, + offset, len, &daddr, &bblen); + if (error) + return error; + + /* + * In the pre-remove case the failure notification is attempting to + * trigger a force unmount. The expectation is that the device is + * still present, but its removal is in progress and can not be + * cancelled, proceed with accessing the log device. + */ + if (mf_flags & MF_MEM_PRE_REMOVE) + return 0; + + xfs_err(mp, "ondisk log corrupt, shutting down fs!"); + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_ONDISK); + return -EFSCORRUPTED; +} + static int xfs_dax_notify_ddev_failure( struct xfs_mount *mp, @@ -263,8 +336,9 @@ xfs_dax_notify_failure( int mf_flags) { struct xfs_mount *mp = dax_holder(dax_dev); - u64 ddev_start; - u64 ddev_end; + xfs_daddr_t daddr; + uint64_t bblen; + int error;
if (!(mp->m_super->s_flags & SB_BORN)) { xfs_warn(mp, "filesystem is not ready for notify_failure()!"); @@ -279,17 +353,7 @@ xfs_dax_notify_failure(
if (mp->m_logdev_targp && mp->m_logdev_targp->bt_daxdev == dax_dev && mp->m_logdev_targp != mp->m_ddev_targp) { - /* - * In the pre-remove case the failure notification is attempting - * to trigger a force unmount. The expectation is that the - * device is still present, but its removal is in progress and - * can not be cancelled, proceed with accessing the log device. - */ - if (mf_flags & MF_MEM_PRE_REMOVE) - return 0; - xfs_err(mp, "ondisk log corrupt, shutting down fs!"); - xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_ONDISK); - return -EFSCORRUPTED; + return xfs_dax_notify_logdev_failure(mp, offset, len, mf_flags); }
if (!xfs_has_rmapbt(mp)) { @@ -297,33 +361,12 @@ xfs_dax_notify_failure( return -EOPNOTSUPP; }
- ddev_start = mp->m_ddev_targp->bt_dax_part_off; - ddev_end = ddev_start + bdev_nr_bytes(mp->m_ddev_targp->bt_bdev) - 1; + error = xfs_dax_translate_range(mp->m_ddev_targp, offset, len, &daddr, + &bblen); + if (error) + return error;
- /* Notify failure on the whole device. */ - if (offset == 0 && len == U64_MAX) { - offset = ddev_start; - len = bdev_nr_bytes(mp->m_ddev_targp->bt_bdev); - } - - /* Ignore the range out of filesystem area */ - if (offset + len - 1 < ddev_start) - return -ENXIO; - if (offset > ddev_end) - return -ENXIO; - - /* Calculate the real range when it touches the boundary */ - if (offset > ddev_start) - offset -= ddev_start; - else { - len -= ddev_start - offset; - offset = 0; - } - if (offset + len - 1 > ddev_end) - len = ddev_end - offset + 1; - - return xfs_dax_notify_ddev_failure(mp, BTOBB(offset), BTOBB(len), - mf_flags); + return xfs_dax_notify_ddev_failure(mp, daddr, bblen, mf_flags); }
const struct dax_holder_operations xfs_dax_holder_operations = {
linux-stable-mirror@lists.linaro.org