Hi all,
Here are even more bugfixes for 6.13 that have been accumulating since 6.12 was released. Now with a few extra comments that were requested by reviewers.
If you're going to start using this code, I strongly recommend pulling from my git trees, which are linked below.
With a bit of luck, this should all go splendidly. Comments and questions are, as always, welcome.
--D
kernel git tree: https://git.kernel.org/cgit/linux/kernel/git/djwong/xfs-linux.git/log/?h=pro... --- Commits in this patchset: * xfs: don't move nondir/nonreg temporary repair files to the metadir namespace * xfs: don't crash on corrupt /quotas dirent * xfs: check pre-metadir fields correctly * xfs: fix zero byte checking in the superblock scrubber * xfs: return from xfs_symlink_verify early on V4 filesystems * xfs: port xfs_ioc_start_commit to multigrain timestamps --- fs/xfs/libxfs/xfs_symlink_remote.c | 4 ++ fs/xfs/scrub/agheader.c | 71 ++++++++++++++++++++++++++++-------- fs/xfs/scrub/tempfile.c | 3 ++ fs/xfs/xfs_exchrange.c | 14 ++++--- fs/xfs/xfs_qm.c | 7 ++++ 5 files changed, 76 insertions(+), 23 deletions(-)
From: Darrick J. Wong djwong@kernel.org
Only directories or regular files are allowed in the metadata directory tree. Don't move the repair tempfile to the metadir namespace if this is not true; this will cause the inode verifiers to trip.
xrep_tempfile_adjust_directory_tree opportunistically moves sc->tempip from the regular directory tree to the metadata directory tree if sc->ip is part of the metadata directory tree. However, the scrub setup functions grab sc->ip and create sc->tempip before we actually get around to checking if the file mode is the right type for the scrubber.
IOWs, you can invoke the symlink scrubber with the file handle of a subdirectory in the metadir. xrep_setup_symlink will create a temporary symlink file, xrep_tempfile_adjust_directory_tree will foolishly try to set the METADATA flag on the temp symlink, which trips the inode verifier in the inode item precommit, which shuts down the filesystem when expensive checks are turned on. If they're /not/ turned on, then xchk_symlink will return ENOENT when it sees that it's been passed a symlink, but the invalid inode could still get flushed to disk. We don't want that.
Cc: stable@vger.kernel.org # v6.13-rc1 Fixes: 9dc31acb01a1c7 ("xfs: move repair temporary files to the metadata directory tree") Signed-off-by: "Darrick J. Wong" djwong@kernel.org --- fs/xfs/scrub/tempfile.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c index dc3802c7f678ce..82ecbb654fbb39 100644 --- a/fs/xfs/scrub/tempfile.c +++ b/fs/xfs/scrub/tempfile.c @@ -204,6 +204,9 @@ xrep_tempfile_adjust_directory_tree(
if (!sc->ip || !xfs_is_metadir_inode(sc->ip)) return 0; + if (!S_ISDIR(VFS_I(sc->tempip)->i_mode) && + !S_ISREG(VFS_I(sc->tempip)->i_mode)) + return 0;
xfs_ilock(sc->tempip, XFS_IOLOCK_EXCL); sc->temp_ilock_flags |= XFS_IOLOCK_EXCL;
From: Darrick J. Wong djwong@kernel.org
If the /quotas dirent points to an inode but the inode isn't loadable (and hence mkdir returns -EEXIST), don't crash, just bail out.
Cc: stable@vger.kernel.org # v6.13-rc1 Fixes: e80fbe1ad8eff7 ("xfs: use metadir for quota inodes") Signed-off-by: "Darrick J. Wong" djwong@kernel.org Reviewed-by: Christoph Hellwig hch@lst.de --- fs/xfs/xfs_qm.c | 7 +++++++ 1 file changed, 7 insertions(+)
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 69b70c3e999d72..dc8b1010d4d332 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -731,6 +731,13 @@ xfs_qm_create_metadir_qinos( error = xfs_dqinode_mkdir_parent(mp, &qi->qi_dirip); if (error && error != -EEXIST) return error; + /* + * If the /quotas dirent points to an inode that isn't + * loadable, qi_dirip will be NULL but mkdir_parent will return + * -EEXIST. In this case the metadir is corrupt, so bail out. + */ + if (XFS_IS_CORRUPT(mp, qi->qi_dirip == NULL)) + return -EFSCORRUPTED; }
if (XFS_IS_UQUOTA_ON(mp) && !qi->qi_uquotaip) {
From: Darrick J. Wong djwong@kernel.org
The checks that were added to the superblock scrubber for metadata directories aren't quite right -- the old inode pointers are now defined to be zeroes until someone else reuses them. Also consolidate the new metadir field checks to one place; they were inexplicably scattered around.
Cc: stable@vger.kernel.org # v6.13-rc1 Fixes: 28d756d4d562dc ("xfs: update sb field checks when metadir is turned on") Signed-off-by: "Darrick J. Wong" djwong@kernel.org Reviewed-by: Christoph Hellwig hch@lst.de --- fs/xfs/scrub/agheader.c | 40 +++++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 13 deletions(-)
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c index 1d41b85478da9d..88063d67cb5fd4 100644 --- a/fs/xfs/scrub/agheader.c +++ b/fs/xfs/scrub/agheader.c @@ -145,8 +145,11 @@ xchk_superblock( xchk_block_set_preen(sc, bp);
if (xfs_has_metadir(sc->mp)) { - if (sb->sb_metadirino != cpu_to_be64(mp->m_sb.sb_metadirino)) - xchk_block_set_preen(sc, bp); + if (sb->sb_rbmino != cpu_to_be64(0)) + xchk_block_set_corrupt(sc, bp); + + if (sb->sb_rsumino != cpu_to_be64(0)) + xchk_block_set_corrupt(sc, bp); } else { if (sb->sb_rbmino != cpu_to_be64(mp->m_sb.sb_rbmino)) xchk_block_set_preen(sc, bp); @@ -229,7 +232,13 @@ xchk_superblock( * sb_icount, sb_ifree, sb_fdblocks, sb_frexents */
- if (!xfs_has_metadir(mp)) { + if (xfs_has_metadir(mp)) { + if (sb->sb_uquotino != cpu_to_be64(0)) + xchk_block_set_corrupt(sc, bp); + + if (sb->sb_gquotino != cpu_to_be64(0)) + xchk_block_set_preen(sc, bp); + } else { if (sb->sb_uquotino != cpu_to_be64(mp->m_sb.sb_uquotino)) xchk_block_set_preen(sc, bp);
@@ -281,15 +290,8 @@ xchk_superblock( if (!!(sb->sb_features2 & cpu_to_be32(~v2_ok))) xchk_block_set_corrupt(sc, bp);
- if (xfs_has_metadir(mp)) { - if (sb->sb_rgblklog != mp->m_sb.sb_rgblklog) - xchk_block_set_corrupt(sc, bp); - if (memchr_inv(sb->sb_pad, 0, sizeof(sb->sb_pad))) - xchk_block_set_preen(sc, bp); - } else { - if (sb->sb_features2 != sb->sb_bad_features2) - xchk_block_set_preen(sc, bp); - } + if (sb->sb_features2 != sb->sb_bad_features2) + xchk_block_set_preen(sc, bp); }
/* Check sb_features2 flags that are set at mkfs time. */ @@ -351,7 +353,10 @@ xchk_superblock( if (sb->sb_spino_align != cpu_to_be32(mp->m_sb.sb_spino_align)) xchk_block_set_corrupt(sc, bp);
- if (!xfs_has_metadir(mp)) { + if (xfs_has_metadir(mp)) { + if (sb->sb_pquotino != cpu_to_be64(0)) + xchk_block_set_corrupt(sc, bp); + } else { if (sb->sb_pquotino != cpu_to_be64(mp->m_sb.sb_pquotino)) xchk_block_set_preen(sc, bp); } @@ -366,11 +371,20 @@ xchk_superblock( }
if (xfs_has_metadir(mp)) { + if (sb->sb_metadirino != cpu_to_be64(mp->m_sb.sb_metadirino)) + xchk_block_set_preen(sc, bp); + if (sb->sb_rgcount != cpu_to_be32(mp->m_sb.sb_rgcount)) xchk_block_set_corrupt(sc, bp);
if (sb->sb_rgextents != cpu_to_be32(mp->m_sb.sb_rgextents)) xchk_block_set_corrupt(sc, bp); + + if (sb->sb_rgblklog != mp->m_sb.sb_rgblklog) + xchk_block_set_corrupt(sc, bp); + + if (memchr_inv(sb->sb_pad, 0, sizeof(sb->sb_pad))) + xchk_block_set_corrupt(sc, bp); }
/* Everything else must be zero. */
From: Darrick J. Wong djwong@kernel.org
The logic to check that the region past the end of the superblock is all zeroes is wrong -- we don't want to check only the bytes past the end of the maximally sized ondisk superblock structure as currently defined in xfs_format.h; we want to check the bytes beyond the end of the ondisk as defined by the feature bits.
Port the superblock size logic from xfs_repair and then put it to use in xfs_scrub.
Cc: stable@vger.kernel.org # v4.15 Fixes: 21fb4cb1981ef7 ("xfs: scrub the secondary superblocks") Signed-off-by: "Darrick J. Wong" djwong@kernel.org Reviewed-by: Christoph Hellwig hch@lst.de --- fs/xfs/scrub/agheader.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-)
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c index 88063d67cb5fd4..9f8c312dfd3c82 100644 --- a/fs/xfs/scrub/agheader.c +++ b/fs/xfs/scrub/agheader.c @@ -59,6 +59,32 @@ xchk_superblock_xref( /* scrub teardown will take care of sc->sa for us */ }
+/* + * Calculate the ondisk superblock size in bytes given the feature set of the + * mounted filesystem (aka the primary sb). This is subtlely different from + * the logic in xfs_repair, which computes the size of a secondary sb given the + * featureset listed in the secondary sb. + */ +STATIC size_t +xchk_superblock_ondisk_size( + struct xfs_mount *mp) +{ + if (xfs_has_metadir(mp)) + return offsetofend(struct xfs_dsb, sb_pad); + if (xfs_has_metauuid(mp)) + return offsetofend(struct xfs_dsb, sb_meta_uuid); + if (xfs_has_crc(mp)) + return offsetofend(struct xfs_dsb, sb_lsn); + if (xfs_sb_version_hasmorebits(&mp->m_sb)) + return offsetofend(struct xfs_dsb, sb_bad_features2); + if (xfs_has_logv2(mp)) + return offsetofend(struct xfs_dsb, sb_logsunit); + if (xfs_has_sector(mp)) + return offsetofend(struct xfs_dsb, sb_logsectsize); + /* only support dirv2 or more recent */ + return offsetofend(struct xfs_dsb, sb_dirblklog); +} + /* * Scrub the filesystem superblock. * @@ -75,6 +101,7 @@ xchk_superblock( struct xfs_buf *bp; struct xfs_dsb *sb; struct xfs_perag *pag; + size_t sblen; xfs_agnumber_t agno; uint32_t v2_ok; __be32 features_mask; @@ -388,8 +415,8 @@ xchk_superblock( }
/* Everything else must be zero. */ - if (memchr_inv(sb + 1, 0, - BBTOB(bp->b_length) - sizeof(struct xfs_dsb))) + sblen = xchk_superblock_ondisk_size(mp); + if (memchr_inv((char *)sb + sblen, 0, BBTOB(bp->b_length) - sblen)) xchk_block_set_corrupt(sc, bp);
xchk_superblock_xref(sc, bp);
From: Darrick J. Wong djwong@kernel.org
V4 symlink blocks didn't have headers, so return early if this is a V4 filesystem.
Cc: stable@vger.kernel.org # v5.1 Fixes: 39708c20ab5133 ("xfs: miscellaneous verifier magic value fixups") Signed-off-by: "Darrick J. Wong" djwong@kernel.org Reviewed-by: Christoph Hellwig hch@lst.de --- fs/xfs/libxfs/xfs_symlink_remote.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c index f228127a88ff26..fb47a76ead18c2 100644 --- a/fs/xfs/libxfs/xfs_symlink_remote.c +++ b/fs/xfs/libxfs/xfs_symlink_remote.c @@ -92,8 +92,10 @@ xfs_symlink_verify( struct xfs_mount *mp = bp->b_mount; struct xfs_dsymlink_hdr *dsl = bp->b_addr;
+ /* no verification of non-crc buffers */ if (!xfs_has_crc(mp)) - return __this_address; + return NULL; + if (!xfs_verify_magic(bp, dsl->sl_magic)) return __this_address; if (!uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_meta_uuid))
From: Darrick J. Wong djwong@kernel.org
Take advantage of the multigrain timestamp APIs to ensure that nobody can sneak in and write things to a file between starting a file update operation and committing the results. This should have been part of the multigrain timestamp merge, but I forgot to fling it at jlayton when he resubmitted the patchset due to developer bandwidth problems.
Cc: jlayton@kernel.org Cc: stable@vger.kernel.org # v6.13-rc1 Fixes: 4e40eff0b5737c ("fs: add infrastructure for multigrain timestamps") Signed-off-by: Darrick J. Wong djwong@kernel.org Reviewed-by: Christoph Hellwig hch@lst.de Reviewed-by: Jeff Layton jlayton@kernel.org --- fs/xfs/xfs_exchrange.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/fs/xfs/xfs_exchrange.c b/fs/xfs/xfs_exchrange.c index 9ab05ad224d127..dd24de420714ab 100644 --- a/fs/xfs/xfs_exchrange.c +++ b/fs/xfs/xfs_exchrange.c @@ -854,7 +854,7 @@ xfs_ioc_start_commit( struct xfs_commit_range __user *argp) { struct xfs_commit_range args = { }; - struct timespec64 ts; + struct kstat kstat; struct xfs_commit_range_fresh *kern_f; struct xfs_commit_range_fresh __user *user_f; struct inode *inode2 = file_inode(file); @@ -871,12 +871,12 @@ xfs_ioc_start_commit( memcpy(&kern_f->fsid, ip2->i_mount->m_fixedfsid, sizeof(xfs_fsid_t));
xfs_ilock(ip2, lockflags); - ts = inode_get_ctime(inode2); - kern_f->file2_ctime = ts.tv_sec; - kern_f->file2_ctime_nsec = ts.tv_nsec; - ts = inode_get_mtime(inode2); - kern_f->file2_mtime = ts.tv_sec; - kern_f->file2_mtime_nsec = ts.tv_nsec; + /* Force writing of a distinct ctime if any writes happen. */ + fill_mg_cmtime(&kstat, STATX_CTIME | STATX_MTIME, inode2); + kern_f->file2_ctime = kstat.ctime.tv_sec; + kern_f->file2_ctime_nsec = kstat.ctime.tv_nsec; + kern_f->file2_mtime = kstat.mtime.tv_sec; + kern_f->file2_mtime_nsec = kstat.mtime.tv_nsec; kern_f->file2_ino = ip2->i_ino; kern_f->file2_gen = inode2->i_generation; kern_f->magic = XCR_FRESH_MAGIC;
linux-stable-mirror@lists.linaro.org