Backport a lazytime fix from upstream to 4.14-stable. To make it cherry-pick cleanly, I first cherry-picked two other commits. Commit #2 had a conflict due to it making a change to fs/xfs/ which isn't applicable to 4.14. I dropped that part of the change.
Christoph Hellwig (1): fs: move I_DIRTY_INODE to fs.h
Eric Biggers (1): fs: fix lazytime expiration handling in __writeback_single_inode()
Jan Kara (1): writeback: Drop I_DIRTY_TIME_EXPIRE
fs/ext4/inode.c | 6 ++--- fs/fs-writeback.c | 43 +++++++++++++------------------- fs/gfs2/super.c | 2 +- include/linux/fs.h | 4 +-- include/trace/events/writeback.h | 1 - 5 files changed, 24 insertions(+), 32 deletions(-)
From: Christoph Hellwig hch@lst.de
commit 0e11f6443f522f89509495b13ef1f3745640144d upstream.
And use it in a few more places rather than opencoding the values.
Signed-off-by: Christoph Hellwig hch@lst.de Signed-off-by: Al Viro viro@zeniv.linux.org.uk Signed-off-by: Eric Biggers ebiggers@google.com --- fs/ext4/inode.c | 4 ++-- fs/fs-writeback.c | 9 +++------ fs/gfs2/super.c | 2 +- include/linux/fs.h | 3 ++- 4 files changed, 8 insertions(+), 10 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 898f962d3a068..098856fcecf8c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5064,12 +5064,12 @@ static int other_inode_match(struct inode * inode, unsigned long ino,
if ((inode->i_ino != ino) || (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW | - I_DIRTY_SYNC | I_DIRTY_DATASYNC)) || + I_DIRTY_INODE)) || ((inode->i_state & I_DIRTY_TIME) == 0)) return 0; spin_lock(&inode->i_lock); if (((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW | - I_DIRTY_SYNC | I_DIRTY_DATASYNC)) == 0) && + I_DIRTY_INODE)) == 0) && (inode->i_state & I_DIRTY_TIME)) { struct ext4_inode_info *ei = EXT4_I(inode);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 3dbb875ed7903..5a115dc9bc9a9 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1400,7 +1400,7 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
dirty = inode->i_state & I_DIRTY; if (inode->i_state & I_DIRTY_TIME) { - if ((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) || + if ((dirty & I_DIRTY_INODE) || wbc->sync_mode == WB_SYNC_ALL || unlikely(inode->i_state & I_DIRTY_TIME_EXPIRED) || unlikely(time_after(jiffies, @@ -2136,7 +2136,6 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode) */ void __mark_inode_dirty(struct inode *inode, int flags) { -#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) struct super_block *sb = inode->i_sb; int dirtytime;
@@ -2146,7 +2145,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) * Don't do this for I_DIRTY_PAGES - that doesn't actually * dirty the inode itself */ - if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_TIME)) { + if (flags & (I_DIRTY_INODE | I_DIRTY_TIME)) { trace_writeback_dirty_inode_start(inode, flags);
if (sb->s_op->dirty_inode) @@ -2222,7 +2221,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) if (dirtytime) inode->dirtied_time_when = jiffies;
- if (inode->i_state & (I_DIRTY_INODE | I_DIRTY_PAGES)) + if (inode->i_state & I_DIRTY) dirty_list = &wb->b_dirty; else dirty_list = &wb->b_dirty_time; @@ -2246,8 +2245,6 @@ void __mark_inode_dirty(struct inode *inode, int flags) } out_unlock_inode: spin_unlock(&inode->i_lock); - -#undef I_DIRTY_INODE } EXPORT_SYMBOL(__mark_inode_dirty);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 639e2c86758a4..bcf95ec1bc31d 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -791,7 +791,7 @@ static void gfs2_dirty_inode(struct inode *inode, int flags) int need_endtrans = 0; int ret;
- if (!(flags & (I_DIRTY_DATASYNC|I_DIRTY_SYNC))) + if (!(flags & I_DIRTY_INODE)) return; if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) return; diff --git a/include/linux/fs.h b/include/linux/fs.h index 30172ad84b25f..50e7455195f77 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2015,7 +2015,8 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) #define I_OVL_INUSE (1 << 14) #define I_SYNC_QUEUED (1 << 17)
-#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) +#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) +#define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES) #define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME)
extern void __mark_inode_dirty(struct inode *, int);
From: Jan Kara jack@suse.cz
commit 5fcd57505c002efc5823a7355e21f48dd02d5a51 upstream.
The only use of I_DIRTY_TIME_EXPIRE is to detect in __writeback_single_inode() that inode got there because flush worker decided it's time to writeback the dirty inode time stamps (either because we are syncing or because of age). However we can detect this directly in __writeback_single_inode() and there's no need for the strange propagation with I_DIRTY_TIME_EXPIRE flag.
Reviewed-by: Christoph Hellwig hch@lst.de Signed-off-by: Jan Kara jack@suse.cz Signed-off-by: Eric Biggers ebiggers@google.com --- fs/ext4/inode.c | 2 +- fs/fs-writeback.c | 28 +++++++++++----------------- include/linux/fs.h | 1 - include/trace/events/writeback.h | 1 - 4 files changed, 12 insertions(+), 20 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 098856fcecf8c..eb635eab304ed 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5073,7 +5073,7 @@ static int other_inode_match(struct inode * inode, unsigned long ino, (inode->i_state & I_DIRTY_TIME)) { struct ext4_inode_info *ei = EXT4_I(inode);
- inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED); + inode->i_state &= ~I_DIRTY_TIME; spin_unlock(&inode->i_lock);
spin_lock(&ei->i_raw_lock); diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 5a115dc9bc9a9..d6c05e5bdacb8 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1154,7 +1154,7 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t) */ static int move_expired_inodes(struct list_head *delaying_queue, struct list_head *dispatch_queue, - int flags, unsigned long dirtied_before) + unsigned long dirtied_before) { LIST_HEAD(tmp); struct list_head *pos, *node; @@ -1170,8 +1170,6 @@ static int move_expired_inodes(struct list_head *delaying_queue, list_move(&inode->i_io_list, &tmp); moved++; spin_lock(&inode->i_lock); - if (flags & EXPIRE_DIRTY_ATIME) - inode->i_state |= I_DIRTY_TIME_EXPIRED; inode->i_state |= I_SYNC_QUEUED; spin_unlock(&inode->i_lock); if (sb_is_blkdev_sb(inode->i_sb)) @@ -1219,11 +1217,11 @@ static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work,
assert_spin_locked(&wb->list_lock); list_splice_init(&wb->b_more_io, &wb->b_io); - moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, 0, dirtied_before); + moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, dirtied_before); if (!work->for_sync) time_expire_jif = jiffies - dirtytime_expire_interval * HZ; moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io, - EXPIRE_DIRTY_ATIME, time_expire_jif); + time_expire_jif); if (moved) wb_io_lists_populated(wb); trace_writeback_queue_io(wb, work, dirtied_before, moved); @@ -1399,18 +1397,14 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) spin_lock(&inode->i_lock);
dirty = inode->i_state & I_DIRTY; - if (inode->i_state & I_DIRTY_TIME) { - if ((dirty & I_DIRTY_INODE) || - wbc->sync_mode == WB_SYNC_ALL || - unlikely(inode->i_state & I_DIRTY_TIME_EXPIRED) || - unlikely(time_after(jiffies, - (inode->dirtied_time_when + - dirtytime_expire_interval * HZ)))) { - dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED; - trace_writeback_lazytime(inode); - } - } else - inode->i_state &= ~I_DIRTY_TIME_EXPIRED; + if ((inode->i_state & I_DIRTY_TIME) && + ((dirty & I_DIRTY_INODE) || + wbc->sync_mode == WB_SYNC_ALL || wbc->for_sync || + time_after(jiffies, inode->dirtied_time_when + + dirtytime_expire_interval * HZ))) { + dirty |= I_DIRTY_TIME; + trace_writeback_lazytime(inode); + } inode->i_state &= ~dirty;
/* diff --git a/include/linux/fs.h b/include/linux/fs.h index 50e7455195f77..309c151decd8c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2010,7 +2010,6 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) #define I_DIO_WAKEUP (1 << __I_DIO_WAKEUP) #define I_LINKABLE (1 << 10) #define I_DIRTY_TIME (1 << 11) -#define I_DIRTY_TIME_EXPIRED (1 << 12) #define I_WB_SWITCH (1 << 13) #define I_OVL_INUSE (1 << 14) #define I_SYNC_QUEUED (1 << 17) diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 3a76ca2eecd06..627f5759b67d1 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -20,7 +20,6 @@ {I_CLEAR, "I_CLEAR"}, \ {I_SYNC, "I_SYNC"}, \ {I_DIRTY_TIME, "I_DIRTY_TIME"}, \ - {I_DIRTY_TIME_EXPIRED, "I_DIRTY_TIME_EXPIRED"}, \ {I_REFERENCED, "I_REFERENCED"} \ )
From: Eric Biggers ebiggers@google.com
commit 1e249cb5b7fc09ff216aa5a12f6c302e434e88f9 upstream.
When lazytime is enabled and an inode is being written due to its in-memory updated timestamps having expired, either due to a sync() or syncfs() system call or due to dirtytime_expire_interval having elapsed, the VFS needs to inform the filesystem so that the filesystem can copy the inode's timestamps out to the on-disk data structures.
This is done by __writeback_single_inode() calling mark_inode_dirty_sync(), which then calls ->dirty_inode(I_DIRTY_SYNC).
However, this occurs after __writeback_single_inode() has already cleared the dirty flags from ->i_state. This causes two bugs:
- mark_inode_dirty_sync() redirties the inode, causing it to remain dirty. This wastefully causes the inode to be written twice. But more importantly, it breaks cases where sync_filesystem() is expected to clean dirty inodes. This includes the FS_IOC_REMOVE_ENCRYPTION_KEY ioctl (as reported at https://lore.kernel.org/r/20200306004555.GB225345@gmail.com), as well as possibly filesystem freezing (freeze_super()).
- Since ->i_state doesn't contain I_DIRTY_TIME when ->dirty_inode() is called from __writeback_single_inode() for lazytime expiration, xfs_fs_dirty_inode() ignores the notification. (XFS only cares about lazytime expirations, and it assumes that i_state will contain I_DIRTY_TIME during those.) Therefore, lazy timestamps aren't persisted by sync(), syncfs(), or dirtytime_expire_interval on XFS.
Fix this by moving the call to mark_inode_dirty_sync() to earlier in __writeback_single_inode(), before the dirty flags are cleared from i_state. This makes filesystems be properly notified of the timestamp expiration, and it avoids incorrectly redirtying the inode.
This fixes xfstest generic/580 (which tests FS_IOC_REMOVE_ENCRYPTION_KEY) when run on ext4 or f2fs with lazytime enabled. It also fixes the new lazytime xfstest I've proposed, which reproduces the above-mentioned XFS bug (https://lore.kernel.org/r/20210105005818.92978-1-ebiggers@kernel.org).
Alternatively, we could call ->dirty_inode(I_DIRTY_SYNC) directly. But due to the introduction of I_SYNC_QUEUED, mark_inode_dirty_sync() is the right thing to do because mark_inode_dirty_sync() now knows not to move the inode to a writeback list if it is currently queued for sync.
Fixes: 0ae45f63d4ef ("vfs: add support for a lazytime mount option") Cc: stable@vger.kernel.org Depends-on: 5afced3bf281 ("writeback: Avoid skipping inode writeback") Link: https://lore.kernel.org/r/20210112190253.64307-2-ebiggers@kernel.org Suggested-by: Jan Kara jack@suse.cz Reviewed-by: Christoph Hellwig hch@lst.de Reviewed-by: Jan Kara jack@suse.cz Signed-off-by: Eric Biggers ebiggers@google.com Signed-off-by: Jan Kara jack@suse.cz --- fs/fs-writeback.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index d6c05e5bdacb8..384f95e1936dd 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1390,21 +1390,25 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) }
/* - * Some filesystems may redirty the inode during the writeback - * due to delalloc, clear dirty metadata flags right before - * write_inode() + * If the inode has dirty timestamps and we need to write them, call + * mark_inode_dirty_sync() to notify the filesystem about it and to + * change I_DIRTY_TIME into I_DIRTY_SYNC. */ - spin_lock(&inode->i_lock); - - dirty = inode->i_state & I_DIRTY; if ((inode->i_state & I_DIRTY_TIME) && - ((dirty & I_DIRTY_INODE) || - wbc->sync_mode == WB_SYNC_ALL || wbc->for_sync || + (wbc->sync_mode == WB_SYNC_ALL || wbc->for_sync || time_after(jiffies, inode->dirtied_time_when + dirtytime_expire_interval * HZ))) { - dirty |= I_DIRTY_TIME; trace_writeback_lazytime(inode); + mark_inode_dirty_sync(inode); } + + /* + * Some filesystems may redirty the inode during the writeback + * due to delalloc, clear dirty metadata flags right before + * write_inode() + */ + spin_lock(&inode->i_lock); + dirty = inode->i_state & I_DIRTY; inode->i_state &= ~dirty;
/* @@ -1425,8 +1429,6 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
spin_unlock(&inode->i_lock);
- if (dirty & I_DIRTY_TIME) - mark_inode_dirty_sync(inode); /* Don't write the inode if only I_DIRTY_PAGES was set */ if (dirty & ~I_DIRTY_PAGES) { int err = write_inode(inode, wbc);
linux-stable-mirror@lists.linaro.org