From: Kent Overstreet kent.overstreet@linux.dev
commit 3f6d5e6a468d02676244b868b210433831846127 upstream.
Our proliferation of memalloc_*_{save,restore} APIs is getting a bit silly, this adds a generic version and converts the existing save/restore functions to wrappers.
Signed-off-by: Kent Overstreet kent.overstreet@linux.dev Cc: Vlastimil Babka vbabka@suse.cz Cc: Matthew Wilcox willy@infradead.org Cc: Michal Hocko mhocko@kernel.org Cc: Darrick J. Wong djwong@kernel.org Cc: linux-mm@kvack.org Acked-by: Vlastimil Babka vbabka@suse.cz Signed-off-by: Long Li leo.lilong@huawei.com --- We encountered a deadlock issue in our internal version caused by PF_MEMALLOC_NOFS being unexpectedly cleared during inode inactive transaction. This issue appears to still exist in 6.1/6.6 lts version.
In the mainline kernel, before commit [2] f2e812c1522d ("xfs: don't use current->journal_info") was merged, we relied on current->journal_info to check for transaction recursion. During transaction rollback/commit, only the last transaction commit would call xfs_trans_clear_context(tp) to restore the nofs flag, which worked correctly.
After this patch was merged, we no longer check for transaction recursion, so each transaction rollback/commit calls xfs_trans_clear_context(tp) to restore the nofs flag. At this point, tp->t_pflags is set to 0 (except for the last one tp), and memalloc_nofs_restore(0) will not clear the PF_MEMALLOC_NOFS flag during transaction rollback, this is also correct.
However, this also implies that the above patch depends on commit [1] 3f6d5e6a468d ("mm: introduce memalloc_flags_{save,restore}"), because that patch modified the semantics of the memalloc_nofs_{save,restore} interface, and only after this modification can it ensure that memalloc_nofs_restore(0) won't clear the PF_MEMALLOC_NOFS flag.
In our 6.1/6.6 LTS versions, we directly backported commit [2] without backporting commit [1], which leads to confusion with the PF_MEMALLOC_NOFS flag during transaction rollback, for example as follows:
xfs_inodegc_worker nofs_flag = memalloc_nofs_save(); //set PF_MEMALLOC_NOFS in current->flags xfs_inactive xfs_attr_inactive(ip) xfs_trans_alloc(mp, &M_RES(mp)->tr_attrinval, 0, 0, 0, &trans) xfs_trans_set_context(tp) //tp->t_pflags ==> 1 xfs_trans_commit(trans) __xfs_trans_commit(tp) xfs_defer_trans_roll xfs_trans_roll *tpp = xfs_trans_dup(trans) xfs_trans_switch_context(tp, ntp) new_tp->t_pflags = old_tp->t_pflags; //new_tp->t_pflags ==> 1 old_tp->t_pflags = 0; //old_tp->t_pflags ==> 0 __xfs_trans_commit(trans) //commit old_tp xfs_trans_free(tp); //free old_tp xfs_trans_clear_context(tp) memalloc_nofs_restore(0) //clear PF_MEMALLOC_NOFS in current->flags ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ < commit new_tp > xfs_trans_free(tp) //free new_tp memalloc_nofs_restore(1) //set PF_MEMALLOC_NOFS in current->flags memalloc_nofs_restore(nofs_flag); //clear PF_MEMALLOC_NOFS in current->flags
So backport commit [1] 3f6d5e6a468d ("mm: introduce memalloc_flags_{save,restore}") to 6.1/6.6 lts.
[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... [2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
include/linux/sched/mm.h | 43 ++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 17 deletions(-)
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 8d89c8c4fac1..10792d374785 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -306,6 +306,24 @@ static inline void might_alloc(gfp_t gfp_mask) might_sleep_if(gfpflags_allow_blocking(gfp_mask)); }
+/** + * memalloc_flags_save - Add a PF_* flag to current->flags, save old value + * + * This allows PF_* flags to be conveniently added, irrespective of current + * value, and then the old version restored with memalloc_flags_restore(). + */ +static inline unsigned memalloc_flags_save(unsigned flags) +{ + unsigned oldflags = ~current->flags & flags; + current->flags |= flags; + return oldflags; +} + +static inline void memalloc_flags_restore(unsigned flags) +{ + current->flags &= ~flags; +} + /** * memalloc_noio_save - Marks implicit GFP_NOIO allocation scope. * @@ -319,9 +337,7 @@ static inline void might_alloc(gfp_t gfp_mask) */ static inline unsigned int memalloc_noio_save(void) { - unsigned int flags = current->flags & PF_MEMALLOC_NOIO; - current->flags |= PF_MEMALLOC_NOIO; - return flags; + return memalloc_flags_save(PF_MEMALLOC_NOIO); }
/** @@ -334,7 +350,7 @@ static inline unsigned int memalloc_noio_save(void) */ static inline void memalloc_noio_restore(unsigned int flags) { - current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags; + memalloc_flags_restore(flags); }
/** @@ -350,9 +366,7 @@ static inline void memalloc_noio_restore(unsigned int flags) */ static inline unsigned int memalloc_nofs_save(void) { - unsigned int flags = current->flags & PF_MEMALLOC_NOFS; - current->flags |= PF_MEMALLOC_NOFS; - return flags; + return memalloc_flags_save(PF_MEMALLOC_NOFS); }
/** @@ -365,32 +379,27 @@ static inline unsigned int memalloc_nofs_save(void) */ static inline void memalloc_nofs_restore(unsigned int flags) { - current->flags = (current->flags & ~PF_MEMALLOC_NOFS) | flags; + memalloc_flags_restore(flags); }
static inline unsigned int memalloc_noreclaim_save(void) { - unsigned int flags = current->flags & PF_MEMALLOC; - current->flags |= PF_MEMALLOC; - return flags; + return memalloc_flags_save(PF_MEMALLOC); }
static inline void memalloc_noreclaim_restore(unsigned int flags) { - current->flags = (current->flags & ~PF_MEMALLOC) | flags; + memalloc_flags_restore(flags); }
static inline unsigned int memalloc_pin_save(void) { - unsigned int flags = current->flags & PF_MEMALLOC_PIN; - - current->flags |= PF_MEMALLOC_PIN; - return flags; + return memalloc_flags_save(PF_MEMALLOC_PIN); }
static inline void memalloc_pin_restore(unsigned int flags) { - current->flags = (current->flags & ~PF_MEMALLOC_PIN) | flags; + memalloc_flags_restore(flags); }
#ifdef CONFIG_MEMCG