From: "Liam R. Howlett" Liam.Howlett@Oracle.com
commit 3dd4432549415f3c65dd52d5c687629efbf4ece1 upstream.
Use the maple tree in RCU mode for VMA tracking.
The maple tree tracks the stack and is able to update the pivot (lower/upper boundary) in-place to allow the page fault handler to write to the tree while holding just the mmap read lock. This is safe as the writes to the stack have a guard VMA which ensures there will always be a NULL in the direction of the growth and thus will only update a pivot.
It is possible, but not recommended, to have VMAs that grow up/down without guard VMAs. syzbot has constructed a testcase which sets up a VMA to grow and consume the empty space. Overwriting the entire NULL entry causes the tree to be altered in a way that is not safe for concurrent readers; the readers may see a node being rewritten or one that does not match the maple state they are using.
Enabling RCU mode allows the concurrent readers to see a stable node and will return the expected result.
Link: https://lkml.kernel.org/r/20230227173632.3292573-9-surenb@google.com Cc: stable@vger.kernel.org Fixes: d4af56c5c7c6 ("mm: start tracking VMAs with maple tree") Signed-off-by: Liam R. Howlett Liam.Howlett@oracle.com Reported-by: syzbot+8d95422d3537159ca390@syzkaller.appspotmail.com --- include/linux/mm_types.h | 3 ++- kernel/fork.c | 3 +++ mm/mmap.c | 3 ++- 3 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 500e536796ca..247aedb18d5c 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -725,7 +725,8 @@ struct mm_struct { unsigned long cpu_bitmap[]; };
-#define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN) +#define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN | \ + MT_FLAGS_USE_RCU) extern struct mm_struct init_mm;
/* Pointer magic because the dynamic array size confuses some compilers. */ diff --git a/kernel/fork.c b/kernel/fork.c index a6d243a50be3..ec913b13c5ed 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -617,6 +617,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, if (retval) goto out;
+ mt_clear_in_rcu(mas.tree); mas_for_each(&old_mas, mpnt, ULONG_MAX) { struct file *file;
@@ -703,6 +704,8 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, retval = arch_dup_mmap(oldmm, mm); loop_out: mas_destroy(&mas); + if (!retval) + mt_set_in_rcu(mas.tree); out: mmap_write_unlock(mm); flush_tlb_mm(oldmm); diff --git a/mm/mmap.c b/mm/mmap.c index 177714886849..fe1db604dc49 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2308,7 +2308,7 @@ do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma, int count = 0; int error = -ENOMEM; MA_STATE(mas_detach, &mt_detach, 0, 0); - mt_init_flags(&mt_detach, MT_FLAGS_LOCK_EXTERN); + mt_init_flags(&mt_detach, mas->tree->ma_flags & MT_FLAGS_LOCK_MASK); mt_set_external_lock(&mt_detach, &mm->mmap_lock);
if (mas_preallocate(mas, vma, GFP_KERNEL)) @@ -3095,6 +3095,7 @@ void exit_mmap(struct mm_struct *mm) */ set_bit(MMF_OOM_SKIP, &mm->flags); mmap_write_lock(mm); + mt_clear_in_rcu(&mm->mm_mt); free_pgtables(&tlb, &mm->mm_mt, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING); tlb_finish_mmu(&tlb);