6.5-stable review patch. If anyone has any objections, please let me know.
------------------
From: Rik van Riel <riel@surriel.com>
commit bf4916922c60f43efaa329744b3eef539aa6a2b2 upstream.
Extend the locking scheme used to protect shared hugetlb mappings from truncate vs page fault races, in order to protect private hugetlb mappings (with resv_map) against MADV_DONTNEED.
Add a read-write semaphore to the resv_map data structure, and use that from the hugetlb_vma_(un)lock_* functions, in preparation for closing the race between MADV_DONTNEED and page faults.
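For reviewers, a minimal sketch of the intended usage (editorial illustration, not part of the patch): hugetlb_vma_lock_write()/hugetlb_vma_unlock_write() are the functions extended below, the hugetlb fault path takes the same lock for read via hugetlb_vma_lock_read(), and the zap helper here is a hypothetical stand-in for the MADV_DONTNEED path that the rest of this series fixes:

/*
 * Illustrative sketch only. After this patch, hugetlb_vma_lock_write()
 * write-locks either the shared mapping's hugetlb_vma_lock or, for a
 * private mapping with a reservation map, the new resv_map->rw_sema.
 * Since the fault path takes the lock for read, bracketing a zap like
 * this excludes concurrent page faults for both kinds of mapping:
 */
static void hugetlb_zap_range_sketch(struct vm_area_struct *vma,
				     unsigned long start, unsigned long end)
{
	hugetlb_vma_lock_write(vma);	/* shared or private rwsem */
	/* ... unmap the huge page range [start, end) here ... */
	hugetlb_vma_unlock_write(vma);	/* drops whichever was taken */
}

This works for private mappings because they store their reservation map pointer (plus flag bits) in vma->vm_private_data, the same field shared mappings use for the vma_lock, which is why __vma_private_lock() below only needs to check VM_MAYSHARE and vm_private_data.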
Link: https://lkml.kernel.org/r/20231006040020.3677377-3-riel@surriel.com
Fixes: 04ada095dcfc ("hugetlb: don't delete vma_lock in hugetlb MADV_DONTNEED processing")
Signed-off-by: Rik van Riel <riel@surriel.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: stable@vger.kernel.org
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/hugetlb.h |  6 ++++++
 mm/hugetlb.c            | 41 +++++++++++++++++++++++++++++++++++++----
 2 files changed, 43 insertions(+), 4 deletions(-)
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -58,6 +58,7 @@ struct resv_map {
 	long adds_in_progress;
 	struct list_head region_cache;
 	long region_cache_count;
+	struct rw_semaphore rw_sema;
 #ifdef CONFIG_CGROUP_HUGETLB
 	/*
 	 * On private mappings, the counter to uncharge reservations is stored
@@ -1245,6 +1246,11 @@ static inline bool __vma_shareable_lock(
 	return (vma->vm_flags & VM_MAYSHARE) && vma->vm_private_data;
 }
 
+static inline bool __vma_private_lock(struct vm_area_struct *vma)
+{
+	return (!(vma->vm_flags & VM_MAYSHARE)) && vma->vm_private_data;
+}
+
 /*
  * Safe version of huge_pte_offset() to check the locks.  See comments
  * above huge_pte_offset().
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -96,6 +96,7 @@ static void hugetlb_vma_lock_alloc(struc
 static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma);
 static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
 		unsigned long start, unsigned long end);
+static struct resv_map *vma_resv_map(struct vm_area_struct *vma);
 
 static inline bool subpool_is_free(struct hugepage_subpool *spool)
 {
@@ -266,6 +267,10 @@ void hugetlb_vma_lock_read(struct vm_are
 		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
 
 		down_read(&vma_lock->rw_sema);
+	} else if (__vma_private_lock(vma)) {
+		struct resv_map *resv_map = vma_resv_map(vma);
+
+		down_read(&resv_map->rw_sema);
 	}
 }
 
@@ -275,6 +280,10 @@ void hugetlb_vma_unlock_read(struct vm_a
 		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
 
 		up_read(&vma_lock->rw_sema);
+	} else if (__vma_private_lock(vma)) {
+		struct resv_map *resv_map = vma_resv_map(vma);
+
+		up_read(&resv_map->rw_sema);
 	}
 }
 
@@ -284,6 +293,10 @@ void hugetlb_vma_lock_write(struct vm_ar
 		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
 
 		down_write(&vma_lock->rw_sema);
+	} else if (__vma_private_lock(vma)) {
+		struct resv_map *resv_map = vma_resv_map(vma);
+
+		down_write(&resv_map->rw_sema);
 	}
 }
 
@@ -293,17 +306,27 @@ void hugetlb_vma_unlock_write(struct vm_
 		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
 
 		up_write(&vma_lock->rw_sema);
+	} else if (__vma_private_lock(vma)) {
+		struct resv_map *resv_map = vma_resv_map(vma);
+
+		up_write(&resv_map->rw_sema);
 	}
 }
 
 int hugetlb_vma_trylock_write(struct vm_area_struct *vma)
 {
-	struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
 
-	if (!__vma_shareable_lock(vma))
-		return 1;
+	if (__vma_shareable_lock(vma)) {
+		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
 
-	return down_write_trylock(&vma_lock->rw_sema);
+		return down_write_trylock(&vma_lock->rw_sema);
+	} else if (__vma_private_lock(vma)) {
+		struct resv_map *resv_map = vma_resv_map(vma);
+
+		return down_write_trylock(&resv_map->rw_sema);
+	}
+
+	return 1;
 }
 
 void hugetlb_vma_assert_locked(struct vm_area_struct *vma)
@@ -312,6 +335,10 @@ void hugetlb_vma_assert_locked(struct vm
 		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
 
 		lockdep_assert_held(&vma_lock->rw_sema);
+	} else if (__vma_private_lock(vma)) {
+		struct resv_map *resv_map = vma_resv_map(vma);
+
+		lockdep_assert_held(&resv_map->rw_sema);
 	}
 }
 
@@ -344,6 +371,11 @@ static void __hugetlb_vma_unlock_write_f
 		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
 
 		__hugetlb_vma_unlock_write_put(vma_lock);
+	} else if (__vma_private_lock(vma)) {
+		struct resv_map *resv_map = vma_resv_map(vma);
+
+		/* no free for anon vmas, but still need to unlock */
+		up_write(&resv_map->rw_sema);
 	}
 }
 
@@ -1062,6 +1094,7 @@ struct resv_map *resv_map_alloc(void)
 	kref_init(&resv_map->refs);
 	spin_lock_init(&resv_map->lock);
 	INIT_LIST_HEAD(&resv_map->regions);
+	init_rwsem(&resv_map->rw_sema);
 
 	resv_map->adds_in_progress = 0;
 	/*