vma_*_reservation functions rely on vma_resv_map(), which assumes on a hugetlbfs concept of the resv_map being stored in a specific field of the inode.
This refactor enables vma_*_reservation functions, now renamed resv_map_*_reservation, to be used with non-hugetlbfs filesystems, further decoupling hugetlb from hugetlbfs.
Signed-off-by: Ackerley Tng ackerleytng@google.com --- mm/hugetlb.c | 184 +++++++++++++++++++++++++++------------------------ 1 file changed, 99 insertions(+), 85 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index d16c6417b90f..d943f83d15a9 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2643,89 +2643,81 @@ static void return_unused_surplus_pages(struct hstate *h,
/* - * vma_needs_reservation, vma_commit_reservation and vma_end_reservation - * are used by the huge page allocation routines to manage reservations. + * resv_map_needs_reservation, resv_map_commit_reservation and + * resv_map_end_reservation are used by the huge page allocation routines to + * manage reservations. * - * vma_needs_reservation is called to determine if the huge page at addr - * within the vma has an associated reservation. If a reservation is - * needed, the value 1 is returned. The caller is then responsible for - * managing the global reservation and subpool usage counts. After - * the huge page has been allocated, vma_commit_reservation is called - * to add the page to the reservation map. If the page allocation fails, - * the reservation must be ended instead of committed. vma_end_reservation - * is called in such cases. + * resv_map_needs_reservation is called to determine if the huge page at addr + * within the vma has an associated reservation. If a reservation is needed, + * the value 1 is returned. The caller is then responsible for managing the + * global reservation and subpool usage counts. After the huge page has been + * allocated, resv_map_commit_reservation is called to add the page to the + * reservation map. If the page allocation fails, the reservation must be ended + * instead of committed. resv_map_end_reservation is called in such cases. * - * In the normal case, vma_commit_reservation returns the same value - * as the preceding vma_needs_reservation call. The only time this - * is not the case is if a reserve map was changed between calls. It - * is the responsibility of the caller to notice the difference and - * take appropriate action. + * In the normal case, resv_map_commit_reservation returns the same value as the + * preceding resv_map_needs_reservation call. The only time this is not the + * case is if a reserve map was changed between calls. It is the responsibility + * of the caller to notice the difference and take appropriate action. * - * vma_add_reservation is used in error paths where a reservation must - * be restored when a newly allocated huge page must be freed. It is - * to be called after calling vma_needs_reservation to determine if a - * reservation exists. + * resv_map_add_reservation is used in error paths where a reservation must be + * restored when a newly allocated huge page must be freed. It is to be called + * after calling resv_map_needs_reservation to determine if a reservation + * exists. * - * vma_del_reservation is used in error paths where an entry in the reserve - * map was created during huge page allocation and must be removed. It is to - * be called after calling vma_needs_reservation to determine if a reservation + * resv_map_del_reservation is used in error paths where an entry in the reserve + * map was created during huge page allocation and must be removed. It is to be + * called after calling resv_map_needs_reservation to determine if a reservation * exists. */ -enum vma_resv_mode { - VMA_NEEDS_RESV, - VMA_COMMIT_RESV, - VMA_END_RESV, - VMA_ADD_RESV, - VMA_DEL_RESV, +enum resv_map_resv_mode { + RESV_MAP_NEEDS_RESV, + RESV_MAP_COMMIT_RESV, + RESV_MAP_END_RESV, + RESV_MAP_ADD_RESV, + RESV_MAP_DEL_RESV, }; -static long __vma_reservation_common(struct hstate *h, - struct vm_area_struct *vma, unsigned long addr, - enum vma_resv_mode mode) +static long __resv_map_reservation_common(struct resv_map *resv, pgoff_t resv_index, + bool may_be_shared_mapping, + enum resv_map_resv_mode mode) { - struct resv_map *resv; - pgoff_t idx; long ret; long dummy_out_regions_needed;
- resv = vma_resv_map(vma); - if (!resv) - return 1; - - idx = vma_hugecache_offset(h, vma, addr); switch (mode) { - case VMA_NEEDS_RESV: - ret = region_chg(resv, idx, idx + 1, &dummy_out_regions_needed); + case RESV_MAP_NEEDS_RESV: + ret = region_chg(resv, resv_index, resv_index + 1, &dummy_out_regions_needed); /* We assume that vma_reservation_* routines always operate on * 1 page, and that adding to resv map a 1 page entry can only * ever require 1 region. */ VM_BUG_ON(dummy_out_regions_needed != 1); break; - case VMA_COMMIT_RESV: - ret = region_add(resv, idx, idx + 1, 1, NULL, NULL); + case RESV_MAP_COMMIT_RESV: + ret = region_add(resv, resv_index, resv_index + 1, 1, NULL, NULL); /* region_add calls of range 1 should never fail. */ VM_BUG_ON(ret < 0); break; - case VMA_END_RESV: - region_abort(resv, idx, idx + 1, 1); + case RESV_MAP_END_RESV: + region_abort(resv, resv_index, resv_index + 1, 1); ret = 0; break; - case VMA_ADD_RESV: - if (vma->vm_flags & VM_MAYSHARE) { - ret = region_add(resv, idx, idx + 1, 1, NULL, NULL); + case RESV_MAP_ADD_RESV: + if (may_be_shared_mapping) { + ret = region_add(resv, resv_index, resv_index + 1, 1, NULL, NULL); /* region_add calls of range 1 should never fail. */ VM_BUG_ON(ret < 0); } else { - region_abort(resv, idx, idx + 1, 1); - ret = region_del(resv, idx, idx + 1); + region_abort(resv, resv_index, resv_index + 1, 1); + ret = region_del(resv, resv_index, resv_index + 1); } break; - case VMA_DEL_RESV: - if (vma->vm_flags & VM_MAYSHARE) { - region_abort(resv, idx, idx + 1, 1); - ret = region_del(resv, idx, idx + 1); + case RESV_MAP_DEL_RESV: + if (may_be_shared_mapping) { + region_abort(resv, resv_index, resv_index + 1, 1); + ret = region_del(resv, resv_index, resv_index + 1); } else { - ret = region_add(resv, idx, idx + 1, 1, NULL, NULL); + ret = region_add(resv, resv_index, resv_index + 1, 1, NULL, NULL); /* region_add calls of range 1 should never fail. */ VM_BUG_ON(ret < 0); } @@ -2734,7 +2726,7 @@ static long __vma_reservation_common(struct hstate *h, BUG(); }
- if (vma->vm_flags & VM_MAYSHARE || mode == VMA_DEL_RESV) + if (may_be_shared_mapping || mode == RESV_MAP_DEL_RESV) return ret; /* * We know private mapping must have HPAGE_RESV_OWNER set. @@ -2758,34 +2750,39 @@ static long __vma_reservation_common(struct hstate *h, return ret; }
-static long vma_needs_reservation(struct hstate *h, - struct vm_area_struct *vma, unsigned long addr) +static long resv_map_needs_reservation(struct resv_map *resv, pgoff_t resv_index, + bool may_be_shared_mapping) { - return __vma_reservation_common(h, vma, addr, VMA_NEEDS_RESV); + return __resv_map_reservation_common( + resv, resv_index, may_be_shared_mapping, RESV_MAP_NEEDS_RESV); }
-static long vma_commit_reservation(struct hstate *h, - struct vm_area_struct *vma, unsigned long addr) +static long resv_map_commit_reservation(struct resv_map *resv, pgoff_t resv_index, + bool may_be_shared_mapping) { - return __vma_reservation_common(h, vma, addr, VMA_COMMIT_RESV); + return __resv_map_reservation_common( + resv, resv_index, may_be_shared_mapping, RESV_MAP_COMMIT_RESV); }
-static void vma_end_reservation(struct hstate *h, - struct vm_area_struct *vma, unsigned long addr) +static void resv_map_end_reservation(struct resv_map *resv, pgoff_t resv_index, + bool may_be_shared_mapping) { - (void)__vma_reservation_common(h, vma, addr, VMA_END_RESV); + (void)__resv_map_reservation_common( + resv, resv_index, may_be_shared_mapping, RESV_MAP_END_RESV); }
-static long vma_add_reservation(struct hstate *h, - struct vm_area_struct *vma, unsigned long addr) +static long resv_map_add_reservation(struct resv_map *resv, pgoff_t resv_index, + bool may_be_shared_mapping) { - return __vma_reservation_common(h, vma, addr, VMA_ADD_RESV); + return __resv_map_reservation_common( + resv, resv_index, may_be_shared_mapping, RESV_MAP_ADD_RESV); }
-static long vma_del_reservation(struct hstate *h, - struct vm_area_struct *vma, unsigned long addr) +static long resv_map_del_reservation(struct resv_map *resv, pgoff_t resv_index, + bool may_be_shared_mapping) { - return __vma_reservation_common(h, vma, addr, VMA_DEL_RESV); + return __resv_map_reservation_common( + resv, resv_index, may_be_shared_mapping, RESV_MAP_DEL_RESV); }
/* @@ -2811,7 +2808,12 @@ static long vma_del_reservation(struct hstate *h, void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, unsigned long address, struct folio *folio) { - long rc = vma_needs_reservation(h, vma, address); + long rc; + struct resv_map *resv = vma_resv_map(vma); + pgoff_t resv_index = vma_hugecache_offset(h, vma, address); + bool may_share = vma->vm_flags & VM_MAYSHARE; + + rc = resv_map_needs_reservation(resv, resv_index, may_share);
if (folio_test_hugetlb_restore_reserve(folio)) { if (unlikely(rc < 0)) @@ -2828,9 +2830,9 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, */ folio_clear_hugetlb_restore_reserve(folio); else if (rc) - (void)vma_add_reservation(h, vma, address); + (void)resv_map_add_reservation(resv, resv_index, may_share); else - vma_end_reservation(h, vma, address); + resv_map_end_reservation(resv, resv_index, may_share); } else { if (!rc) { /* @@ -2841,7 +2843,7 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, * Remove the entry so that a subsequent allocation * does not consume a reservation. */ - rc = vma_del_reservation(h, vma, address); + rc = resv_map_del_reservation(resv, resv_index, may_share); if (rc < 0) /* * VERY rare out of memory condition. Since @@ -2855,7 +2857,7 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, } else if (rc < 0) { /* * Rare out of memory condition from - * vma_needs_reservation call. Memory allocation is + * resv_map_needs_reservation call. Memory allocation is * only attempted if a new entry is needed. Therefore, * this implies there is not an entry in the * reserve map. @@ -2877,7 +2879,7 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, /* * No reservation present, do nothing */ - vma_end_reservation(h, vma, address); + resv_map_end_reservation(resv, resv_index, may_share); } }
@@ -3019,13 +3021,17 @@ struct folio *alloc_hugetlb_folio_from_subpool( struct hugetlb_cgroup *h_cg = NULL; bool deferred_reserve;
+ struct resv_map *resv = vma_resv_map(vma); + pgoff_t resv_index = vma_hugecache_offset(h, vma, addr); + bool may_share = vma->vm_flags & VM_MAYSHARE; + idx = hstate_index(h); /* * Examine the region/reserve map to determine if the process * has a reservation for the page to be allocated. A return * code of zero indicates a reservation exists (no change). */ - map_chg = gbl_chg = vma_needs_reservation(h, vma, addr); + map_chg = gbl_chg = resv_map_needs_reservation(resv, resv_index, may_share); if (map_chg < 0) return ERR_PTR(-ENOMEM);
@@ -3039,7 +3045,7 @@ struct folio *alloc_hugetlb_folio_from_subpool( if (map_chg || avoid_reserve) { gbl_chg = hugepage_subpool_get_pages(spool, 1); if (gbl_chg < 0) { - vma_end_reservation(h, vma, addr); + resv_map_end_reservation(resv, resv_index, may_share); return ERR_PTR(-ENOSPC); }
@@ -3104,11 +3110,11 @@ struct folio *alloc_hugetlb_folio_from_subpool(
hugetlb_set_folio_subpool(folio, spool);
- map_commit = vma_commit_reservation(h, vma, addr); + map_commit = resv_map_commit_reservation(resv, resv_index, may_share); if (unlikely(map_chg > map_commit)) { /* * The page was added to the reservation map between - * vma_needs_reservation and vma_commit_reservation. + * resv_map_needs_reservation and resv_map_commit_reservation. * This indicates a race with hugetlb_reserve_pages. * Adjust for the subpool count incremented above AND * in hugetlb_reserve_pages for the same page. Also, @@ -3134,7 +3140,7 @@ struct folio *alloc_hugetlb_folio_from_subpool( out_subpool_put: if (map_chg || avoid_reserve) hugepage_subpool_put_pages(spool, 1); - vma_end_reservation(h, vma, addr); + resv_map_end_reservation(resv, resv_index, may_share); return ERR_PTR(-ENOSPC); }
@@ -5901,12 +5907,16 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, * the spinlock. */ if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) { - if (vma_needs_reservation(h, vma, haddr) < 0) { + struct resv_map *resv = vma_resv_map(vma); + pgoff_t resv_index = vma_hugecache_offset(h, vma, address); + bool may_share = vma->vm_flags & VM_MAYSHARE; + + if (resv_map_needs_reservation(resv, resv_index, may_share) < 0) { ret = VM_FAULT_OOM; goto backout_unlocked; } /* Just decrements count, does not deallocate */ - vma_end_reservation(h, vma, haddr); + resv_map_end_reservation(resv, resv_index, may_share); }
ptl = huge_pte_lock(h, mm, ptep); @@ -6070,12 +6080,16 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, */ if ((flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) && !(vma->vm_flags & VM_MAYSHARE) && !huge_pte_write(entry)) { - if (vma_needs_reservation(h, vma, haddr) < 0) { + struct resv_map *resv = vma_resv_map(vma); + pgoff_t resv_index = vma_hugecache_offset(h, vma, address); + bool may_share = vma->vm_flags & VM_MAYSHARE; + + if (resv_map_needs_reservation(resv, resv_index, may_share) < 0) { ret = VM_FAULT_OOM; goto out_mutex; } /* Just decrements count, does not deallocate */ - vma_end_reservation(h, vma, haddr); + resv_map_end_reservation(resv, resv_index, may_share);
pagecache_folio = filemap_lock_folio(mapping, idx); }