Patch series "mm/mremap: permit mremap() move of multiple VMAs".
Historically we've made it a uAPI requirement that mremap() may only operate on a single VMA at a time.
For instances where VMAs need to be resized, this makes sense, as it becomes very difficult to determine what a user actually wants should they indicate a desire to expand or shrink the size of multiple VMAs (truncate? Adjust sizes individually? Some other strategy?).
However, in instances where a user is moving VMAs, it is restrictive to disallow this.
This is especially the case when anonymous mappings are moved, as whether they can be merged may depend on whether the VMAs have or have not been faulted, due to anon_vma assignment and folio index alignment with vma->vm_pgoff.
Often this results in surprising behaviour: a moved region is faulted, then moved back, and the user fails to observe a merge of otherwise compatible, adjacent VMAs.
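To illustrate, consider the following minimal userspace sketch (invented for this description, not part of the series; error checking omitted):

	#define _GNU_SOURCE
	#include <sys/mman.h>
	#include <unistd.h>

	int main(void)
	{
		const long pgsz = sysconf(_SC_PAGESIZE);

		/* One 2-page anonymous VMA. */
		char *p = mmap(NULL, 2 * pgsz, PROT_READ | PROT_WRITE,
			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

		/* Fault in the second page, assigning an anon_vma. */
		p[pgsz] = 1;

		/* Reserve a destination and move the second page there. */
		char *dst = mmap(NULL, pgsz, PROT_NONE,
				 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		mremap(p + pgsz, pgsz, pgsz,
		       MREMAP_MAYMOVE | MREMAP_FIXED, dst);

		/* Move it back again. */
		mremap(dst, pgsz, pgsz,
		       MREMAP_MAYMOVE | MREMAP_FIXED, p + pgsz);

		/*
		 * The two page ranges are now adjacent and otherwise
		 * compatible, yet /proc/self/maps may still show two
		 * separate VMAs.
		 */
		return 0;
	}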
This change allows such cases to work without the user having to be cognizant of whether a prior mremap() move or other VMA operation has resulted in VMA fragmentation.
In order to do this, this series performs a large amount of refactoring, most pertinently grouping sanity checks together, separating those that check input parameters from those relating to VMAs.
We also simplify the post-mmap lock drop processing for uffd and mlock()'d VMAs.
With this done, we can then fairly straightforwardly implement this functionality.
This works exclusively for mremap() invocations which specify MREMAP_FIXED. It is not compatible with VMAs which use userfaultfd, as the notification of the userland fault handler would require us to drop the mmap lock.
The input and output address ranges must not overlap. We carefully account for moves which would result in VMA merges, or which would otherwise result in VMA iterator invalidation.
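For example, a hypothetical invocation might look as follows ('addr', 'len' and 'new_addr' are placeholders, with 'addr' spanning several VMAs):

	/*
	 * With this series, a single call can move a span covering
	 * multiple VMAs; previously the kernel would refuse such a
	 * request (typically with -EFAULT).
	 */
	void *dst = mremap(addr, len, len,
			   MREMAP_MAYMOVE | MREMAP_FIXED, new_addr);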
This patch (of 10):
We const-ify the vrm flags field to indicate that it will never change.
We rename resize_is_valid() to remap_is_valid(), as this function does not apply only to cases where we resize, so the previous name was simply confusing.
We remove the BUG() from mremap_at(), as we should not BUG() unless we are certain it'll result in system instability.
We rename vrm_charge() to vrm_calc_charge() to make it clear this simply calculates the charged number of pages rather than actually adjusting any state.
We update the comment for vrm_implies_new_addr() to explain that MREMAP_DONTUNMAP does not require that a target address be specified, but the mapping will always be moved.
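That is, hypothetically ('addr' and 'len' are placeholders):

	/*
	 * MREMAP_DONTUNMAP without MREMAP_FIXED: the kernel selects the
	 * target address itself, but the pages are always moved, leaving
	 * an empty mapping behind at 'addr'.
	 */
	void *moved = mremap(addr, len, len,
			     MREMAP_MAYMOVE | MREMAP_DONTUNMAP, 0);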
Additionally, we consistently use 'res' rather than 'ret' for result values.
No functional change intended.
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
---
 mm/mremap.c | 55 +++++++++++++++++++++++++++++++----------------------
 1 file changed, 32 insertions(+), 23 deletions(-)
diff --git a/mm/mremap.c b/mm/mremap.c
index 36585041c760..1815095c4bca 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -52,7 +52,7 @@ struct vma_remap_struct {
 	unsigned long addr;	/* User-specified address from which we remap. */
 	unsigned long old_len;	/* Length of range being remapped. */
 	unsigned long new_len;	/* Desired new length of mapping. */
-	unsigned long flags;	/* user-specified MREMAP_* flags. */
+	const unsigned long flags;	/* user-specified MREMAP_* flags. */
 	unsigned long new_addr;	/* Optionally, desired new address. */
 
 	/* uffd state. */
@@ -911,7 +911,11 @@ static bool vrm_overlaps(struct vma_remap_struct *vrm)
 	return false;
 }
 
-/* Do the mremap() flags require that the new_addr parameter be specified? */
+/*
+ * Will a new address definitely be assigned? This is the case if the user
+ * specifies it via MREMAP_FIXED, or if MREMAP_DONTUNMAP is used, indicating
+ * we will always determine a target address.
+ */
 static bool vrm_implies_new_addr(struct vma_remap_struct *vrm)
 {
 	return vrm->flags & (MREMAP_FIXED | MREMAP_DONTUNMAP);
@@ -957,7 +961,7 @@ static unsigned long vrm_set_new_addr(struct vma_remap_struct *vrm)
  *
  * Returns true on success, false if insufficient memory to charge.
  */
-static bool vrm_charge(struct vma_remap_struct *vrm)
+static bool vrm_calc_charge(struct vma_remap_struct *vrm)
 {
 	unsigned long charged;
 
@@ -1262,8 +1266,11 @@ static unsigned long move_vma(struct vma_remap_struct *vrm)
 	if (err)
 		return err;
 
-	/* If accounted, charge the number of bytes the operation will use. */
-	if (!vrm_charge(vrm))
+	/*
+	 * If accounted, determine the number of bytes the operation will
+	 * charge.
+	 */
+	if (!vrm_calc_charge(vrm))
 		return -ENOMEM;
 
 	/* We don't want racing faults. */
@@ -1302,12 +1309,12 @@ static unsigned long move_vma(struct vma_remap_struct *vrm)
 }
 
 /*
- * resize_is_valid() - Ensure the vma can be resized to the new length at the give
- * address.
+ * remap_is_valid() - Ensure the VMA can be moved or resized to the new length,
+ * at the given address.
  *
  * Return 0 on success, error otherwise.
  */
-static int resize_is_valid(struct vma_remap_struct *vrm)
+static int remap_is_valid(struct vma_remap_struct *vrm)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma = vrm->vma;
@@ -1446,7 +1453,7 @@ static unsigned long mremap_to(struct vma_remap_struct *vrm)
 		vrm->old_len = vrm->new_len;
 	}
 
-	err = resize_is_valid(vrm);
+	err = remap_is_valid(vrm);
 	if (err)
 		return err;
 
@@ -1571,7 +1578,7 @@ static unsigned long expand_vma_in_place(struct vma_remap_struct *vrm)
 	struct vm_area_struct *vma = vrm->vma;
 	VMA_ITERATOR(vmi, mm, vma->vm_end);
 
-	if (!vrm_charge(vrm))
+	if (!vrm_calc_charge(vrm))
 		return -ENOMEM;
 
 	/*
@@ -1632,7 +1639,7 @@ static unsigned long expand_vma(struct vma_remap_struct *vrm)
 	unsigned long err;
 	unsigned long addr = vrm->addr;
 
-	err = resize_is_valid(vrm);
+	err = remap_is_valid(vrm);
 	if (err)
 		return err;
 
@@ -1705,18 +1712,20 @@ static unsigned long mremap_at(struct vma_remap_struct *vrm)
 		return expand_vma(vrm);
 	}
 
-	BUG();
+	/* Should not be possible. */
+	WARN_ON_ONCE(1);
+	return -EINVAL;
 }
 
 static unsigned long do_mremap(struct vma_remap_struct *vrm)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
-	unsigned long ret;
+	unsigned long res;
 
-	ret = check_mremap_params(vrm);
-	if (ret)
-		return ret;
+	res = check_mremap_params(vrm);
+	if (res)
+		return res;
 
 	vrm->old_len = PAGE_ALIGN(vrm->old_len);
 	vrm->new_len = PAGE_ALIGN(vrm->new_len);
@@ -1728,41 +1737,41 @@ static unsigned long do_mremap(struct vma_remap_struct *vrm)
 
 	vma = vrm->vma = vma_lookup(mm, vrm->addr);
 	if (!vma) {
-		ret = -EFAULT;
+		res = -EFAULT;
 		goto out;
 	}
 
 	/* If mseal()'d, mremap() is prohibited. */
 	if (!can_modify_vma(vma)) {
-		ret = -EPERM;
+		res = -EPERM;
 		goto out;
 	}
 
 	/* Align to hugetlb page size, if required. */
 	if (is_vm_hugetlb_page(vma) && !align_hugetlb(vrm)) {
-		ret = -EINVAL;
+		res = -EINVAL;
 		goto out;
 	}
 
 	vrm->remap_type = vrm_remap_type(vrm);
 
 	/* Actually execute mremap. */
-	ret = vrm_implies_new_addr(vrm) ? mremap_to(vrm) : mremap_at(vrm);
+	res = vrm_implies_new_addr(vrm) ? mremap_to(vrm) : mremap_at(vrm);
 
 out:
 	if (vrm->mmap_locked) {
 		mmap_write_unlock(mm);
 		vrm->mmap_locked = false;
 
-		if (!offset_in_page(ret) && vrm->mlocked && vrm->new_len > vrm->old_len)
+		if (!offset_in_page(res) && vrm->mlocked && vrm->new_len > vrm->old_len)
 			mm_populate(vrm->new_addr + vrm->old_len, vrm->delta);
 	}
 
 	userfaultfd_unmap_complete(mm, vrm->uf_unmap_early);
-	mremap_userfaultfd_complete(vrm->uf, vrm->addr, ret, vrm->old_len);
+	mremap_userfaultfd_complete(vrm->uf, vrm->addr, res, vrm->old_len);
 	userfaultfd_unmap_complete(mm, vrm->uf_unmap);
 
-	return ret;
+	return res;
 }
 
 /*