Right now it appears that the code is relying upon the returned destination address having bits outside PAGE_MASK to indicate whether an error value is specified, and decrementing the increased refcount on the uffd ctx if so.
This is not a safe means of determining an error value, so instead, be specific. It makes far more sense to do so in a dedicated error path, so add mremap_userfaultfd_fail() for this purpose and use this when an error arises.
A vm_userfaultfd_ctx is not established until we are at the point where mremap_userfaultfd_prep() is invoked in copy_vma_and_data(), so this is a no-op until this happens.
That is - uffd remap notification only occurs if the VMA is actually moved - at which point a UFFD_EVENT_REMAP event is raised.
No errors can occur after this point currently, though it's certainly not guaranteed this will always remain the case, and we mustn't rely on this.
However, the reason for needing to handle this case is that, when an error arises on a VMA move at the point of adjusting page tables, we revert this operation, and propagate the error.
At this point, it is not correct to raise a uffd remap event, and we must handle it.
This refactoring makes it abundantly clear what we are doing.
We assume vrm->new_addr is always valid, which a prior change made the case even for mremap() invocations which don't move the VMA, however given no uffd context would be set up in this case it's immaterial to this change anyway.
No functional change intended.
Signed-off-by: Lorenzo Stoakes lorenzo.stoakes@oracle.com --- fs/userfaultfd.c | 15 ++++++++++----- include/linux/userfaultfd_k.h | 1 + mm/mremap.c | 16 ++++++++++++---- 3 files changed, 23 insertions(+), 9 deletions(-)
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 2a644aa1a510..54c6cc7fe9c6 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -750,11 +750,6 @@ void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *vm_ctx, if (!ctx) return;
- if (to & ~PAGE_MASK) { - userfaultfd_ctx_put(ctx); - return; - } - msg_init(&ewq.msg);
ewq.msg.event = UFFD_EVENT_REMAP; @@ -765,6 +760,16 @@ void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *vm_ctx, userfaultfd_event_wait_completion(ctx, &ewq); }
+void mremap_userfaultfd_fail(struct vm_userfaultfd_ctx *vm_ctx) +{ + struct userfaultfd_ctx *ctx = vm_ctx->ctx; + + if (!ctx) + return; + + userfaultfd_ctx_put(ctx); +} + bool userfaultfd_remove(struct vm_area_struct *vma, unsigned long start, unsigned long end) { diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index df85330bcfa6..6680a4de40b3 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -259,6 +259,7 @@ extern void mremap_userfaultfd_prep(struct vm_area_struct *, extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *, unsigned long from, unsigned long to, unsigned long len); +void mremap_userfaultfd_fail(struct vm_userfaultfd_ctx *);
extern bool userfaultfd_remove(struct vm_area_struct *vma, unsigned long start, diff --git a/mm/mremap.c b/mm/mremap.c index 660bdb75e2f9..db7e773d0884 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -1729,12 +1729,17 @@ static int check_prep_vma(struct vma_remap_struct *vrm) return 0; }
-static void notify_uffd(struct vma_remap_struct *vrm, unsigned long ret) +static void notify_uffd(struct vma_remap_struct *vrm, bool failed) { struct mm_struct *mm = current->mm;
+ /* Regardless of success/failure, we always notify of any unmaps. */ userfaultfd_unmap_complete(mm, vrm->uf_unmap_early); - mremap_userfaultfd_complete(vrm->uf, vrm->addr, ret, vrm->old_len); + if (failed) + mremap_userfaultfd_fail(vrm->uf); + else + mremap_userfaultfd_complete(vrm->uf, vrm->addr, + vrm->new_addr, vrm->old_len); userfaultfd_unmap_complete(mm, vrm->uf_unmap); }
@@ -1742,6 +1747,7 @@ static unsigned long do_mremap(struct vma_remap_struct *vrm) { struct mm_struct *mm = current->mm; unsigned long res; + bool failed;
vrm->old_len = PAGE_ALIGN(vrm->old_len); vrm->new_len = PAGE_ALIGN(vrm->new_len); @@ -1763,13 +1769,15 @@ static unsigned long do_mremap(struct vma_remap_struct *vrm) res = vrm_implies_new_addr(vrm) ? mremap_to(vrm) : mremap_at(vrm);
out: + failed = IS_ERR_VALUE(res); + if (vrm->mmap_locked) mmap_write_unlock(mm);
- if (!IS_ERR_VALUE(res) && vrm->mlocked && vrm->new_len > vrm->old_len) + if (!failed && vrm->mlocked && vrm->new_len > vrm->old_len) mm_populate(vrm->new_addr + vrm->old_len, vrm->delta);
- notify_uffd(vrm, res); + notify_uffd(vrm, failed); return res; }