On Fri, Aug 08, 2025 at 08:28:49AM -0700, Suren Baghdasaryan wrote:
Utilize per-vma locks to stabilize the vma after lookup without taking mmap_lock during PROCMAP_QUERY ioctl execution. If the vma lock is contended, we fall back to mmap_lock, but take it only momentarily to lock the vma and then release the mmap_lock. In the very unlikely case of vm_refcnt overflow, this fallback path will fail and the ioctl is done under mmap_lock protection.
This change is designed to reduce mmap_lock contention and prevent PROCMAP_QUERY ioctl calls from blocking address space updates.
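
For readers unfamiliar with the interface, a userspace caller drives this path roughly as in the sketch below. This is only an illustrative example written against the PROCMAP_QUERY uAPI declared in include/uapi/linux/fs.h; the helper name and the address being queried are made up for the example, and it is not part of this patch:

/*
 * Illustrative sketch only: query the VMA covering (or following) an
 * address via the PROCMAP_QUERY ioctl on a /proc/<pid>/maps fd, assuming
 * the uAPI from include/uapi/linux/fs.h. Error handling is trimmed.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/fs.h>	/* struct procmap_query, PROCMAP_QUERY */

static int query_addr(int maps_fd, unsigned long addr)
{
	struct procmap_query q;
	char name[256] = "";

	memset(&q, 0, sizeof(q));
	q.size = sizeof(q);	/* lets the kernel handle struct growth */
	q.query_addr = addr;
	q.query_flags = PROCMAP_QUERY_COVERING_OR_NEXT_VMA;
	q.vma_name_addr = (__u64)(unsigned long)name;
	q.vma_name_size = sizeof(name);

	if (ioctl(maps_fd, PROCMAP_QUERY, &q))
		return -1;

	printf("%llx-%llx %s\n", (unsigned long long)q.vma_start,
	       (unsigned long long)q.vma_end, name);
	return 0;
}

int main(void)
{
	/* hypothetical usage: look up the VMA holding this program's text */
	int fd = open("/proc/self/maps", O_RDONLY);

	if (fd < 0)
		return 1;
	return query_addr(fd, (unsigned long)main) ? 1 : 0;
}

With this patch, a query like the one above no longer holds mmap_lock for the duration of the lookup in the common case; it only holds the per-vma lock of the VMA being reported.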
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Acked-by: SeongJae Park <sj@kernel.org>
All LGTM, so:
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
 fs/proc/task_mmu.c | 103 +++++++++++++++++++++++++++++++++++++--------
 1 file changed, 85 insertions(+), 18 deletions(-)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index c0968d293b61..e64cf40ce9c4 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -132,6 +132,12 @@ static void release_task_mempolicy(struct proc_maps_private *priv)
 
 #ifdef CONFIG_PER_VMA_LOCK
 
+static void reset_lock_ctx(struct proc_maps_locking_ctx *lock_ctx)
+{
+	lock_ctx->locked_vma = NULL;
+	lock_ctx->mmap_locked = false;
+}
+
Thanks!
 static void unlock_ctx_vma(struct proc_maps_locking_ctx *lock_ctx)
 {
 	if (lock_ctx->locked_vma) {
@@ -157,8 +163,7 @@ static inline bool lock_vma_range(struct seq_file *m,
 		lock_ctx->mmap_locked = true;
 	} else {
 		rcu_read_lock();
-		lock_ctx->locked_vma = NULL;
-		lock_ctx->mmap_locked = false;
+		reset_lock_ctx(lock_ctx);
 	}
 
 	return true;
@@ -522,28 +527,90 @@ static int pid_maps_open(struct inode *inode, struct file *file)
 	 PROCMAP_QUERY_VMA_FLAGS				\
 )
 
-static int query_vma_setup(struct mm_struct *mm)
+#ifdef CONFIG_PER_VMA_LOCK
+
+static int query_vma_setup(struct proc_maps_locking_ctx *lock_ctx)
 {
-	return mmap_read_lock_killable(mm);
+	reset_lock_ctx(lock_ctx);
+
+	return 0;
 }
 
-static void query_vma_teardown(struct mm_struct *mm, struct vm_area_struct *vma)
+static void query_vma_teardown(struct proc_maps_locking_ctx *lock_ctx)
 {
-	mmap_read_unlock(mm);
+	if (lock_ctx->mmap_locked) {
+		mmap_read_unlock(lock_ctx->mm);
+		lock_ctx->mmap_locked = false;
+	} else {
+		unlock_ctx_vma(lock_ctx);
+	}
+}
+
+static struct vm_area_struct *query_vma_find_by_addr(struct proc_maps_locking_ctx *lock_ctx,
+						      unsigned long addr)
+{
+	struct mm_struct *mm = lock_ctx->mm;
+	struct vm_area_struct *vma;
+	struct vma_iterator vmi;
+
+	if (lock_ctx->mmap_locked)
+		return find_vma(mm, addr);
+
+	/* Unlock previously locked VMA and find the next one under RCU */
+	unlock_ctx_vma(lock_ctx);
+	rcu_read_lock();
+	vma_iter_init(&vmi, mm, addr);
+	vma = lock_next_vma(mm, &vmi, addr);
+	rcu_read_unlock();
+
+	if (!vma)
+		return NULL;
+
+	if (!IS_ERR(vma)) {
+		lock_ctx->locked_vma = vma;
+		return vma;
+	}
+
+	if (PTR_ERR(vma) == -EAGAIN) {
+		/* Fallback to mmap_lock on vma->vm_refcnt overflow */
+		mmap_read_lock(mm);
+		vma = find_vma(mm, addr);
+		lock_ctx->mmap_locked = true;
+	}
Thanks this is great!
+
+	return vma;
 }
 
-static struct vm_area_struct *query_vma_find_by_addr(struct mm_struct *mm, unsigned long addr)
+#else /* CONFIG_PER_VMA_LOCK */
+
+static int query_vma_setup(struct proc_maps_locking_ctx *lock_ctx)
+{
+	return mmap_read_lock_killable(lock_ctx->mm);
+}
+
+static void query_vma_teardown(struct proc_maps_locking_ctx *lock_ctx)
+{
+	mmap_read_unlock(lock_ctx->mm);
+}
+
+static struct vm_area_struct *query_vma_find_by_addr(struct proc_maps_locking_ctx *lock_ctx,
+						      unsigned long addr)
 {
-	return find_vma(mm, addr);
+	return find_vma(lock_ctx->mm, addr);
 }
 
-static struct vm_area_struct *query_matching_vma(struct mm_struct *mm,
+#endif /* CONFIG_PER_VMA_LOCK */
+
+static struct vm_area_struct *query_matching_vma(struct proc_maps_locking_ctx *lock_ctx,
 						 unsigned long addr, u32 flags)
 {
 	struct vm_area_struct *vma;
 
 next_vma:
-	vma = query_vma_find_by_addr(mm, addr);
+	vma = query_vma_find_by_addr(lock_ctx, addr);
 	if (IS_ERR(vma))
 		return vma;
 
 	if (!vma)
 		goto no_vma;
@@ -584,11 +651,11 @@ static struct vm_area_struct *query_matching_vma(struct mm_struct *mm,
 	return ERR_PTR(-ENOENT);
 }
 
-static int do_procmap_query(struct proc_maps_private *priv, void __user *uarg)
+static int do_procmap_query(struct mm_struct *mm, void __user *uarg)
 {
+	struct proc_maps_locking_ctx lock_ctx = { .mm = mm };
 	struct procmap_query karg;
 	struct vm_area_struct *vma;
-	struct mm_struct *mm;
 	const char *name = NULL;
 	char build_id_buf[BUILD_ID_SIZE_MAX], *name_buf = NULL;
 	__u64 usize;
@@ -615,17 +682,16 @@ static int do_procmap_query(struct proc_maps_private *priv, void __user *uarg)
 	if (!!karg.build_id_size != !!karg.build_id_addr)
 		return -EINVAL;
 
-	mm = priv->lock_ctx.mm;
 	if (!mm || !mmget_not_zero(mm))
 		return -ESRCH;
 
-	err = query_vma_setup(mm);
+	err = query_vma_setup(&lock_ctx);
 	if (err) {
 		mmput(mm);
 		return err;
 	}
 
-	vma = query_matching_vma(mm, karg.query_addr, karg.query_flags);
+	vma = query_matching_vma(&lock_ctx, karg.query_addr, karg.query_flags);
 	if (IS_ERR(vma)) {
 		err = PTR_ERR(vma);
 		vma = NULL;
@@ -710,7 +776,7 @@ static int do_procmap_query(struct proc_maps_private *priv, void __user *uarg)
 	}
 
 	/* unlock vma or mmap_lock, and put mm_struct before copying data to user */
-	query_vma_teardown(mm, vma);
+	query_vma_teardown(&lock_ctx);
 	mmput(mm);
 
 	if (karg.vma_name_size && copy_to_user(u64_to_user_ptr(karg.vma_name_addr),
@@ -730,7 +796,7 @@ static int do_procmap_query(struct proc_maps_private *priv, void __user *uarg)
 	return 0;
 
 out:
-	query_vma_teardown(mm, vma);
+	query_vma_teardown(&lock_ctx);
 	mmput(mm);
 	kfree(name_buf);
 	return err;
@@ -743,7 +809,8 @@ static long procfs_procmap_ioctl(struct file *file, unsigned int cmd, unsigned l
 
 	switch (cmd) {
 	case PROCMAP_QUERY:
-		return do_procmap_query(priv, (void __user *)arg);
+		/* priv->lock_ctx.mm is set during file open operation */
+		return do_procmap_query(priv->lock_ctx.mm, (void __user *)arg);
Thanks!
 	default:
 		return -ENOIOCTLCMD;
 	}
-- 
2.50.1.703.g449372360f-goog