[Linaro-mm-sig] Re: [PATCH v3 1/2] drm/amdgpu: convert amdgpu_vm_lock_by_pasid() to drm_exec

21 May 2026

On 5/20/26 17:17, Mikhail Gavrilov wrote:
...
amdgpu_vm_lock_by_pasid() looks up a VM by PASID and reserves its root
PD with a bare amdgpu_bo_reserve(), returning the still-reserved root to
the caller. A caller that then needs to reserve further BOs (for example
the devcoredump IB dump) ends up nesting reservation_ww_class_mutex
acquires without a ww_acquire_ctx, which lockdep flags as recursive
locking.
Convert the helper to take a drm_exec context and lock the root PD via
amdgpu_vm_lock_pd() instead. Callers now run it inside a
drm_exec_until_all_locked() loop and can lock additional BOs in the same
ww ticket, so there is no nested ww_mutex acquire.
The only existing caller, amdgpu_vm_handle_fault(), is updated
accordingly. Its is_compute_context path, which previously dropped the
root reservation around svm_range_restore_pages() and re-took it, now
finalises the drm_exec context and re-initialises a fresh one; behaviour
is otherwise unchanged.
No functional change intended for the page-fault path.
Signed-off-by: Mikhail Gavrilov <mikhail.v.gavrilov@gmail.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 72 ++++++++++++++++++--------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  3 +-
 2 files changed, 51 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9ba9de16a27a..3a22670b733f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2950,14 +2950,22 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 }
 
 /**


amdgpu_vm_lock_by_pasid - return an amdgpu_vm and its root bo from a pasid, if possible.





amdgpu_vm_lock_by_pasid - look up a VM by PASID and lock its root PD
@adev: amdgpu device pointer





@root: root BO of the VM





@root: out: reference to the VM's root BO, dropped by the caller
@pasid: PASID of the VM





The caller needs to unreserve and unref the root bo on success.





@exec: drm_exec context to lock the root PD in







Must be called from within a drm_exec_until_all_locked() loop; the caller



runs drm_exec_retry_on_contention() afterwards and drops the *root



reference once the drm_exec context is finalised.







Return: the VM on success, or NULL if the PASID has no VM, the VM is being



torn down, or locking the root PD failed.

*/

struct amdgpu_vm *amdgpu_vm_lock_by_pasid(struct amdgpu_device *adev,

			  struct amdgpu_bo **root, u32 pasid)




			  struct amdgpu_bo **root, u32 pasid,



I think we can drop the root parameter now; the exec reference should be sufficient.
...

			  struct drm_exec *exec)



{
   unsigned long irqflags;
   struct amdgpu_vm *vm;
@@ -2971,9 +2979,11 @@ struct amdgpu_vm *amdgpu_vm_lock_by_pasid(struct amdgpu_device *adev,
   if (!*root)
   	return NULL;

r = amdgpu_bo_reserve(*root, true);
if (r)
goto error_unref;




r = amdgpu_vm_lock_pd(vm, exec, 0);

amdgpu_vm_lock_pd() can't be used here since we can't guarantee that the VM pointer won't go away.
Just do:
r = drm_exec_lock_obj(exec, &(*root)->tbo.base);
...

if (r) {
amdgpu_bo_unref(root);


return NULL;


}

/* Double check that the VM still exists */
   xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
@@ -2981,16 +2991,12 @@ struct amdgpu_vm *amdgpu_vm_lock_by_pasid(struct amdgpu_device *adev,
   if (vm && vm->root.bo != *root)
   	vm = NULL;
   xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);

if (!vm)
goto error_unlock;




if (!vm) {

We should clean up with drm_exec_unlock_obj() here, same as it was before.
...

amdgpu_bo_unref(root);


return NULL;


}

return vm;
We can drop the extra reference on the root BO before returning the VM now since the drm_exec object holds one as well.
Apart from that this looks like a really nice cleanup to me.
Thanks,
Christian.
...
-error_unlock:

amdgpu_bo_unreserve(*root);


-error_unref:

amdgpu_bo_unref(root);
return NULL;

}
 
 /**
@@ -3013,20 +3019,32 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
 {
   bool is_compute_context = false;
   struct amdgpu_bo *root;

struct drm_exec exec;
uint64_t value, flags;
struct amdgpu_vm *vm;
int r;


vm = amdgpu_vm_lock_by_pasid(adev, &root, pasid);
if (!vm)


drm_exec_init(&exec, 0, 0);
drm_exec_until_all_locked(&exec) {
vm = amdgpu_vm_lock_by_pasid(adev, &root, pasid, &exec);


drm_exec_retry_on_contention(&exec);


if (!vm)


	break;


}
if (!vm) {
drm_exec_fini(&exec);

return false;
}

is_compute_context = vm->is_compute_context;
 
   if (is_compute_context) {

/* Unreserve root since svm_range_restore_pages might try to reserve it. */


/* TODO: rework svm_range_restore_pages so that this isn't necessary. */


amdgpu_bo_unreserve(root);




/* Release the root PD lock since svm_range_restore_pages


 * might try to take it.


 * TODO: rework svm_range_restore_pages so that this isn't


 * necessary.


 */


drm_exec_fini(&exec);



if (!svm_range_restore_pages(adev, pasid, vmid,
   				     node_id, addr >> PAGE_SHIFT, ts, write_fault)) {
@@ -3036,9 +3054,17 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
   	amdgpu_bo_unref(&root);
 
   	/* Re-acquire the VM lock, could be that the VM was freed in between. */

vm = amdgpu_vm_lock_by_pasid(adev, &root, pasid);


if (!vm)




drm_exec_init(&exec, 0, 0);


drm_exec_until_all_locked(&exec) {


	vm = amdgpu_vm_lock_by_pasid(adev, &root, pasid, &exec);


	drm_exec_retry_on_contention(&exec);


	if (!vm)


		break;


}


if (!vm) {


	drm_exec_fini(&exec);
return false;


}

}

addr /= AMDGPU_GPU_PAGE_SIZE;
@@ -3076,7 +3102,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
   r = amdgpu_vm_update_pdes(adev, vm, true);
 
 error_unlock:

amdgpu_bo_unreserve(root);


drm_exec_fini(&exec);
if (r < 0)
dev_err(adev->dev, "Can't handle page fault (%d)\n", r);

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index d083d7aab75c..af292c2fc521 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -593,7 +593,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
   		    bool write_fault);
 
 struct amdgpu_vm *amdgpu_vm_lock_by_pasid(struct amdgpu_device *adev,

			  struct amdgpu_bo **root, u32 pasid);




			  struct amdgpu_bo **root, u32 pasid,


			  struct drm_exec *exec);



void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);

    

2026

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

[Linaro-mm-sig] Re: [PATCH v3 1/2] drm/amdgpu: convert amdgpu_vm_lock_by_pasid() to drm_exec

Signed-off-by: Mikhail Gavrilov <mikhail.v.gavrilov@gmail.com>