If the BO has been moved the PT should be updated, otherwise the VAs might point to invalid PT.
This fixes random GPU hangs when replacing sparse mappings from the userspace, while OP_MAP/OP_UNMAP works fine because always valid BOs are correctly handled there.
Cc: stable@vger.kernel.org Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 12 ++++++++++++ 1 file changed, 12 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 143d11afe0e5..eff73c428b12 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1771,18 +1771,30 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
/* Insert partial mapping before the range */ if (!list_empty(&before->list)) { + struct amdgpu_bo *bo = before->bo_va->base.bo; + amdgpu_vm_it_insert(before, &vm->va); if (before->flags & AMDGPU_PTE_PRT) amdgpu_vm_prt_get(adev); + + if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv && + !before->bo_va->base.moved) + amdgpu_vm_bo_moved(&before->bo_va->base); } else { kfree(before); }
/* Insert partial mapping after the range */ if (!list_empty(&after->list)) { + struct amdgpu_bo *bo = after->bo_va->base.bo; + amdgpu_vm_it_insert(after, &vm->va); if (after->flags & AMDGPU_PTE_PRT) amdgpu_vm_prt_get(adev); + + if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv && + !after->bo_va->base.moved) + amdgpu_vm_bo_moved(&after->bo_va->base); } else { kfree(after); }
Am 16.06.23 um 08:27 schrieb Samuel Pitoiset:
If the BO has been moved the PT should be updated, otherwise the VAs might point to invalid PT.
You might want to update this sentence a bit. Something like:
Per VM BOs must be marked as moved or otherwise their ranges are not updated on use which might be necessary when the replace operation splits mappings.
Apart from that, really good catch, and the patch is Reviewed-by: Christian König <christian.koenig@amd.com>
Regards, Christian.
This fixes random GPU hangs when replacing sparse mappings from the userspace, while OP_MAP/OP_UNMAP works fine because always valid BOs are correctly handled there.
Cc: stable@vger.kernel.org Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 12 ++++++++++++ 1 file changed, 12 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 143d11afe0e5..eff73c428b12 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1771,18 +1771,30 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, /* Insert partial mapping before the range */ if (!list_empty(&before->list)) {
+ struct amdgpu_bo *bo = before->bo_va->base.bo;
+
amdgpu_vm_it_insert(before, &vm->va); if (before->flags & AMDGPU_PTE_PRT) amdgpu_vm_prt_get(adev);
+
+ if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
+ !before->bo_va->base.moved)
+ amdgpu_vm_bo_moved(&before->bo_va->base);
} else { kfree(before); }
/* Insert partial mapping after the range */ if (!list_empty(&after->list)) {
+ struct amdgpu_bo *bo = after->bo_va->base.bo;
+
amdgpu_vm_it_insert(after, &vm->va); if (after->flags & AMDGPU_PTE_PRT) amdgpu_vm_prt_get(adev);
+
+ if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
+ !after->bo_va->base.moved)
+ amdgpu_vm_bo_moved(&after->bo_va->base);
} else { kfree(after); }
Per VM BOs must be marked as moved or otherwise their ranges are not updated on use which might be necessary when the replace operation splits mappings.
This fixes random GPU hangs when replacing sparse mappings from the userspace, while OP_MAP/OP_UNMAP works fine because always valid BOs are correctly handled there.
Cc: stable@vger.kernel.org Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Christian König <christian.koenig@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 12 ++++++++++++ 1 file changed, 12 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 143d11afe0e5..eff73c428b12 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1771,18 +1771,30 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
/* Insert partial mapping before the range */ if (!list_empty(&before->list)) { + struct amdgpu_bo *bo = before->bo_va->base.bo; + amdgpu_vm_it_insert(before, &vm->va); if (before->flags & AMDGPU_PTE_PRT) amdgpu_vm_prt_get(adev); + + if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv && + !before->bo_va->base.moved) + amdgpu_vm_bo_moved(&before->bo_va->base); } else { kfree(before); }
/* Insert partial mapping after the range */ if (!list_empty(&after->list)) { + struct amdgpu_bo *bo = after->bo_va->base.bo; + amdgpu_vm_it_insert(after, &vm->va); if (after->flags & AMDGPU_PTE_PRT) amdgpu_vm_prt_get(adev); + + if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv && + !after->bo_va->base.moved) + amdgpu_vm_bo_moved(&after->bo_va->base); } else { kfree(after); }
Applied. Thanks!
Alex
On Fri, Jun 16, 2023 at 9:38 AM Samuel Pitoiset <samuel.pitoiset@gmail.com> wrote:
Per VM BOs must be marked as moved or otherwise their ranges are not updated on use which might be necessary when the replace operation splits mappings.
This fixes random GPU hangs when replacing sparse mappings from the userspace, while OP_MAP/OP_UNMAP works fine because always valid BOs are correctly handled there.
Cc: stable@vger.kernel.org Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Christian König <christian.koenig@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 12 ++++++++++++ 1 file changed, 12 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 143d11afe0e5..eff73c428b12 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1771,18 +1771,30 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
/* Insert partial mapping before the range */ if (!list_empty(&before->list)) {
struct amdgpu_bo *bo = before->bo_va->base.bo;
amdgpu_vm_it_insert(before, &vm->va); if (before->flags & AMDGPU_PTE_PRT) amdgpu_vm_prt_get(adev);
if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
!before->bo_va->base.moved)
amdgpu_vm_bo_moved(&before->bo_va->base); } else { kfree(before); } /* Insert partial mapping after the range */ if (!list_empty(&after->list)) {
struct amdgpu_bo *bo = after->bo_va->base.bo;
amdgpu_vm_it_insert(after, &vm->va); if (after->flags & AMDGPU_PTE_PRT) amdgpu_vm_prt_get(adev);
if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
!after->bo_va->base.moved)
amdgpu_vm_bo_moved(&after->bo_va->base); } else { kfree(after); }
-- 2.41.0
linux-stable-mirror@lists.linaro.org