drm/amdgpu: lock both VM and BO in amdgpu_gem_object_open

The VM was not locked in the past since we initially only cleared the
linked list element and did not add it to any VM state.

But this changed quite some time ago; we just never noticed the
problem because the VM state lock was masking it.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Christian König
2026-01-20 12:57:21 +01:00
committed by Alex Deucher
parent abde491143
commit fd1fa48b93
4 changed files with 42 additions and 11 deletions

View File

@@ -878,6 +878,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
struct amdgpu_bo *bo[2] = {NULL, NULL};
struct amdgpu_bo_va *bo_va;
bool same_hive = false;
struct drm_exec exec;
int i, ret;
if (!va) {
@@ -958,19 +959,25 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
goto unwind;
}
/* Add BO to VM internal data structures */
ret = amdgpu_bo_reserve(bo[i], false);
if (ret) {
pr_debug("Unable to reserve BO during memory attach");
goto unwind;
drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
drm_exec_until_all_locked(&exec) {
ret = amdgpu_vm_lock_pd(vm, &exec, 0);
drm_exec_retry_on_contention(&exec);
if (unlikely(ret))
goto unwind;
ret = drm_exec_lock_obj(&exec, &bo[i]->tbo.base);
drm_exec_retry_on_contention(&exec);
if (unlikely(ret))
goto unwind;
}
bo_va = amdgpu_vm_bo_find(vm, bo[i]);
if (!bo_va)
bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
else
++bo_va->ref_count;
attachment[i]->bo_va = bo_va;
amdgpu_bo_unreserve(bo[i]);
drm_exec_fini(&exec);
if (unlikely(!attachment[i]->bo_va)) {
ret = -ENOMEM;
pr_err("Failed to add BO object to VM. ret == %d\n",

View File

@@ -232,6 +232,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj,
struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_bo_va *bo_va;
struct mm_struct *mm;
struct drm_exec exec;
int r;
mm = amdgpu_ttm_tt_get_usermm(abo->tbo.ttm);
@@ -242,9 +243,18 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj,
!amdgpu_vm_is_bo_always_valid(vm, abo))
return -EPERM;
r = amdgpu_bo_reserve(abo, false);
if (r)
return r;
drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
drm_exec_until_all_locked(&exec) {
r = drm_exec_prepare_obj(&exec, &abo->tbo.base, 1);
drm_exec_retry_on_contention(&exec);
if (unlikely(r))
goto out_unlock;
r = amdgpu_vm_lock_pd(vm, &exec, 0);
drm_exec_retry_on_contention(&exec);
if (unlikely(r))
goto out_unlock;
}
amdgpu_vm_bo_update_shared(abo);
bo_va = amdgpu_vm_bo_find(vm, abo);
@@ -260,8 +270,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj,
amdgpu_bo_unreserve(abo);
return r;
}
amdgpu_bo_unreserve(abo);
drm_exec_fini(&exec);
/* Validate and add eviction fence to DMABuf imports with dynamic
* attachment in compute VMs. Re-validation will be done by
@@ -294,7 +303,10 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj,
}
}
mutex_unlock(&vm->process_info->lock);
return r;
out_unlock:
drm_exec_fini(&exec);
return r;
}

View File

@@ -1445,6 +1445,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
{
struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_fpriv *fpriv;
struct drm_exec exec;
int r, pasid;
/* Ensure IB tests are run on ring */
@@ -1484,7 +1485,16 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
if (r)
goto error_pasid;
drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
drm_exec_until_all_locked(&exec) {
r = amdgpu_vm_lock_pd(&fpriv->vm, &exec, 0);
drm_exec_retry_on_contention(&exec);
if (unlikely(r))
goto error_vm;
}
fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL);
drm_exec_fini(&exec);
if (!fpriv->prt_va) {
r = -ENOMEM;
goto error_vm;

View File

@@ -1735,6 +1735,8 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
{
struct amdgpu_bo_va *bo_va;
amdgpu_vm_assert_locked(vm);
bo_va = kzalloc(sizeof(struct amdgpu_bo_va), GFP_KERNEL);
if (bo_va == NULL) {
return NULL;