drm/xe: Do not wake device during a GT reset

Waking the device during a GT reset can lead to unintended memory
allocation, which is not allowed since GT resets occur in the reclaim
path. Prevent this by holding a PM reference while a reset is in flight.

Fixes: dd08ebf6c3 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Cc: stable@vger.kernel.org
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://lore.kernel.org/r/20251022005538.828980-3-matthew.brost@intel.com
This commit is contained in:
Matthew Brost
2025-10-21 17:55:37 -07:00
parent 1f1314e8e7
commit 480b358e7d

View File

@@ -818,17 +818,19 @@ static int gt_reset(struct xe_gt *gt)
unsigned int fw_ref;
int err;
if (xe_device_wedged(gt_to_xe(gt)))
return -ECANCELED;
if (xe_device_wedged(gt_to_xe(gt))) {
err = -ECANCELED;
goto err_pm_put;
}
/* We only support GT resets with GuC submission */
if (!xe_device_uc_enabled(gt_to_xe(gt)))
return -ENODEV;
if (!xe_device_uc_enabled(gt_to_xe(gt))) {
err = -ENODEV;
goto err_pm_put;
}
xe_gt_info(gt, "reset started\n");
xe_pm_runtime_get(gt_to_xe(gt));
if (xe_fault_inject_gt_reset()) {
err = -ECANCELED;
goto err_fail;
@@ -875,6 +877,7 @@ err_fail:
xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err));
xe_device_declare_wedged(gt_to_xe(gt));
err_pm_put:
xe_pm_runtime_put(gt_to_xe(gt));
return err;
@@ -896,7 +899,9 @@ void xe_gt_reset_async(struct xe_gt *gt)
return;
xe_gt_info(gt, "reset queued\n");
queue_work(gt->ordered_wq, &gt->reset.worker);
xe_pm_runtime_get_noresume(gt_to_xe(gt));
if (!queue_work(gt->ordered_wq, &gt->reset.worker))
xe_pm_runtime_put(gt_to_xe(gt));
}
void xe_gt_suspend_prepare(struct xe_gt *gt)