drm/xe/vf: Don't allow GT reset to be queued during VF post-migration recovery

With well-behaved software, a GT reset should never occur at all, let
alone during VF post-migration recovery. If one is attempted while
recovery is pending, trigger a warning but suppress the GT reset, as VF
post-migration recovery is expected to bring the VF back to a working state.

v3:
 - Better commit message (Tomasz)
v5:
 - Use xe_gt_WARN_ON (Michal)

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Tomasz Lis <tomasz.lis@intel.com>
Link: https://lore.kernel.org/r/20251008214532.3442967-17-matthew.brost@intel.com
This commit is contained in:
Matthew Brost
2025-10-08 14:45:14 -07:00
parent b47c0c07c3
commit f1029b9dde
4 changed files with 5 additions and 56 deletions

View File

@@ -803,11 +803,6 @@ static int do_gt_restart(struct xe_gt *gt)
return 0;
}
/*
 * GT-level wrapper: forwards to xe_guc_wait_reset_unblock() for this GT's GuC
 * and passes its return value through unchanged.
 */
static int gt_wait_reset_unblock(struct xe_gt *gt)
{
return xe_guc_wait_reset_unblock(&gt->uc.guc);
}
static int gt_reset(struct xe_gt *gt)
{
unsigned int fw_ref;
@@ -822,10 +817,6 @@ static int gt_reset(struct xe_gt *gt)
xe_gt_info(gt, "reset started\n");
err = gt_wait_reset_unblock(gt);
if (!err)
xe_gt_warn(gt, "reset block failed to get lifted");
xe_pm_runtime_get(gt_to_xe(gt));
if (xe_fault_inject_gt_reset()) {

View File

@@ -1097,17 +1097,11 @@ void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p)
/*
 * Shutdown step of VF post-migration recovery: clears the recovery_queued
 * flag, pauses GuC submission and asks the GuC submission layer to block
 * resets.
 */
static void vf_post_migration_shutdown(struct xe_gt *gt)
{
int ret = 0;
/* recovery_queued is cleared while holding migration.lock (IRQs disabled) */
spin_lock_irq(&gt->sriov.vf.migration.lock);
gt->sriov.vf.migration.recovery_queued = false;
spin_unlock_irq(&gt->sriov.vf.migration.lock);
xe_guc_submit_pause(&gt->uc.guc);
/* A non-zero result is only logged below; recovery continues regardless */
ret |= xe_guc_submit_reset_block(&gt->uc.guc);
if (ret)
xe_gt_sriov_info(gt, "migration recovery encountered ongoing reset\n");
}
static size_t post_migration_scratch_size(struct xe_device *xe)
@@ -1142,7 +1136,6 @@ static void vf_post_migration_kickstart(struct xe_gt *gt)
*/
xe_irq_resume(gt_to_xe(gt));
xe_guc_submit_reset_unblock(&gt->uc.guc);
xe_guc_submit_unpause(&gt->uc.guc);
}

View File

@@ -27,6 +27,7 @@
#include "xe_gt.h"
#include "xe_gt_clock.h"
#include "xe_gt_printk.h"
#include "xe_gt_sriov_vf.h"
#include "xe_guc.h"
#include "xe_guc_capture.h"
#include "xe_guc_ct.h"
@@ -1900,47 +1901,14 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
}
}
/**
 * xe_guc_submit_reset_block - Disallow reset calls on given GuC.
 * @guc: the &xe_guc struct instance
 *
 * Atomically ORs 1 into submission_state.reset_blocked.
 *
 * Return: the value of reset_blocked before this call; non-zero means the
 * flag was already set.
 */
int xe_guc_submit_reset_block(struct xe_guc *guc)
{
return atomic_fetch_or(1, &guc->submission_state.reset_blocked);
}
/**
 * xe_guc_submit_reset_unblock - Allow back reset calls on given GuC.
 * @guc: the &xe_guc struct instance
 *
 * Clears submission_state.reset_blocked with release ordering, then wakes
 * all waiters on guc->ct.wq so they can re-evaluate the flag.
 */
void xe_guc_submit_reset_unblock(struct xe_guc *guc)
{
atomic_set_release(&guc->submission_state.reset_blocked, 0);
wake_up_all(&guc->ct.wq);
}
/*
 * Read submission_state.reset_blocked with acquire ordering.
 * Returns the raw flag value: non-zero while resets are blocked, 0 otherwise.
 */
static int guc_submit_reset_is_blocked(struct xe_guc *guc)
{
return atomic_read_acquire(&guc->submission_state.reset_blocked);
}
/* Maximum time of blocking reset, in jiffies (HZ * 5 is five seconds) */
#define RESET_BLOCK_PERIOD_MAX (HZ * 5)
/**
 * xe_guc_wait_reset_unblock - Wait until reset blocking flag is lifted, or timeout.
 * @guc: the &xe_guc struct instance
 *
 * Sleeps on guc->ct.wq until guc_submit_reset_is_blocked() reports the flag
 * as clear, for at most RESET_BLOCK_PERIOD_MAX.
 *
 * Return: the result of wait_event_timeout(): 0 if the flag was still set
 * when the timeout elapsed, otherwise a non-zero value.
 */
int xe_guc_wait_reset_unblock(struct xe_guc *guc)
{
return wait_event_timeout(guc->ct.wq,
!guc_submit_reset_is_blocked(guc), RESET_BLOCK_PERIOD_MAX);
}
int xe_guc_submit_reset_prepare(struct xe_guc *guc)
{
int ret;
if (xe_gt_WARN_ON(guc_to_gt(guc),
xe_gt_sriov_vf_recovery_pending(guc_to_gt(guc))))
return 0;
if (!guc->submission_state.initialized)
return 0;

View File

@@ -22,9 +22,6 @@ void xe_guc_submit_stop(struct xe_guc *guc);
int xe_guc_submit_start(struct xe_guc *guc);
void xe_guc_submit_pause(struct xe_guc *guc);
void xe_guc_submit_unpause(struct xe_guc *guc);
int xe_guc_submit_reset_block(struct xe_guc *guc);
void xe_guc_submit_reset_unblock(struct xe_guc *guc);
int xe_guc_wait_reset_unblock(struct xe_guc *guc);
void xe_guc_submit_wedge(struct xe_guc *guc);
int xe_guc_read_stopped(struct xe_guc *guc);