drm/xe/multi_queue: Handle CGP context error

Trigger multi-queue context cleanup upon CGP context error
notification from GuC.

v4: Fix error message

Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20251211010249.1647839-30-niranjana.vishwanathapura@intel.com
This commit is contained in:
Niranjana Vishwanathapura
2025-12-10 17:02:59 -08:00
parent 1b5d39e667
commit c85285b32c
5 changed files with 43 additions and 0 deletions

View File

@@ -142,6 +142,7 @@ enum xe_guc_action {
XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE = 0x4602,
XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC = 0x4603,
XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE = 0x4604,
XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CGP_CONTEXT_ERROR = 0x4605,
XE_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507,
XE_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A,
XE_GUC_ACTION_SET_DEVICE_ENGINE_ACTIVITY_BUFFER = 0x550C,

View File

@@ -1618,6 +1618,10 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
case XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE:
ret = xe_guc_exec_queue_cgp_sync_done_handler(guc, payload, adj_len);
break;
case XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CGP_CONTEXT_ERROR:
ret = xe_guc_exec_queue_cgp_context_error_handler(guc, payload,
adj_len);
break;
default:
xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action);
}

View File

@@ -48,6 +48,8 @@
#include "xe_uc_fw.h"
#include "xe_vm.h"
/*
 * Exact payload length required for a CGP context error G2H notification;
 * the handler rejects any other length with -EPROTO.
 * NOTE(review): unit is presumably u32 dwords, since the payload is a u32
 * array - confirm against the GuC interface spec.
 */
#define XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN 6
static struct xe_guc *
exec_queue_to_guc(struct xe_exec_queue *q)
{
@@ -3009,6 +3011,35 @@ int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 le
return 0;
}
/**
 * xe_guc_exec_queue_cgp_context_error_handler - CGP context error handler
 * @guc: guc
 * @msg: message payload of the CGP context error notification
 * @len: length of @msg
 *
 * Validates the payload length, looks up the exec queue identified by msg[2],
 * logs the error details carried in the payload and then triggers the same
 * cleanup that is used for an engine reset.
 *
 * Return: 0 on success, -EPROTO if @len is not
 * XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN or if no exec queue matches the id
 * in the message.
 */
int xe_guc_exec_queue_cgp_context_error_handler(struct xe_guc *guc, u32 *msg,
						u32 len)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_exec_queue *q;
	u32 guc_id;

	if (unlikely(len != XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN)) {
		drm_err(&xe->drm, "Invalid length %u\n", len);
		return -EPROTO;
	}

	/*
	 * Only index into the payload once its length has been validated,
	 * otherwise a short message would be read out of bounds.
	 */
	guc_id = msg[2];

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	/*
	 * NOTE(review): the lookup key is msg[2] (printed as q0_id) while
	 * msg[4] is the field printed as guc_id - confirm this is intended.
	 */
	xe_gt_dbg(gt,
		  "CGP context error: [%s] err=0x%x, q0_id=0x%x LRCA=0x%x guc_id=0x%x\n",
		  msg[0] & 1 ? "uc" : "kmd", msg[1], msg[2], msg[3], msg[4]);

	trace_xe_exec_queue_cgp_context_error(q);

	/* Treat the same as engine reset */
	xe_guc_exec_queue_reset_trigger_cleanup(q);

	return 0;
}
/**
* xe_guc_exec_queue_cgp_sync_done_handler - CGP synchronization done handler
* @guc: guc

View File

@@ -37,6 +37,8 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len);
int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len);
int xe_guc_exec_queue_cgp_sync_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
int xe_guc_exec_queue_cgp_context_error_handler(struct xe_guc *guc, u32 *msg,
u32 len);
struct xe_guc_submit_exec_queue_snapshot *
xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q);

View File

@@ -172,6 +172,11 @@ DEFINE_EVENT(xe_exec_queue, xe_exec_queue_memory_cat_error,
TP_ARGS(q)
);
/*
 * Tracepoint for a CGP context error on an exec queue. It is an instance of
 * the xe_exec_queue event class and takes the affected exec queue as its
 * only argument.
 */
DEFINE_EVENT(xe_exec_queue, xe_exec_queue_cgp_context_error,
TP_PROTO(struct xe_exec_queue *q),
TP_ARGS(q)
);
DEFINE_EVENT(xe_exec_queue, xe_exec_queue_stop,
TP_PROTO(struct xe_exec_queue *q),
TP_ARGS(q)