2
0
mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git synced 2025-09-04 20:19:47 +08:00

drm/amdgpu: Create a debug option to disable ring reset

Prior to the addition of ring reset, the debug option
`debug_disable_soft_recovery` could be used to force a full device
reset. Now that we have ring reset, create a debug option to disable
it in amdgpu, so that the driver takes the full device reset path
again when both options are combined.

This option is useful for testing and debugging purposes when one wants
to test the full reset from userspace.

Signed-off-by: André Almeida <andrealmeid@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
André Almeida 2025-02-26 10:11:18 -03:00 committed by Alex Deucher
parent 63e6a77ccf
commit 9c696cc57c
3 changed files with 10 additions and 2 deletions

View File

@ -1192,6 +1192,7 @@ struct amdgpu_device {
bool debug_use_vram_fw_buf; bool debug_use_vram_fw_buf;
bool debug_enable_ras_aca; bool debug_enable_ras_aca;
bool debug_exp_resets; bool debug_exp_resets;
bool debug_disable_gpu_ring_reset;
bool enforce_isolation[MAX_XCP]; bool enforce_isolation[MAX_XCP];
/* Added this mutex for cleaner shader isolation between GFX and compute processes */ /* Added this mutex for cleaner shader isolation between GFX and compute processes */

View File

@ -137,6 +137,7 @@ enum AMDGPU_DEBUG_MASK {
AMDGPU_DEBUG_USE_VRAM_FW_BUF = BIT(3), AMDGPU_DEBUG_USE_VRAM_FW_BUF = BIT(3),
AMDGPU_DEBUG_ENABLE_RAS_ACA = BIT(4), AMDGPU_DEBUG_ENABLE_RAS_ACA = BIT(4),
AMDGPU_DEBUG_ENABLE_EXP_RESETS = BIT(5), AMDGPU_DEBUG_ENABLE_EXP_RESETS = BIT(5),
AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6),
}; };
unsigned int amdgpu_vram_limit = UINT_MAX; unsigned int amdgpu_vram_limit = UINT_MAX;
@ -2223,6 +2224,11 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev)
pr_info("debug: enable experimental reset features\n"); pr_info("debug: enable experimental reset features\n");
adev->debug_exp_resets = true; adev->debug_exp_resets = true;
} }
if (amdgpu_debug_mask & AMDGPU_DEBUG_DISABLE_GPU_RING_RESET) {
pr_info("debug: ring reset disabled\n");
adev->debug_disable_gpu_ring_reset = true;
}
} }
static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags) static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags)

View File

@ -131,8 +131,9 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
} }
/* attempt a per ring reset */ /* attempt a per ring reset */
if (amdgpu_gpu_recovery && if (unlikely(adev->debug_disable_gpu_ring_reset)) {
ring->funcs->reset) { dev_err(adev->dev, "Ring reset disabled by debug mask\n");
} else if (amdgpu_gpu_recovery && ring->funcs->reset) {
bool is_guilty; bool is_guilty;
dev_err(adev->dev, "Starting %s ring reset\n", s_job->sched->name); dev_err(adev->dev, "Starting %s ring reset\n", s_job->sched->name);