2
0
mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git synced 2025-09-04 20:19:47 +08:00

drm/amdgpu: move reset support type checks into the caller

Rather than checking in the callbacks, check if the reset
type is supported in the caller.

Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Alex Deucher 2025-07-15 11:55:05 -04:00
parent ea2791d05a
commit 6ac55eab4f
25 changed files with 37 additions and 79 deletions

View File

@ -112,6 +112,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
amdgpu_job_core_dump(adev, job);
if (amdgpu_gpu_recovery &&
amdgpu_ring_is_reset_type_supported(ring, AMDGPU_RESET_TYPE_SOFT_RESET) &&
amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
dev_err(adev->dev, "ring %s timeout, but soft recovered\n",
s_job->sched->name);
@ -131,7 +132,9 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
/* attempt a per ring reset */
if (unlikely(adev->debug_disable_gpu_ring_reset)) {
dev_err(adev->dev, "Ring reset disabled by debug mask\n");
} else if (amdgpu_gpu_recovery && ring->funcs->reset) {
} else if (amdgpu_gpu_recovery &&
amdgpu_ring_is_reset_type_supported(ring, AMDGPU_RESET_TYPE_PER_QUEUE) &&
ring->funcs->reset) {
dev_err(adev->dev, "Starting %s ring reset\n",
s_job->sched->name);
r = amdgpu_ring_reset(ring, job->vmid, &job->hw_fence);

View File

@ -825,3 +825,34 @@ int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring,
drm_sched_wqueue_start(&ring->sched);
return 0;
}
/**
 * amdgpu_ring_is_reset_type_supported - test a reset type against a ring's IP
 * @ring: ring to query; ring->funcs->type selects which reset mask is read
 * @reset_type: reset type bit(s) to look for, e.g. AMDGPU_RESET_TYPE_PER_QUEUE
 *
 * Picks the supported-reset mask stored in the device structure for the
 * ring's type (GFX, compute, SDMA, VCN decode/encode or JPEG) and ANDs it
 * with @reset_type.
 *
 * Returns: true if any bit of @reset_type is set in the selected mask,
 * false otherwise. Ring types not listed below always yield false.
 */
bool amdgpu_ring_is_reset_type_supported(struct amdgpu_ring *ring,
					 u32 reset_type)
{
	struct amdgpu_device *adev = ring->adev;

	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_GFX:
		return !!(adev->gfx.gfx_supported_reset & reset_type);
	case AMDGPU_RING_TYPE_COMPUTE:
		return !!(adev->gfx.compute_supported_reset & reset_type);
	case AMDGPU_RING_TYPE_SDMA:
		return !!(adev->sdma.supported_reset & reset_type);
	case AMDGPU_RING_TYPE_VCN_DEC:
	case AMDGPU_RING_TYPE_VCN_ENC:
		/* decode and encode rings share the single VCN mask */
		return !!(adev->vcn.supported_reset & reset_type);
	case AMDGPU_RING_TYPE_VCN_JPEG:
		return !!(adev->jpeg.supported_reset & reset_type);
	default:
		/* no reset mask is consulted for any other ring type */
		return false;
	}
}

View File

@ -568,4 +568,6 @@ void amdgpu_ring_reset_helper_begin(struct amdgpu_ring *ring,
struct amdgpu_fence *guilty_fence);
int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring,
struct amdgpu_fence *guilty_fence);
/* Check whether the supported-reset mask for @ring's type includes @reset_type. */
bool amdgpu_ring_is_reset_type_supported(struct amdgpu_ring *ring,
u32 reset_type);
#endif

View File

@ -1522,9 +1522,6 @@ int amdgpu_vcn_ring_reset(struct amdgpu_ring *ring,
{
struct amdgpu_device *adev = ring->adev;
if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
return -EOPNOTSUPP;
if (adev->vcn.inst[ring->me].using_unified_queue)
return -EINVAL;

View File

@ -9523,9 +9523,6 @@ static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring,
u64 addr;
int r;
if (!(adev->gfx.gfx_supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
return -EOPNOTSUPP;
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;
@ -9591,9 +9588,6 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
unsigned long flags;
int i, r;
if (!(adev->gfx.compute_supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
return -EOPNOTSUPP;
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;

View File

@ -6808,9 +6808,6 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring,
struct amdgpu_device *adev = ring->adev;
int r;
if (!(adev->gfx.gfx_supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
return -EOPNOTSUPP;
amdgpu_ring_reset_helper_begin(ring, timedout_fence);
r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false);
@ -6974,9 +6971,6 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring,
struct amdgpu_device *adev = ring->adev;
int r = 0;
if (!(adev->gfx.compute_supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
return -EOPNOTSUPP;
amdgpu_ring_reset_helper_begin(ring, timedout_fence);
r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true);

View File

@ -5303,9 +5303,6 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring,
struct amdgpu_device *adev = ring->adev;
int r;
if (!(adev->gfx.gfx_supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
return -EOPNOTSUPP;
amdgpu_ring_reset_helper_begin(ring, timedout_fence);
r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false);
@ -5421,9 +5418,6 @@ static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring,
struct amdgpu_device *adev = ring->adev;
int r;
if (!(adev->gfx.compute_supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
return -EOPNOTSUPP;
amdgpu_ring_reset_helper_begin(ring, timedout_fence);
r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true);

View File

@ -7183,9 +7183,6 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring,
unsigned long flags;
int i, r;
if (!(adev->gfx.compute_supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
return -EOPNOTSUPP;
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;

View File

@ -3565,9 +3565,6 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring,
unsigned long flags;
int r;
if (!(adev->gfx.compute_supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
return -EOPNOTSUPP;
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;

View File

@ -773,9 +773,6 @@ static int jpeg_v2_0_ring_reset(struct amdgpu_ring *ring,
{
int r;
if (!(ring->adev->jpeg.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
return -EOPNOTSUPP;
amdgpu_ring_reset_helper_begin(ring, timedout_fence);
r = jpeg_v2_0_stop(ring->adev);
if (r)

View File

@ -650,9 +650,6 @@ static int jpeg_v2_5_ring_reset(struct amdgpu_ring *ring,
unsigned int vmid,
struct amdgpu_fence *timedout_fence)
{
if (!(ring->adev->jpeg.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
return -EOPNOTSUPP;
amdgpu_ring_reset_helper_begin(ring, timedout_fence);
jpeg_v2_5_stop_inst(ring->adev, ring->me);
jpeg_v2_5_start_inst(ring->adev, ring->me);

View File

@ -564,9 +564,6 @@ static int jpeg_v3_0_ring_reset(struct amdgpu_ring *ring,
{
int r;
if (!(ring->adev->jpeg.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
return -EOPNOTSUPP;
amdgpu_ring_reset_helper_begin(ring, timedout_fence);
r = jpeg_v3_0_stop(ring->adev);
if (r)

View File

@ -729,9 +729,6 @@ static int jpeg_v4_0_ring_reset(struct amdgpu_ring *ring,
{
int r;
if (!(ring->adev->jpeg.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
return -EOPNOTSUPP;
amdgpu_ring_reset_helper_begin(ring, timedout_fence);
r = jpeg_v4_0_stop(ring->adev);
if (r)

View File

@ -774,9 +774,6 @@ static int jpeg_v4_0_5_ring_reset(struct amdgpu_ring *ring,
{
int r;
if (!(ring->adev->jpeg.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
return -EOPNOTSUPP;
amdgpu_ring_reset_helper_begin(ring, timedout_fence);
r = jpeg_v4_0_5_stop(ring->adev);
if (r)

View File

@ -650,9 +650,6 @@ static int jpeg_v5_0_0_ring_reset(struct amdgpu_ring *ring,
{
int r;
if (!(ring->adev->jpeg.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
return -EOPNOTSUPP;
amdgpu_ring_reset_helper_begin(ring, timedout_fence);
r = jpeg_v5_0_0_stop(ring->adev);
if (r)

View File

@ -836,9 +836,6 @@ static int jpeg_v5_0_1_ring_reset(struct amdgpu_ring *ring,
unsigned int vmid,
struct amdgpu_fence *timedout_fence)
{
if (amdgpu_sriov_vf(ring->adev))
return -EOPNOTSUPP;
amdgpu_ring_reset_helper_begin(ring, timedout_fence);
jpeg_v5_0_1_core_stall_reset(ring);
jpeg_v5_0_1_init_jrbc(ring);

View File

@ -1664,9 +1664,6 @@ static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring,
u32 id = ring->me;
int r;
if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
return -EOPNOTSUPP;
amdgpu_amdkfd_suspend(adev, true);
r = amdgpu_sdma_reset_engine(adev, id, false);
amdgpu_amdkfd_resume(adev, true);

View File

@ -1547,9 +1547,6 @@ static int sdma_v5_0_reset_queue(struct amdgpu_ring *ring,
struct amdgpu_device *adev = ring->adev;
int r;
if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
return -EOPNOTSUPP;
if (ring->me >= adev->sdma.num_instances) {
dev_err(adev->dev, "sdma instance not found\n");
return -EINVAL;

View File

@ -1461,9 +1461,6 @@ static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring,
struct amdgpu_device *adev = ring->adev;
int r;
if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
return -EOPNOTSUPP;
if (ring->me >= adev->sdma.num_instances) {
dev_err(adev->dev, "sdma instance not found\n");
return -EINVAL;

View File

@ -1578,9 +1578,6 @@ static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring,
struct amdgpu_device *adev = ring->adev;
int r;
if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
return -EOPNOTSUPP;
if (ring->me >= adev->sdma.num_instances) {
dev_err(adev->dev, "sdma instance not found\n");
return -EINVAL;

View File

@ -809,9 +809,6 @@ static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring,
struct amdgpu_device *adev = ring->adev;
int r;
if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
return -EOPNOTSUPP;
if (ring->me >= adev->sdma.num_instances) {
dev_err(adev->dev, "sdma instance not found\n");
return -EINVAL;

View File

@ -1976,9 +1976,6 @@ static int vcn_v4_0_ring_reset(struct amdgpu_ring *ring,
struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me];
int r;
if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
return -EOPNOTSUPP;
amdgpu_ring_reset_helper_begin(ring, timedout_fence);
r = vcn_v4_0_stop(vinst);
if (r)

View File

@ -1603,12 +1603,6 @@ static int vcn_v4_0_3_ring_reset(struct amdgpu_ring *ring,
struct amdgpu_device *adev = ring->adev;
struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me];
if (amdgpu_sriov_vf(ring->adev))
return -EOPNOTSUPP;
if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
return -EOPNOTSUPP;
amdgpu_ring_reset_helper_begin(ring, timedout_fence);
vcn_inst = GET_INST(VCN, ring->me);

View File

@ -1474,9 +1474,6 @@ static int vcn_v4_0_5_ring_reset(struct amdgpu_ring *ring,
struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me];
int r;
if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
return -EOPNOTSUPP;
amdgpu_ring_reset_helper_begin(ring, timedout_fence);
r = vcn_v4_0_5_stop(vinst);
if (r)

View File

@ -1201,9 +1201,6 @@ static int vcn_v5_0_0_ring_reset(struct amdgpu_ring *ring,
struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me];
int r;
if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
return -EOPNOTSUPP;
amdgpu_ring_reset_helper_begin(ring, timedout_fence);
r = vcn_v5_0_0_stop(vinst);
if (r)