drm/sched: Add several job helpers to avoid drivers touching scheduler state

Drivers have historically reached into scheduler internals. This practice
must end because it makes scheduler internals hard to change: every such
change also forces updates to driver-side code.

Add helpers to check if the scheduler is stopped and to query a job’s
signaled state to avoid reaching into scheduler internals. These are
expected to be used driver-side in recovery and debug flows.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Link: https://patch.msgid.link/20251209200039.1366764-2-matthew.brost@intel.com
This commit is contained in:
Matthew Brost
2025-12-09 12:00:38 -08:00
parent 343f5683cf
commit 38b069333b
2 changed files with 36 additions and 2 deletions

View File

@@ -344,7 +344,7 @@ drm_sched_rq_select_entity_fifo(struct drm_gpu_scheduler *sched,
*/
static void drm_sched_run_job_queue(struct drm_gpu_scheduler *sched)
{
if (!READ_ONCE(sched->pause_submit))
if (!drm_sched_is_stopped(sched))
queue_work(sched->submit_wq, &sched->work_run_job);
}
@@ -354,7 +354,7 @@ static void drm_sched_run_job_queue(struct drm_gpu_scheduler *sched)
*/
static void drm_sched_run_free_queue(struct drm_gpu_scheduler *sched)
{
if (!READ_ONCE(sched->pause_submit))
if (!drm_sched_is_stopped(sched))
queue_work(sched->submit_wq, &sched->work_free_job);
}
@@ -1567,3 +1567,35 @@ void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched)
queue_work(sched->submit_wq, &sched->work_free_job);
}
EXPORT_SYMBOL(drm_sched_wqueue_start);
/**
 * drm_sched_is_stopped() - Checks whether drm_sched is stopped
 * @sched: DRM scheduler
 *
 * Reads the scheduler's pause_submit flag, which is set while submission
 * worker queuing is paused (see drm_sched_wqueue_stop/start).
 *
 * Return: true if sched is stopped, false otherwise
 */
bool drm_sched_is_stopped(struct drm_gpu_scheduler *sched)
{
	/* READ_ONCE pairs with the WRITE_ONCE in the stop/start paths */
	bool paused = READ_ONCE(sched->pause_submit);

	return paused;
}
EXPORT_SYMBOL(drm_sched_is_stopped);
/**
* drm_sched_job_is_signaled() - DRM scheduler job is signaled
* @job: DRM scheduler job
*
* Determine if DRM scheduler job is signaled. DRM scheduler should be stopped
* to obtain a stable snapshot of state. Both parent fence (hardware fence) and
* finished fence (software fence) are checked to determine signaling state.
*
* Return: true if job is signaled, false otherwise
*/
bool drm_sched_job_is_signaled(struct drm_sched_job *job)
{
struct drm_sched_fence *s_fence = job->s_fence;
WARN_ON(!drm_sched_is_stopped(job->sched));
return (s_fence->parent && dma_fence_is_signaled(s_fence->parent)) ||
dma_fence_is_signaled(&s_fence->finished);
}
EXPORT_SYMBOL(drm_sched_job_is_signaled);

View File

@@ -645,6 +645,7 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad);
void drm_sched_start(struct drm_gpu_scheduler *sched, int errno);
void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched);
void drm_sched_fault(struct drm_gpu_scheduler *sched);
bool drm_sched_is_stopped(struct drm_gpu_scheduler *sched);
struct drm_gpu_scheduler *
drm_sched_pick_best(struct drm_gpu_scheduler **sched_list,
@@ -674,6 +675,7 @@ bool drm_sched_job_has_dependency(struct drm_sched_job *job,
struct dma_fence *fence);
void drm_sched_job_cleanup(struct drm_sched_job *job);
void drm_sched_increase_karma(struct drm_sched_job *bad);
bool drm_sched_job_is_signaled(struct drm_sched_job *job);
static inline bool drm_sched_invalidate_job(struct drm_sched_job *s_job,
int threshold)