mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-03-22 07:27:12 +08:00
Merge tag 'wq-for-7.0-rc3-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq
Pull workqueue fixes from Tejun Heo:
- Improve workqueue stall diagnostics: dump all busy workers (not just
running ones), show wall-clock duration of in-flight work items, and
add a sample module for reproducing stalls
- Fix POOL_BH vs WQ_BH flag namespace mismatch in pr_cont_worker_id()
- Rename pool->watchdog_ts to pool->last_progress_ts and related
functions for clarity
* tag 'wq-for-7.0-rc3-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq:
workqueue: Rename show_cpu_pool{s,}_hog{s,}() to reflect broadened scope
workqueue: Add stall detector sample module
workqueue: Show all busy workers in stall diagnostics
workqueue: Show in-flight work item duration in stall diagnostics
workqueue: Rename pool->watchdog_ts to pool->last_progress_ts
workqueue: Use POOL_BH instead of WQ_BH when checking pool flags
This commit is contained in:
@@ -190,7 +190,7 @@ struct worker_pool {
|
|||||||
int id; /* I: pool ID */
|
int id; /* I: pool ID */
|
||||||
unsigned int flags; /* L: flags */
|
unsigned int flags; /* L: flags */
|
||||||
|
|
||||||
unsigned long watchdog_ts; /* L: watchdog timestamp */
|
unsigned long last_progress_ts; /* L: last forward progress timestamp */
|
||||||
bool cpu_stall; /* WD: stalled cpu bound pool */
|
bool cpu_stall; /* WD: stalled cpu bound pool */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -1697,7 +1697,7 @@ static void __pwq_activate_work(struct pool_workqueue *pwq,
|
|||||||
WARN_ON_ONCE(!(*wdb & WORK_STRUCT_INACTIVE));
|
WARN_ON_ONCE(!(*wdb & WORK_STRUCT_INACTIVE));
|
||||||
trace_workqueue_activate_work(work);
|
trace_workqueue_activate_work(work);
|
||||||
if (list_empty(&pwq->pool->worklist))
|
if (list_empty(&pwq->pool->worklist))
|
||||||
pwq->pool->watchdog_ts = jiffies;
|
pwq->pool->last_progress_ts = jiffies;
|
||||||
move_linked_works(work, &pwq->pool->worklist, NULL);
|
move_linked_works(work, &pwq->pool->worklist, NULL);
|
||||||
__clear_bit(WORK_STRUCT_INACTIVE_BIT, wdb);
|
__clear_bit(WORK_STRUCT_INACTIVE_BIT, wdb);
|
||||||
}
|
}
|
||||||
@@ -2348,7 +2348,7 @@ retry:
|
|||||||
*/
|
*/
|
||||||
if (list_empty(&pwq->inactive_works) && pwq_tryinc_nr_active(pwq, false)) {
|
if (list_empty(&pwq->inactive_works) && pwq_tryinc_nr_active(pwq, false)) {
|
||||||
if (list_empty(&pool->worklist))
|
if (list_empty(&pool->worklist))
|
||||||
pool->watchdog_ts = jiffies;
|
pool->last_progress_ts = jiffies;
|
||||||
|
|
||||||
trace_workqueue_activate_work(work);
|
trace_workqueue_activate_work(work);
|
||||||
insert_work(pwq, work, &pool->worklist, work_flags);
|
insert_work(pwq, work, &pool->worklist, work_flags);
|
||||||
@@ -3204,6 +3204,7 @@ __acquires(&pool->lock)
|
|||||||
worker->current_pwq = pwq;
|
worker->current_pwq = pwq;
|
||||||
if (worker->task)
|
if (worker->task)
|
||||||
worker->current_at = worker->task->se.sum_exec_runtime;
|
worker->current_at = worker->task->se.sum_exec_runtime;
|
||||||
|
worker->current_start = jiffies;
|
||||||
work_data = *work_data_bits(work);
|
work_data = *work_data_bits(work);
|
||||||
worker->current_color = get_work_color(work_data);
|
worker->current_color = get_work_color(work_data);
|
||||||
|
|
||||||
@@ -3352,7 +3353,7 @@ static void process_scheduled_works(struct worker *worker)
|
|||||||
while ((work = list_first_entry_or_null(&worker->scheduled,
|
while ((work = list_first_entry_or_null(&worker->scheduled,
|
||||||
struct work_struct, entry))) {
|
struct work_struct, entry))) {
|
||||||
if (first) {
|
if (first) {
|
||||||
worker->pool->watchdog_ts = jiffies;
|
worker->pool->last_progress_ts = jiffies;
|
||||||
first = false;
|
first = false;
|
||||||
}
|
}
|
||||||
process_one_work(worker, work);
|
process_one_work(worker, work);
|
||||||
@@ -4850,7 +4851,7 @@ static int init_worker_pool(struct worker_pool *pool)
|
|||||||
pool->cpu = -1;
|
pool->cpu = -1;
|
||||||
pool->node = NUMA_NO_NODE;
|
pool->node = NUMA_NO_NODE;
|
||||||
pool->flags |= POOL_DISASSOCIATED;
|
pool->flags |= POOL_DISASSOCIATED;
|
||||||
pool->watchdog_ts = jiffies;
|
pool->last_progress_ts = jiffies;
|
||||||
INIT_LIST_HEAD(&pool->worklist);
|
INIT_LIST_HEAD(&pool->worklist);
|
||||||
INIT_LIST_HEAD(&pool->idle_list);
|
INIT_LIST_HEAD(&pool->idle_list);
|
||||||
hash_init(pool->busy_hash);
|
hash_init(pool->busy_hash);
|
||||||
@@ -6274,7 +6275,7 @@ static void pr_cont_worker_id(struct worker *worker)
|
|||||||
{
|
{
|
||||||
struct worker_pool *pool = worker->pool;
|
struct worker_pool *pool = worker->pool;
|
||||||
|
|
||||||
if (pool->flags & WQ_BH)
|
if (pool->flags & POOL_BH)
|
||||||
pr_cont("bh%s",
|
pr_cont("bh%s",
|
||||||
pool->attrs->nice == HIGHPRI_NICE_LEVEL ? "-hi" : "");
|
pool->attrs->nice == HIGHPRI_NICE_LEVEL ? "-hi" : "");
|
||||||
else
|
else
|
||||||
@@ -6359,6 +6360,8 @@ static void show_pwq(struct pool_workqueue *pwq)
|
|||||||
pr_cont(" %s", comma ? "," : "");
|
pr_cont(" %s", comma ? "," : "");
|
||||||
pr_cont_worker_id(worker);
|
pr_cont_worker_id(worker);
|
||||||
pr_cont(":%ps", worker->current_func);
|
pr_cont(":%ps", worker->current_func);
|
||||||
|
pr_cont(" for %us",
|
||||||
|
jiffies_to_msecs(jiffies - worker->current_start) / 1000);
|
||||||
list_for_each_entry(work, &worker->scheduled, entry)
|
list_for_each_entry(work, &worker->scheduled, entry)
|
||||||
pr_cont_work(false, work, &pcws);
|
pr_cont_work(false, work, &pcws);
|
||||||
pr_cont_work_flush(comma, (work_func_t)-1L, &pcws);
|
pr_cont_work_flush(comma, (work_func_t)-1L, &pcws);
|
||||||
@@ -6462,7 +6465,7 @@ static void show_one_worker_pool(struct worker_pool *pool)
|
|||||||
|
|
||||||
/* How long the first pending work is waiting for a worker. */
|
/* How long the first pending work is waiting for a worker. */
|
||||||
if (!list_empty(&pool->worklist))
|
if (!list_empty(&pool->worklist))
|
||||||
hung = jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000;
|
hung = jiffies_to_msecs(jiffies - pool->last_progress_ts) / 1000;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Defer printing to avoid deadlocks in console drivers that
|
* Defer printing to avoid deadlocks in console drivers that
|
||||||
@@ -7580,11 +7583,11 @@ MODULE_PARM_DESC(panic_on_stall_time, "Panic if stall exceeds this many seconds
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Show workers that might prevent the processing of pending work items.
|
* Show workers that might prevent the processing of pending work items.
|
||||||
* The only candidates are CPU-bound workers in the running state.
|
* A busy worker that is not running on the CPU (e.g. sleeping in
|
||||||
* Pending work items should be handled by another idle worker
|
* wait_event_idle() with PF_WQ_WORKER cleared) can stall the pool just as
|
||||||
* in all other situations.
|
* effectively as a CPU-bound one, so dump every in-flight worker.
|
||||||
*/
|
*/
|
||||||
static void show_cpu_pool_hog(struct worker_pool *pool)
|
static void show_cpu_pool_busy_workers(struct worker_pool *pool)
|
||||||
{
|
{
|
||||||
struct worker *worker;
|
struct worker *worker;
|
||||||
unsigned long irq_flags;
|
unsigned long irq_flags;
|
||||||
@@ -7593,36 +7596,34 @@ static void show_cpu_pool_hog(struct worker_pool *pool)
|
|||||||
raw_spin_lock_irqsave(&pool->lock, irq_flags);
|
raw_spin_lock_irqsave(&pool->lock, irq_flags);
|
||||||
|
|
||||||
hash_for_each(pool->busy_hash, bkt, worker, hentry) {
|
hash_for_each(pool->busy_hash, bkt, worker, hentry) {
|
||||||
if (task_is_running(worker->task)) {
|
/*
|
||||||
/*
|
* Defer printing to avoid deadlocks in console
|
||||||
* Defer printing to avoid deadlocks in console
|
* drivers that queue work while holding locks
|
||||||
* drivers that queue work while holding locks
|
* also taken in their write paths.
|
||||||
* also taken in their write paths.
|
*/
|
||||||
*/
|
printk_deferred_enter();
|
||||||
printk_deferred_enter();
|
|
||||||
|
|
||||||
pr_info("pool %d:\n", pool->id);
|
pr_info("pool %d:\n", pool->id);
|
||||||
sched_show_task(worker->task);
|
sched_show_task(worker->task);
|
||||||
|
|
||||||
printk_deferred_exit();
|
printk_deferred_exit();
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
raw_spin_unlock_irqrestore(&pool->lock, irq_flags);
|
raw_spin_unlock_irqrestore(&pool->lock, irq_flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void show_cpu_pools_hogs(void)
|
static void show_cpu_pools_busy_workers(void)
|
||||||
{
|
{
|
||||||
struct worker_pool *pool;
|
struct worker_pool *pool;
|
||||||
int pi;
|
int pi;
|
||||||
|
|
||||||
pr_info("Showing backtraces of running workers in stalled CPU-bound worker pools:\n");
|
pr_info("Showing backtraces of busy workers in stalled worker pools:\n");
|
||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
|
|
||||||
for_each_pool(pool, pi) {
|
for_each_pool(pool, pi) {
|
||||||
if (pool->cpu_stall)
|
if (pool->cpu_stall)
|
||||||
show_cpu_pool_hog(pool);
|
show_cpu_pool_busy_workers(pool);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -7691,7 +7692,7 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
|
|||||||
touched = READ_ONCE(per_cpu(wq_watchdog_touched_cpu, pool->cpu));
|
touched = READ_ONCE(per_cpu(wq_watchdog_touched_cpu, pool->cpu));
|
||||||
else
|
else
|
||||||
touched = READ_ONCE(wq_watchdog_touched);
|
touched = READ_ONCE(wq_watchdog_touched);
|
||||||
pool_ts = READ_ONCE(pool->watchdog_ts);
|
pool_ts = READ_ONCE(pool->last_progress_ts);
|
||||||
|
|
||||||
if (time_after(pool_ts, touched))
|
if (time_after(pool_ts, touched))
|
||||||
ts = pool_ts;
|
ts = pool_ts;
|
||||||
@@ -7719,7 +7720,7 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
|
|||||||
show_all_workqueues();
|
show_all_workqueues();
|
||||||
|
|
||||||
if (cpu_pool_stall)
|
if (cpu_pool_stall)
|
||||||
show_cpu_pools_hogs();
|
show_cpu_pools_busy_workers();
|
||||||
|
|
||||||
if (lockup_detected)
|
if (lockup_detected)
|
||||||
panic_on_wq_watchdog(max_stall_time);
|
panic_on_wq_watchdog(max_stall_time);
|
||||||
|
|||||||
@@ -32,6 +32,7 @@ struct worker {
|
|||||||
work_func_t current_func; /* K: function */
|
work_func_t current_func; /* K: function */
|
||||||
struct pool_workqueue *current_pwq; /* K: pwq */
|
struct pool_workqueue *current_pwq; /* K: pwq */
|
||||||
u64 current_at; /* K: runtime at start or last wakeup */
|
u64 current_at; /* K: runtime at start or last wakeup */
|
||||||
|
unsigned long current_start; /* K: start time of current work item */
|
||||||
unsigned int current_color; /* K: color */
|
unsigned int current_color; /* K: color */
|
||||||
|
|
||||||
int sleeping; /* S: is worker sleeping? */
|
int sleeping; /* S: is worker sleeping? */
|
||||||
|
|||||||
1
samples/workqueue/stall_detector/Makefile
Normal file
1
samples/workqueue/stall_detector/Makefile
Normal file
# SPDX-License-Identifier: GPL-2.0
# Kbuild rule for the workqueue stall-detector sample module.
obj-m += wq_stall.o
98
samples/workqueue/stall_detector/wq_stall.c
Normal file
98
samples/workqueue/stall_detector/wq_stall.c
Normal file
// SPDX-License-Identifier: GPL-2.0
/*
 * wq_stall - Test module for the workqueue stall detector.
 *
 * Deliberately creates a workqueue stall so the watchdog fires and
 * prints diagnostic output. Useful for verifying that the stall
 * detector correctly identifies stuck workers and produces useful
 * backtraces.
 *
 * The stall is triggered by clearing PF_WQ_WORKER before sleeping,
 * which hides the worker from the concurrency manager. A second
 * work item queued on the same pool then sits in the worklist with
 * no worker available to process it.
 *
 * After ~30s the workqueue watchdog fires:
 *   BUG: workqueue lockup - pool cpus=N ...
 *
 * Build:
 *   make -C <kernel tree> M=samples/workqueue/stall_detector modules
 *
 * Copyright (c) 2026 Meta Platforms, Inc. and affiliates.
 * Copyright (c) 2026 Breno Leitao <leitao@debian.org>
 */

#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/wait.h>
#include <linux/atomic.h>
#include <linux/sched.h>

static DECLARE_WAIT_QUEUE_HEAD(stall_wq_head);
static atomic_t wake_condition = ATOMIC_INIT(0);
static struct work_struct stall_work1;
static struct work_struct stall_work2;

/* Second work item: should stay stuck in the pool worklist until exit. */
static void stall_work2_fn(struct work_struct *work)
{
	pr_info("wq_stall: second work item finally ran\n");
}

/* First work item: queues item 2, then hides itself and sleeps. */
static void stall_work1_fn(struct work_struct *work)
{
	pr_info("wq_stall: first work item running on cpu %d\n",
		raw_smp_processor_id());

	/*
	 * Queue second item while we're still counted as running
	 * (pool->nr_running > 0). Since schedule_work() on a per-CPU
	 * workqueue targets raw_smp_processor_id(), item 2 lands on the
	 * same pool. __queue_work -> kick_pool -> need_more_worker()
	 * sees nr_running > 0 and does NOT wake a new worker.
	 */
	schedule_work(&stall_work2);

	/*
	 * Hide from the workqueue concurrency manager. Without
	 * PF_WQ_WORKER, schedule() won't call wq_worker_sleeping(),
	 * so nr_running is never decremented and no replacement
	 * worker is created. Item 2 stays stuck in pool->worklist.
	 */
	current->flags &= ~PF_WQ_WORKER;

	pr_info("wq_stall: entering wait_event_idle (PF_WQ_WORKER cleared)\n");
	pr_info("wq_stall: expect 'BUG: workqueue lockup' in ~30-60s\n");
	wait_event_idle(stall_wq_head, atomic_read(&wake_condition) != 0);

	/* Restore so process_one_work() cleanup works correctly */
	current->flags |= PF_WQ_WORKER;
	pr_info("wq_stall: woke up, PF_WQ_WORKER restored\n");
}

static int __init wq_stall_init(void)
{
	pr_info("wq_stall: loading\n");

	INIT_WORK(&stall_work1, stall_work1_fn);
	INIT_WORK(&stall_work2, stall_work2_fn);
	schedule_work(&stall_work1);

	return 0;
}

static void __exit wq_stall_exit(void)
{
	pr_info("wq_stall: unloading\n");

	/* Wake the hidden worker, then drain both work items. */
	atomic_set(&wake_condition, 1);
	wake_up(&stall_wq_head);
	flush_work(&stall_work1);
	flush_work(&stall_work2);
	pr_info("wq_stall: all work flushed, module unloaded\n");
}

module_init(wq_stall_init);
module_exit(wq_stall_exit);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Reproduce workqueue stall caused by PF_WQ_WORKER misuse");
MODULE_AUTHOR("Breno Leitao <leitao@debian.org>");
||||||
Reference in New Issue
Block a user