panic: add panic_force_cpu= parameter to redirect panic to a specific CPU

Some platforms require panic handling to execute on a specific CPU for
crash dump to work reliably.  This can be due to firmware limitations,
interrupt routing constraints, or platform-specific requirements where
only a single CPU is able to safely enter the crash kernel.

Add the panic_force_cpu= kernel command-line parameter to redirect panic
execution to a designated CPU.  When the parameter is provided, the CPU
that initially triggers panic forwards the panic context to the target CPU
via IPI, which then proceeds with the normal panic and kexec flow.

The IPI delivery is implemented as a weak function
(panic_smp_redirect_cpu) so architectures with NMI support can override it
for more reliable delivery.

If the specified CPU is invalid, offline, or a panic is already in
progress on another CPU, the redirection is skipped and panic continues on
the current CPU.

[pnina.feder@mobileye.com: fix unused variable warning]
  Link: https://lkml.kernel.org/r/20260126122618.2967950-1-pnina.feder@mobileye.com
Link: https://lkml.kernel.org/r/20260122102457.1154599-1-pnina.feder@mobileye.com
Signed-off-by: Pnina Feder <pnina.feder@mobileye.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Pnina Feder
2026-01-22 12:24:57 +02:00
committed by Andrew Morton
parent f3951e93d4
commit 2e171ab29f
4 changed files with 186 additions and 2 deletions

View File

@@ -4788,6 +4788,21 @@ Kernel parameters
panic_on_warn=1 panic() instead of WARN(). Useful to cause kdump
on a WARN().
panic_force_cpu=
[KNL,SMP] Force panic handling to execute on a specific CPU.
Format: <cpu number>
Some platforms require panic handling to occur on a
specific CPU for the crash kernel to function correctly.
This can be due to firmware limitations, interrupt routing
constraints, or platform-specific requirements where only
a particular CPU can safely enter the crash kernel.
When set, panic() will redirect execution to the specified
CPU before proceeding with the normal panic and kexec flow.
If the target CPU is offline or unavailable, panic proceeds
on the current CPU.
This option should only be used for systems with the above
constraints as it might cause the panic operation to be less reliable.
panic_print= Bitmask for printing system info when panic happens.
User can choose combination of the following bits:
bit 0: print all tasks info

View File

@@ -41,6 +41,14 @@ void abort(void);
* PANIC_CPU_INVALID means no CPU has entered panic() or crash_kexec(). * PANIC_CPU_INVALID means no CPU has entered panic() or crash_kexec().
*/ */
extern atomic_t panic_cpu; extern atomic_t panic_cpu;
/*
* panic_redirect_cpu is used when panic is redirected to a specific CPU via
* the panic_force_cpu= boot parameter. It holds the CPU number that originally
* triggered the panic before redirection. A value of PANIC_CPU_INVALID means
* no redirection has occurred.
*/
extern atomic_t panic_redirect_cpu;
#define PANIC_CPU_INVALID -1 #define PANIC_CPU_INVALID -1
bool panic_try_start(void); bool panic_try_start(void);

View File

@@ -62,6 +62,7 @@ int smp_call_function_single_async(int cpu, call_single_data_t *csd);
void __noreturn panic_smp_self_stop(void); void __noreturn panic_smp_self_stop(void);
void __noreturn nmi_panic_self_stop(struct pt_regs *regs); void __noreturn nmi_panic_self_stop(struct pt_regs *regs);
void crash_smp_send_stop(void); void crash_smp_send_stop(void);
int panic_smp_redirect_cpu(int target_cpu, void *msg);
/* /*
* Call a function on all processors * Call a function on all processors

View File

@@ -42,6 +42,7 @@
#define PANIC_TIMER_STEP 100 #define PANIC_TIMER_STEP 100
#define PANIC_BLINK_SPD 18 #define PANIC_BLINK_SPD 18
#define PANIC_MSG_BUFSZ 1024
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
/* /*
@@ -74,6 +75,8 @@ EXPORT_SYMBOL_GPL(panic_timeout);
unsigned long panic_print; unsigned long panic_print;
static int panic_force_cpu = -1;
ATOMIC_NOTIFIER_HEAD(panic_notifier_list); ATOMIC_NOTIFIER_HEAD(panic_notifier_list);
EXPORT_SYMBOL(panic_notifier_list); EXPORT_SYMBOL(panic_notifier_list);
@@ -300,6 +303,150 @@ void __weak crash_smp_send_stop(void)
} }
atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID); atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID);
atomic_t panic_redirect_cpu = ATOMIC_INIT(PANIC_CPU_INVALID);
#if defined(CONFIG_SMP) && defined(CONFIG_CRASH_DUMP)
/*
 * Buffer that holds the formatted panic message handed over to the target
 * CPU. It is allocated by panic_force_cpu_late_init() only when
 * panic_force_cpu= was given; while it is NULL, panic_try_force_cpu()
 * falls back to a fixed message.
 */
static char *panic_force_buf;
/*
 * Parse the panic_force_cpu= command-line value.
 *
 * Accepts any integer base understood by kstrtoint() and stores the result
 * in panic_force_cpu when it lies in [0, nr_cpu_ids). Anything else is
 * reported and rejected with -EINVAL, leaving panic_force_cpu untouched.
 */
static int __init panic_force_cpu_setup(char *str)
{
	int requested_cpu;
	bool valid;

	if (str == NULL)
		return -EINVAL;

	/* Short-circuit keeps requested_cpu unread when parsing failed. */
	valid = kstrtoint(str, 0, &requested_cpu) == 0 &&
		requested_cpu >= 0 &&
		requested_cpu < nr_cpu_ids;
	if (!valid) {
		pr_warn("panic_force_cpu: invalid value '%s'\n", str);
		return -EINVAL;
	}

	panic_force_cpu = requested_cpu;

	return 0;
}
early_param("panic_force_cpu", panic_force_cpu_setup);
/*
 * Allocate the message buffer used when a panic is redirected.
 *
 * Nothing is allocated unless panic_force_cpu= selected a CPU. The result
 * of kmalloc() is deliberately not checked here: panic_try_force_cpu()
 * copes with a NULL panic_force_buf by using a fixed message.
 */
static int __init panic_force_cpu_late_init(void)
{
	if (panic_force_cpu >= 0)
		panic_force_buf = kmalloc(PANIC_MSG_BUFSZ, GFP_KERNEL);

	return 0;
}
late_initcall(panic_force_cpu_late_init);
/*
 * Cross-CPU callback installed by panic_smp_redirect_cpu(): @info is the
 * already formatted panic message, which is passed on to panic() verbatim.
 */
static void do_panic_on_target_cpu(void *info)
{
	const char *panic_msg = info;

	panic("%s", panic_msg);
}
/**
 * panic_smp_redirect_cpu - hand the panic over to another CPU
 * @target_cpu: CPU that should run the panic path
 * @msg: formatted panic message, forwarded to the callback unchanged
 *
 * Weak default that queues do_panic_on_target_cpu() on @target_cpu through
 * smp_call_function_single_async(). Architectures with NMI support
 * can override this for more reliable delivery.
 *
 * Return: the result of smp_call_function_single_async(); callers treat any
 * non-zero value as a failed redirect.
 */
int __weak panic_smp_redirect_cpu(int target_cpu, void *msg)
{
	static call_single_data_t panic_csd;
	call_single_data_t *csd = &panic_csd;

	csd->info = msg;
	csd->func = do_panic_on_target_cpu;

	return smp_call_function_single_async(target_cpu, csd);
}
/**
 * panic_try_force_cpu - Redirect panic to a specific CPU for crash kernel
 * @fmt: panic message format string
 * @args: arguments for format string
 *
 * Some platforms require panic handling to occur on a specific CPU
 * for the crash kernel to function correctly. This function redirects
 * panic handling to the CPU specified via the panic_force_cpu= boot parameter.
 *
 * @args is never consumed here: the message is formatted from a private
 * va_copy(), so the caller can still format the message itself when this
 * function returns false.
 *
 * Return: false if panic should proceed on the current CPU, true if the
 * panic was redirected and the current CPU should stop.
 */
__printf(1, 0)
static bool panic_try_force_cpu(const char *fmt, va_list args)
{
	int this_cpu = raw_smp_processor_id();
	int old_cpu = PANIC_CPU_INVALID;
	const char *msg;
	va_list args_copy;

	/* Feature not enabled via boot parameter */
	if (panic_force_cpu < 0)
		return false;

	/* Already on target CPU - proceed normally */
	if (this_cpu == panic_force_cpu)
		return false;

	/* Target CPU is offline, can't redirect */
	if (!cpu_online(panic_force_cpu)) {
		pr_warn("panic: target CPU %d is offline, continuing on CPU %d\n",
			panic_force_cpu, this_cpu);
		return false;
	}

	/* Another panic already in progress */
	if (panic_in_progress())
		return false;

	/*
	 * Only one CPU can do the redirect. Use atomic cmpxchg to ensure
	 * we don't race with another CPU also trying to redirect.
	 */
	if (!atomic_try_cmpxchg(&panic_redirect_cpu, &old_cpu, this_cpu))
		return false;

	/*
	 * Use dynamically allocated buffer if available, otherwise
	 * fall back to static message for early boot panics or allocation failure.
	 *
	 * Format from a copy: if the redirect below fails we return false and
	 * the caller formats the message again from @args, which must therefore
	 * not have been consumed.
	 */
	if (panic_force_buf) {
		va_copy(args_copy, args);
		vsnprintf(panic_force_buf, PANIC_MSG_BUFSZ, fmt, args_copy);
		va_end(args_copy);
		msg = panic_force_buf;
	} else {
		msg = "Redirected panic (buffer unavailable)";
	}

	console_verbose();
	bust_spinlocks(1);

	pr_emerg("panic: Redirecting from CPU %d to CPU %d for crash kernel.\n",
		 this_cpu, panic_force_cpu);

	/* Dump original CPU before redirecting */
	if (!test_taint(TAINT_DIE) &&
	    oops_in_progress <= 1 &&
	    IS_ENABLED(CONFIG_DEBUG_BUGVERBOSE)) {
		dump_stack();
	}

	if (panic_smp_redirect_cpu(panic_force_cpu, (void *)msg) != 0) {
		/* Undo the claim so the redirect marker does not stay set. */
		atomic_set(&panic_redirect_cpu, PANIC_CPU_INVALID);
		pr_warn("panic: failed to redirect to CPU %d, continuing on CPU %d\n",
			panic_force_cpu, this_cpu);
		return false;
	}

	/* IPI/NMI sent, this CPU should stop */
	return true;
}
#else
/*
 * Stub used when the kernel is built without CONFIG_SMP or without
 * CONFIG_CRASH_DUMP: redirection is unavailable, so always report that the
 * panic should proceed on the current CPU.
 */
__printf(1, 0)
static inline bool panic_try_force_cpu(const char *fmt, va_list args)
{
return false;
}
#endif /* CONFIG_SMP && CONFIG_CRASH_DUMP */
bool panic_try_start(void) bool panic_try_start(void)
{ {
@@ -428,7 +575,7 @@ static void panic_other_cpus_shutdown(bool crash_kexec)
*/ */
void vpanic(const char *fmt, va_list args) void vpanic(const char *fmt, va_list args)
{ {
static char buf[1024]; static char buf[PANIC_MSG_BUFSZ];
long i, i_next = 0, len; long i, i_next = 0, len;
int state = 0; int state = 0;
bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers; bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers;
@@ -452,6 +599,15 @@ void vpanic(const char *fmt, va_list args)
local_irq_disable(); local_irq_disable();
preempt_disable_notrace(); preempt_disable_notrace();
/* Redirect panic to target CPU if configured via panic_force_cpu=. */
if (panic_try_force_cpu(fmt, args)) {
/*
* Mark ourselves offline so panic_other_cpus_shutdown() won't wait
* for us on architectures that check num_online_cpus().
*/
set_cpu_online(smp_processor_id(), false);
panic_smp_self_stop();
}
/* /*
* It's possible to come here directly from a panic-assertion and * It's possible to come here directly from a panic-assertion and
* not have preempt disabled. Some functions called from here want * not have preempt disabled. Some functions called from here want
@@ -484,7 +640,11 @@ void vpanic(const char *fmt, va_list args)
/* /*
* Avoid nested stack-dumping if a panic occurs during oops processing * Avoid nested stack-dumping if a panic occurs during oops processing
*/ */
if (test_taint(TAINT_DIE) || oops_in_progress > 1) { if (atomic_read(&panic_redirect_cpu) != PANIC_CPU_INVALID &&
panic_force_cpu == raw_smp_processor_id()) {
pr_emerg("panic: Redirected from CPU %d, skipping stack dump.\n",
atomic_read(&panic_redirect_cpu));
} else if (test_taint(TAINT_DIE) || oops_in_progress > 1) {
panic_this_cpu_backtrace_printed = true; panic_this_cpu_backtrace_printed = true;
} else if (IS_ENABLED(CONFIG_DEBUG_BUGVERBOSE)) { } else if (IS_ENABLED(CONFIG_DEBUG_BUGVERBOSE)) {
dump_stack(); dump_stack();