Mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git (synced 2025-09-04 20:19:47 +08:00)
	sched/idle: Fix arch_cpu_idle() vs tracing
We call arch_cpu_idle() with RCU disabled, but then use
local_irq_{en,dis}able(), which invokes tracing, which relies on RCU.

Switch all arch_cpu_idle() implementations to use
raw_local_irq_{en,dis}able() and carefully manage the lockdep, RCU, and
tracing state the same way the entry code does.

(XXX: we really should change arch_cpu_idle() so that it does not
return with interrupts enabled.)
Reported-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lkml.kernel.org/r/20201120114925.594122626@infradead.org
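
The failure mode, spelled out: local_irq_enable() is the traced variant, so it ends up in trace_hardirqs_on(), which fires a tracepoint; tracepoints rely on RCU, but by the time arch_cpu_idle() runs, rcu_idle_enter() has already told RCU to stop watching this CPU. A minimal sketch of the broken versus fixed ordering (simplified pseudocode, not the actual kernel functions; halt_until_interrupt() is a hypothetical stand-in for the architecture's wait instruction):

	/* Broken: the tracepoint fires while RCU is not watching this CPU. */
	static void idle_broken(void)
	{
		rcu_idle_enter();		/* RCU stops watching this CPU */
		local_irq_enable();		/* -> trace_hardirqs_on() -> tracepoint -> RCU splat */
		halt_until_interrupt();		/* hypothetical arch wait primitive */
	}

	/* Fixed: do the traced bookkeeping first, then only touch raw helpers. */
	static void idle_fixed(void)
	{
		trace_hardirqs_on_prepare();		/* tracepoint fires while RCU is alive */
		lockdep_hardirqs_on_prepare(_THIS_IP_);
		rcu_idle_enter();			/* RCU stops watching */
		lockdep_hardirqs_on(_THIS_IP_);		/* lockdep state only, no tracepoint */
		raw_local_irq_enable();			/* no tracing hooks */
		halt_until_interrupt();
	}

The per-architecture hunks below are the mechanical half of this fix: every idle implementation switches to the raw, non-traced IRQ helpers, and default_idle_call() (the last hunk) takes over the lockdep and tracing bookkeeping.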
			
			
This commit is contained in:
parent 43be4388e9
commit 58c644ba51
arch/alpha/kernel/process.c
@@ -57,7 +57,7 @@ EXPORT_SYMBOL(pm_power_off);
 void arch_cpu_idle(void)
 {
 	wtint(0);
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 void arch_cpu_idle_dead(void)
arch/arm/kernel/process.c
@@ -71,7 +71,7 @@ void arch_cpu_idle(void)
 		arm_pm_idle();
 	else
 		cpu_do_idle();
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 void arch_cpu_idle_prepare(void)
arch/arm64/kernel/process.c
@@ -126,7 +126,7 @@ void arch_cpu_idle(void)
 	 * tricks
 	 */
 	cpu_do_idle();
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
arch/csky/kernel/process.c
@@ -102,6 +102,6 @@ void arch_cpu_idle(void)
 #ifdef CONFIG_CPU_PM_STOP
 	asm volatile("stop\n");
 #endif
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 #endif
arch/h8300/kernel/process.c
@@ -57,7 +57,7 @@ asmlinkage void ret_from_kernel_thread(void);
  */
 void arch_cpu_idle(void)
 {
-	local_irq_enable();
+	raw_local_irq_enable();
 	__asm__("sleep");
 }
 
arch/hexagon/kernel/process.c
@@ -44,7 +44,7 @@ void arch_cpu_idle(void)
 {
 	__vmwait();
 	/*  interrupts wake us up, but irqs are still disabled */
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 /*
arch/ia64/kernel/process.c
@@ -239,7 +239,7 @@ void arch_cpu_idle(void)
 	if (mark_idle)
 		(*mark_idle)(1);
 
-	safe_halt();
+	raw_safe_halt();
 
 	if (mark_idle)
 		(*mark_idle)(0);
arch/microblaze/kernel/process.c
@@ -149,5 +149,5 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpregs)
 
 void arch_cpu_idle(void)
 {
-       local_irq_enable();
+       raw_local_irq_enable();
 }
arch/mips/kernel/idle.c
@@ -33,19 +33,19 @@ static void __cpuidle r3081_wait(void)
 {
 	unsigned long cfg = read_c0_conf();
 	write_c0_conf(cfg | R30XX_CONF_HALT);
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 static void __cpuidle r39xx_wait(void)
 {
 	if (!need_resched())
 		write_c0_conf(read_c0_conf() | TX39_CONF_HALT);
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 void __cpuidle r4k_wait(void)
 {
-	local_irq_enable();
+	raw_local_irq_enable();
 	__r4k_wait();
 }
 
@@ -64,7 +64,7 @@ void __cpuidle r4k_wait_irqoff(void)
 		"	.set	arch=r4000	\n"
 		"	wait			\n"
 		"	.set	pop		\n");
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 /*
@@ -84,7 +84,7 @@ static void __cpuidle rm7k_wait_irqoff(void)
 		"	wait						\n"
 		"	mtc0	$1, $12		# stalls until W stage	\n"
 		"	.set	pop					\n");
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 /*
@@ -257,7 +257,7 @@ void arch_cpu_idle(void)
 	if (cpu_wait)
 		cpu_wait();
 	else
-		local_irq_enable();
+		raw_local_irq_enable();
 }
 
 #ifdef CONFIG_CPU_IDLE
arch/nios2/kernel/process.c
@@ -33,7 +33,7 @@ EXPORT_SYMBOL(pm_power_off);
 
 void arch_cpu_idle(void)
 {
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 /*
arch/openrisc/kernel/process.c
@@ -79,7 +79,7 @@ void machine_power_off(void)
  */
 void arch_cpu_idle(void)
 {
-	local_irq_enable();
+	raw_local_irq_enable();
 	if (mfspr(SPR_UPR) & SPR_UPR_PMP)
 		mtspr(SPR_PMR, mfspr(SPR_PMR) | SPR_PMR_DME);
 }
arch/parisc/kernel/process.c
@@ -169,7 +169,7 @@ void __cpuidle arch_cpu_idle_dead(void)
 
 void __cpuidle arch_cpu_idle(void)
 {
-	local_irq_enable();
+	raw_local_irq_enable();
 
 	/* nop on real hardware, qemu will idle sleep. */
 	asm volatile("or %%r10,%%r10,%%r10\n":::);
arch/powerpc/kernel/idle.c
@@ -52,9 +52,9 @@ void arch_cpu_idle(void)
 		 * interrupts enabled, some don't.
 		 */
 		if (irqs_disabled())
-			local_irq_enable();
+			raw_local_irq_enable();
 	} else {
-		local_irq_enable();
+		raw_local_irq_enable();
 		/*
 		 * Go into low thread priority and possibly
 		 * low power mode.
arch/riscv/kernel/process.c
@@ -36,7 +36,7 @@ extern asmlinkage void ret_from_kernel_thread(void);
 void arch_cpu_idle(void)
 {
 	wait_for_interrupt();
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 void show_regs(struct pt_regs *regs)
arch/s390/kernel/idle.c
@@ -33,10 +33,10 @@ void enabled_wait(void)
 		PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
 	clear_cpu_flag(CIF_NOHZ_DELAY);
 
-	local_irq_save(flags);
+	raw_local_irq_save(flags);
 	/* Call the assembler magic in entry.S */
 	psw_idle(idle, psw_mask);
-	local_irq_restore(flags);
+	raw_local_irq_restore(flags);
 
 	/* Account time spent with enabled wait psw loaded as idle time. */
 	raw_write_seqcount_begin(&idle->seqcount);
@@ -123,7 +123,7 @@ void arch_cpu_idle_enter(void)
 void arch_cpu_idle(void)
 {
 	enabled_wait();
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 void arch_cpu_idle_exit(void)
arch/sh/kernel/idle.c
@@ -22,7 +22,7 @@ static void (*sh_idle)(void);
 void default_idle(void)
 {
 	set_bl_bit();
-	local_irq_enable();
+	raw_local_irq_enable();
 	/* Isn't this racy ? */
 	cpu_sleep();
 	clear_bl_bit();
arch/sparc/kernel/leon_pmc.c
@@ -50,7 +50,7 @@ static void pmc_leon_idle_fixup(void)
 	register unsigned int address = (unsigned int)leon3_irqctrl_regs;
 
 	/* Interrupts need to be enabled to not hang the CPU */
-	local_irq_enable();
+	raw_local_irq_enable();
 
 	__asm__ __volatile__ (
 		"wr	%%g0, %%asr19\n"
@@ -66,7 +66,7 @@ static void pmc_leon_idle_fixup(void)
 static void pmc_leon_idle(void)
 {
 	/* Interrupts need to be enabled to not hang the CPU */
-	local_irq_enable();
+	raw_local_irq_enable();
 
 	/* For systems without power-down, this will be no-op */
 	__asm__ __volatile__ ("wr	%g0, %asr19\n\t");
arch/sparc/kernel/process_32.c
@@ -74,7 +74,7 @@ void arch_cpu_idle(void)
 {
 	if (sparc_idle)
 		(*sparc_idle)();
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 /* XXX cli/sti -> local_irq_xxx here, check this works once SMP is fixed. */
arch/sparc/kernel/process_64.c
@@ -62,11 +62,11 @@ void arch_cpu_idle(void)
 {
 	if (tlb_type != hypervisor) {
 		touch_nmi_watchdog();
-		local_irq_enable();
+		raw_local_irq_enable();
 	} else {
 		unsigned long pstate;
 
-		local_irq_enable();
+		raw_local_irq_enable();
 
                 /* The sun4v sleeping code requires that we have PSTATE.IE cleared over
                  * the cpu sleep hypervisor call.
arch/um/kernel/process.c
@@ -217,7 +217,7 @@ void arch_cpu_idle(void)
 {
 	cpu_tasks[current_thread_info()->cpu].pid = os_getpid();
 	um_idle_sleep();
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 int __cant_sleep(void) {
arch/x86/include/asm/mwait.h
@@ -88,8 +88,6 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx,
 
 static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
 {
-	trace_hardirqs_on();
-
 	mds_idle_clear_cpu_buffers();
 	/* "mwait %eax, %ecx;" */
 	asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
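A note on why the deletion above is safe: __sti_mwait()'s open-coded trace_hardirqs_on() announced the impending "sti" to the tracer, but that call is itself a tracepoint and therefore needs RCU. With this patch the traced "IRQs will be enabled" bookkeeping happens once, in default_idle_call(), before RCU is switched off (see the kernel/sched/idle.c hunk below), so the mwait path must not repeat it from RCU-idle context.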
arch/x86/kernel/process.c
@@ -685,7 +685,7 @@ void arch_cpu_idle(void)
  */
 void __cpuidle default_idle(void)
 {
-	safe_halt();
+	raw_safe_halt();
 }
 #if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE)
 EXPORT_SYMBOL(default_idle);
@@ -736,6 +736,8 @@ void stop_this_cpu(void *dummy)
 /*
  * AMD Erratum 400 aware idle routine. We handle it the same way as C3 power
  * states (local apic timer and TSC stop).
+ *
+ * XXX this function is completely buggered vs RCU and tracing.
  */
 static void amd_e400_idle(void)
 {
@@ -757,9 +759,9 @@ static void amd_e400_idle(void)
 	 * The switch back from broadcast mode needs to be called with
 	 * interrupts disabled.
 	 */
-	local_irq_disable();
+	raw_local_irq_disable();
 	tick_broadcast_exit();
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 /*
@@ -801,9 +803,9 @@ static __cpuidle void mwait_idle(void)
 		if (!need_resched())
 			__sti_mwait(0, 0);
 		else
-			local_irq_enable();
+			raw_local_irq_enable();
 	} else {
-		local_irq_enable();
+		raw_local_irq_enable();
 	}
 	__current_clr_polling();
 }
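The new XXX comment in amd_e400_idle() is worth noting: switching to raw_local_irq_{en,dis}able() removes the direct tracing calls, but the function still does real work (the tick broadcast handshake) while the CPU is RCU-idle, and that work can itself reach tracing or RCU-using code. The comment flags this as a known remaining problem rather than something this patch fixes.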
kernel/sched/idle.c
@@ -78,7 +78,7 @@ void __weak arch_cpu_idle_dead(void) { }
 void __weak arch_cpu_idle(void)
 {
 	cpu_idle_force_poll = 1;
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 /**
@@ -94,9 +94,35 @@ void __cpuidle default_idle_call(void)
 
 		trace_cpu_idle(1, smp_processor_id());
 		stop_critical_timings();
+
+		/*
+		 * arch_cpu_idle() is supposed to enable IRQs, however
+		 * we can't do that because of RCU and tracing.
+		 *
+		 * Trace IRQs enable here, then switch off RCU, and have
+		 * arch_cpu_idle() use raw_local_irq_enable(). Note that
+		 * rcu_idle_enter() relies on lockdep IRQ state, so switch that
+		 * last -- this is very similar to the entry code.
+		 */
+		trace_hardirqs_on_prepare();
+		lockdep_hardirqs_on_prepare(_THIS_IP_);
 		rcu_idle_enter();
+		lockdep_hardirqs_on(_THIS_IP_);
+
 		arch_cpu_idle();
+
+		/*
+		 * OK, so IRQs are enabled here, but RCU needs them disabled to
+		 * turn itself back on.. funny thing is that disabling IRQs
+		 * will cause tracing, which needs RCU. Jump through hoops to
+		 * make it 'work'.
+		 */
+		raw_local_irq_disable();
+		lockdep_hardirqs_off(_THIS_IP_);
 		rcu_idle_exit();
+		lockdep_hardirqs_on(_THIS_IP_);
+		raw_local_irq_enable();
+
 		start_critical_timings();
 		trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
 	}
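Condensed from the default_idle_call() hunk above, the resulting IRQ/lockdep/RCU choreography around the architecture idle hook is (same calls in the same order as the diff, kernel comments elided):

	stop_critical_timings();

	trace_hardirqs_on_prepare();		/* traced "IRQs on" while RCU still watches */
	lockdep_hardirqs_on_prepare(_THIS_IP_);
	rcu_idle_enter();			/* RCU stops watching this CPU */
	lockdep_hardirqs_on(_THIS_IP_);		/* lockdep state only */

	arch_cpu_idle();			/* returns with IRQs enabled via raw_local_irq_enable() */

	raw_local_irq_disable();		/* raw: traced disable would need RCU here */
	lockdep_hardirqs_off(_THIS_IP_);
	rcu_idle_exit();			/* RCU watching again */
	lockdep_hardirqs_on(_THIS_IP_);		/* restore lockdep state... */
	raw_local_irq_enable();			/* ...then actually enable IRQs */

	start_critical_timings();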