mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-09-04 20:19:47 +08:00 
			
		
		
		
	 1f8da406a9
			
		
	
	
		1f8da406a9
		
	
	
	
	
		
			
			Tiny SRCU readers can appear at task level, but also in interrupt and softirq handlers. Because Tiny SRCU is selected only in kernels built with CONFIG_SMP=n and CONFIG_PREEMPTION=n, it is not possible for a grace period to start while there is a non-task-level SRCU reader executing. This means that it does not make sense for a non-task-level __srcu_read_unlock() to awaken the Tiny SRCU grace period, because that can only happen when the grace period is waiting for one value of ->srcu_idx and __srcu_read_unlock() is ending the last reader for some other value of ->srcu_idx. After all, any such wakeup will be redundant. Worse yet, in some cases, such wakeups generate lockdep splats: ====================================================== WARNING: possible circular locking dependency detected 5.15.0-rc1+ #3758 Not tainted ------------------------------------------------------ rcu_torture_rea/53 is trying to acquire lock: ffffffff9514e6a8 (srcu_ctl.srcu_wq.lock){..-.}-{2:2}, at: swake_up_one+0xa/0x30 but task is already holding lock: ffff95c642479d80 (&p->pi_lock){-.-.}-{2:2}, at: rcutorture_one_extend+0x370/0x400 which lock already depends on the new lock. 
the existing dependency chain (in reverse order) is: -> #1 (&p->pi_lock){-.-.}-{2:2}: _raw_spin_lock_irqsave+0x2f/0x50 try_to_wake_up+0x50/0x580 swake_up_locked.part.7+0xe/0x30 swake_up_one+0x22/0x30 rcutorture_one_extend+0x1b6/0x400 rcu_torture_one_read+0x290/0x5d0 rcu_torture_timer+0x1a/0x70 call_timer_fn+0xa6/0x230 run_timer_softirq+0x493/0x4c0 __do_softirq+0xc0/0x371 irq_exit+0x73/0x90 sysvec_apic_timer_interrupt+0x63/0x80 asm_sysvec_apic_timer_interrupt+0x12/0x20 default_idle+0xb/0x10 default_idle_call+0x5e/0x170 do_idle+0x18a/0x1f0 cpu_startup_entry+0xa/0x10 start_kernel+0x678/0x69f secondary_startup_64_no_verify+0xc2/0xcb -> #0 (srcu_ctl.srcu_wq.lock){..-.}-{2:2}: __lock_acquire+0x130c/0x2440 lock_acquire+0xc2/0x270 _raw_spin_lock_irqsave+0x2f/0x50 swake_up_one+0xa/0x30 rcutorture_one_extend+0x387/0x400 rcu_torture_one_read+0x290/0x5d0 rcu_torture_reader+0xac/0x200 kthread+0x12d/0x150 ret_from_fork+0x22/0x30 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&p->pi_lock); lock(srcu_ctl.srcu_wq.lock); lock(&p->pi_lock); lock(srcu_ctl.srcu_wq.lock); *** DEADLOCK *** 1 lock held by rcu_torture_rea/53: #0: ffff95c642479d80 (&p->pi_lock){-.-.}-{2:2}, at: _extend+0x370/0x400 stack backtrace: CPU: 0 PID: 53 Comm: rcu_torture_rea Not tainted 5.15.0-rc1+ Hardware name: Red Hat KVM/RHEL-AV, BIOS e_el8.5.0+746+bbd5d70c 04/01/2014 Call Trace: check_noncircular+0xfe/0x110 ? find_held_lock+0x2d/0x90 __lock_acquire+0x130c/0x2440 lock_acquire+0xc2/0x270 ? swake_up_one+0xa/0x30 ? find_held_lock+0x72/0x90 _raw_spin_lock_irqsave+0x2f/0x50 ? swake_up_one+0xa/0x30 swake_up_one+0xa/0x30 rcutorture_one_extend+0x387/0x400 rcu_torture_one_read+0x290/0x5d0 rcu_torture_reader+0xac/0x200 ? rcutorture_oom_notify+0xf0/0xf0 ? __kthread_parkme+0x61/0x90 ? rcu_torture_one_read+0x5d0/0x5d0 kthread+0x12d/0x150 ? 
set_kthread_struct+0x40/0x40 ret_from_fork+0x22/0x30 This is a false positive because there is only one CPU, and both locks are raw (non-preemptible) spinlocks. However, it is worthwhile getting rid of the redundant wakeup, which has the side effect of breaking the theoretical deadlock cycle. This commit therefore eliminates the redundant wakeups. Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
		
			
				
	
	
		
			273 lines
		
	
	
		
			7.5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			273 lines
		
	
	
		
			7.5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| // SPDX-License-Identifier: GPL-2.0+
 | |
| /*
 | |
|  * Sleepable Read-Copy Update mechanism for mutual exclusion,
 | |
|  *	tiny version for non-preemptible single-CPU use.
 | |
|  *
 | |
|  * Copyright (C) IBM Corporation, 2017
 | |
|  *
 | |
|  * Author: Paul McKenney <paulmck@linux.ibm.com>
 | |
|  */
 | |
| 
 | |
| #include <linux/export.h>
 | |
| #include <linux/mutex.h>
 | |
| #include <linux/preempt.h>
 | |
| #include <linux/rcupdate_wait.h>
 | |
| #include <linux/sched.h>
 | |
| #include <linux/delay.h>
 | |
| #include <linux/srcu.h>
 | |
| 
 | |
| #include <linux/rcu_node_tree.h>
 | |
| #include "rcu_segcblist.h"
 | |
| #include "rcu.h"
 | |
| 
 | |
| int rcu_scheduler_active __read_mostly;
 | |
| static LIST_HEAD(srcu_boot_list);
 | |
| static bool srcu_init_done;
 | |
| 
 | |
| static int init_srcu_struct_fields(struct srcu_struct *ssp)
 | |
| {
 | |
| 	ssp->srcu_lock_nesting[0] = 0;
 | |
| 	ssp->srcu_lock_nesting[1] = 0;
 | |
| 	init_swait_queue_head(&ssp->srcu_wq);
 | |
| 	ssp->srcu_cb_head = NULL;
 | |
| 	ssp->srcu_cb_tail = &ssp->srcu_cb_head;
 | |
| 	ssp->srcu_gp_running = false;
 | |
| 	ssp->srcu_gp_waiting = false;
 | |
| 	ssp->srcu_idx = 0;
 | |
| 	ssp->srcu_idx_max = 0;
 | |
| 	INIT_WORK(&ssp->srcu_work, srcu_drive_gp);
 | |
| 	INIT_LIST_HEAD(&ssp->srcu_work.entry);
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| #ifdef CONFIG_DEBUG_LOCK_ALLOC
 | |
| 
 | |
| int __init_srcu_struct(struct srcu_struct *ssp, const char *name,
 | |
| 		       struct lock_class_key *key)
 | |
| {
 | |
| 	/* Don't re-initialize a lock while it is held. */
 | |
| 	debug_check_no_locks_freed((void *)ssp, sizeof(*ssp));
 | |
| 	lockdep_init_map(&ssp->dep_map, name, key, 0);
 | |
| 	return init_srcu_struct_fields(ssp);
 | |
| }
 | |
| EXPORT_SYMBOL_GPL(__init_srcu_struct);
 | |
| 
 | |
| #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 | |
| 
 | |
/*
 * init_srcu_struct - initialize a sleep-RCU structure
 * @ssp: structure to initialize.
 *
 * An srcu_struct must be initialized with this function before it is
 * handed to any other SRCU function.  Each srcu_struct is its own,
 * independent domain of SRCU protection.
 *
 * Returns the result of init_srcu_struct_fields().
 */
int init_srcu_struct(struct srcu_struct *ssp)
{
	int ret = init_srcu_struct_fields(ssp);

	return ret;
}
 | |
| EXPORT_SYMBOL_GPL(init_srcu_struct);
 | |
| 
 | |
| #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 | |
| 
 | |
| /*
 | |
|  * cleanup_srcu_struct - deconstruct a sleep-RCU structure
 | |
|  * @ssp: structure to clean up.
 | |
|  *
 | |
|  * Must invoke this after you are finished using a given srcu_struct that
 | |
|  * was initialized via init_srcu_struct(), else you leak memory.
 | |
|  */
 | |
| void cleanup_srcu_struct(struct srcu_struct *ssp)
 | |
| {
 | |
| 	WARN_ON(ssp->srcu_lock_nesting[0] || ssp->srcu_lock_nesting[1]);
 | |
| 	flush_work(&ssp->srcu_work);
 | |
| 	WARN_ON(ssp->srcu_gp_running);
 | |
| 	WARN_ON(ssp->srcu_gp_waiting);
 | |
| 	WARN_ON(ssp->srcu_cb_head);
 | |
| 	WARN_ON(&ssp->srcu_cb_head != ssp->srcu_cb_tail);
 | |
| 	WARN_ON(ssp->srcu_idx != ssp->srcu_idx_max);
 | |
| 	WARN_ON(ssp->srcu_idx & 0x1);
 | |
| }
 | |
| EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
 | |
| 
 | |
| /*
 | |
|  * Removes the count for the old reader from the appropriate element of
 | |
|  * the srcu_struct.
 | |
|  */
 | |
| void __srcu_read_unlock(struct srcu_struct *ssp, int idx)
 | |
| {
 | |
| 	int newval = READ_ONCE(ssp->srcu_lock_nesting[idx]) - 1;
 | |
| 
 | |
| 	WRITE_ONCE(ssp->srcu_lock_nesting[idx], newval);
 | |
| 	if (!newval && READ_ONCE(ssp->srcu_gp_waiting) && in_task())
 | |
| 		swake_up_one(&ssp->srcu_wq);
 | |
| }
 | |
| EXPORT_SYMBOL_GPL(__srcu_read_unlock);
 | |
| 
 | |
| /*
 | |
|  * Workqueue handler to drive one grace period and invoke any callbacks
 | |
|  * that become ready as a result.  Single-CPU and !PREEMPTION operation
 | |
|  * means that we get away with murder on synchronization.  ;-)
 | |
|  */
 | |
| void srcu_drive_gp(struct work_struct *wp)
 | |
| {
 | |
| 	int idx;
 | |
| 	struct rcu_head *lh;
 | |
| 	struct rcu_head *rhp;
 | |
| 	struct srcu_struct *ssp;
 | |
| 
 | |
| 	ssp = container_of(wp, struct srcu_struct, srcu_work);
 | |
| 	if (ssp->srcu_gp_running || USHORT_CMP_GE(ssp->srcu_idx, READ_ONCE(ssp->srcu_idx_max)))
 | |
| 		return; /* Already running or nothing to do. */
 | |
| 
 | |
| 	/* Remove recently arrived callbacks and wait for readers. */
 | |
| 	WRITE_ONCE(ssp->srcu_gp_running, true);
 | |
| 	local_irq_disable();
 | |
| 	lh = ssp->srcu_cb_head;
 | |
| 	ssp->srcu_cb_head = NULL;
 | |
| 	ssp->srcu_cb_tail = &ssp->srcu_cb_head;
 | |
| 	local_irq_enable();
 | |
| 	idx = (ssp->srcu_idx & 0x2) / 2;
 | |
| 	WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1);
 | |
| 	WRITE_ONCE(ssp->srcu_gp_waiting, true);  /* srcu_read_unlock() wakes! */
 | |
| 	swait_event_exclusive(ssp->srcu_wq, !READ_ONCE(ssp->srcu_lock_nesting[idx]));
 | |
| 	WRITE_ONCE(ssp->srcu_gp_waiting, false); /* srcu_read_unlock() cheap. */
 | |
| 	WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1);
 | |
| 
 | |
| 	/* Invoke the callbacks we removed above. */
 | |
| 	while (lh) {
 | |
| 		rhp = lh;
 | |
| 		lh = lh->next;
 | |
| 		local_bh_disable();
 | |
| 		rhp->func(rhp);
 | |
| 		local_bh_enable();
 | |
| 	}
 | |
| 
 | |
| 	/*
 | |
| 	 * Enable rescheduling, and if there are more callbacks,
 | |
| 	 * reschedule ourselves.  This can race with a call_srcu()
 | |
| 	 * at interrupt level, but the ->srcu_gp_running checks will
 | |
| 	 * straighten that out.
 | |
| 	 */
 | |
| 	WRITE_ONCE(ssp->srcu_gp_running, false);
 | |
| 	if (USHORT_CMP_LT(ssp->srcu_idx, READ_ONCE(ssp->srcu_idx_max)))
 | |
| 		schedule_work(&ssp->srcu_work);
 | |
| }
 | |
| EXPORT_SYMBOL_GPL(srcu_drive_gp);
 | |
| 
 | |
| static void srcu_gp_start_if_needed(struct srcu_struct *ssp)
 | |
| {
 | |
| 	unsigned short cookie;
 | |
| 
 | |
| 	cookie = get_state_synchronize_srcu(ssp);
 | |
| 	if (USHORT_CMP_GE(READ_ONCE(ssp->srcu_idx_max), cookie))
 | |
| 		return;
 | |
| 	WRITE_ONCE(ssp->srcu_idx_max, cookie);
 | |
| 	if (!READ_ONCE(ssp->srcu_gp_running)) {
 | |
| 		if (likely(srcu_init_done))
 | |
| 			schedule_work(&ssp->srcu_work);
 | |
| 		else if (list_empty(&ssp->srcu_work.entry))
 | |
| 			list_add(&ssp->srcu_work.entry, &srcu_boot_list);
 | |
| 	}
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Enqueue an SRCU callback on the specified srcu_struct structure,
 | |
|  * initiating grace-period processing if it is not already running.
 | |
|  */
 | |
| void call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp,
 | |
| 	       rcu_callback_t func)
 | |
| {
 | |
| 	unsigned long flags;
 | |
| 
 | |
| 	rhp->func = func;
 | |
| 	rhp->next = NULL;
 | |
| 	local_irq_save(flags);
 | |
| 	*ssp->srcu_cb_tail = rhp;
 | |
| 	ssp->srcu_cb_tail = &rhp->next;
 | |
| 	local_irq_restore(flags);
 | |
| 	srcu_gp_start_if_needed(ssp);
 | |
| }
 | |
| EXPORT_SYMBOL_GPL(call_srcu);
 | |
| 
 | |
| /*
 | |
|  * synchronize_srcu - wait for prior SRCU read-side critical-section completion
 | |
|  */
 | |
| void synchronize_srcu(struct srcu_struct *ssp)
 | |
| {
 | |
| 	struct rcu_synchronize rs;
 | |
| 
 | |
| 	init_rcu_head_on_stack(&rs.head);
 | |
| 	init_completion(&rs.completion);
 | |
| 	call_srcu(ssp, &rs.head, wakeme_after_rcu);
 | |
| 	wait_for_completion(&rs.completion);
 | |
| 	destroy_rcu_head_on_stack(&rs.head);
 | |
| }
 | |
| EXPORT_SYMBOL_GPL(synchronize_srcu);
 | |
| 
 | |
| /*
 | |
|  * get_state_synchronize_srcu - Provide an end-of-grace-period cookie
 | |
|  */
 | |
| unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp)
 | |
| {
 | |
| 	unsigned long ret;
 | |
| 
 | |
| 	barrier();
 | |
| 	ret = (READ_ONCE(ssp->srcu_idx) + 3) & ~0x1;
 | |
| 	barrier();
 | |
| 	return ret & USHRT_MAX;
 | |
| }
 | |
| EXPORT_SYMBOL_GPL(get_state_synchronize_srcu);
 | |
| 
 | |
/*
 * start_poll_synchronize_srcu - Provide cookie and start grace period
 * @ssp: srcu_struct for which to obtain the cookie.
 *
 * Like get_state_synchronize_srcu(), but additionally makes sure that
 * a grace period gets started, so that a later call to
 * poll_state_synchronize_srcu() with the returned cookie will
 * eventually return true.
 */
unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp)
{
	unsigned long cookie;

	cookie = get_state_synchronize_srcu(ssp);
	srcu_gp_start_if_needed(ssp);

	return cookie;
}
 | |
| EXPORT_SYMBOL_GPL(start_poll_synchronize_srcu);
 | |
| 
 | |
| /*
 | |
|  * poll_state_synchronize_srcu - Has cookie's grace period ended?
 | |
|  */
 | |
| bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie)
 | |
| {
 | |
| 	bool ret = USHORT_CMP_GE(READ_ONCE(ssp->srcu_idx), cookie);
 | |
| 
 | |
| 	barrier();
 | |
| 	return ret;
 | |
| }
 | |
| EXPORT_SYMBOL_GPL(poll_state_synchronize_srcu);
 | |
| 
 | |
| /* Lockdep diagnostics.  */
 | |
| void __init rcu_scheduler_starting(void)
 | |
| {
 | |
| 	rcu_scheduler_active = RCU_SCHEDULER_RUNNING;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Queue work for srcu_struct structures with early boot callbacks.
 | |
|  * The work won't actually execute until the workqueue initialization
 | |
|  * phase that takes place after the scheduler starts.
 | |
|  */
 | |
| void __init srcu_init(void)
 | |
| {
 | |
| 	struct srcu_struct *ssp;
 | |
| 
 | |
| 	srcu_init_done = true;
 | |
| 	while (!list_empty(&srcu_boot_list)) {
 | |
| 		ssp = list_first_entry(&srcu_boot_list,
 | |
| 				      struct srcu_struct, srcu_work.entry);
 | |
| 		list_del_init(&ssp->srcu_work.entry);
 | |
| 		schedule_work(&ssp->srcu_work);
 | |
| 	}
 | |
| }
 |