Mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git (synced 2025-09-04 20:19:47 +08:00)
locking: Move common qspinlock helpers to a private header
Move the qspinlock helper functions that encode and decode the tail word and that set and clear the pending and locked bits, along with other miscellaneous definitions and macros, into a private header. To this end, create a qspinlock.h header file in kernel/locking. Subsequent commits will introduce a modified qspinlock slow path function; moving the shared code to a private header helps minimize unnecessary code duplication.

Reviewed-by: Barret Rhoden <brho@google.com>
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20250316040541.108729-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parent 8707d1eedc
commit ac08f68f1f
kernel/locking/qspinlock.c
@@ -25,8 +25,9 @@
 #include <trace/events/lock.h>
 
 /*
- * Include queued spinlock statistics code
+ * Include queued spinlock definitions and statistics code
  */
+#include "qspinlock.h"
 #include "qspinlock_stat.h"
 
 /*
@@ -67,36 +68,6 @@
  */
 
 #include "mcs_spinlock.h"
-#define MAX_NODES	4
-
-/*
- * On 64-bit architectures, the mcs_spinlock structure will be 16 bytes in
- * size and four of them will fit nicely in one 64-byte cacheline. For
- * pvqspinlock, however, we need more space for extra data. To accommodate
- * that, we insert two more long words to pad it up to 32 bytes. IOW, only
- * two of them can fit in a cacheline in this case. That is OK as it is rare
- * to have more than 2 levels of slowpath nesting in actual use. We don't
- * want to penalize pvqspinlocks to optimize for a rare case in native
- * qspinlocks.
- */
-struct qnode {
-	struct mcs_spinlock mcs;
-#ifdef CONFIG_PARAVIRT_SPINLOCKS
-	long reserved[2];
-#endif
-};
-
-/*
- * The pending bit spinning loop count.
- * This heuristic is used to limit the number of lockword accesses
- * made by atomic_cond_read_relaxed when waiting for the lock to
- * transition out of the "== _Q_PENDING_VAL" state. We don't spin
- * indefinitely because there's no guarantee that we'll make forward
- * progress.
- */
-#ifndef _Q_PENDING_LOOPS
-#define _Q_PENDING_LOOPS	1
-#endif
 
 /*
  * Per-CPU queue node structures; we can never have more than 4 nested
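The cacheline arithmetic in the comment being moved (16-byte MCS nodes, padded to 32 bytes for paravirt) can be sanity-checked with a small standalone sketch. The structures below are local mock re-declarations for illustration only, assuming a 64-bit LP64 target; they are not the kernel's headers:

```c
/* Standalone sketch of the qnode sizing argument; mock types, LP64 assumed. */
#include <assert.h>
#include <stdio.h>

struct mock_mcs_spinlock {
	struct mock_mcs_spinlock *next;	/* 8 bytes on LP64 */
	int locked;			/* 4 bytes */
	int count;			/* 4 bytes */
};					/* 16 bytes: 4 nodes per 64-byte line */

struct mock_pv_qnode {
	struct mock_mcs_spinlock mcs;
	long reserved[2];		/* PV padding to 32 bytes: 2 per line */
};

int main(void)
{
	static_assert(sizeof(struct mock_mcs_spinlock) == 16, "native node size");
	static_assert(sizeof(struct mock_pv_qnode) == 32, "pv node size");
	printf("native nodes per cacheline: %zu, pv nodes per cacheline: %zu\n",
	       64 / sizeof(struct mock_mcs_spinlock),
	       64 / sizeof(struct mock_pv_qnode));
	return 0;
}
```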
@@ -106,161 +77,7 @@ struct qnode
  *
  * PV doubles the storage and uses the second cacheline for PV state.
  */
-static DEFINE_PER_CPU_ALIGNED(struct qnode, qnodes[MAX_NODES]);
+static DEFINE_PER_CPU_ALIGNED(struct qnode, qnodes[_Q_MAX_NODES]);
 
-/*
- * We must be able to distinguish between no-tail and the tail at 0:0,
- * therefore increment the cpu number by one.
- */
-
-static inline __pure u32 encode_tail(int cpu, int idx)
-{
-	u32 tail;
-
-	tail  = (cpu + 1) << _Q_TAIL_CPU_OFFSET;
-	tail |= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */
-
-	return tail;
-}
-
-static inline __pure struct mcs_spinlock *decode_tail(u32 tail)
-{
-	int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
-	int idx = (tail &  _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
-
-	return per_cpu_ptr(&qnodes[idx].mcs, cpu);
-}
-
-static inline __pure
-struct mcs_spinlock *grab_mcs_node(struct mcs_spinlock *base, int idx)
-{
-	return &((struct qnode *)base + idx)->mcs;
-}
-
-#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
-
-#if _Q_PENDING_BITS == 8
-/**
- * clear_pending - clear the pending bit.
- * @lock: Pointer to queued spinlock structure
- *
- * *,1,* -> *,0,*
- */
-static __always_inline void clear_pending(struct qspinlock *lock)
-{
-	WRITE_ONCE(lock->pending, 0);
-}
-
-/**
- * clear_pending_set_locked - take ownership and clear the pending bit.
- * @lock: Pointer to queued spinlock structure
- *
- * *,1,0 -> *,0,1
- *
- * Lock stealing is not allowed if this function is used.
- */
-static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
-{
-	WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL);
-}
-
-/*
- * xchg_tail - Put in the new queue tail code word & retrieve previous one
- * @lock : Pointer to queued spinlock structure
- * @tail : The new queue tail code word
- * Return: The previous queue tail code word
- *
- * xchg(lock, tail), which heads an address dependency
- *
- * p,*,* -> n,*,* ; prev = xchg(lock, node)
- */
-static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
-{
-	/*
-	 * We can use relaxed semantics since the caller ensures that the
-	 * MCS node is properly initialized before updating the tail.
-	 */
-	return (u32)xchg_relaxed(&lock->tail,
-				 tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
-}
-
-#else /* _Q_PENDING_BITS == 8 */
-
-/**
- * clear_pending - clear the pending bit.
- * @lock: Pointer to queued spinlock structure
- *
- * *,1,* -> *,0,*
- */
-static __always_inline void clear_pending(struct qspinlock *lock)
-{
-	atomic_andnot(_Q_PENDING_VAL, &lock->val);
-}
-
-/**
- * clear_pending_set_locked - take ownership and clear the pending bit.
- * @lock: Pointer to queued spinlock structure
- *
- * *,1,0 -> *,0,1
- */
-static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
-{
-	atomic_add(-_Q_PENDING_VAL + _Q_LOCKED_VAL, &lock->val);
-}
-
-/**
- * xchg_tail - Put in the new queue tail code word & retrieve previous one
- * @lock : Pointer to queued spinlock structure
- * @tail : The new queue tail code word
- * Return: The previous queue tail code word
- *
- * xchg(lock, tail)
- *
- * p,*,* -> n,*,* ; prev = xchg(lock, node)
- */
-static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
-{
-	u32 old, new;
-
-	old = atomic_read(&lock->val);
-	do {
-		new = (old & _Q_LOCKED_PENDING_MASK) | tail;
-		/*
-		 * We can use relaxed semantics since the caller ensures that
-		 * the MCS node is properly initialized before updating the
-		 * tail.
-		 */
-	} while (!atomic_try_cmpxchg_relaxed(&lock->val, &old, new));
-
-	return old;
-}
-#endif /* _Q_PENDING_BITS == 8 */
-
-/**
- * queued_fetch_set_pending_acquire - fetch the whole lock value and set pending
- * @lock : Pointer to queued spinlock structure
- * Return: The previous lock value
- *
- * *,*,* -> *,1,*
- */
-#ifndef queued_fetch_set_pending_acquire
-static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock)
-{
-	return atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val);
-}
-#endif
-
-/**
- * set_locked - Set the lock bit and own the lock
- * @lock: Pointer to queued spinlock structure
- *
- * *,*,0 -> *,0,1
- */
-static __always_inline void set_locked(struct qspinlock *lock)
-{
-	WRITE_ONCE(lock->locked, _Q_LOCKED_VAL);
-}
-
-
 /*
  * Generate the native code for queued_spin_unlock_slowpath(); provide NOPs for
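The encode_tail()/decode_tail() pair removed above (and relocated to the new header) packs a CPU number and a per-CPU nesting index into the upper bits of the 32-bit lock word, storing cpu + 1 so that a tail of 0 can still mean "no tail". A standalone round-trip of that encoding, using the offsets qspinlock_types.h selects for the common NR_CPUS < 16K configuration (tail index at bit 16, tail CPU at bit 18); the constants are copied here purely for illustration and are not the kernel headers:

```c
/* Standalone round-trip of the tail encoding; offsets assume the
 * NR_CPUS < 16K layout (_Q_PENDING_BITS == 8), copied for illustration. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define Q_TAIL_IDX_OFFSET	16
#define Q_TAIL_IDX_MASK		(0x3U << Q_TAIL_IDX_OFFSET)
#define Q_TAIL_CPU_OFFSET	18

static uint32_t encode_tail_demo(int cpu, int idx)
{
	/* cpu + 1 keeps an encoded tail of 0 distinct from "no tail" */
	return ((uint32_t)(cpu + 1) << Q_TAIL_CPU_OFFSET) |
	       ((uint32_t)idx << Q_TAIL_IDX_OFFSET);
}

static void decode_tail_demo(uint32_t tail, int *cpu, int *idx)
{
	*cpu = (int)(tail >> Q_TAIL_CPU_OFFSET) - 1;
	*idx = (int)((tail & Q_TAIL_IDX_MASK) >> Q_TAIL_IDX_OFFSET);
}

int main(void)
{
	uint32_t tail = encode_tail_demo(5, 2);	/* CPU 5, nesting level 2 */
	int cpu, idx;

	decode_tail_demo(tail, &cpu, &idx);
	printf("tail=0x%x cpu=%d idx=%d\n", (unsigned)tail, cpu, idx);
	assert(cpu == 5 && idx == 2);
	assert(encode_tail_demo(0, 0) != 0);	/* CPU 0, idx 0 is not "no tail" */
	return 0;
}
```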
@@ -410,7 +227,7 @@ pv_queue:
	 * any MCS node. This is not the most elegant solution, but is
	 * simple enough.
	 */
-	if (unlikely(idx >= MAX_NODES)) {
+	if (unlikely(idx >= _Q_MAX_NODES)) {
 		lockevent_inc(lock_no_node);
 		while (!queued_spin_trylock(lock))
 			cpu_relax();
@@ -465,7 +282,7 @@ pv_queue:
	 * head of the waitqueue.
	 */
 	if (old & _Q_TAIL_MASK) {
-		prev = decode_tail(old);
+		prev = decode_tail(old, qnodes);
 
 		/* Link @node into the waitqueue. */
 		WRITE_ONCE(prev->next, node);
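Note that decode_tail() now takes the per-CPU node array as a parameter instead of referencing the file-local qnodes directly, which is what lets another slow-path implementation reuse the helper against its own node storage. A minimal, hypothetical sketch of such a consumer (the file and identifier names below are invented for illustration; the actual consumer is the modified slow path mentioned in the commit message):

```c
/*
 * Hypothetical consumer sketch (names invented): a second slow-path
 * implementation that includes the private header and keeps its own
 * per-CPU node array, passing it to decode_tail() explicitly.
 */
#include <linux/smp.h>
#include <linux/types.h>

#include "qspinlock.h"

static DEFINE_PER_CPU_ALIGNED(struct qnode, demo_qnodes[_Q_MAX_NODES]);

static u32 demo_make_tail(int idx)
{
	/* Same tail word format as the stock slow path. */
	return encode_tail(smp_processor_id(), idx);
}

static struct mcs_spinlock *demo_prev_node(u32 old_tail)
{
	/* Resolve the previous tail against *this* implementation's array. */
	return decode_tail(old_tail, demo_qnodes);
}
```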
kernel/locking/qspinlock.h (new file, 201 lines added)
@@ -0,0 +1,201 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Queued spinlock defines
+ *
+ * This file contains macro definitions and functions shared between different
+ * qspinlock slow path implementations.
+ */
+#ifndef __LINUX_QSPINLOCK_H
+#define __LINUX_QSPINLOCK_H
+
+#include <asm-generic/percpu.h>
+#include <linux/percpu-defs.h>
+#include <asm-generic/qspinlock.h>
+#include <asm-generic/mcs_spinlock.h>
+
+#define _Q_MAX_NODES	4
+
+/*
+ * The pending bit spinning loop count.
+ * This heuristic is used to limit the number of lockword accesses
+ * made by atomic_cond_read_relaxed when waiting for the lock to
+ * transition out of the "== _Q_PENDING_VAL" state. We don't spin
+ * indefinitely because there's no guarantee that we'll make forward
+ * progress.
+ */
+#ifndef _Q_PENDING_LOOPS
+#define _Q_PENDING_LOOPS	1
+#endif
+
+/*
+ * On 64-bit architectures, the mcs_spinlock structure will be 16 bytes in
+ * size and four of them will fit nicely in one 64-byte cacheline. For
+ * pvqspinlock, however, we need more space for extra data. To accommodate
+ * that, we insert two more long words to pad it up to 32 bytes. IOW, only
+ * two of them can fit in a cacheline in this case. That is OK as it is rare
+ * to have more than 2 levels of slowpath nesting in actual use. We don't
+ * want to penalize pvqspinlocks to optimize for a rare case in native
+ * qspinlocks.
+ */
+struct qnode {
+	struct mcs_spinlock mcs;
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+	long reserved[2];
+#endif
+};
+
+/*
+ * We must be able to distinguish between no-tail and the tail at 0:0,
+ * therefore increment the cpu number by one.
+ */
+
+static inline __pure u32 encode_tail(int cpu, int idx)
+{
+	u32 tail;
+
+	tail  = (cpu + 1) << _Q_TAIL_CPU_OFFSET;
+	tail |= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */
+
+	return tail;
+}
+
+static inline __pure struct mcs_spinlock *decode_tail(u32 tail,
+						       struct qnode __percpu *qnodes)
+{
+	int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
+	int idx = (tail &  _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
+
+	return per_cpu_ptr(&qnodes[idx].mcs, cpu);
+}
+
+static inline __pure
+struct mcs_spinlock *grab_mcs_node(struct mcs_spinlock *base, int idx)
+{
+	return &((struct qnode *)base + idx)->mcs;
+}
+
+#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
+
+#if _Q_PENDING_BITS == 8
+/**
+ * clear_pending - clear the pending bit.
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,1,* -> *,0,*
+ */
+static __always_inline void clear_pending(struct qspinlock *lock)
+{
+	WRITE_ONCE(lock->pending, 0);
+}
+
+/**
+ * clear_pending_set_locked - take ownership and clear the pending bit.
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,1,0 -> *,0,1
+ *
+ * Lock stealing is not allowed if this function is used.
+ */
+static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
+{
+	WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL);
+}
+
+/*
+ * xchg_tail - Put in the new queue tail code word & retrieve previous one
+ * @lock : Pointer to queued spinlock structure
+ * @tail : The new queue tail code word
+ * Return: The previous queue tail code word
+ *
+ * xchg(lock, tail), which heads an address dependency
+ *
+ * p,*,* -> n,*,* ; prev = xchg(lock, node)
+ */
+static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
+{
+	/*
+	 * We can use relaxed semantics since the caller ensures that the
+	 * MCS node is properly initialized before updating the tail.
+	 */
+	return (u32)xchg_relaxed(&lock->tail,
+				 tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
+}
+
+#else /* _Q_PENDING_BITS == 8 */
+
+/**
+ * clear_pending - clear the pending bit.
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,1,* -> *,0,*
+ */
+static __always_inline void clear_pending(struct qspinlock *lock)
+{
+	atomic_andnot(_Q_PENDING_VAL, &lock->val);
+}
+
+/**
+ * clear_pending_set_locked - take ownership and clear the pending bit.
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,1,0 -> *,0,1
+ */
+static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
+{
+	atomic_add(-_Q_PENDING_VAL + _Q_LOCKED_VAL, &lock->val);
+}
+
+/**
+ * xchg_tail - Put in the new queue tail code word & retrieve previous one
+ * @lock : Pointer to queued spinlock structure
+ * @tail : The new queue tail code word
+ * Return: The previous queue tail code word
+ *
+ * xchg(lock, tail)
+ *
+ * p,*,* -> n,*,* ; prev = xchg(lock, node)
+ */
+static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
+{
+	u32 old, new;
+
+	old = atomic_read(&lock->val);
+	do {
+		new = (old & _Q_LOCKED_PENDING_MASK) | tail;
+		/*
+		 * We can use relaxed semantics since the caller ensures that
+		 * the MCS node is properly initialized before updating the
+		 * tail.
+		 */
+	} while (!atomic_try_cmpxchg_relaxed(&lock->val, &old, new));
+
+	return old;
+}
+#endif /* _Q_PENDING_BITS == 8 */
+
+/**
+ * queued_fetch_set_pending_acquire - fetch the whole lock value and set pending
+ * @lock : Pointer to queued spinlock structure
+ * Return: The previous lock value
+ *
+ * *,*,* -> *,1,*
+ */
+#ifndef queued_fetch_set_pending_acquire
+static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock)
+{
+	return atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val);
+}
+#endif
+
+/**
+ * set_locked - Set the lock bit and own the lock
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,*,0 -> *,0,1
+ */
+static __always_inline void set_locked(struct qspinlock *lock)
+{
+	WRITE_ONCE(lock->locked, _Q_LOCKED_VAL);
+}
+
+#endif /* __LINUX_QSPINLOCK_H */
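One detail worth spelling out in the header above is the _Q_PENDING_BITS != 8 variant of clear_pending_set_locked(), which performs the *,1,0 -> *,0,1 transition with a single atomic_add(-_Q_PENDING_VAL + _Q_LOCKED_VAL, ...). The sketch below reproduces that arithmetic on plain integers, assuming the generic layout (locked byte at bit 0, pending bit at bit 8); it is an illustration, not kernel code:

```c
/* Standalone arithmetic check of the clear_pending_set_locked() delta for the
 * wide-atomic (_Q_PENDING_BITS != 8) case; constants assume the generic
 * layout with the locked byte at bit 0 and the pending bit at bit 8. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define Q_LOCKED_VAL	(1U << 0)
#define Q_PENDING_VAL	(1U << 8)

int main(void)
{
	uint32_t tail = 0x1a0000;		/* some queued waiters in the tail */
	uint32_t val  = tail | Q_PENDING_VAL;	/* *,1,0: pending set, unlocked */

	/* One addition flips pending -> 0 and locked -> 1 in a single step;
	 * it cannot borrow into the tail bits because pending == 1 and
	 * locked == 0 are preconditions of this transition. */
	val += -Q_PENDING_VAL + Q_LOCKED_VAL;

	printf("val=0x%x\n", (unsigned)val);
	assert(val == (tail | Q_LOCKED_VAL));	/* *,0,1 */
	return 0;
}
```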