linux/tools/perf/util/lock-contention.h
perf lock contention: Handle slab objects in -L/--lock-filter option
Author: Namhyung Kim (commit 91a5bffa56)

This allows filtering lock contention to specific slab objects only.
As in the lock symbol output, the '&' prefix can be used to filter by
slab object name (a rough parsing sketch follows the examples below).

  root@virtme-ng:/home/namhyung/project/linux# tools/perf/perf lock con -abl sleep 1
   contended   total wait     max wait     avg wait            address   symbol

           3     14.99 us     14.44 us      5.00 us   ffffffff851c0940   pack_mutex (mutex)
           2      2.75 us      2.56 us      1.38 us   ffff98d7031fb498   &task_struct (mutex)
           4      1.42 us       557 ns       355 ns   ffff98d706311400   &kmalloc-cg-512 (mutex)
           2       953 ns       714 ns       476 ns   ffffffff851c3620   delayed_uprobe_lock (mutex)
           1       929 ns       929 ns       929 ns   ffff98d7031fb538   &task_struct (mutex)
           3       561 ns       210 ns       187 ns   ffffffff84a8b3a0   text_mutex (mutex)
           1       479 ns       479 ns       479 ns   ffffffff851b4cf8   tracepoint_srcu_srcu_usage (mutex)
           2       320 ns       195 ns       160 ns   ffffffff851cf840   pcpu_alloc_mutex (mutex)
           1       212 ns       212 ns       212 ns   ffff98d7031784d8   &signal_cache (mutex)
           1       177 ns       177 ns       177 ns   ffffffff851b4c28   tracepoint_srcu_srcu_usage (mutex)

With the filter, it can show contentions from the task_struct only.

  root@virtme-ng:/home/namhyung/project/linux# tools/perf/perf lock con -abl -L '&task_struct' sleep 1
   contended   total wait     max wait     avg wait            address   symbol

           2      1.97 us      1.71 us       987 ns   ffff98d7032fd658   &task_struct (mutex)
           1      1.20 us      1.20 us      1.20 us   ffff98d7032fd6f8   &task_struct (mutex)

It also works with the other aggregation modes:

  root@virtme-ng:/home/namhyung/project/linux# tools/perf/perf lock con -ab -L '&task_struct' sleep 1
   contended   total wait     max wait     avg wait         type   caller

           1     25.10 us     25.10 us     25.10 us        mutex   perf_event_exit_task+0x39
           1     21.60 us     21.60 us     21.60 us        mutex   futex_exit_release+0x21
           1      5.56 us      5.56 us      5.56 us        mutex   futex_exec_release+0x21
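
For illustration, here is a minimal sketch of how a single -L entry
could be routed to either the symbol or the slab filter list in
struct lock_filter.  This is not the actual parser in builtin-lock.c;
the helper name and the exact prefix handling are assumptions:

  #include <errno.h>
  #include <stdlib.h>
  #include <string.h>

  #include "lock-contention.h"   /* struct lock_filter */

  /*
   * Hypothetical helper: a '&name' entry goes to the slab filter,
   * anything else to the lock symbol filter.  Assumes the '&' prefix
   * is stripped before matching against kmem_cache names.
   */
  static int add_lock_filter_name(struct lock_filter *filter, const char *name)
  {
          char **p;

          if (name[0] == '&') {
                  p = realloc(filter->slabs, (filter->nr_slabs + 1) * sizeof(*p));
                  if (p == NULL)
                          return -ENOMEM;
                  p[filter->nr_slabs++] = strdup(name + 1);
                  filter->slabs = p;
          } else {
                  p = realloc(filter->syms, (filter->nr_syms + 1) * sizeof(*p));
                  if (p == NULL)
                          return -ENOMEM;
                  p[filter->nr_syms++] = strdup(name);
                  filter->syms = p;
          }
          return 0;
  }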

Committer testing:

  root@number:~# perf lock con -abl sleep 1
   contended   total wait     max wait     avg wait            address   symbol

           1     20.80 us     20.80 us     20.80 us   ffff9d417fbd65d0    (spinlock)
           8     12.85 us      2.41 us      1.61 us   ffff9d415eeb6a40   rq_lock (spinlock)
           1      2.55 us      2.55 us      2.55 us   ffff9d415f636a40   rq_lock (spinlock)
           7      1.92 us       840 ns       274 ns   ffff9d39c2cbc8c4    (spinlock)
           1      1.23 us      1.23 us      1.23 us   ffff9d415fb36a40   rq_lock (spinlock)
           2       928 ns       738 ns       464 ns   ffff9d39c1fa6660   &kmalloc-rnd-14-192 (rwlock)
           4       788 ns       252 ns       197 ns   ffffffffb8608a80   jiffies_lock (spinlock)
           1       304 ns       304 ns       304 ns   ffff9d39c2c979c4    (spinlock)
           1       216 ns       216 ns       216 ns   ffff9d3a0225c660   &kmalloc-rnd-14-192 (rwlock)
           1        89 ns        89 ns        89 ns   ffff9d3a0adbf3e0   &kmalloc-rnd-14-192 (rwlock)
           1        61 ns        61 ns        61 ns   ffff9d415f9b6a40   rq_lock (spinlock)
  root@number:~# uname -r
  6.13.0-rc2
  root@number:~#

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Chun-Tse Shao <ctshao@google.com>
Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Kees Cook <kees@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Song Liu <song@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Link: https://lore.kernel.org/r/20241220060009.507297-5-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-12-23 13:53:08 -03:00


// SPDX-License-Identifier: GPL-2.0
#ifndef PERF_LOCK_CONTENTION_H
#define PERF_LOCK_CONTENTION_H

#include <linux/list.h>
#include <linux/rbtree.h>

struct lock_filter {
	int		nr_types;
	int		nr_addrs;
	int		nr_syms;
	int		nr_cgrps;
	int		nr_slabs;
	unsigned int	*types;
	unsigned long	*addrs;
	char		**syms;
	u64		*cgrps;
	char		**slabs;	/* slab cache names, given to -L with a '&' prefix */
};

struct lock_stat {
	struct hlist_node	hash_entry;
	struct rb_node		rb;		/* used for sorting */

	u64			addr;		/* address of lockdep_map, used as ID */
	char			*name;		/* for strcpy(), we cannot use const */
	u64			*callstack;

	unsigned int		nr_acquire;
	unsigned int		nr_acquired;
	unsigned int		nr_contended;
	unsigned int		nr_release;

	union {
		unsigned int	nr_readlock;
		unsigned int	flags;
	};
	unsigned int		nr_trylock;

	/* these times are in nanoseconds */
	u64			avg_wait_time;
	u64			wait_time_total;
	u64			wait_time_min;
	u64			wait_time_max;

	int			broken; /* flag of blacklist */
	int			combined;
};

/*
 * States of lock_seq_stat
 *
 * UNINITIALIZED is required for detecting the first acquire event.
 * Due to the nature of lock events, there is no guarantee that the
 * first event seen for a lock is acquire; it can be acquired,
 * contended or release.
 */
#define SEQ_STATE_UNINITIALIZED	0 /* initial state */
#define SEQ_STATE_RELEASED	1
#define SEQ_STATE_ACQUIRING	2
#define SEQ_STATE_ACQUIRED	3
#define SEQ_STATE_READ_ACQUIRED	4
#define SEQ_STATE_CONTENDED	5

/*
 * MAX_LOCK_DEPTH
 * Imported from include/linux/sched.h.
 * Should this be synchronized?
 */
#define MAX_LOCK_DEPTH 48

/* based on kernel/lockdep.c */
#define LOCKHASH_BITS		12
#define LOCKHASH_SIZE		(1UL << LOCKHASH_BITS)

extern struct hlist_head *lockhash_table;

/*
 * struct lock_seq_stat:
 * Holds the state of one lock sequence
 * 1) acquire -> acquired -> release
 * 2) acquire -> contended -> acquired -> release
 * 3) acquire (with read or try) -> release
 * 4) Are there other patterns?
 */
struct lock_seq_stat {
	struct list_head	list;
	int			state;
	u64			prev_event_time;
	u64			addr;

	int			read_count;
};

struct thread_stat {
	struct rb_node		rb;

	u32			tid;
	struct list_head	seq_list;
};

/*
 * CONTENTION_STACK_DEPTH
 * Number of stack trace entries to find callers
 */
#define CONTENTION_STACK_DEPTH 8

/*
 * CONTENTION_STACK_SKIP
 * Number of stack trace entries to skip when finding callers.
 * The first few entries belong to the locking implementation itself.
 */
#define CONTENTION_STACK_SKIP 4

/*
 * flags for lock:contention_begin
 * Imported from include/trace/events/lock.h.
 */
#define LCB_F_SPIN	(1U << 0)
#define LCB_F_READ	(1U << 1)
#define LCB_F_WRITE	(1U << 2)
#define LCB_F_RT	(1U << 3)
#define LCB_F_PERCPU	(1U << 4)
#define LCB_F_MUTEX	(1U << 5)

struct evlist;
struct machine;
struct target;

struct lock_contention_fails {
	int	task;
	int	stack;
	int	time;
	int	data;
};

struct lock_contention {
	struct evlist		*evlist;
	struct target		*target;
	struct machine		*machine;
	struct hlist_head	*result;
	struct lock_filter	*filters;
	struct lock_contention_fails fails;
	struct rb_root		cgroups;
	unsigned long		map_nr_entries;
	int			max_stack;
	int			stack_skip;
	int			aggr_mode;
	int			owner;
	int			nr_filtered;
	bool			save_callstack;
};

struct option;
int parse_call_stack(const struct option *opt, const char *str, int unset);
bool needs_callstack(void);

struct lock_stat *lock_stat_find(u64 addr);
struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags);

bool match_callstack_filter(struct machine *machine, u64 *callstack, int max_stack_depth);

#ifdef HAVE_BPF_SKEL
int lock_contention_prepare(struct lock_contention *con);
int lock_contention_start(void);
int lock_contention_stop(void);
int lock_contention_read(struct lock_contention *con);
int lock_contention_finish(struct lock_contention *con);

#else  /* !HAVE_BPF_SKEL */

static inline int lock_contention_prepare(struct lock_contention *con __maybe_unused)
{
	return 0;
}

static inline int lock_contention_start(void) { return 0; }
static inline int lock_contention_stop(void) { return 0; }

static inline int lock_contention_finish(struct lock_contention *con __maybe_unused)
{
	return 0;
}

static inline int lock_contention_read(struct lock_contention *con __maybe_unused)
{
	return 0;
}

#endif  /* HAVE_BPF_SKEL */

#endif /* PERF_LOCK_CONTENTION_H */
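
For reference, a minimal sketch of how the HAVE_BPF_SKEL entry points
above are meant to be sequenced.  This is illustrative only; the real
driver lives in builtin-lock.c, and the evlist/target setup, workload
handling and result iteration are elided:

  #include "lock-contention.h"

  /*
   * Assumed driver: the caller has already filled in con->evlist,
   * con->target, con->filters, etc. before calling this.
   */
  static int lock_contention_run_sketch(struct lock_contention *con)
  {
          int err;

          err = lock_contention_prepare(con);     /* load BPF skeleton, apply filters */
          if (err)
                  return err;

          lock_contention_start();                /* enable collection */
          /* ... run or wait for the target workload here ... */
          lock_contention_stop();                 /* disable collection */

          err = lock_contention_read(con);        /* copy per-lock stats into con->result */

          lock_contention_finish(con);            /* detach and free BPF resources */
          return err;
  }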