mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-09-04 20:19:47 +08:00

Sometimes users also want to see average latency as well as histogram. Display latency statistics like avg, max, min at the end. $ sudo ./perf ftrace latency -ab -T synchronize_rcu -- ... # DURATION | COUNT | GRAPH | 0 - 1 us | 0 | | 1 - 2 us | 0 | | 2 - 4 us | 0 | | 4 - 8 us | 0 | | 8 - 16 us | 0 | | 16 - 32 us | 0 | | 32 - 64 us | 0 | | 64 - 128 us | 0 | | 128 - 256 us | 0 | | 256 - 512 us | 0 | | 512 - 1024 us | 0 | | 1 - 2 ms | 0 | | 2 - 4 ms | 0 | | 4 - 8 ms | 0 | | 8 - 16 ms | 1 | ##### | 16 - 32 ms | 7 | ######################################## | 32 - 64 ms | 0 | | 64 - 128 ms | 0 | | 128 - 256 ms | 0 | | 256 - 512 ms | 0 | | 512 - 1024 ms | 0 | | 1 - ... s | 0 | | # statistics (in usec) total time: 171832 avg time: 21479 max time: 30906 min time: 15869 count: 8 Committer testing: root@number:~# perf ftrace latency -nab --bucket-range 100 --max-latency 512 -T switch_mm_irqs_off sleep 1 # DURATION | COUNT | GRAPH | 0 - 100 ns | 314 | ## | 100 - 200 ns | 1843 | ############# | 200 - 300 ns | 1390 | ########## | 300 - 400 ns | 844 | ###### | 400 - 500 ns | 480 | ### | 500 - 512 ns | 315 | ## | 512 - ... ns | 16 | | # statistics (in nsec) total time: 2448936 avg time: 387 max time: 3285 min time: 82 count: 6328 root@number:~# Reviewed-by: James Clark <james.clark@linaro.org> Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: https://lore.kernel.org/r/20250107224352.1128669-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
162 lines
3.1 KiB
C
162 lines
3.1 KiB
C
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
|
// Copyright (c) 2021 Google
|
|
#include "vmlinux.h"
|
|
#include <bpf/bpf_helpers.h>
|
|
#include <bpf/bpf_tracing.h>
|
|
|
|
// This should be in sync with "util/ftrace.h"
|
|
#define NUM_BUCKET 22
|
|
|
|
struct {
|
|
__uint(type, BPF_MAP_TYPE_HASH);
|
|
__uint(key_size, sizeof(__u64));
|
|
__uint(value_size, sizeof(__u64));
|
|
__uint(max_entries, 10000);
|
|
} functime SEC(".maps");
|
|
|
|
struct {
|
|
__uint(type, BPF_MAP_TYPE_HASH);
|
|
__uint(key_size, sizeof(__u32));
|
|
__uint(value_size, sizeof(__u8));
|
|
__uint(max_entries, 1);
|
|
} cpu_filter SEC(".maps");
|
|
|
|
struct {
|
|
__uint(type, BPF_MAP_TYPE_HASH);
|
|
__uint(key_size, sizeof(__u32));
|
|
__uint(value_size, sizeof(__u8));
|
|
__uint(max_entries, 1);
|
|
} task_filter SEC(".maps");
|
|
|
|
struct {
|
|
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
|
|
__uint(key_size, sizeof(__u32));
|
|
__uint(value_size, sizeof(__u64));
|
|
__uint(max_entries, NUM_BUCKET);
|
|
} latency SEC(".maps");
|
|
|
|
|
|
int enabled = 0;
|
|
|
|
// stats
|
|
__s64 total;
|
|
__s64 count;
|
|
__s64 max;
|
|
__s64 min;
|
|
|
|
const volatile int has_cpu = 0;
|
|
const volatile int has_task = 0;
|
|
const volatile int use_nsec = 0;
|
|
const volatile unsigned int bucket_range;
|
|
const volatile unsigned int min_latency;
|
|
const volatile unsigned int max_latency;
|
|
|
|
SEC("kprobe/func")
|
|
int BPF_PROG(func_begin)
|
|
{
|
|
__u64 key, now;
|
|
|
|
if (!enabled)
|
|
return 0;
|
|
|
|
key = bpf_get_current_pid_tgid();
|
|
|
|
if (has_cpu) {
|
|
__u32 cpu = bpf_get_smp_processor_id();
|
|
__u8 *ok;
|
|
|
|
ok = bpf_map_lookup_elem(&cpu_filter, &cpu);
|
|
if (!ok)
|
|
return 0;
|
|
}
|
|
|
|
if (has_task) {
|
|
__u32 pid = key & 0xffffffff;
|
|
__u8 *ok;
|
|
|
|
ok = bpf_map_lookup_elem(&task_filter, &pid);
|
|
if (!ok)
|
|
return 0;
|
|
}
|
|
|
|
now = bpf_ktime_get_ns();
|
|
|
|
// overwrite timestamp for nested functions
|
|
bpf_map_update_elem(&functime, &key, &now, BPF_ANY);
|
|
return 0;
|
|
}
|
|
|
|
SEC("kretprobe/func")
|
|
int BPF_PROG(func_end)
|
|
{
|
|
__u64 tid;
|
|
__u64 *start;
|
|
__u64 cmp_base = use_nsec ? 1 : 1000;
|
|
|
|
if (!enabled)
|
|
return 0;
|
|
|
|
tid = bpf_get_current_pid_tgid();
|
|
|
|
start = bpf_map_lookup_elem(&functime, &tid);
|
|
if (start) {
|
|
__s64 delta = bpf_ktime_get_ns() - *start;
|
|
__u32 key = 0;
|
|
__u64 *hist;
|
|
|
|
bpf_map_delete_elem(&functime, &tid);
|
|
|
|
if (delta < 0)
|
|
return 0;
|
|
|
|
if (bucket_range != 0) {
|
|
delta /= cmp_base;
|
|
|
|
if (min_latency > 0) {
|
|
if (delta > min_latency)
|
|
delta -= min_latency;
|
|
else
|
|
goto do_lookup;
|
|
}
|
|
|
|
// Less than 1 unit (ms or ns), or, in the future,
|
|
// than the min latency desired.
|
|
if (delta > 0) { // 1st entry: [ 1 unit .. bucket_range units )
|
|
// clang 12 doesn't like s64 / u32 division
|
|
key = (__u64)delta / bucket_range + 1;
|
|
if (key >= NUM_BUCKET ||
|
|
delta >= max_latency - min_latency)
|
|
key = NUM_BUCKET - 1;
|
|
}
|
|
|
|
delta += min_latency;
|
|
goto do_lookup;
|
|
}
|
|
// calculate index using delta
|
|
for (key = 0; key < (NUM_BUCKET - 1); key++) {
|
|
if (delta < (cmp_base << key))
|
|
break;
|
|
}
|
|
|
|
do_lookup:
|
|
hist = bpf_map_lookup_elem(&latency, &key);
|
|
if (!hist)
|
|
return 0;
|
|
|
|
*hist += 1;
|
|
|
|
if (bucket_range == 0)
|
|
delta /= cmp_base;
|
|
|
|
__sync_fetch_and_add(&total, delta);
|
|
__sync_fetch_and_add(&count, 1);
|
|
|
|
if (delta > max)
|
|
max = delta;
|
|
if (delta < min)
|
|
min = delta;
|
|
}
|
|
|
|
return 0;
|
|
}
|