2
0
mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git synced 2025-09-04 20:19:47 +08:00
linux/tools/perf/util/bpf_skel/func_latency.bpf.c
Namhyung Kim 86a12b92a9 perf ftrace: Display latency statistics at the end
Sometimes users also want to see average latency as well as histogram.

Display latency statistics like avg, max, min at the end.

  $ sudo ./perf ftrace latency -ab -T synchronize_rcu -- ...
  #   DURATION     |      COUNT | GRAPH                                          |
       0 - 1    us |          0 |                                                |
       1 - 2    us |          0 |                                                |
       2 - 4    us |          0 |                                                |
       4 - 8    us |          0 |                                                |
       8 - 16   us |          0 |                                                |
      16 - 32   us |          0 |                                                |
      32 - 64   us |          0 |                                                |
      64 - 128  us |          0 |                                                |
     128 - 256  us |          0 |                                                |
     256 - 512  us |          0 |                                                |
     512 - 1024 us |          0 |                                                |
       1 - 2    ms |          0 |                                                |
       2 - 4    ms |          0 |                                                |
       4 - 8    ms |          0 |                                                |
       8 - 16   ms |          1 | #####                                          |
      16 - 32   ms |          7 | ########################################       |
      32 - 64   ms |          0 |                                                |
      64 - 128  ms |          0 |                                                |
     128 - 256  ms |          0 |                                                |
     256 - 512  ms |          0 |                                                |
     512 - 1024 ms |          0 |                                                |
       1 - ...   s |          0 |                                                |

  # statistics  (in usec)
    total time:               171832
      avg time:                21479
      max time:                30906
      min time:                15869
         count:                    8

Committer testing:

  root@number:~# perf ftrace latency -nab --bucket-range 100 --max-latency 512 -T switch_mm_irqs_off sleep 1
  #   DURATION     |      COUNT | GRAPH                                          |
       0 -  100 ns |        314 | ##                                             |
     100 -  200 ns |       1843 | #############                                  |
     200 -  300 ns |       1390 | ##########                                     |
     300 -  400 ns |        844 | ######                                         |
     400 -  500 ns |        480 | ###                                            |
     500 -  512 ns |        315 | ##                                             |
     512 -  ... ns |         16 |                                                |

  # statistics  (in nsec)
    total time:              2448936
      avg time:                  387
      max time:                 3285
      min time:                   82
         count:                 6328
  root@number:~#

Reviewed-by: James Clark <james.clark@linaro.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20250107224352.1128669-1-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2025-01-08 17:20:42 -03:00

162 lines
3.1 KiB
C

// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
// Copyright (c) 2021 Google
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
// This should be in sync with "util/ftrace.h"
#define NUM_BUCKET 22
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(key_size, sizeof(__u64));
__uint(value_size, sizeof(__u64));
__uint(max_entries, 10000);
} functime SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(key_size, sizeof(__u32));
__uint(value_size, sizeof(__u8));
__uint(max_entries, 1);
} cpu_filter SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(key_size, sizeof(__u32));
__uint(value_size, sizeof(__u8));
__uint(max_entries, 1);
} task_filter SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(key_size, sizeof(__u32));
__uint(value_size, sizeof(__u64));
__uint(max_entries, NUM_BUCKET);
} latency SEC(".maps");
int enabled = 0;
// stats
__s64 total;
__s64 count;
__s64 max;
__s64 min;
const volatile int has_cpu = 0;
const volatile int has_task = 0;
const volatile int use_nsec = 0;
const volatile unsigned int bucket_range;
const volatile unsigned int min_latency;
const volatile unsigned int max_latency;
SEC("kprobe/func")
int BPF_PROG(func_begin)
{
__u64 key, now;
if (!enabled)
return 0;
key = bpf_get_current_pid_tgid();
if (has_cpu) {
__u32 cpu = bpf_get_smp_processor_id();
__u8 *ok;
ok = bpf_map_lookup_elem(&cpu_filter, &cpu);
if (!ok)
return 0;
}
if (has_task) {
__u32 pid = key & 0xffffffff;
__u8 *ok;
ok = bpf_map_lookup_elem(&task_filter, &pid);
if (!ok)
return 0;
}
now = bpf_ktime_get_ns();
// overwrite timestamp for nested functions
bpf_map_update_elem(&functime, &key, &now, BPF_ANY);
return 0;
}
SEC("kretprobe/func")
int BPF_PROG(func_end)
{
__u64 tid;
__u64 *start;
__u64 cmp_base = use_nsec ? 1 : 1000;
if (!enabled)
return 0;
tid = bpf_get_current_pid_tgid();
start = bpf_map_lookup_elem(&functime, &tid);
if (start) {
__s64 delta = bpf_ktime_get_ns() - *start;
__u32 key = 0;
__u64 *hist;
bpf_map_delete_elem(&functime, &tid);
if (delta < 0)
return 0;
if (bucket_range != 0) {
delta /= cmp_base;
if (min_latency > 0) {
if (delta > min_latency)
delta -= min_latency;
else
goto do_lookup;
}
// Less than 1 unit (ms or ns), or, in the future,
// than the min latency desired.
if (delta > 0) { // 1st entry: [ 1 unit .. bucket_range units )
// clang 12 doesn't like s64 / u32 division
key = (__u64)delta / bucket_range + 1;
if (key >= NUM_BUCKET ||
delta >= max_latency - min_latency)
key = NUM_BUCKET - 1;
}
delta += min_latency;
goto do_lookup;
}
// calculate index using delta
for (key = 0; key < (NUM_BUCKET - 1); key++) {
if (delta < (cmp_base << key))
break;
}
do_lookup:
hist = bpf_map_lookup_elem(&latency, &key);
if (!hist)
return 0;
*hist += 1;
if (bucket_range == 0)
delta /= cmp_base;
__sync_fetch_and_add(&total, delta);
__sync_fetch_and_add(&count, 1);
if (delta > max)
max = delta;
if (delta < min)
min = delta;
}
return 0;
}