mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-09-04 20:19:47 +08:00

Sometimes users also want to see the average latency as well as the histogram.
Display latency statistics like avg, max, min at the end.

  $ sudo ./perf ftrace latency -ab -T synchronize_rcu -- ...
  #   DURATION     |      COUNT | GRAPH                                          |
       0 - 1    us |          0 |                                                |
       1 - 2    us |          0 |                                                |
       2 - 4    us |          0 |                                                |
       4 - 8    us |          0 |                                                |
       8 - 16   us |          0 |                                                |
      16 - 32   us |          0 |                                                |
      32 - 64   us |          0 |                                                |
      64 - 128  us |          0 |                                                |
     128 - 256  us |          0 |                                                |
     256 - 512  us |          0 |                                                |
     512 - 1024 us |          0 |                                                |
       1 - 2    ms |          0 |                                                |
       2 - 4    ms |          0 |                                                |
       4 - 8    ms |          0 |                                                |
       8 - 16   ms |          1 | #####                                          |
      16 - 32   ms |          7 | ########################################       |
      32 - 64   ms |          0 |                                                |
      64 - 128  ms |          0 |                                                |
     128 - 256  ms |          0 |                                                |
     256 - 512  ms |          0 |                                                |
     512 - 1024 ms |          0 |                                                |
       1 - ...   s |          0 |                                                |

  # statistics  (in usec)
    total time:               171832
      avg time:                21479
      max time:                30906
      min time:                15869
         count:                    8

Committer testing:

  root@number:~# perf ftrace latency -nab --bucket-range 100 --max-latency 512 -T switch_mm_irqs_off sleep 1
  #   DURATION     |      COUNT | GRAPH                                          |
       0 - 100  ns |        314 | ##                                             |
     100 - 200  ns |       1843 | #############                                  |
     200 - 300  ns |       1390 | ##########                                     |
     300 - 400  ns |        844 | ######                                         |
     400 - 500  ns |        480 | ###                                            |
     500 - 512  ns |        315 | ##                                             |
     512 - ...  ns |         16 |                                                |

  # statistics  (in nsec)
    total time:              2448936
      avg time:                  387
      max time:                 3285
      min time:                   82
         count:                 6328
  root@number:~#

Reviewed-by: James Clark <james.clark@linaro.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20250107224352.1128669-1-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
168 lines
3.8 KiB
C
168 lines
3.8 KiB
C
#include <stdio.h>
|
|
#include <fcntl.h>
|
|
#include <stdint.h>
|
|
#include <stdlib.h>
|
|
|
|
#include <linux/err.h>
|
|
|
|
#include "util/ftrace.h"
|
|
#include "util/cpumap.h"
|
|
#include "util/thread_map.h"
|
|
#include "util/debug.h"
|
|
#include "util/evlist.h"
|
|
#include "util/bpf_counter.h"
|
|
#include "util/stat.h"
|
|
|
|
#include "util/bpf_skel/func_latency.skel.h"
|
|
|
|
/*
 * The func_latency BPF skeleton shared by every function in this file.
 * It is assigned by perf_ftrace__latency_prepare_bpf(), dereferenced by the
 * start/stop/read helpers and handed to func_latency_bpf__destroy() by
 * perf_ftrace__latency_cleanup_bpf().
 */
static struct func_latency_bpf *skel;
|
|
|
|
int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
|
|
{
|
|
int fd, err;
|
|
int i, ncpus = 1, ntasks = 1;
|
|
struct filter_entry *func;
|
|
|
|
if (!list_is_singular(&ftrace->filters)) {
|
|
pr_err("ERROR: %s target function(s).\n",
|
|
list_empty(&ftrace->filters) ? "No" : "Too many");
|
|
return -1;
|
|
}
|
|
|
|
func = list_first_entry(&ftrace->filters, struct filter_entry, list);
|
|
|
|
skel = func_latency_bpf__open();
|
|
if (!skel) {
|
|
pr_err("Failed to open func latency skeleton\n");
|
|
return -1;
|
|
}
|
|
|
|
skel->rodata->bucket_range = ftrace->bucket_range;
|
|
skel->rodata->min_latency = ftrace->min_latency;
|
|
|
|
/* don't need to set cpu filter for system-wide mode */
|
|
if (ftrace->target.cpu_list) {
|
|
ncpus = perf_cpu_map__nr(ftrace->evlist->core.user_requested_cpus);
|
|
bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
|
|
skel->rodata->has_cpu = 1;
|
|
}
|
|
|
|
if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
|
|
ntasks = perf_thread_map__nr(ftrace->evlist->core.threads);
|
|
bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
|
|
skel->rodata->has_task = 1;
|
|
}
|
|
|
|
skel->rodata->use_nsec = ftrace->use_nsec;
|
|
|
|
set_max_rlimit();
|
|
|
|
err = func_latency_bpf__load(skel);
|
|
if (err) {
|
|
pr_err("Failed to load func latency skeleton\n");
|
|
goto out;
|
|
}
|
|
|
|
if (ftrace->target.cpu_list) {
|
|
u32 cpu;
|
|
u8 val = 1;
|
|
|
|
fd = bpf_map__fd(skel->maps.cpu_filter);
|
|
|
|
for (i = 0; i < ncpus; i++) {
|
|
cpu = perf_cpu_map__cpu(ftrace->evlist->core.user_requested_cpus, i).cpu;
|
|
bpf_map_update_elem(fd, &cpu, &val, BPF_ANY);
|
|
}
|
|
}
|
|
|
|
if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
|
|
u32 pid;
|
|
u8 val = 1;
|
|
|
|
fd = bpf_map__fd(skel->maps.task_filter);
|
|
|
|
for (i = 0; i < ntasks; i++) {
|
|
pid = perf_thread_map__pid(ftrace->evlist->core.threads, i);
|
|
bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
|
|
}
|
|
}
|
|
|
|
skel->bss->min = INT64_MAX;
|
|
|
|
skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
|
|
false, func->name);
|
|
if (IS_ERR(skel->links.func_begin)) {
|
|
pr_err("Failed to attach fentry program\n");
|
|
err = PTR_ERR(skel->links.func_begin);
|
|
goto out;
|
|
}
|
|
|
|
skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end,
|
|
true, func->name);
|
|
if (IS_ERR(skel->links.func_end)) {
|
|
pr_err("Failed to attach fexit program\n");
|
|
err = PTR_ERR(skel->links.func_end);
|
|
goto out;
|
|
}
|
|
|
|
/* XXX: we don't actually use this fd - just for poll() */
|
|
return open("/dev/null", O_RDONLY);
|
|
|
|
out:
|
|
return err;
|
|
}
|
|
|
|
/*
 * Switch latency collection on by setting the 'enabled' flag in the
 * skeleton's .bss section.  @ftrace is unused.  Always returns 0.
 */
int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused)
{
	skel->bss->enabled = 1;

	return 0;
}
|
|
|
|
/*
 * Switch latency collection off by clearing the 'enabled' flag in the
 * skeleton's .bss section.  @ftrace is unused.  Always returns 0.
 */
int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
{
	skel->bss->enabled = 0;

	return 0;
}
|
|
|
|
/*
 * Copy the collected histogram and summary statistics out of the skeleton.
 *
 * @ftrace:  unused.
 * @buckets: array of at least NUM_BUCKET counters.  Each looked-up bucket is
 *           added to the existing value; a bucket whose lookup fails is
 *           reset to 0.
 * @stats:   receives n, mean, max and min, but only when at least one sample
 *           was counted; otherwise it is left untouched.
 *
 * Every lookup is given a scratch buffer holding one u64 per CPU and the
 * entries are summed, so the latency map is presumably a per-CPU map
 * (confirm against the BPF program).
 *
 * Returns 0 on success or -ENOMEM if the scratch buffer cannot be allocated.
 */
int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
				  int buckets[], struct stats *stats)
{
	int nr_cpus = cpu__max_cpu().cpu;
	int map_fd = bpf_map__fd(skel->maps.latency);
	u64 *percpu;
	u32 bucket;

	percpu = calloc(nr_cpus, sizeof(*percpu));
	if (!percpu)
		return -ENOMEM;

	for (bucket = 0; bucket < NUM_BUCKET; bucket++) {
		int cpu;

		if (bpf_map_lookup_elem(map_fd, &bucket, percpu)) {
			buckets[bucket] = 0;
			continue;
		}

		for (cpu = 0; cpu < nr_cpus; cpu++)
			buckets[bucket] += percpu[cpu];
	}

	if (skel->bss->count) {
		stats->mean = skel->bss->total / skel->bss->count;
		stats->n = skel->bss->count;
		stats->max = skel->bss->max;
		stats->min = skel->bss->min;
	}

	free(percpu);
	return 0;
}
|
|
|
|
/*
 * Destroy the func_latency skeleton set up by
 * perf_ftrace__latency_prepare_bpf().  @ftrace is unused.  Always returns 0.
 */
int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace __maybe_unused)
{
	func_latency_bpf__destroy(skel);
	/*
	 * Do not leave the file-scope pointer dangling: a repeated cleanup
	 * must not hand freed memory back to the destroy routine.
	 */
	skel = NULL;

	return 0;
}
|