mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-09-04 20:19:47 +08:00
When compiled with libunwind, perf does some preparatory work when
processing side-band events. This is not needed when report doesn't
actually unwind DWARF callchains, so it's disabled with the
dwarf_callchain_users bool.
However, we could move that check to a higher level and shield more
unwanted code for normal report processing, giving us the following
speed-up on a kernel build profile:
Before:
$ perf record make -j40
...
$ ll ../../perf.data
-rw-------. 1 jolsa jolsa 461783932 Apr 26 09:11 perf.data
$ perf stat -e cycles:u,instructions:u perf report -i perf.data > out
Performance counter stats for 'perf report -i perf.data':
78,669,920,155 cycles:u
99,076,431,951 instructions:u # 1.26 insn per cycle
55.382823668 seconds time elapsed
27.512341000 seconds user
27.712871000 seconds sys
After:
$ perf stat -e cycles:u,instructions:u perf report -i perf.data > out
Performance counter stats for 'perf report -i perf.data':
59,626,798,904 cycles:u
88,583,575,849 instructions:u # 1.49 insn per cycle
21.296935559 seconds time elapsed
20.010191000 seconds user
1.202935000 seconds sys
The speed-up is greater for profiles having many side-band events,
because these trigger the libunwind preparatory code.
This does not apply to perf compiled with libdw for DWARF unwinding,
only to builds with libunwind.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20190426073804.17238-1-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
96 lines
2.3 KiB
C
96 lines
2.3 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
#include "unwind.h"
|
|
#include "map.h"
|
|
#include "thread.h"
|
|
#include "session.h"
|
|
#include "debug.h"
|
|
#include "env.h"
|
|
#include "callchain.h"
|
|
|
|
/*
 * libunwind ops tables consulted by unwind__prepare_access().
 *
 * Each pointer is defined __weak with no initializer, so it is NULL here;
 * presumably the per-target libunwind objects override them when built in
 * (TODO confirm against the build files).
 */
struct unwind_libunwind_ops __weak *local_unwind_libunwind_ops;
struct unwind_libunwind_ops __weak *x86_32_unwind_libunwind_ops;
struct unwind_libunwind_ops __weak *arm64_unwind_libunwind_ops;
|
|
|
|
static void unwind__register_ops(struct thread *thread,
|
|
struct unwind_libunwind_ops *ops)
|
|
{
|
|
thread->unwind_libunwind_ops = ops;
|
|
}
|
|
|
|
int unwind__prepare_access(struct thread *thread, struct map *map,
|
|
bool *initialized)
|
|
{
|
|
const char *arch;
|
|
enum dso_type dso_type;
|
|
struct unwind_libunwind_ops *ops = local_unwind_libunwind_ops;
|
|
int err;
|
|
|
|
if (!dwarf_callchain_users)
|
|
return 0;
|
|
|
|
if (thread->addr_space) {
|
|
pr_debug("unwind: thread map already set, dso=%s\n",
|
|
map->dso->name);
|
|
if (initialized)
|
|
*initialized = true;
|
|
return 0;
|
|
}
|
|
|
|
/* env->arch is NULL for live-mode (i.e. perf top) */
|
|
if (!thread->mg->machine->env || !thread->mg->machine->env->arch)
|
|
goto out_register;
|
|
|
|
dso_type = dso__type(map->dso, thread->mg->machine);
|
|
if (dso_type == DSO__TYPE_UNKNOWN)
|
|
return 0;
|
|
|
|
arch = perf_env__arch(thread->mg->machine->env);
|
|
|
|
if (!strcmp(arch, "x86")) {
|
|
if (dso_type != DSO__TYPE_64BIT)
|
|
ops = x86_32_unwind_libunwind_ops;
|
|
} else if (!strcmp(arch, "arm64") || !strcmp(arch, "arm")) {
|
|
if (dso_type == DSO__TYPE_64BIT)
|
|
ops = arm64_unwind_libunwind_ops;
|
|
}
|
|
|
|
if (!ops) {
|
|
pr_err("unwind: target platform=%s is not supported\n", arch);
|
|
return 0;
|
|
}
|
|
out_register:
|
|
unwind__register_ops(thread, ops);
|
|
|
|
err = thread->unwind_libunwind_ops->prepare_access(thread);
|
|
if (initialized)
|
|
*initialized = err ? false : true;
|
|
return err;
|
|
}
|
|
|
|
void unwind__flush_access(struct thread *thread)
|
|
{
|
|
if (!dwarf_callchain_users)
|
|
return;
|
|
|
|
if (thread->unwind_libunwind_ops)
|
|
thread->unwind_libunwind_ops->flush_access(thread);
|
|
}
|
|
|
|
void unwind__finish_access(struct thread *thread)
|
|
{
|
|
if (!dwarf_callchain_users)
|
|
return;
|
|
|
|
if (thread->unwind_libunwind_ops)
|
|
thread->unwind_libunwind_ops->finish_access(thread);
|
|
}
|
|
|
|
/*
 * Forward to the get_entries() op registered for @thread and return its
 * result.  Returns 0 when the thread has no ops registered.
 */
int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
			struct thread *thread,
			struct perf_sample *data, int max_stack)
{
	struct unwind_libunwind_ops *ops = thread->unwind_libunwind_ops;

	if (!ops)
		return 0;

	return ops->get_entries(cb, arg, thread, data, max_stack);
}
|