mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-03-27 18:06:53 +08:00
Merge tag 'perf-tools-fixes-for-v6.0-2022-08-27' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull perf tools fixes from Arnaldo Carvalho de Melo: - Fixup setup of weak groups when using 'perf stat --repeat', add a 'perf test' for it. - Fix memory leaks in 'perf sched record' detected with -fsanitize=address. - Fix build when PYTHON_CONFIG is user supplied. - Capitalize topdown metrics' names in 'perf stat', so that the output, sometimes parsed, matches the Intel SDM docs. - Make sure the documentation for the save_type filter about Intel systems with Arch LBR support (12th-Gen+ client or 4th-Gen Xeon+ server) reflects recent related kernel changes. - Fix 'perf record' man page formatting of description of support to hybrid systems. - Update arm64's KVM header from the kernel sources. * tag 'perf-tools-fixes-for-v6.0-2022-08-27' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: perf stat: Capitalize topdown metrics' names perf docs: Update the documentation for the save_type filter perf sched: Fix memory leaks in __cmd_record detected with -fsanitize=address perf record: Fix manpage formatting of description of support to hybrid systems perf test: Stat test for repeat with a weak group perf stat: Clear evsel->reset_group for each stat run tools kvm headers arm64: Update KVM header from the kernel sources perf python: Fix build when PYTHON_CONFIG is user supplied
This commit is contained in:
@@ -75,9 +75,11 @@ struct kvm_regs {
|
||||
|
||||
/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
|
||||
#define KVM_ARM_DEVICE_TYPE_SHIFT 0
|
||||
#define KVM_ARM_DEVICE_TYPE_MASK (0xffff << KVM_ARM_DEVICE_TYPE_SHIFT)
|
||||
#define KVM_ARM_DEVICE_TYPE_MASK GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \
|
||||
KVM_ARM_DEVICE_TYPE_SHIFT)
|
||||
#define KVM_ARM_DEVICE_ID_SHIFT 16
|
||||
#define KVM_ARM_DEVICE_ID_MASK (0xffff << KVM_ARM_DEVICE_ID_SHIFT)
|
||||
#define KVM_ARM_DEVICE_ID_MASK GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \
|
||||
KVM_ARM_DEVICE_ID_SHIFT)
|
||||
|
||||
/* Supported device IDs */
|
||||
#define KVM_ARM_DEVICE_VGIC_V2 0
|
||||
|
||||
@@ -21,11 +21,6 @@ cat /sys/devices/cpu_atom/cpus
|
||||
|
||||
It indicates cpu0-cpu15 are core cpus and cpu16-cpu23 are atom cpus.
|
||||
|
||||
Quickstart
|
||||
|
||||
List hybrid event
|
||||
-----------------
|
||||
|
||||
As before, use perf-list to list the symbolic event.
|
||||
|
||||
perf list
|
||||
@@ -40,7 +35,6 @@ the event is belong to. Same event name but with different pmu can
|
||||
be supported.
|
||||
|
||||
Enable hybrid event with a specific pmu
|
||||
---------------------------------------
|
||||
|
||||
To enable a core-only event or an atom-only event, the following syntax is supported:
|
||||
|
||||
@@ -53,7 +47,6 @@ For example, count the 'cycles' event on core cpus.
|
||||
perf stat -e cpu_core/cycles/
|
||||
|
||||
Create two events for one hardware event automatically
|
||||
------------------------------------------------------
|
||||
|
||||
When creating one event and the event is available on both atom and core,
|
||||
two events are created automatically. One is for atom, the other is for
|
||||
@@ -132,7 +125,6 @@ For perf-stat result, it displays two events:
|
||||
The first 'cycles' is core event, the second 'cycles' is atom event.
|
||||
|
||||
Thread mode example:
|
||||
--------------------
|
||||
|
||||
perf-stat reports the scaled counts for hybrid event and with a percentage
|
||||
displayed. The percentage is the event's running time/enabling time.
|
||||
@@ -176,14 +168,12 @@ perf_event_attr:
|
||||
604,097,080 cpu_atom/cycles/ (99.57%)
|
||||
|
||||
perf-record:
|
||||
------------
|
||||
|
||||
If there is no '-e' specified in perf record, on hybrid platform,
|
||||
it creates two default 'cycles' and adds them to event list. One
|
||||
is for core, the other is for atom.
|
||||
|
||||
perf-stat:
|
||||
----------
|
||||
|
||||
If there is no '-e' specified in perf stat, on hybrid platform,
|
||||
besides software events, the following events are created and
|
||||
|
||||
@@ -397,6 +397,9 @@ following filters are defined:
|
||||
- abort_tx: only when the target is a hardware transaction abort
|
||||
- cond: conditional branches
|
||||
- save_type: save branch type during sampling in case binary is not available later
|
||||
For the platforms with Intel Arch LBR support (12th-Gen+ client or
|
||||
4th-Gen Xeon+ server), the save branch type is unconditionally enabled
|
||||
when the taken branch stack sampling is enabled.
|
||||
|
||||
+
|
||||
The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
|
||||
@@ -757,8 +760,6 @@ events in data directory files. Option specified with no or empty value
|
||||
defaults to CPU layout. Masks defined or provided by the option value are
|
||||
filtered through the mask provided by -C option.
|
||||
|
||||
include::intel-hybrid.txt[]
|
||||
|
||||
--debuginfod[=URLs]::
|
||||
Specify debuginfod URL to be used when caching perf.data binaries,
|
||||
it follows the same syntax as the DEBUGINFOD_URLS variable, like:
|
||||
@@ -778,6 +779,8 @@ include::intel-hybrid.txt[]
|
||||
only, as of now. So the applications built without the frame
|
||||
pointer might see bogus addresses.
|
||||
|
||||
include::intel-hybrid.txt[]
|
||||
|
||||
SEE ALSO
|
||||
--------
|
||||
linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1]
|
||||
|
||||
@@ -265,7 +265,7 @@ endif
|
||||
# defined. get-executable-or-default fails with an error if the first argument is supplied but
|
||||
# doesn't exist.
|
||||
override PYTHON_CONFIG := $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON_AUTO))
|
||||
override PYTHON := $(call get-executable-or-default,PYTHON,$(subst -config,,$(PYTHON_AUTO)))
|
||||
override PYTHON := $(call get-executable-or-default,PYTHON,$(subst -config,,$(PYTHON_CONFIG)))
|
||||
|
||||
grep-libs = $(filter -l%,$(1))
|
||||
strip-libs = $(filter-out -l%,$(1))
|
||||
|
||||
@@ -3355,7 +3355,8 @@ static bool schedstat_events_exposed(void)
|
||||
static int __cmd_record(int argc, const char **argv)
|
||||
{
|
||||
unsigned int rec_argc, i, j;
|
||||
const char **rec_argv;
|
||||
char **rec_argv;
|
||||
const char **rec_argv_copy;
|
||||
const char * const record_args[] = {
|
||||
"record",
|
||||
"-a",
|
||||
@@ -3384,6 +3385,7 @@ static int __cmd_record(int argc, const char **argv)
|
||||
ARRAY_SIZE(schedstat_args) : 0;
|
||||
|
||||
struct tep_event *waking_event;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* +2 for either "-e", "sched:sched_wakeup" or
|
||||
@@ -3391,14 +3393,18 @@ static int __cmd_record(int argc, const char **argv)
|
||||
*/
|
||||
rec_argc = ARRAY_SIZE(record_args) + 2 + schedstat_argc + argc - 1;
|
||||
rec_argv = calloc(rec_argc + 1, sizeof(char *));
|
||||
|
||||
if (rec_argv == NULL)
|
||||
return -ENOMEM;
|
||||
rec_argv_copy = calloc(rec_argc + 1, sizeof(char *));
|
||||
if (rec_argv_copy == NULL) {
|
||||
free(rec_argv);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(record_args); i++)
|
||||
rec_argv[i] = strdup(record_args[i]);
|
||||
|
||||
rec_argv[i++] = "-e";
|
||||
rec_argv[i++] = strdup("-e");
|
||||
waking_event = trace_event__tp_format("sched", "sched_waking");
|
||||
if (!IS_ERR(waking_event))
|
||||
rec_argv[i++] = strdup("sched:sched_waking");
|
||||
@@ -3409,11 +3415,19 @@ static int __cmd_record(int argc, const char **argv)
|
||||
rec_argv[i++] = strdup(schedstat_args[j]);
|
||||
|
||||
for (j = 1; j < (unsigned int)argc; j++, i++)
|
||||
rec_argv[i] = argv[j];
|
||||
rec_argv[i] = strdup(argv[j]);
|
||||
|
||||
BUG_ON(i != rec_argc);
|
||||
|
||||
return cmd_record(i, rec_argv);
|
||||
memcpy(rec_argv_copy, rec_argv, sizeof(char *) * rec_argc);
|
||||
ret = cmd_record(rec_argc, rec_argv_copy);
|
||||
|
||||
for (i = 0; i < rec_argc; i++)
|
||||
free(rec_argv[i]);
|
||||
free(rec_argv);
|
||||
free(rec_argv_copy);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int cmd_sched(int argc, const char **argv)
|
||||
|
||||
@@ -826,6 +826,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
|
||||
}
|
||||
|
||||
evlist__for_each_entry(evsel_list, counter) {
|
||||
counter->reset_group = false;
|
||||
if (bpf_counter__load(counter, &target))
|
||||
return -1;
|
||||
if (!evsel__is_bpf(counter))
|
||||
|
||||
@@ -28,6 +28,24 @@ test_stat_record_report() {
|
||||
echo "stat record and report test [Success]"
|
||||
}
|
||||
|
||||
test_stat_repeat_weak_groups() {
|
||||
echo "stat repeat weak groups test"
|
||||
if ! perf stat -e '{cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles}' \
|
||||
true 2>&1 | grep -q 'seconds time elapsed'
|
||||
then
|
||||
echo "stat repeat weak groups test [Skipped event parsing failed]"
|
||||
return
|
||||
fi
|
||||
if ! perf stat -r2 -e '{cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles}:W' \
|
||||
true > /dev/null 2>&1
|
||||
then
|
||||
echo "stat repeat weak groups test [Failed]"
|
||||
err=1
|
||||
return
|
||||
fi
|
||||
echo "stat repeat weak groups test [Success]"
|
||||
}
|
||||
|
||||
test_topdown_groups() {
|
||||
# Topdown events must be grouped with the slots event first. Test that
|
||||
# parse-events reorders this.
|
||||
@@ -75,6 +93,7 @@ test_topdown_weak_groups() {
|
||||
|
||||
test_default_stat
|
||||
test_stat_record_report
|
||||
test_stat_repeat_weak_groups
|
||||
test_topdown_groups
|
||||
test_topdown_weak_groups
|
||||
exit $err
|
||||
|
||||
@@ -1193,7 +1193,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
|
||||
&rsd);
|
||||
if (retiring > 0.7)
|
||||
color = PERF_COLOR_GREEN;
|
||||
print_metric(config, ctxp, color, "%8.1f%%", "retiring",
|
||||
print_metric(config, ctxp, color, "%8.1f%%", "Retiring",
|
||||
retiring * 100.);
|
||||
} else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) &&
|
||||
full_td(cpu_map_idx, st, &rsd)) {
|
||||
@@ -1202,7 +1202,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
|
||||
&rsd);
|
||||
if (fe_bound > 0.2)
|
||||
color = PERF_COLOR_RED;
|
||||
print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
|
||||
print_metric(config, ctxp, color, "%8.1f%%", "Frontend Bound",
|
||||
fe_bound * 100.);
|
||||
} else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) &&
|
||||
full_td(cpu_map_idx, st, &rsd)) {
|
||||
@@ -1211,7 +1211,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
|
||||
&rsd);
|
||||
if (be_bound > 0.2)
|
||||
color = PERF_COLOR_RED;
|
||||
print_metric(config, ctxp, color, "%8.1f%%", "backend bound",
|
||||
print_metric(config, ctxp, color, "%8.1f%%", "Backend Bound",
|
||||
be_bound * 100.);
|
||||
} else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) &&
|
||||
full_td(cpu_map_idx, st, &rsd)) {
|
||||
@@ -1220,7 +1220,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
|
||||
&rsd);
|
||||
if (bad_spec > 0.1)
|
||||
color = PERF_COLOR_RED;
|
||||
print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
|
||||
print_metric(config, ctxp, color, "%8.1f%%", "Bad Speculation",
|
||||
bad_spec * 100.);
|
||||
} else if (perf_stat_evsel__is(evsel, TOPDOWN_HEAVY_OPS) &&
|
||||
full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
|
||||
@@ -1234,13 +1234,13 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
|
||||
|
||||
if (retiring > 0.7 && heavy_ops > 0.1)
|
||||
color = PERF_COLOR_GREEN;
|
||||
print_metric(config, ctxp, color, "%8.1f%%", "heavy operations",
|
||||
print_metric(config, ctxp, color, "%8.1f%%", "Heavy Operations",
|
||||
heavy_ops * 100.);
|
||||
if (retiring > 0.7 && light_ops > 0.6)
|
||||
color = PERF_COLOR_GREEN;
|
||||
else
|
||||
color = NULL;
|
||||
print_metric(config, ctxp, color, "%8.1f%%", "light operations",
|
||||
print_metric(config, ctxp, color, "%8.1f%%", "Light Operations",
|
||||
light_ops * 100.);
|
||||
} else if (perf_stat_evsel__is(evsel, TOPDOWN_BR_MISPREDICT) &&
|
||||
full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
|
||||
@@ -1254,13 +1254,13 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
|
||||
|
||||
if (bad_spec > 0.1 && br_mis > 0.05)
|
||||
color = PERF_COLOR_RED;
|
||||
print_metric(config, ctxp, color, "%8.1f%%", "branch mispredict",
|
||||
print_metric(config, ctxp, color, "%8.1f%%", "Branch Mispredict",
|
||||
br_mis * 100.);
|
||||
if (bad_spec > 0.1 && m_clears > 0.05)
|
||||
color = PERF_COLOR_RED;
|
||||
else
|
||||
color = NULL;
|
||||
print_metric(config, ctxp, color, "%8.1f%%", "machine clears",
|
||||
print_metric(config, ctxp, color, "%8.1f%%", "Machine Clears",
|
||||
m_clears * 100.);
|
||||
} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_LAT) &&
|
||||
full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
|
||||
@@ -1274,13 +1274,13 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
|
||||
|
||||
if (fe_bound > 0.2 && fetch_lat > 0.15)
|
||||
color = PERF_COLOR_RED;
|
||||
print_metric(config, ctxp, color, "%8.1f%%", "fetch latency",
|
||||
print_metric(config, ctxp, color, "%8.1f%%", "Fetch Latency",
|
||||
fetch_lat * 100.);
|
||||
if (fe_bound > 0.2 && fetch_bw > 0.1)
|
||||
color = PERF_COLOR_RED;
|
||||
else
|
||||
color = NULL;
|
||||
print_metric(config, ctxp, color, "%8.1f%%", "fetch bandwidth",
|
||||
print_metric(config, ctxp, color, "%8.1f%%", "Fetch Bandwidth",
|
||||
fetch_bw * 100.);
|
||||
} else if (perf_stat_evsel__is(evsel, TOPDOWN_MEM_BOUND) &&
|
||||
full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
|
||||
@@ -1294,13 +1294,13 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
|
||||
|
||||
if (be_bound > 0.2 && mem_bound > 0.2)
|
||||
color = PERF_COLOR_RED;
|
||||
print_metric(config, ctxp, color, "%8.1f%%", "memory bound",
|
||||
print_metric(config, ctxp, color, "%8.1f%%", "Memory Bound",
|
||||
mem_bound * 100.);
|
||||
if (be_bound > 0.2 && core_bound > 0.1)
|
||||
color = PERF_COLOR_RED;
|
||||
else
|
||||
color = NULL;
|
||||
print_metric(config, ctxp, color, "%8.1f%%", "Core bound",
|
||||
print_metric(config, ctxp, color, "%8.1f%%", "Core Bound",
|
||||
core_bound * 100.);
|
||||
} else if (evsel->metric_expr) {
|
||||
generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
|
||||
|
||||
Reference in New Issue
Block a user