mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-09-04 20:19:47 +08:00

perf report/top/annotate TUI: - Accept the left arrow key as a Zoom out if done on the first column. - Show if source code toggle status in title, to help spotting bugs with the various disassemblers (capstone, llvm, objdump). - Provide feedback on unhandled hotkeys. Build: - Better inform when certain features are not available with warnings in the build process and in 'perf version --build-options' or 'perf -vv'. perf record: - Improve the --off-cpu code by synthesizing events for switch-out -> switch-in intervals using a BPF program. This can be fine tuned using a --off-cpu-thresh knob. perf report: - Add 'tgid' sort key. perf mem/c2c: - Add 'op', 'cache', 'snoop', 'dtlb' output fields. - Add support for 'ldlat' on AMD IBS (Instruction Based Sampling). perf ftrace: - Use process/session specific trace settings instead of messing with the global ftrace knobs. perf trace: - Implement syscall summary in BPF. - Support --summary-mode=cgroup. - Always print return value for syscalls returning a pid. - The rseq and set_robust_list don't return a pid, just -errno. perf lock contention: - Symbolize zone->lock using BTF. - Add -J/--inject-delay option to estimate impact on application performance by optimization of kernel locking behavior. perf stat: - Improve hybrid support for the NMI watchdog warning. Symbol resolution: - Handle 'u' and 'l' symbols in /proc/kallsyms, resolving some Rust symbols. - Improve Rust demangler. Hardware tracing: Intel PT: - Fix PEBS-via-PT data_src. - Do not default to recording all switch events. - Fix pattern matching with python3 on the SQL viewer script. arm64: - Fixups for the hip08 hha PMU. 
Vendor events: - Update Intel events/metrics files for alderlake, alderlaken, arrowlake, bonnell, broadwell, broadwellde, broadwellx, cascadelakex, clearwaterforest, elkhartlake, emeraldrapids, grandridge, graniterapids, haswell, haswellx, icelake, icelakex, ivybridge, ivytown, jaketown, lunarlake, meteorlake, nehalemep, nehalemex, rocketlake, sandybridge, sapphirerapids, sierraforest, skylake, skylakex, snowridgex, tigerlake, westmereep-dp, westmereep-sp, westmereep-sx. python support: - Add support for event counts in the python binding, add a counting.py example. perf list: - Display the PMU name associated with a perf metric in JSON. perf test: - Hybrid improvements for metric value validation test. - Fix LBR test by ignoring idle task. - Add AMD IBS sw filter and 'ldlat' tests. - Add 'perf trace --summary-mode=cgroup' test. - Add tests for the various language symbol demanglers. Miscellaneous: - Allow specifying the cpu an event will be tied to using '-e event/cpu=N/'. - Sync various headers with the kernel sources. - Add annotations to use clang's -Wthread-safety and fix some problems it detected. - Make dump_stack() use perf's symbol resolution to provide better backtraces. - Intel TPEBS support cleanups and fixes. TPEBS stands for Timed PEBS (Precision Event-Based Sampling), which adds timing info, the retirement latency of instructions. - Various memory allocation (some detected by ASAN) and reference counting fixes. - Add an 8-byte aligned PERF_RECORD_COMPRESSED2 to replace PERF_RECORD_COMPRESSED. - Skip unsupported event types in perf.data files, don't stop when finding one. - Improve lookups using hashmaps and binary searches. 
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> -----BEGIN PGP SIGNATURE----- iHUEABYKAB0WIQR2GiIUctdOfX2qHhGyPKLppCJ+JwUCaD9ViwAKCRCyPKLppCJ+ JzOfAQDXlukhPQyuJ4j1ie0x1QO4jalloMbG1Bkp3hn6yjxafAD9Ha5wr+dwnAj4 FfxOVqua29r8Htn4aGahXZ0nnlVp9Ac= =bwgD -----END PGP SIGNATURE----- Merge tag 'perf-tools-for-v6.16-1-2025-06-03' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools Pull perf tools updates from Arnaldo Carvalho de Melo: "perf report/top/annotate TUI: - Accept the left arrow key as a Zoom out if done on the first column - Show if source code toggle status in title, to help spotting bugs with the various disassemblers (capstone, llvm, objdump) - Provide feedback on unhandled hotkeys Build: - Better inform when certain features are not available with warnings in the build process and in 'perf version --build-options' or 'perf -vv' perf record: - Improve the --off-cpu code by synthesizing events for switch-out -> switch-in intervals using a BPF program. This can be fine tuned using a --off-cpu-thresh knob perf report: - Add 'tgid' sort key perf mem/c2c: - Add 'op', 'cache', 'snoop', 'dtlb' output fields - Add support for 'ldlat' on AMD IBS (Instruction Based Sampling) perf ftrace: - Use process/session specific trace settings instead of messing with the global ftrace knobs perf trace: - Implement syscall summary in BPF - Support --summary-mode=cgroup - Always print return value for syscalls returning a pid - The rseq and set_robust_list don't return a pid, just -errno perf lock contention: - Symbolize zone->lock using BTF - Add -J/--inject-delay option to estimate impact on application performance by optimization of kernel locking behavior perf stat: - Improve hybrid support for the NMI watchdog warning Symbol resolution: - Handle 'u' and 'l' symbols in /proc/kallsyms, resolving some Rust symbols - Improve Rust demangler Hardware tracing: Intel PT: - Fix PEBS-via-PT data_src - Do not default to recording all switch events - Fix pattern 
matching with python3 on the SQL viewer script arm64: - Fixups for the hip08 hha PMU Vendor events: - Update Intel events/metrics files for alderlake, alderlaken, arrowlake, bonnell, broadwell, broadwellde, broadwellx, cascadelakex, clearwaterforest, elkhartlake, emeraldrapids, grandridge, graniterapids, haswell, haswellx, icelake, icelakex, ivybridge, ivytown, jaketown, lunarlake, meteorlake, nehalemep, nehalemex, rocketlake, sandybridge, sapphirerapids, sierraforest, skylake, skylakex, snowridgex, tigerlake, westmereep-dp, westmereep-sp, westmereep-sx python support: - Add support for event counts in the python binding, add a counting.py example perf list: - Display the PMU name associated with a perf metric in JSON perf test: - Hybrid improvements for metric value validation test - Fix LBR test by ignoring idle task - Add AMD IBS sw filter and 'ldlat' tests - Add 'perf trace --summary-mode=cgroup' test - Add tests for the various language symbol demanglers Miscellaneous: - Allow specifying the cpu an event will be tied to using '-e event/cpu=N/' - Sync various headers with the kernel sources - Add annotations to use clang's -Wthread-safety and fix some problems it detected - Make dump_stack() use perf's symbol resolution to provide better backtraces - Intel TPEBS support cleanups and fixes. 
TPEBS stands for Timed PEBS (Precision Event-Based Sampling), that adds timing info, the retirement latency of instructions - Various memory allocation (some detected by ASAN) and reference counting fixes - Add a 8-byte aligned PERF_RECORD_COMPRESSED2 to replace PERF_RECORD_COMPRESSED - Skip unsupported event types in perf.data files, don't stop when finding one - Improve lookups using hashmaps and binary searches" * tag 'perf-tools-for-v6.16-1-2025-06-03' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools: (206 commits) perf callchain: Always populate the addr_location map when adding IP perf lock contention: Reject more than 10ms delays for safety perf trace: Set errpid to false for rseq and set_robust_list perf symbol: Move demangling code out of symbol-elf.c perf trace: Always print return value for syscalls returning a pid perf script: Print PERF_AUX_FLAG_COLLISION flag perf mem: Show absolute percent in mem_stat output perf mem: Display sort order only if it's available perf mem: Describe overhead calculation in brief perf record: Fix incorrect --user-regs comments Revert "perf thread: Ensure comm_lock held for comm_list" perf test trace_summary: Skip --bpf-summary tests if no libbpf perf test intel-pt: Skip jitdump test if no libelf perf intel-tpebs: Avoid race when evlist is being deleted perf test demangle-java: Don't segv if demangling fails perf symbol: Fix use-after-free in filename__read_build_id perf pmu: Avoid segv for missing name/alias_name in wildcarding perf machine: Factor creating a "live" machine out of dwarf-unwind perf test: Add AMD IBS sw filter test perf mem: Count L2 HITM for c2c statistic ...
391 lines
12 KiB
C
391 lines
12 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* AMD specific. Provide textual annotation for IBS raw sample data.
|
|
*/
|
|
|
|
#include <unistd.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <inttypes.h>
|
|
|
|
#include <linux/string.h>
|
|
#include "../../arch/x86/include/asm/amd/ibs.h"
|
|
|
|
#include "debug.h"
|
|
#include "session.h"
|
|
#include "evlist.h"
|
|
#include "sample-raw.h"
|
|
#include "util/sample.h"
|
|
|
|
/* CPU family/model, filled in by parse_cpuid(); zero until parsed. */
static u32 cpu_family;
static u32 cpu_model;

/* PMU type numbers of the IBS PMUs; zero means "not found". */
static u32 ibs_fetch_type;
static u32 ibs_op_type;

/* ibs_op PMU capabilities, latched by evlist__has_amd_ibs(). */
static bool zen4_ibs_extensions;
static bool ldlat_cap;
static bool dtlb_pgsize_cap;
|
|
|
|
static void pr_ibs_fetch_ctl(union ibs_fetch_ctl reg)
|
|
{
|
|
const char * const ic_miss_strs[] = {
|
|
" IcMiss 0",
|
|
" IcMiss 1",
|
|
};
|
|
const char * const l1tlb_pgsz_strs[] = {
|
|
" L1TlbPgSz 4KB",
|
|
" L1TlbPgSz 2MB",
|
|
" L1TlbPgSz 1GB",
|
|
" L1TlbPgSz RESERVED"
|
|
};
|
|
const char * const l1tlb_pgsz_strs_erratum1347[] = {
|
|
" L1TlbPgSz 4KB",
|
|
" L1TlbPgSz 16KB",
|
|
" L1TlbPgSz 2MB",
|
|
" L1TlbPgSz 1GB"
|
|
};
|
|
const char *ic_miss_str = NULL;
|
|
const char *l1tlb_pgsz_str = NULL;
|
|
char l3_miss_str[sizeof(" L3MissOnly _ FetchOcMiss _ FetchL3Miss _")] = "";
|
|
|
|
if (cpu_family == 0x19 && cpu_model < 0x10) {
|
|
/*
|
|
* Erratum #1238 workaround is to ignore MSRC001_1030[IbsIcMiss]
|
|
* Erratum #1347 workaround is to use table provided in erratum
|
|
*/
|
|
if (reg.phy_addr_valid)
|
|
l1tlb_pgsz_str = l1tlb_pgsz_strs_erratum1347[reg.l1tlb_pgsz];
|
|
} else {
|
|
if (reg.phy_addr_valid)
|
|
l1tlb_pgsz_str = l1tlb_pgsz_strs[reg.l1tlb_pgsz];
|
|
ic_miss_str = ic_miss_strs[reg.ic_miss];
|
|
}
|
|
|
|
if (zen4_ibs_extensions) {
|
|
snprintf(l3_miss_str, sizeof(l3_miss_str),
|
|
" L3MissOnly %d FetchOcMiss %d FetchL3Miss %d",
|
|
reg.l3_miss_only, reg.fetch_oc_miss, reg.fetch_l3_miss);
|
|
}
|
|
|
|
printf("ibs_fetch_ctl:\t%016llx MaxCnt %7d Cnt %7d Lat %5d En %d Val %d Comp %d%s "
|
|
"PhyAddrValid %d%s L1TlbMiss %d L2TlbMiss %d RandEn %d%s%s\n",
|
|
reg.val, reg.fetch_maxcnt << 4, reg.fetch_cnt << 4, reg.fetch_lat,
|
|
reg.fetch_en, reg.fetch_val, reg.fetch_comp, ic_miss_str ? : "",
|
|
reg.phy_addr_valid, l1tlb_pgsz_str ? : "", reg.l1tlb_miss, reg.l2tlb_miss,
|
|
reg.rand_en, reg.fetch_comp ? (reg.fetch_l2_miss ? " L2Miss 1" : " L2Miss 0") : "",
|
|
l3_miss_str);
|
|
}
|
|
|
|
/* Print the raw IC_IBS_EXTD_CTL value and its ITLB refill latency field. */
static void pr_ic_ibs_extd_ctl(union ic_ibs_extd_ctl reg)
{
	printf("ic_ibs_ext_ctl:\t%016llx IbsItlbRefillLat %3d\n",
	       reg.val,
	       reg.itlb_refill_lat);
}
|
|
|
|
static void pr_ibs_op_ctl(union ibs_op_ctl reg)
|
|
{
|
|
char l3_miss_only[sizeof(" L3MissOnly _")] = "";
|
|
char ldlat[sizeof(" LdLatThrsh __ LdLatEn _")] = "";
|
|
|
|
if (zen4_ibs_extensions)
|
|
snprintf(l3_miss_only, sizeof(l3_miss_only), " L3MissOnly %d", reg.l3_miss_only);
|
|
|
|
if (ldlat_cap) {
|
|
snprintf(ldlat, sizeof(ldlat), " LdLatThrsh %2d LdLatEn %d",
|
|
reg.ldlat_thrsh, reg.ldlat_en);
|
|
}
|
|
|
|
printf("ibs_op_ctl:\t%016llx MaxCnt %9d%s En %d Val %d CntCtl %d=%s CurCnt %9d%s\n",
|
|
reg.val, ((reg.opmaxcnt_ext << 16) | reg.opmaxcnt) << 4, l3_miss_only,
|
|
reg.op_en, reg.op_val, reg.cnt_ctl,
|
|
reg.cnt_ctl ? "uOps" : "cycles", reg.opcurcnt, ldlat);
|
|
}
|
|
|
|
/*
 * Print the IBS op data register: raw value plus decoded fields.
 *
 * OpReturn, OpBrnTaken and OpBrnMisp are printed only when BrnRet is set.
 */
static void pr_ibs_op_data(union ibs_op_data reg)
{
	const char *ret_str = "";
	const char *taken_str = "";
	const char *misp_str = "";

	if (reg.op_brn_ret) {
		ret_str = reg.op_return ? " OpReturn 1" : " OpReturn 0";
		taken_str = reg.op_brn_taken ? " OpBrnTaken 1" : " OpBrnTaken 0";
		misp_str = reg.op_brn_misp ? " OpBrnMisp 1" : " OpBrnMisp 0";
	}

	printf("ibs_op_data:\t%016llx CompToRetCtr %5d TagToRetCtr %5d%s%s%s BrnRet %d "
		" RipInvalid %d BrnFuse %d Microcode %d\n",
		reg.val, reg.comp_to_ret_ctr, reg.tag_to_ret_ctr,
		ret_str, taken_str, misp_str,
		reg.op_brn_ret, reg.op_rip_invalid, reg.op_brn_fuse, reg.op_microcode);
}
|
|
|
|
/*
 * Print IBS op data2 using the extended data source encoding, where the
 * source is built from data_src_hi (upper bits) and data_src_lo.
 *
 * CacheHitSt is printed only for sources 1, 2 and 5; sources without an
 * entry in the name table print no DataSrc text at all.
 */
static void pr_ibs_op_data2_extended(union ibs_op_data2 reg)
{
	static const char * const src_names[] = {
		"",
		" DataSrc 1=Local L3 or other L1/L2 in CCX",
		" DataSrc 2=Another CCX cache in the same NUMA node",
		" DataSrc 3=DRAM",
		" DataSrc 4=(reserved)",
		" DataSrc 5=Another CCX cache in a different NUMA node",
		" DataSrc 6=Long-latency DIMM",
		" DataSrc 7=MMIO/Config/PCI/APIC",
		" DataSrc 8=Extension Memory",
		" DataSrc 9=(reserved)",
		" DataSrc 10=(reserved)",
		" DataSrc 11=(reserved)",
		" DataSrc 12=Coherent Memory of a different processor type",
		/* 13 to 31 are reserved. Avoid printing them. */
	};
	int data_src = (reg.data_src_hi << 3) | reg.data_src_lo;
	const char *hit_state = "";
	const char *src_name = "";

	switch (data_src) {
	case 1:
	case 2:
	case 5:
		hit_state = reg.cache_hit_st ? "CacheHitSt 1=O-State "
					     : "CacheHitSt 0=M-state ";
		break;
	default:
		break;
	}

	if (data_src < (int)ARRAY_SIZE(src_names))
		src_name = src_names[data_src];

	printf("ibs_op_data2:\t%016llx %sRmtNode %d%s\n", reg.val,
	       hit_state, reg.rmt_node, src_name);
}
|
|
|
|
/*
 * Print IBS op data2 using the default data source encoding, which is taken
 * from data_src_lo alone. CacheHitSt is printed only for source 2.
 */
static void pr_ibs_op_data2_default(union ibs_op_data2 reg)
{
	static const char * const src_names[] = {
		"",
		" DataSrc 1=(reserved)",
		" DataSrc 2=Local node cache",
		" DataSrc 3=DRAM",
		" DataSrc 4=Remote node cache",
		" DataSrc 5=(reserved)",
		" DataSrc 6=(reserved)",
		" DataSrc 7=Other"
	};
	const char *hit_state = "";

	if (reg.data_src_lo == 2) {
		if (reg.cache_hit_st)
			hit_state = "CacheHitSt 1=O-State ";
		else
			hit_state = "CacheHitSt 0=M-state ";
	}

	printf("ibs_op_data2:\t%016llx %sRmtNode %d%s\n", reg.val,
	       hit_state, reg.rmt_node, src_names[reg.data_src_lo]);
}
|
|
|
|
static void pr_ibs_op_data2(union ibs_op_data2 reg)
|
|
{
|
|
if (zen4_ibs_extensions)
|
|
return pr_ibs_op_data2_extended(reg);
|
|
pr_ibs_op_data2_default(reg);
|
|
}
|
|
|
|
static void pr_ibs_op_data3(union ibs_op_data3 reg)
|
|
{
|
|
static const char * const dc_page_sizes[] = {
|
|
" 4K",
|
|
" 2M",
|
|
" 1G",
|
|
" ??",
|
|
};
|
|
char op_dc_miss_open_mem_reqs_str[sizeof(" OpDcMissOpenMemReqs __")] = "";
|
|
char dc_l1_l2tlb_miss_str[sizeof(" DcL1TlbMiss _ DcL2TlbMiss _")] = "";
|
|
char dc_l1tlb_hit_str[sizeof(" DcL1TlbHit2M _ DcL1TlbHit1G _")] = "";
|
|
char op_mem_width_str[sizeof(" OpMemWidth _____ bytes")] = "";
|
|
char dc_l2tlb_hit_2m_str[sizeof(" DcL2TlbHit2M _")] = "";
|
|
char dc_l2tlb_hit_1g_str[sizeof(" DcL2TlbHit1G _")] = "";
|
|
char dc_page_size_str[sizeof(" DcPageSize ____")] = "";
|
|
char l2_miss_str[sizeof(" L2Miss _")] = "";
|
|
|
|
/*
|
|
* Erratum #1293
|
|
* Ignore L2Miss and OpDcMissOpenMemReqs (and opdata2) if DcMissNoMabAlloc or SwPf set
|
|
*/
|
|
if (!(cpu_family == 0x19 && cpu_model < 0x10 && (reg.dc_miss_no_mab_alloc || reg.sw_pf))) {
|
|
snprintf(l2_miss_str, sizeof(l2_miss_str), " L2Miss %d", reg.l2_miss);
|
|
snprintf(op_dc_miss_open_mem_reqs_str, sizeof(op_dc_miss_open_mem_reqs_str),
|
|
" OpDcMissOpenMemReqs %2d", reg.op_dc_miss_open_mem_reqs);
|
|
}
|
|
|
|
if (reg.op_mem_width)
|
|
snprintf(op_mem_width_str, sizeof(op_mem_width_str),
|
|
" OpMemWidth %2d bytes", 1 << (reg.op_mem_width - 1));
|
|
|
|
if (dtlb_pgsize_cap) {
|
|
if (reg.dc_phy_addr_valid) {
|
|
int idx = (reg.dc_l1tlb_hit_1g << 1) | reg.dc_l1tlb_hit_2m;
|
|
|
|
snprintf(dc_l1_l2tlb_miss_str, sizeof(dc_l1_l2tlb_miss_str),
|
|
" DcL1TlbMiss %d DcL2TlbMiss %d",
|
|
reg.dc_l1tlb_miss, reg.dc_l2tlb_miss);
|
|
snprintf(dc_page_size_str, sizeof(dc_page_size_str),
|
|
" DcPageSize %4s", dc_page_sizes[idx]);
|
|
}
|
|
} else {
|
|
snprintf(dc_l1_l2tlb_miss_str, sizeof(dc_l1_l2tlb_miss_str),
|
|
" DcL1TlbMiss %d DcL2TlbMiss %d",
|
|
reg.dc_l1tlb_miss, reg.dc_l2tlb_miss);
|
|
snprintf(dc_l1tlb_hit_str, sizeof(dc_l1tlb_hit_str),
|
|
" DcL1TlbHit2M %d DcL1TlbHit1G %d",
|
|
reg.dc_l1tlb_hit_2m, reg.dc_l1tlb_hit_1g);
|
|
snprintf(dc_l2tlb_hit_2m_str, sizeof(dc_l2tlb_hit_2m_str),
|
|
" DcL2TlbHit2M %d", reg.dc_l2tlb_hit_2m);
|
|
snprintf(dc_l2tlb_hit_1g_str, sizeof(dc_l2tlb_hit_1g_str),
|
|
" DcL2TlbHit1G %d", reg.dc_l2_tlb_hit_1g);
|
|
}
|
|
|
|
printf("ibs_op_data3:\t%016llx LdOp %d StOp %d%s%s%s DcMiss %d DcMisAcc %d "
|
|
"DcWcMemAcc %d DcUcMemAcc %d DcLockedOp %d DcMissNoMabAlloc %d "
|
|
"DcLinAddrValid %d DcPhyAddrValid %d%s%s SwPf %d%s%s "
|
|
"DcMissLat %5d TlbRefillLat %5d\n",
|
|
reg.val, reg.ld_op, reg.st_op, dc_l1_l2tlb_miss_str,
|
|
dtlb_pgsize_cap ? dc_page_size_str : dc_l1tlb_hit_str,
|
|
dc_l2tlb_hit_2m_str, reg.dc_miss, reg.dc_mis_acc, reg.dc_wc_mem_acc,
|
|
reg.dc_uc_mem_acc, reg.dc_locked_op, reg.dc_miss_no_mab_alloc,
|
|
reg.dc_lin_addr_valid, reg.dc_phy_addr_valid, dc_l2tlb_hit_1g_str,
|
|
l2_miss_str, reg.sw_pf, op_mem_width_str, op_dc_miss_open_mem_reqs_str,
|
|
reg.dc_miss_lat, reg.tlb_refill_lat);
|
|
}
|
|
|
|
/*
|
|
* IBS Op/Execution MSRs always saved, in order, are:
|
|
* IBS_OP_CTL, IBS_OP_RIP, IBS_OP_DATA, IBS_OP_DATA2,
|
|
* IBS_OP_DATA3, IBS_DC_LINADDR, IBS_DC_PHYSADDR, BP_IBSTGT_RIP
|
|
*/
|
|
static void amd_dump_ibs_op(struct perf_sample *sample)
|
|
{
|
|
struct perf_ibs_data *data = sample->raw_data;
|
|
union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data;
|
|
__u64 *rip = (__u64 *)op_ctl + 1;
|
|
union ibs_op_data *op_data = (union ibs_op_data *)(rip + 1);
|
|
union ibs_op_data3 *op_data3 = (union ibs_op_data3 *)(rip + 3);
|
|
|
|
pr_ibs_op_ctl(*op_ctl);
|
|
if (!op_data->op_rip_invalid)
|
|
printf("IbsOpRip:\t%016llx\n", *rip);
|
|
pr_ibs_op_data(*op_data);
|
|
/*
|
|
* Erratum #1293: ignore op_data2 if DcMissNoMabAlloc or SwPf are set
|
|
*/
|
|
if (!(cpu_family == 0x19 && cpu_model < 0x10 &&
|
|
(op_data3->dc_miss_no_mab_alloc || op_data3->sw_pf)))
|
|
pr_ibs_op_data2(*(union ibs_op_data2 *)(rip + 2));
|
|
pr_ibs_op_data3(*op_data3);
|
|
if (op_data3->dc_lin_addr_valid)
|
|
printf("IbsDCLinAd:\t%016llx\n", *(rip + 4));
|
|
if (op_data3->dc_phy_addr_valid)
|
|
printf("IbsDCPhysAd:\t%016llx\n", *(rip + 5));
|
|
if (op_data->op_brn_ret && *(rip + 6))
|
|
printf("IbsBrTarget:\t%016llx\n", *(rip + 6));
|
|
}
|
|
|
|
/*
|
|
* IBS Fetch MSRs always saved, in order, are:
|
|
* IBS_FETCH_CTL, IBS_FETCH_LINADDR, IBS_FETCH_PHYSADDR, IC_IBS_EXTD_CTL
|
|
*/
|
|
static void amd_dump_ibs_fetch(struct perf_sample *sample)
|
|
{
|
|
struct perf_ibs_data *data = sample->raw_data;
|
|
union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data;
|
|
__u64 *addr = (__u64 *)fetch_ctl + 1;
|
|
union ic_ibs_extd_ctl *extd_ctl = (union ic_ibs_extd_ctl *)addr + 2;
|
|
|
|
pr_ibs_fetch_ctl(*fetch_ctl);
|
|
printf("IbsFetchLinAd:\t%016llx\n", *addr++);
|
|
if (fetch_ctl->phy_addr_valid)
|
|
printf("IbsFetchPhysAd:\t%016llx\n", *addr);
|
|
pr_ic_ibs_extd_ctl(*extd_ctl);
|
|
}
|
|
|
|
/*
|
|
* Test for enable and valid bits in captured control MSRs.
|
|
*/
|
|
static bool is_valid_ibs_fetch_sample(struct perf_sample *sample)
|
|
{
|
|
struct perf_ibs_data *data = sample->raw_data;
|
|
union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data;
|
|
|
|
if (fetch_ctl->fetch_en && fetch_ctl->fetch_val)
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
static bool is_valid_ibs_op_sample(struct perf_sample *sample)
|
|
{
|
|
struct perf_ibs_data *data = sample->raw_data;
|
|
union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data;
|
|
|
|
if (op_ctl->op_en && op_ctl->op_val)
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
/* AMD vendor specific raw sample function. Check for PERF_RECORD_SAMPLE events
|
|
* and if the event was triggered by IBS, display its raw data with decoded text.
|
|
* The function is only invoked when the dump flag -D is set.
|
|
*/
|
|
void evlist__amd_sample_raw(struct evlist *evlist, union perf_event *event,
|
|
struct perf_sample *sample)
|
|
{
|
|
struct evsel *evsel;
|
|
|
|
if (event->header.type != PERF_RECORD_SAMPLE || !sample->raw_size)
|
|
return;
|
|
|
|
evsel = evlist__event2evsel(evlist, event);
|
|
if (!evsel)
|
|
return;
|
|
|
|
if (evsel->core.attr.type == ibs_fetch_type) {
|
|
if (!is_valid_ibs_fetch_sample(sample)) {
|
|
pr_debug("Invalid raw IBS Fetch MSR data encountered\n");
|
|
return;
|
|
}
|
|
amd_dump_ibs_fetch(sample);
|
|
} else if (evsel->core.attr.type == ibs_op_type) {
|
|
if (!is_valid_ibs_op_sample(sample)) {
|
|
pr_debug("Invalid raw IBS Op MSR data encountered\n");
|
|
return;
|
|
}
|
|
amd_dump_ibs_op(sample);
|
|
}
|
|
}
|
|
|
|
static void parse_cpuid(struct perf_env *env)
|
|
{
|
|
const char *cpuid;
|
|
int ret;
|
|
|
|
cpuid = perf_env__cpuid(env);
|
|
/*
|
|
* cpuid = "AuthenticAMD,family,model,stepping"
|
|
*/
|
|
ret = sscanf(cpuid, "%*[^,],%u,%u", &cpu_family, &cpu_model);
|
|
if (ret != 2)
|
|
pr_debug("problem parsing cpuid\n");
|
|
}
|
|
|
|
/*
|
|
* Find and assign the type number used for ibs_op or ibs_fetch samples.
|
|
* Device names can be large - we are only interested in the first 9 characters,
|
|
* to match "ibs_fetch".
|
|
*/
|
|
bool evlist__has_amd_ibs(struct evlist *evlist)
|
|
{
|
|
struct perf_env *env = evlist->env;
|
|
int ret, nr_pmu_mappings = perf_env__nr_pmu_mappings(env);
|
|
const char *pmu_mapping = perf_env__pmu_mappings(env);
|
|
char name[sizeof("ibs_fetch")];
|
|
u32 type;
|
|
|
|
while (nr_pmu_mappings--) {
|
|
ret = sscanf(pmu_mapping, "%u:%9s", &type, name);
|
|
if (ret == 2) {
|
|
if (strstarts(name, "ibs_op"))
|
|
ibs_op_type = type;
|
|
else if (strstarts(name, "ibs_fetch"))
|
|
ibs_fetch_type = type;
|
|
}
|
|
pmu_mapping += strlen(pmu_mapping) + 1 /* '\0' */;
|
|
}
|
|
|
|
if (perf_env__find_pmu_cap(env, "ibs_op", "zen4_ibs_extensions"))
|
|
zen4_ibs_extensions = 1;
|
|
|
|
if (perf_env__find_pmu_cap(env, "ibs_op", "ldlat"))
|
|
ldlat_cap = 1;
|
|
|
|
if (perf_env__find_pmu_cap(env, "ibs_op", "dtlb_pgsize"))
|
|
dtlb_pgsize_cap = 1;
|
|
|
|
if (ibs_fetch_type || ibs_op_type) {
|
|
if (!cpu_family)
|
|
parse_cpuid(env);
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|