|
| 1 | +// SPDX-License-Identifier: GPL-2.0 |
| 2 | +/* |
| 3 | + * AMD specific. Provide textual annotation for IBS raw sample data. |
| 4 | + */ |
| 5 | + |
| 6 | +#include <unistd.h> |
| 7 | +#include <stdio.h> |
| 8 | +#include <string.h> |
| 9 | +#include <inttypes.h> |
| 10 | + |
| 11 | +#include <linux/string.h> |
| 12 | +#include "../../arch/x86/include/asm/amd-ibs.h" |
| 13 | + |
| 14 | +#include "debug.h" |
| 15 | +#include "session.h" |
| 16 | +#include "evlist.h" |
| 17 | +#include "sample-raw.h" |
| 18 | +#include "pmu-events/pmu-events.h" |
| 19 | + |
| 20 | +static u32 cpu_family, cpu_model, ibs_fetch_type, ibs_op_type; |
| 21 | + |
| 22 | +static void pr_ibs_fetch_ctl(union ibs_fetch_ctl reg) |
| 23 | +{ |
| 24 | + const char * const ic_miss_strs[] = { |
| 25 | + " IcMiss 0", |
| 26 | + " IcMiss 1", |
| 27 | + }; |
| 28 | + const char * const l1tlb_pgsz_strs[] = { |
| 29 | + " L1TlbPgSz 4KB", |
| 30 | + " L1TlbPgSz 2MB", |
| 31 | + " L1TlbPgSz 1GB", |
| 32 | + " L1TlbPgSz RESERVED" |
| 33 | + }; |
| 34 | + const char * const l1tlb_pgsz_strs_erratum1347[] = { |
| 35 | + " L1TlbPgSz 4KB", |
| 36 | + " L1TlbPgSz 16KB", |
| 37 | + " L1TlbPgSz 2MB", |
| 38 | + " L1TlbPgSz 1GB" |
| 39 | + }; |
| 40 | + const char *ic_miss_str = NULL; |
| 41 | + const char *l1tlb_pgsz_str = NULL; |
| 42 | + |
| 43 | + if (cpu_family == 0x19 && cpu_model < 0x10) { |
| 44 | + /* |
| 45 | + * Erratum #1238 workaround is to ignore MSRC001_1030[IbsIcMiss] |
| 46 | + * Erratum #1347 workaround is to use table provided in erratum |
| 47 | + */ |
| 48 | + if (reg.phy_addr_valid) |
| 49 | + l1tlb_pgsz_str = l1tlb_pgsz_strs_erratum1347[reg.l1tlb_pgsz]; |
| 50 | + } else { |
| 51 | + if (reg.phy_addr_valid) |
| 52 | + l1tlb_pgsz_str = l1tlb_pgsz_strs[reg.l1tlb_pgsz]; |
| 53 | + ic_miss_str = ic_miss_strs[reg.ic_miss]; |
| 54 | + } |
| 55 | + |
| 56 | + printf("ibs_fetch_ctl:\t%016llx MaxCnt %7d Cnt %7d Lat %5d En %d Val %d Comp %d%s " |
| 57 | + "PhyAddrValid %d%s L1TlbMiss %d L2TlbMiss %d RandEn %d%s\n", |
| 58 | + reg.val, reg.fetch_maxcnt << 4, reg.fetch_cnt << 4, reg.fetch_lat, |
| 59 | + reg.fetch_en, reg.fetch_val, reg.fetch_comp, ic_miss_str ? : "", |
| 60 | + reg.phy_addr_valid, l1tlb_pgsz_str ? : "", reg.l1tlb_miss, reg.l2tlb_miss, |
| 61 | + reg.rand_en, reg.fetch_comp ? (reg.fetch_l2_miss ? " L2Miss 1" : " L2Miss 0") : ""); |
| 62 | +} |
| 63 | + |
| 64 | +static void pr_ic_ibs_extd_ctl(union ic_ibs_extd_ctl reg) |
| 65 | +{ |
| 66 | + printf("ic_ibs_ext_ctl:\t%016llx IbsItlbRefillLat %3d\n", reg.val, reg.itlb_refill_lat); |
| 67 | +} |
| 68 | + |
| 69 | +static void pr_ibs_op_ctl(union ibs_op_ctl reg) |
| 70 | +{ |
| 71 | + printf("ibs_op_ctl:\t%016llx MaxCnt %9d En %d Val %d CntCtl %d=%s CurCnt %9d\n", |
| 72 | + reg.val, ((reg.opmaxcnt_ext << 16) | reg.opmaxcnt) << 4, reg.op_en, reg.op_val, |
| 73 | + reg.cnt_ctl, reg.cnt_ctl ? "uOps" : "cycles", reg.opcurcnt); |
| 74 | +} |
| 75 | + |
| 76 | +static void pr_ibs_op_data(union ibs_op_data reg) |
| 77 | +{ |
| 78 | + printf("ibs_op_data:\t%016llx CompToRetCtr %5d TagToRetCtr %5d%s%s%s BrnRet %d " |
| 79 | + " RipInvalid %d BrnFuse %d Microcode %d\n", |
| 80 | + reg.val, reg.comp_to_ret_ctr, reg.tag_to_ret_ctr, |
| 81 | + reg.op_brn_ret ? (reg.op_return ? " OpReturn 1" : " OpReturn 0") : "", |
| 82 | + reg.op_brn_ret ? (reg.op_brn_taken ? " OpBrnTaken 1" : " OpBrnTaken 0") : "", |
| 83 | + reg.op_brn_ret ? (reg.op_brn_misp ? " OpBrnMisp 1" : " OpBrnMisp 0") : "", |
| 84 | + reg.op_brn_ret, reg.op_rip_invalid, reg.op_brn_fuse, reg.op_microcode); |
| 85 | +} |
| 86 | + |
| 87 | +static void pr_ibs_op_data2(union ibs_op_data2 reg) |
| 88 | +{ |
| 89 | + static const char * const data_src_str[] = { |
| 90 | + "", |
| 91 | + " DataSrc 1=(reserved)", |
| 92 | + " DataSrc 2=Local node cache", |
| 93 | + " DataSrc 3=DRAM", |
| 94 | + " DataSrc 4=Remote node cache", |
| 95 | + " DataSrc 5=(reserved)", |
| 96 | + " DataSrc 6=(reserved)", |
| 97 | + " DataSrc 7=Other" |
| 98 | + }; |
| 99 | + |
| 100 | + printf("ibs_op_data2:\t%016llx %sRmtNode %d%s\n", reg.val, |
| 101 | + reg.data_src == 2 ? (reg.cache_hit_st ? "CacheHitSt 1=O-State " |
| 102 | + : "CacheHitSt 0=M-state ") : "", |
| 103 | + reg.rmt_node, data_src_str[reg.data_src]); |
| 104 | +} |
| 105 | + |
| 106 | +static void pr_ibs_op_data3(union ibs_op_data3 reg) |
| 107 | +{ |
| 108 | + char l2_miss_str[sizeof(" L2Miss _")] = ""; |
| 109 | + char op_mem_width_str[sizeof(" OpMemWidth _____ bytes")] = ""; |
| 110 | + char op_dc_miss_open_mem_reqs_str[sizeof(" OpDcMissOpenMemReqs __")] = ""; |
| 111 | + |
| 112 | + /* |
| 113 | + * Erratum #1293 |
| 114 | + * Ignore L2Miss and OpDcMissOpenMemReqs (and opdata2) if DcMissNoMabAlloc or SwPf set |
| 115 | + */ |
| 116 | + if (!(cpu_family == 0x19 && cpu_model < 0x10 && (reg.dc_miss_no_mab_alloc || reg.sw_pf))) { |
| 117 | + snprintf(l2_miss_str, sizeof(l2_miss_str), " L2Miss %d", reg.l2_miss); |
| 118 | + snprintf(op_dc_miss_open_mem_reqs_str, sizeof(op_dc_miss_open_mem_reqs_str), |
| 119 | + " OpDcMissOpenMemReqs %2d", reg.op_dc_miss_open_mem_reqs); |
| 120 | + } |
| 121 | + |
| 122 | + if (reg.op_mem_width) |
| 123 | + snprintf(op_mem_width_str, sizeof(op_mem_width_str), |
| 124 | + " OpMemWidth %2d bytes", 1 << (reg.op_mem_width - 1)); |
| 125 | + |
| 126 | + printf("ibs_op_data3:\t%016llx LdOp %d StOp %d DcL1TlbMiss %d DcL2TlbMiss %d " |
| 127 | + "DcL1TlbHit2M %d DcL1TlbHit1G %d DcL2TlbHit2M %d DcMiss %d DcMisAcc %d " |
| 128 | + "DcWcMemAcc %d DcUcMemAcc %d DcLockedOp %d DcMissNoMabAlloc %d DcLinAddrValid %d " |
| 129 | + "DcPhyAddrValid %d DcL2TlbHit1G %d%s SwPf %d%s%s DcMissLat %5d TlbRefillLat %5d\n", |
| 130 | + reg.val, reg.ld_op, reg.st_op, reg.dc_l1tlb_miss, reg.dc_l2tlb_miss, |
| 131 | + reg.dc_l1tlb_hit_2m, reg.dc_l1tlb_hit_1g, reg.dc_l2tlb_hit_2m, reg.dc_miss, |
| 132 | + reg.dc_mis_acc, reg.dc_wc_mem_acc, reg.dc_uc_mem_acc, reg.dc_locked_op, |
| 133 | + reg.dc_miss_no_mab_alloc, reg.dc_lin_addr_valid, reg.dc_phy_addr_valid, |
| 134 | + reg.dc_l2_tlb_hit_1g, l2_miss_str, reg.sw_pf, op_mem_width_str, |
| 135 | + op_dc_miss_open_mem_reqs_str, reg.dc_miss_lat, reg.tlb_refill_lat); |
| 136 | +} |
| 137 | + |
| 138 | +/* |
| 139 | + * IBS Op/Execution MSRs always saved, in order, are: |
| 140 | + * IBS_OP_CTL, IBS_OP_RIP, IBS_OP_DATA, IBS_OP_DATA2, |
| 141 | + * IBS_OP_DATA3, IBS_DC_LINADDR, IBS_DC_PHYSADDR, BP_IBSTGT_RIP |
| 142 | + */ |
| 143 | +static void amd_dump_ibs_op(struct perf_sample *sample) |
| 144 | +{ |
| 145 | + struct perf_ibs_data *data = sample->raw_data; |
| 146 | + union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data; |
| 147 | + __u64 *rip = (__u64 *)op_ctl + 1; |
| 148 | + union ibs_op_data *op_data = (union ibs_op_data *)(rip + 1); |
| 149 | + union ibs_op_data3 *op_data3 = (union ibs_op_data3 *)(rip + 3); |
| 150 | + |
| 151 | + pr_ibs_op_ctl(*op_ctl); |
| 152 | + if (!op_data->op_rip_invalid) |
| 153 | + printf("IbsOpRip:\t%016llx\n", *rip); |
| 154 | + pr_ibs_op_data(*op_data); |
| 155 | + /* |
| 156 | + * Erratum #1293: ignore op_data2 if DcMissNoMabAlloc or SwPf are set |
| 157 | + */ |
| 158 | + if (!(cpu_family == 0x19 && cpu_model < 0x10 && |
| 159 | + (op_data3->dc_miss_no_mab_alloc || op_data3->sw_pf))) |
| 160 | + pr_ibs_op_data2(*(union ibs_op_data2 *)(rip + 2)); |
| 161 | + pr_ibs_op_data3(*op_data3); |
| 162 | + if (op_data3->dc_lin_addr_valid) |
| 163 | + printf("IbsDCLinAd:\t%016llx\n", *(rip + 4)); |
| 164 | + if (op_data3->dc_phy_addr_valid) |
| 165 | + printf("IbsDCPhysAd:\t%016llx\n", *(rip + 5)); |
| 166 | + if (op_data->op_brn_ret && *(rip + 6)) |
| 167 | + printf("IbsBrTarget:\t%016llx\n", *(rip + 6)); |
| 168 | +} |
| 169 | + |
| 170 | +/* |
| 171 | + * IBS Fetch MSRs always saved, in order, are: |
| 172 | + * IBS_FETCH_CTL, IBS_FETCH_LINADDR, IBS_FETCH_PHYSADDR, IC_IBS_EXTD_CTL |
| 173 | + */ |
| 174 | +static void amd_dump_ibs_fetch(struct perf_sample *sample) |
| 175 | +{ |
| 176 | + struct perf_ibs_data *data = sample->raw_data; |
| 177 | + union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data; |
| 178 | + __u64 *addr = (__u64 *)fetch_ctl + 1; |
| 179 | + union ic_ibs_extd_ctl *extd_ctl = (union ic_ibs_extd_ctl *)addr + 2; |
| 180 | + |
| 181 | + pr_ibs_fetch_ctl(*fetch_ctl); |
| 182 | + printf("IbsFetchLinAd:\t%016llx\n", *addr++); |
| 183 | + if (fetch_ctl->phy_addr_valid) |
| 184 | + printf("IbsFetchPhysAd:\t%016llx\n", *addr); |
| 185 | + pr_ic_ibs_extd_ctl(*extd_ctl); |
| 186 | +} |
| 187 | + |
| 188 | +/* |
| 189 | + * Test for enable and valid bits in captured control MSRs. |
| 190 | + */ |
| 191 | +static bool is_valid_ibs_fetch_sample(struct perf_sample *sample) |
| 192 | +{ |
| 193 | + struct perf_ibs_data *data = sample->raw_data; |
| 194 | + union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data; |
| 195 | + |
| 196 | + if (fetch_ctl->fetch_en && fetch_ctl->fetch_val) |
| 197 | + return true; |
| 198 | + |
| 199 | + return false; |
| 200 | +} |
| 201 | + |
| 202 | +static bool is_valid_ibs_op_sample(struct perf_sample *sample) |
| 203 | +{ |
| 204 | + struct perf_ibs_data *data = sample->raw_data; |
| 205 | + union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data; |
| 206 | + |
| 207 | + if (op_ctl->op_en && op_ctl->op_val) |
| 208 | + return true; |
| 209 | + |
| 210 | + return false; |
| 211 | +} |
| 212 | + |
| 213 | +/* AMD vendor specific raw sample function. Check for PERF_RECORD_SAMPLE events |
| 214 | + * and if the event was triggered by IBS, display its raw data with decoded text. |
| 215 | + * The function is only invoked when the dump flag -D is set. |
| 216 | + */ |
| 217 | +void evlist__amd_sample_raw(struct evlist *evlist, union perf_event *event, |
| 218 | + struct perf_sample *sample) |
| 219 | +{ |
| 220 | + struct evsel *evsel; |
| 221 | + |
| 222 | + if (event->header.type != PERF_RECORD_SAMPLE || !sample->raw_size) |
| 223 | + return; |
| 224 | + |
| 225 | + evsel = evlist__event2evsel(evlist, event); |
| 226 | + if (!evsel) |
| 227 | + return; |
| 228 | + |
| 229 | + if (evsel->core.attr.type == ibs_fetch_type) { |
| 230 | + if (!is_valid_ibs_fetch_sample(sample)) { |
| 231 | + pr_debug("Invalid raw IBS Fetch MSR data encountered\n"); |
| 232 | + return; |
| 233 | + } |
| 234 | + amd_dump_ibs_fetch(sample); |
| 235 | + } else if (evsel->core.attr.type == ibs_op_type) { |
| 236 | + if (!is_valid_ibs_op_sample(sample)) { |
| 237 | + pr_debug("Invalid raw IBS Op MSR data encountered\n"); |
| 238 | + return; |
| 239 | + } |
| 240 | + amd_dump_ibs_op(sample); |
| 241 | + } |
| 242 | +} |
| 243 | + |
| 244 | +static void parse_cpuid(struct perf_env *env) |
| 245 | +{ |
| 246 | + const char *cpuid; |
| 247 | + int ret; |
| 248 | + |
| 249 | + cpuid = perf_env__cpuid(env); |
| 250 | + /* |
| 251 | + * cpuid = "AuthenticAMD,family,model,stepping" |
| 252 | + */ |
| 253 | + ret = sscanf(cpuid, "%*[^,],%u,%u", &cpu_family, &cpu_model); |
| 254 | + if (ret != 2) |
| 255 | + pr_debug("problem parsing cpuid\n"); |
| 256 | +} |
| 257 | + |
| 258 | +/* |
| 259 | + * Find and assign the type number used for ibs_op or ibs_fetch samples. |
| 260 | + * Device names can be large - we are only interested in the first 9 characters, |
| 261 | + * to match "ibs_fetch". |
| 262 | + */ |
| 263 | +bool evlist__has_amd_ibs(struct evlist *evlist) |
| 264 | +{ |
| 265 | + struct perf_env *env = evlist->env; |
| 266 | + int ret, nr_pmu_mappings = perf_env__nr_pmu_mappings(env); |
| 267 | + const char *pmu_mapping = perf_env__pmu_mappings(env); |
| 268 | + char name[sizeof("ibs_fetch")]; |
| 269 | + u32 type; |
| 270 | + |
| 271 | + while (nr_pmu_mappings--) { |
| 272 | + ret = sscanf(pmu_mapping, "%u:%9s", &type, name); |
| 273 | + if (ret == 2) { |
| 274 | + if (strstarts(name, "ibs_op")) |
| 275 | + ibs_op_type = type; |
| 276 | + else if (strstarts(name, "ibs_fetch")) |
| 277 | + ibs_fetch_type = type; |
| 278 | + } |
| 279 | + pmu_mapping += strlen(pmu_mapping) + 1 /* '\0' */; |
| 280 | + } |
| 281 | + |
| 282 | + if (ibs_fetch_type || ibs_op_type) { |
| 283 | + if (!cpu_family) |
| 284 | + parse_cpuid(env); |
| 285 | + return true; |
| 286 | + } |
| 287 | + |
| 288 | + return false; |
| 289 | +} |
0 commit comments