|
20 | 20 | }, |
21 | 21 | { |
22 | 22 | "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks (when the front-end could not sustain operations delivery to the back-end)", |
23 | | - "MetricExpr": "100 * ( tma_frontend_bound - ( 1 - ( 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts ) ) * tma_fetch_latency * tma_mispredicts_resteers / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) - ( ( 1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=0x1@ ) * ( tma_fetch_latency * ( tma_ms_switches + tma_branch_resteers * ( tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts ) / ( tma_mispredicts_resteers + tma_clears_resteers + tma_unknown_branches ) ) / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) + tma_fetch_bandwidth * tma_ms / ( tma_mite + tma_dsb + tma_lsd + tma_ms ) ) ) ) - tma_bottleneck_big_code", |
| 23 | + "MetricExpr": "100 * ( tma_frontend_bound - ( 1 - ( 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts ) ) * tma_fetch_latency * tma_mispredicts_resteers / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) - ( ( 1 - INST_RETIRED.REP_ITERATION / cpu_core@UOPS_RETIRED.MS\\,cmask\\=0x1@ ) * ( tma_fetch_latency * ( tma_ms_switches + tma_branch_resteers * ( tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts ) / ( tma_mispredicts_resteers + tma_clears_resteers + tma_unknown_branches ) ) / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) + tma_fetch_bandwidth * tma_ms / ( tma_mite + tma_dsb + tma_lsd + tma_ms ) ) ) ) - tma_bottleneck_big_code", |
24 | 24 | "MetricGroup": "BvFB;Fed;FetchBW;Frontend;TopdownL1;tma_L1_group;Default;Scaled_Slots", |
25 | 25 | "MetricName": "tma_bottleneck_instruction_fetch_bw", |
26 | 26 | "MetricThreshold": "tma_bottleneck_instruction_fetch_bw > 20", |
|
79 | 79 | }, |
80 | 80 | { |
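81 | 81 | "BriefDescription": "Total pipeline cost of irregular execution (e.g. FP-assists in HPC, Wait time with work imbalance multithreaded workloads, overhead in system services or virtualized environments)", |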
82 | | - "MetricExpr": "100 * ( ( ( 1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=0x1@ ) * ( tma_fetch_latency * ( tma_ms_switches + tma_branch_resteers * ( tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts ) / ( tma_mispredicts_resteers + tma_clears_resteers + tma_unknown_branches ) ) / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) + tma_fetch_bandwidth * tma_ms / ( tma_mite + tma_dsb + tma_lsd + tma_ms ) ) ) + ( 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts ) * tma_branch_mispredicts + ( tma_machine_clears * tma_other_nukes / ( tma_other_nukes ) ) + ( tma_core_bound * ( tma_serializing_operation + RS.EMPTY_RESOURCE / tma_info_thread_clks * tma_ports_utilized_0 ) / ( tma_divider + tma_serializing_operation + tma_ports_utilization ) ) + ( ( ( tma_microcode_sequencer / ( tma_few_uops_instructions + tma_microcode_sequencer ) ) * ( tma_assists / tma_microcode_sequencer ) ) * tma_heavy_operations ) )", |
| 82 | + "MetricExpr": "100 * ( ( ( 1 - INST_RETIRED.REP_ITERATION / cpu_core@UOPS_RETIRED.MS\\,cmask\\=0x1@ ) * ( tma_fetch_latency * ( tma_ms_switches + tma_branch_resteers * ( tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts ) / ( tma_mispredicts_resteers + tma_clears_resteers + tma_unknown_branches ) ) / ( tma_icache_misses + tma_itlb_misses + tma_branch_resteers + tma_ms_switches + tma_lcp + tma_dsb_switches ) + tma_fetch_bandwidth * tma_ms / ( tma_mite + tma_dsb + tma_lsd + tma_ms ) ) ) + ( 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts ) * tma_branch_mispredicts + ( tma_machine_clears * tma_other_nukes / ( tma_other_nukes ) ) + ( tma_core_bound * ( tma_serializing_operation + RS.EMPTY_RESOURCE / tma_info_thread_clks * tma_ports_utilized_0 ) / ( tma_divider + tma_serializing_operation + tma_ports_utilization ) ) + ( ( ( tma_microcode_sequencer / ( tma_few_uops_instructions + tma_microcode_sequencer ) ) * ( tma_assists / tma_microcode_sequencer ) ) * tma_heavy_operations ) )", |
83 | 83 | "MetricGroup": "Bad;BvIO;Cor;Ret;TopdownL1;tma_L1_group;Default;Scaled_Slots;tma_issueMS", |
84 | 84 | "MetricName": "tma_bottleneck_irregular_overhead", |
85 | 85 | "MetricThreshold": "tma_bottleneck_irregular_overhead > 10", |
|
242 | 242 | }, |
243 | 243 | { |
244 | 244 | "BriefDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS)", |
245 | | - "MetricExpr": "( 3 ) * cpu@UOPS_RETIRED.MS\\,cmask\\=0x1\\,edge\\=0x1@ / ( UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY ) / tma_info_thread_clks", |
| 245 | + "MetricExpr": "( 3 ) * cpu_core@UOPS_RETIRED.MS\\,cmask\\=0x1\\,edge\\=0x1@ / ( UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY ) / tma_info_thread_clks", |
246 | 246 | "MetricGroup": "FetchLat;MicroSeq;TopdownL3;tma_L3_group;tma_fetch_latency_group;Clocks_Estimated;tma_issueMC;tma_issueMS;tma_issueMV;tma_issueSO", |
247 | 247 | "MetricName": "tma_ms_switches", |
248 | 248 | "ScaleUnit": "100%", |
|
289 | 289 | }, |
290 | 290 | { |
291 | 291 | "BriefDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder", |
292 | | - "MetricExpr": "( cpu@INST_DECODED.DECODERS\\,cmask\\=0x1@ - cpu@INST_DECODED.DECODERS\\,cmask\\=0x2@ ) / tma_info_core_core_clks / 2", |
| 292 | + "MetricExpr": "( cpu_core@INST_DECODED.DECODERS\\,cmask\\=0x1@ - cpu_core@INST_DECODED.DECODERS\\,cmask\\=0x2@ ) / tma_info_core_core_clks / 2", |
293 | 293 | "MetricGroup": "DSBmiss;FetchBW;TopdownL4;tma_L4_group;tma_mite_group;Slots_Estimated;tma_issueD0", |
294 | 294 | "MetricName": "tma_decoder0_alone", |
295 | 295 | "ScaleUnit": "100%", |
|
316 | 316 | }, |
317 | 317 | { |
318 | 318 | "BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the Microcode Sequencer (MS) unit - see Microcode_Sequencer node for details", |
319 | | - "MetricExpr": "max( IDQ.MS_CYCLES_ANY , cpu@UOPS_RETIRED.MS\\,cmask\\=0x1@ / ( UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY ) ) / tma_info_core_core_clks / 2", |
| 319 | + "MetricExpr": "max( IDQ.MS_CYCLES_ANY , cpu_core@UOPS_RETIRED.MS\\,cmask\\=0x1@ / ( UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY ) ) / tma_info_core_core_clks / 2", |
320 | 320 | "MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group;Slots_Estimated", |
321 | 321 | "MetricName": "tma_ms", |
322 | 322 | "ScaleUnit": "100%", |
|
404 | 404 | }, |
405 | 405 | { |
406 | 406 | "BriefDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses", |
407 | | - "MetricExpr": "min( ( 7 ) * cpu@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=0x1@ + DTLB_LOAD_MISSES.WALK_ACTIVE , max( CYCLE_ACTIVITY.CYCLES_MEM_ANY - MEMORY_ACTIVITY.CYCLES_L1D_MISS , 0 ) ) / tma_info_thread_clks", |
| 407 | + "MetricExpr": "min( ( 7 ) * cpu_core@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=0x1@ + DTLB_LOAD_MISSES.WALK_ACTIVE , max( CYCLE_ACTIVITY.CYCLES_MEM_ANY - MEMORY_ACTIVITY.CYCLES_L1D_MISS , 0 ) ) / tma_info_thread_clks", |
408 | 408 | "MetricGroup": "BvMT;MemoryTLB;TopdownL4;tma_L4_group;tma_l1_bound_group;Clocks_Estimated;tma_issueTLB", |
409 | 409 | "MetricName": "tma_dtlb_load", |
410 | 410 | "ScaleUnit": "100%", |
|
570 | 570 | }, |
571 | 571 | { |
572 | 572 | "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM)", |
573 | | - "MetricExpr": "( min( CPU_CLK_UNHALTED.THREAD , cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=0x4@ ) ) / tma_info_thread_clks", |
| 573 | + "MetricExpr": "( min( CPU_CLK_UNHALTED.THREAD , cpu_core@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=0x4@ ) ) / tma_info_thread_clks", |
574 | 574 | "MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;Clocks;tma_issueBW", |
575 | 575 | "MetricName": "tma_mem_bandwidth", |
576 | 576 | "ScaleUnit": "100%", |
|
633 | 633 | }, |
634 | 634 | { |
635 | 635 | "BriefDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses", |
636 | | - "MetricExpr": "( ( 7 ) * cpu@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=0x1@ + DTLB_STORE_MISSES.WALK_ACTIVE ) / tma_info_core_core_clks", |
| 636 | + "MetricExpr": "( ( 7 ) * cpu_core@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=0x1@ + DTLB_STORE_MISSES.WALK_ACTIVE ) / tma_info_core_core_clks", |
637 | 637 | "MetricGroup": "BvMT;MemoryTLB;TopdownL4;tma_L4_group;tma_store_bound_group;Clocks_Estimated;tma_issueTLB", |
638 | 638 | "MetricName": "tma_dtlb_store", |
639 | 639 | "ScaleUnit": "100%", |
|
1174 | 1174 | }, |
1175 | 1175 | { |
1176 | 1176 | "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per thread (logical-processor)", |
1177 | | - "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=0x1@", |
| 1177 | + "MetricExpr": "UOPS_EXECUTED.THREAD / cpu_core@UOPS_EXECUTED.THREAD\\,cmask\\=0x1@", |
1178 | 1178 | "MetricGroup": "Backend;Cor;Pipeline;PortsUtil;Metric", |
1179 | 1179 | "MetricName": "tma_info_core_ilp" |
1180 | 1180 | }, |
|
1301 | 1301 | }, |
1302 | 1302 | { |
1303 | 1303 | "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired", |
1304 | | - "MetricExpr": "( tma_retiring * tma_info_thread_slots ) / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=0x1@", |
| 1304 | + "MetricExpr": "( tma_retiring * tma_info_thread_slots ) / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=0x1@", |
1305 | 1305 | "MetricGroup": "Pipeline;Ret;Metric", |
1306 | 1306 | "MetricName": "tma_info_pipeline_retire" |
1307 | 1307 | }, |
1308 | 1308 | { |
1309 | 1309 | "BriefDescription": "Estimated fraction of retirement-cycles dealing with repeat instructions", |
1310 | | - "MetricExpr": "INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=0x1@", |
| 1310 | + "MetricExpr": "INST_RETIRED.REP_ITERATION / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=0x1@", |
1311 | 1311 | "MetricGroup": "MicroSeq;Pipeline;Ret;Metric", |
1312 | 1312 | "MetricName": "tma_info_pipeline_strings_cycles", |
1313 | 1313 | "MetricThreshold": "tma_info_pipeline_strings_cycles > 0.1" |
|
1322 | 1322 | }, |
1323 | 1323 | { |
1324 | 1324 | "BriefDescription": "", |
1325 | | - "MetricExpr": "UOPS_EXECUTED.THREAD / ( ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else cpu@UOPS_EXECUTED.THREAD\\,cmask\\=0x1@ )", |
| 1325 | + "MetricExpr": "UOPS_EXECUTED.THREAD / ( ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else cpu_core@UOPS_EXECUTED.THREAD\\,cmask\\=0x1@ )", |
1326 | 1326 | "MetricGroup": "Cor;Pipeline;PortsUtil;SMT;Metric", |
1327 | 1327 | "MetricName": "tma_info_pipeline_execute" |
1328 | 1328 | }, |
|
1346 | 1346 | }, |
1347 | 1347 | { |
1348 | 1348 | "BriefDescription": "Average number of Uops issued by front-end when it issued something", |
1349 | | - "MetricExpr": "UOPS_ISSUED.ANY / cpu@UOPS_ISSUED.ANY\\,cmask\\=0x1@", |
| 1349 | + "MetricExpr": "UOPS_ISSUED.ANY / cpu_core@UOPS_ISSUED.ANY\\,cmask\\=0x1@", |
1350 | 1350 | "MetricGroup": "Fed;FetchBW;Metric", |
1351 | 1351 | "MetricName": "tma_info_frontend_fetch_upc" |
1352 | 1352 | }, |
|
1366 | 1366 | }, |
1367 | 1367 | { |
1368 | 1368 | "BriefDescription": "Average number of cycles the front-end was delayed due to an Unknown Branch detection", |
1369 | | - "MetricExpr": "INT_MISC.UNKNOWN_BRANCH_CYCLES / cpu@INT_MISC.UNKNOWN_BRANCH_CYCLES\\,cmask\\=0x1\\,edge\\=0x1@", |
| 1369 | + "MetricExpr": "INT_MISC.UNKNOWN_BRANCH_CYCLES / cpu_core@INT_MISC.UNKNOWN_BRANCH_CYCLES\\,cmask\\=0x1\\,edge\\=0x1@", |
1370 | 1370 | "MetricGroup": "Fed;Metric", |
1371 | 1371 | "MetricName": "tma_info_frontend_unknown_branch_cost", |
1372 | 1372 | "PublicDescription": "Average number of cycles the front-end was delayed due to an Unknown Branch detection. See Unknown_Branches node." |
1373 | 1373 | }, |
1374 | 1374 | { |
1375 | 1375 | "BriefDescription": "Average number of cycles of a switch from the DSB fetch-unit to MITE fetch unit - see DSB_Switches tree node for details", |
1376 | | - "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / cpu@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=0x1\\,edge\\=0x1@", |
| 1376 | + "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=0x1\\,edge\\=0x1@", |
1377 | 1377 | "MetricGroup": "DSBmiss;Metric", |
1378 | 1378 | "MetricName": "tma_info_frontend_dsb_switch_cost" |
1379 | 1379 | }, |
|
1385 | 1385 | }, |
1386 | 1386 | { |
1387 | 1387 | "BriefDescription": "Average Latency for L1 instruction cache misses", |
1388 | | - "MetricExpr": "ICACHE_DATA.STALLS / cpu@ICACHE_DATA.STALLS\\,cmask\\=0x1\\,edge\\=0x1@", |
| 1388 | + "MetricExpr": "ICACHE_DATA.STALLS / cpu_core@ICACHE_DATA.STALLS\\,cmask\\=0x1\\,edge\\=0x1@", |
1389 | 1389 | "MetricGroup": "Fed;FetchLat;IcMiss;Metric", |
1390 | 1390 | "MetricName": "tma_info_frontend_icache_miss_latency" |
1391 | 1391 | }, |
|
1683 | 1683 | }, |
1684 | 1684 | { |
1685 | 1685 | "BriefDescription": "Average Parallel L2 cache miss demand Loads", |
1686 | | - "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=0x1@", |
| 1686 | + "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=0x1@", |
1687 | 1687 | "MetricGroup": "Memory_BW;Offcore;Metric", |
1688 | 1688 | "MetricName": "tma_info_memory_latency_load_l2_mlp" |
1689 | 1689 | }, |
|
0 commit comments