Skip to content

Commit 498d348

Browse files
sandip4n authored and namhyung committed
perf vendor events amd: Fix Zen 4 cache latency events
L3PMCx0AC and L3PMCx0AD, used in l3_xi_sampled_latency* events, have a quirk that requires them to be programmed with SliceId set to 0x3. Without this, the events do not count at all, which affects dependent metrics such as l3_read_miss_latency.

If ThreadMask is not specified, the amd-uncore driver internally sets ThreadMask to 0x3, EnAllCores to 0x1 and EnAllSlices to 0x1 but does not set SliceId. Since SliceId must also be set to 0x3 in this case, specify all the other fields explicitly.

E.g.

$ sudo perf stat -e l3_xi_sampled_latency.all,l3_xi_sampled_latency_requests.all -a sleep 1

Before:

Performance counter stats for 'system wide':

0 l3_xi_sampled_latency.all
0 l3_xi_sampled_latency_requests.all

1.005155399 seconds time elapsed

After:

Performance counter stats for 'system wide':

921,446 l3_xi_sampled_latency.all
54,210 l3_xi_sampled_latency_requests.all

1.005664472 seconds time elapsed

Fixes: 5b2ca34 ("perf vendor events amd: Add Zen 4 uncore events")
Signed-off-by: Sandipan Das <[email protected]>
Reviewed-by: Ian Rogers <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Signed-off-by: Namhyung Kim <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
1 parent 507ad2b commit 498d348

File tree

2 files changed

+60
-0
lines changed

2 files changed

+60
-0
lines changed

tools/perf/pmu-events/arch/x86/amdzen4/cache.json

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -676,97 +676,153 @@
676676
"EventCode": "0xac",
677677
"BriefDescription": "Average sampled latency when data is sourced from DRAM in the same NUMA node.",
678678
"UMask": "0x01",
679+
"EnAllCores": "0x1",
680+
"EnAllSlices": "0x1",
681+
"SliceId": "0x3",
682+
"ThreadMask": "0x3",
679683
"Unit": "L3PMC"
680684
},
681685
{
682686
"EventName": "l3_xi_sampled_latency.dram_far",
683687
"EventCode": "0xac",
684688
"BriefDescription": "Average sampled latency when data is sourced from DRAM in a different NUMA node.",
685689
"UMask": "0x02",
690+
"EnAllCores": "0x1",
691+
"EnAllSlices": "0x1",
692+
"SliceId": "0x3",
693+
"ThreadMask": "0x3",
686694
"Unit": "L3PMC"
687695
},
688696
{
689697
"EventName": "l3_xi_sampled_latency.near_cache",
690698
"EventCode": "0xac",
691699
"BriefDescription": "Average sampled latency when data is sourced from another CCX's cache when the address was in the same NUMA node.",
692700
"UMask": "0x04",
701+
"EnAllCores": "0x1",
702+
"EnAllSlices": "0x1",
703+
"SliceId": "0x3",
704+
"ThreadMask": "0x3",
693705
"Unit": "L3PMC"
694706
},
695707
{
696708
"EventName": "l3_xi_sampled_latency.far_cache",
697709
"EventCode": "0xac",
698710
"BriefDescription": "Average sampled latency when data is sourced from another CCX's cache when the address was in a different NUMA node.",
699711
"UMask": "0x08",
712+
"EnAllCores": "0x1",
713+
"EnAllSlices": "0x1",
714+
"SliceId": "0x3",
715+
"ThreadMask": "0x3",
700716
"Unit": "L3PMC"
701717
},
702718
{
703719
"EventName": "l3_xi_sampled_latency.ext_near",
704720
"EventCode": "0xac",
705721
"BriefDescription": "Average sampled latency when data is sourced from extension memory (CXL) in the same NUMA node.",
706722
"UMask": "0x10",
723+
"EnAllCores": "0x1",
724+
"EnAllSlices": "0x1",
725+
"SliceId": "0x3",
726+
"ThreadMask": "0x3",
707727
"Unit": "L3PMC"
708728
},
709729
{
710730
"EventName": "l3_xi_sampled_latency.ext_far",
711731
"EventCode": "0xac",
712732
"BriefDescription": "Average sampled latency when data is sourced from extension memory (CXL) in a different NUMA node.",
713733
"UMask": "0x20",
734+
"EnAllCores": "0x1",
735+
"EnAllSlices": "0x1",
736+
"SliceId": "0x3",
737+
"ThreadMask": "0x3",
714738
"Unit": "L3PMC"
715739
},
716740
{
717741
"EventName": "l3_xi_sampled_latency.all",
718742
"EventCode": "0xac",
719743
"BriefDescription": "Average sampled latency from all data sources.",
720744
"UMask": "0x3f",
745+
"EnAllCores": "0x1",
746+
"EnAllSlices": "0x1",
747+
"SliceId": "0x3",
748+
"ThreadMask": "0x3",
721749
"Unit": "L3PMC"
722750
},
723751
{
724752
"EventName": "l3_xi_sampled_latency_requests.dram_near",
725753
"EventCode": "0xad",
726754
"BriefDescription": "L3 cache fill requests sourced from DRAM in the same NUMA node.",
727755
"UMask": "0x01",
756+
"EnAllCores": "0x1",
757+
"EnAllSlices": "0x1",
758+
"SliceId": "0x3",
759+
"ThreadMask": "0x3",
728760
"Unit": "L3PMC"
729761
},
730762
{
731763
"EventName": "l3_xi_sampled_latency_requests.dram_far",
732764
"EventCode": "0xad",
733765
"BriefDescription": "L3 cache fill requests sourced from DRAM in a different NUMA node.",
734766
"UMask": "0x02",
767+
"EnAllCores": "0x1",
768+
"EnAllSlices": "0x1",
769+
"SliceId": "0x3",
770+
"ThreadMask": "0x3",
735771
"Unit": "L3PMC"
736772
},
737773
{
738774
"EventName": "l3_xi_sampled_latency_requests.near_cache",
739775
"EventCode": "0xad",
740776
"BriefDescription": "L3 cache fill requests sourced from another CCX's cache when the address was in the same NUMA node.",
741777
"UMask": "0x04",
778+
"EnAllCores": "0x1",
779+
"EnAllSlices": "0x1",
780+
"SliceId": "0x3",
781+
"ThreadMask": "0x3",
742782
"Unit": "L3PMC"
743783
},
744784
{
745785
"EventName": "l3_xi_sampled_latency_requests.far_cache",
746786
"EventCode": "0xad",
747787
"BriefDescription": "L3 cache fill requests sourced from another CCX's cache when the address was in a different NUMA node.",
748788
"UMask": "0x08",
789+
"EnAllCores": "0x1",
790+
"EnAllSlices": "0x1",
791+
"SliceId": "0x3",
792+
"ThreadMask": "0x3",
749793
"Unit": "L3PMC"
750794
},
751795
{
752796
"EventName": "l3_xi_sampled_latency_requests.ext_near",
753797
"EventCode": "0xad",
754798
"BriefDescription": "L3 cache fill requests sourced from extension memory (CXL) in the same NUMA node.",
755799
"UMask": "0x10",
800+
"EnAllCores": "0x1",
801+
"EnAllSlices": "0x1",
802+
"SliceId": "0x3",
803+
"ThreadMask": "0x3",
756804
"Unit": "L3PMC"
757805
},
758806
{
759807
"EventName": "l3_xi_sampled_latency_requests.ext_far",
760808
"EventCode": "0xad",
761809
"BriefDescription": "L3 cache fill requests sourced from extension memory (CXL) in a different NUMA node.",
762810
"UMask": "0x20",
811+
"EnAllCores": "0x1",
812+
"EnAllSlices": "0x1",
813+
"SliceId": "0x3",
814+
"ThreadMask": "0x3",
763815
"Unit": "L3PMC"
764816
},
765817
{
766818
"EventName": "l3_xi_sampled_latency_requests.all",
767819
"EventCode": "0xad",
768820
"BriefDescription": "L3 cache fill requests sourced from all data sources.",
769821
"UMask": "0x3f",
822+
"EnAllCores": "0x1",
823+
"EnAllSlices": "0x1",
824+
"SliceId": "0x3",
825+
"ThreadMask": "0x3",
770826
"Unit": "L3PMC"
771827
}
772828
]

tools/perf/pmu-events/jevents.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,10 @@ def canonicalize_value(val: str) -> str:
373373
('UMask', 'umask='),
374374
('NodeType', 'type='),
375375
('RdWrMask', 'rdwrmask='),
376+
('EnAllCores', 'enallcores='),
377+
('EnAllSlices', 'enallslices='),
378+
('SliceId', 'sliceid='),
379+
('ThreadMask', 'threadmask='),
376380
]
377381
for key, value in event_fields:
378382
if key in jd and not is_zero(jd[key]):

0 commit comments

Comments
 (0)