@@ -8,14 +8,6 @@
 
 #include "../perf_event.h"
 
-static const enum {
-	LBR_EIP_FLAGS		= 1,
-	LBR_TSX			= 2,
-} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
-	[LBR_FORMAT_EIP_FLAGS]  = LBR_EIP_FLAGS,
-	[LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
-};
-
 /*
  * Intel LBR_SELECT bits
  * Intel Vol3a, April 2011, Section 16.7 Table 16-10
@@ -243,7 +235,7 @@ void intel_pmu_lbr_reset_64(void)
 	for (i = 0; i < x86_pmu.lbr_nr; i++) {
 		wrmsrl(x86_pmu.lbr_from + i, 0);
 		wrmsrl(x86_pmu.lbr_to + i, 0);
-		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+		if (x86_pmu.lbr_has_info)
 			wrmsrl(x86_pmu.lbr_info + i, 0);
 	}
 }
@@ -305,11 +297,10 @@ enum {
  */
 static inline bool lbr_from_signext_quirk_needed(void)
 {
-	int lbr_format = x86_pmu.intel_cap.lbr_format;
 	bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
 			   boot_cpu_has(X86_FEATURE_RTM);
 
-	return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX);
+	return !tsx_support && x86_pmu.lbr_has_tsx;
 }
 
 static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
@@ -427,12 +418,12 @@ rdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
 
 void intel_pmu_lbr_restore(void *ctx)
 {
-	bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct x86_perf_task_context *task_ctx = ctx;
-	int i;
-	unsigned lbr_idx, mask;
+	bool need_info = x86_pmu.lbr_has_info;
 	u64 tos = task_ctx->tos;
+	unsigned lbr_idx, mask;
+	int i;
 
 	mask = x86_pmu.lbr_nr - 1;
 	for (i = 0; i < task_ctx->valid_lbrs; i++) {
@@ -444,7 +435,7 @@ void intel_pmu_lbr_restore(void *ctx)
 		lbr_idx = (tos - i) & mask;
 		wrlbr_from(lbr_idx, 0);
 		wrlbr_to(lbr_idx, 0);
-		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+		if (need_info)
 			wrlbr_info(lbr_idx, 0);
 	}
 
@@ -519,9 +510,9 @@ static void __intel_pmu_lbr_restore(void *ctx)
 
 void intel_pmu_lbr_save(void *ctx)
 {
-	bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct x86_perf_task_context *task_ctx = ctx;
+	bool need_info = x86_pmu.lbr_has_info;
 	unsigned lbr_idx, mask;
 	u64 tos;
 	int i;
@@ -816,7 +807,6 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 {
 	bool need_info = false, call_stack = false;
 	unsigned long mask = x86_pmu.lbr_nr - 1;
-	int lbr_format = x86_pmu.intel_cap.lbr_format;
 	u64 tos = intel_pmu_lbr_tos();
 	int i;
 	int out = 0;
@@ -831,9 +821,7 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 	for (i = 0; i < num; i++) {
 		unsigned long lbr_idx = (tos - i) & mask;
 		u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
-		int skip = 0;
 		u16 cycles = 0;
-		int lbr_flags = lbr_desc[lbr_format];
 
 		from = rdlbr_from(lbr_idx, NULL);
 		to = rdlbr_to(lbr_idx, NULL);
@@ -845,37 +833,39 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 		if (call_stack && !from)
 			break;
 
-		if (lbr_format == LBR_FORMAT_INFO && need_info) {
-			u64 info;
-
-			info = rdlbr_info(lbr_idx, NULL);
-			mis = !!(info & LBR_INFO_MISPRED);
-			pred = !mis;
-			in_tx = !!(info & LBR_INFO_IN_TX);
-			abort = !!(info & LBR_INFO_ABORT);
-			cycles = (info & LBR_INFO_CYCLES);
-		}
-
-		if (lbr_format == LBR_FORMAT_TIME) {
-			mis = !!(from & LBR_FROM_FLAG_MISPRED);
-			pred = !mis;
-			skip = 1;
-			cycles = ((to >> 48) & LBR_INFO_CYCLES);
-
-			to = (u64)((((s64)to) << 16) >> 16);
-		}
-
-		if (lbr_flags & LBR_EIP_FLAGS) {
-			mis = !!(from & LBR_FROM_FLAG_MISPRED);
-			pred = !mis;
-			skip = 1;
-		}
-		if (lbr_flags & LBR_TSX) {
-			in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
-			abort = !!(from & LBR_FROM_FLAG_ABORT);
-			skip = 3;
+		if (x86_pmu.lbr_has_info) {
+			if (need_info) {
+				u64 info;
+
+				info = rdlbr_info(lbr_idx, NULL);
+				mis = !!(info & LBR_INFO_MISPRED);
+				pred = !mis;
+				cycles = (info & LBR_INFO_CYCLES);
+				if (x86_pmu.lbr_has_tsx) {
+					in_tx = !!(info & LBR_INFO_IN_TX);
+					abort = !!(info & LBR_INFO_ABORT);
+				}
+			}
+		} else {
+			int skip = 0;
+
+			if (x86_pmu.lbr_from_flags) {
+				mis = !!(from & LBR_FROM_FLAG_MISPRED);
+				pred = !mis;
+				skip = 1;
+			}
+			if (x86_pmu.lbr_has_tsx) {
+				in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
+				abort = !!(from & LBR_FROM_FLAG_ABORT);
+				skip = 3;
+			}
+			from = (u64)((((s64)from) << skip) >> skip);
+
+			if (x86_pmu.lbr_to_cycles) {
+				cycles = ((to >> 48) & LBR_INFO_CYCLES);
+				to = (u64)((((s64)to) << 16) >> 16);
+			}
 		}
-		from = (u64)((((s64)from) << skip) >> skip);
 
 		/*
 		 * Some CPUs report duplicated abort records,
@@ -903,37 +893,40 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 	cpuc->lbr_stack.hw_idx = tos;
 }
 
+static DEFINE_STATIC_KEY_FALSE(x86_lbr_mispred);
+static DEFINE_STATIC_KEY_FALSE(x86_lbr_cycles);
+static DEFINE_STATIC_KEY_FALSE(x86_lbr_type);
+
 static __always_inline int get_lbr_br_type(u64 info)
 {
-	if (!static_cpu_has(X86_FEATURE_ARCH_LBR) || !x86_pmu.lbr_br_type)
-		return 0;
+	int type = 0;
 
-	return (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;
+	if (static_branch_likely(&x86_lbr_type))
+		type = (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;
+
+	return type;
 }
 
 static __always_inline bool get_lbr_mispred(u64 info)
 {
-	if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !x86_pmu.lbr_mispred)
-		return 0;
+	bool mispred = 0;
 
-	return !!(info & LBR_INFO_MISPRED);
-}
+	if (static_branch_likely(&x86_lbr_mispred))
+		mispred = !!(info & LBR_INFO_MISPRED);
 
-static __always_inline bool get_lbr_predicted(u64 info)
-{
-	if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !x86_pmu.lbr_mispred)
-		return 0;
-
-	return !(info & LBR_INFO_MISPRED);
+	return mispred;
 }
 
 static __always_inline u16 get_lbr_cycles(u64 info)
 {
+	u16 cycles = info & LBR_INFO_CYCLES;
+
 	if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
-	    !(x86_pmu.lbr_timed_lbr && info & LBR_INFO_CYC_CNT_VALID))
-		return 0;
+	    (!static_branch_likely(&x86_lbr_cycles) ||
+	     !(info & LBR_INFO_CYC_CNT_VALID)))
+		cycles = 0;
 
-	return info & LBR_INFO_CYCLES;
+	return cycles;
 }
 
 static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
@@ -961,7 +954,7 @@ static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
 		e->from = from;
 		e->to = to;
 		e->mispred = get_lbr_mispred(info);
-		e->predicted = get_lbr_predicted(info);
+		e->predicted = !e->mispred;
 		e->in_tx = !!(info & LBR_INFO_IN_TX);
 		e->abort = !!(info & LBR_INFO_ABORT);
 		e->cycles = get_lbr_cycles(info);
@@ -1120,7 +1113,7 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
 
 	if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
 	    (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
-	    (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO))
+	    x86_pmu.lbr_has_info)
 		reg->config |= LBR_NO_INFO;
 
 	return 0;
@@ -1706,6 +1699,38 @@ void intel_pmu_lbr_init_knl(void)
 		x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS;
 }
 
+void intel_pmu_lbr_init(void)
+{
+	switch (x86_pmu.intel_cap.lbr_format) {
+	case LBR_FORMAT_EIP_FLAGS2:
+		x86_pmu.lbr_has_tsx = 1;
+		fallthrough;
+	case LBR_FORMAT_EIP_FLAGS:
+		x86_pmu.lbr_from_flags = 1;
+		break;
+
+	case LBR_FORMAT_INFO:
+		x86_pmu.lbr_has_tsx = 1;
+		fallthrough;
+	case LBR_FORMAT_INFO2:
+		x86_pmu.lbr_has_info = 1;
+		break;
+
+	case LBR_FORMAT_TIME:
+		x86_pmu.lbr_from_flags = 1;
+		x86_pmu.lbr_to_cycles = 1;
+		break;
+	}
+
+	if (x86_pmu.lbr_has_info) {
+		/*
+		 * Only used in combination with baseline pebs.
+		 */
+		static_branch_enable(&x86_lbr_mispred);
+		static_branch_enable(&x86_lbr_cycles);
+	}
+}
+
 /*
  * LBR state size is variable based on the max number of registers.
  * This calculates the expected state size, which should match
@@ -1726,6 +1751,9 @@ static bool is_arch_lbr_xsave_available(void)
 	 * Check the LBR state with the corresponding software structure.
 	 * Disable LBR XSAVES support if the size doesn't match.
 	 */
+	if (xfeature_size(XFEATURE_LBR) == 0)
+		return false;
+
 	if (WARN_ON(xfeature_size(XFEATURE_LBR) != get_lbr_state_size()))
 		return false;
 
@@ -1765,6 +1793,12 @@ void __init intel_pmu_arch_lbr_init(void)
 	x86_pmu.lbr_br_type = ecx.split.lbr_br_type;
 	x86_pmu.lbr_nr = lbr_nr;
 
+	if (x86_pmu.lbr_mispred)
+		static_branch_enable(&x86_lbr_mispred);
+	if (x86_pmu.lbr_timed_lbr)
+		static_branch_enable(&x86_lbr_cycles);
+	if (x86_pmu.lbr_br_type)
+		static_branch_enable(&x86_lbr_type);
 
 	arch_lbr_xsave = is_arch_lbr_xsave_available();
 	if (arch_lbr_xsave) {
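
Note: the hunks above replace per-sample tests of x86_pmu.intel_cap.lbr_format with capability bits set once at init (intel_pmu_lbr_init() / intel_pmu_arch_lbr_init()) and with static keys, so the LBR read path no longer re-reads the capability word for every record. The fragment below is a minimal, self-contained sketch of that static-key pattern; the demo_* names are hypothetical, and only DEFINE_STATIC_KEY_FALSE(), static_branch_enable() and static_branch_likely() from <linux/jump_label.h> are the real interfaces the patch relies on.

#include <linux/jump_label.h>
#include <linux/types.h>

/* Key defaults to false: static_branch_likely() compiles to a patched jump,
 * so a disabled key costs essentially nothing on the per-sample path. */
static DEFINE_STATIC_KEY_FALSE(demo_lbr_cycles);

/* One-time setup, analogous to intel_pmu_lbr_init() enabling x86_lbr_cycles. */
static void demo_lbr_setup(bool hw_reports_cycles)
{
	if (hw_reports_cycles)
		static_branch_enable(&demo_lbr_cycles);
}

/* Per-sample path, analogous to get_lbr_cycles(): the capability check is a
 * patched branch rather than a load and compare of PMU state per record. */
static u16 demo_lbr_cycles_from_info(u64 info)
{
	u16 cycles = 0;

	if (static_branch_likely(&demo_lbr_cycles))
		cycles = info & 0xffff;	/* low 16 bits, like LBR_INFO_CYCLES */

	return cycles;
}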