Skip to content

Commit 49d8184

Browse files
Kan Liang authored and Peter Zijlstra committed
perf/x86/intel/lbr: Support LBR_CTL
An IA32_LBR_CTL MSR is introduced for Architecture LBR to enable and configure the LBR registers, replacing the previous LBR_SELECT. All the related members in struct cpu_hw_events and struct x86_pmu have to be renamed. Some new macros are added to reflect the layout of LBR_CTL. The mapping from PERF_SAMPLE_BRANCH_* to the corresponding bits in the LBR_CTL MSR is now saved in lbr_ctl_map, which is not a const value. The value relies on the CPUID enumeration. For the previous model-specific LBR, most of the bits in LBR_SELECT operate in suppress mode. For the bits in LBR_CTL, the polarity is inverted. For the previous model-specific LBR format 5 (LBR_FORMAT_INFO), if the NO_CYCLES and NO_FLAGS types are set, the flag LBR_NO_INFO will be set to avoid the unnecessary LBR_INFO MSR read. Although Architecture LBR also has a dedicated LBR_INFO MSR, perf doesn't need to check and set the flag LBR_NO_INFO. For Architecture LBR, the XSAVES instruction will be used as the default way to read the LBR MSRs all together. The overhead that the flag tries to avoid no longer exists. Dropping the flag saves the extra check for the flag in lbr_read() later, and makes the code cleaner. Signed-off-by: Kan Liang <[email protected]> Signed-off-by: Peter Zijlstra (Intel) <[email protected]> Link: https://lkml.kernel.org/r/[email protected]
1 parent af6cf12 commit 49d8184

File tree

2 files changed

+55
-3
lines changed

2 files changed

+55
-3
lines changed

arch/x86/events/intel/lbr.c

Lines changed: 43 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -132,6 +132,44 @@ enum {
132132
X86_BR_IRQ |\
133133
X86_BR_INT)
134134

135+
/*
136+
* Intel LBR_CTL bits
137+
*
138+
* Hardware branch filter for Arch LBR
139+
*/
140+
#define ARCH_LBR_KERNEL_BIT 1 /* capture at ring0 */
141+
#define ARCH_LBR_USER_BIT 2 /* capture at ring > 0 */
142+
#define ARCH_LBR_CALL_STACK_BIT 3 /* enable call stack */
143+
#define ARCH_LBR_JCC_BIT 16 /* capture conditional branches */
144+
#define ARCH_LBR_REL_JMP_BIT 17 /* capture relative jumps */
145+
#define ARCH_LBR_IND_JMP_BIT 18 /* capture indirect jumps */
146+
#define ARCH_LBR_REL_CALL_BIT 19 /* capture relative calls */
147+
#define ARCH_LBR_IND_CALL_BIT 20 /* capture indirect calls */
148+
#define ARCH_LBR_RETURN_BIT 21 /* capture near returns */
149+
#define ARCH_LBR_OTHER_BRANCH_BIT 22 /* capture other branches */
150+
151+
#define ARCH_LBR_KERNEL (1ULL << ARCH_LBR_KERNEL_BIT)
152+
#define ARCH_LBR_USER (1ULL << ARCH_LBR_USER_BIT)
153+
#define ARCH_LBR_CALL_STACK (1ULL << ARCH_LBR_CALL_STACK_BIT)
154+
#define ARCH_LBR_JCC (1ULL << ARCH_LBR_JCC_BIT)
155+
#define ARCH_LBR_REL_JMP (1ULL << ARCH_LBR_REL_JMP_BIT)
156+
#define ARCH_LBR_IND_JMP (1ULL << ARCH_LBR_IND_JMP_BIT)
157+
#define ARCH_LBR_REL_CALL (1ULL << ARCH_LBR_REL_CALL_BIT)
158+
#define ARCH_LBR_IND_CALL (1ULL << ARCH_LBR_IND_CALL_BIT)
159+
#define ARCH_LBR_RETURN (1ULL << ARCH_LBR_RETURN_BIT)
160+
#define ARCH_LBR_OTHER_BRANCH (1ULL << ARCH_LBR_OTHER_BRANCH_BIT)
161+
162+
#define ARCH_LBR_ANY \
163+
(ARCH_LBR_JCC |\
164+
ARCH_LBR_REL_JMP |\
165+
ARCH_LBR_IND_JMP |\
166+
ARCH_LBR_REL_CALL |\
167+
ARCH_LBR_IND_CALL |\
168+
ARCH_LBR_RETURN |\
169+
ARCH_LBR_OTHER_BRANCH)
170+
171+
#define ARCH_LBR_CTL_MASK 0x7f000e
172+
135173
static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
136174

137175
/*
@@ -820,6 +858,11 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
820858
reg = &event->hw.branch_reg;
821859
reg->idx = EXTRA_REG_LBR;
822860

861+
if (static_cpu_has(X86_FEATURE_ARCH_LBR)) {
862+
reg->config = mask;
863+
return 0;
864+
}
865+
823866
/*
824867
* The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate
825868
* in suppress mode. So LBR_SELECT should be set to

arch/x86/events/perf_event.h

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -245,7 +245,10 @@ struct cpu_hw_events {
245245
int lbr_pebs_users;
246246
struct perf_branch_stack lbr_stack;
247247
struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
248-
struct er_account *lbr_sel;
248+
union {
249+
struct er_account *lbr_sel;
250+
struct er_account *lbr_ctl;
251+
};
249252
u64 br_sel;
250253
void *last_task_ctx;
251254
int last_log_id;
@@ -688,8 +691,14 @@ struct x86_pmu {
688691
*/
689692
unsigned int lbr_tos, lbr_from, lbr_to,
690693
lbr_nr; /* LBR base regs and size */
691-
u64 lbr_sel_mask; /* LBR_SELECT valid bits */
692-
const int *lbr_sel_map; /* lbr_select mappings */
694+
union {
695+
u64 lbr_sel_mask; /* LBR_SELECT valid bits */
696+
u64 lbr_ctl_mask; /* LBR_CTL valid bits */
697+
};
698+
union {
699+
const int *lbr_sel_map; /* lbr_select mappings */
700+
int *lbr_ctl_map; /* LBR_CTL mappings */
701+
};
693702
bool lbr_double_abort; /* duplicated lbr aborts */
694703
bool lbr_pt_coexist; /* (LBR|BTS) may coexist with PT */
695704

0 commit comments

Comments
 (0)