Skip to content

Commit 631618a

Browse files
Kan Liang authored and Peter Zijlstra committed
perf/x86/intel/lbr: Factor out intel_pmu_store_lbr
The way the LBR information from a PEBS LBR record is stored can be reused for Architectural LBR, because:

- The LBR information is stored like a stack. Entry 0 is always the youngest branch.
- The layout of the LBR INFO MSR is similar.

The LBR information may be retrieved from either the LBR registers (non-PEBS event) or a buffer (PEBS event). Extend rdlbr_*() to support both methods.

Explicitly check for an invalid entry (all 0s), which avoids unnecessary MSR accesses when using a non-PEBS event. For a PEBS event, the check should slightly improve performance as well. Because the invalid entries are cut off here, intel_pmu_lbr_filter() doesn't need to check for them and filter them out.

The function cannot be shared with the current model-specific LBR read, because the direction of the LBR growth is opposite.

Signed-off-by: Kan Liang <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
1 parent fda1f99 commit 631618a

File tree

1 file changed

+56
-26
lines changed
  • arch/x86/events/intel

1 file changed

+56
-26
lines changed

arch/x86/events/intel/lbr.c

Lines changed: 56 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -348,28 +348,37 @@ static __always_inline void wrlbr_info(unsigned int idx, u64 val)
348348
wrmsrl(x86_pmu.lbr_info + idx, val);
349349
}
350350

351-
static __always_inline u64 rdlbr_from(unsigned int idx)
351+
static __always_inline u64 rdlbr_from(unsigned int idx, struct lbr_entry *lbr)
352352
{
353353
u64 val;
354354

355+
if (lbr)
356+
return lbr->from;
357+
355358
rdmsrl(x86_pmu.lbr_from + idx, val);
356359

357360
return lbr_from_signext_quirk_rd(val);
358361
}
359362

360-
static __always_inline u64 rdlbr_to(unsigned int idx)
363+
static __always_inline u64 rdlbr_to(unsigned int idx, struct lbr_entry *lbr)
361364
{
362365
u64 val;
363366

367+
if (lbr)
368+
return lbr->to;
369+
364370
rdmsrl(x86_pmu.lbr_to + idx, val);
365371

366372
return val;
367373
}
368374

369-
static __always_inline u64 rdlbr_info(unsigned int idx)
375+
static __always_inline u64 rdlbr_info(unsigned int idx, struct lbr_entry *lbr)
370376
{
371377
u64 val;
372378

379+
if (lbr)
380+
return lbr->info;
381+
373382
rdmsrl(x86_pmu.lbr_info + idx, val);
374383

375384
return val;
@@ -387,16 +396,16 @@ wrlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
387396
static inline bool
388397
rdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
389398
{
390-
u64 from = rdlbr_from(idx);
399+
u64 from = rdlbr_from(idx, NULL);
391400

392401
/* Don't read invalid entry */
393402
if (!from)
394403
return false;
395404

396405
lbr->from = from;
397-
lbr->to = rdlbr_to(idx);
406+
lbr->to = rdlbr_to(idx, NULL);
398407
if (need_info)
399-
lbr->info = rdlbr_info(idx);
408+
lbr->info = rdlbr_info(idx, NULL);
400409

401410
return true;
402411
}
@@ -432,7 +441,7 @@ void intel_pmu_lbr_restore(void *ctx)
432441

433442
static __always_inline bool lbr_is_reset_in_cstate(void *ctx)
434443
{
435-
return !rdlbr_from(((struct x86_perf_task_context *)ctx)->tos);
444+
return !rdlbr_from(((struct x86_perf_task_context *)ctx)->tos, NULL);
436445
}
437446

438447
static void __intel_pmu_lbr_restore(void *ctx)
@@ -709,8 +718,8 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
709718
u16 cycles = 0;
710719
int lbr_flags = lbr_desc[lbr_format];
711720

712-
from = rdlbr_from(lbr_idx);
713-
to = rdlbr_to(lbr_idx);
721+
from = rdlbr_from(lbr_idx, NULL);
722+
to = rdlbr_to(lbr_idx, NULL);
714723

715724
/*
716725
* Read LBR call stack entries
@@ -722,7 +731,7 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
722731
if (lbr_format == LBR_FORMAT_INFO && need_info) {
723732
u64 info;
724733

725-
info = rdlbr_info(lbr_idx);
734+
info = rdlbr_info(lbr_idx, NULL);
726735
mis = !!(info & LBR_INFO_MISPRED);
727736
pred = !mis;
728737
in_tx = !!(info & LBR_INFO_IN_TX);
@@ -777,6 +786,42 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
777786
cpuc->lbr_stack.hw_idx = tos;
778787
}
779788

789+
static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
790+
struct lbr_entry *entries)
791+
{
792+
struct perf_branch_entry *e;
793+
struct lbr_entry *lbr;
794+
u64 from, to, info;
795+
int i;
796+
797+
for (i = 0; i < x86_pmu.lbr_nr; i++) {
798+
lbr = entries ? &entries[i] : NULL;
799+
e = &cpuc->lbr_entries[i];
800+
801+
from = rdlbr_from(i, lbr);
802+
/*
803+
* Read LBR entries until invalid entry (0s) is detected.
804+
*/
805+
if (!from)
806+
break;
807+
808+
to = rdlbr_to(i, lbr);
809+
info = rdlbr_info(i, lbr);
810+
811+
e->from = from;
812+
e->to = to;
813+
e->mispred = !!(info & LBR_INFO_MISPRED);
814+
e->predicted = !(info & LBR_INFO_MISPRED);
815+
e->in_tx = !!(info & LBR_INFO_IN_TX);
816+
e->abort = !!(info & LBR_INFO_ABORT);
817+
e->cycles = info & LBR_INFO_CYCLES;
818+
e->type = 0;
819+
e->reserved = 0;
820+
}
821+
822+
cpuc->lbr_stack.nr = i;
823+
}
824+
780825
void intel_pmu_lbr_read(void)
781826
{
782827
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -1215,29 +1260,14 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
12151260
void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr)
12161261
{
12171262
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1218-
int i;
1219-
1220-
cpuc->lbr_stack.nr = x86_pmu.lbr_nr;
12211263

12221264
/* Cannot get TOS for large PEBS */
12231265
if (cpuc->n_pebs == cpuc->n_large_pebs)
12241266
cpuc->lbr_stack.hw_idx = -1ULL;
12251267
else
12261268
cpuc->lbr_stack.hw_idx = intel_pmu_lbr_tos();
12271269

1228-
for (i = 0; i < x86_pmu.lbr_nr; i++) {
1229-
u64 info = lbr[i].info;
1230-
struct perf_branch_entry *e = &cpuc->lbr_entries[i];
1231-
1232-
e->from = lbr[i].from;
1233-
e->to = lbr[i].to;
1234-
e->mispred = !!(info & LBR_INFO_MISPRED);
1235-
e->predicted = !(info & LBR_INFO_MISPRED);
1236-
e->in_tx = !!(info & LBR_INFO_IN_TX);
1237-
e->abort = !!(info & LBR_INFO_ABORT);
1238-
e->cycles = info & LBR_INFO_CYCLES;
1239-
e->reserved = 0;
1240-
}
1270+
intel_pmu_store_lbr(cpuc, lbr);
12411271
intel_pmu_lbr_filter(cpuc);
12421272
}
12431273

0 commit comments

Comments
 (0)