
Commit 0f9e042

Merge tag 'perf_urgent_for_v5.17_rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Borislav Petkov:

 - Add support for accessing the general purpose counters on Alder Lake via MMIO

 - Add new LBR format v7 support, which is v5 modulo TSX

 - Fix counter enumeration on Alder Lake hybrids

 - Overhaul how context time updates are done and get rid of perf_event::shadow_ctx_time

 - The usual amount of fixes: event mask correction, supported event types reporting, etc.

* tag 'perf_urgent_for_v5.17_rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/perf: Avoid warning for Arch LBR without XSAVE
  perf/x86/intel/uncore: Add IMC uncore support for ADL
  perf/x86/intel/lbr: Add static_branch for LBR INFO flags
  perf/x86/intel/lbr: Support LBR format V7
  perf/x86/rapl: fix AMD event handling
  perf/x86/intel/uncore: Fix CAS_COUNT_WRITE issue for ICX
  perf/x86/intel: Add a quirk for the calculation of the number of counters on Alder Lake
  perf: Fix perf_event_read_local() time
2 parents: e783362 + 8c16dc0

File tree

12 files changed: +501 −189 lines

arch/x86/events/intel/core.c

Lines changed: 15 additions & 0 deletions

@@ -6236,6 +6236,19 @@ __init int intel_pmu_init(void)
 		pmu->num_counters = x86_pmu.num_counters;
 		pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
 	}
+
+	/*
+	 * Quirk: For some Alder Lake machines, when all E-cores are disabled
+	 * in the BIOS, leaf 0xA will enumerate all counters of the P-cores.
+	 * However, X86_FEATURE_HYBRID_CPU is still set. The above code will
+	 * then mistakenly add extra counters for the P-cores. Correct the
+	 * number of counters here.
+	 */
+	if ((pmu->num_counters > 8) || (pmu->num_counters_fixed > 4)) {
+		pmu->num_counters = x86_pmu.num_counters;
+		pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
+	}
+
 	pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
 	pmu->unconstrained = (struct event_constraint)
 		__EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
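
For context on the 8/4 thresholds above: the counts the quirk sanity-checks come from CPUID leaf 0xA, where EAX[15:8] enumerates the general-purpose counters and EDX[4:0] the fixed-function ones; 8 and 4 are the expected P-core maximums. A minimal user-space probe (not part of this patch; it assumes GCC's <cpuid.h>) that reads the same enumeration:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* CPUID leaf 0xA: architectural performance monitoring. */
	if (!__get_cpuid(0xA, &eax, &ebx, &ecx, &edx))
		return 1;

	/* EAX[15:8] = number of general-purpose counters,
	 * EDX[4:0]  = number of fixed-function counters. */
	printf("GP counters:    %u\n", (eax >> 8) & 0xff);
	printf("Fixed counters: %u\n", edx & 0x1f);
	return 0;
}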
@@ -6340,6 +6353,8 @@ __init int intel_pmu_init(void)
 	}
 
 	if (x86_pmu.lbr_nr) {
+		intel_pmu_lbr_init();
+
 		pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
 
 		/* only support branch_stack snapshot for perfmon >= v2 */

arch/x86/events/intel/lbr.c

Lines changed: 101 additions & 67 deletions

@@ -8,14 +8,6 @@
 
 #include "../perf_event.h"
 
-static const enum {
-	LBR_EIP_FLAGS = 1,
-	LBR_TSX = 2,
-} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
-	[LBR_FORMAT_EIP_FLAGS] = LBR_EIP_FLAGS,
-	[LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
-};
-
 /*
  * Intel LBR_SELECT bits
  * Intel Vol3a, April 2011, Section 16.7 Table 16-10
@@ -243,7 +235,7 @@ void intel_pmu_lbr_reset_64(void)
 	for (i = 0; i < x86_pmu.lbr_nr; i++) {
 		wrmsrl(x86_pmu.lbr_from + i, 0);
 		wrmsrl(x86_pmu.lbr_to + i, 0);
-		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+		if (x86_pmu.lbr_has_info)
 			wrmsrl(x86_pmu.lbr_info + i, 0);
 	}
 }
@@ -305,11 +297,10 @@ enum {
  */
 static inline bool lbr_from_signext_quirk_needed(void)
 {
-	int lbr_format = x86_pmu.intel_cap.lbr_format;
 	bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
 			   boot_cpu_has(X86_FEATURE_RTM);
 
-	return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX);
+	return !tsx_support && x86_pmu.lbr_has_tsx;
 }
 
 static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
@@ -427,12 +418,12 @@ rdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
 
 void intel_pmu_lbr_restore(void *ctx)
 {
-	bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct x86_perf_task_context *task_ctx = ctx;
-	int i;
-	unsigned lbr_idx, mask;
+	bool need_info = x86_pmu.lbr_has_info;
 	u64 tos = task_ctx->tos;
+	unsigned lbr_idx, mask;
+	int i;
 
 	mask = x86_pmu.lbr_nr - 1;
 	for (i = 0; i < task_ctx->valid_lbrs; i++) {
@@ -444,7 +435,7 @@ void intel_pmu_lbr_restore(void *ctx)
 		lbr_idx = (tos - i) & mask;
 		wrlbr_from(lbr_idx, 0);
 		wrlbr_to(lbr_idx, 0);
-		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+		if (need_info)
 			wrlbr_info(lbr_idx, 0);
 	}
 
@@ -519,9 +510,9 @@ static void __intel_pmu_lbr_restore(void *ctx)
 
 void intel_pmu_lbr_save(void *ctx)
 {
-	bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct x86_perf_task_context *task_ctx = ctx;
+	bool need_info = x86_pmu.lbr_has_info;
 	unsigned lbr_idx, mask;
 	u64 tos;
 	int i;
@@ -816,7 +807,6 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 {
 	bool need_info = false, call_stack = false;
 	unsigned long mask = x86_pmu.lbr_nr - 1;
-	int lbr_format = x86_pmu.intel_cap.lbr_format;
 	u64 tos = intel_pmu_lbr_tos();
 	int i;
 	int out = 0;
@@ -831,9 +821,7 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 	for (i = 0; i < num; i++) {
 		unsigned long lbr_idx = (tos - i) & mask;
 		u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
-		int skip = 0;
 		u16 cycles = 0;
-		int lbr_flags = lbr_desc[lbr_format];
 
 		from = rdlbr_from(lbr_idx, NULL);
 		to = rdlbr_to(lbr_idx, NULL);
@@ -845,37 +833,39 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 		if (call_stack && !from)
 			break;
 
-		if (lbr_format == LBR_FORMAT_INFO && need_info) {
-			u64 info;
-
-			info = rdlbr_info(lbr_idx, NULL);
-			mis = !!(info & LBR_INFO_MISPRED);
-			pred = !mis;
-			in_tx = !!(info & LBR_INFO_IN_TX);
-			abort = !!(info & LBR_INFO_ABORT);
-			cycles = (info & LBR_INFO_CYCLES);
-		}
-
-		if (lbr_format == LBR_FORMAT_TIME) {
-			mis = !!(from & LBR_FROM_FLAG_MISPRED);
-			pred = !mis;
-			skip = 1;
-			cycles = ((to >> 48) & LBR_INFO_CYCLES);
-
-			to = (u64)((((s64)to) << 16) >> 16);
-		}
-
-		if (lbr_flags & LBR_EIP_FLAGS) {
-			mis = !!(from & LBR_FROM_FLAG_MISPRED);
-			pred = !mis;
-			skip = 1;
-		}
-		if (lbr_flags & LBR_TSX) {
-			in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
-			abort = !!(from & LBR_FROM_FLAG_ABORT);
-			skip = 3;
+		if (x86_pmu.lbr_has_info) {
+			if (need_info) {
+				u64 info;
+
+				info = rdlbr_info(lbr_idx, NULL);
+				mis = !!(info & LBR_INFO_MISPRED);
+				pred = !mis;
+				cycles = (info & LBR_INFO_CYCLES);
+				if (x86_pmu.lbr_has_tsx) {
+					in_tx = !!(info & LBR_INFO_IN_TX);
+					abort = !!(info & LBR_INFO_ABORT);
+				}
+			}
+		} else {
+			int skip = 0;
+
+			if (x86_pmu.lbr_from_flags) {
+				mis = !!(from & LBR_FROM_FLAG_MISPRED);
+				pred = !mis;
+				skip = 1;
+			}
+			if (x86_pmu.lbr_has_tsx) {
+				in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
+				abort = !!(from & LBR_FROM_FLAG_ABORT);
+				skip = 3;
+			}
+			from = (u64)((((s64)from) << skip) >> skip);
+
+			if (x86_pmu.lbr_to_cycles) {
+				cycles = ((to >> 48) & LBR_INFO_CYCLES);
+				to = (u64)((((s64)to) << 16) >> 16);
+			}
 		}
-		from = (u64)((((s64)from) << skip) >> skip);
 
 		/*
 		 * Some CPUs report duplicated abort records,
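
A note on the (u64)((((s64)from) << skip) >> skip) idiom kept in the legacy path above: pre-LBR_INFO formats pack the mispredict and TSX flags into the topmost bits of the FROM address, and the shift pair both strips them and sign-extends the remaining canonical address. A standalone sketch of the same arithmetic (the sample value is made up):

#include <stdint.h>
#include <stdio.h>

/* Drop the top 'skip' flag bits and sign-extend what remains,
 * as the kernel's from/to fixups do for legacy LBR formats. */
static uint64_t strip_flags(uint64_t addr, int skip)
{
	return (uint64_t)(((int64_t)addr << skip) >> skip);
}

int main(void)
{
	/* Made-up kernel address with the ABORT flag (bit 61) clear,
	 * so the raw value is not a canonical address. */
	uint64_t from = 0xdfffffff81000000ULL;

	printf("raw:      0x%llx\n", (unsigned long long)from);
	printf("stripped: 0x%llx\n", (unsigned long long)strip_flags(from, 3));
	return 0;
}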
@@ -903,37 +893,40 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 	cpuc->lbr_stack.hw_idx = tos;
 }
 
+static DEFINE_STATIC_KEY_FALSE(x86_lbr_mispred);
+static DEFINE_STATIC_KEY_FALSE(x86_lbr_cycles);
+static DEFINE_STATIC_KEY_FALSE(x86_lbr_type);
+
 static __always_inline int get_lbr_br_type(u64 info)
 {
-	if (!static_cpu_has(X86_FEATURE_ARCH_LBR) || !x86_pmu.lbr_br_type)
-		return 0;
+	int type = 0;
 
-	return (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;
+	if (static_branch_likely(&x86_lbr_type))
+		type = (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;
+
+	return type;
 }
 
 static __always_inline bool get_lbr_mispred(u64 info)
 {
-	if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !x86_pmu.lbr_mispred)
-		return 0;
+	bool mispred = 0;
 
-	return !!(info & LBR_INFO_MISPRED);
-}
+	if (static_branch_likely(&x86_lbr_mispred))
+		mispred = !!(info & LBR_INFO_MISPRED);
 
-static __always_inline bool get_lbr_predicted(u64 info)
-{
-	if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !x86_pmu.lbr_mispred)
-		return 0;
-
-	return !(info & LBR_INFO_MISPRED);
+	return mispred;
 }
 
 static __always_inline u16 get_lbr_cycles(u64 info)
 {
+	u16 cycles = info & LBR_INFO_CYCLES;
+
 	if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
-	    !(x86_pmu.lbr_timed_lbr && info & LBR_INFO_CYC_CNT_VALID))
-		return 0;
+	    (!static_branch_likely(&x86_lbr_cycles) ||
+	     !(info & LBR_INFO_CYC_CNT_VALID)))
+		cycles = 0;
 
-	return info & LBR_INFO_CYCLES;
+	return cycles;
 }
 
 static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
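
The x86_lbr_* keys above use the kernel's jump-label machinery: static_branch_likely() compiles to a patchable jump that static_branch_enable() flips once at init time, so the per-record helpers avoid a memory load and test on every LBR entry. A minimal module-style sketch of the pattern (the key and function names are illustrative, not from this patch):

#include <linux/jump_label.h>
#include <linux/module.h>
#include <linux/types.h>

/* The key starts disabled; until it is enabled, the guarded
 * branch costs a patched NOP rather than a load-and-test. */
static DEFINE_STATIC_KEY_FALSE(demo_key);

static int consume(u64 info)
{
	int val = 0;

	if (static_branch_likely(&demo_key))
		val = info & 0xffff;

	return val;
}

static int __init demo_init(void)
{
	/* One-time runtime decision, e.g. after CPUID enumeration. */
	static_branch_enable(&demo_key);
	pr_info("demo: consume() = %d\n", consume(0x1234));
	return 0;
}

static void __exit demo_exit(void)
{
	static_branch_disable(&demo_key);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");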
@@ -961,7 +954,7 @@ static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
 		e->from = from;
 		e->to = to;
 		e->mispred = get_lbr_mispred(info);
-		e->predicted = get_lbr_predicted(info);
+		e->predicted = !e->mispred;
 		e->in_tx = !!(info & LBR_INFO_IN_TX);
 		e->abort = !!(info & LBR_INFO_ABORT);
 		e->cycles = get_lbr_cycles(info);
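
For reference, the LBR_INFO_* masks consumed by these helpers decode the LBR_INFO MSR layout from arch/x86/include/asm/msr-index.h: MISPRED in bit 63, IN_TX in bit 62, ABORT in bit 61, CYC_CNT_VALID in bit 60, the branch type in bits 59:56, and the cycle count in the low 16 bits. A small user-space sketch of the decode (the sample record is made up):

#include <stdint.h>
#include <stdio.h>

/* Bit layout mirrors the kernel's LBR_INFO_* definitions. */
#define LBR_INFO_MISPRED	(1ULL << 63)
#define LBR_INFO_IN_TX		(1ULL << 62)
#define LBR_INFO_ABORT		(1ULL << 61)
#define LBR_INFO_CYC_CNT_VALID	(1ULL << 60)
#define LBR_INFO_BR_TYPE_OFFSET	56
#define LBR_INFO_CYCLES		0xffffULL

int main(void)
{
	/* Made-up record: mispredicted, in-transaction, 200 cycles. */
	uint64_t info = LBR_INFO_MISPRED | LBR_INFO_IN_TX |
			LBR_INFO_CYC_CNT_VALID | 200;

	printf("mispred: %d\n", !!(info & LBR_INFO_MISPRED));
	printf("in_tx:   %d\n", !!(info & LBR_INFO_IN_TX));
	printf("abort:   %d\n", !!(info & LBR_INFO_ABORT));
	printf("type:    %u\n", (unsigned)((info >> LBR_INFO_BR_TYPE_OFFSET) & 0xf));
	printf("cycles:  %u\n", (unsigned)(info & LBR_INFO_CYCLES));
	return 0;
}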
@@ -1120,7 +1113,7 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
 
 	if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
 	    (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
-	    (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO))
+	    x86_pmu.lbr_has_info)
 		reg->config |= LBR_NO_INFO;
 
 	return 0;
@@ -1706,6 +1699,38 @@ void intel_pmu_lbr_init_knl(void)
 	x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS;
 }
 
+void intel_pmu_lbr_init(void)
+{
+	switch (x86_pmu.intel_cap.lbr_format) {
+	case LBR_FORMAT_EIP_FLAGS2:
+		x86_pmu.lbr_has_tsx = 1;
+		fallthrough;
+	case LBR_FORMAT_EIP_FLAGS:
+		x86_pmu.lbr_from_flags = 1;
+		break;
+
+	case LBR_FORMAT_INFO:
+		x86_pmu.lbr_has_tsx = 1;
+		fallthrough;
+	case LBR_FORMAT_INFO2:
+		x86_pmu.lbr_has_info = 1;
+		break;
+
+	case LBR_FORMAT_TIME:
+		x86_pmu.lbr_from_flags = 1;
+		x86_pmu.lbr_to_cycles = 1;
+		break;
+	}
+
+	if (x86_pmu.lbr_has_info) {
+		/*
+		 * Only used in combination with baseline pebs.
+		 */
+		static_branch_enable(&x86_lbr_mispred);
+		static_branch_enable(&x86_lbr_cycles);
+	}
+}
+
 /*
  * LBR state size is variable based on the max number of registers.
 * This calculates the expected state size, which should match
@@ -1726,6 +1751,9 @@ static bool is_arch_lbr_xsave_available(void)
 	 * Check the LBR state with the corresponding software structure.
 	 * Disable LBR XSAVES support if the size doesn't match.
 	 */
+	if (xfeature_size(XFEATURE_LBR) == 0)
+		return false;
+
 	if (WARN_ON(xfeature_size(XFEATURE_LBR) != get_lbr_state_size()))
 		return false;
 
@@ -1765,6 +1793,12 @@ void __init intel_pmu_arch_lbr_init(void)
 	x86_pmu.lbr_br_type = ecx.split.lbr_br_type;
 	x86_pmu.lbr_nr = lbr_nr;
 
+	if (x86_pmu.lbr_mispred)
+		static_branch_enable(&x86_lbr_mispred);
+	if (x86_pmu.lbr_timed_lbr)
+		static_branch_enable(&x86_lbr_cycles);
+	if (x86_pmu.lbr_br_type)
+		static_branch_enable(&x86_lbr_type);
 
 	arch_lbr_xsave = is_arch_lbr_xsave_available();
 	if (arch_lbr_xsave) {

arch/x86/events/intel/uncore.c

Lines changed: 1 addition & 1 deletion

@@ -1762,7 +1762,7 @@ static const struct intel_uncore_init_fun rkl_uncore_init __initconst = {
 
 static const struct intel_uncore_init_fun adl_uncore_init __initconst = {
 	.cpu_init = adl_uncore_cpu_init,
-	.mmio_init = tgl_uncore_mmio_init,
+	.mmio_init = adl_uncore_mmio_init,
 };
 
 static const struct intel_uncore_init_fun icx_uncore_init __initconst = {

arch/x86/events/intel/uncore.h

Lines changed: 2 additions & 1 deletion

@@ -584,10 +584,11 @@ void snb_uncore_cpu_init(void);
 void nhm_uncore_cpu_init(void);
 void skl_uncore_cpu_init(void);
 void icl_uncore_cpu_init(void);
-void adl_uncore_cpu_init(void);
 void tgl_uncore_cpu_init(void);
+void adl_uncore_cpu_init(void);
 void tgl_uncore_mmio_init(void);
 void tgl_l_uncore_mmio_init(void);
+void adl_uncore_mmio_init(void);
 int snb_pci2phy_map_init(int devid);
 
 /* uncore_snbep.c */

arch/x86/events/intel/uncore_discovery.c

Lines changed: 2 additions & 2 deletions

@@ -494,8 +494,8 @@ void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box)
 	writel(0, box->io_addr);
 }
 
-static void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box,
-					    struct perf_event *event)
+void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box,
+					    struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
 

arch/x86/events/intel/uncore_discovery.h

Lines changed: 2 additions & 0 deletions

@@ -139,6 +139,8 @@ void intel_generic_uncore_mmio_disable_box(struct intel_uncore_box *box);
 void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box);
 void intel_generic_uncore_mmio_disable_event(struct intel_uncore_box *box,
 					struct perf_event *event);
+void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box,
+					struct perf_event *event);
 
 void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box);
 void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box);
