Skip to content

Commit 91e1c99

Browse files
committed
Merge tag 'perf-core-2021-10-31' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Thomas Gleixner: "Core: - Allow ftrace to instrument parts of the perf core code - Add a new mem_hops field to perf_mem_data_src which allows representing intra-node/package or inter-node/off-package details to prepare for next generation systems which have more hierarchy within the node/package level. Tools: - Update for the new mem_hops field in perf_mem_data_src Arch: - A set of constraint fixes for the Intel uncore PMU - The usual set of small fixes and improvements for x86 and PPC" * tag 'perf-core-2021-10-31' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: perf/x86/intel: Fix ICL/SPR INST_RETIRED.PREC_DIST encodings powerpc/perf: Fix data source encodings for L2.1 and L3.1 accesses tools/perf: Add mem_hops field in perf_mem_data_src structure perf: Add mem_hops field in perf_mem_data_src structure perf: Add comment about current state of PERF_MEM_LVL_* namespace and remove an extra line perf/core: Allow ftrace for functions in kernel/event/core.c perf/x86: Add new event for AUX output counter index perf/x86: Add compiler barrier after updating BTS perf/x86/intel/uncore: Fix Intel SPR M3UPI event constraints perf/x86/intel/uncore: Fix Intel SPR M2PCIE event constraints perf/x86/intel/uncore: Fix Intel SPR IIO event constraints perf/x86/intel/uncore: Fix Intel SPR CHA event constraints perf/x86/intel/uncore: Fix Intel ICX IIO event constraints perf/x86/intel/uncore: Fix invalid unit check perf/x86/intel/uncore: Support extra IMC channel on Ice Lake server
2 parents 5a47ebe + 2de71ee commit 91e1c99

File tree

15 files changed

+169
-25
lines changed

15 files changed

+169
-25
lines changed

arch/powerpc/perf/isa207-common.c

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -238,11 +238,27 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx)
238238
ret |= P(SNOOP, HIT);
239239
break;
240240
case 5:
241-
ret = PH(LVL, REM_CCE1);
242-
if ((sub_idx == 0) || (sub_idx == 2) || (sub_idx == 4))
243-
ret |= P(SNOOP, HIT);
244-
else if ((sub_idx == 1) || (sub_idx == 3) || (sub_idx == 5))
245-
ret |= P(SNOOP, HITM);
241+
if (cpu_has_feature(CPU_FTR_ARCH_31)) {
242+
ret = REM | P(HOPS, 0);
243+
244+
if (sub_idx == 0 || sub_idx == 4)
245+
ret |= PH(LVL, L2) | LEVEL(L2) | P(SNOOP, HIT);
246+
else if (sub_idx == 1 || sub_idx == 5)
247+
ret |= PH(LVL, L2) | LEVEL(L2) | P(SNOOP, HITM);
248+
else if (sub_idx == 2 || sub_idx == 6)
249+
ret |= PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
250+
else if (sub_idx == 3 || sub_idx == 7)
251+
ret |= PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
252+
} else {
253+
if (sub_idx == 0)
254+
ret = PH(LVL, L2) | LEVEL(L2) | REM | P(SNOOP, HIT) | P(HOPS, 0);
255+
else if (sub_idx == 1)
256+
ret = PH(LVL, L2) | LEVEL(L2) | REM | P(SNOOP, HITM) | P(HOPS, 0);
257+
else if (sub_idx == 2 || sub_idx == 4)
258+
ret = PH(LVL, L3) | LEVEL(L3) | REM | P(SNOOP, HIT) | P(HOPS, 0);
259+
else if (sub_idx == 3 || sub_idx == 5)
260+
ret = PH(LVL, L3) | LEVEL(L3) | REM | P(SNOOP, HITM) | P(HOPS, 0);
261+
}
246262
break;
247263
case 6:
248264
ret = PH(LVL, REM_CCE2);

arch/powerpc/perf/isa207-common.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,8 @@
273273
#define P(a, b) PERF_MEM_S(a, b)
274274
#define PH(a, b) (P(LVL, HIT) | P(a, b))
275275
#define PM(a, b) (P(LVL, MISS) | P(a, b))
276+
#define LEVEL(x) P(LVLNUM, x)
277+
#define REM P(REMOTE, REMOTE)
276278

277279
int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, u64 event_config1);
278280
int isa207_compute_mmcr(u64 event[], int n_ev,

arch/x86/events/core.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_enable_all, *x86_pmu.enable_all);
6666
DEFINE_STATIC_CALL_NULL(x86_pmu_enable, *x86_pmu.enable);
6767
DEFINE_STATIC_CALL_NULL(x86_pmu_disable, *x86_pmu.disable);
6868

69+
DEFINE_STATIC_CALL_NULL(x86_pmu_assign, *x86_pmu.assign);
70+
6971
DEFINE_STATIC_CALL_NULL(x86_pmu_add, *x86_pmu.add);
7072
DEFINE_STATIC_CALL_NULL(x86_pmu_del, *x86_pmu.del);
7173
DEFINE_STATIC_CALL_NULL(x86_pmu_read, *x86_pmu.read);
@@ -1215,6 +1217,8 @@ static inline void x86_assign_hw_event(struct perf_event *event,
12151217
hwc->last_cpu = smp_processor_id();
12161218
hwc->last_tag = ++cpuc->tags[i];
12171219

1220+
static_call_cond(x86_pmu_assign)(event, idx);
1221+
12181222
switch (hwc->idx) {
12191223
case INTEL_PMC_IDX_FIXED_BTS:
12201224
case INTEL_PMC_IDX_FIXED_VLBR:
@@ -2005,6 +2009,8 @@ static void x86_pmu_static_call_update(void)
20052009
static_call_update(x86_pmu_enable, x86_pmu.enable);
20062010
static_call_update(x86_pmu_disable, x86_pmu.disable);
20072011

2012+
static_call_update(x86_pmu_assign, x86_pmu.assign);
2013+
20082014
static_call_update(x86_pmu_add, x86_pmu.add);
20092015
static_call_update(x86_pmu_del, x86_pmu.del);
20102016
static_call_update(x86_pmu_read, x86_pmu.read);

arch/x86/events/intel/bts.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,12 @@ static void bts_update(struct bts_ctx *bts)
209209
} else {
210210
local_set(&buf->data_size, head);
211211
}
212+
213+
/*
214+
* Since BTS is coherent, just add compiler barrier to ensure
215+
* BTS updating is ordered against bts::handle::event.
216+
*/
217+
barrier();
212218
}
213219

214220
static int

arch/x86/events/intel/core.c

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,8 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
243243

244244
static struct event_constraint intel_icl_event_constraints[] = {
245245
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
246-
FIXED_EVENT_CONSTRAINT(0x01c0, 0), /* INST_RETIRED.PREC_DIST */
246+
FIXED_EVENT_CONSTRAINT(0x01c0, 0), /* old INST_RETIRED.PREC_DIST */
247+
FIXED_EVENT_CONSTRAINT(0x0100, 0), /* INST_RETIRED.PREC_DIST */
247248
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
248249
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
249250
FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */
@@ -288,7 +289,7 @@ static struct extra_reg intel_spr_extra_regs[] __read_mostly = {
288289

289290
static struct event_constraint intel_spr_event_constraints[] = {
290291
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
291-
FIXED_EVENT_CONSTRAINT(0x01c0, 0), /* INST_RETIRED.PREC_DIST */
292+
FIXED_EVENT_CONSTRAINT(0x0100, 0), /* INST_RETIRED.PREC_DIST */
292293
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
293294
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
294295
FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */
@@ -2403,6 +2404,12 @@ static void intel_pmu_disable_event(struct perf_event *event)
24032404
intel_pmu_pebs_disable(event);
24042405
}
24052406

2407+
static void intel_pmu_assign_event(struct perf_event *event, int idx)
2408+
{
2409+
if (is_pebs_pt(event))
2410+
perf_report_aux_output_id(event, idx);
2411+
}
2412+
24062413
static void intel_pmu_del_event(struct perf_event *event)
24072414
{
24082415
if (needs_branch_stack(event))
@@ -4495,8 +4502,16 @@ static int intel_pmu_check_period(struct perf_event *event, u64 value)
44954502
return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
44964503
}
44974504

4505+
static void intel_aux_output_init(void)
4506+
{
4507+
/* Refer also intel_pmu_aux_output_match() */
4508+
if (x86_pmu.intel_cap.pebs_output_pt_available)
4509+
x86_pmu.assign = intel_pmu_assign_event;
4510+
}
4511+
44984512
static int intel_pmu_aux_output_match(struct perf_event *event)
44994513
{
4514+
/* intel_pmu_assign_event() is needed, refer intel_aux_output_init() */
45004515
if (!x86_pmu.intel_cap.pebs_output_pt_available)
45014516
return 0;
45024517

@@ -6302,6 +6317,8 @@ __init int intel_pmu_init(void)
63026317
if (is_hybrid())
63036318
intel_pmu_check_hybrid_pmus((u64)fixed_mask);
63046319

6320+
intel_aux_output_init();
6321+
63056322
return 0;
63066323
}
63076324

arch/x86/events/intel/ds.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -923,7 +923,8 @@ struct event_constraint intel_skl_pebs_event_constraints[] = {
923923
};
924924

925925
struct event_constraint intel_icl_pebs_event_constraints[] = {
926-
INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */
926+
INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x100000000ULL), /* old INST_RETIRED.PREC_DIST */
927+
INTEL_FLAGS_UEVENT_CONSTRAINT(0x0100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */
927928
INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), /* SLOTS */
928929

929930
INTEL_PLD_CONSTRAINT(0x1cd, 0xff), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
@@ -943,7 +944,7 @@ struct event_constraint intel_icl_pebs_event_constraints[] = {
943944
};
944945

945946
struct event_constraint intel_spr_pebs_event_constraints[] = {
946-
INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL),
947+
INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */
947948
INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),
948949

949950
INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xfe),

arch/x86/events/intel/uncore_discovery.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030

3131

3232
#define uncore_discovery_invalid_unit(unit) \
33-
(!unit.table1 || !unit.ctl || !unit.table3 || \
33+
(!unit.table1 || !unit.ctl || \
3434
unit.table1 == -1ULL || unit.ctl == -1ULL || \
3535
unit.table3 == -1ULL)
3636

arch/x86/events/intel/uncore_snbep.c

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -452,7 +452,7 @@
452452
#define ICX_M3UPI_PCI_PMON_BOX_CTL 0xa0
453453

454454
/* ICX IMC */
455-
#define ICX_NUMBER_IMC_CHN 2
455+
#define ICX_NUMBER_IMC_CHN 3
456456
#define ICX_IMC_MEM_STRIDE 0x4
457457

458458
/* SPR */
@@ -5076,8 +5076,10 @@ static struct event_constraint icx_uncore_iio_constraints[] = {
50765076
UNCORE_EVENT_CONSTRAINT(0x02, 0x3),
50775077
UNCORE_EVENT_CONSTRAINT(0x03, 0x3),
50785078
UNCORE_EVENT_CONSTRAINT(0x83, 0x3),
5079+
UNCORE_EVENT_CONSTRAINT(0x88, 0xc),
50795080
UNCORE_EVENT_CONSTRAINT(0xc0, 0xc),
50805081
UNCORE_EVENT_CONSTRAINT(0xc5, 0xc),
5082+
UNCORE_EVENT_CONSTRAINT(0xd5, 0xc),
50815083
EVENT_CONSTRAINT_END
50825084
};
50835085

@@ -5463,7 +5465,7 @@ static struct intel_uncore_ops icx_uncore_mmio_ops = {
54635465
static struct intel_uncore_type icx_uncore_imc = {
54645466
.name = "imc",
54655467
.num_counters = 4,
5466-
.num_boxes = 8,
5468+
.num_boxes = 12,
54675469
.perf_ctr_bits = 48,
54685470
.fixed_ctr_bits = 48,
54695471
.fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR,
@@ -5647,6 +5649,7 @@ static struct intel_uncore_type spr_uncore_chabox = {
56475649
.event_mask = SPR_CHA_PMON_EVENT_MASK,
56485650
.event_mask_ext = SPR_RAW_EVENT_MASK_EXT,
56495651
.num_shared_regs = 1,
5652+
.constraints = skx_uncore_chabox_constraints,
56505653
.ops = &spr_uncore_chabox_ops,
56515654
.format_group = &spr_uncore_chabox_format_group,
56525655
.attr_update = uncore_alias_groups,
@@ -5658,6 +5661,7 @@ static struct intel_uncore_type spr_uncore_iio = {
56585661
.event_mask_ext = SNR_IIO_PMON_RAW_EVENT_MASK_EXT,
56595662
.format_group = &snr_uncore_iio_format_group,
56605663
.attr_update = uncore_alias_groups,
5664+
.constraints = icx_uncore_iio_constraints,
56615665
};
56625666

56635667
static struct attribute *spr_uncore_raw_formats_attr[] = {
@@ -5686,9 +5690,16 @@ static struct intel_uncore_type spr_uncore_irp = {
56865690

56875691
};
56885692

5693+
static struct event_constraint spr_uncore_m2pcie_constraints[] = {
5694+
UNCORE_EVENT_CONSTRAINT(0x14, 0x3),
5695+
UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
5696+
EVENT_CONSTRAINT_END
5697+
};
5698+
56895699
static struct intel_uncore_type spr_uncore_m2pcie = {
56905700
SPR_UNCORE_COMMON_FORMAT(),
56915701
.name = "m2pcie",
5702+
.constraints = spr_uncore_m2pcie_constraints,
56925703
};
56935704

56945705
static struct intel_uncore_type spr_uncore_pcu = {
@@ -5765,6 +5776,7 @@ static struct intel_uncore_type spr_uncore_upi = {
57655776
static struct intel_uncore_type spr_uncore_m3upi = {
57665777
SPR_UNCORE_PCI_COMMON_FORMAT(),
57675778
.name = "m3upi",
5779+
.constraints = icx_uncore_m3upi_constraints,
57685780
};
57695781

57705782
static struct intel_uncore_type spr_uncore_mdf = {

arch/x86/events/perf_event.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -726,6 +726,7 @@ struct x86_pmu {
726726
void (*enable_all)(int added);
727727
void (*enable)(struct perf_event *);
728728
void (*disable)(struct perf_event *);
729+
void (*assign)(struct perf_event *event, int idx);
729730
void (*add)(struct perf_event *);
730731
void (*del)(struct perf_event *);
731732
void (*read)(struct perf_event *event);

include/linux/perf_event.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1400,6 +1400,7 @@ perf_event_addr_filters(struct perf_event *event)
14001400
}
14011401

14021402
extern void perf_event_addr_filters_sync(struct perf_event *event);
1403+
extern void perf_report_aux_output_id(struct perf_event *event, u64 hw_id);
14031404

14041405
extern int perf_output_begin(struct perf_output_handle *handle,
14051406
struct perf_sample_data *data,

0 commit comments

Comments
 (0)