Skip to content

Commit a2f0e7e

Browse files
committed
Merge tag 'perf-core-2023-02-20' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:

 - Optimize perf_sample_data layout

 - Prepare sample data handling for BPF integration

 - Update the x86 PMU driver for Intel Meteor Lake

 - Restructure the x86 uncore code to fix a SPR (Sapphire Rapids)
   discovery breakage

 - Fix the x86 Zhaoxin PMU driver

 - Cleanups

* tag 'perf-core-2023-02-20' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (27 commits)
  perf/x86/intel/uncore: Add Meteor Lake support
  x86/perf/zhaoxin: Add stepping check for ZXC
  perf/x86/intel/ds: Fix the conversion from TSC to perf time
  perf/x86/uncore: Don't WARN_ON_ONCE() for a broken discovery table
  perf/x86/uncore: Add a quirk for UPI on SPR
  perf/x86/uncore: Ignore broken units in discovery table
  perf/x86/uncore: Fix potential NULL pointer in uncore_get_alias_name
  perf/x86/uncore: Factor out uncore_device_to_die()
  perf/core: Call perf_prepare_sample() before running BPF
  perf/core: Introduce perf_prepare_header()
  perf/core: Do not pass header for sample ID init
  perf/core: Set data->sample_flags in perf_prepare_sample()
  perf/core: Add perf_sample_save_brstack() helper
  perf/core: Add perf_sample_save_raw_data() helper
  perf/core: Add perf_sample_save_callchain() helper
  perf/core: Save the dynamic parts of sample data size
  x86/kprobes: Use switch-case for 0xFF opcodes in prepare_emulation
  perf/core: Change the layout of perf_sample_data
  perf/x86/msr: Add Meteor Lake support
  perf/x86/cstate: Add Meteor Lake support
  ...
2 parents 6e649d0 + c828441 commit a2f0e7e

File tree

25 files changed

+953
-317
lines changed

25 files changed

+953
-317
lines changed

arch/powerpc/perf/core-book3s.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2313,8 +2313,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
23132313
struct cpu_hw_events *cpuhw;
23142314
cpuhw = this_cpu_ptr(&cpu_hw_events);
23152315
power_pmu_bhrb_read(event, cpuhw);
2316-
data.br_stack = &cpuhw->bhrb_stack;
2317-
data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
2316+
perf_sample_save_brstack(&data, event, &cpuhw->bhrb_stack);
23182317
}
23192318

23202319
if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC &&

arch/s390/kernel/perf_cpum_cf.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -662,9 +662,7 @@ static int cfdiag_push_sample(struct perf_event *event,
662662
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
663663
raw.frag.size = cpuhw->usedss;
664664
raw.frag.data = cpuhw->stop;
665-
raw.size = raw.frag.size;
666-
data.raw = &raw;
667-
data.sample_flags |= PERF_SAMPLE_RAW;
665+
perf_sample_save_raw_data(&data, &raw);
668666
}
669667

670668
overflow = perf_event_overflow(event, &data, &regs);

arch/s390/kernel/perf_cpum_sf.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -672,7 +672,8 @@ static void cpumsf_output_event_pid(struct perf_event *event,
672672
/* Protect callchain buffers, tasks */
673673
rcu_read_lock();
674674

675-
perf_prepare_sample(&header, data, event, regs);
675+
perf_prepare_sample(data, event, regs);
676+
perf_prepare_header(&header, data, event, regs);
676677
if (perf_output_begin(&handle, data, event, header.size))
677678
goto out;
678679

arch/s390/kernel/perf_pai_crypto.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -362,9 +362,7 @@ static int paicrypt_push_sample(void)
362362
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
363363
raw.frag.size = rawsize;
364364
raw.frag.data = cpump->save;
365-
raw.size = raw.frag.size;
366-
data.raw = &raw;
367-
data.sample_flags |= PERF_SAMPLE_RAW;
365+
perf_sample_save_raw_data(&data, &raw);
368366
}
369367

370368
overflow = perf_event_overflow(event, &data, &regs);

arch/s390/kernel/perf_pai_ext.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -451,9 +451,7 @@ static int paiext_push_sample(void)
451451
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
452452
raw.frag.size = rawsize;
453453
raw.frag.data = cpump->save;
454-
raw.size = raw.frag.size;
455-
data.raw = &raw;
456-
data.sample_flags |= PERF_SAMPLE_RAW;
454+
perf_sample_save_raw_data(&data, &raw);
457455
}
458456

459457
overflow = perf_event_overflow(event, &data, &regs);

arch/x86/events/amd/core.c

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -928,10 +928,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
928928
if (!x86_perf_event_set_period(event))
929929
continue;
930930

931-
if (has_branch_stack(event)) {
932-
data.br_stack = &cpuc->lbr_stack;
933-
data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
934-
}
931+
if (has_branch_stack(event))
932+
perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
935933

936934
if (perf_event_overflow(event, &data, regs))
937935
x86_pmu_stop(event, 0);

arch/x86/events/amd/ibs.c

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1110,8 +1110,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
11101110
.data = ibs_data.data,
11111111
},
11121112
};
1113-
data.raw = &raw;
1114-
data.sample_flags |= PERF_SAMPLE_RAW;
1113+
perf_sample_save_raw_data(&data, &raw);
11151114
}
11161115

11171116
if (perf_ibs == &perf_ibs_op)
@@ -1122,10 +1121,8 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
11221121
* recorded as part of interrupt regs. Thus we need to use rip from
11231122
* interrupt regs while unwinding call stack.
11241123
*/
1125-
if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
1126-
data.callchain = perf_callchain(event, iregs);
1127-
data.sample_flags |= PERF_SAMPLE_CALLCHAIN;
1128-
}
1124+
if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
1125+
perf_sample_save_callchain(&data, event, iregs);
11291126

11301127
throttle = perf_event_overflow(event, &data, &regs);
11311128
out:

arch/x86/events/intel/core.c

Lines changed: 181 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2119,6 +2119,16 @@ static struct extra_reg intel_grt_extra_regs[] __read_mostly = {
21192119
EVENT_EXTRA_END
21202120
};
21212121

2122+
static struct extra_reg intel_cmt_extra_regs[] __read_mostly = {
2123+
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
2124+
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x800ff3ffffffffffull, RSP_0),
2125+
INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xff3ffffffffffull, RSP_1),
2126+
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x5d0),
2127+
INTEL_UEVENT_EXTRA_REG(0x0127, MSR_SNOOP_RSP_0, 0xffffffffffffffffull, SNOOP_0),
2128+
INTEL_UEVENT_EXTRA_REG(0x0227, MSR_SNOOP_RSP_1, 0xffffffffffffffffull, SNOOP_1),
2129+
EVENT_EXTRA_END
2130+
};
2131+
21222132
#define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */
21232133
#define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */
21242134
#define KNL_MCDRAM_LOCAL BIT_ULL(21)
@@ -3026,10 +3036,8 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
30263036

30273037
perf_sample_data_init(&data, 0, event->hw.last_period);
30283038

3029-
if (has_branch_stack(event)) {
3030-
data.br_stack = &cpuc->lbr_stack;
3031-
data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
3032-
}
3039+
if (has_branch_stack(event))
3040+
perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
30333041

30343042
if (perf_event_overflow(event, &data, regs))
30353043
x86_pmu_stop(event, 0);
@@ -4182,6 +4190,12 @@ static int hsw_hw_config(struct perf_event *event)
41824190
static struct event_constraint counter0_constraint =
41834191
INTEL_ALL_EVENT_CONSTRAINT(0, 0x1);
41844192

4193+
static struct event_constraint counter1_constraint =
4194+
INTEL_ALL_EVENT_CONSTRAINT(0, 0x2);
4195+
4196+
static struct event_constraint counter0_1_constraint =
4197+
INTEL_ALL_EVENT_CONSTRAINT(0, 0x3);
4198+
41854199
static struct event_constraint counter2_constraint =
41864200
EVENT_CONSTRAINT(0, 0x4, 0);
41874201

@@ -4191,6 +4205,12 @@ static struct event_constraint fixed0_constraint =
41914205
static struct event_constraint fixed0_counter0_constraint =
41924206
INTEL_ALL_EVENT_CONSTRAINT(0, 0x100000001ULL);
41934207

4208+
static struct event_constraint fixed0_counter0_1_constraint =
4209+
INTEL_ALL_EVENT_CONSTRAINT(0, 0x100000003ULL);
4210+
4211+
static struct event_constraint counters_1_7_constraint =
4212+
INTEL_ALL_EVENT_CONSTRAINT(0, 0xfeULL);
4213+
41944214
static struct event_constraint *
41954215
hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
41964216
struct perf_event *event)
@@ -4322,6 +4342,78 @@ adl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
43224342
return &emptyconstraint;
43234343
}
43244344

4345+
static struct event_constraint *
4346+
cmt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
4347+
struct perf_event *event)
4348+
{
4349+
struct event_constraint *c;
4350+
4351+
c = intel_get_event_constraints(cpuc, idx, event);
4352+
4353+
/*
4354+
* The :ppp indicates the Precise Distribution (PDist) facility, which
4355+
* is only supported on the GP counter 0 & 1 and Fixed counter 0.
4356+
* If a :ppp event which is not available on the above eligible counters,
4357+
* error out.
4358+
*/
4359+
if (event->attr.precise_ip == 3) {
4360+
/* Force instruction:ppp on PMC0, 1 and Fixed counter 0 */
4361+
if (constraint_match(&fixed0_constraint, event->hw.config))
4362+
return &fixed0_counter0_1_constraint;
4363+
4364+
switch (c->idxmsk64 & 0x3ull) {
4365+
case 0x1:
4366+
return &counter0_constraint;
4367+
case 0x2:
4368+
return &counter1_constraint;
4369+
case 0x3:
4370+
return &counter0_1_constraint;
4371+
}
4372+
return &emptyconstraint;
4373+
}
4374+
4375+
return c;
4376+
}
4377+
4378+
static struct event_constraint *
4379+
rwc_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
4380+
struct perf_event *event)
4381+
{
4382+
struct event_constraint *c;
4383+
4384+
c = spr_get_event_constraints(cpuc, idx, event);
4385+
4386+
/* The Retire Latency is not supported by the fixed counter 0. */
4387+
if (event->attr.precise_ip &&
4388+
(event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE) &&
4389+
constraint_match(&fixed0_constraint, event->hw.config)) {
4390+
/*
4391+
* The Instruction PDIR is only available
4392+
* on the fixed counter 0. Error out for this case.
4393+
*/
4394+
if (event->attr.precise_ip == 3)
4395+
return &emptyconstraint;
4396+
return &counters_1_7_constraint;
4397+
}
4398+
4399+
return c;
4400+
}
4401+
4402+
static struct event_constraint *
4403+
mtl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
4404+
struct perf_event *event)
4405+
{
4406+
struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
4407+
4408+
if (pmu->cpu_type == hybrid_big)
4409+
return rwc_get_event_constraints(cpuc, idx, event);
4410+
if (pmu->cpu_type == hybrid_small)
4411+
return cmt_get_event_constraints(cpuc, idx, event);
4412+
4413+
WARN_ON(1);
4414+
return &emptyconstraint;
4415+
}
4416+
43254417
static int adl_hw_config(struct perf_event *event)
43264418
{
43274419
struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
@@ -4494,6 +4586,25 @@ static void flip_smm_bit(void *data)
44944586
}
44954587
}
44964588

4589+
static void intel_pmu_check_num_counters(int *num_counters,
4590+
int *num_counters_fixed,
4591+
u64 *intel_ctrl, u64 fixed_mask);
4592+
4593+
static void update_pmu_cap(struct x86_hybrid_pmu *pmu)
4594+
{
4595+
unsigned int sub_bitmaps = cpuid_eax(ARCH_PERFMON_EXT_LEAF);
4596+
unsigned int eax, ebx, ecx, edx;
4597+
4598+
if (sub_bitmaps & ARCH_PERFMON_NUM_COUNTER_LEAF_BIT) {
4599+
cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF,
4600+
&eax, &ebx, &ecx, &edx);
4601+
pmu->num_counters = fls(eax);
4602+
pmu->num_counters_fixed = fls(ebx);
4603+
intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed,
4604+
&pmu->intel_ctrl, ebx);
4605+
}
4606+
}
4607+
44974608
static bool init_hybrid_pmu(int cpu)
44984609
{
44994610
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
@@ -4519,6 +4630,9 @@ static bool init_hybrid_pmu(int cpu)
45194630
if (!cpumask_empty(&pmu->supported_cpus))
45204631
goto end;
45214632

4633+
if (this_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT))
4634+
update_pmu_cap(pmu);
4635+
45224636
if (!check_hw_exists(&pmu->pmu, pmu->num_counters, pmu->num_counters_fixed))
45234637
return false;
45244638

@@ -5463,6 +5577,12 @@ static struct attribute *adl_hybrid_mem_attrs[] = {
54635577
NULL,
54645578
};
54655579

5580+
static struct attribute *mtl_hybrid_mem_attrs[] = {
5581+
EVENT_PTR(mem_ld_adl),
5582+
EVENT_PTR(mem_st_adl),
5583+
NULL
5584+
};
5585+
54665586
EVENT_ATTR_STR_HYBRID(tx-start, tx_start_adl, "event=0xc9,umask=0x1", hybrid_big);
54675587
EVENT_ATTR_STR_HYBRID(tx-commit, tx_commit_adl, "event=0xc9,umask=0x2", hybrid_big);
54685588
EVENT_ATTR_STR_HYBRID(tx-abort, tx_abort_adl, "event=0xc9,umask=0x4", hybrid_big);
@@ -5490,20 +5610,40 @@ FORMAT_ATTR_HYBRID(offcore_rsp, hybrid_big_small);
54905610
FORMAT_ATTR_HYBRID(ldlat, hybrid_big_small);
54915611
FORMAT_ATTR_HYBRID(frontend, hybrid_big);
54925612

5613+
#define ADL_HYBRID_RTM_FORMAT_ATTR \
5614+
FORMAT_HYBRID_PTR(in_tx), \
5615+
FORMAT_HYBRID_PTR(in_tx_cp)
5616+
5617+
#define ADL_HYBRID_FORMAT_ATTR \
5618+
FORMAT_HYBRID_PTR(offcore_rsp), \
5619+
FORMAT_HYBRID_PTR(ldlat), \
5620+
FORMAT_HYBRID_PTR(frontend)
5621+
54935622
static struct attribute *adl_hybrid_extra_attr_rtm[] = {
5494-
FORMAT_HYBRID_PTR(in_tx),
5495-
FORMAT_HYBRID_PTR(in_tx_cp),
5496-
FORMAT_HYBRID_PTR(offcore_rsp),
5497-
FORMAT_HYBRID_PTR(ldlat),
5498-
FORMAT_HYBRID_PTR(frontend),
5499-
NULL,
5623+
ADL_HYBRID_RTM_FORMAT_ATTR,
5624+
ADL_HYBRID_FORMAT_ATTR,
5625+
NULL
55005626
};
55015627

55025628
static struct attribute *adl_hybrid_extra_attr[] = {
5503-
FORMAT_HYBRID_PTR(offcore_rsp),
5504-
FORMAT_HYBRID_PTR(ldlat),
5505-
FORMAT_HYBRID_PTR(frontend),
5506-
NULL,
5629+
ADL_HYBRID_FORMAT_ATTR,
5630+
NULL
5631+
};
5632+
5633+
PMU_FORMAT_ATTR_SHOW(snoop_rsp, "config1:0-63");
5634+
FORMAT_ATTR_HYBRID(snoop_rsp, hybrid_small);
5635+
5636+
static struct attribute *mtl_hybrid_extra_attr_rtm[] = {
5637+
ADL_HYBRID_RTM_FORMAT_ATTR,
5638+
ADL_HYBRID_FORMAT_ATTR,
5639+
FORMAT_HYBRID_PTR(snoop_rsp),
5640+
NULL
5641+
};
5642+
5643+
static struct attribute *mtl_hybrid_extra_attr[] = {
5644+
ADL_HYBRID_FORMAT_ATTR,
5645+
FORMAT_HYBRID_PTR(snoop_rsp),
5646+
NULL
55075647
};
55085648

55095649
static bool is_attr_for_this_pmu(struct kobject *kobj, struct attribute *attr)
@@ -5725,6 +5865,12 @@ static void intel_pmu_check_hybrid_pmus(u64 fixed_mask)
57255865
}
57265866
}
57275867

5868+
static __always_inline bool is_mtl(u8 x86_model)
5869+
{
5870+
return (x86_model == INTEL_FAM6_METEORLAKE) ||
5871+
(x86_model == INTEL_FAM6_METEORLAKE_L);
5872+
}
5873+
57285874
__init int intel_pmu_init(void)
57295875
{
57305876
struct attribute **extra_skl_attr = &empty_attrs;
@@ -6382,6 +6528,8 @@ __init int intel_pmu_init(void)
63826528
case INTEL_FAM6_RAPTORLAKE:
63836529
case INTEL_FAM6_RAPTORLAKE_P:
63846530
case INTEL_FAM6_RAPTORLAKE_S:
6531+
case INTEL_FAM6_METEORLAKE:
6532+
case INTEL_FAM6_METEORLAKE_L:
63856533
/*
63866534
* Alder Lake has 2 types of CPU, core and atom.
63876535
*
@@ -6401,9 +6549,7 @@ __init int intel_pmu_init(void)
64016549
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
64026550
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
64036551
x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
6404-
x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
64056552
x86_pmu.lbr_pt_coexist = true;
6406-
intel_pmu_pebs_data_source_adl();
64076553
x86_pmu.pebs_latency_data = adl_latency_data_small;
64086554
x86_pmu.num_topdown_events = 8;
64096555
static_call_update(intel_pmu_update_topdown_event,
@@ -6490,8 +6636,22 @@ __init int intel_pmu_init(void)
64906636
pmu->event_constraints = intel_slm_event_constraints;
64916637
pmu->pebs_constraints = intel_grt_pebs_event_constraints;
64926638
pmu->extra_regs = intel_grt_extra_regs;
6493-
pr_cont("Alderlake Hybrid events, ");
6494-
name = "alderlake_hybrid";
6639+
if (is_mtl(boot_cpu_data.x86_model)) {
6640+
x86_pmu.pebs_latency_data = mtl_latency_data_small;
6641+
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
6642+
mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr;
6643+
mem_attr = mtl_hybrid_mem_attrs;
6644+
intel_pmu_pebs_data_source_mtl();
6645+
x86_pmu.get_event_constraints = mtl_get_event_constraints;
6646+
pmu->extra_regs = intel_cmt_extra_regs;
6647+
pr_cont("Meteorlake Hybrid events, ");
6648+
name = "meteorlake_hybrid";
6649+
} else {
6650+
x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
6651+
intel_pmu_pebs_data_source_adl();
6652+
pr_cont("Alderlake Hybrid events, ");
6653+
name = "alderlake_hybrid";
6654+
}
64956655
break;
64966656

64976657
default:
@@ -6606,6 +6766,9 @@ __init int intel_pmu_init(void)
66066766
if (is_hybrid())
66076767
intel_pmu_check_hybrid_pmus((u64)fixed_mask);
66086768

6769+
if (x86_pmu.intel_cap.pebs_timing_info)
6770+
x86_pmu.flags |= PMU_FL_RETIRE_LATENCY;
6771+
66096772
intel_aux_output_init();
66106773

66116774
return 0;

0 commit comments

Comments
 (0)