
Commit 722e42e

Kan Liang authored and Peter Zijlstra committed
perf/x86: Support counter mask
The current perf code assumes that both the GP and the fixed counters are contiguous, but that is not guaranteed on newer Intel platforms or in a virtualization environment. Replace the number of counters with a counter mask for both the GP and the fixed counters. For other architectures and older platforms that do not support a counter mask, use GENMASK_ULL(num_counter - 1, 0) instead; there is no functional change for them.

The interface to KVM is not changed: the number of counters is still passed to KVM. It can be updated separately later.

Signed-off-by: Kan Liang <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Andi Kleen <[email protected]>
Reviewed-by: Ian Rogers <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
1 parent a23eb2f commit 722e42e
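
The essence of the change: instead of assuming the counters occupy indices 0 through num_counters - 1, each PMU carries a bitmask of the counters that actually exist, and the code iterates over the set bits. The standalone C sketch below only illustrates that idiom; the GENMASK_ULL macro here is a simplified stand-in for the kernel's, __builtin_popcountll and the explicit bit loop stand in for hweight64() and for_each_set_bit(), and the sparse mask value is invented for the example.

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for the kernel's GENMASK_ULL(h, l). */
#define GENMASK_ULL(h, l) ((~0ULL >> (63 - (h))) & (~0ULL << (l)))

int main(void)
{
	/* Legacy case: 6 contiguous GP counters occupy bits 0..5. */
	uint64_t cntr_mask = GENMASK_ULL(6 - 1, 0);

	/* Hypothetical sparse case: counter 1 is not available. */
	uint64_t sparse_mask = cntr_mask & ~(1ULL << 1);

	/* hweight64() equivalent: how many counters are present. */
	printf("counters present: %d\n", __builtin_popcountll(sparse_mask));

	/* for_each_set_bit() equivalent: touch only counters that exist. */
	for (int idx = 0; idx < 64; idx++) {
		if (sparse_mask & (1ULL << idx))
			printf("program counter %d\n", idx);
	}
	return 0;
}

With a contiguous mask the loop visits exactly the same indices as the old for (i = 0; i < num_counters; i++), which is why the conversion is a no-op on platforms that still report a plain counter count.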

File tree

9 files changed (+199, -179 lines)

arch/x86/events/amd/core.c

Lines changed: 12 additions & 12 deletions

@@ -432,7 +432,7 @@ static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
 	 * be removed on one CPU at a time AND PMU is disabled
 	 * when we come here
 	 */
-	for (i = 0; i < x86_pmu.num_counters; i++) {
+	for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
 		struct perf_event *tmp = event;

 		if (try_cmpxchg(nb->owners + i, &tmp, NULL))
@@ -501,7 +501,7 @@ __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *ev
 	 * because of successive calls to x86_schedule_events() from
 	 * hw_perf_group_sched_in() without hw_perf_enable()
 	 */
-	for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
+	for_each_set_bit(idx, c->idxmsk, x86_pmu_max_num_counters(NULL)) {
 		if (new == -1 || hwc->idx == idx)
 			/* assign free slot, prefer hwc->idx */
 			old = cmpxchg(nb->owners + idx, NULL, event);
@@ -544,7 +544,7 @@ static struct amd_nb *amd_alloc_nb(int cpu)
 	/*
 	 * initialize all possible NB constraints
 	 */
-	for (i = 0; i < x86_pmu.num_counters; i++) {
+	for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
 		__set_bit(i, nb->event_constraints[i].idxmsk);
 		nb->event_constraints[i].weight = 1;
 	}
@@ -737,7 +737,7 @@ static void amd_pmu_check_overflow(void)
 	 * counters are always enabled when this function is called and
 	 * ARCH_PERFMON_EVENTSEL_INT is always set.
 	 */
-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;

@@ -757,7 +757,7 @@ static void amd_pmu_enable_all(int added)

 	amd_brs_enable_all();

-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
 		/* only activate events which are marked as active */
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
@@ -980,7 +980,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
 	/* Clear any reserved bits set by buggy microcode */
 	status &= amd_pmu_global_cntr_mask;

-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;

@@ -1315,7 +1315,7 @@ static __initconst const struct x86_pmu amd_pmu = {
 	.addr_offset = amd_pmu_addr_offset,
 	.event_map = amd_pmu_event_map,
 	.max_events = ARRAY_SIZE(amd_perfmon_event_map),
-	.num_counters = AMD64_NUM_COUNTERS,
+	.cntr_mask64 = GENMASK_ULL(AMD64_NUM_COUNTERS - 1, 0),
 	.add = amd_pmu_add_event,
 	.del = amd_pmu_del_event,
 	.cntval_bits = 48,
@@ -1414,7 +1414,7 @@ static int __init amd_core_pmu_init(void)
 	 */
 	x86_pmu.eventsel = MSR_F15H_PERF_CTL;
 	x86_pmu.perfctr = MSR_F15H_PERF_CTR;
-	x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE;
+	x86_pmu.cntr_mask64 = GENMASK_ULL(AMD64_NUM_COUNTERS_CORE - 1, 0);

 	/* Check for Performance Monitoring v2 support */
 	if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) {
@@ -1424,9 +1424,9 @@ static int __init amd_core_pmu_init(void)
 		x86_pmu.version = 2;

 		/* Find the number of available Core PMCs */
-		x86_pmu.num_counters = ebx.split.num_core_pmc;
+		x86_pmu.cntr_mask64 = GENMASK_ULL(ebx.split.num_core_pmc - 1, 0);

-		amd_pmu_global_cntr_mask = (1ULL << x86_pmu.num_counters) - 1;
+		amd_pmu_global_cntr_mask = x86_pmu.cntr_mask64;

 		/* Update PMC handling functions */
 		x86_pmu.enable_all = amd_pmu_v2_enable_all;
@@ -1454,12 +1454,12 @@ static int __init amd_core_pmu_init(void)
 		 * even numbered counter that has a consecutive adjacent odd
 		 * numbered counter following it.
 		 */
-		for (i = 0; i < x86_pmu.num_counters - 1; i += 2)
+		for (i = 0; i < x86_pmu_max_num_counters(NULL) - 1; i += 2)
 			even_ctr_mask |= BIT_ULL(i);

 		pair_constraint = (struct event_constraint)
 				  __EVENT_CONSTRAINT(0, even_ctr_mask, 0,
-						     x86_pmu.num_counters / 2, 0,
+						     x86_pmu_max_num_counters(NULL) / 2, 0,
 						     PERF_X86_EVENT_PAIR);

 		x86_pmu.get_event_constraints = amd_get_event_constraints_f17h;

arch/x86/events/core.c

Lines changed: 47 additions & 51 deletions

@@ -189,53 +189,57 @@ static DEFINE_MUTEX(pmc_reserve_mutex);

 #ifdef CONFIG_X86_LOCAL_APIC

-static inline int get_possible_num_counters(void)
+static inline u64 get_possible_counter_mask(void)
 {
-	int i, num_counters = x86_pmu.num_counters;
+	u64 cntr_mask = x86_pmu.cntr_mask64;
+	int i;

 	if (!is_hybrid())
-		return num_counters;
+		return cntr_mask;

 	for (i = 0; i < x86_pmu.num_hybrid_pmus; i++)
-		num_counters = max_t(int, num_counters, x86_pmu.hybrid_pmu[i].num_counters);
+		cntr_mask |= x86_pmu.hybrid_pmu[i].cntr_mask64;

-	return num_counters;
+	return cntr_mask;
 }

 static bool reserve_pmc_hardware(void)
 {
-	int i, num_counters = get_possible_num_counters();
+	u64 cntr_mask = get_possible_counter_mask();
+	int i, end;

-	for (i = 0; i < num_counters; i++) {
+	for_each_set_bit(i, (unsigned long *)&cntr_mask, X86_PMC_IDX_MAX) {
 		if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
 			goto perfctr_fail;
 	}

-	for (i = 0; i < num_counters; i++) {
+	for_each_set_bit(i, (unsigned long *)&cntr_mask, X86_PMC_IDX_MAX) {
 		if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
 			goto eventsel_fail;
 	}

 	return true;

 eventsel_fail:
-	for (i--; i >= 0; i--)
+	end = i;
+	for_each_set_bit(i, (unsigned long *)&cntr_mask, end)
 		release_evntsel_nmi(x86_pmu_config_addr(i));
-
-	i = num_counters;
+	i = X86_PMC_IDX_MAX;

 perfctr_fail:
-	for (i--; i >= 0; i--)
+	end = i;
+	for_each_set_bit(i, (unsigned long *)&cntr_mask, end)
 		release_perfctr_nmi(x86_pmu_event_addr(i));

 	return false;
 }

 static void release_pmc_hardware(void)
 {
-	int i, num_counters = get_possible_num_counters();
+	u64 cntr_mask = get_possible_counter_mask();
+	int i;

-	for (i = 0; i < num_counters; i++) {
+	for_each_set_bit(i, (unsigned long *)&cntr_mask, X86_PMC_IDX_MAX) {
 		release_perfctr_nmi(x86_pmu_event_addr(i));
 		release_evntsel_nmi(x86_pmu_config_addr(i));
 	}
@@ -248,7 +252,8 @@ static void release_pmc_hardware(void) {}

 #endif

-bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed)
+bool check_hw_exists(struct pmu *pmu, unsigned long *cntr_mask,
+		     unsigned long *fixed_cntr_mask)
 {
 	u64 val, val_fail = -1, val_new= ~0;
 	int i, reg, reg_fail = -1, ret = 0;
@@ -259,7 +264,7 @@ bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed)
 	 * Check to see if the BIOS enabled any of the counters, if so
 	 * complain and bail.
 	 */
-	for (i = 0; i < num_counters; i++) {
+	for_each_set_bit(i, cntr_mask, X86_PMC_IDX_MAX) {
 		reg = x86_pmu_config_addr(i);
 		ret = rdmsrl_safe(reg, &val);
 		if (ret)
@@ -273,12 +278,12 @@ bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed)
 		}
 	}

-	if (num_counters_fixed) {
+	if (*(u64 *)fixed_cntr_mask) {
 		reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
 		ret = rdmsrl_safe(reg, &val);
 		if (ret)
 			goto msr_fail;
-		for (i = 0; i < num_counters_fixed; i++) {
+		for_each_set_bit(i, fixed_cntr_mask, X86_PMC_IDX_MAX) {
 			if (fixed_counter_disabled(i, pmu))
 				continue;
 			if (val & (0x03ULL << i*4)) {
@@ -679,7 +684,7 @@ void x86_pmu_disable_all(void)
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	int idx;

-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
 		struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
 		u64 val;

@@ -736,7 +741,7 @@ void x86_pmu_enable_all(int added)
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	int idx;

-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
 		struct hw_perf_event *hwc = &cpuc->events[idx]->hw;

 		if (!test_bit(idx, cpuc->active_mask))
@@ -975,7 +980,6 @@ EXPORT_SYMBOL_GPL(perf_assign_events);

 int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 {
-	int num_counters = hybrid(cpuc->pmu, num_counters);
 	struct event_constraint *c;
 	struct perf_event *e;
 	int n0, i, wmin, wmax, unsched = 0;
@@ -1051,7 +1055,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)

 	/* slow path */
 	if (i != n) {
-		int gpmax = num_counters;
+		int gpmax = x86_pmu_max_num_counters(cpuc->pmu);

 		/*
 		 * Do not allow scheduling of more than half the available
@@ -1072,7 +1076,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 		 * the extra Merge events needed by large increment events.
 		 */
 		if (x86_pmu.flags & PMU_FL_PAIR) {
-			gpmax = num_counters - cpuc->n_pair;
+			gpmax -= cpuc->n_pair;
 			WARN_ON(gpmax <= 0);
 		}

@@ -1157,12 +1161,10 @@ static int collect_event(struct cpu_hw_events *cpuc, struct perf_event *event,
  */
 static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
 {
-	int num_counters = hybrid(cpuc->pmu, num_counters);
-	int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
 	struct perf_event *event;
 	int n, max_count;

-	max_count = num_counters + num_counters_fixed;
+	max_count = x86_pmu_num_counters(cpuc->pmu) + x86_pmu_num_counters_fixed(cpuc->pmu);

 	/* current number of events already accepted */
 	n = cpuc->n_events;
@@ -1522,13 +1524,13 @@ void perf_event_print_debug(void)
 	u64 pebs, debugctl;
 	int cpu = smp_processor_id();
 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
-	int num_counters = hybrid(cpuc->pmu, num_counters);
-	int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
+	unsigned long *cntr_mask = hybrid(cpuc->pmu, cntr_mask);
+	unsigned long *fixed_cntr_mask = hybrid(cpuc->pmu, fixed_cntr_mask);
 	struct event_constraint *pebs_constraints = hybrid(cpuc->pmu, pebs_constraints);
 	unsigned long flags;
 	int idx;

-	if (!num_counters)
+	if (!*(u64 *)cntr_mask)
 		return;

 	local_irq_save(flags);
@@ -1555,7 +1557,7 @@ void perf_event_print_debug(void)
 	}
 	pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);

-	for (idx = 0; idx < num_counters; idx++) {
+	for_each_set_bit(idx, cntr_mask, X86_PMC_IDX_MAX) {
 		rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
 		rdmsrl(x86_pmu_event_addr(idx), pmc_count);

@@ -1568,7 +1570,7 @@ void perf_event_print_debug(void)
 		pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
 			cpu, idx, prev_left);
 	}
-	for (idx = 0; idx < num_counters_fixed; idx++) {
+	for_each_set_bit(idx, fixed_cntr_mask, X86_PMC_IDX_MAX) {
 		if (fixed_counter_disabled(idx, cpuc->pmu))
 			continue;
 		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
@@ -1682,7 +1684,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
 	 */
 	apic_write(APIC_LVTPC, APIC_DM_NMI);

-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;

@@ -2038,18 +2040,15 @@ static void _x86_pmu_read(struct perf_event *event)
 	static_call(x86_pmu_update)(event);
 }

-void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed,
-			  u64 intel_ctrl)
+void x86_pmu_show_pmu_cap(struct pmu *pmu)
 {
 	pr_info("... version: %d\n", x86_pmu.version);
 	pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
-	pr_info("... generic registers: %d\n", num_counters);
+	pr_info("... generic registers: %d\n", x86_pmu_num_counters(pmu));
 	pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask);
 	pr_info("... max period: %016Lx\n", x86_pmu.max_period);
-	pr_info("... fixed-purpose events: %lu\n",
-		hweight64((((1ULL << num_counters_fixed) - 1)
-			   << INTEL_PMC_IDX_FIXED) & intel_ctrl));
-	pr_info("... event mask: %016Lx\n", intel_ctrl);
+	pr_info("... fixed-purpose events: %d\n", x86_pmu_num_counters_fixed(pmu));
+	pr_info("... event mask: %016Lx\n", hybrid(pmu, intel_ctrl));
 }

 static int __init init_hw_perf_events(void)
@@ -2086,7 +2085,7 @@ static int __init init_hw_perf_events(void)
 	pmu_check_apic();

 	/* sanity check that the hardware exists or is emulated */
-	if (!check_hw_exists(&pmu, x86_pmu.num_counters, x86_pmu.num_counters_fixed))
+	if (!check_hw_exists(&pmu, x86_pmu.cntr_mask, x86_pmu.fixed_cntr_mask))
 		goto out_bad_pmu;

 	pr_cont("%s PMU driver.\n", x86_pmu.name);
@@ -2097,14 +2096,14 @@ static int __init init_hw_perf_events(void)
 		quirk->func();

 	if (!x86_pmu.intel_ctrl)
-		x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
+		x86_pmu.intel_ctrl = x86_pmu.cntr_mask64;

 	perf_events_lapic_init();
 	register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI");

 	unconstrained = (struct event_constraint)
-		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
-				   0, x86_pmu.num_counters, 0, 0);
+		__EVENT_CONSTRAINT(0, x86_pmu.cntr_mask64,
+				   0, x86_pmu_num_counters(NULL), 0, 0);

 	x86_pmu_format_group.attrs = x86_pmu.format_attrs;

@@ -2113,11 +2112,8 @@ static int __init init_hw_perf_events(void)

 	pmu.attr_update = x86_pmu.attr_update;

-	if (!is_hybrid()) {
-		x86_pmu_show_pmu_cap(x86_pmu.num_counters,
-				     x86_pmu.num_counters_fixed,
-				     x86_pmu.intel_ctrl);
-	}
+	if (!is_hybrid())
+		x86_pmu_show_pmu_cap(NULL);

 	if (!x86_pmu.read)
 		x86_pmu.read = _x86_pmu_read;
@@ -2481,7 +2477,7 @@ void perf_clear_dirty_counters(void)
 	for_each_set_bit(i, cpuc->dirty, X86_PMC_IDX_MAX) {
 		if (i >= INTEL_PMC_IDX_FIXED) {
 			/* Metrics and fake events don't have corresponding HW counters. */
-			if ((i - INTEL_PMC_IDX_FIXED) >= hybrid(cpuc->pmu, num_counters_fixed))
+			if (!test_bit(i - INTEL_PMC_IDX_FIXED, hybrid(cpuc->pmu, fixed_cntr_mask)))
 				continue;

 			wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED), 0);
@@ -2983,8 +2979,8 @@ void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
 	 * base PMU holds the correct number of counters for P-cores.
 	 */
 	cap->version = x86_pmu.version;
-	cap->num_counters_gp = x86_pmu.num_counters;
-	cap->num_counters_fixed = x86_pmu.num_counters_fixed;
+	cap->num_counters_gp = x86_pmu_num_counters(NULL);
+	cap->num_counters_fixed = x86_pmu_num_counters_fixed(NULL);
 	cap->bit_width_gp = x86_pmu.cntval_bits;
 	cap->bit_width_fixed = x86_pmu.cntval_bits;
 	cap->events_mask = (unsigned int)x86_pmu.events_maskl;
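
The x86_pmu_num_counters(), x86_pmu_num_counters_fixed() and x86_pmu_max_num_counters() helpers used throughout the diff are introduced in arch/x86/events/perf_event.h, one of the nine changed files not shown in this excerpt. Judging from the call sites above (a count for capability reporting and the boot-time prints, an exclusive upper bound for index loops), they presumably reduce to a population count and a find-last-set on the per-PMU counter mask; the definitions below are an inferred sketch under that assumption, not text copied from the commit.

/*
 * Inferred sketch only; relies on hweight64()/fls64() from <linux/bitops.h>
 * and the hybrid() accessor from arch/x86/events/perf_event.h.
 */
static inline int x86_pmu_num_counters(struct pmu *pmu)
{
	return hweight64(hybrid(pmu, cntr_mask64));	/* counters that exist */
}

static inline int x86_pmu_max_num_counters(struct pmu *pmu)
{
	return fls64(hybrid(pmu, cntr_mask64));		/* highest index + 1 */
}

static inline int x86_pmu_num_counters_fixed(struct pmu *pmu)
{
	return hweight64(hybrid(pmu, fixed_cntr_mask64));
}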
