Skip to content

Commit 75013c6

Browse files
committed
Merge tag 'perf_urgent_for_v5.12-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Borislav Petkov:

 - Make sure PMU internal buffers are flushed for per-CPU events too and
   properly handle PID/TID for large PEBS.

 - Handle the case properly when there's no PMU and therefore return an
   empty list of perf MSRs for VMX to switch instead of reading random
   garbage from the stack.

* tag 'perf_urgent_for_v5.12-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/perf: Use RET0 as default for guest_get_msrs to handle "no PMU" case
  perf/x86/intel: Set PERF_ATTACH_SCHED_CB for large PEBS and LBR
  perf/core: Flush PMU internal buffers for per-CPU events
2 parents 836d7f0 + c8e2fe1 commit 75013c6

File tree

5 files changed

+51
-15
lines changed

5 files changed

+51
-15
lines changed

arch/x86/events/core.c

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,11 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx);
8181
DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs);
8282
DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases);
8383

84-
DEFINE_STATIC_CALL_NULL(x86_pmu_guest_get_msrs, *x86_pmu.guest_get_msrs);
84+
/*
85+
* This one is magic, it will get called even when PMU init fails (because
86+
* there is no PMU), in which case it should simply return NULL.
87+
*/
88+
DEFINE_STATIC_CALL_RET0(x86_pmu_guest_get_msrs, *x86_pmu.guest_get_msrs);
8589

8690
u64 __read_mostly hw_cache_event_ids
8791
[PERF_COUNT_HW_CACHE_MAX]
@@ -1944,13 +1948,6 @@ static void _x86_pmu_read(struct perf_event *event)
19441948
x86_perf_event_update(event);
19451949
}
19461950

1947-
static inline struct perf_guest_switch_msr *
1948-
perf_guest_get_msrs_nop(int *nr)
1949-
{
1950-
*nr = 0;
1951-
return NULL;
1952-
}
1953-
19541951
static int __init init_hw_perf_events(void)
19551952
{
19561953
struct x86_pmu_quirk *quirk;
@@ -2025,7 +2022,7 @@ static int __init init_hw_perf_events(void)
20252022
x86_pmu.read = _x86_pmu_read;
20262023

20272024
if (!x86_pmu.guest_get_msrs)
2028-
x86_pmu.guest_get_msrs = perf_guest_get_msrs_nop;
2025+
x86_pmu.guest_get_msrs = (void *)&__static_call_return0;
20292026

20302027
x86_pmu_static_call_update();
20312028

arch/x86/events/intel/core.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3662,8 +3662,10 @@ static int intel_pmu_hw_config(struct perf_event *event)
36623662
if (!(event->attr.freq || (event->attr.wakeup_events && !event->attr.watermark))) {
36633663
event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
36643664
if (!(event->attr.sample_type &
3665-
~intel_pmu_large_pebs_flags(event)))
3665+
~intel_pmu_large_pebs_flags(event))) {
36663666
event->hw.flags |= PERF_X86_EVENT_LARGE_PEBS;
3667+
event->attach_state |= PERF_ATTACH_SCHED_CB;
3668+
}
36673669
}
36683670
if (x86_pmu.pebs_aliases)
36693671
x86_pmu.pebs_aliases(event);
@@ -3676,6 +3678,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
36763678
ret = intel_pmu_setup_lbr_filter(event);
36773679
if (ret)
36783680
return ret;
3681+
event->attach_state |= PERF_ATTACH_SCHED_CB;
36793682

36803683
/*
36813684
* BTS is set up earlier in this path, so don't account twice

arch/x86/kvm/vmx/vmx.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6580,8 +6580,8 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
65806580
int i, nr_msrs;
65816581
struct perf_guest_switch_msr *msrs;
65826582

6583+
/* Note, nr_msrs may be garbage if perf_guest_get_msrs() returns NULL. */
65836584
msrs = perf_guest_get_msrs(&nr_msrs);
6584-
65856585
if (!msrs)
65866586
return;
65876587

include/linux/perf_event.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -606,6 +606,7 @@ struct swevent_hlist {
606606
#define PERF_ATTACH_TASK 0x04
607607
#define PERF_ATTACH_TASK_DATA 0x08
608608
#define PERF_ATTACH_ITRACE 0x10
609+
#define PERF_ATTACH_SCHED_CB 0x20
609610

610611
struct perf_cgroup;
611612
struct perf_buffer;
@@ -872,6 +873,7 @@ struct perf_cpu_context {
872873
struct list_head cgrp_cpuctx_entry;
873874
#endif
874875

876+
struct list_head sched_cb_entry;
875877
int sched_cb_usage;
876878

877879
int online;

kernel/events/core.c

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,7 @@ static DEFINE_MUTEX(perf_sched_mutex);
386386
static atomic_t perf_sched_count;
387387

388388
static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
389+
static DEFINE_PER_CPU(int, perf_sched_cb_usages);
389390
static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
390391

391392
static atomic_t nr_mmap_events __read_mostly;
@@ -3461,19 +3462,27 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
34613462
}
34623463
}
34633464

3465+
static DEFINE_PER_CPU(struct list_head, sched_cb_list);
3466+
34643467
void perf_sched_cb_dec(struct pmu *pmu)
34653468
{
34663469
struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
34673470

3468-
--cpuctx->sched_cb_usage;
3471+
this_cpu_dec(perf_sched_cb_usages);
3472+
3473+
if (!--cpuctx->sched_cb_usage)
3474+
list_del(&cpuctx->sched_cb_entry);
34693475
}
34703476

34713477

34723478
void perf_sched_cb_inc(struct pmu *pmu)
34733479
{
34743480
struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
34753481

3476-
cpuctx->sched_cb_usage++;
3482+
if (!cpuctx->sched_cb_usage++)
3483+
list_add(&cpuctx->sched_cb_entry, this_cpu_ptr(&sched_cb_list));
3484+
3485+
this_cpu_inc(perf_sched_cb_usages);
34773486
}
34783487

34793488
/*
@@ -3502,6 +3511,24 @@ static void __perf_pmu_sched_task(struct perf_cpu_context *cpuctx, bool sched_in
35023511
perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
35033512
}
35043513

3514+
static void perf_pmu_sched_task(struct task_struct *prev,
3515+
struct task_struct *next,
3516+
bool sched_in)
3517+
{
3518+
struct perf_cpu_context *cpuctx;
3519+
3520+
if (prev == next)
3521+
return;
3522+
3523+
list_for_each_entry(cpuctx, this_cpu_ptr(&sched_cb_list), sched_cb_entry) {
3524+
/* will be handled in perf_event_context_sched_in/out */
3525+
if (cpuctx->task_ctx)
3526+
continue;
3527+
3528+
__perf_pmu_sched_task(cpuctx, sched_in);
3529+
}
3530+
}
3531+
35053532
static void perf_event_switch(struct task_struct *task,
35063533
struct task_struct *next_prev, bool sched_in);
35073534

@@ -3524,6 +3551,9 @@ void __perf_event_task_sched_out(struct task_struct *task,
35243551
{
35253552
int ctxn;
35263553

3554+
if (__this_cpu_read(perf_sched_cb_usages))
3555+
perf_pmu_sched_task(task, next, false);
3556+
35273557
if (atomic_read(&nr_switch_events))
35283558
perf_event_switch(task, next, false);
35293559

@@ -3832,6 +3862,9 @@ void __perf_event_task_sched_in(struct task_struct *prev,
38323862

38333863
if (atomic_read(&nr_switch_events))
38343864
perf_event_switch(task, prev, true);
3865+
3866+
if (__this_cpu_read(perf_sched_cb_usages))
3867+
perf_pmu_sched_task(prev, task, true);
38353868
}
38363869

38373870
static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
@@ -4656,7 +4689,7 @@ static void unaccount_event(struct perf_event *event)
46564689
if (event->parent)
46574690
return;
46584691

4659-
if (event->attach_state & PERF_ATTACH_TASK)
4692+
if (event->attach_state & (PERF_ATTACH_TASK | PERF_ATTACH_SCHED_CB))
46604693
dec = true;
46614694
if (event->attr.mmap || event->attr.mmap_data)
46624695
atomic_dec(&nr_mmap_events);
@@ -11175,7 +11208,7 @@ static void account_event(struct perf_event *event)
1117511208
if (event->parent)
1117611209
return;
1117711210

11178-
if (event->attach_state & PERF_ATTACH_TASK)
11211+
if (event->attach_state & (PERF_ATTACH_TASK | PERF_ATTACH_SCHED_CB))
1117911212
inc = true;
1118011213
if (event->attr.mmap || event->attr.mmap_data)
1118111214
atomic_inc(&nr_mmap_events);
@@ -12972,6 +13005,7 @@ static void __init perf_event_init_all_cpus(void)
1297213005
#ifdef CONFIG_CGROUP_PERF
1297313006
INIT_LIST_HEAD(&per_cpu(cgrp_cpuctx_list, cpu));
1297413007
#endif
13008+
INIT_LIST_HEAD(&per_cpu(sched_cb_list, cpu));
1297513009
}
1297613010
}
1297713011

0 commit comments

Comments (0)