
Commit a5398bf

Kan Liang authored and Ingo Molnar committed
perf/core: Flush PMU internal buffers for per-CPU events
perf/core: Flush PMU internal buffers for per-CPU events

Sometimes the PMU internal buffers have to be flushed for per-CPU events during a context switch, e.g., large PEBS. Otherwise, the perf tool may report samples in locations that do not belong to the process the samples were taken from, because PEBS does not tag samples with PID/TID.

The current code only flushes the buffers for per-task events; it doesn't check per-CPU events.

Add a new event state flag, PERF_ATTACH_SCHED_CB, to indicate that the PMU internal buffers have to be flushed for this event during a context switch.

Add sched_cb_entry and perf_sched_cb_usages back to track the PMUs/cpuctxs whose buffers need to be flushed.

This patch only needs to invoke sched_task() for per-CPU events; per-task events are already handled in perf_event_context_sched_in/out.

Fixes: 9c964ef ("perf/x86/intel: Drain the PEBS buffer during context switches")
Reported-by: Gabriel Marin <[email protected]>
Originally-by: Namhyung Kim <[email protected]>
Signed-off-by: Kan Liang <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
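For context, a PMU driver would opt a per-CPU event into this path roughly as follows. This is a minimal sketch, not code from this commit: my_pmu_event_add() and needs_buffer_flush() are hypothetical names; only PERF_ATTACH_SCHED_CB and perf_sched_cb_inc() come from the patch.

/* Hypothetical driver-side opt-in; sketch only, not part of this commit. */
static void my_pmu_event_add(struct perf_event *event)
{
	/* needs_buffer_flush() is a made-up predicate, e.g. "uses large PEBS". */
	if (needs_buffer_flush(event)) {
		/*
		 * Mark the event so account_event()/unaccount_event() keep
		 * the sched-out path armed, and register this CPU's context
		 * so perf_pmu_sched_task() will call pmu->sched_task().
		 */
		event->attach_state |= PERF_ATTACH_SCHED_CB;
		perf_sched_cb_inc(event->pmu);
	}
}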
1 parent a38fd87 commit a5398bf

2 files changed: +40 −4 lines changed

include/linux/perf_event.h
Lines changed: 2 additions & 0 deletions

@@ -606,6 +606,7 @@ struct swevent_hlist {
 #define PERF_ATTACH_TASK		0x04
 #define PERF_ATTACH_TASK_DATA		0x08
 #define PERF_ATTACH_ITRACE		0x10
+#define PERF_ATTACH_SCHED_CB		0x20
 
 struct perf_cgroup;
 struct perf_buffer;
@@ -872,6 +873,7 @@ struct perf_cpu_context {
 	struct list_head		cgrp_cpuctx_entry;
 #endif
 
+	struct list_head		sched_cb_entry;
 	int				sched_cb_usage;
 
 	int				online;

kernel/events/core.c

Lines changed: 38 additions & 4 deletions
@@ -386,6 +386,7 @@ static DEFINE_MUTEX(perf_sched_mutex);
 static atomic_t perf_sched_count;
 
 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
+static DEFINE_PER_CPU(int, perf_sched_cb_usages);
 static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
 
 static atomic_t nr_mmap_events __read_mostly;
@@ -3461,19 +3462,27 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
 	}
 }
 
+static DEFINE_PER_CPU(struct list_head, sched_cb_list);
+
 void perf_sched_cb_dec(struct pmu *pmu)
 {
 	struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
 
-	--cpuctx->sched_cb_usage;
+	this_cpu_dec(perf_sched_cb_usages);
+
+	if (!--cpuctx->sched_cb_usage)
+		list_del(&cpuctx->sched_cb_entry);
 }
 
 
 void perf_sched_cb_inc(struct pmu *pmu)
 {
 	struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
 
-	cpuctx->sched_cb_usage++;
+	if (!cpuctx->sched_cb_usage++)
+		list_add(&cpuctx->sched_cb_entry, this_cpu_ptr(&sched_cb_list));
+
+	this_cpu_inc(perf_sched_cb_usages);
 }
 
 /*
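The matching teardown pairs perf_sched_cb_dec() with the opt-in sketch above: once the last such event on a CPU drops sched_cb_usage to zero, the cpuctx is unlinked from sched_cb_list. A hypothetical driver-side counterpart, for illustration only:

/* Hypothetical counterpart to the opt-in sketch; not part of this commit. */
static void my_pmu_event_del(struct perf_event *event)
{
	if (event->attach_state & PERF_ATTACH_SCHED_CB)
		perf_sched_cb_dec(event->pmu);
}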
@@ -3502,6 +3511,24 @@ static void __perf_pmu_sched_task(struct perf_cpu_context *cpuctx, bool sched_in
 	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
 }
 
+static void perf_pmu_sched_task(struct task_struct *prev,
+				struct task_struct *next,
+				bool sched_in)
+{
+	struct perf_cpu_context *cpuctx;
+
+	if (prev == next)
+		return;
+
+	list_for_each_entry(cpuctx, this_cpu_ptr(&sched_cb_list), sched_cb_entry) {
+		/* will be handled in perf_event_context_sched_in/out */
+		if (cpuctx->task_ctx)
+			continue;
+
+		__perf_pmu_sched_task(cpuctx, sched_in);
+	}
+}
+
 static void perf_event_switch(struct task_struct *task,
 			      struct task_struct *next_prev, bool sched_in);
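perf_pmu_sched_task() above reaches the driver through pmu->sched_task() via __perf_pmu_sched_task(). A callback might look like the sketch below; the signature matches struct pmu at this point in the tree, but my_pmu_drain_buffer() is a hypothetical stand-in for a driver's real drain routine (the PEBS drain in intel_ds.c is the real-world case this commit targets):

/* Sketch of a pmu->sched_task() callback; my_pmu_drain_buffer() is hypothetical. */
static void my_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
{
	/* Flush buffered samples before they get attributed to the next task. */
	if (!sched_in)
		my_pmu_drain_buffer();
}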

@@ -3524,6 +3551,9 @@ void __perf_event_task_sched_out(struct task_struct *task,
 {
 	int ctxn;
 
+	if (__this_cpu_read(perf_sched_cb_usages))
+		perf_pmu_sched_task(task, next, false);
+
 	if (atomic_read(&nr_switch_events))
 		perf_event_switch(task, next, false);

@@ -3832,6 +3862,9 @@ void __perf_event_task_sched_in(struct task_struct *prev,
 
 	if (atomic_read(&nr_switch_events))
 		perf_event_switch(task, prev, true);
+
+	if (__this_cpu_read(perf_sched_cb_usages))
+		perf_pmu_sched_task(prev, task, true);
 }
 
 static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
@@ -4656,7 +4689,7 @@ static void unaccount_event(struct perf_event *event)
 	if (event->parent)
 		return;
 
-	if (event->attach_state & PERF_ATTACH_TASK)
+	if (event->attach_state & (PERF_ATTACH_TASK | PERF_ATTACH_SCHED_CB))
 		dec = true;
 	if (event->attr.mmap || event->attr.mmap_data)
 		atomic_dec(&nr_mmap_events);
@@ -11175,7 +11208,7 @@ static void account_event(struct perf_event *event)
 	if (event->parent)
 		return;
 
-	if (event->attach_state & PERF_ATTACH_TASK)
+	if (event->attach_state & (PERF_ATTACH_TASK | PERF_ATTACH_SCHED_CB))
 		inc = true;
 	if (event->attr.mmap || event->attr.mmap_data)
 		atomic_inc(&nr_mmap_events);
@@ -12972,6 +13005,7 @@ static void __init perf_event_init_all_cpus(void)
 #ifdef CONFIG_CGROUP_PERF
 		INIT_LIST_HEAD(&per_cpu(cgrp_cpuctx_list, cpu));
 #endif
+		INIT_LIST_HEAD(&per_cpu(sched_cb_list, cpu));
 	}
 }
