Skip to content

Commit 9734e25

Browse files
Kan Liang authored and Peter Zijlstra committed
perf: Fix the throttle logic for a group
The current throttle logic doesn't work well with a group, e.g., the following sampling-read case. $ perf record -e "{cycles,cycles}:S" ... $ perf report -D | grep THROTTLE | tail -2 THROTTLE events: 426 ( 9.0%) UNTHROTTLE events: 425 ( 9.0%) $ perf report -D | grep PERF_RECORD_SAMPLE -a4 | tail -n 5 0 1020120874009167 0x74970 [0x68]: PERF_RECORD_SAMPLE(IP, 0x1): ... sample_read: .... group nr 2 ..... id 0000000000000327, value 000000000cbb993a, lost 0 ..... id 0000000000000328, value 00000002211c26df, lost 0 The second cycles event has a much larger value than the first cycles event in the same group. The current throttle logic in the generic code only logs the THROTTLE event. It relies on the specific driver implementation to disable events. For all ARCHs, the implementation is similar. Only the event is disabled, rather than the group. The logic to disable the group should be generic for all ARCHs. Add the logic in the generic code. The following patch will remove the buggy driver-specific implementation. The throttle only happens when an event is overflowed. Stop the entire group when any event in the group triggers the throttle. The MAX_INTERRUPTS is set to all throttle events. The unthrottled could happen in 3 places. - event/group sched. All events in the group are scheduled one by one. All of them will be unthrottled eventually. Nothing needs to be changed. - The perf_adjust_freq_unthr_events for each tick. Needs to restart the group altogether. - The __perf_event_period(). The whole group needs to be restarted altogether as well. With the fix, $ sudo perf report -D | grep PERF_RECORD_SAMPLE -a4 | tail -n 5 0 3573470770332 0x12f5f8 [0x70]: PERF_RECORD_SAMPLE(IP, 0x2): ... sample_read: .... group nr 2 ..... id 0000000000000a28, value 00000004fd3dfd8f, lost 0 ..... 
id 0000000000000a29, value 00000004fd3dfd8f, lost 0 Suggested-by: "Peter Zijlstra (Intel)" <[email protected]> Signed-off-by: Kan Liang <[email protected]> Signed-off-by: Peter Zijlstra (Intel) <[email protected]> Acked-by: Namhyung Kim <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent ca55950 commit 9734e25

File tree

1 file changed

+46
-20
lines changed

1 file changed

+46
-20
lines changed

kernel/events/core.c

Lines changed: 46 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2645,6 +2645,39 @@ void perf_event_disable_inatomic(struct perf_event *event)
26452645
static void perf_log_throttle(struct perf_event *event, int enable);
26462646
static void perf_log_itrace_start(struct perf_event *event);
26472647

2648+
static void perf_event_unthrottle(struct perf_event *event, bool start)
2649+
{
2650+
event->hw.interrupts = 0;
2651+
if (start)
2652+
event->pmu->start(event, 0);
2653+
perf_log_throttle(event, 1);
2654+
}
2655+
2656+
static void perf_event_throttle(struct perf_event *event)
2657+
{
2658+
event->pmu->stop(event, 0);
2659+
event->hw.interrupts = MAX_INTERRUPTS;
2660+
perf_log_throttle(event, 0);
2661+
}
2662+
2663+
static void perf_event_unthrottle_group(struct perf_event *event, bool skip_start_event)
2664+
{
2665+
struct perf_event *sibling, *leader = event->group_leader;
2666+
2667+
perf_event_unthrottle(leader, skip_start_event ? leader != event : true);
2668+
for_each_sibling_event(sibling, leader)
2669+
perf_event_unthrottle(sibling, skip_start_event ? sibling != event : true);
2670+
}
2671+
2672+
static void perf_event_throttle_group(struct perf_event *event)
2673+
{
2674+
struct perf_event *sibling, *leader = event->group_leader;
2675+
2676+
perf_event_throttle(leader);
2677+
for_each_sibling_event(sibling, leader)
2678+
perf_event_throttle(sibling);
2679+
}
2680+
26482681
static int
26492682
event_sched_in(struct perf_event *event, struct perf_event_context *ctx)
26502683
{
@@ -2673,10 +2706,8 @@ event_sched_in(struct perf_event *event, struct perf_event_context *ctx)
26732706
* ticks already, also for a heavily scheduling task there is little
26742707
* guarantee it'll get a tick in a timely manner.
26752708
*/
2676-
if (unlikely(event->hw.interrupts == MAX_INTERRUPTS)) {
2677-
perf_log_throttle(event, 1);
2678-
event->hw.interrupts = 0;
2679-
}
2709+
if (unlikely(event->hw.interrupts == MAX_INTERRUPTS))
2710+
perf_event_unthrottle(event, false);
26802711

26812712
perf_pmu_disable(event->pmu);
26822713

@@ -4254,12 +4285,8 @@ static void perf_adjust_freq_unthr_events(struct list_head *event_list)
42544285

42554286
hwc = &event->hw;
42564287

4257-
if (hwc->interrupts == MAX_INTERRUPTS) {
4258-
hwc->interrupts = 0;
4259-
perf_log_throttle(event, 1);
4260-
if (!is_event_in_freq_mode(event))
4261-
event->pmu->start(event, 0);
4262-
}
4288+
if (hwc->interrupts == MAX_INTERRUPTS)
4289+
perf_event_unthrottle_group(event, is_event_in_freq_mode(event));
42634290

42644291
if (!is_event_in_freq_mode(event))
42654292
continue;
@@ -6181,21 +6208,21 @@ static void __perf_event_period(struct perf_event *event,
61816208
active = (event->state == PERF_EVENT_STATE_ACTIVE);
61826209
if (active) {
61836210
perf_pmu_disable(event->pmu);
6184-
/*
6185-
* We could be throttled; unthrottle now to avoid the tick
6186-
* trying to unthrottle while we already re-started the event.
6187-
*/
6188-
if (event->hw.interrupts == MAX_INTERRUPTS) {
6189-
event->hw.interrupts = 0;
6190-
perf_log_throttle(event, 1);
6191-
}
61926211
event->pmu->stop(event, PERF_EF_UPDATE);
61936212
}
61946213

61956214
local64_set(&event->hw.period_left, 0);
61966215

61976216
if (active) {
61986217
event->pmu->start(event, PERF_EF_RELOAD);
6218+
/*
6219+
* Once the period is force-reset, the event starts immediately.
6220+
* But the event/group could be throttled. Unthrottle the
6221+
* event/group now to avoid the next tick trying to unthrottle
6222+
* while we already re-started the event/group.
6223+
*/
6224+
if (event->hw.interrupts == MAX_INTERRUPTS)
6225+
perf_event_unthrottle_group(event, true);
61996226
perf_pmu_enable(event->pmu);
62006227
}
62016228
}
@@ -10084,8 +10111,7 @@ __perf_event_account_interrupt(struct perf_event *event, int throttle)
1008410111
if (unlikely(throttle && hwc->interrupts >= max_samples_per_tick)) {
1008510112
__this_cpu_inc(perf_throttled_count);
1008610113
tick_dep_set_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
10087-
hwc->interrupts = MAX_INTERRUPTS;
10088-
perf_log_throttle(event, 0);
10114+
perf_event_throttle_group(event);
1008910115
ret = 1;
1009010116
}
1009110117

0 commit comments

Comments
 (0)