
Commit 33238c5

Peter Zijlstra authored and Ingo Molnar committed
perf/core: Fix event cgroup tracking
Song reports that installing cgroup events is broken since:

  db0503e ("perf/core: Optimize perf_install_in_context()")

The problem being that cgroup events try to track cpuctx->cgrp even for
disabled events, which is pointless and actively harmful since the above
commit. Rework the code to have explicit enable/disable hooks for cgroup
events, such that we can limit cgroup tracking to active events.

More specifically, since the above commit disabled events are no longer
added to their context from the 'right' CPU, and we can't access things
like the current cgroup for a remote CPU.

Cc: <[email protected]> # v5.5+
Fixes: db0503e ("perf/core: Optimize perf_install_in_context()")
Reported-by: Song Liu <[email protected]>
Tested-by: Song Liu <[email protected]>
Reviewed-by: Song Liu <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
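The broken sequence is a cgroup event created disabled and enabled later, since only the enable path is guaranteed to run on the event's target CPU. As a rough illustration (not Song's actual reproducer; the cgroup path and counter choice are assumptions), a minimal userspace sequence exercising that path could look like:

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
        struct perf_event_attr attr;
        long long count;
        int cgrp_fd, ev_fd;

        /* fd on a cgroup directory; this path is only an example */
        cgrp_fd = open("/sys/fs/cgroup/perf_event/test", O_RDONLY);
        if (cgrp_fd < 0)
                return 1;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_HARDWARE;
        attr.config = PERF_COUNT_HW_CPU_CYCLES;
        attr.disabled = 1;      /* installed OFF: no cgroup tracking yet */

        /* with PERF_FLAG_PID_CGROUP, pid is a cgroup fd; cpu must be >= 0 */
        ev_fd = syscall(__NR_perf_event_open, &attr, cgrp_fd, 0, -1,
                        PERF_FLAG_PID_CGROUP);
        if (ev_fd < 0)
                return 1;

        /* enabling is what should start cgroup tracking on the target CPU */
        ioctl(ev_fd, PERF_EVENT_IOC_ENABLE, 0);
        sleep(1);
        ioctl(ev_fd, PERF_EVENT_IOC_DISABLE, 0);

        if (read(ev_fd, &count, sizeof(count)) == (ssize_t)sizeof(count))
                printf("cycles: %lld\n", count);

        close(ev_fd);
        close(cgrp_fd);
        return 0;
}

With the fix, the OFF-to-enabled transition runs perf_cgroup_event_enable() from __perf_event_enable() on the event's CPU, instead of list_add_event() touching cpuctx->cgrp from whichever CPU happened to install the context.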
1 parent: f5e94d1

1 file changed

kernel/events/core.c: 43 additions, 27 deletions
@@ -983,16 +983,10 @@ perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
         event->shadow_ctx_time = now - t->timestamp;
 }
 
-/*
- * Update cpuctx->cgrp so that it is set when first cgroup event is added and
- * cleared when last cgroup event is removed.
- */
 static inline void
-list_update_cgroup_event(struct perf_event *event,
-                         struct perf_event_context *ctx, bool add)
+perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ctx)
 {
         struct perf_cpu_context *cpuctx;
-        struct list_head *cpuctx_entry;
 
         if (!is_cgroup_event(event))
                 return;
@@ -1009,28 +1003,41 @@ list_update_cgroup_event(struct perf_event *event,
          * because if the first would mismatch, the second would not try again
          * and we would leave cpuctx->cgrp unset.
          */
-        if (add && !cpuctx->cgrp) {
+        if (ctx->is_active && !cpuctx->cgrp) {
                 struct perf_cgroup *cgrp = perf_cgroup_from_task(current, ctx);
 
                 if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
                         cpuctx->cgrp = cgrp;
         }
 
-        if (add && ctx->nr_cgroups++)
+        if (ctx->nr_cgroups++)
                 return;
-        else if (!add && --ctx->nr_cgroups)
+
+        list_add(&cpuctx->cgrp_cpuctx_entry,
+                        per_cpu_ptr(&cgrp_cpuctx_list, event->cpu));
+}
+
+static inline void
+perf_cgroup_event_disable(struct perf_event *event, struct perf_event_context *ctx)
+{
+        struct perf_cpu_context *cpuctx;
+
+        if (!is_cgroup_event(event))
                 return;
 
-        /* no cgroup running */
-        if (!add)
+        /*
+         * Because cgroup events are always per-cpu events,
+         * @ctx == &cpuctx->ctx.
+         */
+        cpuctx = container_of(ctx, struct perf_cpu_context, ctx);
+
+        if (--ctx->nr_cgroups)
+                return;
+
+        if (ctx->is_active && cpuctx->cgrp)
                 cpuctx->cgrp = NULL;
 
-        cpuctx_entry = &cpuctx->cgrp_cpuctx_entry;
-        if (add)
-                list_add(cpuctx_entry,
-                         per_cpu_ptr(&cgrp_cpuctx_list, event->cpu));
-        else
-                list_del(cpuctx_entry);
+        list_del(&cpuctx->cgrp_cpuctx_entry);
 }
 
 #else /* !CONFIG_CGROUP_PERF */
@@ -1096,11 +1103,14 @@ static inline u64 perf_cgroup_event_time(struct perf_event *event)
 }
 
 static inline void
-list_update_cgroup_event(struct perf_event *event,
-                         struct perf_event_context *ctx, bool add)
+perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ctx)
 {
 }
 
+static inline void
+perf_cgroup_event_disable(struct perf_event *event, struct perf_event_context *ctx)
+{
+}
 #endif
 
 /*
@@ -1791,13 +1801,14 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
                 add_event_to_groups(event, ctx);
         }
 
-        list_update_cgroup_event(event, ctx, true);
-
         list_add_rcu(&event->event_entry, &ctx->event_list);
         ctx->nr_events++;
         if (event->attr.inherit_stat)
                 ctx->nr_stat++;
 
+        if (event->state > PERF_EVENT_STATE_OFF)
+                perf_cgroup_event_enable(event, ctx);
+
         ctx->generation++;
 }
 
@@ -1976,8 +1987,6 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 
         event->attach_state &= ~PERF_ATTACH_CONTEXT;
 
-        list_update_cgroup_event(event, ctx, false);
-
         ctx->nr_events--;
         if (event->attr.inherit_stat)
                 ctx->nr_stat--;
@@ -1994,8 +2003,10 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
          * of error state is by explicit re-enabling
          * of the event
          */
-        if (event->state > PERF_EVENT_STATE_OFF)
+        if (event->state > PERF_EVENT_STATE_OFF) {
+                perf_cgroup_event_disable(event, ctx);
                 perf_event_set_state(event, PERF_EVENT_STATE_OFF);
+        }
 
         ctx->generation++;
 }
@@ -2226,6 +2237,7 @@ event_sched_out(struct perf_event *event,
 
         if (READ_ONCE(event->pending_disable) >= 0) {
                 WRITE_ONCE(event->pending_disable, -1);
+                perf_cgroup_event_disable(event, ctx);
                 state = PERF_EVENT_STATE_OFF;
         }
         perf_event_set_state(event, state);
@@ -2363,6 +2375,7 @@ static void __perf_event_disable(struct perf_event *event,
         event_sched_out(event, cpuctx, ctx);
 
         perf_event_set_state(event, PERF_EVENT_STATE_OFF);
+        perf_cgroup_event_disable(event, ctx);
 }
 
 /*
@@ -2746,7 +2759,7 @@ static int __perf_install_in_context(void *info)
         }
 
 #ifdef CONFIG_CGROUP_PERF
-        if (is_cgroup_event(event)) {
+        if (event->state > PERF_EVENT_STATE_OFF && is_cgroup_event(event)) {
                 /*
                  * If the current cgroup doesn't match the event's
                  * cgroup, we should not try to schedule it.
@@ -2906,6 +2919,7 @@ static void __perf_event_enable(struct perf_event *event,
         ctx_sched_out(ctx, cpuctx, EVENT_TIME);
 
         perf_event_set_state(event, PERF_EVENT_STATE_INACTIVE);
+        perf_cgroup_event_enable(event, ctx);
 
         if (!ctx->is_active)
                 return;
@@ -3616,8 +3630,10 @@ static int merge_sched_in(struct perf_event *event, void *data)
         }
 
         if (event->state == PERF_EVENT_STATE_INACTIVE) {
-                if (event->attr.pinned)
+                if (event->attr.pinned) {
+                        perf_cgroup_event_disable(event, ctx);
                         perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
+                }
 
                 *can_add_hw = 0;
                 ctx->rotate_necessary = 1;
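Taken together, the hooks pair up as follows: perf_cgroup_event_enable() is called from list_add_event() (only for events above PERF_EVENT_STATE_OFF) and from __perf_event_enable(); perf_cgroup_event_disable() is called from list_del_event(), from event_sched_out() when a pending disable fires, from __perf_event_disable(), and from merge_sched_in() when a pinned event drops to ERROR. __perf_install_in_context() likewise skips the cpuctx->cgrp check for OFF events, so only active events participate in cgroup tracking.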
