Commit 95ed6c7

captain5050 authored and Ingo Molnar committed
perf/cgroup: Order events in RB tree by cgroup id
If one is monitoring 6 events on 20 cgroups the per-CPU RB tree will hold 120 events. The scheduling in of the events currently iterates over all events looking to see which events match the task's cgroup or its cgroup hierarchy. If a task is in 1 cgroup with 6 events, then 114 events are considered unnecessarily.

This change orders events in the RB tree by cgroup id if it is present. This means scheduling in may go directly to events associated with the task's cgroup if one is present.

The per-CPU iterator storage in visit_groups_merge is sized sufficiently for an iterator per cgroup depth, where different iterators are needed for the task's cgroup and parent cgroups. By considering the set of iterators when visiting, the lowest group_index event may be selected and the insertion order group_index property is maintained. This also allows event rotation to function correctly, as although events are grouped into a cgroup, rotation always selects the lowest group_index event to rotate (delete/insert into the tree) and the min heap of iterators makes it so that the group_index order is maintained.

Signed-off-by: Ian Rogers <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
1 parent c2283c9 commit 95ed6c7
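
To make the new ordering concrete, here is a minimal userspace sketch of the three-level sort key the commit introduces: (cpu, cgroup id, group_index), with cgroup-less events sorting before any cgroup on the same CPU. This is not the kernel code; struct ev and its fields are hypothetical stand-ins for struct perf_event.

/*
 * Simplified stand-in for the (cpu, cgroup id, group_index) ordering.
 * Field names are hypothetical, not the kernel's struct perf_event.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct ev {
	int cpu;
	uint64_t cgrp_id;      /* 0 means "no cgroup"; sorts before any cgroup */
	uint64_t group_index;  /* insertion order, used as the final tie-break */
};

static bool ev_less(const struct ev *l, const struct ev *r)
{
	if (l->cpu != r->cpu)
		return l->cpu < r->cpu;
	if (l->cgrp_id != r->cgrp_id)
		return l->cgrp_id < r->cgrp_id;
	return l->group_index < r->group_index;
}

int main(void)
{
	struct ev no_cgrp = { .cpu = 0, .cgrp_id = 0,   .group_index = 7 };
	struct ev in_cgrp = { .cpu = 0, .cgrp_id = 123, .group_index = 3 };

	/* Prints 1: on the same CPU, the cgroup-less event sorts first. */
	printf("%d\n", ev_less(&no_cgrp, &in_cgrp));
	return 0;
}

With equal cpu and cgroup id the comparison falls back to group_index, which is how scheduling and rotation keep their existing insertion-order behaviour within a cgroup.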


kernel/events/core.c

Lines changed: 84 additions & 10 deletions
@@ -1577,6 +1577,30 @@ perf_event_groups_less(struct perf_event *left, struct perf_event *right)
 	if (left->cpu > right->cpu)
 		return false;
 
+#ifdef CONFIG_CGROUP_PERF
+	if (left->cgrp != right->cgrp) {
+		if (!left->cgrp || !left->cgrp->css.cgroup) {
+			/*
+			 * Left has no cgroup but right does, no cgroups come
+			 * first.
+			 */
+			return true;
+		}
+		if (!right->cgrp || !right->cgrp->css.cgroup) {
+			/*
+			 * Right has no cgroup but left does, no cgroups come
+			 * first.
+			 */
+			return false;
+		}
+		/* Two dissimilar cgroups, order by id. */
+		if (left->cgrp->css.cgroup->kn->id < right->cgrp->css.cgroup->kn->id)
+			return true;
+
+		return false;
+	}
+#endif
+
 	if (left->group_index < right->group_index)
 		return true;
 	if (left->group_index > right->group_index)
@@ -1656,25 +1680,48 @@ del_event_from_groups(struct perf_event *event, struct perf_event_context *ctx)
 }
 
 /*
- * Get the leftmost event in the @cpu subtree.
+ * Get the leftmost event in the cpu/cgroup subtree.
  */
 static struct perf_event *
-perf_event_groups_first(struct perf_event_groups *groups, int cpu)
+perf_event_groups_first(struct perf_event_groups *groups, int cpu,
+			struct cgroup *cgrp)
 {
 	struct perf_event *node_event = NULL, *match = NULL;
 	struct rb_node *node = groups->tree.rb_node;
+#ifdef CONFIG_CGROUP_PERF
+	u64 node_cgrp_id, cgrp_id = 0;
+
+	if (cgrp)
+		cgrp_id = cgrp->kn->id;
+#endif
 
 	while (node) {
 		node_event = container_of(node, struct perf_event, group_node);
 
 		if (cpu < node_event->cpu) {
 			node = node->rb_left;
-		} else if (cpu > node_event->cpu) {
+			continue;
+		}
+		if (cpu > node_event->cpu) {
 			node = node->rb_right;
-		} else {
-			match = node_event;
+			continue;
+		}
+#ifdef CONFIG_CGROUP_PERF
+		node_cgrp_id = 0;
+		if (node_event->cgrp && node_event->cgrp->css.cgroup)
+			node_cgrp_id = node_event->cgrp->css.cgroup->kn->id;
+
+		if (cgrp_id < node_cgrp_id) {
 			node = node->rb_left;
+			continue;
+		}
+		if (cgrp_id > node_cgrp_id) {
+			node = node->rb_right;
+			continue;
 		}
+#endif
+		match = node_event;
+		node = node->rb_left;
 	}
 
 	return match;
@@ -1687,12 +1734,26 @@ static struct perf_event *
 perf_event_groups_next(struct perf_event *event)
 {
 	struct perf_event *next;
+#ifdef CONFIG_CGROUP_PERF
+	u64 curr_cgrp_id = 0;
+	u64 next_cgrp_id = 0;
+#endif
 
 	next = rb_entry_safe(rb_next(&event->group_node), typeof(*event), group_node);
-	if (next && next->cpu == event->cpu)
-		return next;
+	if (next == NULL || next->cpu != event->cpu)
+		return NULL;
 
-	return NULL;
+#ifdef CONFIG_CGROUP_PERF
+	if (event->cgrp && event->cgrp->css.cgroup)
+		curr_cgrp_id = event->cgrp->css.cgroup->kn->id;
+
+	if (next->cgrp && next->cgrp->css.cgroup)
+		next_cgrp_id = next->cgrp->css.cgroup->kn->id;
+
+	if (curr_cgrp_id != next_cgrp_id)
+		return NULL;
+#endif
+	return next;
 }
 
 /*
@@ -3473,6 +3534,9 @@ static noinline int visit_groups_merge(struct perf_cpu_context *cpuctx,
 			int (*func)(struct perf_event *, void *),
 			void *data)
 {
+#ifdef CONFIG_CGROUP_PERF
+	struct cgroup_subsys_state *css = NULL;
+#endif
 	/* Space for per CPU and/or any CPU event iterators. */
 	struct perf_event *itrs[2];
 	struct min_heap event_heap;
@@ -3487,18 +3551,28 @@ static noinline int visit_groups_merge(struct perf_cpu_context *cpuctx,
 		};
 
 		lockdep_assert_held(&cpuctx->ctx.lock);
+
+#ifdef CONFIG_CGROUP_PERF
+		if (cpuctx->cgrp)
+			css = &cpuctx->cgrp->css;
+#endif
 	} else {
 		event_heap = (struct min_heap){
 			.data = itrs,
 			.nr = 0,
 			.size = ARRAY_SIZE(itrs),
 		};
 		/* Events not within a CPU context may be on any CPU. */
-		__heap_add(&event_heap, perf_event_groups_first(groups, -1));
+		__heap_add(&event_heap, perf_event_groups_first(groups, -1, NULL));
 	}
 	evt = event_heap.data;
 
-	__heap_add(&event_heap, perf_event_groups_first(groups, cpu));
+	__heap_add(&event_heap, perf_event_groups_first(groups, cpu, NULL));
+
+#ifdef CONFIG_CGROUP_PERF
+	for (; css; css = css->parent)
+		__heap_add(&event_heap, perf_event_groups_first(groups, cpu, css->cgroup));
+#endif
 
 	min_heapify_all(&event_heap, &perf_min_heap);
 
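As a companion illustration of the iterator merge described in the commit message, the sketch below replaces the per-cgroup RB-tree iterators with sorted arrays and the kernel's min_heap with a linear minimum scan. The property it demonstrates is the one the commit relies on: always taking the smallest group_index head preserves global insertion order across cgroups, which is what keeps rotation correct. All names and data here are made up for the example.

/*
 * Sketch of merging per-cgroup event lists in group_index order.
 * A linear minimum scan stands in for the kernel's min_heap; the
 * data below is invented for illustration.
 */
#include <stdint.h>
#include <stdio.h>

struct iter {
	const uint64_t *gi;  /* remaining group_index values, already sorted */
	int n;               /* how many values remain */
};

/* Index of the iterator whose head has the smallest group_index, or -1. */
static int pick_min(struct iter *it, int nr)
{
	int best = -1;

	for (int i = 0; i < nr; i++) {
		if (it[i].n && (best < 0 || it[i].gi[0] < it[best].gi[0]))
			best = i;
	}
	return best;
}

int main(void)
{
	/* One sorted group_index list per cgroup level. */
	const uint64_t cgrp_a[] = { 1, 4, 9 };
	const uint64_t cgrp_b[] = { 2, 3, 8 };
	struct iter it[] = { { cgrp_a, 3 }, { cgrp_b, 3 } };
	int i;

	/* Visits events across both cgroups in global group_index order:
	 * prints "1 2 3 4 8 9". */
	while ((i = pick_min(it, 2)) >= 0) {
		printf("%llu ", (unsigned long long)it[i].gi[0]);
		it[i].gi++;
		it[i].n--;
	}
	printf("\n");
	return 0;
}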
