Skip to content

Commit d499fd4

Browse files
Waiman-Long authored and htejun committed
cgroup/rstat: Optimize cgroup_rstat_updated_list()
The current design of cgroup_rstat_cpu_pop_updated() is to traverse the updated tree in a way to pop out the leaf nodes first before their parents. This can cause traversal of multiple nodes before a leaf node can be found and popped out. IOW, a given node in the tree can be visited multiple times before the whole operation is done. So it is not very efficient and the code can be hard to read. With the introduction of cgroup_rstat_updated_list() to build a list of cgroups to be flushed first before any flushing operation is being done, we can optimize the way the updated tree nodes are being popped by pushing the parents first to the tail end of the list before their children. In this way, most updated tree nodes will be visited only once with the exception of the subtree root as we still need to go back to its parent and popped it out of its updated_children list. This also makes the code easier to read. Signed-off-by: Waiman Long <[email protected]> Signed-off-by: Tejun Heo <[email protected]>
1 parent 7b91eb6 commit d499fd4

File tree

1 file changed

+91
-62
lines changed

1 file changed

+91
-62
lines changed

kernel/cgroup/rstat.c

Lines changed: 91 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -74,64 +74,109 @@ __bpf_kfunc void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
7474
}
7575

7676
/**
77-
* cgroup_rstat_cpu_pop_updated - iterate and dismantle rstat_cpu updated tree
78-
* @pos: current position
79-
* @root: root of the tree to traversal
77+
* cgroup_rstat_push_children - push children cgroups into the given list
78+
* @head: current head of the list (= subtree root)
79+
* @child: first child of the root
8080
* @cpu: target cpu
81+
* Return: A new singly linked list of cgroups to be flush
8182
*
82-
* Walks the updated rstat_cpu tree on @cpu from @root. %NULL @pos starts
83-
* the traversal and %NULL return indicates the end. During traversal,
84-
* each returned cgroup is unlinked from the tree. Must be called with the
85-
* matching cgroup_rstat_cpu_lock held.
83+
* Iteratively traverse down the cgroup_rstat_cpu updated tree level by
84+
* level and push all the parents first before their next level children
85+
* into a singly linked list built from the tail backward like "pushing"
86+
* cgroups into a stack. The root is pushed by the caller.
87+
*/
88+
static struct cgroup *cgroup_rstat_push_children(struct cgroup *head,
89+
struct cgroup *child, int cpu)
90+
{
91+
struct cgroup *chead = child; /* Head of child cgroup level */
92+
struct cgroup *ghead = NULL; /* Head of grandchild cgroup level */
93+
struct cgroup *parent, *grandchild;
94+
struct cgroup_rstat_cpu *crstatc;
95+
96+
child->rstat_flush_next = NULL;
97+
98+
next_level:
99+
while (chead) {
100+
child = chead;
101+
chead = child->rstat_flush_next;
102+
parent = cgroup_parent(child);
103+
104+
/* updated_next is parent cgroup terminated */
105+
while (child != parent) {
106+
child->rstat_flush_next = head;
107+
head = child;
108+
crstatc = cgroup_rstat_cpu(child, cpu);
109+
grandchild = crstatc->updated_children;
110+
if (grandchild != child) {
111+
/* Push the grand child to the next level */
112+
crstatc->updated_children = child;
113+
grandchild->rstat_flush_next = ghead;
114+
ghead = grandchild;
115+
}
116+
child = crstatc->updated_next;
117+
crstatc->updated_next = NULL;
118+
}
119+
}
120+
121+
if (ghead) {
122+
chead = ghead;
123+
ghead = NULL;
124+
goto next_level;
125+
}
126+
return head;
127+
}
128+
129+
/**
130+
* cgroup_rstat_updated_list - return a list of updated cgroups to be flushed
131+
* @root: root of the cgroup subtree to traverse
132+
* @cpu: target cpu
133+
* Return: A singly linked list of cgroups to be flushed
134+
*
135+
* Walks the updated rstat_cpu tree on @cpu from @root. During traversal,
136+
* each returned cgroup is unlinked from the updated tree.
86137
*
87138
* The only ordering guarantee is that, for a parent and a child pair
88-
* covered by a given traversal, if a child is visited, its parent is
89-
* guaranteed to be visited afterwards.
139+
* covered by a given traversal, the child is before its parent in
140+
* the list.
141+
*
142+
* Note that updated_children is self terminated and points to a list of
143+
* child cgroups if not empty. Whereas updated_next is like a sibling link
144+
* within the children list and terminated by the parent cgroup. An exception
145+
* here is the cgroup root whose updated_next can be self terminated.
90146
*/
91-
static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos,
92-
struct cgroup *root, int cpu)
147+
static struct cgroup *cgroup_rstat_updated_list(struct cgroup *root, int cpu)
93148
{
94-
struct cgroup_rstat_cpu *rstatc;
95-
struct cgroup *parent;
96-
97-
if (pos == root)
98-
return NULL;
149+
raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu);
150+
struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(root, cpu);
151+
struct cgroup *head = NULL, *parent, *child;
152+
unsigned long flags;
99153

100154
/*
101-
* We're gonna walk down to the first leaf and visit/remove it. We
102-
* can pick whatever unvisited node as the starting point.
155+
* The _irqsave() is needed because cgroup_rstat_lock is
156+
* spinlock_t which is a sleeping lock on PREEMPT_RT. Acquiring
157+
* this lock with the _irq() suffix only disables interrupts on
158+
* a non-PREEMPT_RT kernel. The raw_spinlock_t below disables
159+
* interrupts on both configurations. The _irqsave() ensures
160+
* that interrupts are always disabled and later restored.
103161
*/
104-
if (!pos) {
105-
pos = root;
106-
/* return NULL if this subtree is not on-list */
107-
if (!cgroup_rstat_cpu(pos, cpu)->updated_next)
108-
return NULL;
109-
} else {
110-
pos = cgroup_parent(pos);
111-
}
162+
raw_spin_lock_irqsave(cpu_lock, flags);
112163

113-
/* walk down to the first leaf */
114-
while (true) {
115-
rstatc = cgroup_rstat_cpu(pos, cpu);
116-
if (rstatc->updated_children == pos)
117-
break;
118-
pos = rstatc->updated_children;
119-
}
164+
/* Return NULL if this subtree is not on-list */
165+
if (!rstatc->updated_next)
166+
goto unlock_ret;
120167

121168
/*
122-
* Unlink @pos from the tree. As the updated_children list is
169+
* Unlink @root from its parent. As the updated_children list is
123170
* singly linked, we have to walk it to find the removal point.
124-
* However, due to the way we traverse, @pos will be the first
125-
* child in most cases. The only exception is @root.
126171
*/
127-
parent = cgroup_parent(pos);
172+
parent = cgroup_parent(root);
128173
if (parent) {
129174
struct cgroup_rstat_cpu *prstatc;
130175
struct cgroup **nextp;
131176

132177
prstatc = cgroup_rstat_cpu(parent, cpu);
133178
nextp = &prstatc->updated_children;
134-
while (*nextp != pos) {
179+
while (*nextp != root) {
135180
struct cgroup_rstat_cpu *nrstatc;
136181

137182
nrstatc = cgroup_rstat_cpu(*nextp, cpu);
@@ -142,31 +187,15 @@ static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos,
142187
}
143188

144189
rstatc->updated_next = NULL;
145-
return pos;
146-
}
147190

148-
/* Return a list of updated cgroups to be flushed */
149-
static struct cgroup *cgroup_rstat_updated_list(struct cgroup *root, int cpu)
150-
{
151-
raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu);
152-
struct cgroup *head, *tail, *next;
153-
unsigned long flags;
154-
155-
/*
156-
* The _irqsave() is needed because cgroup_rstat_lock is
157-
* spinlock_t which is a sleeping lock on PREEMPT_RT. Acquiring
158-
* this lock with the _irq() suffix only disables interrupts on
159-
* a non-PREEMPT_RT kernel. The raw_spinlock_t below disables
160-
* interrupts on both configurations. The _irqsave() ensures
161-
* that interrupts are always disabled and later restored.
162-
*/
163-
raw_spin_lock_irqsave(cpu_lock, flags);
164-
head = tail = cgroup_rstat_cpu_pop_updated(NULL, root, cpu);
165-
while (tail) {
166-
next = cgroup_rstat_cpu_pop_updated(tail, root, cpu);
167-
tail->rstat_flush_next = next;
168-
tail = next;
169-
}
191+
/* Push @root to the list first before pushing the children */
192+
head = root;
193+
root->rstat_flush_next = NULL;
194+
child = rstatc->updated_children;
195+
rstatc->updated_children = root;
196+
if (child != root)
197+
head = cgroup_rstat_push_children(head, child, cpu);
198+
unlock_ret:
170199
raw_spin_unlock_irqrestore(cpu_lock, flags);
171200
return head;
172201
}

0 commit comments

Comments
 (0)