@@ -74,64 +74,109 @@ __bpf_kfunc void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
 }
 
 /**
- * cgroup_rstat_cpu_pop_updated - iterate and dismantle rstat_cpu updated tree
- * @pos: current position
- * @root: root of the tree to traversal
+ * cgroup_rstat_push_children - push children cgroups into the given list
+ * @head: current head of the list (= subtree root)
+ * @child: first child of the root
  * @cpu: target cpu
+ * Return: A new singly linked list of cgroups to be flushed
  *
- * Walks the updated rstat_cpu tree on @cpu from @root. %NULL @pos starts
- * the traversal and %NULL return indicates the end. During traversal,
- * each returned cgroup is unlinked from the tree. Must be called with the
- * matching cgroup_rstat_cpu_lock held.
+ * Iteratively traverse down the cgroup_rstat_cpu updated tree level by
+ * level and push all the parents first before their next level children
+ * into a singly linked list built from the tail backward like "pushing"
+ * cgroups into a stack. The root is pushed by the caller.
+ */
+static struct cgroup *cgroup_rstat_push_children(struct cgroup *head,
+						 struct cgroup *child, int cpu)
+{
+	struct cgroup *chead = child;	/* Head of child cgroup level */
+	struct cgroup *ghead = NULL;	/* Head of grandchild cgroup level */
+	struct cgroup *parent, *grandchild;
+	struct cgroup_rstat_cpu *crstatc;
+
+	child->rstat_flush_next = NULL;
+
+next_level:
+	while (chead) {
+		child = chead;
+		chead = child->rstat_flush_next;
+		parent = cgroup_parent(child);
+
+		/* updated_next is parent cgroup terminated */
+		while (child != parent) {
+			child->rstat_flush_next = head;
+			head = child;
+			crstatc = cgroup_rstat_cpu(child, cpu);
+			grandchild = crstatc->updated_children;
+			if (grandchild != child) {
+				/* Push the grandchild to the next level */
+				crstatc->updated_children = child;
+				grandchild->rstat_flush_next = ghead;
+				ghead = grandchild;
+			}
+			child = crstatc->updated_next;
+			crstatc->updated_next = NULL;
+		}
+	}
+
+	if (ghead) {
+		chead = ghead;
+		ghead = NULL;
+		goto next_level;
+	}
+	return head;
+}
+
+/**
+ * cgroup_rstat_updated_list - return a list of updated cgroups to be flushed
+ * @root: root of the cgroup subtree to traverse
+ * @cpu: target cpu
+ * Return: A singly linked list of cgroups to be flushed
+ *
+ * Walks the updated rstat_cpu tree on @cpu from @root. During traversal,
+ * each returned cgroup is unlinked from the updated tree.
  *
  * The only ordering guarantee is that, for a parent and a child pair
- * covered by a given traversal, if a child is visited, its parent is
- * guaranteed to be visited afterwards.
+ * covered by a given traversal, the child is before its parent in
+ * the list.
+ *
+ * Note that updated_children is self terminated and points to a list of
+ * child cgroups if not empty. Whereas updated_next is like a sibling link
+ * within the children list and terminated by the parent cgroup. An exception
+ * here is the cgroup root whose updated_next can be self terminated.
  */
-static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos,
-						   struct cgroup *root, int cpu)
+static struct cgroup *cgroup_rstat_updated_list(struct cgroup *root, int cpu)
 {
-	struct cgroup_rstat_cpu *rstatc;
-	struct cgroup *parent;
-
-	if (pos == root)
-		return NULL;
+	raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu);
+	struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(root, cpu);
+	struct cgroup *head = NULL, *parent, *child;
+	unsigned long flags;
 
 	/*
-	 * We're gonna walk down to the first leaf and visit/remove it. We
-	 * can pick whatever unvisited node as the starting point.
+	 * The _irqsave() is needed because cgroup_rstat_lock is
+	 * spinlock_t which is a sleeping lock on PREEMPT_RT. Acquiring
+	 * this lock with the _irq() suffix only disables interrupts on
+	 * a non-PREEMPT_RT kernel. The raw_spinlock_t below disables
+	 * interrupts on both configurations. The _irqsave() ensures
+	 * that interrupts are always disabled and later restored.
 	 */
-	if (!pos) {
-		pos = root;
-		/* return NULL if this subtree is not on-list */
-		if (!cgroup_rstat_cpu(pos, cpu)->updated_next)
-			return NULL;
-	} else {
-		pos = cgroup_parent(pos);
-	}
+	raw_spin_lock_irqsave(cpu_lock, flags);
 
-	/* walk down to the first leaf */
-	while (true) {
-		rstatc = cgroup_rstat_cpu(pos, cpu);
-		if (rstatc->updated_children == pos)
-			break;
-		pos = rstatc->updated_children;
-	}
+	/* Return NULL if this subtree is not on-list */
+	if (!rstatc->updated_next)
+		goto unlock_ret;
 
 	/*
-	 * Unlink @pos from the tree. As the updated_children list is
+	 * Unlink @root from its parent. As the updated_children list is
 	 * singly linked, we have to walk it to find the removal point.
-	 * However, due to the way we traverse, @pos will be the first
-	 * child in most cases. The only exception is @root.
 	 */
-	parent = cgroup_parent(pos);
+	parent = cgroup_parent(root);
 	if (parent) {
 		struct cgroup_rstat_cpu *prstatc;
 		struct cgroup **nextp;
 
 		prstatc = cgroup_rstat_cpu(parent, cpu);
 		nextp = &prstatc->updated_children;
-		while (*nextp != pos) {
+		while (*nextp != root) {
 			struct cgroup_rstat_cpu *nrstatc;
 
 			nrstatc = cgroup_rstat_cpu(*nextp, cpu);
@@ -142,31 +187,15 @@ static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos,
 	}
 
 	rstatc->updated_next = NULL;
-	return pos;
-}
 
-/* Return a list of updated cgroups to be flushed */
-static struct cgroup *cgroup_rstat_updated_list(struct cgroup *root, int cpu)
-{
-	raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu);
-	struct cgroup *head, *tail, *next;
-	unsigned long flags;
-
-	/*
-	 * The _irqsave() is needed because cgroup_rstat_lock is
-	 * spinlock_t which is a sleeping lock on PREEMPT_RT. Acquiring
-	 * this lock with the _irq() suffix only disables interrupts on
-	 * a non-PREEMPT_RT kernel. The raw_spinlock_t below disables
-	 * interrupts on both configurations. The _irqsave() ensures
-	 * that interrupts are always disabled and later restored.
-	 */
-	raw_spin_lock_irqsave(cpu_lock, flags);
-	head = tail = cgroup_rstat_cpu_pop_updated(NULL, root, cpu);
-	while (tail) {
-		next = cgroup_rstat_cpu_pop_updated(tail, root, cpu);
-		tail->rstat_flush_next = next;
-		tail = next;
-	}
+	/* Push @root to the list first before pushing the children */
+	head = root;
+	root->rstat_flush_next = NULL;
+	child = rstatc->updated_children;
+	rstatc->updated_children = root;
+	if (child != root)
+		head = cgroup_rstat_push_children(head, child, cpu);
+unlock_ret:
 	raw_spin_unlock_irqrestore(cpu_lock, flags);
 	return head;
 }
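
For reviewers who want to poke at the list-building scheme outside the kernel, here is a minimal
userspace sketch. It is illustrative only: "struct node", mark_updated(), push_children() and
build_flush_list() are hypothetical stand-ins for struct cgroup / struct cgroup_rstat_cpu,
cgroup_rstat_updated(), cgroup_rstat_push_children() and cgroup_rstat_updated_list(); the per-cpu
bookkeeping, locking and the unlink-from-parent walk are omitted. It only models the two
invariants the comments above rely on (updated_children points back to itself when empty;
updated_next is terminated by the parent and doubles as the "on the updated tree" flag) and
shows that the resulting flush list puts every child before its parent.

/* toy_rstat.c - illustrative userspace model, not kernel code */
#include <stdio.h>
#include <stddef.h>

struct node {
	const char *name;
	struct node *parent;
	struct node *updated_children;	/* points back to self when empty */
	struct node *updated_next;	/* sibling link, terminated by the parent;
					 * NULL means "not on the updated tree" */
	struct node *flush_next;	/* singly linked flush list */
};

#define NODE(var, p) \
	struct node var = { .name = #var, .parent = (p), .updated_children = &var }

/* Bottom-up insertion, modelled on cgroup_rstat_updated(). */
static void mark_updated(struct node *n)
{
	while (n && !n->updated_next) {
		struct node *parent = n->parent;

		if (!parent) {		/* root: mark busy, self-terminated */
			n->updated_next = n;
			break;
		}
		/* an empty parent list points to the parent itself, so the
		 * sibling run stays parent-terminated after this insertion */
		n->updated_next = parent->updated_children;
		parent->updated_children = n;
		n = parent;
	}
}

/* Level-order walk, modelled on cgroup_rstat_push_children(). */
static struct node *push_children(struct node *head, struct node *child)
{
	struct node *chead = child;	/* current level */
	struct node *ghead = NULL;	/* next level */
	struct node *parent, *grandchild;

	child->flush_next = NULL;
next_level:
	while (chead) {
		child = chead;
		chead = child->flush_next;
		parent = child->parent;

		/* the sibling run ends when we reach the parent */
		while (child != parent) {
			struct node *next = child->updated_next;

			child->flush_next = head;
			head = child;
			grandchild = child->updated_children;
			if (grandchild != child) {
				/* queue this child's children for the next level */
				child->updated_children = child;
				grandchild->flush_next = ghead;
				ghead = grandchild;
			}
			child->updated_next = NULL;	/* now off the updated tree */
			child = next;
		}
	}
	if (ghead) {
		chead = ghead;
		ghead = NULL;
		goto next_level;
	}
	return head;
}

/* Simplified cgroup_rstat_updated_list(): no locking, @root is the real root. */
static struct node *build_flush_list(struct node *root)
{
	struct node *head, *child;

	if (!root->updated_next)	/* subtree not on the updated tree */
		return NULL;

	root->updated_next = NULL;
	head = root;
	root->flush_next = NULL;
	child = root->updated_children;
	root->updated_children = root;
	if (child != root)
		head = push_children(head, child);
	return head;
}

int main(void)
{
	NODE(root, NULL);
	NODE(a, &root);
	NODE(b, &root);
	NODE(c, &a);

	mark_updated(&c);	/* also puts a and root on the updated tree */
	mark_updated(&b);

	for (struct node *n = build_flush_list(&root); n; n = n->flush_next)
		printf("%s\n", n->name);
	return 0;
}

Building a root -> {a, b}, a -> {c} tree and running the sketch prints c, a, b, root: each level
is pushed onto the list after its parent level, so reading the list front to back visits children
before their parents, which is the ordering guarantee stated in the kernel-doc above.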