
Commit 21c38a3

netoptimizer authored and htejun committed
cgroup/rstat: add cgroup_rstat_cpu_lock helpers and tracepoints
This closely resembles the helpers added for the global cgroup_rstat_lock in commit fc29e04 ("cgroup/rstat: add cgroup_rstat_lock helpers and tracepoints"), but applies to the per-CPU lock cgroup_rstat_cpu_lock.

Based on production workloads, we observe that the fast-path "update" function cgroup_rstat_updated() is invoked around 3 million times per second, while the "flush" function cgroup_rstat_flush_locked(), which walks each possible CPU, can see periodic spikes of 700 invocations per second.

For this reason, the tracepoints for this per-CPU lock are split into normal and fastpath versions. This makes it feasible to continuously monitor the non-fastpath tracepoint in production to detect lock contention issues. The reason for monitoring is that the lock disables IRQs, which can disturb e.g. softirq processing on the local CPUs involved. Once the global cgroup_rstat_lock stops disabling IRQs (e.g. when converted to a mutex), this per-CPU lock becomes the next bottleneck that can introduce latency variations.

A practical bpftrace script for monitoring contention latency:

  bpftrace -e '
	tracepoint:cgroup:cgroup_rstat_cpu_lock_contended {
	  @start[tid] = nsecs; @cnt[probe] = count(); }
	tracepoint:cgroup:cgroup_rstat_cpu_locked {
	  if (args->contended) {
	    @wait_ns = hist(nsecs - @start[tid]); delete(@start[tid]); }
	  @cnt[probe] = count(); }
	interval:s:1 { time("%H:%M:%S "); print(@wait_ns); print(@cnt); clear(@cnt); }'

Signed-off-by: Jesper Dangaard Brouer <[email protected]>
Signed-off-by: Tejun Heo <[email protected]>
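For targeted debugging of the high-frequency "update" path itself, the same script pattern can be pointed at the fastpath tracepoints; an illustrative sketch, better suited to short ad-hoc sessions than to continuous production monitoring given the ~3 million updates/sec cited above:

  bpftrace -e '
	// same measurement as above, but on the fastpath (update-side) tracepoints
	tracepoint:cgroup:cgroup_rstat_cpu_lock_contended_fastpath {
	  @start[tid] = nsecs; @cnt[probe] = count(); }
	tracepoint:cgroup:cgroup_rstat_cpu_locked_fastpath {
	  if (args->contended) {
	    @wait_ns = hist(nsecs - @start[tid]); delete(@start[tid]); }
	  @cnt[probe] = count(); }
	interval:s:1 { time("%H:%M:%S "); print(@wait_ns); print(@cnt); clear(@cnt); }'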
1 parent c1457d9 commit 21c38a3

2 files changed: 108 additions & 18 deletions

include/trace/events/cgroup.h

Lines changed: 50 additions & 6 deletions
@@ -206,31 +206,32 @@ DEFINE_EVENT(cgroup_event, cgroup_notify_frozen,
 
 DECLARE_EVENT_CLASS(cgroup_rstat,
 
-	TP_PROTO(struct cgroup *cgrp, int cpu_in_loop, bool contended),
+	TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
 
-	TP_ARGS(cgrp, cpu_in_loop, contended),
+	TP_ARGS(cgrp, cpu, contended),
 
 	TP_STRUCT__entry(
 		__field(	int,	root			)
 		__field(	int,	level			)
 		__field(	u64,	id			)
-		__field(	int,	cpu_in_loop		)
+		__field(	int,	cpu			)
 		__field(	bool,	contended		)
 	),
 
 	TP_fast_assign(
 		__entry->root = cgrp->root->hierarchy_id;
 		__entry->id = cgroup_id(cgrp);
 		__entry->level = cgrp->level;
-		__entry->cpu_in_loop = cpu_in_loop;
+		__entry->cpu = cpu;
 		__entry->contended = contended;
 	),
 
-	TP_printk("root=%d id=%llu level=%d cpu_in_loop=%d lock contended:%d",
+	TP_printk("root=%d id=%llu level=%d cpu=%d lock contended:%d",
 		  __entry->root, __entry->id, __entry->level,
-		  __entry->cpu_in_loop, __entry->contended)
+		  __entry->cpu, __entry->contended)
 );
 
+/* Related to global: cgroup_rstat_lock */
 DEFINE_EVENT(cgroup_rstat, cgroup_rstat_lock_contended,
 
 	TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
@@ -252,6 +253,49 @@ DEFINE_EVENT(cgroup_rstat, cgroup_rstat_unlock,
 	TP_ARGS(cgrp, cpu, contended)
 );
 
+/* Related to per CPU: cgroup_rstat_cpu_lock */
+DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_lock_contended,
+
+	TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
+
+	TP_ARGS(cgrp, cpu, contended)
+);
+
+DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_lock_contended_fastpath,
+
+	TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
+
+	TP_ARGS(cgrp, cpu, contended)
+);
+
+DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_locked,
+
+	TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
+
+	TP_ARGS(cgrp, cpu, contended)
+);
+
+DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_locked_fastpath,
+
+	TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
+
+	TP_ARGS(cgrp, cpu, contended)
+);
+
+DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_unlock,
+
+	TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
+
+	TP_ARGS(cgrp, cpu, contended)
+);
+
+DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_unlock_fastpath,
+
+	TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
+
+	TP_ARGS(cgrp, cpu, contended)
+);
+
 #endif /* _TRACE_CGROUP_H */
 
 /* This part must be outside protection */
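Since the shared event class records root, id, level, cpu and contended, all of the new per-CPU tracepoints expose these fields to consumers; an illustrative bpftrace probe, assuming the cgroup trace events are enabled in the running kernel, that prints them for the non-fastpath contention event:

  bpftrace -e '
	// dump the fields of each per-CPU lock contention event as it happens
	tracepoint:cgroup:cgroup_rstat_cpu_lock_contended {
	  printf("root=%d id=%llu level=%d cpu=%d contended=%d\n",
	         args->root, args->id, args->level, args->cpu, args->contended); }'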

kernel/cgroup/rstat.c

Lines changed: 58 additions & 12 deletions
@@ -19,6 +19,60 @@ static struct cgroup_rstat_cpu *cgroup_rstat_cpu(struct cgroup *cgrp, int cpu)
 	return per_cpu_ptr(cgrp->rstat_cpu, cpu);
 }
 
+/*
+ * Helper functions for rstat per CPU lock (cgroup_rstat_cpu_lock).
+ *
+ * This makes it easier to diagnose locking issues and contention in
+ * production environments. The parameter @fast_path determine the
+ * tracepoints being added, allowing us to diagnose "flush" related
+ * operations without handling high-frequency fast-path "update" events.
+ */
+static __always_inline
+unsigned long _cgroup_rstat_cpu_lock(raw_spinlock_t *cpu_lock, int cpu,
+				     struct cgroup *cgrp, const bool fast_path)
+{
+	unsigned long flags;
+	bool contended;
+
+	/*
+	 * The _irqsave() is needed because cgroup_rstat_lock is
+	 * spinlock_t which is a sleeping lock on PREEMPT_RT. Acquiring
+	 * this lock with the _irq() suffix only disables interrupts on
+	 * a non-PREEMPT_RT kernel. The raw_spinlock_t below disables
+	 * interrupts on both configurations. The _irqsave() ensures
+	 * that interrupts are always disabled and later restored.
+	 */
+	contended = !raw_spin_trylock_irqsave(cpu_lock, flags);
+	if (contended) {
+		if (fast_path)
+			trace_cgroup_rstat_cpu_lock_contended_fastpath(cgrp, cpu, contended);
+		else
+			trace_cgroup_rstat_cpu_lock_contended(cgrp, cpu, contended);
+
+		raw_spin_lock_irqsave(cpu_lock, flags);
+	}
+
+	if (fast_path)
+		trace_cgroup_rstat_cpu_locked_fastpath(cgrp, cpu, contended);
+	else
+		trace_cgroup_rstat_cpu_locked(cgrp, cpu, contended);
+
+	return flags;
+}
+
+static __always_inline
+void _cgroup_rstat_cpu_unlock(raw_spinlock_t *cpu_lock, int cpu,
+			      struct cgroup *cgrp, unsigned long flags,
+			      const bool fast_path)
+{
+	if (fast_path)
+		trace_cgroup_rstat_cpu_unlock_fastpath(cgrp, cpu, false);
+	else
+		trace_cgroup_rstat_cpu_unlock(cgrp, cpu, false);
+
+	raw_spin_unlock_irqrestore(cpu_lock, flags);
+}
+
 /**
  * cgroup_rstat_updated - keep track of updated rstat_cpu
  * @cgrp: target cgroup
@@ -44,7 +98,7 @@ __bpf_kfunc void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
 	if (data_race(cgroup_rstat_cpu(cgrp, cpu)->updated_next))
 		return;
 
-	raw_spin_lock_irqsave(cpu_lock, flags);
+	flags = _cgroup_rstat_cpu_lock(cpu_lock, cpu, cgrp, true);
 
 	/* put @cgrp and all ancestors on the corresponding updated lists */
 	while (true) {
@@ -72,7 +126,7 @@ __bpf_kfunc void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
 		cgrp = parent;
 	}
 
-	raw_spin_unlock_irqrestore(cpu_lock, flags);
+	_cgroup_rstat_cpu_unlock(cpu_lock, cpu, cgrp, flags, true);
 }
 
 /**
@@ -153,15 +207,7 @@ static struct cgroup *cgroup_rstat_updated_list(struct cgroup *root, int cpu)
 	struct cgroup *head = NULL, *parent, *child;
 	unsigned long flags;
 
-	/*
-	 * The _irqsave() is needed because cgroup_rstat_lock is
-	 * spinlock_t which is a sleeping lock on PREEMPT_RT. Acquiring
-	 * this lock with the _irq() suffix only disables interrupts on
-	 * a non-PREEMPT_RT kernel. The raw_spinlock_t below disables
-	 * interrupts on both configurations. The _irqsave() ensures
-	 * that interrupts are always disabled and later restored.
-	 */
-	raw_spin_lock_irqsave(cpu_lock, flags);
+	flags = _cgroup_rstat_cpu_lock(cpu_lock, cpu, root, false);
 
 	/* Return NULL if this subtree is not on-list */
 	if (!rstatc->updated_next)
@@ -198,7 +244,7 @@ static struct cgroup *cgroup_rstat_updated_list(struct cgroup *root, int cpu)
 	if (child != root)
 		head = cgroup_rstat_push_children(head, child, cpu);
 unlock_ret:
-	raw_spin_unlock_irqrestore(cpu_lock, flags);
+	_cgroup_rstat_cpu_unlock(cpu_lock, cpu, root, flags, false);
 	return head;
 }
 
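Because the non-fastpath helpers are used on the flush side, which takes cgroup_rstat_cpu_lock for each possible CPU, contention seen there can be attributed to individual CPUs; an illustrative bpftrace aggregation, assuming only that the contention tracepoint above is available:

  bpftrace -e '
	// count non-fastpath contention events, keyed by the CPU whose lock was contended
	tracepoint:cgroup:cgroup_rstat_cpu_lock_contended { @contended[args->cpu] = count(); }
	interval:s:10 { print(@contended); clear(@contended); }'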
