
Commit 5421e25

kkdwvd authored and Kernel Patches Daemon committed
selftests/bpf: Add lock wait time stats to rqspinlock stress test
Add per-CPU statistics on the time taken to acquire an rqspinlock, broken
down by context (normal vs. NMI) and by timing window. CPUs whose
acquisitions all fall within the 10ms window are skipped when printing;
otherwise the full breakdown is included in the summary. This makes it
possible to capture precisely how many outlier attempts occurred for a
given lock in a given context. A critical detail is that the time is
captured regardless of success or failure, which is important for
recording attempts that wait a long time but ultimately fail with a
timeout.

Output:

[   64.279459] rqspinlock acquisition latency histogram (ms):
[   64.279472]  cpu1: total 528426 (normal 526559, nmi 1867)
[   64.279477]    0-1ms: total 524697 (normal 524697, nmi 0)
[   64.279480]    2-2ms: total 3652 (normal 1811, nmi 1841)
[   64.279482]    3-3ms: total 66 (normal 47, nmi 19)
[   64.279485]    4-4ms: total 2 (normal 1, nmi 1)
[   64.279487]    5-5ms: total 1 (normal 1, nmi 0)
[   64.279489]    6-6ms: total 1 (normal 0, nmi 1)
[   64.279490]    101-150ms: total 1 (normal 0, nmi 1)
[   64.279492]    >= 251ms: total 6 (normal 2, nmi 4)
...

Signed-off-by: Kumar Kartikeya Dwivedi <[email protected]>
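For reference, the bucket labels in the output follow from rqsl_hist_bucket_idx() in the patch below: the wait in nanoseconds is rounded up to whole milliseconds, and bucket i covers rqsl_hist_ms[i-1]+1 through rqsl_hist_ms[i]. A minimal user-space sketch of the same mapping (the thresholds are copied from the patch; main() and the sample latencies are illustrative only):

#include <stdio.h>

#define NSEC_PER_MSEC 1000000ULL

/* Thresholds copied from rqsl_hist_ms[] in the patch. */
static const unsigned int hist_ms[] = {
	1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
	12, 14, 16, 18, 20, 25, 30, 40, 50, 75,
	100, 150, 200, 250, 1000,
};
#define NR_BUCKETS (sizeof(hist_ms) / sizeof(hist_ms[0]))

/* Mirrors rqsl_hist_bucket_idx(): first bucket whose upper bound
 * covers the rounded-up millisecond latency; everything beyond the
 * last threshold shares the final bucket.
 */
static unsigned int bucket_idx(unsigned int delta_ms)
{
	unsigned int i;

	for (i = 0; i < NR_BUCKETS; i++)
		if (delta_ms <= hist_ms[i])
			return i;
	return NR_BUCKETS - 1;
}

int main(void)
{
	/* Illustrative latencies in ns; DIV_ROUND_UP_ULL in the patch
	 * rounds up, so 1.3 ms counts as 2 ms.
	 */
	unsigned long long samples_ns[] = { 300000, 1300000, 120000000, 400000000 };
	unsigned int i;

	for (i = 0; i < sizeof(samples_ns) / sizeof(samples_ns[0]); i++) {
		unsigned int ms = (samples_ns[i] + NSEC_PER_MSEC - 1) / NSEC_PER_MSEC;
		unsigned int b = bucket_idx(ms);
		unsigned int lo = b ? hist_ms[b - 1] + 1 : 0;

		/* The patch prints the final bucket as ">= 251ms". */
		printf("%llu ns -> %u ms -> bucket %u (%u-%ums)\n",
		       samples_ns[i], ms, b, lo, hist_ms[b]);
	}
	return 0;
}

This matches the output above: a 1.3 ms wait lands in the 2-2ms line, and a 120 ms wait in 101-150ms.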
1 parent bec9457 commit 5421e25

File tree

1 file changed (+104, -0)

tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c

Lines changed: 104 additions & 0 deletions
@@ -5,6 +5,7 @@
 #include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/prandom.h>
+#include <linux/ktime.h>
 #include <asm/rqspinlock.h>
 #include <linux/perf_event.h>
 #include <linux/kthread.h>
@@ -24,6 +25,21 @@ static rqspinlock_t lock_a;
 static rqspinlock_t lock_b;
 static rqspinlock_t lock_c;
 
+#define RQSL_SLOW_THRESHOLD_MS 10
+static const unsigned int rqsl_hist_ms[] = {
+	1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+	12, 14, 16, 18, 20, 25, 30, 40, 50, 75,
+	100, 150, 200, 250, 1000,
+};
+#define RQSL_NR_HIST_BUCKETS ARRAY_SIZE(rqsl_hist_ms)
+
+struct rqsl_cpu_hist {
+	atomic64_t normal[RQSL_NR_HIST_BUCKETS];
+	atomic64_t nmi[RQSL_NR_HIST_BUCKETS];
+};
+
+static DEFINE_PER_CPU(struct rqsl_cpu_hist, rqsl_cpu_hists);
+
 enum rqsl_mode {
 	RQSL_MODE_AA = 0,
 	RQSL_MODE_ABBA,
@@ -79,10 +95,33 @@ static struct rqsl_lock_pair rqsl_get_lock_pair(int cpu)
 	}
 }
 
+static u32 rqsl_hist_bucket_idx(u32 delta_ms)
+{
+	int i;
+
+	for (i = 0; i < RQSL_NR_HIST_BUCKETS; i++) {
+		if (delta_ms <= rqsl_hist_ms[i])
+			return i;
+	}
+
+	return RQSL_NR_HIST_BUCKETS - 1;
+}
+
+static void rqsl_record_lock_time(u64 delta_ns, bool is_nmi)
+{
+	struct rqsl_cpu_hist *hist = this_cpu_ptr(&rqsl_cpu_hists);
+	u32 delta_ms = DIV_ROUND_UP_ULL(delta_ns, NSEC_PER_MSEC);
+	u32 bucket = rqsl_hist_bucket_idx(delta_ms);
+	atomic64_t *buckets = is_nmi ? hist->nmi : hist->normal;
+
+	atomic64_inc(&buckets[bucket]);
+}
+
 static int rqspinlock_worker_fn(void *arg)
 {
 	int cpu = smp_processor_id();
 	unsigned long flags;
+	u64 start_ns;
 	int ret;
 
 	if (cpu) {
@@ -96,7 +135,9 @@ static int rqspinlock_worker_fn(void *arg)
 			msleep(1000);
 			continue;
 		}
+		start_ns = ktime_get_mono_fast_ns();
 		ret = raw_res_spin_lock_irqsave(worker_lock, flags);
+		rqsl_record_lock_time(ktime_get_mono_fast_ns() - start_ns, false);
 		mdelay(20);
 		if (!ret)
 			raw_res_spin_unlock_irqrestore(worker_lock, flags);
@@ -130,13 +171,16 @@ static void nmi_cb(struct perf_event *event, struct perf_sample_data *data,
 	struct rqsl_lock_pair locks;
 	int cpu = smp_processor_id();
 	unsigned long flags;
+	u64 start_ns;
 	int ret;
 
 	if (!cpu || READ_ONCE(pause))
 		return;
 
 	locks = rqsl_get_lock_pair(cpu);
+	start_ns = ktime_get_mono_fast_ns();
 	ret = raw_res_spin_lock_irqsave(locks.nmi_lock, flags);
+	rqsl_record_lock_time(ktime_get_mono_fast_ns() - start_ns, true);
 
 	mdelay(10);
 
@@ -235,10 +279,70 @@ static int bpf_test_rqspinlock_init(void)
 
 module_init(bpf_test_rqspinlock_init);
 
+static void rqsl_print_histograms(void)
+{
+	int cpu, i;
+
+	pr_err("rqspinlock acquisition latency histogram (ms):\n");
+
+	for_each_online_cpu(cpu) {
+		struct rqsl_cpu_hist *hist = per_cpu_ptr(&rqsl_cpu_hists, cpu);
+		u64 norm_counts[RQSL_NR_HIST_BUCKETS];
+		u64 nmi_counts[RQSL_NR_HIST_BUCKETS];
+		u64 total_counts[RQSL_NR_HIST_BUCKETS];
+		u64 norm_total = 0, nmi_total = 0, total = 0;
+		bool has_slow = false;
+
+		for (i = 0; i < RQSL_NR_HIST_BUCKETS; i++) {
+			norm_counts[i] = atomic64_read(&hist->normal[i]);
+			nmi_counts[i] = atomic64_read(&hist->nmi[i]);
+			total_counts[i] = norm_counts[i] + nmi_counts[i];
+			norm_total += norm_counts[i];
+			nmi_total += nmi_counts[i];
+			total += total_counts[i];
+			if (rqsl_hist_ms[i] > RQSL_SLOW_THRESHOLD_MS &&
+			    total_counts[i])
+				has_slow = true;
+		}
+
+		if (!total)
+			continue;
+
+		if (!has_slow) {
+			pr_err(" cpu%d: total %llu (normal %llu, nmi %llu), all within 0-%ums\n",
+			       cpu, total, norm_total, nmi_total, RQSL_SLOW_THRESHOLD_MS);
+			continue;
+		}
+
+		pr_err(" cpu%d: total %llu (normal %llu, nmi %llu)\n",
+		       cpu, total, norm_total, nmi_total);
+		for (i = 0; i < RQSL_NR_HIST_BUCKETS; i++) {
+			unsigned int start_ms;
+
+			if (!total_counts[i])
+				continue;
+
+			start_ms = i == 0 ? 0 : rqsl_hist_ms[i - 1] + 1;
+			if (i == RQSL_NR_HIST_BUCKETS - 1) {
+				pr_err("   >= %ums: total %llu (normal %llu, nmi %llu)\n",
+				       start_ms, total_counts[i],
+				       norm_counts[i], nmi_counts[i]);
+			} else {
+				pr_err("   %u-%ums: total %llu (normal %llu, nmi %llu)\n",
+				       start_ms, rqsl_hist_ms[i],
+				       total_counts[i],
+				       norm_counts[i], nmi_counts[i]);
+			}
+		}
+	}
+}
+
 static void bpf_test_rqspinlock_exit(void)
 {
+	WRITE_ONCE(pause, 1);
 	free_rqsl_threads();
 	free_rqsl_evts();
+	rqsl_print_histograms();
 }
 
 module_exit(bpf_test_rqspinlock_exit);
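One detail worth noting in the timing code above: the timestamps bracket the lock call itself, so an attempt that raw_res_spin_lock_irqsave() aborts (rqspinlock returns an error on deadlock or timeout) still contributes its full wait time to the histogram. A hypothetical user-space analogue of that record-regardless-of-outcome pattern, using a POSIX timed lock (the 1-second timeout and all names here are illustrative, not from the patch):

#include <pthread.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;

/* Monotonic nanosecond clock, standing in for ktime_get_mono_fast_ns(). */
static unsigned long long now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

int main(void)
{
	struct timespec deadline;
	unsigned long long start, delta;
	int ret;

	clock_gettime(CLOCK_REALTIME, &deadline);
	deadline.tv_sec += 1;	/* illustrative 1 s timeout */

	start = now_ns();
	ret = pthread_mutex_timedlock(&m, &deadline);
	delta = now_ns() - start;	/* recorded whether ret is 0 or ETIMEDOUT */

	printf("waited %llu ns, ret %d\n", delta, ret);
	if (!ret)
		pthread_mutex_unlock(&m);
	return 0;
}

Build with cc -pthread. The key point is that delta is computed on every path, mirroring how rqsl_record_lock_time() runs unconditionally after the lock attempt in both the worker and the NMI callback.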
