
Commit 5262cb2

Author: Alexei Starovoitov (committed)
Merge branch 'general-enhancements-to-rqspinlock-stress-test'
Kumar Kartikeya Dwivedi says:

====================
General enhancements to rqspinlock stress test

Three enhancements; details in the commit messages.

First, the CPU requirements are 2 for AA, 3 for ABBA, and 4 for ABBCCA, so relax the check accordingly during module initialization.

Second, add a per-CPU histogram that captures lock acquisition times, recording which bucket each acquisition falls into for normal task context and NMI context. Anything below 10ms is not printed in detail; above that, the full breakdown for each context is displayed.

Finally, make the delays of the NMI and task contexts configurable, defaulting to 10 and 20 ms respectively.
====================

Link: https://patch.msgid.link/[email protected]
Signed-off-by: Alexei Starovoitov <[email protected]>
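For context, the two new delay knobs pair with the existing test_mode parameter. A hypothetical invocation (the .ko name is inferred from the source file name below, and the values are purely illustrative):

	insmod bpf_test_rqspinlock.ko test_mode=1 normal_delay=30 nmi_delay=5

This would run the ABBA variant with 30ms task-context and 5ms NMI-context critical sections.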
2 parents 8f6ddc0 + 88337b5 commit 5262cb2

File tree: 1 file changed (+117, -3 lines)


tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c

Lines changed: 117 additions & 3 deletions
@@ -5,6 +5,7 @@
 #include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/prandom.h>
+#include <linux/ktime.h>
 #include <asm/rqspinlock.h>
 #include <linux/perf_event.h>
 #include <linux/kthread.h>
@@ -24,6 +25,21 @@ static rqspinlock_t lock_a;
 static rqspinlock_t lock_b;
 static rqspinlock_t lock_c;
 
+#define RQSL_SLOW_THRESHOLD_MS 10
+static const unsigned int rqsl_hist_ms[] = {
+	1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+	12, 14, 16, 18, 20, 25, 30, 40, 50, 75,
+	100, 150, 200, 250, 1000,
+};
+#define RQSL_NR_HIST_BUCKETS ARRAY_SIZE(rqsl_hist_ms)
+
+struct rqsl_cpu_hist {
+	atomic64_t normal[RQSL_NR_HIST_BUCKETS];
+	atomic64_t nmi[RQSL_NR_HIST_BUCKETS];
+};
+
+static DEFINE_PER_CPU(struct rqsl_cpu_hist, rqsl_cpu_hists);
+
 enum rqsl_mode {
 	RQSL_MODE_AA = 0,
 	RQSL_MODE_ABBA,
@@ -35,6 +51,16 @@ module_param(test_mode, int, 0644);
 MODULE_PARM_DESC(test_mode,
 		 "rqspinlock test mode: 0 = AA, 1 = ABBA, 2 = ABBCCA");
 
+static int normal_delay = 20;
+module_param(normal_delay, int, 0644);
+MODULE_PARM_DESC(normal_delay,
+		 "rqspinlock critical section length for normal context (20ms default)");
+
+static int nmi_delay = 10;
+module_param(nmi_delay, int, 0644);
+MODULE_PARM_DESC(nmi_delay,
+		 "rqspinlock critical section length for NMI context (10ms default)");
+
 static struct perf_event **rqsl_evts;
 static int rqsl_nevts;
 
@@ -79,10 +105,33 @@ static struct rqsl_lock_pair rqsl_get_lock_pair(int cpu)
 	}
 }
 
+static u32 rqsl_hist_bucket_idx(u32 delta_ms)
+{
+	int i;
+
+	for (i = 0; i < RQSL_NR_HIST_BUCKETS; i++) {
+		if (delta_ms <= rqsl_hist_ms[i])
+			return i;
+	}
+
+	return RQSL_NR_HIST_BUCKETS - 1;
+}
+
+static void rqsl_record_lock_time(u64 delta_ns, bool is_nmi)
+{
+	struct rqsl_cpu_hist *hist = this_cpu_ptr(&rqsl_cpu_hists);
+	u32 delta_ms = DIV_ROUND_UP_ULL(delta_ns, NSEC_PER_MSEC);
+	u32 bucket = rqsl_hist_bucket_idx(delta_ms);
+	atomic64_t *buckets = is_nmi ? hist->nmi : hist->normal;
+
+	atomic64_inc(&buckets[bucket]);
+}
+
 static int rqspinlock_worker_fn(void *arg)
 {
 	int cpu = smp_processor_id();
 	unsigned long flags;
+	u64 start_ns;
 	int ret;
 
 	if (cpu) {
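To make the bucketing above concrete, here is a minimal userspace sketch of the same lookup (bucket bounds copied from rqsl_hist_ms[]; this is an illustration, not part of the patch):

#include <stdio.h>

/* Bucket upper bounds in ms, mirroring rqsl_hist_ms[] in the patch. */
static const unsigned int hist_ms[] = {
	1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
	12, 14, 16, 18, 20, 25, 30, 40, 50, 75,
	100, 150, 200, 250, 1000,
};
#define NR_BUCKETS (sizeof(hist_ms) / sizeof(hist_ms[0]))

/* Mirror of rqsl_hist_bucket_idx(): first bucket whose bound covers delta_ms. */
static unsigned int bucket_idx(unsigned int delta_ms)
{
	unsigned int i;

	for (i = 0; i < NR_BUCKETS; i++)
		if (delta_ms <= hist_ms[i])
			return i;
	return NR_BUCKETS - 1; /* outliers collapse into the last bucket */
}

int main(void)
{
	/* A 15.3ms acquisition rounds up to 16ms, landing in bucket 12 (printed as 15-16ms). */
	printf("16 ms -> bucket %u\n", bucket_idx(16));
	/* Anything beyond 1000ms still lands in the final bucket. */
	printf("5000 ms -> bucket %u\n", bucket_idx(5000));
	return 0;
}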
@@ -96,8 +145,10 @@ static int rqspinlock_worker_fn(void *arg)
 			msleep(1000);
 			continue;
 		}
+		start_ns = ktime_get_mono_fast_ns();
 		ret = raw_res_spin_lock_irqsave(worker_lock, flags);
-		mdelay(20);
+		rqsl_record_lock_time(ktime_get_mono_fast_ns() - start_ns, false);
+		mdelay(normal_delay);
 		if (!ret)
 			raw_res_spin_unlock_irqrestore(worker_lock, flags);
 		cpu_relax();
@@ -130,15 +181,18 @@ static void nmi_cb(struct perf_event *event, struct perf_sample_data *data,
 	struct rqsl_lock_pair locks;
 	int cpu = smp_processor_id();
 	unsigned long flags;
+	u64 start_ns;
 	int ret;
 
 	if (!cpu || READ_ONCE(pause))
 		return;
 
 	locks = rqsl_get_lock_pair(cpu);
+	start_ns = ktime_get_mono_fast_ns();
 	ret = raw_res_spin_lock_irqsave(locks.nmi_lock, flags);
+	rqsl_record_lock_time(ktime_get_mono_fast_ns() - start_ns, true);
 
-	mdelay(10);
+	mdelay(nmi_delay);
 
 	if (!ret)
 		raw_res_spin_unlock_irqrestore(locks.nmi_lock, flags);
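Note that in both contexts the timestamps bracket only the raw_res_spin_lock_irqsave() call, not the mdelay() critical section, so the histogram measures acquisition latency alone, including any time spent spinning before the lock is taken or the attempt errors out.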
@@ -182,7 +236,7 @@ static int bpf_test_rqspinlock_init(void)
 
 	pr_err("Mode = %s\n", rqsl_mode_names[test_mode]);
 
-	if (ncpus < 3)
+	if (ncpus < test_mode + 2)
 		return -ENOTSUPP;
 
 	raw_res_spin_lock_init(&lock_a);
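Since test_mode is 0 for AA, 1 for ABBA, and 2 for ABBCCA, the relaxed check ncpus < test_mode + 2 encodes exactly the 2/3/4 CPU minimums described in the merge message, replacing the previous blanket requirement of 3 CPUs.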
@@ -235,10 +289,70 @@ static int bpf_test_rqspinlock_init(void)
 
 module_init(bpf_test_rqspinlock_init);
 
+static void rqsl_print_histograms(void)
+{
+	int cpu, i;
+
+	pr_err("rqspinlock acquisition latency histogram (ms):\n");
+
+	for_each_online_cpu(cpu) {
+		struct rqsl_cpu_hist *hist = per_cpu_ptr(&rqsl_cpu_hists, cpu);
+		u64 norm_counts[RQSL_NR_HIST_BUCKETS];
+		u64 nmi_counts[RQSL_NR_HIST_BUCKETS];
+		u64 total_counts[RQSL_NR_HIST_BUCKETS];
+		u64 norm_total = 0, nmi_total = 0, total = 0;
+		bool has_slow = false;
+
+		for (i = 0; i < RQSL_NR_HIST_BUCKETS; i++) {
+			norm_counts[i] = atomic64_read(&hist->normal[i]);
+			nmi_counts[i] = atomic64_read(&hist->nmi[i]);
+			total_counts[i] = norm_counts[i] + nmi_counts[i];
+			norm_total += norm_counts[i];
+			nmi_total += nmi_counts[i];
+			total += total_counts[i];
+			if (rqsl_hist_ms[i] > RQSL_SLOW_THRESHOLD_MS &&
+			    total_counts[i])
+				has_slow = true;
+		}
+
+		if (!total)
+			continue;
+
+		if (!has_slow) {
+			pr_err(" cpu%d: total %llu (normal %llu, nmi %llu), all within 0-%ums\n",
+			       cpu, total, norm_total, nmi_total, RQSL_SLOW_THRESHOLD_MS);
+			continue;
+		}
+
+		pr_err(" cpu%d: total %llu (normal %llu, nmi %llu)\n",
+		       cpu, total, norm_total, nmi_total);
+		for (i = 0; i < RQSL_NR_HIST_BUCKETS; i++) {
+			unsigned int start_ms;
+
+			if (!total_counts[i])
+				continue;
+
+			start_ms = i == 0 ? 0 : rqsl_hist_ms[i - 1] + 1;
+			if (i == RQSL_NR_HIST_BUCKETS - 1) {
+				pr_err(" >= %ums: total %llu (normal %llu, nmi %llu)\n",
+				       start_ms, total_counts[i],
+				       norm_counts[i], nmi_counts[i]);
+			} else {
+				pr_err(" %u-%ums: total %llu (normal %llu, nmi %llu)\n",
+				       start_ms, rqsl_hist_ms[i],
+				       total_counts[i],
+				       norm_counts[i], nmi_counts[i]);
+			}
+		}
+	}
+}
+
 static void bpf_test_rqspinlock_exit(void)
 {
+	WRITE_ONCE(pause, 1);
 	free_rqsl_threads();
 	free_rqsl_evts();
+	rqsl_print_histograms();
 }
 
 module_exit(bpf_test_rqspinlock_exit);
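Setting pause first at exit keeps the NMI callbacks (which bail out on READ_ONCE(pause)) from entering new critical sections during teardown, so the per-CPU counters are quiescent by the time rqsl_print_histograms() walks them after the threads and events are freed.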
