 #include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/prandom.h>
+#include <linux/ktime.h>
 #include <asm/rqspinlock.h>
 #include <linux/perf_event.h>
 #include <linux/kthread.h>
@@ -24,6 +25,21 @@ static rqspinlock_t lock_a;
 static rqspinlock_t lock_b;
 static rqspinlock_t lock_c;
 
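+/* Per-CPU output is collapsed to one line when no sample exceeds this bound. */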
+#define RQSL_SLOW_THRESHOLD_MS 10
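+/* Bucket upper bounds in ms: 1ms granularity up to 10ms, then progressively coarser. */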
+static const unsigned int rqsl_hist_ms[] = {
+	1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+	12, 14, 16, 18, 20, 25, 30, 40, 50, 75,
+	100, 150, 200, 250, 1000,
+};
+#define RQSL_NR_HIST_BUCKETS ARRAY_SIZE(rqsl_hist_ms)
+
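+/* Per-CPU histograms, kept separately for normal and NMI lock attempts. */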
+struct rqsl_cpu_hist {
+	atomic64_t normal[RQSL_NR_HIST_BUCKETS];
+	atomic64_t nmi[RQSL_NR_HIST_BUCKETS];
+};
+
+static DEFINE_PER_CPU(struct rqsl_cpu_hist, rqsl_cpu_hists);
+
 enum rqsl_mode {
 	RQSL_MODE_AA = 0,
 	RQSL_MODE_ABBA,
@@ -35,6 +51,16 @@ module_param(test_mode, int, 0644);
 MODULE_PARM_DESC(test_mode,
 		 "rqspinlock test mode: 0 = AA, 1 = ABBA, 2 = ABBCCA");
 
+static int normal_delay = 20;
+module_param(normal_delay, int, 0644);
+MODULE_PARM_DESC(normal_delay,
+		 "rqspinlock critical section length for normal context (20ms default)");
+
+static int nmi_delay = 10;
+module_param(nmi_delay, int, 0644);
+MODULE_PARM_DESC(nmi_delay,
+		 "rqspinlock critical section length for NMI context (10ms default)");
+
 static struct perf_event **rqsl_evts;
 static int rqsl_nevts;
 
@@ -79,10 +105,33 @@ static struct rqsl_lock_pair rqsl_get_lock_pair(int cpu)
 	}
 }
 
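+/* Map an acquisition latency in ms to the first bucket whose bound covers it. */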
+static u32 rqsl_hist_bucket_idx(u32 delta_ms)
+{
+	int i;
+
+	for (i = 0; i < RQSL_NR_HIST_BUCKETS; i++) {
+		if (delta_ms <= rqsl_hist_ms[i])
+			return i;
+	}
+
+	return RQSL_NR_HIST_BUCKETS - 1;
+}
+
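+/* Convert to ms (rounding up) and bump the bucket for the current context. */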
+static void rqsl_record_lock_time(u64 delta_ns, bool is_nmi)
+{
+	struct rqsl_cpu_hist *hist = this_cpu_ptr(&rqsl_cpu_hists);
+	u32 delta_ms = DIV_ROUND_UP_ULL(delta_ns, NSEC_PER_MSEC);
+	u32 bucket = rqsl_hist_bucket_idx(delta_ms);
+	atomic64_t *buckets = is_nmi ? hist->nmi : hist->normal;
+
+	atomic64_inc(&buckets[bucket]);
+}
+
 static int rqspinlock_worker_fn(void *arg)
 {
 	int cpu = smp_processor_id();
 	unsigned long flags;
+	u64 start_ns;
 	int ret;
 
 	if (cpu) {
@@ -96,8 +145,10 @@ static int rqspinlock_worker_fn(void *arg)
 			msleep(1000);
 			continue;
 		}
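+		/* Time only the acquisition; the mdelay() below models the critical section. */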
+		start_ns = ktime_get_mono_fast_ns();
 		ret = raw_res_spin_lock_irqsave(worker_lock, flags);
-		mdelay(20);
+		rqsl_record_lock_time(ktime_get_mono_fast_ns() - start_ns, false);
+		mdelay(normal_delay);
 		if (!ret)
 			raw_res_spin_unlock_irqrestore(worker_lock, flags);
 		cpu_relax();
@@ -130,15 +181,18 @@ static void nmi_cb(struct perf_event *event, struct perf_sample_data *data,
 	struct rqsl_lock_pair locks;
 	int cpu = smp_processor_id();
 	unsigned long flags;
+	u64 start_ns;
 	int ret;
 
 	if (!cpu || READ_ONCE(pause))
 		return;
 
 	locks = rqsl_get_lock_pair(cpu);
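+	/* ktime_get_mono_fast_ns() is NMI-safe, so it can time acquisitions here too. */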
+	start_ns = ktime_get_mono_fast_ns();
 	ret = raw_res_spin_lock_irqsave(locks.nmi_lock, flags);
+	rqsl_record_lock_time(ktime_get_mono_fast_ns() - start_ns, true);
 
-	mdelay(10);
+	mdelay(nmi_delay);
 
 	if (!ret)
 		raw_res_spin_unlock_irqrestore(locks.nmi_lock, flags);
@@ -182,7 +236,7 @@ static int bpf_test_rqspinlock_init(void)
 
 	pr_err("Mode = %s\n", rqsl_mode_names[test_mode]);
 
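+	/* CPU 0 never contends; each mode needs one extra CPU per lock (AA=2, ABBA=3, ABBCCA=4). */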
-	if (ncpus < 3)
+	if (ncpus < test_mode + 2)
 		return -ENOTSUPP;
 
 	raw_res_spin_lock_init(&lock_a);
@@ -235,10 +289,70 @@ static int bpf_test_rqspinlock_init(void)
 
 module_init(bpf_test_rqspinlock_init);
 
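+/* Print per-CPU latency histograms, summarizing CPUs with no slow samples. */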
+static void rqsl_print_histograms(void)
+{
+	int cpu, i;
+
+	pr_err("rqspinlock acquisition latency histogram (ms):\n");
+
+	for_each_online_cpu(cpu) {
+		struct rqsl_cpu_hist *hist = per_cpu_ptr(&rqsl_cpu_hists, cpu);
+		u64 norm_counts[RQSL_NR_HIST_BUCKETS];
+		u64 nmi_counts[RQSL_NR_HIST_BUCKETS];
+		u64 total_counts[RQSL_NR_HIST_BUCKETS];
+		u64 norm_total = 0, nmi_total = 0, total = 0;
+		bool has_slow = false;
+
+		for (i = 0; i < RQSL_NR_HIST_BUCKETS; i++) {
+			norm_counts[i] = atomic64_read(&hist->normal[i]);
+			nmi_counts[i] = atomic64_read(&hist->nmi[i]);
+			total_counts[i] = norm_counts[i] + nmi_counts[i];
+			norm_total += norm_counts[i];
+			nmi_total += nmi_counts[i];
+			total += total_counts[i];
+			if (rqsl_hist_ms[i] > RQSL_SLOW_THRESHOLD_MS &&
+			    total_counts[i])
+				has_slow = true;
+		}
+
+		if (!total)
+			continue;
+
+		if (!has_slow) {
+			pr_err("  cpu%d: total %llu (normal %llu, nmi %llu), all within 0-%ums\n",
+			       cpu, total, norm_total, nmi_total, RQSL_SLOW_THRESHOLD_MS);
+			continue;
+		}
+
+		pr_err("  cpu%d: total %llu (normal %llu, nmi %llu)\n",
+		       cpu, total, norm_total, nmi_total);
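+		/* Bucket 0 starts at 0ms; bucket i at rqsl_hist_ms[i - 1] + 1; the last is open-ended. */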
+		for (i = 0; i < RQSL_NR_HIST_BUCKETS; i++) {
+			unsigned int start_ms;
+
+			if (!total_counts[i])
+				continue;
+
+			start_ms = i == 0 ? 0 : rqsl_hist_ms[i - 1] + 1;
+			if (i == RQSL_NR_HIST_BUCKETS - 1) {
+				pr_err("    >= %ums: total %llu (normal %llu, nmi %llu)\n",
+				       start_ms, total_counts[i],
+				       norm_counts[i], nmi_counts[i]);
+			} else {
+				pr_err("    %u-%ums: total %llu (normal %llu, nmi %llu)\n",
+				       start_ms, rqsl_hist_ms[i],
+				       total_counts[i],
+				       norm_counts[i], nmi_counts[i]);
+			}
+		}
+	}
+}
+
 static void bpf_test_rqspinlock_exit(void)
 {
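+	/* Stop workers and NMI callbacks from taking locks before teardown. */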
+	WRITE_ONCE(pause, 1);
 	free_rqsl_threads();
 	free_rqsl_evts();
+	rqsl_print_histograms();
 }
 
 module_exit(bpf_test_rqspinlock_exit);