 #include <linux/sched.h>
 #include <linux/sched/idle.h>
 #include <linux/hypervisor.h>
+#include <linux/sched/clock.h>
+#include <linux/nmi.h>
+#include <linux/sched/debug.h>
 
 #include "smpboot.h"
 #include "sched/smp.h"
@@ -96,17 +99,134 @@ void __init call_function_init(void)
 	smpcfd_prepare_cpu(smp_processor_id());
 }
 
+#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
+
+static DEFINE_PER_CPU(call_single_data_t *, cur_csd);
+static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func);
+static DEFINE_PER_CPU(void *, cur_csd_info);
+
+#define CSD_LOCK_TIMEOUT (5ULL * NSEC_PER_SEC)
+static atomic_t csd_bug_count = ATOMIC_INIT(0);
+
+/* Record current CSD work for current CPU, NULL to erase. */
+static void csd_lock_record(call_single_data_t *csd)
+{
+	if (!csd) {
+		smp_mb(); /* NULL cur_csd after unlock. */
+		__this_cpu_write(cur_csd, NULL);
+		return;
+	}
+	__this_cpu_write(cur_csd_func, csd->func);
+	__this_cpu_write(cur_csd_info, csd->info);
+	smp_wmb(); /* func and info before csd. */
+	__this_cpu_write(cur_csd, csd);
+	smp_mb(); /* Update cur_csd before function call. */
+		  /* Or before unlock, as the case may be. */
+}
+
+static __always_inline int csd_lock_wait_getcpu(call_single_data_t *csd)
+{
+	unsigned int csd_type;
+
+	csd_type = CSD_TYPE(csd);
+	if (csd_type == CSD_TYPE_ASYNC || csd_type == CSD_TYPE_SYNC)
+		return csd->dst; /* Other CSD_TYPE_ values might not have ->dst. */
+	return -1;
+}
+
+/*
+ * Complain if too much time spent waiting.  Note that only
+ * the CSD_TYPE_SYNC/ASYNC types provide the destination CPU,
+ * so waiting on other types gets much less information.
+ */
+static __always_inline bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, int *bug_id)
+{
+	int cpu = -1;
+	int cpux;
+	bool firsttime;
+	u64 ts2, ts_delta;
+	call_single_data_t *cpu_cur_csd;
+	unsigned int flags = READ_ONCE(csd->flags);
+
+	if (!(flags & CSD_FLAG_LOCK)) {
+		if (!unlikely(*bug_id))
+			return true;
+		cpu = csd_lock_wait_getcpu(csd);
+		pr_alert("csd: CSD lock (#%d) got unstuck on CPU#%02d, CPU#%02d released the lock.\n",
+			 *bug_id, raw_smp_processor_id(), cpu);
+		return true;
+	}
+
+	ts2 = sched_clock();
+	ts_delta = ts2 - *ts1;
+	if (likely(ts_delta <= CSD_LOCK_TIMEOUT))
+		return false;
+
+	firsttime = !*bug_id;
+	if (firsttime)
+		*bug_id = atomic_inc_return(&csd_bug_count);
+	cpu = csd_lock_wait_getcpu(csd);
+	if (WARN_ONCE(cpu < 0 || cpu >= nr_cpu_ids, "%s: cpu = %d\n", __func__, cpu))
+		cpux = 0;
+	else
+		cpux = cpu;
+	cpu_cur_csd = smp_load_acquire(&per_cpu(cur_csd, cpux)); /* Before func and info. */
+	pr_alert("csd: %s non-responsive CSD lock (#%d) on CPU#%d, waiting %llu ns for CPU#%02d %pS(%ps).\n",
+		 firsttime ? "Detected" : "Continued", *bug_id, raw_smp_processor_id(), ts2 - ts0,
+		 cpu, csd->func, csd->info);
+	if (cpu_cur_csd && csd != cpu_cur_csd) {
+		pr_alert("\tcsd: CSD lock (#%d) handling prior %pS(%ps) request.\n",
+			 *bug_id, READ_ONCE(per_cpu(cur_csd_func, cpux)),
+			 READ_ONCE(per_cpu(cur_csd_info, cpux)));
+	} else {
+		pr_alert("\tcsd: CSD lock (#%d) %s.\n",
+			 *bug_id, !cpu_cur_csd ? "unresponsive" : "handling this request");
+	}
+	if (cpu >= 0) {
+		if (!trigger_single_cpu_backtrace(cpu))
+			dump_cpu_task(cpu);
+		if (!cpu_cur_csd) {
+			pr_alert("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n", *bug_id, raw_smp_processor_id(), cpu);
+			arch_send_call_function_single_ipi(cpu);
+		}
+	}
+	dump_stack();
+	*ts1 = ts2;
+
+	return false;
+}
+
 /*
  * csd_lock/csd_unlock used to serialize access to per-cpu csd resources
  *
  * For non-synchronous ipi calls the csd can still be in use by the
  * previous function call. For multi-cpu calls its even more interesting
  * as we'll have to ensure no other cpu is observing our csd.
  */
+static __always_inline void csd_lock_wait(call_single_data_t *csd)
+{
+	int bug_id = 0;
+	u64 ts0, ts1;
+
+	ts1 = ts0 = sched_clock();
+	for (;;) {
+		if (csd_lock_wait_toolong(csd, ts0, &ts1, &bug_id))
+			break;
+		cpu_relax();
+	}
+	smp_acquire__after_ctrl_dep();
+}
+
+#else
+static void csd_lock_record(call_single_data_t *csd)
+{
+}
+
 static __always_inline void csd_lock_wait(call_single_data_t *csd)
 {
 	smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK));
 }
+#endif
 
 static __always_inline void csd_lock(call_single_data_t *csd)
 {
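The hunk above is the heart of the change: csd_lock_wait() now polls the lock flag and calls csd_lock_wait_toolong() on every iteration, which samples sched_clock() and emits a report each time another CSD_LOCK_TIMEOUT (5 seconds) elapses, then keeps waiting. A rough userspace sketch of that poll-and-report pattern follows, with clock_gettime() standing in for sched_clock() and illustrative names (wait_with_timeout_report, wait_toolong, LOCK_TIMEOUT_NS) that are not part of the patch:

/* Userspace sketch (assumption): poll a flag, report every ~5 s of waiting. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define LOCK_TIMEOUT_NS (5ULL * 1000000000ULL)	/* mirrors CSD_LOCK_TIMEOUT */

static uint64_t now_ns(void)			/* stands in for sched_clock() */
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

/* Return true once the flag has cleared; complain each time another
 * LOCK_TIMEOUT_NS elapses, roughly like csd_lock_wait_toolong(). */
static bool wait_toolong(atomic_bool *locked, uint64_t ts0, uint64_t *ts1, int *bug_id)
{
	uint64_t ts2 = now_ns();

	if (!atomic_load_explicit(locked, memory_order_acquire)) {
		if (*bug_id)
			printf("lock (#%d) got unstuck after %llu ns\n",
			       *bug_id, (unsigned long long)(ts2 - ts0));
		return true;
	}
	if (ts2 - *ts1 <= LOCK_TIMEOUT_NS)
		return false;
	if (!*bug_id)
		*bug_id = 1;		/* kernel uses atomic_inc_return(&csd_bug_count) */
	printf("still waiting on lock (#%d), %llu ns so far\n",
	       *bug_id, (unsigned long long)(ts2 - ts0));
	*ts1 = ts2;			/* restart the per-report interval */
	return false;
}

static void wait_with_timeout_report(atomic_bool *locked)
{
	int bug_id = 0;
	uint64_t ts0, ts1;

	ts1 = ts0 = now_ns();
	while (!wait_toolong(locked, ts0, &ts1, &bug_id))
		;			/* the kernel adds cpu_relax() here */
}

int main(void)
{
	atomic_bool locked = false;	/* already unlocked: returns immediately */

	wait_with_timeout_report(&locked);
	return 0;
}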
@@ -166,9 +286,11 @@ static int generic_exec_single(int cpu, call_single_data_t *csd)
 		 * We can unlock early even for the synchronous on-stack case,
 		 * since we're doing this from the same CPU..
 		 */
+		csd_lock_record(csd);
 		csd_unlock(csd);
 		local_irq_save(flags);
 		func(info);
+		csd_lock_record(NULL);
 		local_irq_restore(flags);
 		return 0;
 	}
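This hunk, like the two that follow, brackets the callback invocation with csd_lock_record(csd) before and csd_lock_record(NULL) after, so a CPU stuck in csd_lock_wait() can tell whether the target is running this request, a prior one, or nothing at all. A minimal userspace analogue of that publish/observe pattern is sketched below, using C11 release/acquire atomics where the kernel uses smp_wmb() and smp_load_acquire(); struct work, record_work() and observe_work() are illustrative names, not part of the patch:

/* Userspace sketch (assumption): publish "what am I handling right now" so
 * another thread can safely read it, mirroring csd_lock_record(). */
#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

struct work {
	const char *name;	/* stands in for csd->func */
	void *info;		/* stands in for csd->info */
};

static struct work cur_record;			/* analogue of cur_csd_func/cur_csd_info */
static _Atomic(struct work *) cur_work;		/* analogue of the per-CPU cur_csd pointer */

/* Executing side: fill in the record, then publish the pointer with release
 * semantics (the kernel orders this with smp_wmb() before writing cur_csd). */
static void record_work(struct work *w)
{
	if (!w) {
		/* Work done: erase the record, like csd_lock_record(NULL). */
		atomic_store_explicit(&cur_work, NULL, memory_order_seq_cst);
		return;
	}
	cur_record = *w;
	atomic_store_explicit(&cur_work, &cur_record, memory_order_release);
}

/* Stuck waiter: acquire-load the pointer; a non-NULL result guarantees the
 * record fields are visible (the kernel uses smp_load_acquire() for this). */
static void observe_work(void)
{
	struct work *w = atomic_load_explicit(&cur_work, memory_order_acquire);

	if (w)
		printf("target is handling %s(info=%p)\n", w->name, w->info);
	else
		printf("no work recorded on target\n");
}

int main(void)
{
	struct work w = { "demo_func", NULL };

	record_work(&w);	/* before invoking the callback */
	observe_work();
	record_work(NULL);	/* after the callback returns */
	observe_work();
	return 0;
}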
@@ -268,8 +390,10 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
 				entry = &csd_next->llist;
 			}
 
+			csd_lock_record(csd);
 			func(info);
 			csd_unlock(csd);
+			csd_lock_record(NULL);
 		} else {
 			prev = &csd->llist;
 		}
@@ -296,8 +420,10 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
 			smp_call_func_t func = csd->func;
 			void *info = csd->info;
 
+			csd_lock_record(csd);
 			csd_unlock(csd);
 			func(info);
+			csd_lock_record(NULL);
 		} else if (type == CSD_TYPE_IRQ_WORK) {
 			irq_work_single(csd);
 		}
@@ -375,6 +501,10 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
 
 	csd->func = func;
 	csd->info = info;
+#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
+	csd->src = smp_processor_id();
+	csd->dst = cpu;
+#endif
 
 	err = generic_exec_single(cpu, csd);
 
@@ -540,6 +670,10 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
 			csd->flags |= CSD_TYPE_SYNC;
 		csd->func = func;
 		csd->info = info;
+#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
+		csd->src = smp_processor_id();
+		csd->dst = cpu;
+#endif
 		if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
 			__cpumask_set_cpu(cpu, cfd->cpumask_ipi);
 	}
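The last two hunks record the sending and destination CPU in the csd itself, but only when CONFIG_CSD_LOCK_WAIT_DEBUG is enabled; this is what lets csd_lock_wait_getcpu() name a destination CPU for SYNC/ASYNC requests. A trivial userspace sketch of the same bookkeeping follows; struct request, submit() and report_stuck() are illustrative names, not part of the patch:

/* Userspace sketch (assumption): tag each cross-CPU request with the sending
 * and destination ids at submit time, the way the #ifdef blocks above fill in
 * csd->src and csd->dst, so a stuck waiter can say which CPU it waits on. */
#include <stdio.h>

struct request {
	void (*func)(void *);
	void *info;
	int src;	/* analogue of csd->src: submitting CPU */
	int dst;	/* analogue of csd->dst: destination CPU */
};

static void demo_func(void *info)
{
	(void)info;	/* illustrative callback */
}

static void submit(struct request *req, int self, int target,
		   void (*func)(void *), void *info)
{
	req->func = func;
	req->info = info;
	req->src = self;
	req->dst = target;
}

static void report_stuck(const struct request *req)
{
	/* What csd_lock_wait_getcpu() makes possible: naming the target CPU. */
	printf("waiting on CPU#%02d (request submitted from CPU#%02d)\n",
	       req->dst, req->src);
}

int main(void)
{
	struct request req;

	submit(&req, 0, 3, demo_func, NULL);
	report_stuck(&req);
	return 0;
}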