Skip to content

Commit d40760d

Browse files
paulmckrcu and Neeraj Upadhyay
authored and committed
locking/csd-lock: Use backoff for repeated reports of same incident
Currently, the CSD-lock diagnostics in CONFIG_CSD_LOCK_WAIT_DEBUG=y kernels are emitted at five-second intervals. Although this has proven to be a good time interval for the first diagnostic, if the target CPU keeps interrupts disabled for way longer than five seconds, the ratio of useful new information to pointless repetition decreases considerably. Therefore, back off the time period for repeated reports of the same incident, increasing linearly with the number of reports and logarithmically with the number of online CPUs. [ paulmck: Apply Dan Carpenter feedback. ] Signed-off-by: Paul E. McKenney <[email protected]> Cc: Imran Khan <[email protected]> Cc: Ingo Molnar <[email protected]> Cc: Leonardo Bras <[email protected]> Cc: "Peter Zijlstra (Intel)" <[email protected]> Cc: Rik van Riel <[email protected]> Reviewed-by: Rik van Riel <[email protected]> Signed-off-by: Neeraj Upadhyay <[email protected]>
1 parent ac9d455 commit d40760d

File tree

1 file changed

+7
-3
lines changed

1 file changed

+7
-3
lines changed

kernel/smp.c

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -226,7 +226,7 @@ bool csd_lock_is_stuck(void)
226226
* the CSD_TYPE_SYNC/ASYNC types provide the destination CPU,
227227
* so waiting on other types gets much less information.
228228
*/
229-
static bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, int *bug_id)
229+
static bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, int *bug_id, unsigned long *nmessages)
230230
{
231231
int cpu = -1;
232232
int cpux;
@@ -249,7 +249,9 @@ static bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, in
249249
ts2 = sched_clock();
250250
/* How long since we last checked for a stuck CSD lock.*/
251251
ts_delta = ts2 - *ts1;
252-
if (likely(ts_delta <= csd_lock_timeout_ns || csd_lock_timeout_ns == 0))
252+
if (likely(ts_delta <= csd_lock_timeout_ns * (*nmessages + 1) *
253+
(!*nmessages ? 1 : (ilog2(num_online_cpus()) / 2 + 1)) ||
254+
csd_lock_timeout_ns == 0))
253255
return false;
254256

255257
firsttime = !*bug_id;
@@ -266,6 +268,7 @@ static bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, in
266268
pr_alert("csd: %s non-responsive CSD lock (#%d) on CPU#%d, waiting %lld ns for CPU#%02d %pS(%ps).\n",
267269
firsttime ? "Detected" : "Continued", *bug_id, raw_smp_processor_id(), (s64)ts_delta,
268270
cpu, csd->func, csd->info);
271+
(*nmessages)++;
269272
if (firsttime)
270273
atomic_inc(&n_csd_lock_stuck);
271274
/*
@@ -306,12 +309,13 @@ static bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, in
306309
*/
307310
static void __csd_lock_wait(call_single_data_t *csd)
308311
{
312+
unsigned long nmessages = 0;
309313
int bug_id = 0;
310314
u64 ts0, ts1;
311315

312316
ts1 = ts0 = sched_clock();
313317
for (;;) {
314-
if (csd_lock_wait_toolong(csd, ts0, &ts1, &bug_id))
318+
if (csd_lock_wait_toolong(csd, ts0, &ts1, &bug_id, &nmessages))
315319
break;
316320
cpu_relax();
317321
}

0 commit comments

Comments
 (0)