Skip to content

Commit cbe644a

Browse files
paulmckrcu authored and Frederic Weisbecker committed
rcu: Stop stall warning from dumping stacks if grace period ends
Currently, once an RCU CPU stall warning decides to dump the stalling CPUs' stacks, the rcu_dump_cpu_stacks() function persists until it has gone through the full list. Unfortunately, if the stalled grace period ends midway through, this function will be dumping stacks of innocent-bystander CPUs that happen to be blocking not the old grace period, but instead the new one. This can cause serious confusion. This commit therefore stops dumping stacks if and when the stalled grace period ends. [ paulmck: Apply Joel Fernandes feedback. ] Signed-off-by: Paul E. McKenney <[email protected]> Signed-off-by: Frederic Weisbecker <[email protected]>
1 parent 26ff1fb commit cbe644a

File tree

1 file changed

+11
-6
lines changed

1 file changed

+11
-6
lines changed

kernel/rcu/tree_stall.h

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -335,13 +335,17 @@ static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags)
335335
* that don't support NMI-based stack dumps. The NMI-triggered stack
336336
* traces are more accurate because they are printed by the target CPU.
337337
*/
338-
static void rcu_dump_cpu_stacks(void)
338+
static void rcu_dump_cpu_stacks(unsigned long gp_seq)
339339
{
340340
int cpu;
341341
unsigned long flags;
342342
struct rcu_node *rnp;
343343

344344
rcu_for_each_leaf_node(rnp) {
345+
if (gp_seq != data_race(rcu_state.gp_seq)) {
346+
pr_err("INFO: Stall ended during stack backtracing.\n");
347+
return;
348+
}
345349
printk_deferred_enter();
346350
raw_spin_lock_irqsave_rcu_node(rnp, flags);
347351
for_each_leaf_node_possible_cpu(rnp, cpu)
@@ -608,7 +612,7 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
608612
(long)rcu_seq_current(&rcu_state.gp_seq), totqlen,
609613
data_race(rcu_state.n_online_cpus)); // Diagnostic read
610614
if (ndetected) {
611-
rcu_dump_cpu_stacks();
615+
rcu_dump_cpu_stacks(gp_seq);
612616

613617
/* Complain about tasks blocking the grace period. */
614618
rcu_for_each_leaf_node(rnp)
@@ -640,7 +644,7 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
640644
rcu_force_quiescent_state(); /* Kick them all. */
641645
}
642646

643-
static void print_cpu_stall(unsigned long gps)
647+
static void print_cpu_stall(unsigned long gp_seq, unsigned long gps)
644648
{
645649
int cpu;
646650
unsigned long flags;
@@ -677,7 +681,7 @@ static void print_cpu_stall(unsigned long gps)
677681
rcu_check_gp_kthread_expired_fqs_timer();
678682
rcu_check_gp_kthread_starvation();
679683

680-
rcu_dump_cpu_stacks();
684+
rcu_dump_cpu_stacks(gp_seq);
681685

682686
raw_spin_lock_irqsave_rcu_node(rnp, flags);
683687
/* Rewrite if needed in case of slow consoles. */
@@ -759,7 +763,8 @@ static void check_cpu_stall(struct rcu_data *rdp)
759763
gs2 = READ_ONCE(rcu_state.gp_seq);
760764
if (gs1 != gs2 ||
761765
ULONG_CMP_LT(j, js) ||
762-
ULONG_CMP_GE(gps, js))
766+
ULONG_CMP_GE(gps, js) ||
767+
!rcu_seq_state(gs2))
763768
return; /* No stall or GP completed since entering function. */
764769
rnp = rdp->mynode;
765770
jn = jiffies + ULONG_MAX / 2;
@@ -780,7 +785,7 @@ static void check_cpu_stall(struct rcu_data *rdp)
780785
pr_err("INFO: %s detected stall, but suppressed full report due to a stuck CSD-lock.\n", rcu_state.name);
781786
} else if (self_detected) {
782787
/* We haven't checked in, so go dump stack. */
783-
print_cpu_stall(gps);
788+
print_cpu_stall(gs2, gps);
784789
} else {
785790
/* They had a few time units to dump stack, so complain. */
786791
print_other_cpu_stall(gs2, gps);

0 commit comments

Comments
 (0)