Skip to content

Commit 4593e77

Browse files
committed
rcu-tasks: Add stall warnings for RCU Tasks Trace
This commit adds RCU CPU stall warnings for RCU Tasks Trace. These dump out any tasks blocking the current grace period, as well as any CPUs that have not responded to an IPI request. This happens in two phases, when initially extracting state from the tasks and later when waiting for any holdout tasks to check in. Signed-off-by: Paul E. McKenney <[email protected]>
1 parent c1a76c0 commit 4593e77

File tree

1 file changed

+66
-4
lines changed

1 file changed

+66
-4
lines changed

kernel/rcu/tasks.h

Lines changed: 66 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -798,9 +798,41 @@ static void rcu_tasks_trace_postscan(void)
798798
// Any tasks that exit after this point will set ->trc_reader_checked.
799799
}
800800

801+
/* Show the state of a task stalling the current RCU tasks trace GP. */
802+
static void show_stalled_task_trace(struct task_struct *t, bool *firstreport)
803+
{
804+
int cpu;
805+
806+
if (*firstreport) {
807+
pr_err("INFO: rcu_tasks_trace detected stalls on tasks:\n");
808+
*firstreport = false;
809+
}
810+
// FIXME: This should attempt to use try_invoke_on_nonrunning_task().
811+
cpu = task_cpu(t);
812+
pr_alert("P%d: %c%c%c nesting: %d%c cpu: %d\n",
813+
t->pid,
814+
".I"[READ_ONCE(t->trc_ipi_to_cpu) > 0],
815+
".i"[is_idle_task(t)],
816+
".N"[cpu > 0 && tick_nohz_full_cpu(cpu)],
817+
t->trc_reader_nesting,
818+
" N"[!!t->trc_reader_need_end],
819+
cpu);
820+
sched_show_task(t);
821+
}
822+
823+
/* List stalled IPIs for RCU tasks trace. */
824+
static void show_stalled_ipi_trace(void)
825+
{
826+
int cpu;
827+
828+
for_each_possible_cpu(cpu)
829+
if (per_cpu(trc_ipi_to_cpu, cpu))
830+
pr_alert("\tIPI outstanding to CPU %d\n", cpu);
831+
}
832+
801833
/* Do one scan of the holdout list. */
802834
static void check_all_holdout_tasks_trace(struct list_head *hop,
803-
bool ndrpt, bool *frptp)
835+
bool needreport, bool *firstreport)
804836
{
805837
struct task_struct *g, *t;
806838

@@ -813,21 +845,51 @@ static void check_all_holdout_tasks_trace(struct list_head *hop,
813845
// If check succeeded, remove this task from the list.
814846
if (READ_ONCE(t->trc_reader_checked))
815847
trc_del_holdout(t);
848+
else if (needreport)
849+
show_stalled_task_trace(t, firstreport);
850+
}
851+
if (needreport) {
852+
if (firstreport)
853+
pr_err("INFO: rcu_tasks_trace detected stalls? (Late IPI?)\n");
854+
show_stalled_ipi_trace();
816855
}
817856
}
818857

819858
/* Wait for grace period to complete and provide ordering. */
static void rcu_tasks_trace_postgp(void)
{
	bool firstreport;		// True until first stalled task is printed.
	struct task_struct *g, *t;
	LIST_HEAD(holdouts);		// Stalled tasks collected for reporting.
	long ret;

	// Remove the safety count.
	smp_mb__before_atomic(); // Order vs. earlier atomics
	atomic_dec(&trc_n_readers_need_end);
	smp_mb__after_atomic(); // Order vs. later atomics

	// Wait for readers.  The wait is bounded by the stall timeout so
	// that stall warnings can be printed, after which the wait resumes.
	for (;;) {
		ret = wait_event_idle_exclusive_timeout(
				trc_wait,
				atomic_read(&trc_n_readers_need_end) == 0,
				READ_ONCE(rcu_task_stall_timeout));
		if (ret)
			break; // Count reached zero.
		// Timed out: gather every task still in a read-side critical
		// section onto the local holdouts list.
		// NOTE(review): for_each_process_thread() normally requires
		// RCU read-side or tasklist_lock protection -- confirm the
		// caller's context provides it.
		for_each_process_thread(g, t)
			if (READ_ONCE(t->trc_reader_need_end))
				trc_add_holdout(t, &holdouts);
		firstreport = true;
		// Report each still-stalled task once, then drop it.
		list_for_each_entry_safe(t, g, &holdouts, trc_holdout_list)
			if (READ_ONCE(t->trc_reader_need_end)) {
				show_stalled_task_trace(t, &firstreport);
				trc_del_holdout(t);
			}
		// Nonzero count but no stalled task found: the counter and
		// the task list disagree, so say so explicitly.
		if (firstreport)
			pr_err("INFO: rcu_tasks_trace detected stalls? (Counter/taskslist mismatch?)\n");
		show_stalled_ipi_trace();
		pr_err("\t%d holdouts\n", atomic_read(&trc_n_readers_need_end));
	}
	smp_mb(); // Caller's code must be ordered after wakeup.
}
833895

0 commit comments

Comments
 (0)