Skip to content

Commit be42f00

Browse files
Zhen Leipaulmckrcu
authored andcommitted
rcu: Add RCU stall diagnosis information
Because RCU CPU stall warnings are driven from the scheduling-clock interrupt handler, a workload consisting of a very large number of short-duration hardware interrupts can result in misleading stall-warning messages. On systems supporting only a single level of interrupts, that is, where interrupts handlers cannot be interrupted, this can produce misleading diagnostics. The stack traces will show the innocent-bystander interrupted task, not the interrupts that are at the very least exacerbating the stall. This situation can be improved by displaying the number of interrupts and the CPU time that they have consumed. Diagnosing other types of stalls can be eased by also providing the count of softirqs and the CPU time that they consumed as well as the number of context switches and the task-level CPU time consumed. Consider the following output given this change: rcu: INFO: rcu_preempt self-detected stall on CPU rcu: 0-....: (1250 ticks this GP) <omitted> rcu: hardirqs softirqs csw/system rcu: number: 624 45 0 rcu: cputime: 69 1 2425 ==> 2500(ms) This output shows that the number of hard and soft interrupts is small, there are no context switches, and the system takes up a lot of time. This indicates that the current task is looping with preemption disabled. The impact on system performance is negligible because snapshot is recorded only once for all continuous RCU stalls. This added debugging information is suppressed by default and can be enabled by building the kernel with CONFIG_RCU_CPU_STALL_CPUTIME=y or by booting with rcupdate.rcu_cpu_stall_cputime=1. Signed-off-by: Zhen Lei <[email protected]> Reviewed-by: Mukesh Ojha <[email protected]> Reviewed-by: Frederic Weisbecker <[email protected]> Signed-off-by: Paul E. McKenney <[email protected]>
1 parent 7c18272 commit be42f00

File tree

7 files changed

+90
-0
lines changed

7 files changed

+90
-0
lines changed

Documentation/admin-guide/kernel-parameters.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5113,6 +5113,12 @@
51135113
rcupdate.rcu_cpu_stall_timeout to be used (after
51145114
conversion from seconds to milliseconds).
51155115

5116+
rcupdate.rcu_cpu_stall_cputime= [KNL]
5117+
Provide statistics on the cputime and count of
5118+
interrupts and tasks during the sampling period. For
5119+
multiple continuous RCU stalls, all sampling periods
5120+
begin at half of the first RCU stall timeout.
5121+
51165122
rcupdate.rcu_expedited= [KNL]
51175123
Use expedited grace-period primitives, for
51185124
example, synchronize_rcu_expedited() instead

kernel/rcu/Kconfig.debug

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,19 @@ config RCU_EXP_CPU_STALL_TIMEOUT
9292
says to use the RCU_CPU_STALL_TIMEOUT value converted from
9393
seconds to milliseconds.
9494

95+
config RCU_CPU_STALL_CPUTIME
96+
bool "Provide additional RCU stall debug information"
97+
depends on RCU_STALL_COMMON
98+
default n
99+
help
100+
Collect statistics during the sampling period, such as the number of
101+
(hard interrupts, soft interrupts, task switches) and the cputime of
102+
(hard interrupts, soft interrupts, kernel tasks) are added to the
103+
RCU stall report. For multiple continuous RCU stalls, all sampling
104+
periods begin at half of the first RCU stall timeout.
105+
The boot option rcupdate.rcu_cpu_stall_cputime has the same function
106+
as this one, but will override this if it exists.
107+
95108
config RCU_TRACE
96109
bool "Enable tracing for RCU"
97110
depends on DEBUG_KERNEL

kernel/rcu/rcu.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,7 @@ extern int rcu_cpu_stall_ftrace_dump;
224224
extern int rcu_cpu_stall_suppress;
225225
extern int rcu_cpu_stall_timeout;
226226
extern int rcu_exp_cpu_stall_timeout;
227+
extern int rcu_cpu_stall_cputime;
227228
int rcu_jiffies_till_stall_check(void);
228229
int rcu_exp_jiffies_till_stall_check(void);
229230

kernel/rcu/tree.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -925,6 +925,24 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
925925
rdp->rcu_iw_gp_seq = rnp->gp_seq;
926926
irq_work_queue_on(&rdp->rcu_iw, rdp->cpu);
927927
}
928+
929+
if (rcu_cpu_stall_cputime && rdp->snap_record.gp_seq != rdp->gp_seq) {
930+
int cpu = rdp->cpu;
931+
struct rcu_snap_record *rsrp;
932+
struct kernel_cpustat *kcsp;
933+
934+
kcsp = &kcpustat_cpu(cpu);
935+
936+
rsrp = &rdp->snap_record;
937+
rsrp->cputime_irq = kcpustat_field(kcsp, CPUTIME_IRQ, cpu);
938+
rsrp->cputime_softirq = kcpustat_field(kcsp, CPUTIME_SOFTIRQ, cpu);
939+
rsrp->cputime_system = kcpustat_field(kcsp, CPUTIME_SYSTEM, cpu);
940+
rsrp->nr_hardirqs = kstat_cpu_irqs_sum(rdp->cpu);
941+
rsrp->nr_softirqs = kstat_cpu_softirqs_sum(rdp->cpu);
942+
rsrp->nr_csw = nr_context_switches_cpu(rdp->cpu);
943+
rsrp->jiffies = jiffies;
944+
rsrp->gp_seq = rdp->gp_seq;
945+
}
928946
}
929947

930948
return 0;

kernel/rcu/tree.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,23 @@ union rcu_noqs {
158158
u16 s; /* Set of bits, aggregate OR here. */
159159
};
160160

161+
/*
162+
* Record the snapshot of the core stats at half of the first RCU stall timeout.
163+
* The member gp_seq is used to ensure that all members are updated only once
164+
* during the sampling period. The snapshot is taken only if this gp_seq is not
165+
* equal to rdp->gp_seq.
166+
*/
167+
struct rcu_snap_record {
168+
unsigned long gp_seq; /* Track rdp->gp_seq counter */
169+
u64 cputime_irq; /* Accumulated cputime of hard irqs */
170+
u64 cputime_softirq;/* Accumulated cputime of soft irqs */
171+
u64 cputime_system; /* Accumulated cputime of kernel tasks */
172+
unsigned long nr_hardirqs; /* Accumulated number of hard irqs */
173+
unsigned int nr_softirqs; /* Accumulated number of soft irqs */
174+
unsigned long long nr_csw; /* Accumulated number of task switches */
175+
unsigned long jiffies; /* Track jiffies value */
176+
};
177+
161178
/* Per-CPU data for read-copy update. */
162179
struct rcu_data {
163180
/* 1) quiescent-state and grace-period handling : */
@@ -262,6 +279,8 @@ struct rcu_data {
262279
short rcu_onl_gp_flags; /* ->gp_flags at last online. */
263280
unsigned long last_fqs_resched; /* Time of last rcu_resched(). */
264281
unsigned long last_sched_clock; /* Jiffies of last rcu_sched_clock_irq(). */
282+
struct rcu_snap_record snap_record; /* Snapshot of core stats at half of */
283+
/* the first RCU stall timeout */
265284

266285
long lazy_len; /* Length of buffered lazy callbacks. */
267286
int cpu;

kernel/rcu/tree_stall.h

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,35 @@ static bool rcu_is_rcuc_kthread_starving(struct rcu_data *rdp, unsigned long *jp
428428
return j > 2 * HZ;
429429
}
430430

431+
static void print_cpu_stat_info(int cpu)
432+
{
433+
struct rcu_snap_record rsr, *rsrp;
434+
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
435+
struct kernel_cpustat *kcsp = &kcpustat_cpu(cpu);
436+
437+
if (!rcu_cpu_stall_cputime)
438+
return;
439+
440+
rsrp = &rdp->snap_record;
441+
if (rsrp->gp_seq != rdp->gp_seq)
442+
return;
443+
444+
rsr.cputime_irq = kcpustat_field(kcsp, CPUTIME_IRQ, cpu);
445+
rsr.cputime_softirq = kcpustat_field(kcsp, CPUTIME_SOFTIRQ, cpu);
446+
rsr.cputime_system = kcpustat_field(kcsp, CPUTIME_SYSTEM, cpu);
447+
448+
pr_err("\t hardirqs softirqs csw/system\n");
449+
pr_err("\t number: %8ld %10d %12lld\n",
450+
kstat_cpu_irqs_sum(cpu) - rsrp->nr_hardirqs,
451+
kstat_cpu_softirqs_sum(cpu) - rsrp->nr_softirqs,
452+
nr_context_switches_cpu(cpu) - rsrp->nr_csw);
453+
pr_err("\tcputime: %8lld %10lld %12lld ==> %d(ms)\n",
454+
div_u64(rsr.cputime_irq - rsrp->cputime_irq, NSEC_PER_MSEC),
455+
div_u64(rsr.cputime_softirq - rsrp->cputime_softirq, NSEC_PER_MSEC),
456+
div_u64(rsr.cputime_system - rsrp->cputime_system, NSEC_PER_MSEC),
457+
jiffies_to_msecs(jiffies - rsrp->jiffies));
458+
}
459+
431460
/*
432461
* Print out diagnostic information for the specified stalled CPU.
433462
*
@@ -484,6 +513,8 @@ static void print_cpu_stall_info(int cpu)
484513
data_race(rcu_state.n_force_qs) - rcu_state.n_force_qs_gpstart,
485514
rcuc_starved ? buf : "",
486515
falsepositive ? " (false positive?)" : "");
516+
517+
print_cpu_stat_info(cpu);
487518
}
488519

489520
/* Complain about starvation of grace-period kthread. */

kernel/rcu/update.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -508,6 +508,8 @@ int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
508508
module_param(rcu_cpu_stall_timeout, int, 0644);
509509
int rcu_exp_cpu_stall_timeout __read_mostly = CONFIG_RCU_EXP_CPU_STALL_TIMEOUT;
510510
module_param(rcu_exp_cpu_stall_timeout, int, 0644);
511+
int rcu_cpu_stall_cputime __read_mostly = IS_ENABLED(CONFIG_RCU_CPU_STALL_CPUTIME);
512+
module_param(rcu_cpu_stall_cputime, int, 0644);
511513
#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
512514

513515
// Suppress boot-time RCU CPU stall warnings and rcutorture writer stall

0 commit comments

Comments
 (0)