Skip to content

Commit 98b38a8

Browse files
committed
runq: add anomaly checking for runqueues.
Issues like missing IPIs or irq disablement for long, can create obervable anomalies on scheduler side. For example a scheduler IPI, missed/absent due to hypervisor issue, can cause migration/X threads on VMs to get stuck for ever, causing softlockup. Having a knowledge of these anomalies can help in locating the actual problem. For example following snippet shows that migration/23 thread was stuck in runq's wake_list. cpu: 23 has following tasks in its runq wake_list: task pid: 273301, comm: ora_ipc0_csadsd task pid: 281335, comm: ora_lms0_ccawsd task pid: 391691, comm: ora_ipc0_webprd task pid: 390722, comm: ora_ipc0_dppd1 task pid: 394144, comm: ora_lmd1_audprd task pid: 394450, comm: ora_lms1_rbsspr task pid: 393235, comm: ora_lmd0_etudpr task pid: 24469, comm: cvfwd task pid: 357613, comm: tnslsnr task pid: 351700, comm: ocssd.bin task pid: 394519, comm: ora_dia0_wspd1 task pid: 394307, comm: ora_lms1_wmsprd task pid: 394773, comm: ora_lms0_ccadmp task pid: 351141, comm: ocssd.bin task pid: 394690, comm: ora_lms0_wspd1 task pid: 351774, comm: ocssd.bin task pid: 351678, comm: ocssd.bin task pid: 351692, comm: ocssd.bin task pid: 351683, comm: ocssd.bin task pid: 351680, comm: ocssd.bin task pid: 351686, comm: ocssd.bin task pid: 351681, comm: ocssd.bin task pid: 351688, comm: ocssd.bin task pid: 150, comm: migration/23 This will block migration/X threads running on other CPUs. The helpers and corelens module added here can detect such issues. Signed-off-by: Imran Khan <[email protected]>
1 parent c832d9f commit 98b38a8

File tree

1 file changed

+293
-0
lines changed

1 file changed

+293
-0
lines changed

drgn_tools/runq.py

Lines changed: 293 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,20 @@
11
# Copyright (c) 2023, Oracle and/or its affiliates.
22
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
33
import argparse
4+
from typing import Iterator
45

56
from drgn import container_of
67
from drgn import Object
78
from drgn import Program
9+
from drgn import sizeof
810
from drgn.helpers.common import escape_ascii_string
11+
from drgn.helpers.linux.bitops import for_each_set_bit
912
from drgn.helpers.linux.cpumask import for_each_online_cpu
1013
from drgn.helpers.linux.list import list_for_each_entry
14+
from drgn.helpers.linux.llist import llist_empty
15+
from drgn.helpers.linux.llist import llist_for_each_entry
1116
from drgn.helpers.linux.percpu import per_cpu
17+
from drgn.helpers.linux.rbtree import rbtree_inorder_for_each_entry
1218

1319
from drgn_tools.corelens import CorelensModule
1420
from drgn_tools.task import task_lastrun2now
@@ -97,6 +103,293 @@ def run_queue(prog: Program) -> None:
97103
print()
98104

99105

106+
def for_each_task_on_cfs_rq(prog: Program, cpu: int) -> Iterator[Object]:
107+
"""
108+
Iterate through all tasks, on a CPU's CFS runq.
109+
110+
:param prog: drgn program
111+
:param cpu: cpu number
112+
:return: Iterator of ``struct task_struct *`` objects
113+
"""
114+
runq = per_cpu(prog["runqueues"], cpu)
115+
cfs_rq = runq.cfs
116+
rb_root = cfs_rq.tasks_timeline.rb_root
117+
for se in rbtree_inorder_for_each_entry(
118+
"struct sched_entity", rb_root, "run_node"
119+
):
120+
task = container_of(se, "struct task_struct", "se")
121+
yield task
122+
123+
124+
def num_tasks_on_cfs_rq(prog: Program, cpu: int) -> int:
125+
"""
126+
Get number of tasks, on a CPU's CFS runq.
127+
128+
:param cpu: cpu number
129+
:return: number of tasks
130+
"""
131+
return len(list(for_each_task_on_cfs_rq(prog, cpu)))
132+
133+
134+
def for_each_task_on_rt_rq(prog: Program, cpu: int) -> Iterator[Object]:
135+
"""
136+
Iterate through all tasks, on a CPU's RT runq.
137+
138+
:param prog: drgn program
139+
:param cpu: cpu number
140+
:return: Iterator of ``struct task_struct *`` objects
141+
"""
142+
runq = per_cpu(prog["runqueues"], cpu)
143+
rt_rq = runq.rt
144+
active = rt_rq.active
145+
queues = active.queue
146+
for prio in for_each_set_bit(active.bitmap, sizeof(active.bitmap) * 8):
147+
head = queues[prio]
148+
if prio > 99:
149+
break
150+
151+
for se in list_for_each_entry(
152+
"struct sched_rt_entity", head.address_of_(), "run_list"
153+
):
154+
task = container_of(se, "struct task_struct", "rt")
155+
yield task
156+
157+
158+
def num_tasks_on_rt_rq(prog: Program, cpu: int) -> int:
159+
"""
160+
Get number of tasks, on a CPU's RT runq.
161+
162+
:param cpu: cpu number
163+
:return: number of tasks
164+
"""
165+
return len(list(for_each_task_on_rt_rq(prog, cpu)))
166+
167+
168+
def for_each_task_on_dl_rq(prog: Program, cpu: int) -> Iterator[Object]:
169+
"""
170+
Iterate through all tasks, on a CPU's DL runq.
171+
172+
:param prog: drgn program
173+
:param cpu: cpu number
174+
:return: Iterator of ``struct task_struct *`` objects
175+
"""
176+
runq = per_cpu(prog["runqueues"], cpu)
177+
dl_rq = runq.dl
178+
rb_root = dl_rq.root.rb_root
179+
for dl in rbtree_inorder_for_each_entry(
180+
"struct sched_dl_entity", rb_root, "rb_node"
181+
):
182+
task = container_of(dl, "struct task_struct", "dl")
183+
yield task
184+
185+
186+
def num_tasks_on_dl_rq(prog: Program, cpu: int) -> int:
187+
"""
188+
Get number of tasks, on a CPU's DL runq.
189+
190+
:param prog: drgn program
191+
:param cpu: cpu number
192+
:return: number of tasks
193+
"""
194+
return len(list(for_each_task_on_dl_rq(prog, cpu)))
195+
196+
197+
def for_each_task_on_rq(prog: Program, cpu: int) -> Iterator[Object]:
198+
"""
199+
Iterate through all tasks, on a CPU's runq.
200+
201+
:param prog: drgn program
202+
:param cpu: cpu number
203+
:return: Iterator of ``struct task_struct *`` objects
204+
"""
205+
206+
for task in for_each_task_on_rt_rq(prog, cpu):
207+
yield task
208+
for task in for_each_task_on_cfs_rq(prog, cpu):
209+
yield task
210+
for task in for_each_task_on_dl_rq(prog, cpu):
211+
yield task
212+
213+
214+
def for_each_task_on_rq_wake_list(prog: Program, cpu: int) -> Iterator[Object]:
215+
"""
216+
Iterate through all tasks, on a CPU's wake_list.
217+
218+
A CPU's wake_list contains tasks, that are in the process of being woken
219+
up and have not yet landed on a CPU runq after wakeup.
220+
Tasks should not reside here for long.
221+
222+
:param prog: drgn program
223+
:param cpu: cpu number
224+
:return: Iterator of ``struct task_struct *`` objects
225+
"""
226+
227+
runq = per_cpu(prog["runqueues"], cpu)
228+
for task in llist_for_each_entry(
229+
"struct task_struct", runq.wake_list.first, "wake_entry"
230+
):
231+
yield task
232+
233+
234+
def check_idle_runq(prog: Program, cpu: int) -> None:
235+
"""
236+
Check an idle runq.
237+
An idle runq:
238+
* should have current pid as 0.
239+
* should have nr_running as 0.
240+
* ideally should not have any tasks.
241+
this is not necessarily an error because
242+
scheduler may just be bringing it out of idle.
243+
244+
:param prog: drgn program
245+
:param cpu: cpu number
246+
"""
247+
runq = per_cpu(prog["runqueues"], cpu)
248+
if runq.curr.pid.value_() != 0:
249+
return
250+
251+
if runq.nr_running.value_() != 0:
252+
print(f"Idle cpu: {cpu} has non-zero nr_running")
253+
254+
nr_cfs_task = num_tasks_on_cfs_rq(prog, cpu)
255+
nr_rt_task = num_tasks_on_rt_rq(prog, cpu)
256+
nr_dl_task = num_tasks_on_dl_rq(prog, cpu)
257+
258+
if nr_cfs_task != 0:
259+
print(f"Idle cpu: {cpu} has {nr_cfs_task} tasks on CFS runq")
260+
261+
if nr_rt_task != 0:
262+
print(f"Idle cpu: {cpu} has {nr_rt_task} tasks on RT runq")
263+
264+
if nr_dl_task != 0:
265+
print(f"Idle cpu: {cpu} has {nr_dl_task} tasks on DL runq")
266+
267+
268+
def check_runq_wakelist(prog: Program, cpu: int) -> None:
269+
"""
270+
Check runq's wakelist.
271+
A runq's wakelist temporarily holds tasks that are about to be woken
272+
up on that CPU. If this list has multiple tasks, it usually means that
273+
this CPU has missed multiple sched IPIs and that can indicate other issues
274+
like irq disablement for long, or irq delivery issues between hypervisor
275+
and VM or some other issue.
276+
277+
:param prog: drgn program
278+
:param cpu: cpu number
279+
"""
280+
runq = per_cpu(prog["runqueues"], cpu)
281+
if llist_empty(runq.wake_list):
282+
return
283+
284+
print("\n")
285+
print(f"cpu: {cpu} has following tasks in its runq wake_list:")
286+
for task in for_each_task_on_rq_wake_list(prog, cpu):
287+
print(
288+
f"task pid: {task.pid.value_()}, comm: {task.comm.string_().decode()}"
289+
)
290+
291+
292+
def dump_rt_runq_wait_summary(
293+
prog: Program, cpu: int, qduration_thresh_ms: int = 1000
294+
):
295+
"""
296+
Iterate through all tasks, on a CPU's runq and list tasks that have been queued
297+
for greater than specified threshold in ms (default 1000 ms).
298+
299+
:param prog: drgn Program
300+
:param cpu: cpu number
301+
:param qduration_thresh_ms: threshold for wait duration on runq
302+
"""
303+
runq = per_cpu(prog["runqueues"], cpu)
304+
for task in for_each_task_on_rt_rq(prog, cpu):
305+
try:
306+
if (
307+
task.sched_info.last_queued.value_() > 0
308+
and task.sched_info.last_queued.value_()
309+
> task.sched_info.last_arrival.value_()
310+
):
311+
qduration = (
312+
runq.clock.value_() - task.sched_info.last_queued.value_()
313+
)
314+
print(
315+
f"cpu: {cpu} pid: {task.pid.value_()} prio: {task.prio.value_()} queued for {qduration} nsecs"
316+
)
317+
except AttributeError:
318+
pass
319+
320+
321+
def dump_cfs_runq_wait_summary(
322+
prog: Program, cpu: int, qduration_thresh_ms: int = 1000
323+
):
324+
"""
325+
Iterate through all tasks, on a CPU's runq and list tasks that have been queued
326+
for greater than specified threshold in ms (default 1000 ms).
327+
328+
:param prog: drgn Program
329+
:param cpu: cpu number
330+
:param qduration_thresh_ms: threshold for wait duration on runq
331+
"""
332+
runq = per_cpu(prog["runqueues"], cpu)
333+
for task in for_each_task_on_cfs_rq(prog, cpu):
334+
try:
335+
if (
336+
task.sched_info.last_queued.value_() > 0
337+
and task.sched_info.last_queued.value_()
338+
> task.sched_info.last_arrival.value_()
339+
):
340+
qduration = (
341+
runq.clock.value_() - task.sched_info.last_queued.value_()
342+
)
343+
print(
344+
f"cpu: {cpu} pid: {task.pid.value_()} prio: {task.prio.value_()} queued for {qduration} nsecs"
345+
)
346+
except AttributeError:
347+
pass
348+
349+
350+
def dump_runq_wait_summary(
351+
prog: Program, cpu: int, qduration_thresh_ms: int = 1000
352+
):
353+
"""
354+
Iterate through all tasks, on a CPU's runq and list tasks that have been queued
355+
for greater than specified threshold in ms (default 1000 ms).
356+
357+
:param prog: drgn Program
358+
:param cpu: cpu number
359+
:param qduration_thresh_ms: threshold for wait duration on runq
360+
"""
361+
dump_cfs_runq_wait_summary(prog, cpu, qduration_thresh_ms)
362+
dump_rt_runq_wait_summary(prog, cpu, qduration_thresh_ms)
363+
364+
365+
def run_queue_check(prog: Program) -> None:
366+
"""
367+
Check and report runqueue anomalies.
368+
369+
:param prog: drgn program
370+
"""
371+
372+
for cpu in for_each_online_cpu(prog):
373+
check_idle_runq(prog, cpu)
374+
check_runq_wakelist(prog, cpu)
375+
dump_runq_wait_summary(prog, cpu)
376+
print("\n")
377+
378+
379+
class RunQueueCheck(CorelensModule):
380+
"""
381+
Check and report runqueue anomalies.
382+
The content of this report does not necessarily mean an
383+
error condition or a problem. But it mentions things that
384+
don'y happen usually, so should not be ignored.
385+
"""
386+
387+
name = "runqcheck"
388+
389+
def run(self, prog: Program, args: argparse.Namespace) -> None:
390+
run_queue_check(prog)
391+
392+
100393
class RunQueue(CorelensModule):
101394
"""
102395
List process that are in RT and CFS queue.

0 commit comments

Comments
 (0)