Skip to content

Commit c7a35f9

Browse files
committed
Add a corelens module for spinlocks
Detect spinlock spinners and owners. Orabug: 37357389 Signed-off-by: Richard Li <[email protected]>
1 parent d6ba9fe commit c7a35f9

File tree

2 files changed

+368
-0
lines changed

2 files changed

+368
-0
lines changed

drgn_tools/spinlock.py

Lines changed: 360 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,360 @@
1+
# Copyright (c) 2025, Oracle and/or its affiliates.
2+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
3+
import argparse
4+
import os
5+
import re
6+
import shutil
7+
import subprocess
8+
from collections import defaultdict
9+
from typing import Dict
10+
from typing import Iterable
11+
from typing import List
12+
from typing import Tuple
13+
14+
from drgn import FaultError
15+
from drgn import Object
16+
from drgn import Program
17+
from drgn.helpers.linux.cpumask import for_each_online_cpu
18+
from drgn.helpers.linux.percpu import per_cpu
19+
from drgn.helpers.linux.sched import cpu_curr
20+
21+
from drgn_tools.bt import frame_name
22+
from drgn_tools.corelens import CorelensModule
23+
from drgn_tools.table import FixedTable
24+
from drgn_tools.task import get_command
25+
from drgn_tools.task import get_current_run_time
26+
from drgn_tools.util import timestamp_str
27+
28+
# External requirements of this module:
#   * the ``cscope`` binary must be installed and reachable through PATH;
#   * a UEK kernel source tree must be available at UEK_CODE_DIR.
#
# The location can be overridden without editing this file by exporting the
# UEK_CODE_DIR environment variable; the historical default is kept otherwise.
UEK_CODE_DIR = os.environ.get("UEK_CODE_DIR", "/root/uek-production")

# cscope query fields, numbered from 0 in the order of cscope's menu. These
# are the numbers passed as ``nums`` to query_cscope():
#   0: Find this C symbol
#   1: Find this function definition
#   2: Find functions called by this function
#   3: Find functions calling this function
#   4: Find this text string
#   5: Change this text string
#   6: Find this egrep pattern
#   7: Find this file
#   8: Find files #including this file
43+
44+
45+
def query_cscope(
    nums: List, pattern: str, keyword: str = "", target_dir: str = UEK_CODE_DIR
):
    """
    Run one cscope line-oriented query per field number and collect the output.

    cscope is executed directly with an argument list (no shell), so
    ``pattern`` always reaches cscope as a single argument and can never be
    interpreted as shell syntax. Output lines are then filtered in Python
    with ``keyword`` instead of being piped through ``grep -E``.

    :param nums: cscope query field numbers to run (e.g. ``[0, 1]``)
    :param pattern: text to look up, appended to the ``-<num>`` option
    :param keyword: regular expression; only output lines in which it is
        found are kept. The empty default keeps every line.
    :param target_dir: directory in which cscope is executed
    :returns: the retained output lines of all queries, concatenated, each
        terminated by a newline; ``""`` when nothing matched
    :raises FileNotFoundError: if cscope or ``target_dir`` does not exist
    """
    line_filter = re.compile(keyword)
    kept = []
    for num in nums:
        try:
            output = subprocess.check_output(
                ["cscope", "-d", "-L", f"-{num}{pattern}"],
                universal_newlines=True,
                cwd=target_dir,
            )
        except subprocess.CalledProcessError:
            # A failing query contributes nothing; the remaining field
            # numbers are still tried.
            continue

        kept.extend(
            line + "\n"
            for line in output.splitlines()
            if line_filter.search(line)
        )

    return "".join(kept)
64+
65+
66+
# Value of the ``locked`` field that is treated as "not held".
_QSPINLOCK_UNLOCKED_VAL = 0


def qspinlock_is_locked(qsp: Object) -> str:
    """
    Report whether a qspinlock is currently held.

    :param qsp: ``struct qspinlock *``
    :returns: the string ``"True"`` when the ``locked`` field differs from
        the unlocked value, the string ``"False"`` otherwise. Note that the
        result is a ``str``, not a ``bool``.
    """
    locked_field = qsp.locked.value_()
    held = locked_field != _QSPINLOCK_UNLOCKED_VAL
    return str(held)
77+
78+
79+
def get_qspinlock_tail_cpu(qsp: Object) -> int:
    """
    Decode the CPU recorded in the ``tail`` field of a qspinlock.

    The two low bits of ``tail`` are shifted away and the remainder is
    stored off by one, so an empty tail decodes to -1.

    :param qsp: ``struct qspinlock *``
    :returns: the tail cpu spinning on the qspinlock, or -1 if there is none
    """
    return (qsp.tail.value_() >> 2) - 1
89+
90+
91+
def get_tail_cpu_qnode(qsp: Object) -> Iterable[Object]:
    """
    Only for UEK6 and above.
    Yield the qnodes of the tail cpu spinning on the given qspinlock.

    Nothing is yielded when the qspinlock has no tail cpu.

    :param qsp: ``struct qspinlock *``
    :returns: Iterator over the entries of the tail cpu's per-cpu ``qnodes``
    """
    cpu = get_qspinlock_tail_cpu(qsp)
    if cpu >= 0:
        yield from per_cpu(qsp.prog_["qnodes"], cpu)
106+
107+
108+
def dump_qnode_address_for_each_cpu(prog: Program, cpu: int = -1) -> None:
    """
    Only for UEK6 and above.
    Print a two-column table of per-cpu ``qnodes`` addresses.

    With the default ``cpu`` of -1 every online cpu is listed. When a cpu is
    given, only that cpu is listed, and only the header is printed if it is
    not online.

    :param prog: drgn program
    :param cpu: cpu id, or -1 for all online cpus
    """
    print("%-20s %-20s" % ("cpu", "qnode"))

    online = list(for_each_online_cpu(prog))
    if cpu > -1:
        selected = [cpu] if cpu in online else []
    else:
        selected = online

    for cpu_id in selected:
        address = per_cpu(prog["qnodes"], cpu_id).address_of_().value_()
        print("%-20s %-20lx" % (cpu_id, address))
132+
133+
134+
def scan_bt_for_spinners(prog: Program) -> Tuple[Dict, Dict]:
    """
    Scan the stack of the task running on each online cpu for spinlock
    spinners.

    A cpu counts as a spinner when one of its frames is a queued spinlock
    slowpath function. The spinlock itself is read from the local variable
    named ``lock`` of that frame, when it is available and readable.

    :param prog: drgn program
    :returns: a tuple ``(spinners, sp_ids)``. ``spinners`` maps a cpu to the
        list ``[spinlock address, is_locked, task address, pid, run time,
        command]``; the first two entries are the string ``"Unknown"`` when
        the lock could not be determined. ``sp_ids`` maps a
        ``(container struct type, field name)`` tuple to the list of cpus
        spinning on a lock identified that way.
    """
    wait_on_spin_lock_key_words = {
        "__pv_queued_spin_lock_slowpath",
        "native_queued_spin_lock_slowpath",
        "queued_spin_lock_slowpath",
    }

    spinners = {}
    sp_ids = defaultdict(list)
    for cpu in for_each_online_cpu(prog):
        task = cpu_curr(prog, cpu)
        trace = prog.stack_trace(task)
        f_names = []
        # store the index where the keyword appears
        spin_lock_key_word_idx = -1

        for idx, frame in enumerate(trace):
            f_name = frame_name(prog, frame).split(" ")[0]
            f_names.append(f_name)
            if f_name not in wait_on_spin_lock_key_words:
                continue

            spin_lock_key_word_idx = idx
            run_time = timestamp_str(get_current_run_time(prog, cpu))
            pid = task.pid.value_()
            cmd = get_command(task)
            task_addr = task.value_()

            # Reset for every spinner: without this, a frame lacking a
            # usable ``lock`` local would reuse the values found for a
            # previous cpu, or fail on an unassigned name.
            sp_addr = "Unknown"
            is_locked = "Unknown"
            if "lock" in frame.locals():
                sp = frame["lock"]
                if not sp.absent_:
                    try:
                        # Force a read so that an unreadable lock is
                        # detected here rather than later.
                        sp.val.read_()
                        sp_addr = sp.value_()
                        is_locked = qspinlock_is_locked(sp)
                    except FaultError:
                        sp_addr = "Unknown"
                        is_locked = "Unknown"

            # Always record the spinner, so that every cpu listed in
            # sp_ids also has an entry in spinners.
            spinners[cpu] = [
                sp_addr,
                is_locked,
                task_addr,
                pid,
                run_time,
                cmd,
            ]

        # the caller function should be the first function after the frame
        # containing keyword that does not contain _spin_lock substring
        # (might exist corner cases where the caller indeed contains such
        # substring?)
        if spin_lock_key_word_idx > -1:
            for f_name in f_names[spin_lock_key_word_idx + 1 :]:
                if "_spin_lock" not in f_name:
                    sp_id = get_spinlock_container_type_and_field_name(f_name)
                    if sp_id:
                        sp_ids[sp_id].append(cpu)
                    break
    return spinners, sp_ids
196+
197+
198+
def get_spinlock_name(funcname: str):
    """
    Try to find the spinlock taken inside a function.

    cscope is asked for the functions called by ``funcname``, keeping only
    the calls that look like a spin_lock variant. The first argument of the
    first such call is returned, without its leading ``&``.

    :param funcname: name of the function to inspect
    :returns: the lock expression (e.g. ``foo->lock``), ``""`` if none was
        found, or ``None`` if ``funcname`` is on the skip list
    """
    skip_list = ["raw_spin_rq_lock_nested"]
    if funcname in skip_list:
        return None

    output = query_cscope(
        [2],
        funcname,
        keyword=r"spin_lock\(|spin_lock_irq|spin_lock_irqsave|spin_lock_bh",
    )
    if not output:
        return ""

    # line of code that invokes spin_lock(), spin_lock_irqsave(),..
    # Everything after the first space-delimited number on the cscope output
    # line is taken to be the source text.
    match = re.search(r"\s{1}(\d+)\s{1}(.*)", output)
    if not match:
        return ""
    spinlock_line = match.group(2)

    # Prefer the argument list of the spin_lock call itself, so that an
    # earlier parenthesised expression on the same line (for instance an
    # ``if (...)`` condition) is not mistaken for it. Fall back to the first
    # parenthesised group when no such call is recognised.
    match = re.search(r"spin_lock\w*\s*\((.*?)\)", spinlock_line)
    if not match:
        match = re.search(r"\((.*?)\)", spinlock_line)
    if not match:
        return ""

    first_arg = match.group(1).split(",")[0]
    return first_arg.strip().lstrip("&").strip()
227+
228+
229+
# Results of get_spinlock_container_type_and_field_name(), keyed by function
# name. The same functions show up on many stacks and every lookup starts
# several cscope processes, so each function is resolved only once.
_SPINLOCK_ID_CACHE: Dict = {}


def get_spinlock_container_type_and_field_name(funcname: str):
    """
    Get the struct type that contains the spinlock taken by a function, and
    the name of the spinlock field.

    Results (including misses) are cached per function name.

    :param funcname: name of the function to inspect
    :returns: a ``(struct type name, field name)`` tuple, or ``None`` when
        the lock or its container type cannot be determined
    """
    if funcname not in _SPINLOCK_ID_CACHE:
        _SPINLOCK_ID_CACHE[funcname] = _lookup_spinlock_id(funcname)
    return _SPINLOCK_ID_CACHE[funcname]


def _lookup_spinlock_id(funcname: str):
    """Uncached lookup behind get_spinlock_container_type_and_field_name()."""
    # get the spinlock name first
    spinlock_name = get_spinlock_name(funcname)
    if not spinlock_name:
        return None

    # split "instance->field" / "instance.field" into its first two parts
    if "->" in spinlock_name:
        parts = spinlock_name.split("->")
    elif "." in spinlock_name:
        parts = spinlock_name.split(".")
    else:
        # a bare lock name has no container to look up
        return None
    spinlock_container_instance, spinlock_field = parts[0], parts[1]
    if not spinlock_container_instance:
        return None

    # then get the struct type of the instance
    # there could be multiple matches, and we are looking for "struct A a"
    # pattern to get A
    outputs = query_cscope([0, 1], spinlock_container_instance).split("\n")
    for output in outputs:
        output = output.strip("{;").strip()
        match = re.search(r"\s{1}(\d+)\s{1}(.*)", output)
        if not match:
            continue
        match = re.search(r"struct\s{1}(.*)", match.group(2))
        if not match:
            continue
        candidate = match.group(1).split(" ")
        # require something after the type name, i.e. "struct A a" and not
        # just "struct A"
        if len(candidate) > 1:
            return candidate[0], spinlock_field

    return None
270+
271+
272+
def scan_bt_for_owners(prog: Program):
    """
    Scan the stacks of the running tasks for spinlock owners and print them
    together with their spinners.

    Spinners are collected first. Then, for each online cpu, every frame of
    the running task is resolved to a ``(container struct type, field
    name)`` lock identifier; a frame whose identifier matches one that is
    being spun on, on a cpu that is not itself a spinner of it, is reported
    as an owner.

    :param prog: drgn program
    """
    spinners, sp_ids = scan_bt_for_spinners(prog)

    # number of spinlocks
    nr_locks = len({v[0] for v in spinners.values()})
    print(f"There are {nr_locks} spinlock(s) detected.")
    nr_lock_owners_found = 0
    for cpu in for_each_online_cpu(prog):
        # ">=" rather than "==": several frames of one cpu may be counted,
        # so the counter can step past nr_locks.
        if nr_lock_owners_found >= nr_locks:
            break

        task = cpu_curr(prog, cpu)
        trace = prog.stack_trace(task)

        for frame in trace:
            f_name = frame_name(prog, frame).split(" ")[0]
            sp_id = get_spinlock_container_type_and_field_name(f_name)
            if sp_id and sp_id in sp_ids:
                # it is very unlikely for a cpu to hold and spin on the same
                # lock
                if cpu in sp_ids[sp_id]:
                    continue

                nr_lock_owners_found += 1
                print(
                    f"{nr_lock_owners_found}/{nr_locks} of lock owner(s) found!"
                )
                print(f"{frame}({f_name}) is a spinlock owner: ")

                tbl = FixedTable(
                    [
                        "CPU:>",
                        "TASK:>x",
                        "PID:>",
                        "CURRENT HOLDTIME:>",
                        "COMMAND:>",
                    ]
                )

                hold_time = timestamp_str(get_current_run_time(prog, cpu))
                pid = task.pid.value_()
                cmd = get_command(task)
                task_addr = task.value_()
                tbl.row(cpu, task_addr, pid, hold_time, cmd)
                tbl.write()

                print("It has below spinners: ")
                spinner_cpus = sp_ids[sp_id]
                # NOTE(review): the SPINLOCK column uses a hex format while
                # the spinner record may hold the string "Unknown" - confirm
                # that FixedTable copes with that.
                tbl = FixedTable(
                    [
                        "CPU:>",
                        "SPINLOCK:>x",
                        "TASK:>x",
                        "PID:>",
                        "CURRENT SPINTIME:>",
                        "COMMAND:>",
                    ]
                )
                for sp_cpu in spinner_cpus:
                    tbl.row(
                        sp_cpu,
                        spinners[sp_cpu][0],
                        spinners[sp_cpu][2],
                        spinners[sp_cpu][3],
                        spinners[sp_cpu][4],
                        spinners[sp_cpu][5],
                    )
                tbl.write()
344+
345+
346+
class Spinlock(CorelensModule):
    """
    Corelens module that prints spinlock owners and their spinners.
    """

    name = "spinlock"

    def run(self, prog: Program, args: argparse.Namespace) -> None:
        """
        Check the module's prerequisites, then run the owner scan.

        :param prog: drgn program
        :param args: parsed command line arguments (not used here)
        :raises Exception: if UEK_CODE_DIR is unset or not a directory, or
            if cscope cannot be found in PATH
        """
        have_source_tree = bool(UEK_CODE_DIR) and os.path.isdir(UEK_CODE_DIR)
        if not have_source_tree:
            raise Exception(
                "UEK source code not found. Please set UEK_CODE_DIR correctly."
            )

        if shutil.which("cscope") is None:
            raise Exception("cscope not installed or not in PATH.")

        scan_bt_for_owners(prog)

tests/test_spinlock.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Copyright (c) 2025, Oracle and/or its affiliates.
2+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
3+
from drgn_tools import spinlock
4+
5+
6+
# Smoke test for the qspinlock owner/spinner scan.
def test_scan_bt_for_spinlocks(prog):
    """Run the owner scan on the supplied ``prog``; passes if nothing raises."""
    spinlock.scan_bt_for_owners(prog)

0 commit comments

Comments
 (0)