Skip to content

Commit c424dde

Browse files
brenns10 and imran-kn committed
lock: add fallback mechanism for CTF or DWARF absent objects
For mutexes, locks, and semaphores, we are able to reliably test whether any given pointer actually corresponds to the correct lock. We simply pretend that it is valid, and check to see whether the current task is present on the list of waiters. Imran initially implemented the is_task_blocked_on_lock() function with this great idea.

We don't know where the lock pointer will actually be on the stack: that's what DWARF would tell us. We could hard-code the stack offsets we've observed in the past, but this is not very maintainable. It takes a lot of code to implement, and it takes a lot of time and resources to check every kernel. Plus, we would need to check each new kernel as it is released.

The alternative, as suggested by Junxiao, is much simpler: just check every stack offset from the top of the stack to the mutex/sem lock function. We can eliminate many addresses by the fact that they may not be kernel memory addresses. For the remaining addresses, so long as we are careful (validating that we are really following a linked list, and not going into a loop), there should be no problem testing them to see if they are really locks.

This commit implements the approach.

Signed-off-by: Stephen Brennan <[email protected]>
Co-authored-by: Imran Khan <[email protected]>
Suggested-by: Junxiao Bi <[email protected]>
1 parent 4c6b1f1 commit c424dde

File tree

2 files changed

+182
-63
lines changed

2 files changed

+182
-63
lines changed

drgn_tools/lock.py

Lines changed: 40 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@
3232
from typing import Set
3333
from typing import Tuple
3434

35-
import drgn
3635
from drgn import Object
3736
from drgn import Program
3837
from drgn import StackFrame
@@ -47,6 +46,7 @@
4746
from drgn_tools.locking import _RWSEM_READER_SHIFT
4847
from drgn_tools.locking import for_each_mutex_waiter
4948
from drgn_tools.locking import for_each_rwsem_waiter
49+
from drgn_tools.locking import get_lock_from_frame
5050
from drgn_tools.locking import get_rwsem_owner
5151
from drgn_tools.locking import get_rwsem_spinners_info
5252
from drgn_tools.locking import mutex_owner
@@ -121,32 +121,31 @@ def scan_mutex_lock(
121121
if pid is not None:
122122
wtask = find_task(prog, pid)
123123

124-
frame_list = bt_has_any(prog, ["__mutex_lock"])
124+
frame_list = bt_has_any(
125+
prog,
126+
[
127+
"__mutex_lock",
128+
"__mutex_lock_interruptible_slowpath",
129+
"__mutex_lock_slowpath",
130+
"__mutex_lock_killable_slowpath",
131+
],
132+
one_per_task=True,
133+
)
125134
if not frame_list:
126135
return
127136

128137
seen_mutexes: Set[int] = set()
129138

130-
warned_absent = False
131139
for task, frame in frame_list:
132-
try:
133-
mutex = frame["lock"]
134-
mutex_addr = mutex.value_()
135-
except drgn.ObjectAbsentError:
136-
if not warned_absent:
137-
print(
138-
"warning: failed to get mutex from stack frame"
139-
"- information is incomplete"
140-
)
141-
warned_absent = True
140+
mutex = get_lock_from_frame(prog, task, frame, "mutex", "lock")
141+
if not mutex:
142142
continue
143-
144-
struct_owner = mutex_owner(prog, mutex)
145-
143+
mutex_addr = mutex.value_()
146144
if mutex_addr in seen_mutexes:
147145
continue
148146
seen_mutexes.add(mutex_addr)
149147

148+
struct_owner = mutex_owner(prog, mutex)
150149
index = 0
151150
print(f"Mutex: 0x{mutex_addr:x}")
152151
print(
@@ -155,10 +154,9 @@ def scan_mutex_lock(
155154
"PID :",
156155
struct_owner.pid.value_(),
157156
)
158-
print("")
159157
if stack:
160-
bt(struct_owner.pid)
161-
print("")
158+
bt(struct_owner)
159+
print("")
162160

163161
print(
164162
"Mutex WAITERS (Index, cpu, comm, pid, state, wait time (d hr:min:sec:ms)):"
@@ -184,31 +182,23 @@ def scan_mutex_lock(
184182
def show_sem_lock(
185183
prog: Program,
186184
frame_list,
187-
seen_sems,
188185
stack: bool,
189186
time: Optional[int] = None,
190187
pid: Optional[int] = None,
191188
) -> None:
192189
"""Show semaphore details"""
193-
warned_absent = False
194190
wtask = None
195191

196192
if pid is not None:
197193
wtask = find_task(prog, pid)
198194

195+
seen_sems: Set[int] = set()
196+
199197
for task, frame in frame_list:
200-
try:
201-
sem = frame["sem"]
202-
semaddr = sem.value_()
203-
except drgn.ObjectAbsentError:
204-
if not warned_absent:
205-
print(
206-
"warning: failed to get semaphore from stack frame"
207-
"- information is incomplete"
208-
)
209-
warned_absent = True
198+
sem = get_lock_from_frame(prog, task, frame, "semaphore", "sem")
199+
if not sem:
210200
continue
211-
201+
semaddr = sem.value_()
212202
if semaddr in seen_sems:
213203
continue
214204
seen_sems.add(semaddr)
@@ -239,31 +229,23 @@ def show_sem_lock(
239229
def show_rwsem_lock(
240230
prog: Program,
241231
frame_list: List[Tuple[Object, StackFrame]],
242-
seen_rwsems: Set[int],
243232
stack: bool,
244233
time: Optional[int] = None,
245234
pid: Optional[int] = None,
246235
) -> None:
247236
"""Show rw_semaphore details"""
248-
warned_absent = False
249237
wtask = None
250238

251239
if pid is not None:
252240
wtask = find_task(prog, pid)
253241

242+
seen_rwsems: Set[int] = set()
243+
254244
for task, frame in frame_list:
255-
try:
256-
rwsem = frame["sem"]
257-
rwsemaddr = rwsem.value_()
258-
except drgn.ObjectAbsentError:
259-
if not warned_absent:
260-
print(
261-
"warning: failed to get rwsemaphore from stack frame"
262-
"- information is incomplete"
263-
)
264-
warned_absent = True
245+
rwsem = get_lock_from_frame(prog, task, frame, "rw_semaphore", "sem")
246+
if not rwsem:
265247
continue
266-
248+
rwsemaddr = rwsem.value_()
267249
if rwsemaddr in seen_rwsems:
268250
continue
269251
seen_rwsems.add(rwsemaddr)
@@ -282,10 +264,9 @@ def show_rwsem_lock(
282264
print(
283265
f"Writer owner ({owner_task.type_.type_name()})0x{owner_task.value_():x}: (pid){owner_task.pid.value_()}"
284266
)
285-
print("")
286267
if stack:
287-
bt(owner_task.pid)
288-
print("")
268+
bt(owner_task)
269+
print("")
289270
elif owner_type == RwsemStateCode.READER_OWNED:
290271
# For reader owned rwsems, we can get number of readers in newer kernels( >= v5.3.1).
291272
# So try to retrieve that info.
@@ -332,17 +313,16 @@ def scan_sem_lock(
332313
if pid is not None:
333314
wtask = find_task(prog, pid)
334315

335-
seen_sems: Set[int] = set()
336316
functions = [
337317
"__down",
338318
"__down_common",
339319
"__down_interruptible",
340320
"__down_killable",
341321
"__down_timeout",
342322
]
343-
frame_list = bt_has_any(prog, functions, wtask)
323+
frame_list = bt_has_any(prog, functions, wtask, one_per_task=True)
344324
if frame_list:
345-
show_sem_lock(prog, frame_list, seen_sems, stack, time, pid)
325+
show_sem_lock(prog, frame_list, stack, time, pid)
346326

347327

348328
def scan_rwsem_lock(
@@ -356,16 +336,19 @@ def scan_rwsem_lock(
356336
if pid is not None:
357337
wtask = find_task(prog, pid)
358338

359-
seen_rwsems: Set[int] = set()
360339
functions = [
361340
"__rwsem_down_write_failed_common",
362341
"__rwsem_down_read_failed_common",
342+
"rwsem_down_write_failed",
343+
"rwsem_down_write_failed_killable",
363344
"rwsem_down_write_slowpath",
345+
"rwsem_down_read_failed",
346+
"rwsem_down_read_failed_killable",
364347
"rwsem_down_read_slowpath",
365348
]
366-
frame_list = bt_has_any(prog, functions, wtask)
349+
frame_list = bt_has_any(prog, functions, wtask, one_per_task=True)
367350
if frame_list:
368-
show_rwsem_lock(prog, frame_list, seen_rwsems, stack, time, pid)
351+
show_rwsem_lock(prog, frame_list, stack, time, pid)
369352

370353

371354
def scan_lock(
@@ -375,24 +358,20 @@ def scan_lock(
375358
pid: Optional[int] = None,
376359
) -> None:
377360
"""Scan tasks for Mutex and Semaphore"""
378-
print("Scanning Mutexes...")
379-
print("")
361+
print("Scanning Mutexes...\n")
380362
scan_mutex_lock(prog, stack, time, pid)
381363

382-
print("Scanning Semaphores...")
383-
print("")
364+
print("Scanning Semaphores...\n")
384365
scan_sem_lock(prog, stack, time, pid)
385366

386-
print("Scanning RWSemaphores...")
387-
print("")
367+
print("Scanning RWSemaphores...\n")
388368
scan_rwsem_lock(prog, stack, time, pid)
389369

390370

391371
class Locking(CorelensModule):
392372
"""Display active mutex and semaphores and their waiters"""
393373

394374
name = "lock"
395-
need_dwarf = True
396375

397376
def add_args(self, parser: argparse.ArgumentParser) -> None:
398377
parser.add_argument(

0 commit comments

Comments
 (0)