Skip to content

Commit 58b3ec7

Browse files
committed
Add thread roots
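We are seeing a few mysterious cases where a strongly-connected component of objects appears to be just floating around, with nothing visibly keeping it alive. This change adds the code location (qualified name, file, and line number) to the payload of `builtins.frame` objects, and also adds the current frame of every thread other than the one doing the dump as a "thread root", so we can see what those frames' locals are holding on to.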
1 parent c30cf05 commit 58b3ec7

File tree

2 files changed

+54
-16
lines changed

2 files changed

+54
-16
lines changed

analyze_heap.py

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ class HeapObject:
2626
addr: int
2727
typename: str
2828
root: bool
29+
thread_root: bool
2930
children_size_exclusive: int = 0
3031
# Pointers from this object to other objects.
3132
referents: set[int] = dataclasses.field(default_factory=set)
@@ -61,6 +62,7 @@ def _scanheap(filename: str, populate_referrers: bool = True) -> dict[int, HeapO
6162
typename=typenames[record["objtype_addr"]],
6263
size=record["size"],
6364
root=record["root"],
65+
thread_root=record.get("thread_root", False),
6466
payload=payload,
6567
)
6668
elif record["t"] == "referents":
@@ -109,7 +111,11 @@ def _top(args: argparse.Namespace) -> None:
109111
print(
110112
f"- {obj.size:8d} {obj.addr:016x} {obj.typename} "
111113
+ (repr(obj.payload) if obj.payload is not None else "")
112-
+ (" (root)" if obj.root else ""),
114+
+ (
115+
" (root)"
116+
if obj.root
117+
else (" (thread root)" if obj.thread_root else "")
118+
),
113119
)
114120
if args.show_parents:
115121
seen = set()
@@ -126,7 +132,11 @@ def _top(args: argparse.Namespace) -> None:
126132
else ""
127133
)
128134
+ (" (cycle)" if addr in seen else "")
129-
+ (" (root)" if child_obj.root else ""),
135+
+ (
136+
" (root)"
137+
if child_obj.root
138+
else (" (thread root)" if child_obj.thread_root else "")
139+
),
130140
)
131141
if addr in seen:
132142
continue
@@ -235,35 +245,40 @@ def _render_graph(
235245

236246
style = ""
237247
if obj.addr in exclude_addresses:
238-
style = ",style=filled,fillcolor=gray"
248+
style += ",style=filled,fillcolor=gray"
239249
elif len(path) == 1:
240-
style = ",style=filled,fillcolor=red"
250+
style += ",style=filled,fillcolor=red"
241251
elif highlight and highlight in obj.typename:
242-
style = ",style=filled,fillcolor=yellow"
252+
style += ",style=filled,fillcolor=yellow"
243253
payload = ""
244254
if obj.payload is not None:
245255
if len(payload) <= 32:
246256
payload = f"\\n{obj.payload}"
247257
else:
248258
payload = f"\\n{obj.payload[:31]}…"
259+
label = f"0x{obj.addr:x}"
260+
if obj.root:
261+
label += " (gcroot)"
262+
elif obj.thread_root:
263+
label += " (thread root)"
249264
if censor_prefixes:
250265
if any(
251266
obj.typename.startswith(censored) for censored in censor_prefixes
252267
):
253268
output.write(
254-
f' x{obj.addr:x} [label="0x{obj.addr:x}\\n[omitted]\\n{obj.size}"{style}];\n'.encode(
269+
f' x{obj.addr:x} [label="{label}\\n[omitted]\\n{obj.size}"{style}];\n'.encode(
255270
"utf-8"
256271
)
257272
)
258273
else:
259274
output.write(
260-
f' x{obj.addr:x} [label="0x{obj.addr:x}\\n{obj.typename}\\n{obj.size}"{style}];\n'.encode(
275+
f' x{obj.addr:x} [label="{label}\\n{obj.typename}\\n{obj.size}"{style}];\n'.encode(
261276
"utf-8"
262277
)
263278
)
264279
else:
265280
output.write(
266-
f' x{obj.addr:x} [label="0x{obj.addr:x}\\n{obj.typename}\\n{obj.size}{payload}"{style}];\n'.encode(
281+
f' x{obj.addr:x} [label="{label}\\n{obj.typename}\\n{obj.size}{payload}"{style}];\n'.encode(
267282
"utf-8"
268283
)
269284
)

src/payloads/dump_heap.py

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ def __payload_entrypoint(output_path: str) -> None:
66
import gc
77
import struct
88
import sys
9+
import threading
910
from typing import Any
1011

1112
DEFAULT_MAX_PAYLOAD_SIZE = 128
@@ -110,19 +111,28 @@ def _pack_type(objtype_addr: int, typename: bytes) -> bytes:
110111
)
111112

112113
def _pack_object(
113-
*, root: bool, addr: int, objtype_addr: int, size: int, payload: bytes | None
114+
*,
115+
root: bool,
116+
thread_root: bool,
117+
addr: int,
118+
objtype_addr: int,
119+
size: int,
120+
payload: bytes | None,
114121
) -> bytes:
115122
if payload:
116123
return b"".join(
117124
(
118-
_pack_map(6),
125+
_pack_map(7),
119126
#
120127
_pack_str(b"t"),
121128
_pack_str(b"object"),
122129
#
123130
_pack_str(b"root"),
124131
_pack_bool(root),
125132
#
133+
_pack_str(b"thread_root"),
134+
_pack_bool(thread_root),
135+
#
126136
_pack_str(b"objtype_addr"),
127137
_pack_int(objtype_addr),
128138
#
@@ -139,14 +149,17 @@ def _pack_object(
139149
else:
140150
return b"".join(
141151
(
142-
_pack_map(5),
152+
_pack_map(6),
143153
#
144154
_pack_str(b"t"),
145155
_pack_str(b"object"),
146156
#
147157
_pack_str(b"root"),
148158
_pack_bool(root),
149159
#
160+
_pack_str(b"thread_root"),
161+
_pack_bool(thread_root),
162+
#
150163
_pack_str(b"objtype_addr"),
151164
_pack_int(objtype_addr),
152165
#
@@ -232,6 +245,8 @@ def _get_payload(obj: Any) -> bytes | None:
232245
if source_traceback
233246
else repr(obj)
234247
)
248+
elif inspect.isframe(obj):
249+
payload_str = f"{obj.f_code.co_qualname} at {obj.f_code.co_filename}:{obj.f_lineno}"
235250
elif inspect.ismodule(obj):
236251
payload_str = obj.__name__
237252
elif inspect.isclass(obj):
@@ -256,9 +271,16 @@ def _get_payload(obj: Any) -> bytes | None:
256271

257272
try:
258273
with open(output_path, "wb") as output_file:
259-
queue: list[tuple[Any, int, bool]] = [
260-
(o, id(o), True) for o in gc.get_objects()
274+
# Add all the gc-known objects into the heap dump.
275+
queue: list[tuple[Any, int, bool, bool]] = [
276+
(o, id(o), True, False) for o in gc.get_objects()
261277
]
278+
# Now also add all the thread-local frames, excluding the one from this method.
279+
current_thread_id = threading.current_thread().ident
280+
for thread_id, frame in sys._current_frames().items():
281+
if current_thread_id == thread_id:
282+
continue
283+
queue.append((frame, id(frame), False, True))
262284
x = 0
263285
totalsize = 0
264286

@@ -272,7 +294,7 @@ def _get_payload(obj: Any) -> bytes | None:
272294
)
273295
x += 1
274296

275-
obj, addr, root = queue.pop()
297+
obj, addr, gcroot, thread_root = queue.pop()
276298
if addr in ignored_addrs:
277299
# We don't want to track the objects we own.
278300
continue
@@ -299,7 +321,8 @@ def _get_payload(obj: Any) -> bytes | None:
299321
totalsize += size
300322
output_file.write(
301323
_pack_object(
302-
root=root,
324+
root=gcroot,
325+
thread_root=thread_root,
303326
addr=addr,
304327
objtype_addr=objtype_addr,
305328
size=size,
@@ -312,7 +335,7 @@ def _get_payload(obj: Any) -> bytes | None:
312335
for child_obj in referents:
313336
child_addr = id(child_obj)
314337
child_addrs.append(child_addr)
315-
queue.append((child_obj, child_addr, False))
338+
queue.append((child_obj, child_addr, False, False))
316339
if child_addrs:
317340
output_file.write(
318341
_pack_referents(addr=addr, child_addrs=child_addrs)

0 commit comments

Comments
 (0)