Skip to content

Commit 1e3312c

Browse files
committed
Increase flexibility of all flags
Some of the flags (like `--filter`) have a weird syntax where you add exclusions by prepending a `!` to the address, and some flags let you use typenames instead of addresses. This change adds an explicit `--exclude` flag for exclusions, and allows `--sinks`, `--sources`, `--highlight`, and `--exclude` to each take a comma-separated list of addresses or typenames. It also makes `--sources` perform much better, since it no longer needs to traverse the whole graph, only the visible nodes.
1 parent 3d91470 commit 1e3312c

File tree

2 files changed

+97
-88
lines changed

2 files changed

+97
-88
lines changed

README.md

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -58,17 +58,18 @@ To get the largest potentially leaked objects among snapshots:
5858
You can add the `--show-parents` flag to identify what's holding a (transitive)
5959
reference to the potentially-leaked object, but that is very noisy. If you want
6060
to visualize that in a nice [Graphviz](https://graphviz.org/)-produced svg, you
61-
can grab the address of an object you want to focus on (say,
62-
`0x00005d4bbffe2100`), a few objects you want to exclude (because they have
63-
just too many references, `!0x7967b8ed0fe0,!0x7967eff3ad80,!0x7967e7cca000`,
64-
for example), and specify an optional type to highlight (`asyncio.Task` is
65-
useful in async contexts, since task leaks are surprising and often contribute
66-
a lot):
61+
can grab the address or typename of an object you want to focus on (say,
62+
`0x00005d4bbffe2100`), a few objects / typenames you want to exclude (because
63+
they have just too many references,
64+
`0x7967b8ed0fe0,0x7967eff3ad80,0x7967e7cca000`, for example), and specify an
65+
optional type to highlight (`asyncio.Task` is useful in async contexts, since
66+
task leaks are surprising and often contribute a lot):
6767

6868
```shell
6969
~/debug-heap$ uv run analyze_heap.py \
7070
graph \
71-
--filter='0x00005d4bbffe2100,!0x7967b8ed0fe0,!0x7967eff3ad80,!0x7967e7cca000' \
71+
--sinks='0x00005d4bbffe2100' \
72+
--exclude='0x7967b8ed0fe0,0x7967eff3ad80,0x7967e7cca000' \
7273
--highlight='asyncio.Task' \
7374
--max-depth=30 \
7475
~/heap.after.bin.gz | \

analyze_heap.py

Lines changed: 89 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,15 @@
22
import collections
33
import dataclasses
44
import copy
5+
import re
56
import gzip
67
import io
78
import logging
89

910
import msgpack
1011

12+
# Matches a bare hexadecimal address with an optional `0x`/`0X` prefix, e.g.
# `0x7967b8ed0fe0` or `7967b8ed0fe0`. The prefix group is non-capturing
# (`(?:...)`); the previous `(:?...)` spelling was a capturing group that also
# accepted a stray leading colon, which `int(..., 16)` then rejected.
_HEX_RE = re.compile(r"^(?:0[xX])?[0-9a-fA-F]+$")
13+
1114

1215
@dataclasses.dataclass(order=True)
1316
class HeapObject:
@@ -151,45 +154,42 @@ def _top(args: argparse.Namespace) -> None:
151154
parser_top.add_argument("heap_dump", metavar="heap-dump")
152155
parser_top.set_defaults(func=_top)
153156

157+
def _parse_list(s: str | None) -> tuple[set[int], set[str]]:
    """Split a comma-separated flag value into addresses and typenames.

    Each entry is stripped of surrounding whitespace. Entries that match
    `_HEX_RE` (hexadecimal digits, with or without a `0x`/`0X` prefix) are
    parsed as integer addresses; every other non-empty entry is kept as a
    typename.

    Args:
        s: The raw flag value, e.g. `"0x7967b8ed0fe0,asyncio.Task"`. May be
            `None` or empty when the flag was not supplied.

    Returns:
        An `(addresses, typenames)` pair of sets. Both are empty when `s` is
        falsy.
    """
    addresses: set[int] = set()
    typenames: set[str] = set()
    if not s:
        return addresses, typenames

    for raw in s.split(","):
        entry = raw.strip()
        if not entry:
            # Tolerate stray commas such as "a,,b" or a trailing ",".
            continue
        if _HEX_RE.match(entry):
            # int(..., 16) accepts an optional 0x/0X prefix on its own, so no
            # manual prefix stripping is needed.
            addresses.add(int(entry, 16))
        else:
            typenames.add(entry)
    return addresses, typenames
169+
154170
def _graph(args: argparse.Namespace) -> None:
155-
queue: list[tuple[int, HeapObject]] = []
156-
seen: set[int] = set()
171+
queue: list[tuple[tuple[int, ...], HeapObject]] = []
157172
ranks = collections.defaultdict[int, list[int]](list)
158-
addresses: set[int] | None = None
159-
excluded_addresses: set[int] | None = None
160-
if "0x" in args.filter:
161-
filter_exprs = args.filter.split(",")
162-
excluded_addresses = set(
163-
int(x.strip("!"), 16) for x in filter_exprs if x.startswith("!")
164-
)
165-
addresses = set(int(x, 16) for x in filter_exprs if not x.startswith("!"))
166-
highlighted_edges: set[tuple[int, int]] = set()
167-
source_addresses: set[int] = (
168-
set(int(x.removeprefix("0x"), 16) for x in args.sources.split(","))
169-
if args.sources
170-
else set()
171-
)
172-
if source_addresses and addresses:
173-
logging.info("computing paths from %r to %r", source_addresses, addresses)
174-
source_seen: set[int] = set()
175-
source_queue = [(x, [x]) for x in source_addresses]
176-
while source_queue:
177-
addr, addr_list = source_queue.pop(0)
178-
if addr in source_seen:
179-
continue
180-
source_seen.add(addr)
181-
if addr in addresses:
182-
# We found a path! Add every pair of addresses into the highlighted edges.
183-
logging.info("found path: %r", addr_list)
184-
for i in range(len(addr_list) - 1):
185-
highlighted_edges.add((addr_list[i], addr_list[i + 1]))
186-
obj = live_objects.get(addr, None)
187-
if obj is None:
188-
continue
189-
for next_addr in obj.referents:
190-
if next_addr in source_seen:
191-
continue
192-
source_queue.append((next_addr, addr_list + [next_addr]))
173+
174+
sink_addresses, sink_typenames = _parse_list(args.sinks)
175+
source_addresses, source_typenames = _parse_list(args.sources)
176+
exclude_addresses, exclude_typenames = _parse_list(args.exclude)
177+
highlight_addresses, highlight_typenames = _parse_list(args.highlight)
178+
179+
# Censored prefixes work differently.
180+
censor_prefixes = args.censor.split(",") if args.censor else None
181+
182+
for obj in live_objects.values():
183+
if obj.typename in sink_typenames:
184+
sink_addresses.add(obj.addr)
185+
if obj.typename in source_typenames:
186+
source_addresses.add(obj.addr)
187+
if obj.typename in highlight_typenames:
188+
highlight_addresses.add(obj.addr)
189+
190+
if obj.addr not in sink_addresses or obj.addr in exclude_addresses:
191+
continue
192+
queue.append(((obj.addr, ), obj))
193193

194194
print("digraph heap {")
195195
print(
@@ -198,27 +198,24 @@ def _graph(args: argparse.Namespace) -> None:
198198
print(" rankdir=TB;")
199199
print(" node [shape=box];")
200200
print(" edge [dir=back];")
201-
for obj in live_objects.values():
202-
if addresses is not None and excluded_addresses is not None:
203-
if obj.addr not in addresses or obj.addr in excluded_addresses:
204-
continue
205-
elif args.filter not in obj.typename:
206-
continue
207-
queue.append((0, obj))
208-
censor_list: list[str] | None = None
209-
if args.censor:
210-
censor_list = args.censor.split(",")
201+
seen: set[int] = set()
202+
highlighted_edges: set[tuple[int, int]] = set()
203+
edge_attributes: dict[tuple[int, int], list[str]] = {}
211204
while queue:
212-
depth, obj = queue.pop(0)
205+
path, obj = queue.pop(0)
213206
if obj.addr in seen:
214207
continue
215208
seen.add(obj.addr)
216-
ranks[depth].append(obj.addr)
209+
210+
if obj.addr in source_addresses:
211+
# We found a path. Add every consecutive pair of addresses along it to the highlighted edges.
212+
for i in range(len(path) - 1):
213+
highlighted_edges.add((path[i], path[i + 1]))
217214

218215
style = ""
219-
if excluded_addresses is not None and obj.addr in excluded_addresses:
216+
if obj.addr in exclude_addresses:
220217
style = ",style=filled,fillcolor=gray"
221-
elif depth == 0:
218+
elif len(path) == 1:
222219
style = ",style=filled,fillcolor=red"
223220
elif args.highlight and args.highlight in obj.typename:
224221
style = ",style=filled,fillcolor=yellow"
@@ -228,8 +225,10 @@ def _graph(args: argparse.Namespace) -> None:
228225
payload = f"\\n{obj.payload}"
229226
else:
230227
payload = f"\\n{obj.payload[:31]}…"
231-
if censor_list:
232-
if any(obj.typename.startswith(censored) for censored in censor_list):
228+
if censor_prefixes:
229+
if any(
230+
obj.typename.startswith(censored) for censored in censor_prefixes
231+
):
233232
print(
234233
f' x{obj.addr:x} [label="0x{obj.addr:x}\\n[omitted]\\n{obj.size}"{style}];'
235234
)
@@ -241,12 +240,12 @@ def _graph(args: argparse.Namespace) -> None:
241240
print(
242241
f' x{obj.addr:x} [label="0x{obj.addr:x}\\n{obj.typename}\\n{obj.size}{payload}"{style}];'
243242
)
244-
if excluded_addresses is not None and obj.addr in excluded_addresses:
243+
if obj.addr in exclude_addresses:
245244
continue
246245

247246
if not obj.referrers:
248247
continue
249-
if depth >= args.max_depth:
248+
if len(path) > args.max_depth:
250249
print(
251250
f' x{obj.addr:x}_parents [label="...",shape=circle,style=filled,fillcolor=gray];'
252251
)
@@ -258,27 +257,31 @@ def _graph(args: argparse.Namespace) -> None:
258257
)
259258
print(f" x{obj.addr:x} -> x{obj.addr:x}_parents [style=dotted];")
260259
continue
261-
if obj.typename not in {
260+
if obj.typename in {
262261
"builtins.function",
263262
}:
264-
for addr in obj.referrers:
265-
referrer_obj = all_objects[addr]
266-
attributes: list[str] = []
267-
if addr in seen:
268-
attributes.append("style=dashed")
269-
elif len(referrer_obj.referents) == 1:
270-
# Marking any single references with bold arrows.
271-
attributes.append("style=bold")
272-
if (obj.addr, addr) in highlighted_edges or (
273-
addr,
274-
obj.addr,
275-
) in highlighted_edges:
276-
attributes.append("color=red")
277-
style = ""
278-
if attributes:
279-
style = f" [{' '.join(attributes)}]"
280-
print(f" x{obj.addr:x} -> x{addr:x}{style};")
281-
queue.append((depth + 1, referrer_obj))
263+
continue
264+
for addr in obj.referrers:
265+
referrer_obj = all_objects[addr]
266+
attributes: list[str] = []
267+
if addr in seen:
268+
attributes.append("style=dashed")
269+
elif len(referrer_obj.referents) == 1:
270+
# Marking any single references with bold arrows.
271+
attributes.append("style=bold")
272+
edge_attributes[(obj.addr, addr)] = attributes
273+
queue.append((path + (addr,), referrer_obj))
274+
275+
# Now that we know what edges need highlighting, we can render them all.
276+
for (a, b), attributes in edge_attributes.items():
277+
style = ""
278+
if (a, b) in highlighted_edges or (b, a) in highlighted_edges:
279+
attributes.append("color=red")
280+
if attributes:
281+
style = f" [{' '.join(attributes)}]"
282+
print(f" x{a:x} -> x{b:x}{style};")
283+
284+
# Finally render all the ranks.
282285
for rank in range(args.max_depth + 1):
283286
if rank not in ranks:
284287
break
@@ -310,11 +313,21 @@ def _graph(args: argparse.Namespace) -> None:
310313
)
311314
parser_graph.add_argument("heap_dump", metavar="heap-dump")
312315
parser_graph.add_argument(
313-
"--filter",
316+
"--sinks",
314317
type=str,
315-
help="Filter entries by typename or address",
318+
help="Comma-separated list of addresses or typenames to trace ownership of.",
316319
required=True,
317320
)
321+
parser_graph.add_argument(
322+
"--exclude",
323+
type=str,
324+
help="Comma-separated list of addresses or typenames to exclude from the graph.",
325+
)
326+
parser_graph.add_argument(
327+
"--sources",
328+
type=str,
329+
help="Comma-separated list of addresses or typenames to highlight paths to the sinks.",
330+
)
318331
parser_graph.add_argument(
319332
"--highlight",
320333
type=str,
@@ -325,11 +338,6 @@ def _graph(args: argparse.Namespace) -> None:
325338
type=str,
326339
help="Censor nodes that match a comma-separated list of prefixes of a typename",
327340
)
328-
parser_graph.add_argument(
329-
"--sources",
330-
type=str,
331-
help="Comma-separated list of addresses to trace to the target",
332-
)
333341
parser_graph.set_defaults(func=_graph)
334342
args = parser.parse_args()
335343

0 commit comments

Comments
 (0)