22import collections
33import dataclasses
44import copy
5+ import re
56import gzip
67import io
78import logging
89
910import msgpack
1011
12+ _HEX_RE = re .compile (r"^(:?0[xX])?[0-9a-fA-F]+$" )
13+
1114
1215@dataclasses .dataclass (order = True )
1316class HeapObject :
@@ -151,45 +154,42 @@ def _top(args: argparse.Namespace) -> None:
151154 parser_top .add_argument ("heap_dump" , metavar = "heap-dump" )
152155 parser_top .set_defaults (func = _top )
153156
def _parse_list(s: str) -> tuple[set[int], set[str]]:
    """Split a comma-separated option value into addresses and typenames.

    Every entry is stripped of surrounding whitespace. An entry that
    ``_HEX_RE`` recognises as a hexadecimal number (with or without a
    ``0x``/``0X`` prefix) is parsed as an integer address; any other
    non-empty entry is recorded as a typename.

    Args:
        s: The raw option value. Any falsy value (empty string, or
            ``None`` for an option that was not supplied) yields two
            empty sets.

    Returns:
        An ``(addresses, typenames)`` pair of sets.
    """
    addresses: set[int] = set()
    typenames: set[str] = set()

    if not s:
        return addresses, typenames

    for raw in s.split(","):
        item = raw.strip()
        if not item:
            # Tolerate stray separators such as "a,,b" or a trailing comma
            # instead of recording an empty typename.
            continue
        if _HEX_RE.match(item):
            try:
                # int() with base 16 accepts the "0x"/"0X" prefix itself, so
                # no manual prefix stripping is needed.
                addresses.add(int(item, 16))
            except ValueError:
                # Not a number after all; treat it like any other name
                # rather than aborting the whole command.
                typenames.add(item)
        else:
            typenames.add(item)

    return addresses, typenames
169+
154170 def _graph (args : argparse .Namespace ) -> None :
155- queue : list [tuple [int , HeapObject ]] = []
156- seen : set [int ] = set ()
171+ queue : list [tuple [tuple [int , ...], HeapObject ]] = []
157172 ranks = collections .defaultdict [int , list [int ]](list )
158- addresses : set [int ] | None = None
159- excluded_addresses : set [int ] | None = None
160- if "0x" in args .filter :
161- filter_exprs = args .filter .split ("," )
162- excluded_addresses = set (
163- int (x .strip ("!" ), 16 ) for x in filter_exprs if x .startswith ("!" )
164- )
165- addresses = set (int (x , 16 ) for x in filter_exprs if not x .startswith ("!" ))
166- highlighted_edges : set [tuple [int , int ]] = set ()
167- source_addresses : set [int ] = (
168- set (int (x .removeprefix ("0x" ), 16 ) for x in args .sources .split ("," ))
169- if args .sources
170- else set ()
171- )
172- if source_addresses and addresses :
173- logging .info ("computing paths from %r to %r" , source_addresses , addresses )
174- source_seen : set [int ] = set ()
175- source_queue = [(x , [x ]) for x in source_addresses ]
176- while source_queue :
177- addr , addr_list = source_queue .pop (0 )
178- if addr in source_seen :
179- continue
180- source_seen .add (addr )
181- if addr in addresses :
182- # We found a path! Add every pair of addresses into the highlighted edges.
183- logging .info ("found path: %r" , addr_list )
184- for i in range (len (addr_list ) - 1 ):
185- highlighted_edges .add ((addr_list [i ], addr_list [i + 1 ]))
186- obj = live_objects .get (addr , None )
187- if obj is None :
188- continue
189- for next_addr in obj .referents :
190- if next_addr in source_seen :
191- continue
192- source_queue .append ((next_addr , addr_list + [next_addr ]))
173+
174+ sink_addresses , sink_typenames = _parse_list (args .sinks )
175+ source_addresses , source_typenames = _parse_list (args .sources )
176+ exclude_addresses , exclude_typenames = _parse_list (args .exclude )
177+ highlight_addresses , highlight_typenames = _parse_list (args .highlight )
178+
179+ # Censored prefixes work differently.
180+ censor_prefixes = args .censor .split ("," ) if args .censor else None
181+
182+ for obj in live_objects .values ():
183+ if obj .typename in sink_typenames :
184+ sink_addresses .add (obj .addr )
185+ if obj .typename in source_typenames :
186+ source_addresses .add (obj .addr )
187+ if obj .typename in highlight_typenames :
188+ highlight_addresses .add (obj .addr )
189+
190+ if obj .addr not in sink_addresses or obj .addr in exclude_addresses :
191+ continue
192+ queue .append (((obj .addr , ), obj ))
193193
194194 print ("digraph heap {" )
195195 print (
@@ -198,27 +198,24 @@ def _graph(args: argparse.Namespace) -> None:
198198 print (" rankdir=TB;" )
199199 print (" node [shape=box];" )
200200 print (" edge [dir=back];" )
201- for obj in live_objects .values ():
202- if addresses is not None and excluded_addresses is not None :
203- if obj .addr not in addresses or obj .addr in excluded_addresses :
204- continue
205- elif args .filter not in obj .typename :
206- continue
207- queue .append ((0 , obj ))
208- censor_list : list [str ] | None = None
209- if args .censor :
210- censor_list = args .censor .split ("," )
201+ seen : set [int ] = set ()
202+ highlighted_edges : set [tuple [int , int ]] = set ()
203+ edge_attributes : dict [tuple [int , int ], list [str ]] = {}
211204 while queue :
212- depth , obj = queue .pop (0 )
205+ path , obj = queue .pop (0 )
213206 if obj .addr in seen :
214207 continue
215208 seen .add (obj .addr )
216- ranks [depth ].append (obj .addr )
209+
210+ if obj .addr in source_addresses :
211+ # We found a path. Add every pair of sink_addresses into the highlighted edges.
212+ for i in range (len (path ) - 1 ):
213+ highlighted_edges .add ((path [i ], path [i + 1 ]))
217214
218215 style = ""
219- if excluded_addresses is not None and obj .addr in excluded_addresses :
216+ if obj .addr in exclude_addresses :
220217 style = ",style=filled,fillcolor=gray"
221- elif depth == 0 :
218+ elif len ( path ) == 1 :
222219 style = ",style=filled,fillcolor=red"
223220 elif args .highlight and args .highlight in obj .typename :
224221 style = ",style=filled,fillcolor=yellow"
@@ -228,8 +225,10 @@ def _graph(args: argparse.Namespace) -> None:
228225 payload = f"\\ n{ obj .payload } "
229226 else :
230227 payload = f"\\ n{ obj .payload [:31 ]} …"
231- if censor_list :
232- if any (obj .typename .startswith (censored ) for censored in censor_list ):
228+ if censor_prefixes :
229+ if any (
230+ obj .typename .startswith (censored ) for censored in censor_prefixes
231+ ):
233232 print (
234233 f' x{ obj .addr :x} [label="0x{ obj .addr :x} \\ n[omitted]\\ n{ obj .size } "{ style } ];'
235234 )
@@ -241,12 +240,12 @@ def _graph(args: argparse.Namespace) -> None:
241240 print (
242241 f' x{ obj .addr :x} [label="0x{ obj .addr :x} \\ n{ obj .typename } \\ n{ obj .size } { payload } "{ style } ];'
243242 )
244- if excluded_addresses is not None and obj .addr in excluded_addresses :
243+ if obj .addr in exclude_addresses :
245244 continue
246245
247246 if not obj .referrers :
248247 continue
249- if depth >= args .max_depth :
248+ if len ( path ) > args .max_depth :
250249 print (
251250 f' x{ obj .addr :x} _parents [label="...",shape=circle,style=filled,fillcolor=gray];'
252251 )
@@ -258,27 +257,31 @@ def _graph(args: argparse.Namespace) -> None:
258257 )
259258 print (f" x{ obj .addr :x} -> x{ obj .addr :x} _parents [style=dotted];" )
260259 continue
261- if obj .typename not in {
260+ if obj .typename in {
262261 "builtins.function" ,
263262 }:
264- for addr in obj .referrers :
265- referrer_obj = all_objects [addr ]
266- attributes : list [str ] = []
267- if addr in seen :
268- attributes .append ("style=dashed" )
269- elif len (referrer_obj .referents ) == 1 :
270- # Marking any single references with bold arrows.
271- attributes .append ("style=bold" )
272- if (obj .addr , addr ) in highlighted_edges or (
273- addr ,
274- obj .addr ,
275- ) in highlighted_edges :
276- attributes .append ("color=red" )
277- style = ""
278- if attributes :
279- style = f" [{ ' ' .join (attributes )} ]"
280- print (f" x{ obj .addr :x} -> x{ addr :x} { style } ;" )
281- queue .append ((depth + 1 , referrer_obj ))
263+ continue
264+ for addr in obj .referrers :
265+ referrer_obj = all_objects [addr ]
266+ attributes : list [str ] = []
267+ if addr in seen :
268+ attributes .append ("style=dashed" )
269+ elif len (referrer_obj .referents ) == 1 :
270+ # Marking any single references with bold arrows.
271+ attributes .append ("style=bold" )
272+ edge_attributes [(obj .addr , addr )] = attributes
273+ queue .append ((path + (addr ,), referrer_obj ))
274+
275+ # Now that we know what edges need highlighting, we can render them all.
276+ for (a , b ), attributes in edge_attributes .items ():
277+ style = ""
278+ if (a , b ) in highlighted_edges or (b , a ) in highlighted_edges :
279+ attributes .append ("color=red" )
280+ if attributes :
281+ style = f" [{ ' ' .join (attributes )} ]"
282+ print (f" x{ a :x} -> x{ b :x} { style } ;" )
283+
284+ # Finally render all the ranks.
282285 for rank in range (args .max_depth + 1 ):
283286 if rank not in ranks :
284287 break
@@ -310,11 +313,21 @@ def _graph(args: argparse.Namespace) -> None:
310313 )
311314 parser_graph .add_argument ("heap_dump" , metavar = "heap-dump" )
312315 parser_graph .add_argument (
313- "--filter " ,
316+ "--sinks " ,
314317 type = str ,
315- help = "Filter entries by typename or address " ,
318+ help = "Comma-separated list of addresses or typenames to trace ownership of. " ,
316319 required = True ,
317320 )
321+ parser_graph .add_argument (
322+ "--exclude" ,
323+ type = str ,
324+ help = "Comma-separated list of addresses or typenames to exclude from the graph." ,
325+ )
326+ parser_graph .add_argument (
327+ "--sources" ,
328+ type = str ,
329+ help = "Comma-separated list of addresses or typenames to highlight paths to the sinks." ,
330+ )
318331 parser_graph .add_argument (
319332 "--highlight" ,
320333 type = str ,
@@ -325,11 +338,6 @@ def _graph(args: argparse.Namespace) -> None:
325338 type = str ,
326339 help = "Censor nodes that match a comma-separated list of prefixes of a typename" ,
327340 )
328- parser_graph .add_argument (
329- "--sources" ,
330- type = str ,
331- help = "Comma-separated list of addresses to trace to the target" ,
332- )
333341 parser_graph .set_defaults (func = _graph )
334342 args = parser .parse_args ()
335343
0 commit comments