@@ -115,7 +115,10 @@ def log(msg: str) -> None:
115115 # f"{root}-{pid}-{tslug}.out",
116116 ]:
117117 with open (filename , "a" ) as f :
118- print (f"{ pid } :{ tslug } : { msg } " , file = f , flush = True )
118+ try :
119+ print (f"{ pid } :{ tslug } : { msg } " , file = f , flush = True )
120+ except UnicodeError :
121+ print (f"{ pid } :{ tslug } : { ascii (msg )} " , file = f , flush = True )
119122
120123 def arg_repr (arg : Any ) -> str :
121124 """Make a customized repr for logged values."""
@@ -176,7 +179,14 @@ def _decorator(meth: AnyCallable) -> AnyCallable:
176179
177180
178181class InstructionWalker :
179- """Utility to step through trails of instructions."""
182+ """Utility to step through trails of instructions.
183+
184+ We have two reasons to need sequences of instructions from a code object:
185+ First, in strict sequence to visit all the instructions in the object.
186+ This is `walk(follow_jumps=False)`. Second, we want to follow jumps to
187+ understand how execution will flow: `walk(follow_jumps=True)`.
188+
189+ """
180190
181191 def __init__ (self , code : CodeType ) -> None :
182192 self .code = code
@@ -213,19 +223,35 @@ def walk(
213223def populate_branch_trails (code : CodeType , code_info : CodeInfo ) -> None :
214224 """
215225 Populate the `branch_trails` attribute on `code_info`.
226+
227+ Instructions can have a jump_target, where they might jump to next. Some
228+ instructions with a jump_target are unconditional jumps (ALWAYS_JUMPS), so
229+ they aren't interesting to us, since they aren't the start of a branch
230+ possibility.
231+
232+ Instructions that might or might not jump somewhere else are branch
233+ possibilities. For each of those, we track a trail of instructions. These
234+ are lists of instruction offsets, the next instructions that can execute.
235+ We follow the trail until we get to a new source line. That gives us the
236+ arc from the original instruction's line to the new source line.
237+
216238 """
239+ log (f"populate_branch_trails: { code } " )
217240 iwalker = InstructionWalker (code )
218241 for inst in iwalker .walk (follow_jumps = False ):
219242 log (f"considering { inst = } " )
220243 if not inst .jump_target :
244+ # We only care about instructions with jump targets.
221245 log ("no jump_target" )
222246 continue
223247 if inst .opcode in ALWAYS_JUMPS :
248+ # We don't care about unconditional jumps.
224249 log ("always jumps" )
225250 continue
226251
227252 from_line = inst .line_number
228- assert from_line is not None
253+ if from_line is None :
254+ continue
229255
230256 def walk_one_branch (
231257 start_at : TOffset , branch_kind : str
@@ -255,14 +281,26 @@ def walk_one_branch(
255281 )
256282 return inst_offsets , (from_line , to_line )
257283 else :
258- log (f" no possible branch from @{ start_at } : { inst_offsets } " )
284+ log (f"no possible branch from @{ start_at } : { inst_offsets } " )
259285 return [], None
260286
261- code_info .branch_trails [inst .offset ] = (
287+ # Calculate two trails: one from the next instruction, and one from the
288+ # jump_target instruction.
289+ trails = [
262290 walk_one_branch (start_at = inst .offset + 2 , branch_kind = "not-taken" ),
263291 walk_one_branch (start_at = inst .jump_target , branch_kind = "taken" ),
264- )
265-
292+ ]
293+ code_info .branch_trails [inst .offset ] = trails
294+
295+ # Sometimes we get BRANCH_RIGHT or BRANCH_LEFT events from instructions
296+ # other than the original jump possibility instruction. Register each
297+ # trail under all of its offsets so we can pick up in the middle of a
298+ # trail if need be.
299+ for trail in trails :
300+ for offset in trail [0 ]:
301+ if offset not in code_info .branch_trails :
302+ code_info .branch_trails [offset ] = []
303+ code_info .branch_trails [offset ].append (trail )
266304
267305@dataclass
268306class CodeInfo :
@@ -271,19 +309,17 @@ class CodeInfo:
271309 tracing : bool
272310 file_data : TTraceFileData | None
273311 byte_to_line : dict [TOffset , TLineNo ] | None
312+
274313 # Keys are start instruction offsets for branches.
275- # Values are two tuples :
276- # (
314+ # Values are lists :
315+ # [
277316 # ([offset, offset, ...], (from_line, to_line)),
278317 # ([offset, offset, ...], (from_line, to_line)),
279- # )
318+ # ]
280319 # A branch point has two possible trails, left and right; offsets along a trail also map to the trails that pass through them.
281320 branch_trails : dict [
282321 TOffset ,
283- tuple [
284- tuple [list [TOffset ], TArc | None ],
285- tuple [list [TOffset ], TArc | None ],
286- ],
322+ list [tuple [list [TOffset ], TArc | None ]],
287323 ]
288324
289325
@@ -447,7 +483,9 @@ def sysmon_py_start( # pylint: disable=useless-return
447483 branch_trails = {},
448484 )
449485 self .code_infos [id (code )] = code_info
450- populate_branch_trails (code , code_info ) # TODO: should be a method?
486+ if self .trace_arcs :
487+ populate_branch_trails (code , code_info )
488+ log (f"branch_trails for { code } :\n { code_info .branch_trails } " )
451489 self .code_objects .append (code )
452490
453491 if tracing_code :
@@ -487,8 +525,8 @@ def sysmon_py_return( # pylint: disable=useless-return
487525 @panopticon ("code" , "line" )
488526 def sysmon_line_lines (self , code : CodeType , line_number : TLineNo ) -> MonitorReturn :
489527 """Handle sys.monitoring.events.LINE events for line coverage."""
490- code_info = self .code_infos [ id (code )]
491- if code_info .file_data is not None :
528+ code_info = self .code_infos . get ( id (code ))
529+ if code_info is not None and code_info .file_data is not None :
492530 cast (set [TLineNo ], code_info .file_data ).add (line_number )
493531 log (f"adding { line_number = } " )
494532 return DISABLE
@@ -509,6 +547,7 @@ def sysmon_branch_either(
509547 ) -> MonitorReturn :
510548 """Handle BRANCH_RIGHT and BRANCH_LEFT events."""
511549 code_info = self .code_infos [id (code )]
550+ added_arc = False
512551 if code_info .file_data is not None :
513552 dest_info = code_info .branch_trails .get (instruction_offset )
514553 log (f"{ dest_info = } " )
@@ -519,4 +558,17 @@ def sysmon_branch_either(
519558 if destination_offset in offsets :
520559 cast (set [TArc ], code_info .file_data ).add (arc )
521560 log (f"adding { arc = } " )
561+ added_arc = True
562+ break
563+
564+ if not added_arc :
565+ # This could be an exception jumping from line to line.
566+ assert code_info .byte_to_line is not None
567+ l1 = code_info .byte_to_line [instruction_offset ]
568+ l2 = code_info .byte_to_line [destination_offset ]
569+ if l1 != l2 :
570+ arc = (l1 , l2 )
571+ cast (set [TArc ], code_info .file_data ).add (arc )
572+ log (f"adding unforeseen { arc = } " )
573+
522574 return DISABLE
0 commit comments