44"""Callback functions and support for sys.monitoring data collection."""
55
66# TODO: https://github.com/python/cpython/issues/111963#issuecomment-2386584080
7+ # commented out stuff with 111963 below...
78
89from __future__ import annotations
910
11+ import dis
1012import functools
1113import inspect
1214import os
5658MonitorReturn = Optional [DISABLE_TYPE ]
5759DISABLE = cast (MonitorReturn , getattr (sys_monitoring , "DISABLE" , None ))
5860
# Opcodes that unconditionally transfer control to their jump target.
# Names missing from this interpreter's `dis.opmap` are skipped, so building
# the set cannot raise KeyError at import time.
ALWAYS_JUMPS = {
    dis.opmap[name]
    for name in ["JUMP_FORWARD", "JUMP_BACKWARD", "JUMP_BACKWARD_NO_INTERRUPT"]
    if name in dis.opmap
}

# Opcodes treated as leaving the code object when following a branch trail.
# Built with the same tolerance for opcode names this interpreter lacks.
RETURNS = {
    dis.opmap[name]
    for name in ["RETURN_VALUE", "RETURN_GENERATOR"]
    if name in dis.opmap
}
69+
5970
6071if LOG : # pragma: debugging
6172
@@ -163,14 +174,101 @@ def _decorator(meth: AnyCallable) -> AnyCallable:
163174 return _decorator
164175
165176
class InstructionWalker:
    """Walk the bytecode instructions of one code object.

    The instructions are decoded once with `dis.get_instructions` and indexed
    by byte offset, so a walk can begin at any offset and can optionally
    follow unconditional jumps.
    """

    def __init__(self, code: CodeType):
        """Decode `code` and index its instructions by offset."""
        self.code = code
        self.insts: dict[int, dis.Instruction] = {
            inst.offset: inst for inst in dis.get_instructions(code)
        }
        # Highest decoded offset.  -1 when nothing was decoded, so that
        # `walk` simply yields nothing instead of this constructor failing.
        self.max_offset = max(self.insts, default=-1)

    def walk(self, *, start_at: int = 0, follow_jumps: bool = True):
        """Yield `dis.Instruction` objects beginning at offset `start_at`.

        Offsets advance two bytes at a time; offsets with no decoded
        instruction are stepped over.  When `follow_jumps` is true, an
        instruction whose opcode is in `ALWAYS_JUMPS` redirects the walk to
        its jump target instead of the next offset.

        The walk ends when the offset passes the last decoded instruction, or
        when it reaches an offset it has already visited (which prevents
        looping forever around a backward jump).
        """
        seen = set()
        offset = start_at
        while offset <= self.max_offset:
            if offset in seen:
                break
            seen.add(offset)
            inst = self.insts.get(offset)
            if inst is not None:
                yield inst
                if follow_jumps and inst.opcode in ALWAYS_JUMPS:
                    offset = inst.jump_target
                    continue
            offset += 2
201+
def populate_branch_trails(code: CodeType, code_info: CodeInfo) -> None:
    """Fill `code_info.branch_trails` for every conditional jump in `code`.

    For each instruction that has a jump target and is not an unconditional
    jump, two trails are computed: one starting just after the instruction
    (offset + 2) and one starting at its jump target.  Both are stored, in
    that order, under the instruction's offset.

    A trail is `(offsets, (from_line, to_line))`, where `offsets` are the
    instruction offsets visited before a destination was determined.  If no
    destination could be determined the trail is `([], None)`.

    Nothing is returned; `code_info` is mutated in place.
    """
    iwalker = InstructionWalker(code)

    def walkabout(start_at: int, from_line: int | None) -> tuple[list[int], TArc | None]:
        """Follow instructions from `start_at` until a destination is known."""
        insts = []
        to_line = None
        for inst2 in iwalker.walk(start_at=start_at):
            insts.append(inst2.offset)
            if inst2.line_number and inst2.line_number != from_line:
                # Reached an instruction on a different line: the destination.
                to_line = inst2.line_number
                break
            elif inst2.jump_target is not None and (inst2.opcode not in ALWAYS_JUMPS):
                # Another conditional jump before any new line: give up.
                log(f"stop: { inst2 .jump_target = } , { inst2 .opcode = } ({ dis .opname [inst2 .opcode ]} ), { ALWAYS_JUMPS = } ")
                break
            elif inst2.opcode in RETURNS:
                # Destination is the negated first line of the code object;
                # presumably the project's marker for an arc that leaves the
                # code object -- confirm against the arc consumers.
                to_line = -code.co_firstlineno
                break
        if to_line is not None:
            log(f"possible branch from @{ start_at } : { insts } , { (from_line , to_line )} { code } ")
            return insts, (from_line, to_line)
        else:
            log(f" no possible branch from @{ start_at } : { insts } ")
            return [], None

    for inst in iwalker.walk(follow_jumps=False):
        log(f"considering { inst = } ")
        # Compare against None explicitly so that a jump whose target is
        # offset 0 is still recognized as a jump.
        if inst.jump_target is None:
            log("no jump_target")
            continue
        if inst.opcode in ALWAYS_JUMPS:
            log("always jumps")
            continue

        from_line = inst.line_number
        code_info.branch_trails[inst.offset] = (
            walkabout(inst.offset + 2, from_line),
            walkabout(inst.jump_target, from_line),
        )
249+
250+
@dataclass
class CodeInfo:
    """The information we want about each code object."""

    tracing: bool
    file_data: TTraceFileData | None
    byte_to_line: dict[int, int] | None
    # Keys are start instruction offsets for branches.
    # Values are two trails:
    #   (
    #       ([offset, offset, ...], (from_line, to_line)),
    #       ([offset, offset, ...], (from_line, to_line)),
    #   )
    # Two possible trails from the branch point: the first starts just after
    # the branch instruction (offset + 2), the second at its jump target.
    # A trail for which no destination was found is stored as `([], None)`,
    # so the arc part of each trail may be None.
    branch_trails: dict[
        int,
        tuple[
            tuple[list[int], TArc | None],
            tuple[list[int], TArc | None],
        ],
    ]
174272
175273
176274def bytes_to_lines (code : CodeType ) -> dict [int , int ]:
@@ -210,8 +308,9 @@ def __init__(self, tool_id: int) -> None:
210308 # A list of code_objects, just to keep them alive so that id's are
211309 # useful as identity.
212310 self .code_objects : list [CodeType ] = []
213- # Map id(code_object) -> code_object
214- self .local_event_codes : dict [int , CodeType ] = {}
311+ # 111963:
312+ # # Map id(code_object) -> code_object
313+ # self.local_event_codes: dict[int, CodeType] = {}
215314 self .sysmon_on = False
216315 self .lock = threading .Lock ()
217316
@@ -238,16 +337,13 @@ def start(self) -> None:
238337 events = sys .monitoring .events
239338 import contextlib
240339
241- with open ("/tmp/foo.out" , "a" ) as f :
242- with contextlib .redirect_stdout (f ):
243- print (f"{ events = } " )
244340 sys_monitoring .set_events (self .myid , events .PY_START )
245341 register (events .PY_START , self .sysmon_py_start )
246342 if self .trace_arcs :
247343 register (events .PY_RETURN , self .sysmon_py_return )
248344 register (events .LINE , self .sysmon_line_arcs )
249- register (events .BRANCH_RIGHT , self .sysmon_branch_right ) # type:ignore[attr-defined]
250- register (events .BRANCH_LEFT , self .sysmon_branch_left ) # type:ignore[attr-defined]
345+ register (events .BRANCH_RIGHT , self .sysmon_branch_either ) # type:ignore[attr-defined]
346+ register (events .BRANCH_LEFT , self .sysmon_branch_either ) # type:ignore[attr-defined]
251347 else :
252348 register (events .LINE , self .sysmon_line_lines )
253349 sys_monitoring .restart_events ()
@@ -264,9 +360,10 @@ def stop(self) -> None:
264360 sys_monitoring .set_events (self .myid , 0 )
265361 with self .lock :
266362 self .sysmon_on = False
267- for code in self .local_event_codes .values ():
268- sys_monitoring .set_local_events (self .myid , code , 0 )
269- self .local_event_codes = {}
363+ # 111963:
364+ # for code in self.local_event_codes.values():
365+ # sys_monitoring.set_local_events(self.myid, code, 0)
366+ # self.local_event_codes = {}
270367 sys_monitoring .free_tool_id (self .myid )
271368
272369 @panopticon ()
@@ -329,11 +426,14 @@ def sysmon_py_start( # pylint: disable=useless-return
329426 file_data = None
330427 b2l = None
331428
332- self . code_infos [ id ( code )] = CodeInfo (
429+ code_info = CodeInfo (
333430 tracing = tracing_code ,
334431 file_data = file_data ,
335432 byte_to_line = b2l ,
433+ branch_trails = {},
336434 )
435+ self .code_infos [id (code )] = code_info
436+ populate_branch_trails (code , code_info ) # TODO: should be a method?
337437 self .code_objects .append (code )
338438
339439 if tracing_code :
@@ -348,7 +448,8 @@ def sysmon_py_start( # pylint: disable=useless-return
348448 events .BRANCH_RIGHT | events .BRANCH_LEFT # type:ignore[attr-defined]
349449 )
350450 sys_monitoring .set_local_events (self .myid , code , local_events )
351- self .local_event_codes [id (code )] = code
451+ # 111963:
452+ # self.local_event_codes[id(code)] = code
352453
353454 return None
354455
@@ -390,29 +491,19 @@ def sysmon_line_arcs(self, code: CodeType, line_number: int) -> MonitorReturn:
390491 return DISABLE
391492
@panopticon("code", "@", "@")
def sysmon_branch_either(
    self, code: CodeType, instruction_offset: int, destination_offset: int
) -> MonitorReturn:
    """Handle BRANCH_RIGHT and BRANCH_LEFT events.

    Looks up the branch trails recorded for `instruction_offset` and, for
    each trail whose offsets include `destination_offset`, adds that trail's
    arc to the file's data.  Always returns `DISABLE`.
    """
    info = self.code_infos[id(code)]
    # No file data for this code object: nothing to record.
    if info.file_data is None:
        return DISABLE

    dest_info = info.branch_trails.get(instruction_offset)
    log(f"{ dest_info = } ")
    # No trails were recorded for this instruction offset.
    if dest_info is None:
        return DISABLE

    for trail_offsets, arc in dest_info:
        # A trail with no arc has no known destination line; skip it.
        if arc is not None and destination_offset in trail_offsets:
            cast(set[TArc], info.file_data).add(arc)
            log(f"adding { arc = } ")
    return DISABLE
0 commit comments