2929import sys
3030import types
3131from collections import deque
32- from typing import Tuple
32+ from typing import Optional , Tuple
3333
3434import xdis
3535from xdis .bytecode import Bytecode
@@ -72,6 +72,7 @@ def show_module_header(
7272 header = True ,
7373 show_filename = True ,
7474 is_graal = False ,
75+ file_offset : Optional [int ] = None ,
7576) -> None :
7677 bytecode_version = "." .join ((str (i ) for i in version_tuple ))
7778 real_out = out or sys .stdout
@@ -121,22 +122,25 @@ def show_module_header(
121122 real_out .write ("# SipHash: 0x%x\n " % sip_hash )
122123 if show_filename :
123124 real_out .write ("# Embedded file name: %s\n " % co .co_filename )
125+ if file_offset :
126+ real_out .write ("# Position in bytecode file: 0x%x\n " % file_offset )
124127
125128
126129def disco (
127130 version_tuple ,
128131 co ,
129132 timestamp ,
130133 out = sys .stdout ,
131- is_pypy : bool = False ,
134+ is_pypy : bool = False ,
132135 magic_int = None ,
133136 source_size = None ,
134137 sip_hash = None ,
135- asm_format : str = "classic" ,
138+ asm_format : str = "classic" ,
136139 alternate_opmap = None ,
137- show_source : bool = False ,
138- is_graal : bool = False ,
140+ show_source : bool = False ,
141+ is_graal : bool = False ,
139142 methods = tuple (),
143+ file_offsets : dict = {},
140144) -> None :
141145 """
142146 disassembles and deparses a given code block 'co'
@@ -163,7 +167,15 @@ def disco(
163167
164168 if co .co_filename and asm_format != "xasm" :
165169 if not_filtered (co , methods ):
166- real_out .write (format_code_info (co , version_tuple , is_graal = is_graal ) + "\n " )
170+ real_out .write (
171+ format_code_info (
172+ co ,
173+ version_tuple ,
174+ is_graal = is_graal ,
175+ file_offset = file_offsets .get (co ),
176+ )
177+ + "\n "
178+ )
167179 pass
168180
169181 opc = get_opcode (version_tuple , is_pypy , alternate_opmap )
@@ -184,6 +196,7 @@ def disco(
184196 dup_lines = True ,
185197 show_source = show_source ,
186198 methods = methods ,
199+ file_offsets = file_offsets ,
187200 )
188201
189202
@@ -196,6 +209,7 @@ def disco_loop(
196209 asm_format = "classic" ,
197210 show_source = False ,
198211 methods = tuple (),
212+ file_offsets : dict = {},
199213) -> None :
200214 """Disassembles a queue of code objects. If we discover
201215 another code object which will be found in co_consts, we add
@@ -211,7 +225,13 @@ def disco_loop(
211225 co = queue .popleft ()
212226 if not_filtered (co , methods ):
213227 if co .co_name not in ("<module>" , "?" ):
214- real_out .write ("\n " + format_code_info (co , version_tuple ) + "\n " )
228+ real_out .write (
229+ "\n "
230+ + format_code_info (
231+ co , version_tuple , file_offset = file_offsets .get (co )
232+ )
233+ + "\n "
234+ )
215235
216236 if asm_format == "dis" :
217237 assert version_tuple [:2 ] == PYTHON_VERSION_TRIPLE [:2 ], (
@@ -222,12 +242,18 @@ def disco_loop(
222242 else :
223243 bytecode = Bytecode (co , opc , dup_lines = dup_lines )
224244 real_out .write (
225- bytecode .dis (asm_format = asm_format , show_source = show_source ) + "\n "
245+ bytecode .dis (
246+ asm_format = asm_format ,
247+ show_source = show_source ,
248+ )
249+ + "\n "
226250 )
227251
228252 if version_tuple >= (3 , 11 ):
229253 if bytecode .exception_entries not in (None , []):
230- exception_table = format_exception_table (bytecode , version_tuple )
254+ exception_table = format_exception_table (
255+ bytecode , version_tuple
256+ )
231257 real_out .write (exception_table + "\n " )
232258
233259 for c in co .co_consts :
@@ -242,7 +268,9 @@ def code_uniquify(basename, co_code) -> str:
242268 return "%s_0x%x" % (basename , id (co_code ))
243269
244270
245- def disco_loop_asm_format (opc , version_tuple , co , real_out , fn_name_map , all_fns ) -> None :
271+ def disco_loop_asm_format (
272+ opc , version_tuple , co , real_out , fn_name_map , all_fns
273+ ) -> None :
246274 """Produces disassembly in a format more conducive to
247275 automatic assembly by producing inner modules before they are
248276 used by outer ones. Since this is recursive, we'll
@@ -318,7 +346,8 @@ def disassemble_file(
318346 asm_format = "classic" ,
319347 alternate_opmap = None ,
320348 show_source = False ,
321- methods : Tuple [str ] = tuple ()
349+ methods : Tuple [str ] = tuple (),
350+ save_file_offsets : bool = False ,
322351):
323352 """
324353 Disassemble Python byte-code file (.pyc).
@@ -329,6 +358,7 @@ def disassemble_file(
329358 If that fails, we'll compile internally for the Python version currently running.
330359 """
331360 pyc_filename = None
361+ file_offsets = {}
332362 try :
333363 # FIXME: add whether we want PyPy
334364 pyc_filename = check_object_path (filename )
@@ -340,7 +370,8 @@ def disassemble_file(
340370 is_pypy ,
341371 source_size ,
342372 sip_hash ,
343- ) = load_module (pyc_filename )
373+ file_offsets ,
374+ ) = load_module (pyc_filename , save_file_offsets = save_file_offsets )
344375 except (ImportError , NotImplementedError , ValueError ):
345376 raise
346377 except Exception :
@@ -391,6 +422,7 @@ def disassemble_file(
391422 show_source = show_source ,
392423 is_graal = is_graal ,
393424 methods = methods ,
425+ file_offsets = file_offsets ,
394426 )
395427 # print co.co_filename
396428 return (
@@ -404,9 +436,11 @@ def disassemble_file(
404436 sip_hash ,
405437 )
406438
439+
def not_filtered(co: types.CodeType, methods: tuple) -> bool:
    """Return True if code object ``co`` passes the ``methods`` name filter.

    An empty ``methods`` collection means no filtering is in effect, so
    every code object passes. Otherwise ``co`` passes only when its
    ``co_name`` is one of the entries in ``methods``.
    """
    # No filter names given: everything is accepted.
    if len(methods) == 0:
        return True
    return co.co_name in methods
409442
443+
410444def _test () -> None :
411445 """Simple test program to disassemble a file."""
412446 argc = len (sys .argv )
0 commit comments