1111import xdis
1212from xdis import disassemble_file , iscode
1313
14+
1415# Util to format shorthand code obj name
1516# Used so we do not compare memory addrs
def _fmt_codeobj(co):
    """
    Format a shorthand name for a code object.

    Only the code object's name is used, so the result never contains a
    memory address and is safe to compare between runs.

    :param co: code object (any object exposing ``co_name``)
    :return: string of the form ``<codeobj NAME>``
    """
    return f"<codeobj {co.co_name}>"
1719
1820
def _iter_nested_bytecodes(bytecode, bytecode_constructor: Callable):
    """
    Iterate over a bytecode and its child bytecodes.

    Child bytecodes are built from every code object found in the
    ``co_consts`` of the bytecode currently being visited.

    :param bytecode: bytecode object to iterate, will be yielded on first call
    :param bytecode_constructor: constructor to create child bytecodes with
    :return: generator yielding the root bytecode followed by all nested bytecodes
    """
    # LIFO stack: children are pushed before their parent is yielded and the
    # most recently pushed child is visited next (depth-first order).
    bc_stack = [bytecode]
    while bc_stack:
        bc = bc_stack.pop()
        bc_stack.extend(bytecode_constructor(obj) for obj in bc.codeobj.co_consts if iscode(obj))
        yield bc
3233
3334
@@ -57,15 +58,22 @@ def _get_headers_to_serialize(bytecode_version: tuple):
5758 return headers_to_serialize
5859
5960
60- def _format_headers (bytecode , bytecode_version : tuple ) -> str :
61- """Format important headers (attrs) of bytecode."""
61+ def _format_headers (bytecode , bytecode_version : tuple , headers_to_serialize : list [str ] | None ) -> str :
62+ """
63+ Format important headers (attrs) of bytecode.
64+
65+ :param bytecode: bytecode object
66+ :param bytecode_version: bytecode version tuple to track version specific headers
67+ :param headers: list bytecode headers that we want to specifically format, excluding the other headers. By default, tests all params.
68+ """
6269
6370 # default format for each attr
6471 header_fmt = "{name} : {val}"
6572
6673 # format headers
6774 formatted_headers = []
68- for attr_name in _get_headers_to_serialize (bytecode_version ):
75+ headers = headers_to_serialize if headers_to_serialize is not None else _get_headers_to_serialize (bytecode_version )
76+ for attr_name in headers :
6977 # check for missing attrs
7078 if not hasattr (bytecode .codeobj , attr_name ):
7179 logging .warning (f"Codeobj missing test_attr { attr_name } " )
@@ -76,10 +84,7 @@ def _format_headers(bytecode, bytecode_version: tuple) -> str:
7684 # handle const attrs and some callables
7785 if attr_name == "co_consts" :
7886 # filter code objects in co_consts
79- val = [
80- f"<codeobj { const .co_name } " if iscode (const ) else const
81- for const in attr_val
82- ]
87+ val = [f"<codeobj { const .co_name } " if iscode (const ) else const for const in attr_val ]
8388 elif attr_name in ("co_lines" , "co_positions" ):
8489 val = list (attr_val ())
8590 else :
@@ -115,20 +120,35 @@ def _format_insts(bytecode, bytecode_version: tuple) -> str:
115120 return "\n " .join (insts )
116121
117122
def format_bytecode(
    bytecode,
    bytecode_version: tuple,
    headers_to_serialize: list[str] | None = None,
    serialize_insts: bool = True,
) -> str:
    """
    Create complete formatted string of bytecode.

    The output is a ``BYTECODE <name>`` line, an ``ATTRS:`` section and,
    optionally, an ``INSTS:`` section.

    :param bytecode: bytecode object
    :param bytecode_version: tuple of bytecode version to track version specific formatting
    :param headers_to_serialize: list of bytecode headers to format in the output.
        If None (the default), the choice of headers is left to ``_format_headers``.
    :param serialize_insts: if True (the default) the instructions are formatted
        under ``INSTS:``; if False that section is left out entirely.
    :return: the formatted bytecode text
    """
    outstr = f"BYTECODE {bytecode.codeobj.co_name}\n"
    outstr += "ATTRS:\n"
    outstr += _format_headers(bytecode, bytecode_version, headers_to_serialize) + "\n"
    if serialize_insts:
        outstr += "INSTS:\n"
        outstr += _format_insts(bytecode, bytecode_version) + "\n"
    return outstr
126140
127141
128- def serialize_pyc (
129- pyc : Path , use_xdis : bool = False , output_file : TextIO | None = sys .stdout
130- ) -> str :
131- """Serialize a pyc to text for testing, using dis or xdis."""
142+ def serialize_pyc (pyc : Path , use_xdis : bool = False , output_file : TextIO | None = sys .stdout , headers : list [str ] | None = None , serialize_insts : bool = True ) -> str :
143+ """
144+ Serialize a pyc to text for testing, using dis or xdis.
145+
146+ :param pyc: path of pyc file
147+ :param use_xdis: boolean if we serialize with xdis, default use dis (meaning pyc must be same version as running python)
148+ :param output_file: file to write output to
149+ :param headers: list of bytecode headers we want to format in output. Default is None, where we format all params.
150+ :param serialize_insts: bool to determine if we format instructions or ignore them and dont output save.
151+ """
132152
133153 # create a code object in xdis or dis, and a constructor to make bytecodes with
134154 if use_xdis :
@@ -138,9 +158,7 @@ def serialize_pyc(
138158 # write to null so no disassembly output
139159 with open (devnull , "w" ) as fnull :
140160 # create xdis code obj
141- (_ , code_object , version_tuple , _ , _ , is_pypy , _ , _ ) = disassemble_file (
142- str (pyc ), fnull , asm_format = "classic"
143- )
161+ (_ , code_object , version_tuple , _ , _ , is_pypy , _ , _ ) = disassemble_file (str (pyc ), fnull , asm_format = "classic" )
144162 # get corresponding opcode class
145163 opc = xdis .get_opcode (version_tuple , is_pypy , None )
146164 # create xdis bytecode constructor
@@ -161,7 +179,7 @@ def serialize_pyc(
161179 formatted_bytecodes = []
162180 init_bytecode = bytecode_constructor (code_object )
163181 for bc in _iter_nested_bytecodes (init_bytecode , bytecode_constructor ):
164- formatted_bytecodes .append (format_bytecode (bc , bytecode_version ))
182+ formatted_bytecodes .append (format_bytecode (bc , bytecode_version , headers , serialize_insts ))
165183
166184 # write formatted bytecodes
167185 full_formatted_bytecode = "\n " .join (formatted_bytecodes )
@@ -179,6 +197,16 @@ def serialize_pyc(
179197 help = "Use xdis to serialize bytecode" ,
180198 action = "store_true" ,
181199 )
200+ parser .add_argument (
201+ "--headers" ,
202+ help = "List of specific code object params to test, defaults to all parameters. Should be 'co_*', for example, 'co_lines'" ,
203+ nargs = "*" ,
204+ )
205+ parser .add_argument (
206+ "--skip_insts" ,
207+ help = "Do not test accuracy of instructions" ,
208+ action = "store_false" ,
209+ )
182210 parser .add_argument ("pyc" , help = "PYC file to serialize." )
183211 args = parser .parse_args ()
184212
@@ -189,4 +217,4 @@ def serialize_pyc(
189217 # setup logger
190218 logging .basicConfig (format = "%(levelname)s: %(message)s" )
191219
192- serialize_pyc (pyc_path , args .use_xdis )
220+ serialize_pyc (pyc_path , args .use_xdis , headers = args . headers if args . headers else None , serialize_insts = args . skip_insts )
0 commit comments