Skip to content

Commit f22aed3

Browse files
committed
add "headers" and "serialize_insts" options to bytecode serialization to define which parts of bytecode should be serialized
1 parent 3aa1c90 commit f22aed3

File tree

2 files changed

+58
-40
lines changed

2 files changed

+58
-40
lines changed

test_crossversion/prepare_templates.py

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,8 @@
22
import logging
33
from py_compile import compile
44

5-
from config import (
6-
SYS_VERSION,
7-
TEMPLATE_COMPILED_DIR,
8-
TEMPLATE_SERIALIZED_DIR,
9-
TEMPLATE_SOURCE_DIR,
10-
)
115
from serialize_bytecode import serialize_pyc
6+
from config import SYS_VERSION, TEMPLATE_COMPILED_DIR, TEMPLATE_SERIALIZED_DIR, TEMPLATE_SOURCE_DIR
127

138

149
def prepare_templates():
@@ -46,16 +41,11 @@ def prepare_templates():
4641

4742
if __name__ == "__main__":
4843
parser = argparse.ArgumentParser(prog="prepare_templates")
49-
parser.add_argument(
50-
"-V", "--verbose", action="store_true", help="Use verbose output"
51-
)
44+
parser.add_argument("-V", "--verbose", action="store_true", help="Use verbose output")
5245
args = parser.parse_args()
5346

5447
# setup logger
55-
logging.basicConfig(
56-
format="%(levelname)s: %(message)s",
57-
level=logging.DEBUG if args.verbose else None,
58-
)
48+
logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.DEBUG if args.verbose else None)
5949

6050
# compile and serialize templates
6151
prepare_templates()

test_crossversion/serialize_bytecode.py

Lines changed: 55 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -11,23 +11,24 @@
1111
import xdis
1212
from xdis import disassemble_file, iscode
1313

14+
1415
# Util to format shorthand code obj name
1516
# Used so we do not compare memory addrs
16-
_fmt_codeobj = lambda co: f"<codeobj {co.co_name}>"
17+
def _fmt_codeobj(co):
18+
return f"<codeobj {co.co_name}>"
1719

1820

1921
def _iter_nested_bytecodes(bytecode, bytecode_constructor: Callable):
2022
"""
2123
iterate over a bytecode and its child bytecodes
22-
bytecode: bytecode object to iterate, will be yielded on first call
23-
bytecode_constructor: constructor to create child bytecodes with
24+
25+
:param bytecode: bytecode object to iterate, will be yielded on first call
26+
:param bytecode_constructor: constructor to create child bytecodes with
2427
"""
2528
bc_stack = [bytecode]
2629
while bc_stack:
2730
bc = bc_stack.pop()
28-
bc_stack.extend(
29-
bytecode_constructor(obj) for obj in bc.codeobj.co_consts if iscode(obj)
30-
)
31+
bc_stack.extend(bytecode_constructor(obj) for obj in bc.codeobj.co_consts if iscode(obj))
3132
yield bc
3233

3334

@@ -57,15 +58,22 @@ def _get_headers_to_serialize(bytecode_version: tuple):
5758
return headers_to_serialize
5859

5960

60-
def _format_headers(bytecode, bytecode_version: tuple) -> str:
61-
"""Format important headers (attrs) of bytecode."""
61+
def _format_headers(bytecode, bytecode_version: tuple, headers_to_serialize: list[str] | None) -> str:
62+
"""
63+
Format important headers (attrs) of bytecode.
64+
65+
:param bytecode: bytecode object
66+
:param bytecode_version: bytecode version tuple to track version specific headers
67+
:param headers_to_serialize: list of bytecode headers to format, excluding all other headers. If None, the default headers for the given bytecode version are formatted.
68+
"""
6269

6370
# default format for each attr
6471
header_fmt = "{name} : {val}"
6572

6673
# format headers
6774
formatted_headers = []
68-
for attr_name in _get_headers_to_serialize(bytecode_version):
75+
headers = headers_to_serialize if headers_to_serialize is not None else _get_headers_to_serialize(bytecode_version)
76+
for attr_name in headers:
6977
# check for missing attrs
7078
if not hasattr(bytecode.codeobj, attr_name):
7179
logging.warning(f"Codeobj missing test_attr {attr_name}")
@@ -76,10 +84,7 @@ def _format_headers(bytecode, bytecode_version: tuple) -> str:
7684
# handle const attrs and some callables
7785
if attr_name == "co_consts":
7886
# filter code objects in co_consts
79-
val = [
80-
f"<codeobj {const.co_name}" if iscode(const) else const
81-
for const in attr_val
82-
]
87+
val = [f"<codeobj {const.co_name}" if iscode(const) else const for const in attr_val]
8388
elif attr_name in ("co_lines", "co_positions"):
8489
val = list(attr_val())
8590
else:
@@ -115,20 +120,35 @@ def _format_insts(bytecode, bytecode_version: tuple) -> str:
115120
return "\n".join(insts)
116121

117122

118-
def format_bytecode(bytecode, bytecode_version: tuple) -> str:
119-
"""Create complete formatted string of bytecode."""
123+
def format_bytecode(bytecode, bytecode_version: tuple, headers_to_serialize: list[str] | None = None, serialize_insts: bool = True) -> str:
124+
"""
125+
Create complete formatted string of bytecode.
126+
127+
:param bytecode: bytecode object
128+
:param bytecode_version: tuple of bytecode version to track version specific formatting
129+
:param headers_to_serialize: list of bytecode headers to format in the output. If None or not given, the default set of headers is formatted.
130+
:param serialize_insts: whether to serialize instructions; if False, the instructions section is omitted from the output.
131+
"""
132+
120133
outstr = f"BYTECODE {bytecode.codeobj.co_name}\n"
121134
outstr += "ATTRS:\n"
122-
outstr += _format_headers(bytecode, bytecode_version) + "\n"
123-
outstr += "INSTS:\n"
124-
outstr += _format_insts(bytecode, bytecode_version) + "\n"
135+
outstr += _format_headers(bytecode, bytecode_version, headers_to_serialize) + "\n"
136+
if serialize_insts:
137+
outstr += "INSTS:\n"
138+
outstr += _format_insts(bytecode, bytecode_version) + "\n"
125139
return outstr
126140

127141

128-
def serialize_pyc(
129-
pyc: Path, use_xdis: bool = False, output_file: TextIO | None = sys.stdout
130-
) -> str:
131-
"""Serialize a pyc to text for testing, using dis or xdis."""
142+
def serialize_pyc(pyc: Path, use_xdis: bool = False, output_file: TextIO | None = sys.stdout, headers: list[str] | None = None, serialize_insts: bool = True) -> str:
143+
"""
144+
Serialize a pyc to text for testing, using dis or xdis.
145+
146+
:param pyc: path of pyc file
147+
:param use_xdis: whether to serialize with xdis; by default dis is used (meaning the pyc must be the same version as the running Python)
148+
:param output_file: file to write output to
149+
:param headers: list of bytecode headers we want to format in output. Default is None, where we format all params.
150+
:param serialize_insts: whether to format instructions; if False, they are omitted from the output.
151+
"""
132152

133153
# create a code object in xdis or dis, and a constructor to make bytecodes with
134154
if use_xdis:
@@ -138,9 +158,7 @@ def serialize_pyc(
138158
# write to null so no disassembly output
139159
with open(devnull, "w") as fnull:
140160
# create xdis code obj
141-
(_, code_object, version_tuple, _, _, is_pypy, _, _) = disassemble_file(
142-
str(pyc), fnull, asm_format="classic"
143-
)
161+
(_, code_object, version_tuple, _, _, is_pypy, _, _) = disassemble_file(str(pyc), fnull, asm_format="classic")
144162
# get corresponding opcode class
145163
opc = xdis.get_opcode(version_tuple, is_pypy, None)
146164
# create xdis bytecode constructor
@@ -161,7 +179,7 @@ def serialize_pyc(
161179
formatted_bytecodes = []
162180
init_bytecode = bytecode_constructor(code_object)
163181
for bc in _iter_nested_bytecodes(init_bytecode, bytecode_constructor):
164-
formatted_bytecodes.append(format_bytecode(bc, bytecode_version))
182+
formatted_bytecodes.append(format_bytecode(bc, bytecode_version, headers, serialize_insts))
165183

166184
# write formatted bytecodes
167185
full_formatted_bytecode = "\n".join(formatted_bytecodes)
@@ -179,6 +197,16 @@ def serialize_pyc(
179197
help="Use xdis to serialize bytecode",
180198
action="store_true",
181199
)
200+
parser.add_argument(
201+
"--headers",
202+
help="List of specific code object params to test, defaults to all parameters. Should be 'co_*', for example, 'co_lines'",
203+
nargs="*",
204+
)
205+
parser.add_argument(
206+
"--skip_insts",
207+
help="Do not test accuracy of instructions",
208+
action="store_false",
209+
)
182210
parser.add_argument("pyc", help="PYC file to serialize.")
183211
args = parser.parse_args()
184212

@@ -189,4 +217,4 @@ def serialize_pyc(
189217
# setup logger
190218
logging.basicConfig(format="%(levelname)s: %(message)s")
191219

192-
serialize_pyc(pyc_path, args.use_xdis)
220+
serialize_pyc(pyc_path, args.use_xdis, headers=args.headers if args.headers else None, serialize_insts=args.skip_insts)

0 commit comments

Comments
 (0)