Skip to content

Commit 720d675

Browse files
authored
Merge pull request #159 from 2elli/cross_version_tests
Automated cross-version testing
2 parents 8ca21d8 + f22aed3 commit 720d675

File tree

11 files changed

+479
-16
lines changed

11 files changed

+479
-16
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ Downloads = "https://github.com/rocky/python-xdis/releases"
5454
dev = [
5555
"pre-commit",
5656
"pytest",
57+
"tox",
5758
]
5859

5960
[project.scripts]

test_crossversion/.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
templates/
2+
**/__pycache__/
3+
*.pyc
4+
.python-version

test_crossversion/Makefile

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
.PHONY: help clean get_sources setup_pyenv compile prepare test
2+
3+
SOURCE=./templates/source/
4+
COMPILED=./templates/compiled/
5+
SERIALIZED=./templates/serialized/
6+
7+
# usage
8+
define helptext
9+
Crossversion xdis test usage:
10+
help : show this menu
11+
clean : remove compiled and serialized files
12+
get_sources : symlink all .py files in ./ -> $(SOURCE)
13+
setup_pyenv : setup local pyenv versions to be used by tox
14+
compile : with each tox env, compile all sources in $(SOURCE) to $(COMPILED), then serialize with dis to $(SERIALIZED)
15+
prepare : fully prepare test environment and compile test files
16+
test : prepare and run tests. with each tox env, serialize pyc's in $(COMPILED)<version> with xdis, then check against corresponding serialized pyc in $(SERIALIZED)<version>
17+
endef
18+
export helptext
19+
20+
#: show help menu
21+
help:
22+
@echo "$$helptext"
23+
24+
#: remove compiled and serialized files
25+
clean:
26+
find . -name "*.pyc" -delete
27+
find . -name "__pycache__" -type d -delete
28+
rm -rf $(COMPILED)/*
29+
rm -rf $(SERIALIZED)/*
30+
31+
#: copy all .py files in ./ -> ./templates/source/
32+
get_sources:
	# templates/ is git-ignored, so the target dir may not exist on a fresh checkout
	mkdir -p $(SOURCE)
	cp -f *.py $(SOURCE)
34+
35+
.python-version:
36+
tox --listenvs | xargs pyenv local
37+
#: setup local pyenv versions to be used by tox
38+
setup_pyenv: .python-version
39+
40+
#: with each tox env, compile all sources in ./templates/source/ to ./templates/compiled/, then serialize with dis to ./templates/serialized/
41+
compile:
42+
tox p -c ./tox_prepare.ini
43+
44+
#: fully prepare tests
45+
prepare: clean get_sources setup_pyenv compile
46+
47+
#: prepare and run tests. with each tox env, serialize pyc's in ./templates/compiled/<version> with xdis, then check against corresponding dis serialized pyc in ./templates/serialized/<version>
48+
test: prepare
49+
tox r -c ./tox.ini

test_crossversion/USAGE.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Automated crossversion testing
2+
This testing suite is used for automatic testing of differences found between xdis and dis.
3+
This is done by having a way to identically "serialize" important attributes in xdis and dis bytecodes.
4+
We then can check a diff between a serialized xdis and dis bytecode to find if xdis is parsing something incorrectly.
5+
Most tests should be run using the makefile.
6+
7+
# System Requirements
8+
- `pyenv` and `pyenv-virtualenv`
9+
- Each version needing to be tested should be installed with pyenv.
10+
- `tox`
11+
12+
# Usage
13+
## Makefile
14+
Run `make` or `make help` to show the help menu for running and preparing tests, or with `remake`, `remake --tasks`.
15+
16+
To simply run tests, `make test` will copy some sources, prepare template files, and run tests.
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
from configparser import ConfigParser
2+
from pathlib import Path
3+
from sys import version_info
4+
5+
# main test root dir
6+
_test_path = Path(__file__).parent.parent
7+
8+
# system version of python
9+
SYS_VERSION = f"{version_info.major}.{version_info.minor}"
10+
SYS_VERSION_TUPLE = (version_info.major, version_info.minor, version_info.micro)
11+
12+
# template dirs
13+
TEMPLATE_DIR = _test_path / "templates"
14+
TEMPLATE_SOURCE_DIR = TEMPLATE_DIR / "source"
15+
TEMPLATE_COMPILED_DIR = TEMPLATE_DIR / "compiled"
16+
TEMPLATE_SERIALIZED_DIR = TEMPLATE_DIR / "serialized"
17+
18+
# check dirs and make them if needed
19+
def _check_dir(path):
    """
    Make sure the directory ``path`` exists, creating it (and parents) if needed.

    ``exist_ok=True`` is used instead of an exists()-then-mkdir() check so that
    several interpreters importing this module at the same time (e.g. parallel
    tox envs) cannot fail with FileExistsError.

    :param path: pathlib.Path of the directory to ensure
    """
    path.mkdir(parents=True, exist_ok=True)


_check_dir(TEMPLATE_DIR)
_check_dir(TEMPLATE_SOURCE_DIR)
_check_dir(TEMPLATE_COMPILED_DIR)
_check_dir(TEMPLATE_SERIALIZED_DIR)
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import argparse
2+
import logging
3+
from py_compile import compile
4+
5+
from serialize_bytecode import serialize_pyc
6+
from config import SYS_VERSION, TEMPLATE_COMPILED_DIR, TEMPLATE_SERIALIZED_DIR, TEMPLATE_SOURCE_DIR
7+
8+
9+
def prepare_templates():
    """
    Compile files in template source dir, then serialize with dis.

    Intermediary steps are saved in respective folders in
    templates / <compiled|serialized> / <version>.

    :raises py_compile.PyCompileError: if a template source does not compile
        with the running interpreter. Raising stops a missing or stale pyc
        from being serialized in its place.
    """
    # create folders to save pyc's; exist_ok avoids a check-then-create race
    compiled_dir = TEMPLATE_COMPILED_DIR / SYS_VERSION
    serialized_dir = TEMPLATE_SERIALIZED_DIR / SYS_VERSION
    compiled_dir.mkdir(parents=True, exist_ok=True)
    serialized_dir.mkdir(parents=True, exist_ok=True)

    # compile and serialize template files
    num_source = 0
    for source in TEMPLATE_SOURCE_DIR.glob("*.py"):
        # create paths
        pyc_file = compiled_dir / f"{source.stem}_{SYS_VERSION}.pyc"
        serialized_file = serialized_dir / f"{source.stem}_{SYS_VERSION}.txt"

        # compile pyc; doraise=True because the default only prints the error
        # to stderr and returns None, leaving no (or an outdated) pyc behind
        compile(str(source), str(pyc_file), doraise=True)
        logging.info("Compiled %s -> %s", source, pyc_file)

        # serialize pyc
        with serialized_file.open("w") as f:
            serialize_pyc(pyc_file, False, f)
        logging.info("Serialized %s -> %s", pyc_file, serialized_file)
        num_source += 1

    print(f"{num_source} files compiled and serialized")
40+
41+
42+
if __name__ == "__main__":
    # command line interface: a single optional verbosity switch
    cli = argparse.ArgumentParser(prog="prepare_templates")
    cli.add_argument("-V", "--verbose", help="Use verbose output", action="store_true")
    options = cli.parse_args()

    # verbose runs log from DEBUG up; otherwise no level is passed to basicConfig
    log_level = logging.DEBUG if options.verbose else None
    logging.basicConfig(level=log_level, format="%(levelname)s: %(message)s")

    # compile and serialize every template source for the running interpreter
    prepare_templates()
Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
from __future__ import annotations
2+
3+
import argparse
4+
import logging
5+
import sys
6+
from pathlib import Path
7+
from typing import Callable, TextIO
8+
9+
from config import SYS_VERSION_TUPLE
10+
11+
import xdis
12+
from xdis import disassemble_file, iscode
13+
14+
15+
# Util to format shorthand code obj name
16+
# Used so we do not compare memory addrs
17+
def _fmt_codeobj(co):
    """Return a short label built from the code object's name, e.g. ``<codeobj foo>``."""
    return "<codeobj {}>".format(co.co_name)
19+
20+
21+
def _iter_nested_bytecodes(bytecode, bytecode_constructor: Callable):
    """
    Walk a bytecode object and every bytecode nested inside it.

    Nested code objects are found in ``codeobj.co_consts`` and wrapped with
    ``bytecode_constructor``. Pending bytecodes are kept on a stack, so the
    most recently discovered child is visited next.

    :param bytecode: root bytecode object; it is the first item yielded
    :param bytecode_constructor: callable that turns a code object into a bytecode object
    """
    pending = [bytecode]
    while pending:
        current = pending.pop()
        # queue the children before handing the current bytecode to the caller
        for const in current.codeobj.co_consts:
            if iscode(const):
                pending.append(bytecode_constructor(const))
        yield current
33+
34+
35+
def _get_headers_to_serialize(bytecode_version: tuple):
    """
    Build the default list of code object attrs to serialize for a bytecode version.

    Attrs that only exist from a given Python version are added only when
    ``bytecode_version`` is at least that version, so older bytecode is not
    asked for attrs it cannot have.

    :param bytecode_version: version tuple of the bytecode, e.g. ``(3, 11, 4)``
    :return: list of ``co_*`` attr names, in serialization order
    """
    headers_to_serialize = [
        "co_argcount",
        "co_cellvars",
        "co_code",
        "co_consts",
        "co_firstlineno",
        "co_flags",
        "co_freevars",
        "co_kwonlyargcount",
        "co_name",
        "co_names",
        "co_nlocals",
    ]

    # positional-only parameters were added to code objects in 3.8
    if bytecode_version >= (3, 8):
        headers_to_serialize.append("co_posonlyargcount")

    headers_to_serialize.extend(["co_stacksize", "co_varnames"])

    if bytecode_version >= (3, 10):
        headers_to_serialize.append("co_lines")
    if bytecode_version >= (3, 11):
        headers_to_serialize.append("co_qualname")
        # headers_to_serialize.append("co_positions"), not fully supported in xdis
    return headers_to_serialize
59+
60+
61+
def _format_headers(bytecode, bytecode_version: tuple, headers_to_serialize: list[str] | None) -> str:
    """
    Format important headers (attrs) of bytecode.

    :param bytecode: bytecode object; attrs are read from its ``codeobj``
    :param bytecode_version: bytecode version tuple to track version specific headers
    :param headers_to_serialize: list of ``co_*`` attr names to format, excluding the other headers.
        If None, the default list for ``bytecode_version`` is used.
    :return: one ``name : value`` line per formatted attr, joined with newlines.
        Attrs missing from the code object are skipped with a warning.
    """

    # default format for each attr
    header_fmt = "{name} : {val}"

    # format headers
    formatted_headers = []
    headers = headers_to_serialize if headers_to_serialize is not None else _get_headers_to_serialize(bytecode_version)
    for attr_name in headers:
        # check for missing attrs
        if not hasattr(bytecode.codeobj, attr_name):
            logging.warning(f"Codeobj missing test_attr {attr_name}")
            continue

        attr_val = getattr(bytecode.codeobj, attr_name)

        # handle const attrs and some callables
        if attr_name == "co_consts":
            # swap nested code objects for a short "<codeobj name>" label so
            # their repr (which contains a memory addr) is never compared
            val = [f"<codeobj {const.co_name}>" if iscode(const) else const for const in attr_val]
        elif attr_name in ("co_lines", "co_positions"):
            # these attrs are called and their result is expanded into a list
            val = list(attr_val())
        else:
            val = attr_val

        # format header string, dropping the leading "co_" from the name
        formatted_headers.append(header_fmt.format(name=attr_name[3:], val=val))

    return "\n".join(formatted_headers)
97+
98+
99+
def _format_insts(bytecode, bytecode_version: tuple) -> str:
    """
    Render every instruction of ``bytecode`` as one text line.

    Each line is ``<opcode> <opname> : <arg> <argval>``. ``CACHE`` instructions
    are left out, and a code object argval is replaced by its short label.

    :param bytecode: iterable of instructions
    :param bytecode_version: bytecode version tuple (not used by the current formatting)
    """
    # TODO revisit ignoring argrepr and argvals in tests
    # argrepr is not serialized for now, as xdis will sometimes include additional info there

    def shown_argval(inst):
        # code objects are shown by name only so memory addrs are not compared
        if iscode(inst.argval):
            return _fmt_codeobj(inst.argval)
        return inst.argval

    visible = (inst for inst in bytecode if inst.opname != "CACHE")
    return "\n".join(
        f"{inst.opcode} {inst.opname} : {inst.arg} {shown_argval(inst)}"
        for inst in visible
    )
121+
122+
123+
def format_bytecode(bytecode, bytecode_version: tuple, headers_to_serialize: list[str] | None = None, serialize_insts: bool = True) -> str:
    """
    Build the full text dump of one bytecode object.

    The dump is a ``BYTECODE <name>`` line, an ``ATTRS:`` section and, when
    requested, an ``INSTS:`` section.

    :param bytecode: bytecode object
    :param bytecode_version: tuple of bytecode version to track version specific formatting
    :param headers_to_serialize: list of bytecode headers to include in the output. If None, the default set is formatted.
    :param serialize_insts: when False, the instruction section is left out entirely.
    """
    sections = [
        f"BYTECODE {bytecode.codeobj.co_name}\n",
        "ATTRS:\n",
        _format_headers(bytecode, bytecode_version, headers_to_serialize) + "\n",
    ]
    if serialize_insts:
        sections.append("INSTS:\n")
        sections.append(_format_insts(bytecode, bytecode_version) + "\n")
    return "".join(sections)
140+
141+
142+
def serialize_pyc(pyc: Path, use_xdis: bool = False, output_file: TextIO | None = sys.stdout, headers: list[str] | None = None, serialize_insts: bool = True) -> str:
    """
    Turn a pyc file into comparable text, disassembling with dis or xdis.

    :param pyc: path of pyc file
    :param use_xdis: serialize with xdis when True; otherwise use dis, in which case the pyc must match the running python version
    :param output_file: file the text is written to; nothing is written when it is None
    :param headers: list of bytecode headers to include in the output. Default is None, where the default set is formatted.
    :param serialize_insts: when False, instructions are left out of the output.
    :return: the serialized text
    """
    if not use_xdis:
        # dis path: unmarshal the code object ourselves
        import dis
        import marshal

        # everything after the first 16 bytes of the file is the marshalled code object
        raw = pyc.read_bytes()
        code_object = marshal.loads(raw[16:])
        bytecode_version = SYS_VERSION_TUPLE

        def bytecode_constructor(codeobj):
            return dis.Bytecode(codeobj)

    else:
        # xdis path: let xdis load the file, discarding its disassembly text
        from os import devnull

        with open(devnull, "w") as sink:
            disassembly = disassemble_file(str(pyc), sink, asm_format="classic")
        (_, code_object, bytecode_version, _, _, is_pypy, _, _) = disassembly

        # opcode set matching the version the pyc was compiled for
        opc = xdis.get_opcode(bytecode_version, is_pypy, None)

        def bytecode_constructor(codeobj):
            return xdis.Bytecode(codeobj, opc)

    # format the top-level bytecode and every nested one
    root = bytecode_constructor(code_object)
    full_formatted_bytecode = "\n".join(
        format_bytecode(bc, bytecode_version, headers, serialize_insts)
        for bc in _iter_nested_bytecodes(root, bytecode_constructor)
    )

    if output_file:
        output_file.write(full_formatted_bytecode)

    return full_formatted_bytecode
190+
191+
192+
if __name__ == "__main__":
    parser = argparse.ArgumentParser(prog="serialize_bytecode")
    parser.add_argument(
        "-x",
        "--use_xdis",
        help="Use xdis to serialize bytecode",
        action="store_true",
    )
    parser.add_argument(
        "--headers",
        help="List of specific code object params to test, defaults to all parameters. Should be 'co_*', for example, 'co_lines'",
        nargs="*",
    )
    parser.add_argument(
        "--skip_insts",
        help="Do not test accuracy of instructions",
        action="store_false",
        # the flag stores False, so name the destination after what the value means
        dest="serialize_insts",
    )
    parser.add_argument("pyc", help="PYC file to serialize.")
    args = parser.parse_args()

    # verify pyc path; parser.error is used rather than assert because
    # asserts are stripped when python runs with -O
    pyc_path = Path(args.pyc)
    if not pyc_path.exists():
        parser.error("PYC does not exist")

    # setup logger
    logging.basicConfig(format="%(levelname)s: %(message)s")

    # an empty --headers list means "use the defaults", same as omitting it
    serialize_pyc(pyc_path, args.use_xdis, headers=args.headers or None, serialize_insts=args.serialize_insts)

0 commit comments

Comments
 (0)