NOTE: payload edited during review; hunk line counts are updated, but the "index" blob ids are from the original patch and no longer match the edited files.

diff --git a/test_crossversion/.gitignore b/test_crossversion/.gitignore
new file mode 100644
index 00000000..c4a44ac8
--- /dev/null
+++ b/test_crossversion/.gitignore
@@ -0,0 +1,3 @@
+templates/
+**/__pycache__/
+*.pyc
diff --git a/test_crossversion/Makefile b/test_crossversion/Makefile
new file mode 100644
index 00000000..abc5d6ac
--- /dev/null
+++ b/test_crossversion/Makefile
@@ -0,0 +1,44 @@
+.PHONY: help usage clean compile prepare test get_sources
+
+SOURCE=./templates/source/
+COMPILED=./templates/compiled/
+SERIALIZED=./templates/serialized/
+
+# usage
+define helptext
+Crossversion xdis test usage:
+	help | usage : show this menu
+	clean        : remove compiled and serialized files
+	compile      : with each tox env, compile all sources in $(SOURCE) to $(COMPILED), then serialize with dis to $(SERIALIZED)
+	prepare      : clean then compile
+	get_sources  : copy all .py files in ./ -> $(SOURCE)
+	test         : prepare and run tests. with each tox env, serialize pyc's in $(COMPILED) with xdis, then check against corresponding serialized pyc in $(SERIALIZED)
+endef
+export helptext
+
+help:
+	@echo "$$helptext"
+
+usage: help
+
+# clean compiled files
+clean:
+	find . -name "*.pyc" -delete
+	find . -name "__pycache__" -type d -delete
+	rm -rf $(COMPILED)/*
+	rm -rf $(SERIALIZED)/*
+
+# compile sources in templates/source
+compile:
+	tox -c ./tox_prepare.ini
+
+prepare: clean compile
+
+# copy python source files to ./templates/source
+get_sources:
+	mkdir -p $(SOURCE)
+	cp -f *.py $(SOURCE)
+
+# main test
+test: get_sources prepare
+	tox
diff --git a/test_crossversion/USAGE.md b/test_crossversion/USAGE.md
new file mode 100644
index 00000000..9de7121b
--- /dev/null
+++ b/test_crossversion/USAGE.md
@@ -0,0 +1,16 @@
+# Automated crossversion testing
+This testing suite is used for automatic testing of differences found between xdis and dis.
+This is done by having a way to identically "serialize" important attributes in xdis and dis bytecodes.
+We can then diff a serialized xdis bytecode against a serialized dis bytecode to find out whether xdis is parsing something incorrectly.
+Most tests should be run using the Makefile.
+
+# System Requirements
+- `pyenv` and `pyenv-virtualenv`
+  - Each version needing to be tested should be installed with pyenv.
+- `tox`
+
+# Usage
+## Makefile
+Run `make` or `make help` to show the help menu for running and preparing tests.
+
+To simply run tests, `make test` will copy some sources, prepare template files, and run tests.
diff --git a/test_crossversion/config/__init__.py b/test_crossversion/config/__init__.py
new file mode 100644
index 00000000..43d0cd91
--- /dev/null
+++ b/test_crossversion/config/__init__.py
@@ -0,0 +1,27 @@
+from configparser import ConfigParser
+from pathlib import Path
+from sys import version_info
+
+# main test root dir
+_test_path = Path(__file__).parent.parent
+
+# system version of python
+SYS_VERSION = f"{version_info.major}.{version_info.minor}"
+
+# template dirs
+TEMPLATE_DIR = _test_path / "templates"
+TEMPLATE_SOURCE_DIR = TEMPLATE_DIR / "source"
+TEMPLATE_COMPILED_DIR = TEMPLATE_DIR / "compiled"
+TEMPLATE_SERIALIZED_DIR = TEMPLATE_DIR / "serialized"
+
+
+def _check_dir(path: Path) -> None:
+    """Create ``path`` and any missing parents; do nothing if it already exists."""
+    path.mkdir(parents=True, exist_ok=True)
+
+
+# check dirs and make them if needed
+_check_dir(TEMPLATE_DIR)
+_check_dir(TEMPLATE_SOURCE_DIR)
+_check_dir(TEMPLATE_COMPILED_DIR)
+_check_dir(TEMPLATE_SERIALIZED_DIR)
diff --git a/test_crossversion/prepare_templates.py b/test_crossversion/prepare_templates.py
new file mode 100644
index 00000000..96d1dc69
--- /dev/null
+++ b/test_crossversion/prepare_templates.py
@@ -0,0 +1,47 @@
+import py_compile
+
+from config import (
+    SYS_VERSION,
+    TEMPLATE_COMPILED_DIR,
+    TEMPLATE_SERIALIZED_DIR,
+    TEMPLATE_SOURCE_DIR,
+)
+from serialize_bytecode import serialize_pyc
+
+
+def prepare_templates():
+    """
+    Compile files in template source dir, then serialize with dis.
+
+    Intermediary steps are saved in per-version folders, named after
+    SYS_VERSION, inside the compiled and serialized template dirs.
+    """
+    # create folders to save pyc's (no-op when they already exist)
+    compiled_dir = TEMPLATE_COMPILED_DIR / SYS_VERSION
+    serialized_dir = TEMPLATE_SERIALIZED_DIR / SYS_VERSION
+    compiled_dir.mkdir(parents=True, exist_ok=True)
+    serialized_dir.mkdir(parents=True, exist_ok=True)
+
+    # compile and serialize template files
+    num_source = 0
+    for source in TEMPLATE_SOURCE_DIR.glob("*.py"):
+        # create paths
+        pyc_file = compiled_dir / f"{source.stem}_{SYS_VERSION}.pyc"
+        serialized_file = serialized_dir / f"{source.stem}_{SYS_VERSION}.txt"
+
+        # compile pyc; doraise=True raises on a compile error instead of only
+        # printing it and then failing below on the missing pyc
+        py_compile.compile(str(source), str(pyc_file), doraise=True)
+        print(f"Compiled {source} -> {pyc_file}")
+
+        # serialize pyc
+        with serialized_file.open("w") as f:
+            serialize_pyc(pyc_file, False, f)
+        print(f"Serialized {pyc_file} -> {serialized_file}")
+        num_source += 1
+
+    print(f"{num_source} files compiled and serialized")
+
+
+if __name__ == "__main__":
+    prepare_templates()
diff --git a/test_crossversion/serialize_bytecode.py b/test_crossversion/serialize_bytecode.py
new file mode 100644
index 00000000..d4bc83a7
--- /dev/null
+++ b/test_crossversion/serialize_bytecode.py
@@ -0,0 +1,173 @@
+from __future__ import annotations
+
+import argparse
+import sys
+from pathlib import Path
+from typing import Callable, TextIO
+
+import xdis
+from xdis import disassemble_file, iscode
+
+
+def _fmt_codeobj(co) -> str:
+    """Format a shorthand code object name, so we do not compare memory addrs."""
+    # NOTE(review): the f-string body was empty in the patch under review,
+    # apparently lost in extraction; confirm this format against upstream.
+    return f"<codeobj {co.co_name}>"
+
+
+def _iter_nested_bytecodes(bytecode, bytecode_constructor: Callable):
+    """
+    Iterate over a bytecode and its child bytecodes.
+
+    bytecode: bytecode object to iterate, yielded first
+    bytecode_constructor: constructor to create child bytecodes with
+    """
+    bc_stack = [bytecode]
+    while bc_stack:
+        bc = bc_stack.pop()
+        bc_stack.extend(
+            bytecode_constructor(obj) for obj in bc.codeobj.co_consts if iscode(obj)
+        )
+        yield bc
+
+
+def _format_headers(bytecode) -> str:
+    """Format important headers (attrs) of bytecode."""
+
+    # TODO add an automated way to filter attrs not used in dis that may be present in xdis
+    # simple solution may just be a header in a serialized pyc stating what is being saved
+
+    # headers of the codeobj to serialize
+    headers_to_serialize = [
+        "co_argcount",
+        "co_cellvars",
+        "co_code",
+        "co_consts",
+        "co_firstlineno",
+        "co_flags",
+        "co_freevars",
+        "co_kwonlyargcount",
+        "co_linetable",
+        # "co_lnotab",  # not in dis >3.11, see todo above
+        "co_name",
+        "co_names",
+        "co_nlocals",
+        "co_posonlyargcount",
+        "co_stacksize",
+        "co_varnames",
+    ]
+    # default format for each attr
+    header_fmt = "{name} : {val}"
+
+    # format headers
+    formatted_headers = []
+    for attr in headers_to_serialize:
+        if not hasattr(bytecode.codeobj, attr):
+            # warn on stderr so the message never lands in serialized output
+            print(f"Warning: Codeobj missing test_attr {attr}", file=sys.stderr)
+            continue
+        val = getattr(bytecode.codeobj, attr)
+        # filter code objects in co_consts
+        if attr == "co_consts":
+            # NOTE(review): this expression and the rest of the loop body were
+            # garbled in the patch under review; reconstructed, confirm upstream.
+            val = [_fmt_codeobj(const) if iscode(const) else const for const in val]
+        formatted_headers.append(header_fmt.format(name=attr, val=val))
+
+    return "\n".join(formatted_headers)
+
+
+def _format_insts(bytecode) -> str:
+    """Format all instructions in given bytecode."""
+    # TODO revisit ignoring argrepr and argvals in tests
+    # we are ignoring argrepr and val for now, as xdis will sometimes include additional info there
+
+    # default format for each instruction
+    inst_fmt = "{inst.opcode} {inst.opname} : {inst.arg} {argval}"
+    insts = []
+    for inst in bytecode:
+        # skip cache
+        if inst.opname == "CACHE":
+            continue
+        # filter and format argvals
+        argval = _fmt_codeobj(inst.argval) if iscode(inst.argval) else inst.argval
+        insts.append(inst_fmt.format(inst=inst, argval=argval))
+
+    return "\n".join(insts)
+
+
+def format_bytecode(bytecode) -> str:
+    """Create complete formatted string of bytecode."""
+    outstr = f"BYTECODE {bytecode.codeobj.co_name}\n"
+    outstr += "ATTRS:\n"
+    outstr += _format_headers(bytecode) + "\n"
+    outstr += "INSTS:\n"
+    outstr += _format_insts(bytecode) + "\n"
+    return outstr
+
+
+def serialize_pyc(
+    pyc: Path, use_xdis: bool = False, output_file: TextIO | None = sys.stdout
+) -> str:
+    """Serialize a pyc to text for testing, using dis or xdis.
+
+    The text is written to output_file unless it is None, and is also returned.
+    """
+
+    # create a code object in xdis or dis, and a constructor to make bytecodes with
+    if use_xdis:
+        # write to null so no disassembly output
+        from os import devnull
+
+        with open(devnull, "w") as fnull:
+            # create xdis code obj
+            (_, code_object, version_tuple, _, _, is_pypy, _, _) = disassemble_file(
+                str(pyc), fnull
+            )
+            # get corresponding opcode class
+            opc = xdis.get_opcode(version_tuple, is_pypy, None)
+            # create xdis bytecode constructor
+            bytecode_constructor = lambda codeobj: xdis.Bytecode(codeobj, opc)
+    else:
+        import dis
+        import marshal
+
+        # load code obj; [16:] skips the pyc header, which is 16 bytes on
+        # Python 3.7+ (PEP 552)
+        code_object = marshal.loads(pyc.read_bytes()[16:])
+        # create dis bytecode constructor
+        bytecode_constructor = dis.Bytecode
+
+    # iter bytecodes
+    init_bytecode = bytecode_constructor(code_object)
+    formatted_bytecodes = [
+        format_bytecode(bc)
+        for bc in _iter_nested_bytecodes(init_bytecode, bytecode_constructor)
+    ]
+
+    # write formatted bytecodes
+    full_formatted_bytecode = "\n".join(formatted_bytecodes)
+    if output_file:
+        output_file.write(full_formatted_bytecode)
+
+    return full_formatted_bytecode
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(prog="serialize_bytecode")
+    parser.add_argument(
+        "-x",
+        "--use_xdis",
+        help="Use xdis to serialize bytecode",
+        action="store_true",
+    )
+    parser.add_argument("pyc", help="PYC file to serialize.")
+    args = parser.parse_args()
+
+    pyc_path = Path(args.pyc)
+    # assert would be stripped under -O; report a normal usage error instead
+    if not pyc_path.exists():
+        parser.error("PYC does not exist")
+
+    serialize_pyc(pyc_path, args.use_xdis)
diff --git a/test_crossversion/test_xdis.py b/test_crossversion/test_xdis.py
new file mode 100644
index 00000000..ce0a0e49
--- /dev/null
+++ b/test_crossversion/test_xdis.py
@@ -0,0 +1,64 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Iterable
+
+from config import SYS_VERSION, TEMPLATE_COMPILED_DIR, TEMPLATE_SERIALIZED_DIR
+from serialize_bytecode import serialize_pyc
+
+
+class SerializedTestCase:
+    """Test case for comparing a disassembled xdis and dis pyc, Needs a pyc to
+    disassemble with xdis then serialize, and a dis serialized pyc txt file."""
+
+    pyc_path: Path
+    serialized_txt_path: Path
+    serialized_dis: str
+    serialized_xdis: str
+    message: str
+
+    def __init__(self, pyc: Path, serialized_txt: Path):
+        self.pyc_path = pyc
+        self.serialized_txt_path = serialized_txt
+        self.serialized_dis = serialized_txt.read_text()
+        self.serialized_xdis = serialize_pyc(pyc, use_xdis=True, output_file=None)
+        self.message = (
+            f"Checking equivalence: {self.pyc_path} <---> {self.serialized_txt_path}"
+        )
+
+
+def get_tests_by_version(v: str) -> Iterable[SerializedTestCase]:
+    """Iterate test cases from Template folder with given version v."""
+    compiled_tests_dir = TEMPLATE_COMPILED_DIR / v
+    serialized_tests_dir = TEMPLATE_SERIALIZED_DIR / v
+    assert compiled_tests_dir.exists(), f"missing {compiled_tests_dir}"
+    assert serialized_tests_dir.exists(), f"missing {serialized_tests_dir}"
+
+    # only pyc files are test inputs; sorted() keeps the run order stable
+    for compiled_test in sorted(compiled_tests_dir.glob("*.pyc")):
+        serialized_test = serialized_tests_dir / f"{compiled_test.stem}.txt"
+
+        # check test case pair
+        assert serialized_test.exists(), f"missing {serialized_test}"
+        yield SerializedTestCase(compiled_test, serialized_test)
+
+
+def get_versions() -> Iterable[str]:
+    """Get test versions by iterating through dirs in template compiled dir."""
+    for version_dir in TEMPLATE_COMPILED_DIR.glob("*"):
+        if version_dir.is_dir():
+            yield version_dir.name
+
+
+def test_all_versions():
+    """Test each version in compiled template folder."""
+    for v in get_versions():
+        print(f"=== {SYS_VERSION}: Testing version {v} ===")
+        for case in get_tests_by_version(v):
+            assert (
+                case.serialized_dis.splitlines() == case.serialized_xdis.splitlines()
+            ), case.message
+
+
+if __name__ == "__main__":
+    test_all_versions()
diff --git a/test_crossversion/tox.ini b/test_crossversion/tox.ini
new file mode 100644
index 00000000..a5bcaf67
--- /dev/null
+++ b/test_crossversion/tox.ini
@@ -0,0 +1,13 @@
+[tox]
+min_version = 4.0
+requires = tox-pyenv-redux
+# ENV LIST MUST BE COMMA SEPARATED LIST OF PYTHON VERSIONS
+env_list = 3.9, 3.10, 3.11, 3.12, 3.13
+
+[testenv]
+description = Check all permutations of python dis code objects with xdis code objects.
+deps =
+    -e=file:///{toxinidir}/../.
+    pytest
+commands =
+    pytest {tty:--color=yes} -s {posargs} ./test_xdis.py
diff --git a/test_crossversion/tox_prepare.ini b/test_crossversion/tox_prepare.ini
new file mode 100644
index 00000000..f403eafa
--- /dev/null
+++ b/test_crossversion/tox_prepare.ini
@@ -0,0 +1,13 @@
+[tox]
+min_version = 4.0
+requires = tox-pyenv-redux
+# ENV LIST MUST BE COMMA SEPARATED LIST OF PYTHON VERSIONS
+env_list = 3.9, 3.10, 3.11, 3.12, 3.13
+
+[testenv]
+description = Compile and serialize source templates with dis
+deps =
+    -e=file:///{toxinidir}/../.
+    pytest # not needed but speeds up env creation
+commands =
+    python ./prepare_templates.py