Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions test_crossversion/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Generated template tree (source copies, compiled pyc's, serialized text)
templates/
# Python bytecode caches
**/__pycache__/
*.pyc
43 changes: 43 additions & 0 deletions test_crossversion/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Every target in this file is a command, not a file, so each one must be
# declared phony; otherwise a stray file named e.g. "help" or "usage" would
# make the target look up to date and silently disable it.
.PHONY: help usage clean compile prepare test get_sources

# Template directories (same layout as the paths built in config/__init__.py).
# Simple (:=) assignment: the values are constants, no late binding is needed.
SOURCE := ./templates/source/
COMPILED := ./templates/compiled/
SERIALIZED := ./templates/serialized/

# usage
# The text is exported to the environment so the recipe can print it verbatim
# (newlines included) through the shell variable $$helptext.
define helptext
Crossversion xdis test usage:
    help | usage : show this menu
    clean        : remove compiled and serialized files
    compile      : with each tox env, compile all sources in $(SOURCE) to $(COMPILED), then serialize with dis to $(SERIALIZED)
    prepare      : clean then compile
    get_sources  : copy all .py files in ./ -> $(SOURCE)
    test         : prepare and run tests. with each tox env, serialize pyc's in $(COMPILED)<version> with xdis, then check against corresponding serialized pyc in $(SERIALIZED)<version>
endef
export helptext

# First rule in the file, so a bare `make` shows the help menu.
help:
	@echo "$$helptext"

usage: help

# clean compiled files
# Removes every *.pyc file and __pycache__ directory below this directory, then
# empties the compiled and serialized template directories.
# The two guards abort before anything is deleted if a directory variable was
# overridden to an empty value (which would turn `rm -rf $(VAR)/*` into `rm -rf /*`).
clean:
	$(if $(strip $(COMPILED)),,$(error COMPILED is empty; refusing to run rm -rf))
	$(if $(strip $(SERIALIZED)),,$(error SERIALIZED is empty; refusing to run rm -rf))
	find . -name "*.pyc" -delete
	find . -name "__pycache__" -type d -prune -exec rm -rf {} +
	rm -rf $(COMPILED)/* $(SERIALIZED)/*

# compile sources in templates/source
compile:
	tox -c ./tox_prepare.ini

# clean, then compile.
# compile runs as a sub-make instead of being a second prerequisite: make does
# not guarantee prerequisite order under `make -j`, and compile must never
# start before clean has finished deleting the previous outputs.
prepare: clean
	$(MAKE) compile

# copy python source files to ./templates/source
# The directory is created first: on a fresh checkout it does not exist yet
# (templates/ is git-ignored), and cp would fail with "not a directory".
get_sources:
	mkdir -p $(SOURCE)
	cp -f *.py $(SOURCE)

# main test
# prepare runs as a sub-make after get_sources has completed, so the sources
# are guaranteed to be in place before they are compiled, even under `make -j`.
test: get_sources
	$(MAKE) prepare
	tox
16 changes: 16 additions & 0 deletions test_crossversion/USAGE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Automated crossversion testing
This testing suite is used for automatic testing of differences found between xdis and dis.
This is done by having a way to identically "serialize" important attributes in xdis and dis bytecodes.
We then can check a diff between a serialized xdis and dis bytecode to find if xdis is parsing something incorrectly.
Most tests should be run using the Makefile.

# System Requirements
- `pyenv` and `pyenv-virtualenv`
- Each version needing to be tested should be installed with pyenv.
- `tox`

# Usage
## Makefile
Run `make` or `make help` to show the help menu for running and preparing tests.

To simply run tests, `make test` will copy some sources, prepare template files, and run tests.
22 changes: 22 additions & 0 deletions test_crossversion/config/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from configparser import ConfigParser
from pathlib import Path
from sys import version_info

# main test root dir (the directory that contains this config package)
_test_path = Path(__file__).parent.parent

# system version of python as "<major>.<minor>"
SYS_VERSION = f"{version_info.major}.{version_info.minor}"

# template dirs
TEMPLATE_DIR = _test_path / "templates"
TEMPLATE_SOURCE_DIR = TEMPLATE_DIR / "source"
TEMPLATE_COMPILED_DIR = TEMPLATE_DIR / "compiled"
TEMPLATE_SERIALIZED_DIR = TEMPLATE_DIR / "serialized"


def _check_dir(path):
    """Create directory ``path`` if it does not exist yet.

    ``exist_ok=True`` removes the window between an explicit existence check
    and the creation, and ``parents=True`` makes the call independent of the
    order in which the directories are created.
    """
    path.mkdir(parents=True, exist_ok=True)


# check dirs and make them if needed (runs on import of this package)
_check_dir(TEMPLATE_DIR)
_check_dir(TEMPLATE_SOURCE_DIR)
_check_dir(TEMPLATE_COMPILED_DIR)
_check_dir(TEMPLATE_SERIALIZED_DIR)
47 changes: 47 additions & 0 deletions test_crossversion/prepare_templates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from py_compile import compile

from config import (
SYS_VERSION,
TEMPLATE_COMPILED_DIR,
TEMPLATE_SERIALIZED_DIR,
TEMPLATE_SOURCE_DIR,
)
from serialize_bytecode import serialize_pyc


def prepare_templates():
    """
    Compile files in template source dir, then serialize with dis.

    Intermediary steps are saved in respective folders in
    templates / <compiled|serialized> / <version>, where <version> is the
    version of the interpreter running this function (SYS_VERSION).

    Raises py_compile.PyCompileError if a source file fails to compile.
    """
    # create folders to save pyc's; safe to call when they already exist
    compiled_dir = TEMPLATE_COMPILED_DIR / SYS_VERSION
    serialized_dir = TEMPLATE_SERIALIZED_DIR / SYS_VERSION
    compiled_dir.mkdir(parents=True, exist_ok=True)
    serialized_dir.mkdir(parents=True, exist_ok=True)

    # compile and serialize template files
    # sorted so the processing (and log) order is the same on every run
    num_source = 0
    for source in sorted(TEMPLATE_SOURCE_DIR.glob("*.py")):
        # create paths
        pyc_file = compiled_dir / f"{source.stem}_{SYS_VERSION}.pyc"
        serialized_file = serialized_dir / f"{source.stem}_{SYS_VERSION}.txt"

        # compile pyc
        # doraise=True: without it a compile error is only printed and no pyc
        # is written, which then surfaces as an unrelated error when the
        # missing pyc is read below
        compile(str(source), str(pyc_file), doraise=True)
        print(f"Compiled {str(source)} -> {str(pyc_file)}")

        # serialize pyc with dis (use_xdis=False) into the text file
        with serialized_file.open("w") as f:
            serialize_pyc(pyc_file, False, f)
        print(f"Serialized {str(pyc_file)} -> {str(serialized_file)}")
        num_source += 1

    print(f"{num_source} files compiled and serialized")


if __name__ == "__main__":
    prepare_templates()
165 changes: 165 additions & 0 deletions test_crossversion/serialize_bytecode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
from __future__ import annotations

import argparse
import sys
from pathlib import Path
from typing import Callable, TextIO

import xdis
from xdis import disassemble_file, iscode

# Util to format shorthand code obj name
# Used so we do not compare memory addrs
_fmt_codeobj = lambda co: f"<codeobj {co.co_name}>"


def _iter_nested_bytecodes(bytecode, bytecode_constructor: Callable):
    """
    Walk a bytecode object together with every bytecode nested inside it.

    bytecode: root bytecode object; it is the first item yielded
    bytecode_constructor: callable that wraps a child code object in a bytecode
    """
    pending = [bytecode]
    while len(pending) > 0:
        current = pending.pop()
        # wrap and queue the code-object constants before handing out the parent
        for const in current.codeobj.co_consts:
            if iscode(const):
                pending.append(bytecode_constructor(const))
        yield current


def _format_headers(bytecode) -> str:
    """
    Format important headers (attrs) of bytecode.

    bytecode: object exposing the code object as ``bytecode.codeobj``
    Returns one "<name> : <value>" line per serialized attribute, joined with
    newlines; the leading "co_" is dropped from each attribute name.
    Attributes missing from the code object are skipped with a warning.
    """

    # TODO add an automated way to filter attrs not used in dis that may be present in xdis
    # simple solution may just be a header in a serialized pyc stating what is being saved

    # headers of the codeobj to serialize
    headers_to_serialize = [
        "co_argcount",
        "co_cellvars",
        "co_code",
        "co_consts",
        "co_firstlineno",
        "co_flags",
        "co_freevars",
        "co_kwonlyargcount",
        "co_linetable",
        # "co_lnotab",  # not in dis >3.11, see todo above
        "co_name",
        "co_names",
        "co_nlocals",
        "co_posonlyargcount",
        "co_stacksize",
        "co_varnames",
    ]
    # default format for each attr
    header_fmt = "{name} : {val}"

    # format headers
    formatted_headers = []
    for attr in headers_to_serialize:
        if not hasattr(bytecode.codeobj, attr):
            # warn on stderr so the message never ends up inside serialized
            # output when the result is written to stdout
            print(f"Warning: Codeobj missing test_attr {attr}", file=sys.stderr)
            continue
        val = getattr(bytecode.codeobj, attr)
        # replace code objects in co_consts by their short label; the default
        # repr would embed memory addresses
        if attr == "co_consts":
            val = [_fmt_codeobj(const) if iscode(const) else const for const in val]
        # format header string, attr[3:] strips the "co_" prefix
        formatted_headers.append(header_fmt.format(name=attr[3:], val=val))

    return "\n".join(formatted_headers)


def _format_insts(bytecode) -> str:
    """Render every instruction of ``bytecode`` as one text line each."""
    # TODO revisit ignoring argrepr and argvals in tests
    # argrepr is left out for now, as xdis will sometimes include additional info there

    # one line per instruction: "<opcode> <opname> : <arg> <argval>"
    template = "{inst.opcode} {inst.opname} : {inst.arg} {argval}"
    lines = []
    for inst in bytecode:
        # CACHE entries are not serialized
        if inst.opname == "CACHE":
            continue
        # code-object argvals are replaced by their short label
        shown = _fmt_codeobj(inst.argval) if iscode(inst.argval) else inst.argval
        lines.append(template.format(inst=inst, argval=shown))

    return "\n".join(lines)


def format_bytecode(bytecode) -> str:
    """Build the full text form of one bytecode: title, attrs, instructions."""
    sections = [
        f"BYTECODE {bytecode.codeobj.co_name}",
        "ATTRS:",
        _format_headers(bytecode),
        "INSTS:",
        _format_insts(bytecode),
    ]
    # every section, including the last one, is terminated by a newline
    return "\n".join(sections) + "\n"


def serialize_pyc(
    pyc: Path, use_xdis: bool = False, output_file: TextIO | None = sys.stdout
) -> str:
    """
    Turn a pyc file into comparable text, using either dis or xdis.

    pyc: path of the pyc file to read
    use_xdis: load and disassemble with xdis when True, with dis otherwise
    output_file: stream the text is written to; nothing is written when falsy
    Returns the serialized text.
    """

    # pick the code object and the matching bytecode factory
    if not use_xdis:
        import dis
        import marshal

        # drop the first 16 bytes of the file and unmarshal the rest
        code_object = marshal.loads(pyc.read_bytes()[16:])

        def make_bytecode(codeobj):
            return dis.Bytecode(codeobj)

    else:
        import os

        # disassemble into the null device so nothing is printed
        with open(os.devnull, "w") as sink:
            (_, code_object, version_tuple, _, _, is_pypy, _, _) = disassemble_file(
                str(pyc), sink
            )
            # opcode module matching the version the pyc was built for
            opc = xdis.get_opcode(version_tuple, is_pypy, None)

        def make_bytecode(codeobj):
            return xdis.Bytecode(codeobj, opc)

    # format the root bytecode and every nested one
    root = make_bytecode(code_object)
    full_formatted_bytecode = "\n".join(
        format_bytecode(bc) for bc in _iter_nested_bytecodes(root, make_bytecode)
    )

    # write formatted bytecodes
    if output_file:
        output_file.write(full_formatted_bytecode)

    return full_formatted_bytecode


# Command line entry point: serialize one pyc file to stdout.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(prog="serialize_bytecode")
    parser.add_argument(
        "-x",
        "--use_xdis",
        help="Use xdis to serialize bytecode",
        action="store_true",
    )
    parser.add_argument("pyc", help="PYC file to serialize.")
    args = parser.parse_args()

    pyc_path = Path(args.pyc)
    # parser.error instead of assert: asserts are removed under `python -O`,
    # and this reports a usage error with a non-zero exit status
    if not pyc_path.exists():
        parser.error(f"PYC does not exist: {pyc_path}")

    serialize_pyc(pyc_path, args.use_xdis)
62 changes: 62 additions & 0 deletions test_crossversion/test_xdis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from __future__ import annotations

from pathlib import Path
from typing import Iterable

from config import SYS_VERSION, TEMPLATE_COMPILED_DIR, TEMPLATE_SERIALIZED_DIR
from serialize_bytecode import serialize_pyc


class SerializedTestCase:
    """One comparison between the xdis and the dis view of a pyc.

    Built from a pyc file, which is serialized with xdis on construction, and
    from a txt file holding the previously serialized form of the same pyc."""

    pyc_path: Path
    serialized_txt_path: Path
    serialized_dis: str
    serialized_xdis: str
    message: str

    def __init__(self, pyc: Path, serialized_txt: Path):
        self.pyc_path, self.serialized_txt_path = pyc, serialized_txt
        # reference text, read from the stored txt file
        self.serialized_dis = self.serialized_txt_path.read_text()
        # text produced now by xdis; output_file=None keeps it off stdout
        self.serialized_xdis = serialize_pyc(
            self.pyc_path, use_xdis=True, output_file=None
        )
        self.message = "Checking equivalence: {} <---> {}".format(pyc, serialized_txt)


def get_tests_by_version(v: str) -> Iterable[SerializedTestCase]:
    """Yield one test case per entry in the compiled template folder of version v."""
    compiled_root = Path(TEMPLATE_COMPILED_DIR / v)
    serialized_root = Path(TEMPLATE_SERIALIZED_DIR / v)
    assert compiled_root.exists()
    assert serialized_root.exists()

    for pyc_file in compiled_root.glob("*"):
        # the serialized counterpart shares the stem and ends in .txt
        expected_txt = Path(serialized_root.joinpath(f"{pyc_file.stem}.txt"))

        # check test case pair
        assert expected_txt.exists() and pyc_file.exists()
        yield SerializedTestCase(pyc_file, expected_txt)


def get_versions() -> Iterable[str]:
    """Yield the name of every directory found in the template compiled dir."""
    yield from (
        entry.name for entry in TEMPLATE_COMPILED_DIR.glob("*") if entry.is_dir()
    )


def test_all_versions():
    """
    Test each version in compiled template folder.

    For every test case the stored serialization and the xdis serialization are
    compared line by line; a mismatch fails with the case's message, which
    names the two files being compared.
    """
    for v in get_versions():
        print(f"=== {SYS_VERSION}: Testing version {v} ===")
        for case in get_tests_by_version(v):
            # attach the message so a failure says which pyc/txt pair differs
            assert (
                case.serialized_dis.splitlines() == case.serialized_xdis.splitlines()
            ), case.message


if __name__ == "__main__":
    test_all_versions()
13 changes: 13 additions & 0 deletions test_crossversion/tox.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# tox configuration for the main test run (invoked as plain `tox` by the
# Makefile's test target).
[tox]
min_version = 4.0
# NOTE(review): presumably lets tox find pyenv-installed interpreters (pyenv is
# a system requirement in USAGE.md) -- confirm against the plugin docs
requires = tox-pyenv-redux
# ENV LIST MUST BE COMMA SEPARATED LIST OF PYTHON VERSIONS
env_list = 3.9, 3.10, 3.11, 3.12, 3.13

[testenv]
description = Check all permutations of python dis code objects with xdis code objects.
# editable install of the directory above this one, plus pytest
deps =
    -e=file:///{toxinidir}/../.
    pytest
# -s turns off pytest output capturing; extra pytest arguments can be passed
# through {posargs}
commands =
    pytest {tty:--color=yes} -s {posargs} ./test_xdis.py
13 changes: 13 additions & 0 deletions test_crossversion/tox_prepare.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# tox configuration for the prepare step (invoked by the Makefile's compile
# target as `tox -c ./tox_prepare.ini`).
[tox]
min_version = 4.0
# NOTE(review): presumably lets tox find pyenv-installed interpreters (pyenv is
# a system requirement in USAGE.md) -- confirm against the plugin docs
requires = tox-pyenv-redux
# ENV LIST MUST BE COMMA SEPARATED LIST OF PYTHON VERSIONS
env_list = 3.9, 3.10, 3.11, 3.12, 3.13

[testenv]
description = Compile and serialize source templates with dis
# editable install of the directory above this one, plus pytest
deps =
    -e=file:///{toxinidir}/../.
    pytest # not needed but speeds up env creation
# runs prepare_templates.py once per env, i.e. once per python version
commands =
    python ./prepare_templates.py
Loading