From eaeae09f8258368089de95e8e7d681326be2d4f5 Mon Sep 17 00:00:00 2001 From: Joel Flores Date: Wed, 20 Nov 2024 14:58:13 -0600 Subject: [PATCH 1/9] to fix python 3.12 line table issues. We do this by turning the python 3.13 object to a python 3.12 object since both of those versions do the same thing. Also added some stuff to fix some control flow issues in 3.13 as there are some cache instructions that mess up some targets --- xdis/bytecode.py | 43 ++++-- xdis/codetype/__init__.py | 20 +-- xdis/codetype/code312.py | 275 ++++++++++++++++++++++++++++++++++++++ xdis/load.py | 12 +- 4 files changed, 324 insertions(+), 26 deletions(-) create mode 100644 xdis/codetype/code312.py diff --git a/xdis/bytecode.py b/xdis/bytecode.py index 3722ffac..fbd37111 100644 --- a/xdis/bytecode.py +++ b/xdis/bytecode.py @@ -201,7 +201,8 @@ def parse_exception_table(exception_table: bytes): dl = _parse_varint(iterator) depth = dl >> 1 lasti = bool(dl & 1) - entries.append(_ExceptionTableEntry(start, end, target, depth, lasti)) + entries.append(_ExceptionTableEntry( + start, end, target, depth, lasti)) except StopIteration: return entries @@ -323,7 +324,8 @@ def get_instructions_bytes( if arg & 1: argrepr = "NULL|self + " + argrepr elif ( - opc.version_tuple >= (3, 12) and opc.opname[op] == "LOAD_SUPER_ATTR" + opc.version_tuple >= ( + 3, 12) and opc.opname[op] == "LOAD_SUPER_ATTR" ): argval, argrepr = _get_name_info(arg >> 2, names) if arg & 1: @@ -333,6 +335,12 @@ def get_instructions_bytes( elif op in opc.JREL_OPS: signed_arg = -arg if "JUMP_BACKWARD" in opc.opname[op] else arg argval = i + get_jump_val(signed_arg, opc.python_version) + # deal with cache instructions in python 3.13 + if opc.version_tuple >= (3, 13): + if opc.opname[op] in ["POP_JUMP_IF_TRUE", "POP_JUMP_IF_FALSE", "POP_JUMP_IF_NONE", "POP_JUMP_IF_NOT_NONE"]: + argval += 2 + elif opc.opname[op] == 'JUMP_BACKWARD': # might be sus + argval -= 2 # FOR_ITER has a cache instruction in 3.12 if opc.version_tuple >= (3, 12) and opc.opname[op] == "FOR_ITER": argval += 2 @@ -344,8 +352,10 @@ def get_instructions_bytes( if opc.version_tuple >= (3, 13) and opc.opname[op] in ("LOAD_FAST_LOAD_FAST", "STORE_FAST_LOAD_FAST", "STORE_FAST_STORE_FAST"): arg1 = arg >> 4 arg2 = arg & 15 - argval1, argrepr1 = _get_name_info(arg1, (varnames or tuple()) + (cells or tuple())) - argval2, argrepr2 = _get_name_info(arg2, (varnames or tuple()) + (cells or tuple())) + argval1, argrepr1 = _get_name_info( + arg1, (varnames or tuple()) + (cells or tuple())) + argval2, argrepr2 = _get_name_info( + arg2, (varnames or tuple()) + (cells or tuple())) argval = argval1, argval2 argrepr = argrepr1 + ", " + argrepr2 elif opc.version_tuple >= (3, 11): @@ -362,10 +372,10 @@ def get_instructions_bytes( else: argval, argrepr = _get_name_info(arg, cells) elif op in opc.COMPARE_OPS: - if opc.python_version >= (3,13): + if opc.python_version >= (3, 13): # The fifth-lowest bit of the oparg now indicates a forced conversion to bool. argval = (opc.cmp_op[arg >> 5]) - elif opc.python_version >= (3,12): + elif opc.python_version >= (3, 12): argval = (opc.cmp_op[arg >> 4]) else: argval = (opc.cmp_op[arg]) @@ -374,10 +384,12 @@ def get_instructions_bytes( opname = opc.opname[op] if python_36 and opname in ("CALL_FUNCTION", "CALL_FUNCTION_EX"): if opname == "CALL_FUNCTION": - argrepr = format_CALL_FUNCTION(code2num(bytecode, i - 1)) + argrepr = format_CALL_FUNCTION( + code2num(bytecode, i - 1)) else: assert opname == "CALL_FUNCTION_EX" - argrepr = format_CALL_FUNCTION_EX(code2num(bytecode, i - 1)) + argrepr = format_CALL_FUNCTION_EX( + code2num(bytecode, i - 1)) else: if not ( python_36 @@ -396,7 +408,8 @@ def get_instructions_bytes( argrepr = opc.opcode_arg_fmt[opc.opname[op]](arg) opname = opc.opname[op] - inst_size = instruction_size(op, opc) + (extended_arg_count * extended_arg_size) + inst_size = instruction_size( + op, opc) + (extended_arg_count * extended_arg_size) # fallthrough = op not in opc.nofollow start_offset = offset if opc.oppop[op] == 0 else None @@ -465,7 +478,8 @@ def __init__(self, x, opc, first_line=None, current_offset=None, dup_lines=True) self.current_offset = current_offset if opc.version_tuple >= (3, 11) and hasattr(co, "co_exceptiontable"): - self.exception_entries = parse_exception_table(co.co_exceptiontable) + self.exception_entries = parse_exception_table( + co.co_exceptiontable) else: self.exception_entries = None @@ -517,7 +531,8 @@ def dis(self, asm_format="classic", show_source=False): cells = None line_starts = None - first_line_number = co.co_firstlineno if hasattr(co, "co_firstlineno") else None + first_line_number = co.co_firstlineno if hasattr( + co, "co_firstlineno") else None if inspect.iscode(co): filename = inspect.getfile(co) @@ -573,7 +588,8 @@ def disassemble_bytes( exception_entries=None, ) -> list: # Omit the line number column entirely if we have no line number info - show_lineno = line_starts is not None or self.opc.version_tuple < (2, 3) + show_lineno = line_starts is not None or self.opc.version_tuple < ( + 2, 3) show_source = show_source and show_lineno and first_line_number and filename def show_source_text(line_number: Optional[int]): @@ -777,7 +793,8 @@ def list2bytecode(inst_list: Iterable, opc, varnames, consts): operands = opcodes[1:] if opname not in opc.opname: raise TypeError( - "error at item %d [%s, %s], opcode not valid" % (i, opname, operands) + "error at item %d [%s, %s], opcode not valid" % ( + i, opname, operands) ) opcode = opc.opmap[opname] bc.append(opcode) diff --git a/xdis/codetype/__init__.py b/xdis/codetype/__init__.py index a89d0dc5..3a2bd170 100644 --- a/xdis/codetype/__init__.py +++ b/xdis/codetype/__init__.py @@ -29,9 +29,9 @@ from xdis.codetype.code310 import Code310 from xdis.codetype.code311 import Code311, Code311FieldNames from xdis.codetype.code313 import Code313 +from xdis.codetype.code312 import Code312 from xdis.version_info import PYTHON_VERSION_TRIPLE - def codeType2Portable(code, version_tuple=PYTHON_VERSION_TRIPLE): """Converts a native types.CodeType code object into a corresponding more flexible xdis Code type. @@ -42,7 +42,8 @@ def codeType2Portable(code, version_tuple=PYTHON_VERSION_TRIPLE): raise TypeError( f"parameter expected to be a types.CodeType type; is {type(code)} instead" ) - line_table_field = "co_lnotab" if hasattr(code, "co_lnotab") else "co_linetable" + line_table_field = "co_lnotab" if hasattr( + code, "co_lnotab") else "co_linetable" line_table = getattr(code, line_table_field) if version_tuple >= (3, 0): if version_tuple < (3, 8): @@ -101,7 +102,7 @@ def codeType2Portable(code, version_tuple=PYTHON_VERSION_TRIPLE): co_firstlineno=code.co_firstlineno, co_linetable=line_table, ) - elif version_tuple[:2] < (3,13): + elif version_tuple[:2] == (3, 11): return Code311( co_argcount=code.co_argcount, co_posonlyargcount=code.co_posonlyargcount, @@ -122,8 +123,8 @@ def codeType2Portable(code, version_tuple=PYTHON_VERSION_TRIPLE): co_linetable=line_table, co_exceptiontable=code.co_exceptiontable, ) - else: # version tuple >= 3, 13 - return Code313( + elif (version_tuple[:2] >= (3, 12)): # version tuple >= 3, 13 + return Code312( co_argcount=code.co_argcount, co_posonlyargcount=code.co_posonlyargcount, co_kwonlyargcount=code.co_kwonlyargcount, @@ -209,11 +210,12 @@ def portableCodeType(version_tuple=PYTHON_VERSION_TRIPLE): elif version_tuple[:2] == (3, 10): # 3.10 return Code310 - elif version_tuple[:2] < (3,13): - # 3.11 ... + elif version_tuple[:2] == (3, 11): + # 3.11 return Code311 - else: - return Code313 + elif version_tuple[:2] >= (3, 12): + #3.12 + return Code312 elif version_tuple > (2, 0): # 2.0 .. 2.7 return Code2 diff --git a/xdis/codetype/code312.py b/xdis/codetype/code312.py new file mode 100644 index 00000000..4cd5fe40 --- /dev/null +++ b/xdis/codetype/code312.py @@ -0,0 +1,275 @@ +# +import types +from copy import deepcopy + +from xdis.codetype.code311 import Code311, Code311FieldTypes +from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str + +from dataclasses import dataclass +from typing import Iterable, Iterator, Generator + +Code312FieldNames = Code311FieldTypes.copy() +Code312FieldTypes = deepcopy(Code311FieldTypes) + +##### NEW "OPAQUE" LINE TABLE PARSING ##### +# See: https://github.com/python/cpython/blob/aaed91cabcedc16c089c4b1c9abb1114659a83d3/Objects/codeobject.c#L1245C1-L1245C17 +PY_CODE_LOCATION_INFO_SHORT0 = 0 +PY_CODE_LOCATION_INFO_ONE_LINE0 = 10 +PY_CODE_LOCATION_INFO_ONE_LINE1 = 11 +PY_CODE_LOCATION_INFO_ONE_LINE2 = 12 + +PY_CODE_LOCATION_INFO_NO_COLUMNS = 13 +PY_CODE_LOCATION_INFO_LONG = 14 +PY_CODE_LOCATION_INFO_NONE = 15 + +@dataclass(frozen=True) +class LineTableEntry: + line_delta: int + code_delta: int + no_line_flag: bool + +def _scan_varint(remaining_linetable: Iterable[int]) -> int: + value = 0 + for shift, read in enumerate(remaining_linetable): + value |= (read & 63) << (shift * 6) + if not (read & 64): + break + return value + +def _scan_signed_varint(remaining_linetable: Iterable[int]) -> int: + value = _scan_varint(remaining_linetable) + if value & 1: + return -(value >> 1) + return value >> 1 + +def _get_line_delta(code_byte: int, remaining_linetable: Iterable[int]): + line_delta_code = (code_byte >> 3) & 15 + if line_delta_code == PY_CODE_LOCATION_INFO_NONE: + return 0 + if line_delta_code in (PY_CODE_LOCATION_INFO_NO_COLUMNS, PY_CODE_LOCATION_INFO_LONG): + return _scan_signed_varint(remaining_linetable) + if line_delta_code == PY_CODE_LOCATION_INFO_ONE_LINE0: + return 0 + if line_delta_code == PY_CODE_LOCATION_INFO_ONE_LINE1: + return 1 + if line_delta_code == PY_CODE_LOCATION_INFO_ONE_LINE2: + return 2 + return 0 + +def _is_no_line_marker(linetable_code_byte: int): + return (linetable_code_byte >> 3) == 0x1f + +def _next_code_delta(linetable_code_byte: int): + return ((linetable_code_byte & 7) + 1) * 2 + +def _test_check_bit(linetable_code_byte: int): + return bool(linetable_code_byte & 128) + +def _go_to_next_code_byte(remaining_linetable: Iterator[int]) -> int: + try: + while not _test_check_bit((code_byte := next(remaining_linetable))): + pass + except StopIteration: + return None + return code_byte + +def decode_linetable_entry(code_byte: int, remaining_linetable: Iterable[int]) -> LineTableEntry: + assert _test_check_bit(code_byte), "malformed linetable" + return LineTableEntry( + line_delta=_get_line_delta(code_byte=code_byte, remaining_linetable=remaining_linetable), + code_delta=_next_code_delta(linetable_code_byte=code_byte), + no_line_flag=_is_no_line_marker(linetable_code_byte=code_byte) + ) + +def parse_linetable(linetable: bytes, first_lineno: int): + + linetable_entries: list[LineTableEntry] = [] + + # decode linetable entries + iter_linetable = iter(linetable) + while (code_byte := _go_to_next_code_byte(iter_linetable)) is not None: + linetable_entries.append(decode_linetable_entry(code_byte=code_byte, remaining_linetable=iter_linetable)) + + if not linetable_entries: + return + + first_entry, *remaining_entries = linetable_entries + + # compute co_lines() + code_start: int = 0 + code_end: int = first_entry.code_delta + line: int = first_lineno + first_entry.line_delta + no_line_flag = first_entry.no_line_flag + for linetable_entry in remaining_entries: + if linetable_entry.line_delta != 0 or linetable_entry.no_line_flag != no_line_flag: + # if the line changes, emit the current entry + yield (code_start, code_end, None if no_line_flag else line) + + line += linetable_entry.line_delta + no_line_flag = linetable_entry.no_line_flag + code_start = code_end + code_end += linetable_entry.code_delta + + yield (code_start, code_end, None if no_line_flag else line) + +@dataclass(frozen=True) +class PositionEntry: + line_delta: int + num_lines: int + code_delta: int + column: int + endcolumn: int + no_line_flag: bool + +def decode_position_entry(code_byte: int, remaining_linetable: Iterator[int]) -> PositionEntry: + assert _test_check_bit(code_byte), "malformed linetable" + + code_delta = _next_code_delta(code_byte) + + no_line_flag = False + column = -1 + endcolumn = -1 + line_delta = 0 + num_lines = 0 + + location_flags = (code_byte >> 3) & 15 + if location_flags == PY_CODE_LOCATION_INFO_NONE: + no_line_flag = True + elif location_flags == PY_CODE_LOCATION_INFO_LONG: + line_delta = _scan_signed_varint(remaining_linetable) + num_lines = _scan_varint(remaining_linetable) + column = _scan_varint(remaining_linetable) - 1 + endcolumn = _scan_varint(remaining_linetable) - 1 + elif location_flags == PY_CODE_LOCATION_INFO_NO_COLUMNS: + line_delta = _scan_signed_varint(remaining_linetable) + elif location_flags in (PY_CODE_LOCATION_INFO_ONE_LINE0, PY_CODE_LOCATION_INFO_ONE_LINE1, PY_CODE_LOCATION_INFO_ONE_LINE2): + line_delta = location_flags - 10 + column = next(remaining_linetable) + endcolumn = next(remaining_linetable) + else: + second_byte = next(remaining_linetable) + assert not _test_check_bit(second_byte) + column = (location_flags << 3) | (second_byte >> 4) + endcolumn = column + (second_byte & 15) + + return PositionEntry( + line_delta=line_delta, + num_lines=num_lines, + code_delta=code_delta, + column=column, + endcolumn=endcolumn, + no_line_flag=no_line_flag + ) + +def parse_positions(linetable: bytes, first_lineno: int): + position_entries: list[PositionEntry] = [] + + # decode linetable entries + iter_linetable = iter(linetable) + try: + while (code_byte := next(iter_linetable)) is not None: + position_entries.append(decode_position_entry(code_byte=code_byte, remaining_linetable=iter_linetable)) + except StopIteration: + pass + + computed_line = first_lineno + for position_entry in position_entries: + computed_line += position_entry.line_delta + for _ in range(0, position_entry.code_delta, 2): + if position_entry.no_line_flag: + yield (None, None, None, None) + else: + yield (computed_line, computed_line + position_entry.num_lines, position_entry.column, position_entry.endcolumn) +##### + + +class Code312(Code311): + """ Class for a Python 3.12+ code object + New CPython "undocumented" changes make this necessary to parse the co_linetable with co_lines(). + See: https://github.com/python/cpython/blob/aaed91cabcedc16c089c4b1c9abb1114659a83d3/Objects/codeobject.c#L1245C1-L1245C17 + """ + + def __init__( + self, + co_argcount, + co_posonlyargcount, + co_kwonlyargcount, + co_nlocals, + co_stacksize, + co_flags, + co_consts, + co_code, + co_names, + co_varnames, + co_freevars, + co_cellvars, + co_filename, + co_name, + co_qualname, + co_firstlineno, + co_linetable, + co_exceptiontable, + ): + # Keyword argument parameters in the call below is more robust. + # Since things change around, robustness is good. + super(Code312, self).__init__( + co_argcount=co_argcount, + co_posonlyargcount=co_posonlyargcount, + co_kwonlyargcount=co_kwonlyargcount, + co_nlocals=co_nlocals, + co_stacksize=co_stacksize, + co_flags=co_flags, + co_consts=co_consts, + co_code=co_code, + co_names=co_names, + co_varnames=co_varnames, + co_freevars=co_freevars, + co_cellvars=co_cellvars, + co_filename=co_filename, + co_name=co_name, + co_qualname=co_qualname, + co_firstlineno=co_firstlineno, + co_linetable=co_linetable, + co_exceptiontable=co_exceptiontable, + ) + self.fieldtypes = Code312FieldTypes + if type(self) == Code312: + self.check() + + def to_native(self): + if not (PYTHON_VERSION_TRIPLE >= (3, 12)): + raise TypeError( + "Python Interpreter needs to be in 3.12 or greater; is %s" + % version_tuple_to_str() + ) + + code = deepcopy(self) + code.freeze() + try: + code.check() + except AssertionError as e: + raise TypeError(e) + + return types.CodeType( + code.co_argcount, + code.co_posonlyargcount, + code.co_kwonlyargcount, + code.co_nlocals, + code.co_stacksize, + code.co_flags, + code.co_code, + code.co_consts, + code.co_names, + code.co_varnames, + code.co_filename, + code.co_name, + code.co_qualname, + code.co_firstlineno, + code.co_linetable, + code.co_exceptiontable, + code.co_freevars, + code.co_cellvars, + ) + + def co_lines(self): + return parse_linetable(self.co_linetable, self.co_firstlineno) diff --git a/xdis/load.py b/xdis/load.py index 88dd8804..4f15cde2 100644 --- a/xdis/load.py +++ b/xdis/load.py @@ -74,7 +74,8 @@ def check_object_path(path) -> str: try: import importlib - bytecode_path = importlib.util.cache_from_source(path, optimization="") + bytecode_path = importlib.util.cache_from_source( + path, optimization="") if osp.exists(bytecode_path): return bytecode_path except Exception: @@ -94,7 +95,8 @@ def check_object_path(path) -> str: # It would be better to use a context manager function like WithNamedTemporary. # However we are seeing write errors when this is done in Windows. # So until this is resolved, we'll use mkstemp and explicitly do a close. - fd, path = tempfile.mkstemp(prefix=basename + "-", suffix=".pyc", text=False) + fd, path = tempfile.mkstemp( + prefix=basename + "-", suffix=".pyc", text=False) close(fd) py_compile.compile(spath, cfile=path, doraise=True) @@ -217,7 +219,8 @@ def load_module_from_file_object( tuple_version = magic_int2tuple(magic_int) except KeyError: if magic_int in (2657, 22138): - raise ImportError("This smells like Pyston which is not supported.") + raise ImportError( + "This smells like Pyston which is not supported.") if len(magic) >= 2: raise ImportError( @@ -329,7 +332,8 @@ def load_module_from_file_object( import traceback traceback.print_exc() - raise ImportError(f"Ill-formed bytecode file {filename}\n{kind}; {msg}") + raise ImportError( + f"Ill-formed bytecode file {filename}\n{kind}; {msg}") finally: fp.close() From b7e1b19dc7e121a748319fd343b904028b5df4ff Mon Sep 17 00:00:00 2001 From: Joel Flores Date: Wed, 20 Nov 2024 15:03:27 -0600 Subject: [PATCH 2/9] Revert "to fix python 3.12 line table issues. We do this by turning the python 3.13 object to a python 3.12 object since both of those versions do the same thing. Also added some stuff to fix some control flow issues in 3.13 as there are some cache instructions that mess up some targets" This reverts commit eaeae09f8258368089de95e8e7d681326be2d4f5. --- xdis/bytecode.py | 43 ++---- xdis/codetype/__init__.py | 20 ++- xdis/codetype/code312.py | 275 -------------------------------------- xdis/load.py | 12 +- 4 files changed, 26 insertions(+), 324 deletions(-) delete mode 100644 xdis/codetype/code312.py diff --git a/xdis/bytecode.py b/xdis/bytecode.py index fbd37111..3722ffac 100644 --- a/xdis/bytecode.py +++ b/xdis/bytecode.py @@ -201,8 +201,7 @@ def parse_exception_table(exception_table: bytes): dl = _parse_varint(iterator) depth = dl >> 1 lasti = bool(dl & 1) - entries.append(_ExceptionTableEntry( - start, end, target, depth, lasti)) + entries.append(_ExceptionTableEntry(start, end, target, depth, lasti)) except StopIteration: return entries @@ -324,8 +323,7 @@ def get_instructions_bytes( if arg & 1: argrepr = "NULL|self + " + argrepr elif ( - opc.version_tuple >= ( - 3, 12) and opc.opname[op] == "LOAD_SUPER_ATTR" + opc.version_tuple >= (3, 12) and opc.opname[op] == "LOAD_SUPER_ATTR" ): argval, argrepr = _get_name_info(arg >> 2, names) if arg & 1: @@ -335,12 +333,6 @@ def get_instructions_bytes( elif op in opc.JREL_OPS: signed_arg = -arg if "JUMP_BACKWARD" in opc.opname[op] else arg argval = i + get_jump_val(signed_arg, opc.python_version) - # deal with cache instructions in python 3.13 - if opc.version_tuple >= (3, 13): - if opc.opname[op] in ["POP_JUMP_IF_TRUE", "POP_JUMP_IF_FALSE", "POP_JUMP_IF_NONE", "POP_JUMP_IF_NOT_NONE"]: - argval += 2 - elif opc.opname[op] == 'JUMP_BACKWARD': # might be sus - argval -= 2 # FOR_ITER has a cache instruction in 3.12 if opc.version_tuple >= (3, 12) and opc.opname[op] == "FOR_ITER": argval += 2 @@ -352,10 +344,8 @@ def get_instructions_bytes( if opc.version_tuple >= (3, 13) and opc.opname[op] in ("LOAD_FAST_LOAD_FAST", "STORE_FAST_LOAD_FAST", "STORE_FAST_STORE_FAST"): arg1 = arg >> 4 arg2 = arg & 15 - argval1, argrepr1 = _get_name_info( - arg1, (varnames or tuple()) + (cells or tuple())) - argval2, argrepr2 = _get_name_info( - arg2, (varnames or tuple()) + (cells or tuple())) + argval1, argrepr1 = _get_name_info(arg1, (varnames or tuple()) + (cells or tuple())) + argval2, argrepr2 = _get_name_info(arg2, (varnames or tuple()) + (cells or tuple())) argval = argval1, argval2 argrepr = argrepr1 + ", " + argrepr2 elif opc.version_tuple >= (3, 11): @@ -372,10 +362,10 @@ def get_instructions_bytes( else: argval, argrepr = _get_name_info(arg, cells) elif op in opc.COMPARE_OPS: - if opc.python_version >= (3, 13): + if opc.python_version >= (3,13): # The fifth-lowest bit of the oparg now indicates a forced conversion to bool. argval = (opc.cmp_op[arg >> 5]) - elif opc.python_version >= (3, 12): + elif opc.python_version >= (3,12): argval = (opc.cmp_op[arg >> 4]) else: argval = (opc.cmp_op[arg]) @@ -384,12 +374,10 @@ def get_instructions_bytes( opname = opc.opname[op] if python_36 and opname in ("CALL_FUNCTION", "CALL_FUNCTION_EX"): if opname == "CALL_FUNCTION": - argrepr = format_CALL_FUNCTION( - code2num(bytecode, i - 1)) + argrepr = format_CALL_FUNCTION(code2num(bytecode, i - 1)) else: assert opname == "CALL_FUNCTION_EX" - argrepr = format_CALL_FUNCTION_EX( - code2num(bytecode, i - 1)) + argrepr = format_CALL_FUNCTION_EX(code2num(bytecode, i - 1)) else: if not ( python_36 @@ -408,8 +396,7 @@ def get_instructions_bytes( argrepr = opc.opcode_arg_fmt[opc.opname[op]](arg) opname = opc.opname[op] - inst_size = instruction_size( - op, opc) + (extended_arg_count * extended_arg_size) + inst_size = instruction_size(op, opc) + (extended_arg_count * extended_arg_size) # fallthrough = op not in opc.nofollow start_offset = offset if opc.oppop[op] == 0 else None @@ -478,8 +465,7 @@ def __init__(self, x, opc, first_line=None, current_offset=None, dup_lines=True) self.current_offset = current_offset if opc.version_tuple >= (3, 11) and hasattr(co, "co_exceptiontable"): - self.exception_entries = parse_exception_table( - co.co_exceptiontable) + self.exception_entries = parse_exception_table(co.co_exceptiontable) else: self.exception_entries = None @@ -531,8 +517,7 @@ def dis(self, asm_format="classic", show_source=False): cells = None line_starts = None - first_line_number = co.co_firstlineno if hasattr( - co, "co_firstlineno") else None + first_line_number = co.co_firstlineno if hasattr(co, "co_firstlineno") else None if inspect.iscode(co): filename = inspect.getfile(co) @@ -588,8 +573,7 @@ def disassemble_bytes( exception_entries=None, ) -> list: # Omit the line number column entirely if we have no line number info - show_lineno = line_starts is not None or self.opc.version_tuple < ( - 2, 3) + show_lineno = line_starts is not None or self.opc.version_tuple < (2, 3) show_source = show_source and show_lineno and first_line_number and filename def show_source_text(line_number: Optional[int]): @@ -793,8 +777,7 @@ def list2bytecode(inst_list: Iterable, opc, varnames, consts): operands = opcodes[1:] if opname not in opc.opname: raise TypeError( - "error at item %d [%s, %s], opcode not valid" % ( - i, opname, operands) + "error at item %d [%s, %s], opcode not valid" % (i, opname, operands) ) opcode = opc.opmap[opname] bc.append(opcode) diff --git a/xdis/codetype/__init__.py b/xdis/codetype/__init__.py index 3a2bd170..a89d0dc5 100644 --- a/xdis/codetype/__init__.py +++ b/xdis/codetype/__init__.py @@ -29,9 +29,9 @@ from xdis.codetype.code310 import Code310 from xdis.codetype.code311 import Code311, Code311FieldNames from xdis.codetype.code313 import Code313 -from xdis.codetype.code312 import Code312 from xdis.version_info import PYTHON_VERSION_TRIPLE + def codeType2Portable(code, version_tuple=PYTHON_VERSION_TRIPLE): """Converts a native types.CodeType code object into a corresponding more flexible xdis Code type. @@ -42,8 +42,7 @@ def codeType2Portable(code, version_tuple=PYTHON_VERSION_TRIPLE): raise TypeError( f"parameter expected to be a types.CodeType type; is {type(code)} instead" ) - line_table_field = "co_lnotab" if hasattr( - code, "co_lnotab") else "co_linetable" + line_table_field = "co_lnotab" if hasattr(code, "co_lnotab") else "co_linetable" line_table = getattr(code, line_table_field) if version_tuple >= (3, 0): if version_tuple < (3, 8): @@ -102,7 +101,7 @@ def codeType2Portable(code, version_tuple=PYTHON_VERSION_TRIPLE): co_firstlineno=code.co_firstlineno, co_linetable=line_table, ) - elif version_tuple[:2] == (3, 11): + elif version_tuple[:2] < (3,13): return Code311( co_argcount=code.co_argcount, co_posonlyargcount=code.co_posonlyargcount, @@ -123,8 +122,8 @@ def codeType2Portable(code, version_tuple=PYTHON_VERSION_TRIPLE): co_linetable=line_table, co_exceptiontable=code.co_exceptiontable, ) - elif (version_tuple[:2] >= (3, 12)): # version tuple >= 3, 13 - return Code312( + else: # version tuple >= 3, 13 + return Code313( co_argcount=code.co_argcount, co_posonlyargcount=code.co_posonlyargcount, co_kwonlyargcount=code.co_kwonlyargcount, @@ -210,12 +209,11 @@ def portableCodeType(version_tuple=PYTHON_VERSION_TRIPLE): elif version_tuple[:2] == (3, 10): # 3.10 return Code310 - elif version_tuple[:2] == (3, 11): - # 3.11 + elif version_tuple[:2] < (3,13): + # 3.11 ... return Code311 - elif version_tuple[:2] >= (3, 12): - #3.12 - return Code312 + else: + return Code313 elif version_tuple > (2, 0): # 2.0 .. 2.7 return Code2 diff --git a/xdis/codetype/code312.py b/xdis/codetype/code312.py deleted file mode 100644 index 4cd5fe40..00000000 --- a/xdis/codetype/code312.py +++ /dev/null @@ -1,275 +0,0 @@ -# -import types -from copy import deepcopy - -from xdis.codetype.code311 import Code311, Code311FieldTypes -from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str - -from dataclasses import dataclass -from typing import Iterable, Iterator, Generator - -Code312FieldNames = Code311FieldTypes.copy() -Code312FieldTypes = deepcopy(Code311FieldTypes) - -##### NEW "OPAQUE" LINE TABLE PARSING ##### -# See: https://github.com/python/cpython/blob/aaed91cabcedc16c089c4b1c9abb1114659a83d3/Objects/codeobject.c#L1245C1-L1245C17 -PY_CODE_LOCATION_INFO_SHORT0 = 0 -PY_CODE_LOCATION_INFO_ONE_LINE0 = 10 -PY_CODE_LOCATION_INFO_ONE_LINE1 = 11 -PY_CODE_LOCATION_INFO_ONE_LINE2 = 12 - -PY_CODE_LOCATION_INFO_NO_COLUMNS = 13 -PY_CODE_LOCATION_INFO_LONG = 14 -PY_CODE_LOCATION_INFO_NONE = 15 - -@dataclass(frozen=True) -class LineTableEntry: - line_delta: int - code_delta: int - no_line_flag: bool - -def _scan_varint(remaining_linetable: Iterable[int]) -> int: - value = 0 - for shift, read in enumerate(remaining_linetable): - value |= (read & 63) << (shift * 6) - if not (read & 64): - break - return value - -def _scan_signed_varint(remaining_linetable: Iterable[int]) -> int: - value = _scan_varint(remaining_linetable) - if value & 1: - return -(value >> 1) - return value >> 1 - -def _get_line_delta(code_byte: int, remaining_linetable: Iterable[int]): - line_delta_code = (code_byte >> 3) & 15 - if line_delta_code == PY_CODE_LOCATION_INFO_NONE: - return 0 - if line_delta_code in (PY_CODE_LOCATION_INFO_NO_COLUMNS, PY_CODE_LOCATION_INFO_LONG): - return _scan_signed_varint(remaining_linetable) - if line_delta_code == PY_CODE_LOCATION_INFO_ONE_LINE0: - return 0 - if line_delta_code == PY_CODE_LOCATION_INFO_ONE_LINE1: - return 1 - if line_delta_code == PY_CODE_LOCATION_INFO_ONE_LINE2: - return 2 - return 0 - -def _is_no_line_marker(linetable_code_byte: int): - return (linetable_code_byte >> 3) == 0x1f - -def _next_code_delta(linetable_code_byte: int): - return ((linetable_code_byte & 7) + 1) * 2 - -def _test_check_bit(linetable_code_byte: int): - return bool(linetable_code_byte & 128) - -def _go_to_next_code_byte(remaining_linetable: Iterator[int]) -> int: - try: - while not _test_check_bit((code_byte := next(remaining_linetable))): - pass - except StopIteration: - return None - return code_byte - -def decode_linetable_entry(code_byte: int, remaining_linetable: Iterable[int]) -> LineTableEntry: - assert _test_check_bit(code_byte), "malformed linetable" - return LineTableEntry( - line_delta=_get_line_delta(code_byte=code_byte, remaining_linetable=remaining_linetable), - code_delta=_next_code_delta(linetable_code_byte=code_byte), - no_line_flag=_is_no_line_marker(linetable_code_byte=code_byte) - ) - -def parse_linetable(linetable: bytes, first_lineno: int): - - linetable_entries: list[LineTableEntry] = [] - - # decode linetable entries - iter_linetable = iter(linetable) - while (code_byte := _go_to_next_code_byte(iter_linetable)) is not None: - linetable_entries.append(decode_linetable_entry(code_byte=code_byte, remaining_linetable=iter_linetable)) - - if not linetable_entries: - return - - first_entry, *remaining_entries = linetable_entries - - # compute co_lines() - code_start: int = 0 - code_end: int = first_entry.code_delta - line: int = first_lineno + first_entry.line_delta - no_line_flag = first_entry.no_line_flag - for linetable_entry in remaining_entries: - if linetable_entry.line_delta != 0 or linetable_entry.no_line_flag != no_line_flag: - # if the line changes, emit the current entry - yield (code_start, code_end, None if no_line_flag else line) - - line += linetable_entry.line_delta - no_line_flag = linetable_entry.no_line_flag - code_start = code_end - code_end += linetable_entry.code_delta - - yield (code_start, code_end, None if no_line_flag else line) - -@dataclass(frozen=True) -class PositionEntry: - line_delta: int - num_lines: int - code_delta: int - column: int - endcolumn: int - no_line_flag: bool - -def decode_position_entry(code_byte: int, remaining_linetable: Iterator[int]) -> PositionEntry: - assert _test_check_bit(code_byte), "malformed linetable" - - code_delta = _next_code_delta(code_byte) - - no_line_flag = False - column = -1 - endcolumn = -1 - line_delta = 0 - num_lines = 0 - - location_flags = (code_byte >> 3) & 15 - if location_flags == PY_CODE_LOCATION_INFO_NONE: - no_line_flag = True - elif location_flags == PY_CODE_LOCATION_INFO_LONG: - line_delta = _scan_signed_varint(remaining_linetable) - num_lines = _scan_varint(remaining_linetable) - column = _scan_varint(remaining_linetable) - 1 - endcolumn = _scan_varint(remaining_linetable) - 1 - elif location_flags == PY_CODE_LOCATION_INFO_NO_COLUMNS: - line_delta = _scan_signed_varint(remaining_linetable) - elif location_flags in (PY_CODE_LOCATION_INFO_ONE_LINE0, PY_CODE_LOCATION_INFO_ONE_LINE1, PY_CODE_LOCATION_INFO_ONE_LINE2): - line_delta = location_flags - 10 - column = next(remaining_linetable) - endcolumn = next(remaining_linetable) - else: - second_byte = next(remaining_linetable) - assert not _test_check_bit(second_byte) - column = (location_flags << 3) | (second_byte >> 4) - endcolumn = column + (second_byte & 15) - - return PositionEntry( - line_delta=line_delta, - num_lines=num_lines, - code_delta=code_delta, - column=column, - endcolumn=endcolumn, - no_line_flag=no_line_flag - ) - -def parse_positions(linetable: bytes, first_lineno: int): - position_entries: list[PositionEntry] = [] - - # decode linetable entries - iter_linetable = iter(linetable) - try: - while (code_byte := next(iter_linetable)) is not None: - position_entries.append(decode_position_entry(code_byte=code_byte, remaining_linetable=iter_linetable)) - except StopIteration: - pass - - computed_line = first_lineno - for position_entry in position_entries: - computed_line += position_entry.line_delta - for _ in range(0, position_entry.code_delta, 2): - if position_entry.no_line_flag: - yield (None, None, None, None) - else: - yield (computed_line, computed_line + position_entry.num_lines, position_entry.column, position_entry.endcolumn) -##### - - -class Code312(Code311): - """ Class for a Python 3.12+ code object - New CPython "undocumented" changes make this necessary to parse the co_linetable with co_lines(). - See: https://github.com/python/cpython/blob/aaed91cabcedc16c089c4b1c9abb1114659a83d3/Objects/codeobject.c#L1245C1-L1245C17 - """ - - def __init__( - self, - co_argcount, - co_posonlyargcount, - co_kwonlyargcount, - co_nlocals, - co_stacksize, - co_flags, - co_consts, - co_code, - co_names, - co_varnames, - co_freevars, - co_cellvars, - co_filename, - co_name, - co_qualname, - co_firstlineno, - co_linetable, - co_exceptiontable, - ): - # Keyword argument parameters in the call below is more robust. - # Since things change around, robustness is good. - super(Code312, self).__init__( - co_argcount=co_argcount, - co_posonlyargcount=co_posonlyargcount, - co_kwonlyargcount=co_kwonlyargcount, - co_nlocals=co_nlocals, - co_stacksize=co_stacksize, - co_flags=co_flags, - co_consts=co_consts, - co_code=co_code, - co_names=co_names, - co_varnames=co_varnames, - co_freevars=co_freevars, - co_cellvars=co_cellvars, - co_filename=co_filename, - co_name=co_name, - co_qualname=co_qualname, - co_firstlineno=co_firstlineno, - co_linetable=co_linetable, - co_exceptiontable=co_exceptiontable, - ) - self.fieldtypes = Code312FieldTypes - if type(self) == Code312: - self.check() - - def to_native(self): - if not (PYTHON_VERSION_TRIPLE >= (3, 12)): - raise TypeError( - "Python Interpreter needs to be in 3.12 or greater; is %s" - % version_tuple_to_str() - ) - - code = deepcopy(self) - code.freeze() - try: - code.check() - except AssertionError as e: - raise TypeError(e) - - return types.CodeType( - code.co_argcount, - code.co_posonlyargcount, - code.co_kwonlyargcount, - code.co_nlocals, - code.co_stacksize, - code.co_flags, - code.co_code, - code.co_consts, - code.co_names, - code.co_varnames, - code.co_filename, - code.co_name, - code.co_qualname, - code.co_firstlineno, - code.co_linetable, - code.co_exceptiontable, - code.co_freevars, - code.co_cellvars, - ) - - def co_lines(self): - return parse_linetable(self.co_linetable, self.co_firstlineno) diff --git a/xdis/load.py b/xdis/load.py index 4f15cde2..88dd8804 100644 --- a/xdis/load.py +++ b/xdis/load.py @@ -74,8 +74,7 @@ def check_object_path(path) -> str: try: import importlib - bytecode_path = importlib.util.cache_from_source( - path, optimization="") + bytecode_path = importlib.util.cache_from_source(path, optimization="") if osp.exists(bytecode_path): return bytecode_path except Exception: @@ -95,8 +94,7 @@ def check_object_path(path) -> str: # It would be better to use a context manager function like WithNamedTemporary. # However we are seeing write errors when this is done in Windows. # So until this is resolved, we'll use mkstemp and explicitly do a close. - fd, path = tempfile.mkstemp( - prefix=basename + "-", suffix=".pyc", text=False) + fd, path = tempfile.mkstemp(prefix=basename + "-", suffix=".pyc", text=False) close(fd) py_compile.compile(spath, cfile=path, doraise=True) @@ -219,8 +217,7 @@ def load_module_from_file_object( tuple_version = magic_int2tuple(magic_int) except KeyError: if magic_int in (2657, 22138): - raise ImportError( - "This smells like Pyston which is not supported.") + raise ImportError("This smells like Pyston which is not supported.") if len(magic) >= 2: raise ImportError( @@ -332,8 +329,7 @@ def load_module_from_file_object( import traceback traceback.print_exc() - raise ImportError( - f"Ill-formed bytecode file {filename}\n{kind}; {msg}") + raise ImportError(f"Ill-formed bytecode file {filename}\n{kind}; {msg}") finally: fp.close() From 4ca79e54a5077162c62575ee9c55a056365ba391 Mon Sep 17 00:00:00 2001 From: Joel Flores Date: Wed, 20 Nov 2024 15:07:42 -0600 Subject: [PATCH 3/9] add a python 3.12 object --- xdis/codetype/code312.py | 274 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 274 insertions(+) create mode 100644 xdis/codetype/code312.py diff --git a/xdis/codetype/code312.py b/xdis/codetype/code312.py new file mode 100644 index 00000000..c4df9050 --- /dev/null +++ b/xdis/codetype/code312.py @@ -0,0 +1,274 @@ +import types +from copy import deepcopy + +from xdis.codetype.code311 import Code311, Code311FieldTypes +from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str + +from dataclasses import dataclass +from typing import Iterable, Iterator, Generator + +Code312FieldNames = Code311FieldTypes.copy() +Code312FieldTypes = deepcopy(Code311FieldTypes) + +##### NEW "OPAQUE" LINE TABLE PARSING ##### +# See: https://github.com/python/cpython/blob/aaed91cabcedc16c089c4b1c9abb1114659a83d3/Objects/codeobject.c#L1245C1-L1245C17 +PY_CODE_LOCATION_INFO_SHORT0 = 0 +PY_CODE_LOCATION_INFO_ONE_LINE0 = 10 +PY_CODE_LOCATION_INFO_ONE_LINE1 = 11 +PY_CODE_LOCATION_INFO_ONE_LINE2 = 12 + +PY_CODE_LOCATION_INFO_NO_COLUMNS = 13 +PY_CODE_LOCATION_INFO_LONG = 14 +PY_CODE_LOCATION_INFO_NONE = 15 + +@dataclass(frozen=True) +class LineTableEntry: + line_delta: int + code_delta: int + no_line_flag: bool + +def _scan_varint(remaining_linetable: Iterable[int]) -> int: + value = 0 + for shift, read in enumerate(remaining_linetable): + value |= (read & 63) << (shift * 6) + if not (read & 64): + break + return value + +def _scan_signed_varint(remaining_linetable: Iterable[int]) -> int: + value = _scan_varint(remaining_linetable) + if value & 1: + return -(value >> 1) + return value >> 1 + +def _get_line_delta(code_byte: int, remaining_linetable: Iterable[int]): + line_delta_code = (code_byte >> 3) & 15 + if line_delta_code == PY_CODE_LOCATION_INFO_NONE: + return 0 + if line_delta_code in (PY_CODE_LOCATION_INFO_NO_COLUMNS, PY_CODE_LOCATION_INFO_LONG): + return _scan_signed_varint(remaining_linetable) + if line_delta_code == PY_CODE_LOCATION_INFO_ONE_LINE0: + return 0 + if line_delta_code == PY_CODE_LOCATION_INFO_ONE_LINE1: + return 1 + if line_delta_code == PY_CODE_LOCATION_INFO_ONE_LINE2: + return 2 + return 0 + +def _is_no_line_marker(linetable_code_byte: int): + return (linetable_code_byte >> 3) == 0x1f + +def _next_code_delta(linetable_code_byte: int): + return ((linetable_code_byte & 7) + 1) * 2 + +def _test_check_bit(linetable_code_byte: int): + return bool(linetable_code_byte & 128) + +def _go_to_next_code_byte(remaining_linetable: Iterator[int]) -> int: + try: + while not _test_check_bit((code_byte := next(remaining_linetable))): + pass + except StopIteration: + return None + return code_byte + +def decode_linetable_entry(code_byte: int, remaining_linetable: Iterable[int]) -> LineTableEntry: + assert _test_check_bit(code_byte), "malformed linetable" + return LineTableEntry( + line_delta=_get_line_delta(code_byte=code_byte, remaining_linetable=remaining_linetable), + code_delta=_next_code_delta(linetable_code_byte=code_byte), + no_line_flag=_is_no_line_marker(linetable_code_byte=code_byte) + ) + +def parse_linetable(linetable: bytes, first_lineno: int): + + linetable_entries: list[LineTableEntry] = [] + + # decode linetable entries + iter_linetable = iter(linetable) + while (code_byte := _go_to_next_code_byte(iter_linetable)) is not None: + linetable_entries.append(decode_linetable_entry(code_byte=code_byte, remaining_linetable=iter_linetable)) + + if not linetable_entries: + return + + first_entry, *remaining_entries = linetable_entries + + # compute co_lines() + code_start: int = 0 + code_end: int = first_entry.code_delta + line: int = first_lineno + first_entry.line_delta + no_line_flag = first_entry.no_line_flag + for linetable_entry in remaining_entries: + if linetable_entry.line_delta != 0 or linetable_entry.no_line_flag != no_line_flag: + # if the line changes, emit the current entry + yield (code_start, code_end, None if no_line_flag else line) + + line += linetable_entry.line_delta + no_line_flag = linetable_entry.no_line_flag + code_start = code_end + code_end += linetable_entry.code_delta + + yield (code_start, code_end, None if no_line_flag else line) + +@dataclass(frozen=True) +class PositionEntry: + line_delta: int + num_lines: int + code_delta: int + column: int + endcolumn: int + no_line_flag: bool + +def decode_position_entry(code_byte: int, remaining_linetable: Iterator[int]) -> PositionEntry: + assert _test_check_bit(code_byte), "malformed linetable" + + code_delta = _next_code_delta(code_byte) + + no_line_flag = False + column = -1 + endcolumn = -1 + line_delta = 0 + num_lines = 0 + + location_flags = (code_byte >> 3) & 15 + if location_flags == PY_CODE_LOCATION_INFO_NONE: + no_line_flag = True + elif location_flags == PY_CODE_LOCATION_INFO_LONG: + line_delta = _scan_signed_varint(remaining_linetable) + num_lines = _scan_varint(remaining_linetable) + column = _scan_varint(remaining_linetable) - 1 + endcolumn = _scan_varint(remaining_linetable) - 1 + elif location_flags == PY_CODE_LOCATION_INFO_NO_COLUMNS: + line_delta = _scan_signed_varint(remaining_linetable) + elif location_flags in (PY_CODE_LOCATION_INFO_ONE_LINE0, PY_CODE_LOCATION_INFO_ONE_LINE1, PY_CODE_LOCATION_INFO_ONE_LINE2): + line_delta = location_flags - 10 + column = next(remaining_linetable) + endcolumn = next(remaining_linetable) + else: + second_byte = next(remaining_linetable) + assert not _test_check_bit(second_byte) + column = (location_flags << 3) | (second_byte >> 4) + endcolumn = column + (second_byte & 15) + + return PositionEntry( + line_delta=line_delta, + num_lines=num_lines, + code_delta=code_delta, + column=column, + endcolumn=endcolumn, + no_line_flag=no_line_flag + ) + +def parse_positions(linetable: bytes, first_lineno: int): + position_entries: list[PositionEntry] = [] + + # decode linetable entries + iter_linetable = iter(linetable) + try: + while (code_byte := next(iter_linetable)) is not None: + position_entries.append(decode_position_entry(code_byte=code_byte, remaining_linetable=iter_linetable)) + except StopIteration: + pass + + computed_line = first_lineno + for position_entry in position_entries: + computed_line += position_entry.line_delta + for _ in range(0, position_entry.code_delta, 2): + if position_entry.no_line_flag: + yield (None, None, None, None) + else: + yield (computed_line, computed_line + position_entry.num_lines, position_entry.column, position_entry.endcolumn) +##### + + +class Code312(Code311): + """ Class for a Python 3.13+ code object + New CPython "undocumented" changes make this necessary to parse the co_linetable with co_lines(). + See: https://github.com/python/cpython/blob/aaed91cabcedc16c089c4b1c9abb1114659a83d3/Objects/codeobject.c#L1245C1-L1245C17 + """ + + def __init__( + self, + co_argcount, + co_posonlyargcount, + co_kwonlyargcount, + co_nlocals, + co_stacksize, + co_flags, + co_consts, + co_code, + co_names, + co_varnames, + co_freevars, + co_cellvars, + co_filename, + co_name, + co_qualname, + co_firstlineno, + co_linetable, + co_exceptiontable, + ): + # Keyword argument parameters in the call below is more robust. + # Since things change around, robustness is good. + super(Code312, self).__init__( + co_argcount=co_argcount, + co_posonlyargcount=co_posonlyargcount, + co_kwonlyargcount=co_kwonlyargcount, + co_nlocals=co_nlocals, + co_stacksize=co_stacksize, + co_flags=co_flags, + co_consts=co_consts, + co_code=co_code, + co_names=co_names, + co_varnames=co_varnames, + co_freevars=co_freevars, + co_cellvars=co_cellvars, + co_filename=co_filename, + co_name=co_name, + co_qualname=co_qualname, + co_firstlineno=co_firstlineno, + co_linetable=co_linetable, + co_exceptiontable=co_exceptiontable, + ) + self.fieldtypes = Code312FieldTypes + if type(self) == Code312: + self.check() + + def to_native(self): + if not (PYTHON_VERSION_TRIPLE >= (3, 12)): + raise TypeError( + "Python Interpreter needs to be in 3.12 or greater; is %s" + % version_tuple_to_str() + ) + + code = deepcopy(self) + code.freeze() + try: + code.check() + except AssertionError as e: + raise TypeError(e) + + return types.CodeType( + code.co_argcount, + code.co_posonlyargcount, + code.co_kwonlyargcount, + code.co_nlocals, + code.co_stacksize, + code.co_flags, + code.co_code, + code.co_consts, + code.co_names, + code.co_varnames, + code.co_filename, + code.co_name, + code.co_qualname, + code.co_firstlineno, + code.co_linetable, + code.co_exceptiontable, + code.co_freevars, + code.co_cellvars, + ) + + def co_lines(self): + return parse_linetable(self.co_linetable, self.co_firstlineno) From 61cf8cf61fd7f3cdeefc1c3f13831ea01a69f706 Mon Sep 17 00:00:00 2001 From: Joel Flores Date: Wed, 20 Nov 2024 15:21:24 -0600 Subject: [PATCH 4/9] deal with cache instructions in python 3.13 so it does not mess with jump targets --- xdis/bytecode.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/xdis/bytecode.py b/xdis/bytecode.py index 3722ffac..427dbef0 100644 --- a/xdis/bytecode.py +++ b/xdis/bytecode.py @@ -333,6 +333,14 @@ def get_instructions_bytes( elif op in opc.JREL_OPS: signed_arg = -arg if "JUMP_BACKWARD" in opc.opname[op] else arg argval = i + get_jump_val(signed_arg, opc.python_version) + + #check cache instructions for python 3.13 + if opc.version_tuple >= (3, 13): + if opc.opname[op] in ["POP_JUMP_IF_TRUE", "POP_JUMP_IF_FALSE", "POP_JUMP_IF_NONE", "POP_JUMP_IF_NOT_NONE"]: + argval += 2 + elif opc.opname[op] == 'JUMP_BACKWARD': + argval -= 2 + # FOR_ITER has a cache instruction in 3.12 if opc.version_tuple >= (3, 12) and opc.opname[op] == "FOR_ITER": argval += 2 From d5a2ced25d62133f0ee9a203c4a3e852bb263a01 Mon Sep 17 00:00:00 2001 From: Joel Flores Date: Wed, 20 Nov 2024 15:25:20 -0600 Subject: [PATCH 5/9] typo --- xdis/codetype/__init__.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/xdis/codetype/__init__.py b/xdis/codetype/__init__.py index a89d0dc5..29a39910 100644 --- a/xdis/codetype/__init__.py +++ b/xdis/codetype/__init__.py @@ -28,6 +28,7 @@ from xdis.codetype.code38 import Code38 from xdis.codetype.code310 import Code310 from xdis.codetype.code311 import Code311, Code311FieldNames +from xdis.codetype.code312 import Code312 from xdis.codetype.code313 import Code313 from xdis.version_info import PYTHON_VERSION_TRIPLE @@ -101,7 +102,7 @@ def codeType2Portable(code, version_tuple=PYTHON_VERSION_TRIPLE): co_firstlineno=code.co_firstlineno, co_linetable=line_table, ) - elif version_tuple[:2] < (3,13): + elif version_tuple[:2] == (3,11): return Code311( co_argcount=code.co_argcount, co_posonlyargcount=code.co_posonlyargcount, @@ -122,8 +123,8 @@ def codeType2Portable(code, version_tuple=PYTHON_VERSION_TRIPLE): co_linetable=line_table, co_exceptiontable=code.co_exceptiontable, ) - else: # version tuple >= 3, 13 - return Code313( + elif version_tuple[:2] >= (3,12): + return Code312( co_argcount=code.co_argcount, co_posonlyargcount=code.co_posonlyargcount, co_kwonlyargcount=code.co_kwonlyargcount, @@ -209,11 +210,11 @@ def portableCodeType(version_tuple=PYTHON_VERSION_TRIPLE): elif version_tuple[:2] == (3, 10): # 3.10 return Code310 - elif version_tuple[:2] < (3,13): + elif version_tuple[:2] == (3,11): # 3.11 ... return Code311 - else: - return Code313 + elif version_tuple[:2] >= (3,12): + return Code312 elif version_tuple > (2, 0): # 2.0 .. 2.7 return Code2 From 76ef0a84c47b2c17f2e7b49927347c1724d9f085 Mon Sep 17 00:00:00 2001 From: elliot Date: Mon, 2 Dec 2024 12:27:24 -0600 Subject: [PATCH 6/9] delete 313 code type --- xdis/__init__.py | 2 - xdis/codetype/__init__.py | 1 - xdis/codetype/code313.py | 274 -------------------------------------- 3 files changed, 277 deletions(-) delete mode 100644 xdis/codetype/code313.py diff --git a/xdis/__init__.py b/xdis/__init__.py index 035fd846..3f596fa8 100644 --- a/xdis/__init__.py +++ b/xdis/__init__.py @@ -40,7 +40,6 @@ Code38, Code310, Code311, - Code313, codeType2Portable, ) from xdis.codetype.base import code_has_star_arg, code_has_star_star_arg, iscode @@ -184,7 +183,6 @@ "Code3", "Code310", "Code311", - "Code313", "Code38", "code_has_star_star_arg", "code_has_star_arg", diff --git a/xdis/codetype/__init__.py b/xdis/codetype/__init__.py index 29a39910..b9c71781 100644 --- a/xdis/codetype/__init__.py +++ b/xdis/codetype/__init__.py @@ -29,7 +29,6 @@ from xdis.codetype.code310 import Code310 from xdis.codetype.code311 import Code311, Code311FieldNames from xdis.codetype.code312 import Code312 -from xdis.codetype.code313 import Code313 from xdis.version_info import PYTHON_VERSION_TRIPLE diff --git a/xdis/codetype/code313.py b/xdis/codetype/code313.py deleted file mode 100644 index b6676f8e..00000000 --- a/xdis/codetype/code313.py +++ /dev/null @@ -1,274 +0,0 @@ -import types -from copy import deepcopy - -from xdis.codetype.code311 import Code311, Code311FieldTypes -from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str - -from dataclasses import dataclass -from typing import Iterable, Iterator, Generator - -Code313FieldNames = Code311FieldTypes.copy() -Code313FieldTypes = deepcopy(Code311FieldTypes) - -##### NEW "OPAQUE" LINE TABLE PARSING ##### -# See: https://github.com/python/cpython/blob/aaed91cabcedc16c089c4b1c9abb1114659a83d3/Objects/codeobject.c#L1245C1-L1245C17 -PY_CODE_LOCATION_INFO_SHORT0 = 0 -PY_CODE_LOCATION_INFO_ONE_LINE0 = 10 -PY_CODE_LOCATION_INFO_ONE_LINE1 = 11 -PY_CODE_LOCATION_INFO_ONE_LINE2 = 12 - -PY_CODE_LOCATION_INFO_NO_COLUMNS = 13 -PY_CODE_LOCATION_INFO_LONG = 14 -PY_CODE_LOCATION_INFO_NONE = 15 - -@dataclass(frozen=True) -class LineTableEntry: - line_delta: int - code_delta: int - no_line_flag: bool - -def _scan_varint(remaining_linetable: Iterable[int]) -> int: - value = 0 - for shift, read in enumerate(remaining_linetable): - value |= (read & 63) << (shift * 6) - if not (read & 64): - break - return value - -def _scan_signed_varint(remaining_linetable: Iterable[int]) -> int: - value = _scan_varint(remaining_linetable) - if value & 1: - return -(value >> 1) - return value >> 1 - -def _get_line_delta(code_byte: int, remaining_linetable: Iterable[int]): - line_delta_code = (code_byte >> 3) & 15 - if line_delta_code == PY_CODE_LOCATION_INFO_NONE: - return 0 - if line_delta_code in (PY_CODE_LOCATION_INFO_NO_COLUMNS, PY_CODE_LOCATION_INFO_LONG): - return _scan_signed_varint(remaining_linetable) - if line_delta_code == PY_CODE_LOCATION_INFO_ONE_LINE0: - return 0 - if line_delta_code == PY_CODE_LOCATION_INFO_ONE_LINE1: - return 1 - if line_delta_code == PY_CODE_LOCATION_INFO_ONE_LINE2: - return 2 - return 0 - -def _is_no_line_marker(linetable_code_byte: int): - return (linetable_code_byte >> 3) == 0x1f - -def _next_code_delta(linetable_code_byte: int): - return ((linetable_code_byte & 7) + 1) * 2 - -def _test_check_bit(linetable_code_byte: int): - return bool(linetable_code_byte & 128) - -def _go_to_next_code_byte(remaining_linetable: Iterator[int]) -> int: - try: - while not _test_check_bit((code_byte := next(remaining_linetable))): - pass - except StopIteration: - return None - return code_byte - -def decode_linetable_entry(code_byte: int, remaining_linetable: Iterable[int]) -> LineTableEntry: - assert _test_check_bit(code_byte), "malformed linetable" - return LineTableEntry( - line_delta=_get_line_delta(code_byte=code_byte, remaining_linetable=remaining_linetable), - code_delta=_next_code_delta(linetable_code_byte=code_byte), - no_line_flag=_is_no_line_marker(linetable_code_byte=code_byte) - ) - -def parse_linetable(linetable: bytes, first_lineno: int): - - linetable_entries: list[LineTableEntry] = [] - - # decode linetable entries - iter_linetable = iter(linetable) - while (code_byte := _go_to_next_code_byte(iter_linetable)) is not None: - linetable_entries.append(decode_linetable_entry(code_byte=code_byte, remaining_linetable=iter_linetable)) - - if not linetable_entries: - return - - first_entry, *remaining_entries = linetable_entries - - # compute co_lines() - code_start: int = 0 - code_end: int = first_entry.code_delta - line: int = first_lineno + first_entry.line_delta - no_line_flag = first_entry.no_line_flag - for linetable_entry in remaining_entries: - if linetable_entry.line_delta != 0 or linetable_entry.no_line_flag != no_line_flag: - # if the line changes, emit the current entry - yield (code_start, code_end, None if no_line_flag else line) - - line += linetable_entry.line_delta - no_line_flag = linetable_entry.no_line_flag - code_start = code_end - code_end += linetable_entry.code_delta - - yield (code_start, code_end, None if no_line_flag else line) - -@dataclass(frozen=True) -class PositionEntry: - line_delta: int - num_lines: int - code_delta: int - column: int - endcolumn: int - no_line_flag: bool - -def decode_position_entry(code_byte: int, remaining_linetable: Iterator[int]) -> PositionEntry: - assert _test_check_bit(code_byte), "malformed linetable" - - code_delta = _next_code_delta(code_byte) - - no_line_flag = False - column = -1 - endcolumn = -1 - line_delta = 0 - num_lines = 0 - - location_flags = (code_byte >> 3) & 15 - if location_flags == PY_CODE_LOCATION_INFO_NONE: - no_line_flag = True - elif location_flags == PY_CODE_LOCATION_INFO_LONG: - line_delta = _scan_signed_varint(remaining_linetable) - num_lines = _scan_varint(remaining_linetable) - column = _scan_varint(remaining_linetable) - 1 - endcolumn = _scan_varint(remaining_linetable) - 1 - elif location_flags == PY_CODE_LOCATION_INFO_NO_COLUMNS: - line_delta = _scan_signed_varint(remaining_linetable) - elif location_flags in (PY_CODE_LOCATION_INFO_ONE_LINE0, PY_CODE_LOCATION_INFO_ONE_LINE1, PY_CODE_LOCATION_INFO_ONE_LINE2): - line_delta = location_flags - 10 - column = next(remaining_linetable) - endcolumn = next(remaining_linetable) - else: - second_byte = next(remaining_linetable) - assert not _test_check_bit(second_byte) - column = (location_flags << 3) | (second_byte >> 4) - endcolumn = column + (second_byte & 15) - - return PositionEntry( - line_delta=line_delta, - num_lines=num_lines, - code_delta=code_delta, - column=column, - endcolumn=endcolumn, - no_line_flag=no_line_flag - ) - -def parse_positions(linetable: bytes, first_lineno: int): - position_entries: list[PositionEntry] = [] - - # decode linetable entries - iter_linetable = iter(linetable) - try: - while (code_byte := next(iter_linetable)) is not None: - position_entries.append(decode_position_entry(code_byte=code_byte, remaining_linetable=iter_linetable)) - except StopIteration: - pass - - computed_line = first_lineno - for position_entry in position_entries: - computed_line += position_entry.line_delta - for _ in range(0, position_entry.code_delta, 2): - if position_entry.no_line_flag: - yield (None, None, None, None) - else: - yield (computed_line, computed_line + position_entry.num_lines, position_entry.column, position_entry.endcolumn) -##### - - -class Code313(Code311): - """ Class for a Python 3.13+ code object - New CPython "undocumented" changes make this necessary to parse the co_linetable with co_lines(). - See: https://github.com/python/cpython/blob/aaed91cabcedc16c089c4b1c9abb1114659a83d3/Objects/codeobject.c#L1245C1-L1245C17 - """ - - def __init__( - self, - co_argcount, - co_posonlyargcount, - co_kwonlyargcount, - co_nlocals, - co_stacksize, - co_flags, - co_consts, - co_code, - co_names, - co_varnames, - co_freevars, - co_cellvars, - co_filename, - co_name, - co_qualname, - co_firstlineno, - co_linetable, - co_exceptiontable, - ): - # Keyword argument parameters in the call below is more robust. - # Since things change around, robustness is good. - super(Code313, self).__init__( - co_argcount=co_argcount, - co_posonlyargcount=co_posonlyargcount, - co_kwonlyargcount=co_kwonlyargcount, - co_nlocals=co_nlocals, - co_stacksize=co_stacksize, - co_flags=co_flags, - co_consts=co_consts, - co_code=co_code, - co_names=co_names, - co_varnames=co_varnames, - co_freevars=co_freevars, - co_cellvars=co_cellvars, - co_filename=co_filename, - co_name=co_name, - co_qualname=co_qualname, - co_firstlineno=co_firstlineno, - co_linetable=co_linetable, - co_exceptiontable=co_exceptiontable, - ) - self.fieldtypes = Code313FieldTypes - if type(self) == Code313: - self.check() - - def to_native(self): - if not (PYTHON_VERSION_TRIPLE >= (3, 13)): - raise TypeError( - "Python Interpreter needs to be in 3.13 or greater; is %s" - % version_tuple_to_str() - ) - - code = deepcopy(self) - code.freeze() - try: - code.check() - except AssertionError as e: - raise TypeError(e) - - return types.CodeType( - code.co_argcount, - code.co_posonlyargcount, - code.co_kwonlyargcount, - code.co_nlocals, - code.co_stacksize, - code.co_flags, - code.co_code, - code.co_consts, - code.co_names, - code.co_varnames, - code.co_filename, - code.co_name, - code.co_qualname, - code.co_firstlineno, - code.co_linetable, - code.co_exceptiontable, - code.co_freevars, - code.co_cellvars, - ) - - def co_lines(self): - return parse_linetable(self.co_linetable, self.co_firstlineno) From 8353b3192d9ab3f346f9c6a0c3fa16cb4af94cba Mon Sep 17 00:00:00 2001 From: elliot Date: Tue, 3 Dec 2024 18:22:43 -0600 Subject: [PATCH 7/9] fix 311 line table entries and remove unused code --- xdis/codetype/code311.py | 331 +++++++++++++++++++++++++++++++++++++ xdis/codetype/code312.py | 179 +------------------- xdis/opcodes/opcode_311.py | 114 +------------ xdis/opcodes/opcode_312.py | 6 +- xdis/opcodes/opcode_313.py | 2 +- 5 files changed, 336 insertions(+), 296 deletions(-) diff --git a/xdis/codetype/code311.py b/xdis/codetype/code311.py index 95a5d54f..87357283 100644 --- a/xdis/codetype/code311.py +++ b/xdis/codetype/code311.py @@ -20,6 +20,118 @@ from xdis.codetype.code310 import Code310, Code310FieldTypes from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str +from dataclasses import dataclass +from typing import Iterable, Iterator, Generator + + +def parse_location_entries(location_bytes, first_line): + """ + Parses the locations table described in: https://github.com/python/cpython/blob/3.11/Objects/locations.md + The locations table replaced the line number table starting in 3.11 + """ + + def starts_new_entry(b): + return bool(b & 0b10000000) # bit 7 is set + + def extract_code(b): + return (b & 0b01111000) >> 3 # extracts bits 3-6 + + def extract_length(b): + return (b & 0b00000111) + 1 # extracts bit 0-2 + + def iter_location_codes(loc_bytes): + if len(loc_bytes) == 0: + return [] + + iter_locs = iter(loc_bytes) + entry_codes = [next(iter_locs)] + + for b in iter_locs: + if starts_new_entry(b): + yield entry_codes + entry_codes = [b] + else: + entry_codes.append(b) + + if entry_codes: + yield entry_codes + + def iter_varints(varint_bytes): + if len(varint_bytes) == 0: + return [] + + def has_next_byte(b): + return bool(b & 0b0100_0000) # has bit 6 set + + def get_value(b): + return b & 0b00111111 # extracts bits 0-5 + + iter_varint_bytes = iter(varint_bytes) + + current_value = 0 + shift_amt = 0 + + for b in iter_varint_bytes: + current_value += get_value(b) << shift_amt + if has_next_byte(b): + shift_amt += 6 + else: + yield current_value + current_value = 0 + shift_amt = 0 + + def decode_signed_varint(s): + return -(s >> 1) if s & 1 else (s >> 1) + + entries = ( + [] + ) # tuples of (code units, start line, end line, start column, end column) + + last_line = first_line + + for location_codes in iter_location_codes(location_bytes): + first_byte = location_codes[0] + location_length = extract_length(first_byte) + code = extract_code(first_byte) + + if code <= 9: # short form + start_line = last_line + end_line = start_line + second_byte = location_codes[1] + start_column = (code * 8) + ((second_byte >> 4) & 7) + end_column = start_column + (second_byte & 15) + elif code <= 12: # one line form + start_line = last_line + code - 10 + end_line = start_line + start_column = location_codes[1] + end_column = location_codes[2] + elif code == 13: # no column info + (start_line_delta,) = iter_varints(location_codes[1:]) + start_line = last_line + decode_signed_varint(start_line_delta) + end_line = start_line + start_column = None + end_column = None + elif code == 14: # long form + (start_line_delta, end_line_delta, start_column, end_column) = iter_varints( + location_codes[1:] + ) + start_line = last_line + decode_signed_varint(start_line_delta) + end_line = start_line + end_line_delta + else: # code == 15, no location + start_line = None + end_line = None + start_column = None + end_column = None + + entries.append( + (location_length, start_line, end_line, start_column, end_column) + ) + + last_line = start_line if start_line is not None else last_line + + return entries + + # Note: order is the positional order given in the Python docs for # 3.11 types.Codetype. # "posonlyargcount" is not used, but it is in other Python versions, so it @@ -50,6 +162,219 @@ Code311FieldTypes.update({"co_qualname": str, "co_exceptiontable": bytes}) +##### NEW "OPAQUE" LINE TABLE PARSING ##### +# See: https://github.com/python/cpython/blob/aaed91cabcedc16c089c4b1c9abb1114659a83d3/Objects/codeobject.c#L1245C1-L1245C17 +PY_CODE_LOCATION_INFO_SHORT0 = 0 +PY_CODE_LOCATION_INFO_ONE_LINE0 = 10 +PY_CODE_LOCATION_INFO_ONE_LINE1 = 11 +PY_CODE_LOCATION_INFO_ONE_LINE2 = 12 + +PY_CODE_LOCATION_INFO_NO_COLUMNS = 13 +PY_CODE_LOCATION_INFO_LONG = 14 +PY_CODE_LOCATION_INFO_NONE = 15 + + +@dataclass(frozen=True) +class LineTableEntry: + line_delta: int + code_delta: int + no_line_flag: bool + + +def _scan_varint(remaining_linetable: Iterable[int]) -> int: + value = 0 + for shift, read in enumerate(remaining_linetable): + value |= (read & 63) << (shift * 6) + if not (read & 64): + break + return value + + +def _scan_signed_varint(remaining_linetable: Iterable[int]) -> int: + value = _scan_varint(remaining_linetable) + if value & 1: + return -(value >> 1) + return value >> 1 + + +def _get_line_delta(code_byte: int, remaining_linetable: Iterable[int]): + line_delta_code = (code_byte >> 3) & 15 + if line_delta_code == PY_CODE_LOCATION_INFO_NONE: + return 0 + if line_delta_code in ( + PY_CODE_LOCATION_INFO_NO_COLUMNS, + PY_CODE_LOCATION_INFO_LONG, + ): + return _scan_signed_varint(remaining_linetable) + if line_delta_code == PY_CODE_LOCATION_INFO_ONE_LINE0: + return 0 + if line_delta_code == PY_CODE_LOCATION_INFO_ONE_LINE1: + return 1 + if line_delta_code == PY_CODE_LOCATION_INFO_ONE_LINE2: + return 2 + return 0 + + +def _is_no_line_marker(linetable_code_byte: int): + return (linetable_code_byte >> 3) == 0x1F + + +def _next_code_delta(linetable_code_byte: int): + return ((linetable_code_byte & 7) + 1) * 2 + + +def _test_check_bit(linetable_code_byte: int): + return bool(linetable_code_byte & 128) + + +def _go_to_next_code_byte(remaining_linetable: Iterator[int]) -> int: + try: + while not _test_check_bit((code_byte := next(remaining_linetable))): + pass + except StopIteration: + return None + return code_byte + + +def decode_linetable_entry( + code_byte: int, remaining_linetable: Iterable[int] +) -> LineTableEntry: + assert _test_check_bit(code_byte), "malformed linetable" + return LineTableEntry( + line_delta=_get_line_delta( + code_byte=code_byte, remaining_linetable=remaining_linetable + ), + code_delta=_next_code_delta(linetable_code_byte=code_byte), + no_line_flag=_is_no_line_marker(linetable_code_byte=code_byte), + ) + + +def parse_linetable(linetable: bytes, first_lineno: int): + + linetable_entries: list[LineTableEntry] = [] + + # decode linetable entries + iter_linetable = iter(linetable) + while (code_byte := _go_to_next_code_byte(iter_linetable)) is not None: + linetable_entries.append( + decode_linetable_entry( + code_byte=code_byte, remaining_linetable=iter_linetable + ) + ) + + if not linetable_entries: + return + + first_entry, *remaining_entries = linetable_entries + + # compute co_lines() + code_start: int = 0 + code_end: int = first_entry.code_delta + line: int = first_lineno + first_entry.line_delta + no_line_flag = first_entry.no_line_flag + for linetable_entry in remaining_entries: + if ( + linetable_entry.line_delta != 0 + or linetable_entry.no_line_flag != no_line_flag + ): + # if the line changes, emit the current entry + yield (code_start, code_end, None if no_line_flag else line) + + line += linetable_entry.line_delta + no_line_flag = linetable_entry.no_line_flag + code_start = code_end + code_end += linetable_entry.code_delta + + yield (code_start, code_end, None if no_line_flag else line) + + +@dataclass(frozen=True) +class PositionEntry: + line_delta: int + num_lines: int + code_delta: int + column: int + endcolumn: int + no_line_flag: bool + + +def decode_position_entry( + code_byte: int, remaining_linetable: Iterator[int] +) -> PositionEntry: + assert _test_check_bit(code_byte), "malformed linetable" + + code_delta = _next_code_delta(code_byte) + + no_line_flag = False + column = -1 + endcolumn = -1 + line_delta = 0 + num_lines = 0 + + location_flags = (code_byte >> 3) & 15 + if location_flags == PY_CODE_LOCATION_INFO_NONE: + no_line_flag = True + elif location_flags == PY_CODE_LOCATION_INFO_LONG: + line_delta = _scan_signed_varint(remaining_linetable) + num_lines = _scan_varint(remaining_linetable) + column = _scan_varint(remaining_linetable) - 1 + endcolumn = _scan_varint(remaining_linetable) - 1 + elif location_flags == PY_CODE_LOCATION_INFO_NO_COLUMNS: + line_delta = _scan_signed_varint(remaining_linetable) + elif location_flags in ( + PY_CODE_LOCATION_INFO_ONE_LINE0, + PY_CODE_LOCATION_INFO_ONE_LINE1, + PY_CODE_LOCATION_INFO_ONE_LINE2, + ): + line_delta = location_flags - 10 + column = next(remaining_linetable) + endcolumn = next(remaining_linetable) + else: + second_byte = next(remaining_linetable) + assert not _test_check_bit(second_byte) + column = (location_flags << 3) | (second_byte >> 4) + endcolumn = column + (second_byte & 15) + + return PositionEntry( + line_delta=line_delta, + num_lines=num_lines, + code_delta=code_delta, + column=column, + endcolumn=endcolumn, + no_line_flag=no_line_flag, + ) + + +def parse_positions(linetable: bytes, first_lineno: int): + position_entries: list[PositionEntry] = [] + + # decode linetable entries + iter_linetable = iter(linetable) + try: + while (code_byte := next(iter_linetable)) is not None: + position_entries.append( + decode_position_entry( + code_byte=code_byte, remaining_linetable=iter_linetable + ) + ) + except StopIteration: + pass + + computed_line = first_lineno + for position_entry in position_entries: + computed_line += position_entry.line_delta + for _ in range(0, position_entry.code_delta, 2): + if position_entry.no_line_flag: + yield (None, None, None, None) + else: + yield ( + computed_line, + computed_line + position_entry.num_lines, + position_entry.column, + position_entry.endcolumn, + ) + + class Code311(Code310): """Class for a Python 3.11+ code object used when a Python interpreter less than 3.11 is working on Python 3.11 bytecode. It also functions as an object that can be used @@ -144,3 +469,9 @@ def to_native(self): code.co_freevars, code.co_cellvars, ) + + def co_lines(self): + return parse_linetable(self.co_linetable, self.co_firstlineno) + + def co_positions(self): + return parse_location_entries(self.co_linetable, self.co_firstlineno) diff --git a/xdis/codetype/code312.py b/xdis/codetype/code312.py index c4df9050..1f37257f 100644 --- a/xdis/codetype/code312.py +++ b/xdis/codetype/code312.py @@ -4,186 +4,12 @@ from xdis.codetype.code311 import Code311, Code311FieldTypes from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str -from dataclasses import dataclass -from typing import Iterable, Iterator, Generator - Code312FieldNames = Code311FieldTypes.copy() Code312FieldTypes = deepcopy(Code311FieldTypes) -##### NEW "OPAQUE" LINE TABLE PARSING ##### -# See: https://github.com/python/cpython/blob/aaed91cabcedc16c089c4b1c9abb1114659a83d3/Objects/codeobject.c#L1245C1-L1245C17 -PY_CODE_LOCATION_INFO_SHORT0 = 0 -PY_CODE_LOCATION_INFO_ONE_LINE0 = 10 -PY_CODE_LOCATION_INFO_ONE_LINE1 = 11 -PY_CODE_LOCATION_INFO_ONE_LINE2 = 12 - -PY_CODE_LOCATION_INFO_NO_COLUMNS = 13 -PY_CODE_LOCATION_INFO_LONG = 14 -PY_CODE_LOCATION_INFO_NONE = 15 - -@dataclass(frozen=True) -class LineTableEntry: - line_delta: int - code_delta: int - no_line_flag: bool - -def _scan_varint(remaining_linetable: Iterable[int]) -> int: - value = 0 - for shift, read in enumerate(remaining_linetable): - value |= (read & 63) << (shift * 6) - if not (read & 64): - break - return value - -def _scan_signed_varint(remaining_linetable: Iterable[int]) -> int: - value = _scan_varint(remaining_linetable) - if value & 1: - return -(value >> 1) - return value >> 1 - -def _get_line_delta(code_byte: int, remaining_linetable: Iterable[int]): - line_delta_code = (code_byte >> 3) & 15 - if line_delta_code == PY_CODE_LOCATION_INFO_NONE: - return 0 - if line_delta_code in (PY_CODE_LOCATION_INFO_NO_COLUMNS, PY_CODE_LOCATION_INFO_LONG): - return _scan_signed_varint(remaining_linetable) - if line_delta_code == PY_CODE_LOCATION_INFO_ONE_LINE0: - return 0 - if line_delta_code == PY_CODE_LOCATION_INFO_ONE_LINE1: - return 1 - if line_delta_code == PY_CODE_LOCATION_INFO_ONE_LINE2: - return 2 - return 0 - -def _is_no_line_marker(linetable_code_byte: int): - return (linetable_code_byte >> 3) == 0x1f - -def _next_code_delta(linetable_code_byte: int): - return ((linetable_code_byte & 7) + 1) * 2 - -def _test_check_bit(linetable_code_byte: int): - return bool(linetable_code_byte & 128) - -def _go_to_next_code_byte(remaining_linetable: Iterator[int]) -> int: - try: - while not _test_check_bit((code_byte := next(remaining_linetable))): - pass - except StopIteration: - return None - return code_byte - -def decode_linetable_entry(code_byte: int, remaining_linetable: Iterable[int]) -> LineTableEntry: - assert _test_check_bit(code_byte), "malformed linetable" - return LineTableEntry( - line_delta=_get_line_delta(code_byte=code_byte, remaining_linetable=remaining_linetable), - code_delta=_next_code_delta(linetable_code_byte=code_byte), - no_line_flag=_is_no_line_marker(linetable_code_byte=code_byte) - ) - -def parse_linetable(linetable: bytes, first_lineno: int): - - linetable_entries: list[LineTableEntry] = [] - - # decode linetable entries - iter_linetable = iter(linetable) - while (code_byte := _go_to_next_code_byte(iter_linetable)) is not None: - linetable_entries.append(decode_linetable_entry(code_byte=code_byte, remaining_linetable=iter_linetable)) - - if not linetable_entries: - return - - first_entry, *remaining_entries = linetable_entries - - # compute co_lines() - code_start: int = 0 - code_end: int = first_entry.code_delta - line: int = first_lineno + first_entry.line_delta - no_line_flag = first_entry.no_line_flag - for linetable_entry in remaining_entries: - if linetable_entry.line_delta != 0 or linetable_entry.no_line_flag != no_line_flag: - # if the line changes, emit the current entry - yield (code_start, code_end, None if no_line_flag else line) - - line += linetable_entry.line_delta - no_line_flag = linetable_entry.no_line_flag - code_start = code_end - code_end += linetable_entry.code_delta - - yield (code_start, code_end, None if no_line_flag else line) - -@dataclass(frozen=True) -class PositionEntry: - line_delta: int - num_lines: int - code_delta: int - column: int - endcolumn: int - no_line_flag: bool - -def decode_position_entry(code_byte: int, remaining_linetable: Iterator[int]) -> PositionEntry: - assert _test_check_bit(code_byte), "malformed linetable" - - code_delta = _next_code_delta(code_byte) - - no_line_flag = False - column = -1 - endcolumn = -1 - line_delta = 0 - num_lines = 0 - - location_flags = (code_byte >> 3) & 15 - if location_flags == PY_CODE_LOCATION_INFO_NONE: - no_line_flag = True - elif location_flags == PY_CODE_LOCATION_INFO_LONG: - line_delta = _scan_signed_varint(remaining_linetable) - num_lines = _scan_varint(remaining_linetable) - column = _scan_varint(remaining_linetable) - 1 - endcolumn = _scan_varint(remaining_linetable) - 1 - elif location_flags == PY_CODE_LOCATION_INFO_NO_COLUMNS: - line_delta = _scan_signed_varint(remaining_linetable) - elif location_flags in (PY_CODE_LOCATION_INFO_ONE_LINE0, PY_CODE_LOCATION_INFO_ONE_LINE1, PY_CODE_LOCATION_INFO_ONE_LINE2): - line_delta = location_flags - 10 - column = next(remaining_linetable) - endcolumn = next(remaining_linetable) - else: - second_byte = next(remaining_linetable) - assert not _test_check_bit(second_byte) - column = (location_flags << 3) | (second_byte >> 4) - endcolumn = column + (second_byte & 15) - - return PositionEntry( - line_delta=line_delta, - num_lines=num_lines, - code_delta=code_delta, - column=column, - endcolumn=endcolumn, - no_line_flag=no_line_flag - ) - -def parse_positions(linetable: bytes, first_lineno: int): - position_entries: list[PositionEntry] = [] - - # decode linetable entries - iter_linetable = iter(linetable) - try: - while (code_byte := next(iter_linetable)) is not None: - position_entries.append(decode_position_entry(code_byte=code_byte, remaining_linetable=iter_linetable)) - except StopIteration: - pass - - computed_line = first_lineno - for position_entry in position_entries: - computed_line += position_entry.line_delta - for _ in range(0, position_entry.code_delta, 2): - if position_entry.no_line_flag: - yield (None, None, None, None) - else: - yield (computed_line, computed_line + position_entry.num_lines, position_entry.column, position_entry.endcolumn) -##### - class Code312(Code311): - """ Class for a Python 3.13+ code object + """Class for a Python 3.13+ code object New CPython "undocumented" changes make this necessary to parse the co_linetable with co_lines(). See: https://github.com/python/cpython/blob/aaed91cabcedc16c089c4b1c9abb1114659a83d3/Objects/codeobject.c#L1245C1-L1245C17 """ @@ -269,6 +95,3 @@ def to_native(self): code.co_freevars, code.co_cellvars, ) - - def co_lines(self): - return parse_linetable(self.co_linetable, self.co_firstlineno) diff --git a/xdis/opcodes/opcode_311.py b/xdis/opcodes/opcode_311.py index 9b4add2f..f07f4885 100644 --- a/xdis/opcodes/opcode_311.py +++ b/xdis/opcodes/opcode_311.py @@ -299,121 +299,9 @@ def format_BINARY_OP(arg) -> str: opcode_extended_fmt = opcode_extended_fmt311 -update_pj3(globals(), loc) -finalize_opcodes(loc) - - -def parse_location_entries(location_bytes, first_line): - """ - Parses the locations table described in: https://github.com/python/cpython/blob/3.11/Objects/locations.md - The locations table replaced the line number table starting in 3.11 - """ - - def starts_new_entry(b): - return bool(b & 0b10000000) # bit 7 is set - - def extract_code(b): - return (b & 0b01111000) >> 3 # extracts bits 3-6 - - def extract_length(b): - return (b & 0b00000111) + 1 # extracts bit 0-2 - - def iter_location_codes(loc_bytes): - if len(loc_bytes) == 0: - return [] - - iter_locs = iter(loc_bytes) - entry_codes = [next(iter_locs)] - - for b in iter_locs: - if starts_new_entry(b): - yield entry_codes - entry_codes = [b] - else: - entry_codes.append(b) - - if entry_codes: - yield entry_codes - - def iter_varints(varint_bytes): - if len(varint_bytes) == 0: - return [] - - def has_next_byte(b): - return bool(b & 0b0100_0000) # has bit 6 set - - def get_value(b): - return b & 0b00111111 # extracts bits 0-5 - - iter_varint_bytes = iter(varint_bytes) - - current_value = 0 - shift_amt = 0 - - for b in iter_varint_bytes: - current_value += get_value(b) << shift_amt - if has_next_byte(b): - shift_amt += 6 - else: - yield current_value - current_value = 0 - shift_amt = 0 - - def decode_signed_varint(s): - return -(s >> 1) if s & 1 else (s >> 1) - - entries = ( - [] - ) # tuples of (code units, start line, end line, start column, end column) - - last_line = first_line - - for location_codes in iter_location_codes(location_bytes): - first_byte = location_codes[0] - location_length = extract_length(first_byte) - code = extract_code(first_byte) - - if code <= 9: # short form - start_line = last_line - end_line = start_line - second_byte = location_codes[1] - start_column = (code * 8) + ((second_byte >> 4) & 7) - end_column = start_column + (second_byte & 15) - elif code <= 12: # one line form - start_line = last_line + code - 10 - end_line = start_line - start_column = location_codes[1] - end_column = location_codes[2] - elif code == 13: # no column info - (start_line_delta,) = iter_varints(location_codes[1:]) - start_line = last_line + decode_signed_varint(start_line_delta) - end_line = start_line - start_column = None - end_column = None - elif code == 14: # long form - (start_line_delta, end_line_delta, start_column, end_column) = iter_varints( - location_codes[1:] - ) - start_line = last_line + decode_signed_varint(start_line_delta) - end_line = start_line + end_line_delta - else: # code == 15, no location - start_line = None - end_line = None - start_column = None - end_column = None - - entries.append( - (location_length, start_line, end_line, start_column, end_column) - ) - - last_line = start_line if start_line is not None else last_line - - return entries - +opcode_arg_fmt = opcode_arg_fmt11 = opcode_arg_fmt310.copy() from xdis.opcodes.opcode_310 import findlinestarts -opcode_arg_fmt = opcode_arg_fmt11 = opcode_arg_fmt310.copy() - update_pj3(globals(), loc) finalize_opcodes(loc) diff --git a/xdis/opcodes/opcode_312.py b/xdis/opcodes/opcode_312.py index 0a8c17fc..071965e6 100644 --- a/xdis/opcodes/opcode_312.py +++ b/xdis/opcodes/opcode_312.py @@ -187,11 +187,9 @@ def format_CALL_INTRINSIC_2(arg) -> str: } opcode_extended_fmt = opcode_extended_fmt312 = opcode_extended_fmt311.copy() -# Overwrite legacy findlinestarts with the 3.11 version that uses the -# location_table syntax. -from xdis.opcodes.opcode_311 import findlinestarts, parse_location_entries - opcode_arg_fmt = opcode_arg_fmt12 = opcode_arg_fmt311.copy() +from xdis.opcodes.opcode_311 import findlinestarts + update_pj3(globals(), loc) finalize_opcodes(loc) diff --git a/xdis/opcodes/opcode_313.py b/xdis/opcodes/opcode_313.py index 598d5d39..23d876eb 100644 --- a/xdis/opcodes/opcode_313.py +++ b/xdis/opcodes/opcode_313.py @@ -341,7 +341,7 @@ opcode_arg_fmt13 = opcode_arg_fmt opcode_extended_fmt13 = opcode_extended_fmt -from xdis.opcodes.opcode_312 import findlinestarts as findlinestarts_312, parse_location_entries, format_CALL_INTRINSIC_1, format_CALL_INTRINSIC_2 +from xdis.opcodes.opcode_312 import findlinestarts as findlinestarts_312 # update any calls to findlinestarts to include the version tuple def findlinestarts_313(code, dup_lines=False): From 472036c4fdc3374c6759205a78c21a857ccb0dea Mon Sep 17 00:00:00 2001 From: elliot Date: Wed, 4 Dec 2024 17:44:19 -0600 Subject: [PATCH 8/9] Remove duplicates in recreated co_localsplusnames Co-authored-by: Joel Flores --- xdis/bytecode.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/xdis/bytecode.py b/xdis/bytecode.py index b612faec..f4c0901f 100644 --- a/xdis/bytecode.py +++ b/xdis/bytecode.py @@ -318,6 +318,10 @@ def get_logical_instruction_at_offset( # raw name index for LOAD_GLOBAL, LOAD_CONST, etc. argval = arg + + # create a localsplusnames table that resolves duplicates. + localsplusnames = (varnames or tuple()) + tuple(name for name in (cells or tuple()) if name not in varnames) + if op in opc.CONST_OPS: argval, argrepr = _get_const_info(arg, constants) elif op in opc.NAME_OPS: @@ -340,8 +344,8 @@ def get_logical_instruction_at_offset( elif op in opc.JREL_OPS: signed_arg = -arg if "JUMP_BACKWARD" in opname else arg argval = i + get_jump_val(signed_arg, opc.python_version) - - #check cache instructions for python 3.13 + + # check cache instructions for python 3.13 if opc.version_tuple >= (3, 13): if opc.opname[op] in ["POP_JUMP_IF_TRUE", "POP_JUMP_IF_FALSE", "POP_JUMP_IF_NONE", "POP_JUMP_IF_NOT_NONE"]: argval += 2 @@ -359,28 +363,24 @@ def get_logical_instruction_at_offset( if opc.version_tuple >= (3, 13) and opname in ("LOAD_FAST_LOAD_FAST", "STORE_FAST_LOAD_FAST", "STORE_FAST_STORE_FAST"): arg1 = arg >> 4 arg2 = arg & 15 - argval1, argrepr1 = _get_name_info(arg1, (varnames or tuple()) + (cells or tuple())) - argval2, argrepr2 = _get_name_info(arg2, (varnames or tuple()) + (cells or tuple())) + argval1, argrepr1 = _get_name_info(arg1, localsplusnames) + argval2, argrepr2 = _get_name_info(arg2, localsplusnames) argval = argval1, argval2 argrepr = argrepr1 + ", " + argrepr2 elif opc.version_tuple >= (3, 11): - argval, argrepr = _get_name_info( - arg, (varnames or tuple()) + (cells or tuple()) - ) + argval, argrepr = _get_name_info(arg, localsplusnames) else: argval, argrepr = _get_name_info(arg, varnames) elif op in opc.FREE_OPS: if opc.version_tuple >= (3, 11): - argval, argrepr = _get_name_info( - arg, (varnames or tuple()) + (cells or tuple()) - ) + argval, argrepr = _get_name_info(arg, localsplusnames) else: argval, argrepr = _get_name_info(arg, cells) elif op in opc.COMPARE_OPS: - if opc.python_version >= (3,13): + if opc.python_version >= (3, 13): # The fifth-lowest bit of the oparg now indicates a forced conversion to bool. argval = (opc.cmp_op[arg >> 5]) - elif opc.python_version >= (3,12): + elif opc.python_version >= (3, 12): argval = (opc.cmp_op[arg >> 4]) else: argval = (opc.cmp_op[arg]) From ac100f821adda5e739fdd7b210974c434d7b3f74 Mon Sep 17 00:00:00 2001 From: Joel Flores Date: Fri, 6 Dec 2024 15:38:37 -0600 Subject: [PATCH 9/9] oopsies backwards != minus --- xdis/bytecode.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/xdis/bytecode.py b/xdis/bytecode.py index f4c0901f..84333e48 100644 --- a/xdis/bytecode.py +++ b/xdis/bytecode.py @@ -347,10 +347,8 @@ def get_logical_instruction_at_offset( # check cache instructions for python 3.13 if opc.version_tuple >= (3, 13): - if opc.opname[op] in ["POP_JUMP_IF_TRUE", "POP_JUMP_IF_FALSE", "POP_JUMP_IF_NONE", "POP_JUMP_IF_NOT_NONE"]: + if opc.opname[op] in ["POP_JUMP_IF_TRUE", "POP_JUMP_IF_FALSE", "POP_JUMP_IF_NONE", "POP_JUMP_IF_NOT_NONE", "JUMP_BACKWARD"]: argval += 2 - elif opc.opname[op] == 'JUMP_BACKWARD': - argval -= 2 # FOR_ITER has a cache instruction in 3.12 if opc.version_tuple >= (3, 12) and opname == "FOR_ITER":