diff --git a/xdis/__init__.py b/xdis/__init__.py index 035fd846..3f596fa8 100644 --- a/xdis/__init__.py +++ b/xdis/__init__.py @@ -40,7 +40,6 @@ Code38, Code310, Code311, - Code313, codeType2Portable, ) from xdis.codetype.base import code_has_star_arg, code_has_star_star_arg, iscode @@ -184,7 +183,6 @@ "Code3", "Code310", "Code311", - "Code313", "Code38", "code_has_star_star_arg", "code_has_star_arg", diff --git a/xdis/bytecode.py b/xdis/bytecode.py index 95d28877..84333e48 100644 --- a/xdis/bytecode.py +++ b/xdis/bytecode.py @@ -318,6 +318,10 @@ def get_logical_instruction_at_offset( # raw name index for LOAD_GLOBAL, LOAD_CONST, etc. argval = arg + + # create a localsplusnames table that resolves duplicates. + localsplusnames = (varnames or tuple()) + tuple(name for name in (cells or tuple()) if name not in varnames) + if op in opc.CONST_OPS: argval, argrepr = _get_const_info(arg, constants) elif op in opc.NAME_OPS: @@ -340,6 +344,12 @@ def get_logical_instruction_at_offset( elif op in opc.JREL_OPS: signed_arg = -arg if "JUMP_BACKWARD" in opname else arg argval = i + get_jump_val(signed_arg, opc.python_version) + + # check cache instructions for python 3.13 + if opc.version_tuple >= (3, 13): + if opc.opname[op] in ["POP_JUMP_IF_TRUE", "POP_JUMP_IF_FALSE", "POP_JUMP_IF_NONE", "POP_JUMP_IF_NOT_NONE", "JUMP_BACKWARD"]: + argval += 2 + # FOR_ITER has a cache instruction in 3.12 if opc.version_tuple >= (3, 12) and opname == "FOR_ITER": argval += 2 @@ -351,28 +361,24 @@ def get_logical_instruction_at_offset( if opc.version_tuple >= (3, 13) and opname in ("LOAD_FAST_LOAD_FAST", "STORE_FAST_LOAD_FAST", "STORE_FAST_STORE_FAST"): arg1 = arg >> 4 arg2 = arg & 15 - argval1, argrepr1 = _get_name_info(arg1, (varnames or tuple()) + (cells or tuple())) - argval2, argrepr2 = _get_name_info(arg2, (varnames or tuple()) + (cells or tuple())) + argval1, argrepr1 = _get_name_info(arg1, localsplusnames) + argval2, argrepr2 = _get_name_info(arg2, localsplusnames) argval = argval1, 
argval2 argrepr = argrepr1 + ", " + argrepr2 elif opc.version_tuple >= (3, 11): - argval, argrepr = _get_name_info( - arg, (varnames or tuple()) + (cells or tuple()) - ) + argval, argrepr = _get_name_info(arg, localsplusnames) else: argval, argrepr = _get_name_info(arg, varnames) elif op in opc.FREE_OPS: if opc.version_tuple >= (3, 11): - argval, argrepr = _get_name_info( - arg, (varnames or tuple()) + (cells or tuple()) - ) + argval, argrepr = _get_name_info(arg, localsplusnames) else: argval, argrepr = _get_name_info(arg, cells) elif op in opc.COMPARE_OPS: - if opc.python_version >= (3,13): + if opc.python_version >= (3, 13): # The fifth-lowest bit of the oparg now indicates a forced conversion to bool. argval = (opc.cmp_op[arg >> 5]) - elif opc.python_version >= (3,12): + elif opc.python_version >= (3, 12): argval = (opc.cmp_op[arg >> 4]) else: argval = (opc.cmp_op[arg]) diff --git a/xdis/codetype/__init__.py b/xdis/codetype/__init__.py index a89d0dc5..b9c71781 100644 --- a/xdis/codetype/__init__.py +++ b/xdis/codetype/__init__.py @@ -28,7 +28,7 @@ from xdis.codetype.code38 import Code38 from xdis.codetype.code310 import Code310 from xdis.codetype.code311 import Code311, Code311FieldNames -from xdis.codetype.code313 import Code313 +from xdis.codetype.code312 import Code312 from xdis.version_info import PYTHON_VERSION_TRIPLE @@ -101,7 +101,7 @@ def codeType2Portable(code, version_tuple=PYTHON_VERSION_TRIPLE): co_firstlineno=code.co_firstlineno, co_linetable=line_table, ) - elif version_tuple[:2] < (3,13): + elif version_tuple[:2] == (3,11): return Code311( co_argcount=code.co_argcount, co_posonlyargcount=code.co_posonlyargcount, @@ -122,8 +122,8 @@ def codeType2Portable(code, version_tuple=PYTHON_VERSION_TRIPLE): co_linetable=line_table, co_exceptiontable=code.co_exceptiontable, ) - else: # version tuple >= 3, 13 - return Code313( + elif version_tuple[:2] >= (3,12): + return Code312( co_argcount=code.co_argcount, co_posonlyargcount=code.co_posonlyargcount, 
co_kwonlyargcount=code.co_kwonlyargcount, @@ -209,11 +209,11 @@ def portableCodeType(version_tuple=PYTHON_VERSION_TRIPLE): elif version_tuple[:2] == (3, 10): # 3.10 return Code310 - elif version_tuple[:2] < (3,13): + elif version_tuple[:2] == (3,11): # 3.11 ... return Code311 - else: - return Code313 + elif version_tuple[:2] >= (3,12): + return Code312 elif version_tuple > (2, 0): # 2.0 .. 2.7 return Code2 diff --git a/xdis/codetype/code311.py b/xdis/codetype/code311.py index 95a5d54f..87357283 100644 --- a/xdis/codetype/code311.py +++ b/xdis/codetype/code311.py @@ -20,6 +20,118 @@ from xdis.codetype.code310 import Code310, Code310FieldTypes from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str +from dataclasses import dataclass +from typing import Iterable, Iterator, Generator + + +def parse_location_entries(location_bytes, first_line): + """ + Parses the locations table described in: https://github.com/python/cpython/blob/3.11/Objects/locations.md + The locations table replaced the line number table starting in 3.11 + """ + + def starts_new_entry(b): + return bool(b & 0b10000000) # bit 7 is set + + def extract_code(b): + return (b & 0b01111000) >> 3 # extracts bits 3-6 + + def extract_length(b): + return (b & 0b00000111) + 1 # extracts bit 0-2 + + def iter_location_codes(loc_bytes): + if len(loc_bytes) == 0: + return [] + + iter_locs = iter(loc_bytes) + entry_codes = [next(iter_locs)] + + for b in iter_locs: + if starts_new_entry(b): + yield entry_codes + entry_codes = [b] + else: + entry_codes.append(b) + + if entry_codes: + yield entry_codes + + def iter_varints(varint_bytes): + if len(varint_bytes) == 0: + return [] + + def has_next_byte(b): + return bool(b & 0b0100_0000) # has bit 6 set + + def get_value(b): + return b & 0b00111111 # extracts bits 0-5 + + iter_varint_bytes = iter(varint_bytes) + + current_value = 0 + shift_amt = 0 + + for b in iter_varint_bytes: + current_value += get_value(b) << shift_amt + if has_next_byte(b): + 
shift_amt += 6 + else: + yield current_value + current_value = 0 + shift_amt = 0 + + def decode_signed_varint(s): + return -(s >> 1) if s & 1 else (s >> 1) + + entries = ( + [] + ) # tuples of (code units, start line, end line, start column, end column) + + last_line = first_line + + for location_codes in iter_location_codes(location_bytes): + first_byte = location_codes[0] + location_length = extract_length(first_byte) + code = extract_code(first_byte) + + if code <= 9: # short form + start_line = last_line + end_line = start_line + second_byte = location_codes[1] + start_column = (code * 8) + ((second_byte >> 4) & 7) + end_column = start_column + (second_byte & 15) + elif code <= 12: # one line form + start_line = last_line + code - 10 + end_line = start_line + start_column = location_codes[1] + end_column = location_codes[2] + elif code == 13: # no column info + (start_line_delta,) = iter_varints(location_codes[1:]) + start_line = last_line + decode_signed_varint(start_line_delta) + end_line = start_line + start_column = None + end_column = None + elif code == 14: # long form + (start_line_delta, end_line_delta, start_column, end_column) = iter_varints( + location_codes[1:] + ) + start_line = last_line + decode_signed_varint(start_line_delta) + end_line = start_line + end_line_delta + else: # code == 15, no location + start_line = None + end_line = None + start_column = None + end_column = None + + entries.append( + (location_length, start_line, end_line, start_column, end_column) + ) + + last_line = start_line if start_line is not None else last_line + + return entries + + # Note: order is the positional order given in the Python docs for # 3.11 types.Codetype. 
# "posonlyargcount" is not used, but it is in other Python versions, so it @@ -50,6 +162,219 @@ Code311FieldTypes.update({"co_qualname": str, "co_exceptiontable": bytes}) +##### NEW "OPAQUE" LINE TABLE PARSING ##### +# See: https://github.com/python/cpython/blob/aaed91cabcedc16c089c4b1c9abb1114659a83d3/Objects/codeobject.c#L1245C1-L1245C17 +PY_CODE_LOCATION_INFO_SHORT0 = 0 +PY_CODE_LOCATION_INFO_ONE_LINE0 = 10 +PY_CODE_LOCATION_INFO_ONE_LINE1 = 11 +PY_CODE_LOCATION_INFO_ONE_LINE2 = 12 + +PY_CODE_LOCATION_INFO_NO_COLUMNS = 13 +PY_CODE_LOCATION_INFO_LONG = 14 +PY_CODE_LOCATION_INFO_NONE = 15 + + +@dataclass(frozen=True) +class LineTableEntry: + line_delta: int + code_delta: int + no_line_flag: bool + + +def _scan_varint(remaining_linetable: Iterable[int]) -> int: + value = 0 + for shift, read in enumerate(remaining_linetable): + value |= (read & 63) << (shift * 6) + if not (read & 64): + break + return value + + +def _scan_signed_varint(remaining_linetable: Iterable[int]) -> int: + value = _scan_varint(remaining_linetable) + if value & 1: + return -(value >> 1) + return value >> 1 + + +def _get_line_delta(code_byte: int, remaining_linetable: Iterable[int]): + line_delta_code = (code_byte >> 3) & 15 + if line_delta_code == PY_CODE_LOCATION_INFO_NONE: + return 0 + if line_delta_code in ( + PY_CODE_LOCATION_INFO_NO_COLUMNS, + PY_CODE_LOCATION_INFO_LONG, + ): + return _scan_signed_varint(remaining_linetable) + if line_delta_code == PY_CODE_LOCATION_INFO_ONE_LINE0: + return 0 + if line_delta_code == PY_CODE_LOCATION_INFO_ONE_LINE1: + return 1 + if line_delta_code == PY_CODE_LOCATION_INFO_ONE_LINE2: + return 2 + return 0 + + +def _is_no_line_marker(linetable_code_byte: int): + return (linetable_code_byte >> 3) == 0x1F + + +def _next_code_delta(linetable_code_byte: int): + return ((linetable_code_byte & 7) + 1) * 2 + + +def _test_check_bit(linetable_code_byte: int): + return bool(linetable_code_byte & 128) + + +def _go_to_next_code_byte(remaining_linetable: Iterator[int]) 
-> int: + try: + while not _test_check_bit((code_byte := next(remaining_linetable))): + pass + except StopIteration: + return None + return code_byte + + +def decode_linetable_entry( + code_byte: int, remaining_linetable: Iterable[int] +) -> LineTableEntry: + assert _test_check_bit(code_byte), "malformed linetable" + return LineTableEntry( + line_delta=_get_line_delta( + code_byte=code_byte, remaining_linetable=remaining_linetable + ), + code_delta=_next_code_delta(linetable_code_byte=code_byte), + no_line_flag=_is_no_line_marker(linetable_code_byte=code_byte), + ) + + +def parse_linetable(linetable: bytes, first_lineno: int): + + linetable_entries: list[LineTableEntry] = [] + + # decode linetable entries + iter_linetable = iter(linetable) + while (code_byte := _go_to_next_code_byte(iter_linetable)) is not None: + linetable_entries.append( + decode_linetable_entry( + code_byte=code_byte, remaining_linetable=iter_linetable + ) + ) + + if not linetable_entries: + return + + first_entry, *remaining_entries = linetable_entries + + # compute co_lines() + code_start: int = 0 + code_end: int = first_entry.code_delta + line: int = first_lineno + first_entry.line_delta + no_line_flag = first_entry.no_line_flag + for linetable_entry in remaining_entries: + if ( + linetable_entry.line_delta != 0 + or linetable_entry.no_line_flag != no_line_flag + ): + # if the line changes, emit the current entry + yield (code_start, code_end, None if no_line_flag else line) + + line += linetable_entry.line_delta + no_line_flag = linetable_entry.no_line_flag + code_start = code_end + code_end += linetable_entry.code_delta + + yield (code_start, code_end, None if no_line_flag else line) + + +@dataclass(frozen=True) +class PositionEntry: + line_delta: int + num_lines: int + code_delta: int + column: int + endcolumn: int + no_line_flag: bool + + +def decode_position_entry( + code_byte: int, remaining_linetable: Iterator[int] +) -> PositionEntry: + assert _test_check_bit(code_byte), "malformed 
linetable" + + code_delta = _next_code_delta(code_byte) + + no_line_flag = False + column = -1 + endcolumn = -1 + line_delta = 0 + num_lines = 0 + + location_flags = (code_byte >> 3) & 15 + if location_flags == PY_CODE_LOCATION_INFO_NONE: + no_line_flag = True + elif location_flags == PY_CODE_LOCATION_INFO_LONG: + line_delta = _scan_signed_varint(remaining_linetable) + num_lines = _scan_varint(remaining_linetable) + column = _scan_varint(remaining_linetable) - 1 + endcolumn = _scan_varint(remaining_linetable) - 1 + elif location_flags == PY_CODE_LOCATION_INFO_NO_COLUMNS: + line_delta = _scan_signed_varint(remaining_linetable) + elif location_flags in ( + PY_CODE_LOCATION_INFO_ONE_LINE0, + PY_CODE_LOCATION_INFO_ONE_LINE1, + PY_CODE_LOCATION_INFO_ONE_LINE2, + ): + line_delta = location_flags - 10 + column = next(remaining_linetable) + endcolumn = next(remaining_linetable) + else: + second_byte = next(remaining_linetable) + assert not _test_check_bit(second_byte) + column = (location_flags << 3) | (second_byte >> 4) + endcolumn = column + (second_byte & 15) + + return PositionEntry( + line_delta=line_delta, + num_lines=num_lines, + code_delta=code_delta, + column=column, + endcolumn=endcolumn, + no_line_flag=no_line_flag, + ) + + +def parse_positions(linetable: bytes, first_lineno: int): + position_entries: list[PositionEntry] = [] + + # decode linetable entries + iter_linetable = iter(linetable) + try: + while (code_byte := next(iter_linetable)) is not None: + position_entries.append( + decode_position_entry( + code_byte=code_byte, remaining_linetable=iter_linetable + ) + ) + except StopIteration: + pass + + computed_line = first_lineno + for position_entry in position_entries: + computed_line += position_entry.line_delta + for _ in range(0, position_entry.code_delta, 2): + if position_entry.no_line_flag: + yield (None, None, None, None) + else: + yield ( + computed_line, + computed_line + position_entry.num_lines, + position_entry.column, + position_entry.endcolumn, 
+ ) + + +class Code311(Code310): """Class for a Python 3.11+ code object used when a Python interpreter less than 3.11 is working on Python 3.11 bytecode. It also functions as an object that can be used @@ -144,3 +469,9 @@ def to_native(self): code.co_freevars, code.co_cellvars, ) + + def co_lines(self): + return parse_linetable(self.co_linetable, self.co_firstlineno) + + def co_positions(self): + return parse_location_entries(self.co_linetable, self.co_firstlineno) diff --git a/xdis/codetype/code312.py b/xdis/codetype/code312.py new file mode 100644 index 00000000..1f37257f --- /dev/null +++ b/xdis/codetype/code312.py @@ -0,0 +1,97 @@ +import types +from copy import deepcopy + +from xdis.codetype.code311 import Code311, Code311FieldTypes +from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str + +Code312FieldNames = Code311FieldTypes.copy() +Code312FieldTypes = deepcopy(Code311FieldTypes) + + +class Code312(Code311): + """Class for a Python 3.12+ code object + New CPython "undocumented" changes make this necessary to parse the co_linetable with co_lines(). + See: https://github.com/python/cpython/blob/aaed91cabcedc16c089c4b1c9abb1114659a83d3/Objects/codeobject.c#L1245C1-L1245C17 + """ + + def __init__( + self, + co_argcount, + co_posonlyargcount, + co_kwonlyargcount, + co_nlocals, + co_stacksize, + co_flags, + co_consts, + co_code, + co_names, + co_varnames, + co_freevars, + co_cellvars, + co_filename, + co_name, + co_qualname, + co_firstlineno, + co_linetable, + co_exceptiontable, + ): + # Keyword argument parameters in the call below is more robust. + # Since things change around, robustness is good. 
+ super(Code312, self).__init__( + co_argcount=co_argcount, + co_posonlyargcount=co_posonlyargcount, + co_kwonlyargcount=co_kwonlyargcount, + co_nlocals=co_nlocals, + co_stacksize=co_stacksize, + co_flags=co_flags, + co_consts=co_consts, + co_code=co_code, + co_names=co_names, + co_varnames=co_varnames, + co_freevars=co_freevars, + co_cellvars=co_cellvars, + co_filename=co_filename, + co_name=co_name, + co_qualname=co_qualname, + co_firstlineno=co_firstlineno, + co_linetable=co_linetable, + co_exceptiontable=co_exceptiontable, + ) + self.fieldtypes = Code312FieldTypes + if type(self) == Code312: + self.check() + + def to_native(self): + if not (PYTHON_VERSION_TRIPLE >= (3, 12)): + raise TypeError( + "Python Interpreter needs to be in 3.12 or greater; is %s" + % version_tuple_to_str() + ) + + code = deepcopy(self) + code.freeze() + try: + code.check() + except AssertionError as e: + raise TypeError(e) + + return types.CodeType( + code.co_argcount, + code.co_posonlyargcount, + code.co_kwonlyargcount, + code.co_nlocals, + code.co_stacksize, + code.co_flags, + code.co_code, + code.co_consts, + code.co_names, + code.co_varnames, + code.co_filename, + code.co_name, + code.co_qualname, + code.co_firstlineno, + code.co_linetable, + code.co_exceptiontable, + code.co_freevars, + code.co_cellvars, + ) diff --git a/xdis/codetype/code313.py b/xdis/codetype/code313.py deleted file mode 100644 index b6676f8e..00000000 --- a/xdis/codetype/code313.py +++ /dev/null @@ -1,274 +0,0 @@ -import types -from copy import deepcopy - -from xdis.codetype.code311 import Code311, Code311FieldTypes -from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str - -from dataclasses import dataclass -from typing import Iterable, Iterator, Generator - -Code313FieldNames = Code311FieldTypes.copy() -Code313FieldTypes = deepcopy(Code311FieldTypes) - -##### NEW "OPAQUE" LINE TABLE PARSING ##### -# See: 
https://github.com/python/cpython/blob/aaed91cabcedc16c089c4b1c9abb1114659a83d3/Objects/codeobject.c#L1245C1-L1245C17 -PY_CODE_LOCATION_INFO_SHORT0 = 0 -PY_CODE_LOCATION_INFO_ONE_LINE0 = 10 -PY_CODE_LOCATION_INFO_ONE_LINE1 = 11 -PY_CODE_LOCATION_INFO_ONE_LINE2 = 12 - -PY_CODE_LOCATION_INFO_NO_COLUMNS = 13 -PY_CODE_LOCATION_INFO_LONG = 14 -PY_CODE_LOCATION_INFO_NONE = 15 - -@dataclass(frozen=True) -class LineTableEntry: - line_delta: int - code_delta: int - no_line_flag: bool - -def _scan_varint(remaining_linetable: Iterable[int]) -> int: - value = 0 - for shift, read in enumerate(remaining_linetable): - value |= (read & 63) << (shift * 6) - if not (read & 64): - break - return value - -def _scan_signed_varint(remaining_linetable: Iterable[int]) -> int: - value = _scan_varint(remaining_linetable) - if value & 1: - return -(value >> 1) - return value >> 1 - -def _get_line_delta(code_byte: int, remaining_linetable: Iterable[int]): - line_delta_code = (code_byte >> 3) & 15 - if line_delta_code == PY_CODE_LOCATION_INFO_NONE: - return 0 - if line_delta_code in (PY_CODE_LOCATION_INFO_NO_COLUMNS, PY_CODE_LOCATION_INFO_LONG): - return _scan_signed_varint(remaining_linetable) - if line_delta_code == PY_CODE_LOCATION_INFO_ONE_LINE0: - return 0 - if line_delta_code == PY_CODE_LOCATION_INFO_ONE_LINE1: - return 1 - if line_delta_code == PY_CODE_LOCATION_INFO_ONE_LINE2: - return 2 - return 0 - -def _is_no_line_marker(linetable_code_byte: int): - return (linetable_code_byte >> 3) == 0x1f - -def _next_code_delta(linetable_code_byte: int): - return ((linetable_code_byte & 7) + 1) * 2 - -def _test_check_bit(linetable_code_byte: int): - return bool(linetable_code_byte & 128) - -def _go_to_next_code_byte(remaining_linetable: Iterator[int]) -> int: - try: - while not _test_check_bit((code_byte := next(remaining_linetable))): - pass - except StopIteration: - return None - return code_byte - -def decode_linetable_entry(code_byte: int, remaining_linetable: Iterable[int]) -> LineTableEntry: 
- assert _test_check_bit(code_byte), "malformed linetable" - return LineTableEntry( - line_delta=_get_line_delta(code_byte=code_byte, remaining_linetable=remaining_linetable), - code_delta=_next_code_delta(linetable_code_byte=code_byte), - no_line_flag=_is_no_line_marker(linetable_code_byte=code_byte) - ) - -def parse_linetable(linetable: bytes, first_lineno: int): - - linetable_entries: list[LineTableEntry] = [] - - # decode linetable entries - iter_linetable = iter(linetable) - while (code_byte := _go_to_next_code_byte(iter_linetable)) is not None: - linetable_entries.append(decode_linetable_entry(code_byte=code_byte, remaining_linetable=iter_linetable)) - - if not linetable_entries: - return - - first_entry, *remaining_entries = linetable_entries - - # compute co_lines() - code_start: int = 0 - code_end: int = first_entry.code_delta - line: int = first_lineno + first_entry.line_delta - no_line_flag = first_entry.no_line_flag - for linetable_entry in remaining_entries: - if linetable_entry.line_delta != 0 or linetable_entry.no_line_flag != no_line_flag: - # if the line changes, emit the current entry - yield (code_start, code_end, None if no_line_flag else line) - - line += linetable_entry.line_delta - no_line_flag = linetable_entry.no_line_flag - code_start = code_end - code_end += linetable_entry.code_delta - - yield (code_start, code_end, None if no_line_flag else line) - -@dataclass(frozen=True) -class PositionEntry: - line_delta: int - num_lines: int - code_delta: int - column: int - endcolumn: int - no_line_flag: bool - -def decode_position_entry(code_byte: int, remaining_linetable: Iterator[int]) -> PositionEntry: - assert _test_check_bit(code_byte), "malformed linetable" - - code_delta = _next_code_delta(code_byte) - - no_line_flag = False - column = -1 - endcolumn = -1 - line_delta = 0 - num_lines = 0 - - location_flags = (code_byte >> 3) & 15 - if location_flags == PY_CODE_LOCATION_INFO_NONE: - no_line_flag = True - elif location_flags == 
PY_CODE_LOCATION_INFO_LONG: - line_delta = _scan_signed_varint(remaining_linetable) - num_lines = _scan_varint(remaining_linetable) - column = _scan_varint(remaining_linetable) - 1 - endcolumn = _scan_varint(remaining_linetable) - 1 - elif location_flags == PY_CODE_LOCATION_INFO_NO_COLUMNS: - line_delta = _scan_signed_varint(remaining_linetable) - elif location_flags in (PY_CODE_LOCATION_INFO_ONE_LINE0, PY_CODE_LOCATION_INFO_ONE_LINE1, PY_CODE_LOCATION_INFO_ONE_LINE2): - line_delta = location_flags - 10 - column = next(remaining_linetable) - endcolumn = next(remaining_linetable) - else: - second_byte = next(remaining_linetable) - assert not _test_check_bit(second_byte) - column = (location_flags << 3) | (second_byte >> 4) - endcolumn = column + (second_byte & 15) - - return PositionEntry( - line_delta=line_delta, - num_lines=num_lines, - code_delta=code_delta, - column=column, - endcolumn=endcolumn, - no_line_flag=no_line_flag - ) - -def parse_positions(linetable: bytes, first_lineno: int): - position_entries: list[PositionEntry] = [] - - # decode linetable entries - iter_linetable = iter(linetable) - try: - while (code_byte := next(iter_linetable)) is not None: - position_entries.append(decode_position_entry(code_byte=code_byte, remaining_linetable=iter_linetable)) - except StopIteration: - pass - - computed_line = first_lineno - for position_entry in position_entries: - computed_line += position_entry.line_delta - for _ in range(0, position_entry.code_delta, 2): - if position_entry.no_line_flag: - yield (None, None, None, None) - else: - yield (computed_line, computed_line + position_entry.num_lines, position_entry.column, position_entry.endcolumn) -##### - - -class Code313(Code311): - """ Class for a Python 3.13+ code object - New CPython "undocumented" changes make this necessary to parse the co_linetable with co_lines(). 
- See: https://github.com/python/cpython/blob/aaed91cabcedc16c089c4b1c9abb1114659a83d3/Objects/codeobject.c#L1245C1-L1245C17 - """ - - def __init__( - self, - co_argcount, - co_posonlyargcount, - co_kwonlyargcount, - co_nlocals, - co_stacksize, - co_flags, - co_consts, - co_code, - co_names, - co_varnames, - co_freevars, - co_cellvars, - co_filename, - co_name, - co_qualname, - co_firstlineno, - co_linetable, - co_exceptiontable, - ): - # Keyword argument parameters in the call below is more robust. - # Since things change around, robustness is good. - super(Code313, self).__init__( - co_argcount=co_argcount, - co_posonlyargcount=co_posonlyargcount, - co_kwonlyargcount=co_kwonlyargcount, - co_nlocals=co_nlocals, - co_stacksize=co_stacksize, - co_flags=co_flags, - co_consts=co_consts, - co_code=co_code, - co_names=co_names, - co_varnames=co_varnames, - co_freevars=co_freevars, - co_cellvars=co_cellvars, - co_filename=co_filename, - co_name=co_name, - co_qualname=co_qualname, - co_firstlineno=co_firstlineno, - co_linetable=co_linetable, - co_exceptiontable=co_exceptiontable, - ) - self.fieldtypes = Code313FieldTypes - if type(self) == Code313: - self.check() - - def to_native(self): - if not (PYTHON_VERSION_TRIPLE >= (3, 13)): - raise TypeError( - "Python Interpreter needs to be in 3.13 or greater; is %s" - % version_tuple_to_str() - ) - - code = deepcopy(self) - code.freeze() - try: - code.check() - except AssertionError as e: - raise TypeError(e) - - return types.CodeType( - code.co_argcount, - code.co_posonlyargcount, - code.co_kwonlyargcount, - code.co_nlocals, - code.co_stacksize, - code.co_flags, - code.co_code, - code.co_consts, - code.co_names, - code.co_varnames, - code.co_filename, - code.co_name, - code.co_qualname, - code.co_firstlineno, - code.co_linetable, - code.co_exceptiontable, - code.co_freevars, - code.co_cellvars, - ) - - def co_lines(self): - return parse_linetable(self.co_linetable, self.co_firstlineno) diff --git a/xdis/opcodes/opcode_311.py 
b/xdis/opcodes/opcode_311.py index 9b4add2f..f07f4885 100644 --- a/xdis/opcodes/opcode_311.py +++ b/xdis/opcodes/opcode_311.py @@ -299,121 +299,9 @@ def format_BINARY_OP(arg) -> str: opcode_extended_fmt = opcode_extended_fmt311 -update_pj3(globals(), loc) -finalize_opcodes(loc) - - -def parse_location_entries(location_bytes, first_line): - """ - Parses the locations table described in: https://github.com/python/cpython/blob/3.11/Objects/locations.md - The locations table replaced the line number table starting in 3.11 - """ - - def starts_new_entry(b): - return bool(b & 0b10000000) # bit 7 is set - - def extract_code(b): - return (b & 0b01111000) >> 3 # extracts bits 3-6 - - def extract_length(b): - return (b & 0b00000111) + 1 # extracts bit 0-2 - - def iter_location_codes(loc_bytes): - if len(loc_bytes) == 0: - return [] - - iter_locs = iter(loc_bytes) - entry_codes = [next(iter_locs)] - - for b in iter_locs: - if starts_new_entry(b): - yield entry_codes - entry_codes = [b] - else: - entry_codes.append(b) - - if entry_codes: - yield entry_codes - - def iter_varints(varint_bytes): - if len(varint_bytes) == 0: - return [] - - def has_next_byte(b): - return bool(b & 0b0100_0000) # has bit 6 set - - def get_value(b): - return b & 0b00111111 # extracts bits 0-5 - - iter_varint_bytes = iter(varint_bytes) - - current_value = 0 - shift_amt = 0 - - for b in iter_varint_bytes: - current_value += get_value(b) << shift_amt - if has_next_byte(b): - shift_amt += 6 - else: - yield current_value - current_value = 0 - shift_amt = 0 - - def decode_signed_varint(s): - return -(s >> 1) if s & 1 else (s >> 1) - - entries = ( - [] - ) # tuples of (code units, start line, end line, start column, end column) - - last_line = first_line - - for location_codes in iter_location_codes(location_bytes): - first_byte = location_codes[0] - location_length = extract_length(first_byte) - code = extract_code(first_byte) - - if code <= 9: # short form - start_line = last_line - end_line = start_line 
- second_byte = location_codes[1] - start_column = (code * 8) + ((second_byte >> 4) & 7) - end_column = start_column + (second_byte & 15) - elif code <= 12: # one line form - start_line = last_line + code - 10 - end_line = start_line - start_column = location_codes[1] - end_column = location_codes[2] - elif code == 13: # no column info - (start_line_delta,) = iter_varints(location_codes[1:]) - start_line = last_line + decode_signed_varint(start_line_delta) - end_line = start_line - start_column = None - end_column = None - elif code == 14: # long form - (start_line_delta, end_line_delta, start_column, end_column) = iter_varints( - location_codes[1:] - ) - start_line = last_line + decode_signed_varint(start_line_delta) - end_line = start_line + end_line_delta - else: # code == 15, no location - start_line = None - end_line = None - start_column = None - end_column = None - - entries.append( - (location_length, start_line, end_line, start_column, end_column) - ) - - last_line = start_line if start_line is not None else last_line - - return entries - +opcode_arg_fmt = opcode_arg_fmt11 = opcode_arg_fmt310.copy() from xdis.opcodes.opcode_310 import findlinestarts -opcode_arg_fmt = opcode_arg_fmt11 = opcode_arg_fmt310.copy() - update_pj3(globals(), loc) finalize_opcodes(loc) diff --git a/xdis/opcodes/opcode_312.py b/xdis/opcodes/opcode_312.py index 968186f1..29d695ae 100644 --- a/xdis/opcodes/opcode_312.py +++ b/xdis/opcodes/opcode_312.py @@ -187,11 +187,9 @@ def format_CALL_INTRINSIC_2(arg) -> str: } opcode_extended_fmt = opcode_extended_fmt312 = opcode_extended_fmt311.copy() -# Overwrite legacy findlinestarts with the 3.11 version that uses the -# location_table syntax. -from xdis.opcodes.opcode_311 import findlinestarts, parse_location_entries - opcode_arg_fmt = opcode_arg_fmt12 = opcode_arg_fmt311.copy() +from xdis.opcodes.opcode_311 import findlinestarts + update_pj3(globals(), loc) finalize_opcodes(loc)