Skip to content

Commit c45502f

Browse files
author
rocky
committed
Get Graal line offsets working
1 parent d94bf3c commit c45502f

File tree

7 files changed

+257
-128
lines changed

7 files changed

+257
-128
lines changed

xdis/bytecode.py

Lines changed: 23 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
from io import StringIO
2828
from linecache import getline
2929
from types import CodeType
30-
from typing import Iterable, Iterator, Optional, Tuple, Union
30+
from typing import Iterable, Iterator, Optional, Tuple
3131

3232
from xdis.cross_dis import (
3333
format_code_info,
@@ -454,15 +454,8 @@ def next_offset(op: int, opc, offset: int) -> int:
454454

455455

456456
def get_instructions_bytes(
457-
bytecode,
457+
code_object,
458458
opc,
459-
varnames=None,
460-
names=None,
461-
constants=None,
462-
cells=None,
463-
linestarts=None,
464-
line_offset=0,
465-
exception_entries=None,
466459
):
467460
"""
468461
Iterate over the instructions in the bytecode of a code object.
@@ -472,7 +465,20 @@ def get_instructions_bytes(
472465
e.g., variable names, constants, can be specified using optional
473466
arguments.
474467
"""
468+
469+
bytecode: bytes = code_object.co_code
470+
constants: tuple = code_object.co_consts
471+
names: tuple = code_object.co_names
472+
varnames: tuple = code_object.co_varnames
473+
cells: tuple = code_object.co_cells if hasattr(code_object, "co_cells") else tuple()
474+
exception_entries = code_object.exception_entries if hasattr(code_object, "exception_entries") else tuple()
475+
# freevars: tuple = code_object.co_freevars
476+
475477
labels = opc.findlabels(bytecode, opc)
478+
if hasattr(opc, "findlinestarts"):
479+
line_starts = dict(opc.findlinestarts(code_object, dup_lines=True))
480+
else:
481+
line_starts = None
476482

477483
# PERFORMANCE FIX: Build exception labels ONCE, not on every iteration
478484
# The old code was O(n^2) because it rebuilt the same list every call to
@@ -497,7 +503,7 @@ def get_instructions_bytes(
497503
names=names,
498504
constants=constants,
499505
cells=cells,
500-
linestarts=linestarts,
506+
linestarts=line_starts,
501507
line_offset=0,
502508
exception_entries=exception_entries,
503509
labels=labels,
@@ -552,17 +558,7 @@ def __init__(
552558

553559
def __iter__(self):
554560
co = self.codeobj
555-
return get_instructions_bytes(
556-
co.co_code,
557-
self.opc,
558-
co.co_varnames,
559-
co.co_names,
560-
co.co_consts,
561-
self._cell_names,
562-
self._linestarts,
563-
line_offset=self._line_offset,
564-
exception_entries=self.exception_entries,
565-
)
561+
return get_instructions_bytes(co, self.opc)
566562

567563
def __repr__(self) -> str:
568564
return f"{self.__class__.__name__}({self._original_object!r})"
@@ -596,10 +592,8 @@ def dis(
596592
offset = -1
597593
output = StringIO()
598594
if self.opc.version_tuple > (2, 0):
599-
cells = self._cell_names
600595
line_starts = self._linestarts
601596
else:
602-
cells = None
603597
line_starts = None
604598

605599
first_line_number = co.co_firstlineno if hasattr(co, "co_firstlineno") else None
@@ -610,17 +604,8 @@ def dis(
610604
if isinstance(filename, UnicodeForPython3):
611605
filename = str(filename)
612606

613-
if isinstance(co.co_code, str):
614-
co_code = co.co_code.encode('latin-1')
615-
else:
616-
co_code = co.co_code
617-
618607
self.disassemble_bytes(
619-
co_code,
620-
varnames=co.co_varnames,
621-
names=co.co_names,
622-
constants=co.co_consts,
623-
cells=cells,
608+
co,
624609
line_starts=line_starts,
625610
line_offset=self._line_offset,
626611
file=output,
@@ -629,7 +614,6 @@ def dis(
629614
filename=filename,
630615
show_source=show_source,
631616
first_line_number=first_line_number,
632-
exception_entries=self.exception_entries,
633617
)
634618
return output.getvalue()
635619

@@ -647,20 +631,15 @@ def distb(self, tb=None) -> None:
647631

648632
def disassemble_bytes(
649633
self,
650-
bytecode: Union[CodeType, bytes, str],
634+
code_object: CodeType,
651635
lasti: int = -1,
652-
varnames=None,
653-
names=None,
654-
constants=None,
655-
cells=None,
656636
line_starts=None,
657637
file=sys.stdout,
658638
line_offset=0,
659639
asm_format="classic",
660640
filename: Optional[str] = None,
661641
show_source=True,
662642
first_line_number: Optional[int] = None,
663-
exception_entries=None,
664643
) -> list:
665644
# Omit the line number column entirely if we have no line number info
666645
show_lineno = line_starts is not None or self.opc.version_tuple < (2, 3)
@@ -703,17 +682,7 @@ def show_source_text(line_number: Optional[int]) -> None:
703682
else:
704683
get_instructions_fn = get_instructions_bytes
705684

706-
for instr in get_instructions_fn(
707-
bytecode,
708-
self.opc,
709-
varnames,
710-
names,
711-
constants,
712-
cells,
713-
line_starts,
714-
line_offset=line_offset,
715-
exception_entries=exception_entries,
716-
):
685+
for instr in get_instructions_fn(code_object, self.opc):
717686
# Python 1.x into early 2.0 uses SET_LINENO
718687
if last_was_set_lineno:
719688
instr = Instruction(
@@ -789,7 +758,7 @@ def show_source_text(line_number: Optional[int]) -> None:
789758
new_source_line = show_lineno and (
790759
extended_arg_starts_line
791760
or instr.starts_line is not None
792-
and instr.offset > 0
761+
and instr.offset >= 0
793762
)
794763
if new_source_line:
795764
file.write("\n")
@@ -833,7 +802,7 @@ def show_source_text(line_number: Optional[int]) -> None:
833802
pass
834803
return instructions
835804

836-
def get_instructions(self, x, first_line=None):
805+
def get_instructions(self, x):
837806
"""Iterator for the opcodes in methods, functions or code
838807
839808
Generates a series of Instruction named tuples giving the details of
@@ -845,22 +814,7 @@ def get_instructions(self, x, first_line=None):
845814
the disassembled code object.
846815
"""
847816
co = get_code_object(x)
848-
cell_names = co.co_cellvars + co.co_freevars
849-
line_starts = dict(self.opc.findlinestarts(co))
850-
if first_line is not None:
851-
line_offset = first_line - co.co_firstlineno
852-
else:
853-
line_offset = 0
854-
return get_instructions_bytes(
855-
co.co_code,
856-
self.opc,
857-
co.co_varnames,
858-
co.co_names,
859-
co.co_consts,
860-
cell_names,
861-
line_starts,
862-
line_offset,
863-
)
817+
return get_instructions_bytes(co.co_code, self.opc)
864818

865819

866820
def list2bytecode(

xdis/bytecode_graal.py

Lines changed: 18 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
# from xdis.bytecode import get_optype
2+
from typing import Optional
3+
24
from xdis.bytecode import Bytecode
35
from xdis.cross_dis import get_code_object
46
from xdis.instruction import Instruction
7+
from xdis.lineoffsets_graal import SourceMap, find_linestarts_graal
58
from xdis.opcodes.base_graal import (
69
BINARY_OPS,
710
COLLECTION_KIND,
@@ -11,15 +14,8 @@
1114

1215

1316
def get_instructions_bytes_graal(
14-
bytecode,
17+
code_object,
1518
opc,
16-
varnames,
17-
names,
18-
constants,
19-
cells,
20-
freevars,
21-
line_offset,
22-
exception_entries,
2319
):
2420
"""
2521
Iterate over the instructions in the bytecode of a code object.
@@ -29,19 +25,26 @@ def get_instructions_bytes_graal(
2925
e.g., variable names, constants, can be specified using optional
3026
arguments.
3127
"""
32-
# source_map = ???
28+
bytecode: bytes = code_object.co_code
29+
constants: tuple = code_object.co_consts
30+
names: tuple = code_object.co_names
31+
varnames: tuple = code_object.co_varnames
32+
# cells: tuple = code_object.co_cells
33+
# freevars: tuple = code_object.co_freevars
3334

3435
i = 0
3536
n = len(bytecode)
3637

37-
extended_arg_count = 0
3838
labels = opc.findlabels(bytecode, opc)
39+
linestarts = find_linestarts_graal(code_object, opc, dup_lines=True)
3940

41+
extended_arg_count = 0
4042
while i < n:
4143
opcode = bytecode[i]
4244
opname = opc.opname[opcode]
4345
optype = get_optype_graal(opcode, opc)
4446
offset = i
47+
starts_line = linestarts.get(offset, None)
4548

4649
arg_count = opc.arg_counts[opcode]
4750
is_jump_target = i in labels
@@ -191,7 +194,7 @@ def get_instructions_bytes_graal(
191194
arg = 0
192195
break
193196

194-
inst_size = (i - offset + 1) + (extended_arg_count * 2)
197+
inst_size = (arg_count + 1) + (extended_arg_count * 2)
195198
start_offset = offset if opc.oppop[opcode] == 0 else None
196199

197200
# for (int i = 0; i < exceptionHandlerRanges.length; i += 4) {
@@ -242,7 +245,7 @@ def get_instructions_bytes_graal(
242245

243246
yield Instruction(
244247
is_jump_target=is_jump_target,
245-
starts_line=False, # starts_line,
248+
starts_line= starts_line,
246249
offset=offset,
247250
opname=opname,
248251
opcode=opcode,
@@ -280,8 +283,7 @@ def __init__(self, x, opc, first_line=None, current_offset=None) -> None:
280283
self._line_offset = first_line - co.co_firstlineno
281284
pass
282285

283-
# self._linestarts = dict(opc.findlinestarts(co, dup_lines=dup_lines))
284-
self._linestarts = None
286+
self._linestarts = find_linestarts_graal(co, opc, dup_lines=True)
285287
self._original_object = x
286288
self.opc = opc
287289
self.opnames = opc.opname
@@ -298,11 +300,6 @@ def __iter__(self):
298300
return get_instructions_bytes_graal(
299301
co.co_code,
300302
self.opc,
301-
co.co_varnames,
302-
co.co_names,
303-
co.co_consts,
304-
co.co_cellvars,
305-
co.co_freevars,
306303
)
307304

308305
def __repr__(self) -> str:
@@ -319,13 +316,5 @@ def get_instructions(self, x):
319316
Otherwise, the source line information (if any) is taken directly from
320317
the disassembled code object.
321318
"""
322-
co = get_code_object(x)
323-
return get_instructions_bytes_graal(
324-
co.co_code,
325-
self.opc,
326-
co.co_varnames,
327-
co.co_names,
328-
co.co_consts,
329-
co.co_cellvars,
330-
co.co_freevars,
331-
)
319+
code_object = get_code_object(x)
320+
return get_instructions_bytes_graal(code_object, self.opc)

xdis/codetype/__init__.py

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
__docformat__ = "restructuredtext"
3434

3535

36-
def codeType2Portable(code, version_triple=PYTHON_VERSION_TRIPLE, is_graal: bool=False):
36+
def codeType2Portable(code, version_triple=PYTHON_VERSION_TRIPLE, is_graal: bool=False, other_fields: dict={}):
3737
"""Converts a native types.CodeType code object into a
3838
corresponding more flexible xdis Code type.
3939
"""
@@ -149,20 +149,7 @@ def codeType2Portable(code, version_triple=PYTHON_VERSION_TRIPLE, is_graal: bool
149149
version_triple=version_triple,
150150
)
151151
elif version_triple[:2] >= (3, 11):
152-
other_fields = {}
153152
if is_graal:
154-
other_fields["condition_profileCount"]=code.condition_profileCount if hasattr(code, "condition_profileCount") else -1,
155-
other_fields["endColumn"]=code.endColumn if hasattr(code, "endColumn") else -1,
156-
other_fields["endLine"]=code.endLine if hasattr(code, "endLine") else -1,
157-
other_fields["exception_handler_ranges"]=code.exception_handler_ranges if hasattr(code, "exception_handler_ranges") else tuple(),
158-
other_fields["generalizeInputsMap"]=code.generalizeInputsMap if hasattr(code, "generalizeInputsMap") else {},
159-
other_fields["generalizeVarsMap"]=code.generalizeVarsMap if hasattr(code, "generalizeVarsMap") else {},
160-
other_fields["outputCanQuicken"]=code.outputCanQuicken if hasattr(code, "outputCanQuicken") else b"",
161-
other_fields["primitiveConstants"]=code.primitiveConstants if hasattr(code, "primitiveConstants") else tuple(),
162-
other_fields["srcOffsetTable"]=code.srcOffsetTable if hasattr(code, "srcOffsetTable") else b"",
163-
other_fields["startColumn"]=code.startColumn if hasattr(code, "startColumn") else -1,
164-
other_fields["startLine"]=code.startLine if hasattr(code, "startLine") else -1,
165-
other_fields["variableShouldUnbox"]=code.variableShouldUnbox if hasattr(code, "variableShouldUnbox") else b"",
166153

167154
return Code311Graal(
168155
co_argcount=code.co_argcount,
@@ -363,7 +350,7 @@ def to_portable(
363350
version_triple=version_triple,
364351
other_fields=other_fields,
365352
)
366-
return codeType2Portable(code, version_triple, is_graal=is_graal)
353+
return codeType2Portable(code, version_triple, is_graal=is_graal, other_fields=other_fields)
367354

368355

369356
if __name__ == "__main__":

xdis/disasm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ def disco(
137137
co,
138138
timestamp,
139139
out=sys.stdout,
140-
magic_int=None,
140+
magic_int: int=-1,
141141
source_size=None,
142142
sip_hash=None,
143143
asm_format: str = "classic",

0 commit comments

Comments
 (0)