Skip to content

Commit d731d32

Browse files
committed
Simplify BREAK_LOOP detection...
by making more use of linestart. At least for now...
1 parent 04da2fb commit d731d32

File tree

5 files changed

+94
-50
lines changed

5 files changed

+94
-50
lines changed

uncompyle6/scanners/scanner2.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -495,7 +495,8 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None):
495495

496496
if show_asm in ("both", "after"):
497497
print("\n# ---- tokenization:")
498-
for t in new_tokens:
498+
# FIXME: t.format() is changing tokens!
499+
for t in new_tokens.copy():
499500
print(t.format(line_prefix=""))
500501
print()
501502
return new_tokens, customize

uncompyle6/scanners/scanner26.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,8 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None):
353353

354354
if show_asm in ("both", "after"):
355355
print("\n# ---- tokenization:")
356-
for t in tokens:
356+
# FIXME: t.format() is changing tokens!
357+
for t in tokens.copy():
357358
print(t.format(line_prefix=""))
358359
print()
359360
return tokens, customize

uncompyle6/scanners/scanner3.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -797,7 +797,8 @@ def ingest(
797797

798798
if show_asm in ("both", "after"):
799799
print("\n# ---- tokenization:")
800-
for t in new_tokens:
800+
# FIXME: t.format() is changing tokens!
801+
for t in new_tokens.copy():
801802
print(t.format(line_prefix=""))
802803
print()
803804
return new_tokens, customize

uncompyle6/scanners/scanner37base.py

Lines changed: 68 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -228,13 +228,13 @@ def tokens_append(j, token):
228228

229229
if show_asm in ("both", "before"):
230230
print("\n# ---- disassembly:")
231-
self.insts = bytecode.disassemble_bytes(
231+
bytecode.disassemble_bytes(
232232
co.co_code,
233233
varnames=co.co_varnames,
234234
names=co.co_names,
235235
constants=co.co_consts,
236236
cells=bytecode._cell_names,
237-
linestarts=bytecode._linestarts,
237+
line_starts=bytecode._linestarts,
238238
asm_format="extended",
239239
filename=co.co_filename,
240240
show_source=True,
@@ -481,12 +481,17 @@ def tokens_append(j, token):
481481
next_opname = self.insts[i + 1].opname
482482

483483
# 'Continue's include jumps to loops that are not
484-
# and the end of a block which follow with POP_BLOCK and COME_FROM_LOOP.
485-
# If the JUMP_ABSOLUTE is to a FOR_ITER and it is followed by another JUMP_FORWARD
486-
# then we'll take it as a "continue".
487-
is_continue = (
488-
self.insts[self.offset2inst_index[target]].opname == "FOR_ITER"
489-
and self.insts[i + 1].opname == "JUMP_FORWARD"
484+
# at the end of a block which follows with
485+
# POP_BLOCK and COME_FROM_LOOP. If the
486+
# JUMP_ABSOLUTE is to a FOR_ITER, and it is
487+
# followed by another JUMP_FORWARD then we'll take
488+
# it as a "continue".
489+
next_inst = self.insts[i + 1]
490+
is_continue = self.insts[
491+
self.offset2inst_index[target]
492+
].opname == "FOR_ITER" and next_inst.opname in (
493+
"JUMP_FORWARD",
494+
"JUMP_ABSOLUTE",
490495
)
491496

492497
if self.version < (3, 8) and (
@@ -501,21 +506,65 @@ def tokens_append(j, token):
501506
):
502507
opname = "CONTINUE"
503508
else:
509+
# "continue" versus "break_loop" detection is more complicated
510+
# because "continue" to an outer loop is really a "break loop"
504511
opname = "JUMP_BACK"
512+
505513
# FIXME: this is a hack to catch stuff like:
506514
# if x: continue
507515
# the "continue" is not on a new line.
508-
# There are other situations where we don't catch
509-
# CONTINUE as well.
510-
if tokens[-1].kind == "JUMP_BACK" and tokens[-1].attr <= argval:
516+
#
517+
# Another situation is where we have
518+
# for method in methods:
519+
# for B in method:
520+
# if c:
521+
# return
522+
# break # A "continue" but not the innermost one
523+
if tokens[-1].kind == "JUMP_LOOP" and tokens[-1].attr <= argval:
511524
if tokens[-2].kind == "BREAK_LOOP":
512525
del tokens[-1]
526+
j -= 1
513527
else:
514-
# intern is used because we are changing the *previous* token
515-
tokens[-1].kind = sys.intern("CONTINUE")
516-
if last_op_was_break and opname == "CONTINUE":
517-
last_op_was_break = False
518-
continue
528+
# "intern" is used because we are
529+
# changing the *previous* token. A
530+
# POP_TOP suggests a "break" rather
531+
# than a "continue"?
532+
if tokens[-2] == "POP_TOP" and (
533+
is_continue and next_inst.argval != tokens[-1].attr
534+
):
535+
tokens[-1].kind = sys.intern("BREAK_LOOP")
536+
else:
537+
tokens[-1].kind = sys.intern("CONTINUE")
538+
last_continue = tokens[-1]
539+
pass
540+
pass
541+
pass
542+
# elif (
543+
# last_continue is not None
544+
# and tokens[-1].kind == "JUMP_LOOP"
545+
# and last_continue.attr <= tokens[-1].attr
546+
# and last_continue.offset > tokens[-1].attr
547+
# ):
548+
# # Handle mis-characterized "CONTINUE"
549+
# # We have a situation like:
550+
# # loop ... (for or while)
551+
# # loop
552+
# # if ...: # code below starts here
553+
# # break # not continue
554+
# #
555+
# # POP_JUMP_IF_FALSE_LOOP # to outer loop
556+
# # JUMP_LOOP # to inner loop
557+
# # ...
558+
# # JUMP_LOOP # to outer loop
559+
# tokens[-2].kind = sys.intern("BREAK_LOOP")
560+
# pass
561+
562+
# if last_op_was_break and opname == "CONTINUE":
563+
# last_op_was_break = False
564+
# continue
565+
pass
566+
else:
567+
opname = "JUMP_FORWARD"
519568

520569
elif inst.offset in self.load_asserts:
521570
opname = "LOAD_ASSERT"
@@ -538,9 +587,10 @@ def tokens_append(j, token):
538587
)
539588
pass
540589

541-
if show_asm in ("both", "after"):
590+
if show_asm in ("both", "after") and self.version < (3, 8):
542591
print("\n# ---- tokenization:")
543-
for t in tokens:
592+
# FIXME: t.format() is changing tokens!
593+
for t in tokens.copy():
544594
print(t.format(line_prefix=""))
545595
print()
546596
return tokens, customize

uncompyle6/scanners/scanner38.py

Lines changed: 20 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,13 @@
2424

2525
from typing import Dict, Tuple
2626

27-
from uncompyle6.scanners.tok import off2int
28-
from uncompyle6.scanners.scanner37 import Scanner37
29-
from uncompyle6.scanners.scanner37base import Scanner37Base
30-
3127
# bytecode verification, verify(), uses JUMP_OPs from here
3228
from xdis.opcodes import opcode_38 as opc
3329

30+
from uncompyle6.scanners.scanner37 import Scanner37
31+
from uncompyle6.scanners.scanner37base import Scanner37Base
32+
from uncompyle6.scanners.tok import off2int
33+
3434
# bytecode verification, verify(), uses JUMP_OPS from here
3535
JUMP_OPs = opc.JUMP_OPS
3636

@@ -121,35 +121,26 @@ def ingest(
121121
new_tokens.append(token)
122122
continue
123123

124-
# We also want to avoid confusing BREAK_LOOPS with parts of the
125-
# grammar rules for loops. (Perhaps we should change the grammar.)
126-
# Try to find an adjacent JUMP_BACK which is part of the normal loop end.
127-
128-
if i + 1 < len(tokens) and tokens[i + 1] == "JUMP_BACK":
129-
# Sometimes the jump back is after the "break" instruction..
130-
jump_back_index = i + 1
131-
else:
132-
# and sometimes, because of jump-to-jump optimization, it is before the
133-
# jump target instruction.
134-
jump_back_index = self.offset2tok_index[jump_target] - 1
135-
while tokens[jump_back_index].kind.startswith("COME_FROM_"):
136-
jump_back_index -= 1
137-
pass
138-
pass
139-
jump_back_token = tokens[jump_back_index]
140-
141-
# Is this a forward jump not next to a JUMP_BACK ? ...
142-
break_loop = token.linestart and jump_back_token != "JUMP_BACK"
124+
j = i
125+
while tokens[j - 1] in ("POP_TOP", "POP_BLOCK", "POP_EXCEPT"):
126+
j -= 1
127+
if tokens[j].linestart:
128+
break
129+
token_with_linestart = tokens[j]
143130

144-
# or if there is looping jump back, then that loop
145-
# should start before where the "break" instruction sits.
146-
if break_loop or (
147-
jump_back_token == "JUMP_BACK"
148-
and jump_back_token.attr < token.off2int()
149-
):
131+
if token_with_linestart.linestart:
150132
token.kind = "BREAK_LOOP"
133+
151134
pass
152135
new_tokens.append(token)
136+
137+
if show_asm in ("both", "after"):
138+
print("\n# ---- tokenization:")
139+
# FIXME: t.format() is changing tokens!
140+
for t in new_tokens.copy():
141+
print(t.format(line_prefix=""))
142+
print()
143+
153144
return new_tokens, customize
154145

155146

0 commit comments

Comments
 (0)