Skip to content

Commit 81922bd

Browse files
committed
Handle long dict literals in 3.4- better...
Bracket in pseudo op COLLECTION_START ... BUILD_xx
1 parent d731d32 commit 81922bd

File tree

2 files changed

+141
-31
lines changed

2 files changed

+141
-31
lines changed

uncompyle6/scanners/scanner3.py

Lines changed: 140 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ def bound_collection_from_inst(
216216
collection_type: str,
217217
) -> Optional[list]:
218218
"""
219-
Try to a replace sequence of instruction that ends with a
219+
Try to replace a sequence of instructions that ends with a
220220
BUILD_xxx with a sequence that can be parsed much faster, but
221221
inserting the token boundary at the beginning of the sequence.
222222
"""
@@ -298,8 +298,8 @@ def bound_collection_from_inst(
298298
)
299299
return new_tokens
300300

301-
def bound_map_from_inst(
302-
self, insts: list, next_tokens: list, inst: Instruction, t: Token, i: int
301+
def bound_map_from_inst_35(
302+
self, insts: list, next_tokens: list, t: Token, i: int
303303
) -> Optional[list]:
304304
"""
305305
Try to turn a sequence of instructions that ends with a BUILD_MAP into
@@ -315,25 +315,19 @@ def bound_map_from_inst(
315315
if count < 5:
316316
return None
317317

318-
if self.version >= (3, 5):
319-
# Newer Python BUILD_MAP argument's count is a
320-
# key and value pair so it is multiplied by two.
321-
collection_start = i - (count * 2)
322-
assert (count * 2) <= i
323-
324-
for j in range(collection_start, i, 2):
325-
if insts[j].opname not in ("LOAD_CONST",):
326-
return None
327-
if insts[j + 1].opname not in ("LOAD_CONST",):
328-
return None
329-
330-
collection_start = i - (2 * count)
331-
collection_enum = CONST_COLLECTIONS.index("CONST_MAP")
332-
# else: Older Python count is sum of all key and value pairs
333-
# Each pair is added individually like:
334-
# LOAD_CONST ("Max-Age")
335-
# LOAD_CONST ("max-age")
336-
# STORE_MAP
318+
# Newer Python BUILD_MAP argument's count is a
319+
# key and value pair so it is multiplied by two.
320+
collection_start = i - (count * 2)
321+
assert (count * 2) <= i
322+
323+
for j in range(collection_start, i, 2):
324+
if insts[j].opname not in ("LOAD_CONST",):
325+
return None
326+
if insts[j + 1].opname not in ("LOAD_CONST",):
327+
return None
328+
329+
collection_start = i - (2 * count)
330+
collection_enum = CONST_COLLECTIONS.index("CONST_MAP")
337331

338332
# If we get here, all instructions before tokens[i] are LOAD_CONST and
339333
# we can add a boundary marker and change LOAD_CONST to
@@ -346,7 +340,7 @@ def bound_map_from_inst(
346340
attr=collection_enum,
347341
pattr="CONST_MAP",
348342
offset=f"{start_offset}_0",
349-
linestart=False,
343+
linestart=insts[collection_start].starts_line,
350344
has_arg=True,
351345
has_extended_arg=False,
352346
opc=self.opc,
@@ -364,6 +358,7 @@ def bound_map_from_inst(
364358
has_arg=True,
365359
has_extended_arg=False,
366360
opc=self.opc,
361+
optype="pseudo",
367362
)
368363
)
369364
new_tokens.append(
@@ -376,7 +371,7 @@ def bound_map_from_inst(
376371
has_arg=True,
377372
has_extended_arg=False,
378373
opc=self.opc,
379-
optype=insts[j + 1].optype,
374+
optype="pseudo",
380375
)
381376
)
382377
new_tokens.append(
@@ -389,7 +384,93 @@ def bound_map_from_inst(
389384
has_arg=t.has_arg,
390385
has_extended_arg=False,
391386
opc=t.opc,
392-
optype=t.optype,
387+
optype="pseudo",
388+
)
389+
)
390+
return new_tokens
391+
392+
def bound_map_from_inst_pre35(
393+
self, insts: list, next_tokens: list, t: Token, i: int
394+
):
395+
"""
396+
Try to turn a sequence of instructions that ends with a BUILD_MAP into
397+
a sequence that can be parsed much faster, but inserting the
398+
token boundary at the beginning of the sequence.
399+
"""
400+
count = t.attr
401+
assert isinstance(count, int)
402+
403+
# For small dicts don't bother
404+
if count < 10:
405+
return None
406+
407+
# Older Python BUILD_MAP argument's count is the number of key/value
408+
# pairs; each pair is followed by a STORE_MAP, so it is multiplied by three.
409+
collection_end = i + 1 + count * 3
410+
411+
for j in range(i + 1, collection_end, 3):
412+
if insts[j].opname not in ("LOAD_CONST",):
413+
return None
414+
if insts[j + 1].opname not in ("LOAD_CONST",):
415+
return None
416+
if insts[j + 2].opname not in ("STORE_MAP",):
417+
return None
418+
419+
collection_enum = CONST_COLLECTIONS.index("CONST_MAP")
420+
421+
new_tokens = next_tokens[:i]
422+
start_offset = insts[i].offset
423+
new_tokens.append(
424+
Token(
425+
opname="COLLECTION_START",
426+
attr=collection_enum,
427+
pattr="CONST_MAP",
428+
offset=f"{start_offset}_0",
429+
linestart=insts[i].starts_line,
430+
has_arg=True,
431+
has_extended_arg=False,
432+
opc=self.opc,
433+
optype="pseudo",
434+
)
435+
)
436+
for j in range(i + 1, collection_end, 3):
437+
new_tokens.append(
438+
Token(
439+
opname="ADD_KEY",
440+
attr=insts[j + 1].argval,
441+
pattr=insts[j + 1].argrepr,
442+
offset=insts[j + 1].offset,
443+
linestart=insts[j + 1].starts_line,
444+
has_arg=True,
445+
has_extended_arg=False,
446+
opc=self.opc,
447+
optype="pseudo",
448+
)
449+
)
450+
new_tokens.append(
451+
Token(
452+
opname="ADD_VALUE",
453+
attr=insts[j].argval,
454+
pattr=insts[j].argrepr,
455+
offset=insts[j].offset,
456+
linestart=insts[j].starts_line,
457+
has_arg=True,
458+
has_extended_arg=False,
459+
opc=self.opc,
460+
optype="pseudo",
461+
)
462+
)
463+
new_tokens.append(
464+
Token(
465+
opname="BUILD_DICT_OLDER",
466+
attr=t.attr,
467+
pattr=t.pattr,
468+
offset=t.offset,
469+
linestart=t.linestart,
470+
has_arg=t.has_arg,
471+
has_extended_arg=False,
472+
opc=t.opc,
473+
optype="pseudo",
393474
)
394475
)
395476
return new_tokens
@@ -497,8 +578,16 @@ def ingest(
497578

498579
last_op_was_break = False
499580
new_tokens = []
581+
skip_end_offset = None
500582

501583
for i, inst in enumerate(self.insts):
584+
# BUILD_MAP for < 3.5 can skip *forward* in instructions and
585+
# replace them. So we use the below to get up to the position
586+
# scanned and replaced forward
587+
if skip_end_offset and inst.offset <= skip_end_offset:
588+
continue
589+
skip_end_offset = None
590+
502591
opname = inst.opname
503592
argval = inst.argval
504593
pattr = inst.argrepr
@@ -532,17 +621,38 @@ def ingest(
532621
if try_tokens is not None:
533622
new_tokens = try_tokens
534623
continue
535-
elif opname in ("BUILD_MAP",) and self.version >= (3, 5):
536-
try_tokens = self.bound_map_from_inst(
624+
625+
elif opname in ("BUILD_MAP",):
626+
bound_map_from_insts_fn = (
627+
self.bound_map_from_inst_35
628+
if self.version >= (3, 5)
629+
else self.bound_map_from_inst_pre35
630+
)
631+
try_tokens = bound_map_from_insts_fn(
537632
self.insts,
538633
new_tokens,
539-
inst,
540634
t,
541635
i,
542636
)
543637
if try_tokens is not None:
544-
new_tokens = try_tokens
545-
continue
638+
if self.version < (3, 5):
639+
assert try_tokens[-1] == "BUILD_DICT_OLDER"
640+
prev_offset = inst.offset
641+
for j in range(i, len(self.insts)):
642+
if self.insts[j].opname == "STORE_NAME":
643+
new_tokens = try_tokens
644+
skip_end_offset = prev_offset
645+
# Set a hacky sentinel to indicate skipping to the
646+
# next instruction
647+
opname = "EXTENDED_ARG"
648+
break
649+
prev_offset = self.insts[j].offset
650+
pass
651+
pass
652+
else:
653+
new_tokens = try_tokens
654+
continue
655+
pass
546656

547657
argval = inst.argval
548658
op = inst.opcode

uncompyle6/scanners/scanner38.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2019-2022 by Rocky Bernstein
1+
# Copyright (c) 2019-2022, 2024 by Rocky Bernstein
22
#
33
# This program is free software: you can redistribute it and/or modify
44
# it under the terms of the GNU General Public License as published by

0 commit comments

Comments
 (0)