@@ -216,7 +216,7 @@ def bound_collection_from_inst(
216
216
collection_type : str ,
217
217
) -> Optional [list ]:
218
218
"""
219
- Try to a replace sequence of instruction that ends with a
219
+ Try to replace a sequence of instructions that ends with a
220
220
BUILD_xxx with a sequence that can be parsed much faster, but
221
221
inserting the token boundary at the beginning of the sequence.
222
222
"""
@@ -298,8 +298,8 @@ def bound_collection_from_inst(
298
298
)
299
299
return new_tokens
300
300
301
- def bound_map_from_inst (
302
- self , insts : list , next_tokens : list , inst : Instruction , t : Token , i : int
301
+ def bound_map_from_inst_35 (
302
+ self , insts : list , next_tokens : list , t : Token , i : int
303
303
) -> Optional [list ]:
304
304
"""
305
305
Try to turn a sequence of instructions that ends with a BUILD_MAP into
@@ -315,25 +315,19 @@ def bound_map_from_inst(
315
315
if count < 5 :
316
316
return None
317
317
318
- if self .version >= (3 , 5 ):
319
- # Newer Python BUILD_MAP argument's count is a
320
- # key and value pair so it is multiplied by two.
321
- collection_start = i - (count * 2 )
322
- assert (count * 2 ) <= i
323
-
324
- for j in range (collection_start , i , 2 ):
325
- if insts [j ].opname not in ("LOAD_CONST" ,):
326
- return None
327
- if insts [j + 1 ].opname not in ("LOAD_CONST" ,):
328
- return None
329
-
330
- collection_start = i - (2 * count )
331
- collection_enum = CONST_COLLECTIONS .index ("CONST_MAP" )
332
- # else: Older Python count is sum of all key and value pairs
333
- # Each pair is added individually like:
334
- # LOAD_CONST ("Max-Age")
335
- # LOAD_CONST ("max-age")
336
- # STORE_MAP
318
+ # Newer Python BUILD_MAP argument's count is a
319
+ # key and value pair so it is multiplied by two.
320
+ collection_start = i - (count * 2 )
321
+ assert (count * 2 ) <= i
322
+
323
+ for j in range (collection_start , i , 2 ):
324
+ if insts [j ].opname not in ("LOAD_CONST" ,):
325
+ return None
326
+ if insts [j + 1 ].opname not in ("LOAD_CONST" ,):
327
+ return None
328
+
329
+ collection_start = i - (2 * count )
330
+ collection_enum = CONST_COLLECTIONS .index ("CONST_MAP" )
337
331
338
332
# If we get here, all instructions before tokens[i] are LOAD_CONST and
339
333
# we can add a boundary marker and change LOAD_CONST to
@@ -346,7 +340,7 @@ def bound_map_from_inst(
346
340
attr = collection_enum ,
347
341
pattr = "CONST_MAP" ,
348
342
offset = f"{ start_offset } _0" ,
349
- linestart = False ,
343
+ linestart = insts [ collection_start ]. starts_line ,
350
344
has_arg = True ,
351
345
has_extended_arg = False ,
352
346
opc = self .opc ,
@@ -364,6 +358,7 @@ def bound_map_from_inst(
364
358
has_arg = True ,
365
359
has_extended_arg = False ,
366
360
opc = self .opc ,
361
+ optype = "pseudo" ,
367
362
)
368
363
)
369
364
new_tokens .append (
@@ -376,7 +371,7 @@ def bound_map_from_inst(
376
371
has_arg = True ,
377
372
has_extended_arg = False ,
378
373
opc = self .opc ,
379
- optype = insts [ j + 1 ]. optype ,
374
+ optype = "pseudo" ,
380
375
)
381
376
)
382
377
new_tokens .append (
@@ -389,7 +384,93 @@ def bound_map_from_inst(
389
384
has_arg = t .has_arg ,
390
385
has_extended_arg = False ,
391
386
opc = t .opc ,
392
- optype = t .optype ,
387
+ optype = "pseudo" ,
388
+ )
389
+ )
390
+ return new_tokens
391
+
392
def bound_map_from_inst_pre35(
    self, insts: list, next_tokens: list, t: Token, i: int
) -> Optional[list]:
    """
    Try to turn a BUILD_MAP instruction and the constant-only
    LOAD_CONST / LOAD_CONST / STORE_MAP triples that follow it into a
    token sequence that can be parsed much faster, inserting a
    boundary token at the beginning of the sequence.

    :param insts: instruction list; ``insts[i]`` is the BUILD_MAP
        instruction and the candidate triples start at ``insts[i + 1]``.
    :param next_tokens: tokens produced so far; only ``next_tokens[:i]``
        is carried over into the result.
    :param t: token for the BUILD_MAP instruction; ``t.attr`` is used
        as the number of key/value pairs.
    :param i: index of the BUILD_MAP instruction in ``insts``.
    :return: a new token list ending in BUILD_DICT_OLDER, or ``None``
        when the map is too small, the instruction stream is too short,
        or any instruction in the scanned range breaks the
        LOAD_CONST, LOAD_CONST, STORE_MAP pattern.
    """
    count = t.attr
    assert isinstance(count, int)

    # For small maps don't bother.
    if count < 10:
        return None

    # In older Python each map entry is a key and value pair followed
    # by a STORE_MAP, so the count is multiplied by three.
    collection_end = i + 1 + count * 3

    # The scan below reads up to insts[collection_end - 1]. Bail out
    # rather than raise IndexError when the stream is shorter than that.
    if collection_end > len(insts):
        return None

    for j in range(i + 1, collection_end, 3):
        if insts[j].opname not in ("LOAD_CONST",):
            return None
        if insts[j + 1].opname not in ("LOAD_CONST",):
            return None
        if insts[j + 2].opname not in ("STORE_MAP",):
            return None

    collection_enum = CONST_COLLECTIONS.index("CONST_MAP")

    # NOTE(review): this slices the token list with an instruction
    # index; presumably the two lists line up here -- confirm in caller.
    new_tokens = next_tokens[:i]
    start_offset = insts[i].offset
    new_tokens.append(
        Token(
            opname="COLLECTION_START",
            attr=collection_enum,
            pattr="CONST_MAP",
            offset=f"{start_offset}_0",
            linestart=insts[i].starts_line,
            has_arg=True,
            has_extended_arg=False,
            opc=self.opc,
            optype="pseudo",
        )
    )
    for j in range(i + 1, collection_end, 3):
        # Within a triple, the second LOAD_CONST is emitted as the key
        # and the first as the value; the STORE_MAP itself is dropped.
        new_tokens.append(
            Token(
                opname="ADD_KEY",
                attr=insts[j + 1].argval,
                pattr=insts[j + 1].argrepr,
                offset=insts[j + 1].offset,
                linestart=insts[j + 1].starts_line,
                has_arg=True,
                has_extended_arg=False,
                opc=self.opc,
                optype="pseudo",
            )
        )
        new_tokens.append(
            Token(
                opname="ADD_VALUE",
                attr=insts[j].argval,
                pattr=insts[j].argrepr,
                offset=insts[j].offset,
                linestart=insts[j].starts_line,
                has_arg=True,
                has_extended_arg=False,
                opc=self.opc,
                optype="pseudo",
            )
        )
    new_tokens.append(
        Token(
            opname="BUILD_DICT_OLDER",
            attr=t.attr,
            pattr=t.pattr,
            offset=t.offset,
            linestart=t.linestart,
            has_arg=t.has_arg,
            has_extended_arg=False,
            opc=t.opc,
            optype="pseudo",
        )
    )
    return new_tokens
@@ -497,8 +578,16 @@ def ingest(
497
578
498
579
last_op_was_break = False
499
580
new_tokens = []
581
+ skip_end_offset = None
500
582
501
583
for i , inst in enumerate (self .insts ):
584
+ # BUILD_MAP for < 3.5 can skip *forward* in instructions and
585
+ # replace them. So we use the below to get up to the position
586
+ # scanned and replaced forward
587
+ if skip_end_offset and inst .offset <= skip_end_offset :
588
+ continue
589
+ skip_end_offset = None
590
+
502
591
opname = inst .opname
503
592
argval = inst .argval
504
593
pattr = inst .argrepr
@@ -532,17 +621,38 @@ def ingest(
532
621
if try_tokens is not None :
533
622
new_tokens = try_tokens
534
623
continue
535
- elif opname in ("BUILD_MAP" ,) and self .version >= (3 , 5 ):
536
- try_tokens = self .bound_map_from_inst (
624
+
625
+ elif opname in ("BUILD_MAP" ,):
626
+ bound_map_from_insts_fn = (
627
+ self .bound_map_from_inst_35
628
+ if self .version >= (3 , 5 )
629
+ else self .bound_map_from_inst_pre35
630
+ )
631
+ try_tokens = bound_map_from_insts_fn (
537
632
self .insts ,
538
633
new_tokens ,
539
- inst ,
540
634
t ,
541
635
i ,
542
636
)
543
637
if try_tokens is not None :
544
- new_tokens = try_tokens
545
- continue
638
+ if self .version < (3 , 5 ):
639
+ assert try_tokens [- 1 ] == "BUILD_DICT_OLDER"
640
+ prev_offset = inst .offset
641
+ for j in range (i , len (self .insts )):
642
+ if self .insts [j ].opname == "STORE_NAME" :
643
+ new_tokens = try_tokens
644
+ skip_end_offset = prev_offset
645
+ # Set a hacky sentinel to indicate skipping to the
646
+ # next instruction
647
+ opname = "EXTENDED_ARG"
648
+ break
649
+ prev_offset = self .insts [j ].offset
650
+ pass
651
+ pass
652
+ else :
653
+ new_tokens = try_tokens
654
+ continue
655
+ pass
546
656
547
657
argval = inst .argval
548
658
op = inst .opcode
0 commit comments