1- # (C) Copyright 2023-2024 by Rocky Bernstein
1+ # (C) Copyright 2023-2025 by Rocky Bernstein
22#
33# This program is free software; you can redistribute it and/or
44# modify it under the terms of the GNU General Public License
1616"""
1717Routines for formatting opcodes.
1818"""
19+
20+ import re
1921from typing import List , Optional , Tuple
2022
2123from xdis .instruction import Instruction
2224from xdis .opcodes .format .basic import format_IS_OP , format_RAISE_VARARGS_older
2325
# Shared "nothing to show" result for the extended formatters in this
# module: an empty formatted string paired with no start offset.  It is
# an immutable tuple, so returning the same object everywhere is safe.
NULL_EXTENDED_OP: Tuple[str, Optional[int]] = ("", None)
27+
2428
2529def extended_format_binary_op (
2630 opc , instructions : List [Instruction ], fmt_str : str
@@ -56,7 +60,7 @@ def extended_format_binary_op(
5660 arg1_start_offset , instructions , 1
5761 )
5862 if i is None :
59- return "" , None
63+ return NULL_EXTENDED_OP
6064 j = skip_cache (instructions , i + 1 )
6165 stack_inst2 = instructions [j ]
6266 if (
@@ -75,7 +79,7 @@ def extended_format_binary_op(
7579 return fmt_str % (arg2 , arg1 ), start_offset
7680 else :
7781 return fmt_str % ("..." , arg1 ), None
78- return "" , None
82+ return NULL_EXTENDED_OP
7983
8084
8185def extended_format_infix_binary_op (
@@ -99,7 +103,7 @@ def extended_format_infix_binary_op(
99103 if arg1_start_offset is not None :
100104 i = get_instruction_index_from_offset (arg1_start_offset , instructions , 1 )
101105 if i is None :
102- return "" , None
106+ return NULL_EXTENDED_OP
103107 j = i + 1
104108 # 3.11+ has CACHE instructions
105109 while instructions [j ].opname == "CACHE" :
@@ -108,11 +112,7 @@ def extended_format_infix_binary_op(
108112 instructions [j ].opcode in opc .operator_set
109113 and instructions [i ].opcode in opc .operator_set
110114 ):
111- arg2 = (
112- instructions [j ].tos_str
113- if instructions [j ].tos_str is not None
114- else instructions [j ].argrepr
115- )
115+ arg2 = get_instruction_tos_str (instructions [j ])
116116 start_offset = instructions [j ].start_offset
117117 return f"{ arg2 } { op_str } { arg1 } " , start_offset
118118 elif instructions [j ].start_offset is not None :
@@ -129,7 +129,7 @@ def extended_format_infix_binary_op(
129129 return f"{ arg2 } { op_str } { arg1 } " , start_offset
130130 else :
131131 return f"...{ op_str } { arg1 } " , None
132- return "" , None
132+ return NULL_EXTENDED_OP
133133
134134
135135def extended_format_store_op (
@@ -142,7 +142,7 @@ def extended_format_store_op(
142142 # are more complicated, so let's not try to figure this out.
143143 # This kind of things is best left for a decompiler.
144144 if inst .is_jump_target :
145- return "" , None
145+ return NULL_EXTENDED_OP
146146
147147 prev_inst = instructions [1 ]
148148 start_offset = prev_inst .offset
@@ -198,7 +198,7 @@ def extended_format_ternary_op(
198198 if arg1_start_offset is not None :
199199 i = get_instruction_index_from_offset (arg1_start_offset , instructions , 1 )
200200 if i is None :
201- return "" , None
201+ return NULL_EXTENDED_OP
202202 j = skip_cache (instructions , i + 1 )
203203 stack_inst2 = instructions [j ]
204204 if (
@@ -229,7 +229,7 @@ def extended_format_ternary_op(
229229 return fmt_str % (arg2 , arg1 , arg3 ), start_offset
230230 else :
231231 return fmt_str % ("..." , "..." , "..." ), None
232- return "" , None
232+ return NULL_EXTENDED_OP
233233
234234
235235def extended_format_STORE_SUBSCR (
@@ -251,7 +251,7 @@ def extended_format_unary_op(
251251 return fmt_str % stack_arg .tos_str , start_offset
252252 if stack_arg .opcode in opc .operator_set :
253253 return fmt_str % stack_arg .argrepr , start_offset
254- return "" , None
254+ return NULL_EXTENDED_OP
255255
256256
257257def extended_format_ATTR (
@@ -265,13 +265,13 @@ def extended_format_ATTR(
265265 instr1 .tos_str
266266 or instr1 .opcode in opc .NAME_OPS | opc .CONST_OPS | opc .LOCAL_OPS | opc .FREE_OPS
267267 ):
268- base = get_instruction_arg (instr1 )
268+ base = get_instruction_tos_str (instr1 )
269269
270270 return (
271271 f"{ base } .{ instructions [0 ].argrepr } " ,
272272 instr1 .start_offset ,
273273 )
274- return "" , None
274+ return NULL_EXTENDED_OP
275275
276276
277277def extended_format_BINARY_ADD (
@@ -372,7 +372,7 @@ def extended_format_build_tuple_or_list(
372372 return f"{ left_delim } { args_str } ,{ right_delim } " , instructions [i ].start_offset
373373 else :
374374 return f"{ left_delim } { args_str } { right_delim } " , instructions [i ].start_offset
375- return "" , None
375+ return NULL_EXTENDED_OP
376376
377377
378378def extended_format_BUILD_CONST_KEY_MAP (opc , instructions ):
@@ -393,7 +393,7 @@ def extended_format_BUILD_CONST_KEY_MAP(opc, instructions):
393393 arg_pairs .append (f"{ key_values [i ]} : { arglist [i ]} " )
394394 args_str = ", " .join (arg_pairs )
395395 return "{" + args_str + "}" , instructions [i ].start_offset
396- return "" , None
396+ return NULL_EXTENDED_OP
397397
398398
399399def extended_format_BUILD_LIST (
@@ -426,7 +426,7 @@ def extended_format_BUILD_SLICE(
426426 if instructions [0 ].argval == 0 :
427427 # Degenerate case
428428 return "set()" , instructions [0 ].start_offset
429- return "" , None
429+ return NULL_EXTENDED_OP
430430
431431
432432def extended_format_BUILD_TUPLE (
@@ -448,6 +448,21 @@ def extended_format_COMPARE_OP(
448448 )
449449
450450
def extended_format_DUP_TOP(
    opc, instructions: List[Instruction]
) -> Tuple[str, Optional[int]]:
    """Try to extract the TOS value and show it wrapped as "push(...) ".

    The trailing space after the closing parenthesis is deliberate: it is
    a sentinel that `get_instruction_tos_str()` looks for when it strips
    the "push(...)" wrapper to recover the plain operand string.

    Returns the result of `extended_format_unary_op()` called with the
    "push(%s) " format, or `NULL_EXTENDED_OP` when `instructions[1]` has
    an optype of "jrel" or "jabs".
    """
    # instructions[1] is treated elsewhere in this module as the previous
    # instruction.  When its operand type is "jrel" or "jabs" (presumably
    # a jump -- confirm against xdis optype names) no operand is reported.
    if instructions[1].optype in ("jrel", "jabs"):
        return NULL_EXTENDED_OP

    # Keep the trailing space: it is the sentinel described above.
    return extended_format_unary_op(opc, instructions, "push(%s) ")
464+
465+
451466def extended_format_CALL_FUNCTION (opc , instructions ) -> Tuple [str , Optional [int ]]:
452467 """call_function_inst should be a "CALL_FUNCTION" instruction. Look in
453468 `instructions` to see if we can find a method name. If not we'll
@@ -464,11 +479,11 @@ def extended_format_CALL_FUNCTION(opc, instructions) -> Tuple[str, Optional[int]
464479 arglist , arg_count , i = get_arglist (instructions , 0 , arg_count )
465480
466481 if arglist is None :
467- return "" , None
482+ return NULL_EXTENDED_OP
468483
469484 assert i is not None
470485 if i >= len (instructions ) - 1 :
471- return "" , None
486+ return NULL_EXTENDED_OP
472487
473488 fn_inst = instructions [i + 1 ]
474489 if fn_inst .opcode in opc .operator_set :
@@ -480,7 +495,7 @@ def extended_format_CALL_FUNCTION(opc, instructions) -> Tuple[str, Optional[int]
480495 arglist .reverse ()
481496 s = f'{ fn_name } ({ ", " .join (arglist )} )'
482497 return s , start_offset
483- return "" , None
498+ return NULL_EXTENDED_OP
484499
485500
486501def extended_format_IMPORT_FROM (
@@ -493,7 +508,8 @@ def extended_format_IMPORT_FROM(
493508 instructions [i ].start_offset , instructions , 1
494509 )
495510 if i is None :
496- return "" , None
511+ return NULL_EXTENDED_OP
512+
497513 module_name = get_instruction_arg (instructions [i ])
498514 if module_name .startswith ("import_module(" ):
499515 module_name = module_name [len ("import_module(" ) : - 1 ]
@@ -647,7 +663,7 @@ def extended_format_CALL_METHOD(opc, instructions) -> Tuple[str, Optional[int]]:
647663 arglist , arg_count , first_arg = get_arglist (instructions , 0 , arg_count )
648664
649665 if first_arg is None or first_arg >= len (instructions ) - 1 :
650- return "" , None
666+ return NULL_EXTENDED_OP
651667
652668 fn_inst = instructions [first_arg + 1 ]
653669 if fn_inst .opcode in opc .operator_set and arglist is not None :
@@ -657,7 +673,8 @@ def extended_format_CALL_METHOD(opc, instructions) -> Tuple[str, Optional[int]]:
657673 arglist .reverse ()
658674 s = f'{ fn_name } ({ ", " .join (arglist )} )'
659675 return s , start_offset
660- return "" , None
676+ return NULL_EXTENDED_OP
677+
661678
662679
663680def extended_format_RAISE_VARARGS_older (
@@ -771,6 +788,18 @@ def get_instruction_arg(inst: Instruction, argval=None) -> str:
771788 return inst .tos_str if inst .tos_str is not None else argval
772789
773790
def get_instruction_tos_str(inst: Instruction) -> str:
    """Return the operand string for `inst` without a push()/copy() wrapper.

    If `inst.tos_str` is None, `inst.argrepr` is returned.  Otherwise
    `inst.tos_str` is returned, except that a leading "push(...) " or
    "copy(...) " wrapper is removed and only the text inside the
    parentheses is kept.  The wrapper is recognized only when the closing
    parenthesis is followed by a space; that trailing space is the
    sentinel written by formatters such as `extended_format_DUP_TOP()`.
    """
    if inst.tos_str is None:
        return inst.argrepr

    # The greedy "(.+)" runs to the last ") " in the string, so nested
    # parentheses inside the wrapper, e.g. "push(f(x)) ", stay intact.
    wrapped = re.match(r"^(?:push|copy)\((.+)\) ", inst.tos_str)
    if wrapped:
        # Remove the surrounding "push(...) " or "copy(...) " text.
        return wrapped.group(1)
    return inst.tos_str
801+
802+
774803def get_instruction_index_from_offset (
775804 target_offset : int , instructions : List [Instruction ], start_index : int = 1
776805) -> Optional [int ]:
@@ -858,6 +887,7 @@ def skip_cache(instructions: List[Instruction], i: int) -> int:
858887 "BUILD_TUPLE" : extended_format_BUILD_TUPLE ,
859888 "CALL_FUNCTION" : extended_format_CALL_FUNCTION ,
860889 "COMPARE_OP" : extended_format_COMPARE_OP ,
890+ "DUP_TOP" : extended_format_DUP_TOP ,
861891 "IMPORT_FROM" : extended_format_IMPORT_FROM ,
862892 "IMPORT_NAME" : extended_format_IMPORT_NAME ,
863893 "INPLACE_ADD" : extended_format_INPLACE_ADD ,
0 commit comments