Skip to content

Commit 9db5641

Browse files
authored
Merge pull request #162 from rocky/format-stack-opcodes
Start formatting stack manip opcodes...
2 parents f53adb3 + adde8c9 commit 9db5641

File tree

3 files changed

+127
-30
lines changed

3 files changed

+127
-30
lines changed

xdis/opcodes/format/basic.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# (C) Copyright 2023 by Rocky Bernstein
1+
# (C) Copyright 2023, 2025 by Rocky Bernstein
22
#
33
# This program is free software; you can redistribute it and/or
44
# modify it under the terms of the GNU General Public License
@@ -17,7 +17,6 @@
1717
Routines for formatting opcodes.
1818
"""
1919

20-
2120
def format_extended_arg(arg):
    """Return ``arg`` multiplied by 2**16, rendered as a decimal string."""
    scaled = arg * (1 << 16)
    return str(scaled)
2322

@@ -47,7 +46,7 @@ def format_MAKE_FUNCTION_10_27(argc: int) -> str:
4746

4847

4948
# Up until 3.7
50-
def format_RAISE_VARARGS_older(argc):
49+
def format_RAISE_VARARGS_older(argc) -> str:
5150
assert 0 <= argc <= 3
5251
if argc == 0:
5352
return "reraise"
@@ -57,6 +56,20 @@ def format_RAISE_VARARGS_older(argc):
5756
return "exception, parameter"
5857
elif argc == 3:
5958
return "exception, parameter, traceback"
59+
return ""
60+
61+
def format_ROT_FOUR(_: int) -> str:
    """Describe ROT_FOUR as a tuple assignment over the top four stack slots.

    The operand is ignored; the same text is returned for every call.
    """
    stack_effect = "TOS, TOS1, TOS2, TOS3 = TOS1, TOS2, TOS3, TOS"
    return stack_effect
63+
64+
65+
def format_ROT_THREE(_: int) -> str:
    """Describe ROT_THREE as a tuple assignment over the top three stack slots.

    The operand is ignored; the same text is returned for every call.
    """
    stack_effect = "TOS, TOS1, TOS2 = TOS1, TOS2, TOS"
    return stack_effect
67+
68+
69+
def format_ROT_TWO(_: int) -> str:
    """Describe ROT_TWO as a tuple assignment swapping the top two stack slots.

    The operand is ignored; the same text is returned for every call.
    Note that the returned text does not end in a sentinel space.
    """
    return "TOS, TOS1 = TOS1, TOS"
72+
6073

6174

6275
opcode_arg_fmt_base = opcode_arg_fmt34 = {
@@ -65,4 +78,7 @@ def format_RAISE_VARARGS_older(argc):
6578
"CALL_FUNCTION_VAR_KW": format_CALL_FUNCTION_pos_name_encoded,
6679
"EXTENDED_ARG": format_extended_arg,
6780
"RAISE_VARARGS": format_RAISE_VARARGS_older,
81+
"ROT_FOUR": format_ROT_FOUR,
82+
"ROT_THREE": format_ROT_THREE,
83+
"ROT_TWO": format_ROT_TWO,
6884
}

xdis/opcodes/format/extended.py

Lines changed: 55 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# (C) Copyright 2023-2024 by Rocky Bernstein
1+
# (C) Copyright 2023-2025 by Rocky Bernstein
22
#
33
# This program is free software; you can redistribute it and/or
44
# modify it under the terms of the GNU General Public License
@@ -16,11 +16,15 @@
1616
"""
1717
Routines for formatting opcodes.
1818
"""
19+
20+
import re
1921
from typing import List, Optional, Tuple
2022

2123
from xdis.instruction import Instruction
2224
from xdis.opcodes.format.basic import format_IS_OP, format_RAISE_VARARGS_older
2325

26+
NULL_EXTENDED_OP = "", None
27+
2428

2529
def extended_format_binary_op(
2630
opc, instructions: List[Instruction], fmt_str: str
@@ -56,7 +60,7 @@ def extended_format_binary_op(
5660
arg1_start_offset, instructions, 1
5761
)
5862
if i is None:
59-
return "", None
63+
return NULL_EXTENDED_OP
6064
j = skip_cache(instructions, i + 1)
6165
stack_inst2 = instructions[j]
6266
if (
@@ -75,7 +79,7 @@ def extended_format_binary_op(
7579
return fmt_str % (arg2, arg1), start_offset
7680
else:
7781
return fmt_str % ("...", arg1), None
78-
return "", None
82+
return NULL_EXTENDED_OP
7983

8084

8185
def extended_format_infix_binary_op(
@@ -99,7 +103,7 @@ def extended_format_infix_binary_op(
99103
if arg1_start_offset is not None:
100104
i = get_instruction_index_from_offset(arg1_start_offset, instructions, 1)
101105
if i is None:
102-
return "", None
106+
return NULL_EXTENDED_OP
103107
j = i + 1
104108
# 3.11+ has CACHE instructions
105109
while instructions[j].opname == "CACHE":
@@ -108,11 +112,7 @@ def extended_format_infix_binary_op(
108112
instructions[j].opcode in opc.operator_set
109113
and instructions[i].opcode in opc.operator_set
110114
):
111-
arg2 = (
112-
instructions[j].tos_str
113-
if instructions[j].tos_str is not None
114-
else instructions[j].argrepr
115-
)
115+
arg2 = get_instruction_tos_str(instructions[j])
116116
start_offset = instructions[j].start_offset
117117
return f"{arg2}{op_str}{arg1}", start_offset
118118
elif instructions[j].start_offset is not None:
@@ -129,7 +129,7 @@ def extended_format_infix_binary_op(
129129
return f"{arg2}{op_str}{arg1}", start_offset
130130
else:
131131
return f"...{op_str}{arg1}", None
132-
return "", None
132+
return NULL_EXTENDED_OP
133133

134134

135135
def extended_format_store_op(
@@ -142,7 +142,7 @@ def extended_format_store_op(
142142
# are more complicated, so let's not try to figure this out.
143143
# This kind of things is best left for a decompiler.
144144
if inst.is_jump_target:
145-
return "", None
145+
return NULL_EXTENDED_OP
146146

147147
prev_inst = instructions[1]
148148
start_offset = prev_inst.offset
@@ -198,7 +198,7 @@ def extended_format_ternary_op(
198198
if arg1_start_offset is not None:
199199
i = get_instruction_index_from_offset(arg1_start_offset, instructions, 1)
200200
if i is None:
201-
return "", None
201+
return NULL_EXTENDED_OP
202202
j = skip_cache(instructions, i + 1)
203203
stack_inst2 = instructions[j]
204204
if (
@@ -229,7 +229,7 @@ def extended_format_ternary_op(
229229
return fmt_str % (arg2, arg1, arg3), start_offset
230230
else:
231231
return fmt_str % ("...", "...", "..."), None
232-
return "", None
232+
return NULL_EXTENDED_OP
233233

234234

235235
def extended_format_STORE_SUBSCR(
@@ -251,7 +251,7 @@ def extended_format_unary_op(
251251
return fmt_str % stack_arg.tos_str, start_offset
252252
if stack_arg.opcode in opc.operator_set:
253253
return fmt_str % stack_arg.argrepr, start_offset
254-
return "", None
254+
return NULL_EXTENDED_OP
255255

256256

257257
def extended_format_ATTR(
@@ -265,13 +265,13 @@ def extended_format_ATTR(
265265
instr1.tos_str
266266
or instr1.opcode in opc.NAME_OPS | opc.CONST_OPS | opc.LOCAL_OPS | opc.FREE_OPS
267267
):
268-
base = get_instruction_arg(instr1)
268+
base = get_instruction_tos_str(instr1)
269269

270270
return (
271271
f"{base}.{instructions[0].argrepr}",
272272
instr1.start_offset,
273273
)
274-
return "", None
274+
return NULL_EXTENDED_OP
275275

276276

277277
def extended_format_BINARY_ADD(
@@ -372,7 +372,7 @@ def extended_format_build_tuple_or_list(
372372
return f"{left_delim}{args_str},{right_delim}", instructions[i].start_offset
373373
else:
374374
return f"{left_delim}{args_str}{right_delim}", instructions[i].start_offset
375-
return "", None
375+
return NULL_EXTENDED_OP
376376

377377

378378
def extended_format_BUILD_CONST_KEY_MAP(opc, instructions):
@@ -393,7 +393,7 @@ def extended_format_BUILD_CONST_KEY_MAP(opc, instructions):
393393
arg_pairs.append(f"{key_values[i]}: {arglist[i]}")
394394
args_str = ", ".join(arg_pairs)
395395
return "{" + args_str + "}", instructions[i].start_offset
396-
return "", None
396+
return NULL_EXTENDED_OP
397397

398398

399399
def extended_format_BUILD_LIST(
@@ -426,7 +426,7 @@ def extended_format_BUILD_SLICE(
426426
if instructions[0].argval == 0:
427427
# Degenerate case
428428
return "set()", instructions[0].start_offset
429-
return "", None
429+
return NULL_EXTENDED_OP
430430

431431

432432
def extended_format_BUILD_TUPLE(
@@ -448,6 +448,21 @@ def extended_format_COMPARE_OP(
448448
)
449449

450450

451+
def extended_format_DUP_TOP(
    opc, instructions: List[Instruction]
) -> Tuple[str, Optional[int]]:
    """Try to show the duplicated top-of-stack value as ``"push(<value>) "``.

    The trailing space in the format is a sentinel for
    `get_instruction_tos_str()`, which strips the surrounding ``push(...)``
    when only the operand text is needed.

    Returns whatever `extended_format_unary_op()` produces for the
    ``"push(%s) "`` format, or ``NULL_EXTENDED_OP`` when ``instructions[1]``
    is a relative or absolute jump.
    """
    # NOTE(review): instructions[1] is presumably the instruction that
    # produced the value being duplicated -- confirm against the caller.
    producer = instructions[1]
    if producer.optype in ("jrel", "jabs"):
        return NULL_EXTENDED_OP
    return extended_format_unary_op(opc, instructions, "push(%s) ")
464+
465+
451466
def extended_format_CALL_FUNCTION(opc, instructions) -> Tuple[str, Optional[int]]:
452467
"""call_function_inst should be a "CALL_FUNCTION" instruction. Look in
453468
`instructions` to see if we can find a method name. If not we'll
@@ -464,11 +479,11 @@ def extended_format_CALL_FUNCTION(opc, instructions) -> Tuple[str, Optional[int]
464479
arglist, arg_count, i = get_arglist(instructions, 0, arg_count)
465480

466481
if arglist is None:
467-
return "", None
482+
return NULL_EXTENDED_OP
468483

469484
assert i is not None
470485
if i >= len(instructions) - 1:
471-
return "", None
486+
return NULL_EXTENDED_OP
472487

473488
fn_inst = instructions[i + 1]
474489
if fn_inst.opcode in opc.operator_set:
@@ -480,7 +495,7 @@ def extended_format_CALL_FUNCTION(opc, instructions) -> Tuple[str, Optional[int]
480495
arglist.reverse()
481496
s = f'{fn_name}({", ".join(arglist)})'
482497
return s, start_offset
483-
return "", None
498+
return NULL_EXTENDED_OP
484499

485500

486501
def extended_format_IMPORT_FROM(
@@ -493,7 +508,8 @@ def extended_format_IMPORT_FROM(
493508
instructions[i].start_offset, instructions, 1
494509
)
495510
if i is None:
496-
return "", None
511+
return NULL_EXTENDED_OP
512+
497513
module_name = get_instruction_arg(instructions[i])
498514
if module_name.startswith("import_module("):
499515
module_name = module_name[len("import_module(") : -1]
@@ -647,7 +663,7 @@ def extended_format_CALL_METHOD(opc, instructions) -> Tuple[str, Optional[int]]:
647663
arglist, arg_count, first_arg = get_arglist(instructions, 0, arg_count)
648664

649665
if first_arg is None or first_arg >= len(instructions) - 1:
650-
return "", None
666+
return NULL_EXTENDED_OP
651667

652668
fn_inst = instructions[first_arg + 1]
653669
if fn_inst.opcode in opc.operator_set and arglist is not None:
@@ -657,7 +673,8 @@ def extended_format_CALL_METHOD(opc, instructions) -> Tuple[str, Optional[int]]:
657673
arglist.reverse()
658674
s = f'{fn_name}({", ".join(arglist)})'
659675
return s, start_offset
660-
return "", None
676+
return NULL_EXTENDED_OP
677+
661678

662679

663680
def extended_format_RAISE_VARARGS_older(
@@ -771,6 +788,18 @@ def get_instruction_arg(inst: Instruction, argval=None) -> str:
771788
return inst.tos_str if inst.tos_str is not None else argval
772789

773790

791+
def get_instruction_tos_str(inst: Instruction) -> str:
    """Return the operand text for ``inst`` without any push()/copy() wrapper.

    When ``inst.tos_str`` is set it is used; otherwise ``inst.argrepr`` is
    returned unchanged.  A ``tos_str`` of the exact form ``"push(<value>) "``
    or ``"copy(<value>) "`` -- note the trailing sentinel space -- is reduced
    to ``<value>``.

    The whole string must have that form.  Text that merely starts with a
    wrapper (for example ``"push(x)  + y"``) is returned as is, so that the
    remainder of a composite expression is never silently dropped.
    """
    if inst.tos_str is None:
        return inst.argrepr

    argval = inst.tos_str
    wrapped = re.fullmatch(r"(?:push|copy)\((.+)\) ", argval)
    if wrapped:
        # Remove the surrounding "push(...) " or "copy(...) " text.
        argval = wrapped.group(1)
    return argval
801+
802+
774803
def get_instruction_index_from_offset(
775804
target_offset: int, instructions: List[Instruction], start_index: int = 1
776805
) -> Optional[int]:
@@ -858,6 +887,7 @@ def skip_cache(instructions: List[Instruction], i: int) -> int:
858887
"BUILD_TUPLE": extended_format_BUILD_TUPLE,
859888
"CALL_FUNCTION": extended_format_CALL_FUNCTION,
860889
"COMPARE_OP": extended_format_COMPARE_OP,
890+
"DUP_TOP": extended_format_DUP_TOP,
861891
"IMPORT_FROM": extended_format_IMPORT_FROM,
862892
"IMPORT_NAME": extended_format_IMPORT_NAME,
863893
"INPLACE_ADD": extended_format_INPLACE_ADD,

xdis/opcodes/opcode_311.py

Lines changed: 53 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from typing import Dict, List, Optional, Tuple
2525

2626
import xdis.opcodes.opcode_310 as opcode_310
27+
from xdis.instruction import Instruction
2728
from xdis.opcodes.base import (
2829
binary_op,
2930
def_op,
@@ -35,7 +36,11 @@
3536
store_op,
3637
update_pj3,
3738
)
38-
from xdis.opcodes.format.extended import extended_format_binary_op
39+
from xdis.opcodes.format.extended import (
40+
NULL_EXTENDED_OP,
41+
extended_format_binary_op,
42+
extended_format_unary_op,
43+
)
3944
from xdis.opcodes.opcode_310 import opcode_arg_fmt310, opcode_extended_fmt310
4045

4146
version_tuple = (3, 11)
@@ -244,10 +249,54 @@ def extended_format_BINARY_OP(opc, instructions) -> Tuple[str, Optional[int]]:
244249
return extended_format_binary_op(opc, instructions, f"%s {opname} %s")
245250

246251

247-
def format_BINARY_OP(arg) -> str:
252+
def extended_format_COPY_OP(
    opc, instructions: List[Instruction]
) -> Tuple[str, Optional[int]]:
    """Try to show the value duplicated by COPY as ``"copy(<value>) "``.

    The trailing space in the format is a sentinel for
    `get_instruction_tos_str()`, which strips the surrounding ``copy(...)``
    when only the operand text is needed.

    Only ``COPY 1`` duplicates the top-of-stack value.  For a larger operand
    the copied item lies deeper in the stack, so formatting the text of
    ``instructions[1]`` would report the wrong value; nothing is reported
    in that case.

    Returns whatever `extended_format_unary_op()` produces for the
    ``"copy(%s) "`` format, or ``NULL_EXTENDED_OP`` when the operand is not 1
    or ``instructions[1]`` is a relative or absolute jump.
    """
    if instructions[0].argval != 1:
        return NULL_EXTENDED_OP

    # NOTE(review): instructions[1] is presumably the instruction that
    # produced the current top-of-stack value -- confirm against the caller.
    if instructions[1].optype in ("jrel", "jabs"):
        return NULL_EXTENDED_OP
    return extended_format_unary_op(opc, instructions, "copy(%s) ")
265+
266+
267+
def extended_format_SWAP(
    opc, instructions: List[Instruction]
) -> Tuple[str, Optional[int]]:
    """Placeholder for an extended description of a SWAP instruction.

    ``instructions[0]`` should be the "SWAP" instruction.  The intent is to
    find the two instructions whose values are swapped; that part is not
    implemented yet, so ``NULL_EXTENDED_OP`` (an empty string and no start
    offset) is always returned.
    """
    swap_index = instructions[0].argval

    # An operand that is missing or does not index into `instructions`
    # cannot be resolved.
    if swap_index is None or not 0 < swap_index < len(instructions):
        return NULL_EXTENDED_OP

    # TODO: locate the two swapped instructions and describe them.
    return NULL_EXTENDED_OP
290+
291+
292+
def format_BINARY_OP(arg: int) -> str:
    """Return element 1 of the ``_nb_ops`` entry selected by ``arg``."""
    entry = _nb_ops[arg]
    return entry[1]
249294

250295

296+
def format_SWAP_OP(arg: int) -> str:
    """Describe SWAP as an exchange between TOS and the slot ``arg - 1`` below it."""
    partner = arg - 1
    return f"TOS <-> TOS{partner}"
298+
299+
251300
opcode_arg_fmt311 = opcode_arg_fmt310.copy()
252301
del opcode_arg_fmt311["CALL_FUNCTION"]
253302
del opcode_arg_fmt311["CALL_FUNCTION_KW"]
@@ -257,13 +306,15 @@ def format_BINARY_OP(arg) -> str:
257306
**opcode_arg_fmt310,
258307
**{
259308
"BINARY_OP": format_BINARY_OP,
309+
"SWAP": format_SWAP_OP,
260310
},
261311
}
262312

263313
opcode_extended_fmt = opcode_extended_fmt311 = {
264314
**opcode_extended_fmt310,
265315
**{
266316
"BINARY_OP": extended_format_BINARY_OP,
317+
"COPY": extended_format_COPY_OP,
267318
},
268319
}
269320

0 commit comments

Comments
 (0)