Skip to content

Commit 79f745f

Browse files
authored
Merge pull request #155 from rocky/add-get_logical_instruction_at_offset
Add get_logical_instruction_at_offset()...
2 parents be3ec97 + b7d9ac6 commit 79f745f

File tree

2 files changed

+91
-38
lines changed

2 files changed

+91
-38
lines changed
803 Bytes
Binary file not shown.

xdis/bytecode.py

Lines changed: 91 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -223,8 +223,9 @@ def prefer_double_quote(string: str) -> str:
223223
return string
224224

225225

226-
def get_instructions_bytes(
226+
def get_logical_instruction_at_offset(
227227
bytecode,
228+
offset: int,
228229
opc,
229230
varnames=None,
230231
names=None,
@@ -233,16 +234,17 @@ def get_instructions_bytes(
233234
linestarts=None,
234235
line_offset=0,
235236
exception_entries=None,
237+
labels = None
236238
):
237-
"""Iterate over the instructions in a bytecode string.
238-
239-
Generates a sequence of Instruction namedtuples giving the details of each
240-
opcode. Additional information about the code's runtime environment
241-
e.g., variable names, constants, can be specified using optional
242-
arguments.
243-
244239
"""
245-
labels = opc.findlabels(bytecode, opc)
240+
Return a single logical instruction for `bytecode` at offset `offset`.
241+
if the opcode at offset is EXTENDED_ARG, then instructions are returned
242+
until we no longer have an EXTENDED_ARG instruction. Note that the
243+
last non-EXTENDED_ARG instruction will have its argument value adjusted
244+
to note the increased size of the argument.
245+
"""
246+
if labels is None:
247+
labels = opc.findlabels(bytecode, opc)
246248

247249
if exception_entries is not None:
248250
for start, end, target, _, _ in exception_entries:
@@ -256,40 +258,45 @@ def get_instructions_bytes(
256258
python_36 = True if opc.python_version >= (3, 6) else False
257259

258260
starts_line = None
259-
# enumerate() is not an option, since we sometimes process
260-
# multiple elements on a single pass through the loop
261+
261262
n = len(bytecode)
262-
i = 0
263+
263264
extended_arg_count = 0
264265
extended_arg = 0
266+
265267
if hasattr(opc, "EXTENDED_ARG"):
266268
extended_arg_size = instruction_size(opc.EXTENDED_ARG, opc)
267269
else:
268270
extended_arg_size = 0
269271

270-
while i < n:
272+
# This is not necessarily true initially, but it gets us through the
273+
# loop below.
274+
275+
last_op_was_extended_arg = True
276+
i = offset
277+
278+
while i < n and last_op_was_extended_arg:
271279
op = code2num(bytecode, i)
280+
opname = opc.opname[op]
281+
optype = get_optype(op, opc)
272282

273283
offset = i
274284
if linestarts is not None:
275285
starts_line = linestarts.get(i, None)
276286
if starts_line is not None:
277287
starts_line += line_offset
278-
if i in labels:
279-
is_jump_target = True
280-
else:
281-
is_jump_target = False
288+
289+
is_jump_target = i in labels
282290

283291
i += 1
284292
arg = None
285293
argval = None
286294
argrepr = ""
287295
has_arg = op_has_argument(op, opc)
288-
optype = get_optype(op, opc)
289296
if has_arg:
290297
if python_36:
291298
arg = code2num(bytecode, i) | extended_arg
292-
extended_arg = (arg << 8) if op == opc.EXTENDED_ARG else 0
299+
extended_arg = (arg << 8) if opname == "EXTENDED_ARG" else 0
293300
# FIXME: Python 3.6.0a1 is 2, for 3.6.a3 we have 1
294301
i += 1
295302
else:
@@ -301,7 +308,7 @@ def get_instructions_bytes(
301308
i += 2
302309
extended_arg = (
303310
arg * 0x10000
304-
if hasattr(opc, "EXTENDED_ARG") and op == opc.EXTENDED_ARG
311+
if opname == "EXTENDED_ARG"
305312
else 0
306313
)
307314

@@ -314,34 +321,34 @@ def get_instructions_bytes(
314321
if op in opc.CONST_OPS:
315322
argval, argrepr = _get_const_info(arg, constants)
316323
elif op in opc.NAME_OPS:
317-
if opc.version_tuple >= (3, 11) and opc.opname[op] == "LOAD_GLOBAL":
324+
if opc.version_tuple >= (3, 11) and opname == "LOAD_GLOBAL":
318325
argval, argrepr = _get_name_info(arg >> 1, names)
319326
if arg & 1:
320327
argrepr = "NULL + " + argrepr
321-
elif opc.version_tuple >= (3, 12) and opc.opname[op] == "LOAD_ATTR":
328+
elif opc.version_tuple >= (3, 12) and opname == "LOAD_ATTR":
322329
argval, argrepr = _get_name_info(arg >> 1, names)
323330
if arg & 1:
324331
argrepr = "NULL|self + " + argrepr
325332
elif (
326-
opc.version_tuple >= (3, 12) and opc.opname[op] == "LOAD_SUPER_ATTR"
333+
opc.version_tuple >= (3, 12) and opname == "LOAD_SUPER_ATTR"
327334
):
328335
argval, argrepr = _get_name_info(arg >> 2, names)
329336
if arg & 1:
330337
argrepr = "NULL|self + " + argrepr
331338
else:
332339
argval, argrepr = _get_name_info(arg, names)
333340
elif op in opc.JREL_OPS:
334-
signed_arg = -arg if "JUMP_BACKWARD" in opc.opname[op] else arg
341+
signed_arg = -arg if "JUMP_BACKWARD" in opname else arg
335342
argval = i + get_jump_val(signed_arg, opc.python_version)
336343
# FOR_ITER has a cache instruction in 3.12
337-
if opc.version_tuple >= (3, 12) and opc.opname[op] == "FOR_ITER":
344+
if opc.version_tuple >= (3, 12) and opname == "FOR_ITER":
338345
argval += 2
339346
argrepr = "to " + repr(argval)
340347
elif op in opc.JABS_OPS:
341348
argval = get_jump_val(arg, opc.python_version)
342349
argrepr = "to " + repr(argval)
343350
elif op in opc.LOCAL_OPS:
344-
if opc.version_tuple >= (3, 13) and opc.opname[op] in ("LOAD_FAST_LOAD_FAST", "STORE_FAST_LOAD_FAST", "STORE_FAST_STORE_FAST"):
351+
if opc.version_tuple >= (3, 13) and opname in ("LOAD_FAST_LOAD_FAST", "STORE_FAST_LOAD_FAST", "STORE_FAST_STORE_FAST"):
345352
arg1 = arg >> 4
346353
arg2 = arg & 15
347354
argval1, argrepr1 = _get_name_info(arg1, (varnames or tuple()) + (cells or tuple()))
@@ -371,7 +378,7 @@ def get_instructions_bytes(
371378
argval = (opc.cmp_op[arg])
372379
argrepr = argval
373380
elif op in opc.NARGS_OPS:
374-
opname = opc.opname[op]
381+
opname = opname
375382
if python_36 and opname in ("CALL_FUNCTION", "CALL_FUNCTION_EX"):
376383
if opname == "CALL_FUNCTION":
377384
argrepr = format_CALL_FUNCTION(code2num(bytecode, i - 1))
@@ -387,17 +394,15 @@ def get_instructions_bytes(
387394
code2num(bytecode, i - 2),
388395
code2num(bytecode, i - 1),
389396
)
390-
if hasattr(opc, "opcode_arg_fmt") and opc.opname[op] in opc.opcode_arg_fmt:
391-
argrepr = opc.opcode_arg_fmt[opc.opname[op]](arg)
397+
if hasattr(opc, "opcode_arg_fmt") and opname in opc.opcode_arg_fmt:
398+
argrepr = opc.opcode_arg_fmt[opname](arg)
392399
else:
393400
if python_36:
394401
i += 1
395-
if hasattr(opc, "opcode_arg_fmt") and opc.opname[op] in opc.opcode_arg_fmt:
396-
argrepr = opc.opcode_arg_fmt[opc.opname[op]](arg)
402+
if hasattr(opc, "opcode_arg_fmt") and opname in opc.opcode_arg_fmt:
403+
argrepr = opc.opcode_arg_fmt[opname](arg)
397404

398-
opname = opc.opname[op]
399405
inst_size = instruction_size(op, opc) + (extended_arg_count * extended_arg_size)
400-
# fallthrough = op not in opc.nofollow
401406
start_offset = offset if opc.oppop[op] == 0 else None
402407

403408
yield Instruction(
@@ -419,11 +424,9 @@ def get_instructions_bytes(
419424
start_offset=start_offset,
420425
)
421426
# fallthrough
422-
extended_arg_count = (
423-
extended_arg_count + 1
424-
if hasattr(opc, "EXTENDED_ARG") and op == opc.EXTENDED_ARG
425-
else 0
426-
)
427+
last_op_was_extended_arg = True if opname == "EXTENDED_ARG" else False
428+
extended_arg_count = extended_arg_count + 1 if last_op_was_extended_arg else 0
429+
# end loop
427430

428431

429432
def next_offset(op: int, opc, offset: int) -> int:
@@ -434,6 +437,56 @@ def next_offset(op: int, opc, offset: int) -> int:
434437
return offset + instruction_size(op, opc)
435438

436439

def get_instructions_bytes(
    bytecode,
    opc,
    varnames=None,
    names=None,
    constants=None,
    cells=None,
    linestarts=None,
    line_offset=0,
    exception_entries=None,
):
    """
    Iterate over the instructions in a bytecode string.

    Generates a sequence of Instruction namedtuples giving the details of each
    opcode. Additional information about the code's runtime environment
    e.g., variable names, constants, can be specified using optional
    arguments.

    Decoding is delegated to get_logical_instruction_at_offset(), one
    logical instruction at a time: that generator yields any leading
    EXTENDED_ARG instructions followed by the instruction they extend.

    `line_offset` is forwarded unchanged so that it is added to each line
    number found in `linestarts`; `exception_entries` is forwarded as well.
    """
    n = len(bytecode)
    offset = 0

    while offset < n:
        # Jump-target labels are not computed here: when no `labels`
        # argument is supplied, get_logical_instruction_at_offset() calls
        # opc.findlabels() itself.
        for instruction in get_logical_instruction_at_offset(
            bytecode,
            offset,
            opc,
            varnames=varnames,
            names=names,
            constants=constants,
            cells=cells,
            linestarts=linestarts,
            # Pass the caller's value through; a hard-coded 0 here would
            # silently drop the requested line-number shift.
            line_offset=line_offset,
            exception_entries=exception_entries,
        ):
            yield instruction
            # Track where the instruction after this one begins, so the
            # next pass of the outer loop resumes decoding from there.
            offset = next_offset(instruction.opcode, opc, instruction.offset)
487+
488+
489+
437490
class Bytecode:
438491
"""Bytecode operations involving a Python code object.
439492

0 commit comments

Comments
 (0)