@@ -223,8 +223,9 @@ def prefer_double_quote(string: str) -> str:
223223 return string
224224
225225
226- def get_instructions_bytes (
226+ def get_logical_instruction_at_offset (
227227 bytecode ,
228+ offset : int ,
228229 opc ,
229230 varnames = None ,
230231 names = None ,
@@ -233,16 +234,17 @@ def get_instructions_bytes(
233234 linestarts = None ,
234235 line_offset = 0 ,
235236 exception_entries = None ,
237+ labels = None
236238):
237- """Iterate over the instructions in a bytecode string.
238-
239- Generates a sequence of Instruction namedtuples giving the details of each
240- opcode. Additional information about the code's runtime environment
241- e.g., variable names, constants, can be specified using optional
242- arguments.
243-
244239 """
245- labels = opc .findlabels (bytecode , opc )
240+ Return a single logical instruction for `bytecode` at offset `offset`.
241+ if the opcode at offset is EXTENDED_ARG, then instructions are returned
242+ until we no longer have an EXTENDED_ARG instruction. Note that the
243+ last non-EXTENDED_ARG instruction will have its argument value adjusted
244+ to note the increased size of the argument.
245+ """
246+ if labels is None :
247+ labels = opc .findlabels (bytecode , opc )
246248
247249 if exception_entries is not None :
248250 for start , end , target , _ , _ in exception_entries :
@@ -256,40 +258,45 @@ def get_instructions_bytes(
256258 python_36 = True if opc .python_version >= (3 , 6 ) else False
257259
258260 starts_line = None
259- # enumerate() is not an option, since we sometimes process
260- # multiple elements on a single pass through the loop
261+
261262 n = len (bytecode )
262- i = 0
263+
263264 extended_arg_count = 0
264265 extended_arg = 0
266+
265267 if hasattr (opc , "EXTENDED_ARG" ):
266268 extended_arg_size = instruction_size (opc .EXTENDED_ARG , opc )
267269 else :
268270 extended_arg_size = 0
269271
270- while i < n :
272+ # This is not necessarily true initially, but it gets us through the
273+ # loop below.
274+
275+ last_op_was_extended_arg = True
276+ i = offset
277+
278+ while i < n and last_op_was_extended_arg :
271279 op = code2num (bytecode , i )
280+ opname = opc .opname [op ]
281+ optype = get_optype (op , opc )
272282
273283 offset = i
274284 if linestarts is not None :
275285 starts_line = linestarts .get (i , None )
276286 if starts_line is not None :
277287 starts_line += line_offset
278- if i in labels :
279- is_jump_target = True
280- else :
281- is_jump_target = False
288+
289+ is_jump_target = i in labels
282290
283291 i += 1
284292 arg = None
285293 argval = None
286294 argrepr = ""
287295 has_arg = op_has_argument (op , opc )
288- optype = get_optype (op , opc )
289296 if has_arg :
290297 if python_36 :
291298 arg = code2num (bytecode , i ) | extended_arg
292- extended_arg = (arg << 8 ) if op == opc . EXTENDED_ARG else 0
299+ extended_arg = (arg << 8 ) if opname == " EXTENDED_ARG" else 0
293300 # FIXME: Python 3.6.0a1 is 2, for 3.6.a3 we have 1
294301 i += 1
295302 else :
@@ -301,7 +308,7 @@ def get_instructions_bytes(
301308 i += 2
302309 extended_arg = (
303310 arg * 0x10000
304- if hasattr ( opc , "EXTENDED_ARG" ) and op == opc . EXTENDED_ARG
311+ if opname == " EXTENDED_ARG"
305312 else 0
306313 )
307314
@@ -314,34 +321,34 @@ def get_instructions_bytes(
314321 if op in opc .CONST_OPS :
315322 argval , argrepr = _get_const_info (arg , constants )
316323 elif op in opc .NAME_OPS :
317- if opc .version_tuple >= (3 , 11 ) and opc . opname [ op ] == "LOAD_GLOBAL" :
324+ if opc .version_tuple >= (3 , 11 ) and opname == "LOAD_GLOBAL" :
318325 argval , argrepr = _get_name_info (arg >> 1 , names )
319326 if arg & 1 :
320327 argrepr = "NULL + " + argrepr
321- elif opc .version_tuple >= (3 , 12 ) and opc . opname [ op ] == "LOAD_ATTR" :
328+ elif opc .version_tuple >= (3 , 12 ) and opname == "LOAD_ATTR" :
322329 argval , argrepr = _get_name_info (arg >> 1 , names )
323330 if arg & 1 :
324331 argrepr = "NULL|self + " + argrepr
325332 elif (
326- opc .version_tuple >= (3 , 12 ) and opc . opname [ op ] == "LOAD_SUPER_ATTR"
333+ opc .version_tuple >= (3 , 12 ) and opname == "LOAD_SUPER_ATTR"
327334 ):
328335 argval , argrepr = _get_name_info (arg >> 2 , names )
329336 if arg & 1 :
330337 argrepr = "NULL|self + " + argrepr
331338 else :
332339 argval , argrepr = _get_name_info (arg , names )
333340 elif op in opc .JREL_OPS :
334- signed_arg = - arg if "JUMP_BACKWARD" in opc . opname [ op ] else arg
341+ signed_arg = - arg if "JUMP_BACKWARD" in opname else arg
335342 argval = i + get_jump_val (signed_arg , opc .python_version )
336343 # FOR_ITER has a cache instruction in 3.12
337- if opc .version_tuple >= (3 , 12 ) and opc . opname [ op ] == "FOR_ITER" :
344+ if opc .version_tuple >= (3 , 12 ) and opname == "FOR_ITER" :
338345 argval += 2
339346 argrepr = "to " + repr (argval )
340347 elif op in opc .JABS_OPS :
341348 argval = get_jump_val (arg , opc .python_version )
342349 argrepr = "to " + repr (argval )
343350 elif op in opc .LOCAL_OPS :
344- if opc .version_tuple >= (3 , 13 ) and opc . opname [ op ] in ("LOAD_FAST_LOAD_FAST" , "STORE_FAST_LOAD_FAST" , "STORE_FAST_STORE_FAST" ):
351+ if opc .version_tuple >= (3 , 13 ) and opname in ("LOAD_FAST_LOAD_FAST" , "STORE_FAST_LOAD_FAST" , "STORE_FAST_STORE_FAST" ):
345352 arg1 = arg >> 4
346353 arg2 = arg & 15
347354 argval1 , argrepr1 = _get_name_info (arg1 , (varnames or tuple ()) + (cells or tuple ()))
@@ -371,7 +378,7 @@ def get_instructions_bytes(
371378 argval = (opc .cmp_op [arg ])
372379 argrepr = argval
373380 elif op in opc .NARGS_OPS :
374- opname = opc . opname [ op ]
381+ opname = opname
375382 if python_36 and opname in ("CALL_FUNCTION" , "CALL_FUNCTION_EX" ):
376383 if opname == "CALL_FUNCTION" :
377384 argrepr = format_CALL_FUNCTION (code2num (bytecode , i - 1 ))
@@ -387,17 +394,15 @@ def get_instructions_bytes(
387394 code2num (bytecode , i - 2 ),
388395 code2num (bytecode , i - 1 ),
389396 )
390- if hasattr (opc , "opcode_arg_fmt" ) and opc . opname [ op ] in opc .opcode_arg_fmt :
391- argrepr = opc .opcode_arg_fmt [opc . opname [ op ] ](arg )
397+ if hasattr (opc , "opcode_arg_fmt" ) and opname in opc .opcode_arg_fmt :
398+ argrepr = opc .opcode_arg_fmt [opname ](arg )
392399 else :
393400 if python_36 :
394401 i += 1
395- if hasattr (opc , "opcode_arg_fmt" ) and opc . opname [ op ] in opc .opcode_arg_fmt :
396- argrepr = opc .opcode_arg_fmt [opc . opname [ op ] ](arg )
402+ if hasattr (opc , "opcode_arg_fmt" ) and opname in opc .opcode_arg_fmt :
403+ argrepr = opc .opcode_arg_fmt [opname ](arg )
397404
398- opname = opc .opname [op ]
399405 inst_size = instruction_size (op , opc ) + (extended_arg_count * extended_arg_size )
400- # fallthrough = op not in opc.nofollow
401406 start_offset = offset if opc .oppop [op ] == 0 else None
402407
403408 yield Instruction (
@@ -419,11 +424,9 @@ def get_instructions_bytes(
419424 start_offset = start_offset ,
420425 )
421426 # fallthrough
422- extended_arg_count = (
423- extended_arg_count + 1
424- if hasattr (opc , "EXTENDED_ARG" ) and op == opc .EXTENDED_ARG
425- else 0
426- )
427+ last_op_was_extended_arg = True if opname == "EXTENDED_ARG" else False
428+ extended_arg_count = extended_arg_count + 1 if last_op_was_extended_arg else 0
429+ # end loop
427430
428431
429432def next_offset (op : int , opc , offset : int ) -> int :
@@ -434,6 +437,56 @@ def next_offset(op: int, opc, offset: int) -> int:
434437 return offset + instruction_size (op , opc )
435438
436439
440+ def get_instructions_bytes (
441+ bytecode ,
442+ opc ,
443+ varnames = None ,
444+ names = None ,
445+ constants = None ,
446+ cells = None ,
447+ linestarts = None ,
448+ line_offset = 0 ,
449+ exception_entries = None ,
450+ ):
451+ """
452+ Iterate over the instructions in a bytecode string.
453+
454+ Generates a sequence of Instruction namedtuples giving the details of each
455+ opcode. Additional information about the code's runtime environment
456+ e.g., variable names, constants, can be specified using optional
457+ arguments.
458+ """
459+ labels = opc .findlabels (bytecode , opc )
460+
461+ if exception_entries is not None :
462+ for start , end , target , _ , _ in exception_entries :
463+ for i in range (start , end ):
464+ labels .append (target )
465+
466+ n = len (bytecode )
467+ offset = 0
468+
469+ while offset < n :
470+ instructions = list (get_logical_instruction_at_offset (
471+ bytecode ,
472+ offset ,
473+ opc ,
474+ varnames = varnames ,
475+ names = names ,
476+ constants = constants ,
477+ cells = cells ,
478+ linestarts = linestarts ,
479+ line_offset = 0 ,
480+ exception_entries = exception_entries ,
481+ )
482+ )
483+
484+ for instruction in instructions :
485+ yield instruction
486+ offset = next_offset (instruction .opcode , opc , instruction .offset )
487+
488+
489+
437490class Bytecode :
438491 """Bytecode operations involving a Python code object.
439492
0 commit comments