Skip to content

Commit b53db67

Browse files
authored
Merge pull request #169 from rocky/add-show-file-offset-option
Add option -x --show-file-offsets
2 parents d490124 + f7a9ae3 commit b53db67

File tree

8 files changed

+135
-35
lines changed

8 files changed

+135
-35
lines changed

pytest/test_load_file.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,13 @@ def test_load_file() -> None:
2323
obj_path = check_object_path(load_py)
2424
(
2525
version_tuple,
26-
timestamp,
27-
magic_int,
26+
_timestamp,
27+
_magic_int,
2828
co_module,
2929
pypy,
3030
source_size,
3131
sip_hash,
32+
_file_offsets,
3233
) = load_module(obj_path)
3334
if (3, 3) <= version_tuple <= (3, 7):
3435
statinfo = os.stat(load_py)

xdis/bin/pydisasm.py

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,19 +43,25 @@
4343
metavar="FUNCTION-OR-METHOD",
4444
multiple=True,
4545
type=str,
46-
help=("Specify which specific methods or functions to show. "
47-
"If omitted all, functions are shown. "
48-
"Can be given multiple times.")
46+
help=(
47+
"Specify which specific methods or functions to show. "
48+
"If omitted, all functions are shown. "
49+
"Can be given multiple times."
50+
),
4951
)
50-
5152
@click.option(
5253
"--show-source/--no-show-source",
5354
"-S",
5455
help="Intersperse Python source text from linecache if available.",
5556
)
57+
@click.option(
58+
"--show-file-offsets/--no-show-file-offsets",
59+
"-x",
60+
help="Show bytecode file hex addresses for the start of each code object.",
61+
)
5662
@click.version_option(version=__version__)
5763
@click.argument("files", nargs=-1, type=click.Path(readable=True), required=True)
58-
def main(format: list[str], method: tuple, show_source: bool, files):
64+
def main(format: list[str], method: tuple, show_source: bool, show_file_offsets, files):
5965
"""Disassembles a Python bytecode file.
6066
6167
We handle bytecode for virtually every release of Python and some releases of PyPy.
@@ -91,7 +97,14 @@ def main(format: list[str], method: tuple, show_source: bool, files):
9197
continue
9298

9399
try:
94-
disassemble_file(path, sys.stdout, format, show_source=show_source, methods=method)
100+
disassemble_file(
101+
path,
102+
sys.stdout,
103+
format,
104+
show_source=show_source,
105+
methods=method,
106+
save_file_offsets=show_file_offsets,
107+
)
95108
except (ImportError, NotImplementedError, ValueError) as e:
96109
print(e)
97110
rc = 3

xdis/bytecode.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -497,7 +497,7 @@ def get_instructions_bytes(
497497
linestarts=linestarts,
498498
line_offset=0,
499499
exception_entries=exception_entries,
500-
labels=labels
500+
labels=labels,
501501
)
502502
)
503503

@@ -515,7 +515,9 @@ class Bytecode:
515515
Iterating over these yields the bytecode operations as Instruction instances.
516516
"""
517517

518-
def __init__(self, x, opc, first_line=None, current_offset=None, dup_lines: bool=True) -> None:
518+
def __init__(
519+
self, x, opc, first_line=None, current_offset=None, dup_lines: bool = True
520+
) -> None:
519521
self.codeobj = co = get_code_object(x)
520522
self._line_offset = 0
521523
self._cell_names = ()
@@ -536,7 +538,11 @@ def __init__(self, x, opc, first_line=None, current_offset=None, dup_lines: bool
536538
self.opnames = opc.opname
537539
self.current_offset = current_offset
538540

539-
if opc.version_tuple >= (3, 11) and not opc.is_pypy and hasattr(co, "co_exceptiontable"):
541+
if (
542+
opc.version_tuple >= (3, 11)
543+
and not opc.is_pypy
544+
and hasattr(co, "co_exceptiontable")
545+
):
540546
self.exception_entries = parse_exception_table(co.co_exceptiontable)
541547
else:
542548
self.exception_entries = None
@@ -573,7 +579,11 @@ def info(self) -> str:
573579
"""Return formatted information about the code object."""
574580
return format_code_info(self.codeobj, self.opc.version_tuple)
575581

576-
def dis(self, asm_format: str="classic", show_source: bool=False) -> str:
582+
def dis(
583+
self,
584+
asm_format: str = "classic",
585+
show_source: bool = False,
586+
) -> str:
577587
"""Return a formatted view of the bytecode operations."""
578588
co = self.codeobj
579589
filename = co.co_filename
@@ -839,7 +849,9 @@ def get_instructions(self, x, first_line=None):
839849
)
840850

841851

842-
def list2bytecode(inst_list: Iterable, opc, varnames: str, consts: Tuple[None, int]) -> bytes:
852+
def list2bytecode(
853+
inst_list: Iterable, opc, varnames: str, consts: Tuple[None, int]
854+
) -> bytes:
843855
"""Convert list/tuple of list/tuples to bytecode
844856
_names_ contains a list of name objects
845857
"""

xdis/cross_dis.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
# earlier versions of xdis (and without attribution).
2020

2121
from types import CodeType
22-
from typing import List
22+
from typing import List, Optional
2323

2424
from xdis.util import (
2525
COMPILER_FLAG_NAMES,
@@ -272,7 +272,7 @@ def pretty_flags(flags, is_pypy=False) -> str:
272272

273273

274274
def format_code_info(
275-
co, version_tuple: tuple, name=None, is_pypy=False, is_graal=False
275+
co, version_tuple: tuple, name=None, is_pypy=False, is_graal=False, file_offset: Optional[int]=None
276276
) -> str:
277277
if not name:
278278
name = co.co_name
@@ -285,6 +285,9 @@ def format_code_info(
285285
# Later versions use "<module>"
286286
lines.append("# Filename: %s" % co.co_filename)
287287

288+
if file_offset:
289+
lines.append("# Offset in file: 0x%x" % file_offset)
290+
288291
if not is_graal:
289292
if version_tuple >= (1, 3):
290293
lines.append("# Argument count: %s" % co.co_argcount)

xdis/disasm.py

Lines changed: 46 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
import sys
3030
import types
3131
from collections import deque
32-
from typing import Tuple
32+
from typing import Optional, Tuple
3333

3434
import xdis
3535
from xdis.bytecode import Bytecode
@@ -72,6 +72,7 @@ def show_module_header(
7272
header=True,
7373
show_filename=True,
7474
is_graal=False,
75+
file_offset: Optional[int] = None,
7576
) -> None:
7677
bytecode_version = ".".join((str(i) for i in version_tuple))
7778
real_out = out or sys.stdout
@@ -121,22 +122,25 @@ def show_module_header(
121122
real_out.write("# SipHash: 0x%x\n" % sip_hash)
122123
if show_filename:
123124
real_out.write("# Embedded file name: %s\n" % co.co_filename)
125+
if file_offset:
126+
real_out.write("# Position in bytecode file: 0x%x\n" % file_offset)
124127

125128

126129
def disco(
127130
version_tuple,
128131
co,
129132
timestamp,
130133
out=sys.stdout,
131-
is_pypy: bool=False,
134+
is_pypy: bool = False,
132135
magic_int=None,
133136
source_size=None,
134137
sip_hash=None,
135-
asm_format: str="classic",
138+
asm_format: str = "classic",
136139
alternate_opmap=None,
137-
show_source: bool=False,
138-
is_graal: bool=False,
140+
show_source: bool = False,
141+
is_graal: bool = False,
139142
methods=tuple(),
143+
file_offsets: dict = {},
140144
) -> None:
141145
"""
142146
disassembles and deparses a given code block 'co'
@@ -163,7 +167,15 @@ def disco(
163167

164168
if co.co_filename and asm_format != "xasm":
165169
if not_filtered(co, methods):
166-
real_out.write(format_code_info(co, version_tuple, is_graal=is_graal) + "\n")
170+
real_out.write(
171+
format_code_info(
172+
co,
173+
version_tuple,
174+
is_graal=is_graal,
175+
file_offset=file_offsets.get(co),
176+
)
177+
+ "\n"
178+
)
167179
pass
168180

169181
opc = get_opcode(version_tuple, is_pypy, alternate_opmap)
@@ -184,6 +196,7 @@ def disco(
184196
dup_lines=True,
185197
show_source=show_source,
186198
methods=methods,
199+
file_offsets=file_offsets,
187200
)
188201

189202

@@ -196,6 +209,7 @@ def disco_loop(
196209
asm_format="classic",
197210
show_source=False,
198211
methods=tuple(),
212+
file_offsets: dict = {},
199213
) -> None:
200214
"""Disassembles a queue of code objects. If we discover
201215
another code object which will be found in co_consts, we add
@@ -211,7 +225,13 @@ def disco_loop(
211225
co = queue.popleft()
212226
if not_filtered(co, methods):
213227
if co.co_name not in ("<module>", "?"):
214-
real_out.write("\n" + format_code_info(co, version_tuple) + "\n")
228+
real_out.write(
229+
"\n"
230+
+ format_code_info(
231+
co, version_tuple, file_offset=file_offsets.get(co)
232+
)
233+
+ "\n"
234+
)
215235

216236
if asm_format == "dis":
217237
assert version_tuple[:2] == PYTHON_VERSION_TRIPLE[:2], (
@@ -222,12 +242,18 @@ def disco_loop(
222242
else:
223243
bytecode = Bytecode(co, opc, dup_lines=dup_lines)
224244
real_out.write(
225-
bytecode.dis(asm_format=asm_format, show_source=show_source) + "\n"
245+
bytecode.dis(
246+
asm_format=asm_format,
247+
show_source=show_source,
248+
)
249+
+ "\n"
226250
)
227251

228252
if version_tuple >= (3, 11):
229253
if bytecode.exception_entries not in (None, []):
230-
exception_table = format_exception_table(bytecode, version_tuple)
254+
exception_table = format_exception_table(
255+
bytecode, version_tuple
256+
)
231257
real_out.write(exception_table + "\n")
232258

233259
for c in co.co_consts:
@@ -242,7 +268,9 @@ def code_uniquify(basename, co_code) -> str:
242268
return "%s_0x%x" % (basename, id(co_code))
243269

244270

245-
def disco_loop_asm_format(opc, version_tuple, co, real_out, fn_name_map, all_fns) -> None:
271+
def disco_loop_asm_format(
272+
opc, version_tuple, co, real_out, fn_name_map, all_fns
273+
) -> None:
246274
"""Produces disassembly in a format more conducive to
247275
automatic assembly by producing inner modules before they are
248276
used by outer ones. Since this is recursive, we'll
@@ -318,7 +346,8 @@ def disassemble_file(
318346
asm_format="classic",
319347
alternate_opmap=None,
320348
show_source=False,
321-
methods: Tuple[str] = tuple()
349+
methods: Tuple[str] = tuple(),
350+
save_file_offsets: bool = False,
322351
):
323352
"""
324353
Disassemble Python byte-code file (.pyc).
@@ -329,6 +358,7 @@ def disassemble_file(
329358
If that fails, we'll compile internally for the Python version currently running.
330359
"""
331360
pyc_filename = None
361+
file_offsets = {}
332362
try:
333363
# FIXME: add whether we want PyPy
334364
pyc_filename = check_object_path(filename)
@@ -340,7 +370,8 @@ def disassemble_file(
340370
is_pypy,
341371
source_size,
342372
sip_hash,
343-
) = load_module(pyc_filename)
373+
file_offsets,
374+
) = load_module(pyc_filename, save_file_offsets=save_file_offsets)
344375
except (ImportError, NotImplementedError, ValueError):
345376
raise
346377
except Exception:
@@ -391,6 +422,7 @@ def disassemble_file(
391422
show_source=show_source,
392423
is_graal=is_graal,
393424
methods=methods,
425+
file_offsets=file_offsets,
394426
)
395427
# print co.co_filename
396428
return (
@@ -404,9 +436,11 @@ def disassemble_file(
404436
sip_hash,
405437
)
406438

439+
407440
def not_filtered(co: types.CodeType, methods: tuple) -> bool:
408441
return len(methods) == 0 or co.co_name in methods
409442

443+
410444
def _test() -> None:
411445
"""Simple test program to disassemble a file."""
412446
argc = len(sys.argv)

xdis/dropbox/decrypt25.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ def fix_dropbox_pyc(fp):
294294
timestamp = struct.unpack("I", ts)[0]
295295
b = fp.read()
296296
co = loads(b)
297-
return (2, 5, "0dropbox"), timestamp, 62131, co, False, source_size, None
297+
return (2, 5, "0dropbox"), timestamp, 62131, co, False, source_size, None, {}
298298

299299

300300
def fix_dir(path) -> None:

0 commit comments

Comments
 (0)