Skip to content

Commit f791905

Browse files
guyush1roddyrap
andcommitted
Added a module-size analysis script
The script determines which frozen modules are the largest so that we can remove them whenever possible, keeping the Python build (and thus gdb) relatively small. Co-authored-by: roddyrap <[email protected]>
1 parent d734d02 commit f791905

File tree

1 file changed

+272
-0
lines changed

1 file changed

+272
-0
lines changed

get_module_sizes.py

Lines changed: 272 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,272 @@
1+
#!/usr/bin/env python3
2+
3+
import argparse
4+
import re
5+
import subprocess
6+
import sys
7+
from typing import Dict, Iterable
8+
9+
TABLE_PATTERN = re.compile(r"""
10+
\s*(?P<number>\d+):\s+ # Match the symbol number. Allow spaces because symbol numbers are aligned to the right.
11+
(?P<address>[a-zA-Z0-9]+)\s+ # Symbol address in file.
12+
(?P<size>\d+)\s+ # Symbol size.
13+
(?P<type>\w+)\s+ # Symbol type.
14+
(?P<bind>\w+)\s+ # Symbol bind.
15+
(?P<vis>\w+)\s+ # Symbol Vis(ibility, I think).
16+
(?P<ndx>\w+)\s+ # Symbol NDX.
17+
(?P<name>[\w.]+) # Symbol name.
18+
""", re.X)
19+
20+
# Default module names used by get_module_sizes() to attribute symbols.
# A symbol is credited to the first entry E (in list order) for which the symbol
# name starts with "E_" or "_Py_get_E_". Because the first match wins, an entry
# whose name extends another entry's name with an underscore must be listed
# before the shorter one (e.g. 'pydoc_data' precedes 'pydoc').
# NOTE(review): the blank-line-separated groups at the end look like additions
# beyond the standard-library names (site/runpy, gdb/pygments, zipimport,
# const_*) - confirm their intent with the author.
MODULES= [
    'abc',
    'aifc',
    '_aix_support',
    'antigravity',
    'argparse',
    'ast',
    'base64',
    'bdb',
    'bisect',
    'calendar',
    'cgi',
    'cgitb',
    'chunk',
    'cmd',
    'codecs',
    'codeop',
    'code',
    'collections',
    '_collections_abc',
    'colorsys',
    '_compat_pickle',
    'compileall',
    '_compression',
    'concurrent',
    'configparser',
    'contextlib',
    'contextvars',
    'copy',
    'copyreg',
    'cProfile',
    'crypt',
    'csv',
    'dataclasses',
    'datetime',
    'dbm',
    'decimal',
    'difflib',
    'dis',
    'doctest',
    'email',
    'encodings',
    'ensurepip',
    'enum',
    'filecmp',
    'fileinput',
    'fnmatch',
    'fractions',
    'ftplib',
    'functools',
    '__future__',
    'genericpath',
    'getopt',
    'getpass',
    'gettext',
    'glob',
    'graphlib',
    'gzip',
    'hashlib',
    'heapq',
    'hmac',
    'html',
    'http',
    'idlelib',
    'imaplib',
    'imghdr',
    'importlib',
    'inspect',
    'io',
    'ipaddress',
    'json',
    'keyword',
    'lib2to3',
    'linecache',
    'locale',
    'logging',
    'lzma',
    'mailbox',
    'mailcap',
    '_markupbase',
    'mimetypes',
    'modulefinder',
    'msilib',
    'multiprocessing',
    'netrc',
    'nntplib',
    'ntpath',
    'nturl2path',
    'numbers',
    'opcode',
    'operator',
    'optparse',
    'os',
    '_osx_support',
    'pathlib',
    'pdb',
    '__phello__',
    'pickle',
    'pickletools',
    'pipes',
    'pkgutil',
    'platform',
    'plistlib',
    'poplib',
    'posixpath',
    'pprint',
    'profile',
    'pstats',
    'pty',
    '_py_abc',
    'pyclbr',
    'py_compile',
    '_pydatetime',
    '_pydecimal',
    'pydoc_data',
    'pydoc',
    '_pyio',
    '_pylong',
    'queue',
    'quopri',
    'random',
    're',
    'reprlib',
    'rlcompleter',
    'sched',
    'selectors',
    'shelve',
    'shlex',
    'shutil',
    'signal',
    'smtplib',
    'sndhdr',
    'socket',
    'socketserver',
    'statistics',
    'stat',
    'stringprep',
    'string',
    '_strptime',
    'struct',
    'subprocess',
    'sunau',
    'symtable',
    'sysconfig',
    'tabnanny',
    'tarfile',
    'telnetlib',
    'tempfile',
    'textwrap',
    'this',
    '_threading_local',
    'threading',
    'timeit',
    'tokenize',
    'token',
    'tomllib',
    'traceback',
    'tracemalloc',
    'trace',
    'tty',
    'types',
    'typing',
    'urllib',
    'uuid',
    'uu',
    'warnings',
    'wave',
    'weakref',
    '_weakrefset',
    'webbrowser',
    'wsgiref',
    'xdrlib',
    'zipapp',
    'zipfile',
    'zoneinfo',
    '__hello__',

    'site',
    '_sitebuiltins',
    'runpy',

    'gdb',
    'pygments',

    'zipimport',

    'const_str',
    'const_int',
]
209+
210+
def print_warning(message: str, prefix: str = "Warning: ", color: str = "\033[33m"):
    """Print a warning line to stderr with a colorized prefix.

    Only ``prefix`` is wrapped in the ``color`` escape sequence (ANSI yellow by
    default); the color is reset before ``message`` is written.

    Args:
        message: Text of the warning.
        prefix: Label printed in front of the message.
        color: ANSI escape sequence emitted before the prefix.
    """
    reset_sequence = "\033[0m"
    highlighted_prefix = f"{color}{prefix}{reset_sequence}"
    full_line = f"{highlighted_prefix}{message}"
    print(full_line, file=sys.stderr)
213+
214+
def human_bytes(num_bytes: float, byte_step: int = 1024) -> str:
    """Return the given bytes as a human friendly string.

    The value is repeatedly divided by ``byte_step`` while the result stays at
    or above one unit, then rendered with two decimals and a unit suffix.

    Args:
        num_bytes: Size in bytes. Negative values keep their sign and are
            scaled by magnitude.
        byte_step: Ratio between consecutive units. A step of 1024 yields
            binary unit names ("KiB", "MiB", ...); any other step yields
            "KB", "MB", ...

    Returns:
        A string such as ``"512.00 B"`` or ``"1.50 KiB"``. Values larger than
        the biggest known unit are expressed as a large count of that unit
        (e.g. ``"1024.00 YiB"``).
    """
    PREFIXES = ['B', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y']

    scaled = num_bytes
    chosen_prefix = PREFIXES[0]
    # Advance the value and its prefix together, so that running out of
    # prefixes can never leave the value divided once more than the unit says.
    for next_prefix in PREFIXES[1:]:
        next_scaled = scaled / byte_step
        # Compare magnitudes so negative sizes are scaled like positive ones.
        if abs(next_scaled) < 1.0:
            break
        scaled, chosen_prefix = next_scaled, next_prefix

    if chosen_prefix != 'B':
        chosen_prefix += ("i" if byte_step == 1024 else "") + "B"

    return f"{scaled:.2f} {chosen_prefix}"
231+
232+
def get_module_sizes(object_file: str, module_list: Iterable[str] = None) -> Dict[str, int]:
    """Sum the sizes of an object file's symbols, grouped by module.

    Runs ``readelf -sW --sym-base=10`` on ``object_file`` and parses each
    output line with ``TABLE_PATTERN``. A parsed symbol is credited to the
    first module ``M`` in ``module_list`` for which the symbol name starts
    with ``M_`` or ``_Py_get_M_``. Lines that do not parse and symbols that
    match no module are reported on stderr and skipped.

    Args:
        object_file: Path passed to ``readelf``.
        module_list: Module names to look for, in priority order (first match
            wins). When omitted or empty, ``MODULES`` is used. Any iterable is
            accepted, including one-shot iterators.

    Returns:
        A dict mapping each module that matched at least one symbol to the
        total of its symbols' sizes as reported by readelf.

    Raises:
        subprocess.CalledProcessError: If ``readelf`` exits with a non-zero status.
    """
    # Materialise once: the names are scanned again for every symbol, which
    # would silently exhaust a generator after the first symbol.
    modules = tuple(module_list) if module_list is not None else ()
    if not modules:
        modules = tuple(MODULES)

    # Build the prefix tuples once instead of re-formatting them per symbol.
    module_prefixes = [(module, (f"{module}_", f"_Py_get_{module}_")) for module in modules]

    symbol_info = subprocess.run(["readelf", "-sW", "--sym-base=10", object_file], check=True, capture_output=True).stdout.decode()

    module_sizes = {}
    for symbol_str in symbol_info.splitlines():
        symbol_match = TABLE_PATTERN.search(symbol_str)
        if symbol_match is None:
            print_warning(f"Couldn't match table to line: {symbol_str!r}")
            continue

        symbol_name, symbol_size = symbol_match.group("name"), int(symbol_match.group("size"))
        for existing_module, prefixes in module_prefixes:
            if symbol_name.startswith(prefixes):
                module_sizes[existing_module] = module_sizes.get(existing_module, 0) + symbol_size
                break
        else:
            # No module claimed this symbol.
            print_warning(f"Can't match symbol {symbol_name} (size: {human_bytes(symbol_size)}) to module")

    return module_sizes
253+
254+
def main():
    """Command-line entry point.

    Parses a positional ``object_file`` and an optional ``--total`` flag, then
    prints one ``<size>\\t<module>`` line per module in ascending size order.
    With ``--total``, a final ``Total:`` line with the sum of all module sizes
    is printed as well.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("object_file")
    cli.add_argument("--total", action='store_true')
    options = cli.parse_args()

    sizes_by_module = get_module_sizes(options.object_file)

    # Ascending order puts the biggest modules at the bottom of the output.
    for name, size in sorted(sizes_by_module.items(), key=lambda entry: entry[1]):
        print(f"{human_bytes(size)}\t{name}")

    if options.total:
        grand_total = sum(sizes_by_module.values())
        print(f"Total:\t{human_bytes(grand_total)}")
270+
271+
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)