|
| 1 | +#!/usr/bin/env python3 |
| 2 | + |
| 3 | +import argparse |
| 4 | +import re |
| 5 | +import subprocess |
| 6 | +import sys |
| 7 | +from typing import Dict, Iterable |
| 8 | + |
# Matches one symbol row of the table read in get_module_sizes and exposes
# each column as a named group (number, address, size, type, bind, vis, ndx,
# name). Written in verbose mode, so whitespace and "#" comments inside the
# pattern are ignored by the regex engine.
TABLE_PATTERN = re.compile(
    r"""
    \s*(?P<number>\d+):\s+ # Match the symbol number. Allow spaces because symbol numbers are aligned to the right.
    (?P<address>[a-zA-Z0-9]+)\s+ # Symbol address in file.
    (?P<size>\d+)\s+ # Symbol size.
    (?P<type>\w+)\s+ # Symbol type.
    (?P<bind>\w+)\s+ # Symbol bind.
    (?P<vis>\w+)\s+ # Symbol Vis(ibility, I think).
    (?P<ndx>\w+)\s+ # Symbol NDX.
    (?P<name>[\w.]+) # Symbol name.
""",
    flags=re.VERBOSE,
)
| 19 | + |
# Default module names used by get_module_sizes: a symbol is attributed to an
# entry when its name starts with "<entry>_" or "_Py_get_<entry>_".
# NOTE(review): the first group looks like Python standard-library module
# names; the trailing groups appear to be extra prefixes — confirm with the
# author what each group stands for.
MODULES = [
    "abc",
    "aifc",
    "_aix_support",
    "antigravity",
    "argparse",
    "ast",
    "base64",
    "bdb",
    "bisect",
    "calendar",
    "cgi",
    "cgitb",
    "chunk",
    "cmd",
    "codecs",
    "codeop",
    "code",
    "collections",
    "_collections_abc",
    "colorsys",
    "_compat_pickle",
    "compileall",
    "_compression",
    "concurrent",
    "configparser",
    "contextlib",
    "contextvars",
    "copy",
    "copyreg",
    "cProfile",
    "crypt",
    "csv",
    "dataclasses",
    "datetime",
    "dbm",
    "decimal",
    "difflib",
    "dis",
    "doctest",
    "email",
    "encodings",
    "ensurepip",
    "enum",
    "filecmp",
    "fileinput",
    "fnmatch",
    "fractions",
    "ftplib",
    "functools",
    "__future__",
    "genericpath",
    "getopt",
    "getpass",
    "gettext",
    "glob",
    "graphlib",
    "gzip",
    "hashlib",
    "heapq",
    "hmac",
    "html",
    "http",
    "idlelib",
    "imaplib",
    "imghdr",
    "importlib",
    "inspect",
    "io",
    "ipaddress",
    "json",
    "keyword",
    "lib2to3",
    "linecache",
    "locale",
    "logging",
    "lzma",
    "mailbox",
    "mailcap",
    "_markupbase",
    "mimetypes",
    "modulefinder",
    "msilib",
    "multiprocessing",
    "netrc",
    "nntplib",
    "ntpath",
    "nturl2path",
    "numbers",
    "opcode",
    "operator",
    "optparse",
    "os",
    "_osx_support",
    "pathlib",
    "pdb",
    "__phello__",
    "pickle",
    "pickletools",
    "pipes",
    "pkgutil",
    "platform",
    "plistlib",
    "poplib",
    "posixpath",
    "pprint",
    "profile",
    "pstats",
    "pty",
    "_py_abc",
    "pyclbr",
    "py_compile",
    "_pydatetime",
    "_pydecimal",
    "pydoc_data",
    "pydoc",
    "_pyio",
    "_pylong",
    "queue",
    "quopri",
    "random",
    "re",
    "reprlib",
    "rlcompleter",
    "sched",
    "selectors",
    "shelve",
    "shlex",
    "shutil",
    "signal",
    "smtplib",
    "sndhdr",
    "socket",
    "socketserver",
    "statistics",
    "stat",
    "stringprep",
    "string",
    "_strptime",
    "struct",
    "subprocess",
    "sunau",
    "symtable",
    "sysconfig",
    "tabnanny",
    "tarfile",
    "telnetlib",
    "tempfile",
    "textwrap",
    "this",
    "_threading_local",
    "threading",
    "timeit",
    "tokenize",
    "token",
    "tomllib",
    "traceback",
    "tracemalloc",
    "trace",
    "tty",
    "types",
    "typing",
    "urllib",
    "uuid",
    "uu",
    "warnings",
    "wave",
    "weakref",
    "_weakrefset",
    "webbrowser",
    "wsgiref",
    "xdrlib",
    "zipapp",
    "zipfile",
    "zoneinfo",
    "__hello__",

    "site",
    "_sitebuiltins",
    "runpy",

    "gdb",
    "pygments",

    "zipimport",

    "const_str",
    "const_int",
]
| 209 | + |
def print_warning(message: str, prefix: str = "Warning: ", color: str = "\033[33m"):
    """Print *message* to standard error behind a colored *prefix*.

    Args:
        message: Text shown after the prefix, in the terminal's default color.
        prefix: Label emitted in front of the message.
        color: ANSI escape sequence applied to the prefix only (the default
            selects yellow).
    """
    reset = "\033[0m"
    # Only the prefix is colored; the reset sequence is emitted before the message.
    tagged_prefix = f"{color}{prefix}{reset}"
    print(f"{tagged_prefix}{message}", file=sys.stderr)
| 213 | + |
def human_bytes(num_bytes: float, byte_step: int = 1024) -> str:
    """Return the given bytes as a human friendly string.

    The value is divided by *byte_step* until its magnitude drops below one
    step or the largest known unit is reached, then formatted with two
    decimals, e.g. ``"1.50 KiB"``.

    Args:
        num_bytes: Size in bytes. Negative values keep their sign and are
            scaled by magnitude.
        byte_step: Ratio between consecutive units. With 1024 the units are
            written "KiB", "MiB", ...; with any other step "KB", "MB", ...

    Returns:
        The scaled number and its unit, separated by a single space.
    """
    PREFIXES = ['B', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y']

    scaled = num_bytes
    unit_index = 0
    # Stop at the last prefix so that values beyond it stay expressed in that
    # unit (e.g. "1024.00 YiB") instead of being divided once more than the
    # label accounts for.
    while unit_index < len(PREFIXES) - 1 and abs(scaled) >= byte_step:
        scaled /= byte_step
        unit_index += 1

    unit = PREFIXES[unit_index]
    if unit != 'B':
        # Binary multiples get the "i" infix (KiB); other steps use KB, MB, ...
        unit += ("i" if byte_step == 1024 else "") + "B"

    return f"{scaled:.2f} {unit}"
| 231 | + |
def get_module_sizes(object_file: str, module_list: Iterable[str] = None) -> Dict[str, int]:
    """Sum the sizes of the symbols in *object_file*, grouped by module.

    Runs ``readelf -sW --sym-base=10`` on the file and attributes every symbol
    whose name starts with ``<module>_`` or ``_Py_get_<module>_`` to that
    module. Symbols that match no module are reported on standard error.

    Args:
        object_file: Path handed to ``readelf``.
        module_list: Module names to look for. ``None`` or an empty iterable
            falls back to ``MODULES``. Any iterable is accepted, including a
            one-shot generator.

    Returns:
        Mapping of module name to the total size of its symbols. Modules
        without any matching symbol are absent from the mapping.

    Raises:
        subprocess.CalledProcessError: If ``readelf`` exits with a non-zero
            status.
    """
    # Materialize once: the names are re-scanned for every symbol, which would
    # silently exhaust a generator after the first symbol.
    modules = tuple(module_list) if module_list is not None else ()
    if not modules:
        modules = tuple(MODULES)

    # Try longer names first so that e.g. "pydoc_data" wins over "pydoc"
    # regardless of the order in which the caller listed them.
    candidates = [
        (module, (f"{module}_", f"_Py_get_{module}_"))
        for module in sorted(modules, key=len, reverse=True)
    ]

    readelf = subprocess.run(
        ["readelf", "-sW", "--sym-base=10", object_file],
        check=True,
        capture_output=True,
    )
    symbol_info = readelf.stdout.decode()

    module_sizes: Dict[str, int] = {}
    for symbol_str in symbol_info.splitlines():
        symbol_match = TABLE_PATTERN.search(symbol_str)
        if symbol_match is None:
            # Blank lines and the table headers never start with "<number>:",
            # so only rows that look like symbols but fail to parse are worth
            # a warning.
            if symbol_str.partition(":")[0].strip().isdigit():
                print_warning(f"Couldn't match table to line: {symbol_str!r}")
            continue

        symbol_name = symbol_match.group("name")
        symbol_size = int(symbol_match.group("size"))
        for module, prefixes in candidates:
            if symbol_name.startswith(prefixes):
                module_sizes[module] = module_sizes.get(module, 0) + symbol_size
                break
        else:
            print_warning(f"Can't match symbol {symbol_name} (size: {human_bytes(symbol_size)}) to module")

    return module_sizes
| 253 | + |
def main():
    """Command-line entry point: print the per-module sizes of an object file.

    Takes the object file path as a positional argument and prints one
    "<size>\t<module>" line per module, smallest first. With ``--total`` a
    final "Total:" line with the sum of all module sizes is added.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("object_file")
    cli.add_argument("--total", action='store_true')
    options = cli.parse_args()

    sizes_by_module = get_module_sizes(options.object_file)

    # Ascending by size, so the largest modules end up at the bottom.
    for name, size in sorted(sizes_by_module.items(), key=lambda entry: entry[1]):
        print(f"{human_bytes(size)}\t{name}")

    if options.total:
        grand_total = sum(sizes_by_module.values())
        print(f"Total:\t{human_bytes(grand_total)}")
| 270 | + |
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
0 commit comments