Commit 9cef6c2

Move analytics scripts from builder (#6111)
Part of pytorch/builder#2054
1 parent 9f4ff02 commit 9cef6c2

7 files changed: +1283 −0 lines changed


tools/analytics/cubinsizes.py

Lines changed: 125 additions & 0 deletions
@@ -0,0 +1,125 @@
#!/usr/bin/env python3
# Tool for analyzing sizes of CUDA kernels for various GPU architectures
import os
import struct
import subprocess
import sys
from tempfile import TemporaryDirectory
from typing import Dict


# Try to auto-import elftools
try:
    from elftools.elf.elffile import ELFFile
except ModuleNotFoundError:
    print('elftools module not found, trying to install it from pip')
    from pip._internal import main as pip_main
    try:
        pip_main(["install", "pyelftools", "--user"])
    except SystemExit:
        print(f'PIP installation failed, please install it manually by invoking "{sys.executable} -mpip install pyelftools --user"')
        sys.exit(-1)
    from elftools.elf.elffile import ELFFile


# From https://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size
def sizeof_fmt(num, suffix='B'):
    for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']:
        if abs(num) < 1024.0:
            return "%3.1f%s%s" % (num, unit, suffix)
        num /= 1024.0
    return "%.1f%s%s" % (num, 'Yi', suffix)


def compute_cubin_sizes(file_name, section_name='.nv_fatbin', debug=False):
    with open(file_name, 'rb') as f:
        elf_file = ELFFile(f)
        nv_fatbin = elf_file.get_section_by_name(section_name)
        if nv_fatbin is None:
            return {}
        data = nv_fatbin.data()
        idx, offs = 0, 0
        elf_sizes = {}
        while offs < len(data):
            (magic, version, header_size, fatbin_size) = struct.unpack('IHHL', data[offs: offs + 16])
            if magic != 0xba55ed50 or version != 1:
                raise RuntimeError(f"Unexpected fatbin magic {hex(magic)} or version {version}")
            if debug:
                print(f"Found fatbin at {offs} header_size={header_size} fatbin_size={fatbin_size}")
            offs += header_size
            fatbin_end = offs + fatbin_size
            while offs < fatbin_end:
                (kind, version, hdr_size, elf_size, empty, code_ver, sm_ver) = struct.unpack('HHILLIH', data[offs: offs + 30])
                if version != 0x0101 or kind not in [1, 2]:
                    raise RuntimeError(f"Unexpected cubin version {hex(version)} or kind {kind}")
                sm_ver = f'{"ptx" if kind == 1 else "sm"}_{sm_ver}'
                if debug:
                    print(f"  {idx}: elf_size={elf_size} code_ver={hex(code_ver)} sm={sm_ver}")
                if sm_ver not in elf_sizes:
                    elf_sizes[sm_ver] = 0
                elf_sizes[sm_ver] += elf_size
                idx, offs = idx + 1, offs + hdr_size + elf_size
            offs = fatbin_end
    return elf_sizes


class ArFileCtx:
    def __init__(self, ar_name: str) -> None:
        self.ar_name = os.path.abspath(ar_name)
        self._tmpdir = TemporaryDirectory()

    def __enter__(self) -> str:
        self._pwd = os.getcwd()
        rc = self._tmpdir.__enter__()
        # Extract inside the temporary directory, so that os.listdir(".")
        # in main() sees the extracted objects and __exit__ can restore
        # the original working directory afterwards.
        os.chdir(rc)
        subprocess.check_call(['ar', 'x', self.ar_name])
        return rc

    def __exit__(self, ex, value, tb) -> None:
        os.chdir(self._pwd)
        return self._tmpdir.__exit__(ex, value, tb)


def dict_add(rc: Dict[str, int], b: Dict[str, int]) -> Dict[str, int]:
    for key, val in b.items():
        rc[key] = (rc[key] if key in rc else 0) + val
    return rc


def main():
    if sys.platform != 'linux':
        print('This script only works with Linux ELF files')
        return
    if len(sys.argv) < 2:
        print(f"{sys.argv[0]} invoked without any arguments, trying to infer location of libtorch_cuda")
        import torch
        fname = os.path.join(os.path.dirname(torch.__file__), 'lib', 'libtorch_cuda.so')
    else:
        fname = sys.argv[1]

    if not os.path.exists(fname):
        print(f"Can't find {fname}")
        sys.exit(-1)

    section_names = ['.nv_fatbin', '__nv_relfatbin']
    results = {name: {} for name in section_names}
    print(f"Analyzing {fname}")
    if os.path.splitext(fname)[1] == '.a':
        with ArFileCtx(fname):
            for fname in os.listdir("."):
                if not fname.endswith(".o"):
                    continue
                for section_name in section_names:
                    elf_sizes = compute_cubin_sizes(fname, section_name)
                    dict_add(results[section_name], elf_sizes)
    else:
        for section_name in section_names:
            dict_add(results[section_name], compute_cubin_sizes(fname, section_name))

    for section_name in section_names:
        elf_sizes = results[section_name]
        print(f"{section_name} size {sizeof_fmt(sum(elf_sizes.values()))}")
        for (sm_ver, total_size) in elf_sizes.items():
            print(f"  {sm_ver}: {sizeof_fmt(total_size)}")


if __name__ == '__main__':
    main()
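
For reference, a minimal sketch of driving compute_cubin_sizes programmatically (the cubinsizes import path and the library location are illustrative assumptions, not part of this commit):

# Hypothetical usage; adjust the import and the path to your installation.
from cubinsizes import compute_cubin_sizes, sizeof_fmt

sizes = compute_cubin_sizes('/usr/local/lib/libtorch_cuda.so')  # example path
for sm_ver, total in sorted(sizes.items()):
    print(f'{sm_ver}: {sizeof_fmt(total)}')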
Lines changed: 163 additions & 0 deletions
@@ -0,0 +1,163 @@
from collections import defaultdict
from datetime import datetime, timedelta, timezone
import gzip
import os
import re
import urllib.parse

from tqdm import tqdm
import boto3

S3 = boto3.resource('s3')
CLIENT = boto3.client('s3')
BUCKET = S3.Bucket('pytorch')

class CacheEntry:
    _size = None

    def __init__(self, download_uri: str):
        self.download_uri = download_uri
        self.bytes_sent = 0

    @property
    def os_type(self) -> str:
        os_type = "linux"
        if "win" in self.download_uri:
            os_type = "windows"
        elif "macosx" in self.download_uri:
            os_type = "macos"
        return os_type

    @property
    def target_arch(self) -> str:
        target_arch = "cpu"
        result = re.search(r"cu[0-9]+", self.download_uri)
        if result:
            target_arch = result[0]
        return target_arch

    @property
    def package_name(self) -> str:
        filename_contents = os.path.basename(self.download_uri).split('-')
        return filename_contents[0]

    @property
    def package_version(self) -> str:
        if "dev" in self.download_uri:
            results = re.search(
                r"[0-9]+\.[0-9]+\.[0-9]+\.dev[0-9]+",
                self.download_uri
            )
        else:
            results = re.search(
                r"[0-9]+\.[0-9]+\.[0-9]+", self.download_uri
            )
        if not results:
            raise Exception(f"Could not parse version from {self.download_uri}")
        return results[0]

    @property
    def size(self) -> int:
        if self._size is None:
            for key in BUCKET.objects.filter(
                Prefix=self.download_uri.lstrip("/")
            ):
                self._size = key.size
            if self._size is None:
                raise Exception(
                    f"No object found for prefix {self.download_uri}"
                )
        return self._size

    @property
    def downloads(self):
        return self.bytes_sent // self.size

def parse_logs(log_directory: str) -> dict:
    bytes_cache = {}
    for (dirpath, _, filenames) in os.walk(log_directory):
        for filename in tqdm(filenames):
            with gzip.open(os.path.join(dirpath, filename), 'r') as gf:
                string = gf.read().decode("utf-8")
                entries = string.splitlines()[2:]
                for entry in entries:
                    columns = entry.split('\t')
                    bytes_sent = int(columns[3])
                    download_uri = urllib.parse.unquote(
                        urllib.parse.unquote(columns[7])
                    )
                    status = columns[8]
                    if not all([
                        status.startswith("2"),
                        download_uri.endswith((".whl", ".zip"))
                    ]):
                        continue
                    if not bytes_cache.get(download_uri):
                        bytes_cache[download_uri] = CacheEntry(download_uri)
                    bytes_cache[download_uri].bytes_sent += bytes_sent
    return bytes_cache

def output_results(bytes_cache: dict) -> None:
    os_results = defaultdict(int)
    arch_results = defaultdict(int)
    package_results = defaultdict(lambda: defaultdict(int))
    for _, val in tqdm(bytes_cache.items()):
        try:
            os_results[val.os_type] += val.downloads
            arch_results[val.target_arch] += val.downloads
            package_results[val.package_name][val.package_version] += (
                val.downloads
            )
        except Exception:
            pass
    print("=-=-= Results =-=-=")
    print("=-=-= OS =-=-=")
    total_os_num = sum(os_results.values())
    for os_type, num in os_results.items():
        print(
            f"\t* {os_type}: {num} ({(num/total_os_num)*100:.2f}%)"
        )

    print("=-=-= ARCH =-=-=")
    total_arch_num = sum(arch_results.values())
    for arch_type, num in arch_results.items():
        print(
            f"\t* {arch_type}: {num} ({(num/total_arch_num) * 100:.2f}%)"
        )

    print("=-=-= By Package =-=-=")
    for package_name, upper_val in package_results.items():
        print(f"=-=-= {package_name} =-=-=")
        total_package_num = sum(upper_val.values())
        for package_version, num in upper_val.items():
            print(
                f"\t* {package_version}: {num} ({(num/total_package_num) * 100:.2f}%)"
            )

def download_logs(log_directory: str, since: float):
    dt_now = datetime.now(timezone.utc)
    dt_end = datetime(dt_now.year, dt_now.month, dt_now.day, tzinfo=timezone.utc)
    dt_start = dt_end - timedelta(days=1, hours=1)  # Add 1 hour padding to account for potentially missed logs due to timing
    for key in tqdm(BUCKET.objects.filter(Prefix='cflogs')):
        remote_fname = key.key
        local_fname = os.path.join(log_directory, remote_fname)
        # Only download things from yesterday
        dt_modified = key.last_modified.replace(tzinfo=timezone.utc)
        if dt_start >= dt_modified or dt_end < dt_modified:
            continue
        # TODO: Do this in parallel
        if not os.path.exists(local_fname):
            dirname = os.path.dirname(local_fname)
            if not os.path.exists(dirname):
                os.makedirs(dirname)
            CLIENT.download_file("pytorch", remote_fname, local_fname)


if __name__ == "__main__":
    print("Downloading logs")
    download_logs('cache', 1)
    print("Parsing logs")
    cache = parse_logs('cache/cflogs/')
    print("Calculating results")
    output_results(cache)
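
Since CloudFront access logs record bytes served rather than discrete downloads, CacheEntry estimates the download count as total bytes sent divided by the wheel's object size on S3. A small illustration with made-up numbers:

# Illustration only; the byte counts below are hypothetical.
bytes_sent = 3_500_000_000             # bytes CloudFront served for one wheel
object_size = 1_000_000_000            # size of that wheel object on S3
downloads = bytes_sent // object_size  # -> 3 full-download equivalents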

tools/analytics/duplicates_analyze.py

Lines changed: 99 additions & 0 deletions
@@ -0,0 +1,99 @@
#!/usr/bin/env python3
from typing import Dict, List
from subprocess import check_output
import os
import sys


def get_defined_symbols(fname: str, verbose: bool = False) -> Dict[str, int]:
    if verbose:
        print(f"Processing {fname}...", end='', flush=True)
    if sys.platform == 'darwin':
        lines = check_output(['nm', '--defined-only', '-n', fname]).decode('ascii').split("\n")[:-1]
        rc = {}
        for idx, line in enumerate(lines):
            addr, stype, name = line.split(' ')
            size = 4 if idx + 1 == len(lines) else (int(lines[idx + 1].split(' ')[0], 16) - int(addr, 16))
            rc[name] = size
    else:
        lines = check_output(['nm', '--print-size', '--defined-only', fname]).decode('ascii').split('\n')
        rc = {e[3]: int(e[1], 16) for e in [line.split() for line in lines] if len(e) == 4}
    if verbose:
        print("done")
    return rc


def get_deps(fname: str) -> List[str]:
    if sys.platform == 'darwin':
        rc = []
        lines = check_output(['otool', '-l', fname]).decode('ascii').split("\n")[1:-1]
        for idx, line in enumerate(lines):
            if line.strip() != 'cmd LC_LOAD_DYLIB':
                continue
            path = lines[idx + 2].strip()
            assert path.startswith('name')
            rc.append(os.path.basename(path.split(' ')[1]))
        return rc
    lines = check_output(['readelf', '--dynamic', fname]).decode('ascii').split('\n')
    return [line.split('[')[1][:-1] for line in lines if '(NEEDED)' in line]


def humansize(size):
    if size < 1024:
        return f"{size} bytes"
    if size < 1024**2:
        return f"{int(size/1024)} Kb"
    if size < 1024**3:
        return f"{size/(1024.0**2):.2f} Mb"
    return f"{size/(1024.0**3):.2f} Gb"


def print_sizes(libname, depth: int = 2) -> None:
    libs = [libname]
    symbols = {os.path.basename(libname): get_defined_symbols(libname, verbose=True)}
    for _ in range(depth):
        for lib in libs:
            dirname = os.path.dirname(lib)
            for dep in get_deps(lib):
                path = os.path.join(dirname, dep)
                if not os.path.exists(path):
                    continue
                if path not in libs:
                    libs.append(path)
                    symbols[dep] = get_defined_symbols(path, verbose=True)

    for lib in libs:
        lib_symbols = symbols[os.path.basename(lib)]
        lib_keys = set(lib_symbols.keys())
        rc = f"{lib} symbols size {humansize(sum(lib_symbols.values()))}"
        for dep in get_deps(lib):
            if dep not in symbols:
                continue
            dep_overlap = lib_keys.intersection(set(symbols[dep].keys()))
            overlap_size = sum(lib_symbols[k] for k in dep_overlap)
            if overlap_size > 0:
                rc += f" {dep} overlap is {humansize(overlap_size)}"
        print(rc)


def print_symbols_overlap(libname1: str, libname2: str) -> None:
    sym1 = get_defined_symbols(libname1, verbose=True)
    sym2 = get_defined_symbols(libname2, verbose=True)
    sym1_size = sum(sym1.values())
    sym2_size = sum(sym2.values())
    sym_overlap = set(sym1.keys()).intersection(set(sym2.keys()))
    overlap_size = sum(sym1[s] for s in sym_overlap)
    if overlap_size == 0:
        print(f"{libname1} symbols size {humansize(sym1_size)} does not overlap with {libname2}")
        return
    print(f"{libname1} symbols size {humansize(sym1_size)} overlap {humansize(overlap_size)} ({100.0 * overlap_size/sym1_size :.2f}%)")
    for sym in sym_overlap:
        print(sym)


if __name__ == '__main__':
    if len(sys.argv) == 3:
        print_symbols_overlap(sys.argv[1], sys.argv[2])
    else:
        print_sizes(sys.argv[1] if len(sys.argv) > 1 else "lib/libtorch_cuda.so")
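
A sketch of the script's two entry points (the import path and the library paths are assumptions for illustration):

# Hypothetical usage; run from the directory containing duplicates_analyze.py.
from duplicates_analyze import print_sizes, print_symbols_overlap

print_sizes('/path/to/libtorch_cuda.so')     # per-dependency symbol-overlap report
print_symbols_overlap('libA.so', 'libB.so')  # list symbols defined in both libraries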

0 commit comments
