Skip to content

Commit 9b96f14

Browse files
authored
refactor code, add docstring (#908)
* refactor code, add docstring * lint * lint * lint * lint
1 parent 7617874 commit 9b96f14

File tree

2 files changed

+154
-64
lines changed

2 files changed

+154
-64
lines changed

benchmarks/chrome_trace_parser.py

Lines changed: 10 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -1,65 +1,29 @@
11
#!/usr/bin/env python
22
import argparse
3-
import json
3+
44
import os
55
import logging
66
import pandas as pd
77

8+
from functorch._src.benchmark_utils import compute_utilization
9+
810
# process the chrome traces output by the pytorch profiler
911
# require the json input file's name to be in format {model_name}_chrome_trace_*.json
10-
# the runtimes file should have format (model_name, time)
11-
12-
gpu_pids = []
13-
14-
def is_gpu_compute_event(event):
15-
global gpu_pids
16-
return "pid" in event and event["pid"] in gpu_pids and "ph" in event and event["ph"] == "X"
17-
18-
def get_events(filename):
19-
f = open(filename)
20-
data = json.load(f)
21-
events = data["traceEvents"]
22-
return events
23-
24-
def get_sorted_gpu_events(events):
25-
sorted_gpu_events = []
26-
for event in events:
27-
if(not is_gpu_compute_event(event)):
28-
continue
29-
sorted_gpu_events.append(event)
30-
return sorted(sorted_gpu_events, key=lambda x: x["ts"])
12+
# the runtimes file should be a CSV with columns (name, runtime)
3113

32-
def get_sorted_gpu_mm_conv_events(events):
33-
def is_mm_conv_event(event):
34-
return "name" in event and ("gemm" in event["name"] or "conv" in event["name"]
35-
or "cutlass" in event["name"] or "wgrad" in event["name"])
36-
gpu_events = get_sorted_gpu_events(events)
37-
sorted_events = []
38-
for event in gpu_events:
39-
if(not is_mm_conv_event(event)):
40-
continue
41-
sorted_events.append(event)
42-
return sorted_events
43-
44-
def get_duration(sorted_gpu_events):
45-
event = sorted_gpu_events[0]
46-
current_end_time = event["ts"] + event["dur"]
47-
total_duration = event["dur"]
48-
for event in sorted_gpu_events[1:]:
49-
start_time = max(event["ts"], current_end_time)
50-
end_time = event["ts"] + event["dur"]
51-
total_duration = total_duration + max(end_time - start_time, 0)
52-
current_end_time = max(current_end_time, end_time)
53-
return total_duration
5414

5515
def get_model_name(filename):
    """
    Get model name from a file in format {model_name}_chrome_trace_*.json

    Args:
        filename: path (or bare file name) of a chrome trace file; only the
            final path component is inspected.

    Returns:
        The part of the file name that precedes the first "_chrome_trace".

    Raises:
        ValueError: if the file name does not contain "_chrome_trace".
    """
    _, tail = os.path.split(filename)
    # str.find() returns -1 when the marker is missing, which would make the
    # slice silently drop the last character of the name. partition() lets the
    # miss be detected and reported explicitly instead.
    modelname, marker, _ = tail.partition("_chrome_trace")
    if not marker:
        raise ValueError(
            f"{filename!r} is not in the format {{model_name}}_chrome_trace_*.json"
        )
    return modelname
5922

6023
def get_total_length(run_times_df, modelname):
    """
    Look up the runtime recorded for ``modelname`` in the runtimes table.

    Args:
        run_times_df: pandas DataFrame with a "name" column and a "runtime" column.
        modelname: value to match against the "name" column.

    Returns:
        float: the "runtime" value of the single row whose "name" equals ``modelname``.

    Raises:
        ValueError: if zero rows or more than one row match ``modelname``.
    """
    runtimes = run_times_df.loc[run_times_df["name"] == modelname, "runtime"]
    if len(runtimes) != 1:
        raise ValueError(
            f"expected exactly one runtime entry for {modelname!r}, found {len(runtimes)}"
        )
    # Calling float() directly on a Series is deprecated in recent pandas;
    # pull out the scalar first.
    return float(runtimes.iloc[0])
6225

26+
6327
def main():
6428
parser = argparse.ArgumentParser()
6529
group = parser.add_mutually_exclusive_group(required=True)
@@ -74,7 +38,6 @@ def main():
7438
)
7539
args = parser.parse_args()
7640

77-
run_times_df = pd.read_csv(args.runtime)
7841

7942
if args.filename:
8043
filenames = args.filename
@@ -90,32 +53,16 @@ def main():
9053

9154
print(f"modelname, GPU Utilization, MM and Conv time")
9255

56+
run_times_df = pd.read_csv(args.runtime)
9357
for filename in filenames:
9458
try:
95-
events = get_events(filename)
96-
97-
# get pids of GPU events
98-
global gpu_pids
99-
for event in events:
100-
if "name" not in event:
101-
continue
102-
if event["name"] == 'process_labels' and "GPU" in event["args"]["labels"]:
103-
gpu_pids.append(event["pid"])
104-
10559
modelname = get_model_name(filename)
10660
total_length = get_total_length(run_times_df, modelname) * 1e6
107-
108-
sorted_gpu_events = get_sorted_gpu_events(events)
109-
utilization = get_duration(sorted_gpu_events) / total_length
110-
111-
sorted_gpu_mm_conv_events = get_sorted_gpu_mm_conv_events(events)
112-
mm_conv_utilization = get_duration(sorted_gpu_mm_conv_events) / total_length
113-
61+
utilization, mm_conv_utilization = compute_utilization(filenames, total_length)
11462
print(f"{modelname}, {utilization}, {mm_conv_utilization}")
11563
except:
11664
logging.exception(f"{filename}, ERROR")
11765
print(f"{filename}, ERROR")
11866

119-
12067
if __name__ == "__main__":
12168
main()

functorch/_src/benchmark_utils.py

Lines changed: 144 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,23 @@
11
import time
2+
import os
3+
import json
24

35
import torch
4-
from torch.profiler import profile
6+
from torch.profiler import profile, ProfilerActivity
57

68

79
def synchronize():
    """No-op: performs no work and returns None."""
    return None
911

1012

13+
class NullContext:
    """Context manager that performs no work on entry or on exit."""

    def __enter__(self):
        # ``with NullContext() as x`` binds x to None.
        return None

    def __exit__(self, exc_type, exc_val, exc_tb):
        # A falsy return value means an exception raised inside the ``with``
        # body is not suppressed.
        return None
19+
20+
1121
def dump_chrome_trace(f, input, trace_filename, optimize_ctx, activities, num_runs=1,
1222
devices=None, kwargs_for_f=None, kwargs_for_profiler=None):
1323
"""
@@ -55,3 +65,136 @@ def dump_chrome_trace(f, input, trace_filename, optimize_ctx, activities, num_ru
5565
prof.export_chrome_trace(trace_filename)
5666

5767
return timing
68+
69+
70+
def get_chrome_trace_events(filename):
    """
    Load the list of events from a chrome trace JSON file.

    Args:
        filename: path of a JSON file whose top-level object has a
            "traceEvents" key.

    Returns:
        The value stored under "traceEvents".

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
        KeyError: if the top-level object has no "traceEvents" key.
    """
    # Use a context manager so the file handle is closed even if parsing fails.
    with open(filename) as trace_file:
        data = json.load(trace_file)
    return data["traceEvents"]
75+
76+
77+
def is_gpu_compute_event(event):
    """
    Tell whether ``event`` has a "pid" listed in the module-level ``gpu_pids``
    and a "ph" field equal to "X".
    """
    if "pid" not in event:
        return False
    if event["pid"] not in gpu_pids:
        return False
    return "ph" in event and event["ph"] == "X"
80+
81+
82+
def get_sorted_gpu_events(events):
    """
    Keep the events accepted by ``is_gpu_compute_event`` and return them as a
    new list ordered by ascending "ts".
    """
    gpu_events = [event for event in events if is_gpu_compute_event(event)]
    gpu_events.sort(key=lambda event: event["ts"])
    return gpu_events
89+
90+
91+
def get_duration(sorted_gpu_events):
    """
    Sum the time covered by a list of events, counting overlapping spans once.

    Args:
        sorted_gpu_events: list of events, each with "ts" (start) and "dur"
            (length) entries; the running-end bookkeeping below relies on the
            list being ordered by "ts".

    Returns:
        Total covered time in the same unit as "ts"/"dur", or 0 for an empty list.
    """
    if len(sorted_gpu_events) == 0:
        return 0

    head = sorted_gpu_events[0]
    covered_until = head["ts"] + head["dur"]
    covered_total = head["dur"]

    for ev in sorted_gpu_events[1:]:
        ev_end = ev["ts"] + ev["dur"]
        # Only the part of this event that lies past what is already covered
        # contributes new time.
        new_from = max(ev["ts"], covered_until)
        covered_total = covered_total + max(ev_end - new_from, 0)
        covered_until = max(covered_until, ev_end)

    return covered_total
103+
104+
105+
def get_sorted_gpu_mm_conv_events(events):
    """
    Return the events from ``get_sorted_gpu_events(events)`` whose "name"
    contains "gemm", "conv", "cutlass" or "wgrad", in the same order.
    """
    name_markers = ("gemm", "conv", "cutlass", "wgrad")

    def is_mm_conv_event(event):
        if "name" not in event:
            return False
        kernel_name = event["name"]
        return any(marker in kernel_name for marker in name_markers)

    return [event for event in get_sorted_gpu_events(events) if is_mm_conv_event(event)]
116+
117+
118+
gpu_pids = []
119+
120+
121+
def compute_utilization(filename: str, total_length: float):
    """
    Process a chrome trace produced by the pytorch profiler to compute GPU
    utilization and the share of time spent on matmul and convolution.

    Both results are fractions of ``total_length`` (busy time divided by total
    time), not values scaled to 100.

    Args:
        filename(str): Name of chrome traces file produced by pytorch profiler

        total_length(float): total length of the process without profiler, in seconds

    Return:
        tuple: (GPU utilization, fraction of time spent on matmul and convolution)
    """
    global gpu_pids
    trace_events = get_chrome_trace_events(filename)

    # Rebuild the module-level pid list from this trace: a pid is kept when its
    # "process_labels" event carries "GPU" in args.labels.
    gpu_pids = []
    for event in trace_events:
        if ("name" in event and event["name"] == 'process_labels'
                and "GPU" in event["args"]["labels"]):
            gpu_pids.append(event["pid"])

    # Scale by 1e6 before dividing; presumably seconds -> microseconds to match
    # the trace's "ts"/"dur" unit (TODO confirm against the profiler output).
    total_length_scaled = total_length * 1e6

    gpu_busy_time = get_duration(get_sorted_gpu_events(trace_events))
    utilization = gpu_busy_time / total_length_scaled

    mm_conv_busy_time = get_duration(get_sorted_gpu_mm_conv_events(trace_events))
    mm_conv_utilization = mm_conv_busy_time / total_length_scaled

    return utilization, mm_conv_utilization
153+
154+
155+
def benchmark_utilization(f, input, trace_folder, optimize_ctx=None, trace_file_name="tmp_chrome_trace", num_runs=1):
    """
    Benchmark the GPU utilization and the share of time spent on matmul and
    convolution operations of running f(input) with [optimize_ctx] [num_runs] times.
    It will produce a chrome trace file in trace_folder/trace_file_name.json

    Example:

    ```
    def f(a):
        return a.sum()
    a = torch.rand(2**20, device="cuda")
    utilization, mm_conv_utilization = benchmark_utilization(f, a, "tmp", trace_file_name = "tmp_chrome_trace")
    ```

    Args:
        f: function to benchmark

        input: input to :attr:`f`

        trace_folder: name of the folder to store the chrome trace; it is
            created (and a message printed) when it does not exist yet

        optimize_ctx: the context in which f will run; a do-nothing context is
            used when None

        trace_file_name: name of the dumped chrome trace file, without the
            ".json" extension, default to "tmp_chrome_trace"

        num_runs: number of times to run f, excluding the warm-up runs, default to 1.

    Return:
        tuple: (GPU utilization, share of time spent on matmul and convolution),
        as returned by ``compute_utilization``

    """
    if not os.path.exists(trace_folder):
        os.makedirs(trace_folder)
        print("create folder " + trace_folder)

    run_ctx = NullContext() if optimize_ctx is None else optimize_ctx

    trace_path = os.path.join(trace_folder, trace_file_name + ".json")
    # The value returned by dump_chrome_trace is used as the total run length
    # for the utilization computation.
    total_length = dump_chrome_trace(
        f, input, trace_path, run_ctx, [ProfilerActivity.CUDA],
        num_runs=num_runs, devices="cuda",
    )
    utilization, mm_conv_utilization = compute_utilization(trace_path, total_length)

    return utilization, mm_conv_utilization

0 commit comments

Comments
 (0)