Skip to content
This repository was archived by the owner on Aug 21, 2025. It is now read-only.

Commit a228a1d

Browse files
authored
Util dump chrome trace (#869)
* Output the chrome trace of running f(input, **kwargs_for_f) with [optimize_ctx] [num_runs] times to [trace_filename].
1 parent 715c44e commit a228a1d

File tree

1 file changed

+57
-0
lines changed

1 file changed

+57
-0
lines changed

functorch/_src/benchmark_utils.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import time
2+
3+
import torch
4+
from torch.profiler import profile
5+
6+
7+
def synchronize():
    """Do nothing and return None.

    Placeholder synchronization callable for runs that need no device sync.
    """
    return None
9+
10+
11+
def dump_chrome_trace(f, input, trace_filename, optimize_ctx, activities, num_runs=1,
12+
devices=None, kwargs_for_f=None, kwargs_for_profiler=None):
13+
"""
14+
Output the chrome trace of running f(input, **kwargs_for_f) with [optimize_ctx]
15+
[num_runs] times to [trace_filename].
16+
17+
[activities] are the activities that the profiler will record, e.g. ProfilerActivity.CUDA.
18+
Return total runtime without the profiler
19+
20+
Outputs to trace_filename
21+
"""
22+
23+
if devices is None:
24+
devices = ["cuda"]
25+
26+
global synchronize
27+
if devices != ["cpu"] and torch.cuda.is_available():
28+
synchronize = torch.cuda.synchronize
29+
30+
if kwargs_for_f is None:
31+
kwargs_for_f = {}
32+
if kwargs_for_profiler is None:
33+
kwargs_for_profiler = {}
34+
35+
with optimize_ctx:
36+
torch.manual_seed(1337)
37+
for _ in range(5): # warmup runs
38+
f(input, **kwargs_for_f)
39+
synchronize()
40+
torch.manual_seed(1337)
41+
t0 = time.perf_counter()
42+
for _ in range(num_runs):
43+
f(input, **kwargs_for_f)
44+
synchronize()
45+
t1 = time.perf_counter()
46+
timing = t1 - t0
47+
48+
with profile(activities=activities, **kwargs_for_profiler) as prof:
49+
with optimize_ctx:
50+
synchronize()
51+
torch.manual_seed(1337)
52+
for _ in range(num_runs):
53+
f(input, **kwargs_for_f)
54+
synchronize()
55+
prof.export_chrome_trace(trace_filename)
56+
57+
return timing

0 commit comments

Comments
 (0)