Skip to content

Commit 6c8f081

Browse files
committed
Refactor code structure of graph_net_bench/torch/util/timing.py
1 parent 61dd8e0 commit 6c8f081

File tree

1 file changed

+42
-31
lines changed

1 file changed

+42
-31
lines changed

graph_net_bench/torch/util/timing.py

Lines changed: 42 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -19,40 +19,51 @@ def measure_performance(model_call, args, compiler):
1919

2020
if "cuda" in args.device:
2121
torch.cuda.empty_cache()
22-
e2e_times, gpu_times = [], []
23-
for i in range(args.trials):
24-
duration_box = test_compiler_util.DurationBox(-1)
25-
with test_compiler_util.naive_timer(duration_box, compiler.synchronize):
26-
start_event = torch.cuda.Event(enable_timing=True)
27-
end_event = torch.cuda.Event(enable_timing=True)
28-
start_event.record()
29-
model_call()
30-
end_event.record()
31-
compiler.synchronize()
32-
33-
gpu_time_ms = start_event.elapsed_time(end_event)
34-
e2e_times.append(duration_box.value)
35-
gpu_times.append(gpu_time_ms)
36-
print(
37-
f"Trial {i + 1}: e2e={duration_box.value:.5f} ms, gpu={gpu_time_ms:.5f} ms",
38-
file=sys.stderr,
39-
flush=True,
40-
)
41-
22+
e2e_times, gpu_times = run_cuda_benchmark_timer(
23+
model_call, args.trials, compiler
24+
)
4225
stats["e2e"] = test_compiler_util.get_timing_stats(e2e_times)
4326
stats["gpu"] = test_compiler_util.get_timing_stats(gpu_times)
4427
else:
45-
e2e_times = []
46-
for i in range(args.trials):
47-
duration_box = test_compiler_util.DurationBox(-1)
48-
with test_compiler_util.naive_timer(duration_box, compiler.synchronize):
49-
model_call()
50-
e2e_times.append(duration_box.value)
51-
print(
52-
f"Trial {i + 1}: e2e={duration_box.value:.5f} ms",
53-
file=sys.stderr,
54-
flush=True,
55-
)
28+
e2e_times = run_non_cuda_benchmark_timer(model_call, args.trials, compiler)
5629
stats["e2e"] = test_compiler_util.get_timing_stats(e2e_times)
5730

5831
return outs, stats
32+
33+
34+
def run_cuda_benchmark_timer(model_call, trials, compiler):
    """Time ``model_call`` for ``trials`` iterations on a CUDA device.

    Each trial is measured two ways:

    * end-to-end, via ``test_compiler_util.naive_timer`` writing into a
      ``DurationBox`` (``compiler.synchronize`` is handed to the timer);
    * on the GPU, via a pair of ``torch.cuda.Event`` objects recorded
      immediately before and after the call.

    A one-line summary of every trial is written to ``sys.stderr``.

    Args:
        model_call: Zero-argument callable that runs the model once.
        trials: Number of timed iterations to perform.
        compiler: Object exposing a ``synchronize()`` method.

    Returns:
        A ``(e2e_times, gpu_times)`` tuple of lists holding one entry per
        trial, both reported in milliseconds by the per-trial log line.
    """
    wall_clock_ms = []
    device_ms = []

    for trial_no in range(1, trials + 1):
        box = test_compiler_util.DurationBox(-1)
        with test_compiler_util.naive_timer(box, compiler.synchronize):
            begin = torch.cuda.Event(enable_timing=True)
            finish = torch.cuda.Event(enable_timing=True)
            begin.record()
            model_call()
            finish.record()
            # Wait for outstanding work so both events have completed before
            # elapsed_time is queried below (assumes compiler.synchronize
            # blocks on the CUDA stream -- confirm against the compiler).
            compiler.synchronize()

        elapsed_on_gpu = begin.elapsed_time(finish)
        elapsed_e2e = box.value
        wall_clock_ms.append(elapsed_e2e)
        device_ms.append(elapsed_on_gpu)

        message = (
            f"Trial {trial_no}: e2e={elapsed_e2e:.5f} ms, "
            f"gpu={elapsed_on_gpu:.5f} ms"
        )
        print(message, file=sys.stderr, flush=True)

    return wall_clock_ms, device_ms
55+
56+
57+
def run_non_cuda_benchmark_timer(model_call, trials, compiler):
    """Time ``model_call`` for ``trials`` iterations without CUDA events.

    Every trial wraps a single ``model_call()`` in
    ``test_compiler_util.naive_timer`` (which is given
    ``compiler.synchronize``) and reads the result from a ``DurationBox``.
    A one-line summary of every trial is written to ``sys.stderr``.

    Args:
        model_call: Zero-argument callable that runs the model once.
        trials: Number of timed iterations to perform.
        compiler: Object exposing a ``synchronize`` attribute that is passed
            to the timer.

    Returns:
        A list with one end-to-end duration per trial, reported in
        milliseconds by the per-trial log line.
    """
    wall_clock_ms = []

    for trial_no in range(1, trials + 1):
        box = test_compiler_util.DurationBox(-1)
        with test_compiler_util.naive_timer(box, compiler.synchronize):
            model_call()

        elapsed_e2e = box.value
        wall_clock_ms.append(elapsed_e2e)

        message = f"Trial {trial_no}: e2e={elapsed_e2e:.5f} ms"
        print(message, file=sys.stderr, flush=True)

    return wall_clock_ms

0 commit comments

Comments
 (0)