|
15 | 15 | from contextlib import redirect_stdout, redirect_stderr |
16 | 16 | from graph_net_bench.torch.backend.graph_compiler_backend import GraphCompilerBackend |
17 | 17 | from graph_net_bench import test_compiler_util |
| 18 | +from .util.timing import measure_performance |
18 | 19 |
|
19 | 20 |
|
20 | 21 | def register_op_lib(op_lib): |
@@ -129,69 +130,6 @@ def get_input_dict(args): |
129 | 130 | } |
130 | 131 |
|
131 | 132 |
|
def measure_performance(model_call, args, compiler):
    """Time repeated invocations of ``model_call`` and summarise the results.

    The callable is invoked once to capture its output, then ``args.warmup``
    more times without timing, and finally ``args.trials`` times under
    ``test_compiler_util.naive_timer``. When ``"cuda"`` appears in
    ``args.device`` each trial is additionally bracketed by a pair of
    ``torch.cuda.Event`` objects.

    Args:
        model_call: Zero-argument callable to be measured.
        args: Object exposing ``warmup``, ``trials`` and ``device``.
        compiler: Object exposing ``synchronize()``; it is called after the
            warmup loop and is also handed to ``naive_timer``.

    Returns:
        A ``(outs, stats)`` tuple. ``outs`` is the value returned by the very
        first ``model_call()`` (before warmup). ``stats`` maps ``"e2e"`` and,
        on CUDA devices only, ``"gpu"`` to whatever
        ``test_compiler_util.get_timing_stats`` returns for the collected
        per-trial values.
    """

    def _log(message):
        # All progress output goes to stderr and is flushed immediately.
        print(message, file=sys.stderr, flush=True)

    # The first call's result is what gets handed back to the caller.
    outs = model_call()

    # Untimed warmup invocations, followed by a single synchronize.
    for _ in range(args.warmup):
        model_call()
    compiler.synchronize()

    _log(f"[Profiling] Warm up {args.warmup}, Trials {args.trials}")

    if "cuda" not in args.device:
        # CPU or other devices: only the naive_timer measurement is taken.
        e2e_samples = []
        for trial_no in range(1, args.trials + 1):
            box = test_compiler_util.DurationBox(-1)
            with test_compiler_util.naive_timer(box, compiler.synchronize):
                model_call()
            _log(f"Trial {trial_no}: e2e={box.value:.5f} ms")
            e2e_samples.append(box.value)
        return outs, {"e2e": test_compiler_util.get_timing_stats(e2e_samples)}

    # CUDA devices: naive_timer measurement plus CUDA-event elapsed time.
    torch.cuda.empty_cache()
    e2e_samples = []
    gpu_samples = []
    for trial_no in range(1, args.trials + 1):
        box = test_compiler_util.DurationBox(-1)
        with test_compiler_util.naive_timer(box, compiler.synchronize):
            began = torch.cuda.Event(enable_timing=True)
            finished = torch.cuda.Event(enable_timing=True)
            began.record()

            model_call()

            finished.record()
            # Synchronize before leaving the timed region so that the
            # elapsed time between the two events can be read afterwards.
            compiler.synchronize()

        gpu_elapsed = began.elapsed_time(finished)
        e2e_samples.append(box.value)
        gpu_samples.append(gpu_elapsed)
        _log(f"Trial {trial_no}: e2e={box.value:.5f} ms, gpu={gpu_elapsed:.5f} ms")

    stats = {
        "e2e": test_compiler_util.get_timing_stats(e2e_samples),
        "gpu": test_compiler_util.get_timing_stats(gpu_samples),
    }
    return outs, stats
193 | | - |
194 | | - |
195 | 133 | def eval_single_model_with_single_backend(args): |
196 | 134 | check_and_complete_args(args) |
197 | 135 | set_seed(args.seed) |
|
0 commit comments