Skip to content

Commit 939a587

Browse files
committed
Optimize the profiling code and support the automatic adjustment of trials.
1 parent 84d28b8 commit 939a587

File tree

1 file changed

+30
-16
lines changed

1 file changed

+30
-16
lines changed

graph_net/paddle/test_compiler.py

Lines changed: 30 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -140,17 +140,31 @@ def measure_performance(model_call, args, compiler, profile=False):
140140
outs = model_call()
141141

142142
# Warmup runs
143+
warmup_e2e_times = []
143144
for _ in range(args.warmup):
144-
model_call()
145+
duration_box = test_compiler_util.DurationBox(-1)
146+
with test_compiler_util.naive_timer(duration_box, compiler.synchronize):
147+
model_call()
148+
warmup_e2e_times.append(duration_box.value)
145149
compiler.synchronize()
146150

151+
# Ensure the measuring time is not less than 100ms.
152+
min_trials = int(100 / np.mean(warmup_e2e_times[1:]))
153+
trials = max(args.trials, min_trials)
154+
147155
hardware_name = get_hardward_name(args)
148156
print(
149-
f"[Profiling] Using device: {args.device} {hardware_name}, warm up {args.warmup}, trials {args.trials}",
157+
f"[Profiling] Using device: {args.device} {hardware_name}, warm up {args.warmup}, trials {trials}",
150158
file=sys.stderr,
151159
flush=True,
152160
)
153161

162+
if profile:
163+
import paddle.profiler as profiler
164+
165+
p = profiler.Profiler()
166+
p.start()
167+
154168
if test_compiler_util.is_gpu_device(args.device):
155169
"""
156170
Acknowledgement: We evaluate the performance on both end-to-end and GPU-only timings,
@@ -160,12 +174,7 @@ def measure_performance(model_call, args, compiler, profile=False):
160174
e2e_times = []
161175
gpu_times = []
162176

163-
if profile:
164-
import paddle.profiler as profiler
165-
166-
p = profiler.Profiler()
167-
p.start()
168-
for i in range(args.trials):
177+
for i in range(trials):
169178
# End-to-end timing (naive_timer)
170179
duration_box = test_compiler_util.DurationBox(-1)
171180
with test_compiler_util.naive_timer(duration_box, compiler.synchronize):
@@ -176,8 +185,9 @@ def measure_performance(model_call, args, compiler, profile=False):
176185
start_event.record()
177186
model_call()
178187
end_event.record()
179-
if profile:
180-
p.step()
188+
189+
if profile:
190+
p.step()
181191

182192
gpu_time_ms = start_event.elapsed_time(end_event)
183193
e2e_times.append(duration_box.value)
@@ -187,26 +197,30 @@ def measure_performance(model_call, args, compiler, profile=False):
187197
file=sys.stderr,
188198
flush=True,
189199
)
190-
if profile:
191-
p.stop()
192-
p.summary()
193-
194200
stats["e2e"] = test_compiler_util.get_timing_stats(e2e_times)
195201
stats["gpu"] = test_compiler_util.get_timing_stats(gpu_times)
196202
else: # CPU or other devices
197203
e2e_times = []
198-
for i in range(args.trials):
204+
for i in range(trials):
199205
duration_box = test_compiler_util.DurationBox(-1)
200206
with test_compiler_util.naive_timer(duration_box, compiler.synchronize):
201207
model_call()
208+
209+
if profile:
210+
p.step()
211+
212+
e2e_times.append(duration_box.value)
202213
print(
203214
f"Trial {i + 1}: e2e={duration_box.value:.4f} ms",
204215
file=sys.stderr,
205216
flush=True,
206217
)
207-
e2e_times.append(duration_box.value)
208218
stats["e2e"] = test_compiler_util.get_timing_stats(e2e_times)
209219

220+
if profile:
221+
p.stop()
222+
p.summary()
223+
210224
return outs, stats
211225

212226

0 commit comments

Comments
 (0)