@@ -41,6 +41,11 @@ def set_seed(random_seed):
     np.random.seed(random_seed)
 
 
+def init_env(args):
+    if test_compiler_util.is_gpu_device(args.device):
+        paddle.set_flags({"FLAGS_cudnn_exhaustive_search": 1})
+
+
 def get_hardward_name(args):
     hardware = "unknown"
     if test_compiler_util.is_gpu_device(args.device):
@@ -156,7 +161,10 @@ def measure_performance(model_call, args, compiler, profile=False):
     gpu_times = []
 
     if profile:
-        paddle.base.core.nvprof_start()
+        import paddle.profiler as profiler
+
+        p = profiler.Profiler()
+        p.start()
     for i in range(args.trials):
         # End-to-end timing (naive_timer)
         duration_box = test_compiler_util.DurationBox(-1)
@@ -168,6 +176,8 @@ def measure_performance(model_call, args, compiler, profile=False):
         start_event.record()
         model_call()
         end_event.record()
+        if profile:
+            p.step()
 
         gpu_time_ms = start_event.elapsed_time(end_event)
         e2e_times.append(duration_box.value)
@@ -178,7 +188,8 @@ def measure_performance(model_call, args, compiler, profile=False):
             flush=True,
         )
     if profile:
-        paddle.base.core.nvprof_stop()
+        p.stop()
+        p.summary()
 
     stats["e2e"] = test_compiler_util.get_timing_stats(e2e_times)
     stats["gpu"] = test_compiler_util.get_timing_stats(gpu_times)
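
For context, a minimal standalone sketch of what the new init_env does. The Args class and is_gpu_device helper below are hypothetical stand-ins for the harness pieces not shown in this diff; FLAGS_cudnn_exhaustive_search asks cuDNN to benchmark its candidate algorithms once and reuse the fastest one.

import paddle


class Args:
    device = "gpu"  # hypothetical stand-in for the parsed CLI arguments


def is_gpu_device(device):
    # stand-in for test_compiler_util.is_gpu_device
    return device.startswith("gpu")


def init_env(args):
    if is_gpu_device(args.device):
        # let cuDNN search all candidate algorithms and cache the fastest
        paddle.set_flags({"FLAGS_cudnn_exhaustive_search": 1})


init_env(Args())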
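Likewise, a minimal sketch of the paddle.profiler flow this change switches to in place of the old nvprof_start/nvprof_stop calls. run_one_trial is a hypothetical stand-in for the benchmarked model_call; the start/step/stop/summary sequence mirrors the diff above.

import paddle
import paddle.profiler as profiler


def run_one_trial():
    # hypothetical stand-in for the real model_call being measured
    x = paddle.randn([256, 256])
    return paddle.matmul(x, x)


p = profiler.Profiler()
p.start()
for _ in range(10):
    run_one_trial()
    p.step()  # marks the end of one profiled iteration
p.stop()
p.summary()  # prints the operator/kernel timing tables collected during the run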