1010import numpy as np
1111import random
1212import platform
13+ import traceback
1314
1415from graph_net .paddle import utils
1516from graph_net .benchmark_result import BenchmarkResult
@@ -89,13 +90,6 @@ def get_compiled_model(args, model):
8990 return compiled_model
9091
9192
92- def regular_item (item ):
93- assert isinstance (item , paddle .Tensor )
94- if item .dtype not in [paddle .float32 , paddle .float64 ]:
95- item = item .astype ("float32" )
96- return item
97-
98-
9993def count_number_of_ops (args , model , eager_mode ):
10094 if eager_mode :
10195 static_model = paddle .jit .to_static (
@@ -227,70 +221,64 @@ def init_benchmark_result(args):
227221 return result_data
228222
229223
230- def test_single_model (args ):
231- synchronizer_func = get_synchronizer_func (args )
232- input_dict , input_dtypes , param_dtypes = get_input_dict (args )
233- model = get_model (args )
234- model .eval ()
235-
236- # Collect model information
237- num_eager_ops = count_number_of_ops (args , model , eager_mode = True )
238-
239- # Initialize benchmark result
240- result_data = init_benchmark_result (args )
241- result_data .update_model_info (num_eager_ops , input_dtypes , param_dtypes )
242-
243- # Run on eager mode
244- expected_out , eager_time_stats = measure_performance (
245- lambda : model (** input_dict ), args , synchronizer_func
246- )
247-
248- # Run on compiling mode
249- compiled_model = get_compiled_model (args , model )
250- compiled_out , compiled_time_stats = measure_performance (
251- lambda : compiled_model (** input_dict ), args , synchronizer_func
252- )
253-
224+ def check_outputs (args , expected_out , compiled_out , result_data ):
254225 if isinstance (expected_out , paddle .Tensor ):
255226 expected_out = [expected_out ]
227+ if isinstance (compiled_out , paddle .Tensor ):
256228 compiled_out = [compiled_out ]
257- if isinstance (expected_out , list ) or isinstance (expected_out , tuple ):
258- output_dtypes = []
259- for a , b in zip (expected_out , compiled_out ):
260- if (a is None and b is not None ) or (a is not None and b is None ):
261- raise ValueError ("Both expected_out and compiled_out must be not None." )
262- if a is not None and b is not None :
263- assert (
264- a .dtype == b .dtype
265- ), f"expected_out's dtype ({ a .dtype } ) is not the same as compiled_out's dtype { b .dtype } ."
266- output_dtypes .append (str (a .dtype ))
267- result_data .update_corrrectness ("num_outpus" , len (output_dtypes ))
268- result_data .update_corrrectness ("output_dtyps" , output_dtypes )
269-
270- # Remove all None in outputs
271- expected_out = [x for x in expected_out if x is not None ]
272- compiled_out = [x for x in compiled_out if x is not None ]
273- expected_out = [
274- regular_item (item )
275- for item in expected_out
276- if item is not None and np .array (item ).size != 0
277- ]
278- compiled_out = [
279- regular_item (item )
280- for item in compiled_out
281- if item is not None and np .array (item ).size != 0
282- ]
283- else :
284- raise ValueError ("Illegal return value." )
229+
230+ eager_output_dtypes = [None ] * len (expected_out )
231+ for i , tensor in enumerate (expected_out ):
232+ if tensor is not None :
233+ eager_output_dtypes [i ] = str (tensor .dtype )
234+ result_data .update_corrrectness ("num_eager_outputs" , len (expected_out ))
235+ result_data .update_corrrectness ("eager_output_dtypes" , eager_output_dtypes )
236+
237+ compiled_output_dtypes = [None ] * len (compiled_out )
238+ for i , tensor in enumerate (compiled_out ):
239+ if tensor is not None :
240+ compiled_output_dtypes [i ] = str (tensor .dtype )
241+ result_data .update_corrrectness ("num_compiled_outputs" , len (compiled_out ))
242+ result_data .update_corrrectness ("compiled_output_dtypes" , compiled_output_dtypes )
243+
244+ is_output_consistent = len (expected_out ) == len (compiled_out )
245+ for a , b in zip (expected_out , compiled_out ):
246+ if (a is None and b is not None ) or (a is not None and b is None ):
247+ is_output_consistent = False
248+ if a is not None and b is not None and a .dtype != b .dtype :
249+ is_output_consistent = False
250+ result_data .update_corrrectness ("output_consistent" , is_output_consistent )
251+
def regular_outputs(origin_outputs):
    """Normalize a list of outputs for numeric comparison.

    Paddle tensors whose dtype is not float32/float64 are cast to float32;
    every other entry (including None and non-tensor values) passes through
    unchanged. Order and length of the list are preserved.
    """
    normalized = []
    for value in origin_outputs:
        # Keep the None check first so non-tensor placeholders never reach
        # the dtype inspection.
        needs_cast = (
            value is not None
            and isinstance(value, paddle.Tensor)
            and value.dtype not in [paddle.float32, paddle.float64]
        )
        normalized.append(value.astype("float32") if needs_cast else value)
    return normalized
263+
264+ expected_out = regular_outputs (expected_out )
265+ compiled_out = regular_outputs (compiled_out )
285266
def print_cmp(key, func, **kwargs):
    """Run one comparison metric, record it on result_data, and log it.

    The metric callable receives the enclosing scope's expected_out and
    compiled_out; any exception it raises is captured as a failure string
    (with traceback) instead of aborting the remaining comparisons.
    """
    try:
        outcome = func(expected_out, compiled_out, **kwargs)
    except Exception as err:
        outcome = f"{key} failed: {str(err)}\n{traceback.format_exc()}"
    result_data.update_corrrectness(key, outcome)
    log_line = f"{args.log_prompt} {key} model_path:{args.model_path} {outcome}"
    print(log_line, file=sys.stderr)
293277
278+ print (
279+ f"{ args .log_prompt } output_dtypes model_path:{ args .model_path } eager:{ eager_output_dtypes } compiled:{ compiled_output_dtypes } " ,
280+ file = sys .stderr ,
281+ )
294282 print_cmp ("cmp.equal" , get_cmp_equal )
295283 print_cmp ("cmp.all_close_atol8_rtol8" , get_cmp_all_close , atol = 1e-8 , rtol = 1e-8 )
296284 print_cmp ("cmp.all_close_atol8_rtol5" , get_cmp_all_close , atol = 1e-8 , rtol = 1e-5 )
@@ -305,26 +293,65 @@ def print_cmp(key, func, **kwargs):
305293 print_cmp ("cmp.diff_count_atol3_rtol2" , get_cmp_diff_count , atol = 1e-3 , rtol = 1e-2 )
306294 print_cmp ("cmp.diff_count_atol2_rtol1" , get_cmp_diff_count , atol = 1e-2 , rtol = 1e-1 )
307295
296+
def test_single_model(args):
    """Benchmark one model in eager vs. compiled mode and persist the results.

    Steps:
      1. Load inputs and the model; collect basic model information.
      2. Run the model in eager mode and in compiled mode, measuring each.
         A failure in either mode is logged (with traceback) instead of
         aborting, so the result JSON is still written.
      3. When both runs succeeded, compare outputs via check_outputs and
         log duration/speedup statistics.
      4. If args.output_dir is set, write the BenchmarkResult to JSON.

    All diagnostics go to sys.stderr, consistent with the rest of this
    file's logging (the previous version sent the run-status and failure
    messages to stdout).
    """
    synchronizer_func = get_synchronizer_func(args)
    input_dict, input_dtypes, param_dtypes = get_input_dict(args)
    model = get_model(args)
    model.eval()

    # Collect model information.
    num_eager_ops = count_number_of_ops(args, model, eager_mode=True)

    # Initialize benchmark result.
    result_data = init_benchmark_result(args)
    result_data.update_model_info(num_eager_ops, input_dtypes, param_dtypes)

    # Run on eager mode. Catch failures so the compiled run still happens.
    running_eager_success = False
    try:
        print("Run model in eager mode.", file=sys.stderr)
        expected_out, eager_time_stats = measure_performance(
            lambda: model(**input_dict), args, synchronizer_func
        )
        running_eager_success = True
    except Exception as e:
        print(
            f"Run model in eager mode failed: {str(e)}\n{traceback.format_exc()}",
            file=sys.stderr,
        )

    # Run on compiling mode.
    running_compiled_success = False
    try:
        print("Run model in compiled mode.", file=sys.stderr)
        compiled_model = get_compiled_model(args, model)
        compiled_out, compiled_time_stats = measure_performance(
            lambda: compiled_model(**input_dict), args, synchronizer_func
        )
        running_compiled_success = True
    except Exception as e:
        print(
            f"Run model in compiled mode failed: {str(e)}\n{traceback.format_exc()}",
            file=sys.stderr,
        )

    print(
        f"{args.log_prompt} information model_path:{args.model_path} {num_eager_ops} ops, param_dtypes:{param_dtypes}, input_dtypes:{input_dtypes}",
        file=sys.stderr,
    )
    if running_eager_success and running_compiled_success:
        check_outputs(args, expected_out, compiled_out, result_data)

        # Timing stats only exist when both runs succeeded.
        result_data.update_performance(eager_time_stats, compiled_time_stats)
        duration_log = (
            f"{args.log_prompt} [Duration] "
            f"eager_e2e:{result_data.eager_e2e_time_ms:.4f} ms compiled_e2e:{result_data.compiled_e2e_time_ms:.4f} ms"
        )
        speedup_log = (
            f"{args.log_prompt} [Speedup] " f"e2e_speedup:{result_data.e2e_speedup:.4f}"
        )

        if "cuda" in args.device:
            duration_log += f" eager_gpu:{result_data.eager_gpu_time_ms:.4f} ms compiled_gpu:{result_data.compiled_gpu_time_ms:.4f} ms"
            speedup_log += f" gpu_speedup:{result_data.gpu_speedup:.4f}"

        print(duration_log, file=sys.stderr)
        print(speedup_log, file=sys.stderr)

    if args.output_dir:
        result_data.write_to_json(args.output_dir)
0 commit comments