 import time
 import math
 import numpy as np
+import random
 import platform
 import traceback
 import subprocess
@@ -34,32 +35,46 @@ def get_compiler_backend(args) -> GraphCompilerBackend:
     return registry_backend[args.compiler]


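+# Seed every RNG source used by the benchmark (Paddle, Python, NumPy) for reproducible runs.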
+def set_seed(random_seed):
+    paddle.seed(random_seed)
+    random.seed(random_seed)
+    np.random.seed(random_seed)
+
+
 def init_env(args):
     if test_compiler_util.is_gpu_device(args.device):
         paddle.set_flags({"FLAGS_cudnn_exhaustive_search": 1})


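+# Resolve a human-readable hardware name; falls back to "unknown" if detection fails.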
+def get_hardward_name(args):
+    hardware = "unknown"
+    if test_compiler_util.is_gpu_device(args.device):
+        hardware = paddle.device.cuda.get_device_name(0)
+    elif args.device == "xpu":
+        try:
+            output = subprocess.check_output(["xpu-smi", "-L"], text=True)
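+            # Expect lines like "XPU 0: <name> (UUID: ...)"; group(2) captures the name.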
+            hardware = next(
+                match.group(2)
+                for line in output.splitlines()
+                if (
+                    match := re.match(
+                        r"XPU\s+(\d+):\s+(.+?)\s+\(UUID:\s*([^)]+)\)", line
+                    )
+                )
+            )
+        except Exception:
+            pass
+    elif args.device == "cpu":
+        hardware = platform.processor()
+    return hardware
+
+
 def get_compile_framework_version(args):
     if args.compiler in ["cinn", "nope"]:
         return paddle.__version__
     return "unknown"


-def check_and_print_gpu_utilization(compiler):
-    if paddle.device.is_compiled_with_cuda():
-        device_id = int(paddle.device.get_device().split(":")[-1])
-        device_count = paddle.device.cuda.device_count()
-        gpu_util, mem_util = test_compiler_util.get_device_utilization(
-            device_id, device_count, compiler.synchronize
-        )
-        if gpu_util is not None and mem_util is not None:
-            print(
-                f"Device status: gpu_id {device_id}, gpu_util {gpu_util:.2f}%, mem_util {mem_util:.2f}%",
-                file=sys.stderr,
-                flush=True,
-            )
-
-
6378def load_class_from_file (file_path : str , class_name : str ):
6479 file = Path (file_path ).resolve ()
6580 module_name = file .stem
@@ -85,10 +100,30 @@ def get_model(model_path):
     return model_class()


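+# Replay the tensors recorded in the model dump; weight_info and input_info are merged into one feed dict.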
+def get_input_dict(model_path):
+    inputs_params = utils.load_converted_from_text(f"{model_path}")
+    params = inputs_params["weight_info"]
+    inputs = inputs_params["input_info"]
+
+    params.update(inputs)
+    state_dict = {k: utils.replay_tensor(v) for k, v in params.items()}
+    return state_dict
+
+
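+# Build one paddle.static.InputSpec per recorded input from its saved shape and dtype.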
+def get_input_spec(model_path):
+    inputs_params_list = utils.load_converted_list_from_text(f"{model_path}")
+    input_spec = [None] * len(inputs_params_list)
+    for i, v in enumerate(inputs_params_list):
+        dtype = v["info"]["dtype"]
+        shape = v["info"]["shape"]
+        input_spec[i] = paddle.static.InputSpec(shape, dtype)
+    return input_spec
+
+
 def get_static_model(args, model):
     static_model = paddle.jit.to_static(
         model,
-        input_spec=utils.get_input_spec(args.model_path),
+        input_spec=get_input_spec(args.model_path),
         full_graph=True,
         backend=None,
     )
@@ -117,7 +152,7 @@ def measure_performance(model_call, args, compiler, profile=False):
     min_trials = int(100 / np.mean(warmup_e2e_times[1:]))
     trials = max(args.trials, min_trials)

-    hardware_name = test_compiler_util.get_hardward_name(args)
+    hardware_name = get_hardward_name(args)
     print(
         f"[Profiling] Using device: {args.device} {hardware_name}, warm up {args.warmup}, trials {trials}",
         file=sys.stderr,
@@ -240,7 +275,7 @@ def transfer_to_float(origin_outputs):
         args,
         expected_out,
         compiled_out,
-        cmp_equal_func=utils.get_cmp_equal,
+        cmp_equal_func=get_cmp_equal,
     )

     expected_out_fp32 = transfer_to_float(expected_out)
@@ -249,26 +284,39 @@ def transfer_to_float(origin_outputs):
         args,
         expected_out_fp32,
         compiled_out_fp32,
-        cmp_all_close_func=utils.get_cmp_all_close,
-        cmp_max_diff_func=utils.get_cmp_max_diff,
-        cmp_mean_diff_func=utils.get_cmp_mean_diff,
-        cmp_max_relative_diff_func=utils.get_cmp_max_relative_diff,
-        cmp_mean_relative_diff_func=utils.get_cmp_mean_relative_diff,
+        cmp_all_close_func=get_cmp_all_close,
+        cmp_max_diff_func=get_cmp_max_diff,
+        cmp_mean_diff_func=get_cmp_mean_diff,
+        cmp_max_relative_diff_func=get_cmp_max_relative_diff,
+        cmp_mean_relative_diff_func=get_cmp_mean_relative_diff,
+    )
+
+
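+# Log current GPU and memory utilization so benchmark numbers can be sanity-checked.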
+def check_and_print_gpu_utilization(compiler):
+    if paddle.device.is_compiled_with_cuda():
+        device_id = int(paddle.device.get_device().split(":")[-1])
+        device_count = paddle.device.cuda.device_count()
+        gpu_util, mem_util = test_compiler_util.get_device_utilization(
+            device_id, device_count, compiler.synchronize
         )
+        if gpu_util is not None and mem_util is not None:
+            print(
+                f"Device status: gpu_id {device_id}, gpu_util {gpu_util:.2f}%, mem_util {mem_util:.2f}%",
+                file=sys.stderr,
+                flush=True,
+            )


 def test_single_model(args):
     compiler = get_compiler_backend(args)
     check_and_print_gpu_utilization(compiler)

-    input_dict = utils.get_input_dict(args.model_path)
+    input_dict = get_input_dict(args.model_path)
     model = get_model(args.model_path)
     model.eval()

-    hardware_name = test_compiler_util.get_hardward_name(args)
-
     test_compiler_util.print_basic_config(
-        args, hardware_name, get_compile_framework_version(args)
+        args, get_hardward_name(args), get_compile_framework_version(args)
     )

     # Run on eager mode
@@ -293,7 +341,7 @@ def test_single_model(args):
     compiled_time_stats = {}
     try:
         print("Run model in compiled mode.", file=sys.stderr, flush=True)
-        input_spec = utils.get_input_spec(args.model_path)
+        input_spec = get_input_spec(args.model_path)
         compiled_model = compiler(model, input_spec)
         compiled_out, compiled_time_stats = measure_performance(
             lambda: compiled_model(**input_dict), args, compiler, profile=False
@@ -315,18 +363,125 @@ def test_single_model(args):
         )
     )

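+# Exact equality per output tensor, reported as space-separated "1"/"0" flags.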
+def get_cmp_equal(expected_out, compiled_out):
+    def convert(x):
+        if x.dtype in [paddle.float16, paddle.bfloat16]:
+            return x.astype("float32")
+        elif x.dtype in [paddle.uint8, paddle.int8, paddle.int16]:
+            return x.astype("int32")
+        return x
+
+    return " ".join(
+        str(int(paddle.equal_all(convert(a), convert(b))))
+        for a, b in zip(expected_out, compiled_out)
+    )
+
+
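+# Per-tensor allclose flags under the given absolute and relative tolerances.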
+def get_cmp_all_close(expected_out, compiled_out, atol, rtol):
+    return " ".join(
+        str(int(paddle.allclose(a, b, atol=atol, rtol=rtol)))
+        for a, b in zip(expected_out, compiled_out)
+    )
+
+
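+# Compact float formatting: scientific notation for very large or very small magnitudes.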
+def get_format_str(f):
+    if (abs(f) > 1e5 or abs(f) < 1e-5) and abs(f) != 0.0:
+        return f"{f:.5E}"
+    else:
+        return f"{f:.5f}"
+
+
+def get_cmp_max_diff(expected_out, compiled_out):
+    return " ".join(
+        get_format_str(paddle.max(paddle.abs(a - b)).item())
+        for a, b in zip(expected_out, compiled_out)
+    )
+
+
+def get_cmp_mean_diff(expected_out, compiled_out):
+    return " ".join(
+        get_format_str(paddle.mean(paddle.abs(a - b)).item())
+        for a, b in zip(expected_out, compiled_out)
+    )
+
+
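+# Relative differences add a small epsilon to the denominator to avoid division by zero.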
+def get_cmp_max_relative_diff(expected_out, compiled_out):
+    epsilon = 1e-8
+    return " ".join(
+        get_format_str(paddle.max(paddle.abs(a - b) / (paddle.abs(a) + epsilon)).item())
+        for a, b in zip(expected_out, compiled_out)
+    )
+
+
+def get_cmp_mean_relative_diff(expected_out, compiled_out):
+    epsilon = 1e-8
+    return " ".join(
+        get_format_str(
+            paddle.mean(paddle.abs(a - b) / (paddle.abs(a) + epsilon)).item()
+        )
+        for a, b in zip(expected_out, compiled_out)
+    )
+
+
+def get_cmp_diff_count(expected_out, compiled_out, atol, rtol):
+    return " ".join(
+        str(paddle.sum(~paddle.isclose(a, b, atol=atol, rtol=rtol)).item())
+        for a, b in zip(expected_out, compiled_out)
+    )
+
+
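+# Run each sample in a fresh interpreter via os.system; a failing sample is recorded instead of aborting the sweep.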
+def test_multi_models(args):
+    test_samples = test_compiler_util.get_allow_samples(args.allow_list)
+
+    sample_idx = 0
+    failed_samples = []
+    module_name = os.path.splitext(os.path.basename(__file__))[0]
+    for model_path in path_utils.get_recursively_model_path(args.model_path):
+        if test_samples is None or os.path.abspath(model_path) in test_samples:
+            print(
+                f"[{sample_idx}] {module_name}, model_path: {model_path}",
+                file=sys.stderr,
+                flush=True,
+            )
+            cmd = " ".join(
+                [
+                    sys.executable,
+                    f"-m graph_net.paddle.{module_name}",
+                    f"--model-path {model_path}",
+                    f"--compiler {args.compiler}",
+                    f"--device {args.device}",
+                    f"--warmup {args.warmup}",
+                    f"--trials {args.trials}",
+                    f"--log-prompt {args.log_prompt}",
+                ]
+            )
+            cmd_ret = os.system(cmd)
+            # assert cmd_ret == 0, f"{cmd_ret=}, {cmd=}"
+            if cmd_ret != 0:
+                failed_samples.append(model_path)
+            sample_idx += 1
+
+    print(
+        f"In total {sample_idx} verified samples, {len(failed_samples)} failed.",
+        file=sys.stderr,
+        flush=True,
+    )
+    for model_path in failed_samples:
+        print(f"- {model_path}", file=sys.stderr, flush=True)
+
+
 def main(args):
     assert os.path.isdir(args.model_path)
     assert args.compiler in {"cinn", "nope"}
     assert args.device in ["cuda", "dcu", "xpu", "cpu"]

     initalize_seed = 123
-    test_compiler_util.set_seed(random_seed=initalize_seed)
+    set_seed(random_seed=initalize_seed)

     if path_utils.is_single_model_dir(args.model_path):
         test_single_model(args)
     else:
-        test_compiler_util.test_multi_models(args, "paddle")
+        test_multi_models(args)


 if __name__ == "__main__":