@@ -25,7 +25,7 @@ def set_seed(random_seed):
2525
2626
2727def get_hardward_name (args ):
28- if args .device == "cuda" :
28+ if test_compiler_util . is_gpu_device ( args .device ) :
2929 hardware = paddle .device .cuda .get_device_name (0 )
3030 elif args .device == "cpu" :
3131 hardware = platform .processor ()
@@ -64,15 +64,15 @@ def get_synchronizer_func(args):
6464 return paddle .device .synchronize
6565
6666
def get_model(model_path):
    """Instantiate the ``GraphModule`` class defined in a sample's ``model.py``.

    Args:
        model_path: Directory containing the sample's ``model.py`` file.

    Returns:
        A new instance of the loaded ``GraphModule`` class, constructed
        without arguments.
    """
    # Takes the sample directory directly (rather than the parsed CLI
    # namespace) so it can be called for any model path.
    model_class = load_class_from_file(
        f"{model_path}/model.py", class_name="GraphModule"
    )
    return model_class()
7272
7373
74- def get_input_dict (args ):
75- inputs_params = utils .load_converted_from_text (f"{ args . model_path } " )
74+ def get_input_dict (model_path ):
75+ inputs_params = utils .load_converted_from_text (f"{ model_path } " )
7676 params = inputs_params ["weight_info" ]
7777 inputs = inputs_params ["input_info" ]
7878
@@ -81,8 +81,8 @@ def get_input_dict(args):
8181 return state_dict
8282
8383
84- def get_input_spec (args ):
85- inputs_params_list = utils .load_converted_list_from_text (f"{ args . model_path } " )
84+ def get_input_spec (model_path ):
85+ inputs_params_list = utils .load_converted_list_from_text (f"{ model_path } " )
8686 input_spec = [None ] * len (inputs_params_list )
8787 for i , v in enumerate (inputs_params_list ):
8888 dtype = v ["info" ]["dtype" ]
@@ -94,7 +94,7 @@ def get_input_spec(args):
9494def get_compiled_model (args , model ):
9595 if args .compiler == "nope" :
9696 return model
97- input_spec = get_input_spec (args )
97+ input_spec = get_input_spec (args . model_path )
9898 build_strategy = paddle .static .BuildStrategy ()
9999 compiled_model = paddle .jit .to_static (
100100 model ,
@@ -110,7 +110,7 @@ def get_compiled_model(args, model):
110110def get_static_model (args , model ):
111111 static_model = paddle .jit .to_static (
112112 model ,
113- input_spec = get_input_spec (args ),
113+ input_spec = get_input_spec (args . model_path ),
114114 full_graph = True ,
115115 backend = None ,
116116 )
@@ -138,7 +138,7 @@ def measure_performance(model_call, args, synchronizer_func, profile=False):
138138 flush = True ,
139139 )
140140
141- if "cuda" in args .device :
141+ if test_compiler_util . is_gpu_device ( args .device ) :
142142 """
143143 Acknowledgement: We evaluate the performance on both end-to-end and GPU-only timings,
144144 With reference to methods only based on CUDA events from KernelBench in https://github.com/ScalingIntelligence/KernelBench
@@ -249,8 +249,8 @@ def transfer_to_float(origin_outputs):
249249
250250def test_single_model (args ):
251251 synchronizer_func = get_synchronizer_func (args )
252- input_dict = get_input_dict (args )
253- model = get_model (args )
252+ input_dict = get_input_dict (args . model_path )
253+ model = get_model (args . model_path )
254254 model .eval ()
255255
256256 test_compiler_util .print_basic_config (
@@ -259,6 +259,7 @@ def test_single_model(args):
259259
260260 # Run on eager mode
261261 eager_success = False
262+ eager_time_stats = {}
262263 try :
263264 print ("Run model in eager mode." , file = sys .stderr , flush = True )
264265 static_model = get_static_model (args , model )
@@ -275,6 +276,7 @@ def test_single_model(args):
275276
276277 # Run on compiling mode
277278 compiled_success = False
279+ compiled_time_stats = {}
278280 try :
279281 print ("Run model in compiled mode." , file = sys .stderr , flush = True )
280282 compiled_model = get_compiled_model (args , model )
@@ -293,9 +295,9 @@ def test_single_model(args):
293295 if eager_success and compiled_success :
294296 check_outputs (args , expected_out , compiled_out )
295297
296- test_compiler_util .print_times_and_speedup (
297- args , eager_time_stats , compiled_time_stats
298- )
298+ test_compiler_util .print_times_and_speedup (
299+ args , eager_time_stats , compiled_time_stats
300+ )
299301
300302
301303def get_cmp_equal (expected_out , compiled_out ):
@@ -366,20 +368,12 @@ def get_cmp_diff_count(expected_out, compiled_out, atol, rtol):
366368
367369
368370def test_multi_models (args ):
369- test_samples = None
370- if args .allow_list is not None :
371- assert os .path .isfile (args .allow_list )
372- graphnet_root = path_utils .get_graphnet_root ()
373- print (f"graphnet_root: { graphnet_root } " , file = sys .stderr , flush = True )
374- verified_samples = []
375- with open (args .verified_samples_list_path , "r" ) as f :
376- for line in f .readlines ():
377- test_samples .append (os .path .join (graphnet_root , line .strip ()))
371+ test_samples = test_compiler_util .get_allow_samples (args .allow_list )
378372
379373 sample_idx = 0
380374 failed_samples = []
381375 for model_path in path_utils .get_recursively_model_path (args .model_path ):
382- if verified_samples is None or os .path .abspath (model_path ) in verified_samples :
376+ if test_samples is None or os .path .abspath (model_path ) in test_samples :
383377 print (
384378 f"[{ sample_idx } ] test_compiler, model_path: { model_path } " ,
385379 file = sys .stderr ,
@@ -415,11 +409,24 @@ def test_multi_models(args):
def main(args):
    """Validate the parsed arguments and run the single- or multi-model test.

    Args:
        args: Parsed command-line namespace. Reads ``model_path`` (must be an
            existing directory), ``compiler`` (``"cinn"`` or ``"nope"``) and
            ``device`` (``"cuda"``, ``"dcu"`` or ``"cpu"``).

    Raises:
        AssertionError: If any of the argument checks above fails.
    """
    assert os.path.isdir(args.model_path)
    assert args.compiler in {"cinn", "nope"}
    assert args.device in ["cuda", "dcu", "cpu"]

    initalize_seed = 123
    set_seed(random_seed=initalize_seed)

    if not path_utils.is_single_model_dir(args.model_path):
        test_multi_models(args)
        return

    if paddle.device.is_compiled_with_cuda():
        # The status line is purely informational. A device string without a
        # numeric index (e.g. "cpu" on a CUDA-enabled build) must not abort
        # the run, so the report is skipped in that case.
        try:
            device_id = int(paddle.device.get_device().split(":")[-1])
        except ValueError:
            device_id = None

        if device_id is not None:
            device_count = paddle.device.cuda.device_count()
            gpu_util, mem_util = test_compiler_util.get_device_utilization(
                device_id, device_count, get_synchronizer_func(args)
            )
            # Either value may come back as None; print only when both exist.
            if gpu_util is not None and mem_util is not None:
                print(
                    f"Device status: gpu_id {device_id}, gpu_util {gpu_util:.2f}%, mem_util {mem_util:.2f}%",
                    file=sys.stderr,
                    flush=True,
                )
    test_single_model(args)
0 commit comments