@@ -25,7 +25,7 @@ def set_seed(random_seed):
2525
2626
2727def get_hardward_name(args):
28- if args.device == "cuda" :
28+ if test_compiler_util.is_gpu_device( args.device) :
2929 hardware = paddle.device.cuda.get_device_name(0)
3030 elif args.device == "cpu":
3131 hardware = platform.processor()
@@ -64,15 +64,15 @@ def get_synchronizer_func(args):
6464 return paddle.device.synchronize
6565
6666
67- def get_model(args ):
67+ def get_model(model_path ):
6868 model_class = load_class_from_file(
69- f"{args. model_path}/model.py", class_name="GraphModule"
69+ f"{model_path}/model.py", class_name="GraphModule"
7070 )
7171 return model_class()
7272
7373
74- def get_input_dict(args ):
75- inputs_params = utils.load_converted_from_text(f"{args. model_path}")
74+ def get_input_dict(model_path ):
75+ inputs_params = utils.load_converted_from_text(f"{model_path}")
7676 params = inputs_params["weight_info"]
7777 inputs = inputs_params["input_info"]
7878
@@ -81,8 +81,8 @@ def get_input_dict(args):
8181 return state_dict
8282
8383
84- def get_input_spec(args ):
85- inputs_params_list = utils.load_converted_list_from_text(f"{args. model_path}")
84+ def get_input_spec(model_path ):
85+ inputs_params_list = utils.load_converted_list_from_text(f"{model_path}")
8686 input_spec = [None] * len(inputs_params_list)
8787 for i, v in enumerate(inputs_params_list):
8888 dtype = v["info"]["dtype"]
@@ -94,7 +94,7 @@ def get_input_spec(args):
9494def get_compiled_model(args, model):
9595 if args.compiler == "nope":
9696 return model
97- input_spec = get_input_spec(args)
97+ input_spec = get_input_spec(args.model_path )
9898 build_strategy = paddle.static.BuildStrategy()
9999 compiled_model = paddle.jit.to_static(
100100 model,
@@ -110,7 +110,7 @@ def get_compiled_model(args, model):
110110def get_static_model(args, model):
111111 static_model = paddle.jit.to_static(
112112 model,
113- input_spec=get_input_spec(args),
113+ input_spec=get_input_spec(args.model_path ),
114114 full_graph=True,
115115 backend=None,
116116 )
@@ -138,7 +138,7 @@ def measure_performance(model_call, args, synchronizer_func, profile=False):
138138 flush=True,
139139 )
140140
141- if "cuda" in args.device:
141+ if test_compiler_util.is_gpu_device( args.device) :
142142 """
143143 Acknowledgement: We evaluate the performance on both end-to-end and GPU-only timings,
144144 With reference to methods only based on CUDA events from KernelBench in https://github.com/ScalingIntelligence/KernelBench
@@ -249,8 +249,8 @@ def transfer_to_float(origin_outputs):
249249
250250def test_single_model(args):
251251 synchronizer_func = get_synchronizer_func(args)
252- input_dict = get_input_dict(args)
253- model = get_model(args)
252+ input_dict = get_input_dict(args.model_path )
253+ model = get_model(args.model_path )
254254 model.eval()
255255
256256 test_compiler_util.print_basic_config(
@@ -259,11 +259,12 @@ def test_single_model(args):
259259
260260 # Run on eager mode
261261 eager_success = False
262+ eager_time_stats = {}
262263 try:
263264 print("Run model in eager mode.", file=sys.stderr, flush=True)
264- # static_model = get_static_model(args, model)
265+ static_model = get_static_model(args, model)
265266 expected_out, eager_time_stats = measure_performance(
266- lambda: model (**input_dict), args, synchronizer_func, profile=False
267+ lambda: static_model (**input_dict), args, synchronizer_func, profile=False
267268 )
268269 eager_success = True
269270 except Exception as e:
@@ -275,6 +276,7 @@ def test_single_model(args):
275276
276277 # Run on compiling mode
277278 compiled_success = False
279+ compiled_time_stats = {}
278280 try:
279281 print("Run model in compiled mode.", file=sys.stderr, flush=True)
280282 compiled_model = get_compiled_model(args, model)
@@ -293,9 +295,9 @@ def test_single_model(args):
293295 if eager_success and compiled_success:
294296 check_outputs(args, expected_out, compiled_out)
295297
296- test_compiler_util.print_times_and_speedup(
297- args, eager_time_stats, compiled_time_stats
298- )
298+ test_compiler_util.print_times_and_speedup(
299+ args, eager_time_stats, compiled_time_stats
300+ )
299301
300302
301303def get_cmp_equal(expected_out, compiled_out):
@@ -366,15 +368,7 @@ def get_cmp_diff_count(expected_out, compiled_out, atol, rtol):
366368
367369
368370def test_multi_models(args):
369- test_samples = None
370- if args.allow_list is not None:
371- assert os.path.isfile(args.allow_list)
372- graphnet_root = path_utils.get_graphnet_root()
373- print(f"graphnet_root: {graphnet_root}", file=sys.stderr, flush=True)
374- test_samples = []
375- with open(args.allow_list, "r") as f:
376- for line in f.readlines():
377- test_samples.append(os.path.join(graphnet_root, line.strip()))
371+ test_samples = test_compiler_util.get_allow_samples(args.allow_list)
378372
379373 sample_idx = 0
380374 failed_samples = []
@@ -415,6 +409,7 @@ def test_multi_models(args):
415409def main(args):
416410 assert os.path.isdir(args.model_path)
417411 assert args.compiler in {"cinn", "nope"}
412+ assert args.device in ["cuda", "dcu", "cpu"]
418413
419414 initalize_seed = 123
420415 set_seed(random_seed=initalize_seed)
0 commit comments