
Commit 4f0a16c

test_compiler: support DCU.

1 parent bdc8d03 commit 4f0a16c
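
Summary: this commit lets the graph_net compiler test harness run on DCU devices in addition to CUDA and CPU. The CUDA-only checks (`args.device == "cuda"`, `"cuda" in args.device`) are replaced by a shared `test_compiler_util.is_gpu_device` predicate, the sample loaders take a `model_path` instead of the full argparse namespace, the allow-list parsing moves into a reusable `get_allow_samples` helper, `main` now validates `args.device` against `["cuda", "dcu", "cpu"]`, and the eager baseline switches from calling the model directly to calling the `paddle.jit.to_static` model with `backend=None`.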

File tree

2 files changed: +64 -40 lines

graph_net/paddle/test_compiler.py

Lines changed: 21 additions & 26 deletions
```diff
@@ -25,7 +25,7 @@ def set_seed(random_seed):
 
 
 def get_hardward_name(args):
-    if args.device == "cuda":
+    if test_compiler_util.is_gpu_device(args.device):
         hardware = paddle.device.cuda.get_device_name(0)
     elif args.device == "cpu":
         hardware = platform.processor()
```
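
The predicate comes from `graph_net/test_compiler_util.py` in this same commit. A standalone sketch of its behavior (the asserts are illustrative, not part of the commit):

```python
def is_gpu_device(device):
    # Substring match, so device strings such as "cuda:0" or "dcu:1"
    # also count as GPU devices.
    return "cuda" in device or "dcu" in device

assert is_gpu_device("cuda")
assert is_gpu_device("cuda:0")
assert is_gpu_device("dcu")
assert not is_gpu_device("cpu")
```

Note that the `get_hardward_name` branch still calls `paddle.device.cuda.get_device_name(0)` for DCU; presumably Paddle's ROCm/DCU builds expose the same `paddle.device.cuda` namespace, but that is an assumption, not something this diff shows.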
```diff
@@ -64,15 +64,15 @@ def get_synchronizer_func(args):
     return paddle.device.synchronize
 
 
-def get_model(args):
+def get_model(model_path):
     model_class = load_class_from_file(
-        f"{args.model_path}/model.py", class_name="GraphModule"
+        f"{model_path}/model.py", class_name="GraphModule"
     )
     return model_class()
 
 
-def get_input_dict(args):
-    inputs_params = utils.load_converted_from_text(f"{args.model_path}")
+def get_input_dict(model_path):
+    inputs_params = utils.load_converted_from_text(f"{model_path}")
     params = inputs_params["weight_info"]
     inputs = inputs_params["input_info"]
 
```
```diff
@@ -81,8 +81,8 @@ def get_input_dict(args):
     return state_dict
 
 
-def get_input_spec(args):
-    inputs_params_list = utils.load_converted_list_from_text(f"{args.model_path}")
+def get_input_spec(model_path):
+    inputs_params_list = utils.load_converted_list_from_text(f"{model_path}")
     input_spec = [None] * len(inputs_params_list)
     for i, v in enumerate(inputs_params_list):
         dtype = v["info"]["dtype"]
```
```diff
@@ -94,7 +94,7 @@ def get_input_spec(args):
 def get_compiled_model(args, model):
     if args.compiler == "nope":
         return model
-    input_spec = get_input_spec(args)
+    input_spec = get_input_spec(args.model_path)
     build_strategy = paddle.static.BuildStrategy()
     compiled_model = paddle.jit.to_static(
         model,
```
```diff
@@ -110,7 +110,7 @@ def get_compiled_model(args, model):
 def get_static_model(args, model):
     static_model = paddle.jit.to_static(
         model,
-        input_spec=get_input_spec(args),
+        input_spec=get_input_spec(args.model_path),
         full_graph=True,
         backend=None,
     )
```
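
Passing `model_path` instead of the whole argparse namespace decouples the loaders from the CLI, so they can be driven per-sample (as `test_multi_models` does when iterating sample directories). A minimal sketch of direct use, assuming the module is importable as `graph_net.paddle.test_compiler`; the sample path is hypothetical:

```python
from graph_net.paddle import test_compiler

# Hypothetical sample directory; any GraphNet sample containing a model.py
# plus converted input/weight text files should work.
sample_dir = "/path/to/graphnet/sample"

model = test_compiler.get_model(sample_dir)            # loads GraphModule from model.py
model.eval()
input_dict = test_compiler.get_input_dict(sample_dir)  # tensors from the converted text files
out = model(**input_dict)
```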
```diff
@@ -138,7 +138,7 @@ def measure_performance(model_call, args, synchronizer_func, profile=False):
         flush=True,
     )
 
-    if "cuda" in args.device:
+    if test_compiler_util.is_gpu_device(args.device):
        """
        Acknowledgement: We evaluate the performance on both end-to-end and GPU-only timings,
        With reference to methods only based on CUDA events from KernelBench in https://github.com/ScalingIntelligence/KernelBench
```
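
`is_gpu_device` gates the device-only timing path because GPU kernels launch asynchronously: wall-clock timing is only meaningful if the device is synchronized around the measured call. A minimal sketch of the synchronize-then-time pattern, mirroring `naive_timer`/`DurationBox` in `graph_net/test_compiler_util.py` (the helper name here is ours, not the repo's):

```python
import time

def time_call_ms(fn, synchronizer_func):
    # Drain any pending device work so it is not attributed to fn.
    synchronizer_func()
    start = time.perf_counter()
    fn()
    # Wait for fn's kernels to finish before reading the clock.
    synchronizer_func()
    return (time.perf_counter() - start) * 1000  # milliseconds
    # e.g. time_call_ms(lambda: model(**input_dict), paddle.device.synchronize)
```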
```diff
@@ -249,8 +249,8 @@ def transfer_to_float(origin_outputs):
 
 def test_single_model(args):
     synchronizer_func = get_synchronizer_func(args)
-    input_dict = get_input_dict(args)
-    model = get_model(args)
+    input_dict = get_input_dict(args.model_path)
+    model = get_model(args.model_path)
     model.eval()
 
     test_compiler_util.print_basic_config(
```
```diff
@@ -259,11 +259,12 @@ def test_single_model(args):
 
     # Run on eager mode
     eager_success = False
+    eager_time_stats = {}
     try:
         print("Run model in eager mode.", file=sys.stderr, flush=True)
-        # static_model = get_static_model(args, model)
+        static_model = get_static_model(args, model)
         expected_out, eager_time_stats = measure_performance(
-            lambda: model(**input_dict), args, synchronizer_func, profile=False
+            lambda: static_model(**input_dict), args, synchronizer_func, profile=False
         )
         eager_success = True
     except Exception as e:
```
```diff
@@ -275,6 +276,7 @@ def test_single_model(args):
 
     # Run on compiling mode
     compiled_success = False
+    compiled_time_stats = {}
     try:
         print("Run model in compiled mode.", file=sys.stderr, flush=True)
         compiled_model = get_compiled_model(args, model)
```
```diff
@@ -293,9 +295,9 @@ def test_single_model(args):
     if eager_success and compiled_success:
         check_outputs(args, expected_out, compiled_out)
 
-        test_compiler_util.print_times_and_speedup(
-            args, eager_time_stats, compiled_time_stats
-        )
+    test_compiler_util.print_times_and_speedup(
+        args, eager_time_stats, compiled_time_stats
+    )
 
 
 def get_cmp_equal(expected_out, compiled_out):
```
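
The two `= {}` initializations above pair with the de-indent in this hunk: `print_times_and_speedup` now runs unconditionally rather than only inside the `if eager_success and compiled_success:` block, so `eager_time_stats` and `compiled_time_stats` must be bound even when a try block raises before `measure_performance` assigns them.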
```diff
@@ -366,15 +368,7 @@ def get_cmp_diff_count(expected_out, compiled_out, atol, rtol):
 
 
 def test_multi_models(args):
-    test_samples = None
-    if args.allow_list is not None:
-        assert os.path.isfile(args.allow_list)
-        graphnet_root = path_utils.get_graphnet_root()
-        print(f"graphnet_root: {graphnet_root}", file=sys.stderr, flush=True)
-        test_samples = []
-        with open(args.allow_list, "r") as f:
-            for line in f.readlines():
-                test_samples.append(os.path.join(graphnet_root, line.strip()))
+    test_samples = test_compiler_util.get_allow_samples(args.allow_list)
 
     sample_idx = 0
     failed_samples = []
```
```diff
@@ -415,6 +409,7 @@
 def main(args):
     assert os.path.isdir(args.model_path)
     assert args.compiler in {"cinn", "nope"}
+    assert args.device in ["cuda", "dcu", "cpu"]
 
     initalize_seed = 123
     set_seed(random_seed=initalize_seed)
```

graph_net/test_compiler_util.py

Lines changed: 43 additions & 14 deletions
```diff
@@ -9,6 +9,8 @@
 from dataclasses import dataclass
 from contextlib import contextmanager
 
+from graph_net import path_utils
+
 
 @dataclass
 class DurationBox:
```
```diff
@@ -25,6 +27,10 @@ def naive_timer(duration_box, synchronizer_func):
     duration_box.value = (end - start) * 1000  # Store in milliseconds
 
 
+def is_gpu_device(device):
+    return "cuda" in device or "dcu" in device
+
+
 def get_device_utilization(device_id, device_count, synchronizer_func):
     current_pid = os.getpid()
 
```
```diff
@@ -98,6 +104,7 @@ def get_device_utilization(device_id, device_count, synchronizer_func):
             gpu_uuid, pid, used_memory = line.split(", ")
             if gpu_uuid == selected_gpu_uuid and int(pid) != current_pid:
                 other_tasks.append(line)
+    # Note: in docker container, the current_pid maybe different from that captured by nvidia-smi.
     print(
         f"Note: There are {len(other_tasks)} tasks running on GPU {selected_gpu_id} (current_pid:{current_pid}).",
         file=sys.stderr,
```
```diff
@@ -169,24 +176,33 @@ def print_basic_config(args, hardware_name, compile_framework_version):
     )
 
 
-def print_running_status(args, eager_success, compiled_success):
+def print_running_status(args, eager_success, compiled_success=None):
     def convert_to_str(b):
         return "success" if b else "failed"
 
-    print_with_log_prompt(
-        "[Result][status]",
-        f"eager:{convert_to_str(eager_success)} compiled:{convert_to_str(compiled_success)}",
-        args.log_prompt,
-    )
+    if compiled_success is not None:
+        print_with_log_prompt(
+            "[Result][status]",
+            f"eager:{convert_to_str(eager_success)} compiled:{convert_to_str(compiled_success)}",
+            args.log_prompt,
+        )
+    else:
+        print_with_log_prompt(
+            "[Result][status]",
+            f"eager:{convert_to_str(eager_success)}",
+            args.log_prompt,
+        )
 
 
 def print_times_and_speedup(args, eager_stats, compiled_stats):
-    print_with_log_prompt(
-        "[Performance][eager]:", json.dumps(eager_stats), args.log_prompt
-    )
-    print_with_log_prompt(
-        "[Performance][compiled]:", json.dumps(compiled_stats), args.log_prompt
-    )
+    if not eager_stats:
+        print_with_log_prompt(
+            "[Performance][eager]:", json.dumps(eager_stats), args.log_prompt
+        )
+    if not compiled_stats:
+        print_with_log_prompt(
+            "[Performance][compiled]:", json.dumps(compiled_stats), args.log_prompt
+        )
 
     e2e_speedup = 0
     gpu_speedup = 0
```
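
One observation on the new guards: as written, `if not eager_stats:` and `if not compiled_stats:` emit a `[Performance]` line only when the corresponding stats dict is empty, i.e. when a phase produced no measurements. If the intent was the opposite (print measurements only when they exist), the conditions would presumably read `if eager_stats:` and `if compiled_stats:`, matching the `> 0` guards on the speedup lines below.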
```diff
@@ -197,7 +213,7 @@ def print_times_and_speedup(args, eager_stats, compiled_stats):
     if eager_e2e_time_ms > 0 and compiled_e2e_time_ms > 0:
         e2e_speedup = eager_e2e_time_ms / compiled_e2e_time_ms
 
-    if "cuda" in args.device:
+    if is_gpu_device(args.device):
         eager_gpu_time_ms = eager_stats.get("gpu", {}).get("mean", 0)
         compiled_gpu_time_ms = compiled_stats.get("gpu", {}).get("mean", 0)
 
```
```diff
@@ -207,7 +223,7 @@ def print_times_and_speedup(args, eager_stats, compiled_stats):
     if e2e_speedup > 0:
         print_with_log_prompt("[Speedup][e2e]:", f"{e2e_speedup:.5f}", args.log_prompt)
 
-    if "cuda" in args.device and gpu_speedup > 0:
+    if is_gpu_device(args.device) and gpu_speedup > 0:
         print_with_log_prompt("[Speedup][gpu]:", f"{gpu_speedup:.5f}", args.log_prompt)
 
 
```
```diff
@@ -318,3 +334,16 @@ def check_allclose(
         compiled_out=compiled_out,
         **kwargs,
     )
+
+
+def get_allow_samples(allow_list):
+    if allow_list is None:
+        return None
+
+    assert os.path.isfile(allow_list), f"{allow_list} is not a regular file."
+    graphnet_root = path_utils.get_graphnet_root()
+    print(f"graphnet_root: {graphnet_root}", file=sys.stderr, flush=True)
+    test_samples = []
+    with open(allow_list, "r") as f:
+        for line in f.readlines():
+            test_samples.append(os.path.join(graphnet_root, line.strip()))
```
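
The hunk ends at the `append` with no explicit return, so `get_allow_samples` as shown returns `None` even when an allow-list is given. A sketch of the presumably intended version; the trailing `return test_samples` is our assumption, based on `test_multi_models` consuming the result:

```python
import os
import sys

from graph_net import path_utils

def get_allow_samples(allow_list):
    if allow_list is None:
        return None  # no allow-list: caller falls back to testing all samples

    assert os.path.isfile(allow_list), f"{allow_list} is not a regular file."
    graphnet_root = path_utils.get_graphnet_root()
    print(f"graphnet_root: {graphnet_root}", file=sys.stderr, flush=True)
    test_samples = []
    with open(allow_list, "r") as f:
        for line in f:  # iterating the file directly avoids readlines()
            test_samples.append(os.path.join(graphnet_root, line.strip()))
    return test_samples  # assumption: absent from the diff as shown
```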
