
Commit dcd63a7

test_compiler support dcu.
1 parent bdc8d03 commit dcd63a7

2 files changed (+35, -19 lines)

graph_net/paddle/test_compiler.py

Lines changed: 7 additions & 5 deletions
```diff
@@ -25,7 +25,7 @@ def set_seed(random_seed):
 
 
 def get_hardward_name(args):
-    if args.device == "cuda":
+    if test_compiler_util.is_gpu_device(args.device):
         hardware = paddle.device.cuda.get_device_name(0)
     elif args.device == "cpu":
         hardware = platform.processor()
```
```diff
@@ -138,7 +138,7 @@ def measure_performance(model_call, args, synchronizer_func, profile=False):
         flush=True,
     )
 
-    if "cuda" in args.device:
+    if test_compiler_util.is_gpu_device(args.device):
         """
         Acknowledgement: We evaluate the performance on both end-to-end and GPU-only timings,
         With reference to methods only based on CUDA events from KernelBench in https://github.com/ScalingIntelligence/KernelBench
```
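For context, the guarded branch collects GPU-only timings with device events in the KernelBench style, while the end-to-end numbers come from wall-clock timing around a device sync, which mirrors what `naive_timer` in `test_compiler_util.py` does. A minimal sketch of that end-to-end half, with `model_call` and `synchronizer_func` as stand-ins for the real arguments:

```python
import time

def measure_e2e_ms(model_call, synchronizer_func, iters=10):
    """Mean wall-clock milliseconds per call: a sketch of the end-to-end timing.

    synchronizer_func (e.g. a device synchronize) runs before starting and
    before stopping the clock, so asynchronous GPU kernels are fully drained
    and are not mistaken for fast host-side returns.
    """
    synchronizer_func()
    start = time.perf_counter()
    for _ in range(iters):
        model_call()
    synchronizer_func()
    return (time.perf_counter() - start) * 1000 / iters
```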
```diff
@@ -259,6 +259,7 @@ def test_single_model(args):
 
     # Run on eager mode
     eager_success = False
+    eager_time_stats = {}
     try:
         print("Run model in eager mode.", file=sys.stderr, flush=True)
         # static_model = get_static_model(args, model)
```
```diff
@@ -275,6 +276,7 @@ def test_single_model(args):
 
     # Run on compiling mode
     compiled_success = False
+    compiled_time_stats = {}
     try:
         print("Run model in compiled mode.", file=sys.stderr, flush=True)
         compiled_model = get_compiled_model(args, model)
```
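The two new `{}` defaults matter because the `try` bodies can fail before assigning the stats: with the defaults in place, `print_times_and_speedup` later receives an empty dict instead of hitting a `NameError`. A minimal sketch of that failure path (`run_mode` is a hypothetical stand-in for the measured model run):

```python
def run_mode(fail):
    # Stand-in for an eager/compiled measurement that may raise.
    if fail:
        raise RuntimeError("mode failed")
    return {"e2e": {"mean": 1.0}}

eager_time_stats = {}  # default added by this commit
try:
    eager_time_stats = run_mode(fail=True)
except RuntimeError:
    pass

print(eager_time_stats)  # {} -- safe to pass along, rather than an unbound name
```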
```diff
@@ -293,9 +295,9 @@ def test_single_model(args):
     if eager_success and compiled_success:
         check_outputs(args, expected_out, compiled_out)
 
-        test_compiler_util.print_times_and_speedup(
-            args, eager_time_stats, compiled_time_stats
-        )
+    test_compiler_util.print_times_and_speedup(
+        args, eager_time_stats, compiled_time_stats
+    )
 
 
 def get_cmp_equal(expected_out, compiled_out):
```

graph_net/test_compiler_util.py

Lines changed: 28 additions & 14 deletions
```diff
@@ -25,6 +25,10 @@ def naive_timer(duration_box, synchronizer_func):
     duration_box.value = (end - start) * 1000  # Store in milliseconds
 
 
+def is_gpu_device(device):
+    return "cuda" in device or "dcu" in device
+
+
 def get_device_utilization(device_id, device_count, synchronizer_func):
     current_pid = os.getpid()
 
```
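The helper is a plain substring test, so it classifies bare and indexed device strings alike: anything mentioning `cuda` or `dcu` takes the GPU path. A quick check of the behavior (the helper body is copied from the diff above; the device strings are illustrative):

```python
def is_gpu_device(device):
    # Any device string mentioning cuda or dcu counts as a GPU device.
    return "cuda" in device or "dcu" in device

assert is_gpu_device("cuda") and is_gpu_device("cuda:0")
assert is_gpu_device("dcu") and is_gpu_device("dcu:1")
assert not is_gpu_device("cpu") and not is_gpu_device("xpu")
```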
```diff
@@ -98,6 +102,7 @@ def get_device_utilization(device_id, device_count, synchronizer_func):
             gpu_uuid, pid, used_memory = line.split(", ")
             if gpu_uuid == selected_gpu_uuid and int(pid) != current_pid:
                 other_tasks.append(line)
+    # Note: in a docker container, current_pid may be different from the PID captured by nvidia-smi.
     print(
         f"Note: There are {len(other_tasks)} tasks running on GPU {selected_gpu_id} (current_pid:{current_pid}).",
         file=sys.stderr,
```
```diff
@@ -169,24 +174,33 @@ def print_basic_config(args, hardware_name, compile_framework_version):
     )
 
 
-def print_running_status(args, eager_success, compiled_success):
+def print_running_status(args, eager_success, compiled_success=None):
     def convert_to_str(b):
         return "success" if b else "failed"
 
-    print_with_log_prompt(
-        "[Result][status]",
-        f"eager:{convert_to_str(eager_success)} compiled:{convert_to_str(compiled_success)}",
-        args.log_prompt,
-    )
+    if compiled_success is not None:
+        print_with_log_prompt(
+            "[Result][status]",
+            f"eager:{convert_to_str(eager_success)} compiled:{convert_to_str(compiled_success)}",
+            args.log_prompt,
+        )
+    else:
+        print_with_log_prompt(
+            "[Result][status]",
+            f"eager:{convert_to_str(eager_success)}",
+            args.log_prompt,
+        )
 
 
 def print_times_and_speedup(args, eager_stats, compiled_stats):
-    print_with_log_prompt(
-        "[Performance][eager]:", json.dumps(eager_stats), args.log_prompt
-    )
-    print_with_log_prompt(
-        "[Performance][compiled]:", json.dumps(compiled_stats), args.log_prompt
-    )
+    if not eager_stats:
+        print_with_log_prompt(
+            "[Performance][eager]:", json.dumps(eager_stats), args.log_prompt
+        )
+    if not compiled_stats:
+        print_with_log_prompt(
+            "[Performance][compiled]:", json.dumps(compiled_stats), args.log_prompt
+        )
 
     e2e_speedup = 0
     gpu_speedup = 0
```
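Defaulting `compiled_success` to `None` lets callers report an eager-only run without faking a compiled result. A sketch of the two call shapes (`print_with_log_prompt` is stubbed with `print` here, and the stub takes the log prompt directly rather than via `args`):

```python
def print_running_status(log_prompt, eager_success, compiled_success=None):
    # Stubbed version: one status line, with the compiled field included
    # only when the caller actually ran a compiled mode.
    status = f"eager:{'success' if eager_success else 'failed'}"
    if compiled_success is not None:
        status += f" compiled:{'success' if compiled_success else 'failed'}"
    print(log_prompt, "[Result][status]", status)

print_running_status("[graph-net]", True, False)  # eager:success compiled:failed
print_running_status("[graph-net]", True)         # eager:success
```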
```diff
@@ -197,7 +211,7 @@ def print_times_and_speedup(args, eager_stats, compiled_stats):
     if eager_e2e_time_ms > 0 and compiled_e2e_time_ms > 0:
         e2e_speedup = eager_e2e_time_ms / compiled_e2e_time_ms
 
-    if "cuda" in args.device:
+    if is_gpu_device(args.device):
         eager_gpu_time_ms = eager_stats.get("gpu", {}).get("mean", 0)
         compiled_gpu_time_ms = compiled_stats.get("gpu", {}).get("mean", 0)
 
```
```diff
@@ -207,7 +221,7 @@ def print_times_and_speedup(args, eager_stats, compiled_stats):
     if e2e_speedup > 0:
         print_with_log_prompt("[Speedup][e2e]:", f"{e2e_speedup:.5f}", args.log_prompt)
 
-    if "cuda" in args.device and gpu_speedup > 0:
+    if is_gpu_device(args.device) and gpu_speedup > 0:
         print_with_log_prompt("[Speedup][gpu]:", f"{gpu_speedup:.5f}", args.log_prompt)
 
 
```
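The speedups are plain ratios of mean times, and each guard skips the line when either measurement is missing or zero. A worked example with the stats shape the code expects (numbers are illustrative):

```python
eager_stats = {"e2e": {"mean": 12.0}, "gpu": {"mean": 8.0}}    # milliseconds
compiled_stats = {"e2e": {"mean": 6.0}, "gpu": {"mean": 3.2}}

e2e_speedup = gpu_speedup = 0
eager_e2e = eager_stats.get("e2e", {}).get("mean", 0)
compiled_e2e = compiled_stats.get("e2e", {}).get("mean", 0)
if eager_e2e > 0 and compiled_e2e > 0:
    e2e_speedup = eager_e2e / compiled_e2e      # 12.0 / 6.0 = 2.0

eager_gpu = eager_stats.get("gpu", {}).get("mean", 0)
compiled_gpu = compiled_stats.get("gpu", {}).get("mean", 0)
if eager_gpu > 0 and compiled_gpu > 0:
    gpu_speedup = eager_gpu / compiled_gpu      # 8.0 / 3.2 = 2.5

print(f"{e2e_speedup:.5f} {gpu_speedup:.5f}")   # 2.00000 2.50000
```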
