
Commit 05de565

Optimize the check codes of test_compiler.

1 parent ded584f

File tree

3 files changed: +107 -78 lines changed

graph_net/paddle/check_redundant_incrementally.py

Lines changed: 4 additions & 2 deletions

@@ -43,8 +43,10 @@ def is_single_model_dir(model_dir):
 
 
 def main(args):
-    assert os.path.isdir(args.model_path)
-    assert os.path.isdir(args.graph_net_samples_path)
+    assert os.path.isdir(args.model_path), f"model_path: {args.model_path}"
+    assert os.path.isdir(
+        args.graph_net_samples_path
+    ), f"graph_net_samples_path: {args.graph_net_samples_path}"
     current_model_graph_hash_pathes = set(
         graph_hash_path
         for model_path in get_recursively_model_pathes(args.model_path)
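Why the added messages matter: a bare `assert` failure reports only `AssertionError`, while the new form surfaces the offending path in the log. A minimal sketch (the path below is hypothetical):

```python
import os

model_path = "/tmp/does_not_exist"  # hypothetical path, for illustration only

# Before: a failure prints only "AssertionError" with no hint of the cause.
# assert os.path.isdir(model_path)

# After: the failing value appears directly in the error message, e.g.
# AssertionError: model_path: /tmp/does_not_exist
assert os.path.isdir(model_path), f"model_path: {model_path}"
```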

graph_net/paddle/test_compiler.py

Lines changed: 100 additions & 73 deletions

@@ -10,6 +10,7 @@
 import numpy as np
 import random
 import platform
+import traceback
 
 from graph_net.paddle import utils
 from graph_net.benchmark_result import BenchmarkResult
@@ -89,13 +90,6 @@ def get_compiled_model(args, model):
     return compiled_model
 
 
-def regular_item(item):
-    assert isinstance(item, paddle.Tensor)
-    if item.dtype not in [paddle.float32, paddle.float64]:
-        item = item.astype("float32")
-    return item
-
-
 def count_number_of_ops(args, model, eager_mode):
     if eager_mode:
         static_model = paddle.jit.to_static(
@@ -227,70 +221,64 @@ def init_benchmark_result(args):
     return result_data
 
 
-def test_single_model(args):
-    synchronizer_func = get_synchronizer_func(args)
-    input_dict, input_dtypes, param_dtypes = get_input_dict(args)
-    model = get_model(args)
-    model.eval()
-
-    # Collect model information
-    num_eager_ops = count_number_of_ops(args, model, eager_mode=True)
-
-    # Initialize benchmark result
-    result_data = init_benchmark_result(args)
-    result_data.update_model_info(num_eager_ops, input_dtypes, param_dtypes)
-
-    # Run on eager mode
-    expected_out, eager_time_stats = measure_performance(
-        lambda: model(**input_dict), args, synchronizer_func
-    )
-
-    # Run on compiling mode
-    compiled_model = get_compiled_model(args, model)
-    compiled_out, compiled_time_stats = measure_performance(
-        lambda: compiled_model(**input_dict), args, synchronizer_func
-    )
-
+def check_outputs(args, expected_out, compiled_out, result_data):
     if isinstance(expected_out, paddle.Tensor):
         expected_out = [expected_out]
+    if isinstance(compiled_out, paddle.Tensor):
         compiled_out = [compiled_out]
-    if isinstance(expected_out, list) or isinstance(expected_out, tuple):
-        output_dtypes = []
-        for a, b in zip(expected_out, compiled_out):
-            if (a is None and b is not None) or (a is not None and b is None):
-                raise ValueError("Both expected_out and compiled_out must be not None.")
-            if a is not None and b is not None:
-                assert (
-                    a.dtype == b.dtype
-                ), f"expected_out's dtype ({a.dtype}) is not the same as compiled_out's dtype {b.dtype}."
-                output_dtypes.append(str(a.dtype))
-        result_data.update_corrrectness("num_outpus", len(output_dtypes))
-        result_data.update_corrrectness("output_dtyps", output_dtypes)
-
-        # Remove all None in outputs
-        expected_out = [x for x in expected_out if x is not None]
-        compiled_out = [x for x in compiled_out if x is not None]
-        expected_out = [
-            regular_item(item)
-            for item in expected_out
-            if item is not None and np.array(item).size != 0
-        ]
-        compiled_out = [
-            regular_item(item)
-            for item in compiled_out
-            if item is not None and np.array(item).size != 0
-        ]
-    else:
-        raise ValueError("Illegal return value.")
+
+    eager_output_dtypes = [None] * len(expected_out)
+    for i, tensor in enumerate(expected_out):
+        if tensor is not None:
+            eager_output_dtypes[i] = str(tensor.dtype)
+    result_data.update_corrrectness("num_eager_outputs", len(expected_out))
+    result_data.update_corrrectness("eager_output_dtypes", eager_output_dtypes)
+
+    compiled_output_dtypes = [None] * len(compiled_out)
+    for i, tensor in enumerate(compiled_out):
+        if tensor is not None:
+            compiled_output_dtypes[i] = str(tensor.dtype)
+    result_data.update_corrrectness("num_compiled_outputs", len(compiled_out))
+    result_data.update_corrrectness("compiled_output_dtypes", compiled_output_dtypes)
+
+    is_output_consistent = len(expected_out) == len(compiled_out)
+    for a, b in zip(expected_out, compiled_out):
+        if (a is None and b is not None) or (a is not None and b is None):
+            is_output_consistent = False
+        if a is not None and b is not None and a.dtype != b.dtype:
+            is_output_consistent = False
+    result_data.update_corrrectness("output_consistent", is_output_consistent)
+
+    def regular_outputs(origin_outputs):
+        outputs = []
+        for item in origin_outputs:
+            if (
+                item is not None
+                and isinstance(item, paddle.Tensor)
+                and item.dtype not in [paddle.float32, paddle.float64]
+            ):
+                item = item.astype("float32")
+            outputs.append(item)
+        return outputs
+
+    expected_out = regular_outputs(expected_out)
+    compiled_out = regular_outputs(compiled_out)
 
     def print_cmp(key, func, **kwargs):
-        cmp_ret = func(expected_out, compiled_out, **kwargs)
+        try:
+            cmp_ret = func(expected_out, compiled_out, **kwargs)
+        except Exception as e:
+            cmp_ret = f"{key} failed: {str(e)}\n{traceback.format_exc()}"
         result_data.update_corrrectness(key, cmp_ret)
        print(
             f"{args.log_prompt} {key} model_path:{args.model_path} {cmp_ret}",
             file=sys.stderr,
         )
 
+    print(
+        f"{args.log_prompt} output_dtypes model_path:{args.model_path} eager:{eager_output_dtypes} compiled:{compiled_output_dtypes}",
+        file=sys.stderr,
+    )
     print_cmp("cmp.equal", get_cmp_equal)
     print_cmp("cmp.all_close_atol8_rtol8", get_cmp_all_close, atol=1e-8, rtol=1e-8)
     print_cmp("cmp.all_close_atol8_rtol5", get_cmp_all_close, atol=1e-8, rtol=1e-5)
@@ -305,26 +293,65 @@ def print_cmp(key, func, **kwargs):
     print_cmp("cmp.diff_count_atol3_rtol2", get_cmp_diff_count, atol=1e-3, rtol=1e-2)
     print_cmp("cmp.diff_count_atol2_rtol1", get_cmp_diff_count, atol=1e-2, rtol=1e-1)
 
+
+def test_single_model(args):
+    synchronizer_func = get_synchronizer_func(args)
+    input_dict, input_dtypes, param_dtypes = get_input_dict(args)
+    model = get_model(args)
+    model.eval()
+
+    # Collect model information
+    num_eager_ops = count_number_of_ops(args, model, eager_mode=True)
+
+    # Initialize benchmark result
+    result_data = init_benchmark_result(args)
+    result_data.update_model_info(num_eager_ops, input_dtypes, param_dtypes)
+
+    # Run on eager mode
+    running_eager_success = False
+    try:
+        print("Run model in eager mode.")
+        expected_out, eager_time_stats = measure_performance(
+            lambda: model(**input_dict), args, synchronizer_func
+        )
+        running_eager_success = True
+    except Exception as e:
+        print(f"Run model in eager mode failed: {str(e)}\n{traceback.format_exc()}")
+
+    # Run on compiling mode
+    running_compiled_success = False
+    try:
+        print("Run model in compiled mode.")
+        compiled_model = get_compiled_model(args, model)
+        compiled_out, compiled_time_stats = measure_performance(
+            lambda: compiled_model(**input_dict), args, synchronizer_func
+        )
+        running_compiled_success = True
+    except Exception as e:
+        print(f"Run model in compiled mode failed: {str(e)}\n{traceback.format_exc()}")
+
     print(
         f"{args.log_prompt} information model_path:{args.model_path} {num_eager_ops} ops, param_dtypes:{param_dtypes}, input_dtypes:{input_dtypes}",
         file=sys.stderr,
     )
+    if running_eager_success and running_compiled_success:
+        check_outputs(args, expected_out, compiled_out, result_data)
 
-    result_data.update_performance(eager_time_stats, compiled_time_stats)
-    duration_log = (
-        f"{args.log_prompt} [Duration] "
-        f"eager_e2e:{result_data.eager_e2e_time_ms:.4f} ms compiled_e2e:{result_data.compiled_e2e_time_ms:.4f} ms"
-    )
-    speedup_log = (
-        f"{args.log_prompt} [Speedup] " f"e2e_speedup:{result_data.e2e_speedup:.4f}"
-    )
+        result_data.update_performance(eager_time_stats, compiled_time_stats)
+        duration_log = (
+            f"{args.log_prompt} [Duration] "
+            f"eager_e2e:{result_data.eager_e2e_time_ms:.4f} ms compiled_e2e:{result_data.compiled_e2e_time_ms:.4f} ms"
+        )
+        speedup_log = (
+            f"{args.log_prompt} [Speedup] " f"e2e_speedup:{result_data.e2e_speedup:.4f}"
+        )
 
-    if "cuda" in args.device:
-        duration_log += f" eager_gpu:{result_data.eager_gpu_time_ms:.4f} ms compiled_gpu:{result_data.compiled_gpu_time_ms:.4f} ms"
-        speedup_log += f" gpu_speedup:{result_data.gpu_speedup:.4f}"
+        if "cuda" in args.device:
+            duration_log += f" eager_gpu:{result_data.eager_gpu_time_ms:.4f} ms compiled_gpu:{result_data.compiled_gpu_time_ms:.4f} ms"
+            speedup_log += f" gpu_speedup:{result_data.gpu_speedup:.4f}"
 
-    print(duration_log, file=sys.stderr)
-    print(speedup_log, file=sys.stderr)
+        print(duration_log, file=sys.stderr)
+        print(speedup_log, file=sys.stderr)
 
     if args.output_dir:
         result_data.write_to_json(args.output_dir)
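The refactor above does two things: output comparison moves out of `test_single_model` into a dedicated `check_outputs`, and each execution phase is wrapped in `try`/`except` so a crash in one mode is logged with its traceback instead of aborting the whole test; outputs are compared and performance recorded only when both modes succeed. A minimal sketch of that guard pattern (`run_guarded` and the lambdas are hypothetical stand-ins, not helpers from this repo):

```python
import traceback

def run_guarded(label, thunk):
    # Run one phase; on failure, log the full traceback and keep going
    # so the remaining phases still execute.
    try:
        print(f"Run model in {label} mode.")
        return True, thunk()
    except Exception as e:
        print(f"Run model in {label} mode failed: {str(e)}\n{traceback.format_exc()}")
        return False, None

# Hypothetical callables standing in for the eager and compiled model calls.
eager_ok, expected_out = run_guarded("eager", lambda: [1.0, 2.0])
compiled_ok, compiled_out = run_guarded("compiled", lambda: [1.0, 2.0])

# Compare outputs (and record performance) only if both phases ran.
if eager_ok and compiled_ok:
    print("outputs equal:", expected_out == compiled_out)
```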

graph_net/torch/test_compiler.py

Lines changed: 3 additions & 3 deletions

@@ -140,7 +140,7 @@ def measure_performance(model_call, args, compiler):
         e2e_times.append(duration_box.value)
         gpu_times.append(gpu_time_ms)
         print(
-            f"Trial {i + 1}: e2e={duration_box.value:.2f} ms, gpu={gpu_time_ms:.3g} ms"
+            f"Trial {i + 1}: e2e={duration_box.value:.4f} ms, gpu={gpu_time_ms:.5g} ms"
         )
 
     stats["e2e"] = get_timing_stats(e2e_times)
@@ -157,7 +157,7 @@ def measure_performance(model_call, args, compiler):
         duration_box = DurationBox(-1)
         with naive_timer(duration_box, compiler.synchronize):
             model_call()
-        print(f"Trial {i + 1}: e2e={duration_box.value:.2f} ms")
+        print(f"Trial {i + 1}: e2e={duration_box.value:.4f} ms")
         e2e_times.append(duration_box.value)
     stats["e2e"] = get_timing_stats(e2e_times)
 
@@ -220,7 +220,7 @@ def test_single_model(args):
             "compile_framework_version"
         ] = f"BladeDISC {compiler.version}"
     else:
-        result_data["configuration"]["compiler_version"] = "unknown"
+        result_data["configuration"]["compile_framework_version"] = "unknown"
 
     eager_model_call = lambda: model(**input_dict)
     compiled_model_call = lambda: compiled_model(**input_dict)
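Two small fixes here: the `else` branch now writes the same `compile_framework_version` key that the BladeDISC branch uses (previously it wrote a stray `compiler_version` key), and the trial logging gains precision. On the format specifiers: `:.4f` fixes four digits after the decimal point, while `:.5g` keeps five significant digits. A quick illustration with an arbitrary value:

```python
gpu_time_ms = 0.0123456  # arbitrary example value

print(f"{gpu_time_ms:.2f}")  # 0.01     (old: sub-centisecond detail is lost)
print(f"{gpu_time_ms:.4f}")  # 0.0123   (new: four decimal places)
print(f"{gpu_time_ms:.3g}")  # 0.0123   (old: three significant digits)
print(f"{gpu_time_ms:.5g}")  # 0.012346 (new: five significant digits)
```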
