Skip to content

Commit 918cc50

Browse files
committed
Merge upstream
1 parent 31920f1 commit 918cc50

File tree

7 files changed

+303
-264
lines changed

7 files changed

+303
-264
lines changed

graph_net/paddle/test_compiler.py

Lines changed: 185 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import time
1010
import math
1111
import numpy as np
12+
import random
1213
import platform
1314
import traceback
1415
import subprocess
@@ -34,32 +35,46 @@ def get_compiler_backend(args) -> GraphCompilerBackend:
3435
return registry_backend[args.compiler]
3536

3637

38+
def set_seed(random_seed):
    """Seed the random number generators used by this module.

    The same ``random_seed`` value is handed, in order, to Paddle, to Python's
    ``random`` module and to NumPy's global generator.
    """
    for seed_fn in (paddle.seed, random.seed, np.random.seed):
        seed_fn(random_seed)
42+
43+
3744
def init_env(args):
    """Apply Paddle runtime flags for the device named in ``args.device``.

    When ``test_compiler_util.is_gpu_device`` reports a GPU device,
    ``FLAGS_cudnn_exhaustive_search`` is set to 1; otherwise nothing is changed.
    """
    if not test_compiler_util.is_gpu_device(args.device):
        return
    gpu_flags = {"FLAGS_cudnn_exhaustive_search": 1}
    paddle.set_flags(gpu_flags)
4047

4148

49+
def get_hardward_name(args):
    """Return a printable name for the hardware selected by ``args.device``.

    * GPU devices (as decided by ``test_compiler_util.is_gpu_device``): the
      name Paddle reports for CUDA device 0.
    * ``"xpu"``: the name on the first line of ``xpu-smi -L`` output that
      matches the ``XPU <id>: <name> (UUID: ...)`` pattern.
    * ``"cpu"``: the value of ``platform.processor()``.

    Returns:
        The hardware name, or ``"unknown"`` when the device is not one of the
        above or its name cannot be determined.
    """
    hardware = "unknown"
    if test_compiler_util.is_gpu_device(args.device):
        hardware = paddle.device.cuda.get_device_name(0)
    elif args.device == "xpu":
        try:
            output = subprocess.check_output(["xpu-smi", "-L"], text=True)
        except Exception:
            # Best effort: the name is only used for reporting, so a missing
            # or failing ``xpu-smi`` must not abort the run.
            return hardware
        matches = (
            re.match(r"XPU\s+(\d+):\s+(.+?)\s+\(UUID:\s*([^)]+)\)", line)
            for line in output.splitlines()
        )
        # Supplying a default keeps "no matching line" from surfacing as
        # StopIteration; the "unknown" placeholder is returned instead.
        hardware = next((m.group(2) for m in matches if m), hardware)
    elif args.device == "cpu":
        # platform.processor() returns "" when the value cannot be determined;
        # fall back to the placeholder instead of reporting a blank name.
        hardware = platform.processor() or hardware
    return hardware
70+
71+
4272
def get_compile_framework_version(args):
    """Return the version string of the framework behind ``args.compiler``.

    For the ``"cinn"`` and ``"nope"`` compilers this is ``paddle.__version__``;
    any other compiler name yields ``"unknown"``.
    """
    paddle_backed_compilers = ["cinn", "nope"]
    if args.compiler not in paddle_backed_compilers:
        return "unknown"
    return paddle.__version__
4676

4777

48-
def check_and_print_gpu_utilization(compiler):
49-
if paddle.device.is_compiled_with_cuda():
50-
device_id = int(paddle.device.get_device().split(":")[-1])
51-
device_count = paddle.device.cuda.device_count()
52-
gpu_util, mem_util = test_compiler_util.get_device_utilization(
53-
device_id, device_count, compiler.synchronize
54-
)
55-
if gpu_util is not None and mem_util is not None:
56-
print(
57-
f"Device status: gpu_id {device_id}, gpu_util {gpu_util:.2f}%, mem_util {mem_util:.2f}%",
58-
file=sys.stderr,
59-
flush=True,
60-
)
61-
62-
6378
def load_class_from_file(file_path: str, class_name: str):
6479
file = Path(file_path).resolve()
6580
module_name = file.stem
@@ -85,10 +100,30 @@ def get_model(model_path):
85100
return model_class()
86101

87102

103+
def get_input_dict(model_path):
    """Build the name -> tensor mapping used to call the model at ``model_path``.

    Loads the converted description via ``utils.load_converted_from_text``,
    merges its ``"input_info"`` entries into its ``"weight_info"`` entries
    (an input entry replaces a weight entry with the same key), and passes
    every value through ``utils.replay_tensor``.
    """
    loaded = utils.load_converted_from_text(f"{model_path}")
    tensor_infos = loaded["weight_info"]
    input_infos = loaded["input_info"]

    # The merge is done in place on the loaded weight mapping.
    tensor_infos.update(input_infos)
    return {
        name: utils.replay_tensor(info) for name, info in tensor_infos.items()
    }
111+
112+
113+
def get_input_spec(model_path):
    """Return one ``paddle.static.InputSpec`` per entry stored at ``model_path``.

    The entries come from ``utils.load_converted_list_from_text``; each spec is
    built from the entry's ``["info"]["shape"]`` and ``["info"]["dtype"]``, in
    the order the entries are listed.
    """
    entries = utils.load_converted_list_from_text(f"{model_path}")
    specs = []
    for entry in entries:
        dtype = entry["info"]["dtype"]
        shape = entry["info"]["shape"]
        specs.append(paddle.static.InputSpec(shape, dtype))
    return specs
121+
122+
88123
def get_static_model(args, model):
89124
static_model = paddle.jit.to_static(
90125
model,
91-
input_spec=utils.get_input_spec(args.model_path),
126+
input_spec=get_input_spec(args.model_path),
92127
full_graph=True,
93128
backend=None,
94129
)
@@ -117,7 +152,7 @@ def measure_performance(model_call, args, compiler, profile=False):
117152
min_trials = int(100 / np.mean(warmup_e2e_times[1:]))
118153
trials = max(args.trials, min_trials)
119154

120-
hardware_name = test_compiler_util.get_hardward_name(args)
155+
hardware_name = get_hardward_name(args)
121156
print(
122157
f"[Profiling] Using device: {args.device} {hardware_name}, warm up {args.warmup}, trials {trials}",
123158
file=sys.stderr,
@@ -240,7 +275,7 @@ def transfer_to_float(origin_outputs):
240275
args,
241276
expected_out,
242277
compiled_out,
243-
cmp_equal_func=utils.get_cmp_equal,
278+
cmp_equal_func=get_cmp_equal,
244279
)
245280

246281
expected_out_fp32 = transfer_to_float(expected_out)
@@ -249,26 +284,39 @@ def transfer_to_float(origin_outputs):
249284
args,
250285
expected_out_fp32,
251286
compiled_out_fp32,
252-
cmp_all_close_func=utils.get_cmp_all_close,
253-
cmp_max_diff_func=utils.get_cmp_max_diff,
254-
cmp_mean_diff_func=utils.get_cmp_mean_diff,
255-
cmp_max_relative_diff_func=utils.get_cmp_max_relative_diff,
256-
cmp_mean_relative_diff_func=utils.get_cmp_mean_relative_diff,
287+
cmp_all_close_func=get_cmp_all_close,
288+
cmp_max_diff_func=get_cmp_max_diff,
289+
cmp_mean_diff_func=get_cmp_mean_diff,
290+
cmp_max_relative_diff_func=get_cmp_max_relative_diff,
291+
cmp_mean_relative_diff_func=get_cmp_mean_relative_diff,
292+
)
293+
294+
295+
def check_and_print_gpu_utilization(compiler):
    """Print the current device's GPU and memory utilization to stderr.

    Does nothing when Paddle is not compiled with CUDA, or when
    ``test_compiler_util.get_device_utilization`` returns ``None`` for either
    figure. ``compiler.synchronize`` is forwarded to that helper.
    """
    if not paddle.device.is_compiled_with_cuda():
        return

    # The device id is the part after the last ":" of Paddle's device string.
    current_device = paddle.device.get_device()
    device_id = int(current_device.split(":")[-1])
    device_count = paddle.device.cuda.device_count()
    gpu_util, mem_util = test_compiler_util.get_device_utilization(
        device_id, device_count, compiler.synchronize
    )
    if gpu_util is None or mem_util is None:
        return

    print(
        f"Device status: gpu_id {device_id}, gpu_util {gpu_util:.2f}%, mem_util {mem_util:.2f}%",
        file=sys.stderr,
        flush=True,
    )
258308

259309

260310
def test_single_model(args):
261311
compiler = get_compiler_backend(args)
262312
check_and_print_gpu_utilization(compiler)
263313

264-
input_dict = utils.get_input_dict(args.model_path)
314+
input_dict = get_input_dict(args.model_path)
265315
model = get_model(args.model_path)
266316
model.eval()
267317

268-
hardware_name = test_compiler_util.get_hardward_name(args)
269-
270318
test_compiler_util.print_basic_config(
271-
args, hardware_name, get_compile_framework_version(args)
319+
args, get_hardward_name(args), get_compile_framework_version(args)
272320
)
273321

274322
# Run on eager mode
@@ -293,7 +341,7 @@ def test_single_model(args):
293341
compiled_time_stats = {}
294342
try:
295343
print("Run model in compiled mode.", file=sys.stderr, flush=True)
296-
input_spec = utils.get_input_spec(args.model_path)
344+
input_spec = get_input_spec(args.model_path)
297345
compiled_model = compiler(model, input_spec)
298346
compiled_out, compiled_time_stats = measure_performance(
299347
lambda: compiled_model(**input_dict), args, compiler, profile=False
@@ -315,18 +363,125 @@ def test_single_model(args):
315363
)
316364

317365

366+
def get_cmp_equal(expected_out, compiled_out):
    """Report exact equality for each pair of expected/compiled outputs.

    Pairs are formed with ``zip``. Before comparison, float16/bfloat16 tensors
    are cast to float32 and uint8/int8/int16 tensors to int32; other dtypes are
    compared as they are.

    Returns:
        A space-separated string with ``1`` (``paddle.equal_all`` is true) or
        ``0`` for each pair.
    """

    def widen(tensor):
        dtype = tensor.dtype
        if dtype in [paddle.float16, paddle.bfloat16]:
            return tensor.astype("float32")
        if dtype in [paddle.uint8, paddle.int8, paddle.int16]:
            return tensor.astype("int32")
        return tensor

    flags = []
    for expected, compiled in zip(expected_out, compiled_out):
        all_equal = paddle.equal_all(widen(expected), widen(compiled))
        flags.append(str(int(all_equal)))
    return " ".join(flags)
378+
379+
380+
def get_cmp_all_close(expected_out, compiled_out, atol, rtol):
    """Report ``paddle.allclose`` for each pair of expected/compiled outputs.

    Pairs are formed with ``zip``; ``atol`` and ``rtol`` are forwarded
    unchanged to ``paddle.allclose``.

    Returns:
        A space-separated string with ``1`` or ``0`` for each pair.
    """
    flags = []
    for expected, compiled in zip(expected_out, compiled_out):
        is_close = paddle.allclose(expected, compiled, atol=atol, rtol=rtol)
        flags.append(str(int(is_close)))
    return " ".join(flags)
385+
386+
387+
def get_format_str(f):
    """Format ``f`` with five digits after the decimal point.

    Non-zero values whose magnitude is above 1e5 or below 1e-5 use scientific
    notation (``.5E``); every other value, including zero, uses fixed-point
    notation (``.5f``).
    """
    magnitude = abs(f)
    needs_scientific = magnitude != 0.0 and (magnitude > 1e5 or magnitude < 1e-5)
    if needs_scientific:
        return f"{f:.5E}"
    return f"{f:.5f}"
392+
393+
394+
def get_cmp_max_diff(expected_out, compiled_out):
    """Report the largest absolute difference for each output pair.

    Pairs are formed with ``zip``; each value is ``max(|a - b|)`` rendered by
    ``get_format_str``.

    Returns:
        A space-separated string with one formatted value per pair.
    """
    formatted = []
    for expected, compiled in zip(expected_out, compiled_out):
        largest_gap = paddle.max(paddle.abs(expected - compiled)).item()
        formatted.append(get_format_str(largest_gap))
    return " ".join(formatted)
399+
400+
401+
def get_cmp_mean_diff(expected_out, compiled_out):
    """Report the mean absolute difference for each output pair.

    Pairs are formed with ``zip``; each value is ``mean(|a - b|)`` rendered by
    ``get_format_str``.

    Returns:
        A space-separated string with one formatted value per pair.
    """
    formatted = []
    for expected, compiled in zip(expected_out, compiled_out):
        average_gap = paddle.mean(paddle.abs(expected - compiled)).item()
        formatted.append(get_format_str(average_gap))
    return " ".join(formatted)
406+
407+
408+
def get_cmp_max_relative_diff(expected_out, compiled_out):
    """Report the largest relative difference for each output pair.

    Pairs are formed with ``zip``; each value is
    ``max(|a - b| / (|a| + 1e-8))`` with ``a`` the expected output, rendered
    by ``get_format_str``.

    Returns:
        A space-separated string with one formatted value per pair.
    """
    epsilon = 1e-8  # added to the denominator so it is never exactly zero
    formatted = []
    for expected, compiled in zip(expected_out, compiled_out):
        abs_gap = paddle.abs(expected - compiled)
        relative_gap = abs_gap / (paddle.abs(expected) + epsilon)
        formatted.append(get_format_str(paddle.max(relative_gap).item()))
    return " ".join(formatted)
414+
415+
416+
def get_cmp_mean_relative_diff(expected_out, compiled_out):
    """Report the mean relative difference for each output pair.

    Pairs are formed with ``zip``; each value is
    ``mean(|a - b| / (|a| + 1e-8))`` with ``a`` the expected output, rendered
    by ``get_format_str``.

    Returns:
        A space-separated string with one formatted value per pair.
    """
    epsilon = 1e-8  # added to the denominator so it is never exactly zero
    formatted = []
    for expected, compiled in zip(expected_out, compiled_out):
        abs_gap = paddle.abs(expected - compiled)
        relative_gap = abs_gap / (paddle.abs(expected) + epsilon)
        formatted.append(get_format_str(paddle.mean(relative_gap).item()))
    return " ".join(formatted)
424+
425+
426+
def get_cmp_diff_count(expected_out, compiled_out, atol, rtol):
    """Count the elements that are not close for each output pair.

    Pairs are formed with ``zip``; for each pair the elements where
    ``paddle.isclose`` (with the given ``atol`` and ``rtol``) is false are
    summed.

    Returns:
        A space-separated string with one count per pair.
    """
    counts = []
    for expected, compiled in zip(expected_out, compiled_out):
        mismatch_mask = ~paddle.isclose(expected, compiled, atol=atol, rtol=rtol)
        counts.append(str(paddle.sum(mismatch_mask).item()))
    return " ".join(counts)
431+
432+
433+
def test_multi_models(args):
    """Run this module once per model found under ``args.model_path``.

    Every model path yielded by ``path_utils.get_recursively_model_path`` is
    tested in a separate Python process started with
    ``-m graph_net.paddle.<this module>``. If
    ``test_compiler_util.get_allow_samples(args.allow_list)`` returns a
    collection, only models whose absolute path is in it are run. A non-zero
    exit status marks the model as failed; a summary and the failed paths are
    printed to stderr at the end.

    Args:
        args: Parsed command-line arguments. Reads ``allow_list``,
            ``model_path``, ``compiler``, ``device``, ``warmup``, ``trials``
            and ``log_prompt``.
    """
    test_samples = test_compiler_util.get_allow_samples(args.allow_list)

    sample_idx = 0
    failed_samples = []
    module_name = os.path.splitext(os.path.basename(__file__))[0]
    for model_path in path_utils.get_recursively_model_path(args.model_path):
        if test_samples is not None and os.path.abspath(model_path) not in test_samples:
            continue

        print(
            f"[{sample_idx}] {module_name}, model_path: {model_path}",
            file=sys.stderr,
            flush=True,
        )
        # Pass an argument list and skip the shell, so model paths or log
        # prompts containing spaces or shell metacharacters reach the child
        # process unchanged.
        cmd = [
            sys.executable,
            "-m",
            f"graph_net.paddle.{module_name}",
            "--model-path",
            str(model_path),
            "--compiler",
            str(args.compiler),
            "--device",
            str(args.device),
            "--warmup",
            str(args.warmup),
            "--trials",
            str(args.trials),
            "--log-prompt",
            str(args.log_prompt),
        ]
        # A failing model is recorded rather than raised, so the remaining
        # models still run.
        completed = subprocess.run(cmd, check=False)
        if completed.returncode != 0:
            failed_samples.append(model_path)
        sample_idx += 1

    print(
        f"Totally {sample_idx} verified samples, failed {len(failed_samples)} samples.",
        file=sys.stderr,
        flush=True,
    )
    for model_path in failed_samples:
        print(f"- {model_path}", file=sys.stderr, flush=True)
471+
472+
318473
def main(args):
319474
assert os.path.isdir(args.model_path)
320475
assert args.compiler in {"cinn", "nope"}
321476
assert args.device in ["cuda", "dcu", "xpu", "cpu"]
322477

323478
initalize_seed = 123
324-
test_compiler_util.set_seed(random_seed=initalize_seed)
479+
set_seed(random_seed=initalize_seed)
325480

326481
if path_utils.is_single_model_dir(args.model_path):
327482
test_single_model(args)
328483
else:
329-
test_compiler_util.test_multi_models(args, "paddle")
484+
test_multi_models(args)
330485

331486

332487
if __name__ == "__main__":

graph_net/paddle/test_reference_device.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
from graph_net import path_utils
1616
from graph_net import test_compiler_util
17-
from graph_net.paddle import test_compiler, utils
17+
from graph_net.paddle import test_compiler
1818

1919

2020
def get_reference_log_path(reference_dir, model_path):
@@ -38,7 +38,7 @@ def test_single_model(args):
3838
compiler = test_compiler.get_compiler_backend(args)
3939
test_compiler.check_and_print_gpu_utilization(compiler)
4040

41-
input_dict = utils.get_input_dict(args.model_path)
41+
input_dict = test_compiler.get_input_dict(args.model_path)
4242
model = test_compiler.get_model(args.model_path)
4343
model.eval()
4444

@@ -48,14 +48,14 @@ def test_single_model(args):
4848

4949
test_compiler_util.print_basic_config(
5050
args,
51-
test_compiler_util.get_hardward_name(args),
51+
test_compiler.get_hardward_name(args),
5252
test_compiler.get_compile_framework_version(args),
5353
)
5454

5555
success = False
5656
time_stats = {}
5757
try:
58-
input_spec = utils.get_input_spec(args.model_path)
58+
input_spec = test_compiler.get_input_spec(args.model_path)
5959
compiled_model = compiler(model, input_spec)
6060
outputs, time_stats = test_compiler.measure_performance(
6161
lambda: compiled_model(**input_dict),
@@ -130,7 +130,7 @@ def main(args):
130130
assert args.compiler in {"cinn", "nope"}
131131
assert args.device in ["cuda"]
132132

133-
test_compiler_util.set_seed(random_seed=args.seed)
133+
test_compiler.set_seed(random_seed=args.seed)
134134
test_compiler.init_env(args)
135135

136136
ref_dump_dir = Path(args.reference_dir)

graph_net/paddle/test_target_device.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
import paddle
1414
from graph_net import path_utils
1515
from graph_net import test_compiler_util
16-
from graph_net.paddle import test_compiler, test_reference_device, utils
16+
from graph_net.paddle import test_compiler, test_reference_device
1717

1818

1919
def parse_config_from_reference_log(log_path):
@@ -59,28 +59,28 @@ def update_args_and_set_seed(args, model_path):
5959
vars(args)["compiler"] = config.get("compiler")
6060
vars(args)["trials"] = int(config.get("trials"))
6161
vars(args)["warmup"] = int(config.get("warmup"))
62-
test_compiler_util.set_seed(random_seed=int(config.get("seed")))
62+
test_compiler.set_seed(random_seed=int(config.get("seed")))
6363
return args
6464

6565

6666
def test_single_model(args):
6767
compiler = test_compiler.get_compiler_backend(args)
6868
test_compiler.check_and_print_gpu_utilization(compiler)
6969

70-
input_dict = utils.get_input_dict(args.model_path)
70+
input_dict = test_compiler.get_input_dict(args.model_path)
7171
model = test_compiler.get_model(args.model_path)
7272
model.eval()
7373

7474
test_compiler_util.print_basic_config(
7575
args,
76-
test_compiler_util.get_hardward_name(args),
76+
test_compiler.get_hardward_name(args),
7777
test_compiler.get_compile_framework_version(args),
7878
)
7979

8080
success = False
8181
time_stats = {}
8282
try:
83-
input_spec = utils.get_input_spec(args.model_path)
83+
input_spec = test_compiler.get_input_spec(args.model_path)
8484
compiled_model = compiler(model, input_spec)
8585
outputs, time_stats = test_compiler.measure_performance(
8686
lambda: compiled_model(**input_dict), args, compiler, profile=False

0 commit comments

Comments
 (0)