
Commit 5b9272a

[Feature Enhancement] Add support to test devices on Paddle (#330)
* Add support to test devices on Paddle
* Update
* Merge
* test_device
* update
* fix
1 parent f5ca9e4 commit 5b9272a
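
This commit adds a two-script workflow for cross-device testing. The first new file (the `graph_net.paddle.test_reference_device` module, judging from the self-invocation in its `test_multi_models`) runs each model on a reference device and writes a `<model_name>.log` stats log plus a `<model_name>.pdout` output dump into `--reference-dir`. The second file replays the recorded configuration on the device under test and compares outputs and timings against those references. Below is a minimal sketch of the intended sequence; the model and reference paths are hypothetical, and the second module's name is an assumption (only its `--reference-dir` flag is confirmed by the code):

import subprocess
import sys

# Step 1: on the reference device, dump per-model logs and outputs.
subprocess.run(
    [
        sys.executable, "-m", "graph_net.paddle.test_reference_device",
        "--model-path", "/data/models",        # hypothetical path
        "--compiler", "cinn",
        "--reference-dir", "/data/reference",  # hypothetical path
    ],
    check=True,
)

# Step 2: on the target device, replay and compare against the references.
# The module name here is assumed, not confirmed by the diff.
subprocess.run(
    [
        sys.executable, "-m", "graph_net.paddle.test_device",
        "--reference-dir", "/data/reference",
    ],
    check=True,
)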

File tree

2 files changed: +344 -0 lines changed

Lines changed: 186 additions & 0 deletions
@@ -0,0 +1,186 @@
import argparse
import importlib.util
import paddle
import time
import numpy as np
import random
import os
from pathlib import Path
from contextlib import redirect_stdout, redirect_stderr
import json
import re
import sys
import traceback
from graph_net import test_compiler_util
from graph_net.paddle import utils
from graph_net.paddle import test_compiler
from graph_net import path_utils


def test_single_model(args):
    """Run one model on the reference device, redirecting all output to
    <reference_dir>/<model_name>.log and dumping outputs to <model_name>.pdout."""
    model_name = test_compiler_util.get_model_name(args.model_path)
    if test_compiler_util.get_subgraph_tag(args.model_path):
        model_name += "_" + test_compiler_util.get_subgraph_tag(args.model_path)
    ref_log = Path(args.reference_dir) / f"{model_name}.log"
    with open(ref_log, "w", encoding="utf-8") as log_f:
        with redirect_stdout(log_f), redirect_stderr(log_f):
            compiler = test_compiler.get_compiler_backend(args)
            test_compiler.check_and_print_gpu_utilization(compiler)

            input_dict = test_compiler.get_input_dict(args.model_path)
            model = test_compiler.get_model(args.model_path)
            model.eval()

            test_compiler_util.print_with_log_prompt(
                "[Config] seed:", args.seed, args.log_prompt
            )

            test_compiler_util.print_basic_config(
                args,
                test_compiler.get_hardward_name(args),
                test_compiler.get_compile_framework_version(args),
            )

            success = False
            time_stats = {}
            try:
                input_spec = test_compiler.get_input_spec(args.model_path)
                compiled_model = compiler(model, input_spec)
                outputs, time_stats = test_compiler.measure_performance(
                    lambda: compiled_model(**input_dict),
                    args,
                    compiler,
                    profile=False,
                )
                success = True
            except Exception as e:
                print(
                    f"Run model failed: {str(e)}\n{traceback.format_exc()}",
                    file=sys.stderr,
                    flush=True,
                )

            test_compiler_util.print_running_status(args, success)
            if success:
                ref_dump = Path(args.reference_dir) / f"{model_name}.pdout"
                paddle.save(outputs, str(ref_dump))
                test_compiler_util.print_with_log_prompt(
                    "[Performance][eager]:", json.dumps(time_stats), args.log_prompt
                )


def test_multi_models(args):
    """Walk the model tree and run each sample in a subprocess, so one
    crashing model cannot take down the whole sweep."""
    test_samples = test_compiler_util.get_allow_samples(args.allow_list)

    sample_idx = 0
    failed_samples = []
    for model_path in path_utils.get_recursively_model_path(args.model_path):
        if test_samples is None or os.path.abspath(model_path) in test_samples:
            print(
                f"[{sample_idx}] test_compiler, model_path: {model_path}",
                file=sys.stderr,
                flush=True,
            )
            cmd = " ".join(
                [
                    sys.executable,
                    "-m graph_net.paddle.test_reference_device",
                    f"--model-path {model_path}",
                    f"--compiler {args.compiler}",
                    f"--device {args.device}",
                    f"--warmup {args.warmup}",
                    f"--trials {args.trials}",
                    f"--log-prompt {args.log_prompt}",
                    f"--seed {args.seed}",
                    f"--reference-dir {args.reference_dir}",
                ]
            )
            cmd_ret = os.system(cmd)
            # assert cmd_ret == 0, f"{cmd_ret=}, {cmd=}"
            if cmd_ret != 0:
                failed_samples.append(model_path)
            sample_idx += 1

    print(
        f"Verified {sample_idx} samples in total; {len(failed_samples)} failed.",
        file=sys.stderr,
        flush=True,
    )
    for model_path in failed_samples:
        print(f"- {model_path}", file=sys.stderr, flush=True)


def main(args):
    assert os.path.isdir(args.model_path)
    assert args.compiler in {"cinn", "nope"}
    assert args.device in ["cuda"]

    test_compiler.set_seed(random_seed=args.seed)

    ref_dump_dir = Path(args.reference_dir)
    ref_dump_dir.mkdir(parents=True, exist_ok=True)

    if path_utils.is_single_model_dir(args.model_path):
        test_single_model(args)
    else:
        test_multi_models(args)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Generate reference outputs and performance stats on a reference device."
    )
    parser.add_argument(
        "--model-path",
        type=str,
        required=True,
        help="Path to model file(s); each subdirectory containing graph_net.json is treated as a model",
    )
    parser.add_argument(
        "--compiler",
        type=str,
        required=False,
        default="cinn",
        help="Compiler backend to test ('cinn' or 'nope')",
    )
    parser.add_argument(
        "--device",
        type=str,
        required=False,
        default="cuda",
        help="Device for testing the compiler (currently only 'cuda' is accepted)",
    )
    parser.add_argument(
        "--warmup", type=int, required=False, default=5, help="Number of warmup steps"
    )
    parser.add_argument(
        "--trials", type=int, required=False, default=10, help="Number of timing trials"
    )
    parser.add_argument(
        "--log-prompt",
        type=str,
        required=False,
        default="graph-net-test-device-log",
        help="Log prompt for performance log filtering.",
    )
    parser.add_argument(
        "--allow-list",
        type=str,
        required=False,
        default=None,
        help="Path to a sample list; each line contains one sample path",
    )
    parser.add_argument(
        "--seed",
        type=int,
        required=False,
        default=123,
        help="Random seed (default: 123)",
    )
    parser.add_argument(
        "--reference-dir",
        type=str,
        required=True,
        help="Directory to save reference stats log and outputs",
    )
    args = parser.parse_args()
    main(args=args)
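
For each sample this script leaves two artifacts in `--reference-dir`: a `<model_name>.log` capturing the `[Config]` and `[Performance][eager]` lines, and a `<model_name>.pdout` holding the saved outputs. Since the dump goes through `paddle.save`, it can be inspected afterwards with plain Paddle APIs; a minimal sketch, with a hypothetical file name:

import paddle

# paddle.load reads back whatever paddle.save dumped above; the structure
# mirrors measure_performance's `outputs` (typically a tensor or a nested
# container of tensors).
outputs = paddle.load("/data/reference/resnet50.pdout")  # hypothetical file
print(type(outputs))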
Lines changed: 158 additions & 0 deletions
@@ -0,0 +1,158 @@
import argparse
import importlib.util
import paddle
import time
import numpy as np
import random
import os
from pathlib import Path
import json
import re
import sys
import traceback
from graph_net import test_compiler_util
from graph_net.paddle import utils
from graph_net.paddle import test_compiler
from graph_net import path_utils


def read_config(log_path):
    """Recover the model path and the [Config] key/value pairs recorded in a
    reference log, scanning the file in reverse."""
    config = {}
    with open(log_path, "r", encoding="utf-8") as f:
        lines = f.readlines()
    for line in reversed(lines):
        if "[Processing]" in line:
            config["model_path"] = line.split("[Processing]")[1].strip()
        if "[Config]" in line:
            config_line = line.split("[Config]")[1].strip()
            key, value = config_line.split(": ")
            config[key.strip()] = value.strip()
    return config


def read_time_stats(log_path):
    """Return the JSON time stats from the last [Performance][eager] line in
    the log, or None if no such line exists."""
    with open(log_path, "r", encoding="utf-8") as f:
        lines = f.readlines()
    for line in reversed(lines):
        if "[Performance][eager]" in line:
            start = line.find("{")
            end = line.rfind("}")
            return json.loads(line[start : end + 1])
    return None


def test_single_model(args):
    """Re-run one model on the device under test and compare its outputs and
    timings against the saved reference."""
    compiler = test_compiler.get_compiler_backend(args)
    test_compiler.check_and_print_gpu_utilization(compiler)

    input_dict = test_compiler.get_input_dict(args.model_path)
    model = test_compiler.get_model(args.model_path)
    model.eval()

    test_compiler_util.print_basic_config(
        args,
        test_compiler.get_hardward_name(args),
        test_compiler.get_compile_framework_version(args),
    )

    success = False
    time_stats = {}
    try:
        input_spec = test_compiler.get_input_spec(args.model_path)
        compiled_model = compiler(model, input_spec)
        outputs, time_stats = test_compiler.measure_performance(
            lambda: compiled_model(**input_dict), args, compiler, profile=False
        )
        success = True
    except Exception as e:
        print(
            f"Run model failed: {str(e)}\n{traceback.format_exc()}",
            file=sys.stderr,
            flush=True,
        )

    test_compiler_util.print_running_status(args, success)

    model_name = test_compiler_util.get_model_name(args.model_path)
    if test_compiler_util.get_subgraph_tag(args.model_path):
        model_name += "_" + test_compiler_util.get_subgraph_tag(args.model_path)

    ref_dump = Path(args.reference_dir) / f"{model_name}.pdout"
    ref_log = Path(args.reference_dir) / f"{model_name}.log"
    ref_out = paddle.load(str(ref_dump))
    ref_time_stats = read_time_stats(ref_log)

    if success:
        test_compiler.check_outputs(args, ref_out, outputs)

    test_compiler_util.print_times_and_speedup(args, ref_time_stats, time_stats)

    # Report failure to the caller so main() can record the sample.
    return 0 if success else 1


def find_log_files(directory):
    """Yield every .log file under `directory`, recursively."""
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith(".log"):
                yield os.path.join(root, file)


def main(args):
    assert os.path.isdir(args.reference_dir)

    sample_idx = 0
    failed_samples = []

    for log_file in find_log_files(args.reference_dir):
        # Rebuild the run configuration recorded by the reference device so
        # both devices execute with identical settings.
        config = read_config(log_file)
        model_path = config.get("model_path")
        vars(args)["model_path"] = model_path
        vars(args)["compiler"] = config.get("compiler")
        vars(args)["trials"] = int(config.get("trials"))
        vars(args)["warmup"] = int(config.get("warmup"))
        test_compiler.set_seed(random_seed=int(config.get("seed")))

        print(
            f"[{sample_idx}] test_device, model_path: {model_path}",
            file=sys.stderr,
            flush=True,
        )
        if test_single_model(args) != 0:
            failed_samples.append(model_path)
        sample_idx += 1

    print(
        f"Verified {sample_idx} samples in total; {len(failed_samples)} failed.",
        file=sys.stderr,
        flush=True,
    )
    for model_path in failed_samples:
        print(f"- {model_path}", file=sys.stderr, flush=True)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Compare device outputs and performance against saved references."
    )
    parser.add_argument(
        "--reference-dir",
        type=str,
        required=True,
        help="Directory to load reference stats log and outputs",
    )
    parser.add_argument(
        "--device",
        type=str,
        required=False,
        default="cuda",
        help="Device for testing the compiler (e.g., 'cpu' or 'cuda')",
    )
    parser.add_argument(
        "--log-prompt",
        type=str,
        required=False,
        default="graph-net-test-device-log",
        help="Log prompt for performance log filtering.",
    )
    args = parser.parse_args()
    main(args=args)
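
The coupling between the two scripts is the log format: `read_config` and `read_time_stats` only look for the bracketed markers, so any prefix emitted by `print_with_log_prompt` works. A self-contained sketch of that round-trip, assuming log lines of the form `<prompt> [Config] key: value` (the exact prefix and the `e2e_ms` stat key are assumptions; the `[Config]` keys such as `compiler`, `trials`, and `warmup` are presumably written by `print_basic_config`):

import json

sample_log = [
    "graph-net-test-device-log [Config] seed: 123\n",
    "graph-net-test-device-log [Config] compiler: cinn\n",
    'graph-net-test-device-log [Performance][eager]: {"e2e_ms": 1.8}\n',
]

# Same parsing scheme as read_config: split on the marker, then on ": ".
config = {}
for line in reversed(sample_log):
    if "[Config]" in line:
        key, value = line.split("[Config]")[1].strip().split(": ")
        config[key.strip()] = value.strip()
print(config)  # {'compiler': 'cinn', 'seed': '123'}

# Same extraction as read_time_stats: the JSON object between the outermost braces.
perf_line = sample_log[-1]
stats = json.loads(perf_line[perf_line.find("{") : perf_line.rfind("}") + 1])
print(stats)  # {'e2e_ms': 1.8}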
