
Commit e198704

Merge branch 'PaddlePaddle:develop' into develop

2 parents d0f236b + e56b801

12 files changed: +502 −335 lines

graph_net/paddle/test_compiler.py

Lines changed: 54 additions & 18 deletions
@@ -41,6 +41,11 @@ def set_seed(random_seed):
     np.random.seed(random_seed)
 
 
+def init_env(args):
+    if test_compiler_util.is_gpu_device(args.device):
+        paddle.set_flags({"FLAGS_cudnn_exhaustive_search": 1})
+
+
 def get_hardward_name(args):
     hardware = "unknown"
     if test_compiler_util.is_gpu_device(args.device):
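Note: `FLAGS_cudnn_exhaustive_search` tells Paddle to benchmark the candidate cuDNN convolution algorithms on first use and cache the fastest, trading longer warmup for more stable steady-state kernels. A minimal sketch of how the new helper is meant to be invoked, with a hypothetical args namespace standing in for the harness's parsed CLI flags:

    import argparse
    import paddle

    # Hypothetical stand-in for the parsed CLI arguments.
    args = argparse.Namespace(device="cuda")

    # Mirrors init_env above; the real check goes through
    # test_compiler_util.is_gpu_device(args.device).
    if args.device.startswith("cuda"):
        paddle.set_flags({"FLAGS_cudnn_exhaustive_search": 1})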
@@ -65,10 +70,8 @@ def get_hardward_name(args):
 
 
 def get_compile_framework_version(args):
-    if args.compiler == "cinn":
+    if args.compiler in ["cinn", "nope"]:
         return paddle.__version__
-    if args.compiler == "nope":
-        return "nope-baseline"
     return "unknown"
 
 
@@ -137,17 +140,31 @@ def measure_performance(model_call, args, compiler, profile=False):
     outs = model_call()
 
     # Warmup runs
+    warmup_e2e_times = []
     for _ in range(args.warmup):
-        model_call()
+        duration_box = test_compiler_util.DurationBox(-1)
+        with test_compiler_util.naive_timer(duration_box, compiler.synchronize):
+            model_call()
+        warmup_e2e_times.append(duration_box.value)
     compiler.synchronize()
 
+    # Ensure the measuring time is not less than 100ms.
+    min_trials = int(100 / np.mean(warmup_e2e_times[1:]))
+    trials = max(args.trials, min_trials)
+
     hardware_name = get_hardward_name(args)
     print(
-        f"[Profiling] Using device: {args.device} {hardware_name}, warm up {args.warmup}, trials {args.trials}",
+        f"[Profiling] Using device: {args.device} {hardware_name}, warm up {args.warmup}, trials {trials}",
         file=sys.stderr,
         flush=True,
     )
 
+    if profile:
+        import paddle.profiler as profiler
+
+        p = profiler.Profiler()
+        p.start()
+
     if test_compiler_util.is_gpu_device(args.device):
         """
         Acknowledgement: We evaluate the performance on both end-to-end and GPU-only timings,
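Note on the adaptive trial count: the first warmup iteration is excluded from the mean because it can absorb one-off compilation cost, so the formula assumes at least two warmup runs. A worked example with hypothetical timings:

    import numpy as np

    # Hypothetical warmup timings in ms; the first run includes compilation.
    warmup_e2e_times = [180.0, 2.4, 2.6, 2.5]
    min_trials = int(100 / np.mean(warmup_e2e_times[1:]))  # 100 / 2.5 -> 40
    trials = max(10, min_trials)  # with args.trials == 10, 40 trials run,
    print(trials)                 # totalling roughly 100 ms of measurement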
@@ -157,9 +174,7 @@ def measure_performance(model_call, args, compiler, profile=False):
         e2e_times = []
         gpu_times = []
 
-        if profile:
-            paddle.base.core.nvprof_start()
-        for i in range(args.trials):
+        for i in range(trials):
             # End-to-end timing (naive_timer)
             duration_box = test_compiler_util.DurationBox(-1)
             with test_compiler_util.naive_timer(duration_box, compiler.synchronize):
@@ -171,6 +186,9 @@ def measure_performance(model_call, args, compiler, profile=False):
                 model_call()
                 end_event.record()
 
+            if profile:
+                p.step()
+
             gpu_time_ms = start_event.elapsed_time(end_event)
             e2e_times.append(duration_box.value)
             gpu_times.append(gpu_time_ms)
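Note: the `p.start()` / `p.step()` / `p.stop()` / `p.summary()` calls introduced in this commit follow the standard `paddle.profiler.Profiler` lifecycle, replacing the previous `nvprof_start`/`nvprof_stop` pair. A self-contained sketch under a trivial stand-in workload:

    import paddle
    import paddle.profiler as profiler

    x = paddle.randn([64, 64])
    model_call = lambda: paddle.matmul(x, x)  # hypothetical workload

    p = profiler.Profiler()  # default targets (CPU, and GPU if available)
    p.start()
    for _ in range(5):
        model_call()
        p.step()             # marks one iteration boundary
    p.stop()
    p.summary()              # prints aggregated per-op timing tables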
@@ -179,25 +197,30 @@ def measure_performance(model_call, args, compiler, profile=False):
                 file=sys.stderr,
                 flush=True,
             )
-        if profile:
-            paddle.base.core.nvprof_stop()
-
         stats["e2e"] = test_compiler_util.get_timing_stats(e2e_times)
         stats["gpu"] = test_compiler_util.get_timing_stats(gpu_times)
     else:  # CPU or other devices
         e2e_times = []
-        for i in range(args.trials):
+        for i in range(trials):
             duration_box = test_compiler_util.DurationBox(-1)
             with test_compiler_util.naive_timer(duration_box, compiler.synchronize):
                 model_call()
+
+            if profile:
+                p.step()
+
+            e2e_times.append(duration_box.value)
             print(
                 f"Trial {i + 1}: e2e={duration_box.value:.4f} ms",
                 file=sys.stderr,
                 flush=True,
             )
-            e2e_times.append(duration_box.value)
         stats["e2e"] = test_compiler_util.get_timing_stats(e2e_times)
 
+    if profile:
+        p.stop()
+        p.summary()
+
     return outs, stats
 
 
@@ -210,19 +233,31 @@ def check_outputs(args, expected_out, compiled_out):
     eager_dtypes = [None] * len(expected_out)
     for i, tensor in enumerate(expected_out):
         eager_dtypes[i] = (
-            str(tensor.dtype).replace("paddle.", "") if tensor is not None else "none"
+            str(tensor.dtype).replace("paddle.", "") if tensor is not None else "None"
         )
 
     compiled_dtypes = [None] * len(compiled_out)
     for i, tensor in enumerate(compiled_out):
         compiled_dtypes[i] = (
-            str(tensor.dtype).replace("paddle.", "") if tensor is not None else "none"
+            str(tensor.dtype).replace("paddle.", "") if tensor is not None else "None"
         )
 
     type_match = test_compiler_util.check_output_datatype(
         args, eager_dtypes, compiled_dtypes
     )
 
+    eager_shapes = [None] * len(expected_out)
+    for i, tensor in enumerate(expected_out):
+        eager_shapes[i] = tensor.shape if tensor is not None else None
+
+    compiled_shapes = [None] * len(compiled_out)
+    for i, tensor in enumerate(compiled_out):
+        compiled_shapes[i] = tensor.shape if tensor is not None else None
+
+    shape_match = test_compiler_util.check_output_shape(
+        args, eager_shapes, compiled_shapes
+    )
+
     def transfer_to_float(origin_outputs):
         outputs = []
         for item in origin_outputs:
@@ -235,7 +270,7 @@ def transfer_to_float(origin_outputs):
             outputs.append(item)
         return outputs
 
-    if type_match:
+    if type_match and shape_match:
         test_compiler_util.check_equal(
             args,
             expected_out,
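Note: gating `check_equal` on `shape_match` as well avoids comparing element values when the outputs already disagree structurally. The real helper lives in `test_compiler_util`; a hypothetical sketch of the comparison it performs (the actual implementation may also log mismatches or tolerate dynamic dims):

    def check_output_shape(args, eager_shapes, compiled_shapes):
        # Hypothetical: pairwise-compare shape lists, None entries included.
        if len(eager_shapes) != len(compiled_shapes):
            return False
        return all(e == c for e, c in zip(eager_shapes, compiled_shapes))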
@@ -400,17 +435,18 @@ def test_multi_models(args):
 
     sample_idx = 0
     failed_samples = []
+    module_name = os.path.splitext(os.path.basename(__file__))[0]
     for model_path in path_utils.get_recursively_model_path(args.model_path):
         if test_samples is None or os.path.abspath(model_path) in test_samples:
             print(
-                f"[{sample_idx}] test_compiler, model_path: {model_path}",
+                f"[{sample_idx}] {module_name}, model_path: {model_path}",
                 file=sys.stderr,
                 flush=True,
             )
             cmd = " ".join(
                 [
                     sys.executable,
-                    "-m graph_net.paddle.test_compiler",
+                    f"-m graph_net.paddle.{module_name}",
                     f"--model-path {model_path}",
                     f"--compiler {args.compiler}",
                     f"--device {args.device}",

graph_net/paddle/test_reference_device.py

Lines changed: 25 additions & 10 deletions
@@ -11,18 +11,28 @@
 import re
 import sys
 import traceback
-from graph_net import test_compiler_util
-from graph_net.paddle import utils
-from graph_net.paddle import test_compiler
+
 from graph_net import path_utils
 from graph_net import test_compiler_util
+from graph_net.paddle import test_compiler
+
+
+def get_reference_log_path(reference_dir, model_path):
+    model_name = model_path.split("paddle_samples/")[-1].replace(os.sep, "_")
+    return os.path.join(reference_dir, f"{model_name}.log")
+
+
+def get_reference_output_path(reference_dir, model_path):
+    model_name = model_path.split("paddle_samples/")[-1].replace(os.sep, "_")
+    return os.path.join(reference_dir, f"{model_name}.pdout")
 
 
 def test_single_model(args):
-    model_name = test_compiler_util.get_model_name(args.model_path)
-    if test_compiler_util.get_subgraph_tag(args.model_path):
-        model_name += "_" + test_compiler_util.get_subgraph_tag(args.model_path)
-    ref_log = Path(args.reference_dir) / f"{model_name}.log"
+    ref_log = get_reference_log_path(args.reference_dir, args.model_path)
+    ref_dump = get_reference_output_path(args.reference_dir, args.model_path)
+    print(f"Reference log path: {ref_log}", file=sys.stderr, flush=True)
+    print(f"Reference outputs path: {ref_dump}", file=sys.stderr, flush=True)
+
     with open(ref_log, "w", encoding="utf-8") as log_f:
         with redirect_stdout(log_f), redirect_stderr(log_f):
             compiler = test_compiler.get_compiler_backend(args)
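Note: the new path helpers flatten a sample's path relative to `paddle_samples/` into a single file name, so references for nested model directories land in one flat reference_dir. A worked example with a hypothetical sample path:

    import os

    model_path = "/data/paddle_samples/resnet/subgraph_0"  # hypothetical
    model_name = model_path.split("paddle_samples/")[-1].replace(os.sep, "_")
    # model_name == "resnet_subgraph_0" (on POSIX)
    print(os.path.join("/refs", f"{model_name}.log"))    # /refs/resnet_subgraph_0.log
    print(os.path.join("/refs", f"{model_name}.pdout"))  # /refs/resnet_subgraph_0.pdout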
@@ -63,29 +73,33 @@ def test_single_model(args):
 
     test_compiler_util.print_running_status(args, success)
     if success:
-        ref_dump = Path(args.reference_dir) / f"{model_name}.pdout"
         paddle.save(outputs, str(ref_dump))
         test_compiler_util.print_with_log_prompt(
             "[Performance][eager]:", json.dumps(time_stats), args.log_prompt
         )
 
+    with open(ref_log, "r", encoding="utf-8") as f:
+        content = f.read()
+        print(content, file=sys.stderr, flush=True)
+
 
 def test_multi_models(args):
     test_samples = test_compiler_util.get_allow_samples(args.allow_list)
 
     sample_idx = 0
     failed_samples = []
+    module_name = os.path.splitext(os.path.basename(__file__))[0]
     for model_path in path_utils.get_recursively_model_path(args.model_path):
         if test_samples is None or os.path.abspath(model_path) in test_samples:
             print(
-                f"[{sample_idx}] test_compiler, model_path: {model_path}",
+                f"[{sample_idx}] {module_name}, model_path: {model_path}",
                 file=sys.stderr,
                 flush=True,
             )
             cmd = " ".join(
                 [
                     sys.executable,
-                    "-m graph_net.paddle.test_reference_device",
+                    f"-m graph_net.paddle.{module_name}",
                     f"--model-path {model_path}",
                     f"--compiler {args.compiler}",
                     f"--device {args.device}",
@@ -117,6 +131,7 @@ def main(args):
     assert args.device in ["cuda"]
 
     test_compiler.set_seed(random_seed=args.seed)
+    test_compiler.init_env(args)
 
     ref_dump_dir = Path(args.reference_dir)
     ref_dump_dir.mkdir(parents=True, exist_ok=True)
