
Commit 5b9272a

[Feature Enhancement] Add support to test devices on Paddle (#330)
* Add support to test devices on Paddle
* Update
* Merge
* test_device
* update
* fix
1 parent f5ca9e4 commit 5b9272a
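
This commit adds a two-script workflow for cross-device testing. The first new file (the `graph_net.paddle.test_reference_device` module, judging from the self-invocation in its `test_multi_models`) runs each model on a reference device and writes a `<model_name>.log` stats log plus a `<model_name>.pdout` output dump into `--reference-dir`. The second file replays the recorded configuration on the device under test and compares outputs and timings against those references. Below is a minimal sketch of the intended sequence; the model and reference paths are hypothetical, and the second module's name is an assumption (only its `--reference-dir` flag is confirmed by the code):

import subprocess
import sys

# Step 1: on the reference device, dump per-model logs and outputs.
subprocess.run(
    [
        sys.executable, "-m", "graph_net.paddle.test_reference_device",
        "--model-path", "/data/models",        # hypothetical path
        "--compiler", "cinn",
        "--reference-dir", "/data/reference",  # hypothetical path
    ],
    check=True,
)

# Step 2: on the target device, replay and compare against the references.
# The module name here is assumed, not confirmed by the diff.
subprocess.run(
    [
        sys.executable, "-m", "graph_net.paddle.test_device",
        "--reference-dir", "/data/reference",
    ],
    check=True,
)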

File tree

2 files changed: +344 -0 lines changed

Lines changed: 186 additions & 0 deletions
@@ -0,0 +1,186 @@
import argparse
import importlib.util
import paddle
import time
import numpy as np
import random
import os
from pathlib import Path
from contextlib import redirect_stdout, redirect_stderr
import json
import re
import sys
import traceback
from graph_net import test_compiler_util
from graph_net.paddle import utils
from graph_net.paddle import test_compiler
from graph_net import path_utils


def test_single_model(args):
    """Run one model on the reference device, redirecting all output to
    <reference_dir>/<model_name>.log and dumping outputs to <model_name>.pdout."""
    model_name = test_compiler_util.get_model_name(args.model_path)
    if test_compiler_util.get_subgraph_tag(args.model_path):
        model_name += "_" + test_compiler_util.get_subgraph_tag(args.model_path)
    ref_log = Path(args.reference_dir) / f"{model_name}.log"
    with open(ref_log, "w", encoding="utf-8") as log_f:
        with redirect_stdout(log_f), redirect_stderr(log_f):
            compiler = test_compiler.get_compiler_backend(args)
            test_compiler.check_and_print_gpu_utilization(compiler)

            input_dict = test_compiler.get_input_dict(args.model_path)
            model = test_compiler.get_model(args.model_path)
            model.eval()

            test_compiler_util.print_with_log_prompt(
                "[Config] seed:", args.seed, args.log_prompt
            )

            test_compiler_util.print_basic_config(
                args,
                test_compiler.get_hardward_name(args),
                test_compiler.get_compile_framework_version(args),
            )

            success = False
            time_stats = {}
            try:
                input_spec = test_compiler.get_input_spec(args.model_path)
                compiled_model = compiler(model, input_spec)
                outputs, time_stats = test_compiler.measure_performance(
                    lambda: compiled_model(**input_dict),
                    args,
                    compiler,
                    profile=False,
                )
                success = True
            except Exception as e:
                print(
                    f"Run model failed: {str(e)}\n{traceback.format_exc()}",
                    file=sys.stderr,
                    flush=True,
                )

            test_compiler_util.print_running_status(args, success)
            if success:
                ref_dump = Path(args.reference_dir) / f"{model_name}.pdout"
                paddle.save(outputs, str(ref_dump))
                test_compiler_util.print_with_log_prompt(
                    "[Performance][eager]:", json.dumps(time_stats), args.log_prompt
                )


def test_multi_models(args):
    """Walk the model tree and run each sample in a subprocess, so one
    crashing model cannot take down the whole sweep."""
    test_samples = test_compiler_util.get_allow_samples(args.allow_list)

    sample_idx = 0
    failed_samples = []
    for model_path in path_utils.get_recursively_model_path(args.model_path):
        if test_samples is None or os.path.abspath(model_path) in test_samples:
            print(
                f"[{sample_idx}] test_compiler, model_path: {model_path}",
                file=sys.stderr,
                flush=True,
            )
            cmd = " ".join(
                [
                    sys.executable,
                    "-m graph_net.paddle.test_reference_device",
                    f"--model-path {model_path}",
                    f"--compiler {args.compiler}",
                    f"--device {args.device}",
                    f"--warmup {args.warmup}",
                    f"--trials {args.trials}",
                    f"--log-prompt {args.log_prompt}",
                    f"--seed {args.seed}",
                    f"--reference-dir {args.reference_dir}",
                ]
            )
            cmd_ret = os.system(cmd)
            # assert cmd_ret == 0, f"{cmd_ret=}, {cmd=}"
            if cmd_ret != 0:
                failed_samples.append(model_path)
            sample_idx += 1

    print(
        f"Verified {sample_idx} samples in total; {len(failed_samples)} failed.",
        file=sys.stderr,
        flush=True,
    )
    for model_path in failed_samples:
        print(f"- {model_path}", file=sys.stderr, flush=True)


def main(args):
    assert os.path.isdir(args.model_path)
    assert args.compiler in {"cinn", "nope"}
    assert args.device in ["cuda"]

    test_compiler.set_seed(random_seed=args.seed)

    ref_dump_dir = Path(args.reference_dir)
    ref_dump_dir.mkdir(parents=True, exist_ok=True)

    if path_utils.is_single_model_dir(args.model_path):
        test_single_model(args)
    else:
        test_multi_models(args)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Generate reference outputs and performance stats on a reference device."
    )
    parser.add_argument(
        "--model-path",
        type=str,
        required=True,
        help="Path to model file(s); each subdirectory containing graph_net.json is treated as a model",
    )
    parser.add_argument(
        "--compiler",
        type=str,
        required=False,
        default="cinn",
        help="Compiler backend to test ('cinn' or 'nope')",
    )
    parser.add_argument(
        "--device",
        type=str,
        required=False,
        default="cuda",
        help="Device for testing the compiler (currently only 'cuda' is accepted)",
    )
    parser.add_argument(
        "--warmup", type=int, required=False, default=5, help="Number of warmup steps"
    )
    parser.add_argument(
        "--trials", type=int, required=False, default=10, help="Number of timing trials"
    )
    parser.add_argument(
        "--log-prompt",
        type=str,
        required=False,
        default="graph-net-test-device-log",
        help="Log prompt for performance log filtering.",
    )
    parser.add_argument(
        "--allow-list",
        type=str,
        required=False,
        default=None,
        help="Path to a sample list; each line contains one sample path",
    )
    parser.add_argument(
        "--seed",
        type=int,
        required=False,
        default=123,
        help="Random seed (default: 123)",
    )
    parser.add_argument(
        "--reference-dir",
        type=str,
        required=True,
        help="Directory to save reference stats log and outputs",
    )
    args = parser.parse_args()
    main(args=args)
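
For each sample this script leaves two artifacts in `--reference-dir`: a `<model_name>.log` capturing the `[Config]` and `[Performance][eager]` lines, and a `<model_name>.pdout` holding the saved outputs. Since the dump goes through `paddle.save`, it can be inspected afterwards with plain Paddle APIs; a minimal sketch, with a hypothetical file name:

import paddle

# paddle.load reads back whatever paddle.save dumped above; the structure
# mirrors measure_performance's `outputs` (typically a tensor or a nested
# container of tensors).
outputs = paddle.load("/data/reference/resnet50.pdout")  # hypothetical file
print(type(outputs))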
Lines changed: 158 additions & 0 deletions
@@ -0,0 +1,158 @@
import argparse
import importlib.util
import paddle
import time
import numpy as np
import random
import os
from pathlib import Path
import json
import re
import sys
import traceback
from graph_net import test_compiler_util
from graph_net.paddle import utils
from graph_net.paddle import test_compiler
from graph_net import path_utils


def read_config(log_path):
    """Recover the model path and the [Config] key/value pairs recorded in a
    reference log, scanning the file in reverse."""
    config = {}
    with open(log_path, "r", encoding="utf-8") as f:
        lines = f.readlines()
    for line in reversed(lines):
        if "[Processing]" in line:
            config["model_path"] = line.split("[Processing]")[1].strip()
        if "[Config]" in line:
            config_line = line.split("[Config]")[1].strip()
            key, value = config_line.split(": ")
            config[key.strip()] = value.strip()
    return config


def read_time_stats(log_path):
    """Return the JSON time stats from the last [Performance][eager] line in
    the log, or None if no such line exists."""
    with open(log_path, "r", encoding="utf-8") as f:
        lines = f.readlines()
    for line in reversed(lines):
        if "[Performance][eager]" in line:
            start = line.find("{")
            end = line.rfind("}")
            return json.loads(line[start : end + 1])
    return None


def test_single_model(args):
    """Re-run one model on the device under test and compare its outputs and
    timings against the saved reference."""
    compiler = test_compiler.get_compiler_backend(args)
    test_compiler.check_and_print_gpu_utilization(compiler)

    input_dict = test_compiler.get_input_dict(args.model_path)
    model = test_compiler.get_model(args.model_path)
    model.eval()

    test_compiler_util.print_basic_config(
        args,
        test_compiler.get_hardward_name(args),
        test_compiler.get_compile_framework_version(args),
    )

    success = False
    time_stats = {}
    try:
        input_spec = test_compiler.get_input_spec(args.model_path)
        compiled_model = compiler(model, input_spec)
        outputs, time_stats = test_compiler.measure_performance(
            lambda: compiled_model(**input_dict), args, compiler, profile=False
        )
        success = True
    except Exception as e:
        print(
            f"Run model failed: {str(e)}\n{traceback.format_exc()}",
            file=sys.stderr,
            flush=True,
        )

    test_compiler_util.print_running_status(args, success)

    model_name = test_compiler_util.get_model_name(args.model_path)
    if test_compiler_util.get_subgraph_tag(args.model_path):
        model_name += "_" + test_compiler_util.get_subgraph_tag(args.model_path)

    ref_dump = Path(args.reference_dir) / f"{model_name}.pdout"
    ref_log = Path(args.reference_dir) / f"{model_name}.log"
    ref_out = paddle.load(str(ref_dump))
    ref_time_stats = read_time_stats(ref_log)

    if success:
        test_compiler.check_outputs(args, ref_out, outputs)

    test_compiler_util.print_times_and_speedup(args, ref_time_stats, time_stats)

    # Report failure to the caller so main() can record the sample.
    return 0 if success else 1


def find_log_files(directory):
    """Yield every .log file under `directory`, recursively."""
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith(".log"):
                yield os.path.join(root, file)


def main(args):
    assert os.path.isdir(args.reference_dir)

    sample_idx = 0
    failed_samples = []

    for log_file in find_log_files(args.reference_dir):
        # Rebuild the run configuration recorded by the reference device so
        # both devices execute with identical settings.
        config = read_config(log_file)
        model_path = config.get("model_path")
        vars(args)["model_path"] = model_path
        vars(args)["compiler"] = config.get("compiler")
        vars(args)["trials"] = int(config.get("trials"))
        vars(args)["warmup"] = int(config.get("warmup"))
        test_compiler.set_seed(random_seed=int(config.get("seed")))

        print(
            f"[{sample_idx}] test_device, model_path: {model_path}",
            file=sys.stderr,
            flush=True,
        )
        if test_single_model(args) != 0:
            failed_samples.append(model_path)
        sample_idx += 1

    print(
        f"Verified {sample_idx} samples in total; {len(failed_samples)} failed.",
        file=sys.stderr,
        flush=True,
    )
    for model_path in failed_samples:
        print(f"- {model_path}", file=sys.stderr, flush=True)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Compare device outputs and performance against saved references."
    )
    parser.add_argument(
        "--reference-dir",
        type=str,
        required=True,
        help="Directory to load reference stats log and outputs",
    )
    parser.add_argument(
        "--device",
        type=str,
        required=False,
        default="cuda",
        help="Device for testing the compiler (e.g., 'cpu' or 'cuda')",
    )
    parser.add_argument(
        "--log-prompt",
        type=str,
        required=False,
        default="graph-net-test-device-log",
        help="Log prompt for performance log filtering.",
    )
    args = parser.parse_args()
    main(args=args)
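
The coupling between the two scripts is the log format: `read_config` and `read_time_stats` only look for the bracketed markers, so any prefix emitted by `print_with_log_prompt` works. A self-contained sketch of that round-trip, assuming log lines of the form `<prompt> [Config] key: value` (the exact prefix and the `e2e_ms` stat key are assumptions; the `[Config]` keys such as `compiler`, `trials`, and `warmup` are presumably written by `print_basic_config`):

import json

sample_log = [
    "graph-net-test-device-log [Config] seed: 123\n",
    "graph-net-test-device-log [Config] compiler: cinn\n",
    'graph-net-test-device-log [Performance][eager]: {"e2e_ms": 1.8}\n',
]

# Same parsing scheme as read_config: split on the marker, then on ": ".
config = {}
for line in reversed(sample_log):
    if "[Config]" in line:
        key, value = line.split("[Config]")[1].strip().split(": ")
        config[key.strip()] = value.strip()
print(config)  # {'compiler': 'cinn', 'seed': '123'}

# Same extraction as read_time_stats: the JSON object between the outermost braces.
perf_line = sample_log[-1]
stats = json.loads(perf_line[perf_line.find("{") : perf_line.rfind("}") + 1])
print(stats)  # {'e2e_ms': 1.8}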
