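Update: remove JSON-based helpers from analysis_util and move plot_ESt argument parsing into the __main__ guard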

JewelRoam · JewelRoam · commit dfddea4df4e1 · 2025-11-18T13:46:20.000+08:00
diff --git a/graph_net/analysis_util.py b/graph_net/analysis_util.py
@@ -1,88 +1,11 @@
 import os
-import json
 import re
 import numpy as np
 from scipy.stats import gmean
 from collections import OrderedDict, defaultdict
 from graph_net.config.datatype_tolerance_config import get_precision
 
 
-def extract_speedup_data_from_subdirs(benchmark_path: str) -> dict:
-    """
-    Reads speedup data from JSON files within each immediate subdirectory of the benchmark_path.
-    Each subdirectory is treated as a separate category.
-    Returns a dictionary mapping {subdir_name: [speedup_values]}.
-    """
-    data_by_subdir = defaultdict(list)
-
-    if not os.path.exists(benchmark_path):
-        print(f"Error: Path does not exist -> {benchmark_path}")
-        return {}
-
-    try:
-        subdirs = [
-            d
-            for d in os.listdir(benchmark_path)
-            if os.path.isdir(os.path.join(benchmark_path, d))
-        ]
-    except FileNotFoundError:
-        print(f"Error: Benchmark path not found -> {benchmark_path}")
-        return {}
-
-    if not subdirs:
-        print(f"Warning: No subdirectories found in -> {benchmark_path}")
-        return {}
-
-    print(f"Found subdirectories to process: {', '.join(subdirs)}")
-
-    for subdir_name in subdirs:
-        current_dir_path = os.path.join(benchmark_path, subdir_name)
-        # Using scan_all_folders and load_one_folder could be an alternative,
-        # but os.walk is also robust for nested directories if needed in the future.
-        for root, _, files in os.walk(current_dir_path):
-            for file in files:
-                if not file.endswith(".json"):
-                    continue
-
-                json_file = os.path.join(root, file)
-                try:
-                    with open(json_file, "r") as f:
-                        data = json.load(f)
-                        performance = data.get("performance", {})
-                        if not performance:
-                            continue
-
-                        speedup_data = performance.get("speedup")
-                        if isinstance(speedup_data, dict):
-                            # Prioritize 'e2e' speedup, fallback to 'gpu'
-                            if "e2e" in speedup_data:
-                                data_by_subdir[subdir_name].append(speedup_data["e2e"])
-                            elif "gpu" in speedup_data:
-                                data_by_subdir[subdir_name].append(speedup_data["gpu"])
-                        elif isinstance(speedup_data, (float, int)):
-                            data_by_subdir[subdir_name].append(speedup_data)
-
-                except (json.JSONDecodeError, KeyError) as e:
-                    print(
-                        f"Warning: Failed to read or parse file -> {json_file}, Error: {e}"
-                    )
-                    continue
-
-    return data_by_subdir
-
-
-def load_json_file(filepath: str) -> dict:
-    """
-    Safely load a JSON file and return data, return an empty dictionary if loading fails.
-    """
-    try:
-        with open(filepath, "r", encoding="utf-8") as f:
-            return json.load(f)
-    except (json.JSONDecodeError, KeyError) as e:
-        print(f"    Warning: Could not process file {filepath}. Error: {e}")
-        return {}
-
-
 def detect_sample_error_code(log_text: str) -> str:
     """
     Detect the error code for a single sample from log text.
@@ -154,8 +77,8 @@ def parse_logs_to_data(log_file: str) -> list:
     Parse a structured log file generated by the benchmark script and
     return a list of data dictionaries (one per model-compiler run).
 
-    This function directly parses log files without generating intermediate JSON files.
-    It automatically handles both Paddle (with subgraph) and PyTorch (without subgraph) samples.
+    This function directly parses log files,
+    handling both Paddle (with subgraph) and PyTorch (without subgraph) samples.
 
     Args:
         log_file: Path to the benchmark log file
@@ -229,8 +152,7 @@ def parse_logs_to_data(log_file: str) -> list:
         performance_match = patterns["performance"].search(line)
         if performance_match:
             key, value_str = performance_match.groups()
-            # The performance value is a JSON string, so we load it
-            data["performance"][key.strip()] = json.loads(value_str)
+            data["performance"][key.strip()] = value_str.strip()
             continue
 
         datatype_match = patterns["datatype"].search(line)
@@ -409,7 +331,6 @@ def get_correctness(dtype: str, t: int, correctness_data: dict, index: int) -> b
     if atol == 0 and rtol == 0:
         metric_key_to_check = "[equal]"
     else:
-        # Use .2E format to ensure two decimal places and use uppercase E to match JSON log format
         metric_key_to_check = f"[all_close_atol_{atol:.2E}_rtol_{rtol:.2E}]"
 
     result = correctness_data.get(metric_key_to_check)
diff --git a/graph_net/plot_ESt.py b/graph_net/plot_ESt.py
@@ -162,7 +162,7 @@ def get_verified_aggregated_es_values(es_scores: dict, folder_name: str) -> dict
     return verified_es_values
 
 
-def plot_ES_results(s_scores: dict, cli_args: argparse.Namespace):
+def plot_ES_results(s_scores: dict, args: argparse.Namespace):
     """
     Plot ES(t) curve
     """
@@ -179,8 +179,7 @@ def plot_ES_results(s_scores: dict, cli_args: argparse.Namespace):
         for (
             t_key,
             score_data,
-        ) in scores_dict.items():  # Change variable name to score_data
-            # Access the 'score' key from the nested dictionary
+        ) in scores_dict.items():
             if isinstance(score_data, dict):
                 score = score_data["score"]
             else:
@@ -234,8 +233,8 @@ def plot_ES_results(s_scores: dict, cli_args: argparse.Namespace):
                 markersize=6,
             )
 
-    p = cli_args.negative_speedup_penalty
-    config = f"p = {p}, b = {cli_args.fpdb}"
+    p = args.negative_speedup_penalty
+    config = f"p = {p}, b = {args.fpdb}"
     fig.text(0.5, 0.9, config, ha="center", fontsize=16, style="italic")
 
     ax.set_xlabel("t", fontsize=18)
@@ -253,51 +252,7 @@ def plot_ES_results(s_scores: dict, cli_args: argparse.Namespace):
     return fig, ax, all_x_coords
 
 
-def main():
-    """Main execution function for plotting ES(t)."""
-    parser = argparse.ArgumentParser(
-        description="Calculate and plot ES(t) scores from benchmark results.",
-        formatter_class=argparse.RawTextHelpFormatter,
-    )
-    # Add arguments (same as plot_St)
-    parser.add_argument(
-        "--benchmark-path",
-        type=str,
-        required=True,
-        help="Path to the benchmark log file or directory containing benchmark JSON files or sub-folders.",
-    )
-    parser.add_argument(
-        "--output-dir",
-        type=str,
-        default="analysis_results",
-        help="Output directory for saving the plot. Default: analysis_results",
-    )
-    parser.add_argument(
-        "--negative-speedup-penalty",
-        type=float,
-        default=0.0,
-        help="Penalty power (p) for negative speedup. Formula: speedup**(p+1). Default: 0.0.",
-    )
-    parser.add_argument(
-        "--fpdb",
-        type=float,
-        default=0.1,
-        help="Base penalty for severe errors (e.g., crashes, correctness failures).",
-    )
-    parser.add_argument(
-        "--enable-aggregation-mode",
-        action="store_true",
-        help="Enable aggregation mode to verify aggregated/microscopic consistency. Default: enabled.",
-    )
-    parser.add_argument(
-        "--disable-aggregation-mode",
-        dest="enable_aggregation_mode",
-        action="store_false",
-        help="Disable aggregation mode verification.",
-    )
-    parser.set_defaults(enable_aggregation_mode=True)
-    args = parser.parse_args()
-
+def main(args):
     # 1. Scan folders to get data
     all_results = analysis_util.scan_all_folders(args.benchmark_path)
     if not all_results:
@@ -433,4 +388,45 @@ def main():
 
 
 if __name__ == "__main__":
-    main()
+    parser = argparse.ArgumentParser(
+        description="Calculate and plot ES(t) scores from benchmark results.",
+        formatter_class=argparse.RawTextHelpFormatter,
+    )
+    parser.add_argument(
+        "--benchmark-path",
+        type=str,
+        required=True,
+        help="Path to the benchmark log file or directory containing benchmark JSON files or sub-folders.",
+    )
+    parser.add_argument(
+        "--output-dir",
+        type=str,
+        default="analysis_results",
+        help="Output directory for saving the plot. Default: analysis_results",
+    )
+    parser.add_argument(
+        "--negative-speedup-penalty",
+        type=float,
+        default=0.0,
+        help="Penalty power (p) for negative speedup. Formula: speedup**(p+1). Default: 0.0.",
+    )
+    parser.add_argument(
+        "--fpdb",
+        type=float,
+        default=0.1,
+        help="Base penalty for severe errors (e.g., crashes, correctness failures).",
+    )
+    parser.add_argument(
+        "--enable-aggregation-mode",
+        action="store_true",
+        help="Enable aggregation mode to verify aggregated/microscopic consistency. Default: enabled.",
+    )
+    parser.add_argument(
+        "--disable-aggregation-mode",
+        dest="enable_aggregation_mode",
+        action="store_false",
+        help="Disable aggregation mode verification.",
+    )
+    parser.set_defaults(enable_aggregation_mode=True)
+    args = parser.parse_args()
+    main(args)