
Commit 4e84a5c

refactor(plot_violin): read data from log files instead of JSON (#367)
* Update
* Update
* refactor(plot_violin): read data from log files instead of JSON
  - Change data source from JSON files in subdirectories to direct log file parsing
  - Use scan_all_folders to support both a single log file and a directory with multiple log files
  - Optimize code structure: eliminate duplicate logic and intermediate variables
  - Merge the data processing flow to reduce loop iterations
  - Improve variable naming semantics: curve_name -> category_name, speedup_data -> speedup_raw/speedup_numeric
  - Unify on 'e2e' speedup only, consistent with the core logic (no fallback to 'gpu')
  - Reduce data processing code from 54 lines to 17 lines
* Remove log2json logic, which could cause conflicts
* Add else branches to all if statements in plot_violin.py with explanatory comments

---------

Co-authored-by: JewelRoam <[email protected]>
1 parent d7cf909 commit 4e84a5c
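
The commit message above sketches the new flow, but the plot_violin.py diff itself is not shown on this page. Below is a minimal sketch of what the refactored collection step might look like. It assumes scan_all_folders(path) yields log file paths for either a single file or a directory of logs (its real signature is not shown here), and it uses a hypothetical parse_e2e_speedup helper sketched further below; deriving the category name from the file name is likewise illustrative.

    import os
    from collections import defaultdict

    def collect_speedups(log_path: str) -> dict:
        # Map {category_name: [e2e speedup values]} from one log file or a
        # directory with multiple log files.
        speedup_by_category = defaultdict(list)
        for log_file in scan_all_folders(log_path):  # single file or directory (assumed)
            category_name = os.path.splitext(os.path.basename(log_file))[0]
            for record in parse_logs_to_data(log_file):
                speedup_raw = record.get("performance", {}).get("speedup")
                speedup_numeric = parse_e2e_speedup(speedup_raw)
                if speedup_numeric is not None:
                    speedup_by_category[category_name].append(speedup_numeric)
        return speedup_by_category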

5 files changed: +112 -394 lines changed

graph_net/analysis_util.py

Lines changed: 3 additions & 82 deletions
@@ -1,88 +1,11 @@
 import os
-import json
 import re
 import numpy as np
 from scipy.stats import gmean
 from collections import OrderedDict, defaultdict
 from graph_net.config.datatype_tolerance_config import get_precision
 
 
-def extract_speedup_data_from_subdirs(benchmark_path: str) -> dict:
-    """
-    Reads speedup data from JSON files within each immediate subdirectory of the benchmark_path.
-    Each subdirectory is treated as a separate category.
-    Returns a dictionary mapping {subdir_name: [speedup_values]}.
-    """
-    data_by_subdir = defaultdict(list)
-
-    if not os.path.exists(benchmark_path):
-        print(f"Error: Path does not exist -> {benchmark_path}")
-        return {}
-
-    try:
-        subdirs = [
-            d
-            for d in os.listdir(benchmark_path)
-            if os.path.isdir(os.path.join(benchmark_path, d))
-        ]
-    except FileNotFoundError:
-        print(f"Error: Benchmark path not found -> {benchmark_path}")
-        return {}
-
-    if not subdirs:
-        print(f"Warning: No subdirectories found in -> {benchmark_path}")
-        return {}
-
-    print(f"Found subdirectories to process: {', '.join(subdirs)}")
-
-    for subdir_name in subdirs:
-        current_dir_path = os.path.join(benchmark_path, subdir_name)
-        # Using scan_all_folders and load_one_folder could be an alternative,
-        # but os.walk is also robust for nested directories if needed in the future.
-        for root, _, files in os.walk(current_dir_path):
-            for file in files:
-                if not file.endswith(".json"):
-                    continue
-
-                json_file = os.path.join(root, file)
-                try:
-                    with open(json_file, "r") as f:
-                        data = json.load(f)
-                    performance = data.get("performance", {})
-                    if not performance:
-                        continue
-
-                    speedup_data = performance.get("speedup")
-                    if isinstance(speedup_data, dict):
-                        # Prioritize 'e2e' speedup, fallback to 'gpu'
-                        if "e2e" in speedup_data:
-                            data_by_subdir[subdir_name].append(speedup_data["e2e"])
-                        elif "gpu" in speedup_data:
-                            data_by_subdir[subdir_name].append(speedup_data["gpu"])
-                    elif isinstance(speedup_data, (float, int)):
-                        data_by_subdir[subdir_name].append(speedup_data)
-
-                except (json.JSONDecodeError, KeyError) as e:
-                    print(
-                        f"Warning: Failed to read or parse file -> {json_file}, Error: {e}"
-                    )
-                    continue
-
-    return data_by_subdir
-
-
-def load_json_file(filepath: str) -> dict:
-    """
-    Safely load a JSON file and return data, return an empty dictionary if loading fails.
-    """
-    try:
-        with open(filepath, "r", encoding="utf-8") as f:
-            return json.load(f)
-    except (json.JSONDecodeError, KeyError) as e:
-        print(f" Warning: Could not process file {filepath}. Error: {e}")
-        return {}
-
-
 def detect_sample_error_code(log_text: str) -> str:
     """
     Detect the error code for a single sample from log text.
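
The deleted block above prioritized 'e2e' speedup with a fallback to 'gpu'; the commit unifies on 'e2e' only. Since parse_logs_to_data now keeps performance values as raw strings (see the hunk below), the selection could look like the hypothetical parse_e2e_speedup helper referenced in the sketch near the top of this page:

    import json

    def parse_e2e_speedup(speedup_raw):
        # speedup_raw is the raw string kept by parse_logs_to_data, assumed to
        # be JSON-like, e.g. '{"e2e": 1.23, "gpu": 1.41}' or a bare number '1.23'.
        if speedup_raw is None:
            return None
        try:
            value = json.loads(speedup_raw)
        except (json.JSONDecodeError, TypeError):
            return None  # unparseable entry; caller skips this sample
        if isinstance(value, dict):
            return value.get("e2e")  # 'e2e' only, per the commit; no 'gpu' fallback
        if isinstance(value, (int, float)):
            return float(value)
        return None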
@@ -154,8 +77,8 @@ def parse_logs_to_data(log_file: str) -> list:
     Parse a structured log file generated by the benchmark script and
     return a list of data dictionaries (one per model-compiler run).
 
-    This function directly parses log files without generating intermediate JSON files.
-    It automatically handles both Paddle (with subgraph) and PyTorch (without subgraph) samples.
+    This function directly parses log files,
+    handling both Paddle (with subgraph) and PyTorch (without subgraph) samples.
 
     Args:
         log_file: Path to the benchmark log file
@@ -229,8 +152,7 @@ def parse_logs_to_data(log_file: str) -> list:
         performance_match = patterns["performance"].search(line)
         if performance_match:
             key, value_str = performance_match.groups()
-            # The performance value is a JSON string, so we load it
-            data["performance"][key.strip()] = json.loads(value_str)
+            data["performance"][key.strip()] = value_str.strip()
             continue
 
         datatype_match = patterns["datatype"].search(line)
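
To make the change concrete, here is how a performance line might be matched. The pattern below is an assumed stand-in for the real patterns["performance"] regex, which is defined elsewhere in analysis_util.py, and the log line format is invented for illustration:

    import re

    performance_pattern = re.compile(r"performance\.(\w+)\s*:\s*(.+)")  # assumed shape

    line = 'performance.speedup : {"e2e": 1.23, "gpu": 1.41}'
    match = performance_pattern.search(line)
    if match:
        key, value_str = match.groups()
        # After this commit the value is stored verbatim; decoding (e.g. json.loads)
        # is deferred to consumers such as plot_violin.py.
        print(key, "->", value_str.strip())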
@@ -409,7 +331,6 @@ def get_correctness(dtype: str, t: int, correctness_data: dict, index: int) -> b
     if atol == 0 and rtol == 0:
         metric_key_to_check = "[equal]"
     else:
-        # Use .2E format to ensure two decimal places and use uppercase E to match JSON log format
         metric_key_to_check = f"[all_close_atol_{atol:.2E}_rtol_{rtol:.2E}]"
 
     result = correctness_data.get(metric_key_to_check)
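
As a worked example of the .2E formatting that the deleted comment described, atol=1e-5 and rtol=1e-3 produce the key below:

    atol, rtol = 1e-5, 1e-3
    key = f"[all_close_atol_{atol:.2E}_rtol_{rtol:.2E}]"
    print(key)  # [all_close_atol_1.00E-05_rtol_1.00E-03]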

graph_net/log2json.py

Lines changed: 0 additions & 202 deletions
This file was deleted.
