Commit 2519495

[Feat] Handle execution_failure (#250)
* CONTRIBUTE_TUTORIAL_cn.md
* Handle big int tensors by converting to sparse COO
* Update utils
* Update utils
* Update utils
* Update utils
* Update utils
* Update paddle test compiler
* Add compilation_duration display
* resolve conflict
* Feat: generate violin figure (benchmark)
* revert file
* cn->en
* Update Analysis
* Update
* Update
* Update
* Update
* Update
* Update
* Update
* Update
* Update
* add annotation
* Optimized structure, add e2e time
* Record 2 types of speedup
* update analysis
* Update
* Update
* Update CI
* Feat: Record model name & Update config
* Update graph_net.json of transformers models
* Update
* Update
* Update
* Handle execution_failure, correctness_failure
* Update
* Update
* Update
* Del panelty
* Del panelty
* Separate log and log2Json process
* Remove /work/GraphNet/graph_net/benchmark_demo.sh
1 parent 4feebaa commit 2519495

4 files changed, +365 -190 lines changed


README.md

Lines changed: 19 additions & 12 deletions
@@ -84,24 +84,20 @@ All the **construction constraints** will be examined automatically. After passi
 
 **Step 1: Benchmark**
 
-We use `graph_net/benchmark_demo.sh` to benchmark GraphNet computation graph samples:
+We use `graph_net.torch.test_compiler` to benchmark GraphNet samples with specific batch and log configurations:
 
 ```bash
-bash graph_net/benchmark_demo.sh &
-```
-
-The script runs `graph_net.torch.test_compiler` with specific batch and log configurations.
-
-Or you can customize and use `graph_net.torch.test_compiler` yourself:
+# Set your benchmark directory
+export GRAPH_NET_BENCHMARK_PATH=/home/yourname/graphnet_benchmark/
 
-```bash
+# Run benchmark
 python -m graph_net.torch.test_compiler \
   --model-path $GRAPH_NET_EXTRACT_WORKSPACE/model_name/ \
   --compiler /custom/or/builtin/compiler/ \
+  --device /device/to/execute/ \
   --warmup /times/to/warmup/ \
   --trials /times/to/test/ \
-  --device /device/to/execute/ \
-  --output-dir /path/to/save/JSON/result/file/
+  > $GRAPH_NET_BENCHMARK_PATH/log.log 2>&1
 
 # Note: if --compiler is omitted, PyTorch’s built-in compiler is used by default
 ```
@@ -110,9 +106,20 @@ After executing, `graph_net.torch.test_compiler` will:
 1. Run the original model in eager mode to record a baseline.
 2. Compile the model with the specified backend (e.g., CINN, TVM, Inductor, TensorRT, XLA, BladeDISC).
 3. Execute the compiled model and collect its runtime and outputs.
-4. Compute the speedup by comparing the compiled results against the baseline.
+4. Compute the speedup by comparing the compiled results against the baseline (if no execution failure occurs).
+
+**Step 2: Generate JSON Record**
+
+This step extracts information (including failures) from the benchmark logs.
+All the information will be saved to multiple `model_compiler.json` files via:
+
+```bash
+python -m graph_net.torch.log2json \
+  --log-file $GRAPH_NET_BENCHMARK_PATH/log.log \
+  --output-dir $GRAPH_NET_BENCHMARK_PATH
+```
 
-**Step 2: Analysis**
+**Step 3: Analysis**
 
 After processing, we provide `graph_net/analysis.py` to generate a [violin plot](https://en.m.wikipedia.org/wiki/Violin_plot) based on the JSON results.
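For reference, the three README steps above can also be chained programmatically. The sketch below is purely illustrative and not part of this commit: the module names and flags come from the README snippet, while the device, warmup, and trial values are placeholder choices.

```python
import os
import subprocess
import sys

# Placeholders: adjust to your environment.
workspace = os.environ.get("GRAPH_NET_EXTRACT_WORKSPACE", "./extract_workspace")
bench_dir = os.environ.get("GRAPH_NET_BENCHMARK_PATH", "./graphnet_benchmark")
os.makedirs(bench_dir, exist_ok=True)
log_path = os.path.join(bench_dir, "log.log")

# Step 1: benchmark one sample; stdout/stderr are captured into log.log,
# mirroring the `> ... 2>&1` redirection shown in the README.
with open(log_path, "w", encoding="utf-8") as log:
    subprocess.run(
        [
            sys.executable, "-m", "graph_net.torch.test_compiler",
            "--model-path", os.path.join(workspace, "model_name/"),
            "--device", "cuda",  # illustrative value
            "--warmup", "5",     # illustrative value
            "--trials", "20",    # illustrative value
            # --compiler omitted: PyTorch's built-in compiler is used by default
        ],
        stdout=log,
        stderr=subprocess.STDOUT,
        check=False,  # a failed run should still leave a parseable log
    )

# Step 2: convert the log into per-model JSON reports.
subprocess.run(
    [
        sys.executable, "-m", "graph_net.torch.log2json",
        "--log-file", log_path,
        "--output-dir", bench_dir,
    ],
    check=True,
)
# Step 3: feed the generated JSON files to graph_net/analysis.py (see README).
```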

graph_net/benchmark_demo.sh

Lines changed: 0 additions & 40 deletions
This file was deleted.

graph_net/torch/log2json.py

Lines changed: 164 additions & 0 deletions
@@ -0,0 +1,164 @@
import argparse
import json
import os
import re
from collections import defaultdict


def parse_logs_to_json(log_file: str, output_dir: str):
    """
    Parses a structured log file generated by the benchmark script and
    creates a separate JSON report for each model-compiler run.
    """
    try:
        with open(log_file, "r", encoding="utf-8") as f:
            lines = f.readlines()
    except FileNotFoundError:
        print(f"Error: Log file not found at '{log_file}'")
        return
    except Exception as e:
        print(f"Error reading log file: {e}")
        return

    # This dictionary will hold the parsed data for all runs found in the log file.
    # Key: The model path from the '[Processing]' line, which is unique per run.
    # Value: The dictionary that will be converted to JSON.
    all_runs_data = {}
    current_run_key = None

    # Define regex patterns for each type of log line we need to parse.
    patterns = {
        "processing": re.compile(r"\[Processing\] (.+)"),
        "config": re.compile(r"\[Config\] (\S+): (.+)"),
        "performance": re.compile(r"\[Performance\]\[(\w+)\]: (.+)"),
        "datatype": re.compile(r"\[Datatype\]\[(\w+)\]: (.+)"),
        "correctness": re.compile(r"\[Correctness\](\[.+\]): (.+)"),
        "result_status": re.compile(r"\[Result\] status: (.+)"),
        "speedup": re.compile(r"\[Speedup\]\[(\w+)\]: (.+)"),
    }

    for line in lines:
        # Check for the start of a new model run
        processing_match = patterns["processing"].search(line)
        if processing_match:
            current_run_key = processing_match.group(1).strip()
            # Initialize a nested dictionary structure for this new run
            all_runs_data[current_run_key] = {
                "configuration": {},
                "correctness": {},
                "performance": {
                    "eager": {},
                    "compiled": {},
                    "datatype": {},
                    "speedup": {},
                },
            }
            continue

        # If we haven't identified a run yet, skip the line
        if not current_run_key:
            continue

        # Get the data dictionary for the current run
        data = all_runs_data[current_run_key]

        # Try to match other patterns
        config_match = patterns["config"].search(line)
        if config_match:
            key, value = config_match.groups()
            data["configuration"][key.strip()] = value.strip()
            continue

        performance_match = patterns["performance"].search(line)
        if performance_match:
            key, value_str = performance_match.groups()
            # The performance value is a JSON string, so we load it
            data["performance"][key.strip()] = json.loads(value_str)
            continue

        datatype_match = patterns["datatype"].search(line)
        if datatype_match:
            key, value_str = datatype_match.groups()
            # The datatype value is a space-separated string
            data["performance"]["datatype"][key.strip()] = value_str.strip().split()
            continue

        correctness_match = patterns["correctness"].search(line)
        if correctness_match:
            key, value_str = correctness_match.groups()
            values = []
            for v in value_str.strip().split():
                try:
                    # Try to convert to int if it's a whole number, else float
                    values.append(int(v) if "." not in v else float(v))
                except ValueError:
                    # Handle non-numeric values like 'nan'
                    values.append(float(v))
            data["correctness"][key.strip()] = values
            continue

        result_status_match = patterns["result_status"].search(line)
        if result_status_match:
            status = result_status_match.group(1).strip()
            if status == "failed":
                data["performance"]["failure"] = "True"
            continue

        speedup_match = patterns["speedup"].search(line)
        if speedup_match:
            key, value_str = speedup_match.groups()
            data["performance"]["speedup"][key.strip()] = float(value_str)
            continue

    # After parsing all lines, write the results to JSON files
    if not all_runs_data:
        print("No processable log entries found in the file.")
        return

    os.makedirs(output_dir, exist_ok=True)

    for run_key, data in all_runs_data.items():
        try:
            model_name = data["configuration"]["model"]
            compiler_name = data["configuration"]["compiler"]
            filename = f"{model_name}_{compiler_name}.json"
            filepath = os.path.join(output_dir, filename)

            with open(filepath, "w", encoding="utf-8") as f:
                json.dump(data, f, indent=4)

            print(f"Successfully generated report: {filepath}")

        except KeyError as e:
            print(
                f"Warning: Could not generate report for '{run_key}' due to missing key: {e}"
            )
        except Exception as e:
            print(
                f"Warning: An unexpected error occurred while writing report for '{run_key}': {e}"
            )


def main():
    parser = argparse.ArgumentParser(
        description="Convert benchmark logs to JSON reports.",
        formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument(
        "--log-file",
        type=str,
        required=True,
        help="Path to the benchmark log file generated by test_compiler.py.",
    )
    parser.add_argument(
        "--output-dir",
        type=str,
        required=True,
        help="Directory to save the structured JSON result files.",
    )
    args = parser.parse_args()
    parse_logs_to_json(args.log_file, args.output_dir)


if __name__ == "__main__":
    main()
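To make the expected log format concrete, here is a small, self-contained sketch (not part of this commit): it writes a synthetic log whose tagged lines mirror the regex patterns above, runs `parse_logs_to_json` on it, and prints the generated reports. The model names, compiler, and numbers are made up rather than real `test_compiler` output, and the import assumes the GraphNet repository root is on `PYTHONPATH`.

```python
import json
import tempfile
from pathlib import Path

from graph_net.torch.log2json import parse_logs_to_json

# Synthetic log: two runs, one successful and one with an execution failure.
SAMPLE_LOG = """\
[Processing] /workspace/models/resnet18
[Config] model: resnet18
[Config] compiler: inductor
[Performance][eager]: {"e2e": [1.20, 1.21]}
[Performance][compiled]: {"e2e": [0.60, 0.61]}
[Datatype][outputs]: float32 float32
[Correctness][outputs]: 1 1
[Speedup][e2e]: 2.0
[Result] status: success
[Processing] /workspace/models/bert_base
[Config] model: bert_base
[Config] compiler: inductor
[Result] status: failed
"""

with tempfile.TemporaryDirectory() as tmp:
    log_path = Path(tmp) / "log.log"
    log_path.write_text(SAMPLE_LOG, encoding="utf-8")

    # Writes one <model>_<compiler>.json report per [Processing] block.
    parse_logs_to_json(str(log_path), tmp)

    for report in sorted(Path(tmp).glob("*.json")):
        print(f"--- {report.name} ---")
        print(report.read_text(encoding="utf-8"))
```

The second, failing run illustrates the execution-failure handling added in this commit: its report carries `"failure": "True"` under `"performance"` and no speedup entries.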

0 commit comments
