|
1 | 1 | import os |
2 | | -import json |
3 | 2 | import re |
4 | 3 | import numpy as np |
5 | 4 | from scipy.stats import gmean |
6 | 5 | from collections import OrderedDict, defaultdict |
7 | 6 | from graph_net.config.datatype_tolerance_config import get_precision |
8 | 7 |
|
9 | 8 |
|
10 | | -def extract_speedup_data_from_subdirs(benchmark_path: str) -> dict: |
11 | | - """ |
12 | | - Reads speedup data from JSON files within each immediate subdirectory of the benchmark_path. |
13 | | - Each subdirectory is treated as a separate category. |
14 | | - Returns a dictionary mapping {subdir_name: [speedup_values]}. |
15 | | - """ |
16 | | - data_by_subdir = defaultdict(list) |
17 | | - |
18 | | - if not os.path.exists(benchmark_path): |
19 | | - print(f"Error: Path does not exist -> {benchmark_path}") |
20 | | - return {} |
21 | | - |
22 | | - try: |
23 | | - subdirs = [ |
24 | | - d |
25 | | - for d in os.listdir(benchmark_path) |
26 | | - if os.path.isdir(os.path.join(benchmark_path, d)) |
27 | | - ] |
28 | | - except FileNotFoundError: |
29 | | - print(f"Error: Benchmark path not found -> {benchmark_path}") |
30 | | - return {} |
31 | | - |
32 | | - if not subdirs: |
33 | | - print(f"Warning: No subdirectories found in -> {benchmark_path}") |
34 | | - return {} |
35 | | - |
36 | | - print(f"Found subdirectories to process: {', '.join(subdirs)}") |
37 | | - |
38 | | - for subdir_name in subdirs: |
39 | | - current_dir_path = os.path.join(benchmark_path, subdir_name) |
40 | | - # Using scan_all_folders and load_one_folder could be an alternative, |
41 | | - # but os.walk is also robust for nested directories if needed in the future. |
42 | | - for root, _, files in os.walk(current_dir_path): |
43 | | - for file in files: |
44 | | - if not file.endswith(".json"): |
45 | | - continue |
46 | | - |
47 | | - json_file = os.path.join(root, file) |
48 | | - try: |
49 | | - with open(json_file, "r") as f: |
50 | | - data = json.load(f) |
51 | | - performance = data.get("performance", {}) |
52 | | - if not performance: |
53 | | - continue |
54 | | - |
55 | | - speedup_data = performance.get("speedup") |
56 | | - if isinstance(speedup_data, dict): |
57 | | - # Prioritize 'e2e' speedup, fallback to 'gpu' |
58 | | - if "e2e" in speedup_data: |
59 | | - data_by_subdir[subdir_name].append(speedup_data["e2e"]) |
60 | | - elif "gpu" in speedup_data: |
61 | | - data_by_subdir[subdir_name].append(speedup_data["gpu"]) |
62 | | - elif isinstance(speedup_data, (float, int)): |
63 | | - data_by_subdir[subdir_name].append(speedup_data) |
64 | | - |
65 | | - except (json.JSONDecodeError, KeyError) as e: |
66 | | - print( |
67 | | - f"Warning: Failed to read or parse file -> {json_file}, Error: {e}" |
68 | | - ) |
69 | | - continue |
70 | | - |
71 | | - return data_by_subdir |
72 | | - |
73 | | - |
74 | | -def load_json_file(filepath: str) -> dict: |
75 | | - """ |
76 | | - Safely load a JSON file and return data, return an empty dictionary if loading fails. |
77 | | - """ |
78 | | - try: |
79 | | - with open(filepath, "r", encoding="utf-8") as f: |
80 | | - return json.load(f) |
81 | | - except (json.JSONDecodeError, KeyError) as e: |
82 | | - print(f" Warning: Could not process file {filepath}. Error: {e}") |
83 | | - return {} |
84 | | - |
85 | | - |
86 | 9 | def detect_sample_error_code(log_text: str) -> str: |
87 | 10 | """ |
88 | 11 | Detect the error code for a single sample from log text. |
@@ -154,8 +77,8 @@ def parse_logs_to_data(log_file: str) -> list: |
154 | 77 | Parse a structured log file generated by the benchmark script and |
155 | 78 | return a list of data dictionaries (one per model-compiler run). |
156 | 79 |
|
157 | | - This function directly parses log files without generating intermediate JSON files. |
158 | | - It automatically handles both Paddle (with subgraph) and PyTorch (without subgraph) samples. |
| 80 | + This function directly parses log files, |
| 81 | + handling both Paddle (with subgraph) and PyTorch (without subgraph) samples. |
159 | 82 |
|
160 | 83 | Args: |
161 | 84 | log_file: Path to the benchmark log file |
@@ -229,8 +152,7 @@ def parse_logs_to_data(log_file: str) -> list: |
229 | 152 | performance_match = patterns["performance"].search(line) |
230 | 153 | if performance_match: |
231 | 154 | key, value_str = performance_match.groups() |
232 | | - # The performance value is a JSON string, so we load it |
233 | | - data["performance"][key.strip()] = json.loads(value_str) |
| 155 | + data["performance"][key.strip()] = value_str.strip() |
234 | 156 | continue |
235 | 157 |
|
236 | 158 | datatype_match = patterns["datatype"].search(line) |
@@ -409,7 +331,6 @@ def get_correctness(dtype: str, t: int, correctness_data: dict, index: int) -> b |
409 | 331 | if atol == 0 and rtol == 0: |
410 | 332 | metric_key_to_check = "[equal]" |
411 | 333 | else: |
412 | | - # Use .2E format to ensure two decimal places and use uppercase E to match JSON log format |
413 | 334 | metric_key_to_check = f"[all_close_atol_{atol:.2E}_rtol_{rtol:.2E}]" |
414 | 335 |
|
415 | 336 | result = correctness_data.get(metric_key_to_check) |
|
0 commit comments