
Commit ee9cb27

update analysis
1 parent 1ecdcdd commit ee9cb27

File tree

1 file changed: +196 / -134 lines changed


graph_net/analysis.py

Lines changed: 196 additions & 134 deletions
@@ -5,135 +5,90 @@
 import argparse
 import os
 import json
-import re
-
-
-def read_speedups_from_log(log_file):
-    speedups = []
-    try:
-        with open(log_file, "r") as f:
-            for line in f:
-                match = re.search(
-                    r"duration.*eager:\s*(\d+\.?\d*).*compiled:\s*(\d+\.?\d*)", line
-                )
-                if match:
-                    eager_time = float(match.group(1))
-                    compiled_time = float(match.group(2))
-                    if compiled_time > 0:
-                        speedups.append(eager_time / compiled_time)
-    except FileNotFoundError:
-        print(f"Error: Log file not found -> {log_file}")
-        return []
-    return speedups
-
-
-def read_speedups_from_json(benchmark_path):
-    speedups = []
+from collections import defaultdict
+
+
+def parse_filename(filename):
+    """
+    Parses the model name and compiler name from a JSON filename.
+    Assumes filename format: <model_name>_<compiler_name>.json
+    """
+    parts = os.path.splitext(filename)[0].split("_")
+    if len(parts) < 2:
+        return None, None
+    compiler = parts[-1]
+    model = "_".join(parts[:-1])
+    return model, compiler
+
+
+def read_all_speedups(benchmark_path):
+    """
+    Recursively finds all .json files in a given path, extracts the speedup values,
+    and organizes them by compiler and category (subdirectory).
+    """
+    data_by_compiler_category = defaultdict(lambda: defaultdict(list))
+    all_compilers = set()
+
     if not os.path.exists(benchmark_path):
         print(f"Error: Path does not exist -> {benchmark_path}")
-        return []
-
-    try:
-        for root, _, files in os.walk(benchmark_path):
-            for file in files:
-                if file.endswith(".json"):
-                    json_file = os.path.join(root, file)
-                    try:
-                        with open(json_file, "r") as f:
-                            data = json.load(f)
-                        if (
-                            "performance" in data
-                            and "speedup" in data["performance"]
-                        ):
-                            speedups.append(data["performance"]["speedup"])
-                        else:
-                            print(
-                                f"Warning: Invalid JSON format (missing 'performance.speedup') -> {json_file}"
-                            )
-                    except json.JSONDecodeError:
-                        print(f"Error: Invalid JSON file -> {json_file}")
-                        continue
-    except Exception as e:
-        print(f"Unexpected error: {str(e)}")
-        return []
+        return {}, []
 
-    return speedups
+    for root, _, files in os.walk(benchmark_path):
+        for file in files:
+            if file.endswith(".json"):
+                _, compiler = parse_filename(file)
+                if not compiler:
+                    continue
 
+                all_compilers.add(compiler)
 
-def analysis(args):
-    compilers = ["CINN", "torch.inductor", "tvm", "XLA", "TensorRT", "BladeDISC"]
-    num_samples_per_compiler = 200
-    data = {"Compiler": [], "log2(speedup)": []}
-
-    # A: CINN (Simulate)
-    # data["log2(speedup)"].extend(
-    #     np.random.normal(loc=0.35, scale=0.2, size=num_samples_per_compiler)
-    # )
-    # data["Compiler"].extend(["CINN"] * num_samples_per_compiler)
-
-    # B: torch.inductor
-    # inductor_log = os.path.join(args.test_compiler_log_file)
-    # inductor_speedup = read_speedups_from_log(inductor_log)
-    inductor_speedup = read_speedups_from_json(args.benchmark_path)
-    print(f"Find {len(inductor_speedup)} samples.")
-    log2_speedups = np.log2(inductor_speedup)
-
-    mask = log2_speedups <= 2
-    filtered_log2_speedups = log2_speedups[mask]
-    filtered_count = len(filtered_log2_speedups)
-    print(
-        f"After filtering, {filtered_count} samples remain (removed {len(log2_speedups) - filtered_count} outliers)."
-    )
+                category = os.path.relpath(root, benchmark_path)
+                if category == ".":
+                    category = os.path.basename(benchmark_path)
 
-    data["log2(speedup)"].extend(filtered_log2_speedups)
-    data["Compiler"].extend(["torch.inductor"] * len(filtered_log2_speedups))
-    # data["log2(speedup)"].extend(log2_speedups)
-    # data["Compiler"].extend(["torch.inductor"] * len(log2_speedups))
-
-    # C: tvm (Simulate)
-    # data["log2(speedup)"].extend(
-    #     np.random.normal(loc=0.3, scale=0.15, size=num_samples_per_compiler)
-    # )
-    # data["Compiler"].extend(["tvm"] * num_samples_per_compiler)
-
-    # D: XLA (Simulate)
-    # data["log2(speedup)"].extend(
-    #     np.concatenate(
-    #         [
-    #             np.random.normal(
-    #                 loc=-0.5, scale=0.1, size=int(num_samples_per_compiler * 0.6)
-    #             ),
-    #             np.random.normal(
-    #                 loc=0.2, scale=0.2, size=int(num_samples_per_compiler * 0.4)
-    #             ),
-    #         ]
-    #     )
-    # )
-    # data["Compiler"].extend(["XLA"] * num_samples_per_compiler)
-
-    # E: TensorRT (Simulate)
-    # data["log2(speedup)"].extend(
-    #     np.random.normal(loc=0.5, scale=0.1, size=num_samples_per_compiler)
-    # )
-    # data["Compiler"].extend(["TensorRT"] * num_samples_per_compiler)
-
-    # F: BladeDISC (Simulate)
-    # data["log2(speedup)"].extend(
-    #     np.random.normal(loc=0.05, scale=0.3, size=num_samples_per_compiler)
-    # )
-    # data["Compiler"].extend(["BladeDISC"] * num_samples_per_compiler)
-
-    df = pd.DataFrame(data)
-    df["Compiler"] = pd.Categorical(df["Compiler"], categories=compilers, ordered=True)
+                json_file = os.path.join(root, file)
+                try:
+                    with open(json_file, "r") as f:
+                        data = json.load(f)
+                    speedup_data = data.get("performance", {}).get("speedup")
 
+                    if isinstance(speedup_data, dict):
+                        # Handle new format with 'e2e' and 'gpu' keys
+                        if "e2e" in speedup_data:
+                            data_by_compiler_category[compiler][category].append(
+                                speedup_data["e2e"]
+                            )
+                        elif "gpu" in speedup_data:
+                            data_by_compiler_category[compiler][category].append(
+                                speedup_data["gpu"]
+                            )
+                    elif isinstance(speedup_data, float):
+                        # Handle old format where speedup is just a number
+                        data_by_compiler_category[compiler][category].append(
+                            speedup_data
+                        )
+
+                except (json.JSONDecodeError, KeyError) as e:
+                    print(
+                        f"Warning: Failed to read or parse file -> {json_file}, Error: {e}"
+                    )
+                    continue
+
+    return data_by_compiler_category, sorted(list(all_compilers))
+
+
+def plot_summary_comparison(df, all_compilers, output_dir):
+    """
+    Generates a summary plot comparing the overall performance of all compilers.
+    """
+    plt.figure(figsize=(12, 7))
     sns.set_theme(style="whitegrid")
-    plt.figure(figsize=(10, 6))
 
     ax = sns.violinplot(
         x="Compiler",
         y="log2(speedup)",
         data=df,
-        order=compilers,
+        order=all_compilers,
         color="white",
         linewidth=0.8,
         inner=None,
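For reference, here is a minimal sketch of the two performance.speedup layouts the new read_all_speedups accepts. The filename and numbers are hypothetical, chosen only to match the <model_name>_<compiler_name>.json convention that parse_filename assumes:

# Hypothetical input file: resnet50_inductor.json
#   parse_filename("resnet50_inductor.json") -> ("resnet50", "inductor")
new_style = {"performance": {"speedup": {"e2e": 1.42, "gpu": 1.37}}}  # "e2e" preferred over "gpu"
old_style = {"performance": {"speedup": 1.42}}  # plain float

for doc in (new_style, old_style):
    speedup = doc.get("performance", {}).get("speedup")
    if isinstance(speedup, dict):
        speedup = speedup.get("e2e", speedup.get("gpu"))
    print(speedup)  # 1.42 for both layouts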
@@ -143,7 +98,7 @@ def analysis(args):
         x="Compiler",
         y="log2(speedup)",
         data=df,
-        order=compilers,
+        order=all_compilers,
         showcaps=False,
         boxprops={"facecolor": "royalblue", "edgecolor": "black"},
         medianprops={"color": "white", "linewidth": 2},
@@ -153,15 +108,128 @@ def analysis(args):
         ax=ax,
     )
 
+    sample_counts = df["Compiler"].value_counts().to_dict()
+    x_labels = [
+        f"{compiler}\n({sample_counts.get(compiler, 0)} samples)"
+        for compiler in all_compilers
+    ]
+
     ax.set_ylabel("log2(speedup)", fontsize=14)
     ax.set_xlabel("")
-    x_labels = [f"{chr(65+i)}\n{compiler}" for i, compiler in enumerate(compilers)]
-    ax.set_xticks(ticks=range(len(x_labels)), labels=x_labels, fontsize=12)
-    ax.tick_params(axis="y", colors="black")
+    ax.set_xticks(ticks=range(len(x_labels)))
+    ax.set_xticklabels(x_labels, rotation=45, ha="right", fontsize=11)
+    ax.set_title("Overall Compiler Performance Comparison", fontsize=16)
+
     sns.despine(trim=True, left=True)
 
-    plt.savefig(args.output_file, dpi=300, bbox_inches="tight")
-    print(f"Figure saved to {args.output_file}")
+    output_file = os.path.join(output_dir, "summary_speedup_comparison.png")
+    plt.savefig(output_file, dpi=300, bbox_inches="tight")
+    print(f"\nSummary comparison plot saved to: {output_file}")
+    plt.close()
+
+
+def plot_per_compiler_detail(df_all, compiler_name, output_dir):
+    """
+    Generates a detailed plot for a single compiler, showing its performance across different categories.
+    """
+    df_compiler = df_all[df_all["Compiler"] == compiler_name]
+    if df_compiler.empty:
+        print(
+            f"Warning: No valid data found for compiler '{compiler_name}'. Skipping detailed plot."
+        )
+        return
+
+    categories = sorted(df_compiler["Category"].unique())
+
+    plt.figure(figsize=(10, 6))
+    sns.set_theme(style="whitegrid")
+
+    ax = sns.violinplot(
+        x="Category",
+        y="log2(speedup)",
+        data=df_compiler,
+        order=categories,
+        color="white",
+        linewidth=0.8,
+        inner=None,
+    )
+
+    sns.boxplot(
+        x="Category",
+        y="log2(speedup)",
+        data=df_compiler,
+        order=categories,
+        showcaps=False,
+        boxprops={"facecolor": "royalblue", "edgecolor": "black"},
+        medianprops={"color": "white", "linewidth": 2},
+        whiskerprops={"color": "black", "linewidth": 1.5},
+        flierprops={"marker": ".", "markerfacecolor": "black"},
+        width=0.1,
+        ax=ax,
+    )
+
+    sample_counts = df_compiler["Category"].value_counts().to_dict()
+    # Use os.path.basename to show only the leaf directory name in each label
+    x_labels = [
+        f"{os.path.basename(cat)}\n(n={sample_counts.get(cat, 0)})"
+        for cat in categories
+    ]
+
+    ax.set_ylabel("log2(speedup)", fontsize=14)
+    ax.set_xlabel("")
+    ax.set_xticks(ticks=range(len(x_labels)))
+    ax.set_xticklabels(x_labels, rotation=45, ha="right", fontsize=11)
+    # Show the compiler name in the plot title
+    ax.set_title(f"Speedup for {compiler_name} by Categories", fontsize=16)
+
+    sns.despine(trim=True, left=True)
+
+    output_file = os.path.join(output_dir, f"{compiler_name}_speedup_by_category.png")
+    plt.savefig(output_file, dpi=300, bbox_inches="tight")
+    print(f"Detailed plot for '{compiler_name}' saved to: {output_file}")
+    plt.close()
+
+
+def analysis(args):
+    data_by_compiler_category, all_compilers = read_all_speedups(args.benchmark_path)
+
+    if not data_by_compiler_category:
+        print("Error: No valid benchmark data found.")
+        return
+
+    print(f"\nDiscovered compilers: {all_compilers}")
+
+    # Prepare data for the DataFrame
+    plot_data = {"Compiler": [], "Category": [], "log2(speedup)": []}
+
+    for compiler, categories_data in data_by_compiler_category.items():
+        for category, speedups in categories_data.items():
+            if not speedups:
+                continue
+
+            speedups_array = np.array(speedups)
+            # Filter out non-positive values before taking the logarithm
+            log2_speedups = np.log2(speedups_array[speedups_array > 0])
+
+            plot_data["log2(speedup)"].extend(log2_speedups)
+            plot_data["Compiler"].extend([compiler] * len(log2_speedups))
+            plot_data["Category"].extend([category] * len(log2_speedups))
+
+    df_all = pd.DataFrame(plot_data)
+
+    if df_all.empty:
+        print("Error: No valid data available for plotting after processing.")
+        return
+
+    # Create the output directory
+    os.makedirs(args.output_dir, exist_ok=True)
+
+    # 1. Generate the summary comparison plot
+    plot_summary_comparison(df_all, all_compilers, args.output_dir)
+
+    # 2. Generate a detailed plot for each compiler
+    for compiler in all_compilers:
+        plot_per_compiler_detail(df_all, compiler, args.output_dir)
 
 
 def main(args):
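One detail of the new analysis() worth noting: non-positive speedups are now dropped before the log transform, replacing the old log2(speedup) <= 2 outlier cutoff. A tiny sketch with made-up values shows the effect:

import numpy as np

speedups_array = np.array([2.0, 1.0, 0.5, 0.0])  # made-up values; log2(0) would be -inf
log2_speedups = np.log2(speedups_array[speedups_array > 0])
print(log2_speedups)  # [ 1.  0. -1.] -- the zero entry is dropped, not clipped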
@@ -170,25 +238,19 @@ def main(args):
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
-        description="Analyse speedup from different compile frameworks/hardware types"
+        description="Analyze speedup from different compile frameworks/hardware types and generate plots."
     )
     parser.add_argument(
         "--benchmark-path",
         type=str,
         required=True,
-        help="Path include multiple benchmark results from test_compiler",
-    )
-    parser.add_argument(
-        "--test-compiler-log-file",
-        type=str,
-        required=False,
-        help="Log from test_compiler (Outdated)",
+        help="Path to the root directory containing benchmark result subdirectories and JSON files.",
     )
     parser.add_argument(
-        "--output-file",
+        "--output-dir",
        type=str,
-        default="compiler_speedup.png",
-        help="Output figure file name",
+        default="analysis_results",
+        help="Directory to save the output figures.",
     )
     args = parser.parse_args()
-    main(args=args)
+    main(args)
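With these changes, a typical invocation would be "python graph_net/analysis.py --benchmark-path <results_root> --output-dir analysis_results", where the results path is a placeholder for a directory of per-category subdirectories of <model_name>_<compiler_name>.json files. It writes summary_speedup_comparison.png plus one <compiler>_speedup_by_category.png per discovered compiler into the output directory.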
