
Commit 18b3600

[ New Feature ] Generate violin figure (Benchmark) (#241)
* CONTRIBUTE_TUTORIAL_cn.md
* Handle big int tensors by converting to sparse COO
* Update utils (×5)
* Update paddle test compiler
* Add compilation_duration display
* resolve conflict
* Feat: generate violin figure (benchmark)
* revert file
* cn -> en
* Update Analysis
* Update (×9)
* add annotation
* Optimized structure, add e2e time
* Record 2 types of speedup
* update analysis
* Update (×2)
1 parent 3c73c60 commit 18b3600

File tree

3 files changed: +422, −125 lines
graph_net/analysis.py

Lines changed: 256 additions & 0 deletions
@@ -0,0 +1,256 @@
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import argparse
import os
import json
from collections import defaultdict


def parse_filename(filename):
    """
    Parses the model name and compiler name from a JSON filename.
    Follows the output filename format of graph_net.torch.test_compiler:
    <model_name>_<compiler_name>.json
    """
    parts = os.path.splitext(filename)[0].split("_")
    if len(parts) < 2:
        return None, None
    compiler = parts[-1]
    model = "_".join(parts[:-1])
    return model, compiler

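# A quick illustration of the split rule above (hypothetical filenames, not
# taken from the repository):
#
#   parse_filename("resnet50_inductor.json")    -> ("resnet50", "inductor")
#   parse_filename("swin_transformer_tvm.json") -> ("swin_transformer", "tvm")
#
# Everything before the last underscore is treated as the model name, so
# model names may contain underscores while compiler names must not.
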
def read_all_speedups(benchmark_path):
    """
    Recursively finds all .json files in a given path, extracts the speedup
    values, and organizes them by compiler and category (library).
    """
    data_by_compiler_category = defaultdict(lambda: defaultdict(list))
    all_compilers = set()

    if not os.path.exists(benchmark_path):
        print(f"Error: Path does not exist -> {benchmark_path}")
        return {}, []

    for root, _, files in os.walk(benchmark_path):
        for file in files:
            if file.endswith(".json"):
                _, compiler = parse_filename(file)
                if not compiler:
                    continue

                all_compilers.add(compiler)

                category = os.path.relpath(root, benchmark_path)
                if category == ".":
                    category = os.path.basename(benchmark_path)

                json_file = os.path.join(root, file)
                try:
                    with open(json_file, "r") as f:
                        data = json.load(f)
                    speedup_data = data.get("performance", {}).get("speedup")

                    if isinstance(speedup_data, dict):
                        # Handle the new format with 'e2e' and 'gpu' keys;
                        # the end-to-end number is preferred when both exist
                        if "e2e" in speedup_data:
                            data_by_compiler_category[compiler][category].append(
                                speedup_data["e2e"]
                            )
                        elif "gpu" in speedup_data:
                            data_by_compiler_category[compiler][category].append(
                                speedup_data["gpu"]
                            )
                    elif isinstance(speedup_data, (int, float)):
                        # Handle the old format where speedup is a bare number
                        # (accept ints too, since JSON numbers may parse as int)
                        data_by_compiler_category[compiler][category].append(
                            speedup_data
                        )

                except (json.JSONDecodeError, KeyError) as e:
                    print(
                        f"Warning: Failed to read or parse file -> {json_file}, Error: {e}"
                    )
                    continue

    return data_by_compiler_category, sorted(list(all_compilers))

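# For reference, a result file consumed above might look like the following
# (a hypothetical sketch showing only the fields read by this module; real
# test_compiler output may contain additional keys):
#
#   benchmark_logs/timm/resnet50_inductor.json:
#   {
#       "performance": {
#           "speedup": {"e2e": 1.42, "gpu": 1.57}
#       }
#   }
#
# The directory layout supplies the category: a file found under
# <benchmark_path>/timm/ is grouped as category "timm", and files sitting
# directly in <benchmark_path> fall back to its basename.
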
def plot_summary_comparison(df, all_compilers, output_dir):
    """
    Generates a summary plot comparing the overall performance of all compilers.
    """
    plt.figure(figsize=(12, 7))
    sns.set_theme(style="whitegrid")

    ax = sns.violinplot(
        x="Compiler",
        y="log2(speedup)",
        data=df,
        order=all_compilers,
        color="white",
        linewidth=0.8,
        inner=None,
    )

    sns.boxplot(
        x="Compiler",
        y="log2(speedup)",
        data=df,
        order=all_compilers,
        showcaps=False,
        boxprops={"facecolor": "royalblue", "edgecolor": "black"},
        medianprops={"color": "white", "linewidth": 2},
        whiskerprops={"color": "black", "linewidth": 1.5},
        flierprops={"marker": ".", "markerfacecolor": "black"},
        width=0.1,
        ax=ax,
    )

    sample_counts = df["Compiler"].value_counts().to_dict()
    x_labels = [
        f"{compiler}\n({sample_counts.get(compiler, 0)} samples)"
        for compiler in all_compilers
    ]

    ax.set_ylabel("log2(speedup)", fontsize=14)
    ax.set_xlabel("")
    ax.set_xticks(ticks=range(len(x_labels)))
    ax.set_xticklabels(x_labels, rotation=45, ha="right", fontsize=11)
    ax.set_title("Overall Compiler Performance Comparison", fontsize=16)

    sns.despine(trim=True, left=True)

    output_file = os.path.join(output_dir, "summary_speedup_comparison.png")
    plt.savefig(output_file, dpi=300, bbox_inches="tight")
    print(f"\nSummary comparison plot saved to: {output_file}")
    plt.close()

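# Plotting note: the white violin (inner=None) shows the full shape of the
# log2(speedup) distribution, while the narrow boxplot (width=0.1) drawn onto
# the same axes keeps the median and quartiles readable on top of it. The
# log2 scale is symmetric around parity: 0 means no change, +1 means twice
# as fast, -1 means twice as slow.
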
def plot_per_compiler_detail(df_all, compiler_name, output_dir):
    """
    Generates a detailed plot for a single compiler, showing its performance
    across the different categories.
    """
    df_compiler = df_all[df_all["Compiler"] == compiler_name]
    if df_compiler.empty:
        print(
            f"Warning: No valid data found for compiler '{compiler_name}'. Skipping detailed plot."
        )
        return

    categories = sorted(df_compiler["Category"].unique())

    plt.figure(figsize=(10, 6))
    sns.set_theme(style="whitegrid")

    ax = sns.violinplot(
        x="Category",
        y="log2(speedup)",
        data=df_compiler,
        order=categories,
        color="white",
        linewidth=0.8,
        inner=None,
    )

    sns.boxplot(
        x="Category",
        y="log2(speedup)",
        data=df_compiler,
        order=categories,
        showcaps=False,
        boxprops={"facecolor": "royalblue", "edgecolor": "black"},
        medianprops={"color": "white", "linewidth": 2},
        whiskerprops={"color": "black", "linewidth": 1.5},
        flierprops={"marker": ".", "markerfacecolor": "black"},
        width=0.1,
        ax=ax,
    )

    sample_counts = df_compiler["Category"].value_counts().to_dict()
    # Use os.path.basename so only the package name, not the full relative
    # path, appears in the tick labels
    x_labels = [
        f"{os.path.basename(cat)}\n(n={sample_counts.get(cat, 0)})"
        for cat in categories
    ]

    ax.set_ylabel("log2(speedup)", fontsize=14)
    ax.set_xlabel("")
    ax.set_xticks(ticks=range(len(x_labels)))
    ax.set_xticklabels(x_labels, rotation=45, ha="right", fontsize=11)
    ax.set_title(f"Speedup for {compiler_name} by Category", fontsize=16)

    sns.despine(trim=True, left=True)

    output_file = os.path.join(output_dir, f"{compiler_name}_speedup_by_category.png")
    plt.savefig(output_file, dpi=300, bbox_inches="tight")
    print(f"Detailed plot for '{compiler_name}' saved to: {output_file}")
    plt.close()

def analysis(args):
    data_by_compiler_category, all_compilers = read_all_speedups(args.benchmark_path)

    if not data_by_compiler_category:
        print("Error: No valid benchmark data found.")
        return

    print(f"\nDiscovered compilers: {all_compilers}")

    # Prepare data for the DataFrame
    plot_data = {"Compiler": [], "Category": [], "log2(speedup)": []}

    for compiler, categories_data in data_by_compiler_category.items():
        for category, speedups in categories_data.items():
            if not speedups:
                continue

            speedups_array = np.array(speedups)
            # Filter out non-positive values before taking the logarithm
            log2_speedups = np.log2(speedups_array[speedups_array > 0])

            plot_data["log2(speedup)"].extend(log2_speedups)
            plot_data["Compiler"].extend([compiler] * len(log2_speedups))
            plot_data["Category"].extend([category] * len(log2_speedups))

    df_all = pd.DataFrame(plot_data)

    if df_all.empty:
        print("Error: No valid data available for plotting after processing.")
        return

    # Create the output directory
    os.makedirs(args.output_dir, exist_ok=True)

    # 1. Generate the summary comparison plot
    plot_summary_comparison(df_all, all_compilers, args.output_dir)

    # 2. Generate a detailed plot for each compiler
    for compiler in all_compilers:
        plot_per_compiler_detail(df_all, compiler, args.output_dir)


def main(args):
    analysis(args)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Analyze speedups from different compile frameworks/hardware types and generate plots."
    )
    parser.add_argument(
        "--benchmark-path",
        type=str,
        required=True,
        help="Path to the root directory containing benchmark result subdirectories and JSON files.",
    )
    parser.add_argument(
        "--output-dir",
        type=str,
        default="analysis_results",
        help="Directory to save the output figures.",
    )
    args = parser.parse_args()
    main(args)
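
A minimal invocation once benchmark results exist (a sketch, not part of the commit: it assumes graph_net is importable as a package, mirroring how graph_net.torch.test_compiler is run elsewhere in this change, and that results were written under benchmark_logs/):

    python -m graph_net.analysis \
        --benchmark-path benchmark_logs \
        --output-dir analysis_results

This writes summary_speedup_comparison.png plus one <compiler>_speedup_by_category.png per discovered compiler into the output directory.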

graph_net/benchmark_demo.sh

Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
#!/bin/bash
# A benchmark script for GraphNet models.

benchmark_dir="/work/GraphNet/benchmark_logs"
samples_dir="/work/GraphNet/samples"
global_log="${benchmark_dir}/global.log"

mkdir -p "${benchmark_dir}"
> "$global_log"

for package_path in "${samples_dir}"/*/; do
    package_name=$(basename "${package_path%/}")
    output_dir="${benchmark_dir}/${package_name}"
    mkdir -p "${output_dir}"

    for model_path in "${package_path}"*/; do
        model_name=$(basename "${model_path%/}")
        {
            if ls "${output_dir}"/*"${model_name}"*.json > /dev/null 2>&1; then
                echo "[$(date)] SKIPPING: ${package_name}/${model_name} (JSON result already exists)"
            else
                echo "[$(date)] STARTING: ${package_name}/${model_name}"

                python -m graph_net.torch.test_compiler \
                    --model-path "${model_path}" \
                    --compiler "inductor" \
                    --warmup 3 \
                    --trials 10 \
                    --device "cuda" \
                    --output-dir "${output_dir}"

                echo "[$(date)] FINISHED: ${package_name}/${model_name}"
            fi
        } >> "$global_log" 2>&1 &
    done
done

echo "[$(date)] All tasks launched. Waiting for remaining background jobs to complete..." | tee -a "$global_log"
wait
echo "[$(date)] All jobs finished. Script completed." | tee -a "$global_log"
