Skip to content

Commit fd025b0

Browse files
authored
Add a demo and test of range_decomposer_validator, Update analysis script (#311)
* Update utils * Update * Update * Update * Update * Add nope backend on torch * Add nope backend on torch * Add nope in paddle * Write a demo of range_decomposer_validator * Write a demo of range_decomposer_validator * Write a demo of range_decomposer_validator * Update * Update analysis script
1 parent 6e83641 commit fd025b0

32 files changed

+992
-411
lines changed

README.md

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,21 +72,27 @@ python -m graph_net.log2json \
7272

7373
**Step 3: Analysis**
7474

75-
Use `graph_net.violin_analysis` to generate [violin plot](https://en.m.wikipedia.org/wiki/Violin_plot) and `graph_net.S_analysis` to generate S and ES plot based on the JSON results.
75+
Use the three scripts `graph_net.plot_St`, `graph_net.plot_ESt` and `graph_net.plot_violin` to generate the St plot, the ESt plot, and the [violin plot](https://en.m.wikipedia.org/wiki/Violin_plot) from the JSON results.
7676

7777
```bash
78-
python -m graph_net.violin_analysis \
78+
python -m graph_net.plot_St \
7979
--benchmark-path $GRAPH_NET_BENCHMARK_PATH/JSON_results/ \
80-
--output-dir $GRAPH_NET_BENCHMARK_PATH
80+
--output-dir $GRAPH_NET_BENCHMARK_PATH \
81+
--negative-speedup-penalty penalty/power/for/negative/speedup \
82+
--fpdb base/penalty/for/severe/errors
8183

82-
python -m graph_net.S_analysis \
84+
python -m graph_net.plot_ESt \
8385
--benchmark-path $GRAPH_NET_BENCHMARK_PATH/JSON_results/ \
8486
--output-dir $GRAPH_NET_BENCHMARK_PATH \
8587
--negative-speedup-penalty penalty/power/for/negative/speedup \
8688
--fpdb base/penalty/for/severe/errors
8789

8890
# Note: If --negative-speedup-penalty is omitted, p=0 is used by default.
8991
# If --fpdb is omitted, b=0.1 is used by default.
92+
93+
python -m graph_net.plot_violin \
94+
--benchmark-path $GRAPH_NET_BENCHMARK_PATH/JSON_results/ \
95+
--output-dir $GRAPH_NET_BENCHMARK_PATH
9096
```
9197

9298
The scripts expect a directory layout of the form `/benchmark_path/category_name/`; each item on the x-axis is identified by the name of its sub-directory. After execution, several summary plots of the results by category (model tasks, libraries, ...) are exported to `$GRAPH_NET_BENCHMARK_PATH`.
Lines changed: 68 additions & 228 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,77 @@
11
import os
22
import json
3-
import argparse
4-
import re
53
import numpy as np
6-
import matplotlib.pyplot as plt
74
from scipy.stats import gmean
8-
from collections import OrderedDict
9-
from scipy.optimize import curve_fit
5+
from collections import OrderedDict, defaultdict
106
from graph_net.config.datatype_tolerance_config import get_precision
117

128

13-
# ---------- 1. Data Loading and Processing ----------
9+
def extract_speedup_data_from_subdirs(benchmark_path: str) -> dict:
    """
    Collect speedup values from the JSON files under each immediate
    subdirectory of ``benchmark_path``.

    Each immediate subdirectory is treated as one category and is walked
    recursively; every ``*.json`` file found contributes at most one value,
    read from ``data["performance"]["speedup"]``:

    * if ``speedup`` is a dict, the ``"e2e"`` entry is preferred and
      ``"gpu"`` is the fallback;
    * if ``speedup`` is a plain number, it is used directly.

    Files that cannot be read or parsed are reported with a warning and
    skipped. Files whose content does not have the expected shape (no
    ``performance`` mapping, no numeric speedup) are skipped silently, so a
    single malformed record never aborts the scan.

    Args:
        benchmark_path: Directory whose immediate subdirectories are the
            categories to scan.

    Returns:
        A plain ``dict`` mapping ``{subdir_name: [speedup_values]}``.
        Subdirectories that yield no value are omitted. An empty dict is
        returned when the path is missing, cannot be listed, or has no
        subdirectories. Subdirectories and files are visited in sorted
        order so the result is reproducible across runs and platforms.
    """
    if not os.path.exists(benchmark_path):
        print(f"Error: Path does not exist -> {benchmark_path}")
        return {}

    try:
        entries = os.listdir(benchmark_path)
    except OSError as e:
        # Also covers a path that is a regular file (NotADirectoryError) or
        # an unreadable directory (PermissionError), not only a missing one.
        print(f"Error: Cannot list benchmark path -> {benchmark_path}, Error: {e}")
        return {}

    subdirs = sorted(
        entry
        for entry in entries
        if os.path.isdir(os.path.join(benchmark_path, entry))
    )
    if not subdirs:
        print(f"Warning: No subdirectories found in -> {benchmark_path}")
        return {}

    print(f"Found subdirectories to process: {', '.join(subdirs)}")

    data_by_subdir = defaultdict(list)
    for subdir_name in subdirs:
        current_dir_path = os.path.join(benchmark_path, subdir_name)
        for root, dirs, files in os.walk(current_dir_path):
            # Sorting in place makes os.walk descend in a stable order.
            dirs.sort()
            for file_name in sorted(files):
                if not file_name.endswith(".json"):
                    continue
                json_file = os.path.join(root, file_name)
                try:
                    with open(json_file, "r", encoding="utf-8") as f:
                        record = json.load(f)
                except (OSError, ValueError) as e:
                    # ValueError covers json.JSONDecodeError and
                    # UnicodeDecodeError; OSError covers unreadable files.
                    print(
                        f"Warning: Failed to read or parse file -> {json_file}, Error: {e}"
                    )
                    continue

                speedup = _select_speedup(record)
                if speedup is not None:
                    data_by_subdir[subdir_name].append(speedup)

    # Hand back a plain dict so that a lookup of an unknown category by the
    # caller raises KeyError instead of silently creating an empty entry.
    return dict(data_by_subdir)


def _is_real_number(value) -> bool:
    """Return True for int/float values, excluding bool (an int subclass)."""
    return isinstance(value, (int, float)) and not isinstance(value, bool)


def _select_speedup(record):
    """Return the numeric speedup stored in one parsed JSON record, or None.

    Prefers ``speedup["e2e"]`` and falls back to ``speedup["gpu"]``; a bare
    numeric ``speedup`` is returned as is. Any other shape yields None.
    """
    if not isinstance(record, dict):
        return None
    performance = record.get("performance")
    if not isinstance(performance, dict):
        return None

    speedup = performance.get("speedup")
    if isinstance(speedup, dict):
        for key in ("e2e", "gpu"):
            candidate = speedup.get(key)
            if _is_real_number(candidate):
                return candidate
        return None
    return speedup if _is_real_number(speedup) else None
73+
74+
1475
def load_json_file(filepath: str) -> dict:
1576
"""
1677
Safely load a JSON file and return data, return an empty dictionary if loading fails.
@@ -81,7 +142,6 @@ def scan_all_folders(benchmark_path: str) -> dict:
81142
return all_results
82143

83144

84-
# ---------- 2. Core Calculation Logic ----------
85145
def get_correctness(dtype: str, t: int, correctness_data: dict, index: int) -> bool:
86146
"""
87147
Based on tolerance, data type, and output index, find the actual atol/rtol values from the config and get the correctness result for a single output.
@@ -339,224 +399,4 @@ def print_stat_info(
339399
print(f" - pi: {pi}")
340400

341401
return s_scores, s_scores_fake_degrad
342-
343-
344-
# ---------- 3. Plotting Functions ----------
345-
def plot_S_results(s_scores: dict, cli_args: argparse.Namespace):
346-
"""
347-
Plot S(t) curve
348-
"""
349-
plt.style.use("seaborn-v0_8-whitegrid")
350-
fig, ax = plt.subplots(figsize=(14, 8))
351-
352-
prop_cycle = plt.rcParams["axes.prop_cycle"]
353-
colors = prop_cycle.by_key()["color"]
354-
355-
all_x_coords = []
356-
357-
for idx, (folder_name, scores_dict) in enumerate(s_scores.items()):
358-
plot_points = []
359-
for t_key, score in scores_dict.items():
360-
if t_key <= 0:
361-
all_x_coords.append(t_key)
362-
plot_points.append({"x": t_key, "y": score})
363-
364-
plot_points.sort(key=lambda p: p["x"])
365-
366-
x_vals = np.array([p["x"] for p in plot_points])
367-
y_vals = np.array([p["y"] for p in plot_points])
368-
369-
color = colors[idx % len(colors)]
370-
ax.plot(
371-
x_vals,
372-
y_vals,
373-
"o-",
374-
color=color,
375-
label=folder_name,
376-
linewidth=2,
377-
markersize=6,
378-
)
379-
380-
p = cli_args.negative_speedup_penalty
381-
config = f"p = {p}, b = {cli_args.fpdb}"
382-
fig.text(0.5, 0.9, config, ha="center", fontsize=16, style="italic")
383-
384-
ax.set_xlabel("t", fontsize=18)
385-
ax.set_ylabel("S(t)", fontsize=18)
386-
ax.tick_params(axis="both", which="major", labelsize=14)
387-
388-
if all_x_coords:
389-
x_min = int(np.floor(min(all_x_coords)))
390-
x_max = int(np.ceil(max(all_x_coords)))
391-
ax.set_xticks(np.arange(x_min, x_max + 1))
392-
393-
ax.xaxis.grid(True, which="major", lw=0.8, ls=":", color="grey", alpha=0.5)
394-
ax.yaxis.grid(True, which="major", lw=0.8, ls=":", color="grey", alpha=0.5)
395-
396-
ax.legend(fontsize=16, loc="best")
397-
output_file = os.path.join(cli_args.output_dir, "S_result.png")
398-
plt.savefig(output_file, dpi=300, bbox_inches="tight")
399-
print(f"\nComparison plot saved to {output_file}")
400-
401-
402-
def plot_ES_results(s_scores: dict, cli_args: argparse.Namespace):
403-
"""
404-
Plot ES(t) curve
405-
"""
406-
plt.style.use("seaborn-v0_8-whitegrid")
407-
fig, ax = plt.subplots(figsize=(14, 8))
408-
409-
prop_cycle = plt.rcParams["axes.prop_cycle"]
410-
colors = prop_cycle.by_key()["color"]
411-
412-
all_x_coords = []
413-
414-
for idx, (folder_name, scores_dict) in enumerate(s_scores.items()):
415-
plot_points = []
416-
for (
417-
t_key,
418-
score_data,
419-
) in scores_dict.items(): # Change variable name to score_data
420-
# Access the 'score' key from the nested dictionary
421-
if isinstance(score_data, dict):
422-
score = score_data["score"]
423-
else:
424-
score = score_data
425-
426-
all_x_coords.append(t_key)
427-
plot_points.append({"x": t_key, "y": score})
428-
429-
# Sort by x value
430-
plot_points.sort(key=lambda p: p["x"])
431-
432-
x_vals = np.array([p["x"] for p in plot_points])
433-
y_vals = np.array([p["y"] for p in plot_points])
434-
435-
color = colors[idx % len(colors)]
436-
437-
# Find index where t=0
438-
zero_index = np.where(x_vals == 0)[0][0] if 0 in x_vals else None
439-
440-
# If t=0 exists, plot in segments
441-
if zero_index is not None:
442-
# Plot continuous line for t <= 0
443-
ax.plot(
444-
x_vals[: zero_index + 1],
445-
y_vals[: zero_index + 1],
446-
"o-",
447-
color=color,
448-
label=folder_name,
449-
linewidth=2,
450-
markersize=6,
451-
)
452-
# Plot stepwise portion for t > 0
453-
ax.plot(
454-
x_vals[zero_index:],
455-
y_vals[zero_index:],
456-
"o-",
457-
color=color,
458-
linewidth=2,
459-
markersize=6,
460-
drawstyle="steps-post",
461-
)
462-
else:
463-
# If no t=0, plot the entire curve as a regular line
464-
ax.plot(
465-
x_vals,
466-
y_vals,
467-
"o-",
468-
color=color,
469-
label=folder_name,
470-
linewidth=2,
471-
markersize=6,
472-
)
473-
474-
p = cli_args.negative_speedup_penalty
475-
config = f"p = {p}, b = {cli_args.fpdb}"
476-
fig.text(0.5, 0.9, config, ha="center", fontsize=16, style="italic")
477-
478-
ax.set_xlabel("t", fontsize=18)
479-
ax.set_ylabel("ES(t)", fontsize=18)
480-
ax.tick_params(axis="both", which="major", labelsize=14)
481-
482-
if all_x_coords:
483-
x_min = int(np.floor(min(all_x_coords)))
484-
x_max = int(np.ceil(max(all_x_coords)))
485-
ax.set_xticks(np.arange(x_min, x_max + 1))
486-
487-
ax.xaxis.grid(True, which="major", lw=0.7, ls=":", color="grey", alpha=0.5)
488-
ax.yaxis.grid(True, which="major", lw=0.7, ls=":", color="grey", alpha=0.5)
489-
490-
ax.legend(fontsize=16, loc="best")
491-
output_file = os.path.join(cli_args.output_dir, "ES_result.png")
492-
plt.savefig(output_file, dpi=300, bbox_inches="tight")
493-
print(f"\nComparison plot saved to {output_file}")
494-
495-
496-
# ---------- 4. Main Program Entry ----------
497-
def main():
498-
"""
499-
Main execution function.
500-
"""
501-
parser = argparse.ArgumentParser(
502-
description="Load benchmark JSON records from multiple sub-folders, "
503-
"calculate aggregated S(t) scores, and plot multi-curve results.",
504-
formatter_class=argparse.RawTextHelpFormatter,
505-
)
506-
parser.add_argument(
507-
"--benchmark-path",
508-
type=str,
509-
required=True,
510-
help="Path to the directory containing sub-folders of benchmark JSON files.",
511-
)
512-
parser.add_argument(
513-
"--output-dir",
514-
type=str,
515-
default="analysis_results",
516-
help="Output directory path for saving plots. Default: analysis_results",
517-
)
518-
parser.add_argument(
519-
"--negative-speedup-penalty",
520-
type=float,
521-
default=0.0,
522-
help="Penalty power (p) for negative speedup (speedup < 1). Formula: speedup**(p+1). Default: 0.0.",
523-
)
524-
parser.add_argument(
525-
"--fpdb",
526-
type=float,
527-
default=0.1,
528-
help="Base penalty for severe errors (e.g., correctness failure, crashes).",
529-
)
530-
args = parser.parse_args()
531-
532-
# 1. Scan all subdirectories
533-
all_results = scan_all_folders(args.benchmark_path)
534-
if not all_results:
535-
print("No valid data found. Exiting.")
536-
return
537-
538-
# 2. Calculate S scores for each curve
539-
all_s_scores = {}
540-
all_s_scores_fake_degrad = {}
541-
542-
for folder_name, samples in all_results.items():
543-
s_scores, s_scores_fake_degrad = calculate_s_scores(
544-
samples,
545-
folder_name,
546-
negative_speedup_penalty=args.negative_speedup_penalty,
547-
fpdb=args.fpdb,
548-
)
549-
all_s_scores[folder_name] = s_scores
550-
all_s_scores_fake_degrad[folder_name] = s_scores_fake_degrad
551-
552-
# 3. Plot S and ES curves
553-
if any(all_s_scores.values()):
554-
os.makedirs(args.output_dir, exist_ok=True)
555-
plot_S_results(all_s_scores, args)
556-
plot_ES_results(all_s_scores_fake_degrad, args)
557-
else:
558-
print("No S(t) scores were calculated. Skipping plot generation.")
559-
560-
561-
if __name__ == "__main__":
562-
main()
402+
return s_scores, es_scores

0 commit comments

Comments
 (0)