Skip to content

Commit 758b241

Browse files
committed
refactor(plot_violin): read data from log files instead of JSON
- Change the data source from JSON files in subdirectories to direct log file parsing
- Use scan_all_folders to support both a single log file and a directory with multiple log files
- Optimize code structure: eliminate duplicate logic and intermediate variables
- Merge the data processing flow to reduce loop iterations
- Improve variable naming semantics: curve_name -> category_name, speedup_data -> speedup_raw/speedup_numeric
- Use only the 'e2e' speedup, consistent with the core logic (no fallback to 'gpu')
- Reduce the data processing code from 54 lines to 17 lines
1 parent d7cf909 commit 758b241

File tree

1 file changed

+25
-27
lines changed

1 file changed

+25
-27
lines changed

graph_net/plot_violin.py

Lines changed: 25 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ def main():
7777
"--benchmark-path",
7878
type=str,
7979
required=True,
80-
help="Path to the root directory containing benchmark result subdirectories.",
80+
help="Path to a log file (.log or .txt) or a directory containing log files.",
8181
)
8282
parser.add_argument(
8383
"--output-dir",
@@ -87,40 +87,38 @@ def main():
8787
)
8888
args = parser.parse_args()
8989

90-
# 1. Use the utility function to extract data
91-
data_by_subdir = analysis_util.extract_speedup_data_from_subdirs(
92-
args.benchmark_path
93-
)
90+
# 1. Parse log files and extract speedup data
91+
# Use scan_all_folders to handle both single log file and directory with log files
92+
all_samples_by_curve = analysis_util.scan_all_folders(args.benchmark_path)
9493

95-
if not data_by_subdir:
94+
if not all_samples_by_curve:
9695
print("Error: No valid benchmark data was found. Exiting.")
9796
return
9897

99-
# 2. Process data for plotting
98+
# 2. Extract speedup values from samples and process for plotting
10099
plot_data = {"Category": [], "log2(speedup)": []}
101-
for subdir_name, speedups in data_by_subdir.items():
102-
if not speedups:
103-
print(f"Warning: No speedup values found for '{subdir_name}'.")
104-
continue
105-
106-
speedups_array = np.array(speedups)
107-
positive_speedups = speedups_array[speedups_array > 0]
108-
if len(positive_speedups) == 0:
109-
print(
110-
f"Warning: No positive speedup values for '{subdir_name}' to plot (log2 requires positive values)."
111-
)
112-
continue
113-
114-
log2_speedups = np.log2(positive_speedups)
115-
plot_data["log2(speedup)"].extend(log2_speedups)
116-
plot_data["Category"].extend([subdir_name] * len(log2_speedups))
100+
for category_name, samples in all_samples_by_curve.items():
101+
for sample in samples:
102+
speedup_raw = sample.get("performance", {}).get("speedup", {})
103+
104+
# Extract numeric speedup value: use 'e2e' from dict format {"e2e": x, "gpu": y} or direct numeric value
105+
speedup_numeric = None
106+
if isinstance(speedup_raw, dict):
107+
speedup_numeric = speedup_raw.get("e2e")
108+
elif isinstance(speedup_raw, (float, int)):
109+
speedup_numeric = speedup_raw
110+
111+
# Only process positive numeric speedup values (log2 requires positive values)
112+
if isinstance(speedup_numeric, (float, int)) and speedup_numeric > 0:
113+
plot_data["log2(speedup)"].append(np.log2(float(speedup_numeric)))
114+
plot_data["Category"].append(category_name)
115+
116+
if not plot_data["log2(speedup)"]:
117+
print("Error: No valid speedup data was found. Exiting.")
118+
return
117119

118120
df_all = pd.DataFrame(plot_data)
119121

120-
if df_all.empty:
121-
print("Error: No valid data available for plotting after processing.")
122-
return
123-
124122
# 3. Create output directory and generate plot
125123
os.makedirs(args.output_dir, exist_ok=True)
126124
plot_violin(df_all, args.output_dir)

0 commit comments

Comments
 (0)