Commit 658e4a5

Merge pull request #7 from converged-computing/more-plots
feat: use a database proper
2 parents: 94833c6 + 895bb8b

2,572 files changed (+35,217 / -1,504,265 lines)

analysis/bdas/1-run-analysis.py

Lines changed: 189 additions & 0 deletions
@@ -0,0 +1,189 @@
#!/usr/bin/env python3

import argparse
import os
import sys
import re

import matplotlib.pylab as plt
import seaborn as sns

here = os.path.dirname(os.path.abspath(__file__))
analysis_root = os.path.dirname(here)
root = os.path.dirname(analysis_root)
sys.path.insert(0, analysis_root)

import performance_study as ps

sns.set_theme(style="whitegrid", palette="muted")

# These are files I found erroneous - no result, or incomplete result.
# Details included with each, and more exploration is likely needed to
# quantify error types.
errors = []
error_regex = "(%s)" % "|".join(errors)


def get_parser():
    parser = argparse.ArgumentParser(
        description="Run analysis",
        formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument(
        "--root",
        help="root directory with experiments",
        default=os.path.join(root, "experiments"),
    )
    parser.add_argument(
        "--non-anon",
        help="Generate non-anon",
        action="store_true",
        default=False,
    )
    parser.add_argument(
        "--out",
        help="directory to save parsed results",
        default=os.path.join(here, "data"),
    )
    return parser


def main():
    """
    Find application result files to parse.
    """
    parser = get_parser()
    args, _ = parser.parse_known_args()

    # Output images and data
    outdir = os.path.abspath(args.out)
    indir = os.path.abspath(args.root)

    # Create the output directory if it does not exist yet
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # Find input files (skip anything with test)
    files = ps.find_inputs(indir, "bdas")
    if not files:
        raise ValueError(f"There are no input files in {indir}")

    # Parse raw data, saving it to file, then plot
    df = parse_data(indir, outdir, files)
    plot_results(df, outdir, args.non_anon)


def parse_data(indir, outdir, files):
    """
    Parse filepaths for environment, etc., and results files for data.
    """
    # Metrics here will be figures of merit, and seconds runtime
    p = ps.ProblemSizeParser("bdas")
    data = {}

    # It's important to just parse raw data once, and then use the intermediate
    for filename in files:
        exp = ps.ExperimentNameParser(filename, indir)
        if exp.prefix not in data:
            data[exp.prefix] = []

        # Skip size 2: testing
        if exp.size == 2:
            continue

        # kmeans, princcomp, or svm
        app = os.path.basename(filename).split("-")[-1].replace(".r.out", "")

        # Set the parsing context for the result data frame
        p.set_context(exp.cloud, exp.env, exp.env_type, exp.size)

        # Sanity check the files we found
        print(filename)
        exp.show()

        item = ps.read_file(filename)
        jobs = ps.parse_flux_jobs(item)
        for job, metadata in jobs.items():
            print(metadata)
            # The last line of each job log holds the min, mean, and max values
            minimum, mean, maximum = [
                x for x in metadata["log"].split("\n")[-1].split(" ") if x.strip()
            ]
            p.add_result("duration", metadata["duration"], app)
            p.add_result("minimum", minimum, app)
            p.add_result("mean", mean, app)
            p.add_result("maximum", maximum, app)

    print("Done parsing bdas results!")

    # Save data to file first (note that jobs holds the last parsed file here)
    p.df.to_csv(os.path.join(outdir, "bdas-results.csv"))
    ps.write_json(jobs, os.path.join(outdir, "flux-jobs-and-events.json"))
    return p.df


def plot_results(df, outdir, non_anon=False):
    """
    Plot analysis results
    """
    # Let's get some shoes! Err, plots.
    # Make an image outdir
    img_outdir = os.path.join(outdir, "img")
    if not os.path.exists(img_outdir):
        os.makedirs(img_outdir)

    # We are going to put the plots together, and the colors need to match!
    cloud_colors = {}
    for cloud in df.experiment.unique():
        cloud_colors[cloud] = ps.match_color(cloud)

    # Within a setup, compare between experiments for GPU and CPU
    frames = {}
    for env in df.env_type.unique():
        subset = df[df.env_type == env]

        # Make a plot for seconds runtime, and each FOM set.
        # We can look at the metric across sizes, colored by experiment
        for metric in subset.metric.unique():
            metric_df = subset[subset.metric == metric]
            frames[metric] = {"cpu": metric_df}

    for metric, data_frames in frames.items():
        fig = plt.figure(figsize=(9, 3.3))
        gs = plt.GridSpec(1, 2, width_ratios=[3, 1])
        axes = []
        axes.append(fig.add_subplot(gs[0, 0]))
        axes.append(fig.add_subplot(gs[0, 1]))

        sns.set_style("whitegrid")
        sns.barplot(
            data_frames["cpu"],
            ax=axes[0],
            x="nodes",
            y="value",
            hue="problem_size",
            err_kws={"color": "darkred"},
            # palette=cloud_colors,
            order=[4, 8, 16, 32, 64],
        )
        title = " ".join([x.capitalize() for x in metric.split("_")])
        axes[0].set_title(f"BDAS {title} (CPU)", fontsize=14)
        axes[0].set_ylabel("Seconds", fontsize=14)
        axes[0].set_xlabel("Nodes", fontsize=14)
        handles, labels = axes[0].get_legend_handles_labels()
        labels = ["/".join(x.split("/")[0:2]) for x in labels]
        axes[1].legend(
            handles, labels, loc="center left", bbox_to_anchor=(-0.1, 0.5), frameon=False
        )
        for ax in axes[0:1]:
            ax.get_legend().remove()
        axes[1].axis("off")

        plt.tight_layout()
        plt.savefig(os.path.join(img_outdir, f"bdas-{metric}-cpu.svg"))
        plt.savefig(os.path.join(img_outdir, f"bdas-{metric}-cpu.png"))
        plt.clf()

        # Print the total number of data points
        print(f'Total number of CPU datum: {data_frames["cpu"].shape[0]}')


if __name__ == "__main__":
    main()
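The script resolves performance_study from the parent analysis directory, and --root defaults to an experiments tree two levels above the script. Assuming that layout, a typical invocation might be:

    python3 1-run-analysis.py --root ../../experiments --out ./data

Parsed results land in ./data as bdas-results.csv, and plots are written under ./data/img.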

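A minimal sketch of the figure-of-merit extraction in parse_data, assuming each job log ends with a line of three whitespace-separated numbers (minimum, mean, maximum); the log text below is hypothetical, not from a real run:

    # Hypothetical job log tail; only the final line matters for the parse
    log = "Rscript output...\n0.42  0.57  0.91"

    # Same expression as in parse_data: split the last line, drop empty tokens
    minimum, mean, maximum = [x for x in log.split("\n")[-1].split(" ") if x.strip()]
    print(minimum, mean, maximum)  # 0.42 0.57 0.91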
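The errors list is empty in this commit, so error_regex is built but never applied. A hedged sketch of how such a pattern could screen out known-bad result files; the marker strings and the empty-list guard are assumptions, not part of the commit:

    import re

    # Hypothetical markers for runs with missing or incomplete results
    errors = ["no-result", "incomplete"]
    error_regex = "(%s)" % "|".join(errors)

    def is_erroneous(filename):
        # Guard the empty case: "()" would otherwise match every filename
        return bool(errors) and re.search(error_regex, filename) is not None

    print(is_erroneous("size-64-no-result-kmeans.r.out"))  # True
    print(is_erroneous("size-64-kmeans.r.out"))            # False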
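The plotting loop parks the legend on a second, otherwise-empty GridSpec axis so the bars keep the full panel width. A stripped-down sketch of that layout trick; the series data and output filename are placeholders:

    import matplotlib.pylab as plt

    fig = plt.figure(figsize=(9, 3.3))
    gs = plt.GridSpec(1, 2, width_ratios=[3, 1])
    ax_plot = fig.add_subplot(gs[0, 0])
    ax_legend = fig.add_subplot(gs[0, 1])

    ax_plot.plot([1, 2, 3], [1, 4, 9], label="series-a")
    handles, labels = ax_plot.get_legend_handles_labels()

    # Park the legend on the spare axis and hide that axis entirely
    ax_legend.legend(handles, labels, loc="center left", frameon=False)
    ax_legend.axis("off")

    plt.tight_layout()
    plt.savefig("layout-sketch.png")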