Skip to content

Commit 115bc0b

Browse files
author
Inkedstinct
committed
feat(analysis): Add visualisation scripts for RQ1/2/3 of the 2020 paper
1 parent 5ba2b72 commit 115bc0b

File tree

10 files changed

+1323
-0
lines changed

10 files changed

+1323
-0
lines changed

analysis/data_analysis.py

Lines changed: 286 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,286 @@
1+
# IMPORTS
2+
import os
3+
import sys
4+
import polars as pl
5+
import schemas
6+
import extract
7+
import load
8+
import rq1
9+
import rq2
10+
import rq3
11+
import rq34
12+
import visualization
13+
14+
15+
def _cpu_record(architecture, vendor, generation, launch_date):
    """Build one CPU metadata record for ``vendor_generation_map``."""
    return {
        "architecture": architecture,
        "vendor": vendor,
        "generation": generation,
        "launch_date": launch_date,
    }


# Maps a processor version string (as reported in the node inventories) to
# its microarchitecture, vendor, generation number and launch quarter.
vendor_generation_map = {
    # Intel parts
    "E5-2620 v4": _cpu_record("Broadwell-E", "Intel", 6, "Q1 2016"),
    "E5-2630L v4": _cpu_record("Broadwell-E", "Intel", 6, "Q1 2016"),
    "E5-2698 v4": _cpu_record("Broadwell-E", "Intel", 6, "Q1 2016"),
    "E5-2630 v3": _cpu_record("Haswell-E", "Intel", 5, "Q3 2014"),
    "Gold 5220": _cpu_record("Cascade Lake-SP", "Intel", 10, "Q2 2019"),
    "Gold 5218": _cpu_record("Cascade Lake-SP", "Intel", 10, "Q2 2019"),
    "i7-9750H": _cpu_record("Coffee Lake", "Intel", 9, "Q2 2019"),
    "Silver 4314": _cpu_record("Ice Lake-SP", "Intel", 10, "Q2 2021"),
    "Gold 5320": _cpu_record("Ice Lake-SP", "Intel", 10, "Q2 2021"),
    "Gold 6126": _cpu_record("Skylake-SP", "Intel", 6, "Q3 2017"),
    "Gold 6130": _cpu_record("Skylake-SP", "Intel", 6, "Q3 2017"),
    "E5-2620": _cpu_record("Sandy Bridge-EP", "Intel", 3, "Q1 2012"),
    "E5-2630": _cpu_record("Sandy Bridge-EP", "Intel", 3, "Q1 2012"),
    "E5-2630L": _cpu_record("Sandy Bridge-EP", "Intel", 3, "Q1 2012"),
    "E5-2660": _cpu_record("Sandy Bridge-EP", "Intel", 3, "Q1 2012"),
    # AMD EPYC parts
    "7301": _cpu_record("Zen", "AMD", 1, "Q2 2017"),
    "7352": _cpu_record("Zen 2", "AMD", 2, "Q3 2019"),
    "7452": _cpu_record("Zen 2", "AMD", 2, "Q3 2019"),
    "7642": _cpu_record("Zen 2", "AMD", 2, "Q3 2019"),
    "7742": _cpu_record("Zen 2", "AMD", 2, "Q3 2019"),
}
137+
138+
139+
def main():
    """Load per-OS energy statistics and render the RQ1/RQ2/RQ3/RQ4 figures.

    Pass ``test`` as the first CLI argument to work on the small sample
    CSV files instead of the full experiment results.
    """
    # Original code indexed sys.argv[1] unconditionally, which crashed
    # with IndexError when the script was run without arguments; default
    # to the full (non-test) data set in that case.
    test = len(sys.argv) > 1 and sys.argv[1] == "test"

    debian11_energy_stats_df = energy_for_os(
        "debian11-5.10-0",
        r"batches/debian11-5\.10-0\.d/results-debian11-5\.10-0\.d/([^/]+)/([^/]+)/([^/]+)/[^_]*_([^_]+).*",
        test,
    )
    ubuntu2404_energy_stats_df = energy_for_os(
        "ubuntu2404nfs-6.8-0",
        r"batches/ubuntu2404nfs-6\.8-0\.d/results-ubuntu2404nfs-6\.8-0\.d/([^/]+)/([^/]+)/([^/]+)/[^_]*_([^_]+).*",
        test,
    )

    # Return value intentionally unused: the call still builds/caches the
    # powerapi energy-stats CSV as a side effect of energy_for_os.
    energy_for_os(
        "powerapi",
        r"batches/powerapi\.d/results-powerapi\.d/([^/]+)/([^/]+)/([^/]+)/[^_]*_([^_]+).*",
        test,
    )

    # RQ1/RQ3: correlations between perf- and hwpc-measured energy.
    rq3.correlation_perf_perf_hwpc_hwpc_cv_os(ubuntu2404_energy_stats_df, debian11_energy_stats_df, "alone")
    rq1.correlation_perf_hwpc_cv(debian11_energy_stats_df, "alone", "debian11 Kernel 5.10")
    rq1.correlation_perf_hwpc_cv(debian11_energy_stats_df, "not_alone", "debian11 Kernel 5.10")
    rq1.correlation_perf_hwpc_cv(ubuntu2404_energy_stats_df, "alone", "ubuntu2404 Kernel 6.8")
    rq1.correlation_perf_hwpc_cv(ubuntu2404_energy_stats_df, "not_alone", "ubuntu2404 Kernel 6.8")

    # RQ2: per-processor coefficient-of-variation boxplots.
    rq2.boxplots_perf_hwpc_cv_processor(debian11_energy_stats_df, "processor_detail", "pkg_coefficient_of_variation", "job", "25 000 Operations")

    # NOTE: a concatenated (debian+ubuntu) frame was built here in the
    # original code but never used afterwards; the dead computation has
    # been removed.

    # Pair the two OS runs on identical (node, ops, cores, job) settings.
    joined_df = ubuntu2404_energy_stats_df.join(
        debian11_energy_stats_df,
        on=["node", "nb_ops_per_core", "nb_core", "job"],
        suffix="_debian",
    )

    # Get rid of 25 OPS as it may be unrelevant
    joined_df = joined_df.sql("SELECT * FROM self WHERE nb_ops_per_core > 25")

    # RQ3/4
    rq34.os_comparison_boxplots_processor_versions_pkg_all(
        [debian11_energy_stats_df, ubuntu2404_energy_stats_df]
    )
    rq34.os_comparison_boxplots_processor_versions_ram_all(
        [debian11_energy_stats_df, ubuntu2404_energy_stats_df]
    )
    print("Heatmaps pkg perf alone")
    rq34.os_comparison_heatmap_processor_versions_pkg_nb_ops(joined_df.sql("SELECT * FROM self WHERE job = 'perf_alone'"), "PERF")
    print("Heatmaps pkg hwpc alone")
    rq34.os_comparison_heatmap_processor_versions_pkg_nb_ops(joined_df.sql("SELECT * FROM self WHERE job = 'hwpc_alone'"), "HWPC")
    print("Heatmaps ram perf alone")
    rq34.os_comparison_heatmap_processor_versions_ram_nb_ops(joined_df.sql("SELECT * FROM self WHERE job = 'perf_alone'"), "PERF")
    print("Heatmaps ram hwpc alone")
    rq34.os_comparison_heatmap_processor_versions_ram_nb_ops(joined_df.sql("SELECT * FROM self WHERE job = 'hwpc_alone'"), "HWPC")
    rq34.os_comparison_heatmap_processor_versions_pkg_percent_used(joined_df)
    rq34.os_comparison_heatmap_processor_versions_ram_percent_used(joined_df)

    rq34.debian_facetgrid_processor_versions_pkg_cv_nb_ops(debian11_energy_stats_df.sql("SELECT * FROM self WHERE nb_ops_per_core > 25"))
    rq34.debian_facetgrid_processor_versions_ram_cv_nb_ops(debian11_energy_stats_df.sql("SELECT * FROM self WHERE nb_ops_per_core > 25"))
    rq34.ubuntu_facetgrid_processor_versions_pkg_cv_nb_ops(ubuntu2404_energy_stats_df.sql("SELECT * FROM self WHERE nb_ops_per_core > 25"))
    rq34.ubuntu_facetgrid_processor_versions_ram_cv_nb_ops(ubuntu2404_energy_stats_df.sql("SELECT * FROM self WHERE nb_ops_per_core > 25"))
211+
212+
213+
def energy_for_os(os_flavor, results_directory_match, test):
    """Load (or build and cache) the energy-statistics DataFrame for one OS.

    Args:
        os_flavor: batch name, e.g. "debian11-5.10-0"; used to derive the
            batch/result/inventory directory paths.
        results_directory_match: regex with capture groups used by
            load.load_results to parse metadata out of result file paths.
        test: when True, use the small "*_sample.csv" cache file instead of
            the full one.

    Returns:
        A polars DataFrame of energy statistics, either read straight from
        the cached CSV or computed from the raw result files (and written
        back to the cache).
    """
    # Pick the cache file: sample data for test runs, full data otherwise.
    if test:
        energy_stats_csv_file = (
            f"batches/{os_flavor}.d/{os_flavor}_energy_stats_sample.csv"
        )
    else:
        energy_stats_csv_file = f"batches/{os_flavor}.d/{os_flavor}_energy_stats.csv"
    # Fast path: a previous run already wrote the stats CSV.
    if os.path.exists(energy_stats_csv_file):
        return pl.read_csv(energy_stats_csv_file)
    results_directory: str = f"batches/{os_flavor}.d/results-{os_flavor}.d/"
    inventories_directory: str = f"batches/{os_flavor}.d/inventories-{os_flavor}.d/"
    (hwpc_files, perf_files) = extract.extract_csv_files(results_directory)

    # Node hardware inventory (one JSON per node) as a DataFrame.
    nodes_df = extract.extract_json_files(
        directory=inventories_directory, schema=schemas.nodes_configuration_columns
    )

    # Enrich each node with CPU metadata looked up in vendor_generation_map.
    # NOTE(review): every map_elements below raises KeyError if a node's
    # processor_version is missing from vendor_generation_map — confirm the
    # inventories only contain the 20 known CPUs.
    nodes_df = nodes_df.with_columns(
        [
            (
                pl.col("processor_version")
                .map_elements(
                    lambda x: f"{x}\n{vendor_generation_map[x]['architecture']}",
                    return_dtype=pl.String,
                )
                .alias("processor_detail")
            ),
            (
                # NOTE(review): the map stores "generation" as an int but
                # return_dtype is pl.String — confirm polars casts this as
                # intended rather than erroring on the dtype mismatch.
                pl.col("processor_version")
                .map_elements(
                    lambda x: vendor_generation_map[x]["generation"],
                    return_dtype=pl.String,
                )
                .alias("processor_generation")
            ),
            (
                pl.col("processor_version")
                .map_elements(
                    lambda x: vendor_generation_map[x]["vendor"], return_dtype=pl.String
                )
                .alias("processor_vendor")
            ),
        ]
    )

    print("Nodes Configuration glimpse:\n", nodes_df.head())

    # Data Exploration: load raw HWPC and perf result files and print a
    # quick summary of each for sanity checking.
    (hwpc_results, perf_results) = load.load_results(
        hwpc_files, perf_files, results_directory_match, test
    )
    print(
        "HWPC Results glimpse:\n",
        hwpc_results.head(),
        "\nHWPC Results stats:\n",
        hwpc_results.describe(),
    )
    print(hwpc_results.sql("select energy_pkg from self").describe())
    print(
        "Perf Results glimpse:\n",
        perf_results.head(),
        "\nPerf Results stats:\n",
        perf_results.describe(),
    )

    # Combine measurements with node metadata, then cache to CSV so the
    # next invocation takes the fast path above.
    energy_stats_df = load.load_energy(hwpc_results, perf_results, nodes_df, os_flavor)
    energy_stats_df.write_csv(energy_stats_csv_file, separator=",")

    return energy_stats_df
283+
284+
285+
if __name__ == "__main__":
286+
main()

analysis/execution_time.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import glob
import os
import statistics

import pandas as pd
4+
5+
def compute_mean_std(directory, nb_ops):
    """Compute and print mean/std of 'time_elapsed' across perf result CSVs.

    Recursively searches *directory* for files named ``perf_*_{nb_ops}.csv``,
    pools their ``time_elapsed`` columns (NaNs dropped) and prints the mean
    and population standard deviation.

    Args:
        directory: root directory to search recursively.
        nb_ops: number of operations encoded in the file-name suffix.

    Returns:
        A ``(mean, std_dev)`` tuple of floats, or ``None`` when no matching
        files or no valid values are found (a diagnostic is printed).
    """
    # Define the file pattern to search for
    pattern = os.path.join(directory, f"**/perf_*_{nb_ops}.csv")
    files = glob.glob(pattern, recursive=True)

    if not files:
        print(f"No files found for NB_OPS={nb_ops}")
        return None

    time_elapsed_values = []

    # Pool the time_elapsed column from every matching file.
    for file in files:
        try:
            df = pd.read_csv(file)
            time_elapsed_values.extend(df["time_elapsed"].dropna())
        except Exception as e:
            # Best-effort: skip unreadable/malformed files, keep the rest.
            print(f"Error reading file {file}: {e}")

    if not time_elapsed_values:
        print(f"No valid time_elapsed values found in files for NB_OPS={nb_ops}")
        return None

    # statistics.mean/pstdev replace the hand-rolled formulas; pstdev is the
    # population standard deviation (divides by N), matching the original.
    mean_time = statistics.mean(time_elapsed_values)
    std_dev_time = statistics.pstdev(time_elapsed_values)

    print(f"Results for NB_OPS={nb_ops}:")
    print(f" Mean time_elapsed: {mean_time:.6f} seconds")
    print(f" Standard deviation: {std_dev_time:.6f} seconds")
    return mean_time, std_dev_time
37+
38+
# Report execution-time statistics for every OS flavour and workload size.
# Adjust the directory paths below to match the local data layout.
_RUNS = [
    ("For Ubuntu", "./batches/ubuntu2404nfs-6.8-0.d/results-ubuntu2404nfs-6.8-0.d/"),
    ("For Debian", "./batches/debian11-5.10-0.d/results-debian11-5.10-0.d/"),
    ("For Powerapi", "./results_powerapi2u"),
]

for _label, _directory in _RUNS:
    print(_label)
    for _nb_ops in (25, 250, 2500, 25000):
        compute_mean_std(_directory, _nb_ops)

0 commit comments

Comments
 (0)