Skip to content

Commit 5ba2b72

Browse files
author
Inkedstinct
committed
[NLD] : WIP
1 parent f9dd8c2 commit 5ba2b72

File tree

1 file changed

+266
-0
lines changed

1 file changed

+266
-0
lines changed

data_analysis.py

Lines changed: 266 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,266 @@
1+
# IMPORTS
2+
import os
3+
import sys
4+
import polars as pl
5+
import schemas
6+
import extract
7+
import load
8+
import rq34
9+
10+
11+
# Processor metadata keyed by the processor version string.
# Each row below is (model, architecture, vendor, generation, launch_date);
# the comprehension expands it into the per-model dictionary used for lookups.
vendor_generation_map = {
    model: {
        "architecture": architecture,
        "vendor": vendor,
        "generation": generation,
        "launch_date": launch_date,
    }
    for model, architecture, vendor, generation, launch_date in (
        # Intel
        ("E5-2620 v4", "Broadwell-E", "Intel", 6, "Q1 2016"),
        ("E5-2630L v4", "Broadwell-E", "Intel", 6, "Q1 2016"),
        ("E5-2698 v4", "Broadwell-E", "Intel", 6, "Q1 2016"),
        ("E5-2630 v3", "Haswell-E", "Intel", 5, "Q3 2014"),
        ("Gold 5220", "Cascade Lake-SP", "Intel", 10, "Q2 2019"),
        ("Gold 5218", "Cascade Lake-SP", "Intel", 10, "Q2 2019"),
        ("i7-9750H", "Coffee Lake", "Intel", 9, "Q2 2019"),
        ("Silver 4314", "Ice Lake-SP", "Intel", 10, "Q2 2021"),
        ("Gold 5320", "Ice Lake-SP", "Intel", 10, "Q2 2021"),
        ("Gold 6126", "Skylake-SP", "Intel", 6, "Q3 2017"),
        ("Gold 6130", "Skylake-SP", "Intel", 6, "Q3 2017"),
        ("E5-2620", "Sandy Bridge-EP", "Intel", 3, "Q1 2012"),
        ("E5-2630", "Sandy Bridge-EP", "Intel", 3, "Q1 2012"),
        ("E5-2630L", "Sandy Bridge-EP", "Intel", 3, "Q1 2012"),
        ("E5-2660", "Sandy Bridge-EP", "Intel", 3, "Q1 2012"),
        # AMD
        ("7301", "Zen", "AMD", 1, "Q2 2017"),
        ("7352", "Zen 2", "AMD", 2, "Q3 2019"),
        ("7452", "Zen 2", "AMD", 2, "Q3 2019"),
        ("7642", "Zen 2", "AMD", 2, "Q3 2019"),
        ("7742", "Zen 2", "AMD", 2, "Q3 2019"),
    )
}
133+
134+
135+
def main():
    """Load per-OS energy statistics and run the RQ3/RQ4 comparison routines.

    The first command-line argument is optional: when it equals ``"test"``,
    ``energy_for_os`` is called in test mode; any other value, or no argument
    at all, selects a normal run.
    """
    # A missing argument means a normal run instead of an IndexError.
    test = len(sys.argv) > 1 and sys.argv[1] == "test"

    debian11_energy_stats_df = energy_for_os(
        "debian11-5.10-0",
        r"batches/debian11-5\.10-0\.d/results-debian11-5\.10-0\.d/([^/]+)/([^/]+)/([^/]+)/[^_]*_([^_]+).*",
        test,
    )
    ubuntu2404_energy_stats_df = energy_for_os(
        "ubuntu2404nfs-6.8-0",
        r"batches/ubuntu2404nfs-6\.8-0\.d/results-ubuntu2404nfs-6\.8-0\.d/([^/]+)/([^/]+)/([^/]+)/[^_]*_([^_]+).*",
        test,
    )

    # NOTE(review): the PowerAPI statistics are not consumed by any routine
    # below yet; the call is kept because energy_for_os writes the cached CSV
    # as a side effect.
    energy_for_os(
        "powerapi",
        r"batches/powerapi/results/([^/]+)/([^/]+)/([^/]+)/[^_]*_([^_]+).*",
        test,
    )

    # Rows with nb_ops_per_core <= 25 are dropped wherever this predicate is
    # applied, as that workload may be irrelevant.
    above_25_ops = pl.col("nb_ops_per_core") > 25

    # Pair each Ubuntu measurement with the Debian measurement of the same
    # node / workload / core count / job; Debian columns get a "_debian" suffix.
    joined_df = ubuntu2404_energy_stats_df.join(
        debian11_energy_stats_df,
        on=["node", "nb_ops_per_core", "nb_core", "job"],
        suffix="_debian",
    ).filter(above_25_ops)

    debian11_filtered_df = debian11_energy_stats_df.filter(above_25_ops)
    ubuntu2404_filtered_df = ubuntu2404_energy_stats_df.filter(above_25_ops)

    # RQ3/4
    # The "all" box plots receive the unfiltered per-OS frames.
    rq34.os_comparison_boxplots_processor_versions_pkg_all(
        [debian11_energy_stats_df, ubuntu2404_energy_stats_df]
    )
    rq34.os_comparison_boxplots_processor_versions_ram_all(
        [debian11_energy_stats_df, ubuntu2404_energy_stats_df]
    )
    rq34.os_comparison_heatmap_processor_versions_pkg_nb_ops(joined_df)
    rq34.os_comparison_heatmap_processor_versions_ram_nb_ops(joined_df)
    rq34.os_comparison_heatmap_processor_versions_pkg_percent_used(joined_df)
    rq34.os_comparison_heatmap_processor_versions_ram_percent_used(joined_df)

    rq34.debian_facetgrid_processor_versions_pkg_cv_nb_ops(debian11_filtered_df)
    rq34.debian_facetgrid_processor_versions_ram_cv_nb_ops(debian11_filtered_df)
    rq34.ubuntu_facetgrid_processor_versions_pkg_cv_nb_ops(ubuntu2404_filtered_df)
    rq34.ubuntu_facetgrid_processor_versions_ram_cv_nb_ops(ubuntu2404_filtered_df)
191+
192+
193+
def energy_for_os(
    os_flavor: str, results_directory_match: str, test: bool
) -> pl.DataFrame:
    """Return the energy statistics for one OS flavor, using a CSV cache.

    If the energy-stats CSV for ``os_flavor`` already exists under
    ``batches/{os_flavor}.d/`` it is read and returned directly. Otherwise the
    statistics are built from the raw result and inventory files, written to
    that CSV, and returned.

    Args:
        os_flavor: Name of the batch; used to build every path under
            ``batches/{os_flavor}.d/``.
        results_directory_match: Regular expression forwarded unchanged to
            ``load.load_results``.
        test: When true, the ``*_energy_stats_sample.csv`` file name is used
            for the cache and the flag is forwarded to ``load.load_results``.

    Returns:
        The energy statistics DataFrame (read from the cache or freshly built).

    Raises:
        KeyError: Presumably surfaces (possibly wrapped by Polars) when a
            ``processor_version`` value is missing from
            ``vendor_generation_map`` -- TODO confirm how Polars reports it.
    """
    batch_directory = f"batches/{os_flavor}.d"
    csv_suffix = "_energy_stats_sample.csv" if test else "_energy_stats.csv"
    energy_stats_csv_file = f"{batch_directory}/{os_flavor}{csv_suffix}"

    # Fast path: reuse the statistics computed by a previous run.
    if os.path.exists(energy_stats_csv_file):
        return pl.read_csv(energy_stats_csv_file)

    results_directory: str = f"{batch_directory}/results-{os_flavor}.d/"
    inventories_directory: str = f"{batch_directory}/inventories-{os_flavor}.d/"
    (hwpc_files, perf_files) = extract.extract_csv_files(results_directory)

    nodes_df = extract.extract_json_files(
        directory=inventories_directory, schema=schemas.nodes_configuration_columns
    )

    # Enrich every node with metadata looked up from vendor_generation_map.
    processor_version = pl.col("processor_version")
    nodes_df = nodes_df.with_columns(
        [
            processor_version.map_elements(
                lambda x: f"{x}\n{vendor_generation_map[x]['architecture']}",
                return_dtype=pl.String,
            ).alias("processor_detail"),
            # The map stores generations as integers, so declare an integer
            # dtype; this also matches what read_csv infers on the cached path.
            processor_version.map_elements(
                lambda x: vendor_generation_map[x]["generation"],
                return_dtype=pl.Int64,
            ).alias("processor_generation"),
            processor_version.map_elements(
                lambda x: vendor_generation_map[x]["vendor"],
                return_dtype=pl.String,
            ).alias("processor_vendor"),
        ]
    )

    print("Nodes Configuration glimpse:\n", nodes_df.head())

    # Data Exploration
    (hwpc_results, perf_results) = load.load_results(
        hwpc_files, perf_files, results_directory_match, test
    )
    print(
        "HWPC Results glimpse:\n",
        hwpc_results.head(),
        "\nHWPC Results stats:\n",
        hwpc_results.describe(),
    )
    print(hwpc_results.sql("select energy_pkg from self").describe())
    print(
        "Perf Results glimpse:\n",
        perf_results.head(),
        "\nPerf Results stats:\n",
        perf_results.describe(),
    )

    energy_stats_df = load.load_energy(hwpc_results, perf_results, nodes_df, os_flavor)
    # Persist the result so the next call takes the fast path above.
    energy_stats_df.write_csv(energy_stats_csv_file, separator=",")

    return energy_stats_df
263+
264+
265+
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)