|
| 1 | +# IMPORTS |
| 2 | +import os |
| 3 | +import sys |
| 4 | +import polars as pl |
| 5 | +import schemas |
| 6 | +import extract |
| 7 | +import load |
| 8 | +import rq1 |
| 9 | +import rq2 |
| 10 | +import rq3 |
| 11 | +import rq34 |
| 12 | +import visualization |
| 13 | + |
| 14 | + |
# One row per supported CPU model:
# (processor version, architecture, vendor, generation, launch quarter).
_PROCESSOR_SPECS = (
    ("E5-2620 v4", "Broadwell-E", "Intel", 6, "Q1 2016"),
    ("E5-2630L v4", "Broadwell-E", "Intel", 6, "Q1 2016"),
    ("E5-2698 v4", "Broadwell-E", "Intel", 6, "Q1 2016"),
    ("E5-2630 v3", "Haswell-E", "Intel", 5, "Q3 2014"),
    ("Gold 5220", "Cascade Lake-SP", "Intel", 10, "Q2 2019"),
    ("Gold 5218", "Cascade Lake-SP", "Intel", 10, "Q2 2019"),
    ("i7-9750H", "Coffee Lake", "Intel", 9, "Q2 2019"),
    ("Silver 4314", "Ice Lake-SP", "Intel", 10, "Q2 2021"),
    ("Gold 5320", "Ice Lake-SP", "Intel", 10, "Q2 2021"),
    ("Gold 6126", "Skylake-SP", "Intel", 6, "Q3 2017"),
    ("Gold 6130", "Skylake-SP", "Intel", 6, "Q3 2017"),
    ("E5-2620", "Sandy Bridge-EP", "Intel", 3, "Q1 2012"),
    ("E5-2630", "Sandy Bridge-EP", "Intel", 3, "Q1 2012"),
    ("E5-2630L", "Sandy Bridge-EP", "Intel", 3, "Q1 2012"),
    ("E5-2660", "Sandy Bridge-EP", "Intel", 3, "Q1 2012"),
    ("7301", "Zen", "AMD", 1, "Q2 2017"),
    ("7352", "Zen 2", "AMD", 2, "Q3 2019"),
    ("7452", "Zen 2", "AMD", 2, "Q3 2019"),
    ("7642", "Zen 2", "AMD", 2, "Q3 2019"),
    ("7742", "Zen 2", "AMD", 2, "Q3 2019"),
)

# Lookup table keyed by processor version; each value is an independent dict
# holding the architecture, vendor, generation number and launch quarter.
vendor_generation_map = {
    model: {
        "architecture": architecture,
        "vendor": vendor,
        "generation": generation,
        "launch_date": launch_date,
    }
    for model, architecture, vendor, generation, launch_date in _PROCESSOR_SPECS
}
| 137 | + |
| 138 | + |
def main():
    """Load the per-OS energy statistics and pass them to the rq1/rq2/rq3/rq34 routines.

    Command line:
        An optional first argument equal to ``test`` is forwarded to
        ``energy_for_os`` as ``test=True``; any other value, or no argument
        at all, results in ``test=False``.
    """
    # A missing argument used to raise IndexError; treat it as a regular run.
    test = len(sys.argv) > 1 and sys.argv[1] == "test"

    def without_25_ops(frame):
        # Get rid of 25 OPS as it may be irrelevant.
        return frame.sql("SELECT * FROM self WHERE nb_ops_per_core > 25")

    debian11_energy_stats_df = energy_for_os(
        "debian11-5.10-0",
        r"batches/debian11-5\.10-0\.d/results-debian11-5\.10-0\.d/([^/]+)/([^/]+)/([^/]+)/[^_]*_([^_]+).*",
        test,
    )
    ubuntu2404_energy_stats_df = energy_for_os(
        "ubuntu2404nfs-6.8-0",
        r"batches/ubuntu2404nfs-6\.8-0\.d/results-ubuntu2404nfs-6\.8-0\.d/([^/]+)/([^/]+)/([^/]+)/[^_]*_([^_]+).*",
        test,
    )
    # The powerapi frame is not consumed below; the call is kept because
    # energy_for_os writes the energy-stats CSV when it does not exist yet.
    energy_for_os(
        "powerapi",
        r"batches/powerapi\.d/results-powerapi\.d/([^/]+)/([^/]+)/([^/]+)/[^_]*_([^_]+).*",
        test,
    )

    rq3.correlation_perf_perf_hwpc_hwpc_cv_os(
        ubuntu2404_energy_stats_df, debian11_energy_stats_df, "alone"
    )

    labelled_frames = (
        (debian11_energy_stats_df, "debian11 Kernel 5.10"),
        (ubuntu2404_energy_stats_df, "ubuntu2404 Kernel 6.8"),
    )
    for stats_df, os_label in labelled_frames:
        for job_mode in ("alone", "not_alone"):
            rq1.correlation_perf_hwpc_cv(stats_df, job_mode, os_label)

    rq2.boxplots_perf_hwpc_cv_processor(
        debian11_energy_stats_df,
        "processor_detail",
        "pkg_coefficient_of_variation",
        "job",
        "25 000 Operations",
    )

    # Pair the Ubuntu and Debian rows describing the same node/workload/job;
    # columns coming from the Debian frame get the "_debian" suffix.
    joined_df = without_25_ops(
        ubuntu2404_energy_stats_df.join(
            debian11_energy_stats_df,
            on=["node", "nb_ops_per_core", "nb_core", "job"],
            suffix="_debian",
        )
    )

    # RQ3/4
    rq34.os_comparison_boxplots_processor_versions_pkg_all(
        [debian11_energy_stats_df, ubuntu2404_energy_stats_df]
    )
    rq34.os_comparison_boxplots_processor_versions_ram_all(
        [debian11_energy_stats_df, ubuntu2404_energy_stats_df]
    )

    nb_ops_heatmaps = (
        ("pkg", rq34.os_comparison_heatmap_processor_versions_pkg_nb_ops),
        ("ram", rq34.os_comparison_heatmap_processor_versions_ram_nb_ops),
    )
    for domain, draw_heatmap in nb_ops_heatmaps:
        for tool in ("perf", "hwpc"):
            print(f"Heatmaps {domain} {tool} alone")
            draw_heatmap(
                joined_df.sql(f"SELECT * FROM self WHERE job = '{tool}_alone'"),
                tool.upper(),
            )

    rq34.os_comparison_heatmap_processor_versions_pkg_percent_used(joined_df)
    rq34.os_comparison_heatmap_processor_versions_ram_percent_used(joined_df)

    # Each routine receives its own freshly filtered frame, as before.
    rq34.debian_facetgrid_processor_versions_pkg_cv_nb_ops(
        without_25_ops(debian11_energy_stats_df)
    )
    rq34.debian_facetgrid_processor_versions_ram_cv_nb_ops(
        without_25_ops(debian11_energy_stats_df)
    )
    rq34.ubuntu_facetgrid_processor_versions_pkg_cv_nb_ops(
        without_25_ops(ubuntu2404_energy_stats_df)
    )
    rq34.ubuntu_facetgrid_processor_versions_ram_cv_nb_ops(
        without_25_ops(ubuntu2404_energy_stats_df)
    )
| 211 | + |
| 212 | + |
def energy_for_os(os_flavor, results_directory_match, test):
    """Return the energy statistics frame for one OS batch, computing it if needed.

    If the energy-stats CSV for ``os_flavor`` already exists it is read and
    returned as-is. Otherwise the raw results and node inventories under
    ``batches/{os_flavor}.d/`` are extracted, the node table is enriched
    with processor details from ``vendor_generation_map``, the statistics are
    built by ``load.load_energy`` and written to that CSV before being returned.

    Args:
        os_flavor: Batch name used to build every path
            (``batches/{os_flavor}.d/...``); also passed to ``load.load_energy``.
        results_directory_match: Regular expression forwarded unchanged to
            ``load.load_results``.
        test: When true, the ``*_energy_stats_sample.csv`` file is used
            instead of ``*_energy_stats.csv``; also forwarded to
            ``load.load_results``.

    Returns:
        The polars DataFrame read from the CSV, or the freshly computed one.

    NOTE(review): a ``processor_version`` absent from ``vendor_generation_map``
    makes the lookups below fail; extend the map when new nodes are added.
    NOTE(review): the CSV path goes through ``pl.read_csv`` with default type
    inference, so column dtypes may differ from the freshly computed
    frame - confirm downstream code tolerates both.
    """
    batch_directory = f"batches/{os_flavor}.d"
    csv_suffix = "_energy_stats_sample.csv" if test else "_energy_stats.csv"
    energy_stats_csv_file = f"{batch_directory}/{os_flavor}{csv_suffix}"

    # Reuse a previous run's output when it is available.
    if os.path.exists(energy_stats_csv_file):
        return pl.read_csv(energy_stats_csv_file)

    results_directory: str = f"{batch_directory}/results-{os_flavor}.d/"
    inventories_directory: str = f"{batch_directory}/inventories-{os_flavor}.d/"
    (hwpc_files, perf_files) = extract.extract_csv_files(results_directory)

    nodes_df = extract.extract_json_files(
        directory=inventories_directory, schema=schemas.nodes_configuration_columns
    )

    processor_version = pl.col("processor_version")
    nodes_df = nodes_df.with_columns(
        [
            processor_version.map_elements(
                lambda x: f"{x}\n{vendor_generation_map[x]['architecture']}",
                return_dtype=pl.String,
            ).alias("processor_detail"),
            # The map stores the generation as an int; convert it so the value
            # matches the declared String dtype.
            processor_version.map_elements(
                lambda x: str(vendor_generation_map[x]["generation"]),
                return_dtype=pl.String,
            ).alias("processor_generation"),
            processor_version.map_elements(
                lambda x: vendor_generation_map[x]["vendor"],
                return_dtype=pl.String,
            ).alias("processor_vendor"),
        ]
    )

    print("Nodes Configuration glimpse:\n", nodes_df.head())

    # Data Exploration
    (hwpc_results, perf_results) = load.load_results(
        hwpc_files, perf_files, results_directory_match, test
    )
    print(
        "HWPC Results glimpse:\n",
        hwpc_results.head(),
        "\nHWPC Results stats:\n",
        hwpc_results.describe(),
    )
    print(hwpc_results.sql("select energy_pkg from self").describe())
    print(
        "Perf Results glimpse:\n",
        perf_results.head(),
        "\nPerf Results stats:\n",
        perf_results.describe(),
    )

    energy_stats_df = load.load_energy(hwpc_results, perf_results, nodes_df, os_flavor)
    # Persist the result so the next run can take the early-return path above.
    energy_stats_df.write_csv(energy_stats_csv_file, separator=",")

    return energy_stats_df
| 283 | + |
| 284 | + |
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
0 commit comments