Skip to content

Commit 73f9c5c

Browse files
authored
Merge pull request #229 from sunya-ch/fix-bugs
Add metadata plot and Fix bugs
2 parents ca9e4d7 + 9758dd9 commit 73f9c5c

File tree

10 files changed

+119
-39
lines changed

10 files changed

+119
-39
lines changed

cmd/cmd_plot.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,3 +111,39 @@ def summary_plot(args, energy_source, summary_df, output_folder, name):
111111
filename = os.path.join(output_folder, name + ".png")
112112
fig.savefig(filename)
113113
plt.close()
114+
115+
def metadata_plot(args, energy_source, metadata_df, output_folder, name):
116+
if metadata_df is None or len(metadata_df) == 0:
117+
print("no metadata data to plot")
118+
return
119+
120+
plot_height = 5
121+
plot_width = 20
122+
123+
import matplotlib.pyplot as plt
124+
import seaborn as sns
125+
sns.set(font_scale=1.2)
126+
127+
energy_components = PowerSourceMap[energy_source]
128+
col_num = len(energy_components)
129+
fig, axes = plt.subplots(col_num, 1, figsize=(plot_width, plot_height*col_num))
130+
for i in range(0, col_num):
131+
component = energy_components[i]
132+
metadata_df = metadata_df.sort_values(by="feature_group")
133+
if col_num == 1:
134+
ax = axes
135+
else:
136+
ax = axes[i]
137+
sns.boxplot(data=metadata_df, x="feature_group", y="mae", hue="trainer", ax=ax, hue_order=sorted(metadata_df['trainer'].unique()), showfliers=False, palette="Set3")
138+
ax.set_title(component)
139+
ax.set_ylabel("MAE (Watt)")
140+
ax.set_xlabel("Feature Group")
141+
# ax.set_ylim((0, 100))
142+
if i < col_num-1:
143+
ax.set_xlabel("")
144+
# ax.legend(bbox_to_anchor=(1.05, 1.05))
145+
plt.suptitle("Pipieline metadata of {} {}".format(energy_source.upper(), args.output_type))
146+
plt.tight_layout()
147+
filename = os.path.join(output_folder, name + ".png")
148+
fig.savefig(filename)
149+
plt.close()

cmd/cmd_util.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -240,16 +240,20 @@ def check_ot_fg(args, valid_fg):
240240
exit()
241241
return ot, fg
242242

243+
import sklearn
243244
def assert_train(trainer, data, energy_components):
244245
import pandas as pd
245246
node_types = pd.unique(data[node_info_column])
246247
for node_type in node_types:
247248
node_type_filtered_data = data[data[node_info_column] == node_type]
248249
X_values = node_type_filtered_data[trainer.features].values
249250
for component in energy_components:
250-
output = trainer.predict(node_type, component, X_values)
251-
if output is not None:
252-
assert len(output) == len(X_values), "length of predicted values != features ({}!={})".format(len(output), len(X_values))
251+
try:
252+
output = trainer.predict(node_type, component, X_values)
253+
if output is not None:
254+
assert len(output) == len(X_values), "length of predicted values != features ({}!={})".format(len(output), len(X_values))
255+
except sklearn.exceptions.NotFittedError:
256+
pass
253257

254258
def get_isolator(data_path, isolator, profile, pipeline_name, target_hints, bg_hints, abs_pipeline_name, replace_node_type=default_node_type):
255259
pipeline_path = get_pipeline_path(data_path, pipeline_name=pipeline_name)

cmd/main.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,15 @@
1919
from util.extract_types import get_expected_power_columns
2020
from util.train_types import ModelOutputType, FeatureGroups, is_single_source_feature_group, all_feature_groups, default_trainers
2121
from util.loader import load_json, DEFAULT_PIPELINE, load_pipeline_metadata, get_pipeline_path, get_model_group_path, list_pipelines, list_model_names, load_metadata, load_csv, get_preprocess_folder, get_general_filename, load_machine_spec
22-
from util.saver import save_json, save_csv, save_train_args
22+
from util.saver import save_json, save_csv, save_train_args, _pipeline_model_metadata_filename
2323
from util.config import ERROR_KEY, model_toppath
2424
from util import get_valid_feature_group_from_queries, PowerSourceMap
2525
from train.prom.prom_query import _range_queries
2626
from train.exporter import exporter
2727
from train import load_class
2828
from train.profiler.node_type_index import NodeTypeIndexCollection, NodeTypeSpec, generate_spec
2929

30-
from cmd_plot import ts_plot, feature_power_plot, summary_plot
30+
from cmd_plot import ts_plot, feature_power_plot, summary_plot, metadata_plot
3131
from cmd_util import extract_time, save_query_results, get_validate_df, summary_validation, get_extractor, check_ot_fg, get_pipeline, assert_train, get_isolator, UTC_OFFSET_TIMEDELTA
3232

3333
import threading
@@ -597,9 +597,11 @@ def estimate(args):
597597
- `preprocess` plots time series of usage and power metrics for both AbsPower and DynPower
598598
- `estimate` passes all arguments to `estimate` function, and plots the predicted time series and correlation between usage and power metrics
599599
- `error` passes all arguments to `estimate` function, and plots the summary of prediction error
600+
- `metadata` plot pipeline metadata
600601
- --energy-source : specify target energy sources (use comma(,) as delimiter)
601602
- --extractor : specify extractor to get preprocessed data of AbsPower model linked to the input data
602603
- --isolator : specify isolator to get preprocessed data of DynPower model linked to the input data
604+
- --pipeline_name : specify pipeline name
603605
"""
604606

605607
def plot(args):
@@ -685,6 +687,12 @@ def plot(args):
685687
for energy_source in energy_sources:
686688
data_filename = get_general_filename(args.target_data, energy_source, fg, ot, args.extractor, args.isolator)
687689
summary_plot(args, energy_source, summary_df, output_folder, data_filename)
690+
elif args.target_data == "metadata":
691+
for energy_source in energy_sources:
692+
data_filename = _pipeline_model_metadata_filename(energy_source, ot.name)
693+
pipeline_path = get_pipeline_path(data_path, pipeline_name=pipeline_name)
694+
model_metadata_df = load_pipeline_metadata(pipeline_path, energy_source, ot.name)
695+
metadata_plot(args, energy_source, model_metadata_df, output_folder, data_filename)
688696

689697
"""
690698
export
@@ -732,7 +740,15 @@ def export(args):
732740
pipeline_name = args.pipeline_name
733741
pipeline_path = get_pipeline_path(data_path, pipeline_name=pipeline_name)
734742

735-
exporter.export(data_path, pipeline_path, output_path, publisher=args.publisher, collect_date=collect_date, inputs=inputs)
743+
local_export_path = exporter.export(data_path, pipeline_path, output_path, publisher=args.publisher, collect_date=collect_date, inputs=inputs)
744+
args.target_data = "metadata"
745+
746+
args.output = local_export_path
747+
args.output_type = "AbsPower"
748+
args.energy_source = ",".join(PowerSourceMap.keys())
749+
plot(args)
750+
args.output_type = "DynPower"
751+
plot(args)
736752

737753
"""
738754
plot_scenario

model_training/s3/s3-loader.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ def ibmcloud_list_keys(client, bucket_name, prefix):
2020

2121
def get_bucket_file_map(client, bucket_name, machine_id, mnt_path, pipeline_name, list_func):
2222
bucket_file_map = dict()
23+
top_key_path = ""
2324
if machine_id is not None and machine_id != "":
2425
top_key_path = "/" + machine_id
2526
# add data key map

src/train/exporter/exporter.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
from validator import get_validated_export_items, BestModelCollection
1313
from loader import load_metadata, load_node_type_index, get_version_path, get_export_path
14+
from saver import save_pipeline_metadata, save_node_type_index
1415
from format import time_to_str
1516
from writer import generate_pipeline_page, generate_report_results, generate_pipeline_readme, append_version_readme, get_workload_content
1617
from config import ERROR_KEY
@@ -44,7 +45,14 @@ def export(data_path, pipeline_path, db_path, publisher, collect_date, inputs):
4445
remote_version_path = get_version_path(repo_url, assure=False)
4546

4647
# get validated export items (models)
47-
export_items = get_validated_export_items(pipeline_path, pipeline_name)
48+
export_items, valid_metadata_df = get_validated_export_items(pipeline_path, pipeline_name)
49+
# save pipeline metadata
50+
for energy_source, ot_metadata_df in valid_metadata_df.items():
51+
for model_type, metadata_df in ot_metadata_df.items():
52+
metadata_df = metadata_df.sort_values(by=["feature_group", ERROR_KEY])
53+
save_pipeline_metadata(local_export_path, pipeline_metadata, energy_source, model_type, metadata_df)
54+
# save node_type_index.json
55+
save_node_type_index(local_export_path, node_type_index_json)
4856

4957
for export_item in export_items:
5058
# export models
@@ -60,4 +68,6 @@ def export(data_path, pipeline_path, db_path, publisher, collect_date, inputs):
6068
# generate validation result page
6169
generate_pipeline_readme(pipeline_name, local_export_path, node_type_index_json, best_model_collections)
6270
# add new pipeline item to version path
63-
append_version_readme(local_version_path, pipeline_metadata)
71+
append_version_readme(local_version_path, pipeline_metadata)
72+
73+
return local_export_path

src/train/exporter/validator.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,13 +88,16 @@ def get_best_item_with_weight(self, energy_source, output_type_name, feature_gro
8888
# get_validated_export_items return valid export items
8989
def get_validated_export_items(pipeline_path, pipeline_name):
9090
export_items = []
91+
valid_metadata_df = dict()
9192
models_path = os.path.join(pipeline_path, "..")
9293
for energy_source in PowerSourceMap.keys():
94+
valid_metadata_df[energy_source] = dict()
9395
for ot in ModelOutputType:
9496
metadata_df = load_pipeline_metadata(pipeline_path, energy_source, ot.name)
9597
if metadata_df is None:
9698
print("no metadata for", energy_source, ot.name)
9799
continue
100+
valid_rows = []
98101
for _, row in metadata_df.iterrows():
99102
if row['mape'] <= mape_threshold or row['mae'] <= mae_threshold:
100103
model_name = row["model_name"]
@@ -105,4 +108,6 @@ def get_validated_export_items(pipeline_path, pipeline_name):
105108
print("source not exist: ", source_file)
106109
continue
107110
export_items += [export_item]
108-
return export_items
111+
valid_rows += [row]
112+
valid_metadata_df[energy_source][ot.name] = pd.DataFrame(valid_rows)
113+
return export_items, valid_metadata_df

src/train/exporter/writer.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,9 @@
77
sys.path.append(util_path)
88

99
from loader import load_json, version
10-
from saver import assure_path
10+
from saver import assure_path, _pipeline_model_metadata_filename
1111
from validator import mae_threshold, mape_threshold
12+
from train_types import ModelOutputType, PowerSourceMap
1213

1314
error_report_foldername = "error_report"
1415

@@ -201,7 +202,6 @@ def generate_pipeline_readme(pipeline_name, local_export_path, node_type_index_j
201202
markdown_filepath = os.path.join(local_export_path, "README.md")
202203
markdown_content = "# {} on v{} Build\n\n".format(pipeline_name, version)
203204
markdown_content += "MAE Threshold = {}, MAPE Threshold = {}%\n\n".format(mae_threshold, int(mape_threshold))
204-
205205
items = []
206206
for node_type, spec_json in node_type_index_json.items():
207207
if best_model_collections[int(node_type)].has_model:
@@ -213,6 +213,12 @@ def generate_pipeline_readme(pipeline_name, local_export_path, node_type_index_j
213213
items += [item]
214214
df = pd.DataFrame(items)
215215
markdown_content += "Available Node Type: {}\n\n".format(len(df))
216+
# add metadata figures
217+
for ot in ModelOutputType:
218+
for energy_source in PowerSourceMap.keys():
219+
data_filename = _pipeline_model_metadata_filename(energy_source, ot.name)
220+
markdown_content += "![]({}.png)\n".format(data_filename)
221+
216222
markdown_content += data_to_markdown_table(df.sort_values(by=["node type"]))
217223
write_markdown(markdown_filepath, markdown_content)
218224
return markdown_filepath

src/train/pipeline.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ def prepare_data_from_input_list(self, input_query_results_list, energy_componen
117117

118118
def _train(self, abs_data, dyn_data, power_labels, energy_source, feature_group):
119119
# start the thread pool
120-
with ThreadPoolExecutor(2) as executor:
120+
with ThreadPoolExecutor(len(self.trainers)) as executor:
121121
futures = []
122122
for trainer in self.trainers:
123123
if trainer.feature_group_name != feature_group:

src/train/profiler/node_type_index.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def generate_spec(data_path, machine_id):
4444
if "brand_raw" in cpu_info:
4545
processor = format_processor(cpu_info["brand_raw"])
4646
cores = psutil.cpu_count(logical=True)
47-
chips = psutil.cpu_count(logical=False)
47+
chips = int(cores/psutil.cpu_count(logical=False))
4848
memory = psutil.virtual_memory().total
4949
memory_gb = int(memory/GB)
5050
cpu_freq_mhz = round(psutil.cpu_freq(percpu=False).max/100)*100 # round to one decimal of GHz

src/train/trainer/__init__.py

Lines changed: 28 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -130,39 +130,41 @@ def load_model(self, node_type):
130130

131131
def process(self, data, power_labels, pipeline_lock):
132132
node_types = pd.unique(data[node_info_column])
133-
try:
134-
for node_type in node_types:
135-
node_type = int(node_type)
136-
save_path = self._get_save_path(str(node_type))
137-
self.node_scalers[node_type] = load_scaler(save_path)
138-
self.load_model(node_type)
139-
140-
node_type_filtered_data = data[data[node_info_column] == node_type]
141-
if self.node_scalers[node_type] is None:
142-
self.print_log("fit scaler to latest data {1} for node_type={0}".format(node_type, self.feature_group_name))
143-
# no profiled scaler
144-
x_values = node_type_filtered_data[self.features].values
145-
self.node_scalers[node_type] = MaxAbsScaler()
146-
self.node_scalers[node_type].fit(x_values)
147-
148-
X_test_map = dict()
149-
y_test_map = dict()
133+
for node_type in node_types:
134+
node_type = int(node_type)
135+
save_path = self._get_save_path(str(node_type))
136+
self.node_scalers[node_type] = load_scaler(save_path)
137+
self.load_model(node_type)
138+
139+
node_type_filtered_data = data[data[node_info_column] == node_type]
140+
if self.node_scalers[node_type] is None:
141+
self.print_log("fit scaler to latest data {1} for node_type={0}".format(node_type, self.feature_group_name))
142+
# no profiled scaler
143+
x_values = node_type_filtered_data[self.features].values
144+
self.node_scalers[node_type] = MaxAbsScaler()
145+
self.node_scalers[node_type].fit(x_values)
146+
147+
X_test_map = dict()
148+
y_test_map = dict()
149+
try:
150150
for component in self.energy_components:
151151
X_values, y_values = self.apply_ratio(component, node_type_filtered_data, power_labels)
152152
X_train, X_test, y_train, y_test = normalize_and_split(X_values, y_values, scaler=self.node_scalers[node_type])
153153
X_test_map[component] = X_test
154154
y_test_map[component] = y_test
155155
self.train(node_type, component, X_train, y_train)
156156
self.save_checkpoint(self.node_models[node_type][component], self._checkpoint_filepath(component, node_type))
157-
if self.should_archive(node_type):
158-
pipeline_lock.acquire()
159-
try:
160-
self.save_model_and_metadata(node_type, X_test_map, y_test_map)
161-
finally:
162-
pipeline_lock.release()
163-
except Exception as e:
164-
print(e)
165-
pipeline_lock.release()
157+
except Exception as err:
158+
self.print_log("failed to process {}: {}".format(node_type, err))
159+
continue
160+
if self.should_archive(node_type):
161+
pipeline_lock.acquire()
162+
try:
163+
self.save_model_and_metadata(node_type, X_test_map, y_test_map)
164+
except Exception as err:
165+
self.print_log("failed to save model {}: {}".format(node_type, err))
166+
finally:
167+
pipeline_lock.release()
166168

167169
def apply_ratio(self, component, node_type_filtered_data, power_labels):
168170
power_label = component_to_col(component)

0 commit comments

Comments
 (0)