Commit b6dad1e

Merge pull request #151 from sunya-ch/isolator

define hints as alternative to isolate container by names

2 parents 4b949d2 + 9ba33ca

5 files changed: +164 −37 lines

cmd/README.md

Lines changed: 7 additions & 2 deletions
@@ -3,8 +3,9 @@
 Use kepler model server function as a standalone docker container.
 
 ```
-usage: main.py [-h] [-i INPUT] [-o OUTPUT] [-s SERVER] [--interval INTERVAL] [--step STEP] [--metric-prefix METRIC_PREFIX] [-p PIPELINE_NAME] [--extractor EXTRACTOR] [--isolator ISOLATOR] [--profile PROFILE] [-e ENERGY_SOURCE] [--abs-trainers ABS_TRAINERS]
-               [--dyn-trainers DYN_TRAINERS] [--benchmark BENCHMARK] [-ot OUTPUT_TYPE] [-fg FEATURE_GROUP] [--model-name MODEL_NAME] [--target-data TARGET_DATA] [--id ID] [--version VERSION] [--publisher PUBLISHER] [--include-raw INCLUDE_RAW]
+usage: main.py [-h] [-i INPUT] [-o OUTPUT] [-s SERVER] [--interval INTERVAL] [--step STEP] [--metric-prefix METRIC_PREFIX] [-p PIPELINE_NAME] [--extractor EXTRACTOR] [--isolator ISOLATOR] [--profile PROFILE] [--target-hints TARGET_HINTS] [--bg-hints BG_HINTS]
+               [-e ENERGY_SOURCE] [--abs-trainers ABS_TRAINERS] [--dyn-trainers DYN_TRAINERS] [--benchmark BENCHMARK] [-ot OUTPUT_TYPE] [-fg FEATURE_GROUP] [--model-name MODEL_NAME] [--target-data TARGET_DATA] [--scenario SCENARIO] [--id ID] [--version VERSION]
+               [--publisher PUBLISHER] [--include-raw INCLUDE_RAW]
 command
 
 Kepler model server entrypoint
@@ -30,6 +31,9 @@ optional arguments:
                         Specify extractor name (default, smooth).
   --isolator ISOLATOR   Specify isolator name (none, min, profile, trainer).
   --profile PROFILE     Specify profile input (required for trainer and profile isolator).
+  --target-hints TARGET_HINTS
+                        Specify dynamic workload container name hints (used by TrainIsolator)
+  --bg-hints BG_HINTS   Specify background workload container name hints (used by TrainIsolator)
   -e ENERGY_SOURCE, --energy-source ENERGY_SOURCE
                         Specify energy source.
   --abs-trainers ABS_TRAINERS
@@ -46,6 +50,7 @@ optional arguments:
                         Specify target model name for energy estimation.
   --target-data TARGET_DATA
                         Speficy target plot data (preprocess, estimate)
+  --scenario SCENARIO   Speficy scenario
   --id ID               specify machine id
   --version VERSION     Specify model server version.
   --publisher PUBLISHER
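Note: with this change, --target-hints and --bg-hints become an alternative to --profile for the trainer isolator, naming the dynamic and background workload containers by substring instead of by idle profiling data. A hypothetical invocation sketch (the input and pipeline names and the prometheus/kepler background hints are illustrative placeholders; coremark mirrors the hint used in tests/isolator_test.py below):

```
python cmd/main.py train -i <prom_query_response> -p <pipeline_name> --isolator trainer --target-hints coremark --bg-hints prometheus,kepler
```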

cmd/main.py

Lines changed: 127 additions & 29 deletions
@@ -248,18 +248,24 @@ def assert_train(trainer, data, energy_components):
     if output is not None:
         assert len(output) == len(X_values), "length of predicted values != features ({}!={})".format(len(output), len(X_values))
 
-def get_pipeline(pipeline_name, extractor, profile, isolator, abs_trainer_names, dyn_trainer_names, energy_sources, valid_feature_groups):
+def get_isolator(isolator, profile, pipeline_name, target_hints, bg_hints):
     pipeline_path = get_pipeline_path(data_path, pipeline_name=pipeline_name)
-    from train import DefaultExtractor, SmoothExtractor, MinIdleIsolator, NoneIsolator, DefaultProfiler, ProfileBackgroundIsolator, TrainIsolator, generate_profiles, NewPipeline
-    supported_extractor = {
-        DefaultExtractor().get_name(): DefaultExtractor(),
-        SmoothExtractor().get_name(): SmoothExtractor()
-    }
+    from train import MinIdleIsolator, NoneIsolator, DefaultProfiler, ProfileBackgroundIsolator, TrainIsolator, generate_profiles
     supported_isolator = {
         MinIdleIsolator().get_name(): MinIdleIsolator(),
         NoneIsolator().get_name(): NoneIsolator(),
     }
 
+    if target_hints:
+        target_hints = target_hints.split(",")
+    else:
+        target_hints = []
+
+    if bg_hints:
+        bg_hints = bg_hints.split(",")
+    else:
+        bg_hints = []
+
     profiles = dict()
     if profile:
         idle_response = load_json(data_path, profile)
@@ -270,35 +276,38 @@ def get_pipeline(pipeline_name, extractor, profile, isolator, abs_trainer_names,
         profile_map = DefaultProfiler.process(idle_data, profile_top_path=pipeline_path)
         profiles = generate_profiles(profile_map)
         profile_isolator = ProfileBackgroundIsolator(profiles, idle_data)
-        trainer_isolator = TrainIsolator(idle_data=idle_data, profiler=DefaultProfiler)
+        trainer_isolator = TrainIsolator(idle_data=idle_data, profiler=DefaultProfiler, target_hints=target_hints, bg_hints=bg_hints, abs_pipeline_name=pipeline_name)
         supported_isolator[profile_isolator.get_name()] = profile_isolator
-        supported_isolator[trainer_isolator] = trainer_isolator.get_name()
+    else:
+        trainer_isolator = TrainIsolator(target_hints=target_hints, bg_hints=bg_hints, abs_pipeline_name=pipeline_name)
+
+    supported_isolator[trainer_isolator.get_name()] = trainer_isolator
 
     if isolator not in supported_isolator:
         print("isolator {} is not supported. supported isolator: {}".format(isolator, supported_isolator.keys()))
-        return None
-
+        return None
+    return supported_isolator[isolator]
+
+def get_extractor(extractor):
+    from train import DefaultExtractor, SmoothExtractor
+    supported_extractor = {
+        DefaultExtractor().get_name(): DefaultExtractor(),
+        SmoothExtractor().get_name(): SmoothExtractor()
+    }
     if extractor not in supported_extractor:
         print("extractor {} is not supported. supported extractor: {}".format(extractor, supported_extractor.keys()))
         return None
+    return supported_extractor[extractor]
 
-    isolator = supported_isolator[isolator]
-    extractor = supported_extractor[extractor]
+def get_pipeline(pipeline_name, extractor, profile, target_hints, bg_hints, isolator, abs_trainer_names, dyn_trainer_names, energy_sources, valid_feature_groups):
+    from train import NewPipeline
+    isolator = get_isolator(isolator, profile, pipeline_name, target_hints, bg_hints)
+    extractor = get_extractor(extractor)
     pipeline = NewPipeline(pipeline_name, abs_trainer_names, dyn_trainer_names, extractor=extractor, isolator=isolator, target_energy_sources=energy_sources ,valid_feature_groups=valid_feature_groups)
     return pipeline
 
 def extract(args):
-    from train import DefaultExtractor, SmoothExtractor
-    supported_extractor = {
-        "default": DefaultExtractor(),
-        "smooth": SmoothExtractor()
-    }
-
-    if args.extractor not in supported_extractor:
-        print("extractor {} is not supported. supported extractor: {}".format(args.extractor, supported_extractor.keys()))
-        return None
-    extractor = supported_extractor[args.extractor]
-
+    extractor = get_extractor(args.extractor)
     # single input
     input = args.input
     response = load_json(data_path, input)
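The hint flags arrive as comma-separated strings and get_isolator() splits them into lists before constructing TrainIsolator. A minimal standalone sketch of that convention (toy helper, not repository code):

```python
# Toy stand-in for the normalization done in get_isolator():
# a comma-separated string (or None/empty) becomes a list of substrings.
def parse_hints(hint_str):
    return hint_str.split(",") if hint_str else []

assert parse_hints("coremark,stressng") == ["coremark", "stressng"]
assert parse_hints("") == []
assert parse_hints(None) == []
```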
@@ -314,12 +323,22 @@ def extract(args):
     node_level=False
     if ot == ModelOutputType.AbsPower:
         node_level=True
-    feature_power_data, _, _, _ = extractor.extract(query_results, energy_components, args.feature_group, args.energy_source, node_level=node_level)
-    print(feature_power_data)
+    feature_power_data, power_cols, _, _ = extractor.extract(query_results, energy_components, args.feature_group, args.energy_source, node_level=node_level)
     if args.output:
-        feature_power_data.to_csv(args.output)
+        save_csv(data_path, "extracted_" + args.output, feature_power_data)
         query = feature_to_query(FeatureGroups[fg][0])
         query_results[query][[TIMESTAMP_COL, query]].groupby([TIMESTAMP_COL]).sum().to_csv(args.output[0:-4]+"_raw.csv")
+    return feature_power_data, power_cols
+
+def isolate(args):
+    extracted_data, power_labels = extract(args)
+    if extracted_data is None or power_labels is None:
+        return None
+    pipeline_name = DEFAULT_PIPELINE if not args.pipeline_name else args.pipeline_name
+    isolator = get_isolator(args.isolator, args.profile, pipeline_name, args.target_hints, args.bg_hints)
+    isolated_data = isolator.isolate(extracted_data, label_cols=power_labels, energy_source=args.energy_source)
+    if args.output:
+        save_csv(data_path, "isolated_" + args.output, isolated_data)
 
 def train(args):
     import warnings
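The new isolate command chains extract() with the selected isolator and saves extracted_<output> and isolated_<output> CSVs via save_csv. Assuming the command positional dispatches to the function of the same name, as with the existing subcommands, a hypothetical invocation could look like this (file names and hint values are placeholders; extract-related flags such as -ot and -fg may also be needed depending on defaults):

```
python cmd/main.py isolate -i <prom_query_response> -o output.csv --isolator trainer --target-hints coremark -e rapl
```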
@@ -353,7 +372,7 @@ def train(args):
 
     abs_trainer_names = args.abs_trainers.split(",")
     dyn_trainer_names = args.dyn_trainers.split(",")
-    pipeline = get_pipeline(pipeline_name, args.extractor, args.profile, args.isolator, abs_trainer_names, dyn_trainer_names, energy_sources, valid_feature_groups)
+    pipeline = get_pipeline(pipeline_name, args.extractor, args.profile, args.target_hints, args.bg_hints, args.isolator, abs_trainer_names, dyn_trainer_names, energy_sources, valid_feature_groups)
     if pipeline is None:
         print("cannot get pipeline")
         exit()
@@ -435,7 +454,7 @@ def estimate(args):
         if pipeline_metadata is None:
             print("no metadata for pipeline {}.".format(pipeline_name))
             continue
-        pipeline = get_pipeline(pipeline_name, args.extractor, args.profile, pipeline_metadata["isolator"], pipeline_metadata["abs_trainers"], pipeline_metadata["dyn_trainers"], energy_sources, valid_fg)
+        pipeline = get_pipeline(pipeline_name, args.extractor, args.profile, args.target_hints, args.bg_hints, pipeline_metadata["isolator"], pipeline_metadata["abs_trainers"], pipeline_metadata["dyn_trainers"], energy_sources, valid_fg)
         if pipeline is None:
             print("cannot get pipeline {}.".format(pipeline_name))
             continue
@@ -643,9 +662,10 @@ def plot(args):
                 feature_cols = FeatureGroups[fg]
                 power_cols = [col for col in data.columns if "power" in col]
                 feature_data = data.groupby([TIMESTAMP_COL]).sum()
-                _ts_plot(feature_data, feature_cols, "Feature group: {}".format(fg.name), output_folder, data_filename)
+                _ts_plot(feature_data[feature_data[feature_cols]>0], feature_cols, "Feature group: {}".format(fg.name), output_folder, data_filename)
                 if not energy_plot:
                     power_data = data.groupby([TIMESTAMP_COL]).max()
+                    data_filename = get_general_filename(args.target_data, energy_source, None, ot, args.extractor, args.isolator)
                     _ts_plot(power_data, power_cols, "Power source: {}".format(energy_source), output_folder, data_filename, ylabel="Power (W)")
     elif args.target_data == "estimate":
         from estimate import default_predicted_col_func
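A note on the feature_data[feature_data[feature_cols]>0] change above: indexing a DataFrame with a boolean DataFrame keeps the original shape and masks non-matching cells (and columns absent from the mask) to NaN rather than dropping rows, so zero-valued feature samples show up as gaps in the plotted series. A standalone illustration:

```python
import pandas as pd

df = pd.DataFrame({"cpu_time": [0, 5, 7], "power": [10, 11, 12]})
masked = df[df[["cpu_time"]] > 0]
# cpu_time becomes [NaN, 5.0, 7.0]; the power column, absent from the mask, is all NaN.
print(masked)
```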
@@ -744,6 +764,81 @@ def export(args):
         args.feature_group = feature_group
         plot(args)
 
+def plot_scenario(args):
+    if not args.benchmark:
+        print("Need --benchmark")
+        exit()
+
+    if not args.scenario:
+        print("Need --scenario")
+        exit()
+
+    # filter scenario
+    input_scenarios = args.scenario.split(",")
+    status_data = load_json(data_path, args.benchmark)
+    target_pods = []
+    cpe_results = status_data["status"]["results"]
+    for result in cpe_results:
+        scenarioID = result["scenarioID"]
+        target = False
+        for scenario in input_scenarios:
+            if scenario in scenarioID:
+                target = True
+                break
+        if not target:
+            continue
+
+        scenarios = result["scenarios"]
+        configurations = result["configurations"]
+        for k, v in scenarios.items():
+            result[k] = v
+        for k, v in configurations.items():
+            result[k] = v
+        repetitions = result["repetitions"]
+        for rep in repetitions:
+            podname = rep["pod"]
+            target_pods += [podname]
+
+    response = load_json(data_path, args.input)
+    query_results = prom_responses_to_results(response)
+    for query, data in query_results.items():
+        if "pod_name" in data.columns:
+            query_results[query] = data[data["pod_name"].isin(target_pods)]
+
+    valid_fg = [fg_key for fg_key in FeatureGroups.keys()]
+    ot, fg = check_ot_fg(args, valid_fg)
+    if fg is not None:
+        valid_fg = [fg]
+    energy_sources = args.energy_source.split(",")
+    output_folder = os.path.join(data_path, args.output)
+
+    print("Plot:", args)
+    feature_plot = []
+    for energy_source in energy_sources:
+        energy_components = PowerSourceMap[energy_source]
+        energy_plot = False
+        for fg in valid_fg:
+            if (len(valid_fg) > 1 and not is_single_source_feature_group(fg)) or (energy_plot and fg.name in feature_plot):
+                # no need to plot if it is a mixed source or already plotted
+                continue
+            data_filename = get_general_filename(args.target_data, energy_source, fg, ot, args.extractor, args.isolator) + "_" + args.scenario
+            if data_filename is None:
+                print("cannot get preprocessed data for ", ot.name)
+                return
+            from train import DefaultExtractor
+
+            extractor = DefaultExtractor()
+            data, power_cols, _, _ = extractor.extract(query_results, energy_components, fg.name, args.energy_source, node_level=True)
+            feature_plot += [fg.name]
+            feature_cols = FeatureGroups[fg]
+            power_cols = [col for col in data.columns if "power" in col]
+            feature_data = data.groupby([TIMESTAMP_COL]).sum()
+            _ts_plot(feature_data, feature_cols, "Feature group: {} ({})".format(fg.name, args.scenario), output_folder, data_filename)
+            if not energy_plot:
+                power_data = data.groupby([TIMESTAMP_COL]).max()
+                data_filename = get_general_filename(args.target_data, energy_source, None, ot, args.extractor, args.isolator) + "_" + args.scenario
+                _ts_plot(power_data, power_cols, "Power source: {} ({})".format(energy_source, args.scenario), output_folder, data_filename, ylabel="Power (W)")
+
 
 if __name__ == "__main__":
     # set model top path to data path
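plot_scenario filters the CPE benchmark result down to pods whose scenarioID contains one of the --scenario substrings, then reuses the extract-and-plot flow on only those pods. Assuming the command positional dispatches to the function of the same name, a hypothetical invocation sketch (all file and scenario names are placeholders):

```
python cmd/main.py plot_scenario -i <prom_query_response> -o <output_dir> --benchmark <cpe_benchmark_json> --scenario <scenario_substring> --target-data preprocess -e rapl
```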
@@ -768,6 +863,8 @@ def export(args):
     parser.add_argument("--extractor", type=str, help="Specify extractor name (default, smooth).", default="default")
     parser.add_argument("--isolator", type=str, help="Specify isolator name (none, min, profile, trainer).", default="min")
     parser.add_argument("--profile", type=str, help="Specify profile input (required for trainer and profile isolator).")
+    parser.add_argument("--target-hints", type=str, help="Specify dynamic workload container name hints (used by TrainIsolator)")
+    parser.add_argument("--bg-hints", type=str, help="Specify background workload container name hints (used by TrainIsolator)")
     parser.add_argument("-e", "--energy-source", type=str, help="Specify energy source.", default="rapl")
     parser.add_argument("--abs-trainers", type=str, help="Specify trainer names (use comma(,) as delimiter).", default=default_trainers)
     parser.add_argument("--dyn-trainers", type=str, help="Specify trainer names (use comma(,) as delimiter).", default=default_trainers)
@@ -782,6 +879,7 @@ def export(args):
 
     # Plot arguments
     parser.add_argument("--target-data", type=str, help="Speficy target plot data (preprocess, estimate)")
+    parser.add_argument("--scenario", type=str, help="Speficy scenario")
 
     # Export arguments
     parser.add_argument("--id", type=str, help="specify machine id")

src/train/isolator/isolator.py

Lines changed: 1 addition & 0 deletions
@@ -54,6 +54,7 @@ def isolate_container(extracted_data, background_containers, label_cols):
     target_containers, background_containers = get_target_containers(extracted_data, background_containers)
     target_data = extracted_data[extracted_data[container_id_colname].isin(target_containers)]
     background_data = extracted_data[~extracted_data[container_id_colname].isin(target_containers)]
+    print("Target containers:", target_containers)
     target_data = squeeze_data(target_data, label_cols)
     background_data = squeeze_data(background_data, label_cols)
     return target_data, background_data

src/train/isolator/train_isolator.py

Lines changed: 28 additions & 6 deletions
@@ -1,6 +1,7 @@
 import os
 import sys
 import numpy as np
+import pandas as pd
 
 util_path = os.path.join(os.path.dirname(__file__), '..', '..', '..', 'util')
 sys.path.append(util_path)
@@ -14,12 +15,11 @@
 
 from util import PowerSourceMap
 from util.train_types import get_valid_feature_groups
-from util.prom_types import TIMESTAMP_COL
+from util.prom_types import TIMESTAMP_COL, get_container_name_from_id
 from util.extract_types import container_level_index, container_id_colname, col_to_component
 from util.config import model_toppath
 from util.loader import list_all_abs_models, DEFAULT_PIPELINE
 
-
 def is_better(curr_min_err, err, curr_max_corr, corr, corr_threshold=0.7):
     if curr_min_err is None:
         return True
@@ -111,21 +111,43 @@ def find_best_target_data_with_dyn_power(energy_source, energy_components, extra
             best_background_data_with_prediction = background_data_with_prediction
     return best_target_data_with_dyn_power, best_background_data_with_prediction
 
+def get_background_container_from_target_hints(data, target_hints):
+    container_names = pd.unique(data[container_id_colname].transform(get_container_name_from_id))
+    background_containers = [container_name for container_name in container_names if not any(hint in container_name for hint in target_hints)]
+    return background_containers
+
+def get_background_container_from_bg_hints(data, bg_hints):
+    container_names = pd.unique(data[container_id_colname].transform(get_container_name_from_id))
+    background_containers = [container_name for container_name in container_names if any(hint in container_name for hint in bg_hints)]
+    return background_containers
+
 # TO-DO: suppport multiple node types
 class TrainIsolator(Isolator):
-    def __init__(self, idle_data, profiler, abs_pipeline_name=DEFAULT_PIPELINE):
-        self.idle_data = idle_data
-        self.profiles = profiler.process(self.idle_data)
-        self.background_containers = get_background_containers(self.idle_data)
+    def __init__(self, idle_data=None, profiler=None, target_hints=[], bg_hints=[], abs_pipeline_name=DEFAULT_PIPELINE):
+        if profiler is not None and idle_data is not None:
+            self.idle_data = idle_data
+            self.profiles = profiler.process(self.idle_data)
+            self.background_containers = get_background_containers(self.idle_data)
+        self.background_containers = None
         self.abs_pipeline_name = abs_pipeline_name
+        self.target_hints = target_hints
+        self.bg_hints = bg_hints
 
     def isolate(self, data, label_cols, energy_source):
         index_list = data.index.names
         if index_list[0] is not None:
             data = data.reset_index()
+        if self.background_containers is None:
+            if len(self.target_hints) > 0:
+                self.background_containers = get_background_container_from_target_hints(data, self.target_hints)
+            else:
+                # if nothing set for hint, all are considered as target
+                self.background_containers = get_background_container_from_target_hints(data, self.bg_hints)
         energy_components = PowerSourceMap[energy_source]
         label_cols = list(label_cols)
         best_target_data_with_dyn_power, _ = find_best_target_data_with_dyn_power(energy_source, energy_components, data, self.background_containers, label_cols, pipeline_name=self.abs_pipeline_name)
+        if best_target_data_with_dyn_power is None:
+            return None
         isolated_data = best_target_data_with_dyn_power.copy()
         power_label_cols = [get_label_power_colname(energy_component) for energy_component in energy_components]
         extracted_power_labels = get_extracted_power_labels(data, energy_components, label_cols)[power_label_cols]
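To make the two hint semantics concrete: target hints mark every container whose name does not contain any hint as background, while bg hints mark only the matching containers as background (and an empty bg-hint list leaves everything as target). A self-contained sketch with toy container names (the real helpers additionally derive names from container IDs via get_container_name_from_id):

```python
# Standalone illustration of the hint-matching rules introduced above.
container_names = ["coremark-abc", "stressng-xyz", "kepler-exporter", "prometheus-0"]

def background_from_target_hints(names, target_hints):
    # anything NOT matching a target hint is treated as background
    return [n for n in names if not any(h in n for h in target_hints)]

def background_from_bg_hints(names, bg_hints):
    # only names matching a background hint are treated as background
    return [n for n in names if any(h in n for h in bg_hints)]

print(background_from_target_hints(container_names, ["coremark", "stressng"]))
# ['kepler-exporter', 'prometheus-0']
print(background_from_bg_hints(container_names, ["kepler", "prometheus"]))
# ['kepler-exporter', 'prometheus-0']
print(background_from_bg_hints(container_names, []))
# [] -> with no background hints, every container remains a target
```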

tests/isolator_test.py

Lines changed: 1 addition & 0 deletions
@@ -94,5 +94,6 @@ def process(test_isolators=test_isolators, customize_isolators=[], extract_path=
 if __name__ == '__main__':
     # Add customize isolator here
     customize_isolators = [TrainIsolator(idle_data=test_idle_data, profiler=DefaultProfiler)]
+    customize_isolators = [TrainIsolator(target_hints=["coremark"])]
     customize_isolators += [ProfileBackgroundIsolator(test_profiles, test_idle_data)]
     process(customize_isolators=customize_isolators)
