Skip to content

Commit 16ef378

Browse files
author
Sunil Thaha
committed
chore: add more logging
Signed-off-by: Sunil Thaha <[email protected]>
1 parent f89cb3e commit 16ef378

File tree

5 files changed

+70
-44
lines changed

5 files changed

+70
-44
lines changed

src/kepler_model/estimate/estimator.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from kepler_model.estimate.model.model import load_downloaded_model
1515
from kepler_model.util.loader import get_download_output_path
1616
from kepler_model.util.config import set_env_from_model_config, SERVE_SOCKET, download_path
17-
from kepler_model.util.train_types import is_support_output_type, ModelOutputType
17+
from kepler_model.util.train_types import is_output_type_supported, ModelOutputType
1818

1919
###############################################
2020
# power request
@@ -43,16 +43,16 @@ def __init__(self, metrics, values, output_type, source, system_features, system
4343
loaded_model = dict()
4444

4545

46-
def handle_request(data):
46+
def handle_request(data: str) -> dict:
4747
try:
4848
power_request = json.loads(data, object_hook=lambda d: PowerRequest(**d))
4949
except Exception as e:
50-
logger.error(f"fail to handle request: {e}")
51-
msg = "fail to handle request: {}".format(e)
50+
msg = f"failed to handle request: {e}"
51+
logger.error(msg)
5252
return {"powers": dict(), "msg": msg}
5353

54-
if not is_support_output_type(power_request.output_type):
55-
msg = "output type {} is not supported".format(power_request.output_type)
54+
if not is_output_type_supported(power_request.output_type):
55+
msg = f"output type {power_request.output_type} is not supported"
5656
logger.error(msg)
5757
return {"powers": dict(), "msg": msg}
5858

@@ -63,6 +63,7 @@ def handle_request(data):
6363

6464
if output_type.name not in loaded_model:
6565
loaded_model[output_type.name] = dict()
66+
6667
output_path = ""
6768
mismatch_trainer = False
6869
if is_model_server_enabled():
@@ -87,12 +88,12 @@ def handle_request(data):
8788
msg = "failed to get model from request {}".format(data)
8889
logger.error(msg)
8990
return {"powers": dict(), "msg": msg}
90-
else:
91-
logger.info(f"load model from config: {output_path}")
91+
logger.info(f"load model from config: {output_path}")
9292
else:
9393
logger.info(f"load model from model server: {output_path}")
9494

9595
loaded_item = load_downloaded_model(power_request.energy_source, output_type)
96+
9697
if loaded_item is not None and loaded_item.estimator is not None:
9798
loaded_model[output_type.name][power_request.energy_source] = loaded_item
9899
logger.info(f"set model {loaded_item.model_name} for {output_type.name} ({power_request.energy_source})")
@@ -103,6 +104,7 @@ def handle_request(data):
103104
logger.info(f"{model.model_name} failed to predict; removed: {msg}")
104105
if output_path != "" and os.path.exists(output_path):
105106
shutil.rmtree(output_path)
107+
106108
return {"powers": powers, "msg": msg}
107109

108110

@@ -124,7 +126,7 @@ def start(self):
124126
os.remove(self.socket_path)
125127
sys.stdout.write("close socket\n")
126128
except Exception as e:
127-
logger.error(f"fail to close socket: {e}")
129+
logger.error(f"failed to close socket: {e}")
128130

129131
def accepted(self, connection):
130132
data = b""

src/kepler_model/estimate/model/model.py

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import json
2+
import logging
23
import pandas as pd
34

45
from kepler_model.util.loader import load_metadata, get_download_output_path
@@ -10,6 +11,8 @@
1011
from kepler_model.estimate.model.curvefit_model import CurveFitModelEstimator
1112
# from keras_model import KerasModelEstimator
1213

14+
logger = logging.getLogger(__name__)
15+
1316
# model wrapper
1417
MODELCLASS = {
1518
"scikit": ScikitModelEstimator,
@@ -157,17 +160,18 @@ def append_idle_prediction(self, data, predicted_col_func=default_idle_predicted
157160

158161
def load_model(model_path):
    """Build a ``Model`` from the metadata stored under *model_path*.

    The metadata returned by ``load_metadata`` is tagged with the path it
    was read from and then round-tripped through JSON so that
    ``object_hook`` can construct the ``Model`` from it.

    Args:
        model_path: location passed to ``load_metadata``; it is also stored
            in the metadata under the ``"model_path"`` key.

    Returns:
        The constructed ``Model``, or ``None`` when no metadata is available
        or when constructing the ``Model`` from it fails.
    """
    metadata = load_metadata(model_path)
    if not metadata:
        # Logger.warn() is a deprecated alias; warning() is the supported API.
        logger.warning(f"no metadata in {model_path}")
        return None

    metadata["model_path"] = model_path
    metadata_str = json.dumps(metadata)
    try:
        # object_hook is invoked for every JSON object that gets decoded, so
        # each decoded dict is passed to Model(**d).
        return json.loads(metadata_str, object_hook=lambda d: Model(**d))
    except Exception as e:
        logger.error(f"fail to load: {model_path} - {e}")
        return None
171175

172176

173177
# download model folder has no subfolder of energy source and feature group because it has been already determined by model request

src/kepler_model/util/config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
import os
1616
from .loader import get_url, get_pipeline_url, base_model_url, default_pipelines, default_train_output_pipeline
17-
from .train_types import ModelOutputType, is_support_output_type, FeatureGroup
17+
from .train_types import ModelOutputType, is_output_type_supported, FeatureGroup
1818

1919
# must be writable (for shared volume mount)
2020
MNT_PATH = "/mnt"
@@ -150,7 +150,7 @@ def get_init_model_url(energy_source, output_type, model_topurl=model_topurl):
150150
if get_energy_source(prefix) == energy_source:
151151
modelURL = get_init_url(prefix)
152152
print("get init url", modelURL)
153-
if modelURL == "" and is_support_output_type(output_type):
153+
if modelURL == "" and is_output_type_supported(output_type):
154154
print("init URL is not set, try using default URL".format(output_type))
155155
return get_url(feature_group=FeatureGroup.BPFOnly, output_type=ModelOutputType[output_type], energy_source=energy_source, model_topurl=model_topurl, pipeline_name=pipeline_name)
156156
else:

src/kepler_model/util/loader.py

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import os
22
import json
3+
import logging
34
import joblib
45
import pandas as pd
56
from .saver import assure_path, METADATA_FILENAME, SCALER_FILENAME, WEIGHT_FILENAME, TRAIN_ARGS_FILENAME, NODE_TYPE_INDEX_FILENAME, MACHINE_SPEC_PATH, _pipeline_model_metadata_filename
@@ -31,6 +32,9 @@
3132
base_model_url = "https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-db/main/models/v{}".format(major_version)
3233

3334

35+
logger = logging.getLogger(__name__)
36+
37+
3438
def get_pipeline_url(model_topurl, pipeline_name):
3539
file_ext = ".zip"
3640
return os.path.join(model_topurl, pipeline_name + file_ext)
@@ -52,40 +56,45 @@ def assure_pipeline_name(pipeline_name, energy_source, nodeCollection):
5256
default_feature_group = FeatureGroup.BPFOnly
5357

5458

55-
def load_json(path: str, name: str):
    """Load a JSON document from ``path/name``.

    Args:
        path: directory that contains the file.
        name: file name; ``.json`` is appended when the name does not
            already end with it.

    Returns:
        The decoded JSON value, or ``None`` when the file cannot be opened
        or parsed (the failure is logged).
    """
    if not name.endswith(".json"):
        name = name + ".json"

    filepath = os.path.join(path, name)
    try:
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open(filepath, encoding="utf-8") as f:
            return json.load(f)
    except Exception as err:
        logger.error(f"fail to load json {filepath}: {err}")
        return None
6571

6672

67-
def load_pkl(path: str, name: str):
    """Load a joblib/pickle file from ``path/name``.

    Args:
        path: directory that contains the file.
        name: file name; ``.pkl`` is appended when the name does not
            already end with it.

    Returns:
        The object returned by ``joblib.load``, or ``None`` when the file
        does not exist or cannot be loaded. A missing file is not logged;
        any other failure is logged as an error.
    """
    if not name.endswith(".pkl"):
        name = name + ".pkl"

    filepath = os.path.join(path, name)
    try:
        return joblib.load(filepath)
    except FileNotFoundError:
        # A missing file is returned as None without logging.
        return None
    except Exception as err:
        logger.error(f"failed to load pkl {filepath}: {err}")
        return None
7986

8087

8188
def load_remote_pkl(url_path):
    """Download and load a joblib/pickle file from *url_path*.

    Args:
        url_path: URL of the file; ``.pkl`` is appended when the URL does
            not already end with it.

    Returns:
        The object returned by ``joblib.load``, or ``None`` when the
        download or the load fails (the failure is logged).
    """
    if not url_path.endswith(".pkl"):
        url_path += ".pkl"

    try:
        # NOTE(review): joblib.load unpickles the payload, which can execute
        # arbitrary code; only point this at trusted model repositories.
        # The context manager closes the HTTP response even if loading fails.
        with urlopen(url_path) as response:
            return joblib.load(response)
    except Exception as e:
        logger.error(f"failed to load pkl url {url_path}: {e}")
        return None
9099

91100

@@ -133,8 +142,8 @@ def load_csv(path, name):
133142
data = pd.read_csv(file_path)
134143
data = data.apply(pd.to_numeric, errors="ignore")
135144
return data
136-
except:
137-
# print('cannot load {}'.format(file_path))
145+
except Exception as err:
146+
logger.error(f"cannot load {file_path}: {err}")
138147
return None
139148

140149

src/kepler_model/util/train_types.py

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
###########################################################
22
## types.py
3-
##
3+
##
44
## defines
55
## - collection of features
66
## - feature groups
@@ -17,7 +17,7 @@
1717
COUNTER_FEAUTRES = ["cache_miss", "cpu_cycles", "cpu_instructions"]
1818
BPF_FEATURES = ["bpf_cpu_time_ms", "bpf_page_cache_hit"]
1919
IRQ_FEATURES = ["bpf_block_irq", "bpf_net_rx_irq", "bpf_net_tx_irq"]
20-
ACCELERATE_FEATURES = ['accelerator_intel_qat']
20+
ACCELERATE_FEATURES = ["accelerator_intel_qat"]
2121
WORKLOAD_FEATURES = COUNTER_FEAUTRES + BPF_FEATURES + IRQ_FEATURES + ACCELERATE_FEATURES
2222
BASIC_FEATURES = COUNTER_FEAUTRES + BPF_FEATURES
2323

@@ -40,8 +40,8 @@
4040
"cpu_scaling_frequency_hertz": ["1GHz", "2GHz", "3GHz"],
4141
}
4242

43-
no_weight_trainers = ['PolynomialRegressionTrainer', 'GradientBoostingRegressorTrainer', 'KNeighborsRegressorTrainer', 'LinearRegressionTrainer','SVRRegressorTrainer', 'XgboostFitTrainer']
44-
weight_support_trainers = ['SGDRegressorTrainer', 'LogarithmicRegressionTrainer', 'LogisticRegressionTrainer', 'ExponentialRegressionTrainer']
43+
no_weight_trainers = ["PolynomialRegressionTrainer", "GradientBoostingRegressorTrainer", "KNeighborsRegressorTrainer", "LinearRegressionTrainer", "SVRRegressorTrainer", "XgboostFitTrainer"]
44+
weight_support_trainers = ["SGDRegressorTrainer", "LogarithmicRegressionTrainer", "LogisticRegressionTrainer", "ExponentialRegressionTrainer"]
4545
default_trainer_names = no_weight_trainers + weight_support_trainers
4646
default_trainers = ",".join(default_trainer_names)
4747

@@ -59,24 +59,29 @@ class FeatureGroup(enum.Enum):
5959
ThirdParty = 10
6060
Unknown = 99
6161

62+
6263
class EnergyComponentLabelGroup(enum.Enum):
6364
PackageEnergyComponentOnly = 1
6465
DRAMEnergyComponentOnly = 2
6566
CoreEnergyComponentOnly = 3
6667
PackageDRAMEnergyComponents = 4
6768

69+
6870
class ModelOutputType(enum.Enum):
6971
AbsPower = 1
7072
DynPower = 2
7173

72-
def is_output_type_supported(output_type_name):
    """Return True when *output_type_name* equals the name of a ``ModelOutputType`` member."""
    for candidate in ModelOutputType:
        if output_type_name == candidate.name:
            return True
    return False
7477

78+
7579
def deep_sort(elements):
    """Return a new, ascending-sorted list of *elements*.

    The input is left untouched. Despite the name, the sort is shallow:
    only the top-level items are ordered, nested containers are not
    modified.

    Args:
        elements: any iterable of mutually comparable items.

    Returns:
        A new list with the items in ascending order.
    """
    return sorted(elements)
7983

84+
8085
FeatureGroups = {
8186
FeatureGroup.Full: deep_sort(WORKLOAD_FEATURES + SYSTEM_FEATURES),
8287
FeatureGroup.WorkloadOnly: deep_sort(WORKLOAD_FEATURES),
@@ -90,9 +95,11 @@ def deep_sort(elements):
9095

9196
SingleSourceFeatures = [FeatureGroup.CounterOnly.name, FeatureGroup.BPFOnly.name, FeatureGroup.BPFIRQ.name]
9297

98+
9399
def is_single_source_feature_group(fg):
94100
return fg.name in SingleSourceFeatures
95101

102+
96103
default_main_feature_map = {
97104
FeatureGroup.Full: "cpu_instructions",
98105
FeatureGroup.WorkloadOnly: "cpu_instructions",
@@ -122,15 +129,17 @@ def main_feature(feature_group_name, energy_component):
122129
feature = default_main_feature_map[feature_group]
123130
return features.index(feature)
124131

132+
125133
# XGBoostRegressionTrainType
126134
class XGBoostRegressionTrainType(enum.Enum):
127135
TrainTestSplitFit = 1
128136
KFoldCrossValidation = 2
129137

138+
130139
# XGBoost Model Feature and Label Incompatability Exception
131140
class XGBoostModelFeatureOrLabelIncompatabilityException(Exception):
132-
"""Exception raised when a saved model's features and label is incompatable with the training data.
133-
141+
"""Exception raised when a saved model's features and label is incompatable with the training data.
142+
134143
...
135144
136145
Attributes
@@ -139,7 +148,7 @@ class XGBoostModelFeatureOrLabelIncompatabilityException(Exception):
139148
expected_labels: the expected model labels
140149
actual_features: the actual model features
141150
actual_labels: the actual model labels
142-
features_incompatible: true if expected_features == actual_features else false
151+
features_incompatible: true if expected_features == actual_features else false
143152
labels_incompatible: true if expected_labels == actual_labels else false
144153
"""
145154

@@ -150,7 +159,6 @@ class XGBoostModelFeatureOrLabelIncompatabilityException(Exception):
150159
features_incompatible: bool
151160
labels_incompatible: bool
152161

153-
154162
def __init__(self, expected_features: List[str], expected_labels: List[str], received_features: List[str], received_labels: List[str], message="expected features/labels are the not the same as the features/labels of the training data") -> None:
155163
self.expected_features = expected_features
156164
self.expected_labels = expected_labels
@@ -188,12 +196,12 @@ def __init__(self, missing_model: bool, missing_model_desc: bool, message="model
188196
EnergyComponentLabelGroup.PackageEnergyComponentOnly: deep_sort(PACKAGE_ENERGY_COMPONENT_LABEL),
189197
EnergyComponentLabelGroup.DRAMEnergyComponentOnly: deep_sort(DRAM_ENERGY_COMPONENT_LABEL),
190198
EnergyComponentLabelGroup.CoreEnergyComponentOnly: deep_sort(CORE_ENERGY_COMPONENT_LABEL),
191-
EnergyComponentLabelGroup.PackageDRAMEnergyComponents: deep_sort(PACKAGE_ENERGY_COMPONENT_LABEL + DRAM_ENERGY_COMPONENT_LABEL)
192-
199+
EnergyComponentLabelGroup.PackageDRAMEnergyComponents: deep_sort(PACKAGE_ENERGY_COMPONENT_LABEL + DRAM_ENERGY_COMPONENT_LABEL),
193200
}
194201

195202
all_feature_groups = [fg.name for fg in FeatureGroups.keys()]
196203

204+
197205
def get_feature_group(features):
198206
sorted_features = deep_sort(features)
199207
for g, g_features in FeatureGroups.items():
@@ -202,6 +210,7 @@ def get_feature_group(features):
202210
return g
203211
return FeatureGroup.Unknown
204212

213+
205214
def get_valid_feature_groups(features):
206215
valid_fgs = []
207216
for fg_key, fg_features in FeatureGroups.items():
@@ -214,6 +223,7 @@ def get_valid_feature_groups(features):
214223
valid_fgs += [fg_key]
215224
return valid_fgs
216225

226+
217227
def is_weight_output(output_type):
218228
if output_type == ModelOutputType.AbsModelWeight:
219229
return True
@@ -225,7 +235,8 @@ def is_weight_output(output_type):
225235
return True
226236
return False
227237

228-
if __name__ == '__main__':
238+
239+
if __name__ == "__main__":
229240
for g, g_features in FeatureGroups.items():
230241
shuffled_features = g_features.copy()
231242
random.shuffle(shuffled_features)

0 commit comments

Comments
 (0)