chore: add more logging

Sunil Thaha · Sunil Thaha · commit 16ef378d2813 · 2024-08-21T15:10:31.000+10:00
Signed-off-by: Sunil Thaha <sthaha@redhat.com>
diff --git a/src/kepler_model/estimate/estimator.py b/src/kepler_model/estimate/estimator.py
@@ -14,7 +14,7 @@
 from kepler_model.estimate.model.model import load_downloaded_model
 from kepler_model.util.loader import get_download_output_path
 from kepler_model.util.config import set_env_from_model_config, SERVE_SOCKET, download_path
-from kepler_model.util.train_types import is_support_output_type, ModelOutputType
+from kepler_model.util.train_types import is_output_type_supported, ModelOutputType
 
 ###############################################
 # power request
@@ -43,16 +43,16 @@ def __init__(self, metrics, values, output_type, source, system_features, system
 loaded_model = dict()
 
 
-def handle_request(data):
+def handle_request(data: str) -> dict:
     try:
         power_request = json.loads(data, object_hook=lambda d: PowerRequest(**d))
     except Exception as e:
-        logger.error(f"fail to handle request: {e}")
-        msg = "fail to handle request: {}".format(e)
+        msg = f"failed to handle request: {e}"
+        logger.error(msg)
         return {"powers": dict(), "msg": msg}
 
-    if not is_support_output_type(power_request.output_type):
-        msg = "output type {} is not supported".format(power_request.output_type)
+    if not is_output_type_supported(power_request.output_type):
+        msg = f"output type {power_request.output_type} is not supported"
         logger.error(msg)
         return {"powers": dict(), "msg": msg}
 
@@ -63,6 +63,7 @@ def handle_request(data):
 
     if output_type.name not in loaded_model:
         loaded_model[output_type.name] = dict()
+
     output_path = ""
     mismatch_trainer = False
     if is_model_server_enabled():
@@ -87,12 +88,12 @@ def handle_request(data):
                     msg = "failed to get model from request {}".format(data)
                     logger.error(msg)
                     return {"powers": dict(), "msg": msg}
-                else:
-                    logger.info(f"load model from config: {output_path}")
+                logger.info(f"load model from config: {output_path}")
             else:
                 logger.info(f"load model from model server: {output_path}")
 
         loaded_item = load_downloaded_model(power_request.energy_source, output_type)
+
         if loaded_item is not None and loaded_item.estimator is not None:
             loaded_model[output_type.name][power_request.energy_source] = loaded_item
             logger.info(f"set model {loaded_item.model_name} for {output_type.name} ({power_request.energy_source})")
@@ -103,6 +104,7 @@ def handle_request(data):
         logger.info(f"{model.model_name} failed to predict; removed: {msg}")
         if output_path != "" and os.path.exists(output_path):
             shutil.rmtree(output_path)
+
     return {"powers": powers, "msg": msg}
 
 
@@ -124,7 +126,7 @@ def start(self):
                 os.remove(self.socket_path)
                 sys.stdout.write("close socket\n")
             except Exception as e:
-                logger.error(f"fail to close socket: {e}")
+                logger.error(f"failed to close socket: {e}")
 
     def accepted(self, connection):
         data = b""
diff --git a/src/kepler_model/estimate/model/model.py b/src/kepler_model/estimate/model/model.py
@@ -1,4 +1,5 @@
 import json
+import logging
 import pandas as pd
 
 from kepler_model.util.loader import load_metadata, get_download_output_path
@@ -10,6 +11,8 @@
 from kepler_model.estimate.model.curvefit_model import CurveFitModelEstimator
 # from keras_model import KerasModelEstimator
 
+logger = logging.getLogger(__name__)
+
 # model wrapper
 MODELCLASS = {
     "scikit": ScikitModelEstimator,
@@ -157,17 +160,18 @@ def append_idle_prediction(self, data, predicted_col_func=default_idle_predicted
 
 def load_model(model_path):
     metadata = load_metadata(model_path)
-    if metadata is not None:
-        metadata["model_path"] = model_path
-        metadata_str = json.dumps(metadata)
-        try:
-            model = json.loads(metadata_str, object_hook=lambda d: Model(**d))
-            return model
-        except Exception as e:
-            print("fail to load: ", e)
-            return None
-    print("no metadata")
-    return None
+    if not metadata:
+        logger.warning(f"no metadata in {model_path}")  # Logger.warn is a deprecated alias of warning
+        return None
+
+    metadata["model_path"] = model_path
+    metadata_str = json.dumps(metadata)  # serialized only so json.loads can apply object_hook below
+    try:
+        model = json.loads(metadata_str, object_hook=lambda d: Model(**d))
+        return model
+    except Exception as e:
+        logger.error(f"fail to load: {model_path} - {e}")
+        return None
 
 
 # download model folder has no subfolder of energy source and feature group because it has been already determined by model request
diff --git a/src/kepler_model/util/config.py b/src/kepler_model/util/config.py
@@ -14,7 +14,7 @@
 
 import os
 from .loader import get_url, get_pipeline_url, base_model_url, default_pipelines, default_train_output_pipeline
-from .train_types import ModelOutputType, is_support_output_type, FeatureGroup
+from .train_types import ModelOutputType, is_output_type_supported, FeatureGroup
 
 # must be writable (for shared volume mount)
 MNT_PATH = "/mnt"
@@ -150,7 +150,7 @@ def get_init_model_url(energy_source, output_type, model_topurl=model_topurl):
         if get_energy_source(prefix) == energy_source:
             modelURL = get_init_url(prefix)
             print("get init url", modelURL)
-            if modelURL == "" and is_support_output_type(output_type):
+            if modelURL == "" and is_output_type_supported(output_type):
                 print("init URL is not set, try using default URL".format(output_type))
                 return get_url(feature_group=FeatureGroup.BPFOnly, output_type=ModelOutputType[output_type], energy_source=energy_source, model_topurl=model_topurl, pipeline_name=pipeline_name)
             else:
diff --git a/src/kepler_model/util/loader.py b/src/kepler_model/util/loader.py
@@ -1,5 +1,6 @@
 import os
 import json
+import logging
 import joblib
 import pandas as pd
 from .saver import assure_path, METADATA_FILENAME, SCALER_FILENAME, WEIGHT_FILENAME, TRAIN_ARGS_FILENAME, NODE_TYPE_INDEX_FILENAME, MACHINE_SPEC_PATH, _pipeline_model_metadata_filename
@@ -31,6 +32,9 @@
 base_model_url = "https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-db/main/models/v{}".format(major_version)
 
 
+logger = logging.getLogger(__name__)
+
+
 def get_pipeline_url(model_topurl, pipeline_name):
     file_ext = ".zip"
     return os.path.join(model_topurl, pipeline_name + file_ext)
@@ -52,40 +56,45 @@ def assure_pipeline_name(pipeline_name, energy_source, nodeCollection):
 default_feature_group = FeatureGroup.BPFOnly
 
 
-def load_json(path, name):
-    if ".json" not in name:
+def load_json(path: str, name: str):
+    if not name.endswith(".json"):  # append the extension only when it is missing
         name = name + ".json"
+
     filepath = os.path.join(path, name)
     try:
         with open(filepath) as f:
             res = json.load(f)
         return res
     except Exception as err:
+        logger.error(f"fail to load json {filepath}: {err}")
         return None
 
 
-def load_pkl(path, name):
-    if ".pkl" not in name:
+def load_pkl(path: str, name: str):
+    if not name.endswith(".pkl"):  # append the extension only when it is missing
         name = name + ".pkl"
+
     filepath = os.path.join(path, name)
     try:
         res = joblib.load(filepath)
         return res
     except FileNotFoundError:
         return None
     except Exception as err:
-        print("fail to load pkl {}: {}".format(filepath, err))
+        logger.error(f"failed to load pkl {filepath}: {err}")
         return None
 
 
 def load_remote_pkl(url_path):
-    if ".pkl" not in url_path:
-        url_path = url_path + ".pkl"
+    if not url_path.endswith(".pkl"):
+        url_path += ".pkl"
+
     try:
-        response = urlopen(url_path)
-        loaded_model = joblib.load(response)
+        with urlopen(url_path) as response:  # close the response once it has been read
+            loaded_model = joblib.load(response)  # NOTE(review): unpickles remote data; use trusted URLs only
         return loaded_model
-    except:
+    except Exception as e:
+        logger.error(f"failed to load pkl url {url_path}: {e}")
         return None
 
 
@@ -133,8 +142,8 @@ def load_csv(path, name):
         data = pd.read_csv(file_path)
         data = data.apply(pd.to_numeric, errors="ignore")
         return data
-    except:
-        # print('cannot load {}'.format(file_path))
+    except Exception as err:
+        logger.error(f"cannot load {file_path}: {err}")
         return None
 
 
diff --git a/src/kepler_model/util/train_types.py b/src/kepler_model/util/train_types.py
@@ -1,6 +1,6 @@
 ###########################################################
 ## types.py
-## 
+##
 ## defines
 ## - collection of features
 ## - feature groups
@@ -17,7 +17,7 @@
 COUNTER_FEAUTRES = ["cache_miss", "cpu_cycles", "cpu_instructions"]
 BPF_FEATURES = ["bpf_cpu_time_ms", "bpf_page_cache_hit"]
 IRQ_FEATURES = ["bpf_block_irq", "bpf_net_rx_irq", "bpf_net_tx_irq"]
-ACCELERATE_FEATURES = ['accelerator_intel_qat']
+ACCELERATE_FEATURES = ["accelerator_intel_qat"]
 WORKLOAD_FEATURES = COUNTER_FEAUTRES + BPF_FEATURES + IRQ_FEATURES + ACCELERATE_FEATURES
 BASIC_FEATURES = COUNTER_FEAUTRES + BPF_FEATURES
 
@@ -40,8 +40,8 @@
                     "cpu_scaling_frequency_hertz": ["1GHz", "2GHz", "3GHz"],
                     }
 
-no_weight_trainers = ['PolynomialRegressionTrainer', 'GradientBoostingRegressorTrainer', 'KNeighborsRegressorTrainer', 'LinearRegressionTrainer','SVRRegressorTrainer', 'XgboostFitTrainer']
-weight_support_trainers = ['SGDRegressorTrainer', 'LogarithmicRegressionTrainer', 'LogisticRegressionTrainer', 'ExponentialRegressionTrainer']
+no_weight_trainers = ["PolynomialRegressionTrainer", "GradientBoostingRegressorTrainer", "KNeighborsRegressorTrainer", "LinearRegressionTrainer", "SVRRegressorTrainer", "XgboostFitTrainer"]
+weight_support_trainers = ["SGDRegressorTrainer", "LogarithmicRegressionTrainer", "LogisticRegressionTrainer", "ExponentialRegressionTrainer"]
 default_trainer_names = no_weight_trainers + weight_support_trainers
 default_trainers = ",".join(default_trainer_names)
 
@@ -59,24 +59,29 @@ class FeatureGroup(enum.Enum):
     ThirdParty = 10
     Unknown = 99
 
+
 class EnergyComponentLabelGroup(enum.Enum):
     PackageEnergyComponentOnly = 1
     DRAMEnergyComponentOnly = 2
     CoreEnergyComponentOnly = 3
     PackageDRAMEnergyComponents = 4
 
+
 class ModelOutputType(enum.Enum):
     AbsPower = 1
     DynPower = 2
 
-def is_support_output_type(output_type_name):
+
+def is_output_type_supported(output_type_name):
     return any(output_type_name == item.name for item in ModelOutputType)
 
+
 def deep_sort(elements):
     sorted_elements = elements.copy()
     sorted_elements.sort()
     return sorted_elements
 
+
 FeatureGroups = {
     FeatureGroup.Full: deep_sort(WORKLOAD_FEATURES + SYSTEM_FEATURES),
     FeatureGroup.WorkloadOnly: deep_sort(WORKLOAD_FEATURES),
@@ -90,9 +95,11 @@ def deep_sort(elements):
 
 SingleSourceFeatures = [FeatureGroup.CounterOnly.name, FeatureGroup.BPFOnly.name, FeatureGroup.BPFIRQ.name]
 
+
 def is_single_source_feature_group(fg):
     return fg.name in SingleSourceFeatures
 
+
 default_main_feature_map = {
     FeatureGroup.Full: "cpu_instructions",
     FeatureGroup.WorkloadOnly: "cpu_instructions",
@@ -122,15 +129,17 @@ def main_feature(feature_group_name, energy_component):
         feature = default_main_feature_map[feature_group]
     return features.index(feature)
 
+
 # XGBoostRegressionTrainType
 class XGBoostRegressionTrainType(enum.Enum):
     TrainTestSplitFit = 1
     KFoldCrossValidation = 2
 
+
 # XGBoost Model Feature and Label Incompatability Exception
 class XGBoostModelFeatureOrLabelIncompatabilityException(Exception):
-    """Exception raised when a saved model's features and label is incompatable with the training data. 
-    
+    """Exception raised when a saved model's features and labels are incompatible with the training data.
+
     ...
 
     Attributes
@@ -139,7 +148,7 @@ class XGBoostModelFeatureOrLabelIncompatabilityException(Exception):
     expected_labels: the expected model labels
     actual_features: the actual model features
     actual_labels: the actual model labels
-    features_incompatible: true if expected_features == actual_features else false 
+    features_incompatible: true if expected_features == actual_features else false
     labels_incompatible: true if expected_labels == actual_labels else false
     """
 
@@ -150,7 +159,6 @@ class XGBoostModelFeatureOrLabelIncompatabilityException(Exception):
     features_incompatible: bool
     labels_incompatible: bool
 
-
     def __init__(self, expected_features: List[str], expected_labels: List[str], received_features: List[str], received_labels: List[str], message="expected features/labels are the not the same as the features/labels of the training data") -> None:
         self.expected_features = expected_features
         self.expected_labels = expected_labels
@@ -188,12 +196,12 @@ def __init__(self, missing_model: bool, missing_model_desc: bool, message="model
     EnergyComponentLabelGroup.PackageEnergyComponentOnly: deep_sort(PACKAGE_ENERGY_COMPONENT_LABEL),
     EnergyComponentLabelGroup.DRAMEnergyComponentOnly: deep_sort(DRAM_ENERGY_COMPONENT_LABEL),
     EnergyComponentLabelGroup.CoreEnergyComponentOnly: deep_sort(CORE_ENERGY_COMPONENT_LABEL),
-    EnergyComponentLabelGroup.PackageDRAMEnergyComponents: deep_sort(PACKAGE_ENERGY_COMPONENT_LABEL + DRAM_ENERGY_COMPONENT_LABEL)
-
+    EnergyComponentLabelGroup.PackageDRAMEnergyComponents: deep_sort(PACKAGE_ENERGY_COMPONENT_LABEL + DRAM_ENERGY_COMPONENT_LABEL),
 }
 
 all_feature_groups = [fg.name for fg in FeatureGroups.keys()]
 
+
 def get_feature_group(features):
     sorted_features = deep_sort(features)
     for g, g_features in FeatureGroups.items():
@@ -202,6 +210,7 @@ def get_feature_group(features):
             return g
     return FeatureGroup.Unknown
 
+
 def get_valid_feature_groups(features):
     valid_fgs = []
     for fg_key, fg_features in FeatureGroups.items():
@@ -214,6 +223,7 @@ def get_valid_feature_groups(features):
             valid_fgs += [fg_key]
     return valid_fgs
 
+
 def is_weight_output(output_type):
     if output_type == ModelOutputType.AbsModelWeight:
         return True
@@ -225,7 +235,8 @@ def is_weight_output(output_type):
         return True
     return False
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     for g, g_features in FeatureGroups.items():
         shuffled_features = g_features.copy()
         random.shuffle(shuffled_features)