Generalize model tag finder, tag model experiment_name (#103)

dtzar · eedorenko · commit 6c56eeee1470 · 2019-11-22T10:34:23.000-08:00
diff --git a/code/evaluate/evaluate_model.py b/code/evaluate/evaluate_model.py
@@ -24,14 +24,17 @@
 POSSIBILITY OF SUCH DAMAGE.
 """
 import os
-from azureml.core import Model, Run, Workspace, Experiment
+import sys
+from azureml.core import Run, Workspace, Experiment
 import argparse
 from azureml.core.authentication import ServicePrincipalAuthentication
 import traceback
 
 run = Run.get_context()
 if (run.id.startswith('OfflineRun')):
     from dotenv import load_dotenv
+    sys.path.append(os.path.abspath("./code/util"))  # NOQA: E402
+    from model_helper import get_model_by_tag
     # For local development, set values in this section
     load_dotenv()
     workspace_name = os.environ.get("WORKSPACE_NAME")
@@ -56,8 +59,10 @@
     )
     ws = aml_workspace
     exp = Experiment(ws, experiment_name)
-    run_id = "e78b2c27-5ceb-49d9-8e84-abe7aecf37d5"
+    run_id = "57fee47f-5ae8-441c-bc0c-d4c371f32d70"
 else:
+    sys.path.append(os.path.abspath("./util"))  # NOQA: E402
+    from model_helper import get_model_by_tag
     exp = run.experiment
     ws = run.experiment.workspace
     run_id = 'amlcompute'
@@ -94,16 +99,15 @@
 # Paramaterize the matrices on which the models should be compared
 # Add golden data set on which all the model performance can be evaluated
 try:
-    model_list = Model.list(ws)
-    if (len(model_list) > 0):
-        production_model = next(
-            filter(
-                lambda x: x.created_time == max(
-                    model.created_time for model in model_list),
-                model_list,
-            )
-        )
-        production_model_run_id = production_model.run_id
+    firstRegistration = False
+    tag_name = 'experiment_name'
+
+    model = get_model_by_tag(
+        model_name, tag_name, exp.name, ws)
+
+    if (model is not None):
+
+        production_model_run_id = model.run_id
 
         # Get the run history for both production model and
         # newly trained model and compare mse
@@ -136,6 +140,7 @@
     else:
         print("This is the first model, "
               "thus it should be registered")
+
 except Exception:
     traceback.print_exc(limit=None, file=None, chain=True)
     print("Something went wrong trying to evaluate. Exiting.")
diff --git a/code/register/register_model.py b/code/register/register_model.py
@@ -38,7 +38,7 @@ def main():
     if (run.id.startswith('OfflineRun')):
         from dotenv import load_dotenv
         sys.path.append(os.path.abspath("./code/util"))  # NOQA: E402
-        from model_helper import get_model_by_build_id
+        from model_helper import get_model_by_tag
         # For local development, set values in this section
         load_dotenv()
         workspace_name = os.environ.get("WORKSPACE_NAME")
@@ -66,7 +66,7 @@ def main():
         run_id = "bd184a18-2ac8-4951-8e78-e290bef3b012"
     else:
         sys.path.append(os.path.abspath("./util"))  # NOQA: E402
-        from model_helper import get_model_by_build_id
+        from model_helper import get_model_by_tag
         ws = run.experiment.workspace
         exp = run.experiment
         run_id = 'amlcompute'
@@ -108,7 +108,8 @@ def main():
 
     if (validate):
         try:
-            get_model_by_build_id(model_name, build_id, exp.workspace)
+            tag_name = 'BuildId'
+            get_model_by_tag(model_name, tag_name, build_id, exp.workspace)
             print("Model was registered for this build.")
         except Exception as e:
             print(e)
@@ -139,12 +140,13 @@ def register_aml_model(model_name, exp, run_id, build_id: str = 'none'):
             model_already_registered(model_name, exp, run_id)
             run = Run(experiment=exp, run_id=run_id)
             tagsValue = {"area": "diabetes", "type": "regression",
-                         "BuildId": build_id, "run_id": run_id}
+                         "BuildId": build_id, "run_id": run_id,
+                         "experiment_name": exp.name}
         else:
             run = Run(experiment=exp, run_id=run_id)
             if (run is not None):
-                tagsValue = {"area": "diabetes",
-                             "type": "regression", "run_id": run_id}
+                tagsValue = {"area": "diabetes", "type": "regression",
+                             "run_id": run_id, "experiment_name": exp.name}
             else:
                 print("A model run for experiment", exp.name,
                       "matching properties run_id =", run_id,
diff --git a/code/util/model_helper.py b/code/util/model_helper.py
@@ -22,9 +22,10 @@ def get_current_workspace() -> Workspace:
     return experiment.workspace
 
 
-def _get_model_by_build_id(
+def get_model_by_tag(
     model_name: str,
-    build_id: str,
+    tag_name: str,
+    tag_value: str,
     aml_workspace: Workspace = None
 ) -> AMLModel:
     """
@@ -34,52 +35,46 @@ def _get_model_by_build_id(
     Parameters:
     aml_workspace (Workspace): aml.core Workspace that the model lives.
     model_name (str): name of the model we are looking for
-    build_id (str): the build id the model was registered under.
+    tag (str): the tag value the model was registered under.
 
     Return:
     A single aml model from the workspace that matches the name and tag.
     """
-    # Validate params. cannot be None.
-    if model_name is None:
-        raise ValueError("model_name[:str] is required")
-    if build_id is None:
-        raise ValueError("build_id[:str] is required")
-    if aml_workspace is None:
-        aml_workspace = get_current_workspace()
+    try:
+        # Validate params. cannot be None.
+        if model_name is None:
+            raise ValueError("model_name[:str] is required")
+        if tag_name is None:
+            raise ValueError("tag_name[:str] is required")
+        if tag_value is None:
+            raise ValueError("tag[:str] is required")
+        if aml_workspace is None:
+            aml_workspace = get_current_workspace()
 
-    # get model by tag.
-    model_list = AMLModel.list(
-        aml_workspace, name=model_name,
-        tags=[["BuildId", build_id]], latest=True
-    )
+        # get model by tag.
+        model_list = AMLModel.list(
+            aml_workspace, name=model_name,
+            tags=[[tag_name, tag_value]], latest=True
+        )
 
-    # latest should only return 1 model, but if it does, then maybe
-    # internal code was accidentally changed or the source code has changed.
-    should_not_happen = ("THIS SHOULD NOT HAPPEN: found more than one model "
-                         "for the latest with {{model_name: {model_name},"
-                         "BuildId: {build_id}. Models found: {model_list}}}")\
-        .format(model_name=model_name, build_id=build_id,
-                model_list=model_list)
-    if len(model_list) > 1:
-        raise ValueError(should_not_happen)
+        # latest should only return 1 model, but if it does,
+        # then maybe sdk or source code changed.
+        should_not_happen = ("Found more than one model "
+                             "for the latest with {{tag_name: {tag_name},"
+                             "tag_value: {tag_value}. "
+                             "Models found: {model_list}}}")\
+            .format(tag_name=tag_name, tag_value=tag_value,
+                    model_list=model_list)
+        no_model_found = ("No Model found with {{tag_name: {tag_name} ,"
+                          "tag_value: {tag_value}.}}")\
+            .format(tag_name=tag_name, tag_value=tag_value)
 
-    return model_list
-
-
-def get_model_by_build_id(
-    model_name: str,
-    build_id: str,
-    aml_workspace: Workspace = None
-) -> AMLModel:
-    """
-    Wrapper function for get_model_by_id that throws an error if model is none
-    """
-    model_list = _get_model_by_build_id(model_name, build_id, aml_workspace)
-
-    if model_list:
-        return model_list[0]
-
-    no_model_found = ("Model not found with model_name: {model_name} "
-                      "BuildId: {build_id}.")\
-        .format(model_name=model_name, build_id=build_id)
-    raise Exception(no_model_found)
+        if len(model_list) > 1:
+            raise ValueError(should_not_happen)
+        if len(model_list) == 1:
+            return model_list[0]
+        else:
+            print(no_model_found)
+            return None
+    except Exception:
+        raise
diff --git a/ml_service/pipelines/run_train_pipeline.py b/ml_service/pipelines/run_train_pipeline.py
@@ -10,9 +10,9 @@
 def main():
     e = Env()
     service_principal = ServicePrincipalAuthentication(
-            tenant_id=e.tenant_id,
-            service_principal_id=e.app_id,
-            service_principal_password=e.app_secret)
+        tenant_id=e.tenant_id,
+        service_principal_id=e.app_id,
+        service_principal_password=e.app_secret)
 
     aml_workspace = Workspace.get(
         name=e.workspace_name,
diff --git a/ml_service/util/env_variables.py b/ml_service/util/env_variables.py
@@ -7,7 +7,7 @@ class Singleton(object):
 
     def __new__(class_, *args, **kwargs):
         if class_ not in class_._instances:
-            class_._instances[class_] = super(Singleton, class_).__new__(class_, *args, **kwargs) # noqa E501
+            class_._instances[class_] = super(Singleton, class_).__new__(class_, *args, **kwargs)  # noqa E501
         return class_._instances[class_]
 
 
@@ -23,7 +23,7 @@ def __init__(self):
         self._app_secret = os.environ.get("SP_APP_SECRET")
         self._vm_size = os.environ.get("AML_COMPUTE_CLUSTER_CPU_SKU")
         self._compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME")
-        self._vm_priority = os.environ.get("AML_CLUSTER_PRIORITY", 'lowpriority') # noqa E501
+        self._vm_priority = os.environ.get("AML_CLUSTER_PRIORITY", 'lowpriority')  # noqa E501
         self._min_nodes = int(os.environ.get("AML_CLUSTER_MIN_NODES", 0))
         self._max_nodes = int(os.environ.get("AML_CLUSTER_MAX_NODES", 4))
         self._build_id = os.environ.get("BUILD_BUILDID")