feat: added aml pipeline steps script

praneet22 · praneet22 · commit cfd3e2eaa5bb · 2019-03-01T17:54:48.000-05:00
diff --git a/code/evaluate/evaluate_model.py b/code/evaluate/evaluate_model.py
@@ -0,0 +1,126 @@
+"""
+Copyright (C) Microsoft Corporation. All rights reserved.​
+ ​
+Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual,
+royalty-free right to use, copy, and modify the software code provided by us
+("Software Code"). You may not sublicense the Software Code or any use of it
+(except to your affiliates and to vendors to perform work on your behalf)
+through distribution, network access, service agreement, lease, rental, or
+otherwise. This license does not purport to express any claim of ownership over
+data you may have shared with Microsoft in the creation of the Software Code.
+Unless applicable law gives you more rights, Microsoft reserves all other
+rights not expressly granted herein, whether by implication, estoppel or
+otherwise. ​
+ ​
+THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+"""
+import os, json
+from azureml.core import Workspace
+from azureml.core import Experiment
+from azureml.core.model import Model
+import azureml.core
+from azureml.core import Run
+import argparse
+
+
+# Resolve the experiment and workspace from the run this script executes in,
+# rather than loading them with Workspace.from_config().
+run = Run.get_context()
+exp = run.experiment
+ws = exp.workspace
+
+# Command line: two optional string options, both None when not supplied.
+parser = argparse.ArgumentParser("evaluate")
+parser.add_argument(
+    "--config_suffix", type=str, help="Datetime suffix for json config files"
+)
+parser.add_argument(
+    "--json_config",
+    type=str,
+    help="Directory to write all the intermediate json configs",
+)
+args = parser.parse_args()
+
+print("Argument 1: %s" % args.config_suffix)
+print("Argument 2: %s" % args.json_config)
+# Create the config directory when one was given (existing ones are fine).
+if args.json_config is not None:
+    os.makedirs(args.json_config, exist_ok=True)
+    print("%s created" % args.json_config)
+# TODO: parameterize the metrics on which the models should be compared.
+# TODO: add a golden data set on which every model's performance can be
+# evaluated.
+
+# Load the run details for this pipeline invocation from
+# <json_config>/run_id_<config_suffix>.json. The file is presumably written by
+# the training step (confirm against the pipeline definition); it is used
+# instead of a fixed aml_config/run_id.json.
+train_run_id_json = "run_id_{}.json".format(args.config_suffix)
+train_output_path = os.path.join(args.json_config, train_run_id_json)
+with open(train_output_path) as config_file:
+    raw_config = config_file.read()
+config = json.loads(raw_config)
+
+# The id of the newly trained run and the experiment name both come from that
+# file (not from dedicated command-line options such as --train_run_id).
+# The experiment object itself is taken from the run context and is not
+# rebuilt from experiment_name.
+new_model_run_id = config["run_id"]
+experiment_name = config["experiment_name"]
+
+
+# Decide whether the newly trained model should replace the current one.
+# The most recently registered model is assumed to be the one in production;
+# its run's "mse" metric is compared with the new run's "mse" metric.
+promote_new_model = False
+try:
+    model_list = Model.list(ws)
+    if not model_list:
+        # Nothing registered yet, so there is no baseline to compare against.
+        promote_new_model = True
+        print("This is the first model to be trained, thus nothing to evaluate for now")
+    else:
+        # Single pass with key= instead of recomputing max() for every model.
+        production_model = max(model_list, key=lambda m: m.created_time)
+        production_model_run_id = production_model.tags.get("run_id")
+
+        # Fetch the run history of both models to read their metrics.
+        production_model_run = Run(exp, run_id=production_model_run_id)
+        new_model_run = Run(exp, run_id=new_model_run_id)
+
+        production_model_mse = production_model_run.get_metrics().get("mse")
+        new_model_mse = new_model_run.get_metrics().get("mse")
+        print(
+            "Current Production model mse: {}, New trained model mse: {}".format(
+                production_model_mse, new_model_mse
+            )
+        )
+        if new_model_mse < production_model_mse:
+            promote_new_model = True
+            print("New trained model performs better, thus it will be registered")
+except Exception as error:  # not a bare except: let Ctrl-C/SystemExit through
+    promote_new_model = True  # best effort, as before: fall back to promoting
+    print("Could not evaluate against a production model: {}".format(error))
+
+# Record the outcome in <json_config>/run_id_<config_suffix>.json, the same
+# file name the run details were loaded from. An empty "run_id" means the new
+# model was not promoted. The model is not registered here; this step only
+# records the decision.
+run_id = {
+    "run_id": new_model_run_id if promote_new_model else "",
+    "experiment_name": experiment_name,
+}
+
+filename = "run_id_{}.json".format(args.config_suffix)
+output_path = os.path.join(args.json_config, filename)
+with open(output_path, "w") as outfile:
+    json.dump(run_id, outfile)
diff --git a/code/register/register_model.py b/code/register/register_model.py
@@ -0,0 +1,119 @@
+"""
+Copyright (C) Microsoft Corporation. All rights reserved.​
+ ​
+Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual,
+royalty-free right to use, copy, and modify the software code provided by us
+("Software Code"). You may not sublicense the Software Code or any use of it
+(except to your affiliates and to vendors to perform work on your behalf)
+through distribution, network access, service agreement, lease, rental, or
+otherwise. This license does not purport to express any claim of ownership over
+data you may have shared with Microsoft in the creation of the Software Code.
+Unless applicable law gives you more rights, Microsoft reserves all other
+rights not expressly granted herein, whether by implication, estoppel or
+otherwise. ​
+ ​
+THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+"""
+import os, json, sys
+from azureml.core import Workspace
+from azureml.core import Run
+from azureml.core import Experiment
+from azureml.core.model import Model
+import argparse
+
+from azureml.core.runconfig import RunConfiguration
+from azureml.core.authentication import AzureCliAuthentication
+
+cli_auth = AzureCliAuthentication()
+
+# Take the experiment and workspace from the current run context; cli_auth is
+# only needed by the Workspace.from_config(auth=cli_auth) alternative.
+run = Run.get_context()
+exp = run.experiment
+ws = exp.workspace
+
+parser = argparse.ArgumentParser("register")
+parser.add_argument(
+    "--config_suffix", type=str, help="Datetime suffix for json config files"
+)
+parser.add_argument(
+    "--json_config",
+    type=str,
+    help="Directory to write all the intermediate json configs",
+)
+args = parser.parse_args()
+
+print("Argument 1: %s" % args.config_suffix)
+print("Argument 2: %s" % args.json_config)
+# Create the config directory when one was given (existing ones are fine).
+if args.json_config is not None:
+    os.makedirs(args.json_config, exist_ok=True)
+    print("%s created" % args.json_config)
+
+evaluate_run_id_json = "run_id_{}.json".format(args.config_suffix)
+evaluate_output_path = os.path.join(args.json_config, evaluate_run_id_json)
+
+# Get the latest evaluation result. A missing/unreadable file or an empty
+# "run_id" both mean there is nothing to register, so the step exits cleanly.
+try:
+    with open(evaluate_output_path) as f:
+        config = json.load(f)
+    run_id = config["run_id"]
+except Exception as error:  # narrower than a bare except; report the cause
+    print("Could not read evaluation result {}: {}".format(evaluate_output_path, error))
+    sys.exit(0)
+if not run_id:
+    print("No new model to register as production model perform better")
+    sys.exit(0)
+
+experiment_name = config["experiment_name"]
+
+run = Run(experiment=exp, run_id=run_id)
+run.get_file_names()  # result unused; kept so the same service call is made
+print("Run ID for last run: {}".format(run_id))
+model_local_dir = "model"
+os.makedirs(model_local_dir, exist_ok=True)
+# Download the model file from the run's outputs into ./model
+model_name = "sklearn_regression_model.pkl"
+run.download_file(
+    name="./outputs/" + model_name, output_file_path="./model/" + model_name
+)
+print("Downloaded model {} to Project root directory".format(model_name))
+# Register from inside ./model so model_path is the bare file name, and always
+# return to the starting directory, even when registration fails.
+start_dir = os.getcwd()
+os.chdir("./model")
+try:
+    model = Model.register(
+        model_path=model_name,  # this points to a local file
+        model_name=model_name,  # this is the name the model is registered as
+        tags={"area": "diabetes", "type": "regression", "run_id": run_id},
+        description="Regression model for diabetes dataset",
+        workspace=ws,
+    )
+finally:
+    os.chdir(start_dir)
+summary = "Model registered: {} \nModel Description: {} \nModel Version: {}"
+print(summary.format(model.name, model.description, model.version))
+
+# Persist the registered model's details to
+# <json_config>/model_<config_suffix>.json. The evaluation result file is left
+# in place (it is not removed here).
+model_json = {
+    "model_name": model.name,
+    "model_version": model.version,
+    "run_id": run_id,
+}
+filename = "model_{}.json".format(args.config_suffix)
+output_path = os.path.join(args.json_config, filename)
+with open(output_path, "w") as outfile:
+    json.dump(model_json, outfile)
diff --git a/code/scoring/create_scoring_image.py b/code/scoring/create_scoring_image.py
@@ -0,0 +1,124 @@
+"""
+Copyright (C) Microsoft Corporation. All rights reserved.​
+ ​
+Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual,
+royalty-free right to use, copy, and modify the software code provided by us
+("Software Code"). You may not sublicense the Software Code or any use of it
+(except to your affiliates and to vendors to perform work on your behalf)
+through distribution, network access, service agreement, lease, rental, or
+otherwise. This license does not purport to express any claim of ownership over
+data you may have shared with Microsoft in the creation of the Software Code.
+Unless applicable law gives you more rights, Microsoft reserves all other
+rights not expressly granted herein, whether by implication, estoppel or
+otherwise. ​
+ ​
+THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+"""
+import os, json, sys
+import argparse
+from azureml.core import Workspace
+from azureml.core.image import ContainerImage, Image
+from azureml.core import Run
+from azureml.core.model import Model
+from azureml.core.authentication import AzureCliAuthentication
+
+cli_auth = AzureCliAuthentication()
+
+# Pick the workspace: a run whose id contains "OfflineRun" loads it with
+# Workspace.from_config and CLI auth; otherwise it comes from the run itself.
+run = Run.get_context()
+if "OfflineRun" not in run.id:
+    exp = run.experiment
+    ws = exp.workspace
+else:
+    print("offline run")
+    ws = Workspace.from_config(auth=cli_auth)
+
+# Command line: two optional string options, both None when not supplied.
+parser = argparse.ArgumentParser("scoring_image")
+parser.add_argument(
+    "--config_suffix", type=str, help="Datetime suffix for json config files"
+)
+parser.add_argument(
+    "--json_config",
+    type=str,
+    help="Directory to write all the intermediate json configs",
+)
+args = parser.parse_args()
+
+register_model_json = "model_{}.json".format(args.config_suffix)
+register_output_path = os.path.join(args.json_config, register_model_json)
+
+# A missing or unreadable model json means there is nothing to build, so exit
+# cleanly. Only file and JSON errors are handled; anything else propagates.
+try:
+    with open(register_output_path) as f:
+        config = json.load(f)
+except (OSError, ValueError):
+    print("No new model to register thus no need to create new scoring image")
+    sys.exit(0)
+
+model_name = config["model_name"]
+model_version = config["model_version"]
+model_list = Model.list(workspace=ws)
+matches = [m for m in model_list if (m.name, m.version) == (model_name, model_version)]
+if len(matches) != 1:
+    problem = "Expected exactly one registered model {} (version {}), found {}"
+    raise ValueError(problem.format(model_name, model_version, len(matches)))
+model = matches[0]
+picked = "Model picked: {} \nModel Description: {} \nModel Version: {}"
+print(picked.format(model.name, model.description, model.version))
+
+# The scoring assets are referenced by bare file name, so build from inside
+# ./code/scoring and go back up two levels afterwards.
+os.chdir("./code/scoring")
+image_name = "diabetes-model-score"
+image_config = ContainerImage.image_configuration(
+    execution_script="score.py",
+    runtime="python-slim",
+    conda_file="conda_dependencies.yml",
+    description="Image with ridge regression model",
+    tags={"area": "diabetes", "type": "regression"},
+)
+
+image = Image.create(
+    name=image_name, models=[model], image_config=image_config, workspace=ws
+)
+image.wait_for_creation(show_output=True)
+os.chdir("../..")
+
+# Fail the step when the build did not succeed, reporting the final state.
+if image.creation_state != "Succeeded":
+    raise Exception("Image creation status: {}".format(image.creation_state))
+print(
+    "{}(v.{} [{}]) stored at {} with build log {}".format(
+        image.name,
+        image.version,
+        image.creation_state,
+        image.image_location,
+        image.image_build_log_uri,
+    )
+)
+
+# Persist the image details to <json_config>/image_<config_suffix>.json
+# (used instead of a fixed aml_config/image.json location).
+image_json = {
+    "image_name": image.name,
+    "image_version": image.version,
+    "image_location": image.image_location,
+}
+filename = "image_{}.json".format(args.config_suffix)
+output_path = os.path.join(args.json_config, filename)
+with open(output_path, "w") as outfile:
+    json.dump(image_json, outfile)
+
+# Open question: how to fix the schema for a model, e.g. when multiple models expect different schemas?
diff --git a/code/training/train.py b/code/training/train.py