From 244bb9f7ab234c40eb6b53b61c462bac8db0a7d8 Mon Sep 17 00:00:00 2001 From: Scott Grosenick Date: Thu, 22 Sep 2022 16:53:52 -0700 Subject: [PATCH 1/3] Adding optional code for demonstrating how to add prediction to an automl training pipeline --- .../02_AutoML_Training_Pipeline.ipynb | 132 +++++++++++++++++- 1 file changed, 126 insertions(+), 6 deletions(-) diff --git a/Automated_ML/02_AutoML_Training_Pipeline/02_AutoML_Training_Pipeline.ipynb b/Automated_ML/02_AutoML_Training_Pipeline/02_AutoML_Training_Pipeline.ipynb index 81683c8..5e36976 100644 --- a/Automated_ML/02_AutoML_Training_Pipeline/02_AutoML_Training_Pipeline.ipynb +++ b/Automated_ML/02_AutoML_Training_Pipeline/02_AutoML_Training_Pipeline.ipynb @@ -111,7 +111,8 @@ "import pandas as pd\n", "\n", "# set up workspace\n", - "ws= Workspace.from_config()\n", + "#ws= Workspace.from_config()\n", + "ws= Workspace(subscription_id='3f08b018-5d36-4171-926f-2384f3676e3d', resource_group='rg-sharingtest-dev', workspace_name='mlw-sharingtest-dev')\n", "\n", "# Take a look at Workspace\n", "ws.get_details()\n", @@ -195,6 +196,9 @@ "filedst_10_models = Dataset.get_by_name(ws, name='oj_data_small_train')\n", "filedst_10_models_input = filedst_10_models.as_named_input('train_10_models')\n", "\n", + "filedst_10_models_inference = Dataset.get_by_name(ws, name='oj_data_small_inference')\n", + "filedst_10_models_inference_input = filedst_10_models_inference.as_named_input('inference_10_models')\n", + "\n", "#filedst_all_models = Dataset.get_by_name(ws, name='oj_data_train')\n", "#filedst_all_models_inputs = filedst_all_models.as_named_input('train_all_models')" ] @@ -233,7 +237,7 @@ "from azureml.core.compute import ComputeTarget\n", "\n", "# Choose a name for your cluster.\n", - "amlcompute_cluster_name = \"cpucluster\"\n", + "amlcompute_cluster_name = \"DEV-CLUSTER-01\" #\"CPU-DEV-CLUSTER-01\"\n", "\n", "found = False\n", "# Check if this compute target already exists in the workspace.\n", @@ -382,8 +386,119 @@ { "cell_type": "markdown", "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4.0 Prediction (Optional)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the event that you want to do prediction after training, we need to save off the AutoMLPipelineBuilder output from creating the training steps. The AutoMLPipelineBuilder generates the Python driver files for the training and prediction steps and writes them out to the PROJECT_DIR. If you want to invoke the AutoMLPipelineBuilder multiple times when constructing a single pipeline, you have to stash the generated files between calls. If we do not do this, the AutoMLPipelineBuilder will overwrite the train_steps if we build prediction steps now." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from azureml.contrib.automl.pipeline.steps.automl_pipeline_builder import PROJECT_DIR as automl_dir\n", + "# Move the train pipeline steps into a temp folder\n", + "os.rename(automl_dir, \"temp\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create the prediction steps" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ - "## 4.0 Run the training pipeline" + "from azureml.core.environment import Environment\n", + "from azureml.data import OutputFileDatasetConfig\n", + "\n", + "\n", + "inference_data_output = OutputFileDatasetConfig(name=\"inference_data_output\")\n", + "inf_env = Environment.get(ws, name=\"AzureML-AutoML\")\n", + "\n", + "predict_steps = AutoMLPipelineBuilder.get_many_models_batch_inference_steps(\n", + " experiment=experiment,\n", + " inference_data=filedst_10_models_inference_input,\n", + " output_datastore=inference_data_output,\n", + " compute_target=compute,\n", + " inference_env=inf_env,\n", + " partition_column_names=partition_column_names,\n", + " node_count=2,\n", + " process_count_per_node=8,\n", + " run_invocation_timeout=3700,\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copy the training + prediction step data together" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import shutil\n", + "from distutils.dir_util import copy_tree\n", + "\n", + "copy_tree(\"temp\", automl_dir)\n", + "shutil.rmtree(\"temp\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Wire up the dependencies to ensure that training happens prior to prediction" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Ensure predict_step is run after train\n", + "dependent = predict_steps[0]\n", + "dependency = train_steps[-1]\n", + "dependent.run_after(dependency)\n", + "\n", + "# concatenate prediction steps onto training steps\n", + "train_steps = train_steps + predict_steps" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5.0 Run the pipeline" ] }, { @@ -581,9 +696,9 @@ } ], "kernelspec": { - "display_name": "Python 3.6", + "display_name": "Python 3.8.13 ('aml-many-models-env')", "language": "python", - "name": "python36" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -595,10 +710,15 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.7" + "version": "3.8.13" }, "nteract": { "version": "nteract-front-end@1.0.0" + }, + "vscode": { + "interpreter": { + "hash": "33c00bdcfceaf86e9e75a8ec3169afe182b33553a5784957e67704ef0a1a5ba6" + } } }, "nbformat": 4, From 19bf4ff03a1ab810a548bafe51a8469acf5537db Mon Sep 17 00:00:00 2001 From: Scott Grosenick Date: Thu, 22 Sep 2022 17:00:27 -0700 Subject: [PATCH 2/3] Cleanup prior to PR --- .../02_AutoML_Training_Pipeline.ipynb | 22 +++++-------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/Automated_ML/02_AutoML_Training_Pipeline/02_AutoML_Training_Pipeline.ipynb b/Automated_ML/02_AutoML_Training_Pipeline/02_AutoML_Training_Pipeline.ipynb index 5e36976..2a349cb 100644 --- a/Automated_ML/02_AutoML_Training_Pipeline/02_AutoML_Training_Pipeline.ipynb +++ b/Automated_ML/02_AutoML_Training_Pipeline/02_AutoML_Training_Pipeline.ipynb @@ -111,8 +111,7 @@ "import pandas as pd\n", "\n", "# set up workspace\n", - "#ws= Workspace.from_config()\n", - "ws= Workspace(subscription_id='3f08b018-5d36-4171-926f-2384f3676e3d', resource_group='rg-sharingtest-dev', workspace_name='mlw-sharingtest-dev')\n", + "ws= Workspace.from_config()\n", "\n", "# Take a look at Workspace\n", "ws.get_details()\n", @@ -237,7 +236,7 @@ "from azureml.core.compute import ComputeTarget\n", "\n", "# Choose a name for your cluster.\n", - "amlcompute_cluster_name = \"DEV-CLUSTER-01\" #\"CPU-DEV-CLUSTER-01\"\n", + "amlcompute_cluster_name = \"cpucluster\"\n", "\n", "found = False\n", "# Check if this compute target already exists in the workspace.\n", @@ -696,29 +695,20 @@ } ], "kernelspec": { - "display_name": "Python 3.8.13 ('aml-many-models-env')", + "display_name": "Python 3.6", "language": "python", - "name": "python3" + "name": "python36" }, "language_info": { "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", + @@ -595,10 +710,15 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + "version": "3.6.7" }, "nteract": { "version": "nteract-front-end@1.0.0" - }, - "vscode": { - "interpreter": { - "hash": "33c00bdcfceaf86e9e75a8ec3169afe182b33553a5784957e67704ef0a1a5ba6" - } } }, "nbformat": 4, From a251aca53ecf4bce6d2ea0ff946393e79aee1ba3 Mon Sep 17 00:00:00 2001 From: Scott Grosenick Date: Thu, 22 Sep 2022 17:02:17 -0700 Subject: [PATCH 3/3] Cleanup prior to PR --- .../02_AutoML_Training_Pipeline.ipynb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Automated_ML/02_AutoML_Training_Pipeline/02_AutoML_Training_Pipeline.ipynb b/Automated_ML/02_AutoML_Training_Pipeline/02_AutoML_Training_Pipeline.ipynb index 2a349cb..c903a30 100644 --- a/Automated_ML/02_AutoML_Training_Pipeline/02_AutoML_Training_Pipeline.ipynb +++ b/Automated_ML/02_AutoML_Training_Pipeline/02_AutoML_Training_Pipeline.ipynb @@ -701,7 +701,11 @@ }, "language_info": { "codemirror_mode": { - @@ -595,10 +710,15 @@ + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3",