@@ -195,6 +195,9 @@
"filedst_10_models = Dataset.get_by_name(ws, name='oj_data_small_train')\n",
"filedst_10_models_input = filedst_10_models.as_named_input('train_10_models')\n",
"\n",
"filedst_10_models_inference = Dataset.get_by_name(ws, name='oj_data_small_inference')\n",
"filedst_10_models_inference_input = filedst_10_models_inference.as_named_input('inference_10_models')\n",
"\n",
"#filedst_all_models = Dataset.get_by_name(ws, name='oj_data_train')\n",
"#filedst_all_models_inputs = filedst_all_models.as_named_input('train_all_models')"
]
@@ -379,11 +382,122 @@
" output_datastore=dstore)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 4.0 Prediction (Optional)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If you want to run prediction after training, you need to save the AutoMLPipelineBuilder output from creating the training steps. AutoMLPipelineBuilder generates the Python driver files for each step and writes them to PROJECT_DIR, so invoking it more than once while constructing a single pipeline requires stashing the generated files between calls. Without this, building the prediction steps now would overwrite the files backing train_steps."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from azureml.contrib.automl.pipeline.steps.automl_pipeline_builder import PROJECT_DIR as automl_dir\n",
"# Move the train pipeline steps into a temp folder\n",
"os.rename(automl_dir, \"temp\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create the prediction steps"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.environment import Environment\n",
"from azureml.data import OutputFileDatasetConfig\n",
"\n",
"\n",
"inference_data_output = OutputFileDatasetConfig(name=\"inference_data_output\")\n",
"inf_env = Environment.get(ws, name=\"AzureML-AutoML\")\n",
"\n",
"predict_steps = AutoMLPipelineBuilder.get_many_models_batch_inference_steps(\n",
" experiment=experiment,\n",
" inference_data=filedst_10_models_inference_input,\n",
" output_datastore=inference_data_output,\n",
" compute_target=compute,\n",
" inference_env=inf_env,\n",
" partition_column_names=partition_column_names,\n",
" node_count=2,\n",
" process_count_per_node=8,\n",
" run_invocation_timeout=3700,\n",
" )"
]
},
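As a sanity check on the parallelism settings in the cell above: `node_count * process_count_per_node` invocations run concurrently, and each invocation may run for up to `run_invocation_timeout` seconds. A rough capacity estimate, assuming the small dataset holds 10 partitions (inferred from the `train_10_models` naming, not stated explicitly here):

```python
import math

# Values copied from the get_many_models_batch_inference_steps call above.
node_count = 2
process_count_per_node = 8
run_invocation_timeout = 3700  # seconds allowed per partition invocation

# Total concurrent partition invocations across the cluster.
workers = node_count * process_count_per_node
print(workers)  # 16

# Worst-case wall-clock bound if every invocation hits the timeout.
n_partitions = 10  # assumption: one partition per model in oj_data_small
waves = math.ceil(n_partitions / workers)
print(waves * run_invocation_timeout)  # one wave of work here
```

With 16 workers and only 10 partitions, everything fits in a single wave, so the timeout bounds the whole inference step.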
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copy the training + prediction step data together"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import shutil\n",
"from distutils.dir_util import copy_tree\n",
"\n",
"copy_tree(\"temp\", automl_dir)\n",
"shutil.rmtree(\"temp\")"
]
},
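Taken together, the stash cell and the copy-back cell implement a simple save-aside pattern for the generated driver files. A minimal, self-contained sketch of the same pattern, using a throwaway scratch directory rather than the real `PROJECT_DIR` (the file names here are illustrative, not the actual generated driver names):

```python
import os
import shutil
import tempfile

# Work in a scratch directory so this sketch touches nothing real.
root = tempfile.mkdtemp()
automl_dir = os.path.join(root, "automl_project")  # stand-in for PROJECT_DIR
stash_dir = os.path.join(root, "temp")

# 1. The "training" builder call writes driver files into the project dir.
os.makedirs(automl_dir)
with open(os.path.join(automl_dir, "train_driver.py"), "w") as f:
    f.write("# generated training driver\n")

# 2. Stash the training output so the next builder call cannot clobber it.
os.rename(automl_dir, stash_dir)

# 3. The "prediction" builder call regenerates the dir with its own files.
os.makedirs(automl_dir)
with open(os.path.join(automl_dir, "predict_driver.py"), "w") as f:
    f.write("# generated prediction driver\n")

# 4. Merge the stashed training files back alongside the prediction files.
shutil.copytree(stash_dir, automl_dir, dirs_exist_ok=True)
shutil.rmtree(stash_dir)

print(sorted(os.listdir(automl_dir)))  # both drivers survive the merge
```

The notebook uses `distutils.dir_util.copy_tree` for step 4, which performs the same merge-into-existing-directory copy; `shutil.copytree(..., dirs_exist_ok=True)` is the equivalent on Python 3.8+.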
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Wire up the dependencies to ensure that training happens prior to prediction"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Ensure predict_step is run after train\n",
"dependent = predict_steps[0]\n",
"dependency = train_steps[-1]\n",
"dependent.run_after(dependency)\n",
"\n",
"# concatenate prediction steps onto training steps\n",
"train_steps = train_steps + predict_steps"
]
},
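`run_after` only declares an ordering edge between steps; the pipeline scheduler still decides when each step actually executes. As an illustration of those semantics (a toy model, not the azureml implementation), a scheduler honoring such edges amounts to a topological sort:

```python
from collections import deque

class Step:
    """Toy stand-in for a pipeline step; models only ordering."""
    def __init__(self, name):
        self.name = name
        self.after = []  # steps that must finish before this one starts

    def run_after(self, other):
        self.after.append(other)

def execution_order(steps):
    """Kahn's algorithm: a step is emitted only once its dependencies ran."""
    indegree = {s: len(s.after) for s in steps}
    dependents = {s: [] for s in steps}
    for s in steps:
        for dep in s.after:
            dependents[dep].append(s)
    ready = deque(s for s in steps if indegree[s] == 0)
    order = []
    while ready:
        s = ready.popleft()
        order.append(s)
        for d in dependents[s]:
            indegree[d] -= 1
            if indegree[d] == 0:
                ready.append(d)
    return order

# Mirror the notebook: prediction starts only after the last training step.
train = [Step("train_0"), Step("train_1")]
predict = [Step("predict_0")]
predict[0].run_after(train[-1])
order = [s.name for s in execution_order(train + predict)]
print(order)  # prediction is scheduled last
```

Note that without the `run_after` edge, the training and prediction steps would have no ordering constraint at all and could be scheduled concurrently.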
{
"cell_type": "markdown",
"metadata": {},
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 5.0 Run the pipeline"
]
},
{