Update tutorial-azure-ml-in-a-day.md

raoberman · web-flow · commit 138713bcd98d · 2023-06-05T14:18:05.000-04:00
diff --git a/articles/machine-learning/tutorial-azure-ml-in-a-day.md b/articles/machine-learning/tutorial-azure-ml-in-a-day.md
@@ -215,6 +215,127 @@ You might need to select **Refresh** to see the new folder and script in your **
 
 :::image type="content" source="media/tutorial-azure-ml-in-a-day/refresh.png" alt-text="Screenshot shows the refresh icon.":::
 
+### [Optional] Enable Intel® Extension for Scikit-learn optimizations for better performance on Intel hardware
+
+Want to speed up your scikit-learn scripts on Intel hardware? Try enabling [Intel® Extension for Scikit-learn](https://www.intel.com/content/www/us/en/developer/tools/oneapi/scikit-learn.html) in your training script. Intel® Extension for Scikit-learn is already installed in the Azure Machine Learning curated environment used in this tutorial, so no additional installation is needed.
+
+To learn more about Intel® Extension for Scikit-learn, visit the package's [documentation](https://intel.github.io/scikit-learn-intelex/).
+
+If you want to use Intel® Extension for Scikit-learn in the training script described above, enable the performance optimizations by adding the following two lines of code near the top of the script file, before any scikit-learn modules are imported (the patch only affects scikit-learn imports that come after it), as shown below.
+
+
+```python
+%%writefile {train_src_dir}/main.py
+import os
+import argparse
+
+# Import and enable Intel Extension for Scikit-learn optimizations
+# where possible
+from sklearnex import patch_sklearn
+patch_sklearn()
+
+import pandas as pd
+import mlflow
+import mlflow.sklearn
+from sklearn.ensemble import GradientBoostingClassifier
+from sklearn.metrics import classification_report
+from sklearn.model_selection import train_test_split
+
+def main():
+    """Train a GradientBoostingClassifier on the CSV given by --data, then log and register it with MLflow."""
+
+    # Parse command-line arguments: data path, test split ratio, hyperparameters, model name
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--data", type=str, help="path to input data")
+    parser.add_argument("--test_train_ratio", type=float, required=False, default=0.25)
+    parser.add_argument("--n_estimators", required=False, default=100, type=int)
+    parser.add_argument("--learning_rate", required=False, default=0.1, type=float)
+    parser.add_argument("--registered_model_name", type=str, help="model name")
+    args = parser.parse_args()
+   
+    # Start an MLflow run; it is closed by mlflow.end_run() at the end of this function
+    mlflow.start_run()
+
+    # Enable MLflow autologging for scikit-learn before the model is fit
+    mlflow.sklearn.autolog()
+
+    ###################
+    #<prepare the data>
+    ###################
+    print(" ".join(f"{k}={v}" for k, v in vars(args).items()))
+
+    print("input data:", args.data)
+    
+    credit_df = pd.read_csv(args.data, header=1, index_col=0)  # column names on 2nd row; 1st column is the index
+
+    mlflow.log_metric("num_samples", credit_df.shape[0])
+    mlflow.log_metric("num_features", credit_df.shape[1] - 1)  # minus the label column
+
+    train_df, test_df = train_test_split(
+        credit_df,
+        test_size=args.test_train_ratio,
+    )
+    ####################
+    #</prepare the data>
+    ####################
+
+    ##################
+    #<train the model>
+    ##################
+    # Split the label column off the training frame (pop removes it from train_df)
+    y_train = train_df.pop("default payment next month")
+
+    # Remaining training columns are the features, taken as an array
+    X_train = train_df.values
+
+    # Split the label column off the test frame (pop removes it from test_df)
+    y_test = test_df.pop("default payment next month")
+
+    # Remaining test columns are the features, taken as an array
+    X_test = test_df.values
+
+    print(f"Training with data of shape {X_train.shape}")
+
+    clf = GradientBoostingClassifier(
+        n_estimators=args.n_estimators, learning_rate=args.learning_rate
+    )
+    clf.fit(X_train, y_train)
+
+    y_pred = clf.predict(X_test)
+
+    print(classification_report(y_test, y_pred))
+    ###################
+    #</train the model>
+    ###################
+
+    ##########################
+    #<save and register model>
+    ##########################
+    # Log the model to the run and register it under --registered_model_name
+    print("Registering the model via MLFlow")
+    mlflow.sklearn.log_model(
+        sk_model=clf,
+        registered_model_name=args.registered_model_name,
+        artifact_path=args.registered_model_name,
+    )
+
+    # Also save the model locally under <registered_model_name>/trained_model
+    mlflow.sklearn.save_model(
+        sk_model=clf,
+        path=os.path.join(args.registered_model_name, "trained_model"),
+    )
+    ###########################
+    #</save and register model>
+    ###########################
+    
+    # End the MLflow run started at the top of this function
+    mlflow.end_run()
+
+if __name__ == "__main__":
+    main()
+```
+
+
 ## Create a compute cluster, a scalable way to run a training job
 
 > [!NOTE]