Skip to content

Commit 138713b

Browse files
author
raoberman
authored
Update tutorial-azure-ml-in-a-day.md
1 parent b88a312 commit 138713b

File tree

1 file changed

+121
-0
lines changed

1 file changed

+121
-0
lines changed

articles/machine-learning/tutorial-azure-ml-in-a-day.md

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,127 @@ You might need to select **Refresh** to see the new folder and script in your **
215215

216216
:::image type="content" source="media/tutorial-azure-ml-in-a-day/refresh.png" alt-text="Screenshot shows the refresh icon.":::
217217

218+
### [Optional] Enable Intel® Extension for Scikit-Learn optimizations for better performance on Intel hardware
219+
220+
Want to speed up your scikit-learn scripts on Intel hardware? Try enabling [Intel® Extension for Scikit-Learn](https://www.intel.com/content/www/us/en/developer/tools/oneapi/scikit-learn.html) in your training script. Intel® Extension for Scikit-Learn is already installed in the Azure Machine Learning curated environment used in this tutorial, so no additional installation is needed.
221+
222+
To learn more about Intel® Extension for Scikit-Learn, visit the package's [documentation](https://intel.github.io/scikit-learn-intelex/).
223+
224+
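If you want to use Intel® Extension for Scikit-Learn as part of the training script described above, you can enable the performance optimizations by adding the following two lines of code near the top of the script file, before any scikit-learn imports, as shown below. The call to `patch_sklearn()` must run before `sklearn` is imported; otherwise the optimizations aren't applied.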
225+
226+
227+
```python
228+
%%writefile {train_src_dir}/main.py
229+
import os
230+
import argparse
231+
232+
# Import and enable Intel Extension for Scikit-learn optimizations
233+
# where possible
234+
from sklearnex import patch_sklearn
235+
patch_sklearn()
236+
237+
import pandas as pd
238+
import mlflow
239+
import mlflow.sklearn
240+
from sklearn.ensemble import GradientBoostingClassifier
241+
from sklearn.metrics import classification_report
242+
from sklearn.model_selection import train_test_split
243+
244+
# Entry point of the training script: parses the command-line arguments,
# loads the CSV data, trains a GradientBoostingClassifier, prints a
# classification report, and logs, registers, and saves the model via MLflow.
def main():
245+
"""Main function of the script."""
246+
247+
# input and output arguments
248+
parser = argparse.ArgumentParser()
249+
# NOTE(review): --data and --registered_model_name have no default and are
# not marked required, so omitting them leaves the value as None.
parser.add_argument("--data", type=str, help="path to input data")
250+
parser.add_argument("--test_train_ratio", type=float, required=False, default=0.25)
251+
parser.add_argument("--n_estimators", required=False, default=100, type=int)
252+
parser.add_argument("--learning_rate", required=False, default=0.1, type=float)
253+
parser.add_argument("--registered_model_name", type=str, help="model name")
254+
args = parser.parse_args()
255+
256+
# Start Logging
257+
# NOTE(review): the run is ended only by the mlflow.end_run() call at the end
# of main(); an exception raised in between skips that call.
mlflow.start_run()
258+
259+
# enable autologging
260+
mlflow.sklearn.autolog()
261+
262+
###################
263+
#<prepare the data>
264+
###################
265+
print(" ".join(f"{k}={v}" for k, v in vars(args).items()))
266+
267+
print("input data:", args.data)
268+
269+
# header=1 takes the column names from the second row of the file;
# index_col=0 uses the first column as the index
credit_df = pd.read_csv(args.data, header=1, index_col=0)
270+
271+
mlflow.log_metric("num_samples", credit_df.shape[0])
272+
# subtract 1 so the label column is not counted as a feature
mlflow.log_metric("num_features", credit_df.shape[1] - 1)
273+
274+
train_df, test_df = train_test_split(
275+
credit_df,
276+
test_size=args.test_train_ratio,
277+
)
278+
####################
279+
#</prepare the data>
280+
####################
281+
282+
##################
283+
#<train the model>
284+
##################
285+
# Extracting the label column
286+
# pop() removes the label column from train_df, so only feature columns remain
y_train = train_df.pop("default payment next month")
287+
288+
# convert the dataframe values to array
289+
X_train = train_df.values
290+
291+
# Extracting the label column
292+
y_test = test_df.pop("default payment next month")
293+
294+
# convert the dataframe values to array
295+
X_test = test_df.values
296+
297+
print(f"Training with data of shape {X_train.shape}")
298+
299+
clf = GradientBoostingClassifier(
300+
n_estimators=args.n_estimators, learning_rate=args.learning_rate
301+
)
302+
clf.fit(X_train, y_train)
303+
304+
y_pred = clf.predict(X_test)
305+
306+
print(classification_report(y_test, y_pred))
307+
###################
308+
#</train the model>
309+
###################
310+
311+
##########################
312+
#<save and register model>
313+
##########################
314+
# Registering the model to the workspace
315+
print("Registering the model via MLFlow")
316+
# the registered model name is also used as the artifact path
mlflow.sklearn.log_model(
317+
sk_model=clf,
318+
registered_model_name=args.registered_model_name,
319+
artifact_path=args.registered_model_name,
320+
)
321+
322+
# Saving the model to a file
323+
# written to the relative path <registered_model_name>/trained_model
mlflow.sklearn.save_model(
324+
sk_model=clf,
325+
path=os.path.join(args.registered_model_name, "trained_model"),
326+
)
327+
###########################
328+
#</save and register model>
329+
###########################
330+
331+
# Stop Logging
332+
mlflow.end_run()
333+
334+
if __name__ == "__main__":
335+
main()
336+
```
337+
338+
218339
## Create a compute cluster, a scalable way to run a training job
219340

220341
> [!NOTE]

0 commit comments

Comments
 (0)