Skip to content

Commit 8952976

Browse files
Merge pull request #253749 from sdgilley/patch-5
Remove extension section
2 parents 463b6fd + c5de8d7 commit 8952976

File tree

1 file changed

+0
-121
lines changed

1 file changed

+0
-121
lines changed

articles/machine-learning/tutorial-azure-ml-in-a-day.md

Lines changed: 0 additions & 121 deletions
Original file line number | Diff line number | Diff line change
@@ -219,127 +219,6 @@ You might need to select **Refresh** to see the new folder and script in your **
219219

220220
:::image type="content" source="media/tutorial-azure-ml-in-a-day/refresh.png" alt-text="Screenshot shows the refresh icon.":::
221221

222-
### [Optional] Enable Intel® Extension for Scikit-Learn optimizations for more performance on Intel hardware
223-
224-
Want to speed up your scikit-learn scripts on Intel hardware? Try enabling [Intel® Extension for Scikit-Learn](https://www.intel.com/content/www/us/en/developer/tools/oneapi/scikit-learn.html) in your training script. Intel® Extension for Scikit-Learn is already installed in the Azure Machine Learning curated environment used in this tutorial, so no additional installation is needed.
225-
226-
To learn more about Intel® Extension for Scikit-Learn, visit the package's [documentation](https://intel.github.io/scikit-learn-intelex/).
227-
228-
If you want to use Intel® Extension for Scikit-Learn as part of the training script described above, you can enable the performance optimizations by adding the following two lines of code (`from sklearnex import patch_sklearn` and `patch_sklearn()`) near the top of the script file, before any scikit-learn imports, as shown below.
229-
230-
231-
```python
232-
%%writefile {train_src_dir}/main.py
233-
import os
234-
import argparse
235-
236-
# Import and enable Intel Extension for Scikit-learn optimizations
237-
# where possible
238-
from sklearnex import patch_sklearn
239-
patch_sklearn()
240-
241-
import pandas as pd
242-
import mlflow
243-
import mlflow.sklearn
244-
from sklearn.ensemble import GradientBoostingClassifier
245-
from sklearn.metrics import classification_report
246-
from sklearn.model_selection import train_test_split
247-
248-
def main():
249-
"""Main function of the script."""
250-
251-
# input and output arguments
252-
parser = argparse.ArgumentParser()
253-
parser.add_argument("--data", type=str, help="path to input data")
254-
parser.add_argument("--test_train_ratio", type=float, required=False, default=0.25)
255-
parser.add_argument("--n_estimators", required=False, default=100, type=int)
256-
parser.add_argument("--learning_rate", required=False, default=0.1, type=float)
257-
parser.add_argument("--registered_model_name", type=str, help="model name")
258-
args = parser.parse_args()
259-
260-
# Start Logging
261-
mlflow.start_run()
262-
263-
# enable autologging
264-
mlflow.sklearn.autolog()
265-
266-
###################
267-
#<prepare the data>
268-
###################
269-
print(" ".join(f"{k}={v}" for k, v in vars(args).items()))
270-
271-
print("input data:", args.data)
272-
273-
credit_df = pd.read_csv(args.data, header=1, index_col=0)
274-
275-
mlflow.log_metric("num_samples", credit_df.shape[0])
276-
mlflow.log_metric("num_features", credit_df.shape[1] - 1)
277-
278-
train_df, test_df = train_test_split(
279-
credit_df,
280-
test_size=args.test_train_ratio,
281-
)
282-
####################
283-
#</prepare the data>
284-
####################
285-
286-
##################
287-
#<train the model>
288-
##################
289-
# Extracting the label column
290-
y_train = train_df.pop("default payment next month")
291-
292-
# convert the dataframe values to array
293-
X_train = train_df.values
294-
295-
# Extracting the label column
296-
y_test = test_df.pop("default payment next month")
297-
298-
# convert the dataframe values to array
299-
X_test = test_df.values
300-
301-
print(f"Training with data of shape {X_train.shape}")
302-
303-
clf = GradientBoostingClassifier(
304-
n_estimators=args.n_estimators, learning_rate=args.learning_rate
305-
)
306-
clf.fit(X_train, y_train)
307-
308-
y_pred = clf.predict(X_test)
309-
310-
print(classification_report(y_test, y_pred))
311-
###################
312-
#</train the model>
313-
###################
314-
315-
##########################
316-
#<save and register model>
317-
##########################
318-
# Registering the model to the workspace
319-
print("Registering the model via MLFlow")
320-
mlflow.sklearn.log_model(
321-
sk_model=clf,
322-
registered_model_name=args.registered_model_name,
323-
artifact_path=args.registered_model_name,
324-
)
325-
326-
# Saving the model to a file
327-
mlflow.sklearn.save_model(
328-
sk_model=clf,
329-
path=os.path.join(args.registered_model_name, "trained_model"),
330-
)
331-
###########################
332-
#</save and register model>
333-
###########################
334-
335-
# Stop Logging
336-
mlflow.end_run()
337-
338-
if __name__ == "__main__":
339-
main()
340-
```
341-
342-
343222
## Create a compute cluster, a scalable way to run a training job
344223

345224
> [!NOTE]

0 commit comments

Comments
 (0)