@@ -69,61 +69,163 @@ https://github.com/user-attachments/assets/c199156f-96cf-4ed0-a8b5-c88db3e7a552
6969
7070https://github.com/user-attachments/assets/f8cbd32c-94fc-43d3-a7a8-00f63cdc543d
7171
72+ ## Step 4: Create a New Notebook or Script
7273
73- ### ** 4. Create a New Notebook or Script**
7474- Use the compute instance to open a ** Jupyter notebook** or create a Python script.
7575- Import necessary libraries:
76+
7677 ``` python
7778 import pandas as pd
7879 from sklearn.model_selection import train_test_split
7980 from sklearn.ensemble import RandomForestClassifier
8081 from sklearn.metrics import accuracy_score
8182 ```
8283
83- ---
84+ https://github.com/user-attachments/assets/16650584-11cb-48fb-928d-c032e519c14b
85+
86+ ## Step 5: Load and Explore the Data
87+
88+ > Load the dataset and perform basic EDA (exploratory data analysis):
8489
85- ### ** 5. Load and Explore the Data**
86- - Load the dataset and perform basic EDA (exploratory data analysis):
8790 ``` python
88- data = pd.read_csv(' your_dataset.csv' )
89- print (data.head())
91+ import mltable
92+ from azure.ai.ml import MLClient
93+ from azure.identity import DefaultAzureCredential
94+
95+ ml_client = MLClient.from_config(credential = DefaultAzureCredential())
96+ data_asset = ml_client.data.get(" employee_data" , version = " 1" )
97+
98+ tbl = mltable.load(f ' azureml:/ { data_asset.id} ' )
99+
100+ df = tbl.to_pandas_dataframe()
101+ df
90102 ```
91103
92- ---
104+ https://github.com/user-attachments/assets/5fa65d95-8502-4ab7-ba0d-dfda66378cc2
93105
94- ### ** 6. Train Your Model**
95- - Split the data and train a model:
96- ``` python
97- X = data.drop(' target' , axis = 1 )
98- y = data[' target' ]
99- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2 )
106+ ## Step 6: Train Your Model
107+
108+ > Split the data and train a model:
100109
101- model = RandomForestClassifier()
110+ ``` python
111+ # Step 1: Preprocessing
112+ from sklearn.preprocessing import LabelEncoder, StandardScaler
113+
114+ # Encode categorical columns
115+ label_encoder = LabelEncoder()
116+ df[' Department' ] = label_encoder.fit_transform(df[' Department' ])
117+
118+ # Drop non-informative or high-cardinality columns
119+ if ' Name' in df.columns:
120+ df = df.drop(columns = [' Name' ]) # 'Name' is likely not predictive
121+
122+ # Optional: Check for missing values
123+ if df.isnull().sum().any():
124+ df = df.dropna() # or use df.ffill() for forward-fill imputation (fillna(method='ffill') is deprecated in recent pandas)
125+
126+ # Step 2: Define Features and Target
127+ X = df.drop(' Salary' , axis = 1 ) # Features: Age and Department
128+ y = df[' Salary' ] # Target: Salary
129+
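130+ # Optional: Feature Scaling (only useful for scale-sensitive models; random forests do not need it, and fitting the scaler before the split leaks test-set statistics into training)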
131+ scaler = StandardScaler()
132+ X_scaled = scaler.fit_transform(X)
133+
134+ # Step 3: Split the Data
135+ from sklearn.model_selection import train_test_split
136+
137+ X_train, X_test, y_train, y_test = train_test_split(
138+ X_scaled, y, test_size = 0.2 , random_state = 42
139+ )
140+
141+ # Step 4: Train a Regression Model
142+ from sklearn.ensemble import RandomForestRegressor
143+
144+ model = RandomForestRegressor(
145+ n_estimators = 100 ,
146+ max_depth = None ,
147+ random_state = 42 ,
148+ n_jobs = - 1 # Use all available cores
149+ )
102150 model.fit(X_train, y_train)
103151 ```
104152
105- ---
153+ https://github.com/user-attachments/assets/2176c795-5fda-4746-93c7-8b137b526a09
154+
155+ ## Step 7: Evaluate the Model
156+
157+ > Check performance:
106158
107- ### ** 7. Evaluate the Model**
108- - Check performance:
109159 ``` python
160+ # Step 5: Make Predictions
110161 predictions = model.predict(X_test)
111- print (" Accuracy:" , accuracy_score(y_test, predictions))
162+
163+ # Step 6: Evaluate the Model
164+ from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
165+ import numpy as np
166+
167+ mae = mean_absolute_error(y_test, predictions)
168+ mse = mean_squared_error(y_test, predictions)
169+ rmse = np.sqrt(mse)
170+ r2 = r2_score(y_test, predictions)
171+
172+ print (" Model Evaluation Metrics" )
173+ print (f " Mean Absolute Error (MAE): { mae:.2f } " )
174+ print (f " Mean Squared Error (MSE): { mse:.2f } " )
175+ print (f " Root Mean Squared Error (RMSE): { rmse:.2f } " )
176+ print (f " R² Score: { r2:.2f } " )
112177 ```
113178
114- ---
179+ <img width =" 550 " alt =" image " src =" https://github.com/user-attachments/assets/6aa19680-cadb-4fe4-a419-a626942e15f9 " />
180+
181+ > Distribution of prediction errors and predicted vs. actual values:
182+
183+ ``` python
184+ import matplotlib.pyplot as plt
185+
186+ # Plot 1: Distribution of prediction errors
187+ errors = y_test - predictions
188+ plt.figure(figsize = (10 , 6 ))
189+ plt.hist(errors, bins = 30 , color = ' skyblue' , edgecolor = ' black' )
190+ plt.title(' Distribution of Prediction Errors' )
191+ plt.xlabel(' Prediction Error' )
192+ plt.ylabel(' Frequency' )
193+ plt.grid(True )
194+ plt.show()
195+
196+ # Plot 2: Predicted vs Actual values
197+ plt.figure(figsize = (10 , 6 ))
198+ plt.scatter(y_test, predictions, alpha = 0.3 , color = ' darkorange' )
199+ plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], ' k--' , lw = 2 )
200+ plt.title(' Predicted vs Actual Salary' )
201+ plt.xlabel(' Actual Salary' )
202+ plt.ylabel(' Predicted Salary' )
203+ plt.grid(True )
204+ plt.show()
205+ ```
206+
207+ <img width =" 550 " alt =" image " src =" https://github.com/user-attachments/assets/d8ec1f2c-eb97-4106-9cee-809849d02796 " >
208+
209+ ## Step 8: Register the Model
210+
211+ > Save and register the model in Azure ML (note: this snippet uses the SDK v1 `azureml.core` API, whereas Step 5 uses the SDK v2 `MLClient`):
115212
116- ### ** 8. Register the Model**
117- - Save and register the model in Azure ML:
118213 ``` python
119214 import joblib
120215 joblib.dump(model, ' model.pkl' )
121-
216+
122217 from azureml.core import Workspace, Model
123218 ws = Workspace.from_config()
124- Model.register(workspace = ws, model_path = " model.pkl" , model_name = " my_model " )
219+ Model.register(workspace = ws, model_path = " model.pkl" , model_name = " my_model_RegressionModel " )
125220 ```
126221
222+ https://github.com/user-attachments/assets/a82ff03e-437c-41bc-85fa-8b9903384a5b
223+
224+
225+ > [!TIP]
226+ > Click [here]() to read the script used.
227+
228+
127229---
128230
129231### ** 9. Deploy the Model**
0 commit comments