@@ -85,7 +85,7 @@ https://github.com/user-attachments/assets/f8cbd32c-94fc-43d3-a7a8-00f63cdc543d
8585
8686## Step 5: Load and Explore the Data
8787
88- - Load the dataset and perform basic EDA (exploratory data analysis):
88+ > Load the dataset and perform basic EDA (exploratory data analysis):
8989
9090 ``` python
9191 import mltable
@@ -105,38 +105,122 @@ https://github.com/user-attachments/assets/f8cbd32c-94fc-43d3-a7a8-00f63cdc543d
105105
106106## Step 6: Train Your Model
107107
108- - Split the data and train a model:
108+ > Split the data and train a model:
109109
110110 ``` python
111- X = data.drop(' target' , axis = 1 )
112- y = data[' target' ]
113- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2 )
114-
115- model = RandomForestClassifier()
111+ # Step 1: Preprocessing
112+ from sklearn.preprocessing import LabelEncoder, StandardScaler
113+
114+ # Encode categorical columns
115+ label_encoder = LabelEncoder()
116+ df[' Department' ] = label_encoder.fit_transform(df[' Department' ])
117+
118+ # Drop non-informative or high-cardinality columns
119+ if ' Name' in df.columns:
120+ df = df.drop(columns = [' Name' ]) # 'Name' is likely not predictive
121+
122+ # Optional: Check for missing values
123+ if df.isnull().sum().any():
124+ df = df.dropna() # or use df.ffill() to forward-fill missing values instead
125+
126+ # Step 2: Define Features and Target
127+ X = df.drop(' Salary' , axis = 1 ) # Features: Age and Department
128+ y = df[' Salary' ] # Target: Salary
129+
130+ # Optional: Feature Scaling (especially useful for models sensitive to scale)
131+ scaler = StandardScaler()
132+ X_scaled = scaler.fit_transform(X)
133+
134+ # Step 3: Split the Data
135+ from sklearn.model_selection import train_test_split
136+
137+ X_train, X_test, y_train, y_test = train_test_split(
138+ X_scaled, y, test_size = 0.2 , random_state = 42
139+ )
140+
141+ # Step 4: Train a Regression Model
142+ from sklearn.ensemble import RandomForestRegressor
143+
144+ model = RandomForestRegressor(
145+ n_estimators = 100 ,
146+ max_depth = None ,
147+ random_state = 42 ,
148+ n_jobs = - 1 # Use all available cores
149+ )
116150 model.fit(X_train, y_train)
117151 ```
118152
119- ---
153+ https://github.com/user-attachments/assets/2176c795-5fda-4746-93c7-8b137b526a09
154+
155+ ## Step 7: Evaluate the Model
156+
157+ > Check performance:
120158
121- ### ** 7. Evaluate the Model**
122- - Check performance:
123159 ``` python
160+ # Step 5: Make Predictions
124161 predictions = model.predict(X_test)
125- print (" Accuracy:" , accuracy_score(y_test, predictions))
162+
163+ # Step 6: Evaluate the Model
164+ from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
165+ import numpy as np
166+
167+ mae = mean_absolute_error(y_test, predictions)
168+ mse = mean_squared_error(y_test, predictions)
169+ rmse = np.sqrt(mse)
170+ r2 = r2_score(y_test, predictions)
171+
172+ print (" Model Evaluation Metrics" )
173+ print (f " Mean Absolute Error (MAE): { mae:.2f } " )
174+ print (f " Mean Squared Error (MSE): { mse:.2f } " )
175+ print (f " Root Mean Squared Error (RMSE): { rmse:.2f } " )
176+ print (f " R² Score: { r2:.2f } " )
126177 ```
127178
128- ---
179+ <img width =" 550 " alt =" image " src =" https://github.com/user-attachments/assets/6aa19680-cadb-4fe4-a419-a626942e15f9 " />
180+
181+ > Visualize the distribution of prediction errors and compare predicted vs. actual values:
182+
183+ ``` python
184+ import matplotlib.pyplot as plt
185+
186+ # Plot 1: Distribution of prediction errors
187+ errors = y_test - predictions
188+ plt.figure(figsize = (10 , 6 ))
189+ plt.hist(errors, bins = 30 , color = ' skyblue' , edgecolor = ' black' )
190+ plt.title(' Distribution of Prediction Errors' )
191+ plt.xlabel(' Prediction Error' )
192+ plt.ylabel(' Frequency' )
193+ plt.grid(True )
194+ plt.show()
195+
196+ # Plot 2: Predicted vs Actual values
197+ plt.figure(figsize = (10 , 6 ))
198+ plt.scatter(y_test, predictions, alpha = 0.3 , color = ' darkorange' )
199+ plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], ' k--' , lw = 2 )
200+ plt.title(' Predicted vs Actual Salary' )
201+ plt.xlabel(' Actual Salary' )
202+ plt.ylabel(' Predicted Salary' )
203+ plt.grid(True )
204+ plt.show()
205+ ```
206+
207+ <img width =" 550 " alt =" image " src =" https://github.com/user-attachments/assets/d8ec1f2c-eb97-4106-9cee-809849d02796 " >
208+
209+ ## Step 8: Register the Model
210+
211+ > Save and register the model in Azure ML:
129212
130- ### ** 8. Register the Model**
131- - Save and register the model in Azure ML:
132213 ``` python
133214 import joblib
134215 joblib.dump(model, ' model.pkl' )
135-
216+
136217 from azureml.core import Workspace, Model
137218 ws = Workspace.from_config()
138- Model.register(workspace = ws, model_path = " model.pkl" , model_name = " my_model " )
219+ Model.register(workspace = ws, model_path = " model.pkl" , model_name = " my_model_RegressionModel " )
139220 ```
221+ > [ !TIP]
222+ > Click [ here] ( ) to read the script used.
223+
140224
141225---
142226
0 commit comments