-
-
Notifications
You must be signed in to change notification settings - Fork 48.7k
Fixes issue #12108: Added Ridge Regression to Machine Learning #12246
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 4 commits
2293d37
e9ef03e
861618e
2dc60f4
61945d0
a2d07af
8f1f091
5bf9b85
85020a7
52345d9
4204bf6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
import numpy as np | ||
import pandas as pd | ||
|
||
|
||
class RidgeRegression:
    """
    Linear regression with L2 (ridge) regularisation, trained by batch
    gradient descent on standardised features.

    The per-feature mean and standard deviation observed in ``fit`` are stored
    on the instance and reused by ``predict`` and ``compute_cost`` so that new
    data is scaled exactly like the training data.
    """

    def __init__(
        self, alpha: float = 0.001, lambda_: float = 0.1, iterations: int = 1000
    ) -> None:
        """
        Ridge Regression Constructor

        :param alpha: Learning rate for gradient descent
        :param lambda_: Regularization parameter (L2 regularization)
        :param iterations: Number of iterations for gradient descent
        """
        self.alpha = alpha
        self.lambda_ = lambda_
        self.iterations = iterations
        self.theta = None
        # Scaling statistics learned in fit(); None until the model is fitted.
        self.feature_mean = None
        self.feature_std = None

    def feature_scaling(
        self, features: np.ndarray
    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
        """
        Normalize features to have mean 0 and standard deviation 1.

        The statistics are computed from ``features`` itself; nothing is
        stored on the instance.

        :param features: Input features, shape (m, n)
        :return: Tuple containing:
            - Scaled features
            - Mean of each feature
            - Standard deviation of each feature (0 replaced by 1)

        Example:
        >>> rr = RidgeRegression()
        >>> features = np.array([[1, 2], [2, 3], [4, 6]])
        >>> scaled_features, mean, std = rr.feature_scaling(features)
        >>> np.allclose(scaled_features.mean(axis=0), 0)
        True
        >>> np.allclose(scaled_features.std(axis=0), 1)
        True
        """
        mean = np.mean(features, axis=0)
        std = np.std(features, axis=0)

        # Constant features have std == 0; divide by 1 instead to avoid NaN.
        # np.where (rather than in-place masking) also copes with 0-d results.
        std = np.where(std == 0, 1.0, std)

        scaled_features = (features - mean) / std
        return scaled_features, mean, std

    def _scale_like_training(self, x: np.ndarray) -> np.ndarray:
        """
        Standardise ``x`` with the mean/std remembered from ``fit``.

        :param x: Input features, shape (m, n)
        :return: Scaled features, shape (m, n)
        :raises ValueError: if no weights are available (model not fitted)
        """
        if self.theta is None:
            raise ValueError("Model is not fitted; call fit() first.")
        if self.feature_mean is None or self.feature_std is None:
            # Weights were assigned by hand without fit(): keep the legacy
            # behaviour of standardising the batch with its own statistics.
            scaled, _, _ = self.feature_scaling(x)
            return scaled
        return (x - self.feature_mean) / self.feature_std

    def fit(self, x: np.ndarray, y: np.ndarray) -> None:
        """
        Fit the Ridge Regression model to the training data.

        :param x: Input features, shape (m, n)
        :param y: Target values, shape (m,)

        Example:
        >>> model = RidgeRegression(alpha=0.1, lambda_=0.0, iterations=1000)
        >>> model.fit(np.array([[1.0], [2.0], [3.0]]), np.array([-1.0, 0.0, 1.0]))
        >>> np.allclose(model.theta, [np.sqrt(2 / 3)])
        True
        """
        # Remember the training statistics so later data is scaled identically.
        x_scaled, self.feature_mean, self.feature_std = self.feature_scaling(x)
        m, n = x_scaled.shape
        self.theta = np.zeros(n)  # Initialize weights to zeros

        for _ in range(self.iterations):
            error = x_scaled.dot(self.theta) - y

            # Gradient of the squared-error cost plus the L2 penalty term
            gradient = (x_scaled.T.dot(error) + self.lambda_ * self.theta) / m
            self.theta -= self.alpha * gradient

    def predict(self, x: np.ndarray) -> np.ndarray:
        """
        Predict values using the trained model.

        :param x: Input features, shape (m, n)
        :return: Predicted values, shape (m,)
        :raises ValueError: if the model has not been fitted

        Example:
        >>> model = RidgeRegression(alpha=0.1, lambda_=0.0, iterations=1000)
        >>> model.fit(np.array([[1.0], [2.0], [3.0]]), np.array([-1.0, 0.0, 1.0]))
        >>> np.allclose(model.predict(np.array([[4.0]])), [2.0])
        True
        """
        return self._scale_like_training(x).dot(self.theta)

    def compute_cost(self, x: np.ndarray, y: np.ndarray) -> float:
        """
        Compute the cost function with regularization.

        :param x: Input features, shape (m, n)
        :param y: Target values, shape (m,)
        :return: Computed cost
        :raises ValueError: if the model has not been fitted

        Example:
        >>> model = RidgeRegression(alpha=0.1, lambda_=0.0, iterations=1000)
        >>> features = np.array([[1.0], [2.0], [3.0]])
        >>> target = np.array([-1.0, 0.0, 1.0])
        >>> model.fit(features, target)
        >>> np.allclose(model.compute_cost(features, target), 0.0)
        True
        """
        x_scaled = self._scale_like_training(x)
        m = len(y)
        residuals = x_scaled.dot(self.theta) - y
        data_term = np.sum(residuals**2) / (2 * m)
        penalty_term = (self.lambda_ / (2 * m)) * np.sum(self.theta**2)
        return float(data_term + penalty_term)

    def mean_absolute_error(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """
        Compute Mean Absolute Error (MAE) between true and predicted values.

        :param y_true: Actual target values, shape (m,)
        :param y_pred: Predicted target values, shape (m,)
        :return: MAE

        Example:
        >>> rr = RidgeRegression()
        >>> rr.mean_absolute_error(np.array([1.0, 2.0]), np.array([2.0, 4.0]))
        1.5
        """
        return float(np.mean(np.abs(y_true - y_pred)))
|
||
|
||
# Example usage: fit the model on a small public dataset and report metrics.
if __name__ == "__main__":
    # Load dataset
    data = pd.read_csv(
        "https://raw.githubusercontent.com/yashLadha/The_Math_of_Intelligence/master/Week1/ADRvsRating.csv"
    )
    x = data[["Rating"]].to_numpy()  # Feature: Rating
    y = data["ADR"].to_numpy()  # Target: ADR
    y = (y - np.mean(y)) / np.std(y)  # Standardise the target

    # Add bias term (intercept) to the feature matrix.
    # NOTE: feature_scaling() subtracts each column's mean, so this constant
    # column becomes all zeros and its weight stays at its initial value of 0.
    x = np.c_[np.ones(x.shape[0]), x]

    # Initialize and train the Ridge Regression model
    model = RidgeRegression(alpha=0.01, lambda_=0.1, iterations=1000)
    model.fit(x, y)

    # Predictions
    predictions = model.predict(x)

    # Results
    print("Optimized Weights:", model.theta)
    print("Cost:", model.compute_cost(x, y))
    print("Mean Absolute Error:", model.mean_absolute_error(y, predictions))
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As there is no test file in this pull request, nor any test function or class in the file `machine_learning/ridge_regression.py`, please provide a doctest for the function `fit`.
Please provide a descriptive name for the parameter: `x`
Please provide a descriptive name for the parameter: `y`