129 changes: 129 additions & 0 deletions machine_learning/ridge_regression.py
@@ -0,0 +1,129 @@
import numpy as np
import pandas as pd


class RidgeRegression:
    def __init__(
        self, alpha: float = 0.001, lambda_: float = 0.1, iterations: int = 1000
    ) -> None:
        """
        Ridge Regression Constructor
        :param alpha: Learning rate for gradient descent
        :param lambda_: Regularization parameter (L2 regularization)
        :param iterations: Number of iterations for gradient descent
        """
        self.alpha = alpha
        self.lambda_ = lambda_
        self.iterations = iterations
        self.theta = None

    def feature_scaling(
        self, features: np.ndarray
    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
        """
        Normalize features to have mean 0 and standard deviation 1.

        :param features: Input features, shape (m, n)
        :return: Tuple containing:
            - Scaled features
            - Mean of each feature
            - Standard deviation of each feature

        Example:
        >>> rr = RidgeRegression()
        >>> features = np.array([[1, 2], [2, 3], [4, 6]])
        >>> scaled_features, mean, std = rr.feature_scaling(features)
        >>> np.allclose(scaled_features.mean(axis=0), 0)
        True
        >>> np.allclose(scaled_features.std(axis=0), 1)
        True
        """
        mean = np.mean(features, axis=0)
        std = np.std(features, axis=0)

        # Avoid division by zero for constant features: set std = 1 where std == 0
        std[std == 0] = 1

        scaled_features = (features - mean) / std
        return scaled_features, mean, std

    def fit(self, x: np.ndarray, y: np.ndarray) -> None:

As there is no test file in this pull request nor any test function or class in the file machine_learning/ridge_regression.py, please provide doctest for the function fit

Please provide descriptive name for the parameter: x

Please provide descriptive name for the parameter: y

"""
Fit the Ridge Regression model to the training data.

:param x: Input features, shape (m, n)
:param y: Target values, shape (m,)
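
        Example on a tiny synthetic dataset (values chosen purely for
        illustration):
        >>> rr = RidgeRegression(alpha=0.01, lambda_=0.1, iterations=1000)
        >>> features = np.array([[1.0], [2.0], [3.0]])
        >>> target = np.array([1.0, 2.0, 3.0])
        >>> rr.fit(features, target)
        >>> rr.theta.shape
        (1,)
        >>> bool(np.any(rr.theta != 0))
        True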
"""
x_scaled, mean, std = self.feature_scaling(x) # Normalize features
m, n = x_scaled.shape
self.theta = np.zeros(n) # Initialize weights to zeros

for _ in range(self.iterations):
predictions = x_scaled.dot(self.theta)
error = predictions - y

# Compute gradient with L2 regularization
gradient = (x_scaled.T.dot(error) + self.lambda_ * self.theta) / m
self.theta -= self.alpha * gradient # Update weights

def predict(self, x: np.ndarray) -> np.ndarray:

As there is no test file in this pull request nor any test function or class in the file machine_learning/ridge_regression.py, please provide doctest for the function predict

Please provide descriptive name for the parameter: x

"""
Predict values using the trained model.

:param x: Input features, shape (m, n)
:return: Predicted values, shape (m,)
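
        Example on a tiny synthetic dataset (illustrative values only):
        >>> rr = RidgeRegression(alpha=0.01, lambda_=0.1, iterations=1000)
        >>> features = np.array([[1.0], [2.0], [3.0]])
        >>> target = np.array([1.0, 2.0, 3.0])
        >>> rr.fit(features, target)
        >>> rr.predict(features).shape
        (3,)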
"""
x_scaled, _, _ = self.feature_scaling(x) # Scale features using training data
return x_scaled.dot(self.theta)

def compute_cost(self, x: np.ndarray, y: np.ndarray) -> float:

As there is no test file in this pull request nor any test function or class in the file machine_learning/ridge_regression.py, please provide doctest for the function compute_cost

Please provide descriptive name for the parameter: x

Please provide descriptive name for the parameter: y

"""
Compute the cost function with regularization.

:param x: Input features, shape (m, n)
:param y: Target values, shape (m,)
:return: Computed cost
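
        Example on a tiny synthetic dataset (illustrative values only):
        >>> rr = RidgeRegression(alpha=0.01, lambda_=0.1, iterations=1000)
        >>> features = np.array([[1.0], [2.0], [3.0]])
        >>> target = np.array([1.0, 2.0, 3.0])
        >>> rr.fit(features, target)
        >>> float(rr.compute_cost(features, target)) >= 0.0
        True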
"""
x_scaled, _, _ = self.feature_scaling(x) # Scale features using training data
m = len(y)
predictions = x_scaled.dot(self.theta)
cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + (
self.lambda_ / (2 * m)
) * np.sum(self.theta**2)
return cost

def mean_absolute_error(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:

As there is no test file in this pull request nor any test function or class in the file machine_learning/ridge_regression.py, please provide doctest for the function mean_absolute_error

"""
Compute Mean Absolute Error (MAE) between true and predicted values.

:param y_true: Actual target values, shape (m,)
:param y_pred: Predicted target values, shape (m,)
:return: MAE
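
        Example (illustrative values only):
        >>> rr = RidgeRegression()
        >>> y_true = np.array([1.0, 2.0, 3.0])
        >>> y_pred = np.array([1.5, 2.5, 3.5])
        >>> float(rr.mean_absolute_error(y_true, y_pred))
        0.5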
"""
return np.mean(np.abs(y_true - y_pred))


# Example usage
if __name__ == "__main__":
    # Load dataset
    data = pd.read_csv(
        "https://raw.githubusercontent.com/yashLadha/The_Math_of_Intelligence/master/Week1/ADRvsRating.csv"
    )
    x = data[["Rating"]].to_numpy()  # Feature: Rating
    y = data["ADR"].to_numpy()  # Target: ADR
    y = (y - np.mean(y)) / np.std(y)  # Standardize the target

    # Add bias term (intercept) to the feature matrix
    x = np.c_[np.ones(x.shape[0]), x]  # Add intercept term

    # Initialize and train the Ridge Regression model
    model = RidgeRegression(alpha=0.01, lambda_=0.1, iterations=1000)
    model.fit(x, y)

    # Predictions
    predictions = model.predict(x)

    # Results
    print("Optimized Weights:", model.theta)
    print("Cost:", model.compute_cost(x, y))
    print("Mean Absolute Error:", model.mean_absolute_error(y, predictions))