Skip to content
94 changes: 94 additions & 0 deletions machine_learning/catboost_regressor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
"""
CatBoost Regressor Example.

This script demonstrates the usage of the CatBoost Regressor for a simple
regression task.

CatBoost is a powerful gradient boosting library that handles categorical
features automatically and is highly efficient.

Make sure to install CatBoost using:
pip install catboost

Contributed by: @AHuzail
"""

import numpy as np
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from catboost import CatBoostRegressor


def data_handling() -> tuple:
    """
    Load the regression dataset and return it as a (features, target) pair.

    The Boston housing dataset bundled with scikit-learn is used as the
    regression example.

    Returns:
        tuple: A tuple of (features, target), taken from the ``data`` and
        ``target`` attributes of the object returned by ``load_boston()``.

    Example:
    >>> features, target = data_handling()
    >>> features.shape
    (506, 13)
    >>> target.shape
    (506,)
    """
    # NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed
    # in 1.2, so this only runs against an older scikit-learn. Switching to a
    # different dataset changes the shapes in the doctest above and also needs
    # the file-level import updated -- confirm the pinned version or replace.
    boston = load_boston()
    return boston.data, boston.target


def catboost_regressor(features: np.ndarray, target: np.ndarray) -> CatBoostRegressor:
    """
    Fit a CatBoostRegressor on the given features and target values.

    The model is built with fixed hyperparameters (100 iterations, learning
    rate 0.1, tree depth 6) and with training output silenced (verbose=0).

    Args:
        features (np.ndarray): The input features for the regression model.
        target (np.ndarray): The target values for the regression model.

    Returns:
        CatBoostRegressor: The regressor after ``fit`` has been called on it.

    Example:
    >>> features, target = data_handling()
    >>> model = catboost_regressor(features, target)
    >>> isinstance(model, CatBoostRegressor)
    True
    """
    model = CatBoostRegressor(
        iterations=100,
        learning_rate=0.1,
        depth=6,
        verbose=0,
    )
    model.fit(features, target)
    return model


def main() -> None:
    """
    Run the CatBoost Regressor example end to end.

    Loads the data with data_handling(), holds out 25% of the rows as a test
    set (random_state=42 keeps the split reproducible), trains a regressor on
    the remaining rows with catboost_regressor(), and prints the Mean Squared
    Error of its predictions on the held-out rows.
    """
    # Load the dataset and carve out the train/test split
    features, target = data_handling()
    x_train, x_test, y_train, y_test = train_test_split(
        features, target, test_size=0.25, random_state=42
    )

    # Fit on the training portion only so the test rows stay unseen
    regressor = catboost_regressor(x_train, y_train)

    # Score the held-out rows
    predictions = regressor.predict(x_test)
    mse = mean_squared_error(y_test, predictions)
    print(f"Mean Squared Error on Test Set: {mse:.4f}")


if __name__ == "__main__":
    # Run the doctests embedded in this module first, then the demo itself.
    from doctest import testmod

    testmod(verbose=True)
    main()
Loading