114 changes: 69 additions & 45 deletions machine_learning/xgboost_classifier.py
@@ -1,81 +1,105 @@
# XGBoost Classifier Example
import numpy as np
from matplotlib import pyplot as plt
from sklearn.datasets import load_iris
from sklearn.metrics import ConfusionMatrixDisplay, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBClassifier


class SimpleXGBoost:
def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
self.n_estimators = n_estimators
self.learning_rate = learning_rate
self.max_depth = max_depth
        self.trees = []
        self.initial_prediction = 0.0

def _negative_gradient(self, y_true, y_pred):
"""Compute the negative gradient (residuals) for classification (log-odds)."""
return y_true - y_pred

def _update_predictions(self, predictions, residuals):
"""Update the predictions using the residuals and learning rate."""
return predictions + self.learning_rate * residuals
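
    # Boosting update rule implemented above: with learning rate eta,
    #     F_m(x) = F_{m-1}(x) + eta * h_m(x),
    # where each weak learner h_m is fit to the residuals y - F_{m-1}(x).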

    def fit(self, x: np.ndarray, y: np.ndarray) -> None:
"""Fit the model using gradient boosting."""
        # Initialize predictions with the target mean and store it for predict()
        self.initial_prediction = np.mean(y)
        predictions = np.full(y.shape, self.initial_prediction)

for _ in range(self.n_estimators):
# Compute residuals (negative gradient)
residuals = self._negative_gradient(y, predictions)

# Fit a weak learner (decision tree) to the residuals
tree = DecisionTreeRegressor(max_depth=self.max_depth)
            tree.fit(x, residuals)

            # Update the predictions
            predictions = self._update_predictions(predictions, tree.predict(x))

# Store the tree
self.trees.append(tree)

    def predict(self, x: np.ndarray) -> np.ndarray:
"""Make predictions by summing the weak learners' outputs."""
        predictions = np.full(x.shape[0], self.initial_prediction)

for tree in self.trees:
            predictions += self.learning_rate * tree.predict(x)

# Convert the predictions to binary (0 or 1) for classification
return np.round(predictions).astype(int)
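

# A minimal usage sketch for SimpleXGBoost on hypothetical synthetic data;
# it assumes nothing beyond numpy and the class defined above.
def _demo_simple_xgboost() -> None:
    rng = np.random.default_rng(0)
    x_demo = rng.normal(size=(200, 4))
    y_demo = (x_demo[:, 0] > 0).astype(int)  # separable binary target
    model = SimpleXGBoost(n_estimators=25, learning_rate=0.1, max_depth=2)
    model.fit(x_demo, y_demo)
    train_accuracy = (model.predict(x_demo) == y_demo).mean()
    print(f"Demo training accuracy: {train_accuracy:.2f}")  # expected near 1.0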


def data_handling(data: dict) -> tuple:
    """
    Split the dataset dict into features and target ("data" holds the features).

>>> data_handling(({'data':'[5.1, 3.5, 1.4, 0.2]','target':([0])}))
('[5.1, 3.5, 1.4, 0.2]', [0])
>>> data_handling(
... {'data': '[4.9, 3.0, 1.4, 0.2], [4.7, 3.2, 1.3, 0.2]', 'target': ([0, 0])}
... )
('[4.9, 3.0, 1.4, 0.2], [4.7, 3.2, 1.3, 0.2]', [0, 0])
"""
return (data["data"], data["target"])


def xgboost(features: np.ndarray, target: np.ndarray) -> XGBClassifier:
"""
# THIS TEST IS BROKEN!! >>> xgboost(np.array([[5.1, 3.6, 1.4, 0.2]]), np.array([0]))
XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
early_stopping_rounds=None, enable_categorical=False,
eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
importance_type=None, interaction_constraints='',
learning_rate=0.300000012, max_bin=256, max_cat_to_onehot=4,
max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,
missing=nan, monotone_constraints='()', n_estimators=100,
n_jobs=0, num_parallel_tree=1, predictor='auto', random_state=0,
reg_alpha=0, reg_lambda=1, ...)
"""
classifier = XGBClassifier()
classifier.fit(features, target)
return classifier


def main() -> None:
"""
    >>> main()  # doctest: +SKIP

    URL for the algorithm:
    https://xgboost.readthedocs.io/en/stable/
    A simplified gradient-boosting classifier, implemented without the xgboost
    library, is demonstrated on the Iris dataset.
"""

    # Load the Iris dataset
    iris = load_iris()
    features, targets = data_handling(iris)

    # For simplicity, binary classification (setosa vs. non-setosa)
    targets = (targets == 0).astype(int)

    # Split data into train and test sets
    x_train, x_test, y_train, y_test = train_test_split(
        features, targets, test_size=0.25
    )

    # Create a custom XGBoost-style classifier and fit it
    xgboost_classifier = SimpleXGBoost(n_estimators=50, learning_rate=0.1, max_depth=3)
    xgboost_classifier.fit(x_train, y_train)

    # Make predictions
    y_pred = xgboost_classifier.predict(x_test)

    # Print accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy * 100:.2f}%")

    # Display the normalized confusion matrix
    ConfusionMatrixDisplay.from_predictions(
        y_test,
        y_pred,
        display_labels=["Non-Setosa", "Setosa"],
        cmap="Blues",
        normalize="true",
    )
    plt.title("Normalized Confusion Matrix - IRIS Dataset (Manual XGBoost)")
    plt.show()


if __name__ == "__main__":
import doctest

doctest.testmod(verbose=True)
main()
71 changes: 49 additions & 22 deletions machine_learning/xgboost_regressor.py
@@ -3,7 +3,50 @@
from sklearn.datasets import fetch_california_housing
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor


class SimpleXGBoostRegressor:
def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
self.n_estimators = n_estimators
self.learning_rate = learning_rate
self.max_depth = max_depth
        self.trees = []
        self.initial_prediction = 0.0

def _negative_gradient(self, y_true, y_pred):
"""Compute the negative gradient (residuals) for regression."""
return y_true - y_pred

def _update_predictions(self, predictions, residuals):
"""Update the predictions using the residuals and learning rate."""
return predictions + self.learning_rate * residuals

    def fit(self, x: np.ndarray, y: np.ndarray) -> None:
"""Fit the model using gradient boosting."""
        # Initialize predictions with the target mean and store it for predict()
        self.initial_prediction = np.mean(y)
        predictions = np.full(y.shape, self.initial_prediction)

for _ in range(self.n_estimators):
# Compute residuals (negative gradient)
residuals = self._negative_gradient(y, predictions)

# Fit a weak learner (decision tree) to the residuals
            tree = DecisionTreeRegressor(max_depth=self.max_depth)
            tree.fit(x, residuals)

            # Update the predictions
            predictions = self._update_predictions(predictions, tree.predict(x))

# Store the tree
self.trees.append(tree)

    def predict(self, x: np.ndarray) -> np.ndarray:
"""Make predictions by summing the weak learners' outputs."""
        predictions = np.full(x.shape[0], self.initial_prediction)

for tree in self.trees:
            predictions += self.learning_rate * tree.predict(x)

return predictions
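

# A minimal usage sketch for SimpleXGBoostRegressor on hypothetical synthetic
# data; the target is a simple, noise-free linear function of the inputs.
def _demo_simple_xgboost_regressor() -> None:
    rng = np.random.default_rng(0)
    x_demo = rng.uniform(-1.0, 1.0, size=(200, 3))
    y_demo = 2.0 * x_demo[:, 0] + x_demo[:, 1]
    model = SimpleXGBoostRegressor(n_estimators=100, learning_rate=0.1, max_depth=3)
    model.fit(x_demo, y_demo)
    train_mse = float(np.mean((model.predict(x_demo) - y_demo) ** 2))
    print(f"Demo training MSE: {train_mse:.4f}")  # expected to be small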


def data_handling(data: dict) -> tuple:
@@ -17,26 +60,6 @@
return (data["data"], data["target"])


def main() -> None:
"""
The URL for this algorithm
@@ -53,7 +76,11 @@
x_train, x_test, y_train, y_test = train_test_split(
data, target, test_size=0.25, random_state=1
)
xgboost_regressor = SimpleXGBoostRegressor(
n_estimators=50, learning_rate=0.1, max_depth=3
)
xgboost_regressor.fit(x_train, y_train)
predictions = xgboost_regressor.predict(x_test)
# Error printing
print(f"Mean Absolute Error: {mean_absolute_error(y_test, predictions)}")
print(f"Mean Square Error: {mean_squared_error(y_test, predictions)}")