6 changes: 4 additions & 2 deletions .github/workflows/docs.yml
@@ -1,7 +1,9 @@
name: "Pull Request Docs Check"
on:
pull_request:

push:
branches:
- dev
jobs:
docs:
runs-on: ubuntu-latest
@@ -17,7 +19,7 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         pip install sphinx
-        pip install -r docs/requirements.txt # Ensure you have a requirements file for Sphinx
+        pip install -r requirements.txt # Ensure you have a requirements file for Sphinx
 
     - name: Build documentation
       run: |
2 changes: 2 additions & 0 deletions .gitignore
@@ -3,6 +3,8 @@ __pycache__/
 *.py[cod]
 *$py.class
 
+tests/mq/
+
 # C extensions
 *.so
 
173 changes: 102 additions & 71 deletions lazypredict/Supervised.py
@@ -5,7 +5,7 @@
 
 import numpy as np
 import pandas as pd
-from tqdm import tqdm
+from tqdm.autonotebook import tqdm
 import datetime
 import time
 from sklearn.pipeline import Pipeline
@@ -18,10 +18,14 @@
 from sklearn.metrics import (
     accuracy_score,
     balanced_accuracy_score,
+    euclidean_distances,
+    precision_score,
+    recall_score,
     roc_auc_score,
     f1_score,
     r2_score,
     mean_squared_error,
+    average_precision_score,
 )
 import warnings
 import xgboost
@@ -210,6 +214,7 @@ def __init__(
         predictions=False,
         random_state=42,
         classifiers="all",
+        transformers=True,
     ):
         self.verbose = verbose
         self.ignore_warnings = ignore_warnings
@@ -218,34 +223,16 @@
         self.models = {}
         self.random_state = random_state
         self.classifiers = classifiers
+        self.transformers = transformers
 
     def fit(self, X_train, X_test, y_train, y_test):
-        """Fit Classification algorithms to X_train and y_train, predict and score on X_test, y_test.
-        Parameters
-        ----------
-        X_train : array-like,
-            Training vectors, where rows is the number of samples
-            and columns is the number of features.
-        X_test : array-like,
-            Testing vectors, where rows is the number of samples
-            and columns is the number of features.
-        y_train : array-like,
-            Training vectors, where rows is the number of samples
-            and columns is the number of features.
-        y_test : array-like,
-            Testing vectors, where rows is the number of samples
-            and columns is the number of features.
-        Returns
-        -------
-        scores : Pandas DataFrame
-            Returns metrics of all the models in a Pandas DataFrame.
-        predictions : Pandas DataFrame
-            Returns predictions of all the models in a Pandas DataFrame.
-        """
         Accuracy = []
         B_Accuracy = []
         ROC_AUC = []
+        PR_SCORE = []
         F1 = []
+        PRECISION = []
+        RECALL = []
         names = []
         TIME = []
         predictions = {}
@@ -257,20 +244,25 @@ def fit(self, X_train, X_test, y_train, y_test):
         X_train = pd.DataFrame(X_train)
         X_test = pd.DataFrame(X_test)
 
-        numeric_features = X_train.select_dtypes(include=[np.number]).columns
-        categorical_features = X_train.select_dtypes(include=["object"]).columns
+        if self.transformers is True:
+            numeric_features = X_train.select_dtypes(include=[np.number]).columns
+            categorical_features = X_train.select_dtypes(include=["object"]).columns
 
-        categorical_low, categorical_high = get_card_split(
-            X_train, categorical_features
-        )
+            categorical_low, categorical_high = get_card_split(
+                X_train, categorical_features
+            )
 
-        preprocessor = ColumnTransformer(
-            transformers=[
-                ("numeric", numeric_transformer, numeric_features),
-                ("categorical_low", categorical_transformer_low, categorical_low),
-                ("categorical_high", categorical_transformer_high, categorical_high),
-            ]
-        )
+            preprocessor = ColumnTransformer(
+                transformers=[
+                    ("numeric", numeric_transformer, numeric_features),
+                    ("categorical_low", categorical_transformer_low, categorical_low),
+                    ("categorical_high", categorical_transformer_high, categorical_high),
+                ]
+            )
+        elif self.transformers is False or self.transformers is None:
+            preprocessor = ColumnTransformer(transformers=[], remainder="passthrough")
+        elif isinstance(self.transformers, ColumnTransformer):
+            preprocessor = self.transformers
 
         if self.classifiers == "all":
             self.classifiers = CLASSIFIERS
@@ -289,77 +281,113 @@ def fit(self, X_train, X_test, y_train, y_test):
             start = time.time()
             try:
                 if "random_state" in model().get_params().keys():
-                    pipe = Pipeline(
-                        steps=[
-                            ("preprocessor", preprocessor),
-                            ("classifier", model(random_state=self.random_state)),
-                        ]
-                    )
+                    if "probability" not in model().get_params().keys():
+                        pipe = Pipeline(
+                            steps=[
+                                ("preprocessor", preprocessor),
+                                ("classifier", model(random_state=self.random_state)),
+                            ]
+                        )
+                    else:
+                        pipe = Pipeline(
+                            steps=[
+                                ("preprocessor", preprocessor),
+                                ("classifier", model(random_state=self.random_state, probability=True)),
+                            ]
+                        )
                 else:
-                    pipe = Pipeline(
-                        steps=[("preprocessor", preprocessor), ("classifier", model())]
-                    )
+                    if "probability" not in model().get_params().keys():
+                        pipe = Pipeline(
+                            steps=[("preprocessor", preprocessor), ("classifier", model())]
+                        )
+                    else:
+                        pipe = Pipeline(
+                            steps=[("preprocessor", preprocessor), ("classifier", model(probability=True))]
+                        )
 
                 pipe.fit(X_train, y_train)
                 self.models[name] = pipe
                 y_pred = pipe.predict(X_test)
+
+                try:
+                    y_score = pipe.predict_proba(X_test)[:, 1]
+                except:
+                    try:
+                        y_score = pipe.decision_function(X_test)
+                    except:
+                        centroids = pipe.named_steps['classifier'].centroids_
+                        distances = euclidean_distances(X_test, centroids)
+                        y_score = -distances[:, 1]
+
                 accuracy = accuracy_score(y_test, y_pred, normalize=True)
                 b_accuracy = balanced_accuracy_score(y_test, y_pred)
                 f1 = f1_score(y_test, y_pred, average="weighted")
+                precision = precision_score(y_test, y_pred, average="weighted")
+                recall = recall_score(y_test, y_pred, average="weighted")
                 try:
-                    roc_auc = roc_auc_score(y_test, y_pred)
+                    roc_auc = roc_auc_score(y_test, y_score)
                 except Exception as exception:
                     roc_auc = None
                     if self.ignore_warnings is False:
                         print("ROC AUC couldn't be calculated for " + name)
                         print(exception)
+
+                try:
+                    pr_score = average_precision_score(y_test, y_score)
+                except Exception as exception:
+                    pr_score = None
+                    if self.ignore_warnings is False:
+                        print("Precision-Recall AUC couldn't be calculated for " + name)
+                        print(exception)
+
                 names.append(name)
                 Accuracy.append(accuracy)
                 B_Accuracy.append(b_accuracy)
                 ROC_AUC.append(roc_auc)
+                PR_SCORE.append(pr_score)
                 F1.append(f1)
+                PRECISION.append(precision)
+                RECALL.append(recall)
                 TIME.append(time.time() - start)
+
                 if self.custom_metric is not None:
                     custom_metric = self.custom_metric(y_test, y_pred)
                     CUSTOM_METRIC.append(custom_metric)
+
                 if self.verbose > 0:
+                    output = {
+                        "Model": name,
+                        "Accuracy": accuracy,
+                        "Balanced Accuracy": b_accuracy,
+                        "ROC AUC": roc_auc,
+                        "Precision-Recall AUC": pr_score,
+                        "F1 Score": f1,
+                        "Precision": precision,
+                        "Recall": recall,
+                        "Time taken": time.time() - start,
+                    }
                     if self.custom_metric is not None:
-                        print(
-                            {
-                                "Model": name,
-                                "Accuracy": accuracy,
-                                "Balanced Accuracy": b_accuracy,
-                                "ROC AUC": roc_auc,
-                                "F1 Score": f1,
-                                self.custom_metric.__name__: custom_metric,
-                                "Time taken": time.time() - start,
-                            }
-                        )
-                    else:
-                        print(
-                            {
-                                "Model": name,
-                                "Accuracy": accuracy,
-                                "Balanced Accuracy": b_accuracy,
-                                "ROC AUC": roc_auc,
-                                "F1 Score": f1,
-                                "Time taken": time.time() - start,
-                            }
-                        )
+                        output[self.custom_metric.__name__] = custom_metric
+                    print(output)
+
                 if self.predictions:
                     predictions[name] = y_pred
             except Exception as exception:
                 if self.ignore_warnings is False:
                     print(name + " model failed to execute")
                     print(exception)
+
         if self.custom_metric is None:
             scores = pd.DataFrame(
                 {
                     "Model": names,
                     "Accuracy": Accuracy,
                     "Balanced Accuracy": B_Accuracy,
                     "ROC AUC": ROC_AUC,
+                    "Precision-Recall AUC": PR_SCORE,
                     "F1 Score": F1,
+                    "Precision": PRECISION,
+                    "Recall": RECALL,
                     "Time Taken": TIME,
                 }
             )
@@ -370,19 +398,22 @@ def fit(self, X_train, X_test, y_train, y_test):
                     "Accuracy": Accuracy,
                     "Balanced Accuracy": B_Accuracy,
                     "ROC AUC": ROC_AUC,
+                    "Precision-Recall AUC": PR_SCORE,
                     "F1 Score": F1,
+                    "Precision": PRECISION,
+                    "Recall": RECALL,
                     self.custom_metric.__name__: CUSTOM_METRIC,
                     "Time Taken": TIME,
                 }
             )
-        scores = scores.sort_values(by="Balanced Accuracy", ascending=False).set_index(
-            "Model"
-        )
+
+        scores = scores.sort_values(by="ROC AUC", ascending=False).set_index("Model")
 
         if self.predictions:
             predictions_df = pd.DataFrame.from_dict(predictions)
-        return scores, predictions_df if self.predictions is True else scores
+            return scores, predictions_df
 
+        return scores, None
     def provide_models(self, X_train, X_test, y_train, y_test):
         """
         This function returns all the model objects trained in fit function.
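Taken together, the Supervised.py changes adjust the public API of LazyClassifier: a new transformers argument (True keeps the built-in numeric/categorical preprocessing, False or None becomes a passthrough ColumnTransformer, and a ColumnTransformer instance is used as given), ROC AUC is now computed from predict_proba/decision_function scores instead of hard predictions, Precision-Recall AUC, Precision, and Recall columns are added, results are sorted by ROC AUC, and fit now always returns a (scores, predictions) tuple. A minimal usage sketch of the post-PR API follows; the dataset, split, and printed columns are illustrative and not part of this PR:

# Sketch of the API after this PR; dataset and split are illustrative.
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from lazypredict.Supervised import LazyClassifier

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# transformers=True (the new default) keeps the built-in preprocessing;
# False/None yields a passthrough ColumnTransformer, and a custom
# ColumnTransformer instance is used as-is.
clf = LazyClassifier(verbose=0, ignore_warnings=True, transformers=True)

# fit now always returns a tuple; predictions is None unless the
# classifier was constructed with predictions=True.
scores, predictions = clf.fit(X_train, X_test, y_train, y_test)

# scores is sorted by ROC AUC and includes the new columns.
print(scores[["ROC AUC", "Precision-Recall AUC", "Precision", "Recall"]])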
2 changes: 1 addition & 1 deletion lazypredict/__init__.py
@@ -4,4 +4,4 @@
 
 __author__ = """Shankar Rao Pandala"""
 __email__ = "[email protected]"
-__version__ = '0.2.12'
+__version__ = '0.2.13'
2 changes: 1 addition & 1 deletion setup.py
@@ -51,6 +51,6 @@
     test_suite="tests",
     tests_require=test_requirements,
     url="https://github.com/shankarpandala/lazypredict",
-    version='0.2.12',
+    version='0.2.13',
     zip_safe=False,
 )