6 changes: 4 additions & 2 deletions .github/workflows/docs.yml
@@ -1,7 +1,9 @@
name: "Pull Request Docs Check"
on:
pull_request:

push:
branches:
- dev
jobs:
docs:
runs-on: ubuntu-latest
@@ -17,7 +19,7 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         pip install sphinx
-        pip install -r docs/requirements.txt # Ensure you have a requirements file for Sphinx
+        pip install -r requirements.txt # Ensure you have a requirements file for Sphinx
 
     - name: Build documentation
       run: |
2 changes: 2 additions & 0 deletions .gitignore
@@ -3,6 +3,8 @@ __pycache__/
 *.py[cod]
 *$py.class
 
+tests/mq/
+
 # C extensions
 *.so
 
173 changes: 102 additions & 71 deletions lazypredict/Supervised.py
@@ -5,7 +5,7 @@
 
 import numpy as np
 import pandas as pd
-from tqdm import tqdm
+from tqdm.autonotebook import tqdm
 import datetime
 import time
 from sklearn.pipeline import Pipeline
@@ -18,10 +18,14 @@
 from sklearn.metrics import (
     accuracy_score,
     balanced_accuracy_score,
+    euclidean_distances,
+    precision_score,
+    recall_score,
     roc_auc_score,
     f1_score,
     r2_score,
     mean_squared_error,
+    average_precision_score,
 )
 import warnings
 import xgboost
@@ -210,6 +214,7 @@ def __init__(
         predictions=False,
         random_state=42,
         classifiers="all",
+        transformers=True,
     ):
         self.verbose = verbose
         self.ignore_warnings = ignore_warnings
@@ -218,34 +223,16 @@
         self.models = {}
         self.random_state = random_state
         self.classifiers = classifiers
+        self.transformers = transformers
 
     def fit(self, X_train, X_test, y_train, y_test):
-        """Fit Classification algorithms to X_train and y_train, predict and score on X_test, y_test.
-        Parameters
-        ----------
-        X_train : array-like,
-            Training vectors, where rows is the number of samples
-            and columns is the number of features.
-        X_test : array-like,
-            Testing vectors, where rows is the number of samples
-            and columns is the number of features.
-        y_train : array-like,
-            Training vectors, where rows is the number of samples
-            and columns is the number of features.
-        y_test : array-like,
-            Testing vectors, where rows is the number of samples
-            and columns is the number of features.
-        Returns
-        -------
-        scores : Pandas DataFrame
-            Returns metrics of all the models in a Pandas DataFrame.
-        predictions : Pandas DataFrame
-            Returns predictions of all the models in a Pandas DataFrame.
-        """
         Accuracy = []
         B_Accuracy = []
         ROC_AUC = []
+        PR_SCORE = []
         F1 = []
+        PRECISION = []
+        RECALL = []
         names = []
         TIME = []
         predictions = {}
@@ -257,20 +244,25 @@ def fit(self, X_train, X_test, y_train, y_test):
         X_train = pd.DataFrame(X_train)
         X_test = pd.DataFrame(X_test)
 
-        numeric_features = X_train.select_dtypes(include=[np.number]).columns
-        categorical_features = X_train.select_dtypes(include=["object"]).columns
+        if self.transformers is True:
+            numeric_features = X_train.select_dtypes(include=[np.number]).columns
+            categorical_features = X_train.select_dtypes(include=["object"]).columns
 
-        categorical_low, categorical_high = get_card_split(
-            X_train, categorical_features
-        )
+            categorical_low, categorical_high = get_card_split(
+                X_train, categorical_features
+            )
 
-        preprocessor = ColumnTransformer(
-            transformers=[
-                ("numeric", numeric_transformer, numeric_features),
-                ("categorical_low", categorical_transformer_low, categorical_low),
-                ("categorical_high", categorical_transformer_high, categorical_high),
-            ]
-        )
+            preprocessor = ColumnTransformer(
+                transformers=[
+                    ("numeric", numeric_transformer, numeric_features),
+                    ("categorical_low", categorical_transformer_low, categorical_low),
+                    ("categorical_high", categorical_transformer_high, categorical_high),
+                ]
+            )
+        elif self.transformers is False or self.transformers is None:
+            preprocessor = ColumnTransformer(transformers=[], remainder="passthrough")
+        elif isinstance(self.transformers, ColumnTransformer):
+            preprocessor = self.transformers
 
         if self.classifiers == "all":
             self.classifiers = CLASSIFIERS
@@ -289,77 +281,113 @@ def fit(self, X_train, X_test, y_train, y_test):
             start = time.time()
             try:
                 if "random_state" in model().get_params().keys():
-                    pipe = Pipeline(
-                        steps=[
-                            ("preprocessor", preprocessor),
-                            ("classifier", model(random_state=self.random_state)),
-                        ]
-                    )
+                    if "probability" not in model().get_params().keys():
+                        pipe = Pipeline(
+                            steps=[
+                                ("preprocessor", preprocessor),
+                                ("classifier", model(random_state=self.random_state)),
+                            ]
+                        )
+                    else:
+                        pipe = Pipeline(
+                            steps=[
+                                ("preprocessor", preprocessor),
+                                ("classifier", model(random_state=self.random_state, probability=True)),
+                            ]
+                        )
                 else:
-                    pipe = Pipeline(
-                        steps=[("preprocessor", preprocessor), ("classifier", model())]
-                    )
+                    if "probability" not in model().get_params().keys():
+                        pipe = Pipeline(
+                            steps=[("preprocessor", preprocessor), ("classifier", model())]
+                        )
+                    else:
+                        pipe = Pipeline(
+                            steps=[("preprocessor", preprocessor), ("classifier", model(probability=True))]
+                        )
 
                 pipe.fit(X_train, y_train)
                 self.models[name] = pipe
                 y_pred = pipe.predict(X_test)
+
+                try:
+                    y_score = pipe.predict_proba(X_test)[:, 1]
+                except:
+                    try:
+                        y_score = pipe.decision_function(X_test)
+                    except:
+                        centroids = pipe.named_steps['classifier'].centroids_
+                        distances = euclidean_distances(X_test, centroids)
+                        y_score = -distances[:, 1]
+
                 accuracy = accuracy_score(y_test, y_pred, normalize=True)
                 b_accuracy = balanced_accuracy_score(y_test, y_pred)
                 f1 = f1_score(y_test, y_pred, average="weighted")
+                precision = precision_score(y_test, y_pred, average="weighted")
+                recall = recall_score(y_test, y_pred, average="weighted")
                 try:
-                    roc_auc = roc_auc_score(y_test, y_pred)
+                    roc_auc = roc_auc_score(y_test, y_score)
                 except Exception as exception:
                     roc_auc = None
                     if self.ignore_warnings is False:
                         print("ROC AUC couldn't be calculated for " + name)
                         print(exception)
+
+                try:
+                    pr_score = average_precision_score(y_test, y_score)
+                except Exception as exception:
+                    pr_score = None
+                    if self.ignore_warnings is False:
+                        print("Precision-Recall AUC couldn't be calculated for " + name)
+                        print(exception)
+
                 names.append(name)
                 Accuracy.append(accuracy)
                 B_Accuracy.append(b_accuracy)
                 ROC_AUC.append(roc_auc)
+                PR_SCORE.append(pr_score)
                 F1.append(f1)
+                PRECISION.append(precision)
+                RECALL.append(recall)
                 TIME.append(time.time() - start)
+
                 if self.custom_metric is not None:
                     custom_metric = self.custom_metric(y_test, y_pred)
                     CUSTOM_METRIC.append(custom_metric)
+
                 if self.verbose > 0:
+                    output = {
+                        "Model": name,
+                        "Accuracy": accuracy,
+                        "Balanced Accuracy": b_accuracy,
+                        "ROC AUC": roc_auc,
+                        "Precision-Recall AUC": pr_score,
+                        "F1 Score": f1,
+                        "Precision": precision,
+                        "Recall": recall,
+                        "Time taken": time.time() - start,
+                    }
                     if self.custom_metric is not None:
-                        print(
-                            {
-                                "Model": name,
-                                "Accuracy": accuracy,
-                                "Balanced Accuracy": b_accuracy,
-                                "ROC AUC": roc_auc,
-                                "F1 Score": f1,
-                                self.custom_metric.__name__: custom_metric,
-                                "Time taken": time.time() - start,
-                            }
-                        )
-                    else:
-                        print(
-                            {
-                                "Model": name,
-                                "Accuracy": accuracy,
-                                "Balanced Accuracy": b_accuracy,
-                                "ROC AUC": roc_auc,
-                                "F1 Score": f1,
-                                "Time taken": time.time() - start,
-                            }
-                        )
+                        output[self.custom_metric.__name__] = custom_metric
+                    print(output)
+
                 if self.predictions:
                     predictions[name] = y_pred
             except Exception as exception:
                 if self.ignore_warnings is False:
                     print(name + " model failed to execute")
                     print(exception)
+
         if self.custom_metric is None:
             scores = pd.DataFrame(
                 {
                     "Model": names,
                     "Accuracy": Accuracy,
                     "Balanced Accuracy": B_Accuracy,
                     "ROC AUC": ROC_AUC,
+                    "Precision-Recall AUC": PR_SCORE,
                     "F1 Score": F1,
+                    "Precision": PRECISION,
+                    "Recall": RECALL,
                     "Time Taken": TIME,
                 }
             )
@@ -370,19 +398,22 @@ def fit(self, X_train, X_test, y_train, y_test):
                     "Accuracy": Accuracy,
                     "Balanced Accuracy": B_Accuracy,
                     "ROC AUC": ROC_AUC,
+                    "Precision-Recall AUC": PR_SCORE,
                     "F1 Score": F1,
+                    "Precision": PRECISION,
+                    "Recall": RECALL,
                     self.custom_metric.__name__: CUSTOM_METRIC,
                     "Time Taken": TIME,
                 }
             )
-        scores = scores.sort_values(by="Balanced Accuracy", ascending=False).set_index(
-            "Model"
-        )
+
+        scores = scores.sort_values(by="ROC AUC", ascending=False).set_index("Model")
 
         if self.predictions:
             predictions_df = pd.DataFrame.from_dict(predictions)
-        return scores, predictions_df if self.predictions is True else scores
+            return scores, predictions_df
 
+        return scores, None
     def provide_models(self, X_train, X_test, y_train, y_test):
         """
         This function returns all the model objects trained in fit function.
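Taken together, the Supervised.py changes adjust the public API of LazyClassifier: a new transformers argument (True keeps the built-in numeric/categorical preprocessing, False or None becomes a passthrough ColumnTransformer, and a ColumnTransformer instance is used as given), ROC AUC is now computed from predict_proba/decision_function scores instead of hard predictions, Precision-Recall AUC, Precision, and Recall columns are added, results are sorted by ROC AUC, and fit now always returns a (scores, predictions) tuple. A minimal usage sketch of the post-PR API follows; the dataset, split, and printed columns are illustrative and not part of this PR:

# Sketch of the API after this PR; dataset and split are illustrative.
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from lazypredict.Supervised import LazyClassifier

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# transformers=True (the new default) keeps the built-in preprocessing;
# False/None yields a passthrough ColumnTransformer, and a custom
# ColumnTransformer instance is used as-is.
clf = LazyClassifier(verbose=0, ignore_warnings=True, transformers=True)

# fit now always returns a tuple; predictions is None unless the
# classifier was constructed with predictions=True.
scores, predictions = clf.fit(X_train, X_test, y_train, y_test)

# scores is sorted by ROC AUC and includes the new columns.
print(scores[["ROC AUC", "Precision-Recall AUC", "Precision", "Recall"]])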
2 changes: 1 addition & 1 deletion lazypredict/__init__.py
@@ -4,4 +4,4 @@
 
 __author__ = """Shankar Rao Pandala"""
 __email__ = "[email protected]"
-__version__ = '0.2.12'
+__version__ = '0.2.13'
2 changes: 1 addition & 1 deletion setup.py
@@ -51,6 +51,6 @@
     test_suite="tests",
     tests_require=test_requirements,
     url="https://github.com/shankarpandala/lazypredict",
-    version='0.2.12',
+    version='0.2.13',
     zip_safe=False,
 )