Merge pull request #313 from brendalf/feat/ajusted_rsquared

shankarpandala · web-flow · commit 312e348313ba · 2021-01-13T16:50:20.000+05:30
Implement adjusted R-squared metric for LazyRegressor
diff --git a/lazypredict/Supervised.py b/lazypredict/Supervised.py
@@ -148,7 +148,6 @@ def get_card_split(df, cols, n=11):
 
 # Helper class for performing classification
 
-
 class LazyClassifier:
     """
     This module helps in fitting to all the classification algorithms that are available in Scikit-learn
@@ -405,10 +404,14 @@ def provide_models(self, X_train, X_test, y_train, y_test):
         """
         if len(self.models.keys()) == 0:
             self.fit(X_train,X_test,y_train,y_test)
-        
+
         return self.models
 
 
+def adjusted_rsquared(r2, n, p):
+    return 1 - (1-r2) * ((n-1) / (n-p-1))
+
+
 # Helper class for performing classification
 
 
@@ -522,13 +525,14 @@ def fit(self, X_train, X_test, y_train, y_test):
             Returns predictions of all the models in a Pandas DataFrame.
         """
         R2 = []
+        ADJR2 = []
         RMSE = []
         # WIN = []
         names = []
         TIME = []
         predictions = {}
 
-        if self.custom_metric is not None:
+        if self.custom_metric:
             CUSTOM_METRIC = []
 
         if isinstance(X_train, np.ndarray):
@@ -566,61 +570,58 @@ def fit(self, X_train, X_test, y_train, y_test):
                     pipe = Pipeline(
                         steps=[("preprocessor", preprocessor), ("regressor", model())]
                     )
+
                 pipe.fit(X_train, y_train)
                 self.models[name] = pipe
                 y_pred = pipe.predict(X_test)
+
                 r_squared = r2_score(y_test, y_pred)
+                adj_rsquared = adjusted_rsquared(r_squared, X_test.shape[0], X_test.shape[1])
                 rmse = np.sqrt(mean_squared_error(y_test, y_pred))
+
                 names.append(name)
                 R2.append(r_squared)
+                ADJR2.append(adj_rsquared)
                 RMSE.append(rmse)
                 TIME.append(time.time() - start)
-                if self.custom_metric is not None:
+
+                if self.custom_metric:
                     custom_metric = self.custom_metric(y_test, y_pred)
                     CUSTOM_METRIC.append(custom_metric)
 
                 if self.verbose > 0:
-                    if self.custom_metric is not None:
-                        print(
-                            {
-                                "Model": name,
-                                "R-Squared": r_squared,
-                                "RMSE": rmse,
-                                self.custom_metric.__name__: custom_metric,
-                                "Time taken": time.time() - start,
-                            }
-                        )
-                    else:
-                        print(
-                            {
-                                "Model": name,
-                                "R-Squared": r_squared,
-                                "RMSE": rmse,
-                                "Time taken": time.time() - start,
-                            }
-                        )
+                    scores_verbose = {
+                        "Model": name,
+                        "R-Squared": r_squared,
+                        "Adjusted R-Squared": adj_rsquared,
+                        "RMSE": rmse,
+                        "Time taken": time.time() - start,
+                    }
+
+                    if self.custom_metric:
+                        scores_verbose[self.custom_metric.__name__] = custom_metric
+
+                    print(scores_verbose)
                 if self.predictions:
                     predictions[name] = y_pred
             except Exception as exception:
                 if self.ignore_warnings is False:
                     print(name + " model failed to execute")
                     print(exception)
 
-        if self.custom_metric is None:
-            scores = pd.DataFrame(
-                {"Model": names, "R-Squared": R2, "RMSE": RMSE, "Time Taken": TIME}
-            )
-        else:
-            scores = pd.DataFrame(
-                {
-                    "Model": names,
-                    "R-Squared": R2,
-                    "RMSE": RMSE,
-                    self.custom_metric.__name__: CUSTOM_METRIC,
-                    "Time Taken": TIME,
-                }
-            )
-        scores = scores.sort_values(by="R-Squared", ascending=False).set_index("Model")
+        scores = {
+            "Model": names,
+            "Adjusted R-Squared": ADJR2,
+            "R-Squared": R2,
+            "RMSE": RMSE,
+            "Time Taken": TIME
+        }
+
+        if self.custom_metric:
+            scores[self.custom_metric.__name__] = CUSTOM_METRIC
+
+        scores = pd.DataFrame(scores)
+        scores = scores.sort_values(by="Adjusted R-Squared", ascending=False).set_index("Model")
 
         if self.predictions:
             predictions_df = pd.DataFrame.from_dict(predictions)