Implement drop proba col (#675)

rchaves33 · Ryan Chaves · rasbt · web-flow · commit 6b457a1317a2 · 2020-04-03T16:56:14.000-05:00
* Replace drop_last_proba with drop_proba col (#590) * Revert "Replace drop_last_proba with drop_proba col (#590)" This reverts commit a19ed56. * Fix typo in unit test for drop_proba_col (#590) * fix rebase issue * add check for drop_col param Co-authored-by: Ryan Chaves <ryan.chaves@ing.com> Co-authored-by: rasbt <mail@sebastianraschka.com>
diff --git a/docs/sources/user_guide/classifier/StackingCVClassifier.ipynb b/docs/sources/user_guide/classifier/StackingCVClassifier.ipynb
@@ -700,7 +700,7 @@
      "text": [
       "## StackingCVClassifier\n",
       "\n",
-      "*StackingCVClassifier(classifiers, meta_classifier, use_probas=False, drop_last_proba=False, cv=2, shuffle=True, random_state=None, stratify=True, verbose=0, use_features_in_secondary=False, store_train_meta_features=False, use_clones=True, n_jobs=None, pre_dispatch='2*n_jobs')*\n",
+      "*StackingCVClassifier(classifiers, meta_classifier, use_probas=False, drop_proba_col=None, cv=2, shuffle=True, random_state=None, stratify=True, verbose=0, use_features_in_secondary=False, store_train_meta_features=False, use_clones=True, n_jobs=None, pre_dispatch='2*n_jobs')*\n",
       "\n",
       "A 'Stacking Cross-Validation' classifier for scikit-learn estimators.\n",
       "\n",
@@ -725,13 +725,16 @@
       "    If True, trains meta-classifier based on predicted probabilities\n",
       "    instead of class labels.\n",
       "\n",
-      "- `drop_last_proba` : bool (default: False)\n",
+      "- `drop_proba_col` : string (default: None)\n",
       "\n",
-      "    Drops the last \"probability\" column in the feature set since if `True`,\n",
-      "    because it is redundant:\n",
+      "    Drops one \"probability\" column from the feature set, because it is\n",
+      "    redundant:\n",
       "    p(y_c) = 1 - (p(y_1) + p(y_2) + ... + p(y_{c-1})).\n",
-      "    This can be useful for meta-classifiers that are sensitive to\n",
-      "    perfectly collinear features. Only relevant if `use_probas=True.\n",
+      "    This can be useful for meta-classifiers that are sensitive to perfectly\n",
+      "    collinear features.\n",
+      "    If `last`, drops last probability column.\n",
+      "    If `first`, drops first probability column.\n",
+      "    Only relevant if `use_probas=True`.\n",
       "\n",
       "- `cv` : int, cross-validation generator or an iterable, optional (default: 2)\n",
       "\n",
@@ -1094,4 +1097,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
+}
diff --git a/docs/sources/user_guide/classifier/StackingClassifier.ipynb b/docs/sources/user_guide/classifier/StackingClassifier.ipynb
@@ -696,7 +696,7 @@
      "text": [
       "## StackingClassifier\n",
       "\n",
-      "*StackingClassifier(classifiers, meta_classifier, use_probas=False, drop_last_proba=False, average_probas=False, verbose=0, use_features_in_secondary=False, store_train_meta_features=False, use_clones=True)*\n",
+      "*StackingClassifier(classifiers, meta_classifier, use_probas=False, drop_proba_col=None, average_probas=False, verbose=0, use_features_in_secondary=False, store_train_meta_features=False, use_clones=True)*\n",
       "\n",
       "A Stacking classifier for scikit-learn estimators for classification.\n",
       "\n",
@@ -720,13 +720,16 @@
       "    If True, trains meta-classifier based on predicted probabilities\n",
       "    instead of class labels.\n",
       "\n",
-      "- `drop_last_proba` : bool (default: False)\n",
+      "- `drop_proba_col` : string (default: None)\n",
       "\n",
-      "    Drops the last \"probability\" column in the feature set since if `True`,\n",
-      "    because it is redundant:\n",
+      "    Drops one \"probability\" column from the feature set, because it is\n",
+      "    redundant:\n",
       "    p(y_c) = 1 - (p(y_1) + p(y_2) + ... + p(y_{c-1})).\n",
-      "    This can be useful for meta-classifiers that are sensitive to\n",
-      "    perfectly collinear features. Only relevant if `use_probas=True`.\n",
+      "    This can be useful for meta-classifiers that are sensitive to perfectly\n",
+      "    collinear features.\n",
+      "    If `last`, drops last probability column.\n",
+      "    If `first`, drops first probability column.\n",
+      "    Only relevant if `use_probas=True`.\n",
       "\n",
       "- `average_probas` : bool (default: False)\n",
       "\n",
@@ -1032,4 +1035,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
+}
diff --git a/mlxtend/classifier/stacking_classification.py b/mlxtend/classifier/stacking_classification.py
@@ -37,12 +37,15 @@ class StackingClassifier(_BaseXComposition, _BaseStackingClassifier,
     use_probas : bool (default: False)
         If True, trains meta-classifier based on predicted probabilities
         instead of class labels.
-    drop_last_proba : bool (default: False)
-        Drops the last "probability" column in the feature set since if `True`,
-        because it is redundant:
+    drop_proba_col : string (default: None)
+        Drops one "probability" column from the feature set, because it is
+        redundant:
         p(y_c) = 1 - (p(y_1) + p(y_2) + ... + p(y_{c-1})).
-        This can be useful for meta-classifiers that are sensitive to
-        perfectly collinear features. Only relevant if `use_probas=True`.
+        This can be useful for meta-classifiers that are sensitive to perfectly
+        collinear features.
+        If 'last', drops last probability column.
+        If 'first', drops first probability column.
+        Only relevant if `use_probas=True`.
     average_probas : bool (default: False)
         Averages the probabilities as meta features if `True`.
         Only relevant if `use_probas=True`.
@@ -93,7 +96,7 @@ class StackingClassifier(_BaseXComposition, _BaseStackingClassifier,
     """
 
     def __init__(self, classifiers, meta_classifier,
-                 use_probas=False, drop_last_proba=False,
+                 use_probas=False, drop_proba_col=None,
                  average_probas=False, verbose=0,
                  use_features_in_secondary=False,
                  store_train_meta_features=False,
@@ -102,7 +105,13 @@ def __init__(self, classifiers, meta_classifier,
         self.classifiers = classifiers
         self.meta_classifier = meta_classifier
         self.use_probas = use_probas
-        self.drop_last_proba = drop_last_proba
+
+        allowed = {None, 'first', 'last'}
+        if drop_proba_col not in allowed:
+            raise ValueError('`drop_proba_col` must be in %s. Got %s'
+                             % (allowed, drop_proba_col))
+        self.drop_proba_col = drop_proba_col
+
         self.average_probas = average_probas
         self.verbose = verbose
         self.use_features_in_secondary = use_features_in_secondary
@@ -214,9 +223,12 @@ def predict_meta_features(self, X):
         """
         check_is_fitted(self, 'clfs_')
         if self.use_probas:
-            if self.drop_last_proba:
+            if self.drop_proba_col == 'last':
                 probas = np.asarray([clf.predict_proba(X)[:, :-1]
                                      for clf in self.clfs_])
+            elif self.drop_proba_col == 'first':
+                probas = np.asarray([clf.predict_proba(X)[:, 1:]
+                                     for clf in self.clfs_])
             else:
                 probas = np.asarray([clf.predict_proba(X)
                                      for clf in self.clfs_])
diff --git a/mlxtend/classifier/stacking_cv_classification.py b/mlxtend/classifier/stacking_cv_classification.py
@@ -42,12 +42,15 @@ class StackingCVClassifier(_BaseXComposition, _BaseStackingClassifier,
     use_probas : bool (default: False)
         If True, trains meta-classifier based on predicted probabilities
         instead of class labels.
-    drop_last_proba : bool (default: False)
-        Drops the last "probability" column in the feature set since if `True`,
-        because it is redundant:
+    drop_proba_col : string (default: None)
+        Drops one "probability" column from the feature set, because it is
+        redundant:
         p(y_c) = 1 - (p(y_1) + p(y_2) + ... + p(y_{c-1})).
-        This can be useful for meta-classifiers that are sensitive to
-        perfectly collinear features. Only relevant if `use_probas=True.
+        This can be useful for meta-classifiers that are sensitive to perfectly
+        collinear features.
+        If 'last', drops last probability column.
+        If 'first', drops first probability column.
+        Only relevant if `use_probas=True`.
     cv : int, cross-validation generator or an iterable, optional (default: 2)
         Determines the cross-validation splitting strategy.
         Possible inputs for cv are:
@@ -137,7 +140,7 @@ class StackingCVClassifier(_BaseXComposition, _BaseStackingClassifier,
 
     """
     def __init__(self, classifiers, meta_classifier,
-                 use_probas=False, drop_last_proba=False,
+                 use_probas=False, drop_proba_col=None,
                  cv=2, shuffle=True,
                  random_state=None, stratify=True, verbose=0,
                  use_features_in_secondary=False,
@@ -148,7 +151,13 @@ def __init__(self, classifiers, meta_classifier,
         self.classifiers = classifiers
         self.meta_classifier = meta_classifier
         self.use_probas = use_probas
-        self.drop_last_proba = drop_last_proba
+
+        allowed = {None, 'first', 'last'}
+        if drop_proba_col not in allowed:
+            raise ValueError('`drop_proba_col` must be in %s. Got %s'
+                             % (allowed, drop_proba_col))
+
+        self.drop_proba_col = drop_proba_col
         self.cv = cv
         self.shuffle = shuffle
         self.random_state = random_state
@@ -243,8 +252,10 @@ def fit(self, X, y, groups=None, sample_weight=None):
 
             if not self.use_probas:
                 prediction = prediction[:, np.newaxis]
-            elif self.drop_last_proba:
+            elif self.drop_proba_col == 'last':
                 prediction = prediction[:, :-1]
+            elif self.drop_proba_col == 'first':
+                prediction = prediction[:, 1:]
 
             if meta_features is None:
                 meta_features = prediction
@@ -315,8 +326,10 @@ def predict_meta_features(self, X):
             if not self.use_probas:
                 prediction = model.predict(X)[:, np.newaxis]
             else:
-                if self.drop_last_proba:
+                if self.drop_proba_col == 'last':
                     prediction = model.predict_proba(X)[:, :-1]
+                elif self.drop_proba_col == 'first':
+                    prediction = model.predict_proba(X)[:, 1:]
                 else:
                     prediction = model.predict_proba(X)
 
diff --git a/mlxtend/classifier/tests/test_stacking_classifier.py b/mlxtend/classifier/tests/test_stacking_classifier.py
@@ -198,13 +198,13 @@ def test_StackingClassifier_avg_vs_concat():
     np.array_equal(r2[0][:3], r2[0][3:])
 
 
-def test_StackingClassifier_drop_last_proba():
+def test_StackingClassifier_drop_proba_col():
     np.random.seed(123)
     lr1 = LogisticRegression(solver='liblinear',
                              multi_class='ovr')
     sclf1 = StackingClassifier(classifiers=[lr1, lr1],
                                use_probas=True,
-                               drop_last_proba=False,
+                               drop_proba_col=None,
                                meta_classifier=lr1)
 
     sclf1.fit(X, y)
@@ -213,16 +213,25 @@ def test_StackingClassifier_drop_last_proba():
 
     sclf2 = StackingClassifier(classifiers=[lr1, lr1],
                                use_probas=True,
-                               drop_last_proba=True,
+                               drop_proba_col='last',
                                meta_classifier=lr1)
 
     sclf2.fit(X, y)
     r2 = sclf2.predict_meta_features(X[:2])
     assert r2.shape == (2, 4), r2.shape
 
+    sclf4 = StackingClassifier(classifiers=[lr1, lr1],
+                               use_probas=True,
+                               drop_proba_col='first',
+                               meta_classifier=lr1)
+
+    sclf4.fit(X, y)
+    r4 = sclf4.predict_meta_features(X[:2])
+    assert r4.shape == (2, 4), r4.shape
+
     sclf3 = StackingClassifier(classifiers=[lr1, lr1],
                                use_probas=True,
-                               drop_last_proba=True,
+                               drop_proba_col='last',
                                meta_classifier=lr1)
 
     sclf3.fit(X[0:100], y[0:100])  # only 2 classes
@@ -440,7 +449,7 @@ def test_get_params():
     got = sorted(list({s.split('__')[0] for s in sclf.get_params().keys()}))
     expect = ['average_probas',
               'classifiers',
-              'drop_last_proba',
+              'drop_proba_col',
               'gaussiannb',
               'kneighborsclassifier',
               'meta_classifier',
@@ -564,3 +573,16 @@ def test_decision_function():
         assert scores_mean == 0.95, scores_mean
     else:
         assert scores_mean == 0.94, scores_mean
+
+
+def test_drop_col_unsupported():
+    np.random.seed(123)
+    meta = LogisticRegression()
+    clf1 = RandomForestClassifier(n_estimators=10)
+    clf2 = GaussianNB()
+    clf3 = KNeighborsClassifier()
+
+    with pytest.raises(ValueError):
+        StackingClassifier(classifiers=[clf1, clf2, clf3],
+                           meta_classifier=meta,
+                           drop_proba_col='invalid value')
diff --git a/mlxtend/classifier/tests/test_stacking_cv_classifier.py b/mlxtend/classifier/tests/test_stacking_cv_classifier.py
@@ -344,7 +344,7 @@ def test_get_params():
 
     expect = ['classifiers',
               'cv',
-              'drop_last_proba',
+              'drop_proba_col',
               'gaussiannb',
               'kneighborsclassifier',
               'meta_classifier',
@@ -502,13 +502,13 @@ def test_sparse_inputs_with_features_in_secondary():
         round(stclf.score(X_train, y_train), 2)
 
 
-def test_StackingClassifier_drop_last_proba():
+def test_StackingClassifier_drop_proba_col():
     np.random.seed(123)
     lr1 = LogisticRegression(solver='liblinear',
                              multi_class='ovr')
     sclf1 = StackingCVClassifier(classifiers=[lr1, lr1],
                                  use_probas=True,
-                                 drop_last_proba=False,
+                                 drop_proba_col=None,
                                  meta_classifier=lr1)
 
     sclf1.fit(X_iris, y_iris)
@@ -517,16 +517,25 @@ def test_StackingClassifier_drop_last_proba():
 
     sclf2 = StackingCVClassifier(classifiers=[lr1, lr1],
                                  use_probas=True,
-                                 drop_last_proba=True,
+                                 drop_proba_col='last',
                                  meta_classifier=lr1)
 
     sclf2.fit(X_iris, y_iris)
     r2 = sclf2.predict_meta_features(X_iris[:2])
     assert r2.shape == (2, 4), r2.shape
 
+    sclf4 = StackingCVClassifier(classifiers=[lr1, lr1],
+                                 use_probas=True,
+                                 drop_proba_col='first',
+                                 meta_classifier=lr1)
+
+    sclf4.fit(X_iris, y_iris)
+    r4 = sclf4.predict_meta_features(X_iris[:2])
+    assert r4.shape == (2, 4), r4.shape
+
     sclf3 = StackingCVClassifier(classifiers=[lr1, lr1],
                                  use_probas=True,
-                                 drop_last_proba=True,
+                                 drop_proba_col='last',
                                  meta_classifier=lr1)
 
     sclf3.fit(X_iris[0:100], y_iris[0:100])  # only 2 classes
@@ -618,3 +627,16 @@ def test_decision_function():
         assert scores_mean == 0.96, scores_mean
     else:
         assert scores_mean == 0.90, scores_mean
+
+
+def test_drop_col_unsupported():
+    np.random.seed(123)
+    meta = LogisticRegression()
+    clf1 = RandomForestClassifier(n_estimators=10)
+    clf2 = GaussianNB()
+    clf3 = KNeighborsClassifier()
+
+    with pytest.raises(ValueError):
+        StackingCVClassifier(classifiers=[clf1, clf2, clf3],
+                             meta_classifier=meta,
+                             drop_proba_col='invalid value')