Bugfix: attribute naming consistency, drop_cols (#219)

VHeusinkveld · web-flow · commit e85877af82c3 · 2020-10-01T13:33:53.000-07:00
* Bugfix: attribute naming consistency, drop_cols

Within the scikit-learn ecosystem, it is standard practice to give instance attributes the same names as the corresponding `__init__` arguments, so that the `get_params` method can retrieve them. From scikit-learn 0.24 onwards this is required behavior.

* Bugfix: drop_cols incorrectly creates new list object

* drop_cols: change the `__setstate__` default from None to an empty list

* Update changelog and contribution list

* Formatting

* Bump version to 2.0.2
diff --git a/README.rst b/README.rst
@@ -445,6 +445,12 @@ can be easily serialized.
 
 Changelog
 ---------
+2.0.2 (2020-10-01)
+******************
+
+* Fix `DataFrameMapper` drop_cols attribute naming consistency with scikit-learn and initialization.
+
+
 2.0.1 (2020-09-07)
 ******************
 
@@ -585,3 +591,4 @@ Other contributors:
 * Vitaley Zaretskey (@vzaretsk)
 * Zac Stewart (@zacstewart)
 * Parul Singh (@paro1234)
+* Vincent Heusinkveld (@VHeusinkveld)
diff --git a/sklearn_pandas/__init__.py b/sklearn_pandas/__init__.py
@@ -1,5 +1,5 @@
-__version__ = '2.0.1'
+__version__ = '2.0.2'
 
 from .dataframe_mapper import DataFrameMapper  # NOQA
 from .features_generator import gen_features  # NOQA
-from .transformers import NumericalTransformer # NOQA
+from .transformers import NumericalTransformer # NOQA
diff --git a/sklearn_pandas/dataframe_mapper.py b/sklearn_pandas/dataframe_mapper.py
@@ -105,7 +105,7 @@ def __init__(self, features, default=False, sparse=False, df_out=False,
         self.sparse = sparse
         self.df_out = df_out
         self.input_df = input_df
-        self.drop_columns = drop_cols or []
+        self.drop_cols = [] if drop_cols is None else drop_cols
         self.transformed_names_ = []
 
         if (df_out and (sparse or default)):
@@ -147,7 +147,7 @@ def _unselected_columns(self, X):
         X_columns = list(X.columns)
         return [column for column in X_columns if
                 column not in self._selected_columns
-                and column not in self.drop_columns]
+                and column not in self.drop_cols]
 
     def __setstate__(self, state):
         # compatibility for older versions of sklearn-pandas
@@ -156,7 +156,7 @@ def __setstate__(self, state):
         self.default = state.get('default', False)
         self.df_out = state.get('df_out', False)
         self.input_df = state.get('input_df', False)
-        self.drop_columns = state.get('drop_cols', None)
+        self.drop_cols = state.get('drop_cols', [])
         self.built_features = state.get('built_features', self.features)
         self.built_default = state.get('built_default', self.default)
         self.transformed_names_ = state.get('transformed_names_', [])