fixed suggested changes

RJ Agrawal · RJ Agrawal · commit e0ef0c066f62 · 2020-07-28T23:54:44.000-07:00
diff --git a/README.rst b/README.rst
@@ -7,11 +7,8 @@ Sklearn-pandas
 
 This module provides a bridge between `Scikit-Learn <http://scikit-learn.org/stable>`__'s machine learning methods and `pandas <https://pandas.pydata.org>`__-style Data Frames.
 
-In particular, it provides:
 
-1. A way to map ``DataFrame`` columns to transformations, which are later recombined into features.
-2. A compatibility shim for old ``scikit-learn`` versions to cross-validate a pipeline that takes a pandas ``DataFrame`` as input. This is only needed for ``scikit-learn<0.16.0`` (see `#11 <https://github.com/paulgb/sklearn-pandas/issues/11>`__ for details). It is deprecated and will likely be dropped in ``skearn-pandas==2.0``.
-3. A couple of special transformers that work well with pandas inputs: ``CategoricalImputer`` and ``FunctionTransformer``.
+In particular, it provides a way to map ``DataFrame`` columns to transformations, which are later recombined into features.
 
 Installation
 ------------
@@ -20,6 +17,7 @@ You can install ``sklearn-pandas`` with ``pip``::
 
     # pip install sklearn-pandas
 
+
 Tests
 -----
 
@@ -136,8 +134,18 @@ of the feature definition::
   >>> mapper_alias.transformed_names_
   ['children_scaled']
 
+Alternatively, you can also specify a prefix and/or suffix to add to the column name. For example::
+
 
-Passing Series/DataFrames to the transformers
+  >>> mapper_alias = DataFrameMapper([
+  ...     (['children'], sklearn.preprocessing.StandardScaler(), {'prefix': 'standard_scaled_'}),
+  ...     (['children'], sklearn.preprocessing.StandardScaler(), {'suffix': '_raw'})
+  ... ])
+  >>> _ = mapper_alias.fit_transform(data.copy())
+  >>> mapper_alias.transformed_names_
+  ['standard_scaled_children', 'children_raw']
+
+Passing Series/DataFrames to the transformers
 *********************************************
 
 By default the transformers are passed a numpy array of the selected columns
@@ -338,6 +346,23 @@ Then the following code could be used to override default imputing strategy:
            [2.0, True, 0.0],
            [3.0, True, 0.0]], dtype=object)
 
+You can also specify a global prefix or suffix for the generated transformed column names using the ``prefix`` and
+``suffix`` parameters::
+
+    >>> feature_def = gen_features(
+    ...     columns=['col1', 'col2', 'col3'],
+    ...     classes=[sklearn.preprocessing.LabelEncoder],
+    ...     prefix="lblencoder_"
+    ... )
+    >>> mapper5 = DataFrameMapper(feature_def)
+    >>> data5 = pd.DataFrame({
+    ...     'col1': ['yes', 'no', 'yes'],
+    ...     'col2': [True, False, False],
+    ...     'col3': ['one', 'two', 'three']
+    ... })
+    >>> _ = mapper5.fit_transform(data5)
+    >>> mapper5.transformed_names_
+    ['lblencoder_col1', 'lblencoder_col2', 'lblencoder_col3']
 
 Feature selection and other supervised transformations
 ******************************************************
diff --git a/setup.py b/setup.py
@@ -34,15 +34,19 @@ def run(self):
       description='Pandas integration with sklearn',
       maintainer='Ritesh Agrawal',
       maintainer_email='ragrawal@gmail.com',
-      url='https://github.com/ragrawal/sklearn-pandas',
+      url='https://github.com/scikit-learn-contrib/sklearn-pandas',
       packages=['sklearn_pandas'],
       keywords=['scikit', 'sklearn', 'pandas'],
       install_requires=[
           'scikit-learn>=0.23.0',
           'scipy>=1.4.1',
           'pandas>=1.0.5',
           'numpy>=1.18.1',
-          'tqdm>=4.46.0'],
+          'tqdm>=4.46.0'
+      ],
+      extras_require={
+          "progress-bar": ['tqdm>=4.46.0']
+      },
       tests_require=['pytest', 'mock'],
       cmdclass={'test': PyTest},
       )
diff --git a/sklearn_pandas/__init__.py b/sklearn_pandas/__init__.py
@@ -1,4 +1,4 @@
-__version__ = '1.8.2'
+__version__ = '2.0.0'
 
 from .dataframe_mapper import DataFrameMapper  # NOQA
 from .cross_validation import cross_val_score, GridSearchCV, RandomizedSearchCV  # NOQA
diff --git a/sklearn_pandas/dataframe_mapper.py b/sklearn_pandas/dataframe_mapper.py
@@ -276,11 +276,9 @@ def get_names(self, columns, transformer, x, alias=None, prefix='',
         else:
             output = [name]
 
-        if not prefix and not suffix:
+        if prefix == suffix == "":
             return output
 
-        prefix = prefix or ''
-        suffix = suffix or ''
         return ['{}{}{}'.format(prefix, x, suffix) for x in output]
 
     def get_dtypes(self, extracted):
@@ -327,8 +325,8 @@ def _transform(self, X, y=None, do_fit=False):
             extracted.append(_handle_feature(Xt))
 
             alias = options.get('alias')
-            prefix = options.get('prefix')
-            suffix = options.get('suffix')
+            prefix = options.get('prefix', '')
+            suffix = options.get('suffix', '')
 
             self.transformed_names_ += self.get_names(
                 columns, transformers, Xt, alias, prefix, suffix)
diff --git a/sklearn_pandas/features_generator.py b/sklearn_pandas/features_generator.py
@@ -1,4 +1,4 @@
-def gen_features(columns, classes=None, arguments={}):
+def gen_features(columns, classes=None, prefix='', suffix=''):
     """Generates a feature definition list which can be passed
     into DataFrameMapper
 
@@ -25,8 +25,9 @@ def gen_features(columns, classes=None, arguments={}):
 
                 If None value selected, then each feature left as is.
 
-    arguments   a dictionary of additional values such as {'prefix': 'x',
-                'suffix': 'na'}
+    prefix      add prefix to transformed column names
+
+    suffix      add suffix to transformed column names.
 
     """
     if classes is None:
@@ -37,9 +38,15 @@ def gen_features(columns, classes=None, arguments={}):
     for column in columns:
         feature_transformers = []
 
+        arguments = {}
+        if prefix and prefix != "":
+            arguments['prefix'] = prefix
+        if suffix and suffix != "":
+            arguments['suffix'] = suffix
+
         classes = [cls for cls in classes if cls is not None]
         if not classes:
-            feature_defs.append((column, None))
+            feature_defs.append((column, None, arguments))
 
         else:
             for definition in classes:
diff --git a/tests/test_features_generator.py b/tests/test_features_generator.py
@@ -47,6 +47,9 @@ def test_generate_features_with_default_parameters():
     feature_defs = gen_features(columns=columns, classes=[MockClass])
     assert len(feature_defs) == len(columns)
 
+    for feature in feature_defs:
+        assert feature[2] == {}
+
     feature_dict = dict([_[0:2] for _ in feature_defs])
     assert columns == sorted(feature_dict.keys())
 
@@ -84,9 +87,9 @@ def test_generate_features_with_none_only_transformers():
     feature_defs = gen_features(
         columns=['colA', 'colB', 'colC'], classes=[None])
 
-    expected = [('colA', None),
-                ('colB', None),
-                ('colC', None)]
+    expected = [('colA', None, {}),
+                ('colB', None, {}),
+                ('colC', None, {})]
 
     assert feature_defs == expected
 
diff --git a/tox.ini b/tox.ini
@@ -10,7 +10,7 @@ exclude =
     *bin/
 
 [tox]
-envlist = {py36}-sklearn{23}-pandas{1}
+envlist = {py36,py37,py38}-sklearn{22,23}-pandas{105,110}
 
 [testenv]
 deps =
@@ -20,7 +20,9 @@ deps =
     flake8==3.7.9
     numpy==1.18.1
     scipy==1.4.1
-    pandas1: pandas==1.0.5
+    pandas105: pandas==1.0.5
+    pandas110: pandas==1.1.0
+    sklearn22: scikit-learn==0.22.2
     sklearn23: scikit-learn==0.23.1
 
 commands =

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-__version__ = '1.8.2'`
	`1`	`+__version__ = '2.0.0'`
`2`	`2`
`3`	`3`	`from .dataframe_mapper import DataFrameMapper # NOQA`
`4`	`4`	`from .cross_validation import cross_val_score, GridSearchCV, RandomizedSearchCV # NOQA`