88import sklearn
99import category_encoders .tests .helpers as th
1010from sklearn .utils .estimator_checks import check_transformer_general , check_transformers_unfitted
11+ from sklearn .compose import ColumnTransformer
1112from unittest2 import TestSuite , TextTestRunner , TestCase # or `from unittest import ...` if on Python 3.4+
1213
1314import category_encoders as encoders
@@ -419,3 +420,23 @@ def test_truncated_index(self):
419420 enc2 = getattr (encoders , encoder_name )()
420421 result2 = enc2 .fit_transform (data2 .x , data2 .y )
421422 self .assertTrue ((result .values == result2 .values ).all ())
423+
def test_column_transformer(self):
    """Check that encoders give the same output inside a ColumnTransformer.

    Regression test for issue #169: for every encoder except
    HashingEncoder, the result of fitting the encoder through a
    ColumnTransformer is compared with fitting it directly on the
    same columns.
    """
    # HashingEncoder does not accept handle_missing parameter
    encoder_names = set(encoders.__all__) - {'HashingEncoder'}

    for encoder_name in encoder_names:
        with self.subTest(encoder_name=encoder_name):
            encoder_cls = getattr(encoders, encoder_name)

            # Only one data type can be tested at once; these are the string columns.
            tested_columns = ['unique_str', 'invariant', 'underscore', 'none', 'extra']

            # ColumnTransformer instantiates the encoder twice, so the
            # encoder settings have to be passed through correctly.
            wrapped_encoder = encoder_cls(handle_missing="return_nan")
            transformer = ColumnTransformer(
                [("dummy_encoder_name", wrapped_encoder, tested_columns)]
            )
            obtained = transformer.fit_transform(X, y)

            # Reference result: apply the encoder directly to the selected columns.
            direct_encoder = encoder_cls(handle_missing="return_nan", return_df=False)
            expected = direct_encoder.fit_transform(X[tested_columns], y)

            np.testing.assert_array_equal(obtained, expected)
0 commit comments