@@ -149,6 +149,7 @@ def test_handle_unknown_error(self):
149149 def test_handle_missing_error (self ):
150150 non_null = pd .DataFrame ({'city' : ['chicago' , 'los angeles' ], 'color' : ['red' , np .nan ]}) # only 'city' column is going to be transformed
151151 has_null = pd .DataFrame ({'city' : ['chicago' , np .nan ], 'color' : ['red' , np .nan ]})
152+ has_null_pd = pd .DataFrame ({'city' : ['chicago' , pd .NA ], 'color' : ['red' , pd .NA ]}, dtype = "string" )
152153 y = pd .Series ([1 , 0 ])
153154
154155 for encoder_name in (set (encoders .__all__ ) - {'HashingEncoder' }): # HashingEncoder supports new values by design -> excluded
@@ -158,6 +159,9 @@ def test_handle_missing_error(self):
158159 with self .assertRaises (ValueError ):
159160 enc .fit (has_null , y )
160161
162+ with self .assertRaises (ValueError ):
163+ enc .fit (has_null_pd , y )
164+
161165 enc .fit (non_null , y ) # we raise an error only if a missing value is in one of the transformed columns
162166
163167 with self .assertRaises (ValueError ):
@@ -199,13 +203,15 @@ def test_handle_unknown_return_nan(self):
199203 self .assertTrue (result [1 :].isna ().all ())
200204
201205 def test_handle_missing_return_nan_train (self ):
202- X = pd .DataFrame ({'city' : ['chicago' , 'los angeles' , np .NaN ]})
206+ X_np = pd .DataFrame ({'city' : ['chicago' , 'los angeles' , np .NaN ]})
207+ X_pd = pd .DataFrame ({'city' : ['chicago' , 'los angeles' , pd .NA ]}, dtype = "string" )
203208 y = pd .Series ([1 , 0 , 1 ])
204209
205210 for encoder_name in (set (encoders .__all__ ) - {'HashingEncoder' }): # HashingEncoder supports new values by design -> excluded
206- with self .subTest (encoder_name = encoder_name ):
207- enc = getattr (encoders , encoder_name )(handle_missing = 'return_nan' )
208- result = enc .fit_transform (X , y ).iloc [2 , :]
211+ for X in (X_np , X_pd ):
212+ with self .subTest (encoder_name = encoder_name ):
213+ enc = getattr (encoders , encoder_name )(handle_missing = 'return_nan' )
214+ result = enc .fit_transform (X , y ).iloc [2 , :]
209215
210216 if len (result ) == 1 :
211217 self .assertTrue (result .isna ().all ())
@@ -214,13 +220,15 @@ def test_handle_missing_return_nan_train(self):
214220
215221 def test_handle_missing_return_nan_test (self ):
216222 X = pd .DataFrame ({'city' : ['chicago' , 'los angeles' , 'chicago' ]})
217- X_t = pd .DataFrame ({'city' : ['chicago' , 'los angeles' , np .NaN ]})
223+ X_np = pd .DataFrame ({'city' : ['chicago' , 'los angeles' , np .NaN ]})
224+ X_pd = pd .DataFrame ({'city' : ['chicago' , 'los angeles' , pd .NA ]}, dtype = "string" )
218225 y = pd .Series ([1 , 0 , 1 ])
219226
220227 for encoder_name in (set (encoders .__all__ ) - {'HashingEncoder' }): # HashingEncoder supports new values by design -> excluded
221- with self .subTest (encoder_name = encoder_name ):
222- enc = getattr (encoders , encoder_name )(handle_missing = 'return_nan' )
223- result = enc .fit (X , y ).transform (X_t ).iloc [2 , :]
228+ for X_na in (X_np , X_pd ):
229+ with self .subTest (encoder_name = encoder_name ):
230+ enc = getattr (encoders , encoder_name )(handle_missing = 'return_nan' )
231+ result = enc .fit (X , y ).transform (X_na ).iloc [2 , :]
224232
225233 if len (result ) == 1 :
226234 self .assertTrue (result .isna ().all ())
0 commit comments