@@ -199,33 +199,37 @@ def test_handle_unknown_return_nan(self):
199199 self .assertTrue (result [1 :].isnull ().all ())
200200
def test_handle_missing_return_nan_train(self):
    """Check that ``handle_missing='return_nan'`` keeps a missing training value null.

    Every encoder named in ``encoders.__all__`` except ``HashingEncoder`` is
    run through ``fit_transform`` on a frame whose last row is missing. Two
    flavours of "missing" are exercised: ``np.nan`` in a column with the
    default inferred dtype, and ``pd.NA`` in a ``"string"`` dtype column.
    The encoded last row is expected to be null.
    """
    # Label each frame so a failing subTest reports which flavour broke.
    # ``np.nan`` is used instead of ``np.NaN``: the alias was removed in NumPy 2.0.
    frames = (
        ('np.nan', pd.DataFrame({'city': ['chicago', 'los angeles', np.nan]})),
        ('pd.NA', pd.DataFrame({'city': ['chicago', 'los angeles', pd.NA]}, dtype="string")),
    )
    y = pd.Series([1, 0, 1])

    # HashingEncoder supports new values by design -> excluded.
    # Sorted so the iteration order (and thus test output) is deterministic.
    for encoder_name in sorted(set(encoders.__all__) - {'HashingEncoder'}):
        for na_kind, X in frames:
            with self.subTest(encoder_name=encoder_name, na_kind=na_kind):
                enc = getattr(encoders, encoder_name)(handle_missing='return_nan')
                result = enc.fit_transform(X, y).iloc[2, :]

                if len(result) == 1:
                    self.assertTrue(result.isnull().all())
                else:
                    # Multi-column output: the first column is not checked
                    # (presumably an intercept-like column -- TODO confirm).
                    self.assertTrue(result[1:].isnull().all())
214216
def test_handle_missing_return_nan_test(self):
    """Check that ``handle_missing='return_nan'`` keeps a missing value null at transform time.

    Every encoder named in ``encoders.__all__`` except ``HashingEncoder`` is
    fitted on a frame without missing values and then used to transform a
    frame whose last row is missing. Two flavours of "missing" are
    exercised: ``np.nan`` in a column with the default inferred dtype, and
    ``pd.NA`` in a ``"string"`` dtype column. The encoded last row is
    expected to be null.
    """
    X = pd.DataFrame({'city': ['chicago', 'los angeles', 'chicago']})
    # Label each frame so a failing subTest reports which flavour broke.
    # ``np.nan`` is used instead of ``np.NaN``: the alias was removed in NumPy 2.0.
    frames_with_na = (
        ('np.nan', pd.DataFrame({'city': ['chicago', 'los angeles', np.nan]})),
        ('pd.NA', pd.DataFrame({'city': ['chicago', 'los angeles', pd.NA]}, dtype="string")),
    )
    y = pd.Series([1, 0, 1])

    # HashingEncoder supports new values by design -> excluded.
    # Sorted so the iteration order (and thus test output) is deterministic.
    for encoder_name in sorted(set(encoders.__all__) - {'HashingEncoder'}):
        for na_kind, X_na in frames_with_na:
            with self.subTest(encoder_name=encoder_name, na_kind=na_kind):
                enc = getattr(encoders, encoder_name)(handle_missing='return_nan')
                result = enc.fit(X, y).transform(X_na).iloc[2, :]

                if len(result) == 1:
                    self.assertTrue(result.isnull().all())
                else:
                    # Multi-column output: the first column is not checked
                    # (presumably an intercept-like column -- TODO confirm).
                    self.assertTrue(result[1:].isnull().all())
229233
230234 def test_handle_unknown_value (self ):
231235 train = pd .DataFrame ({'city' : ['chicago' , 'los angeles' ]})
0 commit comments