11import atomsci .ddm .pipeline .transformations as trans
2+ import atomsci .ddm .pipeline .parameter_parser as pp
23import numpy as np
4+ import pandas as pd
35from deepchem .data import NumpyDataset
46import pytest
57
@@ -214,3 +216,87 @@ def test_normalization_transformer_missing_data_transform_X():
214216 expected_transformed_X = (X - expected_X_means ) / expected_X_stds
215217 np .testing .assert_array_almost_equal (transformed_dataset .X , expected_transformed_X , decimal = 6 )
216218
def test_create_feature_transformers():
    """Verify `create_feature_transformers` yields an empty list when `params.transformers` is None."""
    config = pp.wrapper({})
    config.transformers = None

    feature_transformers = trans.create_feature_transformers(
        config, featurization=None, train_dset=None
    )

    assert feature_transformers == []
231+
def test_zero_out_inf_nan_numpy_with_nonfinite_replaces_and_copies():
    """NaN and +/-inf entries of an ndarray come back as 0.0, in a distinct object, with the input untouched."""
    raw_values = [1.0, np.nan, np.inf, -np.inf, 2.5]
    original = np.array(raw_values, dtype=float)

    cleaned = trans.zero_out_inf_nan(original)

    assert isinstance(cleaned, np.ndarray)
    expected = np.array([1.0, 0.0, 0.0, 0.0, 2.5], dtype=float)
    np.testing.assert_array_equal(cleaned, expected)

    # The result must be a different object, and the input must still hold
    # its original non-finite entries.
    assert cleaned is not original
    np.testing.assert_array_equal(original, np.array(raw_values, dtype=float))
242+
243+
def test_zero_out_inf_nan_numpy_without_nonfinite_no_change_but_copy():
    """An all-finite ndarray comes back with equal values but as a different object."""
    original = np.array([1.0, 2.0, 3.0], dtype=float)

    cleaned = trans.zero_out_inf_nan(original)

    assert isinstance(cleaned, np.ndarray)
    np.testing.assert_array_equal(cleaned, original)
    # Even with nothing to replace, the returned object is not the input itself.
    assert cleaned is not original
251+
252+
def test_zero_out_inf_nan_series_with_nonfinite_replaces_preserves_index_and_name():
    """Non-finite Series entries become 0.0 while the name and index labels carry over unchanged."""
    series = pd.Series(
        [1.0, np.nan, np.inf, -np.inf],
        index=["a", "b", "c", "d"],
        name="vals",
    )

    result = trans.zero_out_inf_nan(series)

    assert isinstance(result, pd.Series)
    assert result.name == "vals"
    assert list(result.index) == ["a", "b", "c", "d"]
    expected_values = np.array([1.0, 0.0, 0.0, 0.0], dtype=float)
    np.testing.assert_array_equal(result.values, expected_values)

    # The input Series must still contain its original NaN / +inf / -inf.
    assert np.isnan(series.loc["b"])
    assert np.isposinf(series.loc["c"])
    assert np.isneginf(series.loc["d"])
266+
267+
def test_zero_out_inf_nan_series_without_nonfinite_no_change_values():
    """An all-finite Series keeps its values, name and index after the call."""
    series = pd.Series([1.0, 2.0], index=[10, 20], name="ok")

    result = trans.zero_out_inf_nan(series)

    assert isinstance(result, pd.Series)
    assert result.name == "ok"
    assert list(result.index) == [10, 20]
    np.testing.assert_array_equal(result.values, series.values)
276+
277+
def test_zero_out_inf_nan_dataframe_with_nonfinite_replaces_preserves_index_and_columns():
    """Non-finite DataFrame cells become 0.0 while row labels and column names carry over unchanged."""
    column_data = {"c1": [1.0, np.nan], "c2": [np.inf, 4.0]}
    frame = pd.DataFrame(column_data, index=["r1", "r2"])

    result = trans.zero_out_inf_nan(frame)

    assert isinstance(result, pd.DataFrame)
    assert list(result.index) == ["r1", "r2"]
    assert list(result.columns) == ["c1", "c2"]
    expected_values = np.array([[1.0, 0.0], [0.0, 4.0]], dtype=float)
    np.testing.assert_array_equal(result.values, expected_values)

    # The input frame must still contain its original NaN and +inf cells.
    assert np.isnan(frame.loc["r2", "c1"])
    assert np.isposinf(frame.loc["r1", "c2"])
293+
294+
def test_zero_out_inf_nan_dataframe_without_nonfinite_no_change_values():
    """An all-finite DataFrame keeps its values, index and columns after the call."""
    frame = pd.DataFrame({"a": [1.0, 2.0], "b": [3.0, 4.0]}, index=[0, 1])

    result = trans.zero_out_inf_nan(frame)

    assert isinstance(result, pd.DataFrame)
    assert list(result.index) == [0, 1]
    assert list(result.columns) == ["a", "b"]
    np.testing.assert_array_equal(result.values, frame.values)
0 commit comments