1- """Helper functions that are used exclusively in the tests"""
2-
31import numpy as np
4- import random
52import pandas as pd
6- import math
7-
3+ from unittest2 import TestCase # or `from unittest import ...` if on Python 3.4+
84
def verify_numeric(X_test):
    """
    Test that all attributes in the DataFrame are numeric.

    A column is accepted when its dtype is a NumPy integer or floating-point
    dtype of any byte length (e.g. int32, int64, float32, float64). Every
    other dtype -- object/string, boolean, and pandas extension dtypes such
    as categorical -- is rejected.

    :param X_test: object exposing a pandas-style ``dtypes`` and ``columns``
        (in practice a ``pandas.DataFrame``).
    :raises AssertionError: if at least one column is not numeric.
    """
    for column, dt in zip(X_test.columns, X_test.dtypes):
        # Extension dtypes (e.g. categorical) are not ``np.dtype`` instances and
        # make ``np.issubdtype`` raise TypeError, so reject them up front.
        # Comparing against the abstract ``np.integer`` / ``np.floating`` types,
        # rather than ``np.dtype(int)`` / ``np.dtype(float)``, accepts every
        # width instead of only the platform default.
        numeric = isinstance(dt, np.dtype) and (
            np.issubdtype(dt, np.integer) or np.issubdtype(dt, np.floating)
        )
        assert numeric, 'Column %r has non-numeric dtype %s' % (column, dt)
5+ from category_encoders .tests .helpers import verify_numeric
186
197
20- def create_array (n_rows = 1000 , extras = False , has_none = True ):
21- """
22- Creates a numpy dataset with some categorical variables.
23- """
24- ds = [[
25- random .random (),
26- random .random (),
27- random .choice (['A' , 'B' , 'C' ]),
28- random .choice (['A' , 'B' , 'C' , 'D' ]) if extras else random .choice (['A' , 'B' , 'C' ]),
29- random .choice (['A' , 'B' , 'C' , None , np .nan ]) if has_none else random .choice (['A' , 'B' , 'C' ]),
30- random .choice (['A' ])
31- ] for _ in range (n_rows )]
8+ class TestHelpers (TestCase ):
329
33- return np .array (ds )
10+ def test_is_numeric_pandas (self ):
11+ # Whole numbers, regardless of the byte length, should not raise AssertionError
12+ X = pd .DataFrame (np .ones ([5 , 5 ]), dtype = 'int32' )
13+ verify_numeric (pd .DataFrame (X ))
3414
15+ X = pd .DataFrame (np .ones ([5 , 5 ]), dtype = 'int64' )
16+ verify_numeric (pd .DataFrame (X ))
3517
36- def create_dataset (n_rows = 1000 , extras = False , has_none = True ):
37- """
38- Creates a dataset with some categorical variables.
39- """
40- random .seed (2001 )
41- ds = [[
42- random .random (), # Floats
43- random .choice ([float ('nan' ), float ('inf' ), float ('-inf' ), - 0 , 0 , 1 , - 1 , math .pi ]), # Floats with edge scenarios
44- row , # Unique integers
45- str (row ), # Unique strings
46- random .choice (['A' , 'B' ]) if extras else 'A' , # Invariant in the training data
47- random .choice (['A' , 'B_b' , 'C_c_c' ]), # Strings with underscores to test reverse_dummies()
48- random .choice (['A' , 'B' , 'C' , None ]) if has_none else random .choice (['A' , 'B' , 'C' ]), # None
49- random .choice (['A' , 'B' , 'C' , 'D' ]) if extras else random .choice (['A' , 'B' , 'C' ]), # With a new string value
50- random .choice ([12 , 43 , - 32 ]), # Number in the column name
51- random .choice (['A' , 'B' , 'C' ]), # What is going to become the categorical column
52- ] for row in range (n_rows )]
18+ # Strings should raise AssertionError
19+ X = pd .DataFrame ([['a' , 'b' , 'c' ], ['d' , 'e' , 'f' ]])
20+ with self .assertRaises (Exception ):
21+ verify_numeric (pd .DataFrame (X ))
5322
54- df = pd .DataFrame (ds , columns = ['float' , 'float_edge' , 'unique_int' , 'unique_str' , 'invariant' , 'underscore' , 'none' , 'extra' , 321 , 'categorical' ])
55- df ['categorical' ] = pd .Categorical (df ['categorical' ], categories = ['A' , 'B' , 'C' ])
56- return df
23+ def test_is_numeric_numpy (self ):
24+ # Whole numbers, regardless of the byte length, should not raise AssertionError
25+ X = np .ones ([5 , 5 ], dtype = 'int32' )
26+ verify_numeric (pd .DataFrame (X ))
5727
28+ X = np .ones ([5 , 5 ], dtype = 'int64' )
29+ verify_numeric (pd .DataFrame (X ))
5830
def verify_inverse_transform(x, x_inv):
    """
    Assert that the round-tripped data ``x_inv`` matches the original ``x``.

    The comparison is delegated to ``x.equals(x_inv)``; the original note on
    this helper states that NaN compared with NaN counts as equal.

    :raises AssertionError: if ``x.equals(x_inv)`` is falsy.
    """
    round_trip_matches = x.equals(x_inv)
    assert round_trip_matches
31+ # Floats
32+ X = np .ones ([5 , 5 ], dtype = 'float32' )
33+ verify_numeric (pd .DataFrame (X ))
6434
35+ X = np .ones ([5 , 5 ], dtype = 'float64' )
36+ verify_numeric (pd .DataFrame (X ))
6537
def deep_round(A, ndigits=5):
    """
    Round every number in a list of lists.

    Builds and returns a new list of lists in which each value has been
    passed through ``round(value, ndigits)``. Handy for approximate equality
    checks in tests.
    """
    rounded_rows = []
    for row in A:
        rounded_rows.append([round(value, ndigits) for value in row])
    return rounded_rows
def test_verify_raises_AssertionError_on_categories(self):
    """Categorical columns are not numeric, so verify_numeric must raise."""
    rows = [['a', 'b', 'c'], ['d', 'e', 'f']]
    categorical_frame = pd.DataFrame(rows, dtype='category')
    # The broad Exception is kept on purpose: the exact error type depends on
    # how the imported verify_numeric treats categorical dtypes.
    with self.assertRaises(Exception):
        verify_numeric(pd.DataFrame(categorical_frame))
0 commit comments