Skip to content

Commit ae84e05

Browse files
authored
Merge pull request #170 from datarian/test-helper-refactoring
Refactoring of test_helper
2 parents bb3dce5 + d6e73ce commit ae84e05

File tree

8 files changed

+107
-65
lines changed

8 files changed

+107
-65
lines changed

category_encoders/tests/helpers.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
"""Helper functions that are used exclusively in the tests"""
2+
3+
import numpy as np
4+
import random
5+
import pandas as pd
6+
import math
7+
8+
9+
def verify_numeric(X_test):
    """
    Check that every column of a DataFrame has a numeric dtype.

    A dtype counts as numeric when its ``kind`` code is one of ``b``
    (boolean), ``u`` (unsigned integer), ``i`` (signed integer), ``f``
    (floating point) or ``c`` (complex floating point).

    Parameters
    ----------
    X_test : pandas.DataFrame
        Frame whose ``dtypes`` are inspected.

    Raises
    ------
    AssertionError
        If at least one column has a dtype whose kind is not numeric.
    """
    numeric_kinds = frozenset('buifc')

    # A list of pairs (not a dict) so duplicated column labels are all reported.
    offending = [
        (column, str(dtype))
        for column, dtype in X_test.dtypes.items()
        if dtype.kind not in numeric_kinds
    ]

    # Raise explicitly rather than via a bare ``assert`` so the check still
    # runs under ``python -O`` and the failure names the offending columns.
    if offending:
        raise AssertionError(
            'Non-numeric columns (name, dtype): {}'.format(offending)
        )
17+
18+
def create_array(n_rows=1000, extras=False, has_none=True):
    """
    Create a 2-D numpy array mixing numeric and categorical columns.

    Each row holds, in order: two random floats, a category drawn from
    A/B/C, a category that may additionally be 'D' when ``extras`` is set,
    a category that may additionally be ``None`` or NaN when ``has_none``
    is set, and the constant 'A'.

    The random generator is not seeded here, so the content differs between
    calls unless the caller seeds ``random`` beforehand.

    Parameters
    ----------
    n_rows : int
        Number of rows to generate.
    extras : bool
        Allow the extra level 'D' in the fourth column.
    has_none : bool
        Allow ``None`` and NaN in the fifth column.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_rows, 6)``.
    """
    n_columns = 6
    ds = [[
        random.random(),
        random.random(),
        random.choice(['A', 'B', 'C']),
        random.choice(['A', 'B', 'C', 'D']) if extras else random.choice(['A', 'B', 'C']),
        random.choice(['A', 'B', 'C', None, np.nan]) if has_none else random.choice(['A', 'B', 'C']),
        # Kept as a choice() call so the number of RNG draws per row is unchanged.
        random.choice(['A'])
    ] for _ in range(n_rows)]

    # NOTE(review): the dtype is whatever np.array infers from the mixed rows;
    # confirm callers do not depend on a particular dtype.
    # np.array([]) is 1-D, so reshape to keep the result 2-D for n_rows == 0.
    return np.array(ds).reshape(len(ds), n_columns)
32+
33+
34+
def create_dataset(n_rows=1000, extras=False, has_none=True):
    """
    Build a reproducible DataFrame covering assorted column flavours.

    The module-level ``random`` generator is re-seeded with 2001 on every
    call, so equal arguments yield equal frames.

    Parameters
    ----------
    n_rows : int
        Number of rows to generate.
    extras : bool
        When set, 'invariant' may also contain 'B' and 'extra' may also
        contain 'D'.
    has_none : bool
        When set, the 'none' column may contain ``None``.

    Returns
    -------
    pandas.DataFrame
        Frame with eleven columns; 'categorical' and 'categorical_na' are
        converted to pandas categoricals with categories A, B and C.
    """
    column_names = [
        'float', 'float_edge', 'unique_int', 'unique_str', 'invariant',
        'underscore', 'none', 'extra', 321, 'categorical', 'categorical_na',
    ]
    abc = ['A', 'B', 'C']
    # NOTE(review): ``-0`` is the integer 0, the same value as the next
    # entry; presumably ``-0.0`` was intended -- confirm before changing.
    edge_values = [float('nan'), float('inf'), float('-inf'), -0, 0, 1, -1, math.pi]

    random.seed(2001)
    records = []
    for idx in range(n_rows):
        # Draws happen strictly in column order so the seeded sequence of
        # values is the same for a given set of arguments.
        record = [
            random.random(),                # Floats
            random.choice(edge_values),     # Floats with edge scenarios
            idx,                            # Unique integers
            str(idx),                       # Unique strings
        ]
        # Invariant in the training data
        record.append(random.choice(['A', 'B']) if extras else 'A')
        # Strings with underscores to test reverse_dummies()
        record.append(random.choice(['A', 'B_b', 'C_c_c']))
        # None
        record.append(random.choice(abc + [None]) if has_none else random.choice(abc))
        # With a new string value
        record.append(random.choice(abc + ['D']) if extras else random.choice(abc))
        # Number in the column name
        record.append(random.choice([12, 43, -32]))
        # What is going to become the categorical column
        record.append(random.choice(abc))
        # Categorical with missing values
        record.append(random.choice(abc + [np.nan]))
        records.append(record)

    frame = pd.DataFrame(records, columns=column_names)
    for name in ('categorical', 'categorical_na'):
        frame[name] = pd.Categorical(frame[name], categories=['A', 'B', 'C'])
    return frame
57+
58+
59+
def verify_inverse_transform(x, x_inv):
    """
    Check that an inverse-transformed object equals the original.

    The comparison is delegated to ``x.equals(x_inv)``, which returns true
    for NaN.equals(NaN), so missing values in matching positions do not
    cause a failure.

    Parameters
    ----------
    x : object exposing ``equals`` (presumably a pandas DataFrame -- confirm)
        The original data.
    x_inv : same type as ``x``
        The data recovered by the inverse transform.

    Raises
    ------
    AssertionError
        If ``x.equals(x_inv)`` is false.
    """
    # Raise explicitly rather than via a bare ``assert`` so the check still
    # runs under ``python -O`` and the failure shows both objects.
    if not x.equals(x_inv):
        raise AssertionError(
            'Inverse transform does not reproduce the input.\n'
            'Expected:\n{}\nGot:\n{}'.format(x, x_inv)
        )
64+
65+
66+
def deep_round(A, ndigits=5):
    """
    Round every number of a nested list (list of lists).

    Handy for comparing numeric results approximately: both sides can be
    rounded to the same precision and then compared for equality.

    Parameters
    ----------
    A : iterable of iterables of numbers
        The nested values to round.
    ndigits : int
        Number of decimal digits passed on to ``round``.

    Returns
    -------
    list of list
        A new nested list with the same layout as ``A``.
    """
    rounded_rows = []
    for row in A:
        rounded_row = []
        for number in row:
            rounded_row.append(round(number, ndigits))
        rounded_rows.append(rounded_row)
    return rounded_rows

category_encoders/tests/test_encoders.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import numpy as np
77
import pandas as pd
88
import sklearn
9-
import category_encoders.tests.test_helpers as th
9+
import category_encoders.tests.helpers as th
1010
from sklearn.utils.estimator_checks import check_transformer_general, check_transformers_unfitted
1111
from unittest2 import TestSuite, TextTestRunner, TestCase # or `from unittest import ...` if on Python 3.4+
1212

Lines changed: 29 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,70 +1,42 @@
1-
"""Helper functions that are used exclusively in the tests"""
2-
31
import numpy as np
4-
import random
52
import pandas as pd
6-
import math
7-
3+
from unittest2 import TestCase # or `from unittest import ...` if on Python 3.4+
84

9-
def verify_numeric(X_test):
10-
"""
11-
Test that all attributes in the DataFrame are numeric.
12-
"""
13-
for dt in X_test.dtypes:
14-
numeric = False
15-
if np.issubdtype(dt, np.dtype(int)) or np.issubdtype(dt, np.dtype(float)):
16-
numeric = True
17-
assert numeric
5+
from category_encoders.tests.helpers import verify_numeric
186

197

20-
def create_array(n_rows=1000, extras=False, has_none=True):
21-
"""
22-
Creates a numpy dataset with some categorical variables.
23-
"""
24-
ds = [[
25-
random.random(),
26-
random.random(),
27-
random.choice(['A', 'B', 'C']),
28-
random.choice(['A', 'B', 'C', 'D']) if extras else random.choice(['A', 'B', 'C']),
29-
random.choice(['A', 'B', 'C', None, np.nan]) if has_none else random.choice(['A', 'B', 'C']),
30-
random.choice(['A'])
31-
] for _ in range(n_rows)]
8+
class TestHelpers(TestCase):
329

33-
return np.array(ds)
10+
def test_is_numeric_pandas(self):
11+
# Whole numbers, regardless of the byte length, should not raise AssertionError
12+
X = pd.DataFrame(np.ones([5, 5]), dtype='int32')
13+
verify_numeric(pd.DataFrame(X))
3414

15+
X = pd.DataFrame(np.ones([5, 5]), dtype='int64')
16+
verify_numeric(pd.DataFrame(X))
3517

36-
def create_dataset(n_rows=1000, extras=False, has_none=True):
37-
"""
38-
Creates a dataset with some categorical variables.
39-
"""
40-
random.seed(2001)
41-
ds = [[
42-
random.random(), # Floats
43-
random.choice([float('nan'), float('inf'), float('-inf'), -0, 0, 1, -1, math.pi]), # Floats with edge scenarios
44-
row, # Unique integers
45-
str(row), # Unique strings
46-
random.choice(['A', 'B']) if extras else 'A', # Invariant in the training data
47-
random.choice(['A', 'B_b', 'C_c_c']), # Strings with underscores to test reverse_dummies()
48-
random.choice(['A', 'B', 'C', None]) if has_none else random.choice(['A', 'B', 'C']), # None
49-
random.choice(['A', 'B', 'C', 'D']) if extras else random.choice(['A', 'B', 'C']), # With a new string value
50-
random.choice([12, 43, -32]), # Number in the column name
51-
random.choice(['A', 'B', 'C']), # What is going to become the categorical column
52-
] for row in range(n_rows)]
18+
# Strings should raise AssertionError
19+
X = pd.DataFrame([['a', 'b', 'c'], ['d', 'e', 'f']])
20+
with self.assertRaises(Exception):
21+
verify_numeric(pd.DataFrame(X))
5322

54-
df = pd.DataFrame(ds, columns=['float', 'float_edge', 'unique_int', 'unique_str', 'invariant', 'underscore', 'none', 'extra', 321, 'categorical'])
55-
df['categorical'] = pd.Categorical(df['categorical'], categories=['A', 'B', 'C'])
56-
return df
23+
def test_is_numeric_numpy(self):
24+
# Whole numbers, regardless of the byte length, should not raise AssertionError
25+
X = np.ones([5, 5], dtype='int32')
26+
verify_numeric(pd.DataFrame(X))
5727

28+
X = np.ones([5, 5], dtype='int64')
29+
verify_numeric(pd.DataFrame(X))
5830

59-
def verify_inverse_transform(x, x_inv):
60-
"""
61-
Verify x is equal to x_inv. The test returns true for NaN.equals(NaN) as it should.
62-
"""
63-
assert x.equals(x_inv)
31+
# Floats
32+
X = np.ones([5, 5], dtype='float32')
33+
verify_numeric(pd.DataFrame(X))
6434

35+
X = np.ones([5, 5], dtype='float64')
36+
verify_numeric(pd.DataFrame(X))
6537

66-
def deep_round(A, ndigits=5):
67-
"""
68-
Rounds numbers in a list of lists. Useful for approximate equality testing.
69-
"""
70-
return [[round(val, ndigits) for val in sublst] for sublst in A]
38+
def test_verify_raises_AssertionError_on_categories(self):
39+
# Categories should raise AssertionError
40+
X = pd.DataFrame([['a', 'b', 'c'], ['d', 'e', 'f']], dtype='category')
41+
with self.assertRaises(Exception):
42+
verify_numeric(pd.DataFrame(X))

category_encoders/tests/test_leave_one_out.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import pandas as pd
22
from unittest2 import TestCase # or `from unittest import ...` if on Python 3.4+
3-
import category_encoders.tests.test_helpers as th
3+
import category_encoders.tests.helpers as th
44
import numpy as np
55

66
import category_encoders as encoders

category_encoders/tests/test_one_hot.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
import pandas as pd
2-
from unittest import TestCase # or `from unittest import ...` if on Python 3.4+
2+
from unittest2 import TestCase # or `from unittest import ...` if on Python 3.4+
33
import numpy as np
44
import warnings
5-
import category_encoders.tests.test_helpers as th
5+
import category_encoders.tests.helpers as th
66

77
import category_encoders as encoders
88

category_encoders/tests/test_ordinal.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import pandas as pd
22
from unittest2 import TestCase # or `from unittest import ...` if on Python 3.4+
3-
import category_encoders.tests.test_helpers as th
3+
import category_encoders.tests.helpers as th
44
import numpy as np
55
import warnings
66
import category_encoders as encoders
@@ -35,7 +35,7 @@ def test_ordinal(self):
3535
self.assertIn(-1, set(out['extra'].values))
3636
self.assertTrue(len(enc.mapping) > 0)
3737

38-
enc = encoders.OrdinalEncoder(verbose=1, return_df=True, handle_unknown='return _nan')
38+
enc = encoders.OrdinalEncoder(verbose=1, return_df=True, handle_unknown='return_nan')
3939
enc.fit(X)
4040
out = enc.transform(X_t)
4141
out_cats = [x for x in set(out['extra'].values) if np.isfinite(x)]

category_encoders/tests/test_target_encoder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import pandas as pd
22
from unittest2 import TestCase # or `from unittest import ...` if on Python 3.4+
3-
import category_encoders.tests.test_helpers as th
3+
import category_encoders.tests.helpers as th
44
import numpy as np
55

66
import category_encoders as encoders

category_encoders/tests/test_woe.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import pandas as pd
22
from unittest2 import TestCase # or `from unittest import ...` if on Python 3.4+
3-
import category_encoders.tests.test_helpers as th
3+
import category_encoders.tests.helpers as th
44
import numpy as np
55

66
import category_encoders as encoders

0 commit comments

Comments
 (0)