Skip to content

Commit 5a66f94

Browse files
committed
Adding pytest fixture for teams example
1 parent 46c2a19 commit 5a66f94

File tree

1 file changed

+60
-87
lines changed

1 file changed

+60
-87
lines changed

autonormalize/tests/test_normalize.py

Lines changed: 60 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,47 @@
11
import pandas as pd
2-
import featuretools as ft
3-
4-
from featuretools.variable_types import ZIPCode, Index, Datetime, Numeric, DatetimeTimeIndex, Categorical, Id, \
5-
SubRegionCode
2+
import pytest
63
from pandas.util.testing import assert_frame_equal
74

8-
from autonormalize import classes, normalize, autonormalize
9-
5+
import featuretools as ft
6+
from featuretools.variable_types import (
7+
Categorical,
8+
Datetime,
9+
DatetimeTimeIndex,
10+
Id,
11+
Index,
12+
Numeric,
13+
Text,
14+
ZIPCode
15+
)
16+
17+
from autonormalize import autonormalize, classes, normalize
1018

1119
# from classes import Dependencies
1220

1321
# from normalize import normalize, find_most_comm, split_on_dep
1422

23+
@pytest.fixture
def teams_input():
    """Provide the shared "teams" example used by the entity-set tests.

    Returns:
        An object with two factory methods, ``get_df()`` and ``get_deps()``.
        Each call builds a brand-new object, so one test cannot leak
        mutations into another through shared state.
    """

    class Teams:
        """Factory for the teams example table and its dependencies."""

        def get_df(self):
            """Return the ten-row teams table as a new ``pandas.DataFrame``.

            Columns: ``team``, ``jersey_num``, ``player_name``, ``city``,
            ``state``.
            """
            dic = {'team': ['Red', 'Red', 'Red', 'Orange', 'Orange', 'Yellow',
                            'Yellow', 'Green', 'Green', 'Blue'],
                   'jersey_num': [1, 2, 3, 1, 2, 1, 5, 8, 2, 2],
                   'player_name': ['A', 'B', 'C', 'D', 'A', 'E', 'B', 'A', 'G', 'H'],
                   'city': ['boston', 'boston', 'boston', 'chicago', 'chicago',
                            'honolulu', 'honolulu', 'boston', 'boston', 'austin'],
                   'state': ['MA', 'MA', 'MA', 'IL', 'IL', 'HI', 'HI', 'MA', 'MA', 'TX']}
            return pd.DataFrame(dic)

        def get_deps(self):
            """Return a new ``classes.Dependencies`` for the teams table.

            The first argument maps each column name to a list of column
            groups (presumably the column sets that determine it -- confirm
            against ``classes.Dependencies``).  The second argument,
            ``['team', 'jersey_num']``, is presumably the primary key: the
            tests read it back through ``get_prim_key()``.
            """
            return classes.Dependencies({'team': [['player_name', 'jersey_num']],
                                         'jersey_num': [['player_name', 'team']],
                                         'player_name': [['team', 'jersey_num']],
                                         'city': [['team'], ['state'], ['player_name', 'jersey_num']],
                                         'state': [['team'], ['player_name', 'jersey_num'],
                                                   ['city']]}, ['team', 'jersey_num'])

    # Hand back an instance so tests call teams_input.get_df() / .get_deps().
    return Teams()
44+
1545

1646
def test_normalize():
1747
# how to test that relations remain the same???
@@ -105,23 +135,8 @@ def test_choose_index():
105135
assert normalize.choose_index(keys, df) == ['A', 'B']
106136

107137

108-
def test_normalize_dataframe():
109-
110-
dic = {'team': ['Red', 'Red', 'Red', 'Orange', 'Orange', 'Yellow',
111-
'Yellow', 'Green', 'Green', 'Blue'],
112-
'jersey_num': [1, 2, 3, 1, 2, 1, 5, 8, 2, 2],
113-
'player_name': ['A', 'B', 'C', 'D', 'A', 'E', 'B', 'A', 'G', 'H'],
114-
'city': ['boston', 'boston', 'boston', 'chicago', 'chicago',
115-
'honolulu', 'honolulu', 'boston', 'boston', 'austin'],
116-
'state': ['MA', 'MA', 'MA', 'IL', 'IL', 'HI', 'HI', 'MA', 'MA', 'TX']}
117-
df = pd.DataFrame(dic)
118-
deps = classes.Dependencies({'team': [['player_name', 'jersey_num']],
119-
'jersey_num': [['player_name', 'team']],
120-
'player_name': [['team', 'jersey_num']],
121-
'city': [['team'], ['state'], ['player_name', 'jersey_num']],
122-
'state': [['team'], ['player_name', 'jersey_num'], ['city']]}, ['team', 'jersey_num'])
123-
124-
depdf = normalize.DepDF(deps, df, deps.get_prim_key())
138+
def test_normalize_dataframe(teams_input):
139+
depdf = normalize.DepDF(teams_input.get_deps(), teams_input.get_df(), teams_input.get_deps().get_prim_key())
125140
normalize.normalize_dataframe(depdf)
126141
new_dfs = depdf.return_dfs()
127142

@@ -216,21 +231,8 @@ def test_variable_types():
216231
assert normalized_entityset['customer_id'].variable_types['zip_code'] == ZIPCode
217232

218233

219-
def test_make_entityset_default_args():
220-
dic = {'team': ['Red', 'Red', 'Red', 'Orange', 'Orange', 'Yellow',
221-
'Yellow', 'Green', 'Green', 'Blue'],
222-
'jersey_num': [1, 2, 3, 1, 2, 1, 5, 8, 2, 2],
223-
'player_name': ['A', 'B', 'C', 'D', 'A', 'E', 'B', 'A', 'G', 'H'],
224-
'city': ['boston', 'boston', 'boston', 'chicago', 'chicago',
225-
'honolulu', 'honolulu', 'boston', 'boston', 'austin'],
226-
'state': ['US-MA', 'US-MA', 'US-MA', 'US-IL', 'US-IL', 'US-HI', 'US-HI', 'US-MA', 'US-MA', 'US-TX']}
227-
df = pd.DataFrame(dic)
228-
deps = classes.Dependencies({'team': [['player_name', 'jersey_num']],
229-
'jersey_num': [['player_name', 'team']],
230-
'player_name': [['team', 'jersey_num']],
231-
'city': [['team'], ['state'], ['player_name', 'jersey_num']],
232-
'state': [['team'], ['player_name', 'jersey_num'], ['city']]}, ['team', 'jersey_num'])
233-
normalized_entityset = autonormalize.make_entityset(df, deps)
234+
def test_make_entityset_default_args(teams_input):
235+
normalized_entityset = autonormalize.make_entityset(teams_input.get_df(), teams_input.get_deps())
234236

235237
dic_one = {'team_jersey_num': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
236238
'team': ['Red', 'Red', 'Red', 'Orange', 'Orange', 'Yellow',
@@ -242,7 +244,7 @@ def test_make_entityset_default_args():
242244
'city': ['austin', 'boston', 'chicago', 'boston', 'honolulu']}
243245

244246
dic_three = {'city': ['austin', 'boston', 'chicago', 'honolulu'],
245-
'state': ['US-TX', 'US-MA', 'US-IL', 'US-HI', ]}
247+
'state': ['TX', 'MA', 'IL', 'HI']}
246248

247249
assert len(normalized_entityset.entities) == 3
248250

@@ -264,24 +266,11 @@ def test_make_entityset_default_args():
264266
assert normalized_entityset.entities[2].variable_types['state'] == Categorical
265267

266268

267-
def test_make_entityset_custom_args():
268-
dic = {'team': ['Red', 'Red', 'Red', 'Orange', 'Orange', 'Yellow',
269-
'Yellow', 'Green', 'Green', 'Blue'],
270-
'jersey_num': [1, 2, 3, 1, 2, 1, 5, 8, 2, 2],
271-
'player_name': ['A', 'B', 'C', 'D', 'A', 'E', 'B', 'A', 'G', 'H'],
272-
'city': ['boston', 'boston', 'boston', 'chicago', 'chicago',
273-
'honolulu', 'honolulu', 'boston', 'boston', 'austin'],
274-
'state': ['US-MA', 'US-MA', 'US-MA', 'US-IL', 'US-IL', 'US-HI', 'US-HI', 'US-MA', 'US-MA', 'US-TX']}
275-
df = pd.DataFrame(dic)
276-
deps = classes.Dependencies({'team': [['player_name', 'jersey_num']],
277-
'jersey_num': [['player_name', 'team']],
278-
'player_name': [['team', 'jersey_num']],
279-
'city': [['team'], ['state'], ['player_name', 'jersey_num']],
280-
'state': [['team'], ['player_name', 'jersey_num'], ['city']]}, ['team', 'jersey_num'])
281-
normalized_entityset = autonormalize.make_entityset(df=df,
282-
dependencies=deps,
283-
name='Sport',
284-
variable_types={'state': SubRegionCode})
269+
def test_make_entityset_custom_args(teams_input):
270+
normalized_entityset = autonormalize.make_entityset(df=teams_input.get_df(),
271+
dependencies=teams_input.get_deps(),
272+
name='Teams',
273+
variable_types={'state': Text})
285274

286275
dic_one = {'team_jersey_num': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
287276
'team': ['Red', 'Red', 'Red', 'Orange', 'Orange', 'Yellow',
@@ -293,10 +282,10 @@ def test_make_entityset_custom_args():
293282
'city': ['austin', 'boston', 'chicago', 'boston', 'honolulu']}
294283

295284
dic_three = {'city': ['austin', 'boston', 'chicago', 'honolulu'],
296-
'state': ['US-TX', 'US-MA', 'US-IL', 'US-HI', ]}
285+
'state': ['TX', 'MA', 'IL', 'HI']}
297286

298287
assert len(normalized_entityset.entities) == 3
299-
assert normalized_entityset.id == 'Sport'
288+
assert normalized_entityset.id == 'Teams'
300289

301290
assert normalized_entityset.entities[0].df.equals(pd.DataFrame(dic_one))
302291
assert normalized_entityset.entities[1].df.equals(pd.DataFrame(
@@ -313,19 +302,11 @@ def test_make_entityset_custom_args():
313302
assert normalized_entityset.entities[1].variable_types['city'] == Id
314303

315304
assert normalized_entityset.entities[2].variable_types['city'] == Index
316-
assert normalized_entityset.entities[2].variable_types['state'] == SubRegionCode
305+
assert normalized_entityset.entities[2].variable_types['state'] == Text
317306

318307

319-
def test_auto_entityset_default_args():
320-
dic = {'team': ['Red', 'Red', 'Red', 'Orange', 'Orange', 'Yellow',
321-
'Yellow', 'Green', 'Green', 'Blue'],
322-
'jersey_num': [1, 2, 3, 1, 2, 1, 5, 8, 2, 2],
323-
'player_name': ['A', 'B', 'C', 'D', 'A', 'E', 'B', 'A', 'G', 'H'],
324-
'city': ['boston', 'boston', 'boston', 'chicago', 'chicago',
325-
'honolulu', 'honolulu', 'boston', 'boston', 'austin'],
326-
'state': ['US-MA', 'US-MA', 'US-MA', 'US-IL', 'US-IL', 'US-HI', 'US-HI', 'US-MA', 'US-MA', 'US-TX']}
327-
df = pd.DataFrame(dic)
328-
normalized_entityset = autonormalize.auto_entityset(df)
308+
def test_auto_entityset_default_args(teams_input):
309+
normalized_entityset = autonormalize.auto_entityset(teams_input.get_df())
329310

330311
dic_one = {'jersey_num_team': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
331312
'team': ['Red', 'Red', 'Red', 'Orange', 'Orange', 'Yellow',
@@ -337,7 +318,7 @@ def test_auto_entityset_default_args():
337318
'city': ['austin', 'boston', 'chicago', 'boston', 'honolulu']}
338319

339320
dic_three = {'city': ['austin', 'boston', 'chicago', 'honolulu'],
340-
'state': ['US-TX', 'US-MA', 'US-IL', 'US-HI', ]}
321+
'state': ['TX', 'MA', 'IL', 'HI']}
341322

342323
assert len(normalized_entityset.entities) == 3
343324

@@ -359,20 +340,12 @@ def test_auto_entityset_default_args():
359340
assert normalized_entityset.entities[2].variable_types['state'] == Categorical
360341

361342

362-
def test_auto_entityset_custom_args():
363-
dic = {'team': ['Red', 'Red', 'Red', 'Orange', 'Orange', 'Yellow',
364-
'Yellow', 'Green', 'Green', 'Blue'],
365-
'jersey_num': [1, 2, 3, 1, 2, 1, 5, 8, 2, 2],
366-
'player_name': ['A', 'B', 'C', 'D', 'A', 'E', 'B', 'A', 'G', 'H'],
367-
'city': ['boston', 'boston', 'boston', 'chicago', 'chicago',
368-
'honolulu', 'honolulu', 'boston', 'boston', 'austin'],
369-
'state': ['US-MA', 'US-MA', 'US-MA', 'US-IL', 'US-IL', 'US-HI', 'US-HI', 'US-MA', 'US-MA', 'US-TX']}
370-
df = pd.DataFrame(dic)
371-
normalized_entityset = autonormalize.auto_entityset(df=df,
372-
name='Sport',
373-
variable_types={'state': SubRegionCode})
343+
def test_auto_entityset_custom_args(teams_input):
344+
normalized_entityset = autonormalize.auto_entityset(df=teams_input.get_df(),
345+
name='Teams',
346+
variable_types={'state': Text})
374347

375-
dic_one = {'team_jersey_num': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
348+
dic_one = {'jersey_num_team': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
376349
'team': ['Red', 'Red', 'Red', 'Orange', 'Orange', 'Yellow',
377350
'Yellow', 'Green', 'Green', 'Blue'],
378351
'jersey_num': [1, 2, 3, 1, 2, 1, 5, 8, 2, 2],
@@ -382,10 +355,10 @@ def test_auto_entityset_custom_args():
382355
'city': ['austin', 'boston', 'chicago', 'boston', 'honolulu']}
383356

384357
dic_three = {'city': ['austin', 'boston', 'chicago', 'honolulu'],
385-
'state': ['US-TX', 'US-MA', 'US-IL', 'US-HI', ]}
358+
'state': ['TX', 'MA', 'IL', 'HI']}
386359

387360
assert len(normalized_entityset.entities) == 3
388-
assert normalized_entityset.id == 'Sport'
361+
assert normalized_entityset.id == 'Teams'
389362

390363
assert normalized_entityset.entities[0].df.equals(pd.DataFrame(dic_one))
391364
assert normalized_entityset.entities[1].df.equals(pd.DataFrame(
@@ -402,4 +375,4 @@ def test_auto_entityset_custom_args():
402375
assert normalized_entityset.entities[1].variable_types['city'] == Id
403376

404377
assert normalized_entityset.entities[2].variable_types['city'] == Index
405-
assert normalized_entityset.entities[2].variable_types['state'] == SubRegionCode
378+
assert normalized_entityset.entities[2].variable_types['state'] == Text

0 commit comments

Comments
 (0)