Skip to content

Commit 46c2a19

Browse files
committed
Initial tests for make_entityset and auto_entityset
1 parent db8257a commit 46c2a19

File tree

1 file changed

+192
-2
lines changed

1 file changed

+192
-2
lines changed

autonormalize/tests/test_normalize.py

Lines changed: 192 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import pandas as pd
22
import featuretools as ft
33

4-
from featuretools.variable_types import ZIPCode, Index, Datetime, Numeric, DatetimeTimeIndex, Categorical, Id
4+
from featuretools.variable_types import ZIPCode, Index, Datetime, Numeric, DatetimeTimeIndex, Categorical, Id, \
5+
SubRegionCode
56
from pandas.util.testing import assert_frame_equal
67

78
from autonormalize import classes, normalize, autonormalize
@@ -191,7 +192,7 @@ def test_variable_types():
191192
entityset.entity_from_dataframe(entity_id='Customer Transactions',
192193
dataframe=df,
193194
time_index='transaction_time',
194-
variable_types={"zip_code": ZIPCode})
195+
variable_types={'zip_code': ZIPCode})
195196

196197
normalized_entityset = autonormalize.normalize_entity(entityset)
197198

@@ -213,3 +214,192 @@ def test_variable_types():
213214
assert normalized_entityset['customer_id'].variable_types['join_date'] == Datetime
214215
assert normalized_entityset['customer_id'].variable_types['date_of_birth'] == Datetime
215216
assert normalized_entityset['customer_id'].variable_types['zip_code'] == ZIPCode
217+
218+
219+
def test_make_entityset_default_args():
    """Exercise ``make_entityset`` with only a dataframe and dependencies.

    Three entities are expected; each entity's dataframe and variable types
    are compared against hand-written expectations.
    """
    source = pd.DataFrame({
        'team': ['Red', 'Red', 'Red', 'Orange', 'Orange', 'Yellow',
                 'Yellow', 'Green', 'Green', 'Blue'],
        'jersey_num': [1, 2, 3, 1, 2, 1, 5, 8, 2, 2],
        'player_name': ['A', 'B', 'C', 'D', 'A', 'E', 'B', 'A', 'G', 'H'],
        'city': ['boston', 'boston', 'boston', 'chicago', 'chicago',
                 'honolulu', 'honolulu', 'boston', 'boston', 'austin'],
        'state': ['US-MA', 'US-MA', 'US-MA', 'US-IL', 'US-IL',
                  'US-HI', 'US-HI', 'US-MA', 'US-MA', 'US-TX'],
    })
    dependencies = classes.Dependencies(
        {'team': [['player_name', 'jersey_num']],
         'jersey_num': [['player_name', 'team']],
         'player_name': [['team', 'jersey_num']],
         'city': [['team'], ['state'], ['player_name', 'jersey_num']],
         'state': [['team'], ['player_name', 'jersey_num'], ['city']]},
        ['team', 'jersey_num'])
    entityset = autonormalize.make_entityset(source, dependencies)

    # Expected frames, listed in the same order as ``entityset.entities``.
    expected_frames = [
        pd.DataFrame({
            'team_jersey_num': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
            'team': ['Red', 'Red', 'Red', 'Orange', 'Orange', 'Yellow',
                     'Yellow', 'Green', 'Green', 'Blue'],
            'jersey_num': [1, 2, 3, 1, 2, 1, 5, 8, 2, 2],
            'player_name': ['A', 'B', 'C', 'D', 'A', 'E', 'B', 'A', 'G', 'H'],
        }),
        pd.DataFrame(
            {'team': ['Blue', 'Green', 'Orange', 'Red', 'Yellow'],
             'city': ['austin', 'boston', 'chicago', 'boston', 'honolulu']},
            index=['Blue', 'Green', 'Orange', 'Red', 'Yellow']),
        pd.DataFrame(
            {'city': ['austin', 'boston', 'chicago', 'honolulu'],
             'state': ['US-TX', 'US-MA', 'US-IL', 'US-HI']},
            index=['austin', 'boston', 'chicago', 'honolulu']),
    ]

    # Expected variable type per column, again in entity order.
    expected_types = [
        {'team_jersey_num': Index, 'team': Id,
         'jersey_num': Numeric, 'player_name': Categorical},
        {'team': Index, 'city': Id},
        {'city': Index, 'state': Categorical},
    ]

    assert len(entityset.entities) == 3

    for position, frame in enumerate(expected_frames):
        assert entityset.entities[position].df.equals(frame)

    for position, column_types in enumerate(expected_types):
        actual_types = entityset.entities[position].variable_types
        for column, variable_type in column_types.items():
            assert actual_types[column] == variable_type
265+
266+
267+
def test_make_entityset_custom_args():
    """Exercise ``make_entityset`` with a custom name and variable types.

    The entity set is built with ``name='Sport'`` and ``state`` typed as
    ``SubRegionCode``; the test checks the resulting id, the three entity
    dataframes and every column's variable type.
    """
    raw = {
        'team': ['Red', 'Red', 'Red', 'Orange', 'Orange', 'Yellow',
                 'Yellow', 'Green', 'Green', 'Blue'],
        'jersey_num': [1, 2, 3, 1, 2, 1, 5, 8, 2, 2],
        'player_name': ['A', 'B', 'C', 'D', 'A', 'E', 'B', 'A', 'G', 'H'],
        'city': ['boston', 'boston', 'boston', 'chicago', 'chicago',
                 'honolulu', 'honolulu', 'boston', 'boston', 'austin'],
        'state': ['US-MA', 'US-MA', 'US-MA', 'US-IL', 'US-IL',
                  'US-HI', 'US-HI', 'US-MA', 'US-MA', 'US-TX'],
    }
    frame = pd.DataFrame(raw)
    dependency_map = {
        'team': [['player_name', 'jersey_num']],
        'jersey_num': [['player_name', 'team']],
        'player_name': [['team', 'jersey_num']],
        'city': [['team'], ['state'], ['player_name', 'jersey_num']],
        'state': [['team'], ['player_name', 'jersey_num'], ['city']],
    }
    dependencies = classes.Dependencies(dependency_map, ['team', 'jersey_num'])
    entityset = autonormalize.make_entityset(
        df=frame,
        dependencies=dependencies,
        name='Sport',
        variable_types={'state': SubRegionCode})

    players_expected = pd.DataFrame({
        'team_jersey_num': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        'team': ['Red', 'Red', 'Red', 'Orange', 'Orange', 'Yellow',
                 'Yellow', 'Green', 'Green', 'Blue'],
        'jersey_num': [1, 2, 3, 1, 2, 1, 5, 8, 2, 2],
        'player_name': ['A', 'B', 'C', 'D', 'A', 'E', 'B', 'A', 'G', 'H'],
    })
    teams_expected = pd.DataFrame(
        {'team': ['Blue', 'Green', 'Orange', 'Red', 'Yellow'],
         'city': ['austin', 'boston', 'chicago', 'boston', 'honolulu']},
        index=['Blue', 'Green', 'Orange', 'Red', 'Yellow'])
    cities_expected = pd.DataFrame(
        {'city': ['austin', 'boston', 'chicago', 'honolulu'],
         'state': ['US-TX', 'US-MA', 'US-IL', 'US-HI']},
        index=['austin', 'boston', 'chicago', 'honolulu'])

    assert len(entityset.entities) == 3
    assert entityset.id == 'Sport'

    # Each tuple pairs an entity position with its expected dataframe.
    frame_checks = ((0, players_expected), (1, teams_expected), (2, cities_expected))
    for position, expected in frame_checks:
        assert entityset.entities[position].df.equals(expected)

    # (entity position, column name, expected variable type)
    type_checks = (
        (0, 'team_jersey_num', Index),
        (0, 'team', Id),
        (0, 'jersey_num', Numeric),
        (0, 'player_name', Categorical),
        (1, 'team', Index),
        (1, 'city', Id),
        (2, 'city', Index),
        (2, 'state', SubRegionCode),
    )
    for position, column, variable_type in type_checks:
        assert entityset.entities[position].variable_types[column] == variable_type
317+
318+
319+
def test_auto_entityset_default_args():
    """Exercise ``auto_entityset`` when it is given nothing but a dataframe.

    Three entities are expected. The first entity's index column is expected
    to be called ``jersey_num_team``.
    """
    team_column = ['Red', 'Red', 'Red', 'Orange', 'Orange', 'Yellow',
                   'Yellow', 'Green', 'Green', 'Blue']
    jersey_column = [1, 2, 3, 1, 2, 1, 5, 8, 2, 2]
    player_column = ['A', 'B', 'C', 'D', 'A', 'E', 'B', 'A', 'G', 'H']
    city_column = ['boston', 'boston', 'boston', 'chicago', 'chicago',
                   'honolulu', 'honolulu', 'boston', 'boston', 'austin']
    state_column = ['US-MA', 'US-MA', 'US-MA', 'US-IL', 'US-IL',
                    'US-HI', 'US-HI', 'US-MA', 'US-MA', 'US-TX']

    observations = pd.DataFrame({'team': team_column,
                                 'jersey_num': jersey_column,
                                 'player_name': player_column,
                                 'city': city_column,
                                 'state': state_column})
    entityset = autonormalize.auto_entityset(observations)

    unique_teams = ['Blue', 'Green', 'Orange', 'Red', 'Yellow']
    unique_cities = ['austin', 'boston', 'chicago', 'honolulu']

    expected_players = pd.DataFrame({'jersey_num_team': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
                                     'team': team_column,
                                     'jersey_num': jersey_column,
                                     'player_name': player_column})
    expected_teams = pd.DataFrame(
        {'team': unique_teams,
         'city': ['austin', 'boston', 'chicago', 'boston', 'honolulu']},
        index=unique_teams)
    expected_cities = pd.DataFrame(
        {'city': unique_cities,
         'state': ['US-TX', 'US-MA', 'US-IL', 'US-HI']},
        index=unique_cities)

    assert len(entityset.entities) == 3

    assert entityset.entities[0].df.equals(expected_players)
    assert entityset.entities[1].df.equals(expected_teams)
    assert entityset.entities[2].df.equals(expected_cities)

    # Expected variable types, keyed by entity position and then column name.
    expected_types = {
        0: {'jersey_num_team': Index, 'team': Id,
            'jersey_num': Numeric, 'player_name': Categorical},
        1: {'team': Index, 'city': Id},
        2: {'city': Index, 'state': Categorical},
    }
    for position, column_types in expected_types.items():
        for column, variable_type in column_types.items():
            assert entityset.entities[position].variable_types[column] == variable_type
360+
361+
362+
def test_auto_entityset_custom_args():
    """Exercise ``auto_entityset`` with a custom name and variable types.

    The entity set is built with ``name='Sport'`` and ``state`` typed as
    ``SubRegionCode``; the test checks the resulting id, the three entity
    dataframes and every column's variable type.
    """
    dic = {'team': ['Red', 'Red', 'Red', 'Orange', 'Orange', 'Yellow',
                    'Yellow', 'Green', 'Green', 'Blue'],
           'jersey_num': [1, 2, 3, 1, 2, 1, 5, 8, 2, 2],
           'player_name': ['A', 'B', 'C', 'D', 'A', 'E', 'B', 'A', 'G', 'H'],
           'city': ['boston', 'boston', 'boston', 'chicago', 'chicago',
                    'honolulu', 'honolulu', 'boston', 'boston', 'austin'],
           'state': ['US-MA', 'US-MA', 'US-MA', 'US-IL', 'US-IL',
                     'US-HI', 'US-HI', 'US-MA', 'US-MA', 'US-TX']}
    df = pd.DataFrame(dic)
    normalized_entityset = autonormalize.auto_entityset(df=df,
                                                        name='Sport',
                                                        variable_types={'state': SubRegionCode})

    # The expected frame and the variable-type assertion below must agree on
    # the index column's name, so it is spelled once. 'jersey_num_team' is the
    # name the variable-type assertion already used here and the name used in
    # test_auto_entityset_default_args.
    index_name = 'jersey_num_team'

    dic_one = {index_name: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
               'team': ['Red', 'Red', 'Red', 'Orange', 'Orange', 'Yellow',
                        'Yellow', 'Green', 'Green', 'Blue'],
               'jersey_num': [1, 2, 3, 1, 2, 1, 5, 8, 2, 2],
               'player_name': ['A', 'B', 'C', 'D', 'A', 'E', 'B', 'A', 'G', 'H']}

    dic_two = {'team': ['Blue', 'Green', 'Orange', 'Red', 'Yellow'],
               'city': ['austin', 'boston', 'chicago', 'boston', 'honolulu']}

    dic_three = {'city': ['austin', 'boston', 'chicago', 'honolulu'],
                 'state': ['US-TX', 'US-MA', 'US-IL', 'US-HI']}

    assert len(normalized_entityset.entities) == 3
    assert normalized_entityset.id == 'Sport'

    assert normalized_entityset.entities[0].df.equals(pd.DataFrame(dic_one))
    assert normalized_entityset.entities[1].df.equals(pd.DataFrame(
        dic_two, index=['Blue', 'Green', 'Orange', 'Red', 'Yellow']))
    assert normalized_entityset.entities[2].df.equals(pd.DataFrame(
        dic_three, index=['austin', 'boston', 'chicago', 'honolulu']))

    assert normalized_entityset.entities[0].variable_types[index_name] == Index
    assert normalized_entityset.entities[0].variable_types['team'] == Id
    assert normalized_entityset.entities[0].variable_types['jersey_num'] == Numeric
    assert normalized_entityset.entities[0].variable_types['player_name'] == Categorical

    assert normalized_entityset.entities[1].variable_types['team'] == Index
    assert normalized_entityset.entities[1].variable_types['city'] == Id

    assert normalized_entityset.entities[2].variable_types['city'] == Index
    assert normalized_entityset.entities[2].variable_types['state'] == SubRegionCode

0 commit comments

Comments
 (0)