@@ -70,7 +70,7 @@ def normalize_dataframe(df, dependencies):
70
70
return depdf .return_dfs ()
71
71
72
72
73
- def make_entityset (df , dependencies , name = None , time_index = None ):
73
+ def make_entityset (df , dependencies , name = None , time_index = None , variable_types = None ):
74
74
"""
75
75
Creates a normalized EntitySet from df based on the dependencies given.
76
76
Keys for the newly created DataFrames can only be columns that are strings,
@@ -82,6 +82,10 @@ def make_entityset(df, dependencies, name=None, time_index=None):
82
82
df (pd.DataFrame) : dataframe to normalize and make entity set from
83
83
dependencies (Dependencies) : the dependencies discovered in df
84
84
name (str, optional) : the name of created EntitySet
85
+ time_index (str, optional) : name of time column in the dataframe
86
+ variable_types (dict[str -> Variable], optional):
87
+ Keys are variable ids and values are variable types. Used to
88
+ initialize an entity's store.
85
89
86
90
Returns:
87
91
entityset (ft.EntitySet) : created entity set
@@ -97,10 +101,14 @@ def make_entityset(df, dependencies, name=None, time_index=None):
97
101
98
102
while stack != []:
99
103
current = stack .pop ()
104
+ if variable_types is not None :
105
+ entity_variable_types = {col : variable_types [col ] for col in current .df .columns if col in variable_types }
106
+ else :
107
+ entity_variable_types = None
100
108
if time_index in current .df .columns :
101
- entities [current .index [0 ]] = (current .df , current .index [0 ], time_index )
109
+ entities [current .index [0 ]] = (current .df , current .index [0 ], time_index , entity_variable_types )
102
110
else :
103
- entities [current .index [0 ]] = (current .df , current .index [0 ])
111
+ entities [current .index [0 ]] = (current .df , current .index [0 ], None , entity_variable_types )
104
112
for child in current .children :
105
113
# add to stack
106
114
# add relationship
@@ -110,7 +118,7 @@ def make_entityset(df, dependencies, name=None, time_index=None):
110
118
return ft .EntitySet (name , entities , relationships )
111
119
112
120
113
- def auto_entityset (df , accuracy = 0.98 , index = None , name = None , time_index = None ):
121
+ def auto_entityset (df , accuracy = 0.98 , index = None , name = None , time_index = None , variable_types = None ):
114
122
"""
115
123
Creates a normalized entityset from a dataframe.
116
124
@@ -126,13 +134,17 @@ def auto_entityset(df, accuracy=0.98, index=None, name=None, time_index=None):
126
134
127
135
name (str, optional) : the name of created EntitySet
128
136
129
- time_index (str, optional) : name of time column in the dataframe.
137
+ time_index (str, optional) : name of time column in the dataframe
138
+
139
+ variable_types (dict[str -> Variable], optional):
140
+ Keys are variable ids and values are variable types. Used to
141
+ initialize an entity's store
130
142
131
143
Returns:
132
144
133
145
entityset (ft.EntitySet) : created entity set
134
146
"""
135
- return make_entityset (df , find_dependencies (df , accuracy , index ), name , time_index )
147
+ return make_entityset (df , find_dependencies (df , accuracy , index ), name , time_index , variable_types )
136
148
137
149
138
150
def auto_normalize (df ):
@@ -169,5 +181,6 @@ def normalize_entity(es, accuracy=0.98):
169
181
if len (es .entities ) == 0 :
170
182
raise ValueError ('This EntitySet is empty' )
171
183
entity = es .entities [0 ]
172
- new_es = auto_entityset (entity .df , accuracy , index = entity .index , name = es .id , time_index = entity .time_index )
184
+ new_es = auto_entityset (entity .df , accuracy , index = entity .index , name = es .id , time_index = entity .time_index ,
185
+ variable_types = entity .variable_types )
173
186
return new_es
0 commit comments