Skip to content

Commit 4dd5df1

Browse files
committed
normalize an entityset with a single entity
1 parent 989d103 commit 4dd5df1

File tree

1 file changed

+25
-0
lines changed

1 file changed

+25
-0
lines changed

autonormalize/autonormalize.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,3 +146,28 @@ def auto_normalize(df):
146146
new_dfs (list[pd.DataFrame]) : list of new dataframes
147147
"""
148148
return normalize_dataframe(df, find_dependencies(df))
149+
150+
151+
def normalize_entity(es, accuracy=0.98):
    """
    Returns a new normalized EntitySet from an EntitySet with a single entity.

    Arguments:
        es (ft.EntitySet) : EntitySet to normalize; must hold exactly one entity
        accuracy (0 < float <= 1.00; default = 0.98) : the accuracy threshold
        required in order to conclude a dependency (i.e. with accuracy = 0.98,
        0.98 of the rows must hold true the dependency LHS --> RHS)

    Returns:
        new_es (ft.EntitySet) : new normalized EntitySet

    Raises:
        ValueError : if the EntitySet is empty or holds more than one entity
    """
    # TODO: add option to pass an EntitySet with more than one entity, and specify which one
    # to normalize while preserving existing relationships

    # `entities` is read as an attribute (a list of entities), not called:
    # calling it would raise TypeError on a list. Read it once and reuse it.
    entities = es.entities
    if len(entities) > 1:
        raise ValueError('There is more than one entity in this EntitySet')
    if not entities:
        raise ValueError('This EntitySet is empty')

    entity = entities[0]
    # The new EntitySet keeps the original id, index and time index.
    return auto_entityset(entity.df, accuracy, index=entity.index,
                          name=es.id, time_index=entity.time_index)

0 commit comments

Comments
 (0)