Skip to content

Commit 47a92bb

Browse files
committed
Solves issue #3
1 parent 2f0c4eb commit 47a92bb

File tree

1 file changed

+31
-0
lines changed

1 file changed

+31
-0
lines changed

python/helper_functions.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from pandas.api.types import is_string_dtype, is_numeric_dtype, is_categorical_dtype
2+
3+
4+
def metadata_from_dataframe(df):
    """Build metadata describing the columns of a dataset.

    Args:
        df (:class:`pandas.DataFrame`):
            Dataset whose columns are described.

    Returns:
        list of dict:
            One entry per column, in column order. Every entry has the
            keys ``'name'`` (the column label) and ``'type'`` (one of
            ``'category'``, ``'numeric'`` or ``'string'``). Entries for
            categorical columns also have ``'categories'``, the sorted
            list of the category values declared by the column dtype.

    Raises:
        ValueError: If a type of variable is not handled by this function
    """
    # Local import: ``is_categorical_dtype`` is deprecated in recent pandas
    # releases, so the dtype is checked with ``isinstance`` instead.
    from pandas.api.types import CategoricalDtype

    metadata = []
    for c in df.columns:
        column = df[c]
        # Categorical must be tested first so that a categorical column is
        # never classified by the type of its underlying values.
        if isinstance(column.dtype, CategoricalDtype):
            tmp = {'name': c, 'type': 'category',
                   'categories': sorted(column.dtype.categories.tolist())}
        elif is_numeric_dtype(column):
            tmp = {'name': c, 'type': 'numeric'}
        elif is_string_dtype(column):
            tmp = {'name': c, 'type': 'string'}
        else:
            raise ValueError('Unknown type for {}'.format(c))
        metadata.append(tmp)
    return metadata

0 commit comments

Comments
 (0)