Skip to content

Commit 3da0f4c

Browse files
committed
Update schema utils
1 parent 66b0d65 commit 3da0f4c

File tree

1 file changed

+15
-7
lines changed

1 file changed

+15
-7
lines changed

schema_utils.py

Lines changed: 15 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
from datetime import datetime
12
import os
23
import logging
34
from types import NoneType
@@ -98,12 +99,19 @@ def do_nothing(obj):
9899

99100
TYPE_TO_CONVERT_FUNCTION_MAP = {
100101
str: to_string,
102+
int: to_numpy_int64,
103+
float: to_numpy_float64,
104+
bool: to_numpy_bool,
105+
datetime: to_pandas_timestamp,
106+
bson.ObjectId: to_string,
107+
bson.Decimal128: to_numpy_float64,
108+
np.int32: to_numpy_int64,
101109
np.int64: to_numpy_int64,
102110
bson.int64.Int64: to_numpy_int64,
103111
np.bool_: to_numpy_bool,
104112
np.float64: to_numpy_float64,
105113
bson.Decimal128: to_numpy_float64,
106-
pd.Timestamp: to_pandas_timestamp,
114+
pd.Timestamp: to_pandas_timestamp
107115
}
108116

109117
COLUMN_DTYPE_CONVERSION_MAP = {
@@ -287,19 +295,19 @@ def process_dataframe(table_name_param: str, df: pd.DataFrame):
287295
#if current_item_type != schema_of_this_column[TYPE_KEY]:
288296
expected_type = schema_of_this_column[TYPE_KEY]
289297
for item in df[col_name]:
290-
if not isinstance(type(item), expected_type):
298+
current_column_name = col_name
299+
if not isinstance(item, expected_type):
291300
logger.debug(
292301
f" item type detected: current item is {item} of type={type(item)}, expected item type from schema= {expected_type}"
293302
)
294303
conversion_fcn = TYPE_TO_CONVERT_FUNCTION_MAP.get(
295304
expected_type, do_nothing
296305
)
297306

298-
# Set the current column name for logging
299-
current_column_name = col_name
300-
df[col_name] = df[col_name].apply(conversion_fcn)
301-
print(df[col_name])
302-
break
307+
# Apply the conversion function to every value in the column
308+
df[col_name] = df[col_name].apply(conversion_fcn)
309+
print(df[col_name])
310+
break
303311
# for index, item in enumerate(df[col_name]):
304312
# print(f"Row {index}: Value={item}, Type={type(item)}")
305313

0 commit comments

Comments
 (0)