|
| 1 | +from datetime import datetime |
1 | 2 | import os |
2 | 3 | import logging |
3 | 4 | from types import NoneType |
@@ -98,12 +99,19 @@ def do_nothing(obj): |
98 | 99 |
|
# Maps a schema's expected value type to the function used to convert a
# column's values to that type. Callers look entries up with
# ``TYPE_TO_CONVERT_FUNCTION_MAP.get(expected_type, do_nothing)``, so a type
# that has no entry here falls back to ``do_nothing``.
#
# Each key must appear exactly once: a repeated key in a dict display is
# silently collapsed (the last value wins), which hides mistakes.
TYPE_TO_CONVERT_FUNCTION_MAP = {
    str: to_string,
    int: to_numpy_int64,
    float: to_numpy_float64,
    bool: to_numpy_bool,
    datetime: to_pandas_timestamp,
    bson.ObjectId: to_string,
    bson.Decimal128: to_numpy_float64,
    np.int32: to_numpy_int64,
    np.int64: to_numpy_int64,
    bson.int64.Int64: to_numpy_int64,
    np.bool_: to_numpy_bool,
    np.float64: to_numpy_float64,
    pd.Timestamp: to_pandas_timestamp,
}
108 | 116 |
|
109 | 117 | COLUMN_DTYPE_CONVERSION_MAP = { |
@@ -287,19 +295,19 @@ def process_dataframe(table_name_param: str, df: pd.DataFrame): |
287 | 295 | #if current_item_type != schema_of_this_column[TYPE_KEY]: |
288 | 296 | expected_type = schema_of_this_column[TYPE_KEY] |
289 | 297 | for item in df[col_name]: |
290 | | - if not isinstance(type(item), expected_type): |
| 298 | + current_column_name = col_name |
| 299 | + if not isinstance(item, expected_type): |
291 | 300 | logger.debug( |
292 | 301 | f" item type detected: current item is {item} of type={type(item)}, expected item type from schema= {expected_type}" |
293 | 302 | ) |
294 | 303 | conversion_fcn = TYPE_TO_CONVERT_FUNCTION_MAP.get( |
295 | 304 | expected_type, do_nothing |
296 | 305 | ) |
297 | 306 |
|
298 | | - # Set the current column name for logging |
299 | | - current_column_name = col_name |
300 | | - df[col_name] = df[col_name].apply(conversion_fcn) |
301 | | - print(df[col_name]) |
302 | | - break |
| 307 | + # Set the current column name for logging |
| 308 | + df[col_name] = df[col_name].apply(conversion_fcn) |
| 309 | + print(df[col_name]) |
| 310 | + break |
303 | 311 | # for index, item in enumerate(df[col_name]): |
304 | 312 | # print(f"Row {index}: Value={item}, Type={type(item)}") |
305 | 313 |
|
|
0 commit comments