|
1 | 1 | from datetime import datetime |
2 | 2 | import os |
3 | 3 | import logging |
4 | | -from types import NoneType |
| 4 | +# 17 June 2025 - doesn't work on Python 3.9 and earlier (types.NoneType was added in 3.10) |
| 5 | +#from types import NoneType |
5 | 6 | import bson.int64 |
6 | 7 | import pymongo |
7 | 8 | import pandas as pd |
|
30 | 31 |
|
31 | 32 |
|
32 | 33 | logger = logging.getLogger(f"{__name__}") |
| 34 | +# 17 June 2025 - define NoneType manually instead of importing it from types, for Python 3.9 and earlier |
| 35 | +NoneType = type(None) |
33 | 36 |
|
34 | 37 | def _converter_template(obj, type_name, raw_convert_func, default_value=None): |
35 | 38 | original_type = type(obj) |
@@ -228,7 +231,9 @@ def init_table_schema(table_name: str): |
228 | 231 | ) |
229 | 232 | if schema_of_this_table: |
230 | 233 | logger.info(f"loaded schema of {table_name} from file") |
231 | | - schemas.init_table_schema(table_name, schema_of_this_table) |
| 234 | + # schemas.init_table_schema(table_name, schema_of_this_table) |
| 235 | + # 9 May 2025: should not write back to the internal schema file |
| 236 | + schemas.init_table_schema_to_mem(table_name, schema_of_this_table) |
232 | 237 | # load column renaming if it exists, otherwise this table has been previously |
233 | 238 | # initiated but no column is renamed, so we don't need to do anything |
234 | 239 | table_column_renaming = read_from_file( |
@@ -282,6 +287,9 @@ def process_dataframe(table_name_param: str, df: pd.DataFrame): |
282 | 287 |
|
283 | 288 |
|
284 | 289 | processed_col_name = schemas.find_column_renaming(table_name, col_name) |
| 290 | + logger.debug( |
| 291 | + f"%%%% Processed col name found: processed_col_name is {processed_col_name} %%%%%" |
| 292 | + ) |
285 | 293 | schema_of_this_column = schemas.get_table_column_schema(table_name, col_name) |
286 | 294 | logger.debug( |
287 | 295 | f"%%%% In process_df: schema_of_this_column is {schema_of_this_column} %%%%%" |
@@ -311,6 +319,8 @@ def process_dataframe(table_name_param: str, df: pd.DataFrame): |
311 | 319 | if processed_col_name and processed_col_name != col_name: |
312 | 320 | df.rename(columns={col_name: processed_col_name}, inplace=True) |
313 | 321 | col_name = processed_col_name |
| 322 | + # May 9: get schema from file for the renamed column |
| 323 | + schema_of_this_column = schemas.get_table_column_schema(table_name, col_name) |
314 | 324 |
|
315 | 325 | # schema_of_this_column should always exists at this point |
316 | 326 | # existing column or new column with schema appended, process according to schema_of_this_column |
|
0 commit comments