Skip to content

Commit 320294b

Browse files
author
Bob Strahan
committed
Fix Glue table updates to handle location changes in addition to schema changes
1 parent 4cf5578 commit 320294b

File tree

1 file changed

+42
-11
lines changed

1 file changed

+42
-11
lines changed

lib/idp_common_pkg/idp_common/reporting/save_reporting_data.py

Lines changed: 42 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -428,16 +428,34 @@ def _create_or_update_glue_table(
428428
existing_column_names = {col["Name"] for col in existing_columns}
429429
new_column_names = {col["Name"] for col in columns}
430430

431-
# If there are new columns, update the table
432-
if new_column_names - existing_column_names:
433-
logger.info(f"Updating Glue table {table_name} with new columns")
431+
# Check if location has changed
432+
existing_location = (
433+
existing_table.get("Table", {})
434+
.get("StorageDescriptor", {})
435+
.get("Location", "")
436+
)
437+
new_location = table_input["StorageDescriptor"]["Location"]
438+
439+
# Check if columns or location have changed
440+
columns_changed = bool(new_column_names - existing_column_names)
441+
location_changed = existing_location != new_location
442+
443+
# If there are new columns or location has changed, update the table
444+
if columns_changed or location_changed:
445+
if columns_changed:
446+
logger.info(f"Updating Glue table {table_name} with new columns")
447+
if location_changed:
448+
logger.info(
449+
f"Updating Glue table {table_name} with new location: {existing_location} -> {new_location}"
450+
)
451+
434452
self.glue_client.update_table(
435453
DatabaseName=self.database_name, TableInput=table_input
436454
)
437455
return True
438456
else:
439457
logger.debug(
440-
f"Glue table {table_name} already exists with current schema"
458+
f"Glue table {table_name} already exists with current schema and location"
441459
)
442460
return False
443461

@@ -863,21 +881,34 @@ def _create_or_update_metering_glue_table(self, schema: pa.Schema) -> bool:
863881

864882
try:
865883
# Check if table exists
866-
self.glue_client.get_table(DatabaseName=self.database_name, Name=table_name)
884+
existing_table_response = self.glue_client.get_table(
885+
DatabaseName=self.database_name, Name=table_name
886+
)
867887

868888
# Table exists, check if we need to update it
869-
existing_table = self.glue_client.get_table(
870-
DatabaseName=self.database_name, Name=table_name
871-
)["Table"]
889+
existing_table = existing_table_response["Table"]
872890
existing_columns = existing_table["StorageDescriptor"]["Columns"]
873891

874892
# Check if new columns need to be added
875893
existing_column_names = {col["Name"] for col in existing_columns}
876894
new_column_names = {col["Name"] for col in columns}
877895

878-
if not new_column_names.issubset(existing_column_names):
879-
# Update table with new columns
880-
logger.info(f"Updating Glue table {table_name} with new columns")
896+
# Check if location has changed
897+
existing_location = existing_table["StorageDescriptor"].get("Location", "")
898+
new_location = table_input["StorageDescriptor"]["Location"]
899+
900+
# Check if columns or location have changed
901+
columns_changed = not new_column_names.issubset(existing_column_names)
902+
location_changed = existing_location != new_location
903+
904+
if columns_changed or location_changed:
905+
if columns_changed:
906+
logger.info(f"Updating Glue table {table_name} with new columns")
907+
if location_changed:
908+
logger.info(
909+
f"Updating Glue table {table_name} with new location: {existing_location} -> {new_location}"
910+
)
911+
881912
self.glue_client.update_table(
882913
DatabaseName=self.database_name, TableInput=table_input
883914
)

0 commit comments

Comments
 (0)