Skip to content

Commit 1ac85a9

Browse files
committed
add parameter
1 parent 29ec58d commit 1ac85a9

File tree

3 files changed

+22
-5
lines changed

3 files changed

+22
-5
lines changed

awswrangler/athena/_write_iceberg.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,13 @@ def _determine_differences(
115115

116116
catalog_column_types = typing.cast(
117117
Dict[str, str],
118-
catalog.get_table_types(database=database, table=table, catalog_id=catalog_id, boto3_session=boto3_session),
118+
catalog.get_table_types(
119+
database=database,
120+
table=table,
121+
catalog_id=catalog_id,
122+
return_iceberg_current=True,
123+
boto3_session=boto3_session,
124+
),
119125
)
120126

121127
original_column_names = set(catalog_column_types)

awswrangler/catalog/_get.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ def get_table_types(
107107
database: str,
108108
table: str,
109109
catalog_id: str | None = None,
110+
return_iceberg_current: bool = False,
110111
boto3_session: boto3.Session | None = None,
111112
) -> dict[str, str] | None:
112113
"""Get all columns and types from a table.
@@ -120,6 +121,9 @@ def get_table_types(
120121
catalog_id
121122
The ID of the Data Catalog from which to retrieve Databases.
122123
If ``None`` is provided, the AWS account ID is used by default.
124+
return_iceberg_current
125+
If True, returns only current iceberg fields (fields marked with iceberg.field.current: true).
126+
Otherwise, returns the all fields. False by default (return all fields).
123127
boto3_session
124128
The default boto3 session will be used if **boto3_session** receive ``None``.
125129
@@ -139,7 +143,10 @@ def get_table_types(
139143
response = client_glue.get_table(**_catalog_id(catalog_id=catalog_id, DatabaseName=database, Name=table))
140144
except client_glue.exceptions.EntityNotFoundException:
141145
return None
142-
return _extract_dtypes_from_table_details(response=response)
146+
return _extract_dtypes_from_table_details(
147+
response=response,
148+
return_iceberg_current=return_iceberg_current,
149+
)
143150

144151

145152
def get_databases(

awswrangler/catalog/_utils.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,16 @@ def _sanitize_name(name: str) -> str:
3131
return re.sub("[^A-Za-z0-9_]+", "_", name).lower() # Replacing non alphanumeric characters by underscore
3232

3333

34-
def _extract_dtypes_from_table_details(response: "GetTableResponseTypeDef") -> dict[str, str]:
34+
def _extract_dtypes_from_table_details(
35+
response: "GetTableResponseTypeDef",
36+
return_iceberg_current: bool = False,
37+
) -> dict[str, str]:
3538
dtypes: dict[str, str] = {}
3639
for col in response["Table"]["StorageDescriptor"]["Columns"]:
37-
# Do not return "hidden" iceberg columns
38-
if col.get("Parameters", {}).get("iceberg.field.current") != "false":
40+
# Only return current fields if flag is enabled
41+
if not return_iceberg_current or col.get("Parameters", {}).get("iceberg.field.current") == "true":
3942
dtypes[col["Name"]] = col["Type"]
43+
# Add partition keys as columns
4044
if "PartitionKeys" in response["Table"]:
4145
for par in response["Table"]["PartitionKeys"]:
4246
dtypes[par["Name"]] = par["Type"]

0 commit comments

Comments
 (0)