From bc1d44f1a66a0ffbc4916831674bfbf426c175d6 Mon Sep 17 00:00:00 2001 From: Abdel Jaidi Date: Tue, 18 Feb 2025 11:53:18 +0000 Subject: [PATCH] feat: add dtype argument to delete_from_iceberg --- awswrangler/athena/_write_iceberg.py | 6 ++++++ tests/unit/test_athena_iceberg.py | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/awswrangler/athena/_write_iceberg.py b/awswrangler/athena/_write_iceberg.py index 52f3743d6..ee3791bf2 100644 --- a/awswrangler/athena/_write_iceberg.py +++ b/awswrangler/athena/_write_iceberg.py @@ -667,6 +667,7 @@ def delete_from_iceberg_table( workgroup: str = "primary", encryption: str | None = None, kms_key: str | None = None, + dtype: dict[str, str] | None = None, boto3_session: boto3.Session | None = None, s3_additional_kwargs: dict[str, Any] | None = None, catalog_id: str | None = None, @@ -702,6 +703,10 @@ def delete_from_iceberg_table( Valid values: [``None``, ``"SSE_S3"``, ``"SSE_KMS"``]. Notice: ``"CSE_KMS"`` is not supported. kms_key For SSE-KMS, this is the KMS key ARN or ID. + dtype + Dictionary of columns names and Athena/Glue types to be casted. + Useful when you have columns with undetermined or mixed data types. + (e.g. {'col name': 'bigint', 'col2 name': 'int'}) boto3_session The default boto3 session will be used if **boto3_session** receive ``None``. s3_additional_kwargs @@ -763,6 +768,7 @@ def delete_from_iceberg_table( boto3_session=boto3_session, s3_additional_kwargs=s3_additional_kwargs, catalog_id=catalog_id, + dtype=dtype, index=False, ) diff --git a/tests/unit/test_athena_iceberg.py b/tests/unit/test_athena_iceberg.py index 927fb77f2..6233e8425 100644 --- a/tests/unit/test_athena_iceberg.py +++ b/tests/unit/test_athena_iceberg.py @@ -870,6 +870,7 @@ def test_athena_delete_from_iceberg_table( "id": [1, 2, 3], "name": ["a", "b", "c"], "ts": [ts("2020-01-01 00:00:00.0"), ts("2020-01-02 00:00:01.0"), ts("2020-01-03 00:00:00.0")], + "empty": [pd.NA, pd.NA, pd.NA], } ) df["id"] = df["id"].astype("Int64") # Cast as nullable int64 type @@ -883,6 +884,7 @@ def test_athena_delete_from_iceberg_table( temp_path=path2, partition_cols=partition_cols, keep_files=False, + dtype={"empty": "string"}, ) wr.athena.delete_from_iceberg_table( @@ -892,6 +894,7 @@ def test_athena_delete_from_iceberg_table( temp_path=path2, merge_cols=["id"], keep_files=False, + dtype={"empty": "string"}, ) df_actual = wr.athena.read_sql_query( @@ -906,10 +909,12 @@ def test_athena_delete_from_iceberg_table( "id": [3], "name": ["c"], "ts": [ts("2020-01-03 00:00:00.0")], + "empty": [pd.NA], } ) df_expected["id"] = df_expected["id"].astype("Int64") # Cast as nullable int64 type df_expected["name"] = df_expected["name"].astype("string") + df_expected["empty"] = df_expected["empty"].astype("string") assert_pandas_equals(df_expected, df_actual)