@@ -33,11 +33,12 @@ def _create_iceberg_table(
3333 encryption : Optional [str ] = None ,
3434 kms_key : Optional [str ] = None ,
3535 boto3_session : Optional [boto3 .Session ] = None ,
36+ dtype : Optional [Dict [str , str ]] = None ,
3637) -> None :
3738 if not path :
3839 raise exceptions .InvalidArgumentValue ("Must specify table location to create the table." )
3940
40- columns_types , _ = catalog .extract_athena_types (df = df , index = index )
41+ columns_types , _ = catalog .extract_athena_types (df = df , index = index , dtype = dtype )
4142 cols_str : str = ", " .join ([f"{ k } { v } " for k , v in columns_types .items ()])
4243 partition_cols_str : str = f"PARTITIONED BY ({ ', ' .join ([col for col in partition_cols ])} )" if partition_cols else ""
4344 table_properties_str : str = (
@@ -86,6 +87,7 @@ def to_iceberg(
8687 boto3_session : Optional [boto3 .Session ] = None ,
8788 s3_additional_kwargs : Optional [Dict [str , Any ]] = None ,
8889 additional_table_properties : Optional [Dict [str , Any ]] = None ,
90+ dtype : Optional [Dict [str , str ]] = None ,
8991) -> None :
9092 """
9193 Insert into Athena Iceberg table using INSERT INTO ... SELECT. Will create Iceberg table if it does not exist.
@@ -133,6 +135,10 @@ def to_iceberg(
133135 e.g. additional_table_properties={'write_target_data_file_size_bytes': '536870912'}
134136
135137 https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-table-properties
138+ dtype: Optional[Dict[str, str]]
139+ Dictionary of column names and Athena/Glue types to be cast.
140+ Useful when you have columns with undetermined or mixed data types.
141+ e.g. {'col name': 'bigint', 'col2 name': 'int'}
136142
137143 Returns
138144 -------
@@ -192,6 +198,7 @@ def to_iceberg(
192198 encryption = encryption ,
193199 kms_key = kms_key ,
194200 boto3_session = boto3_session ,
201+ dtype = dtype ,
195202 )
196203
197204 # Create temporary external table, write the results
@@ -203,6 +210,7 @@ def to_iceberg(
203210 table = temp_table ,
204211 boto3_session = boto3_session ,
205212 s3_additional_kwargs = s3_additional_kwargs ,
213+ dtype = dtype ,
206214 )
207215
208216 # Insert into iceberg table
0 commit comments