1414from awswrangler ._config import apply_configs
1515from awswrangler .s3 ._delete import delete_objects
1616from awswrangler .s3 ._fs import open_s3_object
17- from awswrangler .s3 ._write import _COMPRESSION_2_EXT , _apply_dtype , _sanitize , _validate_args
17+ from awswrangler .s3 ._write import _COMPRESSION_2_EXT , _apply_dtype , _check_schema_changes , _sanitize , _validate_args
1818from awswrangler .s3 ._write_dataset import _to_dataset
1919
2020_logger : logging .Logger = logging .getLogger (__name__ )
@@ -87,6 +87,7 @@ def to_csv( # pylint: disable=too-many-arguments,too-many-locals,too-many-state
8787 concurrent_partitioning : bool = False ,
8888 mode : Optional [str ] = None ,
8989 catalog_versioning : bool = False ,
90+ schema_evolution : bool = False ,
9091 database : Optional [str ] = None ,
9192 table : Optional [str ] = None ,
9293 dtype : Optional [Dict [str , str ]] = None ,
@@ -182,6 +183,11 @@ def to_csv( # pylint: disable=too-many-arguments,too-many-locals,too-many-state
182183 https://aws-data-wrangler.readthedocs.io/en/2.9.0/stubs/awswrangler.s3.to_parquet.html#awswrangler.s3.to_parquet
183184 catalog_versioning : bool
184185 If True and `mode="overwrite"`, creates an archived version of the table catalog before updating it.
186+ schema_evolution : bool
187+ If True, allows schema evolution (new or missing columns), otherwise an exception will be raised.
188+ (Only considered if dataset=True and mode in ("append", "overwrite_partitions"))
189+ Related tutorial:
190+ https://aws-data-wrangler.readthedocs.io/en/2.9.0/tutorials/014%20-%20Schema%20Evolution.html
185191 database : str, optional
186192 Glue/Athena catalog: Database name.
187193 table : str, optional
@@ -474,6 +480,16 @@ def to_csv( # pylint: disable=too-many-arguments,too-many-locals,too-many-state
474480 pd_kwargs .pop ("compression" , None )
475481
476482 df = df [columns ] if columns else df
483+
484+ columns_types : Dict [str , str ] = {}
485+ partitions_types : Dict [str , str ] = {}
486+ if (database is not None ) and (table is not None ):
487+ columns_types , partitions_types = _data_types .athena_types_from_pandas_partitioned (
488+ df = df , index = index , partition_cols = partition_cols , dtype = dtype , index_left = True
489+ )
490+ if schema_evolution is False :
491+ _check_schema_changes (columns_types = columns_types , table_input = catalog_table_input , mode = mode )
492+
477493 paths , partitions_values = _to_dataset (
478494 func = _to_text ,
479495 concurrent_partitioning = concurrent_partitioning ,
@@ -498,9 +514,6 @@ def to_csv( # pylint: disable=too-many-arguments,too-many-locals,too-many-state
498514 )
499515 if database and table :
500516 try :
501- columns_types , partitions_types = _data_types .athena_types_from_pandas_partitioned (
502- df = df , index = index , partition_cols = partition_cols , dtype = dtype , index_left = True
503- )
504517 serde_info : Dict [str , Any ] = {}
505518 if catalog_table_input :
506519 serde_info = catalog_table_input ["StorageDescriptor" ]["SerdeInfo" ]
0 commit comments