@@ -1043,11 +1043,19 @@ def to_parquet(  # pylint: disable=too-many-arguments
         )
     if df.empty is True:
         raise exceptions.EmptyDataFrame()
-    session: boto3.Session = _utils.ensure_session(session=boto3_session)
+
+    # Sanitize table to respect Athena's standards
     partition_cols = partition_cols if partition_cols else []
     dtype = dtype if dtype else {}
     columns_comments = columns_comments if columns_comments else {}
     partitions_values: Dict[str, List[str]] = {}
+    df = catalog.sanitize_dataframe_columns_names(df=df)
+    partition_cols = [catalog.sanitize_column_name(p) for p in partition_cols]
+    dtype = {catalog.sanitize_column_name(k): v.lower() for k, v in dtype.items()}
+    columns_comments = {catalog.sanitize_column_name(k): v for k, v in columns_comments.items()}
+    df = catalog.drop_duplicated_columns(df=df)
+
+    session: boto3.Session = _utils.ensure_session(session=boto3_session)
     cpus: int = _utils.ensure_cpu_count(use_threads=use_threads)
     fs: s3fs.S3FileSystem = _utils.get_fs(session=session, s3_additional_kwargs=s3_additional_kwargs)
     compression_ext: Optional[str] = _COMPRESSION_2_EXT.get(compression, None)
@@ -1075,16 +1083,11 @@ def to_parquet(  # pylint: disable=too-many-arguments
         ]
     else:
         mode = "append" if mode is None else mode
-        if (database is not None) and (table is not None):  # Normalize table to respect Athena's standards
-            df = catalog.sanitize_dataframe_columns_names(df=df)
-            partition_cols = [catalog.sanitize_column_name(p) for p in partition_cols]
-            dtype = {catalog.sanitize_column_name(k): v.lower() for k, v in dtype.items()}
-            columns_comments = {catalog.sanitize_column_name(k): v for k, v in columns_comments.items()}
+        if (database is not None) and (table is not None):
             exist: bool = catalog.does_table_exist(database=database, table=table, boto3_session=session)
             if (exist is True) and (mode in ("append", "overwrite_partitions")):
                 for k, v in catalog.get_table_types(database=database, table=table, boto3_session=session).items():
                     dtype[k] = v
-            df = catalog.drop_duplicated_columns(df=df)
         paths, partitions_values = _to_parquet_dataset(
             df=df,
             path=path,
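The net effect of the diff is that column-name sanitization and duplicate-column dropping now run for every `to_parquet` dataset write, not only when `database` and `table` are supplied. Below is a minimal sketch of what that step achieves, assuming a simplified lowercase/underscore rule; the `_sanitize` helper is hypothetical, and the real rules live in `awswrangler.catalog` (`sanitize_dataframe_columns_names`, `sanitize_column_name`, `drop_duplicated_columns`).

```python
import re

import pandas as pd


def _sanitize(name: str) -> str:
    # Hypothetical stand-in for catalog.sanitize_column_name:
    # lower-case and replace characters Athena rejects with underscores.
    return re.sub(r"[^a-z0-9_]", "_", name.lower())


df = pd.DataFrame({"Camel Case": [1, 2], "camel_case": [3, 4], "Ok": [5, 6]})

# Sanitize column names, then drop columns that became duplicates,
# mirroring the order used in the diff above.
df.columns = [_sanitize(c) for c in df.columns]   # "Camel Case" -> "camel_case"
df = df.loc[:, ~df.columns.duplicated()]          # keep the first occurrence

print(df.columns.tolist())  # ['camel_case', 'ok']
```

Doing this before the S3 write keeps the Parquet column names consistent with what Athena/Glue would accept, even when no catalog table is being registered.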