Skip to content

Commit 1378eeb

Browse files
Argon-sfc-gh-pczajka
authored and committed
SNOW-2019088: Extend write_pandas by a parameter for schema inference (#2250)
1 parent 3bbf18d commit 1378eeb

File tree

1 file changed

+27
-22
lines changed

1 file changed

+27
-22
lines changed

src/snowflake/connector/pandas_tools.py

Lines changed: 27 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -254,6 +254,7 @@ def write_pandas(
254254
on_error: str = "abort_statement",
255255
parallel: int = 4,
256256
quote_identifiers: bool = True,
257+
infer_schema: bool = False,
257258
auto_create_table: bool = False,
258259
create_temp_table: bool = False,
259260
overwrite: bool = False,
@@ -316,6 +317,8 @@ def write_pandas(
316317
quote_identifiers: By default, identifiers, specifically database, schema, table and column names
317318
(from df.columns) will be quoted. If set to False, identifiers are passed on to Snowflake without quoting.
318319
I.e. identifiers will be coerced to uppercase by Snowflake. (Default value = True)
320+
infer_schema: Perform explicit schema inference on the data in the DataFrame and use the inferred data types
321+
when selecting columns from the DataFrame. (Default value = False)
319322
auto_create_table: When true, will automatically create a table with corresponding columns for each column in
320323
the passed in DataFrame. The table will not be created if it already exists
321324
create_temp_table: (Deprecated) Will make the auto-created table as a temporary table
@@ -482,7 +485,7 @@ def drop_object(name: str, object_type: str) -> None:
482485
num_statements=1,
483486
)
484487

485-
if auto_create_table or overwrite:
488+
if auto_create_table or overwrite or infer_schema:
486489
file_format_location = _create_temp_file_format(
487490
cursor,
488491
database,
@@ -525,27 +528,29 @@ def drop_object(name: str, object_type: str) -> None:
525528
quote_identifiers,
526529
)
527530

528-
iceberg = "ICEBERG " if iceberg_config else ""
529-
iceberg_config_statement = _iceberg_config_statement_helper(
530-
iceberg_config or {}
531-
)
531+
if auto_create_table or overwrite:
532+
iceberg = "ICEBERG " if iceberg_config else ""
533+
iceberg_config_statement = _iceberg_config_statement_helper(
534+
iceberg_config or {}
535+
)
536+
537+
create_table_sql = (
538+
f"CREATE {table_type.upper()} {iceberg}TABLE IF NOT EXISTS identifier(?) "
539+
f"({create_table_columns}) {iceberg_config_statement}"
540+
f" /* Python:snowflake.connector.pandas_tools.write_pandas() */ "
541+
)
542+
params = (target_table_location,)
543+
logger.debug(
544+
f"auto creating table with '{create_table_sql}'. params: %s", params
545+
)
546+
cursor.execute(
547+
create_table_sql,
548+
_is_internal=True,
549+
_force_qmark_paramstyle=True,
550+
params=params,
551+
num_statements=1,
552+
)
532553

533-
create_table_sql = (
534-
f"CREATE {table_type.upper()} {iceberg}TABLE IF NOT EXISTS identifier(?) "
535-
f"({create_table_columns}) {iceberg_config_statement}"
536-
f" /* Python:snowflake.connector.pandas_tools.write_pandas() */ "
537-
)
538-
params = (target_table_location,)
539-
logger.debug(
540-
f"auto creating table with '{create_table_sql}'. params: %s", params
541-
)
542-
cursor.execute(
543-
create_table_sql,
544-
_is_internal=True,
545-
_force_qmark_paramstyle=True,
546-
params=params,
547-
num_statements=1,
548-
)
549554
# need explicit casting when the underlying table schema is inferred
550555
parquet_columns = "$1:" + ",$1:".join(
551556
f"{quote}{snowflake_col}{quote}::{column_type_mapping[col]}"
@@ -584,7 +589,7 @@ def drop_object(name: str, object_type: str) -> None:
584589
f"TYPE=PARQUET "
585590
f"USE_VECTORIZED_SCANNER={use_vectorized_scanner} "
586591
f"COMPRESSION={compression_map[compression]}"
587-
f"{' BINARY_AS_TEXT=FALSE' if auto_create_table or overwrite else ''}"
592+
f"{' BINARY_AS_TEXT=FALSE' if auto_create_table or overwrite or infer_schema else ''}"
588593
f"{sql_use_logical_type}"
589594
f") "
590595
f"PURGE=TRUE ON_ERROR=?"

0 commit comments

Comments
 (0)