Skip to content

Commit c85e592

Browse files
SNOW-2250223: add support for use_vectorized_scanner in write_pandas (#2456)
1 parent a269f15 commit c85e592

File tree

3 files changed

+72
-0
lines changed

3 files changed

+72
-0
lines changed

DESCRIPTION.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ Source code is also available at: https://github.com/snowflakedb/snowflake-conne
1919
- Added basic json support for Interval types.
2020
- Moved `OAUTH_TYPE` to `CLIENT_ENVIROMENT`.
2121
- Fixed a bug where the PAT with external session authenticator was used while `external_session_id` was not provided in `SnowflakeRestful.fetch`.
22+
- Added support for parameter `use_vectorized_scanner` in function `write_pandas`.
2223

2324
- v3.16.0 (July 04, 2025)
2425
- Bumped numpy dependency from <2.1.0 to <=2.2.4.

src/snowflake/connector/pandas_tools.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,7 @@ def write_pandas(
261261
use_logical_type: bool | None = None,
262262
iceberg_config: dict[str, str] | None = None,
263263
bulk_upload_chunks: bool = False,
264+
use_vectorized_scanner: bool = False,
264265
**kwargs: Any,
265266
) -> tuple[
266267
bool,
@@ -308,6 +309,8 @@ def write_pandas(
308309
on_error: Action to take when COPY INTO statements fail, default follows documentation at:
309310
https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#copy-options-copyoptions
310311
(Default value = 'abort_statement').
312+
use_vectorized_scanner: Boolean that specifies whether to use a vectorized scanner for loading Parquet files. See details at
313+
`copy options <https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#copy-options-copyoptions>`_.
311314
parallel: Number of threads to be used when uploading chunks, default follows documentation at:
312315
https://docs.snowflake.com/en/sql-reference/sql/put.html#optional-parameters (Default value = 4).
313316
quote_identifiers: By default, identifiers, specifically database, schema, table and column names
@@ -579,6 +582,7 @@ def drop_object(name: str, object_type: str) -> None:
579582
f"FROM (SELECT {parquet_columns} FROM '{copy_stage_location}') "
580583
f"FILE_FORMAT=("
581584
f"TYPE=PARQUET "
585+
f"USE_VECTORIZED_SCANNER={use_vectorized_scanner} "
582586
f"COMPRESSION={compression_map[compression]}"
583587
f"{' BINARY_AS_TEXT=FALSE' if auto_create_table or overwrite else ''}"
584588
f"{sql_use_logical_type}"

test/integ/pandas_it/test_pandas_tools.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1184,3 +1184,70 @@ def test_write_pandas_bulk_chunks_upload(conn_cnx, bulk_upload_chunks):
11841184
assert result["COUNT(*)"] == 4
11851185
finally:
11861186
cnx.execute_string(drop_sql)
1187+
1188+
1189+
@pytest.mark.parametrize(
1190+
"use_vectorized_scanner",
1191+
[
1192+
True,
1193+
False,
1194+
],
1195+
)
1196+
def test_write_pandas_with_use_vectorized_scanner(
1197+
conn_cnx: Callable[..., Generator[SnowflakeConnection]],
1198+
use_vectorized_scanner,
1199+
caplog,
1200+
):
1201+
"""Tests whether overwriting table using a Pandas DataFrame works as expected."""
1202+
random_table_name = random_string(5, "userspoints_")
1203+
df_data = [("Dash", 50)]
1204+
df = pandas.DataFrame(df_data, columns=["name", "points"])
1205+
1206+
table_name = random_table_name
1207+
col_id = "id"
1208+
col_name = "name"
1209+
col_points = "points"
1210+
1211+
create_sql = (
1212+
f"CREATE OR REPLACE TABLE {table_name}"
1213+
f"({col_name} STRING, {col_points} INT, {col_id} INT AUTOINCREMENT)"
1214+
)
1215+
1216+
drop_sql = f"DROP TABLE IF EXISTS {table_name}"
1217+
with conn_cnx() as cnx: # type: SnowflakeConnection
1218+
original_cur = cnx.cursor().execute
1219+
1220+
def fake_execute(query, params=None, *args, **kwargs):
1221+
return original_cur(query, params, *args, **kwargs)
1222+
1223+
cnx.execute_string(create_sql)
1224+
try:
1225+
with mock.patch(
1226+
"snowflake.connector.cursor.SnowflakeCursor.execute",
1227+
side_effect=fake_execute,
1228+
) as execute:
1229+
# Write dataframe with 1 row
1230+
success, nchunks, nrows, _ = write_pandas(
1231+
cnx,
1232+
df,
1233+
random_table_name,
1234+
quote_identifiers=False,
1235+
auto_create_table=False,
1236+
overwrite=True,
1237+
index=True,
1238+
use_vectorized_scanner=use_vectorized_scanner,
1239+
)
1240+
# Check write_pandas output
1241+
assert success
1242+
assert nchunks == 1
1243+
assert nrows == 1
1244+
1245+
for call in execute.call_args_list:
1246+
if call.args[0].startswith("COPY"):
1247+
assert (
1248+
f"USE_VECTORIZED_SCANNER={use_vectorized_scanner}"
1249+
in call.args[0]
1250+
)
1251+
1252+
finally:
1253+
cnx.execute_string(drop_sql)

0 commit comments

Comments
 (0)