Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ Source code is also available at: https://github.com/snowflakedb/snowflake-conne
- Disabled token caching for OAuth Client Credentials authentication
- Added in-band HTTP exception telemetry.
- Fixed a bug where timezoned timestamps fetched as pandas.DataFrame or pyarrow.Table would overflow for the sake of unnecessary precision. In the case where an overflow cannot be prevented a clear error will be raised now.
- Added `use_vectorized_scanner` parameter on `write_pandas` to leverage vectorized scanner.

- v3.16.0(July 04,2025)
- Bumped numpy dependency from <2.1.0 to <=2.2.4.
Expand Down
7 changes: 6 additions & 1 deletion src/snowflake/connector/pandas_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@ def write_pandas(
use_logical_type: bool | None = None,
iceberg_config: dict[str, str] | None = None,
bulk_upload_chunks: bool = False,
use_vectorized_scanner: bool | None = None,
**kwargs: Any,
) -> tuple[
bool,
Expand Down Expand Up @@ -334,7 +335,10 @@ def write_pandas(
* storage_serialization_policy: specifies the storage serialization policy for the table
bulk_upload_chunks: If set to True, the upload will use the wildcard upload method.
This is a faster method of uploading but instead of uploading and cleaning up each chunk separately it will upload all chunks at once and then clean up locally stored chunks.

use_vectorized_scanner: Boolean that specifies whether to use a vectorized scanner for loading Parquet files.
Using the vectorized scanner can significantly reduce the latency for loading Parquet files. To enable
vectorized scanning of Parquet files, set use_vectorized_scanner to True. Set to None to use Snowflake's default.
For more information, see: https://docs.snowflake.com/en/sql-reference/sql/copy-into-table#label-use-vectorized-scanner


Returns:
Expand Down Expand Up @@ -582,6 +586,7 @@ def drop_object(name: str, object_type: str) -> None:
f"COMPRESSION={compression_map[compression]}"
f"{' BINARY_AS_TEXT=FALSE' if auto_create_table or overwrite else ''}"
f"{sql_use_logical_type}"
f"{' USE_VECTORIZED_SCANNER=' + str(use_vectorized_scanner).upper() if use_vectorized_scanner is not None else ''}"
f") "
f"PURGE=TRUE ON_ERROR=?"
)
Expand Down
57 changes: 57 additions & 0 deletions test/unit/test_pandas_tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#
# Copyright (c) 2012-2023 Snowflake Computing Inc. All rights reserved.
#

from typing import Union
from unittest.mock import MagicMock

import pandas as pd
import pytest

from snowflake.connector import pandas_tools

from .mock_utils import mock_connection


@pytest.mark.parametrize(
    ("use_vectorized_scanner", "expected_file_format"),
    [
        (None, "FILE_FORMAT=(TYPE=PARQUET COMPRESSION=auto)"),
        (
            True,
            "FILE_FORMAT=(TYPE=PARQUET COMPRESSION=auto USE_VECTORIZED_SCANNER=TRUE)",
        ),
        (
            False,
            "FILE_FORMAT=(TYPE=PARQUET COMPRESSION=auto USE_VECTORIZED_SCANNER=FALSE)",
        ),
    ],
)
def test_write_pandas_use_vectorized_scanner(
    use_vectorized_scanner: Union[bool, None], expected_file_format: str
):
    """Verify the COPY INTO FILE_FORMAT clause emitted by write_pandas.

    USE_VECTORIZED_SCANNER must appear only when the parameter is an
    explicit True/False, and must be omitted entirely when it is None.
    """
    # Minimal frame; contents are irrelevant since the cursor is mocked.
    frame = pd.DataFrame({"col1": [1, 2, 3]})

    # Mocked connection/cursor record every SQL statement without executing it.
    connection = mock_connection()
    cursor = MagicMock()
    connection.cursor.return_value = cursor

    pandas_tools.write_pandas(
        conn=connection,
        df=frame,
        table_name="test_table",
        schema="test_schema",
        database="test_database",
        use_vectorized_scanner=use_vectorized_scanner,
    )

    # First positional argument of each execute() call is the SQL text.
    issued_sql = (call[0][0] for call in cursor.execute.call_args_list)

    matches = [
        sql for sql in issued_sql if "COPY INTO" in sql and expected_file_format in sql
    ]
    assert matches
Loading