Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ Source code is also available at: https://github.com/snowflakedb/snowflake-conne
- Disabled token caching for OAuth Client Credentials authentication
- Added in-band HTTP exception telemetry.
- Fixed a bug where timezoned timestamps fetched as pandas.DataFrame or pyarrow.Table would overflow for the sake of unnecessary precision. In the case where an overflow cannot be prevented a clear error will be raised now.
- Added `use_vectorized_scanner` parameter on `write_pandas` to leverage vectorized scanner.

- v3.16.0(July 04,2025)
- Bumped numpy dependency from <2.1.0 to <=2.2.4.
Expand Down
7 changes: 6 additions & 1 deletion src/snowflake/connector/pandas_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@ def write_pandas(
use_logical_type: bool | None = None,
iceberg_config: dict[str, str] | None = None,
bulk_upload_chunks: bool = False,
use_vectorized_scanner: bool | None = None,
**kwargs: Any,
) -> tuple[
bool,
Expand Down Expand Up @@ -334,7 +335,10 @@ def write_pandas(
* storage_serialization_policy: specifies the storage serialization policy for the table
bulk_upload_chunks: If set to True, the upload will use the wildcard upload method.
This is a faster method of uploading but instead of uploading and cleaning up each chunk separately it will upload all chunks at once and then clean up locally stored chunks.

use_vectorized_scanner: Boolean that specifies whether to use a vectorized scanner for loading Parquet files.
Using the vectorized scanner can significantly reduce the latency for loading Parquet files. To enable
vectorized scanning of Parquet files, set use_vectorized_scanner to True. Set to None to use Snowflake's default.
For more information, see: https://docs.snowflake.com/en/sql-reference/sql/copy-into-table#label-use-vectorized-scanner


Returns:
Expand Down Expand Up @@ -582,6 +586,7 @@ def drop_object(name: str, object_type: str) -> None:
f"COMPRESSION={compression_map[compression]}"
f"{' BINARY_AS_TEXT=FALSE' if auto_create_table or overwrite else ''}"
f"{sql_use_logical_type}"
f"{' USE_VECTORIZED_SCANNER=' + str(use_vectorized_scanner).upper() if use_vectorized_scanner is not None else ''}"
f") "
f"PURGE=TRUE ON_ERROR=?"
)
Expand Down
57 changes: 57 additions & 0 deletions test/unit/test_pandas_tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#
# Copyright (c) 2012-2023 Snowflake Computing Inc. All rights reserved.
#

from typing import Union
from unittest.mock import MagicMock

import pandas as pd
import pytest

from snowflake.connector import pandas_tools

from .mock_utils import mock_connection


@pytest.mark.parametrize(
    ("use_vectorized_scanner", "expected_file_format"),
    [
        (None, "FILE_FORMAT=(TYPE=PARQUET COMPRESSION=auto)"),
        (
            True,
            "FILE_FORMAT=(TYPE=PARQUET COMPRESSION=auto USE_VECTORIZED_SCANNER=TRUE)",
        ),
        (
            False,
            "FILE_FORMAT=(TYPE=PARQUET COMPRESSION=auto USE_VECTORIZED_SCANNER=FALSE)",
        ),
    ],
)
def test_write_pandas_use_vectorized_scanner(
    use_vectorized_scanner: Union[bool, None], expected_file_format: str
):
    """Verify the COPY INTO FILE_FORMAT clause emitted by write_pandas.

    USE_VECTORIZED_SCANNER must appear only when the parameter is an
    explicit True/False, and must be omitted entirely when it is None.
    """
    # Minimal frame; contents are irrelevant since the cursor is mocked.
    frame = pd.DataFrame({"col1": [1, 2, 3]})

    # Mocked connection/cursor record every SQL statement without executing it.
    connection = mock_connection()
    cursor = MagicMock()
    connection.cursor.return_value = cursor

    pandas_tools.write_pandas(
        conn=connection,
        df=frame,
        table_name="test_table",
        schema="test_schema",
        database="test_database",
        use_vectorized_scanner=use_vectorized_scanner,
    )

    # First positional argument of each execute() call is the SQL text.
    issued_sql = (call[0][0] for call in cursor.execute.call_args_list)

    matches = [
        sql for sql in issued_sql if "COPY INTO" in sql and expected_file_format in sql
    ]
    assert matches
Loading