snowflakedb
diff --git a/‎DESCRIPTION.md‎
Lines changed: 12 additions & 1 deletion b/‎DESCRIPTION.md‎
Lines changed: 12 additions & 1 deletion
diff --git a/‎src/snowflake/connector/constants.py‎
Lines changed: 1 addition & 0 deletions b/‎src/snowflake/connector/constants.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/snowflake/connector/cursor.py‎
Lines changed: 139 additions & 41 deletions b/‎src/snowflake/connector/cursor.py‎
Lines changed: 139 additions & 41 deletions
diff --git a/‎src/snowflake/connector/telemetry.py‎
Lines changed: 2 additions & 0 deletions b/‎src/snowflake/connector/telemetry.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎src/snowflake/connector/test_util.py‎
Lines changed: 40 additions & 0 deletions b/‎src/snowflake/connector/test_util.py‎
Lines changed: 40 additions & 0 deletions
@@ -8,11 +8,22 @@ Source code is also available at: https://github.com/snowflakedb/snowflake-conne
 
 # Release Notes
 
-- v2.8.4(unreleased)
+- v2.9.0(Unreleased)
+
   - Fixed a bug where the permission of the file downloaded via GET command is changed
   - Reworked authentication internals to allow users to plug custom key-pair authenticators
+  - Multi-statement query execution is now supported through `cursor.execute` and `cursor.executemany`
+    - The Snowflake parameter `MULTI_STATEMENT_COUNT` can be altered at the account, session, or statement level. An additional argument, `num_statements`, can be provided to `execute` to use this parameter at the statement level. It *must* be provided to `executemany` to submit a multi-statement query through the method. Note that bulk insert optimizations available through `executemany` are not available when submitting multi-statement queries.
+      - By default the parameter is 1, meaning only a single query can be submitted at a time
+      - Set to 0 to submit any number of statements in a multi-statement query
+      - Set to >1 to submit the specified exact number of statements in a multi-statement query
+    - Bindings are accepted in the same way for multi-statements as they are for single statement queries
+    - Asynchronous multi-statement query execution is supported. Users should still use `get_results_from_sfqid` to retrieve results
+    - To access the results of each query, users can call `SnowflakeCursor.nextset()`, as specified in the Python DB API 2.0 (PEP-249), to iterate through each statement's results
+      - The first statement's results are accessible immediately after calling `execute` (or `get_results_from_sfqid` if asynchronous) through the existing `fetch*()` methods
 
 - v2.8.3(November 28,2022)
+
   - Bumped cryptography dependency from <39.0.0 to <41.0.0
   - Fixed a bug where expired OCSP response cache caused infinite recursion during cache loading
 
 
@@ -229,6 +229,7 @@ class FileHeader(NamedTuple):
 PARAMETER_ENABLE_STAGE_S3_PRIVATELINK_FOR_US_EAST_1 = (
     "ENABLE_STAGE_S3_PRIVATELINK_FOR_US_EAST_1"
 )
+PARAMETER_MULTI_STATEMENT_COUNT = "MULTI_STATEMENT_COUNT"
 
 HTTP_HEADER_CONTENT_TYPE = "Content-Type"
 HTTP_HEADER_CONTENT_ENCODING = "Content-Encoding"
 
@@ -12,6 +12,7 @@
 import sys
 import time
 import uuid
+from collections import deque
 from enum import Enum
 from logging import getLogger
 from threading import Lock, Timer
@@ -190,7 +191,7 @@ class SnowflakeCursor:
         r".*VALUES\s*(\(.*\)).*", re.IGNORECASE | re.MULTILINE | re.DOTALL
     )
     ALTER_SESSION_RE = re.compile(
-        r"alter\s+session\s+set\s+(.*)=\'?([^\']+)\'?\s*;",
+        r"alter\s+session\s+set\s+(\w*?)\s*=\s*\'?([^\']+?)\'?\s*(?:;|$)",
         flags=re.IGNORECASE | re.MULTILINE | re.DOTALL,
     )
 
@@ -232,6 +233,8 @@ def __init__(
         self._sequence_counter = -1
         self._request_id = None
         self._is_file_transfer = False
+        self._multi_statement_resultIds: deque[str] = deque()
+        self.multi_statement_savedIds: list[str] = []
 
         self._timestamp_output_format = None
         self._timestamp_ltz_output_format = None
@@ -559,6 +562,40 @@ def interrupt_handler(*_):  # pragma: no cover
         self._sequence_counter = -1
         return ret
 
+    def _preprocess_pyformat_query(
+        self,
+        command: str,
+        params: Sequence[Any] | dict[Any, Any] | None = None,
+    ) -> str:
+        """Bind ``params`` into ``command`` on the client side (pyformat/format paramstyle).
+
+        Args:
+            command: Query text; it is formatted with the ``%`` operator when binding applies.
+            params: Values to bind. They are first passed through the connection's
+                ``_process_params_pyformat`` and the processed result is what gets interpolated.
+
+        Returns:
+            ``command % processed_params`` when binding applies, otherwise ``command`` unchanged.
+        """
+        # pyformat/format paramstyle
+        # client side binding
+        processed_params = self._connection._process_params_pyformat(params, self)
+        # SNOW-513061 collect telemetry for empty sequence usage before we make the breaking change announcement
+        if params is not None and len(params) == 0:
+            self._log_telemetry_job_data(
+                TelemetryField.EMPTY_SEQ_INTERPOLATION,
+                TelemetryData.TRUE
+                if self.connection._interpolate_empty_sequences
+                else TelemetryData.FALSE,
+            )
+        # Only build the debug message when DEBUG logging is actually enabled.
+        if logger.getEffectiveLevel() <= logging.DEBUG:
+            logger.debug(
+                f"binding: [{self._format_query_for_log(command)}] "
+                f"with input=[{params}], "
+                f"processed=[{processed_params}]",
+            )
+        # With empty-sequence interpolation enabled, bind whenever processed params exist
+        # (even if empty); with it disabled, bind only when there is at least one processed
+        # param. The short-circuit order matters: len() is evaluated only in the disabled case.
+        if (
+            self.connection._interpolate_empty_sequences
+            and processed_params is not None
+        ) or (
+            not self.connection._interpolate_empty_sequences
+            and len(processed_params) > 0
+        ):
+            query = command % processed_params
+        else:
+            query = command
+        return query
+
     def execute(
         self,
         command: str,
@@ -583,6 +620,7 @@ def execute(
         _raise_put_get_error: bool = True,
         _force_put_overwrite: bool = False,
         file_stream: IO[bytes] | None = None,
+        num_statements: int | None = None,
     ) -> SnowflakeCursor | None:
         """Executes a command/query.
 
@@ -612,6 +650,8 @@ def execute(
             _force_put_overwrite: If the SQL query is a PUT, then this flag can force overwriting of an already
                 existing file on stage.
             file_stream: File-like object to be uploaded with PUT
+            num_statements: Query level parameter submitted in _statement_params constraining exact number of
+            statements being submitted (or 0 if submitting an uncounted number) when using a multi-statement query.
 
         Returns:
             The cursor itself, or None if some error happened, or the response returned
@@ -635,6 +675,12 @@ def execute(
             logger.warning("execute: no query is given to execute")
             return
 
+        if _statement_params is None:
+            _statement_params = dict()
+
+        # Compare against None rather than relying on truthiness: 0 is a meaningful value
+        # ("any number of statements") and must still be forwarded as a statement parameter.
+        if num_statements is not None:
+            # Build a new dict so a mapping supplied by the caller is not mutated.
+            _statement_params = {
+                **_statement_params,
+                "MULTI_STATEMENT_COUNT": num_statements,
+            }
+
         kwargs = {
             "timeout": timeout,
             "statement_params": _statement_params,
@@ -646,33 +692,7 @@ def execute(
         }
 
         if self._connection.is_pyformat:
-            # pyformat/format paramstyle
-            # client side binding
-            processed_params = self._connection._process_params_pyformat(params, self)
-            # SNOW-513061 collect telemetry for empty sequence usage before we make the breaking change announcement
-            if params is not None and len(params) == 0:
-                self._log_telemetry_job_data(
-                    TelemetryField.EMPTY_SEQ_INTERPOLATION,
-                    TelemetryData.TRUE
-                    if self.connection._interpolate_empty_sequences
-                    else TelemetryData.FALSE,
-                )
-            if logger.getEffectiveLevel() <= logging.DEBUG:
-                logger.debug(
-                    f"binding: [{self._format_query_for_log(command)}] "
-                    f"with input=[{params}], "
-                    f"processed=[{processed_params}]",
-                )
-            if (
-                self.connection._interpolate_empty_sequences
-                and processed_params is not None
-            ) or (
-                not self.connection._interpolate_empty_sequences
-                and len(processed_params) > 0
-            ):
-                query = command % processed_params
-            else:
-                query = command
+            query = self._preprocess_pyformat_query(command, params)
         else:
             # qmark and numeric paramstyle
             query = command
@@ -711,11 +731,14 @@ def execute(
             if "data" in ret and "queryId" in ret["data"]
             else None
         )
+        logger.debug(f"sfqid: {self.sfqid}")
         self._sqlstate = (
             ret["data"]["sqlState"]
             if "data" in ret and "sqlState" in ret["data"]
             else None
         )
+        logger.info("query execution done")
+
         self._first_chunk_time = get_time_millis()
 
         # if server gives a send time, log the time it took to arrive
@@ -726,13 +749,27 @@ def execute(
             self._log_telemetry_job_data(
                 TelemetryField.TIME_CONSUME_FIRST_RESULT, time_consume_first_result
             )
-        logger.debug("sfqid: %s", self.sfqid)
 
-        logger.info("query execution done")
         if ret["success"]:
             logger.debug("SUCCESS")
             data = ret["data"]
 
+            for m in self.ALTER_SESSION_RE.finditer(query):
+                # session parameters
+                param = m.group(1).upper()
+                value = m.group(2)
+                self._connection.converter.set_parameter(param, value)
+
+            if "resultIds" in data:
+                self._init_multi_statement_results(data)
+                return self
+            else:
+                self.multi_statement_savedIds = []
+
+            self._is_file_transfer = "command" in data and data["command"] in (
+                "UPLOAD",
+                "DOWNLOAD",
+            )
             logger.debug("PUT OR GET: %s", self.is_file_transfer)
             if self.is_file_transfer:
                 # Decide whether to use the old, or new code path
@@ -757,12 +794,6 @@ def execute(
                 sf_file_transfer_agent.execute()
                 data = sf_file_transfer_agent.result()
                 self._total_rowcount = len(data["rowset"]) if "rowset" in data else -1
-            m = self.ALTER_SESSION_RE.match(query)
-            if m:
-                # session parameters
-                param = m.group(1).upper()
-                value = m.group(2)
-                self._connection.converter.set_parameter(param, value)
 
             if _exec_async:
                 self.connection._async_sfqids[self._sfqid] = None
@@ -871,6 +902,22 @@ def _init_result_and_meta(self, data):
             else:
                 self._total_rowcount += updated_rows
 
+    def _init_multi_statement_results(self, data: dict):
+        """Set up cursor state for a multi-statement response and load the first result set.
+
+        Records multi-statement telemetry, splits the comma-separated ``data["resultIds"]``
+        into ``multi_statement_savedIds``, copies those IDs into the pending queue
+        ``_multi_statement_resultIds``, and then calls ``nextset()``.
+
+        Args:
+            data: Response payload; must contain a ``"resultIds"`` string.
+        """
+        self._log_telemetry_job_data(TelemetryField.MULTI_STATEMENT, TelemetryData.TRUE)
+        self.multi_statement_savedIds = data["resultIds"].split(",")
+        # Separate deque so IDs can be consumed from the left while the saved list stays intact.
+        self._multi_statement_resultIds = deque(self.multi_statement_savedIds)
+        # NOTE(review): this reads self._is_file_transfer as set before this call; in the
+        # visible execute() hunk the flag is assigned only after this method returns --
+        # confirm the flag is current at this point.
+        if self._is_file_transfer:
+            Error.errorhandler_wrapper(
+                self.connection,
+                self,
+                ProgrammingError,
+                {
+                    "msg": "PUT/GET commands are not supported for multi-statement queries and cannot be executed.",
+                    "errno": ER_INVALID_VALUE,
+                },
+            )
+        self.nextset()
+
     def check_can_use_arrow_resultset(self):
         global CAN_USE_ARROW_RESULT_FORMAT
 
@@ -1002,10 +1049,17 @@ def executemany(
         command = command.strip(" \t\n\r") if command else None
 
         if not seqparams:
+            logger.warning(
+                "No parameters provided to executemany, returning without doing anything."
+            )
             return self
 
-        if self.INSERT_SQL_RE.match(command):
+        if self.INSERT_SQL_RE.match(command) and (
+            "num_statements" not in kwargs or kwargs.get("num_statements") == 1
+        ):
             if self._connection.is_pyformat:
+                # TODO - utilize multi-statement instead of rewriting the query and
+                #  accumulate results to mock the result from a single insert statement as formatted below
                 logger.debug("rewriting INSERT query")
                 command_wo_comments = re.sub(self.COMMENT_SQL_RE, "", command)
                 m = self.INSERT_SQL_VALUES_RE.match(command_wo_comments)
@@ -1074,8 +1128,31 @@ def executemany(
                 return self
 
         self.reset()
-        for param in seqparams:
-            self.execute(command, params=param, _do_reset=False, **kwargs)
+        if "num_statements" not in kwargs:
+            # fall back to old driver behavior when the user does not provide the parameter to enable
+            #  multi-statement optimizations for executemany
+            for param in seqparams:
+                self.execute(command, params=param, _do_reset=False, **kwargs)
+        else:
+            # Make sure every repeated copy of the command is terminated exactly once:
+            # append a separator only when the command does not already end with ';'
+            # (optionally followed by whitespace).
+            if re.search(r";\s*$", command) is None:
+                command = command + "; "
+            if self._connection.is_pyformat:
+                # Client-side binding: interpolate each parameter set into its own copy of
+                # the command, then submit the concatenation with no server-side params.
+                processed_queries = [
+                    self._preprocess_pyformat_query(command, params)
+                    for params in seqparams
+                ]
+                query = "".join(processed_queries)
+                params = None
+            else:
+                # Server-side binding: repeat the command once per parameter set and
+                # flatten all parameter sets into one sequence in the same order.
+                query = command * len(seqparams)
+                params = [param for parameters in seqparams for param in parameters]
+
+            # The submitted text holds one copy of the command per parameter set, so the
+            # statement count scales by the number of parameter sets.
+            kwargs["num_statements"] = kwargs.get("num_statements") * len(seqparams)
+
+            self.execute(query, params, _do_reset=False, **kwargs)
+
         return self
 
     def _result_iterator(
@@ -1147,8 +1224,19 @@ def fetchall(self) -> list[tuple] | list[dict]:
         return ret
 
     def nextset(self):
-        """Not supported."""
-        logger.debug("nop")
+        """
+        Fetches the next set of results if the previously executed query was multi-statement so that subsequent calls
+        to any of the fetch*() methods will return rows from the next query's set of results. Returns None if no more
+        query results are available.
+        """
+        # reset() runs unconditionally, i.e. also when no further result sets are queued.
+        self.reset()
+        if self._multi_statement_resultIds:
+            # Peek at the head of the queue first; the ID is removed (popleft, inside the log
+            # call below) only after query_result() has returned.
+            self.query_result(self._multi_statement_resultIds[0])
+            logger.info(
+                f"Retrieved results for query ID: {self._multi_statement_resultIds.popleft()}"
+            )
+            return self
+
         return None
 
     def setinputsizes(self, _):
@@ -1276,6 +1364,16 @@ def wait_until_ready():
             # Unset this function, so that we don't block anymore
             self._prefetch_hook = None
 
+            if (
+                self._inner_cursor._total_rowcount == 1
+                and self._inner_cursor.fetchall()
+                == [("Multiple statements executed successfully.",)]
+            ):
+                url = f"/queries/{sfqid}/result"
+                ret = self._connection.rest.request(url=url, method="get")
+                if "data" in ret and "resultIds" in ret["data"]:
+                    self._init_multi_statement_results(ret["data"])
+
         self.connection.get_query_status_throw_if_error(
             sfqid
         )  # Trigger an exception if query failed
 
@@ -41,6 +41,8 @@ class TelemetryField(Enum):
     PANDAS_WRITE = "client_write_pandas"
     # imported packages along with client
     IMPORTED_PACKAGES = "client_imported_packages"
+    # multi-statement usage
+    MULTI_STATEMENT = "client_multi_statement_query"
     # Keys for telemetry data sent through either in-band or out-of-band telemetry
     KEY_TYPE = "type"
     KEY_SOURCE = "source"
 
@@ -7,6 +7,12 @@
 
 import logging
 import os
+import time
+
+import pytest
+
+import snowflake.connector.connection
+from snowflake.connector.constants import QueryStatus
 
 from .compat import IS_LINUX
 
@@ -29,3 +35,37 @@
         )
     )
     rt_plain_logger.addHandler(ch)
+
+
+def _wait_while_query_running(
+    con: snowflake.connector.connection.SnowflakeConnection,
+    sfqid: str,
+    sleep_time: int,
+    dont_cache: bool = False,
+) -> None:
+    """
+    Blocks until the connection no longer reports the given query as running.
+
+    The query status is fetched repeatedly; while ``con.is_still_running`` is true for
+    it, the function sleeps for ``sleep_time`` seconds and checks again. When
+    ``dont_cache`` is true the status is fetched through ``con._get_query_status``
+    instead of ``con.get_query_status``.
+    """
+    if dont_cache:
+        fetch_status = con._get_query_status
+    else:
+        fetch_status = con.get_query_status
+    while True:
+        if not con.is_still_running(fetch_status(sfqid)):
+            return
+        time.sleep(sleep_time)
+
+
+def _wait_until_query_success(
+    con: snowflake.connector.connection.SnowflakeConnection,
+    sfqid: str,
+    num_checks: int,
+    sleep_per_check: int,
+) -> None:
+    """
+    Polls the status of a query until it is ``QueryStatus.SUCCESS``.
+
+    The status is checked at most ``num_checks`` times, sleeping ``sleep_per_check``
+    seconds after every check that is not a success. If no check observes success,
+    the current test is failed through ``pytest.fail`` with the query ID and the last
+    status seen.
+    """
+    # Pre-bind so the failure message is well defined even when num_checks <= 0
+    # and the loop body never runs.
+    status = None
+    for _ in range(num_checks):
+        status = con.get_query_status(sfqid)
+        if status == QueryStatus.SUCCESS:
+            break
+        time.sleep(sleep_per_check)
+    else:
+        pytest.fail(
+            "We should have broken out of wait loop for query success. "
+            f"Query ID: {sfqid}. "
+            f"Final query status: {status}"
+        )
Original file line number	Diff line number	Diff line change
`@@ -229,6 +229,7 @@ class FileHeader(NamedTuple):`
`229`	`229`	`PARAMETER_ENABLE_STAGE_S3_PRIVATELINK_FOR_US_EAST_1 = (`
`230`	`230`	`"ENABLE_STAGE_S3_PRIVATELINK_FOR_US_EAST_1"`
`231`	`231`	`)`
	`232`	`+PARAMETER_MULTI_STATEMENT_COUNT = "MULTI_STATEMENT_COUNT"`
`232`	`233`
`233`	`234`	`HTTP_HEADER_CONTENT_TYPE = "Content-Type"`
`234`	`235`	`HTTP_HEADER_CONTENT_ENCODING = "Content-Encoding"`