Skip to content

Commit d3efb17

Browse files
authored
SNOW-1562604: enhance error handling when polling query result (#2027)
1 parent 5832190 commit d3efb17

File tree

3 files changed

+84
-3
lines changed

3 files changed

+84
-3
lines changed

DESCRIPTION.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ Source code is also available at: https://github.com/snowflakedb/snowflake-conne
1515
- Use `pathlib` instead of `os` for default config file location resolution.
1616
- Removed upper `cryptography` version pin.
1717
- Removed reference to script `snowflake-export-certs` (its backing module was already removed long ago)
18+
- Enhanced retry mechanism for handling transient network failures during query result polling when no server response is received.
1819

1920
- v3.12.0(July 24,2024)
2021
- Set default connection timeout of 10 seconds and socket read timeout of 10 minutes for HTTP calls in file transfer.

src/snowflake/connector/network.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -679,6 +679,7 @@ def _get_request(
679679
headers: dict[str, str],
680680
token: str = None,
681681
timeout: int | None = None,
682+
is_fetch_query_status: bool = False,
682683
) -> dict[str, Any]:
683684
if "Content-Encoding" in headers:
684685
del headers["Content-Encoding"]
@@ -692,6 +693,7 @@ def _get_request(
692693
headers,
693694
timeout=timeout,
694695
token=token,
696+
is_fetch_query_status=is_fetch_query_status,
695697
)
696698
if ret.get("code") == SESSION_EXPIRED_GS_CODE:
697699
try:
@@ -706,7 +708,12 @@ def _get_request(
706708
)
707709
)
708710
if ret.get("success"):
709-
return self._get_request(url, headers, token=self.token)
711+
return self._get_request(
712+
url,
713+
headers,
714+
token=self.token,
715+
is_fetch_query_status=is_fetch_query_status,
716+
)
710717

711718
return ret
712719

@@ -779,7 +786,13 @@ def _post_request(
779786
result_url = ret["data"]["getResultUrl"]
780787
logger.debug("ping pong starting...")
781788
ret = self._get_request(
782-
result_url, headers, token=self.token, timeout=timeout
789+
result_url,
790+
headers,
791+
token=self.token,
792+
timeout=timeout,
793+
is_fetch_query_status=bool(
794+
re.match(r"^/queries/.+/result$", result_url)
795+
),
783796
)
784797
logger.debug("ret[code] = %s", ret.get("code", "N/A"))
785798
logger.debug("ping pong done")
@@ -878,6 +891,7 @@ def _request_exec_wrapper(
878891

879892
full_url = retry_ctx.add_retry_params(full_url)
880893
full_url = SnowflakeRestful.add_request_guid(full_url)
894+
is_fetch_query_status = kwargs.pop("is_fetch_query_status", False)
881895
try:
882896
return_object = self._request_exec(
883897
session=session,
@@ -890,6 +904,10 @@ def _request_exec_wrapper(
890904
)
891905
if return_object is not None:
892906
return return_object
907+
if is_fetch_query_status:
908+
err_msg = "fetch query status failed and http request returned None, this is usually caused by transient network failures, retrying..."
909+
logger.info(err_msg)
910+
raise RetryRequest(err_msg)
893911
self._handle_unknown_error(method, full_url, headers, data, conn)
894912
return {}
895913
except RetryRequest as e:

test/integ/test_network.py

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,19 @@
55

66
from __future__ import annotations
77

8+
import logging
9+
import unittest.mock
810
from logging import getLogger
911

12+
import pytest
13+
14+
import snowflake.connector
1015
from snowflake.connector import errorcode, errors
11-
from snowflake.connector.network import SnowflakeRestful
16+
from snowflake.connector.network import (
17+
QUERY_IN_PROGRESS_ASYNC_CODE,
18+
QUERY_IN_PROGRESS_CODE,
19+
SnowflakeRestful,
20+
)
1221

1322
logger = getLogger(__name__)
1423

@@ -36,3 +45,56 @@ def test_no_auth(db_parameters):
3645
assert e.errno == errorcode.ER_CONNECTION_IS_CLOSED
3746
finally:
3847
rest.close()
48+
49+
50+
@pytest.mark.skipolddriver
51+
@pytest.mark.parametrize(
52+
"query_return_code", [QUERY_IN_PROGRESS_CODE, QUERY_IN_PROGRESS_ASYNC_CODE]
53+
)
54+
def test_none_object_when_querying_result(db_parameters, caplog, query_return_code):
55+
# this test simulates the case where the response from the server is None
56+
# the following events happen in sequence:
57+
# 1. we send a simple query to the server which is a post request
58+
# 2. we record the query result in a nonlocal variable of the enclosing test function
59+
# 3. we return a mocked response containing a query-in-progress code and a URL to fetch the query result
60+
# 4. we return None for the fetching query result request for the first time
61+
# 5. for the second time, we return the code for the query result
62+
# 6. in the end, we assert the result, and retry has taken place when result is None by checking logging
63+
64+
original_request_exec = SnowflakeRestful._request_exec
65+
expected_ret = None
66+
get_executed_time = 0
67+
68+
def side_effect_request_exec(self, *args, **kwargs):
69+
nonlocal expected_ret, get_executed_time
70+
# 1. we send a simple query to the server which is a post request
71+
if "queries/v1/query-request" in kwargs["full_url"]:
72+
ret = original_request_exec(self, *args, **kwargs)
73+
expected_ret = ret # 2. we record the query result in a global variable
74+
# 3. we mock return a query in progress code and an url to fetch the query result
75+
return {
76+
"code": query_return_code,
77+
"data": {"getResultUrl": "/queries/123/result"},
78+
}
79+
80+
if "/queries/123/result" in kwargs["full_url"]:
81+
if get_executed_time == 0:
82+
# 4. we return None for the 1st time fetching query result request, this should trigger retry
83+
get_executed_time += 1
84+
return None
85+
else:
86+
# 5. for the second time, we return the code for the query result, this indicates retry success
87+
return expected_ret
88+
89+
with snowflake.connector.connect(
90+
**db_parameters
91+
) as conn, conn.cursor() as cursor, caplog.at_level(logging.INFO):
92+
with unittest.mock.patch.object(
93+
SnowflakeRestful, "_request_exec", new=side_effect_request_exec
94+
):
95+
# 6. in the end, we assert the result, and retry has taken place when result is None by checking logging
96+
assert cursor.execute("select 1").fetchone() == (1,)
97+
assert (
98+
"fetch query status failed and http request returned None, this is usually caused by transient network failures, retrying"
99+
in caplog.text
100+
)

0 commit comments

Comments
 (0)