Skip to content

Commit 188bd95

Browse files
authored
SNOW-1279205: add capability to debug arrow data (#1918)
1 parent 091dab4 commit 188bd95

File tree

4 files changed

+34
-3
lines changed

4 files changed

+34
-3
lines changed

DESCRIPTION.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ Source code is also available at: https://github.com/snowflakedb/snowflake-conne
1111
- v3.11.0(TBD)
1212

1313
- Added support for `token_file_path` connection parameter to read an OAuth token from a file when connecting to Snowflake.
14+
- Added support for `debug_arrow_chunk` connection parameter to allow debugging raw arrow data in case of arrow data parsing failure.
1415

1516
- v3.10.0(April 29,2024)
1617

src/snowflake/connector/connection.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,10 @@ def _get_private_bytes_from_file(
281281
True,
282282
bool,
283283
), # Disable console login and fall back to getting SSO URL from GS
284+
"debug_arrow_chunk": (
285+
False,
286+
bool,
287+
), # log raw arrow chunk for debugging purpose in case there is malformed arrow data
284288
}
285289

286290
APPLICATION_RE = re.compile(r"[\w\d_]+")

src/snowflake/connector/result_batch.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -664,11 +664,21 @@ def _create_iter(
664664
) -> Iterator[dict | Exception] | Iterator[tuple | Exception] | Iterator[Table]:
665665
"""Create an iterator for the ResultBatch. Used by get_arrow_iter."""
666666
if self._local:
667-
return self._from_data(self._data, iter_unit)
667+
try:
668+
return self._from_data(self._data, iter_unit)
669+
except Exception:
670+
if connection and getattr(connection, "_debug_arrow_chunk", False):
671+
logger.debug(f"arrow data can not be parsed: {self._data}")
672+
raise
668673
response = self._download(connection=connection)
669674
logger.debug(f"started loading result batch id: {self.id}")
670675
with TimerContextManager() as load_metric:
671-
loaded_data = self._load(response, iter_unit)
676+
try:
677+
loaded_data = self._load(response, iter_unit)
678+
except Exception:
679+
if connection and getattr(connection, "_debug_arrow_chunk", False):
680+
logger.debug(f"arrow data can not be parsed: {response}")
681+
raise
672682
logger.debug(f"finished loading result batch id: {self.id}")
673683
self._metrics[DownloadMetrics.load.value] = load_metric.get_timing_millis()
674684
return loaded_data

test/integ/test_arrow_result.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@
55

66
from __future__ import annotations
77

8+
import base64
89
import itertools
910
import json
11+
import logging
1012
import os
1113
import random
1214
import re
@@ -16,7 +18,7 @@
1618
import pytest
1719

1820
import snowflake.connector
19-
from snowflake.connector.errors import ProgrammingError
21+
from snowflake.connector.errors import OperationalError, ProgrammingError
2022

2123
try:
2224
from snowflake.connector.util_text import random_string
@@ -1103,6 +1105,20 @@ def iterate_over_test_chunk(
11031105
assert str(arrow_res[0]) == expected[i]
11041106

11051107

1108+
@pytest.mark.parametrize("debug_arrow_chunk", [True, False])
1109+
def test_arrow_bad_data(conn_cnx, caplog, debug_arrow_chunk):
1110+
with caplog.at_level(logging.DEBUG):
1111+
with conn_cnx(
1112+
debug_arrow_chunk=debug_arrow_chunk
1113+
) as arrow_cnx, arrow_cnx.cursor() as cursor:
1114+
cursor.execute("select 1")
1115+
cursor._result_set.batches[0]._data = base64.b64encode(b"wrong_data")
1116+
with pytest.raises(OperationalError):
1117+
cursor.fetchone()
1118+
expr = bool("arrow data can not be parsed" in caplog.text)
1119+
assert expr if debug_arrow_chunk else not expr
1120+
1121+
11061122
def init(conn_cnx, table, column, values):
11071123
with conn_cnx() as json_cnx:
11081124
cursor_json = json_cnx.cursor()

0 commit comments

Comments
 (0)