Skip to content

Commit 0b24148

Browse files
authored
SNOW-641540 Fix dtypes of fetch_pandas_all for empty result (#1238)
* SNOW-641540 Fix dtypes of fetch_pandas_all for empty result (#1226)

* SNOW-641540 Fix dtypes of fetch_pandas_all for empty result

  When the result is empty, the pandas dataframe information is lost when creating the iterator. To keep the information, we store the to_pandas result and use it for the empty-dataframe case.

* Address comment
* use list
* format
* Add DESCRIPTION
1 parent 12c4e5a commit 0b24148

File tree

4 files changed

+21
-12
lines changed

4 files changed

+21
-12
lines changed

DESCRIPTION.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@ Source code is also available at: https://github.com/snowflakedb/snowflake-conne
88

99
# Release Notes
1010

11+
- v2.7.13 (Unreleased)
1112

13+
- Fixed missing dtypes when calling fetch_pandas_all() on empty result
1214

1315
- v2.7.12(August 26,2022)
1416

src/snowflake/connector/result_batch.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -646,7 +646,7 @@ def _get_arrow_iter(
646646
return self._create_iter(iter_unit=IterUnit.TABLE_UNIT, connection=connection)
647647

648648
def _create_empty_table(self) -> Table:
649-
"""Returns emtpy Arrow table based on schema"""
649+
"""Returns empty Arrow table based on schema"""
650650
if installed_pandas:
651651
# initialize pyarrow type array corresponding to FIELD_TYPES
652652
FIELD_TYPE_TO_PA_TYPE = [e.pa_type() for e in FIELD_TYPES]

src/snowflake/connector/result_set.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,8 @@ def _fetch_pandas_all(self, **kwargs) -> pandas.DataFrame:
189189
ignore_index=True, # Don't keep in result batch indexes
190190
**kwargs,
191191
)
192-
return pandas.DataFrame(columns=self.batches[0].column_names)
192+
# Empty dataframe
193+
return self.batches[0].to_pandas()
193194

194195
def _get_metrics(self) -> dict[str, int]:
195196
"""Sum up all the chunks' metrics and show them together."""

test/integ/pandas/test_arrow_pandas.py

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1169,6 +1169,18 @@ def assert_dtype_equal(a, b):
11691169
)
11701170

11711171

1172+
def assert_pandas_batch_types(
1173+
batch: pandas.DataFrame, expected_types: list[type]
1174+
) -> None:
1175+
assert batch.dtypes is not None
1176+
1177+
pandas_dtypes = batch.dtypes
1178+
# pd.string is represented as an np.object
1179+
# np.dtype string is not the same as pd.string (python)
1180+
for pandas_dtype, expected_type in zip(pandas_dtypes, expected_types):
1181+
assert_dtype_equal(pandas_dtype.type, numpy.dtype(expected_type).type)
1182+
1183+
11721184
def test_pandas_dtypes(conn_cnx):
11731185
with conn_cnx(
11741186
session_parameters={
@@ -1179,18 +1191,12 @@ def test_pandas_dtypes(conn_cnx):
11791191
cur.execute(
11801192
"select 1::integer, 2.3::double, 'foo'::string, current_timestamp()::timestamp where 1=0"
11811193
)
1182-
batches = cur.get_result_batches()
1183-
batch = batches[0].to_pandas()
1194+
expected_types = [numpy.int64, float, object, numpy.datetime64]
1195+
assert_pandas_batch_types(cur.fetch_pandas_all(), expected_types)
11841196

1185-
assert batch.dtypes is not None
1197+
batches = cur.get_result_batches()
11861198
assert batches[0].to_arrow() is not True
1187-
1188-
pandas_dtypes = batch.dtypes
1189-
expected_types = [numpy.int64, float, object, numpy.datetime64]
1190-
# pd.string is represented as an np.object
1191-
# np.dtype string is not the same as pd.string (python)
1192-
for i, typ in enumerate(expected_types):
1193-
assert_dtype_equal(pandas_dtypes[i].type, numpy.dtype(typ).type)
1199+
assert_pandas_batch_types(batches[0].to_pandas(), expected_types)
11941200

11951201

11961202
def test_timestamp_tz(conn_cnx):

0 commit comments

Comments
 (0)