Skip to content

Commit f94595b

Browse files
authored
SNOW-2126985: Fix to_pandas dropping columns (#3408)
1 parent ed0a2ac commit f94595b

File tree

5 files changed

+31
-8
lines changed

5 files changed

+31
-8
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
- Fixed a bug in `DataFrameReader.dbapi` (PrPr) where a `create_connection` function defined as a local function was incompatible with multiprocessing.
1616
- Fixed a bug in `DataFrameReader.dbapi` (PrPr) where the Databricks `TIMESTAMP` type was converted to the Snowflake `TIMESTAMP_NTZ` type instead of the `TIMESTAMP_LTZ` type.
17+
- Fixed a bug in `DataFrame.to_pandas()` that would drop column names when converting a dataframe that did not originate from a select statement.
1718
- Fixed a bug where `DataFrame.create_or_replace_dynamic_table` raised an error when the dataframe contained a UDTF and the `SELECT *` in the UDTF was not parsed correctly.
1819

1920
#### Improvements

src/snowflake/snowpark/async_job.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -373,8 +373,6 @@ def result(
373373
result = self._session._conn._to_data_or_iter(
374374
self._cursor, to_pandas=True, to_iter=False
375375
)["data"]
376-
if not isinstance(result, pandas.DataFrame):
377-
result = pandas.DataFrame(result)
378376
elif async_result_type == _AsyncResultType.PANDAS_BATCH:
379377
result = self._session._conn._to_data_or_iter(
380378
self._cursor, to_pandas=True, to_iter=True

src/snowflake/snowpark/dataframe.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1069,7 +1069,9 @@ def to_pandas(
10691069
# e.g., session.sql("create ...").to_pandas()
10701070
if block:
10711071
if not isinstance(result, pandas.DataFrame):
1072-
return pandas.DataFrame(result)
1072+
return pandas.DataFrame(
1073+
result, columns=[attr.name for attr in self._plan.attributes]
1074+
)
10731075

10741076
return result
10751077

tests/integ/scala/test_async_job_suite.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,23 @@ def test_async_to_pandas_common(session):
114114
session.create_dataframe(res), session.create_dataframe(expected_res)
115115
)
116116

117+
# Non-select
118+
non_select = session.sql("show regions")
119+
expected = session.create_dataframe(non_select.to_pandas())
120+
actual = session.create_dataframe(non_select.to_pandas(block=False).result())
121+
assert non_select.columns == actual.columns
122+
Utils.check_answer(expected, actual)
123+
124+
try:
125+
table_name = Utils.random_table_name()
126+
df = session.sql(f"create temporary table {table_name} (A int)")
127+
Utils.check_answer(
128+
session.create_dataframe(df.to_pandas(block=False).result()),
129+
[Row(f"Table {table_name} successfully created.")],
130+
)
131+
finally:
132+
Utils.drop_table(session, table_name)
133+
117134

118135
@pytest.mark.skipif(IS_IN_STORED_PROC_LOCALFS, reason="Requires large result")
119136
@pytest.mark.skipif(not is_pandas_available, reason="pandas is not available")

tests/integ/test_df_to_pandas.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -201,19 +201,24 @@ def test_to_pandas_non_select(session):
201201
PandasDF,
202202
)
203203

204-
# non SELECT statements will fail
205204
def check_fetch_data_exception(query: str):
206-
result = session.sql(query).to_pandas()
207-
isinstance(result, PandasDF)
205+
df = session.sql(query)
206+
result = df.to_pandas()
207+
assert df.columns == result.columns.to_list()
208+
assert isinstance(result, PandasDF)
208209
return result
209210

210211
temp_table_name = Utils.random_name_for_temp_object(TempObjectType.TABLE)
211212
check_fetch_data_exception("show tables")
212213
res = check_fetch_data_exception(f"create temporary table {temp_table_name}(a int)")
213-
expected_res = pd.DataFrame([(f"Table {temp_table_name} successfully created.",)])
214+
expected_res = pd.DataFrame(
215+
[(f"Table {temp_table_name} successfully created.",)], columns=['"status"']
216+
)
214217
assert expected_res.equals(res)
215218
res = check_fetch_data_exception(f"drop table if exists {temp_table_name}")
216-
expected_res = pd.DataFrame([(f"{temp_table_name} successfully dropped.",)])
219+
expected_res = pd.DataFrame(
220+
[(f"{temp_table_name} successfully dropped.",)], columns=['"status"']
221+
)
217222
assert expected_res.equals(res)
218223

219224
# to_pandas should work for the large dataframe

0 commit comments

Comments
 (0)