Skip to content

Commit 54b84f5

Browse files
SNOW-2676991: Add support for to_snowflake in faster pandas (#3988)
1 parent 8eb5c4b commit 54b84f5

File tree

3 files changed

+66
-4
lines changed

3 files changed

+66
-4
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@
157157
- `groupby.any`
158158
- `groupby.all`
159159
- `groupby.unique`
160+
- `to_snowflake`
160161
- Make faster pandas disabled by default (opt-in instead of opt-out).
161162
- Improve performance of `drop_duplicates` by avoiding joins when `keep!=False` in faster pandas.
162163

src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -836,9 +836,8 @@ def _raise_not_implemented_error_for_timedelta(
836836
method = method_frame.f_code.co_name # type: ignore[union-attr]
837837
ErrorMessage.not_implemented_for_timedelta(method)
838838

839-
def _warn_lost_snowpark_pandas_type(self) -> None:
839+
def _warn_lost_snowpark_pandas_type(self, method: str) -> None:
840840
"""Warn Snowpark pandas type can be lost in current operation."""
841-
method = inspect.currentframe().f_back.f_back.f_code.co_name # type: ignore[union-attr]
842841
snowpark_pandas_types = [
843842
type(t).__name__
844843
for t in set(
@@ -2284,7 +2283,35 @@ def to_snowflake(
22842283
index_label: Optional[IndexLabel] = None,
22852284
table_type: Literal["", "temp", "temporary", "transient"] = "",
22862285
) -> None:
2287-
self._warn_lost_snowpark_pandas_type()
2286+
"""
2287+
Wrapper around _to_snowflake_internal to be supported in faster pandas.
2288+
"""
2289+
if self._relaxed_query_compiler is not None and not index:
2290+
self._relaxed_query_compiler._to_snowflake_internal(
2291+
name=name,
2292+
if_exists=if_exists,
2293+
index=index,
2294+
index_label=index_label,
2295+
table_type=table_type,
2296+
)
2297+
else:
2298+
self._to_snowflake_internal(
2299+
name=name,
2300+
if_exists=if_exists,
2301+
index=index,
2302+
index_label=index_label,
2303+
table_type=table_type,
2304+
)
2305+
2306+
def _to_snowflake_internal(
2307+
self,
2308+
name: Union[str, Iterable[str]],
2309+
if_exists: Optional[Literal["fail", "replace", "append"]] = "fail",
2310+
index: bool = True,
2311+
index_label: Optional[IndexLabel] = None,
2312+
table_type: Literal["", "temp", "temporary", "transient"] = "",
2313+
) -> None:
2314+
self._warn_lost_snowpark_pandas_type("to_snowflake")
22882315
handle_if_exists_for_to_snowflake(if_exists=if_exists, name=name)
22892316

22902317
if if_exists == "fail":
@@ -2322,7 +2349,7 @@ def to_snowpark(
23222349

23232350
For details, please see comment in _to_snowpark_dataframe_of_pandas_dataframe.
23242351
"""
2325-
self._warn_lost_snowpark_pandas_type()
2352+
self._warn_lost_snowpark_pandas_type("to_snowpark")
23262353

23272354
return self._to_snowpark_dataframe_from_snowpark_pandas_dataframe(
23282355
index, index_label

tests/integ/modin/test_faster_pandas.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1752,6 +1752,40 @@ def test_series_to_datetime(session):
17521752
assert_series_equal(snow_result, native_result)
17531753

17541754

1755+
@sql_count_checker(query_count=4)
1756+
def test_to_snowflake(session):
1757+
with session_parameter_override(
1758+
session, "dummy_row_pos_optimization_enabled", True
1759+
):
1760+
# create tables
1761+
table_name = Utils.random_name_for_temp_object(TempObjectType.TABLE)
1762+
session.create_dataframe(
1763+
native_pd.DataFrame([[2, 12], [1, 11], [3, 13]], columns=["A", "B"])
1764+
).write.save_as_table(table_name, table_type="temp")
1765+
1766+
# create snow dataframes
1767+
df = pd.read_snowflake(table_name)
1768+
df.to_snowflake(table_name, if_exists="replace", index=False)
1769+
snow_result = pd.read_snowflake(table_name).sort_values(by="A")
1770+
1771+
# verify that the input dataframe has a populated relaxed query compiler
1772+
assert df._query_compiler._relaxed_query_compiler is not None
1773+
assert df._query_compiler._relaxed_query_compiler._dummy_row_pos_mode is True
1774+
# verify that the output dataframe also has a populated relaxed query compiler
1775+
assert snow_result._query_compiler._relaxed_query_compiler is not None
1776+
assert (
1777+
snow_result._query_compiler._relaxed_query_compiler._dummy_row_pos_mode
1778+
is True
1779+
)
1780+
1781+
# create pandas dataframes
1782+
native_df = df.to_pandas()
1783+
native_result = native_df.sort_values(by="A")
1784+
1785+
# compare results
1786+
assert_frame_equal(snow_result, native_result)
1787+
1788+
17551789
@sql_count_checker(query_count=0)
17561790
def test_dummy_row_pos_optimization_enabled_on_session(db_parameters):
17571791
with Session.builder.configs(db_parameters).create() as new_session:

0 commit comments

Comments
 (0)