Skip to content

Commit a19b5a6

Browse files
SNOW-2676993: Add support for to_snowpark in faster pandas
1 parent c76802f commit a19b5a6

File tree

3 files changed

+51
-0
lines changed

3 files changed

+51
-0
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@
144144
- `cumsum`
145145
- `cummin`
146146
- `cummax`
147+
- `to_snowpark`
147148
- Make faster pandas disabled by default (opt-in instead of opt-out).
148149
- Improve performance of `drop_duplicates` by avoiding joins when `keep!=False` in faster pandas.
149150

src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2302,6 +2302,22 @@ def to_snowflake(
23022302

23032303
def to_snowpark(
    self, index: bool = True, index_label: Optional[IndexLabel] = None
) -> SnowparkDataFrame:
    """
    Faster-pandas-aware entry point for the Snowpark DataFrame conversion.

    The actual conversion lives in ``_to_snowpark_internal``; this method only
    decides which query compiler runs it.

    Args:
        index: Forwarded unchanged to ``_to_snowpark_internal``. The relaxed
            query compiler is only used when this is falsy.
        index_label: Forwarded unchanged to ``_to_snowpark_internal``.

    Returns:
        Whatever ``_to_snowpark_internal`` returns on the chosen query compiler.
    """
    # Stay on this query compiler when no relaxed compiler is attached, or when
    # the caller asked for the index to be included in the result.
    if self._relaxed_query_compiler is None or index:
        return self._to_snowpark_internal(index=index, index_label=index_label)

    # A relaxed compiler is available and the index is not requested: delegate.
    return self._relaxed_query_compiler._to_snowpark_internal(
        index=index, index_label=index_label
    )
2318+
2319+
def _to_snowpark_internal(
2320+
self, index: bool = True, index_label: Optional[IndexLabel] = None
23052321
) -> SnowparkDataFrame:
23062322
"""
23072323
Convert the Snowpark pandas Dataframe to Snowpark Dataframe. The Snowpark Dataframe is created by selecting

tests/integ/modin/test_faster_pandas.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1673,6 +1673,40 @@ def test_series_to_datetime(session):
16731673
assert_series_equal(snow_result, native_result)
16741674

16751675

1676+
@sql_count_checker(query_count=3)
def test_to_snowpark(session):
    """
    Round-trip a table through ``to_snowpark(index=False)`` and back again with
    the dummy row position optimization switched on, checking that both the
    source frame and the round-tripped frame carry a relaxed query compiler and
    that the data matches the pandas materialization of the source frame.
    """
    with session_parameter_override(
        session, "dummy_row_pos_optimization_enabled", True
    ):
        # Stage three rows in a temporary table to read back from Snowflake.
        temp_table = Utils.random_name_for_temp_object(TempObjectType.TABLE)
        source_rows = native_pd.DataFrame(
            [[2, 12], [1, 11], [3, 13]], columns=["A", "B"]
        )
        session.create_dataframe(source_rows).write.save_as_table(
            temp_table, table_type="temp"
        )

        # Snowpark pandas -> Snowpark (index dropped) -> Snowpark pandas.
        snow_df = pd.read_snowflake(temp_table)
        snowpark_df = snow_df.to_snowpark(index=False)
        round_tripped = snowpark_df.to_snowpark_pandas()

        # Both the input frame and the round-tripped frame must have a
        # populated relaxed query compiler running in dummy row position mode.
        for frame in (snow_df, round_tripped):
            assert frame._query_compiler._relaxed_query_compiler is not None
            assert (
                frame._query_compiler._relaxed_query_compiler._dummy_row_pos_mode
                is True
            )

        # The native pandas materialization of the input is the expected data.
        expected = snow_df.to_pandas()
        assert_frame_equal(round_tripped, expected)
1708+
1709+
16761710
@sql_count_checker(query_count=0)
16771711
def test_dummy_row_pos_optimization_enabled_on_session(db_parameters):
16781712
with Session.builder.configs(db_parameters).create() as new_session:

0 commit comments

Comments
 (0)