Skip to content

Commit cb667c3

Browse files
SNOW-2676993: Add support for to_snowpark in faster pandas (#3989)
1 parent 54b84f5 commit cb667c3

File tree

3 files changed

+51
-0
lines changed

3 files changed

+51
-0
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -158,6 +158,7 @@
158158
- `groupby.all`
159159
- `groupby.unique`
160160
- `to_snowflake`
161+
- `to_snowpark`
161162
- Make faster pandas disabled by default (opt-in instead of opt-out).
162163
- Improve performance of `drop_duplicates` by avoiding joins when `keep!=False` in faster pandas.
163164

src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2332,6 +2332,22 @@ def _to_snowflake_internal(
23322332

23332333
def to_snowpark(
    self, index: bool = True, index_label: Optional[IndexLabel] = None
) -> SnowparkDataFrame:
    """
    Convert to a Snowpark DataFrame, using the faster pandas path when possible.

    This is a thin dispatcher around ``_to_snowpark_internal``. The call is
    forwarded to the relaxed query compiler only when one is attached to this
    compiler *and* the caller asked for the index to be dropped
    (``index=False``); in every other case the call is served by this
    compiler itself.

    Args:
        index: Forwarded unchanged to ``_to_snowpark_internal``.
        index_label: Forwarded unchanged to ``_to_snowpark_internal``.

    Returns:
        Whatever ``_to_snowpark_internal`` returns on the selected compiler.
    """
    relaxed_compiler = self._relaxed_query_compiler
    # The relaxed compiler is only eligible when the index is not requested.
    if index or relaxed_compiler is None:
        delegate = self
    else:
        delegate = relaxed_compiler
    return delegate._to_snowpark_internal(index=index, index_label=index_label)
2348+
2349+
def _to_snowpark_internal(
2350+
self, index: bool = True, index_label: Optional[IndexLabel] = None
23352351
) -> SnowparkDataFrame:
23362352
"""
23372353
Convert the Snowpark pandas Dataframe to Snowpark Dataframe. The Snowpark Dataframe is created by selecting

tests/integ/modin/test_faster_pandas.py

Lines changed: 34 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1786,6 +1786,40 @@ def test_to_snowflake(session):
17861786
assert_frame_equal(snow_result, native_result)
17871787

17881788

1789+
@sql_count_checker(query_count=3)
def test_to_snowpark(session):
    """Check that ``to_snowpark(index=False)`` round-trips with a relaxed query compiler attached."""
    with session_parameter_override(
        session, "dummy_row_pos_optimization_enabled", True
    ):
        # Stage a three-row temp table to read back through read_snowflake.
        temp_table = Utils.random_name_for_temp_object(TempObjectType.TABLE)
        seed_frame = native_pd.DataFrame(
            [[2, 12], [1, 11], [3, 13]], columns=["A", "B"]
        )
        session.create_dataframe(seed_frame).write.save_as_table(
            temp_table, table_type="temp"
        )

        # Snowpark pandas -> Snowpark -> Snowpark pandas, then order by "A".
        df = pd.read_snowflake(temp_table)
        snowpark_df = df.to_snowpark(index=False)
        snow_result = snowpark_df.to_snowpark_pandas().sort_values(by="A")

        # Both the input frame and the round-tripped frame must carry a
        # relaxed query compiler running in dummy-row-position mode.
        for frame in (df, snow_result):
            relaxed_compiler = frame._query_compiler._relaxed_query_compiler
            assert relaxed_compiler is not None
            assert relaxed_compiler._dummy_row_pos_mode is True

        # Build the expected result from the materialized input frame.
        native_result = df.to_pandas().sort_values(by="A")

        # The round-tripped data must match the expected frame.
        assert_frame_equal(snow_result, native_result)
1821+
1822+
17891823
@sql_count_checker(query_count=0)
17901824
def test_dummy_row_pos_optimization_enabled_on_session(db_parameters):
17911825
with Session.builder.configs(db_parameters).create() as new_session:

0 commit comments

Comments
 (0)