1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -158,6 +158,7 @@
- `groupby.all`
- `groupby.unique`
- `to_snowflake`
- `to_snowpark`
- Make faster pandas disabled by default (opt-in instead of opt-out).
- Improve performance of `drop_duplicates` by avoiding joins when `keep!=False` in faster pandas.

@@ -2332,6 +2332,22 @@ def _to_snowflake_internal(

def to_snowpark(
    self, index: bool = True, index_label: Optional[IndexLabel] = None
) -> SnowparkDataFrame:
    """
    Wrapper around _to_snowpark_internal so that to_snowpark is supported in faster pandas.
    """
    if self._relaxed_query_compiler is not None and not index:
        return self._relaxed_query_compiler._to_snowpark_internal(
            index=index,
            index_label=index_label,
        )
    return self._to_snowpark_internal(
        index=index,
        index_label=index_label,
    )

def _to_snowpark_internal(
    self, index: bool = True, index_label: Optional[IndexLabel] = None
) -> SnowparkDataFrame:
    """
    Convert the Snowpark pandas Dataframe to Snowpark Dataframe. The Snowpark Dataframe is created by selecting
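For context, a minimal usage sketch of the added wrapper. The table name "MY_TABLE" and the session setup are illustrative assumptions, not part of this change:

# Sketch only: assumes a Snowpark session is already configured, faster pandas is
# enabled for it, and a table named "MY_TABLE" exists.
import modin.pandas as pd
import snowflake.snowpark.modin.plugin  # noqa: F401  # registers the Snowpark pandas backend

df = pd.read_snowflake("MY_TABLE")
# With faster pandas enabled and index=False, the relaxed query compiler path is used.
sdf = df.to_snowpark(index=False)
sdf.show()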
34 changes: 34 additions & 0 deletions tests/integ/modin/test_faster_pandas.py
@@ -1786,6 +1786,40 @@ def test_to_snowflake(session):
        assert_frame_equal(snow_result, native_result)


@sql_count_checker(query_count=3)
def test_to_snowpark(session):
    with session_parameter_override(
        session, "dummy_row_pos_optimization_enabled", True
    ):
        # create a temp table
        table_name = Utils.random_name_for_temp_object(TempObjectType.TABLE)
        session.create_dataframe(
            native_pd.DataFrame([[2, 12], [1, 11], [3, 13]], columns=["A", "B"])
        ).write.save_as_table(table_name, table_type="temp")

        # create snow dataframes
        df = pd.read_snowflake(table_name)
        sdf = df.to_snowpark(index=False)
        snow_result = sdf.to_snowpark_pandas().sort_values(by="A")

        # verify that the input dataframe has a populated relaxed query compiler
        assert df._query_compiler._relaxed_query_compiler is not None
        assert df._query_compiler._relaxed_query_compiler._dummy_row_pos_mode is True
        # verify that the output dataframe also has a populated relaxed query compiler
        assert snow_result._query_compiler._relaxed_query_compiler is not None
        assert (
            snow_result._query_compiler._relaxed_query_compiler._dummy_row_pos_mode
            is True
        )

        # create pandas dataframes
        native_df = df.to_pandas()
        native_result = native_df.sort_values(by="A")

        # compare results
        assert_frame_equal(snow_result, native_result)


@sql_count_checker(query_count=0)
def test_dummy_row_pos_optimization_enabled_on_session(db_parameters):
    with Session.builder.configs(db_parameters).create() as new_session: