Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@
- `cumsum`
- `cummin`
- `cummax`
- `to_snowpark`
- Make faster pandas disabled by default (opt-in instead of opt-out).
- Improve performance of `drop_duplicates` by avoiding joins when `keep!=False` in faster pandas.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2302,6 +2302,22 @@ def to_snowflake(

def to_snowpark(
    self, index: bool = True, index_label: Optional[IndexLabel] = None
) -> SnowparkDataFrame:
    """
    Faster-pandas-aware entry point for ``_to_snowpark_internal``.

    The conversion is delegated to the relaxed query compiler only when one is
    attached to this query compiler and ``index`` is falsy. In every other case
    this query compiler's own ``_to_snowpark_internal`` performs the conversion.

    Args:
        index: Forwarded unchanged to ``_to_snowpark_internal``.
        index_label: Forwarded unchanged to ``_to_snowpark_internal``.

    Returns:
        Whatever the selected ``_to_snowpark_internal`` call returns.
    """
    relaxed = self._relaxed_query_compiler
    # The relaxed path is only taken when the index is not requested.
    use_relaxed = relaxed is not None and not index
    target = relaxed if use_relaxed else self
    return target._to_snowpark_internal(index=index, index_label=index_label)

def _to_snowpark_internal(
self, index: bool = True, index_label: Optional[IndexLabel] = None
) -> SnowparkDataFrame:
"""
Convert the Snowpark pandas Dataframe to Snowpark Dataframe. The Snowpark Dataframe is created by selecting
Expand Down
34 changes: 34 additions & 0 deletions tests/integ/modin/test_faster_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1673,6 +1673,40 @@ def test_series_to_datetime(session):
assert_series_equal(snow_result, native_result)


@sql_count_checker(query_count=3)
def test_to_snowpark(session):
    """
    Round-trip a frame through ``to_snowpark(index=False)`` with the
    ``dummy_row_pos_optimization_enabled`` session parameter overridden to True.

    Checks that both the source frame and the frame rebuilt from the Snowpark
    DataFrame carry a relaxed query compiler whose ``_dummy_row_pos_mode`` is
    True, and that the rebuilt frame equals the source frame's pandas form.
    """
    with session_parameter_override(
        session, "dummy_row_pos_optimization_enabled", True
    ):
        # Stage a three-row temp table to read back from.
        temp_table = Utils.random_name_for_temp_object(TempObjectType.TABLE)
        seed_frame = native_pd.DataFrame(
            [[2, 12], [1, 11], [3, 13]], columns=["A", "B"]
        )
        session.create_dataframe(seed_frame).write.save_as_table(
            temp_table, table_type="temp"
        )

        # Snowpark pandas -> Snowpark DataFrame -> Snowpark pandas.
        df = pd.read_snowflake(temp_table)
        snow_result = df.to_snowpark(index=False).to_snowpark_pandas()

        # Input first, then output: each must have a populated relaxed query
        # compiler running in dummy-row-position mode.
        for frame in (df, snow_result):
            relaxed_qc = frame._query_compiler._relaxed_query_compiler
            assert relaxed_qc is not None
            assert relaxed_qc._dummy_row_pos_mode is True

        # The round trip must not change the data.
        expected = df.to_pandas()
        assert_frame_equal(snow_result, expected)


@sql_count_checker(query_count=0)
def test_dummy_row_pos_optimization_enabled_on_session(db_parameters):
with Session.builder.configs(db_parameters).create() as new_session:
Expand Down
Loading