Skip to content

Commit b0f4438

Browse files
SNOW-2429645: Add support for drop in faster pandas (#3892)
1 parent b6bcebb commit b0f4438

File tree

3 files changed

+56
-0
lines changed

3 files changed

+56
-0
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@
110110
- `sort_values`
111111
- `loc` (setting columns)
112112
- `to_datetime`
113+
- `drop`
113114
- Reuse row count from the relaxed query compiler in `get_axis_len`.
114115

115116
#### Bug Fixes

src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13084,6 +13084,32 @@ def drop(
1308413084
columns: Optional[Sequence[Hashable]] = None,
1308513085
level: Optional[Level] = None,
1308613086
errors: Literal["raise", "ignore"] = "raise",
13087+
) -> "SnowflakeQueryCompiler":
13088+
"""
13089+
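Wrapper around _drop_internal that adds faster pandas support: when a relaxed query compiler is present and no index labels are given, the drop is applied to it as well.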
13090+
"""
13091+
relaxed_query_compiler = None
13092+
if self._relaxed_query_compiler is not None and index is None:
13093+
relaxed_query_compiler = self._relaxed_query_compiler._drop_internal(
13094+
index=index,
13095+
columns=columns,
13096+
level=level,
13097+
errors=errors,
13098+
)
13099+
qc = self._drop_internal(
13100+
index=index,
13101+
columns=columns,
13102+
level=level,
13103+
errors=errors,
13104+
)
13105+
return self._maybe_set_relaxed_qc(qc, relaxed_query_compiler)
13106+
13107+
def _drop_internal(
13108+
self,
13109+
index: Optional[Sequence[Hashable]] = None,
13110+
columns: Optional[Sequence[Hashable]] = None,
13111+
level: Optional[Level] = None,
13112+
errors: Literal["raise", "ignore"] = "raise",
1308713113
) -> "SnowflakeQueryCompiler":
1308813114
"""
1308913115
Drop specified rows or columns.

tests/integ/modin/test_faster_pandas.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,35 @@ def test_read_filter_join_flag_disabled(session):
194194
assert_frame_equal(snow_result, native_result)
195195

196196

197+
@sql_count_checker(query_count=3)
def test_drop(session):
    """
    Check that dropping a column keeps the relaxed query compiler populated
    on both the input and the result, and that the result matches native pandas.
    """
    # Stage a small temp table so the frame is created through read_snowflake.
    temp_table = Utils.random_name_for_temp_object(TempObjectType.TABLE)
    source_frame = native_pd.DataFrame(
        [[2, True], [1, False], [3, False]], columns=["A", "B"]
    )
    session.create_dataframe(source_frame).write.save_as_table(
        temp_table, table_type="temp"
    )

    # Snowpark pandas side: read the table back and drop column "B".
    df = pd.read_snowflake(temp_table)
    snow_result = df.drop(columns=["B"])

    # Input first, then output: each must carry a relaxed query compiler
    # whose _dummy_row_pos_mode flag is set.
    for frame in (df, snow_result):
        relaxed_qc = frame._query_compiler._relaxed_query_compiler
        assert relaxed_qc is not None
        assert relaxed_qc._dummy_row_pos_mode is True

    # Native pandas side: materialize the input and apply the same drop.
    native_df = df.to_pandas()
    native_result = native_df.drop(columns=["B"])

    # The two results must agree.
    assert_frame_equal(snow_result, native_result)
224+
225+
197226
@pytest.mark.parametrize("func", ["isna", "isnull", "notna", "notnull"])
198227
@sql_count_checker(query_count=3)
199228
def test_isna_notna(session, func):

0 commit comments

Comments
 (0)