Skip to content

Commit c364364

Browse files
Merge branch 'main' into feature/aherrera/SNOW-2432059-StringAndBinary-part1
2 parents 918498e + 01dead4 commit c364364

File tree

5 files changed

+53
-4
lines changed

5 files changed

+53
-4
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@
7979
- Added a fix for floating point precision discrepancies in `interval_day_time_from_parts`.
8080
- Fixed a bug where writing Snowpark pandas dataframes on the pandas backend with a column multiindex to Snowflake with `to_snowflake` would raise `KeyError`.
8181
- Fixed a bug where `DataFrameReader.dbapi` (PuPr) was not compatible with oracledb 3.4.0.
82+
- Fixed a bug where `modin` would unintentionally be imported during session initialization in some scenarios.
8283

8384
#### Improvements
8485

@@ -163,6 +164,7 @@
163164
- `groupby.median`
164165
- `groupby.std`
165166
- `groupby.var`
167+
- `drop_duplicates`
166168
- Reuse row count from the relaxed query compiler in `get_axis_len`.
167169

168170
#### Bug Fixes

src/snowflake/snowpark/modin/plugin/_internal/indexing_utils.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1130,7 +1130,9 @@ def get_frame_by_row_label(
11301130

11311131
# boolean indexer
11321132
if isinstance(key_datatype, BooleanType):
1133-
return _get_frame_by_row_label_boolean_frame(internal_frame, key)
1133+
return _get_frame_by_row_label_boolean_frame(
1134+
internal_frame, key, dummy_row_pos_mode
1135+
)
11341136

11351137
return _get_frame_by_row_label_non_boolean_frame(
11361138
internal_frame, key, dummy_row_pos_mode
@@ -1470,6 +1472,7 @@ def generate_bound_column(
14701472
def _get_frame_by_row_label_boolean_frame(
14711473
internal_frame: InternalFrame,
14721474
key: InternalFrame,
1475+
dummy_row_pos_mode: bool = False,
14731476
) -> InternalFrame:
14741477
"""
14751478
Select rows with boolean frame key. Here, if the frame and key's index are aligned, then the join is on their row
@@ -1488,6 +1491,7 @@ def _get_frame_by_row_label_boolean_frame(
14881491
internal_frame,
14891492
key,
14901493
"coalesce",
1494+
dummy_row_pos_mode,
14911495
)
14921496

14931497
key_bool_val_col = col(

src/snowflake/snowpark/modin/plugin/_internal/ordered_dataframe.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1257,17 +1257,31 @@ def join(
12571257

12581258
left = left.ensure_row_position_column(dummy_row_pos_mode=True)
12591259
if len(left_on_cols) == 1 and ROW_POSITION_COLUMN_LABEL in left_on_cols[0]:
1260+
left_row_position_snowflake_quoted_identifier = (
1261+
left.row_position_snowflake_quoted_identifier
1262+
)
12601263
left.row_position_snowflake_quoted_identifier = None
1261-
left = left.ensure_row_position_column(dummy_row_pos_mode=False)
1264+
new_left = left.ensure_row_position_column(dummy_row_pos_mode=False)
1265+
left.row_position_snowflake_quoted_identifier = (
1266+
left_row_position_snowflake_quoted_identifier
1267+
)
1268+
left = new_left
12621269
assert left.row_position_snowflake_quoted_identifier is not None
12631270
left_on_cols = [left.row_position_snowflake_quoted_identifier]
12641271
right = right.ensure_row_position_column(dummy_row_pos_mode=True)
12651272
if (
12661273
len(right_on_cols) == 1
12671274
and ROW_POSITION_COLUMN_LABEL in right_on_cols[0]
12681275
):
1276+
right_row_position_snowflake_quoted_identifier = (
1277+
right.row_position_snowflake_quoted_identifier
1278+
)
12691279
right.row_position_snowflake_quoted_identifier = None
1270-
right = right.ensure_row_position_column(dummy_row_pos_mode=False)
1280+
new_right = right.ensure_row_position_column(dummy_row_pos_mode=False)
1281+
right.row_position_snowflake_quoted_identifier = (
1282+
right_row_position_snowflake_quoted_identifier
1283+
)
1284+
right = new_right
12711285
assert right.row_position_snowflake_quoted_identifier is not None
12721286
right_on_cols = [right.row_position_snowflake_quoted_identifier]
12731287

src/snowflake/snowpark/session.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -755,7 +755,7 @@ def __init__(
755755
)
756756
)
757757

758-
if importlib.util.find_spec("modin"):
758+
if "modin" in sys.modules:
759759
try:
760760
from modin.config import AutoSwitchBackend
761761

tests/integ/modin/test_faster_pandas.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,35 @@ def test_drop(session):
279279
assert_frame_equal(snow_result, native_result)
280280

281281

282+
@sql_count_checker(query_count=3, join_count=2)
283+
def test_drop_duplicates(session):
284+
# create tables
285+
table_name = Utils.random_name_for_temp_object(TempObjectType.TABLE)
286+
session.create_dataframe(
287+
native_pd.DataFrame([[2, 12], [2, 12], [3, 13]], columns=["A", "B"])
288+
).write.save_as_table(table_name, table_type="temp")
289+
290+
# create snow dataframes
291+
df = pd.read_snowflake(table_name)
292+
snow_result = df.drop_duplicates()
293+
294+
# verify that the input dataframe has a populated relaxed query compiler
295+
assert df._query_compiler._relaxed_query_compiler is not None
296+
assert df._query_compiler._relaxed_query_compiler._dummy_row_pos_mode is True
297+
# verify that the output dataframe also has a populated relaxed query compiler
298+
assert snow_result._query_compiler._relaxed_query_compiler is not None
299+
assert (
300+
snow_result._query_compiler._relaxed_query_compiler._dummy_row_pos_mode is True
301+
)
302+
303+
# create pandas dataframes
304+
native_df = df.to_pandas()
305+
native_result = native_df.drop_duplicates()
306+
307+
# compare results
308+
assert_frame_equal(snow_result, native_result)
309+
310+
282311
@sql_count_checker(query_count=3, join_count=1)
283312
def test_duplicated(session):
284313
# create tables

0 commit comments

Comments
 (0)