Skip to content

Commit 80119fa

Browse files
SNOW-2455993: Added support for concat in faster pandas (#3949)
1 parent 4ef6ebb commit 80119fa

File tree

3 files changed

+89
-0
lines changed

3 files changed

+89
-0
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,7 @@
166166
- `groupby.nunique`
167167
- `groupby.size`
168168
- `drop_duplicates`
169+
- `concat`
169170
- Reuse row count from the relaxed query compiler in `get_axis_len`.
170171

171172
#### Bug Fixes

src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8614,6 +8614,57 @@ def concat(
86148614
names: Optional[list[Hashable]] = None,
86158615
verify_integrity: Optional[bool] = False,
86168616
sort: Optional[bool] = False,
8617+
) -> "SnowflakeQueryCompiler":
8618+
"""
8619+
Wrapper around _concat_internal to be supported in faster pandas.
8620+
"""
8621+
relaxed_query_compiler = None
8622+
if (
8623+
self._relaxed_query_compiler is not None
8624+
and all([qc._relaxed_query_compiler is not None for qc in other])
8625+
and axis == 0
8626+
):
8627+
new_other = [
8628+
qc._relaxed_query_compiler
8629+
for qc in other
8630+
if qc._relaxed_query_compiler is not None
8631+
]
8632+
relaxed_query_compiler = self._relaxed_query_compiler._concat_internal(
8633+
axis=axis,
8634+
other=new_other,
8635+
join=join,
8636+
ignore_index=ignore_index,
8637+
keys=keys,
8638+
levels=levels,
8639+
names=names,
8640+
verify_integrity=verify_integrity,
8641+
sort=sort,
8642+
)
8643+
qc = self._concat_internal(
8644+
axis=axis,
8645+
other=other,
8646+
join=join,
8647+
ignore_index=ignore_index,
8648+
keys=keys,
8649+
levels=levels,
8650+
names=names,
8651+
verify_integrity=verify_integrity,
8652+
sort=sort,
8653+
)
8654+
return self._maybe_set_relaxed_qc(qc, relaxed_query_compiler)
8655+
8656+
def _concat_internal(
8657+
self,
8658+
axis: Axis,
8659+
other: list["SnowflakeQueryCompiler"],
8660+
*,
8661+
join: Optional[Literal["outer", "inner"]] = "outer",
8662+
ignore_index: bool = False,
8663+
keys: Optional[Sequence[Hashable]] = None,
8664+
levels: Optional[list[Sequence[Hashable]]] = None,
8665+
names: Optional[list[Hashable]] = None,
8666+
verify_integrity: Optional[bool] = False,
8667+
sort: Optional[bool] = False,
86178668
) -> "SnowflakeQueryCompiler":
86188669
"""
86198670
Concatenate `self` with passed query compilers along specified axis.

tests/integ/modin/test_faster_pandas.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,43 @@ def test_agg(session, func):
250250
assert_series_equal(snow_result4, native_result4, check_dtype=False)
251251

252252

253+
@sql_count_checker(query_count=5, union_count=1)
def test_concat(session):
    """Check that a row-wise ``pd.concat`` of two Snowflake-backed frames
    carries a relaxed query compiler and matches native pandas.

    Two temp tables holding the same rows are written, read back through
    ``pd.read_snowflake``, sorted on column ``A`` and concatenated. The test
    then asserts that both inputs and the concatenated result expose a
    non-``None`` ``_relaxed_query_compiler`` whose ``_dummy_row_pos_mode`` is
    ``True``, and that the result equals ``native_pd.concat`` applied to the
    materialized inputs.
    """
    # Seed two temp tables with identical contents.
    rows = [[2, 12], [2, 12], [3, 13]]
    table_names = []
    for _ in range(2):
        temp_table = Utils.random_name_for_temp_object(TempObjectType.TABLE)
        session.create_dataframe(
            native_pd.DataFrame(rows, columns=["A", "B"])
        ).write.save_as_table(temp_table, table_type="temp")
        table_names.append(temp_table)

    # Build the Snowflake-backed inputs and concatenate them.
    snow_frames = [
        pd.read_snowflake(name).sort_values("A", ignore_index=True)
        for name in table_names
    ]
    snow_result = pd.concat(snow_frames)

    # Inputs first, then the output: each must carry a relaxed query compiler
    # with the dummy row position mode switched on.
    for frame in (*snow_frames, snow_result):
        relaxed_qc = frame._query_compiler._relaxed_query_compiler
        assert relaxed_qc is not None
        assert relaxed_qc._dummy_row_pos_mode is True

    # Materialize the inputs and compute the expected result with native pandas.
    native_frames = [frame.to_pandas() for frame in snow_frames]
    native_result = native_pd.concat(native_frames)

    # The Snowflake-backed result must match the native one.
    assert_frame_equal(snow_result, native_result)
288+
289+
253290
@sql_count_checker(query_count=3)
254291
def test_drop(session):
255292
# create tables

0 commit comments

Comments
 (0)