Skip to content

Commit 90d4a55

Browse files
authored
Merge branch 'main' into jkew/add-hybrid-precheck
2 parents 8b6d1d0 + 30fa5f3 commit 90d4a55

File tree

15 files changed

+1451
-107
lines changed

15 files changed

+1451
-107
lines changed

CHANGELOG.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
- Fixed a bug that `DataFrameReader.dbapi` (PuPr) is not compatible with oracledb 3.4.0.
6868
- Fixed a bug where `modin` would unintentionally be imported during session initialization in some scenarios.
6969
- Fixed a bug where `session.udf|udtf|udaf|sproc.register` failed when an extra session argument was passed. These methods do not expect a session argument; please remove it if provided.
70+
- Fixed a bug in `DataFrameGroupBy.agg` where `func` is a list of tuples used to set the names of the output columns.
7071

7172
#### Improvements
7273

@@ -83,6 +84,7 @@
8384
- Added support for the `dtypes` parameter of `pd.get_dummies`
8485
- Added support for `nunique` in `df.pivot_table`, `df.agg` and other places where aggregate functions can be used.
8586
- Added support for `DataFrame.interpolate` and `Series.interpolate` with the "linear", "ffill"/"pad", and "backfill"/"bfill" methods. These use the SQL `INTERPOLATE_LINEAR`, `INTERPOLATE_FFILL`, and `INTERPOLATE_BFILL` functions (PuPr).
87+
- Added support for `DataFrame.groupby.rolling()`.
8688

8789
#### Improvements
8890

@@ -93,6 +95,16 @@
9395
- `skew()` with `axis=1` or `numeric_only=False` parameters
9496
- `round()` with `decimals` parameter as a Series
9597
- `corr()` with `method!=pearson` parameter
98+
- `shift()` with `suffix` or non-integer `periods` parameters
99+
- `sort_index()` with `axis=1` or `key` parameters
100+
- `sort_values()` with `axis=1`
101+
- `melt()` with `col_level` parameter
102+
- `apply()` with `result_type` parameter for DataFrame
103+
- `pivot_table()` with `sort=True`, non-string `index` list, non-string `columns` list, non-string `values` list, or `aggfunc` dict with non-string values
104+
- `fillna()` with `downcast` parameter or using `limit` together with `value`
105+
- `dropna()` with `axis=1`
106+
107+
96108
- Set `cte_optimization_enabled` to True for all Snowpark pandas sessions.
97109
- Add support for the following in faster pandas:
98110
- `isin`
@@ -152,6 +164,9 @@
152164
- `groupby.median`
153165
- `groupby.std`
154166
- `groupby.var`
167+
- `groupby.nunique`
168+
- `groupby.size`
169+
- `groupby.apply`
155170
- `drop_duplicates`
156171
- Reuse row count from the relaxed query compiler in `get_axis_len`.
157172

docs/source/modin/supported/groupby_supported.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,10 @@ Computations/descriptive stats
153153
| | | will be lost. ``rule`` frequencies 's', 'min', |
154154
| | | 'h', and 'D' are supported. |
155155
+-----------------------------+---------------------------------+----------------------------------------------------+
156-
| ``rolling`` | N | |
156+
| ``rolling`` | P | Implemented for DataFrameGroupBy objects. ``N`` for|
157+
| | | ``on``, non-integer ``window``, ``axis = 1``, |
158+
| | | ``method`` != ``single``, ``min_periods = 0``, or |
159+
| | | ``closed`` != ``None``. |
157160
+-----------------------------+---------------------------------+----------------------------------------------------+
158161
| ``sample`` | N | |
159162
+-----------------------------+---------------------------------+----------------------------------------------------+

setup.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,10 @@
6868
"lxml", # used in XML reader unit tests
6969
]
7070
MODIN_DEVELOPMENT_REQUIREMENTS = [
71-
"scipy", # Snowpark pandas 3rd party library testing
71+
# Snowpark pandas 3rd party library testing. Cap the scipy version because
72+
# Snowflake cannot find newer versions of scipy for python 3.11+. See
73+
# SNOW-2452791.
74+
"scipy<=1.16.0",
7275
"statsmodels", # Snowpark pandas 3rd party library testing
7376
"scikit-learn", # Snowpark pandas 3rd party library testing
7477
# plotly version restricted due to foreseen change in query counts in version 6.0.0+

src/snowflake/snowpark/modin/plugin/_internal/aggregation_utils.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -898,8 +898,10 @@ def _is_supported_snowflake_agg_func(
898898
"""
899899
if isinstance(agg_func, tuple) and len(agg_func) == 2:
900900
# For named aggregations, like `df.agg(new_col=("old_col", "sum"))`,
901-
# take the second part of the named aggregation.
902-
agg_func = agg_func[0]
901+
# take the aggregation part of the named aggregation.
902+
agg_func = (
903+
agg_func.func if isinstance(agg_func, AggFuncWithLabel) else agg_func[1]
904+
)
903905

904906
if get_snowflake_agg_func(agg_func, agg_kwargs, axis, _is_df_agg) is None:
905907
return AggregationSupportResult(
@@ -1381,10 +1383,15 @@ def get_agg_func_to_col_map(
13811383
def get_pandas_aggr_func_name(aggfunc: AggFuncTypeBase) -> str:
13821384
"""
13831385
Returns the friendly name for the aggr function. For example, if it is a callable, it will return __name__
1384-
otherwise the same string name value.
1386+
otherwise the same string name value. If aggfunc is a tuple, treat as named aggregation and return
1387+
the first element of the tuple (the output column name).
13851388
"""
13861389
return (
1387-
getattr(aggfunc, "__name__", str(aggfunc))
1390+
getattr(
1391+
aggfunc,
1392+
"__name__",
1393+
str(aggfunc[0]) if isinstance(aggfunc, tuple) else str(aggfunc),
1394+
)
13881395
if not isinstance(aggfunc, str)
13891396
else aggfunc
13901397
)
@@ -1536,7 +1543,12 @@ def generate_column_agg_info(
15361543
for func_info, label, identifier in zip(
15371544
agg_func_list, agg_col_labels, agg_col_identifiers
15381545
):
1539-
func = func_info.func
1546+
# If func_info.func is a tuple, treat as named aggregation and return the aggregate function
1547+
func = (
1548+
func_info.func[1]
1549+
if isinstance(func_info.func, tuple)
1550+
else func_info.func
1551+
)
15401552
is_dummy_agg = func_info.is_dummy_agg
15411553
agg_func_col = pandas_lit(None) if is_dummy_agg else quoted_identifier
15421554
snowflake_agg_func = get_snowflake_agg_func(func, agg_kwargs, axis=0)

0 commit comments

Comments
 (0)