Skip to content

Commit 7412bf4

Browse files
SNOW-2359402 Enable autoswitching on DataFrameGroupBy (#3936)
1 parent df6da5d commit 7412bf4

File tree

12 files changed

+516
-51
lines changed

12 files changed

+516
-51
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,11 @@
188188
- `skew()` with `axis=1` or `numeric_only=False` parameters
189189
- `round()` with `decimals` parameter as a Series
190190
- `corr()` with `method!=pearson` parameter
191+
- `df.groupby()` with `axis=1`, `by!=None and level!=None`, or by containing any non-pandas hashable labels
192+
- `groupby_fillna()` with `downcast` parameter
193+
- `groupby_first()` with `min_count>1`
194+
- `groupby_last()` with `min_count>1`
195+
- `shift()` with `freq` parameter
191196
- Set `cte_optimization_enabled` to True for all Snowpark pandas sessions.
192197
- Add support for the following in faster pandas:
193198
- `isin`

src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py

Lines changed: 85 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -450,7 +450,7 @@
450450
# For now, limit number of quantiles supported df.quantiles to avoid producing recursion limit failure in Snowpark.
451451
MAX_QUANTILES_SUPPORTED: int = 16
452452

453-
_GROUPBY_UNSUPPORTED_GROUPING_MESSAGE = "does not yet support axis == 1, by != None and level != None, or by containing any non-pandas hashable labels."
453+
_GROUPBY_UNSUPPORTED_GROUPING_MESSAGE = "does not yet support axis == 1, by != None and level != None, or by containing any non-pandas hashable labels"
454454

455455
QUARTER_START_MONTHS = [1, 4, 7, 10]
456456

@@ -1153,8 +1153,7 @@ def stay_cost(
11531153
return QCCoercionCost.COST_IMPOSSIBLE
11541154

11551155
if method_key in HYBRID_SWITCH_FOR_UNSUPPORTED_ARGS:
1156-
1157-
if arguments and SnowflakeQueryCompiler._has_unsupported_args(
1156+
if SnowflakeQueryCompiler._has_unsupported_args(
11581157
api_cls_name, operation, arguments
11591158
):
11601159
WarningMessage.single_warning(
@@ -4546,7 +4545,7 @@ def groupby_ngroups(
45464545
is_supported = check_is_groupby_supported_by_snowflake(by, level, axis)
45474546
if not is_supported:
45484547
ErrorMessage.not_implemented(
4549-
f"Snowpark pandas GroupBy.ngroups {_GROUPBY_UNSUPPORTED_GROUPING_MESSAGE}"
4548+
f"Snowpark pandas GroupBy.ngroups {_GROUPBY_UNSUPPORTED_GROUPING_MESSAGE}."
45504549
)
45514550

45524551
query_compiler = get_frame_with_groupby_columns_as_index(
@@ -4555,7 +4554,7 @@ def groupby_ngroups(
45554554

45564555
if query_compiler is None:
45574556
ErrorMessage.not_implemented(
4558-
f"Snowpark pandas GroupBy.ngroups {_GROUPBY_UNSUPPORTED_GROUPING_MESSAGE}"
4557+
f"Snowpark pandas GroupBy.ngroups {_GROUPBY_UNSUPPORTED_GROUPING_MESSAGE}."
45594558
)
45604559

45614560
internal_frame = query_compiler._modin_frame
@@ -4706,7 +4705,7 @@ def _groupby_agg_internal(
47064705
by, level, axis
47074706
):
47084707
ErrorMessage.not_implemented(
4709-
f"Snowpark pandas GroupBy.aggregate {_GROUPBY_UNSUPPORTED_GROUPING_MESSAGE}"
4708+
f"Snowpark pandas GroupBy.aggregate {_GROUPBY_UNSUPPORTED_GROUPING_MESSAGE}."
47104709
)
47114710

47124711
by_list = query_compiler._modin_frame.index_column_pandas_labels
@@ -4961,6 +4960,7 @@ def groupby_apply(
49614960
include_groups: bool,
49624961
force_single_group: bool = False,
49634962
force_list_like_to_series: bool = False,
4963+
is_transform: bool = False,
49644964
) -> "SnowflakeQueryCompiler":
49654965
"""
49664966
Wrapper around _groupby_apply_internal to be supported in faster pandas.
@@ -4979,6 +4979,7 @@ def groupby_apply(
49794979
include_groups=include_groups,
49804980
force_single_group=force_single_group,
49814981
force_list_like_to_series=force_list_like_to_series,
4982+
is_transform=is_transform,
49824983
)
49834984
)
49844985
qc = self._groupby_apply_internal(
@@ -4992,6 +4993,7 @@ def groupby_apply(
49924993
include_groups=include_groups,
49934994
force_single_group=force_single_group,
49944995
force_list_like_to_series=force_list_like_to_series,
4996+
is_transform=is_transform,
49954997
)
49964998
return self._maybe_set_relaxed_qc(qc, relaxed_query_compiler)
49974999

@@ -5007,6 +5009,7 @@ def _groupby_apply_internal(
50075009
include_groups: bool,
50085010
force_single_group: bool = False,
50095011
force_list_like_to_series: bool = False,
5012+
is_transform: bool = False,
50105013
) -> "SnowflakeQueryCompiler":
50115014
"""
50125015
Group according to `by` and `level`, apply a function to each group, and combine the results.
@@ -5114,7 +5117,7 @@ def _groupby_apply_internal(
51145117
data_columns_index = _modin_frame.data_columns_index[
51155118
input_data_column_positions
51165119
]
5117-
is_transform = groupby_kwargs.get("apply_op") == "transform"
5120+
51185121
output_schema, udtf = create_udtf_for_groupby_apply(
51195122
agg_func,
51205123
agg_args,
@@ -5499,7 +5502,7 @@ def _groupby_first_last(
54995502
is_supported = check_is_groupby_supported_by_snowflake(by, level, axis)
55005503
if not is_supported:
55015504
ErrorMessage.not_implemented(
5502-
f"Snowpark pandas GroupBy.{method} {_GROUPBY_UNSUPPORTED_GROUPING_MESSAGE}"
5505+
f"Snowpark pandas GroupBy.{method} {_GROUPBY_UNSUPPORTED_GROUPING_MESSAGE}."
55035506
)
55045507
# TODO: Support groupby first and last with min_count (SNOW-1482931)
55055508
if agg_kwargs.get("min_count", -1) > 1:
@@ -5544,6 +5547,19 @@ def _groupby_first_last(
55445547
result = result.reset_index(drop=False)
55455548
return result
55465549

5550+
@register_query_compiler_method_not_implemented(
5551+
"DataFrameGroupBy",
5552+
"first",
5553+
UnsupportedArgsRule(
5554+
unsupported_conditions=[
5555+
(
5556+
lambda args: args.get("min_count", -1) > 1
5557+
or args.get("agg_kwargs", {}).get("min_count", -1) > 1,
5558+
"GroupBy.first does not yet support min_count > 1",
5559+
),
5560+
],
5561+
),
5562+
)
55475563
def groupby_first(
55485564
self,
55495565
by: Any,
@@ -5577,6 +5593,19 @@ def groupby_first(
55775593
"first", by, axis, groupby_kwargs, agg_args, agg_kwargs, drop, **kwargs
55785594
)
55795595

5596+
@register_query_compiler_method_not_implemented(
5597+
"DataFrameGroupBy",
5598+
"last",
5599+
UnsupportedArgsRule(
5600+
unsupported_conditions=[
5601+
(
5602+
lambda args: args.get("agg_kwargs", {}).get("min_count", -1) > 1
5603+
or args.get("min_count", -1) > 1,
5604+
"GroupBy.last does not yet support min_count > 1",
5605+
),
5606+
],
5607+
),
5608+
)
55805609
def groupby_last(
55815610
self,
55825611
by: Any,
@@ -5610,6 +5639,19 @@ def groupby_last(
56105639
"last", by, axis, groupby_kwargs, agg_args, agg_kwargs, drop, **kwargs
56115640
)
56125641

5642+
@register_query_compiler_method_not_implemented(
5643+
"DataFrameGroupBy",
5644+
"rank",
5645+
UnsupportedArgsRule(
5646+
unsupported_conditions=[
5647+
(
5648+
lambda args: args.get("groupby_kwargs", {}).get("level") is not None
5649+
and args.get("groupby_kwargs", {}).get("level") != 0,
5650+
"GroupBy.rank with level != 0 is not supported yet in Snowpark pandas.",
5651+
),
5652+
],
5653+
),
5654+
)
56135655
def groupby_rank(
56145656
self,
56155657
by: Any,
@@ -6059,6 +6101,23 @@ def groupby_rolling(
60596101
result_qc = SnowflakeQueryCompiler(new_frame)
60606102
return result_qc
60616103

6104+
@register_query_compiler_method_not_implemented(
6105+
"DataFrameGroupBy",
6106+
"shift",
6107+
UnsupportedArgsRule(
6108+
unsupported_conditions=[
6109+
(
6110+
lambda args: args.get("freq") is not None,
6111+
"'freq' argument is not supported yet in Snowpark pandas",
6112+
),
6113+
(
6114+
lambda args: args.get("groupby_kwargs", {}).get("level") is not None
6115+
and args.get("groupby_kwargs", {}).get("level") != 0,
6116+
"GroupBy.shift with level != 0 is not supported yet in Snowpark pandas",
6117+
),
6118+
],
6119+
),
6120+
)
60626121
def groupby_shift(
60636122
self,
60646123
by: Any,
@@ -6314,7 +6373,7 @@ def groupby_get_group(
63146373
is_supported = check_is_groupby_supported_by_snowflake(by, level, axis)
63156374
if not is_supported: # pragma: no cover
63166375
ErrorMessage.not_implemented(
6317-
f"Snowpark pandas GroupBy.get_group {_GROUPBY_UNSUPPORTED_GROUPING_MESSAGE}"
6376+
f"Snowpark pandas GroupBy.get_group {_GROUPBY_UNSUPPORTED_GROUPING_MESSAGE}."
63186377
)
63196378
if is_list_like(by):
63206379
ErrorMessage.not_implemented(
@@ -6418,7 +6477,7 @@ def _groupby_size_internal(
64186477
is_supported = check_is_groupby_supported_by_snowflake(by, level, axis)
64196478
if not is_supported:
64206479
ErrorMessage.not_implemented(
6421-
f"Snowpark pandas GroupBy.size {_GROUPBY_UNSUPPORTED_GROUPING_MESSAGE}"
6480+
f"Snowpark pandas GroupBy.size {_GROUPBY_UNSUPPORTED_GROUPING_MESSAGE}."
64226481
)
64236482
if not is_list_like(by):
64246483
by = [by]
@@ -6717,7 +6776,7 @@ def groupby_cummin(
67176776
self,
67186777
by: Any,
67196778
axis: int,
6720-
numeric_only: int,
6779+
numeric_only: bool,
67216780
groupby_kwargs: dict[str, Any],
67226781
) -> "SnowflakeQueryCompiler":
67236782
"""
@@ -6910,7 +6969,7 @@ def groupby_value_counts(
69106969
is_supported = check_is_groupby_supported_by_snowflake(by, level, axis)
69116970
if not is_supported:
69126971
ErrorMessage.not_implemented(
6913-
f"Snowpark pandas GroupBy.value_counts {_GROUPBY_UNSUPPORTED_GROUPING_MESSAGE}"
6972+
f"Snowpark pandas GroupBy.value_counts {_GROUPBY_UNSUPPORTED_GROUPING_MESSAGE}."
69146973
)
69156974
if bins is not None:
69166975
raise ErrorMessage.not_implemented("bins argument is not yet supported")
@@ -7047,6 +7106,18 @@ def groupby_value_counts(
70477106
ignore_index=not as_index, # When as_index=False, take the default positional index
70487107
)
70497108

7109+
@register_query_compiler_method_not_implemented(
7110+
"DataFrameGroupBy",
7111+
"fillna",
7112+
UnsupportedArgsRule(
7113+
unsupported_conditions=[
7114+
(
7115+
lambda args: args.get("downcast") is not None,
7116+
"'downcast' argument is not supported yet in Snowpark pandas",
7117+
),
7118+
],
7119+
),
7120+
)
70507121
def groupby_fillna(
70517122
self,
70527123
by: Any,
@@ -7089,7 +7160,7 @@ def groupby_fillna(
70897160
)
70907161
if not is_supported:
70917162
ErrorMessage.not_implemented(
7092-
f"Snowpark pandas GroupBy.fillna {_GROUPBY_UNSUPPORTED_GROUPING_MESSAGE}"
7163+
f"Snowpark pandas GroupBy.fillna {_GROUPBY_UNSUPPORTED_GROUPING_MESSAGE}."
70937164
)
70947165

70957166
if by is not None and not is_list_like(by):
@@ -7353,7 +7424,7 @@ def groupby_pct_change(
73537424
# Remaining parameters are validated in pct_change method
73547425
if not is_supported:
73557426
ErrorMessage.not_implemented(
7356-
f"Snowpark pandas GroupBy.pct_change {_GROUPBY_UNSUPPORTED_GROUPING_MESSAGE}"
7427+
f"Snowpark pandas GroupBy.pct_change {_GROUPBY_UNSUPPORTED_GROUPING_MESSAGE}."
73577428
)
73587429

73597430
by_labels = by

src/snowflake/snowpark/modin/plugin/extensions/dataframe_groupby_overrides.py

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,14 @@
5050
from snowflake.snowpark.modin.plugin._internal.apply_utils import (
5151
create_groupby_transform_func,
5252
)
53+
from snowflake.snowpark.modin.plugin._internal.groupby_utils import (
54+
check_is_groupby_supported_by_snowflake,
55+
)
5356
from snowflake.snowpark.modin.plugin.compiler.snowflake_query_compiler import (
5457
SnowflakeQueryCompiler,
58+
UnsupportedArgsRule,
59+
_GROUPBY_UNSUPPORTED_GROUPING_MESSAGE,
60+
register_query_compiler_method_not_implemented,
5561
)
5662

5763
# the following import is used in doctests
@@ -78,6 +84,22 @@
7884

7985

8086
@register_df_groupby_override("__init__")
87+
@register_query_compiler_method_not_implemented(
88+
"DataFrameGroupBy",
89+
"__init__",
90+
UnsupportedArgsRule(
91+
unsupported_conditions=[
92+
(
93+
lambda args: not check_is_groupby_supported_by_snowflake(
94+
args.get("by"),
95+
args.get("level"),
96+
args.get("axis", 0),
97+
),
98+
f"Groupby {_GROUPBY_UNSUPPORTED_GROUPING_MESSAGE}",
99+
)
100+
]
101+
),
102+
)
81103
def __init__(
82104
self,
83105
df,
@@ -114,9 +136,6 @@ def __init__(
114136
"group_keys": group_keys,
115137
}
116138
self._kwargs.update(kwargs)
117-
if "apply_op" not in self._kwargs:
118-
# Can be "apply", "transform", "filter" or "aggregate"
119-
self._kwargs.update({"apply_op": "apply"})
120139

121140

122141
@register_df_groupby_override("ngroups")
@@ -172,7 +191,7 @@ def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]:
172191

173192

174193
@register_df_groupby_override("apply")
175-
def apply(self, func, *args, include_groups=True, **kwargs):
194+
def apply(self, func, *args, include_groups=True, _is_transform=False, **kwargs):
176195
# TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions
177196
# TODO: SNOW-1244717: Explore whether window function are performant and can be used
178197
# whenever `func` is an aggregation function.
@@ -188,6 +207,7 @@ def apply(self, func, *args, include_groups=True, **kwargs):
188207
agg_kwargs=kwargs,
189208
series_groupby=False,
190209
include_groups=include_groups,
210+
is_transform=_is_transform,
191211
)
192212
)
193213
if dataframe_result.columns.equals(pandas.Index([MODIN_UNNAMED_SERIES_LABEL])):
@@ -320,11 +340,10 @@ def transform(
320340
dropna=False,
321341
sort=self._sort,
322342
)
323-
groupby_obj._kwargs["apply_op"] = "transform"
324-
325343
# Apply the transform function to each group.
326344
res = groupby_obj.apply(
327-
create_groupby_transform_func(func, by, level, *args, **kwargs)
345+
create_groupby_transform_func(func, by, level, *args, **kwargs),
346+
_is_transform=True,
328347
)
329348

330349
dropna = self._kwargs.get("dropna", True)

src/snowflake/snowpark/modin/plugin/extensions/series_groupby_overrides.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ def get_group(self, name, obj=None):
101101

102102

103103
@register_ser_groupby_override("apply")
104-
def apply(self, func, *args, include_groups=True, **kwargs):
104+
def apply(self, func, *args, include_groups=True, _is_transform=False, **kwargs):
105105
# TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.SeriesGroupBy functions
106106
if not callable(func):
107107
raise NotImplementedError("No support for non-callable `func`")
@@ -117,6 +117,7 @@ def apply(self, func, *args, include_groups=True, **kwargs):
117117
# TODO(https://github.com/modin-project/modin/issues/7096):
118118
# upstream the series_groupby param to Modin
119119
series_groupby=True,
120+
is_transform=_is_transform,
120121
)
121122
)
122123
if dataframe_result.columns.equals(pandas.Index([MODIN_UNNAMED_SERIES_LABEL])):

0 commit comments

Comments
 (0)