Merge branch 'main' into helmeleegy-SNOW-2504821

sfc-gh-helmeleegy · sfc-gh-helmeleegy · commit d9b42a1cdeb4 · 2025-10-30T14:55:39.000-07:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -37,6 +37,11 @@
   - `pivot_table()` with `sort=True`, non-string `index` list, non-string `columns` list, non-string `values` list, or `aggfunc` dict with non-string values
   - `fillna()` with `downcast` parameter or using `limit` together with `value`
   - `dropna()` with `axis=1`
+  - `groupby()` with `axis=1`, `by!=None and level!=None`, or by containing any non-pandas hashable labels.
+  - `groupby_fillna()` with `downcast` parameter
+  - `groupby_first()` with `min_count>1`
+  - `groupby_last()` with `min_count>1`
+  - `shift()` with `freq` parameter
 
 #### Bug Fixes
 
@@ -210,11 +215,6 @@
   - `skew()` with `axis=1` or `numeric_only=False` parameters
   - `round()` with `decimals` parameter as a Series
   - `corr()` with `method!=pearson` parameter
-  - `df.groupby()` with `axis=1`, `by!=None and level!=None`, or by containing any non-pandas hashable labels.
-  - `groupby_fillna()` with `downcast` parameter
-  - `groupby_first()` with `min_count>1`
-  - `groupby_last()` with `min_count>1`
-  - `shift()` with `freq` parameter
 - Set `cte_optimization_enabled` to True for all Snowpark pandas sessions.
 - Add support for the following in faster pandas:
   - `isin`
diff --git a/src/snowflake/snowpark/functions.py b/src/snowflake/snowpark/functions.py
@@ -7135,10 +7135,15 @@ def array_contains(
         variant: Column containing the VARIANT to find.
         array: Column containing the ARRAY to search.
 
+            If this is a semi-structured array, you're required to explicitly cast the following SQL types into a VARIANT:
+
+            - `String & Binary <https://docs.snowflake.com/en/sql-reference/data-types-text>`_
+            - `Date & Time <https://docs.snowflake.com/en/sql-reference/data-types-datetime>`_
+
     Example::
         >>> from snowflake.snowpark import Row
-        >>> df = session.create_dataframe([Row([1, 2]), Row([1, 3])], schema=["a"])
-        >>> df.select(array_contains(lit(2), "a").alias("result")).show()
+        >>> df = session.create_dataframe([Row(["apple", "banana"]), Row(["apple", "orange"])], schema=["a"])
+        >>> df.select(array_contains(lit("banana").cast("variant"), "a").alias("result")).show()
         ------------
         |"RESULT"  |
         ------------
diff --git a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py
@@ -5548,7 +5548,7 @@ def _groupby_first_last(
         return result
 
     @register_query_compiler_method_not_implemented(
-        "DataFrameGroupBy",
+        ["DataFrameGroupBy", "SeriesGroupBy"],
         "first",
         UnsupportedArgsRule(
             unsupported_conditions=[
@@ -5594,7 +5594,7 @@ def groupby_first(
         )
 
     @register_query_compiler_method_not_implemented(
-        "DataFrameGroupBy",
+        ["DataFrameGroupBy", "SeriesGroupBy"],
         "last",
         UnsupportedArgsRule(
             unsupported_conditions=[
@@ -5640,7 +5640,7 @@ def groupby_last(
         )
 
     @register_query_compiler_method_not_implemented(
-        "DataFrameGroupBy",
+        ["DataFrameGroupBy", "SeriesGroupBy"],
         "rank",
         UnsupportedArgsRule(
             unsupported_conditions=[
@@ -6102,7 +6102,7 @@ def groupby_rolling(
         return result_qc
 
     @register_query_compiler_method_not_implemented(
-        "DataFrameGroupBy",
+        ["DataFrameGroupBy", "SeriesGroupBy"],
         "shift",
         UnsupportedArgsRule(
             unsupported_conditions=[
@@ -7107,7 +7107,7 @@ def groupby_value_counts(
         )
 
     @register_query_compiler_method_not_implemented(
-        "DataFrameGroupBy",
+        ["DataFrameGroupBy", "SeriesGroupBy"],
         "fillna",
         UnsupportedArgsRule(
             unsupported_conditions=[
diff --git a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
@@ -59,6 +59,12 @@
 )
 from snowflake.snowpark.modin.plugin.compiler.snowflake_query_compiler import (
     HYBRID_SWITCH_FOR_UNIMPLEMENTED_METHODS,
+    UnsupportedArgsRule,
+    _GROUPBY_UNSUPPORTED_GROUPING_MESSAGE,
+    register_query_compiler_method_not_implemented,
+)
+from snowflake.snowpark.modin.plugin._internal.groupby_utils import (
+    check_is_groupby_supported_by_snowflake,
 )
 from snowflake.snowpark.modin.plugin._typing import DropKeep, ListLike
 from snowflake.snowpark.modin.plugin.extensions.snow_partition_iterator import (
@@ -1549,6 +1555,22 @@ def fillna(
 
 # Snowpark pandas defines a custom GroupBy object
 @register_series_accessor("groupby")
+@register_query_compiler_method_not_implemented(
+    "Series",
+    "groupby",
+    UnsupportedArgsRule(
+        unsupported_conditions=[
+            (
+                lambda args: not check_is_groupby_supported_by_snowflake(
+                    args.get("by"),
+                    args.get("level"),
+                    args.get("axis", 0),
+                ),
+                f"Groupby {_GROUPBY_UNSUPPORTED_GROUPING_MESSAGE}",
+            )
+        ]
+    ),
+)
 def groupby(
     self,
     by=None,
diff --git a/tests/integ/modin/groupby/test_groupby_default2pandas.py b/tests/integ/modin/groupby/test_groupby_default2pandas.py
@@ -130,7 +130,7 @@ def test_groupby_with_numpy_array(basic_snowpark_pandas_df) -> None:
 @sql_count_checker(query_count=0)
 def test_groupby_series_with_numpy_array(native_series_multi_numeric, by_list) -> None:
     with pytest.raises(
-        NotImplementedError, match=GROUPBY_UNSUPPORTED_GROUPING_ERROR_PATTERN
+        NotImplementedError, match=_GROUPBY_UNSUPPORTED_GROUPING_MESSAGE
     ):
         pd.Series(native_series_multi_numeric).groupby(by=by_list).max()
 
diff --git a/tests/integ/modin/groupby/test_groupby_rolling.py b/tests/integ/modin/groupby/test_groupby_rolling.py
@@ -102,18 +102,18 @@ def test_groupby_rolling_dropna_false():
     )
 
 
-@sql_count_checker(query_count=1)
+@sql_count_checker(query_count=0)
 def test_groupby_rolling_series_negative():
     date_idx = pd.date_range("1/1/2000", periods=8, freq="min")
     date_idx.names = ["grp_col"]
     snow_ser = pd.Series([1, 1, np.nan, 2])
     with pytest.raises(
         NotImplementedError,
         match=re.escape(
-            "Groupby does not yet support axis == 1, by != None and level != None, or by containing any non-pandas hashable labels"
+            "Snowpark pandas does not yet support the method GroupBy.rolling for Series"
         ),
     ):
-        snow_ser.groupby(snow_ser.index).rolling(2).sum()
+        snow_ser.groupby(level=0).rolling(2).sum()
 
 
 @pytest.mark.parametrize(
diff --git a/tests/integ/modin/hybrid/test_switch_operations.py b/tests/integ/modin/hybrid/test_switch_operations.py