SNOW-1739034: Unskip tests requiring pandas 2.2.3 in anaconda. (#2829)

sfc-gh-mvashishtha · web-flow · commit d92dee95156b · 2025-01-08T00:19:54.000Z
pandas 2.2.3 is available in anaconda, so we can once again run the tests that try to use UD(T)Fs. Fixes SNOW-1739034 Signed-off-by: sfc-gh-mvashishtha <mahesh.vashishtha@snowflake.com>
diff --git a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py
@@ -8435,14 +8435,11 @@ def apply(
                 )
             return self._apply_snowpark_python_function_to_columns(func, kwargs)
 
-        # TODO SNOW-1739034: remove 'no cover' when apply tests are enabled in CI
-        sf_func = NUMPY_UNIVERSAL_FUNCTION_TO_SNOWFLAKE_FUNCTION.get(
-            func
-        )  # pragma: no cover
-        if sf_func is not None:  # pragma: no cover
+        sf_func = NUMPY_UNIVERSAL_FUNCTION_TO_SNOWFLAKE_FUNCTION.get(func)
+        if sf_func is not None:
             return self._apply_snowpark_python_function_to_columns(sf_func, kwargs)
 
-        if get_snowflake_agg_func(func, {}, axis) is not None:  # pragma: no cover
+        if get_snowflake_agg_func(func, {}, axis) is not None:
             # np.std and np.var 'ddof' parameter defaults to 0 but
             # df.std and df.var 'ddof' parameter defaults to 1.
             # Set it here explicitly to 0 if not provided.
@@ -8470,7 +8467,7 @@ def apply(
             # If raw, then pass numpy ndarray rather than pandas Series as input to the apply function.
             if raw:
 
-                def wrapped_func(*args, **kwargs):  # type: ignore[no-untyped-def] # pragma: no cover: adding type hint causes an error when creating udtf. also, skip coverage for this function because coverage tools can't tell that we're executing this function because we execute it in a UDTF.
+                def wrapped_func(*args, **kwargs):  # type: ignore[no-untyped-def] # pragma: no cover: skip coverage for this function because it executes inside a UDTF, where coverage tools cannot observe it.
                     raw_input_obj = args[0].to_numpy()
                     args = (raw_input_obj,) + args[1:]
                     return func(*args, **kwargs)
@@ -8783,14 +8780,13 @@ def applymap(
                 )
             return self._apply_snowpark_python_function_to_columns(func, kwargs)
 
-        # TODO SNOW-1739034: remove pragma no cover when apply tests are enabled in CI
-        # Check if the function is a known numpy function that can be translated to
-        # Snowflake function.
+        # Check if the function is a known numpy function that can be translated
+        # to a Snowflake function.
         sf_func = NUMPY_UNIVERSAL_FUNCTION_TO_SNOWFLAKE_FUNCTION.get(func)
-        if sf_func is not None:  # pragma: no cover
+        if sf_func is not None:
             return self._apply_snowpark_python_function_to_columns(sf_func, kwargs)
 
-        if func in (np.sum, np.min, np.max):  # pragma: no cover
+        if func in (np.sum, np.min, np.max):
             # Aggregate functions applied element-wise to columns are no-op.
             return self
 
diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py b/src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py
@@ -636,7 +636,7 @@ def applymap():
         0  1.000  2.120
         1  3.356  4.567
 
-        >>> df.applymap(lambda x: len(str(x)))  # doctest: +SKIP
+        >>> df.applymap(lambda x: len(str(x)))
            0  1
         0  3  4
         1  5  5
@@ -646,7 +646,7 @@ def applymap():
         more efficiently by utilizing alternative dataframe operations instead of applymap.
         For example, You could square each number elementwise.
 
-        >>> df.applymap(lambda x: x**2)  # doctest: +SKIP
+        >>> df.applymap(lambda x: x**2)
                    0          1
         0   1.000000   4.494400
         1  11.262736  20.857489
@@ -715,7 +715,6 @@ def aggregate():
 
     def apply():
         # TODO SNOW-1818207 unskip tests once package resolution is fixed
-        # TODO SNOW-1739034 unskip UDF tests when pandas 2.2.3 is available in anaconda
         """
         Apply a function along an axis of the DataFrame.
 
@@ -824,15 +823,15 @@ def apply():
 
         Using a reducing function on ``axis=1``:
 
-        >>> df.apply(np.sum, axis=1)  # doctest: +SKIP
+        >>> df.apply(np.sum, axis=1)
         0     2
         1    10
         2    13
         dtype: int64
 
         Returning a list-like object will result in a Series:
 
-        >>> df.apply(lambda x: [1, 2], axis=1)  # doctest: +SKIP
+        >>> df.apply(lambda x: [1, 2], axis=1)
         0    [1, 2]
         1    [1, 2]
         2    [1, 2]
@@ -843,7 +842,7 @@ def apply():
         >>> import scipy.stats
         >>> pd.session.custom_package_usage_config['enabled'] = True
         >>> pd.session.add_packages(['numpy', scipy])
-        >>> df.apply(lambda x: np.dot(x * scipy.stats.norm.cdf(0), x * scipy.stats.norm.cdf(0)), axis=1)  # doctest: +SKIP
+        >>> df.apply(lambda x: np.dot(x * scipy.stats.norm.cdf(0), x * scipy.stats.norm.cdf(0)), axis=1)
         0     1.00
         1    14.50
         2    24.25
@@ -1025,7 +1024,6 @@ def keys():
         """
 
     def transform():
-        # TODO SNOW-1739034 unskip UDF tests when pandas 2.2.3 is available in anaconda
         """
         Call ``func`` on self producing a Snowpark pandas DataFrame with the same axis shape as self.
 
@@ -1059,19 +1057,19 @@ def transform():
         0     1     3
         1     2     4
         2     3     5
-        >>> df.transform(lambda x: x + 1, axis=1)  # doctest: +SKIP
+        >>> df.transform(lambda x: x + 1, axis=1)
            col1  col2
         0     2     4
         1     3     5
         2     4     6
 
         Apply a numpy ufunc to every value in the DataFrame.
 
-        >>> df.transform(np.square, axis=1)  # doctest: +SKIP
+        >>> df.transform(np.square, axis=1)
            col1  col2
-        0     1     9
-        1     4    16
-        2     9    25
+        0   1.0   9.0
+        1   4.0  16.0
+        2   9.0  25.0
         """
 
     def transpose():
diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/groupby.py b/src/snowflake/snowpark/modin/plugin/docstrings/groupby.py
@@ -1057,7 +1057,6 @@ def cummax():
         """
 
     def apply():
-        # TODO SNOW-1739034 unskip UDF tests when pandas 2.2.3 is available in anaconda
         """
         Apply function ``func`` group-wise and combine the results together.
 
@@ -1119,7 +1118,7 @@ def apply():
         its argument and returns a DataFrame. `apply` combines the result for
         each group together into a new DataFrame:
 
-        >>> g1[['B', 'C']].apply(lambda x: x.select_dtypes('number') / x.select_dtypes('number').sum()) # doctest: +SKIP
+        >>> g1[['B', 'C']].apply(lambda x: x.select_dtypes('number') / x.select_dtypes('number').sum())
                     B    C
         0.0  0.333333  0.4
         1.0  0.666667  0.6
@@ -1128,8 +1127,8 @@ def apply():
         In the above, the groups are not part of the index. We can have them included
         by using ``g2`` where ``group_keys=True``:
 
-        >>> g2[['B', 'C']].apply(lambda x: x.select_dtypes('number') / x.select_dtypes('number').sum()) # doctest: +SKIP
-                    B    C
+        >>> g2[['B', 'C']].apply(lambda x: x.select_dtypes('number') / x.select_dtypes('number').sum()) # doctest: +NORMALIZE_WHITESPACE
+                      B    C
         A
         a 0.0  0.333333  0.4
           1.0  0.666667  0.6
@@ -2056,7 +2055,6 @@ def cov():
         pass
 
     def transform():
-        # TODO SNOW-1739034 unskip UDF tests when pandas 2.2.3 is available in anaconda
         """
         Call function producing a same-indexed DataFrame on each group.
 
@@ -2126,7 +2124,7 @@ def transform():
         i     X     9    90    -9
         j     Y    10    10   -10
 
-        >>> df.groupby("col1", dropna=True).transform(lambda df, n: df.head(n), n=2)  # doctest: +SKIP
+        >>> df.groupby("col1", dropna=True).transform(lambda df, n: df.head(n), n=2)
            col2  col3  col4
         a   1.0  40.0  -1.0
         b   NaN   NaN   NaN
@@ -2139,7 +2137,7 @@ def transform():
         i   NaN   NaN   NaN
         j  10.0  10.0 -10.0
 
-        >>> df.groupby("col1", dropna=False).transform("mean")  # doctest: +SKIP
+        >>> df.groupby("col1", dropna=False).transform("mean")
            col2  col3  col4
         a  2.50  25.0 -2.50
         b  5.00  65.0 -5.00
diff --git a/tests/integ/modin/frame/test_apply.py b/tests/integ/modin/frame/test_apply.py
@@ -25,19 +25,13 @@
     create_func_with_return_type_hint,
 )
 from tests.integ.modin.utils import (
-    PANDAS_VERSION_PREDICATE,
     assert_snowpark_pandas_equal_to_pandas,
     assert_snowpark_pandas_equals_to_pandas_without_dtypecheck,
     create_test_dfs,
     eval_snowpark_pandas_result,
 )
 from tests.integ.utils.sql_counter import SqlCounter, sql_count_checker
 
-pytestmark = pytest.mark.skipif(
-    PANDAS_VERSION_PREDICATE,
-    reason="SNOW-1739034: tests with UDFs/sprocs cannot run without pandas 2.2.3 in Snowflake anaconda",
-)
-
 # TODO SNOW-891796: replace native_pd with pd after allowing using snowpandas module/function in UDF
 
 # test data which has a python type as return type that is not a pandas Series/pandas DataFrame/tuple/list
diff --git a/tests/integ/modin/frame/test_apply_axis_0.py b/tests/integ/modin/frame/test_apply_axis_0.py
@@ -18,19 +18,13 @@
     create_func_with_return_type_hint,
 )
 from tests.integ.modin.utils import (
-    PANDAS_VERSION_PREDICATE,
     assert_snowpark_pandas_equal_to_pandas,
     assert_snowpark_pandas_equals_to_pandas_without_dtypecheck,
     create_test_dfs,
     eval_snowpark_pandas_result,
 )
 from tests.integ.utils.sql_counter import SqlCounter, sql_count_checker
 
-pytestmark = pytest.mark.skipif(
-    PANDAS_VERSION_PREDICATE,
-    reason="SNOW-1739034: tests with UDFs/sprocs cannot run without pandas 2.2.3 in Snowflake anaconda",
-)
-
 # test data which has a python type as return type that is not a pandas Series/pandas DataFrame/tuple/list
 BASIC_DATA_FUNC_PYTHON_RETURN_TYPE_MAP = [
     [[[1.0, 2.2], [3, np.nan]], np.min, "float"],
diff --git a/tests/integ/modin/frame/test_applymap.py b/tests/integ/modin/frame/test_applymap.py
@@ -15,18 +15,12 @@
     create_func_with_return_type_hint,
 )
 from tests.integ.modin.utils import (
-    PANDAS_VERSION_PREDICATE,
     assert_snowpark_pandas_equal_to_pandas,
     create_test_dfs,
     eval_snowpark_pandas_result,
 )
 from tests.integ.utils.sql_counter import SqlCounter, sql_count_checker
 
-pytestmark = pytest.mark.skipif(
-    PANDAS_VERSION_PREDICATE,
-    reason="SNOW-1739034: tests with UDFs/sprocs cannot run without pandas 2.2.3 in Snowflake anaconda",
-)
-
 
 @pytest.fixture(params=["applymap", "map"])
 def method(request):
diff --git a/tests/integ/modin/frame/test_cache_result.py b/tests/integ/modin/frame/test_cache_result.py
@@ -11,7 +11,6 @@
 
 import snowflake.snowpark.modin.plugin  # noqa: F401
 from tests.integ.modin.utils import (
-    PANDAS_VERSION_PREDICATE,
     assert_snowpark_pandas_equals_to_pandas_without_dtypecheck,
     create_test_dfs,
 )
@@ -177,10 +176,6 @@ def test_cache_result_post_pivot(self, inplace, simple_test_data):
                 cached_snow_df, native_df
             )
 
-    @pytest.mark.skipif(
-        PANDAS_VERSION_PREDICATE,
-        reason="SNOW-1739034: tests with UDFs/sprocs cannot run without pandas 2.2.3 in Snowflake anaconda",
-    )
     def test_cache_result_post_apply(self, inplace, simple_test_data):
         # In this test, the caching doesn't aid in the query counts since
         # the implementation of apply(axis=1) itself contains intermediate
@@ -209,10 +204,6 @@ def test_cache_result_post_apply(self, inplace, simple_test_data):
                 native_df,
             )
 
-    @pytest.mark.skipif(
-        PANDAS_VERSION_PREDICATE,
-        reason="SNOW-1739034: tests with UDFs/sprocs cannot run without pandas 2.2.3 in Snowflake anaconda",
-    )
     def test_cache_result_post_applymap(self, inplace, simple_test_data):
         # The high query counts in this test case come from the setup and definition
         # of the UDFs used.
diff --git a/tests/integ/modin/groupby/test_all_any.py b/tests/integ/modin/groupby/test_all_any.py
@@ -14,7 +14,6 @@
 import snowflake.snowpark.modin.plugin  # noqa: F401
 from snowflake.snowpark.exceptions import SnowparkSQLException
 from tests.integ.modin.utils import (
-    PANDAS_VERSION_PREDICATE,
     assert_frame_equal,
     create_test_dfs,
     eval_snowpark_pandas_result as _eval_snowpark_pandas_result,
@@ -99,10 +98,6 @@ def test_all_any_invalid_types(data, msg):
         pd.DataFrame(data).groupby("by").any().to_pandas()
 
 
-@pytest.mark.skipif(
-    PANDAS_VERSION_PREDICATE,
-    reason="SNOW-1739034: tests with UDFs/sprocs cannot run without pandas 2.2.3 in Snowflake anaconda",
-)
 @sql_count_checker(query_count=5, join_count=1, udtf_count=1)
 def test_all_any_chained():
     data = {
diff --git a/tests/integ/modin/groupby/test_groupby_apply.py b/tests/integ/modin/groupby/test_groupby_apply.py
@@ -18,7 +18,6 @@
 from snowflake.snowpark.exceptions import SnowparkSQLException
 from snowflake.snowpark.modin.plugin.extensions.utils import try_convert_index_to_native
 from tests.integ.modin.utils import (
-    PANDAS_VERSION_PREDICATE,
     assert_snowpark_pandas_equal_to_pandas,
     assert_snowpark_pandas_equals_to_pandas_without_dtypecheck,
     assert_values_equal,
@@ -28,11 +27,6 @@
 )
 from tests.integ.utils.sql_counter import SqlCounter, sql_count_checker
 
-pytestmark = pytest.mark.skipif(
-    PANDAS_VERSION_PREDICATE,
-    reason="SNOW-1739034: tests with UDFs/sprocs cannot run without pandas 2.2.3 in Snowflake anaconda",
-)
-
 # Use the workaround shown below for applying functions that are attributes
 # of this module.
 # https://github.com/cloudpipe/cloudpickle?tab=readme-ov-file#overriding-pickles-serialization-mechanism-for-importable-constructs
diff --git a/tests/integ/modin/groupby/test_groupby_transform.py b/tests/integ/modin/groupby/test_groupby_transform.py
@@ -10,17 +10,11 @@
 
 import snowflake.snowpark.modin.plugin  # noqa: F401
 from tests.integ.modin.utils import (
-    PANDAS_VERSION_PREDICATE,
     create_test_dfs,
     eval_snowpark_pandas_result as _eval_snowpark_pandas_result,
 )
 from tests.integ.utils.sql_counter import SqlCounter, sql_count_checker
 
-pytestmark = pytest.mark.skipif(
-    PANDAS_VERSION_PREDICATE,
-    reason="SNOW-1739034: tests with UDFs/sprocs cannot run without pandas 2.2.3 in Snowflake anaconda",
-)
-
 
 def eval_snowpark_pandas_result(*args, **kwargs):
     # Some calls to the native pandas function propagate attrs while some do not, depending on the values of its arguments.
diff --git a/tests/integ/modin/series/test_apply_and_map.py b/tests/integ/modin/series/test_apply_and_map.py
@@ -23,7 +23,6 @@
 from snowflake.snowpark.functions import udf
 from snowflake.snowpark.types import DoubleType, StringType, VariantType
 from tests.integ.modin.utils import (
-    PANDAS_VERSION_PREDICATE,
     ColumnSchema,
     assert_snowpark_pandas_equal_to_pandas,
     assert_snowpark_pandas_equals_to_pandas_without_dtypecheck,
@@ -33,11 +32,6 @@
 )
 from tests.integ.utils.sql_counter import SqlCounter, sql_count_checker
 
-pytestmark = pytest.mark.skipif(
-    PANDAS_VERSION_PREDICATE,
-    reason="SNOW-1739034: tests with UDFs/sprocs cannot run without pandas 2.2.3 in Snowflake anaconda",
-)
-
 BASIC_DATA_FUNC_RETURN_TYPE_MAP = [
     ([1, 2, 3, None], lambda x: x + 1, "int"),
     param(
diff --git a/tests/integ/modin/test_session.py b/tests/integ/modin/test_session.py
@@ -17,7 +17,6 @@
     _remove_session,
 )
 from tests.integ.modin.utils import (
-    PANDAS_VERSION_PREDICATE,
     create_test_dfs,
     eval_snowpark_pandas_result,
 )
@@ -216,10 +215,6 @@ def test_snowpark_pandas_session_class_does_not_exist_snow_1022098():
         pd.Session
 
 
-@pytest.mark.skipif(
-    PANDAS_VERSION_PREDICATE,
-    reason="SNOW-1739034: tests with UDFs/sprocs cannot run without pandas 2.2.3 in Snowflake anaconda",
-)
 @pytest.mark.parametrize(
     "operation",
     [
diff --git a/tests/integ/modin/test_sql_counter.py b/tests/integ/modin/test_sql_counter.py
@@ -8,7 +8,7 @@
 
 import snowflake.snowpark.modin.plugin  # noqa: F401
 from snowflake.snowpark import QueryRecord
-from tests.integ.modin.utils import PANDAS_VERSION_PREDICATE, assert_frame_equal
+from tests.integ.modin.utils import assert_frame_equal
 from tests.integ.utils.sql_counter import SqlCounter, sql_count_checker
 
 
@@ -126,10 +126,6 @@ def test_sql_counter_with_fallback_count():
     assert len(df) == 3
 
 
-@pytest.mark.skipif(
-    PANDAS_VERSION_PREDICATE,
-    reason="SNOW-1739034: tests with UDFs/sprocs cannot run without pandas 2.2.3 in Snowflake anaconda",
-)
 @sql_count_checker(query_count=5, join_count=2, udtf_count=1)
 def test_sql_counter_with_df_udtf_count():
     df = pd.DataFrame([[1, 2], [3, 4]]).apply(lambda x: str(type(x)), axis=1, raw=True)
diff --git a/tests/integ/modin/utils.py b/tests/integ/modin/utils.py