Skip to content

Commit d92dee9

Browse files
SNOW-1739034: Unskip tests requiring pandas 2.2.3 in anaconda. (#2829)
pandas 2.2.3 is available in Anaconda, so we can once again run the tests that try to use UD(T)Fs. Fixes SNOW-1739034. Signed-off-by: sfc-gh-mvashishtha <mahesh.vashishtha@snowflake.com>
1 parent b64e724 commit d92dee9

File tree

14 files changed

+24
-96
lines changed

14 files changed

+24
-96
lines changed

src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8435,14 +8435,11 @@ def apply(
84358435
)
84368436
return self._apply_snowpark_python_function_to_columns(func, kwargs)
84378437

8438-
# TODO SNOW-1739034: remove 'no cover' when apply tests are enabled in CI
8439-
sf_func = NUMPY_UNIVERSAL_FUNCTION_TO_SNOWFLAKE_FUNCTION.get(
8440-
func
8441-
) # pragma: no cover
8442-
if sf_func is not None: # pragma: no cover
8438+
sf_func = NUMPY_UNIVERSAL_FUNCTION_TO_SNOWFLAKE_FUNCTION.get(func)
8439+
if sf_func is not None:
84438440
return self._apply_snowpark_python_function_to_columns(sf_func, kwargs)
84448441

8445-
if get_snowflake_agg_func(func, {}, axis) is not None: # pragma: no cover
8442+
if get_snowflake_agg_func(func, {}, axis) is not None:
84468443
# np.std and np.var 'ddof' parameter defaults to 0 but
84478444
# df.std and df.var 'ddof' parameter defaults to 1.
84488445
# Set it here explicitly to 0 if not provided.
@@ -8470,7 +8467,7 @@ def apply(
84708467
# If raw, then pass numpy ndarray rather than pandas Series as input to the apply function.
84718468
if raw:
84728469

8473-
def wrapped_func(*args, **kwargs): # type: ignore[no-untyped-def] # pragma: no cover: adding type hint causes an error when creating udtf. also, skip coverage for this function because coverage tools can't tell that we're executing this function because we execute it in a UDTF.
8470+
def wrapped_func(*args, **kwargs): # type: ignore[no-untyped-def] # pragma: no cover: skip coverage for this function because coverage tools can't tell that we're executing this function because we execute it in a UDTF.
84748471
raw_input_obj = args[0].to_numpy()
84758472
args = (raw_input_obj,) + args[1:]
84768473
return func(*args, **kwargs)
@@ -8783,14 +8780,13 @@ def applymap(
87838780
)
87848781
return self._apply_snowpark_python_function_to_columns(func, kwargs)
87858782

8786-
# TODO SNOW-1739034: remove pragma no cover when apply tests are enabled in CI
8787-
# Check if the function is a known numpy function that can be translated to
8788-
# Snowflake function.
8783+
# Check if the function is a known numpy function that can be translated
8784+
# to Snowflake function.
87898785
sf_func = NUMPY_UNIVERSAL_FUNCTION_TO_SNOWFLAKE_FUNCTION.get(func)
8790-
if sf_func is not None: # pragma: no cover
8786+
if sf_func is not None:
87918787
return self._apply_snowpark_python_function_to_columns(sf_func, kwargs)
87928788

8793-
if func in (np.sum, np.min, np.max): # pragma: no cover
8789+
if func in (np.sum, np.min, np.max):
87948790
# Aggregate functions applied element-wise to columns are no-op.
87958791
return self
87968792

src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -636,7 +636,7 @@ def applymap():
636636
0 1.000 2.120
637637
1 3.356 4.567
638638
639-
>>> df.applymap(lambda x: len(str(x))) # doctest: +SKIP
639+
>>> df.applymap(lambda x: len(str(x)))
640640
0 1
641641
0 3 4
642642
1 5 5
@@ -646,7 +646,7 @@ def applymap():
646646
more efficiently by utilizing alternative dataframe operations instead of applymap.
647647
For example, you could square each number elementwise.
648648
649-
>>> df.applymap(lambda x: x**2) # doctest: +SKIP
649+
>>> df.applymap(lambda x: x**2)
650650
0 1
651651
0 1.000000 4.494400
652652
1 11.262736 20.857489
@@ -715,7 +715,6 @@ def aggregate():
715715

716716
def apply():
717717
# TODO SNOW-1818207 unskip tests once package resolution is fixed
718-
# TODO SNOW-1739034 unskip UDF tests when pandas 2.2.3 is available in anaconda
719718
"""
720719
Apply a function along an axis of the DataFrame.
721720
@@ -824,15 +823,15 @@ def apply():
824823
825824
Using a reducing function on ``axis=1``:
826825
827-
>>> df.apply(np.sum, axis=1) # doctest: +SKIP
826+
>>> df.apply(np.sum, axis=1)
828827
0 2
829828
1 10
830829
2 13
831830
dtype: int64
832831
833832
Returning a list-like object will result in a Series:
834833
835-
>>> df.apply(lambda x: [1, 2], axis=1) # doctest: +SKIP
834+
>>> df.apply(lambda x: [1, 2], axis=1)
836835
0 [1, 2]
837836
1 [1, 2]
838837
2 [1, 2]
@@ -843,7 +842,7 @@ def apply():
843842
>>> import scipy.stats
844843
>>> pd.session.custom_package_usage_config['enabled'] = True
845844
>>> pd.session.add_packages(['numpy', scipy])
846-
>>> df.apply(lambda x: np.dot(x * scipy.stats.norm.cdf(0), x * scipy.stats.norm.cdf(0)), axis=1) # doctest: +SKIP
845+
>>> df.apply(lambda x: np.dot(x * scipy.stats.norm.cdf(0), x * scipy.stats.norm.cdf(0)), axis=1)
847846
0 1.00
848847
1 14.50
849848
2 24.25
@@ -1025,7 +1024,6 @@ def keys():
10251024
"""
10261025

10271026
def transform():
1028-
# TODO SNOW-1739034 unskip UDF tests when pandas 2.2.3 is available in anaconda
10291027
"""
10301028
Call ``func`` on self producing a Snowpark pandas DataFrame with the same axis shape as self.
10311029
@@ -1059,19 +1057,19 @@ def transform():
10591057
0 1 3
10601058
1 2 4
10611059
2 3 5
1062-
>>> df.transform(lambda x: x + 1, axis=1) # doctest: +SKIP
1060+
>>> df.transform(lambda x: x + 1, axis=1)
10631061
col1 col2
10641062
0 2 4
10651063
1 3 5
10661064
2 4 6
10671065
10681066
Apply a numpy ufunc to every value in the DataFrame.
10691067
1070-
>>> df.transform(np.square, axis=1) # doctest: +SKIP
1068+
>>> df.transform(np.square, axis=1)
10711069
col1 col2
1072-
0 1 9
1073-
1 4 16
1074-
2 9 25
1070+
0 1.0 9.0
1071+
1 4.0 16.0
1072+
2 9.0 25.0
10751073
"""
10761074

10771075
def transpose():

src/snowflake/snowpark/modin/plugin/docstrings/groupby.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1057,7 +1057,6 @@ def cummax():
10571057
"""
10581058

10591059
def apply():
1060-
# TODO SNOW-1739034 unskip UDF tests when pandas 2.2.3 is available in anaconda
10611060
"""
10621061
Apply function ``func`` group-wise and combine the results together.
10631062
@@ -1119,7 +1118,7 @@ def apply():
11191118
its argument and returns a DataFrame. `apply` combines the result for
11201119
each group together into a new DataFrame:
11211120
1122-
>>> g1[['B', 'C']].apply(lambda x: x.select_dtypes('number') / x.select_dtypes('number').sum()) # doctest: +SKIP
1121+
>>> g1[['B', 'C']].apply(lambda x: x.select_dtypes('number') / x.select_dtypes('number').sum())
11231122
B C
11241123
0.0 0.333333 0.4
11251124
1.0 0.666667 0.6
@@ -1128,8 +1127,8 @@ def apply():
11281127
In the above, the groups are not part of the index. We can have them included
11291128
by using ``g2`` where ``group_keys=True``:
11301129
1131-
>>> g2[['B', 'C']].apply(lambda x: x.select_dtypes('number') / x.select_dtypes('number').sum()) # doctest: +SKIP
1132-
B C
1130+
>>> g2[['B', 'C']].apply(lambda x: x.select_dtypes('number') / x.select_dtypes('number').sum()) # doctest: +NORMALIZE_WHITESPACE
1131+
B C
11331132
A
11341133
a 0.0 0.333333 0.4
11351134
1.0 0.666667 0.6
@@ -2056,7 +2055,6 @@ def cov():
20562055
pass
20572056

20582057
def transform():
2059-
# TODO SNOW-1739034 unskip UDF tests when pandas 2.2.3 is available in anaconda
20602058
"""
20612059
Call function producing a same-indexed DataFrame on each group.
20622060
@@ -2126,7 +2124,7 @@ def transform():
21262124
i X 9 90 -9
21272125
j Y 10 10 -10
21282126
2129-
>>> df.groupby("col1", dropna=True).transform(lambda df, n: df.head(n), n=2) # doctest: +SKIP
2127+
>>> df.groupby("col1", dropna=True).transform(lambda df, n: df.head(n), n=2)
21302128
col2 col3 col4
21312129
a 1.0 40.0 -1.0
21322130
b NaN NaN NaN
@@ -2139,7 +2137,7 @@ def transform():
21392137
i NaN NaN NaN
21402138
j 10.0 10.0 -10.0
21412139
2142-
>>> df.groupby("col1", dropna=False).transform("mean") # doctest: +SKIP
2140+
>>> df.groupby("col1", dropna=False).transform("mean")
21432141
col2 col3 col4
21442142
a 2.50 25.0 -2.50
21452143
b 5.00 65.0 -5.00

tests/integ/modin/frame/test_apply.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,19 +25,13 @@
2525
create_func_with_return_type_hint,
2626
)
2727
from tests.integ.modin.utils import (
28-
PANDAS_VERSION_PREDICATE,
2928
assert_snowpark_pandas_equal_to_pandas,
3029
assert_snowpark_pandas_equals_to_pandas_without_dtypecheck,
3130
create_test_dfs,
3231
eval_snowpark_pandas_result,
3332
)
3433
from tests.integ.utils.sql_counter import SqlCounter, sql_count_checker
3534

36-
pytestmark = pytest.mark.skipif(
37-
PANDAS_VERSION_PREDICATE,
38-
reason="SNOW-1739034: tests with UDFs/sprocs cannot run without pandas 2.2.3 in Snowflake anaconda",
39-
)
40-
4135
# TODO SNOW-891796: replace native_pd with pd after allowing using snowpandas module/function in UDF
4236

4337
# test data which has a python type as return type that is not a pandas Series/pandas DataFrame/tuple/list

tests/integ/modin/frame/test_apply_axis_0.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,19 +18,13 @@
1818
create_func_with_return_type_hint,
1919
)
2020
from tests.integ.modin.utils import (
21-
PANDAS_VERSION_PREDICATE,
2221
assert_snowpark_pandas_equal_to_pandas,
2322
assert_snowpark_pandas_equals_to_pandas_without_dtypecheck,
2423
create_test_dfs,
2524
eval_snowpark_pandas_result,
2625
)
2726
from tests.integ.utils.sql_counter import SqlCounter, sql_count_checker
2827

29-
pytestmark = pytest.mark.skipif(
30-
PANDAS_VERSION_PREDICATE,
31-
reason="SNOW-1739034: tests with UDFs/sprocs cannot run without pandas 2.2.3 in Snowflake anaconda",
32-
)
33-
3428
# test data which has a python type as return type that is not a pandas Series/pandas DataFrame/tuple/list
3529
BASIC_DATA_FUNC_PYTHON_RETURN_TYPE_MAP = [
3630
[[[1.0, 2.2], [3, np.nan]], np.min, "float"],

tests/integ/modin/frame/test_applymap.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,18 +15,12 @@
1515
create_func_with_return_type_hint,
1616
)
1717
from tests.integ.modin.utils import (
18-
PANDAS_VERSION_PREDICATE,
1918
assert_snowpark_pandas_equal_to_pandas,
2019
create_test_dfs,
2120
eval_snowpark_pandas_result,
2221
)
2322
from tests.integ.utils.sql_counter import SqlCounter, sql_count_checker
2423

25-
pytestmark = pytest.mark.skipif(
26-
PANDAS_VERSION_PREDICATE,
27-
reason="SNOW-1739034: tests with UDFs/sprocs cannot run without pandas 2.2.3 in Snowflake anaconda",
28-
)
29-
3024

3125
@pytest.fixture(params=["applymap", "map"])
3226
def method(request):

tests/integ/modin/frame/test_cache_result.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111

1212
import snowflake.snowpark.modin.plugin # noqa: F401
1313
from tests.integ.modin.utils import (
14-
PANDAS_VERSION_PREDICATE,
1514
assert_snowpark_pandas_equals_to_pandas_without_dtypecheck,
1615
create_test_dfs,
1716
)
@@ -177,10 +176,6 @@ def test_cache_result_post_pivot(self, inplace, simple_test_data):
177176
cached_snow_df, native_df
178177
)
179178

180-
@pytest.mark.skipif(
181-
PANDAS_VERSION_PREDICATE,
182-
reason="SNOW-1739034: tests with UDFs/sprocs cannot run without pandas 2.2.3 in Snowflake anaconda",
183-
)
184179
def test_cache_result_post_apply(self, inplace, simple_test_data):
185180
# In this test, the caching doesn't aid in the query counts since
186181
# the implementation of apply(axis=1) itself contains intermediate
@@ -209,10 +204,6 @@ def test_cache_result_post_apply(self, inplace, simple_test_data):
209204
native_df,
210205
)
211206

212-
@pytest.mark.skipif(
213-
PANDAS_VERSION_PREDICATE,
214-
reason="SNOW-1739034: tests with UDFs/sprocs cannot run without pandas 2.2.3 in Snowflake anaconda",
215-
)
216207
def test_cache_result_post_applymap(self, inplace, simple_test_data):
217208
# The high query counts in this test case come from the setup and definition
218209
# of the UDFs used.

tests/integ/modin/groupby/test_all_any.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
import snowflake.snowpark.modin.plugin # noqa: F401
1515
from snowflake.snowpark.exceptions import SnowparkSQLException
1616
from tests.integ.modin.utils import (
17-
PANDAS_VERSION_PREDICATE,
1817
assert_frame_equal,
1918
create_test_dfs,
2019
eval_snowpark_pandas_result as _eval_snowpark_pandas_result,
@@ -99,10 +98,6 @@ def test_all_any_invalid_types(data, msg):
9998
pd.DataFrame(data).groupby("by").any().to_pandas()
10099

101100

102-
@pytest.mark.skipif(
103-
PANDAS_VERSION_PREDICATE,
104-
reason="SNOW-1739034: tests with UDFs/sprocs cannot run without pandas 2.2.3 in Snowflake anaconda",
105-
)
106101
@sql_count_checker(query_count=5, join_count=1, udtf_count=1)
107102
def test_all_any_chained():
108103
data = {

tests/integ/modin/groupby/test_groupby_apply.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
from snowflake.snowpark.exceptions import SnowparkSQLException
1919
from snowflake.snowpark.modin.plugin.extensions.utils import try_convert_index_to_native
2020
from tests.integ.modin.utils import (
21-
PANDAS_VERSION_PREDICATE,
2221
assert_snowpark_pandas_equal_to_pandas,
2322
assert_snowpark_pandas_equals_to_pandas_without_dtypecheck,
2423
assert_values_equal,
@@ -28,11 +27,6 @@
2827
)
2928
from tests.integ.utils.sql_counter import SqlCounter, sql_count_checker
3029

31-
pytestmark = pytest.mark.skipif(
32-
PANDAS_VERSION_PREDICATE,
33-
reason="SNOW-1739034: tests with UDFs/sprocs cannot run without pandas 2.2.3 in Snowflake anaconda",
34-
)
35-
3630
# Use the workaround shown below for applying functions that are attributes
3731
# of this module.
3832
# https://github.com/cloudpipe/cloudpickle?tab=readme-ov-file#overriding-pickles-serialization-mechanism-for-importable-constructs

tests/integ/modin/groupby/test_groupby_transform.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,17 +10,11 @@
1010

1111
import snowflake.snowpark.modin.plugin # noqa: F401
1212
from tests.integ.modin.utils import (
13-
PANDAS_VERSION_PREDICATE,
1413
create_test_dfs,
1514
eval_snowpark_pandas_result as _eval_snowpark_pandas_result,
1615
)
1716
from tests.integ.utils.sql_counter import SqlCounter, sql_count_checker
1817

19-
pytestmark = pytest.mark.skipif(
20-
PANDAS_VERSION_PREDICATE,
21-
reason="SNOW-1739034: tests with UDFs/sprocs cannot run without pandas 2.2.3 in Snowflake anaconda",
22-
)
23-
2418

2519
def eval_snowpark_pandas_result(*args, **kwargs):
2620
# Some calls to the native pandas function propagate attrs while some do not, depending on the values of its arguments.

0 commit comments

Comments
 (0)