Skip to content

Commit ac482f5

Browse files
SNOW-1856438: Directly support Snowflake Cortex functions Summarize and Sentiment with apply (#2943)
Signed-off-by: Labanya Mukhopadhyay <labanya.mukhopadhyay@snowflake.com>
1 parent 1ad799d commit ac482f5

File tree

7 files changed

+248
-54
lines changed

7 files changed

+248
-54
lines changed

CHANGELOG.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,23 @@
44

55
### Snowpark Python API Updates
66

7+
#### Deprecations
8+
9+
- Deprecated Snowpark Python function `snowflake_cortex_summarize`. Users can install `snowflake-ml-python` and use the `snowflake.cortex.summarize` function instead.
10+
- Deprecated Snowpark Python function `snowflake_cortex_sentiment`. Users can install `snowflake-ml-python` and use the `snowflake.cortex.sentiment` function instead.
11+
712
#### New Features
813

914
- Added support for the following functions in `functions.py`
1015
- `normal`
1116
- `randn`
1217

18+
### Snowpark pandas API Updates
19+
20+
#### New Features
21+
22+
- Added support for applying Snowflake Cortex functions `Summarize` and `Sentiment`.
23+
1324
## 1.27.0 (2025-02-03)
1425

1526
### Snowpark Python API Updates

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@ def run(self):
202202
"scikit-learn", # Snowpark pandas 3rd party library testing
203203
# plotly version restricted due to foreseen change in query counts in version 6.0.0+
204204
"plotly<6.0.0", # Snowpark pandas 3rd party library testing
205+
"snowflake-ml-python",
205206
],
206207
"localtest": [
207208
"pandas",

src/snowflake/snowpark/functions.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@
212212
publicapi,
213213
validate_object_name,
214214
check_create_map_parameter,
215+
deprecated,
215216
)
216217
from snowflake.snowpark.column import (
217218
CaseExpr,
@@ -10779,13 +10780,16 @@ def make_interval(
1077910780
return res
1078010781

1078110782

10783+
@deprecated(
10784+
version="1.28.0",
10785+
extra_warning_text="Please consider installing snowflake-ml-python and using `snowflake.cortex.summarize` instead.",
10786+
extra_doc_string="Use :meth:`snowflake.cortex.summarize` instead.",
10787+
)
1078210788
def snowflake_cortex_summarize(text: ColumnOrLiteralStr):
1078310789
"""
1078410790
Summarizes the given English-language input text.
10785-
1078610791
Args:
1078710792
text: A string containing the English text from which a summary should be generated.
10788-
1078910793
Returns:
1079010794
A string containing a summary of the original text.
1079110795
"""
@@ -10794,10 +10798,14 @@ def snowflake_cortex_summarize(text: ColumnOrLiteralStr):
1079410798
return builtin(sql_func_name)(text_col)
1079510799

1079610800

10801+
@deprecated(
10802+
version="1.28.0",
10803+
extra_warning_text="Please consider installing snowflake-ml-python and using `snowflake.cortex.sentiment` instead.",
10804+
extra_doc_string="Use :meth:`snowflake.cortex.sentiment` instead.",
10805+
)
1079710806
def snowflake_cortex_sentiment(text: ColumnOrLiteralStr):
1079810807
"""
1079910808
Calculates a sentiment score for the given English-language input text.
10800-
1080110809
Args:
1080210810
text: A string containing the English text for which a sentiment score should be calculated.
1080310811
Returns:

src/snowflake/snowpark/modin/plugin/_internal/apply_utils.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,10 +97,23 @@
9797
sp_func.floor,
9898
sp_func.trunc,
9999
sp_func.sqrt,
100-
sp_func.snowflake_cortex_summarize,
101-
sp_func.snowflake_cortex_sentiment,
102100
}
103101

102+
try:
103+
import snowflake.cortex
104+
105+
SUPPORTED_SNOWFLAKE_CORTEX_FUNCTIONS_IN_APPLY = {
106+
snowflake.cortex.Summarize,
107+
snowflake.cortex.Sentiment,
108+
}
109+
110+
ALL_SNOWFLAKE_CORTEX_FUNCTIONS = tuple(
111+
i[1] for i in inspect.getmembers(snowflake.cortex)
112+
)
113+
except ImportError:
114+
SUPPORTED_SNOWFLAKE_CORTEX_FUNCTIONS_IN_APPLY = set()
115+
ALL_SNOWFLAKE_CORTEX_FUNCTIONS = tuple()
116+
104117

105118
class GroupbyApplySortMethod(Enum):
106119
"""

src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,8 @@
202202
is_supported_snowpark_python_function,
203203
sort_apply_udtf_result_columns_by_pandas_positions,
204204
make_series_map_snowpark_function,
205+
SUPPORTED_SNOWFLAKE_CORTEX_FUNCTIONS_IN_APPLY,
206+
ALL_SNOWFLAKE_CORTEX_FUNCTIONS,
205207
)
206208
from collections import defaultdict
207209
from snowflake.snowpark.modin.plugin._internal.binary_op_utils import (
@@ -8471,6 +8473,29 @@ def apply(
84718473
)
84728474
return self._apply_snowpark_python_function_to_columns(func, kwargs)
84738475

8476+
if func in SUPPORTED_SNOWFLAKE_CORTEX_FUNCTIONS_IN_APPLY:
8477+
if axis != 0:
8478+
ErrorMessage.not_implemented(
8479+
f"Snowpark pandas apply API doesn't yet support Snowflake Cortex function `{func.__name__}` with with axis = {axis}.'"
8480+
)
8481+
if raw is not False:
8482+
ErrorMessage.not_implemented(
8483+
f"Snowpark pandas apply API doesn't yet support Snowflake Cortex function `{func.__name__}`with raw = {raw}."
8484+
)
8485+
if args:
8486+
ErrorMessage.not_implemented(
8487+
f"Snowpark pandas apply API doesn't yet support Snowflake Cortex function `{func.__name__}` with args == '{args}'"
8488+
)
8489+
if kwargs:
8490+
ErrorMessage.not_implemented(
8491+
f"Snowpark pandas apply API doesn't yet support Snowflake Cortex function `{func.__name__}` with kwargs == '{kwargs}'"
8492+
)
8493+
return self._apply_snowflake_cortex_function_to_columns(func)
8494+
elif func in ALL_SNOWFLAKE_CORTEX_FUNCTIONS:
8495+
ErrorMessage.not_implemented(
8496+
f"Snowpark pandas apply API doesn't yet support Snowflake Cortex function `{func.__name__}`"
8497+
)
8498+
84748499
sf_func = NUMPY_UNIVERSAL_FUNCTION_TO_SNOWFLAKE_FUNCTION.get(func)
84758500
if sf_func is not None:
84768501
return self._apply_snowpark_python_function_to_columns(sf_func, kwargs)
@@ -8786,6 +8811,22 @@ def sf_function(col: SnowparkColumn) -> SnowparkColumn:
87868811
self._modin_frame.apply_snowpark_function_to_columns(sf_function)
87878812
)
87888813

8814+
def _apply_snowflake_cortex_function_to_columns(
8815+
self,
8816+
snowflake_function: Callable,
8817+
) -> "SnowflakeQueryCompiler":
8818+
"""Apply Snowflake Cortex function to columns."""
8819+
8820+
def sf_function(col: SnowparkColumn) -> SnowparkColumn:
8821+
resolved_positional = []
8822+
resolved_positional.append(col)
8823+
8824+
return snowflake_function(*resolved_positional)
8825+
8826+
return SnowflakeQueryCompiler(
8827+
self._modin_frame.apply_snowpark_function_to_columns(sf_function)
8828+
)
8829+
87898830
def applymap(
87908831
self,
87918832
func: AggFuncType,
@@ -8819,6 +8860,25 @@ def applymap(
88198860
)
88208861
return self._apply_snowpark_python_function_to_columns(func, kwargs)
88218862

8863+
if func in SUPPORTED_SNOWFLAKE_CORTEX_FUNCTIONS_IN_APPLY:
8864+
if na_action:
8865+
ErrorMessage.not_implemented(
8866+
f"Snowpark pandas applymap API doesn't yet support Snowflake Cortex function `{func.__name__}` with na_action == '{na_action}'"
8867+
)
8868+
if args:
8869+
ErrorMessage.not_implemented(
8870+
f"Snowpark pandas applymap API doesn't yet support Snowflake Cortex function `{func.__name__}` with args == '{args}'"
8871+
)
8872+
if kwargs:
8873+
ErrorMessage.not_implemented(
8874+
f"Snowpark pandas applymap API doesn't yet support Snowflake Cortex function `{func.__name__}` with kwargs == '{kwargs}'"
8875+
)
8876+
return self._apply_snowflake_cortex_function_to_columns(func)
8877+
elif func in ALL_SNOWFLAKE_CORTEX_FUNCTIONS:
8878+
ErrorMessage.not_implemented(
8879+
f"Snowpark pandas apply API doesn't yet support Snowflake Cortex function `{func.__name__}`"
8880+
)
8881+
88228882
# Check if the function is a known numpy function that can be translated
88238883
# to Snowflake function.
88248884
sf_func = NUMPY_UNIVERSAL_FUNCTION_TO_SNOWFLAKE_FUNCTION.get(func)
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
#
2+
# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
3+
#
4+
5+
import modin.pandas as pd
6+
import pytest
7+
from pytest import param
8+
9+
10+
from tests.integ.utils.sql_counter import SqlCounter, sql_count_checker
11+
from tests.utils import running_on_jenkins
12+
from snowflake.cortex import Sentiment, Summarize, Translate
13+
14+
15+
@pytest.mark.skipif(
16+
running_on_jenkins(),
17+
reason="TODO: SNOW-1859087 snowflake.cortex.summarize SSL error",
18+
)
19+
def test_apply_snowflake_cortex_summarize(session):
20+
21+
# TODO: SNOW-1758914 snowflake.cortex.summarize error on GCP
22+
with SqlCounter(query_count=0):
23+
if session.connection.host == "sfctest0.us-central1.gcp.snowflakecomputing.com":
24+
return
25+
26+
with SqlCounter(query_count=1):
27+
content = """pandas on Snowflake lets you run your pandas code in a distributed manner directly on your data in
28+
Snowflake. Just by changing the import statement and a few lines of code, you can get the familiar pandas experience
29+
you know and love with the scalability and security benefits of Snowflake. With pandas on Snowflake, you can work
30+
with much larger datasets and avoid the time and expense of porting your pandas pipelines to other big data
31+
frameworks or provisioning large and expensive machines. It runs workloads natively in Snowflake through
32+
transpilation to SQL, enabling it to take advantage of parallelization and the data governance and security
33+
benefits of Snowflake. pandas on Snowflake is delivered through the Snowpark pandas API as part of the Snowpark
34+
Python library, which enables scalable data processing of Python code within the Snowflake platform.
35+
"""
36+
s = pd.Series([content])
37+
summary = s.apply(Summarize).iloc[0]
38+
# this length check is to get around the fact that this function may not be deterministic
39+
assert 0 < len(summary) < len(content)
40+
41+
42+
@pytest.mark.skipif(
43+
running_on_jenkins(),
44+
reason="TODO: SNOW-1859087 snowflake.cortex.sentiment SSL error",
45+
)
46+
def test_apply_snowflake_cortex_sentiment_series(session):
47+
48+
# TODO: SNOW-1758914 snowflake.cortex.sentiment error on GCP
49+
with SqlCounter(query_count=0):
50+
if session.connection.host == "sfctest0.us-central1.gcp.snowflakecomputing.com":
51+
return
52+
53+
with SqlCounter(query_count=1):
54+
content = "A very very bad review!"
55+
s = pd.Series([content])
56+
sentiment = s.apply(Sentiment).iloc[0]
57+
assert -1 <= sentiment <= 0
58+
59+
60+
def test_apply_snowflake_cortex_sentiment_df(session):
61+
62+
# TODO: SNOW-1758914 snowflake.cortex.sentiment error on GCP
63+
with SqlCounter(query_count=0):
64+
if session.connection.host == "sfctest0.us-central1.gcp.snowflakecomputing.com":
65+
return
66+
text_list = [
67+
"A first row of text.",
68+
"This is a very bad test.",
69+
"This is the best test ever.",
70+
]
71+
72+
content_frame = pd.DataFrame(text_list, columns=["content"])
73+
with SqlCounter(query_count=4):
74+
res = content_frame.apply(Sentiment)
75+
sent_row_2 = res["content"][1]
76+
sent_row_3 = res["content"][2]
77+
assert -1 <= sent_row_2 <= 0
78+
assert 0 <= sent_row_3 <= 1
79+
80+
81+
@pytest.mark.skipif(
82+
running_on_jenkins(),
83+
reason="TODO: SNOW-1859087 snowflake.cortex.sentiment SSL error",
84+
)
85+
@sql_count_checker(query_count=0)
86+
@pytest.mark.parametrize(
87+
"is_series, operation",
88+
[
89+
param(
90+
True,
91+
(lambda s: s.apply(Translate, source_language="en", target_language="de")),
92+
id="series_cortex_unsupported_function_translate",
93+
),
94+
param(
95+
False,
96+
(
97+
lambda df: df.apply(
98+
Translate, source_language="en", target_language="de"
99+
)
100+
),
101+
id="df_cortex_unsupported_function_translate",
102+
),
103+
param(
104+
True,
105+
(lambda s: s.apply(Sentiment, args=("hello",))),
106+
id="series_cortex_unsupported_args",
107+
),
108+
param(
109+
False,
110+
(lambda df: df.apply(Sentiment, args=("hello",))),
111+
id="df_cortex_unsupported_args",
112+
),
113+
param(
114+
True,
115+
(lambda s: s.apply(Sentiment, extra="hello")),
116+
id="series_cortex_unsupported_kwargs",
117+
),
118+
param(
119+
False,
120+
(lambda df: df.apply(Sentiment, extra="hello")),
121+
id="df_cortex_unsupported_kwargs",
122+
),
123+
param(
124+
True,
125+
(lambda s: s.apply(Sentiment, na_action="ignore")),
126+
id="series_cortex_unsupported_na_action",
127+
),
128+
param(
129+
False,
130+
(lambda df: df.apply(Sentiment, raw=True)),
131+
id="df_cortex_unsupported_raw",
132+
),
133+
param(
134+
False,
135+
(lambda df: df.apply(Sentiment, axis=1)),
136+
id="df_cortex_unsupported_axis_1",
137+
),
138+
],
139+
)
140+
def test_apply_snowflake_cortex_negative(session, is_series, operation):
141+
142+
# TODO: SNOW-1758914 snowflake.cortex.sentiment error on GCP
143+
if session.connection.host == "sfctest0.us-central1.gcp.snowflakecomputing.com":
144+
return
145+
146+
content = "One day I will see the world."
147+
modin_input = (pd.Series if is_series else pd.DataFrame)([content])
148+
with pytest.raises(NotImplementedError):
149+
operation(modin_input)

tests/integ/modin/test_apply_snowpark_python_functions.py

Lines changed: 1 addition & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,7 @@
1010
import pytest
1111

1212
from tests.integ.modin.utils import assert_frame_equal, assert_series_equal
13-
from tests.integ.utils.sql_counter import sql_count_checker, SqlCounter
14-
from tests.utils import running_on_jenkins
13+
from tests.integ.utils.sql_counter import sql_count_checker
1514

1615

1716
@sql_count_checker(query_count=4)
@@ -70,50 +69,3 @@ def test_apply_snowpark_python_function_not_implemented():
7069
pd.DataFrame({"a": [1, 2, 3]}).apply(asc, axis=1)
7170
with pytest.raises(NotImplementedError):
7271
pd.DataFrame({"a": [1, 2, 3]}).apply(asc, args=(1, 2))
73-
74-
75-
@pytest.mark.skipif(
76-
running_on_jenkins(),
77-
reason="TODO: SNOW-1859087 snowflake.cortex.summarize SSL error",
78-
)
79-
def test_apply_snowflake_cortex_summarize(session):
80-
from snowflake.snowpark.functions import snowflake_cortex_summarize
81-
82-
# TODO: SNOW-1758914 snowflake.cortex.summarize error on GCP
83-
with SqlCounter(query_count=0):
84-
if session.connection.host == "sfctest0.us-central1.gcp.snowflakecomputing.com":
85-
return
86-
87-
with SqlCounter(query_count=1):
88-
content = """pandas on Snowflake lets you run your pandas code in a distributed manner directly on your data in
89-
Snowflake. Just by changing the import statement and a few lines of code, you can get the familiar pandas experience
90-
you know and love with the scalability and security benefits of Snowflake. With pandas on Snowflake, you can work
91-
with much larger datasets and avoid the time and expense of porting your pandas pipelines to other big data
92-
frameworks or provisioning large and expensive machines. It runs workloads natively in Snowflake through
93-
transpilation to SQL, enabling it to take advantage of parallelization and the data governance and security
94-
benefits of Snowflake. pandas on Snowflake is delivered through the Snowpark pandas API as part of the Snowpark
95-
Python library, which enables scalable data processing of Python code within the Snowflake platform.
96-
"""
97-
s = pd.Series([content])
98-
summary = s.apply(snowflake_cortex_summarize).iloc[0]
99-
# this length check is to get around the fact that this function may not be deterministic
100-
assert 0 < len(summary) < len(content)
101-
102-
103-
@pytest.mark.skipif(
104-
running_on_jenkins(),
105-
reason="TODO: SNOW-1859087 snowflake.cortex.sentiment SSL error",
106-
)
107-
def test_apply_snowflake_cortex_sentiment(session):
108-
from snowflake.snowpark.functions import snowflake_cortex_sentiment
109-
110-
# TODO: SNOW-1758914 snowflake.cortex.sentiment error on GCP
111-
with SqlCounter(query_count=0):
112-
if session.connection.host == "sfctest0.us-central1.gcp.snowflakecomputing.com":
113-
return
114-
115-
with SqlCounter(query_count=1):
116-
content = "A very very bad review!"
117-
s = pd.Series([content])
118-
sentiment = s.apply(snowflake_cortex_sentiment).iloc[0]
119-
assert -1 <= sentiment <= 0

0 commit comments

Comments
 (0)