Commit 93ecd2f

SNOW-1994541: Change cortex complete to use snowflake.core APIs (#2260)
Co-authored-by: Patryk Czajka <[email protected]>
1 parent c9045c9 commit 93ecd2f

5 files changed: +130 -33 lines changed

src/snowflake/cli/_plugins/cortex/commands.py

Lines changed: 34 additions & 8 deletions

@@ -15,13 +15,17 @@
 from __future__ import annotations
 
 import sys
+from enum import Enum
 from pathlib import Path
 from typing import List, Optional
 
 import click
 import typer
 from click import UsageError
-from snowflake.cli._plugins.cortex.constants import DEFAULT_MODEL
+from snowflake.cli._plugins.cortex.constants import (
+    DEFAULT_BACKEND,
+    DEFAULT_MODEL,
+)
 from snowflake.cli._plugins.cortex.manager import CortexManager
 from snowflake.cli._plugins.cortex.types import (
     Language,
@@ -36,7 +40,7 @@
     OverrideableOption,
 )
 from snowflake.cli.api.commands.snow_typer import SnowTyperFactory
-from snowflake.cli.api.constants import PYTHON_3_12
+from snowflake.cli.api.constants import DEFAULT_SIZE_LIMIT_MB, PYTHON_3_12
 from snowflake.cli.api.output.types import (
     CollectionResult,
     CommandResult,
@@ -115,6 +119,11 @@ def search(
     return CollectionResult(response.results)
 
 
+class Backend(Enum):
+    SQL = "sql"
+    REST = "rest"
+
+
 @app.command(
     name="complete",
     requires_connection=True,
@@ -130,6 +139,11 @@ def complete(
         "--model",
         help="String specifying the model to be used.",
     ),
+    backend: Optional[Backend] = typer.Option(
+        DEFAULT_BACKEND,
+        "--backend",
+        help="String specifying whether to use sql or rest backend.",
+    ),
     file: Optional[Path] = ExclusiveReadableFileOption(
         help="JSON file containing conversation history to be used to generate a completion. Cannot be combined with TEXT argument.",
     ),
@@ -143,18 +157,30 @@ def complete(
 
     manager = CortexManager()
 
+    is_file_input: bool = False
     if text:
-        result_text = manager.complete_for_prompt(
-            text=Text(text),
+        prompt = text
+    elif file:
+        prompt = SecurePath(file).read_text(file_size_limit_mb=DEFAULT_SIZE_LIMIT_MB)
+        is_file_input = True
+    else:
+        raise UsageError("Either --file option or TEXT argument has to be provided.")
+
+    if backend == Backend.SQL:
+        result_text = manager.complete(
+            text=Text(prompt),
             model=Model(model),
+            is_file_input=is_file_input,
         )
-    elif file:
-        result_text = manager.complete_for_conversation(
-            conversation_json_file=SecurePath(file),
+    elif backend == Backend.REST:
+        root = get_cli_context().snow_api_root
+        result_text = manager.rest_complete(
+            text=Text(prompt),
             model=Model(model),
+            root=root,
         )
     else:
-        raise UsageError("Either --file option or TEXT argument has to be provided.")
+        raise UsageError("--backend option should be either rest or sql.")
 
     return MessageResult(result_text.strip())
 
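
The new --backend option is an Enum-typed typer option, so the CLI accepts only the values "sql" and "rest". Below is a minimal, standalone sketch (a hypothetical toy module, not part of this commit) illustrating how typer turns the Backend enum plus a string default into the "--backend [sql|rest]" choice that the generated docs snapshot shows.

from enum import Enum
from typing import Optional

import typer

app = typer.Typer()

DEFAULT_BACKEND = "rest"  # string default, mirroring constants.py below


class Backend(Enum):
    SQL = "sql"
    REST = "rest"


@app.command()
def complete(
    text: str,
    backend: Optional[Backend] = typer.Option(
        DEFAULT_BACKEND,
        "--backend",
        help="String specifying whether to use sql or rest backend.",
    ),
):
    # typer/click render the option as "--backend [sql|rest]" and coerce the
    # string default "rest" into Backend.REST before the body runs.
    typer.echo(f"backend={backend.value}, text={text}")


if __name__ == "__main__":
    app()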

src/snowflake/cli/_plugins/cortex/constants.py

Lines changed: 2 additions & 1 deletion

@@ -14,4 +14,5 @@
 
 from snowflake.cli._plugins.cortex.types import Model
 
-DEFAULT_MODEL: Model = Model("snowflake-arctic")
+DEFAULT_MODEL: Model = Model("llama3.1-70b")
+DEFAULT_BACKEND = "rest"

src/snowflake/cli/_plugins/cortex/manager.py

Lines changed: 81 additions & 21 deletions

@@ -32,43 +32,103 @@
 from snowflake.cli.api.sql_execution import SqlExecutionMixin
 from snowflake.connector import ProgrammingError
 from snowflake.connector.cursor import DictCursor
+from snowflake.core._root import Root
+from snowflake.core.cortex.inference_service import CortexInferenceService
+from snowflake.core.cortex.inference_service._generated.models import CompleteRequest
+from snowflake.core.cortex.inference_service._generated.models.complete_request_messages_inner import (
+    CompleteRequestMessagesInner,
+)
 
 log = logging.getLogger(__name__)
 
 
+class ResponseParseError(Exception):
+    """This exception is raised when the server response cannot be parsed."""
+
+    pass
+
+
+class MidStreamError(Exception):
+    """The SSE (Server-sent Event) stream can contain error messages in the middle of the stream,
+    using the “error” event type. This exception is raised when there is such a mid-stream error."""
+
+    def __init__(
+        self,
+        reason: Optional[str] = None,
+    ) -> None:
+        message = ""
+        if reason is not None:
+            message = reason
+        super().__init__(message)
+
+
 class CortexManager(SqlExecutionMixin):
-    def complete_for_prompt(
+    def complete(
         self,
         text: Text,
         model: Model,
+        is_file_input: bool = False,
     ) -> str:
-        query = f"""\
+        if not is_file_input:
+            query = f"""\
+            SELECT SNOWFLAKE.CORTEX.COMPLETE(
+                '{model}',
+                '{self._escape_input(text)}'
+            ) AS CORTEX_RESULT;"""
+            return self._query_cortex_result_str(query)
+        else:
+            query = f"""\
             SELECT SNOWFLAKE.CORTEX.COMPLETE(
                 '{model}',
-                '{self._escape_input(text)}'
+                PARSE_JSON('{self._escape_input(text)}'),
+                {{}}
             ) AS CORTEX_RESULT;"""
-        return self._query_cortex_result_str(query)
+            raw_result = self._query_cortex_result_str(query)
+            json_result = json.loads(raw_result)
+            return self._extract_text_result_from_json_result(
+                lambda: json_result["choices"][0]["messages"]
+            )
 
-    def complete_for_conversation(
+    def make_rest_complete_request(
         self,
-        conversation_json_file: SecurePath,
         model: Model,
-    ) -> str:
-        json_content = conversation_json_file.read_text(
-            file_size_limit_mb=DEFAULT_SIZE_LIMIT_MB
-        )
-        query = f"""\
-            SELECT SNOWFLAKE.CORTEX.COMPLETE(
-                '{model}',
-                PARSE_JSON('{self._escape_input(json_content)}'),
-                {{}}
-            ) AS CORTEX_RESULT;"""
-        raw_result = self._query_cortex_result_str(query)
-        json_result = json.loads(raw_result)
-        return self._extract_text_result_from_json_result(
-            lambda: json_result["choices"][0]["messages"]
+        prompt: Text,
+    ) -> CompleteRequest:
+        return CompleteRequest(
+            model=str(model),
+            messages=[CompleteRequestMessagesInner(content=str(prompt))],
+            stream=True,
         )
 
+    def rest_complete(
+        self,
+        text: Text,
+        model: Model,
+        root: "Root",
+    ) -> str:
+        complete_request = self.make_rest_complete_request(model=model, prompt=text)
+        cortex_inference_service = CortexInferenceService(root=root)
+        try:
+            raw_resp = cortex_inference_service.complete(
+                complete_request=complete_request
+            )
+        except Exception as e:
+            raise
+        result = ""
+        for event in raw_resp.events():
+            try:
+                parsed_resp = json.loads(event.data)
+            except json.JSONDecodeError:
+                raise ResponseParseError("Server response cannot be parsed")
+            try:
+                result += parsed_resp["choices"][0]["delta"]["content"]
+            except (json.JSONDecodeError, KeyError, IndexError):
+                if parsed_resp.get("error"):
+                    raise MidStreamError(reason=event.data)
+                else:
+                    pass
+        return result
+
     def extract_answer_from_source_document(
         self,
         source_document: SourceDocument,
@@ -170,7 +230,7 @@ def _escape_input(plain_input: str):
 
     @staticmethod
    def _extract_text_result_from_json_result(
-        extract_function: Callable[[], str]
+        extract_function: Callable[[], str],
     ) -> str:
         try:
             return extract_function()
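
The new rest_complete() path builds a CompleteRequest, streams Server-sent Events from the Cortex inference service, and concatenates the "delta" chunks into one answer. The sketch below mirrors those same snowflake.core calls outside the CLI; the connection parameters are placeholders and the error handling (ResponseParseError, MidStreamError) is omitted, so treat it as an illustration rather than a drop-in script.

import json

from snowflake.connector import connect
from snowflake.core import Root
from snowflake.core.cortex.inference_service import CortexInferenceService
from snowflake.core.cortex.inference_service._generated.models import CompleteRequest
from snowflake.core.cortex.inference_service._generated.models.complete_request_messages_inner import (
    CompleteRequestMessagesInner,
)

# Placeholder credentials; in the CLI this Root comes from get_cli_context().snow_api_root.
conn = connect(account="<account>", user="<user>", password="<password>")
root = Root(conn)

request = CompleteRequest(
    model="llama3.1-70b",
    messages=[CompleteRequestMessagesInner(content="Is 5 more than 4?")],
    stream=True,
)

raw_resp = CortexInferenceService(root=root).complete(complete_request=request)

result = ""
for event in raw_resp.events():  # SSE stream; each event carries one JSON chunk
    chunk = json.loads(event.data)
    result += chunk.get("choices", [{}])[0].get("delta", {}).get("content", "")

print(result)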

tests/__snapshots__/test_docs_generation_output.ambr

Lines changed: 5 additions & 1 deletion

@@ -11,6 +11,7 @@
   snow cortex complete
     <text>
     --model <model>
+    --backend <backend>
     --file <file>
     --connection <connection>
     --host <host>
@@ -56,7 +57,10 @@
 ===============================================================================
 
 :samp:`--model {TEXT}`
-  String specifying the model to be used. Default: snowflake-arctic.
+  String specifying the model to be used. Default: llama3.1-70b.
+
+:samp:`--backend [sql|rest]`
+  String specifying whether to use sql or rest backend. Default: rest.
 
 :samp:`--file {FILE}`
   JSON file containing conversation history to be used to generate a completion. Cannot be combined with TEXT argument.

tests/cortex/test_cortex_commands.py

Lines changed: 8 additions & 2 deletions

@@ -51,13 +51,15 @@ def _mock(raw_result: Any, expected_query: Optional[str] = None):
 def test_cortex_complete_for_prompt_with_default_model(_mock_cortex_result, runner):
     with _mock_cortex_result(
         raw_result="Yes",
-        expected_query="SELECT SNOWFLAKE.CORTEX.COMPLETE( 'snowflake-arctic', 'Is 5 more than 4? Please answer using one word without a period.' ) AS CORTEX_RESULT;",
+        expected_query="SELECT SNOWFLAKE.CORTEX.COMPLETE( 'llama3.1-70b', 'Is 5 more than 4? Please answer using one word without a period.' ) AS CORTEX_RESULT;",
     ):
         result = runner.invoke(
             [
                 "cortex",
                 "complete",
                 "Is 5 more than 4? Please answer using one word without a period.",
+                "--backend",
+                "sql",
             ]
         )
         assert_successful_result_message(result, expected_msg="Yes")
@@ -113,6 +115,8 @@ def test_cortex_complete_for_prompt_with_chosen_model(_mock_cortex_result, runne
                 "Is 5 more than 4? Please answer using one word without a period.",
                 "--model",
                 "reka-flash",
+                "--backend",
+                "sql",
             ]
         )
         assert_successful_result_message(result, expected_msg="Yes")
@@ -121,14 +125,16 @@ def test_cortex_complete_for_prompt_with_chosen_model(_mock_cortex_result, runne
 def test_cortex_complete_for_file(_mock_cortex_result, runner):
     with _mock_cortex_result(
         raw_result="""{"choices": [{"messages": "No, I'm not"}]}""",
-        expected_query="""SELECT SNOWFLAKE.CORTEX.COMPLETE( 'snowflake-arctic', PARSE_JSON('[ { "role": "user", "content": "how does a \\\\"snowflake\\\\" get its \\'unique\\' pattern?" }, { "role": "system", "content": "I don\\'t know" }, { "role": "user", "content": "I thought \\\\"you\\\\" are smarter" } ] '), {} ) AS CORTEX_RESULT;""",
+        expected_query="""SELECT SNOWFLAKE.CORTEX.COMPLETE( 'llama3.1-70b', PARSE_JSON('[ { "role": "user", "content": "how does a \\\\"snowflake\\\\" get its \\'unique\\' pattern?" }, { "role": "system", "content": "I don\\'t know" }, { "role": "user", "content": "I thought \\\\"you\\\\" are smarter" } ] '), {} ) AS CORTEX_RESULT;""",
     ):
         result = runner.invoke(
             [
                 "cortex",
                 "complete",
                 "--file",
                 str(TEST_DIR / "test_data/cortex/conversation.json"),
+                "--backend",
+                "sql",
             ]
         )
         assert_successful_result_message(result, expected_msg="No, I'm not")
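
The tests above pin the existing SQL behaviour by passing "--backend sql" explicitly. A companion unit test for the new REST path could stub out CortexInferenceService entirely, as in the hypothetical sketch below (not part of this commit); it assumes Text and Model are importable from snowflake.cli._plugins.cortex.types and that CortexManager() takes no constructor arguments, as the command code suggests.

import json
from types import SimpleNamespace
from unittest import mock

from snowflake.cli._plugins.cortex.manager import CortexManager
from snowflake.cli._plugins.cortex.types import Model, Text


class _FakeStream:
    """Stands in for the SSE response: two events whose deltas spell "Yes"."""

    def events(self):
        for part in ("Ye", "s"):
            data = json.dumps({"choices": [{"delta": {"content": part}}]})
            yield SimpleNamespace(data=data)


@mock.patch("snowflake.cli._plugins.cortex.manager.CortexInferenceService")
def test_rest_complete_concatenates_deltas(mock_service_cls):
    mock_service_cls.return_value.complete.return_value = _FakeStream()
    result = CortexManager().rest_complete(
        text=Text("Is 5 more than 4?"),
        model=Model("llama3.1-70b"),
        root=mock.MagicMock(),  # never dereferenced: the service class is mocked out
    )
    assert result == "Yes"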
