Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions docs/reference/ai/extai.rst
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,7 @@ Example provider configuration:

* ``OpenAI``
* ``Anthropic``
* ``VoyageAI``


---------
Expand All @@ -309,6 +310,7 @@ Example provider configuration:
* ``ext::ai::OpenAIProviderConfig``
* ``ext::ai::MistralProviderConfig``
* ``ext::ai::AnthropicProviderConfig``
* ``ext::ai::VoyageAIProviderConfig``
* ``ext::ai::CustomProviderConfig``

Each inherits from :eql:type:`ext::ai::ProviderConfig` with provider-specific defaults.
Expand Down Expand Up @@ -345,6 +347,24 @@ Mistral (`documentation <https://docs.mistral.ai/capabilities/embeddings/#mistra

* ``mistral-embed``

VoyageAI (`documentation <https://docs.voyageai.com/docs/embeddings>`__)

Current generation models:

* ``voyage-3-large``
* ``voyage-3.5``
* ``voyage-3.5-lite``
* ``voyage-code-3``
* ``voyage-context-3``

Legacy models (deprecated):

* ``voyage-3`` (use ``voyage-3.5`` instead)
* ``voyage-3-lite`` (use ``voyage-3.5-lite`` instead)
* ``voyage-code-2`` (use ``voyage-code-3`` instead)
* ``voyage-finance-2``
* ``voyage-law-2``

Ollama (`documentation <https://github.com/ollama/ollama/blob/main/docs/api.md#generate-embeddings>`__)

* ``nomic-embed-text``
Expand Down
194 changes: 193 additions & 1 deletion edb/lib/ext/ai.edgeql
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ CREATE EXTENSION PACKAGE ai VERSION '1.0' {
create permission ext::ai::perm::chat_prompt_write;

create scalar type ext::ai::ProviderAPIStyle
extending enum<OpenAI, Anthropic, Ollama>;
extending enum<OpenAI, Anthropic, Ollama, VoyageAI>;

create abstract type ext::ai::ProviderConfig extending cfg::ConfigObject {
create required property name: std::str {
Expand Down Expand Up @@ -142,6 +142,27 @@ CREATE EXTENSION PACKAGE ai VERSION '1.0' {
};
};

# Built-in provider configuration for VoyageAI embedding models.
# Mirrors the other builtin provider configs (OpenAI, Anthropic, ...):
# name/display_name/api_style are protected; only api_url and the
# secret are user-configurable.
create type ext::ai::VoyageAIProviderConfig extending ext::ai::ProviderConfig {
    alter property name {
        set protected := true;
        set default := 'builtin::voyageai';
    };

    alter property display_name {
        set protected := true;
        set default := 'VoyageAI';
    };

    alter property api_url {
        # Base URL for the VoyageAI REST API.
        # Fixed: this statement was missing its terminating ';'.
        set default := 'https://api.voyageai.com/v1';
    };

    alter property api_style {
        set protected := true;
        set default := ext::ai::ProviderAPIStyle.VoyageAI;
    };
};

create type ext::ai::OllamaProviderConfig extending ext::ai::ProviderConfig {
alter property name {
set protected := true;
Expand Down Expand Up @@ -562,6 +583,177 @@ CREATE EXTENSION PACKAGE ai VERSION '1.0' {
ext::ai::text_gen_model_context_window := "200000";
};

# VoyageAI models
# voyage-3-large: current-generation general-purpose VoyageAI embedding
# model.
# NOTE(review): VoyageAI docs list output dimensions up to 2048 for
# voyage-3-large — confirm capping max_output_dimensions at 1024 is
# intentional.
create abstract type ext::ai::Voyage3LargeEmbedModel
    extending ext::ai::EmbeddingModel
{
    # Model identifier sent to the provider API.
    alter annotation
        ext::ai::model_name := "voyage-3-large";
    alter annotation
        ext::ai::model_provider := "builtin::voyageai";
    # Per-input token limit.
    alter annotation
        ext::ai::embedding_model_max_input_tokens := "32000";
    # Combined token limit per batch request.
    alter annotation
        ext::ai::embedding_model_max_batch_tokens := "32000";
    alter annotation
        ext::ai::embedding_model_max_output_dimensions := "1024";
    # Model accepts a reduced output dimension ("shortening").
    alter annotation
        ext::ai::embedding_model_supports_shortening := "true";
};

# voyage-code-3: current-generation VoyageAI model optimized for code
# retrieval.
create abstract type ext::ai::VoyageCode3EmbedModel
    extending ext::ai::EmbeddingModel
{
    alter annotation
        ext::ai::model_name := "voyage-code-3";
    alter annotation
        ext::ai::model_provider := "builtin::voyageai";
    alter annotation
        ext::ai::embedding_model_max_input_tokens := "32000";
    alter annotation
        ext::ai::embedding_model_max_batch_tokens := "32000";
    alter annotation
        ext::ai::embedding_model_max_output_dimensions := "1024";
    # Model accepts a reduced output dimension ("shortening").
    alter annotation
        ext::ai::embedding_model_supports_shortening := "true";
};

# voyage-3.5: current-generation general-purpose VoyageAI embedding
# model (successor to the legacy voyage-3).
create abstract type ext::ai::Voyage35EmbedModel
    extending ext::ai::EmbeddingModel
{
    alter annotation
        ext::ai::model_name := "voyage-3.5";
    alter annotation
        ext::ai::model_provider := "builtin::voyageai";
    alter annotation
        ext::ai::embedding_model_max_input_tokens := "32000";
    alter annotation
        ext::ai::embedding_model_max_batch_tokens := "32000";
    alter annotation
        ext::ai::embedding_model_max_output_dimensions := "1024";
    # Model accepts a reduced output dimension ("shortening").
    alter annotation
        ext::ai::embedding_model_supports_shortening := "true";
};

# voyage-3.5-lite: current-generation latency/cost-optimized VoyageAI
# embedding model (successor to the legacy voyage-3-lite).
create abstract type ext::ai::Voyage35LiteEmbedModel
    extending ext::ai::EmbeddingModel
{
    alter annotation
        ext::ai::model_name := "voyage-3.5-lite";
    alter annotation
        ext::ai::model_provider := "builtin::voyageai";
    alter annotation
        ext::ai::embedding_model_max_input_tokens := "32000";
    alter annotation
        ext::ai::embedding_model_max_batch_tokens := "32000";
    alter annotation
        ext::ai::embedding_model_max_output_dimensions := "1024";
    # Model accepts a reduced output dimension ("shortening").
    alter annotation
        ext::ai::embedding_model_supports_shortening := "true";
};

# voyage-3: legacy general-purpose VoyageAI embedding model, kept for
# existing indexes.  No shortening annotation: fixed 1024-dim output.
create abstract type ext::ai::Voyage3EmbedModel
    extending ext::ai::EmbeddingModel
{
    create annotation std::deprecated :=
        "This model is noted as a legacy model in the VoyageAI docs. Consider using voyage-3.5 instead.";
    alter annotation
        ext::ai::model_name := "voyage-3";
    alter annotation
        ext::ai::model_provider := "builtin::voyageai";
    alter annotation
        ext::ai::embedding_model_max_input_tokens := "32000";
    alter annotation
        ext::ai::embedding_model_max_batch_tokens := "32000";
    alter annotation
        ext::ai::embedding_model_max_output_dimensions := "1024";
};

# voyage-3-lite: legacy cost-optimized VoyageAI embedding model, kept
# for existing indexes.  Note the smaller 512-dim output.
create abstract type ext::ai::Voyage3LiteEmbedModel
    extending ext::ai::EmbeddingModel
{
    create annotation std::deprecated :=
        "This model is noted as a legacy model in the VoyageAI docs. Consider using voyage-3.5-lite instead.";
    alter annotation
        ext::ai::model_name := "voyage-3-lite";
    alter annotation
        ext::ai::model_provider := "builtin::voyageai";
    alter annotation
        ext::ai::embedding_model_max_input_tokens := "32000";
    alter annotation
        ext::ai::embedding_model_max_batch_tokens := "32000";
    alter annotation
        ext::ai::embedding_model_max_output_dimensions := "512";
};

# voyage-finance-2: legacy finance-domain VoyageAI embedding model
# (no current-generation replacement listed in the provider docs).
create abstract type ext::ai::VoyageFinance2EmbedModel
    extending ext::ai::EmbeddingModel
{
    create annotation std::deprecated :=
        "This model is noted as a legacy model in the VoyageAI docs.";
    alter annotation
        ext::ai::model_name := "voyage-finance-2";
    alter annotation
        ext::ai::model_provider := "builtin::voyageai";
    alter annotation
        ext::ai::embedding_model_max_input_tokens := "32000";
    alter annotation
        ext::ai::embedding_model_max_batch_tokens := "32000";
    alter annotation
        ext::ai::embedding_model_max_output_dimensions := "1024";
};

# voyage-law-2: legacy legal-domain VoyageAI embedding model.  Smaller
# 16K context window than the voyage-3 family.
create abstract type ext::ai::VoyageLaw2EmbedModel
    extending ext::ai::EmbeddingModel
{
    create annotation std::deprecated :=
        "This model is noted as a legacy model in the VoyageAI docs.";
    alter annotation
        ext::ai::model_name := "voyage-law-2";
    alter annotation
        ext::ai::model_provider := "builtin::voyageai";
    alter annotation
        ext::ai::embedding_model_max_input_tokens := "16000";
    alter annotation
        ext::ai::embedding_model_max_batch_tokens := "16000";
    alter annotation
        ext::ai::embedding_model_max_output_dimensions := "1024";
};

# voyage-code-2: legacy code-retrieval VoyageAI embedding model.
# 16K context window; 1536-dim output (unlike code-3's 1024).
create abstract type ext::ai::VoyageCode2EmbedModel
    extending ext::ai::EmbeddingModel
{
    create annotation std::deprecated :=
        "This model is noted as a legacy model in the VoyageAI docs. Consider using voyage-code-3 instead.";
    alter annotation
        ext::ai::model_name := "voyage-code-2";
    alter annotation
        ext::ai::model_provider := "builtin::voyageai";
    alter annotation
        ext::ai::embedding_model_max_input_tokens := "16000";
    alter annotation
        ext::ai::embedding_model_max_batch_tokens := "16000";
    alter annotation
        ext::ai::embedding_model_max_output_dimensions := "1536";
};

# voyage-context-3: VoyageAI contextualized chunk-embedding model.
# The server routes models whose name contains "context" to the
# /contextualizedembeddings endpoint (see _generate_voyageai_embeddings
# in ai_ext.py).
# NOTE(review): max_batch_tokens is 320000 — 10x every sibling model.
# Confirm against the VoyageAI contextualized-embeddings limits that
# this is not a typo for 32000.
create abstract type ext::ai::VoyageContext3EmbedModel
    extending ext::ai::EmbeddingModel
{
    alter annotation
        ext::ai::model_name := "voyage-context-3";
    alter annotation
        ext::ai::model_provider := "builtin::voyageai";
    alter annotation
        ext::ai::embedding_model_max_input_tokens := "32000";
    alter annotation
        ext::ai::embedding_model_max_batch_tokens := "320000";
    alter annotation
        ext::ai::embedding_model_max_output_dimensions := "1024";
    # Model accepts a reduced output dimension ("shortening").
    alter annotation
        ext::ai::embedding_model_supports_shortening := "true";
};

# Ollama embedding models
create abstract type ext::ai::OllamaLlama_3_2_Model
extending ext::ai::TextGenerationModel
Expand Down
81 changes: 81 additions & 0 deletions edb/server/protocol/ai_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ class BadRequestError(AIExtError):
class ApiStyle(s_enum.StrEnum):
    # Wire-protocol families a provider can speak; selects the request/
    # response handling used when calling the provider's API.
    # NOTE(review): member order here (VoyageAI before Ollama) differs
    # from the ext::ai::ProviderAPIStyle enum in ai.edgeql (VoyageAI
    # appended after Ollama) — harmless for string comparison, but
    # confirm nothing relies on matching declaration order.
    OpenAI = 'OpenAI'
    Anthropic = 'Anthropic'
    VoyageAI = 'VoyageAI'
    Ollama = 'Ollama'


Expand Down Expand Up @@ -1267,6 +1268,10 @@ async def _generate_embeddings(
result = await _generate_openai_embeddings(
provider, model_name, inputs, shortening, user, http_client
)
elif provider.api_style == ApiStyle.VoyageAI:
return await _generate_voyageai_embeddings(
provider, model_name, inputs, shortening, http_client
)
elif provider.api_style == ApiStyle.Ollama:
result = await _generate_ollama_embeddings(
provider, model_name, inputs, shortening, http_client
Expand Down Expand Up @@ -1336,6 +1341,82 @@ async def _generate_openai_embeddings(
)


async def _generate_voyageai_embeddings(
    provider: ProviderConfig,
    model_name: str,
    inputs: list[str],
    shortening: Optional[int],
    http_client: http.HttpClient,
) -> EmbeddingsResult:
    """Generate embeddings via the VoyageAI REST API.

    Routes contextualized models to ``/contextualizedembeddings`` and
    all others to the standard ``/embeddings`` endpoint, then normalizes
    the contextualized response into the flat shape callers expect.

    Args:
        provider: Provider config supplying the API secret and base URL.
        model_name: VoyageAI model identifier (e.g. ``"voyage-3.5"``).
        inputs: Texts to embed; result order matches input order.
        shortening: Optional reduced output dimension, forwarded as
            VoyageAI's ``output_dimension`` request parameter.
        http_client: HTTP client used to issue the request.

    Returns:
        An ``EmbeddingsResult`` whose ``data`` is either the (possibly
        flattened) raw response bytes or an ``rs.Error`` on failure.
    """
    # Local import kept function-scoped: the module header is not part
    # of this change.
    import json

    client = http_client.with_context(
        headers={"Authorization": f"Bearer {provider.secret}"},
        base_url=provider.api_url,
    )

    # Contextualized models use a different endpoint and nested inputs.
    # NOTE(review): detection is a substring match on the model name —
    # assumes all and only contextualized models contain "context".
    is_contextualized = "context" in model_name

    if is_contextualized:
        # Each input becomes a single-chunk document.
        params: dict[str, Any] = {
            "inputs": [[inp] for inp in inputs],
            "input_type": "document",
            "model": model_name,
        }
        endpoint = "/contextualizedembeddings"
    else:
        params = {
            "input": inputs,
            "model": model_name,
        }
        endpoint = "/embeddings"

    if shortening is not None:
        params["output_dimension"] = shortening

    result = await client.post(
        endpoint,
        json=params,
    )

    error = None
    if result.status_code >= 400:
        error = rs.Error(
            message=(
                f"API call to generate embeddings failed with status "
                f"{result.status_code}: {result.text}"
            ),
            retry=(
                # 429 (too many requests) is transient and safe to retry.
                result.status_code == 429
            ),
        )

    if is_contextualized and not error:
        # Contextualized responses nest embeddings per document:
        # data[doc]["data"][chunk].  Flatten to data[chunk] so callers
        # can consume the payload like a standard embeddings response.
        response_data = json.loads(result.bytes())
        response_data["data"] = [
            chunk
            for doc in response_data.get("data", [])
            for chunk in doc.get("data", [])
        ]
        return EmbeddingsResult(
            data=EmbeddingsData(json.dumps(response_data).encode())
        )

    return EmbeddingsResult(
        data=(error if error else EmbeddingsData(result.bytes())),
    )


def _read_openai_header_field(
result: Any,
field_names: list[str],
Expand Down