diff --git a/docs/reference/ai/extai.rst b/docs/reference/ai/extai.rst index 4b5eed9abe8..244c4578c14 100644 --- a/docs/reference/ai/extai.rst +++ b/docs/reference/ai/extai.rst @@ -286,6 +286,7 @@ Example provider configuration: * ``OpenAI`` * ``Anthropic`` + * ``VoyageAI`` --------- @@ -309,6 +310,7 @@ Example provider configuration: * ``ext::ai::OpenAIProviderConfig`` * ``ext::ai::MistralProviderConfig`` * ``ext::ai::AnthropicProviderConfig`` + * ``ext::ai::VoyageAIProviderConfig`` * ``ext::ai::CustomProviderConfig`` Each inherits from :eql:type:`ext::ai::ProviderConfig` with provider-specific defaults. @@ -345,6 +347,24 @@ Mistral (`documentation `__) + +Current generation models: + +* ``voyage-3-large`` +* ``voyage-3.5`` +* ``voyage-3.5-lite`` +* ``voyage-code-3`` +* ``voyage-context-3`` + +Legacy models (deprecated): + +* ``voyage-3`` (use ``voyage-3.5`` instead) +* ``voyage-3-lite`` (use ``voyage-3.5-lite`` instead) +* ``voyage-code-2`` (use ``voyage-code-3`` instead) +* ``voyage-finance-2`` +* ``voyage-law-2`` + Ollama (`documentation `__) * ``nomic-embed-text`` diff --git a/edb/lib/ext/ai.edgeql b/edb/lib/ext/ai.edgeql index aea8cd413ce..8172a9939e0 100644 --- a/edb/lib/ext/ai.edgeql +++ b/edb/lib/ext/ai.edgeql @@ -29,7 +29,7 @@ CREATE EXTENSION PACKAGE ai VERSION '1.0' { create permission ext::ai::perm::chat_prompt_write; create scalar type ext::ai::ProviderAPIStyle - extending enum; + extending enum; create abstract type ext::ai::ProviderConfig extending cfg::ConfigObject { create required property name: std::str { @@ -142,6 +142,27 @@ CREATE EXTENSION PACKAGE ai VERSION '1.0' { }; }; + create type ext::ai::VoyageAIProviderConfig extending ext::ai::ProviderConfig { + alter property name { + set protected := true; + set default := 'builtin::voyageai'; + }; + + alter property display_name { + set protected := true; + set default := 'VoyageAI'; + }; + + alter property api_url { + set default := 'https://api.voyageai.com/v1' + }; + + alter property api_style 
{ + set protected := true; + set default := ext::ai::ProviderAPIStyle.VoyageAI; + }; + }; + create type ext::ai::OllamaProviderConfig extending ext::ai::ProviderConfig { alter property name { set protected := true; @@ -562,6 +583,177 @@ CREATE EXTENSION PACKAGE ai VERSION '1.0' { ext::ai::text_gen_model_context_window := "200000"; }; + # VoyageAI models + create abstract type ext::ai::Voyage3LargeEmbedModel + extending ext::ai::EmbeddingModel + { + alter annotation + ext::ai::model_name := "voyage-3-large"; + alter annotation + ext::ai::model_provider := "builtin::voyageai"; + alter annotation + ext::ai::embedding_model_max_input_tokens := "32000"; + alter annotation + ext::ai::embedding_model_max_batch_tokens := "32000"; + alter annotation + ext::ai::embedding_model_max_output_dimensions := "1024"; + alter annotation + ext::ai::embedding_model_supports_shortening := "true"; + }; + + create abstract type ext::ai::VoyageCode3EmbedModel + extending ext::ai::EmbeddingModel + { + alter annotation + ext::ai::model_name := "voyage-code-3"; + alter annotation + ext::ai::model_provider := "builtin::voyageai"; + alter annotation + ext::ai::embedding_model_max_input_tokens := "32000"; + alter annotation + ext::ai::embedding_model_max_batch_tokens := "32000"; + alter annotation + ext::ai::embedding_model_max_output_dimensions := "1024"; + alter annotation + ext::ai::embedding_model_supports_shortening := "true"; + }; + + create abstract type ext::ai::Voyage35EmbedModel + extending ext::ai::EmbeddingModel + { + alter annotation + ext::ai::model_name := "voyage-3.5"; + alter annotation + ext::ai::model_provider := "builtin::voyageai"; + alter annotation + ext::ai::embedding_model_max_input_tokens := "32000"; + alter annotation + ext::ai::embedding_model_max_batch_tokens := "32000"; + alter annotation + ext::ai::embedding_model_max_output_dimensions := "1024"; + alter annotation + ext::ai::embedding_model_supports_shortening := "true"; + }; + + create abstract type 
ext::ai::Voyage35LiteEmbedModel + extending ext::ai::EmbeddingModel + { + alter annotation + ext::ai::model_name := "voyage-3.5-lite"; + alter annotation + ext::ai::model_provider := "builtin::voyageai"; + alter annotation + ext::ai::embedding_model_max_input_tokens := "32000"; + alter annotation + ext::ai::embedding_model_max_batch_tokens := "32000"; + alter annotation + ext::ai::embedding_model_max_output_dimensions := "1024"; + alter annotation + ext::ai::embedding_model_supports_shortening := "true"; + }; + + create abstract type ext::ai::Voyage3EmbedModel + extending ext::ai::EmbeddingModel + { + create annotation std::deprecated := + "This model is noted as a legacy model in the VoyageAI docs. Consider using voyage-3.5 instead."; + alter annotation + ext::ai::model_name := "voyage-3"; + alter annotation + ext::ai::model_provider := "builtin::voyageai"; + alter annotation + ext::ai::embedding_model_max_input_tokens := "32000"; + alter annotation + ext::ai::embedding_model_max_batch_tokens := "32000"; + alter annotation + ext::ai::embedding_model_max_output_dimensions := "1024"; + }; + + create abstract type ext::ai::Voyage3LiteEmbedModel + extending ext::ai::EmbeddingModel + { + create annotation std::deprecated := + "This model is noted as a legacy model in the VoyageAI docs. 
Consider using voyage-3.5-lite instead."; + alter annotation + ext::ai::model_name := "voyage-3-lite"; + alter annotation + ext::ai::model_provider := "builtin::voyageai"; + alter annotation + ext::ai::embedding_model_max_input_tokens := "32000"; + alter annotation + ext::ai::embedding_model_max_batch_tokens := "32000"; + alter annotation + ext::ai::embedding_model_max_output_dimensions := "512"; + }; + + create abstract type ext::ai::VoyageFinance2EmbedModel + extending ext::ai::EmbeddingModel + { + create annotation std::deprecated := + "This model is noted as a legacy model in the VoyageAI docs."; + alter annotation + ext::ai::model_name := "voyage-finance-2"; + alter annotation + ext::ai::model_provider := "builtin::voyageai"; + alter annotation + ext::ai::embedding_model_max_input_tokens := "32000"; + alter annotation + ext::ai::embedding_model_max_batch_tokens := "32000"; + alter annotation + ext::ai::embedding_model_max_output_dimensions := "1024"; + }; + + create abstract type ext::ai::VoyageLaw2EmbedModel + extending ext::ai::EmbeddingModel + { + create annotation std::deprecated := + "This model is noted as a legacy model in the VoyageAI docs."; + alter annotation + ext::ai::model_name := "voyage-law-2"; + alter annotation + ext::ai::model_provider := "builtin::voyageai"; + alter annotation + ext::ai::embedding_model_max_input_tokens := "16000"; + alter annotation + ext::ai::embedding_model_max_batch_tokens := "16000"; + alter annotation + ext::ai::embedding_model_max_output_dimensions := "1024"; + }; + + create abstract type ext::ai::VoyageCode2EmbedModel + extending ext::ai::EmbeddingModel + { + create annotation std::deprecated := + "This model is noted as a legacy model in the VoyageAI docs. 
Consider using voyage-code-3 instead."; + alter annotation + ext::ai::model_name := "voyage-code-2"; + alter annotation + ext::ai::model_provider := "builtin::voyageai"; + alter annotation + ext::ai::embedding_model_max_input_tokens := "16000"; + alter annotation + ext::ai::embedding_model_max_batch_tokens := "16000"; + alter annotation + ext::ai::embedding_model_max_output_dimensions := "1536"; + }; + + create abstract type ext::ai::VoyageContext3EmbedModel + extending ext::ai::EmbeddingModel + { + alter annotation + ext::ai::model_name := "voyage-context-3"; + alter annotation + ext::ai::model_provider := "builtin::voyageai"; + alter annotation + ext::ai::embedding_model_max_input_tokens := "32000"; + alter annotation + ext::ai::embedding_model_max_batch_tokens := "320000"; + alter annotation + ext::ai::embedding_model_max_output_dimensions := "1024"; + alter annotation + ext::ai::embedding_model_supports_shortening := "true"; + }; + # Ollama embedding models create abstract type ext::ai::OllamaLlama_3_2_Model extending ext::ai::TextGenerationModel diff --git a/edb/server/protocol/ai_ext.py b/edb/server/protocol/ai_ext.py index 84ea7d0dc07..67df75e8938 100644 --- a/edb/server/protocol/ai_ext.py +++ b/edb/server/protocol/ai_ext.py @@ -111,6 +111,7 @@ class BadRequestError(AIExtError): class ApiStyle(s_enum.StrEnum): OpenAI = 'OpenAI' Anthropic = 'Anthropic' + VoyageAI = 'VoyageAI' Ollama = 'Ollama' @@ -1267,6 +1268,10 @@ async def _generate_embeddings( result = await _generate_openai_embeddings( provider, model_name, inputs, shortening, user, http_client ) + elif provider.api_style == ApiStyle.VoyageAI: + return await _generate_voyageai_embeddings( + provider, model_name, inputs, shortening, http_client + ) elif provider.api_style == ApiStyle.Ollama: result = await _generate_ollama_embeddings( provider, model_name, inputs, shortening, http_client @@ -1336,6 +1341,82 @@ async def _generate_openai_embeddings( ) +async def _generate_voyageai_embeddings( + provider: 
ProviderConfig, +    model_name: str, +    inputs: list[str], +    shortening: Optional[int], +    http_client: http.HttpClient, +) -> EmbeddingsResult: + +    headers = { +        "Authorization": f"Bearer {provider.secret}", +    } +    client = http_client.with_context( +        headers=headers, +        base_url=provider.api_url, +    ) + +    # Check if this is a contextualized embedding model +    is_contextualized = "context" in model_name + +    if is_contextualized: +        # For contextualized embeddings, treat each input as a single-chunk document +        params: dict[str, Any] = { +            "inputs": [[inp] for inp in inputs], +            "input_type": "document", +            "model": model_name, +        } +        endpoint = "/contextualizedembeddings" +    else: +        # Standard embeddings +        params = { +            "input": inputs, +            "model": model_name, +        } +        endpoint = "/embeddings" + +    # Add output_dimension parameter if shortening is specified +    if shortening is not None: +        params["output_dimension"] = shortening + +    result = await client.post( +        endpoint, +        json=params, +    ) + +    error = None +    if result.status_code >= 400: +        error = rs.Error( +            message=( +                f"API call to generate embeddings failed with status " +                f"{result.status_code}: {result.text}" +            ), +            retry=( +                # If the request fails with 429 - too many requests, it can be +                # retried +                result.status_code == 429 +            ), +        ) + +    # For contextualized embeddings, we need to flatten the response +    if is_contextualized and not error: +        import json +        response_data = json.loads(result.bytes()) +        # Flatten the nested structure: data[doc][chunk] -> data[chunk] +        flattened_data = [] +        for doc in response_data.get("data", []): +            for chunk in doc.get("data", []): +                flattened_data.append(chunk) +        response_data["data"] = flattened_data +        flattened_bytes = json.dumps(response_data).encode() +        return EmbeddingsResult(data=EmbeddingsData(flattened_bytes)) + +    return EmbeddingsResult( +        data=(error if error else EmbeddingsData(result.bytes())), +    ) + + +def _read_openai_header_field( +    result: Any, +    field_names: 
list[str],