Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions docs/reference/ai/extai.rst
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,7 @@ Example provider configuration:

* ``OpenAI``
* ``Anthropic``
* ``VoyageAI``


---------
Expand All @@ -309,6 +310,7 @@ Example provider configuration:
* ``ext::ai::OpenAIProviderConfig``
* ``ext::ai::MistralProviderConfig``
* ``ext::ai::AnthropicProviderConfig``
* ``ext::ai::VoyageAIProviderConfig``
* ``ext::ai::CustomProviderConfig``

Each inherits from :eql:type:`ext::ai::ProviderConfig` with provider-specific defaults.
Expand Down Expand Up @@ -345,6 +347,24 @@ Mistral (`documentation <https://docs.mistral.ai/capabilities/embeddings/#mistra

* ``mistral-embed``

VoyageAI (`documentation <https://docs.voyageai.com/docs/embeddings>`__)

Current generation models:

* ``voyage-3-large``
* ``voyage-3.5``
* ``voyage-3.5-lite``
* ``voyage-code-3``
* ``voyage-context-3``

Legacy models (deprecated):

* ``voyage-3`` (use ``voyage-3.5`` instead)
* ``voyage-3-lite`` (use ``voyage-3.5-lite`` instead)
* ``voyage-code-2`` (use ``voyage-code-3`` instead)
* ``voyage-finance-2``
* ``voyage-law-2``

Ollama (`documentation <https://github.com/ollama/ollama/blob/main/docs/api.md#generate-embeddings>`__)

* ``nomic-embed-text``
Expand Down
194 changes: 193 additions & 1 deletion edb/lib/ext/ai.edgeql
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ CREATE EXTENSION PACKAGE ai VERSION '1.0' {
create permission ext::ai::perm::chat_prompt_write;

create scalar type ext::ai::ProviderAPIStyle
extending enum<OpenAI, Anthropic, Ollama>;
extending enum<OpenAI, Anthropic, Ollama, VoyageAI>;

create abstract type ext::ai::ProviderConfig extending cfg::ConfigObject {
create required property name: std::str {
Expand Down Expand Up @@ -142,6 +142,27 @@ CREATE EXTENSION PACKAGE ai VERSION '1.0' {
};
};

# Built-in provider configuration for VoyageAI embedding models.
# Mirrors the other builtin provider configs (OpenAI, Anthropic, ...):
# name/display_name/api_style are protected; only api_url and the
# secret are user-configurable.
create type ext::ai::VoyageAIProviderConfig extending ext::ai::ProviderConfig {
    alter property name {
        set protected := true;
        set default := 'builtin::voyageai';
    };

    alter property display_name {
        set protected := true;
        set default := 'VoyageAI';
    };

    alter property api_url {
        # Base URL for the VoyageAI REST API.
        # Fixed: this statement was missing its terminating ';'.
        set default := 'https://api.voyageai.com/v1';
    };

    alter property api_style {
        set protected := true;
        set default := ext::ai::ProviderAPIStyle.VoyageAI;
    };
};

create type ext::ai::OllamaProviderConfig extending ext::ai::ProviderConfig {
alter property name {
set protected := true;
Expand Down Expand Up @@ -562,6 +583,177 @@ CREATE EXTENSION PACKAGE ai VERSION '1.0' {
ext::ai::text_gen_model_context_window := "200000";
};

# VoyageAI models
# voyage-3-large: current-generation general-purpose VoyageAI embedding
# model.
# NOTE(review): VoyageAI docs list output dimensions up to 2048 for
# voyage-3-large — confirm capping max_output_dimensions at 1024 is
# intentional.
create abstract type ext::ai::Voyage3LargeEmbedModel
    extending ext::ai::EmbeddingModel
{
    # Model identifier sent to the provider API.
    alter annotation
        ext::ai::model_name := "voyage-3-large";
    alter annotation
        ext::ai::model_provider := "builtin::voyageai";
    # Per-input token limit.
    alter annotation
        ext::ai::embedding_model_max_input_tokens := "32000";
    # Combined token limit per batch request.
    alter annotation
        ext::ai::embedding_model_max_batch_tokens := "32000";
    alter annotation
        ext::ai::embedding_model_max_output_dimensions := "1024";
    # Model accepts a reduced output dimension ("shortening").
    alter annotation
        ext::ai::embedding_model_supports_shortening := "true";
};

# voyage-code-3: current-generation VoyageAI model optimized for code
# retrieval.
create abstract type ext::ai::VoyageCode3EmbedModel
    extending ext::ai::EmbeddingModel
{
    alter annotation
        ext::ai::model_name := "voyage-code-3";
    alter annotation
        ext::ai::model_provider := "builtin::voyageai";
    alter annotation
        ext::ai::embedding_model_max_input_tokens := "32000";
    alter annotation
        ext::ai::embedding_model_max_batch_tokens := "32000";
    alter annotation
        ext::ai::embedding_model_max_output_dimensions := "1024";
    # Model accepts a reduced output dimension ("shortening").
    alter annotation
        ext::ai::embedding_model_supports_shortening := "true";
};

# voyage-3.5: current-generation general-purpose VoyageAI embedding
# model (successor to the legacy voyage-3).
create abstract type ext::ai::Voyage35EmbedModel
    extending ext::ai::EmbeddingModel
{
    alter annotation
        ext::ai::model_name := "voyage-3.5";
    alter annotation
        ext::ai::model_provider := "builtin::voyageai";
    alter annotation
        ext::ai::embedding_model_max_input_tokens := "32000";
    alter annotation
        ext::ai::embedding_model_max_batch_tokens := "32000";
    alter annotation
        ext::ai::embedding_model_max_output_dimensions := "1024";
    # Model accepts a reduced output dimension ("shortening").
    alter annotation
        ext::ai::embedding_model_supports_shortening := "true";
};

# voyage-3.5-lite: current-generation latency/cost-optimized VoyageAI
# embedding model (successor to the legacy voyage-3-lite).
create abstract type ext::ai::Voyage35LiteEmbedModel
    extending ext::ai::EmbeddingModel
{
    alter annotation
        ext::ai::model_name := "voyage-3.5-lite";
    alter annotation
        ext::ai::model_provider := "builtin::voyageai";
    alter annotation
        ext::ai::embedding_model_max_input_tokens := "32000";
    alter annotation
        ext::ai::embedding_model_max_batch_tokens := "32000";
    alter annotation
        ext::ai::embedding_model_max_output_dimensions := "1024";
    # Model accepts a reduced output dimension ("shortening").
    alter annotation
        ext::ai::embedding_model_supports_shortening := "true";
};

# voyage-3: legacy general-purpose VoyageAI embedding model, kept for
# existing indexes.  No shortening annotation: fixed 1024-dim output.
create abstract type ext::ai::Voyage3EmbedModel
    extending ext::ai::EmbeddingModel
{
    create annotation std::deprecated :=
        "This model is noted as a legacy model in the VoyageAI docs. Consider using voyage-3.5 instead.";
    alter annotation
        ext::ai::model_name := "voyage-3";
    alter annotation
        ext::ai::model_provider := "builtin::voyageai";
    alter annotation
        ext::ai::embedding_model_max_input_tokens := "32000";
    alter annotation
        ext::ai::embedding_model_max_batch_tokens := "32000";
    alter annotation
        ext::ai::embedding_model_max_output_dimensions := "1024";
};

# voyage-3-lite: legacy cost-optimized VoyageAI embedding model, kept
# for existing indexes.  Note the smaller 512-dim output.
create abstract type ext::ai::Voyage3LiteEmbedModel
    extending ext::ai::EmbeddingModel
{
    create annotation std::deprecated :=
        "This model is noted as a legacy model in the VoyageAI docs. Consider using voyage-3.5-lite instead.";
    alter annotation
        ext::ai::model_name := "voyage-3-lite";
    alter annotation
        ext::ai::model_provider := "builtin::voyageai";
    alter annotation
        ext::ai::embedding_model_max_input_tokens := "32000";
    alter annotation
        ext::ai::embedding_model_max_batch_tokens := "32000";
    alter annotation
        ext::ai::embedding_model_max_output_dimensions := "512";
};

# voyage-finance-2: legacy finance-domain VoyageAI embedding model
# (no current-generation replacement listed in the provider docs).
create abstract type ext::ai::VoyageFinance2EmbedModel
    extending ext::ai::EmbeddingModel
{
    create annotation std::deprecated :=
        "This model is noted as a legacy model in the VoyageAI docs.";
    alter annotation
        ext::ai::model_name := "voyage-finance-2";
    alter annotation
        ext::ai::model_provider := "builtin::voyageai";
    alter annotation
        ext::ai::embedding_model_max_input_tokens := "32000";
    alter annotation
        ext::ai::embedding_model_max_batch_tokens := "32000";
    alter annotation
        ext::ai::embedding_model_max_output_dimensions := "1024";
};

# voyage-law-2: legacy legal-domain VoyageAI embedding model.  Smaller
# 16K context window than the voyage-3 family.
create abstract type ext::ai::VoyageLaw2EmbedModel
    extending ext::ai::EmbeddingModel
{
    create annotation std::deprecated :=
        "This model is noted as a legacy model in the VoyageAI docs.";
    alter annotation
        ext::ai::model_name := "voyage-law-2";
    alter annotation
        ext::ai::model_provider := "builtin::voyageai";
    alter annotation
        ext::ai::embedding_model_max_input_tokens := "16000";
    alter annotation
        ext::ai::embedding_model_max_batch_tokens := "16000";
    alter annotation
        ext::ai::embedding_model_max_output_dimensions := "1024";
};

# voyage-code-2: legacy code-retrieval VoyageAI embedding model.
# 16K context window; 1536-dim output (unlike code-3's 1024).
create abstract type ext::ai::VoyageCode2EmbedModel
    extending ext::ai::EmbeddingModel
{
    create annotation std::deprecated :=
        "This model is noted as a legacy model in the VoyageAI docs. Consider using voyage-code-3 instead.";
    alter annotation
        ext::ai::model_name := "voyage-code-2";
    alter annotation
        ext::ai::model_provider := "builtin::voyageai";
    alter annotation
        ext::ai::embedding_model_max_input_tokens := "16000";
    alter annotation
        ext::ai::embedding_model_max_batch_tokens := "16000";
    alter annotation
        ext::ai::embedding_model_max_output_dimensions := "1536";
};

# voyage-context-3: VoyageAI contextualized chunk-embedding model.
# The server routes models whose name contains "context" to the
# /contextualizedembeddings endpoint (see _generate_voyageai_embeddings
# in ai_ext.py).
# NOTE(review): max_batch_tokens is 320000 — 10x every sibling model.
# Confirm against the VoyageAI contextualized-embeddings limits that
# this is not a typo for 32000.
create abstract type ext::ai::VoyageContext3EmbedModel
    extending ext::ai::EmbeddingModel
{
    alter annotation
        ext::ai::model_name := "voyage-context-3";
    alter annotation
        ext::ai::model_provider := "builtin::voyageai";
    alter annotation
        ext::ai::embedding_model_max_input_tokens := "32000";
    alter annotation
        ext::ai::embedding_model_max_batch_tokens := "320000";
    alter annotation
        ext::ai::embedding_model_max_output_dimensions := "1024";
    # Model accepts a reduced output dimension ("shortening").
    alter annotation
        ext::ai::embedding_model_supports_shortening := "true";
};

# Ollama embedding models
create abstract type ext::ai::OllamaLlama_3_2_Model
extending ext::ai::TextGenerationModel
Expand Down
81 changes: 81 additions & 0 deletions edb/server/protocol/ai_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ class BadRequestError(AIExtError):
class ApiStyle(s_enum.StrEnum):
    # Wire-protocol families a provider can speak; selects the request/
    # response handling used when calling the provider's API.
    # NOTE(review): member order here (VoyageAI before Ollama) differs
    # from the ext::ai::ProviderAPIStyle enum in ai.edgeql (VoyageAI
    # appended after Ollama) — harmless for string comparison, but
    # confirm nothing relies on matching declaration order.
    OpenAI = 'OpenAI'
    Anthropic = 'Anthropic'
    VoyageAI = 'VoyageAI'
    Ollama = 'Ollama'


Expand Down Expand Up @@ -1267,6 +1268,10 @@ async def _generate_embeddings(
result = await _generate_openai_embeddings(
provider, model_name, inputs, shortening, user, http_client
)
elif provider.api_style == ApiStyle.VoyageAI:
return await _generate_voyageai_embeddings(
provider, model_name, inputs, shortening, http_client
)
elif provider.api_style == ApiStyle.Ollama:
result = await _generate_ollama_embeddings(
provider, model_name, inputs, shortening, http_client
Expand Down Expand Up @@ -1336,6 +1341,82 @@ async def _generate_openai_embeddings(
)


async def _generate_voyageai_embeddings(
    provider: ProviderConfig,
    model_name: str,
    inputs: list[str],
    shortening: Optional[int],
    http_client: http.HttpClient,
) -> EmbeddingsResult:
    """Generate embeddings via the VoyageAI REST API.

    Routes contextualized models to ``/contextualizedembeddings`` and
    all others to the standard ``/embeddings`` endpoint, then normalizes
    the contextualized response into the flat shape callers expect.

    Args:
        provider: Provider config supplying the API secret and base URL.
        model_name: VoyageAI model identifier (e.g. ``"voyage-3.5"``).
        inputs: Texts to embed; result order matches input order.
        shortening: Optional reduced output dimension, forwarded as
            VoyageAI's ``output_dimension`` request parameter.
        http_client: HTTP client used to issue the request.

    Returns:
        An ``EmbeddingsResult`` whose ``data`` is either the (possibly
        flattened) raw response bytes or an ``rs.Error`` on failure.
    """
    # Local import kept function-scoped: the module header is not part
    # of this change.
    import json

    client = http_client.with_context(
        headers={"Authorization": f"Bearer {provider.secret}"},
        base_url=provider.api_url,
    )

    # Contextualized models use a different endpoint and nested inputs.
    # NOTE(review): detection is a substring match on the model name —
    # assumes all and only contextualized models contain "context".
    is_contextualized = "context" in model_name

    if is_contextualized:
        # Each input becomes a single-chunk document.
        params: dict[str, Any] = {
            "inputs": [[inp] for inp in inputs],
            "input_type": "document",
            "model": model_name,
        }
        endpoint = "/contextualizedembeddings"
    else:
        params = {
            "input": inputs,
            "model": model_name,
        }
        endpoint = "/embeddings"

    if shortening is not None:
        params["output_dimension"] = shortening

    result = await client.post(
        endpoint,
        json=params,
    )

    error = None
    if result.status_code >= 400:
        error = rs.Error(
            message=(
                f"API call to generate embeddings failed with status "
                f"{result.status_code}: {result.text}"
            ),
            retry=(
                # 429 (too many requests) is transient and safe to retry.
                result.status_code == 429
            ),
        )

    if is_contextualized and not error:
        # Contextualized responses nest embeddings per document:
        # data[doc]["data"][chunk].  Flatten to data[chunk] so callers
        # can consume the payload like a standard embeddings response.
        response_data = json.loads(result.bytes())
        response_data["data"] = [
            chunk
            for doc in response_data.get("data", [])
            for chunk in doc.get("data", [])
        ]
        return EmbeddingsResult(
            data=EmbeddingsData(json.dumps(response_data).encode())
        )

    return EmbeddingsResult(
        data=(error if error else EmbeddingsData(result.bytes())),
    )


def _read_openai_header_field(
result: Any,
field_names: list[str],
Expand Down