
Commit 89e3be0

feat(embed-text): add EmbedText for OpenAI and Gemini (#645)
1 parent 80313a3 commit 89e3be0

12 files changed, +313 -41 lines

examples/text_embedding/main.py

Lines changed: 8 additions & 0 deletions

@@ -1,6 +1,7 @@
 from dotenv import load_dotenv
 from psycopg_pool import ConnectionPool
 from pgvector.psycopg import register_vector
+from typing import Any
 import cocoindex
 import os
 from numpy.typing import NDArray
@@ -15,6 +16,13 @@ def text_to_embedding(
     Embed the text using a SentenceTransformer model.
     This is a shared logic between indexing and querying, so extract it as a function.
     """
+    # You can also switch to remote embedding model:
+    # return text.transform(
+    #     cocoindex.functions.EmbedText(
+    #         api_type=cocoindex.llm.LlmApiType.OPENAI,
+    #         model="text-embedding-3-small",
+    #     )
+    # )
     return text.transform(
         cocoindex.functions.SentenceTransformerEmbed(
             model="sentence-transformers/all-MiniLM-L6-v2"

python/cocoindex/functions.py

Lines changed: 10 additions & 0 deletions

@@ -32,6 +32,16 @@ class SplitRecursively(op.FunctionSpec):
     custom_languages: list[CustomLanguageSpec] = dataclasses.field(default_factory=list)


+class EmbedText(op.FunctionSpec):
+    """Embed a text into a vector space."""
+
+    api_type: llm.LlmApiType
+    model: str
+    address: str | None = None
+    output_dimension: int | None = None
+    task_type: str | None = None
+
+
 class ExtractByLlm(op.FunctionSpec):
     """Extract information from a text using a LLM."""

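
A construction sketch for the new spec (illustrative only, not from this commit; field names follow the dataclass above, and the fallback behavior is an assumption based on get_default_embedding_dimension in src/llm/mod.rs below):

    import cocoindex

    spec = cocoindex.functions.EmbedText(
        api_type=cocoindex.llm.LlmApiType.OPENAI,
        model="text-embedding-3-small",  # default dimension 1536 (see src/llm/openai.rs)
        address=None,           # both backends reject a custom API address
        output_dimension=512,   # OpenAI text-embedding-3-* models accept reduced dimensions
        task_type=None,         # only meaningful for Gemini, sent as taskType
    )

When output_dimension is omitted, the per-model defaults registered in the Rust backends presumably apply.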

src/llm/anthropic.rs

Lines changed: 2 additions & 2 deletions

@@ -1,5 +1,5 @@
 use crate::llm::{
-    LlmClient, LlmGenerateRequest, LlmGenerateResponse, OutputFormat, ToJsonSchemaOptions,
+    LlmGenerateRequest, LlmGenerateResponse, LlmGenerationClient, OutputFormat, ToJsonSchemaOptions,
 };
 use anyhow::{Context, Result, bail};
 use async_trait::async_trait;
@@ -31,7 +31,7 @@ impl Client {
 }

 #[async_trait]
-impl LlmClient for Client {
+impl LlmGenerationClient for Client {
     async fn generate<'req>(
         &self,
         request: LlmGenerateRequest<'req>,

src/llm/gemini.rs

Lines changed: 80 additions & 14 deletions

@@ -1,9 +1,10 @@
-use crate::api_bail;
+use crate::prelude::*;
+
 use crate::llm::{
-    LlmClient, LlmGenerateRequest, LlmGenerateResponse, OutputFormat, ToJsonSchemaOptions,
+    LlmEmbeddingClient, LlmGenerateRequest, LlmGenerateResponse, LlmGenerationClient, OutputFormat,
+    ToJsonSchemaOptions,
 };
-use anyhow::{Context, Result, bail};
-use async_trait::async_trait;
+use phf::phf_map;
 use serde_json::Value;
 use urlencoding::encode;

@@ -13,7 +14,7 @@ pub struct Client {
 }

 impl Client {
-    pub async fn new(address: Option<String>) -> Result<Self> {
+    pub fn new(address: Option<String>) -> Result<Self> {
         if address.is_some() {
             api_bail!("Gemini doesn't support custom API address");
         }
@@ -46,8 +47,19 @@ fn remove_additional_properties(value: &mut Value) {
     }
 }

+impl Client {
+    fn get_api_url(&self, model: &str, api_name: &str) -> String {
+        format!(
+            "https://generativelanguage.googleapis.com/v1beta/models/{}:{}?key={}",
+            encode(model),
+            api_name,
+            encode(&self.api_key)
+        )
+    }
+}
+
 #[async_trait]
-impl LlmClient for Client {
+impl LlmGenerationClient for Client {
     async fn generate<'req>(
         &self,
         request: LlmGenerateRequest<'req>,
@@ -76,21 +88,21 @@ impl LlmClient for Client {
             });
         }

-        let api_key = &self.api_key;
-        let url = format!(
-            "https://generativelanguage.googleapis.com/v1beta/models/{}:generateContent?key={}",
-            encode(request.model),
-            encode(api_key)
-        );
-
+        let url = self.get_api_url(request.model, "generateContent");
         let resp = self
             .client
             .post(&url)
             .json(&payload)
             .send()
             .await
             .context("HTTP error")?;
-
+        if !resp.status().is_success() {
+            bail!(
+                "Gemini API error: {:?}\n{}\n",
+                resp.status(),
+                resp.text().await?
+            );
+        }
         let resp_json: Value = resp.json().await.context("Invalid JSON")?;

         if let Some(error) = resp_json.get("error") {
@@ -114,3 +126,57 @@ impl LlmClient for Client {
         }
     }
 }
+
+static DEFAULT_EMBEDDING_DIMENSIONS: phf::Map<&str, u32> = phf_map! {
+    "gemini-embedding-exp-03-07" => 3072,
+    "text-embedding-004" => 768,
+    "embedding-001" => 768,
+};
+
+#[derive(Deserialize)]
+struct ContentEmbedding {
+    values: Vec<f32>,
+}
+#[derive(Deserialize)]
+struct EmbedContentResponse {
+    embedding: ContentEmbedding,
+}
+
+#[async_trait]
+impl LlmEmbeddingClient for Client {
+    async fn embed_text<'req>(
+        &self,
+        request: super::LlmEmbeddingRequest<'req>,
+    ) -> Result<super::LlmEmbeddingResponse> {
+        let url = self.get_api_url(request.model, "embedContent");
+        let mut payload = serde_json::json!({
+            "model": request.model,
+            "content": { "parts": [{ "text": request.text }] },
+        });
+        if let Some(task_type) = request.task_type {
+            payload["taskType"] = serde_json::Value::String(task_type.into());
+        }
+        let resp = self
+            .client
+            .post(&url)
+            .json(&payload)
+            .send()
+            .await
+            .context("HTTP error")?;
+        if !resp.status().is_success() {
+            bail!(
+                "Gemini API error: {:?}\n{}\n",
+                resp.status(),
+                resp.text().await?
+            );
+        }
+        let embedding_resp: EmbedContentResponse = resp.json().await.context("Invalid JSON")?;
+        Ok(super::LlmEmbeddingResponse {
+            embedding: embedding_resp.embedding.values,
+        })
+    }
+
+    fn get_default_embedding_dimension(&self, model: &str) -> Option<u32> {
+        DEFAULT_EMBEDDING_DIMENSIONS.get(model).copied()
+    }
+}
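
To make the wire format concrete, here is a rough Python equivalent of embed_text above. This is a sketch for illustration only; it assumes GEMINI_API_KEY is set and mirrors the v1beta embedContent endpoint, payload, and response shape used by the Rust client:

    import os
    import requests
    from urllib.parse import quote

    def embed_text(model: str, text: str, task_type: str | None = None) -> list[float]:
        # Same URL scheme as Client::get_api_url with api_name = "embedContent".
        url = (
            "https://generativelanguage.googleapis.com/v1beta/models/"
            f"{quote(model)}:embedContent?key={quote(os.environ['GEMINI_API_KEY'])}"
        )
        payload = {"model": model, "content": {"parts": [{"text": text}]}}
        if task_type is not None:
            payload["taskType"] = task_type  # mirrors the optional taskType field
        resp = requests.post(url, json=payload)
        resp.raise_for_status()  # counterpart of the bail! on a non-success status
        # Response shape matches EmbedContentResponse: {"embedding": {"values": [...]}}
        return resp.json()["embedding"]["values"]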

src/llm/mod.rs

Lines changed: 56 additions & 16 deletions

@@ -1,13 +1,10 @@
-use std::borrow::Cow;
-
-use anyhow::Result;
-use async_trait::async_trait;
-use schemars::schema::SchemaObject;
-use serde::{Deserialize, Serialize};
+use crate::prelude::*;

 use crate::base::json_schema::ToJsonSchemaOptions;
+use schemars::schema::SchemaObject;
+use std::borrow::Cow;

-#[derive(Debug, Clone, Serialize, Deserialize)]
+#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
 pub enum LlmApiType {
     Ollama,
     OpenAi,
@@ -46,7 +43,7 @@ pub struct LlmGenerateResponse {
 }

 #[async_trait]
-pub trait LlmClient: Send + Sync {
+pub trait LlmGenerationClient: Send + Sync {
     async fn generate<'req>(
         &self,
         request: LlmGenerateRequest<'req>,
@@ -55,6 +52,28 @@ pub trait LlmClient: Send + Sync {
     fn json_schema_options(&self) -> ToJsonSchemaOptions;
 }

+#[derive(Debug)]
+pub struct LlmEmbeddingRequest<'a> {
+    pub model: &'a str,
+    pub text: Cow<'a, str>,
+    pub output_dimension: u32,
+    pub task_type: Option<Cow<'a, str>>,
+}
+
+pub struct LlmEmbeddingResponse {
+    pub embedding: Vec<f32>,
+}
+
+#[async_trait]
+pub trait LlmEmbeddingClient: Send + Sync {
+    async fn embed_text<'req>(
+        &self,
+        request: LlmEmbeddingRequest<'req>,
+    ) -> Result<LlmEmbeddingResponse>;
+
+    fn get_default_embedding_dimension(&self, model: &str) -> Option<u32>;
+}
+
 mod anthropic;
 mod gemini;
 mod litellm;
@@ -65,20 +84,41 @@ mod openrouter;
 pub async fn new_llm_generation_client(
     api_type: LlmApiType,
     address: Option<String>,
-) -> Result<Box<dyn LlmClient>> {
+) -> Result<Box<dyn LlmGenerationClient>> {
     let client = match api_type {
-        LlmApiType::Ollama => Box::new(ollama::Client::new(address).await?) as Box<dyn LlmClient>,
-        LlmApiType::OpenAi => Box::new(openai::Client::new(address).await?) as Box<dyn LlmClient>,
-        LlmApiType::Gemini => Box::new(gemini::Client::new(address).await?) as Box<dyn LlmClient>,
+        LlmApiType::Ollama => {
+            Box::new(ollama::Client::new(address).await?) as Box<dyn LlmGenerationClient>
+        }
+        LlmApiType::OpenAi => {
+            Box::new(openai::Client::new(address)?) as Box<dyn LlmGenerationClient>
+        }
+        LlmApiType::Gemini => {
+            Box::new(gemini::Client::new(address)?) as Box<dyn LlmGenerationClient>
+        }
         LlmApiType::Anthropic => {
-            Box::new(anthropic::Client::new(address).await?) as Box<dyn LlmClient>
+            Box::new(anthropic::Client::new(address).await?) as Box<dyn LlmGenerationClient>
         }
         LlmApiType::LiteLlm => {
-            Box::new(litellm::Client::new_litellm(address).await?) as Box<dyn LlmClient>
+            Box::new(litellm::Client::new_litellm(address).await?) as Box<dyn LlmGenerationClient>
+        }
+        LlmApiType::OpenRouter => Box::new(openrouter::Client::new_openrouter(address).await?)
+            as Box<dyn LlmGenerationClient>,
+    };
+    Ok(client)
+}
+
+pub fn new_llm_embedding_client(
+    api_type: LlmApiType,
+    address: Option<String>,
+) -> Result<Box<dyn LlmEmbeddingClient>> {
+    let client = match api_type {
+        LlmApiType::Gemini => {
+            Box::new(gemini::Client::new(address)?) as Box<dyn LlmEmbeddingClient>
         }
-        LlmApiType::OpenRouter => {
-            Box::new(openrouter::Client::new_openrouter(address).await?) as Box<dyn LlmClient>
+        LlmApiType::OpenAi => {
+            Box::new(openai::Client::new(address)?) as Box<dyn LlmEmbeddingClient>
         }
+        _ => api_bail!("Embedding is not supported for API type {:?}", api_type),
     };
     Ok(client)
 }
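
Note that new_llm_embedding_client supports only Gemini and OpenAI for now; any other LlmApiType falls through to the api_bail! arm. On the Python side this means a spec like the following sketch (hypothetical enum member and model name) would be rejected once the flow reaches the embedding backend:

    # Expected to fail: Ollama has a generation client but no embedding client yet.
    cocoindex.functions.EmbedText(
        api_type=cocoindex.llm.LlmApiType.OLLAMA,  # assumed enum member name
        model="nomic-embed-text",                  # hypothetical model name
    )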

src/llm/ollama.rs

Lines changed: 2 additions & 2 deletions

@@ -1,4 +1,4 @@
-use super::LlmClient;
+use super::LlmGenerationClient;
 use anyhow::Result;
 use async_trait::async_trait;
 use schemars::schema::SchemaObject;
@@ -45,7 +45,7 @@ impl Client {
 }

 #[async_trait]
-impl LlmClient for Client {
+impl LlmGenerationClient for Client {
     async fn generate<'req>(
         &self,
         request: super::LlmGenerateRequest<'req>,

src/llm/openai.rs

Lines changed: 43 additions & 5 deletions

@@ -1,18 +1,19 @@
 use crate::api_bail;

-use super::LlmClient;
+use super::{LlmEmbeddingClient, LlmGenerationClient};
 use anyhow::Result;
 use async_openai::{
     Client as OpenAIClient,
     config::OpenAIConfig,
     types::{
         ChatCompletionRequestMessage, ChatCompletionRequestSystemMessage,
         ChatCompletionRequestSystemMessageContent, ChatCompletionRequestUserMessage,
-        ChatCompletionRequestUserMessageContent, CreateChatCompletionRequest, ResponseFormat,
-        ResponseFormatJsonSchema,
+        ChatCompletionRequestUserMessageContent, CreateChatCompletionRequest,
+        CreateEmbeddingRequest, EmbeddingInput, ResponseFormat, ResponseFormatJsonSchema,
     },
 };
 use async_trait::async_trait;
+use phf::phf_map;

 pub struct Client {
     client: async_openai::Client<OpenAIConfig>,
@@ -23,7 +24,7 @@ impl Client {
         Self { client }
     }

-    pub async fn new(address: Option<String>) -> Result<Self> {
+    pub fn new(address: Option<String>) -> Result<Self> {
         if let Some(address) = address {
             api_bail!("OpenAI doesn't support custom API address: {address}");
         }
@@ -39,7 +40,7 @@ impl Client {
 }

 #[async_trait]
-impl LlmClient for Client {
+impl LlmGenerationClient for Client {
     async fn generate<'req>(
         &self,
         request: super::LlmGenerateRequest<'req>,
@@ -109,3 +110,40 @@ impl LlmClient for Client {
         }
     }
 }
+
+static DEFAULT_EMBEDDING_DIMENSIONS: phf::Map<&str, u32> = phf_map! {
+    "text-embedding-3-small" => 1536,
+    "text-embedding-3-large" => 3072,
+    "text-embedding-ada-002" => 1536,
+};
+
+#[async_trait]
+impl LlmEmbeddingClient for Client {
+    async fn embed_text<'req>(
+        &self,
+        request: super::LlmEmbeddingRequest<'req>,
+    ) -> Result<super::LlmEmbeddingResponse> {
+        let response = self
+            .client
+            .embeddings()
+            .create(CreateEmbeddingRequest {
+                model: request.model.to_string(),
+                input: EmbeddingInput::String(request.text.to_string()),
+                dimensions: Some(request.output_dimension),
+                ..Default::default()
+            })
+            .await?;
+        Ok(super::LlmEmbeddingResponse {
+            embedding: response
+                .data
+                .into_iter()
+                .next()
+                .ok_or_else(|| anyhow::anyhow!("No embedding returned from OpenAI"))?
+                .embedding,
+        })
+    }
+
+    fn get_default_embedding_dimension(&self, model: &str) -> Option<u32> {
+        DEFAULT_EMBEDDING_DIMENSIONS.get(model).copied()
+    }
+}
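
For comparison, a rough Python counterpart of embed_text above, using the official openai package (a sketch only; it assumes OPENAI_API_KEY is set, and dimensions corresponds to request.output_dimension in the Rust code):

    from openai import OpenAI

    client = OpenAI()  # reads OPENAI_API_KEY from the environment

    def embed_text(model: str, text: str, output_dimension: int) -> list[float]:
        response = client.embeddings.create(
            model=model,
            input=text,
            dimensions=output_dimension,  # supported by text-embedding-3-* models
        )
        # Like the Rust code, take the first (and only) embedding in the batch.
        return response.data[0].embedding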
