Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
114 commits
Select commit Hold shift + click to select a range
5b33175
Add union type and JSON schema conversion
chardoncs May 7, 2025
a7f7e85
Add basic postgres conversion
chardoncs May 7, 2025
7b959b9
Compact lines
chardoncs May 7, 2025
6b6aae9
Workaround for JSON conversion in union
chardoncs May 7, 2025
bb18d97
Add stub impl for Python union conversion
chardoncs May 7, 2025
a2b5906
Add impl for python object union conversion
chardoncs May 8, 2025
0e19865
Update error message
chardoncs May 8, 2025
4cbd632
Rename type item
chardoncs May 8, 2025
69cc231
Add str parsing method
chardoncs May 8, 2025
3b386b6
Add union conversion for Qdrant
chardoncs May 8, 2025
159e8e3
Merge branch 'cocoindex-io:main' into expr-union-type-impl
chardoncs May 8, 2025
606d26e
Add basic string parsing for union type
chardoncs May 8, 2025
c68fd2a
Fix union conversion for Qdrant
chardoncs May 8, 2025
839ee85
Merge branch 'cocoindex-io:main' into expr-union-type-impl
chardoncs May 9, 2025
4752970
Replace if guards with matches
chardoncs May 9, 2025
b16d0f6
Add extra parsing for string
chardoncs May 9, 2025
c404388
Add rustdoc for parsing method
chardoncs May 9, 2025
312ef53
Turn string parsing into a util function
chardoncs May 9, 2025
c2ebd88
Update union parsing for serde value
chardoncs May 9, 2025
703565b
Add vector union type parsing for Qdrant
chardoncs May 9, 2025
d2d02b0
Switch to BTreeSet for union types
chardoncs May 9, 2025
72537b8
Remove nested union detection
chardoncs May 9, 2025
f837121
Remove TODO: Support struct/table
chardoncs May 9, 2025
2785e1c
Add union type helper struct
chardoncs May 9, 2025
a1f47a6
Add comments
chardoncs May 9, 2025
cca7d1f
Update Python type conversion for union type
chardoncs May 9, 2025
f9e9bec
Use reversed iteration for union type matching
chardoncs May 9, 2025
189a52d
Merge branch 'cocoindex-io:main' into expr-union-type-impl
chardoncs May 10, 2025
b1e1084
Merge branch 'cocoindex-io:main' into expr-union-type-impl
chardoncs May 11, 2025
4ec510d
Add test cases for union fmt
chardoncs May 11, 2025
9a31d2e
Update comments
chardoncs May 11, 2025
c168dcc
Add test cases
chardoncs May 11, 2025
2e06096
Remove "undetected JSON" parsing
chardoncs May 11, 2025
1e9d74c
Merge branch 'cocoindex-io:main' into expr-union-type-impl
chardoncs May 12, 2025
780d154
Merge branch 'cocoindex-io:main' into expr-union-type-impl
chardoncs May 13, 2025
0ffe380
Merge branch 'cocoindex-io:main' into expr-union-type-impl
chardoncs May 14, 2025
f384c16
Merge branch 'cocoindex-io:main' into expr-union-type-impl
chardoncs May 15, 2025
4463e52
Merge branch 'cocoindex-io:main' into expr-union-type-impl
chardoncs May 15, 2025
69a0280
Merge branch 'cocoindex-io:main' into expr-union-type-impl
chardoncs May 16, 2025
1d51d5c
Update union analysis in Python API
chardoncs May 16, 2025
efa1861
Add union type encoding for Python API
chardoncs May 16, 2025
6294ecf
Add single type checking for union type analysis
chardoncs May 16, 2025
fd37218
Merge branch 'cocoindex-io:main' into expr-union-type-impl
chardoncs May 17, 2025
052815a
Merge branch 'cocoindex-io:main' into expr-union-type-impl
chardoncs May 18, 2025
66a09ec
Update union type
chardoncs May 18, 2025
385487f
Merge branch 'cocoindex-io:main' into expr-union-type-impl
chardoncs May 18, 2025
f8eb3cc
Add union decoding
chardoncs May 18, 2025
bdd4b16
Revert "Add union decoding"
chardoncs May 18, 2025
534c791
Update encoded type field
chardoncs May 18, 2025
e8b972e
Merge branch 'cocoindex-io:main' into expr-union-type-impl
chardoncs May 19, 2025
beaa1c1
Update union types field in Python
chardoncs May 19, 2025
39b6039
Merge branch 'cocoindex-io:main' into expr-union-type-impl
chardoncs May 20, 2025
fa2dd14
Merge branch 'main' into expr-union-type-impl
chardoncs May 21, 2025
030281c
Update type serialization
chardoncs May 21, 2025
a037d9a
Revert "Update type serialization"
chardoncs May 22, 2025
224cb5b
Merge branch 'main' into expr-union-type-impl
chardoncs May 22, 2025
d4916d2
Add `UnionVariant` and conversions in `BasicValue`
chardoncs May 22, 2025
8869002
Merge branch 'main' into expr-union-type-impl
chardoncs May 23, 2025
7f08070
Add union value binding for Postgres
chardoncs May 23, 2025
0571e8c
Update type guessing for union from python object
chardoncs May 23, 2025
edeabe7
Replace direct return with break
chardoncs May 23, 2025
d194117
Use `Vec` to remove auto-sort
chardoncs May 25, 2025
fe4941b
Revert "Use `Vec` to remove auto-sort"
chardoncs May 25, 2025
0850cdc
Merge branch 'main' into expr-union-type-impl
chardoncs May 25, 2025
2775b28
Merge branch 'main' into expr-union-type-impl
chardoncs May 27, 2025
d649e0a
Merge branch 'main' into expr-union-type-impl
chardoncs May 29, 2025
9677169
Use `Vec` for union type
chardoncs May 29, 2025
bf2811c
Add union processing for KuzuDB
chardoncs May 29, 2025
a5f6c6c
Update Cypher generation for union type
chardoncs May 29, 2025
d48f6c5
Use 0-based index for `val{i}`
chardoncs May 29, 2025
abb920e
Update tuple
chardoncs Jun 2, 2025
09764e2
Merge branch 'main' into expr-union-type-impl
chardoncs Jun 2, 2025
b246485
Take values for JSON conversion for union
chardoncs Jun 2, 2025
34d0ca3
Update variable name
chardoncs Jun 2, 2025
d029def
Use typed value conversion for union in Postgres
chardoncs Jun 2, 2025
ce45aaa
Replace union conversion with error in `from_pg_value()`
chardoncs Jun 2, 2025
2c7d106
Update union conversion for Qdrant
chardoncs Jun 2, 2025
1a78b48
Update `PyErr` message for union
chardoncs Jun 2, 2025
e8ed867
Move `UnionType` to `schema.rs` as `UnionTypeSchema`
chardoncs Jun 2, 2025
f577da7
Use `to_value()` for union value conversion
chardoncs Jun 2, 2025
59f4bce
Use `bail!()` for early return
chardoncs Jun 2, 2025
70d2010
Update error message for union tuple conversion
chardoncs Jun 2, 2025
a913522
Merge branch 'main' into expr-union-type-impl
chardoncs Jun 2, 2025
dd0d48f
Merge branch 'main' into expr-union-type-impl
chardoncs Jun 3, 2025
549dc30
Move union type checking to the loop
chardoncs Jun 3, 2025
240cf16
Replace `.ok_or_else()` with `.unwrap()`
chardoncs Jun 3, 2025
4832227
Update union variant serialization
chardoncs Jun 3, 2025
5c48526
Merge branch 'main' into expr-union-type-impl
chardoncs Jun 5, 2025
2795d4b
Merge branch 'main' into expr-union-type-impl
chardoncs Jun 11, 2025
af45e67
Merge branch 'main' into expr-union-type-impl
chardoncs Jun 12, 2025
d093571
Match quote styling
chardoncs Jun 12, 2025
866865c
Break infinite loops
chardoncs Jun 12, 2025
6de65b5
Added a union test case
chardoncs Jun 12, 2025
4bcc53d
Fix union typing
chardoncs Jun 12, 2025
4157ff0
Merge branch 'main' into expr-union-type-impl
chardoncs Jun 14, 2025
64cf435
Make `union_variant_types` optional
chardoncs Jun 14, 2025
e873f65
Merge branch 'main' into expr-union-type-impl
chardoncs Jun 14, 2025
ae6e5bc
Update test case
chardoncs Jun 14, 2025
d1205cf
Fix JSON seder and decoding
chardoncs Jun 14, 2025
f748cda
Add UUID union test cases
chardoncs Jun 14, 2025
66b92b5
Add test cases for union type
chardoncs Jun 14, 2025
4df4518
Merge branch 'main' into expr-union-type-impl
chardoncs Jun 15, 2025
e76f8e5
Update the offset datetime test case for unions
chardoncs Jun 15, 2025
047978e
Merge branch 'main' into expr-union-type-impl
chardoncs Jun 16, 2025
0e68c71
Remove union implementation for Kuzu
chardoncs Jun 16, 2025
54cb4eb
Merge branch 'main' into expr-union-type-impl
chardoncs Jun 17, 2025
cae045c
Merge branch 'main' into expr-union-type-impl
chardoncs Jun 18, 2025
986e5fc
Update `union_variant_types` typing
chardoncs Jun 18, 2025
dbe8c41
Merge branch 'main' into expr-union-type-impl
chardoncs Jun 19, 2025
02b7e2a
Add union value serialization for `TypedValue`
chardoncs Jun 19, 2025
b236de8
Merge branch 'main' into expr-union-type-impl
chardoncs Jun 20, 2025
5cc0fca
Update union tuple check for basic value
chardoncs Jun 20, 2025
bb75e46
Reformat files
chardoncs Jun 20, 2025
77096bc
Remove explicit type for array, the type is obvious
chardoncs Jun 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions python/cocoindex/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,9 @@ def decode_vector(value: Any) -> Any | None:

return decode_vector

if src_type_kind == "Union":
return lambda value: value[1]

return lambda value: value


Expand Down
42 changes: 42 additions & 0 deletions python/cocoindex/tests/test_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -549,6 +549,48 @@ def test_field_position_cases(
assert decoder(engine_val) == PythonOrder(**expected_dict)


def test_roundtrip_union_simple() -> None:
    """Round-trip the float ``10.4`` through the union ``int | str | float``."""
    union_type = int | str | float
    validate_full_roundtrip(10.4, union_type)


def test_roundtrip_union_with_active_uuid() -> None:
    """Round-trip the raw bytes of a random UUID through ``str | uuid.UUID | int``."""
    union_type = str | uuid.UUID | int
    # NOTE(review): the value passed is ``bytes`` (``UUID.bytes``), not a
    # ``uuid.UUID`` instance -- presumably the engine-side encoding; confirm.
    uuid_bytes = uuid.uuid4().bytes
    validate_full_roundtrip(uuid_bytes, union_type)


def test_roundtrip_union_with_inactive_uuid() -> None:
    """Round-trip a UUID-shaped plain string through ``str | uuid.UUID | int``."""
    union_type = str | uuid.UUID | int
    # The text looks like a UUID but is a ``str`` value.
    uuid_like_text = "5a9f8f6a-318f-4f1f-929d-566d7444a62d"
    validate_full_roundtrip(uuid_like_text, union_type)


def test_roundtrip_union_offset_datetime() -> None:
    """Round-trip a UTC-aware ``datetime`` through a five-way union ending in ``datetime.datetime``."""
    union_type = str | uuid.UUID | float | int | datetime.datetime
    utc_now = datetime.datetime.now(datetime.UTC)
    validate_full_roundtrip(utc_now, union_type)


def test_roundtrip_union_date() -> None:
    """Round-trip today's ``date`` through a five-way union ending in ``datetime.date``."""
    union_type = str | uuid.UUID | float | int | datetime.date
    today = datetime.date.today()
    validate_full_roundtrip(today, union_type)


def test_roundtrip_union_time() -> None:
    """Round-trip a default-constructed ``time`` through a five-way union ending in ``datetime.time``."""
    union_type = str | uuid.UUID | float | int | datetime.time
    default_time = datetime.time()
    validate_full_roundtrip(default_time, union_type)


def test_roundtrip_union_timedelta() -> None:
    """Round-trip a 39h 10m 1s ``timedelta`` through a five-way union ending in ``datetime.timedelta``."""
    union_type = str | uuid.UUID | float | int | datetime.timedelta
    duration = datetime.timedelta(hours=39, minutes=10, seconds=1)
    validate_full_roundtrip(duration, union_type)


def test_roundtrip_ltable() -> None:
t = list[Order]
value = [Order("O1", "item1", 10.0), Order("O2", "item2", 20.0)]
Expand Down
41 changes: 29 additions & 12 deletions python/cocoindex/typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ class AnalyzedTypeInfo:

attrs: dict[str, Any] | None
nullable: bool = False
union_variant_types: typing.List[ElementType] | None = None # For Union


def analyze_type_info(t: Any) -> AnalyzedTypeInfo:
Expand All @@ -181,18 +182,6 @@ def analyze_type_info(t: Any) -> AnalyzedTypeInfo:
if base_type is Annotated:
annotations = t.__metadata__
t = t.__origin__
elif base_type is types.UnionType:
possible_types = typing.get_args(t)
non_none_types = [
arg for arg in possible_types if arg not in (None, types.NoneType)
]
if len(non_none_types) != 1:
raise ValueError(
f"Expect exactly one non-None choice for Union type, but got {len(non_none_types)}: {t}"
)
t = non_none_types[0]
if len(possible_types) > 1:
nullable = True
else:
break

Expand All @@ -211,6 +200,7 @@ def analyze_type_info(t: Any) -> AnalyzedTypeInfo:

struct_type: type | None = None
elem_type: ElementType | None = None
union_variant_types: typing.List[ElementType] | None = None
key_type: type | None = None
np_number_type: type | None = None
if _is_struct_type(t):
Expand Down Expand Up @@ -251,6 +241,24 @@ def analyze_type_info(t: Any) -> AnalyzedTypeInfo:
args = typing.get_args(t)
elem_type = (args[0], args[1])
kind = "KTable"
elif base_type is types.UnionType:
possible_types = typing.get_args(t)
non_none_types = [
arg for arg in possible_types if arg not in (None, types.NoneType)
]

if len(non_none_types) == 0:
return analyze_type_info(None)

nullable = len(non_none_types) < len(possible_types)

if len(non_none_types) == 1:
result = analyze_type_info(non_none_types[0])
result.nullable = nullable
return result

kind = "Union"
union_variant_types = non_none_types
elif kind is None:
if t is bytes:
kind = "Bytes"
Expand Down Expand Up @@ -279,6 +287,7 @@ def analyze_type_info(t: Any) -> AnalyzedTypeInfo:
kind=kind,
vector_info=vector_info,
elem_type=elem_type,
union_variant_types=union_variant_types,
key_type=key_type,
struct_type=struct_type,
np_number_type=np_number_type,
Expand Down Expand Up @@ -338,6 +347,14 @@ def _encode_type(type_info: AnalyzedTypeInfo) -> dict[str, Any]:
encoded_type["element_type"] = _encode_type(elem_type_info)
encoded_type["dimension"] = type_info.vector_info.dim

elif type_info.kind == "Union":
if type_info.union_variant_types is None:
raise ValueError("Union type must have a variant type list")
encoded_type["types"] = [
_encode_type(analyze_type_info(typ))
for typ in type_info.union_variant_types
]

elif type_info.kind in TABLE_TYPES:
if type_info.elem_type is None:
raise ValueError(f"{type_info.kind} type must have an element type")
Expand Down
12 changes: 12 additions & 0 deletions src/base/json_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use crate::prelude::*;
use crate::utils::immutable::RefList;
use schemars::schema::{
ArrayValidation, InstanceType, ObjectValidation, Schema, SchemaObject, SingleOrVec,
SubschemaValidation,
};
use std::fmt::Write;

Expand Down Expand Up @@ -176,6 +177,17 @@ impl JsonSchemaBuilder {
..Default::default()
}));
}
schema::BasicValueType::Union(s) => {
schema.subschemas = Some(Box::new(SubschemaValidation {
one_of: Some(
s.types
.iter()
.map(|t| Schema::Object(self.for_basic_value_type(t, field_path)))
.collect(),
),
..Default::default()
}));
}
}
schema
}
Expand Down
19 changes: 19 additions & 0 deletions src/base/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@ pub struct VectorTypeSchema {
pub dimension: Option<usize>,
}

/// Schema of a union type: the ordered list of basic types a value may take.
///
/// The position of a type in `types` matters: `BasicValue::from_json` looks up
/// the variant's type with `types.get(tag_id)`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct UnionTypeSchema {
/// Candidate variant types, addressed by index.
pub types: Vec<BasicValueType>,
}

#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "kind")]
pub enum BasicValueType {
Expand Down Expand Up @@ -56,6 +61,9 @@ pub enum BasicValueType {

/// A vector of values (usually numbers, for embeddings).
Vector(VectorTypeSchema),

/// A union of several basic value types (see `UnionTypeSchema`).
Union(UnionTypeSchema),
}

impl std::fmt::Display for BasicValueType {
Expand All @@ -82,6 +90,17 @@ impl std::fmt::Display for BasicValueType {
}
write!(f, "]")
}
BasicValueType::Union(s) => {
write!(f, "Union[")?;
for (i, typ) in s.types.iter().enumerate() {
if i > 0 {
// Add type delimiter
write!(f, " | ")?;
}
write!(f, "{}", typ)?;
}
write!(f, "]")
}
}
}
}
Expand Down
63 changes: 60 additions & 3 deletions src/base/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,10 @@ pub enum BasicValue {
TimeDelta(chrono::Duration),
Json(Arc<serde_json::Value>),
Vector(Arc<[BasicValue]>),
UnionVariant {
tag_id: usize,
value: Box<BasicValue>,
},
}

impl From<Bytes> for BasicValue {
Expand Down Expand Up @@ -496,7 +500,8 @@ impl BasicValue {
| BasicValue::OffsetDateTime(_)
| BasicValue::TimeDelta(_)
| BasicValue::Json(_)
| BasicValue::Vector(_) => api_bail!("invalid key value type"),
| BasicValue::Vector(_)
| BasicValue::UnionVariant { .. } => api_bail!("invalid key value type"),
};
Ok(result)
}
Expand All @@ -517,7 +522,8 @@ impl BasicValue {
| BasicValue::OffsetDateTime(_)
| BasicValue::TimeDelta(_)
| BasicValue::Json(_)
| BasicValue::Vector(_) => api_bail!("invalid key value type"),
| BasicValue::Vector(_)
| BasicValue::UnionVariant { .. } => api_bail!("invalid key value type"),
};
Ok(result)
}
Expand All @@ -539,6 +545,7 @@ impl BasicValue {
BasicValue::TimeDelta(_) => "timedelta",
BasicValue::Json(_) => "json",
BasicValue::Vector(_) => "vector",
BasicValue::UnionVariant { .. } => "union",
}
}
}
Expand Down Expand Up @@ -892,6 +899,12 @@ impl serde::Serialize for BasicValue {
BasicValue::TimeDelta(v) => serializer.serialize_str(&v.to_string()),
BasicValue::Json(v) => v.serialize(serializer),
BasicValue::Vector(v) => v.serialize(serializer),
BasicValue::UnionVariant { tag_id, value } => {
let mut s = serializer.serialize_tuple(2)?;
s.serialize_element(tag_id)?;
s.serialize_element(value)?;
s.end()
}
}
}
}
Expand Down Expand Up @@ -956,6 +969,40 @@ impl BasicValue {
.collect::<Result<Vec<_>>>()?;
BasicValue::Vector(Arc::from(vec))
}
(v, BasicValueType::Union(typ)) => {
    // A union value is encoded as a two-element JSON array: `[tag_id, value]`.
    let arr = match v {
        serde_json::Value::Array(arr) => arr,
        _ => anyhow::bail!("Invalid JSON value for union, expect array"),
    };

    if arr.len() != 2 {
        anyhow::bail!(
            "Invalid union tuple: expect 2 values, received {}",
            arr.len()
        );
    }

    let mut obj_iter = arr.into_iter();

    // First element: index of the active variant in `typ.types`.
    // The tag comes from external JSON, so a non-integer, negative, or
    // out-of-range tag is reported as an error instead of panicking or being
    // silently truncated by an `as` cast.
    let tag_id = obj_iter
        .next()
        .and_then(|tag| tag.as_u64())
        .and_then(|tag| usize::try_from(tag).ok())
        .ok_or_else(|| {
            anyhow::anyhow!("Invalid union tuple: `tag_id` must be a non-negative integer")
        })?;

    // Second element: the payload, decoded below against the selected type.
    let value = obj_iter
        .next()
        .expect("union tuple length was checked to be 2");

    let cur_type = typ
        .types
        .get(tag_id)
        .ok_or_else(|| anyhow::anyhow!("No type in `tag_id` \"{tag_id}\" found"))?;

    BasicValue::UnionVariant {
        tag_id,
        value: Box::new(BasicValue::from_json(value, cur_type)?),
    }
}
(v, t) => {
anyhow::bail!("Value and type not matched.\nTarget type {t:?}\nJSON value: {v}\n")
}
Expand Down Expand Up @@ -1088,7 +1135,17 @@ impl Serialize for TypedValue<'_> {
fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
match (self.t, self.v) {
(_, Value::Null) => serializer.serialize_none(),
(ValueType::Basic(_), v) => v.serialize(serializer),
(ValueType::Basic(t), v) => match t {
BasicValueType::Union(_) => match v {
Value::Basic(BasicValue::UnionVariant { value, .. }) => {
value.serialize(serializer)
}
_ => Err(serde::ser::Error::custom(
"Unmatched union type and value for `TypedValue`",
)),
},
_ => v.serialize(serializer),
},
(ValueType::Struct(s), Value::Struct(field_values)) => TypedFieldsValue {
schema: &s.fields,
values_iter: field_values.fields.iter(),
Expand Down
12 changes: 9 additions & 3 deletions src/llm/litellm.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,22 @@
use async_openai::config::OpenAIConfig;
use async_openai::Client as OpenAIClient;
use async_openai::config::OpenAIConfig;

pub use super::openai::Client;

impl Client {
pub async fn new_litellm(spec: super::LlmSpec) -> anyhow::Result<Self> {
let address = spec.address.clone().unwrap_or_else(|| "http://127.0.0.1:4000".to_string());
let address = spec
.address
.clone()
.unwrap_or_else(|| "http://127.0.0.1:4000".to_string());
let api_key = std::env::var("LITELLM_API_KEY").ok();
let mut config = OpenAIConfig::new().with_api_base(address);
if let Some(api_key) = api_key {
config = config.with_api_key(api_key);
}
Ok(Client::from_parts(OpenAIClient::with_config(config), spec.model))
Ok(Client::from_parts(
OpenAIClient::with_config(config),
spec.model,
))
}
}
9 changes: 3 additions & 6 deletions src/llm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,9 @@ pub trait LlmGenerationClient: Send + Sync {

mod anthropic;
mod gemini;
mod litellm;
mod ollama;
mod openai;
mod litellm;
mod openrouter;

pub async fn new_llm_generation_client(spec: LlmSpec) -> Result<Box<dyn LlmGenerationClient>> {
Expand All @@ -78,11 +78,8 @@ pub async fn new_llm_generation_client(spec: LlmSpec) -> Result<Box<dyn LlmGener
LlmApiType::LiteLlm => {
Box::new(litellm::Client::new_litellm(spec).await?) as Box<dyn LlmGenerationClient>
}
LlmApiType::OpenRouter => {
Box::new(openrouter::Client::new_openrouter(spec).await?) as Box<dyn LlmGenerationClient>
}


LlmApiType::OpenRouter => Box::new(openrouter::Client::new_openrouter(spec).await?)
as Box<dyn LlmGenerationClient>,
};
Ok(client)
}
12 changes: 9 additions & 3 deletions src/llm/openrouter.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,22 @@
use async_openai::config::OpenAIConfig;
use async_openai::Client as OpenAIClient;
use async_openai::config::OpenAIConfig;

pub use super::openai::Client;

impl Client {
pub async fn new_openrouter(spec: super::LlmSpec) -> anyhow::Result<Self> {
let address = spec.address.clone().unwrap_or_else(|| "https://openrouter.ai/api/v1".to_string());
let address = spec
.address
.clone()
.unwrap_or_else(|| "https://openrouter.ai/api/v1".to_string());
let api_key = std::env::var("OPENROUTER_API_KEY").ok();
let mut config = OpenAIConfig::new().with_api_base(address);
if let Some(api_key) = api_key {
config = config.with_api_key(api_key);
}
Ok(Client::from_parts(OpenAIClient::with_config(config), spec.model))
Ok(Client::from_parts(
OpenAIClient::with_config(config),
spec.model,
))
}
}
Loading
Loading