Skip to content

Commit 239ff82

Browse files
jairad26github-actions[bot]
authored andcommitted
[BUG] is_default_schema does not do a space check for defaults or #embedding (#5787)
## Description of changes _Summarize the changes made by this PR._ - Improvements & Bug fixes - There is a bug in schema where is_default_schema does not check the space when confirming if the schema is default. This was hidden by our e2e tests, since we used an ef with a default space of cosine. therefore the configuration was cosine and was automatically converted into the schema and tests passed. this PR adds a new test to confirm the basic space setting scenario works - New functionality - ... ## Test plan _How are these changes tested?_ - [ ] Tests pass locally with `pytest` for python, `yarn test` for js, `cargo test` for rust ## Migration plan _Are there any migrations, or any forwards/backwards compatibility changes needed in order to make sure this change deploys reliably?_ ## Observability plan _What is the plan to instrument and monitor this change?_ ## Documentation Changes _Are all docstrings for user-facing APIs updated if required? Do we need to make documentation changes in the [docs section](https://github.com/chroma-core/chroma/tree/main/docs/docs.trychroma.com)?_
1 parent 3038d08 commit 239ff82

File tree

2 files changed

+124
-1
lines changed

2 files changed

+124
-1
lines changed

chromadb/test/api/test_schema_e2e.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,64 @@ def test_schema_vector_config_persistence(
105105

106106
collection_name = f"schema_spann_{uuid4().hex}"
107107

108+
schema = Schema()
109+
schema.create_index(
110+
config=VectorIndexConfig(
111+
space="cosine",
112+
spann=SpannIndexConfig(
113+
search_nprobe=16,
114+
write_nprobe=32,
115+
ef_construction=120,
116+
max_neighbors=24,
117+
),
118+
)
119+
)
120+
121+
collection = client.get_or_create_collection(
122+
name=collection_name,
123+
schema=schema,
124+
)
125+
126+
persisted_schema = collection.schema
127+
assert persisted_schema is not None
128+
129+
print(persisted_schema.serialize_to_json())
130+
131+
embedding_override = persisted_schema.keys["#embedding"].float_list
132+
assert embedding_override is not None
133+
vector_index = embedding_override.vector_index
134+
assert vector_index is not None
135+
assert vector_index.enabled is True
136+
assert vector_index.config is not None
137+
assert vector_index.config.space is not None
138+
assert vector_index.config.space == "cosine"
139+
140+
client_reloaded = client_factories.create_client_from_system()
141+
reloaded_collection = client_reloaded.get_collection(
142+
name=collection_name,
143+
)
144+
145+
reloaded_schema = reloaded_collection.schema
146+
assert reloaded_schema is not None
147+
reloaded_embedding_override = reloaded_schema.keys["#embedding"].float_list
148+
assert reloaded_embedding_override is not None
149+
reloaded_vector_index = reloaded_embedding_override.vector_index
150+
assert reloaded_vector_index is not None
151+
assert reloaded_vector_index.config is not None
152+
assert reloaded_vector_index.config.space is not None
153+
assert reloaded_vector_index.config.space == "cosine"
154+
155+
156+
def test_schema_vector_config_persistence_with_ef(
157+
client_factories: "ClientFactories",
158+
) -> None:
159+
"""Ensure schema-provided SPANN settings persist across client restarts."""
160+
161+
client = client_factories.create_client_from_system()
162+
client.reset()
163+
164+
collection_name = f"schema_spann_{uuid4().hex}"
165+
108166
schema = Schema()
109167
embedding_function = SimpleEmbeddingFunction(dim=6)
110168
schema.create_index(

rust/types/src/collection_schema.rs

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1547,8 +1547,13 @@ impl Schema {
15471547
if vector_index.enabled {
15481548
return false;
15491549
}
1550+
if !is_embedding_function_default(&vector_index.config.embedding_function) {
1551+
return false;
1552+
}
1553+
if !is_space_default(&vector_index.config.space) {
1554+
return false;
1555+
}
15501556
// Check that the config has default structure
1551-
// We allow space and embedding_function to vary, but check structure
15521557
if vector_index.config.source_key.is_some() {
15531558
return false;
15541559
}
@@ -1613,6 +1618,9 @@ impl Schema {
16131618
if !vector_index.enabled {
16141619
return false;
16151620
}
1621+
if !is_space_default(&vector_index.config.space) {
1622+
return false;
1623+
}
16161624
// Check that embedding_function is default
16171625
if !is_embedding_function_default(&vector_index.config.embedding_function) {
16181626
return false;
@@ -3801,6 +3809,63 @@ mod tests {
38013809
assert!(!schema_with_extra_overrides.is_default());
38023810
}
38033811

3812+
#[test]
3813+
fn test_is_schema_default_with_space() {
3814+
let schema = Schema::new_default(KnnIndex::Hnsw);
3815+
assert!(schema.is_default());
3816+
3817+
let mut schema_with_space = Schema::new_default(KnnIndex::Hnsw);
3818+
if let Some(ref mut float_list) = schema_with_space.defaults.float_list {
3819+
if let Some(ref mut vector_index) = float_list.vector_index {
3820+
vector_index.config.space = Some(Space::Cosine);
3821+
}
3822+
}
3823+
assert!(!schema_with_space.is_default());
3824+
3825+
let mut schema_with_space_in_embedding_key = Schema::new_default(KnnIndex::Spann);
3826+
if let Some(ref mut embedding_key) = schema_with_space_in_embedding_key
3827+
.keys
3828+
.get_mut(EMBEDDING_KEY)
3829+
{
3830+
if let Some(ref mut float_list) = embedding_key.float_list {
3831+
if let Some(ref mut vector_index) = float_list.vector_index {
3832+
vector_index.config.space = Some(Space::Cosine);
3833+
}
3834+
}
3835+
}
3836+
assert!(!schema_with_space_in_embedding_key.is_default());
3837+
}
3838+
3839+
#[test]
3840+
fn test_is_schema_default_with_embedding_function() {
3841+
let schema = Schema::new_default(KnnIndex::Hnsw);
3842+
assert!(schema.is_default());
3843+
3844+
let mut schema_with_embedding_function = Schema::new_default(KnnIndex::Hnsw);
3845+
if let Some(ref mut float_list) = schema_with_embedding_function.defaults.float_list {
3846+
if let Some(ref mut vector_index) = float_list.vector_index {
3847+
vector_index.config.embedding_function =
3848+
Some(EmbeddingFunctionConfiguration::Legacy);
3849+
}
3850+
}
3851+
assert!(!schema_with_embedding_function.is_default());
3852+
3853+
let mut schema_with_embedding_function_in_embedding_key =
3854+
Schema::new_default(KnnIndex::Spann);
3855+
if let Some(ref mut embedding_key) = schema_with_embedding_function_in_embedding_key
3856+
.keys
3857+
.get_mut(EMBEDDING_KEY)
3858+
{
3859+
if let Some(ref mut float_list) = embedding_key.float_list {
3860+
if let Some(ref mut vector_index) = float_list.vector_index {
3861+
vector_index.config.embedding_function =
3862+
Some(EmbeddingFunctionConfiguration::Legacy);
3863+
}
3864+
}
3865+
}
3866+
assert!(!schema_with_embedding_function_in_embedding_key.is_default());
3867+
}
3868+
38043869
#[test]
38053870
fn test_add_merges_keys_by_value_type() {
38063871
let mut schema_a = Schema::new_default(KnnIndex::Hnsw);

0 commit comments

Comments
 (0)