Skip to content

Commit e0d238f

Browse files
committed
[BUG] schema: build default with config ef & knn_index, remove #document population in defaults
1 parent dfbb6c2 commit e0d238f

File tree

11 files changed

+766
-84
lines changed

11 files changed

+766
-84
lines changed

go/pkg/sysdb/coordinator/model/collection_configuration.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@ type ValueTypes struct {
249249
SparseVector *SparseVectorValueType `json:"sparse_vector,omitempty"`
250250
Int *IntValueType `json:"int,omitempty"`
251251
Float *FloatValueType `json:"float,omitempty"`
252-
Boolean *BoolValueType `json:"boolean,omitempty"`
252+
Boolean *BoolValueType `json:"bool,omitempty"`
253253
}
254254

255255
type Schema struct {

go/pkg/sysdb/coordinator/model/collection_configuration_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ func TestUpdateSchemaFromConfig_HnswSuccess(t *testing.T) {
147147
"config": {}
148148
}
149149
},
150-
"boolean": {
150+
"bool": {
151151
"bool_inverted_index": {
152152
"enabled": true,
153153
"config": {}
@@ -312,7 +312,7 @@ func TestUpdateSchemaFromConfig_SpannSuccess(t *testing.T) {
312312
"config": {}
313313
}
314314
},
315-
"boolean": {
315+
"bool": {
316316
"bool_inverted_index": {
317317
"enabled": true,
318318
"config": {}
@@ -481,7 +481,7 @@ func TestUpdateSchemaFromConfig_EmbeddingFunction(t *testing.T) {
481481
"config": {}
482482
}
483483
},
484-
"boolean": {
484+
"bool": {
485485
"bool_inverted_index": {
486486
"enabled": true,
487487
"config": {}

go/pkg/sysdb/coordinator/table_catalog_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1555,7 +1555,7 @@ func TestUpdateCollection_WithSchema(t *testing.T) {
15551555
"config": {}
15561556
}
15571557
},
1558-
"boolean": {
1558+
"bool": {
15591559
"bool_inverted_index": {
15601560
"enabled": true,
15611561
"config": {}

rust/cli/src/commands/vacuum.rs

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use chroma_segment::local_segment_manager::LocalSegmentManager;
1111
use chroma_sqlite::db::SqliteDb;
1212
use chroma_sysdb::SysDb;
1313
use chroma_system::System;
14-
use chroma_types::{CollectionUuid, KnnIndex, ListCollectionsRequest};
14+
use chroma_types::{CollectionUuid, KnnIndex, ListCollectionsRequest, Schema};
1515
use clap::Parser;
1616
use colored::Colorize;
1717
use dialoguer::Confirm;
@@ -108,10 +108,15 @@ async fn trigger_vector_segments_max_seq_id_migration(
108108
for collection_id in collection_ids {
109109
let mut collection = sysdb.get_collection_with_segments(collection_id).await?;
110110

111-
collection
112-
.collection
113-
.reconcile_schema_with_config(default_knn_index)
114-
.map_err(|e| Box::new(e) as Box<dyn Error>)?;
111+
if collection.collection.schema.is_none() {
112+
collection.collection.schema = Some(
113+
Schema::convert_collection_config_to_schema(
114+
&collection.collection.config,
115+
default_knn_index,
116+
)
117+
.map_err(|e| Box::new(e) as Box<dyn Error>)?,
118+
);
119+
}
115120

116121
// If collection is uninitialized, that means nothing has been written yet.
117122
let dim = match collection.collection.dimension {

rust/frontend/src/get_collection_with_segments_provider.rs

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -185,14 +185,16 @@ impl CollectionsWithSegmentsProvider {
185185
.await?
186186
};
187187

188-
// reconcile schema and config
189-
let reconciled_schema = Schema::reconcile_schema_and_config(
190-
collection_and_segments_sysdb.collection.schema.as_ref(),
191-
Some(&collection_and_segments_sysdb.collection.config),
192-
knn_index,
193-
)
194-
.map_err(CollectionsWithSegmentsProviderError::InvalidSchema)?;
195-
collection_and_segments_sysdb.collection.schema = Some(reconciled_schema);
188+
if collection_and_segments_sysdb.collection.schema.is_none() {
189+
collection_and_segments_sysdb.collection.schema = Some(
190+
Schema::convert_collection_config_to_schema(
191+
&collection_and_segments_sysdb.collection.config,
192+
knn_index,
193+
)
194+
.map_err(CollectionsWithSegmentsProviderError::InvalidSchema)?,
195+
);
196+
}
197+
196198
self.set_collection_with_segments(collection_and_segments_sysdb.clone())
197199
.await;
198200
Ok(collection_and_segments_sysdb)

rust/frontend/src/impls/service_based_frontend.rs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -381,7 +381,7 @@ impl ServiceBasedFrontend {
381381
if self.enable_schema {
382382
for collection in collections.iter_mut() {
383383
collection
384-
.reconcile_schema_with_config(self.default_knn_index)
384+
.reconcile_schema_for_read()
385385
.map_err(GetCollectionsError::InvalidSchema)?;
386386
}
387387
}
@@ -425,7 +425,7 @@ impl ServiceBasedFrontend {
425425
if self.enable_schema {
426426
for collection in &mut collections {
427427
collection
428-
.reconcile_schema_with_config(self.default_knn_index)
428+
.reconcile_schema_for_read()
429429
.map_err(GetCollectionError::InvalidSchema)?;
430430
}
431431
}
@@ -450,7 +450,7 @@ impl ServiceBasedFrontend {
450450

451451
if self.enable_schema {
452452
collection
453-
.reconcile_schema_with_config(self.default_knn_index)
453+
.reconcile_schema_for_read()
454454
.map_err(GetCollectionByCrnError::InvalidSchema)?;
455455
}
456456
Ok(collection)
@@ -630,9 +630,10 @@ impl ServiceBasedFrontend {
630630
// that was retrieved from sysdb, rather than the one that was passed in
631631
if self.enable_schema {
632632
collection
633-
.reconcile_schema_with_config(self.default_knn_index)
633+
.reconcile_schema_for_read()
634634
.map_err(CreateCollectionError::InvalidSchema)?;
635635
}
636+
636637
Ok(collection)
637638
}
638639

@@ -735,7 +736,7 @@ impl ServiceBasedFrontend {
735736
.await?;
736737
collection_and_segments
737738
.collection
738-
.reconcile_schema_with_config(self.default_knn_index)
739+
.reconcile_schema_for_read()
739740
.map_err(ForkCollectionError::InvalidSchema)?;
740741
let collection = collection_and_segments.collection.clone();
741742
let latest_collection_logical_size_bytes = collection_and_segments

rust/log/src/local_compaction_manager.rs

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ use chroma_sysdb::SysDb;
1515
use chroma_system::Handler;
1616
use chroma_system::{Component, ComponentContext};
1717
use chroma_types::{
18-
Chunk, CollectionUuid, GetCollectionWithSegmentsError, KnnIndex, LogRecord, SchemaError,
18+
Chunk, CollectionUuid, GetCollectionWithSegmentsError, KnnIndex, LogRecord, Schema, SchemaError,
1919
};
2020
use serde::{Deserialize, Serialize};
2121
use thiserror::Error;
@@ -141,9 +141,15 @@ impl Handler<BackfillMessage> for LocalCompactionManager {
141141
.get_collection_with_segments(message.collection_id)
142142
.await?;
143143
let schema_previously_persisted = collection_and_segments.collection.schema.is_some();
144-
collection_and_segments
145-
.collection
146-
.reconcile_schema_with_config(KnnIndex::Hnsw)?;
144+
if !schema_previously_persisted {
145+
collection_and_segments.collection.schema = Some(
146+
Schema::convert_collection_config_to_schema(
147+
&collection_and_segments.collection.config,
148+
KnnIndex::Hnsw,
149+
)
150+
.map_err(CompactionManagerError::SchemaReconcileError)?,
151+
);
152+
}
147153
// If collection is uninitialized, that means nothing has been written yet.
148154
let dim = match collection_and_segments.collection.dimension {
149155
Some(dim) => dim,
@@ -267,7 +273,12 @@ impl Handler<PurgeLogsMessage> for LocalCompactionManager {
267273
.get_collection_with_segments(message.collection_id)
268274
.await?;
269275
let mut collection = collection_segments.collection.clone();
270-
collection.reconcile_schema_with_config(KnnIndex::Hnsw)?;
276+
if collection.schema.is_none() {
277+
collection.schema = Some(
278+
Schema::convert_collection_config_to_schema(&collection.config, KnnIndex::Hnsw)
279+
.map_err(CompactionManagerError::SchemaReconcileError)?,
280+
);
281+
}
271282
// If dimension is None, that means nothing has been written yet.
272283
let dim = match collection.dimension {
273284
Some(dim) => dim,

rust/segment/src/distributed_hnsw.rs

Lines changed: 38 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@ use chroma_index::hnsw_provider::{
88
HnswIndexProviderOpenError, HnswIndexRef,
99
};
1010
use chroma_index::{Index, IndexUuid};
11-
use chroma_types::{Collection, HnswParametersFromSegmentError, SegmentUuid};
11+
use chroma_types::{
12+
Collection, HnswParametersFromSegmentError, KnnIndex, Schema, SchemaError, SegmentUuid,
13+
};
1214
use chroma_types::{MaterializedLogOperation, Segment};
1315
use std::collections::HashMap;
1416
use std::fmt::Debug;
@@ -55,6 +57,8 @@ pub enum DistributedHNSWSegmentFromSegmentError {
5557
MissingHnswConfiguration,
5658
#[error("Could not parse HNSW configuration: {0}")]
5759
InvalidHnswConfiguration(#[from] HnswParametersFromSegmentError),
60+
#[error("Invalid schema: {0}")]
61+
InvalidSchema(#[source] SchemaError),
5862
}
5963

6064
impl ChromaError for DistributedHNSWSegmentFromSegmentError {
@@ -72,6 +76,7 @@ impl ChromaError for DistributedHNSWSegmentFromSegmentError {
7276
DistributedHNSWSegmentFromSegmentError::InvalidHnswConfiguration(_) => {
7377
ErrorCodes::Internal
7478
}
79+
DistributedHNSWSegmentFromSegmentError::InvalidSchema(e) => e.code(),
7580
}
7681
}
7782
}
@@ -96,9 +101,14 @@ impl DistributedHNSWSegmentWriter {
96101
hnsw_index_provider: HnswIndexProvider,
97102
) -> Result<Box<DistributedHNSWSegmentWriter>, Box<DistributedHNSWSegmentFromSegmentError>>
98103
{
99-
let hnsw_configuration = collection
100-
.config
101-
.get_hnsw_config_with_legacy_fallback(segment)
104+
let schema = if let Some(schema) = &collection.schema {
105+
schema
106+
} else {
107+
&Schema::convert_collection_config_to_schema(&collection.config, KnnIndex::Hnsw)
108+
.map_err(DistributedHNSWSegmentFromSegmentError::InvalidSchema)?
109+
};
110+
let hnsw_configuration = schema
111+
.get_internal_hnsw_config_with_legacy_fallback(segment)
102112
.map_err(DistributedHNSWSegmentFromSegmentError::InvalidHnswConfiguration)?
103113
.ok_or(DistributedHNSWSegmentFromSegmentError::MissingHnswConfiguration)?;
104114

@@ -313,9 +323,13 @@ impl DistributedHNSWSegmentReader {
313323
hnsw_index_provider: HnswIndexProvider,
314324
) -> Result<Box<DistributedHNSWSegmentReader>, Box<DistributedHNSWSegmentFromSegmentError>>
315325
{
316-
let hnsw_configuration = collection
317-
.config
318-
.get_hnsw_config_with_legacy_fallback(segment)
326+
let schema = collection.schema.as_ref().ok_or_else(|| {
327+
DistributedHNSWSegmentFromSegmentError::InvalidSchema(SchemaError::InvalidSchema {
328+
reason: "Schema is None".to_string(),
329+
})
330+
})?;
331+
let hnsw_configuration = schema
332+
.get_internal_hnsw_config_with_legacy_fallback(segment)
319333
.map_err(DistributedHNSWSegmentFromSegmentError::InvalidHnswConfiguration)?
320334
.ok_or(DistributedHNSWSegmentFromSegmentError::MissingHnswConfiguration)?;
321335

@@ -394,7 +408,7 @@ pub mod test {
394408
use chroma_index::{HnswIndexConfig, DEFAULT_MAX_ELEMENTS};
395409
use chroma_types::{
396410
Collection, CollectionUuid, InternalCollectionConfiguration, InternalHnswConfiguration,
397-
Segment, SegmentUuid,
411+
KnnIndex, Schema, Segment, SegmentUuid,
398412
};
399413
use tempfile::tempdir;
400414
use uuid::Uuid;
@@ -423,18 +437,20 @@ pub mod test {
423437
config.persist_path,
424438
Some(persist_path.to_str().unwrap().to_string())
425439
);
440+
let config = InternalCollectionConfiguration {
441+
vector_index: chroma_types::VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
442+
max_neighbors: 10,
443+
..Default::default()
444+
}),
445+
embedding_function: None,
446+
};
426447

427448
// Try partial override
428449
let collection = Collection {
429-
config: InternalCollectionConfiguration {
430-
vector_index: chroma_types::VectorIndexConfiguration::Hnsw(
431-
InternalHnswConfiguration {
432-
max_neighbors: 10,
433-
..Default::default()
434-
},
435-
),
436-
embedding_function: None,
437-
},
450+
config: config.clone(),
451+
schema: Some(
452+
Schema::convert_collection_config_to_schema(&config, KnnIndex::Hnsw).unwrap(),
453+
),
438454
..Default::default()
439455
};
440456

@@ -448,9 +464,12 @@ pub mod test {
448464
};
449465

450466
let hnsw_params = collection
451-
.config
452-
.get_hnsw_config_with_legacy_fallback(&segment)
467+
.schema
468+
.as_ref()
469+
.map(|schema| schema.get_internal_hnsw_config_with_legacy_fallback(&segment))
470+
.transpose()
453471
.unwrap()
472+
.flatten()
454473
.unwrap();
455474
let config = HnswIndexConfig::new_persistent(
456475
hnsw_params.max_neighbors,

rust/segment/src/distributed_spann.rs

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -112,14 +112,14 @@ impl SpannSegmentWriter {
112112
return Err(SpannSegmentWriterError::InvalidArgument);
113113
}
114114

115-
let reconciled_schema = Schema::reconcile_schema_and_config(
116-
collection.schema.as_ref(),
117-
Some(&collection.config),
118-
KnnIndex::Spann,
119-
)
120-
.map_err(SpannSegmentWriterError::InvalidSchema)?;
115+
let schema = if let Some(schema) = &collection.schema {
116+
schema
117+
} else {
118+
&Schema::convert_collection_config_to_schema(&collection.config, KnnIndex::Spann)
119+
.map_err(SpannSegmentWriterError::InvalidSchema)?
120+
};
121121

122-
let params = reconciled_schema
122+
let params = schema
123123
.get_internal_spann_config()
124124
.ok_or(SpannSegmentWriterError::MissingSpannConfiguration)?;
125125

@@ -690,8 +690,8 @@ mod test {
690690
..Default::default()
691691
};
692692
collection.schema = Some(
693-
Schema::reconcile_schema_and_config(None, Some(&collection.config), KnnIndex::Spann)
694-
.expect("Error reconciling schema for test collection"),
693+
Schema::convert_collection_config_to_schema(&collection.config, KnnIndex::Spann)
694+
.expect("Error converting config to schema for test collection"),
695695
);
696696

697697
let pl_block_size = 5 * 1024 * 1024;
@@ -927,8 +927,8 @@ mod test {
927927
..Default::default()
928928
};
929929
collection.schema = Some(
930-
Schema::reconcile_schema_and_config(None, Some(&collection.config), KnnIndex::Spann)
931-
.expect("Error reconciling schema for test collection"),
930+
Schema::convert_collection_config_to_schema(&collection.config, KnnIndex::Spann)
931+
.expect("Error converting config to schema for test collection"),
932932
);
933933

934934
let pl_block_size = 5 * 1024 * 1024;
@@ -1089,8 +1089,8 @@ mod test {
10891089
..Default::default()
10901090
};
10911091
collection.schema = Some(
1092-
Schema::reconcile_schema_and_config(None, Some(&collection.config), KnnIndex::Spann)
1093-
.expect("Error reconciling schema for test collection"),
1092+
Schema::convert_collection_config_to_schema(&collection.config, KnnIndex::Spann)
1093+
.expect("Error converting config to schema for test collection"),
10941094
);
10951095

10961096
let segment_id = SegmentUuid::new();
@@ -1220,8 +1220,8 @@ mod test {
12201220
..Default::default()
12211221
};
12221222
collection.schema = Some(
1223-
Schema::reconcile_schema_and_config(None, Some(&collection.config), KnnIndex::Spann)
1224-
.expect("Error reconciling schema for test collection"),
1223+
Schema::convert_collection_config_to_schema(&collection.config, KnnIndex::Spann)
1224+
.expect("Error converting config to schema for test collection"),
12251225
);
12261226

12271227
let pl_block_size = 5 * 1024 * 1024;

0 commit comments

Comments
 (0)