Skip to content

Commit b230820

Browse files
sjudd authored and Convex, Inc. committed
Build a single large text index in the multisegment format when migrating (#26910)
GitOrigin-RevId: e23ab1fb1bf8f154fa17bcea501c2b32ffad0c76
1 parent 28e672c commit b230820

File tree

3 files changed

+147
-13
lines changed

3 files changed

+147
-13
lines changed

crates/database/src/index_workers/search_flusher.rs

Lines changed: 24 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -356,13 +356,11 @@ impl<RT: Runtime, T: SearchIndex + 'static> SearchFlusher<RT, T> {
356356
match snapshot.data {
357357
// If we're on an old or unrecognized version, rebuild everything. The formats
358358
// are not compatible.
359-
SnapshotData::SingleSegment(_) | SnapshotData::Unknown(_) => (
360-
vec![],
361-
MultipartBuildType::IncrementalComplete {
362-
cursor: None,
363-
backfill_snapshot_ts: new_ts,
364-
},
365-
),
359+
// TODO(CX-6743): Make this a failure so that we do not inadvertently rebuild
360+
// indexes once we migrate to the multi segment text search index format.
361+
SnapshotData::SingleSegment(_) | SnapshotData::Unknown(_) => {
362+
(vec![], MultipartBuildType::Complete)
363+
},
366364
SnapshotData::MultiSegment(ref parts) => {
367365
let ts = IndexWorkerMetadataModel::new(&mut tx)
368366
.get_fast_forward_ts(snapshot.ts, job.index_id)
@@ -546,6 +544,22 @@ impl<RT: Runtime, T: SearchIndex + 'static> SearchFlusher<RT, T> {
546544
.boxed();
547545
(documents, previous_segments)
548546
},
547+
// TODO(CX-6743): Remove this logic, it's expensive to run and we don't want to do so
548+
// inadvertently.
549+
MultipartBuildType::Complete => {
550+
let table_iterator = params.database.table_iterator(
551+
snapshot_ts,
552+
*VECTOR_INDEX_WORKER_PAGE_SIZE,
553+
None,
554+
);
555+
(
556+
table_iterator
557+
.stream_documents_in_table(*index_name.table(), by_id, None)
558+
.map_ok(|(doc, ts)| (ts, doc.id().into(), Some(doc)))
559+
.boxed(),
560+
vec![],
561+
)
562+
},
549563
};
550564

551565
let mut mutable_previous_segments = T::download_previous_segments(
@@ -626,6 +640,9 @@ pub struct MultiSegmentBuildResult<T: SearchIndex> {
626640
#[derive(Clone, Copy)]
627641
pub enum MultipartBuildType {
628642
Partial(RepeatableTimestamp),
643+
// TODO(CX-6743): Remove this build type, it's expensive to run and we don't want to do so
644+
// inadvertently.
645+
Complete,
629646
IncrementalComplete {
630647
cursor: Option<ResolvedDocumentId>,
631648
backfill_snapshot_ts: RepeatableTimestamp,

crates/database/src/index_workers/writer.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ use crate::{
5252
IndexBuild,
5353
IndexBuildResult,
5454
},
55+
BuildReason,
5556
MultiSegmentBackfillResult,
5657
},
5758
metrics::{
@@ -392,7 +393,10 @@ impl<RT: Runtime, T: SearchIndex> Inner<RT, T> {
392393
let mut tx: Transaction<RT> = self.database.begin(Identity::system()).await?;
393394
let metadata = Self::require_index_metadata(&mut tx, job.metadata_id).await?;
394395

395-
anyhow::ensure!(metadata.config.is_backfilling());
396+
anyhow::ensure!(
397+
metadata.config.is_backfilling()
398+
|| matches!(job.build_reason, BuildReason::VersionMismatch)
399+
);
396400

397401
let (developer_config, state) = T::extract_metadata(metadata)?;
398402

crates/database/src/text_index_worker/flusher2.rs

Lines changed: 118 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -151,19 +151,35 @@ pub(crate) fn new_text_flusher<RT: Runtime>(
151151
mod tests {
152152
use common::{
153153
bootstrap_model::index::{
154-
text_index::TextIndexState,
154+
text_index::{
155+
TextIndexBackfillState,
156+
TextIndexSnapshot,
157+
TextIndexSnapshotData,
158+
TextIndexState,
159+
TextSnapshotVersion,
160+
},
155161
IndexConfig,
162+
IndexMetadata,
156163
},
157164
runtime::testing::TestRuntime,
158-
types::TabletIndexName,
165+
types::{
166+
IndexName,
167+
ObjectKey,
168+
TabletIndexName,
169+
},
159170
};
160171
use maplit::btreemap;
161172
use must_let::must_let;
162173
use value::TableNamespace;
163174

164-
use crate::tests::text_test_utils::{
165-
IndexData,
166-
TextFixtures,
175+
use crate::{
176+
tests::text_test_utils::{
177+
backfilling_text_index,
178+
IndexData,
179+
TextFixtures,
180+
},
181+
IndexModel,
182+
SystemMetadataModel,
167183
};
168184

169185
#[convex_macro::test_runtime]
@@ -502,4 +518,101 @@ mod tests {
502518

503519
Ok(())
504520
}
521+
522+
// Test: an index whose metadata is `Backfilled` with a `SingleSegment` snapshot
// (set up by `create_backfilled_single_segment_text_index`) is searchable and
// reports exactly one segment after a single flusher step.
//
// Sequence: create the index, add one document containing "cat", run one
// `flusher.step()`, enable the index, then assert that searching for "cat"
// returns one result and that the index has one segment.
// NOTE(review): presumably this exercises the rebuild of old-format snapshots
// into the multi-segment format — confirm against the flusher implementation.
#[convex_macro::test_runtime]
523+
async fn backfill_with_backfilled_single_segment_format_backfills_with_multi_segment_format(
524+
rt: TestRuntime,
525+
) -> anyhow::Result<()> {
526+
let fixtures = TextFixtures::new(rt).await?;
527+
let index_name = create_backfilled_single_segment_text_index(&fixtures).await?;
528+
529+
fixtures.add_document("cat").await?;
530+
531+
let mut flusher = fixtures.new_search_flusher2();
532+
flusher.step().await?;
533+
534+
// The index is enabled only after the flusher has run.
fixtures.enable_index(&index_name).await?;
535+
536+
let results = fixtures.search(index_name.clone(), "cat").await?;
537+
assert_eq!(results.len(), 1);
538+
let segments = fixtures.get_segments_metadata(index_name).await?;
539+
assert_eq!(segments.len(), 1);
540+
Ok(())
541+
}
542+
543+
// Test: same scenario as the "backfilled" variant, except the index is enabled
// (via `IndexModel::enable_index_for_testing`) BEFORE the document is added and
// the flusher runs, so the flusher sees an already-enabled index whose snapshot
// data is `SingleSegment`.
//
// After one `flusher.step()`, searching for "cat" must return one result and
// the index must report exactly one segment.
// NOTE(review): presumably enabling moves the index to the "snapshotted at"
// state referenced in the test name — confirm against `enable_index_for_testing`.
#[convex_macro::test_runtime]
544+
async fn backfill_with_snapshotted_at_single_segment_format_backfills_with_multi_segment_format(
545+
rt: TestRuntime,
546+
) -> anyhow::Result<()> {
547+
let fixtures = TextFixtures::new(rt).await?;
548+
549+
let index_name = create_backfilled_single_segment_text_index(&fixtures).await?;
550+
551+
// Enable the index in its own system transaction before any flushing happens.
let mut tx = fixtures.db.begin_system().await?;
552+
IndexModel::new(&mut tx)
553+
.enable_index_for_testing(TableNamespace::Global, &index_name)
554+
.await?;
555+
fixtures.db.commit(tx).await?;
556+
557+
fixtures.add_document("cat").await?;
558+
559+
let mut flusher = fixtures.new_search_flusher2();
560+
flusher.step().await?;
561+
562+
let results = fixtures.search(index_name.clone(), "cat").await?;
563+
assert_eq!(results.len(), 1);
564+
let segments = fixtures.get_segments_metadata(index_name).await?;
565+
assert_eq!(segments.len(), 1);
566+
Ok(())
567+
}
568+
569+
// Test helper: registers a text index and then rewrites its metadata so that it
// appears `Backfilled` with a `SingleSegment` snapshot.
//
// Two system transactions are used:
//   1. Add the index (from `backfilling_text_index()`) in the `Backfilling` state
//      and commit.
//   2. Look the index up by the id returned from step 1 and replace its metadata
//      with a `Backfilled` state whose snapshot is `SingleSegment` with the
//      object key "Fake", timestamped at the transaction's begin timestamp and
//      versioned `V2UseStringIds`; then commit.
//
// Returns the index name taken from the original metadata. Errors from any
// awaited database call are propagated with `?`.
async fn create_backfilled_single_segment_text_index(
570+
fixtures: &TextFixtures,
571+
) -> anyhow::Result<IndexName> {
572+
let mut tx = fixtures.db.begin_system().await?;
573+
let metadata = backfilling_text_index()?;
574+
let on_disk_state = TextIndexState::Backfilling(TextIndexBackfillState::new());
575+
// Pull the developer config out of the metadata; `must_let!` fails the test
// if the config is not the `Search` variant.
must_let!(let IndexConfig::Search {
576+
developer_config,
577+
..
578+
} = metadata.config);
579+
let doc_id = IndexModel::new(&mut tx)
580+
.add_application_index(
581+
TableNamespace::Global,
582+
IndexMetadata::new_search_index(
583+
metadata.name.clone(),
584+
developer_config,
585+
on_disk_state,
586+
),
587+
)
588+
.await?;
589+
590+
fixtures.db.commit(tx).await?;
591+
// Second transaction: find the index that was just written and overwrite it.
let mut tx = fixtures.db.begin_system().await?;
592+
let indexes = IndexModel::new(&mut tx).get_all_indexes().await?;
593+
// Panics (via `unwrap`) if the index added above is not among the results.
let index = indexes
594+
.into_iter()
595+
.find(|index| index.id() == doc_id)
596+
.unwrap();
597+
let (id, value) = index.into_id_and_value();
598+
must_let!(let IndexConfig::Search {
599+
developer_config,
600+
..
601+
} = value.config);
602+
// NOTE(review): "Fake" looks like a placeholder key with no stored index
// behind it — confirm that nothing in these tests tries to load it.
let on_disk_state = TextIndexState::Backfilled(TextIndexSnapshot {
603+
data: TextIndexSnapshotData::SingleSegment(ObjectKey::try_from("Fake".to_string())?),
604+
ts: *tx.begin_timestamp(),
605+
version: TextSnapshotVersion::V2UseStringIds,
606+
});
607+
608+
SystemMetadataModel::new_global(&mut tx)
609+
.replace(
610+
id,
611+
IndexMetadata::new_search_index(value.name, developer_config, on_disk_state)
612+
.try_into()?,
613+
)
614+
.await?;
615+
fixtures.db.commit(tx).await?;
616+
Ok(metadata.name)
617+
}
505618
}

0 commit comments

Comments
 (0)