Skip to content

Commit a2963f5

Browse files
sjuddConvex, Inc.
authored andcommitted
Generalize logging text/vector segment/index metadata after a flush (#26326)
We're going to care about similar things for text / vector search and I'd like to generalize the remainder of the flushers. So I've created a combined set of metrics that can be used with both. I ended up expanding the metrics slightly to include both the total number of documents as well as the number of non-deleted documents in each new segment and each index. Total number of documents gives us some idea of how big an index is (actual byte size would be nice too). Non-deleted documents lets us track how much overhead there is for each segment due to deletions. I also removed logging the size of each segment after a flush. Now we only log the size of the new segment and the total size of the index across all segments. I'll have to fix a few graphs, but this seems more coherent. GitOrigin-RevId: 1be03cb43651c98477ee7166c829527645da17ce
1 parent 74eae6d commit a2963f5

File tree

13 files changed

+210
-92
lines changed

13 files changed

+210
-92
lines changed

crates/common/src/bootstrap_model/index/text_index/index_snapshot.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ pub struct SerializedFragmentedTextSegment {
143143
pub id_tracker_key: String,
144144
pub deleted_terms_table_key: String,
145145
pub alive_bitset_key: String,
146-
pub num_indexed_documents: u32,
146+
pub num_indexed_documents: u64,
147147
pub id: String,
148148
}
149149

@@ -184,7 +184,12 @@ pub struct FragmentedTextSegment {
184184
pub id_tracker_key: ObjectKey,
185185
pub deleted_terms_table_key: ObjectKey,
186186
pub alive_bitset_key: ObjectKey,
187-
pub num_indexed_documents: u32,
187+
// 2^63 ~= 9.2 * 10^18. We only support i64 in Convex.
188+
#[cfg_attr(
189+
any(test, feature = "testing"),
190+
proptest(strategy = "1u64..9223372000000000000")
191+
)]
192+
pub num_indexed_documents: u64,
188193
// A random UUID that can be used to identify a segment to determine if the
189194
// segment has changed during non-transactional index changes (compaction).
190195
pub id: String,

crates/common/src/knobs.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -784,7 +784,7 @@ pub static MULTI_SEGMENT_FULL_SCAN_THRESHOLD_KB: LazyLock<usize> =
784784
pub static SEGMENT_MAX_SIZE_BYTES: LazyLock<u64> =
785785
LazyLock::new(|| env_config("SEGMENT_MAX_SIZE_BYTES", (1100000 * 2048 * 4) / 3_u64));
786786
/// The minimum number of segments we will compact in one pass.
787-
pub static MIN_COMPACTION_SEGMENTS: LazyLock<usize> =
787+
pub static MIN_COMPACTION_SEGMENTS: LazyLock<u64> =
788788
LazyLock::new(|| env_config("MIN_COMPACTION_SEGMENTS", 3));
789789
/// The maximum percentage of a Segment that can be deleted before we will
790790
/// recompact that segment to remove deleted vectors

crates/database/src/index_workers/index_meta.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,10 @@ pub trait SearchIndex: Clone {
128128

129129
pub trait SegmentStatistics: Default {
130130
fn add(lhs: anyhow::Result<Self>, rhs: anyhow::Result<Self>) -> anyhow::Result<Self>;
131-
fn log(&self);
131+
132+
fn num_documents(&self) -> u64;
133+
134+
fn num_non_deleted_documents(&self) -> u64;
132135
}
133136
pub struct SearchIndexConfig<T: SearchIndex> {
134137
pub developer_config: T::DeveloperConfig,

crates/database/src/index_workers/search_flusher.rs

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -299,11 +299,7 @@ impl<RT: Runtime, T: SearchIndexConfigParser + 'static> SearchFlusher<RT, T> {
299299

300300
let total_stats = new_and_updated_parts
301301
.iter()
302-
.map(|segment| {
303-
let segment_stats = segment.statistics()?;
304-
segment_stats.log();
305-
Ok(segment_stats)
306-
})
302+
.map(|segment| segment.statistics())
307303
.reduce(SegmentStatistics::add)
308304
.transpose()?
309305
.unwrap_or_default();

crates/database/src/index_workers/writer.rs

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,16 @@ pub(crate) struct SearchIndexMetadataWriter<RT: Runtime, T: SearchIndex> {
7676
search_type: SearchType,
7777
}
7878

79+
pub struct SearchIndexWriteResult<T: SearchIndex> {
80+
/// Stats summing metrics across all segments in the index.
81+
pub index_stats: T::Statistics,
82+
/// Stats for just the new segment if a new segment was built.
83+
pub new_segment_stats: Option<T::Statistics>,
84+
/// Stats for all segments in the index, including the new segment if it was
85+
/// built.
86+
pub per_segment_stats: Vec<T::Statistics>,
87+
}
88+
7989
impl<RT: Runtime, T: SearchIndex> SearchIndexMetadataWriter<RT, T> {
8090
pub(crate) fn new(
8191
runtime: RT,
@@ -159,7 +169,7 @@ impl<RT: Runtime, T: SearchIndex> SearchIndexMetadataWriter<RT, T> {
159169
&self,
160170
job: &IndexBuild<T>,
161171
result: IndexBuildResult<T>,
162-
) -> anyhow::Result<(T::Statistics, Option<T::Statistics>)> {
172+
) -> anyhow::Result<SearchIndexWriteResult<T>> {
163173
let IndexBuildResult {
164174
snapshot_ts,
165175
data,
@@ -171,6 +181,10 @@ impl<RT: Runtime, T: SearchIndex> SearchIndexMetadataWriter<RT, T> {
171181

172182
let inner = self.inner(SearchWriterLockWaiter::Flusher).await;
173183
let segments = data.require_multi_segment()?;
184+
let per_segment_stats = segments
185+
.iter()
186+
.map(|segment| segment.statistics())
187+
.collect::<anyhow::Result<Vec<_>>>()?;
174188

175189
if let Some(index_backfill_result) = backfill_result {
176190
inner
@@ -188,7 +202,11 @@ impl<RT: Runtime, T: SearchIndex> SearchIndexMetadataWriter<RT, T> {
188202
.await?
189203
}
190204

191-
Ok((total_stats, new_segment_stats))
205+
Ok(SearchIndexWriteResult {
206+
index_stats: total_stats,
207+
new_segment_stats,
208+
per_segment_stats,
209+
})
192210
}
193211

194212
async fn inner(&self, waiter: SearchWriterLockWaiter) -> MutexGuard<Inner<RT, T>> {

crates/database/src/metrics.rs

Lines changed: 69 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -808,6 +808,75 @@ pub fn finish_search_index_merge_timer(mut timer: StatusTimer, merge_type: Searc
808808
timer.finish();
809809
}
810810

811+
register_convex_histogram!(
812+
DOCUMENTS_PER_NEW_SEARCH_SEGMENT_TOTAL,
813+
"Total number of documents in a newly built search index segment.",
814+
&[SEARCH_TYPE_LABEL],
815+
);
816+
pub fn log_documents_per_new_search_segment(count: u64, search_type: SearchType) {
817+
log_distribution_with_labels(
818+
&DOCUMENTS_PER_NEW_SEARCH_SEGMENT_TOTAL,
819+
count as f64,
820+
vec![search_type.tag()],
821+
);
822+
}
823+
824+
register_convex_histogram!(
825+
DOCUMENTS_PER_SEARCH_SEGMENT_TOTAL,
826+
"Total number of documents in a specific search segment, including documents that were added \
827+
to the segment but are deleted and excluded from any search results",
828+
&[SEARCH_TYPE_LABEL],
829+
);
830+
pub fn log_documents_per_search_segment(count: u64, search_type: SearchType) {
831+
log_distribution_with_labels(
832+
&DOCUMENTS_PER_SEARCH_SEGMENT_TOTAL,
833+
count as f64,
834+
vec![search_type.tag()],
835+
);
836+
}
837+
838+
register_convex_histogram!(
839+
NON_DELETED_DOCUMENTS_PER_SEARCH_SEGMENT_TOTAL,
840+
"Total number of non-deleted documents in a specific search segment, excluding documents that \
841+
were added to the segment but are deleted and excluded from any search results",
842+
&[SEARCH_TYPE_LABEL],
843+
);
844+
pub fn log_non_deleted_documents_per_search_segment(count: u64, search_type: SearchType) {
845+
log_distribution_with_labels(
846+
&NON_DELETED_DOCUMENTS_PER_SEARCH_SEGMENT_TOTAL,
847+
count as f64,
848+
vec![search_type.tag()],
849+
);
850+
}
851+
852+
register_convex_histogram!(
853+
DOCUMENTS_PER_SEARCH_INDEX_TOTAL,
854+
"Total number of documents across all segments in a search index, including documents that \
855+
were added to the index but are deleted and excluded from any search results",
856+
&[SEARCH_TYPE_LABEL],
857+
);
858+
pub fn log_documents_per_search_index(count: u64, search_type: SearchType) {
859+
log_distribution_with_labels(
860+
&DOCUMENTS_PER_SEARCH_INDEX_TOTAL,
861+
count as f64,
862+
vec![search_type.tag()],
863+
);
864+
}
865+
register_convex_histogram!(
866+
NON_DELETED_DOCUMENTS_PER_SEARCH_INDEX_TOTAL,
867+
"Total number of non-deleted documents across all segments in a search index segment, \
868+
excluding documents that were added to the index but are deleted and excluded from any \
869+
search results",
870+
&[SEARCH_TYPE_LABEL],
871+
);
872+
pub fn log_non_deleted_documents_per_search_index(count: u64, search_type: SearchType) {
873+
log_distribution_with_labels(
874+
&NON_DELETED_DOCUMENTS_PER_SEARCH_INDEX_TOTAL,
875+
count as f64,
876+
vec![search_type.tag()],
877+
);
878+
}
879+
811880
pub mod search {
812881

813882
use metrics::{
@@ -860,14 +929,6 @@ pub mod search {
860929
log_distribution(&DATABASE_SEARCH_DOCUMENTS_PER_INDEX_TOTAL, count as f64);
861930
}
862931

863-
register_convex_histogram!(
864-
DATABASE_TEXT_DOCUMENTS_PER_NEW_SEGMENT_TOTAL,
865-
"Number of documents in a newly built text search index segment",
866-
);
867-
pub fn log_documents_per_new_segment(count: u32) {
868-
log_distribution(&DATABASE_TEXT_DOCUMENTS_PER_NEW_SEGMENT_TOTAL, count as f64);
869-
}
870-
871932
register_convex_histogram!(
872933
DATABASE_SEARCH_ITERATOR_NEXT_SECONDS,
873934
"Time to fetch the next document in a search query iterator",
@@ -914,33 +975,6 @@ pub mod vector {
914975
StatusTimer::new(&DATABASE_VECTOR_BUILD_ONE_SECONDS)
915976
}
916977

917-
register_convex_histogram!(
918-
DATABASE_VECTOR_DOCUMENTS_PER_INDEX_TOTAL,
919-
"Number of documents per vector index",
920-
);
921-
pub fn log_documents_per_index(count: u64) {
922-
log_distribution(&DATABASE_VECTOR_DOCUMENTS_PER_INDEX_TOTAL, count as f64);
923-
}
924-
925-
register_convex_histogram!(
926-
DATABASE_VECTOR_DOCUMENTS_PER_SEGMENT_TOTAL,
927-
"Number of documents per vector index segment",
928-
);
929-
pub fn log_documents_per_segment(count: u64) {
930-
log_distribution(&DATABASE_VECTOR_DOCUMENTS_PER_SEGMENT_TOTAL, count as f64);
931-
}
932-
933-
register_convex_histogram!(
934-
DATABASE_VECTOR_DOCUMENTS_PER_NEW_SEGMENT_TOTAL,
935-
"Number of documents in a newly built vector index segment",
936-
);
937-
pub fn log_documents_per_new_segment(count: u32) {
938-
log_distribution(
939-
&DATABASE_VECTOR_DOCUMENTS_PER_NEW_SEGMENT_TOTAL,
940-
count as f64,
941-
);
942-
}
943-
944978
register_convex_histogram!(
945979
DATABASE_VECTOR_SEARCH_QUERY_SECONDS,
946980
"Time to run a single vector search, not including retries due to bootstrapping",

crates/database/src/text_index_worker/flusher.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ impl<RT: Runtime> TextIndexFlusher<RT> {
9595
///
9696
/// Returns a map of IndexName to number of documents indexed for each
9797
/// index that was built.
98-
pub(crate) async fn step(&mut self) -> anyhow::Result<(BTreeMap<TabletIndexName, u32>, Token)> {
98+
pub(crate) async fn step(&mut self) -> anyhow::Result<(BTreeMap<TabletIndexName, u64>, Token)> {
9999
let mut metrics = BTreeMap::new();
100100

101101
let (to_build, token) = self.needs_backfill().await?;
@@ -107,7 +107,7 @@ impl<RT: Runtime> TextIndexFlusher<RT> {
107107
for job in to_build {
108108
let index_name = job.index_name.clone();
109109
let num_documents_indexed = self.build_one(job).await?;
110-
metrics.insert(index_name, num_documents_indexed as u32);
110+
metrics.insert(index_name, num_documents_indexed as u64);
111111
}
112112

113113
Ok((metrics, token))

crates/database/src/text_index_worker/flusher2.rs

Lines changed: 35 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,22 @@ use storage::Storage;
1919

2020
use crate::{
2121
index_workers::{
22+
index_meta::SegmentStatistics,
2223
search_flusher::{
2324
IndexBuild,
2425
SearchFlusher,
2526
SearchIndexLimits,
2627
},
27-
writer::SearchIndexMetadataWriter,
28+
writer::{
29+
SearchIndexMetadataWriter,
30+
SearchIndexWriteResult,
31+
},
2832
},
29-
metrics::search::{
30-
log_documents_per_index,
31-
log_documents_per_new_segment,
33+
metrics::{
34+
log_documents_per_new_search_segment,
35+
log_documents_per_search_segment,
36+
log_non_deleted_documents_per_search_index,
37+
log_non_deleted_documents_per_search_segment,
3238
},
3339
text_index_worker::text_meta::{
3440
BuildTextIndexArgs,
@@ -158,7 +164,7 @@ impl<RT: Runtime> TextIndexFlusher2<RT> {
158164
///
159165
/// Returns a map of IndexName to number of documents indexed for each
160166
/// index that was built.
161-
pub(crate) async fn step(&mut self) -> anyhow::Result<(BTreeMap<TabletIndexName, u32>, Token)> {
167+
pub(crate) async fn step(&mut self) -> anyhow::Result<(BTreeMap<TabletIndexName, u64>, Token)> {
162168
let mut metrics = BTreeMap::new();
163169

164170
let (to_build, token) = self.flusher.needs_backfill().await?;
@@ -185,7 +191,7 @@ impl<RT: Runtime> TextIndexFlusher2<RT> {
185191
Ok((metrics, token))
186192
}
187193

188-
async fn build_one(&self, job: IndexBuild<TextSearchIndex>) -> anyhow::Result<u32> {
194+
async fn build_one(&self, job: IndexBuild<TextSearchIndex>) -> anyhow::Result<u64> {
189195
let timer = crate::metrics::search::build_one_timer();
190196

191197
let build_index_args = BuildTextIndexArgs {
@@ -198,13 +204,30 @@ impl<RT: Runtime> TextIndexFlusher2<RT> {
198204
.await?;
199205
tracing::debug!("Built a text segment for: {result:#?}");
200206

201-
let (total_stats, new_segment_stats) = self.writer.commit_flush(&job, result).await?;
202-
203-
let num_indexed_documents = new_segment_stats.unwrap_or_default().num_indexed_documents;
204-
log_documents_per_new_segment(num_indexed_documents);
205-
log_documents_per_index(total_stats.num_indexed_documents as usize);
207+
let SearchIndexWriteResult {
208+
index_stats,
209+
new_segment_stats,
210+
per_segment_stats,
211+
} = self.writer.commit_flush(&job, result).await?;
212+
213+
let new_segment_stats = new_segment_stats.unwrap_or_default();
214+
log_documents_per_new_search_segment(new_segment_stats.num_documents(), SearchType::Text);
215+
216+
per_segment_stats.into_iter().for_each(|stats| {
217+
log_documents_per_search_segment(stats.num_documents(), SearchType::Text);
218+
log_non_deleted_documents_per_search_segment(
219+
stats.num_non_deleted_documents(),
220+
SearchType::Text,
221+
);
222+
});
223+
224+
log_documents_per_new_search_segment(index_stats.num_documents(), SearchType::Text);
225+
log_non_deleted_documents_per_search_index(
226+
index_stats.num_non_deleted_documents(),
227+
SearchType::Text,
228+
);
206229
timer.finish();
207-
Ok(num_indexed_documents)
230+
Ok(new_segment_stats.num_documents())
208231
}
209232
}
210233

crates/database/src/text_index_worker/text_meta.rs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ impl SearchIndex for TextSearchIndex {
260260

261261
#[derive(Debug, Default)]
262262
pub struct TextStatistics {
263-
pub num_indexed_documents: u32,
263+
pub num_indexed_documents: u64,
264264
}
265265

266266
impl From<SearchOnDiskState<TextSearchIndex>> for TextIndexState {
@@ -290,7 +290,14 @@ impl SegmentStatistics for TextStatistics {
290290
})
291291
}
292292

293-
fn log(&self) {}
293+
fn num_documents(&self) -> u64 {
294+
self.num_indexed_documents
295+
}
296+
297+
fn num_non_deleted_documents(&self) -> u64 {
298+
// TODO(sam): Add a non-deleted number of documents.
299+
self.num_indexed_documents
300+
}
294301
}
295302

296303
impl From<TextIndexBackfillState> for BackfillState<TextSearchIndex> {

0 commit comments

Comments
 (0)