@@ -44,7 +44,10 @@ use vector::{
4444 QdrantSchema ,
4545} ;
4646
47- use crate :: Snapshot ;
47+ use crate :: {
48+ metrics:: vector:: log_documents_per_segment,
49+ Snapshot ,
50+ } ;
4851
4952pub trait SearchIndexConfigParser {
5053 type IndexType : SearchIndex ;
@@ -127,10 +130,11 @@ pub trait SearchIndex {
127130
128131 type PreviousSegments ;
129132
133+ type Statistics : SegmentStatistics ;
134+
130135 type Schema : Send + Sync ;
131- // TODO(sam): Convert these to something more like segment statistics
132- fn num_vectors ( segment : & Self :: Segment ) -> u32 ;
133- fn non_deleted_vectors ( segment : & Self :: Segment ) -> anyhow:: Result < u64 > ;
136+
137+ fn statistics ( segment : & Self :: Segment ) -> anyhow:: Result < Self :: Statistics > ;
134138
135139 async fn upload_new_segment < RT : Runtime > (
136140 rt : & RT ,
@@ -170,6 +174,11 @@ pub trait SearchIndex {
170174 ) -> anyhow:: Result < Vec < Self :: Segment > > ;
171175}
172176
177+ pub trait SegmentStatistics : Default {
178+ fn add ( lhs : anyhow:: Result < Self > , rhs : anyhow:: Result < Self > ) -> anyhow:: Result < Self > ;
179+ fn log ( & self ) ;
180+ }
181+
173182pub struct TextSearchIndex ;
174183#[ async_trait]
175184impl SearchIndex for TextSearchIndex {
@@ -178,6 +187,7 @@ impl SearchIndex for TextSearchIndex {
178187 type PreviousSegments = ( ) ;
179188 type Schema = ( ) ;
180189 type Segment = FragmentedSearchSegment ;
190+ type Statistics = TextStatistics ;
181191
182192 fn get_index_sizes ( snapshot : Snapshot ) -> anyhow:: Result < BTreeMap < IndexId , usize > > {
183193 Ok ( snapshot
@@ -230,19 +240,26 @@ impl SearchIndex for TextSearchIndex {
230240 anyhow:: bail!( "Not implemented" )
231241 }
232242
233- fn num_vectors ( _segment : & Self :: Segment ) -> u32 {
234- 0
235- }
236-
237243 fn segment_id ( _segment : & Self :: Segment ) -> String {
238244 "" . to_string ( )
239245 }
240246
241- fn non_deleted_vectors ( _segment : & Self :: Segment ) -> anyhow:: Result < u64 > {
242- anyhow :: bail! ( "Not implemented" )
247+ fn statistics ( _segment : & Self :: Segment ) -> anyhow:: Result < Self :: Statistics > {
248+ Ok ( TextStatistics )
243249 }
244250}
245251
252+ #[ derive( Default ) ]
253+ pub struct TextStatistics ;
254+
255+ impl SegmentStatistics for TextStatistics {
256+ fn add ( _: anyhow:: Result < Self > , _: anyhow:: Result < Self > ) -> anyhow:: Result < Self > {
257+ Ok ( Self )
258+ }
259+
260+ fn log ( & self ) { }
261+ }
262+
246263#[ derive( Debug ) ]
247264pub struct VectorSearchIndex ;
248265
@@ -253,6 +270,7 @@ impl SearchIndex for VectorSearchIndex {
253270 type PreviousSegments = Vec < MutableFragmentedSegmentMetadata > ;
254271 type Schema = QdrantSchema ;
255272 type Segment = FragmentedVectorSegment ;
273+ type Statistics = VectorStatistics ;
256274
257275 fn get_index_sizes ( snapshot : Snapshot ) -> anyhow:: Result < BTreeMap < IndexId , usize > > {
258276 Ok ( snapshot
@@ -322,16 +340,16 @@ impl SearchIndex for VectorSearchIndex {
322340 upload_segment ( rt, storage, new_segment) . await
323341 }
324342
325- fn num_vectors ( segment : & Self :: Segment ) -> u32 {
326- segment. num_vectors
327- }
328-
329343 fn segment_id ( segment : & Self :: Segment ) -> String {
330344 segment. id . clone ( )
331345 }
332346
333- fn non_deleted_vectors ( segment : & Self :: Segment ) -> anyhow:: Result < u64 > {
334- segment. non_deleted_vectors ( )
347+ fn statistics ( segment : & Self :: Segment ) -> anyhow:: Result < Self :: Statistics > {
348+ let non_deleted_vectors = segment. non_deleted_vectors ( ) ?;
349+ Ok ( VectorStatistics {
350+ non_deleted_vectors,
351+ num_vectors : segment. num_vectors ,
352+ } )
335353 }
336354}
337355pub struct SearchIndexConfig < T : SearchIndex > {
@@ -350,6 +368,27 @@ pub struct BackfillState<T: SearchIndex> {
350368 pub backfill_snapshot_ts : Option < Timestamp > ,
351369}
352370
371+ #[ derive( Debug , Default ) ]
372+ pub struct VectorStatistics {
373+ pub num_vectors : u32 ,
374+ pub non_deleted_vectors : u64 ,
375+ }
376+
377+ impl SegmentStatistics for VectorStatistics {
378+ fn add ( lhs : anyhow:: Result < Self > , rhs : anyhow:: Result < Self > ) -> anyhow:: Result < Self > {
379+ let rhs = rhs?;
380+ let lhs = lhs?;
381+ Ok ( Self {
382+ num_vectors : lhs. num_vectors + rhs. num_vectors ,
383+ non_deleted_vectors : lhs. non_deleted_vectors + rhs. non_deleted_vectors ,
384+ } )
385+ }
386+
387+ fn log ( & self ) {
388+ log_documents_per_segment ( self . non_deleted_vectors ) ;
389+ }
390+ }
391+
353392impl From < VectorIndexBackfillState > for BackfillState < VectorSearchIndex > {
354393 fn from ( value : VectorIndexBackfillState ) -> Self {
355394 Self {
0 commit comments