@@ -43,7 +43,11 @@ use crate::{
4343 archive:: extract_zip,
4444 constants:: CONVEX_EN_TOKENIZER ,
4545 convex_en,
46- disk_index:: download_single_file_zip,
46+ disk_index:: {
47+ download_single_file_zip,
48+ upload_single_file,
49+ } ,
50+ SearchFileType ,
4751 TantivySearchIndexSchema ,
4852 SEARCH_FIELD_ID ,
4953} ;
@@ -68,6 +72,7 @@ pub struct UpdatableTextSegment {
6872 inverted_index : Arc < InvertedIndexReader > ,
6973 id_tracker : StaticIdTracker ,
7074 deletion_tracker : MemoryDeletionTracker ,
75+ original : FragmentedTextSegment ,
7176}
7277
7378fn inverted_index_from_index ( index : & Index ) -> anyhow:: Result < Arc < InvertedIndexReader > > {
@@ -94,6 +99,51 @@ impl UpdatableTextSegment {
9499 inverted_index,
95100 id_tracker,
96101 deletion_tracker,
102+ // TODO(sam): We should probably create this outside of this method, then pass it
103+ // through here. For now this is unused in these tests.
104+ original : FragmentedTextSegment {
105+ segment_key : "segment" . try_into ( ) ?,
106+ id_tracker_key : "id_tracker" . try_into ( ) ?,
107+ deleted_terms_table_key : "deleted_terms" . try_into ( ) ?,
108+ alive_bitset_key : "bitset" . try_into ( ) ?,
109+ num_indexed_documents : 0 ,
110+ id : "test_id" . to_string ( ) ,
111+ } ,
112+ } )
113+ }
114+
115+ pub async fn upload_metadata (
116+ self ,
117+ storage : Arc < dyn Storage > ,
118+ ) -> anyhow:: Result < FragmentedTextSegment > {
119+ // TODO(CX-6511): Skip the upload and return the original file if this segment
120+ // wasn't modified.
121+
122+ let mut bitset_buf = vec ! [ ] ;
123+ let mut deleted_terms_buf = vec ! [ ] ;
124+ self . deletion_tracker
125+ . write ( & mut bitset_buf, & mut deleted_terms_buf) ?;
126+
127+ let mut bitset_slice = bitset_buf. as_slice ( ) ;
128+ let upload_bitset = upload_single_file (
129+ & mut bitset_slice,
130+ "alive_bitset" . to_string ( ) ,
131+ storage. clone ( ) ,
132+ SearchFileType :: TextAliveBitset ,
133+ ) ;
134+ let mut deleted_terms_slice = deleted_terms_buf. as_slice ( ) ;
135+ let upload_deleted_terms = upload_single_file (
136+ & mut deleted_terms_slice,
137+ "deleted_terms" . to_string ( ) ,
138+ storage. clone ( ) ,
139+ SearchFileType :: TextDeletedTerms ,
140+ ) ;
141+ let ( alive_bitset_key, deleted_terms_table_key) =
142+ futures:: try_join!( upload_bitset, upload_deleted_terms) ?;
143+ Ok ( FragmentedTextSegment {
144+ deleted_terms_table_key,
145+ alive_bitset_key,
146+ ..self . original
97147 } )
98148 }
99149
@@ -150,6 +200,7 @@ impl UpdatableTextSegment {
150200 inverted_index,
151201 id_tracker,
152202 deletion_tracker,
203+ original,
153204 } )
154205 }
155206}
@@ -292,7 +343,7 @@ pub async fn build_new_segment(
292343 let new_deletion_tracker = MemoryDeletionTracker :: new ( new_id_tracker. num_ids ( ) as u32 ) ;
293344 let alive_bit_set_path = dir. join ( ALIVE_BITSET_PATH ) ;
294345 let deleted_terms_path = dir. join ( DELETED_TERMS_PATH ) ;
295- new_deletion_tracker. write ( & alive_bit_set_path, & deleted_terms_path) ?;
346+ new_deletion_tracker. write_to_path ( & alive_bit_set_path, & deleted_terms_path) ?;
296347 let id_tracker_path = dir. join ( ID_TRACKER_PATH ) ;
297348 new_id_tracker. write ( & id_tracker_path) ?;
298349
@@ -371,7 +422,7 @@ pub async fn merge_segments(
371422 let tracker = MemoryDeletionTracker :: new ( num_docs as u32 ) ;
372423 let alive_bit_set_path = dir. to_path_buf ( ) . join ( ALIVE_BITSET_PATH ) ;
373424 let deleted_terms_path = dir. to_path_buf ( ) . join ( DELETED_TERMS_PATH ) ;
374- tracker. write ( & alive_bit_set_path, & deleted_terms_path) ?;
425+ tracker. write_to_path ( & alive_bit_set_path, & deleted_terms_path) ?;
375426 Ok ( TextSegmentPaths {
376427 index_path : index_dir,
377428 id_tracker_path,
0 commit comments