@@ -37,6 +37,11 @@ pub(crate) struct BuildOption<'a, P> {
3737 pub ( crate ) available_memory : Option < usize > ,
3838 pub ( crate ) cancel : Box < dyn Fn ( ) -> bool + ' a + Sync + Send > ,
3939 pub ( crate ) progress : P ,
40+ /// An optimization that allows for faster relinking of all items.
41+ ///
42+ /// Avoids marking all the items as "updated" and
43+ /// lets the rebuild function take them all.
44+ pub ( crate ) relink_all_items : bool ,
4045}
4146
4247impl Default for BuildOption < ' _ , NoProgress > {
@@ -47,6 +52,7 @@ impl Default for BuildOption<'_, NoProgress> {
4752 available_memory : None ,
4853 cancel : Box :: new ( || false ) ,
4954 progress : NoProgress ,
55+ relink_all_items : false ,
5056 }
5157 }
5258}
@@ -112,13 +118,27 @@ impl<'a, D: Distance, R: Rng + SeedableRng, P> HannoyBuilder<'a, D, R, P> {
112118 let HannoyBuilder {
113119 writer,
114120 rng,
115- inner : BuildOption { ef_construction, available_memory, cancel, progress : _, alpha } ,
121+ inner :
122+ BuildOption {
123+ ef_construction,
124+ available_memory,
125+ cancel,
126+ progress : _,
127+ alpha,
128+ relink_all_items,
129+ } ,
116130 } = self ;
117-
118131 HannoyBuilder {
119132 writer,
120133 rng,
121- inner : BuildOption { ef_construction, available_memory, cancel, progress, alpha } ,
134+ inner : BuildOption {
135+ ef_construction,
136+ available_memory,
137+ cancel,
138+ progress,
139+ alpha,
140+ relink_all_items,
141+ } ,
122142 }
123143 }
124144
@@ -227,7 +247,15 @@ impl<'a, D: Distance, R: Rng + SeedableRng, P> HannoyBuilder<'a, D, R, P> {
227247 where
228248 P : steppe:: Progress ,
229249 {
230- self . writer . force_rebuild :: < R , P , M , M0 > ( wtxn, self . rng , & self . inner )
250+ // Use this option to mark all nodes as updated
251+ self . inner . relink_all_items = true ;
252+
253+ self . writer . force_rebuild :: < R , P , M , M0 > ( wtxn, self . rng , & self . inner ) ?;
254+
255+ // As this builder can be reused, we need to reset this parameter
256+ self . inner . relink_all_items = false ;
257+
258+ Ok ( ( ) )
231259 }
232260
233261 /// Converts an arroy db into a hannoy one.
@@ -494,11 +522,17 @@ impl<D: Distance> Writer<D> {
494522 P : steppe:: Progress ,
495523 {
496524 let item_indices = self . item_indices ( wtxn, options) ?;
497- // updated items can be an update, an addition or a removed item
498- let updated_items = self . reset_and_retrieve_updated_items ( wtxn, options) ?;
499525
500- let to_delete = updated_items. clone ( ) - & item_indices;
501- let to_insert = & item_indices & & updated_items;
526+ // In case we have to rebuild all links we can skip the deletion step.
527+ let ( to_delete, to_insert) = if options. relink_all_items {
528+ ( RoaringBitmap :: new ( ) , item_indices. clone ( ) )
529+ } else {
530+ // updated items can be an update, an addition or a removed item
531+ let updated_items = self . reset_and_retrieve_updated_items ( wtxn, options) ?;
532+ let to_delete = updated_items. clone ( ) - & item_indices;
533+ let to_insert = & item_indices & & updated_items;
534+ ( to_delete, to_insert)
535+ } ;
502536
503537 let metadata = self
504538 . database
@@ -571,14 +605,11 @@ impl<D: Distance> Writer<D> {
571605 let item_ids = self . item_indices ( wtxn, options) ?;
572606 self . delete_links_from_db ( & item_ids, wtxn) ?;
573607
574- // 4. mark all nodes as updated
575- for item_id in item_ids {
576- self . database . remap_data_type :: < Unit > ( ) . put (
577- wtxn,
578- & Key :: updated ( self . index , item_id) ,
579- & ( ) ,
580- ) ?;
581- }
608+ // 4. Do not mark every node as updated: the relink_all_items option must be set so the build relinks all items
609+ assert ! (
610+ options. relink_all_items,
611+ "forcing relinking of all items requires the relink_all_items option to be set to true"
612+ ) ;
582613
583614 // 5. trigger build
584615 self . build :: < R , P , M , M0 > ( wtxn, rng, options)
0 commit comments