@@ -199,6 +199,37 @@ impl<'a, D: Distance, R: Rng + SeedableRng, P> HannoyBuilder<'a, D, R, P> {
199199 self . writer . build :: < R , P , M , M0 > ( wtxn, self . rng , & self . inner )
200200 }
201201
202+ /// Rebuilds an HNSW graph from scratch.
203+ ///
204+ /// Assumes you've previously built one or more times. This function will drop all graph edges
205+ /// from previous builds and reconstruct the hnsw with the vectors found in the db.
206+ ///
207+ /// Standard builds work by first adding or deleting some nodes, here we're marking all
208+ /// vectors found on disk as updated to force a rebuild. When in doubt prefer [`Self::build<M,M0>`] over
209+ /// this method.
210+ ///
211+ /// # Example
212+ ///
213+ /// ```no_run
214+ /// # use hannoy::{Writer, distances::Euclidean};
215+ /// # let (writer, wtxn): (Writer<Euclidean>, heed::RwTxn) = todo!();
216+ /// use rayon;
217+ /// use rand::rngs::StdRng;
218+ /// use rand::SeedableRng;
219+ ///
220+ /// // configure global threadpool if you want!
221+ /// rayon::ThreadPoolBuilder::new().num_threads(4).build_global().unwrap();
222+ ///
223+ /// let mut rng = StdRng::seed_from_u64(4729);
224+ /// writer.builder(&mut rng).force_rebuild::<16,32>(&mut wtxn);
225+ /// ```
226+ pub fn force_rebuild < const M : usize , const M0 : usize > ( & mut self , wtxn : & mut RwTxn ) -> Result < ( ) >
227+ where
228+ P : steppe:: Progress ,
229+ {
230+ self . writer . force_rebuild :: < R , P , M , M0 > ( wtxn, self . rng , & self . inner )
231+ }
232+
202233 /// Converts an arroy db into a hannoy one.
203234 #[ cfg( any( test, feature = "arroy" ) ) ]
204235 #[ cfg_attr( docsrs, doc( cfg( feature = "arroy" ) ) ) ]
@@ -493,7 +524,7 @@ impl<D: Distance> Writer<D> {
493524 // Remove deleted links from lmdb AFTER build; in DiskANN we use a deleted item's
494525 // neighbours when filling in the "gaps" left in the graph from deletions. See
495526 // [`HnswBuilder::maybe_patch_old_links`] for more details.
496- self . delete_links_from_db ( to_delete, wtxn) ?;
527+ self . delete_links_from_db ( & to_delete, wtxn) ?;
497528
498529 debug ! ( "write the metadata..." ) ;
499530 options. progress . update ( HannoyBuild :: WriteTheMetadata ) ;
@@ -519,6 +550,40 @@ impl<D: Distance> Writer<D> {
519550 Ok ( ( ) )
520551 }
521552
553+ /// Kinda like clear and create, but only for links
554+ fn force_rebuild < R , P , const M : usize , const M0 : usize > (
555+ & self ,
556+ wtxn : & mut RwTxn ,
557+ rng : & mut R ,
558+ options : & BuildOption < P > ,
559+ ) -> Result < ( ) >
560+ where
561+ R : Rng + SeedableRng ,
562+ P : steppe:: Progress ,
563+ {
564+ // 1. delete metadata
565+ self . database . delete ( wtxn, & Key :: metadata ( self . index ) ) ?;
566+
567+ // 2. delete version
568+ self . database . delete ( wtxn, & Key :: version ( self . index ) ) ?;
569+
570+ // 3. delete all links
571+ let item_ids = self . item_indices ( wtxn, options) ?;
572+ self . delete_links_from_db ( & item_ids, wtxn) ?;
573+
574+ // 4. mark all nodes as updated
575+ for item_id in item_ids {
576+ self . database . remap_data_type :: < Unit > ( ) . put (
577+ wtxn,
578+ & Key :: updated ( self . index , item_id) ,
579+ & ( ) ,
580+ ) ?;
581+ }
582+
583+ // 5. trigger build
584+ self . build :: < R , P , M , M0 > ( wtxn, rng, options)
585+ }
586+
522587 fn reset_and_retrieve_updated_items < P > (
523588 & self ,
524589 wtxn : & mut RwTxn ,
@@ -586,7 +651,7 @@ impl<D: Distance> Writer<D> {
586651
587652 // Iterates over links in lmdb and deletes those in `to_delete`. There can be several links
588653 // with the same NodeId.item, each differing by their layer
589- fn delete_links_from_db ( & self , to_delete : RoaringBitmap , wtxn : & mut RwTxn ) -> Result < ( ) > {
654+ fn delete_links_from_db ( & self , to_delete : & RoaringBitmap , wtxn : & mut RwTxn ) -> Result < ( ) > {
590655 let mut cursor = self
591656 . database
592657 . remap_key_type :: < PrefixCodec > ( )
0 commit comments