Skip to content

Commit 02ca250

Browse files
committed
Optimize relinking all the items
1 parent e5b3902 commit 02ca250

File tree

1 file changed

+47
-16
lines changed

1 file changed

+47
-16
lines changed

src/writer.rs

Lines changed: 47 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,11 @@ pub(crate) struct BuildOption<'a, P> {
3737
pub(crate) available_memory: Option<usize>,
3838
pub(crate) cancel: Box<dyn Fn() -> bool + 'a + Sync + Send>,
3939
pub(crate) progress: P,
40+
/// An optimization that allows for faster relinking of all items.
41+
///
42+
/// Avoids marking all the items as "updated" and
43+
/// lets the rebuild function take them all.
44+
pub(crate) relink_all_items: bool,
4045
}
4146

4247
impl Default for BuildOption<'_, NoProgress> {
@@ -47,6 +52,7 @@ impl Default for BuildOption<'_, NoProgress> {
4752
available_memory: None,
4853
cancel: Box::new(|| false),
4954
progress: NoProgress,
55+
relink_all_items: false,
5056
}
5157
}
5258
}
@@ -112,13 +118,27 @@ impl<'a, D: Distance, R: Rng + SeedableRng, P> HannoyBuilder<'a, D, R, P> {
112118
let HannoyBuilder {
113119
writer,
114120
rng,
115-
inner: BuildOption { ef_construction, available_memory, cancel, progress: _, alpha },
121+
inner:
122+
BuildOption {
123+
ef_construction,
124+
available_memory,
125+
cancel,
126+
progress: _,
127+
alpha,
128+
relink_all_items,
129+
},
116130
} = self;
117-
118131
HannoyBuilder {
119132
writer,
120133
rng,
121-
inner: BuildOption { ef_construction, available_memory, cancel, progress, alpha },
134+
inner: BuildOption {
135+
ef_construction,
136+
available_memory,
137+
cancel,
138+
progress,
139+
alpha,
140+
relink_all_items,
141+
},
122142
}
123143
}
124144

@@ -227,7 +247,15 @@ impl<'a, D: Distance, R: Rng + SeedableRng, P> HannoyBuilder<'a, D, R, P> {
227247
where
228248
P: steppe::Progress,
229249
{
230-
self.writer.force_rebuild::<R, P, M, M0>(wtxn, self.rng, &self.inner)
250+
// Enable the relink-all fast path: the build relinks every item without marking each one as updated
251+
self.inner.relink_all_items = true;
252+
253+
self.writer.force_rebuild::<R, P, M, M0>(wtxn, self.rng, &self.inner)?;
254+
255+
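// As this builder can be reused, we need to reset this parameter.
// NOTE(review): if `force_rebuild` fails, the `?` returns early and this reset is skipped — confirm that is acceptable.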
256+
self.inner.relink_all_items = false;
257+
258+
Ok(())
231259
}
232260

233261
/// Converts an arroy db into a hannoy one.
@@ -494,11 +522,17 @@ impl<D: Distance> Writer<D> {
494522
P: steppe::Progress,
495523
{
496524
let item_indices = self.item_indices(wtxn, options)?;
497-
// updated items can be an update, an addition or a removed item
498-
let updated_items = self.reset_and_retrieve_updated_items(wtxn, options)?;
499525

500-
let to_delete = updated_items.clone() - &item_indices;
501-
let to_insert = &item_indices & &updated_items;
526+
// In case we have to rebuild all links we can skip the deletion step.
527+
let (to_delete, to_insert) = if options.relink_all_items {
528+
(RoaringBitmap::new(), item_indices.clone())
529+
} else {
530+
// updated items can be an update, an addition or a removed item
531+
let updated_items = self.reset_and_retrieve_updated_items(wtxn, options)?;
532+
let to_delete = updated_items.clone() - &item_indices;
533+
let to_insert = &item_indices & &updated_items;
534+
(to_delete, to_insert)
535+
};
502536

503537
let metadata = self
504538
.database
@@ -571,14 +605,11 @@ impl<D: Distance> Writer<D> {
571605
let item_ids = self.item_indices(wtxn, options)?;
572606
self.delete_links_from_db(&item_ids, wtxn)?;
573607

574-
// 4. mark all nodes as updated
575-
for item_id in item_ids {
576-
self.database.remap_data_type::<Unit>().put(
577-
wtxn,
578-
&Key::updated(self.index, item_id),
579-
&(),
580-
)?;
581-
}
608+
// 4. No need to mark all nodes as updated: with `relink_all_items` set, the build relinks every item
609+
assert!(
610+
options.relink_all_items,
611+
"forcing relinking of all items requires the relink_all_items option to be set to true"
612+
);
582613

583614
// 5. trigger build
584615
self.build::<R, P, M, M0>(wtxn, rng, options)

0 commit comments

Comments
 (0)