Skip to content

Commit f96e101

Browse files
authored
Merge pull request #114 from meilisearch/progress
Add a notion of progress in arroy compatible with Meilisearch
2 parents f52bf05 + 0432afd commit f96e101

File tree

6 files changed: 167 additions and 35 deletions.

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
[package]
22
name = "arroy"
33
description = "Annoy-inspired Approximate Nearest Neighbors in Rust, based on LMDB and optimized for memory usage"
4-
version = "0.6.0"
4+
version = "0.6.1"
55
documentation = "https://docs.rs/arroy"
66
repository = "https://github.com/meilisearch/arroy"
77
keywords = ["ANN-search", "Graph-algorithms", "Vector-Search", "Store"]
@@ -27,6 +27,7 @@ tempfile = "3.15.0"
2727
thiserror = "2.0.9"
2828
nohash = "0.2.0"
2929
page_size = "0.6.0"
30+
enum-iterator = "2.1.0"
3031

3132
[dev-dependencies]
3233
anyhow = "1.0.95"

src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -105,7 +105,7 @@ use node::{Node, NodeCodec};
105105
use node_id::{NodeId, NodeMode};
106106
pub use reader::{QueryBuilder, Reader};
107107
pub use stats::{Stats, TreeStats};
108-
pub use writer::{ArroyBuilder, Writer};
108+
pub use writer::{ArroyBuilder, MainStep, SubStep, Writer, WriterProgress};
109109

110110
/// The set of types used by the [`Distance`] trait.
111111
pub mod internals {

src/parallel.rs

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -194,6 +194,7 @@ impl<'t, D: Distance> ImmutableLeafs<'t, D> {
194194
database: Database<D>,
195195
index: u16,
196196
nb_leafs: u64,
197+
progress: &AtomicU32,
197198
) -> heed::Result<Self> {
198199
let mut leafs =
199200
IntMap::with_capacity_and_hasher(nb_leafs as usize, BuildNoHashHasher::default());
@@ -209,6 +210,7 @@ impl<'t, D: Distance> ImmutableLeafs<'t, D> {
209210
let item_id = key.node.unwrap_item();
210211
assert_eq!(*constant_length.get_or_insert(bytes.len()), bytes.len());
211212
leafs.insert(item_id, bytes.as_ptr());
213+
progress.fetch_add(1, Ordering::Relaxed);
212214
}
213215

214216
Ok(ImmutableLeafs { leafs, constant_length, _marker: marker::PhantomData })
@@ -407,6 +409,7 @@ impl<'t, D: Distance> ImmutableTrees<'t, D> {
407409
database: Database<D>,
408410
index: u16,
409411
nb_trees: u64,
412+
progress: &AtomicU32,
410413
) -> heed::Result<Self> {
411414
let mut trees =
412415
IntMap::with_capacity_and_hasher(nb_trees as usize, BuildNoHashHasher::default());
@@ -420,6 +423,7 @@ impl<'t, D: Distance> ImmutableTrees<'t, D> {
420423
let (key, bytes) = result?;
421424
let tree_id = key.node.unwrap_tree();
422425
trees.insert(tree_id, (bytes.len(), bytes.as_ptr()));
426+
progress.fetch_add(1, Ordering::Relaxed);
423427
}
424428

425429
Ok(ImmutableTrees { trees, _marker: marker::PhantomData })

src/tests/binary_quantized.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,7 @@ fn write_and_retrieve_binary_quantized_vector() {
4848
==================
4949
Dumping index 0
5050
Root: Metadata { dimensions: 16, items: RoaringBitmap<[0]>, roots: [0], distance: "binary quantized euclidean" }
51-
Version: Version { major: 0, minor: 6, patch: 0 }
51+
Version: Version { major: 0, minor: 6, patch: 1 }
5252
Tree 0: Descendants(Descendants { descendants: [0] })
5353
Item 0: Leaf(Leaf { header: NodeHeaderBinaryQuantizedEuclidean { bias: 0.0 }, vector: [-1.0000, -1.0000, 1.0000, -1.0000, 1.0000, 1.0000, -1.0000, 1.0000, -1.0000, -1.0000, "other ..."] })
5454
"###);

src/tests/writer.rs

Lines changed: 28 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,7 @@ fn use_u32_max_minus_one_for_a_vec() {
5353
==================
5454
Dumping index 0
5555
Root: Metadata { dimensions: 3, items: RoaringBitmap<[4294967294]>, roots: [0], distance: "euclidean" }
56-
Version: Version { major: 0, minor: 6, patch: 0 }
56+
Version: Version { major: 0, minor: 6, patch: 1 }
5757
Tree 0: Descendants(Descendants { descendants: [4294967294] })
5858
Item 4294967294: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 1.0000, 2.0000] })
5959
"###);
@@ -73,7 +73,7 @@ fn use_u32_max_for_a_vec() {
7373
==================
7474
Dumping index 0
7575
Root: Metadata { dimensions: 3, items: RoaringBitmap<[4294967295]>, roots: [0], distance: "euclidean" }
76-
Version: Version { major: 0, minor: 6, patch: 0 }
76+
Version: Version { major: 0, minor: 6, patch: 1 }
7777
Tree 0: Descendants(Descendants { descendants: [4294967295] })
7878
Item 4294967295: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 1.0000, 2.0000] })
7979
"###);
@@ -93,7 +93,7 @@ fn write_one_vector() {
9393
==================
9494
Dumping index 0
9595
Root: Metadata { dimensions: 3, items: RoaringBitmap<[0]>, roots: [0], distance: "euclidean" }
96-
Version: Version { major: 0, minor: 6, patch: 0 }
96+
Version: Version { major: 0, minor: 6, patch: 1 }
9797
Tree 0: Descendants(Descendants { descendants: [0] })
9898
Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 1.0000, 2.0000] })
9999
"###);
@@ -113,7 +113,7 @@ fn write_one_vector_in_one_tree() {
113113
==================
114114
Dumping index 0
115115
Root: Metadata { dimensions: 3, items: RoaringBitmap<[0]>, roots: [0], distance: "euclidean" }
116-
Version: Version { major: 0, minor: 6, patch: 0 }
116+
Version: Version { major: 0, minor: 6, patch: 1 }
117117
Tree 0: Descendants(Descendants { descendants: [0] })
118118
Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 1.0000, 2.0000] })
119119
"###);
@@ -133,7 +133,7 @@ fn write_one_vector_in_multiple_trees() {
133133
==================
134134
Dumping index 0
135135
Root: Metadata { dimensions: 3, items: RoaringBitmap<[0]>, roots: [0], distance: "euclidean" }
136-
Version: Version { major: 0, minor: 6, patch: 0 }
136+
Version: Version { major: 0, minor: 6, patch: 1 }
137137
Tree 0: Descendants(Descendants { descendants: [0] })
138138
Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 1.0000, 2.0000] })
139139
"###);
@@ -157,7 +157,7 @@ fn write_vectors_until_there_is_a_descendants() {
157157
==================
158158
Dumping index 0
159159
Root: Metadata { dimensions: 3, items: RoaringBitmap<[0, 1, 2]>, roots: [0], distance: "euclidean" }
160-
Version: Version { major: 0, minor: 6, patch: 0 }
160+
Version: Version { major: 0, minor: 6, patch: 1 }
161161
Tree 0: Descendants(Descendants { descendants: [0, 1, 2] })
162162
Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000, 0.0000] })
163163
Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [1.0000, 1.0000, 1.0000] })
@@ -235,31 +235,31 @@ fn write_multiple_indexes() {
235235
==================
236236
Dumping index 0
237237
Root: Metadata { dimensions: 3, items: RoaringBitmap<[0]>, roots: [0], distance: "euclidean" }
238-
Version: Version { major: 0, minor: 6, patch: 0 }
238+
Version: Version { major: 0, minor: 6, patch: 1 }
239239
Tree 0: Descendants(Descendants { descendants: [0] })
240240
Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 1.0000, 2.0000] })
241241
==================
242242
Dumping index 1
243243
Root: Metadata { dimensions: 3, items: RoaringBitmap<[0]>, roots: [0], distance: "euclidean" }
244-
Version: Version { major: 0, minor: 6, patch: 0 }
244+
Version: Version { major: 0, minor: 6, patch: 1 }
245245
Tree 0: Descendants(Descendants { descendants: [0] })
246246
Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 1.0000, 2.0000] })
247247
==================
248248
Dumping index 2
249249
Root: Metadata { dimensions: 3, items: RoaringBitmap<[0]>, roots: [0], distance: "euclidean" }
250-
Version: Version { major: 0, minor: 6, patch: 0 }
250+
Version: Version { major: 0, minor: 6, patch: 1 }
251251
Tree 0: Descendants(Descendants { descendants: [0] })
252252
Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 1.0000, 2.0000] })
253253
==================
254254
Dumping index 3
255255
Root: Metadata { dimensions: 3, items: RoaringBitmap<[0]>, roots: [0], distance: "euclidean" }
256-
Version: Version { major: 0, minor: 6, patch: 0 }
256+
Version: Version { major: 0, minor: 6, patch: 1 }
257257
Tree 0: Descendants(Descendants { descendants: [0] })
258258
Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 1.0000, 2.0000] })
259259
==================
260260
Dumping index 4
261261
Root: Metadata { dimensions: 3, items: RoaringBitmap<[0]>, roots: [0], distance: "euclidean" }
262-
Version: Version { major: 0, minor: 6, patch: 0 }
262+
Version: Version { major: 0, minor: 6, patch: 1 }
263263
Tree 0: Descendants(Descendants { descendants: [0] })
264264
Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 1.0000, 2.0000] })
265265
"###);
@@ -359,7 +359,7 @@ fn delete_one_item_in_a_one_item_db() {
359359
==================
360360
Dumping index 0
361361
Root: Metadata { dimensions: 2, items: RoaringBitmap<[0]>, roots: [0], distance: "euclidean" }
362-
Version: Version { major: 0, minor: 6, patch: 0 }
362+
Version: Version { major: 0, minor: 6, patch: 1 }
363363
Tree 0: Descendants(Descendants { descendants: [0] })
364364
Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000] })
365365
"###);
@@ -376,7 +376,7 @@ fn delete_one_item_in_a_one_item_db() {
376376
==================
377377
Dumping index 0
378378
Root: Metadata { dimensions: 2, items: RoaringBitmap<[]>, roots: [], distance: "euclidean" }
379-
Version: Version { major: 0, minor: 6, patch: 0 }
379+
Version: Version { major: 0, minor: 6, patch: 1 }
380380
"###);
381381

382382
let rtxn = handle.env.read_txn().unwrap();
@@ -402,7 +402,7 @@ fn delete_document_in_an_empty_index_74() {
402402
==================
403403
Dumping index 0
404404
Root: Metadata { dimensions: 2, items: RoaringBitmap<[0]>, roots: [0], distance: "euclidean" }
405-
Version: Version { major: 0, minor: 6, patch: 0 }
405+
Version: Version { major: 0, minor: 6, patch: 1 }
406406
Tree 0: Descendants(Descendants { descendants: [0] })
407407
Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000] })
408408
"###);
@@ -428,11 +428,11 @@ fn delete_document_in_an_empty_index_74() {
428428
==================
429429
Dumping index 0
430430
Root: Metadata { dimensions: 2, items: RoaringBitmap<[]>, roots: [], distance: "euclidean" }
431-
Version: Version { major: 0, minor: 6, patch: 0 }
431+
Version: Version { major: 0, minor: 6, patch: 1 }
432432
==================
433433
Dumping index 1
434434
Root: Metadata { dimensions: 2, items: RoaringBitmap<[]>, roots: [], distance: "euclidean" }
435-
Version: Version { major: 0, minor: 6, patch: 0 }
435+
Version: Version { major: 0, minor: 6, patch: 1 }
436436
"###);
437437

438438
let rtxn = handle.env.read_txn().unwrap();
@@ -458,7 +458,7 @@ fn delete_one_item_in_a_descendant() {
458458
==================
459459
Dumping index 0
460460
Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1]>, roots: [0], distance: "euclidean" }
461-
Version: Version { major: 0, minor: 6, patch: 0 }
461+
Version: Version { major: 0, minor: 6, patch: 1 }
462462
Tree 0: Descendants(Descendants { descendants: [0, 1] })
463463
Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000] })
464464
Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [1.0000, 0.0000] })
@@ -476,7 +476,7 @@ fn delete_one_item_in_a_descendant() {
476476
==================
477477
Dumping index 0
478478
Root: Metadata { dimensions: 2, items: RoaringBitmap<[1]>, roots: [0], distance: "euclidean" }
479-
Version: Version { major: 0, minor: 6, patch: 0 }
479+
Version: Version { major: 0, minor: 6, patch: 1 }
480480
Tree 0: Descendants(Descendants { descendants: [1] })
481481
Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [1.0000, 0.0000] })
482482
"###);
@@ -520,7 +520,7 @@ fn delete_one_leaf_in_a_split() {
520520
==================
521521
Dumping index 0
522522
Root: Metadata { dimensions: 2, items: RoaringBitmap<[1, 2]>, roots: [0], distance: "euclidean" }
523-
Version: Version { major: 0, minor: 6, patch: 0 }
523+
Version: Version { major: 0, minor: 6, patch: 1 }
524524
Tree 0: Descendants(Descendants { descendants: [1, 2] })
525525
Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [1.0000, 0.0000] })
526526
Item 2: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [2.0000, 0.0000] })
@@ -543,7 +543,7 @@ fn delete_one_item_in_a_single_document_database() {
543543
==================
544544
Dumping index 0
545545
Root: Metadata { dimensions: 2, items: RoaringBitmap<[0]>, roots: [0], distance: "cosine" }
546-
Version: Version { major: 0, minor: 6, patch: 0 }
546+
Version: Version { major: 0, minor: 6, patch: 1 }
547547
Tree 0: Descendants(Descendants { descendants: [0] })
548548
Item 0: Leaf(Leaf { header: NodeHeaderCosine { norm: 0.0 }, vector: [0.0000, 0.0000] })
549549
"###);
@@ -560,7 +560,7 @@ fn delete_one_item_in_a_single_document_database() {
560560
==================
561561
Dumping index 0
562562
Root: Metadata { dimensions: 2, items: RoaringBitmap<[]>, roots: [], distance: "cosine" }
563-
Version: Version { major: 0, minor: 6, patch: 0 }
563+
Version: Version { major: 0, minor: 6, patch: 1 }
564564
"###);
565565
}
566566

@@ -654,7 +654,7 @@ fn add_one_item_incrementally_in_an_empty_db() {
654654
==================
655655
Dumping index 0
656656
Root: Metadata { dimensions: 2, items: RoaringBitmap<[]>, roots: [], distance: "euclidean" }
657-
Version: Version { major: 0, minor: 6, patch: 0 }
657+
Version: Version { major: 0, minor: 6, patch: 1 }
658658
"###);
659659

660660
let mut wtxn = handle.env.write_txn().unwrap();
@@ -667,7 +667,7 @@ fn add_one_item_incrementally_in_an_empty_db() {
667667
==================
668668
Dumping index 0
669669
Root: Metadata { dimensions: 2, items: RoaringBitmap<[0]>, roots: [0], distance: "euclidean" }
670-
Version: Version { major: 0, minor: 6, patch: 0 }
670+
Version: Version { major: 0, minor: 6, patch: 1 }
671671
Tree 0: Descendants(Descendants { descendants: [0] })
672672
Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000] })
673673
"###);
@@ -687,7 +687,7 @@ fn add_one_item_incrementally_in_a_one_item_db() {
687687
==================
688688
Dumping index 0
689689
Root: Metadata { dimensions: 2, items: RoaringBitmap<[0]>, roots: [0], distance: "euclidean" }
690-
Version: Version { major: 0, minor: 6, patch: 0 }
690+
Version: Version { major: 0, minor: 6, patch: 1 }
691691
Tree 0: Descendants(Descendants { descendants: [0] })
692692
Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000] })
693693
"###);
@@ -702,7 +702,7 @@ fn add_one_item_incrementally_in_a_one_item_db() {
702702
==================
703703
Dumping index 0
704704
Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1]>, roots: [0], distance: "euclidean" }
705-
Version: Version { major: 0, minor: 6, patch: 0 }
705+
Version: Version { major: 0, minor: 6, patch: 1 }
706706
Tree 0: Descendants(Descendants { descendants: [0, 1] })
707707
Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000] })
708708
Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [1.0000, 0.0000] })
@@ -724,7 +724,7 @@ fn add_one_item_incrementally_to_create_a_split_node() {
724724
==================
725725
Dumping index 0
726726
Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1]>, roots: [0], distance: "euclidean" }
727-
Version: Version { major: 0, minor: 6, patch: 0 }
727+
Version: Version { major: 0, minor: 6, patch: 1 }
728728
Tree 0: Descendants(Descendants { descendants: [0, 1] })
729729
Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000] })
730730
Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [1.0000, 0.0000] })
@@ -740,7 +740,7 @@ fn add_one_item_incrementally_to_create_a_split_node() {
740740
==================
741741
Dumping index 0
742742
Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1, 2]>, roots: [2], distance: "euclidean" }
743-
Version: Version { major: 0, minor: 6, patch: 0 }
743+
Version: Version { major: 0, minor: 6, patch: 1 }
744744
Tree 1: Descendants(Descendants { descendants: [1, 2] })
745745
Tree 2: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: Item(0), right: Tree(1), normal: [1.0000, 0.0000] })
746746
Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000] })
@@ -1051,7 +1051,7 @@ fn append() {
10511051
==================
10521052
Dumping index 1
10531053
Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1]>, roots: [0], distance: "euclidean" }
1054-
Version: Version { major: 0, minor: 6, patch: 0 }
1054+
Version: Version { major: 0, minor: 6, patch: 1 }
10551055
Tree 0: Descendants(Descendants { descendants: [0, 1] })
10561056
Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000] })
10571057
Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1000, 0.1000] })

0 commit comments

Comments (0)