Skip to content

Commit 8aaeae7

Browse files
authored
Merge pull request #77 from meilisearch/panic-at-search
Panic at search when the trees need a rebuild
2 parents 6470bb7 + ae9e854 commit 8aaeae7

File tree

8 files changed

+182
-69
lines changed

8 files changed

+182
-69
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[package]
22
name = "arroy"
33
description = "Annoy-inspired Approximate Nearest Neighbors in Rust, based on LMDB and optimized for memory usage"
4-
version = "0.3.1"
4+
version = "0.4.0"
55
documentation = "https://docs.rs/arroy"
66
repository = "https://github.com/meilisearch/arroy"
77
keywords = ["ANN-search", "Graph-algorithms", "Vector-Search", "Store"]

src/error.rs

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
use std::io;
22

3+
use crate::{key::Key, node_id::NodeMode, ItemId};
4+
35
/// The different set of errors that arroy can encounter.
46
#[derive(Debug, thiserror::Error)]
57
pub enum Error {
@@ -12,7 +14,7 @@ pub enum Error {
1214
Io(#[from] io::Error),
1315

1416
/// The user is trying to insert or search for a vector that is not of the right dimensions.
15-
#[error("Invalid vector dimensions. Got {received} but expected {expected}.")]
17+
#[error("Invalid vector dimensions. Got {received} but expected {expected}")]
1618
InvalidVecDimension {
1719
/// The expected number of dimensions.
1820
expected: usize,
@@ -21,7 +23,7 @@ pub enum Error {
2123
},
2224

2325
/// An internal error returned when arroy cannot generate internal IDs.
24-
#[error("Database full. Arroy cannot generate enough internal IDs for your items.")]
26+
#[error("Database full. Arroy cannot generate enough internal IDs for your items")]
2527
DatabaseFull,
2628

2729
/// The user tried to append an item in the database but the last inserted item
@@ -30,7 +32,7 @@ pub enum Error {
3032
InvalidItemAppend,
3133

3234
/// The user is trying to query a database with a distance that is not of the right type.
33-
#[error("Invalid distance provided. Got {received} but expected {expected}.")]
35+
#[error("Invalid distance provided. Got {received} but expected {expected}")]
3436
UnmatchingDistance {
3537
/// The expected distance type.
3638
expected: String,
@@ -40,10 +42,37 @@ pub enum Error {
4042

4143
/// Arroy is not able to find the metadata for a given index.
4244
/// It is probably because the user forgot to build the database.
43-
#[error("Metadata are missing, did you build your database before trying to read it.")]
44-
MissingMetadata,
45+
#[error(
46+
"Metadata are missing on index {0}, You must build your database before attempting to read it"
47+
)]
48+
MissingMetadata(u16),
49+
50+
/// The last time items in the database were updated, the [`Writer::build`] method wasn't called.
51+
#[error("The trees have not been built after an update on index {0}")]
52+
NeedBuild(u16),
4553

4654
/// Internal error
47-
#[error("Internal error: Node is missing")]
48-
MissingNode,
55+
#[error("Internal error: {mode}({item}) is missing in index `{index}`")]
56+
MissingKey {
57+
/// The index that caused the error
58+
index: u16,
59+
/// The kind of item that was being queried
60+
mode: &'static str,
61+
/// The item ID queried
62+
item: ItemId,
63+
},
64+
}
65+
66+
impl Error {
67+
pub(crate) fn missing_key(key: Key) -> Self {
68+
Self::MissingKey {
69+
index: key.index,
70+
mode: match key.node.mode {
71+
NodeMode::Item => "Item",
72+
NodeMode::Tree => "Tree",
73+
NodeMode::Metadata => "Metadata",
74+
},
75+
item: key.node.item,
76+
}
77+
}
4978
}

src/reader.rs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use std::iter::repeat;
55
use std::marker;
66
use std::num::NonZeroUsize;
77

8-
use heed::types::DecodeIgnore;
8+
use heed::types::{Bytes, DecodeIgnore};
99
use heed::RoTxn;
1010
use ordered_float::OrderedFloat;
1111
use roaring::RoaringBitmap;
@@ -36,7 +36,7 @@ impl<'t, D: Distance> Reader<'t, D> {
3636
let metadata_key = Key::metadata(index);
3737
let metadata = match database.remap_data_type::<MetadataCodec>().get(rtxn, &metadata_key)? {
3838
Some(metadata) => metadata,
39-
None => return Err(Error::MissingMetadata),
39+
None => return Err(Error::MissingMetadata(index)),
4040
};
4141

4242
if D::name() != metadata.distance {
@@ -45,6 +45,9 @@ impl<'t, D: Distance> Reader<'t, D> {
4545
received: D::name(),
4646
});
4747
}
48+
if database.remap_data_type::<Bytes>().get(rtxn, &Key::updated(index))?.is_some() {
49+
return Err(Error::NeedBuild(index));
50+
}
4851

4952
Ok(Reader {
5053
database: database.remap_data_type(),
@@ -222,7 +225,8 @@ impl<'t, D: Distance> Reader<'t, D> {
222225
None => break,
223226
};
224227

225-
match self.database.get(rtxn, &Key::new(self.index, item))?.unwrap() {
228+
let key = Key::new(self.index, item);
229+
match self.database.get(rtxn, &key)?.ok_or(Error::missing_key(key))? {
226230
Node::Leaf(_) => {
227231
if candidates.map_or(true, |c| c.contains(item.item)) {
228232
nns.push(item.unwrap_item());
@@ -250,7 +254,8 @@ impl<'t, D: Distance> Reader<'t, D> {
250254

251255
let mut nns_distances = Vec::with_capacity(nns.len());
252256
for nn in nns {
253-
let leaf = match self.database.get(rtxn, &Key::item(self.index, nn))?.unwrap() {
257+
let key = Key::item(self.index, nn);
258+
let leaf = match self.database.get(rtxn, &key)?.ok_or(Error::missing_key(key))? {
254259
Node::Leaf(leaf) => leaf,
255260
Node::Descendants(_) | Node::SplitPlaneNormal(_) => unreachable!(),
256261
};

src/tests/reader.rs

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ fn open_unfinished_db() {
3434

3535
let rtxn = handle.env.read_txn().unwrap();
3636
let ret = Reader::<Euclidean>::open(&rtxn, 0, handle.database).map(|_| ()).unwrap_err();
37-
insta::assert_display_snapshot!(ret, @"Metadata are missing, did you build your database before trying to read it.");
37+
insta::assert_display_snapshot!(ret, @"Metadata are missing on index 0, You must build your database before attempting to read it");
3838
}
3939

4040
#[test]
@@ -50,7 +50,7 @@ fn open_db_with_wrong_dimension() {
5050
let rtxn = handle.env.read_txn().unwrap();
5151
let reader = Reader::<Euclidean>::open(&rtxn, 0, handle.database).unwrap();
5252
let ret = reader.nns_by_vector(&rtxn, &[1.0, 2.0, 3.0], 5, None, None).unwrap_err();
53-
insta::assert_display_snapshot!(ret, @"Invalid vector dimensions. Got 3 but expected 2.");
53+
insta::assert_display_snapshot!(ret, @"Invalid vector dimensions. Got 3 but expected 2");
5454
}
5555

5656
#[test]
@@ -206,7 +206,7 @@ fn filtering() {
206206

207207
#[test]
208208
fn search_in_empty_database() {
209-
// See https://github.com/meilisearch/arroy/issues/74
209+
// See https://github.com/meilisearch/arroy/issues/75
210210
let handle = create_database::<Euclidean>();
211211

212212
let mut wtxn = handle.env.write_txn().unwrap();
@@ -219,3 +219,41 @@ fn search_in_empty_database() {
219219
let ret = reader.nns_by_vector(&rtxn, &[0., 0.], 10, None, None).unwrap();
220220
insta::assert_debug_snapshot!(ret, @"[]");
221221
}
222+
223+
#[test]
224+
fn try_reading_in_a_non_built_database() {
225+
// See https://github.com/meilisearch/arroy/issues/74
226+
let handle = create_database::<Euclidean>();
227+
228+
let mut wtxn = handle.env.write_txn().unwrap();
229+
let writer = Writer::new(handle.database, 0, 2);
230+
writer.add_item(&mut wtxn, 0, &[0.0, 0.0]).unwrap();
231+
// We don't build the database
232+
wtxn.commit().unwrap();
233+
234+
let rtxn = handle.env.read_txn().unwrap();
235+
let error = Reader::open(&rtxn, 0, handle.database).unwrap_err();
236+
insta::assert_debug_snapshot!(error, @r###"
237+
MissingMetadata(
238+
0,
239+
)
240+
"###);
241+
drop(rtxn);
242+
243+
// we build the database once to get valid metadata
244+
let mut wtxn = handle.env.write_txn().unwrap();
245+
let writer = Writer::new(handle.database, 0, 2);
246+
writer.build(&mut wtxn, &mut rng(), None).unwrap();
247+
let writer = Writer::new(handle.database, 0, 2);
248+
writer.del_item(&mut wtxn, 0).unwrap();
249+
// We don't build the database; this leaves the database in a corrupted state
250+
wtxn.commit().unwrap();
251+
252+
let rtxn = handle.env.read_txn().unwrap();
253+
let error = Reader::open(&rtxn, 0, handle.database).unwrap_err();
254+
insta::assert_debug_snapshot!(error, @r###"
255+
NeedBuild(
256+
0,
257+
)
258+
"###);
259+
}

src/tests/snapshots/arroy__tests__writer__write_and_update_lot_of_random_points-2.snap

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,5 +213,4 @@ Tree 124: Descendants(Descendants { descendants: [14, 16, 22, 38, 41, 42, 43, 44
213213
Tree 125: Descendants(Descendants { descendants: [2, 10, 21, 23, 26, 28, 29, 30, 34, 37, 39, 47, 52, 54, 55, 67, 68, 69, 71, 72, 76, 78, 86, 88, 90, 94, 97, 98] })
214214
Tree 126: SplitPlaneNormal(SplitPlaneNormal { left: Tree(124), right: Tree(125), normal: [-0.2156, 0.0925, 0.0242, 0.1246, -0.0758, -0.0672, -0.1816, 0.4853, 0.1643, 0.1418, 0.2752, -0.1825, -0.2671, 0.0674, -0.1950, -0.0743, -0.1493, -0.0368, -0.1929, 0.2540, -0.0441, 0.3135, -0.2219, 0.1059, 0.2115, 0.0749, 0.1629, -0.1050, -0.0211, 0.0051] })
215215
Root: Metadata { dimensions: 30, items: RoaringBitmap<100 values between 0 and 99>, roots: [8, 17, 24, 35, 44, 55, 64, 75, 86, 97], distance: "euclidean" }
216-
updated_item_ids: RoaringBitmap<[]>
217216

src/tests/snapshots/arroy__tests__writer__write_and_update_lot_of_random_points.snap

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,5 +203,4 @@ Tree 95: SplitPlaneNormal(SplitPlaneNormal { left: Tree(89), right: Tree(94), no
203203
Tree 96: SplitPlaneNormal(SplitPlaneNormal { left: Tree(88), right: Tree(95), normal: [0.0684, -0.2434, -0.1543, 0.0337, 0.1784, 0.1295, 0.3061, -0.0175, -0.0679, -0.1419, -0.0180, 0.2335, 0.3782, -0.1127, 0.2746, -0.2657, -0.0579, 0.1808, 0.2623, -0.3329, 0.1801, -0.2106, 0.1692, -0.1191, 0.0152, 0.1214, 0.0252, -0.0236, -0.1220, -0.1448] })
204204
Tree 97: SplitPlaneNormal(SplitPlaneNormal { left: Tree(87), right: Tree(96), normal: [-0.0316, -0.1036, 0.3252, -0.0985, -0.1286, -0.4867, -0.0286, 0.1159, -0.0177, 0.2205, 0.0177, 0.3192, 0.1521, -0.1484, 0.1918, -0.2794, -0.1141, 0.2598, 0.1453, 0.1133, -0.1149, -0.0455, 0.0697, -0.2537, 0.1797, -0.0423, -0.0470, 0.0886, 0.0868, 0.2083] })
205205
Root: Metadata { dimensions: 30, items: RoaringBitmap<100 values between 0 and 99>, roots: [8, 17, 24, 35, 44, 55, 64, 75, 86, 97], distance: "euclidean" }
206-
updated_item_ids: RoaringBitmap<[]>
207206

0 commit comments

Comments
 (0)