Skip to content

Commit 7e783f8

Browse files
authored
Merge pull request #125 from meilisearch/make-splitnode-null
Handle none as a valid split plane normal
2 parents 0389d5f + 2849d29 commit 7e783f8

File tree

9 files changed: +223 -101 lines changed

src/node.rs

Lines changed: 18 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -115,17 +115,20 @@ impl fmt::Debug for ItemIds<'_> {
115115
pub struct SplitPlaneNormal<'a, D: Distance> {
116116
pub left: NodeId,
117117
pub right: NodeId,
118-
pub normal: Cow<'a, UnalignedVector<D::VectorCodec>>,
118+
pub normal: Option<Cow<'a, UnalignedVector<D::VectorCodec>>>,
119119
}
120120

121121
impl<D: Distance> fmt::Debug for SplitPlaneNormal<'_, D> {
122122
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
123123
let name = format!("SplitPlaneNormal<{}>", D::name());
124-
f.debug_struct(&name)
125-
.field("left", &self.left)
126-
.field("right", &self.right)
127-
.field("normal", &self.normal)
128-
.finish()
124+
let mut debug = f.debug_struct(&name);
125+
126+
debug.field("left", &self.left).field("right", &self.right);
127+
match &self.normal {
128+
Some(normal) => debug.field("normal", &normal),
129+
None => debug.field("normal", &"none"),
130+
};
131+
debug.finish()
129132
}
130133
}
131134

@@ -153,7 +156,9 @@ impl<'a, D: Distance> BytesEncode<'a> for NodeCodec<D> {
153156
bytes.push(SPLIT_PLANE_NORMAL_TAG);
154157
bytes.extend_from_slice(&left.to_bytes());
155158
bytes.extend_from_slice(&right.to_bytes());
156-
bytes.extend_from_slice(normal.as_bytes());
159+
if let Some(normal) = normal {
160+
bytes.extend_from_slice(normal.as_bytes());
161+
}
157162
}
158163
Node::Descendants(Descendants { descendants }) => {
159164
bytes.push(DESCENDANTS_TAG);
@@ -179,11 +184,12 @@ impl<'a, D: Distance> BytesDecode<'a> for NodeCodec<D> {
179184
[SPLIT_PLANE_NORMAL_TAG, bytes @ ..] => {
180185
let (left, bytes) = NodeId::from_bytes(bytes);
181186
let (right, bytes) = NodeId::from_bytes(bytes);
182-
Ok(Node::SplitPlaneNormal(SplitPlaneNormal {
183-
normal: UnalignedVector::<D::VectorCodec>::from_bytes(bytes)?,
184-
left,
185-
right,
186-
}))
187+
let normal = if bytes.is_empty() {
188+
None
189+
} else {
190+
Some(UnalignedVector::<D::VectorCodec>::from_bytes(bytes)?)
191+
};
192+
Ok(Node::SplitPlaneNormal(SplitPlaneNormal { normal, left, right }))
187193
}
188194
[DESCENDANTS_TAG, bytes @ ..] => Ok(Node::Descendants(Descendants {
189195
descendants: Cow::Owned(RoaringBitmap::deserialize_from(bytes)?),

src/reader.rs

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -209,7 +209,7 @@ impl<'t, D: Distance> Reader<'t, D> {
209209
Node::SplitPlaneNormal(SplitPlaneNormal { normal, left, right }) => {
210210
let left = recursive_depth(rtxn, database, index, left)?;
211211
let right = recursive_depth(rtxn, database, index, right)?;
212-
let is_zero_normal = normal.is_zero() as usize;
212+
let is_zero_normal = normal.map_or(1, |normal| normal.is_zero() as usize);
213213

214214
Ok(TreeStats {
215215
depth: 1 + left.depth.max(right.depth),
@@ -322,7 +322,10 @@ impl<'t, D: Distance> Reader<'t, D> {
322322
}
323323
}
324324
Node::SplitPlaneNormal(SplitPlaneNormal { normal, left, right }) => {
325-
let margin = D::margin_no_header(&normal, &query_leaf.vector);
325+
let margin = match normal {
326+
Some(normal) => D::margin_no_header(&normal, &query_leaf.vector),
327+
None => 0.0,
328+
};
326329
queue.push((OrderedFloat(D::pq_distance(dist, margin, Side::Left)), left));
327330
queue.push((OrderedFloat(D::pq_distance(dist, margin, Side::Right)), right));
328331
}
@@ -391,7 +394,7 @@ impl<'t, D: Distance> Reader<'t, D> {
391394
writeln!(writer, "\t\t{} [label=\"{}\"]", key.node.item, key.node.item,)?
392395
}
393396
Node::SplitPlaneNormal(SplitPlaneNormal { normal, left, right }) => {
394-
if normal.is_zero() {
397+
if normal.is_none() {
395398
writeln!(writer, "\t\t{} [color=red]", key.node.item)?;
396399
}
397400
writeln!(

src/tests/assets/v0_6/data.mdb

48 KB
Binary file not shown.

src/tests/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@ use crate::{Database, Distance, MetadataCodec, NodeCodec, NodeMode, Reader};
1111

1212
mod binary_quantized;
1313
mod reader;
14+
mod upgrade;
1415
mod writer;
1516

1617
pub struct DatabaseHandle<D> {

src/tests/upgrade.rs

Lines changed: 58 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,58 @@
1+
use heed::EnvOpenOptions;
2+
3+
use super::DatabaseHandle;
4+
use crate::distance::Euclidean;
5+
use crate::upgrade::from_0_6_to_0_7;
6+
use crate::Database;
7+
8+
#[test]
9+
fn upgrade_v0_6_to_v0_7() {
10+
let dir = tempfile::tempdir().unwrap();
11+
std::fs::copy("src/tests/assets/v0_6/data.mdb", dir.path().join("data.mdb")).unwrap();
12+
let env =
13+
unsafe { EnvOpenOptions::new().map_size(200 * 1024 * 1024).open(dir.path()) }.unwrap();
14+
let mut rtxn = env.read_txn().unwrap();
15+
let database: Database<Euclidean> = env.open_database(&mut rtxn, None).unwrap().unwrap();
16+
17+
let mut wtxn = env.write_txn().unwrap();
18+
19+
/* The original database in v0.6 looks like this:
20+
==================
21+
Dumping index 0
22+
Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1, 2, 3, 4, 5]>, roots: [0], distance: "euclidean" }
23+
Tree 0: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: Item(0), right: Tree(4), normal: [1.0000, 0.0000] })
24+
Tree 1: Descendants(Descendants { descendants: [1, 5] })
25+
Tree 2: Descendants(Descendants { descendants: [3, 4] })
26+
Tree 3: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: Tree(2), right: Item(2), normal: [0.0000, 0.0000] }) // The normal should become None after the upgrade
27+
Tree 4: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: Tree(1), right: Tree(3), normal: [0.0000, 0.0000] }) // The normal should become None after the upgrade
28+
Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000] })
29+
Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [1.0000, 0.0000] })
30+
Item 2: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [2.0000, 0.0000] })
31+
Item 3: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [3.0000, 0.0000] })
32+
Item 4: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [4.0000, 0.0000] })
33+
Item 5: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [5.0000, 0.0000] })
34+
*/
35+
36+
from_0_6_to_0_7(&mut rtxn, database, &mut wtxn, database).unwrap();
37+
wtxn.commit().unwrap();
38+
drop(rtxn);
39+
40+
let handle = DatabaseHandle { env: env.clone(), database, tempdir: dir };
41+
insta::assert_snapshot!(handle, @r#"
42+
==================
43+
Dumping index 0
44+
Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1, 2, 3, 4, 5]>, roots: [0], distance: "euclidean" }
45+
Version: Version { major: 0, minor: 6, patch: 1 }
46+
Tree 0: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: Item(0), right: Tree(4), normal: [1.0000, 0.0000] })
47+
Tree 1: Descendants(Descendants { descendants: [1, 5] })
48+
Tree 2: Descendants(Descendants { descendants: [3, 4] })
49+
Tree 3: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: Tree(2), right: Item(2), normal: "none" })
50+
Tree 4: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: Tree(1), right: Tree(3), normal: "none" })
51+
Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000] })
52+
Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [1.0000, 0.0000] })
53+
Item 2: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [2.0000, 0.0000] })
54+
Item 3: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [3.0000, 0.0000] })
55+
Item 4: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [4.0000, 0.0000] })
56+
Item 5: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [5.0000, 0.0000] })
57+
"#);
58+
}

0 commit comments

Comments (0)