From 5af1db506bf86c3e552a17900c9723094dfbe329 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 1 Oct 2025 14:55:12 +0200 Subject: [PATCH 1/6] Use heed v0.22.1-nested-rtxns --- Cargo.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 19aa205..ed3601a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "arroy" description = "Annoy-inspired Approximate Nearest Neighbors in Rust, based on LMDB and optimized for memory usage" -version = "0.6.3" +version = "0.6.4-nested-rtxns" documentation = "https://docs.rs/arroy" repository = "https://github.com/meilisearch/arroy" keywords = ["ANN-search", "Graph-algorithms", "Vector-Search", "Store"] @@ -16,7 +16,7 @@ edition = "2021" [dependencies] bytemuck = { version = "1.21.0", features = ["derive", "extern_crate_alloc"] } byteorder = "1.5.0" -heed = { version = "0.22.0", default-features = false } +heed = { version = "0.22.1-nested-rtxns", default-features = false } tracing = "0.1.41" memmap2 = "0.9.5" ordered-float = "4.6.0" @@ -35,7 +35,7 @@ approx = "0.5.1" arbitrary = { version = "1.4.1", features = ["derive"] } clap = { version = "4.5.24", features = ["derive"] } env_logger = "0.11.6" -hannoy = "0.0.4" +hannoy = { version = "0.0.9-nested-rtxns", git = "https://github.com/nnethercott/hannoy", "tag" = "v0.0.9-nested-rtxns", default-features = false } insta = "1.42.0" instant-distance = "0.6.1" proptest = "1.6.0" From 6c67285b8cc541df1c4c780bdf1e223f160489af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 1 Oct 2025 14:57:30 +0200 Subject: [PATCH 2/6] Make clippy happy --- src/reader.rs | 2 +- src/roaring.rs | 2 +- src/unaligned_vector/binary_quantized.rs | 2 +- src/unaligned_vector/f32.rs | 10 ++++------ src/unaligned_vector/mod.rs | 19 ++++++++----------- 5 files changed, 15 insertions(+), 20 deletions(-) diff --git a/src/reader.rs b/src/reader.rs index fb1a275..5a27373 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -273,7 +273,7 @@ impl<'t, D: Distance> Reader<'t, D> { /// Return a [`QueryBuilder`] that lets you configure and execute a search request. /// /// You must provide the number of items you want to receive. - pub fn nns(&self, count: usize) -> QueryBuilder { + pub fn nns(&self, count: usize) -> QueryBuilder<'_, D> { QueryBuilder { reader: self, count, search_k: None, oversampling: None, candidates: None } } diff --git a/src/roaring.rs b/src/roaring.rs index ed07ec8..951c128 100644 --- a/src/roaring.rs +++ b/src/roaring.rs @@ -16,7 +16,7 @@ impl heed::BytesDecode<'_> for RoaringBitmapCodec { impl heed::BytesEncode<'_> for RoaringBitmapCodec { type EItem = RoaringBitmap; - fn bytes_encode(item: &Self::EItem) -> Result, BoxedError> { + fn bytes_encode(item: &Self::EItem) -> Result, BoxedError> { let mut bytes = Vec::with_capacity(item.serialized_size()); item.serialize_into(&mut bytes)?; Ok(Cow::Owned(bytes)) diff --git a/src/unaligned_vector/binary_quantized.rs b/src/unaligned_vector/binary_quantized.rs index d4f4f29..e4f4260 100644 --- a/src/unaligned_vector/binary_quantized.rs +++ b/src/unaligned_vector/binary_quantized.rs @@ -15,7 +15,7 @@ const QUANTIZED_WORD_BYTES: usize = std::mem::size_of::(); pub enum BinaryQuantized {} impl UnalignedVectorCodec for BinaryQuantized { - fn from_bytes(bytes: &[u8]) -> Result>, SizeMismatch> { + fn from_bytes(bytes: &[u8]) -> Result>, SizeMismatch> { let rem = bytes.len() % QUANTIZED_WORD_BYTES; if rem == 0 { // safety: `UnalignedVector` is transparent diff --git a/src/unaligned_vector/f32.rs b/src/unaligned_vector/f32.rs index aa078de..78a4537 100644 --- a/src/unaligned_vector/f32.rs +++ b/src/unaligned_vector/f32.rs @@ -1,7 +1,5 @@ -use std::{ - borrow::Cow, - mem::{size_of, transmute}, -}; +use std::borrow::Cow; +use std::mem::{size_of, transmute}; use bytemuck::cast_slice; use byteorder::{ByteOrder, NativeEndian}; @@ -10,7 +8,7 @@ use super::{SizeMismatch, UnalignedVector, UnalignedVectorCodec}; impl UnalignedVectorCodec for f32 { /// Creates an unaligned slice of f32 wrapper from a slice of bytes. - fn from_bytes(bytes: &[u8]) -> Result>, SizeMismatch> { + fn from_bytes(bytes: &[u8]) -> Result>, SizeMismatch> { let rem = bytes.len() % size_of::(); if rem == 0 { // safety: `UnalignedF32Slice` is transparent @@ -22,7 +20,7 @@ impl UnalignedVectorCodec for f32 { /// Creates an unaligned slice of f32 wrapper from a slice of f32. /// The slice is already known to be of the right length. - fn from_slice(slice: &[f32]) -> Cow> { + fn from_slice(slice: &[f32]) -> Cow<'_, UnalignedVector> { Self::from_bytes(cast_slice(slice)).unwrap() } diff --git a/src/unaligned_vector/mod.rs b/src/unaligned_vector/mod.rs index 1b0cc89..9cd1e07 100644 --- a/src/unaligned_vector/mod.rs +++ b/src/unaligned_vector/mod.rs @@ -1,12 +1,9 @@ -use std::{ - borrow::{Borrow, Cow}, - fmt, - marker::PhantomData, - mem::transmute, -}; +use std::borrow::{Borrow, Cow}; +use std::fmt; +use std::marker::PhantomData; +use std::mem::transmute; pub use binary_quantized::BinaryQuantized; - use bytemuck::pod_collect_to_vec; mod binary_quantized; @@ -19,11 +16,11 @@ mod binary_quantized_test; pub trait UnalignedVectorCodec: std::borrow::ToOwned + Sized { /// Creates an unaligned vector from a slice of bytes. /// Don't allocate. - fn from_bytes(bytes: &[u8]) -> Result>, SizeMismatch>; + fn from_bytes(bytes: &[u8]) -> Result>, SizeMismatch>; /// Creates an unaligned vector from a slice of f32. /// May allocate depending on the codec. - fn from_slice(slice: &[f32]) -> Cow>; + fn from_slice(slice: &[f32]) -> Cow<'_, UnalignedVector>; /// Creates an unaligned slice of f32 wrapper from a slice of f32. /// The slice is already known to be of the right length. @@ -64,13 +61,13 @@ impl UnalignedVector { /// Creates an unaligned vector from a slice of bytes. /// Don't allocate. - pub fn from_bytes(bytes: &[u8]) -> Result>, SizeMismatch> { + pub fn from_bytes(bytes: &[u8]) -> Result>, SizeMismatch> { Codec::from_bytes(bytes) } /// Creates an unaligned vector from a slice of f32. /// May allocate depending on the codec. - pub fn from_slice(slice: &[f32]) -> Cow> { + pub fn from_slice(slice: &[f32]) -> Cow<'_, UnalignedVector> { Codec::from_slice(slice) } From 1b0664e1119a0c1c978c0f7470d62a698f8fb5ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 1 Oct 2025 17:11:03 +0200 Subject: [PATCH 3/6] Fix tests --- src/tests/binary_quantized.rs | 2 +- src/tests/writer.rs | 56 +++++++++++++++++------------------ 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/src/tests/binary_quantized.rs b/src/tests/binary_quantized.rs index 10f41ee..d5f1bca 100644 --- a/src/tests/binary_quantized.rs +++ b/src/tests/binary_quantized.rs @@ -48,7 +48,7 @@ fn write_and_retrieve_binary_quantized_vector() { ================== Dumping index 0 Root: Metadata { dimensions: 16, items: RoaringBitmap<[0]>, roots: [0], distance: "binary quantized euclidean" } - Version: Version { major: 0, minor: 6, patch: 3 } + Version: Version { major: 0, minor: 6, patch: 4 } Tree 0: Descendants(Descendants { descendants: [0] }) Item 0: Leaf(Leaf { header: NodeHeaderBinaryQuantizedEuclidean { bias: 0.0 }, vector: [-1.0000, -1.0000, 1.0000, -1.0000, 1.0000, 1.0000, -1.0000, 1.0000, -1.0000, -1.0000, "other ..."] }) "###); diff --git a/src/tests/writer.rs b/src/tests/writer.rs index 56b3ba6..6f08c91 100644 --- a/src/tests/writer.rs +++ b/src/tests/writer.rs @@ -114,7 +114,7 @@ fn use_u32_max_minus_one_for_a_vec() { ================== Dumping index 0 Root: Metadata { dimensions: 3, items: RoaringBitmap<[4294967294]>, roots: [0], distance: "euclidean" } - Version: Version { major: 0, minor: 6, patch: 3 } + Version: Version { major: 0, minor: 6, patch: 4 } Tree 0: Descendants(Descendants { descendants: [4294967294] }) Item 4294967294: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 1.0000, 2.0000] }) "###); @@ -134,7 +134,7 @@ fn use_u32_max_for_a_vec() { ================== Dumping index 0 Root: Metadata { dimensions: 3, items: RoaringBitmap<[4294967295]>, roots: [0], distance: "euclidean" } - Version: Version { major: 0, minor: 6, patch: 3 } + Version: Version { major: 0, minor: 6, patch: 4 } Tree 0: Descendants(Descendants { descendants: [4294967295] }) Item 4294967295: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 1.0000, 2.0000] }) "###); @@ -154,7 +154,7 @@ fn write_one_vector() { ================== Dumping index 0 Root: Metadata { dimensions: 3, items: RoaringBitmap<[0]>, roots: [0], distance: "euclidean" } - Version: Version { major: 0, minor: 6, patch: 3 } + Version: Version { major: 0, minor: 6, patch: 4 } Tree 0: Descendants(Descendants { descendants: [0] }) Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 1.0000, 2.0000] }) "###); @@ -174,7 +174,7 @@ fn write_one_vector_in_one_tree() { ================== Dumping index 0 Root: Metadata { dimensions: 3, items: RoaringBitmap<[0]>, roots: [0], distance: "euclidean" } - Version: Version { major: 0, minor: 6, patch: 3 } + Version: Version { major: 0, minor: 6, patch: 4 } Tree 0: Descendants(Descendants { descendants: [0] }) Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 1.0000, 2.0000] }) "###); @@ -194,7 +194,7 @@ fn write_one_vector_in_multiple_trees() { ================== Dumping index 0 Root: Metadata { dimensions: 3, items: RoaringBitmap<[0]>, roots: [0], distance: "euclidean" } - Version: Version { major: 0, minor: 6, patch: 3 } + Version: Version { major: 0, minor: 6, patch: 4 } Tree 0: Descendants(Descendants { descendants: [0] }) Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 1.0000, 2.0000] }) "###); @@ -218,7 +218,7 @@ fn write_vectors_until_there_is_a_descendants() { ================== Dumping index 0 Root: Metadata { dimensions: 3, items: RoaringBitmap<[0, 1, 2]>, roots: [0], distance: "euclidean" } - Version: Version { major: 0, minor: 6, patch: 3 } + Version: Version { major: 0, minor: 6, patch: 4 } Tree 0: Descendants(Descendants { descendants: [0, 1, 2] }) Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000, 0.0000] }) Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [1.0000, 1.0000, 1.0000] }) @@ -296,31 +296,31 @@ fn write_multiple_indexes() { ================== Dumping index 0 Root: Metadata { dimensions: 3, items: RoaringBitmap<[0]>, roots: [0], distance: "euclidean" } - Version: Version { major: 0, minor: 6, patch: 3 } + Version: Version { major: 0, minor: 6, patch: 4 } Tree 0: Descendants(Descendants { descendants: [0] }) Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 1.0000, 2.0000] }) ================== Dumping index 1 Root: Metadata { dimensions: 3, items: RoaringBitmap<[0]>, roots: [0], distance: "euclidean" } - Version: Version { major: 0, minor: 6, patch: 3 } + Version: Version { major: 0, minor: 6, patch: 4 } Tree 0: Descendants(Descendants { descendants: [0] }) Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 1.0000, 2.0000] }) ================== Dumping index 2 Root: Metadata { dimensions: 3, items: RoaringBitmap<[0]>, roots: [0], distance: "euclidean" } - Version: Version { major: 0, minor: 6, patch: 3 } + Version: Version { major: 0, minor: 6, patch: 4 } Tree 0: Descendants(Descendants { descendants: [0] }) Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 1.0000, 2.0000] }) ================== Dumping index 3 Root: Metadata { dimensions: 3, items: RoaringBitmap<[0]>, roots: [0], distance: "euclidean" } - Version: Version { major: 0, minor: 6, patch: 3 } + Version: Version { major: 0, minor: 6, patch: 4 } Tree 0: Descendants(Descendants { descendants: [0] }) Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 1.0000, 2.0000] }) ================== Dumping index 4 Root: Metadata { dimensions: 3, items: RoaringBitmap<[0]>, roots: [0], distance: "euclidean" } - Version: Version { major: 0, minor: 6, patch: 3 } + Version: Version { major: 0, minor: 6, patch: 4 } Tree 0: Descendants(Descendants { descendants: [0] }) Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 1.0000, 2.0000] }) "###); @@ -420,7 +420,7 @@ fn delete_one_item_in_a_one_item_db() { ================== Dumping index 0 Root: Metadata { dimensions: 2, items: RoaringBitmap<[0]>, roots: [0], distance: "euclidean" } - Version: Version { major: 0, minor: 6, patch: 3 } + Version: Version { major: 0, minor: 6, patch: 4 } Tree 0: Descendants(Descendants { descendants: [0] }) Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000] }) "###); @@ -437,7 +437,7 @@ fn delete_one_item_in_a_one_item_db() { ================== Dumping index 0 Root: Metadata { dimensions: 2, items: RoaringBitmap<[]>, roots: [], distance: "euclidean" } - Version: Version { major: 0, minor: 6, patch: 3 } + Version: Version { major: 0, minor: 6, patch: 4 } "###); let rtxn = handle.env.read_txn().unwrap(); @@ -463,7 +463,7 @@ fn delete_document_in_an_empty_index_74() { ================== Dumping index 0 Root: Metadata { dimensions: 2, items: RoaringBitmap<[0]>, roots: [0], distance: "euclidean" } - Version: Version { major: 0, minor: 6, patch: 3 } + Version: Version { major: 0, minor: 6, patch: 4 } Tree 0: Descendants(Descendants { descendants: [0] }) Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000] }) "###); @@ -489,11 +489,11 @@ fn delete_document_in_an_empty_index_74() { ================== Dumping index 0 Root: Metadata { dimensions: 2, items: RoaringBitmap<[]>, roots: [], distance: "euclidean" } - Version: Version { major: 0, minor: 6, patch: 3 } + Version: Version { major: 0, minor: 6, patch: 4 } ================== Dumping index 1 Root: Metadata { dimensions: 2, items: RoaringBitmap<[]>, roots: [], distance: "euclidean" } - Version: Version { major: 0, minor: 6, patch: 3 } + Version: Version { major: 0, minor: 6, patch: 4 } "###); let rtxn = handle.env.read_txn().unwrap(); @@ -519,7 +519,7 @@ fn delete_one_item_in_a_descendant() { ================== Dumping index 0 Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1]>, roots: [0], distance: "euclidean" } - Version: Version { major: 0, minor: 6, patch: 3 } + Version: Version { major: 0, minor: 6, patch: 4 } Tree 0: Descendants(Descendants { descendants: [0, 1] }) Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000] }) Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [1.0000, 0.0000] }) @@ -537,7 +537,7 @@ fn delete_one_item_in_a_descendant() { ================== Dumping index 0 Root: Metadata { dimensions: 2, items: RoaringBitmap<[1]>, roots: [0], distance: "euclidean" } - Version: Version { major: 0, minor: 6, patch: 3 } + Version: Version { major: 0, minor: 6, patch: 4 } Tree 0: Descendants(Descendants { descendants: [1] }) Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [1.0000, 0.0000] }) "###); @@ -581,7 +581,7 @@ fn delete_one_leaf_in_a_split() { ================== Dumping index 0 Root: Metadata { dimensions: 2, items: RoaringBitmap<[1, 2]>, roots: [0], distance: "euclidean" } - Version: Version { major: 0, minor: 6, patch: 3 } + Version: Version { major: 0, minor: 6, patch: 4 } Tree 0: Descendants(Descendants { descendants: [1, 2] }) Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [1.0000, 0.0000] }) Item 2: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [2.0000, 0.0000] }) @@ -604,7 +604,7 @@ fn delete_one_item_in_a_single_document_database() { ================== Dumping index 0 Root: Metadata { dimensions: 2, items: RoaringBitmap<[0]>, roots: [0], distance: "cosine" } - Version: Version { major: 0, minor: 6, patch: 3 } + Version: Version { major: 0, minor: 6, patch: 4 } Tree 0: Descendants(Descendants { descendants: [0] }) Item 0: Leaf(Leaf { header: NodeHeaderCosine { norm: 0.0 }, vector: [0.0000, 0.0000] }) "###); @@ -621,7 +621,7 @@ fn delete_one_item_in_a_single_document_database() { ================== Dumping index 0 Root: Metadata { dimensions: 2, items: RoaringBitmap<[]>, roots: [], distance: "cosine" } - Version: Version { major: 0, minor: 6, patch: 3 } + Version: Version { major: 0, minor: 6, patch: 4 } "###); } @@ -715,7 +715,7 @@ fn add_one_item_incrementally_in_an_empty_db() { ================== Dumping index 0 Root: Metadata { dimensions: 2, items: RoaringBitmap<[]>, roots: [], distance: "euclidean" } - Version: Version { major: 0, minor: 6, patch: 3 } + Version: Version { major: 0, minor: 6, patch: 4 } "###); let mut wtxn = handle.env.write_txn().unwrap(); @@ -728,7 +728,7 @@ fn add_one_item_incrementally_in_an_empty_db() { ================== Dumping index 0 Root: Metadata { dimensions: 2, items: RoaringBitmap<[0]>, roots: [0], distance: "euclidean" } - Version: Version { major: 0, minor: 6, patch: 3 } + Version: Version { major: 0, minor: 6, patch: 4 } Tree 0: Descendants(Descendants { descendants: [0] }) Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000] }) "###); @@ -748,7 +748,7 @@ fn add_one_item_incrementally_in_a_one_item_db() { ================== Dumping index 0 Root: Metadata { dimensions: 2, items: RoaringBitmap<[0]>, roots: [0], distance: "euclidean" } - Version: Version { major: 0, minor: 6, patch: 3 } + Version: Version { major: 0, minor: 6, patch: 4 } Tree 0: Descendants(Descendants { descendants: [0] }) Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000] }) "###); @@ -763,7 +763,7 @@ fn add_one_item_incrementally_in_a_one_item_db() { ================== Dumping index 0 Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1]>, roots: [0], distance: "euclidean" } - Version: Version { major: 0, minor: 6, patch: 3 } + Version: Version { major: 0, minor: 6, patch: 4 } Tree 0: Descendants(Descendants { descendants: [0, 1] }) Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000] }) Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [1.0000, 0.0000] }) @@ -785,7 +785,7 @@ fn add_one_item_incrementally_to_create_a_split_node() { ================== Dumping index 0 Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1]>, roots: [0], distance: "euclidean" } - Version: Version { major: 0, minor: 6, patch: 3 } + Version: Version { major: 0, minor: 6, patch: 4 } Tree 0: Descendants(Descendants { descendants: [0, 1] }) Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000] }) Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [1.0000, 0.0000] }) @@ -801,7 +801,7 @@ fn add_one_item_incrementally_to_create_a_split_node() { ================== Dumping index 0 Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1, 2]>, roots: [2], distance: "euclidean" } - Version: Version { major: 0, minor: 6, patch: 3 } + Version: Version { major: 0, minor: 6, patch: 4 } Tree 1: Descendants(Descendants { descendants: [1, 2] }) Tree 2: SplitPlaneNormal(SplitPlaneNormal { left: Item(0), right: Tree(1), normal: [1.0000, 0.0000] }) Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000] }) @@ -1112,7 +1112,7 @@ fn append() { ================== Dumping index 1 Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1]>, roots: [0], distance: "euclidean" } - Version: Version { major: 0, minor: 6, patch: 3 } + Version: Version { major: 0, minor: 6, patch: 4 } Tree 0: Descendants(Descendants { descendants: [0, 1] }) Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000] }) Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.1000, 0.1000] }) From 2ceffd951bfda65f6267b7307ff83ec01cc291ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 1 Oct 2025 17:11:12 +0200 Subject: [PATCH 4/6] Remove hannoy from arroy --- Cargo.toml | 1 - src/tests/writer.rs | 61 --------------------------------------------- 2 files changed, 62 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ed3601a..d48a9e0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,7 +35,6 @@ approx = "0.5.1" arbitrary = { version = "1.4.1", features = ["derive"] } clap = { version = "4.5.24", features = ["derive"] } env_logger = "0.11.6" -hannoy = { version = "0.0.9-nested-rtxns", git = "https://github.com/nnethercott/hannoy", "tag" = "v0.0.9-nested-rtxns", default-features = false } insta = "1.42.0" instant-distance = "0.6.1" proptest = "1.6.0" diff --git a/src/tests/writer.rs b/src/tests/writer.rs index 6f08c91..eece68e 100644 --- a/src/tests/writer.rs +++ b/src/tests/writer.rs @@ -39,67 +39,6 @@ fn clear_small_database() { wtxn.commit().unwrap(); } -#[test] -fn convert_from_arroy_to_hannoy() { - // let handle = create_database::(); - let _ = rayon::ThreadPoolBuilder::new().num_threads(1).build_global(); - let dir = tempfile::tempdir().unwrap(); - let env = unsafe { heed::EnvOpenOptions::new().map_size(200 * 1024 * 1024).open(dir.path()) } - .unwrap(); - let mut wtxn = env.write_txn().unwrap(); - let database: hannoy::Database = - env.create_database(&mut wtxn, None).unwrap(); - wtxn.commit().unwrap(); - - let mut rng = rng(); - let mut wtxn = env.write_txn().unwrap(); - - let mut db_indexes: Vec = (0..10).collect(); - db_indexes.shuffle(&mut rng); - - for index in db_indexes.iter().copied() { - let writer = hannoy::Writer::new(database, index, 1024); - - // We're going to write 100 vectors per index - for i in 0..100 { - let vector: [f32; 1024] = std::array::from_fn(|_| rng.gen()); - writer.add_item(&mut wtxn, i, &vector).unwrap(); - } - writer.builder(&mut rng).build::<16, 32>(&mut wtxn).unwrap(); - } - wtxn.commit().unwrap(); - - // Now it's time to convert the indexes - - let mut wtxn = env.write_txn().unwrap(); - let rtxn = env.read_txn().unwrap(); - let database: crate::Database = env.open_database(&mut wtxn, None).unwrap().unwrap(); - - db_indexes.shuffle(&mut rng); - - for index in db_indexes { - let pre_commit_hannoy_reader = - hannoy::Reader::::open(&rtxn, index, database.remap_types()) - .unwrap(); - - let writer = Writer::new(database, index, pre_commit_hannoy_reader.dimensions()); - let mut builder = writer.builder(&mut rng); - builder.prepare_hannoy_conversion(&mut wtxn).unwrap(); - assert!(writer.need_build(&mut wtxn).unwrap()); - builder.build(&mut wtxn).unwrap(); - - for result in pre_commit_hannoy_reader.iter(&rtxn).unwrap() { - let (item_id, vector) = result.unwrap(); - let reader = Reader::open(&wtxn, index, database).unwrap(); - assert_eq!(reader.item_vector(&wtxn, item_id).unwrap().as_deref(), Some(&vector[..])); - let mut found = reader.nns(1).by_vector(&wtxn, &vector).unwrap(); - let (found_item_id, found_distance) = found.pop().unwrap(); - assert_eq!(found_item_id, item_id); - approx::assert_abs_diff_eq!(found_distance, 0.0); - } - } -} - #[test] fn use_u32_max_minus_one_for_a_vec() { let handle = create_database::(); From 9f83e159195e030da9ec54656eec8025b6a7ce33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 1 Oct 2025 17:14:24 +0200 Subject: [PATCH 5/6] Bump the Rust toolchain version to 1.89 --- .github/workflows/rust.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index d60f1a7..28e7cec 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -24,7 +24,7 @@ jobs: steps: - uses: actions/checkout@v1 - - uses: dtolnay/rust-toolchain@1.85 + - uses: dtolnay/rust-toolchain@1.89 - uses: actions-rs/cargo@v1 with: command: build @@ -41,7 +41,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v1 - - uses: dtolnay/rust-toolchain@1.85 + - uses: dtolnay/rust-toolchain@1.89 with: profile: minimal components: clippy, rustfmt From 9ef95432b169d0903375a5f640ddd6b218132e9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 1 Oct 2025 17:16:57 +0200 Subject: [PATCH 6/6] Make the new Clippy happy --- src/metadata.rs | 2 +- src/spaces/simple.rs | 4 ++-- src/unaligned_vector/binary_quantized.rs | 2 +- src/writer.rs | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/metadata.rs b/src/metadata.rs index 6b0814a..79c5113 100644 --- a/src/metadata.rs +++ b/src/metadata.rs @@ -23,7 +23,7 @@ impl<'a> heed::BytesEncode<'a> for MetadataCodec { fn bytes_encode(item: &'a Self::EItem) -> Result, BoxedError> { let Metadata { dimensions, items, roots, distance } = item; - debug_assert!(!distance.as_bytes().iter().any(|&b| b == 0)); + debug_assert!(!distance.as_bytes().contains(&0)); let mut output = Vec::with_capacity( size_of::() diff --git a/src/spaces/simple.rs b/src/spaces/simple.rs index fbf4fc2..dac121f 100644 --- a/src/spaces/simple.rs +++ b/src/spaces/simple.rs @@ -102,8 +102,8 @@ pub fn dot_product_non_optimized(u: &UnalignedVector, v: &UnalignedVector -2 /// 01 => 0 diff --git a/src/unaligned_vector/binary_quantized.rs b/src/unaligned_vector/binary_quantized.rs index e4f4260..f5ad17a 100644 --- a/src/unaligned_vector/binary_quantized.rs +++ b/src/unaligned_vector/binary_quantized.rs @@ -94,7 +94,7 @@ unsafe fn from_slice_neon(slice: &[f32]) -> Vec { // The size of the returned vector must be a multiple of a word let remaining = slice.len() % QUANTIZED_WORD_BYTES; let mut len = iterations; - if len % QUANTIZED_WORD_BYTES != 0 { + if !len.is_multiple_of(QUANTIZED_WORD_BYTES) { len += QUANTIZED_WORD_BYTES - len % QUANTIZED_WORD_BYTES; } else if remaining != 0 { // if we generated a valid number of Word but we're missing a few bits diff --git a/src/writer.rs b/src/writer.rs index d8070ca..8bcee5d 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -899,7 +899,7 @@ impl Writer { tmp_nodes, )?; - return Ok((new_id, new_items)); + Ok((new_id, new_items)) } } NodeMode::Tree => {