Skip to content
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
ad16bd8
Remove a first batch of useless writes to tmp files and LMDB
irevoire May 28, 2025
e9d2c64
update the tmp_nodes to be readable
irevoire May 28, 2025
20b891e
Parallelize the split of large descendants
irevoire Jun 10, 2025
fe56192
randomize the item selection
irevoire Jun 10, 2025
ae9c4c2
get rids of the remap method on the tmp files
irevoire Jun 11, 2025
c118447
makes the clippy gods happy
irevoire Jun 11, 2025
dc14f4b
fmt
irevoire Jun 11, 2025
9c70222
Make the deletion of items faster
irevoire Jun 12, 2025
5a6dcb3
Merge with main, it was too complex to rebase
irevoire Jun 12, 2025
ae21f60
fmt
irevoire Jun 12, 2025
9c29c04
make clippy happy
irevoire Jun 12, 2025
a16fbdd
share the code between the writer and the incremental indexing. That …
irevoire Jun 16, 2025
d4d646d
handle error properly in the rayon pool
irevoire Jun 16, 2025
c1874ff
use try_send instead on send in case two errors happens at the same m…
irevoire Jun 16, 2025
2face0e
fmt+lints
irevoire Jun 16, 2025
5b8efc0
remove debug prints
irevoire Jun 17, 2025
e9209c2
remove new_fit_in_memory
irevoire Jun 17, 2025
af2cdca
fix panic when we have low memory
irevoire Jun 17, 2025
ecf0df4
check for error when inserting elements in a tree
irevoire Jun 17, 2025
5262b08
fix multiple bugs
irevoire Jun 17, 2025
fa62650
fix a new bug and add unit tests on fit_in_memory
irevoire Jun 17, 2025
3a94056
Add a large test with no memory to force the full indexing process to…
irevoire Jun 17, 2025
ccdafc2
fmt
irevoire Jun 17, 2025
29bc623
Update src/writer.rs
irevoire Jun 18, 2025
e0d18a0
fmt
irevoire Jun 18, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ thiserror = "2.0.9"
nohash = "0.2.0"
page_size = "0.6.0"
enum-iterator = "2.1.0"
thread_local = "1.1.8"
crossbeam = "0.8.4"

[dev-dependencies]
anyhow = "1.0.95"
Expand Down
2 changes: 1 addition & 1 deletion src/distance/cosine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ pub enum Cosine {}
#[repr(C)]
#[derive(Pod, Zeroable, Clone, Copy)]
pub struct NodeHeaderCosine {
norm: f32,
pub(crate) norm: f32,
}
impl fmt::Debug for NodeHeaderCosine {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
Expand Down
4 changes: 4 additions & 0 deletions src/distance/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,10 @@ pub trait Distance: Send + Sync + Sized + Clone + fmt::Debug + 'static {
) -> heed::Result<()> {
Ok(())
}

/// Returns the size of one item with the given number of `dimensions`.
///
/// The result is the in-memory size of `Self::Header` (in bytes, as reported
/// by `std::mem::size_of`) plus whatever `Self::VectorCodec::size_of_item`
/// reports for a vector of `dimensions` components.
// NOTE(review): presumably the codec also reports bytes — confirm against the codec.
fn size_of_item(dimensions: usize) -> usize {
    let header_size = std::mem::size_of::<Self::Header>();
    let vector_size = Self::VectorCodec::size_of_item(dimensions);
    header_size + vector_size
}
}

fn two_means<D: Distance, R: Rng>(
Expand Down
2 changes: 1 addition & 1 deletion src/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ impl<'a> heed::BytesEncode<'a> for MetadataCodec {

fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
let Metadata { dimensions, items, roots, distance } = item;
debug_assert!(!distance.as_bytes().iter().any(|&b| b == 0));
debug_assert!(!distance.as_bytes().contains(&0));

let mut output = Vec::with_capacity(
size_of::<u32>()
Expand Down
36 changes: 35 additions & 1 deletion src/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,20 @@ pub enum Node<'a, D: Distance> {
SplitPlaneNormal(SplitPlaneNormal<'a, D>),
}

impl<'a, D: Distance> Node<'a, D> {
    /// Consumes the node and returns an equivalent `Node<'static, D>`.
    ///
    /// Each variant is converted in place:
    /// - `Leaf` and `SplitPlaneNormal` delegate to their own `into_owned`.
    /// - `Descendants` has its `descendants` `Cow` turned into its owned
    ///   value and re-wrapped as `Cow::Owned`.
    pub fn into_owned(self) -> Node<'static, D> {
        match self {
            Self::Leaf(item) => Node::Leaf(item.into_owned()),
            Self::Descendants(Descendants { descendants }) => {
                // `Cow::into_owned` yields the owned value, so the result no
                // longer carries the `'a` borrow.
                let owned = descendants.into_owned();
                Node::Descendants(Descendants { descendants: Cow::Owned(owned) })
            }
            Self::SplitPlaneNormal(split) => Node::SplitPlaneNormal(split.into_owned()),
        }
    }
}

/// A node generic over the version of the database.
/// Should only be used while reading from the database.
#[derive(Clone, Debug)]
Expand All @@ -40,8 +54,15 @@ impl<'a, D: Distance> Node<'a, D> {
None
}
}
}

/// Consumes the node and returns its payload if it is a
/// `Node::Descendants`, or `None` for any other variant.
pub fn descendants(self) -> Option<Descendants<'a>> {
    match self {
        Node::Descendants(inner) => Some(inner),
        Node::Leaf(_) | Node::SplitPlaneNormal(_) => None,
    }
}
}
/// A leaf node which corresponds to the vector inputted
/// by the user and the distance header.
pub struct Leaf<'a, D: Distance> {
Expand Down Expand Up @@ -142,6 +163,19 @@ impl<D: Distance> fmt::Debug for SplitPlaneNormal<'_, D> {
}
}

impl<D: Distance> SplitPlaneNormal<'_, D> {
    /// Consumes the split node and returns an equivalent
    /// `SplitPlaneNormal<'static, D>`.
    ///
    /// `left` and `right` are carried over unchanged. When a `normal` is
    /// present, it is rebuilt with the same header and with its vector
    /// converted to its owned form and wrapped in `Cow::Owned`.
    pub fn into_owned(self) -> SplitPlaneNormal<'static, D> {
        let Self { left, right, normal } = self;
        let normal = normal.map(|Leaf { header, vector }| Leaf {
            header,
            vector: Cow::Owned(vector.into_owned()),
        });
        SplitPlaneNormal { left, right, normal }
    }
}

impl<D: Distance> Clone for SplitPlaneNormal<'_, D> {
fn clone(&self) -> Self {
Self { left: self.left, right: self.right, normal: self.normal.clone() }
Expand Down
Loading
Loading