Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 13 additions & 9 deletions src/distance/binary_quantized_cosine.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::borrow::Cow;
use std::fmt;

use bytemuck::{Pod, Zeroable};
use rand::Rng;
Expand All @@ -20,10 +20,17 @@ pub enum BinaryQuantizedCosine {}

/// The header of `BinaryQuantizedCosine` leaf nodes.
#[repr(C)]
#[derive(Pod, Zeroable, Debug, Clone, Copy)]
#[derive(Pod, Zeroable, Clone, Copy)]
pub struct NodeHeaderBinaryQuantizedCosine {
norm: f32,
}
/// Debug output for the header: `norm` is rendered with exactly four decimal places.
///
/// The value is pre-formatted into a `String`, so it shows up quoted in the output.
impl fmt::Debug for NodeHeaderBinaryQuantizedCosine {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let rounded_norm = format!("{:.4}", self.norm);
        let mut builder = f.debug_struct("NodeHeaderBinaryQuantizedCosine");
        builder.field("norm", &rounded_norm);
        builder.finish()
    }
}

impl Distance for BinaryQuantizedCosine {
const DEFAULT_OVERSAMPLING: usize = 3;
Expand Down Expand Up @@ -72,7 +79,7 @@ impl Distance for BinaryQuantizedCosine {
fn create_split<'a, R: Rng>(
children: &'a ImmutableSubsetLeafs<Self>,
rng: &mut R,
) -> heed::Result<Cow<'a, UnalignedVector<Self::VectorCodec>>> {
) -> heed::Result<Leaf<'a, Self>> {
let [node_p, node_q] = two_means::<Self, Cosine, R>(rng, children, true)?;
let vector: Vec<f32> =
node_p.vector.iter().zip(node_q.vector.iter()).map(|(p, q)| p - q).collect();
Expand All @@ -83,13 +90,10 @@ impl Distance for BinaryQuantizedCosine {
};
Self::normalize(&mut normal);

Ok(normal.vector)
Ok(normal)
}

fn margin_no_header(
p: &UnalignedVector<Self::VectorCodec>,
q: &UnalignedVector<Self::VectorCodec>,
) -> f32 {
dot_product_binary_quantized(p, q)
/// Margin between two leaves: the binary-quantized dot product of their vectors.
/// The headers are not consulted.
fn margin(p: &Leaf<Self>, q: &Leaf<Self>) -> f32 {
    let (lhs, rhs) = (&p.vector, &q.vector);
    dot_product_binary_quantized(lhs, rhs)
}
}
32 changes: 20 additions & 12 deletions src/distance/binary_quantized_euclidean.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::borrow::Cow;
use std::fmt;

use bytemuck::{Pod, Zeroable};
use rand::Rng;
Expand All @@ -21,11 +21,18 @@ pub enum BinaryQuantizedEuclidean {}

/// The header of `BinaryQuantizedEuclidean` leaf nodes.
#[repr(C)]
#[derive(Pod, Zeroable, Debug, Clone, Copy)]
#[derive(Pod, Zeroable, Clone, Copy)]
pub struct NodeHeaderBinaryQuantizedEuclidean {
/// An extra constant term to determine the offset of the plane
bias: f32,
}
/// Debug output for the header: `bias` is rendered with exactly four decimal places.
///
/// The value is pre-formatted into a `String`, so it shows up quoted in the output.
impl fmt::Debug for NodeHeaderBinaryQuantizedEuclidean {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let rounded_bias = format!("{:.4}", self.bias);
        let mut builder = f.debug_struct("NodeHeaderBinaryQuantizedEuclidean");
        builder.field("bias", &rounded_bias);
        builder.finish()
    }
}

impl Distance for BinaryQuantizedEuclidean {
const DEFAULT_OVERSAMPLING: usize = 3;
Expand Down Expand Up @@ -59,29 +66,30 @@ impl Distance for BinaryQuantizedEuclidean {
fn create_split<'a, R: Rng>(
children: &'a ImmutableSubsetLeafs<Self>,
rng: &mut R,
) -> heed::Result<Cow<'a, UnalignedVector<Self::VectorCodec>>> {
) -> heed::Result<Leaf<'a, Self>> {
let [node_p, node_q] = two_means::<Self, Euclidean, R>(rng, children, false)?;
let vector: Vec<f32> =
node_p.vector.iter().zip(node_q.vector.iter()).map(|(p, q)| p - q).collect();
let mut normal = Leaf {
header: NodeHeaderBinaryQuantizedEuclidean { bias: 0.0 },
vector: UnalignedVector::from_slice(&vector),
vector: UnalignedVector::from_vec(vector),
};
Self::normalize(&mut normal);

Ok(Cow::Owned(normal.vector.into_owned()))
normal.header.bias = normal
.vector
.iter()
.zip(UnalignedVector::<BinaryQuantized>::from_vec(node_p.vector.to_vec()).iter())
.zip(UnalignedVector::<BinaryQuantized>::from_vec(node_q.vector.to_vec()).iter())
.map(|((n, p), q)| -n * (p + q) / 2.0)
.sum();

Ok(normal)
}

/// Margin of `q` relative to `p`: `p`'s header bias plus the binary-quantized
/// dot product of the two vectors. Only `p`'s header is read.
fn margin(p: &Leaf<Self>, q: &Leaf<Self>) -> f32 {
    let projection = dot_product_binary_quantized(&p.vector, &q.vector);
    p.header.bias + projection
}

fn margin_no_header(
p: &UnalignedVector<Self::VectorCodec>,
q: &UnalignedVector<Self::VectorCodec>,
) -> f32 {
dot_product_binary_quantized(p, q)
}
}

/// For the binary quantized squared euclidean distance:
Expand Down
32 changes: 20 additions & 12 deletions src/distance/binary_quantized_manhattan.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::borrow::Cow;
use std::fmt;

use bytemuck::{Pod, Zeroable};
use rand::Rng;
Expand All @@ -20,11 +20,18 @@ pub enum BinaryQuantizedManhattan {}

/// The header of `BinaryQuantizedManhattan` leaf nodes.
#[repr(C)]
#[derive(Pod, Zeroable, Debug, Clone, Copy)]
#[derive(Pod, Zeroable, Clone, Copy)]
pub struct NodeHeaderBinaryQuantizedManhattan {
/// An extra constant term to determine the offset of the plane
bias: f32,
}
/// Debug output for the header: `bias` is rendered with exactly four decimal places.
///
/// The value is pre-formatted into a `String`, so it shows up quoted in the output.
impl fmt::Debug for NodeHeaderBinaryQuantizedManhattan {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let rounded_bias = format!("{:.4}", self.bias);
        let mut builder = f.debug_struct("NodeHeaderBinaryQuantizedManhattan");
        builder.field("bias", &rounded_bias);
        builder.finish()
    }
}

impl Distance for BinaryQuantizedManhattan {
const DEFAULT_OVERSAMPLING: usize = 3;
Expand Down Expand Up @@ -63,29 +70,30 @@ impl Distance for BinaryQuantizedManhattan {
fn create_split<'a, R: Rng>(
children: &'a ImmutableSubsetLeafs<Self>,
rng: &mut R,
) -> heed::Result<Cow<'a, UnalignedVector<Self::VectorCodec>>> {
) -> heed::Result<Leaf<'a, Self>> {
let [node_p, node_q] = two_means::<Self, Manhattan, R>(rng, children, false)?;
let vector: Vec<f32> =
node_p.vector.iter().zip(node_q.vector.iter()).map(|(p, q)| p - q).collect();
let mut normal = Leaf {
header: NodeHeaderBinaryQuantizedManhattan { bias: 0.0 },
vector: UnalignedVector::from_slice(&vector),
vector: UnalignedVector::from_vec(vector),
};
Self::normalize(&mut normal);

Ok(Cow::Owned(normal.vector.into_owned()))
normal.header.bias = normal
.vector
.iter()
.zip(UnalignedVector::<BinaryQuantized>::from_vec(node_p.vector.to_vec()).iter())
.zip(UnalignedVector::<BinaryQuantized>::from_vec(node_q.vector.to_vec()).iter())
.map(|((n, p), q)| -n * (p + q) / 2.0)
.sum();

Ok(normal)
}

/// Margin of `q` relative to `p`: `p`'s header bias plus the binary-quantized
/// dot product of the two vectors. Only `p`'s header is read.
fn margin(p: &Leaf<Self>, q: &Leaf<Self>) -> f32 {
    let projection = dot_product_binary_quantized(&p.vector, &q.vector);
    p.header.bias + projection
}

fn margin_no_header(
p: &UnalignedVector<Self::VectorCodec>,
q: &UnalignedVector<Self::VectorCodec>,
) -> f32 {
dot_product_binary_quantized(p, q)
}
}

/// For the binary quantized manhattan distance:
Expand Down
20 changes: 11 additions & 9 deletions src/distance/cosine.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::borrow::Cow;
use std::fmt;

use bytemuck::{Pod, Zeroable};
use rand::Rng;
Expand All @@ -18,10 +18,15 @@ pub enum Cosine {}

/// The header of Cosine leaf nodes.
#[repr(C)]
#[derive(Pod, Zeroable, Debug, Clone, Copy)]
#[derive(Pod, Zeroable, Clone, Copy)]
pub struct NodeHeaderCosine {
norm: f32,
}
/// Debug output for the header: `norm` is rendered with exactly four decimal places.
///
/// The value is pre-formatted into a `String`, so it shows up quoted in the output.
impl fmt::Debug for NodeHeaderCosine {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let rounded_norm = format!("{:.4}", self.norm);
        let mut builder = f.debug_struct("NodeHeaderCosine");
        builder.field("norm", &rounded_norm);
        builder.finish()
    }
}

impl Distance for Cosine {
type Header = NodeHeaderCosine;
Expand Down Expand Up @@ -68,21 +73,18 @@ impl Distance for Cosine {
fn create_split<'a, R: Rng>(
children: &'a ImmutableSubsetLeafs<Self>,
rng: &mut R,
) -> heed::Result<Cow<'a, UnalignedVector<Self::VectorCodec>>> {
) -> heed::Result<Leaf<'a, Self>> {
let [node_p, node_q] = two_means(rng, children, true)?;
let vector: Vec<f32> =
node_p.vector.iter().zip(node_q.vector.iter()).map(|(p, q)| p - q).collect();
let unaligned_vector = UnalignedVector::from_vec(vector);
let mut normal = Leaf { header: NodeHeaderCosine { norm: 0.0 }, vector: unaligned_vector };
Self::normalize(&mut normal);

Ok(normal.vector)
Ok(normal)
}

fn margin_no_header(
p: &UnalignedVector<Self::VectorCodec>,
q: &UnalignedVector<Self::VectorCodec>,
) -> f32 {
dot_product(p, q)
/// Margin between two leaves: the dot product of their vectors.
/// The headers are not consulted.
fn margin(p: &Leaf<Self>, q: &Leaf<Self>) -> f32 {
    let (lhs, rhs) = (&p.vector, &q.vector);
    dot_product(lhs, rhs)
}
}
23 changes: 12 additions & 11 deletions src/distance/dot_product.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::borrow::Cow;
use std::fmt;

use bytemuck::{Pod, Zeroable};
use heed::{RwPrefix, RwTxn};
Expand All @@ -21,12 +21,20 @@ pub enum DotProduct {}

/// The header of DotProduct leaf nodes.
#[repr(C)]
#[derive(Pod, Zeroable, Debug, Clone, Copy)]
#[derive(Pod, Zeroable, Clone, Copy)]
pub struct NodeHeaderDotProduct {
extra_dim: f32,
/// An extra constant term to determine the offset of the plane
norm: f32,
}
/// Debug output for the header: `extra_dim` and `norm` (in that order) are each
/// rendered with exactly four decimal places.
///
/// The values are pre-formatted into `String`s, so they show up quoted in the output.
impl fmt::Debug for NodeHeaderDotProduct {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let rounded_extra_dim = format!("{:.4}", self.extra_dim);
        let rounded_norm = format!("{:.4}", self.norm);
        let mut builder = f.debug_struct("NodeHeaderDotProduct");
        builder.field("extra_dim", &rounded_extra_dim);
        builder.field("norm", &rounded_norm);
        builder.finish()
    }
}

impl Distance for DotProduct {
type Header = NodeHeaderDotProduct;
Expand Down Expand Up @@ -90,7 +98,7 @@ impl Distance for DotProduct {
fn create_split<'a, R: Rng>(
children: &'a ImmutableSubsetLeafs<Self>,
rng: &mut R,
) -> heed::Result<Cow<'a, UnalignedVector<Self::VectorCodec>>> {
) -> heed::Result<Leaf<'a, Self>> {
let [node_p, node_q] = two_means(rng, children, true)?;
let vector: Vec<f32> =
node_p.vector.iter().zip(node_q.vector.iter()).map(|(p, q)| p - q).collect();
Expand All @@ -101,20 +109,13 @@ impl Distance for DotProduct {
normal.header.extra_dim = node_p.header.extra_dim - node_q.header.extra_dim;
Self::normalize(&mut normal);

Ok(normal.vector)
Ok(normal)
}

/// Margin between two leaves: the dot product of their vectors plus the product
/// of the two headers' `extra_dim` values.
fn margin(p: &Leaf<Self>, q: &Leaf<Self>) -> f32 {
    let vector_term = dot_product(&p.vector, &q.vector);
    let extra_term = p.header.extra_dim * q.header.extra_dim;
    vector_term + extra_term
}

fn margin_no_header(
p: &UnalignedVector<Self::VectorCodec>,
q: &UnalignedVector<Self::VectorCodec>,
) -> f32 {
dot_product(p, q)
}

fn preprocess(
wtxn: &mut RwTxn,
new_iter: impl for<'a> Fn(
Expand Down
24 changes: 11 additions & 13 deletions src/distance/euclidean.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::borrow::Cow;
use std::fmt;

use bytemuck::{Pod, Zeroable};
use rand::Rng;
Expand All @@ -19,11 +19,16 @@ pub enum Euclidean {}

/// The header of Euclidean leaf nodes.
#[repr(C)]
#[derive(Pod, Zeroable, Debug, Clone, Copy)]
#[derive(Pod, Zeroable, Clone, Copy)]
pub struct NodeHeaderEuclidean {
/// An extra constant term to determine the offset of the plane
bias: f32,
}
/// Debug output for the header: `bias` is rendered with exactly four decimal places.
///
/// The value is pre-formatted into a `String`, so it shows up quoted in the output.
impl fmt::Debug for NodeHeaderEuclidean {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let rounded_bias = format!("{:.4}", self.bias);
        let mut builder = f.debug_struct("NodeHeaderEuclidean");
        builder.field("bias", &rounded_bias);
        builder.finish()
    }
}

impl Distance for Euclidean {
type Header = NodeHeaderEuclidean;
Expand All @@ -50,7 +55,7 @@ impl Distance for Euclidean {
fn create_split<'a, R: Rng>(
children: &'a ImmutableSubsetLeafs<Self>,
rng: &mut R,
) -> heed::Result<Cow<'a, UnalignedVector<Self::VectorCodec>>> {
) -> heed::Result<Leaf<'a, Self>> {
let [node_p, node_q] = two_means(rng, children, false)?;
let vector: Vec<_> =
node_p.vector.iter().zip(node_q.vector.iter()).map(|(p, q)| p - q).collect();
Expand All @@ -68,17 +73,10 @@ impl Distance for Euclidean {
.map(|((n, p), q)| -n * (p + q) / 2.0)
.sum();

Ok(normal.vector)
}

fn margin(p: &Leaf<Self>, q: &Leaf<Self>) -> f32 {
p.header.bias + dot_product(&p.vector, &q.vector)
Ok(normal)
}

fn margin_no_header(
p: &UnalignedVector<Self::VectorCodec>,
q: &UnalignedVector<Self::VectorCodec>,
) -> f32 {
dot_product(p, q)
/// Margin of `q` relative to `n`: `n`'s header bias plus the dot product of the
/// two vectors. Only `n`'s header is read.
// NOTE(review): `n` is presumably the split-plane normal returned by `create_split` — confirm.
fn margin(n: &Leaf<Self>, q: &Leaf<Self>) -> f32 {
    let projection = dot_product(&n.vector, &q.vector);
    n.header.bias + projection
}
}
20 changes: 9 additions & 11 deletions src/distance/manhattan.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::borrow::Cow;
use std::fmt;

use bytemuck::{Pod, Zeroable};
use rand::Rng;
Expand All @@ -18,11 +18,16 @@ pub enum Manhattan {}

/// The header of Manhattan leaf nodes.
#[repr(C)]
#[derive(Pod, Zeroable, Debug, Clone, Copy)]
#[derive(Pod, Zeroable, Clone, Copy)]
pub struct NodeHeaderManhattan {
/// An extra constant term to determine the offset of the plane
bias: f32,
}
/// Debug output for the header: `bias` is rendered with exactly four decimal places.
///
/// The value is pre-formatted into a `String`, so it shows up quoted in the output.
impl fmt::Debug for NodeHeaderManhattan {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let rounded_bias = format!("{:.4}", self.bias);
        let mut builder = f.debug_struct("NodeHeaderManhattan");
        builder.field("bias", &rounded_bias);
        builder.finish()
    }
}

impl Distance for Manhattan {
type Header = NodeHeaderManhattan;
Expand Down Expand Up @@ -53,7 +58,7 @@ impl Distance for Manhattan {
fn create_split<'a, R: Rng>(
children: &'a ImmutableSubsetLeafs<Self>,
rng: &mut R,
) -> heed::Result<Cow<'a, UnalignedVector<Self::VectorCodec>>> {
) -> heed::Result<Leaf<'a, Self>> {
let [node_p, node_q] = two_means(rng, children, false)?;
let vector: Vec<_> =
node_p.vector.iter().zip(node_q.vector.iter()).map(|(p, q)| p - q).collect();
Expand All @@ -71,17 +76,10 @@ impl Distance for Manhattan {
.map(|((n, p), q)| -n * (p + q) / 2.0)
.sum();

Ok(normal.vector)
Ok(normal)
}

/// Margin of `q` relative to `p`: `p`'s header bias plus the dot product of the
/// two vectors. Only `p`'s header is read.
fn margin(p: &Leaf<Self>, q: &Leaf<Self>) -> f32 {
    let projection = dot_product(&p.vector, &q.vector);
    p.header.bias + projection
}

fn margin_no_header(
p: &UnalignedVector<Self::VectorCodec>,
q: &UnalignedVector<Self::VectorCodec>,
) -> f32 {
dot_product(p, q)
}
}
Loading
Loading