Skip to content

Commit 9f8ca6d

Browse files
committed
feat: implement hnsw index
Signed-off-by: Arshdeep54 <balarsh535@gmail.com>
1 parent 0e78312 commit 9f8ca6d

File tree

17 files changed

+1040
-33
lines changed

17 files changed

+1040
-33
lines changed

.env.example

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ GRPC_ROOT_PASSWORD=your-secure-password
1414
STORAGE_TYPE=rocksdb
1515
INDEX_TYPE=flat
1616
DIMENSION=512
17+
SIMILARITY=cosine
1718

1819
DATA_PATH=./data
1920

Cargo.lock

Lines changed: 59 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ dotenv = "0.15.0"
2626
dotenvy = "0.15"
2727
prost = "0.14.1"
2828
prost-types = "0.14.1"
29+
rand = "0.9.2"
2930
ratatui = "0.26"
3031
reqwest = { version = "0.12", features = ["json", "blocking", "multipart"] }
3132
rocksdb = "0.21.0"

crates/api/src/lib.rs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1-
use defs::{DbError, IndexedVector, Similarity};
1+
use defs::{DbError, Dimension, IndexedVector, Similarity};
22

33
use defs::{DenseVector, Payload, Point, PointId};
4+
use index::hnsw::HnswIndex;
45
use std::path::PathBuf;
56
// use std::sync::atomic::{AtomicU64, Ordering};
67
use std::sync::{Arc, RwLock};
@@ -136,7 +137,8 @@ pub struct DbConfig {
136137
pub storage_type: StorageType,
137138
pub index_type: IndexType,
138139
pub data_path: PathBuf,
139-
pub dimension: usize,
140+
pub dimension: Dimension,
141+
pub similarity: Similarity,
140142
}
141143

142144
pub fn init_api(config: DbConfig) -> Result<VectorDb, DbError> {
@@ -149,6 +151,10 @@ pub fn init_api(config: DbConfig) -> Result<VectorDb, DbError> {
149151
// Initialize the vector index
150152
let index: Arc<RwLock<dyn VectorIndex>> = match config.index_type {
151153
IndexType::Flat => Arc::new(RwLock::new(FlatIndex::new())),
154+
IndexType::HNSW => Arc::new(RwLock::new(HnswIndex::new(
155+
config.similarity,
156+
config.dimension,
157+
))),
152158
_ => Arc::new(RwLock::new(FlatIndex::new())),
153159
};
154160

@@ -178,6 +184,7 @@ mod tests {
178184
index_type: IndexType::Flat,
179185
data_path: temp_dir.path().to_path_buf(),
180186
dimension: 3,
187+
similarity: Similarity::Cosine,
181188
};
182189
init_api(config).unwrap()
183190
}

crates/defs/src/error.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
use std::io;
2+
3+
use crate::Dimension;
24
#[derive(Debug, PartialEq, Eq)]
35
pub enum DbError {
46
ParseError,
@@ -8,6 +10,7 @@ pub enum DbError {
810
IndexError(String),
911
LockError,
1012
DimensionMismatch,
13+
InvalidDimension { expected: Dimension, got: Dimension },
1114
}
1215

1316
#[derive(Debug)]

crates/defs/src/types.rs

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ pub type Element = f32;
99
// pub type ElementHalf = f16; - Unstable https://github.com/rust-lang/rust/issues/116909
1010
pub type ElementByte = u8;
1111

12+
pub type Dimension = usize;
13+
1214
// Dense Vector and Vector are considered same
1315
// Sparse vector implementation not supported yet. Refer lib/sparse/src/common/sparse_vector.rs
1416
pub type DenseVector = Vec<Element>;
@@ -43,7 +45,7 @@ pub struct IndexedVector {
4345
pub vector: DenseVector,
4446
}
4547

46-
#[derive(Deserialize, Copy, Clone)]
48+
#[derive(Debug, Deserialize, Copy, Clone)]
4749
pub enum Similarity {
4850
Euclidean,
4951
Manhattan,
@@ -89,3 +91,29 @@ impl<'q> Eq for DistanceOrderedVector<'q> {}
8991
// Discovery(DiscoveryQuery<VectorInternal>),
9092
// Context(ContextQuery<VectorInternal>),
9193
// }
94+
95+
/// An `f32` wrapper that is totally ordered, so it can be used as a key in
/// ordered collections (`BinaryHeap`, `BTreeMap`, sorting) where a bare `f32`
/// cannot because it only implements `PartialOrd`.
///
/// Ordering and equality both follow [`f32::total_cmp`] (IEEE 754 `totalOrder`):
/// negative NaN < -inf < ... < -0.0 < +0.0 < ... < +inf < positive NaN.
/// In particular a NaN compares equal to an identical NaN, and `-0.0` is
/// strictly less than (and therefore not equal to) `+0.0`.
///
/// `PartialEq` is implemented by hand rather than derived: the derived impl
/// would use IEEE `==` on the inner float, which disagrees with `total_cmp`
/// for NaN and signed zeros and would break the `Eq`/`Ord` contracts.
#[derive(Clone, Copy, Debug)]
pub struct OrdF32(f32);

impl OrdF32 {
    /// Wraps `x` without modifying it (NaN and signed zeros are kept as-is).
    pub fn new(x: f32) -> Self {
        Self(x)
    }

    /// Returns the wrapped `f32`.
    pub fn into_inner(self) -> f32 {
        self.0
    }
}

impl PartialEq for OrdF32 {
    /// Two values are equal exactly when `cmp` reports `Ordering::Equal`,
    /// keeping `==` consistent with the total order.
    fn eq(&self, other: &Self) -> bool {
        self.cmp(other) == Ordering::Equal
    }
}

// Sound because `eq` is defined through `total_cmp`, which is reflexive
// even for NaN.
impl Eq for OrdF32 {}

impl Ord for OrdF32 {
    /// Total order over all `f32` bit patterns, delegated to `f32::total_cmp`.
    fn cmp(&self, other: &Self) -> Ordering {
        self.0.total_cmp(&other.0)
    }
}

impl PartialOrd for OrdF32 {
    /// Always `Some`, since the order is total.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

crates/grpc/src/tests.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use crate::service::vectordb::{DenseVector, InsertVectorRequest, Payload, PointI
44
use crate::service::{VectorDBService, run_server};
55
use crate::utils::ServerEndpoint;
66
use api::DbConfig;
7+
use defs::Similarity;
78
use index::IndexType;
89
use std::net::SocketAddr;
910
use std::sync::Arc;
@@ -31,6 +32,7 @@ async fn start_test_server() -> Result<SocketAddr, Box<dyn std::error::Error>> {
3132
index_type: IndexType::Flat,
3233
data_path: temp_dir.path().to_path_buf(),
3334
dimension: 3,
35+
similarity: Similarity::Cosine,
3436
};
3537

3638
let vector_db_api = api::init_api(db_config)?;

crates/index/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,5 @@ license.workspace = true
88

99
[dependencies]
1010
defs.workspace = true
11+
rand.workspace = true
1112
uuid.workspace = true

crates/index/src/hnsw.rs

Lines changed: 0 additions & 29 deletions
This file was deleted.

0 commit comments

Comments
 (0)