Skip to content

Commit 17513cd

Browse files
authored
Add int8 support to Vamana type-erased index (#355)
1 parent 1087b63 commit 17513cd

File tree

7 files changed: 143 additions and 93 deletions.

apis/python/test/test_ingestion.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -363,11 +363,6 @@ def test_ingestion_numpy_i8(tmp_path):
363363
gt_i, gt_d = get_groundtruth_ivec(gt_uri, k=k, nqueries=nqueries)
364364

365365
for index_type, index_class in zip(INDEXES, INDEX_CLASSES):
366-
# TODO(paris): Fix Vamana bug and re-enable:
367-
# RuntimeError: IndexError: index 100 is out of bounds for axis 0 with size 100
368-
if index_type == "VAMANA":
369-
continue
370-
371366
index_uri = os.path.join(tmp_path, f"array_{index_type}")
372367
index = ingest(
373368
index_type=index_type,

documentation/Building.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ Alternatively, you can setup CLion, which is the suggested way to develop C++ in
3030
## Python
3131

3232
Before building you may want to set up a virtual environment:
33+
3334
```bash
3435
conda create --name TileDB-Vector-Search python=3.9
3536
conda activate TileDB-Vector-Search

src/include/api/feature_vector_array.h

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,7 @@ class FeatureVectorArray {
313313
// clang-format off
314314
const FeatureVectorArray::col_major_matrix_table_type FeatureVectorArray::col_major_matrix_dispatch_table = {
315315
{TILEDB_FLOAT32, [](size_t rows, size_t cols) { return std::make_unique<FeatureVectorArray::vector_array_impl<ColMajorMatrix<float >>>(rows, cols);}},
316+
{TILEDB_INT8, [](size_t rows, size_t cols) { return std::make_unique<FeatureVectorArray::vector_array_impl<ColMajorMatrix<int8_t >>>(rows, cols);}},
316317
{TILEDB_UINT8, [](size_t rows, size_t cols) { return std::make_unique<FeatureVectorArray::vector_array_impl<ColMajorMatrix<uint8_t >>>(rows, cols);}},
317318
{TILEDB_INT32, [](size_t rows, size_t cols) { return std::make_unique<FeatureVectorArray::vector_array_impl<ColMajorMatrix<int32_t >>>(rows, cols);}},
318319
{TILEDB_UINT32, [](size_t rows, size_t cols) { return std::make_unique<FeatureVectorArray::vector_array_impl<ColMajorMatrix<uint32_t>>>(rows, cols);}},
@@ -322,6 +323,7 @@ const FeatureVectorArray::col_major_matrix_table_type FeatureVectorArray::col_ma
322323

323324
const FeatureVectorArray::tdb_col_major_matrix_table_type FeatureVectorArray::tdb_col_major_matrix_dispatch_table = {
324325
{TILEDB_FLOAT32, [](const tiledb::Context& ctx, const std::string& uri, size_t num_vectors, size_t timestamp) { return std::make_unique<FeatureVectorArray::vector_array_impl<tdbColMajorMatrix<float >>>(ctx, uri, num_vectors, timestamp); }},
326+
{TILEDB_INT8, [](const tiledb::Context& ctx, const std::string& uri, size_t num_vectors, size_t timestamp) { return std::make_unique<FeatureVectorArray::vector_array_impl<tdbColMajorMatrix<int8_t >>>(ctx, uri, num_vectors, timestamp); }},
325327
{TILEDB_UINT8, [](const tiledb::Context& ctx, const std::string& uri, size_t num_vectors, size_t timestamp) { return std::make_unique<FeatureVectorArray::vector_array_impl<tdbColMajorMatrix<uint8_t >>>(ctx, uri, num_vectors, timestamp); }},
326328
{TILEDB_INT32, [](const tiledb::Context& ctx, const std::string& uri, size_t num_vectors, size_t timestamp) { return std::make_unique<FeatureVectorArray::vector_array_impl<tdbColMajorMatrix<int32_t >>>(ctx, uri, num_vectors, timestamp); }},
327329
{TILEDB_UINT32, [](const tiledb::Context& ctx, const std::string& uri, size_t num_vectors, size_t timestamp) { return std::make_unique<FeatureVectorArray::vector_array_impl<tdbColMajorMatrix<uint32_t>>>(ctx, uri, num_vectors, timestamp); }},
@@ -331,13 +333,15 @@ const FeatureVectorArray::tdb_col_major_matrix_table_type FeatureVectorArray::td
331333

332334
const FeatureVectorArray::col_major_matrix_with_ids_table_type FeatureVectorArray::col_major_matrix_with_ids_dispatch_table = {
333335
{{TILEDB_FLOAT32, TILEDB_UINT32},[](size_t rows, size_t cols) { return std::make_unique<FeatureVectorArray::vector_array_impl<ColMajorMatrixWithIds<float, uint32_t>>>(rows, cols); }},
336+
{{TILEDB_INT8, TILEDB_UINT32},[](size_t rows, size_t cols) { return std::make_unique<FeatureVectorArray::vector_array_impl<ColMajorMatrixWithIds<int8_t, uint32_t>>>(rows, cols); }},
334337
{{TILEDB_UINT8, TILEDB_UINT32},[](size_t rows, size_t cols) { return std::make_unique<FeatureVectorArray::vector_array_impl<ColMajorMatrixWithIds<uint8_t, uint32_t>>>(rows, cols); }},
335338
{{TILEDB_INT32, TILEDB_UINT32},[](size_t rows, size_t cols) { return std::make_unique<FeatureVectorArray::vector_array_impl<ColMajorMatrixWithIds<int32_t, uint32_t>>>(rows, cols); }},
336339
{{TILEDB_UINT32, TILEDB_UINT32},[](size_t rows, size_t cols) { return std::make_unique<FeatureVectorArray::vector_array_impl<ColMajorMatrixWithIds<uint32_t, uint32_t>>>(rows, cols); }},
337340
{{TILEDB_INT64, TILEDB_UINT32},[](size_t rows, size_t cols) { return std::make_unique<FeatureVectorArray::vector_array_impl<ColMajorMatrixWithIds<int64_t, uint32_t>>>(rows, cols); }},
338341
{{TILEDB_UINT64, TILEDB_UINT32},[](size_t rows, size_t cols) { return std::make_unique<FeatureVectorArray::vector_array_impl<ColMajorMatrixWithIds<uint64_t, uint32_t>>>(rows, cols); }},
339342

340343
{{TILEDB_FLOAT32, TILEDB_UINT64},[](size_t rows, size_t cols) { return std::make_unique<FeatureVectorArray::vector_array_impl<ColMajorMatrixWithIds<float, uint64_t>>>(rows, cols); }},
344+
{{TILEDB_INT8, TILEDB_UINT64},[](size_t rows, size_t cols) { return std::make_unique<FeatureVectorArray::vector_array_impl<ColMajorMatrixWithIds<int8_t, uint64_t>>>(rows, cols); }},
341345
{{TILEDB_UINT8, TILEDB_UINT64},[](size_t rows, size_t cols) { return std::make_unique<FeatureVectorArray::vector_array_impl<ColMajorMatrixWithIds<uint8_t, uint64_t>>>(rows, cols); }},
342346
{{TILEDB_INT32, TILEDB_UINT64},[](size_t rows, size_t cols) { return std::make_unique<FeatureVectorArray::vector_array_impl<ColMajorMatrixWithIds<int32_t, uint64_t>>>(rows, cols); }},
343347
{{TILEDB_UINT32, TILEDB_UINT64},[](size_t rows, size_t cols) { return std::make_unique<FeatureVectorArray::vector_array_impl<ColMajorMatrixWithIds<uint32_t, uint64_t>>>(rows, cols); }},
@@ -347,13 +351,15 @@ const FeatureVectorArray::col_major_matrix_with_ids_table_type FeatureVectorArra
347351

348352
const FeatureVectorArray::tdb_col_major_matrix_with_ids_table_type FeatureVectorArray::tdb_col_major_matrix_with_ids_dispatch_table = {
349353
{{TILEDB_FLOAT32, TILEDB_UINT32},[](const tiledb::Context& ctx, const std::string& uri, const std::string& ids_uri, size_t num_vectors, size_t timestamp) { return std::make_unique<FeatureVectorArray::vector_array_impl<tdbColMajorMatrixWithIds<float, uint32_t>>>(ctx, uri, ids_uri, num_vectors, timestamp);}},
354+
{{TILEDB_INT8, TILEDB_UINT32},[](const tiledb::Context& ctx, const std::string& uri, const std::string& ids_uri, size_t num_vectors, size_t timestamp) { return std::make_unique<FeatureVectorArray::vector_array_impl<tdbColMajorMatrixWithIds<int8_t, uint32_t>>>(ctx, uri, ids_uri, num_vectors, timestamp);}},
350355
{{TILEDB_UINT8, TILEDB_UINT32},[](const tiledb::Context& ctx, const std::string& uri, const std::string& ids_uri, size_t num_vectors, size_t timestamp) { return std::make_unique<FeatureVectorArray::vector_array_impl<tdbColMajorMatrixWithIds<uint8_t, uint32_t>>>(ctx, uri, ids_uri, num_vectors, timestamp);}},
351356
{{TILEDB_INT32, TILEDB_UINT32},[](const tiledb::Context& ctx, const std::string& uri, const std::string& ids_uri, size_t num_vectors, size_t timestamp) {return std::make_unique<FeatureVectorArray::vector_array_impl<tdbColMajorMatrixWithIds<int32_t, uint32_t>>>(ctx, uri, ids_uri, num_vectors, timestamp);}},
352357
{{TILEDB_UINT32, TILEDB_UINT32},[](const tiledb::Context& ctx, const std::string& uri, const std::string& ids_uri, size_t num_vectors, size_t timestamp) {return std::make_unique<FeatureVectorArray::vector_array_impl<tdbColMajorMatrixWithIds<uint32_t, uint32_t>>>(ctx, uri, ids_uri, num_vectors, timestamp);}},
353358
{{TILEDB_INT64, TILEDB_UINT32},[](const tiledb::Context& ctx, const std::string& uri, const std::string& ids_uri, size_t num_vectors, size_t timestamp) {return std::make_unique<FeatureVectorArray::vector_array_impl<tdbColMajorMatrixWithIds<int64_t, uint32_t>>>(ctx, uri, ids_uri, num_vectors, timestamp);}},
354359
{{TILEDB_UINT64, TILEDB_UINT32},[](const tiledb::Context& ctx, const std::string& uri, const std::string& ids_uri, size_t num_vectors, size_t timestamp) {return std::make_unique<FeatureVectorArray::vector_array_impl<tdbColMajorMatrixWithIds<uint64_t, uint32_t>>>(ctx, uri, ids_uri, num_vectors, timestamp);}},
355360

356361
{{TILEDB_FLOAT32, TILEDB_UINT64},[](const tiledb::Context& ctx, const std::string& uri, const std::string& ids_uri, size_t num_vectors, size_t timestamp) { return std::make_unique<FeatureVectorArray::vector_array_impl<tdbColMajorMatrixWithIds<float, uint64_t>>>(ctx, uri, ids_uri, num_vectors, timestamp);}},
362+
{{TILEDB_INT8, TILEDB_UINT64},[](const tiledb::Context& ctx, const std::string& uri, const std::string& ids_uri, size_t num_vectors, size_t timestamp) { return std::make_unique<FeatureVectorArray::vector_array_impl<tdbColMajorMatrixWithIds<int8_t, uint64_t>>>(ctx, uri, ids_uri, num_vectors, timestamp);}},
357363
{{TILEDB_UINT8, TILEDB_UINT64},[](const tiledb::Context& ctx, const std::string& uri, const std::string& ids_uri, size_t num_vectors, size_t timestamp) { return std::make_unique<FeatureVectorArray::vector_array_impl<tdbColMajorMatrixWithIds<uint8_t, uint64_t>>>(ctx, uri, ids_uri, num_vectors, timestamp);}},
358364
{{TILEDB_INT32, TILEDB_UINT64},[](const tiledb::Context& ctx, const std::string& uri, const std::string& ids_uri, size_t num_vectors, size_t timestamp) {return std::make_unique<FeatureVectorArray::vector_array_impl<tdbColMajorMatrixWithIds<int32_t, uint64_t>>>(ctx, uri, ids_uri, num_vectors, timestamp);}},
359365
{{TILEDB_UINT32, TILEDB_UINT64},[](const tiledb::Context& ctx, const std::string& uri, const std::string& ids_uri, size_t num_vectors, size_t timestamp) {return std::make_unique<FeatureVectorArray::vector_array_impl<tdbColMajorMatrixWithIds<uint32_t, uint64_t>>>(ctx, uri, ids_uri, num_vectors, timestamp);}},
@@ -369,6 +375,21 @@ bool validate_top_k(const FeatureVectorArray& a, const FeatureVectorArray& b) {
369375

370376
auto proc_b = [&b](auto& aview) {
371377
switch (b.feature_type()) {
378+
case TILEDB_FLOAT32: {
379+
auto bview = MatrixView<float, stdx::layout_left>{
380+
(float*)b.data(), extents(b)[0], extents(b)[1]};
381+
return validate_top_k(aview, bview);
382+
}
383+
case TILEDB_INT8: {
384+
auto bview = MatrixView<int8_t, stdx::layout_left>{
385+
(int8_t*)b.data(), extents(b)[0], extents(b)[1]};
386+
return validate_top_k(aview, bview);
387+
}
388+
case TILEDB_UINT8: {
389+
auto bview = MatrixView<uint8_t, stdx::layout_left>{
390+
(uint8_t*)b.data(), extents(b)[0], extents(b)[1]};
391+
return validate_top_k(aview, bview);
392+
}
372393
case TILEDB_INT32: {
373394
auto bview = MatrixView<int32_t, stdx::layout_left>{
374395
(int32_t*)b.data(), extents(b)[0], extents(b)[1]};
@@ -395,6 +416,21 @@ bool validate_top_k(const FeatureVectorArray& a, const FeatureVectorArray& b) {
395416
};
396417

397418
switch (a.feature_type()) {
419+
case TILEDB_FLOAT32: {
420+
auto aview = MatrixView<float, stdx::layout_left>{
421+
(float*)a.data(), extents(a)[0], extents(a)[1]};
422+
return proc_b(aview);
423+
}
424+
case TILEDB_INT8: {
425+
auto aview = MatrixView<int8_t, stdx::layout_left>{
426+
(int8_t*)a.data(), extents(a)[0], extents(a)[1]};
427+
return proc_b(aview);
428+
}
429+
case TILEDB_UINT8: {
430+
auto aview = MatrixView<uint8_t, stdx::layout_left>{
431+
(uint8_t*)a.data(), extents(a)[0], extents(a)[1]};
432+
return proc_b(aview);
433+
}
398434
case TILEDB_INT32: {
399435
auto aview = MatrixView<int32_t, stdx::layout_left>{
400436
(int32_t*)a.data(), extents(a)[0], extents(a)[1]};
@@ -432,6 +468,21 @@ auto count_intersections(
432468
const FeatureVectorArray& a, const FeatureVectorArray& b, size_t k_nn) {
433469
auto proc_b = [&b, k_nn](auto& aview) {
434470
switch (b.feature_type()) {
471+
case TILEDB_FLOAT32: {
472+
auto bview = MatrixView<float, stdx::layout_left>{
473+
(float*)b.data(), extents(b)[0], extents(b)[1]};
474+
return count_intersections(aview, bview, k_nn);
475+
}
476+
case TILEDB_INT8: {
477+
auto bview = MatrixView<int8_t, stdx::layout_left>{
478+
(int8_t*)b.data(), extents(b)[0], extents(b)[1]};
479+
return count_intersections(aview, bview, k_nn);
480+
}
481+
case TILEDB_UINT8: {
482+
auto bview = MatrixView<uint8_t, stdx::layout_left>{
483+
(uint8_t*)b.data(), extents(b)[0], extents(b)[1]};
484+
return count_intersections(aview, bview, k_nn);
485+
}
435486
case TILEDB_INT32: {
436487
auto bview = MatrixView<int32_t, stdx::layout_left>{
437488
(int32_t*)b.data(), extents(b)[0], extents(b)[1]};
@@ -458,6 +509,21 @@ auto count_intersections(
458509
};
459510

460511
switch (a.feature_type()) {
512+
case TILEDB_FLOAT32: {
513+
auto aview = MatrixView<float, stdx::layout_left>{
514+
(float*)a.data(), extents(a)[0], extents(a)[1]};
515+
return proc_b(aview);
516+
}
517+
case TILEDB_INT8: {
518+
auto aview = MatrixView<int8_t, stdx::layout_left>{
519+
(int8_t*)a.data(), extents(a)[0], extents(a)[1]};
520+
return proc_b(aview);
521+
}
522+
case TILEDB_UINT8: {
523+
auto aview = MatrixView<uint8_t, stdx::layout_left>{
524+
(uint8_t*)a.data(), extents(a)[0], extents(a)[1]};
525+
return proc_b(aview);
526+
}
461527
case TILEDB_INT32: {
462528
auto aview = MatrixView<int32_t, stdx::layout_left>{
463529
(int32_t*)a.data(), extents(a)[0], extents(a)[1]};

0 commit comments

Comments
 (0)