From bbb7f66959a2600e266453870a58c76e182c2962 Mon Sep 17 00:00:00 2001 From: meiravgri <109056284+meiravgri@users.noreply.github.com> Date: Mon, 3 Nov 2025 14:21:41 +0200 Subject: [PATCH 1/2] Fix incorrect assertion in `BM_VecSimBasics::UpdateAtBlockSize` benchmark (#819) * VecSimIndexInterface: introduce indexMetaDataCapacity for testing returns metadata containers capacity * cover for tiered index (cherry picked from commit f24b65afa98f99352f7c1f384e24f2c2a4cbaad1) --- .../algorithms/brute_force/brute_force.h | 2 ++ src/VecSim/algorithms/hnsw/hnsw.h | 2 ++ src/VecSim/algorithms/hnsw/hnsw_tiered.h | 4 +++ src/VecSim/vec_sim_interface.h | 10 +++++++ tests/benchmark/bm_vecsim_basics.h | 18 +++++++++---- .../run_files/bm_basics_single_fp32.cpp | 2 +- tests/unit/test_allocator.cpp | 2 ++ tests/unit/test_hnsw_tiered.cpp | 27 +++++++++++++++++++ 8 files changed, 61 insertions(+), 6 deletions(-) diff --git a/src/VecSim/algorithms/brute_force/brute_force.h b/src/VecSim/algorithms/brute_force/brute_force.h index 338b63e80..cfe80cee6 100644 --- a/src/VecSim/algorithms/brute_force/brute_force.h +++ b/src/VecSim/algorithms/brute_force/brute_force.h @@ -88,6 +88,8 @@ class BruteForceIndex : public VecSimIndexAbstract { resizeLabelLookup(idToLabelMapping.size()); } + size_t indexMetaDataCapacity() const override { return idToLabelMapping.capacity(); } + size_t getStoredVectorsCount() const { size_t actual_stored_vec = 0; for (auto &block : vectorBlocks) { diff --git a/src/VecSim/algorithms/hnsw/hnsw.h b/src/VecSim/algorithms/hnsw/hnsw.h index eaa92199b..788579f6c 100644 --- a/src/VecSim/algorithms/hnsw/hnsw.h +++ b/src/VecSim/algorithms/hnsw/hnsw.h @@ -326,6 +326,8 @@ class HNSWIndex : public VecSimIndexAbstract, return actual_stored_vec; } + + size_t indexMetaDataCapacity() const override { return idToMetaData.capacity(); } #endif protected: diff --git a/src/VecSim/algorithms/hnsw/hnsw_tiered.h b/src/VecSim/algorithms/hnsw/hnsw_tiered.h index 1d3c78766..1cd2680db 100644 --- a/src/VecSim/algorithms/hnsw/hnsw_tiered.h +++ b/src/VecSim/algorithms/hnsw/hnsw_tiered.h @@ -233,6 +233,10 @@ class TieredHNSWIndex : public VecSimTieredIndex { #ifdef BUILD_TESTS void getDataByLabel(labelType label, std::vector> &vectors_output) const; + size_t indexMetaDataCapacity() const override { + return this->backendIndex->indexMetaDataCapacity() + + this->frontendIndex->indexMetaDataCapacity(); + } #endif }; diff --git a/src/VecSim/vec_sim_interface.h b/src/VecSim/vec_sim_interface.h index 801ab6f43..3ff84e4fb 100644 --- a/src/VecSim/vec_sim_interface.h +++ b/src/VecSim/vec_sim_interface.h @@ -264,5 +264,15 @@ struct VecSimIndexInterface : public VecsimBaseObject { } #ifdef BUILD_TESTS virtual void fitMemory() = 0; + /** + * @brief get the capacity of the meta data containers. + * + * @return The capacity of the meta data containers in number of elements. + * The value returned from this function may differ from the indexCapacity() function. For + * example, in HNSW, the capacity of the meta data containers is the capacity of the labels + * lookup table, while the capacity of the data containers is the capacity of the vectors + * container. + */ + virtual size_t indexMetaDataCapacity() const = 0; #endif }; diff --git a/tests/benchmark/bm_vecsim_basics.h b/tests/benchmark/bm_vecsim_basics.h index 09990e7e1..9b03d152d 100644 --- a/tests/benchmark/bm_vecsim_basics.h +++ b/tests/benchmark/bm_vecsim_basics.h @@ -310,6 +310,9 @@ void BM_VecSimBasics::UpdateAtBlockSize(benchmark::State &st) { // Calculate vectors needed to reach next block boundary size_t vecs_to_blocksize = BM_VecSimGeneral::block_size - (initial_index_size % BM_VecSimGeneral::block_size); + size_t initial_index_cap = index->indexMetaDataCapacity(); + assert(initial_index_cap == N_VECTORS + vecs_to_blocksize); + assert(vecs_to_blocksize < BM_VecSimGeneral::block_size); labelType initial_label_count = index->indexLabelCount(); labelType curr_label = initial_label_count; @@ -334,15 +337,20 @@ void BM_VecSimBasics::UpdateAtBlockSize(benchmark::State &st) { // Benchmark loop: repeatedly delete/add same vector to trigger grow-shrink cycles labelType label_to_update = curr_label - 1; - size_t index_cap = index->indexCapacity(); + size_t index_cap = index->indexMetaDataCapacity(); + std::cout << "index_cap after adding vectors " << index_cap << std::endl; + assert(index_cap == initial_index_cap + BM_VecSimGeneral::block_size); + for (auto _ : st) { // Remove the vector directly from hnsw size_t ret = VecSimIndex_DeleteVector( INDICES[st.range(0) == VecSimAlgo_TIERED ? VecSimAlgo_HNSWLIB : st.range(0)], label_to_update); assert(ret == 1); - assert(index->indexCapacity() == index_cap - BM_VecSimGeneral::block_size); - // Capacity should shrink by one block after deletion + + // Capacity should not change + size_t curr_cap = index->indexMetaDataCapacity(); + assert(curr_cap == index_cap); ret = VecSimIndex_AddVector(index, QUERIES[(added_vec_count - 1) % N_QUERIES].data(), label_to_update); assert(ret == 1); @@ -350,8 +358,8 @@ void BM_VecSimBasics::UpdateAtBlockSize(benchmark::State &st) { assert(VecSimIndex_IndexSize( INDICES[st.range(0) == VecSimAlgo_TIERED ? VecSimAlgo_HNSWLIB : st.range(0)]) == N_VECTORS + added_vec_count); - // Capacity should grow back to original size after addition - assert(index->indexCapacity() == index_cap); + // Capacity should not change + assert(index->indexMetaDataCapacity() == index_cap); } assert(VecSimIndex_IndexSize(index) == N_VECTORS + added_vec_count); diff --git a/tests/benchmark/run_files/bm_basics_single_fp32.cpp b/tests/benchmark/run_files/bm_basics_single_fp32.cpp index f7dc238d6..14d5be6ae 100644 --- a/tests/benchmark/run_files/bm_basics_single_fp32.cpp +++ b/tests/benchmark/run_files/bm_basics_single_fp32.cpp @@ -32,7 +32,7 @@ DEFINE_DELETE_LABEL(BM_FUNC_NAME(DeleteLabel, HNSW), fp32_index_t, HNSWIndex_Sin DEFINE_DELETE_LABEL(BM_FUNC_NAME(DeleteLabel, Tiered), fp32_index_t, TieredHNSWIndex, float, float, VecSimAlgo_TIERED) -#include "benchmark/bm_initialization/bm_basics_initialize_fp32.h" +// #include "benchmark/bm_initialization/bm_basics_initialize_fp32.h" // Test oscillations at block size boundaries. BENCHMARK_TEMPLATE_DEFINE_F(BM_VecSimBasics, UpdateAtBlockSize_Single, fp32_index_t) diff --git a/tests/unit/test_allocator.cpp b/tests/unit/test_allocator.cpp index 70f79511d..932a13971 100644 --- a/tests/unit/test_allocator.cpp +++ b/tests/unit/test_allocator.cpp @@ -120,6 +120,7 @@ TYPED_TEST(IndexAllocatorTest, test_bf_index_block_size_1) { ASSERT_EQ(bfIndex->indexCapacity(), expected_map_containers_size); ASSERT_EQ(bfIndex->idToLabelMapping.capacity(), expected_map_containers_size); + ASSERT_EQ(bfIndex->indexMetaDataCapacity(), expected_map_containers_size); ASSERT_EQ(bfIndex->idToLabelMapping.size(), expected_map_containers_size); ASSERT_GE(bfIndex->labelToIdLookup.bucket_count(), expected_map_containers_size); }; @@ -530,6 +531,7 @@ TYPED_TEST(IndexAllocatorTest, test_hnsw_reclaim_memory) { ASSERT_EQ(hnswIndex->getStoredVectorsCount(), expected_size); ASSERT_EQ(hnswIndex->idToMetaData.capacity(), expected_map_containers_size); + ASSERT_EQ(hnswIndex->indexMetaDataCapacity(), expected_map_containers_size); ASSERT_EQ(hnswIndex->idToMetaData.size(), expected_map_containers_size); ASSERT_GE(hnswIndex->labelLookup.bucket_count(), expected_map_containers_size); // Also validate that there are no unidirectional connections (these add memory to the diff --git a/tests/unit/test_hnsw_tiered.cpp b/tests/unit/test_hnsw_tiered.cpp index c070636bb..306b2053e 100644 --- a/tests/unit/test_hnsw_tiered.cpp +++ b/tests/unit/test_hnsw_tiered.cpp @@ -3860,6 +3860,11 @@ TYPED_TEST(HNSWTieredIndexTestBasic, HNSWResize) { ASSERT_EQ(tiered_index->getMainIndexGuardWriteLockCount(), resize_operations); ASSERT_EQ(hnsw_index->indexSize(), 1); ASSERT_EQ(hnsw_index->indexCapacity(), blockSize); + ASSERT_EQ(hnsw_index->indexMetaDataCapacity(), blockSize); + ASSERT_EQ(tiered_index->frontendIndex->indexMetaDataCapacity(), 0); + ASSERT_EQ(tiered_index->indexMetaDataCapacity(), + hnsw_index->indexMetaDataCapacity() + + tiered_index->frontendIndex->indexMetaDataCapacity()); // add up to block size for (size_t i = 1; i < blockSize; i++) { @@ -3870,6 +3875,11 @@ TYPED_TEST(HNSWTieredIndexTestBasic, HNSWResize) { ASSERT_EQ(tiered_index->getMainIndexGuardWriteLockCount(), resize_operations); ASSERT_EQ(hnsw_index->indexSize(), blockSize); ASSERT_EQ(hnsw_index->indexCapacity(), blockSize); + ASSERT_EQ(hnsw_index->indexMetaDataCapacity(), blockSize); + ASSERT_EQ(tiered_index->frontendIndex->indexMetaDataCapacity(), 0); + ASSERT_EQ(tiered_index->indexMetaDataCapacity(), + hnsw_index->indexMetaDataCapacity() + + tiered_index->frontendIndex->indexMetaDataCapacity()); // add one more vector to trigger another resize GenerateAndAddVector(tiered_index, dim, blockSize); @@ -3879,6 +3889,11 @@ TYPED_TEST(HNSWTieredIndexTestBasic, HNSWResize) { ASSERT_EQ(tiered_index->getMainIndexGuardWriteLockCount(), resize_operations); ASSERT_EQ(hnsw_index->indexSize(), blockSize + 1); ASSERT_EQ(hnsw_index->indexCapacity(), 2 * blockSize); + ASSERT_EQ(hnsw_index->indexMetaDataCapacity(), 2 * blockSize); + ASSERT_EQ(tiered_index->frontendIndex->indexMetaDataCapacity(), 0); + ASSERT_EQ(tiered_index->indexMetaDataCapacity(), + hnsw_index->indexMetaDataCapacity() + + tiered_index->frontendIndex->indexMetaDataCapacity()); // delete a vector to shrink data blocks ASSERT_EQ(VecSimIndex_DeleteVector(tiered_index, 0), 1) << "Failed to delete vector 0"; @@ -3890,6 +3905,8 @@ TYPED_TEST(HNSWTieredIndexTestBasic, HNSWResize) { ASSERT_EQ(tiered_index->getMainIndexGuardWriteLockCount(), resize_operations); ASSERT_EQ(hnsw_index->indexSize(), blockSize); ASSERT_EQ(hnsw_index->indexCapacity(), blockSize); + // meta data capacity should not shrink + ASSERT_EQ(hnsw_index->indexMetaDataCapacity(), 2 * blockSize); // add this vector again and verify lock was acquired to resize GenerateAndAddVector(tiered_index, dim, 0); @@ -3898,6 +3915,11 @@ TYPED_TEST(HNSWTieredIndexTestBasic, HNSWResize) { ASSERT_EQ(tiered_index->getMainIndexGuardWriteLockCount(), resize_operations); ASSERT_EQ(hnsw_index->indexSize(), blockSize + 1); ASSERT_EQ(hnsw_index->indexCapacity(), 2 * blockSize); + ASSERT_EQ(hnsw_index->indexMetaDataCapacity(), 2 * blockSize); + ASSERT_EQ(tiered_index->frontendIndex->indexMetaDataCapacity(), 0); + ASSERT_EQ(tiered_index->indexMetaDataCapacity(), + hnsw_index->indexMetaDataCapacity() + + tiered_index->frontendIndex->indexMetaDataCapacity()); // add up to block size (count = 2 blockSize), the lock shouldn't be acquired because no resize // is required @@ -3908,4 +3930,9 @@ TYPED_TEST(HNSWTieredIndexTestBasic, HNSWResize) { ASSERT_EQ(tiered_index->getMainIndexGuardWriteLockCount(), resize_operations); ASSERT_EQ(hnsw_index->indexSize(), 2 * blockSize); ASSERT_EQ(hnsw_index->indexCapacity(), 2 * blockSize); + ASSERT_EQ(hnsw_index->indexMetaDataCapacity(), 2 * blockSize); + ASSERT_EQ(tiered_index->frontendIndex->indexMetaDataCapacity(), 0); + ASSERT_EQ(tiered_index->indexMetaDataCapacity(), + hnsw_index->indexMetaDataCapacity() + + tiered_index->frontendIndex->indexMetaDataCapacity()); } From e623ef46079dd98e7db9317fdb2924805061bfdc Mon Sep 17 00:00:00 2001 From: meiravgri <109056284+meiravgri@users.noreply.github.com> Date: Tue, 4 Nov 2025 10:50:29 +0200 Subject: [PATCH 2/2] Uncomment include for bm_basics_initialize_fp32.h --- tests/benchmark/run_files/bm_basics_single_fp32.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/benchmark/run_files/bm_basics_single_fp32.cpp b/tests/benchmark/run_files/bm_basics_single_fp32.cpp index 14d5be6ae..f7dc238d6 100644 --- a/tests/benchmark/run_files/bm_basics_single_fp32.cpp +++ b/tests/benchmark/run_files/bm_basics_single_fp32.cpp @@ -32,7 +32,7 @@ DEFINE_DELETE_LABEL(BM_FUNC_NAME(DeleteLabel, HNSW), fp32_index_t, HNSWIndex_Sin DEFINE_DELETE_LABEL(BM_FUNC_NAME(DeleteLabel, Tiered), fp32_index_t, TieredHNSWIndex, float, float, VecSimAlgo_TIERED) -// #include "benchmark/bm_initialization/bm_basics_initialize_fp32.h" +#include "benchmark/bm_initialization/bm_basics_initialize_fp32.h" // Test oscillations at block size boundaries. BENCHMARK_TEMPLATE_DEFINE_F(BM_VecSimBasics, UpdateAtBlockSize_Single, fp32_index_t)