diff --git a/tests/benchmark/bm_common.h b/tests/benchmark/bm_common.h index 4e2c01b07..644e633ef 100644 --- a/tests/benchmark/bm_common.h +++ b/tests/benchmark/bm_common.h @@ -62,7 +62,9 @@ void BM_VecSimCommon::Memory_FLAT(benchmark::State &st, unsigned s for (auto _ : st) { // Do nothing... } - st.counters["memory"] = (double)VecSimIndex_StatsInfo(index).memory; + st.counters["memory"] = + benchmark::Counter((double)VecSimIndex_StatsInfo(index).memory, + benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); } template void BM_VecSimCommon::Memory_HNSW(benchmark::State &st, unsigned short index_offset) { @@ -72,7 +74,9 @@ void BM_VecSimCommon::Memory_HNSW(benchmark::State &st, unsigned s for (auto _ : st) { // Do nothing... } - st.counters["memory"] = (double)VecSimIndex_StatsInfo(index).memory; + st.counters["memory"] = + benchmark::Counter((double)VecSimIndex_StatsInfo(index).memory, + benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); } template void BM_VecSimCommon::Memory_Tiered(benchmark::State &st, @@ -82,7 +86,9 @@ void BM_VecSimCommon::Memory_Tiered(benchmark::State &st, for (auto _ : st) { // Do nothing... } - st.counters["memory"] = (double)VecSimIndex_StatsInfo(index).memory; + st.counters["memory"] = + benchmark::Counter((double)VecSimIndex_StatsInfo(index).memory, + benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); } // TopK search BM diff --git a/tests/benchmark/bm_vecsim_basics.h b/tests/benchmark/bm_vecsim_basics.h index 201377a27..09990e7e1 100644 --- a/tests/benchmark/bm_vecsim_basics.h +++ b/tests/benchmark/bm_vecsim_basics.h @@ -32,6 +32,14 @@ class BM_VecSimBasics : public BM_VecSimCommon { static void Range_BF(benchmark::State &st); static void Range_HNSW(benchmark::State &st); + // Reproduces allocation/deallocation oscillation issue at block size boundaries. 
+ // Sets up index at blockSize+1 capacity, then repeatedly deletes and re-adds the same vector, + // triggering constant grow-shrink cycles. + // This behavior was fixed by PR #753 with a conservative resize strategy that only + // shrinks containers when there are 2+ free blocks, preventing oscillation cycles. + // Expected: High allocation overhead before fix, stable performance after fix. + static void UpdateAtBlockSize(benchmark::State &st); + private: // Vectors of vector to store deleted labels' data. using LabelData = std::vector>; @@ -66,7 +74,9 @@ void BM_VecSimBasics::AddLabel(benchmark::State &st) { // For tiered index, wait for all threads to finish indexing BM_VecSimGeneral::mock_thread_pool.thread_pool_wait(); - st.counters["memory_per_vector"] = (double)memory_delta / (double)added_vec_count; + st.counters["memory_per_vector"] = + benchmark::Counter((double)memory_delta / (double)added_vec_count, + benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); st.counters["vectors_per_label"] = vec_per_label; assert(VecSimIndex_IndexSize(index) == N_VECTORS + added_vec_count); @@ -114,7 +124,9 @@ void BM_VecSimBasics::AddLabel_AsyncIngest(benchmark::State &st) { } size_t memory_delta = index->getAllocationSize() - memory_before; - st.counters["memory_per_vector"] = (double)memory_delta / (double)added_vec_count; + st.counters["memory_per_vector"] = + benchmark::Counter((double)memory_delta / (double)added_vec_count, + benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); st.counters["vectors_per_label"] = vec_per_label; st.counters["num_threads"] = BM_VecSimGeneral::mock_thread_pool.thread_pool_size; @@ -164,7 +176,9 @@ void BM_VecSimBasics::DeleteLabel(algo_t *index, benchmark::State if (VecSimIndex_BasicInfo(index).algo == VecSimAlgo_TIERED) { dynamic_cast *>(index)->executeReadySwapJobs(); } - st.counters["memory_per_vector"] = memory_delta / (double)removed_vectors_count; + st.counters["memory_per_vector"] = + 
+ benchmark::Counter((double)memory_delta / (double)removed_vectors_count, + benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); // Restore index state. // For each label in removed_labels_data @@ -214,7 +228,10 @@ void BM_VecSimBasics::DeleteLabel_AsyncRepair(benchmark::State &st // Avg. memory delta per vector equals the total memory delta divided by the number // of deleted vectors. double memory_delta = tiered_index->getAllocationSize() - memory_before; - st.counters["memory_per_vector"] = memory_delta / (double)removed_vectors_count; + + st.counters["memory_per_vector"] = + benchmark::Counter((double)memory_delta / (double)removed_vectors_count, + benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); st.counters["num_threads"] = (double)BM_VecSimGeneral::mock_thread_pool.thread_pool_size; st.counters["num_zombies"] = tiered_index->idToSwapJob.size(); @@ -286,6 +303,74 @@ void BM_VecSimBasics::Range_HNSW(benchmark::State &st) { st.counters["Recall"] = (float)total_res / total_res_bf; } +// Benchmarks deleting and re-adding one label while the index size is one vector past a block +// boundary. st.range(0) picks the entry of INDICES to run against (BF / HNSW / tiered args). +// NOTE(review): the loop asserts a capacity shrink on every delete, but the declaration comment +// says the fixed strategy shrinks only with 2+ free blocks - confirm what debug builds should +// assert. +template +void BM_VecSimBasics::UpdateAtBlockSize(benchmark::State &st) { + auto index = INDICES[st.range(0)]; + size_t initial_index_size = VecSimIndex_IndexSize(index); + // Vectors needed to reach the next block boundary; a full block if already aligned. + size_t vecs_to_blocksize = + BM_VecSimGeneral::block_size - (initial_index_size % BM_VecSimGeneral::block_size); + assert(vecs_to_blocksize <= BM_VecSimGeneral::block_size); + labelType initial_label_count = index->indexLabelCount(); + labelType curr_label = initial_label_count; + + // Set up index at blockSize+1 to trigger oscillation issue + // Make sure we have enough queries to add a new label. 
+ assert(N_QUERIES > BM_VecSimGeneral::block_size); + size_t overhead = 1; + size_t added_vec_count = vecs_to_blocksize + overhead; + for (size_t i = 0; i < added_vec_count; ++i) { + VecSimIndex_AddVector(index, QUERIES[i % N_QUERIES].data(), curr_label++); + } + // For tiered index, wait for all threads to finish indexing + BM_VecSimGeneral::mock_thread_pool.thread_pool_wait(); + assert(VecSimIndex_IndexSize(index) % BM_VecSimGeneral::block_size == overhead); + assert(VecSimIndex_IndexSize(index) == N_VECTORS + added_vec_count); + + std::cout << "Added " << added_vec_count << " vectors to reach block size boundary." + << std::endl; + std::cout << "Index size is now " << VecSimIndex_IndexSize(index) << std::endl; + std::cout << "Last label is " << curr_label - 1 << std::endl; + + // Benchmark loop: repeatedly delete/add same vector to trigger grow-shrink cycles + labelType label_to_update = curr_label - 1; + size_t index_cap = index->indexCapacity(); + for (auto _ : st) { + // Delete the label; for the tiered arg, go through INDICES[VecSimAlgo_HNSWLIB] instead + size_t ret = VecSimIndex_DeleteVector( + INDICES[st.range(0) == VecSimAlgo_TIERED ? VecSimAlgo_HNSWLIB : st.range(0)], + label_to_update); + assert(ret == 1); + // Capacity should shrink by one block after deletion + assert(index->indexCapacity() == index_cap - BM_VecSimGeneral::block_size); + ret = VecSimIndex_AddVector(index, QUERIES[(added_vec_count - 1) % N_QUERIES].data(), + label_to_update); + assert(ret == 1); + BM_VecSimGeneral::mock_thread_pool.thread_pool_wait(); + assert(VecSimIndex_IndexSize( + INDICES[st.range(0) == VecSimAlgo_TIERED ? VecSimAlgo_HNSWLIB : st.range(0)]) == + N_VECTORS + added_vec_count); + // Capacity should grow back to original size after addition + assert(index->indexCapacity() == index_cap); + } + assert(VecSimIndex_IndexSize(index) == N_VECTORS + added_vec_count); + + // Clean-up all the new vectors to restore the index size to its original value. 
+ + size_t new_label_count = index->indexLabelCount(); + for (size_t label = initial_label_count; label < new_label_count; label++) { + // If index is tiered HNSW, remove directly from the underlying HNSW. + VecSimIndex_DeleteVector( + INDICES[st.range(0) == VecSimAlgo_TIERED ? VecSimAlgo_HNSWLIB : st.range(0)], label); + } + assert(VecSimIndex_IndexSize(index) == N_VECTORS); +} + #define UNIT_AND_ITERATIONS Unit(benchmark::kMillisecond)->Iterations(BM_VecSimGeneral::block_size) // The actual radius will be the given arg divided by 100, since arg must be an integer. @@ -331,3 +416,8 @@ void BM_VecSimBasics::Range_HNSW(benchmark::State &st) { } #define REGISTER_DeleteLabel(BM_FUNC) \ BENCHMARK_REGISTER_F(BM_VecSimBasics, BM_FUNC)->UNIT_AND_ITERATIONS + +#define REGISTER_UpdateAtBlockSize(BM_FUNC, VecSimAlgo) \ + BENCHMARK_REGISTER_F(BM_VecSimBasics, BM_FUNC) \ + ->UNIT_AND_ITERATIONS->Arg(VecSimAlgo) \ + ->ArgName(#VecSimAlgo) diff --git a/tests/benchmark/run_files/bm_basics_multi_fp32.cpp b/tests/benchmark/run_files/bm_basics_multi_fp32.cpp index fd614f49d..6fc335b56 100644 --- a/tests/benchmark/run_files/bm_basics_multi_fp32.cpp +++ b/tests/benchmark/run_files/bm_basics_multi_fp32.cpp @@ -33,4 +33,10 @@ DEFINE_DELETE_LABEL(BM_FUNC_NAME(DeleteLabel, Tiered), fp32_index_t, TieredHNSWI VecSimAlgo_TIERED) #include "benchmark/bm_initialization/bm_basics_initialize_fp32.h" +// Test oscillations at block size boundaries. 
+BENCHMARK_TEMPLATE_DEFINE_F(BM_VecSimBasics, UpdateAtBlockSize_Multi, fp32_index_t) +(benchmark::State &st) { UpdateAtBlockSize(st); } +REGISTER_UpdateAtBlockSize(UpdateAtBlockSize_Multi, VecSimAlgo_BF); +REGISTER_UpdateAtBlockSize(UpdateAtBlockSize_Multi, VecSimAlgo_HNSWLIB); +REGISTER_UpdateAtBlockSize(UpdateAtBlockSize_Multi, VecSimAlgo_TIERED); BENCHMARK_MAIN(); diff --git a/tests/benchmark/run_files/bm_basics_single_fp32.cpp b/tests/benchmark/run_files/bm_basics_single_fp32.cpp index 889927779..f7dc238d6 100644 --- a/tests/benchmark/run_files/bm_basics_single_fp32.cpp +++ b/tests/benchmark/run_files/bm_basics_single_fp32.cpp @@ -31,5 +31,13 @@ DEFINE_DELETE_LABEL(BM_FUNC_NAME(DeleteLabel, HNSW), fp32_index_t, HNSWIndex_Sin VecSimAlgo_HNSWLIB) DEFINE_DELETE_LABEL(BM_FUNC_NAME(DeleteLabel, Tiered), fp32_index_t, TieredHNSWIndex, float, float, VecSimAlgo_TIERED) + #include "benchmark/bm_initialization/bm_basics_initialize_fp32.h" + +// Test oscillations at block size boundaries. +BENCHMARK_TEMPLATE_DEFINE_F(BM_VecSimBasics, UpdateAtBlockSize_Single, fp32_index_t) +(benchmark::State &st) { UpdateAtBlockSize(st); } +REGISTER_UpdateAtBlockSize(UpdateAtBlockSize_Single, VecSimAlgo_BF); +REGISTER_UpdateAtBlockSize(UpdateAtBlockSize_Single, VecSimAlgo_HNSWLIB); +REGISTER_UpdateAtBlockSize(UpdateAtBlockSize_Single, VecSimAlgo_TIERED); BENCHMARK_MAIN();