diff --git a/tests/benchmark/bm_common.h b/tests/benchmark/bm_common.h index fd0cec2b6..0642e6e0a 100644 --- a/tests/benchmark/bm_common.h +++ b/tests/benchmark/bm_common.h @@ -70,7 +70,9 @@ void BM_VecSimCommon::Memory(benchmark::State &st, IndexTypeIndex for (auto _ : st) { // Do nothing... } - st.counters["memory"] = (double)VecSimIndex_StatsInfo(index).memory; + st.counters["memory"] = + benchmark::Counter((double)VecSimIndex_StatsInfo(index).memory, + benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); } // TopK search BM diff --git a/tests/benchmark/bm_vecsim_basics.h b/tests/benchmark/bm_vecsim_basics.h index 2544294a7..b7edd3904 100644 --- a/tests/benchmark/bm_vecsim_basics.h +++ b/tests/benchmark/bm_vecsim_basics.h @@ -42,6 +42,14 @@ class BM_VecSimBasics : public BM_VecSimCommon { static void Range_BF(benchmark::State &st); static void Range_HNSW(benchmark::State &st); + // Reproduces allocation/deallocation oscillation issue at block size boundaries. + // Sets up index at blockSize+1 capacity, then repeatedly deletes and re-adds the same vector, + // triggering constant grow-shrink cycles. + // This behavior was fixed by PR #753 with a conservative resize strategy that only + // shrinks containers when there are 2+ free blocks, preventing oscillation cycles. + // Expected: High allocation overhead before fix, stable performance after fix. + static void UpdateAtBlockSize(benchmark::State &st); + private: // Vectors of vector to store deleted labels' data. 
using LabelData = std::vector>; @@ -76,7 +84,9 @@ void BM_VecSimBasics::AddLabel(benchmark::State &st) { // For tiered index, wait for all threads to finish indexing BM_VecSimGeneral::mock_thread_pool->thread_pool_wait(); - st.counters["memory_per_vector"] = (double)memory_delta / (double)added_vec_count; + st.counters["memory_per_vector"] = + benchmark::Counter((double)memory_delta / (double)added_vec_count, + benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); st.counters["vectors_per_label"] = vec_per_label; assert(VecSimIndex_IndexSize(index) == N_VECTORS + added_vec_count); @@ -123,7 +133,9 @@ void BM_VecSimBasics::AddLabel_AsyncIngest(benchmark::State &st) { } size_t memory_delta = index->getAllocationSize() - memory_before; - st.counters["memory_per_vector"] = (double)memory_delta / (double)added_vec_count; + st.counters["memory_per_vector"] = + benchmark::Counter((double)memory_delta / (double)added_vec_count, + benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); st.counters["vectors_per_label"] = vec_per_label; st.counters["num_threads"] = BM_VecSimGeneral::mock_thread_pool->thread_pool_size; @@ -173,7 +185,9 @@ void BM_VecSimBasics::DeleteLabel(algo_t *index, benchmark::State if (VecSimIndex_BasicInfo(index).algo == VecSimAlgo_TIERED) { dynamic_cast *>(index)->executeReadySwapJobs(); } - st.counters["memory_per_vector"] = memory_delta / (double)removed_vectors_count; + st.counters["memory_per_vector"] = + benchmark::Counter((double)memory_delta / (double)removed_vectors_count, + benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); // Restore index state. // For each label in removed_labels_data @@ -223,7 +237,10 @@ void BM_VecSimBasics::DeleteLabel_AsyncRepair(benchmark::State &st // Avg. memory delta per vector equals the total memory delta divided by the number // of deleted vectors. 
double memory_delta = tiered_index->getAllocationSize() - memory_before; - st.counters["memory_per_vector"] = memory_delta / (double)removed_vectors_count; + + st.counters["memory_per_vector"] = + benchmark::Counter((double)memory_delta / (double)removed_vectors_count, + benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); st.counters["num_threads"] = (double)BM_VecSimGeneral::mock_thread_pool->thread_pool_size; st.counters["num_zombies"] = tiered_index->idToSwapJob.size(); @@ -294,6 +311,69 @@ void BM_VecSimBasics::Range_HNSW(benchmark::State &st) { st.counters["Recall"] = (float)total_res / total_res_bf; } +template +void BM_VecSimBasics::UpdateAtBlockSize(benchmark::State &st) { + auto index = GET_INDEX(st.range(0)); + size_t initial_index_size = VecSimIndex_IndexSize(index); + // Calculate vectors needed to reach next block boundary + size_t vecs_to_blocksize = + BM_VecSimGeneral::block_size - (initial_index_size % BM_VecSimGeneral::block_size); + assert(vecs_to_blocksize < BM_VecSimGeneral::block_size); + labelType initial_label_count = index->indexLabelCount(); + labelType curr_label = initial_label_count; + + // Set up index at blockSize+1 to trigger oscillation issue + // Make sure we have enough queries to add a distinct vector for each new label. + assert(N_QUERIES > BM_VecSimGeneral::block_size); + size_t overhead = 1; + size_t added_vec_count = vecs_to_blocksize + overhead; + for (size_t i = 0; i < added_vec_count; ++i) { + VecSimIndex_AddVector(index, QUERIES[i % N_QUERIES].data(), curr_label++); + } + // For tiered index, wait for all threads to finish indexing + BM_VecSimGeneral::mock_thread_pool->thread_pool_wait(); + assert(VecSimIndex_IndexSize(index) % BM_VecSimGeneral::block_size == overhead); + assert(VecSimIndex_IndexSize(index) == N_VECTORS + added_vec_count); + + std::cout << "Added " << added_vec_count << " vectors to reach block size boundary."
+ << std::endl; + std::cout << "Index size is now " << VecSimIndex_IndexSize(index) << std::endl; + std::cout << "Last label is " << curr_label - 1 << std::endl; + + // Benchmark loop: repeatedly delete/add same vector to trigger grow-shrink cycles + labelType label_to_update = curr_label - 1; + size_t index_cap = index->indexCapacity(); + for (auto _ : st) { + // For tiered index, remove the vector directly from the underlying HNSW + size_t ret = VecSimIndex_DeleteVector( + GET_INDEX(st.range(0) == INDEX_TIERED_HNSW ? INDEX_HNSW : st.range(0)), + label_to_update); + assert(ret == 1); + // Capacity should shrink by one block after deletion + assert(index->indexCapacity() == index_cap - BM_VecSimGeneral::block_size); + ret = VecSimIndex_AddVector(index, QUERIES[(added_vec_count - 1) % N_QUERIES].data(), + label_to_update); + assert(ret == 1); + BM_VecSimGeneral::mock_thread_pool->thread_pool_wait(); + assert(VecSimIndex_IndexSize( + GET_INDEX(st.range(0) == INDEX_TIERED_HNSW ? INDEX_HNSW : st.range(0))) == + N_VECTORS + added_vec_count); + // Capacity should grow back to original size after addition + assert(index->indexCapacity() == index_cap); + } + assert(VecSimIndex_IndexSize(index) == N_VECTORS + added_vec_count); + + // Clean-up all the new vectors to restore the index size to its original value. + + size_t new_label_count = index->indexLabelCount(); + for (size_t label = initial_label_count; label < new_label_count; label++) { + // If index is tiered HNSW, remove directly from the underlying HNSW. + VecSimIndex_DeleteVector( + GET_INDEX(st.range(0) == INDEX_TIERED_HNSW ?
INDEX_HNSW : st.range(0)), label); + } + assert(VecSimIndex_IndexSize(index) == N_VECTORS); +} + #define UNIT_AND_ITERATIONS Unit(benchmark::kMillisecond)->Iterations(BM_VecSimGeneral::block_size) // These macros are used to make sure the expansion of other macros happens when needed @@ -345,3 +425,8 @@ void BM_VecSimBasics::Range_HNSW(benchmark::State &st) { } #define REGISTER_DeleteLabel(BM_FUNC) \ BENCHMARK_REGISTER_F(BM_VecSimBasics, BM_FUNC)->UNIT_AND_ITERATIONS + +#define REGISTER_UpdateAtBlockSize(BM_FUNC, VecSimAlgo) \ + BENCHMARK_REGISTER_F(BM_VecSimBasics, BM_FUNC) \ + ->UNIT_AND_ITERATIONS->Arg(VecSimAlgo) \ + ->ArgName(#VecSimAlgo) diff --git a/tests/benchmark/run_files/bm_basics_multi_fp32.cpp b/tests/benchmark/run_files/bm_basics_multi_fp32.cpp index 189e889f6..28c938f0f 100644 --- a/tests/benchmark/run_files/bm_basics_multi_fp32.cpp +++ b/tests/benchmark/run_files/bm_basics_multi_fp32.cpp @@ -35,4 +35,12 @@ DEFINE_DELETE_LABEL(BM_FUNC_NAME(DeleteLabel, Tiered), fp32_index_t, TieredHNSWI INDEX_TIERED_HNSW) #include "benchmark/bm_initialization/bm_basics_initialize_fp32.h" +// Test oscillations at block size boundaries.
+BENCHMARK_TEMPLATE_DEFINE_F(BM_VecSimBasics, CONCAT_WITH_UNDERSCORE_ARCH(UpdateAtBlockSize, Multi), + fp32_index_t) +(benchmark::State &st) { UpdateAtBlockSize(st); } +REGISTER_UpdateAtBlockSize(CONCAT_WITH_UNDERSCORE_ARCH(UpdateAtBlockSize, Multi), INDEX_BF); +REGISTER_UpdateAtBlockSize(CONCAT_WITH_UNDERSCORE_ARCH(UpdateAtBlockSize, Multi), INDEX_HNSW); +REGISTER_UpdateAtBlockSize(CONCAT_WITH_UNDERSCORE_ARCH(UpdateAtBlockSize, Multi), + INDEX_TIERED_HNSW); BENCHMARK_MAIN(); diff --git a/tests/benchmark/run_files/bm_basics_single_fp32.cpp b/tests/benchmark/run_files/bm_basics_single_fp32.cpp index b7a9c5fa8..aa931e901 100644 --- a/tests/benchmark/run_files/bm_basics_single_fp32.cpp +++ b/tests/benchmark/run_files/bm_basics_single_fp32.cpp @@ -31,5 +31,14 @@ DEFINE_DELETE_LABEL(BM_FUNC_NAME(DeleteLabel, HNSW), fp32_index_t, HNSWIndex_Sin INDEX_HNSW) DEFINE_DELETE_LABEL(BM_FUNC_NAME(DeleteLabel, Tiered), fp32_index_t, TieredHNSWIndex, float, float, INDEX_TIERED_HNSW) + +// Test oscillations at block size boundaries. +BENCHMARK_TEMPLATE_DEFINE_F(BM_VecSimBasics, CONCAT_WITH_UNDERSCORE_ARCH(UpdateAtBlockSize, Single), + fp32_index_t) +(benchmark::State &st) { UpdateAtBlockSize(st); } +REGISTER_UpdateAtBlockSize(CONCAT_WITH_UNDERSCORE_ARCH(UpdateAtBlockSize, Single), INDEX_BF); +REGISTER_UpdateAtBlockSize(CONCAT_WITH_UNDERSCORE_ARCH(UpdateAtBlockSize, Single), INDEX_HNSW); +REGISTER_UpdateAtBlockSize(CONCAT_WITH_UNDERSCORE_ARCH(UpdateAtBlockSize, Single), + INDEX_TIERED_HNSW); #include "benchmark/bm_initialization/bm_basics_initialize_fp32.h" BENCHMARK_MAIN();