diff --git a/src/VecSim/algorithms/brute_force/brute_force.h b/src/VecSim/algorithms/brute_force/brute_force.h index 3d640fa1c..7870f2cc4 100644 --- a/src/VecSim/algorithms/brute_force/brute_force.h +++ b/src/VecSim/algorithms/brute_force/brute_force.h @@ -54,6 +54,17 @@ class BruteForceIndex : public VecSimIndexAbstract { inline vecsim_stl::vector getVectorBlocks() const { return vectorBlocks; } virtual ~BruteForceIndex(); +#ifdef BUILD_TESTS + size_t getStoredVectorsCount() const { + size_t actual_stored_vec = 0; + for (auto &block : vectorBlocks) { + actual_stored_vec += block->getLength(); + } + + return actual_stored_vec; + } +#endif + protected: // Private internal function that implements generic single vector insertion. virtual int appendVector(const void *vector_data, labelType label); @@ -142,10 +153,14 @@ int BruteForceIndex::appendVector(const void *vector_data, l size_t idToLabelMapping_size = this->idToLabelMapping.size(); if (id >= idToLabelMapping_size) { + assert(indexCapacity() == idToLabelMapping.capacity()); + assert(idToLabelMapping.size() == idToLabelMapping.capacity()); size_t last_block_vectors_count = id % this->blockSize; - this->idToLabelMapping.resize( - idToLabelMapping_size + this->blockSize - last_block_vectors_count, 0); + size_t new_size = idToLabelMapping_size + this->blockSize - last_block_vectors_count; + assert(new_size % this->blockSize == 0); + this->idToLabelMapping.resize(new_size, 0); this->idToLabelMapping.shrink_to_fit(); + assert(idToLabelMapping.size() == idToLabelMapping.capacity()); } // add label to idToLabelMapping @@ -196,12 +211,20 @@ int BruteForceIndex::removeVector(idType id_to_delete) { // Resize and align the idToLabelMapping. 
size_t idToLabel_size = idToLabelMapping.size(); - // If the new size is smaller by at least one block comparing to the idToLabelMapping + // If the new size is smaller by at least two blocks comparing to the idToLabelMapping, + // or if the new size is 0 and the capacity is at least one block, // align to be a multiplication of blocksize and resize by one block. - if (this->count + this->blockSize <= idToLabel_size) { + if ((this->count + 2 * this->blockSize <= idToLabel_size) || + // Handle last block + (this->count == 0 && idToLabel_size >= this->blockSize)) { size_t vector_to_align_count = idToLabel_size % this->blockSize; - this->idToLabelMapping.resize(idToLabel_size - this->blockSize - vector_to_align_count); + size_t new_size = idToLabel_size - this->blockSize - vector_to_align_count; + assert(new_size >= this->count); + assert(new_size % this->blockSize == 0); + assert(idToLabelMapping.size() == idToLabelMapping.capacity()); + this->idToLabelMapping.resize(new_size); this->idToLabelMapping.shrink_to_fit(); + assert(idToLabelMapping.size() == idToLabelMapping.capacity()); } } diff --git a/src/VecSim/algorithms/brute_force/brute_force_friend_tests.h b/src/VecSim/algorithms/brute_force/brute_force_friend_tests.h index 66485cb79..f7842be6e 100644 --- a/src/VecSim/algorithms/brute_force/brute_force_friend_tests.h +++ b/src/VecSim/algorithms/brute_force/brute_force_friend_tests.h @@ -10,6 +10,7 @@ INDEX_TEST_FRIEND_CLASS(BruteForceTest_brute_force_vector_update_test_Test) INDEX_TEST_FRIEND_CLASS(BruteForceTest_resize_and_align_index_Test) INDEX_TEST_FRIEND_CLASS(BruteForceTest_resize_and_align_index_largeInitialCapacity_Test) +INDEX_TEST_FRIEND_CLASS(BruteForceTest_resize_and_align_index_smallInitialCapacity_Test) INDEX_TEST_FRIEND_CLASS(BruteForceTest_brute_force_empty_index_Test) INDEX_TEST_FRIEND_CLASS(BruteForceTest_brute_force_reindexing_same_vector_Test) INDEX_TEST_FRIEND_CLASS(BruteForceTest_brute_force_reindexing_same_vector_different_id_Test) @@ -17,4 
+18,5 @@ INDEX_TEST_FRIEND_CLASS(BruteForceTest_test_delete_swap_block_Test) INDEX_TEST_FRIEND_CLASS(BruteForceTest_test_dynamic_bf_info_iterator_Test) INDEX_TEST_FRIEND_CLASS(BruteForceTest_brute_force_zero_minimal_capacity_Test) INDEX_TEST_FRIEND_CLASS(BruteForceTest_preferAdHocOptimization_Test) +INDEX_TEST_FRIEND_CLASS(IndexAllocatorTest_test_bf_index_block_size_1_Test) INDEX_TEST_FRIEND_CLASS(BM_VecSimBasics) diff --git a/src/VecSim/algorithms/hnsw/hnsw.h b/src/VecSim/algorithms/hnsw/hnsw.h index 50a16859d..6098fc99d 100644 --- a/src/VecSim/algorithms/hnsw/hnsw.h +++ b/src/VecSim/algorithms/hnsw/hnsw.h @@ -1151,13 +1151,16 @@ int HNSWIndex::removeVector(const idType element_internal_id --cur_element_count; --max_id; - // If we need to free a complete block & there is a least one block between the - // capacity and the size. + // If the new size is at least two blocks smaller than the capacity (max_elements_), + // or if the new size is 0 and the capacity is at least one block, + // align the capacity to a multiple of blockSize and shrink it by one block. if (cur_element_count % this->blockSize == 0 && - cur_element_count + this->blockSize <= max_elements_) { + ((cur_element_count + 2 * this->blockSize <= max_elements_) || + (cur_element_count == 0 && max_elements_ >= this->blockSize))) { // Check if the capacity is aligned to block size. size_t extra_space_to_free = max_elements_ % this->blockSize; + assert(max_elements_ - this->blockSize - extra_space_to_free >= cur_element_count); // Remove one block from the capacity. this->resizeIndex(max_elements_ - this->blockSize - extra_space_to_free); diff --git a/tests/unit/test_allocator.cpp b/tests/unit/test_allocator.cpp index 5ae4a0f80..d4d20b67f 100644 --- a/tests/unit/test_allocator.cpp +++ b/tests/unit/test_allocator.cpp @@ -93,11 +93,12 @@ TYPED_TEST(IndexAllocatorTest, test_bf_index_block_size_1) { ASSERT_EQ(allocator->getAllocationSize(), expectedAllocationSize); // Create only the minimal struct. 
size_t dim = 128; + size_t blockSize = 1; BFParams params = {.type = TypeParam::get_index_type(), .dim = dim, .metric = VecSimMetric_IP, .initialCapacity = 0, - .blockSize = 1}; + .blockSize = blockSize}; TEST_DATA_T vec[128] = {}; BruteForceIndex_Single *bfIndex = @@ -108,94 +109,228 @@ TYPED_TEST(IndexAllocatorTest, test_bf_index_block_size_1) { size_t memory = VecSimIndex_StatsInfo(bfIndex).memory; ASSERT_EQ(allocator->getAllocationSize(), memory); + // @param expected_size - The expected number of elements in the index. + // @param expected_data_container_blocks - The expected number of blocks in the data containers. + // @param expected_map_containers_size - The expected capacity of the map containers in + // number of elements. + auto verify_containers_size = [&](size_t expected_size, size_t expected_data_container_blocks, + size_t expected_map_containers_size) { + ASSERT_EQ(bfIndex->indexSize(), expected_size); + ASSERT_EQ(bfIndex->vectorBlocks.size(), expected_data_container_blocks); + ASSERT_EQ(bfIndex->getStoredVectorsCount(), expected_size); + + ASSERT_EQ(bfIndex->indexCapacity(), expected_map_containers_size); + ASSERT_EQ(bfIndex->idToLabelMapping.capacity(), expected_map_containers_size); + ASSERT_EQ(bfIndex->idToLabelMapping.size(), expected_map_containers_size); + ASSERT_GE(bfIndex->labelToIdLookup.bucket_count(), expected_map_containers_size); + }; + // =========== Add label 1 =========== + size_t buckets_num_before = bfIndex->labelToIdLookup.bucket_count(); + auto &vectors_blocks = bfIndex->vectorBlocks; + size_t vectors_blocks_capacity = vectors_blocks.capacity(); + int addCommandAllocationDelta = VecSimIndex_AddVector(bfIndex, vec, 1); - int64_t expectedAllocationDelta = 0; - expectedAllocationDelta += + int64_t expectedAllocationDelta = sizeof(labelType) + vecsimAllocationOverhead; // resize idToLabelMapping - expectedAllocationDelta += sizeof(VectorBlock) + vecsimAllocationOverhead; // New vector block expectedAllocationDelta += - 
sizeof(TEST_DATA_T) * dim + vecsimAllocationOverhead; // keep the vector in the vector block + (vectors_blocks.capacity() - vectors_blocks_capacity) * sizeof(VectorBlock *) + + vecsimAllocationOverhead; // New vectors blocks pointers expectedAllocationDelta += - sizeof(VectorBlock *) + vecsimAllocationOverhead; // Keep the allocated vector block + blockSize * sizeof(TEST_DATA_T) * dim + vecsimAllocationOverhead; // block vectors buffer expectedAllocationDelta += - sizeof(std::pair) + vecsimAllocationOverhead; // keep the mapping + sizeof(VectorBlock) + vecsimAllocationOverhead; // Keep the allocated vector block + expectedAllocationDelta += hashTableNodeSize; // New node in the label lookup + // Account for the allocation of a new buckets in the labels_lookup hash table. + expectedAllocationDelta += + (bfIndex->labelToIdLookup.bucket_count() - buckets_num_before) * sizeof(size_t); // Assert that the additional allocated delta did occur, and it is limited, as some STL // collection allocate additional structures for their internal implementation. - ASSERT_EQ(allocator->getAllocationSize(), expectedAllocationSize + addCommandAllocationDelta); - ASSERT_LE(expectedAllocationSize + expectedAllocationDelta, allocator->getAllocationSize()); - ASSERT_LE(expectedAllocationDelta, addCommandAllocationDelta); - memory = VecSimIndex_StatsInfo(bfIndex).memory; - ASSERT_EQ(allocator->getAllocationSize(), memory); + { + SCOPED_TRACE("Verifying allocation delta for adding first vector"); + verify_containers_size(1, 1, 1); + ASSERT_EQ(allocator->getAllocationSize(), + expectedAllocationSize + addCommandAllocationDelta); + ASSERT_LE(expectedAllocationSize + expectedAllocationDelta, allocator->getAllocationSize()); + ASSERT_LE(expectedAllocationDelta, addCommandAllocationDelta); + memory = VecSimIndex_StatsInfo(bfIndex).memory; + ASSERT_EQ(allocator->getAllocationSize(), memory); + } + + // =========== labels = [1], vector blocks = 1, maps capacity = 1. 
Add label 2 + 3 =========== // Prepare for next assertion test expectedAllocationSize = memory; expectedAllocationDelta = 0; + vectors_blocks_capacity = vectors_blocks.capacity(); + buckets_num_before = bfIndex->labelToIdLookup.bucket_count(); + addCommandAllocationDelta = VecSimIndex_AddVector(bfIndex, vec, 2); - expectedAllocationDelta += sizeof(VectorBlock) + vecsimAllocationOverhead; // New vector block - expectedAllocationDelta += sizeof(labelType); // resize idToLabelMapping + addCommandAllocationDelta += VecSimIndex_AddVector(bfIndex, vec, 3); + expectedAllocationDelta += (vectors_blocks.capacity() - vectors_blocks_capacity) * + sizeof(VectorBlock *); // New vector blocks pointers + expectedAllocationDelta += 2 * sizeof(labelType); // resize idToLabelMapping + expectedAllocationDelta += 2 * (blockSize * sizeof(TEST_DATA_T) * dim + + vecsimAllocationOverhead); // Two block vectors buffer expectedAllocationDelta += - sizeof(TEST_DATA_T) * dim + vecsimAllocationOverhead; // keep the vector in the vector block + 2 * (sizeof(VectorBlock) + vecsimAllocationOverhead); // Keep the allocated vector blocks + expectedAllocationDelta += 2 * hashTableNodeSize; // New nodes in the label lookup expectedAllocationDelta += - sizeof(VectorBlock *) + vecsimAllocationOverhead; // Keep the allocated vector block + (bfIndex->labelToIdLookup.bucket_count() - buckets_num_before) * sizeof(size_t); + { + SCOPED_TRACE("Index size = Verifying allocation delta for adding two more vectors"); + verify_containers_size(3, 3, 3); + ASSERT_EQ(allocator->getAllocationSize(), + expectedAllocationSize + addCommandAllocationDelta); + ASSERT_EQ(expectedAllocationSize + expectedAllocationDelta, allocator->getAllocationSize()); + ASSERT_EQ(expectedAllocationDelta, addCommandAllocationDelta); + memory = VecSimIndex_StatsInfo(bfIndex).memory; + ASSERT_EQ(allocator->getAllocationSize(), memory); + } + + // =========== labels = [1, 2, 3], vector blocks = 3, maps capacity = 3. 
Delete label 1 + // =========== + + // Prepare for next assertion test + expectedAllocationSize = memory; + expectedAllocationDelta = 0; + + vectors_blocks_capacity = vectors_blocks.capacity(); + buckets_num_before = bfIndex->labelToIdLookup.bucket_count(); + { + SCOPED_TRACE("Verifying allocation delta for deleting a vector from index size 3"); + int deleteCommandAllocationDelta = VecSimIndex_DeleteVector(bfIndex, 1); + verify_containers_size(2, 2, 3); + // Removing blocks doesn't change vectors_blocks.capacity(), but the block buffer is freed. + ASSERT_EQ(vectors_blocks.capacity(), vectors_blocks_capacity); + expectedAllocationDelta -= + (sizeof(VectorBlock) + vecsimAllocationOverhead); // Free the vector block + expectedAllocationDelta -= + blockSize * sizeof(TEST_DATA_T) * dim + + vecsimAllocationOverhead; // Free the vector buffer in the vector block + expectedAllocationDelta -= hashTableNodeSize; // Remove node from the label lookup + // idToLabelMapping and label:id should not change since count > capacity - 2 * blockSize + ASSERT_EQ(bfIndex->labelToIdLookup.bucket_count(), buckets_num_before); + + ASSERT_EQ(allocator->getAllocationSize(), + expectedAllocationSize + deleteCommandAllocationDelta); + ASSERT_EQ(expectedAllocationSize + expectedAllocationDelta, allocator->getAllocationSize()); + ASSERT_EQ(expectedAllocationDelta, deleteCommandAllocationDelta); + + memory = VecSimIndex_StatsInfo(bfIndex).memory; + ASSERT_EQ(allocator->getAllocationSize(), memory); + } + + // =========== labels = [2, 3], vector blocks = 2, maps capacity = 3. 
Add label 4 =========== + + // Prepare for next assertion test + expectedAllocationSize = memory; + expectedAllocationDelta = 0; + + vectors_blocks_capacity = vectors_blocks.capacity(); + buckets_num_before = bfIndex->labelToIdLookup.bucket_count(); + size_t idToLabel_size_before = bfIndex->idToLabelMapping.size(); + + addCommandAllocationDelta = VecSimIndex_AddVector(bfIndex, vec, 4); + expectedAllocationDelta += (vectors_blocks.capacity() - vectors_blocks_capacity) * + sizeof(VectorBlock *); // New vector block pointers expectedAllocationDelta += - sizeof(std::pair) + vecsimAllocationOverhead; // keep the mapping - // Assert that the additional allocated delta did occur, and it is limited, as some STL - // collection allocate additional structures for their internal implementation. - ASSERT_EQ(allocator->getAllocationSize(), expectedAllocationSize + addCommandAllocationDelta); - ASSERT_LE(expectedAllocationSize + expectedAllocationDelta, allocator->getAllocationSize()); - ASSERT_LE(expectedAllocationDelta, addCommandAllocationDelta); - memory = VecSimIndex_StatsInfo(bfIndex).memory; - ASSERT_EQ(allocator->getAllocationSize(), memory); + sizeof(VectorBlock) + vecsimAllocationOverhead; // Keep the allocated vector blocks + + expectedAllocationDelta += + blockSize * sizeof(TEST_DATA_T) * dim + vecsimAllocationOverhead; // block vectors buffer + expectedAllocationDelta += hashTableNodeSize; // New node in the label lookup + { + SCOPED_TRACE( + "Verifying allocation delta for adding a vector to index size 2 with capacity 3"); + verify_containers_size(3, 3, 3); + ASSERT_EQ(allocator->getAllocationSize(), + expectedAllocationSize + addCommandAllocationDelta); + ASSERT_EQ(expectedAllocationSize + expectedAllocationDelta, allocator->getAllocationSize()); + ASSERT_EQ(expectedAllocationDelta, addCommandAllocationDelta); + memory = VecSimIndex_StatsInfo(bfIndex).memory; + ASSERT_EQ(allocator->getAllocationSize(), memory); + + // idToLabelMapping and label:id should not 
change since we have one free block + ASSERT_EQ(bfIndex->labelToIdLookup.bucket_count(), buckets_num_before); + ASSERT_EQ(bfIndex->idToLabelMapping.size(), idToLabel_size_before); + } + + // =========== labels = [2, 3, 4], vector blocks = 3, maps capacity = 3. Delete label 2 + 3 + // =========== // Prepare for next assertion test expectedAllocationSize = memory; expectedAllocationDelta = 0; - int deleteCommandAllocationDelta = VecSimIndex_DeleteVector(bfIndex, 2); - expectedAllocationDelta -= - (sizeof(VectorBlock) + vecsimAllocationOverhead); // Free the vector block - expectedAllocationDelta -= - sizeof(TEST_DATA_T) * dim + vecsimAllocationOverhead; // Free the vector in the vector block - expectedAllocationDelta -= sizeof(VectorBlock *); // remove from vectorBlocks vector - expectedAllocationDelta -= sizeof(labelType); // resize idToLabelMapping - expectedAllocationDelta -= - sizeof(std::pair) + vecsimAllocationOverhead; // remove one label:id pair - - // Assert that the reclaiming of memory did occur, and it is limited, as some STL - // collection allocate additional structures for their internal implementation. - ASSERT_EQ(allocator->getAllocationSize(), - expectedAllocationSize + deleteCommandAllocationDelta); - ASSERT_GE(expectedAllocationSize + expectedAllocationDelta, allocator->getAllocationSize()); - ASSERT_GE(expectedAllocationDelta, deleteCommandAllocationDelta); + vectors_blocks_capacity = vectors_blocks.capacity(); + buckets_num_before = bfIndex->labelToIdLookup.bucket_count(); + { + SCOPED_TRACE("Verifying allocation delta for deleting two vectors from index size 3"); + int deleteCommandAllocationDelta = VecSimIndex_DeleteVector(bfIndex, 2); + deleteCommandAllocationDelta += VecSimIndex_DeleteVector(bfIndex, 3); + verify_containers_size(1, 1, 2); + // Removing blocks doesn't change vectors_blocks.capacity(), but the block buffer is freed. 
+ ASSERT_EQ(vectors_blocks.capacity(), vectors_blocks_capacity); + expectedAllocationDelta -= + 2 * (blockSize * sizeof(TEST_DATA_T) * dim + + vecsimAllocationOverhead); // Free the vector buffer in the vector block + expectedAllocationDelta -= + 2 * (sizeof(VectorBlock) + vecsimAllocationOverhead); // Free the vector block + + expectedAllocationDelta -= 2 * hashTableNodeSize; // Remove nodes from the label lookup + // idToLabelMapping and label:id should shrink by block since count <= capacity - 2 * + // blockSize + expectedAllocationDelta -= sizeof(labelType); // remove one idToLabelMapping + expectedAllocationDelta -= + (buckets_num_before - bfIndex->labelToIdLookup.bucket_count()) * sizeof(size_t); + ASSERT_EQ(allocator->getAllocationSize(), + expectedAllocationSize + deleteCommandAllocationDelta); + ASSERT_EQ(expectedAllocationSize + expectedAllocationDelta, allocator->getAllocationSize()); + ASSERT_EQ(expectedAllocationDelta, deleteCommandAllocationDelta); + + memory = VecSimIndex_StatsInfo(bfIndex).memory; + ASSERT_EQ(allocator->getAllocationSize(), memory); + } - memory = VecSimIndex_StatsInfo(bfIndex).memory; - ASSERT_EQ(allocator->getAllocationSize(), memory); + // =========== labels = [4], vector blocks = 1, maps capacity = 2. 
Delete last label =========== // Prepare for next assertion test expectedAllocationSize = memory; expectedAllocationDelta = 0; - deleteCommandAllocationDelta = VecSimIndex_DeleteVector(bfIndex, 1); - expectedAllocationDelta -= - (sizeof(VectorBlock) + vecsimAllocationOverhead); // Free the vector block - expectedAllocationDelta -= - sizeof(VectorBlock *) + vecsimAllocationOverhead; // remove from vectorBlocks vector - expectedAllocationDelta -= - sizeof(labelType) + vecsimAllocationOverhead; // resize idToLabelMapping - expectedAllocationDelta -= (sizeof(TEST_DATA_T) * dim + - vecsimAllocationOverhead); // Free the vector in the vector block - expectedAllocationDelta -= - sizeof(std::pair) + vecsimAllocationOverhead; // remove one label:id pair + vectors_blocks_capacity = vectors_blocks.capacity(); + buckets_num_before = bfIndex->labelToIdLookup.bucket_count(); + { + SCOPED_TRACE("Verifying allocation delta for emptying the index"); + int deleteCommandAllocationDelta = VecSimIndex_DeleteVector(bfIndex, 4); + + // We decrease meta data containers size by one block + verify_containers_size(0, 0, 1); + // Removing blocks doesn't change vectors_blocks.capacity(), but the block buffer is freed. 
+ ASSERT_EQ(vectors_blocks.capacity(), vectors_blocks_capacity); + expectedAllocationDelta -= + blockSize * sizeof(TEST_DATA_T) * dim + + vecsimAllocationOverhead; // Free the vector buffer in the vector block + expectedAllocationDelta -= + (sizeof(VectorBlock) + vecsimAllocationOverhead); // Free the vector block + expectedAllocationDelta -= hashTableNodeSize; // Remove nodes from the label lookup + // idToLabelMapping and label:id should shrink by block since count <= capacity - 2 * + // blockSize + expectedAllocationDelta -= + sizeof(labelType); // remove one idToLabelMapping and free the container + // resizing labelToIdLookup + size_t buckets_after = bfIndex->labelToIdLookup.bucket_count(); + expectedAllocationDelta -= (buckets_num_before - buckets_after) * sizeof(size_t); + ASSERT_EQ(allocator->getAllocationSize(), + expectedAllocationSize + deleteCommandAllocationDelta); + ASSERT_LE(abs(expectedAllocationDelta), abs(deleteCommandAllocationDelta)); + ASSERT_GE(expectedAllocationSize + expectedAllocationDelta, allocator->getAllocationSize()); + + memory = VecSimIndex_StatsInfo(bfIndex).memory; + ASSERT_EQ(allocator->getAllocationSize(), memory); + } - // Assert that the reclaiming of memory did occur, and it is limited, as some STL - // collection allocate additional structures for their internal implementation. - ASSERT_EQ(allocator->getAllocationSize(), - expectedAllocationSize + deleteCommandAllocationDelta); - ASSERT_LE(expectedAllocationSize + expectedAllocationDelta, allocator->getAllocationSize()); - ASSERT_LE(expectedAllocationDelta, deleteCommandAllocationDelta); - memory = VecSimIndex_StatsInfo(bfIndex).memory; - ASSERT_EQ(allocator->getAllocationSize(), memory); VecSimIndex_Free(bfIndex); } @@ -355,71 +490,118 @@ TYPED_TEST(IndexAllocatorTest, test_hnsw_reclaim_memory) { // Add vectors up to the size of a whole block, and calculate the total memory delta. 
size_t block_size = hnswIndex->debugInfo().hnswInfo.blockSize; - size_t accumulated_mem_delta = 0; - - for (size_t i = 0; i < block_size; i++) { - accumulated_mem_delta += GenerateAndAddVector(hnswIndex, d, i, i); + // Allocations caused by adding a new vector: + auto compute_vec_mem = [&](idType id) { + // Compute the expected memory allocation due to the last vector insertion. + size_t vec_max_level = hnswIndex->element_levels_[id]; + // Incoming edges + size_t new_vec_mem_delta = + (vec_max_level + 1) * (sizeof(vecsim_stl::vector) + vecsimAllocationOverhead); + if (vec_max_level > 0) { + new_vec_mem_delta += + hnswIndex->size_links_per_element_ * vec_max_level + vecsimAllocationOverhead; + } + // new node in the labels_lookup hash table + new_vec_mem_delta += hashTableNodeSize; + + return new_vec_mem_delta; + }; + // Add the first vector to store the first block allocation delta. + size_t initial_one_block_mem_delta = GenerateAndAddVector(hnswIndex, d, 0, 0); + size_t one_block_mem_delta = initial_one_block_mem_delta; + size_t initial_buckets_count = hnswIndex->label_lookup_.bucket_count(); + for (size_t i = 1; i < block_size; i++) { + one_block_mem_delta += GenerateAndAddVector(hnswIndex, d, i, i); } + + // Remove the memory allocated for the first vector, since it is added as a "dummy" vector. + initial_one_block_mem_delta -= compute_vec_mem(0); + size_t one_block_buckets = hnswIndex->label_lookup_.bucket_count(); + // @param expected_size - The expected number of elements in the index. + // @param expected_capacity - The expected capacity in elements. 
+ auto verify_containers_size = [&](size_t expected_size, size_t expected_capacity) { + SCOPED_TRACE("Verifying containers size for size " + std::to_string(expected_size)); + ASSERT_EQ(hnswIndex->indexSize(), expected_size); + ASSERT_EQ(hnswIndex->indexCapacity(), expected_capacity); + ASSERT_EQ(hnswIndex->indexCapacity(), hnswIndex->max_elements_); + ASSERT_EQ(hnswIndex->element_levels_.size(), expected_capacity); + ASSERT_EQ(hnswIndex->element_levels_.size(), hnswIndex->element_levels_.capacity()); + + ASSERT_GE(hnswIndex->label_lookup_.bucket_count(), expected_capacity); + // Also validate that there are no unidirectional connections (these add memory to the + // incoming edges sets). + ASSERT_EQ(hnswIndex->checkIntegrity().unidirectional_connections, 0); + }; + // Validate that a single block exists. - ASSERT_EQ(hnswIndex->indexSize(), block_size); - ASSERT_EQ(hnswIndex->indexCapacity(), block_size); - ASSERT_EQ(allocator->getAllocationSize(), initial_memory_size + accumulated_mem_delta); - // Also validate that there are no unidirectional connections (these add memory to the incoming - // edges sets). - ASSERT_EQ(hnswIndex->checkIntegrity().unidirectional_connections, 0); + verify_containers_size(block_size, block_size); + ASSERT_EQ(allocator->getAllocationSize(), initial_memory_size + one_block_mem_delta); // Add another vector, expect resizing of the index to contain two blocks. size_t prev_bucket_count = hnswIndex->label_lookup_.bucket_count(); size_t mem_delta = GenerateAndAddVector(hnswIndex, d, block_size, block_size); + verify_containers_size(block_size + 1, 2 * block_size); - ASSERT_EQ(hnswIndex->indexSize(), block_size + 1); - ASSERT_EQ(hnswIndex->indexCapacity(), 2 * block_size); - ASSERT_EQ(hnswIndex->checkIntegrity().unidirectional_connections, 0); - - // Compute the expected memory allocation due to the last vector insertion. 
- size_t vec_max_level = hnswIndex->element_levels_[block_size]; - size_t expected_mem_delta = - (vec_max_level + 1) * (sizeof(vecsim_stl::vector) + vecsimAllocationOverhead) + - hashTableNodeSize; - if (vec_max_level > 0) { - expected_mem_delta += - hnswIndex->size_links_per_element_ * vec_max_level + 1 + vecsimAllocationOverhead; - } - // Also account for all the memory allocation caused by the resizing that this vector triggered - // except for the bucket count of the labels_lookup hash table that is calculated separately. - size_t size_total_data_per_element = hnswIndex->size_data_per_element_; - expected_mem_delta += - (sizeof(tag_t) + sizeof(void *) + sizeof(size_t) + size_total_data_per_element) * + // Allocations caused by adding a block: + // element_levels, data_level0_memory_, linkLists_, visitedNodesHandlerPool + size_t containers_mem = + (sizeof(size_t) + hnswIndex->size_data_per_element_ + sizeof(void *) + sizeof(tag_t)) * block_size; - expected_mem_delta += + int hash_table_mem_delta = (hnswIndex->label_lookup_.bucket_count() - prev_bucket_count) * sizeof(size_t); + size_t add_one_block_mem_delta = containers_mem + hash_table_mem_delta; + size_t new_vec_mem_delta = compute_vec_mem(block_size); - ASSERT_EQ(expected_mem_delta, mem_delta); + ASSERT_EQ(add_one_block_mem_delta + new_vec_mem_delta, mem_delta); - // Remove the last vector, expect resizing back to a single block, and return to the previous - // memory consumption. - VecSimIndex_DeleteVector(hnswIndex, block_size); - ASSERT_EQ(hnswIndex->indexSize(), block_size); - ASSERT_EQ(hnswIndex->indexCapacity(), block_size); - ASSERT_EQ(hnswIndex->checkIntegrity().unidirectional_connections, 0); - ASSERT_EQ(allocator->getAllocationSize(), initial_memory_size + accumulated_mem_delta); + // Remove the last vector, since size + 2 * block_size > capacity, expect the containers size to + // NOT change. Only the vector's memory is freed. 
+ int delete_vec_mem_delta = VecSimIndex_DeleteVector(hnswIndex, block_size); + verify_containers_size(block_size, 2 * block_size); - // Remove the rest of the vectors, and validate that the memory returns to its initial state. - for (size_t i = 0; i < block_size; i++) { - VecSimIndex_DeleteVector(hnswIndex, i); + ASSERT_EQ(static_cast(-delete_vec_mem_delta), new_vec_mem_delta); + + // Remove the rest of the vectors, meta data containers size should decrease by one block. + prev_bucket_count = hnswIndex->label_lookup_.bucket_count(); + size_t expected_delete_mem_delta = 0; + int delete_block_mem_delta = 0; + for (int i = block_size - 1; i >= 0; i--) { + expected_delete_mem_delta += compute_vec_mem(i); + delete_block_mem_delta += VecSimIndex_DeleteVector(hnswIndex, i); } + verify_containers_size(0, block_size); + + hash_table_mem_delta = + (prev_bucket_count - hnswIndex->label_lookup_.bucket_count()) * sizeof(size_t); + expected_delete_mem_delta += hash_table_mem_delta + containers_mem; + + ASSERT_EQ(static_cast(-delete_block_mem_delta), expected_delete_mem_delta); + + // Adding a vector should not cause new containers memory, only the vector's memory. + mem_delta = GenerateAndAddVector(hnswIndex, d, 0); + verify_containers_size(1, block_size); + + ASSERT_EQ(mem_delta, new_vec_mem_delta); + ASSERT_EQ(initial_memory_size + initial_one_block_mem_delta + mem_delta, + allocator->getAllocationSize()); + + // Delete the new (and only vec) + // Since the index is empty, and the capacity equals block_size, the containers should shrink to + // 0. 
+ prev_bucket_count = hnswIndex->label_lookup_.bucket_count(); + delete_vec_mem_delta = VecSimIndex_DeleteVector(hnswIndex, 0); + verify_containers_size(0, 0); + hash_table_mem_delta = + (initial_buckets_count - hnswIndex->label_lookup_.bucket_count()) * sizeof(size_t); + ASSERT_LE(hnswIndex->label_lookup_.bucket_count(), initial_buckets_count); + ASSERT_LE(containers_mem, initial_one_block_mem_delta); + ASSERT_GE(initial_memory_size + + (initial_one_block_mem_delta - containers_mem - hash_table_mem_delta), + allocator->getAllocationSize()); + hash_table_mem_delta = + (prev_bucket_count - hnswIndex->label_lookup_.bucket_count()) * sizeof(size_t); + expected_delete_mem_delta = hash_table_mem_delta + containers_mem + mem_delta; + ASSERT_LE(expected_delete_mem_delta, static_cast(-delete_vec_mem_delta)); - ASSERT_EQ(hnswIndex->indexSize(), 0); - ASSERT_EQ(hnswIndex->indexCapacity(), 0); - // All data structures' memory returns to as it was, with the exceptional of the labels_lookup - // (STL unordered_map with hash table implementation), that leaves some empty buckets. - size_t hash_table_memory = hnswIndex->label_lookup_.bucket_count() * sizeof(size_t); - // Current memory should be back as it was initially. The label_lookup hash table is an - // exception, since in some platforms, empty buckets remain even when the capacity is set to - // zero, while in others the entire capacity reduced to zero (including the header). 
- ASSERT_LE(allocator->getAllocationSize(), HNSWFactory::EstimateInitialSize(&params) + - hash_table_memory + 2 * vecsimAllocationOverhead); - ASSERT_GE(allocator->getAllocationSize(), - HNSWFactory::EstimateInitialSize(&params) + hash_table_memory); VecSimIndex_Free(hnswIndex); } diff --git a/tests/unit/test_bruteforce.cpp b/tests/unit/test_bruteforce.cpp index ac619af35..9b74a7a68 100644 --- a/tests/unit/test_bruteforce.cpp +++ b/tests/unit/test_bruteforce.cpp @@ -108,52 +108,175 @@ TYPED_TEST(BruteForceTest, brute_force_vector_update_test) { TYPED_TEST(BruteForceTest, resize_and_align_index) { size_t dim = 4; - size_t n = 14; size_t blockSize = 10; + size_t curr_label = 0; - BFParams params = { - .dim = dim, .metric = VecSimMetric_L2, .initialCapacity = n, .blockSize = blockSize}; + BFParams params = {.dim = dim, .metric = VecSimMetric_L2, .blockSize = blockSize}; VecSimIndex *index = this->CreateNewIndex(params); BruteForceIndex *bf_index = this->CastToBF(index); - ASSERT_EQ(VecSimIndex_IndexSize(index), 0); + auto verify_index_size = [&](size_t expected_size, size_t expected_capacity, std::string msg) { + SCOPED_TRACE("verify_index_size: " + msg); + ASSERT_EQ(VecSimIndex_IndexSize(index), expected_size); + ASSERT_EQ(bf_index->idToLabelMapping.size(), expected_capacity); + ASSERT_EQ(bf_index->indexCapacity(), expected_capacity); + ASSERT_EQ(bf_index->vectorBlocks.size(), + expected_size / blockSize + (expected_size % blockSize != 0)) + << "expected_size: " << expected_size << " expected_capacity: " << expected_capacity; + ASSERT_EQ(bf_index->getStoredVectorsCount(), expected_size); + }; + // Empty index with no initial capacity + verify_index_size(0, 0, "empty index"); - for (size_t i = 0; i < n; i++) { - GenerateAndAddVector(index, dim, i, i); + // Add one vector, index capacity should grow to blockSize. 
+ GenerateAndAddVector(index, dim, curr_label++); + verify_index_size(1, blockSize, "add one vector"); + + // Add vector up to blocksize, index capacity should remain the same. + while (curr_label < blockSize) { + GenerateAndAddVector(index, dim, curr_label++); } - ASSERT_EQ(bf_index->idToLabelMapping.size(), n); - ASSERT_EQ(VecSimIndex_IndexSize(index), n); + verify_index_size(blockSize, blockSize, "add up to blocksize"); // remove invalid id VecSimIndex_DeleteVector(index, 3459); // This should do nothing - ASSERT_EQ(VecSimIndex_IndexSize(index), n); - ASSERT_EQ(bf_index->idToLabelMapping.size(), n); + verify_index_size(blockSize, blockSize, "remove invalid id"); - // Add another vector, since index size equals to the capacity, this should cause resizing - // (to fit a multiplication of block_size). - GenerateAndAddVector(index, dim, n); - ASSERT_EQ(VecSimIndex_IndexSize(index), n + 1); - // Check new capacity size, should be blockSize * 2. - ASSERT_EQ(bf_index->idToLabelMapping.size(), 2 * blockSize); - ASSERT_EQ(bf_index->idToLabelMapping.capacity(), 2 * blockSize); + // Add another vector, since index size equals to block size, this should cause resizing + // of the capacity by one blocksize + GenerateAndAddVector(index, dim, curr_label++); + verify_index_size(blockSize + 1, 2 * blockSize, + "add one more vector after reaching block size"); - // Now size = n + 1 (= 15), capacity = 2 * bs (= 20). Test capacity overflow again + // Now size = blocksize + 1 (= 11), capacity = 2 * bs (= 20). Test capacity overflow again // to check that it stays aligned with block size. + size_t add_vectors_count = blockSize + 2; // 12 + while (curr_label < add_vectors_count + blockSize + 1) { + GenerateAndAddVector(index, dim, curr_label++); + } + + // Size should be blocksize + 1 + add_vectors_count (= 23). 
+ verify_index_size( + blockSize + 1 + add_vectors_count, 3 * blockSize, + "add more vectors after reaching 2 * blocksize capacity to trigger another resize"); + + // Delete vectors so that indexsize % blocksize == 0 (and then delete one more) + size_t num_deleted = 0; + auto remove_to_one_below_blocksize = [&](size_t initial_label_to_remove) { + while (VecSimIndex_IndexSize(index) % blockSize != 0) { + VecSimIndex_DeleteVector(index, initial_label_to_remove++); + num_deleted++; + } + VecSimIndex_DeleteVector(index, initial_label_to_remove); + num_deleted++; + }; + + // First trigger of remove_to_one_below_blocksize will result in one free block. + // This should not trigger shrinking of metadata containers. + remove_to_one_below_blocksize(0); // remove first block labels. + verify_index_size( + blockSize * 2 - 1, 3 * blockSize, + "delete vectors so that indexsize % blocksize == 0, but there is only one free block"); + + // Second trigger of remove_to_one_below_blocksize will result in two free blocks. + // This should trigger shrinking of metadata containers by one block. + remove_to_one_below_blocksize(num_deleted); + verify_index_size( + blockSize - 1, 2 * blockSize, + "delete vectors so that indexsize % blocksize == 0 and there are two free blocks"); + + // Now there is one block in use and one free. adding vectors up to blocksize should not trigger + // another resize. + GenerateAndAddVector(index, dim, curr_label++); + verify_index_size(blockSize, 2 * blockSize, + "add vectors up to blocksize after deleting two blocks"); - size_t add_vectors_count = 8; - for (size_t i = 0; i < add_vectors_count; i++) { - GenerateAndAddVector(index, dim, n + 2 + i, i); + // Delete all vectors. + while (VecSimIndex_IndexSize(index) > 0) { + VecSimIndex_DeleteVector(index, num_deleted++); } - // Size should be n + 1 + 8 (= 25). - ASSERT_EQ(VecSimIndex_IndexSize(index), n + 1 + add_vectors_count); + // We shrink the capacity by blocksize.
+ verify_index_size(0, blockSize, "delete all vectors"); - // Check new capacity size, should be blockSize * 3. - ASSERT_EQ(bf_index->idToLabelMapping.size(), 3 * blockSize); - ASSERT_EQ(bf_index->idToLabelMapping.capacity(), 3 * blockSize); + // Add one vector and delete it to verify we shrink to zero. + GenerateAndAddVector(index, dim, 0); + verify_index_size(1, blockSize, "add one vector after deleting all"); + VecSimIndex_DeleteVector(index, 0); + verify_index_size(0, 0, "delete the only vector to verify shrinking to zero"); + + VecSimIndex_Free(index); +} + +TYPED_TEST(BruteForceTest, brute_force_no_oscillation_test) { + size_t dim = 4; + size_t blockSize = 2; + size_t cycles = 5; // Number of add/delete cycles to test + + BFParams params = {.dim = dim, .metric = VecSimMetric_L2, .blockSize = blockSize}; + VecSimIndex *index = this->CreateNewIndex(params); + BruteForceIndex *bf_index = this->CastToBF(index); + + auto verify_no_oscillation = [&](size_t expected_size, size_t expected_capacity, + const std::string &msg) { + SCOPED_TRACE("verify_no_oscillation: " + msg); + ASSERT_EQ(VecSimIndex_IndexSize(index), expected_size); + ASSERT_EQ(bf_index->indexCapacity(), expected_capacity); + }; + + // Initial state: empty index + verify_no_oscillation(0, 0, "initial empty state"); + + size_t current_label = 0; + + // Add initial 3 blocks + size_t initial_num_blocks = 3; + for (size_t i = 0; i < initial_num_blocks * blockSize; i++) { + GenerateAndAddVector(index, dim, current_label++); + } + verify_no_oscillation(initial_num_blocks * blockSize, initial_num_blocks * blockSize, + "initial " + std::to_string(initial_num_blocks) + + " blocks vectors added"); + + // Perform oscillation cycles: delete block, add block, delete block, add block... 
+ for (size_t cycle = 0; cycle < cycles; cycle++) { + // Delete blockSize vectors (size becomes 2 * blockSize, but capacity should remain 3 * + // blockSize due to buffer zone) + for (size_t i = 0; i < blockSize; i++) { + VecSimIndex_DeleteVector(index, cycle * blockSize + i); + } + verify_no_oscillation((initial_num_blocks - 1) * blockSize, initial_num_blocks * blockSize, + "cycle " + std::to_string(cycle) + + " - after deleting block of vectors"); + + // Add blockSize vectors back (size becomes 3 * blockSize, capacity should remain 3 * + // blockSize) + for (size_t i = 0; i < blockSize; i++) { + GenerateAndAddVector(index, dim, current_label++); + } + verify_no_oscillation(initial_num_blocks * blockSize, initial_num_blocks * blockSize, + "cycle " + std::to_string(cycle) + + " - after adding blockSize vectors back"); + } + + // Final verification: delete enough vectors to trigger actual shrinking + // Delete 2 * blockSize vectors to have only one block of vectors (2 free blocks = shrinking + // condition) + size_t vectors_to_delete = 2 * blockSize; + for (size_t i = 0; i < vectors_to_delete; i++) { + VecSimIndex_DeleteVector(index, cycles * blockSize + i); + } + verify_no_oscillation(blockSize, 2 * blockSize, + "final shrinking to trigger shrinking by one block"); + + // Verify we can still grow normally after the oscillation test + for (size_t i = 0; i < blockSize; i++) { + GenerateAndAddVector(index, dim, current_label++); + } + verify_no_oscillation(2 * blockSize, 2 * blockSize, "growth after oscillation test"); VecSimIndex_Free(index); } @@ -188,7 +311,7 @@ TYPED_TEST(BruteForceTest, resize_and_align_index_largeInitialCapacity) { // Index size = bs = 3.
ASSERT_EQ(VecSimIndex_IndexSize(index), bs); - // New idToLabelMapping size = idToLabelMapping_size - block_size - number_of_vectors_to_align = + // New idToLabelMapping size > idToLabelMapping_size - block_size - number_of_vectors_to_align = // 10 - 3 - 10 % 3 (1) = 6 idToLabelMapping_size = bf_index->idToLabelMapping.size(); ASSERT_EQ(idToLabelMapping_size, n - bs - n % bs); @@ -203,9 +326,7 @@ TYPED_TEST(BruteForceTest, resize_and_align_index_largeInitialCapacity) { ASSERT_EQ(bf_index->idToLabelMapping.size(), bs); ASSERT_EQ(bf_index->idToLabelMapping.capacity(), bs); - // Add and delete a vector to achieve: - // size % block_size == 0 && size + bs <= idToLabelMapping_size(3). - // idToLabelMapping_size should be resized to zero. + // Insert and delete one vector. Upon deletion, capacity will be resized again (to 0). GenerateAndAddVector(index, dim, 0); VecSimIndex_DeleteVector(index, 0); ASSERT_EQ(bf_index->idToLabelMapping.size(), 0); @@ -223,6 +344,63 @@ TYPED_TEST(BruteForceTest, resize_and_align_index_largeInitialCapacity) { VecSimIndex_Free(index); } +// Case 2: initial capacity is larger than block size, but smaller than 2 * block_size. +TYPED_TEST(BruteForceTest, resize_and_align_index_smallInitialCapacity) { + size_t dim = 4; + size_t n = 5; // Determines the initial size of idToLabelMapping. + size_t bs = 3; + + BFParams params = { + .dim = dim, .metric = VecSimMetric_L2, .initialCapacity = n, .blockSize = bs}; + + VecSimIndex *index = this->CreateNewIndex(params); + + BruteForceIndex *bf_index = this->CastToBF(index); + ASSERT_EQ(VecSimIndex_IndexSize(index), 0); + + // Add up to block size + 1 = 3 + 1 = 4 + for (size_t i = 0; i < bs + 1; i++) { + GenerateAndAddVector(index, dim, i, i); + } + + size_t idToLabelMapping_size = bf_index->idToLabelMapping.size(); + // The idToLabelMapping size shouldn't change, should remain n.
+ ASSERT_EQ(idToLabelMapping_size, n); + ASSERT_EQ(VecSimIndex_IndexSize(index), bs + 1); + + // Delete last vector, to get size % block_size == 0. size = 3 + VecSimIndex_DeleteVector(index, bs); + + // Index size = bs = 3. + ASSERT_EQ(VecSimIndex_IndexSize(index), bs); + + // Since new_size + 2 * bs > idToLabelMapping_size, we don't resize. + idToLabelMapping_size = bf_index->idToLabelMapping.size(); + ASSERT_EQ(idToLabelMapping_size, n); + ASSERT_EQ(idToLabelMapping_size, bf_index->idToLabelMapping.capacity()); + + // Delete all the vectors to align the capacity to blocksize and decrease it by bs. (in this + // case to 0) + size_t i = 0; + while (VecSimIndex_IndexSize(index) > 0) { + VecSimIndex_DeleteVector(index, i); + ++i; + } + ASSERT_EQ(bf_index->idToLabelMapping.size(), 0); + ASSERT_EQ(bf_index->idToLabelMapping.capacity(), 0); + + // Insert a vector. idToLabelMapping_size is increased by bs. + // Upon deletion it will be resized to zero again. + GenerateAndAddVector(index, dim, 0); + ASSERT_EQ(bf_index->idToLabelMapping.size(), bs); + ASSERT_EQ(bf_index->idToLabelMapping.capacity(), bs); + VecSimIndex_DeleteVector(index, 0); + ASSERT_EQ(bf_index->idToLabelMapping.size(), 0); + ASSERT_EQ(bf_index->idToLabelMapping.capacity(), 0); + + VecSimIndex_Free(index); +} + // Test empty index edge cases.
TYPED_TEST(BruteForceTest, brute_force_empty_index) { size_t dim = 4; @@ -840,8 +1018,10 @@ TYPED_TEST(BruteForceTest, brute_force_remove_vector_after_replacing_block) { TYPED_TEST(BruteForceTest, brute_force_zero_minimal_capacity) { size_t dim = 4; size_t n = 2; + size_t bs = 1; - BFParams params = {.dim = dim, .metric = VecSimMetric_L2, .initialCapacity = 0, .blockSize = 1}; + BFParams params = { + .dim = dim, .metric = VecSimMetric_L2, .initialCapacity = 0, .blockSize = bs}; VecSimIndex *index = this->CreateNewIndex(params); @@ -863,8 +1043,10 @@ TYPED_TEST(BruteForceTest, brute_force_zero_minimal_capacity) { VecSimIndex_DeleteVector(index, i); } ASSERT_EQ(VecSimIndex_IndexSize(index), 0); - // id2label size should be the same as index size - ASSERT_EQ(bf_index->idToLabelMapping.size(), 0); + // id2label size should decrease by one block. + ASSERT_EQ(bf_index->idToLabelMapping.size(), n - bs); + + // TODO!!! VecSimIndex_Free(index); } diff --git a/tests/unit/test_bruteforce_multi.cpp b/tests/unit/test_bruteforce_multi.cpp index b74adb20c..32887a02e 100644 --- a/tests/unit/test_bruteforce_multi.cpp +++ b/tests/unit/test_bruteforce_multi.cpp @@ -69,9 +69,12 @@ TYPED_TEST(BruteForceMultiTest, vector_add_multiple_test) { TYPED_TEST(BruteForceMultiTest, resize_and_align_index) { size_t dim = 4; - size_t n = 15; - size_t blockSize = 10; - size_t n_labels = 3; + constexpr size_t blockSize = 10; + constexpr size_t per_label = 3; + constexpr size_t n_labels = 4; + constexpr size_t n = n_labels * per_label; + constexpr size_t initial_cap = n; + size_t expected_cap = initial_cap; BFParams params = { .dim = dim, .metric = VecSimMetric_L2, .initialCapacity = n, .blockSize = blockSize}; @@ -79,62 +82,99 @@ TYPED_TEST(BruteForceMultiTest, resize_and_align_index) { VecSimIndex *index = this->CreateNewIndex(params); auto bf_index = this->CastToBF_Multi(index); - ASSERT_EQ(VecSimIndex_IndexSize(index), 0); + auto verify_index_size = [&](size_t expected_num_vectors, size_t 
expected_labels, + size_t expected_capacity, std::string msg) { + SCOPED_TRACE("verify_index_size: " + msg); + ASSERT_EQ(VecSimIndex_IndexSize(index), expected_num_vectors); + ASSERT_EQ(bf_index->indexLabelCount(), expected_labels); + ASSERT_EQ(bf_index->idToLabelMapping.size(), expected_capacity); + ASSERT_EQ(bf_index->getStoredVectorsCount(), expected_num_vectors); + ASSERT_EQ(bf_index->indexCapacity(), expected_capacity); + ASSERT_EQ(bf_index->vectorBlocks.size(), + expected_num_vectors / blockSize + (expected_num_vectors % blockSize != 0)) + << "expected_num_vectors: " << expected_num_vectors + << " expected_capacity: " << expected_capacity; + }; + // Empty index with initial capacity + verify_index_size(0, 0, expected_cap, "empty index"); for (size_t i = 0; i < n; i++) { - GenerateAndAddVector(index, dim, i % n_labels, i); + GenerateAndAddVector(index, dim, i % n_labels); } - - VecSimIndexDebugInfo info = VecSimIndex_DebugInfo(index); - ASSERT_EQ(info.bfInfo.indexSize, n); - ASSERT_EQ(info.bfInfo.indexLabelCount, n_labels); - ASSERT_EQ(bf_index->idToLabelMapping.size(), n); - ASSERT_EQ(bf_index->getVectorBlocks().size(), n / blockSize + 1); + verify_index_size(n, n_labels, expected_cap, "add" + std::to_string(n) + " vectors"); // remove invalid id - VecSimIndex_DeleteVector(index, 3459); + ASSERT_EQ(VecSimIndex_DeleteVector(index, 3459), 0); // This should do nothing - info = VecSimIndex_DebugInfo(index); - ASSERT_EQ(info.bfInfo.indexSize, n); - ASSERT_EQ(info.bfInfo.indexLabelCount, n_labels); - ASSERT_EQ(bf_index->idToLabelMapping.size(), n); - ASSERT_EQ(bf_index->getVectorBlocks().size(), n / blockSize + 1); + verify_index_size(n, n_labels, expected_cap, "remove invalid id"); - // Add another vector, since index size equals to the capacity, this should cause resizing - // (to fit a multiplication of block_size). + // Add another vector (index capacity should now increase to align with blocksize). 
+ expected_cap += blockSize - n % blockSize; + // We add to an existing label - number of labels should not change. GenerateAndAddVector(index, dim, 0); - info = VecSimIndex_DebugInfo(index); - ASSERT_EQ(info.bfInfo.indexSize, n + 1); - // Label count doesn't increase because label 0 already exists - ASSERT_EQ(info.bfInfo.indexLabelCount, n_labels); - // Check new capacity size, should be blockSize * 2. - ASSERT_EQ(bf_index->idToLabelMapping.size(), 2 * blockSize); + verify_index_size(n + 1, n_labels, expected_cap, "add one more vector"); - // Now size = n + 1 = 16, capacity = 2* bs = 20. Test capacity overflow again + // Now size = n + 1 = 13, capacity = 2 * bs = 20. Test capacity overflow again // to check that it stays aligned with blocksize. - - size_t add_vectors_count = 8; - for (size_t i = 0; i < add_vectors_count; i++) { + for (size_t i = 0; i < blockSize; i++) { GenerateAndAddVector(index, dim, i % n_labels, i); } - // Size should be n + 1 + 8 = 24. - size_t new_n = n + 1 + add_vectors_count; - info = VecSimIndex_DebugInfo(index); + // Size should be n + 1 + blockSize = 23. + // We add to existing labels only - number of labels doesn't change. + // The new capacity size should be increased by one block (blockSize * 3). + size_t new_n = n + 1 + blockSize; + ASSERT_EQ(expected_cap + blockSize, 3 * blockSize); + verify_index_size(new_n, n_labels, expected_cap + blockSize, + "add vectors to trigger another resize"); - ASSERT_EQ(info.bfInfo.indexSize, new_n); - // Label count doesn't increase because label 0 already exists - ASSERT_EQ(info.bfInfo.indexLabelCount, n_labels); size_t total_vectors = 0; for (auto label_ids : bf_index->labelToIdsLookup) { total_vectors += label_ids.second.size(); } ASSERT_EQ(total_vectors, new_n); - // Check new capacity size, should be blockSize * 3. - ASSERT_EQ(bf_index->idToLabelMapping.size(), 3 * blockSize); + // Delete vectors until one block plus some vectors are removed. 
+ size_t current_vectors_block_count = (new_n + blockSize - 1) / blockSize; + ASSERT_EQ(current_vectors_block_count, 3); + size_t deleted_labels = 0; + size_t label_to_delete = 0; + auto remove_to_one_below_blocksize = [&]() { + while (bf_index->getVectorBlocks().size() == current_vectors_block_count) { + VecSimIndex_DeleteVector(index, label_to_delete++); + deleted_labels++; + } + if (VecSimIndex_IndexSize(bf_index) % blockSize == 0) { + VecSimIndex_DeleteVector(index, label_to_delete++); + deleted_labels++; + } + }; + // First trigger of remove_to_one_below_blocksize will result in one free block. + // This should not trigger shrinking of metadata containers. + remove_to_one_below_blocksize(); // remove first block labels. + ASSERT_LT(VecSimIndex_IndexSize(bf_index), 2 * blockSize); + ASSERT_GT(VecSimIndex_IndexSize(bf_index), blockSize); + verify_index_size( + VecSimIndex_IndexSize(bf_index), n_labels - deleted_labels, 3 * blockSize, + "delete vectors so that indexsize < 2 * blocksize, but there is only one free block"); + + // Second trigger of remove_to_one_below_blocksize will result in two free blocks. + // This should trigger shrinking of metadata containers by one block. + current_vectors_block_count--; + remove_to_one_below_blocksize(); + ASSERT_LT(VecSimIndex_IndexSize(bf_index), blockSize); + verify_index_size( + VecSimIndex_IndexSize(bf_index), n_labels - deleted_labels, 2 * blockSize, + "delete vectors so that indexsize % blocksize == 0 and there are two free blocks"); + + // Delete all vectors. + while (VecSimIndex_IndexSize(index) > 0) { + VecSimIndex_DeleteVector(index, label_to_delete++); + } + // We shrink capacity by one blocksize. + verify_index_size(0, 0, blockSize, "delete all vectors"); VecSimIndex_Free(index); }