Skip to content

Commit 80104e6

Browse files
[8.2] [MOD-12983] [SVS] Add WriteInPlace mode support to TieredSVSIndex::runGC() (#863)
[MOD-12983] [SVS] Add `WriteInPlace` mode support to `TieredSVSIndex::runGC()` (#861)

* Add `runGCInPlaceMode` test
* Add WriteInPlace support
* Spelling fixes in comments
* Code review s1e1
* Update tests to run GC on empty index

(cherry picked from commit 511baf1)

Co-authored-by: Rafik Saliev <[email protected]>
1 parent 55443f2 commit 80104e6

File tree

2 files changed

+95
-0
lines changed

2 files changed

+95
-0
lines changed

src/VecSim/algorithms/svs/svs_tiered.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,10 @@ class TieredSVSIndex : public VecSimTieredIndex<DataType, float> {
540540
index->backendIndex->log(VecSimCommonStrings::LOG_VERBOSE_STRING,
541541
"running asynchronous GC for tiered SVS index");
542542
auto svs_index = index->GetSVSIndex();
543+
if (index->backendIndex->indexSize() == 0) {
544+
// No need to run GC on an empty index.
545+
return;
546+
}
543547
svs_index->setNumThreads(std::min(availableThreads, index->backendIndex->indexSize()));
544548
// VecSimIndexAbstract::runGC() is protected
545549
static_cast<VecSimIndexInterface *>(index->backendIndex)->runGC();
@@ -903,6 +907,21 @@ class TieredSVSIndex : public VecSimTieredIndex<DataType, float> {
903907
}
904908

905909
void runGC() override {
910+
if (this->getWriteMode() == VecSim_WriteInPlace) {
911+
TIERED_LOG(VecSimCommonStrings::LOG_VERBOSE_STRING,
912+
"running synchronous GC for tiered SVS index in write-in-place mode");
913+
// In write-in-place mode, we run GC synchronously.
914+
std::lock_guard lock{this->mainIndexGuard};
915+
if (this->backendIndex->indexSize() == 0) {
916+
// No need to run GC on an empty index.
917+
return;
918+
}
919+
// Force single thread for write-in-place mode.
920+
this->GetSVSIndex()->setNumThreads(1);
921+
// VecSimIndexAbstract::runGC() is protected
922+
static_cast<VecSimIndexInterface *>(this->backendIndex)->runGC();
923+
return;
924+
}
906925
TIERED_LOG(VecSimCommonStrings::LOG_VERBOSE_STRING,
907926
"scheduling asynchronous GC for tiered SVS index");
908927
scheduleSVSIndexGC();

tests/unit/test_svs_tiered.cpp

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2800,6 +2800,13 @@ TYPED_TEST(SVSTieredIndexTestBasic, runGCParallel) {
28002800
ASSERT_INDEX(tiered_index);
28012801
auto allocator = tiered_index->getAllocator();
28022802

2803+
// Run GC on empty index
2804+
VecSimTieredIndex_GC(tiered_index);
2805+
mock_thread_pool.init_threads();
2806+
mock_thread_pool.thread_pool_join();
2807+
ASSERT_EQ(tiered_index->getNumMarkedDeleted(), 0);
2808+
ASSERT_EQ(mock_thread_pool.jobQ.size(), 0);
2809+
28032810
// Insert n vectors directly to SVS.
28042811
std::srand(10); // create pseudo random generator with any arbitrary seed.
28052812
for (size_t i = 0; i < n; i++) {
@@ -2848,6 +2855,75 @@ TYPED_TEST(SVSTieredIndexTestBasic, runGCParallel) {
28482855
EXPECT_EQ(tiered_index->statisticInfo().numberOfMarkedDeleted, 0);
28492856
}
28502857

2858+
// Checks that, once the write mode is switched to VecSim_WriteInPlace, every call to
// VecSimTieredIndex_GC() leaves no marked-deleted vectors behind and never enqueues a job on the
// mock thread pool - both on an empty index and while vectors are being added and deleted.
TYPED_TEST(SVSTieredIndexTestBasic, runGCInPlaceMode) {
    // Compile-time constants, so that `TEST_DATA_T vector[dim]` below is a regular fixed-size
    // array and not a variable-length array (which is a compiler extension in C++).
    constexpr size_t dim = 4;
    constexpr size_t threshold = 1024;
    constexpr size_t n = threshold * 4;

    // Fills a fixed-size array with pseudo-random values, drawing one std::rand() per component
    // in index order.
    auto fill_random = [](auto &vec) {
        for (auto &component : vec) {
            component = std::rand() / static_cast<TEST_DATA_T>(RAND_MAX);
        }
    };

    // Create TieredSVS index instance with a mock queue.
    SVSParams params = {.type = TypeParam::get_index_type(), .dim = dim, .metric = VecSimMetric_L2};
    VecSimParams svs_params = CreateParams(params);
    auto mock_thread_pool = tieredIndexMock();

    // Force trigger the first update job for the first 64 vectors.
    auto *tiered_index = this->CreateTieredSVSIndex(svs_params, mock_thread_pool, 64);
    ASSERT_INDEX(tiered_index);

    // NOTE(review): the write mode set here is not restored by this test - confirm that the
    // fixture (or the following tests) reset it as needed.
    VecSim_SetWriteMode(VecSim_WriteInPlace);

    // Run GC on empty index: nothing to clean and nothing must be scheduled.
    VecSimTieredIndex_GC(tiered_index);
    ASSERT_EQ(tiered_index->getNumMarkedDeleted(), 0);
    ASSERT_EQ(mock_thread_pool.jobQ.size(), 0);

    // Insert n vectors directly to SVS.
    std::srand(10); // create pseudo random generator with any arbitrary seed.
    for (size_t i = 0; i < n; i++) {
        TEST_DATA_T vector[dim];
        fill_random(vector);
        VecSimIndex_AddVector(tiered_index->GetBackendIndex(), vector, i);
    }

    ASSERT_EQ(tiered_index->indexSize(), n);
    ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), n);

    // Interleave additions, deletions and GC runs.
    for (size_t i = 0; i < threshold; i++) {
        // Run GC every 64 iterations (including the very first one).
        if (i % 64 == 0) {
            VecSimTieredIndex_GC(tiered_index);
            ASSERT_EQ(tiered_index->getNumMarkedDeleted(), 0);
            ASSERT_EQ(mock_thread_pool.jobQ.size(), 0);
        }
        // Add a new vector through the tiered index, under a label not used so far.
        TEST_DATA_T vector[dim];
        fill_random(vector);
        VecSimIndex_AddVector(tiered_index, vector, n + i);
        // Delete an existing vector (one of those inserted directly to SVS above).
        tiered_index->deleteVector(i + threshold);
    }
    // Final GC after all operations.
    VecSimTieredIndex_GC(tiered_index);

    // Validate that no jobs were scheduled to the queue.
    ASSERT_EQ(mock_thread_pool.jobQ.size(), 0);

    // Validate sizes after GC: `threshold` vectors were added and `threshold` were deleted, so
    // the total is expected to be back at n.
    auto tiered_size = tiered_index->indexSize();
    auto flat_size = tiered_index->GetFlatIndex()->indexSize();
    auto backend_size = tiered_index->GetBackendIndex()->indexSize();
    ASSERT_EQ(tiered_size, n);
    ASSERT_EQ(tiered_size, backend_size + flat_size);
    // Expect that GC cleaned all the deleted vectors.
    ASSERT_EQ(tiered_index->GetSVSIndex()->indexStorageSize(), backend_size);
    ASSERT_EQ(tiered_index->GetSVSIndex()->getNumMarkedDeleted(), 0);
    EXPECT_EQ(tiered_index->statisticInfo().numberOfMarkedDeleted, 0);
}
2926+
28512927
TYPED_TEST(SVSTieredIndexTestBasic, switchDeleteModes) {
28522928
// Create TieredSVS index instance with a mock queue.
28532929
size_t dim = 16;

0 commit comments

Comments
 (0)