Skip to content

Commit 511baf1

Browse files
authored
[MOD-12983] [SVS] Add WriteInPlace mode support to TieredSVSIndex::runGC() (#861)
* Add `runGCInPlaceMode` test * Add WriteInPlace support * Spelling fixes in comments * Code review s1e1 * Update tests to run GC on empty index
1 parent 5f20db1 commit 511baf1

File tree

2 files changed

+95
-0
lines changed

2 files changed

+95
-0
lines changed

src/VecSim/algorithms/svs/svs_tiered.h

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -579,6 +579,10 @@ class TieredSVSIndex : public VecSimTieredIndex<DataType, float> {
579579
index->backendIndex->log(VecSimCommonStrings::LOG_VERBOSE_STRING,
580580
"running asynchronous GC for tiered SVS index");
581581
auto svs_index = index->GetSVSIndex();
582+
if (index->backendIndex->indexSize() == 0) {
583+
// No need to run GC on an empty index.
584+
return;
585+
}
582586
svs_index->setNumThreads(std::min(availableThreads, index->backendIndex->indexSize()));
583587
// VecSimIndexAbstract::runGC() is protected
584588
static_cast<VecSimIndexInterface *>(index->backendIndex)->runGC();
@@ -1009,6 +1013,21 @@ class TieredSVSIndex : public VecSimTieredIndex<DataType, float> {
10091013
}
10101014

10111015
void runGC() override {
1016+
if (this->getWriteMode() == VecSim_WriteInPlace) {
1017+
TIERED_LOG(VecSimCommonStrings::LOG_VERBOSE_STRING,
1018+
"running synchronous GC for tiered SVS index in write-in-place mode");
1019+
// In write-in-place mode, we run GC synchronously.
1020+
std::lock_guard lock{this->mainIndexGuard};
1021+
if (this->backendIndex->indexSize() == 0) {
1022+
// No need to run GC on an empty index.
1023+
return;
1024+
}
1025+
// Force single thread for write-in-place mode.
1026+
this->GetSVSIndex()->setNumThreads(1);
1027+
// VecSimIndexAbstract::runGC() is protected
1028+
static_cast<VecSimIndexInterface *>(this->backendIndex)->runGC();
1029+
return;
1030+
}
10121031
TIERED_LOG(VecSimCommonStrings::LOG_VERBOSE_STRING,
10131032
"scheduling asynchronous GC for tiered SVS index");
10141033
scheduleSVSIndexGC();

tests/unit/test_svs_tiered.cpp

Lines changed: 76 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3163,6 +3163,13 @@ TYPED_TEST(SVSTieredIndexTestBasic, runGCParallel) {
31633163
ASSERT_INDEX(tiered_index);
31643164
auto allocator = tiered_index->getAllocator();
31653165

3166+
// Run GC on empty index
3167+
VecSimTieredIndex_GC(tiered_index);
3168+
mock_thread_pool.init_threads();
3169+
mock_thread_pool.thread_pool_join();
3170+
ASSERT_EQ(tiered_index->getNumMarkedDeleted(), 0);
3171+
ASSERT_EQ(mock_thread_pool.jobQ.size(), 0);
3172+
31663173
// Insert n vectors directly to SVS.
31673174
std::srand(10); // create pseudo random generator with any arbitrary seed.
31683175
for (size_t i = 0; i < n; i++) {
@@ -3211,6 +3218,75 @@ TYPED_TEST(SVSTieredIndexTestBasic, runGCParallel) {
32113218
EXPECT_EQ(tiered_index->statisticInfo().numberOfMarkedDeleted, 0);
32123219
}
32133220

3221+
// Exercises VecSimTieredIndex_GC on a tiered SVS index after switching to
// VecSim_WriteInPlace: after each GC call the test asserts that the mock job
// queue is empty and that no vectors remain marked as deleted.
TYPED_TEST(SVSTieredIndexTestBasic, runGCInPlaceMode) {
3222+
// Create TieredSVS index instance with a mock queue.
3223+
size_t dim = 4;
3224+
size_t threshold = 1024;
3225+
const size_t n = threshold * 4;
3226+
SVSParams params = {.type = TypeParam::get_index_type(), .dim = dim, .metric = VecSimMetric_L2};
3227+
VecSimParams svs_params = CreateParams(params);
3228+
auto mock_thread_pool = tieredIndexMock();
3229+
3230+
// Force-trigger the first update job for the first 64 vectors.
3231+
auto *tiered_index = this->CreateTieredSVSIndex(svs_params, mock_thread_pool, 64);
3232+
ASSERT_INDEX(tiered_index);
3233+
auto allocator = tiered_index->getAllocator();
3234+
3235+
// Switch to write-in-place mode before any GC call is made.
// NOTE(review): the write mode is not restored anywhere in this test - confirm
// that the fixture (or a later test) resets it.
VecSim_SetWriteMode(VecSim_WriteInPlace);
3236+
3237+
// Run GC on an empty index: nothing is marked deleted and no job is queued.
3238+
VecSimTieredIndex_GC(tiered_index);
3239+
ASSERT_EQ(tiered_index->getNumMarkedDeleted(), 0);
3240+
ASSERT_EQ(mock_thread_pool.jobQ.size(), 0);
3241+
3242+
// Insert n vectors directly to SVS.
3243+
std::srand(10); // seed the pseudo-random generator with an arbitrary fixed value.
3244+
for (size_t i = 0; i < n; i++) {
3245+
TEST_DATA_T vector[dim];
3246+
for (size_t j = 0; j < dim; j++) {
3247+
vector[j] = std::rand() / (TEST_DATA_T)RAND_MAX;
3248+
}
3249+
VecSimIndex_AddVector(tiered_index->GetBackendIndex(), vector, i);
3250+
}
3251+
3252+
ASSERT_EQ(tiered_index->indexSize(), n);
3253+
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), n);
3254+
3255+
// Interleave additions, deletions and GC runs.
3256+
for (size_t i = 0; i < threshold; i++) {
3257+
// Run GC once every 64 iterations.
3258+
if (i % 64 == 0) {
3259+
VecSimTieredIndex_GC(tiered_index);
3260+
ASSERT_EQ(tiered_index->getNumMarkedDeleted(), 0);
3261+
ASSERT_EQ(mock_thread_pool.jobQ.size(), 0);
3262+
}
3263+
// Add a new vector with a label not used so far (n + i).
3264+
TEST_DATA_T vector[dim];
3265+
for (size_t j = 0; j < dim; j++) {
3266+
vector[j] = std::rand() / (TEST_DATA_T)RAND_MAX;
3267+
}
3268+
VecSimIndex_AddVector(tiered_index, vector, n + i);
3269+
// Delete an existing vector (labels threshold .. 2 * threshold - 1).
3270+
tiered_index->deleteVector(i + threshold);
3271+
}
3272+
// Final GC after all operations.
3273+
VecSimTieredIndex_GC(tiered_index);
3274+
3275+
// Validate that no jobs were scheduled to the queue.
3276+
ASSERT_EQ(mock_thread_pool.jobQ.size(), 0);
3277+
3278+
// Validate sizes after GC: each iteration added one vector and deleted one,
// so the total is expected to remain n.
3279+
auto tiered_size = tiered_index->indexSize();
3280+
auto flat_size = tiered_index->GetFlatIndex()->indexSize();
3281+
auto backend_size = tiered_index->GetBackendIndex()->indexSize();
3282+
ASSERT_EQ(tiered_size, n);
3283+
ASSERT_EQ(tiered_size, backend_size + flat_size);
3284+
// Expect that GC cleaned all the deleted vectors.
3285+
ASSERT_EQ(tiered_index->GetSVSIndex()->indexStorageSize(), backend_size);
3286+
ASSERT_EQ(tiered_index->GetSVSIndex()->getNumMarkedDeleted(), 0);
3287+
EXPECT_EQ(tiered_index->statisticInfo().numberOfMarkedDeleted, 0);
3288+
}
3289+
32143290
TYPED_TEST(SVSTieredIndexTestBasic, switchDeleteModes) {
32153291
// Create TieredSVS index instance with a mock queue.
32163292
size_t dim = 16;

0 commit comments

Comments
 (0)