@@ -27,7 +27,7 @@ using namespace SPTAG;
2727
2828namespace SPFreshTest
2929{
30- SizeType N = 1000 ;
30+ SizeType N = 10000 ;
3131DimensionType M = 100 ;
3232int K = 10 ;
3333int queries = 10 ;
@@ -991,6 +991,146 @@ BOOST_AUTO_TEST_CASE(RefineIndex)
991991 static_cast <SPANN::Index<int8_t > *>(originalIndex.get ())->GetDBStat ();
992992}
993993
994+ BOOST_AUTO_TEST_CASE (CacheTest)
995+ {
996+ using namespace SPFreshTest ;
997+
998+ int iterations = 5 ;
999+ int insertBatchSize = N / iterations;
1000+ int deleteBatchSize = N / iterations;
1001+
1002+ // Generate test data
1003+ std::shared_ptr<VectorSet> vecset, addvecset, queryset, truth;
1004+ std::shared_ptr<MetadataSet> metaset, addmetaset;
1005+
1006+ TestUtils::TestDataGenerator<int8_t > generator (N, queries, M, K, " L2" );
1007+ generator.RunBatches (vecset, metaset, addvecset, addmetaset, queryset, N, insertBatchSize, deleteBatchSize,
1008+ iterations, truth);
1009+
1010+ // Build and save index
1011+ auto originalIndex = BuildIndex<int8_t >(" original_index" , vecset, metaset);
1012+ BOOST_REQUIRE (originalIndex != nullptr );
1013+ BOOST_REQUIRE (originalIndex->SaveIndex (" original_index" ) == ErrorCode::Success);
1014+ originalIndex = nullptr ;
1015+
1016+ std::cout << " =================No Cache===================" << std::endl;
1017+ std::string prevPath = " original_index" ;
1018+ for (int iter = 0 ; iter < iterations; iter++)
1019+ {
1020+ std::string clone_path = " clone_index_" + std::to_string (iter);
1021+ std::shared_ptr<VectorIndex> prevIndex;
1022+ BOOST_REQUIRE (VectorIndex::LoadIndex (prevPath, prevIndex) == ErrorCode::Success);
1023+ BOOST_REQUIRE (prevIndex != nullptr );
1024+ auto t0 = std::chrono::high_resolution_clock::now ();
1025+ BOOST_REQUIRE (prevIndex->Check () == ErrorCode::Success);
1026+ std::cout << " [INFO] Check time for iteration " << iter << " : "
1027+ << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now () - t0).count ()
1028+ << " ms" << std::endl;
1029+
1030+ recall = Search<int8_t >(prevIndex, queryset, vecset, addvecset, K, truth, N, iter);
1031+ std::cout << " [INFO] After Save and Load:" << " recall@5=" << recall << std::endl;
1032+ static_cast <SPANN::Index<int8_t > *>(prevIndex.get ())->GetDBStat ();
1033+
1034+ auto cloneIndex = prevIndex->Clone (clone_path);
1035+
1036+ auto t1 = std::chrono::high_resolution_clock::now ();
1037+ InsertVectors<int8_t >(static_cast <SPANN::Index<int8_t > *>(cloneIndex.get ()), 1 , insertBatchSize, addvecset,
1038+ metaset, iter * insertBatchSize);
1039+ std::cout << " [INFO] Insert time for iteration " << iter << " : "
1040+ << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now () - t1).count ()
1041+ << " ms" << std::endl;
1042+
1043+ for (int i = 0 ; i < deleteBatchSize; i++)
1044+ cloneIndex->DeleteIndex (iter * deleteBatchSize + i);
1045+
1046+ recall = Search<int8_t >(cloneIndex, queryset, vecset, addvecset, K, truth, N, iter + 1 );
1047+ std::cout << " [INFO] After iter " << iter << " : recall@5=" << recall << std::endl;
1048+ static_cast <SPANN::Index<int8_t > *>(cloneIndex.get ())->GetDBStat ();
1049+ BOOST_REQUIRE (cloneIndex->SaveIndex (clone_path) == ErrorCode::Success);
1050+ cloneIndex = nullptr ;
1051+ prevPath = clone_path;
1052+ }
1053+
1054+ std::shared_ptr<VectorIndex> prevIndex;
1055+ BOOST_REQUIRE (VectorIndex::LoadIndex (prevPath, prevIndex) == ErrorCode::Success);
1056+ BOOST_REQUIRE (prevIndex != nullptr );
1057+ auto t0 = std::chrono::high_resolution_clock::now ();
1058+ BOOST_REQUIRE (prevIndex->Check () == ErrorCode::Success);
1059+ std::cout << " [INFO] Check time for iteration " << iterations << " : "
1060+ << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now () - t0).count ()
1061+ << " ms" << std::endl;
1062+
1063+ recall = Search<int8_t >(prevIndex, queryset, vecset, addvecset, K, truth, N, iterations);
1064+ std::cout << " [INFO] After Save and Load:" << " recall@5=" << recall << std::endl;
1065+ static_cast <SPANN::Index<int8_t > *>(prevIndex.get ())->GetDBStat ();
1066+ prevIndex = nullptr ;
1067+ for (int iter = 0 ; iter < insertIterations; iter++)
1068+ {
1069+ std::filesystem::remove_all (" clone_index_" + std::to_string (iter));
1070+ }
1071+
1072+ std::cout << " =================Enable Cache===================" << std::endl;
1073+ std::string prevPath = " original_index" ;
1074+ for (int iter = 0 ; iter < iterations; iter++)
1075+ {
1076+ std::string clone_path = " clone_index_" + std::to_string (iter);
1077+ std::shared_ptr<VectorIndex> prevIndex;
1078+ BOOST_REQUIRE (VectorIndex::LoadIndex (prevPath, prevIndex) == ErrorCode::Success);
1079+ BOOST_REQUIRE (prevIndex != nullptr );
1080+ auto t0 = std::chrono::high_resolution_clock::now ();
1081+ BOOST_REQUIRE (prevIndex->Check () == ErrorCode::Success);
1082+ std::cout << " [INFO] Check time for iteration " << iter << " : "
1083+ << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now () - t0).count ()
1084+ << " ms" << std::endl;
1085+
1086+ recall = Search<int8_t >(prevIndex, queryset, vecset, addvecset, K, truth, N, iter);
1087+ std::cout << " [INFO] After Save and Load:" << " recall@5=" << recall << std::endl;
1088+ static_cast <SPANN::Index<int8_t > *>(prevIndex.get ())->GetDBStat ();
1089+
1090+ prevIndex->SetParameter (" CacheSizeGB" , " 10" , " BuildSSDIndex" );
1091+ prevIndex->SetParameter (" CacheShards" , " 4" , " BuildSSDIndex" );
1092+
1093+ BOOST_REQUIRE (prevIndex->SaveIndex (prevPath) == ErrorCode::Success);
1094+ auto cloneIndex = prevIndex->Clone (clone_path);
1095+
1096+ auto t1 = std::chrono::high_resolution_clock::now ();
1097+ InsertVectors<int8_t >(static_cast <SPANN::Index<int8_t > *>(cloneIndex.get ()), 1 , insertBatchSize, addvecset,
1098+ metaset, iter * insertBatchSize);
1099+ std::cout << " [INFO] Insert time for iteration " << iter << " : "
1100+ << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now () - t1).count ()
1101+ << " ms" << std::endl;
1102+
1103+ for (int i = 0 ; i < deleteBatchSize; i++)
1104+ cloneIndex->DeleteIndex (iter * deleteBatchSize + i);
1105+
1106+ recall = Search<int8_t >(cloneIndex, queryset, vecset, addvecset, K, truth, N, iter + 1 );
1107+ std::cout << " [INFO] After iter " << iter << " : recall@5=" << recall << std::endl;
1108+ static_cast <SPANN::Index<int8_t > *>(cloneIndex.get ())->GetDBStat ();
1109+ BOOST_REQUIRE (cloneIndex->SaveIndex (clone_path) == ErrorCode::Success);
1110+ cloneIndex = nullptr ;
1111+ prevPath = clone_path;
1112+ }
1113+ std::shared_ptr<VectorIndex> prevIndex;
1114+ BOOST_REQUIRE (VectorIndex::LoadIndex (prevPath, prevIndex) == ErrorCode::Success);
1115+ BOOST_REQUIRE (prevIndex != nullptr );
1116+ auto t0 = std::chrono::high_resolution_clock::now ();
1117+ BOOST_REQUIRE (prevIndex->Check () == ErrorCode::Success);
1118+ std::cout << " [INFO] Check time for iteration " << iterations << " : "
1119+ << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now () - t0).count ()
1120+ << " ms" << std::endl;
1121+
1122+ recall = Search<int8_t >(prevIndex, queryset, vecset, addvecset, K, truth, N, iterations);
1123+ std::cout << " [INFO] After Save and Load:" << " recall@5=" << recall << std::endl;
1124+ static_cast <SPANN::Index<int8_t > *>(prevIndex.get ())->GetDBStat ();
1125+ prevIndex = nullptr ;
1126+
1127+ for (int iter = 0 ; iter < insertIterations; iter++)
1128+ {
1129+ std::filesystem::remove_all (" clone_index_" + std::to_string (iter));
1130+ }
1131+ std::filesystem::remove_all (" original_index" );
1132+ }
1133+
9941134BOOST_AUTO_TEST_CASE (IterativeSearchPerf)
9951135{
9961136 using namespace SPFreshTest ;
@@ -1042,7 +1182,7 @@ BOOST_AUTO_TEST_CASE(IterativeSearchPerf)
10421182 {
10431183 std::filesystem::remove_all (" clone_index_" + std::to_string (iter));
10441184 }
1045- // std::filesystem::remove_all("original_index");
1185+ std::filesystem::remove_all (" original_index" );
10461186}
10471187
10481188std::shared_ptr<float []> get_embeddings (uint32_t row_id, uint32_t embedding_dim, uint32_t array_index)
0 commit comments