Skip to content

Commit 7665ad7

Browse files
author
Qi Chen
committed
Add Test for Cache
1 parent 4b3948c commit 7665ad7

File tree

2 files changed

+146
-6
lines changed

2 files changed

+146
-6
lines changed

AnnService/inc/Core/SPANN/ExtraFileController.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -565,8 +565,8 @@ namespace SPTAG::SPANN {
565565
}
566566
if (key >= r) return ErrorCode::Key_OverFlow;
567567

568-
AddressType* addr = At(key);
569-
if (((uintptr_t)addr) == 0xffffffffffffffff) return ErrorCode::Key_Not_Exist;
568+
AddressType* addr = (AddressType*)(At(key));
569+
if (((uintptr_t)addr) == 0xffffffffffffffff) return ErrorCode::Key_NotFound;
570570

571571
auto size = addr[0];
572572
if (size < 0) return ErrorCode::Posting_SizeError;
@@ -623,7 +623,7 @@ namespace SPTAG::SPANN {
623623
r = m_pBlockMapping.R();
624624
}
625625
if (key < r) {
626-
AddressType* addr = At(key);
626+
AddressType* addr = (AddressType*)(At(key));
627627
if (m_pShardedLRUCache && ((uintptr_t)addr) != 0xffffffffffffffff && addr[0] >= 0) {
628628
values[i].SetAvailableSize(addr[0]);
629629
if (m_pShardedLRUCache->get(key, values[i].GetBuffer())) {
@@ -678,7 +678,7 @@ namespace SPTAG::SPANN {
678678
r = m_pBlockMapping.R();
679679
}
680680
if (key < r) {
681-
AddressType* addr = At(key);
681+
AddressType* addr = (AddressType*)(At(key));
682682
if (m_pShardedLRUCache && ((uintptr_t)addr) != 0xffffffffffffffff && addr[0] >= 0) {
683683
(*values)[i].resize(addr[0]);
684684
if (m_pShardedLRUCache->get(key, (*values)[i].data())) {

Test/src/SPFreshTest.cpp

Lines changed: 142 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ using namespace SPTAG;
2727

2828
namespace SPFreshTest
2929
{
30-
SizeType N = 1000;
30+
SizeType N = 10000;
3131
DimensionType M = 100;
3232
int K = 10;
3333
int queries = 10;
@@ -991,6 +991,146 @@ BOOST_AUTO_TEST_CASE(RefineIndex)
991991
static_cast<SPANN::Index<int8_t> *>(originalIndex.get())->GetDBStat();
992992
}
993993

994+
// CacheTest
//
// Runs the same load -> check -> search -> clone -> insert -> delete -> save
// cycle twice over a freshly built index:
//   1. "No Cache":     the index is used exactly as it was saved.
//   2. "Enable Cache": before cloning, the CacheSizeGB / CacheShards parameters
//                      are set in the BuildSSDIndex section and the index is
//                      saved back to the path it was loaded from.
// Check/insert timings and recall are printed for manual comparison; the only
// hard assertions are that load, check, clone and save succeed.
// NOTE(review): recall is printed but never asserted, and the log text says
// "recall@5" while K is passed to Search — confirm whether that is intended.
BOOST_AUTO_TEST_CASE(CacheTest)
{
    using namespace SPFreshTest;

    int iterations = 5;
    int insertBatchSize = N / iterations;
    int deleteBatchSize = N / iterations;

    // Generate test data
    std::shared_ptr<VectorSet> vecset, addvecset, queryset, truth;
    std::shared_ptr<MetadataSet> metaset, addmetaset;

    TestUtils::TestDataGenerator<int8_t> generator(N, queries, M, K, "L2");
    generator.RunBatches(vecset, metaset, addvecset, addmetaset, queryset, N, insertBatchSize, deleteBatchSize,
                         iterations, truth);

    // Build and save index
    auto originalIndex = BuildIndex<int8_t>("original_index", vecset, metaset);
    BOOST_REQUIRE(originalIndex != nullptr);
    BOOST_REQUIRE(originalIndex->SaveIndex("original_index") == ErrorCode::Success);
    originalIndex = nullptr;

    // Milliseconds elapsed since `start`.
    const auto elapsedMs = [](const std::chrono::high_resolution_clock::time_point &start) {
        return std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() -
                                                                     start)
            .count();
    };

    // Loads the index stored at `path`, times Check(), reports recall and DB
    // statistics, and hands the loaded index back to the caller.
    const auto loadCheckAndSearch = [&](const std::string &path, int iter) {
        std::shared_ptr<VectorIndex> index;
        BOOST_REQUIRE(VectorIndex::LoadIndex(path, index) == ErrorCode::Success);
        BOOST_REQUIRE(index != nullptr);

        const auto checkStart = std::chrono::high_resolution_clock::now();
        BOOST_REQUIRE(index->Check() == ErrorCode::Success);
        std::cout << "[INFO] Check time for iteration " << iter << ": " << elapsedMs(checkStart) << " ms"
                  << std::endl;

        auto recall = Search<int8_t>(index, queryset, vecset, addvecset, K, truth, N, iter);
        std::cout << "[INFO] After Save and Load:" << " recall@5=" << recall << std::endl;
        static_cast<SPANN::Index<int8_t> *>(index.get())->GetDBStat();
        return index;
    };

    // One full pass of `iterations` clone/insert/delete/save rounds starting
    // from "original_index". Each phase keeps its own state, so the two passes
    // no longer redeclare prevPath / prevIndex / t0 in the same scope.
    const auto runPhase = [&](bool enableCache) {
        std::string prevPath = "original_index";
        for (int iter = 0; iter < iterations; iter++)
        {
            std::string clone_path = "clone_index_" + std::to_string(iter);
            // Kept alive for the whole round, as in the original code.
            std::shared_ptr<VectorIndex> prevIndex = loadCheckAndSearch(prevPath, iter);

            if (enableCache)
            {
                prevIndex->SetParameter("CacheSizeGB", "10", "BuildSSDIndex");
                prevIndex->SetParameter("CacheShards", "4", "BuildSSDIndex");

                // Persist the cache parameters before cloning from this index.
                BOOST_REQUIRE(prevIndex->SaveIndex(prevPath) == ErrorCode::Success);
            }

            auto cloneIndex = prevIndex->Clone(clone_path);
            // Fail the test cleanly instead of dereferencing a null clone below.
            BOOST_REQUIRE(cloneIndex != nullptr);

            const auto insertStart = std::chrono::high_resolution_clock::now();
            InsertVectors<int8_t>(static_cast<SPANN::Index<int8_t> *>(cloneIndex.get()), 1, insertBatchSize,
                                  addvecset, metaset, iter * insertBatchSize);
            std::cout << "[INFO] Insert time for iteration " << iter << ": " << elapsedMs(insertStart) << " ms"
                      << std::endl;

            for (int i = 0; i < deleteBatchSize; i++)
                cloneIndex->DeleteIndex(iter * deleteBatchSize + i);

            auto recall = Search<int8_t>(cloneIndex, queryset, vecset, addvecset, K, truth, N, iter + 1);
            std::cout << "[INFO] After iter " << iter << ": recall@5=" << recall << std::endl;
            static_cast<SPANN::Index<int8_t> *>(cloneIndex.get())->GetDBStat();
            BOOST_REQUIRE(cloneIndex->SaveIndex(clone_path) == ErrorCode::Success);
            cloneIndex = nullptr;
            prevPath = clone_path;
        }

        // Final verification of the last saved clone. The returned index is a
        // temporary, so it is released before the directories are removed.
        loadCheckAndSearch(prevPath, iterations);

        // Clones are created for iter in [0, iterations), so clean up exactly
        // that range (the original looped to an undeclared `insertIterations`).
        for (int iter = 0; iter < iterations; iter++)
        {
            std::filesystem::remove_all("clone_index_" + std::to_string(iter));
        }
    };

    std::cout << "=================No Cache===================" << std::endl;
    runPhase(false);

    std::cout << "=================Enable Cache===================" << std::endl;
    runPhase(true);

    std::filesystem::remove_all("original_index");
}
1133+
9941134
BOOST_AUTO_TEST_CASE(IterativeSearchPerf)
9951135
{
9961136
using namespace SPFreshTest;
@@ -1042,7 +1182,7 @@ BOOST_AUTO_TEST_CASE(IterativeSearchPerf)
10421182
{
10431183
std::filesystem::remove_all("clone_index_" + std::to_string(iter));
10441184
}
1045-
// std::filesystem::remove_all("original_index");
1185+
std::filesystem::remove_all("original_index");
10461186
}
10471187

10481188
std::shared_ptr<float[]> get_embeddings(uint32_t row_id, uint32_t embedding_dim, uint32_t array_index)

0 commit comments

Comments
 (0)