Skip to content

Commit 2a26df6

Browse files
authored
test: Vector operations benchmark was added (#5353)
1 parent bfed461 commit 2a26df6

File tree

1 file changed

+157
-0
lines changed

1 file changed

+157
-0
lines changed

src/core/search/search_test.cc

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -835,6 +835,43 @@ TEST_P(KnnTest, AutoResize) {
835835
// Instantiates the parameterized KnnTest suite under the "KnnFlat" prefix with the test parameter
// set to false.
// NOTE(review): presumably `false` selects the flat (non-HNSW) index - confirm against KnnTest.
INSTANTIATE_TEST_SUITE_P(KnnFlat, KnnTest, testing::Values(false));
836836
// Instantiates the parameterized KnnTest suite under the "KnnHnsw" prefix with the test parameter
// set to true.
// NOTE(review): presumably `true` selects the HNSW index - confirm against KnnTest.
INSTANTIATE_TEST_SUITE_P(KnnHnsw, KnnTest, testing::Values(true));
837837

838+
// Sanity-checks VectorDistance for the L2 and COSINE similarities on two small fixed vectors:
// the distance between distinct vectors must land in a loose plausible range, and the distance
// from a vector to itself must be zero within 1e-6.
TEST_F(SearchTest, VectorDistanceBasic) {
  std::vector<float> lhs = {1.0f, 2.0f, 3.0f};
  std::vector<float> rhs = {4.0f, 5.0f, 6.0f};
  const size_t dims = lhs.size();

  // Distinct vectors, L2: strictly positive and below a generous upper bound.
  const float l2_between = VectorDistance(lhs.data(), rhs.data(), dims, VectorSimilarity::L2);
  EXPECT_GT(l2_between, 0.0f);
  EXPECT_LT(l2_between, 10.0f);

  // Distinct vectors, cosine: the result has to stay inside [0, 2].
  const float cos_between = VectorDistance(lhs.data(), rhs.data(), dims, VectorSimilarity::COSINE);
  EXPECT_GE(cos_between, 0.0f);
  EXPECT_LE(cos_between, 2.0f);

  // A vector compared with itself must be at distance zero under both similarities.
  const float l2_self = VectorDistance(lhs.data(), lhs.data(), dims, VectorSimilarity::L2);
  EXPECT_NEAR(l2_self, 0.0f, 1e-6);

  const float cos_self = VectorDistance(lhs.data(), lhs.data(), dims, VectorSimilarity::COSINE);
  EXPECT_NEAR(cos_self, 0.0f, 1e-6);
}
860+
861+
// Verifies that VectorDistance is deterministic: two back-to-back calls with the same inputs
// must return bit-identical results, for both the L2 and COSINE similarities.
TEST_F(SearchTest, VectorDistanceConsistency) {
  std::vector<float> lhs = {0.1f, 0.2f, 0.3f, 0.4f, 0.5f};
  std::vector<float> rhs = {0.6f, 0.7f, 0.8f, 0.9f, 1.0f};

  // Single place that performs the call so every measurement uses identical arguments.
  auto measure = [&](VectorSimilarity sim) {
    return VectorDistance(lhs.data(), rhs.data(), lhs.size(), sim);
  };

  const float l2_first = measure(VectorSimilarity::L2);
  const float l2_second = measure(VectorSimilarity::L2);
  // Exact equality is intentional here: the same computation is repeated on the same data.
  EXPECT_EQ(l2_first, l2_second);

  const float cos_first = measure(VectorSimilarity::COSINE);
  const float cos_second = measure(VectorSimilarity::COSINE);
  EXPECT_EQ(cos_first, cos_second);
}
874+
838875
static void BM_VectorSearch(benchmark::State& state) {
839876
unsigned ndims = state.range(0);
840877
unsigned nvecs = state.range(1);
@@ -1119,6 +1156,126 @@ BENCHMARK(BM_SearchByType_Diverse)
11191156
->ArgNames({"docs", "pattern_len", "search_type"})
11201157
->Unit(benchmark::kMicrosecond);
11211158

1159+
// Builds a vector of `dims` floats drawn from std::uniform_real_distribution<float>(-1, 1).
// A fresh std::mt19937 is seeded with `seed` on every call, so calling this twice with the same
// (dims, seed) yields the same vector.
static std::vector<float> GenerateRandomVector(size_t dims, unsigned seed = 42) {
  std::mt19937 rng(seed);
  std::uniform_real_distribution<float> component_dist(-1.0f, 1.0f);

  std::vector<float> result(dims);
  for (float& component : result) {
    component = component_dist(rng);
  }
  return result;
}
1170+
1171+
// Benchmark vector distance calculation (parametrized by similarity type)
1172+
static void BM_VectorDistance(benchmark::State& state) {
1173+
size_t dims = state.range(0);
1174+
size_t num_pairs = state.range(1);
1175+
VectorSimilarity sim = static_cast<VectorSimilarity>(state.range(2));
1176+
1177+
// Generate test vectors with different seeds per similarity type
1178+
uint32_t seed_offset = (sim == VectorSimilarity::L2) ? 1000 : 2000;
1179+
std::vector<std::vector<float>> vectors_a, vectors_b;
1180+
vectors_a.reserve(num_pairs);
1181+
vectors_b.reserve(num_pairs);
1182+
1183+
for (size_t i = 0; i < num_pairs; ++i) {
1184+
vectors_a.push_back(GenerateRandomVector(dims, i));
1185+
vectors_b.push_back(GenerateRandomVector(dims, i + seed_offset));
1186+
}
1187+
1188+
size_t pair_idx = 0;
1189+
while (state.KeepRunning()) {
1190+
float distance =
1191+
VectorDistance(vectors_a[pair_idx].data(), vectors_b[pair_idx].data(), dims, sim);
1192+
benchmark::DoNotOptimize(distance);
1193+
1194+
pair_idx = (pair_idx + 1) % num_pairs;
1195+
}
1196+
1197+
state.counters["dims"] = dims;
1198+
state.counters["pairs"] = num_pairs;
1199+
1200+
std::string sim_name = (sim == VectorSimilarity::L2) ? "L2" : "Cosine";
1201+
state.SetLabel(sim_name);
1202+
}
1203+
1204+
// Benchmark with different vector dimensions, batch sizes and similarity types
1205+
BENCHMARK(BM_VectorDistance)
1206+
// Small vectors, different batch sizes - L2 Distance
1207+
->Args({32, 100, static_cast<int>(VectorSimilarity::L2)}) // 32D, 100 pairs
1208+
->Args({32, 1000, static_cast<int>(VectorSimilarity::L2)}) // 32D, 1K pairs
1209+
->Args({32, 10000, static_cast<int>(VectorSimilarity::L2)}) // 32D, 10K pairs
1210+
// Medium vectors - L2 Distance
1211+
->Args({128, 100, static_cast<int>(VectorSimilarity::L2)}) // 128D, 100 pairs
1212+
->Args({128, 1000, static_cast<int>(VectorSimilarity::L2)}) // 128D, 1K pairs
1213+
->Args({128, 10000, static_cast<int>(VectorSimilarity::L2)}) // 128D, 10K pairs
1214+
// Large vectors - L2 Distance
1215+
->Args({512, 100, static_cast<int>(VectorSimilarity::L2)}) // 512D, 100 pairs
1216+
->Args({512, 1000, static_cast<int>(VectorSimilarity::L2)}) // 512D, 1K pairs
1217+
->Args({512, 5000, static_cast<int>(VectorSimilarity::L2)}) // 512D, 5K pairs
1218+
// Very large vectors - L2 Distance
1219+
->Args({1536, 100, static_cast<int>(VectorSimilarity::L2)}) // 1536D, 100 pairs
1220+
->Args({1536, 1000, static_cast<int>(VectorSimilarity::L2)}) // 1536D, 1K pairs
1221+
1222+
// Small vectors, different batch sizes - Cosine Distance
1223+
->Args({32, 100, static_cast<int>(VectorSimilarity::COSINE)}) // 32D, 100 pairs
1224+
->Args({32, 1000, static_cast<int>(VectorSimilarity::COSINE)}) // 32D, 1K pairs
1225+
->Args({32, 10000, static_cast<int>(VectorSimilarity::COSINE)}) // 32D, 10K pairs
1226+
// Medium vectors - Cosine Distance
1227+
->Args({128, 100, static_cast<int>(VectorSimilarity::COSINE)}) // 128D, 100 pairs
1228+
->Args({128, 1000, static_cast<int>(VectorSimilarity::COSINE)}) // 128D, 1K pairs
1229+
->Args({128, 10000, static_cast<int>(VectorSimilarity::COSINE)}) // 128D, 10K pairs
1230+
// Large vectors - Cosine Distance
1231+
->Args({512, 100, static_cast<int>(VectorSimilarity::COSINE)}) // 512D, 100 pairs
1232+
->Args({512, 1000, static_cast<int>(VectorSimilarity::COSINE)}) // 512D, 1K pairs
1233+
->Args({512, 5000, static_cast<int>(VectorSimilarity::COSINE)}) // 512D, 5K pairs
1234+
// Very large vectors - Cosine Distance
1235+
->Args({1536, 100, static_cast<int>(VectorSimilarity::COSINE)}) // 1536D, 100 pairs
1236+
->Args({1536, 1000, static_cast<int>(VectorSimilarity::COSINE)}) // 1536D, 1K pairs
1237+
->ArgNames({"dims", "pairs", "similarity"})
1238+
->Unit(benchmark::kMicrosecond);
1239+
1240+
// Intensive benchmark for performance comparison
1241+
static void BM_VectorDistanceIntensive(benchmark::State& state) {
1242+
size_t dims = 512; // Fixed medium size
1243+
size_t batch_size = 1000;
1244+
VectorSimilarity sim = static_cast<VectorSimilarity>(state.range(0));
1245+
1246+
// Generate test vectors
1247+
std::vector<std::vector<float>> vectors_a, vectors_b;
1248+
vectors_a.reserve(batch_size);
1249+
vectors_b.reserve(batch_size);
1250+
1251+
for (size_t i = 0; i < batch_size; ++i) {
1252+
vectors_a.push_back(GenerateRandomVector(dims, i));
1253+
vectors_b.push_back(GenerateRandomVector(dims, i + 3000));
1254+
}
1255+
1256+
size_t total_ops = 0;
1257+
while (state.KeepRunning()) {
1258+
// Process entire batch
1259+
for (size_t i = 0; i < batch_size; ++i) {
1260+
float distance = VectorDistance(vectors_a[i].data(), vectors_b[i].data(), dims, sim);
1261+
benchmark::DoNotOptimize(distance);
1262+
++total_ops;
1263+
}
1264+
}
1265+
1266+
state.counters["ops"] = total_ops;
1267+
state.counters["ops_per_sec"] = benchmark::Counter(total_ops, benchmark::Counter::kIsRate);
1268+
1269+
std::string sim_name = (sim == VectorSimilarity::L2) ? "L2" : "Cosine";
1270+
state.SetLabel(sim_name + "_Intensive");
1271+
}
1272+
1273+
// Runs the batch benchmark once per similarity type: L2 first, then COSINE.
BENCHMARK(BM_VectorDistanceIntensive)
    ->Arg(static_cast<int64_t>(VectorSimilarity::L2))
    ->Arg(static_cast<int64_t>(VectorSimilarity::COSINE))
    ->Unit(benchmark::kMicrosecond)
    ->ArgNames({"similarity_type"});
1278+
11221279
static void BM_SearchDocIds(benchmark::State& state) {
11231280
auto schema = MakeSimpleSchema({{"score", SchemaField::NUMERIC}, {"tag", SchemaField::TAG}});
11241281
FieldIndices indices{schema, kEmptyOptions, PMR_NS::get_default_resource(), nullptr};

0 commit comments

Comments
 (0)