Skip to content

Commit 327d3c6

Browse files
authored
SimSIMD integration for Vector operations (#5348)
* feat: SimSIMD support added * fix: cleanup * fix: revert added duplicate * fix: revert * fix: dragonfly only simsimd benchmark
1 parent 5c056be commit 327d3c6

File tree

3 files changed

+205
-0
lines changed

3 files changed

+205
-0
lines changed

src/CMakeLists.txt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,17 @@ add_third_party(
135135
LIB libhdr_histogram_static.a
136136
)
137137

138+
# Opt-in switch for the SimSIMD third-party dependency (off by default).
option(USE_SIMSIMD "Enable SimSIMD vector optimizations" OFF)

if(USE_SIMSIMD)
  # Nothing is compiled for this dependency: the build step is a no-op and the
  # install step only copies the include/ tree, hence LIB "none".
  add_third_party(
    simsimd
    URL https://github.com/ashvardanian/SimSIMD/archive/refs/tags/v6.4.9.tar.gz
    BUILD_COMMAND echo SKIP
    INSTALL_COMMAND cp -R <SOURCE_DIR>/include ${THIRD_PARTY_LIB_DIR}/simsimd/
    LIB "none"
  )
endif()
148+
138149

139150
add_library(TRDP::jsoncons INTERFACE IMPORTED)
140151
add_dependencies(TRDP::jsoncons jsoncons_project)
@@ -156,6 +167,13 @@ add_dependencies(TRDP::fast_float fast_float_project)
156167
set_target_properties(TRDP::fast_float PROPERTIES
157168
INTERFACE_INCLUDE_DIRECTORIES "${FAST_FLOAT_INCLUDE_DIR}")
158169

170+
if(USE_SIMSIMD)
  # Interface-only imported target: it carries the SimSIMD include path and
  # orders consumers after the simsimd_project external build.
  add_library(TRDP::simsimd INTERFACE IMPORTED)
  add_dependencies(TRDP::simsimd simsimd_project)
  set_property(TARGET TRDP::simsimd PROPERTY
               INTERFACE_INCLUDE_DIRECTORIES "${SIMSIMD_INCLUDE_DIR}")
endif()
176+
159177
Message(STATUS "THIRD_PARTY_LIB_DIR ${THIRD_PARTY_LIB_DIR}")
160178

161179
option(ENABLE_GIT_VERSION "Build with Git metadata" OFF)

src/core/search/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,8 @@ cxx_test(block_list_test query_parser LABELS DFLY)
1616
cxx_test(rax_tree_test redis_test_lib LABELS DFLY)
1717
cxx_test(search_parser_test query_parser LABELS DFLY)
1818
cxx_test(search_test redis_test_lib query_parser LABELS DFLY)
19+
20+
if(USE_SIMSIMD)
  # Expose USE_SIMSIMD to search_test.cc so the SimSIMD-only tests and
  # benchmarks are compiled in, and make the SimSIMD headers available.
  target_compile_definitions(search_test PRIVATE USE_SIMSIMD=1)
  target_link_libraries(search_test TRDP::simsimd)
endif()

src/core/search/search_test.cc

Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1304,5 +1304,187 @@ static void BM_SearchDocIds(benchmark::State& state) {
13041304
}
13051305
BENCHMARK(BM_SearchDocIds)->Range(0, 2);
13061306

1307+
#ifdef USE_SIMSIMD
1308+
1309+
#define SIMSIMD_NATIVE_F16 0
1310+
#define SIMSIMD_NATIVE_BF16 0
1311+
#include <simsimd/simsimd.h>
1312+
1313+
namespace {
1314+
1315+
// SimSIMD implementations for testing
1316+
float SimSIMD_L2Distance(const float* u, const float* v, size_t dims) {
1317+
simsimd_distance_t distance = 0;
1318+
simsimd_l2_f32(u, v, dims, &distance); // Note: direct L2 instead of squared
1319+
return static_cast<float>(distance);
1320+
}
1321+
1322+
float SimSIMD_CosineDistance(const float* u, const float* v, size_t dims) {
1323+
simsimd_distance_t distance = 0;
1324+
simsimd_cos_f32(u, v, dims, &distance);
1325+
return static_cast<float>(distance);
1326+
}
1327+
1328+
} // namespace
1329+
1330+
// Test that SimSIMD functions produce similar results to original functions
1331+
TEST(SimSIMDTest, CompareWithOriginal) {
1332+
const size_t dims = 128;
1333+
auto vec1 = GenerateRandomVector(dims, 1);
1334+
auto vec2 = GenerateRandomVector(dims, 2);
1335+
1336+
// Test L2 distance
1337+
float original_l2 = VectorDistance(vec1.data(), vec2.data(), dims, VectorSimilarity::L2);
1338+
float simsimd_l2 = SimSIMD_L2Distance(vec1.data(), vec2.data(), dims);
1339+
1340+
// Allow small floating point differences
1341+
EXPECT_NEAR(original_l2, simsimd_l2, 1e-5f) << "L2 distances should be nearly equal";
1342+
1343+
// Test Cosine distance
1344+
float original_cosine = VectorDistance(vec1.data(), vec2.data(), dims, VectorSimilarity::COSINE);
1345+
float simsimd_cosine = SimSIMD_CosineDistance(vec1.data(), vec2.data(), dims);
1346+
1347+
EXPECT_NEAR(original_cosine, simsimd_cosine, 1e-5f) << "Cosine distances should be nearly equal";
1348+
}
1349+
1350+
// Benchmark SimSIMD L2 distance
1351+
static void BM_SimSIMD_L2Distance(benchmark::State& state) {
1352+
size_t dims = state.range(0);
1353+
size_t num_pairs = state.range(1);
1354+
1355+
std::vector<std::vector<float>> vectors_a, vectors_b;
1356+
vectors_a.reserve(num_pairs);
1357+
vectors_b.reserve(num_pairs);
1358+
1359+
for (size_t i = 0; i < num_pairs; ++i) {
1360+
vectors_a.push_back(GenerateRandomVector(dims, i));
1361+
vectors_b.push_back(GenerateRandomVector(dims, i + 1000));
1362+
}
1363+
1364+
size_t pair_idx = 0;
1365+
while (state.KeepRunning()) {
1366+
float distance =
1367+
SimSIMD_L2Distance(vectors_a[pair_idx].data(), vectors_b[pair_idx].data(), dims);
1368+
benchmark::DoNotOptimize(distance);
1369+
1370+
pair_idx = (pair_idx + 1) % num_pairs;
1371+
}
1372+
1373+
state.counters["dims"] = dims;
1374+
state.counters["pairs"] = num_pairs;
1375+
state.SetLabel("SimSIMD_L2");
1376+
}
1377+
1378+
// Benchmark SimSIMD Cosine distance
1379+
static void BM_SimSIMD_CosineDistance(benchmark::State& state) {
1380+
size_t dims = state.range(0);
1381+
size_t num_pairs = state.range(1);
1382+
1383+
std::vector<std::vector<float>> vectors_a, vectors_b;
1384+
vectors_a.reserve(num_pairs);
1385+
vectors_b.reserve(num_pairs);
1386+
1387+
for (size_t i = 0; i < num_pairs; ++i) {
1388+
vectors_a.push_back(GenerateRandomVector(dims, i));
1389+
vectors_b.push_back(GenerateRandomVector(dims, i + 2000));
1390+
}
1391+
1392+
size_t pair_idx = 0;
1393+
while (state.KeepRunning()) {
1394+
float distance =
1395+
SimSIMD_CosineDistance(vectors_a[pair_idx].data(), vectors_b[pair_idx].data(), dims);
1396+
benchmark::DoNotOptimize(distance);
1397+
1398+
pair_idx = (pair_idx + 1) % num_pairs;
1399+
}
1400+
1401+
state.counters["dims"] = dims;
1402+
state.counters["pairs"] = num_pairs;
1403+
state.SetLabel("SimSIMD_Cosine");
1404+
}
1405+
1406+
// SimSIMD benchmarks with same parameters as original VectorDistance benchmarks
1407+
BENCHMARK(BM_SimSIMD_L2Distance)
1408+
// Small vectors
1409+
->Args({32, 100}) // 32D, 100 pairs
1410+
->Args({32, 1000}) // 32D, 1K pairs
1411+
->Args({32, 10000}) // 32D, 10K pairs
1412+
// Medium vectors
1413+
->Args({128, 100}) // 128D, 100 pairs
1414+
->Args({128, 1000}) // 128D, 1K pairs
1415+
->Args({128, 10000}) // 128D, 10K pairs
1416+
// Large vectors
1417+
->Args({512, 100}) // 512D, 100 pairs
1418+
->Args({512, 1000}) // 512D, 1K pairs
1419+
->Args({512, 5000}) // 512D, 5K pairs
1420+
// Very large vectors
1421+
->Args({1536, 100}) // 1536D, 100 pairs
1422+
->Args({1536, 1000}) // 1536D, 1K pairs
1423+
->ArgNames({"dims", "pairs"})
1424+
->Unit(benchmark::kMicrosecond);
1425+
1426+
// Cosine-distance runs; each Args entry is {dims, pairs}.
BENCHMARK(BM_SimSIMD_CosineDistance)
    ->ArgNames({"dims", "pairs"})
    ->Unit(benchmark::kMicrosecond)
    // Small vectors (32D): 100 / 1K / 10K pairs
    ->Args({32, 100})
    ->Args({32, 1000})
    ->Args({32, 10000})
    // Medium vectors (128D): 100 / 1K / 10K pairs
    ->Args({128, 100})
    ->Args({128, 1000})
    ->Args({128, 10000})
    // Large vectors (512D): 100 / 1K / 5K pairs
    ->Args({512, 100})
    ->Args({512, 1000})
    ->Args({512, 5000})
    // Very large vectors (1536D): 100 / 1K pairs
    ->Args({1536, 100})
    ->Args({1536, 1000});
1444+
1445+
// Intensive benchmark for SimSIMD performance comparison
1446+
static void BM_SimSIMD_Intensive(benchmark::State& state) {
1447+
size_t dims = 512; // Fixed medium size
1448+
size_t batch_size = 1000;
1449+
bool use_l2 = state.range(0) == 0;
1450+
1451+
std::vector<std::vector<float>> vectors_a, vectors_b;
1452+
vectors_a.reserve(batch_size);
1453+
vectors_b.reserve(batch_size);
1454+
1455+
for (size_t i = 0; i < batch_size; ++i) {
1456+
vectors_a.push_back(GenerateRandomVector(dims, i));
1457+
vectors_b.push_back(GenerateRandomVector(dims, i + 4000));
1458+
}
1459+
1460+
size_t total_ops = 0;
1461+
while (state.KeepRunning()) {
1462+
for (size_t i = 0; i < batch_size; ++i) {
1463+
float distance;
1464+
if (use_l2) {
1465+
distance = SimSIMD_L2Distance(vectors_a[i].data(), vectors_b[i].data(), dims);
1466+
} else {
1467+
distance = SimSIMD_CosineDistance(vectors_a[i].data(), vectors_b[i].data(), dims);
1468+
}
1469+
benchmark::DoNotOptimize(distance);
1470+
++total_ops;
1471+
}
1472+
}
1473+
1474+
state.counters["ops"] = total_ops;
1475+
state.counters["ops_per_sec"] = benchmark::Counter(total_ops, benchmark::Counter::kIsRate);
1476+
1477+
std::string label = use_l2 ? "SimSIMD_L2_Intensive" : "SimSIMD_Cosine_Intensive";
1478+
state.SetLabel(label);
1479+
}
1480+
1481+
// distance_type: 0 = L2, 1 = Cosine.
BENCHMARK(BM_SimSIMD_Intensive)
    ->ArgNames({"distance_type"})
    ->Unit(benchmark::kMicrosecond)
    ->Arg(0)
    ->Arg(1);
1486+
1487+
#endif // USE_SIMSIMD
1488+
13071489
} // namespace search
13081490
} // namespace dfly

0 commit comments

Comments
 (0)