From 20627c1c733baa2439c7bde4378d5a6d3600b044 Mon Sep 17 00:00:00 2001
From: krunal
Date: Fri, 16 Aug 2024 16:11:46 +0530
Subject: [PATCH 1/2] hamming distance InnerProduct for booleans

Add InnerProductDistance2, a distance function for boolean vectors that
XORs the two inputs element by element (read as int) and accumulates the
result. InnerProductSpace selects it when is_boolean is true.

is_boolean defaults to false so that existing InnerProductSpace(dim)
callers keep compiling and keep the float inner-product behaviour.

---
 hnswlib/space_ip.h | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/hnswlib/space_ip.h b/hnswlib/space_ip.h
index 2b1c359e..ae48d63a 100644
--- a/hnswlib/space_ip.h
+++ b/hnswlib/space_ip.h
@@ -18,6 +18,19 @@ InnerProductDistance(const void *pVect1, const void *pVect2, const void *qty_ptr
     return 1.0f - InnerProduct(pVect1, pVect2, qty_ptr);
 }
 
+static float
+InnerProductDistance2(const void *pVect1, const void *pVect2, const void *qty_ptr) {
+    size_t qty = *((size_t *) qty_ptr);
+    float res = 0;
+    int a, b;
+    for (unsigned i = 0; i < qty; i++) {
+        a = ((int*)pVect1)[i];
+        b = ((int *) pVect2)[i];
+        res += a ^ b ;
+    }
+    return res;
+}
+
 #if defined(USE_AVX)
 
 // Favor using AVX if available.
@@ -320,7 +333,7 @@ class InnerProductSpace : public SpaceInterface {
     size_t dim_;
 
  public:
-    InnerProductSpace(size_t dim) {
+    InnerProductSpace(size_t dim, bool is_boolean = false) {
         fstdistfunc_ = InnerProductDistance;
 #if defined(USE_AVX) || defined(USE_SSE) || defined(USE_AVX512)
     #if defined(USE_AVX512)
@@ -353,9 +366,13 @@ class InnerProductSpace : public SpaceInterface {
         else if (dim > 4)
             fstdistfunc_ = InnerProductDistanceSIMD4ExtResiduals;
 #endif
-        dim_ = dim;
-        data_size_ = dim * sizeof(float);
-    }
+	if(is_boolean) {
+		fstdistfunc_ = InnerProductDistance2;
+	}
+
+	dim_ = dim;
+	data_size_ = dim * sizeof(float);
+	}
 
     size_t get_data_size() {
         return data_size_;
From ce756b0ca2e83a8815886c193201f26674d75585 Mon Sep 17 00:00:00 2001
From: krunal
Date: Fri, 23 Aug 2024 13:12:39 +0530
Subject: [PATCH 2/2] correct hamming distance

Count the differing bits of every element pair with
__builtin_popcount(a ^ b) and accumulate those counts.

Applying popcount once to the accumulated sum of XOR values does not
produce a Hamming distance (a sum of 3 would be reported as 2), and it
converts the float accumulator to unsigned int, which is out of range
once the sum grows large.

---
 hnswlib/space_ip.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hnswlib/space_ip.h b/hnswlib/space_ip.h
index ae48d63a..bc3168cf 100644
--- a/hnswlib/space_ip.h
+++ b/hnswlib/space_ip.h
@@ -26,7 +26,7 @@ InnerProductDistance2(const void *pVect1, const void *pVect2, const void *qty_pt
     for (unsigned i = 0; i < qty; i++) {
         a = ((int*)pVect1)[i];
         b = ((int *) pVect2)[i];
-        res += a ^ b ;
+        res += __builtin_popcount(a ^ b);
     }
     return res;
 }