RedisAI · dor-forer · Jan 6, 2026 · Dec 28, 2025 · Dec 28, 2025 · Dec 28, 2025
diff --git a/src/VecSim/spaces/IP/IP.cpp b/src/VecSim/spaces/IP/IP.cpp
@@ -49,9 +49,10 @@ float SQ8_Cosine(const void *pVect1v, const void *pVect2v, size_t dimension) {
     return 1.0f - res;
 }
 
-// SQ8-to-SQ8: Both vectors are uint8 quantized with precomputed sum
+// SQ8-to-SQ8: Common inner product implementation that returns the raw inner product value
+// (not distance). Used by SQ8_SQ8_InnerProduct, SQ8_SQ8_Cosine, and SQ8_SQ8_L2Sqr.
 // Vector layout: [uint8_t values (dim)] [min_val (float)] [delta (float)] [sum (float)]
-float SQ8_SQ8_InnerProduct(const void *pVect1v, const void *pVect2v, size_t dimension) {
+float SQ8_SQ8_InnerProduct_Impl(const void *pVect1v, const void *pVect2v, size_t dimension) {
     const auto *pVect1 = static_cast<const uint8_t *>(pVect1v);
     const auto *pVect2 = static_cast<const uint8_t *>(pVect2v);
 
@@ -73,9 +74,14 @@ float SQ8_SQ8_InnerProduct(const void *pVect1v, const void *pVect2v, size_t dime
 
     // Apply the algebraic formula using precomputed sums:
     // IP = min1*sum2 + min2*sum1 + delta1*delta2*Σ(q1[i]*q2[i]) - dim*min1*min2
-    float res = min_val1 * sum2 + min_val2 * sum1 -
-                static_cast<float>(dimension) * min_val1 * min_val2 + delta1 * delta2 * product;
-    return 1.0f - res;
+    return min_val1 * sum2 + min_val2 * sum1 - static_cast<float>(dimension) * min_val1 * min_val2 +
+           delta1 * delta2 * product;
+}
+
+// SQ8-to-SQ8 inner product distance: 1 - raw IP of two [values][min_val][delta][sum] vectors.
+float SQ8_SQ8_InnerProduct(const void *pVect1v, const void *pVect2v, size_t dimension) {
+    const float raw_ip = SQ8_SQ8_InnerProduct_Impl(pVect1v, pVect2v, dimension);
+    return 1.0f - raw_ip;
 }
 
 // SQ8-to-SQ8: Both vectors are uint8 quantized and normalized with precomputed sum

diff --git a/src/VecSim/spaces/IP/IP.h b/src/VecSim/spaces/IP/IP.h
@@ -16,6 +16,11 @@ float SQ8_InnerProduct(const void *pVect1v, const void *pVect2v, size_t dimensio
 // pVect1v vector of type fp32 and pVect2v vector of type uint8
 float SQ8_Cosine(const void *pVect1v, const void *pVect2v, size_t dimension);
 
+// SQ8-to-SQ8: Common inner product implementation that returns the raw inner product value
+// (not distance). Used by SQ8_SQ8_InnerProduct, SQ8_SQ8_Cosine, and SQ8_SQ8_L2Sqr.
+// Vector layout: [uint8_t values (dim)] [min_val (float)] [delta (float)] [sum (float)]
+float SQ8_SQ8_InnerProduct_Impl(const void *pVect1v, const void *pVect2v, size_t dimension);
+
 // SQ8-to-SQ8: Both vectors are uint8 quantized with precomputed sum
 // Vector layout: [uint8_t values (dim)] [min_val (float)] [delta (float)] [sum (float)]
 float SQ8_SQ8_InnerProduct(const void *pVect1v, const void *pVect2v, size_t dimension);

diff --git a/src/VecSim/spaces/L2/L2.cpp b/src/VecSim/spaces/L2/L2.cpp
@@ -7,6 +7,7 @@
  * GNU Affero General Public License v3 (AGPLv3).
  */
 #include "L2.h"
+#include "VecSim/spaces/IP/IP.h"
 #include "VecSim/types/bfloat16.h"
 #include "VecSim/types/float16.h"
 #include <cstring>
@@ -132,3 +133,28 @@ float UINT8_L2Sqr(const void *pVect1v, const void *pVect2v, size_t dimension) {
     const auto *pVect2 = static_cast<const uint8_t *>(pVect2v);
     return float(INTEGER_L2Sqr(pVect1, pVect2, dimension));
 }
+
+// SQ8-to-SQ8 L2 squared distance (both vectors are uint8 quantized).
+// Vector layout: [uint8_t values (dim)] [min_val (float)] [delta (float)] [sum (float)]
+// [sum_of_squares (float)]
+// Uses ||x - y||² = ||x||² + ||y||² - 2*IP(x, y): both squared norms are read from the stored
+// sum_of_squares fields and IP(x, y) is computed by SQ8_SQ8_InnerProduct_Impl.
+float SQ8_SQ8_L2Sqr(const void *pVect1v, const void *pVect2v, size_t dimension) {
+    const auto *pVect1 = static_cast<const uint8_t *>(pVect1v);
+    const auto *pVect2 = static_cast<const uint8_t *>(pVect2v);
+
+    // sum_of_squares is the fourth float after the `dimension` quantized bytes. That offset is
+    // not guaranteed to be float-aligned (e.g. dimension % 4 != 0), so copy the value out with
+    // memcpy instead of dereferencing a possibly misaligned float pointer.
+    const size_t sum_sq_offset = dimension + 3 * sizeof(float);
+    float sum_sq_1;
+    float sum_sq_2;
+    std::memcpy(&sum_sq_1, pVect1 + sum_sq_offset, sizeof(float));
+    std::memcpy(&sum_sq_2, pVect2 + sum_sq_offset, sizeof(float));
+
+    // Use the common inner product implementation (raw inner product, not a distance)
+    const float ip = SQ8_SQ8_InnerProduct_Impl(pVect1v, pVect2v, dimension);
+
+    // L2² = ||x||² + ||y||² - 2*IP(x, y)
+    return sum_sq_1 + sum_sq_2 - 2.0f * ip;
+}
diff --git a/src/VecSim/spaces/L2/L2.h b/src/VecSim/spaces/L2/L2.h
@@ -25,3 +25,6 @@ float FP16_L2Sqr(const void *pVect1, const void *pVect2, size_t dimension);
 float INT8_L2Sqr(const void *pVect1v, const void *pVect2v, size_t dimension);
 
 float UINT8_L2Sqr(const void *pVect1v, const void *pVect2v, size_t dimension);
+
+// SQ8-to-SQ8 L2 squared distance (both vectors are uint8 quantized)
+float SQ8_SQ8_L2Sqr(const void *pVect1v, const void *pVect2v, size_t dimension);
diff --git a/src/VecSim/spaces/L2/L2_AVX512F_BW_VL_VNNI_SQ8_SQ8.h b/src/VecSim/spaces/L2/L2_AVX512F_BW_VL_VNNI_SQ8_SQ8.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2006-Present, Redis Ltd.
+ * All rights reserved.
+ *
+ * Licensed under your choice of the Redis Source Available License 2.0
+ * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
+ * GNU Affero General Public License v3 (AGPLv3).
+ */
+#pragma once
+#include "VecSim/spaces/space_includes.h"
+#include "VecSim/spaces/IP/IP_AVX512F_BW_VL_VNNI_SQ8_SQ8.h"
+
+/**
+ * SQ8-to-SQ8 L2 squared distance using AVX512 VNNI.
+ * Computes L2 squared distance between two SQ8 (scalar quantized 8-bit) vectors,
+ * where BOTH vectors are uint8 quantized.
+ *
+ * Uses the identity: ||x - y||² = ||x||² + ||y||² - 2*IP(x, y)
+ * where ||x||² and ||y||² are precomputed sum of squares stored in the vector data.
+ *
+ * Vector layout: [uint8_t values (dim)] [min_val (float)] [delta (float)] [sum (float)]
+ * [sum_of_squares (float)]
+ */
+
+// SQ8-to-SQ8 L2 squared distance derived from the common inner product implementation
+template <unsigned char residual> // 0..63
+float SQ8_SQ8_L2SqrSIMD64_AVX512F_BW_VL_VNNI(const void *pVec1v, const void *pVec2v,
+                                             size_t dimension) {
+    // sum_of_squares is the fourth float stored after the quantized values:
+    // [uint8_t values (dim)] [min_val] [delta] [sum] [sum_of_squares]
+    const size_t sum_sq_offset = dimension + 3 * sizeof(float);
+    const auto *lhs_bytes = static_cast<const uint8_t *>(pVec1v);
+    const auto *rhs_bytes = static_cast<const uint8_t *>(pVec2v);
+
+    // Raw inner product (not a distance) from the common implementation
+    const float raw_ip = SQ8_SQ8_InnerProductImp<residual>(pVec1v, pVec2v, dimension);
+    const float lhs_sum_sq = *reinterpret_cast<const float *>(lhs_bytes + sum_sq_offset);
+    const float rhs_sum_sq = *reinterpret_cast<const float *>(rhs_bytes + sum_sq_offset);
+
+    // ||x - y||² = ||x||² + ||y||² - 2*IP(x, y)
+    return lhs_sum_sq + rhs_sum_sq - 2.0f * raw_ip;
+}
diff --git a/src/VecSim/spaces/L2/L2_NEON_DOTPROD_SQ8_SQ8.h b/src/VecSim/spaces/L2/L2_NEON_DOTPROD_SQ8_SQ8.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2006-Present, Redis Ltd.
+ * All rights reserved.
+ *
+ * Licensed under your choice of the Redis Source Available License 2.0
+ * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
+ * GNU Affero General Public License v3 (AGPLv3).
+ */
+#pragma once
+#include "VecSim/spaces/space_includes.h"
+#include "VecSim/spaces/IP/IP_NEON_DOTPROD_SQ8_SQ8.h"
+
+/**
+ * SQ8-to-SQ8 L2 squared distance functions for NEON with DOTPROD extension.
+ * Computes L2 squared distance between two SQ8 (scalar quantized 8-bit) vectors,
+ * where BOTH vectors are uint8 quantized.
+ *
+ * Uses the identity: ||x - y||² = ||x||² + ||y||² - 2*IP(x, y)
+ * where ||x||² and ||y||² are precomputed sum of squares stored in the vector data.
+ *
+ * Vector layout: [uint8_t values (dim)] [min_val (float)] [delta (float)] [sum (float)]
+ * [sum_of_squares (float)]
+ */
+
+// SQ8-to-SQ8 L2 squared distance derived from the common inner product implementation
+template <unsigned char residual> // 0..63
+float SQ8_SQ8_L2SqrSIMD64_NEON_DOTPROD(const void *pVec1v, const void *pVec2v, size_t dimension) {
+    // sum_of_squares is the fourth float stored after the quantized values:
+    // [uint8_t values (dim)] [min_val] [delta] [sum] [sum_of_squares]
+    const size_t sum_sq_offset = dimension + 3 * sizeof(float);
+    const auto *lhs_bytes = static_cast<const uint8_t *>(pVec1v);
+    const auto *rhs_bytes = static_cast<const uint8_t *>(pVec2v);
+
+    // Raw inner product (not a distance) from the common implementation
+    const float raw_ip =
+        SQ8_SQ8_InnerProductSIMD64_NEON_DOTPROD_IMP<residual>(pVec1v, pVec2v, dimension);
+    const float lhs_sum_sq = *reinterpret_cast<const float *>(lhs_bytes + sum_sq_offset);
+    const float rhs_sum_sq = *reinterpret_cast<const float *>(rhs_bytes + sum_sq_offset);
+
+    // ||x - y||² = ||x||² + ||y||² - 2*IP(x, y)
+    return lhs_sum_sq + rhs_sum_sq - 2.0f * raw_ip;
+}
diff --git a/src/VecSim/spaces/L2/L2_NEON_SQ8_SQ8.h b/src/VecSim/spaces/L2/L2_NEON_SQ8_SQ8.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2006-Present, Redis Ltd.
+ * All rights reserved.
+ *
+ * Licensed under your choice of the Redis Source Available License 2.0
+ * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
+ * GNU Affero General Public License v3 (AGPLv3).
+ */
+#pragma once
+#include "VecSim/spaces/space_includes.h"
+#include "VecSim/spaces/IP/IP_NEON_SQ8_SQ8.h"
+
+/**
+ * SQ8-to-SQ8 L2 squared distance functions for NEON.
+ * Computes L2 squared distance between two SQ8 (scalar quantized 8-bit) vectors,
+ * where BOTH vectors are uint8 quantized.
+ *
+ * Uses the identity: ||x - y||² = ||x||² + ||y||² - 2*IP(x, y)
+ * where ||x||² and ||y||² are precomputed sum of squares stored in the vector data.
+ *
+ * Vector layout: [uint8_t values (dim)] [min_val (float)] [delta (float)] [sum (float)]
+ * [sum_of_squares (float)]
+ */
+
+// SQ8-to-SQ8 L2 squared distance derived from the common inner product implementation
+template <unsigned char residual> // 0..63
+float SQ8_SQ8_L2SqrSIMD64_NEON(const void *pVec1v, const void *pVec2v, size_t dimension) {
+    // sum_of_squares is the fourth float stored after the quantized values:
+    // [uint8_t values (dim)] [min_val] [delta] [sum] [sum_of_squares]
+    const size_t sum_sq_offset = dimension + 3 * sizeof(float);
+    const auto *lhs_bytes = static_cast<const uint8_t *>(pVec1v);
+    const auto *rhs_bytes = static_cast<const uint8_t *>(pVec2v);
+
+    // Raw inner product (not a distance) from the common implementation
+    const float raw_ip = SQ8_SQ8_InnerProductSIMD64_NEON_IMP<residual>(pVec1v, pVec2v, dimension);
+    const float lhs_sum_sq = *reinterpret_cast<const float *>(lhs_bytes + sum_sq_offset);
+    const float rhs_sum_sq = *reinterpret_cast<const float *>(rhs_bytes + sum_sq_offset);
+
+    // ||x - y||² = ||x||² + ||y||² - 2*IP(x, y)
+    return lhs_sum_sq + rhs_sum_sq - 2.0f * raw_ip;
+}
diff --git a/src/VecSim/spaces/L2/L2_SVE_SQ8_SQ8.h b/src/VecSim/spaces/L2/L2_SVE_SQ8_SQ8.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2006-Present, Redis Ltd.
+ * All rights reserved.
+ *
+ * Licensed under your choice of the Redis Source Available License 2.0
+ * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
+ * GNU Affero General Public License v3 (AGPLv3).
+ */
+#pragma once
+#include "VecSim/spaces/space_includes.h"
+#include "VecSim/spaces/IP/IP_SVE_SQ8_SQ8.h"
+
+/**
+ * SQ8-to-SQ8 L2 squared distance functions for SVE.
+ * Computes L2 squared distance between two SQ8 (scalar quantized 8-bit) vectors,
+ * where BOTH vectors are uint8 quantized.
+ *
+ * Uses the identity: ||x - y||² = ||x||² + ||y||² - 2*IP(x, y)
+ * where ||x||² and ||y||² are precomputed sum of squares stored in the vector data.
+ *
+ * Vector layout: [uint8_t values (dim)] [min_val (float)] [delta (float)] [sum (float)]
+ * [sum_of_squares (float)]
+ */
+
+// SQ8-to-SQ8 L2 squared distance derived from the common inner product implementation
+template <bool partial_chunk, unsigned char additional_steps>
+float SQ8_SQ8_L2SqrSIMD_SVE(const void *pVec1v, const void *pVec2v, size_t dimension) {
+    // sum_of_squares is the fourth float stored after the quantized values:
+    // [uint8_t values (dim)] [min_val] [delta] [sum] [sum_of_squares]
+    const size_t sum_sq_offset = dimension + 3 * sizeof(float);
+    const auto *lhs_bytes = static_cast<const uint8_t *>(pVec1v);
+    const auto *rhs_bytes = static_cast<const uint8_t *>(pVec2v);
+
+    // Raw inner product (not a distance) from the common implementation
+    const float raw_ip = SQ8_SQ8_InnerProductSIMD_SVE_IMP<partial_chunk, additional_steps>(
+        pVec1v, pVec2v, dimension);
+    const float lhs_sum_sq = *reinterpret_cast<const float *>(lhs_bytes + sum_sq_offset);
+    const float rhs_sum_sq = *reinterpret_cast<const float *>(rhs_bytes + sum_sq_offset);
+
+    // ||x - y||² = ||x||² + ||y||² - 2*IP(x, y)
+    return lhs_sum_sq + rhs_sum_sq - 2.0f * raw_ip;
+}
diff --git a/src/VecSim/spaces/L2_space.cpp b/src/VecSim/spaces/L2_space.cpp
@@ -417,4 +417,50 @@ dist_func_t<float> L2_UINT8_GetDistFunc(size_t dim, unsigned char *alignment,
     return ret_dist_func;
 }
 
+// Selects the SQ8-to-SQ8 L2 squared kernel for `dim`: the first SIMD variant below that is
+// compiled in and enabled in the CPU feature set, otherwise the scalar SQ8_SQ8_L2Sqr.
+dist_func_t<float> L2_SQ8_SQ8_GetDistFunc(size_t dim, unsigned char *alignment,
+                                          const void *arch_opt) {
+    unsigned char unused_alignment;
+    if (!alignment) {
+        alignment = &unused_alignment;
+    }
+    [[maybe_unused]] auto features = getCpuOptimizationFeatures(arch_opt);
+
+#if defined(CPU_FEATURES_ARCH_AARCH64)
+#if defined(OPT_SVE2)
+    if (features.sve2) {
+        return Choose_SQ8_SQ8_L2_implementation_SVE2(dim);
+    }
+#endif
+#if defined(OPT_SVE)
+    if (features.sve) {
+        return Choose_SQ8_SQ8_L2_implementation_SVE(dim);
+    }
+#endif
+#if defined(OPT_NEON_DOTPROD)
+    // DOTPROD uses integer arithmetic - much faster than float-based NEON
+    if (features.asimddp && dim >= 16) {
+        return Choose_SQ8_SQ8_L2_implementation_NEON_DOTPROD(dim);
+    }
+#endif
+#if defined(OPT_NEON)
+    if (features.asimd && dim >= 16) {
+        return Choose_SQ8_SQ8_L2_implementation_NEON(dim);
+    }
+#endif
+#endif // CPU_FEATURES_ARCH_AARCH64
+
+#if defined(CPU_FEATURES_ARCH_X86_64)
+#if defined(OPT_AVX512_F_BW_VL_VNNI)
+    // AVX512 VNNI SQ8_SQ8 uses 64-element chunks
+    if (features.avx512f && features.avx512bw && features.avx512vnni && dim >= 64) {
+        return Choose_SQ8_SQ8_L2_implementation_AVX512F_BW_VL_VNNI(dim);
+    }
+#endif
+#endif // CPU_FEATURES_ARCH_X86_64
+    // No SIMD variant applies: use the scalar implementation.
+    return SQ8_SQ8_L2Sqr;
+}
+
 } // namespace spaces
diff --git a/src/VecSim/spaces/L2_space.h b/src/VecSim/spaces/L2_space.h
@@ -24,4 +24,6 @@ dist_func_t<float> L2_UINT8_GetDistFunc(size_t dim, unsigned char *alignment = n
                                         const void *arch_opt = nullptr);
 dist_func_t<float> L2_SQ8_GetDistFunc(size_t dim, unsigned char *alignment = nullptr,
                                       const void *arch_opt = nullptr);
+dist_func_t<float> L2_SQ8_SQ8_GetDistFunc(size_t dim, unsigned char *alignment = nullptr,
+                                          const void *arch_opt = nullptr);
 } // namespace spaces
diff --git a/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.cpp b/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.cpp
@@ -18,6 +18,7 @@
 #include "VecSim/spaces/L2/L2_AVX512F_BW_VL_VNNI_SQ8.h"
 
 #include "VecSim/spaces/IP/IP_AVX512F_BW_VL_VNNI_SQ8_SQ8.h"
+#include "VecSim/spaces/L2/L2_AVX512F_BW_VL_VNNI_SQ8_SQ8.h"
 
 namespace spaces {
 
@@ -87,6 +88,12 @@ dist_func_t<float> Choose_SQ8_SQ8_Cosine_implementation_AVX512F_BW_VL_VNNI(size_
     return ret_dist_func;
 }
 
+dist_func_t<float> Choose_SQ8_SQ8_L2_implementation_AVX512F_BW_VL_VNNI(size_t dim) {
+    dist_func_t<float> ret_dist_func; // presumably assigned by the macro below per dim % 64
+    CHOOSE_IMPLEMENTATION(ret_dist_func, dim, 64, SQ8_SQ8_L2SqrSIMD64_AVX512F_BW_VL_VNNI);
+    return ret_dist_func;
+}
+
 #include "implementation_chooser_cleanup.h"
 
 } // namespace spaces
diff --git a/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.h b/src/VecSim/spaces/functions/AVX512F_BW_VL_VNNI.h
@@ -27,5 +27,6 @@ dist_func_t<float> Choose_SQ8_L2_implementation_AVX512F_BW_VL_VNNI(size_t dim);
 // SQ8-to-SQ8 distance functions (both vectors are uint8 quantized with precomputed sum)
 dist_func_t<float> Choose_SQ8_SQ8_IP_implementation_AVX512F_BW_VL_VNNI(size_t dim);
 dist_func_t<float> Choose_SQ8_SQ8_Cosine_implementation_AVX512F_BW_VL_VNNI(size_t dim);
+dist_func_t<float> Choose_SQ8_SQ8_L2_implementation_AVX512F_BW_VL_VNNI(size_t dim);
 
 } // namespace spaces
diff --git a/src/VecSim/spaces/functions/NEON.cpp b/src/VecSim/spaces/functions/NEON.cpp
@@ -18,6 +18,7 @@
 #include "VecSim/spaces/L2/L2_NEON_SQ8.h"
 #include "VecSim/spaces/IP/IP_NEON_SQ8.h"
 #include "VecSim/spaces/IP/IP_NEON_SQ8_SQ8.h"
+#include "VecSim/spaces/L2/L2_NEON_SQ8_SQ8.h"
 
 namespace spaces {
 
@@ -114,6 +115,12 @@ dist_func_t<float> Choose_SQ8_SQ8_Cosine_implementation_NEON(size_t dim) {
     return ret_dist_func;
 }
 
+dist_func_t<float> Choose_SQ8_SQ8_L2_implementation_NEON(size_t dim) {
+    dist_func_t<float> ret_dist_func; // presumably assigned by the macro below per dim % 64
+    CHOOSE_IMPLEMENTATION(ret_dist_func, dim, 64, SQ8_SQ8_L2SqrSIMD64_NEON);
+    return ret_dist_func;
+}
+
 #include "implementation_chooser_cleanup.h"
 
 } // namespace spaces
diff --git a/src/VecSim/spaces/functions/NEON.h b/src/VecSim/spaces/functions/NEON.h
@@ -33,5 +33,6 @@ dist_func_t<float> Choose_SQ8_Cosine_implementation_NEON(size_t dim);
 // SQ8-to-SQ8 distance functions (both vectors are uint8 quantized with precomputed sum)
 dist_func_t<float> Choose_SQ8_SQ8_IP_implementation_NEON(size_t dim);
 dist_func_t<float> Choose_SQ8_SQ8_Cosine_implementation_NEON(size_t dim);
+dist_func_t<float> Choose_SQ8_SQ8_L2_implementation_NEON(size_t dim);
 
 } // namespace spaces
diff --git a/src/VecSim/spaces/functions/NEON_DOTPROD.cpp b/src/VecSim/spaces/functions/NEON_DOTPROD.cpp
@@ -12,6 +12,7 @@
 #include "VecSim/spaces/IP/IP_NEON_DOTPROD_SQ8_SQ8.h"
 #include "VecSim/spaces/L2/L2_NEON_DOTPROD_INT8.h"
 #include "VecSim/spaces/L2/L2_NEON_DOTPROD_UINT8.h"
+#include "VecSim/spaces/L2/L2_NEON_DOTPROD_SQ8_SQ8.h"
 
 namespace spaces {
 
@@ -66,6 +67,12 @@ dist_func_t<float> Choose_SQ8_SQ8_Cosine_implementation_NEON_DOTPROD(size_t dim)
     return ret_dist_func;
 }
 
+dist_func_t<float> Choose_SQ8_SQ8_L2_implementation_NEON_DOTPROD(size_t dim) {
+    dist_func_t<float> ret_dist_func; // presumably assigned by the macro below per dim % 64
+    CHOOSE_IMPLEMENTATION(ret_dist_func, dim, 64, SQ8_SQ8_L2SqrSIMD64_NEON_DOTPROD);
+    return ret_dist_func;
+}
+
 #include "implementation_chooser_cleanup.h"
 
 } // namespace spaces
diff --git a/src/VecSim/spaces/functions/NEON_DOTPROD.h b/src/VecSim/spaces/functions/NEON_DOTPROD.h
@@ -24,5 +24,6 @@ dist_func_t<float> Choose_UINT8_L2_implementation_NEON_DOTPROD(size_t dim);
 // SQ8-to-SQ8 DOTPROD-optimized distance functions (with precomputed sum)
 dist_func_t<float> Choose_SQ8_SQ8_IP_implementation_NEON_DOTPROD(size_t dim);
 dist_func_t<float> Choose_SQ8_SQ8_Cosine_implementation_NEON_DOTPROD(size_t dim);
+dist_func_t<float> Choose_SQ8_SQ8_L2_implementation_NEON_DOTPROD(size_t dim);
 
 } // namespace spaces