diff --git a/src/VecSim/spaces/IP/IP.cpp b/src/VecSim/spaces/IP/IP.cpp index d07f526d9..1b0d2db24 100644 --- a/src/VecSim/spaces/IP/IP.cpp +++ b/src/VecSim/spaces/IP/IP.cpp @@ -9,10 +9,12 @@ #include "IP.h" #include "VecSim/types/bfloat16.h" #include "VecSim/types/float16.h" +#include "VecSim/types/sq8.h" #include using bfloat16 = vecsim_types::bfloat16; using float16 = vecsim_types::float16; +using sq8 = vecsim_types::sq8; float FLOAT_INTEGER_InnerProduct(const float *pVect1v, const uint8_t *pVect2v, size_t dimension, float min_val, float delta) { @@ -63,14 +65,16 @@ float SQ8_SQ8_InnerProduct_Impl(const void *pVect1v, const void *pVect2v, size_t } // Get quantization parameters from pVect1 - const float min_val1 = *reinterpret_cast(pVect1 + dimension); - const float delta1 = *reinterpret_cast(pVect1 + dimension + sizeof(float)); - const float sum1 = *reinterpret_cast(pVect1 + dimension + 2 * sizeof(float)); + const float *params1 = reinterpret_cast(pVect1 + dimension); + const float min_val1 = params1[sq8::MIN_VAL]; + const float delta1 = params1[sq8::DELTA]; + const float sum1 = params1[sq8::SUM]; // Get quantization parameters from pVect2 - const float min_val2 = *reinterpret_cast(pVect2 + dimension); - const float delta2 = *reinterpret_cast(pVect2 + dimension + sizeof(float)); - const float sum2 = *reinterpret_cast(pVect2 + dimension + 2 * sizeof(float)); + const float *params2 = reinterpret_cast(pVect2 + dimension); + const float min_val2 = params2[sq8::MIN_VAL]; + const float delta2 = params2[sq8::DELTA]; + const float sum2 = params2[sq8::SUM]; // Apply the algebraic formula using precomputed sums: // IP = min1*sum2 + min2*sum1 + delta1*delta2*Σ(q1[i]*q2[i]) - dim*min1*min2 diff --git a/src/VecSim/spaces/IP/IP_AVX512F_BW_VL_VNNI_SQ8_SQ8.h b/src/VecSim/spaces/IP/IP_AVX512F_BW_VL_VNNI_SQ8_SQ8.h index 9b4f7e01a..899b466b9 100644 --- a/src/VecSim/spaces/IP/IP_AVX512F_BW_VL_VNNI_SQ8_SQ8.h +++ b/src/VecSim/spaces/IP/IP_AVX512F_BW_VL_VNNI_SQ8_SQ8.h @@ -9,8 +9,11 @@ #pragma once #include "VecSim/spaces/space_includes.h" #include "VecSim/spaces/IP/IP_AVX512F_BW_VL_VNNI_UINT8.h" +#include "VecSim/types/sq8.h" #include +using sq8 = vecsim_types::sq8; + /** * SQ8-to-SQ8 distance functions using AVX512 VNNI with precomputed sum. * These functions compute distance between two SQ8 (scalar quantized 8-bit) vectors, @@ -45,14 +48,14 @@ float SQ8_SQ8_InnerProductImp(const void *pVec1v, const void *pVec2v, size_t dim const uint8_t *pVec2 = static_cast(pVec2v); const float *params1 = reinterpret_cast(pVec1 + dimension); - const float min1 = params1[0]; - const float delta1 = params1[1]; - const float sum1 = params1[2]; // Precomputed sum of original float elements + const float min1 = params1[sq8::MIN_VAL]; + const float delta1 = params1[sq8::DELTA]; + const float sum1 = params1[sq8::SUM]; // Precomputed sum of original float elements const float *params2 = reinterpret_cast(pVec2 + dimension); - const float min2 = params2[0]; - const float delta2 = params2[1]; - const float sum2 = params2[2]; // Precomputed sum of original float elements + const float min2 = params2[sq8::MIN_VAL]; + const float delta2 = params2[sq8::DELTA]; + const float sum2 = params2[sq8::SUM]; // Precomputed sum of original float elements // Apply the algebraic formula using precomputed sums: // IP = min1*sum2 + min2*sum1 + δ1*δ2 * Σ(q1[i]*q2[i]) - dim*min1*min2 diff --git a/src/VecSim/spaces/IP/IP_NEON_DOTPROD_SQ8_SQ8.h b/src/VecSim/spaces/IP/IP_NEON_DOTPROD_SQ8_SQ8.h index 7b2ed8829..7a122974f 100644 --- a/src/VecSim/spaces/IP/IP_NEON_DOTPROD_SQ8_SQ8.h +++ b/src/VecSim/spaces/IP/IP_NEON_DOTPROD_SQ8_SQ8.h @@ -9,8 +9,11 @@ #pragma once #include "VecSim/spaces/space_includes.h" #include "VecSim/spaces/IP/IP_NEON_DOTPROD_UINT8.h" +#include "VecSim/types/sq8.h" #include +using sq8 = vecsim_types::sq8; + /** * SQ8-to-SQ8 distance functions using ARM NEON DOTPROD with precomputed sum. * These functions compute distance between two SQ8 (scalar quantized 8-bit) vectors, @@ -46,14 +49,14 @@ float SQ8_SQ8_InnerProductSIMD64_NEON_DOTPROD_IMP(const void *pVec1v, const void const uint8_t *pVec2 = static_cast(pVec2v); const float *params1 = reinterpret_cast(pVec1 + dimension); - const float min1 = params1[0]; - const float delta1 = params1[1]; - const float sum1 = params1[2]; // Precomputed sum of original float elements + const float min1 = params1[sq8::MIN_VAL]; + const float delta1 = params1[sq8::DELTA]; + const float sum1 = params1[sq8::SUM]; // Precomputed sum of original float elements const float *params2 = reinterpret_cast(pVec2 + dimension); - const float min2 = params2[0]; - const float delta2 = params2[1]; - const float sum2 = params2[2]; // Precomputed sum of original float elements + const float min2 = params2[sq8::MIN_VAL]; + const float delta2 = params2[sq8::DELTA]; + const float sum2 = params2[sq8::SUM]; // Precomputed sum of original float elements // Apply algebraic formula using precomputed sums: // IP = min1*sum2 + min2*sum1 + δ1*δ2 * Σ(q1*q2) - dim*min1*min2 diff --git a/src/VecSim/spaces/IP/IP_NEON_SQ8_SQ8.h b/src/VecSim/spaces/IP/IP_NEON_SQ8_SQ8.h index 8d6cbd650..b89586322 100644 --- a/src/VecSim/spaces/IP/IP_NEON_SQ8_SQ8.h +++ b/src/VecSim/spaces/IP/IP_NEON_SQ8_SQ8.h @@ -9,8 +9,11 @@ #pragma once #include "VecSim/spaces/space_includes.h" #include "VecSim/spaces/IP/IP_NEON_UINT8.h" +#include "VecSim/types/sq8.h" #include +using sq8 = vecsim_types::sq8; + /** * SQ8-to-SQ8 distance functions using ARM NEON with precomputed sum. * These functions compute distance between two SQ8 (scalar quantized 8-bit) vectors, @@ -46,15 +49,15 @@ float SQ8_SQ8_InnerProductSIMD64_NEON_IMP(const void *pVec1v, const void *pVec2v const uint8_t *pVec2 = static_cast(pVec2v); const float *params1 = reinterpret_cast(pVec1 + dimension); - const float min1 = params1[0]; - const float delta1 = params1[1]; - const float sum1 = params1[2]; // Precomputed sum of original float elements + const float min1 = params1[sq8::MIN_VAL]; + const float delta1 = params1[sq8::DELTA]; + const float sum1 = params1[sq8::SUM]; // Precomputed sum of original float elements // Get dequantization parameters and precomputed values from the end of pVec2 const float *params2 = reinterpret_cast(pVec2 + dimension); - const float min2 = params2[0]; - const float delta2 = params2[1]; - const float sum2 = params2[2]; // Precomputed sum of original float elements + const float min2 = params2[sq8::MIN_VAL]; + const float delta2 = params2[sq8::DELTA]; + const float sum2 = params2[sq8::SUM]; // Precomputed sum of original float elements // Apply algebraic formula using precomputed sums: // IP = min1*sum2 + min2*sum1 + δ1*δ2 * Σ(q1*q2) - dim*min1*min2 diff --git a/src/VecSim/spaces/IP/IP_SVE_SQ8_SQ8.h b/src/VecSim/spaces/IP/IP_SVE_SQ8_SQ8.h index e0369f5b7..a752817dd 100644 --- a/src/VecSim/spaces/IP/IP_SVE_SQ8_SQ8.h +++ b/src/VecSim/spaces/IP/IP_SVE_SQ8_SQ8.h @@ -9,8 +9,11 @@ #pragma once #include "VecSim/spaces/space_includes.h" #include "VecSim/spaces/IP/IP_SVE_UINT8.h" +#include "VecSim/types/sq8.h" #include +using sq8 = vecsim_types::sq8; + /** * SQ8-to-SQ8 distance functions using ARM SVE with precomputed sum. * These functions compute distance between two SQ8 (scalar quantized 8-bit) vectors, @@ -46,14 +49,14 @@ float SQ8_SQ8_InnerProductSIMD_SVE_IMP(const void *pVec1v, const void *pVec2v, s const uint8_t *pVec2 = static_cast(pVec2v); const float *params1 = reinterpret_cast(pVec1 + dimension); - const float min1 = params1[0]; - const float delta1 = params1[1]; - const float sum1 = params1[2]; // Precomputed sum of original float elements + const float min1 = params1[sq8::MIN_VAL]; + const float delta1 = params1[sq8::DELTA]; + const float sum1 = params1[sq8::SUM]; // Precomputed sum of original float elements const float *params2 = reinterpret_cast(pVec2 + dimension); - const float min2 = params2[0]; - const float delta2 = params2[1]; - const float sum2 = params2[2]; // Precomputed sum of original float elements + const float min2 = params2[sq8::MIN_VAL]; + const float delta2 = params2[sq8::DELTA]; + const float sum2 = params2[sq8::SUM]; // Precomputed sum of original float elements // Apply algebraic formula with float conversion only at the end: // IP = min1*sum2 + min2*sum1 + δ1*δ2 * Σ(q1*q2) - dim*min1*min2 diff --git a/src/VecSim/spaces/L2/L2.cpp b/src/VecSim/spaces/L2/L2.cpp index a629a173c..3d5f7490d 100644 --- a/src/VecSim/spaces/L2/L2.cpp +++ b/src/VecSim/spaces/L2/L2.cpp @@ -10,11 +10,13 @@ #include "VecSim/spaces/IP/IP.h" #include "VecSim/types/bfloat16.h" #include "VecSim/types/float16.h" +#include "VecSim/types/sq8.h" #include #include using bfloat16 = vecsim_types::bfloat16; using float16 = vecsim_types::float16; +using sq8 = vecsim_types::sq8; float SQ8_L2Sqr(const void *pVect1v, const void *pVect2v, size_t dimension) { const auto *pVect1 = static_cast(pVect1v); @@ -149,8 +151,10 @@ float SQ8_SQ8_L2Sqr(const void *pVect1v, const void *pVect2v, size_t dimension) // Get precomputed sum of squares from both vectors // Layout: [uint8_t values (dim)] [min_val] [delta] [sum] [sum_of_squares] - const float sum_sq_1 = *reinterpret_cast(pVect1 + dimension + 3 * sizeof(float)); - const float sum_sq_2 = *reinterpret_cast(pVect2 + dimension + 3 * sizeof(float)); + const float sum_sq_1 = + *reinterpret_cast(pVect1 + dimension + sq8::SUM_SQUARES * sizeof(float)); + const float sum_sq_2 = + *reinterpret_cast(pVect2 + dimension + sq8::SUM_SQUARES * sizeof(float)); // Use the common inner product implementation const float ip = SQ8_SQ8_InnerProduct_Impl(pVect1v, pVect2v, dimension); diff --git a/src/VecSim/spaces/L2/L2_NEON_DOTPROD_SQ8_SQ8.h b/src/VecSim/spaces/L2/L2_NEON_DOTPROD_SQ8_SQ8.h index ce2327afb..7de9f336a 100644 --- a/src/VecSim/spaces/L2/L2_NEON_DOTPROD_SQ8_SQ8.h +++ b/src/VecSim/spaces/L2/L2_NEON_DOTPROD_SQ8_SQ8.h @@ -9,6 +9,9 @@ #pragma once #include "VecSim/spaces/space_includes.h" #include "VecSim/spaces/IP/IP_NEON_DOTPROD_SQ8_SQ8.h" +#include "VecSim/types/sq8.h" + +using sq8 = vecsim_types::sq8; /** * SQ8-to-SQ8 L2 squared distance functions for NEON with DOTPROD extension. @@ -34,8 +37,10 @@ float SQ8_SQ8_L2SqrSIMD64_NEON_DOTPROD(const void *pVec1v, const void *pVec2v, s // Get precomputed sum of squares from both vectors // Layout: [uint8_t values (dim)] [min_val] [delta] [sum] [sum_of_squares] - const float sum_sq_1 = *reinterpret_cast(pVec1 + dimension + 3 * sizeof(float)); - const float sum_sq_2 = *reinterpret_cast(pVec2 + dimension + 3 * sizeof(float)); + const float sum_sq_1 = + *reinterpret_cast(pVec1 + dimension + sq8::SUM_SQUARES * sizeof(float)); + const float sum_sq_2 = + *reinterpret_cast(pVec2 + dimension + sq8::SUM_SQUARES * sizeof(float)); // L2² = ||x||² + ||y||² - 2*IP(x, y) return sum_sq_1 + sum_sq_2 - 2.0f * ip; diff --git a/src/VecSim/spaces/L2/L2_NEON_SQ8_SQ8.h b/src/VecSim/spaces/L2/L2_NEON_SQ8_SQ8.h index 945248413..e86838404 100644 --- a/src/VecSim/spaces/L2/L2_NEON_SQ8_SQ8.h +++ b/src/VecSim/spaces/L2/L2_NEON_SQ8_SQ8.h @@ -9,6 +9,9 @@ #pragma once #include "VecSim/spaces/space_includes.h" #include "VecSim/spaces/IP/IP_NEON_SQ8_SQ8.h" +#include "VecSim/types/sq8.h" + +using sq8 = vecsim_types::sq8; /** * SQ8-to-SQ8 L2 squared distance functions for NEON. @@ -33,8 +36,10 @@ float SQ8_SQ8_L2SqrSIMD64_NEON(const void *pVec1v, const void *pVec2v, size_t di // Get precomputed sum of squares from both vectors // Layout: [uint8_t values (dim)] [min_val] [delta] [sum] [sum_of_squares] - const float sum_sq_1 = *reinterpret_cast(pVec1 + dimension + 3 * sizeof(float)); - const float sum_sq_2 = *reinterpret_cast(pVec2 + dimension + 3 * sizeof(float)); + const float sum_sq_1 = + *reinterpret_cast(pVec1 + dimension + sq8::SUM_SQUARES * sizeof(float)); + const float sum_sq_2 = + *reinterpret_cast(pVec2 + dimension + sq8::SUM_SQUARES * sizeof(float)); // L2² = ||x||² + ||y||² - 2*IP(x, y) return sum_sq_1 + sum_sq_2 - 2.0f * ip; diff --git a/src/VecSim/spaces/L2/L2_SVE_SQ8_SQ8.h b/src/VecSim/spaces/L2/L2_SVE_SQ8_SQ8.h index 658cc7a9a..90801f82a 100644 --- a/src/VecSim/spaces/L2/L2_SVE_SQ8_SQ8.h +++ b/src/VecSim/spaces/L2/L2_SVE_SQ8_SQ8.h @@ -9,6 +9,9 @@ #pragma once #include "VecSim/spaces/space_includes.h" #include "VecSim/spaces/IP/IP_SVE_SQ8_SQ8.h" +#include "VecSim/types/sq8.h" + +using sq8 = vecsim_types::sq8; /** * SQ8-to-SQ8 L2 squared distance functions for SVE. @@ -34,8 +37,10 @@ float SQ8_SQ8_L2SqrSIMD_SVE(const void *pVec1v, const void *pVec2v, size_t dimen // Get precomputed sum of squares from both vectors // Layout: [uint8_t values (dim)] [min_val] [delta] [sum] [sum_of_squares] - const float sum_sq_1 = *reinterpret_cast(pVec1 + dimension + 3 * sizeof(float)); - const float sum_sq_2 = *reinterpret_cast(pVec2 + dimension + 3 * sizeof(float)); + const float sum_sq_1 = + *reinterpret_cast(pVec1 + dimension + sq8::SUM_SQUARES * sizeof(float)); + const float sum_sq_2 = + *reinterpret_cast(pVec2 + dimension + sq8::SUM_SQUARES * sizeof(float)); // L2² = ||x||² + ||y||² - 2*IP(x, y) return sum_sq_1 + sum_sq_2 - 2.0f * ip; diff --git a/src/VecSim/spaces/computer/preprocessors.h b/src/VecSim/spaces/computer/preprocessors.h index 9dd3d6e1a..68d62bac1 100644 --- a/src/VecSim/spaces/computer/preprocessors.h +++ b/src/VecSim/spaces/computer/preprocessors.h @@ -18,6 +18,7 @@ #include "VecSim/memory/vecsim_base.h" #include "VecSim/spaces/spaces.h" #include "VecSim/memory/memory_utils.h" +#include "VecSim/types/sq8.h" class PreprocessorInterface : public VecsimBaseObject { public: @@ -216,10 +217,8 @@ class CosinePreprocessor : public PreprocessorInterface { template class QuantPreprocessor : public PreprocessorInterface { using OUTPUT_TYPE = uint8_t; + using sq8 = vecsim_types::sq8; - // For L2: store sum + sum_of_squares (2 extra values) - // For IP/Cosine: store only sum (1 extra value) - static constexpr size_t extra_storage_values_count = (Metric == VecSimMetric_L2) ? 2 : 1; static_assert(Metric == VecSimMetric_L2 || Metric == VecSimMetric_IP || Metric == VecSimMetric_Cosine, "QuantPreprocessor only supports L2, IP and Cosine metrics"); @@ -294,13 +293,13 @@ class QuantPreprocessor : public PreprocessorInterface { DataType *metadata = reinterpret_cast(quantized + this->dim); // Store min_val, delta, in the metadata - metadata[0] = min_val; - metadata[1] = delta; + metadata[sq8::MIN_VAL] = min_val; + metadata[sq8::DELTA] = delta; // Store sum (for all metrics) and sum_squares (for L2 only) - metadata[2] = sum; + metadata[sq8::SUM] = sum; if constexpr (Metric == VecSimMetric_L2) { - metadata[3] = sum_squares; + metadata[sq8::SUM_SQUARES] = sum_squares; } } @@ -352,7 +351,7 @@ class QuantPreprocessor : public PreprocessorInterface { QuantPreprocessor(std::shared_ptr allocator, size_t dim) : PreprocessorInterface(allocator), dim(dim), storage_bytes_count(dim * sizeof(OUTPUT_TYPE) + - (2 + extra_storage_values_count) * sizeof(DataType)), + (vecsim_types::sq8::metadata_count()) * sizeof(DataType)), query_bytes_count((dim + 1) * sizeof(DataType)) { static_assert(std::is_floating_point_v, "QuantPreprocessor only supports floating-point types"); diff --git a/src/VecSim/types/sq8.h b/src/VecSim/types/sq8.h index a2bf738e0..bb5e394d2 100644 --- a/src/VecSim/types/sq8.h +++ b/src/VecSim/types/sq8.h @@ -17,6 +17,20 @@ namespace vecsim_types { // Represents a scalar-quantized 8-bit blob with reconstruction metadata struct sq8 { using value_type = uint8_t; + + // Metadata layout indices (stored after quantized values) + enum MetadataIndex : size_t { + MIN_VAL = 0, + DELTA = 1, + SUM = 2, + SUM_SQUARES = 3 // Only for L2 + }; + + // Template on metric — compile-time constant when metric is known + template + static constexpr size_t metadata_count() { + return (Metric == VecSimMetric_L2) ? 4 : 3; + } }; } // namespace vecsim_types diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp index b7ebfa701..7797544e2 100644 --- a/tests/unit/test_components.cpp +++ b/tests/unit/test_components.cpp @@ -1115,18 +1115,18 @@ TEST(PreprocessorsTest, QuantizationTestAllEntriesEqual) { // Verify metadata: min_val = 3.5f, delta = 1.0f (fallback when diff == 0) const float *metadata = reinterpret_cast(quantized + dim); - ASSERT_FLOAT_EQ(metadata[0], 3.5f); // min_val - ASSERT_FLOAT_EQ(metadata[1], 1.0f); // delta (fallback) + ASSERT_FLOAT_EQ(metadata[sq8::MIN_VAL], 3.5f); // min_val + ASSERT_FLOAT_EQ(metadata[sq8::DELTA], 1.0f); // delta (fallback) // Verify sum and sum_squares for L2 metric float expected_sum = 3.5f * dim; float expected_sum_squares = 3.5f * 3.5f * dim; - ASSERT_FLOAT_EQ(metadata[2], expected_sum); // sum - ASSERT_FLOAT_EQ(metadata[3], expected_sum_squares); // sum_squares + ASSERT_FLOAT_EQ(metadata[sq8::SUM], expected_sum); // sum + ASSERT_FLOAT_EQ(metadata[sq8::SUM_SQUARES], expected_sum_squares); // sum_squares // Reconstruct and verify: min + quantized * delta = 3.5 + 0 * 1 = 3.5 for (size_t i = 0; i < dim; ++i) { - float reconstructed = metadata[0] + quantized[i] * metadata[1]; + float reconstructed = metadata[sq8::MIN_VAL] + quantized[i] * metadata[sq8::DELTA]; ASSERT_FLOAT_EQ(reconstructed, original_blob[i]); } diff --git a/tests/unit/unit_test_utils.h b/tests/unit/unit_test_utils.h index 902583964..a61682902 100644 --- a/tests/unit/unit_test_utils.h +++ b/tests/unit/unit_test_utils.h @@ -16,9 +16,12 @@ #include "VecSim/vec_sim.h" #include "VecSim/algorithms/hnsw/hnsw_tiered.h" +#include "VecSim/types/sq8.h" #include "mock_thread_pool.h" #include "gtest/gtest.h" +using sq8 = vecsim_types::sq8; + // IndexType is used to define indices unit tests template struct IndexType { @@ -249,10 +252,10 @@ inline void ComputeSQ8Quantization(const float *original_blob, size_t dim, uint8 // Store metadata: min_val, delta, sum, sum_squares float *metadata = reinterpret_cast(output + dim); - metadata[0] = min_val; - metadata[1] = delta; - metadata[2] = sum; - metadata[3] = sum_squares; + metadata[sq8::MIN_VAL] = min_val; + metadata[sq8::DELTA] = delta; + metadata[sq8::SUM] = sum; + metadata[sq8::SUM_SQUARES] = sum_squares; } // TODO: Move all test_utils to this namespace diff --git a/tests/utils/tests_utils.h b/tests/utils/tests_utils.h index d288a2a94..4f1bf33e8 100644 --- a/tests/utils/tests_utils.h +++ b/tests/utils/tests_utils.h @@ -13,6 +13,9 @@ #include "VecSim/spaces/normalize/compute_norm.h" #include "VecSim/spaces/spaces.h" #include "VecSim/types/float16.h" +#include "VecSim/types/sq8.h" + +using sq8 = vecsim_types::sq8; namespace test_utils { @@ -164,10 +167,10 @@ static void quantize_float_vec_to_sq8_with_metadata(const float *v, size_t dim, // Store parameters: [min, delta, sum, square_sum] float *params = reinterpret_cast(qv + dim); - params[0] = min_val; - params[1] = delta; - params[2] = sum; - params[3] = square_sum; + params[sq8::MIN_VAL] = min_val; + params[sq8::DELTA] = delta; + params[sq8::SUM] = sum; + params[sq8::SUM_SQUARES] = square_sum; } /**