diff --git a/src/VecSim/spaces/computer/preprocessors.h b/src/VecSim/spaces/computer/preprocessors.h index 68d62bac1..5954b3fc1 100644 --- a/src/VecSim/spaces/computer/preprocessors.h +++ b/src/VecSim/spaces/computer/preprocessors.h @@ -169,12 +169,15 @@ class CosinePreprocessor : public PreprocessorInterface { * The query vector is not quantized. It remains as DataType, but we precompute * and store metric-specific values to accelerate asymmetric distance computation: * - For IP/Cosine: y_sum = Σy_i (sum of query values) - * - For L2: y_sum_squares = Σy_i² (sum of squared query values) + * - For L2: y_sum = Σy_i (sum of query values), y_sum_squares = Σy_i² (sum of squared query values) * * Query blob layout: - * | query_values[dim] | y_sum (IP/Cosine) OR y_sum_squares (L2) | + * - For IP/Cosine: | query_values[dim] | y_sum | + * - For L2: | query_values[dim] | y_sum | y_sum_squares | * - * Query blob size: (dim + 1) * sizeof(DataType) + * Query blob size: + * - For IP/Cosine: (dim + 1) * sizeof(DataType) + * - For L2: (dim + 2) * sizeof(DataType) * * === Asymmetric distance (storage x quantized, query y remains float) === * @@ -303,56 +306,65 @@ class QuantPreprocessor : public PreprocessorInterface { } } - DataType sum_fast(const DataType *p) const { + // Computes and assigns query metadata in a single pass over the input vector. + // For IP/Cosine: assigns y_sum = Σy_i + // For L2: assigns y_sum = Σy_i and y_sum_squares = Σy_i² + void assign_query_metadata(const DataType *input, DataType *output_metadata) const { + // 4 independent accumulators for sum DataType s0{}, s1{}, s2{}, s3{}; + // 4 independent accumulators for sum of squares (only used for L2) + DataType q0{}, q1{}, q2{}, q3{}; size_t i = 0; // round dim down to the nearest multiple of 4 size_t dim_round_down = this->dim & ~size_t(3); for (; i < dim_round_down; i += 4) { - s0 += p[i + 0]; - s1 += p[i + 1]; - s2 += p[i + 2]; - s3 += p[i + 3]; - } + const DataType y0 = input[i + 0]; + const DataType y1 = input[i + 1]; + const DataType y2 = input[i + 2]; + const DataType y3 = input[i + 3]; - DataType sum = (s0 + s1) + (s2 + s3); + s0 += y0; + s1 += y1; + s2 += y2; + s3 += y3; - for (; i < dim; ++i) { - sum += p[i]; + if constexpr (Metric == VecSimMetric_L2) { + q0 += y0 * y0; + q1 += y1 * y1; + q2 += y2 * y2; + q3 += y3 * y3; + } } - return sum; - } - DataType sum_squares_fast(const DataType *p) const { - DataType s0{}, s1{}, s2{}, s3{}; - - size_t i = 0; - // round dim down to the nearest multiple of 4 - size_t dim_round_down = this->dim & ~size_t(3); + DataType sum = (s0 + s1) + (s2 + s3); + DataType sum_squares = (q0 + q1) + (q2 + q3); - for (; i < dim_round_down; i += 4) { - s0 += p[i + 0] * p[i + 0]; - s1 += p[i + 1] * p[i + 1]; - s2 += p[i + 2] * p[i + 2]; - s3 += p[i + 3] * p[i + 3]; + // Tail: handle remaining elements + for (; i < this->dim; ++i) { + const DataType y = input[i]; + sum += y; + if constexpr (Metric == VecSimMetric_L2) { + sum_squares += y * y; + } } - DataType sum = (s0 + s1) + (s2 + s3); - - for (; i < dim; ++i) { - sum += p[i] * p[i]; + // Assign the computed metadata + output_metadata[sq8::SUM_QUERY] = sum; // y_sum for all metrics + if constexpr (Metric == VecSimMetric_L2) { + output_metadata[sq8::SUM_SQUARES_QUERY] = sum_squares; // y_sum_squares for L2 only } - return sum; } public: QuantPreprocessor(std::shared_ptr allocator, size_t dim) : PreprocessorInterface(allocator), dim(dim), storage_bytes_count(dim * sizeof(OUTPUT_TYPE) + - (vecsim_types::sq8::metadata_count()) * sizeof(DataType)), - query_bytes_count((dim + 1) * sizeof(DataType)) { + (vecsim_types::sq8::storage_metadata_count()) * + sizeof(DataType)), + query_bytes_count((dim + vecsim_types::sq8::query_metadata_count()) * + sizeof(DataType)) { static_assert(std::is_floating_point_v, "QuantPreprocessor only supports floating-point types"); } @@ -422,12 +434,18 @@ class QuantPreprocessor : public PreprocessorInterface { /** * Preprocesses the query vector for asymmetric distance computation. * - * The query blob contains the original float values followed by a precomputed value: + * The query blob contains the original float values followed by precomputed values: * - For IP/Cosine: y_sum = Σy_i (sum of query values) - * - For L2: y_sum_squares = Σy_i² (sum of squared query values) + * - For L2: y_sum = Σy_i (sum of query values), y_sum_squares = Σy_i² (sum of squared query + * values) + * + * Query blob layout: + * - For IP/Cosine: | query_values[dim] | y_sum | + * - For L2: | query_values[dim] | y_sum | y_sum_squares | * - * Query blob layout: | query_values[dim] | y_sum OR y_sum_squares | - * Query blob size: (dim + 1) * sizeof(DataType) + * Query blob size: + * - For IP/Cosine: (dim + 1) * sizeof(DataType) + * - For L2: (dim + 2) * sizeof(DataType) */ void preprocessQuery(const void *original_blob, void *&blob, size_t &query_blob_size, unsigned char alignment) const override { @@ -437,13 +455,10 @@ class QuantPreprocessor : public PreprocessorInterface { blob = this->allocator->allocate_aligned(this->query_bytes_count, alignment); memcpy(blob, original_blob, this->dim * sizeof(DataType)); const DataType *input = static_cast(original_blob); - // For IP/Cosine, we need to store the sum of the query vector. - if constexpr (Metric == VecSimMetric_IP || Metric == VecSimMetric_Cosine) { - static_cast(blob)[this->dim] = sum_fast(input); - } // For L2, compute the sum of squares. - else if constexpr (Metric == VecSimMetric_L2) { - static_cast(blob)[this->dim] = sum_squares_fast(input); - } + DataType *output = static_cast(blob); + + // Compute and assign query metadata (sum for IP/Cosine, sum and sum_squares for L2) + assign_query_metadata(input, output + this->dim); query_blob_size = this->query_bytes_count; } diff --git a/src/VecSim/types/sq8.h b/src/VecSim/types/sq8.h index bb5e394d2..7d67928d5 100644 --- a/src/VecSim/types/sq8.h +++ b/src/VecSim/types/sq8.h @@ -26,11 +26,22 @@ struct sq8 { SUM_SQUARES = 3 // Only for L2 }; + // TODO: re-order metadata and merge with the above enum + enum QueryMetadataIndex : size_t { + SUM_QUERY = 0, + SUM_SQUARES_QUERY = 1 // Only for L2 + }; + // Template on metric — compile-time constant when metric is known template - static constexpr size_t metadata_count() { + static constexpr size_t storage_metadata_count() { return (Metric == VecSimMetric_L2) ? 4 : 3; } + + template + static constexpr size_t query_metadata_count() { + return (Metric == VecSimMetric_L2) ? 2 : 1; + } }; } // namespace vecsim_types diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp index 7797544e2..bd3c8d642 100644 --- a/tests/unit/test_components.cpp +++ b/tests/unit/test_components.cpp @@ -999,16 +999,21 @@ TEST(PreprocessorsTest, QuantizationTest) { // === Storage blob expected values === // For L2 metric: quantized values + min + delta + sum + sum_squares = dim bytes + 4 floats - constexpr size_t quantized_blob_bytes_count = dim * sizeof(uint8_t) + 4 * sizeof(float); + constexpr size_t quantized_blob_bytes_count = + dim * sizeof(uint8_t) + sq8::storage_metadata_count() * sizeof(float); uint8_t expected_storage_blob[quantized_blob_bytes_count] = {0}; ComputeSQ8Quantization(original_blob, dim, expected_storage_blob); // === Query blob expected values === // Query layout: | query_values[dim] | y_sum_squares (for L2) | - constexpr size_t query_blob_bytes_count = (dim + 1) * sizeof(float); - // Compute expected sum of squares for L2: 1² + 2² + 3² + 4² + 5² + 6² = 91 + constexpr size_t query_blob_bytes_count = + (dim + sq8::query_metadata_count()) * sizeof(float); + + // Compute expected sum and sum of squares for L2: + float expected_query_sum = 0; float expected_query_sum_squares = 0; for (size_t i = 0; i < dim; ++i) { + expected_query_sum += original_blob[i]; expected_query_sum_squares += original_blob[i] * original_blob[i]; } @@ -1038,7 +1043,8 @@ TEST(PreprocessorsTest, QuantizationTest) { // Verify query blob content const float *query_floats = static_cast(query_blob); EXPECT_NO_FATAL_FAILURE(CompareVectors(query_floats, original_blob, dim)); - ASSERT_FLOAT_EQ(query_floats[dim], expected_query_sum_squares); + ASSERT_FLOAT_EQ(query_floats[dim + sq8::SUM_QUERY], expected_query_sum); + ASSERT_FLOAT_EQ(query_floats[dim + sq8::SUM_SQUARES_QUERY], expected_query_sum_squares); } // Test preprocessForStorage @@ -1060,7 +1066,8 @@ TEST(PreprocessorsTest, QuantizationTest) { // Verify query blob content: original floats followed by sum_squares const float *query_floats = static_cast(query_blob.get()); EXPECT_NO_FATAL_FAILURE(CompareVectors(query_floats, original_blob, dim)); - ASSERT_FLOAT_EQ(query_floats[dim], expected_query_sum_squares); + ASSERT_FLOAT_EQ(query_floats[dim + sq8::SUM_QUERY], expected_query_sum); + ASSERT_FLOAT_EQ(query_floats[dim + sq8::SUM_SQUARES_QUERY], expected_query_sum_squares); // Check address is aligned unsigned char address_alignment = (uintptr_t)(query_blob.get()) % alignment; @@ -1152,8 +1159,9 @@ class QuantPreprocessorMetricTest : public testing::TestWithParam // Storage layout: | quantized_values[dim] | min | delta | sum | (sum_squares for L2) | // L2: dim bytes + 4 floats (min, delta, sum, sum_squares) // IP/Cosine: dim bytes + 3 floats (min, delta, sum) - static size_t getExpectedStorageSize(VecSimMetric metric) { - size_t extra_floats = (metric == VecSimMetric_L2) ? 4 : 3; + template + static size_t getExpectedStorageSize() { + constexpr size_t extra_floats = sq8::storage_metadata_count(); return dim * sizeof(uint8_t) + extra_floats * sizeof(float); } @@ -1161,29 +1169,23 @@ class QuantPreprocessorMetricTest : public testing::TestWithParam // Query layout: | query_values[dim] | y_sum (IP/Cosine) OR y_sum_squares (L2) | // All metrics: (dim + 1) floats - static constexpr size_t getExpectedQuerySize() { return (dim + 1) * sizeof(float); } - - // Compute expected precomputed value for query blob based on metric template - float getExpectedQueryPrecomputedValue() { - float sum = 0; - for (size_t i = 0; i < dim; ++i) { - if constexpr (Metric == VecSimMetric_L2) { - // sum of squares: 1² + 2² + 3² + 4² + 5² = 55 - sum += original_blob[i] * original_blob[i]; - } else { - // sum: 1 + 2 + 3 + 4 + 5 = 15 - sum += original_blob[i]; - } - } - return sum; + static constexpr size_t getExpectedQuerySize() { + return (dim + sq8::query_metadata_count()) * sizeof(float); } // Helper to run quantization test for a specific metric template void runQuantizationTest() { - size_t expected_storage_size = getExpectedStorageSize(Metric); - size_t expected_query_size = getExpectedQuerySize(); + size_t expected_storage_size = getExpectedStorageSize(); + size_t expected_query_size = getExpectedQuerySize(); + + float expected_query_sum = 0; + float expected_query_sum_squares = 0; + for (size_t i = 0; i < dim; ++i) { + expected_query_sum += original_blob[i]; + expected_query_sum_squares += original_blob[i] * original_blob[i]; + } auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, dim); @@ -1221,9 +1223,12 @@ class QuantPreprocessorMetricTest : public testing::TestWithParam const float *query_floats = static_cast(query_blob); EXPECT_NO_FATAL_FAILURE(CompareVectors(query_floats, original_blob, dim)); - // Verify precomputed value (sum for IP/Cosine, sum_squares for L2) - float expected_precomputed = getExpectedQueryPrecomputedValue(); - ASSERT_FLOAT_EQ(query_floats[dim], expected_precomputed); + // Verify precomputed value (sum for IP/Cosine, sum and sum_squares for L2) + ASSERT_FLOAT_EQ(query_floats[dim + sq8::SUM_QUERY], expected_query_sum); + if constexpr (Metric == VecSimMetric_L2) { + ASSERT_FLOAT_EQ(query_floats[dim + sq8::SUM_SQUARES_QUERY], + expected_query_sum_squares); + } allocator->free_allocation(storage_blob); allocator->free_allocation(query_blob); @@ -1254,10 +1259,12 @@ class QuantPreprocessorMetricTest : public testing::TestWithParam const float *query_floats = static_cast(blob); EXPECT_NO_FATAL_FAILURE(CompareVectors(query_floats, original_blob, dim)); - // Verify precomputed value (sum for IP/Cosine, sum_squares for L2) - float expected_precomputed = getExpectedQueryPrecomputedValue(); - ASSERT_FLOAT_EQ(query_floats[dim], expected_precomputed); - + // Verify precomputed value (sum for IP/Cosine, sum and sum_squares for L2) + ASSERT_FLOAT_EQ(query_floats[dim + sq8::SUM_QUERY], expected_query_sum); + if constexpr (Metric == VecSimMetric_L2) { + ASSERT_FLOAT_EQ(query_floats[dim + sq8::SUM_SQUARES_QUERY], + expected_query_sum_squares); + } allocator->free_allocation(blob); }