Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions src/VecSim/spaces/IP/IP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@
#include "IP.h"
#include "VecSim/types/bfloat16.h"
#include "VecSim/types/float16.h"
#include "VecSim/types/sq8.h"
#include <cstring>

using bfloat16 = vecsim_types::bfloat16;
using float16 = vecsim_types::float16;
using sq8 = vecsim_types::sq8;

float FLOAT_INTEGER_InnerProduct(const float *pVect1v, const uint8_t *pVect2v, size_t dimension,
float min_val, float delta) {
Expand Down Expand Up @@ -63,14 +65,16 @@ float SQ8_SQ8_InnerProduct_Impl(const void *pVect1v, const void *pVect2v, size_t
}

// Get quantization parameters from pVect1
const float min_val1 = *reinterpret_cast<const float *>(pVect1 + dimension);
const float delta1 = *reinterpret_cast<const float *>(pVect1 + dimension + sizeof(float));
const float sum1 = *reinterpret_cast<const float *>(pVect1 + dimension + 2 * sizeof(float));
const float *params1 = reinterpret_cast<const float *>(pVect1 + dimension);
const float min_val1 = params1[sq8::MIN_VAL];
const float delta1 = params1[sq8::DELTA];
const float sum1 = params1[sq8::SUM];

// Get quantization parameters from pVect2
const float min_val2 = *reinterpret_cast<const float *>(pVect2 + dimension);
const float delta2 = *reinterpret_cast<const float *>(pVect2 + dimension + sizeof(float));
const float sum2 = *reinterpret_cast<const float *>(pVect2 + dimension + 2 * sizeof(float));
const float *params2 = reinterpret_cast<const float *>(pVect2 + dimension);
const float min_val2 = params2[sq8::MIN_VAL];
const float delta2 = params2[sq8::DELTA];
const float sum2 = params2[sq8::SUM];

// Apply the algebraic formula using precomputed sums:
// IP = min1*sum2 + min2*sum1 + delta1*delta2*Σ(q1[i]*q2[i]) - dim*min1*min2
Expand Down
15 changes: 9 additions & 6 deletions src/VecSim/spaces/IP/IP_AVX512F_BW_VL_VNNI_SQ8_SQ8.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,11 @@
#pragma once
#include "VecSim/spaces/space_includes.h"
#include "VecSim/spaces/IP/IP_AVX512F_BW_VL_VNNI_UINT8.h"
#include "VecSim/types/sq8.h"
#include <immintrin.h>

using sq8 = vecsim_types::sq8;

/**
* SQ8-to-SQ8 distance functions using AVX512 VNNI with precomputed sum.
* These functions compute distance between two SQ8 (scalar quantized 8-bit) vectors,
Expand Down Expand Up @@ -45,14 +48,14 @@ float SQ8_SQ8_InnerProductImp(const void *pVec1v, const void *pVec2v, size_t dim
const uint8_t *pVec2 = static_cast<const uint8_t *>(pVec2v);

const float *params1 = reinterpret_cast<const float *>(pVec1 + dimension);
const float min1 = params1[0];
const float delta1 = params1[1];
const float sum1 = params1[2]; // Precomputed sum of original float elements
const float min1 = params1[sq8::MIN_VAL];
const float delta1 = params1[sq8::DELTA];
const float sum1 = params1[sq8::SUM]; // Precomputed sum of original float elements

const float *params2 = reinterpret_cast<const float *>(pVec2 + dimension);
const float min2 = params2[0];
const float delta2 = params2[1];
const float sum2 = params2[2]; // Precomputed sum of original float elements
const float min2 = params2[sq8::MIN_VAL];
const float delta2 = params2[sq8::DELTA];
const float sum2 = params2[sq8::SUM]; // Precomputed sum of original float elements

// Apply the algebraic formula using precomputed sums:
// IP = min1*sum2 + min2*sum1 + δ1*δ2 * Σ(q1[i]*q2[i]) - dim*min1*min2
Expand Down
15 changes: 9 additions & 6 deletions src/VecSim/spaces/IP/IP_NEON_DOTPROD_SQ8_SQ8.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,11 @@
#pragma once
#include "VecSim/spaces/space_includes.h"
#include "VecSim/spaces/IP/IP_NEON_DOTPROD_UINT8.h"
#include "VecSim/types/sq8.h"
#include <arm_neon.h>

using sq8 = vecsim_types::sq8;

/**
* SQ8-to-SQ8 distance functions using ARM NEON DOTPROD with precomputed sum.
* These functions compute distance between two SQ8 (scalar quantized 8-bit) vectors,
Expand Down Expand Up @@ -46,14 +49,14 @@ float SQ8_SQ8_InnerProductSIMD64_NEON_DOTPROD_IMP(const void *pVec1v, const void
const uint8_t *pVec2 = static_cast<const uint8_t *>(pVec2v);

const float *params1 = reinterpret_cast<const float *>(pVec1 + dimension);
const float min1 = params1[0];
const float delta1 = params1[1];
const float sum1 = params1[2]; // Precomputed sum of original float elements
const float min1 = params1[sq8::MIN_VAL];
const float delta1 = params1[sq8::DELTA];
const float sum1 = params1[sq8::SUM]; // Precomputed sum of original float elements

const float *params2 = reinterpret_cast<const float *>(pVec2 + dimension);
const float min2 = params2[0];
const float delta2 = params2[1];
const float sum2 = params2[2]; // Precomputed sum of original float elements
const float min2 = params2[sq8::MIN_VAL];
const float delta2 = params2[sq8::DELTA];
const float sum2 = params2[sq8::SUM]; // Precomputed sum of original float elements

// Apply algebraic formula using precomputed sums:
// IP = min1*sum2 + min2*sum1 + δ1*δ2 * Σ(q1*q2) - dim*min1*min2
Expand Down
15 changes: 9 additions & 6 deletions src/VecSim/spaces/IP/IP_NEON_SQ8_SQ8.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,11 @@
#pragma once
#include "VecSim/spaces/space_includes.h"
#include "VecSim/spaces/IP/IP_NEON_UINT8.h"
#include "VecSim/types/sq8.h"
#include <arm_neon.h>

using sq8 = vecsim_types::sq8;

/**
* SQ8-to-SQ8 distance functions using ARM NEON with precomputed sum.
* These functions compute distance between two SQ8 (scalar quantized 8-bit) vectors,
Expand Down Expand Up @@ -46,15 +49,15 @@ float SQ8_SQ8_InnerProductSIMD64_NEON_IMP(const void *pVec1v, const void *pVec2v
const uint8_t *pVec2 = static_cast<const uint8_t *>(pVec2v);

const float *params1 = reinterpret_cast<const float *>(pVec1 + dimension);
const float min1 = params1[0];
const float delta1 = params1[1];
const float sum1 = params1[2]; // Precomputed sum of original float elements
const float min1 = params1[sq8::MIN_VAL];
const float delta1 = params1[sq8::DELTA];
const float sum1 = params1[sq8::SUM]; // Precomputed sum of original float elements

// Get dequantization parameters and precomputed values from the end of pVec2
const float *params2 = reinterpret_cast<const float *>(pVec2 + dimension);
const float min2 = params2[0];
const float delta2 = params2[1];
const float sum2 = params2[2]; // Precomputed sum of original float elements
const float min2 = params2[sq8::MIN_VAL];
const float delta2 = params2[sq8::DELTA];
const float sum2 = params2[sq8::SUM]; // Precomputed sum of original float elements

// Apply algebraic formula using precomputed sums:
// IP = min1*sum2 + min2*sum1 + δ1*δ2 * Σ(q1*q2) - dim*min1*min2
Expand Down
15 changes: 9 additions & 6 deletions src/VecSim/spaces/IP/IP_SVE_SQ8_SQ8.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,11 @@
#pragma once
#include "VecSim/spaces/space_includes.h"
#include "VecSim/spaces/IP/IP_SVE_UINT8.h"
#include "VecSim/types/sq8.h"
#include <arm_sve.h>

using sq8 = vecsim_types::sq8;

/**
* SQ8-to-SQ8 distance functions using ARM SVE with precomputed sum.
* These functions compute distance between two SQ8 (scalar quantized 8-bit) vectors,
Expand Down Expand Up @@ -46,14 +49,14 @@ float SQ8_SQ8_InnerProductSIMD_SVE_IMP(const void *pVec1v, const void *pVec2v, s
const uint8_t *pVec2 = static_cast<const uint8_t *>(pVec2v);

const float *params1 = reinterpret_cast<const float *>(pVec1 + dimension);
const float min1 = params1[0];
const float delta1 = params1[1];
const float sum1 = params1[2]; // Precomputed sum of original float elements
const float min1 = params1[sq8::MIN_VAL];
const float delta1 = params1[sq8::DELTA];
const float sum1 = params1[sq8::SUM]; // Precomputed sum of original float elements

const float *params2 = reinterpret_cast<const float *>(pVec2 + dimension);
const float min2 = params2[0];
const float delta2 = params2[1];
const float sum2 = params2[2]; // Precomputed sum of original float elements
const float min2 = params2[sq8::MIN_VAL];
const float delta2 = params2[sq8::DELTA];
const float sum2 = params2[sq8::SUM]; // Precomputed sum of original float elements

// Apply algebraic formula with float conversion only at the end:
// IP = min1*sum2 + min2*sum1 + δ1*δ2 * Σ(q1*q2) - dim*min1*min2
Expand Down
8 changes: 6 additions & 2 deletions src/VecSim/spaces/L2/L2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@
#include "VecSim/spaces/IP/IP.h"
#include "VecSim/types/bfloat16.h"
#include "VecSim/types/float16.h"
#include "VecSim/types/sq8.h"
#include <cstring>
#include <iostream>

using bfloat16 = vecsim_types::bfloat16;
using float16 = vecsim_types::float16;
using sq8 = vecsim_types::sq8;

float SQ8_L2Sqr(const void *pVect1v, const void *pVect2v, size_t dimension) {
const auto *pVect1 = static_cast<const float *>(pVect1v);
Expand Down Expand Up @@ -149,8 +151,10 @@ float SQ8_SQ8_L2Sqr(const void *pVect1v, const void *pVect2v, size_t dimension)

// Get precomputed sum of squares from both vectors
// Layout: [uint8_t values (dim)] [min_val] [delta] [sum] [sum_of_squares]
const float sum_sq_1 = *reinterpret_cast<const float *>(pVect1 + dimension + 3 * sizeof(float));
const float sum_sq_2 = *reinterpret_cast<const float *>(pVect2 + dimension + 3 * sizeof(float));
const float sum_sq_1 =
*reinterpret_cast<const float *>(pVect1 + dimension + sq8::SUM_SQUARES * sizeof(float));
const float sum_sq_2 =
*reinterpret_cast<const float *>(pVect2 + dimension + sq8::SUM_SQUARES * sizeof(float));

// Use the common inner product implementation
const float ip = SQ8_SQ8_InnerProduct_Impl(pVect1v, pVect2v, dimension);
Expand Down
9 changes: 7 additions & 2 deletions src/VecSim/spaces/L2/L2_NEON_DOTPROD_SQ8_SQ8.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
#pragma once
#include "VecSim/spaces/space_includes.h"
#include "VecSim/spaces/IP/IP_NEON_DOTPROD_SQ8_SQ8.h"
#include "VecSim/types/sq8.h"

using sq8 = vecsim_types::sq8;

/**
* SQ8-to-SQ8 L2 squared distance functions for NEON with DOTPROD extension.
Expand All @@ -34,8 +37,10 @@ float SQ8_SQ8_L2SqrSIMD64_NEON_DOTPROD(const void *pVec1v, const void *pVec2v, s

// Get precomputed sum of squares from both vectors
// Layout: [uint8_t values (dim)] [min_val] [delta] [sum] [sum_of_squares]
const float sum_sq_1 = *reinterpret_cast<const float *>(pVec1 + dimension + 3 * sizeof(float));
const float sum_sq_2 = *reinterpret_cast<const float *>(pVec2 + dimension + 3 * sizeof(float));
const float sum_sq_1 =
*reinterpret_cast<const float *>(pVec1 + dimension + sq8::SUM_SQUARES * sizeof(float));
const float sum_sq_2 =
*reinterpret_cast<const float *>(pVec2 + dimension + sq8::SUM_SQUARES * sizeof(float));

// L2² = ||x||² + ||y||² - 2*IP(x, y)
return sum_sq_1 + sum_sq_2 - 2.0f * ip;
Expand Down
9 changes: 7 additions & 2 deletions src/VecSim/spaces/L2/L2_NEON_SQ8_SQ8.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
#pragma once
#include "VecSim/spaces/space_includes.h"
#include "VecSim/spaces/IP/IP_NEON_SQ8_SQ8.h"
#include "VecSim/types/sq8.h"

using sq8 = vecsim_types::sq8;

/**
* SQ8-to-SQ8 L2 squared distance functions for NEON.
Expand All @@ -33,8 +36,10 @@ float SQ8_SQ8_L2SqrSIMD64_NEON(const void *pVec1v, const void *pVec2v, size_t di

// Get precomputed sum of squares from both vectors
// Layout: [uint8_t values (dim)] [min_val] [delta] [sum] [sum_of_squares]
const float sum_sq_1 = *reinterpret_cast<const float *>(pVec1 + dimension + 3 * sizeof(float));
const float sum_sq_2 = *reinterpret_cast<const float *>(pVec2 + dimension + 3 * sizeof(float));
const float sum_sq_1 =
*reinterpret_cast<const float *>(pVec1 + dimension + sq8::SUM_SQUARES * sizeof(float));
const float sum_sq_2 =
*reinterpret_cast<const float *>(pVec2 + dimension + sq8::SUM_SQUARES * sizeof(float));

// L2² = ||x||² + ||y||² - 2*IP(x, y)
return sum_sq_1 + sum_sq_2 - 2.0f * ip;
Expand Down
9 changes: 7 additions & 2 deletions src/VecSim/spaces/L2/L2_SVE_SQ8_SQ8.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
#pragma once
#include "VecSim/spaces/space_includes.h"
#include "VecSim/spaces/IP/IP_SVE_SQ8_SQ8.h"
#include "VecSim/types/sq8.h"

using sq8 = vecsim_types::sq8;

/**
* SQ8-to-SQ8 L2 squared distance functions for SVE.
Expand All @@ -34,8 +37,10 @@ float SQ8_SQ8_L2SqrSIMD_SVE(const void *pVec1v, const void *pVec2v, size_t dimen

// Get precomputed sum of squares from both vectors
// Layout: [uint8_t values (dim)] [min_val] [delta] [sum] [sum_of_squares]
const float sum_sq_1 = *reinterpret_cast<const float *>(pVec1 + dimension + 3 * sizeof(float));
const float sum_sq_2 = *reinterpret_cast<const float *>(pVec2 + dimension + 3 * sizeof(float));
const float sum_sq_1 =
*reinterpret_cast<const float *>(pVec1 + dimension + sq8::SUM_SQUARES * sizeof(float));
const float sum_sq_2 =
*reinterpret_cast<const float *>(pVec2 + dimension + sq8::SUM_SQUARES * sizeof(float));

// L2² = ||x||² + ||y||² - 2*IP(x, y)
return sum_sq_1 + sum_sq_2 - 2.0f * ip;
Expand Down
15 changes: 7 additions & 8 deletions src/VecSim/spaces/computer/preprocessors.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "VecSim/memory/vecsim_base.h"
#include "VecSim/spaces/spaces.h"
#include "VecSim/memory/memory_utils.h"
#include "VecSim/types/sq8.h"

class PreprocessorInterface : public VecsimBaseObject {
public:
Expand Down Expand Up @@ -216,10 +217,8 @@ class CosinePreprocessor : public PreprocessorInterface {
template <typename DataType, VecSimMetric Metric>
class QuantPreprocessor : public PreprocessorInterface {
using OUTPUT_TYPE = uint8_t;
using sq8 = vecsim_types::sq8;

// For L2: store sum + sum_of_squares (2 extra values)
// For IP/Cosine: store only sum (1 extra value)
static constexpr size_t extra_storage_values_count = (Metric == VecSimMetric_L2) ? 2 : 1;
static_assert(Metric == VecSimMetric_L2 || Metric == VecSimMetric_IP ||
Metric == VecSimMetric_Cosine,
"QuantPreprocessor only supports L2, IP and Cosine metrics");
Expand Down Expand Up @@ -294,13 +293,13 @@ class QuantPreprocessor : public PreprocessorInterface {
DataType *metadata = reinterpret_cast<DataType *>(quantized + this->dim);

// Store min_val and delta in the metadata
metadata[0] = min_val;
metadata[1] = delta;
metadata[sq8::MIN_VAL] = min_val;
metadata[sq8::DELTA] = delta;

// Store sum (for all metrics) and sum_squares (for L2 only)
metadata[2] = sum;
metadata[sq8::SUM] = sum;
if constexpr (Metric == VecSimMetric_L2) {
metadata[3] = sum_squares;
metadata[sq8::SUM_SQUARES] = sum_squares;
}
}

Expand Down Expand Up @@ -352,7 +351,7 @@ class QuantPreprocessor : public PreprocessorInterface {
QuantPreprocessor(std::shared_ptr<VecSimAllocator> allocator, size_t dim)
: PreprocessorInterface(allocator), dim(dim),
storage_bytes_count(dim * sizeof(OUTPUT_TYPE) +
(2 + extra_storage_values_count) * sizeof(DataType)),
(vecsim_types::sq8::metadata_count<Metric>()) * sizeof(DataType)),
query_bytes_count((dim + 1) * sizeof(DataType)) {
static_assert(std::is_floating_point_v<DataType>,
"QuantPreprocessor only supports floating-point types");
Expand Down
14 changes: 14 additions & 0 deletions src/VecSim/types/sq8.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,20 @@ namespace vecsim_types {
/**
 * Type tag for a scalar-quantized 8-bit blob.
 *
 * Reconstruction metadata is stored after the quantized values; MetadataIndex
 * names the slots of that metadata array.
 */
struct sq8 {
    using value_type = uint8_t;

    // Slot positions inside the metadata array that follows the quantized values.
    // Unscoped, so the enumerators convert implicitly when used as array subscripts.
    enum MetadataIndex : size_t {
        MIN_VAL = 0,
        DELTA = 1,
        SUM = 2,
        SUM_SQUARES = 3 // present only for the L2 metric
    };

    // Number of metadata slots kept for the given metric, resolved at compile time:
    // every slot up to and including SUM_SQUARES for L2, up to and including SUM otherwise.
    template <VecSimMetric Metric>
    static constexpr size_t metadata_count() {
        if constexpr (Metric == VecSimMetric_L2) {
            return static_cast<size_t>(SUM_SQUARES) + 1;
        } else {
            return static_cast<size_t>(SUM) + 1;
        }
    }
};

} // namespace vecsim_types
10 changes: 5 additions & 5 deletions tests/unit/test_components.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1115,18 +1115,18 @@ TEST(PreprocessorsTest, QuantizationTestAllEntriesEqual) {

// Verify metadata: min_val = 3.5f, delta = 1.0f (fallback when diff == 0)
const float *metadata = reinterpret_cast<const float *>(quantized + dim);
ASSERT_FLOAT_EQ(metadata[0], 3.5f); // min_val
ASSERT_FLOAT_EQ(metadata[1], 1.0f); // delta (fallback)
ASSERT_FLOAT_EQ(metadata[sq8::MIN_VAL], 3.5f); // min_val
ASSERT_FLOAT_EQ(metadata[sq8::DELTA], 1.0f); // delta (fallback)

// Verify sum and sum_squares for L2 metric
float expected_sum = 3.5f * dim;
float expected_sum_squares = 3.5f * 3.5f * dim;
ASSERT_FLOAT_EQ(metadata[2], expected_sum); // sum
ASSERT_FLOAT_EQ(metadata[3], expected_sum_squares); // sum_squares
ASSERT_FLOAT_EQ(metadata[sq8::SUM], expected_sum); // sum
ASSERT_FLOAT_EQ(metadata[sq8::SUM_SQUARES], expected_sum_squares); // sum_squares

// Reconstruct and verify: min + quantized * delta = 3.5 + 0 * 1 = 3.5
for (size_t i = 0; i < dim; ++i) {
float reconstructed = metadata[0] + quantized[i] * metadata[1];
float reconstructed = metadata[sq8::MIN_VAL] + quantized[i] * metadata[sq8::DELTA];
ASSERT_FLOAT_EQ(reconstructed, original_blob[i]);
}

Expand Down
11 changes: 7 additions & 4 deletions tests/unit/unit_test_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,12 @@

#include "VecSim/vec_sim.h"
#include "VecSim/algorithms/hnsw/hnsw_tiered.h"
#include "VecSim/types/sq8.h"
#include "mock_thread_pool.h"
#include "gtest/gtest.h"

using sq8 = vecsim_types::sq8;

// IndexType is used to define indices unit tests
template <VecSimType type, typename DataType, typename DistType = DataType>
struct IndexType {
Expand Down Expand Up @@ -249,10 +252,10 @@ inline void ComputeSQ8Quantization(const float *original_blob, size_t dim, uint8

// Store metadata: min_val, delta, sum, sum_squares
float *metadata = reinterpret_cast<float *>(output + dim);
metadata[0] = min_val;
metadata[1] = delta;
metadata[2] = sum;
metadata[3] = sum_squares;
metadata[sq8::MIN_VAL] = min_val;
metadata[sq8::DELTA] = delta;
metadata[sq8::SUM] = sum;
metadata[sq8::SUM_SQUARES] = sum_squares;
}

// TODO: Move all test_utils to this namespace
Expand Down
Loading
Loading