Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 58 additions & 43 deletions src/VecSim/spaces/computer/preprocessors.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,12 +169,15 @@ class CosinePreprocessor : public PreprocessorInterface {
* The query vector is not quantized. It remains as DataType, but we precompute
* and store metric-specific values to accelerate asymmetric distance computation:
* - For IP/Cosine: y_sum = Σy_i (sum of query values)
* - For L2: y_sum_squares = Σy_i² (sum of squared query values)
* - For L2: y_sum = Σy_i (sum of query values), y_sum_squares = Σy_i² (sum of squared query values)
*
* Query blob layout:
* | query_values[dim] | y_sum (IP/Cosine) OR y_sum_squares (L2) |
* - For IP/Cosine: | query_values[dim] | y_sum |
* - For L2: | query_values[dim] | y_sum | y_sum_squares |
*
* Query blob size: (dim + 1) * sizeof(DataType)
* Query blob size:
* - For IP/Cosine: (dim + 1) * sizeof(DataType)
* - For L2: (dim + 2) * sizeof(DataType)
*
* === Asymmetric distance (storage x quantized, query y remains float) ===
*
Expand Down Expand Up @@ -303,56 +306,65 @@ class QuantPreprocessor : public PreprocessorInterface {
}
}

DataType sum_fast(const DataType *p) const {
// Computes and assigns query metadata in a single pass over the input vector.
// For IP/Cosine: assigns y_sum = Σy_i
// For L2: assigns y_sum = Σy_i and y_sum_squares = Σy_i²
void assign_query_metadata(const DataType *input, DataType *output_metadata) const {
// 4 independent accumulators for sum
DataType s0{}, s1{}, s2{}, s3{};
// 4 independent accumulators for sum of squares (only used for L2)
DataType q0{}, q1{}, q2{}, q3{};

size_t i = 0;
// round dim down to the nearest multiple of 4
size_t dim_round_down = this->dim & ~size_t(3);

for (; i < dim_round_down; i += 4) {
s0 += p[i + 0];
s1 += p[i + 1];
s2 += p[i + 2];
s3 += p[i + 3];
}
const DataType y0 = input[i + 0];
const DataType y1 = input[i + 1];
const DataType y2 = input[i + 2];
const DataType y3 = input[i + 3];

DataType sum = (s0 + s1) + (s2 + s3);
s0 += y0;
s1 += y1;
s2 += y2;
s3 += y3;

for (; i < dim; ++i) {
sum += p[i];
if constexpr (Metric == VecSimMetric_L2) {
q0 += y0 * y0;
q1 += y1 * y1;
q2 += y2 * y2;
q3 += y3 * y3;
}
}
return sum;
}

DataType sum_squares_fast(const DataType *p) const {
DataType s0{}, s1{}, s2{}, s3{};

size_t i = 0;
// round dim down to the nearest multiple of 4
size_t dim_round_down = this->dim & ~size_t(3);
DataType sum = (s0 + s1) + (s2 + s3);
DataType sum_squares = (q0 + q1) + (q2 + q3);

for (; i < dim_round_down; i += 4) {
s0 += p[i + 0] * p[i + 0];
s1 += p[i + 1] * p[i + 1];
s2 += p[i + 2] * p[i + 2];
s3 += p[i + 3] * p[i + 3];
// Tail: handle remaining elements
for (; i < this->dim; ++i) {
const DataType y = input[i];
sum += y;
if constexpr (Metric == VecSimMetric_L2) {
sum_squares += y * y;
}
}

DataType sum = (s0 + s1) + (s2 + s3);

for (; i < dim; ++i) {
sum += p[i] * p[i];
// Assign the computed metadata
output_metadata[sq8::SUM_QUERY] = sum; // y_sum for all metrics
if constexpr (Metric == VecSimMetric_L2) {
output_metadata[sq8::SUM_SQUARES_QUERY] = sum_squares; // y_sum_squares for L2 only
}
return sum;
}

public:
QuantPreprocessor(std::shared_ptr<VecSimAllocator> allocator, size_t dim)
: PreprocessorInterface(allocator), dim(dim),
storage_bytes_count(dim * sizeof(OUTPUT_TYPE) +
(vecsim_types::sq8::metadata_count<Metric>()) * sizeof(DataType)),
query_bytes_count((dim + 1) * sizeof(DataType)) {
(vecsim_types::sq8::storage_metadata_count<Metric>()) *
sizeof(DataType)),
query_bytes_count((dim + vecsim_types::sq8::query_metadata_count<Metric>()) *
sizeof(DataType)) {
static_assert(std::is_floating_point_v<DataType>,
"QuantPreprocessor only supports floating-point types");
}
Expand Down Expand Up @@ -422,12 +434,18 @@ class QuantPreprocessor : public PreprocessorInterface {
/**
* Preprocesses the query vector for asymmetric distance computation.
*
* The query blob contains the original float values followed by a precomputed value:
* The query blob contains the original float values followed by precomputed values:
* - For IP/Cosine: y_sum = Σy_i (sum of query values)
* - For L2: y_sum_squares = Σy_i² (sum of squared query values)
* - For L2: y_sum = Σy_i (sum of query values), y_sum_squares = Σy_i² (sum of squared query
* values)
*
* Query blob layout:
* - For IP/Cosine: | query_values[dim] | y_sum |
* - For L2: | query_values[dim] | y_sum | y_sum_squares |
*
* Query blob layout: | query_values[dim] | y_sum OR y_sum_squares |
* Query blob size: (dim + 1) * sizeof(DataType)
* Query blob size:
* - For IP/Cosine: (dim + 1) * sizeof(DataType)
* - For L2: (dim + 2) * sizeof(DataType)
*/
void preprocessQuery(const void *original_blob, void *&blob, size_t &query_blob_size,
unsigned char alignment) const override {
Expand All @@ -437,13 +455,10 @@ class QuantPreprocessor : public PreprocessorInterface {
blob = this->allocator->allocate_aligned(this->query_bytes_count, alignment);
memcpy(blob, original_blob, this->dim * sizeof(DataType));
const DataType *input = static_cast<const DataType *>(original_blob);
// For IP/Cosine, we need to store the sum of the query vector.
if constexpr (Metric == VecSimMetric_IP || Metric == VecSimMetric_Cosine) {
static_cast<DataType *>(blob)[this->dim] = sum_fast(input);
} // For L2, compute the sum of squares.
else if constexpr (Metric == VecSimMetric_L2) {
static_cast<DataType *>(blob)[this->dim] = sum_squares_fast(input);
}
DataType *output = static_cast<DataType *>(blob);

// Compute and assign query metadata (sum for IP/Cosine, sum and sum_squares for L2)
assign_query_metadata(input, output + this->dim);

query_blob_size = this->query_bytes_count;
}
Expand Down
13 changes: 12 additions & 1 deletion src/VecSim/types/sq8.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,22 @@ struct sq8 {
SUM_SQUARES = 3 // Only for L2
};

// TODO: re-order metadata and merge with the above enum
// Indices of the precomputed values stored after the `dim` query values in a
// query blob. They are offsets into the metadata region, i.e. relative to
// element `dim` of the blob:
//   | query_values[dim] | y_sum | y_sum_squares (L2 only) |
enum QueryMetadataIndex : size_t {
SUM_QUERY = 0, // y_sum (sum of query values), stored for all metrics
SUM_SQUARES_QUERY = 1 // Only for L2
};

// Template on metric — compile-time constant when metric is known
template <VecSimMetric Metric>
static constexpr size_t metadata_count() {
static constexpr size_t storage_metadata_count() {
return (Metric == VecSimMetric_L2) ? 4 : 3;
}

// Number of precomputed metadata entries appended after the query values in a
// query blob: y_sum for every metric, plus y_sum_squares when the metric is L2.
// Resolved at compile time from the metric template argument.
template <VecSimMetric Metric>
static constexpr size_t query_metadata_count() {
    if constexpr (Metric == VecSimMetric_L2) {
        return 2; // y_sum and y_sum_squares
    } else {
        return 1; // y_sum only
    }
}
};

} // namespace vecsim_types
69 changes: 38 additions & 31 deletions tests/unit/test_components.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -999,16 +999,21 @@ TEST(PreprocessorsTest, QuantizationTest) {

// === Storage blob expected values ===
// For L2 metric: quantized values + min + delta + sum + sum_squares = dim bytes + 4 floats
constexpr size_t quantized_blob_bytes_count = dim * sizeof(uint8_t) + 4 * sizeof(float);
constexpr size_t quantized_blob_bytes_count =
dim * sizeof(uint8_t) + sq8::storage_metadata_count<VecSimMetric_L2>() * sizeof(float);
uint8_t expected_storage_blob[quantized_blob_bytes_count] = {0};
ComputeSQ8Quantization(original_blob, dim, expected_storage_blob);

// === Query blob expected values ===
// Query layout: | query_values[dim] | y_sum_squares (for L2) |
constexpr size_t query_blob_bytes_count = (dim + 1) * sizeof(float);
// Compute expected sum of squares for L2: 1² + 2² + 3² + 4² + 5² + 6² = 91
constexpr size_t query_blob_bytes_count =
(dim + sq8::query_metadata_count<VecSimMetric_L2>()) * sizeof(float);

// Compute expected sum and sum of squares for L2:
float expected_query_sum = 0;
float expected_query_sum_squares = 0;
for (size_t i = 0; i < dim; ++i) {
expected_query_sum += original_blob[i];
expected_query_sum_squares += original_blob[i] * original_blob[i];
}

Expand Down Expand Up @@ -1038,7 +1043,8 @@ TEST(PreprocessorsTest, QuantizationTest) {
// Verify query blob content
const float *query_floats = static_cast<const float *>(query_blob);
EXPECT_NO_FATAL_FAILURE(CompareVectors<float>(query_floats, original_blob, dim));
ASSERT_FLOAT_EQ(query_floats[dim], expected_query_sum_squares);
ASSERT_FLOAT_EQ(query_floats[dim + sq8::SUM_QUERY], expected_query_sum);
ASSERT_FLOAT_EQ(query_floats[dim + sq8::SUM_SQUARES_QUERY], expected_query_sum_squares);
}

// Test preprocessForStorage
Expand All @@ -1060,7 +1066,8 @@ TEST(PreprocessorsTest, QuantizationTest) {
// Verify query blob content: original floats followed by sum and sum_squares
const float *query_floats = static_cast<const float *>(query_blob.get());
EXPECT_NO_FATAL_FAILURE(CompareVectors<float>(query_floats, original_blob, dim));
ASSERT_FLOAT_EQ(query_floats[dim], expected_query_sum_squares);
ASSERT_FLOAT_EQ(query_floats[dim + sq8::SUM_QUERY], expected_query_sum);
ASSERT_FLOAT_EQ(query_floats[dim + sq8::SUM_SQUARES_QUERY], expected_query_sum_squares);

// Check address is aligned
unsigned char address_alignment = (uintptr_t)(query_blob.get()) % alignment;
Expand Down Expand Up @@ -1152,38 +1159,33 @@ class QuantPreprocessorMetricTest : public testing::TestWithParam<VecSimMetric>
// Storage layout: | quantized_values[dim] | min | delta | sum | (sum_squares for L2) |
// L2: dim bytes + 4 floats (min, delta, sum, sum_squares)
// IP/Cosine: dim bytes + 3 floats (min, delta, sum)
static size_t getExpectedStorageSize(VecSimMetric metric) {
size_t extra_floats = (metric == VecSimMetric_L2) ? 4 : 3;
template <VecSimMetric Metric>
static size_t getExpectedStorageSize() {
constexpr size_t extra_floats = sq8::storage_metadata_count<Metric>();
return dim * sizeof(uint8_t) + extra_floats * sizeof(float);
}

// === Query blob helpers ===

// Query layout: | query_values[dim] | y_sum (IP/Cosine) OR y_sum_squares (L2) |
// All metrics: (dim + 1) floats
static constexpr size_t getExpectedQuerySize() { return (dim + 1) * sizeof(float); }

// Compute expected precomputed value for query blob based on metric
template <VecSimMetric Metric>
float getExpectedQueryPrecomputedValue() {
float sum = 0;
for (size_t i = 0; i < dim; ++i) {
if constexpr (Metric == VecSimMetric_L2) {
// sum of squares: 1² + 2² + 3² + 4² + 5² = 55
sum += original_blob[i] * original_blob[i];
} else {
// sum: 1 + 2 + 3 + 4 + 5 = 15
sum += original_blob[i];
}
}
return sum;
static constexpr size_t getExpectedQuerySize() {
return (dim + sq8::query_metadata_count<Metric>()) * sizeof(float);
}

// Helper to run quantization test for a specific metric
template <VecSimMetric Metric>
void runQuantizationTest() {
size_t expected_storage_size = getExpectedStorageSize(Metric);
size_t expected_query_size = getExpectedQuerySize();
size_t expected_storage_size = getExpectedStorageSize<Metric>();
size_t expected_query_size = getExpectedQuerySize<Metric>();

float expected_query_sum = 0;
float expected_query_sum_squares = 0;
for (size_t i = 0; i < dim; ++i) {
expected_query_sum += original_blob[i];
expected_query_sum_squares += original_blob[i] * original_blob[i];
}

auto quant_preprocessor = new (allocator) QuantPreprocessor<float, Metric>(allocator, dim);

Expand Down Expand Up @@ -1221,9 +1223,12 @@ class QuantPreprocessorMetricTest : public testing::TestWithParam<VecSimMetric>
const float *query_floats = static_cast<const float *>(query_blob);
EXPECT_NO_FATAL_FAILURE(CompareVectors<float>(query_floats, original_blob, dim));

// Verify precomputed value (sum for IP/Cosine, sum_squares for L2)
float expected_precomputed = getExpectedQueryPrecomputedValue<Metric>();
ASSERT_FLOAT_EQ(query_floats[dim], expected_precomputed);
// Verify precomputed values (sum for IP/Cosine, sum and sum_squares for L2)
ASSERT_FLOAT_EQ(query_floats[dim + sq8::SUM_QUERY], expected_query_sum);
if constexpr (Metric == VecSimMetric_L2) {
ASSERT_FLOAT_EQ(query_floats[dim + sq8::SUM_SQUARES_QUERY],
expected_query_sum_squares);
}

allocator->free_allocation(storage_blob);
allocator->free_allocation(query_blob);
Expand Down Expand Up @@ -1254,10 +1259,12 @@ class QuantPreprocessorMetricTest : public testing::TestWithParam<VecSimMetric>
const float *query_floats = static_cast<const float *>(blob);
EXPECT_NO_FATAL_FAILURE(CompareVectors<float>(query_floats, original_blob, dim));

// Verify precomputed value (sum for IP/Cosine, sum_squares for L2)
float expected_precomputed = getExpectedQueryPrecomputedValue<Metric>();
ASSERT_FLOAT_EQ(query_floats[dim], expected_precomputed);

// Verify precomputed values (sum for IP/Cosine, sum and sum_squares for L2)
ASSERT_FLOAT_EQ(query_floats[dim + sq8::SUM_QUERY], expected_query_sum);
if constexpr (Metric == VecSimMetric_L2) {
ASSERT_FLOAT_EQ(query_floats[dim + sq8::SUM_SQUARES_QUERY],
expected_query_sum_squares);
}
allocator->free_allocation(blob);
}

Expand Down
Loading