Skip to content

Commit f6df960

Browse files
authored
MOD-13451 Add pre-computed sum to L2 query blob in QuantPreprocessor (#883)
Extended the L2 query blob to include both y_sum and y_sum_squares.
1 parent 51d5247 commit f6df960

File tree

3 files changed

+108
-75
lines changed

3 files changed

+108
-75
lines changed

src/VecSim/spaces/computer/preprocessors.h

Lines changed: 58 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -169,12 +169,15 @@ class CosinePreprocessor : public PreprocessorInterface {
169169
* The query vector is not quantized. It remains as DataType, but we precompute
170170
* and store metric-specific values to accelerate asymmetric distance computation:
171171
* - For IP/Cosine: y_sum = Σy_i (sum of query values)
172-
* - For L2: y_sum_squares = Σy_i² (sum of squared query values)
172+
* - For L2: y_sum = Σy_i (sum of query values), y_sum_squares = Σy_i² (sum of squared query values)
173173
*
174174
* Query blob layout:
175-
* | query_values[dim] | y_sum (IP/Cosine) OR y_sum_squares (L2) |
175+
* - For IP/Cosine: | query_values[dim] | y_sum |
176+
* - For L2: | query_values[dim] | y_sum | y_sum_squares |
176177
*
177-
* Query blob size: (dim + 1) * sizeof(DataType)
178+
* Query blob size:
179+
* - For IP/Cosine: (dim + 1) * sizeof(DataType)
180+
* - For L2: (dim + 2) * sizeof(DataType)
178181
*
179182
* === Asymmetric distance (storage x quantized, query y remains float) ===
180183
*
@@ -303,56 +306,65 @@ class QuantPreprocessor : public PreprocessorInterface {
303306
}
304307
}
305308

306-
DataType sum_fast(const DataType *p) const {
309+
// Computes and assigns query metadata in a single pass over the input vector.
310+
// For IP/Cosine: assigns y_sum = Σy_i
311+
// For L2: assigns y_sum = Σy_i and y_sum_squares = Σy_i²
312+
void assign_query_metadata(const DataType *input, DataType *output_metadata) const {
313+
// 4 independent accumulators for sum
307314
DataType s0{}, s1{}, s2{}, s3{};
315+
// 4 independent accumulators for sum of squares (only used for L2)
316+
DataType q0{}, q1{}, q2{}, q3{};
308317

309318
size_t i = 0;
310319
// round dim down to the nearest multiple of 4
311320
size_t dim_round_down = this->dim & ~size_t(3);
312321

313322
for (; i < dim_round_down; i += 4) {
314-
s0 += p[i + 0];
315-
s1 += p[i + 1];
316-
s2 += p[i + 2];
317-
s3 += p[i + 3];
318-
}
323+
const DataType y0 = input[i + 0];
324+
const DataType y1 = input[i + 1];
325+
const DataType y2 = input[i + 2];
326+
const DataType y3 = input[i + 3];
319327

320-
DataType sum = (s0 + s1) + (s2 + s3);
328+
s0 += y0;
329+
s1 += y1;
330+
s2 += y2;
331+
s3 += y3;
321332

322-
for (; i < dim; ++i) {
323-
sum += p[i];
333+
if constexpr (Metric == VecSimMetric_L2) {
334+
q0 += y0 * y0;
335+
q1 += y1 * y1;
336+
q2 += y2 * y2;
337+
q3 += y3 * y3;
338+
}
324339
}
325-
return sum;
326-
}
327340

328-
DataType sum_squares_fast(const DataType *p) const {
329-
DataType s0{}, s1{}, s2{}, s3{};
330-
331-
size_t i = 0;
332-
// round dim down to the nearest multiple of 4
333-
size_t dim_round_down = this->dim & ~size_t(3);
341+
DataType sum = (s0 + s1) + (s2 + s3);
342+
DataType sum_squares = (q0 + q1) + (q2 + q3);
334343

335-
for (; i < dim_round_down; i += 4) {
336-
s0 += p[i + 0] * p[i + 0];
337-
s1 += p[i + 1] * p[i + 1];
338-
s2 += p[i + 2] * p[i + 2];
339-
s3 += p[i + 3] * p[i + 3];
344+
// Tail: handle remaining elements
345+
for (; i < this->dim; ++i) {
346+
const DataType y = input[i];
347+
sum += y;
348+
if constexpr (Metric == VecSimMetric_L2) {
349+
sum_squares += y * y;
350+
}
340351
}
341352

342-
DataType sum = (s0 + s1) + (s2 + s3);
343-
344-
for (; i < dim; ++i) {
345-
sum += p[i] * p[i];
353+
// Assign the computed metadata
354+
output_metadata[sq8::SUM_QUERY] = sum; // y_sum for all metrics
355+
if constexpr (Metric == VecSimMetric_L2) {
356+
output_metadata[sq8::SUM_SQUARES_QUERY] = sum_squares; // y_sum_squares for L2 only
346357
}
347-
return sum;
348358
}
349359

350360
public:
351361
QuantPreprocessor(std::shared_ptr<VecSimAllocator> allocator, size_t dim)
352362
: PreprocessorInterface(allocator), dim(dim),
353363
storage_bytes_count(dim * sizeof(OUTPUT_TYPE) +
354-
(vecsim_types::sq8::metadata_count<Metric>()) * sizeof(DataType)),
355-
query_bytes_count((dim + 1) * sizeof(DataType)) {
364+
(vecsim_types::sq8::storage_metadata_count<Metric>()) *
365+
sizeof(DataType)),
366+
query_bytes_count((dim + vecsim_types::sq8::query_metadata_count<Metric>()) *
367+
sizeof(DataType)) {
356368
static_assert(std::is_floating_point_v<DataType>,
357369
"QuantPreprocessor only supports floating-point types");
358370
}
@@ -422,12 +434,18 @@ class QuantPreprocessor : public PreprocessorInterface {
422434
/**
423435
* Preprocesses the query vector for asymmetric distance computation.
424436
*
425-
* The query blob contains the original float values followed by a precomputed value:
437+
* The query blob contains the original float values followed by precomputed values:
426438
* - For IP/Cosine: y_sum = Σy_i (sum of query values)
427-
* - For L2: y_sum_squares = Σy_i² (sum of squared query values)
439+
* - For L2: y_sum = Σy_i (sum of query values), y_sum_squares = Σy_i² (sum of squared query
440+
* values)
441+
*
442+
* Query blob layout:
443+
* - For IP/Cosine: | query_values[dim] | y_sum |
444+
* - For L2: | query_values[dim] | y_sum | y_sum_squares |
428445
*
429-
* Query blob layout: | query_values[dim] | y_sum OR y_sum_squares |
430-
* Query blob size: (dim + 1) * sizeof(DataType)
446+
* Query blob size:
447+
* - For IP/Cosine: (dim + 1) * sizeof(DataType)
448+
* - For L2: (dim + 2) * sizeof(DataType)
431449
*/
432450
void preprocessQuery(const void *original_blob, void *&blob, size_t &query_blob_size,
433451
unsigned char alignment) const override {
@@ -437,13 +455,10 @@ class QuantPreprocessor : public PreprocessorInterface {
437455
blob = this->allocator->allocate_aligned(this->query_bytes_count, alignment);
438456
memcpy(blob, original_blob, this->dim * sizeof(DataType));
439457
const DataType *input = static_cast<const DataType *>(original_blob);
440-
// For IP/Cosine, we need to store the sum of the query vector.
441-
if constexpr (Metric == VecSimMetric_IP || Metric == VecSimMetric_Cosine) {
442-
static_cast<DataType *>(blob)[this->dim] = sum_fast(input);
443-
} // For L2, compute the sum of squares.
444-
else if constexpr (Metric == VecSimMetric_L2) {
445-
static_cast<DataType *>(blob)[this->dim] = sum_squares_fast(input);
446-
}
458+
DataType *output = static_cast<DataType *>(blob);
459+
460+
// Compute and assign query metadata (sum for IP/Cosine, sum and sum_squares for L2)
461+
assign_query_metadata(input, output + this->dim);
447462

448463
query_blob_size = this->query_bytes_count;
449464
}

src/VecSim/types/sq8.h

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,22 @@ struct sq8 {
2626
SUM_SQUARES = 3 // Only for L2
2727
};
2828

29+
// TODO: re-order metadata and merge with the above enum
30+
enum QueryMetadataIndex : size_t {
31+
SUM_QUERY = 0,
32+
SUM_SQUARES_QUERY = 1 // Only for L2
33+
};
34+
2935
// Template on metric — compile-time constant when metric is known
3036
template <VecSimMetric Metric>
31-
static constexpr size_t metadata_count() {
37+
static constexpr size_t storage_metadata_count() {
3238
return (Metric == VecSimMetric_L2) ? 4 : 3;
3339
}
40+
41+
template <VecSimMetric Metric>
42+
static constexpr size_t query_metadata_count() {
43+
return (Metric == VecSimMetric_L2) ? 2 : 1;
44+
}
3445
};
3546

3647
} // namespace vecsim_types

tests/unit/test_components.cpp

Lines changed: 38 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -999,16 +999,21 @@ TEST(PreprocessorsTest, QuantizationTest) {
999999

10001000
// === Storage blob expected values ===
10011001
// For L2 metric: quantized values + min + delta + sum + sum_squares = dim bytes + 4 floats
1002-
constexpr size_t quantized_blob_bytes_count = dim * sizeof(uint8_t) + 4 * sizeof(float);
1002+
constexpr size_t quantized_blob_bytes_count =
1003+
dim * sizeof(uint8_t) + sq8::storage_metadata_count<VecSimMetric_L2>() * sizeof(float);
10031004
uint8_t expected_storage_blob[quantized_blob_bytes_count] = {0};
10041005
ComputeSQ8Quantization(original_blob, dim, expected_storage_blob);
10051006

10061007
// === Query blob expected values ===
10071008
// Query layout (L2): | query_values[dim] | y_sum | y_sum_squares |
1008-
constexpr size_t query_blob_bytes_count = (dim + 1) * sizeof(float);
1009-
// Compute expected sum of squares for L2: 1² + 2² + 3² + 4² + 5² + 6² = 91
1009+
constexpr size_t query_blob_bytes_count =
1010+
(dim + sq8::query_metadata_count<VecSimMetric_L2>()) * sizeof(float);
1011+
1012+
// Compute expected sum and sum of squares for L2:
1013+
float expected_query_sum = 0;
10101014
float expected_query_sum_squares = 0;
10111015
for (size_t i = 0; i < dim; ++i) {
1016+
expected_query_sum += original_blob[i];
10121017
expected_query_sum_squares += original_blob[i] * original_blob[i];
10131018
}
10141019

@@ -1038,7 +1043,8 @@ TEST(PreprocessorsTest, QuantizationTest) {
10381043
// Verify query blob content
10391044
const float *query_floats = static_cast<const float *>(query_blob);
10401045
EXPECT_NO_FATAL_FAILURE(CompareVectors<float>(query_floats, original_blob, dim));
1041-
ASSERT_FLOAT_EQ(query_floats[dim], expected_query_sum_squares);
1046+
ASSERT_FLOAT_EQ(query_floats[dim + sq8::SUM_QUERY], expected_query_sum);
1047+
ASSERT_FLOAT_EQ(query_floats[dim + sq8::SUM_SQUARES_QUERY], expected_query_sum_squares);
10421048
}
10431049

10441050
// Test preprocessForStorage
@@ -1060,7 +1066,8 @@ TEST(PreprocessorsTest, QuantizationTest) {
10601066
// Verify query blob content: original floats followed by sum and sum_squares
10611067
const float *query_floats = static_cast<const float *>(query_blob.get());
10621068
EXPECT_NO_FATAL_FAILURE(CompareVectors<float>(query_floats, original_blob, dim));
1063-
ASSERT_FLOAT_EQ(query_floats[dim], expected_query_sum_squares);
1069+
ASSERT_FLOAT_EQ(query_floats[dim + sq8::SUM_QUERY], expected_query_sum);
1070+
ASSERT_FLOAT_EQ(query_floats[dim + sq8::SUM_SQUARES_QUERY], expected_query_sum_squares);
10641071

10651072
// Check address is aligned
10661073
unsigned char address_alignment = (uintptr_t)(query_blob.get()) % alignment;
@@ -1152,38 +1159,33 @@ class QuantPreprocessorMetricTest : public testing::TestWithParam<VecSimMetric>
11521159
// Storage layout: | quantized_values[dim] | min | delta | sum | (sum_squares for L2) |
11531160
// L2: dim bytes + 4 floats (min, delta, sum, sum_squares)
11541161
// IP/Cosine: dim bytes + 3 floats (min, delta, sum)
1155-
static size_t getExpectedStorageSize(VecSimMetric metric) {
1156-
size_t extra_floats = (metric == VecSimMetric_L2) ? 4 : 3;
1162+
template <VecSimMetric Metric>
1163+
static size_t getExpectedStorageSize() {
1164+
constexpr size_t extra_floats = sq8::storage_metadata_count<Metric>();
11571165
return dim * sizeof(uint8_t) + extra_floats * sizeof(float);
11581166
}
11591167

11601168
// === Query blob helpers ===
11611169

11621170
// Query layout: | query_values[dim] | y_sum | y_sum_squares (L2 only) |
11631171
// IP/Cosine: (dim + 1) floats; L2: (dim + 2) floats
1164-
static constexpr size_t getExpectedQuerySize() { return (dim + 1) * sizeof(float); }
1165-
1166-
// Compute expected precomputed value for query blob based on metric
11671172
template <VecSimMetric Metric>
1168-
float getExpectedQueryPrecomputedValue() {
1169-
float sum = 0;
1170-
for (size_t i = 0; i < dim; ++i) {
1171-
if constexpr (Metric == VecSimMetric_L2) {
1172-
// sum of squares: 1² + 2² + 3² + 4² + 5² = 55
1173-
sum += original_blob[i] * original_blob[i];
1174-
} else {
1175-
// sum: 1 + 2 + 3 + 4 + 5 = 15
1176-
sum += original_blob[i];
1177-
}
1178-
}
1179-
return sum;
1173+
static constexpr size_t getExpectedQuerySize() {
1174+
return (dim + sq8::query_metadata_count<Metric>()) * sizeof(float);
11801175
}
11811176

11821177
// Helper to run quantization test for a specific metric
11831178
template <VecSimMetric Metric>
11841179
void runQuantizationTest() {
1185-
size_t expected_storage_size = getExpectedStorageSize(Metric);
1186-
size_t expected_query_size = getExpectedQuerySize();
1180+
size_t expected_storage_size = getExpectedStorageSize<Metric>();
1181+
size_t expected_query_size = getExpectedQuerySize<Metric>();
1182+
1183+
float expected_query_sum = 0;
1184+
float expected_query_sum_squares = 0;
1185+
for (size_t i = 0; i < dim; ++i) {
1186+
expected_query_sum += original_blob[i];
1187+
expected_query_sum_squares += original_blob[i] * original_blob[i];
1188+
}
11871189

11881190
auto quant_preprocessor = new (allocator) QuantPreprocessor<float, Metric>(allocator, dim);
11891191

@@ -1221,9 +1223,12 @@ class QuantPreprocessorMetricTest : public testing::TestWithParam<VecSimMetric>
12211223
const float *query_floats = static_cast<const float *>(query_blob);
12221224
EXPECT_NO_FATAL_FAILURE(CompareVectors<float>(query_floats, original_blob, dim));
12231225

1224-
// Verify precomputed value (sum for IP/Cosine, sum_squares for L2)
1225-
float expected_precomputed = getExpectedQueryPrecomputedValue<Metric>();
1226-
ASSERT_FLOAT_EQ(query_floats[dim], expected_precomputed);
1226+
// Verify precomputed value (sum for IP/Cosine, sum and sum_squares for L2)
1227+
ASSERT_FLOAT_EQ(query_floats[dim + sq8::SUM_QUERY], expected_query_sum);
1228+
if constexpr (Metric == VecSimMetric_L2) {
1229+
ASSERT_FLOAT_EQ(query_floats[dim + sq8::SUM_SQUARES_QUERY],
1230+
expected_query_sum_squares);
1231+
}
12271232

12281233
allocator->free_allocation(storage_blob);
12291234
allocator->free_allocation(query_blob);
@@ -1254,10 +1259,12 @@ class QuantPreprocessorMetricTest : public testing::TestWithParam<VecSimMetric>
12541259
const float *query_floats = static_cast<const float *>(blob);
12551260
EXPECT_NO_FATAL_FAILURE(CompareVectors<float>(query_floats, original_blob, dim));
12561261

1257-
// Verify precomputed value (sum for IP/Cosine, sum_squares for L2)
1258-
float expected_precomputed = getExpectedQueryPrecomputedValue<Metric>();
1259-
ASSERT_FLOAT_EQ(query_floats[dim], expected_precomputed);
1260-
1262+
// Verify precomputed value (sum for IP/Cosine, sum and sum_squares for L2)
1263+
ASSERT_FLOAT_EQ(query_floats[dim + sq8::SUM_QUERY], expected_query_sum);
1264+
if constexpr (Metric == VecSimMetric_L2) {
1265+
ASSERT_FLOAT_EQ(query_floats[dim + sq8::SUM_SQUARES_QUERY],
1266+
expected_query_sum_squares);
1267+
}
12611268
allocator->free_allocation(blob);
12621269
}
12631270

0 commit comments

Comments
 (0)