Skip to content

Commit 0004b3a

Browse files
committed
make pp templated
optimize min max function
1 parent 9ad3793 commit 0004b3a

File tree

2 files changed

+39
-50
lines changed

2 files changed

+39
-50
lines changed

src/VecSim/spaces/computer/preprocessors.h

Lines changed: 34 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -140,42 +140,45 @@ class CosinePreprocessor : public PreprocessorInterface {
140140
};
141141

142142
/*
143-
* QuantPreprocessor is a preprocessor that quantizes the input vector of INPUT_TYPE (float) to a
144-
* lower precision representation using OUTPUT_TYPE (uint8_t). It stores the quantized values along
145-
* with metadata (min value and scaling factor) in a single contiguous blob. The quantized values
146-
* are then stored in an OUTPUT_TYPE array. The quantization is done by finding the minimum and
143+
* QuantPreprocessor is a preprocessor that quantizes storage vectors from DataType to a
144+
* lower precision representation using OUTPUT_TYPE (uint8_t).
145+
* Query vectors remain as DataType for asymmetric distance computation.
146+
*
147+
* The quantized storage blob contains the quantized values along with metadata (min value and
148+
* scaling factor) in a single contiguous blob. The quantization is done by finding the minimum and
147149
* maximum values of the input vector, and then scaling the values to fit in the range of [0, 255].
148-
* The quantized blob size is: dim_elements * sizeof(OUTPUT_TYPE) + 2 * sizeof(float)
150+
*
151+
* The quantized blob size is: dim_elements * sizeof(OUTPUT_TYPE) + 2 * sizeof(DataType)
149152
*/
153+
template <typename DataType>
150154
class QuantPreprocessor : public PreprocessorInterface {
151-
using INPUT_TYPE = float;
152155
using OUTPUT_TYPE = uint8_t;
153156

154157
public:
155158
// Constructor for backward compatibility (single blob size)
156159
QuantPreprocessor(std::shared_ptr<VecSimAllocator> allocator, size_t dim)
157160
: PreprocessorInterface(allocator), dim(dim),
158-
storage_bytes_count(dim * sizeof(OUTPUT_TYPE) + 2 * sizeof(float)) {
161+
storage_bytes_count(dim * sizeof(OUTPUT_TYPE) + 2 * sizeof(DataType)) {
159162
} // quantized + min + delta
160163

161164
// Helper function to perform quantization. This function is used by both preprocess and
162165
// preprocessQuery and supports in-place quantization of the storage blob.
163-
void quantize(const INPUT_TYPE *input, OUTPUT_TYPE *quantized) const {
166+
void quantize(const DataType *input, OUTPUT_TYPE *quantized) const {
164167
assert(input && quantized);
165168
// Find min and max values
166169
auto [min_val, max_val] = find_min_max(input);
167170

168171
// Calculate scaling factor
169-
const float diff = (max_val - min_val);
170-
const float delta = diff == 0.0f ? 1.0f : diff / 255.0f;
171-
const float inv_delta = 1.0f / delta;
172+
const DataType diff = (max_val - min_val);
173+
const DataType delta = (diff == DataType{0}) ? DataType{1} : diff / DataType{255};
174+
const DataType inv_delta = DataType{1} / delta;
172175

173176
// Quantize the values
174177
for (size_t i = 0; i < this->dim; i++) {
175178
quantized[i] = static_cast<OUTPUT_TYPE>(std::round((input[i] - min_val) * inv_delta));
176179
}
177180

178-
float *metadata = reinterpret_cast<float *>(quantized + this->dim);
181+
DataType *metadata = reinterpret_cast<DataType *>(quantized + this->dim);
179182

180183
// Store min_val, delta, in the metadata
181184
metadata[0] = min_val;
@@ -189,6 +192,14 @@ class QuantPreprocessor : public PreprocessorInterface {
189192
alignment);
190193
}
191194

195+
/**
196+
* Quantizes the storage blob (DataType → OUTPUT_TYPE) while leaving the query blob unchanged.
197+
*
198+
* Storage vectors are quantized, while query vectors remain as DataType for asymmetric distance
199+
* computation.
200+
*
201+
* Note: query_blob and query_blob_size are not modified, nor allocated by this function.
202+
*/
192203
void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
193204
size_t &storage_blob_size, size_t &query_blob_size,
194205
unsigned char alignment) const override {
@@ -199,20 +210,20 @@ class QuantPreprocessor : public PreprocessorInterface {
199210
this->allocator->allocate_aligned(this->storage_bytes_count, alignment));
200211

201212
// Quantize directly from original data
202-
const INPUT_TYPE *input = static_cast<const INPUT_TYPE *>(original_blob);
213+
const DataType *input = static_cast<const DataType *>(original_blob);
203214
quantize(input, static_cast<OUTPUT_TYPE *>(storage_blob));
204215
}
205216
// CASE 2: STORAGE BLOB EXISTS
206217
else {
207218
// CASE 2A: STORAGE AND QUERY SHARE MEMORY
208219
if (storage_blob == query_blob) {
209-
// Need to allocate a separate storage blob since query remains float32
220+
// Need to allocate a separate storage blob since query remains DataType
210221
// while storage needs to be quantized
211222
void *new_storage =
212223
this->allocator->allocate_aligned(this->storage_bytes_count, alignment);
213224

214225
// Quantize from the shared blob (query_blob) to the new storage blob
215-
quantize(static_cast<const INPUT_TYPE *>(query_blob),
226+
quantize(static_cast<const DataType *>(query_blob),
216227
static_cast<OUTPUT_TYPE *>(new_storage));
217228

218229
// Update storage_blob to point to the new memory
@@ -227,15 +238,15 @@ class QuantPreprocessor : public PreprocessorInterface {
227238
this->allocator->allocate_aligned(this->storage_bytes_count, alignment));
228239

229240
// Quantize from old storage to new storage
230-
quantize(static_cast<const INPUT_TYPE *>(storage_blob),
241+
quantize(static_cast<const DataType *>(storage_blob),
231242
static_cast<OUTPUT_TYPE *>(new_storage));
232243

233244
// Free old storage and update pointer
234245
this->allocator->free_allocation(storage_blob);
235246
storage_blob = new_storage;
236247
} else {
237248
// Storage blob is large enough, quantize in-place
238-
quantize(static_cast<const INPUT_TYPE *>(storage_blob),
249+
quantize(static_cast<const DataType *>(storage_blob),
239250
static_cast<OUTPUT_TYPE *>(storage_blob));
240251
}
241252
}
@@ -252,7 +263,7 @@ class QuantPreprocessor : public PreprocessorInterface {
252263
}
253264

254265
// Cast to appropriate types
255-
const INPUT_TYPE *input = static_cast<const INPUT_TYPE *>(original_blob);
266+
const DataType *input = static_cast<const DataType *>(original_blob);
256267
OUTPUT_TYPE *quantized = static_cast<OUTPUT_TYPE *>(blob);
257268
quantize(input, quantized);
258269

@@ -261,44 +272,22 @@ class QuantPreprocessor : public PreprocessorInterface {
261272

262273
void preprocessQuery(const void *original_blob, void *&blob, size_t &query_blob_size,
263274
unsigned char alignment) const override {
264-
// No-op: queries remain as float32
265-
}
266-
267-
void preprocessQueryInPlace(void *blob, size_t input_blob_size,
268-
unsigned char alignment) const override {
269-
// No-op: queries remain as float32
275+
// No-op: queries remain as original DataType
270276
}
271277

272278
void preprocessStorageInPlace(void *original_blob, size_t input_blob_size) const override {
273279
assert(original_blob);
274280
assert(input_blob_size >= storage_bytes_count &&
275281
"Input buffer too small for in-place quantization");
276282

277-
quantize(static_cast<const INPUT_TYPE *>(original_blob),
283+
quantize(static_cast<const DataType *>(original_blob),
278284
static_cast<OUTPUT_TYPE *>(original_blob));
279285
}
280286

281287
private:
282-
std::pair<float, float> find_min_max(const INPUT_TYPE *input) const {
283-
float min_val = input[0];
284-
float max_val = input[0];
285-
286-
size_t i = 1;
287-
// Process 4 elements at a time for better performance
288-
for (; i + 3 < dim; i += 4) {
289-
const float v0 = input[i];
290-
const float v1 = input[i + 1];
291-
const float v2 = input[i + 2];
292-
const float v3 = input[i + 3];
293-
min_val = std::min({min_val, v0, v1, v2, v3});
294-
max_val = std::max({max_val, v0, v1, v2, v3});
295-
}
296-
// Handle remaining elements
297-
for (; i < dim; i++) {
298-
min_val = std::min(min_val, input[i]);
299-
max_val = std::max(max_val, input[i]);
300-
}
301-
return {min_val, max_val};
288+
std::pair<DataType, DataType> find_min_max(const DataType *input) const {
289+
auto [min_it, max_it] = std::minmax_element(input, input + dim);
290+
return {*min_it, *max_it};
302291
}
303292

304293
const size_t dim;

tests/unit/test_components.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -998,7 +998,7 @@ TEST(PreprocessorsTest, QuantizationTest) {
998998
float *original_blob = static_cast<float *>(original_blob_alloc.get());
999999
test_utils::populate_float_vec(original_blob, elements);
10001000

1001-
auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, elements);
1001+
auto quant_preprocessor = new (allocator) QuantPreprocessor<float>(allocator, elements);
10021002
auto multiPPContainer =
10031003
MultiPreprocessorsContainer<float, n_preprocessors>(allocator, alignment);
10041004
multiPPContainer.addPreprocessor(quant_preprocessor);
@@ -1056,7 +1056,7 @@ TEST(PreprocessorsTest, QuantizationTestWithCosine) {
10561056
float *original_blob = static_cast<float *>(original_blob_alloc.get());
10571057
test_utils::populate_float_vec(original_blob, elements);
10581058

1059-
auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, elements);
1059+
auto quant_preprocessor = new (allocator) QuantPreprocessor<float>(allocator, elements);
10601060
auto cosine_preprocessor =
10611061
new (allocator) CosinePreprocessor<float>(allocator, elements, original_blob_size);
10621062
auto multiPPContainer =
@@ -1113,7 +1113,7 @@ TEST(PreprocessorsTest, ReallocateVectorQuantizationTest) {
11131113
float *original_blob = static_cast<float *>(original_blob_alloc.get());
11141114
test_utils::populate_float_vec(original_blob, elements);
11151115

1116-
auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, elements);
1116+
auto quant_preprocessor = new (allocator) QuantPreprocessor<float>(allocator, elements);
11171117
auto dummy_preprocessor =
11181118
new (allocator) dummyPreprocessors::DummyStoragePreprocessor<float>(allocator, 0.0f);
11191119
auto multiPPContainer =
@@ -1157,7 +1157,7 @@ TEST(PreprocessorsTest, ReallocateVectorCosineQuantizationTest) {
11571157
original_blob[i] = static_cast<float>(i + 2.5f);
11581158
}
11591159

1160-
auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, elements);
1160+
auto quant_preprocessor = new (allocator) QuantPreprocessor<float>(allocator, elements);
11611161
auto cosine_preprocessor =
11621162
new (allocator) CosinePreprocessor<float>(allocator, elements, original_blob_size);
11631163
auto multiPPContainer =
@@ -1198,7 +1198,7 @@ TEST(PreprocessorsTest, QuantizationInPlaceTest) {
11981198
// Create a float array with known values
11991199
float original_data[dim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f};
12001200

1201-
auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, dim);
1201+
auto quant_preprocessor = new (allocator) QuantPreprocessor<float>(allocator, dim);
12021202
auto dummy_preprocessor =
12031203
new (allocator) dummyPreprocessors::DummyStoragePreprocessor<float>(allocator, 0.0f);
12041204
auto multiPPContainer =

0 commit comments

Comments
 (0)