@@ -140,42 +140,45 @@ class CosinePreprocessor : public PreprocessorInterface {
140140};
141141
142142/*
143- * QuantPreprocessor is a preprocessor that quantizes the input vector of INPUT_TYPE (float) to a
144- * lower precision representation using OUTPUT_TYPE (uint8_t). It stores the quantized values along
145- * with metadata (min value and scaling factor) in a single contiguous blob. The quantized values
146- * are then stored in an OUTPUT_TYPE array. The quantization is done by finding the minimum and
143+ * QuantPreprocessor is a preprocessor that quantizes storage vectors from DataType to a
144+ * lower precision representation using OUTPUT_TYPE (uint8_t).
145+ * Query vectors remain as DataType for asymmetric distance computation.
146+ *
147+ * The quantized storage blob is a single contiguous buffer: the quantized values followed by the
148+ * metadata (min value and scaling factor). The quantization is done by finding the minimum and
147149 * maximum values of the input vector, and then scaling the values to fit in the range of [0, 255].
148- * The quantized blob size is: dim_elements * sizeof(OUTPUT_TYPE) + 2 * sizeof(float)
150+ *
151+ * The quantized blob size is: dim_elements * sizeof(OUTPUT_TYPE) + 2 * sizeof(DataType)
149152 */
153+ template <typename DataType>
150154class QuantPreprocessor : public PreprocessorInterface {
151- using INPUT_TYPE = float ;
152155 using OUTPUT_TYPE = uint8_t ;
153156
154157public:
155158 // Constructor for backward compatibility (single blob size)
// Stores the vector dimension and precomputes storage_bytes_count, the size in bytes of one
// quantized storage blob: dim elements of OUTPUT_TYPE plus two DataType metadata values
// (min and delta). The allocator is forwarded to PreprocessorInterface.
156159 QuantPreprocessor (std::shared_ptr<VecSimAllocator> allocator, size_t dim)
157160 : PreprocessorInterface(allocator), dim(dim),
158- storage_bytes_count (dim * sizeof (OUTPUT_TYPE) + 2 * sizeof(float )) {
161+ storage_bytes_count (dim * sizeof (OUTPUT_TYPE) + 2 * sizeof(DataType )) {
159162 } // quantized + min + delta
160163
161164 // Helper function to perform quantization. This function is used by the storage paths (e.g.
162165 // preprocess and preprocessStorageInPlace) and supports in-place quantization of the storage blob.
163- void quantize (const INPUT_TYPE *input, OUTPUT_TYPE *quantized) const {
166+ void quantize (const DataType *input, OUTPUT_TYPE *quantized) const {
164167 assert (input && quantized);
165168 // Find min and max values
166169 auto [min_val, max_val] = find_min_max (input);
167170
168171 // Calculate scaling factor
169- const float diff = (max_val - min_val);
170- const float delta = diff == 0 . 0f ? 1 . 0f : diff / 255 . 0f ;
171- const float inv_delta = 1 . 0f / delta;
172+ const DataType diff = (max_val - min_val);
173+ const DataType delta = ( diff == DataType{ 0 }) ? DataType{ 1 } : diff / DataType{ 255 } ;
174+ const DataType inv_delta = DataType{ 1 } / delta;
172175
173176 // Quantize the values
174177 for (size_t i = 0 ; i < this ->dim ; i++) {
175178 quantized[i] = static_cast <OUTPUT_TYPE>(std::round ((input[i] - min_val) * inv_delta));
176179 }
177180
178- float *metadata = reinterpret_cast <float *>(quantized + this ->dim );
181+ DataType *metadata = reinterpret_cast <DataType *>(quantized + this ->dim );
179182
180183 // Store min_val, delta, in the metadata
181184 metadata[0 ] = min_val;
@@ -189,6 +192,14 @@ class QuantPreprocessor : public PreprocessorInterface {
189192 alignment);
190193 }
191194
195+ /* *
196+ * Quantizes the storage blob (DataType → OUTPUT_TYPE) while leaving the query blob unchanged.
197+ *
198+ * Storage vectors are quantized, while query vectors remain as DataType for asymmetric distance
199+ * computation.
200+ *
201+ * Note: query_blob and query_blob_size are not modified, nor allocated by this function.
202+ */
192203 void preprocess (const void *original_blob, void *&storage_blob, void *&query_blob,
193204 size_t &storage_blob_size, size_t &query_blob_size,
194205 unsigned char alignment) const override {
@@ -199,20 +210,20 @@ class QuantPreprocessor : public PreprocessorInterface {
199210 this ->allocator ->allocate_aligned (this ->storage_bytes_count , alignment));
200211
201212 // Quantize directly from original data
202- const INPUT_TYPE *input = static_cast <const INPUT_TYPE *>(original_blob);
213+ const DataType *input = static_cast <const DataType *>(original_blob);
203214 quantize (input, static_cast <OUTPUT_TYPE *>(storage_blob));
204215 }
205216 // CASE 2: STORAGE BLOB EXISTS
206217 else {
207218 // CASE 2A: STORAGE AND QUERY SHARE MEMORY
208219 if (storage_blob == query_blob) {
209- // Need to allocate a separate storage blob since query remains float32
220+ // Need to allocate a separate storage blob since query remains DataType
210221 // while storage needs to be quantized
211222 void *new_storage =
212223 this ->allocator ->allocate_aligned (this ->storage_bytes_count , alignment);
213224
214225 // Quantize from the shared blob (query_blob) to the new storage blob
215- quantize (static_cast <const INPUT_TYPE *>(query_blob),
226+ quantize (static_cast <const DataType *>(query_blob),
216227 static_cast <OUTPUT_TYPE *>(new_storage));
217228
218229 // Update storage_blob to point to the new memory
@@ -227,15 +238,15 @@ class QuantPreprocessor : public PreprocessorInterface {
227238 this ->allocator ->allocate_aligned (this ->storage_bytes_count , alignment));
228239
229240 // Quantize from old storage to new storage
230- quantize (static_cast <const INPUT_TYPE *>(storage_blob),
241+ quantize (static_cast <const DataType *>(storage_blob),
231242 static_cast <OUTPUT_TYPE *>(new_storage));
232243
233244 // Free old storage and update pointer
234245 this ->allocator ->free_allocation (storage_blob);
235246 storage_blob = new_storage;
236247 } else {
237248 // Storage blob is large enough, quantize in-place
238- quantize (static_cast <const INPUT_TYPE *>(storage_blob),
249+ quantize (static_cast <const DataType *>(storage_blob),
239250 static_cast <OUTPUT_TYPE *>(storage_blob));
240251 }
241252 }
@@ -252,7 +263,7 @@ class QuantPreprocessor : public PreprocessorInterface {
252263 }
253264
254265 // Cast to appropriate types
255- const INPUT_TYPE *input = static_cast <const INPUT_TYPE *>(original_blob);
266+ const DataType *input = static_cast <const DataType *>(original_blob);
256267 OUTPUT_TYPE *quantized = static_cast <OUTPUT_TYPE *>(blob);
257268 quantize (input, quantized);
258269
@@ -261,44 +272,22 @@ class QuantPreprocessor : public PreprocessorInterface {
261272
// Query-side hook. Intentionally empty: none of the parameters are read or written, so the
// query blob keeps its DataType representation and query_blob_size is left as passed in.
// NOTE(review): this change also drops the preprocessQueryInPlace override (removed lines
// below) - confirm PreprocessorInterface no longer declares it as pure virtual.
262273 void preprocessQuery (const void *original_blob, void *&blob, size_t &query_blob_size,
263274 unsigned char alignment) const override {
264- // No-op: queries remain as float32
265- }
266-
267- void preprocessQueryInPlace (void *blob, size_t input_blob_size,
268- unsigned char alignment) const override {
269- // No-op: queries remain as float32
275+ // No-op: queries remain as original DataType
270276 }
271277
// Quantizes a storage vector inside its own buffer: original_blob is read as DataType
// elements and overwritten with the OUTPUT_TYPE representation produced by quantize.
//   original_blob   - buffer holding the input vector; must be non-null (asserted).
//   input_blob_size - capacity of that buffer in bytes; must be at least
//                     storage_bytes_count (asserted), since the quantized values and the
//                     trailing metadata are all written into this same buffer.
// Both preconditions are checked with assert only, so they are not enforced in builds that
// compile assertions out.
272278 void preprocessStorageInPlace (void *original_blob, size_t input_blob_size) const override {
273279 assert (original_blob);
274280 assert (input_blob_size >= storage_bytes_count &&
275281 " Input buffer too small for in-place quantization" );
276282
// The same pointer is passed as both source and destination.
// NOTE(review): this appears to rely on quantize reading input[i] before storing
// quantized[i] and on sizeof(OUTPUT_TYPE) <= sizeof(DataType) - confirm for every
// DataType this template is instantiated with.
277- quantize (static_cast <const INPUT_TYPE *>(original_blob),
283+ quantize (static_cast <const DataType *>(original_blob),
278284 static_cast <OUTPUT_TYPE *>(original_blob));
279285 }
280286
281287private:
282- std::pair<float , float > find_min_max (const INPUT_TYPE *input) const {
283- float min_val = input[0 ];
284- float max_val = input[0 ];
285-
286- size_t i = 1 ;
287- // Process 4 elements at a time for better performance
288- for (; i + 3 < dim; i += 4 ) {
289- const float v0 = input[i];
290- const float v1 = input[i + 1 ];
291- const float v2 = input[i + 2 ];
292- const float v3 = input[i + 3 ];
293- min_val = std::min ({min_val, v0, v1, v2, v3});
294- max_val = std::max ({max_val, v0, v1, v2, v3});
295- }
296- // Handle remaining elements
297- for (; i < dim; i++) {
298- min_val = std::min (min_val, input[i]);
299- max_val = std::max (max_val, input[i]);
300- }
301- return {min_val, max_val};
// Returns {smallest, largest} among the dim elements starting at input, found with a single
// std::minmax_element call over [input, input + dim).
// NOTE(review): for an empty range std::minmax_element returns the begin iterator, so
// dim == 0 would dereference input[0] - confirm callers guarantee dim > 0.
// NOTE(review): ordering uses operator<, so a NaN element can yield a min/max that depends on
// its position - presumably inputs are NaN-free; verify.
// NOTE(review): the removed version unrolled the scan 4 elements at a time for performance -
// confirm with a benchmark that this replacement does not regress the quantization path.
288+ std::pair<DataType, DataType> find_min_max (const DataType *input) const {
289+ auto [min_it, max_it] = std::minmax_element (input, input + dim);
290+ return {*min_it, *max_it};
302291 }
303292
304293 const size_t dim;
0 commit comments