diff --git a/src/VecSim/spaces/computer/preprocessors.h b/src/VecSim/spaces/computer/preprocessors.h
index 15e5a4716..9dd3d6e1a 100644
--- a/src/VecSim/spaces/computer/preprocessors.h
+++ b/src/VecSim/spaces/computer/preprocessors.h
@@ -164,22 +164,33 @@ class CosinePreprocessor : public PreprocessorInterface {
  * Given quantized value q_i, the original value is reconstructed as:
  * x_i ≈ min + delta * q_i
  *
+ * Query processing:
+ * The query vector is not quantized. It remains as DataType, but we precompute
+ * and store metric-specific values to accelerate asymmetric distance computation:
+ * - For IP/Cosine: y_sum = Σy_i (sum of query values)
+ * - For L2: y_sum_squares = Σy_i² (sum of squared query values)
+ *
+ * Query blob layout:
+ * | query_values[dim] | y_sum (IP/Cosine) OR y_sum_squares (L2) |
+ *
+ * Query blob size: (dim + 1) * sizeof(DataType)
+ *
  * === Asymmetric distance (storage x quantized, query y remains float) ===
  *
  * For IP/Cosine:
  *   IP(x, y) = Σ(x_i * y_i)
  *            ≈ Σ((min + delta * q_i) * y_i)
  *            = min * Σy_i + delta * Σ(q_i * y_i)
- *            = min * sum_query + delta * quantized_dot_product
- *   where sum_query = Σy_i is computed at query time.
+ *            = min * y_sum + delta * quantized_dot_product
+ *   where y_sum = Σy_i is precomputed and stored in the query blob.
  *
  * For L2:
  *   ||x - y||² = Σx_i² - 2*Σ(x_i * y_i) + Σy_i²
- *              = sum_squares - 2 * IP(x, y) + sum_sq_query
+ *              = x_sum_squares - 2 * IP(x, y) + y_sum_squares
  *   where:
- *   - sum_squares = Σx_i² is precomputed and stored
+ *   - x_sum_squares = Σx_i² is precomputed and stored in the storage blob
  *   - IP(x, y) is computed using the formula above
- *   - sum_sq_query = Σy_i² is computed at query time
+ *   - y_sum_squares = Σy_i² is precomputed and stored in the query blob
  *
  * === Symmetric distance (both x and y are quantized) ===
  *
@@ -208,7 +219,7 @@ class QuantPreprocessor : public PreprocessorInterface {
 
     // For L2: store sum + sum_of_squares (2 extra values)
     // For IP/Cosine: store only sum (1 extra value)
-    static constexpr size_t extra_values_count = (Metric == VecSimMetric_L2) ? 2 : 1;
+    static constexpr size_t extra_storage_values_count = (Metric == VecSimMetric_L2) ? 2 : 1;
     static_assert(Metric == VecSimMetric_L2 || Metric == VecSimMetric_IP ||
                       Metric == VecSimMetric_Cosine,
                   "QuantPreprocessor only supports L2, IP and Cosine metrics");
@@ -227,21 +238,56 @@ class QuantPreprocessor : public PreprocessorInterface {
         const DataType inv_delta = DataType{1} / delta;
 
         // Compute sum (and sum of squares for L2) while quantizing
-        DataType sum = DataType{0};
-        DataType sum_squares = DataType{0};
+        // 4 independent accumulators (sum)
+        DataType s0{}, s1{}, s2{}, s3{};
+
+        // 4 independent accumulators (sum of squares), only used for L2
+        DataType q0{}, q1{}, q2{}, q3{};
+
+        size_t i = 0;
+        // round dim down to the nearest multiple of 4
+        size_t dim_round_down = this->dim & ~size_t(3);
 
         // Quantize the values
-        for (size_t i = 0; i < this->dim; i++) {
+        for (; i < dim_round_down; i += 4) {
+            // Load once
+            const DataType x0 = input[i + 0];
+            const DataType x1 = input[i + 1];
+            const DataType x2 = input[i + 2];
+            const DataType x3 = input[i + 3];
             // We know (input - min) => 0
             // If min == max, all values are the same and should be quantized to 0.
             // reconstruction will yield the same original value for all vectors.
-            quantized[i] = static_cast<OUTPUT_TYPE>(std::round((input[i] - min_val) * inv_delta));
+            quantized[i + 0] = static_cast<OUTPUT_TYPE>(std::round((x0 - min_val) * inv_delta));
+            quantized[i + 1] = static_cast<OUTPUT_TYPE>(std::round((x1 - min_val) * inv_delta));
+            quantized[i + 2] = static_cast<OUTPUT_TYPE>(std::round((x2 - min_val) * inv_delta));
+            quantized[i + 3] = static_cast<OUTPUT_TYPE>(std::round((x3 - min_val) * inv_delta));
             // Accumulate sum for all metrics
-            sum += input[i];
+            s0 += x0;
+            s1 += x1;
+            s2 += x2;
+            s3 += x3;
+
             // Accumulate sum of squares only for L2
             if constexpr (Metric == VecSimMetric_L2) {
-                sum_squares += input[i] * input[i];
+                q0 += x0 * x0;
+                q1 += x1 * x1;
+                q2 += x2 * x2;
+                q3 += x3 * x3;
+            }
+        }
+
+        // Tail: 0..3 remaining elements (still the same pass, just finishing work)
+        DataType sum = (s0 + s1) + (s2 + s3);
+        DataType sum_squares = (q0 + q1) + (q2 + q3);
+
+        for (; i < this->dim; ++i) {
+            const DataType x = input[i];
+            quantized[i] = static_cast<OUTPUT_TYPE>(std::round((x - min_val) * inv_delta));
+            sum += x;
+            if constexpr (Metric == VecSimMetric_L2) {
+                sum_squares += x * x;
             }
         }
 
@@ -258,11 +304,56 @@ class QuantPreprocessor : public PreprocessorInterface {
         }
     }
 
+    DataType sum_fast(const DataType *p) const {
+        DataType s0{}, s1{}, s2{}, s3{};
+
+        size_t i = 0;
+        // round dim down to the nearest multiple of 4
+        size_t dim_round_down = this->dim & ~size_t(3);
+
+        for (; i < dim_round_down; i += 4) {
+            s0 += p[i + 0];
+            s1 += p[i + 1];
+            s2 += p[i + 2];
+            s3 += p[i + 3];
+        }
+
+        DataType sum = (s0 + s1) + (s2 + s3);
+
+        for (; i < dim; ++i) {
+            sum += p[i];
+        }
+        return sum;
+    }
+
+    DataType sum_squares_fast(const DataType *p) const {
+        DataType s0{}, s1{}, s2{}, s3{};
+
+        size_t i = 0;
+        // round dim down to the nearest multiple of 4
+        size_t dim_round_down = this->dim & ~size_t(3);
+
+        for (; i < dim_round_down; i += 4) {
+            s0 += p[i + 0] * p[i + 0];
+            s1 += p[i + 1] * p[i + 1];
+            s2 += p[i + 2] * p[i + 2];
+            s3 += p[i + 3] * p[i + 3];
+        }
+
+        DataType sum = (s0 + s1) + (s2 + s3);
+
+        for (; i < dim; ++i) {
+            sum += p[i] * p[i];
+        }
+        return sum;
+    }
+
 public:
     QuantPreprocessor(std::shared_ptr<VecSimAllocator> allocator, size_t dim)
         : PreprocessorInterface(allocator), dim(dim),
           storage_bytes_count(dim * sizeof(OUTPUT_TYPE) +
-                              (2 + extra_values_count) * sizeof(DataType)) {
+                              (2 + extra_storage_values_count) * sizeof(DataType)),
+          query_bytes_count((dim + 1) * sizeof(DataType)) {
         static_assert(std::is_floating_point_v<DataType>,
                       "QuantPreprocessor only supports floating-point types");
     }
@@ -274,20 +365,21 @@ class QuantPreprocessor : public PreprocessorInterface {
     }
 
     /**
-     * Quantizes the storage blob (DataType → OUTPUT_TYPE) while leaving the query blob unchanged.
+     * Preprocesses the original blob into separate storage and query blobs.
      *
     * Storage vectors are quantized to uint8_t values, with metadata (min, delta, sum, and
-     * sum_squares for L2) appended for distance reconstruction. Query vectors remain as DataType
-     * for asymmetric distance computation.
+     * sum_squares for L2) appended for distance reconstruction.
      *
-     * Note: query_blob and query_blob_size are not modified, nor allocated by this function.
+     * Query vectors remain as DataType for asymmetric distance computation, with a precomputed
+     * sum (for IP/Cosine) or sum of squares (for L2) appended for efficient distance calculation.
     *
     * Possible scenarios (currently only CASE 1 is implemented):
-    * - CASE 1: STORAGE BLOB NEEDS ALLOCATION (storage_blob == nullptr)
+    * - CASE 1: STORAGE BLOB AND QUERY BLOB NEED ALLOCATION (storage_blob == query_blob == nullptr)
     * - CASE 2: STORAGE BLOB EXISTS (storage_blob != nullptr)
     *   - CASE 2A: STORAGE BLOB EXISTS and its size is insufficient
     *     (storage_blob_size < required_size) - reallocate storage
-    *   - CASE 2B: STORAGE AND QUERY SHARE MEMORY (storage_blob == query_blob) - reallocate storage
+    *   - CASE 2B: STORAGE AND QUERY SHARE MEMORY (storage_blob == query_blob != nullptr) -
+    *     reallocate storage
     *   - CASE 2C: SEPARATE STORAGE AND QUERY BLOBS (storage_blob != query_blob) - quantize storage
     *     in-place
     */
@@ -296,6 +388,10 @@ class QuantPreprocessor : public PreprocessorInterface {
                     unsigned char alignment) const override {
         // CASE 1: STORAGE BLOB NEEDS ALLOCATION - the only implemented case
         assert(!storage_blob && "CASE 1: storage_blob must be nullptr");
+        assert(!query_blob && "CASE 1: query_blob must be nullptr");
+
+        // storage_blob_size and query_blob_size must point to different memory slots.
+        assert(&storage_blob_size != &query_blob_size);
 
         // CASE 2A: STORAGE BLOB EXISTS and its size is insufficient - not implemented
         // storage_blob && storage_blob_size < required_size
@@ -307,15 +403,8 @@ class QuantPreprocessor : public PreprocessorInterface {
         // We can quantize the storage blob in-place (if we already checked storage_blob_size is
         // sufficient)
 
-        // Allocate aligned memory for the quantized storage blob
-        storage_blob = static_cast(
-            this->allocator->allocate_aligned(this->storage_bytes_count, alignment));
-
-        // Quantize directly from original data
-        const DataType *input = static_cast<const DataType *>(original_blob);
-        quantize(input, static_cast<OUTPUT_TYPE *>(storage_blob));
-
-        storage_blob_size = this->storage_bytes_count;
+        preprocessForStorage(original_blob, storage_blob, storage_blob_size);
+        preprocessQuery(original_blob, query_blob, query_blob_size, alignment);
     }
 
     void preprocessForStorage(const void *original_blob, void *&blob,
@@ -331,9 +420,33 @@ class QuantPreprocessor : public PreprocessorInterface {
         input_blob_size = storage_bytes_count;
     }
 
+    /**
+     * Preprocesses the query vector for asymmetric distance computation.
+     *
+     * The query blob contains the original float values followed by a precomputed value:
+     * - For IP/Cosine: y_sum = Σy_i (sum of query values)
+     * - For L2: y_sum_squares = Σy_i² (sum of squared query values)
+     *
+     * Query blob layout: | query_values[dim] | y_sum OR y_sum_squares |
+     * Query blob size: (dim + 1) * sizeof(DataType)
+     */
     void preprocessQuery(const void *original_blob, void *&blob, size_t &query_blob_size,
                          unsigned char alignment) const override {
-        // No-op: queries remain as original DataType
+        assert(!blob && "query_blob must be nullptr");
+
+        // Allocate aligned memory for the query blob
+        blob = this->allocator->allocate_aligned(this->query_bytes_count, alignment);
+        memcpy(blob, original_blob, this->dim * sizeof(DataType));
+        const DataType *input = static_cast<const DataType *>(original_blob);
+        // For IP/Cosine, we need to store the sum of the query vector.
+        if constexpr (Metric == VecSimMetric_IP || Metric == VecSimMetric_Cosine) {
+            static_cast<DataType *>(blob)[this->dim] = sum_fast(input);
+        }
+        // For L2, compute the sum of squares.
+        else if constexpr (Metric == VecSimMetric_L2) {
+            static_cast<DataType *>(blob)[this->dim] = sum_squares_fast(input);
+        }
+
+        query_blob_size = this->query_bytes_count;
     }
 
     void preprocessStorageInPlace(void *original_blob, size_t input_blob_size) const override {
@@ -353,4 +466,5 @@ class QuantPreprocessor : public PreprocessorInterface {
 
     const size_t dim;
     const size_t storage_bytes_count;
+    const size_t query_bytes_count;
 };
diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp
index 5f134386b..b7ebfa701 100644
--- a/tests/unit/test_components.cpp
+++ b/tests/unit/test_components.cpp
@@ -985,11 +985,10 @@ TEST(PreprocessorsTest, Int8NormalizeThenIncreaseSize) {
                                                final_blob_bytes_count));
     }
 }
-// TODO: test edge case where all entries equal.
 
 // Tests the quantization preprocessor with a single preprocessor in the chain.
-// The QuantPreprocessor allocates the storage blob and processes it, while the query blob
-// is unprocessed and allocated by the preprocessors container.
+// The QuantPreprocessor allocates and processes both the storage blob (quantized) and the query
+// blob (original values + precomputed sum_squares for L2).
 TEST(PreprocessorsTest, QuantizationTest) {
     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
     constexpr size_t n_preprocessors = 1;
@@ -998,10 +997,20 @@
     constexpr size_t original_blob_size = dim * sizeof(float);
     float original_blob[dim] = {1, 2, 3, 4, 5, 6};
 
-    // For L2 metric: quantized values + min + delta + sum + sum_squares = dim + 4 floats
+    // === Storage blob expected values ===
+    // For L2 metric: quantized values + min + delta + sum + sum_squares = dim bytes + 4 floats
     constexpr size_t quantized_blob_bytes_count = dim * sizeof(uint8_t) + 4 * sizeof(float);
-    uint8_t expected_processed_blob[quantized_blob_bytes_count] = {0};
-    ComputeSQ8Quantization(original_blob, dim, expected_processed_blob);
+    uint8_t expected_storage_blob[quantized_blob_bytes_count] = {0};
+    ComputeSQ8Quantization(original_blob, dim, expected_storage_blob);
+
+    // === Query blob expected values ===
+    // Query layout: | query_values[dim] | y_sum_squares (for L2) |
+    constexpr size_t query_blob_bytes_count = (dim + 1) * sizeof(float);
+    // Compute expected sum of squares for L2: 1² + 2² + 3² + 4² + 5² + 6² = 91
+    float expected_query_sum_squares = 0;
+    for (size_t i = 0; i < dim; ++i) {
+        expected_query_sum_squares += original_blob[i] * original_blob[i];
+    }
 
     auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, dim);
@@ -1009,19 +1018,27 @@
         MultiPreprocessorsContainer(allocator, alignment);
     multiPPContainer.addPreprocessor(quant_preprocessor);
 
-    // Test preprocess
+    // Test preprocess (both storage and query)
    {
         ProcessedBlobs processed_blobs =
             multiPPContainer.preprocess(original_blob, original_blob_size);
         const void *storage_blob = processed_blobs.getStorageBlob();
         const void *query_blob = processed_blobs.getQueryBlob();
-        // blobs should NOT point to the same memory slot
+
+        // Verify storage and query blobs are separate
         ASSERT_NE(storage_blob, nullptr);
+        ASSERT_NE(query_blob, nullptr);
         ASSERT_NE(storage_blob, query_blob);
 
+        // Verify storage blob content
         EXPECT_NO_FATAL_FAILURE(CompareVectors(static_cast<const uint8_t *>(storage_blob),
-                                               expected_processed_blob,
+                                               expected_storage_blob,
                                                quantized_blob_bytes_count));
+
+        // Verify query blob content
+        const float *query_floats = static_cast<const float *>(query_blob);
+        EXPECT_NO_FATAL_FAILURE(CompareVectors(query_floats, original_blob, dim));
+        ASSERT_FLOAT_EQ(query_floats[dim], expected_query_sum_squares);
     }
 
     // Test preprocessForStorage
@@ -1032,17 +1049,19 @@
     {
         auto storage_blob = multiPPContainer.preprocessForStorage(original_blob, original_blob_size);
         ASSERT_NE(storage_blob.get(), nullptr);
         EXPECT_NO_FATAL_FAILURE(
             CompareVectors(static_cast<const uint8_t *>(storage_blob.get()),
-                           expected_processed_blob, quantized_blob_bytes_count));
+                           expected_storage_blob, quantized_blob_bytes_count));
     }
 
-    // Test preprocessQuery (content should not be changed, only reallocated to the required
-    // alignment)
+    // Test preprocessQuery (query values + precomputed sum_squares for L2)
     {
         auto query_blob = multiPPContainer.preprocessQuery(original_blob, original_blob_size);
         ASSERT_NE(query_blob.get(), nullptr);
-        // Verify content is unchanged (original floats, not quantized)
-        EXPECT_NO_FATAL_FAILURE(CompareVectors(static_cast<const float *>(query_blob.get()),
-                                               original_blob, dim));
+
+        // Verify query blob content: original floats followed by sum_squares
+        const float *query_floats = static_cast<const float *>(query_blob.get());
+        EXPECT_NO_FATAL_FAILURE(CompareVectors(query_floats, original_blob, dim));
+        ASSERT_FLOAT_EQ(query_floats[dim], expected_query_sum_squares);
+
         // Check address is aligned
         unsigned char address_alignment = (uintptr_t)(query_blob.get()) % alignment;
         ASSERT_EQ(address_alignment, 0) << "expected alignment " << alignment;
     }
@@ -1058,7 +1077,7 @@
         multiPPContainer.preprocessStorageInPlace(buffer, original_blob_size);
 
         EXPECT_NO_FATAL_FAILURE(CompareVectors(reinterpret_cast<const uint8_t *>(buffer),
-                                               expected_processed_blob,
+                                               expected_storage_blob,
                                                quantized_blob_bytes_count));
 #if !defined(NDEBUG)
         EXPECT_EXIT(
@@ -1068,28 +1087,6 @@
     }
 }
 
-// Test preprocessQuery with alignment 0 - no copy, same memory address
-TEST(PreprocessorsTest, QuantizationTestPreprocessQueryNoAlignment) {
-    std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
-    constexpr size_t n_preprocessors = 1;
-    constexpr size_t no_alignment = 0;
-    constexpr size_t dim = 5;
-    constexpr size_t original_blob_size = dim * sizeof(float);
-    float original_blob[dim] = {1, 2, 3, 4, 5};
-
-    auto quant_preprocessor =
-        new (allocator) QuantPreprocessor(allocator, dim);
-    auto multiPPContainer =
-        MultiPreprocessorsContainer(allocator, no_alignment);
-    multiPPContainer.addPreprocessor(quant_preprocessor);
-
-    auto query_blob = multiPPContainer.preprocessQuery(original_blob, original_blob_size);
-    ASSERT_EQ(query_blob.get(), original_blob); // same memory address
-    // Verify content is unchanged
-    EXPECT_NO_FATAL_FAILURE(
-        CompareVectors(static_cast<const float *>(query_blob.get()), original_blob, dim));
-}
-
 // Test edge case where all entries are equal
 TEST(PreprocessorsTest, QuantizationTestAllEntriesEqual) {
     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
@@ -1134,6 +1131,7 @@
     }
 
     allocator->free_allocation(storage_blob);
+    allocator->free_allocation(query_blob);
     delete quant_preprocessor;
 }
 
@@ -1149,22 +1147,47 @@ class QuantPreprocessorMetricTest : public testing::TestWithParam
     void SetUp() override { allocator = VecSimAllocator::newVecsimAllocator(); }
 
-    // Helper to get expected storage size based on metric
+    // === Storage blob helpers ===
+
+    // Storage layout: | quantized_values[dim] | min | delta | sum | (sum_squares for L2) |
+    // L2: dim bytes + 4 floats (min, delta, sum, sum_squares)
+    // IP/Cosine: dim bytes + 3 floats (min, delta, sum)
     static size_t getExpectedStorageSize(VecSimMetric metric) {
-        // L2: dim + 4 floats (min, delta, sum, sum_squares)
-        // IP/Cosine: dim + 3 floats (min, delta, sum)
         size_t extra_floats = (metric == VecSimMetric_L2) ? 4 : 3;
         return dim * sizeof(uint8_t) + extra_floats * sizeof(float);
     }
 
+    // === Query blob helpers ===
+
+    // Query layout: | query_values[dim] | y_sum (IP/Cosine) OR y_sum_squares (L2) |
+    // All metrics: (dim + 1) floats
+    static constexpr size_t getExpectedQuerySize() { return (dim + 1) * sizeof(float); }
+
+    // Compute expected precomputed value for query blob based on metric
+    template <VecSimMetric Metric>
+    float getExpectedQueryPrecomputedValue() {
+        float sum = 0;
+        for (size_t i = 0; i < dim; ++i) {
+            if constexpr (Metric == VecSimMetric_L2) {
+                // sum of squares: 1² + 2² + 3² + 4² + 5² = 55
+                sum += original_blob[i] * original_blob[i];
+            } else {
+                // sum: 1 + 2 + 3 + 4 + 5 = 15
+                sum += original_blob[i];
+            }
+        }
+        return sum;
+    }
+
     // Helper to run quantization test for a specific metric
     template <VecSimMetric Metric>
     void runQuantizationTest() {
         size_t expected_storage_size = getExpectedStorageSize(Metric);
+        size_t expected_query_size = getExpectedQuerySize();
 
         auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, dim);
 
-        // Test preprocess
+        // Test preprocess (both storage and query)
         {
             void *storage_blob = nullptr;
             void *query_blob = nullptr;
@@ -1174,25 +1197,36 @@ class QuantPreprocessorMetricTest : public testing::TestWithParam
             quant_preprocessor->preprocess(original_blob, storage_blob, query_blob,
                                            storage_blob_size, query_blob_size, alignment);
 
+            // Verify storage blob
             ASSERT_NE(storage_blob, nullptr);
             ASSERT_EQ(storage_blob_size, expected_storage_size);
-            ASSERT_EQ(query_blob_size, original_blob_size);
-            // Compute expected quantization using utility function
-            // Allocate buffer large enough for L2 (4 floats), even for non-L2 metrics
+            // Verify query blob
+            ASSERT_NE(query_blob, nullptr);
+            ASSERT_EQ(query_blob_size, expected_query_size);
+
+            // === Verify storage blob content ===
             constexpr size_t max_storage_size = dim * sizeof(uint8_t) + 4 * sizeof(float);
-            uint8_t expected_blob[max_storage_size];
-            ComputeSQ8Quantization(original_blob, dim, expected_blob);
+            uint8_t expected_storage_blob[max_storage_size];
+            ComputeSQ8Quantization(original_blob, dim, expected_storage_blob);
 
-            // Compare quantized values and metadata
             // For non-L2 metrics, compare only dim + 3 floats (excluding sum_squares)
             size_t compare_size = (Metric == VecSimMetric_L2) ?
                 expected_storage_size : dim * sizeof(uint8_t) + 3 * sizeof(float);
             EXPECT_NO_FATAL_FAILURE(CompareVectors(
-                static_cast<const uint8_t *>(storage_blob), expected_blob, compare_size));
+                static_cast<const uint8_t *>(storage_blob), expected_storage_blob, compare_size));
+
+            // === Verify query blob content ===
+            const float *query_floats = static_cast<const float *>(query_blob);
+            EXPECT_NO_FATAL_FAILURE(CompareVectors(query_floats, original_blob, dim));
+
+            // Verify precomputed value (sum for IP/Cosine, sum_squares for L2)
+            float expected_precomputed = getExpectedQueryPrecomputedValue<Metric>();
+            ASSERT_FLOAT_EQ(query_floats[dim], expected_precomputed);
 
             allocator->free_allocation(storage_blob);
+            allocator->free_allocation(query_blob);
         }
 
         // Test preprocessForStorage
@@ -1206,15 +1240,25 @@ class QuantPreprocessorMetricTest : public testing::TestWithParam
             allocator->free_allocation(blob);
         }
 
-        // Test preprocessQuery - should be no-op
+        // Test preprocessQuery
        {
             void *blob = nullptr;
             size_t blob_size = original_blob_size;
             quant_preprocessor->preprocessQuery(original_blob, blob, blob_size, alignment);
-            ASSERT_EQ(blob_size, original_blob_size);
-            ASSERT_EQ(blob, nullptr);
+            ASSERT_NE(blob, nullptr);
+            ASSERT_EQ(blob_size, expected_query_size);
+
+            // Verify query blob content: original values followed by precomputed value
+            const float *query_floats = static_cast<const float *>(blob);
+            EXPECT_NO_FATAL_FAILURE(CompareVectors(query_floats, original_blob, dim));
+
+            // Verify precomputed value (sum for IP/Cosine, sum_squares for L2)
+            float expected_precomputed = getExpectedQueryPrecomputedValue<Metric>();
+            ASSERT_FLOAT_EQ(query_floats[dim], expected_precomputed);
+
+            allocator->free_allocation(blob);
        }
 
         delete quant_preprocessor;
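
Reviewer note (not part of the patch): the asymmetric-distance math documented in the header comment can be checked end to end with a small standalone program. The sketch below is illustrative only — the helper name asymmetric_l2_sq and the main() driver are assumptions, not library API. It builds the two layouts described above (storage: | q[dim] | min | delta | x_sum | x_sum_squares |, L2 query: | y[dim] | y_sum_squares |) and evaluates ||x - y||² ≈ x_sum_squares - 2 * (min * y_sum + delta * Σ(q_i * y_i)) + y_sum_squares.

// Standalone illustration of the asymmetric L2 formula over the two blob layouts.
// Hypothetical helper names; only the layouts and formula mirror the patch.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

static float asymmetric_l2_sq(const uint8_t *storage, const float *query, size_t dim) {
    // Metadata (min, delta, x_sum, x_sum_squares) trails the quantized values.
    float meta[4];
    std::memcpy(meta, storage + dim, sizeof(meta));
    const float min = meta[0], delta = meta[1], x_sum_squares = meta[3];

    float q_dot_y = 0.0f, y_sum = 0.0f;
    for (size_t i = 0; i < dim; ++i) {
        q_dot_y += static_cast<float>(storage[i]) * query[i];
        y_sum += query[i];
    }
    // IP(x, y) ≈ min * y_sum + delta * Σ(q_i * y_i)
    const float ip = min * y_sum + delta * q_dot_y;
    // y_sum_squares is the precomputed value stored right after the query values.
    const float y_sum_squares = query[dim];
    return x_sum_squares - 2.0f * ip + y_sum_squares;
}

int main() {
    constexpr size_t dim = 6;
    const float x[dim] = {1, 2, 3, 4, 5, 6}; // stored vector (quantized)
    const float y[dim] = {6, 5, 4, 3, 2, 1}; // query vector (kept as float)

    // Build a storage blob the way quantize() does: q_i = round((x_i - min) / delta).
    float min = x[0], max = x[0], x_sum = 0, x_sum_squares = 0;
    for (float v : x) {
        min = std::min(min, v);
        max = std::max(max, v);
        x_sum += v;
        x_sum_squares += v * v;
    }
    const float delta = (max - min) / 255.0f;
    std::vector<uint8_t> storage(dim + 4 * sizeof(float));
    for (size_t i = 0; i < dim; ++i)
        storage[i] = static_cast<uint8_t>(std::round((x[i] - min) / delta));
    const float meta[4] = {min, delta, x_sum, x_sum_squares};
    std::memcpy(storage.data() + dim, meta, sizeof(meta));

    // Build an L2 query blob the way preprocessQuery() does: values + y_sum_squares.
    std::vector<float> query(dim + 1);
    std::memcpy(query.data(), y, sizeof(y));
    float y_sum_squares = 0;
    for (float v : y)
        y_sum_squares += v * v;
    query[dim] = y_sum_squares;

    float exact = 0;
    for (size_t i = 0; i < dim; ++i)
        exact += (x[i] - y[i]) * (x[i] - y[i]);
    std::printf("approx L2^2 = %f, exact L2^2 = %f\n",
                asymmetric_l2_sq(storage.data(), query.data(), dim), exact);
}

For these sample vectors the approximate and exact values agree to within quantization error, which is why only the per-vector scalars (x_sum_squares on the storage side, y_sum or y_sum_squares on the query side) need to be stored for the distance computation.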