try flattening the corrective terms into the node

mccullocht · mccullocht · commit 5e18d3a6c758 · 2025-10-01T14:39:58.000-07:00
diff --git a/lucene/core/src/java/org/apache/lucene/util/quantization/OptimizedScalarQuantizedVectorSimilarity.java b/lucene/core/src/java/org/apache/lucene/util/quantization/OptimizedScalarQuantizedVectorSimilarity.java
@@ -105,7 +105,8 @@ public float score(
     float ly = (queryCorrections.upperInterval() - ay) * queryScale;
     float y1 = queryCorrections.quantizedComponentSum();
     float score = ax * ay * dimensions + ay * lx * x1 + ax * ly * y1 + lx * ly * dotProduct;
-    // For euclidean, we need to invert the score and apply the additional correction, which is
+    // For euclidean, we need to invert the score and apply the additional
+    // correction, which is
     // assumed to be the squared l2norm of the centroid centered vectors.
     if (similarityFunction == EUCLIDEAN) {
       score =
@@ -114,8 +115,10 @@ public float score(
               - 2 * score;
       return Math.max(1 / (1f + score), 0);
     } else {
-      // For cosine and max inner product, we need to apply the additional correction, which is
-      // assumed to be the non-centered dot-product between the vector and the centroid
+      // For cosine and max inner product, we need to apply the additional correction,
+      // which is
+      // assumed to be the non-centered dot-product between the vector and the
+      // centroid
       score +=
           queryCorrections.additionalCorrection()
               + indexCorrections.additionalCorrection()
@@ -126,4 +129,52 @@ public float score(
       return Math.max((1f + score) / 2f, 0);
     }
   }
+
+  // XXX DO NOT MERGE: duplicates the score() overload above.
+  /**
+   * Computes the similarity score between a 'query' and an 'index' quantized vector, given the dot
+   * product of the two vectors, with the index corrective factors passed as flattened values.
+   *
+   * @param dotProduct - dot product of the two quantized vectors.
+   * @param queryCorrections - corrective factors for vector 'y'.
+   * @param indexLowerInterval - lower interval corrective term for vector 'x'.
+   * @param indexUpperInterval - upper interval corrective term for vector 'x'.
+   * @param indexAdditionalCorrection - additional correction term for vector 'x'.
+   * @param indexQuantizedComponentSum - quantized component sum for vector 'x'.
+   * @return - a similarity score value between 0 and 1; higher values are better.
+   */
+  public float score(
+      float dotProduct,
+      OptimizedScalarQuantizer.QuantizationResult queryCorrections,
+      float indexLowerInterval,
+      float indexUpperInterval,
+      float indexAdditionalCorrection,
+      int indexQuantizedComponentSum) {
+    float x1 = indexQuantizedComponentSum;
+    float ax = indexLowerInterval;
+    // Here we must scale according to the bits
+    float lx = (indexUpperInterval - ax) * indexScale;
+    float ay = queryCorrections.lowerInterval();
+    float ly = (queryCorrections.upperInterval() - ay) * queryScale;
+    float y1 = queryCorrections.quantizedComponentSum();
+    float score = ax * ay * dimensions + ay * lx * x1 + ax * ly * y1 + lx * ly * dotProduct;
+    // For euclidean, we need to invert the score and apply the additional
+    // correction, which is assumed to be the squared l2norm of the centroid
+    // centered vectors.
+    if (similarityFunction == EUCLIDEAN) {
+      score = queryCorrections.additionalCorrection() + indexAdditionalCorrection - 2 * score;
+      return Math.max(1 / (1f + score), 0);
+    } else {
+      // For cosine and max inner product, we need to apply the additional
+      // correction, which is assumed to be the non-centered dot-product between
+      // the vector and the centroid; centroidDotProduct is subtracted from the
+      // sum of the two corrections.
+      score +=
+          queryCorrections.additionalCorrection() + indexAdditionalCorrection - centroidDotProduct;
+      if (similarityFunction == MAXIMUM_INNER_PRODUCT) {
+        return VectorUtil.scaleMaxInnerProductScore(score);
+      }
+      return Math.max((1f + score) / 2f, 0);
+    }
+  }
 }
diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java
@@ -168,7 +168,11 @@ OptimizedScalarQuantizer.QuantizationResult getCorrectiveTerms(int ord) throws I
     }
 
     record Node(
-        MemorySegment vector, OptimizedScalarQuantizer.QuantizationResult correctiveTerms) {}
+        MemorySegment vector,
+        float lowerInterval,
+        float upperInterval,
+        float additionalCorrection,
+        int componentSum) {}
 
     @SuppressWarnings("restricted")
     Node getNode(int ord) throws IOException {
@@ -182,14 +186,17 @@ Node getNode(int ord) throws IOException {
         input.readBytes(byteOffset, scratch, 0, nodeSize);
         vector = MemorySegment.ofArray(scratch);
       }
-      var correctiveTerms =
-          new OptimizedScalarQuantizer.QuantizationResult(
-              Float.intBitsToFloat(vector.get(INT_UNALIGNED_LE, vectorByteSize)),
-              Float.intBitsToFloat(vector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES)),
-              Float.intBitsToFloat(
-                  vector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES * 2)),
-              vector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES * 3));
-      return new Node(vector.reinterpret(vectorByteSize), correctiveTerms);
+      // XXX investigate reordering the vector so that corrective terms appear first.
+      //
+      // We are forced to read them immediately to avoid creating a second memory
+      // segment, which is not cheap, so they might as well be read first to avoid
+      // additional memory latency.
+      return new Node(
+          vector.reinterpret(vectorByteSize),
+          Float.intBitsToFloat(vector.get(INT_UNALIGNED_LE, vectorByteSize)),
+          Float.intBitsToFloat(vector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES)),
+          Float.intBitsToFloat(vector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES * 2)),
+          vector.get(INT_UNALIGNED_LE, vectorByteSize + Integer.BYTES * 3));
     }
 
     OptimizedScalarQuantizedVectorSimilarity getSimilarity() {
@@ -242,7 +249,14 @@ public float score(int node) throws IOException {
       // Call getCorrectiveTerms() after computing dot product since corrective terms
       // bytes appear after the vector bytes, so this sequence of calls is more cache
       // friendly.
-      return getSimilarity().score(dotProduct, queryCorrectiveTerms, doc.correctiveTerms);
+      return getSimilarity()
+          .score(
+              dotProduct,
+              queryCorrectiveTerms,
+              doc.lowerInterval,
+              doc.upperInterval,
+              doc.additionalCorrection,
+              doc.componentSum);
     }
   }