Skip to content

Commit 955d083

Browse files
committed
cleanup
1 parent 5e18d3a commit 955d083

File tree

2 files changed

+33
-83
lines changed

2 files changed

+33
-83
lines changed

lucene/core/src/java/org/apache/lucene/util/quantization/OptimizedScalarQuantizedVectorSimilarity.java

Lines changed: 7 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -97,40 +97,15 @@ public float score(
9797
float dotProduct,
9898
OptimizedScalarQuantizer.QuantizationResult queryCorrections,
9999
OptimizedScalarQuantizer.QuantizationResult indexCorrections) {
100-
float x1 = indexCorrections.quantizedComponentSum();
101-
float ax = indexCorrections.lowerInterval();
102-
// Here we must scale according to the bits
103-
float lx = (indexCorrections.upperInterval() - ax) * indexScale;
104-
float ay = queryCorrections.lowerInterval();
105-
float ly = (queryCorrections.upperInterval() - ay) * queryScale;
106-
float y1 = queryCorrections.quantizedComponentSum();
107-
float score = ax * ay * dimensions + ay * lx * x1 + ax * ly * y1 + lx * ly * dotProduct;
108-
// For euclidean, we need to invert the score and apply the additional
109-
// correction, which is
110-
// assumed to be the squared l2norm of the centroid centered vectors.
111-
if (similarityFunction == EUCLIDEAN) {
112-
score =
113-
queryCorrections.additionalCorrection()
114-
+ indexCorrections.additionalCorrection()
115-
- 2 * score;
116-
return Math.max(1 / (1f + score), 0);
117-
} else {
118-
// For cosine and max inner product, we need to apply the additional correction,
119-
// which is
120-
// assumed to be the non-centered dot-product between the vector and the
121-
// centroid
122-
score +=
123-
queryCorrections.additionalCorrection()
124-
+ indexCorrections.additionalCorrection()
125-
- centroidDotProduct;
126-
if (similarityFunction == MAXIMUM_INNER_PRODUCT) {
127-
return VectorUtil.scaleMaxInnerProductScore(score);
128-
}
129-
return Math.max((1f + score) / 2f, 0);
130-
}
100+
return score(
101+
dotProduct,
102+
queryCorrections,
103+
indexCorrections.lowerInterval(),
104+
indexCorrections.upperInterval(),
105+
indexCorrections.additionalCorrection(),
106+
indexCorrections.quantizedComponentSum());
131107
}
132108

133-
// XXX DO NOT MERGE duplication with above.
134109
/**
135110
* Computes the similarity score between a 'query' and an 'index' quantized vector, given the dot
136111
* product of the two vectors and their corrective factors.

lucene/core/src/java25/org/apache/lucene/internal/vectorization/Lucene104MemorySegmentScalarQuantizedVectorScorer.java

Lines changed: 26 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -129,44 +129,6 @@ final void checkOrdinal(int ord) {
129129
private static final ValueLayout.OfInt INT_UNALIGNED_LE =
130130
JAVA_INT_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN);
131131

132-
// XXX I need to return something wraps the MemorySegment and can produce the
133-
// corrective terms
134-
// on demand. rep is probably (MemorySegment, MemorySegment) with a slice for
135-
// the corrective terms.
136-
@SuppressWarnings("restricted")
137-
MemorySegment getVector(int ord) throws IOException {
138-
checkOrdinal(ord);
139-
long byteOffset = (long) ord * nodeSize;
140-
MemorySegment vector = input.segmentSliceOrNull(byteOffset, vectorByteSize);
141-
if (vector == null) {
142-
if (scratch == null) {
143-
scratch = new byte[nodeSize];
144-
}
145-
input.readBytes(byteOffset, scratch, 0, nodeSize);
146-
vector = MemorySegment.ofArray(scratch).reinterpret(vectorByteSize);
147-
}
148-
return vector;
149-
}
150-
151-
@SuppressWarnings("restricted")
152-
OptimizedScalarQuantizer.QuantizationResult getCorrectiveTerms(int ord) throws IOException {
153-
checkOrdinal(ord);
154-
long byteOffset = (long) ord * nodeSize + vectorByteSize;
155-
MemorySegment node = input.segmentSliceOrNull(byteOffset, CORRECTIVE_TERMS_SIZE);
156-
if (node == null) {
157-
if (scratch == null) {
158-
scratch = new byte[nodeSize];
159-
}
160-
input.readBytes(byteOffset, scratch, 0, CORRECTIVE_TERMS_SIZE);
161-
node = MemorySegment.ofArray(scratch).reinterpret(CORRECTIVE_TERMS_SIZE);
162-
}
163-
return new OptimizedScalarQuantizer.QuantizationResult(
164-
Float.intBitsToFloat(node.get(INT_UNALIGNED_LE, 0)),
165-
Float.intBitsToFloat(node.get(INT_UNALIGNED_LE, Integer.BYTES)),
166-
Float.intBitsToFloat(node.get(INT_UNALIGNED_LE, Integer.BYTES * 2)),
167-
node.get(INT_UNALIGNED_LE, Integer.BYTES * 3));
168-
}
169-
170132
record Node(
171133
MemorySegment vector,
172134
float lowerInterval,
@@ -188,9 +150,8 @@ Node getNode(int ord) throws IOException {
188150
}
189151
// XXX investigate reordering the vector so that corrective terms appear first.
190152
// we're forced to read them immediately to avoid creating a second memory
191-
// segment which is
192-
// not cheap, so they might as well be read first to avoid additional memory
193-
// latency.
153+
// segment which is not cheap, so they might as well be read first to avoid
154+
// additional memory latency.
194155
return new Node(
195156
vector.reinterpret(vectorByteSize),
196157
Float.intBitsToFloat(vector.get(INT_UNALIGNED_LE, vectorByteSize)),
@@ -260,7 +221,7 @@ public float score(int node) throws IOException {
260221
}
261222
}
262223

263-
private record RandomVectorScorerSupplierImpl(
224+
record RandomVectorScorerSupplierImpl(
264225
VectorSimilarityFunction similarityFunction,
265226
QuantizedByteVectorValues values,
266227
MemorySegmentAccessInput input)
@@ -293,23 +254,37 @@ private static class UpdateableRandomVectorScorerImpl extends RandomVectorScorer
293254
@Override
294255
public void setScoringOrdinal(int ord) throws IOException {
295256
checkOrdinal(ord);
296-
query = getVector(ord);
297-
queryCorrectiveTerms = getCorrectiveTerms(ord);
257+
Node node = getNode(ord);
258+
query = node.vector();
259+
queryCorrectiveTerms =
260+
new OptimizedScalarQuantizer.QuantizationResult(
261+
node.lowerInterval(),
262+
node.upperInterval(),
263+
node.additionalCorrection(),
264+
node.componentSum());
298265
}
299266

300267
@Override
301268
public float score(int node) throws IOException {
302-
MemorySegment doc = getVector(node);
269+
Node doc = getNode(node);
303270
float dotProduct =
304271
switch (getScalarEncoding()) {
305-
case UNSIGNED_BYTE -> PanamaVectorUtilSupport.uint8DotProduct(query, doc);
306-
case SEVEN_BIT -> PanamaVectorUtilSupport.uint8DotProduct(query, doc);
307-
case PACKED_NIBBLE -> PanamaVectorUtilSupport.int4DotProductBothPacked(query, doc);
272+
case UNSIGNED_BYTE -> PanamaVectorUtilSupport.uint8DotProduct(query, doc.vector());
273+
case SEVEN_BIT -> PanamaVectorUtilSupport.uint8DotProduct(query, doc.vector());
274+
case PACKED_NIBBLE ->
275+
PanamaVectorUtilSupport.int4DotProductBothPacked(query, doc.vector());
308276
};
309277
// Call getCorrectiveTerms() after computing dot product since corrective terms
310-
// bytes appear
311-
// after the vector bytes, so this sequence of calls is more cache friendly.
312-
return getSimilarity().score(dotProduct, queryCorrectiveTerms, getCorrectiveTerms(node));
278+
// bytes appear after the vector bytes, so this sequence of calls is more cache
279+
// friendly.
280+
return getSimilarity()
281+
.score(
282+
dotProduct,
283+
queryCorrectiveTerms,
284+
doc.lowerInterval(),
285+
doc.upperInterval(),
286+
doc.additionalCorrection(),
287+
doc.componentSum());
313288
}
314289
}
315290
}

0 commit comments

Comments (0)