apache
diff --git a/‎lucene/CHANGES.txt‎
Lines changed: 5 additions & 0 deletions b/‎lucene/CHANGES.txt‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104HnswScalarQuantizedVectorsFormat.java‎
Lines changed: 1 addition & 0 deletions b/‎lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104HnswScalarQuantizedVectorsFormat.java‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorScorer.java‎
Lines changed: 80 additions & 8 deletions b/‎lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorScorer.java‎
Lines changed: 80 additions & 8 deletions
diff --git a/‎lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsFormat.java‎
Lines changed: 68 additions & 12 deletions b/‎lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsFormat.java‎
Lines changed: 68 additions & 12 deletions
diff --git a/‎lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsReader.java‎
Lines changed: 1 addition & 1 deletion b/‎lucene/core/src/java/org/apache/lucene/codecs/lucene104/Lucene104ScalarQuantizedVectorsReader.java‎
Lines changed: 1 addition & 1 deletion
@@ -156,6 +156,11 @@ New Features
   `Lucene104HnswScalarQuantizedVectorsFormat` replaces the now legacy `Lucene99HnswScalarQuantizedVectorsFormat`
    (Trevor McCulloch)
 
+ * GITHUB#15271: Extend `Lucene104ScalarQuantizedVectorsFormat` and `Lucene104HnswScalarQuantizedVectorsFormat` to
+   allow asymmetric quantization. The initially supported encoding uses single-bit document vectors with 4-bit queries. This is a replacement
+   for the now legacy `Lucene102HnswBinaryQuantizedVectorsFormat` and `Lucene102BinaryQuantizedVectorsFormat`.
+    (Ben Trent)
+
 Improvements
 ---------------------
 * GITHUB#15148: Add support for uint8 distance and allow 8 bit scalar quantization (Trevor McCulloch)
 
@@ -86,6 +86,7 @@ public Lucene104HnswScalarQuantizedVectorsFormat(int maxConn, int beamWidth) {
   /**
    * Constructs a format using the given graph construction parameters and scalar quantization.
    *
+   * @param encoding the quantization encoding used to encode the vectors
    * @param maxConn the maximum number of connections to a node in the HNSW graph
    * @param beamWidth the size of the queue maintained during graph construction.
    * @param numMergeWorkers number of workers (threads) that will be used when doing merge. If
 
@@ -64,10 +64,15 @@ public RandomVectorScorer getRandomVectorScorer(
     if (vectorValues instanceof QuantizedByteVectorValues qv) {
       checkDimensions(target.length, qv.dimension());
       OptimizedScalarQuantizer quantizer = qv.getQuantizer();
-      byte[] targetQuantized =
-          new byte
-              [OptimizedScalarQuantizer.discretize(
-                  target.length, qv.getScalarEncoding().getDimensionsPerByte())];
+      Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding scalarEncoding = qv.getScalarEncoding();
+      byte[] scratch = new byte[scalarEncoding.getDiscreteDimensions(qv.dimension())];
+      final byte[] targetQuantized;
+      if (scalarEncoding.isAsymmetric() == false) {
+        targetQuantized = scratch;
+      } else {
+        // This is asymmetric quantization, we will pack the vector
+        targetQuantized = new byte[scalarEncoding.getQueryPackedLength(scratch.length)];
+      }
       // We make a copy as the quantization process mutates the input
       float[] copy = ArrayUtil.copyOfSubArray(target, 0, target.length);
       if (similarityFunction == COSINE) {
@@ -76,7 +81,12 @@ public RandomVectorScorer getRandomVectorScorer(
       target = copy;
       var targetCorrectiveTerms =
           quantizer.scalarQuantize(
-              target, targetQuantized, qv.getScalarEncoding().getBits(), qv.getCentroid());
+              target, scratch, scalarEncoding.getQueryBits(), qv.getCentroid());
+      // for single bit query nibble, we need to transpose the nibbles for fast scoring comparisons
+      if (scalarEncoding
+          == Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.SINGLE_BIT_QUERY_NIBBLE) {
+        OptimizedScalarQuantizer.transposeHalfByte(scratch, targetQuantized);
+      }
       return new RandomVectorScorer.AbstractRandomVectorScorer(qv) {
         @Override
         public float score(int node) throws IOException {
@@ -96,13 +106,68 @@ public RandomVectorScorer getRandomVectorScorer(
     return nonQuantizedDelegate.getRandomVectorScorer(similarityFunction, vectorValues, target);
   }
 
+  RandomVectorScorerSupplier getRandomVectorScorerSupplier(
+      VectorSimilarityFunction similarityFunction,
+      QuantizedByteVectorValues scoringVectors,
+      QuantizedByteVectorValues targetVectors) {
+    return new AsymmetricQuantizedRandomVectorScorerSupplier(
+        scoringVectors, targetVectors, similarityFunction);
+  }
+
   @Override
   public String toString() {
     return "Lucene104ScalarQuantizedVectorScorer(nonQuantizedDelegate="
         + nonQuantizedDelegate
         + ")";
   }
 
+  static class AsymmetricQuantizedRandomVectorScorerSupplier implements RandomVectorScorerSupplier {
+    private final QuantizedByteVectorValues queryVectors;
+    private final QuantizedByteVectorValues targetVectors;
+    private final VectorSimilarityFunction similarityFunction;
+
+    AsymmetricQuantizedRandomVectorScorerSupplier(
+        QuantizedByteVectorValues queryVectors,
+        QuantizedByteVectorValues targetVectors,
+        VectorSimilarityFunction similarityFunction) {
+      assert targetVectors.getScalarEncoding().isAsymmetric();
+      this.queryVectors = queryVectors;
+      this.targetVectors = targetVectors;
+      this.similarityFunction = similarityFunction;
+    }
+
+    @Override
+    public UpdateableRandomVectorScorer scorer() throws IOException {
+      final QuantizedByteVectorValues targetVectors = this.targetVectors.copy();
+      final QuantizedByteVectorValues queryVectors = this.queryVectors.copy();
+      return new UpdateableRandomVectorScorer.AbstractUpdateableRandomVectorScorer(targetVectors) {
+        private OptimizedScalarQuantizer.QuantizationResult queryCorrections = null;
+        private byte[] vector = null;
+
+        @Override
+        public void setScoringOrdinal(int node) throws IOException {
+          vector = queryVectors.vectorValue(node);
+          queryCorrections = queryVectors.getCorrectiveTerms(node);
+        }
+
+        @Override
+        public float score(int node) throws IOException {
+          if (vector == null || queryCorrections == null) {
+            throw new IllegalStateException("setScoringOrdinal was not called");
+          }
+
+          return quantizedScore(vector, queryCorrections, targetVectors, node, similarityFunction);
+        }
+      };
+    }
+
+    @Override
+    public RandomVectorScorerSupplier copy() throws IOException {
+      return new AsymmetricQuantizedRandomVectorScorerSupplier(
+          queryVectors.copy(), targetVectors.copy(), similarityFunction);
+    }
+  }
+
   private static final class ScalarQuantizedVectorScorerSupplier
       implements RandomVectorScorerSupplier {
     private final QuantizedByteVectorValues targetValues;
@@ -111,6 +176,7 @@ private static final class ScalarQuantizedVectorScorerSupplier
 
     public ScalarQuantizedVectorScorerSupplier(
         QuantizedByteVectorValues values, VectorSimilarityFunction similarity) throws IOException {
+      assert values.getScalarEncoding().isAsymmetric() == false;
       this.targetValues = values.copy();
       this.values = values;
       this.similarity = similarity;
@@ -131,14 +197,17 @@ public float score(int node) throws IOException {
         public void setScoringOrdinal(int node) throws IOException {
           var rawTargetVector = targetValues.vectorValue(node);
           switch (values.getScalarEncoding()) {
-            case UNSIGNED_BYTE -> targetVector = rawTargetVector;
-            case SEVEN_BIT -> targetVector = rawTargetVector;
+            case UNSIGNED_BYTE, SEVEN_BIT -> targetVector = rawTargetVector;
             case PACKED_NIBBLE -> {
               if (targetVector == null) {
                 targetVector = new byte[OptimizedScalarQuantizer.discretize(values.dimension(), 2)];
               }
               OffHeapScalarQuantizedVectorValues.unpackNibbles(rawTargetVector, targetVector);
             }
+            case SINGLE_BIT_QUERY_NIBBLE -> {
+              throw new IllegalStateException(
+                  "SINGLE_BIT_QUERY_NIBBLE encoding is not supported for symmetric quantization");
+            }
           }
           targetCorrectiveTerms = targetValues.getCorrectiveTerms(node);
         }
@@ -177,16 +246,19 @@ private static float quantizedScore(
           case UNSIGNED_BYTE -> VectorUtil.uint8DotProduct(quantizedQuery, quantizedDoc);
           case SEVEN_BIT -> VectorUtil.dotProduct(quantizedQuery, quantizedDoc);
           case PACKED_NIBBLE -> VectorUtil.int4DotProductSinglePacked(quantizedQuery, quantizedDoc);
+          case SINGLE_BIT_QUERY_NIBBLE ->
+              VectorUtil.int4BitDotProduct(quantizedQuery, quantizedDoc);
         };
     OptimizedScalarQuantizer.QuantizationResult indexCorrections =
         targetVectors.getCorrectiveTerms(targetOrd);
+    float queryScale = SCALE_LUT[scalarEncoding.getQueryBits() - 1];
     float scale = SCALE_LUT[scalarEncoding.getBits() - 1];
     float x1 = indexCorrections.quantizedComponentSum();
     float ax = indexCorrections.lowerInterval();
     // Here we must scale according to the bits
     float lx = (indexCorrections.upperInterval() - ax) * scale;
     float ay = queryCorrections.lowerInterval();
-    float ly = (queryCorrections.upperInterval() - ay) * scale;
+    float ly = (queryCorrections.upperInterval() - ay) * queryScale;
     float y1 = queryCorrections.quantizedComponentSum();
     float score =
         ax * ay * targetVectors.dimension() + ay * lx * x1 + ax * ly * y1 + lx * ly * qcDist;
 
@@ -118,17 +118,25 @@ public class Lucene104ScalarQuantizedVectorsFormat extends FlatVectorsFormat {
    */
   public enum ScalarEncoding {
     /** Each dimension is quantized to 8 bits and treated as an unsigned value. */
-    UNSIGNED_BYTE(0, (byte) 8, 1),
+    UNSIGNED_BYTE(0, (byte) 8, 8),
     /** Each dimension is quantized to 4 bits and two values are packed into each output byte. */
-    PACKED_NIBBLE(1, (byte) 4, 2),
+    PACKED_NIBBLE(1, (byte) 4, 4),
     /**
      * Each dimension is quantized to 7 bits and treated as a signed value.
      *
      * <p>This is intended for backwards compatibility with older iterations of scalar quantization.
      * This setting will produce an index the same size as {@link #UNSIGNED_BYTE} but will produce
      * less accurate vector comparisons.
      */
-    SEVEN_BIT(2, (byte) 7, 1);
+    SEVEN_BIT(2, (byte) 7, 8),
+    /**
+     * Each dimension is quantized to a single bit and packed into bytes. During query time, the
+     * query vector is quantized to 4 bits per dimension.
+     *
+     * <p>This is the most space efficient encoding, and will produce an index 8x smaller than
+     * {@link #UNSIGNED_BYTE}. However, this comes at the cost of accuracy.
+     */
+    SINGLE_BIT_QUERY_NIBBLE(3, (byte) 1, 1, (byte) 4, 4);
 
     public static ScalarEncoding fromNumBits(int bits) {
       for (ScalarEncoding encoding : values()) {
@@ -142,13 +150,27 @@ public static ScalarEncoding fromNumBits(int bits) {
     /** The number used to identify this encoding on the wire, rather than relying on ordinal. */
     private final int wireNumber;
 
-    private final byte bits;
-    private final int dimsPerByte;
+    private final byte bits, queryBits;
+    private final int bitsPerDim, queryBitsPerDim;
 
-    ScalarEncoding(int wireNumber, byte bits, int dimsPerByte) {
+    ScalarEncoding(int wireNumber, byte bits, int bitsPerDim) {
       this.wireNumber = wireNumber;
       this.bits = bits;
-      this.dimsPerByte = dimsPerByte;
+      this.queryBits = bits;
+      this.bitsPerDim = bitsPerDim;
+      this.queryBitsPerDim = bitsPerDim;
+    }
+
+    ScalarEncoding(int wireNumber, byte bits, int bitsPerDim, byte queryBits, int queryBitsPerDim) {
+      this.wireNumber = wireNumber;
+      this.bits = bits;
+      this.queryBits = queryBits;
+      this.bitsPerDim = bitsPerDim;
+      this.queryBitsPerDim = queryBitsPerDim;
+    }
+
+    boolean isAsymmetric() {
+      return bits != queryBits;
     }
 
     int getWireNumber() {
@@ -160,14 +182,48 @@ public byte getBits() {
       return bits;
     }
 
+    public byte getQueryBits() {
+      return queryBits;
+    }
+
+    /** Return the number of dimensions rounded up to fit into whole bytes. */
+    public int getDiscreteDimensions(int dimensions) {
+      if (queryBits == bits) {
+        int totalBits = dimensions * bitsPerDim;
+        return (totalBits + 7) / 8 * 8 / bitsPerDim;
+      }
+      int queryDiscretized = (dimensions * queryBitsPerDim + 7) / 8 * 8 / queryBitsPerDim;
+      int docDiscretized = (dimensions * bitsPerDim + 7) / 8 * 8 / bitsPerDim;
+      int maxDiscretized = Math.max(queryDiscretized, docDiscretized);
+      assert maxDiscretized % (8.0 / queryBitsPerDim) == 0
+          : "bad discretized=" + maxDiscretized + " for dim=" + dimensions;
+      assert maxDiscretized % (8.0 / bitsPerDim) == 0
+          : "bad discretized=" + maxDiscretized + " for dim=" + dimensions;
+      return maxDiscretized;
+    }
+
     /** Return the number of bits used to store each dimension of a document vector. */
-    public int getDimensionsPerByte() {
-      return this.dimsPerByte;
+    public int getDocBitsPerDim() {
+      return this.bitsPerDim;
+    }
+
+    public int getQueryBitsPerDim() {
+      return this.queryBitsPerDim;
     }
 
     /** Return the number of bytes required to store a packed vector of the given dimensions. */
-    public int getPackedLength(int dimensions) {
-      return (dimensions + this.dimsPerByte - 1) / this.dimsPerByte;
+    public int getDocPackedLength(int dimensions) {
+      int discretized = getDiscreteDimensions(dimensions);
+      // how many bytes do we need to store the quantized vector?
+      int totalBits = discretized * bitsPerDim;
+      return (totalBits + 7) / 8;
+    }
+
+    public int getQueryPackedLength(int dimensions) {
+      int discretized = getDiscreteDimensions(dimensions);
+      // how many bytes do we need to store the quantized vector?
+      int totalBits = discretized * queryBitsPerDim;
+      return (totalBits + 7) / 8;
     }
 
     /** Returns the encoding for the given wire number, or empty if unknown. */
@@ -186,7 +242,7 @@ public Lucene104ScalarQuantizedVectorsFormat() {
     this(ScalarEncoding.UNSIGNED_BYTE);
   }
 
-  /** Creates a new instance with the chosen encoding. */
+  /** Creates a new instance with the chosen quantization encoding. */
   public Lucene104ScalarQuantizedVectorsFormat(ScalarEncoding encoding) {
     super(NAME);
     this.encoding = encoding;
 
@@ -141,7 +141,7 @@ static void validateFieldEntry(FieldInfo info, FieldEntry fieldEntry) {
 
     long numQuantizedVectorBytes =
         Math.multiplyExact(
-            (fieldEntry.scalarEncoding.getPackedLength(dimension)
+            (fieldEntry.scalarEncoding.getDocPackedLength(dimension)
                 + (Float.BYTES * 3)
                 + Integer.BYTES),
             (long) fieldEntry.size);
Original file line number	Diff line number	Diff line change
`@@ -86,6 +86,7 @@ public Lucene104HnswScalarQuantizedVectorsFormat(int maxConn, int beamWidth) {`
`86`	`86`	`/**`
`87`	`87`	`* Constructs a format using the given graph construction parameters and scalar quantization.`
`88`	`88`	`*`
	`89`	`+ * @param encoding the quantization encoding used to encode the vectors`
`89`	`90`	`* @param maxConn the maximum number of connections to a node in the HNSW graph`
`90`	`91`	`* @param beamWidth the size of the queue maintained during graph construction.`
`91`	`92`	`* @param numMergeWorkers number of workers (threads) that will be used when doing merge. If`