Skip to content

Commit 8beb3c6

Browse files
authored
Adds new simplified rescorer API to [Float|Byte]VectorValues (#15176)
Rescoring against a vector field should be simple and allow for lower-level bulk scoring optimizations (e.g. scoring floating point values off heap, possibly bulk prefetching, etc.). However, the current API always returns a scorer against the fastest representation of the vectors (e.g. the quantized ones), not the most accurate one. This commit adds a new method, `rescorer`, to the vector-value classes; it is intended specifically for rescoring and allows scoring optimizations (including bulk scoring when possible) to take place.
1 parent 9e7381c commit 8beb3c6

File tree

5 files changed

+65
-3
lines changed

5 files changed

+65
-3
lines changed

lucene/core/src/java/org/apache/lucene/codecs/lucene102/Lucene102BinaryQuantizedVectorsReader.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -431,6 +431,11 @@ public VectorScorer scorer(float[] query) throws IOException {
431431
return quantizedVectorValues.scorer(query);
432432
}
433433

434+
/**
 * Returns a rescorer obtained from {@code rawVectorValues}. This differs from the {@code
 * scorer(float[])} method directly above, which delegates to {@code quantizedVectorValues}.
 *
 * @param query the query vector
 * @return the {@link VectorScorer} returned by {@code rawVectorValues.rescorer(query)}
 * @throws IOException if the delegate throws one
 */
@Override
435+
public VectorScorer rescorer(float[] query) throws IOException {
436+
return rawVectorValues.rescorer(query);
437+
}
438+
434439
BinarizedByteVectorValues getQuantizedVectorValues() throws IOException {
435440
return quantizedVectorValues;
436441
}

lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsReader.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,11 @@ public VectorScorer scorer(float[] query) throws IOException {
459459
return quantizedVectorValues.scorer(query);
460460
}
461461

462+
/**
 * Returns a rescorer obtained from {@code rawVectorValues}. This differs from the {@code
 * scorer(float[])} method directly above, which delegates to {@code quantizedVectorValues}.
 *
 * @param query the query vector
 * @return the {@link VectorScorer} returned by {@code rawVectorValues.rescorer(query)}
 * @throws IOException if the delegate throws one
 */
@Override
463+
public VectorScorer rescorer(float[] query) throws IOException {
464+
return rawVectorValues.rescorer(query);
465+
}
466+
462467
@Override
463468
public DocIndexIterator iterator() {
464469
return rawVectorValues.iterator();

lucene/core/src/java/org/apache/lucene/index/ByteVectorValues.java

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,8 @@ public static void checkField(LeafReader in, String field) {
6565
}
6666

6767
/**
68-
* Return a {@link VectorScorer} for the given query vector.
68+
* Return a {@link VectorScorer} for the given query vector. When the underlying format quantizes
69+
* the vectors, this will return a {@link VectorScorer} that scores against the quantized vectors.
6970
*
7071
* @param query the query vector
7172
* @return a {@link VectorScorer} instance or null
@@ -74,6 +75,23 @@ public VectorScorer scorer(byte[] query) throws IOException {
7475
throw new UnsupportedOperationException();
7576
}
7677

78+
/**
79+
* Return a {@link VectorScorer} for rescoring an existing set of hits against the given query
80+
* vector and the current {@link ByteVectorValues}. Unlike {@link #scorer(byte[])}, it will
81+
* often use the highest fidelity scoring algorithm available. This is useful when the initial
82+
* search used a quantized index or an approximate search algorithm, and the hits should now be
83+
* rescored using the full fidelity vectors. The default implementation calls {@link
84+
* #scorer(byte[])}, assuming that the scorer is already the highest fidelity implementation
85+
* available; it therefore also throws {@link UnsupportedOperationException} wherever {@link
* #scorer(byte[])} is not overridden.
86+
*
87+
* @param target the query vector
88+
* @return a {@link VectorScorer} instance or null
89+
* @throws IOException if an I/O error occurs
90+
*/
91+
public VectorScorer rescorer(byte[] target) throws IOException {
92+
return scorer(target);
93+
}
94+
7795
@Override
7896
public VectorEncoding getEncoding() {
7997
return VectorEncoding.BYTE;

lucene/core/src/java/org/apache/lucene/index/FloatVectorValues.java

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,8 @@ public static void checkField(LeafReader in, String field) {
6666

6767
/**
6868
* Return a {@link VectorScorer} for the given query vector and the current {@link
69-
* FloatVectorValues}.
69+
* FloatVectorValues}. When the underlying format quantizes the vectors, this will return a {@link
70+
* VectorScorer} that scores against the quantized vectors.
7071
*
7172
* @param target the query vector
7273
* @return a {@link VectorScorer} instance or null
@@ -75,6 +76,23 @@ public VectorScorer scorer(float[] target) throws IOException {
7576
throw new UnsupportedOperationException();
7677
}
7778

79+
/**
80+
* Return a {@link VectorScorer} for rescoring an existing set of hits against the given query
81+
* vector and the current {@link FloatVectorValues}. Unlike {@link #scorer(float[])}, it will
82+
* often use the highest fidelity scoring algorithm available. This is useful when the initial
83+
* search used a quantized index or an approximate search algorithm, and the hits should now be
84+
* rescored using the full fidelity vectors. The default implementation calls {@link
85+
* #scorer(float[])}, assuming that the scorer is already the highest fidelity implementation
86+
* available; it therefore also throws {@link UnsupportedOperationException} wherever {@link
* #scorer(float[])} is not overridden.
87+
*
88+
* @param target the query vector
89+
* @return a {@link VectorScorer} instance or null
90+
* @throws IOException if an I/O error occurs
91+
*/
92+
public VectorScorer rescorer(float[] target) throws IOException {
93+
return scorer(target);
94+
}
95+
7896
@Override
7997
public VectorEncoding getEncoding() {
8098
return VectorEncoding.FLOAT32;

lucene/core/src/java/org/apache/lucene/search/FullPrecisionFloatVectorSimilarityValuesSource.java

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,23 @@ public DoubleValues getValues(LeafReaderContext ctx, DoubleValues scores) throws
8686
}
8787

8888
if (vectorSimilarityFunction == null) {
89-
this.vectorSimilarityFunction = fi.getVectorSimilarityFunction();
89+
VectorScorer scorer = vectorValues.rescorer(queryVector);
90+
if (scorer == null) {
91+
return DoubleValues.EMPTY;
92+
}
93+
DocIdSetIterator iterator = scorer.iterator();
94+
return new DoubleValues() {
95+
@Override
96+
public double doubleValue() throws IOException {
97+
return scorer.score();
98+
}
99+
100+
@Override
101+
public boolean advanceExact(int doc) throws IOException {
102+
return doc >= iterator.docID()
103+
&& (iterator.docID() == doc || iterator.advance(doc) == doc);
104+
}
105+
};
90106
}
91107
final KnnVectorValues.DocIndexIterator iterator = vectorValues.iterator();
92108
return new DoubleValues() {

0 commit comments

Comments
 (0)