Skip to content

Commit d7ca611

Browse files
committed
Mirror upstream elastic#133945 as single snapshot commit for AI review
BASE=3c264cff967750ff7cb581defa42b0d69bb31fc6 HEAD=0e24fc739f9daa5b003d20fb0fffaf197449db67 Branch=main
1 parent 3c264cf commit d7ca611

File tree

2 files changed

+14
-9
lines changed

2 files changed

+14
-9
lines changed

server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -40,9 +40,6 @@
4040
*/
4141
public class DefaultIVFVectorsReader extends IVFVectorsReader implements OffHeapStats {
4242

43-
// The percentage of centroids that are scored to keep recall
44-
public static final double CENTROID_SAMPLING_PERCENTAGE = 0.2;
45-
4643
public DefaultIVFVectorsReader(SegmentReadState state, FlatVectorsReader rawVectorsReader) throws IOException {
4744
super(state, rawVectorsReader);
4845
}
@@ -89,7 +86,8 @@ CentroidIterator getCentroidIterator(
8986
int numCentroids,
9087
IndexInput centroids,
9188
float[] targetQuery,
92-
IndexInput postingListSlice
89+
IndexInput postingListSlice,
90+
float visitRatio
9391
) throws IOException {
9492
final FieldEntry fieldEntry = fields.get(fieldInfo.number);
9593
final float globalCentroidDp = fieldEntry.globalCentroidDp();
@@ -112,8 +110,11 @@ CentroidIterator getCentroidIterator(
112110
final ES92Int7VectorsScorer scorer = ESVectorUtil.getES92Int7VectorsScorer(centroids, fieldInfo.getVectorDimension());
113111
centroids.seek(0L);
114112
int numParents = centroids.readVInt();
113+
115114
CentroidIterator centroidIterator;
116115
if (numParents > 0) {
116+
// equivalent to (float) centroidsPerParentCluster / 2
117+
float centroidOversampling = (float) fieldEntry.numCentroids() / (2 * numParents);
117118
centroidIterator = getCentroidIteratorWithParents(
118119
fieldInfo,
119120
centroids,
@@ -122,7 +123,8 @@ CentroidIterator getCentroidIterator(
122123
scorer,
123124
quantized,
124125
queryParams,
125-
globalCentroidDp
126+
globalCentroidDp,
127+
visitRatio * centroidOversampling
126128
);
127129
} else {
128130
centroidIterator = getCentroidIteratorNoParent(
@@ -185,13 +187,14 @@ private static CentroidIterator getCentroidIteratorWithParents(
185187
ES92Int7VectorsScorer scorer,
186188
byte[] quantizeQuery,
187189
OptimizedScalarQuantizer.QuantizationResult queryParams,
188-
float globalCentroidDp
190+
float globalCentroidDp,
191+
float centroidRatio
189192
) throws IOException {
190193
// build the three queues we are going to use
191194
final NeighborQueue parentsQueue = new NeighborQueue(numParents, true);
192195
final int maxChildrenSize = centroids.readVInt();
193196
final NeighborQueue currentParentQueue = new NeighborQueue(maxChildrenSize, true);
194-
final int bufferSize = (int) Math.max(numCentroids * CENTROID_SAMPLING_PERCENTAGE, 1);
197+
final int bufferSize = (int) Math.min(Math.max(centroidRatio * numCentroids, 1), numCentroids);
195198
final NeighborQueue neighborQueue = new NeighborQueue(bufferSize, true);
196199
// score the parents
197200
final float[] scores = new float[ES92Int7VectorsScorer.BULK_SIZE];

server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsReader.java

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -90,7 +90,8 @@ abstract CentroidIterator getCentroidIterator(
9090
int numCentroids,
9191
IndexInput centroids,
9292
float[] target,
93-
IndexInput postingListSlice
93+
IndexInput postingListSlice,
94+
float visitRatio
9495
) throws IOException;
9596

9697
private static IndexInput openDataInput(
@@ -252,7 +253,8 @@ public final void search(String field, float[] target, KnnCollector knnCollector
252253
entry.numCentroids,
253254
entry.centroidSlice(ivfCentroids),
254255
target,
255-
postListSlice
256+
postListSlice,
257+
visitRatio
256258
);
257259
PostingVisitor scorer = getPostingVisitor(fieldInfo, postListSlice, target, acceptDocs);
258260
long expectedDocs = 0;

0 commit comments

Comments
 (0)