Skip to content

Commit 3890f4f

Browse files
committed
Make centroid oversampling depend on the visit ratio
1 parent 7f4ed29 commit 3890f4f

File tree

2 files changed

+13
-8
lines changed

2 files changed

+13
-8
lines changed

server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsReader.java

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -40,8 +40,8 @@
4040
*/
4141
public class DefaultIVFVectorsReader extends IVFVectorsReader implements OffHeapStats {
4242

43-
// The percentage of centroids that are scored to keep recall
44-
public static final double CENTROID_SAMPLING_PERCENTAGE = 0.2;
43+
// Oversampling factor for hierarchical centroids: how many centroids to collect for each centroid we expect to visit.
44+
public static final float CENTROID_OVERSAMPLING = 5.0f;
4545

4646
public DefaultIVFVectorsReader(SegmentReadState state, FlatVectorsReader rawVectorsReader) throws IOException {
4747
super(state, rawVectorsReader);
@@ -89,7 +89,8 @@ CentroidIterator getCentroidIterator(
8989
int numCentroids,
9090
IndexInput centroids,
9191
float[] targetQuery,
92-
IndexInput postingListSlice
92+
IndexInput postingListSlice,
93+
float visitRatio
9394
) throws IOException {
9495
final FieldEntry fieldEntry = fields.get(fieldInfo.number);
9596
final float globalCentroidDp = fieldEntry.globalCentroidDp();
@@ -122,7 +123,8 @@ CentroidIterator getCentroidIterator(
122123
scorer,
123124
quantized,
124125
queryParams,
125-
globalCentroidDp
126+
globalCentroidDp,
127+
visitRatio
126128
);
127129
} else {
128130
centroidIterator = getCentroidIteratorNoParent(
@@ -185,13 +187,14 @@ private static CentroidIterator getCentroidIteratorWithParents(
185187
ES92Int7VectorsScorer scorer,
186188
byte[] quantizeQuery,
187189
OptimizedScalarQuantizer.QuantizationResult queryParams,
188-
float globalCentroidDp
190+
float globalCentroidDp,
191+
float visitRatio
189192
) throws IOException {
190193
// build the three queues we are going to use
191194
final NeighborQueue parentsQueue = new NeighborQueue(numParents, true);
192195
final int maxChildrenSize = centroids.readVInt();
193196
final NeighborQueue currentParentQueue = new NeighborQueue(maxChildrenSize, true);
194-
final int bufferSize = (int) Math.max(numCentroids * CENTROID_SAMPLING_PERCENTAGE, 1);
197+
final int bufferSize = (int) Math.min(Math.max(visitRatio * numCentroids * CENTROID_OVERSAMPLING, 1), numCentroids);
195198
final NeighborQueue neighborQueue = new NeighborQueue(bufferSize, true);
196199
// score the parents
197200
final float[] scores = new float[ES92Int7VectorsScorer.BULK_SIZE];

server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsReader.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,8 @@ abstract CentroidIterator getCentroidIterator(
9090
int numCentroids,
9191
IndexInput centroids,
9292
float[] target,
93-
IndexInput postingListSlice
93+
IndexInput postingListSlice,
94+
float visitRatio
9495
) throws IOException;
9596

9697
private static IndexInput openDataInput(
@@ -252,7 +253,8 @@ public final void search(String field, float[] target, KnnCollector knnCollector
252253
entry.numCentroids,
253254
entry.centroidSlice(ivfCentroids),
254255
target,
255-
postListSlice
256+
postListSlice,
257+
visitRatio
256258
);
257259
PostingVisitor scorer = getPostingVisitor(fieldInfo, postListSlice, target, acceptDocs);
258260
long expectedDocs = 0;

0 commit comments

Comments
 (0)