4040 */
4141public class DefaultIVFVectorsReader extends IVFVectorsReader implements OffHeapStats {
4242
43- // The percentage of centroids that are scored to keep recall
44- public static final double CENTROID_SAMPLING_PERCENTAGE = 0.2 ;
45-
/**
 * Creates the reader by forwarding both arguments unchanged to the {@code IVFVectorsReader}
 * superclass constructor; this constructor performs no initialization of its own.
 *
 * @param state segment read state, passed straight through to the superclass
 * @param rawVectorsReader flat vectors reader, passed straight through to the superclass
 * @throws IOException if the superclass constructor throws it (the only statement here is the super call)
 */
4643 public DefaultIVFVectorsReader (SegmentReadState state , FlatVectorsReader rawVectorsReader ) throws IOException {
4744 super (state , rawVectorsReader );
4845 }
@@ -89,7 +86,8 @@ CentroidIterator getCentroidIterator(
8986 int numCentroids ,
9087 IndexInput centroids ,
9188 float [] targetQuery ,
92- IndexInput postingListSlice
89+ IndexInput postingListSlice ,
90+ float visitRatio
9391 ) throws IOException {
9492 final FieldEntry fieldEntry = fields .get (fieldInfo .number );
9593 final float globalCentroidDp = fieldEntry .globalCentroidDp ();
@@ -112,8 +110,11 @@ CentroidIterator getCentroidIterator(
112110 final ES92Int7VectorsScorer scorer = ESVectorUtil .getES92Int7VectorsScorer (centroids , fieldInfo .getVectorDimension ());
113111 centroids .seek (0L );
114112 int numParents = centroids .readVInt ();
113+
115114 CentroidIterator centroidIterator ;
116115 if (numParents > 0 ) {
116+ // equivalent to (float) centroidsPerParentCluster / 2
117+ float centroidOversampling = (float ) fieldEntry .numCentroids () / (2 * numParents );
117118 centroidIterator = getCentroidIteratorWithParents (
118119 fieldInfo ,
119120 centroids ,
@@ -122,7 +123,8 @@ CentroidIterator getCentroidIterator(
122123 scorer ,
123124 quantized ,
124125 queryParams ,
125- globalCentroidDp
126+ globalCentroidDp ,
127+ visitRatio * centroidOversampling
126128 );
127129 } else {
128130 centroidIterator = getCentroidIteratorNoParent (
@@ -185,13 +187,14 @@ private static CentroidIterator getCentroidIteratorWithParents(
185187 ES92Int7VectorsScorer scorer ,
186188 byte [] quantizeQuery ,
187189 OptimizedScalarQuantizer .QuantizationResult queryParams ,
188- float globalCentroidDp
190+ float globalCentroidDp ,
191+ float centroidRatio
189192 ) throws IOException {
190193 // build the three queues we are going to use
191194 final NeighborQueue parentsQueue = new NeighborQueue (numParents , true );
192195 final int maxChildrenSize = centroids .readVInt ();
193196 final NeighborQueue currentParentQueue = new NeighborQueue (maxChildrenSize , true );
194- final int bufferSize = (int ) Math .max (numCentroids * CENTROID_SAMPLING_PERCENTAGE , 1 );
197+ final int bufferSize = (int ) Math .min ( Math . max (centroidRatio * numCentroids , 1 ), numCentroids );
195198 final NeighborQueue neighborQueue = new NeighborQueue (bufferSize , true );
196199 // score the parents
197200 final float [] scores = new float [ES92Int7VectorsScorer .BULK_SIZE ];
0 commit comments