|
40 | 40 | */ |
41 | 41 | public class DefaultIVFVectorsReader extends IVFVectorsReader implements OffHeapStats { |
42 | 42 |
|
43 | | - // The percentage of centroids that are scored to keep recall |
44 | | - public static final double CENTROID_SAMPLING_PERCENTAGE = 0.2; |
| 43 | + // Oversampling factor for hierarchical centroids: how many extra centroids to collect per visited centroid. |
| 44 | + public static final float CENTROID_OVERSAMPLING = 5.0f; |
45 | 45 |
|
46 | 46 | public DefaultIVFVectorsReader(SegmentReadState state, FlatVectorsReader rawVectorsReader) throws IOException { |
47 | 47 | super(state, rawVectorsReader); |
@@ -89,7 +89,8 @@ CentroidIterator getCentroidIterator( |
89 | 89 | int numCentroids, |
90 | 90 | IndexInput centroids, |
91 | 91 | float[] targetQuery, |
92 | | - IndexInput postingListSlice |
| 92 | + IndexInput postingListSlice, |
| 93 | + float visitRatio |
93 | 94 | ) throws IOException { |
94 | 95 | final FieldEntry fieldEntry = fields.get(fieldInfo.number); |
95 | 96 | final float globalCentroidDp = fieldEntry.globalCentroidDp(); |
@@ -122,7 +123,8 @@ CentroidIterator getCentroidIterator( |
122 | 123 | scorer, |
123 | 124 | quantized, |
124 | 125 | queryParams, |
125 | | - globalCentroidDp |
| 126 | + globalCentroidDp, |
| 127 | + visitRatio |
126 | 128 | ); |
127 | 129 | } else { |
128 | 130 | centroidIterator = getCentroidIteratorNoParent( |
@@ -185,13 +187,14 @@ private static CentroidIterator getCentroidIteratorWithParents( |
185 | 187 | ES92Int7VectorsScorer scorer, |
186 | 188 | byte[] quantizeQuery, |
187 | 189 | OptimizedScalarQuantizer.QuantizationResult queryParams, |
188 | | - float globalCentroidDp |
| 190 | + float globalCentroidDp, |
| 191 | + float visitRatio |
189 | 192 | ) throws IOException { |
190 | 193 | // build the three queues we are going to use |
191 | 194 | final NeighborQueue parentsQueue = new NeighborQueue(numParents, true); |
192 | 195 | final int maxChildrenSize = centroids.readVInt(); |
193 | 196 | final NeighborQueue currentParentQueue = new NeighborQueue(maxChildrenSize, true); |
194 | | - final int bufferSize = (int) Math.max(numCentroids * CENTROID_SAMPLING_PERCENTAGE, 1); |
| 197 | + final int bufferSize = (int) Math.min(Math.max(visitRatio * numCentroids * CENTROID_OVERSAMPLING, 1), numCentroids); |
195 | 198 | final NeighborQueue neighborQueue = new NeighborQueue(bufferSize, true); |
196 | 199 | // score the parents |
197 | 200 | final float[] scores = new float[ES92Int7VectorsScorer.BULK_SIZE]; |
|
0 commit comments