Skip to content

Commit ef9402c

Browse files
committed
Compare centroid scores rather than distances; add temporary diagnostics (to be removed in a follow-up)
1 parent 5910259 commit ef9402c

File tree

2 files changed

+36
-10
lines changed

2 files changed

+36
-10
lines changed

server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,9 @@ CentroidAssignments calculateAndWriteCentroids(
334334

335335
writeCentroidsAndPartitions(centroidPartitions, centroids, fieldInfo, globalCentroid, centroidOutput);
336336

337+
System.out.println("total parent centroids: " + centroidPartitions.size());
338+
System.out.println("total child centroids: " + centroids.length);
339+
337340
if (logger.isDebugEnabled()) {
338341
logger.debug("calculate centroids and assign vectors time ms: {}", (System.nanoTime() - nanoTime) / 1000000.0);
339342
logger.debug("final centroid count: {}", centroids.length);

server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsReader.java

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,10 @@ public final ByteVectorValues getByteVectorValues(String field) throws IOExcepti
221221
return rawVectorsReader.getByteVectorValues(field);
222222
}
223223

224+
// FIXME: remove the diagnostics
225+
int centroidsRead = 0;
226+
int ii = 0;
227+
224228
@Override
225229
public final void search(String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException {
226230
final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
@@ -292,29 +296,41 @@ public final void search(String field, float[] target, KnnCollector knnCollector
292296

293297
while (parentCentroidQueue.size() > 0 && (centroidsVisited < nProbe || knnCollectorImpl.numCollected() < knnCollector.k())) {
294298
NeighborQueue centroidQueue = new NeighborQueue(centroidQueryScorer.size(), true);
295-
updateCentroidQueueWNextParent(parentCentroidQueryScorer, parentCentroidQueue, centroidQueryScorer, centroidQueue);
299+
centroidsRead++;
300+
centroidsRead += updateCentroidQueueWNextParent(
301+
parentCentroidQueryScorer,
302+
parentCentroidQueue,
303+
centroidQueryScorer,
304+
centroidQueue
305+
);
296306

297307
PostingVisitor scorer = getPostingVisitor(fieldInfo, ivfClusters, target, needsScoring);
298308
// initially we visit only the "centroids to search"
299309
// Note, numCollected is doing the bare minimum here.
300310
// TODO do we need to handle nested doc counts similarly to how we handle
301311
// filtering? E.g. keep exploring until we hit an expected number of parent documents vs. child vectors?
302-
float nextParentDistance = Float.MAX_VALUE;
312+
float nextParentScore = -1f;
303313
if (parentCentroidQueue.size() > 0) {
304-
nextParentDistance = parentCentroidQueue.topScore();
314+
nextParentScore = parentCentroidQueue.topScore();
305315
}
306316
while (centroidQueue.size() > 0 && (centroidsVisited < nProbe || knnCollectorImpl.numCollected() < knnCollector.k())) {
307317
++centroidsVisited;
308-
float centroidDistance = centroidQueue.topScore();
318+
float centroidScore = centroidQueue.topScore();
309319
// the next parent likely contains centroids we need to evaluate prior to evaluating this next centroid
310-
while (parentCentroidQueue.size() > 0 && centroidDistance > nextParentDistance) {
311-
updateCentroidQueueWNextParent(parentCentroidQueryScorer, parentCentroidQueue, centroidQueryScorer, centroidQueue);
320+
while (parentCentroidQueue.size() > 0 && centroidScore < nextParentScore) {
321+
centroidsRead++;
322+
centroidsRead += updateCentroidQueueWNextParent(
323+
parentCentroidQueryScorer,
324+
parentCentroidQueue,
325+
centroidQueryScorer,
326+
centroidQueue
327+
);
312328
if (parentCentroidQueue.size() > 0) {
313-
nextParentDistance = parentCentroidQueue.topScore();
329+
nextParentScore = parentCentroidQueue.topScore();
314330
} else {
315-
nextParentDistance = Float.MAX_VALUE;
331+
nextParentScore = -1f;
316332
}
317-
centroidDistance = centroidQueue.topScore();
333+
centroidScore = centroidQueue.topScore();
318334
}
319335

320336
int centroidOrdinal = centroidQueue.pop();
@@ -334,9 +350,14 @@ public final void search(String field, float[] target, KnnCollector knnCollector
334350
}
335351
}
336352
}
353+
354+
if (ii == 1999) {
355+
System.out.println("total centroids (parent & child) read:" + (centroidsRead / (ii + 1)));
356+
}
357+
ii++;
337358
}
338359

339-
private static void updateCentroidQueueWNextParent(
360+
private static int updateCentroidQueueWNextParent(
340361
CentroidWChildrenQueryScorer parentCentroidQueryScorer,
341362
NeighborQueue parentCentroidQueue,
342363
CentroidQueryScorer centroidQueryScorer,
@@ -356,6 +377,8 @@ private static void updateCentroidQueueWNextParent(
356377
}
357378
// TODO: add back scorePostingLists? seems like it's not doing anything at this point
358379
centroidQueryScorer.bulkScore(centroidQueue, childCentroidOrdinal, childCentroidOrdinal + childCentroidCount);
380+
381+
return childCentroidCount;
359382
}
360383

361384
@Override

0 commit comments

Comments
 (0)