Skip to content

Commit e5a16f0

Browse files
codaitya and Anand Kotriwal authored
LUCENE-9674: Use binary search in VectorValues.advance()
Lucene90VectorReader now implements advance() with binary search in place of the prior linear scan.
Co-authored-by: Anand Kotriwal <[email protected]>
1 parent 37e31f2 commit e5a16f0

File tree

3 files changed

+63
-3
lines changed

3 files changed

+63
-3
lines changed

lucene/CHANGES.txt

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -180,6 +180,9 @@ Improvements
180180
* LUCENE-8982: Make NativeUnixDirectory pure java with FileChannel direct IO flag,
181181
and rename to DirectIODirectory (Zach Chen, Uwe Schindler, Mike McCandless, Dawid Weiss).
182182

183+
* LUCENE-9674: Implement faster advance on VectorValues using binary search.
184+
(Anand Kotriwal, Mike Sokolov)
185+
183186
Bug fixes
184187

185188
* LUCENE-8663: NRTCachingDirectory.slowFileExists may open a file while

lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90VectorReader.java

Lines changed: 14 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
import java.io.IOException;
2323
import java.nio.ByteBuffer;
2424
import java.nio.FloatBuffer;
25+
import java.util.Arrays;
2526
import java.util.HashMap;
2627
import java.util.Map;
2728
import java.util.Random;
@@ -386,9 +387,19 @@ public int nextDoc() {
386387
}
387388

388389
@Override
389-
public int advance(int target) throws IOException {
390-
// We could do better by log-binary search in ordToDoc, but this is never used
391-
return slowAdvance(target);
390+
public int advance(int target) {
391+
assert docID() < target;
392+
ord = Arrays.binarySearch(fieldEntry.ordToDoc, ord + 1, fieldEntry.ordToDoc.length, target);
393+
if (ord < 0) {
394+
ord = -(ord + 1);
395+
}
396+
assert ord >= 0 && ord <= fieldEntry.ordToDoc.length;
397+
if (ord == fieldEntry.ordToDoc.length) {
398+
doc = NO_MORE_DOCS;
399+
} else {
400+
doc = fieldEntry.ordToDoc[ord];
401+
}
402+
return doc;
392403
}
393404

394405
@Override

lucene/core/src/test/org/apache/lucene/index/TestVectorValues.java

Lines changed: 46 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -815,4 +815,50 @@ public void testSearchStrategyIdentifiers() {
815815
assertEquals(2, VectorValues.SearchStrategy.DOT_PRODUCT_HNSW.ordinal());
816816
assertEquals(3, VectorValues.SearchStrategy.values().length);
817817
}
818+
819+
public void testAdvance() throws Exception {
820+
try (Directory dir = newDirectory()) {
821+
try (IndexWriter w = new IndexWriter(dir, createIndexWriterConfig())) {
822+
int numdocs = atLeast(1500);
823+
String fieldName = "field";
824+
for (int i = 0; i < numdocs; i++) {
825+
Document doc = new Document();
826+
// randomly add a vector field
827+
if (random().nextInt(4) == 3) {
828+
doc.add(new VectorField(fieldName, new float[4], SearchStrategy.NONE));
829+
}
830+
w.addDocument(doc);
831+
}
832+
w.forceMerge(1);
833+
try (IndexReader reader = w.getReader()) {
834+
LeafReader r = getOnlyLeafReader(reader);
835+
VectorValues vectorValues = r.getVectorValues(fieldName);
836+
int[] vectorDocs = new int[vectorValues.size() + 1];
837+
int cur = -1;
838+
while (++cur < vectorValues.size() + 1) {
839+
vectorDocs[cur] = vectorValues.nextDoc();
840+
if (cur != 0) {
841+
assertTrue(vectorDocs[cur] > vectorDocs[cur - 1]);
842+
}
843+
}
844+
vectorValues = r.getVectorValues(fieldName);
845+
cur = -1;
846+
for (int i = 0; i < numdocs; i++) {
847+
// randomly advance to i
848+
if (random().nextInt(4) == 3) {
849+
while (vectorDocs[++cur] < i)
850+
;
851+
assertEquals(vectorDocs[cur], vectorValues.advance(i));
852+
assertEquals(vectorDocs[cur], vectorValues.docID());
853+
if (vectorValues.docID() == NO_MORE_DOCS) {
854+
break;
855+
}
856+
// make i equal to docid so that it is greater than docId in the next loop iteration
857+
i = vectorValues.docID();
858+
}
859+
}
860+
}
861+
}
862+
}
863+
}
818864
}

0 commit comments

Comments
 (0)