Skip to content
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions lucene/core/src/java/org/apache/lucene/search/LeafCollector.java
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,31 @@ default void collect(DocIdStream stream) throws IOException {
stream.forEach(this::collect);
}

/**
* Bulk-collect doc IDs.
*
* <p>Note: The provided int[] may be reused across calls and should be consumed immediately.
*
* <p>Note: The provided int[] typically only holds a small subset of query matches. This method
* may be called multiple times per segment.
*
* <p>Like {@link #collect(int)}, it is guaranteed that doc IDs get collected in order, i.e. doc
* IDs are collected in order within an int[], and if called twice, all doc IDs from the second
* int[] will be greater than all doc IDs from the first int[].
*
* <p>It is legal for callers to mix calls to {@link #collect(int[], int)}, {@link
* #collect(DocIdStream)} and {@link #collect(int)}.
*
* <p>The default implementation calls {@code for (int i = 0; i < count; ++i) { collect(docs[i]);
* } }.
*/
default void collect(int[] docs, int count) throws IOException {
for (int i = 0; i < count; ++i) {
collect(docs[i]);
}
Comment on lines +144 to +147
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was hoping that LeafCollector implementations would override this method only if they don't need access to the score, but I guess it won't work, as the underlying iterator has already moved on to the next document.

;
}

/**
* Optionally returns an iterator over competitive documents.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,11 @@ public void collect(int doc) throws IOException {
}
}

/**
 * Bulk-collects the first {@code count} doc IDs of {@code docs} by delegating each one to
 * {@link #collect(int)}, in array order.
 *
 * @param docs buffer holding the doc IDs to collect
 * @param count number of valid entries at the start of {@code docs}
 * @throws IOException if collecting an individual doc fails
 */
@Override
public void collect(int[] docs, int count) throws IOException {
  // Calling collect(docs, count) here would resolve to this very method and recurse until the
  // stack overflows, so walk the buffer and use the single-doc path instead.
  // NOTE(review): if collect(int) reads the current score, the caller must not have advanced the
  // underlying iterator past docs[i] before this runs -- confirm against the batching caller.
  for (int i = 0; i < count; ++i) {
    collect(docs[i]);
  }
}

private void collectCompetitiveHit(int doc, float score) throws IOException {
final long code = DocScoreEncoder.encode(doc + docBase, score);
topCode = heap.updateTop(code);
Expand Down
56 changes: 44 additions & 12 deletions lucene/core/src/java/org/apache/lucene/search/Weight.java
Original file line number Diff line number Diff line change
Expand Up @@ -275,52 +275,65 @@ public int score(LeafCollector collector, Bits acceptDocs, int min, int max)
// collect() because only a subset of collectors produce a competitive iterator, and the set
// of implementing classes for two-phase approximations is smaller than the set of doc id set
// iterator implementations.

// Is it better to initialize the buffer within each iterator implementation?
int[] docBuffer = new int[64];
if (twoPhase == null && competitiveIterator == null) {
// Optimize simple iterators with collectors that can't skip
scoreIterator(collector, acceptDocs, iterator, max);
scoreIterator(collector, acceptDocs, iterator, max, docBuffer);
} else if (competitiveIterator == null) {
scoreTwoPhaseIterator(collector, acceptDocs, iterator, twoPhase, max);
scoreTwoPhaseIterator(collector, acceptDocs, iterator, twoPhase, max, docBuffer);
} else if (twoPhase == null) {
scoreCompetitiveIterator(collector, acceptDocs, iterator, competitiveIterator, max);
scoreCompetitiveIterator(
collector, acceptDocs, iterator, competitiveIterator, max, docBuffer);
} else {
scoreTwoPhaseOrCompetitiveIterator(
collector, acceptDocs, iterator, twoPhase, competitiveIterator, max);
collector, acceptDocs, iterator, twoPhase, competitiveIterator, max, docBuffer);
}

return iterator.docID();
}

/**
 * Walks {@code iterator} from its current doc while the doc is below {@code max} and passes every
 * doc accepted by {@code acceptDocs} (every doc when {@code acceptDocs} is null) on for
 * collection.
 *
 * <p>Accepted docs go through the private {@code collect} helper together with {@code docs} and
 * the running {@code count}; the final call with a doc ID of -1 flushes what is still staged.
 */
private static void scoreIterator(
LeafCollector collector, Bits acceptDocs, DocIdSetIterator iterator, int max)
LeafCollector collector, Bits acceptDocs, DocIdSetIterator iterator, int max, int[] docs)
throws IOException {
// Number of doc IDs currently staged in docs.
int count = 0;
for (int doc = iterator.docID(); doc < max; doc = iterator.nextDoc()) {
// A null acceptDocs means no filtering is applied.
if (acceptDocs == null || acceptDocs.get(doc)) {
collector.collect(doc);
count = collect(collector, docs, count, doc);
}
}

// -1 is the flush sentinel understood by the helper: hand over the remaining staged docs.
collect(collector, docs, count, -1);
}

/**
 * Walks {@code iterator} from its current doc while the doc is below {@code max} and passes on
 * for collection every doc that is accepted by {@code acceptDocs} (every doc when it is null) and
 * for which {@code twoPhase.matches()} returns true.
 *
 * <p>Matching docs go through the private {@code collect} helper together with {@code docs} and
 * the running {@code count}; the final call with a doc ID of -1 flushes what is still staged.
 */
private static void scoreTwoPhaseIterator(
LeafCollector collector,
Bits acceptDocs,
DocIdSetIterator iterator,
TwoPhaseIterator twoPhase,
int max)
int max,
int[] docs)
throws IOException {
// Number of doc IDs currently staged in docs.
int count = 0;
for (int doc = iterator.docID(); doc < max; doc = iterator.nextDoc()) {
// The acceptDocs check comes first, so matches() is only evaluated for accepted docs.
if ((acceptDocs == null || acceptDocs.get(doc)) && twoPhase.matches()) {
collector.collect(doc);
count = collect(collector, docs, count, doc);
}
}

// -1 is the flush sentinel understood by the helper: hand over the remaining staged docs.
collect(collector, docs, count, -1);
}

private static void scoreCompetitiveIterator(
LeafCollector collector,
Bits acceptDocs,
DocIdSetIterator iterator,
DocIdSetIterator competitiveIterator,
int max)
int max,
int[] docs)
throws IOException {
int count = 0;
for (int doc = iterator.docID(); doc < max; ) {
assert competitiveIterator.docID() <= doc; // invariant
if (competitiveIterator.docID() < doc) {
Expand All @@ -332,11 +345,13 @@ private static void scoreCompetitiveIterator(
}

if ((acceptDocs == null || acceptDocs.get(doc))) {
collector.collect(doc);
count = collect(collector, docs, count, doc);
}

doc = iterator.nextDoc();
}

collect(collector, docs, count, -1);
}

private static void scoreTwoPhaseOrCompetitiveIterator(
Expand All @@ -345,8 +360,10 @@ private static void scoreTwoPhaseOrCompetitiveIterator(
DocIdSetIterator iterator,
TwoPhaseIterator twoPhase,
DocIdSetIterator competitiveIterator,
int max)
int max,
int[] docs)
throws IOException {
int count = 0;
for (int doc = iterator.docID(); doc < max; ) {
assert competitiveIterator.docID() <= doc; // invariant
if (competitiveIterator.docID() < doc) {
Expand All @@ -358,11 +375,26 @@ private static void scoreTwoPhaseOrCompetitiveIterator(
}

if ((acceptDocs == null || acceptDocs.get(doc)) && twoPhase.matches()) {
collector.collect(doc);
count = collect(collector, docs, count, doc);
}

doc = iterator.nextDoc();
}

collect(collector, docs, count, -1);
}

/**
 * Stages {@code docId} in {@code docs}, flushing the staged docs to {@code collector} first when
 * the buffer is full. A {@code docId} of -1 is a flush request: the staged docs (if any) are
 * handed to the collector and nothing is stored.
 *
 * @param collector receiver of the staged doc IDs
 * @param docs staging buffer, reused across calls
 * @param count number of doc IDs currently staged in {@code docs}
 * @param docId doc ID to stage, or -1 to flush
 * @return the number of doc IDs staged in {@code docs} after this call
 * @throws IOException if the collector fails while consuming the staged docs
 */
private static int collect(LeafCollector collector, int[] docs, int count, int docId)
    throws IOException {
  if (docId == -1) {
    // Flush request: never write the sentinel into the buffer, and skip empty batches.
    if (count > 0) {
      collector.collect(docs, count);
    }
    return 0;
  }

  if (count == docs.length) {
    // Buffer is full: hand it over before reusing it from the start.
    collector.collect(docs, count);
    count = 0;
  }

  // count is strictly below docs.length at this point.
  docs[count++] = docId;
  return count;
}
}
}
Loading