diff --git a/lucene/core/src/java/org/apache/lucene/search/LeafCollector.java b/lucene/core/src/java/org/apache/lucene/search/LeafCollector.java index f8765c18372d..ac54455559e5 100644 --- a/lucene/core/src/java/org/apache/lucene/search/LeafCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/LeafCollector.java @@ -123,6 +123,30 @@ default void collect(DocIdStream stream) throws IOException { stream.forEach(this::collect); } + /** + * Bulk-collect doc IDs. + * + *

<p>Note: The provided int[] may be reused across calls and should be consumed immediately. + * + *

<p>Note: The provided int[] typically only holds a small subset of query matches. This method + * may be called multiple times per segment. + * + *

<p>Like {@link #collect(int)}, it is guaranteed that doc IDs get collected in order, i.e. doc + * IDs are collected in order within an int[], and if called twice, all doc IDs from the second + * int[] will be greater than all doc IDs from the first int[]. + * + *

<p>It is legal for callers to mix calls to {@link #collect(int[], int)}, {@link + * #collect(DocIdStream)} and {@link #collect(int)}. + * + *

<p>The default implementation calls {@code for(int i = 0; i < count; ++i) { collect(docs[i]); + * } }. + */ + default void collect(int[] docs, int count) throws IOException { + for (int i = 0; i < count; ++i) { + collect(docs[i]); + } + } + /** * Optionally returns an iterator over competitive documents. * diff --git a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java index e878f6f880b8..90fbc4472373 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java @@ -127,6 +127,13 @@ public void collect(int doc) throws IOException { } } + @Override + public void collect(int[] docs, int count) throws IOException { + for (int i = 0; i < count; ++i) { + collect(docs[i]); + } + } + private void collectCompetitiveHit(int doc, float score) throws IOException { final long code = DocScoreEncoder.encode(doc + docBase, score); topCode = heap.updateTop(code); diff --git a/lucene/core/src/java/org/apache/lucene/search/Weight.java b/lucene/core/src/java/org/apache/lucene/search/Weight.java index 341dd3cadf6a..e10155e2df01 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Weight.java +++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java @@ -275,29 +275,36 @@ public int score(LeafCollector collector, Bits acceptDocs, int min, int max) // collect() because only a subset of collectors produce a competitive iterator, and the set // of implementing classes for two-phase approximations is smaller than the set of doc id set // iterator implementations. + + // TODO: should each score*Iterator method allocate its own doc ID buffer instead? 
+ int[] docBuffer = new int[64]; /* scratch buffer in which matching doc IDs are batched before being passed to the collector */ if (twoPhase == null && competitiveIterator == null) { // Optimize simple iterators with collectors that can't skip - scoreIterator(collector, acceptDocs, iterator, max); + scoreIterator(collector, acceptDocs, iterator, max, docBuffer); } else if (competitiveIterator == null) { - scoreTwoPhaseIterator(collector, acceptDocs, iterator, twoPhase, max); + scoreTwoPhaseIterator(collector, acceptDocs, iterator, twoPhase, max, docBuffer); } else if (twoPhase == null) { - scoreCompetitiveIterator(collector, acceptDocs, iterator, competitiveIterator, max); + scoreCompetitiveIterator( + collector, acceptDocs, iterator, competitiveIterator, max, docBuffer); } else { scoreTwoPhaseOrCompetitiveIterator( - collector, acceptDocs, iterator, twoPhase, competitiveIterator, max); + collector, acceptDocs, iterator, twoPhase, competitiveIterator, max, docBuffer); } return iterator.docID(); } private static void scoreIterator( - LeafCollector collector, Bits acceptDocs, DocIdSetIterator iterator, int max) + LeafCollector collector, Bits acceptDocs, DocIdSetIterator iterator, int max, int[] docs) throws IOException { + int count = 0; for (int doc = iterator.docID(); doc < max; doc = iterator.nextDoc()) { if (acceptDocs == null || acceptDocs.get(doc)) { - collector.collect(doc); + count = collect(collector, docs, count, doc); } } + + collect(collector, docs, count, -1); /* docId -1 flushes the doc IDs still buffered */ } private static void scoreTwoPhaseIterator( @@ -305,13 +312,17 @@ private static void scoreTwoPhaseIterator( Bits acceptDocs, DocIdSetIterator iterator, TwoPhaseIterator twoPhase, - int max) + int max, + int[] docs) throws IOException { + int count = 0; for (int doc = iterator.docID(); doc < max; doc = iterator.nextDoc()) { if ((acceptDocs == null || acceptDocs.get(doc)) && twoPhase.matches()) { - collector.collect(doc); + count = collect(collector, docs, count, doc); } } + + collect(collector, docs, count, -1); /* docId -1 flushes the doc IDs still buffered */ } private static void scoreCompetitiveIterator( @@ -319,8 +330,10 @@ private 
static void scoreCompetitiveIterator( Bits acceptDocs, DocIdSetIterator iterator, DocIdSetIterator competitiveIterator, - int max) + int max, + int[] docs) throws IOException { + int count = 0; for (int doc = iterator.docID(); doc < max; ) { assert competitiveIterator.docID() <= doc; // invariant if (competitiveIterator.docID() < doc) { @@ -332,11 +345,13 @@ private static void scoreCompetitiveIterator( } if ((acceptDocs == null || acceptDocs.get(doc))) { - collector.collect(doc); + count = collect(collector, docs, count, doc); } doc = iterator.nextDoc(); } + + collect(collector, docs, count, -1); } private static void scoreTwoPhaseOrCompetitiveIterator( @@ -345,8 +360,10 @@ private static void scoreTwoPhaseOrCompetitiveIterator( DocIdSetIterator iterator, TwoPhaseIterator twoPhase, DocIdSetIterator competitiveIterator, - int max) + int max, + int[] docs) throws IOException { + int count = 0; for (int doc = iterator.docID(); doc < max; ) { assert competitiveIterator.docID() <= doc; // invariant if (competitiveIterator.docID() < doc) { @@ -358,11 +375,46 @@ private static void scoreTwoPhaseOrCompetitiveIterator( } if ((acceptDocs == null || acceptDocs.get(doc)) && twoPhase.matches()) { - collector.collect(doc); + count = collect(collector, docs, count, doc); } doc = iterator.nextDoc(); } + + collect(collector, docs, count, -1); + } + + /** + * Adds {@code docId} to the {@code docs} buffer, first passing the buffered doc IDs to + * {@code collector} if the buffer is full. A {@code docId} of -1 only flushes: buffered doc IDs, + * if any, are passed to the collector and nothing is stored. + * + * <p>NOTE(review): a batch reaches the collector only after the iterator has advanced past its + * doc IDs; confirm no collector needs the scorer positioned on the doc being collected. + * + * @param collector collector that receives the buffered doc IDs + * @param docs buffer of pending doc IDs; its length must be greater than zero + * @param count number of doc IDs currently held in {@code docs} + * @param docId doc ID to buffer, or -1 to flush + * @return number of doc IDs held in {@code docs} after this call + * @throws IOException if thrown by the collector + */ + private static int collect(LeafCollector collector, int[] docs, int count, int docId) + throws IOException { + if (docId == -1) { + if (count > 0) { + collector.collect(docs, count); + } + return 0; + } + + if (count == docs.length) { + collector.collect(docs, count); + count = 0; + } + + docs[count++] = docId; + return count; } } }