Skip to content

Commit 5d75dd3

Browse files
committed
Respect minCompetitiveScore in BlockMaxConjunctionBulkScorer (#14751)
1 parent e2fdc61 commit 5d75dd3

File tree

3 files changed

+38
-18
lines changed

3 files changed

+38
-18
lines changed

lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionBulkScorer.java

Lines changed: 25 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import java.util.List;
2323
import org.apache.lucene.search.Weight.DefaultBulkScorer;
2424
import org.apache.lucene.util.Bits;
25+
import org.apache.lucene.util.MathUtil;
2526

2627
/**
2728
* BulkScorer implementation of {@link BlockMaxConjunctionScorer} that focuses on top-level
@@ -34,6 +35,8 @@
3435
*/
3536
final class BlockMaxConjunctionBulkScorer extends BulkScorer {
3637

38+
private static final int MAX_WINDOW_SIZE = 65536;
39+
3740
private final Scorer[] scorers;
3841
private final Scorable[] scorables;
3942
private final DocIdSetIterator[] iterators;
@@ -81,30 +84,30 @@ private float computeMaxScore(int windowMin, int windowMax) throws IOException {
8184
public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException {
8285
collector.setScorer(scorable);
8386

84-
int windowMin = Math.max(lead.docID(), min);
87+
int windowMin = scoreDocFirstUntilDynamicPruning(collector, acceptDocs, min, max);
88+
8589
while (windowMin < max) {
8690
// Use impacts of the least costly scorer to compute windows
8791
// NOTE: windowMax is inclusive
8892
int windowMax = Math.min(scorers[0].advanceShallow(windowMin), max - 1);
93+
// Ensure the scoring window is not too big; this especially helps with the default implementation
94+
// of `Scorer#advanceShallow` which may return `DocIdSetIterator#NO_MORE_DOCS`.
95+
windowMax = MathUtil.unsignedMin(windowMax, windowMin + MAX_WINDOW_SIZE);
8996

90-
if (0 < scorable.minCompetitiveScore) {
91-
float maxWindowScore = computeMaxScore(windowMin, windowMax);
92-
scoreWindowScoreFirst(collector, acceptDocs, windowMin, windowMax + 1, maxWindowScore);
93-
} else {
94-
scoreWindowDocFirst(collector, acceptDocs, windowMin, windowMax + 1);
95-
}
97+
float maxWindowScore = computeMaxScore(windowMin, windowMax);
98+
scoreWindowScoreFirst(collector, acceptDocs, windowMin, windowMax + 1, maxWindowScore);
9699
windowMin = Math.max(lead.docID(), windowMax + 1);
97100
}
98101

99102
return windowMin >= maxDoc ? DocIdSetIterator.NO_MORE_DOCS : windowMin;
100103
}
101104

102105
/**
103-
* Score a window of doc IDs by first finding agreement between all iterators, and only then
104-
* compute scores and call the collector.
106+
* Score a window of doc IDs by first finding agreement between all iterators, and only then
107+
* computing scores and calling the collector, until dynamic pruning kicks in.
105108
*/
106-
private void scoreWindowDocFirst(LeafCollector collector, Bits acceptDocs, int min, int max)
107-
throws IOException {
109+
private int scoreDocFirstUntilDynamicPruning(
110+
LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException {
108111
int doc = lead.docID();
109112
if (doc < min) {
110113
doc = lead.advance(min);
@@ -131,9 +134,13 @@ private void scoreWindowDocFirst(LeafCollector collector, Bits acceptDocs, int m
131134
}
132135
scorable.score = (float) score;
133136
collector.collect(doc);
137+
if (scorable.minCompetitiveScore > 0) {
138+
return lead.nextDoc();
139+
}
134140
}
135141
doc = lead.nextDoc();
136142
}
143+
return doc;
137144
}
138145

139146
/**
@@ -166,11 +173,13 @@ private void scoreWindowScoreFirst(
166173
docAndScoreAccBuffer.copyFrom(docAndScoreBuffer);
167174

168175
for (int i = 1; i < scorers.length; ++i) {
169-
ScorerUtil.filterCompetitiveHits(
170-
docAndScoreAccBuffer,
171-
sumOfOtherClauses[i],
172-
scorable.minCompetitiveScore,
173-
scorers.length);
176+
double sumOfOtherClause = sumOfOtherClauses[i];
177+
if (sumOfOtherClause != sumOfOtherClauses[i - 1]) {
178+
// Two equal consecutive values mean that the earlier clause always returns a score of zero,
179+
// so we don't need to filter hits by score again.
180+
ScorerUtil.filterCompetitiveHits(
181+
docAndScoreAccBuffer, sumOfOtherClause, scorable.minCompetitiveScore, scorers.length);
182+
}
174183

175184
ScorerUtil.applyRequiredClause(docAndScoreAccBuffer, iterators[i], scorables[i]);
176185
}

lucene/core/src/java/org/apache/lucene/search/Scorer.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ public int advanceShallow(int target) throws IOException {
9393
* <p>The default implementation is provided below:
9494
*
9595
* <pre class="prettyprint">
96-
* int batchSize = 16; // arbitrary
96+
* int batchSize = 64; // arbitrary
9797
* buffer.growNoCopy(batchSize);
9898
* int size = 0;
9999
* DocIdSetIterator iterator = iterator();
@@ -117,7 +117,7 @@ public int advanceShallow(int target) throws IOException {
117117
*/
118118
public void nextDocsAndScores(int upTo, Bits liveDocs, DocAndFloatFeatureBuffer buffer)
119119
throws IOException {
120-
int batchSize = 16; // arbitrary
120+
int batchSize = 64; // arbitrary
121121
buffer.growNoCopy(batchSize);
122122
int size = 0;
123123
DocIdSetIterator iterator = iterator();

lucene/core/src/java/org/apache/lucene/search/ScorerUtil.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,11 +120,22 @@ public int length() {
120120
}
121121
}
122122

123+
/**
124+
* Filters competitive hits from the provided {@link DocAndScoreAccBuffer}.
125+
*
126+
* <p>This method removes documents from the buffer whose score cannot possibly be high
127+
* enough to reach the minimum competitive score, given the maximum remaining score and the
128+
* number of scorers.
129+
*/
123130
static void filterCompetitiveHits(
124131
DocAndScoreAccBuffer buffer,
125132
double maxRemainingScore,
126133
float minCompetitiveScore,
127134
int numScorers) {
135+
if ((float) MathUtil.sumUpperBound(maxRemainingScore, numScorers) >= minCompetitiveScore) {
136+
return;
137+
}
138+
128139
int newSize = 0;
129140
for (int i = 0; i < buffer.size; ++i) {
130141
float maxPossibleScore =

0 commit comments

Comments
 (0)