Skip to content

Commit 6ad8a96

Browse files
peteralfonsiChrisHegarty
authored andcommitted
Fix leadCost calculation in BooleanScorerSupplier.requiredBulkScorer (apache#14543)
Fixes apache#14542 by setting leadCost in BooleanScorerSupplier.requiredBulkScorer to the minimum of both MUST and FILTER clauses' costs. This bug caused performance regressions in some queries. More details are in the original issue.
1 parent cbde9ee commit 6ad8a96

File tree

3 files changed

+64
-13
lines changed

3 files changed

+64
-13
lines changed

lucene/CHANGES.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,11 @@ Bug Fixes
6161
* GITHUB#14523, GITHUB#14530: Correct TermOrdValComparator competitive iterator so that it forces sparse
6262
field iteration to be at least scoring window baseline when doing intoBitSet. (Ben Trent, Adrien Grand)
6363

64+
* GITHUB#14543: Fixed lead cost computations for bulk scorers of conjunctive
65+
queries that mix MUST and FILTER clauses, and disjunctive queries that
66+
configure a minimum number of matching SHOULD clauses.
67+
(Peter Alfonsi, Adrien Grand)
68+
6469
======================= Lucene 10.2.0 =======================
6570

6671
API Changes

lucene/core/src/java/org/apache/lucene/search/BooleanScorerSupplier.java

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,14 @@ final class BooleanScorerSupplier extends ScorerSupplier {
6969
this.maxDoc = maxDoc;
7070
}
7171

72+
private long computeShouldCost() {
73+
final Collection<ScorerSupplier> optionalScorers = subs.get(Occur.SHOULD);
74+
return ScorerUtil.costWithMinShouldMatch(
75+
optionalScorers.stream().mapToLong(ScorerSupplier::cost),
76+
optionalScorers.size(),
77+
minShouldMatch);
78+
}
79+
7280
private long computeCost() {
7381
OptionalLong minRequiredCost =
7482
Stream.concat(subs.get(Occur.MUST).stream(), subs.get(Occur.FILTER).stream())
@@ -77,12 +85,7 @@ private long computeCost() {
7785
if (minRequiredCost.isPresent() && minShouldMatch == 0) {
7886
return minRequiredCost.getAsLong();
7987
} else {
80-
final Collection<ScorerSupplier> optionalScorers = subs.get(Occur.SHOULD);
81-
final long shouldCost =
82-
ScorerUtil.costWithMinShouldMatch(
83-
optionalScorers.stream().mapToLong(ScorerSupplier::cost),
84-
optionalScorers.size(),
85-
minShouldMatch);
88+
final long shouldCost = computeShouldCost();
8689
return Math.min(minRequiredCost.orElse(Long.MAX_VALUE), shouldCost);
8790
}
8891
}
@@ -293,9 +296,10 @@ BulkScorer optionalBulkScorer() throws IOException {
293296
return new MaxScoreBulkScorer(maxDoc, optionalScorers, null);
294297
}
295298

299+
long shouldCost = computeShouldCost();
296300
List<Scorer> optional = new ArrayList<Scorer>();
297301
for (ScorerSupplier ss : subs.get(Occur.SHOULD)) {
298-
optional.add(ss.get(Long.MAX_VALUE));
302+
optional.add(ss.get(shouldCost));
299303
}
300304

301305
return new BooleanScorer(optional, Math.max(1, minShouldMatch), scoreMode.needsScores());
@@ -359,10 +363,14 @@ private BulkScorer requiredBulkScorer() throws IOException {
359363
return scorer;
360364
}
361365

362-
long leadCost =
366+
long mustLeadCost =
363367
subs.get(Occur.MUST).stream().mapToLong(ScorerSupplier::cost).min().orElse(Long.MAX_VALUE);
364-
leadCost =
365-
subs.get(Occur.FILTER).stream().mapToLong(ScorerSupplier::cost).min().orElse(leadCost);
368+
long filterLeadCost =
369+
subs.get(Occur.FILTER).stream()
370+
.mapToLong(ScorerSupplier::cost)
371+
.min()
372+
.orElse(Long.MAX_VALUE);
373+
long leadCost = Math.min(mustLeadCost, filterLeadCost);
366374

367375
List<Scorer> requiredNoScoring = new ArrayList<>();
368376
for (ScorerSupplier ss : subs.get(Occur.FILTER)) {

lucene/core/src/test/org/apache/lucene/search/TestBoolean2ScorerSupplier.java

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,19 @@ private static class FakeScorerSupplier extends ScorerSupplier {
105105
@Override
106106
public Scorer get(long leadCost) throws IOException {
107107
if (this.leadCost != null) {
108-
assertEquals(this.toString(), this.leadCost.longValue(), leadCost);
108+
if (this.leadCost < this.cost) {
109+
// If the expected lead cost is less than the cost, ie. another clause is leading
110+
// iteration, then the exact lead cost must be provided.
111+
assertEquals(
112+
this.toString() + " actual leadCost=" + leadCost,
113+
this.leadCost.longValue(),
114+
leadCost);
115+
} else {
116+
// Otherwise the lead cost may be provided as the cost of this very clause or as
117+
// Long.MAX_VALUE (typically for bulk scorers), both signaling that this clause is leading
118+
// iteration.
119+
assertTrue(this.toString() + " actual leadCost=" + leadCost, leadCost >= this.leadCost);
120+
}
109121
}
110122
return new FakeScorer(cost);
111123
}
@@ -269,9 +281,10 @@ public void testDuelCost() throws Exception {
269281

270282
// test the tester...
271283
public void testFakeScorerSupplier() {
272-
FakeScorerSupplier randomAccessSupplier = new FakeScorerSupplier(random().nextInt(100), 30);
284+
FakeScorerSupplier randomAccessSupplier =
285+
new FakeScorerSupplier(TestUtil.nextInt(random(), 31, 100), 30);
273286
expectThrows(AssertionError.class, () -> randomAccessSupplier.get(70));
274-
FakeScorerSupplier sequentialSupplier = new FakeScorerSupplier(random().nextInt(100), 70);
287+
FakeScorerSupplier sequentialSupplier = new FakeScorerSupplier(random().nextInt(70), 70);
275288
expectThrows(AssertionError.class, () -> sequentialSupplier.get(30));
276289
}
277290

@@ -289,6 +302,9 @@ public void testConjunctionLeadCost() throws IOException {
289302
new BooleanScorerSupplier(
290303
new FakeWeight(), subs, RandomPicks.randomFrom(random(), ScoreMode.values()), 0, 100)
291304
.get(Long.MAX_VALUE); // triggers assertions as a side-effect
305+
new BooleanScorerSupplier(
306+
new FakeWeight(), subs, RandomPicks.randomFrom(random(), ScoreMode.values()), 0, 100)
307+
.bulkScorer(); // triggers assertions as a side-effect
292308

293309
subs = new EnumMap<>(Occur.class);
294310
for (Occur occur : Occur.values()) {
@@ -315,6 +331,9 @@ public void testDisjunctionLeadCost() throws IOException {
315331
new BooleanScorerSupplier(
316332
new FakeWeight(), subs, RandomPicks.randomFrom(random(), ScoreMode.values()), 0, 100)
317333
.get(100); // triggers assertions as a side-effect
334+
new BooleanScorerSupplier(
335+
new FakeWeight(), subs, RandomPicks.randomFrom(random(), ScoreMode.values()), 0, 100)
336+
.bulkScorer(); // triggers assertions as a side-effect
318337

319338
subs.get(Occur.SHOULD).clear();
320339
subs.get(Occur.SHOULD).add(new FakeScorerSupplier(42, 20));
@@ -338,6 +357,9 @@ public void testDisjunctionWithMinShouldMatchLeadCost() throws IOException {
338357
new BooleanScorerSupplier(
339358
new FakeWeight(), subs, RandomPicks.randomFrom(random(), ScoreMode.values()), 2, 100)
340359
.get(100); // triggers assertions as a side-effect
360+
new BooleanScorerSupplier(
361+
new FakeWeight(), subs, RandomPicks.randomFrom(random(), ScoreMode.values()), 2, 100)
362+
.bulkScorer(); // triggers assertions as a side-effect
341363

342364
subs = new EnumMap<>(Occur.class);
343365
for (Occur occur : Occur.values()) {
@@ -364,6 +386,9 @@ public void testDisjunctionWithMinShouldMatchLeadCost() throws IOException {
364386
new BooleanScorerSupplier(
365387
new FakeWeight(), subs, RandomPicks.randomFrom(random(), ScoreMode.values()), 2, 100)
366388
.get(100); // triggers assertions as a side-effect
389+
new BooleanScorerSupplier(
390+
new FakeWeight(), subs, RandomPicks.randomFrom(random(), ScoreMode.values()), 2, 100)
391+
.bulkScorer(); // triggers assertions as a side-effect
367392

368393
subs = new EnumMap<>(Occur.class);
369394
for (Occur occur : Occur.values()) {
@@ -377,6 +402,9 @@ public void testDisjunctionWithMinShouldMatchLeadCost() throws IOException {
377402
new BooleanScorerSupplier(
378403
new FakeWeight(), subs, RandomPicks.randomFrom(random(), ScoreMode.values()), 3, 100)
379404
.get(100); // triggers assertions as a side-effect
405+
new BooleanScorerSupplier(
406+
new FakeWeight(), subs, RandomPicks.randomFrom(random(), ScoreMode.values()), 3, 100)
407+
.bulkScorer(); // triggers assertions as a side-effect
380408
}
381409

382410
public void testProhibitedLeadCost() throws IOException {
@@ -391,6 +419,9 @@ public void testProhibitedLeadCost() throws IOException {
391419
new BooleanScorerSupplier(
392420
new FakeWeight(), subs, RandomPicks.randomFrom(random(), ScoreMode.values()), 0, 100)
393421
.get(100); // triggers assertions as a side-effect
422+
new BooleanScorerSupplier(
423+
new FakeWeight(), subs, RandomPicks.randomFrom(random(), ScoreMode.values()), 0, 100)
424+
.bulkScorer(); // triggers assertions as a side-effect
394425

395426
subs.get(Occur.MUST).clear();
396427
subs.get(Occur.MUST_NOT).clear();
@@ -399,6 +430,9 @@ public void testProhibitedLeadCost() throws IOException {
399430
new BooleanScorerSupplier(
400431
new FakeWeight(), subs, RandomPicks.randomFrom(random(), ScoreMode.values()), 0, 100)
401432
.get(100); // triggers assertions as a side-effect
433+
new BooleanScorerSupplier(
434+
new FakeWeight(), subs, RandomPicks.randomFrom(random(), ScoreMode.values()), 0, 100)
435+
.bulkScorer(); // triggers assertions as a side-effect
402436

403437
subs.get(Occur.MUST).clear();
404438
subs.get(Occur.MUST_NOT).clear();
@@ -420,13 +454,17 @@ public void testMixedLeadCost() throws IOException {
420454
subs.get(Occur.SHOULD).add(new FakeScorerSupplier(30, 42));
421455
new BooleanScorerSupplier(new FakeWeight(), subs, ScoreMode.COMPLETE, 0, 100)
422456
.get(100); // triggers assertions as a side-effect
457+
new BooleanScorerSupplier(new FakeWeight(), subs, ScoreMode.COMPLETE, 0, 100)
458+
.bulkScorer(); // triggers assertions as a side-effect
423459

424460
subs.get(Occur.MUST).clear();
425461
subs.get(Occur.SHOULD).clear();
426462
subs.get(Occur.MUST).add(new FakeScorerSupplier(42, 42));
427463
subs.get(Occur.SHOULD).add(new FakeScorerSupplier(80, 42));
428464
new BooleanScorerSupplier(new FakeWeight(), subs, ScoreMode.COMPLETE, 0, 100)
429465
.get(100); // triggers assertions as a side-effect
466+
new BooleanScorerSupplier(new FakeWeight(), subs, ScoreMode.COMPLETE, 0, 100)
467+
.bulkScorer(); // triggers assertions as a side-effect
430468

431469
subs.get(Occur.MUST).clear();
432470
subs.get(Occur.SHOULD).clear();

0 commit comments

Comments
 (0)