Skip to content
Open
5 changes: 2 additions & 3 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ http://s.apache.org/luceneversions

API Changes
---------------------
* GITHUB#15324: Fix MaxScoreBulkScorer so that it never calls TermScorer with a docID >= maxDoc,
  which caused an EOFException on norms access. (kdt523)

* GITHUB#15215: Switch to Java 25 as the minimum required platform. Upgrade to gradle 9.1.0.
(Robert Muir, Kaival Parikh, Dawid Weiss)

Expand Down Expand Up @@ -200,9 +202,6 @@ Optimizations

* GITHUB#15261: Implement longValues for MultiFieldNormValues to speedup CombinedQuery (Ge Song)

* GITHUB#15343: Ensure that `AcceptDocs#cost()` only ever calls `BitSets#cardinality()`
once per instance to avoid redundant computation. (Ben Trent)

Comment on lines 204 to 207
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whoops, this looks like an accidental deletion of the GITHUB#15343 entry?

Bug Fixes
---------------------
* GITHUB#14161: PointInSetQuery's constructor now throws IllegalArgumentException
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,15 @@ public int score(LeafCollector collector, Bits acceptDocs, int min, int max) thr
// Then within these outer windows, it creates inner windows of size WINDOW_SIZE that help
// collect matches into a bitset and save the overhead of rebalancing the priority queue on
// every match.
// Never iterate beyond this leaf's maxDoc to avoid scoring invalid doc IDs.
final int loopMax = Math.min(max, maxDoc);

int outerWindowMin = min;
outer:
while (outerWindowMin < max) {
while (outerWindowMin < loopMax) {
int outerWindowMax = computeOuterWindowMax(outerWindowMin);
outerWindowMax = Math.min(outerWindowMax, max);
// Cap outer window by loopMax (which itself is <= maxDoc)
outerWindowMax = Math.min(outerWindowMax, loopMax);

while (true) {
updateMaxWindowScores(outerWindowMin, outerWindowMax);
Expand Down Expand Up @@ -178,7 +182,9 @@ private void scoreInnerWindowWithFilter(
// Only score an inner window, after that we'll check if the min competitive score has increased
// enough for a more favorable partitioning to be used.
int innerWindowMin = top.doc;
int innerWindowMax = MathUtil.unsignedMin(max, innerWindowMin + INNER_WINDOW_SIZE);
// Ensure innerWindowMax never exceeds maxDoc
int innerWindowMax =
Math.min(maxDoc, MathUtil.unsignedMin(max, innerWindowMin + INNER_WINDOW_SIZE));

docAndScoreAccBuffer.size = 0;
while (top.doc < innerWindowMax) {
Expand Down Expand Up @@ -241,7 +247,8 @@ private void scoreInnerWindowMultipleEssentialClauses(
DisiWrapper top = essentialQueue.top();

int innerWindowMin = top.doc;
int innerWindowMax = MathUtil.unsignedMin(max, innerWindowMin + INNER_WINDOW_SIZE);
int innerWindowMax =
Math.min(maxDoc, MathUtil.unsignedMin(max, innerWindowMin + INNER_WINDOW_SIZE));
int innerWindowSize = innerWindowMax - innerWindowMin;

// Collect matches of essential clauses into a bitset
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.tests.util.LuceneTestCase;

/**
* Regression test for a bug where MaxScoreBulkScorer could score past leaf maxDoc when a
* restrictive filter and disjunction were used together.
*/
public class TestMaxScoreBulkScorerFilterBounds extends LuceneTestCase {

public void testFilteredDisjunctionDoesNotScorePastMaxDoc() throws Exception {
Directory dir = new RAMDirectory();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you actually run this? RAMDirectory hasn't existed since Lucene 9....

IndexWriterConfig iwc = new IndexWriterConfig();
try (IndexWriter w = new IndexWriter(dir, iwc)) {
// Create a small index where one clause matches more docs than the other, and a restrictive
// filter
for (int i = 0; i < 200; i++) {
Document d = new Document();
// Clause A matches ~1/3
d.add(new StringField("a", (i % 3 == 0) ? "yes" : "no", Field.Store.NO));
// Clause B matches ~1/9
d.add(new StringField("b", (i % 9 == 0) ? "yes" : "no", Field.Store.NO));
// Restrictive filter matches ~1%
d.add(new StringField("f", (i % 100 == 0) ? "on" : "off", Field.Store.NO));
w.addDocument(d);
}
}

try (DirectoryReader reader = DirectoryReader.open(dir)) {
IndexSearcher searcher = new IndexSearcher(reader);

Query disjunction =
new BooleanQuery.Builder()
.add(new TermQuery(new Term("a", "yes")), BooleanClause.Occur.SHOULD)
.add(new TermQuery(new Term("b", "yes")), BooleanClause.Occur.SHOULD)
.build();

Query filter = new TermQuery(new Term("f", "on"));

Query filtered =
new BooleanQuery.Builder()
.add(disjunction, BooleanClause.Occur.SHOULD)
.add(filter, BooleanClause.Occur.FILTER)
.build();

// This triggers TOP_SCORES path internally; just execute to ensure no exceptions
TopDocs td = searcher.search(filtered, 10);
assertNotNull(td);
// Optionally assert we got at most 2 hits (since ~200 docs, ~1% filter) but not necessary for
// regression
} finally {
dir.close();
}
}
}
Loading