diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index ea9db0d698bb..d70f381c669d 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -98,6 +98,8 @@ Bug Fixes * GITHUB#15125: Handle inconsistent schema on flush with index sorts (Nhat Nguyen) +* GITHUB#15324: Fix MaxScoreBulkScorer to no longer call TermScorer with docID >= maxDoc, which caused an EOFException on norms access. (kdt523) + Changes in Runtime Behavior --------------------- * GITHUB#14187: The query cache is now disabled by default. (Adrien Grand) @@ -204,11 +206,8 @@ Optimizations * GITHUB#15343: Ensure that `AcceptDocs#cost()` only ever calls `BitSets#cardinality()` once per instance to avoid redundant computation. (Ben Trent) - * GITHUB#14963: Bypass HNSW graph building for tiny segments. (Shubham Chaudhary, Ben Trent) - Bug Fixes ---------------------- * GITHUB#14161: PointInSetQuery's constructor now throws IllegalArgumentException instead of UnsupportedOperationException when values are out of order. 
(Shubham Sharma) diff --git a/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java b/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java index 343239e8c7a4..4e6dc894bbac 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java @@ -89,11 +89,15 @@ public int score(LeafCollector collector, Bits acceptDocs, int min, int max) thr // Then within these outer windows, it creates inner windows of size WINDOW_SIZE that help // collect matches into a bitset and save the overhead of rebalancing the priority queue on // every match. + // Never iterate beyond this leaf's maxDoc to avoid scoring invalid doc IDs. + final int loopMax = Math.min(max, maxDoc); + int outerWindowMin = min; outer: - while (outerWindowMin < max) { + while (outerWindowMin < loopMax) { int outerWindowMax = computeOuterWindowMax(outerWindowMin); - outerWindowMax = Math.min(outerWindowMax, max); + // Cap outer window by loopMax (which itself is <= maxDoc) + outerWindowMax = Math.min(outerWindowMax, loopMax); while (true) { updateMaxWindowScores(outerWindowMin, outerWindowMax); @@ -178,7 +182,9 @@ private void scoreInnerWindowWithFilter( // Only score an inner window, after that we'll check if the min competitive score has increased // enough for a more favorable partitioning to be used. 
int innerWindowMin = top.doc; - int innerWindowMax = MathUtil.unsignedMin(max, innerWindowMin + INNER_WINDOW_SIZE); + // Ensure innerWindowMax never exceeds maxDoc + int innerWindowMax = + Math.min(maxDoc, MathUtil.unsignedMin(max, innerWindowMin + INNER_WINDOW_SIZE)); docAndScoreAccBuffer.size = 0; while (top.doc < innerWindowMax) { @@ -241,7 +247,8 @@ private void scoreInnerWindowMultipleEssentialClauses( DisiWrapper top = essentialQueue.top(); int innerWindowMin = top.doc; - int innerWindowMax = MathUtil.unsignedMin(max, innerWindowMin + INNER_WINDOW_SIZE); + int innerWindowMax = + Math.min(maxDoc, MathUtil.unsignedMin(max, innerWindowMin + INNER_WINDOW_SIZE)); int innerWindowSize = innerWindowMax - innerWindowMin; // Collect matches of essential clauses into a bitset diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreBulkScorerFilterBounds.java b/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreBulkScorerFilterBounds.java new file mode 100644 index 000000000000..1411f49cc1bb --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreBulkScorerFilterBounds.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.search; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; + +import org.apache.lucene.tests.util.LuceneTestCase; + +/** + * Regression test for a bug where MaxScoreBulkScorer could score past leaf maxDoc when a + * restrictive filter and disjunction were used together. + */ +public class TestMaxScoreBulkScorerFilterBounds extends LuceneTestCase { + + public void testFilteredDisjunctionDoesNotScorePastMaxDoc() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = new IndexWriterConfig(); + try (IndexWriter w = new IndexWriter(dir, iwc)) { + // Create a small index where one clause matches more docs than the other, and a restrictive + // filter + for (int i = 0; i < 200; i++) { + Document d = new Document(); + // Clause A matches ~1/3 + d.add(new StringField("a", (i % 3 == 0) ? "yes" : "no", Field.Store.NO)); + // Clause B matches ~1/9 + d.add(new StringField("b", (i % 9 == 0) ? "yes" : "no", Field.Store.NO)); + // Restrictive filter matches ~1% + d.add(new StringField("f", (i % 100 == 0) ? 
"on" : "off", Field.Store.NO)); + w.addDocument(d); + } + } + + try (DirectoryReader reader = DirectoryReader.open(dir)) { + IndexSearcher searcher = new IndexSearcher(reader); + + Query disjunction = + new BooleanQuery.Builder() + .add(new TermQuery(new Term("a", "yes")), BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term("b", "yes")), BooleanClause.Occur.SHOULD) + .build(); + + Query filter = new TermQuery(new Term("f", "on")); + + Query filtered = + new BooleanQuery.Builder() + .add(disjunction, BooleanClause.Occur.SHOULD) + .add(filter, BooleanClause.Occur.FILTER) + .build(); + + // This triggers TOP_SCORES path internally; just execute to ensure no exceptions + TopDocs td = searcher.search(filtered, 10); + assertNotNull(td); + // Optionally assert we got at most 2 hits (since ~200 docs, ~1% filter) but not necessary for + // regression + } finally { + dir.close(); + } + } +}