From 7f0ed2a92b3d53e6d1ec8f8cba11a258b0a1e804 Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Sat, 24 May 2025 18:13:45 +0800 Subject: [PATCH 01/24] Speed up term query --- .../lucene/search/BatchScoreBulkScorer.java | 68 +++++++++++++++++++ .../org/apache/lucene/search/ImpactsDISI.java | 2 +- .../org/apache/lucene/search/TermQuery.java | 2 +- .../org/apache/lucene/search/TermScorer.java | 8 +++ .../lucene/search/TestBooleanScorer.java | 2 +- 5 files changed, 79 insertions(+), 3 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/search/BatchScoreBulkScorer.java diff --git a/lucene/core/src/java/org/apache/lucene/search/BatchScoreBulkScorer.java b/lucene/core/src/java/org/apache/lucene/search/BatchScoreBulkScorer.java new file mode 100644 index 000000000000..c000209d2fc1 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/BatchScoreBulkScorer.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.search; + +import java.io.IOException; +import org.apache.lucene.util.Bits; + +/** + * A bulk scorer used when {@link ScoreMode#needsScores()} is true and {@link + * Scorer#nextDocsAndScores} has optimizations to run faster than one-by-one iteration. + */ +class BatchScoreBulkScorer extends BulkScorer { + + private final SimpleScorable scorable = new SimpleScorable(); + private final DocAndScoreBuffer buffer = new DocAndScoreBuffer(); + private final Scorer scorer; + + BatchScoreBulkScorer(Scorer scorer) { + this.scorer = scorer; + } + + @Override + public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException { + if (collector.competitiveIterator() != null) { + return new Weight.DefaultBulkScorer(scorer).score(collector, acceptDocs, min, max); + } + + collector.setScorer(scorable); + scorer.setMinCompetitiveScore(scorable.minCompetitiveScore); + + if (scorer.docID() < min) { + scorer.iterator().advance(min); + } + + for (scorer.nextDocsAndScores(max, acceptDocs, buffer); + buffer.size > 0; + scorer.nextDocsAndScores(max, acceptDocs, buffer)) { + for (int i = 0, size = buffer.size; i < size; i++) { + float score = scorable.score = buffer.scores[i]; + if (score >= scorable.minCompetitiveScore) { + collector.collect(buffer.docs[i]); + } + } + scorer.setMinCompetitiveScore(scorable.minCompetitiveScore); + } + + return scorer.docID(); + } + + @Override + public long cost() { + return scorer.iterator().cost(); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/ImpactsDISI.java b/lucene/core/src/java/org/apache/lucene/search/ImpactsDISI.java index 73bbf30d4c21..09ba2dd441a9 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ImpactsDISI.java +++ b/lucene/core/src/java/org/apache/lucene/search/ImpactsDISI.java @@ -64,7 +64,7 @@ public void setMinCompetitiveScore(float minCompetitiveScore) { } } - private int advanceTarget(int target) throws IOException { + int advanceTarget(int target) 
throws IOException { if (target <= upTo) { // we are still in the current block, which is considered competitive // according to impacts, no skipping diff --git a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java index fbf52daefab8..6c29c23095b4 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java @@ -173,7 +173,7 @@ public BulkScorer bulkScorer() throws IOException { return ConstantScoreScorerSupplier.fromIterator(iterator, 0f, scoreMode, maxDoc) .bulkScorer(); } - return super.bulkScorer(); + return new BatchScoreBulkScorer(get(Long.MAX_VALUE)); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java index ea272b6a4f38..8268357bcd1b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java @@ -134,6 +134,14 @@ public void nextDocsAndScores(int upTo, Bits liveDocs, DocAndScoreBuffer buffer) docAndFreqBuffer = new DocAndFreqBuffer(); } + int doc = docID(); + if (impactsDisi != null && doc != DocIdSetIterator.NO_MORE_DOCS) { + int nextCompetitive = impactsDisi.advanceTarget(doc); + if (nextCompetitive != doc) { + postingsEnum.advance(nextCompetitive); + } + } + for (; ; ) { postingsEnum.nextPostings(upTo, docAndFreqBuffer); if (liveDocs != null && docAndFreqBuffer.size != 0) { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java index 67c7ed2fe5a0..2e1ad202f0be 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java @@ -201,7 +201,7 @@ public void testOptimizeTopLevelClauseOrNull() throws IOException { weight = 
searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE, 1); ss = weight.scorerSupplier(ctx); scorer = ((BooleanScorerSupplier) ss).booleanScorer(); - assertThat(scorer, instanceOf(DefaultBulkScorer.class)); // term scorer + assertThat(scorer, instanceOf(BatchScoreBulkScorer.class)); // term scorer w.close(); reader.close(); From 487bbc84315d345b80b482a9ea24a14d413959b0 Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Sun, 25 May 2025 00:24:24 +0800 Subject: [PATCH 02/24] iter --- .../src/java/org/apache/lucene/search/ImpactsDISI.java | 2 +- .../src/java/org/apache/lucene/search/TermScorer.java | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/ImpactsDISI.java b/lucene/core/src/java/org/apache/lucene/search/ImpactsDISI.java index 09ba2dd441a9..73bbf30d4c21 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ImpactsDISI.java +++ b/lucene/core/src/java/org/apache/lucene/search/ImpactsDISI.java @@ -64,7 +64,7 @@ public void setMinCompetitiveScore(float minCompetitiveScore) { } } - int advanceTarget(int target) throws IOException { + private int advanceTarget(int target) throws IOException { if (target <= upTo) { // we are still in the current block, which is considered competitive // according to impacts, no skipping diff --git a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java index 8268357bcd1b..7e7dcb0decd7 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java @@ -134,12 +134,10 @@ public void nextDocsAndScores(int upTo, Bits liveDocs, DocAndScoreBuffer buffer) docAndFreqBuffer = new DocAndFreqBuffer(); } - int doc = docID(); - if (impactsDisi != null && doc != DocIdSetIterator.NO_MORE_DOCS) { - int nextCompetitive = impactsDisi.advanceTarget(doc); - if (nextCompetitive != doc) { - 
postingsEnum.advance(nextCompetitive); - } + + if (impactsDisi != null) { + // jump to next competitive doc. + impactsDisi.advance(docID()); } for (; ; ) { From 6ad87d147b5bc79850a089e80de025930a84ccc1 Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Sun, 25 May 2025 00:30:54 +0800 Subject: [PATCH 03/24] iter --- .../core/src/java/org/apache/lucene/search/ImpactsDISI.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/ImpactsDISI.java b/lucene/core/src/java/org/apache/lucene/search/ImpactsDISI.java index 73bbf30d4c21..b16f02938e3d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ImpactsDISI.java +++ b/lucene/core/src/java/org/apache/lucene/search/ImpactsDISI.java @@ -98,9 +98,13 @@ private int advanceTarget(int target) throws IOException { } } + @Override public int advance(int target) throws IOException { - return in.advance(advanceTarget(target)); + int advanceTarget = advanceTarget(target); + if (advanceTarget > docID()) { + return in.advance(advanceTarget); + } } @Override From b55336af7f1ec4c5f8d2650033d0972a87d86fa7 Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Sun, 25 May 2025 00:35:31 +0800 Subject: [PATCH 04/24] iter --- .../org/apache/lucene/search/ImpactsDISI.java | 15 +++++++++++---- .../java/org/apache/lucene/search/TermScorer.java | 4 +--- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/ImpactsDISI.java b/lucene/core/src/java/org/apache/lucene/search/ImpactsDISI.java index b16f02938e3d..d5dd81f7c399 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ImpactsDISI.java +++ b/lucene/core/src/java/org/apache/lucene/search/ImpactsDISI.java @@ -98,13 +98,20 @@ private int advanceTarget(int target) throws IOException { } } + /** + * If current doc is not competitive, move to a competitive one. 
+ */ + void ensureCompetitive() throws IOException { + int doc = docID(); + int advanceTarget = advanceTarget(doc); + if (advanceTarget != doc) { + in.advance(advanceTarget); + } + } @Override public int advance(int target) throws IOException { - int advanceTarget = advanceTarget(target); - if (advanceTarget > docID()) { - return in.advance(advanceTarget); - } + return in.advance(advanceTarget(target)); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java index 7e7dcb0decd7..4d4e6e63bbd6 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java @@ -134,10 +134,8 @@ public void nextDocsAndScores(int upTo, Bits liveDocs, DocAndScoreBuffer buffer) docAndFreqBuffer = new DocAndFreqBuffer(); } - if (impactsDisi != null) { - // jump to next competitive doc. - impactsDisi.advance(docID()); + impactsDisi.ensureCompetitive(); } for (; ; ) { From ab6773dbe3da77a7d2971e0701df73bd47016cca Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Sun, 25 May 2025 00:49:53 +0800 Subject: [PATCH 05/24] tidy --- .../core/src/java/org/apache/lucene/search/ImpactsDISI.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/ImpactsDISI.java b/lucene/core/src/java/org/apache/lucene/search/ImpactsDISI.java index d5dd81f7c399..0a4c0295fe9f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ImpactsDISI.java +++ b/lucene/core/src/java/org/apache/lucene/search/ImpactsDISI.java @@ -98,9 +98,7 @@ private int advanceTarget(int target) throws IOException { } } - /** - * If current doc is not competitive, move to a competitive one. - */ + /** If current doc is not competitive, move to a competitive one. 
*/ void ensureCompetitive() throws IOException { int doc = docID(); int advanceTarget = advanceTarget(doc); From b53ef5ffa47c5f50d18c40f464afc06d7b413a1e Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Mon, 26 May 2025 02:48:49 +0800 Subject: [PATCH 06/24] fix --- .../lucene/search/MaxScoreAccumulator.java | 57 ++++---- .../lucene/search/TopScoreDocCollector.java | 132 +++++++++++++----- 2 files changed, 128 insertions(+), 61 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/MaxScoreAccumulator.java b/lucene/core/src/java/org/apache/lucene/search/MaxScoreAccumulator.java index eac33dbf039d..59e23b2e2a57 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MaxScoreAccumulator.java +++ b/lucene/core/src/java/org/apache/lucene/search/MaxScoreAccumulator.java @@ -18,14 +18,17 @@ package org.apache.lucene.search; import java.util.concurrent.atomic.LongAccumulator; +import org.apache.lucene.util.NumericUtils; /** Maintains the maximum score and its corresponding document id concurrently */ final class MaxScoreAccumulator { // we use 2^10-1 to check the remainder with a bitwise operation - static final int DEFAULT_INTERVAL = 0x3ff; + private static final int DEFAULT_INTERVAL = 0x3ff; + private static final int POS_INF_TO_SORTABLE_INT = NumericUtils.floatToSortableInt(Float.POSITIVE_INFINITY); + static final long LEAST_COMPETITIVE_CODE = encode(Integer.MAX_VALUE, Float.NEGATIVE_INFINITY); // scores are always positive - final LongAccumulator acc = new LongAccumulator(MaxScoreAccumulator::maxEncode, Long.MIN_VALUE); + final LongAccumulator acc = new LongAccumulator(Math::max, Long.MIN_VALUE); // non-final and visible for tests long modInterval; @@ -34,35 +37,41 @@ final class MaxScoreAccumulator { this.modInterval = DEFAULT_INTERVAL; } - /** - * Return the max encoded docId and score found in the two longs, following the encoding in {@link - * #accumulate}. 
- */ - private static long maxEncode(long v1, long v2) { - float score1 = Float.intBitsToFloat((int) (v1 >> 32)); - float score2 = Float.intBitsToFloat((int) (v2 >> 32)); - int cmp = Float.compare(score1, score2); - if (cmp == 0) { - // tie-break on the minimum doc base - return (int) v1 < (int) v2 ? v1 : v2; - } else if (cmp > 0) { - return v1; - } - return v2; + void accumulate(int docId, float score) { + assert docId >= 0 && score >= 0; + acc.accumulate(encode(docId, score)); } - void accumulate(int docId, float score) { + void accumulateIntScore(int docId, int score) { assert docId >= 0 && score >= 0; - long encode = (((long) Float.floatToIntBits(score)) << 32) | docId; - acc.accumulate(encode); + acc.accumulate(encodeIntScore(docId, score)); + } + + static long encode(int docId, float score) { + return encodeIntScore(docId, NumericUtils.floatToSortableInt(score)); + } + + static long encodeIntScore(int docId, int score) { + return (((long) score) << 32) | (Integer.MAX_VALUE - docId); + } + + static float toScore(long value) { + return NumericUtils.sortableIntToFloat(toIntScore(value)); + } + + static int toIntScore(long value) { + return (int) (value >>> 32); } - public static float toScore(long value) { - return Float.intBitsToFloat((int) (value >> 32)); + static int docId(long value) { + return Integer.MAX_VALUE - ((int) value); } - public static int docId(long value) { - return (int) value; + static int nextUp(int intScore) { + assert intScore <= POS_INF_TO_SORTABLE_INT; + int nextUp = Math.min(POS_INF_TO_SORTABLE_INT, intScore + 1); + assert nextUp == NumericUtils.floatToSortableInt(Math.nextUp(NumericUtils.sortableIntToFloat(intScore))); + return nextUp; } long getRaw() { diff --git a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java index ab39b3e2424e..e61ebcd28e90 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java +++ 
b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java @@ -17,7 +17,10 @@ package org.apache.lucene.search; import java.io.IOException; +import java.util.stream.IntStream; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.util.LongHeap; +import org.apache.lucene.util.NumericUtils; /** * A {@link Collector} implementation that collects the top-scoring hits, returning them as a {@link @@ -32,31 +35,21 @@ public class TopScoreDocCollector extends TopDocsCollector { private final ScoreDoc after; + private final LongHeap heap; final int totalHitsThreshold; final MaxScoreAccumulator minScoreAcc; // prevents instantiation TopScoreDocCollector( int numHits, ScoreDoc after, int totalHitsThreshold, MaxScoreAccumulator minScoreAcc) { - super(new HitQueue(numHits, true)); + super(null); + this.heap = new LongHeap(numHits); + IntStream.range(0, numHits).forEach(_ -> heap.push(MaxScoreAccumulator.LEAST_COMPETITIVE_CODE)); this.after = after; this.totalHitsThreshold = totalHitsThreshold; this.minScoreAcc = minScoreAcc; } - @Override - protected int topDocsSize() { - // Note: this relies on sentinel values having Integer.MAX_VALUE as a doc ID. 
- int[] validTopHitCount = new int[1]; - pq.forEach( - scoreDoc -> { - if (scoreDoc.doc != Integer.MAX_VALUE) { - validTopHitCount[0]++; - } - }); - return validTopHitCount[0]; - } - @Override protected TopDocs newTopDocs(ScoreDoc[] results, int start) { return results == null @@ -73,23 +66,32 @@ public ScoreMode scoreMode() { public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { final int docBase = context.docBase; final ScoreDoc after = this.after; - final float afterScore; + final int afterScore; final int afterDoc; if (after == null) { - afterScore = Float.POSITIVE_INFINITY; + afterScore = NumericUtils.floatToSortableInt(Float.POSITIVE_INFINITY); afterDoc = DocIdSetIterator.NO_MORE_DOCS; } else { - afterScore = after.score; + afterScore = NumericUtils.floatToSortableInt(after.score); afterDoc = after.doc - context.docBase; } +// final long afterEncode; +// if (after == null) { +// afterEncode = +// MaxScoreAccumulator.encode(DocIdSetIterator.NO_MORE_DOCS, Float.POSITIVE_INFINITY); +// } else { +// afterEncode = +// MaxScoreAccumulator.encode(after.doc, after.score); +// } return new LeafCollector() { private Scorable scorer; // HitQueue implements getSentinelObject to return a ScoreDoc, so we know // that at this point top() is already initialized. 
- private ScoreDoc pqTop = pq.top(); - private float minCompetitiveScore; + private int topDoc = MaxScoreAccumulator.docId(heap.top()); + private int topScore = MaxScoreAccumulator.toIntScore(heap.top());; + private int minCompetitiveScore; @Override public void setScorer(Scorable scorer) throws IOException { @@ -103,7 +105,7 @@ public void setScorer(Scorable scorer) throws IOException { @Override public void collect(int doc) throws IOException { - float score = scorer.score(); + final int score = NumericUtils.floatToSortableInt(scorer.score()); int hitCountSoFar = ++totalHits; @@ -121,7 +123,7 @@ public void collect(int doc) throws IOException { return; } - if (score <= pqTop.score) { + if (score <= topScore) { // Note: for queries that match lots of hits, this is the common case: most hits are not // competitive. if (hitCountSoFar == totalHitsThreshold + 1) { @@ -138,10 +140,11 @@ public void collect(int doc) throws IOException { } } - private void collectCompetitiveHit(int doc, float score) throws IOException { - pqTop.doc = doc + docBase; - pqTop.score = score; - pqTop = pq.updateTop(); + private void collectCompetitiveHit(int doc, int score) throws IOException { + final long encode = MaxScoreAccumulator.encodeIntScore(doc + docBase, score); + long topEncode = heap.updateTop(encode); + topDoc = MaxScoreAccumulator.docId(topEncode); + topScore = MaxScoreAccumulator.toIntScore(topEncode); updateMinCompetitiveScore(scorer); } @@ -152,10 +155,10 @@ private void updateGlobalMinCompetitiveScore(Scorable scorer) throws IOException // since we tie-break on doc id and collect in doc id order we can require // the next float if the global minimum score is set on a document id that is // smaller than the ids in the current leaf - float score = MaxScoreAccumulator.toScore(maxMinScore); - score = docBase >= MaxScoreAccumulator.docId(maxMinScore) ? 
Math.nextUp(score) : score; + int score = MaxScoreAccumulator.toIntScore(maxMinScore); + score = docBase >= MaxScoreAccumulator.docId(maxMinScore) ? MaxScoreAccumulator.nextUp(score) : score; if (score > minCompetitiveScore) { - scorer.setMinCompetitiveScore(score); + scorer.setMinCompetitiveScore(NumericUtils.sortableIntToFloat(score)); minCompetitiveScore = score; totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO; } @@ -164,23 +167,78 @@ private void updateGlobalMinCompetitiveScore(Scorable scorer) throws IOException private void updateMinCompetitiveScore(Scorable scorer) throws IOException { if (totalHits > totalHitsThreshold) { - // since we tie-break on doc id and collect in doc id order, we can require the next float - // pqTop is never null since TopScoreDocCollector fills the priority queue with sentinel - // values if the top element is a sentinel value, its score will be -Infty and the below - // logic is still valid - float localMinScore = Math.nextUp(pqTop.score); - if (localMinScore > minCompetitiveScore) { - scorer.setMinCompetitiveScore(localMinScore); + if (topScore >= minCompetitiveScore) { + minCompetitiveScore = MaxScoreAccumulator.nextUp(topScore); + scorer.setMinCompetitiveScore(NumericUtils.sortableIntToFloat(minCompetitiveScore)); totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO; - minCompetitiveScore = localMinScore; if (minScoreAcc != null) { // we don't use the next float but we register the document id so that other leaves or // leaf partitions can require it if they are after the current maximum - minScoreAcc.accumulate(pqTop.doc, pqTop.score); + minScoreAcc.accumulateIntScore(topDoc, topScore); } } } } }; } + + @Override + protected int topDocsSize() { + int cnt = 0; + for (int i = 1; i <= heap.size(); i++) { + if (heap.get(i) != MaxScoreAccumulator.LEAST_COMPETITIVE_CODE) { + cnt++; + } + } + return cnt; + } + + @Override + protected void populateResults(ScoreDoc[] results, int howMany) { + for (int i = 
howMany - 1; i >= 0; i--) { + long encode = heap.pop(); + results[i] = + new ScoreDoc(MaxScoreAccumulator.docId(encode), MaxScoreAccumulator.toScore(encode)); + } + } + + @Override + public TopDocs topDocs(int start, int howMany) { + // In case pq was populated with sentinel values, there might be less + // results than pq.size(). Therefore return all results until either + // pq.size() or totalHits. + int size = topDocsSize(); + + if (howMany < 0) { + throw new IllegalArgumentException( + "Number of hits requested must be greater than 0 but value was " + howMany); + } + + if (start < 0) { + throw new IllegalArgumentException( + "Expected value of starting position is between 0 and " + size + ", got " + start); + } + + if (start >= size || howMany == 0) { + return newTopDocs(null, start); + } + + // We know that start < pqsize, so just fix howMany. + howMany = Math.min(size - start, howMany); + ScoreDoc[] results = new ScoreDoc[howMany]; + + // pq's pop() returns the 'least' element in the queue, therefore need + // to discard the first ones, until we reach the requested range. + // Note that this loop will usually not be executed, since the common usage + // should be that the caller asks for the last howMany results. However it's + // needed here for completeness. + for (int i = heap.size() - start - howMany; i > 0; i--) { + heap.pop(); + } + + // Get the requested results from pq. 
+ populateResults(results, howMany); + + return newTopDocs(results, start); + } } From 8ec993032c17a80b4526f9514ed8716a15addb39 Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Mon, 26 May 2025 04:06:18 +0800 Subject: [PATCH 07/24] feedback iter --- .../lucene/search/BatchScoreBulkScorer.java | 68 ------------------- .../org/apache/lucene/search/TermQuery.java | 2 +- .../java/org/apache/lucene/search/Weight.java | 44 +++++++++++- .../lucene/search/TestBooleanScorer.java | 2 +- 4 files changed, 45 insertions(+), 71 deletions(-) delete mode 100644 lucene/core/src/java/org/apache/lucene/search/BatchScoreBulkScorer.java diff --git a/lucene/core/src/java/org/apache/lucene/search/BatchScoreBulkScorer.java b/lucene/core/src/java/org/apache/lucene/search/BatchScoreBulkScorer.java deleted file mode 100644 index c000209d2fc1..000000000000 --- a/lucene/core/src/java/org/apache/lucene/search/BatchScoreBulkScorer.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.lucene.search; - -import java.io.IOException; -import org.apache.lucene.util.Bits; - -/** - * A bulk scorer used when {@link ScoreMode#needsScores()} is true and {@link - * Scorer#nextDocsAndScores} has optimizations to run faster than one-by-one iteration. - */ -class BatchScoreBulkScorer extends BulkScorer { - - private final SimpleScorable scorable = new SimpleScorable(); - private final DocAndScoreBuffer buffer = new DocAndScoreBuffer(); - private final Scorer scorer; - - BatchScoreBulkScorer(Scorer scorer) { - this.scorer = scorer; - } - - @Override - public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException { - if (collector.competitiveIterator() != null) { - return new Weight.DefaultBulkScorer(scorer).score(collector, acceptDocs, min, max); - } - - collector.setScorer(scorable); - scorer.setMinCompetitiveScore(scorable.minCompetitiveScore); - - if (scorer.docID() < min) { - scorer.iterator().advance(min); - } - - for (scorer.nextDocsAndScores(max, acceptDocs, buffer); - buffer.size > 0; - scorer.nextDocsAndScores(max, acceptDocs, buffer)) { - for (int i = 0, size = buffer.size; i < size; i++) { - float score = scorable.score = buffer.scores[i]; - if (score >= scorable.minCompetitiveScore) { - collector.collect(buffer.docs[i]); - } - } - scorer.setMinCompetitiveScore(scorable.minCompetitiveScore); - } - - return scorer.docID(); - } - - @Override - public long cost() { - return scorer.iterator().cost(); - } -} diff --git a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java index 6c29c23095b4..736db6b963dc 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java @@ -173,7 +173,7 @@ public BulkScorer bulkScorer() throws IOException { return ConstantScoreScorerSupplier.fromIterator(iterator, 0f, scoreMode, maxDoc) .bulkScorer(); } - return new 
BatchScoreBulkScorer(get(Long.MAX_VALUE)); + return new DefaultBulkScorer(get(Long.MAX_VALUE), scoreMode); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/Weight.java b/lucene/core/src/java/org/apache/lucene/search/Weight.java index 341dd3cadf6a..5912b996678b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Weight.java +++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java @@ -231,9 +231,16 @@ protected static class DefaultBulkScorer extends BulkScorer { private final Scorer scorer; private final DocIdSetIterator iterator; private final TwoPhaseIterator twoPhase; + private final ScoreMode scoreMode; + private DocAndScoreBuffer buffer; /** Sole constructor. */ public DefaultBulkScorer(Scorer scorer) { + this(scorer, null); + } + + /** Sole constructor. */ + public DefaultBulkScorer(Scorer scorer, ScoreMode scoreMode) { this.scorer = Objects.requireNonNull(scorer); this.twoPhase = scorer.twoPhaseIterator(); if (twoPhase == null) { @@ -241,6 +248,7 @@ public DefaultBulkScorer(Scorer scorer) { } else { this.iterator = twoPhase.approximation(); } + this.scoreMode = scoreMode; } @Override @@ -251,9 +259,14 @@ public long cost() { @Override public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException { - collector.setScorer(scorer); DocIdSetIterator competitiveIterator = collector.competitiveIterator(); + if (scoreMode != null && scoreMode.needsScores() && competitiveIterator == null) { + return batchScore(collector, acceptDocs, min, max); + } + + collector.setScorer(scorer); + if (competitiveIterator != null) { if (competitiveIterator.docID() > min) { min = competitiveIterator.docID(); @@ -290,6 +303,35 @@ public int score(LeafCollector collector, Bits acceptDocs, int min, int max) return iterator.docID(); } + private int batchScore(LeafCollector collector, Bits acceptDocs, int min, int max) + throws IOException { + if (buffer == null) { + buffer = new DocAndScoreBuffer(); + } + + SimpleScorable 
scorable = new SimpleScorable(); + collector.setScorer(scorable); + scorer.setMinCompetitiveScore(scorable.minCompetitiveScore); + + if (scorer.docID() < min) { + scorer.iterator().advance(min); + } + + for (scorer.nextDocsAndScores(max, acceptDocs, buffer); + buffer.size > 0; + scorer.nextDocsAndScores(max, acceptDocs, buffer)) { + for (int i = 0, size = buffer.size; i < size; i++) { + float score = scorable.score = buffer.scores[i]; + if (score >= scorable.minCompetitiveScore) { + collector.collect(buffer.docs[i]); + } + } + scorer.setMinCompetitiveScore(scorable.minCompetitiveScore); + } + + return scorer.docID(); + } + private static void scoreIterator( LeafCollector collector, Bits acceptDocs, DocIdSetIterator iterator, int max) throws IOException { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java index 2e1ad202f0be..67c7ed2fe5a0 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java @@ -201,7 +201,7 @@ public void testOptimizeTopLevelClauseOrNull() throws IOException { weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE, 1); ss = weight.scorerSupplier(ctx); scorer = ((BooleanScorerSupplier) ss).booleanScorer(); - assertThat(scorer, instanceOf(BatchScoreBulkScorer.class)); // term scorer + assertThat(scorer, instanceOf(DefaultBulkScorer.class)); // term scorer w.close(); reader.close(); From 8b25eb35743eaa069cf5af7a6bc0c74a16bd2b26 Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Mon, 26 May 2025 04:30:52 +0800 Subject: [PATCH 08/24] fix --- lucene/core/src/java/org/apache/lucene/search/Weight.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/Weight.java b/lucene/core/src/java/org/apache/lucene/search/Weight.java index 5912b996678b..c4b784144afa 100644 --- 
a/lucene/core/src/java/org/apache/lucene/search/Weight.java +++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java @@ -233,6 +233,7 @@ protected static class DefaultBulkScorer extends BulkScorer { private final TwoPhaseIterator twoPhase; private final ScoreMode scoreMode; private DocAndScoreBuffer buffer; + private SimpleScorable scorable; /** Sole constructor. */ public DefaultBulkScorer(Scorer scorer) { @@ -308,8 +309,10 @@ private int batchScore(LeafCollector collector, Bits acceptDocs, int min, int ma if (buffer == null) { buffer = new DocAndScoreBuffer(); } + if (scorable == null) { + scorable = new SimpleScorable(); + } - SimpleScorable scorable = new SimpleScorable(); collector.setScorer(scorable); scorer.setMinCompetitiveScore(scorable.minCompetitiveScore); From 1af4d1b23aff940ff3418fca49edc8e5ce88b449 Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Mon, 26 May 2025 15:06:21 +0800 Subject: [PATCH 09/24] iter --- .../apache/lucene/search/DocScoreEncoder.java | 61 +++++++++++++++++++ .../lucene/search/MaxScoreAccumulator.java | 37 +---------- .../lucene/search/TopFieldCollector.java | 2 +- .../lucene/search/TopScoreDocCollector.java | 47 ++++++-------- .../lucene/search/TestDocScoreEncoder.java | 38 ++++++++++++ .../search/TestMaxScoreAccumulator.java | 32 +++++----- .../lucene/search/TestTopDocsCollector.java | 14 ++--- .../lucene/search/TestTopFieldCollector.java | 14 ++--- 8 files changed, 152 insertions(+), 93 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java create mode 100644 lucene/core/src/test/org/apache/lucene/search/TestDocScoreEncoder.java diff --git a/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java b/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java new file mode 100644 index 000000000000..e2e22e6ff722 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java @@ -0,0 +1,61 @@ +package org.apache.lucene.search; + +import 
java.util.Comparator; +import org.apache.lucene.util.NumericUtils; + +/** + * An encoder do encode (doc, score) pair as a long whose sort order is same as {@code (o1, o2) -> + * Float.compare(o1.score, o2.score)).thenComparing(Comparator.comparingInt((ScoreDoc o) -> + * o.doc).reversed())} + */ +class DocScoreEncoder { + + static final long LEAST_COMPETITIVE_CODE = encode(Integer.MAX_VALUE, Float.NEGATIVE_INFINITY); + private static final int POS_INF_TO_SORTABLE_INT = scoreToSortableInt(Float.POSITIVE_INFINITY); + + static long encode(int docId, float score) { + return encodeIntScore(docId, scoreToSortableInt(score)); + } + + static long encodeIntScore(int docId, int score) { + return (((long) score) << 32) | (Integer.MAX_VALUE - docId); + } + + static float toScore(long value) { + return sortableIntToScore(toIntScore(value)); + } + + static int toIntScore(long value) { + return (int) (value >>> 32); + } + + static int docId(long value) { + return Integer.MAX_VALUE - ((int) value); + } + + static int nextUp(int intScore) { + assert intScore <= POS_INF_TO_SORTABLE_INT; + int nextUp = Math.min(POS_INF_TO_SORTABLE_INT, intScore + 1); + assert nextUp == scoreToSortableInt(Math.nextUp(sortableIntToScore(intScore))); + return nextUp; + } + + /** + * Score is non-negative float so wo use floatToRawIntBits instead of {@link + * NumericUtils#floatToSortableInt}. We do not assert score >= 0 here to allow pass negative float + * to indicate totally non-competitive, e.g. {@link #LEAST_COMPETITIVE_CODE}. 
+ */ + static int scoreToSortableInt(float score) { + assert Float.isNaN(score) == false; + return Float.floatToRawIntBits(score); + } + + /** + * @see #scoreToSortableInt(float) + */ + static float sortableIntToScore(int scoreBits) { + float score = Float.intBitsToFloat(scoreBits); + assert Float.isNaN(score) == false; + return score; + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/MaxScoreAccumulator.java b/lucene/core/src/java/org/apache/lucene/search/MaxScoreAccumulator.java index 59e23b2e2a57..7e60c8109f4b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MaxScoreAccumulator.java +++ b/lucene/core/src/java/org/apache/lucene/search/MaxScoreAccumulator.java @@ -18,14 +18,11 @@ package org.apache.lucene.search; import java.util.concurrent.atomic.LongAccumulator; -import org.apache.lucene.util.NumericUtils; /** Maintains the maximum score and its corresponding document id concurrently */ final class MaxScoreAccumulator { // we use 2^10-1 to check the remainder with a bitwise operation private static final int DEFAULT_INTERVAL = 0x3ff; - private static final int POS_INF_TO_SORTABLE_INT = NumericUtils.floatToSortableInt(Float.POSITIVE_INFINITY); - static final long LEAST_COMPETITIVE_CODE = encode(Integer.MAX_VALUE, Float.NEGATIVE_INFINITY); // scores are always positive final LongAccumulator acc = new LongAccumulator(Math::max, Long.MIN_VALUE); @@ -39,39 +36,11 @@ final class MaxScoreAccumulator { void accumulate(int docId, float score) { assert docId >= 0 && score >= 0; - acc.accumulate(encode(docId, score)); + acc.accumulate(DocScoreEncoder.encode(docId, score)); } - void accumulateIntScore(int docId, int score) { - assert docId >= 0 && score >= 0; - acc.accumulate(encodeIntScore(docId, score)); - } - - static long encode(int docId, float score) { - return encodeIntScore(docId, NumericUtils.floatToSortableInt(score)); - } - - static long encodeIntScore(int docId, int score) { - return (((long) score) << 32) | (Integer.MAX_VALUE - docId); 
- } - - static float toScore(long value) { - return NumericUtils.sortableIntToFloat(toIntScore(value)); - } - - static int toIntScore(long value) { - return (int) (value >>> 32); - } - - static int docId(long value) { - return Integer.MAX_VALUE - ((int) value); - } - - static int nextUp(int intScore) { - assert intScore <= POS_INF_TO_SORTABLE_INT; - int nextUp = Math.min(POS_INF_TO_SORTABLE_INT, intScore + 1); - assert nextUp == NumericUtils.floatToSortableInt(Math.nextUp(NumericUtils.sortableIntToFloat(intScore))); - return nextUp; + void accumulate(long code) { + acc.accumulate(code); } long getRaw() { diff --git a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java index 384f5fa1168e..bcbc145a72d9 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java @@ -367,7 +367,7 @@ protected void updateGlobalMinCompetitiveScore(Scorable scorer) throws IOExcepti long maxMinScore = minScoreAcc.getRaw(); float score; if (maxMinScore != Long.MIN_VALUE - && (score = MaxScoreAccumulator.toScore(maxMinScore)) > minCompetitiveScore) { + && (score = DocScoreEncoder.toScore(maxMinScore)) > minCompetitiveScore) { scorer.setMinCompetitiveScore(score); minCompetitiveScore = score; totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO; diff --git a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java index e61ebcd28e90..8d981ae9525e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java @@ -20,7 +20,6 @@ import java.util.stream.IntStream; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.util.LongHeap; -import org.apache.lucene.util.NumericUtils; /** * A {@link 
Collector} implementation that collects the top-scoring hits, returning them as a {@link @@ -44,7 +43,7 @@ public class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> { int numHits, ScoreDoc after, int totalHitsThreshold, MaxScoreAccumulator minScoreAcc) { super(null); this.heap = new LongHeap(numHits); - IntStream.range(0, numHits).forEach(_ -> heap.push(MaxScoreAccumulator.LEAST_COMPETITIVE_CODE)); + IntStream.range(0, numHits).forEach(_ -> heap.push(DocScoreEncoder.LEAST_COMPETITIVE_CODE)); this.after = after; this.totalHitsThreshold = totalHitsThreshold; this.minScoreAcc = minScoreAcc; @@ -69,28 +68,21 @@ public LeafCollector getLeafCollector(LeafReaderContext context) throws IOExcept final int afterScore; final int afterDoc; if (after == null) { - afterScore = NumericUtils.floatToSortableInt(Float.POSITIVE_INFINITY); + afterScore = Integer.MAX_VALUE; afterDoc = DocIdSetIterator.NO_MORE_DOCS; } else { - afterScore = NumericUtils.floatToSortableInt(after.score); + afterScore = DocScoreEncoder.scoreToSortableInt(after.score); afterDoc = after.doc - context.docBase; } -// final long afterEncode; -// if (after == null) { -// afterEncode = -// MaxScoreAccumulator.encode(DocIdSetIterator.NO_MORE_DOCS, Float.POSITIVE_INFINITY); -// } else { -// afterEncode = -// MaxScoreAccumulator.encode(after.doc, after.score); -// } return new LeafCollector() { private Scorable scorer; // HitQueue implements getSentinelObject to return a ScoreDoc, so we know // that at this point top() is already initialized.
- private int topDoc = MaxScoreAccumulator.docId(heap.top()); - private int topScore = MaxScoreAccumulator.toIntScore(heap.top());; + private long topCode = heap.top(); + private int topScore = DocScoreEncoder.toIntScore(topCode); + ; private int minCompetitiveScore; @Override @@ -105,7 +97,7 @@ public void setScorer(Scorable scorer) throws IOException { @Override public void collect(int doc) throws IOException { - final int score = NumericUtils.floatToSortableInt(scorer.score()); + final int score = DocScoreEncoder.scoreToSortableInt(scorer.score()); int hitCountSoFar = ++totalHits; @@ -141,10 +133,9 @@ public void collect(int doc) throws IOException { } private void collectCompetitiveHit(int doc, int score) throws IOException { - final long encode = MaxScoreAccumulator.encodeIntScore(doc + docBase, score); - long topEncode = heap.updateTop(encode); - topDoc = MaxScoreAccumulator.docId(topEncode); - topScore = MaxScoreAccumulator.toIntScore(topEncode); + final long code = DocScoreEncoder.encodeIntScore(doc + docBase, score); + topCode = heap.updateTop(code); + topScore = DocScoreEncoder.toIntScore(topCode); updateMinCompetitiveScore(scorer); } @@ -155,10 +146,11 @@ private void updateGlobalMinCompetitiveScore(Scorable scorer) throws IOException // since we tie-break on doc id and collect in doc id order we can require // the next float if the global minimum score is set on a document id that is // smaller than the ids in the current leaf - int score = MaxScoreAccumulator.toIntScore(maxMinScore); - score = docBase >= MaxScoreAccumulator.docId(maxMinScore) ? MaxScoreAccumulator.nextUp(score) : score; + int score = DocScoreEncoder.toIntScore(maxMinScore); + score = + docBase >= DocScoreEncoder.docId(maxMinScore) ? 
DocScoreEncoder.nextUp(score) : score; if (score > minCompetitiveScore) { - scorer.setMinCompetitiveScore(NumericUtils.sortableIntToFloat(score)); + scorer.setMinCompetitiveScore(DocScoreEncoder.sortableIntToScore(score)); minCompetitiveScore = score; totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO; } @@ -168,13 +160,13 @@ private void updateGlobalMinCompetitiveScore(Scorable scorer) throws IOException private void updateMinCompetitiveScore(Scorable scorer) throws IOException { if (totalHits > totalHitsThreshold) { if (topScore >= minCompetitiveScore) { - minCompetitiveScore = MaxScoreAccumulator.nextUp(topScore); - scorer.setMinCompetitiveScore(NumericUtils.sortableIntToFloat(minCompetitiveScore)); + minCompetitiveScore = DocScoreEncoder.nextUp(topScore); + scorer.setMinCompetitiveScore(DocScoreEncoder.sortableIntToScore(minCompetitiveScore)); totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO; if (minScoreAcc != null) { // we don't use the next float but we register the document id so that other leaves or // leaf partitions can require it if they are after the current maximum - minScoreAcc.accumulateIntScore(topDoc, topScore); + minScoreAcc.accumulate(topCode); } } } @@ -186,7 +178,7 @@ private void updateMinCompetitiveScore(Scorable scorer) throws IOException { protected int topDocsSize() { int cnt = 0; for (int i = 1; i <= heap.size(); i++) { - if (heap.get(i) != MaxScoreAccumulator.LEAST_COMPETITIVE_CODE) { + if (heap.get(i) != DocScoreEncoder.LEAST_COMPETITIVE_CODE) { cnt++; } } @@ -197,8 +189,7 @@ protected int topDocsSize() { protected void populateResults(ScoreDoc[] results, int howMany) { for (int i = howMany - 1; i >= 0; i--) { long encode = heap.pop(); - results[i] = - new ScoreDoc(MaxScoreAccumulator.docId(encode), MaxScoreAccumulator.toScore(encode)); + results[i] = new ScoreDoc(DocScoreEncoder.docId(encode), DocScoreEncoder.toScore(encode)); } } diff --git 
a/lucene/core/src/test/org/apache/lucene/search/TestDocScoreEncoder.java b/lucene/core/src/test/org/apache/lucene/search/TestDocScoreEncoder.java new file mode 100644 index 000000000000..32640ac623d3 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/search/TestDocScoreEncoder.java @@ -0,0 +1,38 @@ +package org.apache.lucene.search; + +import org.apache.lucene.tests.util.LuceneTestCase; +import org.apache.lucene.util.NumericUtils; + +public class TestDocScoreEncoder extends LuceneTestCase { + + public void testFloat() { + for (int i = 0; i < 100; i++) { + doAssert(Float.intBitsToFloat(random().nextInt()), Float.intBitsToFloat(random().nextInt())); + } + } + + private void doAssert(float f1, float f2) { + if (Float.isNaN(f1) || Float.isNaN(f2)) { + return; + } + + int rawInt1 = Float.floatToRawIntBits(f1); + int rawInt2 = Float.floatToRawIntBits(f2); + int sortInt1 = NumericUtils.floatToSortableInt(f1); + int sortInt2 = NumericUtils.floatToSortableInt(f2); + + if (f1 > 0) { + assertTrue(rawInt1 > 0); + assertEquals(rawInt1, sortInt1); + } + + // System.out.println("f1: " + f1); + // System.out.println("rawInt1: " + rawInt1); + // System.out.println("sortInt1: " + sortInt1); + // System.out.println("f2: " + f2); + // System.out.println("rawInt2: " + rawInt2); + // System.out.println("sortInt2: " + sortInt2); + // System.out.println(); + + } +} diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreAccumulator.java b/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreAccumulator.java index 561609719313..5937b2fe8ed1 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreAccumulator.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreAccumulator.java @@ -23,28 +23,28 @@ public class TestMaxScoreAccumulator extends LuceneTestCase { public void testSimple() { MaxScoreAccumulator acc = new MaxScoreAccumulator(); acc.accumulate(0, 0f); - assertEquals(0f, MaxScoreAccumulator.toScore(acc.getRaw()), 0); - 
assertEquals(0, MaxScoreAccumulator.docId(acc.getRaw()), 0); + assertEquals(0f, DocScoreEncoder.toScore(acc.getRaw()), 0); + assertEquals(0, DocScoreEncoder.docId(acc.getRaw()), 0); acc.accumulate(10, 0f); - assertEquals(0f, MaxScoreAccumulator.toScore(acc.getRaw()), 0); - assertEquals(0, MaxScoreAccumulator.docId(acc.getRaw()), 0); + assertEquals(0f, DocScoreEncoder.toScore(acc.getRaw()), 0); + assertEquals(0, DocScoreEncoder.docId(acc.getRaw()), 0); acc.accumulate(100, 1000f); - assertEquals(1000f, MaxScoreAccumulator.toScore(acc.getRaw()), 0); - assertEquals(100, MaxScoreAccumulator.docId(acc.getRaw()), 0); + assertEquals(1000f, DocScoreEncoder.toScore(acc.getRaw()), 0); + assertEquals(100, DocScoreEncoder.docId(acc.getRaw()), 0); acc.accumulate(1000, 5f); - assertEquals(1000f, MaxScoreAccumulator.toScore(acc.getRaw()), 0); - assertEquals(100, MaxScoreAccumulator.docId(acc.getRaw()), 0); + assertEquals(1000f, DocScoreEncoder.toScore(acc.getRaw()), 0); + assertEquals(100, DocScoreEncoder.docId(acc.getRaw()), 0); acc.accumulate(99, 1000f); - assertEquals(1000f, MaxScoreAccumulator.toScore(acc.getRaw()), 0); - assertEquals(99, MaxScoreAccumulator.docId(acc.getRaw()), 0); + assertEquals(1000f, DocScoreEncoder.toScore(acc.getRaw()), 0); + assertEquals(99, DocScoreEncoder.docId(acc.getRaw()), 0); acc.accumulate(1000, 1001f); - assertEquals(1001f, MaxScoreAccumulator.toScore(acc.getRaw()), 0); - assertEquals(1000, MaxScoreAccumulator.docId(acc.getRaw()), 0); + assertEquals(1001f, DocScoreEncoder.toScore(acc.getRaw()), 0); + assertEquals(1000, DocScoreEncoder.docId(acc.getRaw()), 0); acc.accumulate(10, 1001f); - assertEquals(1001f, MaxScoreAccumulator.toScore(acc.getRaw()), 0); - assertEquals(10, MaxScoreAccumulator.docId(acc.getRaw()), 0); + assertEquals(1001f, DocScoreEncoder.toScore(acc.getRaw()), 0); + assertEquals(10, DocScoreEncoder.docId(acc.getRaw()), 0); acc.accumulate(100, 1001f); - assertEquals(1001f, MaxScoreAccumulator.toScore(acc.getRaw()), 0); - 
assertEquals(10, MaxScoreAccumulator.docId(acc.getRaw()), 0); + assertEquals(1001f, DocScoreEncoder.toScore(acc.getRaw()), 0); + assertEquals(10, DocScoreEncoder.docId(acc.getRaw()), 0); } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java index cc7405f67724..bef372ecc110 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java @@ -537,25 +537,25 @@ public void testConcurrentMinScore() throws Exception { scorer2.score = 7; leafCollector2.collect(2); - assertEquals(MaxScoreAccumulator.toScore(minValueChecker.getRaw()), 7f, 0f); + assertEquals(DocScoreEncoder.toScore(minValueChecker.getRaw()), 7f, 0f); assertNull(scorer.minCompetitiveScore); assertEquals(Math.nextUp(7f), scorer2.minCompetitiveScore, 0f); scorer2.score = 1; leafCollector2.collect(3); - assertEquals(MaxScoreAccumulator.toScore(minValueChecker.getRaw()), 7f, 0f); + assertEquals(DocScoreEncoder.toScore(minValueChecker.getRaw()), 7f, 0f); assertNull(scorer.minCompetitiveScore); assertEquals(Math.nextUp(7f), scorer2.minCompetitiveScore, 0f); scorer.score = 10; leafCollector.collect(2); - assertEquals(MaxScoreAccumulator.toScore(minValueChecker.getRaw()), 7f, 0f); + assertEquals(DocScoreEncoder.toScore(minValueChecker.getRaw()), 7f, 0f); assertEquals(7f, scorer.minCompetitiveScore, 0f); assertEquals(Math.nextUp(7f), scorer2.minCompetitiveScore, 0f); scorer.score = 11; leafCollector.collect(3); - assertEquals(MaxScoreAccumulator.toScore(minValueChecker.getRaw()), 10, 0f); + assertEquals(DocScoreEncoder.toScore(minValueChecker.getRaw()), 10, 0f); assertEquals(Math.nextUp(10f), scorer.minCompetitiveScore, 0f); assertEquals(Math.nextUp(7f), scorer2.minCompetitiveScore, 0f); @@ -567,19 +567,19 @@ public void testConcurrentMinScore() throws Exception { scorer3.score = 1f; leafCollector3.collect(0); - 
assertEquals(10f, MaxScoreAccumulator.toScore(minValueChecker.getRaw()), 0f); + assertEquals(10f, DocScoreEncoder.toScore(minValueChecker.getRaw()), 0f); assertEquals(Math.nextUp(10f), scorer3.minCompetitiveScore, 0f); scorer.score = 11; leafCollector.collect(4); - assertEquals(11f, MaxScoreAccumulator.toScore(minValueChecker.getRaw()), 0f); + assertEquals(11f, DocScoreEncoder.toScore(minValueChecker.getRaw()), 0f); assertEquals(Math.nextUp(11f), scorer.minCompetitiveScore, 0f); assertEquals(Math.nextUp(7f), scorer2.minCompetitiveScore, 0f); assertEquals(Math.nextUp(10f), scorer3.minCompetitiveScore, 0f); scorer3.score = 2f; leafCollector3.collect(1); - assertEquals(MaxScoreAccumulator.toScore(minValueChecker.getRaw()), 11f, 0f); + assertEquals(DocScoreEncoder.toScore(minValueChecker.getRaw()), 11f, 0f); assertEquals(Math.nextUp(11f), scorer.minCompetitiveScore, 0f); assertEquals(Math.nextUp(7f), scorer2.minCompetitiveScore, 0f); assertEquals(Math.nextUp(11f), scorer3.minCompetitiveScore, 0f); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java index 4393ace2c265..231b4696e085 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java @@ -595,25 +595,25 @@ public void testConcurrentMinScore() throws Exception { scorer2.score = 7; leafCollector2.collect(2); - assertEquals(7f, MaxScoreAccumulator.toScore(minValueChecker.getRaw()), 0f); + assertEquals(7f, DocScoreEncoder.toScore(minValueChecker.getRaw()), 0f); assertNull(scorer.minCompetitiveScore); assertEquals(7f, scorer2.minCompetitiveScore, 0f); scorer2.score = 1; leafCollector2.collect(3); - assertEquals(7f, MaxScoreAccumulator.toScore(minValueChecker.getRaw()), 0f); + assertEquals(7f, DocScoreEncoder.toScore(minValueChecker.getRaw()), 0f); assertNull(scorer.minCompetitiveScore); assertEquals(7f, 
scorer2.minCompetitiveScore, 0f); scorer.score = 10; leafCollector.collect(2); - assertEquals(7f, MaxScoreAccumulator.toScore(minValueChecker.getRaw()), 0f); + assertEquals(7f, DocScoreEncoder.toScore(minValueChecker.getRaw()), 0f); assertEquals(7f, scorer.minCompetitiveScore, 0f); assertEquals(7f, scorer2.minCompetitiveScore, 0f); scorer.score = 11; leafCollector.collect(3); - assertEquals(10f, MaxScoreAccumulator.toScore(minValueChecker.getRaw()), 0f); + assertEquals(10f, DocScoreEncoder.toScore(minValueChecker.getRaw()), 0f); assertEquals(10f, scorer.minCompetitiveScore, 0f); assertEquals(7f, scorer2.minCompetitiveScore, 0f); @@ -625,19 +625,19 @@ public void testConcurrentMinScore() throws Exception { scorer3.score = 1f; leafCollector3.collect(0); - assertEquals(10f, MaxScoreAccumulator.toScore(minValueChecker.getRaw()), 0f); + assertEquals(10f, DocScoreEncoder.toScore(minValueChecker.getRaw()), 0f); assertEquals(10f, scorer3.minCompetitiveScore, 0f); scorer.score = 11; leafCollector.collect(4); - assertEquals(11f, MaxScoreAccumulator.toScore(minValueChecker.getRaw()), 0f); + assertEquals(11f, DocScoreEncoder.toScore(minValueChecker.getRaw()), 0f); assertEquals(11f, scorer.minCompetitiveScore, 0f); assertEquals(7f, scorer2.minCompetitiveScore, 0f); assertEquals(10f, scorer3.minCompetitiveScore, 0f); scorer3.score = 2f; leafCollector3.collect(1); - assertEquals(11f, MaxScoreAccumulator.toScore(minValueChecker.getRaw()), 0f); + assertEquals(11f, DocScoreEncoder.toScore(minValueChecker.getRaw()), 0f); assertEquals(11f, scorer.minCompetitiveScore, 0f); assertEquals(7f, scorer2.minCompetitiveScore, 0f); assertEquals(11f, scorer3.minCompetitiveScore, 0f); From 6c7c2ebb9fe98e0fd5e503f6387654a90dd0c827 Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Mon, 26 May 2025 15:34:52 +0800 Subject: [PATCH 10/24] iter --- .../apache/lucene/search/DocScoreEncoder.java | 5 +- .../lucene/search/MaxScoreAccumulator.java | 5 -- .../lucene/search/TopFieldCollector.java | 2 +- 
.../lucene/search/TestDocScoreEncoder.java | 75 +++++++++++++------ .../search/TestMaxScoreAccumulator.java | 16 ++-- 5 files changed, 67 insertions(+), 36 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java b/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java index e2e22e6ff722..fafc37e03da7 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java +++ b/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java @@ -1,12 +1,15 @@ package org.apache.lucene.search; -import java.util.Comparator; import org.apache.lucene.util.NumericUtils; /** * An encoder do encode (doc, score) pair as a long whose sort order is same as {@code (o1, o2) -> * Float.compare(o1.score, o2.score)).thenComparing(Comparator.comparingInt((ScoreDoc o) -> * o.doc).reversed())} + * + *
<p>
Note that negative score is allowed but relationship between two codes encoded by negative + * scores is undefined. The only thing guaranteed is codes encoded from negative score are smaller + * than codes encoded from non-negative score. */ class DocScoreEncoder { diff --git a/lucene/core/src/java/org/apache/lucene/search/MaxScoreAccumulator.java b/lucene/core/src/java/org/apache/lucene/search/MaxScoreAccumulator.java index 7e60c8109f4b..3c0b11792ad7 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MaxScoreAccumulator.java +++ b/lucene/core/src/java/org/apache/lucene/search/MaxScoreAccumulator.java @@ -34,11 +34,6 @@ final class MaxScoreAccumulator { this.modInterval = DEFAULT_INTERVAL; } - void accumulate(int docId, float score) { - assert docId >= 0 && score >= 0; - acc.accumulate(DocScoreEncoder.encode(docId, score)); - } - void accumulate(long code) { acc.accumulate(code); } diff --git a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java index bcbc145a72d9..1b24e37c0047 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java @@ -384,7 +384,7 @@ protected void updateMinCompetitiveScore(Scorable scorer) throws IOException { minCompetitiveScore = minScore; totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO; if (minScoreAcc != null) { - minScoreAcc.accumulate(docBase, minScore); + minScoreAcc.accumulate(DocScoreEncoder.encode(docBase, minScore)); } } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestDocScoreEncoder.java b/lucene/core/src/test/org/apache/lucene/search/TestDocScoreEncoder.java index 32640ac623d3..d3f40d813176 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestDocScoreEncoder.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestDocScoreEncoder.java @@ -1,38 +1,71 @@ package org.apache.lucene.search; import 
org.apache.lucene.tests.util.LuceneTestCase; -import org.apache.lucene.util.NumericUtils; public class TestDocScoreEncoder extends LuceneTestCase { - public void testFloat() { - for (int i = 0; i < 100; i++) { - doAssert(Float.intBitsToFloat(random().nextInt()), Float.intBitsToFloat(random().nextInt())); + public void testRandom() { + for (int i = 0; i < 1000; i++) { + doAssert( + Float.intBitsToFloat(random().nextInt()), + random().nextInt(Integer.MAX_VALUE), + Float.intBitsToFloat(random().nextInt()), + random().nextInt(Integer.MAX_VALUE) + ); } } - private void doAssert(float f1, float f2) { - if (Float.isNaN(f1) || Float.isNaN(f2)) { - return; + public void testSameDoc() { + for (int i = 0; i < 1000; i++) { + doAssert( + Float.intBitsToFloat(random().nextInt()), + 1, + Float.intBitsToFloat(random().nextInt()), + 1 + ); } + } - int rawInt1 = Float.floatToRawIntBits(f1); - int rawInt2 = Float.floatToRawIntBits(f2); - int sortInt1 = NumericUtils.floatToSortableInt(f1); - int sortInt2 = NumericUtils.floatToSortableInt(f2); + public void testSameScore() { + for (int i = 0; i < 1000; i++) { + doAssert( + 1f, + random().nextInt(Integer.MAX_VALUE), + 1f, + random().nextInt(Integer.MAX_VALUE) + ); + } + } - if (f1 > 0) { - assertTrue(rawInt1 > 0); - assertEquals(rawInt1, sortInt1); + private void doAssert(float score1, int doc1, float score2, int doc2) { + if (Float.isNaN(score1) || Float.isNaN(score2)) { + return; } - // System.out.println("f1: " + f1); - // System.out.println("rawInt1: " + rawInt1); - // System.out.println("sortInt1: " + sortInt1); - // System.out.println("f2: " + f2); - // System.out.println("rawInt2: " + rawInt2); - // System.out.println("sortInt2: " + sortInt2); - // System.out.println(); + long code1 = DocScoreEncoder.encode(doc1, score1); + long code2 = DocScoreEncoder.encode(doc2, score2); + assertEquals(doc1, DocScoreEncoder.docId(code1)); + assertEquals(doc2, DocScoreEncoder.docId(code2)); + assertEquals(score1, DocScoreEncoder.toScore(code1), 
0f); + assertEquals(score2, DocScoreEncoder.toScore(code2), 0f); + + if (score1 < 0 && score2 < 0) { + return; + } + + if (score1 < 0) { + assertTrue(code1 < code2); + } else if (score2 < 0) { + assertTrue(code2 < code1); + } else if (score1 == score2 && doc1 == doc2) { + assertEquals(code1, code2); + } else if (score1 < score2) { + assertTrue(code1 < code2); + } else if (score1 > score2) { + assertTrue(code1 > code2); + } else { + assertEquals(code1 > code2, doc1 < doc2); + } } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreAccumulator.java b/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreAccumulator.java index 5937b2fe8ed1..2ba5c9016c36 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreAccumulator.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreAccumulator.java @@ -22,28 +22,28 @@ public class TestMaxScoreAccumulator extends LuceneTestCase { public void testSimple() { MaxScoreAccumulator acc = new MaxScoreAccumulator(); - acc.accumulate(0, 0f); + acc.accumulate(DocScoreEncoder.encode(0, 0f)); assertEquals(0f, DocScoreEncoder.toScore(acc.getRaw()), 0); assertEquals(0, DocScoreEncoder.docId(acc.getRaw()), 0); - acc.accumulate(10, 0f); + acc.accumulate(DocScoreEncoder.encode(10, 0f)); assertEquals(0f, DocScoreEncoder.toScore(acc.getRaw()), 0); assertEquals(0, DocScoreEncoder.docId(acc.getRaw()), 0); - acc.accumulate(100, 1000f); + acc.accumulate(DocScoreEncoder.encode(100, 1000f)); assertEquals(1000f, DocScoreEncoder.toScore(acc.getRaw()), 0); assertEquals(100, DocScoreEncoder.docId(acc.getRaw()), 0); - acc.accumulate(1000, 5f); + acc.accumulate(DocScoreEncoder.encode(1000, 5f)); assertEquals(1000f, DocScoreEncoder.toScore(acc.getRaw()), 0); assertEquals(100, DocScoreEncoder.docId(acc.getRaw()), 0); - acc.accumulate(99, 1000f); + acc.accumulate(DocScoreEncoder.encode(99, 1000f)); assertEquals(1000f, DocScoreEncoder.toScore(acc.getRaw()), 0); assertEquals(99, 
DocScoreEncoder.docId(acc.getRaw()), 0); - acc.accumulate(1000, 1001f); + acc.accumulate(DocScoreEncoder.encode(1000, 1001f)); assertEquals(1001f, DocScoreEncoder.toScore(acc.getRaw()), 0); assertEquals(1000, DocScoreEncoder.docId(acc.getRaw()), 0); - acc.accumulate(10, 1001f); + acc.accumulate(DocScoreEncoder.encode(10, 1001f)); assertEquals(1001f, DocScoreEncoder.toScore(acc.getRaw()), 0); assertEquals(10, DocScoreEncoder.docId(acc.getRaw()), 0); - acc.accumulate(100, 1001f); + acc.accumulate(DocScoreEncoder.encode(100, 1001f)); assertEquals(1001f, DocScoreEncoder.toScore(acc.getRaw()), 0); assertEquals(10, DocScoreEncoder.docId(acc.getRaw()), 0); } From ac598dfa54614c43ae7b6cf1e24482639a321f03 Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Mon, 26 May 2025 15:36:50 +0800 Subject: [PATCH 11/24] iter --- .../apache/lucene/search/DocScoreEncoder.java | 25 +++++++++++++++++++ .../lucene/search/TestDocScoreEncoder.java | 25 +++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java b/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java index fafc37e03da7..38452bf63fc0 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java +++ b/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java @@ -1,3 +1,28 @@ +/* + * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + package org.apache.lucene.search; import org.apache.lucene.util.NumericUtils; diff --git a/lucene/core/src/test/org/apache/lucene/search/TestDocScoreEncoder.java b/lucene/core/src/test/org/apache/lucene/search/TestDocScoreEncoder.java index d3f40d813176..68d1060ba4cb 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestDocScoreEncoder.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestDocScoreEncoder.java @@ -1,3 +1,28 @@ +/* + * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + package org.apache.lucene.search; import org.apache.lucene.tests.util.LuceneTestCase; From 505e0abff2425b439ef449a5a5f4be0911f6293b Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Mon, 26 May 2025 16:07:38 +0800 Subject: [PATCH 12/24] fix --- .../apache/lucene/search/DocScoreEncoder.java | 4 ++-- .../lucene/search/TestDocScoreEncoder.java | 16 +++------------- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java b/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java index 38452bf63fc0..0d6bb6cc6c41 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java +++ b/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java @@ -33,8 +33,8 @@ * o.doc).reversed())} * *
<p>
Note that negative score is allowed but relationship between two codes encoded by negative - * scores is undefined. The only thing guaranteed is codes encoded from negative score are smaller - * than codes encoded from non-negative score. + * scores is undefined. The only thing guaranteed is codes encoded from negative scores are smaller + * than codes encoded from non-negative scores. */ class DocScoreEncoder { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestDocScoreEncoder.java b/lucene/core/src/test/org/apache/lucene/search/TestDocScoreEncoder.java index 68d1060ba4cb..919bf91392bc 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestDocScoreEncoder.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestDocScoreEncoder.java @@ -35,30 +35,20 @@ public void testRandom() { Float.intBitsToFloat(random().nextInt()), random().nextInt(Integer.MAX_VALUE), Float.intBitsToFloat(random().nextInt()), - random().nextInt(Integer.MAX_VALUE) - ); + random().nextInt(Integer.MAX_VALUE)); } } public void testSameDoc() { for (int i = 0; i < 1000; i++) { doAssert( - Float.intBitsToFloat(random().nextInt()), - 1, - Float.intBitsToFloat(random().nextInt()), - 1 - ); + Float.intBitsToFloat(random().nextInt()), 1, Float.intBitsToFloat(random().nextInt()), 1); } } public void testSameScore() { for (int i = 0; i < 1000; i++) { - doAssert( - 1f, - random().nextInt(Integer.MAX_VALUE), - 1f, - random().nextInt(Integer.MAX_VALUE) - ); + doAssert(1f, random().nextInt(Integer.MAX_VALUE), 1f, random().nextInt(Integer.MAX_VALUE)); } } From 8d129cee2f16f1456940dde353a38a45f737cc88 Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Mon, 26 May 2025 16:18:37 +0800 Subject: [PATCH 13/24] iter --- .../src/java/org/apache/lucene/search/TopScoreDocCollector.java | 1 - 1 file changed, 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java index 
8d981ae9525e..295b15e7c4b9 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java @@ -82,7 +82,6 @@ public LeafCollector getLeafCollector(LeafReaderContext context) throws IOExcept // that at this point top() is already initialized. private long topCode = heap.top(); private int topScore = DocScoreEncoder.toIntScore(topCode); - ; private int minCompetitiveScore; @Override From dea53e8a9280bdf2f2a244640cb0374937156d9c Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Mon, 26 May 2025 16:19:05 +0800 Subject: [PATCH 14/24] iter --- .../src/java/org/apache/lucene/search/TopScoreDocCollector.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java index 295b15e7c4b9..c218dc185631 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java @@ -78,8 +78,6 @@ public LeafCollector getLeafCollector(LeafReaderContext context) throws IOExcept return new LeafCollector() { private Scorable scorer; - // HitQueue implements getSentinelObject to return a ScoreDoc, so we know - // that at this point top() is already initialized. 
private long topCode = heap.top(); private int topScore = DocScoreEncoder.toIntScore(topCode); private int minCompetitiveScore; From de56623f4ff0f5dcc68af82e8f550718df12c7b6 Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Mon, 26 May 2025 16:25:45 +0800 Subject: [PATCH 15/24] license --- .../apache/lucene/search/DocScoreEncoder.java | 32 +++++++------------ .../lucene/search/TestDocScoreEncoder.java | 32 +++++++------------ 2 files changed, 24 insertions(+), 40 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java b/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java index 0d6bb6cc6c41..6c523a707b84 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java +++ b/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java @@ -1,26 +1,18 @@ /* - * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. + * http://www.apache.org/licenses/LICENSE-2.0 * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ package org.apache.lucene.search; diff --git a/lucene/core/src/test/org/apache/lucene/search/TestDocScoreEncoder.java b/lucene/core/src/test/org/apache/lucene/search/TestDocScoreEncoder.java index 919bf91392bc..0b3afc20e37e 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestDocScoreEncoder.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestDocScoreEncoder.java @@ -1,26 +1,18 @@ /* - * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. + * http://www.apache.org/licenses/LICENSE-2.0 * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ package org.apache.lucene.search; From d1ac4b11f3355a6f2fca32fe44e25338fc0a9933 Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Wed, 28 May 2025 13:44:41 +0800 Subject: [PATCH 16/24] Revert "Merge branch 'opt_term_query' into int_score" This reverts commit 212d73d35c0803bf955beb7f97be748525b60696, reversing changes made to ac598dfa54614c43ae7b6cf1e24482639a321f03. 
--- .../lucene/search/BatchScoreBulkScorer.java | 68 +++++++++++++++++++ .../org/apache/lucene/search/TermQuery.java | 2 +- .../java/org/apache/lucene/search/Weight.java | 47 +------------ .../lucene/search/TestBooleanScorer.java | 2 +- 4 files changed, 71 insertions(+), 48 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/search/BatchScoreBulkScorer.java diff --git a/lucene/core/src/java/org/apache/lucene/search/BatchScoreBulkScorer.java b/lucene/core/src/java/org/apache/lucene/search/BatchScoreBulkScorer.java new file mode 100644 index 000000000000..c000209d2fc1 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/BatchScoreBulkScorer.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search; + +import java.io.IOException; +import org.apache.lucene.util.Bits; + +/** + * A bulk scorer used when {@link ScoreMode#needsScores()} is true and {@link + * Scorer#nextDocsAndScores} has optimizations to run faster than one-by-one iteration. 
+ */ +class BatchScoreBulkScorer extends BulkScorer { + + private final SimpleScorable scorable = new SimpleScorable(); + private final DocAndScoreBuffer buffer = new DocAndScoreBuffer(); + private final Scorer scorer; + + BatchScoreBulkScorer(Scorer scorer) { + this.scorer = scorer; + } + + @Override + public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException { + if (collector.competitiveIterator() != null) { + return new Weight.DefaultBulkScorer(scorer).score(collector, acceptDocs, min, max); + } + + collector.setScorer(scorable); + scorer.setMinCompetitiveScore(scorable.minCompetitiveScore); + + if (scorer.docID() < min) { + scorer.iterator().advance(min); + } + + for (scorer.nextDocsAndScores(max, acceptDocs, buffer); + buffer.size > 0; + scorer.nextDocsAndScores(max, acceptDocs, buffer)) { + for (int i = 0, size = buffer.size; i < size; i++) { + float score = scorable.score = buffer.scores[i]; + if (score >= scorable.minCompetitiveScore) { + collector.collect(buffer.docs[i]); + } + } + scorer.setMinCompetitiveScore(scorable.minCompetitiveScore); + } + + return scorer.docID(); + } + + @Override + public long cost() { + return scorer.iterator().cost(); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java index 736db6b963dc..6c29c23095b4 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java @@ -173,7 +173,7 @@ public BulkScorer bulkScorer() throws IOException { return ConstantScoreScorerSupplier.fromIterator(iterator, 0f, scoreMode, maxDoc) .bulkScorer(); } - return new DefaultBulkScorer(get(Long.MAX_VALUE), scoreMode); + return new BatchScoreBulkScorer(get(Long.MAX_VALUE)); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/Weight.java b/lucene/core/src/java/org/apache/lucene/search/Weight.java index c4b784144afa..341dd3cadf6a 
100644 --- a/lucene/core/src/java/org/apache/lucene/search/Weight.java +++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java @@ -231,17 +231,9 @@ protected static class DefaultBulkScorer extends BulkScorer { private final Scorer scorer; private final DocIdSetIterator iterator; private final TwoPhaseIterator twoPhase; - private final ScoreMode scoreMode; - private DocAndScoreBuffer buffer; - private SimpleScorable scorable; /** Sole constructor. */ public DefaultBulkScorer(Scorer scorer) { - this(scorer, null); - } - - /** Sole constructor. */ - public DefaultBulkScorer(Scorer scorer, ScoreMode scoreMode) { this.scorer = Objects.requireNonNull(scorer); this.twoPhase = scorer.twoPhaseIterator(); if (twoPhase == null) { @@ -249,7 +241,6 @@ public DefaultBulkScorer(Scorer scorer, ScoreMode scoreMode) { } else { this.iterator = twoPhase.approximation(); } - this.scoreMode = scoreMode; } @Override @@ -260,13 +251,8 @@ public long cost() { @Override public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException { - DocIdSetIterator competitiveIterator = collector.competitiveIterator(); - - if (scoreMode != null && scoreMode.needsScores() && competitiveIterator == null) { - return batchScore(collector, acceptDocs, min, max); - } - collector.setScorer(scorer); + DocIdSetIterator competitiveIterator = collector.competitiveIterator(); if (competitiveIterator != null) { if (competitiveIterator.docID() > min) { @@ -304,37 +290,6 @@ public int score(LeafCollector collector, Bits acceptDocs, int min, int max) return iterator.docID(); } - private int batchScore(LeafCollector collector, Bits acceptDocs, int min, int max) - throws IOException { - if (buffer == null) { - buffer = new DocAndScoreBuffer(); - } - if (scorable == null) { - scorable = new SimpleScorable(); - } - - collector.setScorer(scorable); - scorer.setMinCompetitiveScore(scorable.minCompetitiveScore); - - if (scorer.docID() < min) { - scorer.iterator().advance(min); - } - - 
for (scorer.nextDocsAndScores(max, acceptDocs, buffer); - buffer.size > 0; - scorer.nextDocsAndScores(max, acceptDocs, buffer)) { - for (int i = 0, size = buffer.size; i < size; i++) { - float score = scorable.score = buffer.scores[i]; - if (score >= scorable.minCompetitiveScore) { - collector.collect(buffer.docs[i]); - } - } - scorer.setMinCompetitiveScore(scorable.minCompetitiveScore); - } - - return scorer.docID(); - } - private static void scoreIterator( LeafCollector collector, Bits acceptDocs, DocIdSetIterator iterator, int max) throws IOException { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java index 67c7ed2fe5a0..2e1ad202f0be 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java @@ -201,7 +201,7 @@ public void testOptimizeTopLevelClauseOrNull() throws IOException { weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE, 1); ss = weight.scorerSupplier(ctx); scorer = ((BooleanScorerSupplier) ss).booleanScorer(); - assertThat(scorer, instanceOf(DefaultBulkScorer.class)); // term scorer + assertThat(scorer, instanceOf(BatchScoreBulkScorer.class)); // term scorer w.close(); reader.close(); From 5f947251291829cf8990804a9d7086b98313b834 Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Wed, 28 May 2025 14:45:52 +0800 Subject: [PATCH 17/24] minimum override --- .../lucene/search/TopDocsCollector.java | 17 ++++++--- .../lucene/search/TopScoreDocCollector.java | 37 +------------------ 2 files changed, 14 insertions(+), 40 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/TopDocsCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopDocsCollector.java index 3cb4024fe498..337e8d5acacd 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TopDocsCollector.java +++ 
b/lucene/core/src/java/org/apache/lucene/search/TopDocsCollector.java @@ -153,18 +153,25 @@ public TopDocs topDocs(int start, int howMany) { howMany = Math.min(size - start, howMany); ScoreDoc[] results = new ScoreDoc[howMany]; - // pq's pop() returns the 'least' element in the queue, therefore need - // to discard the first ones, until we reach the requested range. + // Prune the least competitive hits until we reach the requested range. // Note that this loop will usually not be executed, since the common usage // should be that the caller asks for the last howMany results. However it's // needed here for completeness. - for (int i = pq.size() - start - howMany; i > 0; i--) { - pq.pop(); - } + pruneLeastCompetitiveHitsTo(start + howMany); // Get the requested results from pq. populateResults(results, howMany); return newTopDocs(results, start); } + + /** + * Prune the least competitive hits until the number of candidates is less than or equal to {@code + * keep}. This is typically called before {@link #populateResults} to ensure we are at right pos. + */ + protected void pruneLeastCompetitiveHitsTo(int keep) { + for (int i = pq.size() - keep; i > 0; i--) { + pq.pop(); + } + } } diff --git a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java index c218dc185631..81e3fbe50f5e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java @@ -191,42 +191,9 @@ protected void populateResults(ScoreDoc[] results, int howMany) { } @Override - public TopDocs topDocs(int start, int howMany) { - // In case pq was populated with sentinel values, there might be less - // results than pq.size(). Therefore return all results until either - // pq.size() or totalHits. 
- int size = topDocsSize(); - - if (howMany < 0) { - throw new IllegalArgumentException( - "Number of hits requested must be greater than 0 but value was " + howMany); - } - - if (start < 0) { - throw new IllegalArgumentException( - "Expected value of starting position is between 0 and " + size + ", got " + start); - } - - if (start >= size || howMany == 0) { - return newTopDocs(null, start); - } - - // We know that start < pqsize, so just fix howMany. - howMany = Math.min(size - start, howMany); - ScoreDoc[] results = new ScoreDoc[howMany]; - - // pq's pop() returns the 'least' element in the queue, therefore need - // to discard the first ones, until we reach the requested range. - // Note that this loop will usually not be executed, since the common usage - // should be that the caller asks for the last howMany results. However it's - // needed here for completeness. - for (int i = heap.size() - start - howMany; i > 0; i--) { + protected void pruneLeastCompetitiveHitsTo(int keep) { + for (int i = heap.size() - keep; i > 0; i--) { heap.pop(); } - - // Get the requested results from pq. 
- populateResults(results, howMany); - - return newTopDocs(results, start); } } From b729ea7ad0cff917d40c1b2cbd826052106904fd Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Wed, 28 May 2025 14:47:27 +0800 Subject: [PATCH 18/24] iter --- lucene/core/src/java/org/apache/lucene/search/TermScorer.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java index 30da9024eda1..db782692f45a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java @@ -134,10 +134,6 @@ public void nextDocsAndScores(int upTo, Bits liveDocs, DocAndScoreBuffer buffer) docAndFreqBuffer = new DocAndFreqBuffer(); } - if (impactsDisi != null) { - impactsDisi.ensureCompetitive(); - } - for (; ; ) { if (impactsDisi != null) { impactsDisi.ensureCompetitive(); From 776e6e894253874fe0da7a2f3e1cd014d9d1893b Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Wed, 28 May 2025 14:51:54 +0800 Subject: [PATCH 19/24] CHANGES --- lucene/CHANGES.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 10f27f3bbf83..d561ec29f0dd 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -128,6 +128,8 @@ Optimizations * GITHUB#14674: Optimize AbstractKnnVectorQuery#createBitSet with intoBitset. (Guo Feng) +* GITHUB#14714: Move HitQueue in TopScoreDocCollector to a LongHeap. 
(Guo Feng) + Bug Fixes --------------------- * GITHUB#14654: ValueSource.fromDoubleValuesSource(dvs).getSortField() would throw errors when From e650ac4db19a03f417afab67b24406a8a684b7d4 Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Wed, 28 May 2025 15:09:04 +0800 Subject: [PATCH 20/24] simplify --- .../src/java/org/apache/lucene/search/DocScoreEncoder.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java b/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java index 6c523a707b84..761d074ea84f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java +++ b/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java @@ -38,7 +38,7 @@ static long encode(int docId, float score) { } static long encodeIntScore(int docId, int score) { - return (((long) score) << 32) | (Integer.MAX_VALUE - docId); + return (((long) score) << 32) | (~docId & 0xFFFFFFFFL); } static float toScore(long value) { @@ -50,7 +50,7 @@ static int toIntScore(long value) { } static int docId(long value) { - return Integer.MAX_VALUE - ((int) value); + return (int) ~value; } static int nextUp(int intScore) { From 2ed31cd95ae40884ead867504753d19fc8668055 Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Thu, 29 May 2025 20:52:49 +0800 Subject: [PATCH 21/24] reveiew iter --- .../apache/lucene/search/DocScoreEncoder.java | 39 +------------------ .../lucene/search/TopScoreDocCollector.java | 31 ++++++++------- 2 files changed, 18 insertions(+), 52 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java b/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java index 761d074ea84f..6c45439f9437 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java +++ b/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java @@ -31,51 +31,16 @@ class DocScoreEncoder { static final long LEAST_COMPETITIVE_CODE = 
encode(Integer.MAX_VALUE, Float.NEGATIVE_INFINITY); - private static final int POS_INF_TO_SORTABLE_INT = scoreToSortableInt(Float.POSITIVE_INFINITY); static long encode(int docId, float score) { - return encodeIntScore(docId, scoreToSortableInt(score)); - } - - static long encodeIntScore(int docId, int score) { - return (((long) score) << 32) | (~docId & 0xFFFFFFFFL); + return (((long) NumericUtils.floatToSortableInt(score)) << 32) | (~docId & 0xFFFFFFFFL); } static float toScore(long value) { - return sortableIntToScore(toIntScore(value)); - } - - static int toIntScore(long value) { - return (int) (value >>> 32); + return NumericUtils.sortableIntToFloat((int) (value >>> 32)); } static int docId(long value) { return (int) ~value; } - - static int nextUp(int intScore) { - assert intScore <= POS_INF_TO_SORTABLE_INT; - int nextUp = Math.min(POS_INF_TO_SORTABLE_INT, intScore + 1); - assert nextUp == scoreToSortableInt(Math.nextUp(sortableIntToScore(intScore))); - return nextUp; - } - - /** - * Score is non-negative float so wo use floatToRawIntBits instead of {@link - * NumericUtils#floatToSortableInt}. We do not assert score >= 0 here to allow pass negative float - * to indicate totally non-competitive, e.g. {@link #LEAST_COMPETITIVE_CODE}. 
- */ - static int scoreToSortableInt(float score) { - assert Float.isNaN(score) == false; - return Float.floatToRawIntBits(score); - } - - /** - * @see #scoreToSortableInt(float) - */ - static float sortableIntToScore(int scoreBits) { - float score = Float.intBitsToFloat(scoreBits); - assert Float.isNaN(score) == false; - return score; - } } diff --git a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java index 81e3fbe50f5e..888f84a8f5ae 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java @@ -20,6 +20,7 @@ import java.util.stream.IntStream; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.util.LongHeap; +import org.apache.lucene.util.NumericUtils; /** * A {@link Collector} implementation that collects the top-scoring hits, returning them as a {@link @@ -65,13 +66,13 @@ public ScoreMode scoreMode() { public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { final int docBase = context.docBase; final ScoreDoc after = this.after; - final int afterScore; + final float afterScore; final int afterDoc; if (after == null) { afterScore = Integer.MAX_VALUE; afterDoc = DocIdSetIterator.NO_MORE_DOCS; } else { - afterScore = DocScoreEncoder.scoreToSortableInt(after.score); + afterScore = NumericUtils.floatToSortableInt(after.score); afterDoc = after.doc - context.docBase; } @@ -79,8 +80,8 @@ public LeafCollector getLeafCollector(LeafReaderContext context) throws IOExcept private Scorable scorer; private long topCode = heap.top(); - private int topScore = DocScoreEncoder.toIntScore(topCode); - private int minCompetitiveScore; + private float topScore = DocScoreEncoder.toScore(topCode); + private float minCompetitiveScore; @Override public void setScorer(Scorable scorer) throws IOException { @@ -94,7 +95,7 @@ public void 
setScorer(Scorable scorer) throws IOException { @Override public void collect(int doc) throws IOException { - final int score = DocScoreEncoder.scoreToSortableInt(scorer.score()); + float score = scorer.score(); int hitCountSoFar = ++totalHits; @@ -129,10 +130,10 @@ public void collect(int doc) throws IOException { } } - private void collectCompetitiveHit(int doc, int score) throws IOException { - final long code = DocScoreEncoder.encodeIntScore(doc + docBase, score); + private void collectCompetitiveHit(int doc, float score) throws IOException { + final long code = DocScoreEncoder.encode(doc + docBase, score); topCode = heap.updateTop(code); - topScore = DocScoreEncoder.toIntScore(topCode); + topScore = DocScoreEncoder.toScore(topCode); updateMinCompetitiveScore(scorer); } @@ -143,11 +144,10 @@ private void updateGlobalMinCompetitiveScore(Scorable scorer) throws IOException // since we tie-break on doc id and collect in doc id order we can require // the next float if the global minimum score is set on a document id that is // smaller than the ids in the current leaf - int score = DocScoreEncoder.toIntScore(maxMinScore); - score = - docBase >= DocScoreEncoder.docId(maxMinScore) ? DocScoreEncoder.nextUp(score) : score; + float score = DocScoreEncoder.toScore(maxMinScore); + score = docBase >= DocScoreEncoder.docId(maxMinScore) ? 
Math.nextUp(score) : score; if (score > minCompetitiveScore) { - scorer.setMinCompetitiveScore(DocScoreEncoder.sortableIntToScore(score)); + scorer.setMinCompetitiveScore(score); minCompetitiveScore = score; totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO; } @@ -156,10 +156,11 @@ private void updateGlobalMinCompetitiveScore(Scorable scorer) throws IOException private void updateMinCompetitiveScore(Scorable scorer) throws IOException { if (totalHits > totalHitsThreshold) { - if (topScore >= minCompetitiveScore) { - minCompetitiveScore = DocScoreEncoder.nextUp(topScore); - scorer.setMinCompetitiveScore(DocScoreEncoder.sortableIntToScore(minCompetitiveScore)); + float localMinScore = Math.nextUp(topScore); + if (localMinScore > minCompetitiveScore) { + scorer.setMinCompetitiveScore(localMinScore); totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO; + minCompetitiveScore = localMinScore; if (minScoreAcc != null) { // we don't use the next float but we register the document id so that other leaves or // leaf partitions can require it if they are after the current maximum From 4b4878bc5e6447af7d5bc4898a9661ae3cd74937 Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Thu, 29 May 2025 20:53:57 +0800 Subject: [PATCH 22/24] fix doc --- .../src/java/org/apache/lucene/search/DocScoreEncoder.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java b/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java index 6c45439f9437..a0cd8972a77f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java +++ b/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java @@ -23,10 +23,6 @@ * An encoder do encode (doc, score) pair as a long whose sort order is same as {@code (o1, o2) -> * Float.compare(o1.score, o2.score)).thenComparing(Comparator.comparingInt((ScoreDoc o) -> * o.doc).reversed())} - * - *

Note that negative score is allowed but relationship between two codes encoded by negative - * scores is undefined. The only thing guaranteed is codes encoded from negative scores are smaller - * than codes encoded from non-negative scores. */ class DocScoreEncoder { From 8f1abc65c93d0b5bebf369cc1e930cf4cf1e65c0 Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Thu, 29 May 2025 23:04:31 +0800 Subject: [PATCH 23/24] fix --- .../org/apache/lucene/search/DocScoreEncoder.java | 4 ++-- .../apache/lucene/search/TopScoreDocCollector.java | 9 ++++++--- .../apache/lucene/search/TestDocScoreEncoder.java | 14 +++----------- 3 files changed, 11 insertions(+), 16 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java b/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java index a0cd8972a77f..9ae86f007b36 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java +++ b/lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java @@ -29,7 +29,7 @@ class DocScoreEncoder { static final long LEAST_COMPETITIVE_CODE = encode(Integer.MAX_VALUE, Float.NEGATIVE_INFINITY); static long encode(int docId, float score) { - return (((long) NumericUtils.floatToSortableInt(score)) << 32) | (~docId & 0xFFFFFFFFL); + return (((long) NumericUtils.floatToSortableInt(score)) << 32) | (Integer.MAX_VALUE - docId); } static float toScore(long value) { @@ -37,6 +37,6 @@ static float toScore(long value) { } static int docId(long value) { - return (int) ~value; + return Integer.MAX_VALUE - ((int) value); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java index 888f84a8f5ae..556f4b7a6cfa 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java @@ -20,7 +20,6 @@ import java.util.stream.IntStream; import 
org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.util.LongHeap; -import org.apache.lucene.util.NumericUtils; /** * A {@link Collector} implementation that collects the top-scoring hits, returning them as a {@link @@ -69,10 +68,10 @@ public LeafCollector getLeafCollector(LeafReaderContext context) throws IOExcept final float afterScore; final int afterDoc; if (after == null) { - afterScore = Integer.MAX_VALUE; + afterScore = Float.POSITIVE_INFINITY; afterDoc = DocIdSetIterator.NO_MORE_DOCS; } else { - afterScore = NumericUtils.floatToSortableInt(after.score); + afterScore = after.score; afterDoc = after.doc - context.docBase; } @@ -156,6 +155,10 @@ private void updateGlobalMinCompetitiveScore(Scorable scorer) throws IOException private void updateMinCompetitiveScore(Scorable scorer) throws IOException { if (totalHits > totalHitsThreshold) { + // since we tie-break on doc id and collect in doc id order, we can require the next float + // pqTop is never null since TopScoreDocCollector fills the priority queue with sentinel + // values if the top element is a sentinel value, its score will be -Infty and the below + // logic is still valid float localMinScore = Math.nextUp(topScore); if (localMinScore > minCompetitiveScore) { scorer.setMinCompetitiveScore(localMinScore); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestDocScoreEncoder.java b/lucene/core/src/test/org/apache/lucene/search/TestDocScoreEncoder.java index 0b3afc20e37e..c0e983466b14 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestDocScoreEncoder.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestDocScoreEncoder.java @@ -57,20 +57,12 @@ private void doAssert(float score1, int doc1, float score2, int doc2) { assertEquals(score1, DocScoreEncoder.toScore(code1), 0f); assertEquals(score2, DocScoreEncoder.toScore(code2), 0f); - if (score1 < 0 && score2 < 0) { - return; - } - - if (score1 < 0) { - assertTrue(code1 < code2); - } else if (score2 < 0) { - 
assertTrue(code2 < code1); - } else if (score1 == score2 && doc1 == doc2) { - assertEquals(code1, code2); - } else if (score1 < score2) { + if (score1 < score2) { assertTrue(code1 < code2); } else if (score1 > score2) { assertTrue(code1 > code2); + } else if (doc1 == doc2) { + assertEquals(code1, code2); } else { assertEquals(code1 > code2, doc1 < doc2); } From 784058db864d256d78a7272733843cb6538a45ca Mon Sep 17 00:00:00 2001 From: "guofeng.my" Date: Wed, 4 Jun 2025 14:22:23 +0800 Subject: [PATCH 24/24] review iter --- .../apache/lucene/search/TopScoreDocCollector.java | 4 +--- .../src/java/org/apache/lucene/util/LongHeap.java | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java index 556f4b7a6cfa..2ab46cb38362 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java @@ -17,7 +17,6 @@ package org.apache.lucene.search; import java.io.IOException; -import java.util.stream.IntStream; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.util.LongHeap; @@ -42,8 +41,7 @@ public class TopScoreDocCollector extends TopDocsCollector { TopScoreDocCollector( int numHits, ScoreDoc after, int totalHitsThreshold, MaxScoreAccumulator minScoreAcc) { super(null); - this.heap = new LongHeap(numHits); - IntStream.range(0, numHits).forEach(_ -> heap.push(DocScoreEncoder.LEAST_COMPETITIVE_CODE)); + this.heap = new LongHeap(numHits, DocScoreEncoder.LEAST_COMPETITIVE_CODE); this.after = after; this.totalHitsThreshold = totalHitsThreshold; this.minScoreAcc = minScoreAcc; diff --git a/lucene/core/src/java/org/apache/lucene/util/LongHeap.java b/lucene/core/src/java/org/apache/lucene/util/LongHeap.java index b1f64fa48270..f47e12d30797 100644 --- 
a/lucene/core/src/java/org/apache/lucene/util/LongHeap.java +++ b/lucene/core/src/java/org/apache/lucene/util/LongHeap.java @@ -16,6 +16,8 @@ */ package org.apache.lucene.util; +import java.util.Arrays; + /** * A min heap that stores longs; a primitive priority queue that like all priority queues maintains * a partial ordering of its elements such that the least element can always be found in constant @@ -33,6 +35,18 @@ public final class LongHeap { private long[] heap; private int size = 0; + /** + * Constructs a heap with specified size and initializes all elements with the given value. + * + * @param size the number of elements to initialize in the heap. + * @param initialValue the value to fill the heap with. + */ + public LongHeap(int size, long initialValue) { + this(size); + Arrays.fill(heap, 1, size + 1, initialValue); + this.size = size; + } + /** * Create an empty priority queue of the configured initial size. *