Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ New Features

* GITHUB#14784: Make pack methods public for BigIntegerPoint and HalfFloatPoint. (Prudhvi Godithi)

* GITHUB#14009: Add a new Query that can rescore another Query based on a generic DoubleValueSource
and trim the results down to the top N (Anh Dung Bui)

Improvements
---------------------
* GITHUB#14458: Add an IndexDeletion policy that retains the last N commits. (Owais Kazi)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,8 @@
*/
package org.apache.lucene.search;

import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.Callable;
Expand Down Expand Up @@ -106,7 +102,7 @@ public Query rewrite(IndexSearcher indexSearcher) throws IOException {
if (topK.scoreDocs.length == 0) {
return new MatchNoDocsQuery();
}
return createRewrittenQuery(reader, topK);
return DocAndScoreQuery.createDocAndScoreQuery(reader, topK);
}

private TopDocs searchLeaf(
Expand Down Expand Up @@ -275,41 +271,6 @@ protected TopDocs mergeLeafResults(TopDocs[] perLeafResults) {
return TopDocs.merge(k, perLeafResults);
}

/**
 * Turns the collected top hits into a {@code DocAndScoreQuery} over the given reader.
 *
 * <p>The score of the first entry of {@code topK.scoreDocs} is recorded as the maximum score
 * before the array is re-sorted (in place) by ascending doc id; presumably the hits arrive ordered
 * best-first, verify against the caller. The sorted doc ids and their scores are then copied into
 * parallel arrays and the per-segment start offsets are computed from the reader's leaves.
 *
 * @param reader the reader whose leaves and context identity are used
 * @param topK the hits to wrap; must contain at least one entry, and its {@code scoreDocs} array
 *     is reordered by this call
 * @return a query replaying exactly those docs and scores
 */
private Query createRewrittenQuery(IndexReader reader, TopDocs topK) {
  final int hitCount = topK.scoreDocs.length;
  assert hitCount > 0;

  // Must be read before the sort below reorders the array by doc id.
  final float bestScore = topK.scoreDocs[0].score;

  Arrays.sort(topK.scoreDocs, Comparator.comparingInt(hit -> hit.doc));

  final int[] docIds = new int[hitCount];
  final float[] docScores = new float[hitCount];
  int slot = 0;
  while (slot < hitCount) {
    docIds[slot] = topK.scoreDocs[slot].doc;
    docScores[slot] = topK.scoreDocs[slot].score;
    slot++;
  }

  return new DocAndScoreQuery(
      docIds,
      docScores,
      bestScore,
      findSegmentStarts(reader.leaves(), docIds),
      reader.getContext().id());
}

/**
 * Computes, for every leaf, the index into {@code docs} of the first doc id that is not smaller
 * than that leaf's {@code docBase}.
 *
 * @param leaves the leaves whose {@code docBase} values delimit the segments
 * @param docs doc ids to partition; they are binary-searched, so they must be sorted ascending
 * @return an array of {@code leaves.size() + 1} offsets: entry 0 is left at 0, entry {@code i} is
 *     the insertion point of leaf {@code i}'s docBase, and the last entry is {@code docs.length}
 */
static int[] findSegmentStarts(List<LeafReaderContext> leaves, int[] docs) {
  final int leafCount = leaves.size();
  final int[] starts = new int[leafCount + 1];
  starts[leafCount] = docs.length;

  // Each search resumes where the previous one ended. With a single leaf the loop never runs.
  int cursor = 0;
  for (int ord = 1; ord < leafCount; ord++) {
    final int docBase = leaves.get(ord).docBase;
    final int pos = Arrays.binarySearch(docs, cursor, docs.length, docBase);
    // A negative result encodes the insertion point as -(insertionPoint + 1).
    cursor = pos >= 0 ? pos : -(pos + 1);
    starts[ord] = cursor;
  }
  return starts;
}

@Override
public void visit(QueryVisitor visitor) {
if (visitor.acceptField(field)) {
Expand Down Expand Up @@ -355,166 +316,6 @@ public Query getFilter() {
return filter;
}

/**
 * Caches the results of a KnnVector search: a list of docs and their scores.
 *
 * <p>The query replays a fixed set of global doc ids with precomputed scores. It is bound to the
 * reader context it was built from and refuses to create a weight against any other reader.
 */
static class DocAndScoreQuery extends Query {

  private final int[] docs;
  private final float[] scores;
  private final float maxScore;
  private final int[] segmentStarts;
  private final Object contextIdentity;

  /**
   * Constructor
   *
   * @param docs the global docids of documents that match, in ascending order
   * @param scores the scores of the matching documents
   * @param maxScore the value (before boost) reported by the scorer's {@code getMaxScore}
   * @param segmentStarts the indexes in docs and scores corresponding to the first matching
   *     document in each segment. If a segment has no matching documents, it should be assigned
   *     the index of the next segment that does. There should be a final entry that is always
   *     docs.length (an exclusive end, so that the difference between consecutive entries is the
   *     number of matches in a segment).
   * @param contextIdentity an object identifying the reader context that was used to build this
   *     query
   */
  DocAndScoreQuery(
      int[] docs, float[] scores, float maxScore, int[] segmentStarts, Object contextIdentity) {
    this.docs = docs;
    this.scores = scores;
    this.maxScore = maxScore;
    this.segmentStarts = segmentStarts;
    this.contextIdentity = contextIdentity;
  }

  /**
   * Creates a weight that replays the cached docs and scores, each score multiplied by {@code
   * boost}.
   *
   * @throws IllegalStateException if the searcher's reader context is not the one this query was
   *     built from (compared by identity)
   */
  @Override
  public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
      throws IOException {
    if (searcher.getIndexReader().getContext().id() != contextIdentity) {
      throw new IllegalStateException("This DocAndScore query was created by a different reader");
    }
    return new Weight(this) {
      @Override
      public Explanation explain(LeafReaderContext context, int doc) {
        // docs holds global ids, so translate the segment-local id before searching.
        int found = Arrays.binarySearch(docs, doc + context.docBase);
        if (found < 0) {
          return Explanation.noMatch("not in top " + docs.length + " docs");
        }
        return Explanation.match(scores[found] * boost, "within top " + docs.length + " docs");
      }

      @Override
      public int count(LeafReaderContext context) {
        // Consecutive entries of segmentStarts delimit this segment's slice of docs.
        return segmentStarts[context.ord + 1] - segmentStarts[context.ord];
      }

      @Override
      public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
        // An empty slice means this segment has no cached hits.
        if (segmentStarts[context.ord] == segmentStarts[context.ord + 1]) {
          return null;
        }
        final var scorer =
            new Scorer() {
              final int lower = segmentStarts[context.ord];
              final int upper = segmentStarts[context.ord + 1];
              // Index into docs/scores of the current hit; -1 until iteration starts. The
              // position lives on the scorer, so every iterator it hands out shares it.
              int upTo = -1;

              @Override
              public DocIdSetIterator iterator() {
                return new DocIdSetIterator() {
                  @Override
                  public int docID() {
                    return docIdNoShadow();
                  }

                  @Override
                  public int nextDoc() {
                    if (upTo == -1) {
                      upTo = lower;
                    } else {
                      ++upTo;
                    }
                    return docIdNoShadow();
                  }

                  @Override
                  public int advance(int target) throws IOException {
                    return slowAdvance(target);
                  }

                  @Override
                  public long cost() {
                    return upper - lower;
                  }
                };
              }

              @Override
              public float getMaxScore(int docId) {
                return maxScore * boost;
              }

              @Override
              public float score() {
                return scores[upTo] * boost;
              }

              /**
               * move the implementation of docID() into a differently-named method so we can call
               * it from DocIDSetIterator.docID() even though this class is anonymous
               *
               * @return the current docid
               */
              private int docIdNoShadow() {
                if (upTo == -1) {
                  return -1;
                }
                if (upTo >= upper) {
                  return NO_MORE_DOCS;
                }
                // Convert the cached global id back to a segment-local id.
                return docs[upTo] - context.docBase;
              }

              @Override
              public int docID() {
                return docIdNoShadow();
              }
            };
        return new DefaultScorerSupplier(scorer);
      }

      @Override
      public boolean isCacheable(LeafReaderContext ctx) {
        return true;
      }
    };
  }

  /**
   * Renders the first doc id and score followed by the max score. An instance built with empty
   * arrays is rendered with empty brackets rather than failing on the missing first element.
   */
  @Override
  public String toString(String field) {
    if (docs.length == 0 || scores.length == 0) {
      return "DocAndScoreQuery[][]," + maxScore;
    }
    return "DocAndScoreQuery[" + docs[0] + ",...][" + scores[0] + ",...]," + maxScore;
  }

  @Override
  public void visit(QueryVisitor visitor) {
    visitor.visitLeaf(this);
  }

  /**
   * Two instances are equal when they were built from the same reader context (by identity) and
   * hold the same docs and scores.
   */
  @Override
  public boolean equals(Object obj) {
    if (sameClassAs(obj) == false) {
      return false;
    }
    return contextIdentity == ((DocAndScoreQuery) obj).contextIdentity
        && Arrays.equals(docs, ((DocAndScoreQuery) obj).docs)
        && Arrays.equals(scores, ((DocAndScoreQuery) obj).scores);
  }

  @Override
  public int hashCode() {
    return Objects.hash(
        classHash(), contextIdentity, Arrays.hashCode(docs), Arrays.hashCode(scores));
  }
}

/**
 * Returns the value of this query's {@code searchStrategy} field.
 *
 * @return the search strategy held by this query
 */
public KnnSearchStrategy getSearchStrategy() {
  return this.searchStrategy;
}
Expand Down
Loading