Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,9 @@ New Features

* GITHUB#14784: Make pack methods public for BigIntegerPoint and HalfFloatPoint. (Prudhvi Godithi)

* GITHUB#14009: Add a new Query that can rescore another Query based on a generic DoubleValuesSource
and trim the results down to the top N. (Anh Dung Bui)

Improvements
---------------------
* GITHUB#14458: Add an IndexDeletion policy that retains the last N commits. (Owais Kazi)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,8 @@
*/
package org.apache.lucene.search;

import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
Expand Down Expand Up @@ -142,7 +138,7 @@ public Query rewrite(IndexSearcher indexSearcher) throws IOException {
if (topK.scoreDocs.length == 0) {
return new MatchNoDocsQuery();
}
return createRewrittenQuery(reader, topK, reentryCount);
return DocAndScoreQuery.createDocAndScoreQuery(reader, topK, reentryCount);
}

private TopDocs runSearchTasks(
Expand Down Expand Up @@ -398,46 +394,6 @@ public KnnCollector newCollector(
}
}

/**
 * Builds the {@link DocAndScoreQuery} that stands in for this query after the top hits have been
 * collected.
 *
 * <p>The max score is read from the first entry of {@code topK.scoreDocs} before that array is
 * re-sorted (in place) by ascending doc id; presumably the caller hands the hits over ordered by
 * descending score, so the first entry is the best one -- TODO confirm against the collector.
 *
 * @param reader the reader the hits were collected from; supplies the leaves used to compute
 *     per-segment offsets and the context identity stored in the resulting query
 * @param topK the collected hits; must contain at least one entry. Its {@code scoreDocs} array is
 *     reordered by doc id as a side effect
 * @param reentryCount passed through unchanged to the resulting query
 * @return a query holding the hits as parallel doc/score arrays
 */
protected Query createRewrittenQuery(IndexReader reader, TopDocs topK, int reentryCount) {
  final var hits = topK.scoreDocs;
  final int hitCount = hits.length;
  assert hitCount > 0;

  // Must be captured before the sort below reorders the array.
  final float bestScore = hits[0].score;
  Arrays.sort(hits, Comparator.comparingInt(hit -> hit.doc));

  // Split the sorted hits into two parallel primitive arrays.
  final int[] docIds = new int[hitCount];
  final float[] docScores = new float[hitCount];
  int slot = 0;
  for (var hit : hits) {
    docIds[slot] = hit.doc;
    docScores[slot] = hit.score;
    slot++;
  }

  final int[] perSegmentOffsets = findSegmentStarts(reader.leaves(), docIds);
  final long visitedCount = topK.totalHits.value();
  final Object readerIdentity = reader.getContext().id();
  return new DocAndScoreQuery(
      docIds,
      docScores,
      bestScore,
      perSegmentOffsets,
      visitedCount,
      readerIdentity,
      reentryCount);
}

/**
 * Computes, for every leaf, the index in {@code docs} of the first doc id that is greater than or
 * equal to that leaf's {@code docBase}.
 *
 * @param leaves the reader's leaves; their {@code docBase} values are searched for in order
 * @param docs global doc ids in ascending order (required by the binary search)
 * @return an array of length {@code leaves.size() + 1}: entry 0 is 0 when there is at least one
 *     leaf, entry {@code i} is the start offset of leaf {@code i}, and the final entry is {@code
 *     docs.length}
 */
static int[] findSegmentStarts(List<LeafReaderContext> leaves, int[] docs) {
  final int leafCount = leaves.size();
  final int[] offsets = new int[leafCount + 1];
  offsets[leafCount] = docs.length;

  // Leaf 0 always starts at offset 0 (the array default); with a single leaf the loop is skipped.
  int searchFrom = 0;
  for (int ord = 1; ord < leafCount; ord++) {
    final int docBase = leaves.get(ord).docBase;
    final int hit = Arrays.binarySearch(docs, searchFrom, docs.length, docBase);
    // A negative result encodes the insertion point as -(insertionPoint) - 1.
    searchFrom = hit >= 0 ? hit : -(hit + 1);
    offsets[ord] = searchFrom;
  }
  return offsets;
}

@Override
public void visit(QueryVisitor visitor) {
if (visitor.acceptField(field)) {
Expand Down Expand Up @@ -483,199 +439,6 @@ public Query getFilter() {
return filter;
}

/**
* Caches the results of a KnnVector search: a list of docs and their scores.
*
* <p>The hits are held as two parallel arrays ({@code docs} and {@code scores}) plus a table of
* per-segment offsets into them. The query can only be executed against the reader context it was
* built for; {@link #createWeight} rejects any other one.
*/
static class DocAndScoreQuery extends Query {

// global doc ids of the hits; searched with Arrays.binarySearch, so must be ascending
private final int[] docs;
// scores[i] is the score of docs[i]
private final float[] scores;
// reported (multiplied by the boost) as the max score of every scorer
private final float maxScore;
// segmentStarts[ord] .. segmentStarts[ord + 1] (exclusive) is the slice of docs/scores that
// belongs to the leaf with that ord
private final int[] segmentStarts;
private final long visited;
// compared by reference against the searcher's top-level reader context id
private final Object contextIdentity;
private final int reentryCount;

/**
* Constructor
*
* @param docs the global docids of documents that match, in ascending order
* @param scores the scores of the matching documents, parallel to {@code docs}
* @param maxScore the value reported, multiplied by the boost, from the scorer's {@code
*     getMaxScore}; it is passed in rather than derived from {@code scores}
* @param segmentStarts the indexes in docs and scores corresponding to the first matching
* document in each segment. If a segment has no matching documents, it should be assigned
* the index of the next segment that does. There should be a final entry that is always
* docs.length (the exclusive end of the last segment's slice).
* @param visited the number of graph nodes that were visited, and for which vector distance
* scores were evaluated.
* @param contextIdentity an object identifying the reader context that was used to build this
* query
* @param reentryCount stored as-is and exposed through {@link #reentryCount()}; its meaning is
*     defined by the caller
*/
DocAndScoreQuery(
int[] docs,
float[] scores,
float maxScore,
int[] segmentStarts,
long visited,
Object contextIdentity,
int reentryCount) {
this.docs = docs;
this.scores = scores;
this.maxScore = maxScore;
this.segmentStarts = segmentStarts;
this.visited = visited;
this.contextIdentity = contextIdentity;
this.reentryCount = reentryCount;
}

/** Returns the reentry count this query was constructed with. */
int reentryCount() {
return reentryCount;
}

/**
* Creates a weight that replays the cached hits, scaling every score by {@code boost}.
*
* @throws IllegalStateException if the searcher's top-level reader context id is not the same
*     object as the identity this query was built with
*/
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
throws IOException {
// identity (not equals) comparison: the cached global doc ids only make sense for that reader
if (searcher.getIndexReader().getContext().id() != contextIdentity) {
throw new IllegalStateException("This DocAndScore query was created by a different reader");
}
return new Weight(this) {
@Override
public Explanation explain(LeafReaderContext context, int doc) {
// doc is segment-local; add docBase to look it up among the global ids
int found = Arrays.binarySearch(docs, doc + context.docBase);
if (found < 0) {
return Explanation.noMatch("not in top " + docs.length + " docs");
}
return Explanation.match(scores[found] * boost, "within top " + docs.length + " docs");
}

@Override
public int count(LeafReaderContext context) {
// size of this segment's slice of the cached hits
return segmentStarts[context.ord + 1] - segmentStarts[context.ord];
}

@Override
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
// empty slice: this segment has no cached hits
if (segmentStarts[context.ord] == segmentStarts[context.ord + 1]) {
return null;
}
final var scorer =
new Scorer() {
// [lower, upper) is this segment's slice of docs/scores
final int lower = segmentStarts[context.ord];
final int upper = segmentStarts[context.ord + 1];
// current index into docs/scores; -1 until the iterator is first advanced
int upTo = -1;

@Override
public DocIdSetIterator iterator() {
// the iterator shares upTo with the enclosing scorer
return new DocIdSetIterator() {
@Override
public int docID() {
return docIdNoShadow();
}

@Override
public int nextDoc() {
// the first call positions on the start of the slice; later calls step forward
if (upTo == -1) {
upTo = lower;
} else {
++upTo;
}
return docIdNoShadow();
}

@Override
public int advance(int target) throws IOException {
// NOTE(review): presumably slowAdvance steps via nextDoc() until target is reached
return slowAdvance(target);
}

@Override
public long cost() {
// number of cached hits in this segment
return upper - lower;
}
};
}

@Override
public float getMaxScore(int docId) {
// same bound for every doc: the stored max score scaled by the boost
return maxScore * boost;
}

@Override
public float score() {
// indexes scores directly with upTo, so only valid while positioned on a document
return scores[upTo] * boost;
}

/**
* move the implementation of docID() into a differently-named method so we can call
* it from DocIDSetIterator.docID() even though this class is anonymous
*
* @return the current docid
*/
private int docIdNoShadow() {
if (upTo == -1) {
return -1;
}
if (upTo >= upper) {
return NO_MORE_DOCS;
}
// convert the cached global doc id back to a segment-local one
return docs[upTo] - context.docBase;
}

@Override
public int docID() {
return docIdNoShadow();
}
};
return new DefaultScorerSupplier(scorer);
}

@Override
public boolean isCacheable(LeafReaderContext ctx) {
return true;
}
};
}

/** Prints the first doc id, the first score and the max score; reads index 0 of both arrays. */
@Override
public String toString(String field) {
return "DocAndScoreQuery[" + docs[0] + ",...][" + scores[0] + ",...]," + maxScore;
}

@Override
public void visit(QueryVisitor visitor) {
visitor.visitLeaf(this);
}

/** Returns the visited count this query was constructed with. */
public long visited() {
return visited;
}

/**
* Two instances are equal when they share the same context identity (by reference) and have
* equal {@code docs} and {@code scores} contents; the remaining fields are not compared.
*/
@Override
public boolean equals(Object obj) {
if (sameClassAs(obj) == false) {
return false;
}
return contextIdentity == ((DocAndScoreQuery) obj).contextIdentity
&& Arrays.equals(docs, ((DocAndScoreQuery) obj).docs)
&& Arrays.equals(scores, ((DocAndScoreQuery) obj).scores);
}

/** Hashes the same fields that {@link #equals} compares, plus the class hash. */
@Override
public int hashCode() {
return Objects.hash(
classHash(), contextIdentity, Arrays.hashCode(docs), Arrays.hashCode(scores));
}
}

/**
 * Returns the value of the {@code searchStrategy} field.
 *
 * @return the search strategy currently stored in this query
 */
public KnnSearchStrategy getSearchStrategy() {
  return this.searchStrategy;
}
Expand Down
Loading
Loading