apache · vigyasharma · Jun 28, 2025 · Nov 14, 2024 · Nov 15, 2024 · Nov 15, 2024
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
@@ -111,6 +111,9 @@ New Features
 
 * GITHUB#14784: Make pack methods public for BigIntegerPoint and HalfFloatPoint. (Prudhvi Godithi)
 
+* GITHUB#14009: Add a new Query that can rescore another Query based on a generic DoubleValueSource
+  and trim the results down to the top N. (Anh Dung Bui)
+
 Improvements
 ---------------------
 * GITHUB#14458: Add an IndexDeletion policy that retains the last N commits. (Owais Kazi)

diff --git a/lucene/core/src/java/org/apache/lucene/search/AbstractKnnVectorQuery.java b/lucene/core/src/java/org/apache/lucene/search/AbstractKnnVectorQuery.java
@@ -16,12 +16,8 @@
  */
 package org.apache.lucene.search;
 
-import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
-
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Comparator;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
@@ -142,7 +138,7 @@ public Query rewrite(IndexSearcher indexSearcher) throws IOException {
     if (topK.scoreDocs.length == 0) {
       return new MatchNoDocsQuery();
     }
-    return createRewrittenQuery(reader, topK, reentryCount);
+    return DocAndScoreQuery.createDocAndScoreQuery(reader, topK, reentryCount);
   }
 
   private TopDocs runSearchTasks(
@@ -398,46 +394,6 @@ public KnnCollector newCollector(
     }
   }
 
-  protected Query createRewrittenQuery(IndexReader reader, TopDocs topK, int reentryCount) {
-    int len = topK.scoreDocs.length;
-    assert len > 0;
-    float maxScore = topK.scoreDocs[0].score;
-    Arrays.sort(topK.scoreDocs, Comparator.comparingInt(a -> a.doc));
-    int[] docs = new int[len];
-    float[] scores = new float[len];
-    for (int i = 0; i < len; i++) {
-      docs[i] = topK.scoreDocs[i].doc;
-      scores[i] = topK.scoreDocs[i].score;
-    }
-    int[] segmentStarts = findSegmentStarts(reader.leaves(), docs);
-    return new DocAndScoreQuery(
-        docs,
-        scores,
-        maxScore,
-        segmentStarts,
-        topK.totalHits.value(),
-        reader.getContext().id(),
-        reentryCount);
-  }
-
-  static int[] findSegmentStarts(List<LeafReaderContext> leaves, int[] docs) {
-    int[] starts = new int[leaves.size() + 1];
-    starts[starts.length - 1] = docs.length;
-    if (starts.length == 2) {
-      return starts;
-    }
-    int resultIndex = 0;
-    for (int i = 1; i < starts.length - 1; i++) {
-      int upper = leaves.get(i).docBase;
-      resultIndex = Arrays.binarySearch(docs, resultIndex, docs.length, upper);
-      if (resultIndex < 0) {
-        resultIndex = -1 - resultIndex;
-      }
-      starts[i] = resultIndex;
-    }
-    return starts;
-  }
-
   @Override
   public void visit(QueryVisitor visitor) {
     if (visitor.acceptField(field)) {
@@ -483,199 +439,6 @@ public Query getFilter() {
     return filter;
   }
 
-  /** Caches the results of a KnnVector search: a list of docs and their scores */
-  static class DocAndScoreQuery extends Query {
-
-    private final int[] docs;
-    private final float[] scores;
-    private final float maxScore;
-    private final int[] segmentStarts;
-    private final long visited;
-    private final Object contextIdentity;
-    private final int reentryCount;
-
-    /**
-     * Constructor
-     *
-     * @param docs the global docids of documents that match, in ascending order
-     * @param scores the scores of the matching documents
-     * @param maxScore the max of those scores? why do we need to pass in?
-     * @param segmentStarts the indexes in docs and scores corresponding to the first matching
-     *     document in each segment. If a segment has no matching documents, it should be assigned
-     *     the index of the next segment that does. There should be a final entry that is always
-     *     docs.length-1.
-     * @param visited the number of graph nodes that were visited, and for which vector distance
-     *     scores were evaluated.
-     * @param contextIdentity an object identifying the reader context that was used to build this
-     *     query
-     */
-    DocAndScoreQuery(
-        int[] docs,
-        float[] scores,
-        float maxScore,
-        int[] segmentStarts,
-        long visited,
-        Object contextIdentity,
-        int reentryCount) {
-      this.docs = docs;
-      this.scores = scores;
-      this.maxScore = maxScore;
-      this.segmentStarts = segmentStarts;
-      this.visited = visited;
-      this.contextIdentity = contextIdentity;
-      this.reentryCount = reentryCount;
-    }
-
-    /*
-    DocAndScoreQuery(DocAndScoreQuery other) {
-      this.docs = other.docs;
-      this.scores = other.scores;
-      this.maxScore = other.maxScore;
-      this.segmentStarts = other.segmentStarts;
-      this.visited = other.visited;
-      this.contextIdentity = other.contextIdentity;
-      this.reentryCount = other.reentryCount;
-    }
-    */
-
-    int reentryCount() {
-      return reentryCount;
-    }
-
-    @Override
-    public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
-        throws IOException {
-      if (searcher.getIndexReader().getContext().id() != contextIdentity) {
-        throw new IllegalStateException("This DocAndScore query was created by a different reader");
-      }
-      return new Weight(this) {
-        @Override
-        public Explanation explain(LeafReaderContext context, int doc) {
-          int found = Arrays.binarySearch(docs, doc + context.docBase);
-          if (found < 0) {
-            return Explanation.noMatch("not in top " + docs.length + " docs");
-          }
-          return Explanation.match(scores[found] * boost, "within top " + docs.length + " docs");
-        }
-
-        @Override
-        public int count(LeafReaderContext context) {
-          return segmentStarts[context.ord + 1] - segmentStarts[context.ord];
-        }
-
-        @Override
-        public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
-          if (segmentStarts[context.ord] == segmentStarts[context.ord + 1]) {
-            return null;
-          }
-          final var scorer =
-              new Scorer() {
-                final int lower = segmentStarts[context.ord];
-                final int upper = segmentStarts[context.ord + 1];
-                int upTo = -1;
-
-                @Override
-                public DocIdSetIterator iterator() {
-                  return new DocIdSetIterator() {
-                    @Override
-                    public int docID() {
-                      return docIdNoShadow();
-                    }
-
-                    @Override
-                    public int nextDoc() {
-                      if (upTo == -1) {
-                        upTo = lower;
-                      } else {
-                        ++upTo;
-                      }
-                      return docIdNoShadow();
-                    }
-
-                    @Override
-                    public int advance(int target) throws IOException {
-                      return slowAdvance(target);
-                    }
-
-                    @Override
-                    public long cost() {
-                      return upper - lower;
-                    }
-                  };
-                }
-
-                @Override
-                public float getMaxScore(int docId) {
-                  return maxScore * boost;
-                }
-
-                @Override
-                public float score() {
-                  return scores[upTo] * boost;
-                }
-
-                /**
-                 * move the implementation of docID() into a differently-named method so we can call
-                 * it from DocIDSetIterator.docID() even though this class is anonymous
-                 *
-                 * @return the current docid
-                 */
-                private int docIdNoShadow() {
-                  if (upTo == -1) {
-                    return -1;
-                  }
-                  if (upTo >= upper) {
-                    return NO_MORE_DOCS;
-                  }
-                  return docs[upTo] - context.docBase;
-                }
-
-                @Override
-                public int docID() {
-                  return docIdNoShadow();
-                }
-              };
-          return new DefaultScorerSupplier(scorer);
-        }
-
-        @Override
-        public boolean isCacheable(LeafReaderContext ctx) {
-          return true;
-        }
-      };
-    }
-
-    @Override
-    public String toString(String field) {
-      return "DocAndScoreQuery[" + docs[0] + ",...][" + scores[0] + ",...]," + maxScore;
-    }
-
-    @Override
-    public void visit(QueryVisitor visitor) {
-      visitor.visitLeaf(this);
-    }
-
-    public long visited() {
-      return visited;
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-      if (sameClassAs(obj) == false) {
-        return false;
-      }
-      return contextIdentity == ((DocAndScoreQuery) obj).contextIdentity
-          && Arrays.equals(docs, ((DocAndScoreQuery) obj).docs)
-          && Arrays.equals(scores, ((DocAndScoreQuery) obj).scores);
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hash(
-          classHash(), contextIdentity, Arrays.hashCode(docs), Arrays.hashCode(scores));
-    }
-  }
-
   public KnnSearchStrategy getSearchStrategy() {
     return searchStrategy;
   }