Further improve filtering by score. (#14970)

jpountz · web-flow · commit 7fc9fd36095b · 2025-07-24T16:44:16.000+02:00
PRs #14906 and #14896 improved the efficiency of filtering by score. This PR tries to get some extra speedup by:
- Skipping filtering by score when applying a non-essential clause that doesn't have matches over the range of doc IDs being scored.
- Filtering on float[] scores rather than double[] scores whenever applicable so that vectorization can work on 2x more lanes at once.
- Filtering by score using `VectorUtil#filterByScore` instead of relying on the collector to do it.
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
@@ -218,6 +218,8 @@ Optimizations
 
 * GITHUB#14976: Utilize docIdRunEnd on ReqExclBulkScorer. (Ge Song)
 
+* GITHUB#14970: Further speed up filtering hits by score. (Adrien Grand)
+
 Changes in Runtime Behavior
 ---------------------
 * GITHUB#14823: Decrease TieredMergePolicy's default number of segments per
diff --git a/lucene/core/src/java/org/apache/lucene/internal/vectorization/DefaultVectorUtilSupport.java b/lucene/core/src/java/org/apache/lucene/internal/vectorization/DefaultVectorUtilSupport.java
@@ -310,6 +310,22 @@ private float quantizeFloat(float v, byte[] dest, int destIndex) {
     }
   }
 
+  @Override
+  public int filterByScore(
+      int[] docBuffer, float[] scoreBuffer, float minScoreInclusive, int upTo) {
+    int newSize = 0; // number of pairs kept so far, also the next write position
+    for (int i = 0; i < upTo; ++i) {
+      int doc = docBuffer[i];
+      float score = scoreBuffer[i];
+      docBuffer[newSize] = doc; // written unconditionally, kept only if newSize advances
+      scoreBuffer[newSize] = score;
+      if (score >= minScoreInclusive) {
+        newSize++;
+      }
+    }
+    return newSize;
+  }
+
   @Override
   public int filterByScore(
       int[] docBuffer, double[] scoreBuffer, double minScoreInclusive, int upTo) {
diff --git a/lucene/core/src/java/org/apache/lucene/internal/vectorization/VectorUtilSupport.java b/lucene/core/src/java/org/apache/lucene/internal/vectorization/VectorUtilSupport.java
@@ -101,6 +101,20 @@ float recalculateScalarQuantizationOffset(
       float minQuantile,
       float maxQuantile);
 
+  /**
+   * Filters {@code docBuffer} and {@code scoreBuffer} in place by {@code minScoreInclusive}.
+   * Entries at the same index form a pair; a pair is kept only if its score is greater than or
+   * equal to {@code minScoreInclusive}, and kept pairs are moved to the front of both arrays.
+   *
+   * @param docBuffer doc IDs (or any other int values paired with {@code scoreBuffer}); modified
+   *     in place
+   * @param scoreBuffer scores to compare against {@code minScoreInclusive}; modified in place
+   * @param minScoreInclusive minimum score a pair needs in order to be kept
+   * @param upTo number of leading pairs to examine
+   * @return the number of pairs that remain after filtering
+   */
+  int filterByScore(int[] docBuffer, float[] scoreBuffer, float minScoreInclusive, int upTo);
+
   /**
    * filter both {@code docBuffer} and {@code scoreBuffer} with {@code minScoreInclusive}, each
    * {@code docBuffer} and {@code scoreBuffer} of the same index forms a pair, pairs with score not
diff --git a/lucene/core/src/java/org/apache/lucene/search/BatchScoreBulkScorer.java b/lucene/core/src/java/org/apache/lucene/search/BatchScoreBulkScorer.java
@@ -18,6 +18,7 @@
 
 import java.io.IOException;
 import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.VectorUtil;
 
 /**
  * A bulk scorer used when {@link ScoreMode#needsScores()} is true and {@link
@@ -49,11 +50,16 @@ public int score(LeafCollector collector, Bits acceptDocs, int min, int max) thr
     for (scorer.nextDocsAndScores(max, acceptDocs, buffer);
         buffer.size > 0;
         scorer.nextDocsAndScores(max, acceptDocs, buffer)) {
+
+      // The collector already filters hits whose score is less than the minimum competitive score,
+      // but doing it here is a bit more efficient.
+      buffer.size =
+          VectorUtil.filterByScore(
+              buffer.docs, buffer.features, scorable.minCompetitiveScore, buffer.size);
+
       for (int i = 0, size = buffer.size; i < size; i++) {
-        float score = scorable.score = buffer.features[i];
-        if (score >= scorable.minCompetitiveScore) {
-          collector.collect(buffer.docs[i]);
-        }
+        scorable.score = buffer.features[i];
+        collector.collect(buffer.docs[i]);
       }
       scorer.setMinCompetitiveScore(scorable.minCompetitiveScore);
     }
diff --git a/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionBulkScorer.java b/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionBulkScorer.java
@@ -166,15 +166,24 @@ private void scoreWindowScoreFirst(
       return;
     }
 
+    // two equal consecutive values mean that the first clause always returns a score of zero, so we
+    // don't need to filter hits by score again.
+    boolean leadingClauseHasZeroScores = sumOfOtherClauses[1] == sumOfOtherClauses[0];
+
     for (scorers[0].nextDocsAndScores(max, acceptDocs, docAndScoreBuffer);
         docAndScoreBuffer.size > 0;
         scorers[0].nextDocsAndScores(max, acceptDocs, docAndScoreBuffer)) {
 
+      if (leadingClauseHasZeroScores == false) {
+        ScorerUtil.filterCompetitiveHits(
+            docAndScoreBuffer, sumOfOtherClauses[1], scorable.minCompetitiveScore, scorers.length);
+      }
+
       docAndScoreAccBuffer.copyFrom(docAndScoreBuffer);
 
       for (int i = 1; i < scorers.length; ++i) {
         double sumOfOtherClause = sumOfOtherClauses[i];
-        if (sumOfOtherClause != sumOfOtherClauses[i - 1]) {
+        if (i > 1 && sumOfOtherClause != sumOfOtherClauses[i - 1]) {
           // two equal consecutive values mean that the first clause always returns a score of zero,
           // so we don't need to filter hits by score again.
           ScorerUtil.filterCompetitiveHits(
diff --git a/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java b/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java
@@ -22,6 +22,7 @@
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.MathUtil;
+import org.apache.lucene.util.VectorUtil;
 
 final class MaxScoreBulkScorer extends BulkScorer {
 
@@ -40,6 +41,8 @@ final class MaxScoreBulkScorer extends BulkScorer {
   // Index of the first scorer that is required, this scorer and all following scorers are required
   // for a document to match.
   int firstRequiredScorer;
+  // Index of the first scorer that may produce positive scores on this window.
+  int firstNonNullScorer;
   // The minimum value of minCompetitiveScore that would produce a more favorable partitioning.
   float nextMinCompetitiveScore;
   private final long cost;
@@ -230,8 +233,39 @@ private void scoreInnerWindowSingleEssentialClause(
         docAndScoreBuffer.size > 0;
         top.scorer.nextDocsAndScores(upTo, acceptDocs, docAndScoreBuffer)) {
 
-      docAndScoreAccBuffer.copyFrom(docAndScoreBuffer);
-      scoreNonEssentialClauses(collector, docAndScoreAccBuffer, firstEssentialScorer);
+      if (firstNonNullScorer >= firstEssentialScorer) {
+        // Note: firstNonNullScorer may be > firstEssentialScorer if minCompetitiveScore=0 since
+        // hits with a score of 0 are still competitive
+        // There are no non-essential clauses, filter non-competitive hits and collect directly
+
+        int[] docs = docAndScoreBuffer.docs;
+        float[] scores = docAndScoreBuffer.features;
+        int size = docAndScoreBuffer.size;
+        size = VectorUtil.filterByScore(docs, scores, scorable.minCompetitiveScore, size);
+
+        for (int i = 0; i < size; ++i) {
+          scorable.score = scores[i];
+          collector.collect(docs[i]);
+        }
+      } else {
+        // Filter based on float scores before promoting them to doubles so that vectorization can
+        // work on 2x more values at once.
+        ScorerUtil.filterCompetitiveHits(
+            docAndScoreBuffer,
+            maxScoreSums[firstEssentialScorer - 1],
+            scorable.minCompetitiveScore,
+            allScorers.length);
+
+        docAndScoreAccBuffer.copyFrom(docAndScoreBuffer);
+
+        // Apply the last non-essential clause here instead of delegating it to
+        // `scoreNonEssentialClauses` so that it doesn't re-do filtering by score.
+        DisiWrapper scorer = allScorers[firstEssentialScorer - 1];
+        ScorerUtil.applyOptionalClause(docAndScoreAccBuffer, scorer.iterator, scorer.scorable);
+        scorer.doc = scorer.iterator.docID();
+
+        scoreNonEssentialClauses(collector, docAndScoreAccBuffer, firstEssentialScorer - 1);
+      }
     }
 
     top.doc = top.iterator.docID();
@@ -250,11 +284,19 @@ private void scoreInnerWindowAsConjunction(LeafCollector collector, Bits acceptD
         docAndScoreBuffer.size > 0;
         lead1.scorer.nextDocsAndScores(max, acceptDocs, docAndScoreBuffer)) {
 
+      // Filter based on float scores before promoting them to doubles so that vectorization can
+      // work on 2x more values at once.
+      ScorerUtil.filterCompetitiveHits(
+          docAndScoreBuffer,
+          maxScoreSums[allScorers.length - 2],
+          scorable.minCompetitiveScore,
+          allScorers.length);
+
       docAndScoreAccBuffer.copyFrom(docAndScoreBuffer);
 
       for (int i = allScorers.length - 2; i >= firstRequiredScorer; --i) {
 
-        if (scorable.minCompetitiveScore > 0) {
+        if (i < allScorers.length - 2 && scorable.minCompetitiveScore > 0) {
           ScorerUtil.filterCompetitiveHits(
               docAndScoreAccBuffer,
               maxScoreSums[i],
@@ -371,7 +413,7 @@ private void scoreNonEssentialClauses(
       throws IOException {
     numCandidates += buffer.size;
 
-    for (int i = numNonEssentialClauses - 1; i >= 0; --i) {
+    for (int i = numNonEssentialClauses - 1; i >= firstNonNullScorer; --i) {
       DisiWrapper scorer = allScorers[i];
       assert scorable.minCompetitiveScore > 0
           : "All clauses are essential if minCompetitiveScore is equal to zero";
@@ -381,9 +423,20 @@ private void scoreNonEssentialClauses(
       scorer.doc = scorer.iterator.docID();
     }
 
-    for (int i = 0; i < buffer.size; ++i) {
-      scorable.score = (float) buffer.scores[i];
-      collector.collect(buffer.docs[i]);
+    // The collector already filters hits whose score is less than the min competitive score, but
+    // doing it here is a bit more efficient.
+    int size = buffer.size;
+    int[] docs = buffer.docs;
+    docAndScoreBuffer.growNoCopy(size);
+    float[] scores = docAndScoreBuffer.features;
+    for (int i = 0; i < size; ++i) {
+      scores[i] = (float) buffer.scores[i];
+    }
+    size = VectorUtil.filterByScore(docs, scores, scorable.minCompetitiveScore, size);
+
+    for (int i = 0; i < size; ++i) {
+      scorable.score = scores[i];
+      collector.collect(docs[i]);
     }
   }
 
@@ -408,10 +461,14 @@ boolean partitionScorers() {
               (double) scorer2.maxWindowScore / Math.max(1L, scorer2.cost));
         });
     double maxScoreSum = 0;
+    firstNonNullScorer = 0;
     firstEssentialScorer = 0;
     nextMinCompetitiveScore = Float.POSITIVE_INFINITY;
     for (int i = 0; i < allScorers.length; ++i) {
       final DisiWrapper w = scratch[i];
+      if (w.maxWindowScore == 0f) {
+        firstNonNullScorer = i + 1;
+      }
       double newMaxScoreSum = maxScoreSum + w.maxWindowScore;
       float maxScoreSumFloat =
           (float) MathUtil.sumUpperBound(newMaxScoreSum, firstEssentialScorer + 1);
diff --git a/lucene/core/src/java/org/apache/lucene/search/ScorerUtil.java b/lucene/core/src/java/org/apache/lucene/search/ScorerUtil.java
@@ -138,6 +138,34 @@ static double minRequiredScore(
     return minRequiredScore;
   }
 
+  /**
+   * Filters competitive hits from the provided {@link DocAndFloatFeatureBuffer}.
+   *
+   * <p>This method removes documents from the buffer whose score is below a float threshold: the
+   * value of {@code minRequiredScore} for the given arguments, rounded down to a float if needed.
+   * The buffer is left untouched when that threshold is not positive.
+   */
+  static void filterCompetitiveHits(
+      DocAndFloatFeatureBuffer buffer,
+      double maxRemainingScore,
+      float minCompetitiveScore,
+      int numScorers) {
+    double minRequiredScoreDouble =
+        minRequiredScore(maxRemainingScore, minCompetitiveScore, numScorers);
+    float minRequiredScoreFloat = (float) minRequiredScoreDouble;
+    if ((double) minRequiredScoreFloat > minRequiredScoreDouble) { // the cast rounded up
+      minRequiredScoreFloat = Math.nextDown(minRequiredScoreFloat); // keep it <= the double value
+    }
+    assert (double) minRequiredScoreFloat <= minRequiredScoreDouble;
+
+    if (minRequiredScoreFloat <= 0) { // non-positive threshold: skip filtering entirely
+      return;
+    }
+
+    buffer.size =
+        VectorUtil.filterByScore(buffer.docs, buffer.features, minRequiredScoreFloat, buffer.size);
+  }
+
   /**
    * Filters competitive hits from the provided {@link DocAndScoreAccBuffer}.
    *
diff --git a/lucene/core/src/java/org/apache/lucene/util/VectorUtil.java b/lucene/core/src/java/org/apache/lucene/util/VectorUtil.java
@@ -377,6 +377,27 @@ public static float recalculateOffset(
         vector, oldAlpha, oldMinQuantile, scale, alpha, minQuantile, maxQuantile);
   }
 
+  /**
+   * Filters {@code docBuffer} and {@code scoreBuffer} in place by {@code minScoreInclusive}.
+   * Entries at the same index form a pair; a pair is kept only if its score is greater than or
+   * equal to {@code minScoreInclusive}.
+   *
+   * @param docBuffer doc IDs (or any other int values paired with {@code scoreBuffer})
+   * @param scoreBuffer scores to compare against {@code minScoreInclusive}
+   * @param minScoreInclusive minimum score a pair needs in order to be kept
+   * @param upTo number of leading pairs to examine
+   * @return the number of pairs that remain after filtering
+   * @throws IllegalArgumentException if either buffer is shorter than {@code upTo}
+   */
+  public static int filterByScore(
+      int[] docBuffer, float[] scoreBuffer, float minScoreInclusive, int upTo) {
+    if (docBuffer.length < upTo || scoreBuffer.length < upTo) {
+      throw new IllegalArgumentException(
+          "docBuffer and scoreBuffer should be at least as long as upTo");
+    }
+    return IMPL.filterByScore(docBuffer, scoreBuffer, minScoreInclusive, upTo);
+  }
+
   /**
    * filter both {@code docBuffer} and {@code scoreBuffer} with {@code minScoreInclusive}, each
    * {@code docBuffer} and {@code scoreBuffer} of the same index forms a pair, pairs with score not
@@ -391,9 +412,9 @@ public static float recalculateOffset(
    */
   public static int filterByScore(
       int[] docBuffer, double[] scoreBuffer, double minScoreInclusive, int upTo) {
-    if (docBuffer.length != scoreBuffer.length || docBuffer.length < upTo) {
+    if (docBuffer.length < upTo || scoreBuffer.length < upTo) {
       throw new IllegalArgumentException(
-          "docBuffer and scoreBuffer should keep same length and at least as long as upTo");
+          "docBuffer and scoreBuffer should be at least as long as upTo");
     }
     return IMPL.filterByScore(docBuffer, scoreBuffer, minScoreInclusive, upTo);
   }
diff --git a/lucene/core/src/java24/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java b/lucene/core/src/java24/org/apache/lucene/internal/vectorization/PanamaVectorUtilSupport.java
@@ -1005,6 +1005,35 @@ public float recalculateScalarQuantizationOffset(
     return correction;
   }
 
+  @SuppressForbidden(reason = "Uses compress and cast only where fast and carefully contained")
+  @Override
+  public int filterByScore(
+      int[] docBuffer, float[] scoreBuffer, float minScoreInclusive, int upTo) {
+    int newUpto = 0; // number of pairs kept so far, also the next write position
+    int i = 0;
+    if (Constants.HAS_FAST_COMPRESS_MASK_CAST) { // vectorized main loop
+      for (int bound = FLOAT_SPECIES.loopBound(upTo); i < bound; i += FLOAT_SPECIES.length()) {
+        FloatVector scoreVector = FloatVector.fromArray(FLOAT_SPECIES, scoreBuffer, i);
+        IntVector docVector = IntVector.fromArray(INT_SPECIES, docBuffer, i);
+        VectorMask<Float> mask = scoreVector.compare(VectorOperators.GE, minScoreInclusive);
+        scoreVector.compress(mask).intoArray(scoreBuffer, newUpto); // safe: newUpto <= i
+        docVector.compress(mask.cast(INT_SPECIES)).intoArray(docBuffer, newUpto);
+        newUpto += mask.trueCount(); // advance by the number of lanes that passed
+      }
+    }
+
+    for (; i < upTo; ++i) { // scalar tail, or the whole range if the vector loop was skipped
+      int doc = docBuffer[i];
+      float score = scoreBuffer[i];
+      docBuffer[newUpto] = doc; // written unconditionally, kept only if newUpto advances
+      scoreBuffer[newUpto] = score;
+      if (score >= minScoreInclusive) {
+        newUpto++;
+      }
+    }
+    return newUpto;
+  }
+
   @SuppressForbidden(reason = "Uses compress and cast only where fast and carefully contained")
   @Override
   public int filterByScore(
diff --git a/lucene/core/src/test/org/apache/lucene/util/TestVectorUtil.java b/lucene/core/src/test/org/apache/lucene/util/TestVectorUtil.java