apache
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
Lines changed: 3 additions & 0 deletions
diff --git a/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionBulkScorer.java b/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionBulkScorer.java
Lines changed: 33 additions & 100 deletions
diff --git a/lucene/core/src/java/org/apache/lucene/search/DocAndScoreAccBuffer.java b/lucene/core/src/java/org/apache/lucene/search/DocAndScoreAccBuffer.java
Lines changed: 74 additions & 0 deletions
@@ -119,6 +119,9 @@ Optimizations
 * GITHUB#14700: Return MatchNoDocsQuery when IndexOrDocValuesQuery::rewrite does not match
   (Chris Hegarty)
 
+* GITHUB#14701: Optimize top-n bulk scorers by evaluating scoring windows in a
+  term-at-a-time fashion instead of doc-at-a-time. (Adrien Grand)
+
 Bug Fixes
 ---------------------
 * GITHUB#14654: ValueSource.fromDoubleValuesSource(dvs).getSortField() would throw errors when
 
@@ -22,7 +22,6 @@
 import java.util.List;
 import org.apache.lucene.search.Weight.DefaultBulkScorer;
 import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.MathUtil;
 
 /**
  * BulkScorer implementation of {@link BlockMaxConjunctionScorer} that focuses on top-level
@@ -38,11 +37,12 @@ final class BlockMaxConjunctionBulkScorer extends BulkScorer {
   private final Scorer[] scorers;
   private final Scorable[] scorables;
   private final DocIdSetIterator[] iterators;
-  private final DocIdSetIterator lead1, lead2;
-  private final Scorable scorer1, scorer2;
+  private final DocIdSetIterator lead;
   private final DocAndScore scorable = new DocAndScore();
   private final double[] sumOfOtherClauses;
   private final int maxDoc;
+  private final DocAndScoreBuffer docAndScoreBuffer = new DocAndScoreBuffer();
+  private final DocAndScoreAccBuffer docAndScoreAccBuffer = new DocAndScoreAccBuffer();
 
   BlockMaxConjunctionBulkScorer(int maxDoc, List<Scorer> scorers) throws IOException {
     if (scorers.size() <= 1) {
@@ -54,14 +54,9 @@ final class BlockMaxConjunctionBulkScorer extends BulkScorer {
         Arrays.stream(this.scorers).map(ScorerUtil::likelyTermScorer).toArray(Scorable[]::new);
     this.iterators =
         Arrays.stream(this.scorers).map(Scorer::iterator).toArray(DocIdSetIterator[]::new);
-    lead1 = ScorerUtil.likelyImpactsEnum(iterators[0]);
-    lead2 = ScorerUtil.likelyImpactsEnum(iterators[1]);
-    scorer1 = this.scorables[0];
-    scorer2 = this.scorables[1];
+    lead = ScorerUtil.likelyImpactsEnum(iterators[0]);
     this.sumOfOtherClauses = new double[this.scorers.length];
-    for (int i = 0; i < sumOfOtherClauses.length; i++) {
-      sumOfOtherClauses[i] = Double.POSITIVE_INFINITY;
-    }
+    Arrays.fill(sumOfOtherClauses, Double.POSITIVE_INFINITY);
     this.maxDoc = maxDoc;
   }
 
@@ -86,7 +81,7 @@ private float computeMaxScore(int windowMin, int windowMax) throws IOException {
   public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException {
     collector.setScorer(scorable);
 
-    int windowMin = Math.max(lead1.docID(), min);
+    int windowMin = Math.max(lead.docID(), min);
     while (windowMin < max) {
       // Use impacts of the least costly scorer to compute windows
       // NOTE: windowMax is inclusive
@@ -97,7 +92,7 @@ public int score(LeafCollector collector, Bits acceptDocs, int min, int max) thr
         maxWindowScore = computeMaxScore(windowMin, windowMax);
       }
       scoreWindow(collector, acceptDocs, windowMin, windowMax + 1, maxWindowScore);
-      windowMin = Math.max(lead1.docID(), windowMax + 1);
+      windowMin = Math.max(lead.docID(), windowMax + 1);
     }
 
     return windowMin >= maxDoc ? DocIdSetIterator.NO_MORE_DOCS : windowMin;
@@ -111,111 +106,49 @@ private void scoreWindow(
       return;
     }
 
-    if (lead1.docID() < min) {
-      lead1.advance(min);
+    if (lead.docID() < min) {
+      lead.advance(min);
     }
-    if (lead1.docID() >= max) {
+    if (lead.docID() >= max) {
       return;
     }
 
-    Scorable scorer1 = this.scorer1;
-    if (scorers[0].getMaxScore(max - 1) == 0f) {
-      // Null out scorer1 if it may only produce 0 scores over this window. In practice, this is
-      // mostly useful because FILTER clauses are pushed as constant-scoring MUST clauses with a
-      // 0 score to this scorer. Setting it to null instead of using a different impl helps
-      // reduce polymorphism of calls to Scorable#score and skip the check of whether the leading
-      // clause produced a high-enough score for the doc to be competitive.
-      scorer1 = null;
-    }
+    for (scorers[0].nextDocsAndScores(max, acceptDocs, docAndScoreBuffer);
+        docAndScoreBuffer.size > 0;
+        scorers[0].nextDocsAndScores(max, acceptDocs, docAndScoreBuffer)) {
 
-    final double sumOfOtherMaxScoresAt1 = sumOfOtherClauses[1];
+      docAndScoreAccBuffer.copyFrom(docAndScoreBuffer);
 
-    advanceHead:
-    for (int doc = lead1.docID(); doc < max; ) {
-      if (acceptDocs != null && acceptDocs.get(doc) == false) {
-        doc = lead1.nextDoc();
-        continue;
-      }
-
-      // Compute the score as we find more matching clauses, in order to skip advancing other
-      // clauses if the total score has no chance of being competitive. This works well because
-      // computing a score is usually cheaper than decoding a full block of postings and
-      // frequencies.
-      final boolean hasMinCompetitiveScore = scorable.minCompetitiveScore > 0;
-      double currentScore;
-      if (scorer1 != null && hasMinCompetitiveScore) {
-        currentScore = scorer1.score();
-
-        // This is the same logic as in the below for loop, specialized for the 2nd least costly
-        // clause. This seems to help the JVM.
-
-        // First check if we have a chance of having a match based on max scores
-        if ((float) MathUtil.sumUpperBound(currentScore + sumOfOtherMaxScoresAt1, scorers.length)
-            < scorable.minCompetitiveScore) {
-          doc = lead1.nextDoc();
-          continue advanceHead;
+      for (int i = 1; i < scorers.length; ++i) {
+        if (scorable.minCompetitiveScore > 0) {
+          ScorerUtil.filterCompetitiveHits(
+              docAndScoreAccBuffer,
+              sumOfOtherClauses[i],
+              scorable.minCompetitiveScore,
+              scorers.length);
         }
-      } else {
-        currentScore = 0;
-      }
 
-      // NOTE: lead2 may be on `doc` already if we `continue`d on the previous loop iteration.
-      if (lead2.docID() < doc) {
-        int next = lead2.advance(doc);
-        if (next != doc) {
-          doc = lead1.advance(next);
-          continue advanceHead;
-        }
-      }
-      assert lead2.docID() == doc;
-      if (hasMinCompetitiveScore) {
-        currentScore += scorer2.score();
+        ScorerUtil.applyRequiredClause(docAndScoreAccBuffer, iterators[i], scorables[i]);
       }
 
-      for (int i = 2; i < iterators.length; ++i) {
-        // First check if we have a chance of having a match based on max scores
-        if (hasMinCompetitiveScore
-            && (float) MathUtil.sumUpperBound(currentScore + sumOfOtherClauses[i], scorers.length)
-                < scorable.minCompetitiveScore) {
-          doc = lead1.nextDoc();
-          continue advanceHead;
-        }
-
-        // NOTE: these iterators may be on `doc` already if we called `continue advanceHead` on the
-        // previous loop iteration.
-        if (iterators[i].docID() < doc) {
-          int next = iterators[i].advance(doc);
-          if (next != doc) {
-            doc = lead1.advance(next);
-            continue advanceHead;
-          }
-        }
-        assert iterators[i].docID() == doc;
-        if (hasMinCompetitiveScore) {
-          currentScore += scorables[i].score();
-        }
-      }
-
-      if (hasMinCompetitiveScore == false) {
-        for (Scorable scorer : scorables) {
-          currentScore += scorer.score();
-        }
-      }
-      scorable.score = (float) currentScore;
-      collector.collect(doc);
-      // The collect() call may have updated the minimum competitive score.
-      if (maxWindowScore < scorable.minCompetitiveScore) {
-        // no more hits are competitive
-        return;
+      for (int i = 0; i < docAndScoreAccBuffer.size; ++i) {
+        scorable.score = (float) docAndScoreAccBuffer.scores[i];
+        collector.collect(docAndScoreAccBuffer.docs[i]);
       }
+    }
 
-      doc = lead1.nextDoc();
+    int maxOtherDoc = -1;
+    for (int i = 1; i < iterators.length; ++i) {
+      maxOtherDoc = Math.max(iterators[i].docID(), maxOtherDoc);
+    }
+    if (lead.docID() < maxOtherDoc) {
+      lead.advance(maxOtherDoc);
     }
   }
 
   @Override
   public long cost() {
-    return lead1.cost();
+    return lead.cost();
   }
 
   private static class DocAndScore extends Scorable {
 
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search;
+
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.IntsRef;
+
+/**
+ * Wrapper around parallel arrays of doc IDs and their {@code double} score accumulators.
+ *
+ * @lucene.internal
+ */
+public final class DocAndScoreAccBuffer {
+
+  private static final double[] EMPTY_DOUBLES = new double[0];
+
+  /** Doc IDs; only the first {@link #size} entries are valid. */
+  public int[] docs = IntsRef.EMPTY_INTS;
+
+  /** Score accumulators, parallel to {@link #docs}. */
+  public double[] scores = EMPTY_DOUBLES;
+
+  /** Number of valid entries in the doc ID and score arrays. */
+  public int size;
+
+  /** Sole constructor; the buffer starts with empty arrays and a size of zero. */
+  public DocAndScoreAccBuffer() {}
+
+  /**
+   * Ensure that both arrays can store at least {@code minSize} entries. Existing content may be
+   * discarded when the arrays are reallocated; {@link #size} is not modified.
+   */
+  public void growNoCopy(int minSize) {
+    if (docs.length < minSize) {
+      docs = ArrayUtil.growNoCopy(docs, minSize);
+      scores = new double[docs.length]; // fresh array sized to match docs
+    }
+  }
+
+  /**
+   * Ensure that both arrays can store at least {@code minSize} entries. Existing content is
+   * preserved; {@link #size} is not modified.
+   */
+  public void grow(int minSize) {
+    if (docs.length < minSize) {
+      docs = ArrayUtil.grow(docs, minSize);
+      scores = ArrayUtil.growExact(scores, docs.length);
+    }
+  }
+
+  /** Copy content from the given {@link DocAndScoreBuffer}, expanding float scores to doubles. */
+  public void copyFrom(DocAndScoreBuffer buffer) {
+    growNoCopy(buffer.size); // old entries are overwritten below, so they need not be kept
+    System.arraycopy(buffer.docs, 0, docs, 0, buffer.size);
+    for (int i = 0; i < buffer.size; ++i) {
+      scores[i] = buffer.scores[i];
+    }
+    this.size = buffer.size;
+  }
+}