
Commit 8259f89

Merge branch 'main' into excetpion_funcscorequery
2 parents 3778bc6 + 35ee644

File tree: 18 files changed (+501, -145 lines)

docs/changelog/133245.yaml

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+pr: 133245
+summary: Add query heads priority to `SliceQueue`
+area: ES|QL
+type: enhancement
+issues: []

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java

Lines changed: 30 additions & 22 deletions
@@ -24,7 +24,6 @@
 import org.elasticsearch.compute.data.DocVector;
 import org.elasticsearch.compute.data.IntVector;
 import org.elasticsearch.compute.data.Page;
-import org.elasticsearch.compute.data.Vector;
 import org.elasticsearch.core.Releasable;
 import org.elasticsearch.core.Releasables;

@@ -44,12 +43,12 @@
  * It's much faster to push queries to the {@link LuceneSourceOperator} or the like, but sometimes this isn't possible. So
  * this class is here to save the day.
  */
-public abstract class LuceneQueryEvaluator<T extends Vector.Builder> implements Releasable {
+public abstract class LuceneQueryEvaluator<T extends Block.Builder> implements Releasable {

     public record ShardConfig(Query query, IndexSearcher searcher) {}

     private final BlockFactory blockFactory;
-    private final ShardConfig[] shards;
+    protected final ShardConfig[] shards;

     private final List<ShardState> perShardState;

@@ -67,9 +66,9 @@ public Block executeQuery(Page page) {
         DocVector docs = (DocVector) block.asVector();
         try {
             if (docs.singleSegmentNonDecreasing()) {
-                return evalSingleSegmentNonDecreasing(docs).asBlock();
+                return evalSingleSegmentNonDecreasing(docs);
             } else {
-                return evalSlow(docs).asBlock();
+                return evalSlow(docs);
             }
         } catch (IOException e) {
             throw new UncheckedIOException(e);

@@ -106,15 +105,15 @@ public Block executeQuery(Page page) {
      * common.
      * </p>
      */
-    private Vector evalSingleSegmentNonDecreasing(DocVector docs) throws IOException {
+    private Block evalSingleSegmentNonDecreasing(DocVector docs) throws IOException {
         ShardState shardState = shardState(docs.shards().getInt(0));
         SegmentState segmentState = shardState.segmentState(docs.segments().getInt(0));
         int min = docs.docs().getInt(0);
         int max = docs.docs().getInt(docs.getPositionCount() - 1);
         int length = max - min + 1;
-        try (T scoreBuilder = createVectorBuilder(blockFactory, docs.getPositionCount())) {
+        try (T scoreBuilder = createBlockBuilder(blockFactory, docs.getPositionCount())) {
             if (length == docs.getPositionCount() && length > 1) {
-                return segmentState.scoreDense(scoreBuilder, min, max);
+                return segmentState.scoreDense(scoreBuilder, min, max, docs.getPositionCount());
             }
             return segmentState.scoreSparse(scoreBuilder, docs.docs());
         }

@@ -134,13 +133,13 @@ private Vector evalSingleSegmentNonDecreasing(DocVector docs) throws IOException
      * the order that the {@link DocVector} came in.
      * </p>
      */
-    private Vector evalSlow(DocVector docs) throws IOException {
+    private Block evalSlow(DocVector docs) throws IOException {
         int[] map = docs.shardSegmentDocMapForwards();
         // Clear any state flags from the previous run
         int prevShard = -1;
         int prevSegment = -1;
         SegmentState segmentState = null;
-        try (T scoreBuilder = createVectorBuilder(blockFactory, docs.getPositionCount())) {
+        try (T scoreBuilder = createBlockBuilder(blockFactory, docs.getPositionCount())) {
             for (int i = 0; i < docs.getPositionCount(); i++) {
                 int shard = docs.shards().getInt(docs.shards().getInt(map[i]));
                 int segment = docs.segments().getInt(map[i]);

@@ -156,7 +155,7 @@ private Vector evalSlow(DocVector docs) throws IOException {
                     segmentState.scoreSingleDocWithScorer(scoreBuilder, docs.docs().getInt(map[i]));
                 }
             }
-            try (Vector outOfOrder = scoreBuilder.build()) {
+            try (Block outOfOrder = scoreBuilder.build()) {
                 return outOfOrder.filter(docs.shardSegmentDocMapBackwards());
             }
         }

@@ -247,9 +246,9 @@ private SegmentState(Weight weight, LeafReaderContext ctx) {
          * Score a range using the {@link BulkScorer}. This should be faster
          * than using {@link #scoreSparse} for dense doc ids.
          */
-        Vector scoreDense(T scoreBuilder, int min, int max) throws IOException {
+        Block scoreDense(T scoreBuilder, int min, int max, int positionCount) throws IOException {
             if (noMatch) {
-                return createNoMatchVector(blockFactory, max - min + 1);
+                return createNoMatchBlock(blockFactory, max - min + 1);
             }
             if (bulkScorer == null || // The bulkScorer wasn't initialized
                 Thread.currentThread() != bulkScorerThread // The bulkScorer was initialized on a different thread

@@ -258,19 +257,22 @@ Vector scoreDense(T scoreBuilder, int min, int max) throws IOException {
                 bulkScorer = weight.bulkScorer(ctx);
                 if (bulkScorer == null) {
                     noMatch = true;
-                    return createNoMatchVector(blockFactory, max - min + 1);
+                    return createNoMatchBlock(blockFactory, positionCount);
                 }
             }
             try (
                 DenseCollector<T> collector = new DenseCollector<>(
                     min,
                     max,
                     scoreBuilder,
+                    ctx,
                     LuceneQueryEvaluator.this::appendNoMatch,
-                    LuceneQueryEvaluator.this::appendMatch
+                    LuceneQueryEvaluator.this::appendMatch,
+                    weight.getQuery()
                 )
             ) {
                 bulkScorer.score(collector, ctx.reader().getLiveDocs(), min, max + 1);
+                collector.finish();
                 return collector.build();
             }
         }

@@ -279,10 +281,10 @@ Vector scoreDense(T scoreBuilder, int min, int max) throws IOException {
          * Score a vector of doc ids using {@link Scorer}. If you have a dense range of
          * doc ids it'd be faster to use {@link #scoreDense}.
          */
-        Vector scoreSparse(T scoreBuilder, IntVector docs) throws IOException {
+        Block scoreSparse(T scoreBuilder, IntVector docs) throws IOException {
             initScorer(docs.getInt(0));
             if (noMatch) {
-                return createNoMatchVector(blockFactory, docs.getPositionCount());
+                return createNoMatchBlock(blockFactory, docs.getPositionCount());
             }
             for (int i = 0; i < docs.getPositionCount(); i++) {
                 scoreSingleDocWithScorer(scoreBuilder, docs.getInt(i));

@@ -326,11 +328,13 @@ private void scoreSingleDocWithScorer(T builder, int doc) throws IOException {
      * doc ids are sent to {@link LeafCollector#collect(int)} in ascending order
      * which isn't documented, but @jpountz swears is true.
      */
-    static class DenseCollector<U extends Vector.Builder> implements LeafCollector, Releasable {
+    static class DenseCollector<U extends Block.Builder> implements LeafCollector, Releasable {
         private final U scoreBuilder;
         private final int max;
+        private final LeafReaderContext leafReaderContext;
         private final Consumer<U> appendNoMatch;
         private final CheckedBiConsumer<U, Scorable, IOException> appendMatch;
+        private final Query query;

         private Scorable scorer;
         int next;

@@ -339,14 +343,18 @@ static class DenseCollector<U extends Vector.Builder> implements LeafCollector,
             int min,
             int max,
             U scoreBuilder,
+            LeafReaderContext leafReaderContext,
             Consumer<U> appendNoMatch,
-            CheckedBiConsumer<U, Scorable, IOException> appendMatch
+            CheckedBiConsumer<U, Scorable, IOException> appendMatch,
+            Query query
         ) {
             this.scoreBuilder = scoreBuilder;
             this.max = max;
             next = min;
+            this.leafReaderContext = leafReaderContext;
             this.appendNoMatch = appendNoMatch;
             this.appendMatch = appendMatch;
+            this.query = query;
         }

         @Override

@@ -362,7 +370,7 @@ public void collect(int doc) throws IOException {
             appendMatch.accept(scoreBuilder, scorer);
         }

-        public Vector build() {
+        public Block build() {
             return scoreBuilder.build();
         }

@@ -387,12 +395,12 @@ public void close() {
     /**
      * Creates a vector where all positions correspond to elements that don't match the query
      */
-    protected abstract Vector createNoMatchVector(BlockFactory blockFactory, int size);
+    protected abstract Block createNoMatchBlock(BlockFactory blockFactory, int size);

     /**
      * Creates the corresponding vector builder to store the results of evaluating the query
      */
-    protected abstract T createVectorBuilder(BlockFactory blockFactory, int size);
+    protected abstract T createBlockBuilder(BlockFactory blockFactory, int size);

     /**
      * Appends a matching result to a builder created by @link createVectorBuilder}

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluator.java

Lines changed: 8 additions & 10 deletions
@@ -12,9 +12,9 @@
 import org.apache.lucene.search.ScoreMode;
 import org.elasticsearch.compute.data.Block;
 import org.elasticsearch.compute.data.BlockFactory;
+import org.elasticsearch.compute.data.BooleanBlock;
 import org.elasticsearch.compute.data.BooleanVector;
 import org.elasticsearch.compute.data.Page;
-import org.elasticsearch.compute.data.Vector;
 import org.elasticsearch.compute.operator.DriverContext;
 import org.elasticsearch.compute.operator.EvalOperator;

@@ -26,9 +26,7 @@
  * a {@link BooleanVector}.
  * @see LuceneQueryScoreEvaluator
  */
-public class LuceneQueryExpressionEvaluator extends LuceneQueryEvaluator<BooleanVector.Builder>
-    implements
-        EvalOperator.ExpressionEvaluator {
+public class LuceneQueryExpressionEvaluator extends LuceneQueryEvaluator<BooleanBlock.Builder> implements EvalOperator.ExpressionEvaluator {

     LuceneQueryExpressionEvaluator(BlockFactory blockFactory, ShardConfig[] shards) {
         super(blockFactory, shards);

@@ -45,22 +43,22 @@ protected ScoreMode scoreMode() {
     }

     @Override
-    protected Vector createNoMatchVector(BlockFactory blockFactory, int size) {
-        return blockFactory.newConstantBooleanVector(false, size);
+    protected Block createNoMatchBlock(BlockFactory blockFactory, int size) {
+        return blockFactory.newConstantBooleanBlockWith(false, size);
     }

     @Override
-    protected BooleanVector.Builder createVectorBuilder(BlockFactory blockFactory, int size) {
-        return blockFactory.newBooleanVectorFixedBuilder(size);
+    protected BooleanBlock.Builder createBlockBuilder(BlockFactory blockFactory, int size) {
+        return blockFactory.newBooleanBlockBuilder(size);
     }

     @Override
-    protected void appendNoMatch(BooleanVector.Builder builder) {
+    protected void appendNoMatch(BooleanBlock.Builder builder) {
         builder.appendBoolean(false);
     }

     @Override
-    protected void appendMatch(BooleanVector.Builder builder, Scorable scorer) throws IOException {
+    protected void appendMatch(BooleanBlock.Builder builder, Scorable scorer) throws IOException {
         builder.appendBoolean(true);
     }
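
Taken together, these two files replace the Vector.Builder contract with Block.Builder: the evaluator now returns a Block directly, and the no-match and builder factories produce blocks. A minimal usage sketch follows. It is not part of this commit; the blockFactory, query, searcher, and page values (and the package-private constructor being reachable) are assumptions for illustration.

// Hypothetical usage sketch (assumed setup; not from this diff).
// After this change the evaluator yields a Block rather than a Vector.
LuceneQueryEvaluator.ShardConfig[] shards = new LuceneQueryEvaluator.ShardConfig[] {
    new LuceneQueryEvaluator.ShardConfig(query, searcher) // one entry per shard
};
try (LuceneQueryExpressionEvaluator evaluator = new LuceneQueryExpressionEvaluator(blockFactory, shards)) {
    Block result = evaluator.eval(page); // block 0 of the page must be a DocVector
    result.close();                      // the caller releases the result block
}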

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryScoreEvaluator.java

Lines changed: 7 additions & 8 deletions
@@ -14,7 +14,6 @@
 import org.elasticsearch.compute.data.DoubleBlock;
 import org.elasticsearch.compute.data.DoubleVector;
 import org.elasticsearch.compute.data.Page;
-import org.elasticsearch.compute.data.Vector;
 import org.elasticsearch.compute.operator.DriverContext;
 import org.elasticsearch.compute.operator.ScoreOperator;

@@ -27,7 +26,7 @@
  * Elements that don't match will have a score of {@link #NO_MATCH_SCORE}.
  * @see LuceneQueryScoreEvaluator
  */
-public class LuceneQueryScoreEvaluator extends LuceneQueryEvaluator<DoubleVector.Builder> implements ScoreOperator.ExpressionScorer {
+public class LuceneQueryScoreEvaluator extends LuceneQueryEvaluator<DoubleBlock.Builder> implements ScoreOperator.ExpressionScorer {

     public static final double NO_MATCH_SCORE = 0.0;

@@ -46,22 +45,22 @@ protected ScoreMode scoreMode() {
     }

     @Override
-    protected Vector createNoMatchVector(BlockFactory blockFactory, int size) {
-        return blockFactory.newConstantDoubleVector(NO_MATCH_SCORE, size);
+    protected DoubleBlock createNoMatchBlock(BlockFactory blockFactory, int size) {
+        return blockFactory.newConstantDoubleBlockWith(NO_MATCH_SCORE, size);
     }

     @Override
-    protected DoubleVector.Builder createVectorBuilder(BlockFactory blockFactory, int size) {
-        return blockFactory.newDoubleVectorFixedBuilder(size);
+    protected DoubleBlock.Builder createBlockBuilder(BlockFactory blockFactory, int size) {
+        return blockFactory.newDoubleBlockBuilder(size);
     }

     @Override
-    protected void appendNoMatch(DoubleVector.Builder builder) {
+    protected void appendNoMatch(DoubleBlock.Builder builder) {
         builder.appendDouble(NO_MATCH_SCORE);
     }

     @Override
-    protected void appendMatch(DoubleVector.Builder builder, Scorable scorer) throws IOException {
+    protected void appendMatch(DoubleBlock.Builder builder, Scorable scorer) throws IOException {
         builder.appendDouble(scorer.score());
     }

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneSlice.java

Lines changed: 1 addition & 0 deletions
@@ -16,6 +16,7 @@
  */
 public record LuceneSlice(
     int slicePosition,
+    boolean queryHead,
     ShardContext shardContext,
     List<PartialLeafReaderContext> leaves,
     Weight weight,

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneSliceQueue.java

Lines changed: 27 additions & 11 deletions
@@ -83,13 +83,21 @@ public record QueryAndTags(Query query, List<Object> tags) {}
     private final Map<String, PartitioningStrategy> partitioningStrategies;

     private final AtomicReferenceArray<LuceneSlice> slices;
+    /**
+     * Queue of slice IDs that are the primary entry point for a new query.
+     * A driver should prioritize polling from this queue after failing to get a sequential
+     * slice (the query/segment affinity). This ensures that threads start work on a fresh,
+     * independent query before stealing segments from other queries.
+     */
+    private final Queue<Integer> queryHeads;
+
     /**
      * Queue of slice IDs that are the primary entry point for a new group of segments.
      * A driver should prioritize polling from this queue after failing to get a sequential
      * slice (the segment affinity). This ensures that threads start work on fresh,
      * independent segment groups before resorting to work stealing.
      */
-    private final Queue<Integer> sliceHeads;
+    private final Queue<Integer> segmentHeads;

     /**
      * Queue of slice IDs that are not the primary entry point for a segment group.

@@ -106,11 +114,14 @@ public record QueryAndTags(Query query, List<Object> tags) {}
             slices.set(i, sliceList.get(i));
         }
         this.partitioningStrategies = partitioningStrategies;
-        this.sliceHeads = ConcurrentCollections.newQueue();
+        this.queryHeads = ConcurrentCollections.newQueue();
+        this.segmentHeads = ConcurrentCollections.newQueue();
         this.stealableSlices = ConcurrentCollections.newQueue();
         for (LuceneSlice slice : sliceList) {
-            if (slice.getLeaf(0).minDoc() == 0) {
-                sliceHeads.add(slice.slicePosition());
+            if (slice.queryHead()) {
+                queryHeads.add(slice.slicePosition());
+            } else if (slice.getLeaf(0).minDoc() == 0) {
+                segmentHeads.add(slice.slicePosition());
             } else {
                 stealableSlices.add(slice.slicePosition());
             }

@@ -120,12 +131,14 @@ public record QueryAndTags(Query query, List<Object> tags) {}
     /**
      * Retrieves the next available {@link LuceneSlice} for processing.
      * <p>
-     * This method implements a three-tiered strategy to minimize the overhead of switching between segments:
+     * This method implements a four-tiered strategy to minimize the overhead of switching between queries/segments:
      * 1. If a previous slice is provided, it first attempts to return the next sequential slice.
-     * This keeps a thread working on the same segments, minimizing the overhead of segment switching.
-     * 2. If affinity fails, it returns a slice from the {@link #sliceHeads} queue, which is an entry point for
-     * a new, independent group of segments, allowing the calling Driver to work on a fresh set of segments.
-     * 3. If the {@link #sliceHeads} queue is exhausted, it "steals" a slice
+     * This keeps a thread working on the same query and same segment, minimizing the overhead of query/segment switching.
+     * 2. If affinity fails, it returns a slice from the {@link #queryHeads} queue, which is an entry point for
+     * a new query, allowing the calling Driver to work on a fresh query with a new set of segments.
+     * 3. If the {@link #queryHeads} queue is exhausted, it returns a slice from the {@link #segmentHeads} queue of other queries,
+     * which is an entry point for a new, independent group of segments, allowing the calling Driver to work on a fresh set of segments.
+     * 4. If the {@link #segmentHeads} queue is exhausted, it "steals" a slice
      * from the {@link #stealableSlices} queue. This fallback ensures all threads remain utilized.
      *
      * @param prev the previously returned {@link LuceneSlice}, or {@code null} if starting

@@ -142,7 +155,7 @@ public LuceneSlice nextSlice(LuceneSlice prev) {
                 }
             }
         }
-        for (var ids : List.of(sliceHeads, stealableSlices)) {
+        for (var ids : List.of(queryHeads, segmentHeads, stealableSlices)) {
             Integer nextId;
             while ((nextId = ids.poll()) != null) {
                 var slice = slices.getAndSet(nextId, null);

@@ -209,9 +222,12 @@ public static LuceneSliceQueue create(
             partitioningStrategies.put(ctx.shardIdentifier(), partitioning);
             List<List<PartialLeafReaderContext>> groups = partitioning.groups(ctx.searcher(), taskConcurrency);
             Weight weight = weight(ctx, query, scoreMode);
+            boolean queryHead = true;
             for (List<PartialLeafReaderContext> group : groups) {
                 if (group.isEmpty() == false) {
-                    slices.add(new LuceneSlice(nextSliceId++, ctx, group, weight, queryAndExtra.tags));
+                    final int slicePosition = nextSliceId++;
+                    slices.add(new LuceneSlice(slicePosition, queryHead, ctx, group, weight, queryAndExtra.tags));
+                    queryHead = false;
                 }
             }
         }
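
In effect, nextSlice now polls in four tiers. Below is a condensed sketch of the order described in the javadoc above; it is not the verbatim implementation, and tryTake is a hypothetical helper standing in for the sequential-affinity lookup.

// Condensed sketch of the four-tier polling order (assumed helper names).
LuceneSlice nextSlice(LuceneSlice prev) {
    if (prev != null) {
        // 1. Sequential affinity: stay on the same query and adjacent segments.
        LuceneSlice sequential = tryTake(prev.slicePosition() + 1); // hypothetical helper
        if (sequential != null) {
            return sequential;
        }
    }
    // 2. Fresh query heads, 3. segment-group heads of other queries, 4. work stealing.
    for (var tier : List.of(queryHeads, segmentHeads, stealableSlices)) {
        Integer id;
        while ((id = tier.poll()) != null) {
            LuceneSlice slice = slices.getAndSet(id, null); // claim atomically; null if already taken
            if (slice != null) {
                return slice;
            }
        }
    }
    return null; // the queue is exhausted
}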
