elastic · afoucret · Jul 31, 2025 · Jul 28, 2025 · Jul 28, 2025 · Jul 28, 2025
diff --git a/.../esql/compute/test/src/main/java/org/elasticsearch/compute/test/CannedSourceOperator.java b/.../esql/compute/test/src/main/java/org/elasticsearch/compute/test/CannedSourceOperator.java
@@ -84,14 +84,20 @@ public static List<Page> deepCopyOf(BlockFactory blockFactory, List<Page> pages)
         try {
             for (Page p : pages) {
                 Block[] blocks = new Block[p.getBlockCount()];
-                for (int b = 0; b < blocks.length; b++) {
-                    Block orig = p.getBlock(b);
-                    try (Block.Builder builder = orig.elementType().newBlockBuilder(p.getPositionCount(), blockFactory)) {
-                        builder.copyFrom(orig, 0, p.getPositionCount());
-                        blocks[b] = builder.build();
+                try {
+                    for (int b = 0; b < blocks.length; b++) {
+                        Block orig = p.getBlock(b);
+                        try (Block.Builder builder = orig.elementType().newBlockBuilder(p.getPositionCount(), blockFactory)) {
+                            builder.copyFrom(orig, 0, p.getPositionCount());
+                            blocks[b] = builder.build();
+                        }
                     }
+                    out.add(new Page(blocks));
+                } catch (Exception e) {
+                    // Copying this page failed: release the blocks built so far before rethrowing.
+                    Releasables.closeExpectNoException(blocks);
+                    throw e;
                 }
-                out.add(new Page(blocks));
             }
         } finally {
             if (pages.size() != out.size()) {

diff --git a/...plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java b/...plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java
@@ -12,7 +12,7 @@
 import org.apache.lucene.sandbox.document.HalfFloatPoint;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.ExceptionsHelper;
-import org.elasticsearch.action.ActionListener;
+import org.elasticsearch.client.internal.Client;
 import org.elasticsearch.cluster.RemoteException;
 import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
 import org.elasticsearch.cluster.project.ProjectResolver;
@@ -76,7 +76,7 @@
 import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.NotEquals;
 import org.elasticsearch.xpack.esql.index.EsIndex;
 import org.elasticsearch.xpack.esql.inference.InferenceResolution;
-import org.elasticsearch.xpack.esql.inference.InferenceRunner;
+import org.elasticsearch.xpack.esql.inference.InferenceService;
 import org.elasticsearch.xpack.esql.optimizer.LogicalOptimizerContext;
 import org.elasticsearch.xpack.esql.parser.QueryParam;
 import org.elasticsearch.xpack.esql.plan.logical.Enrich;
@@ -161,8 +161,6 @@
 import static org.hamcrest.Matchers.instanceOf;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
-import static org.mockito.ArgumentMatchers.any;
-import static org.mockito.Mockito.doAnswer;
 import static org.mockito.Mockito.mock;
 
 public final class EsqlTestUtils {
@@ -422,20 +420,9 @@ public static LogicalOptimizerContext unboundLogicalOptimizerContext() {
         mock(ProjectResolver.class),
         mock(IndexNameExpressionResolver.class),
         null,
-        mockInferenceRunner()
+        new InferenceService(mock(Client.class))
     );
 
-    @SuppressWarnings("unchecked")
-    private static InferenceRunner mockInferenceRunner() {
-        InferenceRunner inferenceRunner = mock(InferenceRunner.class);
-        doAnswer(i -> {
-            i.getArgument(1, ActionListener.class).onResponse(emptyInferenceResolution());
-            return null;
-        }).when(inferenceRunner).resolveInferenceIds(any(), any());
-
-        return inferenceRunner;
-    }
-
     private EsqlTestUtils() {}
 
     public static Configuration configuration(QueryPragmas pragmas, String query) {

diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java
@@ -188,9 +188,9 @@ public class Analyzer extends ParameterizedRuleExecutor<LogicalPlan, AnalyzerCon
             Limiter.ONCE,
             new ResolveTable(),
             new ResolveEnrich(),
-            new ResolveInference(),
             new ResolveLookupTables(),
             new ResolveFunctions(),
+            new ResolveInference(),
             new DateMillisToNanosInEsRelation(IMPLICIT_CASTING_DATE_AND_DATE_NANOS.isEnabled())
         ),
         new Batch<>(
@@ -414,34 +414,6 @@ private static NamedExpression createEnrichFieldExpression(
         }
     }
 
-    private static class ResolveInference extends ParameterizedAnalyzerRule<InferencePlan<?>, AnalyzerContext> {
-        @Override
-        protected LogicalPlan rule(InferencePlan<?> plan, AnalyzerContext context) {
-            assert plan.inferenceId().resolved() && plan.inferenceId().foldable();
-
-            String inferenceId = BytesRefs.toString(plan.inferenceId().fold(FoldContext.small()));
-            ResolvedInference resolvedInference = context.inferenceResolution().getResolvedInference(inferenceId);
-
-            if (resolvedInference != null && resolvedInference.taskType() == plan.taskType()) {
-                return plan;
-            } else if (resolvedInference != null) {
-                String error = "cannot use inference endpoint ["
-                    + inferenceId
-                    + "] with task type ["
-                    + resolvedInference.taskType()
-                    + "] within a "
-                    + plan.nodeName()
-                    + " command. Only inference endpoints with the task type ["
-                    + plan.taskType()
-                    + "] are supported.";
-                return plan.withInferenceResolutionError(inferenceId, error);
-            } else {
-                String error = context.inferenceResolution().getError(inferenceId);
-                return plan.withInferenceResolutionError(inferenceId, error);
-            }
-        }
-    }
-
     private static class ResolveLookupTables extends ParameterizedAnalyzerRule<Lookup, AnalyzerContext> {
 
         @Override
@@ -1335,6 +1307,41 @@ public static org.elasticsearch.xpack.esql.core.expression.function.Function res
         }
     }
 
+    private static class ResolveInference extends ParameterizedRule<LogicalPlan, LogicalPlan, AnalyzerContext> {
+
+        @Override
+        public LogicalPlan apply(LogicalPlan plan, AnalyzerContext context) {
+            return plan.transformDown(InferencePlan.class, p -> resolveInferencePlan(p, context));
+        }
+
+        private LogicalPlan resolveInferencePlan(InferencePlan<?> plan, AnalyzerContext context) {
+            assert plan.inferenceId().resolved() && plan.inferenceId().foldable();
+
+            String inferenceId = BytesRefs.toString(plan.inferenceId().fold(FoldContext.small()));
+            ResolvedInference resolvedInference = context.inferenceResolution().getResolvedInference(inferenceId);
+
+            if (resolvedInference == null) {
+                String error = context.inferenceResolution().getError(inferenceId);
+                return plan.withInferenceResolutionError(inferenceId, error);
+            }
+
+            if (resolvedInference.taskType() != plan.taskType()) {
+                String error = "cannot use inference endpoint ["
+                    + inferenceId
+                    + "] with task type ["
+                    + resolvedInference.taskType()
+                    + "] within a "
+                    + plan.nodeName()
+                    + " command. Only inference endpoints with the task type ["
+                    + plan.taskType()
+                    + "] are supported.";
+                return plan.withInferenceResolutionError(inferenceId, error);
+            }
+
+            return plan;
+        }
+    }
+
     private static class AddImplicitLimit extends ParameterizedRule<LogicalPlan, LogicalPlan, AnalyzerContext> {
         @Override
         public LogicalPlan apply(LogicalPlan logicalPlan, AnalyzerContext context) {

diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/PreAnalyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/PreAnalyzer.java
@@ -13,7 +13,6 @@
 import org.elasticsearch.xpack.esql.plan.logical.Enrich;
 import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
 import org.elasticsearch.xpack.esql.plan.logical.UnresolvedRelation;
-import org.elasticsearch.xpack.esql.plan.logical.inference.InferencePlan;
 
 import java.util.ArrayList;
 import java.util.HashSet;
@@ -28,25 +27,17 @@
 public class PreAnalyzer {
 
     public static class PreAnalysis {
-        public static final PreAnalysis EMPTY = new PreAnalysis(null, emptyList(), emptyList(), emptyList(), emptyList());
+        public static final PreAnalysis EMPTY = new PreAnalysis(null, emptyList(), emptyList(), emptyList());
 
         public final IndexMode indexMode;
         public final List<IndexPattern> indices;
         public final List<Enrich> enriches;
-        public final List<InferencePlan<?>> inferencePlans;
         public final List<IndexPattern> lookupIndices;
 
-        public PreAnalysis(
-            IndexMode indexMode,
-            List<IndexPattern> indices,
-            List<Enrich> enriches,
-            List<InferencePlan<?>> inferencePlans,
-            List<IndexPattern> lookupIndices
-        ) {
+        public PreAnalysis(IndexMode indexMode, List<IndexPattern> indices, List<Enrich> enriches, List<IndexPattern> lookupIndices) {
             this.indexMode = indexMode;
             this.indices = indices;
             this.enriches = enriches;
-            this.inferencePlans = inferencePlans;
             this.lookupIndices = lookupIndices;
         }
     }
@@ -64,7 +55,7 @@ protected PreAnalysis doPreAnalyze(LogicalPlan plan) {
 
         List<Enrich> unresolvedEnriches = new ArrayList<>();
         List<IndexPattern> lookupIndices = new ArrayList<>();
-        List<InferencePlan<?>> unresolvedInferencePlans = new ArrayList<>();
+
         Holder<IndexMode> indexMode = new Holder<>();
         plan.forEachUp(UnresolvedRelation.class, p -> {
             if (p.indexMode() == IndexMode.LOOKUP) {
@@ -78,11 +69,11 @@ protected PreAnalysis doPreAnalyze(LogicalPlan plan) {
         });
 
         plan.forEachUp(Enrich.class, unresolvedEnriches::add);
-        plan.forEachUp(InferencePlan.class, unresolvedInferencePlans::add);
 
         // mark plan as preAnalyzed (if it were marked, there would be no analysis)
         plan.forEachUp(LogicalPlan::setPreAnalyzed);
 
-        return new PreAnalysis(indexMode.get(), indices.stream().toList(), unresolvedEnriches, unresolvedInferencePlans, lookupIndices);
+        return new PreAnalysis(indexMode.get(), indices.stream().toList(), unresolvedEnriches, lookupIndices);
     }
+
 }
diff --git a/...k/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/inference/InferenceOperator.java b/...k/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/inference/InferenceOperator.java
@@ -15,18 +15,16 @@
 import org.elasticsearch.core.Releasable;
 import org.elasticsearch.core.Releasables;
 import org.elasticsearch.inference.InferenceServiceResults;
-import org.elasticsearch.threadpool.ThreadPool;
 import org.elasticsearch.xpack.core.inference.action.InferenceAction;
-import org.elasticsearch.xpack.esql.inference.bulk.BulkInferenceExecutionConfig;
-import org.elasticsearch.xpack.esql.inference.bulk.BulkInferenceExecutor;
 import org.elasticsearch.xpack.esql.inference.bulk.BulkInferenceRequestIterator;
+import org.elasticsearch.xpack.esql.inference.bulk.BulkInferenceRunner;
 
 import java.util.List;
 
 import static org.elasticsearch.common.logging.LoggerMessageFormat.format;
 
 /**
- * An abstract asynchronous operator that performs throttled bulk inference execution using an {@link InferenceRunner}.
+ * An abstract asynchronous operator that performs throttled bulk inference execution using a {@link BulkInferenceRunner}.
  * <p>
  * The {@code InferenceOperator} integrates with the compute framework  supports throttled bulk execution of inference requests. It
  * transforms input {@link Page} into inference requests, asynchronously executes them, and converts the responses into a new {@link Page}.
@@ -35,27 +33,25 @@
 public abstract class InferenceOperator extends AsyncOperator<InferenceOperator.OngoingInferenceResult> {
     private final String inferenceId;
     private final BlockFactory blockFactory;
-    private final BulkInferenceExecutor bulkInferenceExecutor;
+    private final BulkInferenceRunner bulkInferenceRunner;
 
     /**
      * Constructs a new {@code InferenceOperator}.
      *
      * @param driverContext        The driver context.
-     * @param inferenceRunner      The runner used to execute inference requests.
-     * @param bulkExecutionConfig  Configuration for inference execution.
-     * @param threadPool           The thread pool used for executing async inference.
+     * @param bulkInferenceRunner  Inference runner used to execute inference requests.
      * @param inferenceId          The ID of the inference model to use.
+     * @param maxOutstandingPages  The maximum number of pages processed concurrently.
      */
     public InferenceOperator(
         DriverContext driverContext,
-        InferenceRunner inferenceRunner,
-        BulkInferenceExecutionConfig bulkExecutionConfig,
-        ThreadPool threadPool,
-        String inferenceId
+        BulkInferenceRunner bulkInferenceRunner,
+        String inferenceId,
+        int maxOutstandingPages
     ) {
-        super(driverContext, inferenceRunner.threadPool().getThreadContext(), bulkExecutionConfig.workers());
+        super(driverContext, bulkInferenceRunner.threadPool().getThreadContext(), maxOutstandingPages);
         this.blockFactory = driverContext.blockFactory();
-        this.bulkInferenceExecutor = new BulkInferenceExecutor(inferenceRunner, threadPool, bulkExecutionConfig);
+        this.bulkInferenceRunner = bulkInferenceRunner;
         this.inferenceId = inferenceId;
     }
 
@@ -81,7 +77,8 @@ protected void performAsync(Page input, ActionListener<OngoingInferenceResult> l
         try {
             BulkInferenceRequestIterator requests = requests(input);
             listener = ActionListener.releaseBefore(requests, listener);
-            bulkInferenceExecutor.execute(requests, listener.map(responses -> new OngoingInferenceResult(input, responses)));
+
+            bulkInferenceRunner.executeBulk(requests, listener.map(responses -> new OngoingInferenceResult(input, responses)));
         } catch (Exception e) {
             listener.onFailure(e);
         }
@@ -110,9 +107,9 @@ public Page getOutput() {
                 outputBuilder.addInferenceResponse(response);
             }
             return outputBuilder.buildOutput();
-
-        } finally {
+        } catch (Exception e) {
             releaseFetchedOnAnyThread(ongoingInferenceResult);
+            throw e;
         }
     }