1313import org .elasticsearch .compute .operator .AsyncOperator ;
1414import org .elasticsearch .compute .operator .DriverContext ;
1515import org .elasticsearch .core .Releasable ;
16+ import org .elasticsearch .core .Releasables ;
1617import org .elasticsearch .inference .InferenceServiceResults ;
1718import org .elasticsearch .threadpool .ThreadPool ;
1819import org .elasticsearch .xpack .core .inference .action .InferenceAction ;
2425
2526import static org .elasticsearch .common .logging .LoggerMessageFormat .format ;
2627
27- public abstract class InferenceOperator extends AsyncOperator <InferenceOperator .OngoingInference > {
/**
 * An abstract asynchronous operator that performs throttled bulk inference execution using an {@link InferenceRunner}.
 * <p>
 * The {@code InferenceOperator} integrates with the compute framework and supports throttled bulk execution of inference requests. It
 * transforms an input {@link Page} into inference requests, asynchronously executes them, and converts the responses into a new
 * {@link Page}.
 * </p>
 */
35+ public abstract class InferenceOperator extends AsyncOperator <InferenceOperator .OngoingInferenceResult > {
2836 private final String inferenceId ;
2937 private final BlockFactory blockFactory ;
3038 private final BulkInferenceExecutor bulkInferenceExecutor ;
3139
/**
 * Creates the operator.
 *
 * @param driverContext The driver context.
 * @param inferenceRunner The runner used to execute inference requests.
 * @param bulkExecutionConfig Configuration for throttled bulk inference execution.
 * @param threadPool The thread pool handed to the bulk inference executor.
 * @param inferenceId The ID of the inference model to use.
 */
public InferenceOperator(
    DriverContext driverContext,
    InferenceRunner inferenceRunner,
    BulkInferenceExecutionConfig bulkExecutionConfig,
    ThreadPool threadPool,
    String inferenceId
) {
    // NOTE(review): the thread context is taken from the runner's pool while the bulk executor
    // receives the threadPool parameter — presumably intentional, but worth confirming.
    super(driverContext, inferenceRunner.threadPool().getThreadContext(), bulkExecutionConfig.workers());
    this.inferenceId = inferenceId;
    this.blockFactory = driverContext.blockFactory();
    this.bulkInferenceExecutor = new BulkInferenceExecutor(inferenceRunner, threadPool, bulkExecutionConfig);
}
4461
62+ /**
63+ * Returns the {@link BlockFactory} used to create output data blocks.
64+ */
4565 protected BlockFactory blockFactory () {
4666 return blockFactory ;
4767 }
4868
69+ /**
70+ * Returns the inference model ID used for this operator.
71+ */
4972 protected String inferenceId () {
5073 return inferenceId ;
5174 }
5275
76+ /**
77+ * Initiates asynchronous inferences for the given input page.
78+ */
5379 @ Override
54- protected void releaseFetchedOnAnyThread (OngoingInference result ) {
55- releasePageOnAnyThread (result .inputPage );
56- }
57-
58- @ Override
59- protected void performAsync (Page input , ActionListener <OngoingInference > listener ) {
80+ protected void performAsync (Page input , ActionListener <OngoingInferenceResult > listener ) {
6081 try {
6182 BulkInferenceRequestIterator requests = requests (input );
6283 listener = ActionListener .releaseBefore (requests , listener );
63- bulkInferenceExecutor .execute (requests , listener .map (responses -> new OngoingInference (input , responses )));
84+ bulkInferenceExecutor .execute (requests , listener .map (responses -> new OngoingInferenceResult (input , responses )));
6485 } catch (Exception e ) {
6586 listener .onFailure (e );
6687 }
6788 }
6889
90+ /**
91+ * Releases resources associated with an ongoing inference.
92+ */
93+ @ Override
94+ protected void releaseFetchedOnAnyThread (OngoingInferenceResult ongoingInferenceResult ) {
95+ Releasables .close (ongoingInferenceResult );
96+ }
97+
98+ /**
99+ * Returns the next available output page constructed from completed inference results.
100+ */
69101 @ Override
70102 public Page getOutput () {
71- OngoingInference ongoingInference = fetchFromBuffer ();
72- if (ongoingInference == null ) {
103+ OngoingInferenceResult ongoingInferenceResult = fetchFromBuffer ();
104+ if (ongoingInferenceResult == null ) {
73105 return null ;
74106 }
75107
76- try (OutputBuilder outputBuilder = outputBuilder (ongoingInference .inputPage )) {
77- ongoingInference .responses .forEach (outputBuilder ::addInferenceResponse );
108+ try (OutputBuilder outputBuilder = outputBuilder (ongoingInferenceResult .inputPage )) {
109+ assert ongoingInferenceResult .inputPage .getPositionCount () == ongoingInferenceResult .responses .size ();
110+ for (InferenceAction .Response response : ongoingInferenceResult .responses ) {
111+ try {
112+ outputBuilder .addInferenceResponse (response );
113+ } catch (IllegalArgumentException e ) {
114+ throw new IllegalStateException ("Invalid inference response" , e );
115+ }
116+ }
78117 return outputBuilder .buildOutput ();
118+
79119 } finally {
80- releaseFetchedOnAnyThread (ongoingInference );
120+ releaseFetchedOnAnyThread (ongoingInferenceResult );
81121 }
82122 }
83123
/**
 * Converts the given input page into a sequence of inference requests.
 *
 * @param input The input page to process.
 * @return An iterator over the inference requests derived from {@code input}; it is released
 *         by {@link #performAsync} once execution completes.
 */
protected abstract BulkInferenceRequestIterator requests(Page input);

/**
 * Creates a new {@link OutputBuilder} instance used to build the output page.
 *
 * @param input The corresponding input page used to generate the inference requests.
 * @return A builder that accumulates inference responses for {@code input}.
 */
protected abstract OutputBuilder outputBuilder(Page input);
87137
138+ /**
139+ * An interface for accumulating inference responses and constructing a result {@link Page}.
140+ */
88141 public interface OutputBuilder extends Releasable {
142+
/**
 * Adds an inference response to the output.
 * <p>
 * Responses must be added in the same order as the corresponding inference requests were
 * generated; failing to preserve order may produce misaligned output rows.
 * </p>
 *
 * @param inferenceResponse The inference response to include.
 */
void addInferenceResponse(InferenceAction.Response inferenceResponse);
90153
/**
 * Builds the final output page from the accumulated inference responses.
 *
 * @return The constructed output page.
 */
Page buildOutput();
92160
93161 static <IR extends InferenceServiceResults > IR inferenceResults (InferenceAction .Response inferenceResponse , Class <IR > clazz ) {
@@ -102,7 +170,18 @@ static <IR extends InferenceServiceResults> IR inferenceResults(InferenceAction.
102170 }
103171 }
104172
105- public record OngoingInference (Page inputPage , List <InferenceAction .Response > responses ) {
106-
173+ /**
174+ * Represents the result of an ongoing inference operation, including the original input page
175+ * and the list of inference responses.
176+ *
177+ * @param inputPage The input page used to generate inference requests.
178+ * @param responses The inference responses returned by the inference service.
179+ */
180+ public record OngoingInferenceResult (Page inputPage , List <InferenceAction .Response > responses ) implements Releasable {
181+
182+ @ Override
183+ public void close () {
184+ releasePageOnAnyThread (inputPage );
185+ }
107186 }
108187}
0 commit comments