[ML] Deploy default on chunked infer (#114141)

davidkyle · web-flow · commit c59e3509dab7 · 2024-10-04T18:01:13.000+01:00
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java
@@ -608,26 +608,33 @@ public void chunkedInfer(
             return;
         }
 
-        var configUpdate = chunkingOptions != null
-            ? new TokenizationConfigUpdate(chunkingOptions.windowSize(), chunkingOptions.span())
-            : new TokenizationConfigUpdate(null, null);
+        if (model instanceof ElasticsearchInternalModel esModel) {
 
-        var request = buildInferenceRequest(
-            model.getConfigurations().getInferenceEntityId(),
-            configUpdate,
-            input,
-            inputType,
-            timeout,
-            true
-        );
+            var configUpdate = chunkingOptions != null
+                ? new TokenizationConfigUpdate(chunkingOptions.windowSize(), chunkingOptions.span())
+                : new TokenizationConfigUpdate(null, null);
+
+            var request = buildInferenceRequest(
+                model.getConfigurations().getInferenceEntityId(),
+                configUpdate,
+                input,
+                inputType,
+                timeout,
+                true
+            );
 
-        client.execute(
-            InferModelAction.INSTANCE,
-            request,
-            listener.delegateFailureAndWrap(
+            ActionListener<InferModelAction.Response> mlResultsListener = listener.delegateFailureAndWrap(
                 (l, inferenceResult) -> l.onResponse(translateToChunkedResults(inferenceResult.getInferenceResults()))
-            )
-        );
+            );
+
+            var maybeDeployListener = mlResultsListener.delegateResponse(
+                (l, exception) -> maybeStartDeployment(esModel, exception, request, mlResultsListener)
+            );
+
+            client.execute(InferModelAction.INSTANCE, request, maybeDeployListener);
+        } else {
+            listener.onFailure(notElasticsearchModelException(model));
+        }
     }
 
     private static List<ChunkedInferenceServiceResults> translateToChunkedResults(List<InferenceResults> inferenceResults) {