Skip to content

Commit c59e350

Browse files
authored
[ML] Deploy default on chunked infer (#114141)
1 parent bb22946 commit c59e350

File tree

1 file changed

+24 -17 lines changed

1 file changed

+24 -17 lines changed

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java

Lines changed: 24 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -608,26 +608,33 @@ public void chunkedInfer(
608608
return;
609609
}
610610

611-
var configUpdate = chunkingOptions != null
612-
? new TokenizationConfigUpdate(chunkingOptions.windowSize(), chunkingOptions.span())
613-
: new TokenizationConfigUpdate(null, null);
611+
if (model instanceof ElasticsearchInternalModel esModel) {
614612

615-
var request = buildInferenceRequest(
616-
model.getConfigurations().getInferenceEntityId(),
617-
configUpdate,
618-
input,
619-
inputType,
620-
timeout,
621-
true
622-
);
613+
var configUpdate = chunkingOptions != null
614+
? new TokenizationConfigUpdate(chunkingOptions.windowSize(), chunkingOptions.span())
615+
: new TokenizationConfigUpdate(null, null);
616+
617+
var request = buildInferenceRequest(
618+
model.getConfigurations().getInferenceEntityId(),
619+
configUpdate,
620+
input,
621+
inputType,
622+
timeout,
623+
true
624+
);
623625

624-
client.execute(
625-
InferModelAction.INSTANCE,
626-
request,
627-
listener.delegateFailureAndWrap(
626+
ActionListener<InferModelAction.Response> mlResultsListener = listener.delegateFailureAndWrap(
628627
(l, inferenceResult) -> l.onResponse(translateToChunkedResults(inferenceResult.getInferenceResults()))
629-
)
630-
);
628+
);
629+
630+
var maybeDeployListener = mlResultsListener.delegateResponse(
631+
(l, exception) -> maybeStartDeployment(esModel, exception, request, mlResultsListener)
632+
);
633+
634+
client.execute(InferModelAction.INSTANCE, request, maybeDeployListener);
635+
} else {
636+
listener.onFailure(notElasticsearchModelException(model));
637+
}
631638
}
632639

633640
private static List<ChunkedInferenceServiceResults> translateToChunkedResults(List<InferenceResults> inferenceResults) {

0 commit comments

Comments
 (0)