@@ -608,26 +608,33 @@ public void chunkedInfer(
608608 return ;
609609 }
610610
611- var configUpdate = chunkingOptions != null
612- ? new TokenizationConfigUpdate (chunkingOptions .windowSize (), chunkingOptions .span ())
613- : new TokenizationConfigUpdate (null , null );
611+ if (model instanceof ElasticsearchInternalModel esModel ) {
614612
615- var request = buildInferenceRequest (
616- model .getConfigurations ().getInferenceEntityId (),
617- configUpdate ,
618- input ,
619- inputType ,
620- timeout ,
621- true
622- );
613+ var configUpdate = chunkingOptions != null
614+ ? new TokenizationConfigUpdate (chunkingOptions .windowSize (), chunkingOptions .span ())
615+ : new TokenizationConfigUpdate (null , null );
616+
617+ var request = buildInferenceRequest (
618+ model .getConfigurations ().getInferenceEntityId (),
619+ configUpdate ,
620+ input ,
621+ inputType ,
622+ timeout ,
623+ true
624+ );
623625
624- client .execute (
625- InferModelAction .INSTANCE ,
626- request ,
627- listener .delegateFailureAndWrap (
626+ ActionListener <InferModelAction .Response > mlResultsListener = listener .delegateFailureAndWrap (
628627 (l , inferenceResult ) -> l .onResponse (translateToChunkedResults (inferenceResult .getInferenceResults ()))
629- )
630- );
628+ );
629+
630+ var maybeDeployListener = mlResultsListener .delegateResponse (
631+ (l , exception ) -> maybeStartDeployment (esModel , exception , request , mlResultsListener )
632+ );
633+
634+ client .execute (InferModelAction .INSTANCE , request , maybeDeployListener );
635+ } else {
636+ listener .onFailure (notElasticsearchModelException (model ));
637+ }
631638 }
632639
633640 private static List <ChunkedInferenceServiceResults > translateToChunkedResults (List <InferenceResults > inferenceResults ) {
0 commit comments