elastic · jonathan-buttner · Oct 10, 2025 · Sep 30, 2025 · Sep 30, 2025 · Sep 30, 2025
diff --git a/server/src/main/java/org/elasticsearch/inference/UnifiedCompletionRequest.java b/server/src/main/java/org/elasticsearch/inference/UnifiedCompletionRequest.java
@@ -89,51 +89,34 @@ public record UnifiedCompletionRequest(
 
     /**
      * Creates a {@link org.elasticsearch.xcontent.ToXContent.Params} that causes ToXContent to include the key values:
-     * - Key: {@link #MODEL_FIELD}, Value: modelId
+     * - Key: {@link #MODEL_FIELD}, Value: modelId, if modelId is not null
      * - Key: {@link #MAX_TOKENS_FIELD}, Value: {@link #maxCompletionTokens()}
      */
-    public static Params withMaxTokens(String modelId, Params params) {
-        return new DelegatingMapParams(
-            Map.ofEntries(Map.entry(MODEL_ID_PARAM, modelId), Map.entry(MAX_TOKENS_PARAM, MAX_TOKENS_FIELD)),
-            params
-        );
+    public static Params withMaxTokens(@Nullable String modelId, Params params) {
+        Map<String, String> entries = modelId != null
+            ? Map.ofEntries(Map.entry(MODEL_ID_PARAM, modelId), Map.entry(MAX_TOKENS_PARAM, MAX_TOKENS_FIELD))
+            : Map.ofEntries(Map.entry(MAX_TOKENS_PARAM, MAX_TOKENS_FIELD));
+        return new DelegatingMapParams(entries, params);
     }
 
     /**
      * Creates a {@link org.elasticsearch.xcontent.ToXContent.Params} that causes ToXContent to include the key values:
+     * - Key: {@link #MODEL_FIELD}, Value: modelId, if modelId is not null
      * - Key: {@link #MAX_TOKENS_FIELD}, Value: {@link #maxCompletionTokens()}
-     */
-    public static Params withMaxTokens(Params params) {
-        return new DelegatingMapParams(Map.ofEntries(Map.entry(MAX_TOKENS_PARAM, MAX_TOKENS_FIELD)), params);
-    }
-
-    /**
-     * Creates a {@link org.elasticsearch.xcontent.ToXContent.Params} that causes ToXContent to include the key values:
-     * - Key: {@link #MODEL_FIELD}, Value: modelId
-     * - Key: {@link #MAX_TOKENS_FIELD}, Value: {@link #MAX_TOKENS_FIELD}
      * - Key: {@link #INCLUDE_STREAM_OPTIONS_PARAM}, Value: "false"
      */
-    public static Params withMaxTokensAndSkipStreamOptionsField(String modelId, Params params) {
-        return new DelegatingMapParams(
-            Map.ofEntries(
+    public static Params withMaxTokensAndSkipStreamOptionsField(@Nullable String modelId, Params params) {
+        Map<String, String> entries = modelId != null
+            ? Map.ofEntries(
                 Map.entry(MODEL_ID_PARAM, modelId),
                 Map.entry(MAX_TOKENS_PARAM, MAX_TOKENS_FIELD),
                 Map.entry(INCLUDE_STREAM_OPTIONS_PARAM, Boolean.FALSE.toString())
-            ),
-            params
-        );
-    }
-
-    /**
-     * Creates a {@link org.elasticsearch.xcontent.ToXContent.Params} that causes ToXContent to include the key values:
-     * - Key: {@link #MAX_TOKENS_FIELD}, Value: {@link #MAX_TOKENS_FIELD}
-     * - Key: {@link #INCLUDE_STREAM_OPTIONS_PARAM}, Value: "false"
-     */
-    public static Params withMaxTokensAndSkipStreamOptionsField(Params params) {
-        return new DelegatingMapParams(
-            Map.ofEntries(Map.entry(MAX_TOKENS_PARAM, MAX_TOKENS_FIELD), Map.entry(INCLUDE_STREAM_OPTIONS_PARAM, Boolean.FALSE.toString())),
-            params
-        );
+            )
+            : Map.ofEntries(
+                Map.entry(MAX_TOKENS_PARAM, MAX_TOKENS_FIELD),
+                Map.entry(INCLUDE_STREAM_OPTIONS_PARAM, Boolean.FALSE.toString())
+            );
+        return new DelegatingMapParams(entries, params);
     }
 
     /**

diff --git a/.../elasticsearch/xpack/inference/services/ai21/request/Ai21ChatCompletionRequestEntity.java b/.../elasticsearch/xpack/inference/services/ai21/request/Ai21ChatCompletionRequestEntity.java
@@ -33,13 +33,7 @@ public Ai21ChatCompletionRequestEntity(UnifiedChatInput unifiedChatInput, @Nulla
     @Override
     public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
         builder.startObject();
-        if (modelId != null) {
-            // Some models require the model ID to be specified in the request body
-            unifiedRequestEntity.toXContent(builder, UnifiedCompletionRequest.withMaxTokens(modelId, params));
-        } else {
-            // Some models do not require the model ID to be specified in the request body
-            unifiedRequestEntity.toXContent(builder, UnifiedCompletionRequest.withMaxTokens(params));
-        }
+        unifiedRequestEntity.toXContent(builder, UnifiedCompletionRequest.withMaxTokens(modelId, params));
         builder.endObject();
         return builder;
     }

diff --git a/.../org/elasticsearch/xpack/inference/services/googlevertexai/GoogleModelGardenProvider.java b/.../org/elasticsearch/xpack/inference/services/googlevertexai/GoogleModelGardenProvider.java
@@ -7,6 +7,29 @@
 
 package org.elasticsearch.xpack.inference.services.googlevertexai;
 
+import org.elasticsearch.xcontent.ToXContentObject;
+import org.elasticsearch.xpack.inference.external.http.retry.ErrorResponse;
+import org.elasticsearch.xpack.inference.external.http.retry.ResponseHandler;
+import org.elasticsearch.xpack.inference.external.http.sender.UnifiedChatInput;
+import org.elasticsearch.xpack.inference.services.ai21.completion.Ai21ChatCompletionResponseHandler;
+import org.elasticsearch.xpack.inference.services.ai21.request.Ai21ChatCompletionRequestEntity;
+import org.elasticsearch.xpack.inference.services.anthropic.AnthropicChatCompletionResponseHandler;
+import org.elasticsearch.xpack.inference.services.anthropic.AnthropicResponseHandler;
+import org.elasticsearch.xpack.inference.services.anthropic.response.AnthropicChatCompletionResponseEntity;
+import org.elasticsearch.xpack.inference.services.googlevertexai.completion.GoogleVertexAiChatCompletionTaskSettings;
+import org.elasticsearch.xpack.inference.services.googlevertexai.request.completion.GoogleModelGardenAnthropicChatCompletionRequestEntity;
+import org.elasticsearch.xpack.inference.services.googlevertexai.request.completion.GoogleVertexAiUnifiedChatCompletionRequestEntity;
+import org.elasticsearch.xpack.inference.services.googlevertexai.response.GoogleVertexAiCompletionResponseEntity;
+import org.elasticsearch.xpack.inference.services.huggingface.HuggingFaceChatCompletionResponseHandler;
+import org.elasticsearch.xpack.inference.services.huggingface.request.completion.HuggingFaceUnifiedChatCompletionRequestEntity;
+import org.elasticsearch.xpack.inference.services.llama.completion.LlamaChatCompletionResponseHandler;
+import org.elasticsearch.xpack.inference.services.llama.completion.LlamaCompletionResponseHandler;
+import org.elasticsearch.xpack.inference.services.llama.request.completion.LlamaChatCompletionRequestEntity;
+import org.elasticsearch.xpack.inference.services.mistral.MistralUnifiedChatCompletionResponseHandler;
+import org.elasticsearch.xpack.inference.services.mistral.request.completion.MistralChatCompletionRequestEntity;
+import org.elasticsearch.xpack.inference.services.openai.OpenAiChatCompletionResponseHandler;
+import org.elasticsearch.xpack.inference.services.openai.response.OpenAiChatCompletionResponseEntity;
+
 import java.util.Locale;
 
 /**
@@ -20,7 +43,120 @@ public enum GoogleModelGardenProvider {
     MISTRAL,
     AI21;
 
-    public static final String NAME = "google_model_garden_provider";
+    private static final ResponseHandler GOOGLE_VERTEX_AI_COMPLETION_HANDLER = new GoogleVertexAiResponseHandler(
+        "Google Vertex AI completion",
+        GoogleVertexAiCompletionResponseEntity::fromResponse,
+        GoogleVertexAiUnifiedChatCompletionResponseHandler.GoogleVertexAiErrorResponse::fromResponse,
+        true
+    );
+
+    private static final ResponseHandler GOOGLE_MODEL_GARDEN_ANTHROPIC_COMPLETION_HANDLER = new AnthropicResponseHandler(
+        "Google Model Garden Anthropic completion",
+        AnthropicChatCompletionResponseEntity::fromResponse,
+        true
+    );
+
+    private static final ResponseHandler GOOGLE_MODEL_GARDEN_META_COMPLETION_HANDLER = new LlamaCompletionResponseHandler(
+        "Google Model Garden Meta completion",
+        OpenAiChatCompletionResponseEntity::fromResponse
+    );
+
+    private static final ResponseHandler GOOGLE_MODEL_GARDEN_HUGGING_FACE_COMPLETION_HANDLER = new OpenAiChatCompletionResponseHandler(
+        "Google Model Garden Hugging Face completion",
+        OpenAiChatCompletionResponseEntity::fromResponse
+    );
+
+    private static final ResponseHandler GOOGLE_MODEL_GARDEN_MISTRAL_COMPLETION_HANDLER = new OpenAiChatCompletionResponseHandler(
+        "Google Model Garden Mistral completion",
+        OpenAiChatCompletionResponseEntity::fromResponse,
+        ErrorResponse::fromResponse
+    );
+
+    private static final ResponseHandler GOOGLE_MODEL_GARDEN_AI21_COMPLETION_HANDLER = new OpenAiChatCompletionResponseHandler(
+        "Google Model Garden AI21 completion",
+        OpenAiChatCompletionResponseEntity::fromResponse,
+        ErrorResponse::fromResponse
+    );
+
+    private static final ResponseHandler GOOGLE_VERTEX_AI_CHAT_COMPLETION_HANDLER = new GoogleVertexAiUnifiedChatCompletionResponseHandler(
+        "Google Vertex AI chat completion"
+    );
+
+    private static final ResponseHandler ANTHROPIC_CHAT_COMPLETION_HANDLER = new AnthropicChatCompletionResponseHandler(
+        "Google Model Garden Anthropic chat completion"
+    );
+
+    private static final ResponseHandler META_CHAT_COMPLETION_HANDLER = new LlamaChatCompletionResponseHandler(
+        "Google Model Garden Meta chat completion",
+        OpenAiChatCompletionResponseEntity::fromResponse
+    );
+
+    private static final ResponseHandler HUGGING_FACE_CHAT_COMPLETION_HANDLER = new HuggingFaceChatCompletionResponseHandler(
+        "Google Model Garden Hugging Face chat completion",
+        OpenAiChatCompletionResponseEntity::fromResponse
+    );
+
+    private static final ResponseHandler MISTRAL_CHAT_COMPLETION_HANDLER = new MistralUnifiedChatCompletionResponseHandler(
+        "Google Model Garden Mistral chat completions",
+        OpenAiChatCompletionResponseEntity::fromResponse
+    );
+
+    private static final ResponseHandler AI21_CHAT_COMPLETION_HANDLER = new Ai21ChatCompletionResponseHandler(
+        "Google Model Garden Ai21 chat completions",
+        OpenAiChatCompletionResponseEntity::fromResponse
+    );
+
+    /**
+     * Gets the completion response handler for the model garden provider.
+     * @return the ResponseHandler associated with the provider
+     */
+    public ResponseHandler getCompletionResponseHandler() {
+        return switch (this) {
+            case GOOGLE -> GOOGLE_VERTEX_AI_COMPLETION_HANDLER;
+            case ANTHROPIC -> GOOGLE_MODEL_GARDEN_ANTHROPIC_COMPLETION_HANDLER;
+            case META -> GOOGLE_MODEL_GARDEN_META_COMPLETION_HANDLER;
+            case HUGGING_FACE -> GOOGLE_MODEL_GARDEN_HUGGING_FACE_COMPLETION_HANDLER;
+            case MISTRAL -> GOOGLE_MODEL_GARDEN_MISTRAL_COMPLETION_HANDLER;
+            case AI21 -> GOOGLE_MODEL_GARDEN_AI21_COMPLETION_HANDLER;
+        };
+    }
+
+    /**
+     * Gets the chat completion response handler for the model garden provider.
+     * @return the ResponseHandler associated with the provider
+     */
+    public ResponseHandler getChatCompletionResponseHandler() {
+        return switch (this) {
+            case GOOGLE -> GOOGLE_VERTEX_AI_CHAT_COMPLETION_HANDLER;
+            case ANTHROPIC -> ANTHROPIC_CHAT_COMPLETION_HANDLER;
+            case META -> META_CHAT_COMPLETION_HANDLER;
+            case HUGGING_FACE -> HUGGING_FACE_CHAT_COMPLETION_HANDLER;
+            case MISTRAL -> MISTRAL_CHAT_COMPLETION_HANDLER;
+            case AI21 -> AI21_CHAT_COMPLETION_HANDLER;
+        };
+    }
+
+    /**
+     * Creates the request entity for the model garden provider based on the unified chat input and model ID.
+     * @param unifiedChatInput the unified chat input containing messages and parameters for the chat completion request
+     * @param modelId the model ID to be used for the request
+     * @param taskSettings the task settings specific to Google Vertex AI chat completion
+     * @return a ToXContentObject representing the request entity for the provider
+     */
+    public ToXContentObject createRequestEntity(
+        UnifiedChatInput unifiedChatInput,
+        String modelId,
+        GoogleVertexAiChatCompletionTaskSettings taskSettings
+    ) {
+        return switch (this) {
+            case GOOGLE -> new GoogleVertexAiUnifiedChatCompletionRequestEntity(unifiedChatInput, taskSettings.thinkingConfig());
+            case ANTHROPIC -> new GoogleModelGardenAnthropicChatCompletionRequestEntity(unifiedChatInput, taskSettings);
+            case META -> new LlamaChatCompletionRequestEntity(unifiedChatInput, modelId);
+            case HUGGING_FACE -> new HuggingFaceUnifiedChatCompletionRequestEntity(unifiedChatInput, modelId);
+            case MISTRAL -> new MistralChatCompletionRequestEntity(unifiedChatInput, modelId);
+            case AI21 -> new Ai21ChatCompletionRequestEntity(unifiedChatInput, modelId);
+        };
+    }
 
     public static GoogleModelGardenProvider fromString(String name) {
         return valueOf(name.trim().toUpperCase(Locale.ROOT));

diff --git a/...java/org/elasticsearch/xpack/inference/services/googlevertexai/GoogleVertexAiService.java b/...java/org/elasticsearch/xpack/inference/services/googlevertexai/GoogleVertexAiService.java
@@ -33,7 +33,6 @@
 import org.elasticsearch.xpack.inference.chunking.ChunkingSettingsBuilder;
 import org.elasticsearch.xpack.inference.chunking.EmbeddingRequestChunker;
 import org.elasticsearch.xpack.inference.external.action.SenderExecutableAction;
-import org.elasticsearch.xpack.inference.external.http.retry.ResponseHandler;
 import org.elasticsearch.xpack.inference.external.http.sender.EmbeddingsInput;
 import org.elasticsearch.xpack.inference.external.http.sender.GenericRequestManager;
 import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender;
@@ -43,18 +42,12 @@
 import org.elasticsearch.xpack.inference.services.SenderService;
 import org.elasticsearch.xpack.inference.services.ServiceComponents;
 import org.elasticsearch.xpack.inference.services.ServiceUtils;
-import org.elasticsearch.xpack.inference.services.ai21.completion.Ai21ChatCompletionResponseHandler;
-import org.elasticsearch.xpack.inference.services.anthropic.AnthropicChatCompletionResponseHandler;
 import org.elasticsearch.xpack.inference.services.googlevertexai.action.GoogleVertexAiActionCreator;
 import org.elasticsearch.xpack.inference.services.googlevertexai.completion.GoogleVertexAiChatCompletionModel;
 import org.elasticsearch.xpack.inference.services.googlevertexai.embeddings.GoogleVertexAiEmbeddingsModel;
 import org.elasticsearch.xpack.inference.services.googlevertexai.embeddings.GoogleVertexAiEmbeddingsServiceSettings;
 import org.elasticsearch.xpack.inference.services.googlevertexai.request.completion.GoogleVertexAiUnifiedChatCompletionRequest;
 import org.elasticsearch.xpack.inference.services.googlevertexai.rerank.GoogleVertexAiRerankModel;
-import org.elasticsearch.xpack.inference.services.huggingface.HuggingFaceChatCompletionResponseHandler;
-import org.elasticsearch.xpack.inference.services.llama.completion.LlamaChatCompletionResponseHandler;
-import org.elasticsearch.xpack.inference.services.mistral.MistralUnifiedChatCompletionResponseHandler;
-import org.elasticsearch.xpack.inference.services.openai.response.OpenAiChatCompletionResponseEntity;
 import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings;
 
 import java.util.EnumSet;
@@ -97,34 +90,6 @@ public class GoogleVertexAiService extends SenderService implements RerankingInf
         InputType.INTERNAL_SEARCH
     );
 
-    public static final ResponseHandler GOOGLE_VERTEX_AI_CHAT_COMPLETION_HANDLER = new GoogleVertexAiUnifiedChatCompletionResponseHandler(
-        "Google Vertex AI chat completion"
-    );
-
-    private static final ResponseHandler ANTHROPIC_CHAT_COMPLETION_HANDLER = new AnthropicChatCompletionResponseHandler(
-        "Google Model Garden Anthropic chat completion"
-    );
-
-    private static final ResponseHandler META_CHAT_COMPLETION_HANDLER = new LlamaChatCompletionResponseHandler(
-        "Google Model Garden Meta chat completion",
-        OpenAiChatCompletionResponseEntity::fromResponse
-    );
-
-    private static final ResponseHandler HUGGING_FACE_CHAT_COMPLETION_HANDLER = new HuggingFaceChatCompletionResponseHandler(
-        "Google Model Garden Hugging Face chat completion",
-        OpenAiChatCompletionResponseEntity::fromResponse
-    );
-
-    private static final ResponseHandler MISTRAL_CHAT_COMPLETION_HANDLER = new MistralUnifiedChatCompletionResponseHandler(
-        "Google Model Garden Mistral chat completions",
-        OpenAiChatCompletionResponseEntity::fromResponse
-    );
-
-    private static final ResponseHandler AI21_CHAT_COMPLETION_HANDLER = new Ai21ChatCompletionResponseHandler(
-        "Google Model Garden Ai21 chat completions",
-        OpenAiChatCompletionResponseEntity::fromResponse
-    );
-
     @Override
     public Set<TaskType> supportedStreamingTasks() {
         return EnumSet.of(TaskType.CHAT_COMPLETION, TaskType.COMPLETION);
@@ -290,51 +255,16 @@ protected void doUnifiedCompletionInfer(
         }
     }
 
-    /**
-     * Create the request manager based on the provider specified in the model's service settings.
-     * @param model The GoogleVertexAiChatCompletionModel containing the provider information.
-     * @return A GenericRequestManager configured with the appropriate response handler.
-     */
-    private GenericRequestManager<UnifiedChatInput> createRequestManager(GoogleVertexAiChatCompletionModel model) {
-        switch (model.getServiceSettings().provider()) {
-            case GOOGLE -> {
-                return createRequestManagerWithHandler(model, GOOGLE_VERTEX_AI_CHAT_COMPLETION_HANDLER);
-            }
-            case ANTHROPIC -> {
-                return createRequestManagerWithHandler(model, ANTHROPIC_CHAT_COMPLETION_HANDLER);
-            }
-            case META -> {
-                return createRequestManagerWithHandler(model, META_CHAT_COMPLETION_HANDLER);
-            }
-            case HUGGING_FACE -> {
-                return createRequestManagerWithHandler(model, HUGGING_FACE_CHAT_COMPLETION_HANDLER);
-            }
-            case MISTRAL -> {
-                return createRequestManagerWithHandler(model, MISTRAL_CHAT_COMPLETION_HANDLER);
-            }
-            case AI21 -> {
-                return createRequestManagerWithHandler(model, AI21_CHAT_COMPLETION_HANDLER);
-            }
-            case null, default -> throw new ElasticsearchException(
-                "Unsupported Google Model Garden provider: " + model.getServiceSettings().provider()
-            );
-        }
-    }
-
     /**
      * Helper method to create a GenericRequestManager with a specified response handler.
      * @param model The GoogleVertexAiChatCompletionModel to be used for requests.
-     * @param responseHandler The ResponseHandler to process the responses.
      * @return A GenericRequestManager configured with the provided response handler.
      */
-    private GenericRequestManager<UnifiedChatInput> createRequestManagerWithHandler(
-        GoogleVertexAiChatCompletionModel model,
-        ResponseHandler responseHandler
-    ) {
+    private GenericRequestManager<UnifiedChatInput> createRequestManager(GoogleVertexAiChatCompletionModel model) {
         return new GenericRequestManager<>(
             getServiceComponents().threadPool(),
             model,
-            responseHandler,
+            model.getServiceSettings().provider().getChatCompletionResponseHandler(),
             unifiedChatInput -> new GoogleVertexAiUnifiedChatCompletionRequest(unifiedChatInput, model),
             UnifiedChatInput.class
         );