Merged

Changes from 6 commits
22 commits
af60919
Integrate Google Model Garden providers for processing chat completio…
Jan-Kazlouski-elastic Sep 30, 2025
129f74e
Add changelog
Jan-Kazlouski-elastic Sep 30, 2025
52fbe36
[CI] Auto commit changes from spotless
Sep 30, 2025
f0e382a
Merge remote-tracking branch 'origin/main' into feature/google-model-…
Jan-Kazlouski-elastic Oct 1, 2025
4a2abaf
Merge remote-tracking branch 'origin/main' into feature/google-model-…
Jan-Kazlouski-elastic Oct 2, 2025
05a4c4f
Merge remote-tracking branch 'origin/feature/google-model-garden-open…
Jan-Kazlouski-elastic Oct 2, 2025
8a47467
Merge branch 'main' into feature/google-model-garden-openai-providers…
Jan-Kazlouski-elastic Oct 6, 2025
b75d352
Merge remote-tracking branch 'origin/main' into feature/google-model-…
Jan-Kazlouski-elastic Oct 8, 2025
41224bd
Move model_id null check to UnifiedCompletionRequest, fix Javadoc, fi…
Jan-Kazlouski-elastic Oct 8, 2025
8cd7eb1
Refactor GoogleVertexAiUnifiedChatCompletionActionTests to simplify m…
Jan-Kazlouski-elastic Oct 8, 2025
6c05a61
Refactor GoogleModelGardenProvider to handle response handlers and re…
Jan-Kazlouski-elastic Oct 8, 2025
2d8ced7
Merge branch 'main' into feature/google-model-garden-openai-providers…
Jan-Kazlouski-elastic Oct 8, 2025
ac65a3a
Fix typo in GoogleModelGardenProvider Google Model Garden AI21 chat c…
Jan-Kazlouski-elastic Oct 9, 2025
1824cb8
Merge remote-tracking branch 'origin/feature/google-model-garden-open…
Jan-Kazlouski-elastic Oct 9, 2025
3353b85
Refactor GoogleVertexAiUnifiedChatCompletionActionTests
Jan-Kazlouski-elastic Oct 9, 2025
767f4d3
Refactor GoogleModelGardenProvider
Jan-Kazlouski-elastic Oct 9, 2025
8b25612
Add Nullable annotation
Jan-Kazlouski-elastic Oct 9, 2025
f66fb24
Merge branch 'main' into feature/google-model-garden-openai-providers…
Jan-Kazlouski-elastic Oct 9, 2025
35ec9e8
Merge branch 'main' into feature/google-model-garden-openai-providers…
Jan-Kazlouski-elastic Oct 9, 2025
d874fc9
[CI] Auto commit changes from spotless
Oct 9, 2025
f3a947e
Fix Typo
Jan-Kazlouski-elastic Oct 9, 2025
ace886c
Merge branch 'main' into feature/google-model-garden-openai-providers…
Jan-Kazlouski-elastic Oct 10, 2025
5 changes: 5 additions & 0 deletions docs/changelog/135701.yaml
@@ -0,0 +1,5 @@
pr: 135701
summary: Add Google Model Garden's Meta, Mistral, Hugging Face and Ai21 providers support to Inference Plugin
area: Machine Learning
type: enhancement
issues: []
@@ -99,6 +99,14 @@ public static Params withMaxTokens(String modelId, Params params) {
);
}

/**
* Creates a {@link org.elasticsearch.xcontent.ToXContent.Params} that causes ToXContent to include the key values:
* - Key: {@link #MAX_TOKENS_FIELD}, Value: {@link #maxCompletionTokens()}
*/
public static Params withMaxTokens(Params params) {
return new DelegatingMapParams(Map.ofEntries(Map.entry(MAX_TOKENS_PARAM, MAX_TOKENS_FIELD)), params);
}

/**
* Creates a {@link org.elasticsearch.xcontent.ToXContent.Params} that causes ToXContent to include the key values:
* - Key: {@link #MODEL_FIELD}, Value: modelId
@@ -116,6 +124,18 @@ public static Params withMaxTokensAndSkipStreamOptionsField(String modelId, Para
);
}

/**
* Creates a {@link org.elasticsearch.xcontent.ToXContent.Params} that causes ToXContent to include the key values:
* - Key: {@link #MAX_TOKENS_FIELD}, Value: {@link #MAX_TOKENS_FIELD}

Contributor:
This should be Value: {@link #maxCompletionTokens()}. The existing Javadoc on line 113 in this file has a similar mistake.

Contributor Author:
Fixed.

* - Key: {@link #INCLUDE_STREAM_OPTIONS_PARAM}, Value: "false"
*/
public static Params withMaxTokensAndSkipStreamOptionsField(Params params) {
return new DelegatingMapParams(
Map.ofEntries(Map.entry(MAX_TOKENS_PARAM, MAX_TOKENS_FIELD), Map.entry(INCLUDE_STREAM_OPTIONS_PARAM, Boolean.FALSE.toString())),
params
);
}
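
For context, a minimal sketch of how a serializer could consult the params produced by the two new overloads above. This is an illustration only, not code from this PR; it assumes the param and field constants referenced in the Javadoc are visible at the call site, and the helper method itself is hypothetical.

// Hypothetical helper, not part of this PR: shows how the params built above could be consumed.
static void writeOptionalFields(XContentBuilder builder, ToXContent.Params params, int maxCompletionTokens) throws IOException {
    // withMaxTokens(params) maps MAX_TOKENS_PARAM to the field name (MAX_TOKENS_FIELD)
    // under which the max-completion-tokens value should be written.
    String maxTokensFieldName = params.param(MAX_TOKENS_PARAM, null);
    if (maxTokensFieldName != null) {
        builder.field(maxTokensFieldName, maxCompletionTokens);
    }
    // withMaxTokensAndSkipStreamOptionsField(params) additionally sets INCLUDE_STREAM_OPTIONS_PARAM
    // to "false", telling the serializer to omit the stream options object.
    if (params.paramAsBoolean(INCLUDE_STREAM_OPTIONS_PARAM, true) == false) {
        return; // skip stream options entirely
    }
    // ...otherwise the real request entity would emit its stream options here.
}
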

/**
* Creates a {@link org.elasticsearch.xcontent.ToXContent.Params} that causes ToXContent to include the key values:
* - Key: {@link #MODEL_FIELD}, Value: modelId
@@ -43,7 +43,8 @@ public HttpRequest createHttpRequest() {
HttpPost httpPost = new HttpPost(model.uri());

ByteArrayEntity byteEntity = new ByteArrayEntity(
Strings.toString(new Ai21ChatCompletionRequestEntity(chatInput, model.getServiceSettings().modelId()))
.getBytes(StandardCharsets.UTF_8)
);
httpPost.setEntity(byteEntity);

@@ -7,34 +7,39 @@

package org.elasticsearch.xpack.inference.services.ai21.request;

import org.elasticsearch.core.Nullable;
import org.elasticsearch.inference.UnifiedCompletionRequest;
import org.elasticsearch.xcontent.ToXContentObject;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xpack.inference.external.http.sender.UnifiedChatInput;
import org.elasticsearch.xpack.inference.external.unified.UnifiedChatCompletionRequestEntity;
import org.elasticsearch.xpack.inference.services.ai21.completion.Ai21ChatCompletionModel;

import java.io.IOException;
import java.util.Objects;

/**
* Ai21ChatCompletionRequestEntity is responsible for creating the request entity for Ai21 chat completion.
* It implements ToXContentObject to allow serialization to XContent format.
*/
public class Ai21ChatCompletionRequestEntity implements ToXContentObject {

private final String modelId;
private final UnifiedChatCompletionRequestEntity unifiedRequestEntity;

public Ai21ChatCompletionRequestEntity(UnifiedChatInput unifiedChatInput, @Nullable String modelId) {
this.unifiedRequestEntity = new UnifiedChatCompletionRequestEntity(unifiedChatInput);
this.modelId = modelId;
}

@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
unifiedRequestEntity.toXContent(builder, UnifiedCompletionRequest.withMaxTokens(model.getServiceSettings().modelId(), params));
if (modelId != null) {
// Some models require the model ID to be specified in the request body
unifiedRequestEntity.toXContent(builder, UnifiedCompletionRequest.withMaxTokens(modelId, params));
} else {
// Some models do not require the model ID to be specified in the request body
unifiedRequestEntity.toXContent(builder, UnifiedCompletionRequest.withMaxTokens(params));
}
builder.endObject();
return builder;
}
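A minimal usage sketch of the entity above, assuming an existing UnifiedChatInput built from a chat completion request. It is not part of this PR: the helper method and the "jamba-large" model id are made up for illustration, and the JSON shapes in the comments are approximate.

// Hypothetical helper illustrating the two serialization paths of Ai21ChatCompletionRequestEntity.
// Strings.toString is the same helper used by Ai21ChatCompletionRequest above.
static String[] serializeBothWays(UnifiedChatInput chatInput) {
    // Non-null model id: the body carries a "model" field,
    // roughly {"messages": [...], "model": "jamba-large", ...}
    String withModelId = Strings.toString(new Ai21ChatCompletionRequestEntity(chatInput, "jamba-large"));

    // Null model id: the "model" field is omitted from the body,
    // roughly {"messages": [...], ...}
    String withoutModelId = Strings.toString(new Ai21ChatCompletionRequestEntity(chatInput, null));

    return new String[] { withModelId, withoutModelId };
}
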
@@ -14,7 +14,11 @@
*/
public enum GoogleModelGardenProvider {
GOOGLE,
ANTHROPIC,
META,
HUGGING_FACE,
MISTRAL,
AI21;

public static final String NAME = "google_model_garden_provider";

@@ -43,13 +43,18 @@
import org.elasticsearch.xpack.inference.services.SenderService;
import org.elasticsearch.xpack.inference.services.ServiceComponents;
import org.elasticsearch.xpack.inference.services.ServiceUtils;
import org.elasticsearch.xpack.inference.services.ai21.completion.Ai21ChatCompletionResponseHandler;
import org.elasticsearch.xpack.inference.services.anthropic.AnthropicChatCompletionResponseHandler;
import org.elasticsearch.xpack.inference.services.googlevertexai.action.GoogleVertexAiActionCreator;
import org.elasticsearch.xpack.inference.services.googlevertexai.completion.GoogleVertexAiChatCompletionModel;
import org.elasticsearch.xpack.inference.services.googlevertexai.embeddings.GoogleVertexAiEmbeddingsModel;
import org.elasticsearch.xpack.inference.services.googlevertexai.embeddings.GoogleVertexAiEmbeddingsServiceSettings;
import org.elasticsearch.xpack.inference.services.googlevertexai.request.completion.GoogleVertexAiUnifiedChatCompletionRequest;
import org.elasticsearch.xpack.inference.services.googlevertexai.rerank.GoogleVertexAiRerankModel;
import org.elasticsearch.xpack.inference.services.huggingface.HuggingFaceChatCompletionResponseHandler;
import org.elasticsearch.xpack.inference.services.llama.completion.LlamaChatCompletionResponseHandler;
import org.elasticsearch.xpack.inference.services.mistral.MistralUnifiedChatCompletionResponseHandler;
import org.elasticsearch.xpack.inference.services.openai.response.OpenAiChatCompletionResponseEntity;
import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings;

import java.util.EnumSet;
@@ -96,10 +101,30 @@ public class GoogleVertexAiService extends SenderService implements RerankingInf
"Google Vertex AI chat completion"
);

private static final ResponseHandler ANTHROPIC_CHAT_COMPLETION_HANDLER = new AnthropicChatCompletionResponseHandler(
"Google Model Garden Anthropic chat completion"
);

private static final ResponseHandler META_CHAT_COMPLETION_HANDLER = new LlamaChatCompletionResponseHandler(
"Google Model Garden Meta chat completion",
OpenAiChatCompletionResponseEntity::fromResponse
);

private static final ResponseHandler HUGGING_FACE_CHAT_COMPLETION_HANDLER = new HuggingFaceChatCompletionResponseHandler(
"Google Model Garden Hugging Face chat completion",
OpenAiChatCompletionResponseEntity::fromResponse
);

private static final ResponseHandler MISTRAL_CHAT_COMPLETION_HANDLER = new MistralUnifiedChatCompletionResponseHandler(
"Google Model Garden Mistral chat completions",
OpenAiChatCompletionResponseEntity::fromResponse
);

private static final ResponseHandler AI21_CHAT_COMPLETION_HANDLER = new Ai21ChatCompletionResponseHandler(
"Google Model Garden Ai21 chat completions",
OpenAiChatCompletionResponseEntity::fromResponse
);

@Override
public Set<TaskType> supportedStreamingTasks() {
return EnumSet.of(TaskType.CHAT_COMPLETION, TaskType.COMPLETION);
@@ -265,20 +290,43 @@ protected void doUnifiedCompletionInfer(
}
}

/**
* Create the request manager based on the provider specified in the model's service settings.
* @param model The GoogleVertexAiChatCompletionModel containing the provider information.
* @return A GenericRequestManager configured with the appropriate response handler.
*/
private GenericRequestManager<UnifiedChatInput> createRequestManager(GoogleVertexAiChatCompletionModel model) {
switch (model.getServiceSettings().provider()) {
case GOOGLE -> {
return createRequestManagerWithHandler(model, GOOGLE_VERTEX_AI_CHAT_COMPLETION_HANDLER);
}
case ANTHROPIC -> {
return createRequestManagerWithHandler(model, ANTHROPIC_CHAT_COMPLETION_HANDLER);
}
case META -> {
return createRequestManagerWithHandler(model, META_CHAT_COMPLETION_HANDLER);
}
case HUGGING_FACE -> {
return createRequestManagerWithHandler(model, HUGGING_FACE_CHAT_COMPLETION_HANDLER);
}
case MISTRAL -> {
return createRequestManagerWithHandler(model, MISTRAL_CHAT_COMPLETION_HANDLER);
}
case AI21 -> {
return createRequestManagerWithHandler(model, AI21_CHAT_COMPLETION_HANDLER);
}
case null, default -> throw new ElasticsearchException(
"Unsupported Google Model Garden provider: " + model.getServiceSettings().provider()
);
}
}

/**
* Helper method to create a GenericRequestManager with a specified response handler.
* @param model The GoogleVertexAiChatCompletionModel to be used for requests.
* @param responseHandler The ResponseHandler to process the responses.
* @return A GenericRequestManager configured with the provided response handler.
*/
private GenericRequestManager<UnifiedChatInput> createRequestManagerWithHandler(
GoogleVertexAiChatCompletionModel model,
ResponseHandler responseHandler
@@ -11,6 +11,7 @@
import org.elasticsearch.xpack.inference.external.action.ExecutableAction;
import org.elasticsearch.xpack.inference.external.action.SenderExecutableAction;
import org.elasticsearch.xpack.inference.external.action.SingleInputSenderExecutableAction;
import org.elasticsearch.xpack.inference.external.http.retry.ErrorResponse;
import org.elasticsearch.xpack.inference.external.http.retry.ResponseHandler;
import org.elasticsearch.xpack.inference.external.http.sender.ChatCompletionInput;
import org.elasticsearch.xpack.inference.external.http.sender.GenericRequestManager;
@@ -28,6 +29,9 @@
import org.elasticsearch.xpack.inference.services.googlevertexai.request.completion.GoogleVertexAiUnifiedChatCompletionRequest;
import org.elasticsearch.xpack.inference.services.googlevertexai.rerank.GoogleVertexAiRerankModel;
import org.elasticsearch.xpack.inference.services.googlevertexai.response.GoogleVertexAiCompletionResponseEntity;
import org.elasticsearch.xpack.inference.services.llama.completion.LlamaCompletionResponseHandler;
import org.elasticsearch.xpack.inference.services.openai.OpenAiChatCompletionResponseHandler;
import org.elasticsearch.xpack.inference.services.openai.response.OpenAiChatCompletionResponseEntity;

import java.util.Map;
import java.util.Objects;
@@ -54,6 +58,28 @@ public class GoogleVertexAiActionCreator implements GoogleVertexAiActionVisitor
true
);

static final ResponseHandler GOOGLE_MODEL_GARDEN_META_COMPLETION_HANDLER = new LlamaCompletionResponseHandler(
"Google Model Garden Meta completion",
OpenAiChatCompletionResponseEntity::fromResponse
);

static final ResponseHandler GOOGLE_MODEL_GARDEN_HUGGING_FACE_COMPLETION_HANDLER = new OpenAiChatCompletionResponseHandler(
"Google Model Garden Hugging Face completion",
OpenAiChatCompletionResponseEntity::fromResponse
);

static final ResponseHandler GOOGLE_MODEL_GARDEN_MISTRAL_COMPLETION_HANDLER = new OpenAiChatCompletionResponseHandler(
"Google Model Garden Mistral completion",
OpenAiChatCompletionResponseEntity::fromResponse,
ErrorResponse::fromResponse
);

static final ResponseHandler GOOGLE_MODEL_GARDEN_AI21_COMPLETION_HANDLER = new OpenAiChatCompletionResponseHandler(
"Google Model Garden AI21 completion",
OpenAiChatCompletionResponseEntity::fromResponse,
ErrorResponse::fromResponse
);

static final String USER_ROLE = "user";

public GoogleVertexAiActionCreator(Sender sender, ServiceComponents serviceComponents) {
@@ -91,11 +117,23 @@ public ExecutableAction create(GoogleVertexAiChatCompletionModel model, Map<Stri

private GenericRequestManager<ChatCompletionInput> createRequestManager(GoogleVertexAiChatCompletionModel model) {
switch (model.getServiceSettings().provider()) {
case GOOGLE -> {
Contributor:
How about we try to reduce these switch statements to a map? I'm thinking we could create a Provider class or something that has a static map of provider enums to an internal class containing functions to construct the three different things we need these switch cases for.

Ideally we wouldn't need to pass an instance of the Provider class around. If possible, we could do something like Provider.createCompletionRequestManager(provider, model).

That would grab the internal class and then call createRequestManagerWithHandler(model, handler) with the static handler that is appropriate for that provider.

Provider would have a function for each of the switch statements that we need.

Contributor Author (Jan-Kazlouski-elastic, Oct 8, 2025):
Thanks. I had ideas about moving this logic elsewhere but initially decided not to.
Now I've come up with the idea of moving the logic of getting response handlers and creating request entities into the enum itself.
Of course, I'm not a big fan of moving too much logic into enums, but having a Provider class when we already have the GoogleModelGardenProvider enum alongside it is not ideal either, IMHO. And that way, when a new provider is added, we can ideally localize the changes to a single file. To me it looks good. Please let me know what you think of it @jonathan-buttner

return createRequestManagerWithHandler(model, GOOGLE_VERTEX_AI_COMPLETION_HANDLER);
}
case ANTHROPIC -> {
return createRequestManagerWithHandler(model, GOOGLE_MODEL_GARDEN_ANTHROPIC_COMPLETION_HANDLER);
}
case META -> {
return createRequestManagerWithHandler(model, GOOGLE_MODEL_GARDEN_META_COMPLETION_HANDLER);
}
case HUGGING_FACE -> {
return createRequestManagerWithHandler(model, GOOGLE_MODEL_GARDEN_HUGGING_FACE_COMPLETION_HANDLER);
}
case MISTRAL -> {
return createRequestManagerWithHandler(model, GOOGLE_MODEL_GARDEN_MISTRAL_COMPLETION_HANDLER);
}
case AI21 -> {
return createRequestManagerWithHandler(model, GOOGLE_MODEL_GARDEN_AI21_COMPLETION_HANDLER);
}
case null, default -> throw new ElasticsearchException(
"Unsupported Google Model Garden provider: " + model.getServiceSettings().provider()
@@ -104,14 +142,14 @@ private GenericRequestManager<ChatCompletionInput> createRequestManager(GoogleVe
}

private GenericRequestManager<ChatCompletionInput> createRequestManagerWithHandler(
GoogleVertexAiChatCompletionModel model,
ResponseHandler responseHandler
) {
return new GenericRequestManager<>(
serviceComponents.threadPool(),
model,
responseHandler,
inputs -> new GoogleVertexAiUnifiedChatCompletionRequest(new UnifiedChatInput(inputs, USER_ROLE), model),
ChatCompletionInput.class
);
}
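Following up on the review discussion earlier in this file about collapsing the per-provider switch: a minimal sketch of the reviewer's map-based idea, wired to the handler constants this class already defines. This is illustrative only, not the merged code; the commit titled "Refactor GoogleModelGardenProvider to handle response handlers and re…" suggests the dispatch was later moved into the enum itself, as the author proposed. The java.util.Map import is assumed.

// Illustrative sketch only (reviewer's suggestion, not the merged implementation).
private static final Map<GoogleModelGardenProvider, ResponseHandler> COMPLETION_HANDLERS = Map.of(
    GoogleModelGardenProvider.GOOGLE, GOOGLE_VERTEX_AI_COMPLETION_HANDLER,
    GoogleModelGardenProvider.ANTHROPIC, GOOGLE_MODEL_GARDEN_ANTHROPIC_COMPLETION_HANDLER,
    GoogleModelGardenProvider.META, GOOGLE_MODEL_GARDEN_META_COMPLETION_HANDLER,
    GoogleModelGardenProvider.HUGGING_FACE, GOOGLE_MODEL_GARDEN_HUGGING_FACE_COMPLETION_HANDLER,
    GoogleModelGardenProvider.MISTRAL, GOOGLE_MODEL_GARDEN_MISTRAL_COMPLETION_HANDLER,
    GoogleModelGardenProvider.AI21, GOOGLE_MODEL_GARDEN_AI21_COMPLETION_HANDLER
);

private GenericRequestManager<ChatCompletionInput> createRequestManager(GoogleVertexAiChatCompletionModel model) {
    var provider = model.getServiceSettings().provider();
    // Map.of maps do not accept null keys, so handle a missing provider before the lookup.
    var handler = provider == null ? null : COMPLETION_HANDLERS.get(provider);
    if (handler == null) {
        throw new ElasticsearchException("Unsupported Google Model Garden provider: " + provider);
    }
    return createRequestManagerWithHandler(model, handler);
}
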
@@ -17,8 +17,12 @@
import org.elasticsearch.xpack.inference.external.http.sender.UnifiedChatInput;
import org.elasticsearch.xpack.inference.external.request.HttpRequest;
import org.elasticsearch.xpack.inference.external.request.Request;
import org.elasticsearch.xpack.inference.services.ai21.request.Ai21ChatCompletionRequestEntity;
import org.elasticsearch.xpack.inference.services.googlevertexai.completion.GoogleVertexAiChatCompletionModel;
import org.elasticsearch.xpack.inference.services.googlevertexai.request.GoogleVertexAiRequest;
import org.elasticsearch.xpack.inference.services.huggingface.request.completion.HuggingFaceUnifiedChatCompletionRequestEntity;
import org.elasticsearch.xpack.inference.services.llama.request.completion.LlamaChatCompletionRequestEntity;
import org.elasticsearch.xpack.inference.services.mistral.request.completion.MistralChatCompletionRequestEntity;

import java.net.URI;
import java.nio.charset.StandardCharsets;
@@ -53,19 +57,41 @@ public HttpRequest createHttpRequest() {
}

private ToXContentObject createRequestEntity() {
final var modelId = extractModelId();
switch (model.getServiceSettings().provider()) {
case GOOGLE -> {
return new GoogleVertexAiUnifiedChatCompletionRequestEntity(unifiedChatInput, model.getTaskSettings().thinkingConfig());
}
case ANTHROPIC -> {
return new GoogleModelGardenAnthropicChatCompletionRequestEntity(unifiedChatInput, model.getTaskSettings());
}
case META -> {
return new LlamaChatCompletionRequestEntity(unifiedChatInput, modelId);
}
case HUGGING_FACE -> {
return new HuggingFaceUnifiedChatCompletionRequestEntity(unifiedChatInput, modelId);
}
case MISTRAL -> {
return new MistralChatCompletionRequestEntity(unifiedChatInput, modelId);
}
case AI21 -> {
return new Ai21ChatCompletionRequestEntity(unifiedChatInput, modelId);
}
case null, default -> throw new ElasticsearchException(
"Unsupported Google Model Garden provider: " + model.getServiceSettings().provider()
);
}
}

/**
* Extracts the model ID to be used for the request. If the request contains a model ID, it is preferred.
* Otherwise, the model ID from the configuration is used.
* @return the model ID to be used for the request
*/
private String extractModelId() {
return unifiedChatInput.getRequest().model() != null ? unifiedChatInput.getRequest().model() : model.getServiceSettings().modelId();
}

public void decorateWithAuth(HttpPost httpPost) {
GoogleVertexAiRequest.decorateWithBearerToken(httpPost, model.getSecretSettings());
}
@@ -50,7 +50,8 @@ public HttpRequest createHttpRequest() {
HttpPost httpPost = new HttpPost(getURI());

ByteArrayEntity byteEntity = new ByteArrayEntity(
Strings.toString(new HuggingFaceUnifiedChatCompletionRequestEntity(unifiedChatInput, model.getServiceSettings().modelId()))
.getBytes(StandardCharsets.UTF_8)
);
httpPost.setEntity(byteEntity);
