Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/128694.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 128694
summary: "Adding Google VertexAI completion integration"
area: Inference
type: enhancement
issues: [ ]
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,9 @@ public static Map<String, SettingsConfiguration> get() {
var configurationMap = new HashMap<String, SettingsConfiguration>();
configurationMap.put(
SERVICE_ACCOUNT_JSON,
new SettingsConfiguration.Builder(EnumSet.of(TaskType.TEXT_EMBEDDING, TaskType.RERANK, TaskType.CHAT_COMPLETION))
.setDescription("API Key for the provider you're connecting to.")
new SettingsConfiguration.Builder(
EnumSet.of(TaskType.TEXT_EMBEDDING, TaskType.RERANK, TaskType.CHAT_COMPLETION, TaskType.COMPLETION)
).setDescription("API Key for the provider you're connecting to.")
.setLabel("Credentials JSON")
.setRequired(true)
.setSensitive(true)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import org.elasticsearch.xpack.inference.services.ServiceUtils;
import org.elasticsearch.xpack.inference.services.googlevertexai.action.GoogleVertexAiActionCreator;
import org.elasticsearch.xpack.inference.services.googlevertexai.completion.GoogleVertexAiChatCompletionModel;
import org.elasticsearch.xpack.inference.services.googlevertexai.completion.GoogleVertexAiCompletionModel;
import org.elasticsearch.xpack.inference.services.googlevertexai.embeddings.GoogleVertexAiEmbeddingsModel;
import org.elasticsearch.xpack.inference.services.googlevertexai.embeddings.GoogleVertexAiEmbeddingsServiceSettings;
import org.elasticsearch.xpack.inference.services.googlevertexai.request.GoogleVertexAiUnifiedChatCompletionRequest;
Expand Down Expand Up @@ -75,7 +76,8 @@ public class GoogleVertexAiService extends SenderService {
private static final EnumSet<TaskType> supportedTaskTypes = EnumSet.of(
TaskType.TEXT_EMBEDDING,
TaskType.RERANK,
TaskType.CHAT_COMPLETION
TaskType.CHAT_COMPLETION,
TaskType.COMPLETION
);

public static final EnumSet<InputType> VALID_INPUT_TYPE_VALUES = EnumSet.of(
Expand Down Expand Up @@ -220,11 +222,11 @@ protected void doInfer(
return;
}

GoogleVertexAiModel googleVertexAiModel = (GoogleVertexAiModel) model;
var completionModel = (GoogleVertexAiCompletionModel) model;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, we can't rely on model being only a GoogleVertexAiCompletionModel, it could be a completion, rerank, or embedding model. Do we need it as a completion model here?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it. My understanding was this method was only called by the completion task, that's why I was downcasting it. Will revert this change


var actionCreator = new GoogleVertexAiActionCreator(getSender(), getServiceComponents());

var action = googleVertexAiModel.accept(actionCreator, taskSettings);
var action = completionModel.accept(actionCreator, taskSettings);
action.execute(inputs, timeout, listener);
}

Expand Down Expand Up @@ -368,6 +370,16 @@ private static GoogleVertexAiModel createModel(
context
);

case COMPLETION -> new GoogleVertexAiCompletionModel(
inferenceEntityId,
taskType,
NAME,
serviceSettings,
taskSettings,
secretSettings,
context
);

default -> throw new ElasticsearchStatusException(failureMessage, RestStatus.BAD_REQUEST);
};
}
Expand Down Expand Up @@ -396,10 +408,11 @@ public static InferenceServiceConfiguration get() {

configurationMap.put(
LOCATION,
new SettingsConfiguration.Builder(EnumSet.of(TaskType.TEXT_EMBEDDING, TaskType.CHAT_COMPLETION)).setDescription(
"Please provide the GCP region where the Vertex AI API(s) is enabled. "
+ "For more information, refer to the {geminiVertexAIDocs}."
)
new SettingsConfiguration.Builder(EnumSet.of(TaskType.TEXT_EMBEDDING, TaskType.CHAT_COMPLETION, TaskType.COMPLETION))
.setDescription(
"Please provide the GCP region where the Vertex AI API(s) is enabled. "
+ "For more information, refer to the {geminiVertexAIDocs}."
)
.setLabel("GCP Region")
.setRequired(true)
.setSensitive(false)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@
import org.elasticsearch.xpack.core.inference.results.UnifiedChatCompletionException;
import org.elasticsearch.xpack.inference.external.http.HttpResult;
import org.elasticsearch.xpack.inference.external.http.retry.ErrorResponse;
import org.elasticsearch.xpack.inference.external.http.retry.ResponseParser;
import org.elasticsearch.xpack.inference.external.request.Request;
import org.elasticsearch.xpack.inference.external.response.streaming.ServerSentEventParser;
import org.elasticsearch.xpack.inference.external.response.streaming.ServerSentEventProcessor;
import org.elasticsearch.xpack.inference.services.googlevertexai.response.GoogleVertexAiCompletionResponseEntity;

import java.nio.charset.StandardCharsets;
import java.util.Locale;
Expand All @@ -43,10 +43,8 @@ public class GoogleVertexAiUnifiedChatCompletionResponseHandler extends GoogleVe
private static final String ERROR_MESSAGE_FIELD = "message";
private static final String ERROR_STATUS_FIELD = "status";

private static final ResponseParser noopParseFunction = (a, b) -> null;

public GoogleVertexAiUnifiedChatCompletionResponseHandler(String requestType) {
super(requestType, noopParseFunction, GoogleVertexAiErrorResponse::fromResponse, true);
super(requestType, GoogleVertexAiCompletionResponseEntity::fromResponse, GoogleVertexAiErrorResponse::fromResponse, true);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To support streaming and non-streaming for completion I think we'll need a slightly different inheritance hierarchy.

For example take a look at openai: https://github.com/elastic/elasticsearch/blob/main/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/OpenAiResponseHandler.java#L133-L140

These are the changes I think we need to make:

  • Let's override the parseResults in GoogleVertexAiResponseHandler, it can be identical to GoogleVertexAiUnifiedChatCompletionResponseHandler, except we'll need to return StreamingChatCompletionResults instead of the unified version
    • If there's a better way of refactoring, like potentially moving the buildMidstreamError or some other functionality from the unified response handler up to a class that both completion and chat completion extend that might be better
  • In the GoogleVertexAiActionCreator we'll create a new response handler that leverages GoogleVertexAiCompletionResponseEntity::fromResponse for the non-streaming case

Let me know if you'd rather jump on a call to discuss this

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, one correction: the way we report errors for the "unified" (aka openai) format is different from the "elasticsearch" way of returning errors. So for streaming completion we don't want to follow what we're doing in GoogleVertexAiUnifiedChatCompletionResponseHandler, because that returns errors in the openai format. I would try to follow what we're doing in the OpenAiResponseHandler that I linked. Hopefully we don't need to create a whole new streaming processor though.

We might need to do some refactoring but I would see if you could reuse GoogleVertexAiUnifiedStreamingProcessor for the parsing logic but we'll need to return a different result (specifically StreamingChatCompletionResults).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I managed to get this working as you suggested. The only hacky thing is that I had to add a method to the completion model, updateUri, that is called before making the request to ensure we are calling the right API. Take a look and let me know what you think. (It's still missing unit tests.)

}

@Override
Expand All @@ -63,7 +61,6 @@ public InferenceServiceResults parseResult(Request request, Flow.Publisher<HttpR

@Override
protected Exception buildError(String message, Request request, HttpResult result, ErrorResponse errorResponse) {
assert request.isStreaming() : "Only streaming requests support this format";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's try to keep this for streaming only

var responseStatusCode = result.response().getStatusLine().getStatusCode();
var errorMessage = errorMessage(message, request, result, errorResponse, responseStatusCode);
var restStatus = toRestStatus(responseStatusCode);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.inference.services.googlevertexai.completion;

import org.apache.http.client.utils.URIBuilder;
import org.elasticsearch.inference.TaskType;
import org.elasticsearch.xpack.inference.services.ConfigurationParseContext;
import org.elasticsearch.xpack.inference.services.googlevertexai.request.GoogleVertexAiUtils;

import java.net.URI;
import java.net.URISyntaxException;
import java.util.Map;

import static org.elasticsearch.core.Strings.format;

public class GoogleVertexAiCompletionModel extends GoogleVertexAiChatCompletionModel {

public GoogleVertexAiCompletionModel(
String inferenceEntityId,
TaskType taskType,
String service,
Map<String, Object> serviceSettings,
Map<String, Object> taskSettings,
Map<String, Object> secrets,
ConfigurationParseContext context
) {
super(inferenceEntityId, taskType, service, serviceSettings, taskSettings, secrets, context);
try {
var modelServiceSettings = this.getServiceSettings();
this.uri = buildUri(modelServiceSettings.location(), modelServiceSettings.projectId(), modelServiceSettings.modelId());
} catch (URISyntaxException e) {
throw new RuntimeException(e);
}

}

public static URI buildUri(String location, String projectId, String model) throws URISyntaxException {
return new URIBuilder().setScheme("https")
.setHost(format("%s%s", location, GoogleVertexAiUtils.GOOGLE_VERTEX_AI_HOST_SUFFIX))
.setPathSegments(
GoogleVertexAiUtils.V1,
GoogleVertexAiUtils.PROJECTS,
projectId,
GoogleVertexAiUtils.LOCATIONS,
GoogleVertexAiUtils.GLOBAL,
GoogleVertexAiUtils.PUBLISHERS,
GoogleVertexAiUtils.PUBLISHER_GOOGLE,
GoogleVertexAiUtils.MODELS,
format("%s:%s", model, GoogleVertexAiUtils.GENERATE_CONTENT)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does GENERATE_CONTENT mean non-streaming? For other integrations (like openai) we support streaming for chat_completion, streaming and non-streaming for completion.

For openai and other integrations we achieved this through a field in the body of the request we sent that is a boolean to enable/disable streaming. For vertex ai, do we need separate URLs?

If so, I think we'll need to store a reference to both URLs and then determine which to use later in the execution flow (maybe in the request creation logic). I can't think of an easy way to determine whether we're streaming or not while we're building this class without having to put in some hacks. If you can think of one, let me know.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. I though only non streaming responses were returned by completion tasks that's why i was using this api https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/generateContent. In vertex AI there are two different API for streaming and non streaming. Let me see what I can do with the refactor you suggested and we can check it

)
.build();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ public final class GoogleVertexAiUtils {

public static final String STREAM_GENERATE_CONTENT = "streamGenerateContent";

public static final String GENERATE_CONTENT = "generateContent";

public static final String QUERY_PARAM_ALT_SSE = "alt=sse";

private GoogleVertexAiUtils() {}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.inference.services.googlevertexai.response;

import org.elasticsearch.inference.InferenceServiceResults;
import org.elasticsearch.xcontent.XContentFactory;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.XContentParserConfiguration;
import org.elasticsearch.xcontent.XContentType;
import org.elasticsearch.xpack.core.inference.results.ChatCompletionResults;
import org.elasticsearch.xpack.core.inference.results.StreamingUnifiedChatCompletionResults;
import org.elasticsearch.xpack.inference.external.http.HttpResult;
import org.elasticsearch.xpack.inference.external.request.Request;
import org.elasticsearch.xpack.inference.services.googlevertexai.GoogleVertexAiUnifiedStreamingProcessor;

import java.io.IOException;
import java.nio.charset.StandardCharsets;

public class GoogleVertexAiCompletionResponseEntity {
/**
* Parses the response from Google Vertex AI's generateContent endpoint
* For a request like:
* <pre>
* <code>
* {
* "inputs": "Please summarize this text: some text"
* }
* </code>
* </pre>
*
* The response is a <a href="https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/GenerateContentResponse">GenerateContentResponse</a> objects that looks like:
*
* <pre>
* <code>
*
* {
* "candidates": [
* {
* "content": {
* "role": "model",
* "parts": [
* {
* "text": "I am sorry, I cannot summarize the text because I do not have access to the text you are referring to."
* }
* ]
* },
* "finishReason": "STOP",
* "avgLogprobs": -0.19326641248620074
* }
* ],
* "usageMetadata": {
* "promptTokenCount": 71,
* "candidatesTokenCount": 23,
* "totalTokenCount": 94,
* "trafficType": "ON_DEMAND",
* "promptTokensDetails": [
* {
* "modality": "TEXT",
* "tokenCount": 71
* }
* ],
* "candidatesTokensDetails": [
* {
* "modality": "TEXT",
* "tokenCount": 23
* }
* ]
* },
* "modelVersion": "gemini-2.0-flash-001",
* "createTime": "2025-05-28T15:08:20.049493Z",
* "responseId": "5CY3aNWCA6mm4_UPr-zduAE"
* }
* </code>
* </pre>
*
* @param request The original request made to the service.
**/
public static InferenceServiceResults fromResponse(Request request, HttpResult response) throws IOException {
var responseJson = new String(response.body(), StandardCharsets.UTF_8);

// Response from generateContent has the same shape as streamGenerateContent. We reuse the already implemented
// class to avoid code duplication

StreamingUnifiedChatCompletionResults.ChatCompletionChunk chunk;
try (
XContentParser parser = XContentFactory.xContent(XContentType.JSON)
.createParser(XContentParserConfiguration.EMPTY, responseJson)
) {
parser.nextToken();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about we use moveToFirstToken().

chunk = GoogleVertexAiUnifiedStreamingProcessor.GoogleVertexAiChatCompletionChunkParser.parse(parser);
}
var results = chunk.choices().stream().map(choice -> choice.delta().content()).map(ChatCompletionResults.Result::new).toList();

return new ChatCompletionResults(results);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -974,7 +974,7 @@ public void testGetConfiguration() throws Exception {
{
"service": "googlevertexai",
"name": "Google Vertex AI",
"task_types": ["text_embedding", "rerank", "chat_completion"],
"task_types": ["text_embedding", "rerank", "completion", "chat_completion"],
"configurations": {
"service_account_json": {
"description": "API Key for the provider you're connecting to.",
Expand All @@ -983,7 +983,7 @@ public void testGetConfiguration() throws Exception {
"sensitive": true,
"updatable": true,
"type": "str",
"supported_task_types": ["text_embedding", "rerank", "chat_completion"]
"supported_task_types": ["text_embedding", "rerank", "completion", "chat_completion"]
},
"project_id": {
"description": "The GCP Project ID which has Vertex AI API(s) enabled. For more information on the URL, refer to the {geminiVertexAIDocs}.",
Expand All @@ -992,7 +992,7 @@ public void testGetConfiguration() throws Exception {
"sensitive": false,
"updatable": false,
"type": "str",
"supported_task_types": ["text_embedding", "rerank", "chat_completion"]
"supported_task_types": ["text_embedding", "rerank", "completion", "chat_completion"]
},
"location": {
"description": "Please provide the GCP region where the Vertex AI API(s) is enabled. For more information, refer to the {geminiVertexAIDocs}.",
Expand All @@ -1001,7 +1001,7 @@ public void testGetConfiguration() throws Exception {
"sensitive": false,
"updatable": false,
"type": "str",
"supported_task_types": ["text_embedding", "chat_completion"]
"supported_task_types": ["text_embedding", "completion", "chat_completion"]
},
"rate_limit.requests_per_minute": {
"description": "Minimize the number of rate limit errors.",
Expand All @@ -1010,7 +1010,7 @@ public void testGetConfiguration() throws Exception {
"sensitive": false,
"updatable": false,
"type": "int",
"supported_task_types": ["text_embedding", "rerank", "chat_completion"]
"supported_task_types": ["text_embedding", "rerank", "completion", "chat_completion"]
},
"model_id": {
"description": "ID of the LLM you're using.",
Expand All @@ -1019,7 +1019,7 @@ public void testGetConfiguration() throws Exception {
"sensitive": false,
"updatable": false,
"type": "str",
"supported_task_types": ["text_embedding", "rerank", "chat_completion"]
"supported_task_types": ["text_embedding", "rerank", "completion", "chat_completion"]
}
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.inference.services.googlevertexai.completion;

import org.elasticsearch.core.Strings;
import org.elasticsearch.inference.TaskType;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.inference.services.ConfigurationParseContext;

import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.Map;

import static org.hamcrest.Matchers.*;

public class GoogleVertexAiCompletionModelTests extends ESTestCase {

    private static final String DEFAULT_PROJECT_ID = "test-project";
    private static final String DEFAULT_LOCATION = "us-central1";
    private static final String DEFAULT_MODEL_ID = "gemini-pro";

    /**
     * The completion model must target the non-streaming {@code generateContent} endpoint,
     * with the configured location in the host and the literal {@code global} location segment
     * in the path.
     */
    public void testCreateModel() throws URISyntaxException {
        var model = createCompletionModel(DEFAULT_PROJECT_ID, DEFAULT_LOCATION, DEFAULT_MODEL_ID);
        URI expectedUri = new URI(
            Strings.format(
                "https://%s-aiplatform.googleapis.com/v1/projects/%s/locations/global/publishers/google/models/%s:generateContent",
                DEFAULT_LOCATION,
                DEFAULT_PROJECT_ID,
                DEFAULT_MODEL_ID
            )
        );
        assertThat(model.uri(), is(expectedUri));
    }

    // Mirrors how GoogleVertexAiService constructs the model: the COMPLETION task type is what
    // the service's `case COMPLETION ->` branch passes through to this constructor.
    private static GoogleVertexAiCompletionModel createCompletionModel(String projectId, String location, String modelId) {
        return new GoogleVertexAiCompletionModel(
            "google-vertex-ai-completion-test-id",
            TaskType.COMPLETION,
            "google_vertex_ai",
            new HashMap<>(Map.of("project_id", projectId, "location", location, "model_id", modelId)),
            new HashMap<>(),
            new HashMap<>(Map.of("service_account_json", "{}")),
            ConfigurationParseContext.PERSISTENT
        );
    }
}
Loading
Loading