Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/128694.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 128694
summary: "Adding Google VertexAI completion integration"
area: Inference
type: enhancement
issues: [ ]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure if I should keep this class or delete it and use the base class instead.

Also, I am using GoogleVertexAiUnifiedChatCompletionResponseHandler.GoogleVertexAiErrorResponse::fromResponse. I preferred to use it that way and avoid putting that in a common class between GoogleVertexAiUnifiedChatCompletionResponseHandler and GoogleVertexAiChatCompletionResponseHandler to avoid extending the class hierarchy, but let me know if you think otherwise

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure if I should keep this class or delete it and use the base class instead.

Up to you, I'd probably use the base class. If you want to keep this one, then I don't think we need to accept the requestType. I think we can set it in this class directly.

Also, I am using GoogleVertexAiUnifiedChatCompletionResponseHandler.GoogleVertexAiErrorResponse::fromResponse. I preferred to use it that way and avoid putting that in a common class between GoogleVertexAiUnifiedChatCompletionResponseHandler and GoogleVertexAiChatCompletionResponseHandler to avoid extending the class hierarchy, but let me know if you think otherwise

Nice! That looks good.

Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.inference.services.googlevertexai;

import org.elasticsearch.xpack.inference.services.googlevertexai.response.GoogleVertexAiCompletionResponseEntity;

/**
 * Response handler for Google Vertex AI non-streaming completion requests.
 *
 * Delegates all behavior to {@link GoogleVertexAiResponseHandler}, wiring in the
 * completion response entity parser and reusing the error parser declared on
 * {@link GoogleVertexAiUnifiedChatCompletionResponseHandler} so the error format
 * stays consistent between the completion and chat-completion paths without
 * extending the class hierarchy.
 */
public class GoogleVertexAiChatCompletionResponseHandler extends GoogleVertexAiResponseHandler {

    /**
     * @param requestType human-readable request label used in error messages
     */
    public GoogleVertexAiChatCompletionResponseHandler(String requestType) {
        super(
            requestType,
            GoogleVertexAiCompletionResponseEntity::fromResponse,
            GoogleVertexAiUnifiedChatCompletionResponseHandler.GoogleVertexAiErrorResponse::fromResponse,
            true // NOTE(review): presumably enables streaming support on the base handler — confirm against the base class
        );
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,19 @@

package org.elasticsearch.xpack.inference.services.googlevertexai;

import org.elasticsearch.inference.InferenceServiceResults;
import org.elasticsearch.xpack.core.inference.results.StreamingChatCompletionResults;
import org.elasticsearch.xpack.inference.external.http.HttpResult;
import org.elasticsearch.xpack.inference.external.http.retry.BaseResponseHandler;
import org.elasticsearch.xpack.inference.external.http.retry.ErrorResponse;
import org.elasticsearch.xpack.inference.external.http.retry.ResponseParser;
import org.elasticsearch.xpack.inference.external.http.retry.RetryException;
import org.elasticsearch.xpack.inference.external.request.Request;
import org.elasticsearch.xpack.inference.external.response.streaming.ServerSentEventParser;
import org.elasticsearch.xpack.inference.external.response.streaming.ServerSentEventProcessor;
import org.elasticsearch.xpack.inference.services.googlevertexai.response.GoogleVertexAiErrorResponseEntity;

import java.util.concurrent.Flow;
import java.util.function.Function;

import static org.elasticsearch.core.Strings.format;
Expand Down Expand Up @@ -66,4 +71,14 @@ protected void checkForFailureStatusCode(Request request, HttpResult result) thr
// Builds the error message for a 404 from Vertex AI (typically a wrong model/endpoint URL).
private static String resourceNotFoundError(Request request) {
    var uri = request.getURI();
    return format("Resource not found at [%s]", uri);
}

@Override
public InferenceServiceResults parseResult(Request request, Flow.Publisher<HttpResult> flow) {
    // Streaming pipeline: raw HTTP chunks -> server-sent events -> parsed completion chunks.
    var sseProcessor = new ServerSentEventProcessor(new ServerSentEventParser());
    var vertexAiProcessor = new GoogleVertexAiStreamingProcessor();

    flow.subscribe(sseProcessor);
    sseProcessor.subscribe(vertexAiProcessor);

    return new StreamingChatCompletionResults(vertexAiProcessor);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,9 @@ public static Map<String, SettingsConfiguration> get() {
var configurationMap = new HashMap<String, SettingsConfiguration>();
configurationMap.put(
SERVICE_ACCOUNT_JSON,
new SettingsConfiguration.Builder(EnumSet.of(TaskType.TEXT_EMBEDDING, TaskType.RERANK, TaskType.CHAT_COMPLETION))
.setDescription("API Key for the provider you're connecting to.")
new SettingsConfiguration.Builder(
EnumSet.of(TaskType.TEXT_EMBEDDING, TaskType.RERANK, TaskType.CHAT_COMPLETION, TaskType.COMPLETION)
).setDescription("API Key for the provider you're connecting to.")
.setLabel("Credentials JSON")
.setRequired(true)
.setSensitive(true)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import org.elasticsearch.xpack.inference.services.ServiceUtils;
import org.elasticsearch.xpack.inference.services.googlevertexai.action.GoogleVertexAiActionCreator;
import org.elasticsearch.xpack.inference.services.googlevertexai.completion.GoogleVertexAiChatCompletionModel;
import org.elasticsearch.xpack.inference.services.googlevertexai.completion.GoogleVertexAiCompletionModel;
import org.elasticsearch.xpack.inference.services.googlevertexai.embeddings.GoogleVertexAiEmbeddingsModel;
import org.elasticsearch.xpack.inference.services.googlevertexai.embeddings.GoogleVertexAiEmbeddingsServiceSettings;
import org.elasticsearch.xpack.inference.services.googlevertexai.request.GoogleVertexAiUnifiedChatCompletionRequest;
Expand Down Expand Up @@ -75,7 +76,8 @@ public class GoogleVertexAiService extends SenderService {
private static final EnumSet<TaskType> supportedTaskTypes = EnumSet.of(
TaskType.TEXT_EMBEDDING,
TaskType.RERANK,
TaskType.CHAT_COMPLETION
TaskType.CHAT_COMPLETION,
TaskType.COMPLETION
);

public static final EnumSet<InputType> VALID_INPUT_TYPE_VALUES = EnumSet.of(
Expand All @@ -93,7 +95,7 @@ public class GoogleVertexAiService extends SenderService {

@Override
public Set<TaskType> supportedStreamingTasks() {
    // Streaming is supported for both the unified (CHAT_COMPLETION) and legacy (COMPLETION) task types.
    // Fix: the scraped diff left both the removed and added return statements in place; only the
    // new-side return (including COMPLETION) is kept.
    return EnumSet.of(TaskType.CHAT_COMPLETION, TaskType.COMPLETION);
}

public GoogleVertexAiService(HttpRequestSender.Factory factory, ServiceComponents serviceComponents) {
Expand Down Expand Up @@ -368,6 +370,16 @@ private static GoogleVertexAiModel createModel(
context
);

case COMPLETION -> new GoogleVertexAiCompletionModel(
inferenceEntityId,
taskType,
NAME,
serviceSettings,
taskSettings,
secretSettings,
context
);

default -> throw new ElasticsearchStatusException(failureMessage, RestStatus.BAD_REQUEST);
};
}
Expand Down Expand Up @@ -396,10 +408,11 @@ public static InferenceServiceConfiguration get() {

configurationMap.put(
LOCATION,
new SettingsConfiguration.Builder(EnumSet.of(TaskType.TEXT_EMBEDDING, TaskType.CHAT_COMPLETION)).setDescription(
"Please provide the GCP region where the Vertex AI API(s) is enabled. "
+ "For more information, refer to the {geminiVertexAIDocs}."
)
new SettingsConfiguration.Builder(EnumSet.of(TaskType.TEXT_EMBEDDING, TaskType.CHAT_COMPLETION, TaskType.COMPLETION))
.setDescription(
"Please provide the GCP region where the Vertex AI API(s) is enabled. "
+ "For more information, refer to the {geminiVertexAIDocs}."
)
.setLabel("GCP Region")
.setRequired(true)
.setSensitive(false)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.inference.services.googlevertexai;

import org.elasticsearch.ElasticsearchStatusException;
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
import org.elasticsearch.inference.InferenceServiceResults;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.xcontent.XContentFactory;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.XContentParserConfiguration;
import org.elasticsearch.xcontent.XContentType;
import org.elasticsearch.xpack.core.inference.results.StreamingChatCompletionResults;
import org.elasticsearch.xpack.inference.common.DelegatingProcessor;
import org.elasticsearch.xpack.inference.external.response.streaming.ServerSentEvent;

import java.io.IOException;
import java.util.Deque;
import java.util.Objects;
import java.util.stream.Stream;

import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken;
import static org.elasticsearch.xpack.inference.external.response.XContentUtils.moveToFirstToken;

public class GoogleVertexAiStreamingProcessor extends DelegatingProcessor<Deque<ServerSentEvent>, InferenceServiceResults.Result> {

@Override
protected void next(Deque<ServerSentEvent> item) throws Exception {
var parserConfig = XContentParserConfiguration.EMPTY.withDeprecationHandler(LoggingDeprecationHandler.INSTANCE);
var results = parseEvent(item, GoogleVertexAiStreamingProcessor::parse, parserConfig);

if (results.isEmpty()) {
upstream().request(1);
} else {
downstream().onNext(new StreamingChatCompletionResults.Results(results));
}
}

public static Stream<StreamingChatCompletionResults.Result> parse(XContentParserConfiguration parserConfig, ServerSentEvent event) {
String data = event.data();
try (XContentParser jsonParser = XContentFactory.xContent(XContentType.JSON).createParser(parserConfig, data)) {
moveToFirstToken(jsonParser);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know this is what OpenAiStreamingProcessor is using but I think we can omit this line and the ensureExpectedToken because the GoogleVertexAiUnifiedStreamingProcessor.GoogleVertexAiChatCompletionChunkParser.parse uses the constructing object parser which should handle this validation.

OpenAiStreamingProcessor doesn't leverage the constructing object parser which is why we need to do validation.

ensureExpectedToken(XContentParser.Token.START_OBJECT, jsonParser.currentToken(), jsonParser);

var chunk = GoogleVertexAiUnifiedStreamingProcessor.GoogleVertexAiChatCompletionChunkParser.parse(jsonParser);

return chunk.choices()
.stream()
.map(choice -> choice.delta())
.filter(Objects::nonNull)
.map(delta -> delta.content())
.filter(content -> content != null && content.isEmpty() == false)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: I think we can use Strings.isNullOrEmpty here

.map(StreamingChatCompletionResults.Result::new);

} catch (IOException e) {
throw new ElasticsearchStatusException(
"Failed to parse event from inference provider: {}",
RestStatus.INTERNAL_SERVER_ERROR,
e,
event
);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@
import org.elasticsearch.xpack.core.inference.results.UnifiedChatCompletionException;
import org.elasticsearch.xpack.inference.external.http.HttpResult;
import org.elasticsearch.xpack.inference.external.http.retry.ErrorResponse;
import org.elasticsearch.xpack.inference.external.http.retry.ResponseParser;
import org.elasticsearch.xpack.inference.external.request.Request;
import org.elasticsearch.xpack.inference.external.response.streaming.ServerSentEventParser;
import org.elasticsearch.xpack.inference.external.response.streaming.ServerSentEventProcessor;
import org.elasticsearch.xpack.inference.services.googlevertexai.response.GoogleVertexAiCompletionResponseEntity;

import java.nio.charset.StandardCharsets;
import java.util.Locale;
Expand All @@ -43,10 +43,8 @@ public class GoogleVertexAiUnifiedChatCompletionResponseHandler extends GoogleVe
private static final String ERROR_MESSAGE_FIELD = "message";
private static final String ERROR_STATUS_FIELD = "status";

private static final ResponseParser noopParseFunction = (a, b) -> null;

/**
 * @param requestType human-readable request label used in error messages
 */
public GoogleVertexAiUnifiedChatCompletionResponseHandler(String requestType) {
    // Fix: the scraped diff retained both the removed super(...) call (using the deleted
    // noopParseFunction) and the added one, plus interleaved review prose; only the new-side
    // call, which parses non-streaming bodies via GoogleVertexAiCompletionResponseEntity, is kept.
    super(requestType, GoogleVertexAiCompletionResponseEntity::fromResponse, GoogleVertexAiErrorResponse::fromResponse, true);
}

@Override
Expand All @@ -64,6 +62,7 @@ public InferenceServiceResults parseResult(Request request, Flow.Publisher<HttpR
@Override
protected Exception buildError(String message, Request request, HttpResult result, ErrorResponse errorResponse) {
assert request.isStreaming() : "Only streaming requests support this format";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's try to keep this for streaming only


var responseStatusCode = result.response().getStatusLine().getStatusCode();
var errorMessage = errorMessage(message, request, result, errorResponse, responseStatusCode);
var restStatus = toRestStatus(responseStatusCode);
Expand Down Expand Up @@ -111,7 +110,7 @@ private static Exception buildMidStreamError(Request request, String message, Ex
}
}

private static class GoogleVertexAiErrorResponse extends ErrorResponse {
public static class GoogleVertexAiErrorResponse extends ErrorResponse {
private static final Logger logger = LogManager.getLogger(GoogleVertexAiErrorResponse.class);
private static final ConstructingObjectParser<Optional<ErrorResponse>, Void> ERROR_PARSER = new ConstructingObjectParser<>(
"google_vertex_ai_error_wrapper",
Expand All @@ -138,7 +137,7 @@ private static class GoogleVertexAiErrorResponse extends ErrorResponse {
);
}

static ErrorResponse fromResponse(HttpResult response) {
public static ErrorResponse fromResponse(HttpResult response) {
try (
XContentParser parser = XContentFactory.xContent(XContentType.JSON)
.createParser(XContentParserConfiguration.EMPTY, response.body())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

package org.elasticsearch.xpack.inference.services.googlevertexai.action;

import org.elasticsearch.ElasticsearchStatusException;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.xpack.inference.external.action.ExecutableAction;
import org.elasticsearch.xpack.inference.external.action.SenderExecutableAction;
import org.elasticsearch.xpack.inference.external.action.SingleInputSenderExecutableAction;
Expand All @@ -16,14 +18,17 @@
import org.elasticsearch.xpack.inference.external.http.sender.Sender;
import org.elasticsearch.xpack.inference.external.http.sender.UnifiedChatInput;
import org.elasticsearch.xpack.inference.services.ServiceComponents;
import org.elasticsearch.xpack.inference.services.googlevertexai.GoogleVertexAiChatCompletionResponseHandler;
import org.elasticsearch.xpack.inference.services.googlevertexai.GoogleVertexAiEmbeddingsRequestManager;
import org.elasticsearch.xpack.inference.services.googlevertexai.GoogleVertexAiRerankRequestManager;
import org.elasticsearch.xpack.inference.services.googlevertexai.GoogleVertexAiUnifiedChatCompletionResponseHandler;
import org.elasticsearch.xpack.inference.services.googlevertexai.completion.GoogleVertexAiChatCompletionModel;
import org.elasticsearch.xpack.inference.services.googlevertexai.completion.GoogleVertexAiCompletionModel;
import org.elasticsearch.xpack.inference.services.googlevertexai.embeddings.GoogleVertexAiEmbeddingsModel;
import org.elasticsearch.xpack.inference.services.googlevertexai.request.GoogleVertexAiUnifiedChatCompletionRequest;
import org.elasticsearch.xpack.inference.services.googlevertexai.rerank.GoogleVertexAiRerankModel;

import java.net.URISyntaxException;
import java.util.Map;
import java.util.Objects;

Expand All @@ -36,9 +41,10 @@ public class GoogleVertexAiActionCreator implements GoogleVertexAiActionVisitor

private final ServiceComponents serviceComponents;

static final ResponseHandler COMPLETION_HANDLER = new GoogleVertexAiUnifiedChatCompletionResponseHandler(
static final ResponseHandler UNIFIED_CHAT_COMPLETION_HANDLER = new GoogleVertexAiUnifiedChatCompletionResponseHandler(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is another area that we need to refactor in the code base. The "unified" code path doesn't flow through the action creator like the other task types. Rerank, text embedding, and completion all originate through doInfer() and typically we call through to here to handle them based on the model class.

Chat completion aka unified flows through doUnifiedCompletionInfer() and doesn't use the action creator. So I think we can remove this.

I think we can also remove GoogleVertexAiCompletionModel and rely on GoogleVertexAiChatCompletionModel. If we receive GoogleVertexAiChatCompletionModel in here it should mean that we're trying to do completion not chat_completion.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch! Didn't notice that. I think this is the way to go, it removes a lot of code and it makes it simpler to follow

"Google VertexAI chat completion"
);
static final ResponseHandler CHAT_COMPLETION_HANDLER = new GoogleVertexAiChatCompletionResponseHandler("Google VertexAI completion");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's call this COMPLETION_HANDLER. We use chat/unified interchangeably (we need to clean that up throughout the code base).

static final String USER_ROLE = "user";

public GoogleVertexAiActionCreator(Sender sender, ServiceComponents serviceComponents) {
Expand Down Expand Up @@ -72,11 +78,31 @@ public ExecutableAction create(GoogleVertexAiChatCompletionModel model, Map<Stri
var manager = new GenericRequestManager<>(
serviceComponents.threadPool(),
model,
COMPLETION_HANDLER,
UNIFIED_CHAT_COMPLETION_HANDLER,
inputs -> new GoogleVertexAiUnifiedChatCompletionRequest(new UnifiedChatInput(inputs, USER_ROLE), model),
ChatCompletionInput.class
);

return new SingleInputSenderExecutableAction(sender, manager, failedToSendRequestErrorMessage, COMPLETION_ERROR_PREFIX);
}

@Override
public ExecutableAction create(GoogleVertexAiCompletionModel model, Map<String, Object> taskSettings) {
var failedToSendRequestErrorMessage = constructFailedToSendRequestMessage(COMPLETION_ERROR_PREFIX);

var manager = new GenericRequestManager<>(serviceComponents.threadPool(), model, CHAT_COMPLETION_HANDLER, inputs -> {
try {
model.updateUri(inputs.stream());
} catch (URISyntaxException e) {
throw new ElasticsearchStatusException(
"Error constructing URI for Google VertexAI completion",
RestStatus.INTERNAL_SERVER_ERROR,
e
);
}
return new GoogleVertexAiUnifiedChatCompletionRequest(new UnifiedChatInput(inputs, USER_ROLE), model);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of calling model.updateUri we can just pass in the inputs and model like openai does. Then in the constructor of GoogleVertexAiUnifiedChatCompletionRequest we can check the inputs.stream() flag to determine if we're streaming and retrieve the appropriate URI.

}, ChatCompletionInput.class);

return new SingleInputSenderExecutableAction(sender, manager, failedToSendRequestErrorMessage, COMPLETION_ERROR_PREFIX);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import org.elasticsearch.xpack.inference.external.action.ExecutableAction;
import org.elasticsearch.xpack.inference.services.googlevertexai.completion.GoogleVertexAiChatCompletionModel;
import org.elasticsearch.xpack.inference.services.googlevertexai.completion.GoogleVertexAiCompletionModel;
import org.elasticsearch.xpack.inference.services.googlevertexai.embeddings.GoogleVertexAiEmbeddingsModel;
import org.elasticsearch.xpack.inference.services.googlevertexai.rerank.GoogleVertexAiRerankModel;

Expand All @@ -21,4 +22,6 @@ public interface GoogleVertexAiActionVisitor {
ExecutableAction create(GoogleVertexAiRerankModel model, Map<String, Object> taskSettings);

ExecutableAction create(GoogleVertexAiChatCompletionModel model, Map<String, Object> taskSettings);

ExecutableAction create(GoogleVertexAiCompletionModel model, Map<String, Object> taskSettings);
}
Loading
Loading