Google Vertex AI completion model, response entity and tests

leo-hoet · leo-hoet · commit a9df8e333554 · 2025-05-30T11:07:11.000-03:00
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/GoogleVertexAiService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/GoogleVertexAiService.java
@@ -42,6 +42,7 @@
 import org.elasticsearch.xpack.inference.services.ServiceUtils;
 import org.elasticsearch.xpack.inference.services.googlevertexai.action.GoogleVertexAiActionCreator;
 import org.elasticsearch.xpack.inference.services.googlevertexai.completion.GoogleVertexAiChatCompletionModel;
+import org.elasticsearch.xpack.inference.services.googlevertexai.completion.GoogleVertexAiCompletionModel;
 import org.elasticsearch.xpack.inference.services.googlevertexai.embeddings.GoogleVertexAiEmbeddingsModel;
 import org.elasticsearch.xpack.inference.services.googlevertexai.embeddings.GoogleVertexAiEmbeddingsServiceSettings;
 import org.elasticsearch.xpack.inference.services.googlevertexai.request.GoogleVertexAiUnifiedChatCompletionRequest;
@@ -75,7 +76,8 @@ public class GoogleVertexAiService extends SenderService {
     private static final EnumSet<TaskType> supportedTaskTypes = EnumSet.of(
         TaskType.TEXT_EMBEDDING,
         TaskType.RERANK,
-        TaskType.CHAT_COMPLETION
+        TaskType.CHAT_COMPLETION,
+        TaskType.COMPLETION
     );
 
     public static final EnumSet<InputType> VALID_INPUT_TYPE_VALUES = EnumSet.of(
@@ -220,11 +222,11 @@ protected void doInfer(
             return;
         }
 
-        GoogleVertexAiModel googleVertexAiModel = (GoogleVertexAiModel) model;
+        var googleVertexAiModel = (GoogleVertexAiModel) model; // base type: the service also supports embeddings and rerank models
 
         var actionCreator = new GoogleVertexAiActionCreator(getSender(), getServiceComponents());
 
         var action = googleVertexAiModel.accept(actionCreator, taskSettings);
         action.execute(inputs, timeout, listener);
     }
 
@@ -368,6 +370,16 @@ private static GoogleVertexAiModel createModel(
                 context
             );
 
+            case COMPLETION -> new GoogleVertexAiCompletionModel(
+                inferenceEntityId,
+                taskType,
+                NAME,
+                serviceSettings,
+                taskSettings,
+                secretSettings,
+                context
+            );
+
             default -> throw new ElasticsearchStatusException(failureMessage, RestStatus.BAD_REQUEST);
         };
     }
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/GoogleVertexAiUnifiedChatCompletionResponseHandler.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/GoogleVertexAiUnifiedChatCompletionResponseHandler.java
@@ -23,10 +23,10 @@
 import org.elasticsearch.xpack.core.inference.results.UnifiedChatCompletionException;
 import org.elasticsearch.xpack.inference.external.http.HttpResult;
 import org.elasticsearch.xpack.inference.external.http.retry.ErrorResponse;
-import org.elasticsearch.xpack.inference.external.http.retry.ResponseParser;
 import org.elasticsearch.xpack.inference.external.request.Request;
 import org.elasticsearch.xpack.inference.external.response.streaming.ServerSentEventParser;
 import org.elasticsearch.xpack.inference.external.response.streaming.ServerSentEventProcessor;
+import org.elasticsearch.xpack.inference.services.googlevertexai.response.GoogleVertexAiCompletionResponseEntity;
 
 import java.nio.charset.StandardCharsets;
 import java.util.Locale;
@@ -43,10 +43,8 @@ public class GoogleVertexAiUnifiedChatCompletionResponseHandler extends GoogleVe
     private static final String ERROR_MESSAGE_FIELD = "message";
     private static final String ERROR_STATUS_FIELD = "status";
 
-    private static final ResponseParser noopParseFunction = (a, b) -> null;
-
     public GoogleVertexAiUnifiedChatCompletionResponseHandler(String requestType) {
-        super(requestType, noopParseFunction, GoogleVertexAiErrorResponse::fromResponse, true);
+        super(requestType, GoogleVertexAiCompletionResponseEntity::fromResponse, GoogleVertexAiErrorResponse::fromResponse, true);
     }
 
     @Override
@@ -63,7 +61,6 @@ public InferenceServiceResults parseResult(Request request, Flow.Publisher<HttpR
 
     @Override
     protected Exception buildError(String message, Request request, HttpResult result, ErrorResponse errorResponse) {
-        assert request.isStreaming() : "Only streaming requests support this format";
         var responseStatusCode = result.response().getStatusLine().getStatusCode();
         var errorMessage = errorMessage(message, request, result, errorResponse, responseStatusCode);
         var restStatus = toRestStatus(responseStatusCode);
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/completion/GoogleVertexAiCompletionModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/completion/GoogleVertexAiCompletionModel.java
@@ -0,0 +1,93 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.services.googlevertexai.completion;
+
+import org.apache.http.client.utils.URIBuilder;
+import org.elasticsearch.inference.TaskType;
+import org.elasticsearch.xpack.inference.services.ConfigurationParseContext;
+import org.elasticsearch.xpack.inference.services.googlevertexai.request.GoogleVertexAiUtils;
+
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.Map;
+
+import static org.elasticsearch.core.Strings.format;
+
+/**
+ * Google Vertex AI model whose request URI targets the {@code generateContent} endpoint.
+ * All settings handling is inherited from {@link GoogleVertexAiChatCompletionModel}; only the URI is set here.
+ */
+public class GoogleVertexAiCompletionModel extends GoogleVertexAiChatCompletionModel {
+
+    /**
+     * Builds the model from the raw settings maps and sets the request URI to the {@code generateContent} endpoint.
+     *
+     * @param inferenceEntityId id of the inference endpoint, passed through to the parent model
+     * @param taskType task type, passed through to the parent model
+     * @param service service name, passed through to the parent model
+     * @param serviceSettings raw service settings, passed through to the parent model
+     * @param taskSettings raw task settings, passed through to the parent model
+     * @param secrets raw secret settings, passed through to the parent model
+     * @param context parse context, passed through to the parent model
+     * @throws IllegalArgumentException if the configured location, project id and model id do not form a valid URI
+     */
+    public GoogleVertexAiCompletionModel(
+        String inferenceEntityId,
+        TaskType taskType,
+        String service,
+        Map<String, Object> serviceSettings,
+        Map<String, Object> taskSettings,
+        Map<String, Object> secrets,
+        ConfigurationParseContext context
+    ) {
+        super(inferenceEntityId, taskType, service, serviceSettings, taskSettings, secrets, context);
+        var modelServiceSettings = this.getServiceSettings();
+        try {
+            this.uri = buildUri(modelServiceSettings.location(), modelServiceSettings.projectId(), modelServiceSettings.modelId());
+        } catch (URISyntaxException e) {
+            // The URI parts come from the model's service settings, so report an invalid argument and keep the cause.
+            throw new IllegalArgumentException(
+                format(
+                    "Failed to build the Google Vertex AI completion URI for location [%s], project [%s] and model [%s]",
+                    modelServiceSettings.location(),
+                    modelServiceSettings.projectId(),
+                    modelServiceSettings.modelId()
+                ),
+                e
+            );
+        }
+    }
+
+    /**
+     * Builds the URI of the {@code generateContent} endpoint for a model.
+     * <p>
+     * The {@code location} only selects the host; the path always uses the {@code GoogleVertexAiUtils.GLOBAL} segment.
+     *
+     * @param location location used as the host prefix
+     * @param projectId project id placed in the path
+     * @param model model id placed in the last path segment
+     * @return the endpoint URI
+     * @throws URISyntaxException if the parts do not form a valid URI
+     */
+    public static URI buildUri(String location, String projectId, String model) throws URISyntaxException {
+        return new URIBuilder().setScheme("https")
+            .setHost(format("%s%s", location, GoogleVertexAiUtils.GOOGLE_VERTEX_AI_HOST_SUFFIX))
+            .setPathSegments(
+                GoogleVertexAiUtils.V1,
+                GoogleVertexAiUtils.PROJECTS,
+                projectId,
+                GoogleVertexAiUtils.LOCATIONS,
+                GoogleVertexAiUtils.GLOBAL,
+                GoogleVertexAiUtils.PUBLISHERS,
+                GoogleVertexAiUtils.PUBLISHER_GOOGLE,
+                GoogleVertexAiUtils.MODELS,
+                format("%s:%s", model, GoogleVertexAiUtils.GENERATE_CONTENT)
+            )
+            .build();
+    }
+}
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/request/GoogleVertexAiUtils.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/request/GoogleVertexAiUtils.java
@@ -37,6 +37,8 @@ public final class GoogleVertexAiUtils {
 
     public static final String STREAM_GENERATE_CONTENT = "streamGenerateContent";
 
+    public static final String GENERATE_CONTENT = "generateContent";
+
     public static final String QUERY_PARAM_ALT_SSE = "alt=sse";
 
     private GoogleVertexAiUtils() {}
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/response/GoogleVertexAiCompletionResponseEntity.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/response/GoogleVertexAiCompletionResponseEntity.java
@@ -0,0 +1,110 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.services.googlevertexai.response;
+
+import org.elasticsearch.inference.InferenceServiceResults;
+import org.elasticsearch.xcontent.XContentFactory;
+import org.elasticsearch.xcontent.XContentParser;
+import org.elasticsearch.xcontent.XContentParserConfiguration;
+import org.elasticsearch.xcontent.XContentType;
+import org.elasticsearch.xpack.core.inference.results.ChatCompletionResults;
+import org.elasticsearch.xpack.core.inference.results.StreamingUnifiedChatCompletionResults;
+import org.elasticsearch.xpack.inference.external.http.HttpResult;
+import org.elasticsearch.xpack.inference.external.request.Request;
+import org.elasticsearch.xpack.inference.services.googlevertexai.GoogleVertexAiUnifiedStreamingProcessor;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+
+public class GoogleVertexAiCompletionResponseEntity {
+    /**
+     * Converts a Google Vertex AI {@code generateContent} response into {@link ChatCompletionResults}.
+     * <p>
+     * Given a request such as:
+     * <pre>
+     *     <code>
+     *         {
+     *             "inputs": "Please summarize this text: some text"
+     *         }
+     *     </code>
+     * </pre>
+     *
+     * the service answers with a
+     * <a href="https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/GenerateContentResponse">GenerateContentResponse</a>
+     * object that looks like:
+     *
+     * <pre>
+     *     <code>
+     *
+     * {
+     *   "candidates": [
+     *     {
+     *       "content": {
+     *         "role": "model",
+     *         "parts": [
+     *           {
+     *             "text": "I am sorry, I cannot summarize the text because I do not have access to the text you are referring to."
+     *           }
+     *         ]
+     *       },
+     *       "finishReason": "STOP",
+     *       "avgLogprobs": -0.19326641248620074
+     *     }
+     *   ],
+     *   "usageMetadata": {
+     *     "promptTokenCount": 71,
+     *     "candidatesTokenCount": 23,
+     *     "totalTokenCount": 94,
+     *     "trafficType": "ON_DEMAND",
+     *     "promptTokensDetails": [
+     *       {
+     *         "modality": "TEXT",
+     *         "tokenCount": 71
+     *       }
+     *     ],
+     *     "candidatesTokensDetails": [
+     *       {
+     *         "modality": "TEXT",
+     *         "tokenCount": 23
+     *       }
+     *     ]
+     *   },
+     *   "modelVersion": "gemini-2.0-flash-001",
+     *   "createTime": "2025-05-28T15:08:20.049493Z",
+     *   "responseId": "5CY3aNWCA6mm4_UPr-zduAE"
+     * }
+     *    </code>
+     * </pre>
+     *
+     * Each parsed choice contributes one result holding the content of that choice's delta.
+     *
+     * @param request the original request made to the service; not read by this method
+     * @param response the HTTP result whose body holds the JSON payload, decoded as UTF-8
+     * @return a {@link ChatCompletionResults} with one entry per parsed choice
+     * @throws IOException if creating the parser or parsing the body fails
+     */
+    public static InferenceServiceResults fromResponse(Request request, HttpResult response) throws IOException {
+        // A generateContent response has the same shape as a streamGenerateContent one, so the existing streaming
+        // chunk parser is reused instead of duplicating the parsing code.
+        var parsedChunk = parseChunk(new String(response.body(), StandardCharsets.UTF_8));
+        return new ChatCompletionResults(
+            parsedChunk.choices().stream().map(choice -> new ChatCompletionResults.Result(choice.delta().content())).toList()
+        );
+    }
+
+    /**
+     * Parses the JSON payload with the chunk parser of the unified streaming processor.
+     */
+    private static StreamingUnifiedChatCompletionResults.ChatCompletionChunk parseChunk(String json) throws IOException {
+        var jsonXContent = XContentFactory.xContent(XContentType.JSON);
+        try (XContentParser jsonParser = jsonXContent.createParser(XContentParserConfiguration.EMPTY, json)) {
+            jsonParser.nextToken();
+            return GoogleVertexAiUnifiedStreamingProcessor.GoogleVertexAiChatCompletionChunkParser.parse(jsonParser);
+        }
+    }
+}
diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/googlevertexai/completion/GoogleVertexAiCompletionModelTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/googlevertexai/completion/GoogleVertexAiCompletionModelTests.java
@@ -0,0 +1,58 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.services.googlevertexai.completion;
+
+import org.elasticsearch.core.Strings;
+import org.elasticsearch.inference.TaskType;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.xpack.inference.services.ConfigurationParseContext;
+
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.hamcrest.Matchers.*;
+
+public class GoogleVertexAiCompletionModelTests extends ESTestCase {
+
+    private static final String DEFAULT_PROJECT_ID = "test-project";
+    private static final String DEFAULT_LOCATION = "us-central1";
+    private static final String DEFAULT_MODEL_ID = "gemini-pro";
+
+    /**
+     * Verifies that a newly created model exposes the generateContent URI built from its location, project and model id.
+     */
+    public void testCreateModel() throws URISyntaxException {
+        var model = createCompletionModel(DEFAULT_PROJECT_ID, DEFAULT_LOCATION, DEFAULT_MODEL_ID);
+        URI expectedUri = new URI(
+            Strings.format(
+                "https://%s-aiplatform.googleapis.com/v1/projects/%s/locations/global/publishers/google/models/%s:generateContent",
+                DEFAULT_LOCATION,
+                DEFAULT_PROJECT_ID,
+                DEFAULT_MODEL_ID
+            )
+        );
+        assertThat(model.uri(), is(expectedUri));
+    }
+
+    /**
+     * Creates a completion model with the COMPLETION task type, the task type this model class represents.
+     */
+    private static GoogleVertexAiCompletionModel createCompletionModel(String projectId, String location, String modelId) {
+        return new GoogleVertexAiCompletionModel(
+            "google-vertex-ai-chat-test-id",
+            TaskType.COMPLETION,
+            "google_vertex_ai",
+            new HashMap<>(Map.of("project_id", projectId, "location", location, "model_id", modelId)),
+            new HashMap<>(),
+            new HashMap<>(Map.of("service_account_json", "{}")),
+            ConfigurationParseContext.PERSISTENT
+        );
+    }
+}
diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/googlevertexai/response/GoogleVertexAiCompletionResponseEntityTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/googlevertexai/response/GoogleVertexAiCompletionResponseEntityTests.java
@@ -0,0 +1,84 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.services.googlevertexai.response;
+
+import org.apache.http.HttpResponse;
+import org.elasticsearch.core.Strings;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.xpack.core.inference.results.ChatCompletionResults;
+import org.elasticsearch.xpack.inference.external.http.HttpResult;
+import org.elasticsearch.xpack.inference.external.request.Request;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+
+import static org.hamcrest.Matchers.is;
+import static org.mockito.Mockito.mock;
+
+public class GoogleVertexAiCompletionResponseEntityTests extends ESTestCase {
+
+    /**
+     * Parses the sample response from the entity's Javadoc and checks that its single candidate text is returned.
+     */
+    public void testFromResponse_Javadoc() throws IOException {
+        var responseText = "I am sorry, I cannot summarize the text because I do not have access to the text you are referring to.";
+
+        String responseJson = Strings.format("""
+              {
+              "candidates": [
+                {
+                  "content": {
+                    "role": "model",
+                    "parts": [
+                      {
+                        "text": "%s"
+                      }
+                    ]
+                  },
+                  "finishReason": "STOP",
+                  "avgLogprobs": -0.19326641248620074
+                }
+              ],
+              "usageMetadata": {
+                "promptTokenCount": 71,
+                "candidatesTokenCount": 23,
+                "totalTokenCount": 94,
+                "trafficType": "ON_DEMAND",
+                "promptTokensDetails": [
+                  {
+                    "modality": "TEXT",
+                    "tokenCount": 71
+                  }
+                ],
+                "candidatesTokensDetails": [
+                  {
+                    "modality": "TEXT",
+                    "tokenCount": 23
+                  }
+                ]
+              },
+              "modelVersion": "gemini-2.0-flash-001",
+              "createTime": "2025-05-28T15:08:20.049493Z",
+              "responseId": "5CY3aNWCA6mm4_UPr-zduAE"
+            }
+            """, responseText);
+
+        var parsedResults = GoogleVertexAiCompletionResponseEntity.fromResponse(
+            mock(Request.class),
+            new HttpResult(mock(HttpResponse.class), responseJson.getBytes(StandardCharsets.UTF_8))
+        );
+
+        // Use a test assertion rather than the Java assert statement, which is skipped when assertions are disabled.
+        assertThat(parsedResults instanceof ChatCompletionResults, is(true));
+        var results = (ChatCompletionResults) parsedResults;
+
+        assertThat(results.isStreaming(), is(false));
+        assertThat(results.results().size(), is(1));
+        assertThat(results.results().get(0).content(), is(responseText));
+    }
+}