elastic
diff --git a/‎x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceNamedWriteablesProvider.java‎
Lines changed: 9 additions & 0 deletions b/‎x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceNamedWriteablesProvider.java‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/action/GoogleVertexAiActionCreator.java‎
Lines changed: 3 additions & 2 deletions b/‎x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/action/GoogleVertexAiActionCreator.java‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/completion/GoogleVertexAiChatCompletionModel.java‎
Lines changed: 33 additions & 7 deletions b/‎x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/completion/GoogleVertexAiChatCompletionModel.java‎
Lines changed: 33 additions & 7 deletions
diff --git a/‎x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/completion/GoogleVertexAiChatCompletionServiceSettings.java‎
Lines changed: 5 additions & 30 deletions b/‎x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/completion/GoogleVertexAiChatCompletionServiceSettings.java‎
Lines changed: 5 additions & 30 deletions
diff --git a/‎x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/completion/GoogleVertexAiChatCompletionTaskSettings.java‎
Lines changed: 129 additions & 0 deletions b/‎x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/completion/GoogleVertexAiChatCompletionTaskSettings.java‎
Lines changed: 129 additions & 0 deletions
@@ -89,6 +89,7 @@
 import org.elasticsearch.xpack.inference.services.googleaistudio.embeddings.GoogleAiStudioEmbeddingsServiceSettings;
 import org.elasticsearch.xpack.inference.services.googlevertexai.GoogleVertexAiSecretSettings;
 import org.elasticsearch.xpack.inference.services.googlevertexai.completion.GoogleVertexAiChatCompletionServiceSettings;
+import org.elasticsearch.xpack.inference.services.googlevertexai.completion.GoogleVertexAiChatCompletionTaskSettings;
 import org.elasticsearch.xpack.inference.services.googlevertexai.embeddings.GoogleVertexAiEmbeddingsServiceSettings;
 import org.elasticsearch.xpack.inference.services.googlevertexai.embeddings.GoogleVertexAiEmbeddingsTaskSettings;
 import org.elasticsearch.xpack.inference.services.googlevertexai.rerank.GoogleVertexAiRerankServiceSettings;
@@ -570,6 +571,14 @@ private static void addGoogleVertexAiNamedWriteables(List<NamedWriteableRegistry
             )
         );
 
+        namedWriteables.add(
+            new NamedWriteableRegistry.Entry(
+                TaskSettings.class,
+                GoogleVertexAiChatCompletionTaskSettings.NAME,
+                GoogleVertexAiChatCompletionTaskSettings::new
+            )
+        );
+
     }
 
     private static void addInternalNamedWriteables(List<NamedWriteableRegistry.Entry> namedWriteables) {
 
@@ -73,13 +73,14 @@ public ExecutableAction create(GoogleVertexAiRerankModel model, Map<String, Obje
 
     @Override
     public ExecutableAction create(GoogleVertexAiChatCompletionModel model, Map<String, Object> taskSettings) {
+        var overriddenModel = GoogleVertexAiChatCompletionModel.of(model, taskSettings);
         var failedToSendRequestErrorMessage = constructFailedToSendRequestMessage(COMPLETION_ERROR_PREFIX);
 
         var manager = new GenericRequestManager<>(
             serviceComponents.threadPool(),
-            model,
+            overriddenModel,
             CHAT_COMPLETION_HANDLER,
-            inputs -> new GoogleVertexAiUnifiedChatCompletionRequest(new UnifiedChatInput(inputs, USER_ROLE), model),
+            inputs -> new GoogleVertexAiUnifiedChatCompletionRequest(new UnifiedChatInput(inputs, USER_ROLE), overriddenModel),
             ChatCompletionInput.class
         );
 
 
@@ -9,7 +9,6 @@
 
 import org.apache.http.client.utils.URIBuilder;
 import org.elasticsearch.core.Nullable;
-import org.elasticsearch.inference.EmptyTaskSettings;
 import org.elasticsearch.inference.ModelConfigurations;
 import org.elasticsearch.inference.ModelSecrets;
 import org.elasticsearch.inference.TaskType;
@@ -47,7 +46,7 @@ public GoogleVertexAiChatCompletionModel(
             taskType,
             service,
             GoogleVertexAiChatCompletionServiceSettings.fromMap(serviceSettings, context),
-            new EmptyTaskSettings(),
+            GoogleVertexAiChatCompletionTaskSettings.fromMap(taskSettings),
             GoogleVertexAiSecretSettings.fromMap(secrets)
         );
     }
@@ -57,7 +56,7 @@ public GoogleVertexAiChatCompletionModel(
         TaskType taskType,
         String service,
         GoogleVertexAiChatCompletionServiceSettings serviceSettings,
-        EmptyTaskSettings taskSettings,
+        GoogleVertexAiChatCompletionTaskSettings taskSettings,
         @Nullable GoogleVertexAiSecretSettings secrets
     ) {
         super(
@@ -73,15 +72,22 @@ public GoogleVertexAiChatCompletionModel(
         }
     }
 
+    /**
+     * Copy-style constructor: builds a model from an existing one, passing the supplied task settings to the superclass
+     * constructor and reusing the source model's streaming URI.
+     *
+     * @param model the model to copy from
+     * @param taskSettings the task settings handed to the superclass constructor together with {@code model}
+     */
+    private GoogleVertexAiChatCompletionModel(
+        GoogleVertexAiChatCompletionModel model,
+        GoogleVertexAiChatCompletionTaskSettings taskSettings
+    ) {
+        super(model, taskSettings);
+        // The streaming URI is taken unchanged from the source model
+        this.streamingURI = model.streamingURI();
+    }
+
     public static GoogleVertexAiChatCompletionModel of(GoogleVertexAiChatCompletionModel model, UnifiedCompletionRequest request) {
         var originalModelServiceSettings = model.getServiceSettings();
 
         var newServiceSettings = new GoogleVertexAiChatCompletionServiceSettings(
             originalModelServiceSettings.projectId(),
             originalModelServiceSettings.location(),
             Objects.requireNonNullElse(request.model(), originalModelServiceSettings.modelId()),
-            originalModelServiceSettings.rateLimitSettings(),
-            originalModelServiceSettings.thinkingConfig()
+            originalModelServiceSettings.rateLimitSettings()
         );
 
         return new GoogleVertexAiChatCompletionModel(
@@ -94,6 +100,26 @@ public static GoogleVertexAiChatCompletionModel of(GoogleVertexAiChatCompletionM
         );
     }
 
+    /**
+     * Returns a model whose task settings are the given model's settings overridden by the settings parsed from the map.
+     * The original model instance is returned unchanged when the map is {@code null} or empty, when the parsed settings are
+     * empty, or when the parsed settings equal the settings already present in the model.
+     *
+     * @param model the model whose task settings will be overridden
+     * @param taskSettingsMap the new task settings to use
+     * @return a {@link GoogleVertexAiChatCompletionModel} with overridden {@link GoogleVertexAiChatCompletionTaskSettings},
+     *         or {@code model} itself when there is nothing to override
+     */
+    public static GoogleVertexAiChatCompletionModel of(GoogleVertexAiChatCompletionModel model, Map<String, Object> taskSettingsMap) {
+        // Nothing was supplied with the request
+        if (taskSettingsMap == null || taskSettingsMap.isEmpty()) {
+            return model;
+        }
+
+        final GoogleVertexAiChatCompletionTaskSettings overrides = GoogleVertexAiChatCompletionTaskSettings.fromMap(taskSettingsMap);
+        // The map held no task settings that were parsed into a non-empty value
+        if (overrides.isEmpty()) {
+            return model;
+        }
+
+        final GoogleVertexAiChatCompletionTaskSettings current = model.getTaskSettings();
+        // The request asks for exactly what the model already has
+        if (current.equals(overrides)) {
+            return model;
+        }
+
+        return new GoogleVertexAiChatCompletionModel(model, GoogleVertexAiChatCompletionTaskSettings.of(current, overrides));
+    }
+
     @Override
     public ExecutableAction accept(GoogleVertexAiActionVisitor visitor, Map<String, Object> taskSettings) {
         return visitor.create(this, taskSettings);
@@ -110,8 +136,8 @@ public GoogleVertexAiChatCompletionServiceSettings getServiceSettings() {
     }
 
     @Override
-    public EmptyTaskSettings getTaskSettings() {
-        return (EmptyTaskSettings) super.getTaskSettings();
+    public GoogleVertexAiChatCompletionTaskSettings getTaskSettings() {
+        return (GoogleVertexAiChatCompletionTaskSettings) super.getTaskSettings();
     }
 
     @Override
 
@@ -45,23 +45,12 @@ public class GoogleVertexAiChatCompletionServiceSettings extends FilteredXConten
     private final String projectId;
 
     private final RateLimitSettings rateLimitSettings;
-    private final ThinkingConfig thinkingConfig;
 
     // https://cloud.google.com/vertex-ai/docs/quotas#eval-quotas
     private static final RateLimitSettings DEFAULT_RATE_LIMIT_SETTINGS = new RateLimitSettings(1000);
-    private static final ThinkingConfig EMPTY_THINKING_CONFIG = new ThinkingConfig();
 
     public GoogleVertexAiChatCompletionServiceSettings(StreamInput in) throws IOException {
-        this.projectId = in.readString();
-        this.location = in.readString();
-        this.modelId = in.readString();
-        this.rateLimitSettings = new RateLimitSettings(in);
-
-        if (in.getTransportVersion().onOrAfter(TransportVersions.GEMINI_THINKING_BUDGET_ADDED)) {
-            thinkingConfig = new ThinkingConfig(in);
-        } else {
-            thinkingConfig = EMPTY_THINKING_CONFIG;
-        }
+        this(in.readString(), in.readString(), in.readString(), new RateLimitSettings(in));
     }
 
     @Override
@@ -70,7 +59,6 @@ protected XContentBuilder toXContentFragmentOfExposedFields(XContentBuilder buil
         builder.field(LOCATION, location);
         builder.field(MODEL_ID, modelId);
         rateLimitSettings.toXContent(builder, params);
-        thinkingConfig.toXContent(builder, params);
         return builder;
     }
 
@@ -91,28 +79,23 @@ public static GoogleVertexAiChatCompletionServiceSettings fromMap(Map<String, Ob
             context
         );
 
-        // Extract optional thinkingConfig settings
-        ThinkingConfig thinkingConfig = ThinkingConfig.of(map, validationException, GoogleVertexAiService.NAME, context);
-
         if (validationException.validationErrors().isEmpty() == false) {
             throw validationException;
         }
 
-        return new GoogleVertexAiChatCompletionServiceSettings(projectId, location, modelId, rateLimitSettings, thinkingConfig);
+        return new GoogleVertexAiChatCompletionServiceSettings(projectId, location, modelId, rateLimitSettings);
     }
 
     public GoogleVertexAiChatCompletionServiceSettings(
         String projectId,
         String location,
         String modelId,
-        @Nullable RateLimitSettings rateLimitSettings,
-        @Nullable ThinkingConfig thinkingConfig
+        @Nullable RateLimitSettings rateLimitSettings
     ) {
         this.projectId = projectId;
         this.location = location;
         this.modelId = modelId;
         this.rateLimitSettings = Objects.requireNonNullElse(rateLimitSettings, DEFAULT_RATE_LIMIT_SETTINGS);
-        this.thinkingConfig = Objects.requireNonNullElse(thinkingConfig, EMPTY_THINKING_CONFIG);
     }
 
     public String location() {
@@ -139,10 +122,6 @@ public RateLimitSettings rateLimitSettings() {
         return rateLimitSettings;
     }
 
-    public ThinkingConfig thinkingConfig() {
-        return thinkingConfig;
-    }
-
     @Override
     public TransportVersion getMinimalSupportedVersion() {
         assert false : "should never be called when supportsVersion is used";
@@ -161,9 +140,6 @@ public void writeTo(StreamOutput out) throws IOException {
         out.writeString(location);
         out.writeString(modelId);
         rateLimitSettings.writeTo(out);
-        if (out.getTransportVersion().onOrAfter(TransportVersions.GEMINI_THINKING_BUDGET_ADDED)) {
-            thinkingConfig.writeTo(out);
-        }
     }
 
     @Override
@@ -182,13 +158,12 @@ public boolean equals(Object o) {
         return Objects.equals(location, that.location)
             && Objects.equals(modelId, that.modelId)
             && Objects.equals(projectId, that.projectId)
-            && Objects.equals(rateLimitSettings, that.rateLimitSettings)
-            && Objects.equals(thinkingConfig, that.thinkingConfig);
+            && Objects.equals(rateLimitSettings, that.rateLimitSettings);
     }
 
     @Override
     public int hashCode() {
-        return Objects.hash(location, modelId, projectId, rateLimitSettings, thinkingConfig);
+        return Objects.hash(location, modelId, projectId, rateLimitSettings);
     }
 
     @Override
 
@@ -0,0 +1,129 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.services.googlevertexai.completion;
+
+import org.elasticsearch.TransportVersion;
+import org.elasticsearch.TransportVersions;
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.ValidationException;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.inference.TaskSettings;
+import org.elasticsearch.xcontent.XContentBuilder;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Objects;
+
+/**
+ * Task settings for Google Vertex AI chat completion models. The only setting held here is a {@link ThinkingConfig}.
+ * <p>
+ * This class exposes no mutators; {@link #of} and {@link #updatedTaskSettings(Map)} return new instances.
+ */
+public class GoogleVertexAiChatCompletionTaskSettings implements TaskSettings {
+    public static final String NAME = "google_vertex_ai_chatcompletion_task_settings";
+
+    // Must be declared before EMPTY_SETTINGS: static initializers run in textual order and the no-arg constructor
+    // reads this constant. Declared the other way round, EMPTY_SETTINGS would capture a null thinkingConfig.
+    private static final ThinkingConfig EMPTY_THINKING_CONFIG = new ThinkingConfig();
+
+    /** Shared instance that holds the empty {@link ThinkingConfig}. */
+    public static final GoogleVertexAiChatCompletionTaskSettings EMPTY_SETTINGS = new GoogleVertexAiChatCompletionTaskSettings();
+
+    private final ThinkingConfig thinkingConfig;
+
+    /**
+     * Creates task settings that hold the empty {@link ThinkingConfig}.
+     */
+    public GoogleVertexAiChatCompletionTaskSettings() {
+        thinkingConfig = EMPTY_THINKING_CONFIG;
+    }
+
+    /**
+     * Creates task settings with the given thinking config.
+     *
+     * @param thinkingConfig the thinking config to use; {@code null} is replaced by the empty thinking config
+     */
+    public GoogleVertexAiChatCompletionTaskSettings(ThinkingConfig thinkingConfig) {
+        this.thinkingConfig = Objects.requireNonNullElse(thinkingConfig, EMPTY_THINKING_CONFIG);
+    }
+
+    /**
+     * Reads task settings from a stream. The thinking config is only read when the stream's transport version is on or
+     * after {@link TransportVersions#GEMINI_THINKING_BUDGET_ADDED}; otherwise the empty thinking config is used.
+     *
+     * @param in the stream to read from
+     * @throws IOException if reading from the stream fails
+     */
+    public GoogleVertexAiChatCompletionTaskSettings(StreamInput in) throws IOException {
+        if (in.getTransportVersion().onOrAfter(TransportVersions.GEMINI_THINKING_BUDGET_ADDED)) {
+            thinkingConfig = new ThinkingConfig(in);
+        } else {
+            thinkingConfig = EMPTY_THINKING_CONFIG;
+        }
+    }
+
+    /**
+     * Parses task settings from a map.
+     *
+     * @param taskSettings the map to extract the optional thinking config from
+     * @return the parsed task settings
+     * @throws ValidationException if any validation errors were recorded while extracting the thinking config
+     */
+    public static GoogleVertexAiChatCompletionTaskSettings fromMap(Map<String, Object> taskSettings) {
+        ValidationException validationException = new ValidationException();
+
+        // Extract optional thinkingConfig settings
+        ThinkingConfig thinkingConfig = ThinkingConfig.fromMap(taskSettings, validationException);
+
+        if (validationException.validationErrors().isEmpty() == false) {
+            throw validationException;
+        }
+
+        return new GoogleVertexAiChatCompletionTaskSettings(thinkingConfig);
+    }
+
+    /**
+     * Combines two task settings. The thinking config of {@code newTaskSettings} wins unless it is empty, in which case
+     * the thinking config of {@code originalTaskSettings} is kept.
+     *
+     * @param originalTaskSettings the settings to fall back to
+     * @param newTaskSettings the settings that take precedence when non-empty
+     * @return a new instance holding the selected thinking config
+     */
+    public static GoogleVertexAiChatCompletionTaskSettings of(
+        GoogleVertexAiChatCompletionTaskSettings originalTaskSettings,
+        GoogleVertexAiChatCompletionTaskSettings newTaskSettings
+    ) {
+        ThinkingConfig thinkingConfig = newTaskSettings.thinkingConfig().isEmpty()
+            ? originalTaskSettings.thinkingConfig()
+            : newTaskSettings.thinkingConfig();
+        return new GoogleVertexAiChatCompletionTaskSettings(thinkingConfig);
+    }
+
+    /**
+     * @return the thinking config held by these settings
+     */
+    public ThinkingConfig thinkingConfig() {
+        return thinkingConfig;
+    }
+
+    /**
+     * @return {@code true} when the held thinking config reports itself as empty
+     */
+    @Override
+    public boolean isEmpty() {
+        return thinkingConfig.isEmpty();
+    }
+
+    /**
+     * Parses {@code newSettings} and combines the result with these settings via
+     * {@link #of(GoogleVertexAiChatCompletionTaskSettings, GoogleVertexAiChatCompletionTaskSettings)}.
+     *
+     * @param newSettings the settings to apply on top of the current ones
+     * @return the combined task settings
+     */
+    @Override
+    public TaskSettings updatedTaskSettings(Map<String, Object> newSettings) {
+        // Parse from a copy so that the caller's map is not handed to the parser
+        GoogleVertexAiChatCompletionTaskSettings newTaskSettings = GoogleVertexAiChatCompletionTaskSettings.fromMap(
+            new HashMap<>(newSettings)
+        );
+        return GoogleVertexAiChatCompletionTaskSettings.of(this, newTaskSettings);
+    }
+
+    @Override
+    public String getWriteableName() {
+        return NAME;
+    }
+
+    @Override
+    public TransportVersion getMinimalSupportedVersion() {
+        return TransportVersions.GEMINI_THINKING_BUDGET_ADDED;
+    }
+
+    /**
+     * Writes these settings to a stream. The thinking config is only written when the stream's transport version is on
+     * or after {@link TransportVersions#GEMINI_THINKING_BUDGET_ADDED}, mirroring the stream constructor.
+     *
+     * @param out the stream to write to
+     * @throws IOException if writing to the stream fails
+     */
+    @Override
+    public void writeTo(StreamOutput out) throws IOException {
+        if (out.getTransportVersion().onOrAfter(TransportVersions.GEMINI_THINKING_BUDGET_ADDED)) {
+            thinkingConfig.writeTo(out);
+        }
+    }
+
+    /**
+     * Renders these settings as an object that wraps whatever the thinking config writes.
+     */
+    @Override
+    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+        builder.startObject();
+        thinkingConfig.toXContent(builder, params);
+        builder.endObject();
+        return builder;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+        GoogleVertexAiChatCompletionTaskSettings that = (GoogleVertexAiChatCompletionTaskSettings) o;
+        return Objects.equals(thinkingConfig, that.thinkingConfig);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hashCode(thinkingConfig);
+    }
+
+    @Override
+    public String toString() {
+        return Strings.toString(this);
+    }
+}