jonathan-buttner
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/TransportInferenceActionProxy.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/TransportInferenceActionProxy.java
Lines changed: 5 additions & 4 deletions
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ServiceUtils.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ServiceUtils.java
Lines changed: 10 additions & 0 deletions
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockService.java
Lines changed: 21 additions & 7 deletions
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/client/AmazonBedrockChatCompletionExecutor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/client/AmazonBedrockChatCompletionExecutor.java
Lines changed: 9 additions & 11 deletions
@@ -37,6 +37,10 @@
 import static org.elasticsearch.xpack.core.ClientHelper.INFERENCE_ORIGIN;
 
 public class TransportInferenceActionProxy extends HandledTransportAction<InferenceActionProxy.Request, InferenceAction.Response> {
+    public static final ElasticsearchStatusException CHAT_COMPLETION_STREAMING_ONLY_EXCEPTION = new ElasticsearchStatusException(
+        "The [chat_completion] task type only supports streaming, please try again with the _stream API",
+        RestStatus.BAD_REQUEST
+    );
     private final ModelRegistry modelRegistry;
     private final Client client;
 
@@ -87,10 +91,7 @@ private void sendUnifiedCompletionRequest(InferenceActionProxy.Request request,
 
         try {
             if (request.isStreaming() == false) {
-                throw new ElasticsearchStatusException(
-                    "The [chat_completion] task type only supports streaming, please try again with the _stream API",
-                    RestStatus.BAD_REQUEST
-                );
+                throw CHAT_COMPLETION_STREAMING_ONLY_EXCEPTION;
             }
 
             UnifiedCompletionAction.Request unifiedRequest;
 
@@ -1022,6 +1022,16 @@ public static String unsupportedTaskTypeForInference(Model model, EnumSet<TaskTy
         );
     }
 
+    public static ElasticsearchStatusException createUnsupportedTaskTypeStatusException(Model model, EnumSet<TaskType> supportedTaskTypes) {
+        var responseString = ServiceUtils.unsupportedTaskTypeForInference(model, supportedTaskTypes);
+
+        if (model.getTaskType() == TaskType.CHAT_COMPLETION) {
+            responseString = responseString + " " + useChatCompletionUrlMessage(model);
+        }
+
+        return new ElasticsearchStatusException(responseString, RestStatus.BAD_REQUEST);
+    }
+
     public static String useChatCompletionUrlMessage(Model model) {
         return org.elasticsearch.common.Strings.format(
             "The task type for the inference entity is %s, please use the _inference/%s/%s/%s URL.",
 
@@ -32,6 +32,7 @@
 import org.elasticsearch.inference.TaskType;
 import org.elasticsearch.inference.configuration.SettingsConfigurationFieldType;
 import org.elasticsearch.rest.RestStatus;
+import org.elasticsearch.xpack.core.inference.action.InferenceAction;
 import org.elasticsearch.xpack.core.inference.chunking.ChunkingSettingsBuilder;
 import org.elasticsearch.xpack.core.inference.chunking.EmbeddingRequestChunker;
 import org.elasticsearch.xpack.inference.common.amazon.AwsSecretSettings;
@@ -63,6 +64,7 @@
 import static org.elasticsearch.xpack.inference.services.ServiceFields.DIMENSIONS;
 import static org.elasticsearch.xpack.inference.services.ServiceUtils.createInvalidModelException;
 import static org.elasticsearch.xpack.inference.services.ServiceUtils.createInvalidTaskTypeException;
+import static org.elasticsearch.xpack.inference.services.ServiceUtils.createUnsupportedTaskTypeStatusException;
 import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMap;
 import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMapOrDefaultEmpty;
 import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMapOrThrowIfNull;
@@ -89,11 +91,16 @@ public class AmazonBedrockService extends SenderService {
 
     private final Sender amazonBedrockSender;
 
-    private static final EnumSet<TaskType> supportedTaskTypes = EnumSet.of(
+    // The task types exposed via the _inference/_services API
+    private static final EnumSet<TaskType> SUPPORTED_TASK_TYPES_FOR_SERVICES_API = EnumSet.of(
         TaskType.TEXT_EMBEDDING,
         TaskType.COMPLETION,
         TaskType.CHAT_COMPLETION
     );
+    /**
+     * The task types that the {@link InferenceAction.Request} can accept.
+     */
+    private static final EnumSet<TaskType> SUPPORTED_INFERENCE_ACTION_TASK_TYPES = EnumSet.of(TaskType.TEXT_EMBEDDING, TaskType.COMPLETION);
 
     private static final EnumSet<InputType> VALID_INPUT_TYPE_VALUES = EnumSet.of(
         InputType.INGEST,
@@ -154,6 +161,11 @@ protected void doInfer(
         TimeValue timeout,
         ActionListener<InferenceServiceResults> listener
     ) {
+        if (SUPPORTED_INFERENCE_ACTION_TASK_TYPES.contains(model.getTaskType()) == false) {
+            listener.onFailure(createUnsupportedTaskTypeStatusException(model, SUPPORTED_INFERENCE_ACTION_TASK_TYPES));
+            return;
+        }
+
         if (model instanceof AmazonBedrockModel == false) {
             listener.onFailure(createInvalidModelException(model));
             return;
@@ -298,7 +310,7 @@ public InferenceServiceConfiguration getConfiguration() {
 
     @Override
     public EnumSet<TaskType> supportedTaskTypes() {
-        return supportedTaskTypes;
+        return SUPPORTED_TASK_TYPES_FOR_SERVICES_API;
     }
 
     private static AmazonBedrockModel createModel(
@@ -429,7 +441,9 @@ public static InferenceServiceConfiguration get() {
 
                 configurationMap.put(
                     PROVIDER_FIELD,
-                    new SettingsConfiguration.Builder(supportedTaskTypes).setDescription("The model provider for your deployment.")
+                    new SettingsConfiguration.Builder(SUPPORTED_TASK_TYPES_FOR_SERVICES_API).setDescription(
+                        "The model provider for your deployment."
+                    )
                         .setLabel("Provider")
                         .setRequired(true)
                         .setSensitive(false)
@@ -440,7 +454,7 @@ public static InferenceServiceConfiguration get() {
 
                 configurationMap.put(
                     MODEL_FIELD,
-                    new SettingsConfiguration.Builder(supportedTaskTypes).setDescription(
+                    new SettingsConfiguration.Builder(SUPPORTED_TASK_TYPES_FOR_SERVICES_API).setDescription(
                         "The base model ID or an ARN to a custom model based on a foundational model."
                     )
                         .setLabel("Model")
@@ -453,7 +467,7 @@ public static InferenceServiceConfiguration get() {
 
                 configurationMap.put(
                     REGION_FIELD,
-                    new SettingsConfiguration.Builder(supportedTaskTypes).setDescription(
+                    new SettingsConfiguration.Builder(SUPPORTED_TASK_TYPES_FOR_SERVICES_API).setDescription(
                         "The region that your model or ARN is deployed in."
                     )
                         .setLabel("Region")
@@ -482,13 +496,13 @@ public static InferenceServiceConfiguration get() {
                 configurationMap.putAll(
                     RateLimitSettings.toSettingsConfigurationWithDescription(
                         "By default, the amazonbedrock service sets the number of requests allowed per minute to 240.",
-                        supportedTaskTypes
+                        SUPPORTED_TASK_TYPES_FOR_SERVICES_API
                     )
                 );
 
                 return new InferenceServiceConfiguration.Builder().setService(NAME)
                     .setName(SERVICE_NAME)
-                    .setTaskTypes(supportedTaskTypes)
+                    .setTaskTypes(SUPPORTED_TASK_TYPES_FOR_SERVICES_API)
                     .setConfigurations(configurationMap)
                     .build();
             }
 
@@ -13,10 +13,11 @@
 import org.elasticsearch.xpack.core.inference.results.StreamingUnifiedChatCompletionResults;
 import org.elasticsearch.xpack.inference.services.amazonbedrock.request.completion.AmazonBedrockChatCompletionRequest;
 import org.elasticsearch.xpack.inference.services.amazonbedrock.response.AmazonBedrockResponseHandler;
-import org.elasticsearch.xpack.inference.services.amazonbedrock.response.completion.AmazonBedrockChatCompletionResponseListener;
 
 import java.util.function.Supplier;
 
+import static org.elasticsearch.xpack.inference.action.TransportInferenceActionProxy.CHAT_COMPLETION_STREAMING_ONLY_EXCEPTION;
+
 public class AmazonBedrockChatCompletionExecutor extends AmazonBedrockExecutor {
     private final AmazonBedrockChatCompletionRequest chatCompletionRequest;
 
@@ -34,16 +35,13 @@ protected AmazonBedrockChatCompletionExecutor(
 
     @Override
     protected void executeClientRequest(AmazonBedrockBaseClient awsBedrockClient) {
-        if (chatCompletionRequest.isStreaming()) {
-            var publisher = chatCompletionRequest.executeStreamChatCompletionRequest(awsBedrockClient);
-            inferenceResultsListener.onResponse(new StreamingUnifiedChatCompletionResults(publisher));
-        } else {
-            var completionResponseListener = new AmazonBedrockChatCompletionResponseListener(
-                chatCompletionRequest,
-                responseHandler,
-                inferenceResultsListener
-            );
-            chatCompletionRequest.executeChatCompletionRequest(awsBedrockClient, completionResponseListener);
+        // Chat completions only supports streaming
+        if (chatCompletionRequest.isStreaming() == false) {
+            inferenceResultsListener.onFailure(CHAT_COMPLETION_STREAMING_ONLY_EXCEPTION);
+            return;
         }
+
+        var publisher = chatCompletionRequest.executeStreamChatCompletionRequest(awsBedrockClient);
+        inferenceResultsListener.onResponse(new StreamingUnifiedChatCompletionResults(publisher));
     }
 }