From 78ab1da62235c9c60e7f628f04efda54725f6592 Mon Sep 17 00:00:00 2001 From: Evgenii_Kazannik Date: Wed, 28 May 2025 15:58:40 +0200 Subject: [PATCH 1/4] Add Ibm Granite Completion and Chat Completion support --- .../org/elasticsearch/TransportVersions.java | 3 + .../inference/InferenceGetServicesIT.java | 10 +- .../InferenceNamedWriteablesProvider.java | 8 + ...nUnifiedChatCompletionResponseHandler.java | 51 +++++ .../IbmWatsonxCompletionResponseHandler.java | 25 +++ .../services/ibmwatsonx/IbmWatsonxModel.java | 18 +- .../ibmwatsonx/IbmWatsonxService.java | 50 ++++- .../action/IbmWatsonxActionCreator.java | 49 ++++- .../action/IbmWatsonxActionVisitor.java | 29 +++ .../IbmWatsonxChatCompletionModel.java | 149 ++++++++++++++ ...mWatsonxChatCompletionServiceSettings.java | 193 ++++++++++++++++++ .../IbmWatsonxChatCompletionRequest.java | 82 ++++++++ ...IbmWatsonxChatCompletionRequestEntity.java | 47 +++++ .../ibmwatsonx/request/IbmWatsonxUtils.java | 1 + .../IbmWatsonCompletionResponseHandler.java | 28 +++ ...ifiedChatCompletionRequestEntityTests.java | 1 + .../ibmwatsonx/IbmWatsonxServiceTests.java | 10 +- .../IbmWatsonxChatCompletionActionTests.java | 168 +++++++++++++++ .../IbmWatsonxChatCompletionModelTests.java | 30 +++ ...onxChatCompletionServiceSettingsTests.java | 111 ++++++++++ ...tsonxChatCompletionRequestEntityTests.java | 66 ++++++ .../IbmWatsonxChatCompletionRequestTests.java | 96 +++++++++ 22 files changed, 1210 insertions(+), 15 deletions(-) create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonUnifiedChatCompletionResponseHandler.java create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxCompletionResponseHandler.java create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/completion/IbmWatsonxChatCompletionModel.java create mode 100644 
x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/completion/IbmWatsonxChatCompletionServiceSettings.java create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/request/IbmWatsonxChatCompletionRequest.java create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/request/IbmWatsonxChatCompletionRequestEntity.java create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/IbmWatsonCompletionResponseHandler.java create mode 100644 x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/action/IbmWatsonxChatCompletionActionTests.java create mode 100644 x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/completion/IbmWatsonxChatCompletionModelTests.java create mode 100644 x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/completion/IbmWatsonxChatCompletionServiceSettingsTests.java create mode 100644 x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/request/IbmWatsonxChatCompletionRequestEntityTests.java create mode 100644 x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/request/IbmWatsonxChatCompletionRequestTests.java diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index 40a5d851ace98..ce30394858e19 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -192,6 +192,7 @@ static TransportVersion def(int id) { public static final TransportVersion ESQL_REGEX_MATCH_WITH_CASE_INSENSITIVITY_8_19 = def(8_841_0_44); public static final TransportVersion ESQL_QUERY_PLANNING_DURATION_8_19 = 
def(8_841_0_45); public static final TransportVersion SEARCH_SOURCE_EXCLUDE_VECTORS_PARAM_8_19 = def(8_841_0_46); + public static final TransportVersion ML_INFERENCE_IBM_WATSONX_COMPLETION_ADDED_8_19 = def(8_841_0_47); public static final TransportVersion V_9_0_0 = def(9_000_0_09); public static final TransportVersion INITIAL_ELASTICSEARCH_9_0_1 = def(9_000_0_10); public static final TransportVersion INITIAL_ELASTICSEARCH_9_0_2 = def(9_000_0_11); @@ -288,6 +289,8 @@ static TransportVersion def(int id) { public static final TransportVersion ML_INFERENCE_MISTRAL_CHAT_COMPLETION_ADDED = def(9_090_0_00); public static final TransportVersion IDP_CUSTOM_SAML_ATTRIBUTES_ALLOW_LIST = def(9_091_0_00); public static final TransportVersion SEARCH_SOURCE_EXCLUDE_VECTORS_PARAM = def(9_092_0_00); + public static final TransportVersion ML_INFERENCE_IBM_WATSONX_COMPLETION_ADDED = def(9_093_0_00); + /* * STOP! READ THIS FIRST! No, really, * ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _ diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetServicesIT.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetServicesIT.java index ecf89dff104a0..983a364f92c05 100644 --- a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetServicesIT.java +++ b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetServicesIT.java @@ -135,7 +135,7 @@ public void testGetServicesWithRerankTaskType() throws IOException { public void testGetServicesWithCompletionTaskType() throws IOException { List services = getServices(TaskType.COMPLETION); - assertThat(services.size(), equalTo(14)); + assertThat(services.size(), equalTo(15)); var providers = providers(services); @@ -157,7 +157,8 @@ public 
void testGetServicesWithCompletionTaskType() throws IOException { "completion_test_service", "hugging_face", "amazon_sagemaker", - "mistral" + "mistral", + "watsonxai" ).toArray() ) ); @@ -165,7 +166,7 @@ public void testGetServicesWithCompletionTaskType() throws IOException { public void testGetServicesWithChatCompletionTaskType() throws IOException { List services = getServices(TaskType.CHAT_COMPLETION); - assertThat(services.size(), equalTo(8)); + assertThat(services.size(), equalTo(9)); var providers = providers(services); @@ -180,7 +181,8 @@ public void testGetServicesWithChatCompletionTaskType() throws IOException { "hugging_face", "amazon_sagemaker", "googlevertexai", - "mistral" + "mistral", + "watsonxai" ).toArray() ) ); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceNamedWriteablesProvider.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceNamedWriteablesProvider.java index 54e8f3102aa45..167a17452fb93 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceNamedWriteablesProvider.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceNamedWriteablesProvider.java @@ -92,6 +92,7 @@ import org.elasticsearch.xpack.inference.services.huggingface.elser.HuggingFaceElserServiceSettings; import org.elasticsearch.xpack.inference.services.huggingface.rerank.HuggingFaceRerankServiceSettings; import org.elasticsearch.xpack.inference.services.huggingface.rerank.HuggingFaceRerankTaskSettings; +import org.elasticsearch.xpack.inference.services.ibmwatsonx.completion.IbmWatsonxChatCompletionServiceSettings; import org.elasticsearch.xpack.inference.services.ibmwatsonx.embeddings.IbmWatsonxEmbeddingsServiceSettings; import org.elasticsearch.xpack.inference.services.ibmwatsonx.rerank.IbmWatsonxRerankServiceSettings; import org.elasticsearch.xpack.inference.services.ibmwatsonx.rerank.IbmWatsonxRerankTaskSettings; @@ 
-472,6 +473,13 @@ private static void addIbmWatsonxNamedWritables(List namedWriteables) { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonUnifiedChatCompletionResponseHandler.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonUnifiedChatCompletionResponseHandler.java new file mode 100644 index 0000000000000..41b82bbf2cd02 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonUnifiedChatCompletionResponseHandler.java @@ -0,0 +1,51 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.services.ibmwatsonx; + +import org.elasticsearch.xpack.core.inference.results.UnifiedChatCompletionException; +import org.elasticsearch.xpack.inference.external.http.HttpResult; +import org.elasticsearch.xpack.inference.external.http.retry.ErrorResponse; +import org.elasticsearch.xpack.inference.external.http.retry.ResponseParser; +import org.elasticsearch.xpack.inference.external.request.Request; +import org.elasticsearch.xpack.inference.services.ibmwatsonx.response.IbmWatsonxErrorResponseEntity; +import org.elasticsearch.xpack.inference.services.openai.OpenAiUnifiedChatCompletionResponseHandler; + +import java.util.Locale; + +/** + * Handles streaming chat completion responses and error parsing for Watsonx inference endpoints. + * Adapts the OpenAI handler to support Watsonx's error schema. 
+ */
+public class IbmWatsonUnifiedChatCompletionResponseHandler extends OpenAiUnifiedChatCompletionResponseHandler {
+
+    private static final String WATSONX_ERROR = "watsonx_error";
+
+    public IbmWatsonUnifiedChatCompletionResponseHandler(String requestType, ResponseParser parseFunction) {
+        super(requestType, parseFunction, IbmWatsonxErrorResponseEntity::fromResponse);
+    }
+
+    /**
+     * Builds the exception reported for a failed request.
+     * Streaming requests get a {@link UnifiedChatCompletionException}; any other request defers to the parent handler.
+     */
+    @Override
+    protected Exception buildError(String message, Request request, HttpResult result, ErrorResponse errorResponse) {
+        assert request.isStreaming() : "Only streaming requests support this format";
+        var statusCode = result.response().getStatusLine().getStatusCode();
+        if (request.isStreaming() == false) {
+            // Only reachable when assertions are disabled: fall back to the parent handler's error.
+            return super.buildError(message, request, result, errorResponse);
+        }
+
+        var detail = errorMessage(message, request, result, errorResponse, statusCode);
+        var status = toRestStatus(statusCode);
+        // Watsonx error entities get a fixed type; anything else goes through the inherited createErrorType.
+        var errorType = errorResponse instanceof IbmWatsonxErrorResponseEntity ? WATSONX_ERROR : createErrorType(errorResponse);
+        return new UnifiedChatCompletionException(status, detail, errorType, status.name().toLowerCase(Locale.ROOT));
+    }
+}
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxCompletionResponseHandler.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxCompletionResponseHandler.java
new file mode 100644
index 0000000000000..79bfa1bb79cac
--- /dev/null
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxCompletionResponseHandler.java
@@ -0,0 +1,25 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements.
Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.services.ibmwatsonx;
+
+import org.elasticsearch.xpack.inference.external.http.retry.ResponseParser;
+import org.elasticsearch.xpack.inference.services.ibmwatsonx.response.IbmWatsonxErrorResponseEntity;
+import org.elasticsearch.xpack.inference.services.openai.OpenAiChatCompletionResponseHandler;
+
+public class IbmWatsonxCompletionResponseHandler extends OpenAiChatCompletionResponseHandler {
+
+    /**
+     * Constructs an IbmWatsonxCompletionResponseHandler; {@code IbmWatsonxErrorResponseEntity::fromResponse} is passed to the parent.
+     *
+     * @param requestType The type of request being handled (e.g., "IBM WatsonX completions").
+     * @param parseFunction The function to parse the response.
+     */
+    public IbmWatsonxCompletionResponseHandler(String requestType, ResponseParser parseFunction) {
+        super(requestType, parseFunction, IbmWatsonxErrorResponseEntity::fromResponse);
+    }
+}
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxModel.java
index dbdff0316199d..fe1472d9e3d60 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxModel.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxModel.java
@@ -7,21 +7,25 @@
 package org.elasticsearch.xpack.inference.services.ibmwatsonx;
-import org.elasticsearch.inference.Model;
 import org.elasticsearch.inference.ModelConfigurations;
 import org.elasticsearch.inference.ModelSecrets;
 import org.elasticsearch.inference.ServiceSettings;
 import org.elasticsearch.inference.TaskSettings;
 import org.elasticsearch.xpack.inference.external.action.ExecutableAction;
+import
org.elasticsearch.xpack.inference.services.RateLimitGroupingModel; import org.elasticsearch.xpack.inference.services.ibmwatsonx.action.IbmWatsonxActionVisitor; +import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings; +import java.net.URI; import java.util.Map; import java.util.Objects; -public abstract class IbmWatsonxModel extends Model { +public abstract class IbmWatsonxModel extends RateLimitGroupingModel { private final IbmWatsonxRateLimitServiceSettings rateLimitServiceSettings; + protected URI uri; + public IbmWatsonxModel( ModelConfigurations configurations, ModelSecrets secrets, @@ -49,4 +53,14 @@ public IbmWatsonxModel(IbmWatsonxModel model, TaskSettings taskSettings) { public IbmWatsonxRateLimitServiceSettings rateLimitServiceSettings() { return rateLimitServiceSettings; } + + @Override + public int rateLimitGroupingHash() { + return Objects.hash(uri); + } + + @Override + public RateLimitSettings rateLimitSettings() { + return this.rateLimitServiceSettings().rateLimitSettings(); + } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxService.java index 7dfb0002bb062..253099254e035 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxService.java @@ -30,7 +30,10 @@ import org.elasticsearch.rest.RestStatus; import org.elasticsearch.xpack.inference.chunking.ChunkingSettingsBuilder; import org.elasticsearch.xpack.inference.chunking.EmbeddingRequestChunker; +import org.elasticsearch.xpack.inference.external.action.SenderExecutableAction; +import org.elasticsearch.xpack.inference.external.http.retry.ResponseHandler; import 
org.elasticsearch.xpack.inference.external.http.sender.EmbeddingsInput; +import org.elasticsearch.xpack.inference.external.http.sender.GenericRequestManager; import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender; import org.elasticsearch.xpack.inference.external.http.sender.InferenceInputs; import org.elasticsearch.xpack.inference.external.http.sender.Sender; @@ -40,14 +43,18 @@ import org.elasticsearch.xpack.inference.services.ServiceComponents; import org.elasticsearch.xpack.inference.services.ServiceUtils; import org.elasticsearch.xpack.inference.services.ibmwatsonx.action.IbmWatsonxActionCreator; +import org.elasticsearch.xpack.inference.services.ibmwatsonx.completion.IbmWatsonxChatCompletionModel; import org.elasticsearch.xpack.inference.services.ibmwatsonx.embeddings.IbmWatsonxEmbeddingsModel; import org.elasticsearch.xpack.inference.services.ibmwatsonx.embeddings.IbmWatsonxEmbeddingsServiceSettings; +import org.elasticsearch.xpack.inference.services.ibmwatsonx.request.IbmWatsonxChatCompletionRequest; import org.elasticsearch.xpack.inference.services.ibmwatsonx.rerank.IbmWatsonxRerankModel; +import org.elasticsearch.xpack.inference.services.openai.response.OpenAiChatCompletionResponseEntity; import java.util.EnumSet; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; import static org.elasticsearch.xpack.inference.services.ServiceFields.MAX_INPUT_TOKENS; import static org.elasticsearch.xpack.inference.services.ServiceFields.MODEL_ID; @@ -56,7 +63,6 @@ import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMapOrDefaultEmpty; import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMapOrThrowIfNull; import static org.elasticsearch.xpack.inference.services.ServiceUtils.throwIfNotEmptyMap; -import static org.elasticsearch.xpack.inference.services.ServiceUtils.throwUnsupportedUnifiedCompletionOperation; import static 
org.elasticsearch.xpack.inference.services.huggingface.elser.HuggingFaceElserServiceSettings.URL; import static org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxServiceFields.API_VERSION; import static org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxServiceFields.EMBEDDING_MAX_BATCH_SIZE; @@ -67,7 +73,15 @@ public class IbmWatsonxService extends SenderService { public static final String NAME = "watsonxai"; private static final String SERVICE_NAME = "IBM Watsonx"; - private static final EnumSet supportedTaskTypes = EnumSet.of(TaskType.TEXT_EMBEDDING); + private static final EnumSet supportedTaskTypes = EnumSet.of( + TaskType.TEXT_EMBEDDING, + TaskType.COMPLETION, + TaskType.CHAT_COMPLETION + ); + private static final ResponseHandler UNIFIED_CHAT_COMPLETION_HANDLER = new IbmWatsonUnifiedChatCompletionResponseHandler( + "ibm watsonx chat completions", + OpenAiChatCompletionResponseEntity::fromResponse + ); public IbmWatsonxService(HttpRequestSender.Factory factory, ServiceComponents serviceComponents) { super(factory, serviceComponents); @@ -148,6 +162,14 @@ private static IbmWatsonxModel createModel( secretSettings, context ); + case CHAT_COMPLETION, COMPLETION -> new IbmWatsonxChatCompletionModel( + inferenceEntityId, + taskType, + NAME, + serviceSettings, + secretSettings, + context + ); default -> throw new ElasticsearchStatusException(failureMessage, RestStatus.BAD_REQUEST); }; } @@ -236,6 +258,11 @@ public TransportVersion getMinimalSupportedVersion() { return TransportVersions.V_8_16_0; } + @Override + public Set supportedStreamingTasks() { + return EnumSet.of(TaskType.COMPLETION, TaskType.CHAT_COMPLETION); + } + @Override public Model updateModelWithEmbeddingDetails(Model model, int embeddingSize) { if (model instanceof IbmWatsonxEmbeddingsModel embeddingsModel) { @@ -291,7 +318,24 @@ protected void doUnifiedCompletionInfer( TimeValue timeout, ActionListener listener ) { - throwUnsupportedUnifiedCompletionOperation(NAME); + if 
(model instanceof IbmWatsonxChatCompletionModel == false) { + listener.onFailure(createInvalidModelException(model)); + return; + } + + IbmWatsonxChatCompletionModel ibmWatsonxChatCompletionModel = (IbmWatsonxChatCompletionModel) model; + var overriddenModel = IbmWatsonxChatCompletionModel.of(ibmWatsonxChatCompletionModel, inputs.getRequest()); + var manager = new GenericRequestManager<>( + getServiceComponents().threadPool(), + overriddenModel, + UNIFIED_CHAT_COMPLETION_HANDLER, + unifiedChatInput -> new IbmWatsonxChatCompletionRequest(unifiedChatInput, overriddenModel), + UnifiedChatInput.class + ); + var errorMessage = IbmWatsonxActionCreator.buildErrorMessage(TaskType.CHAT_COMPLETION, model.getInferenceEntityId()); + var action = new SenderExecutableAction(getSender(), manager, errorMessage); + + action.execute(inputs, timeout, listener); } @Override diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/action/IbmWatsonxActionCreator.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/action/IbmWatsonxActionCreator.java index 46b84689ee3bf..703f7f298230a 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/action/IbmWatsonxActionCreator.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/action/IbmWatsonxActionCreator.java @@ -7,26 +7,48 @@ package org.elasticsearch.xpack.inference.services.ibmwatsonx.action; +import org.elasticsearch.inference.TaskType; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.xpack.inference.common.Truncator; import org.elasticsearch.xpack.inference.external.action.ExecutableAction; import org.elasticsearch.xpack.inference.external.action.SenderExecutableAction; +import org.elasticsearch.xpack.inference.external.action.SingleInputSenderExecutableAction; +import 
org.elasticsearch.xpack.inference.external.http.retry.ResponseHandler; +import org.elasticsearch.xpack.inference.external.http.sender.ChatCompletionInput; +import org.elasticsearch.xpack.inference.external.http.sender.GenericRequestManager; import org.elasticsearch.xpack.inference.external.http.sender.Sender; +import org.elasticsearch.xpack.inference.external.http.sender.UnifiedChatInput; import org.elasticsearch.xpack.inference.services.ServiceComponents; +import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxCompletionResponseHandler; import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxEmbeddingsRequestManager; import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxRerankRequestManager; +import org.elasticsearch.xpack.inference.services.ibmwatsonx.completion.IbmWatsonxChatCompletionModel; import org.elasticsearch.xpack.inference.services.ibmwatsonx.embeddings.IbmWatsonxEmbeddingsModel; +import org.elasticsearch.xpack.inference.services.ibmwatsonx.request.IbmWatsonxChatCompletionRequest; import org.elasticsearch.xpack.inference.services.ibmwatsonx.rerank.IbmWatsonxRerankModel; +import org.elasticsearch.xpack.inference.services.openai.response.OpenAiChatCompletionResponseEntity; import java.util.Map; import java.util.Objects; +import static org.elasticsearch.core.Strings.format; import static org.elasticsearch.xpack.inference.external.action.ActionUtils.constructFailedToSendRequestMessage; +/** + * IbmWatsonxActionCreator is responsible for creating executable actions for various models. + * It implements the IbmWatsonxActionVisitor interface to provide specific implementations. 
+ */ public class IbmWatsonxActionCreator implements IbmWatsonxActionVisitor { private final Sender sender; private final ServiceComponents serviceComponents; + static final String COMPLETION_REQUEST_TYPE = "IBM WatsonX completions"; + static final String USER_ROLE = "user"; + static final ResponseHandler COMPLETION_HANDLER = new IbmWatsonxCompletionResponseHandler( + COMPLETION_REQUEST_TYPE, + OpenAiChatCompletionResponseEntity::fromResponse + ); + public IbmWatsonxActionCreator(Sender sender, ServiceComponents serviceComponents) { this.sender = Objects.requireNonNull(sender); this.serviceComponents = Objects.requireNonNull(serviceComponents); @@ -46,10 +68,24 @@ public ExecutableAction create(IbmWatsonxEmbeddingsModel model, Map taskSettings) { var overriddenModel = IbmWatsonxRerankModel.of(model, taskSettings); var requestCreator = IbmWatsonxRerankRequestManager.of(overriddenModel, serviceComponents.threadPool()); - var failedToSendRequestErrorMessage = constructFailedToSendRequestMessage("Ibm Watsonx rerank"); + var failedToSendRequestErrorMessage = buildErrorMessage(TaskType.RERANK, overriddenModel.getInferenceEntityId()); return new SenderExecutableAction(sender, requestCreator, failedToSendRequestErrorMessage); } + @Override + public ExecutableAction create(IbmWatsonxChatCompletionModel chatCompletionModel) { + var manager = new GenericRequestManager<>( + serviceComponents.threadPool(), + chatCompletionModel, + COMPLETION_HANDLER, + inputs -> new IbmWatsonxChatCompletionRequest(new UnifiedChatInput(inputs, USER_ROLE), chatCompletionModel), + ChatCompletionInput.class + ); + + var failedToSendRequestErrorMessage = buildErrorMessage(TaskType.COMPLETION, chatCompletionModel.getInferenceEntityId()); + return new SingleInputSenderExecutableAction(sender, manager, failedToSendRequestErrorMessage, COMPLETION_REQUEST_TYPE); + } + protected IbmWatsonxEmbeddingsRequestManager getEmbeddingsRequestManager( IbmWatsonxEmbeddingsModel model, Truncator truncator, @@ -57,4 
+93,15 @@ protected IbmWatsonxEmbeddingsRequestManager getEmbeddingsRequestManager( ) { return new IbmWatsonxEmbeddingsRequestManager(model, truncator, threadPool); } + + /** + * Builds an error message for Ibm Watsonx actions. + * + * @param requestType The type of request (e.g. COMPLETION, EMBEDDING, RERANK). + * @param inferenceId The ID of the inference entity. + * @return A formatted error message. + */ + public static String buildErrorMessage(TaskType requestType, String inferenceId) { + return format("Failed to send Ibm Watsonx %s request from inference entity id [%s]", requestType.toString(), inferenceId); + } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/action/IbmWatsonxActionVisitor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/action/IbmWatsonxActionVisitor.java index 64e05769f4a8d..687abdef19638 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/action/IbmWatsonxActionVisitor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/action/IbmWatsonxActionVisitor.java @@ -8,13 +8,42 @@ package org.elasticsearch.xpack.inference.services.ibmwatsonx.action; import org.elasticsearch.xpack.inference.external.action.ExecutableAction; +import org.elasticsearch.xpack.inference.services.ibmwatsonx.completion.IbmWatsonxChatCompletionModel; import org.elasticsearch.xpack.inference.services.ibmwatsonx.embeddings.IbmWatsonxEmbeddingsModel; import org.elasticsearch.xpack.inference.services.ibmwatsonx.rerank.IbmWatsonxRerankModel; import java.util.Map; +/** + * Interface for creating {@link ExecutableAction} instances for Watsonx models. + *

+ * This interface is used to create {@link ExecutableAction} instances for different types of Watsonx models, such as + * {@link IbmWatsonxEmbeddingsModel} and {@link IbmWatsonxRerankModel} and {@link IbmWatsonxChatCompletionModel}. + */ public interface IbmWatsonxActionVisitor { + + /** + * Creates an {@link ExecutableAction} for the given {@link IbmWatsonxEmbeddingsModel}. + * + * @param model The model to create the action for. + * @param taskSettings The task settings to use. + * @return An {@link ExecutableAction} for the given model. + */ ExecutableAction create(IbmWatsonxEmbeddingsModel model, Map taskSettings); + /** + * Creates an {@link ExecutableAction} for the given {@link IbmWatsonxRerankModel}. + * + * @param model The model to create the action for. + * @return An {@link ExecutableAction} for the given model. + */ ExecutableAction create(IbmWatsonxRerankModel model, Map taskSettings); + + /** + * Creates an {@link ExecutableAction} for the given {@link IbmWatsonxChatCompletionModel}. + * + * @param model The model to create the action for. + * @return An {@link ExecutableAction} for the given model. + */ + ExecutableAction create(IbmWatsonxChatCompletionModel model); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/completion/IbmWatsonxChatCompletionModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/completion/IbmWatsonxChatCompletionModel.java new file mode 100644 index 0000000000000..6c12adba8b8cc --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/completion/IbmWatsonxChatCompletionModel.java @@ -0,0 +1,149 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.services.ibmwatsonx.completion; + +import org.apache.http.client.utils.URIBuilder; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.inference.ModelConfigurations; +import org.elasticsearch.inference.ModelSecrets; +import org.elasticsearch.inference.TaskType; +import org.elasticsearch.inference.UnifiedCompletionRequest; +import org.elasticsearch.xpack.inference.external.action.ExecutableAction; +import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; +import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxModel; +import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxRateLimitServiceSettings; +import org.elasticsearch.xpack.inference.services.ibmwatsonx.action.IbmWatsonxActionVisitor; +import org.elasticsearch.xpack.inference.services.settings.DefaultSecretSettings; + +import java.net.URI; +import java.net.URISyntaxException; +import java.util.Map; + +import static org.elasticsearch.xpack.inference.services.ibmwatsonx.request.IbmWatsonxUtils.COMPLETIONS; +import static org.elasticsearch.xpack.inference.services.ibmwatsonx.request.IbmWatsonxUtils.ML; +import static org.elasticsearch.xpack.inference.services.ibmwatsonx.request.IbmWatsonxUtils.TEXT; +import static org.elasticsearch.xpack.inference.services.ibmwatsonx.request.IbmWatsonxUtils.V1; + +public class IbmWatsonxChatCompletionModel extends IbmWatsonxModel { + + /** + * Constructor for IbmWatsonxChatCompletionModel. + * + * @param inferenceEntityId The unique identifier for the inference entity. + * @param taskType The type of task this model is designed for. + * @param service The name of the service this model belongs to. + * @param serviceSettings The settings specific to the Ibm Granite chat completion service. + * @param secrets The secrets required for accessing the service. + * @param context The context for parsing configuration settings. 
+ */ + public IbmWatsonxChatCompletionModel( + String inferenceEntityId, + TaskType taskType, + String service, + Map serviceSettings, + @Nullable Map secrets, + ConfigurationParseContext context + ) { + this( + inferenceEntityId, + taskType, + service, + IbmWatsonxChatCompletionServiceSettings.fromMap(serviceSettings, context), + DefaultSecretSettings.fromMap(secrets) + ); + } + + /** + * Creates a new IbmWatsonxChatCompletionModel with overridden service settings. + * + * @param model The original IbmWatsonxChatCompletionModel. + * @param request The UnifiedCompletionRequest containing the model override. + * @return A new IbmWatsonxChatCompletionModel with the overridden model ID. + */ + public static IbmWatsonxChatCompletionModel of(IbmWatsonxChatCompletionModel model, UnifiedCompletionRequest request) { + if (request.model() == null) { + // If no model is specified in the request, return the original model + return model; + } + + var originalModelServiceSettings = model.getServiceSettings(); + var overriddenServiceSettings = new IbmWatsonxChatCompletionServiceSettings( + originalModelServiceSettings.uri(), + originalModelServiceSettings.apiVersion(), + originalModelServiceSettings.modelId(), + originalModelServiceSettings.projectId(), + originalModelServiceSettings.rateLimitSettings() + ); + + return new IbmWatsonxChatCompletionModel( + model.getInferenceEntityId(), + model.getTaskType(), + model.getConfigurations().getService(), + overriddenServiceSettings, + model.getSecretSettings() + ); + } + + // should only be used for testing + IbmWatsonxChatCompletionModel( + String inferenceEntityId, + TaskType taskType, + String service, + IbmWatsonxChatCompletionServiceSettings serviceSettings, + @Nullable DefaultSecretSettings secretSettings + ) { + super( + new ModelConfigurations(inferenceEntityId, taskType, service, serviceSettings), + new ModelSecrets(secretSettings), + serviceSettings + ); + } + + @Override + public IbmWatsonxRateLimitServiceSettings 
rateLimitServiceSettings() { + return super.rateLimitServiceSettings(); + } + + @Override + public IbmWatsonxChatCompletionServiceSettings getServiceSettings() { + return (IbmWatsonxChatCompletionServiceSettings) super.getServiceSettings(); + } + + @Override + public DefaultSecretSettings getSecretSettings() { + return (DefaultSecretSettings) super.getSecretSettings(); + } + + public URI uri() { + URI uri; + try { + uri = buildUri(this.getServiceSettings().uri().toString(), this.getServiceSettings().apiVersion()); + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } + + return uri; + } + + /** + * Accepts a visitor to create an executable action for this model. The {@code taskSettings} argument is not used here. + * @param visitor the visitor whose {@code create} method builds the action + * @return the completion action + */ + public ExecutableAction accept(IbmWatsonxActionVisitor visitor, Map taskSettings) { + return visitor.create(this); + } + + public static URI buildUri(String uri, String apiVersion) throws URISyntaxException { + return new URIBuilder().setScheme("https") + .setHost(uri) + .setPathSegments(ML, V1, TEXT, COMPLETIONS) + .setParameter("version", apiVersion) + .build(); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/completion/IbmWatsonxChatCompletionServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/completion/IbmWatsonxChatCompletionServiceSettings.java new file mode 100644 index 0000000000000..c731c477ebdbe --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/completion/IbmWatsonxChatCompletionServiceSettings.java @@ -0,0 +1,193 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.services.ibmwatsonx.completion; + +import org.elasticsearch.TransportVersion; +import org.elasticsearch.TransportVersions; +import org.elasticsearch.common.ValidationException; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.inference.ModelConfigurations; +import org.elasticsearch.inference.ServiceSettings; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; +import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxRateLimitServiceSettings; +import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxService; +import org.elasticsearch.xpack.inference.services.settings.FilteredXContentObject; +import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings; + +import java.io.IOException; +import java.net.URI; +import java.util.Map; +import java.util.Objects; + +import static org.elasticsearch.xpack.inference.services.ServiceFields.MODEL_ID; +import static org.elasticsearch.xpack.inference.services.ServiceFields.URL; +import static org.elasticsearch.xpack.inference.services.ServiceUtils.convertToUri; +import static org.elasticsearch.xpack.inference.services.ServiceUtils.createUri; +import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractRequiredString; +import static org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxServiceFields.API_VERSION; +import static org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxServiceFields.PROJECT_ID; + +public class IbmWatsonxChatCompletionServiceSettings extends FilteredXContentObject + implements + ServiceSettings, + IbmWatsonxRateLimitServiceSettings { + public static final String NAME = "ibm_watsonx_completion_service_settings"; + + /** + * Rate limits are defined at + * Watson Machine 
Learning plans. + * For Lite plan, you've 120 requests per minute. + */ + private static final RateLimitSettings DEFAULT_RATE_LIMIT_SETTINGS = new RateLimitSettings(120); + + public static IbmWatsonxChatCompletionServiceSettings fromMap(Map map, ConfigurationParseContext context) { + ValidationException validationException = new ValidationException(); + + String url = extractRequiredString(map, URL, ModelConfigurations.SERVICE_SETTINGS, validationException); + URI uri = convertToUri(url, URL, ModelConfigurations.SERVICE_SETTINGS, validationException); + String apiVersion = extractRequiredString(map, API_VERSION, ModelConfigurations.SERVICE_SETTINGS, validationException); + + String modelId = extractRequiredString(map, MODEL_ID, ModelConfigurations.SERVICE_SETTINGS, validationException); + String projectId = extractRequiredString(map, PROJECT_ID, ModelConfigurations.SERVICE_SETTINGS, validationException); + + RateLimitSettings rateLimitSettings = RateLimitSettings.of( + map, + DEFAULT_RATE_LIMIT_SETTINGS, + validationException, + IbmWatsonxService.NAME, + context + ); + + if (validationException.validationErrors().isEmpty() == false) { + throw validationException; + } + + return new IbmWatsonxChatCompletionServiceSettings(uri, apiVersion, modelId, projectId, rateLimitSettings); + } + + private final URI uri; + + private final String apiVersion; + + private final String modelId; + + private final String projectId; + + private final RateLimitSettings rateLimitSettings; + + public IbmWatsonxChatCompletionServiceSettings( + URI uri, + String apiVersion, + String modelId, + String projectId, + @Nullable RateLimitSettings rateLimitSettings + ) { + this.uri = uri; + this.apiVersion = apiVersion; + this.projectId = projectId; + this.modelId = modelId; + this.rateLimitSettings = Objects.requireNonNullElse(rateLimitSettings, DEFAULT_RATE_LIMIT_SETTINGS); + } + + public IbmWatsonxChatCompletionServiceSettings(StreamInput in) throws IOException { + this.uri = 
createUri(in.readString()); + this.apiVersion = in.readString(); + this.modelId = in.readString(); + this.projectId = in.readString(); + this.rateLimitSettings = new RateLimitSettings(in); + + } + + public URI uri() { + return uri; + } + + public String apiVersion() { + return apiVersion; + } + + @Override + public String modelId() { + return modelId; + } + + public String projectId() { + return projectId; + } + + @Override + public RateLimitSettings rateLimitSettings() { + return rateLimitSettings; + } + + @Override + public String getWriteableName() { + return NAME; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + + toXContentFragmentOfExposedFields(builder, params); + + builder.endObject(); + return builder; + } + + @Override + protected XContentBuilder toXContentFragmentOfExposedFields(XContentBuilder builder, Params params) throws IOException { + builder.field(URL, uri.toString()); + + builder.field(API_VERSION, apiVersion); + + builder.field(MODEL_ID, modelId); + + builder.field(PROJECT_ID, projectId); + + rateLimitSettings.toXContent(builder, params); + + return builder; + } + + @Override + public TransportVersion getMinimalSupportedVersion() { + return TransportVersions.ML_INFERENCE_IBM_WATSONX_COMPLETION_ADDED; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(uri.toString()); + out.writeString(apiVersion); + + out.writeString(modelId); + out.writeString(projectId); + + rateLimitSettings.writeTo(out); + } + + @Override + public boolean equals(Object object) { + if (this == object) return true; + if (object == null || getClass() != object.getClass()) return false; + IbmWatsonxChatCompletionServiceSettings that = (IbmWatsonxChatCompletionServiceSettings) object; + return Objects.equals(uri, that.uri) + && Objects.equals(apiVersion, that.apiVersion) + && Objects.equals(modelId, that.modelId) + && Objects.equals(projectId, 
that.projectId) + && Objects.equals(rateLimitSettings, that.rateLimitSettings); + } + + @Override + public int hashCode() { + return Objects.hash(uri, apiVersion, modelId, projectId, rateLimitSettings); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/request/IbmWatsonxChatCompletionRequest.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/request/IbmWatsonxChatCompletionRequest.java new file mode 100644 index 0000000000000..1c63c6ea6262b --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/request/IbmWatsonxChatCompletionRequest.java @@ -0,0 +1,82 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.services.ibmwatsonx.request; + +import org.apache.http.HttpHeaders; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.entity.ByteArrayEntity; +import org.elasticsearch.common.Strings; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.inference.external.http.sender.UnifiedChatInput; +import org.elasticsearch.xpack.inference.external.request.HttpRequest; +import org.elasticsearch.xpack.inference.external.request.Request; +import org.elasticsearch.xpack.inference.services.ibmwatsonx.completion.IbmWatsonxChatCompletionModel; + +import java.net.URI; +import java.nio.charset.StandardCharsets; +import java.util.Objects; + +import static org.elasticsearch.xpack.inference.external.request.RequestUtils.createAuthBearerHeader; + +public class IbmWatsonxChatCompletionRequest implements IbmWatsonxRequest { + private final IbmWatsonxChatCompletionModel model; + private final UnifiedChatInput chatInput; + + public 
IbmWatsonxChatCompletionRequest(UnifiedChatInput chatInput, IbmWatsonxChatCompletionModel model) { + this.chatInput = Objects.requireNonNull(chatInput); + this.model = Objects.requireNonNull(model); + } + + @Override + public HttpRequest createHttpRequest() { + HttpPost httpPost = new HttpPost(model.uri()); + + ByteArrayEntity byteEntity = new ByteArrayEntity( + Strings.toString(new IbmWatsonxChatCompletionRequestEntity(chatInput, model)).getBytes(StandardCharsets.UTF_8) + ); + httpPost.setEntity(byteEntity); + + httpPost.setHeader(HttpHeaders.CONTENT_TYPE, XContentType.JSON.mediaType()); + httpPost.setHeader(createAuthBearerHeader(model.getSecretSettings().apiKey())); + + decorateWithAuth(httpPost); + + return new HttpRequest(httpPost, getInferenceEntityId()); + } + + @Override + public URI getURI() { + return model.uri(); + } + + public void decorateWithAuth(HttpPost httpPost) { + IbmWatsonxRequest.decorateWithBearerToken(httpPost, model.getSecretSettings(), model.getInferenceEntityId()); + } + + @Override + public Request truncate() { + // No truncation for Ibm WatsonX chat completions + return this; + } + + @Override + public boolean[] getTruncationInfo() { + // No truncation for Ibm WatsonX chat completions + return null; + } + + @Override + public String getInferenceEntityId() { + return model.getInferenceEntityId(); + } + + @Override + public boolean isStreaming() { + return chatInput.stream(); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/request/IbmWatsonxChatCompletionRequestEntity.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/request/IbmWatsonxChatCompletionRequestEntity.java new file mode 100644 index 0000000000000..9f4cb9575f3cb --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/request/IbmWatsonxChatCompletionRequestEntity.java @@ -0,0 +1,47 @@ +/* + * Copyright 
Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.services.ibmwatsonx.request; + +import org.elasticsearch.inference.UnifiedCompletionRequest; +import org.elasticsearch.xcontent.ToXContentObject; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xpack.inference.external.http.sender.UnifiedChatInput; +import org.elasticsearch.xpack.inference.external.unified.UnifiedChatCompletionRequestEntity; +import org.elasticsearch.xpack.inference.services.ibmwatsonx.completion.IbmWatsonxChatCompletionModel; + +import java.io.IOException; +import java.util.Objects; + +/** + * IbmWatsonxChatCompletionRequestEntity is responsible for creating the request entity for Watsonx chat completion. + * It implements ToXContentObject to allow serialization to XContent format. 
+ */ +public class IbmWatsonxChatCompletionRequestEntity implements ToXContentObject { + + private final IbmWatsonxChatCompletionModel model; + private final UnifiedChatCompletionRequestEntity unifiedRequestEntity; + + private static final String PROJECT_ID_FIELD = "project_id"; + + public IbmWatsonxChatCompletionRequestEntity(UnifiedChatInput unifiedChatInput, IbmWatsonxChatCompletionModel model) { + this.unifiedRequestEntity = new UnifiedChatCompletionRequestEntity(unifiedChatInput); + this.model = Objects.requireNonNull(model); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(PROJECT_ID_FIELD, model.getServiceSettings().projectId()); + unifiedRequestEntity.toXContent( + builder, + UnifiedCompletionRequest.withMaxTokensAndSkipStreamOptionsField(model.getServiceSettings().modelId(), params) + ); + builder.endObject(); + return builder; + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/request/IbmWatsonxUtils.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/request/IbmWatsonxUtils.java index 7c4dc526ca509..856c91f509b26 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/request/IbmWatsonxUtils.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/request/IbmWatsonxUtils.java @@ -14,6 +14,7 @@ public class IbmWatsonxUtils { public static final String TEXT = "text"; public static final String EMBEDDINGS = "embeddings"; public static final String RERANKS = "reranks"; + public static final String COMPLETIONS = "chat"; private IbmWatsonxUtils() {} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/IbmWatsonCompletionResponseHandler.java 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/IbmWatsonCompletionResponseHandler.java new file mode 100644 index 0000000000000..19e98f668ee4b --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/IbmWatsonCompletionResponseHandler.java @@ -0,0 +1,28 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.services.openai; + +import org.elasticsearch.xpack.inference.external.http.retry.ResponseParser; +import org.elasticsearch.xpack.inference.services.ibmwatsonx.response.IbmWatsonxErrorResponseEntity; + +/** + * Handles non-streaming chat completion responses for IBM foundation models, extending the OpenAI chat completion response handler. + * This class is specifically designed to handle the IBM watsonx error response format. + */ +public class IbmWatsonCompletionResponseHandler extends OpenAiChatCompletionResponseHandler { + + /** + * Constructs an IbmWatsonCompletionResponseHandler with the specified request type and response parser. + * + * @param requestType The type of request being handled (e.g., "IBM WatsonX completions"). + * @param parseFunction The function to parse the response. 
+ */ + public IbmWatsonCompletionResponseHandler(String requestType, ResponseParser parseFunction) { + super(requestType, parseFunction, IbmWatsonxErrorResponseEntity::fromResponse); + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/unified/UnifiedChatCompletionRequestEntityTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/unified/UnifiedChatCompletionRequestEntityTests.java index 63e83f97fe026..9badb27204e25 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/unified/UnifiedChatCompletionRequestEntityTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/unified/UnifiedChatCompletionRequestEntityTests.java @@ -293,6 +293,7 @@ public void testSerializationWithNestedObjects() throws IOException { String randomStop = "stop" + random.nextInt(1000); float randomTemperature = (float) ((float) Math.round(0.5d + (double) random.nextFloat() * 0.5d * 100000d) / 100000d); float randomTopP = (float) ((float) Math.round(0.5d + (double) random.nextFloat() * 0.5d * 100000d) / 100000d); + int randomTimeLimit = random.nextInt(1000); UnifiedCompletionRequest.Message message = new UnifiedCompletionRequest.Message( new UnifiedCompletionRequest.ContentString(randomContent), diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxServiceTests.java index 35dbcdd6aa99f..b892bbc9aaf66 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxServiceTests.java @@ -919,7 +919,7 @@ public void testGetConfiguration() throws Exception { { "service": 
"watsonxai", "name": "IBM Watsonx", - "task_types": ["text_embedding"], + "task_types": ["text_embedding", "completion", "chat_completion"], "configurations": { "project_id": { "description": "", @@ -928,7 +928,7 @@ public void testGetConfiguration() throws Exception { "sensitive": false, "updatable": false, "type": "str", - "supported_task_types": ["text_embedding"] + "supported_task_types": ["text_embedding", "completion", "chat_completion"] }, "model_id": { "description": "The name of the model to use for the inference task.", @@ -937,7 +937,7 @@ public void testGetConfiguration() throws Exception { "sensitive": false, "updatable": false, "type": "str", - "supported_task_types": ["text_embedding"] + "supported_task_types": ["text_embedding", "completion", "chat_completion"] }, "api_version": { "description": "The IBM Watsonx API version ID to use.", @@ -946,7 +946,7 @@ public void testGetConfiguration() throws Exception { "sensitive": false, "updatable": false, "type": "str", - "supported_task_types": ["text_embedding"] + "supported_task_types": ["text_embedding", "completion", "chat_completion"] }, "max_input_tokens": { "description": "Allows you to specify the maximum number of tokens per input.", @@ -964,7 +964,7 @@ public void testGetConfiguration() throws Exception { "sensitive": false, "updatable": false, "type": "str", - "supported_task_types": ["text_embedding"] + "supported_task_types": ["text_embedding", "completion", "chat_completion"] } } } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/action/IbmWatsonxChatCompletionActionTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/action/IbmWatsonxChatCompletionActionTests.java new file mode 100644 index 0000000000000..1e13179e6068e --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/action/IbmWatsonxChatCompletionActionTests.java @@ -0,0 
+1,168 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.services.ibmwatsonx.action; + +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.ElasticsearchStatusException; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.support.PlainActionFuture; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.rest.RestStatus; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.http.MockResponse; +import org.elasticsearch.test.http.MockWebServer; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.xpack.core.inference.action.InferenceAction; +import org.elasticsearch.xpack.inference.external.action.ExecutableAction; +import org.elasticsearch.xpack.inference.external.action.SingleInputSenderExecutableAction; +import org.elasticsearch.xpack.inference.external.http.HttpClientManager; +import org.elasticsearch.xpack.inference.external.http.sender.ChatCompletionInput; +import org.elasticsearch.xpack.inference.external.http.sender.GenericRequestManager; +import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSenderTests; +import org.elasticsearch.xpack.inference.external.http.sender.Sender; +import org.elasticsearch.xpack.inference.external.http.sender.UnifiedChatInput; +import org.elasticsearch.xpack.inference.logging.ThrottlerManager; +import org.elasticsearch.xpack.inference.services.ibmwatsonx.request.IbmWatsonxChatCompletionRequest; +import org.junit.After; +import org.junit.Before; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import 
java.util.List; +import java.util.concurrent.TimeUnit; + +import static org.elasticsearch.xpack.inference.Utils.inferenceUtilityPool; +import static org.elasticsearch.xpack.inference.Utils.mockClusterServiceEmpty; +import static org.elasticsearch.xpack.inference.external.action.ActionUtils.constructFailedToSendRequestMessage; +import static org.elasticsearch.xpack.inference.external.http.Utils.getUrl; +import static org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSenderTests.createSender; +import static org.elasticsearch.xpack.inference.services.ibmwatsonx.action.IbmWatsonxActionCreator.COMPLETION_HANDLER; +import static org.elasticsearch.xpack.inference.services.ibmwatsonx.action.IbmWatsonxActionCreator.USER_ROLE; +import static org.elasticsearch.xpack.inference.services.ibmwatsonx.completion.IbmWatsonxChatCompletionModelTests.createModel; +import static org.hamcrest.Matchers.is; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.mock; + +public class IbmWatsonxChatCompletionActionTests extends ESTestCase { + private static final TimeValue TIMEOUT = new TimeValue(30, TimeUnit.SECONDS); + private final MockWebServer webServer = new MockWebServer(); + private ThreadPool threadPool; + private HttpClientManager clientManager; + + @Before + public void init() throws Exception { + webServer.start(); + threadPool = createThreadPool(inferenceUtilityPool()); + clientManager = HttpClientManager.create(Settings.EMPTY, threadPool, mockClusterServiceEmpty(), mock(ThrottlerManager.class)); + } + + @After + public void shutdown() throws IOException { + clientManager.close(); + terminate(threadPool); + webServer.close(); + } + + public void testExecute_ThrowsElasticsearchException() throws URISyntaxException { + var sender = mock(Sender.class); + doThrow(new ElasticsearchException("failed")).when(sender).send(any(), any(), any(), any()); + + 
var action = createAction(getUrl(webServer), sender); + + PlainActionFuture listener = new PlainActionFuture<>(); + action.execute(new ChatCompletionInput(List.of("abc")), InferenceAction.Request.DEFAULT_TIMEOUT, listener); + + var thrownException = expectThrows(ElasticsearchException.class, () -> listener.actionGet(TIMEOUT)); + + assertThat(thrownException.getMessage(), is("failed")); + } + + public void testExecute_ThrowsElasticsearchException_WhenSenderOnFailureIsCalled() throws URISyntaxException { + var sender = mock(Sender.class); + + doAnswer(invocation -> { + ActionListener listener = invocation.getArgument(3); + listener.onFailure(new IllegalStateException("failed")); + + return Void.TYPE; + }).when(sender).send(any(), any(), any(), any()); + + var action = createAction(getUrl(webServer), sender); + + PlainActionFuture listener = new PlainActionFuture<>(); + action.execute(new ChatCompletionInput(List.of("abc")), InferenceAction.Request.DEFAULT_TIMEOUT, listener); + + var thrownException = expectThrows(ElasticsearchException.class, () -> listener.actionGet(TIMEOUT)); + + assertThat(thrownException.getMessage(), is("Failed to send watsonx chat completions request. 
Cause: failed")); + } + + public void testExecute_ThrowsException_WhenInputIsGreaterThanOne() throws IOException, URISyntaxException { + var senderFactory = HttpRequestSenderTests.createSenderFactory(threadPool, clientManager); + + try (var sender = createSender(senderFactory)) { + sender.start(); + + String responseJson = """ + { + "id": "9d80f26810ac4e9582f927fcf0512ec7", + "object": "chat.completion", + "created": 1748596419, + "model": "modelId", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "tool_calls": null, + "content": "result content" + }, + "finish_reason": "length", + "logprobs": null + } + ], + "usage": { + "prompt_tokens": 10, + "total_tokens": 11, + "completion_tokens": 1 + } + } + """; + + webServer.enqueue(new MockResponse().setResponseCode(200).setBody(responseJson)); + + var action = createAction(getUrl(webServer), sender); + + PlainActionFuture listener = new PlainActionFuture<>(); + action.execute(new ChatCompletionInput(List.of("abc", "def")), InferenceAction.Request.DEFAULT_TIMEOUT, listener); + + var thrownException = expectThrows(ElasticsearchStatusException.class, () -> listener.actionGet(TIMEOUT)); + + assertThat(thrownException.getMessage(), is("watsonx chat completions only accepts 1 input")); + assertThat(thrownException.status(), is(RestStatus.BAD_REQUEST)); + } + } + + private ExecutableAction createAction(String url, Sender sender) throws URISyntaxException { + var model = createModel(new URI("abc.com"), "apiVersion", "modelId", "projectId", "apiKey"); + var manager = new GenericRequestManager<>( + threadPool, + model, + COMPLETION_HANDLER, + inputs -> new IbmWatsonxChatCompletionRequest(new UnifiedChatInput(inputs, USER_ROLE), model), + ChatCompletionInput.class + ); + var errorMessage = constructFailedToSendRequestMessage("watsonx chat completions"); + return new SingleInputSenderExecutableAction(sender, manager, errorMessage, "watsonx chat completions"); + } +} diff --git 
a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/completion/IbmWatsonxChatCompletionModelTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/completion/IbmWatsonxChatCompletionModelTests.java new file mode 100644 index 0000000000000..c573f8daf23b6 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/completion/IbmWatsonxChatCompletionModelTests.java @@ -0,0 +1,30 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.services.ibmwatsonx.completion; + +import org.elasticsearch.common.settings.SecureString; +import org.elasticsearch.inference.TaskType; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.inference.services.settings.DefaultSecretSettings; + +import java.net.URI; +import java.net.URISyntaxException; + +public class IbmWatsonxChatCompletionModelTests extends ESTestCase { + + public static IbmWatsonxChatCompletionModel createModel(URI uri, String apiVersion, String modelId, String projectId, String apiKey) + throws URISyntaxException { + return new IbmWatsonxChatCompletionModel( + "id", + TaskType.COMPLETION, + "service", + new IbmWatsonxChatCompletionServiceSettings(uri, apiVersion, modelId, projectId, null), + new DefaultSecretSettings(new SecureString(apiKey.toCharArray())) + ); + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/completion/IbmWatsonxChatCompletionServiceSettingsTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/completion/IbmWatsonxChatCompletionServiceSettingsTests.java new file mode 100644 index 
0000000000000..c143cd6ae21e4 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/completion/IbmWatsonxChatCompletionServiceSettingsTests.java @@ -0,0 +1,111 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.services.ibmwatsonx.completion; + +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.test.AbstractWireSerializingTestCase; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentFactory; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; +import org.elasticsearch.xpack.inference.services.ServiceFields; +import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxServiceFields; +import org.elasticsearch.xpack.inference.services.settings.RateLimitSettingsTests; + +import java.io.IOException; +import java.net.URI; +import java.util.HashMap; +import java.util.Map; + +import static org.elasticsearch.xpack.inference.MatchersUtils.equalToIgnoringWhitespaceInJsonString; +import static org.hamcrest.Matchers.is; + +public class IbmWatsonxChatCompletionServiceSettingsTests extends AbstractWireSerializingTestCase { + + private static IbmWatsonxChatCompletionServiceSettings createRandom() { + URI uri = null; + try { + uri = new URI("abc.com"); + } catch (Exception ignored) {} + + return new IbmWatsonxChatCompletionServiceSettings( + uri, + randomAlphaOfLength(8), + randomAlphaOfLength(8), + randomAlphaOfLength(8), + randomFrom(RateLimitSettingsTests.createRandom(), null) + ); + } + + public void testFromMap_Request_CreatesSettingsCorrectly() { + var model = 
randomAlphaOfLength(8); + var projectId = randomAlphaOfLength(8); + URI uri = null; + try { + uri = new URI("abc.com"); + } catch (Exception ignored) {} + var apiVersion = randomAlphaOfLength(8); + + var serviceSettings = IbmWatsonxChatCompletionServiceSettings.fromMap( + new HashMap<>( + Map.of( + ServiceFields.URL, + uri.toString(), + IbmWatsonxServiceFields.API_VERSION, + apiVersion, + ServiceFields.MODEL_ID, + model, + IbmWatsonxServiceFields.PROJECT_ID, + projectId + ) + ), + ConfigurationParseContext.PERSISTENT + ); + + assertThat(serviceSettings, is(new IbmWatsonxChatCompletionServiceSettings(uri, apiVersion, model, projectId, null))); + } + + public void testToXContent_WritesAllValues() throws IOException { + URI uri = null; + try { + uri = new URI("abc.com"); + } catch (Exception ignored) {} + var entity = new IbmWatsonxChatCompletionServiceSettings(uri, "2024-05-02", "model", "project_id", null); + + XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON); + entity.toXContent(builder, null); + String xContentResult = Strings.toString(builder); + + assertThat(xContentResult, equalToIgnoringWhitespaceInJsonString(""" + { + "url":"abc.com", + "api_version":"2024-05-02", + "model_id":"model", + "project_id":"project_id", + "rate_limit": { + "requests_per_minute":120 + } + }""")); + } + + @Override + protected Writeable.Reader instanceReader() { + return IbmWatsonxChatCompletionServiceSettings::new; + } + + @Override + protected IbmWatsonxChatCompletionServiceSettings createTestInstance() { + return createRandom(); + } + + @Override + protected IbmWatsonxChatCompletionServiceSettings mutateInstance(IbmWatsonxChatCompletionServiceSettings instance) throws IOException { + return randomValueOtherThan(instance, IbmWatsonxChatCompletionServiceSettingsTests::createRandom); + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/request/IbmWatsonxChatCompletionRequestEntityTests.java 
b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/request/IbmWatsonxChatCompletionRequestEntityTests.java
new file mode 100644
index 0000000000000..39b607db9cacb
--- /dev/null
+++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/request/IbmWatsonxChatCompletionRequestEntityTests.java
@@ -0,0 +1,71 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.services.ibmwatsonx.request;
+
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.xcontent.XContentHelper;
+import org.elasticsearch.inference.UnifiedCompletionRequest;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.xcontent.ToXContent;
+import org.elasticsearch.xcontent.XContentBuilder;
+import org.elasticsearch.xcontent.json.JsonXContent;
+import org.elasticsearch.xpack.inference.external.http.sender.UnifiedChatInput;
+import org.elasticsearch.xpack.inference.services.ibmwatsonx.completion.IbmWatsonxChatCompletionModel;
+
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+
+import static org.elasticsearch.xpack.inference.services.ibmwatsonx.completion.IbmWatsonxChatCompletionModelTests.createModel;
+
+/**
+ * Verifies the JSON body that {@link IbmWatsonxChatCompletionRequestEntity} writes for a unified chat input.
+ */
+public class IbmWatsonxChatCompletionRequestEntityTests extends ESTestCase {
+
+    private static final String ROLE = "user";
+
+    public void testModelUserFieldsSerialization() throws IOException, URISyntaxException {
+        UnifiedCompletionRequest.Message message = new UnifiedCompletionRequest.Message(
+            new UnifiedCompletionRequest.ContentString("test content"),
+            ROLE,
+            null,
+            null
+        );
+        // Typed list rather than a raw ArrayList, so the element type is checked by the compiler.
+        var messageList = new ArrayList<UnifiedCompletionRequest.Message>();
+        messageList.add(message);
+
+        var unifiedRequest = UnifiedCompletionRequest.of(messageList);
+
+        // The streaming flag passed here is expected to appear as "stream": true in the serialized body.
+        UnifiedChatInput unifiedChatInput = new UnifiedChatInput(unifiedRequest, true);
+        IbmWatsonxChatCompletionModel model = createModel(new URI("abc.com"), "apiVersion", "modelId", "projectId", "apiKey");
+
+        IbmWatsonxChatCompletionRequestEntity entity = new IbmWatsonxChatCompletionRequestEntity(unifiedChatInput, model);
+
+        XContentBuilder builder = JsonXContent.contentBuilder();
+        entity.toXContent(builder, ToXContent.EMPTY_PARAMS);
+        String expectedJson = """
+            {
+                "project_id": "projectId",
+                "messages": [
+                    {
+                        "content": "test content",
+                        "role": "user"
+                    }
+                ],
+                "model": "modelId",
+                "n": 1,
+                "stream": true
+            }
+            """;
+        assertEquals(XContentHelper.stripWhitespace(expectedJson), Strings.toString(builder));
+    }
+}
diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/request/IbmWatsonxChatCompletionRequestTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/request/IbmWatsonxChatCompletionRequestTests.java
new file mode 100644
index 0000000000000..f730961452787
--- /dev/null
+++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/request/IbmWatsonxChatCompletionRequestTests.java
@@ -0,0 +1,96 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.services.ibmwatsonx.request;
+
+import org.apache.http.HttpHeaders;
+import org.apache.http.client.methods.HttpPost;
+import org.elasticsearch.core.Nullable;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.xpack.inference.external.http.sender.UnifiedChatInput;
+import org.elasticsearch.xpack.inference.services.ibmwatsonx.completion.IbmWatsonxChatCompletionModel;
+import org.elasticsearch.xpack.inference.services.ibmwatsonx.completion.IbmWatsonxChatCompletionModelTests;
+
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.List;
+import java.util.Map;
+
+import static org.elasticsearch.xpack.inference.external.http.Utils.entityAsMap;
+import static org.hamcrest.Matchers.aMapWithSize;
+import static org.hamcrest.Matchers.instanceOf;
+import static org.hamcrest.Matchers.is;
+
+/** Tests for the HTTP request produced by {@link IbmWatsonxChatCompletionRequest}: streaming flag, URI and JSON body. */
+public class IbmWatsonxChatCompletionRequestTests extends ESTestCase {
+    private static final String AUTH_HEADER_VALUE = "foo";
+    private static final String API_COMPLETIONS_PATH = "https://abc.com/ml/v1/text/chat?version=apiVersion";
+
+    public void testCreateRequest_WithStreaming() throws IOException, URISyntaxException {
+        var request = createRequest("secret", randomAlphaOfLength(15), "model", true);
+        var httpRequest = request.createHttpRequest();
+
+        assertThat(httpRequest.httpRequestBase(), instanceOf(HttpPost.class));
+        var httpPost = (HttpPost) httpRequest.httpRequestBase();
+
+        var requestMap = entityAsMap(httpPost.getEntity().getContent());
+        assertThat(requestMap.get("stream"), is(true));
+    }
+
+    public void testTruncate_DoesNotReduceInputTextSize() throws IOException, URISyntaxException {
+        String input = randomAlphaOfLength(5);
+        var request = createRequest("secret", input, "model", true);
+        var truncatedRequest = request.truncate();
+        assertThat(request.getURI().toString(), is(API_COMPLETIONS_PATH));
+
+        var httpRequest = truncatedRequest.createHttpRequest();
+        assertThat(httpRequest.httpRequestBase(), instanceOf(HttpPost.class));
+
+        var httpPost = (HttpPost) httpRequest.httpRequestBase();
+        var requestMap = entityAsMap(httpPost.getEntity().getContent());
+        // Five top-level fields are expected: project_id, messages, model, n and stream; each one is checked below.
+        assertThat(requestMap, aMapWithSize(5));
+
+        assertThat(requestMap.get("project_id"), is("projectId"));
+        assertThat(requestMap.get("messages"), is(List.of(Map.of("role", "user", "content", input))));
+        assertThat(requestMap.get("model"), is("model"));
+        assertThat(requestMap.get("n"), is(1));
+        assertTrue((Boolean) requestMap.get("stream"));
+        assertNull(requestMap.get("stream_options"));
+    }
+
+    public void testTruncationInfo_ReturnsNull() throws URISyntaxException {
+        var request = createRequest("secret", randomAlphaOfLength(5), "model", true);
+        assertNull(request.getTruncationInfo());
+    }
+
+    /** Creates a non-streaming request by delegating to the four-argument overload with {@code stream = false}. */
+    public static IbmWatsonxChatCompletionRequest createRequest(String apiKey, String input, @Nullable String model)
+        throws URISyntaxException {
+        return createRequest(apiKey, input, model, false);
+    }
+
+    /** Creates a request for a single "user" message against a model with fixed URI, API version and project id. */
+    public static IbmWatsonxChatCompletionRequest createRequest(String apiKey, String input, @Nullable String model, boolean stream)
+        throws URISyntaxException {
+        var chatModel = IbmWatsonxChatCompletionModelTests.createModel(new URI("abc.com"), "apiVersion", model, "projectId", apiKey);
+        return new IbmWatsonxChatCompletionWithoutAuthRequest(new UnifiedChatInput(List.of(input), "user", stream), chatModel);
+    }
+
+    /** Request variant whose auth decoration only sets a fixed Authorization header value. */
+    private static class IbmWatsonxChatCompletionWithoutAuthRequest extends IbmWatsonxChatCompletionRequest {
+        IbmWatsonxChatCompletionWithoutAuthRequest(UnifiedChatInput input, IbmWatsonxChatCompletionModel model) {
+            super(input, model);
+        }
+
+        @Override
+        public void decorateWithAuth(HttpPost httpPost) {
+            httpPost.setHeader(HttpHeaders.AUTHORIZATION, AUTH_HEADER_VALUE);
+        }
+    }
+}

From f92f348e33a3a49cf0efaff72f63b3a333738fa5 Mon Sep 17 00:00:00 2001
From: Evgenii_Kazannik
Date: Tue, 10 Jun 2025 15:20:19 +0200
Subject: [PATCH 2/4] Apply suggestions

---
.../services/cohere/rerank/CohereRerankModel.java | 4 ++-- .../ibmwatsonx/IbmWatsonxCompletionResponseHandler.java | 2 +- .../ibmwatsonx/IbmWatsonxEmbeddingsRequestManager.java | 2 +- .../ibmwatsonx/IbmWatsonxRerankRequestManager.java | 2 +- .../inference/services/ibmwatsonx/IbmWatsonxService.java | 2 +- .../services/ibmwatsonx/IbmWatsonxServiceFields.java | 2 +- .../ibmwatsonx/action/IbmWatsonxActionCreator.java | 8 ++++---- .../ibmwatsonx/action/IbmWatsonxActionVisitor.java | 4 ++-- .../completion/IbmWatsonxChatCompletionModel.java | 2 +- .../IbmWatsonxChatCompletionServiceSettings.java | 2 +- .../embeddings/IbmWatsonxEmbeddingsServiceSettings.java | 2 +- .../request/IbmWatsonxChatCompletionRequest.java | 4 ++-- .../services/ibmwatsonx/rerank/IbmWatsonxRerankModel.java | 4 ++-- .../rerank/IbmWatsonxRerankServiceSettings.java | 2 +- .../response/IbmWatsonxRankedResponseEntity.java | 4 ++-- .../services/jinaai/rerank/JinaAIRerankModel.java | 4 ++-- .../openai/IbmWatsonCompletionResponseHandler.java | 2 +- .../services/voyageai/rerank/VoyageAIRerankModel.java | 4 ++-- 18 files changed, 28 insertions(+), 28 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/cohere/rerank/CohereRerankModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/cohere/rerank/CohereRerankModel.java index ca853a2d28909..2916afe94de80 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/cohere/rerank/CohereRerankModel.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/cohere/rerank/CohereRerankModel.java @@ -87,8 +87,8 @@ public DefaultSecretSettings getSecretSettings() { /** * Accepts a visitor to create an executable action. The returned action will not return documents in the response. 
- * @param visitor _ - * @param taskSettings _ + * @param visitor Interface for creating {@link ExecutableAction} instances for Cohere models. + * @param taskSettings Settings in the request to override the model's defaults * @return the rerank action */ @Override diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxCompletionResponseHandler.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxCompletionResponseHandler.java index 79bfa1bb79cac..3ef5f2dd93305 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxCompletionResponseHandler.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxCompletionResponseHandler.java @@ -16,7 +16,7 @@ public class IbmWatsonxCompletionResponseHandler extends OpenAiChatCompletionRes /** * Constructs a IbmWatsonxCompletionResponseHandler with the specified request type and response parser. * - * @param requestType The type of request being handled (e.g., "Ibm WatsonX completions"). + * @param requestType The type of request being handled (e.g., "IBM Watsonx completions"). * @param parseFunction The function to parse the response. 
*/ public IbmWatsonxCompletionResponseHandler(String requestType, ResponseParser parseFunction) { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxEmbeddingsRequestManager.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxEmbeddingsRequestManager.java index b7c679d3cda54..520ec9ad6fc6f 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxEmbeddingsRequestManager.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxEmbeddingsRequestManager.java @@ -35,7 +35,7 @@ public class IbmWatsonxEmbeddingsRequestManager extends IbmWatsonxRequestManager private static final ResponseHandler HANDLER = createEmbeddingsHandler(); private static ResponseHandler createEmbeddingsHandler() { - return new IbmWatsonxResponseHandler("ibm watsonx embeddings", IbmWatsonxEmbeddingsResponseEntity::fromResponse); + return new IbmWatsonxResponseHandler("IBM Watsonx embeddings", IbmWatsonxEmbeddingsResponseEntity::fromResponse); } private final IbmWatsonxEmbeddingsModel model; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxRerankRequestManager.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxRerankRequestManager.java index d62722a2b593b..c89878910231c 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxRerankRequestManager.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxRerankRequestManager.java @@ -31,7 +31,7 @@ public class IbmWatsonxRerankRequestManager extends IbmWatsonxRequestManager { private static ResponseHandler createIbmWatsonxResponseHandler() { return new IbmWatsonxResponseHandler( - "ibm 
watsonx rerank", + "IBM Watsonx rerank", (request, response) -> IbmWatsonxRankedResponseEntity.fromResponse(response) ); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxService.java index 253099254e035..8c7e2283361c8 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxService.java @@ -79,7 +79,7 @@ public class IbmWatsonxService extends SenderService { TaskType.CHAT_COMPLETION ); private static final ResponseHandler UNIFIED_CHAT_COMPLETION_HANDLER = new IbmWatsonUnifiedChatCompletionResponseHandler( - "ibm watsonx chat completions", + "IBM Watsonx chat completions", OpenAiChatCompletionResponseEntity::fromResponse ); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxServiceFields.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxServiceFields.java index 01c0a0abe6272..bfeabfc054986 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxServiceFields.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxServiceFields.java @@ -11,7 +11,7 @@ public class IbmWatsonxServiceFields { /** * Taken from - * Introduction to IBM watsonx.ai as a Service + * Introduction to IBM Watsonx.ai as a Service */ static final int EMBEDDING_MAX_BATCH_SIZE = 1000; public static final String API_VERSION = "api_version"; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/action/IbmWatsonxActionCreator.java 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/action/IbmWatsonxActionCreator.java index 703f7f298230a..bf5b885dd7eac 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/action/IbmWatsonxActionCreator.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/action/IbmWatsonxActionCreator.java @@ -42,7 +42,7 @@ public class IbmWatsonxActionCreator implements IbmWatsonxActionVisitor { private final Sender sender; private final ServiceComponents serviceComponents; - static final String COMPLETION_REQUEST_TYPE = "IBM WatsonX completions"; + static final String COMPLETION_REQUEST_TYPE = "IBM Watsonx completions"; static final String USER_ROLE = "user"; static final ResponseHandler COMPLETION_HANDLER = new IbmWatsonxCompletionResponseHandler( COMPLETION_REQUEST_TYPE, @@ -56,7 +56,7 @@ public IbmWatsonxActionCreator(Sender sender, ServiceComponents serviceComponent @Override public ExecutableAction create(IbmWatsonxEmbeddingsModel model, Map taskSettings) { - var failedToSendRequestErrorMessage = constructFailedToSendRequestMessage("IBM WatsonX embeddings"); + var failedToSendRequestErrorMessage = constructFailedToSendRequestMessage("IBM Watsonx embeddings"); return new SenderExecutableAction( sender, getEmbeddingsRequestManager(model, serviceComponents.truncator(), serviceComponents.threadPool()), @@ -95,13 +95,13 @@ protected IbmWatsonxEmbeddingsRequestManager getEmbeddingsRequestManager( } /** - * Builds an error message for Ibm Watsonx actions. + * Builds an error message for IBM Watsonx actions. * * @param requestType The type of request (e.g. COMPLETION, EMBEDDING, RERANK). * @param inferenceId The ID of the inference entity. * @return A formatted error message. 
*/ public static String buildErrorMessage(TaskType requestType, String inferenceId) { - return format("Failed to send Ibm Watsonx %s request from inference entity id [%s]", requestType.toString(), inferenceId); + return format("Failed to send IBM Watsonx %s request from inference entity id [%s]", requestType.toString(), inferenceId); } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/action/IbmWatsonxActionVisitor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/action/IbmWatsonxActionVisitor.java index 687abdef19638..0c2581ac33902 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/action/IbmWatsonxActionVisitor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/action/IbmWatsonxActionVisitor.java @@ -15,9 +15,9 @@ import java.util.Map; /** - * Interface for creating {@link ExecutableAction} instances for Watsonx models. + * Interface for creating {@link ExecutableAction} instances for IBM Watsonx models. *

- * This interface is used to create {@link ExecutableAction} instances for different types of Watsonx models, such as + * This interface is used to create {@link ExecutableAction} instances for different types of IBM Watsonx models, such as * {@link IbmWatsonxEmbeddingsModel} and {@link IbmWatsonxRerankModel} and {@link IbmWatsonxChatCompletionModel}. */ public interface IbmWatsonxActionVisitor { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/completion/IbmWatsonxChatCompletionModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/completion/IbmWatsonxChatCompletionModel.java index 6c12adba8b8cc..dac4e664923a2 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/completion/IbmWatsonxChatCompletionModel.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/completion/IbmWatsonxChatCompletionModel.java @@ -132,7 +132,7 @@ public URI uri() { /** * Accepts a visitor to create an executable action. The returned action will not return documents in the response. - * @param visitor _ + * @param visitor Interface for creating {@link ExecutableAction} instances for IBM Watsonx models. 
* @return the completion action */ public ExecutableAction accept(IbmWatsonxActionVisitor visitor, Map taskSettings) { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/completion/IbmWatsonxChatCompletionServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/completion/IbmWatsonxChatCompletionServiceSettings.java index c731c477ebdbe..0492a626787cf 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/completion/IbmWatsonxChatCompletionServiceSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/completion/IbmWatsonxChatCompletionServiceSettings.java @@ -44,7 +44,7 @@ public class IbmWatsonxChatCompletionServiceSettings extends FilteredXContentObj /** * Rate limits are defined at * Watson Machine Learning plans. - * For Lite plan, you've 120 requests per minute. + * For the Lite plan, the limit is 120 requests per minute. */ private static final RateLimitSettings DEFAULT_RATE_LIMIT_SETTINGS = new RateLimitSettings(120); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/embeddings/IbmWatsonxEmbeddingsServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/embeddings/IbmWatsonxEmbeddingsServiceSettings.java index 3a9625aef31c7..404df1c9cb4b4 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/embeddings/IbmWatsonxEmbeddingsServiceSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/embeddings/IbmWatsonxEmbeddingsServiceSettings.java @@ -52,7 +52,7 @@ public class IbmWatsonxEmbeddingsServiceSettings extends FilteredXContentObject /** * Rate limits are defined at * Watson Machine Learning plans. 
- * For Lite plan, you've 120 requests per minute. + * For the Lite plan, the limit is 120 requests per minute. */ private static final RateLimitSettings DEFAULT_RATE_LIMIT_SETTINGS = new RateLimitSettings(120); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/request/IbmWatsonxChatCompletionRequest.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/request/IbmWatsonxChatCompletionRequest.java index 1c63c6ea6262b..b8489c5105125 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/request/IbmWatsonxChatCompletionRequest.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/request/IbmWatsonxChatCompletionRequest.java @@ -60,13 +60,13 @@ public void decorateWithAuth(HttpPost httpPost) { @Override public Request truncate() { - // No truncation for Ibm WatsonX chat completions + // No truncation for IBM Watsonx chat completions return this; } @Override public boolean[] getTruncationInfo() { - // No truncation for Ibm WatsonX chat completions + // No truncation for IBM Watsonx chat completions return null; } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/rerank/IbmWatsonxRerankModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/rerank/IbmWatsonxRerankModel.java index 15dd648c2fa1a..a32f21e4c2af2 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/rerank/IbmWatsonxRerankModel.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/rerank/IbmWatsonxRerankModel.java @@ -100,8 +100,8 @@ public URI uri() { /** * Accepts a visitor to create an executable action. The returned action will not return documents in the response. 
- * @param visitor _ - * @param taskSettings _ + * @param visitor Interface for creating {@link ExecutableAction} instances for IBM Watsonx models. + * @param taskSettings Settings in the request to override the model's defaults * @return the rerank action */ @Override diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/rerank/IbmWatsonxRerankServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/rerank/IbmWatsonxRerankServiceSettings.java index 969622f9ba54f..b4b183736e841 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/rerank/IbmWatsonxRerankServiceSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/rerank/IbmWatsonxRerankServiceSettings.java @@ -41,7 +41,7 @@ public class IbmWatsonxRerankServiceSettings extends FilteredXContentObject impl /** * Rate limits are defined at * Watson Machine Learning plans. - * For Lite plan, you've 120 requests per minute. + * For the Lite plan, the limit is 120 requests per minute. 
*/ private static final RateLimitSettings DEFAULT_RATE_LIMIT_SETTINGS = new RateLimitSettings(120); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/response/IbmWatsonxRankedResponseEntity.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/response/IbmWatsonxRankedResponseEntity.java index 729e6ef980350..9d8edacab5c25 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/response/IbmWatsonxRankedResponseEntity.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/response/IbmWatsonxRankedResponseEntity.java @@ -32,7 +32,7 @@ public class IbmWatsonxRankedResponseEntity { private static final Logger logger = LogManager.getLogger(IbmWatsonxRankedResponseEntity.class); /** - * Parses the Ibm Watsonx ranked response. + * Parses the IBM Watsonx ranked response. * * For a request like: * "model": "rerank-english-v2.0", @@ -71,7 +71,7 @@ public class IbmWatsonxRankedResponseEntity { * ], * } * - * @param response the http response from ibm watsonx + * @param response the http response from IBM Watsonx * @return the parsed response * @throws IOException if there is an error parsing the response */ diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/jinaai/rerank/JinaAIRerankModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/jinaai/rerank/JinaAIRerankModel.java index a1fed50753627..a3404ed308835 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/jinaai/rerank/JinaAIRerankModel.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/jinaai/rerank/JinaAIRerankModel.java @@ -84,8 +84,8 @@ public DefaultSecretSettings getSecretSettings() { /** * Accepts a visitor to create an executable action. 
The returned action will not return documents in the response. - * @param visitor _ - * @param taskSettings _ + * @param visitor Interface for creating {@link ExecutableAction} instances for Jina AI models. + * @param taskSettings Settings in the request to override the model's defaults * @return the rerank action */ @Override diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/IbmWatsonCompletionResponseHandler.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/IbmWatsonCompletionResponseHandler.java index 19e98f668ee4b..8e33b0e366962 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/IbmWatsonCompletionResponseHandler.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/IbmWatsonCompletionResponseHandler.java @@ -19,7 +19,7 @@ public class IbmWatsonCompletionResponseHandler extends OpenAiChatCompletionResp /** * Constructs an IbmWatsonCompletionResponseHandler with the specified request type and response parser. * - * @param requestType The type of request being handled (e.g., "IBM WatsonX completions). + * @param requestType The type of request being handled (e.g., "IBM Watsonx completions"). * @param parseFunction The function to parse the response.
*/ public IbmWatsonCompletionResponseHandler(String requestType, ResponseParser parseFunction) { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/voyageai/rerank/VoyageAIRerankModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/voyageai/rerank/VoyageAIRerankModel.java index 7e58843c78f18..be391f05aa4cc 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/voyageai/rerank/VoyageAIRerankModel.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/voyageai/rerank/VoyageAIRerankModel.java @@ -109,8 +109,8 @@ public DefaultSecretSettings getSecretSettings() { /** * Accepts a visitor to create an executable action. The returned action will not return documents in the response. - * @param visitor _ - * @param taskSettings _ + * @param visitor Interface for creating {@link ExecutableAction} instances for Voyage AI models. + * @param taskSettings Settings in the request to override the model's defaults * @return the rerank action */ @Override From 136416dd7d14c71b26db0ba3ce3f266792342db6 Mon Sep 17 00:00:00 2001 From: Evgenii_Kazannik Date: Wed, 2 Jul 2025 16:34:21 +0200 Subject: [PATCH 3/4] remove ibm watsonx transport version constant --- server/src/main/java/org/elasticsearch/TransportVersions.java | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index 80569dda5934c..a9eb1d9396274 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -210,7 +210,6 @@ static TransportVersion def(int id) { public static final TransportVersion ML_INFERENCE_COHERE_API_VERSION_8_19 = def(8_841_0_60); public static final TransportVersion ESQL_DOCUMENTS_FOUND_AND_VALUES_LOADED_8_19 = def(8_841_0_61); public static
final TransportVersion ESQL_PROFILE_INCLUDE_PLAN_8_19 = def(8_841_0_62); - public static final TransportVersion ML_INFERENCE_IBM_WATSONX_COMPLETION_ADDED_8_19 = def(8_841_0_63); public static final TransportVersion V_9_0_0 = def(9_000_0_09); public static final TransportVersion INITIAL_ELASTICSEARCH_9_0_1 = def(9_000_0_10); public static final TransportVersion INITIAL_ELASTICSEARCH_9_0_2 = def(9_000_0_11); From b219e7274f049ae171ff1948699e3988af1014d1 Mon Sep 17 00:00:00 2001 From: Evgenii_Kazannik Date: Wed, 2 Jul 2025 19:03:40 +0200 Subject: [PATCH 4/4] update transport version --- server/src/main/java/org/elasticsearch/TransportVersions.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index f99befc71cf74..3c190e037b6ee 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -326,9 +326,8 @@ static TransportVersion def(int id) { public static final TransportVersion ML_INFERENCE_COHERE_API_VERSION = def(9_110_0_00); public static final TransportVersion ESQL_PROFILE_INCLUDE_PLAN = def(9_111_0_00); public static final TransportVersion MAPPINGS_IN_DATA_STREAMS = def(9_112_0_00); - public static final TransportVersion ML_INFERENCE_IBM_WATSONX_COMPLETION_ADDED = def(9_113_0_00); - public static final TransportVersion ESQL_SERIALIZE_TIMESERIES_FIELD_TYPE = def(9_113_0_00); + public static final TransportVersion ML_INFERENCE_IBM_WATSONX_COMPLETION_ADDED = def(9_114_0_00); /* * STOP! READ THIS FIRST! No, really, * ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _