Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
78ab1da
Add Ibm Granite Completion and Chat Completion support
Evgenii-Kazannik May 28, 2025
f92f348
Apply suggestions
Evgenii-Kazannik Jun 10, 2025
510e3c5
Merge branch 'main' into Add-IBM-Granite-support-for-completion-and-c…
Evgenii-Kazannik Jun 13, 2025
d6d19be
Merge branch 'main' into Add-IBM-Granite-support-for-completion-and-c…
Evgenii-Kazannik Jun 17, 2025
a6eaec6
Merge branch 'main' into Add-IBM-Granite-support-for-completion-and-c…
Evgenii-Kazannik Jun 23, 2025
9faf6f6
Merge branch 'main' into Add-IBM-Granite-support-for-completion-and-c…
Evgenii-Kazannik Jun 30, 2025
b23bdfb
Merge branch 'main' into Add-IBM-Granite-support-for-completion-and-c…
Evgenii-Kazannik Jul 2, 2025
136416d
remove ibm watsonx transport version constant
Evgenii-Kazannik Jul 2, 2025
ff6ccf5
Merge branch 'main' into Add-IBM-Granite-support-for-completion-and-c…
Evgenii-Kazannik Jul 2, 2025
80537a4
Merge branch 'main' into Add-IBM-Granite-support-for-completion-and-c…
Evgenii-Kazannik Jul 2, 2025
b44bab6
Merge branch 'main' into Add-IBM-Granite-support-for-completion-and-c…
Evgenii-Kazannik Jul 2, 2025
b1a76c3
Merge branch 'main' into Add-IBM-Granite-support-for-completion-and-c…
Evgenii-Kazannik Jul 2, 2025
1bf81ed
Merge branch 'main' into Add-IBM-Granite-support-for-completion-and-c…
Evgenii-Kazannik Jul 2, 2025
e70752f
Merge remote-tracking branch 'origin/Add-IBM-Granite-support-for-comp…
Evgenii-Kazannik Jul 2, 2025
b219e72
update transport version
Evgenii-Kazannik Jul 2, 2025
c950380
Merge branch 'main' into Add-IBM-Granite-support-for-completion-and-c…
Evgenii-Kazannik Jul 2, 2025
bf882a0
Merge branch 'main' into Add-IBM-Granite-support-for-completion-and-c…
Evgenii-Kazannik Jul 2, 2025
f9b086f
Merge branch 'main' into Add-IBM-Granite-support-for-completion-and-c…
Evgenii-Kazannik Jul 2, 2025
8e08b9e
Merge branch 'main' into Add-IBM-Granite-support-for-completion-and-c…
Evgenii-Kazannik Jul 2, 2025
08ab2f6
Merge branch 'main' into Add-IBM-Granite-support-for-completion-and-c…
Evgenii-Kazannik Jul 2, 2025
4ed865c
Merge branch 'main' into Add-IBM-Granite-support-for-completion-and-c…
Evgenii-Kazannik Jul 2, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/129146.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 129146
summary: "[ML] Add IBM watsonx Completion and Chat Completion support to the Inference Plugin"
area: Machine Learning
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,10 @@ static TransportVersion def(int id) {
public static final TransportVersion HEAP_USAGE_IN_CLUSTER_INFO = def(9_096_0_00);
public static final TransportVersion NONE_CHUNKING_STRATEGY = def(9_097_0_00);
public static final TransportVersion PROJECT_DELETION_GLOBAL_BLOCK = def(9_098_0_00);
public static final TransportVersion ML_INFERENCE_IBM_WATSONX_COMPLETION_ADDED = def(9_099_0_00);
public static final TransportVersion SECURITY_CLOUD_API_KEY_REALM_AND_TYPE = def(9_099_0_00);
public static final TransportVersion STATE_PARAM_GET_SNAPSHOT = def(9_100_0_00);
public static final TransportVersion PROJECT_ID_IN_SNAPSHOTS_DELETIONS_AND_REPO_CLEANUP = def(9_101_0_00);
public static final TransportVersion ML_INFERENCE_IBM_WATSONX_COMPLETION_ADDED = def(9_102_0_00);

/*
* STOP! READ THIS FIRST! No, really,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ public class IbmWatsonxCompletionResponseHandler extends OpenAiChatCompletionRes
/**
* Constructs an IbmWatsonxCompletionResponseHandler with the specified request type and response parser.
*
* @param requestType The type of request being handled (e.g., "IBM Watsonx completions").
* @param requestType The type of request being handled (e.g., "IBM watsonx completions").
* @param parseFunction The function to parse the response.
*/
public IbmWatsonxCompletionResponseHandler(String requestType, ResponseParser parseFunction) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public class IbmWatsonxEmbeddingsRequestManager extends IbmWatsonxRequestManager
private static final ResponseHandler HANDLER = createEmbeddingsHandler();

private static ResponseHandler createEmbeddingsHandler() {
return new IbmWatsonxResponseHandler("IBM Watsonx embeddings", IbmWatsonxEmbeddingsResponseEntity::fromResponse);
return new IbmWatsonxResponseHandler("IBM watsonx embeddings", IbmWatsonxEmbeddingsResponseEntity::fromResponse);
}

private final IbmWatsonxEmbeddingsModel model;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,13 @@
import org.elasticsearch.xpack.inference.services.ibmwatsonx.action.IbmWatsonxActionVisitor;
import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings;

import java.net.URI;
import java.util.Map;
import java.util.Objects;

public abstract class IbmWatsonxModel extends RateLimitGroupingModel {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you clarify why this needs to be a RateLimitGroupingModel?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This type is required by GenericRequestManager,
which I believe will also handle requests for other tasks in the future.


private final IbmWatsonxRateLimitServiceSettings rateLimitServiceSettings;

protected URI uri;

public IbmWatsonxModel(
ModelConfigurations configurations,
ModelSecrets secrets,
Expand Down Expand Up @@ -56,7 +53,7 @@ public IbmWatsonxRateLimitServiceSettings rateLimitServiceSettings() {

@Override
public int rateLimitGroupingHash() {
return Objects.hash(uri);
return Objects.hash(this.rateLimitServiceSettings);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ public class IbmWatsonxRerankRequestManager extends IbmWatsonxRequestManager {

private static ResponseHandler createIbmWatsonxResponseHandler() {
return new IbmWatsonxResponseHandler(
"IBM Watsonx rerank",
"IBM watsonx rerank",
(request, response) -> IbmWatsonxRankedResponseEntity.fromResponse(response)
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,14 +72,14 @@ public class IbmWatsonxService extends SenderService {

public static final String NAME = "watsonxai";

private static final String SERVICE_NAME = "IBM Watsonx";
private static final String SERVICE_NAME = "IBM watsonx";
private static final EnumSet<TaskType> supportedTaskTypes = EnumSet.of(
TaskType.TEXT_EMBEDDING,
TaskType.COMPLETION,
TaskType.CHAT_COMPLETION
);
private static final ResponseHandler UNIFIED_CHAT_COMPLETION_HANDLER = new IbmWatsonUnifiedChatCompletionResponseHandler(
"IBM Watsonx chat completions",
"IBM watsonx chat completions",
OpenAiChatCompletionResponseEntity::fromResponse
);

Expand Down Expand Up @@ -375,7 +375,7 @@ public static InferenceServiceConfiguration get() {

configurationMap.put(
API_VERSION,
new SettingsConfiguration.Builder(supportedTaskTypes).setDescription("The IBM Watsonx API version ID to use.")
new SettingsConfiguration.Builder(supportedTaskTypes).setDescription("The IBM watsonx API version ID to use.")
.setLabel("API Version")
.setRequired(true)
.setSensitive(false)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ public class IbmWatsonxServiceFields {

/**
* Taken from <a href="https://cloud.ibm.com/apidocs/watsonx-ai#text-embeddings">
* Introduction to IBM Watsonx.ai as a Service</a>
* Introduction to IBM watsonx.ai as a Service</a>
*/
static final int EMBEDDING_MAX_BATCH_SIZE = 1000;
public static final String API_VERSION = "api_version";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public class IbmWatsonxActionCreator implements IbmWatsonxActionVisitor {
private final Sender sender;
private final ServiceComponents serviceComponents;

static final String COMPLETION_REQUEST_TYPE = "IBM Watsonx completions";
static final String COMPLETION_REQUEST_TYPE = "IBM watsonx completions";
static final String USER_ROLE = "user";
static final ResponseHandler COMPLETION_HANDLER = new IbmWatsonxCompletionResponseHandler(
COMPLETION_REQUEST_TYPE,
Expand All @@ -56,7 +56,7 @@ public IbmWatsonxActionCreator(Sender sender, ServiceComponents serviceComponent

@Override
public ExecutableAction create(IbmWatsonxEmbeddingsModel model, Map<String, Object> taskSettings) {
var failedToSendRequestErrorMessage = constructFailedToSendRequestMessage("IBM Watsonx embeddings");
var failedToSendRequestErrorMessage = constructFailedToSendRequestMessage("IBM watsonx embeddings");
return new SenderExecutableAction(
sender,
getEmbeddingsRequestManager(model, serviceComponents.truncator(), serviceComponents.threadPool()),
Expand Down Expand Up @@ -95,13 +95,13 @@ protected IbmWatsonxEmbeddingsRequestManager getEmbeddingsRequestManager(
}

/**
* Builds an error message for IBM Watsonx actions.
* Builds an error message for IBM watsonx actions.
*
* @param requestType The type of request (e.g. COMPLETION, EMBEDDING, RERANK).
* @param inferenceId The ID of the inference entity.
* @return A formatted error message.
*/
public static String buildErrorMessage(TaskType requestType, String inferenceId) {
return format("Failed to send IBM Watsonx %s request from inference entity id [%s]", requestType.toString(), inferenceId);
return format("Failed to send IBM watsonx %s request from inference entity id [%s]", requestType.toString(), inferenceId);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
import java.util.Map;

/**
* Interface for creating {@link ExecutableAction} instances for IBM Watsonx models.
* Interface for creating {@link ExecutableAction} instances for IBM watsonx models.
* <p>
* This interface is used to create {@link ExecutableAction} instances for different types of IBM Watsonx models, such as
* This interface is used to create {@link ExecutableAction} instances for different types of IBM watsonx models, such as
* {@link IbmWatsonxEmbeddingsModel} and {@link IbmWatsonxRerankModel} and {@link IbmWatsonxChatCompletionModel}.
*/
public interface IbmWatsonxActionVisitor {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import org.elasticsearch.xpack.inference.external.action.ExecutableAction;
import org.elasticsearch.xpack.inference.services.ConfigurationParseContext;
import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxModel;
import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxRateLimitServiceSettings;
import org.elasticsearch.xpack.inference.services.ibmwatsonx.action.IbmWatsonxActionVisitor;
import org.elasticsearch.xpack.inference.services.settings.DefaultSecretSettings;

Expand Down Expand Up @@ -75,7 +74,7 @@ public static IbmWatsonxChatCompletionModel of(IbmWatsonxChatCompletionModel mod
var overriddenServiceSettings = new IbmWatsonxChatCompletionServiceSettings(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like this overriddenServiceSettings doesn't override any of the original service settings. Should it take some value from the request passed into this function and use it as the override?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. I corrected it, so now the model (model_id) can be overridden with the value from the request.

originalModelServiceSettings.uri(),
originalModelServiceSettings.apiVersion(),
originalModelServiceSettings.modelId(),
request.model(),
originalModelServiceSettings.projectId(),
originalModelServiceSettings.rateLimitSettings()
);
Expand Down Expand Up @@ -104,11 +103,6 @@ public static IbmWatsonxChatCompletionModel of(IbmWatsonxChatCompletionModel mod
);
}

@Override
public IbmWatsonxRateLimitServiceSettings rateLimitServiceSettings() {
return super.rateLimitServiceSettings();
}

@Override
public IbmWatsonxChatCompletionServiceSettings getServiceSettings() {
return (IbmWatsonxChatCompletionServiceSettings) super.getServiceSettings();
Expand All @@ -132,7 +126,7 @@ public URI uri() {

/**
* Accepts a visitor to create an executable action. The returned action will not return documents in the response.
* @param visitor Interface for creating {@link ExecutableAction} instances for IBM Watsonx models.
* @param visitor Interface for creating {@link ExecutableAction} instances for IBM watsonx models.
* @return the completion action
*/
public ExecutableAction accept(IbmWatsonxActionVisitor visitor, Map<String, Object> taskSettings) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@
import java.nio.charset.StandardCharsets;
import java.util.Objects;

import static org.elasticsearch.xpack.inference.external.request.RequestUtils.createAuthBearerHeader;

public class IbmWatsonxChatCompletionRequest implements IbmWatsonxRequest {
private final IbmWatsonxChatCompletionModel model;
private final UnifiedChatInput chatInput;
Expand All @@ -42,7 +40,6 @@ public HttpRequest createHttpRequest() {
httpPost.setEntity(byteEntity);

httpPost.setHeader(HttpHeaders.CONTENT_TYPE, XContentType.JSON.mediaType());
httpPost.setHeader(createAuthBearerHeader(model.getSecretSettings().apiKey()));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you clarify why this was removed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure. The decorateWithAuth() method already adds a header with the Bearer token, so setting it here as well was a bit of duplication.


decorateWithAuth(httpPost);

Expand All @@ -60,13 +57,13 @@ public void decorateWithAuth(HttpPost httpPost) {

@Override
public Request truncate() {
// No truncation for IBM Watsonx chat completions
// No truncation for IBM watsonx chat completions
return this;
}

@Override
public boolean[] getTruncationInfo() {
// No truncation for IBM Watsonx chat completions
// No truncation for IBM watsonx chat completions
return null;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ public URI uri() {

/**
* Accepts a visitor to create an executable action. The returned action will not return documents in the response.
* @param visitor Interface for creating {@link ExecutableAction} instances for IBM Watsonx models.
* @param visitor Interface for creating {@link ExecutableAction} instances for IBM watsonx models.
* @param taskSettings Settings in the request to override the model's defaults
* @return the rerank action
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

public class IbmWatsonxEmbeddingsResponseEntity {

private static final String FAILED_TO_FIND_FIELD_TEMPLATE = "Failed to find required field [%s] in IBM Watsonx embeddings response";
private static final String FAILED_TO_FIND_FIELD_TEMPLATE = "Failed to find required field [%s] in IBM watsonx embeddings response";

public static TextEmbeddingFloatResults fromResponse(Request request, HttpResult response) throws IOException {
var parserConfig = XContentParserConfiguration.EMPTY.withDeprecationHandler(LoggingDeprecationHandler.INSTANCE);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ public class IbmWatsonxRankedResponseEntity {
private static final Logger logger = LogManager.getLogger(IbmWatsonxRankedResponseEntity.class);

/**
* Parses the IBM Watsonx ranked response.
* Parses the IBM watsonx ranked response.
*
* For a request like:
* "model": "rerank-english-v2.0",
Expand Down Expand Up @@ -71,7 +71,7 @@ public class IbmWatsonxRankedResponseEntity {
* ],
* }
*
* @param response the http response from IBM Watsonx
* @param response the http response from IBM watsonx
* @return the parsed response
* @throws IOException if there is an error parsing the response
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@
public class MistralActionCreator implements MistralActionVisitor {

public static final String COMPLETION_ERROR_PREFIX = "Mistral completions";
static final String USER_ROLE = "user";
static final ResponseHandler COMPLETION_HANDLER = new MistralCompletionResponseHandler(
public static final String USER_ROLE = "user";
public static final ResponseHandler COMPLETION_HANDLER = new MistralCompletionResponseHandler(
"mistral completions",
OpenAiChatCompletionResponseEntity::fromResponse
);
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ public DefaultSecretSettings getSecretSettings() {

/**
* Accepts a visitor to create an executable action. The returned action will not return documents in the response.
* @param visitor Interface for creating {@link ExecutableAction} instances for IBM Voyage AI models.
* @param visitor Interface for creating {@link ExecutableAction} instances for Voyage AI models.
* @param taskSettings Settings in the request to override the model's defaults
* @return the rerank action
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,6 @@ public void testSerializationWithNestedObjects() throws IOException {
String randomStop = "stop" + random.nextInt(1000);
float randomTemperature = (float) ((float) Math.round(0.5d + (double) random.nextFloat() * 0.5d * 100000d) / 100000d);
float randomTopP = (float) ((float) Math.round(0.5d + (double) random.nextFloat() * 0.5d * 100000d) / 100000d);
int randomTimeLimit = random.nextInt(1000);

UnifiedCompletionRequest.Message message = new UnifiedCompletionRequest.Message(
new UnifiedCompletionRequest.ContentString(randomContent),
Expand Down
Loading
Loading
You are viewing a condensed version of this merge commit. You can view the full changes here.