-
Notifications
You must be signed in to change notification settings - Fork 25.6k
Add Ibm Granite Completion and Chat Completion support #129146
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
78ab1da
f92f348
510e3c5
d6d19be
a6eaec6
9faf6f6
b23bdfb
136416d
ff6ccf5
80537a4
b44bab6
b1a76c3
1bf81ed
e70752f
b219e72
c950380
bf882a0
f9b086f
8e08b9e
08ab2f6
4ed865c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| pr: 129146 | ||
| summary: "[ML] Add IBM watsonx Completion and Chat Completion support to the Inference Plugin" | ||
| area: Machine Learning | ||
| type: enhancement | ||
| issues: [] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,7 +16,6 @@ | |
| import org.elasticsearch.xpack.inference.external.action.ExecutableAction; | ||
| import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; | ||
| import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxModel; | ||
| import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxRateLimitServiceSettings; | ||
| import org.elasticsearch.xpack.inference.services.ibmwatsonx.action.IbmWatsonxActionVisitor; | ||
| import org.elasticsearch.xpack.inference.services.settings.DefaultSecretSettings; | ||
|
|
||
|
|
@@ -75,7 +74,7 @@ public static IbmWatsonxChatCompletionModel of(IbmWatsonxChatCompletionModel mod | |
| var overriddenServiceSettings = new IbmWatsonxChatCompletionServiceSettings( | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It looks like this There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks. I corrected it, so now we can override the model (model_id). |
||
| originalModelServiceSettings.uri(), | ||
| originalModelServiceSettings.apiVersion(), | ||
| originalModelServiceSettings.modelId(), | ||
| request.model(), | ||
| originalModelServiceSettings.projectId(), | ||
| originalModelServiceSettings.rateLimitSettings() | ||
| ); | ||
|
|
@@ -104,11 +103,6 @@ public static IbmWatsonxChatCompletionModel of(IbmWatsonxChatCompletionModel mod | |
| ); | ||
| } | ||
|
|
||
| @Override | ||
| public IbmWatsonxRateLimitServiceSettings rateLimitServiceSettings() { | ||
| return super.rateLimitServiceSettings(); | ||
| } | ||
|
|
||
| @Override | ||
| public IbmWatsonxChatCompletionServiceSettings getServiceSettings() { | ||
| return (IbmWatsonxChatCompletionServiceSettings) super.getServiceSettings(); | ||
|
|
@@ -132,7 +126,7 @@ public URI uri() { | |
|
|
||
| /** | ||
| * Accepts a visitor to create an executable action. The returned action will not return documents in the response. | ||
| * @param visitor Interface for creating {@link ExecutableAction} instances for IBM Watsonx models. | ||
| * @param visitor Interface for creating {@link ExecutableAction} instances for IBM watsonx models. | ||
| * @return the completion action | ||
| */ | ||
| public ExecutableAction accept(IbmWatsonxActionVisitor visitor, Map<String, Object> taskSettings) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,8 +21,6 @@ | |
| import java.nio.charset.StandardCharsets; | ||
| import java.util.Objects; | ||
|
|
||
| import static org.elasticsearch.xpack.inference.external.request.RequestUtils.createAuthBearerHeader; | ||
|
|
||
| public class IbmWatsonxChatCompletionRequest implements IbmWatsonxRequest { | ||
| private final IbmWatsonxChatCompletionModel model; | ||
| private final UnifiedChatInput chatInput; | ||
|
|
@@ -42,7 +40,6 @@ public HttpRequest createHttpRequest() { | |
| httpPost.setEntity(byteEntity); | ||
|
|
||
| httpPost.setHeader(HttpHeaders.CONTENT_TYPE, XContentType.JSON.mediaType()); | ||
| httpPost.setHeader(createAuthBearerHeader(model.getSecretSettings().apiKey())); | ||
|
||
|
|
||
| decorateWithAuth(httpPost); | ||
|
|
||
|
|
@@ -60,13 +57,13 @@ public void decorateWithAuth(HttpPost httpPost) { | |
|
|
||
| @Override | ||
| public Request truncate() { | ||
| // No truncation for IBM Watsonx chat completions | ||
| // No truncation for IBM watsonx chat completions | ||
| return this; | ||
| } | ||
|
|
||
| @Override | ||
| public boolean[] getTruncationInfo() { | ||
| // No truncation for IBM Watsonx chat completions | ||
| // No truncation for IBM watsonx chat completions | ||
| return null; | ||
| } | ||
|
|
||
|
|
||
This file was deleted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you clarify why this needs to be a
RateLimitGroupingModel?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This type needs to be used in GenericRequestManager,
which I believe is also going to handle the requests for other tasks in the future.