Skip to content

Commit 30f53cd

Browse files
committed
Fixed the rate limit grouping hash of GoogleVertexAiRerankModel and refactored the URI handling in GoogleVertexAiUnifiedChatCompletionRequest
1 parent ab1fe7a commit 30f53cd

File tree

2 files changed

+5
-4
lines changed

2 files changed

+5
-4
lines changed

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/request/GoogleVertexAiUnifiedChatCompletionRequest.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,15 +25,16 @@ public class GoogleVertexAiUnifiedChatCompletionRequest implements GoogleVertexA
2525

2626
private final GoogleVertexAiChatCompletionModel model;
2727
private final UnifiedChatInput unifiedChatInput;
28+
private final URI uri;
2829

2930
public GoogleVertexAiUnifiedChatCompletionRequest(UnifiedChatInput unifiedChatInput, GoogleVertexAiChatCompletionModel model) {
3031
this.model = Objects.requireNonNull(model);
3132
this.unifiedChatInput = Objects.requireNonNull(unifiedChatInput);
33+
this.uri = unifiedChatInput.stream() ? model.streamingURI() : model.nonStreamingUri();
3234
}
3335

3436
@Override
3537
public HttpRequest createHttpRequest() {
36-
var uri = unifiedChatInput.stream() ? model.streamingURI() : model.nonStreamingUri();
3738
HttpPost httpPost = new HttpPost(uri);
3839

3940
var requestEntity = new GoogleVertexAiUnifiedChatCompletionRequestEntity(unifiedChatInput);
@@ -53,7 +54,7 @@ public void decorateWithAuth(HttpPost httpPost) {
5354

5455
@Override
5556
public URI getURI() {
56-
return model.nonStreamingUri();
57+
return this.uri;
5758
}
5859

5960
@Override

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/rerank/GoogleVertexAiRerankModel.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import static org.elasticsearch.core.Strings.format;
2828

2929
public class GoogleVertexAiRerankModel extends GoogleVertexAiModel {
30+
private static final String RERANK_RATE_LIMIT_ENDPOINT_ID = "rerank";
3031

3132
public GoogleVertexAiRerankModel(
3233
String inferenceEntityId,
@@ -143,7 +144,6 @@ public int rateLimitGroupingHash() {
143144
var projectId = getServiceSettings().projectId();
144145
var modelId = getServiceSettings().modelId();
145146

146-
// In this case, we don't include the location since it's a global endpoint
147-
return Objects.hash(projectId, modelId);
147+
return Objects.hash(projectId, modelId, RERANK_RATE_LIMIT_ENDPOINT_ID);
148148
}
149149
}

0 commit comments

Comments
 (0)