Skip to content

Commit ab1fe7a

Browse files
committed
Moved rateLimitGroupingHash to subclasses of GoogleVertexAiModel
1 parent bf27166 commit ab1fe7a

File tree

5 files changed

+49
-12
lines changed

5 files changed

+49
-12
lines changed

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/GoogleVertexAiModel.java

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -60,15 +60,6 @@ public URI nonStreamingUri() {
6060
return nonStreamingUri;
6161
}
6262

63-
@Override
64-
public int rateLimitGroupingHash() {
65-
// In VertexAI rate limiting is scoped to the project, region and model. URI already has this information so we are using that.
66-
// API Key does not affect the quota
67-
// https://ai.google.dev/gemini-api/docs/rate-limits
68-
// https://cloud.google.com/vertex-ai/docs/quotas
69-
return Objects.hash(nonStreamingUri);
70-
}
71-
7263
@Override
7364
public RateLimitSettings rateLimitSettings() {
7465
return rateLimitServiceSettings().rateLimitSettings();

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/GoogleVertexAiStreamingProcessor.java

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,6 @@
2525
import java.util.Objects;
2626
import java.util.stream.Stream;
2727

28-
import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken;
29-
import static org.elasticsearch.xpack.inference.external.response.XContentUtils.moveToFirstToken;
30-
3128
public class GoogleVertexAiStreamingProcessor extends DelegatingProcessor<Deque<ServerSentEvent>, InferenceServiceResults.Result> {
3229

3330
@Override

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/completion/GoogleVertexAiChatCompletionModel.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,4 +156,25 @@ public static URI buildUriStreaming(String location, String projectId, String mo
156156
.setCustomQuery(GoogleVertexAiUtils.QUERY_PARAM_ALT_SSE)
157157
.build();
158158
}
159+
160+
@Override
161+
public int rateLimitGroupingHash() {
162+
// In VertexAI rate limiting is scoped to the project, region, model and endpoint.
163+
// API Key does not affect the quota
164+
// https://ai.google.dev/gemini-api/docs/rate-limits
165+
// https://cloud.google.com/vertex-ai/docs/quotas
166+
var projectId = getServiceSettings().projectId();
167+
var location = getServiceSettings().location();
168+
var modelId = getServiceSettings().modelId();
169+
170+
// Since we don't beforehand know which API is going to be used, we take a conservative approach and
171+
// count both endpoint for the rate limit
172+
return Objects.hash(
173+
projectId,
174+
location,
175+
modelId,
176+
GoogleVertexAiUtils.GENERATE_CONTENT,
177+
GoogleVertexAiUtils.STREAM_GENERATE_CONTENT
178+
);
179+
}
159180
}

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/embeddings/GoogleVertexAiEmbeddingsModel.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import java.net.URI;
2424
import java.net.URISyntaxException;
2525
import java.util.Map;
26+
import java.util.Objects;
2627

2728
import static org.elasticsearch.core.Strings.format;
2829

@@ -150,4 +151,17 @@ public static URI buildUri(String location, String projectId, String modelId) th
150151
)
151152
.build();
152153
}
154+
155+
@Override
156+
public int rateLimitGroupingHash() {
157+
// In VertexAI rate limiting is scoped to the project, region, model and endpoint.
158+
// API Key does not affect the quota
159+
// https://ai.google.dev/gemini-api/docs/rate-limits
160+
// https://cloud.google.com/vertex-ai/docs/quotas
161+
var projectId = getServiceSettings().projectId();
162+
var location = getServiceSettings().location();
163+
var modelId = getServiceSettings().modelId();
164+
165+
return Objects.hash(projectId, location, modelId, GoogleVertexAiUtils.PREDICT);
166+
}
153167
}

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/rerank/GoogleVertexAiRerankModel.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import java.net.URI;
2323
import java.net.URISyntaxException;
2424
import java.util.Map;
25+
import java.util.Objects;
2526

2627
import static org.elasticsearch.core.Strings.format;
2728

@@ -132,4 +133,17 @@ public static URI buildUri(String projectId) throws URISyntaxException {
132133
)
133134
.build();
134135
}
136+
137+
@Override
138+
public int rateLimitGroupingHash() {
139+
// In VertexAI rate limiting is scoped to the project, region, model and endpoint.
140+
// API Key does not affect the quota
141+
// https://ai.google.dev/gemini-api/docs/rate-limits
142+
// https://cloud.google.com/vertex-ai/docs/quotas
143+
var projectId = getServiceSettings().projectId();
144+
var modelId = getServiceSettings().modelId();
145+
146+
// In this case, we don't include the location since it's a global endpoint
147+
return Objects.hash(projectId, modelId);
148+
}
135149
}

0 commit comments

Comments
 (0)